Expose dBFS when doing audio analysis (#6979)

* Expose dBFS when doing audio analysis

* Implement metadata communicator

* revert test changes

* Reverting the test changes. For real this time

* Address feedback

* Address feedback

* Address feedback

* Address feedback
This commit is contained in:
jpverdejo
2023-07-13 19:52:33 -05:00
committed by GitHub
parent 2d52fbd8e8
commit b909ff4ecb
4 changed files with 87 additions and 4 deletions

View File

@@ -7,12 +7,13 @@ import os
import signal
import threading
from types import FrameType
from typing import Optional
from typing import Optional, Tuple
import numpy as np
import requests
from setproctitle import setproctitle
from frigate.comms.inter_process import InterProcessCommunicator
from frigate.config import CameraConfig, FrigateConfig
from frigate.const import (
AUDIO_DURATION,
@@ -51,6 +52,7 @@ def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> lis
def listen_to_audio(
config: FrigateConfig,
process_info: dict[str, FeatureMetricsTypes],
inter_process_communicator: InterProcessCommunicator,
) -> None:
stop_event = mp.Event()
audio_threads: list[threading.Thread] = []
@@ -74,7 +76,9 @@ def listen_to_audio(
for camera in config.cameras.values():
if camera.enabled and camera.audio.enabled_in_config:
audio = AudioEventMaintainer(camera, process_info, stop_event)
audio = AudioEventMaintainer(
camera, process_info, stop_event, inter_process_communicator
)
audio_threads.append(audio)
audio.start()
@@ -144,11 +148,13 @@ class AudioEventMaintainer(threading.Thread):
camera: CameraConfig,
feature_metrics: dict[str, FeatureMetricsTypes],
stop_event: mp.Event,
inter_process_communicator: InterProcessCommunicator,
) -> None:
threading.Thread.__init__(self)
self.name = f"{camera.name}_audio_event_processor"
self.config = camera
self.feature_metrics = feature_metrics
self.inter_process_communicator = inter_process_communicator
self.detections: dict[dict[str, any]] = feature_metrics
self.stop_event = stop_event
self.detector = AudioTfl(stop_event)
@@ -169,7 +175,8 @@ class AudioEventMaintainer(threading.Thread):
if not self.feature_metrics[self.config.name]["audio_enabled"].value:
return
rms = np.sqrt(np.mean(np.absolute(np.square(audio.astype(np.float32)))))
audio_as_float = audio.astype(np.float32)
rms, _ = self.calculate_audio_levels(audio_as_float)
# only run audio detection when volume is above min_volume
if rms >= self.config.audio.min_volume:
@@ -184,6 +191,23 @@ class AudioEventMaintainer(threading.Thread):
self.expire_detections()
def calculate_audio_levels(self, audio_as_float: np.float32) -> Tuple[float, float]:
    """Compute the RMS amplitude and dBFS level of an audio chunk.

    Publishes both values over the inter-process communicator queue and
    returns them as builtin floats.
    """
    # Root-Mean-Square: the average signal amplitude of the chunk.
    mean_square = np.mean(np.absolute(audio_as_float**2))
    rms = np.sqrt(mean_square)

    # Transform RMS into decibels relative to full scale.
    dBFS = 20 * np.log10(np.abs(rms) / AUDIO_MAX_BIT_RANGE)

    # np.float32 isn't serializable, so cast to builtin float (np.float64)
    # before publishing the messages.
    for topic, level in (
        (f"{self.config.name}/audio/dBFS", float(dBFS)),
        (f"{self.config.name}/audio/rms", float(rms)),
    ):
        self.inter_process_communicator.queue.put((topic, level))

    return float(rms), float(dBFS)
def handle_detection(self, label: str, score: float) -> None:
if self.detections.get(label):
self.detections[label][