Expose dBFS when doing audio analysis (#6979)

* Expose dBFS when doing audio analysis
* Implement metadata communicator
* revert test changes
* Reverting the tests changes. For real this time
* Address feedback
* Address feedback
* Address feedback
* Address feedback
2023-07-13 19:52:33 -05:00
parent 2d52fbd8e8
commit b909ff4ecb
4 changed files with 87 additions and 4 deletions
--- a/frigate/events/audio.py
+++ b/frigate/events/audio.py
@@ -7,12 +7,13 @@ import os
 import signal
 import threading
 from types import FrameType
-from typing import Optional
+from typing import Optional, Tuple

 import numpy as np
 import requests
 from setproctitle import setproctitle

+from frigate.comms.inter_process import InterProcessCommunicator
 from frigate.config import CameraConfig, FrigateConfig
 from frigate.const import (
    AUDIO_DURATION,
@@ -51,6 +52,7 @@ def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> lis
 def listen_to_audio(
    config: FrigateConfig,
    process_info: dict[str, FeatureMetricsTypes],
+    inter_process_communicator: InterProcessCommunicator,
 ) -> None:
    stop_event = mp.Event()
    audio_threads: list[threading.Thread] = []
@@ -74,7 +76,9 @@ def listen_to_audio(

    for camera in config.cameras.values():
        if camera.enabled and camera.audio.enabled_in_config:
-            audio = AudioEventMaintainer(camera, process_info, stop_event)
+            audio = AudioEventMaintainer(
+                camera, process_info, stop_event, inter_process_communicator
+            )
            audio_threads.append(audio)
            audio.start()

@@ -144,11 +148,13 @@ class AudioEventMaintainer(threading.Thread):
        camera: CameraConfig,
        feature_metrics: dict[str, FeatureMetricsTypes],
        stop_event: mp.Event,
+        inter_process_communicator: InterProcessCommunicator,
    ) -> None:
        threading.Thread.__init__(self)
        self.name = f"{camera.name}_audio_event_processor"
        self.config = camera
        self.feature_metrics = feature_metrics
+        self.inter_process_communicator = inter_process_communicator
        self.detections: dict[dict[str, any]] = feature_metrics
        self.stop_event = stop_event
        self.detector = AudioTfl(stop_event)
@@ -169,7 +175,8 @@ class AudioEventMaintainer(threading.Thread):
        if not self.feature_metrics[self.config.name]["audio_enabled"].value:
            return

-        rms = np.sqrt(np.mean(np.absolute(np.square(audio.astype(np.float32)))))
+        audio_as_float = audio.astype(np.float32)
+        rms, _ = self.calculate_audio_levels(audio_as_float)

        # only run audio detection when volume is above min_volume
        if rms >= self.config.audio.min_volume:
@@ -184,6 +191,23 @@ class AudioEventMaintainer(threading.Thread):

        self.expire_detections()

+    def calculate_audio_levels(self, audio_as_float: np.ndarray) -> Tuple[float, float]:
+        """Publish and return the (rms, dBFS) levels of one chunk of audio samples."""
+        # Levels are cast to built-in float below since np.float32 isn't serializable
+        rms = np.sqrt(np.mean(audio_as_float**2))
+
+        # Decibels relative to full scale; silence maps to -inf without calling log10(0)
+        dBFS = float("-inf") if rms == 0 else 20 * np.log10(rms / AUDIO_MAX_BIT_RANGE)
+
+        self.inter_process_communicator.queue.put(
+            (f"{self.config.name}/audio/dBFS", float(dBFS))
+        )
+        self.inter_process_communicator.queue.put(
+            (f"{self.config.name}/audio/rms", float(rms))
+        )
+
+        return float(rms), float(dBFS)
+
    def handle_detection(self, label: str, score: float) -> None:
        if self.detections.get(label):
            self.detections[label][