forked from Github/frigate
Expose dBFS when doing audio analysis (#6979)
* Expose dBFS when doing audio analysis * Implement metadata communicator * revert test changes * Reverting the tests changes. For real this time * Address feedback * Address feedback * Address feedback * Address feedback
This commit is contained in:
@@ -7,12 +7,13 @@ import os
|
||||
import signal
|
||||
import threading
|
||||
from types import FrameType
|
||||
from typing import Optional
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
from setproctitle import setproctitle
|
||||
|
||||
from frigate.comms.inter_process import InterProcessCommunicator
|
||||
from frigate.config import CameraConfig, FrigateConfig
|
||||
from frigate.const import (
|
||||
AUDIO_DURATION,
|
||||
@@ -51,6 +52,7 @@ def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> lis
|
||||
def listen_to_audio(
|
||||
config: FrigateConfig,
|
||||
process_info: dict[str, FeatureMetricsTypes],
|
||||
inter_process_communicator: InterProcessCommunicator,
|
||||
) -> None:
|
||||
stop_event = mp.Event()
|
||||
audio_threads: list[threading.Thread] = []
|
||||
@@ -74,7 +76,9 @@ def listen_to_audio(
|
||||
|
||||
for camera in config.cameras.values():
|
||||
if camera.enabled and camera.audio.enabled_in_config:
|
||||
audio = AudioEventMaintainer(camera, process_info, stop_event)
|
||||
audio = AudioEventMaintainer(
|
||||
camera, process_info, stop_event, inter_process_communicator
|
||||
)
|
||||
audio_threads.append(audio)
|
||||
audio.start()
|
||||
|
||||
@@ -144,11 +148,13 @@ class AudioEventMaintainer(threading.Thread):
|
||||
camera: CameraConfig,
|
||||
feature_metrics: dict[str, FeatureMetricsTypes],
|
||||
stop_event: mp.Event,
|
||||
inter_process_communicator: InterProcessCommunicator,
|
||||
) -> None:
|
||||
threading.Thread.__init__(self)
|
||||
self.name = f"{camera.name}_audio_event_processor"
|
||||
self.config = camera
|
||||
self.feature_metrics = feature_metrics
|
||||
self.inter_process_communicator = inter_process_communicator
|
||||
self.detections: dict[dict[str, any]] = feature_metrics
|
||||
self.stop_event = stop_event
|
||||
self.detector = AudioTfl(stop_event)
|
||||
@@ -169,7 +175,8 @@ class AudioEventMaintainer(threading.Thread):
|
||||
if not self.feature_metrics[self.config.name]["audio_enabled"].value:
|
||||
return
|
||||
|
||||
rms = np.sqrt(np.mean(np.absolute(np.square(audio.astype(np.float32)))))
|
||||
audio_as_float = audio.astype(np.float32)
|
||||
rms, _ = self.calculate_audio_levels(audio_as_float)
|
||||
|
||||
# only run audio detection when volume is above min_volume
|
||||
if rms >= self.config.audio.min_volume:
|
||||
@@ -184,6 +191,23 @@ class AudioEventMaintainer(threading.Thread):
|
||||
|
||||
self.expire_detections()
|
||||
|
||||
def calculate_audio_levels(self, audio_as_float: np.float32) -> Tuple[float, float]:
|
||||
# Calculate RMS (Root-Mean-Square) which represents the average signal amplitude
|
||||
# Note: np.float32 isn't serializable, we must use np.float64 to publish the message
|
||||
rms = np.sqrt(np.mean(np.absolute(audio_as_float**2)))
|
||||
|
||||
# Transform RMS to dBFS (decibels relative to full scale)
|
||||
dBFS = 20 * np.log10(np.abs(rms) / AUDIO_MAX_BIT_RANGE)
|
||||
|
||||
self.inter_process_communicator.queue.put(
|
||||
(f"{self.config.name}/audio/dBFS", float(dBFS))
|
||||
)
|
||||
self.inter_process_communicator.queue.put(
|
||||
(f"{self.config.name}/audio/rms", float(rms))
|
||||
)
|
||||
|
||||
return float(rms), float(dBFS)
|
||||
|
||||
def handle_detection(self, label: str, score: float) -> None:
|
||||
if self.detections.get(label):
|
||||
self.detections[label][
|
||||
|
||||
Reference in New Issue
Block a user