Nvidia Jetson ffmpeg + TensorRT support (#6458)

* Non-Jetson changes Required for later commits: - Allow base image to be overridden (and don't assume its WORKDIR) - Ensure python3.9 - Map hwaccel decode presets as strings instead of lists Not required: - Fix existing documentation - Simplify hwaccel scale logic * Prepare for multi-arch tensorrt build * Add tensorrt images for Jetson boards * Add Jetson ffmpeg hwaccel * Update docs * Add CODEOWNERS * CI * Change default model from yolov7-tiny-416 to yolov7-320 In my experience the tiny models perform markedly worse without being much faster * fixup! Update docs
2023-07-26 06:50:41 -04:00
parent 680198148b
commit a96a951e23
28 changed files with 567 additions and 139 deletions
--- a/frigate/config.py
+++ b/frigate/config.py
@@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
            ffmpeg_input.global_args or self.ffmpeg.global_args
        )
        hwaccel_args = get_ffmpeg_arg_list(
-            parse_preset_hardware_acceleration_decode(ffmpeg_input.hwaccel_args)
+            parse_preset_hardware_acceleration_decode(
+                ffmpeg_input.hwaccel_args,
+                self.detect.fps,
+                self.detect.width,
+                self.detect.height,
+            )
            or ffmpeg_input.hwaccel_args
-            or parse_preset_hardware_acceleration_decode(self.ffmpeg.hwaccel_args)
+            or parse_preset_hardware_acceleration_decode(
+                self.ffmpeg.hwaccel_args,
+                self.detect.fps,
+                self.detect.width,
+                self.detect.height,
+            )
            or self.ffmpeg.hwaccel_args
        )
        input_args = get_ffmpeg_arg_list(
--- a/frigate/ffmpeg_presets.py
+++ b/frigate/ffmpeg_presets.py
@@ -55,58 +55,16 @@ _user_agent_args = [
 ]

 PRESETS_HW_ACCEL_DECODE = {
-    "preset-rpi-32-h264": ["-c:v:1", "h264_v4l2m2m"],
-    "preset-rpi-64-h264": ["-c:v:1", "h264_v4l2m2m"],
-    "preset-vaapi": [
-        "-hwaccel_flags",
-        "allow_profile_mismatch",
-        "-hwaccel",
-        "vaapi",
-        "-hwaccel_device",
-        _gpu_selector.get_selected_gpu(),
-        "-hwaccel_output_format",
-        "vaapi",
-    ],
-    "preset-intel-qsv-h264": [
-        "-hwaccel",
-        "qsv",
-        "-qsv_device",
-        _gpu_selector.get_selected_gpu(),
-        "-hwaccel_output_format",
-        "qsv",
-        "-c:v",
-        "h264_qsv",
-    ],
-    "preset-intel-qsv-h265": [
-        "-load_plugin",
-        "hevc_hw",
-        "-hwaccel",
-        "qsv",
-        "-qsv_device",
-        _gpu_selector.get_selected_gpu(),
-        "-hwaccel_output_format",
-        "qsv",
-        "-c:v",
-        "hevc_qsv",
-    ],
-    "preset-nvidia-h264": [
-        "-hwaccel",
-        "cuda",
-        "-hwaccel_output_format",
-        "cuda",
-    ],
-    "preset-nvidia-h265": [
-        "-hwaccel",
-        "cuda",
-        "-hwaccel_output_format",
-        "cuda",
-    ],
-    "preset-nvidia-mjpeg": [
-        "-hwaccel",
-        "cuda",
-        "-hwaccel_output_format",
-        "cuda",
-    ],
+    "preset-rpi-32-h264": "-c:v:1 h264_v4l2m2m",
+    "preset-rpi-64-h264": "-c:v:1 h264_v4l2m2m",
+    "preset-vaapi": f"-hwaccel_flags allow_profile_mismatch -hwaccel vaapi -hwaccel_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format vaapi",
+    "preset-intel-qsv-h264": f"-hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v h264_qsv",
+    "preset-intel-qsv-h265": f"-load_plugin hevc_hw -hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v hevc_qsv",
+    "preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
+    "preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
+    "preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
+    "preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
+    "preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
 }

 PRESETS_HW_ACCEL_SCALE = {
@@ -117,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
    "preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
    "preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
    "preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
+    "preset-jetson-h264": "-r {0}",  # scaled in decoder
+    "preset-jetson-h265": "-r {0}",  # scaled in decoder
    "default": "-r {0} -vf fps={0},scale={1}:{2}",
 }

@@ -128,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
    "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
    "preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
    "preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
+    "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
+    "preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
    "default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
 }

@@ -139,16 +101,28 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
    "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
    "preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
    "preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
+    "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
+    "preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
    "default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
 }


-def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]:
+def parse_preset_hardware_acceleration_decode(
+    arg: Any,
+    fps: int,
+    width: int,
+    height: int,
+) -> list[str]:
    """Return the correct preset if in preset format otherwise return None."""
    if not isinstance(arg, str):
        return None

-    return PRESETS_HW_ACCEL_DECODE.get(arg, None)
+    decode = PRESETS_HW_ACCEL_DECODE.get(arg, None)
+
+    if not decode:
+        return None
+
+    return decode.format(fps, width, height).split(" ")


 def parse_preset_hardware_acceleration_scale(
@@ -160,20 +134,13 @@ def parse_preset_hardware_acceleration_scale(
 ) -> list[str]:
    """Return the correct scaling preset or default preset if none is set."""
    if not isinstance(arg, str) or " " in arg:
-        scale = PRESETS_HW_ACCEL_SCALE["default"].format(fps, width, height).split(" ")
-        scale.extend(detect_args)
-        return scale
-
-    scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
-
-    if scale:
-        scale = scale.format(fps, width, height).split(" ")
-        scale.extend(detect_args)
-        return scale
+        scale = PRESETS_HW_ACCEL_SCALE["default"]
    else:
-        scale = scale.format(fps, width, height).split(" ")
-        scale.extend(detect_args)
-        return scale
+        scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
+
+    scale = scale.format(fps, width, height).split(" ")
+    scale.extend(detect_args)
+    return scale


 class EncodeTypeEnum(str, Enum):
@@ -193,6 +160,10 @@ def parse_preset_hardware_acceleration_encode(
    if not isinstance(arg, str):
        return arg_map["default"].format(input, output)

+    # Not all jetsons have HW encoders, so fall back to default SW encoder if not
+    if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
+        arg = "default"
+
    return arg_map.get(arg, arg_map["default"]).format(
        input,
        output,
--- a/frigate/stats.py
+++ b/frigate/stats.py
@@ -22,6 +22,7 @@ from frigate.util.services import (
    get_bandwidth_stats,
    get_cpu_stats,
    get_intel_gpu_stats,
+    get_jetson_stats,
    get_nvidia_gpu_stats,
 )
 from frigate.version import VERSION
@@ -180,6 +181,15 @@ async def set_gpu_stats(
            else:
                stats["nvidia-gpu"] = {"gpu": -1, "mem": -1}
                hwaccel_errors.append(args)
+        elif "nvmpi" in args or "jetson" in args:
+            # nvidia Jetson
+            jetson_usage = get_jetson_stats()
+
+            if jetson_usage:
+                stats["jetson-gpu"] = jetson_usage
+            else:
+                stats["jetson-gpu"] = {"gpu": -1, "mem": -1}
+                hwaccel_errors.append(args)
        elif "qsv" in args:
            if not config.telemetry.stats.intel_gpu_stats:
                continue
--- a/frigate/util/services.py
+++ b/frigate/util/services.py
@@ -309,6 +309,21 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
        return results


+def get_jetson_stats() -> dict[int, dict]:
+    results = {}
+
+    try:
+        results["mem"] = "-"  # no discrete gpu memory
+
+        with open("/sys/devices/gpu.0/load", "r") as f:
+            gpuload = float(f.readline()) / 10
+            results["gpu"] = f"{gpuload}%"
+    except Exception:
+        return None
+
+    return results
+
+
 def ffprobe_stream(path: str) -> sp.CompletedProcess:
    """Run ffprobe on stream."""
    clean_path = escape_special_characters(path)