forked from Github/frigate
Nvidia Jetson ffmpeg + TensorRT support (#6458)
* Non-Jetson changes Required for later commits: - Allow base image to be overridden (and don't assume its WORKDIR) - Ensure python3.9 - Map hwaccel decode presets as strings instead of lists Not required: - Fix existing documentation - Simplify hwaccel scale logic * Prepare for multi-arch tensorrt build * Add tensorrt images for Jetson boards * Add Jetson ffmpeg hwaccel * Update docs * Add CODEOWNERS * CI * Change default model from yolov7-tiny-416 to yolov7-320 In my experience the tiny models perform markedly worse without being much faster * fixup! Update docs
This commit is contained in:
@@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
|
||||
ffmpeg_input.global_args or self.ffmpeg.global_args
|
||||
)
|
||||
hwaccel_args = get_ffmpeg_arg_list(
|
||||
parse_preset_hardware_acceleration_decode(ffmpeg_input.hwaccel_args)
|
||||
parse_preset_hardware_acceleration_decode(
|
||||
ffmpeg_input.hwaccel_args,
|
||||
self.detect.fps,
|
||||
self.detect.width,
|
||||
self.detect.height,
|
||||
)
|
||||
or ffmpeg_input.hwaccel_args
|
||||
or parse_preset_hardware_acceleration_decode(self.ffmpeg.hwaccel_args)
|
||||
or parse_preset_hardware_acceleration_decode(
|
||||
self.ffmpeg.hwaccel_args,
|
||||
self.detect.fps,
|
||||
self.detect.width,
|
||||
self.detect.height,
|
||||
)
|
||||
or self.ffmpeg.hwaccel_args
|
||||
)
|
||||
input_args = get_ffmpeg_arg_list(
|
||||
|
||||
@@ -55,58 +55,16 @@ _user_agent_args = [
|
||||
]
|
||||
|
||||
PRESETS_HW_ACCEL_DECODE = {
|
||||
"preset-rpi-32-h264": ["-c:v:1", "h264_v4l2m2m"],
|
||||
"preset-rpi-64-h264": ["-c:v:1", "h264_v4l2m2m"],
|
||||
"preset-vaapi": [
|
||||
"-hwaccel_flags",
|
||||
"allow_profile_mismatch",
|
||||
"-hwaccel",
|
||||
"vaapi",
|
||||
"-hwaccel_device",
|
||||
_gpu_selector.get_selected_gpu(),
|
||||
"-hwaccel_output_format",
|
||||
"vaapi",
|
||||
],
|
||||
"preset-intel-qsv-h264": [
|
||||
"-hwaccel",
|
||||
"qsv",
|
||||
"-qsv_device",
|
||||
_gpu_selector.get_selected_gpu(),
|
||||
"-hwaccel_output_format",
|
||||
"qsv",
|
||||
"-c:v",
|
||||
"h264_qsv",
|
||||
],
|
||||
"preset-intel-qsv-h265": [
|
||||
"-load_plugin",
|
||||
"hevc_hw",
|
||||
"-hwaccel",
|
||||
"qsv",
|
||||
"-qsv_device",
|
||||
_gpu_selector.get_selected_gpu(),
|
||||
"-hwaccel_output_format",
|
||||
"qsv",
|
||||
"-c:v",
|
||||
"hevc_qsv",
|
||||
],
|
||||
"preset-nvidia-h264": [
|
||||
"-hwaccel",
|
||||
"cuda",
|
||||
"-hwaccel_output_format",
|
||||
"cuda",
|
||||
],
|
||||
"preset-nvidia-h265": [
|
||||
"-hwaccel",
|
||||
"cuda",
|
||||
"-hwaccel_output_format",
|
||||
"cuda",
|
||||
],
|
||||
"preset-nvidia-mjpeg": [
|
||||
"-hwaccel",
|
||||
"cuda",
|
||||
"-hwaccel_output_format",
|
||||
"cuda",
|
||||
],
|
||||
"preset-rpi-32-h264": "-c:v:1 h264_v4l2m2m",
|
||||
"preset-rpi-64-h264": "-c:v:1 h264_v4l2m2m",
|
||||
"preset-vaapi": f"-hwaccel_flags allow_profile_mismatch -hwaccel vaapi -hwaccel_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format vaapi",
|
||||
"preset-intel-qsv-h264": f"-hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v h264_qsv",
|
||||
"preset-intel-qsv-h265": f"-load_plugin hevc_hw -hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v hevc_qsv",
|
||||
"preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
|
||||
"preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
|
||||
"preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
|
||||
"preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
|
||||
"preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
|
||||
}
|
||||
|
||||
PRESETS_HW_ACCEL_SCALE = {
|
||||
@@ -117,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
|
||||
"preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
|
||||
"preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
|
||||
"preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
|
||||
"preset-jetson-h264": "-r {0}", # scaled in decoder
|
||||
"preset-jetson-h265": "-r {0}", # scaled in decoder
|
||||
"default": "-r {0} -vf fps={0},scale={1}:{2}",
|
||||
}
|
||||
|
||||
@@ -128,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
|
||||
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
|
||||
"preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
|
||||
"preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
|
||||
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
|
||||
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
|
||||
"default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
|
||||
}
|
||||
|
||||
@@ -139,16 +101,28 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
|
||||
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
|
||||
"preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
|
||||
"preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
|
||||
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
|
||||
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
|
||||
"default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
|
||||
}
|
||||
|
||||
|
||||
def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]:
|
||||
def parse_preset_hardware_acceleration_decode(
|
||||
arg: Any,
|
||||
fps: int,
|
||||
width: int,
|
||||
height: int,
|
||||
) -> list[str]:
|
||||
"""Return the correct preset if in preset format otherwise return None."""
|
||||
if not isinstance(arg, str):
|
||||
return None
|
||||
|
||||
return PRESETS_HW_ACCEL_DECODE.get(arg, None)
|
||||
decode = PRESETS_HW_ACCEL_DECODE.get(arg, None)
|
||||
|
||||
if not decode:
|
||||
return None
|
||||
|
||||
return decode.format(fps, width, height).split(" ")
|
||||
|
||||
|
||||
def parse_preset_hardware_acceleration_scale(
|
||||
@@ -160,20 +134,13 @@ def parse_preset_hardware_acceleration_scale(
|
||||
) -> list[str]:
|
||||
"""Return the correct scaling preset or default preset if none is set."""
|
||||
if not isinstance(arg, str) or " " in arg:
|
||||
scale = PRESETS_HW_ACCEL_SCALE["default"].format(fps, width, height).split(" ")
|
||||
scale.extend(detect_args)
|
||||
return scale
|
||||
|
||||
scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
|
||||
|
||||
if scale:
|
||||
scale = scale.format(fps, width, height).split(" ")
|
||||
scale.extend(detect_args)
|
||||
return scale
|
||||
scale = PRESETS_HW_ACCEL_SCALE["default"]
|
||||
else:
|
||||
scale = scale.format(fps, width, height).split(" ")
|
||||
scale.extend(detect_args)
|
||||
return scale
|
||||
scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
|
||||
|
||||
scale = scale.format(fps, width, height).split(" ")
|
||||
scale.extend(detect_args)
|
||||
return scale
|
||||
|
||||
|
||||
class EncodeTypeEnum(str, Enum):
|
||||
@@ -193,6 +160,10 @@ def parse_preset_hardware_acceleration_encode(
|
||||
if not isinstance(arg, str):
|
||||
return arg_map["default"].format(input, output)
|
||||
|
||||
# Not all jetsons have HW encoders, so fall back to default SW encoder if not
|
||||
if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
|
||||
arg = "default"
|
||||
|
||||
return arg_map.get(arg, arg_map["default"]).format(
|
||||
input,
|
||||
output,
|
||||
|
||||
@@ -22,6 +22,7 @@ from frigate.util.services import (
|
||||
get_bandwidth_stats,
|
||||
get_cpu_stats,
|
||||
get_intel_gpu_stats,
|
||||
get_jetson_stats,
|
||||
get_nvidia_gpu_stats,
|
||||
)
|
||||
from frigate.version import VERSION
|
||||
@@ -180,6 +181,15 @@ async def set_gpu_stats(
|
||||
else:
|
||||
stats["nvidia-gpu"] = {"gpu": -1, "mem": -1}
|
||||
hwaccel_errors.append(args)
|
||||
elif "nvmpi" in args or "jetson" in args:
|
||||
# nvidia Jetson
|
||||
jetson_usage = get_jetson_stats()
|
||||
|
||||
if jetson_usage:
|
||||
stats["jetson-gpu"] = jetson_usage
|
||||
else:
|
||||
stats["jetson-gpu"] = {"gpu": -1, "mem": -1}
|
||||
hwaccel_errors.append(args)
|
||||
elif "qsv" in args:
|
||||
if not config.telemetry.stats.intel_gpu_stats:
|
||||
continue
|
||||
|
||||
@@ -309,6 +309,21 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
|
||||
return results
|
||||
|
||||
|
||||
def get_jetson_stats() -> "dict[str, str] | None":
    """Return GPU usage stats for an Nvidia Jetson board.

    Reads the first line of ``/sys/devices/gpu.0/load``, parses it as a
    number and divides it by 10 to build a percentage string.

    Returns:
        A dict with two string values: ``"mem"`` (always ``"-"``, since there
        is no discrete GPU memory to report) and ``"gpu"`` (for example
        ``"12.5%"``). Returns ``None`` when the load file cannot be opened,
        read, or parsed as a number.
    """
    # The annotation is quoted so that the ``|`` union is never evaluated at
    # definition time and the module keeps importing on Python 3.9.
    try:
        with open("/sys/devices/gpu.0/load", "r") as f:
            # NOTE(review): dividing by 10 suggests the kernel reports load in
            # tenths of a percent -- confirm against the Jetson sysfs docs.
            gpuload = float(f.readline()) / 10
    except (OSError, ValueError):
        # OSError: file missing or unreadable (e.g. not running on a Jetson).
        # ValueError: empty, undecodable or non-numeric content.
        return None

    return {
        "mem": "-",  # no discrete gpu memory
        "gpu": f"{gpuload}%",
    }
|
||||
|
||||
|
||||
def ffprobe_stream(path: str) -> sp.CompletedProcess:
|
||||
"""Run ffprobe on stream."""
|
||||
clean_path = escape_special_characters(path)
|
||||
|
||||
Reference in New Issue
Block a user