Nvidia Jetson ffmpeg + TensorRT support (#6458)

* Non-Jetson changes

Required for later commits:
- Allow base image to be overridden (and don't assume its WORKDIR)
- Ensure python3.9
- Map hwaccel decode presets as strings instead of lists
Not required:
- Fix existing documentation
- Simplify hwaccel scale logic

* Prepare for multi-arch tensorrt build

* Add tensorrt images for Jetson boards

* Add Jetson ffmpeg hwaccel

* Update docs

* Add CODEOWNERS

* CI

* Change default model from yolov7-tiny-416 to yolov7-320

In my experience the tiny models perform markedly worse without being
much faster

* fixup! Update docs
This commit is contained in:
Andrew Reiter
2023-07-26 06:50:41 -04:00
committed by GitHub
parent 680198148b
commit a96a951e23
28 changed files with 567 additions and 139 deletions

View File

@@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
ffmpeg_input.global_args or self.ffmpeg.global_args
)
hwaccel_args = get_ffmpeg_arg_list(
parse_preset_hardware_acceleration_decode(ffmpeg_input.hwaccel_args)
parse_preset_hardware_acceleration_decode(
ffmpeg_input.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or ffmpeg_input.hwaccel_args
or parse_preset_hardware_acceleration_decode(self.ffmpeg.hwaccel_args)
or parse_preset_hardware_acceleration_decode(
self.ffmpeg.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or self.ffmpeg.hwaccel_args
)
input_args = get_ffmpeg_arg_list(

View File

@@ -55,58 +55,16 @@ _user_agent_args = [
]
# Decode-side ffmpeg arguments for each hardware acceleration preset.
#
# Every value is a single space-separated argument string (not a list) so it
# can be passed through str.format() before being split into arguments; see
# parse_preset_hardware_acceleration_decode, which fills {0}/{1}/{2} with
# fps/width/height. Only the Jetson presets use placeholders here: they
# resize inside the decoder.
PRESETS_HW_ACCEL_DECODE = {
    "preset-rpi-32-h264": "-c:v:1 h264_v4l2m2m",
    "preset-rpi-64-h264": "-c:v:1 h264_v4l2m2m",
    "preset-vaapi": f"-hwaccel_flags allow_profile_mismatch -hwaccel vaapi -hwaccel_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format vaapi",
    "preset-intel-qsv-h264": f"-hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v h264_qsv",
    "preset-intel-qsv-h265": f"-load_plugin hevc_hw -hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v hevc_qsv",
    "preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
    "preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
    "preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
    "preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
    "preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
}
PRESETS_HW_ACCEL_SCALE = {
@@ -117,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
"preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-jetson-h264": "-r {0}", # scaled in decoder
"preset-jetson-h265": "-r {0}", # scaled in decoder
"default": "-r {0} -vf fps={0},scale={1}:{2}",
}
@@ -128,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
}
@@ -139,16 +101,28 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
}
def parse_preset_hardware_acceleration_decode(
    arg: Any,
    fps: int,
    width: int,
    height: int,
) -> list[str]:
    """Return the decode arguments for a hwaccel preset, or None if not a preset.

    Args:
        arg: The configured hwaccel value. Only a string that names a key of
            PRESETS_HW_ACCEL_DECODE is treated as a preset.
        fps: Substituted for ``{0}`` in the preset string.
        width: Substituted for ``{1}`` in the preset string.
        height: Substituted for ``{2}`` in the preset string.

    Returns:
        The preset string with placeholders filled in, split on single
        spaces into an argument list. None is returned (despite the
        annotation) when ``arg`` is not a string or is not a known preset,
        so callers can fall through to the raw configured value with ``or``.
    """
    if not isinstance(arg, str):
        return None

    decode = PRESETS_HW_ACCEL_DECODE.get(arg)
    if not decode:
        return None

    # Format before splitting so placeholders such as "{1}x{2}" stay within
    # a single argument.
    return decode.format(fps, width, height).split(" ")
def parse_preset_hardware_acceleration_scale(
@@ -160,20 +134,13 @@ def parse_preset_hardware_acceleration_scale(
) -> list[str]:
"""Return the correct scaling preset or default preset if none is set."""
if not isinstance(arg, str) or " " in arg:
scale = PRESETS_HW_ACCEL_SCALE["default"].format(fps, width, height).split(" ")
scale.extend(detect_args)
return scale
scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
if scale:
scale = scale.format(fps, width, height).split(" ")
scale.extend(detect_args)
return scale
scale = PRESETS_HW_ACCEL_SCALE["default"]
else:
scale = scale.format(fps, width, height).split(" ")
scale.extend(detect_args)
return scale
scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
scale = scale.format(fps, width, height).split(" ")
scale.extend(detect_args)
return scale
class EncodeTypeEnum(str, Enum):
@@ -193,6 +160,10 @@ def parse_preset_hardware_acceleration_encode(
if not isinstance(arg, str):
return arg_map["default"].format(input, output)
# Not all jetsons have HW encoders, so fall back to default SW encoder if not
if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
arg = "default"
return arg_map.get(arg, arg_map["default"]).format(
input,
output,

View File

@@ -22,6 +22,7 @@ from frigate.util.services import (
get_bandwidth_stats,
get_cpu_stats,
get_intel_gpu_stats,
get_jetson_stats,
get_nvidia_gpu_stats,
)
from frigate.version import VERSION
@@ -180,6 +181,15 @@ async def set_gpu_stats(
else:
stats["nvidia-gpu"] = {"gpu": -1, "mem": -1}
hwaccel_errors.append(args)
elif "nvmpi" in args or "jetson" in args:
# nvidia Jetson
jetson_usage = get_jetson_stats()
if jetson_usage:
stats["jetson-gpu"] = jetson_usage
else:
stats["jetson-gpu"] = {"gpu": -1, "mem": -1}
hwaccel_errors.append(args)
elif "qsv" in args:
if not config.telemetry.stats.intel_gpu_stats:
continue

View File

@@ -309,6 +309,21 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
return results
def get_jetson_stats() -> dict[str, str]:
    """Return GPU usage stats for an Nvidia Jetson board.

    Reads the first line of ``/sys/devices/gpu.0/load`` and reports it,
    divided by 10, as a percentage string.

    Returns:
        A dict with ``"mem"`` (always ``"-"``) and ``"gpu"`` (for example
        ``"12.3%"``), or None when the load file cannot be opened or its
        content is not a number.
    """
    try:
        with open("/sys/devices/gpu.0/load", "r") as f:
            # NOTE(review): presumably the kernel reports load in tenths of a
            # percent, hence the division by 10 - confirm against the driver.
            gpuload = float(f.readline()) / 10
    except (OSError, ValueError):
        # Missing or unreadable sysfs node (e.g. not a Jetson), or
        # non-numeric content.
        return None

    return {
        "mem": "-",  # no discrete gpu memory
        "gpu": f"{gpuload}%",
    }
def ffprobe_stream(path: str) -> sp.CompletedProcess:
"""Run ffprobe on stream."""
clean_path = escape_special_characters(path)