forked from Github/frigate
Improve Nvidia GPU stats (#14206)
* Add support for nvidia driver info

* Don't show temperature if detector isn't called coral

* Add encoder and decoder info for Nvidia GPUs

* Fix device info

* Implement GPU info for nvidia GPU

* Update web/src/views/system/GeneralMetrics.tsx

Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>

* Update web/src/views/system/GeneralMetrics.tsx

Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>

---------

Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
@@ -28,7 +28,12 @@ from frigate.util.builtin import (
     get_tz_modifiers,
     update_yaml_from_url,
 )
-from frigate.util.services import ffprobe_stream, restart_frigate, vainfo_hwaccel
+from frigate.util.services import (
+    ffprobe_stream,
+    get_nvidia_driver_info,
+    restart_frigate,
+    vainfo_hwaccel,
+)
 from frigate.version import VERSION
 
 logger = logging.getLogger(__name__)
@@ -382,6 +387,11 @@ def vainfo():
     )
 
 
+@router.get("/nvinfo")
+def nvinfo():
+    return JSONResponse(content=get_nvidia_driver_info())
+
+
 @router.get("/logs/{service}", tags=[Tags.logs])
 def logs(
     service: str = Path(enum=["frigate", "nginx", "go2rtc"]),
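For illustration, a minimal sketch of querying the new endpoint from outside Frigate. The host, port, and /api prefix in the URL are assumptions about a typical Frigate deployment, and the response keys mirror get_nvidia_driver_info() further down in this diff; nothing here is part of the change itself.

# Sketch: fetch nvidia driver info from the new endpoint and print it per GPU.
# Assumes Frigate's HTTP API is reachable at localhost:5000 under /api.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:5000/api/nvinfo") as resp:
    info = json.load(resp)

# get_nvidia_driver_info() keys its results by GPU index.
for gpu_index, details in info.items():
    print(
        f"GPU {gpu_index}: {details.get('name')} "
        f"(driver {details.get('driver')}, vbios {details.get('vbios')})"
    )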
@@ -339,7 +339,10 @@ def get_intel_gpu_stats() -> dict[str, str]:
 
 def try_get_info(f, h, default="N/A"):
     try:
-        v = f(h)
+        if h:
+            v = f(h)
+        else:
+            v = f()
     except nvml.NVMLError_NotSupported:
         v = default
     return v
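The change above lets try_get_info wrap NVML queries that take no device handle (such as nvmlSystemGetDriverVersion, used further down) in addition to per-device ones. A small sketch of both call paths, assuming the NVML bindings import as pynvml and at least one NVIDIA GPU is visible to the driver:

# Sketch: the two call paths try_get_info now supports.
# Assumes the NVML bindings import as "pynvml" (Frigate aliases them as nvml)
# and that frigate.util.services is importable in this environment.
import pynvml as nvml

from frigate.util.services import try_get_info

nvml.nvmlInit()
handle = nvml.nvmlDeviceGetHandleByIndex(0)

# Per-device query: the handle is forwarded to the NVML call, as before.
util = try_get_info(nvml.nvmlDeviceGetUtilizationRates, handle)

# System-wide query: passing None makes try_get_info call f() with no
# arguments, which is what nvmlSystemGetDriverVersion expects.
driver = try_get_info(nvml.nvmlSystemGetDriverVersion, None, default=None)

print(driver, util if util == "N/A" else util.gpu)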
@@ -356,6 +359,8 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
             util = try_get_info(nvml.nvmlDeviceGetUtilizationRates, handle)
             enc = try_get_info(nvml.nvmlDeviceGetEncoderUtilization, handle)
             dec = try_get_info(nvml.nvmlDeviceGetDecoderUtilization, handle)
+            pstate = try_get_info(nvml.nvmlDeviceGetPowerState, handle, default=None)
+
             if util != "N/A":
                 gpu_util = util.gpu
             else:
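nvmlDeviceGetPowerState reports the current performance state as an integer, where 0 is the highest-performance P-state and 15 the lowest. A sketch of turning that raw value into the familiar "P<n>" label; format_pstate is a hypothetical helper, not part of this diff. Note that the fallback expression pstate or "unknown" in the next hunk also maps a reported value of 0 (P0) to "unknown", because 0 is falsy in Python.

# Sketch: format the integer P-state reported by NVML as a "P<n>" label.
# None stands in for "query not supported" (try_get_info's default above).
def format_pstate(pstate) -> str:
    if pstate is None:
        return "unknown"
    return f"P{int(pstate)}"

print(format_pstate(0))     # "P0"  - maximum performance
print(format_pstate(8))     # "P8"  - a typical idle/low-power state
print(format_pstate(None))  # "unknown"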
@@ -382,6 +387,7 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
                 "mem": gpu_mem_util,
                 "enc": enc_util,
                 "dec": dec_util,
+                "pstate": pstate or "unknown",
             }
     except Exception:
         pass
@@ -432,6 +438,31 @@ def vainfo_hwaccel(device_name: Optional[str] = None) -> sp.CompletedProcess:
     return sp.run(ffprobe_cmd, capture_output=True)
 
 
+def get_nvidia_driver_info() -> dict[str, any]:
+    """Get general hardware info for nvidia GPU."""
+    results = {}
+    try:
+        nvml.nvmlInit()
+        deviceCount = nvml.nvmlDeviceGetCount()
+        for i in range(deviceCount):
+            handle = nvml.nvmlDeviceGetHandleByIndex(i)
+            driver = try_get_info(nvml.nvmlSystemGetDriverVersion, None, default=None)
+            cuda_compute = try_get_info(
+                nvml.nvmlDeviceGetCudaComputeCapability, handle, default=None
+            )
+            vbios = try_get_info(nvml.nvmlDeviceGetVbiosVersion, handle, default=None)
+            results[i] = {
+                "name": nvml.nvmlDeviceGetName(handle),
+                "driver": driver or "unknown",
+                "cuda_compute": cuda_compute or "unknown",
+                "vbios": vbios or "unknown",
+            }
+    except Exception:
+        pass
+    finally:
+        return results
+
+
 def auto_detect_hwaccel() -> str:
     """Detect hwaccel args by default."""
     try:
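For reference, a standalone sketch of the same NVML queries the new helper performs, which can be handy for checking NVML access outside of Frigate. The pynvml import alias is an assumption about the environment; the queried fields mirror get_nvidia_driver_info() above.

# Sketch: standalone equivalent of the queries in get_nvidia_driver_info(),
# useful for verifying NVML access outside of Frigate.
import pynvml as nvml  # assumed NVML bindings, aliased like Frigate's "nvml"

nvml.nvmlInit()
try:
    for i in range(nvml.nvmlDeviceGetCount()):
        handle = nvml.nvmlDeviceGetHandleByIndex(i)
        print(
            i,
            {
                "name": nvml.nvmlDeviceGetName(handle),
                "driver": nvml.nvmlSystemGetDriverVersion(),
                # (major, minor) tuple, e.g. (8, 6) for an RTX 30-series card
                "cuda_compute": nvml.nvmlDeviceGetCudaComputeCapability(handle),
                "vbios": nvml.nvmlDeviceGetVbiosVersion(handle),
            },
        )
finally:
    nvml.nvmlShutdown()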