ROCm yolonas (#13816)

* Implement ROCm detectors
* Cleanup tensor input
* Fixup image creation
* Add support for yolonas in onnx
* Get build working with onnx
* Update docs and simplify config
* Remove unused imports
2024-09-18 18:34:07 -06:00
parent efd1194307
commit 4515eb4637
14 changed files with 194 additions and 154 deletions
--- a/frigate/detectors/plugins/hailo8l.py
+++ b/frigate/detectors/plugins/hailo8l.py
@@ -24,7 +24,6 @@ from typing_extensions import Literal

 from frigate.detectors.detection_api import DetectionApi
 from frigate.detectors.detector_config import BaseDetectorConfig
-from frigate.detectors.util import preprocess  # Assuming this function is available

 # Set up logging
 logger = logging.getLogger(__name__)
@@ -146,17 +145,9 @@ class HailoDetector(DetectionApi):
                f"[detect_raw] Converted tensor_input to numpy array: shape {tensor_input.shape}"
            )

-        # Preprocess the tensor input using Frigate's preprocess function
-        processed_tensor = preprocess(
-            tensor_input, (1, self.h8l_model_height, self.h8l_model_width, 3), np.uint8
-        )
+        input_data = tensor_input
        logger.debug(
-            f"[detect_raw] Tensor data and shape after preprocessing: {processed_tensor} {processed_tensor.shape}"
-        )
-
-        input_data = processed_tensor
-        logger.debug(
-            f"[detect_raw] Input data for inference shape: {processed_tensor.shape}, dtype: {processed_tensor.dtype}"
+            f"[detect_raw] Input data for inference shape: {tensor_input.shape}, dtype: {tensor_input.dtype}"
        )

        try:
--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@@ -1,7 +1,6 @@
 import logging
 import os

-import cv2
 import numpy as np
 from typing_extensions import Literal

@@ -9,7 +8,6 @@ from frigate.detectors.detection_api import DetectionApi
 from frigate.detectors.detector_config import (
    BaseDetectorConfig,
    ModelTypeEnum,
-    PixelFormatEnum,
 )

 logger = logging.getLogger(__name__)
@@ -73,24 +71,13 @@ class ONNXDetector(DetectionApi):
        self.w = detector_config.model.width
        self.onnx_model_type = detector_config.model.model_type
        self.onnx_model_px = detector_config.model.input_pixel_format
+        self.onnx_model_shape = detector_config.model.input_tensor
        path = detector_config.model.path

        logger.info(f"ONNX: {path} loaded")

    def detect_raw(self, tensor_input):
        model_input_name = self.model.get_inputs()[0].name
-        model_input_shape = self.model.get_inputs()[0].shape
-
-        # adjust input shape
-        if self.onnx_model_type == ModelTypeEnum.yolonas:
-            tensor_input = cv2.dnn.blobFromImage(
-                tensor_input[0],
-                1.0,
-                (model_input_shape[3], model_input_shape[2]),
-                None,
-                swapRB=self.onnx_model_px == PixelFormatEnum.bgr,
-            ).astype(np.uint8)
-
        tensor_output = self.model.run(None, {model_input_name: tensor_input})

        if self.onnx_model_type == ModelTypeEnum.yolonas:
--- a/frigate/detectors/plugins/rocm.py
+++ b/frigate/detectors/plugins/rocm.py
@@ -9,8 +9,10 @@ from pydantic import Field
 from typing_extensions import Literal

 from frigate.detectors.detection_api import DetectionApi
-from frigate.detectors.detector_config import BaseDetectorConfig
-from frigate.detectors.util import preprocess
+from frigate.detectors.detector_config import (
+    BaseDetectorConfig,
+    ModelTypeEnum,
+)

 logger = logging.getLogger(__name__)

@@ -74,7 +76,16 @@ class ROCmDetector(DetectionApi):
            logger.error("AMD/ROCm: module loading failed, missing ROCm environment?")
            raise

+        if detector_config.conserve_cpu:
+            logger.info("AMD/ROCm: switching HIP to blocking mode to conserve CPU")
+            ctypes.CDLL("/opt/rocm/lib/libamdhip64.so").hipSetDeviceFlags(4)
+
+        self.h = detector_config.model.height
+        self.w = detector_config.model.width
+        self.rocm_model_type = detector_config.model.model_type
+        self.rocm_model_px = detector_config.model.input_pixel_format
        path = detector_config.model.path
+
        mxr_path = os.path.splitext(path)[0] + ".mxr"
        if path.endswith(".mxr"):
            logger.info(f"AMD/ROCm: loading parsed model from {mxr_path}")
@@ -84,6 +95,7 @@ class ROCmDetector(DetectionApi):
            self.model = migraphx.load(mxr_path)
        else:
            logger.info(f"AMD/ROCm: loading model from {path}")
+
            if path.endswith(".onnx"):
                self.model = migraphx.parse_onnx(path)
            elif (
@@ -95,30 +107,51 @@ class ROCmDetector(DetectionApi):
                self.model = migraphx.parse_tf(path)
            else:
                raise Exception(f"AMD/ROCm: unknown model format {path}")
+
            logger.info("AMD/ROCm: compiling the model")
+
            self.model.compile(
                migraphx.get_target("gpu"), offload_copy=True, fast_math=True
            )
+
            logger.info(f"AMD/ROCm: saving parsed model into {mxr_path}")
+
            os.makedirs("/config/model_cache/rocm", exist_ok=True)
            migraphx.save(self.model, mxr_path)
+
        logger.info("AMD/ROCm: model loaded")

    def detect_raw(self, tensor_input):
        model_input_name = self.model.get_parameter_names()[0]
-        model_input_shape = tuple(
-            self.model.get_parameter_shapes()[model_input_name].lens()
-        )
-        tensor_input = preprocess(tensor_input, model_input_shape, np.float32)
-
        detector_result = self.model.run({model_input_name: tensor_input})[0]
-
        addr = ctypes.cast(detector_result.data_ptr(), ctypes.POINTER(ctypes.c_float))
-        # ruff: noqa: F841
+
        tensor_output = np.ctypeslib.as_array(
            addr, shape=detector_result.get_shape().lens()
        )

-        raise Exception(
-            "No models are currently supported for rocm. See the docs for more info."
-        )
+        if self.rocm_model_type == ModelTypeEnum.yolonas:
+            predictions = tensor_output
+
+            detections = np.zeros((20, 6), np.float32)
+
+            for i, prediction in enumerate(predictions):
+                if i == 20:
+                    break
+                (_, x_min, y_min, x_max, y_max, confidence, class_id) = prediction
+                # when running in GPU mode, empty predictions in the output have class_id of -1
+                if class_id < 0:
+                    break
+                detections[i] = [
+                    class_id,
+                    confidence,
+                    y_min / self.h,
+                    x_min / self.w,
+                    y_max / self.h,
+                    x_max / self.w,
+                ]
+            return detections
+        else:
+            raise Exception(
+                f"{self.rocm_model_type} is currently not supported for rocm. See the docs for more info on supported models."
+            )
--- a/frigate/detectors/util.py
+++ b/frigate/detectors/util.py
@@ -1,36 +0,0 @@
-import logging
-
-import cv2
-import numpy as np
-
-logger = logging.getLogger(__name__)
-
-
-def preprocess(tensor_input, model_input_shape, model_input_element_type):
-    model_input_shape = tuple(model_input_shape)
-    assert tensor_input.dtype == np.uint8, f"tensor_input.dtype: {tensor_input.dtype}"
-    if len(tensor_input.shape) == 3:
-        tensor_input = tensor_input[np.newaxis, :]
-    if model_input_element_type == np.uint8:
-        # nothing to do for uint8 model input
-        assert (
-            model_input_shape == tensor_input.shape
-        ), f"model_input_shape: {model_input_shape}, tensor_input.shape: {tensor_input.shape}"
-        return tensor_input
-    assert (
-        model_input_element_type == np.float32
-    ), f"model_input_element_type: {model_input_element_type}"
-    # tensor_input must be nhwc
-    assert tensor_input.shape[3] == 3, f"tensor_input.shape: {tensor_input.shape}"
-    if tensor_input.shape[1:3] != model_input_shape[2:4]:
-        logger.warn(
-            f"preprocess: tensor_input.shape {tensor_input.shape} and model_input_shape {model_input_shape} do not match!"
-        )
-    # cv2.dnn.blobFromImage is faster than running it through numpy
-    return cv2.dnn.blobFromImage(
-        tensor_input[0],
-        1.0 / 255,
-        (model_input_shape[3], model_input_shape[2]),
-        None,
-        swapRB=False,
-    )