Refactor to simplify support for additional detector types (#3656)

* Refactor EdgeTPU and CPU model handling to detector submodules.
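
  A minimal sketch of the shared interface this refactor implies (the class and method names here are assumptions drawn from the commit message, not quoted from this diff):

      from abc import ABC, abstractmethod

      class DetectionApi(ABC):
          """Assumed shape of the common detector interface."""

          @abstractmethod
          def detect_raw(self, tensor_input):
              """Run inference on a prepared input tensor and return raw detections."""

  Each concrete detector (EdgeTPU, CPU, and future types) would then live in its own submodule implementing this interface, so adding a detector type no longer requires touching the frame-processing code.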

* Fix selecting the correct detection device type from the config

* Remove detector type check when creating ObjectDetectProcess

* Fixes after rebasing to 0.11

* Add init file to detector folder

* Rename to detect_api

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>

* Add unit test for LocalObjectDetector class
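
  A hedged sketch of what such a test might look like (the factory name `create_detector` and the default-constructible `LocalObjectDetector` are assumptions, not the exact test from this commit; the import path follows the rename above):

      import unittest
      from unittest.mock import MagicMock, patch

      import numpy as np

      class TestLocalObjectDetector(unittest.TestCase):
          # "create_detector" is an assumed factory name inside frigate.object_detection.
          @patch("frigate.object_detection.create_detector")
          def test_detect_raw_delegates_to_detector(self, mock_create):
              from frigate.object_detection import LocalObjectDetector

              mock_detector = MagicMock()
              mock_detector.detect_raw.return_value = np.zeros((20, 6), np.float32)
              mock_create.return_value = mock_detector

              detector = LocalObjectDetector()  # assumed default-constructible
              tensor = np.zeros((1, 320, 320, 3), dtype=np.uint8)
              result = detector.detect_raw(tensor)

              # The wrapper is expected to delegate to the underlying detector.
              mock_detector.detect_raw.assert_called_once()
              self.assertEqual(result.shape, (20, 6))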

* Add configuration for model inputs
Support transforming detection regions to RGB or BGR.
Support specifying the input tensor shape. The tensor is handed to the detector in a standard ["BHWC"] layout, but can be transposed inside the detector to match the model's expected shape using the model input_tensor config.
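
As an illustration, the new model options might be set along these lines (a hypothetical configuration fragment expressed as a Python mapping; the key names follow the commit message and the diff below, the values are examples):

    # Hypothetical model configuration values:
    model_config = {
        "width": 320,
        "height": 320,
        "input_pixel_format": "bgr",  # convert detection regions to RGB or BGR
        "input_tensor": "nchw",       # reorder the standard BHWC layout for the model
    }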

* Add documentation for new model config parameters

* Add input tensor transpose to LocalObjectDetector
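
  A minimal sketch of that transpose, assuming NumPy and a channels-first target layout:

      import numpy as np

      def to_model_layout(tensor_input: np.ndarray, input_tensor: str) -> np.ndarray:
          """Reorder a standard [B, H, W, C] tensor to the model's expected layout."""
          if input_tensor == "nchw":
              # [B, H, W, C] -> [B, C, H, W]
              return np.transpose(tensor_input, (0, 3, 1, 2))
          return tensor_input  # already [B, H, W, C]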

* Change the model input tensor config to use an enumeration
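
  A sketch of what those enumerations might look like (PixelFormatEnum and its rgb/bgr members appear in the diff below; the yuv fallback member and the input tensor enum's name and members are assumptions):

      from enum import Enum

      class PixelFormatEnum(str, Enum):
          rgb = "rgb"
          bgr = "bgr"
          yuv = "yuv"  # assumed fallback; the diff falls through to yuv_crop_and_resize

      class InputTensorEnum(str, Enum):  # assumed name
          nchw = "nchw"
          nhwc = "nhwc"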

* Updates for model config documentation

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>
Author: Nate Meyer
Date: 2022-11-03 22:23:09 -04:00 (committed by GitHub)
parent 1bc9efd529
commit 4383b883c0
17 changed files with 456 additions and 150 deletions

@@ -11,11 +11,11 @@ import time
 from collections import defaultdict

 import numpy as np
-from cv2 import cv2, reduce
+import cv2
 from setproctitle import setproctitle

-from frigate.config import CameraConfig, DetectConfig
-from frigate.edgetpu import RemoteObjectDetector
+from frigate.config import CameraConfig, DetectConfig, PixelFormatEnum
+from frigate.object_detection import RemoteObjectDetector
 from frigate.log import LogPipe
 from frigate.motion import MotionDetector
 from frigate.objects import ObjectTracker
@@ -29,7 +29,9 @@ from frigate.util import (
     intersection,
     intersection_over_union,
     listen,
+    yuv_crop_and_resize,
     yuv_region_2_rgb,
+    yuv_region_2_bgr,
 )

 logger = logging.getLogger(__name__)
@@ -89,13 +91,20 @@ def filtered(obj, objects_to_track, object_filters):
     return False

-def create_tensor_input(frame, model_shape, region):
-    cropped_frame = yuv_region_2_rgb(frame, region)
+def create_tensor_input(frame, model_config, region):
+    if model_config.input_pixel_format == PixelFormatEnum.rgb:
+        cropped_frame = yuv_region_2_rgb(frame, region)
+    elif model_config.input_pixel_format == PixelFormatEnum.bgr:
+        cropped_frame = yuv_region_2_bgr(frame, region)
+    else:
+        cropped_frame = yuv_crop_and_resize(frame, region)

-    # Resize to 300x300 if needed
-    if cropped_frame.shape != (model_shape[0], model_shape[1], 3):
+    # Resize if needed
+    if cropped_frame.shape != (model_config.height, model_config.width, 3):
         cropped_frame = cv2.resize(
-            cropped_frame, dsize=model_shape, interpolation=cv2.INTER_LINEAR
+            cropped_frame,
+            dsize=(model_config.height, model_config.width),
+            interpolation=cv2.INTER_LINEAR,
         )

     # Expand dimensions since the model expects images to have shape: [1, height, width, 3]
@@ -340,7 +349,7 @@ def capture_camera(name, config: CameraConfig, process_info):
 def track_camera(
     name,
     config: CameraConfig,
-    model_shape,
+    model_config,
     labelmap,
     detection_queue,
     result_connection,
@@ -378,7 +387,7 @@ def track_camera(
         motion_contour_area,
     )
     object_detector = RemoteObjectDetector(
-        name, labelmap, detection_queue, result_connection, model_shape
+        name, labelmap, detection_queue, result_connection, model_config
     )

     object_tracker = ObjectTracker(config.detect)
@@ -389,7 +398,7 @@ def track_camera(
         name,
         frame_queue,
         frame_shape,
-        model_shape,
+        model_config,
         config.detect,
         frame_manager,
         motion_detector,
@@ -443,12 +452,12 @@ def detect(
     detect_config: DetectConfig,
     object_detector,
     frame,
-    model_shape,
+    model_config,
     region,
     objects_to_track,
     object_filters,
 ):
-    tensor_input = create_tensor_input(frame, model_shape, region)
+    tensor_input = create_tensor_input(frame, model_config, region)

     detections = []
     region_detections = object_detector.detect(tensor_input)
@@ -487,7 +496,7 @@ def process_frames(
     camera_name: str,
     frame_queue: mp.Queue,
     frame_shape,
-    model_shape,
+    model_config,
     detect_config: DetectConfig,
     frame_manager: FrameManager,
     motion_detector: MotionDetector,
@@ -571,7 +580,7 @@ def process_frames(
         # combine motion boxes with known locations of existing objects
         combined_boxes = reduce_boxes(motion_boxes + tracked_object_boxes)

-        region_min_size = max(model_shape[0], model_shape[1])
+        region_min_size = max(model_config.height, model_config.width)
         # compute regions
         regions = [
             calculate_region(
@@ -634,7 +643,7 @@ def process_frames(
                     detect_config,
                     object_detector,
                     frame,
-                    model_shape,
+                    model_config,
                     region,
                     objects_to_track,
                     object_filters,
@@ -694,7 +703,7 @@ def process_frames(
                     detect_config,
                     object_detector,
                     frame,
-                    model_shape,
+                    model_config,
                     region,
                     objects_to_track,
                     object_filters,