Improve face recognition (#14537)

* Increase requirements for face to be set

* Manage faces properly

* Add basic docs

* Simplify

* Separate out face recognition from semantic search

* Update docs

* Formatting

Nicolas Mowen
2024-10-23 09:03:18 -06:00
parent ca5711d1ab
commit e35fb8f056
7 changed files with 96 additions and 34 deletions

View File

@@ -57,7 +57,7 @@ from .logger import LoggerConfig
 from .mqtt import MqttConfig
 from .notification import NotificationConfig
 from .proxy import ProxyConfig
-from .semantic_search import SemanticSearchConfig
+from .semantic_search import FaceRecognitionConfig, SemanticSearchConfig
 from .telemetry import TelemetryConfig
 from .tls import TlsConfig
 from .ui import UIConfig
@@ -159,6 +159,16 @@ class RestreamConfig(BaseModel):
     model_config = ConfigDict(extra="allow")


+def verify_semantic_search_dependent_configs(config: FrigateConfig) -> None:
+    """Verify that semantic search is enabled if required features are enabled."""
+    if not config.semantic_search.enabled:
+        if config.genai.enabled:
+            raise ValueError("Genai requires semantic search to be enabled.")
+
+        if config.face_recognition.enabled:
+            raise ValueError("Face recognition requires semantic search to be enabled.")
+
+
 def verify_config_roles(camera_config: CameraConfig) -> None:
     """Verify that roles are setup in the config correctly."""
     assigned_roles = list(
@@ -320,6 +330,9 @@ class FrigateConfig(FrigateBaseModel):
     semantic_search: SemanticSearchConfig = Field(
         default_factory=SemanticSearchConfig, title="Semantic search configuration."
     )
+    face_recognition: FaceRecognitionConfig = Field(
+        default_factory=FaceRecognitionConfig, title="Face recognition config."
+    )
     ui: UIConfig = Field(default_factory=UIConfig, title="UI configuration.")

     # Detector config
@@ -625,6 +638,7 @@ class FrigateConfig(FrigateBaseModel):
             detector_config.model.compute_model_hash()
             self.detectors[key] = detector_config

+        verify_semantic_search_dependent_configs(self)
         return self

     @field_validator("cameras")
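
To see what the new check rejects, here is a self-contained sketch of the behavior; SimpleNamespace is only a stand-in for the parsed config object, and the function body mirrors the one added above:

    from types import SimpleNamespace


    def verify_semantic_search_dependent_configs(config) -> None:
        """Verify that semantic search is enabled if required features are enabled."""
        if not config.semantic_search.enabled:
            if config.genai.enabled:
                raise ValueError("Genai requires semantic search to be enabled.")

            if config.face_recognition.enabled:
                raise ValueError("Face recognition requires semantic search to be enabled.")


    # face recognition without semantic search is now rejected at config load
    config = SimpleNamespace(
        semantic_search=SimpleNamespace(enabled=False),
        genai=SimpleNamespace(enabled=False),
        face_recognition=SimpleNamespace(enabled=True),
    )

    try:
        verify_semantic_search_dependent_configs(config)
    except ValueError as err:
        print(err)  # -> Face recognition requires semantic search to be enabled.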

View File

@@ -7,6 +7,16 @@ from .base import FrigateBaseModel

 __all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"]

+
+class SemanticSearchConfig(FrigateBaseModel):
+    enabled: bool = Field(default=False, title="Enable semantic search.")
+    reindex: Optional[bool] = Field(
+        default=False, title="Reindex all detections on startup."
+    )
+    model_size: str = Field(
+        default="small", title="The size of the embeddings model used."
+    )
+

 class FaceRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable face recognition.")
     threshold: float = Field(
@@ -15,16 +25,3 @@ class FaceRecognitionConfig(FrigateBaseModel):
     min_area: int = Field(
         default=500, title="Min area of face box to consider running face recognition."
     )
-
-
-class SemanticSearchConfig(FrigateBaseModel):
-    enabled: bool = Field(default=False, title="Enable semantic search.")
-    reindex: Optional[bool] = Field(
-        default=False, title="Reindex all detections on startup."
-    )
-    face_recognition: FaceRecognitionConfig = Field(
-        default_factory=FaceRecognitionConfig, title="Face recognition config."
-    )
-    model_size: str = Field(
-        default="small", title="The size of the embeddings model used."
-    )

View File

@@ -11,7 +11,7 @@ from numpy import ndarray
 from playhouse.shortcuts import model_to_dict

 from frigate.comms.inter_process import InterProcessRequestor
-from frigate.config.semantic_search import SemanticSearchConfig
+from frigate.config import FrigateConfig
 from frigate.const import (
     CONFIG_DIR,
     FACE_DIR,
@@ -62,9 +62,7 @@ def get_metadata(event: Event) -> dict:
 class Embeddings:
     """SQLite-vec embeddings database."""

-    def __init__(
-        self, config: SemanticSearchConfig, db: SqliteVecQueueDatabase
-    ) -> None:
+    def __init__(self, config: FrigateConfig, db: SqliteVecQueueDatabase) -> None:
         self.config = config
         self.db = db
         self.requestor = InterProcessRequestor()
@@ -76,7 +74,7 @@ class Embeddings:
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
             "jinaai/jina-clip-v1-tokenizer",
             "jinaai/jina-clip-v1-vision_model_fp16.onnx"
-            if config.model_size == "large"
+            if config.semantic_search.model_size == "large"
             else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
             "jinaai/jina-clip-v1-preprocessor_config.json",
         ]
@@ -97,7 +95,7 @@ class Embeddings:
             download_urls={
                 "text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
             },
-            model_size=config.model_size,
+            model_size=config.semantic_search.model_size,
             model_type=ModelTypeEnum.text,
             requestor=self.requestor,
             device="CPU",
@@ -105,7 +103,7 @@ class Embeddings:
         model_file = (
             "vision_model_fp16.onnx"
-            if self.config.model_size == "large"
+            if self.config.semantic_search.model_size == "large"
             else "vision_model_quantized.onnx"
         )
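
Because Embeddings now receives the whole FrigateConfig rather than just the semantic-search section, every nested read is qualified with semantic_search. A tiny sketch of the model-file branch above, with a hypothetical pick_vision_model helper standing in for the inline conditional:

    def pick_vision_model(model_size: str) -> str:
        # "large" opts into the fp16 vision model; everything else gets the
        # quantized one, mirroring the branch in Embeddings above
        return (
            "vision_model_fp16.onnx"
            if model_size == "large"
            else "vision_model_quantized.onnx"
        )


    assert pick_vision_model("large") == "vision_model_fp16.onnx"
    assert pick_vision_model("small") == "vision_model_quantized.onnx"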

View File

@@ -34,6 +34,7 @@ from .embeddings import Embeddings

 logger = logging.getLogger(__name__)

+REQUIRED_FACES = 2
 MAX_THUMBNAILS = 10
@@ -48,7 +49,7 @@ class EmbeddingMaintainer(threading.Thread):
     ) -> None:
         super().__init__(name="embeddings_maintainer")
         self.config = config
-        self.embeddings = Embeddings(config.semantic_search, db)
+        self.embeddings = Embeddings(config, db)

         # Check if we need to re-index events
         if config.semantic_search.reindex:
@@ -63,10 +64,9 @@ class EmbeddingMaintainer(threading.Thread):
         self.frame_manager = SharedMemoryFrameManager()

         # set face recognition conditions
-        self.face_recognition_enabled = (
-            self.config.semantic_search.face_recognition.enabled
-        )
+        self.face_recognition_enabled = self.config.face_recognition.enabled
         self.requires_face_detection = "face" not in self.config.model.all_attributes
+        self.detected_faces: dict[str, float] = {}

         # create communication for updating event descriptions
         self.requestor = InterProcessRequestor()
@@ -184,6 +184,9 @@ class EmbeddingMaintainer(threading.Thread):
             event_id, camera, updated_db = ended
             camera_config = self.config.cameras[camera]

+            if event_id in self.detected_faces:
+                self.detected_faces.pop(event_id)
+
             if updated_db:
                 try:
                     event: Event = Event.get(Event.id == event_id)
@@ -308,25 +311,28 @@ class EmbeddingMaintainer(threading.Thread):
     def _search_face(self, query_embedding: bytes) -> list:
         """Search for the face most closely matching the embedding."""
-        sql_query = """
+        sql_query = f"""
             SELECT
                 id,
                 distance
             FROM vec_faces
             WHERE face_embedding MATCH ?
-                AND k = 10 ORDER BY distance
+                AND k = {REQUIRED_FACES} ORDER BY distance
         """
         return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall()

     def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
         """Look for faces in image."""
+        id = obj_data["id"]
+
         # don't run for non person objects
         if obj_data.get("label") != "person":
             logger.debug("Not processing face for non-person object.")
             return

-        # don't overwrite sub label for objects that have one
-        if obj_data.get("sub_label"):
+        # don't overwrite sub label for objects that have a sub label
+        # that is not a face
+        if obj_data.get("sub_label") and id not in self.detected_faces:
             logger.debug(
                 f"Not processing face due to existing sub label: {obj_data.get('sub_label')}."
             )
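
For context, _search_face relies on sqlite-vec's KNN query syntax, where MATCH takes a serialized query vector and k caps the neighbor count. A minimal sketch, under the assumption that the sqlite-vec Python bindings (sqlite_vec.load, serialize_float32) are available; the table shape and ids here are illustrative:

    import sqlite3

    import sqlite_vec  # assumption: the sqlite-vec Python bindings are installed

    db = sqlite3.connect(":memory:")
    db.enable_load_extension(True)
    sqlite_vec.load(db)
    db.enable_load_extension(False)

    # toy 4-dim vectors; the real face embeddings are much higher dimensional
    db.execute("CREATE VIRTUAL TABLE vec_faces USING vec0(id TEXT PRIMARY KEY, face_embedding FLOAT[4])")
    for face_id, vec in [
        ("alice-0", [0.1, 0.2, 0.3, 0.4]),
        ("alice-1", [0.1, 0.2, 0.3, 0.5]),
        ("bob-0", [0.9, 0.8, 0.1, 0.0]),
    ]:
        db.execute(
            "INSERT INTO vec_faces(id, face_embedding) VALUES (?, ?)",
            [face_id, sqlite_vec.serialize_float32(vec)],
        )

    # same shape as _search_face: k limits the result count and rows come
    # back ordered by ascending distance (closest matches first)
    rows = db.execute(
        "SELECT id, distance FROM vec_faces WHERE face_embedding MATCH ? AND k = 2 ORDER BY distance",
        [sqlite_vec.serialize_float32([0.1, 0.2, 0.3, 0.4])],
    ).fetchall()
    print(rows)  # both nearest neighbors should be alice-* crops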
@@ -380,18 +386,35 @@ class EmbeddingMaintainer(threading.Thread):
         best_faces = self._search_face(query_embedding)
         logger.debug(f"Detected best faces for person as: {best_faces}")

-        if not best_faces:
+        if not best_faces or len(best_faces) < REQUIRED_FACES:
             return

         sub_label = str(best_faces[0][0]).split("-")[0]
-        score = 1.0 - best_faces[0][1]
+        avg_score = 0

-        if score < self.config.semantic_search.face_recognition.threshold:
+        for face in best_faces:
+            score = 1.0 - face[1]
+
+            if str(face[0]).split("-")[0] != sub_label:
+                logger.debug("Detected multiple faces, result is not valid.")
+                return None
+
+            avg_score += score
+
+        avg_score = avg_score / REQUIRED_FACES
+
+        if avg_score < self.config.face_recognition.threshold or (
+            id in self.detected_faces and avg_score <= self.detected_faces[id]
+        ):
             logger.debug(
                 "Detected face does not score higher than threshold / previous face."
             )
             return None

+        self.detected_faces[id] = avg_score
         requests.post(
-            f"{FRIGATE_LOCALHOST}/api/events/{obj_data['id']}/sub_label",
-            json={"subLabel": sub_label, "subLabelScore": score},
+            f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label",
+            json={"subLabel": sub_label, "subLabelScore": avg_score},
         )

     def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
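
Pulling the new pieces together, here is a condensed, runnable restatement of the scoring rule in isolation (a sketch, not Frigate's actual method): every nearest neighbor must resolve to the same person, the cosine-similarity scores (1.0 - distance) are averaged, and the average must beat both the configured threshold and any score already recorded for the event.

    REQUIRED_FACES = 2


    def score_face_matches(
        best_faces: list[tuple[str, float]],
        threshold: float,
        previous_best: float | None = None,
    ) -> tuple[str, float] | None:
        """Return (sub_label, avg_score) only when all neighbors agree and the
        averaged similarity beats the threshold and any earlier score."""
        if len(best_faces) < REQUIRED_FACES:
            return None

        sub_label = str(best_faces[0][0]).split("-")[0]
        avg_score = 0.0

        for face_id, distance in best_faces:
            # neighbors that disagree on identity make the match ambiguous
            if str(face_id).split("-")[0] != sub_label:
                return None
            avg_score += 1.0 - distance

        avg_score /= REQUIRED_FACES

        if avg_score < threshold or (previous_best is not None and avg_score <= previous_best):
            return None

        return sub_label, avg_score


    print(score_face_matches([("alice-0", 0.05), ("alice-1", 0.15)], threshold=0.8))  # ('alice', 0.9)
    print(score_face_matches([("alice-0", 0.05), ("bob-0", 0.15)], threshold=0.8))    # None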