Improve face recognition (#14537)

* Increase requirements for face to be set
* Manage faces properly
* Add basic docs
* Simplify
* Separate out face recognition from semantic search
* Update docs
* Formatting
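In effect, the commit promotes face recognition from a child of semantic_search to its own top-level config section. A minimal sketch of the before/after user-facing config shape (keys mirror the pydantic fields in the diff below; values are illustrative only):

# Hypothetical config dicts; keys come from the diff, values are examples.
before = {
    "semantic_search": {
        "enabled": True,
        "face_recognition": {"enabled": True, "min_area": 500},  # nested (old)
    }
}
after = {
    "semantic_search": {"enabled": True},  # still required for face recognition
    "face_recognition": {"enabled": True, "min_area": 500},  # top level (new)
}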
@@ -57,7 +57,7 @@ from .logger import LoggerConfig
 from .mqtt import MqttConfig
 from .notification import NotificationConfig
 from .proxy import ProxyConfig
-from .semantic_search import SemanticSearchConfig
+from .semantic_search import FaceRecognitionConfig, SemanticSearchConfig
 from .telemetry import TelemetryConfig
 from .tls import TlsConfig
 from .ui import UIConfig
@@ -159,6 +159,16 @@ class RestreamConfig(BaseModel):
     model_config = ConfigDict(extra="allow")


+def verify_semantic_search_dependent_configs(config: FrigateConfig) -> None:
+    """Verify that semantic search is enabled if required features are enabled."""
+    if not config.semantic_search.enabled:
+        if config.genai.enabled:
+            raise ValueError("Genai requires semantic search to be enabled.")
+
+        if config.face_recognition.enabled:
+            raise ValueError("Face recognition requires semantic search to be enabled.")
+
+
 def verify_config_roles(camera_config: CameraConfig) -> None:
     """Verify that roles are setup in the config correctly."""
     assigned_roles = list(
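For context, the new check behaves like this standalone sketch, where SimpleNamespace stands in for the real pydantic models (names mirror the diff):

from types import SimpleNamespace

# Face recognition enabled without semantic search: validation must fail.
config = SimpleNamespace(
    semantic_search=SimpleNamespace(enabled=False),
    genai=SimpleNamespace(enabled=False),
    face_recognition=SimpleNamespace(enabled=True),
)

if not config.semantic_search.enabled:
    if config.genai.enabled:
        raise ValueError("Genai requires semantic search to be enabled.")
    if config.face_recognition.enabled:
        # this branch is hit here, so a ValueError is raised
        raise ValueError("Face recognition requires semantic search to be enabled.")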
@@ -320,6 +330,9 @@ class FrigateConfig(FrigateBaseModel):
     semantic_search: SemanticSearchConfig = Field(
         default_factory=SemanticSearchConfig, title="Semantic search configuration."
     )
+    face_recognition: FaceRecognitionConfig = Field(
+        default_factory=FaceRecognitionConfig, title="Face recognition config."
+    )
     ui: UIConfig = Field(default_factory=UIConfig, title="UI configuration.")

     # Detector config
@@ -625,6 +638,7 @@ class FrigateConfig(FrigateBaseModel):
             detector_config.model.compute_model_hash()
             self.detectors[key] = detector_config

+        verify_semantic_search_dependent_configs(self)
         return self

     @field_validator("cameras")
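The call above sits at the end of what appears to be FrigateConfig's post-validation hook. A hedged sketch of that pattern (the method name and decorator placement here are assumptions, not taken from the diff):

from pydantic import BaseModel, model_validator

class FrigateConfig(BaseModel):
    # ... fields elided ...

    @model_validator(mode="after")
    def post_validation(self) -> "FrigateConfig":
        ...  # detector setup elided, per the hunk above
        verify_semantic_search_dependent_configs(self)
        return self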
@@ -7,6 +7,16 @@ from .base import FrigateBaseModel
 __all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"]


+class SemanticSearchConfig(FrigateBaseModel):
+    enabled: bool = Field(default=False, title="Enable semantic search.")
+    reindex: Optional[bool] = Field(
+        default=False, title="Reindex all detections on startup."
+    )
+    model_size: str = Field(
+        default="small", title="The size of the embeddings model used."
+    )
+
+
 class FaceRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable face recognition.")
     threshold: float = Field(
@@ -15,16 +25,3 @@ class FaceRecognitionConfig(FrigateBaseModel):
     min_area: int = Field(
         default=500, title="Min area of face box to consider running face recognition."
     )
-
-
-class SemanticSearchConfig(FrigateBaseModel):
-    enabled: bool = Field(default=False, title="Enable semantic search.")
-    reindex: Optional[bool] = Field(
-        default=False, title="Reindex all detections on startup."
-    )
-    face_recognition: FaceRecognitionConfig = Field(
-        default_factory=FaceRecognitionConfig, title="Face recognition config."
-    )
-    model_size: str = Field(
-        default="small", title="The size of the embeddings model used."
-    )
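With FaceRecognitionConfig no longer nested, call sites change from config.semantic_search.face_recognition.* to config.face_recognition.*. Illustrative only, assuming config is a parsed FrigateConfig:

# old, removed in this commit:
# enabled = config.semantic_search.face_recognition.enabled
# new, top-level section:
enabled = config.face_recognition.enabled
threshold = config.face_recognition.threshold  # likewise for threshold and min_area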
@@ -11,7 +11,7 @@ from numpy import ndarray
 from playhouse.shortcuts import model_to_dict

 from frigate.comms.inter_process import InterProcessRequestor
-from frigate.config.semantic_search import SemanticSearchConfig
+from frigate.config import FrigateConfig
 from frigate.const import (
     CONFIG_DIR,
     FACE_DIR,
@@ -62,9 +62,7 @@ def get_metadata(event: Event) -> dict:
 class Embeddings:
     """SQLite-vec embeddings database."""

-    def __init__(
-        self, config: SemanticSearchConfig, db: SqliteVecQueueDatabase
-    ) -> None:
+    def __init__(self, config: FrigateConfig, db: SqliteVecQueueDatabase) -> None:
         self.config = config
         self.db = db
         self.requestor = InterProcessRequestor()
@@ -76,7 +74,7 @@ class Embeddings:
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
             "jinaai/jina-clip-v1-tokenizer",
             "jinaai/jina-clip-v1-vision_model_fp16.onnx"
-            if config.model_size == "large"
+            if config.semantic_search.model_size == "large"
             else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
             "jinaai/jina-clip-v1-preprocessor_config.json",
         ]
@@ -97,7 +95,7 @@ class Embeddings:
             download_urls={
                 "text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
             },
-            model_size=config.model_size,
+            model_size=config.semantic_search.model_size,
             model_type=ModelTypeEnum.text,
             requestor=self.requestor,
             device="CPU",
@@ -105,7 +103,7 @@ class Embeddings:

         model_file = (
             "vision_model_fp16.onnx"
-            if self.config.model_size == "large"
+            if self.config.semantic_search.model_size == "large"
             else "vision_model_quantized.onnx"
         )
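Since Embeddings now receives the whole FrigateConfig rather than just the semantic-search section, model-size lookups are qualified with config.semantic_search. A short sketch, assuming config and db are constructed elsewhere by Frigate:

embeddings = Embeddings(config, db)       # was: Embeddings(config.semantic_search, db)
size = config.semantic_search.model_size  # was: config.model_size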
@@ -34,6 +34,7 @@ from .embeddings import Embeddings

 logger = logging.getLogger(__name__)

+REQUIRED_FACES = 2
 MAX_THUMBNAILS = 10


@@ -48,7 +49,7 @@ class EmbeddingMaintainer(threading.Thread):
     ) -> None:
         super().__init__(name="embeddings_maintainer")
         self.config = config
-        self.embeddings = Embeddings(config.semantic_search, db)
+        self.embeddings = Embeddings(config, db)

         # Check if we need to re-index events
         if config.semantic_search.reindex:
@@ -63,10 +64,9 @@ class EmbeddingMaintainer(threading.Thread):
         self.frame_manager = SharedMemoryFrameManager()

         # set face recognition conditions
-        self.face_recognition_enabled = (
-            self.config.semantic_search.face_recognition.enabled
-        )
+        self.face_recognition_enabled = self.config.face_recognition.enabled
         self.requires_face_detection = "face" not in self.config.model.all_attributes
+        self.detected_faces: dict[str, float] = {}

         # create communication for updating event descriptions
         self.requestor = InterProcessRequestor()
@@ -184,6 +184,9 @@ class EmbeddingMaintainer(threading.Thread):
             event_id, camera, updated_db = ended
             camera_config = self.config.cameras[camera]

+            if event_id in self.detected_faces:
+                self.detected_faces.pop(event_id)
+
             if updated_db:
                 try:
                     event: Event = Event.get(Event.id == event_id)
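The detected_faces dict gives each tracked object a best-score memory, cleared when its event ends. A standalone sketch of that lifecycle (these helper functions are illustrative, not Frigate APIs):

detected_faces: dict[str, float] = {}

def on_face_scored(event_id: str, avg_score: float) -> bool:
    # accept only a face that beats the best score already recorded for this event
    if avg_score <= detected_faces.get(event_id, 0.0):
        return False
    detected_faces[event_id] = avg_score
    return True

def on_event_ended(event_id: str) -> None:
    # drop the entry so stale event ids don't accumulate
    detected_faces.pop(event_id, None)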
@@ -308,25 +311,28 @@ class EmbeddingMaintainer(threading.Thread):

     def _search_face(self, query_embedding: bytes) -> list:
         """Search for the face most closely matching the embedding."""
-        sql_query = """
+        sql_query = f"""
             SELECT
                 id,
                 distance
             FROM vec_faces
             WHERE face_embedding MATCH ?
-                AND k = 10 ORDER BY distance
+                AND k = {REQUIRED_FACES} ORDER BY distance
         """
         return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall()

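The MATCH ... AND k = N form is sqlite-vec's KNN query syntax: it returns the N nearest rows by embedding distance, so lowering k from 10 to REQUIRED_FACES asks for exactly the two closest faces. A standalone sketch of the same lookup outside Frigate, assuming the sqlite-vec Python bindings and a small vec_faces table (the 4-dim embeddings are toy values):

import sqlite3

import sqlite_vec  # pip install sqlite-vec

db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)

REQUIRED_FACES = 2
db.execute(
    "CREATE VIRTUAL TABLE vec_faces USING vec0(id TEXT PRIMARY KEY, face_embedding FLOAT[4])"
)
for face_id, vec in [("alice-1", [0.1, 0.2, 0.3, 0.4]), ("alice-2", [0.1, 0.2, 0.3, 0.5])]:
    db.execute(
        "INSERT INTO vec_faces(id, face_embedding) VALUES (?, ?)",
        [face_id, sqlite_vec.serialize_float32(vec)],
    )

query = sqlite_vec.serialize_float32([0.1, 0.2, 0.3, 0.4])
rows = db.execute(
    f"""
    SELECT id, distance FROM vec_faces
    WHERE face_embedding MATCH ? AND k = {REQUIRED_FACES}
    ORDER BY distance
    """,
    [query],
).fetchall()
print(rows)  # nearest rows first, e.g. [('alice-1', 0.0), ('alice-2', ...)]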
     def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
         """Look for faces in image."""
+        id = obj_data["id"]
+
         # don't run for non person objects
         if obj_data.get("label") != "person":
             logger.debug("Not processing face for non person object.")
             return

-        # don't overwrite sub label for objects that have one
-        if obj_data.get("sub_label"):
+        # don't overwrite sub label for objects that have a sub label
+        # that is not a face
+        if obj_data.get("sub_label") and id not in self.detected_faces:
             logger.debug(
                 f"Not processing face due to existing sub label: {obj_data.get('sub_label')}."
             )
@@ -380,18 +386,35 @@ class EmbeddingMaintainer(threading.Thread):
         best_faces = self._search_face(query_embedding)
         logger.debug(f"Detected best faces for person as: {best_faces}")

-        if not best_faces:
+        if not best_faces or len(best_faces) < REQUIRED_FACES:
             return

         sub_label = str(best_faces[0][0]).split("-")[0]
-        score = 1.0 - best_faces[0][1]
+        avg_score = 0

-        if score < self.config.semantic_search.face_recognition.threshold:
+        for face in best_faces:
+            score = 1.0 - face[1]
+
+            if face[0] != sub_label:
+                logger.debug("Detected multiple faces, result is not valid.")
+                return None
+
+            avg_score += score
+
+        avg_score = avg_score / REQUIRED_FACES
+
+        if avg_score < self.config.face_recognition.threshold or (
+            id in self.detected_faces and avg_score <= self.detected_faces[id]
+        ):
             logger.debug(
                 "Detected face does not score higher than threshold / previous face."
             )
             return None

+        self.detected_faces[id] = avg_score
         requests.post(
-            f"{FRIGATE_LOCALHOST}/api/events/{obj_data['id']}/sub_label",
-            json={"subLabel": sub_label, "subLabelScore": score},
+            f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label",
+            json={"subLabel": sub_label, "subLabelScore": avg_score},
         )

     def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
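Net effect of the scoring change: a face is accepted only when the top REQUIRED_FACES matches agree on the same person, their average similarity clears the threshold, and the average beats any score already recorded for the object. A standalone sketch of that rule (this helper is illustrative, not a Frigate API; unlike the diff, which compares the raw row id, it compares the name prefix):

REQUIRED_FACES = 2

def resolve_face(
    best_faces: list[tuple[str, float]],  # (row id, distance) pairs, nearest first
    threshold: float,
    previous_best: float | None = None,
) -> tuple[str, float] | None:
    """Return (name, avg_score) when the top matches agree, else None."""
    if len(best_faces) < REQUIRED_FACES:
        return None

    name = str(best_faces[0][0]).split("-")[0]
    avg_score = 0.0
    for row_id, distance in best_faces:
        if str(row_id).split("-")[0] != name:
            return None  # top matches name different people: not a valid result
        avg_score += 1.0 - distance  # sqlite-vec returns a distance; invert to a score

    avg_score /= REQUIRED_FACES
    if avg_score < threshold:
        return None
    if previous_best is not None and avg_score <= previous_best:
        return None
    return name, avg_score

# resolve_face([("alice-1", 0.05), ("alice-2", 0.11)], threshold=0.9) -> ("alice", 0.92)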