forked from Github/frigate
Use sqlite-vec extension instead of chromadb for embeddings (#14163)
* swap sqlite_vec for chroma in requirements
* load sqlite_vec in embeddings manager
* remove chroma and revamp Embeddings class for sqlite_vec
* manual minilm onnx inference
* remove chroma in clip model
* migrate api from chroma to sqlite_vec
* migrate event cleanup from chroma to sqlite_vec
* migrate embedding maintainer from chroma to sqlite_vec
* genai description for sqlite_vec
* load sqlite_vec in main thread db
* extend the SqliteQueueDatabase class and use peewee db.execute_sql
* search with Event type for similarity
* fix similarity search
* install and add comment about transformers
* fix normalization
* add id filter
* clean up
* clean up
* fully remove chroma and add transformers env var
* readd uvicorn for fastapi
* readd tokenizer parallelism env var
* remove chroma from docs
* remove chroma from UI
* try removing custom pysqlite3 build
* hard code limit
* optimize queries
* revert explore query
* fix query
* keep building pysqlite3
* single pass fetch and process
* remove unnecessary re-embed
* update deps
* move SqliteVecQueueDatabase to db directory
* make search thumbnail take up full size of results box
* improve typing
* improve model downloading and add status screen
* daemon downloading thread
* catch case when semantic search is disabled
* fix typing
* build sqlite_vec from source
* resolve conflict
* file permissions
* try build deps
* remove sources
* sources
* fix thread start
* include git in build
* reorder embeddings after detectors are started
* build with sqlite amalgamation
* non-platform specific
* use wget instead of curl
* remove unzip -d
* remove sqlite_vec from requirements and load the compiled version
* fix build
* avoid race in db connection
* add scale_factor and bias to description zscore normalization
This commit is contained in:
@@ -8,6 +8,8 @@ from enum import Enum
|
||||
from multiprocessing.synchronize import Event as MpEvent
|
||||
from pathlib import Path
|
||||
|
||||
from playhouse.sqliteq import SqliteQueueDatabase
|
||||
|
||||
from frigate.config import FrigateConfig
|
||||
from frigate.const import CLIPS_DIR
|
||||
from frigate.embeddings.embeddings import Embeddings
|
||||
@@ -22,16 +24,19 @@ class EventCleanupType(str, Enum):
|
||||
|
||||
|
||||
class EventCleanup(threading.Thread):
|
||||
def __init__(self, config: FrigateConfig, stop_event: MpEvent):
|
||||
def __init__(
|
||||
self, config: FrigateConfig, stop_event: MpEvent, db: SqliteQueueDatabase
|
||||
):
|
||||
super().__init__(name="event_cleanup")
|
||||
self.config = config
|
||||
self.stop_event = stop_event
|
||||
self.db = db
|
||||
self.camera_keys = list(self.config.cameras.keys())
|
||||
self.removed_camera_labels: list[str] = None
|
||||
self.camera_labels: dict[str, dict[str, any]] = {}
|
||||
|
||||
if self.config.semantic_search.enabled:
|
||||
self.embeddings = Embeddings()
|
||||
self.embeddings = Embeddings(self.db)
|
||||
|
||||
def get_removed_camera_labels(self) -> list[Event]:
|
||||
"""Get a list of distinct labels for removed cameras."""
|
||||
@@ -229,15 +234,8 @@ class EventCleanup(threading.Thread):
|
||||
Event.delete().where(Event.id << chunk).execute()
|
||||
|
||||
if self.config.semantic_search.enabled:
|
||||
for collection in [
|
||||
self.embeddings.thumbnail,
|
||||
self.embeddings.description,
|
||||
]:
|
||||
existing_ids = collection.get(ids=chunk, include=[])["ids"]
|
||||
if existing_ids:
|
||||
collection.delete(ids=existing_ids)
|
||||
logger.debug(
|
||||
f"Deleted {len(existing_ids)} embeddings from {collection.__class__.__name__}"
|
||||
)
|
||||
self.embeddings.delete_description(chunk)
|
||||
self.embeddings.delete_thumbnail(chunk)
|
||||
logger.debug(f"Deleted {len(events_to_delete)} embeddings")
|
||||
|
||||
logger.info("Exiting event cleanup...")
|
||||
|
||||
Reference in New Issue
Block a user