Override onnx clip loading (#13800)

* Set caching options for hardware providers
* Always use CPU for searching
* Use new install strategy to remove onnxruntime and then install post wheels
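For context on the first two bullets: ONNX Runtime lets callers pass a provider_options list parallel to providers when constructing an InferenceSession, which is how the cache paths in the diff below are wired up. A minimal standalone sketch of that pattern, with illustrative cache paths rather than Frigate's real ones:

import onnxruntime as ort

# Sketch of ONNX Runtime's per-provider options: provider_options[i]
# configures providers[i]. Cache paths here are placeholders.
providers = ort.get_available_providers()
options = []
for provider in providers:
    if provider == "TensorrtExecutionProvider":
        # Persist compiled TensorRT engines so they are not rebuilt on every start
        options.append(
            {"trt_engine_cache_enable": True, "trt_engine_cache_path": "/tmp/trt"}
        )
    elif provider == "OpenVINOExecutionProvider":
        options.append({"cache_dir": "/tmp/openvino"})
    else:
        options.append({})  # CPUExecutionProvider and others need no options

session = ort.InferenceSession(
    "model.onnx", providers=providers, provider_options=options
)

Passing providers=["CPUExecutionProvider"] to a separate session is the same mechanism the second bullet uses to keep search embeddings on the CPU.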
@@ -85,7 +85,10 @@ class Embeddings:
     @property
     def description(self) -> Collection:
         return self.client.get_or_create_collection(
-            name="event_description", embedding_function=MiniLMEmbedding()
+            name="event_description",
+            embedding_function=MiniLMEmbedding(
+                preferred_providers=["CPUExecutionProvider"]
+            ),
         )
 
     def reindex(self) -> None:
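The hunk above routes the description collection's embedding function onto the CPU. chromadb's bundled MiniLM ONNX embedding function exposes a preferred_providers argument for this; assuming Frigate's MiniLMEmbedding wrapper forwards it (an assumption, not shown in this diff), the standalone equivalent would be:

from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2

# Sketch only: assumes MiniLMEmbedding forwards preferred_providers to
# chromadb's bundled ONNX MiniLM model. Pinning the CPU provider keeps
# text-search embedding work off the GPU/accelerator.
ef = ONNXMiniLM_L6_V2(preferred_providers=["CPUExecutionProvider"])
vectors = ef(["a person walking through the yard"])  # one embedding per input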
@@ -1,9 +1,13 @@
 """CLIP Embeddings for Frigate."""
 
+import errno
+import logging
 import os
+from pathlib import Path
 from typing import Tuple, Union
 
 import onnxruntime as ort
+import requests
 from chromadb import EmbeddingFunction, Embeddings
 from chromadb.api.types import (
     Documents,
@@ -39,10 +43,69 @@ class Clip(OnnxClip):
         models = []
         for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
             path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
-            models.append(OnnxClip._load_model(path, silent))
+            models.append(Clip._load_model(path, silent))
 
         return models[0], models[1]
 
+    @staticmethod
+    def _load_model(path: str, silent: bool):
+        providers = ort.get_available_providers()
+        options = []
+
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        try:
+            if os.path.exists(path):
+                return ort.InferenceSession(
+                    path, providers=providers, provider_options=options
+                )
+            else:
+                raise FileNotFoundError(
+                    errno.ENOENT,
+                    os.strerror(errno.ENOENT),
+                    path,
+                )
+        except Exception:
+            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
+            if not silent:
+                logging.info(
+                    f"The model file ({path}) doesn't exist "
+                    f"or it is invalid. Downloading it from the public S3 "
+                    f"bucket: {s3_url}."  # noqa: E501
+                )
+
+            # Download from S3
+            # Saving to a temporary file first to avoid corrupting the file
+            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
+
+            # Create any missing directories in the path
+            temporary_filename.parent.mkdir(parents=True, exist_ok=True)
+
+            with requests.get(s3_url, stream=True) as r:
+                r.raise_for_status()
+                with open(temporary_filename, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+                    f.flush()
+            # Finally move the temporary file to the correct location
+            temporary_filename.rename(path)
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )
+
 
 class ClipEmbedding(EmbeddingFunction):
     """Embedding function for CLIP model used in Chroma."""