Model and genai fixes (#14481)

* disable mem arena in options for cpu only

* add try/except around ollama initialization

* update docs
This commit is contained in:
Josh Hawkins
2024-10-21 10:00:45 -05:00
committed by GitHub
parent 4bb420d049
commit 8364e68667
3 changed files with 21 additions and 13 deletions

View File

@@ -21,12 +21,20 @@ class OllamaClient(GenAIClient):
def _init_provider(self):
    """Initialize the Ollama client.

    Connects to the configured Ollama server and verifies that the
    configured model is available locally (via ``client.show``) instead
    of pulling it, so startup does not trigger a network download.

    Returns:
        The connected ApiClient on success, or None if the model is
        missing, the server reports an error, or the server is
        unreachable (any exception during initialization is caught and
        logged rather than propagated, so a misconfigured GenAI provider
        does not crash the host application).
    """
    try:
        client = ApiClient(host=self.genai_config.base_url, timeout=self.timeout)
        # ensure the model is available locally
        response = client.show(self.genai_config.model)
        if response.get("error"):
            logger.error(
                "Ollama error: %s",
                response["error"],
            )
            return None
        return client
    except Exception as e:
        # best-effort init: log and disable the provider instead of raising
        logger.warning("Error initializing Ollama: %s", str(e))
        return None
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
"""Submit a request to Ollama"""

View File

@@ -20,7 +20,7 @@ def get_ort_providers(
["CPUExecutionProvider"],
[
{
"arena_extend_strategy": "kSameAsRequested",
"enable_cpu_mem_arena": False,
}
],
)
@@ -53,7 +53,7 @@ def get_ort_providers(
providers.append(provider)
options.append(
{
"arena_extend_strategy": "kSameAsRequested",
"enable_cpu_mem_arena": False,
}
)
else:
@@ -85,12 +85,8 @@ class ONNXModelRunner:
else:
# Use ONNXRuntime
self.type = "ort"
options = ort.SessionOptions()
if device == "CPU":
options.enable_cpu_mem_arena = False
self.ort = ort.InferenceSession(
model_path,
sess_options=options,
providers=providers,
provider_options=options,
)