Model and genai fixes (#14481)

* disable mem arena in options for cpu only

* add try/except around ollama initialization

* update docs
This commit is contained in:
Josh Hawkins
2024-10-21 10:00:45 -05:00
committed by GitHub
parent 4bb420d049
commit 8364e68667
3 changed files with 21 additions and 13 deletions

View File

@@ -21,12 +21,20 @@ class OllamaClient(GenAIClient):
def _init_provider(self):
    """Initialize the Ollama client.

    Connects to the configured Ollama server and verifies that the
    configured model is available locally (via ``client.show``) instead
    of pulling it, so startup does not trigger a network download.

    Returns:
        The connected ApiClient on success, or None if the model is
        missing, the server reports an error, or the server is
        unreachable (any exception during initialization is caught and
        logged rather than propagated, so a misconfigured GenAI provider
        does not crash the host application).
    """
    try:
        client = ApiClient(host=self.genai_config.base_url, timeout=self.timeout)
        # ensure the model is available locally
        response = client.show(self.genai_config.model)
        if response.get("error"):
            logger.error(
                "Ollama error: %s",
                response["error"],
            )
            return None
        return client
    except Exception as e:
        # best-effort init: log and disable the provider instead of raising
        logger.warning("Error initializing Ollama: %s", str(e))
        return None
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
"""Submit a request to Ollama"""

View File

@@ -20,7 +20,7 @@ def get_ort_providers(
["CPUExecutionProvider"],
[
{
"arena_extend_strategy": "kSameAsRequested",
"enable_cpu_mem_arena": False,
}
],
)
@@ -53,7 +53,7 @@ def get_ort_providers(
providers.append(provider)
options.append(
{
"arena_extend_strategy": "kSameAsRequested",
"enable_cpu_mem_arena": False,
}
)
else:
@@ -85,12 +85,8 @@ class ONNXModelRunner:
else:
# Use ONNXRuntime
self.type = "ort"
options = ort.SessionOptions()
if device == "CPU":
options.enable_cpu_mem_arena = False
self.ort = ort.InferenceSession(
model_path,
sess_options=options,
providers=providers,
provider_options=options,
)