Add config option to select fp16 or quantized jina vision model (#14270)

* Add config option to select fp16 or quantized jina vision model

* requires_fp16 for text and large models only

* fix model type check

* fix cpu

* pass model size
This commit is contained in:
Josh Hawkins
2024-10-10 17:46:21 -05:00
committed by GitHub
parent dd6276e706
commit 54eb03d2a1
7 changed files with 44 additions and 10 deletions

View File

@@ -13,3 +13,6 @@ class SemanticSearchConfig(FrigateBaseModel):
default=False, title="Reindex all detections on startup."
)
device: str = Field(default="AUTO", title="Device Type")
model_size: str = Field(
default="small", title="The size of the embeddings model used."
)

View File

@@ -68,7 +68,9 @@ class Embeddings:
models = [
"jinaai/jina-clip-v1-text_model_fp16.onnx",
"jinaai/jina-clip-v1-tokenizer",
-"jinaai/jina-clip-v1-vision_model_fp16.onnx",
+"jinaai/jina-clip-v1-vision_model_fp16.onnx"
+if config.model_size == "large"
+else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
"jinaai/jina-clip-v1-preprocessor_config.json",
]
@@ -95,19 +97,29 @@ class Embeddings:
"text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
},
embedding_function=jina_text_embedding_function,
model_size=config.model_size,
model_type="text",
requestor=self.requestor,
device="CPU",
)
model_file = (
"vision_model_fp16.onnx"
if self.config.model_size == "large"
else "vision_model_quantized.onnx"
)
download_urls = {
model_file: f"https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/{model_file}",
"preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
}
self.vision_embedding = GenericONNXEmbedding(
model_name="jinaai/jina-clip-v1",
-model_file="vision_model_fp16.onnx",
-download_urls={
-    "vision_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/vision_model_fp16.onnx",
-    "preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
-},
+model_file=model_file,
+download_urls=download_urls,
embedding_function=jina_vision_embedding_function,
model_size=config.model_size,
model_type="vision",
requestor=self.requestor,
device=self.config.device,

View File

@@ -41,6 +41,7 @@ class GenericONNXEmbedding:
model_file: str,
download_urls: Dict[str, str],
embedding_function: Callable[[List[np.ndarray]], np.ndarray],
model_size: str,
model_type: str,
requestor: InterProcessRequestor,
tokenizer_file: Optional[str] = None,
@@ -54,7 +55,9 @@ class GenericONNXEmbedding:
self.embedding_function = embedding_function
self.model_type = model_type # 'text' or 'vision'
self.providers, self.provider_options = get_ort_providers(
-force_cpu=device == "CPU", requires_fp16=True, openvino_device=device
+force_cpu=device == "CPU",
+requires_fp16=model_size == "large" or self.model_type == "text",
+openvino_device=device,
)
self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)