Forked from GitHub: blakeblackshear/frigate
Add config option to select fp16 or quantized jina vision model (#14270)
* Add config option to select fp16 or quantized jina vision model
* requires_fp16 for text and large models only
* fix model type check
* fix cpu
* pass model size
This commit is contained in:
@@ -13,3 +13,6 @@ class SemanticSearchConfig(FrigateBaseModel):
         default=False, title="Reindex all detections on startup."
     )
     device: str = Field(default="AUTO", title="Device Type")
+    model_size: str = Field(
+        default="small", title="The size of the embeddings model used."
+    )
||||
@@ -68,7 +68,9 @@ class Embeddings:
         models = [
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
             "jinaai/jina-clip-v1-tokenizer",
-            "jinaai/jina-clip-v1-vision_model_fp16.onnx",
+            "jinaai/jina-clip-v1-vision_model_fp16.onnx"
+            if config.model_size == "large"
+            else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
             "jinaai/jina-clip-v1-preprocessor_config.json",
         ]
||||
@@ -95,19 +97,29 @@ class Embeddings:
                 "text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
             },
             embedding_function=jina_text_embedding_function,
+            model_size=config.model_size,
             model_type="text",
             requestor=self.requestor,
             device="CPU",
         )

+        model_file = (
+            "vision_model_fp16.onnx"
+            if self.config.model_size == "large"
+            else "vision_model_quantized.onnx"
+        )
+
+        download_urls = {
+            model_file: f"https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/{model_file}",
+            "preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
+        }
+
         self.vision_embedding = GenericONNXEmbedding(
             model_name="jinaai/jina-clip-v1",
-            model_file="vision_model_fp16.onnx",
-            download_urls={
-                "vision_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/vision_model_fp16.onnx",
-                "preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
-            },
+            model_file=model_file,
+            download_urls=download_urls,
             embedding_function=jina_vision_embedding_function,
+            model_size=config.model_size,
             model_type="vision",
             requestor=self.requestor,
             device=self.config.device,
||||
@@ -41,6 +41,7 @@ class GenericONNXEmbedding:
         model_file: str,
         download_urls: Dict[str, str],
         embedding_function: Callable[[List[np.ndarray]], np.ndarray],
+        model_size: str,
         model_type: str,
         requestor: InterProcessRequestor,
         tokenizer_file: Optional[str] = None,
@@ -54,7 +55,9 @@ class GenericONNXEmbedding:
         self.embedding_function = embedding_function
         self.model_type = model_type  # 'text' or 'vision'
         self.providers, self.provider_options = get_ort_providers(
-            force_cpu=device == "CPU", requires_fp16=True, openvino_device=device
+            force_cpu=device == "CPU",
+            requires_fp16=model_size == "large" or self.model_type == "text",
+            openvino_device=device,
         )

         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
||||
Reference in New Issue
Block a user