Embeddings normalization fixes (#14284)

* Use cosine distance metric for vec tables

* Only apply normalization to multimodal searches

* Catch possible edge case in stddev calc

* Use sigmoid function for normalization for multimodal searches only

* Ensure we get model state on initial page load

* Only save stats for multimodal searches and only use cosine similarity for image-to-image search
This commit is contained in:
Josh Hawkins
2024-10-11 13:11:11 -05:00
committed by GitHub
parent d4b9b5a7dd
commit 8a8a0c7dec
5 changed files with 41 additions and 26 deletions

View File

@@ -2,6 +2,7 @@ import {
useEmbeddingsReindexProgress,
useEventUpdate,
useModelState,
useWs,
} from "@/api/ws";
import ActivityIndicator from "@/components/indicators/activity-indicator";
import AnimatedCircularProgressBar from "@/components/ui/circular-progress-bar";
@@ -202,6 +203,14 @@ export default function Explore() {
// model states
const { send: sendCommand } = useWs("model_state", "modelState");
useEffect(() => {
sendCommand("modelState");
// only run on mount
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
const { payload: textModelState } = useModelState(
"jinaai/jina-clip-v1-text_model_fp16.onnx",
);

View File

@@ -187,13 +187,19 @@ export default function SearchView({
}
}, [searchResults, searchDetail]);
// confidence score - probably needs tweaking
// confidence score
const zScoreToConfidence = (score: number) => {
// Sigmoid function: 1 / (1 + e^x)
const confidence = 1 / (1 + Math.exp(score));
// Normalizing is not needed for similarity searches
// Sigmoid function for normalized: 1 / (1 + e^x)
// Cosine for similarity
if (searchFilter) {
const notNormalized = searchFilter?.search_type?.includes("similarity");
return Math.round(confidence * 100);
const confidence = notNormalized ? 1 - score : 1 / (1 + Math.exp(score));
return Math.round(confidence * 100);
}
};
const hasExistingSearch = useMemo(