Embeddings normalization fixes (#14284)

* Use cosine distance metric for vec tables

* Only apply normalization to multi modal searches

* Catch possible edge case in stddev calc

* Use sigmoid function for normalization for multi modal searches only

* Ensure we get model state on initial page load

* Only save stats for multi modal searches and only use cosine similarity for image -> image search
This commit is contained in:
Josh Hawkins
2024-10-11 13:11:11 -05:00
committed by GitHub
parent d4b9b5a7dd
commit 8a8a0c7dec
5 changed files with 41 additions and 26 deletions

View File

@@ -187,13 +187,19 @@ export default function SearchView({
}
}, [searchResults, searchDetail]);
// confidence score - probably needs tweaking
// confidence score
/**
 * Converts a search result's distance score into a whole-number confidence
 * percentage (0-100 for in-range inputs) for display.
 *
 * Two scoring modes are supported, selected from the active `searchFilter`:
 *
 * - Similarity searches (`search_type` includes "similarity"): the score is
 *   not normalized, so it is used directly as a distance and the confidence
 *   is `1 - score` (presumably a cosine distance — confirm against the
 *   backend's vec table metric).
 * - Every other search, including when no filter is set: the score is a
 *   normalized (z-score style) distance, squashed with the sigmoid
 *   `1 / (1 + e^score)` so that lower distances yield higher confidence.
 *
 * @param score - Distance score reported for a search result.
 * @returns The confidence as a rounded percentage.
 */
const zScoreToConfidence = (score: number): number => {
  // A missing filter (or one without a search_type) cannot be a similarity
  // search, so treat it as the normalized case instead of returning
  // undefined.
  const notNormalized =
    searchFilter?.search_type?.includes("similarity") ?? false;

  const confidence = notNormalized ? 1 - score : 1 / (1 + Math.exp(score));

  return Math.round(confidence * 100);
};
const hasExistingSearch = useMemo(