Use sqlite-vec extension instead of chromadb for embeddings (#14163)

* swap sqlite_vec for chroma in requirements

* load sqlite_vec in embeddings manager

* remove chroma and revamp Embeddings class for sqlite_vec

* manual minilm onnx inference

* remove chroma in clip model

* migrate api from chroma to sqlite_vec

* migrate event cleanup from chroma to sqlite_vec

* migrate embedding maintainer from chroma to sqlite_vec

* genai description for sqlite_vec

* load sqlite_vec in main thread db

* extend the SqliteQueueDatabase class and use peewee db.execute_sql

* search with Event type for similarity

* fix similarity search

* install and add comment about transformers

* fix normalization

* add id filter

* clean up

* clean up

* fully remove chroma and add transformers env var

* readd uvicorn for fastapi

* readd tokenizer parallelism env var

* remove chroma from docs

* remove chroma from UI

* try removing custom pysqlite3 build

* hard code limit

* optimize queries

* revert explore query

* fix query

* keep building pysqlite3

* single pass fetch and process

* remove unnecessary re-embed

* update deps

* move SqliteVecQueueDatabase to db directory

* make search thumbnail take up full size of results box

* improve typing

* improve model downloading and add status screen

* daemon downloading thread

* catch case when semantic search is disabled

* fix typing

* build sqlite_vec from source

* resolve conflict

* file permissions

* try build deps

* remove sources

* sources

* fix thread start

* include git in build

* reorder embeddings after detectors are started

* build with sqlite amalgamation

* non-platform specific

* use wget instead of curl

* remove unzip -d

* remove sqlite_vec from requirements and load the compiled version

* fix build

* avoid race in db connection

* add scale_factor and bias to description zscore normalization
This commit is contained in:
Josh Hawkins
2024-10-07 15:30:45 -05:00
committed by GitHub
parent 757150dec1
commit 24ac9f3e5a
42 changed files with 951 additions and 533 deletions

View File

@@ -1 +0,0 @@
chroma-pipeline

View File

@@ -1,4 +0,0 @@
#!/command/with-contenv bash
# shellcheck shell=bash
# Log runner for the chroma service: replaces this shell with logutil-service,
# passing /dev/shm/logs/chroma as its only argument.
# NOTE(review): logutil-service is presumably the s6-overlay logging helper that
# writes the service's output into that directory — confirm.
exec logutil-service /dev/shm/logs/chroma

View File

@@ -1,28 +0,0 @@
#!/command/with-contenv bash
# shellcheck shell=bash
# Finish script for the ChromaDB service: report how the service exited, record
# an exit code for the container if none has been recorded yet, then take down
# the S6 supervision tree.

set -o errexit -o nounset -o pipefail

# Logs should be sent to stdout so that s6 can collect them

# File holding the exit code recorded for the whole container
readonly exitcode_file=/run/s6-linux-init-container-results/exitcode

# Read the currently recorded code. The assignment is kept separate from
# `readonly` so that a failing `cat` is not masked and aborts under errexit.
exit_code_container=$(cat "${exitcode_file}")
readonly exit_code_container

# $1 is the service's exit code, $2 the signal number
exit_code_service="${1}"
exit_code_signal="${2}"
readonly exit_code_service exit_code_signal service="ChromaDB"

echo "[INFO] Service ${service} exited with code ${exit_code_service} (by signal ${exit_code_signal})"

# The recorded code is only overwritten while it is still 0, so the first
# failure wins. A service code of 256 is reported as 128 + signal number
# (presumably s6's "killed by a signal" value — confirm against s6 docs);
# any other non-zero service code is recorded as-is.
if [[ "${exit_code_service}" -eq 256 ]] && [[ "${exit_code_container}" -eq 0 ]]; then
  echo $((128 + exit_code_signal)) >"${exitcode_file}"
elif [[ "${exit_code_service}" -ne 0 ]] && [[ "${exit_code_container}" -eq 0 ]]; then
  echo "${exit_code_service}" >"${exitcode_file}"
fi

# Replace this shell with the halt command to stop the supervision tree
exec /run/s6/basedir/bin/halt

View File

@@ -1,27 +0,0 @@
#!/command/with-contenv bash
# shellcheck shell=bash
# Start the ChromaDB service, or idle forever when semantic search is disabled

set -o errexit -o nounset -o pipefail

# Logs should be sent to stdout so that s6 can collect them

# Tell S6-Overlay not to restart this service
s6-svc -O .

# Ask the settings helper whether semantic search is enabled; jq extracts the
# raw `.enabled` field from the helper's JSON output. With errexit and pipefail
# set, a failure in either pipeline stage aborts the script here.
search_enabled=$(python3 /usr/local/semantic_search/get_search_settings.py | jq -r .enabled)

# Redirect stderr to stdout for everything that runs from here on
exec 2>&1

if [[ "${search_enabled}" == 'true' ]]; then
  echo "[INFO] Starting ChromaDB..."
  # Replace the bash process with the ChromaDB process
  exec /usr/local/chroma run --path /config/chroma --host 127.0.0.1
else
  # Search is disabled: sleep in an endless loop instead of starting ChromaDB.
  # NOTE(review): presumably this keeps the longrun service "up" so that s6 does
  # not treat it as exited — confirm. The loop never terminates on its own, so
  # nothing after it can run.
  while true; do
    sleep 9999
  done
fi

View File

@@ -1 +0,0 @@
longrun

View File

@@ -4,7 +4,7 @@
set -o errexit -o nounset -o pipefail
# NOTE(review): the two assignments below appear to be the before/after pair of
# this diff hunk — the first still lists /dev/shm/logs/chroma, the second drops
# it. Read literally as shell, the second assignment overrides the first.
dirs=(/dev/shm/logs/frigate /dev/shm/logs/go2rtc /dev/shm/logs/nginx /dev/shm/logs/certsync /dev/shm/logs/chroma)
dirs=(/dev/shm/logs/frigate /dev/shm/logs/go2rtc /dev/shm/logs/nginx /dev/shm/logs/certsync)
# Create every log directory (including missing parents) in a single call
mkdir -p "${dirs[@]}"
# Give ownership of the log directories to nobody:nogroup
chown nobody:nogroup "${dirs[@]}"