Search backend refactor with typesense impl (#5528)

* initial elasticsearch impl

* working elastic cluster

* replace SearchError with ApiError for preparation of search backend

* start factoring meili out to trait

* move meili to backend

* update routes to use search backend trait

* wip

* Update projects.rs

* search backend is only init'd once in config

* wip

* wip: backend agnostic

* change search internal routes to delegate to backend

* initial elasticsearch impl

* fix filtering

* elastic impl

* refactor indexing into its own module

* clean up elastic code

* fix ci

* fix tests

* fix elastic health check

* fix up env rebase

* fix compile

* dummy commit to update github pr

* Fix rebase

* Elastic basic https auth

* Fix duplicate projects showing up

* Fix up tests

* Replace search `ApiErrors` with `eyre::Reports`, propagate background task errors

* clean up agents files

* make index chunk size configurable

* make `match_phrase` in elastic case-insensitive

* use current/next indices and swap between them

* test case for error body

* Fix failing case

* da merge

* factor out common stuff from search backends

* allow fetching hit metadata from search results

* allow customising elasticsearch search config

* bit of docs

* add mappings to indices for elastic

* Implement Typesense

* wip

* fix up some sort fields stuff

* use different approach to filterable field sets

* remove a bunch of search fields which weren't used for filtering

* bucket text matches

* Bucketing by text_match for typesense

* fix tombi lint

* fix some sentry errors and don't prioritise 2+ term matches

* tweak ts query settings

* expose some more search settings

* query sort changes

* small fixes

* should fix pagination stuff

* fix healthcheck maybe

* ragebait ci

* tests

* tests

* revert environment
This commit is contained in:
aecsocket
2026-03-12 17:58:55 +00:00
committed by GitHub
parent 1c1683adb6
commit f0224dfff7
36 changed files with 3848 additions and 762 deletions

View File

@@ -18,6 +18,20 @@ services:
interval: 3s
timeout: 5s
retries: 3
# Typesense search node.
typesense0:
  image: typesense/typesense:30.1
  container_name: labrinth-typesense0
  restart: on-failure
  ports:
    # Published on the host loopback interface only.
    - '127.0.0.1:8108:8108'
  volumes:
    # Named volume backing the directory passed to --data-dir.
    - typesense-data:/data
  # Folded scalar: the lines below are joined with single spaces into one
  # command string.
  command: >-
    --data-dir=/data
    --api-key=modrinth
    --enable-cors
  healthcheck:
    # Passes when bash can open a TCP connection to port 8108 inside the
    # container (bash /dev/tcp redirection).
    test:
      - CMD-SHELL
      - "bash -lc '</dev/tcp/127.0.0.1/8108'"
    interval: 3s
    timeout: 5s
    retries: 3
meilisearch0:
image: getmeili/meilisearch:v1.12.0
container_name: labrinth-meilisearch0
@@ -37,6 +51,167 @@ services:
interval: 3s
timeout: 5s
retries: 3
# One-shot helper that writes a CA and one certificate per Elasticsearch node
# into the `elasticsearch-certs` volume, which the elasticsearch0..2 services
# mount read-only.
elasticsearch-certs:
  image: elasticsearch:9.3.0
  container_name: labrinth-elasticsearch-certs
  # Runs as uid 0 so the script can chown the generated files.
  user: '0'
  networks:
    - elasticsearch-mesh
  # Quoted so YAML keeps the string "no" rather than a boolean.
  restart: 'no'
  volumes:
    - elasticsearch-certs:/usr/share/elasticsearch/config/certs
  # Script outline:
  #   1. If the CA cert or any node cert is missing/empty, wipe the volume,
  #      write instances.yml, and generate the CA and node certs with
  #      elasticsearch-certutil (PEM output, unzipped in place).
  #   2. Always reset ownership to 1000:0 and tighten permissions
  #      (directories 750, files 640).
  # The strings passed to printf form the instances.yml document, so the
  # leading spaces inside the quotes are significant: `dns` and `ip` must be
  # nested under their `- name:` entry.
  # Keep the script free of single quotes: the whole script is one
  # single-quoted argument to `bash -c`.
  command: |
    bash -c '
      set -euo pipefail
      if [ ! -s config/certs/ca/ca.crt ] || [ ! -s config/certs/elasticsearch0/elasticsearch0.crt ] || [ ! -s config/certs/elasticsearch1/elasticsearch1.crt ] || [ ! -s config/certs/elasticsearch2/elasticsearch2.crt ]; then
        rm -rf config/certs/*
        printf "%s\n" \
          "instances:" \
          "  - name: elasticsearch0" \
          "    dns:" \
          "      - elasticsearch0" \
          "      - localhost" \
          "    ip:" \
          "      - 127.0.0.1" \
          "  - name: elasticsearch1" \
          "    dns:" \
          "      - elasticsearch1" \
          "      - localhost" \
          "    ip:" \
          "      - 127.0.0.1" \
          "  - name: elasticsearch2" \
          "    dns:" \
          "      - elasticsearch2" \
          "      - localhost" \
          "    ip:" \
          "      - 127.0.0.1" \
          > config/certs/instances.yml
        bin/elasticsearch-certutil ca --silent --pem --out config/certs/ca.zip
        unzip config/certs/ca.zip -d config/certs
        bin/elasticsearch-certutil cert --silent --pem --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key --out config/certs/certs.zip
        unzip config/certs/certs.zip -d config/certs
      fi
      chown -R 1000:0 config/certs
      find config/certs -type d -exec chmod 750 {} \;
      find config/certs -type f -exec chmod 640 {} \;
      echo "Set up certificates"
    '
# Elasticsearch node 0 of the three-node `labrinth` cluster.
elasticsearch0:
  image: elasticsearch:9.3.0
  container_name: labrinth-elasticsearch0
  networks:
    - elasticsearch-mesh
  restart: on-failure
  depends_on:
    # Wait until the certificate helper has exited successfully.
    elasticsearch-certs:
      condition: service_completed_successfully
  ports:
    # HTTP API published on the host loopback interface only.
    - '127.0.0.1:9200:9200'
  volumes:
    - elasticsearch0-data:/usr/share/elasticsearch/data
    # Certificates from the helper, mounted read-only.
    - elasticsearch-certs:/usr/share/elasticsearch/config/certs:ro
  environment:
    - logger.level=WARN
    # Cluster membership: this node plus its two seed peers.
    - node.name=elasticsearch0
    - cluster.name=labrinth
    - cluster.initial_master_nodes=elasticsearch0,elasticsearch1,elasticsearch2
    - discovery.seed_hosts=elasticsearch1,elasticsearch2
    - bootstrap.memory_lock=false
    # Security: authentication on, TLS on the transport layer using this
    # node's key/certificate and the shared CA.
    - xpack.security.enabled=true
    - xpack.security.transport.ssl.enabled=true
    - xpack.security.transport.ssl.verification_mode=certificate
    - xpack.security.transport.ssl.key=certs/elasticsearch0/elasticsearch0.key
    - xpack.security.transport.ssl.certificate=certs/elasticsearch0/elasticsearch0.crt
    - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
    - ELASTIC_USERNAME=elastic
    - ELASTIC_PASSWORD=elastic
  mem_limit: 1g
  healthcheck:
    # Healthy when the cluster health response contains status yellow or green.
    # The folded scalar joins both lines with a single space.
    test:
      - CMD-SHELL
      - >-
        curl -s -u elastic:elastic http://localhost:9200/_cluster/health
        | grep -qE "\"status\":\"(yellow|green)\""
    interval: 10s
    timeout: 5s
    retries: 10
# Elasticsearch node 1 of the three-node `labrinth` cluster.
# No host ports are published for this node.
elasticsearch1:
  image: elasticsearch:9.3.0
  container_name: labrinth-elasticsearch1
  networks:
    - elasticsearch-mesh
  restart: on-failure
  depends_on:
    # Wait until the certificate helper has exited successfully.
    elasticsearch-certs:
      condition: service_completed_successfully
  volumes:
    - elasticsearch1-data:/usr/share/elasticsearch/data
    # Certificates from the helper, mounted read-only.
    - elasticsearch-certs:/usr/share/elasticsearch/config/certs:ro
  environment:
    - logger.level=WARN
    # Cluster membership: this node plus its two seed peers.
    - node.name=elasticsearch1
    - cluster.name=labrinth
    - cluster.initial_master_nodes=elasticsearch0,elasticsearch1,elasticsearch2
    - discovery.seed_hosts=elasticsearch0,elasticsearch2
    - bootstrap.memory_lock=false
    # Security: authentication on, TLS on the transport layer using this
    # node's key/certificate and the shared CA.
    - xpack.security.enabled=true
    - xpack.security.transport.ssl.enabled=true
    - xpack.security.transport.ssl.verification_mode=certificate
    - xpack.security.transport.ssl.key=certs/elasticsearch1/elasticsearch1.key
    - xpack.security.transport.ssl.certificate=certs/elasticsearch1/elasticsearch1.crt
    - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
    - ELASTIC_USERNAME=elastic
    - ELASTIC_PASSWORD=elastic
  mem_limit: 1g
  healthcheck:
    # Healthy when the cluster health response contains status yellow or green.
    # The folded scalar joins both lines with a single space.
    test:
      - CMD-SHELL
      - >-
        curl -s -u elastic:elastic http://localhost:9200/_cluster/health
        | grep -qE "\"status\":\"(yellow|green)\""
    interval: 10s
    timeout: 5s
    retries: 10
# Elasticsearch node 2 of the three-node `labrinth` cluster.
# No host ports are published for this node.
elasticsearch2:
  image: elasticsearch:9.3.0
  container_name: labrinth-elasticsearch2
  networks:
    - elasticsearch-mesh
  restart: on-failure
  depends_on:
    # Wait until the certificate helper has exited successfully.
    elasticsearch-certs:
      condition: service_completed_successfully
  volumes:
    - elasticsearch2-data:/usr/share/elasticsearch/data
    # Certificates from the helper, mounted read-only.
    - elasticsearch-certs:/usr/share/elasticsearch/config/certs:ro
  environment:
    - logger.level=WARN
    # Cluster membership: this node plus its two seed peers.
    - node.name=elasticsearch2
    - cluster.name=labrinth
    - cluster.initial_master_nodes=elasticsearch0,elasticsearch1,elasticsearch2
    - discovery.seed_hosts=elasticsearch0,elasticsearch1
    - bootstrap.memory_lock=false
    # Security: authentication on, TLS on the transport layer using this
    # node's key/certificate and the shared CA.
    - xpack.security.enabled=true
    - xpack.security.transport.ssl.enabled=true
    - xpack.security.transport.ssl.verification_mode=certificate
    - xpack.security.transport.ssl.key=certs/elasticsearch2/elasticsearch2.key
    - xpack.security.transport.ssl.certificate=certs/elasticsearch2/elasticsearch2.crt
    - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
    - ELASTIC_USERNAME=elastic
    - ELASTIC_PASSWORD=elastic
  mem_limit: 1g
  healthcheck:
    # Healthy when the cluster health response contains status yellow or green.
    # The folded scalar joins both lines with a single space.
    test:
      - CMD-SHELL
      - >-
        curl -s -u elastic:elastic http://localhost:9200/_cluster/health
        | grep -qE "\"status\":\"(yellow|green)\""
    interval: 10s
    timeout: 5s
    retries: 10
redis:
image: redis:alpine
container_name: labrinth-redis
@@ -109,6 +284,12 @@ services:
condition: service_healthy
meilisearch:
condition: service_healthy
elasticsearch0:
condition: service_healthy
elasticsearch1:
condition: service_healthy
elasticsearch2:
condition: service_healthy
redis:
condition: service_healthy
clickhouse:
@@ -143,7 +324,6 @@ services:
# Delphi must send a message on a webhook to our backend,
# so it must have access to our local network
- 'host.docker.internal:host-gateway'
# Sharded Meilisearch
meilisearch1:
profiles:
@@ -166,7 +346,6 @@ services:
interval: 3s
timeout: 5s
retries: 3
nginx-meilisearch-lb:
profiles:
- sharded-meilisearch
@@ -186,9 +365,16 @@ services:
# User-defined bridge networks.
networks:
  meilisearch-mesh:
    driver: bridge
  # Joined by elasticsearch-certs and the elasticsearch0..2 nodes.
  elasticsearch-mesh:
    driver: bridge
volumes:
typesense-data:
meilisearch-data:
meilisearch1-data:
elasticsearch0-data:
elasticsearch1-data:
elasticsearch2-data:
elasticsearch-certs:
db-data:
redis-data:
labrinth-cdn-data: