Search backend refactor with typesense impl (#5528)

* initial elasticsearch impl

* working elastic cluster

* replace SearchError with ApiError for preparation of search backend

* start factoring meili out to trait

* move meili to backend

* update routes to use search backend trait

* wip

* Update projects.rs

* search backend is only init'd once in config

* wip

* wip: backend agnostic

* change search internal routes to delegate to backend

* initial elasticsearch impl

* fix filtering

* elastic impl

* refactor indexing into its own module

* clean up elastic code

* fix ci

* fix tests

* fix elastic health check

* fix up env rebase

* fix compile

* dummy commit to update github pr

* Fix rebase

* Elastic basic https auth

* Fix duplicate projects showing up

* Fix up tests

* Replace search `ApiErrors` with `eyre::Reports`, propagate background task errors

* clean up agents files

* make index chunk size configurable

* make `match_phrase` in elastic case-insensitive

* use current/next indices and swap between them

* test case for error body

* Fix failing case

* da merge

* factor out common stuff from search backends

* allow fetching hit metadata from search results

* allow customising elasticsearch search config

* bit of docs

* add mappings to indices for elastic

* Implement Typesense

* wip

* fix up some sort fields stuff

* use different approach to filterable field sets

* remove a bunch of search fields which weren't used for filtering

* bucket text matches

* Bucketing by text_match for typesense

* fix tombi lint

* fix some sentry errors and dont prioritise 2+ term matches

* tweak ts query settings

* expose some more search settings

* query sort changes

* small fixes

* should fix pagination stuff

* fix healthcheck maybe

* ragebait ci

* tests

* tests

* revert environment
This commit is contained in:
aecsocket
2026-03-12 17:58:55 +00:00
committed by GitHub
parent 1c1683adb6
commit f0224dfff7
36 changed files with 3848 additions and 762 deletions

View File

@@ -7,7 +7,7 @@ use crate::models::pats::Scopes;
use crate::queue::analytics::AnalyticsQueue;
use crate::queue::session::AuthQueue;
use crate::routes::ApiError;
use crate::search::SearchConfig;
use crate::search::SearchBackend;
use crate::util::date::get_current_tenths_of_ms;
use crate::util::error::Context;
use crate::util::guards::admin_key_guard;
@@ -154,11 +154,11 @@ pub async fn count_download(
pub async fn force_reindex(
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
) -> Result<HttpResponse, ApiError> {
use crate::search::indexing::index_projects;
let redis = redis.get_ref();
index_projects(pool.as_ref().clone(), redis.clone(), &config)
search_backend
.index_projects(pool.as_ref().clone(), redis.clone())
.await
.wrap_internal_err("failed to index projects")?;
Ok(HttpResponse::NoContent().finish())

View File

@@ -1,12 +1,9 @@
use crate::routes::ApiError;
use crate::search::SearchConfig;
use crate::util::guards::admin_key_guard;
use actix_web::{HttpResponse, delete, get, web};
use meilisearch_sdk::tasks::{Task, TasksCancelQuery};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
use utoipa::ToSchema;
use crate::{
routes::ApiError,
search::{SearchBackend, TasksCancelFilter},
};
use actix_web::{delete, get, web};
pub fn config(cfg: &mut utoipa_actix_web::service_config::ServiceConfig) {
cfg.service(tasks).service(tasks_cancel);
@@ -15,107 +12,20 @@ pub fn config(cfg: &mut utoipa_actix_web::service_config::ServiceConfig) {
#[utoipa::path]
#[get("tasks", guard = "admin_key_guard")]
pub async fn tasks(
config: web::Data<SearchConfig>,
) -> Result<HttpResponse, ApiError> {
let client = config.make_batch_client()?;
let tasks = client
.with_all_clients("get_tasks", async |client| {
let tasks = client.get_tasks().await?;
Ok(tasks.results)
})
.await?;
#[derive(Serialize, ToSchema)]
struct MeiliTask<Time> {
uid: u32,
status: &'static str,
duration: Option<Duration>,
enqueued_at: Option<Time>,
}
#[derive(Serialize, ToSchema)]
struct TaskList<Time> {
by_instance: HashMap<String, Vec<MeiliTask<Time>>>,
}
let response = tasks
.into_iter()
.enumerate()
.map(|(idx, instance_tasks)| {
let tasks = instance_tasks
.into_iter()
.filter_map(|task| {
Some(match task {
Task::Enqueued { content } => MeiliTask {
uid: content.uid,
status: "enqueued",
duration: None,
enqueued_at: Some(content.enqueued_at),
},
Task::Processing { content } => MeiliTask {
uid: content.uid,
status: "processing",
duration: None,
enqueued_at: Some(content.enqueued_at),
},
Task::Failed { content } => MeiliTask {
uid: content.task.uid,
status: "failed",
duration: Some(content.task.duration),
enqueued_at: Some(content.task.enqueued_at),
},
Task::Succeeded { content: _ } => return None,
})
})
.collect();
(idx.to_string(), tasks)
})
.collect::<HashMap<String, Vec<MeiliTask<_>>>>();
Ok(HttpResponse::Ok().json(TaskList {
by_instance: response,
}))
}
#[derive(Deserialize, Serialize, ToSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
enum TasksCancelFilter {
All,
AllEnqueued,
Indexes { indexes: Vec<String> },
search: web::Data<dyn SearchBackend>,
) -> Result<web::Json<serde_json::Value>, ApiError> {
Ok(web::Json(search.tasks().await.map_err(ApiError::Internal)?))
}
#[utoipa::path]
#[delete("tasks", guard = "admin_key_guard")]
pub async fn tasks_cancel(
config: web::Data<SearchConfig>,
search: web::Data<dyn SearchBackend>,
body: web::Json<TasksCancelFilter>,
) -> Result<HttpResponse, ApiError> {
let client = config.make_batch_client()?;
let all_results = client
.with_all_clients("cancel_tasks", async |client| {
let mut q = TasksCancelQuery::new(client);
match &body.0 {
TasksCancelFilter::All => {}
TasksCancelFilter::Indexes { indexes } => {
q.with_index_uids(indexes.iter().map(|s| s.as_str()));
}
TasksCancelFilter::AllEnqueued => {
q.with_statuses(["enqueued"]);
}
};
let result = client.cancel_tasks_with(&q).await;
Ok(result)
})
.await?;
for r in all_results {
r?;
}
Ok(HttpResponse::Ok().finish())
) -> Result<(), ApiError> {
search
.tasks_cancel(&body)
.await
.map_err(ApiError::Internal)?;
Ok(())
}

View File

@@ -93,8 +93,6 @@ pub enum ApiError {
Auth(eyre::Report),
#[error("Invalid input: {0}")]
InvalidInput(String),
#[error("Environment error")]
Env(#[from] dotenvy::Error),
#[error("Error while uploading file: {0}")]
FileHosting(#[from] FileHostingError),
#[error("database error")]
@@ -117,8 +115,6 @@ pub enum ApiError {
Validation(String),
#[error("Search error: {0}")]
Search(#[from] meilisearch_sdk::errors::Error),
#[error("search indexing error")]
Indexing(#[from] crate::search::indexing::IndexingError),
#[error("Payments error: {0}")]
Payments(String),
#[error("Discord error: {0}")]
@@ -176,7 +172,6 @@ impl ApiError {
Self::Internal(..) => "internal_error",
Self::Request(..) => "request_error",
Self::Auth(..) => "auth_error",
Self::Env(..) => "environment_error",
Self::Database(..) => "database_error",
Self::SqlxDatabase(..) => "database_error",
Self::RedisDatabase(..) => "database_error",
@@ -185,7 +180,6 @@ impl ApiError {
Self::Xml(..) => "xml_error",
Self::Json(..) => "json_error",
Self::Search(..) => "search_error",
Self::Indexing(..) => "indexing_error",
Self::FileHosting(..) => "file_hosting_error",
Self::InvalidInput(..) => "invalid_input",
Self::Validation(..) => "invalid_input",
@@ -241,7 +235,6 @@ impl actix_web::ResponseError for ApiError {
Self::Request(..) => StatusCode::BAD_REQUEST,
Self::Auth(..) => StatusCode::UNAUTHORIZED,
Self::InvalidInput(..) => StatusCode::BAD_REQUEST,
Self::Env(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::Database(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::SqlxDatabase(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::RedisDatabase(..) => StatusCode::INTERNAL_SERVER_ERROR,
@@ -251,7 +244,6 @@ impl actix_web::ResponseError for ApiError {
Self::Xml(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::Json(..) => StatusCode::BAD_REQUEST,
Self::Search(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::Indexing(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::FileHosting(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::Validation(..) => StatusCode::BAD_REQUEST,
Self::Payments(..) => StatusCode::FAILED_DEPENDENCY,

View File

@@ -4,7 +4,7 @@ use crate::database::models::{project_item, version_item};
use crate::database::redis::RedisPool;
use crate::file_hosting::FileHost;
use crate::models::projects::{
Link, MonetizationStatus, Project, ProjectStatus, SearchRequest, Version,
Link, MonetizationStatus, Project, ProjectStatus, Version,
};
use crate::models::v2::projects::{
DonationLink, LegacyProject, LegacySideType, LegacyVersion,
@@ -14,7 +14,7 @@ use crate::queue::moderation::AutomatedModerationQueue;
use crate::queue::session::AuthQueue;
use crate::routes::v3::projects::ProjectIds;
use crate::routes::{ApiError, v2_reroute, v3};
use crate::search::{SearchConfig, SearchError, search_for_project};
use crate::search::{SearchBackend, SearchRequest};
use actix_web::{HttpRequest, HttpResponse, delete, get, patch, post, web};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
@@ -53,9 +53,9 @@ pub fn config(cfg: &mut web::ServiceConfig) {
#[get("search")]
pub async fn project_search(
web::Query(info): web::Query<SearchRequest>,
config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
redis: web::Data<RedisPool>,
) -> Result<HttpResponse, SearchError> {
) -> Result<HttpResponse, ApiError> {
// Search now uses loader_fields instead of explicit 'client_side' and 'server_side' fields
// While the backend for this has changed, it doesn't affect much
// in the API calls except that 'versions:x' is now 'game_versions:x'
@@ -100,7 +100,7 @@ pub async fn project_search(
..info
};
let results = search_for_project(&info, &config, &redis).await?;
let results = search_backend.search_for_project(&info, &redis).await?;
let results = LegacySearchResults::from(results);
@@ -410,7 +410,7 @@ pub async fn project_edit(
req: HttpRequest,
info: web::Path<(String,)>,
pool: web::Data<PgPool>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
new_project: web::Json<EditProject>,
redis: web::Data<RedisPool>,
session_queue: web::Data<AuthQueue>,
@@ -524,7 +524,7 @@ pub async fn project_edit(
req.clone(),
info,
pool.clone(),
search_config,
search_backend,
web::Json(new_project),
redis.clone(),
session_queue.clone(),
@@ -918,7 +918,7 @@ pub async fn project_delete(
info: web::Path<(String,)>,
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
session_queue: web::Data<AuthQueue>,
) -> Result<HttpResponse, ApiError> {
// Returns NoContent, so no need to convert
@@ -927,7 +927,7 @@ pub async fn project_delete(
info,
pool,
redis,
search_config,
search_backend,
session_queue,
)
.await

View File

@@ -11,7 +11,7 @@ use crate::models::projects::{
use crate::models::v2::projects::LegacyVersion;
use crate::queue::session::AuthQueue;
use crate::routes::{v2_reroute, v3};
use crate::search::SearchConfig;
use crate::search::SearchBackend;
use actix_web::{HttpRequest, HttpResponse, delete, get, patch, web};
use serde::{Deserialize, Serialize};
use validator::Validate;
@@ -357,7 +357,7 @@ pub async fn version_delete(
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
session_queue: web::Data<AuthQueue>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
) -> Result<HttpResponse, ApiError> {
// Returns NoContent, so we don't need to convert the response
v3::versions::version_delete(
@@ -366,7 +366,7 @@ pub async fn version_delete(
pool,
redis,
session_queue,
search_config,
search_backend,
)
.await
.or_else(v2_reroute::flatten_404_error)

View File

@@ -22,7 +22,6 @@ use crate::models::teams::{OrganizationPermissions, ProjectPermissions};
use crate::models::threads::ThreadType;
use crate::models::v3::user_limits::UserLimits;
use crate::queue::session::AuthQueue;
use crate::search::indexing::IndexingError;
use crate::util::guards::admin_key_guard;
use crate::util::http::HttpClient;
use crate::util::img::upload_image_optimized;
@@ -55,14 +54,10 @@ pub fn config(cfg: &mut utoipa_actix_web::service_config::ServiceConfig) {
#[derive(Error, Debug)]
pub enum CreateError {
#[error("Environment Error")]
EnvError(#[from] dotenvy::Error),
#[error("An unknown database error occurred")]
SqlxDatabaseError(#[from] sqlx::Error),
#[error("Database Error: {0}")]
DatabaseError(#[from] models::DatabaseError),
#[error("Indexing Error: {0}")]
IndexingError(#[from] IndexingError),
#[error("Error while parsing multipart payload: {0}")]
MultipartError(#[from] actix_multipart::MultipartError),
#[error("Error while parsing JSON: {0}")]
@@ -126,12 +121,10 @@ impl From<crate::routes::ApiError> for CreateError {
impl actix_web::ResponseError for CreateError {
fn status_code(&self) -> StatusCode {
match self {
CreateError::EnvError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::SqlxDatabaseError(..) => {
StatusCode::INTERNAL_SERVER_ERROR
}
CreateError::DatabaseError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::IndexingError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::FileHostingError(..) => {
StatusCode::INTERNAL_SERVER_ERROR
}
@@ -159,10 +152,8 @@ impl actix_web::ResponseError for CreateError {
fn error_response(&self) -> HttpResponse {
HttpResponse::build(self.status_code()).json(ApiError {
error: match self {
CreateError::EnvError(..) => "environment_error",
CreateError::SqlxDatabaseError(..) => "database_error",
CreateError::DatabaseError(..) => "database_error",
CreateError::IndexingError(..) => "indexing_error",
CreateError::FileHostingError(..) => "file_hosting_error",
CreateError::SerDeError(..) => "invalid_input",
CreateError::MultipartError(..) => "invalid_input",

View File

@@ -20,8 +20,7 @@ use crate::models::images::ImageContext;
use crate::models::notifications::NotificationBody;
use crate::models::pats::Scopes;
use crate::models::projects::{
MonetizationStatus, Project, ProjectStatus, SearchRequest,
SideTypesMigrationReviewStatus,
MonetizationStatus, Project, ProjectStatus, SideTypesMigrationReviewStatus,
};
use crate::models::teams::ProjectPermissions;
use crate::models::threads::MessageBody;
@@ -30,8 +29,7 @@ use crate::queue::moderation::AutomatedModerationQueue;
use crate::queue::session::AuthQueue;
use crate::routes::ApiError;
use crate::routes::internal::delphi;
use crate::search::indexing::remove_documents;
use crate::search::{SearchConfig, SearchError, search_for_project};
use crate::search::{SearchBackend, SearchQuery, SearchRequest, SearchResults};
use crate::util::error::Context;
use crate::util::img;
use crate::util::img::{delete_old_images, upload_image_optimized};
@@ -48,6 +46,7 @@ use validator::Validate;
pub fn config(cfg: &mut web::ServiceConfig) {
cfg.route("search", web::get().to(project_search));
cfg.service(project_search_post);
cfg.route("projects", web::get().to(projects_get));
cfg.route("projects", web::patch().to(projects_edit));
cfg.route("projects_random", web::get().to(random_projects_get));
@@ -292,7 +291,7 @@ async fn project_edit(
req: HttpRequest,
info: web::Path<(String,)>,
pool: web::Data<PgPool>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
web::Json(new_project): web::Json<EditProject>,
redis: web::Data<RedisPool>,
session_queue: web::Data<AuthQueue>,
@@ -302,7 +301,7 @@ async fn project_edit(
req,
info,
pool,
search_config,
search_backend,
web::Json(new_project),
redis,
session_queue,
@@ -315,7 +314,7 @@ pub async fn project_edit_internal(
req: HttpRequest,
info: web::Path<(String,)>,
pool: web::Data<PgPool>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
web::Json(new_project): web::Json<EditProject>,
redis: web::Data<RedisPool>,
session_queue: web::Data<AuthQueue>,
@@ -1126,16 +1125,16 @@ pub async fn project_edit_internal(
project_item.inner.status.is_searchable(),
new_project.status.map(|status| status.is_searchable()),
) {
remove_documents(
&project_item
.versions
.into_iter()
.map(|x| x.into())
.collect::<Vec<_>>(),
&search_config,
)
.await
.wrap_internal_err("failed to remove documents")?;
search_backend
.remove_documents(
&project_item
.versions
.into_iter()
.map(|x| x.into())
.collect::<Vec<_>>(),
)
.await
.wrap_internal_err("failed to remove documents")?;
}
Ok(HttpResponse::NoContent().body(""))
@@ -1190,11 +1189,13 @@ pub async fn edit_project_categories(
// }
pub async fn project_search(
web::Query(info): web::Query<SearchRequest>,
config: web::Data<SearchConfig>,
web::Query(info): web::Query<SearchQuery>,
search_backend: web::Data<dyn SearchBackend>,
redis: web::Data<RedisPool>,
) -> Result<HttpResponse, SearchError> {
let results = search_for_project(&info, &config, &redis).await?;
) -> Result<web::Json<SearchResults>, ApiError> {
let results = search_backend
.search_for_project(&SearchRequest::from(info), &redis)
.await?;
// TODO: add this back
// let results = ReturnSearchResults {
@@ -1208,7 +1209,18 @@ pub async fn project_search(
// total_hits: results.total_hits,
// };
Ok(HttpResponse::Ok().json(results))
Ok(web::Json(results))
}
// for more complicated search queries
#[post("/search")]
pub async fn project_search_post(
web::Json(info): web::Json<SearchRequest>,
search_backend: web::Data<dyn SearchBackend>,
redis: web::Data<RedisPool>,
) -> Result<web::Json<SearchResults>, ApiError> {
let results = search_backend.search_for_project(&info, &redis).await?;
Ok(web::Json(results))
}
//checks the validity of a project id or slug
@@ -2452,7 +2464,7 @@ async fn project_delete(
info: web::Path<(String,)>,
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
session_queue: web::Data<AuthQueue>,
) -> Result<(), ApiError> {
project_delete_internal(
@@ -2460,7 +2472,7 @@ async fn project_delete(
info,
pool,
redis,
search_config,
search_backend,
session_queue,
)
.await
@@ -2471,7 +2483,7 @@ pub async fn project_delete_internal(
info: web::Path<(String,)>,
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
session_queue: web::Data<AuthQueue>,
) -> Result<(), ApiError> {
let (_, user) = get_user_from_headers(
@@ -2583,16 +2595,16 @@ pub async fn project_delete_internal(
.await
.wrap_internal_err("failed to commit transaction")?;
remove_documents(
&project
.versions
.into_iter()
.map(|x| x.into())
.collect::<Vec<_>>(),
&search_config,
)
.await
.wrap_internal_err("failed to remove project version documents")?;
search_backend
.remove_documents(
&project
.versions
.into_iter()
.map(|x| x.into())
.collect::<Vec<_>>(),
)
.await
.wrap_internal_err("failed to remove project version documents")?;
if result.is_some() {
Ok(())

View File

@@ -26,8 +26,7 @@ use crate::models::projects::{Loader, skip_nulls};
use crate::models::teams::ProjectPermissions;
use crate::queue::session::AuthQueue;
use crate::routes::internal::delphi;
use crate::search::SearchConfig;
use crate::search::indexing::remove_documents;
use crate::search::SearchBackend;
use crate::util::error::Context;
use crate::util::img;
use crate::util::validate::validation_errors_to_string;
@@ -915,7 +914,7 @@ pub async fn version_delete(
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
session_queue: web::Data<AuthQueue>,
search_config: web::Data<SearchConfig>,
search_backend: web::Data<dyn SearchBackend>,
) -> Result<HttpResponse, ApiError> {
let user = get_user_from_headers(
&req,
@@ -1022,10 +1021,10 @@ pub async fn version_delete(
&redis,
)
.await?;
remove_documents(&[version.inner.id.into()], &search_config)
search_backend
.remove_documents(&[version.inner.id.into()])
.await
.wrap_internal_err("failed to remove documents")?;
if result.is_some() {
Ok(HttpResponse::NoContent().body(""))
} else {