Search backend refactor with typesense impl (#5528)
* initial elasticsearch impl * working elastic cluster * replace SearchError with ApiError for preparation of search backend * start factoring meili out to trait * move meili to backend * update routes to use search backend trait * wip * Update projects.rs * search backend is only init'd once in config * wip * wip: backend agnostic * change search internal routes to delegate to backend * initial elasticsearch impl * fix filtering * elastic impl * refactor indexing into its own module * clean up elastic code * fix ci * fix tests * fix elastic health check * fix up env rebase * fix compile * dummy commit to update github pr * Fix rebase * Elastic basic https auth * Fix duplicate projects showing up * Fix up tests * Replace search `ApiErrors` with `eyre::Reports`, propagate background task errors * clean up agents files * make index chunk size configurable * make `match_phrase` in elastic case-insensitive * use current/next indices and swap between them * test case for error body * Fix failing case * da merge * factor out common stuff from search backends * allow fetching hit metadata from search results * allow customising elasticsearch search config * bit of docs * add mappings to indices for elastic * Implement Typesense * wip * fix up some sort fields stuff * use different approach to filterable field sets * remove a bunch of search fields which weren't used for filtering * bucket text matches * Bucketing by text_match for typesense * fix tombi lint * fix some sentry errors and dont prioritise 2+ term matches * tweak ts query settings * expose some more search settings * query sort changes * small fixes * should fix pagination stuff * fix healthcheck maybe * ragebait ci * tests * tests * revert environment
This commit is contained in:
123
apps/labrinth/src/search/backend/common.rs
Normal file
123
apps/labrinth/src/search/backend/common.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use crate::routes::ApiError;
|
||||
use crate::search::SearchRequest;
|
||||
use crate::util::error::Context;
|
||||
use eyre::eyre;
|
||||
use std::borrow::Cow;
|
||||
|
||||
/// Pagination, sort and query parameters extracted from a `SearchRequest`
/// by `parse_search_request`.
///
/// The string fields borrow from the request they were parsed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParsedSearchRequest<'a> {
    /// Number of hits to skip, exactly as requested.
    pub offset: usize,
    /// Page size; `parse_search_request` keeps it between 1 and 100.
    pub hits_per_page: usize,
    /// One-based page number derived from `offset` and `hits_per_page`.
    pub page: usize,
    /// Name of the requested sort index (`"relevance"` when unspecified).
    pub index: &'a str,
    /// Free-text query (empty when unspecified).
    pub query: &'a str,
}
|
||||
|
||||
pub fn parse_search_request(
|
||||
info: &SearchRequest,
|
||||
) -> Result<ParsedSearchRequest<'_>, ApiError> {
|
||||
let offset = info
|
||||
.offset
|
||||
.as_deref()
|
||||
.unwrap_or("0")
|
||||
.parse::<usize>()
|
||||
.wrap_request_err("invalid offset")?;
|
||||
let limit = info
|
||||
.limit
|
||||
.as_deref()
|
||||
.unwrap_or("10")
|
||||
.parse::<usize>()
|
||||
.wrap_request_err("invalid limit")?
|
||||
.min(100);
|
||||
let hits_per_page = if limit == 0 { 1 } else { limit };
|
||||
|
||||
Ok(ParsedSearchRequest {
|
||||
offset,
|
||||
hits_per_page,
|
||||
page: offset / hits_per_page + 1,
|
||||
index: info.index.as_deref().unwrap_or("relevance"),
|
||||
query: info.query.as_deref().unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Backend-agnostic sort order for a search, as resolved by
/// `parse_search_index` from the request's index name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchIndex {
    /// Requested with `"relevance"` (unless the filters target Minecraft
    /// Java servers, see `MinecraftJavaServerVerifiedPlays2w`).
    Relevance,
    /// Requested with `"downloads"`.
    Downloads,
    /// Requested with `"follows"`.
    Follows,
    /// Requested with `"updated"` or `"date_modified"`.
    Updated,
    /// Requested with `"newest"` or `"date_created"`.
    Newest,
    /// Requested with `"minecraft_java_server.verified_plays_2w"`; also
    /// substituted for `"relevance"` when the filters target Minecraft Java
    /// servers.
    MinecraftJavaServerVerifiedPlays2w,
    /// Requested with `"minecraft_java_server.ping.data.players_online"`.
    MinecraftJavaServerPlayersOnline,
}
|
||||
|
||||
/// Logical search index a query is run against.
///
/// Each backend maps these to its own concrete index names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchIndexName {
    /// The main projects index; used by every sort except `"downloads"`.
    Projects,
    /// The filtered projects index; `parse_search_index` selects it for the
    /// `"downloads"` sort.
    ProjectsFiltered,
}
|
||||
|
||||
pub struct SearchSort {
|
||||
pub index_name: SearchIndexName,
|
||||
pub index: SearchIndex,
|
||||
}
|
||||
|
||||
pub fn parse_search_index(
|
||||
index: &str,
|
||||
new_filters: Option<&str>,
|
||||
) -> Result<SearchSort, ApiError> {
|
||||
let projects_name = SearchIndexName::Projects;
|
||||
let projects_filtered_name = SearchIndexName::ProjectsFiltered;
|
||||
|
||||
// TODO: this is a dumb hack, the frontend should pass the project type it's filtering directly
|
||||
let is_server = new_filters
|
||||
.is_some_and(|f| f.contains("project_types = minecraft_java_server"));
|
||||
|
||||
Ok(match index {
|
||||
"relevance" => SearchSort {
|
||||
index_name: projects_name,
|
||||
index: if is_server {
|
||||
SearchIndex::MinecraftJavaServerVerifiedPlays2w
|
||||
} else {
|
||||
SearchIndex::Relevance
|
||||
},
|
||||
},
|
||||
"downloads" => SearchSort {
|
||||
index_name: projects_filtered_name,
|
||||
index: SearchIndex::Downloads,
|
||||
},
|
||||
"follows" => SearchSort {
|
||||
index_name: projects_name,
|
||||
index: SearchIndex::Follows,
|
||||
},
|
||||
"updated" | "date_modified" => SearchSort {
|
||||
index_name: projects_name,
|
||||
index: SearchIndex::Updated,
|
||||
},
|
||||
"newest" | "date_created" => SearchSort {
|
||||
index_name: projects_name,
|
||||
index: SearchIndex::Newest,
|
||||
},
|
||||
"minecraft_java_server.verified_plays_2w" => SearchSort {
|
||||
index_name: projects_name,
|
||||
index: SearchIndex::MinecraftJavaServerVerifiedPlays2w,
|
||||
},
|
||||
"minecraft_java_server.ping.data.players_online" => SearchSort {
|
||||
index_name: projects_name,
|
||||
index: SearchIndex::MinecraftJavaServerPlayersOnline,
|
||||
},
|
||||
i => return Err(ApiError::Request(eyre!("invalid index '{i}'"))),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn combined_search_filters(info: &SearchRequest) -> Option<Cow<'_, str>> {
|
||||
if let Some(filters) = info.new_filters.as_deref() {
|
||||
return Some(filters.into());
|
||||
}
|
||||
|
||||
match (info.filters.as_deref(), info.version.as_deref()) {
|
||||
(Some(f), Some(v)) => Some(format!("({f}) AND ({v})").into()),
|
||||
(Some(f), None) => Some(f.into()),
|
||||
(None, Some(v)) => Some(v.into()),
|
||||
(None, None) => None,
|
||||
}
|
||||
}
|
||||
1388
apps/labrinth/src/search/backend/elasticsearch/mod.rs
Normal file
1388
apps/labrinth/src/search/backend/elasticsearch/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,43 +1,23 @@
|
||||
/// This module is used for the indexing from any source.
|
||||
pub mod local_import;
|
||||
|
||||
use std::sync::LazyLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::database::PgPool;
|
||||
use crate::database::redis::RedisPool;
|
||||
use crate::env::ENV;
|
||||
use crate::search::{SearchConfig, UploadSearchProject};
|
||||
use crate::search::backend::meilisearch::MeilisearchConfig;
|
||||
use crate::search::indexing::index_local;
|
||||
use crate::search::{SearchField, UploadSearchProject};
|
||||
use crate::util::error::Context;
|
||||
use ariadne::ids::base62_impl::to_base62;
|
||||
use eyre::eyre;
|
||||
use eyre::{Result, eyre};
|
||||
use futures::StreamExt;
|
||||
use futures::stream::FuturesOrdered;
|
||||
use local_import::index_local;
|
||||
use meilisearch_sdk::client::{Client, SwapIndexes};
|
||||
use meilisearch_sdk::indexes::Index;
|
||||
use meilisearch_sdk::settings::{PaginationSetting, Settings};
|
||||
use meilisearch_sdk::task_info::TaskInfo;
|
||||
use thiserror::Error;
|
||||
use tracing::{Instrument, error, info, info_span, instrument};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum IndexingError {
|
||||
#[error(transparent)]
|
||||
Internal(#[from] eyre::Report),
|
||||
#[error("Error while connecting to the MeiliSearch database")]
|
||||
Indexing(#[from] meilisearch_sdk::errors::Error),
|
||||
#[error("Error while serializing or deserializing JSON: {0}")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
#[error("Database Error: {0}")]
|
||||
Sqlx(#[from] sqlx::error::Error),
|
||||
#[error("Database Error: {0}")]
|
||||
Database(#[from] crate::database::models::DatabaseError),
|
||||
#[error("Environment Error")]
|
||||
Env(#[from] dotenvy::Error),
|
||||
#[error("Error while awaiting index creation task")]
|
||||
Task,
|
||||
}
|
||||
|
||||
// // The chunk size for adding projects to the indexing database. If the request size
|
||||
// // is too large (>10MiB) then the request fails with an error. This chunk size
|
||||
// // assumes a max average size of 4KiB per project to avoid this cap.
|
||||
@@ -51,8 +31,8 @@ fn search_operation_timeout() -> std::time::Duration {
|
||||
|
||||
pub async fn remove_documents(
|
||||
ids: &[crate::models::ids::VersionId],
|
||||
config: &SearchConfig,
|
||||
) -> eyre::Result<()> {
|
||||
config: &MeilisearchConfig,
|
||||
) -> Result<()> {
|
||||
let mut indexes = get_indexes_for_indexing(config, false, false)
|
||||
.await
|
||||
.wrap_err("failed to get current indexes")?;
|
||||
@@ -108,8 +88,8 @@ pub async fn remove_documents(
|
||||
pub async fn index_projects(
|
||||
ro_pool: PgPool,
|
||||
redis: RedisPool,
|
||||
config: &SearchConfig,
|
||||
) -> eyre::Result<()> {
|
||||
config: &MeilisearchConfig,
|
||||
) -> Result<()> {
|
||||
info!("Indexing projects.");
|
||||
|
||||
info!("Ensuring current indexes exists");
|
||||
@@ -197,9 +177,9 @@ pub async fn index_projects(
|
||||
}
|
||||
|
||||
pub async fn swap_index(
|
||||
config: &SearchConfig,
|
||||
config: &MeilisearchConfig,
|
||||
index_name: &str,
|
||||
) -> Result<(), IndexingError> {
|
||||
) -> Result<()> {
|
||||
let client = config.make_batch_client()?;
|
||||
let index_name_next = config.get_index_name(index_name, true);
|
||||
let index_name = config.get_index_name(index_name, false);
|
||||
@@ -210,12 +190,13 @@ pub async fn swap_index(
|
||||
|
||||
let swap_indices_ref = &swap_indices;
|
||||
|
||||
// is it "indexes" or "indices"? who knows! roll a die!
|
||||
client
|
||||
.with_all_clients("swap_indexes", |client| async move {
|
||||
let task = client
|
||||
.swap_indexes([swap_indices_ref])
|
||||
.await
|
||||
.map_err(IndexingError::Indexing)?;
|
||||
.wrap_err("failed to swap indices")?;
|
||||
|
||||
monitor_task(
|
||||
client,
|
||||
@@ -233,10 +214,10 @@ pub async fn swap_index(
|
||||
|
||||
#[instrument(skip(config))]
|
||||
pub async fn get_indexes_for_indexing(
|
||||
config: &SearchConfig,
|
||||
config: &MeilisearchConfig,
|
||||
next: bool, // Get the 'next' one
|
||||
update_settings: bool,
|
||||
) -> Result<Vec<Vec<Index>>, IndexingError> {
|
||||
) -> Result<Vec<Vec<Index>>> {
|
||||
let client = config.make_batch_client()?;
|
||||
let project_name = config.get_index_name("projects", next);
|
||||
let project_filtered_name =
|
||||
@@ -381,7 +362,7 @@ async fn add_to_index(
|
||||
client: &Client,
|
||||
index: &Index,
|
||||
mods: &[UploadSearchProject],
|
||||
) -> Result<(), IndexingError> {
|
||||
) -> Result<()> {
|
||||
for chunk in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
info!(
|
||||
"Adding chunk of {} versions starting with version id {}",
|
||||
@@ -419,7 +400,7 @@ async fn monitor_task(
|
||||
task: TaskInfo,
|
||||
timeout: Duration,
|
||||
poll: Option<Duration>,
|
||||
) -> Result<(), IndexingError> {
|
||||
) -> Result<()> {
|
||||
let now = std::time::Instant::now();
|
||||
|
||||
let id = task.get_task_uid();
|
||||
@@ -465,7 +446,7 @@ async fn update_and_add_to_index(
|
||||
index: &Index,
|
||||
projects: &[UploadSearchProject],
|
||||
_additional_fields: &[String],
|
||||
) -> Result<(), IndexingError> {
|
||||
) -> Result<()> {
|
||||
// TODO: Uncomment this- hardcoding loader_fields is a band-aid fix, and will be fixed soon
|
||||
// let mut new_filterable_attributes: Vec<String> = index.get_filterable_attributes().await?;
|
||||
// let mut new_displayed_attributes = index.get_displayed_attributes().await?;
|
||||
@@ -509,8 +490,8 @@ pub async fn add_projects_batch_client(
|
||||
indices: &[Vec<Index>],
|
||||
projects: Vec<UploadSearchProject>,
|
||||
additional_fields: Vec<String>,
|
||||
config: &SearchConfig,
|
||||
) -> Result<(), IndexingError> {
|
||||
config: &MeilisearchConfig,
|
||||
) -> Result<()> {
|
||||
let client = config.make_batch_client()?;
|
||||
|
||||
let index_references = indices
|
||||
@@ -558,12 +539,92 @@ fn default_settings() -> Settings {
|
||||
.with_displayed_attributes(DEFAULT_DISPLAYED_ATTRIBUTES)
|
||||
.with_searchable_attributes(DEFAULT_SEARCHABLE_ATTRIBUTES)
|
||||
.with_sortable_attributes(DEFAULT_SORTABLE_ATTRIBUTES)
|
||||
.with_filterable_attributes(DEFAULT_ATTRIBUTES_FOR_FACETING)
|
||||
.with_filterable_attributes(&*MEILI_FILTERABLE_ATTRIBUTES)
|
||||
.with_pagination(PaginationSetting {
|
||||
max_total_hits: 2147483647,
|
||||
})
|
||||
}
|
||||
|
||||
/// How a `SearchField` is represented in a Meilisearch index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MeilisearchFieldSpec {
    /// Attribute path of the field inside an indexed document.
    pub path: &'static str,
    /// Whether the attribute is registered as filterable in the index
    /// settings.
    pub filterable: bool,
}
|
||||
|
||||
impl SearchField {
|
||||
pub const fn meilisearch_spec(self) -> MeilisearchFieldSpec {
|
||||
match self {
|
||||
SearchField::Categories => MeilisearchFieldSpec {
|
||||
path: "categories",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::ProjectTypes => MeilisearchFieldSpec {
|
||||
path: "project_types",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::ProjectId => MeilisearchFieldSpec {
|
||||
path: "project_id",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::OpenSource => MeilisearchFieldSpec {
|
||||
path: "open_source",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::Environment => MeilisearchFieldSpec {
|
||||
path: "environment",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::GameVersions => MeilisearchFieldSpec {
|
||||
path: "game_versions",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::ClientSide => MeilisearchFieldSpec {
|
||||
path: "client_side",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::ServerSide => MeilisearchFieldSpec {
|
||||
path: "server_side",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::MinecraftServerRegion => MeilisearchFieldSpec {
|
||||
path: "minecraft_server.region",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::MinecraftServerLanguages => MeilisearchFieldSpec {
|
||||
path: "minecraft_server.languages",
|
||||
filterable: true,
|
||||
},
|
||||
SearchField::MinecraftJavaServerContentKind => {
|
||||
MeilisearchFieldSpec {
|
||||
path: "minecraft_java_server.content.kind",
|
||||
filterable: true,
|
||||
}
|
||||
}
|
||||
SearchField::MinecraftJavaServerContentSupportedGameVersions => {
|
||||
MeilisearchFieldSpec {
|
||||
path: "minecraft_java_server.content.supported_game_versions",
|
||||
filterable: true,
|
||||
}
|
||||
}
|
||||
SearchField::MinecraftJavaServerPingData => MeilisearchFieldSpec {
|
||||
path: "minecraft_java_server.ping.data",
|
||||
filterable: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static MEILI_FILTERABLE_ATTRIBUTES: LazyLock<Vec<&'static str>> =
|
||||
LazyLock::new(|| {
|
||||
use strum::IntoEnumIterator;
|
||||
|
||||
SearchField::iter()
|
||||
.filter_map(|field| {
|
||||
let spec = field.meilisearch_spec();
|
||||
spec.filterable.then_some(spec.path)
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
||||
const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
|
||||
"project_id",
|
||||
"version_id",
|
||||
@@ -617,41 +678,6 @@ const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
|
||||
const DEFAULT_SEARCHABLE_ATTRIBUTES: &[&str] =
|
||||
&["name", "summary", "author", "slug"];
|
||||
|
||||
const DEFAULT_ATTRIBUTES_FOR_FACETING: &[&str] = &[
|
||||
"categories",
|
||||
"license",
|
||||
"project_types",
|
||||
"downloads",
|
||||
"follows",
|
||||
"author",
|
||||
"name",
|
||||
"date_created",
|
||||
"created_timestamp",
|
||||
"date_modified",
|
||||
"modified_timestamp",
|
||||
"version_published_timestamp",
|
||||
"project_id",
|
||||
"open_source",
|
||||
"color",
|
||||
// Note: loader fields are not here, but are added on as they are needed (so they can be dynamically added depending on which exist).
|
||||
// TODO: remove these- as they should be automatically populated. This is a band-aid fix.
|
||||
"environment",
|
||||
"game_versions",
|
||||
"mrpack_loaders",
|
||||
// V2 legacy fields for logical consistency
|
||||
"client_side",
|
||||
"server_side",
|
||||
"minecraft_server.country",
|
||||
"minecraft_server.region",
|
||||
"minecraft_server.languages",
|
||||
"minecraft_java_server.content.kind",
|
||||
"minecraft_java_server.content.supported_game_versions",
|
||||
"minecraft_java_server.content.recommended_game_version",
|
||||
"minecraft_java_server.verified_plays_2w",
|
||||
"minecraft_java_server.ping.data",
|
||||
"minecraft_java_server.ping.data.players_online",
|
||||
];
|
||||
|
||||
const DEFAULT_SORTABLE_ATTRIBUTES: &[&str] = &[
|
||||
"downloads",
|
||||
"follows",
|
||||
489
apps/labrinth/src/search/backend/meilisearch/mod.rs
Normal file
489
apps/labrinth/src/search/backend/meilisearch/mod.rs
Normal file
@@ -0,0 +1,489 @@
|
||||
use crate::database::PgPool;
|
||||
use crate::database::redis::RedisPool;
|
||||
use crate::env::ENV;
|
||||
use crate::models::ids::VersionId;
|
||||
use crate::routes::ApiError;
|
||||
use crate::search::backend::{
|
||||
SearchIndex, SearchIndexName, combined_search_filters, parse_search_index,
|
||||
parse_search_request,
|
||||
};
|
||||
use crate::search::{
|
||||
ResultSearchProject, SearchBackend, SearchRequest, SearchResults,
|
||||
TasksCancelFilter,
|
||||
};
|
||||
use crate::util::error::Context;
|
||||
use async_trait::async_trait;
|
||||
use eyre::Result;
|
||||
use futures::TryStreamExt;
|
||||
use futures::stream::FuturesOrdered;
|
||||
use itertools::Itertools;
|
||||
use meilisearch_sdk::client::Client;
|
||||
use meilisearch_sdk::tasks::{Task, TasksCancelQuery};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write;
|
||||
use std::time::Duration;
|
||||
use tracing::{Instrument, info_span};
|
||||
|
||||
pub mod indexing;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MeilisearchReadClient {
|
||||
pub client: Client,
|
||||
}
|
||||
|
||||
impl std::ops::Deref for MeilisearchReadClient {
|
||||
type Target = Client;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.client
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BatchClient {
|
||||
pub clients: Vec<Client>,
|
||||
}
|
||||
|
||||
impl BatchClient {
|
||||
pub fn new(clients: Vec<Client>) -> Self {
|
||||
Self { clients }
|
||||
}
|
||||
|
||||
pub async fn with_all_clients<'a, T, G, Fut>(
|
||||
&'a self,
|
||||
task_name: &str,
|
||||
generator: G,
|
||||
) -> Result<Vec<T>>
|
||||
where
|
||||
G: Fn(&'a Client) -> Fut,
|
||||
Fut: Future<Output = Result<T>> + 'a,
|
||||
{
|
||||
let mut tasks = FuturesOrdered::new();
|
||||
for (idx, client) in self.clients.iter().enumerate() {
|
||||
tasks.push_back(generator(client).instrument(info_span!(
|
||||
"client_task",
|
||||
task.name = task_name,
|
||||
client.idx = idx,
|
||||
)));
|
||||
}
|
||||
|
||||
let results = tasks.try_collect::<Vec<T>>().await?;
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn across_all<T, F, R>(&self, data: Vec<T>, mut predicate: F) -> Vec<R>
|
||||
where
|
||||
F: FnMut(T, &Client) -> R,
|
||||
{
|
||||
assert_eq!(
|
||||
data.len(),
|
||||
self.clients.len(),
|
||||
"mismatch between data len and meilisearch client count"
|
||||
);
|
||||
self.clients
|
||||
.iter()
|
||||
.zip(data)
|
||||
.map(|(client, item)| predicate(item, client))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Connection settings for the Meilisearch backend.
#[derive(Debug, Clone)]
pub struct MeilisearchConfig {
    /// Addresses of the instances that receive writes; one client is created
    /// per address by `make_batch_client`.
    pub addresses: Vec<String>,
    /// Address used for search (read) requests.
    pub read_lb_address: String,
    /// API key sent with every client.
    pub key: String,
    /// Prefix put in front of every index name.
    pub meta_namespace: String,
}
|
||||
|
||||
impl MeilisearchConfig {
|
||||
pub fn new(meta_namespace: Option<String>) -> Self {
|
||||
Self {
|
||||
addresses: ENV.MEILISEARCH_WRITE_ADDRS.0.clone(),
|
||||
key: ENV.MEILISEARCH_KEY.clone(),
|
||||
meta_namespace: meta_namespace.unwrap_or_default(),
|
||||
read_lb_address: ENV.MEILISEARCH_READ_ADDR.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn make_loadbalanced_read_client(
|
||||
&self,
|
||||
) -> Result<MeilisearchReadClient, meilisearch_sdk::errors::Error> {
|
||||
Ok(MeilisearchReadClient {
|
||||
client: Client::new(&self.read_lb_address, Some(&self.key))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn make_batch_client(
|
||||
&self,
|
||||
) -> Result<BatchClient, meilisearch_sdk::errors::Error> {
|
||||
Ok(BatchClient::new(
|
||||
self.addresses
|
||||
.iter()
|
||||
.map(|address| {
|
||||
Client::new(address.as_str(), Some(self.key.as_str()))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn get_index_name(&self, index: &str, next: bool) -> String {
|
||||
let alt = if next { "_alt" } else { "" };
|
||||
format!("{}_{}_{}", self.meta_namespace, index, alt)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Meilisearch {
|
||||
pub config: MeilisearchConfig,
|
||||
}
|
||||
|
||||
impl Meilisearch {
|
||||
pub fn new(config: MeilisearchConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
fn get_sort_index(
|
||||
&self,
|
||||
index: &str,
|
||||
new_filters: Option<&str>,
|
||||
) -> Result<(String, &'static [&'static str]), ApiError> {
|
||||
let sort = parse_search_index(index, new_filters)?;
|
||||
let index_name = match sort.index_name {
|
||||
SearchIndexName::Projects => {
|
||||
self.config.get_index_name("projects", false)
|
||||
}
|
||||
SearchIndexName::ProjectsFiltered => {
|
||||
self.config.get_index_name("projects_filtered", false)
|
||||
}
|
||||
};
|
||||
|
||||
Ok(match sort.index {
|
||||
SearchIndex::Relevance => (
|
||||
index_name,
|
||||
&["downloads:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
SearchIndex::Downloads => (
|
||||
index_name,
|
||||
&["downloads:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
SearchIndex::Follows => (
|
||||
index_name,
|
||||
&["follows:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
SearchIndex::Updated => (
|
||||
index_name,
|
||||
&["date_modified:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
SearchIndex::Newest => (
|
||||
index_name,
|
||||
&["date_created:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
SearchIndex::MinecraftJavaServerVerifiedPlays2w => (
|
||||
index_name,
|
||||
&[
|
||||
"minecraft_java_server.verified_plays_2w:desc",
|
||||
"minecraft_java_server.ping.data.players_online:desc",
|
||||
"version_published_timestamp:desc",
|
||||
],
|
||||
),
|
||||
SearchIndex::MinecraftJavaServerPlayersOnline => (
|
||||
index_name,
|
||||
&[
|
||||
"minecraft_java_server.ping.data.players_online:desc",
|
||||
"version_published_timestamp:desc",
|
||||
],
|
||||
),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SearchBackend for Meilisearch {
|
||||
async fn search_for_project_raw(
|
||||
&self,
|
||||
info: &SearchRequest,
|
||||
) -> Result<SearchResults, ApiError> {
|
||||
let parsed = parse_search_request(info)?;
|
||||
|
||||
let (index_name, sort_name) =
|
||||
self.get_sort_index(parsed.index, info.new_filters.as_deref())?;
|
||||
let client = self
|
||||
.config
|
||||
.make_loadbalanced_read_client()
|
||||
.wrap_internal_err("failed to make load-balanced read client")?;
|
||||
let meilisearch_index = client
|
||||
.get_index(index_name)
|
||||
.await
|
||||
.wrap_internal_err("failed to get index")?;
|
||||
|
||||
let mut filter_string = String::new();
|
||||
|
||||
let results = {
|
||||
let mut query = meilisearch_index.search();
|
||||
query
|
||||
.with_page(parsed.page)
|
||||
.with_hits_per_page(parsed.hits_per_page)
|
||||
.with_query(parsed.query)
|
||||
.with_sort(sort_name);
|
||||
|
||||
if let Some(new_filters) = info.new_filters.as_deref() {
|
||||
query.with_filter(new_filters);
|
||||
} else {
|
||||
let facets = if let Some(facets) = &info.facets {
|
||||
let facets =
|
||||
serde_json::from_str::<Vec<Vec<Value>>>(facets)
|
||||
.wrap_request_err("failed to parse facets")?;
|
||||
Some(facets)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let filters =
|
||||
combined_search_filters(info).unwrap_or_else(|| "".into());
|
||||
|
||||
if let Some(facets) = facets {
|
||||
let facets: Vec<Vec<Vec<String>>> =
|
||||
facets
|
||||
.into_iter()
|
||||
.map(|facets| {
|
||||
facets
|
||||
.into_iter()
|
||||
.map(|facet| {
|
||||
if facet.is_array() {
|
||||
serde_json::from_value::<Vec<String>>(facet)
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
vec![
|
||||
serde_json::from_value::<String>(facet)
|
||||
.unwrap_or_default(),
|
||||
]
|
||||
}
|
||||
})
|
||||
.collect_vec()
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
filter_string.push('(');
|
||||
for (index, facet_outer_list) in facets.iter().enumerate() {
|
||||
filter_string.push('(');
|
||||
|
||||
for (facet_outer_index, facet_inner_list) in
|
||||
facet_outer_list.iter().enumerate()
|
||||
{
|
||||
filter_string.push('(');
|
||||
for (facet_inner_index, facet) in
|
||||
facet_inner_list.iter().enumerate()
|
||||
{
|
||||
filter_string
|
||||
.push_str(&facet.replace(':', " = "));
|
||||
if facet_inner_index
|
||||
!= (facet_inner_list.len() - 1)
|
||||
{
|
||||
filter_string.push_str(" AND ")
|
||||
}
|
||||
}
|
||||
filter_string.push(')');
|
||||
|
||||
if facet_outer_index != (facet_outer_list.len() - 1)
|
||||
{
|
||||
filter_string.push_str(" OR ")
|
||||
}
|
||||
}
|
||||
|
||||
filter_string.push(')');
|
||||
|
||||
if index != (facets.len() - 1) {
|
||||
filter_string.push_str(" AND ")
|
||||
}
|
||||
}
|
||||
filter_string.push(')');
|
||||
|
||||
if !filters.is_empty() {
|
||||
write!(filter_string, " AND ({filters})")
|
||||
.expect("write should not fail");
|
||||
}
|
||||
} else {
|
||||
filter_string.push_str(&filters);
|
||||
}
|
||||
|
||||
if !filter_string.is_empty() {
|
||||
query.with_filter(&filter_string);
|
||||
}
|
||||
}
|
||||
|
||||
if info.show_metadata {
|
||||
query.with_show_ranking_score(true);
|
||||
query.with_show_ranking_score_details(true);
|
||||
query.execute().await?
|
||||
} else {
|
||||
query.execute::<ResultSearchProject>().await?
|
||||
}
|
||||
};
|
||||
|
||||
if info.show_metadata {
|
||||
let hits = results
|
||||
.hits
|
||||
.into_iter()
|
||||
.map(|hit| {
|
||||
let metadata = serde_json::to_value(&hit)
|
||||
.ok()
|
||||
.and_then(|value| value.as_object().cloned())
|
||||
.map(|mut value| {
|
||||
value.remove("_formatted");
|
||||
value.remove("_matchesPosition");
|
||||
value.remove("_federation");
|
||||
let result = value.remove("result");
|
||||
let metadata = Value::Object(value);
|
||||
(result, metadata)
|
||||
});
|
||||
|
||||
let (result, metadata) =
|
||||
metadata.unwrap_or((None, Value::Null));
|
||||
let mut result = result
|
||||
.and_then(|value| {
|
||||
serde_json::from_value::<ResultSearchProject>(value)
|
||||
.ok()
|
||||
})
|
||||
.unwrap_or(hit.result);
|
||||
|
||||
if !metadata.is_null() {
|
||||
result.search_metadata = Some(metadata);
|
||||
}
|
||||
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(SearchResults {
|
||||
hits,
|
||||
page: results.page.unwrap_or_default(),
|
||||
hits_per_page: results.hits_per_page.unwrap_or_default(),
|
||||
total_hits: results.total_hits.unwrap_or_default(),
|
||||
})
|
||||
} else {
|
||||
Ok(SearchResults {
|
||||
hits: results.hits.into_iter().map(|r| r.result).collect(),
|
||||
page: results.page.unwrap_or_default(),
|
||||
hits_per_page: results.hits_per_page.unwrap_or_default(),
|
||||
total_hits: results.total_hits.unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn index_projects(
|
||||
&self,
|
||||
ro_pool: PgPool,
|
||||
redis: RedisPool,
|
||||
) -> eyre::Result<()> {
|
||||
indexing::index_projects(ro_pool, redis, &self.config).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn remove_documents(&self, ids: &[VersionId]) -> eyre::Result<()> {
|
||||
indexing::remove_documents(ids, &self.config).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn tasks(&self) -> eyre::Result<Value> {
|
||||
let client = self
|
||||
.config
|
||||
.make_batch_client()
|
||||
.wrap_internal_err("failed to make batch client")?;
|
||||
let tasks = client
|
||||
.with_all_clients("get_tasks", async |client| {
|
||||
let tasks = client.get_tasks().await?;
|
||||
Ok(tasks.results)
|
||||
})
|
||||
.await
|
||||
.wrap_internal_err("failed to get tasks")?;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct MeiliTask<Time> {
|
||||
uid: u32,
|
||||
status: &'static str,
|
||||
duration: Option<Duration>,
|
||||
enqueued_at: Option<Time>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TaskList<Time> {
|
||||
by_instance: HashMap<String, Vec<MeiliTask<Time>>>,
|
||||
}
|
||||
|
||||
let response = tasks
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(idx, instance_tasks)| {
|
||||
let tasks = instance_tasks
|
||||
.into_iter()
|
||||
.filter_map(|task| {
|
||||
Some(match task {
|
||||
Task::Enqueued { content } => MeiliTask {
|
||||
uid: content.uid,
|
||||
status: "enqueued",
|
||||
duration: None,
|
||||
enqueued_at: Some(content.enqueued_at),
|
||||
},
|
||||
Task::Processing { content } => MeiliTask {
|
||||
uid: content.uid,
|
||||
status: "processing",
|
||||
duration: None,
|
||||
enqueued_at: Some(content.enqueued_at),
|
||||
},
|
||||
Task::Failed { content } => MeiliTask {
|
||||
uid: content.task.uid,
|
||||
status: "failed",
|
||||
duration: Some(content.task.duration),
|
||||
enqueued_at: Some(content.task.enqueued_at),
|
||||
},
|
||||
Task::Succeeded { .. } => return None,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
(idx.to_string(), tasks)
|
||||
})
|
||||
.collect::<HashMap<String, Vec<MeiliTask<_>>>>();
|
||||
|
||||
let response = serde_json::to_value(TaskList {
|
||||
by_instance: response,
|
||||
})
|
||||
.wrap_internal_err("failed to serialize tasks response")?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn tasks_cancel(
|
||||
&self,
|
||||
filter: &TasksCancelFilter,
|
||||
) -> eyre::Result<()> {
|
||||
let client = self
|
||||
.config
|
||||
.make_batch_client()
|
||||
.wrap_internal_err("failed to make batch client")?;
|
||||
let all_results = client
|
||||
.with_all_clients("cancel_tasks", async |client| {
|
||||
let mut q = TasksCancelQuery::new(client);
|
||||
match filter {
|
||||
TasksCancelFilter::All => {}
|
||||
TasksCancelFilter::Indexes { indexes } => {
|
||||
q.with_index_uids(indexes.iter().map(|s| s.as_str()));
|
||||
}
|
||||
TasksCancelFilter::AllEnqueued => {
|
||||
q.with_statuses(["enqueued"]);
|
||||
}
|
||||
};
|
||||
|
||||
let result = client.cancel_tasks_with(&q).await;
|
||||
Ok(result)
|
||||
})
|
||||
.await
|
||||
.wrap_internal_err("failed to cancel tasks")?;
|
||||
|
||||
for r in all_results {
|
||||
r.wrap_internal_err("failed to cancel tasks")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
12
apps/labrinth/src/search/backend/mod.rs
Normal file
12
apps/labrinth/src/search/backend/mod.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
mod common;
|
||||
pub mod elasticsearch;
|
||||
pub mod meilisearch;
|
||||
pub mod typesense;
|
||||
|
||||
pub use common::{
|
||||
ParsedSearchRequest, SearchIndex, SearchIndexName, SearchSort,
|
||||
combined_search_filters, parse_search_index, parse_search_request,
|
||||
};
|
||||
pub use elasticsearch::Elasticsearch;
|
||||
pub use meilisearch::{Meilisearch, MeilisearchConfig};
|
||||
pub use typesense::{Typesense, TypesenseConfig};
|
||||
1094
apps/labrinth/src/search/backend/typesense/mod.rs
Normal file
1094
apps/labrinth/src/search/backend/typesense/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,11 +1,11 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use eyre::Result;
|
||||
use futures::TryStreamExt;
|
||||
use itertools::Itertools;
|
||||
use std::collections::HashMap;
|
||||
use tracing::info;
|
||||
|
||||
use super::IndexingError;
|
||||
use crate::database::PgPool;
|
||||
use crate::database::models::loader_fields::{
|
||||
QueryLoaderField, QueryLoaderFieldEnumValue, QueryVersionField,
|
||||
@@ -29,7 +29,7 @@ pub async fn index_local(
|
||||
redis: &RedisPool,
|
||||
cursor: i64,
|
||||
limit: i64,
|
||||
) -> Result<(Vec<UploadSearchProject>, i64), IndexingError> {
|
||||
) -> eyre::Result<(Vec<UploadSearchProject>, i64)> {
|
||||
info!("Indexing local projects!");
|
||||
|
||||
// todo: loaders, project type, game versions
|
||||
@@ -84,7 +84,8 @@ pub async fn index_local(
|
||||
}
|
||||
})
|
||||
.try_collect::<Vec<PartialProject>>()
|
||||
.await?;
|
||||
.await
|
||||
.wrap_err("failed to fetch projects")?;
|
||||
|
||||
let project_ids = db_projects.iter().map(|x| x.id.0).collect::<Vec<i64>>();
|
||||
let project_components = db_projects
|
||||
@@ -430,6 +431,7 @@ pub async fn index_local(
|
||||
display_categories: display_categories.clone(),
|
||||
follows: project.follows,
|
||||
downloads: project.downloads,
|
||||
log_downloads: (project.downloads.max(1) as f64).ln(),
|
||||
icon_url: project.icon_url.clone(),
|
||||
author: owner.clone(),
|
||||
date_created: project.approved,
|
||||
@@ -473,7 +475,7 @@ struct PartialVersion {
|
||||
async fn index_versions(
|
||||
pool: &PgPool,
|
||||
project_ids: Vec<i64>,
|
||||
) -> Result<HashMap<DBProjectId, Vec<PartialVersion>>, IndexingError> {
|
||||
) -> Result<HashMap<DBProjectId, Vec<PartialVersion>>> {
|
||||
let versions: HashMap<DBProjectId, Vec<(DBVersionId, DateTime<Utc>)>> =
|
||||
sqlx::query!(
|
||||
"
|
||||
@@ -497,7 +499,8 @@ async fn index_versions(
|
||||
async move { Ok(acc) }
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.wrap_err("failed to fetch versions")?;
|
||||
|
||||
// Get project types, loaders
|
||||
#[derive(Default)]
|
||||
@@ -538,7 +541,8 @@ async fn index_versions(
|
||||
(version_id, version_loader_data)
|
||||
})
|
||||
.try_collect()
|
||||
.await?;
|
||||
.await
|
||||
.wrap_err("failed to fetch loaders and project types")?;
|
||||
|
||||
// Get version fields
|
||||
let version_fields: DashMap<DBVersionId, Vec<QueryVersionField>> =
|
||||
@@ -570,7 +574,10 @@ async fn index_versions(
|
||||
async move { Ok(acc) }
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.wrap_err("failed to fetch version fields")?;
|
||||
|
||||
// Get version fields
|
||||
|
||||
// Convert to partial versions
|
||||
let mut res_versions: HashMap<DBProjectId, Vec<PartialVersion>> =
|
||||
@@ -1,188 +1,222 @@
|
||||
use crate::env::ENV;
|
||||
use crate::database::redis::RedisPool;
|
||||
use crate::models::exp;
|
||||
use crate::models::exp::minecraft::JavaServerPing;
|
||||
use crate::models::ids::ProjectId;
|
||||
use crate::models::projects::SearchRequest;
|
||||
use crate::models::ids::{ProjectId, VersionId};
|
||||
use crate::queue::server_ping;
|
||||
use crate::{database::models::DatabaseError, database::redis::RedisPool};
|
||||
use crate::{models::error::ApiError, search::indexing::IndexingError};
|
||||
use actix_web::HttpResponse;
|
||||
use actix_web::http::StatusCode;
|
||||
use crate::routes::ApiError;
|
||||
use crate::{database::PgPool, env::ENV};
|
||||
use ariadne::ids::base62_impl::parse_base62;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::TryStreamExt;
|
||||
use futures::stream::FuturesOrdered;
|
||||
use itertools::Itertools;
|
||||
use meilisearch_sdk::client::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write;
|
||||
use std::{collections::HashMap, str::FromStr};
|
||||
use thiserror::Error;
|
||||
use tracing::{Instrument, info_span};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
pub mod backend;
|
||||
pub mod indexing;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SearchError {
|
||||
#[error("MeiliSearch Error: {0}")]
|
||||
MeiliSearch(#[from] meilisearch_sdk::errors::Error),
|
||||
#[error("Error while serializing or deserializing JSON: {0}")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
#[error("Error while parsing an integer: {0}")]
|
||||
IntParsing(#[from] std::num::ParseIntError),
|
||||
#[error("Error while formatting strings: {0}")]
|
||||
FormatError(#[from] std::fmt::Error),
|
||||
#[error("Environment Error")]
|
||||
Env(#[from] dotenvy::Error),
|
||||
#[error("Invalid index to sort by: {0}")]
|
||||
InvalidIndex(String),
|
||||
#[error("Database error: {0}")]
|
||||
Database(#[from] DatabaseError),
|
||||
/// Search parameters which can fit in a URL query string.
|
||||
///
|
||||
/// Used with `GET /*/search` endpoints.
|
||||
///
|
||||
/// Can be converted into a [`SearchRequest`] using [`From`].
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct SearchQuery {
|
||||
/// Free-text query; `None` when the client did not supply one.
pub query: Option<String>,
|
||||
/// Offset into the result set, kept as the raw string from the request
/// (not parsed or validated by this type).
pub offset: Option<String>,
|
||||
/// Name of the sort index, kept as the raw string from the request.
pub index: Option<String>,
|
||||
/// Maximum number of hits, kept as the raw string from the request.
pub limit: Option<String>,
|
||||
|
||||
/// Filter expression in the current filter syntax.
/// NOTE(review): the grammar is defined by whatever parses the request,
/// not by this struct - confirm there.
pub new_filters: Option<String>,
|
||||
|
||||
// TODO: Deprecated values below. WILL BE REMOVED V3!
|
||||
/// Legacy facet list (deprecated). Presumably JSON-encoded; confirm
/// against the request parser.
pub facets: Option<String>,
|
||||
/// Legacy filter expression (deprecated).
pub filters: Option<String>,
|
||||
/// Legacy version filter expression (deprecated).
pub version: Option<String>,
|
||||
}
|
||||
|
||||
impl actix_web::ResponseError for SearchError {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
SearchError::Env(..) => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
SearchError::MeiliSearch(..) => StatusCode::BAD_REQUEST,
|
||||
SearchError::Serde(..) => StatusCode::BAD_REQUEST,
|
||||
SearchError::IntParsing(..) => StatusCode::BAD_REQUEST,
|
||||
SearchError::InvalidIndex(..) => StatusCode::BAD_REQUEST,
|
||||
SearchError::FormatError(..) => StatusCode::BAD_REQUEST,
|
||||
SearchError::Database(..) => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
/// Search parameters which are more complicated and more suitable for a POST
|
||||
/// request body.
|
||||
///
|
||||
/// Used with `POST /*/search` endpoints.
|
||||
///
|
||||
/// Can be converted from a [`SearchQuery`] using [`From`].
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct SearchRequest {
|
||||
/// Free-text query; `None` when the client did not supply one.
pub query: Option<String>,
|
||||
/// Offset into the result set, kept as the raw string from the request.
pub offset: Option<String>,
|
||||
/// Name of the sort index, kept as the raw string from the request.
pub index: Option<String>,
|
||||
/// Maximum number of hits, kept as the raw string from the request.
pub limit: Option<String>,
|
||||
// `serde(default)`: deserialises to `false` when absent from the body.
#[serde(default)]
|
||||
/// NOTE(review): presumably asks the backend to attach per-hit
/// `search_metadata` to results - confirm in the backend implementations.
pub show_metadata: bool,
|
||||
// `serde(default)`: falls back to `RequestConfig::default()` when absent.
#[serde(default)]
|
||||
/// Per-request tuning for the Elasticsearch backend.
pub elasticsearch_config: backend::elasticsearch::RequestConfig,
|
||||
// `serde(default)`: falls back to `RequestConfig::default()` when absent.
#[serde(default)]
|
||||
/// Per-request tuning for the Typesense backend.
pub typesense_config: backend::typesense::RequestConfig,
|
||||
|
||||
/// Filter expression in the current filter syntax.
pub new_filters: Option<String>,
|
||||
|
||||
/// Legacy facet list. Presumably JSON-encoded; confirm against the
/// request parser.
pub facets: Option<String>,
|
||||
/// Legacy filter expression.
pub filters: Option<String>,
|
||||
/// Legacy version filter expression.
pub version: Option<String>,
|
||||
}
|
||||
|
||||
impl From<SearchQuery> for SearchRequest {
|
||||
fn from(query: SearchQuery) -> Self {
|
||||
Self {
|
||||
query: query.query,
|
||||
offset: query.offset,
|
||||
index: query.index,
|
||||
limit: query.limit,
|
||||
show_metadata: false,
|
||||
elasticsearch_config:
|
||||
backend::elasticsearch::RequestConfig::default(),
|
||||
typesense_config: backend::typesense::RequestConfig::default(),
|
||||
new_filters: query.new_filters,
|
||||
facets: query.facets,
|
||||
filters: query.filters,
|
||||
version: query.version,
|
||||
}
|
||||
}
|
||||
|
||||
fn error_response(&self) -> HttpResponse {
|
||||
HttpResponse::build(self.status_code()).json(ApiError {
|
||||
error: match self {
|
||||
SearchError::Env(..) => "environment_error",
|
||||
SearchError::MeiliSearch(..) => "meilisearch_error",
|
||||
SearchError::Serde(..) => "invalid_input",
|
||||
SearchError::IntParsing(..) => "invalid_input",
|
||||
SearchError::InvalidIndex(..) => "invalid_input",
|
||||
SearchError::FormatError(..) => "invalid_input",
|
||||
SearchError::Database(..) => "database_error",
|
||||
},
|
||||
description: self.to_string(),
|
||||
details: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Newtype around a Meilisearch [`Client`] that marks it as the client
/// used for reads.
#[derive(Debug, Clone)]
|
||||
pub struct MeilisearchReadClient {
|
||||
/// The wrapped SDK client.
pub client: Client,
|
||||
}
|
||||
|
||||
/// Lets a [`MeilisearchReadClient`] be used wherever a `&Client` is
/// expected by dereferencing to the wrapped client.
impl std::ops::Deref for MeilisearchReadClient {
|
||||
type Target = Client;
|
||||
|
||||
/// Returns a reference to the wrapped client.
fn deref(&self) -> &Self::Target {
|
||||
&self.client
|
||||
}
|
||||
}
|
||||
|
||||
/// A group of Meilisearch clients that operations are applied to together.
pub struct BatchClient {
|
||||
/// The clients, in the order they were supplied.
pub clients: Vec<Client>,
|
||||
}
|
||||
|
||||
impl BatchClient {
|
||||
/// Wraps the given clients without modifying or validating them.
pub fn new(clients: Vec<Client>) -> Self {
|
||||
Self { clients }
|
||||
}
|
||||
|
||||
pub async fn with_all_clients<'a, T, G, Fut>(
|
||||
&'a self,
|
||||
task_name: &str,
|
||||
generator: G,
|
||||
) -> Result<Vec<T>, IndexingError>
|
||||
where
|
||||
G: Fn(&'a Client) -> Fut,
|
||||
Fut: Future<Output = Result<T, IndexingError>> + 'a,
|
||||
{
|
||||
let mut tasks = FuturesOrdered::new();
|
||||
for (idx, client) in self.clients.iter().enumerate() {
|
||||
tasks.push_back(generator(client).instrument(info_span!(
|
||||
"client_task",
|
||||
task.name = task_name,
|
||||
client.idx = idx,
|
||||
)));
|
||||
}
|
||||
|
||||
let results = tasks.try_collect::<Vec<T>>().await?;
|
||||
#[async_trait]
|
||||
pub trait SearchBackend: Send + Sync {
|
||||
async fn search_for_project(
|
||||
&self,
|
||||
info: &SearchRequest,
|
||||
redis: &RedisPool,
|
||||
) -> Result<SearchResults, ApiError> {
|
||||
let mut results = self.search_for_project_raw(info).await?;
|
||||
hydrate_search_results(&mut results.hits, redis)
|
||||
.await
|
||||
.map_err(ApiError::Internal)?;
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn across_all<T, F, R>(&self, data: Vec<T>, mut predicate: F) -> Vec<R>
|
||||
where
|
||||
F: FnMut(T, &Client) -> R,
|
||||
{
|
||||
assert_eq!(
|
||||
data.len(),
|
||||
self.clients.len(),
|
||||
"mismatch between data len and meilisearch client count"
|
||||
);
|
||||
self.clients
|
||||
.iter()
|
||||
.zip(data)
|
||||
.map(|(client, item)| predicate(item, client))
|
||||
.collect()
|
||||
}
|
||||
/// Backend-specific search. The provided `search_for_project` calls this
/// first and post-processes the hits it returns.
async fn search_for_project_raw(
|
||||
&self,
|
||||
info: &SearchRequest,
|
||||
) -> Result<SearchResults, ApiError>;
|
||||
|
||||
/// NOTE(review): presumably (re)builds the backend's project index from
/// the database reachable through `ro_pool` - confirm in the implementors.
async fn index_projects(
|
||||
&self,
|
||||
ro_pool: PgPool,
|
||||
redis: RedisPool,
|
||||
) -> eyre::Result<()>;
|
||||
|
||||
/// Removes the documents identified by the given version IDs.
async fn remove_documents(&self, ids: &[VersionId]) -> eyre::Result<()>;
|
||||
|
||||
/// Returns backend task information as untyped JSON.
/// NOTE(review): the JSON shape is backend-defined - confirm per implementor.
async fn tasks(&self) -> eyre::Result<Value>;
|
||||
|
||||
/// Cancels the backend tasks selected by `filter`.
async fn tasks_cancel(
|
||||
&self,
|
||||
filter: &TasksCancelFilter,
|
||||
) -> eyre::Result<()>;
|
||||
}
|
||||
|
||||
/// Connection settings for the Meilisearch instances.
#[derive(Debug, Clone)]
|
||||
pub struct SearchConfig {
|
||||
/// One address per Meilisearch instance; `make_batch_client` creates a
/// client for each of them.
pub addresses: Vec<String>,
|
||||
/// Address used by `make_loadbalanced_read_client` for the read client.
pub read_lb_address: String,
|
||||
/// API key passed to every client created from this config.
pub key: String,
|
||||
/// Prefix that `get_index_name` puts in front of every index name.
pub meta_namespace: String,
|
||||
}
|
||||
async fn hydrate_search_results(
|
||||
hits: &mut [ResultSearchProject],
|
||||
redis_pool: &RedisPool,
|
||||
) -> eyre::Result<()> {
|
||||
// Minecraft Java servers should fetch the latest player count that we have
|
||||
// from Redis, rather than the (pretty stale) data from search backend
|
||||
// TODO: this block should be made generic over the component type,
|
||||
// for now we can hardcode MC java servers tho
|
||||
|
||||
impl SearchConfig {
|
||||
// Panics if the environment variables are not set,
|
||||
// but these are already checked for on startup.
|
||||
pub fn new(meta_namespace: Option<String>) -> Self {
|
||||
Self {
|
||||
addresses: ENV.MEILISEARCH_WRITE_ADDRS.0.clone(),
|
||||
key: ENV.MEILISEARCH_KEY.clone(),
|
||||
meta_namespace: meta_namespace.unwrap_or_default(),
|
||||
read_lb_address: ENV.MEILISEARCH_READ_ADDR.clone(),
|
||||
let project_ids = hits
|
||||
.iter()
|
||||
.filter(|hit| hit.components.minecraft_java_server.is_some())
|
||||
.filter_map(|hit| parse_base62(&hit.project_id).ok().map(ProjectId))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let pings_by_project_id = if project_ids.is_empty() {
|
||||
HashMap::new()
|
||||
} else {
|
||||
let mut redis = redis_pool.connect().await?;
|
||||
let ping_results = redis
|
||||
.get_many_deserialized_from_json::<JavaServerPing>(
|
||||
server_ping::REDIS_NAMESPACE,
|
||||
&project_ids
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
ping_results
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, ping)| ping.map(|ping| (project_ids[idx], ping)))
|
||||
.collect::<HashMap<_, _>>()
|
||||
};
|
||||
|
||||
for hit in hits {
|
||||
let Some(java_server) = hit.components.minecraft_java_server.as_mut()
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
if let Ok(project_id) = parse_base62(&hit.project_id).map(ProjectId) {
|
||||
java_server.ping = pings_by_project_id.get(&project_id).cloned();
|
||||
} else {
|
||||
java_server.ping = None;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn make_loadbalanced_read_client(
|
||||
&self,
|
||||
) -> Result<MeilisearchReadClient, meilisearch_sdk::errors::Error> {
|
||||
Ok(MeilisearchReadClient {
|
||||
client: Client::new(&self.read_lb_address, Some(&self.key))?,
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Selects which search-backend tasks a cancel request applies to.
//
// Serialised as an internally tagged enum with a snake_case `type` field,
// e.g. `{"type": "all_enqueued"}` or
// `{"type": "indexes", "indexes": ["..."]}`.
//
// Plain `//` comments are used on purpose so the derived `ToSchema`
// output is not changed by this documentation.
#[derive(Deserialize, Serialize, ToSchema)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum TasksCancelFilter {
|
||||
// Every task.
All,
|
||||
// Only tasks that are still enqueued.
AllEnqueued,
|
||||
// Only tasks belonging to the named indexes.
Indexes { indexes: Vec<String> },
|
||||
}
|
||||
|
||||
/// The search engine implementation to use.
///
/// Parsed from the lowercase names `"meilisearch"`, `"elasticsearch"` and
/// `"typesense"` via [`FromStr`]; `backend` matches on it to construct the
/// corresponding implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum SearchBackendKind {
|
||||
Meilisearch,
|
||||
Elasticsearch,
|
||||
Typesense,
|
||||
}
|
||||
|
||||
/// Identifiers for individual fields of a search document.
///
/// Derives `strum::EnumIter`, so all variants can be enumerated.
/// NOTE(review): presumably the set of fields the backends expose for
/// filtering - confirm against the backend implementations.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, strum::EnumIter)]
|
||||
pub enum SearchField {
|
||||
Categories,
|
||||
ProjectTypes,
|
||||
ProjectId,
|
||||
OpenSource,
|
||||
Environment,
|
||||
GameVersions,
|
||||
ClientSide,
|
||||
ServerSide,
|
||||
MinecraftServerRegion,
|
||||
MinecraftServerLanguages,
|
||||
MinecraftJavaServerContentKind,
|
||||
MinecraftJavaServerContentSupportedGameVersions,
|
||||
MinecraftJavaServerPingData,
|
||||
}
|
||||
|
||||
/// Error returned when a string does not name a known
/// [`SearchBackendKind`].
#[derive(Debug, Error)]
|
||||
#[error("invalid search backend kind")]
|
||||
pub struct InvalidSearchBackendKind;
|
||||
|
||||
impl FromStr for SearchBackendKind {
|
||||
type Err = InvalidSearchBackendKind;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"meilisearch" => SearchBackendKind::Meilisearch,
|
||||
"elasticsearch" => SearchBackendKind::Elasticsearch,
|
||||
"typesense" => SearchBackendKind::Typesense,
|
||||
_ => return Err(InvalidSearchBackendKind),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn make_batch_client(
|
||||
&self,
|
||||
) -> Result<BatchClient, meilisearch_sdk::errors::Error> {
|
||||
Ok(BatchClient::new(
|
||||
self.addresses
|
||||
.iter()
|
||||
.map(|address| {
|
||||
Client::new(address.as_str(), Some(self.key.as_str()))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?,
|
||||
))
|
||||
}
|
||||
|
||||
// Next: true if we want the next index (we are preparing the next swap), false if we want the current index (searching)
|
||||
pub fn get_index_name(&self, index: &str, next: bool) -> String {
|
||||
let alt = if next { "_alt" } else { "" };
|
||||
format!("{}_{}_{}", self.meta_namespace, index, alt)
|
||||
}
|
||||
}
|
||||
|
||||
/// A project document used for uploading projects to MeiliSearch's indices.
|
||||
/// This contains some extra data that is not returned by search results.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct UploadSearchProject {
|
||||
pub version_id: String,
|
||||
@@ -197,6 +231,7 @@ pub struct UploadSearchProject {
|
||||
pub display_categories: Vec<String>,
|
||||
pub follows: i32,
|
||||
pub downloads: i32,
|
||||
pub log_downloads: f64,
|
||||
pub icon_url: Option<String>,
|
||||
pub license: String,
|
||||
pub gallery: Vec<String>,
|
||||
@@ -263,281 +298,52 @@ pub struct ResultSearchProject {
|
||||
pub components: exp::ProjectQuery,
|
||||
#[serde(flatten)]
|
||||
pub loader_fields: HashMap<String, Vec<serde_json::Value>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub search_metadata: Option<Value>,
|
||||
}
|
||||
|
||||
pub fn get_sort_index(
|
||||
config: &SearchConfig,
|
||||
index: &str,
|
||||
new_filters: Option<&str>,
|
||||
) -> Result<(String, &'static [&'static str]), SearchError> {
|
||||
let projects_name = config.get_index_name("projects", false);
|
||||
let projects_filtered_name =
|
||||
config.get_index_name("projects_filtered", false);
|
||||
|
||||
// TODO: this is a dumb hack, the frontend should pass the project type it's filtering directly
|
||||
let is_server = new_filters
|
||||
.is_some_and(|f| f.contains("project_types = minecraft_java_server"));
|
||||
|
||||
Ok(match index {
|
||||
"relevance" => (
|
||||
projects_name,
|
||||
if is_server {
|
||||
&[
|
||||
"minecraft_java_server.verified_plays_2w:desc",
|
||||
"minecraft_java_server.ping.data.players_online:desc",
|
||||
"version_published_timestamp:desc",
|
||||
]
|
||||
} else {
|
||||
&["downloads:desc", "version_published_timestamp:desc"]
|
||||
},
|
||||
),
|
||||
"downloads" => (
|
||||
projects_filtered_name,
|
||||
&["downloads:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
"follows" => (
|
||||
projects_name,
|
||||
&["follows:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
"updated" | "date_modified" => (
|
||||
projects_name,
|
||||
&["date_modified:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
"newest" | "date_created" => (
|
||||
projects_name,
|
||||
&["date_created:desc", "version_published_timestamp:desc"],
|
||||
),
|
||||
"minecraft_java_server.verified_plays_2w" => (
|
||||
projects_name,
|
||||
&[
|
||||
"minecraft_java_server.verified_plays_2w:desc",
|
||||
"version_published_timestamp:desc",
|
||||
],
|
||||
),
|
||||
"minecraft_java_server.ping.data.players_online" => (
|
||||
projects_name,
|
||||
&[
|
||||
"minecraft_java_server.ping.data.players_online:desc",
|
||||
"version_published_timestamp:desc",
|
||||
],
|
||||
),
|
||||
i => return Err(SearchError::InvalidIndex(i.to_string())),
|
||||
})
|
||||
}
|
||||
|
||||
/// Rewrites legacy spellings in a filter expression to the field names
/// used by the index.
///
/// Two plain-text substitutions are applied, in this order:
/// 1. every `components.` occurrence is removed;
/// 2. `minecraft_java_server.content` followed by ` =`, ` !=`, ` IN ` or
///    ` NOT IN ` becomes `minecraft_java_server.content.kind` with the
///    same operator.
///
/// The replacements are purely textual; the expression is not parsed.
fn normalize_filter_aliases(filters: &str) -> String {
    const CONTENT_ALIASES: [(&str, &str); 4] = [
        (
            "minecraft_java_server.content =",
            "minecraft_java_server.content.kind =",
        ),
        (
            "minecraft_java_server.content !=",
            "minecraft_java_server.content.kind !=",
        ),
        (
            "minecraft_java_server.content IN ",
            "minecraft_java_server.content.kind IN ",
        ),
        (
            "minecraft_java_server.content NOT IN ",
            "minecraft_java_server.content.kind NOT IN ",
        ),
    ];

    let without_prefix = filters.replace("components.", "");
    CONTENT_ALIASES
        .iter()
        .fold(without_prefix, |acc, &(from, to)| acc.replace(from, to))
}
|
||||
|
||||
pub async fn search_for_project(
|
||||
info: &SearchRequest,
|
||||
config: &SearchConfig,
|
||||
redis_pool: &RedisPool,
|
||||
) -> Result<SearchResults, SearchError> {
|
||||
let offset: usize = info.offset.as_deref().unwrap_or("0").parse()?;
|
||||
let index = info.index.as_deref().unwrap_or("relevance");
|
||||
let limit = info
|
||||
.limit
|
||||
.as_deref()
|
||||
.unwrap_or("10")
|
||||
.parse::<usize>()?
|
||||
.min(100);
|
||||
|
||||
let sort = get_sort_index(config, index, info.new_filters.as_deref())?;
|
||||
let client = config.make_loadbalanced_read_client()?;
|
||||
let meilisearch_index = client.get_index(sort.0).await?;
|
||||
|
||||
let mut filter_string = String::new();
|
||||
|
||||
// Convert offset and limit to page and hits_per_page
|
||||
let hits_per_page = if limit == 0 { 1 } else { limit };
|
||||
|
||||
let page = offset / hits_per_page + 1;
|
||||
|
||||
let results = {
|
||||
let mut query = meilisearch_index.search();
|
||||
query
|
||||
.with_page(page)
|
||||
.with_hits_per_page(hits_per_page)
|
||||
.with_query(info.query.as_deref().unwrap_or_default())
|
||||
.with_sort(sort.1);
|
||||
|
||||
let normalized_new_filters =
|
||||
info.new_filters.as_deref().map(normalize_filter_aliases);
|
||||
if let Some(new_filters) = normalized_new_filters.as_deref() {
|
||||
query.with_filter(new_filters);
|
||||
} else {
|
||||
let facets = if let Some(facets) = &info.facets {
|
||||
Some(serde_json::from_str::<Vec<Vec<Value>>>(facets)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let filters: Cow<_> =
|
||||
match (info.filters.as_deref(), info.version.as_deref()) {
|
||||
(Some(f), Some(v)) => format!("({f}) AND ({v})").into(),
|
||||
(Some(f), None) => f.into(),
|
||||
(None, Some(v)) => v.into(),
|
||||
(None, None) => "".into(),
|
||||
};
|
||||
let filters = normalize_filter_aliases(&filters);
|
||||
|
||||
if let Some(facets) = facets {
|
||||
// Search can now *optionally* have a third inner array: So Vec(AND)<Vec(OR)<Vec(AND)< _ >>>
|
||||
// For every inner facet, we will check if it can be deserialized into a Vec<&str>, and do so.
|
||||
// If not, we will assume it is a single facet and wrap it in a Vec.
|
||||
let facets: Vec<Vec<Vec<String>>> = facets
|
||||
.into_iter()
|
||||
.map(|facets| {
|
||||
facets
|
||||
.into_iter()
|
||||
.map(|facet| {
|
||||
if facet.is_array() {
|
||||
serde_json::from_value::<Vec<String>>(facet)
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
vec![
|
||||
serde_json::from_value::<String>(facet)
|
||||
.unwrap_or_default(),
|
||||
]
|
||||
}
|
||||
})
|
||||
.collect_vec()
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
filter_string.push('(');
|
||||
for (index, facet_outer_list) in facets.iter().enumerate() {
|
||||
filter_string.push('(');
|
||||
|
||||
for (facet_outer_index, facet_inner_list) in
|
||||
facet_outer_list.iter().enumerate()
|
||||
{
|
||||
filter_string.push('(');
|
||||
for (facet_inner_index, facet) in
|
||||
facet_inner_list.iter().enumerate()
|
||||
{
|
||||
let facet = normalize_filter_aliases(
|
||||
&facet.replace(':', " = "),
|
||||
);
|
||||
filter_string.push_str(&facet);
|
||||
if facet_inner_index != (facet_inner_list.len() - 1)
|
||||
{
|
||||
filter_string.push_str(" AND ")
|
||||
}
|
||||
}
|
||||
filter_string.push(')');
|
||||
|
||||
if facet_outer_index != (facet_outer_list.len() - 1) {
|
||||
filter_string.push_str(" OR ")
|
||||
}
|
||||
}
|
||||
|
||||
filter_string.push(')');
|
||||
|
||||
if index != (facets.len() - 1) {
|
||||
filter_string.push_str(" AND ")
|
||||
}
|
||||
}
|
||||
filter_string.push(')');
|
||||
|
||||
if !filters.is_empty() {
|
||||
write!(filter_string, " AND ({filters})")?;
|
||||
}
|
||||
} else {
|
||||
filter_string.push_str(&filters);
|
||||
}
|
||||
|
||||
if !filter_string.is_empty() {
|
||||
query.with_filter(&filter_string);
|
||||
}
|
||||
}
|
||||
|
||||
query.execute::<ResultSearchProject>().await?
|
||||
};
|
||||
|
||||
// Minecraft Java servers should fetch the latest player count that we have
|
||||
// from Redis, rather than the (pretty stale) data from search backend
|
||||
// TODO: this block should be made generic over the component type,
|
||||
// for now we can hardcode MC java servers tho
|
||||
let mut hits = results.hits.into_iter().map(|r| r.result).collect_vec();
|
||||
|
||||
let project_ids = hits
|
||||
.iter()
|
||||
.filter(|hit| hit.components.minecraft_java_server.is_some())
|
||||
.filter_map(|hit| parse_base62(&hit.project_id).ok().map(ProjectId))
|
||||
.collect_vec();
|
||||
|
||||
let pings_by_project_id = if project_ids.is_empty() {
|
||||
HashMap::new()
|
||||
} else {
|
||||
let mut redis = redis_pool.connect().await?;
|
||||
let ping_results = redis
|
||||
.get_many_deserialized_from_json::<JavaServerPing>(
|
||||
server_ping::REDIS_NAMESPACE,
|
||||
&project_ids.iter().map(ToString::to_string).collect_vec(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
ping_results
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, ping)| ping.map(|ping| (project_ids[idx], ping)))
|
||||
.collect::<HashMap<_, _>>()
|
||||
};
|
||||
|
||||
for hit in &mut hits {
|
||||
let Some(java_server) = hit.components.minecraft_java_server.as_mut()
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
if let Ok(project_id) = parse_base62(&hit.project_id).map(ProjectId) {
|
||||
java_server.ping = pings_by_project_id.get(&project_id).cloned();
|
||||
} else {
|
||||
java_server.ping = None;
|
||||
impl From<UploadSearchProject> for ResultSearchProject {
|
||||
fn from(source: UploadSearchProject) -> Self {
|
||||
Self {
|
||||
version_id: source.version_id,
|
||||
project_id: source.project_id,
|
||||
project_types: source.project_types,
|
||||
slug: source.slug,
|
||||
author: source.author,
|
||||
name: source.name,
|
||||
summary: source.summary,
|
||||
categories: source.categories,
|
||||
display_categories: source.display_categories,
|
||||
downloads: source.downloads,
|
||||
follows: source.follows,
|
||||
icon_url: source.icon_url,
|
||||
date_created: source.date_created.to_rfc3339(),
|
||||
date_modified: source.date_modified.to_rfc3339(),
|
||||
license: source.license,
|
||||
gallery: source.gallery,
|
||||
featured_gallery: source.featured_gallery,
|
||||
color: source.color,
|
||||
loaders: source.loaders,
|
||||
project_loader_fields: source.project_loader_fields,
|
||||
components: source.components,
|
||||
loader_fields: source.loader_fields,
|
||||
search_metadata: None,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(SearchResults {
|
||||
hits,
|
||||
page: results.page.unwrap_or_default(),
|
||||
hits_per_page: results.hits_per_page.unwrap_or_default(),
|
||||
total_hits: results.total_hits.unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::normalize_filter_aliases;
|
||||
|
||||
#[test]
|
||||
fn normalizes_component_filter_aliases() {
|
||||
assert_eq!(
|
||||
normalize_filter_aliases(
|
||||
"components.minecraft_java_server.content = vanilla AND components.minecraft_server.country = US"
|
||||
),
|
||||
"minecraft_java_server.content.kind = vanilla AND minecraft_server.country = US"
|
||||
);
|
||||
pub fn backend(meta_namespace: Option<String>) -> Box<dyn SearchBackend> {
|
||||
match ENV.SEARCH_BACKEND {
|
||||
SearchBackendKind::Meilisearch => {
|
||||
let config = backend::MeilisearchConfig::new(meta_namespace);
|
||||
Box::new(backend::Meilisearch::new(config))
|
||||
}
|
||||
SearchBackendKind::Elasticsearch => {
|
||||
Box::new(backend::Elasticsearch::new(meta_namespace).unwrap())
|
||||
}
|
||||
SearchBackendKind::Typesense => {
|
||||
let config = backend::TypesenseConfig::new(meta_namespace);
|
||||
Box::new(backend::Typesense::new(config))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user