Index search by original and split title (#5589)

* Index search by original and split title

* better normalization of title/author names for indexing

* replace println with warn

* fix test
This commit is contained in:
aecsocket
2026-03-20 04:01:19 +00:00
committed by GitHub
parent 00e81adbbd
commit 3c5bd0756d
3 changed files with 56 additions and 12 deletions

View File

@@ -105,18 +105,25 @@ impl Default for RequestConfig {
} }
fn default_query_by() -> Vec<String> { fn default_query_by() -> Vec<String> {
["indexed_title", "slug", "summary", "indexed_author"] [
.into_iter() "name",
.map(str::to_string) "indexed_name",
.collect() "slug",
"author",
"indexed_author",
"summary",
]
.into_iter()
.map(str::to_string)
.collect()
} }
fn default_query_by_weights() -> Vec<u8> { fn default_query_by_weights() -> Vec<u8> {
vec![15, 5, 2, 1] vec![15, 15, 10, 3, 3, 1]
} }
fn default_prefix() -> Vec<bool> { fn default_prefix() -> Vec<bool> {
vec![true, true, true, true] vec![true, true, true, true, true, true]
} }
const fn default_prioritize_exact_match() -> bool { const fn default_prioritize_exact_match() -> bool {
@@ -491,7 +498,7 @@ impl Typesense {
let mut fields = vec![ let mut fields = vec![
json!({"name": "summary", "type": "string", "facet": false}), json!({"name": "summary", "type": "string", "facet": false}),
json!({"name": "slug", "type": "string", "facet": false}), json!({"name": "slug", "type": "string", "facet": false}),
json!({"name": "indexed_title", "type": "string", "facet": false, "stem": true}), json!({"name": "indexed_name", "type": "string", "facet": false, "stem": true}),
json!({"name": "indexed_author", "type": "string", "facet": false}), json!({"name": "indexed_author", "type": "string", "facet": false}),
json!({"name": "log_downloads", "type": "float", "sort": true}), json!({"name": "log_downloads", "type": "float", "sort": true}),
json!({"name": "follows", "type": "int32", "facet": true, "sort": true}), json!({"name": "follows", "type": "int32", "facet": true, "sort": true}),

View File

@@ -4,8 +4,10 @@ use eyre::Result;
use futures::TryStreamExt; use futures::TryStreamExt;
use heck::ToKebabCase; use heck::ToKebabCase;
use itertools::Itertools; use itertools::Itertools;
use regex::Regex;
use std::collections::HashMap; use std::collections::HashMap;
use tracing::info; use std::sync::LazyLock;
use tracing::{info, warn};
use crate::database::PgPool; use crate::database::PgPool;
use crate::database::models::loader_fields::{ use crate::database::models::loader_fields::{
@@ -25,6 +27,13 @@ use crate::routes::v2_reroute;
use crate::search::UploadSearchProject; use crate::search::UploadSearchProject;
use crate::util::error::Context; use crate::util::error::Context;
/// Normalizes a project name or author name into the form stored in the
/// `indexed_*` search fields.
///
/// Works in two steps:
/// 1. Deletes every character that is not a letter, a digit, `-`, `.` or
///    whitespace. Deleting (rather than splitting on) characters such as
///    apostrophes keeps `"Xaero's"` as the single token `xaeros`.
/// 2. Converts the remainder to kebab-case, which lowercases it and splits on
///    case changes and on the separators kept above, e.g.
///    `"JourneyMap"` -> `"journey-map"` and `"Test 1.0"` -> `"test-1-0"`.
///
/// Letters and digits are matched with Unicode classes, so non-ASCII names
/// (accented Latin, Cyrillic, CJK, ...) keep their characters instead of
/// being reduced to a truncated or empty string.
fn normalize_for_search(s: &str) -> String {
    // Compiled once on first use. The literal `-` sits at the end of the
    // class so it can never be read as part of a range.
    static SPECIAL_CHARS_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"[^\p{L}\p{N}.\s-]").expect("valid regex")
    });

    SPECIAL_CHARS_RE.replace_all(s, "").to_kebab_case()
}
pub async fn index_local( pub async fn index_local(
pool: &PgPool, pool: &PgPool,
redis: &RedisPool, redis: &RedisPool,
@@ -262,7 +271,7 @@ pub async fn index_local(
{ {
team_owner team_owner
} else { } else {
println!( warn!(
"org owner not found for project {} id: {}!", "org owner not found for project {} id: {}!",
project.name, project.id.0 project.name, project.id.0
); );
@@ -427,7 +436,7 @@ pub async fn index_local(
project_id: crate::models::ids::ProjectId::from(project.id) project_id: crate::models::ids::ProjectId::from(project.id)
.to_string(), .to_string(),
name: project.name.clone(), name: project.name.clone(),
indexed_title: project.name.to_kebab_case(), indexed_name: normalize_for_search(&project.name),
summary: project.summary.clone(), summary: project.summary.clone(),
categories: categories.clone(), categories: categories.clone(),
display_categories: display_categories.clone(), display_categories: display_categories.clone(),
@@ -436,7 +445,7 @@ pub async fn index_local(
log_downloads: (project.downloads.max(1) as f64).ln(), log_downloads: (project.downloads.max(1) as f64).ln(),
icon_url: project.icon_url.clone(), icon_url: project.icon_url.clone(),
author: owner.clone(), author: owner.clone(),
indexed_author: owner.to_kebab_case(), indexed_author: normalize_for_search(&owner),
date_created: project.approved, date_created: project.approved,
created_timestamp: project.approved.timestamp(), created_timestamp: project.approved.timestamp(),
date_modified: project.updated, date_modified: project.updated,
@@ -614,3 +623,31 @@ async fn index_versions(
Ok(res_versions) Ok(res_versions)
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `normalize_for_search` on each input and compares the result with
    /// the paired expected string.
    fn check_all(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(normalize_for_search(input), expected);
        }
    }

    /// Punctuation is dropped, and case changes become word boundaries.
    #[test]
    fn test_normalize_for_search_removes_special_chars() {
        check_all(&[
            ("Xaero's Minimap", "xaeros-minimap"),
            ("JourneyMap", "journey-map"),
            ("journey-map", "journey-map"),
            ("SomeUserName", "some-user-name"),
        ]);
    }

    /// Inner whitespace becomes a single hyphen; outer whitespace vanishes.
    #[test]
    fn test_normalize_for_search_handles_whitespace() {
        check_all(&[
            ("Some Project Name", "some-project-name"),
            (" padded ", "padded"),
        ]);
    }

    /// Digits are kept, and a dot between them acts as a separator.
    #[test]
    fn test_normalize_for_search_handles_numbers() {
        check_all(&[
            ("Project 123", "project-123"),
            ("Test 1.0", "test-1-0"),
        ]);
    }
}

View File

@@ -230,7 +230,7 @@ pub struct UploadSearchProject {
pub author: String, pub author: String,
pub indexed_author: String, pub indexed_author: String,
pub name: String, pub name: String,
pub indexed_title: String, pub indexed_name: String,
pub summary: String, pub summary: String,
pub categories: Vec<String>, pub categories: Vec<String>,
pub display_categories: Vec<String>, pub display_categories: Vec<String>,