Index search by original and split title (#5589)
* Index search by original and split title
* better normalization of title/author names for indexing
* replace println with warn
* fix test
This commit is contained in:
@@ -105,18 +105,25 @@ impl Default for RequestConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn default_query_by() -> Vec<String> {
|
fn default_query_by() -> Vec<String> {
|
||||||
["indexed_title", "slug", "summary", "indexed_author"]
|
[
|
||||||
.into_iter()
|
"name",
|
||||||
.map(str::to_string)
|
"indexed_name",
|
||||||
.collect()
|
"slug",
|
||||||
|
"author",
|
||||||
|
"indexed_author",
|
||||||
|
"summary",
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.map(str::to_string)
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_query_by_weights() -> Vec<u8> {
|
fn default_query_by_weights() -> Vec<u8> {
|
||||||
vec![15, 5, 2, 1]
|
vec![15, 15, 10, 3, 3, 1]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_prefix() -> Vec<bool> {
|
fn default_prefix() -> Vec<bool> {
|
||||||
vec![true, true, true, true]
|
vec![true, true, true, true, true, true]
|
||||||
}
|
}
|
||||||
|
|
||||||
const fn default_prioritize_exact_match() -> bool {
|
const fn default_prioritize_exact_match() -> bool {
|
||||||
@@ -491,7 +498,7 @@ impl Typesense {
|
|||||||
let mut fields = vec![
|
let mut fields = vec![
|
||||||
json!({"name": "summary", "type": "string", "facet": false}),
|
json!({"name": "summary", "type": "string", "facet": false}),
|
||||||
json!({"name": "slug", "type": "string", "facet": false}),
|
json!({"name": "slug", "type": "string", "facet": false}),
|
||||||
json!({"name": "indexed_title", "type": "string", "facet": false, "stem": true}),
|
json!({"name": "indexed_name", "type": "string", "facet": false, "stem": true}),
|
||||||
json!({"name": "indexed_author", "type": "string", "facet": false}),
|
json!({"name": "indexed_author", "type": "string", "facet": false}),
|
||||||
json!({"name": "log_downloads", "type": "float", "sort": true}),
|
json!({"name": "log_downloads", "type": "float", "sort": true}),
|
||||||
json!({"name": "follows", "type": "int32", "facet": true, "sort": true}),
|
json!({"name": "follows", "type": "int32", "facet": true, "sort": true}),
|
||||||
|
|||||||
@@ -4,8 +4,10 @@ use eyre::Result;
|
|||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use heck::ToKebabCase;
|
use heck::ToKebabCase;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
use regex::Regex;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use tracing::info;
|
use std::sync::LazyLock;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::database::PgPool;
|
use crate::database::PgPool;
|
||||||
use crate::database::models::loader_fields::{
|
use crate::database::models::loader_fields::{
|
||||||
@@ -25,6 +27,13 @@ use crate::routes::v2_reroute;
|
|||||||
use crate::search::UploadSearchProject;
|
use crate::search::UploadSearchProject;
|
||||||
use crate::util::error::Context;
|
use crate::util::error::Context;
|
||||||
|
|
||||||
|
fn normalize_for_search(s: &str) -> String {
|
||||||
|
static SPECIAL_CHARS_RE: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9-.\s]").expect("valid regex"));
|
||||||
|
|
||||||
|
SPECIAL_CHARS_RE.replace_all(s, "").to_kebab_case()
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn index_local(
|
pub async fn index_local(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
redis: &RedisPool,
|
redis: &RedisPool,
|
||||||
@@ -262,7 +271,7 @@ pub async fn index_local(
|
|||||||
{
|
{
|
||||||
team_owner
|
team_owner
|
||||||
} else {
|
} else {
|
||||||
println!(
|
warn!(
|
||||||
"org owner not found for project {} id: {}!",
|
"org owner not found for project {} id: {}!",
|
||||||
project.name, project.id.0
|
project.name, project.id.0
|
||||||
);
|
);
|
||||||
@@ -427,7 +436,7 @@ pub async fn index_local(
|
|||||||
project_id: crate::models::ids::ProjectId::from(project.id)
|
project_id: crate::models::ids::ProjectId::from(project.id)
|
||||||
.to_string(),
|
.to_string(),
|
||||||
name: project.name.clone(),
|
name: project.name.clone(),
|
||||||
indexed_title: project.name.to_kebab_case(),
|
indexed_name: normalize_for_search(&project.name),
|
||||||
summary: project.summary.clone(),
|
summary: project.summary.clone(),
|
||||||
categories: categories.clone(),
|
categories: categories.clone(),
|
||||||
display_categories: display_categories.clone(),
|
display_categories: display_categories.clone(),
|
||||||
@@ -436,7 +445,7 @@ pub async fn index_local(
|
|||||||
log_downloads: (project.downloads.max(1) as f64).ln(),
|
log_downloads: (project.downloads.max(1) as f64).ln(),
|
||||||
icon_url: project.icon_url.clone(),
|
icon_url: project.icon_url.clone(),
|
||||||
author: owner.clone(),
|
author: owner.clone(),
|
||||||
indexed_author: owner.to_kebab_case(),
|
indexed_author: normalize_for_search(&owner),
|
||||||
date_created: project.approved,
|
date_created: project.approved,
|
||||||
created_timestamp: project.approved.timestamp(),
|
created_timestamp: project.approved.timestamp(),
|
||||||
date_modified: project.updated,
|
date_modified: project.updated,
|
||||||
@@ -614,3 +623,31 @@ async fn index_versions(
|
|||||||
|
|
||||||
Ok(res_versions)
|
Ok(res_versions)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_normalize_for_search_removes_special_chars() {
|
||||||
|
assert_eq!(normalize_for_search("Xaero's Minimap"), "xaeros-minimap");
|
||||||
|
assert_eq!(normalize_for_search("JourneyMap"), "journey-map");
|
||||||
|
assert_eq!(normalize_for_search("journey-map"), "journey-map");
|
||||||
|
assert_eq!(normalize_for_search("SomeUserName"), "some-user-name");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_normalize_for_search_handles_whitespace() {
|
||||||
|
assert_eq!(
|
||||||
|
normalize_for_search("Some Project Name"),
|
||||||
|
"some-project-name"
|
||||||
|
);
|
||||||
|
assert_eq!(normalize_for_search(" padded "), "padded");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_normalize_for_search_handles_numbers() {
|
||||||
|
assert_eq!(normalize_for_search("Project 123"), "project-123");
|
||||||
|
assert_eq!(normalize_for_search("Test 1.0"), "test-1-0");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -230,7 +230,7 @@ pub struct UploadSearchProject {
|
|||||||
pub author: String,
|
pub author: String,
|
||||||
pub indexed_author: String,
|
pub indexed_author: String,
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub indexed_title: String,
|
pub indexed_name: String,
|
||||||
pub summary: String,
|
pub summary: String,
|
||||||
pub categories: Vec<String>,
|
pub categories: Vec<String>,
|
||||||
pub display_categories: Vec<String>,
|
pub display_categories: Vec<String>,
|
||||||
|
|||||||
Reference in New Issue
Block a user