feat(search): search_objects returns highlighted hits + estimated total

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-04 10:46:54 +02:00
parent 38e4525404
commit 84c4c2807b
3 changed files with 169 additions and 1 deletions
+1
View File
@@ -11,6 +11,7 @@ thiserror.workspace = true
domain = { path = "../domain" } domain = { path = "../domain" }
db = { path = "../db" } db = { path = "../db" }
sqlx.workspace = true sqlx.workspace = true
serde_json.workspace = true
[dev-dependencies] [dev-dependencies]
tokio.workspace = true tokio.workspace = true
+118
View File
@@ -8,6 +8,7 @@
use db::Db; use db::Db;
use domain::{CatalogueObject, ObjectId}; use domain::{CatalogueObject, ObjectId};
use meilisearch_sdk::search::Selectors;
use meilisearch_sdk::tasks::Task; use meilisearch_sdk::tasks::Task;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -39,6 +40,31 @@ pub struct SearchDocument {
pub fields_text: Vec<String>, pub fields_text: Vec<String>,
} }
/// Non-HTML highlight markers. These ASCII control characters cannot occur in
/// catalogue text, so the frontend can safely split on them to render matches —
/// no HTML ever crosses the API boundary.
///
/// NOTE(review): nothing in this module enforces that catalogue text is free of
/// these characters; confirm that the write path rejects or strips them.
///
/// Opening marker, U+0002 (START OF TEXT). `search_objects` passes it to
/// Meilisearch as the highlight pre-tag, and `extract_snippet` looks for it to
/// decide whether a formatted value contains a match.
pub const HL_PRE: &str = "\u{2}";
/// Closing marker, U+0003 (END OF TEXT). `search_objects` passes it to
/// Meilisearch as the highlight post-tag.
pub const HL_POST: &str = "\u{3}";
/// One search result: display metadata projected from the index, plus an optional
/// snippet of matched text with [`HL_PRE`]/[`HL_POST`] markers around the matches.
///
/// All fields except `snippet` are copied verbatim from the matching
/// [`SearchDocument`] by `search_objects`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchHit {
/// Identifier of the indexed document.
pub id: String,
/// The object's catalogue number as stored in the index.
pub object_number: String,
/// The object's display name (unhighlighted).
pub object_name: String,
/// The object's brief description (unhighlighted), if the document has one.
pub brief_description: Option<String>,
/// The document's indexed `visibility` value (the tests use e.g. `"public"` and
/// `"draft"`).
pub visibility: String,
/// Snippet selected by `extract_snippet` from Meilisearch's formatted result.
/// `None` when the hit has no formatted result or no usable text in it. The
/// snippet may be unhighlighted when only the fallback description is available.
pub snippet: Option<String>,
}
/// A page of search results plus Meilisearch's estimate of the total match count.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResults {
/// The hits of the requested page, in the order Meilisearch returned them.
pub hits: Vec<SearchHit>,
/// Meilisearch's estimated number of matching documents across all pages;
/// `search_objects` stores `0` here when the response carries no estimate.
pub estimated_total: usize,
}
/// A Meilisearch-backed search client scoped to one index. /// A Meilisearch-backed search client scoped to one index.
#[derive(Clone)] #[derive(Clone)]
pub struct SearchClient { pub struct SearchClient {
@@ -147,6 +173,63 @@ impl SearchClient {
.collect() .collect()
} }
/// Full-text query returning display-ready hits with highlighted snippets and the
/// estimated total match count.
///
/// # Parameters
/// * `query` – the search text, forwarded to Meilisearch unchanged.
/// * `visibility` – when `Some`, restricts results to documents whose indexed
///   `visibility` attribute equals the given value. The value is embedded in the
///   filter as a double-quoted literal with `\` and `"` backslash-escaped, so a
///   caller-supplied value cannot terminate the literal early and alter the filter
///   expression.
/// * `offset`, `limit` – offset/limit pagination.
///
/// # Returns
/// One [`SearchHit`] per returned document, with metadata copied from the indexed
/// [`SearchDocument`] and a snippet chosen by `extract_snippet`. Also returns
/// Meilisearch's estimated total, or `0` when the response carries no estimate.
///
/// # Errors
/// Propagates a failure to execute the search request as a [`SearchError`].
pub async fn search_objects(
    &self,
    query: &str,
    visibility: Option<&str>,
    offset: usize,
    limit: usize,
) -> Result<SearchResults, SearchError> {
    let index = self.client.index(&self.index_uid);

    // Build the filter as a quoted string literal. Backslashes are escaped first so
    // the escapes added for quotes are not themselves doubled.
    let filter = visibility.map(|v| {
        let escaped = v.replace('\\', "\\\\").replace('"', "\\\"");
        format!("visibility = \"{escaped}\"")
    });

    // `object_name` is highlighted but not cropped; the longer text attributes are
    // both highlighted and cropped (per-attribute length `None` defers to the
    // query-wide crop length set below).
    let highlight: &[&str] = &["object_name", "brief_description", "fields_text"];
    let crop: &[(&str, Option<usize>)] = &[("brief_description", None), ("fields_text", None)];

    let mut search = index.search();
    search
        .with_query(query)
        .with_offset(offset)
        .with_limit(limit)
        .with_attributes_to_highlight(Selectors::Some(highlight))
        .with_attributes_to_crop(Selectors::Some(crop))
        .with_crop_length(20)
        // Control-character markers instead of HTML tags; see `HL_PRE`/`HL_POST`.
        .with_highlight_pre_tag(HL_PRE)
        .with_highlight_post_tag(HL_POST);
    if let Some(filter) = &filter {
        search.with_filter(filter);
    }

    let results = search.execute::<SearchDocument>().await?;

    let hits = results
        .hits
        .into_iter()
        .map(|hit| {
            // The snippet comes from the formatted (highlighted/cropped) copy; the
            // display fields come from the untouched document.
            let snippet = hit.formatted_result.as_ref().and_then(extract_snippet);
            let doc = hit.result;
            SearchHit {
                id: doc.id,
                object_number: doc.object_number,
                object_name: doc.object_name,
                brief_description: doc.brief_description,
                visibility: doc.visibility,
                snippet,
            }
        })
        .collect();

    Ok(SearchResults {
        hits,
        estimated_total: results.estimated_total_hits.unwrap_or(0),
    })
}
/// Sync a single object's index entry with the database after a catalogue write /// Sync a single object's index entry with the database after a catalogue write
/// commits: re-project and index it if it still exists, otherwise remove it. This /// commits: re-project and index it if it still exists, otherwise remove it. This
/// is the uniform on-write path for create/update/delete/field/visibility changes — /// is the uniform on-write path for create/update/delete/field/visibility changes —
@@ -272,3 +355,38 @@ pub async fn build_document(
fields_text, fields_text,
}) })
} }
/// Choose the snippet to show for a hit from Meilisearch's `_formatted` map.
///
/// Candidates are tried in priority order and the first one containing the
/// [`HL_PRE`] marker wins:
/// 1. `brief_description` (when it is a string),
/// 2. each string entry of the `fields_text` array, in order,
/// 3. `object_name` (when it is a string).
///
/// If no candidate is highlighted, the plain `brief_description` string is returned
/// so the hit still shows some context. Returns `None` when that is absent too.
fn extract_snippet(formatted: &serde_json::Map<String, serde_json::Value>) -> Option<String> {
    let description = formatted
        .get("brief_description")
        .and_then(|value| value.as_str());
    let name = formatted.get("object_name").and_then(|value| value.as_str());
    // Non-string array entries are skipped; a missing or non-array value yields
    // no candidates at all.
    let field_entries = formatted
        .get("fields_text")
        .and_then(|value| value.as_array())
        .into_iter()
        .flatten()
        .filter_map(|entry| entry.as_str());

    description
        .into_iter()
        .chain(field_entries)
        .chain(name)
        .find(|text| text.contains(HL_PRE))
        .or(description)
        .map(str::to_owned)
}
+50 -1
View File
@@ -1,4 +1,4 @@
use search::{SearchClient, SearchDocument}; use search::{self, SearchClient, SearchDocument};
fn meili() -> (String, String) { fn meili() -> (String, String) {
( (
@@ -51,6 +51,55 @@ async fn index_search_and_remove() {
assert!(client.search("wood").await.unwrap().is_empty()); assert!(client.search("wood").await.unwrap().is_empty());
} }
/// End-to-end check of `search_objects` against the Meilisearch instance described
/// by `meili()`: indexes three documents matching "bronze" (two public, one draft)
/// and verifies hit metadata, highlight markers in the snippet, the visibility
/// filter, and limit-based paging with an unchanged estimated total.
#[tokio::test]
async fn search_objects_returns_hits_with_highlight_filter_and_paging() {
    let (url, key) = meili();
    let client = SearchClient::connect(&url, &key, &unique_index()).unwrap();
    client.ensure_index().await.unwrap();

    let figurine_id = domain::ObjectId::new();
    let bowl_id = domain::ObjectId::new();
    let fitting_id = domain::ObjectId::new();

    let mut figurine = doc(
        &figurine_id.to_string(),
        "Bronze figurine",
        &["cast bronze with green patina"],
    );
    figurine.visibility = "public".to_string();
    let mut bowl = doc(&bowl_id.to_string(), "Ceremonial bowl", &["bronze alloy rim"]);
    bowl.visibility = "public".to_string();
    let mut fitting = doc(&fitting_id.to_string(), "Door fitting", &["bronze hinge"]);
    fitting.visibility = "draft".to_string();

    for document in [&figurine, &bowl, &fitting] {
        client.index_object(document).await.unwrap();
    }

    // Unfiltered: every document matches.
    let unfiltered = client.search_objects("bronze", None, 0, 20).await.unwrap();
    assert_eq!(unfiltered.estimated_total, 3);
    assert_eq!(unfiltered.hits.len(), 3);

    let figurine_key = figurine_id.to_string();
    let figurine_hit = unfiltered
        .hits
        .iter()
        .find(|candidate| candidate.id == figurine_key)
        .unwrap();
    assert_eq!(figurine_hit.object_name, "Bronze figurine");
    assert_eq!(figurine_hit.object_number, format!("N-{figurine_id}"));
    let snippet = figurine_hit.snippet.as_ref().expect("a matched snippet");
    assert!(
        snippet.contains(search::HL_PRE),
        "snippet must mark the match"
    );
    assert!(snippet.contains(search::HL_POST));

    // Filtered: only the two public documents remain.
    let public_only = client
        .search_objects("bronze", Some("public"), 0, 20)
        .await
        .unwrap();
    assert_eq!(public_only.estimated_total, 2);
    assert!(public_only
        .hits
        .iter()
        .all(|candidate| candidate.visibility == "public"));

    // Paged: one hit returned, total still counts all matches.
    let first_page = client.search_objects("bronze", None, 0, 1).await.unwrap();
    assert_eq!(first_page.hits.len(), 1);
    assert_eq!(first_page.estimated_total, 3);
}
#[tokio::test] #[tokio::test]
async fn ensure_index_is_idempotent() { async fn ensure_index_is_idempotent() {
let (url, key) = meili(); let (url, key) = meili();