From 84c4c2807b322a2bd8d4fb1570539b82db0f1c17 Mon Sep 17 00:00:00 2001
From: Anders Olsson
Date: Thu, 4 Jun 2026 10:46:54 +0200
Subject: [PATCH] feat(search): search_objects returns highlighted hits + estimated total

The `visibility` argument is embedded in the Meilisearch filter as an escaped,
quoted literal so a caller-supplied value cannot add clauses to the filter.

Co-Authored-By: Claude Sonnet 4.6
---
 crates/search/Cargo.toml      |   1 +
 crates/search/src/lib.rs      | 136 ++++++++++++++++++++++++++++++++++
 crates/search/tests/search.rs |  59 ++++++++++++++-
 3 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/crates/search/Cargo.toml b/crates/search/Cargo.toml
index 858a735..a5da593 100644
--- a/crates/search/Cargo.toml
+++ b/crates/search/Cargo.toml
@@ -11,6 +11,7 @@ thiserror.workspace = true
 domain = { path = "../domain" }
 db = { path = "../db" }
 sqlx.workspace = true
+serde_json.workspace = true
 
 [dev-dependencies]
 tokio.workspace = true
diff --git a/crates/search/src/lib.rs b/crates/search/src/lib.rs
index 8dbe066..a74f202 100644
--- a/crates/search/src/lib.rs
+++ b/crates/search/src/lib.rs
@@ -8,6 +8,7 @@
 
 use db::Db;
 use domain::{CatalogueObject, ObjectId};
+use meilisearch_sdk::search::Selectors;
 use meilisearch_sdk::tasks::Task;
 use serde::{Deserialize, Serialize};
 
@@ -39,6 +40,31 @@ pub struct SearchDocument {
     pub fields_text: Vec,
 }
 
+/// Non-HTML highlight markers. These ASCII control characters cannot occur in
+/// catalogue text, so the frontend can safely split on them to render matches —
+/// no HTML ever crosses the API boundary.
+pub const HL_PRE: &str = "\u{2}";
+pub const HL_POST: &str = "\u{3}";
+
+/// One search result: display metadata projected from the index, plus an optional
+/// snippet of matched text with [`HL_PRE`]/[`HL_POST`] markers around the matches.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SearchHit {
+    pub id: String,
+    pub object_number: String,
+    pub object_name: String,
+    pub brief_description: Option,
+    pub visibility: String,
+    pub snippet: Option,
+}
+
+/// A page of search results plus Meilisearch's estimate of the total match count.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SearchResults {
+    pub hits: Vec,
+    pub estimated_total: usize,
+}
+
 /// A Meilisearch-backed search client scoped to one index.
 #[derive(Clone)]
 pub struct SearchClient {
@@ -147,6 +173,66 @@ impl SearchClient {
             .collect()
     }
 
+    /// Full-text query returning display-ready hits with highlighted snippets and the
+    /// estimated total match count. `visibility`, when set, filters on the indexed
+    /// `visibility` attribute. Pagination is offset/limit.
+    ///
+    /// The `visibility` value is embedded as an escaped, quoted literal, so it is only
+    /// ever compared for equality and cannot add clauses to the filter expression.
+    pub async fn search_objects(
+        &self,
+        query: &str,
+        visibility: Option<&str>,
+        offset: usize,
+        limit: usize,
+    ) -> Result {
+        let index = self.client.index(&self.index_uid);
+
+        let filter = visibility.map(|v| format!("visibility = {}", quote_filter_value(v)));
+        let highlight: &[&str] = &["object_name", "brief_description", "fields_text"];
+        let crop: &[(&str, Option)] = &[("brief_description", None), ("fields_text", None)];
+
+        let mut search = index.search();
+        search
+            .with_query(query)
+            .with_offset(offset)
+            .with_limit(limit)
+            .with_attributes_to_highlight(Selectors::Some(highlight))
+            .with_attributes_to_crop(Selectors::Some(crop))
+            .with_crop_length(20)
+            .with_highlight_pre_tag(HL_PRE)
+            .with_highlight_post_tag(HL_POST);
+
+        if let Some(filter) = &filter {
+            search.with_filter(filter);
+        }
+
+        let results = search.execute::().await?;
+
+        let hits = results
+            .hits
+            .into_iter()
+            .map(|hit| {
+                let snippet = hit.formatted_result.as_ref().and_then(extract_snippet);
+                let doc = hit.result;
+
+                SearchHit {
+                    id: doc.id,
+                    object_number: doc.object_number,
+                    object_name: doc.object_name,
+                    brief_description: doc.brief_description,
+                    visibility: doc.visibility,
+                    snippet,
+                }
+            })
+            .collect();
+
+        Ok(SearchResults {
+            hits,
+            estimated_total: results.estimated_total_hits.unwrap_or(0),
+        })
+    }
+
     /// Sync a single object's index entry with the database after a catalogue write
     /// commits: re-project and index it if it still exists, otherwise remove it. This
     /// is the uniform on-write path for create/update/delete/field/visibility changes —
@@ -272,3 +358,53 @@ pub async fn build_document(
         fields_text,
     })
 }
+
+/// Pick the best snippet from Meilisearch's `_formatted` map: prefer a highlighted
+/// `brief_description`, then a highlighted `fields_text` entry, then `object_name`;
+/// fall back to an unhighlighted `brief_description` so a hit still shows context.
+fn extract_snippet(formatted: &serde_json::Map) -> Option {
+    let has_mark = |s: &str| s.contains(HL_PRE);
+
+    if let Some(serde_json::Value::String(s)) = formatted.get("brief_description") {
+        if has_mark(s) {
+            return Some(s.clone());
+        }
+    }
+
+    if let Some(serde_json::Value::Array(items)) = formatted.get("fields_text") {
+        for item in items {
+            if let Some(s) = item.as_str() {
+                if has_mark(s) {
+                    return Some(s.to_owned());
+                }
+            }
+        }
+    }
+
+    if let Some(serde_json::Value::String(s)) = formatted.get("object_name") {
+        if has_mark(s) {
+            return Some(s.clone());
+        }
+    }
+
+    if let Some(serde_json::Value::String(s)) = formatted.get("brief_description") {
+        return Some(s.clone());
+    }
+
+    None
+}
+
+/// Render `value` as a double-quoted Meilisearch filter literal, backslash-escaping
+/// `\` and `"` so a caller-supplied value cannot close the quotes and add clauses.
+fn quote_filter_value(value: &str) -> String {
+    let mut quoted = String::with_capacity(value.len() + 2);
+    quoted.push('"');
+    for ch in value.chars() {
+        if matches!(ch, '\\' | '"') {
+            quoted.push('\\');
+        }
+        quoted.push(ch);
+    }
+    quoted.push('"');
+    quoted
+}
diff --git a/crates/search/tests/search.rs b/crates/search/tests/search.rs
index dcac090..dec4543 100644
--- a/crates/search/tests/search.rs
+++ b/crates/search/tests/search.rs
@@ -1,4 +1,4 @@
-use search::{SearchClient, SearchDocument};
+use search::{self, SearchClient, SearchDocument};
 
 fn meili() -> (String, String) {
     (
@@ -51,6 +51,63 @@ async fn index_search_and_remove() {
     assert!(client.search("wood").await.unwrap().is_empty());
 }
 
+#[tokio::test]
+async fn search_objects_returns_hits_with_highlight_filter_and_paging() {
+    let (url, key) = meili();
+    let client = SearchClient::connect(&url, &key, &unique_index()).unwrap();
+    client.ensure_index().await.unwrap();
+
+    let a = domain::ObjectId::new();
+    let b = domain::ObjectId::new();
+    let c = domain::ObjectId::new();
+    let mut bronze_a = doc(
+        &a.to_string(),
+        "Bronze figurine",
+        &["cast bronze with green patina"],
+    );
+    bronze_a.visibility = "public".to_string();
+    let mut bronze_b = doc(&b.to_string(), "Ceremonial bowl", &["bronze alloy rim"]);
+    bronze_b.visibility = "public".to_string();
+    let mut bronze_c = doc(&c.to_string(), "Door fitting", &["bronze hinge"]);
+    bronze_c.visibility = "draft".to_string();
+    client.index_object(&bronze_a).await.unwrap();
+    client.index_object(&bronze_b).await.unwrap();
+    client.index_object(&bronze_c).await.unwrap();
+
+    let results = client.search_objects("bronze", None, 0, 20).await.unwrap();
+    assert_eq!(results.estimated_total, 3);
+    assert_eq!(results.hits.len(), 3);
+
+    let hit = results.hits.iter().find(|h| h.id == a.to_string()).unwrap();
+    assert_eq!(hit.object_name, "Bronze figurine");
+    assert_eq!(hit.object_number, format!("N-{a}"));
+    let snippet = hit.snippet.as_ref().expect("a matched snippet");
+    assert!(
+        snippet.contains(search::HL_PRE),
+        "snippet must mark the match"
+    );
+    assert!(snippet.contains(search::HL_POST));
+
+    let public = client
+        .search_objects("bronze", Some("public"), 0, 20)
+        .await
+        .unwrap();
+    assert_eq!(public.estimated_total, 2);
+    assert!(public.hits.iter().all(|h| h.visibility == "public"));
+
+    // A quote in the visibility value must stay inside the filter literal rather than
+    // widening the filter to other visibilities.
+    let injected = client
+        .search_objects("bronze", Some("public\" OR visibility = \"draft"), 0, 20)
+        .await
+        .unwrap();
+    assert!(injected.hits.is_empty());
+
+    let page = client.search_objects("bronze", None, 0, 1).await.unwrap();
+    assert_eq!(page.hits.len(), 1);
+    assert_eq!(page.estimated_total, 3);
+}
+
 #[tokio::test]
 async fn ensure_index_is_idempotent() {
     let (url, key) = meili();