feat(search): search_objects returns highlighted hits + estimated total
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ thiserror.workspace = true
|
|||||||
domain = { path = "../domain" }
|
domain = { path = "../domain" }
|
||||||
db = { path = "../db" }
|
db = { path = "../db" }
|
||||||
sqlx.workspace = true
|
sqlx.workspace = true
|
||||||
|
serde_json.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tokio.workspace = true
|
tokio.workspace = true
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
use db::Db;
|
use db::Db;
|
||||||
use domain::{CatalogueObject, ObjectId};
|
use domain::{CatalogueObject, ObjectId};
|
||||||
|
use meilisearch_sdk::search::Selectors;
|
||||||
use meilisearch_sdk::tasks::Task;
|
use meilisearch_sdk::tasks::Task;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
@@ -39,6 +40,31 @@ pub struct SearchDocument {
|
|||||||
pub fields_text: Vec<String>,
|
pub fields_text: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Marker inserted immediately before each highlighted match in a snippet.
///
/// This is the ASCII control character U+0002 rather than an HTML tag. Such
/// characters are not expected to occur in catalogue text, so the frontend can
/// split on the markers to render matches and no HTML crosses the API boundary.
pub const HL_PRE: &str = "\u{2}";

/// Marker inserted immediately after each highlighted match in a snippet.
///
/// This is the ASCII control character U+0003, the closing counterpart of
/// [`HL_PRE`].
pub const HL_POST: &str = "\u{3}";
|
||||||
|
|
||||||
|
/// One search result: display metadata projected from the index, plus an optional
|
||||||
|
/// snippet of matched text with [`HL_PRE`]/[`HL_POST`] markers around the matches.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SearchHit {
|
||||||
|
pub id: String,
|
||||||
|
pub object_number: String,
|
||||||
|
pub object_name: String,
|
||||||
|
pub brief_description: Option<String>,
|
||||||
|
pub visibility: String,
|
||||||
|
pub snippet: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A page of search results plus Meilisearch's estimate of the total match count.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SearchResults {
|
||||||
|
pub hits: Vec<SearchHit>,
|
||||||
|
pub estimated_total: usize,
|
||||||
|
}
|
||||||
|
|
||||||
/// A Meilisearch-backed search client scoped to one index.
|
/// A Meilisearch-backed search client scoped to one index.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct SearchClient {
|
pub struct SearchClient {
|
||||||
@@ -147,6 +173,63 @@ impl SearchClient {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Full-text query returning display-ready hits with highlighted snippets and the
|
||||||
|
/// estimated total match count. `visibility`, when set, filters on the indexed
|
||||||
|
/// `visibility` attribute. Pagination is offset/limit.
|
||||||
|
pub async fn search_objects(
|
||||||
|
&self,
|
||||||
|
query: &str,
|
||||||
|
visibility: Option<&str>,
|
||||||
|
offset: usize,
|
||||||
|
limit: usize,
|
||||||
|
) -> Result<SearchResults, SearchError> {
|
||||||
|
let index = self.client.index(&self.index_uid);
|
||||||
|
|
||||||
|
let filter = visibility.map(|v| format!("visibility = \"{v}\""));
|
||||||
|
let highlight: &[&str] = &["object_name", "brief_description", "fields_text"];
|
||||||
|
let crop: &[(&str, Option<usize>)] = &[("brief_description", None), ("fields_text", None)];
|
||||||
|
|
||||||
|
let mut search = index.search();
|
||||||
|
search
|
||||||
|
.with_query(query)
|
||||||
|
.with_offset(offset)
|
||||||
|
.with_limit(limit)
|
||||||
|
.with_attributes_to_highlight(Selectors::Some(highlight))
|
||||||
|
.with_attributes_to_crop(Selectors::Some(crop))
|
||||||
|
.with_crop_length(20)
|
||||||
|
.with_highlight_pre_tag(HL_PRE)
|
||||||
|
.with_highlight_post_tag(HL_POST);
|
||||||
|
|
||||||
|
if let Some(filter) = &filter {
|
||||||
|
search.with_filter(filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
let results = search.execute::<SearchDocument>().await?;
|
||||||
|
|
||||||
|
let hits = results
|
||||||
|
.hits
|
||||||
|
.into_iter()
|
||||||
|
.map(|hit| {
|
||||||
|
let snippet = hit.formatted_result.as_ref().and_then(extract_snippet);
|
||||||
|
let doc = hit.result;
|
||||||
|
|
||||||
|
SearchHit {
|
||||||
|
id: doc.id,
|
||||||
|
object_number: doc.object_number,
|
||||||
|
object_name: doc.object_name,
|
||||||
|
brief_description: doc.brief_description,
|
||||||
|
visibility: doc.visibility,
|
||||||
|
snippet,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(SearchResults {
|
||||||
|
hits,
|
||||||
|
estimated_total: results.estimated_total_hits.unwrap_or(0),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Sync a single object's index entry with the database after a catalogue write
|
/// Sync a single object's index entry with the database after a catalogue write
|
||||||
/// commits: re-project and index it if it still exists, otherwise remove it. This
|
/// commits: re-project and index it if it still exists, otherwise remove it. This
|
||||||
/// is the uniform on-write path for create/update/delete/field/visibility changes —
|
/// is the uniform on-write path for create/update/delete/field/visibility changes —
|
||||||
@@ -272,3 +355,38 @@ pub async fn build_document(
|
|||||||
fields_text,
|
fields_text,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pick the best snippet from Meilisearch's `_formatted` map: prefer a highlighted
|
||||||
|
/// `brief_description`, then a highlighted `fields_text` entry, then `object_name`;
|
||||||
|
/// fall back to an unhighlighted `brief_description` so a hit still shows context.
|
||||||
|
fn extract_snippet(formatted: &serde_json::Map<String, serde_json::Value>) -> Option<String> {
|
||||||
|
let has_mark = |s: &str| s.contains(HL_PRE);
|
||||||
|
|
||||||
|
if let Some(serde_json::Value::String(s)) = formatted.get("brief_description") {
|
||||||
|
if has_mark(s) {
|
||||||
|
return Some(s.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(serde_json::Value::Array(items)) = formatted.get("fields_text") {
|
||||||
|
for item in items {
|
||||||
|
if let Some(s) = item.as_str() {
|
||||||
|
if has_mark(s) {
|
||||||
|
return Some(s.to_owned());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(serde_json::Value::String(s)) = formatted.get("object_name") {
|
||||||
|
if has_mark(s) {
|
||||||
|
return Some(s.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(serde_json::Value::String(s)) = formatted.get("brief_description") {
|
||||||
|
return Some(s.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use search::{SearchClient, SearchDocument};
|
use search::{self, SearchClient, SearchDocument};
|
||||||
|
|
||||||
fn meili() -> (String, String) {
|
fn meili() -> (String, String) {
|
||||||
(
|
(
|
||||||
@@ -51,6 +51,55 @@ async fn index_search_and_remove() {
|
|||||||
assert!(client.search("wood").await.unwrap().is_empty());
|
assert!(client.search("wood").await.unwrap().is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn search_objects_returns_hits_with_highlight_filter_and_paging() {
|
||||||
|
let (url, key) = meili();
|
||||||
|
let client = SearchClient::connect(&url, &key, &unique_index()).unwrap();
|
||||||
|
client.ensure_index().await.unwrap();
|
||||||
|
|
||||||
|
let a = domain::ObjectId::new();
|
||||||
|
let b = domain::ObjectId::new();
|
||||||
|
let c = domain::ObjectId::new();
|
||||||
|
let mut bronze_a = doc(
|
||||||
|
&a.to_string(),
|
||||||
|
"Bronze figurine",
|
||||||
|
&["cast bronze with green patina"],
|
||||||
|
);
|
||||||
|
bronze_a.visibility = "public".to_string();
|
||||||
|
let mut bronze_b = doc(&b.to_string(), "Ceremonial bowl", &["bronze alloy rim"]);
|
||||||
|
bronze_b.visibility = "public".to_string();
|
||||||
|
let mut bronze_c = doc(&c.to_string(), "Door fitting", &["bronze hinge"]);
|
||||||
|
bronze_c.visibility = "draft".to_string();
|
||||||
|
client.index_object(&bronze_a).await.unwrap();
|
||||||
|
client.index_object(&bronze_b).await.unwrap();
|
||||||
|
client.index_object(&bronze_c).await.unwrap();
|
||||||
|
|
||||||
|
let results = client.search_objects("bronze", None, 0, 20).await.unwrap();
|
||||||
|
assert_eq!(results.estimated_total, 3);
|
||||||
|
assert_eq!(results.hits.len(), 3);
|
||||||
|
|
||||||
|
let hit = results.hits.iter().find(|h| h.id == a.to_string()).unwrap();
|
||||||
|
assert_eq!(hit.object_name, "Bronze figurine");
|
||||||
|
assert_eq!(hit.object_number, format!("N-{a}"));
|
||||||
|
let snippet = hit.snippet.as_ref().expect("a matched snippet");
|
||||||
|
assert!(
|
||||||
|
snippet.contains(search::HL_PRE),
|
||||||
|
"snippet must mark the match"
|
||||||
|
);
|
||||||
|
assert!(snippet.contains(search::HL_POST));
|
||||||
|
|
||||||
|
let public = client
|
||||||
|
.search_objects("bronze", Some("public"), 0, 20)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(public.estimated_total, 2);
|
||||||
|
assert!(public.hits.iter().all(|h| h.visibility == "public"));
|
||||||
|
|
||||||
|
let page = client.search_objects("bronze", None, 0, 1).await.unwrap();
|
||||||
|
assert_eq!(page.hits.len(), 1);
|
||||||
|
assert_eq!(page.estimated_total, 3);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn ensure_index_is_idempotent() {
|
async fn ensure_index_is_idempotent() {
|
||||||
let (url, key) = meili();
|
let (url, key) = meili();
|
||||||
|
|||||||
Reference in New Issue
Block a user