Compare commits
3 Commits
f30ce9d9dc
...
5ee9fd88f1
| Author | SHA1 | Date | |
|---|---|---|---|
| 5ee9fd88f1 | |||
| adc7c61ee2 | |||
| 91a9eb2964 |
@@ -4,6 +4,7 @@ pub mod audit;
|
||||
pub mod authority;
|
||||
pub mod catalog;
|
||||
pub mod fields;
|
||||
pub mod seed;
|
||||
pub mod vocab;
|
||||
|
||||
use sqlx::postgres::{PgPool, PgPoolOptions};
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
//! Seed data: a representative subset of the Spectrum Cataloguing field set.
|
||||
//!
|
||||
//! Idempotent — each vocabulary and field definition is created only if a row with
|
||||
//! that key does not already exist. Vocabularies are seeded empty; their terms are
|
||||
//! populated by the organization or a later import. The inventory-minimum fields
|
||||
//! (object number, name, location, …) live in the typed object core, not here.
|
||||
|
||||
use domain::{AuthorityKind, FieldType, LocalizedLabel, NewFieldDefinition, VocabularyId};
|
||||
|
||||
use crate::{fields, vocab};
|
||||
|
||||
/// Seed the Spectrum cataloguing vocabularies and field definitions on `conn`.
|
||||
/// Pass a transaction connection (`&mut *tx`) so the whole seed is atomic.
|
||||
pub async fn seed_spectrum_cataloguing(conn: &mut sqlx::PgConnection) -> Result<(), sqlx::Error> {
|
||||
let material = ensure_vocabulary(conn, "material").await?;
|
||||
let object_name = ensure_vocabulary(conn, "object_name").await?;
|
||||
let technique = ensure_vocabulary(conn, "technique").await?;
|
||||
|
||||
let definitions = [
|
||||
def(
|
||||
"object_type",
|
||||
FieldType::Term {
|
||||
vocabulary_id: object_name,
|
||||
},
|
||||
"identification",
|
||||
&[("sv", "Sakord"), ("en", "Object type")],
|
||||
),
|
||||
def(
|
||||
"title",
|
||||
FieldType::LocalizedText,
|
||||
"identification",
|
||||
&[("sv", "Titel"), ("en", "Title")],
|
||||
),
|
||||
def(
|
||||
"comments",
|
||||
FieldType::Text,
|
||||
"identification",
|
||||
&[("sv", "Kommentarer"), ("en", "Comments")],
|
||||
),
|
||||
def(
|
||||
"material",
|
||||
FieldType::Term {
|
||||
vocabulary_id: material,
|
||||
},
|
||||
"description",
|
||||
&[("sv", "Material"), ("en", "Material")],
|
||||
),
|
||||
def(
|
||||
"technique",
|
||||
FieldType::Term {
|
||||
vocabulary_id: technique,
|
||||
},
|
||||
"description",
|
||||
&[("sv", "Teknik"), ("en", "Technique")],
|
||||
),
|
||||
def(
|
||||
"physical_description",
|
||||
FieldType::Text,
|
||||
"description",
|
||||
&[("sv", "Fysisk beskrivning"), ("en", "Physical description")],
|
||||
),
|
||||
def(
|
||||
"dimensions",
|
||||
FieldType::Text,
|
||||
"description",
|
||||
&[("sv", "Mått"), ("en", "Dimensions")],
|
||||
),
|
||||
def(
|
||||
"inscription",
|
||||
FieldType::Text,
|
||||
"description",
|
||||
&[("sv", "Inskription"), ("en", "Inscription")],
|
||||
),
|
||||
def(
|
||||
"content_description",
|
||||
FieldType::Text,
|
||||
"content",
|
||||
&[
|
||||
("sv", "Innehållsbeskrivning"),
|
||||
("en", "Content description"),
|
||||
],
|
||||
),
|
||||
def(
|
||||
"production_date",
|
||||
FieldType::Date,
|
||||
"production",
|
||||
&[("sv", "Tillverkningsdatum"), ("en", "Production date")],
|
||||
),
|
||||
def(
|
||||
"production_place",
|
||||
FieldType::Authority {
|
||||
kind: Some(AuthorityKind::Place),
|
||||
},
|
||||
"production",
|
||||
&[("sv", "Tillverkningsplats"), ("en", "Production place")],
|
||||
),
|
||||
def(
|
||||
"production_person",
|
||||
FieldType::Authority {
|
||||
kind: Some(AuthorityKind::Person),
|
||||
},
|
||||
"production",
|
||||
&[("sv", "Tillverkare"), ("en", "Maker")],
|
||||
),
|
||||
];
|
||||
|
||||
for definition in &definitions {
|
||||
ensure_field_definition(conn, definition).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get-or-create a vocabulary by key, returning its id.
|
||||
async fn ensure_vocabulary(
|
||||
conn: &mut sqlx::PgConnection,
|
||||
key: &str,
|
||||
) -> Result<VocabularyId, sqlx::Error> {
|
||||
if let Some(existing) = vocab::vocabulary_by_key(&mut *conn, key).await? {
|
||||
Ok(existing.id)
|
||||
} else {
|
||||
Ok(vocab::create_vocabulary(&mut *conn, key).await?.id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a field definition only if its key is not already present.
|
||||
async fn ensure_field_definition(
|
||||
conn: &mut sqlx::PgConnection,
|
||||
definition: &NewFieldDefinition,
|
||||
) -> Result<(), sqlx::Error> {
|
||||
if fields::field_definition_by_key(&mut *conn, &definition.key)
|
||||
.await?
|
||||
.is_none()
|
||||
{
|
||||
fields::create_field_definition(&mut *conn, definition).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn def(
|
||||
key: &str,
|
||||
field_type: FieldType,
|
||||
group: &str,
|
||||
label_pairs: &[(&str, &str)],
|
||||
) -> NewFieldDefinition {
|
||||
NewFieldDefinition {
|
||||
key: key.to_owned(),
|
||||
field_type,
|
||||
required: false,
|
||||
group_key: Some(group.to_owned()),
|
||||
labels: label_pairs
|
||||
.iter()
|
||||
.map(|(lang, label)| LocalizedLabel {
|
||||
lang: (*lang).to_owned(),
|
||||
label: (*label).to_owned(),
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
use db::{Db, fields, seed, vocab};
|
||||
use domain::{AuthorityKind, FieldType};
|
||||
use sqlx::PgPool;
|
||||
|
||||
#[sqlx::test]
|
||||
async fn seed_creates_vocabularies_and_field_definitions(pool: PgPool) {
|
||||
let db = Db::from_pool(pool);
|
||||
|
||||
let mut tx = db.pool().begin().await.unwrap();
|
||||
seed::seed_spectrum_cataloguing(&mut tx).await.unwrap();
|
||||
tx.commit().await.unwrap();
|
||||
|
||||
for key in ["material", "object_name", "technique"] {
|
||||
assert!(
|
||||
vocab::vocabulary_by_key(db.pool(), key)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_some(),
|
||||
"vocabulary {key} should be seeded"
|
||||
);
|
||||
}
|
||||
|
||||
let material_vocab = vocab::vocabulary_by_key(db.pool(), "material")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let material_field = fields::field_definition_by_key(db.pool(), "material")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
material_field.field_type,
|
||||
FieldType::Term {
|
||||
vocabulary_id: material_vocab.id
|
||||
}
|
||||
);
|
||||
assert_eq!(material_field.group_key.as_deref(), Some("description"));
|
||||
|
||||
let place = fields::field_definition_by_key(db.pool(), "production_place")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
place.field_type,
|
||||
FieldType::Authority {
|
||||
kind: Some(AuthorityKind::Place)
|
||||
}
|
||||
);
|
||||
|
||||
let title = fields::field_definition_by_key(db.pool(), "title")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(title.field_type, FieldType::LocalizedText);
|
||||
let date = fields::field_definition_by_key(db.pool(), "production_date")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(date.field_type, FieldType::Date);
|
||||
|
||||
assert_eq!(
|
||||
fields::list_field_definitions(db.pool())
|
||||
.await
|
||||
.unwrap()
|
||||
.len(),
|
||||
12
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test]
|
||||
async fn seed_is_idempotent(pool: PgPool) {
|
||||
let db = Db::from_pool(pool);
|
||||
|
||||
for _ in 0..2 {
|
||||
let mut tx = db.pool().begin().await.unwrap();
|
||||
seed::seed_spectrum_cataloguing(&mut tx).await.unwrap();
|
||||
tx.commit().await.unwrap();
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
fields::list_field_definitions(db.pool())
|
||||
.await
|
||||
.unwrap()
|
||||
.len(),
|
||||
12
|
||||
);
|
||||
for key in ["material", "object_name", "technique"] {
|
||||
assert!(
|
||||
vocab::vocabulary_by_key(db.pool(), key)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_some(),
|
||||
"vocabulary {key} should remain after re-seeding"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
# Spectrum Cataloguing Seed Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Seed a representative subset of the Spectrum Cataloguing field set — empty controlled vocabularies + the descriptive field definitions that bind to them and to authorities — turning the abstract registry (Plans 2/4) into usable museum fields. Idempotent; no terms seeded (orgs/imports populate vocabularies later).
|
||||
|
||||
**Architecture:** A new `db::seed` module with `seed_spectrum_cataloguing(&mut PgConnection)`: get-or-create the vocabularies by key, then get-or-create each field definition by key (using the vocabularies' ids for `Term`-bound fields). Built entirely on the existing `db::vocab`/`db::fields` repositories. No migration, no domain changes. Invoking the seed (CLI / server flag / per-org provisioning) is a deferred follow-on.
|
||||
|
||||
**Tech Stack:** Rust 2024, sqlx 0.8. Tests use `#[sqlx::test]`.
|
||||
|
||||
## Design decisions (approved)
|
||||
- Representative subset (~12 descriptive fields + 3 vocabularies), not all ~90 Spectrum units; the inventory minimum stays in the typed core (Plan 3).
|
||||
- Seed empty vocabularies + the field definitions only — not terms.
|
||||
- Idempotent (get-or-create by unique key); safe to re-run.
|
||||
- Wiring (how/when the seed runs) deferred.
|
||||
|
||||
## Prerequisites
|
||||
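- Postgres for tests; pass `DATABASE_URL` inline. Pass transaction connections as `&mut tx` (NOT `&mut *tx`): the seed takes a concrete `&mut PgConnection`, so the transaction deref-coerces to it without an explicit reborrow.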
|
||||
|
||||
## File Structure
|
||||
```
|
||||
crates/db/
|
||||
src/seed.rs seed_spectrum_cataloguing + helpers
|
||||
src/lib.rs pub mod seed;
|
||||
tests/seed.rs
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 1: `db::seed` — Spectrum cataloguing seed
|
||||
|
||||
**Files:** create `crates/db/src/seed.rs`, `crates/db/tests/seed.rs`; modify `crates/db/src/lib.rs`.
|
||||
|
||||
- [ ] **Step 1: Write the failing test** `crates/db/tests/seed.rs`:
|
||||
```rust
|
||||
use db::{Db, fields, seed, vocab};
|
||||
use domain::{AuthorityKind, FieldType};
|
||||
use sqlx::PgPool;
|
||||
|
||||
#[sqlx::test]
|
||||
async fn seed_creates_vocabularies_and_field_definitions(pool: PgPool) {
|
||||
let db = Db::from_pool(pool);
|
||||
|
||||
let mut tx = db.pool().begin().await.unwrap();
|
||||
seed::seed_spectrum_cataloguing(&mut tx).await.unwrap();
|
||||
tx.commit().await.unwrap();
|
||||
|
||||
for key in ["material", "object_name", "technique"] {
|
||||
assert!(
|
||||
vocab::vocabulary_by_key(db.pool(), key).await.unwrap().is_some(),
|
||||
"vocabulary {key} should be seeded"
|
||||
);
|
||||
}
|
||||
|
||||
// a Term field is bound to the right vocabulary
|
||||
let material_vocab = vocab::vocabulary_by_key(db.pool(), "material").await.unwrap().unwrap();
|
||||
let material_field = fields::field_definition_by_key(db.pool(), "material").await.unwrap().unwrap();
|
||||
assert_eq!(material_field.field_type, FieldType::Term { vocabulary_id: material_vocab.id });
|
||||
|
||||
// an Authority field carries its kind
|
||||
let place = fields::field_definition_by_key(db.pool(), "production_place").await.unwrap().unwrap();
|
||||
assert_eq!(place.field_type, FieldType::Authority { kind: Some(AuthorityKind::Place) });
|
||||
|
||||
// a localized-text and a date field exist
|
||||
let title = fields::field_definition_by_key(db.pool(), "title").await.unwrap().unwrap();
|
||||
assert_eq!(title.field_type, FieldType::LocalizedText);
|
||||
let date = fields::field_definition_by_key(db.pool(), "production_date").await.unwrap().unwrap();
|
||||
assert_eq!(date.field_type, FieldType::Date);
|
||||
|
||||
assert_eq!(fields::list_field_definitions(db.pool()).await.unwrap().len(), 12);
|
||||
}
|
||||
|
||||
#[sqlx::test]
|
||||
async fn seed_is_idempotent(pool: PgPool) {
|
||||
let db = Db::from_pool(pool);
|
||||
|
||||
for _ in 0..2 {
|
||||
let mut tx = db.pool().begin().await.unwrap();
|
||||
seed::seed_spectrum_cataloguing(&mut tx).await.unwrap();
|
||||
tx.commit().await.unwrap();
|
||||
}
|
||||
|
||||
// re-running did not duplicate (would have hit the UNIQUE key constraints otherwise)
|
||||
assert_eq!(fields::list_field_definitions(db.pool()).await.unwrap().len(), 12);
|
||||
let materials = vocab::vocabulary_by_key(db.pool(), "material").await.unwrap();
|
||||
assert!(materials.is_some());
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run to verify it fails.** `DATABASE_URL=<url> cargo test -p db --test seed` → FAIL (`db::seed` missing).
|
||||
|
||||
- [ ] **Step 3: Implement** `crates/db/src/seed.rs`:
|
||||
```rust
|
||||
//! Seed data: a representative subset of the Spectrum Cataloguing field set.
|
||||
//!
|
||||
//! Idempotent — each vocabulary and field definition is created only if a row with
|
||||
//! that key does not already exist. Vocabularies are seeded empty; their terms are
|
||||
//! populated by the organization or a later import. The inventory-minimum fields
|
||||
//! (object number, name, location, …) live in the typed object core, not here.
|
||||
|
||||
use domain::{AuthorityKind, FieldType, LocalizedLabel, NewFieldDefinition, VocabularyId};
|
||||
|
||||
use crate::{fields, vocab};
|
||||
|
||||
/// Seed the Spectrum cataloguing vocabularies and field definitions on `conn`.
|
||||
/// Pass a transaction connection (`&mut tx`) so the whole seed is atomic.
|
||||
pub async fn seed_spectrum_cataloguing(conn: &mut sqlx::PgConnection) -> Result<(), sqlx::Error> {
|
||||
let material = ensure_vocabulary(conn, "material").await?;
|
||||
let object_name = ensure_vocabulary(conn, "object_name").await?;
|
||||
let technique = ensure_vocabulary(conn, "technique").await?;
|
||||
|
||||
let definitions = [
|
||||
def("object_type", FieldType::Term { vocabulary_id: object_name }, "identification",
|
||||
&[("sv", "Sakord"), ("en", "Object type")]),
|
||||
def("title", FieldType::LocalizedText, "identification",
|
||||
&[("sv", "Titel"), ("en", "Title")]),
|
||||
def("comments", FieldType::Text, "identification",
|
||||
&[("sv", "Kommentarer"), ("en", "Comments")]),
|
||||
def("material", FieldType::Term { vocabulary_id: material }, "description",
|
||||
&[("sv", "Material"), ("en", "Material")]),
|
||||
def("technique", FieldType::Term { vocabulary_id: technique }, "description",
|
||||
&[("sv", "Teknik"), ("en", "Technique")]),
|
||||
def("physical_description", FieldType::Text, "description",
|
||||
&[("sv", "Fysisk beskrivning"), ("en", "Physical description")]),
|
||||
def("dimensions", FieldType::Text, "description",
|
||||
&[("sv", "Mått"), ("en", "Dimensions")]),
|
||||
def("inscription", FieldType::Text, "description",
|
||||
&[("sv", "Inskription"), ("en", "Inscription")]),
|
||||
def("content_description", FieldType::Text, "content",
|
||||
&[("sv", "Innehållsbeskrivning"), ("en", "Content description")]),
|
||||
def("production_date", FieldType::Date, "production",
|
||||
&[("sv", "Tillverkningsdatum"), ("en", "Production date")]),
|
||||
def("production_place", FieldType::Authority { kind: Some(AuthorityKind::Place) }, "production",
|
||||
&[("sv", "Tillverkningsplats"), ("en", "Production place")]),
|
||||
def("production_person", FieldType::Authority { kind: Some(AuthorityKind::Person) }, "production",
|
||||
&[("sv", "Tillverkare"), ("en", "Maker")]),
|
||||
];
|
||||
|
||||
for definition in &definitions {
|
||||
ensure_field_definition(conn, definition).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get-or-create a vocabulary by key, returning its id.
|
||||
async fn ensure_vocabulary(
|
||||
conn: &mut sqlx::PgConnection,
|
||||
key: &str,
|
||||
) -> Result<VocabularyId, sqlx::Error> {
|
||||
if let Some(existing) = vocab::vocabulary_by_key(&mut *conn, key).await? {
|
||||
Ok(existing.id)
|
||||
} else {
|
||||
Ok(vocab::create_vocabulary(&mut *conn, key).await?.id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a field definition only if its key is not already present.
|
||||
async fn ensure_field_definition(
|
||||
conn: &mut sqlx::PgConnection,
|
||||
definition: &NewFieldDefinition,
|
||||
) -> Result<(), sqlx::Error> {
|
||||
if fields::field_definition_by_key(&mut *conn, &definition.key).await?.is_none() {
|
||||
fields::create_field_definition(&mut *conn, definition).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn def(
|
||||
key: &str,
|
||||
field_type: FieldType,
|
||||
group: &str,
|
||||
label_pairs: &[(&str, &str)],
|
||||
) -> NewFieldDefinition {
|
||||
NewFieldDefinition {
|
||||
key: key.to_owned(),
|
||||
field_type,
|
||||
required: false,
|
||||
group_key: Some(group.to_owned()),
|
||||
labels: label_pairs
|
||||
.iter()
|
||||
.map(|(lang, label)| LocalizedLabel { lang: (*lang).to_owned(), label: (*label).to_owned() })
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
```
|
||||
Add to `crates/db/src/lib.rs` (top-level): `pub mod seed;`
|
||||
|
||||
- [ ] **Step 4: Run to verify it passes.** `DATABASE_URL=<url> cargo test -p db --test seed` → PASS (2 tests).
|
||||
|
||||
- [ ] **Step 5: Full workspace check.**
|
||||
```bash
|
||||
cargo +nightly fmt --check
|
||||
DATABASE_URL=<url> cargo clippy --workspace --all-targets -- -D warnings
|
||||
DATABASE_URL=<url> cargo test --workspace
|
||||
```
|
||||
Expected: all green.
|
||||
|
||||
- [ ] **Step 6: Commit.**
|
||||
```bash
|
||||
git add crates/db
|
||||
git commit -m "feat(db): seed a representative Spectrum cataloguing field set (idempotent)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Self-Review (completed)
|
||||
|
||||
**Spec coverage:**
|
||||
- Representative Spectrum descriptive field set as vocabularies + field definitions → the `definitions` array + `ensure_*`. ✓
|
||||
- Empty vocabularies, no terms; inventory minimum stays in the core. ✓
|
||||
- Idempotent (get-or-create by key) → `ensure_vocabulary`/`ensure_field_definition`; tested by `seed_is_idempotent`. ✓
|
||||
- Built on existing repos; no migration/domain change; SQL stays in `db`. ✓
|
||||
- Wiring deferred. ✓ (intentional)
|
||||
|
||||
**Placeholder scan:** none. `<url>` is the documented `DATABASE_URL`.
|
||||
|
||||
**Type consistency:** `seed_spectrum_cataloguing(&mut PgConnection) -> Result<(), sqlx::Error>`; uses `vocab::vocabulary_by_key`/`create_vocabulary`, `fields::field_definition_by_key`/`create_field_definition`, and `domain::{FieldType, NewFieldDefinition, LocalizedLabel, AuthorityKind, VocabularyId}` exactly as defined. The test's expected count (12) matches the `definitions` array length.
|
||||
|
||||
## Notes for follow-on plans
|
||||
- **Wiring the seed:** options are a server `--seed`/config flag at startup, a small CLI subcommand, or running it as part of per-org provisioning (the control plane). Decide alongside the provisioning work.
|
||||
- **Populating vocabulary terms:** Getty AAT / KulturNav / Wikidata import (VISION post-MVP) fills the empty `material`/`object_name`/`technique` vocabularies.
|
||||
- The seeded set is a starting point — extend toward the full Spectrum unit list (`reference/spectrum-5.0-cataloguing-units-of-information.md`) as needed.
|
||||
Reference in New Issue
Block a user