From f8555722af9d29ee140ac9fba5457ed0dc46a161 Mon Sep 17 00:00:00 2001 From: Anders Olsson Date: Fri, 5 Jun 2026 14:34:29 +0200 Subject: [PATCH] docs: add implementation plan for WASM provider service Co-Authored-By: Claude Opus 4.8 (1M context) --- .../plans/2026-06-05-wasm-provider-service.md | 2224 +++++++++++++++++ 1 file changed, 2224 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-05-wasm-provider-service.md diff --git a/docs/superpowers/plans/2026-06-05-wasm-provider-service.md b/docs/superpowers/plans/2026-06-05-wasm-provider-service.md new file mode 100644 index 0000000..178abac --- /dev/null +++ b/docs/superpowers/plans/2026-06-05-wasm-provider-service.md @@ -0,0 +1,2224 @@ +# WASM Provider Service Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Rebuild whoareyou as an async HTTP service that looks up Swedish phone numbers via WASM-component providers (hitta.se in v1), retiring the CLI. + +**Architecture:** Cargo workspace with an axum server hosting wasmtime; providers are pure WASM components (WIT contract: `metadata`/`requests`/`parse`) — the host fetches all URLs and caches parsed results in moka. Provider parse logic is plain Rust, unit-tested natively against HTML fixtures; WIT glue is a thin `cfg(wasm32)` layer. 
+ +**Tech Stack:** Rust edition 2024 · tokio · axum 0.8 · reqwest 0.13 · moka 0.12 · wasmtime + wasmtime-wasi 45 · wit-bindgen 0.57 · thiserror 2 · tracing · insta 1.47 + +**Spec:** `docs/superpowers/specs/2026-06-05-wasm-provider-service-design.md` + +--- + +## File structure + +``` +whoareyou/ +├── Cargo.toml # workspace (NEW) +├── justfile # build orchestration (NEW) +├── wit/provider.wit # provider contract (NEW) +├── crates/ +│ ├── server/ # package whoareyou-server (lib + bin) +│ │ ├── Cargo.toml +│ │ ├── src/lib.rs # module exports +│ │ ├── src/main.rs # wiring only +│ │ ├── src/config.rs # env config +│ │ ├── src/error.rs # HostError, FetchError, ConfigError +│ │ ├── src/model.rs # Entry, Comment, ProviderResult, API types +│ │ ├── src/service.rs # ProviderHandle + Fetch traits, LookupService +│ │ ├── src/fetch.rs # ReqwestFetcher +│ │ ├── src/http.rs # axum router, normalize() +│ │ ├── src/wasm.rs # wasmtime host, WasmProvider +│ │ └── tests/component.rs # loads the real .wasm +│ └── providers/hitta/ # package whoareyou-provider-hitta (cdylib+rlib) +│ ├── Cargo.toml +│ ├── src/lib.rs +│ ├── src/parser.rs # pure parse logic + native tests +│ └── src/component.rs # wit-bindgen glue (wasm32 only) +├── fixtures/hitta/*.html # KEPT (+ one fresh fixture) +├── fetch-fixture # KEPT, trimmed to hitta +└── DELETED: src/, definitions/, _build.rs, NOTEPAD.md, old Cargo.toml contents +``` + +`whoareyou-server` is a lib + thin bin so `tests/component.rs` can use its modules. + +--- + +### Task 1: Workspace scaffold & demolition + +**Files:** +- Delete: `src/`, `definitions/`, `_build.rs`, `NOTEPAD.md` +- Create: `Cargo.toml` (workspace), `wit/provider.wit`, `crates/server/{Cargo.toml,src/lib.rs,src/main.rs}`, `crates/providers/hitta/{Cargo.toml,src/lib.rs}` +- Modify: `.gitignore` + +- [ ] **Step 1: Install the wasm target** + +Run: `rustup target add wasm32-wasip2` +Expected: installs or "is up to date". 
+ +- [ ] **Step 2: Delete the old code** + +```bash +git rm -r src definitions _build.rs NOTEPAD.md +``` + +(The old hitta parser is reproduced in Task 3 — nothing needed from the deleted tree.) + +- [ ] **Step 3: Write the workspace `Cargo.toml`** (replaces the old package manifest) + +```toml +[workspace] +resolver = "3" +members = ["crates/server", "crates/providers/hitta"] + +[workspace.package] +version = "0.1.0" +edition = "2024" +authors = ["Anders Olsson "] +``` + +- [ ] **Step 4: Write `wit/provider.wit`** + +```wit +package whoareyou:provider@0.1.0; + +interface lookup { + record provider-info { + name: string, + version: string, + } + + record request { + url: string, + } + + record response { + status: u16, + body: string, + } + + record comment { + // NOTE(review): timestamp is Unix seconds; the integer width (u64) was reconstructed — confirm against the spec + timestamp: option<u64>, + title: option<string>, + message: string, + } + + record entry { + messages: list<string>, + history: list<string>, + comments: list<comment>, + } + + variant lookup-error { + no-data, + parse-failed(string), + } + + metadata: func() -> provider-info; + requests: func(number: string) -> list<request>; + parse: func(number: string, responses: list<response>) -> result<entry, lookup-error>; +} + +world provider { + export lookup; +} +``` + +- [ ] **Step 5: Create the server crate stub** + +`crates/server/Cargo.toml`: + +```toml +[package] +name = "whoareyou-server" +version.workspace = true +edition.workspace = true +authors.workspace = true + +[dependencies] + +[dev-dependencies] +``` + +`crates/server/src/lib.rs`: + +```rust +// modules added as they are implemented +``` + +`crates/server/src/main.rs`: + +```rust +fn main() {} +``` + +- [ ] **Step 6: Create the hitta provider crate stub** + +`crates/providers/hitta/Cargo.toml`: + +```toml +[package] +name = "whoareyou-provider-hitta" +version.workspace = true +edition.workspace = true +authors.workspace = true + +[lib] +crate-type = ["cdylib", "rlib"] + +[dependencies] + +[dev-dependencies] +``` + +`crates/providers/hitta/src/lib.rs`: + +```rust +// modules added as they are implemented +``` + +- [ ] **Step 
7: Ignore the components dir** + +Append to `.gitignore` (create if missing): + +``` +components/ +``` + +- [ ] **Step 8: Verify the workspace builds** + +Run: `cargo check --workspace` +Expected: success (two empty crates). `Cargo.lock` regenerates — that's fine. + +- [ ] **Step 9: Commit** + +```bash +git add -A +git commit -m "refactor!: replace CLI with workspace scaffold for WASM provider service" +``` + +--- + +### Task 2: Refresh hitta fixture & audit page structure + +The 2019 fixtures predate any hitta.se redesign. Before porting the parser, capture what the site serves **today** so Task 3 is written against reality. + +**Files:** +- Create: `fixtures/hitta/fresh-0104754350.html` + +- [ ] **Step 1: Fetch a fresh copy of a known number's page** + +Run: + +```bash +curl -sL -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \ + "https://www.hitta.se/vem-ringde/0104754350" \ + -o fixtures/hitta/fresh-0104754350.html +wc -c fixtures/hitta/fresh-0104754350.html +``` + +Expected: a non-trivial file (> 10 KB). If the response is a bot-block page (check with `head -c 2000`), retry with the `http --follow` (httpie) variant from `fetch-fixture`, or fetch the page in a real browser (View Source → save). The fixture MUST contain real page markup before continuing. + +- [ ] **Step 2: Audit the page structure** + +Run: + +```bash +grep -c "__NEXT_DATA__" fixtures/hitta/fresh-0104754350.html +grep -o '__NEXT_DATA__[^>]\{0,80\}' fixtures/hitta/fresh-0104754350.html | head -3 +``` + +Two outcomes — record which one applies; it determines Step 5 of Task 3: + +- **(a) `__NEXT_DATA__` still present.** Check whether it's still the 2019 inline-script form (`__NEXT_DATA__ = {...}`) or the modern `<script id="__NEXT_DATA__" type="application/json">` form. Note which. +- **(b) Gone entirely.** Inspect the page (`python3 -m json.tool` on any embedded JSON, or read the HTML) and locate where phone data + comments live now. 
Write down the JSON path to: comments list, comment text, comment timestamp, and the statistics/"X others searched" text — Task 3's serde structs must be adapted to those paths (the *shape* of the parser — regex/JSON extraction → typed structs → `ParsedEntry` — stays identical). + +- [ ] **Step 3: Commit the fixture** + +```bash +git add fixtures/hitta/fresh-0104754350.html +git commit -m "test: add fresh hitta.se fixture for parser port" +``` + +--- + +### Task 3: hitta parser (pure logic, native TDD) + +Port the old `src/probe/hitta.rs` parse logic (reproduced below) into the provider crate as plain functions. All tests run natively — no WASM involved. + +**Files:** +- Create: `crates/providers/hitta/src/parser.rs` +- Modify: `crates/providers/hitta/src/lib.rs`, `crates/providers/hitta/Cargo.toml` + +- [ ] **Step 1: Add dependencies** + +In `crates/providers/hitta/Cargo.toml` set: + +```toml +[dependencies] +regex = "1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +[dev-dependencies] +insta = { version = "1.47", features = ["yaml"] } +``` + +- [ ] **Step 2: Declare the module** + +`crates/providers/hitta/src/lib.rs`: + +```rust +pub mod parser; +``` + +- [ ] **Step 3: Write the failing tests** + +Append to `crates/providers/hitta/src/parser.rs` (create the file with ONLY this test module first; the types/functions it references don't exist yet, that's the point): + +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn requests_single_hitta_url() { + assert_eq!( + request_urls("0700000000"), + vec!["https://www.hitta.se/vem-ringde/0700000000".to_string()] + ); + } + + #[test] + fn parses_number_with_comments() { + let body = include_str!("../../../../fixtures/hitta/0104754350.html"); + let entry = parse(body).unwrap(); + + assert_eq!(entry.messages, Vec::<String>::new()); + assert_eq!(entry.history, vec!["42 andra har rapporterat detta nummer"]); + assert_eq!(entry.comments.len(), 29); + + // newest first + let first = 
&entry.comments[0]; + assert_eq!(first.timestamp, Some(1547746162)); // 2019-01-17T17:29:22Z + assert_eq!(first.title, None); + assert_eq!(first.message, "Varmsälj från Folksam"); + } + + #[test] + fn parses_number_with_history_only() { + let body = include_str!("../../../../fixtures/hitta/0702269893.html"); + let entry = parse(body).unwrap(); + + assert_eq!(entry.history, vec!["Tre andra har också sökt på detta nummer"]); + assert!(entry.comments.is_empty()); + } + + #[test] + fn no_phone_data_is_no_data() { + let body = include_str!("../../../../fixtures/hitta/0313908905.html"); + assert_eq!(parse(body), Err(ParseError::NoData)); + } + + #[test] + fn unparseable_page_is_failed() { + let body = include_str!("../../../../fixtures/hitta/0701807618.html"); + assert!(matches!(parse(body), Err(ParseError::Failed(_)))); + } + + #[test] + fn garbage_is_failed() { + assert!(matches!(parse(""), Err(ParseError::Failed(_)))); + } + + #[test] + fn parses_fresh_fixture() { + let body = include_str!("../../../../fixtures/hitta/fresh-0104754350.html"); + insta::assert_yaml_snapshot!(parse(body)); + } +} +``` + +Semantics note (differs from the old CLI): the old code returned `Ok` with an +all-empty entry when JSON parsed but `phoneData` was absent. That is now +`Err(ParseError::NoData)`. Old fixtures `0313908905`, `0751793426/83/99` fall +in that bucket; `0701807618`, `0546780862` fail the regex → `Failed`. + +- [ ] **Step 4: Run tests to verify they fail** + +Run: `cargo test -p whoareyou-provider-hitta` +Expected: COMPILE ERROR — `request_urls`, `parse`, `ParseError` not found. + +- [ ] **Step 5: Implement the parser** + +Prepend to `crates/providers/hitta/src/parser.rs` (above the test module). This is the 2019 logic ported; **if Task 2 found outcome (b) or the modern `