From 31922bbc3f7c943a74db7d4e643634b5dee32d49 Mon Sep 17 00:00:00 2001 From: Anders Olsson Date: Thu, 17 Jan 2019 15:02:50 +0100 Subject: [PATCH] Collect a lot more data. --- src/probe/eniro.rs | 16 +++++++-- src/probe/hitta.rs | 21 ++++++++--- src/probe/konsument_info.rs | 13 +++++-- src/probe/telefonforsaljare.rs | 53 +++++++++++++++++++++++----- src/probe/vem_ringde.rs | 64 +++++++++++++++++++++++++++++++++- 5 files changed, 150 insertions(+), 17 deletions(-) diff --git a/src/probe/eniro.rs b/src/probe/eniro.rs index 06ea38f..a332317 100644 --- a/src/probe/eniro.rs +++ b/src/probe/eniro.rs @@ -4,6 +4,13 @@ use unhtml_derive::FromHtml; use crate::context::Context; use crate::probe::Probe; +#[derive(Debug, FromHtml)] +#[html(selector = ".CompanyResultListItem")] +struct Company { + #[html(selector = "h3.name > a", attr = "inner")] + name: String, +} + #[derive(Debug, FromHtml)] #[html(selector = ".PhoneNoHit")] struct Error { @@ -30,9 +37,14 @@ impl Probe for Eniro { body }; - if let Ok(error) = Error::from_html(&body) { + if let Ok(company) = Company::from_html(&body) { println!("eniro.se:"); - println!(" Antal sökningar på det här numret: {}", error.message); + println!(" {}", company.name); + + Ok(()) + } else if let Ok(error) = Error::from_html(&body) { + println!("eniro.se:"); + println!(" Antal sökningar på det här numret: {}", error.message); Ok(()) } else { diff --git a/src/probe/hitta.rs b/src/probe/hitta.rs index bcc993a..51c3f48 100644 --- a/src/probe/hitta.rs +++ b/src/probe/hitta.rs @@ -1,3 +1,4 @@ +use log::debug; use regex::Regex; use serde::Deserialize; @@ -19,7 +20,8 @@ struct Props { #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct PageProps { - phone_data: PhoneData, + status_code: Option, + phone_data: Option, } #[derive(Debug, Deserialize)] @@ -65,17 +67,28 @@ impl Probe for Hitta { if let Ok(data) = serde_json::from_str::(&json) { println!("hitta.se:"); - println!(" {}", data.props.page_props.phone_data.statistics_text); - for comment in &data.props.page_props.phone_data.comments { - println!(" * {}", comment.comment); + if let Some(phone_data) = data.props.page_props.phone_data { + println!(" {}", phone_data.statistics_text); + + for comment in &phone_data.comments { + println!(" * {}", comment.comment); + } + } else { + println!(" Vi hittar det mesta, men inte just den här sidan."); } Ok(()) } else { + if let Err(error) = serde_json::from_str::(&json) { + debug!("Hitta: failed to deserialize data: {:#?}", error); + } + Err(()) } } else { + debug!("Hitta: failed to find __NEXT_DATA__"); + Err(()) } } diff --git a/src/probe/konsument_info.rs b/src/probe/konsument_info.rs index 885afc7..7151c5a 100644 --- a/src/probe/konsument_info.rs +++ b/src/probe/konsument_info.rs @@ -11,6 +11,13 @@ struct Info { message: String, } +#[derive(Debug, FromHtml)] +#[html(selector = ".body-content > .row")] +struct Error { + #[html(selector = ".col-md-12", attr = "inner")] + message: String, +} + pub struct KonsumentInfo; impl Probe for KonsumentInfo { @@ -33,9 +40,11 @@ impl Probe for KonsumentInfo { println!("konsumentinfo.se:"); if let Ok(info) = Info::from_html(&body) { - println!(" {}", info.message); + println!(" {}", info.message); + } else if let Ok(error) = Error::from_html(&body) { + println!(" {}", error.message); } else { - println!(" Failed to find any data"); + println!(" Failed to find any data"); } Ok(()) diff --git a/src/probe/telefonforsaljare.rs b/src/probe/telefonforsaljare.rs index 694c7c6..a3209e3 100644 --- a/src/probe/telefonforsaljare.rs +++ b/src/probe/telefonforsaljare.rs @@ -1,14 +1,39 @@ -use unhtml::FromHtml; +use log::debug; +use unhtml::{self, FromHtml, VecFromHtml}; use unhtml_derive::FromHtml; use crate::context::Context; use crate::probe::Probe; #[derive(Debug, FromHtml)] -#[html(selector = "[itemtype='//data-vocabulary.org/Review-aggregate']")] -struct Info { - #[html(selector = "p", attr = "inner")] - message: String, +#[html(selector = "article")] +struct Page { + content: Content, + + #[html(selector = "#kommentarer > [itemtype='http://data-vocabulary.org/Review']")] + comments: Vec, +} + +#[derive(Debug, FromHtml)] +#[html(selector = "#content")] +struct Content { + #[html(selector = "p:nth-child(2)", attr = "inner")] + title: String, + + #[html(selector = "p:nth-child(5)", attr = "inner")] + history: String, +} + +#[derive(Debug, FromHtml)] +struct Comment { + #[html(selector = "small", attr = "datetime")] + datetime: String, + + #[html(selector = "h3", attr = "inner")] + title: String, + + #[html(selector = "[itemprop='description']", attr = "inner")] + comment: String, } pub struct Telefonforsaljare; @@ -32,10 +57,22 @@ impl Probe for Telefonforsaljare { println!("telefonforsaljare.nu:"); - if let Ok(info) = Info::from_html(&body) { - println!(" {}", info.message); + if let Ok(page) = Page::from_html(&body) { + println!(" {}", page.content.title); + println!(" {}", page.content.history); + + for comment in &page.comments { + println!( + " * {}: {} - {}", + comment.datetime, comment.title, comment.comment + ); + } } else { - println!(" Failed to find any data"); + if let Err(error) = Page::from_html(&body) { + debug!("telefonforsaljare: failed to parse page: {:#?}", error); + } + + println!(" Failed to find any data"); } Ok(()) diff --git a/src/probe/vem_ringde.rs b/src/probe/vem_ringde.rs index 4671f2a..6ce2328 100644 --- a/src/probe/vem_ringde.rs +++ b/src/probe/vem_ringde.rs @@ -1,6 +1,38 @@ +use log::debug; +use unhtml::{self, FromHtml, VecFromHtml}; +use unhtml_derive::FromHtml; + use crate::context::Context; use crate::probe::Probe; +#[derive(Debug, FromHtml)] +#[html(selector = "#content")] +struct Page { + #[html(selector = "#toporganisations > li")] + owners: Vec, + + #[html(selector = "#calls > ol.table > li")] + calls: Vec, +} + +#[derive(Debug, FromHtml)] +struct Owner { + #[html(selector = "a", attr = "inner")] + title: String, + + #[html(selector = "span", attr = "inner")] + calls: String, +} + +#[derive(Debug, FromHtml)] +struct Call { + #[html(selector = ".w40", attr = "inner")] + who: String, + + #[html(selector = ".w15", attr = "inner")] + date: String, +} + pub struct VemRingde; impl Probe for VemRingde { @@ -9,7 +41,7 @@ impl Probe for VemRingde { } fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> { - let _body = if let Some(cache) = ctx.cache_get("vem_ringde", &number) { + let body = if let Some(cache) = ctx.cache_get("vem_ringde", &number) { String::from_utf8(cache.data).unwrap() } else { let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap(); @@ -20,6 +52,36 @@ impl Probe for VemRingde { body }; + println!("vemringde.se:"); + + if let Ok(page) = Page::from_html(&body) { + if !page.owners.is_empty() { + println!(" ägare:"); + + for owner in &page.owners { + println!(" * {} - {}", owner.title, owner.calls); + } + } + + if !page.calls.is_empty() { + if !page.owners.is_empty() { + println!(); + } + + println!(" samtal:"); + + for call in &page.calls { + println!(" * {}: {}", call.date, call.who); + } + } + } else { + if let Err(error) = Page::from_html(&body) { + debug!("vemringde: failed to parse page: {:#?}", error); + } + + println!(" Failed to find any data"); + } + Err(()) } }