From 7f12e84accc93216a05a5a9fcb00d824e2ff0665 Mon Sep 17 00:00:00 2001 From: Anders Olsson Date: Thu, 7 Feb 2019 11:42:19 +0100 Subject: [PATCH] More tests, collect more data, and refactor some code. --- fixtures/eniro/0546780862.html | 141 ++++++++ fixtures/hitta/0546780862.html | 102 ++++++ fixtures/konsumentinfo/0546780862.html | 78 +++++ fixtures/telefonforsaljare/0546780862.html | 172 ++++++++++ fixtures/vemringde/0546780862.html | 370 +++++++++++++++++++++ src/entry.rs | 131 ++++++++ src/lib.rs | 1 + src/probe.rs | 66 +--- src/probe/eniro.rs | 14 +- src/probe/hitta.rs | 111 ++++--- src/probe/konsument_info.rs | 7 + src/probe/telefonforsaljare.rs | 53 +-- src/probe/vem_ringde.rs | 154 +++++++-- 13 files changed, 1240 insertions(+), 160 deletions(-) create mode 100644 fixtures/eniro/0546780862.html create mode 100644 fixtures/hitta/0546780862.html create mode 100644 fixtures/konsumentinfo/0546780862.html create mode 100644 fixtures/telefonforsaljare/0546780862.html create mode 100644 fixtures/vemringde/0546780862.html create mode 100644 src/entry.rs diff --git a/fixtures/eniro/0546780862.html b/fixtures/eniro/0546780862.html new file mode 100644 index 0000000..f502892 --- /dev/null +++ b/fixtures/eniro/0546780862.html @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0546780862 | Företag | eniro.se + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
Vägbeskrivning

0546780862

gav 1 företag
Karta
Din sökning på 0546780862 gav 1 företag och du har nått slutet av listan.
+
+ + + + + + + + + + + + diff --git a/fixtures/hitta/0546780862.html b/fixtures/hitta/0546780862.html new file mode 100644 index 0000000..8aa24d9 --- /dev/null +++ b/fixtures/hitta/0546780862.html @@ -0,0 +1,102 @@ +Vem ringde? | 054-678 08 62

054-678 08 62

Numret tillhör Dalslänningen

ett företag från Karlstad.

Mer om Dalslänningen

Andra format av 054-678 08 62

+46546780862   0046546780862   0546780862   +46 546780862   tlf 546780862   +4654 678 08 62   +4654-678 08 62   tel:+4654-678 08 62   054-678 08 62   054-6780862  
\ No newline at end of file diff --git a/fixtures/konsumentinfo/0546780862.html b/fixtures/konsumentinfo/0546780862.html new file mode 100644 index 0000000..6ff1e2a --- /dev/null +++ b/fixtures/konsumentinfo/0546780862.html @@ -0,0 +1,78 @@ + + + + + + +Konsument info + + + + + + + + + + + + + + + +
+
+ +Hjälp oss. Ge ett bidrag på valfritt belopp. Vi behöver din hjälp. Läs mer här. + +
+
+
+
+

Felaktigt nummer angett

+
+Finns inget nummer I vårat system som matchar det som du har angivet. +
+
+
+ +
+ + + + diff --git a/fixtures/telefonforsaljare/0546780862.html b/fixtures/telefonforsaljare/0546780862.html new file mode 100644 index 0000000..c0c8498 --- /dev/null +++ b/fixtures/telefonforsaljare/0546780862.html @@ -0,0 +1,172 @@ + + + + +0546780862 - Har 054-678 08 62 ringt? | Nummerupplysning + +
+
+
+
+ + + + + +
+
+
+ + +

Har 054-678 08 62 ringt dig?

+ + + + +
+ +

Numret leder till Karlstad och använder operatören Weblink IP Phone AB.

+
+ + +

De senaste 24 timmarna har 1 personer sökt efter numret 0546780862. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 12 personer sökt efter numret.

Sökningar efter 0546780862 de senaste två veckorna:

Kommentera ditt samtal från 054-678 08 62 nedan:

+ + +

Har telefonförsäljare från 0546780862 ringt dig?

+ +
+
+ + +
+ + + + + +
+
+ +
+ + + + + + + + + + + + +
+ + +
+ + +
+ +
+ + +
+ +
+ +
+
+ + + +
+ + + + +
+ + + +
+ + +

Tips

+

Om du vill slippa telefonförsäljare hem skaffa ett gratis kontantkort. Det eftersom de ofta kollar ditt nummer på nummerupplysningen och då ringer till kontantkortet i stället. Vi har testat detta erbjudande själva och det är helt gratis utan förpliktelser. +Ett annat tips är att ringa via mobilt bredband & Skype. +
+Telemarketing är ett vanligt jobb bland ungdomar. Eftersom de jobbar provisionsbaserat kan de då och då säga till sin arbetsgivare att de sålt en produkt utan att de gjort det och kunden får då en faktura trots att så inte var överenskommet. Du ska då alltid meddela företaget att du bestrider fakturan. Få det kostnadsfria kontantkortet hemskickat till dörren +.

+ + + + +

+Numret 0546780862 kan även skrivas som 054-6780862 eller 0546-780862.

Internationell formatering av numret:
+46546780862
0046546780862

+ +

Telefonnummer som liknar 0546780862

+
+
+
+ + + + + + +
+
+ + + + diff --git a/fixtures/vemringde/0546780862.html b/fixtures/vemringde/0546780862.html new file mode 100644 index 0000000..bc286a3 --- /dev/null +++ b/fixtures/vemringde/0546780862.html @@ -0,0 +1,370 @@ + + + + Har 0546780862 ringt dig? | Vem ringde – missat samtal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+
+ + + +
+ +
+
+ + + + +
+ + + + +

Har 0546780862 ringt dig?

+
+
  1. + + + + + +
    +
    +
    +
+
+
+ + +
+

Gilla och dela vår info om 0546780862

+ + + + +
+ + + + + + + +

Inga samtal från 0546780862, har du provat Eniro?

+ +

Varför?

+

Det kan finns flera anledningar:

+
    +
  • Ägaren är en privatperson, har du provat söka på Eniro?
  • +
  • Ägaren av numret använder inte telemarketing i sin verksamhet
  • +
  • Ingen har rapporterat in det + , om du anser att det behöver göras;
    + bli den första, fyll i formuläret under rubriken "Har 0546780862 ringt dig?"
  • +
+ +

Du kan också gå till:

+ + +
    + +
    + +
    + + + + +
    + +
    + + + +
    +
    + +
    +
    +

    Vem ringde

    +
      +
    • Riktnummer 054 = Karlstad
    • Fyll i vem som ringde till vänster, och spara!
    • +
    +
    +
    +
    +
    +

    Dela med dig av Vemringde.se

    + +

    +
    +
    +

    Annons

    +
    + + + + + +
    +
    +
    + +
    +
    + +
    +
    + +
    + + +
    + + + + + + + diff --git a/src/entry.rs b/src/entry.rs new file mode 100644 index 0000000..5b32a39 --- /dev/null +++ b/src/entry.rs @@ -0,0 +1,131 @@ +use std::fmt; + +use chrono::offset::LocalResult; +use chrono::{Local, NaiveDate, NaiveDateTime, TimeZone, Utc}; + +use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; + +#[derive(Debug, PartialEq, Serialize)] +pub struct Entry { + pub messages: Vec, + pub history: Vec, + pub comments: Vec, +} + +impl fmt::Display for Entry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if !self.messages.is_empty() { + for message in &self.messages { + writeln!(f, " {}", message)?; + } + } + + if !self.history.is_empty() { + for history in &self.history { + writeln!(f, " {}", history)?; + } + } + + if !self.comments.is_empty() { + for comment in &self.comments { + writeln!(f, " * {}", comment)?; + } + } + + Ok(()) + } +} + +#[derive(Debug, PartialEq, Serialize)] +pub struct Comment { + pub datetime: Date, + pub title: Option, + pub message: String, +} + +impl fmt::Display for Comment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(ref title) = self.title { + write!(f, "{}: {} - {}", self.datetime, title, self.message) + } else { + write!(f, "{}: {}", self.datetime, self.message) + } + } +} + +#[derive(Debug, PartialEq, Eq, Serialize, PartialOrd, Ord)] +pub enum Date { + DateTime(chrono::DateTime), + #[serde(serialize_with = "serialize_date")] + Date(chrono::Date), +} + +impl Date { + pub fn datetime_from(tz: T, s: &str, fmt: &str) -> Result + where + T: TimeZone, + { + let datetime = NaiveDateTime::parse_from_str(s, fmt).map_err(|_| ())?; + let datetime = match tz.from_local_datetime(&datetime) { + LocalResult::Single(datetime) => datetime, + _ => return Err(()), + }; + + Ok(Date::DateTime(datetime.with_timezone(&Utc))) + } + + pub fn date_from(tz: T, s: &str, fmt: &str) -> Result + where + T: TimeZone, + { + let date = NaiveDate::parse_from_str(s, fmt).map_err(|_| ())?; + let date = match tz.from_local_date(&date) { + LocalResult::Single(date) => date, + _ => return Err(()), + }; + + Ok(Date::Date(date.with_timezone(&Utc))) + } +} + +impl fmt::Display for Date { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Date::DateTime(datetime) => { + let datetime = datetime.with_timezone(&Local); + + write!(f, "{}", datetime.format("%Y-%m-%d %H:%M:%S")) + } + Date::Date(date) => { + let date = date.with_timezone(&Local); + + write!(f, "{}", date.format("%Y-%m-%d")) + } + } + } +} + +fn serialize_date(date: &chrono::Date, serializer: S) -> Result +where + S: Serializer, +{ + let date = date.with_timezone(&Local); + let s = format!("{}", date.format("%Y-%m-%d")); + + Serialize::serialize(&s, serializer) +} + +#[allow(dead_code)] +fn deserialize_date<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + let date = NaiveDate::parse_from_str(&s, "%Y-%m-%d").map_err(de::Error::custom)?; + let date = match Utc.from_local_date(&date) { + LocalResult::Single(date) => date, + _ => return Err(de::Error::custom("")), + }; + + Ok(date.with_timezone(&Utc)) +} diff --git a/src/lib.rs b/src/lib.rs index 7bec293..d67aca9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ mod context; +pub mod entry; mod probe; pub use crate::context::Context; diff --git a/src/probe.rs b/src/probe.rs index bbe8214..10a58c5 100644 --- a/src/probe.rs +++ b/src/probe.rs @@ -1,7 +1,4 @@ -use std::fmt; - -use chrono::{DateTime, Local, Utc}; -use serde::Serialize; +use crate::entry::Entry; mod eniro; mod hitta; @@ -15,67 +12,6 @@ pub use self::konsument_info::KonsumentInfo; pub use self::telefonforsaljare::Telefonforsaljare; pub use self::vem_ringde::VemRingde; -#[derive(Debug, PartialEq, Serialize)] -pub struct Entry { - pub messages: Vec, - pub history: Vec, - pub comments: Vec, -} - -impl fmt::Display for Entry { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if !self.messages.is_empty() { - for message in &self.messages { - writeln!(f, " {}", message)?; - } - } - - if !self.history.is_empty() { - for history in &self.history { - writeln!(f, " {}", history)?; - } - } - - if !self.comments.is_empty() { - for comment in &self.comments { - writeln!(f, " * {}", comment)?; - } - } - - Ok(()) - } -} - -#[derive(Debug, PartialEq, Serialize)] -pub struct Comment { - pub datetime: DateTime, - pub title: Option, - pub message: String, -} - -impl fmt::Display for Comment { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let datetime = self.datetime.with_timezone(&Local); - - if let Some(ref title) = self.title { - write!( - f, - "{}: {} - {}", - datetime.format("%Y-%m-%d %H:%M:%S"), - title, - self.message - ) - } else { - write!( - f, - "{}: {}", - datetime.format("%Y-%m-%d %H:%M:%S"), - self.message - ) - } - } -} - pub trait Probe { fn provider(&self) -> &'static str; fn uri(&self, _: &str) -> String; diff --git a/src/probe/eniro.rs b/src/probe/eniro.rs index e46abf3..06f7045 100644 --- a/src/probe/eniro.rs +++ b/src/probe/eniro.rs @@ -1,6 +1,7 @@ use scraper::{Html, Selector}; -use crate::probe::{Entry, Probe}; +use crate::entry::Entry; +use crate::probe::Probe; fn from_html(document: &str) -> Result { let html = Html::parse_document(document); @@ -156,4 +157,15 @@ mod tests { - 0 denna vecka och 1 totalt. comments: []"###); } + + #[test] + fn test_0546780862() { + let document = include_str!("../../fixtures/eniro/0546780862.html"); + + assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: + messages: + - Nya Wermlands-Tidningens AB + history: [] + comments: []"###); + } } diff --git a/src/probe/hitta.rs b/src/probe/hitta.rs index ab63851..f3547aa 100644 --- a/src/probe/hitta.rs +++ b/src/probe/hitta.rs @@ -1,9 +1,10 @@ use chrono::{TimeZone, Utc}; -use log::debug; +use log::{debug, trace}; use regex::Regex; use serde::Deserialize; -use crate::probe::{self, Entry, Probe}; +use crate::entry::{self, Date, Entry}; +use crate::probe::Probe; #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -50,12 +51,10 @@ fn from_html(document: &str) -> Result { let json = result.get(1).unwrap().as_str(); - /* - println!( - "json: {:#?}", + trace!( + "Hitta: {:#?}", serde_json::from_str::(&json) ); - */ if let Ok(data) = serde_json::from_str::(&json) { let messages = Vec::new(); @@ -66,11 +65,11 @@ fn from_html(document: &str) -> Result { history.push(phone_data.statistics_text); for comment in phone_data.comments { - comments.push(probe::Comment { - datetime: Utc.timestamp( + comments.push(entry::Comment { + datetime: Date::DateTime(Utc.timestamp( (comment.timestamp / 1000) as i64, (comment.timestamp % 1000) as u32, - ), + )), title: None, message: comment.comment, }); @@ -131,91 +130,120 @@ mod tests { history: - 42 andra har rapporterat detta nummer comments: - - datetime: "2019-01-17T17:29:22Z" + - datetime: + DateTime: "2019-01-17T17:29:22Z" title: ~ message: Varmsälj från Folksam - - datetime: "2018-12-14T13:45:28Z" + - datetime: + DateTime: "2018-12-14T13:45:28Z" title: ~ message: Folksam - - datetime: "2018-11-28T07:30:18Z" + - datetime: + DateTime: "2018-11-28T07:30:18Z" title: ~ message: Höglandschskt - - datetime: "2018-11-20T19:18:09Z" + - datetime: + DateTime: "2018-11-20T19:18:09Z" title: ~ message: "Försäljare " - - datetime: "2018-11-19T17:38:34Z" + - datetime: + DateTime: "2018-11-19T17:38:34Z" title: ~ message: mögg från Folksam - - datetime: "2018-11-12T16:00:41Z" + - datetime: + DateTime: "2018-11-12T16:00:41Z" title: ~ message: Folksam försäkringsförsäljare - - datetime: "2018-10-25T10:28:36Z" + - datetime: + DateTime: "2018-10-25T10:28:36Z" title: ~ message: folksam - - datetime: "2018-10-10T07:30:40Z" + - datetime: + DateTime: "2018-10-10T07:30:40Z" title: ~ message: Telefonförsäljare - - datetime: "2018-10-04T10:04:55Z" + - datetime: + DateTime: "2018-10-04T10:04:55Z" title: ~ message: Folksam säljare - - datetime: "2018-10-03T13:55:19Z" + - datetime: + DateTime: "2018-10-03T13:55:19Z" title: ~ message: Sa inget. - - datetime: "2018-08-24T16:56:46Z" + - datetime: + DateTime: "2018-08-24T16:56:46Z" title: ~ message: Folksam - - datetime: "2018-08-24T09:42:43Z" + - datetime: + DateTime: "2018-08-24T09:42:43Z" title: ~ message: Achmati azmut från folksam - - datetime: "2018-08-21T18:29:29Z" + - datetime: + DateTime: "2018-08-21T18:29:29Z" title: ~ message: Folksam - - datetime: "2018-08-16T18:56:56Z" + - datetime: + DateTime: "2018-08-16T18:56:56Z" title: ~ message: Säljare från Folksam. - - datetime: "2018-08-16T14:48:59Z" + - datetime: + DateTime: "2018-08-16T14:48:59Z" title: ~ message: "Folksam " - - datetime: "2018-08-09T16:30:28Z" + - datetime: + DateTime: "2018-08-09T16:30:28Z" title: ~ message: Folksam - - datetime: "2018-08-02T16:29:32Z" + - datetime: + DateTime: "2018-08-02T16:29:32Z" title: ~ message: "Folksam " - - datetime: "2018-08-02T15:33:38Z" + - datetime: + DateTime: "2018-08-02T15:33:38Z" title: ~ message: "Folksam " - - datetime: "2018-07-25T08:28:27Z" + - datetime: + DateTime: "2018-07-25T08:28:27Z" title: ~ message: Säljare Folksam - - datetime: "2018-07-17T21:20:51Z" + - datetime: + DateTime: "2018-07-17T21:20:51Z" title: ~ message: "Inga Hansson " - - datetime: "2018-07-16T18:11:46Z" + - datetime: + DateTime: "2018-07-16T18:11:46Z" title: ~ message: Folksam - - datetime: "2018-07-06T15:45:46Z" + - datetime: + DateTime: "2018-07-06T15:45:46Z" title: ~ message: "Folksam " - - datetime: "2018-07-05T17:24:07Z" + - datetime: + DateTime: "2018-07-05T17:24:07Z" title: ~ message: folksam - - datetime: "2018-07-05T11:15:02Z" + - datetime: + DateTime: "2018-07-05T11:15:02Z" title: ~ message: Vesran - - datetime: "2018-07-04T13:30:49Z" + - datetime: + DateTime: "2018-07-04T13:30:49Z" title: ~ message: Folksam - - datetime: "2018-06-29T10:52:51Z" + - datetime: + DateTime: "2018-06-29T10:52:51Z" title: ~ message: folksam - - datetime: "2018-06-28T13:33:01Z" + - datetime: + DateTime: "2018-06-28T13:33:01Z" title: ~ message: Säljare folksam - - datetime: "2018-06-28T07:42:42Z" + - datetime: + DateTime: "2018-06-28T07:42:42Z" title: ~ message: Folksam försäkringar - - datetime: "2018-06-26T12:59:33Z" + - datetime: + DateTime: "2018-06-26T12:59:33Z" title: ~ message: Säljare Folksam"###); } @@ -288,4 +316,11 @@ mod tests { assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } + + #[test] + fn test_0546780862() { + let document = include_str!("../../fixtures/hitta/0546780862.html"); + + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); + } } diff --git a/src/probe/konsument_info.rs b/src/probe/konsument_info.rs index 3ca47cf..b138671 100644 --- a/src/probe/konsument_info.rs +++ b/src/probe/konsument_info.rs @@ -117,4 +117,11 @@ mod tests { assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } + + #[test] + fn test_0546780862() { + let document = include_str!("../../fixtures/konsumentinfo/0546780862.html"); + + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); + } } diff --git a/src/probe/telefonforsaljare.rs b/src/probe/telefonforsaljare.rs index 34e1fe7..2fde524 100644 --- a/src/probe/telefonforsaljare.rs +++ b/src/probe/telefonforsaljare.rs @@ -1,19 +1,8 @@ -use chrono::offset::LocalResult; -use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; use chrono_tz::Europe::Stockholm; use scraper::{Html, Selector}; -use crate::probe::{Comment, Entry, Probe}; - -fn stockholm_to_utc(s: &str, fmt: &str) -> Result, ()> { - let datetime = NaiveDateTime::parse_from_str(s, fmt).map_err(|_| ())?; - let datetime = match Stockholm.from_local_datetime(&datetime) { - LocalResult::Single(datetime) => datetime, - _ => return Err(()), - }; - - Ok(datetime.with_timezone(&Utc)) -} +use crate::entry::{Comment, Date, Entry}; +use crate::probe::Probe; fn from_html(document: &str) -> Result { let html = Html::parse_document(document); @@ -76,7 +65,8 @@ fn from_html(document: &str) -> Result { let message = htmlescape::decode_html(&message).unwrap(); comments.push(Comment { - datetime: stockholm_to_utc(&datetime, "%Y-%m-%d %H:%M:%S").unwrap(), + datetime: Date::datetime_from(Stockholm, &datetime, "%Y-%m-%d %H:%M:%S") + .expect("failed to parse datetime"), title: if title.is_empty() { None } else { Some(title) }, message, }); @@ -128,22 +118,28 @@ mod tests { history: - De senaste 24 timmarna har 9 personer sökt efter numret 0104754350. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 4786 personer sökt efter numret. comments: - - datetime: "2018-05-09T12:31:39Z" + - datetime: + DateTime: "2018-05-09T12:31:39Z" title: Folksam message: Svara inte på okända nummer. Blockerat! - - datetime: "2017-12-05T16:33:10Z" + - datetime: + DateTime: "2017-12-05T16:33:10Z" title: Folksam message: Svarade aldrig men när jag ringde upp var det Folksam - - datetime: "2017-11-28T10:30:10Z" + - datetime: + DateTime: "2017-11-28T10:30:10Z" title: ~ message: Ringde och la på - - datetime: "2017-11-20T14:53:16Z" + - datetime: + DateTime: "2017-11-20T14:53:16Z" title: Folksam message: färsäljare - - datetime: "2017-11-16T12:38:07Z" + - datetime: + DateTime: "2017-11-16T12:38:07Z" title: Folksam message: "missat samtal, ringde tillbaka och automatsvar sa att det var folksam som sökt mig för att presentera ett erbjudande." - - datetime: "2017-10-25T05:59:26Z" + - datetime: + DateTime: "2017-10-25T05:59:26Z" title: Folksam message: Försäljare"###); } @@ -169,7 +165,8 @@ mod tests { history: - De senaste 24 timmarna har 3 personer sökt efter numret 0702269893. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 4 personer sökt efter numret. comments: - - datetime: "2019-01-18T13:30:55Z" + - datetime: + DateTime: "2019-01-18T13:30:55Z" title: Alnö Design & Produktion AB message: "Renhållning, service, kemprodukter""###); } @@ -184,7 +181,8 @@ mod tests { history: - De senaste 24 timmarna har 1 personer sökt efter numret 0726443387. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 231 personer sökt efter numret. comments: - - datetime: "2018-10-31T17:48:27Z" + - datetime: + DateTime: "2018-10-31T17:48:27Z" title: Tele2 message: Bättre priser som inte finns online"###); } @@ -232,4 +230,15 @@ mod tests { - De senaste 24 timmarna har 1 personer sökt efter numret 0701807618. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 2 personer sökt efter numret. comments: []"###); } + + #[test] + fn test_0546780862() { + let document = include_str!("../../fixtures/telefonforsaljare/0546780862.html"); + + assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: + messages: [] + history: + - De senaste 24 timmarna har 1 personer sökt efter numret 0546780862. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 12 personer sökt efter numret. + comments: []"###); + } } diff --git a/src/probe/vem_ringde.rs b/src/probe/vem_ringde.rs index b2f2336..ee919d4 100644 --- a/src/probe/vem_ringde.rs +++ b/src/probe/vem_ringde.rs @@ -1,16 +1,17 @@ use std::str; -// use log::debug; +use chrono_tz::Europe::Stockholm; use scraper::{Html, Selector}; -use crate::probe::{Entry, Probe}; +use crate::entry::{Comment, Date, Entry}; +use crate::probe::Probe; fn from_html(document: &str) -> Result { let html = Html::parse_document(document); let mut messages = Vec::new(); let history = Vec::new(); - let comments = Vec::new(); + let mut comments = Vec::new(); let selector = Selector::parse("#toporganisations li").unwrap(); @@ -25,11 +26,47 @@ fn from_html(document: &str) -> Result { messages.push(message); } - Ok(Entry { - messages, - history, - comments, - }) + let selector = Selector::parse("#calls ol li").expect("failed to build selector"); + + for element in html.select(&selector) { + let selector = Selector::parse("div:nth-child(4)").expect("failed to build selector"); + + let date = element + .select(&selector) + .next() + .expect("failed to find datetime") + .inner_html(); + + let selector = Selector::parse("div:nth-child(3)").expect("failed to build selector"); + + let message = element + .select(&selector) + .next() + .unwrap() + .text() + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::>() + .join(" "); + + let message = htmlescape::decode_html(&message).unwrap(); + + comments.push(Comment { + datetime: Date::date_from(Stockholm, &date, "%Y-%m-%d").expect("failed to parse date"), + title: None, + message, + }); + } + + if !messages.is_empty() || !comments.is_empty() { + Ok(Entry { + messages, + history, + comments, + }) + } else { + Err(()) + } } pub struct VemRingde; @@ -69,7 +106,63 @@ mod tests { messages: - Folksam (5 samtal) history: [] - comments: []"###); + comments: + - datetime: + Date: 2018-11-07 + title: ~ + message: Folksam + - datetime: + Date: 2018-06-05 + title: ~ + message: Folksam + - datetime: + Date: 2018-04-18 + title: ~ + message: Folksam + - datetime: + Date: 2018-03-19 + title: ~ + message: okänd + - datetime: + Date: 2018-03-07 + title: ~ + message: okänd + - datetime: + Date: 2018-02-06 + title: ~ + message: Folksam spam + - datetime: + Date: 2017-12-20 + title: ~ + message: svarade ej + - datetime: + Date: 2017-12-07 + title: ~ + message: okänd + - datetime: + Date: 2017-12-05 + title: ~ + message: okänd + - datetime: + Date: 2017-11-21 + title: ~ + message: Försäljare folksam + - datetime: + Date: 2017-11-14 + title: ~ + message: Folksam + - datetime: + Date: 2017-11-06 + title: ~ + message: Folksam + - datetime: + Date: 2017-10-24 + title: ~ + message: telemarketing + - datetime: + Date: 2017-10-23 + title: ~ + message: okänd"###); } #[test] @@ -79,66 +172,59 @@ mod tests { assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: messages: [] history: [] - comments: []"###); + comments: + - datetime: + Date: 2018-11-26 + title: ~ + message: callcenter"###); } #[test] fn test_0702269893() { let document = include_str!("../../fixtures/vemringde/0702269893.html"); - assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: - messages: [] - history: [] - comments: []"###); + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } #[test] fn test_0726443387() { let document = include_str!("../../fixtures/vemringde/0726443387.html"); - assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: - messages: [] - history: [] - comments: []"###); + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } #[test] fn test_0751793426() { let document = include_str!("../../fixtures/vemringde/0751793426.html"); - assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: - messages: [] - history: [] - comments: []"###); + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } #[test] fn test_0751793483() { let document = include_str!("../../fixtures/vemringde/0751793483.html"); - assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: - messages: [] - history: [] - comments: []"###); + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } #[test] fn test_0751793499() { let document = include_str!("../../fixtures/vemringde/0751793499.html"); - assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: - messages: [] - history: [] - comments: []"###); + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } #[test] fn test_0701807618() { let document = include_str!("../../fixtures/vemringde/0701807618.html"); - assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok: - messages: [] - history: [] - comments: []"###); + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); + } + + #[test] + fn test_0546780862() { + let document = include_str!("../../fixtures/vemringde/0546780862.html"); + + assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~"); } }