More tests, collect more data, and refactor some code.

This commit is contained in:
2019-02-07 11:42:19 +01:00
parent 20e41d8f65
commit 7f12e84acc
13 changed files with 1240 additions and 160 deletions

131
src/entry.rs Normal file
View File

@@ -0,0 +1,131 @@
use std::fmt;
use chrono::offset::LocalResult;
use chrono::{Local, NaiveDate, NaiveDateTime, TimeZone, Utc};
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
/// Aggregated result of a lookup: message lines, history lines and comments.
///
/// The `Display` implementation prints the three collections in field
/// order, one item per line.
#[derive(Debug, PartialEq, Serialize)]
pub struct Entry {
/// Free-form message lines; printed first.
pub messages: Vec<String>,
/// History lines; printed after the messages.
pub history: Vec<String>,
/// Comments; printed last, each prefixed with `*`.
pub comments: Vec<Comment>,
}
/// Renders an entry as plain text, one item per line.
///
/// Messages come first, then history lines, then comments (each comment is
/// prefixed with `*`). An entry with no items produces no output at all.
impl fmt::Display for Entry {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Iterating an empty collection is a no-op, so no emptiness checks
        // are needed; the first failed write aborts the whole rendering.
        self.messages
            .iter()
            .try_for_each(|line| writeln!(f, " {}", line))?;
        self.history
            .iter()
            .try_for_each(|line| writeln!(f, " {}", line))?;
        self.comments
            .iter()
            .try_for_each(|remark| writeln!(f, " * {}", remark))
    }
}
/// A single dated comment, optionally carrying a title.
#[derive(Debug, PartialEq, Serialize)]
pub struct Comment {
/// When the comment was made; either a full timestamp or a bare calendar
/// date (see `Date`).
pub datetime: Date,
/// Optional title; when present, `Display` prints it between the date and
/// the message.
pub title: Option<String>,
/// The comment text itself.
pub message: String,
}
/// Formats a comment on a single line.
///
/// With a title the output is `<datetime>: <title> - <message>`; without one
/// it is `<datetime>: <message>`.
impl fmt::Display for Comment {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.title {
            Some(ref heading) => write!(f, "{}: {} - {}", self.datetime, heading, self.message),
            None => write!(f, "{}: {}", self.datetime, self.message),
        }
    }
}
/// A point in time stored in UTC, with either second or day precision.
///
/// The derived ordering compares variants in declaration order first, so
/// every `DateTime` value sorts before every `Date` value.
#[derive(Debug, PartialEq, Eq, Serialize, PartialOrd, Ord)]
pub enum Date {
/// Full timestamp (date and time of day).
DateTime(chrono::DateTime<Utc>),
/// Calendar date without a time of day; serialized through
/// `serialize_date` as a `%Y-%m-%d` string.
#[serde(serialize_with = "serialize_date")]
Date(chrono::Date<Utc>),
}
impl Date {
    /// Parses `s` with the chrono format string `fmt` as a wall-clock
    /// timestamp in time zone `tz` and returns it as a UTC `Date::DateTime`.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `s` does not match `fmt`, or when the local
    /// time does not map to exactly one instant in `tz` (it is either
    /// non-existent or ambiguous).
    pub fn datetime_from<T>(tz: T, s: &str, fmt: &str) -> Result<Date, ()>
    where
        T: TimeZone,
    {
        let naive = NaiveDateTime::parse_from_str(s, fmt).map_err(|_| ())?;
        match tz.from_local_datetime(&naive) {
            LocalResult::Single(local) => Ok(Date::DateTime(local.with_timezone(&Utc))),
            LocalResult::None | LocalResult::Ambiguous(_, _) => Err(()),
        }
    }

    /// Parses `s` with the chrono format string `fmt` as a calendar date in
    /// time zone `tz` and returns it as a `Date::Date` tagged with UTC.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `s` does not match `fmt`, or when the local
    /// date does not resolve to exactly one date in `tz`.
    pub fn date_from<T>(tz: T, s: &str, fmt: &str) -> Result<Date, ()>
    where
        T: TimeZone,
    {
        let naive = NaiveDate::parse_from_str(s, fmt).map_err(|_| ())?;
        match tz.from_local_date(&naive) {
            LocalResult::Single(local) => Ok(Date::Date(local.with_timezone(&Utc))),
            LocalResult::None | LocalResult::Ambiguous(_, _) => Err(()),
        }
    }
}
/// Prints the value converted to the system's local time zone.
///
/// Timestamps are rendered as `%Y-%m-%d %H:%M:%S`, bare dates as `%Y-%m-%d`.
impl fmt::Display for Date {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Date::DateTime(ref utc) => write!(
                f,
                "{}",
                utc.with_timezone(&Local).format("%Y-%m-%d %H:%M:%S")
            ),
            Date::Date(ref utc) => write!(f, "{}", utc.with_timezone(&Local).format("%Y-%m-%d")),
        }
    }
}
/// Serde `serialize_with` helper for `Date::Date`.
///
/// Converts the date to the local time zone and emits it as a `%Y-%m-%d`
/// string.
fn serialize_date<S>(date: &chrono::Date<Utc>, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let text = date.with_timezone(&Local).format("%Y-%m-%d").to_string();
    serializer.serialize_str(&text)
}
#[allow(dead_code)]
fn deserialize_date<'de, D>(deserializer: D) -> Result<chrono::Date<Utc>, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
let date = NaiveDate::parse_from_str(&s, "%Y-%m-%d").map_err(de::Error::custom)?;
let date = match Utc.from_local_date(&date) {
LocalResult::Single(date) => date,
_ => return Err(de::Error::custom("")),
};
Ok(date.with_timezone(&Utc))
}

View File

@@ -1,4 +1,5 @@
mod context;
pub mod entry;
mod probe;
pub use crate::context::Context;

View File

@@ -1,7 +1,4 @@
use std::fmt;
use chrono::{DateTime, Local, Utc};
use serde::Serialize;
use crate::entry::Entry;
mod eniro;
mod hitta;
@@ -15,67 +12,6 @@ pub use self::konsument_info::KonsumentInfo;
pub use self::telefonforsaljare::Telefonforsaljare;
pub use self::vem_ringde::VemRingde;
#[derive(Debug, PartialEq, Serialize)]
pub struct Entry {
pub messages: Vec<String>,
pub history: Vec<String>,
pub comments: Vec<Comment>,
}
impl fmt::Display for Entry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if !self.messages.is_empty() {
for message in &self.messages {
writeln!(f, " {}", message)?;
}
}
if !self.history.is_empty() {
for history in &self.history {
writeln!(f, " {}", history)?;
}
}
if !self.comments.is_empty() {
for comment in &self.comments {
writeln!(f, " * {}", comment)?;
}
}
Ok(())
}
}
#[derive(Debug, PartialEq, Serialize)]
pub struct Comment {
pub datetime: DateTime<Utc>,
pub title: Option<String>,
pub message: String,
}
impl fmt::Display for Comment {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let datetime = self.datetime.with_timezone(&Local);
if let Some(ref title) = self.title {
write!(
f,
"{}: {} - {}",
datetime.format("%Y-%m-%d %H:%M:%S"),
title,
self.message
)
} else {
write!(
f,
"{}: {}",
datetime.format("%Y-%m-%d %H:%M:%S"),
self.message
)
}
}
}
pub trait Probe {
fn provider(&self) -> &'static str;
fn uri(&self, _: &str) -> String;

View File

@@ -1,6 +1,7 @@
use scraper::{Html, Selector};
use crate::probe::{Entry, Probe};
use crate::entry::Entry;
use crate::probe::Probe;
fn from_html(document: &str) -> Result<Entry, ()> {
let html = Html::parse_document(document);
@@ -156,4 +157,15 @@ mod tests {
- 0 denna vecka och 1 totalt.
comments: []"###);
}
#[test]
fn test_0546780862() {
let document = include_str!("../../fixtures/eniro/0546780862.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages:
- Nya Wermlands-Tidningens AB
history: []
comments: []"###);
}
}

View File

@@ -1,9 +1,10 @@
use chrono::{TimeZone, Utc};
use log::debug;
use log::{debug, trace};
use regex::Regex;
use serde::Deserialize;
use crate::probe::{self, Entry, Probe};
use crate::entry::{self, Date, Entry};
use crate::probe::Probe;
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
@@ -50,12 +51,10 @@ fn from_html(document: &str) -> Result<Entry, ()> {
let json = result.get(1).unwrap().as_str();
/*
println!(
"json: {:#?}",
trace!(
"Hitta: {:#?}",
serde_json::from_str::<serde_json::Value>(&json)
);
*/
if let Ok(data) = serde_json::from_str::<Data>(&json) {
let messages = Vec::new();
@@ -66,11 +65,11 @@ fn from_html(document: &str) -> Result<Entry, ()> {
history.push(phone_data.statistics_text);
for comment in phone_data.comments {
comments.push(probe::Comment {
datetime: Utc.timestamp(
comments.push(entry::Comment {
datetime: Date::DateTime(Utc.timestamp(
(comment.timestamp / 1000) as i64,
(comment.timestamp % 1000) as u32,
),
)),
title: None,
message: comment.comment,
});
@@ -131,91 +130,120 @@ mod tests {
history:
- 42 andra har rapporterat detta nummer
comments:
- datetime: "2019-01-17T17:29:22Z"
- datetime:
DateTime: "2019-01-17T17:29:22Z"
title: ~
message: Varmsälj från Folksam
- datetime: "2018-12-14T13:45:28Z"
- datetime:
DateTime: "2018-12-14T13:45:28Z"
title: ~
message: Folksam
- datetime: "2018-11-28T07:30:18Z"
- datetime:
DateTime: "2018-11-28T07:30:18Z"
title: ~
message: Höglandschskt
- datetime: "2018-11-20T19:18:09Z"
- datetime:
DateTime: "2018-11-20T19:18:09Z"
title: ~
message: "Försäljare "
- datetime: "2018-11-19T17:38:34Z"
- datetime:
DateTime: "2018-11-19T17:38:34Z"
title: ~
message: mögg från Folksam
- datetime: "2018-11-12T16:00:41Z"
- datetime:
DateTime: "2018-11-12T16:00:41Z"
title: ~
message: Folksam försäkringsförsäljare
- datetime: "2018-10-25T10:28:36Z"
- datetime:
DateTime: "2018-10-25T10:28:36Z"
title: ~
message: folksam
- datetime: "2018-10-10T07:30:40Z"
- datetime:
DateTime: "2018-10-10T07:30:40Z"
title: ~
message: Telefonförsäljare
- datetime: "2018-10-04T10:04:55Z"
- datetime:
DateTime: "2018-10-04T10:04:55Z"
title: ~
message: Folksam säljare
- datetime: "2018-10-03T13:55:19Z"
- datetime:
DateTime: "2018-10-03T13:55:19Z"
title: ~
message: Sa inget.
- datetime: "2018-08-24T16:56:46Z"
- datetime:
DateTime: "2018-08-24T16:56:46Z"
title: ~
message: Folksam
- datetime: "2018-08-24T09:42:43Z"
- datetime:
DateTime: "2018-08-24T09:42:43Z"
title: ~
message: Achmati azmut från folksam
- datetime: "2018-08-21T18:29:29Z"
- datetime:
DateTime: "2018-08-21T18:29:29Z"
title: ~
message: Folksam
- datetime: "2018-08-16T18:56:56Z"
- datetime:
DateTime: "2018-08-16T18:56:56Z"
title: ~
message: Säljare från Folksam.
- datetime: "2018-08-16T14:48:59Z"
- datetime:
DateTime: "2018-08-16T14:48:59Z"
title: ~
message: "Folksam "
- datetime: "2018-08-09T16:30:28Z"
- datetime:
DateTime: "2018-08-09T16:30:28Z"
title: ~
message: Folksam
- datetime: "2018-08-02T16:29:32Z"
- datetime:
DateTime: "2018-08-02T16:29:32Z"
title: ~
message: "Folksam "
- datetime: "2018-08-02T15:33:38Z"
- datetime:
DateTime: "2018-08-02T15:33:38Z"
title: ~
message: "Folksam "
- datetime: "2018-07-25T08:28:27Z"
- datetime:
DateTime: "2018-07-25T08:28:27Z"
title: ~
message: Säljare Folksam
- datetime: "2018-07-17T21:20:51Z"
- datetime:
DateTime: "2018-07-17T21:20:51Z"
title: ~
message: "Inga Hansson "
- datetime: "2018-07-16T18:11:46Z"
- datetime:
DateTime: "2018-07-16T18:11:46Z"
title: ~
message: Folksam
- datetime: "2018-07-06T15:45:46Z"
- datetime:
DateTime: "2018-07-06T15:45:46Z"
title: ~
message: "Folksam "
- datetime: "2018-07-05T17:24:07Z"
- datetime:
DateTime: "2018-07-05T17:24:07Z"
title: ~
message: folksam
- datetime: "2018-07-05T11:15:02Z"
- datetime:
DateTime: "2018-07-05T11:15:02Z"
title: ~
message: Vesran
- datetime: "2018-07-04T13:30:49Z"
- datetime:
DateTime: "2018-07-04T13:30:49Z"
title: ~
message: Folksam
- datetime: "2018-06-29T10:52:51Z"
- datetime:
DateTime: "2018-06-29T10:52:51Z"
title: ~
message: folksam
- datetime: "2018-06-28T13:33:01Z"
- datetime:
DateTime: "2018-06-28T13:33:01Z"
title: ~
message: Säljare folksam
- datetime: "2018-06-28T07:42:42Z"
- datetime:
DateTime: "2018-06-28T07:42:42Z"
title: ~
message: Folksam försäkringar
- datetime: "2018-06-26T12:59:33Z"
- datetime:
DateTime: "2018-06-26T12:59:33Z"
title: ~
message: Säljare Folksam"###);
}
@@ -288,4 +316,11 @@ mod tests {
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0546780862() {
let document = include_str!("../../fixtures/hitta/0546780862.html");
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
}

View File

@@ -117,4 +117,11 @@ mod tests {
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0546780862() {
let document = include_str!("../../fixtures/konsumentinfo/0546780862.html");
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
}

View File

@@ -1,19 +1,8 @@
use chrono::offset::LocalResult;
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
use chrono_tz::Europe::Stockholm;
use scraper::{Html, Selector};
use crate::probe::{Comment, Entry, Probe};
fn stockholm_to_utc(s: &str, fmt: &str) -> Result<DateTime<Utc>, ()> {
let datetime = NaiveDateTime::parse_from_str(s, fmt).map_err(|_| ())?;
let datetime = match Stockholm.from_local_datetime(&datetime) {
LocalResult::Single(datetime) => datetime,
_ => return Err(()),
};
Ok(datetime.with_timezone(&Utc))
}
use crate::entry::{Comment, Date, Entry};
use crate::probe::Probe;
fn from_html(document: &str) -> Result<Entry, ()> {
let html = Html::parse_document(document);
@@ -76,7 +65,8 @@ fn from_html(document: &str) -> Result<Entry, ()> {
let message = htmlescape::decode_html(&message).unwrap();
comments.push(Comment {
datetime: stockholm_to_utc(&datetime, "%Y-%m-%d %H:%M:%S").unwrap(),
datetime: Date::datetime_from(Stockholm, &datetime, "%Y-%m-%d %H:%M:%S")
.expect("failed to parse datetime"),
title: if title.is_empty() { None } else { Some(title) },
message,
});
@@ -128,22 +118,28 @@ mod tests {
history:
- De senaste 24 timmarna har 9 personer sökt efter numret 0104754350. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 4786 personer sökt efter numret.
comments:
- datetime: "2018-05-09T12:31:39Z"
- datetime:
DateTime: "2018-05-09T12:31:39Z"
title: Folksam
message: Svara inte på okända nummer. Blockerat!
- datetime: "2017-12-05T16:33:10Z"
- datetime:
DateTime: "2017-12-05T16:33:10Z"
title: Folksam
message: Svarade aldrig men när jag ringde upp var det Folksam
- datetime: "2017-11-28T10:30:10Z"
- datetime:
DateTime: "2017-11-28T10:30:10Z"
title: ~
message: Ringde och la på
- datetime: "2017-11-20T14:53:16Z"
- datetime:
DateTime: "2017-11-20T14:53:16Z"
title: Folksam
message: färsäljare
- datetime: "2017-11-16T12:38:07Z"
- datetime:
DateTime: "2017-11-16T12:38:07Z"
title: Folksam
message: "missat samtal, ringde tillbaka och automatsvar sa att det var folksam som sökt mig för att presentera ett erbjudande."
- datetime: "2017-10-25T05:59:26Z"
- datetime:
DateTime: "2017-10-25T05:59:26Z"
title: Folksam
message: Försäljare"###);
}
@@ -169,7 +165,8 @@ mod tests {
history:
- De senaste 24 timmarna har 3 personer sökt efter numret 0702269893. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 4 personer sökt efter numret.
comments:
- datetime: "2019-01-18T13:30:55Z"
- datetime:
DateTime: "2019-01-18T13:30:55Z"
title: Alnö Design & Produktion AB
message: "Renhållning, service, kemprodukter""###);
}
@@ -184,7 +181,8 @@ mod tests {
history:
- De senaste 24 timmarna har 1 personer sökt efter numret 0726443387. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 231 personer sökt efter numret.
comments:
- datetime: "2018-10-31T17:48:27Z"
- datetime:
DateTime: "2018-10-31T17:48:27Z"
title: Tele2
message: Bättre priser som inte finns online"###);
}
@@ -232,4 +230,15 @@ mod tests {
- De senaste 24 timmarna har 1 personer sökt efter numret 0701807618. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 2 personer sökt efter numret.
comments: []"###);
}
#[test]
fn test_0546780862() {
let document = include_str!("../../fixtures/telefonforsaljare/0546780862.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history:
- De senaste 24 timmarna har 1 personer sökt efter numret 0546780862. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst 12 personer sökt efter numret.
comments: []"###);
}
}

View File

@@ -1,16 +1,17 @@
use std::str;
// use log::debug;
use chrono_tz::Europe::Stockholm;
use scraper::{Html, Selector};
use crate::probe::{Entry, Probe};
use crate::entry::{Comment, Date, Entry};
use crate::probe::Probe;
fn from_html(document: &str) -> Result<Entry, ()> {
let html = Html::parse_document(document);
let mut messages = Vec::new();
let history = Vec::new();
let comments = Vec::new();
let mut comments = Vec::new();
let selector = Selector::parse("#toporganisations li").unwrap();
@@ -25,11 +26,47 @@ fn from_html(document: &str) -> Result<Entry, ()> {
messages.push(message);
}
Ok(Entry {
messages,
history,
comments,
})
let selector = Selector::parse("#calls ol li").expect("failed to build selector");
for element in html.select(&selector) {
let selector = Selector::parse("div:nth-child(4)").expect("failed to build selector");
let date = element
.select(&selector)
.next()
.expect("failed to find datetime")
.inner_html();
let selector = Selector::parse("div:nth-child(3)").expect("failed to build selector");
let message = element
.select(&selector)
.next()
.unwrap()
.text()
.map(str::trim)
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join(" ");
let message = htmlescape::decode_html(&message).unwrap();
comments.push(Comment {
datetime: Date::date_from(Stockholm, &date, "%Y-%m-%d").expect("failed to parse date"),
title: None,
message,
});
}
if !messages.is_empty() || !comments.is_empty() {
Ok(Entry {
messages,
history,
comments,
})
} else {
Err(())
}
}
pub struct VemRingde;
@@ -69,7 +106,63 @@ mod tests {
messages:
- Folksam (5 samtal)
history: []
comments: []"###);
comments:
- datetime:
Date: 2018-11-07
title: ~
message: Folksam
- datetime:
Date: 2018-06-05
title: ~
message: Folksam
- datetime:
Date: 2018-04-18
title: ~
message: Folksam
- datetime:
Date: 2018-03-19
title: ~
message: okänd
- datetime:
Date: 2018-03-07
title: ~
message: okänd
- datetime:
Date: 2018-02-06
title: ~
message: Folksam spam
- datetime:
Date: 2017-12-20
title: ~
message: svarade ej
- datetime:
Date: 2017-12-07
title: ~
message: okänd
- datetime:
Date: 2017-12-05
title: ~
message: okänd
- datetime:
Date: 2017-11-21
title: ~
message: Försäljare folksam
- datetime:
Date: 2017-11-14
title: ~
message: Folksam
- datetime:
Date: 2017-11-06
title: ~
message: Folksam
- datetime:
Date: 2017-10-24
title: ~
message: telemarketing
- datetime:
Date: 2017-10-23
title: ~
message: okänd"###);
}
#[test]
@@ -79,66 +172,59 @@ mod tests {
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
comments:
- datetime:
Date: 2018-11-26
title: ~
message: callcenter"###);
}
#[test]
fn test_0702269893() {
let document = include_str!("../../fixtures/vemringde/0702269893.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0726443387() {
let document = include_str!("../../fixtures/vemringde/0726443387.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0751793426() {
let document = include_str!("../../fixtures/vemringde/0751793426.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0751793483() {
let document = include_str!("../../fixtures/vemringde/0751793483.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0751793499() {
let document = include_str!("../../fixtures/vemringde/0751793499.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0701807618() {
let document = include_str!("../../fixtures/vemringde/0701807618.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
#[test]
fn test_0546780862() {
let document = include_str!("../../fixtures/vemringde/0546780862.html");
assert_yaml_snapshot_matches!(from_html(&document), @"Err: ~");
}
}