A lot more fixtures, use chrono for date.

This commit is contained in:
2019-01-22 10:27:38 +01:00
parent 94fa03d45c
commit 8e0563b463
44 changed files with 6493 additions and 17 deletions

View File

@@ -1,5 +1,7 @@
use std::fmt;
use chrono::{DateTime, Utc};
mod eniro;
mod hitta;
mod konsument_info;
@@ -37,11 +39,7 @@ impl fmt::Display for Entry {
if !self.comments.is_empty() {
for comment in &self.comments {
writeln!(
f,
" * {}: {} - {}",
comment.datetime, comment.title, comment.message
)?;
writeln!(f, " * {}", comment)?;
}
}
@@ -51,11 +49,23 @@ impl fmt::Display for Entry {
/// A single user comment attached to a probe result.
///
/// Displayed via its `fmt::Display` impl as `<datetime>: <title> - <message>`.
#[derive(Debug, PartialEq)]
pub struct Comment {
pub datetime: String,
/// Point in time associated with the comment, expressed in UTC.
pub datetime: DateTime<Utc>,
/// Title text of the comment.
pub title: String,
/// Message text of the comment.
pub message: String,
}
/// Renders a comment as `<datetime>: <title> - <message>`, with the timestamp
/// printed using the `%Y-%m-%d %H:%M:%S` pattern.
impl fmt::Display for Comment {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Lazily-formatted timestamp; it is only rendered when written below.
        let timestamp = self.datetime.format("%Y-%m-%d %H:%M:%S");
        write!(f, "{}: {} - {}", timestamp, self.title, self.message)
    }
}
pub trait Probe {
fn uri(&self, _: &str) -> String;
fn search(&mut self, _: &mut Context, _: &str) -> Result<(), ()>;

View File

@@ -7,7 +7,7 @@ fn from_html(document: &str) -> Result<Entry, ()> {
let html = Html::parse_document(document);
let mut messages = Vec::new();
let history = Vec::new();
let mut history = Vec::new();
let comments = Vec::new();
let selector = Selector::parse(".CompanyResultListItem h3.name > a").unwrap();
@@ -19,6 +19,15 @@ fn from_html(document: &str) -> Result<Entry, ()> {
messages.push(message);
}
let selector = Selector::parse("div.PhoneNoHit div.search-info-container p").unwrap();
if let Some(element) = html.select(&selector).next() {
let message = element.inner_html();
let message = htmlescape::decode_html(&message).unwrap();
history.push(message);
}
Ok(Entry {
messages,
history,
@@ -59,8 +68,42 @@ impl Probe for Eniro {
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
/// Eniro fixture `0104754350.html`: expects exactly one message and no
/// history or comments.
#[test]
fn test_0104754350() {
    let parsed = from_html(include_str!("../../fixtures/eniro/0104754350.html"));

    let message =
        String::from("Företaget bedriver telefonförsäljning eller marknadsundersökningar");
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: vec![message],
            history: Vec::new(),
            comments: Vec::new(),
        })
    );
}
/// Eniro fixture `0313908905.html`: expects one history line (kept as raw
/// inner HTML) and no messages or comments.
#[test]
fn test_0313908905() {
    let parsed = from_html(include_str!("../../fixtures/eniro/0313908905.html"));

    let history_line = String::from(
        "<strong>3464</strong> denna vecka och <strong>6637</strong> totalt.<!-- --> ",
    );
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: Vec::new(),
            history: vec![history_line],
            comments: Vec::new(),
        })
    );
}
#[test]
fn test_0702269893() {
let document = include_str!("../../fixtures/eniro/0702269893.html");
@@ -73,4 +116,68 @@ mod tests {
assert_eq!(from_html(&document), Ok(expected));
}
/// Eniro fixture `0726443387.html`: expects one history line (kept as raw
/// inner HTML) and no messages or comments.
#[test]
fn test_0726443387() {
    let parsed = from_html(include_str!("../../fixtures/eniro/0726443387.html"));

    let history_line = String::from(
        "<strong>16</strong> denna vecka och <strong>98</strong> totalt.<!-- --> ",
    );
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: Vec::new(),
            history: vec![history_line],
            comments: Vec::new(),
        })
    );
}
/// Eniro fixture `0751793426.html`: expects one history line (kept as raw
/// inner HTML) and no messages or comments.
#[test]
fn test_0751793426() {
    let parsed = from_html(include_str!("../../fixtures/eniro/0751793426.html"));

    let history_line = String::from(
        "<strong>20</strong> denna vecka och <strong>602</strong> totalt.<!-- --> ",
    );
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: Vec::new(),
            history: vec![history_line],
            comments: Vec::new(),
        })
    );
}
/// Eniro fixture `0751793483.html`: expects one history line (kept as raw
/// inner HTML) and no messages or comments.
#[test]
fn test_0751793483() {
    let parsed = from_html(include_str!("../../fixtures/eniro/0751793483.html"));

    let history_line = String::from(
        "<strong>29</strong> denna vecka och <strong>900</strong> totalt.<!-- --> ",
    );
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: Vec::new(),
            history: vec![history_line],
            comments: Vec::new(),
        })
    );
}
/// Eniro fixture `0751793499.html`: expects one history line (kept as raw
/// inner HTML) and no messages or comments.
#[test]
fn test_0751793499() {
    let parsed = from_html(include_str!("../../fixtures/eniro/0751793499.html"));

    let history_line = String::from(
        "<strong>303</strong> denna vecka och <strong>304</strong> totalt.<!-- --> ",
    );
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: Vec::new(),
            history: vec![history_line],
            comments: Vec::new(),
        })
    );
}
}

View File

@@ -1,3 +1,4 @@
use chrono::{TimeZone, Utc};
use log::debug;
use regex::Regex;
use serde::Deserialize;
@@ -50,6 +51,13 @@ fn from_html(document: &str) -> Result<Entry, ()> {
let json = result.get(1).unwrap().as_str();
/*
println!(
"json: {:#?}",
serde_json::from_str::<serde_json::Value>(&json)
);
*/
if let Ok(data) = serde_json::from_str::<Data>(&json) {
let messages = Vec::new();
let mut history = Vec::new();
@@ -60,11 +68,16 @@ fn from_html(document: &str) -> Result<Entry, ()> {
// Convert every scraped comment into a `probe::Comment` carrying a UTC timestamp.
for comment in phone_data.comments {
    comments.push(probe::Comment {
        datetime: "".to_string(),
        // `comment.timestamp` is split into whole seconds (`/ 1000`) and a
        // millisecond remainder (`% 1000`). `Utc.timestamp` takes its second
        // argument in nanoseconds, so the remainder is scaled by 1_000_000;
        // the largest possible value (999_000_000) still fits in a `u32`.
        datetime: Utc.timestamp(
            (comment.timestamp / 1000) as i64,
            (comment.timestamp % 1000) as u32 * 1_000_000,
        ),
        title: "".to_string(),
        message: comment.comment,
    });
}
comments.sort_by(|a, b| b.datetime.cmp(&a.datetime));
}
Ok(Entry {
@@ -114,8 +127,36 @@ impl Probe for Hitta {
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
/// Hitta fixture `0104754350.html`: expects one history line and no messages
/// or comments.
#[test]
fn test_0104754350() {
    let parsed = from_html(include_str!("../../fixtures/hitta/0104754350.html"));

    let history_line = String::from("42 andra har rapporterat detta nummer");
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: Vec::new(),
            history: vec![history_line],
            comments: Vec::new(),
        })
    );
}
/// Hitta fixture `0313908905.html`: expects a successful parse with every
/// list empty.
#[test]
fn test_0313908905() {
    let parsed = from_html(include_str!("../../fixtures/hitta/0313908905.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
#[test]
fn test_0702269893() {
let document = include_str!("../../fixtures/hitta/0702269893.html");
@@ -128,4 +169,56 @@ mod tests {
assert_eq!(from_html(&document), Ok(expected));
}
/// Hitta fixture `0726443387.html`: expects one history line and no messages
/// or comments.
#[test]
fn test_0726443387() {
    let parsed = from_html(include_str!("../../fixtures/hitta/0726443387.html"));

    let history_line = String::from("1299 andra har också sökt på detta nummer");
    assert_eq!(
        parsed,
        Ok(Entry {
            messages: Vec::new(),
            history: vec![history_line],
            comments: Vec::new(),
        })
    );
}
/// Hitta fixture `0751793426.html`: expects a successful parse with every
/// list empty.
#[test]
fn test_0751793426() {
    let parsed = from_html(include_str!("../../fixtures/hitta/0751793426.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// Hitta fixture `0751793483.html`: expects a successful parse with every
/// list empty.
#[test]
fn test_0751793483() {
    let parsed = from_html(include_str!("../../fixtures/hitta/0751793483.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// Hitta fixture `0751793499.html`: expects a successful parse with every
/// list empty.
#[test]
fn test_0751793499() {
    let parsed = from_html(include_str!("../../fixtures/hitta/0751793499.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
}

View File

@@ -19,11 +19,15 @@ fn from_html(document: &str) -> Result<Entry, ()> {
messages.push(message);
}
Ok(Entry {
messages,
history,
comments,
})
if messages.is_empty() {
Err(())
} else {
Ok(Entry {
messages,
history,
comments,
})
}
}
pub struct KonsumentInfo;
@@ -59,8 +63,36 @@ impl Probe for KonsumentInfo {
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
/// KonsumentInfo fixture `0104754350.html`: a page without any extracted
/// message must be reported as an error.
#[test]
fn test_0104754350() {
    let document = include_str!("../../fixtures/konsumentinfo/0104754350.html");

    // `from_html` returns `Err(())` whenever no message was extracted, so an
    // `Ok(Entry)` with an empty `messages` list can never be produced.
    // NOTE(review): assumes this fixture contains no matching entry — confirm.
    assert_eq!(from_html(document), Err(()));
}
/// KonsumentInfo fixture `0313908905.html`: a page without any extracted
/// message must be reported as an error.
#[test]
fn test_0313908905() {
    let document = include_str!("../../fixtures/konsumentinfo/0313908905.html");

    // `from_html` returns `Err(())` whenever no message was extracted, so an
    // `Ok(Entry)` with an empty `messages` list can never be produced.
    // NOTE(review): assumes this fixture contains no matching entry — confirm.
    assert_eq!(from_html(document), Err(()));
}
#[test]
fn test_0702269893() {
let document = include_str!("../../fixtures/konsumentinfo/0702269893.html");
@@ -73,4 +105,56 @@ mod tests {
assert_eq!(from_html(&document), Ok(expected));
}
/// KonsumentInfo fixture `0726443387.html`: a page without any extracted
/// message must be reported as an error.
#[test]
fn test_0726443387() {
    let document = include_str!("../../fixtures/konsumentinfo/0726443387.html");

    // `from_html` returns `Err(())` whenever no message was extracted, so an
    // `Ok(Entry)` with an empty `messages` list can never be produced.
    // NOTE(review): assumes this fixture contains no matching entry — confirm.
    assert_eq!(from_html(document), Err(()));
}
/// KonsumentInfo fixture `0751793426.html`: a page without any extracted
/// message must be reported as an error.
#[test]
fn test_0751793426() {
    let document = include_str!("../../fixtures/konsumentinfo/0751793426.html");

    // `from_html` returns `Err(())` whenever no message was extracted, so an
    // `Ok(Entry)` with an empty `messages` list can never be produced.
    // NOTE(review): assumes this fixture contains no matching entry — confirm.
    assert_eq!(from_html(document), Err(()));
}
/// KonsumentInfo fixture `0751793483.html`: a page without any extracted
/// message must be reported as an error.
#[test]
fn test_0751793483() {
    let document = include_str!("../../fixtures/konsumentinfo/0751793483.html");

    // `from_html` returns `Err(())` whenever no message was extracted, so an
    // `Ok(Entry)` with an empty `messages` list can never be produced.
    // NOTE(review): assumes this fixture contains no matching entry — confirm.
    assert_eq!(from_html(document), Err(()));
}
/// KonsumentInfo fixture `0751793499.html`: a page without any extracted
/// message must be reported as an error.
#[test]
fn test_0751793499() {
    let document = include_str!("../../fixtures/konsumentinfo/0751793499.html");

    // `from_html` returns `Err(())` whenever no message was extracted, so an
    // `Ok(Entry)` with an empty `messages` list can never be produced.
    // NOTE(review): assumes this fixture contains no matching entry — confirm.
    assert_eq!(from_html(document), Err(()));
}
}

View File

@@ -1,9 +1,22 @@
use chrono::offset::LocalResult;
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
use chrono_tz::Europe::Stockholm;
use log::debug;
use scraper::{Html, Selector};
use crate::context::Context;
use crate::probe::{Comment, Entry, Probe};
/// Parses `s` with the chrono format string `fmt`, interprets the result as
/// wall-clock time in the `Europe/Stockholm` zone and converts it to UTC.
///
/// Returns `Err(())` when `s` does not match `fmt`, or when the local time
/// does not resolve to exactly one instant (any `LocalResult` other than
/// `Single`).
fn stockholm_to_utc(s: &str, fmt: &str) -> Result<DateTime<Utc>, ()> {
    let naive = match NaiveDateTime::parse_from_str(s, fmt) {
        Ok(naive) => naive,
        Err(_) => return Err(()),
    };

    if let LocalResult::Single(local) = Stockholm.from_local_datetime(&naive) {
        Ok(local.with_timezone(&Utc))
    } else {
        Err(())
    }
}
fn from_html(document: &str) -> Result<Entry, ()> {
let html = Html::parse_document(document);
@@ -54,7 +67,7 @@ fn from_html(document: &str) -> Result<Entry, ()> {
let message = htmlescape::decode_html(&message).unwrap();
comments.push(Comment {
datetime,
datetime: stockholm_to_utc(&datetime, "%Y-%m-%d %H:%M:%S").unwrap(),
title,
message,
});
@@ -105,8 +118,36 @@ impl Probe for Telefonforsaljare {
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
/// Telefonforsaljare fixture `0104754350.html`: expects a successful parse
/// with every list empty.
#[test]
fn test_0104754350() {
    let parsed = from_html(include_str!("../../fixtures/telefonforsaljare/0104754350.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// Telefonforsaljare fixture `0313908905.html`: expects a successful parse
/// with every list empty.
#[test]
fn test_0313908905() {
    let parsed = from_html(include_str!("../../fixtures/telefonforsaljare/0313908905.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
#[test]
fn test_0702269893() {
let document = include_str!("../../fixtures/telefonforsaljare/0702269893.html");
@@ -116,7 +157,7 @@ mod tests {
history: vec!["De senaste 24 timmarna har <strong>3 personer</strong> sökt efter numret 0702269893. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst <strong>4 personer</strong> sökt efter numret.".to_string()],
comments: vec![
Comment {
datetime: "2019-01-18 14:30:55".to_string(),
datetime: stockholm_to_utc("2019-01-18 14:30:55", "%Y-%m-%d %H:%M:%S").unwrap(),
title: "Alnö Design & Produktion AB".to_string(),
message: "Renhållning, service, kemprodukter".to_string(),
}
@@ -125,4 +166,56 @@ mod tests {
assert_eq!(from_html(&document), Ok(expected));
}
/// Telefonforsaljare fixture `0726443387.html`: expects a successful parse
/// with every list empty.
#[test]
fn test_0726443387() {
    let parsed = from_html(include_str!("../../fixtures/telefonforsaljare/0726443387.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// Telefonforsaljare fixture `0751793426.html`: expects a successful parse
/// with every list empty.
#[test]
fn test_0751793426() {
    let parsed = from_html(include_str!("../../fixtures/telefonforsaljare/0751793426.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// Telefonforsaljare fixture `0751793483.html`: expects a successful parse
/// with every list empty.
#[test]
fn test_0751793483() {
    let parsed = from_html(include_str!("../../fixtures/telefonforsaljare/0751793483.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// Telefonforsaljare fixture `0751793499.html`: expects a successful parse
/// with every list empty.
#[test]
fn test_0751793499() {
    let parsed = from_html(include_str!("../../fixtures/telefonforsaljare/0751793499.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
}

View File

@@ -5,7 +5,8 @@ use crate::context::Context;
use crate::probe::{Comment, Entry, Probe};
fn from_html(document: &str) -> Result<Entry, ()> {
let _html = Html::parse_document(document);
/*
let html = Html::parse_document(document);
let messages = Vec::new();
let history = Vec::new();
@@ -16,6 +17,9 @@ fn from_html(document: &str) -> Result<Entry, ()> {
history,
comments,
})
*/
Err(())
}
pub struct VemRingde;
@@ -85,8 +89,36 @@ impl Probe for VemRingde {
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
/// VemRingde fixture `0104754350.html`: expects a successful parse with
/// every list empty.
///
/// NOTE(review): `from_html` in this module appears to be stubbed to return
/// `Err(())`, so this expectation presumably fails until the parser is
/// implemented — confirm.
#[test]
fn test_0104754350() {
    let parsed = from_html(include_str!("../../fixtures/vemringde/0104754350.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// VemRingde fixture `0313908905.html`: expects a successful parse with
/// every list empty.
///
/// NOTE(review): `from_html` in this module appears to be stubbed to return
/// `Err(())`, so this expectation presumably fails until the parser is
/// implemented — confirm.
#[test]
fn test_0313908905() {
    let parsed = from_html(include_str!("../../fixtures/vemringde/0313908905.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
#[test]
fn test_0702269893() {
let document = include_str!("../../fixtures/vemringde/0702269893.html");
@@ -99,4 +131,56 @@ mod tests {
assert_eq!(from_html(&document), Ok(expected));
}
/// VemRingde fixture `0726443387.html`: expects a successful parse with
/// every list empty.
///
/// NOTE(review): `from_html` in this module appears to be stubbed to return
/// `Err(())`, so this expectation presumably fails until the parser is
/// implemented — confirm.
#[test]
fn test_0726443387() {
    let parsed = from_html(include_str!("../../fixtures/vemringde/0726443387.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// VemRingde fixture `0751793426.html`: expects a successful parse with
/// every list empty.
///
/// NOTE(review): `from_html` in this module appears to be stubbed to return
/// `Err(())`, so this expectation presumably fails until the parser is
/// implemented — confirm.
#[test]
fn test_0751793426() {
    let parsed = from_html(include_str!("../../fixtures/vemringde/0751793426.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// VemRingde fixture `0751793483.html`: expects a successful parse with
/// every list empty.
///
/// NOTE(review): `from_html` in this module appears to be stubbed to return
/// `Err(())`, so this expectation presumably fails until the parser is
/// implemented — confirm.
#[test]
fn test_0751793483() {
    let parsed = from_html(include_str!("../../fixtures/vemringde/0751793483.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
/// VemRingde fixture `0751793499.html`: expects a successful parse with
/// every list empty.
///
/// NOTE(review): `from_html` in this module appears to be stubbed to return
/// `Err(())`, so this expectation presumably fails until the parser is
/// implemented — confirm.
#[test]
fn test_0751793499() {
    let parsed = from_html(include_str!("../../fixtures/vemringde/0751793499.html"));

    let empty = Entry {
        messages: Vec::new(),
        history: Vec::new(),
        comments: Vec::new(),
    };
    assert_eq!(parsed, Ok(empty));
}
}