Added test, but broke stuff.
This commit is contained in:
44
src/probe.rs
44
src/probe.rs
@@ -1,3 +1,5 @@
|
||||
use std::fmt;
|
||||
|
||||
mod eniro;
|
||||
mod hitta;
|
||||
mod konsument_info;
|
||||
@@ -12,6 +14,48 @@ pub use self::vem_ringde::VemRingde;
|
||||
|
||||
use crate::context::Context;
|
||||
|
||||
/// Data collected for one phone-number lookup.
///
/// Formatting an `Entry` with [`fmt::Display`] writes one line per item:
/// every message, then every history line, then every comment.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Entry {
    /// Lines written first, each prefixed with a single space.
    pub messages: Vec<String>,
    /// Lines written after `messages`, using the same prefix.
    pub history: Vec<String>,
    /// Comments written last, one bullet line per comment.
    pub comments: Vec<Comment>,
}

impl fmt::Display for Entry {
    /// Writes the entry as newline-terminated lines; an entry with no data
    /// writes nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Messages and history share one line format, so they are written by
        // a single loop. Iterating an empty vector is a no-op, which makes an
        // explicit emptiness check unnecessary.
        for line in self.messages.iter().chain(&self.history) {
            writeln!(f, " {}", line)?;
        }

        for comment in &self.comments {
            writeln!(
                f,
                " * {}: {} - {}",
                comment.datetime, comment.title, comment.message
            )?;
        }

        Ok(())
    }
}

/// A single comment attached to an [`Entry`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Comment {
    /// Timestamp text, stored and printed verbatim.
    pub datetime: String,
    /// Comment heading.
    pub title: String,
    /// Comment body.
    pub message: String,
}
|
||||
|
||||
pub trait Probe {
|
||||
fn uri(&self, _: &str) -> String;
|
||||
fn search(&mut self, _: &mut Context, _: &str) -> Result<(), ()>;
|
||||
|
||||
@@ -1,21 +1,29 @@
|
||||
use unhtml::FromHtml;
|
||||
use unhtml_derive::FromHtml;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::context::Context;
|
||||
use crate::probe::Probe;
|
||||
use crate::probe::{Entry, Probe};
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
#[html(selector = ".CompanyResultListItem")]
|
||||
struct Company {
|
||||
#[html(selector = "h3.name > a", attr = "inner")]
|
||||
name: String,
|
||||
}
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(document);
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
#[html(selector = ".PhoneNoHit")]
|
||||
struct Error {
|
||||
#[html(selector = ".search-info-container > p", attr = "inner")]
|
||||
message: String,
|
||||
let mut messages = Vec::new();
|
||||
let history = Vec::new();
|
||||
let comments = Vec::new();
|
||||
|
||||
let selector = Selector::parse(".CompanyResultListItem h3.name > a").unwrap();
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
let message = element.inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
messages.push(message);
|
||||
}
|
||||
|
||||
Ok(Entry {
|
||||
messages,
|
||||
history,
|
||||
comments,
|
||||
})
|
||||
}
|
||||
|
||||
/// Unit struct that implements [`Probe`]; its search output is printed under
/// the `eniro.se:` heading.
pub struct Eniro;
|
||||
@@ -37,18 +45,32 @@ impl Probe for Eniro {
|
||||
body
|
||||
};
|
||||
|
||||
if let Ok(company) = Company::from_html(&body) {
|
||||
println!("eniro.se:");
|
||||
println!(" {}", company.name);
|
||||
match from_html(&body) {
|
||||
Ok(entry) => {
|
||||
println!("eniro.se:");
|
||||
print!("{}", entry);
|
||||
|
||||
Ok(())
|
||||
} else if let Ok(error) = Error::from_html(&body) {
|
||||
println!("eniro.se:");
|
||||
println!(" Antal sökningar på det här numret: {}", error.message);
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
Err(())
|
||||
Ok(())
|
||||
}
|
||||
Err(_) => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the stored eniro fixture for 0702269893 and checks the
    /// extracted entry.
    #[test]
    fn test_0702269893() {
        let document = include_str!("../../fixtures/eniro/0702269893.html");

        let parsed = from_html(document);

        assert_eq!(
            parsed,
            Ok(Entry {
                messages: vec![String::from("Anonym Kund För Refill")],
                history: Vec::new(),
                comments: Vec::new(),
            })
        );
    }
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use regex::Regex;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::context::Context;
|
||||
use crate::probe::Probe;
|
||||
use crate::probe::{self, Entry, Probe};
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -41,6 +41,46 @@ struct Comment {
|
||||
timestamp: u64,
|
||||
}
|
||||
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let re = Regex::new(r#"<script>__NEXT_DATA__ = (.*?);__NEXT_LOADED_PAGES__"#).unwrap();
|
||||
|
||||
let result = re.captures(&document).ok_or_else(|| {
|
||||
debug!("Hitta: failed to find __NEXT_DATA__");
|
||||
})?;
|
||||
|
||||
let json = result.get(1).unwrap().as_str();
|
||||
|
||||
if let Ok(data) = serde_json::from_str::<Data>(&json) {
|
||||
let messages = Vec::new();
|
||||
let mut history = Vec::new();
|
||||
let mut comments = Vec::new();
|
||||
|
||||
if let Some(phone_data) = data.props.page_props.phone_data {
|
||||
history.push(phone_data.statistics_text);
|
||||
|
||||
for comment in phone_data.comments {
|
||||
comments.push(probe::Comment {
|
||||
datetime: "".to_string(),
|
||||
title: "".to_string(),
|
||||
message: comment.comment,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Entry {
|
||||
messages,
|
||||
history,
|
||||
comments,
|
||||
})
|
||||
} else {
|
||||
if let Err(error) = serde_json::from_str::<Data>(&json) {
|
||||
debug!("Hitta: failed to deserialize data: {:#?}", error);
|
||||
}
|
||||
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Unit struct that implements [`Probe`]; its search output is printed under
/// the `hitta.se:` heading.
pub struct Hitta;
|
||||
|
||||
impl Probe for Hitta {
|
||||
@@ -60,36 +100,32 @@ impl Probe for Hitta {
|
||||
body
|
||||
};
|
||||
|
||||
let re = Regex::new(r#"<script>__NEXT_DATA__ = (.*?);__NEXT_LOADED_PAGES__"#).unwrap();
|
||||
|
||||
if let Some(result) = re.captures(&body) {
|
||||
let json = result.get(1).unwrap().as_str();
|
||||
|
||||
if let Ok(data) = serde_json::from_str::<Data>(&json) {
|
||||
match from_html(&body) {
|
||||
Ok(entry) => {
|
||||
println!("hitta.se:");
|
||||
|
||||
if let Some(phone_data) = data.props.page_props.phone_data {
|
||||
println!(" {}", phone_data.statistics_text);
|
||||
|
||||
for comment in &phone_data.comments {
|
||||
println!(" * {}", comment.comment);
|
||||
}
|
||||
} else {
|
||||
println!(" Vi hittar det mesta, men inte just den här sidan.");
|
||||
}
|
||||
print!("{}", entry);
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
if let Err(error) = serde_json::from_str::<Data>(&json) {
|
||||
debug!("Hitta: failed to deserialize data: {:#?}", error);
|
||||
}
|
||||
|
||||
Err(())
|
||||
}
|
||||
} else {
|
||||
debug!("Hitta: failed to find __NEXT_DATA__");
|
||||
|
||||
Err(())
|
||||
Err(_) => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the stored hitta fixture for 0702269893 and checks the
    /// extracted entry.
    #[test]
    fn test_0702269893() {
        let document = include_str!("../../fixtures/hitta/0702269893.html");

        let parsed = from_html(document);

        assert_eq!(
            parsed,
            Ok(Entry {
                messages: Vec::new(),
                history: vec![String::from("Tre andra har också sökt på detta nummer")],
                comments: Vec::new(),
            })
        );
    }
}
|
||||
|
||||
@@ -1,21 +1,29 @@
|
||||
use unhtml::FromHtml;
|
||||
use unhtml_derive::FromHtml;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::context::Context;
|
||||
use crate::probe::Probe;
|
||||
use crate::probe::{Entry, Probe};
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
#[html(selector = ".panel-body")]
|
||||
struct Info {
|
||||
#[html(selector = "h4", attr = "inner")]
|
||||
message: String,
|
||||
}
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(document);
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
#[html(selector = ".body-content > .row")]
|
||||
struct Error {
|
||||
#[html(selector = ".col-md-12", attr = "inner")]
|
||||
message: String,
|
||||
let mut messages = Vec::new();
|
||||
let history = Vec::new();
|
||||
let comments = Vec::new();
|
||||
|
||||
let selector = Selector::parse(".panel-heading > h1:nth-child(3)").unwrap();
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
let message = element.inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
messages.push(message);
|
||||
}
|
||||
|
||||
Ok(Entry {
|
||||
messages,
|
||||
history,
|
||||
comments,
|
||||
})
|
||||
}
|
||||
|
||||
/// Unit struct that implements [`Probe`]; its search output is printed under
/// the `konsumentinfo.se:` heading.
pub struct KonsumentInfo;
|
||||
@@ -37,16 +45,32 @@ impl Probe for KonsumentInfo {
|
||||
body
|
||||
};
|
||||
|
||||
println!("konsumentinfo.se:");
|
||||
match from_html(&body) {
|
||||
Ok(entry) => {
|
||||
println!("konsumentinfo.se:");
|
||||
print!("{}", entry);
|
||||
|
||||
if let Ok(info) = Info::from_html(&body) {
|
||||
println!(" {}", info.message);
|
||||
} else if let Ok(error) = Error::from_html(&body) {
|
||||
println!(" {}", error.message);
|
||||
} else {
|
||||
println!(" Failed to find any data");
|
||||
Ok(())
|
||||
}
|
||||
Err(_) => Err(()),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the stored konsumentinfo fixture for 0702269893 and checks the
    /// extracted entry.
    #[test]
    fn test_0702269893() {
        let document = include_str!("../../fixtures/konsumentinfo/0702269893.html");

        let parsed = from_html(document);

        assert_eq!(
            parsed,
            Ok(Entry {
                messages: vec![String::from("Hydroscand AB")],
                history: Vec::new(),
                comments: Vec::new(),
            })
        );
    }
}
|
||||
|
||||
@@ -1,39 +1,70 @@
|
||||
use log::debug;
|
||||
use unhtml::{self, FromHtml, VecFromHtml};
|
||||
use unhtml_derive::FromHtml;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::context::Context;
|
||||
use crate::probe::Probe;
|
||||
use crate::probe::{Comment, Entry, Probe};
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
#[html(selector = "article")]
|
||||
struct Page {
|
||||
content: Content,
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(document);
|
||||
|
||||
#[html(selector = "#kommentarer > [itemtype='http://data-vocabulary.org/Review']")]
|
||||
comments: Vec<Comment>,
|
||||
}
|
||||
let mut messages = Vec::new();
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
#[html(selector = "#content")]
|
||||
struct Content {
|
||||
#[html(selector = "p:nth-child(2)", attr = "inner", default = "")]
|
||||
title: String,
|
||||
let selector = Selector::parse("#content p:nth-child(2) i").unwrap();
|
||||
|
||||
#[html(selector = "p:nth-child(4)", attr = "inner")]
|
||||
history: String,
|
||||
}
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
let message = element.inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
struct Comment {
|
||||
#[html(selector = "small", attr = "datetime")]
|
||||
datetime: String,
|
||||
messages.push(message);
|
||||
}
|
||||
|
||||
#[html(selector = "h3", attr = "inner")]
|
||||
title: String,
|
||||
let mut history = Vec::new();
|
||||
|
||||
#[html(selector = "[itemprop='description']", attr = "inner")]
|
||||
comment: String,
|
||||
let selector = Selector::parse("#content p:nth-child(5)").unwrap();
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
history.push(element.inner_html());
|
||||
}
|
||||
|
||||
let mut comments = Vec::new();
|
||||
|
||||
let selector =
|
||||
Selector::parse("#kommentarer > [itemtype='http://data-vocabulary.org/Review']").unwrap();
|
||||
|
||||
for comment in html.select(&selector) {
|
||||
let selector = Selector::parse("small").unwrap();
|
||||
|
||||
let datetime = comment
|
||||
.select(&selector)
|
||||
.next()
|
||||
.unwrap()
|
||||
.value()
|
||||
.attr("datetime")
|
||||
.unwrap()
|
||||
.to_string();
|
||||
|
||||
let selector = Selector::parse("h3").unwrap();
|
||||
|
||||
let title = comment.select(&selector).next().unwrap().inner_html();
|
||||
let title = htmlescape::decode_html(&title).unwrap();
|
||||
|
||||
let selector = Selector::parse("[itemprop='description']").unwrap();
|
||||
|
||||
let message = comment.select(&selector).next().unwrap().inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
comments.push(Comment {
|
||||
datetime,
|
||||
title,
|
||||
message,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Entry {
|
||||
messages,
|
||||
history,
|
||||
comments,
|
||||
})
|
||||
}
|
||||
|
||||
/// Unit struct that implements [`Probe`]; its search output is printed under
/// the `telefonforsaljare.nu:` heading.
pub struct Telefonforsaljare;
|
||||
@@ -57,27 +88,41 @@ impl Probe for Telefonforsaljare {
|
||||
|
||||
println!("telefonforsaljare.nu:");
|
||||
|
||||
if let Ok(page) = Page::from_html(&body) {
|
||||
if !page.content.title.is_empty() {
|
||||
println!(" {}", page.content.title);
|
||||
match from_html(&body) {
|
||||
Ok(entry) => {
|
||||
print!("{}", entry);
|
||||
}
|
||||
Err(_) => {
|
||||
debug!("telefonforsaljare: failed to parse page");
|
||||
|
||||
println!(" {}", page.content.history);
|
||||
|
||||
for comment in &page.comments {
|
||||
println!(
|
||||
" * {}: {} - {}",
|
||||
comment.datetime, comment.title, comment.comment
|
||||
);
|
||||
println!(" Failed to find any data");
|
||||
}
|
||||
} else {
|
||||
if let Err(error) = Page::from_html(&body) {
|
||||
debug!("telefonforsaljare: failed to parse page: {:#?}", error);
|
||||
}
|
||||
|
||||
println!(" Failed to find any data");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the stored telefonforsaljare fixture for 0702269893 and checks
    /// the extracted message, history line and single comment.
    #[test]
    fn test_0702269893() {
        let document = include_str!("../../fixtures/telefonforsaljare/0702269893.html");

        let only_comment = Comment {
            datetime: String::from("2019-01-18 14:30:55"),
            title: String::from("Alnö Design & Produktion AB"),
            message: String::from("Renhållning, service, kemprodukter"),
        };

        let parsed = from_html(document);

        assert_eq!(
            parsed,
            Ok(Entry {
                messages: vec![String::from("Alnö Design & Produktion AB")],
                history: vec![String::from("De senaste 24 timmarna har <strong>3 personer</strong> sökt efter numret 0702269893. Det kan tyda på att numret används av telefonförsäljare. Totalt har minst <strong>4 personer</strong> sökt efter numret.")],
                comments: vec![only_comment],
            })
        );
    }
}
|
||||
|
||||
@@ -1,36 +1,21 @@
|
||||
use log::debug;
|
||||
use unhtml::{self, FromHtml, VecFromHtml};
|
||||
use unhtml_derive::FromHtml;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::context::Context;
|
||||
use crate::probe::Probe;
|
||||
use crate::probe::{Comment, Entry, Probe};
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
#[html(selector = "#content")]
|
||||
struct Page {
|
||||
#[html(selector = "#toporganisations > li")]
|
||||
owners: Vec<Owner>,
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let _html = Html::parse_document(document);
|
||||
|
||||
#[html(selector = "#calls > ol.table > li")]
|
||||
calls: Vec<Call>,
|
||||
}
|
||||
let messages = Vec::new();
|
||||
let history = Vec::new();
|
||||
let comments = Vec::new();
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
struct Owner {
|
||||
#[html(selector = "a", attr = "inner")]
|
||||
title: String,
|
||||
|
||||
#[html(selector = "span", attr = "inner")]
|
||||
calls: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, FromHtml)]
|
||||
struct Call {
|
||||
#[html(selector = ".w40", attr = "inner")]
|
||||
who: String,
|
||||
|
||||
#[html(selector = ".w15", attr = "inner")]
|
||||
date: String,
|
||||
Ok(Entry {
|
||||
messages,
|
||||
history,
|
||||
comments,
|
||||
})
|
||||
}
|
||||
|
||||
/// Unit struct that implements [`Probe`]; its search output is printed under
/// the `vemringde.se:` heading.
pub struct VemRingde;
|
||||
@@ -52,8 +37,17 @@ impl Probe for VemRingde {
|
||||
body
|
||||
};
|
||||
|
||||
println!("vemringde.se:");
|
||||
match from_html(&body) {
|
||||
Ok(entry) => {
|
||||
println!("vemringde.se:");
|
||||
print!("{}", entry);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Err(_) => Err(()),
|
||||
}
|
||||
|
||||
/*
|
||||
if let Ok(page) = Page::from_html(&body) {
|
||||
if !page.owners.is_empty() {
|
||||
println!(" ägare:");
|
||||
@@ -85,7 +79,24 @@ impl Probe for VemRingde {
|
||||
|
||||
println!(" Failed to find any data");
|
||||
}
|
||||
|
||||
Err(())
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the stored vemringde fixture for 0702269893 and checks that the
    /// resulting entry carries no data.
    #[test]
    fn test_0702269893() {
        let document = include_str!("../../fixtures/vemringde/0702269893.html");

        let parsed = from_html(document);

        assert_eq!(
            parsed,
            Ok(Entry {
                messages: Vec::new(),
                history: Vec::new(),
                comments: Vec::new(),
            })
        );
    }
}
|
||||
|
||||
Reference in New Issue
Block a user