Dry.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
/target
|
/target
|
||||||
**/*.rs.bk
|
|
||||||
|
*.pending-snap
|
||||||
|
|||||||
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -1818,6 +1818,7 @@ dependencies = [
|
|||||||
"fern 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
"fern 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"insta 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"insta 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"reqwest 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
"reqwest 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ chrono-tz = "0.5"
|
|||||||
directories = "1.0"
|
directories = "1.0"
|
||||||
fern = { version = "0.5", features = ["colored"] }
|
fern = { version = "0.5", features = ["colored"] }
|
||||||
htmlescape = "0.3"
|
htmlescape = "0.3"
|
||||||
|
lazy_static = "1.2"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
regex = "1.1"
|
regex = "1.1"
|
||||||
reqwest = "0.9"
|
reqwest = "0.9"
|
||||||
|
|||||||
37
src/html.rs
Normal file
37
src/html.rs
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
use std::str;
|
||||||
|
|
||||||
|
use scraper::{ElementRef, Html};
|
||||||
|
|
||||||
|
pub trait SelectExt {
|
||||||
|
fn element(&self) -> ElementRef;
|
||||||
|
|
||||||
|
fn easy_text(&self) -> String {
|
||||||
|
let data = self
|
||||||
|
.element()
|
||||||
|
.text()
|
||||||
|
.map(str::trim)
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(" ");
|
||||||
|
|
||||||
|
htmlescape::decode_html(&data).unwrap_or(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn easy_inner_html(&self) -> String {
|
||||||
|
let data = self.element().inner_html();
|
||||||
|
|
||||||
|
htmlescape::decode_html(&data).unwrap_or(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SelectExt for Html {
|
||||||
|
fn element(&self) -> ElementRef {
|
||||||
|
self.root_element()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> SelectExt for ElementRef<'a> {
|
||||||
|
fn element(&self) -> ElementRef {
|
||||||
|
*self
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
mod context;
|
mod context;
|
||||||
pub mod entry;
|
pub mod entry;
|
||||||
|
mod html;
|
||||||
mod probe;
|
mod probe;
|
||||||
|
|
||||||
pub use crate::context::Context;
|
pub use crate::context::Context;
|
||||||
|
|||||||
@@ -1,8 +1,18 @@
|
|||||||
|
use lazy_static::lazy_static;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
use crate::entry::Entry;
|
use crate::entry::Entry;
|
||||||
|
use crate::html::SelectExt;
|
||||||
use crate::probe::Probe;
|
use crate::probe::Probe;
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref MESSAGE: Selector = Selector::parse(".CompanyResultListItem h3.name > a").unwrap();
|
||||||
|
static ref HISTORY_1: Selector =
|
||||||
|
Selector::parse("div.PhoneNoHit div.search-info-container p").unwrap();
|
||||||
|
static ref HISTORY_2: Selector =
|
||||||
|
Selector::parse("div.feedback-types div.feedback-type-item").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||||
let html = Html::parse_document(document);
|
let html = Html::parse_document(document);
|
||||||
|
|
||||||
@@ -10,40 +20,23 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
|||||||
let mut history = Vec::new();
|
let mut history = Vec::new();
|
||||||
let comments = Vec::new();
|
let comments = Vec::new();
|
||||||
|
|
||||||
let selector = Selector::parse(".CompanyResultListItem h3.name > a").unwrap();
|
if let Some(message) = html
|
||||||
|
.select(&MESSAGE)
|
||||||
if let Some(element) = html.select(&selector).next() {
|
.next()
|
||||||
let message = element.inner_html();
|
.map(|element| element.easy_text())
|
||||||
let message = htmlescape::decode_html(&message).unwrap();
|
{
|
||||||
|
|
||||||
messages.push(message);
|
messages.push(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
let selector = Selector::parse("div.PhoneNoHit div.search-info-container p").unwrap();
|
if let Some(message) = html
|
||||||
|
.select(&HISTORY_1)
|
||||||
if let Some(element) = html.select(&selector).next() {
|
.next()
|
||||||
let message = element
|
.map(|element| element.easy_text())
|
||||||
.text()
|
{
|
||||||
.map(str::trim)
|
|
||||||
.filter(|s| !s.is_empty())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ");
|
|
||||||
|
|
||||||
let message = htmlescape::decode_html(&message).unwrap();
|
|
||||||
|
|
||||||
history.push(message);
|
history.push(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
let selector = Selector::parse("div.feedback-types div.feedback-type-item").unwrap();
|
for message in html.select(&HISTORY_2).map(|element| element.easy_text()) {
|
||||||
|
|
||||||
for element in html.select(&selector) {
|
|
||||||
let message = element
|
|
||||||
.text()
|
|
||||||
.map(str::trim)
|
|
||||||
.filter(|s| !s.is_empty())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ");
|
|
||||||
|
|
||||||
history.push(message);
|
history.push(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,13 @@
|
|||||||
|
use lazy_static::lazy_static;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
use crate::html::SelectExt;
|
||||||
use crate::probe::{Entry, Probe};
|
use crate::probe::{Entry, Probe};
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref MESSAGE: Selector = Selector::parse(".panel-heading > h1:nth-child(3)").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||||
let html = Html::parse_document(document);
|
let html = Html::parse_document(document);
|
||||||
|
|
||||||
@@ -9,12 +15,11 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
|||||||
let history = Vec::new();
|
let history = Vec::new();
|
||||||
let comments = Vec::new();
|
let comments = Vec::new();
|
||||||
|
|
||||||
let selector = Selector::parse(".panel-heading > h1:nth-child(3)").unwrap();
|
if let Some(message) = html
|
||||||
|
.select(&MESSAGE)
|
||||||
if let Some(element) = html.select(&selector).next() {
|
.next()
|
||||||
let message = element.inner_html();
|
.map(|element| element.easy_text())
|
||||||
let message = htmlescape::decode_html(&message).unwrap();
|
{
|
||||||
|
|
||||||
messages.push(message);
|
messages.push(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,52 +1,51 @@
|
|||||||
use chrono_tz::Europe::Stockholm;
|
use chrono_tz::Europe::Stockholm;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
use crate::entry::{Comment, Date, Entry};
|
use crate::entry::{Comment, Date, Entry};
|
||||||
|
use crate::html::SelectExt;
|
||||||
use crate::probe::Probe;
|
use crate::probe::Probe;
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref MESSAGE: Selector = Selector::parse("#content p:nth-child(2) i").unwrap();
|
||||||
|
static ref HISTORY_1: Selector = Selector::parse("#content p:nth-child(4)").unwrap();
|
||||||
|
static ref HISTORY_2: Selector = Selector::parse("#content p:nth-child(5)").unwrap();
|
||||||
|
static ref COMMENTS: Selector =
|
||||||
|
Selector::parse("#kommentarer > [itemtype='http://data-vocabulary.org/Review']").unwrap();
|
||||||
|
static ref COMMENT_DATETIME: Selector = Selector::parse("small").unwrap();
|
||||||
|
static ref COMMENT_TITLE: Selector = Selector::parse("h3").unwrap();
|
||||||
|
static ref COMMENT_MESSAGE: Selector = Selector::parse("[itemprop='description']").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||||
let html = Html::parse_document(document);
|
let html = Html::parse_document(document);
|
||||||
|
|
||||||
let mut messages = Vec::new();
|
let mut messages = Vec::new();
|
||||||
|
let mut history = Vec::new();
|
||||||
|
let mut comments = Vec::new();
|
||||||
|
|
||||||
let selector = Selector::parse("#content p:nth-child(2) i").unwrap();
|
if let Some(element) = html.select(&MESSAGE).next() {
|
||||||
|
|
||||||
if let Some(element) = html.select(&selector).next() {
|
|
||||||
let message = element.inner_html();
|
let message = element.inner_html();
|
||||||
let message = htmlescape::decode_html(&message).unwrap();
|
let message = htmlescape::decode_html(&message).unwrap();
|
||||||
|
|
||||||
messages.push(message);
|
messages.push(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut history = Vec::new();
|
if let Some(message) = html
|
||||||
|
.select(if messages.is_empty() {
|
||||||
let selector = if messages.is_empty() {
|
&HISTORY_1
|
||||||
Selector::parse("#content p:nth-child(4)").unwrap()
|
|
||||||
} else {
|
} else {
|
||||||
Selector::parse("#content p:nth-child(5)").unwrap()
|
&HISTORY_2
|
||||||
};
|
})
|
||||||
|
.next()
|
||||||
if let Some(element) = html.select(&selector).next() {
|
.map(|element| element.easy_text())
|
||||||
let message = element
|
{
|
||||||
.text()
|
|
||||||
.map(str::trim)
|
|
||||||
.filter(|s| !s.is_empty())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ");
|
|
||||||
|
|
||||||
history.push(message);
|
history.push(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut comments = Vec::new();
|
for comment in html.select(&COMMENTS) {
|
||||||
|
|
||||||
let selector =
|
|
||||||
Selector::parse("#kommentarer > [itemtype='http://data-vocabulary.org/Review']").unwrap();
|
|
||||||
|
|
||||||
for comment in html.select(&selector) {
|
|
||||||
let selector = Selector::parse("small").unwrap();
|
|
||||||
|
|
||||||
let datetime = comment
|
let datetime = comment
|
||||||
.select(&selector)
|
.select(&COMMENT_DATETIME)
|
||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.value()
|
.value()
|
||||||
@@ -54,20 +53,22 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
let selector = Selector::parse("h3").unwrap();
|
let title = comment
|
||||||
|
.select(&COMMENT_TITLE)
|
||||||
|
.next()
|
||||||
|
.map(|element| element.easy_inner_html())
|
||||||
|
.filter(|title| !title.is_empty());
|
||||||
|
|
||||||
let title = comment.select(&selector).next().unwrap().inner_html();
|
let message = comment
|
||||||
let title = htmlescape::decode_html(&title).unwrap();
|
.select(&COMMENT_MESSAGE)
|
||||||
|
.next()
|
||||||
let selector = Selector::parse("[itemprop='description']").unwrap();
|
.map(|element| element.easy_inner_html())
|
||||||
|
.unwrap_or_else(String::new);
|
||||||
let message = comment.select(&selector).next().unwrap().inner_html();
|
|
||||||
let message = htmlescape::decode_html(&message).unwrap();
|
|
||||||
|
|
||||||
comments.push(Comment {
|
comments.push(Comment {
|
||||||
datetime: Date::datetime_from(Stockholm, &datetime, "%Y-%m-%d %H:%M:%S")
|
datetime: Date::datetime_from(Stockholm, &datetime, "%Y-%m-%d %H:%M:%S")
|
||||||
.expect("failed to parse datetime"),
|
.expect("failed to parse datetime"),
|
||||||
title: if title.is_empty() { None } else { Some(title) },
|
title,
|
||||||
message,
|
message,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,20 @@
|
|||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use chrono_tz::Europe::Stockholm;
|
use chrono_tz::Europe::Stockholm;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
use crate::entry::{Comment, Date, Entry};
|
use crate::entry::{Comment, Date, Entry};
|
||||||
|
use crate::html::SelectExt;
|
||||||
use crate::probe::Probe;
|
use crate::probe::Probe;
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref MESSAGE: Selector = Selector::parse("#toporganisations li").unwrap();
|
||||||
|
static ref COMMENTS: Selector = Selector::parse("#calls ol li").unwrap();
|
||||||
|
static ref COMMENT_DATETIME: Selector = Selector::parse("div:nth-child(4)").unwrap();
|
||||||
|
static ref COMMENT_MESSAGE: Selector = Selector::parse("div:nth-child(3)").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||||
let html = Html::parse_document(document);
|
let html = Html::parse_document(document);
|
||||||
|
|
||||||
@@ -13,43 +22,22 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
|||||||
let history = Vec::new();
|
let history = Vec::new();
|
||||||
let mut comments = Vec::new();
|
let mut comments = Vec::new();
|
||||||
|
|
||||||
let selector = Selector::parse("#toporganisations li").unwrap();
|
for message in html.select(&MESSAGE).map(|element| element.easy_text()) {
|
||||||
|
|
||||||
for element in html.select(&selector) {
|
|
||||||
let message = element
|
|
||||||
.text()
|
|
||||||
.map(str::trim)
|
|
||||||
.filter(|s| !s.is_empty())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ");
|
|
||||||
|
|
||||||
messages.push(message);
|
messages.push(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
let selector = Selector::parse("#calls ol li").expect("failed to build selector");
|
for element in html.select(&COMMENTS) {
|
||||||
|
|
||||||
for element in html.select(&selector) {
|
|
||||||
let selector = Selector::parse("div:nth-child(4)").expect("failed to build selector");
|
|
||||||
|
|
||||||
let date = element
|
let date = element
|
||||||
.select(&selector)
|
.select(&COMMENT_DATETIME)
|
||||||
.next()
|
.next()
|
||||||
.expect("failed to find datetime")
|
.map(|element| element.easy_inner_html())
|
||||||
.inner_html();
|
.expect("failed to find datetime");
|
||||||
|
|
||||||
let selector = Selector::parse("div:nth-child(3)").expect("failed to build selector");
|
|
||||||
|
|
||||||
let message = element
|
let message = element
|
||||||
.select(&selector)
|
.select(&COMMENT_MESSAGE)
|
||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.map(|element| element.easy_text())
|
||||||
.text()
|
.unwrap_or_else(String::new);
|
||||||
.map(str::trim)
|
|
||||||
.filter(|s| !s.is_empty())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ");
|
|
||||||
|
|
||||||
let message = htmlescape::decode_html(&message).unwrap();
|
|
||||||
|
|
||||||
comments.push(Comment {
|
comments.push(Comment {
|
||||||
datetime: Date::date_from(Stockholm, &date, "%Y-%m-%d").expect("failed to parse date"),
|
datetime: Date::date_from(Stockholm, &date, "%Y-%m-%d").expect("failed to parse date"),
|
||||||
|
|||||||
Reference in New Issue
Block a user