Dry.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
/target
|
||||
**/*.rs.bk
|
||||
|
||||
*.pending-snap
|
||||
|
||||
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -1818,6 +1818,7 @@ dependencies = [
|
||||
"fern 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"insta 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
||||
@@ -11,6 +11,7 @@ chrono-tz = "0.5"
|
||||
directories = "1.0"
|
||||
fern = { version = "0.5", features = ["colored"] }
|
||||
htmlescape = "0.3"
|
||||
lazy_static = "1.2"
|
||||
log = "0.4"
|
||||
regex = "1.1"
|
||||
reqwest = "0.9"
|
||||
|
||||
37
src/html.rs
Normal file
37
src/html.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
use std::str;
|
||||
|
||||
use scraper::{ElementRef, Html};
|
||||
|
||||
/// Extension trait that adds text-extraction helpers to anything that can
/// hand out an `ElementRef`.
///
/// Implementors only supply `element`; `easy_text` and `easy_inner_html`
/// are provided on top of it.
pub trait SelectExt {
|
||||
/// Returns the element the provided helper methods operate on.
fn element(&self) -> ElementRef;
|
||||
|
||||
/// Returns the element's text as a single string.
///
/// Each piece yielded by `text()` is trimmed, pieces that are empty after
/// trimming are dropped, and the rest are joined with one space. The
/// joined string is then passed through `htmlescape::decode_html`; if
/// decoding fails, the undecoded joined string is returned instead.
fn easy_text(&self) -> String {
|
||||
let data = self
|
||||
.element()
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
// NOTE(review): `text()` presumably already yields entity-decoded text,
// which would make this a second decoding pass - confirm it is intended.
htmlescape::decode_html(&data).unwrap_or(data)
|
||||
}
|
||||
|
||||
/// Returns the element's `inner_html()` passed through
/// `htmlescape::decode_html`, falling back to the raw inner HTML when
/// decoding fails.
fn easy_inner_html(&self) -> String {
|
||||
let data = self.element().inner_html();
|
||||
|
||||
// A decode error is not propagated; the undecoded markup is returned.
htmlescape::decode_html(&data).unwrap_or(data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets a whole parsed document use the `SelectExt` helpers.
impl SelectExt for Html {
|
||||
/// Returns the document's root element, so the helpers cover the
/// entire document.
fn element(&self) -> ElementRef {
|
||||
self.root_element()
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets a single selected element use the `SelectExt` helpers.
impl<'a> SelectExt for ElementRef<'a> {
|
||||
/// Returns a copy of this element reference.
fn element(&self) -> ElementRef {
|
||||
*self
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
mod context;
|
||||
pub mod entry;
|
||||
mod html;
|
||||
mod probe;
|
||||
|
||||
pub use crate::context::Context;
|
||||
|
||||
@@ -1,8 +1,18 @@
|
||||
use lazy_static::lazy_static;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::entry::Entry;
|
||||
use crate::html::SelectExt;
|
||||
use crate::probe::Probe;
|
||||
|
||||
// CSS selectors used by `from_html`, declared through `lazy_static!` instead
// of being parsed inside the function body. Each `unwrap()` panics if the
// selector string fails to parse.
lazy_static! {
|
||||
// First match is read with `easy_text()` and pushed onto `messages`.
static ref MESSAGE: Selector = Selector::parse(".CompanyResultListItem h3.name > a").unwrap();
|
||||
// First match is read with `easy_text()` and pushed onto `history`.
static ref HISTORY_1: Selector =
|
||||
Selector::parse("div.PhoneNoHit div.search-info-container p").unwrap();
|
||||
// Every match is read with `easy_text()` and pushed onto `history`.
static ref HISTORY_2: Selector =
|
||||
Selector::parse("div.feedback-types div.feedback-type-item").unwrap();
|
||||
}
|
||||
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(document);
|
||||
|
||||
@@ -10,40 +20,23 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let mut history = Vec::new();
|
||||
let comments = Vec::new();
|
||||
|
||||
let selector = Selector::parse(".CompanyResultListItem h3.name > a").unwrap();
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
let message = element.inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
if let Some(message) = html
|
||||
.select(&MESSAGE)
|
||||
.next()
|
||||
.map(|element| element.easy_text())
|
||||
{
|
||||
messages.push(message);
|
||||
}
|
||||
|
||||
let selector = Selector::parse("div.PhoneNoHit div.search-info-container p").unwrap();
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
let message = element
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
if let Some(message) = html
|
||||
.select(&HISTORY_1)
|
||||
.next()
|
||||
.map(|element| element.easy_text())
|
||||
{
|
||||
history.push(message);
|
||||
}
|
||||
|
||||
let selector = Selector::parse("div.feedback-types div.feedback-type-item").unwrap();
|
||||
|
||||
for element in html.select(&selector) {
|
||||
let message = element
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
for message in html.select(&HISTORY_2).map(|element| element.easy_text()) {
|
||||
history.push(message);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,13 @@
|
||||
use lazy_static::lazy_static;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::html::SelectExt;
|
||||
use crate::probe::{Entry, Probe};
|
||||
|
||||
// CSS selector used by `from_html`, declared through `lazy_static!` instead
// of being parsed inside the function body. The `unwrap()` panics if the
// selector string fails to parse.
lazy_static! {
|
||||
// First match is read with `easy_text()` and pushed onto `messages`.
static ref MESSAGE: Selector = Selector::parse(".panel-heading > h1:nth-child(3)").unwrap();
|
||||
}
|
||||
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(document);
|
||||
|
||||
@@ -9,12 +15,11 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let history = Vec::new();
|
||||
let comments = Vec::new();
|
||||
|
||||
let selector = Selector::parse(".panel-heading > h1:nth-child(3)").unwrap();
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
let message = element.inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
if let Some(message) = html
|
||||
.select(&MESSAGE)
|
||||
.next()
|
||||
.map(|element| element.easy_text())
|
||||
{
|
||||
messages.push(message);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,52 +1,51 @@
|
||||
use chrono_tz::Europe::Stockholm;
|
||||
use lazy_static::lazy_static;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::entry::{Comment, Date, Entry};
|
||||
use crate::html::SelectExt;
|
||||
use crate::probe::Probe;
|
||||
|
||||
// CSS selectors used by `from_html`, declared through `lazy_static!` instead
// of being parsed inside the function body. Each `unwrap()` panics if the
// selector string fails to parse.
lazy_static! {
|
||||
// First match supplies the entry pushed onto `messages`.
static ref MESSAGE: Selector = Selector::parse("#content p:nth-child(2) i").unwrap();
|
||||
// History paragraph selected when `messages` is empty.
static ref HISTORY_1: Selector = Selector::parse("#content p:nth-child(4)").unwrap();
|
||||
// History paragraph selected when `messages` is not empty.
static ref HISTORY_2: Selector = Selector::parse("#content p:nth-child(5)").unwrap();
|
||||
// One match per comment; the three selectors below are run inside each match.
static ref COMMENTS: Selector =
|
||||
Selector::parse("#kommentarer > [itemtype='http://data-vocabulary.org/Review']").unwrap();
|
||||
// Element the comment's datetime string is read from.
static ref COMMENT_DATETIME: Selector = Selector::parse("small").unwrap();
|
||||
// Optional comment title; an empty title is filtered out to `None`.
static ref COMMENT_TITLE: Selector = Selector::parse("h3").unwrap();
|
||||
// Comment body; a missing match yields an empty string.
static ref COMMENT_MESSAGE: Selector = Selector::parse("[itemprop='description']").unwrap();
|
||||
}
|
||||
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(document);
|
||||
|
||||
let mut messages = Vec::new();
|
||||
let mut history = Vec::new();
|
||||
let mut comments = Vec::new();
|
||||
|
||||
let selector = Selector::parse("#content p:nth-child(2) i").unwrap();
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
if let Some(element) = html.select(&MESSAGE).next() {
|
||||
let message = element.inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
|
||||
messages.push(message);
|
||||
}
|
||||
|
||||
let mut history = Vec::new();
|
||||
|
||||
let selector = if messages.is_empty() {
|
||||
Selector::parse("#content p:nth-child(4)").unwrap()
|
||||
if let Some(message) = html
|
||||
.select(if messages.is_empty() {
|
||||
&HISTORY_1
|
||||
} else {
|
||||
Selector::parse("#content p:nth-child(5)").unwrap()
|
||||
};
|
||||
|
||||
if let Some(element) = html.select(&selector).next() {
|
||||
let message = element
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
&HISTORY_2
|
||||
})
|
||||
.next()
|
||||
.map(|element| element.easy_text())
|
||||
{
|
||||
history.push(message);
|
||||
}
|
||||
|
||||
let mut comments = Vec::new();
|
||||
|
||||
let selector =
|
||||
Selector::parse("#kommentarer > [itemtype='http://data-vocabulary.org/Review']").unwrap();
|
||||
|
||||
for comment in html.select(&selector) {
|
||||
let selector = Selector::parse("small").unwrap();
|
||||
|
||||
for comment in html.select(&COMMENTS) {
|
||||
let datetime = comment
|
||||
.select(&selector)
|
||||
.select(&COMMENT_DATETIME)
|
||||
.next()
|
||||
.unwrap()
|
||||
.value()
|
||||
@@ -54,20 +53,22 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
.unwrap()
|
||||
.to_string();
|
||||
|
||||
let selector = Selector::parse("h3").unwrap();
|
||||
let title = comment
|
||||
.select(&COMMENT_TITLE)
|
||||
.next()
|
||||
.map(|element| element.easy_inner_html())
|
||||
.filter(|title| !title.is_empty());
|
||||
|
||||
let title = comment.select(&selector).next().unwrap().inner_html();
|
||||
let title = htmlescape::decode_html(&title).unwrap();
|
||||
|
||||
let selector = Selector::parse("[itemprop='description']").unwrap();
|
||||
|
||||
let message = comment.select(&selector).next().unwrap().inner_html();
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
let message = comment
|
||||
.select(&COMMENT_MESSAGE)
|
||||
.next()
|
||||
.map(|element| element.easy_inner_html())
|
||||
.unwrap_or_else(String::new);
|
||||
|
||||
comments.push(Comment {
|
||||
datetime: Date::datetime_from(Stockholm, &datetime, "%Y-%m-%d %H:%M:%S")
|
||||
.expect("failed to parse datetime"),
|
||||
title: if title.is_empty() { None } else { Some(title) },
|
||||
title,
|
||||
message,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,11 +1,20 @@
|
||||
use std::str;
|
||||
|
||||
use chrono_tz::Europe::Stockholm;
|
||||
use lazy_static::lazy_static;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::entry::{Comment, Date, Entry};
|
||||
use crate::html::SelectExt;
|
||||
use crate::probe::Probe;
|
||||
|
||||
// CSS selectors used by `from_html`, declared through `lazy_static!` instead
// of being parsed inside the function body. Each `unwrap()` panics if the
// selector string fails to parse.
lazy_static! {
|
||||
// Every match is read with `easy_text()` and pushed onto `messages`.
static ref MESSAGE: Selector = Selector::parse("#toporganisations li").unwrap();
|
||||
// One match per comment; the two selectors below are run inside each match.
static ref COMMENTS: Selector = Selector::parse("#calls ol li").unwrap();
|
||||
// Element holding the comment date, later parsed with "%Y-%m-%d".
static ref COMMENT_DATETIME: Selector = Selector::parse("div:nth-child(4)").unwrap();
|
||||
// Element holding the comment text; a missing match yields an empty string.
static ref COMMENT_MESSAGE: Selector = Selector::parse("div:nth-child(3)").unwrap();
|
||||
}
|
||||
|
||||
fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(document);
|
||||
|
||||
@@ -13,43 +22,22 @@ fn from_html(document: &str) -> Result<Entry, ()> {
|
||||
let history = Vec::new();
|
||||
let mut comments = Vec::new();
|
||||
|
||||
let selector = Selector::parse("#toporganisations li").unwrap();
|
||||
|
||||
for element in html.select(&selector) {
|
||||
let message = element
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
for message in html.select(&MESSAGE).map(|element| element.easy_text()) {
|
||||
messages.push(message);
|
||||
}
|
||||
|
||||
let selector = Selector::parse("#calls ol li").expect("failed to build selector");
|
||||
|
||||
for element in html.select(&selector) {
|
||||
let selector = Selector::parse("div:nth-child(4)").expect("failed to build selector");
|
||||
|
||||
for element in html.select(&COMMENTS) {
|
||||
let date = element
|
||||
.select(&selector)
|
||||
.select(&COMMENT_DATETIME)
|
||||
.next()
|
||||
.expect("failed to find datetime")
|
||||
.inner_html();
|
||||
|
||||
let selector = Selector::parse("div:nth-child(3)").expect("failed to build selector");
|
||||
.map(|element| element.easy_inner_html())
|
||||
.expect("failed to find datetime");
|
||||
|
||||
let message = element
|
||||
.select(&selector)
|
||||
.select(&COMMENT_MESSAGE)
|
||||
.next()
|
||||
.unwrap()
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
let message = htmlescape::decode_html(&message).unwrap();
|
||||
.map(|element| element.easy_text())
|
||||
.unwrap_or_else(String::new);
|
||||
|
||||
comments.push(Comment {
|
||||
datetime: Date::date_from(Stockholm, &date, "%Y-%m-%d").expect("failed to parse date"),
|
||||
|
||||
Reference in New Issue
Block a user