Definition support.

This commit is contained in:
2019-02-12 13:04:39 +01:00
parent 70f89cc35c
commit 7569896f6b
10 changed files with 404 additions and 20 deletions

28
Cargo.lock generated
View File

@@ -87,9 +87,10 @@ dependencies = [
[[package]] [[package]]
name = "bincode" name = "bincode"
version = "1.0.1" version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@@ -1570,6 +1571,15 @@ dependencies = [
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "tinytemplate"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.38 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "tokio" name = "tokio"
version = "0.1.15" version = "0.1.15"
@@ -1673,6 +1683,14 @@ dependencies = [
"tokio-executor 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "tokio-executor 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "toml"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "try-lock" name = "try-lock"
version = "0.2.2" version = "0.2.2"
@@ -1811,7 +1829,7 @@ dependencies = [
name = "whoareyou" name = "whoareyou"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "bincode 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono-tz 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono-tz 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"directories 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "directories 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1826,6 +1844,8 @@ dependencies = [
"serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.38 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.38 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
"tinytemplate 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]
@@ -1886,7 +1906,7 @@ dependencies = [
"checksum backtrace 0.3.13 (registry+https://github.com/rust-lang/crates.io-index)" = "b5b493b66e03090ebc4343eb02f94ff944e0cbc9ac6571491d170ba026741eb5" "checksum backtrace 0.3.13 (registry+https://github.com/rust-lang/crates.io-index)" = "b5b493b66e03090ebc4343eb02f94ff944e0cbc9ac6571491d170ba026741eb5"
"checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6" "checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6"
"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" "checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e"
"checksum bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f2fb9e29e72fd6bc12071533d5dc7664cb01480c59406f656d7ac25c7bd8ff7" "checksum bincode 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "58470ad6460f0b0e89b0df5f17b8bd77ebae26af69dca0bd9ddc8b9e38abb2ff"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a076c298b9ecdb530ed9d967e74a6027d6a7478924520acddcddc24c1c8ab3ab" "checksum block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a076c298b9ecdb530ed9d967e74a6027d6a7478924520acddcddc24c1c8ab3ab"
"checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40" "checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40"
@@ -2053,6 +2073,7 @@ dependencies = [
"checksum thin-slice 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" "checksum thin-slice 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" "checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f"
"checksum tinytemplate 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7655088894274afb52b807bd3c87072daa1fedd155068b8705cabfd628956115"
"checksum tokio 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "e0500b88064f08bebddd0c0bed39e19f5c567a5f30975bee52b0c0d3e2eeb38c" "checksum tokio 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "e0500b88064f08bebddd0c0bed39e19f5c567a5f30975bee52b0c0d3e2eeb38c"
"checksum tokio-current-thread 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "331c8acc267855ec06eb0c94618dcbbfea45bed2d20b77252940095273fb58f6" "checksum tokio-current-thread 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "331c8acc267855ec06eb0c94618dcbbfea45bed2d20b77252940095273fb58f6"
"checksum tokio-executor 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "30c6dbf2d1ad1de300b393910e8a3aa272b724a400b6531da03eed99e329fbf0" "checksum tokio-executor 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "30c6dbf2d1ad1de300b393910e8a3aa272b724a400b6531da03eed99e329fbf0"
@@ -2061,6 +2082,7 @@ dependencies = [
"checksum tokio-tcp 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1d14b10654be682ac43efee27401d792507e30fd8d26389e1da3b185de2e4119" "checksum tokio-tcp 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1d14b10654be682ac43efee27401d792507e30fd8d26389e1da3b185de2e4119"
"checksum tokio-threadpool 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "c3fd86cb15547d02daa2b21aadaf4e37dee3368df38a526178a5afa3c034d2fb" "checksum tokio-threadpool 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "c3fd86cb15547d02daa2b21aadaf4e37dee3368df38a526178a5afa3c034d2fb"
"checksum tokio-timer 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "2910970404ba6fa78c5539126a9ae2045d62e3713041e447f695f41405a120c6" "checksum tokio-timer 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "2910970404ba6fa78c5539126a9ae2045d62e3713041e447f695f41405a120c6"
"checksum toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "758664fc71a3a69038656bee8b6be6477d2a6c315a6b81f7081f591bffa4111f"
"checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382" "checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382"
"checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169" "checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"
"checksum ucd-trie 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "71a9c5b1fe77426cf144cc30e49e955270f5086e31a6441dfa8b32efc09b9d77" "checksum ucd-trie 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "71a9c5b1fe77426cf144cc30e49e955270f5086e31a6441dfa8b32efc09b9d77"

View File

@@ -5,7 +5,7 @@ authors = ["Anders Olsson <anders.e.olsson@gmail.com>"]
edition = "2018" edition = "2018"
[dependencies] [dependencies]
bincode = "1.0" bincode = "1.1"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.5" chrono-tz = "0.5"
directories = "1.0" directories = "1.0"
@@ -19,6 +19,8 @@ scraper = "0.9"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
structopt = "0.2" structopt = "0.2"
tinytemplate = "1.0"
toml = "0.4"
[dev-dependencies] [dev-dependencies]
insta = "0.6" insta = "0.6"

11
definitions/eniro.toml Normal file
View File

@@ -0,0 +1,11 @@
# Probe definition for eniro.se.
# `name` is the provider name reported by the probe.
name = "eniro.se"
# URL template; `{ number }` is replaced with the phone number being looked up.
path = "https://gulasidorna.eniro.se/hitta:{ number }"
# Every element matching a `messages` selector contributes one message
# (the element's text, since no `data` key is given).
[[messages]]
selector = ".CompanyResultListItem h3.name > a"
# Every element matching a `history` selector contributes one history line.
[[history]]
selector = "div.PhoneNoHit div.search-info-container p"
[[history]]
selector = "div.feedback-types div.feedback-type-item"

View File

@@ -0,0 +1,5 @@
# Probe definition for konsumentinfo.se.
# `name` is the provider name reported by the probe.
name = "konsumentinfo.se"
# URL template; `{ number }` is replaced with the phone number being looked up.
path = "http://konsumentinfo.se/telefonnummer/sverige/{ number }"
# Every element matching the selector contributes one message
# (the element's text, since no `data` key is given).
[[messages]]
selector = ".panel-heading > h1:nth-child(3)"

View File

@@ -0,0 +1,29 @@
# Probe definition for telefonforsaljare.nu.
# `name` is the provider name reported by the probe.
name = "telefonforsaljare.nu"
# URL template; `{ number }` is replaced with the phone number being looked up.
path = "http://www.telefonforsaljare.nu/telefonnummer/{ number }/"
# Every element matching a `messages` selector contributes one message.
[[messages]]
selector = "#content p:nth-child(2) i"
# Every element matching a `history` selector contributes one history line.
[[history]]
selector = "#content p:nth-child(4)"
[[history]]
selector = "#content p:nth-child(5)"
# Each element matching this selector is one user comment; the sub-tables
# below are selectors evaluated inside that element.
[[comments]]
selector = "#kommentarer > [itemtype='http://data-vocabulary.org/Review']"
# Timestamp: read from the `datetime` attribute of <small>, parsed as a full
# date-time with `format` in the `tz` time zone.
[comments.date_time]
selector = "small"
data = "attr:datetime"
kind = "date_time"
format = "%Y-%m-%d %H:%M:%S"
tz = "Europe/Stockholm"
# Optional title, taken as the inner HTML of the heading.
[comments.title]
selector = "h3"
data = "inner_html"
# Comment body, taken as inner HTML; comments without a timestamp and a
# message are discarded by the parser.
[comments.message]
selector = "[itemprop='description']"
data = "inner_html"

View File

@@ -0,0 +1,18 @@
# Probe definition for vemringde.se.
# `name` is the provider name reported by the probe.
name = "vemringde.se"
# URL template; `{ number }` is replaced with the phone number being looked up.
path = "http://vemringde.se/?q={ number }"
# Every element matching the selector contributes one message.
[[messages]]
selector = "#toporganisations li"
# Each element matching this selector is one user comment; the sub-tables
# below are selectors evaluated inside that element.
[[comments]]
selector = "#calls ol li"
# Timestamp: inner HTML of the fourth child <div>, parsed as a date only
# (`kind = "date"`) with `format` in the `tz` time zone.
[comments.date_time]
selector = "div:nth-child(4)"
data = "inner_html"
kind = "date"
format = "%Y-%m-%d"
tz = "Europe/Stockholm"
# Comment body: text of the third child <div> (default `data`).
[comments.message]
selector = "div:nth-child(3)"

283
src/definition.rs Normal file
View File

@@ -0,0 +1,283 @@
use std::str;
use chrono_tz::Tz;
use scraper::{ElementRef, Html, Selector};
use serde::{de, Deserialize, Deserializer, Serialize};
use tinytemplate::TinyTemplate;
use crate::entry::{self, Date, Entry};
use crate::probe::Probe;
/// Values made available to the `path` template when a definition's URI is rendered.
#[derive(Serialize)]
struct Context {
/// Phone number being looked up; referenced as `{ number }` in the template.
number: String,
}
/// A data-driven probe: describes where to fetch a page for a phone number and
/// which CSS selectors yield messages, history lines and user comments.
#[derive(Debug, Deserialize)]
pub struct Definition {
/// Provider name returned by `Probe::provider`.
name: String,
/// TinyTemplate template for the lookup URL; rendered with a `number` value.
path: String,
/// Fields whose matches become the entry's messages (required key).
messages: Vec<Field>,
/// Fields whose matches become the entry's history; empty when the key is absent.
#[serde(default)]
history: Vec<Field>,
/// Comment descriptions; empty when the key is absent.
#[serde(default)]
comments: Vec<Comment>,
}
/// Describes how to locate user comments on a page and the parts of each one.
#[derive(Debug, Deserialize)]
struct Comment {
/// Selector matching one element per comment; the sub-fields below are
/// selected inside each matched element.
#[serde(deserialize_with = "deserialize_selector")]
selector: Selector,
/// Timestamp of the comment (key `date_time` in the definition file).
#[serde(rename = "date_time")]
datetime: Option<DateTime>,
/// Optional comment title.
title: Option<Field>,
/// Comment body.
message: Option<Field>,
}
/// A field whose extracted data is parsed into a date or date-time.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
struct DateTime {
/// Where to find the raw string; flattened, so `selector`/`data`/`filters`
/// sit in the same table as `kind`, `format` and `tz`.
#[serde(flatten)]
field: Field,
/// Whether the string holds only a date or a full date-time.
kind: DateTimeKind,
/// Format string passed to the `Date` parsing functions (e.g. `%Y-%m-%d`).
format: String,
/// Time zone passed to the `Date` parsing functions, given by name
/// (e.g. `Europe/Stockholm`).
#[serde(deserialize_with = "deserialize_tz")]
tz: Tz,
}
/// Selects which `Date` constructor is used for a `DateTime` field.
/// Written as `"date"` or `"date_time"` in definition files.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum DateTimeKind {
/// Parsed with `Date::date_from`.
Date,
/// Parsed with `Date::datetime_from`.
DateTime,
}
/// Post-processing step applied to extracted data, tagged by a `type` key.
/// No variants exist yet, and nothing in this file applies filters.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum Filter {}
/// One extraction rule: a CSS selector plus what to read from each match.
#[derive(Debug, Deserialize)]
struct Field {
/// CSS selector, parsed when the definition is deserialized.
#[serde(deserialize_with = "deserialize_selector")]
selector: Selector,
/// What to read from a matched element; defaults to its text.
#[serde(default)]
data: Data,
/// Filters attached to the field; deserialized but not applied by the parser here.
#[serde(default)]
filters: Vec<Filter>,
}
/// Which part of a matched element is extracted.
/// Written in definition files as `"text"`, `"inner_html"` or `"attr:<name>"`.
#[derive(Debug)]
enum Data {
/// The element's text nodes, trimmed and joined with single spaces.
Text,
/// The element's inner HTML.
InnerHtml,
/// The value of the attribute named `attr`.
Attr { attr: String },
}
impl Data {
    /// Reads this kind of data from `element`.
    ///
    /// * `Text` — all text nodes below the element, each trimmed, empty pieces
    ///   dropped, the rest joined with a single space (always `Some`).
    /// * `InnerHtml` — the element's inner HTML (always `Some`).
    /// * `Attr` — the value of the named attribute, `None` if it is absent.
    fn extract(&self, element: &ElementRef) -> Option<String> {
        match self {
            Data::Attr { attr } => element.value().attr(attr).map(String::from),
            Data::InnerHtml => Some(element.inner_html()),
            Data::Text => {
                let pieces: Vec<&str> = element
                    .text()
                    .map(|piece| piece.trim())
                    .filter(|piece| !piece.is_empty())
                    .collect();
                Some(pieces.join(" "))
            }
        }
    }
}
/// A field without a `data` key extracts the element's text.
impl Default for Data {
    fn default() -> Data {
        Data::Text
    }
}
/// Parses the string form of a field's `data` key.
///
/// Accepted values are `"text"`, `"inner_html"` and `"attr:<name>"`. Anything
/// else, including `"attr:"` without an attribute name, is reported as a
/// deserialization error instead of producing a field that can never match.
impl<'de> Deserialize<'de> for Data {
    fn deserialize<D>(deserializer: D) -> Result<Data, D::Error>
    where
        D: Deserializer<'de>,
    {
        use serde::de::{self, Visitor};
        use std::fmt;

        /// Prefix that introduces an attribute name.
        const ATTR_PREFIX: &str = "attr:";

        struct DataVisitor;

        impl<'de> Visitor<'de> for DataVisitor {
            type Value = Data;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("one of \"text\", \"inner_html\" or \"attr:<name>\"")
            }

            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                match value {
                    "text" => Ok(Data::Text),
                    "inner_html" => Ok(Data::InnerHtml),
                    s if s.starts_with(ATTR_PREFIX) => {
                        // `starts_with` guarantees the prefix length is a valid
                        // char boundary, so this slice cannot panic.
                        let attr = &s[ATTR_PREFIX.len()..];
                        if attr.is_empty() {
                            Err(E::custom("missing attribute name after \"attr:\""))
                        } else {
                            Ok(Data::Attr {
                                attr: attr.to_string(),
                            })
                        }
                    }
                    _ => Err(E::custom(format!("unknown data type: {}", value))),
                }
            }
        }

        deserializer.deserialize_str(DataVisitor)
    }
}
impl Probe for Definition {
fn provider(&self) -> &str {
&self.name
}
fn uri(&self, number: &str) -> String {
let mut tt = TinyTemplate::new();
tt.add_template("path", &self.path)
.expect("failed to add path template");
let context = Context {
number: number.to_string(),
};
tt.render("path", &context)
.expect("failed to render path template")
}
fn fetch(&self, number: &str) -> Result<String, ()> {
reqwest::get(&self.uri(number))
.map_err(|_| ())?
.text()
.map_err(|_| ())
}
fn parse(&self, data: &str) -> Result<Entry, ()> {
let html = Html::parse_document(data);
let mut messages = Vec::new();
let mut history = Vec::new();
let mut comments = Vec::new();
for field in &self.messages {
for element in html.select(&field.selector) {
if let Some(data) = field.data.extract(&element) {
messages.push(data);
}
}
}
for field in &self.history {
for element in html.select(&field.selector) {
if let Some(data) = field.data.extract(&element) {
history.push(data);
}
}
}
for comment in &self.comments {
for comments_element in html.select(&comment.selector) {
let mut datetime: Option<Date> = None;
let mut title: Option<String> = None;
let mut message: Option<String> = None;
if let Some(ref datetime_field) = comment.datetime {
for comment_element in comments_element.select(&datetime_field.field.selector) {
if let Some(data) = datetime_field.field.data.extract(&comment_element) {
// for filter in &datetime_field.field.filters {}
let data = match datetime_field.kind {
DateTimeKind::Date => Date::date_from(
datetime_field.tz,
&data,
&datetime_field.format,
)
.expect("failed to parse date"),
DateTimeKind::DateTime => Date::datetime_from(
datetime_field.tz,
&data,
&datetime_field.format,
)
.expect("failed to parse date time"),
};
datetime = Some(data);
}
}
}
if let Some(ref title_field) = comment.title {
for comment_element in comments_element.select(&title_field.selector) {
if let Some(data) = title_field
.data
.extract(&comment_element)
.filter(|data| !data.is_empty())
{
// for filter in &message_field.filters {}
title = Some(data);
}
}
}
if let Some(ref message_field) = comment.message {
for comment_element in comments_element.select(&message_field.selector) {
if let Some(data) = message_field.data.extract(&comment_element) {
// for filter in &message_field.filters {}
message = Some(data);
}
}
}
if datetime.is_some() && message.is_some() {
comments.push(entry::Comment {
datetime: datetime.unwrap(),
title,
message: message.unwrap(),
});
}
}
}
if !messages.is_empty() || !history.is_empty() || !comments.is_empty() {
Ok(Entry {
messages,
history,
comments,
})
} else {
Err(())
}
}
}
fn deserialize_selector<'de, D>(deserializer: D) -> Result<Selector, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Selector::parse(&s).map_err(|_| de::Error::custom("failed to parse selector"))
}
fn deserialize_tz<'de, D>(deserializer: D) -> Result<Tz, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
s.parse::<Tz>()
.map_err(|_| de::Error::custom("failed to parse tz"))
}

View File

@@ -1,7 +1,9 @@
mod context; mod context;
pub mod definition;
pub mod entry; pub mod entry;
mod html; mod html;
mod probe; mod probe;
pub use crate::context::Context; pub use crate::context::Context;
pub use crate::definition::Definition;
pub use crate::probe::*; pub use crate::probe::*;

View File

@@ -1,3 +1,6 @@
use std::fs;
use std::io::Read;
use std::path::PathBuf;
use std::process::Command; use std::process::Command;
use fern::colors::{Color, ColoredLevelConfig}; use fern::colors::{Color, ColoredLevelConfig};
@@ -13,6 +16,9 @@ struct Opt {
#[structopt(short = "o", long = "open")] #[structopt(short = "o", long = "open")]
open: bool, open: bool,
#[structopt(short = "d", long = "definitions", parse(from_os_str))]
definitions: Vec<PathBuf>,
number: String, number: String,
} }
@@ -52,13 +58,27 @@ fn main() {
config.apply().expect("failed to init fern"); config.apply().expect("failed to init fern");
let mut probes: Vec<Box<dyn Probe>> = vec![ let mut probes: Vec<Box<dyn Probe>> = vec![Box::new(Hitta)];
Box::new(Eniro),
Box::new(Hitta), let mut buffer = Vec::new();
Box::new(KonsumentInfo),
Box::new(Telefonforsaljare), for definition in &opt.definitions {
Box::new(VemRingde), let definition = fs::File::open(&definition)
]; .and_then(|mut file| {
file.read_to_end(&mut buffer)
.expect("failed to read definition file");
let definition: Definition =
toml::from_slice(&buffer).expect("failed to parse definition file");
buffer.clear();
Ok(definition)
})
.expect("failed to open definition file");
probes.push(Box::new(definition));
}
if opt.open { if opt.open {
for probe in &mut probes { for probe in &mut probes {

View File

@@ -1,19 +1,11 @@
use crate::entry::Entry; use crate::entry::Entry;
mod eniro;
mod hitta; mod hitta;
mod konsument_info;
mod telefonforsaljare;
mod vem_ringde;
pub use self::eniro::Eniro;
pub use self::hitta::Hitta; pub use self::hitta::Hitta;
pub use self::konsument_info::KonsumentInfo;
pub use self::telefonforsaljare::Telefonforsaljare;
pub use self::vem_ringde::VemRingde;
pub trait Probe { pub trait Probe {
fn provider(&self) -> &'static str; fn provider(&self) -> &str;
fn uri(&self, _: &str) -> String; fn uri(&self, _: &str) -> String;
fn fetch(&self, _: &str) -> Result<String, ()>; fn fetch(&self, _: &str) -> Result<String, ()>;