From d056b52e71754570f42c01780e9d650615b8a596 Mon Sep 17 00:00:00 2001 From: Anders Olsson Date: Thu, 17 Jan 2019 11:53:56 +0100 Subject: [PATCH] Added some basic logging. Fix cache invalidation (1 day for now). --- Cargo.lock | 39 ++++++++++++++++++++++++++++++ Cargo.toml | 5 +++- src/context.rs | 35 +++++++++++++++++++++++++-- src/main.rs | 44 ++++++++++++++++++++++++++++++---- src/probe.rs | 1 + src/probe/eniro.rs | 24 ++++++++++--------- src/probe/hitta.rs | 18 +++++++------- src/probe/konsument_info.rs | 21 ++++++++-------- src/probe/telefonforsaljare.rs | 21 ++++++++-------- src/probe/vem_ringde.rs | 20 +++++++++------- 10 files changed, 171 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f942d43..16e5ae0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,6 +118,17 @@ name = "cfg-if" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "chrono" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "clap" version = "2.32.0" @@ -290,6 +301,14 @@ dependencies = [ "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "fern" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "fnv" version = "1.0.6" @@ -660,6 +679,19 @@ name = "nodrop" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "num-integer" +version = "0.1.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "num_cpus" version = "1.9.0" @@ -1546,7 +1578,10 @@ name = "whoareyou" version = "0.1.0" dependencies = [ "bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "directories 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "fern 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.8 (registry+https://github.com/rust-lang/crates.io-index)", "scraper 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1612,6 +1647,7 @@ dependencies = [ "checksum bytes 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "40ade3d27603c2cb345eb0912aec461a6dec7e06a4ae48589904e808335c7afa" "checksum cc 1.0.28 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4a8b715cb4597106ea87c7c84b2f1d452c7492033765df7f32651e66fcf749" "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" +"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum core-foundation 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "286e0b41c3a20da26536c6000a280585d519fd07b3956b43aed8a79e9edce980" @@ -1630,6 +1666,7 @@ dependencies = [ "checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7" "checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2" "checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1" +"checksum fern 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b48af88aaf938b11baef948a5599e66e709cf92854aa2b87c71f1bcf20f80a01" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" "checksum foreign-types-shared 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" @@ -1671,6 +1708,8 @@ dependencies = [ "checksum net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88" "checksum new_debug_unreachable 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0cdc457076c78ab54d5e0d6fa7c47981757f1e34dc39ff92787f217dede586c4" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" +"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" +"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1" "checksum num_cpus 1.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5a69d464bdc213aaaff628444e99578ede64e9c854025aa43b9796530afa9238" "checksum openssl 0.10.16 (registry+https://github.com/rust-lang/crates.io-index)" = "ec7bd7ca4cce6dbdc77e7c1230682740d307d1218a87fb0349a571272be749f9" "checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" diff --git a/Cargo.toml b/Cargo.toml index 7f7c8a1..47a5429 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,10 @@ edition = "2018" [dependencies] bincode = "1.0" +chrono = { version = "0.4", features = ["serde"] } directories = "1.0" +fern = "0.5" +log = "0.4" regex = "1.1" reqwest = "0.9" scraper = "0.9" @@ -14,4 +17,4 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" structopt = "0.2" unhtml = "0.4" -unhtml_derive = "0.4" \ No newline at end of file +unhtml_derive = "0.4" diff --git a/src/context.rs b/src/context.rs index b2a6c1b..89e8a58 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,12 +1,15 @@ use std::fs; use std::io; +use chrono::prelude::*; +use chrono::Duration; use directories::ProjectDirs; +use log::debug; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize)] pub struct Cache { - timestamp: u64, + timestamp: DateTime, pub data: Vec, } @@ -25,14 +28,37 @@ impl Context { let cache = self.dirs.cache_dir().join(format!("{}-{}.bin", bin, key)); if cache.exists() { + debug!("cache: bin={} key={} path={:?} exists", bin, key, cache); + fs::File::open(cache) .and_then(|file| { bincode::deserialize_from(&file).map_err(|_| { + debug!("cache: bin={} key={} faild to deserialize", bin, key); + io::Error::new(io::ErrorKind::Other, "failed to deserialize cache entry") }) }) + .and_then(|cache: Cache| { + if cache.timestamp > Utc::now() { + debug!("cache: bin={} key={} ok", bin, key); + + Ok(cache) + } else { + debug!( + "cache: bin={} key={} outdated ({})", + bin, key, cache.timestamp + ); + + Err(io::Error::new( + io::ErrorKind::Other, + "failed to deserialize cache entry", + )) + } + }) .ok() } else { + debug!("cache: bin={} key={} don't exists", bin, key); + None } } @@ -42,7 +68,7 @@ impl Context { D: AsRef<[u8]>, { let entry = Cache { - timestamp: 0, + timestamp: Utc::now() + Duration::days(1), data: data.as_ref().to_vec(), }; @@ -54,6 +80,11 @@ impl Context { let cache = cache.join(format!("{}-{}.bin", bin, key)); + debug!( + "cache: save: bin={} key={} path={:?} timestamp={}", + bin, key, cache, entry.timestamp + ); + fs::OpenOptions::new() .create(true) .write(true) diff --git a/src/main.rs b/src/main.rs index 25dbc56..d8f8dc7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +use std::process::Command; + use structopt::StructOpt; mod context; @@ -9,12 +11,35 @@ use crate::probe::*; #[derive(Debug, StructOpt)] #[structopt(name = "whoareyou", about = "Search for swedish phone numbers.")] struct Opt { + #[structopt(short = "o", long = "open")] + open: bool, + number: String, } fn main() { let opt = Opt::from_args(); + fern::Dispatch::new() + .format(|out, message, record| { + out.finish(format_args!( + "{}[{}][{}] {}", + chrono::Local::now().format("[%Y-%m-%d %H:%M:%S]"), + record.target(), + record.level(), + message + )) + }) + .level(log::LevelFilter::Off) + .level_for("reqwest", log::LevelFilter::Off) + .level_for("hyper", log::LevelFilter::Off) + .level_for("tokio_reactor", log::LevelFilter::Off) + .level_for("html5ever", log::LevelFilter::Off) + .level_for("selectors", log::LevelFilter::Off) + .chain(std::io::stdout()) + .apply() + .expect("failed to init fern"); + let mut probes: Vec> = vec![ Box::new(Eniro), Box::new(Hitta), @@ -23,11 +48,22 @@ fn main() { Box::new(VemRingde), ]; - let mut ctx = Context::new(); + if opt.open { + for probe in &mut probes { + let uri = probe.uri(&opt.number); - for probe in &mut probes { - if probe.search(&mut ctx, &opt.number).is_ok() { - println!(); + Command::new("open") + .arg(uri) + .output() + .expect("failed to execute process"); + } + } else { + let mut ctx = Context::new(); + + for probe in &mut probes { + if probe.search(&mut ctx, &opt.number).is_ok() { + println!(); + } } } } diff --git a/src/probe.rs b/src/probe.rs index e6999b0..086756e 100644 --- a/src/probe.rs +++ b/src/probe.rs @@ -13,5 +13,6 @@ pub use self::vem_ringde::VemRingde; use crate::context::Context; pub trait Probe { + fn uri(&self, _: &str) -> String; fn search(&mut self, _: &mut Context, _: &str) -> Result<(), ()>; } diff --git a/src/probe/eniro.rs b/src/probe/eniro.rs index 2debeb8..06ea38f 100644 --- a/src/probe/eniro.rs +++ b/src/probe/eniro.rs @@ -5,32 +5,34 @@ use crate::context::Context; use crate::probe::Probe; #[derive(Debug, FromHtml)] -#[html(selector = ".error-box")] +#[html(selector = ".PhoneNoHit")] struct Error { - #[html(selector = "h2", attr = "inner")] + #[html(selector = ".search-info-container > p", attr = "inner")] message: String, } -// https://gulasidorna.eniro.se/hitta:{} pub struct Eniro; impl Probe for Eniro { + fn uri(&self, number: &str) -> String { + format!("https://gulasidorna.eniro.se/hitta:{}", number) + } + fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> { let body = if let Some(cache) = ctx.cache_get("eniro", &number) { String::from_utf8(cache.data).unwrap() } else { - reqwest::get(&format!("https://gulasidorna.eniro.se/hitta:{}", number)) - .unwrap() - .text() - .unwrap() - }; + let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap(); - ctx.cache_set("eniro", &number, body.as_bytes()) - .expect("wut?! why not?!"); + ctx.cache_set("eniro", &number, body.as_bytes()) + .expect("wut?! why not?!"); + + body + }; if let Ok(error) = Error::from_html(&body) { println!("eniro.se:"); - println!(" {}", error.message); + println!(" Antal sökningar på det här numret: {}", error.message); Ok(()) } else { diff --git a/src/probe/hitta.rs b/src/probe/hitta.rs index dff7c16..bcc993a 100644 --- a/src/probe/hitta.rs +++ b/src/probe/hitta.rs @@ -39,22 +39,24 @@ struct Comment { timestamp: u64, } -// https://www.hitta.se/vem-ringde/{} pub struct Hitta; impl Probe for Hitta { + fn uri(&self, number: &str) -> String { + format!("https://www.hitta.se/vem-ringde/{}", number) + } + fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> { let body = if let Some(cache) = ctx.cache_get("hitta", &number) { String::from_utf8(cache.data).unwrap() } else { - reqwest::get(&format!("https://www.hitta.se/vem-ringde/{}", number)) - .unwrap() - .text() - .unwrap() - }; + let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap(); - ctx.cache_set("hitta", &number, body.as_bytes()) - .expect("wut?! why not?!"); + ctx.cache_set("hitta", &number, body.as_bytes()) + .expect("wut?! why not?!"); + + body + }; let re = Regex::new(r#"