Added some basic logging.

Fixed cache invalidation (entries expire after 1 day for now).
This commit is contained in:
2019-01-17 11:53:56 +01:00
parent a3e515c3a7
commit d056b52e71
10 changed files with 171 additions and 57 deletions

39
Cargo.lock generated
View File

@@ -118,6 +118,17 @@ name = "cfg-if"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "chrono"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "clap"
version = "2.32.0"
@@ -290,6 +301,14 @@ dependencies = [
"synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fern"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fnv"
version = "1.0.6"
@@ -660,6 +679,19 @@ name = "nodrop"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "num-integer"
version = "0.1.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "num_cpus"
version = "1.9.0"
@@ -1546,7 +1578,10 @@ name = "whoareyou"
version = "0.1.0"
dependencies = [
"bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"directories 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"fern 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.8 (registry+https://github.com/rust-lang/crates.io-index)",
"scraper 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1612,6 +1647,7 @@ dependencies = [
"checksum bytes 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "40ade3d27603c2cb345eb0912aec461a6dec7e06a4ae48589904e808335c7afa"
"checksum cc 1.0.28 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4a8b715cb4597106ea87c7c84b2f1d452c7492033765df7f32651e66fcf749"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878"
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum core-foundation 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "286e0b41c3a20da26536c6000a280585d519fd07b3956b43aed8a79e9edce980"
@@ -1630,6 +1666,7 @@ dependencies = [
"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"
"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2"
"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1"
"checksum fern 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b48af88aaf938b11baef948a5599e66e709cf92854aa2b87c71f1bcf20f80a01"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
"checksum foreign-types-shared 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
@@ -1671,6 +1708,8 @@ dependencies = [
"checksum net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88"
"checksum new_debug_unreachable 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0cdc457076c78ab54d5e0d6fa7c47981757f1e34dc39ff92787f217dede586c4"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea"
"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
"checksum num_cpus 1.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5a69d464bdc213aaaff628444e99578ede64e9c854025aa43b9796530afa9238"
"checksum openssl 0.10.16 (registry+https://github.com/rust-lang/crates.io-index)" = "ec7bd7ca4cce6dbdc77e7c1230682740d307d1218a87fb0349a571272be749f9"
"checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de"

View File

@@ -6,7 +6,10 @@ edition = "2018"
[dependencies]
bincode = "1.0"
chrono = { version = "0.4", features = ["serde"] }
directories = "1.0"
fern = "0.5"
log = "0.4"
regex = "1.1"
reqwest = "0.9"
scraper = "0.9"

View File

@@ -1,12 +1,15 @@
use std::fs;
use std::io;
use chrono::prelude::*;
use chrono::Duration;
use directories::ProjectDirs;
use log::debug;
use serde::{Deserialize, Serialize};
/// A single cache entry as stored on disk (serde-serializable; read back with bincode).
#[derive(Serialize, Deserialize)]
pub struct Cache {
timestamp: u64,
// Expiry instant: entries are created with `Utc::now() + Duration::days(1)` and a
// lookup only accepts an entry while this value is still later than `Utc::now()`.
timestamp: DateTime<Utc>,
// Raw cached payload bytes (the probes store the fetched page body here).
pub data: Vec<u8>,
}
@@ -25,14 +28,37 @@ impl Context {
let cache = self.dirs.cache_dir().join(format!("{}-{}.bin", bin, key));
if cache.exists() {
debug!("cache: bin={} key={} path={:?} exists", bin, key, cache);
fs::File::open(cache)
.and_then(|file| {
bincode::deserialize_from(&file).map_err(|_| {
debug!("cache: bin={} key={} faild to deserialize", bin, key);
io::Error::new(io::ErrorKind::Other, "failed to deserialize cache entry")
})
})
.and_then(|cache: Cache| {
if cache.timestamp > Utc::now() {
debug!("cache: bin={} key={} ok", bin, key);
Ok(cache)
} else {
debug!(
"cache: bin={} key={} outdated ({})",
bin, key, cache.timestamp
);
Err(io::Error::new(
io::ErrorKind::Other,
"failed to deserialize cache entry",
))
}
})
.ok()
} else {
debug!("cache: bin={} key={} don't exists", bin, key);
None
}
}
@@ -42,7 +68,7 @@ impl Context {
D: AsRef<[u8]>,
{
let entry = Cache {
timestamp: 0,
timestamp: Utc::now() + Duration::days(1),
data: data.as_ref().to_vec(),
};
@@ -54,6 +80,11 @@ impl Context {
let cache = cache.join(format!("{}-{}.bin", bin, key));
debug!(
"cache: save: bin={} key={} path={:?} timestamp={}",
bin, key, cache, entry.timestamp
);
fs::OpenOptions::new()
.create(true)
.write(true)

View File

@@ -1,3 +1,5 @@
use std::process::Command;
use structopt::StructOpt;
mod context;
@@ -9,12 +11,35 @@ use crate::probe::*;
// Command-line options, parsed in `main` with `Opt::from_args()`.
// NOTE: plain `//` comments are used on purpose here; structopt turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Debug, StructOpt)]
#[structopt(name = "whoareyou", about = "Search for swedish phone numbers.")]
struct Opt {
// -o / --open: when set, `main` runs the `open` command on every probe's URI
// instead of taking the branch that builds a `Context` and iterates the probes.
#[structopt(short = "o", long = "open")]
open: bool,
// The phone number handed to the probes (e.g. `probe.uri(&opt.number)`).
number: String,
}
fn main() {
let opt = Opt::from_args();
fern::Dispatch::new()
.format(|out, message, record| {
out.finish(format_args!(
"{}[{}][{}] {}",
chrono::Local::now().format("[%Y-%m-%d %H:%M:%S]"),
record.target(),
record.level(),
message
))
})
.level(log::LevelFilter::Off)
.level_for("reqwest", log::LevelFilter::Off)
.level_for("hyper", log::LevelFilter::Off)
.level_for("tokio_reactor", log::LevelFilter::Off)
.level_for("html5ever", log::LevelFilter::Off)
.level_for("selectors", log::LevelFilter::Off)
.chain(std::io::stdout())
.apply()
.expect("failed to init fern");
let mut probes: Vec<Box<Probe>> = vec![
Box::new(Eniro),
Box::new(Hitta),
@@ -23,6 +48,16 @@ fn main() {
Box::new(VemRingde),
];
if opt.open {
for probe in &mut probes {
let uri = probe.uri(&opt.number);
Command::new("open")
.arg(uri)
.output()
.expect("failed to execute process");
}
} else {
let mut ctx = Context::new();
for probe in &mut probes {
@@ -30,4 +65,5 @@ fn main() {
println!();
}
}
}
}

View File

@@ -13,5 +13,6 @@ pub use self::vem_ringde::VemRingde;
use crate::context::Context;
/// A lookup backend for one phone-number website.
pub trait Probe {
/// Returns the URL of this site's page for the given phone number.
fn uri(&self, _: &str) -> String;
/// Looks up the given phone number, with access to the shared `Context`
/// (the implementations in this crate use it for `cache_get` / `cache_set`).
///
/// The result carries no payload in either direction: `Ok(())` or `Err(())`.
fn search(&mut self, _: &mut Context, _: &str) -> Result<(), ()>;
}

View File

@@ -5,32 +5,34 @@ use crate::context::Context;
use crate::probe::Probe;
// Message block extracted from the Eniro page through the `FromHtml` derive.
// NOTE(review): presumably the box shown when a number has no hit — confirm against the live page.
// `message` receives the inner content of the selected element and is printed by
// `search` when this struct parses successfully.
#[derive(Debug, FromHtml)]
#[html(selector = ".error-box")]
#[html(selector = ".PhoneNoHit")]
struct Error {
#[html(selector = "h2", attr = "inner")]
#[html(selector = ".search-info-container > p", attr = "inner")]
message: String,
}
// https://gulasidorna.eniro.se/hitta:{}
pub struct Eniro;
impl Probe for Eniro {
/// Builds the gulasidorna.eniro.se lookup URL for `number`.
fn uri(&self, number: &str) -> String {
    // The number is appended verbatim directly after the `hitta:` prefix.
    const BASE: &str = "https://gulasidorna.eniro.se/hitta:";
    [BASE, number].concat()
}
fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> {
let body = if let Some(cache) = ctx.cache_get("eniro", &number) {
String::from_utf8(cache.data).unwrap()
} else {
reqwest::get(&format!("https://gulasidorna.eniro.se/hitta:{}", number))
.unwrap()
.text()
.unwrap()
};
let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap();
ctx.cache_set("eniro", &number, body.as_bytes())
.expect("wut?! why not?!");
body
};
if let Ok(error) = Error::from_html(&body) {
println!("eniro.se:");
println!(" {}", error.message);
println!(" Antal sökningar på det här numret: {}", error.message);
Ok(())
} else {

View File

@@ -39,23 +39,25 @@ struct Comment {
timestamp: u64,
}
// https://www.hitta.se/vem-ringde/{}
pub struct Hitta;
impl Probe for Hitta {
/// Builds the hitta.se "vem-ringde" lookup URL for `number`.
fn uri(&self, number: &str) -> String {
    // The number is appended verbatim as the last path segment.
    const BASE: &str = "https://www.hitta.se/vem-ringde/";
    [BASE, number].concat()
}
fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> {
let body = if let Some(cache) = ctx.cache_get("hitta", &number) {
String::from_utf8(cache.data).unwrap()
} else {
reqwest::get(&format!("https://www.hitta.se/vem-ringde/{}", number))
.unwrap()
.text()
.unwrap()
};
let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap();
ctx.cache_set("hitta", &number, body.as_bytes())
.expect("wut?! why not?!");
body
};
let re = Regex::new(r#"<script>__NEXT_DATA__ = (.*?);__NEXT_LOADED_PAGES__"#).unwrap();
if let Some(result) = re.captures(&body) {

View File

@@ -11,26 +11,25 @@ struct Info {
message: String,
}
// http://konsumentinfo.se/telefonnummer/sverige/{}
pub struct KonsumentInfo;
impl Probe for KonsumentInfo {
/// Builds the konsumentinfo.se lookup URL for `number`.
fn uri(&self, number: &str) -> String {
    // The number is appended verbatim as the last path segment.
    const BASE: &str = "http://konsumentinfo.se/telefonnummer/sverige/";
    [BASE, number].concat()
}
fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> {
let body = if let Some(cache) = ctx.cache_get("konsument_info", &number) {
String::from_utf8(cache.data).unwrap()
} else {
reqwest::get(&format!(
"http://konsumentinfo.se/telefonnummer/sverige/{}",
number
))
.unwrap()
.text()
.unwrap()
};
let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap();
ctx.cache_set("konsument_info", &number, body.as_bytes())
.expect("wut?! why not?!");
body
};
println!("konsumentinfo.se:");
if let Ok(info) = Info::from_html(&body) {

View File

@@ -11,26 +11,25 @@ struct Info {
message: String,
}
// http://www.telefonforsaljare.nu/telefonnummer/{}/
pub struct Telefonforsaljare;
impl Probe for Telefonforsaljare {
/// Builds the telefonforsaljare.nu lookup URL for `number`.
fn uri(&self, number: &str) -> String {
    // This site's URL ends with a trailing slash after the number.
    const BASE: &str = "http://www.telefonforsaljare.nu/telefonnummer/";
    [BASE, number, "/"].concat()
}
fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> {
let body = if let Some(cache) = ctx.cache_get("telefonforsaljare", &number) {
String::from_utf8(cache.data).unwrap()
} else {
reqwest::get(&format!(
"http://www.telefonforsaljare.nu/telefonnummer/{}/",
number
))
.unwrap()
.text()
.unwrap()
};
let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap();
ctx.cache_set("telefonforsaljare", &number, body.as_bytes())
.expect("wut?! why not?!");
body
};
println!("telefonforsaljare.nu:");
if let Ok(info) = Info::from_html(&body) {

View File

@@ -1,23 +1,25 @@
use crate::context::Context;
use crate::probe::Probe;
// http://vemringde.se/?q={}
pub struct VemRingde;
impl Probe for VemRingde {
/// Builds the vemringde.se query URL for `number`.
fn uri(&self, number: &str) -> String {
    // The number is appended verbatim as the value of the `q` query parameter.
    const BASE: &str = "http://vemringde.se/?q=";
    [BASE, number].concat()
}
/// Loads the vemringde.se page body for `number`: taken from the cache when
/// `cache_get` returns an entry, otherwise fetched over HTTP and stored with
/// `cache_set`. The body is not inspected — it is bound to `_body` and the
/// function always returns `Err(())`.
///
/// Panics (via `unwrap` / `expect`) if the request, the text decoding, the
/// UTF-8 conversion of cached bytes, or the cache write fails.
fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> {
let body = if let Some(cache) = ctx.cache_get("vem_ringde", &number) {
let _body = if let Some(cache) = ctx.cache_get("vem_ringde", &number) {
String::from_utf8(cache.data).unwrap()
} else {
reqwest::get(&format!("http://vemringde.se/?q={}", number))
.unwrap()
.text()
.unwrap()
};
let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap();
ctx.cache_set("vem_ringde", &number, body.as_bytes())
.expect("wut?! why not?!");
body
};
// No parsing of the page is done for this site; failure is reported unconditionally.
Err(())
}
}