use scraper::{Html, Selector}; use crate::context::Context; use crate::probe::{Entry, Probe}; fn from_html(document: &str) -> Result { let html = Html::parse_document(document); let mut messages = Vec::new(); let mut history = Vec::new(); let comments = Vec::new(); let selector = Selector::parse(".CompanyResultListItem h3.name > a").unwrap(); if let Some(element) = html.select(&selector).next() { let message = element.inner_html(); let message = htmlescape::decode_html(&message).unwrap(); messages.push(message); } let selector = Selector::parse("div.PhoneNoHit div.search-info-container p").unwrap(); if let Some(element) = html.select(&selector).next() { let message = element .text() .map(str::trim) .filter(|s| !s.is_empty()) .collect::>() .join(" "); let message = htmlescape::decode_html(&message).unwrap(); history.push(message); } Ok(Entry { messages, history, comments, }) } pub struct Eniro; impl Probe for Eniro { fn uri(&self, number: &str) -> String { format!("https://gulasidorna.eniro.se/hitta:{}", number) } fn search(&mut self, ctx: &mut Context, number: &str) -> Result<(), ()> { let body = if let Some(cache) = ctx.cache_get("eniro", &number) { String::from_utf8(cache.data).unwrap() } else { let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap(); ctx.cache_set("eniro", &number, body.as_bytes()) .expect("wut?! why not?!"); body }; match from_html(&body) { Ok(entry) => { println!("eniro.se:"); print!("{}", entry); Ok(()) } Err(_) => Err(()), } } } #[cfg(test)] mod tests { use pretty_assertions::assert_eq; use super::*; #[test] fn test_0104754350() { let document = include_str!("../../fixtures/eniro/0104754350.html"); let expected = Entry { messages: vec![ "Företaget bedriver telefonförsäljning eller marknadsundersökningar" .to_string(), ], history: vec![], comments: vec![], }; assert_eq!(from_html(&document), Ok(expected)); } #[test] fn test_0313908905() { let document = include_str!("../../fixtures/eniro/0313908905.html"); let expected = Entry { messages: vec![], history: vec!["3464 denna vecka och 6637 totalt.".to_string()], comments: vec![], }; assert_eq!(from_html(&document), Ok(expected)); } #[test] fn test_0702269893() { let document = include_str!("../../fixtures/eniro/0702269893.html"); let expected = Entry { messages: vec!["Anonym Kund För Refill".to_string()], history: vec![], comments: vec![], }; assert_eq!(from_html(&document), Ok(expected)); } #[test] fn test_0726443387() { let document = include_str!("../../fixtures/eniro/0726443387.html"); let expected = Entry { messages: vec![], history: vec!["16 denna vecka och 98 totalt.".to_string()], comments: vec![], }; assert_eq!(from_html(&document), Ok(expected)); } #[test] fn test_0751793426() { let document = include_str!("../../fixtures/eniro/0751793426.html"); let expected = Entry { messages: vec![], history: vec!["20 denna vecka och 602 totalt.".to_string()], comments: vec![], }; assert_eq!(from_html(&document), Ok(expected)); } #[test] fn test_0751793483() { let document = include_str!("../../fixtures/eniro/0751793483.html"); let expected = Entry { messages: vec![], history: vec!["29 denna vecka och 900 totalt.".to_string()], comments: vec![], }; assert_eq!(from_html(&document), Ok(expected)); } #[test] fn test_0751793499() { let document = include_str!("../../fixtures/eniro/0751793499.html"); let expected = Entry { messages: vec![], history: vec!["303 denna vecka och 304 totalt.".to_string()], comments: vec![], }; assert_eq!(from_html(&document), Ok(expected)); } }