//! Probe for eniro.se: builds the lookup URL for a phone number, downloads the
//! result page and scrapes it into an [`Entry`].

use lazy_static::lazy_static;
use scraper::{Html, Selector};

use crate::entry::Entry;
use crate::html::SelectExt;
use crate::probe::Probe;

lazy_static! {
    // Link directly inside the `h3.name` heading of a `.CompanyResultListItem`.
    static ref MESSAGE: Selector =
        Selector::parse(".CompanyResultListItem h3.name > a").unwrap();
    // Paragraph inside the search-info container of a `div.PhoneNoHit` section.
    static ref HISTORY_1: Selector =
        Selector::parse("div.PhoneNoHit div.search-info-container p").unwrap();
    // Every feedback-type item listed under `div.feedback-types`.
    static ref HISTORY_2: Selector =
        Selector::parse("div.feedback-types div.feedback-type-item").unwrap();
}

/// Scrapes an eniro.se result page into an [`Entry`].
///
/// * `messages` receives the text of the first `MESSAGE` match, if any.
/// * `history` receives the text of the first `HISTORY_1` match (if any),
///   followed by the text of every `HISTORY_2` match in document order.
/// * `comments` is always empty for this provider.
///
/// Text is extracted with `SelectExt::easy_text`. This function never takes an
/// error path; it always returns `Ok`.
// NOTE(review): `Result` is written without type arguments and no alias is
// imported in this file -- confirm the intended `Ok`/`Err` types against the
// `Probe` trait.
fn from_html(document: &str) -> Result {
    let page = Html::parse_document(document);

    // Only the first matching link is of interest.
    let messages: Vec<_> = page
        .select(&MESSAGE)
        .take(1)
        .map(|node| node.easy_text())
        .collect();

    // First summary paragraph (if present), then all feedback-type items.
    let history: Vec<_> = page
        .select(&HISTORY_1)
        .take(1)
        .chain(page.select(&HISTORY_2))
        .map(|node| node.easy_text())
        .collect();

    Ok(Entry {
        messages,
        history,
        comments: Vec::new(),
    })
}

/// Probe that looks phone numbers up on eniro.se.
pub struct Eniro;

impl Probe for Eniro {
    /// Name of the provider this probe queries.
    fn provider(&self) -> &'static str {
        "eniro.se"
    }

    /// Lookup URL for `number`; the number is appended verbatim.
    fn uri(&self, number: &str) -> String {
        format!("https://gulasidorna.eniro.se/hitta:{}", number)
    }

    /// Downloads the lookup page for `number` and returns its body text.
    ///
    /// Any request or body-decoding failure is collapsed into `Err(())`.
    fn fetch(&self, number: &str) -> Result {
        let url = self.uri(number);
        reqwest::get(&url)
            .map_err(|_| ())?
            .text()
            .map_err(|_| ())
    }

    /// Parses a previously fetched page; see `from_html`.
    fn parse(&self, data: &str) -> Result {
        from_html(data)
    }
}

#[cfg(test)]
mod tests {
    use insta::assert_yaml_snapshot_matches;

    use super::*;

    // NOTE(review): the inline snapshot literals below are kept exactly as
    // found. They look whitespace-collapsed (YAML snapshots are normally
    // multi-line) -- confirm against the fixtures before relying on them.

    #[test]
    fn test_0104754350() {
        let fixture = include_str!("../../fixtures/eniro/0104754350.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: - Företaget bedriver telefonförsäljning eller marknadsundersökningar history: [] comments: []"###);
    }

    #[test]
    fn test_0313908905() {
        let fixture = include_str!("../../fixtures/eniro/0313908905.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: [] history: - 3464 denna vecka och 6637 totalt. 
- 76 Försäljning - 47 Oseriös verksamhet - 37 Annat comments: []"###);
    }

    #[test]
    fn test_0702269893() {
        let fixture = include_str!("../../fixtures/eniro/0702269893.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: - Anonym Kund För Refill history: [] comments: []"###);
    }

    #[test]
    fn test_0726443387() {
        let fixture = include_str!("../../fixtures/eniro/0726443387.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: [] history: - 16 denna vecka och 98 totalt. comments: []"###);
    }

    #[test]
    fn test_0751793426() {
        let fixture = include_str!("../../fixtures/eniro/0751793426.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: [] history: - 20 denna vecka och 602 totalt. - 11 Försäljning - 9 Annat - 7 Oseriös verksamhet comments: []"###);
    }

    #[test]
    fn test_0751793483() {
        let fixture = include_str!("../../fixtures/eniro/0751793483.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: [] history: - 29 denna vecka och 900 totalt. - 5 Annat - 4 Oseriös verksamhet - 3 Marknadsföring comments: []"###);
    }

    #[test]
    fn test_0751793499() {
        let fixture = include_str!("../../fixtures/eniro/0751793499.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: [] history: - 303 denna vecka och 304 totalt. comments: []"###);
    }

    #[test]
    fn test_0701807618() {
        let fixture = include_str!("../../fixtures/eniro/0701807618.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: [] history: - 0 denna vecka och 1 totalt. comments: []"###);
    }

    #[test]
    fn test_0546780862() {
        let fixture = include_str!("../../fixtures/eniro/0546780862.html");
        assert_yaml_snapshot_matches!(from_html(fixture), @r###"Ok: messages: - Nya Wermlands-Tidningens AB history: [] comments: []"###);
    }
}