Better handling for hitta.se

This commit is contained in:
2018-12-19 14:02:23 +01:00
parent 2dfd8abfc1
commit 1d8144bfa7
3 changed files with 217 additions and 13 deletions

View File

@@ -1,8 +1,42 @@
use scraper::{Html, Selector};
use regex::Regex;
use serde_derive::Deserialize;
use crate::context::Context;
use crate::probe::Probe;
/// Root object of the JSON captured from the page's `__NEXT_DATA__` script
/// assignment and deserialized with `serde_json`.
///
/// Only the `props` key is declared here; no `deny_unknown_fields` attribute
/// is set, so any other keys in the payload are skipped during deserialization.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Data {
/// Value of the top-level `props` key.
props: Props,
}
/// The `props` object nested directly under the payload root.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Props {
/// Read from the `pageProps` JSON key (camelCase rename of `page_props`).
page_props: PageProps,
}
/// The `pageProps` object; the only part modelled is the phone-number data.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct PageProps {
/// Read from the `phoneData` JSON key (camelCase rename of `phone_data`).
phone_data: PhoneData,
}
/// The `phoneData` object describing the looked-up number.
///
/// All three fields are required: none carries `#[serde(default)]`, so a
/// payload missing any of them fails to deserialize.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct PhoneData {
/// Read from `alternativeFormats`. Presumably other spellings of the same
/// number -- TODO confirm against a real payload.
alternative_formats: Vec<String>,
/// Read from `cleanNumber`; printed by the probe as the result heading.
clean_number: String,
/// Read from `comments`; the probe prints one bullet line per entry.
comments: Vec<Comment>,
}
/// One entry of the `comments` array inside `phoneData`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Comment {
/// Text of the comment; this is what the probe prints.
comment: String,
/// Time value attached to the comment. NOTE(review): the unit (seconds vs.
/// milliseconds) and epoch are not established here -- confirm before use.
timestamp: u64,
}
// https://www.hitta.se/vem-ringde/{}
/// Unit struct on which the `Probe` trait is implemented for hitta.se.
///
/// The URL above is the page this probe targets; `{}` is presumably the
/// placeholder for the phone number being looked up -- TODO confirm.
pub struct Hitta;
@@ -20,22 +54,27 @@ impl Probe for Hitta {
ctx.cache_set("hitta", &number, body.as_bytes())
.expect("wut?! why not?!");
let document = Html::parse_document(&body);
let re = Regex::new(r#"<script>__NEXT_DATA__ = (.*?);__NEXT_LOADED_PAGES__"#).unwrap();
// Header.
let selector = Selector::parse(r#"div[class^="Header__WhoCalledHeader"] > h1"#).unwrap();
if let Some(result) = re.captures(&body) {
let json = result.get(1).unwrap().as_str();
print!("Hitta.se:");
/*
let data: serde_json::Value = serde_json::from_str(&json).unwrap();
for element in document.select(&selector) {
println!(" {}", element.text().collect::<String>());
}
println!("{:#?}", data);
*/
// Comments.
let selector = Selector::parse(r#".topComment--comment"#).unwrap();
let data: Data = serde_json::from_str(&json).unwrap();
for element in document.select(&selector) {
println!(" * {}", element.text().collect::<String>());
println!(
"hitta.se: {}",
data.props.page_props.phone_data.clean_number
);
for comment in &data.props.page_props.phone_data.comments {
println!(" * {}", comment.comment);
}
}
}
}