Files
whoareyou/src/probe/hitta.rs
2019-02-06 09:24:42 +01:00

286 lines
7.2 KiB
Rust

use chrono::{TimeZone, Utc};
use log::debug;
use regex::Regex;
use serde::Deserialize;
use crate::context::Context;
use crate::probe::{self, Entry, Probe};
/// Root of the JSON document that `from_html` captures from the page's
/// `__NEXT_DATA__` script assignment.
///
/// Keys are matched in camelCase (see the `rename_all` attribute).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Data {
/// Deserialized from the required `props` key.
props: Props,
}
/// Contents of the `props` object held by `Data`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Props {
/// Deserialized from the required `pageProps` key (camelCase renaming).
page_props: PageProps,
}
/// Contents of the `pageProps` object; both fields may be absent in the JSON.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct PageProps {
/// Deserialized from `statusCode`. Not read by the code in view.
status_code: Option<u16>,
/// Deserialized from `phoneData`. When it is `None`, `from_html` returns an
/// entry whose history and comments are both empty.
phone_data: Option<PhoneData>,
}
/// Contents of the `phoneData` object for a looked-up number.
///
/// Every field except `comments` is required: deserialization fails if the
/// corresponding key is missing.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct PhoneData {
/// Deserialized from `alternativeFormats`. Not read by the code in view;
/// presumably other spellings of the number (TODO confirm).
alternative_formats: Vec<String>,
/// Deserialized from `cleanNumber`. Not read by the code in view.
clean_number: String,
/// Deserialized from `comments`; falls back to an empty list when the key
/// is absent.
#[serde(default)]
comments: Vec<Comment>,
/// Deserialized from `statisticsText`; `from_html` pushes it into the
/// entry's history.
statistics_text: String,
}
/// A single user comment as it appears in the `comments` array of the JSON.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Comment {
/// Comment text; becomes the `message` of the resulting `probe::Comment`.
comment: String,
/// Time of the comment. `from_html` divides this by 1000 to obtain seconds,
/// i.e. it is treated as milliseconds since the Unix epoch.
timestamp: u64,
}
fn from_html(document: &str) -> Result<Entry, ()> {
let re = Regex::new(r#"<script>__NEXT_DATA__ = (.*?);__NEXT_LOADED_PAGES__"#).unwrap();
let result = re.captures(&document).ok_or_else(|| {
debug!("Hitta: failed to find __NEXT_DATA__");
})?;
let json = result.get(1).unwrap().as_str();
/*
println!(
"json: {:#?}",
serde_json::from_str::<serde_json::Value>(&json)
);
*/
if let Ok(data) = serde_json::from_str::<Data>(&json) {
let messages = Vec::new();
let mut history = Vec::new();
let mut comments = Vec::new();
if let Some(phone_data) = data.props.page_props.phone_data {
history.push(phone_data.statistics_text);
for comment in phone_data.comments {
comments.push(probe::Comment {
datetime: Utc.timestamp(
(comment.timestamp / 1000) as i64,
(comment.timestamp % 1000) as u32,
),
title: None,
message: comment.comment,
});
}
comments.sort_by(|a, b| b.datetime.cmp(&a.datetime));
}
Ok(Entry {
messages,
history,
comments,
})
} else {
if let Err(error) = serde_json::from_str::<Data>(&json) {
debug!("Hitta: failed to deserialize data: {:#?}", error);
}
Err(())
}
}
pub struct Hitta;
impl Probe for Hitta {
fn uri(&self, number: &str) -> String {
format!("https://www.hitta.se/vem-ringde/{}", number)
}
fn search(&mut self, ctx: &mut Context, number: &str) -> Result<Entry, ()> {
let body = if let Some(cache) = ctx.cache_get("hitta", &number) {
String::from_utf8(cache.data).unwrap()
} else {
let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap();
ctx.cache_set("hitta", &number, body.as_bytes())
.expect("wut?! why not?!");
body
};
from_html(&body)
}
}
// Snapshot tests for `from_html`: each test parses a stored HTML fixture and
// compares the result against an inline YAML snapshot.
#[cfg(test)]
mod tests {
use insta::assert_yaml_snapshot_matches;
use super::*;
// Fixture with a statistics line and many user comments; the snapshot lists
// the comments from newest to oldest.
#[test]
fn test_0104754350() {
let document = include_str!("../../fixtures/hitta/0104754350.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history:
- 42 andra har rapporterat detta nummer
comments:
- datetime: "2019-01-17T17:29:22Z"
title: ~
message: Varmsälj från Folksam
- datetime: "2018-12-14T13:45:28Z"
title: ~
message: Folksam
- datetime: "2018-11-28T07:30:18Z"
title: ~
message: Höglandschskt
- datetime: "2018-11-20T19:18:09Z"
title: ~
message: "Försäljare "
- datetime: "2018-11-19T17:38:34Z"
title: ~
message: mögg från Folksam
- datetime: "2018-11-12T16:00:41Z"
title: ~
message: Folksam försäkringsförsäljare
- datetime: "2018-10-25T10:28:36Z"
title: ~
message: folksam
- datetime: "2018-10-10T07:30:40Z"
title: ~
message: Telefonförsäljare
- datetime: "2018-10-04T10:04:55Z"
title: ~
message: Folksam säljare
- datetime: "2018-10-03T13:55:19Z"
title: ~
message: Sa inget.
- datetime: "2018-08-24T16:56:46Z"
title: ~
message: Folksam
- datetime: "2018-08-24T09:42:43Z"
title: ~
message: Achmati azmut från folksam
- datetime: "2018-08-21T18:29:29Z"
title: ~
message: Folksam
- datetime: "2018-08-16T18:56:56Z"
title: ~
message: Säljare från Folksam.
- datetime: "2018-08-16T14:48:59Z"
title: ~
message: "Folksam "
- datetime: "2018-08-09T16:30:28Z"
title: ~
message: Folksam
- datetime: "2018-08-02T16:29:32Z"
title: ~
message: "Folksam "
- datetime: "2018-08-02T15:33:38Z"
title: ~
message: "Folksam "
- datetime: "2018-07-25T08:28:27Z"
title: ~
message: Säljare Folksam
- datetime: "2018-07-17T21:20:51Z"
title: ~
message: "Inga Hansson "
- datetime: "2018-07-16T18:11:46Z"
title: ~
message: Folksam
- datetime: "2018-07-06T15:45:46Z"
title: ~
message: "Folksam "
- datetime: "2018-07-05T17:24:07Z"
title: ~
message: folksam
- datetime: "2018-07-05T11:15:02Z"
title: ~
message: Vesran
- datetime: "2018-07-04T13:30:49Z"
title: ~
message: Folksam
- datetime: "2018-06-29T10:52:51Z"
title: ~
message: folksam
- datetime: "2018-06-28T13:33:01Z"
title: ~
message: Säljare folksam
- datetime: "2018-06-28T07:42:42Z"
title: ~
message: Folksam försäkringar
- datetime: "2018-06-26T12:59:33Z"
title: ~
message: Säljare Folksam"###);
}
// Fixture that parses successfully but yields no history and no comments.
#[test]
fn test_0313908905() {
let document = include_str!("../../fixtures/hitta/0313908905.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
}
// Fixture with a statistics line but no comments.
#[test]
fn test_0702269893() {
let document = include_str!("../../fixtures/hitta/0702269893.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history:
- Tre andra har också sökt på detta nummer
comments: []"###);
}
// Fixture with a statistics line but no comments.
#[test]
fn test_0726443387() {
let document = include_str!("../../fixtures/hitta/0726443387.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history:
- 1299 andra har också sökt på detta nummer
comments: []"###);
}
// Fixture that parses successfully but yields no history and no comments.
#[test]
fn test_0751793426() {
let document = include_str!("../../fixtures/hitta/0751793426.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
}
// Fixture that parses successfully but yields no history and no comments.
#[test]
fn test_0751793483() {
let document = include_str!("../../fixtures/hitta/0751793483.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
}
// Fixture that parses successfully but yields no history and no comments.
#[test]
fn test_0751793499() {
let document = include_str!("../../fixtures/hitta/0751793499.html");
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
messages: []
history: []
comments: []"###);
}
}