286 lines
7.2 KiB
Rust
286 lines
7.2 KiB
Rust
use chrono::{TimeZone, Utc};
|
|
use log::debug;
|
|
use regex::Regex;
|
|
use serde::Deserialize;
|
|
|
|
use crate::context::Context;
|
|
use crate::probe::{self, Entry, Probe};
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(rename_all = "camelCase")]
|
|
struct Data {
|
|
props: Props,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(rename_all = "camelCase")]
|
|
struct Props {
|
|
page_props: PageProps,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(rename_all = "camelCase")]
|
|
struct PageProps {
|
|
status_code: Option<u16>,
|
|
phone_data: Option<PhoneData>,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(rename_all = "camelCase")]
|
|
struct PhoneData {
|
|
alternative_formats: Vec<String>,
|
|
clean_number: String,
|
|
#[serde(default)]
|
|
comments: Vec<Comment>,
|
|
statistics_text: String,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(rename_all = "camelCase")]
|
|
struct Comment {
|
|
comment: String,
|
|
timestamp: u64,
|
|
}
|
|
|
|
fn from_html(document: &str) -> Result<Entry, ()> {
|
|
let re = Regex::new(r#"<script>__NEXT_DATA__ = (.*?);__NEXT_LOADED_PAGES__"#).unwrap();
|
|
|
|
let result = re.captures(&document).ok_or_else(|| {
|
|
debug!("Hitta: failed to find __NEXT_DATA__");
|
|
})?;
|
|
|
|
let json = result.get(1).unwrap().as_str();
|
|
|
|
/*
|
|
println!(
|
|
"json: {:#?}",
|
|
serde_json::from_str::<serde_json::Value>(&json)
|
|
);
|
|
*/
|
|
|
|
if let Ok(data) = serde_json::from_str::<Data>(&json) {
|
|
let messages = Vec::new();
|
|
let mut history = Vec::new();
|
|
let mut comments = Vec::new();
|
|
|
|
if let Some(phone_data) = data.props.page_props.phone_data {
|
|
history.push(phone_data.statistics_text);
|
|
|
|
for comment in phone_data.comments {
|
|
comments.push(probe::Comment {
|
|
datetime: Utc.timestamp(
|
|
(comment.timestamp / 1000) as i64,
|
|
(comment.timestamp % 1000) as u32,
|
|
),
|
|
title: None,
|
|
message: comment.comment,
|
|
});
|
|
}
|
|
|
|
comments.sort_by(|a, b| b.datetime.cmp(&a.datetime));
|
|
}
|
|
|
|
Ok(Entry {
|
|
messages,
|
|
history,
|
|
comments,
|
|
})
|
|
} else {
|
|
if let Err(error) = serde_json::from_str::<Data>(&json) {
|
|
debug!("Hitta: failed to deserialize data: {:#?}", error);
|
|
}
|
|
|
|
Err(())
|
|
}
|
|
}
|
|
|
|
pub struct Hitta;
|
|
|
|
impl Probe for Hitta {
|
|
fn uri(&self, number: &str) -> String {
|
|
format!("https://www.hitta.se/vem-ringde/{}", number)
|
|
}
|
|
|
|
fn search(&mut self, ctx: &mut Context, number: &str) -> Result<Entry, ()> {
|
|
let body = if let Some(cache) = ctx.cache_get("hitta", &number) {
|
|
String::from_utf8(cache.data).unwrap()
|
|
} else {
|
|
let body = reqwest::get(&self.uri(number)).unwrap().text().unwrap();
|
|
|
|
ctx.cache_set("hitta", &number, body.as_bytes())
|
|
.expect("wut?! why not?!");
|
|
|
|
body
|
|
};
|
|
|
|
from_html(&body)
|
|
}
|
|
}
|
|
|
|
// Snapshot tests for `from_html`. Each test embeds a saved page from
// `fixtures/hitta/` at compile time and compares the parse result with an
// inline YAML snapshot.
//
// NOTE(review): every line of this module sits at column 0 and is interleaved
// with lone `|` lines, including inside the raw-string snapshots. This looks
// like an extraction artifact. YAML indentation is significant, so the
// snapshots should be restored from the original file or regenerated rather
// than re-indented by hand — TODO confirm.
#[cfg(test)]
|
|
mod tests {
|
|
use insta::assert_yaml_snapshot_matches;
|
|
|
|
use super::*;
|
|
|
|
// Page with a statistics line and many user comments; the snapshot lists the
// comments in descending datetime order.
#[test]
|
|
fn test_0104754350() {
|
|
let document = include_str!("../../fixtures/hitta/0104754350.html");
|
|
|
|
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
|
|
messages: []
|
|
history:
|
|
- 42 andra har rapporterat detta nummer
|
|
comments:
|
|
- datetime: "2019-01-17T17:29:22Z"
|
|
title: ~
|
|
message: Varmsälj från Folksam
|
|
- datetime: "2018-12-14T13:45:28Z"
|
|
title: ~
|
|
message: Folksam
|
|
- datetime: "2018-11-28T07:30:18Z"
|
|
title: ~
|
|
message: Höglandschskt
|
|
- datetime: "2018-11-20T19:18:09Z"
|
|
title: ~
|
|
message: "Försäljare "
|
|
- datetime: "2018-11-19T17:38:34Z"
|
|
title: ~
|
|
message: mögg från Folksam
|
|
- datetime: "2018-11-12T16:00:41Z"
|
|
title: ~
|
|
message: Folksam försäkringsförsäljare
|
|
- datetime: "2018-10-25T10:28:36Z"
|
|
title: ~
|
|
message: folksam
|
|
- datetime: "2018-10-10T07:30:40Z"
|
|
title: ~
|
|
message: Telefonförsäljare
|
|
- datetime: "2018-10-04T10:04:55Z"
|
|
title: ~
|
|
message: Folksam säljare
|
|
- datetime: "2018-10-03T13:55:19Z"
|
|
title: ~
|
|
message: Sa inget.
|
|
- datetime: "2018-08-24T16:56:46Z"
|
|
title: ~
|
|
message: Folksam
|
|
- datetime: "2018-08-24T09:42:43Z"
|
|
title: ~
|
|
message: Achmati azmut från folksam
|
|
- datetime: "2018-08-21T18:29:29Z"
|
|
title: ~
|
|
message: Folksam
|
|
- datetime: "2018-08-16T18:56:56Z"
|
|
title: ~
|
|
message: Säljare från Folksam.
|
|
- datetime: "2018-08-16T14:48:59Z"
|
|
title: ~
|
|
message: "Folksam "
|
|
- datetime: "2018-08-09T16:30:28Z"
|
|
title: ~
|
|
message: Folksam
|
|
- datetime: "2018-08-02T16:29:32Z"
|
|
title: ~
|
|
message: "Folksam "
|
|
- datetime: "2018-08-02T15:33:38Z"
|
|
title: ~
|
|
message: "Folksam "
|
|
- datetime: "2018-07-25T08:28:27Z"
|
|
title: ~
|
|
message: Säljare Folksam
|
|
- datetime: "2018-07-17T21:20:51Z"
|
|
title: ~
|
|
message: "Inga Hansson "
|
|
- datetime: "2018-07-16T18:11:46Z"
|
|
title: ~
|
|
message: Folksam
|
|
- datetime: "2018-07-06T15:45:46Z"
|
|
title: ~
|
|
message: "Folksam "
|
|
- datetime: "2018-07-05T17:24:07Z"
|
|
title: ~
|
|
message: folksam
|
|
- datetime: "2018-07-05T11:15:02Z"
|
|
title: ~
|
|
message: Vesran
|
|
- datetime: "2018-07-04T13:30:49Z"
|
|
title: ~
|
|
message: Folksam
|
|
- datetime: "2018-06-29T10:52:51Z"
|
|
title: ~
|
|
message: folksam
|
|
- datetime: "2018-06-28T13:33:01Z"
|
|
title: ~
|
|
message: Säljare folksam
|
|
- datetime: "2018-06-28T07:42:42Z"
|
|
title: ~
|
|
message: Folksam försäkringar
|
|
- datetime: "2018-06-26T12:59:33Z"
|
|
title: ~
|
|
message: Säljare Folksam"###);
|
|
}
|
|
|
|
// Expects a successful parse with no history lines and no comments.
#[test]
|
|
fn test_0313908905() {
|
|
let document = include_str!("../../fixtures/hitta/0313908905.html");
|
|
|
|
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
|
|
messages: []
|
|
history: []
|
|
comments: []"###);
|
|
}
|
|
|
|
// Expects a single history line and no comments.
#[test]
|
|
fn test_0702269893() {
|
|
let document = include_str!("../../fixtures/hitta/0702269893.html");
|
|
|
|
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
|
|
messages: []
|
|
history:
|
|
- Tre andra har också sökt på detta nummer
|
|
comments: []"###);
|
|
}
|
|
|
|
// Expects a single history line and no comments.
#[test]
|
|
fn test_0726443387() {
|
|
let document = include_str!("../../fixtures/hitta/0726443387.html");
|
|
|
|
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
|
|
messages: []
|
|
history:
|
|
- 1299 andra har också sökt på detta nummer
|
|
comments: []"###);
|
|
}
|
|
|
|
// Expects a successful parse with no history lines and no comments.
#[test]
|
|
fn test_0751793426() {
|
|
let document = include_str!("../../fixtures/hitta/0751793426.html");
|
|
|
|
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
|
|
messages: []
|
|
history: []
|
|
comments: []"###);
|
|
}
|
|
|
|
// Expects a successful parse with no history lines and no comments.
#[test]
|
|
fn test_0751793483() {
|
|
let document = include_str!("../../fixtures/hitta/0751793483.html");
|
|
|
|
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
|
|
messages: []
|
|
history: []
|
|
comments: []"###);
|
|
}
|
|
|
|
// Expects a successful parse with no history lines and no comments.
#[test]
|
|
fn test_0751793499() {
|
|
let document = include_str!("../../fixtures/hitta/0751793499.html");
|
|
|
|
assert_yaml_snapshot_matches!(from_html(&document), @r###"Ok:
|
|
messages: []
|
|
history: []
|
|
comments: []"###);
|
|
}
|
|
}
|