use std::str; use chrono_tz::Tz; use scraper::{ElementRef, Html, Selector}; use serde::{de, Deserialize, Deserializer, Serialize}; use tinytemplate::TinyTemplate; use crate::entry::{self, Date, Entry}; use crate::probe::Probe; #[derive(Serialize)] struct Context { number: String, } #[derive(Debug, Deserialize)] pub struct Definition { name: String, path: String, messages: Vec, #[serde(default)] history: Vec, #[serde(default)] comments: Vec, } #[derive(Debug, Deserialize)] struct Comment { #[serde(deserialize_with = "deserialize_selector")] selector: Selector, #[serde(rename = "date_time")] datetime: Option, title: Option, message: Option, } #[derive(Debug, Deserialize)] #[serde(rename_all = "snake_case")] struct DateTime { #[serde(flatten)] field: Field, kind: DateTimeKind, format: String, #[serde(deserialize_with = "deserialize_tz")] tz: Tz, } #[derive(Debug, Deserialize)] #[serde(rename_all = "snake_case")] enum DateTimeKind { Date, DateTime, } #[derive(Debug, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] enum Filter {} #[derive(Debug, Deserialize)] struct Field { #[serde(deserialize_with = "deserialize_selector")] selector: Selector, #[serde(default)] data: Data, #[serde(default)] filters: Vec, } #[derive(Debug)] enum Data { Text, InnerHtml, Attr { attr: String }, } impl Data { fn extract(&self, element: &ElementRef) -> Option { match self { Data::Text => Some( element .text() .map(str::trim) .filter(|s| !s.is_empty()) .collect::>() .join(" "), ), Data::InnerHtml => Some(element.inner_html()), Data::Attr { attr } => element.value().attr(attr).map(|data| data.to_string()), } } } impl Default for Data { fn default() -> Self { Data::Text } } impl<'de> Deserialize<'de> for Data { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { use std::fmt; use serde::de::{self, Visitor}; struct StrVisitor; impl<'de> Visitor<'de> for StrVisitor { type Value = Data; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("an str") } fn visit_str(self, value: &str) -> Result where E: de::Error, { match value { "text" => Ok(Data::Text), "inner_html" => Ok(Data::InnerHtml), s if s.starts_with("attr:") => { let attr = s.splitn(2, ":").nth(1).unwrap(); Ok(Data::Attr { attr: attr.to_string(), }) } _ => Err(E::custom(format!("unknown data type: {}", value))), } } } deserializer.deserialize_str(StrVisitor) } } impl Probe for Definition { fn provider(&self) -> &str { &self.name } fn uri(&self, number: &str) -> String { let mut tt = TinyTemplate::new(); tt.add_template("path", &self.path) .expect("failed to add path template"); let context = Context { number: number.to_string(), }; tt.render("path", &context) .expect("failed to render path template") } fn fetch(&self, number: &str) -> Result { reqwest::get(&self.uri(number)) .map_err(|_| ())? .text() .map_err(|_| ()) } fn parse(&self, data: &str) -> Result { let html = Html::parse_document(data); let mut messages = Vec::new(); let mut history = Vec::new(); let mut comments = Vec::new(); for field in &self.messages { for element in html.select(&field.selector) { if let Some(data) = field.data.extract(&element) { messages.push(data); } } } for field in &self.history { for element in html.select(&field.selector) { if let Some(data) = field.data.extract(&element) { history.push(data); } } } for comment in &self.comments { for comments_element in html.select(&comment.selector) { let mut datetime: Option = None; let mut title: Option = None; let mut message: Option = None; if let Some(ref datetime_field) = comment.datetime { for comment_element in comments_element.select(&datetime_field.field.selector) { if let Some(data) = datetime_field.field.data.extract(&comment_element) { // for filter in &datetime_field.field.filters {} let data = match datetime_field.kind { DateTimeKind::Date => Date::date_from( datetime_field.tz, &data, &datetime_field.format, ) .expect("failed to parse date"), DateTimeKind::DateTime => Date::datetime_from( datetime_field.tz, &data, &datetime_field.format, ) .expect("failed to parse date time"), }; datetime = Some(data); } } } if let Some(ref title_field) = comment.title { for comment_element in comments_element.select(&title_field.selector) { if let Some(data) = title_field .data .extract(&comment_element) .filter(|data| !data.is_empty()) { // for filter in &message_field.filters {} title = Some(data); } } } if let Some(ref message_field) = comment.message { for comment_element in comments_element.select(&message_field.selector) { if let Some(data) = message_field.data.extract(&comment_element) { // for filter in &message_field.filters {} message = Some(data); } } } if datetime.is_some() && message.is_some() { comments.push(entry::Comment { datetime: datetime.unwrap(), title, message: message.unwrap(), }); } } } if !messages.is_empty() || !history.is_empty() || !comments.is_empty() { Ok(Entry { messages, history, comments, }) } else { Err(()) } } } fn deserialize_selector<'de, D>(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; Selector::parse(&s).map_err(|_| de::Error::custom("failed to parse selector")) } fn deserialize_tz<'de, D>(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; s.parse::() .map_err(|_| de::Error::custom("failed to parse tz")) }