Definition support.

This commit is contained in:
2019-02-12 13:04:39 +01:00
parent 70f89cc35c
commit 7569896f6b
10 changed files with 404 additions and 20 deletions

283
src/definition.rs Normal file
View File

@@ -0,0 +1,283 @@
use std::str;
use chrono_tz::Tz;
use scraper::{ElementRef, Html, Selector};
use serde::{de, Deserialize, Deserializer, Serialize};
use tinytemplate::TinyTemplate;
use crate::entry::{self, Date, Entry};
use crate::probe::Probe;
/// Values made available to the `path` template when `Definition::uri` renders it.
#[derive(Serialize)]
struct Context {
/// Copy of the `number` argument of `uri`; serialized under the key `number`.
number: String,
}
/// Deserializable description of one provider: where its page lives and which parts of the
/// HTML to extract. The `Probe` implementation for this type is further down in this file.
#[derive(Debug, Deserialize)]
pub struct Definition {
/// Provider name; returned as-is by `Probe::provider`.
name: String,
/// TinyTemplate source rendered by `Probe::uri` with a `number` value in its context.
path: String,
/// Fields whose extracted values become `Entry::messages`. Required in the definition.
messages: Vec<Field>,
/// Fields whose extracted values become `Entry::history`; empty when omitted.
#[serde(default)]
history: Vec<Field>,
/// Rules for extracting structured comments; empty when omitted.
#[serde(default)]
comments: Vec<Comment>,
}
/// Extraction rule for comments found in the page.
#[derive(Debug, Deserialize)]
struct Comment {
/// Selector matching each comment container; the fields below are looked up inside every
/// element it matches.
#[serde(deserialize_with = "deserialize_selector")]
selector: Selector,
/// Where and how to read the comment's date. Spelled `date_time` in the definition.
/// `parse` only emits a comment when this produced a value.
#[serde(rename = "date_time")]
datetime: Option<DateTime>,
/// Optional title field; `parse` ignores titles that extract to an empty string.
title: Option<Field>,
/// Message field; `parse` only emits a comment when this produced a value.
message: Option<Field>,
}
/// A `Field` plus the information `parse` needs to turn its extracted text into a `Date`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
struct DateTime {
/// The underlying field; its keys are flattened into this struct's own keys.
#[serde(flatten)]
field: Field,
/// Chooses between `Date::date_from` and `Date::datetime_from` in `parse`.
kind: DateTimeKind,
/// Format string handed to the chosen `Date` constructor.
/// NOTE(review): presumably a chrono strftime-style pattern — confirm in `crate::entry`.
format: String,
/// Time zone handed to the chosen `Date` constructor; read from a string by `deserialize_tz`.
#[serde(deserialize_with = "deserialize_tz")]
tz: Tz,
}
/// Which `Date` constructor `parse` uses for a `DateTime` field.
/// Variant names are snake_case in the definition (`date`, `date_time`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum DateTimeKind {
/// Parsed with `Date::date_from`.
Date,
/// Parsed with `Date::datetime_from`.
DateTime,
}
/// Element type of `Field::filters`, internally tagged by a `type` key.
/// The enum has no variants yet, so no value of it can exist and nothing in this file applies
/// filters. NOTE(review): looks like a placeholder for future post-processing steps — confirm.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum Filter {}
/// One extraction rule: which elements to look at and what to read from each of them.
#[derive(Debug, Deserialize)]
struct Field {
/// Selector for the elements to read; parsed from a string by `deserialize_selector`.
#[serde(deserialize_with = "deserialize_selector")]
selector: Selector,
/// What to read from each matched element; `Data::Text` when omitted.
#[serde(default)]
data: Data,
/// Filters attached to the field; empty when omitted.
#[serde(default)]
filters: Vec<Filter>,
}
/// What to read from a matched element.
///
/// Deserialized from a plain string by the hand-written `Deserialize` impl in this file.
#[derive(Debug)]
enum Data {
    /// The element's text nodes, trimmed and joined with single spaces.
    Text,
    /// The element's inner HTML.
    InnerHtml,
    /// The value of the attribute named `attr`.
    Attr { attr: String },
}

impl Data {
    /// Reads this kind of data from `element`.
    ///
    /// `Text` and `InnerHtml` always yield `Some` (possibly an empty string); `Attr` yields
    /// `None` when the element does not carry the attribute.
    fn extract(&self, element: &ElementRef) -> Option<String> {
        match *self {
            Data::InnerHtml => Some(element.inner_html()),
            Data::Attr { ref attr } => element.value().attr(attr).map(String::from),
            Data::Text => {
                // Join the non-blank text nodes with a single space between them.
                let mut joined = String::new();
                for piece in element.text().map(|node| node.trim()) {
                    if piece.is_empty() {
                        continue;
                    }
                    if !joined.is_empty() {
                        joined.push(' ');
                    }
                    joined.push_str(piece);
                }
                Some(joined)
            }
        }
    }
}

impl Default for Data {
    /// A field without an explicit `data` key reads the element's text.
    fn default() -> Self {
        Data::Text
    }
}
/// Deserializes `Data` from a string: `"text"`, `"inner_html"`, or `"attr:<name>"` where
/// everything after the first colon is taken as the attribute name.
impl<'de> Deserialize<'de> for Data {
    fn deserialize<D>(deserializer: D) -> Result<Data, D::Error>
    where
        D: Deserializer<'de>,
    {
        use serde::de::{self, Visitor};
        use std::fmt;

        /// Prefix that introduces an attribute name.
        const ATTR_PREFIX: &str = "attr:";

        struct DataVisitor;

        impl<'de> Visitor<'de> for DataVisitor {
            type Value = Data;

            // Shown by serde after "expected ..." when the input has the wrong type, so it
            // lists the accepted spellings.
            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str(r#"a string: "text", "inner_html" or "attr:<name>""#)
            }

            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                match value {
                    "text" => Ok(Data::Text),
                    "inner_html" => Ok(Data::InnerHtml),
                    // The guard proves the prefix is present, so slicing past it cannot fail
                    // and needs no `unwrap`.
                    s if s.starts_with(ATTR_PREFIX) => Ok(Data::Attr {
                        attr: s[ATTR_PREFIX.len()..].to_string(),
                    }),
                    _ => Err(E::custom(format_args!("unknown data type: {}", value))),
                }
            }
        }

        deserializer.deserialize_str(DataVisitor)
    }
}
impl Probe for Definition {
fn provider(&self) -> &str {
&self.name
}
fn uri(&self, number: &str) -> String {
let mut tt = TinyTemplate::new();
tt.add_template("path", &self.path)
.expect("failed to add path template");
let context = Context {
number: number.to_string(),
};
tt.render("path", &context)
.expect("failed to render path template")
}
fn fetch(&self, number: &str) -> Result<String, ()> {
reqwest::get(&self.uri(number))
.map_err(|_| ())?
.text()
.map_err(|_| ())
}
fn parse(&self, data: &str) -> Result<Entry, ()> {
let html = Html::parse_document(data);
let mut messages = Vec::new();
let mut history = Vec::new();
let mut comments = Vec::new();
for field in &self.messages {
for element in html.select(&field.selector) {
if let Some(data) = field.data.extract(&element) {
messages.push(data);
}
}
}
for field in &self.history {
for element in html.select(&field.selector) {
if let Some(data) = field.data.extract(&element) {
history.push(data);
}
}
}
for comment in &self.comments {
for comments_element in html.select(&comment.selector) {
let mut datetime: Option<Date> = None;
let mut title: Option<String> = None;
let mut message: Option<String> = None;
if let Some(ref datetime_field) = comment.datetime {
for comment_element in comments_element.select(&datetime_field.field.selector) {
if let Some(data) = datetime_field.field.data.extract(&comment_element) {
// for filter in &datetime_field.field.filters {}
let data = match datetime_field.kind {
DateTimeKind::Date => Date::date_from(
datetime_field.tz,
&data,
&datetime_field.format,
)
.expect("failed to parse date"),
DateTimeKind::DateTime => Date::datetime_from(
datetime_field.tz,
&data,
&datetime_field.format,
)
.expect("failed to parse date time"),
};
datetime = Some(data);
}
}
}
if let Some(ref title_field) = comment.title {
for comment_element in comments_element.select(&title_field.selector) {
if let Some(data) = title_field
.data
.extract(&comment_element)
.filter(|data| !data.is_empty())
{
// for filter in &message_field.filters {}
title = Some(data);
}
}
}
if let Some(ref message_field) = comment.message {
for comment_element in comments_element.select(&message_field.selector) {
if let Some(data) = message_field.data.extract(&comment_element) {
// for filter in &message_field.filters {}
message = Some(data);
}
}
}
if datetime.is_some() && message.is_some() {
comments.push(entry::Comment {
datetime: datetime.unwrap(),
title,
message: message.unwrap(),
});
}
}
}
if !messages.is_empty() || !history.is_empty() || !comments.is_empty() {
Ok(Entry {
messages,
history,
comments,
})
} else {
Err(())
}
}
}
fn deserialize_selector<'de, D>(deserializer: D) -> Result<Selector, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Selector::parse(&s).map_err(|_| de::Error::custom("failed to parse selector"))
}
fn deserialize_tz<'de, D>(deserializer: D) -> Result<Tz, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
s.parse::<Tz>()
.map_err(|_| de::Error::custom("failed to parse tz"))
}