Definition support.
This commit is contained in:
283
src/definition.rs
Normal file
283
src/definition.rs
Normal file
@@ -0,0 +1,283 @@
|
||||
use std::str;
|
||||
|
||||
use chrono_tz::Tz;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use serde::{de, Deserialize, Deserializer, Serialize};
|
||||
use tinytemplate::TinyTemplate;
|
||||
|
||||
use crate::entry::{self, Date, Entry};
|
||||
use crate::probe::Probe;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct Context {
|
||||
number: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct Definition {
|
||||
name: String,
|
||||
path: String,
|
||||
messages: Vec<Field>,
|
||||
#[serde(default)]
|
||||
history: Vec<Field>,
|
||||
#[serde(default)]
|
||||
comments: Vec<Comment>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct Comment {
|
||||
#[serde(deserialize_with = "deserialize_selector")]
|
||||
selector: Selector,
|
||||
#[serde(rename = "date_time")]
|
||||
datetime: Option<DateTime>,
|
||||
title: Option<Field>,
|
||||
message: Option<Field>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
struct DateTime {
|
||||
#[serde(flatten)]
|
||||
field: Field,
|
||||
kind: DateTimeKind,
|
||||
format: String,
|
||||
#[serde(deserialize_with = "deserialize_tz")]
|
||||
tz: Tz,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
enum DateTimeKind {
|
||||
Date,
|
||||
DateTime,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
enum Filter {}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct Field {
|
||||
#[serde(deserialize_with = "deserialize_selector")]
|
||||
selector: Selector,
|
||||
#[serde(default)]
|
||||
data: Data,
|
||||
#[serde(default)]
|
||||
filters: Vec<Filter>,
|
||||
}
|
||||
|
||||
/// Part of a matched element that a [`Field`] reads.
#[derive(Debug)]
enum Data {
    /// The element's text pieces, trimmed and joined with single spaces.
    Text,
    /// The element's inner HTML.
    InnerHtml,
    /// The value of the attribute named `attr`.
    Attr { attr: String },
}
|
||||
|
||||
impl Data {
|
||||
fn extract(&self, element: &ElementRef) -> Option<String> {
|
||||
match self {
|
||||
Data::Text => Some(
|
||||
element
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" "),
|
||||
),
|
||||
Data::InnerHtml => Some(element.inner_html()),
|
||||
Data::Attr { attr } => element.value().attr(attr).map(|data| data.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Data {
|
||||
fn default() -> Self {
|
||||
Data::Text
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Data {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Data, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
use std::fmt;
|
||||
|
||||
use serde::de::{self, Visitor};
|
||||
|
||||
struct StrVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for StrVisitor {
|
||||
type Value = Data;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("an str")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
match value {
|
||||
"text" => Ok(Data::Text),
|
||||
"inner_html" => Ok(Data::InnerHtml),
|
||||
s if s.starts_with("attr:") => {
|
||||
let attr = s.splitn(2, ":").nth(1).unwrap();
|
||||
|
||||
Ok(Data::Attr {
|
||||
attr: attr.to_string(),
|
||||
})
|
||||
}
|
||||
_ => Err(E::custom(format!("unknown data type: {}", value))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_str(StrVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
impl Probe for Definition {
|
||||
fn provider(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
|
||||
fn uri(&self, number: &str) -> String {
|
||||
let mut tt = TinyTemplate::new();
|
||||
|
||||
tt.add_template("path", &self.path)
|
||||
.expect("failed to add path template");
|
||||
|
||||
let context = Context {
|
||||
number: number.to_string(),
|
||||
};
|
||||
|
||||
tt.render("path", &context)
|
||||
.expect("failed to render path template")
|
||||
}
|
||||
|
||||
fn fetch(&self, number: &str) -> Result<String, ()> {
|
||||
reqwest::get(&self.uri(number))
|
||||
.map_err(|_| ())?
|
||||
.text()
|
||||
.map_err(|_| ())
|
||||
}
|
||||
|
||||
fn parse(&self, data: &str) -> Result<Entry, ()> {
|
||||
let html = Html::parse_document(data);
|
||||
|
||||
let mut messages = Vec::new();
|
||||
let mut history = Vec::new();
|
||||
let mut comments = Vec::new();
|
||||
|
||||
for field in &self.messages {
|
||||
for element in html.select(&field.selector) {
|
||||
if let Some(data) = field.data.extract(&element) {
|
||||
messages.push(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for field in &self.history {
|
||||
for element in html.select(&field.selector) {
|
||||
if let Some(data) = field.data.extract(&element) {
|
||||
history.push(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for comment in &self.comments {
|
||||
for comments_element in html.select(&comment.selector) {
|
||||
let mut datetime: Option<Date> = None;
|
||||
let mut title: Option<String> = None;
|
||||
let mut message: Option<String> = None;
|
||||
|
||||
if let Some(ref datetime_field) = comment.datetime {
|
||||
for comment_element in comments_element.select(&datetime_field.field.selector) {
|
||||
if let Some(data) = datetime_field.field.data.extract(&comment_element) {
|
||||
// for filter in &datetime_field.field.filters {}
|
||||
|
||||
let data = match datetime_field.kind {
|
||||
DateTimeKind::Date => Date::date_from(
|
||||
datetime_field.tz,
|
||||
&data,
|
||||
&datetime_field.format,
|
||||
)
|
||||
.expect("failed to parse date"),
|
||||
DateTimeKind::DateTime => Date::datetime_from(
|
||||
datetime_field.tz,
|
||||
&data,
|
||||
&datetime_field.format,
|
||||
)
|
||||
.expect("failed to parse date time"),
|
||||
};
|
||||
|
||||
datetime = Some(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref title_field) = comment.title {
|
||||
for comment_element in comments_element.select(&title_field.selector) {
|
||||
if let Some(data) = title_field
|
||||
.data
|
||||
.extract(&comment_element)
|
||||
.filter(|data| !data.is_empty())
|
||||
{
|
||||
// for filter in &message_field.filters {}
|
||||
|
||||
title = Some(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref message_field) = comment.message {
|
||||
for comment_element in comments_element.select(&message_field.selector) {
|
||||
if let Some(data) = message_field.data.extract(&comment_element) {
|
||||
// for filter in &message_field.filters {}
|
||||
|
||||
message = Some(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if datetime.is_some() && message.is_some() {
|
||||
comments.push(entry::Comment {
|
||||
datetime: datetime.unwrap(),
|
||||
title,
|
||||
message: message.unwrap(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !messages.is_empty() || !history.is_empty() || !comments.is_empty() {
|
||||
Ok(Entry {
|
||||
messages,
|
||||
history,
|
||||
comments,
|
||||
})
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn deserialize_selector<'de, D>(deserializer: D) -> Result<Selector, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
|
||||
Selector::parse(&s).map_err(|_| de::Error::custom("failed to parse selector"))
|
||||
}
|
||||
|
||||
fn deserialize_tz<'de, D>(deserializer: D) -> Result<Tz, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
|
||||
s.parse::<Tz>()
|
||||
.map_err(|_| de::Error::custom("failed to parse tz"))
|
||||
}
|
||||
Reference in New Issue
Block a user