284 lines
8.0 KiB
Rust
284 lines
8.0 KiB
Rust
use std::str;
|
|
|
|
use chrono_tz::Tz;
|
|
use scraper::{ElementRef, Html, Selector};
|
|
use serde::{de, Deserialize, Deserializer, Serialize};
|
|
use tinytemplate::TinyTemplate;
|
|
|
|
use crate::entry::{self, Date, Entry};
|
|
use crate::probe::Probe;
|
|
|
|
#[derive(Serialize)]
|
|
struct Context {
|
|
number: String,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
pub struct Definition {
|
|
name: String,
|
|
path: String,
|
|
messages: Vec<Field>,
|
|
#[serde(default)]
|
|
history: Vec<Field>,
|
|
#[serde(default)]
|
|
comments: Vec<Comment>,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct Comment {
|
|
#[serde(deserialize_with = "deserialize_selector")]
|
|
selector: Selector,
|
|
#[serde(rename = "date_time")]
|
|
datetime: Option<DateTime>,
|
|
title: Option<Field>,
|
|
message: Option<Field>,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(rename_all = "snake_case")]
|
|
struct DateTime {
|
|
#[serde(flatten)]
|
|
field: Field,
|
|
kind: DateTimeKind,
|
|
format: String,
|
|
#[serde(deserialize_with = "deserialize_tz")]
|
|
tz: Tz,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(rename_all = "snake_case")]
|
|
enum DateTimeKind {
|
|
Date,
|
|
DateTime,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
#[serde(tag = "type", rename_all = "snake_case")]
|
|
enum Filter {}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct Field {
|
|
#[serde(deserialize_with = "deserialize_selector")]
|
|
selector: Selector,
|
|
#[serde(default)]
|
|
data: Data,
|
|
#[serde(default)]
|
|
filters: Vec<Filter>,
|
|
}
|
|
|
|
/// The part of a matched element that is read as the field's value.
#[derive(Debug)]
enum Data {
    /// The element's text pieces, trimmed, with empty pieces dropped and the
    /// rest joined by single spaces. Written as `text`.
    Text,
    /// The element's inner HTML. Written as `inner_html`.
    InnerHtml,
    /// The value of the named attribute. Written as `attr:<name>`.
    Attr { attr: String },
}
|
|
|
|
impl Data {
|
|
fn extract(&self, element: &ElementRef) -> Option<String> {
|
|
match self {
|
|
Data::Text => Some(
|
|
element
|
|
.text()
|
|
.map(str::trim)
|
|
.filter(|s| !s.is_empty())
|
|
.collect::<Vec<_>>()
|
|
.join(" "),
|
|
),
|
|
Data::InnerHtml => Some(element.inner_html()),
|
|
Data::Attr { attr } => element.value().attr(attr).map(|data| data.to_string()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Default for Data {
|
|
fn default() -> Self {
|
|
Data::Text
|
|
}
|
|
}
|
|
|
|
impl<'de> Deserialize<'de> for Data {
|
|
fn deserialize<D>(deserializer: D) -> Result<Data, D::Error>
|
|
where
|
|
D: Deserializer<'de>,
|
|
{
|
|
use std::fmt;
|
|
|
|
use serde::de::{self, Visitor};
|
|
|
|
struct StrVisitor;
|
|
|
|
impl<'de> Visitor<'de> for StrVisitor {
|
|
type Value = Data;
|
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
|
formatter.write_str("an str")
|
|
}
|
|
|
|
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
|
|
where
|
|
E: de::Error,
|
|
{
|
|
match value {
|
|
"text" => Ok(Data::Text),
|
|
"inner_html" => Ok(Data::InnerHtml),
|
|
s if s.starts_with("attr:") => {
|
|
let attr = s.splitn(2, ":").nth(1).unwrap();
|
|
|
|
Ok(Data::Attr {
|
|
attr: attr.to_string(),
|
|
})
|
|
}
|
|
_ => Err(E::custom(format!("unknown data type: {}", value))),
|
|
}
|
|
}
|
|
}
|
|
|
|
deserializer.deserialize_str(StrVisitor)
|
|
}
|
|
}
|
|
|
|
impl Probe for Definition {
|
|
fn provider(&self) -> &str {
|
|
&self.name
|
|
}
|
|
|
|
fn uri(&self, number: &str) -> String {
|
|
let mut tt = TinyTemplate::new();
|
|
|
|
tt.add_template("path", &self.path)
|
|
.expect("failed to add path template");
|
|
|
|
let context = Context {
|
|
number: number.to_string(),
|
|
};
|
|
|
|
tt.render("path", &context)
|
|
.expect("failed to render path template")
|
|
}
|
|
|
|
fn fetch(&self, number: &str) -> Result<String, ()> {
|
|
reqwest::get(&self.uri(number))
|
|
.map_err(|_| ())?
|
|
.text()
|
|
.map_err(|_| ())
|
|
}
|
|
|
|
fn parse(&self, data: &str) -> Result<Entry, ()> {
|
|
let html = Html::parse_document(data);
|
|
|
|
let mut messages = Vec::new();
|
|
let mut history = Vec::new();
|
|
let mut comments = Vec::new();
|
|
|
|
for field in &self.messages {
|
|
for element in html.select(&field.selector) {
|
|
if let Some(data) = field.data.extract(&element) {
|
|
messages.push(data);
|
|
}
|
|
}
|
|
}
|
|
|
|
for field in &self.history {
|
|
for element in html.select(&field.selector) {
|
|
if let Some(data) = field.data.extract(&element) {
|
|
history.push(data);
|
|
}
|
|
}
|
|
}
|
|
|
|
for comment in &self.comments {
|
|
for comments_element in html.select(&comment.selector) {
|
|
let mut datetime: Option<Date> = None;
|
|
let mut title: Option<String> = None;
|
|
let mut message: Option<String> = None;
|
|
|
|
if let Some(ref datetime_field) = comment.datetime {
|
|
for comment_element in comments_element.select(&datetime_field.field.selector) {
|
|
if let Some(data) = datetime_field.field.data.extract(&comment_element) {
|
|
// for filter in &datetime_field.field.filters {}
|
|
|
|
let data = match datetime_field.kind {
|
|
DateTimeKind::Date => Date::date_from(
|
|
datetime_field.tz,
|
|
&data,
|
|
&datetime_field.format,
|
|
)
|
|
.expect("failed to parse date"),
|
|
DateTimeKind::DateTime => Date::datetime_from(
|
|
datetime_field.tz,
|
|
&data,
|
|
&datetime_field.format,
|
|
)
|
|
.expect("failed to parse date time"),
|
|
};
|
|
|
|
datetime = Some(data);
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(ref title_field) = comment.title {
|
|
for comment_element in comments_element.select(&title_field.selector) {
|
|
if let Some(data) = title_field
|
|
.data
|
|
.extract(&comment_element)
|
|
.filter(|data| !data.is_empty())
|
|
{
|
|
// for filter in &message_field.filters {}
|
|
|
|
title = Some(data);
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(ref message_field) = comment.message {
|
|
for comment_element in comments_element.select(&message_field.selector) {
|
|
if let Some(data) = message_field.data.extract(&comment_element) {
|
|
// for filter in &message_field.filters {}
|
|
|
|
message = Some(data);
|
|
}
|
|
}
|
|
}
|
|
|
|
if datetime.is_some() && message.is_some() {
|
|
comments.push(entry::Comment {
|
|
datetime: datetime.unwrap(),
|
|
title,
|
|
message: message.unwrap(),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
if !messages.is_empty() || !history.is_empty() || !comments.is_empty() {
|
|
Ok(Entry {
|
|
messages,
|
|
history,
|
|
comments,
|
|
})
|
|
} else {
|
|
Err(())
|
|
}
|
|
}
|
|
}
|
|
|
|
fn deserialize_selector<'de, D>(deserializer: D) -> Result<Selector, D::Error>
|
|
where
|
|
D: Deserializer<'de>,
|
|
{
|
|
let s = String::deserialize(deserializer)?;
|
|
|
|
Selector::parse(&s).map_err(|_| de::Error::custom("failed to parse selector"))
|
|
}
|
|
|
|
fn deserialize_tz<'de, D>(deserializer: D) -> Result<Tz, D::Error>
|
|
where
|
|
D: Deserializer<'de>,
|
|
{
|
|
let s = String::deserialize(deserializer)?;
|
|
|
|
s.parse::<Tz>()
|
|
.map_err(|_| de::Error::custom("failed to parse tz"))
|
|
}
|