Initial commit.
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
/target
|
||||||
|
**/*.rs.bk
|
||||||
|
Cargo.lock
|
||||||
8
Cargo.toml
Normal file
8
Cargo.toml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
[package]
|
||||||
|
name = "dict-tei"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Anders Olsson <anders.e.olsson@gmail.com>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
quick-xml = "0.13"
|
||||||
477088
data/pol-eng.tei
Normal file
477088
data/pol-eng.tei
Normal file
File diff suppressed because it is too large
Load Diff
169819
data/swe-pol.tei
Normal file
169819
data/swe-pol.tei
Normal file
File diff suppressed because it is too large
Load Diff
143
src/lib.rs
Normal file
143
src/lib.rs
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
use std::fs::File;
|
||||||
|
use std::io::{BufRead, BufReader};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use quick_xml::events::Event;
|
||||||
|
use quick_xml::Reader as XmlReader;
|
||||||
|
|
||||||
|
pub struct Reader<B: BufRead> {
|
||||||
|
inner: XmlReader<B>,
|
||||||
|
base_path: Option<PathBuf>,
|
||||||
|
include_handler: Box<dyn Fn(&Path, Option<&Path>) -> Vec<u8>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Reader<BufReader<File>> {
|
||||||
|
pub fn from_path<P>(path: P) -> Reader<BufReader<File>>
|
||||||
|
where
|
||||||
|
P: Into<PathBuf>,
|
||||||
|
{
|
||||||
|
let path = path.into();
|
||||||
|
let inner = XmlReader::from_file(&path).expect("failed to open path");
|
||||||
|
|
||||||
|
Reader {
|
||||||
|
inner: inner,
|
||||||
|
base_path: Some(path),
|
||||||
|
include_handler: Box::new(|_, _| vec![]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Reader<&'a [u8]> {
|
||||||
|
pub fn from_str(s: &str) -> Reader<&[u8]> {
|
||||||
|
Reader {
|
||||||
|
inner: XmlReader::from_str(s),
|
||||||
|
base_path: None,
|
||||||
|
include_handler: Box::new(|_, _| vec![]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<B: BufRead> Reader<B> {
|
||||||
|
pub fn parse(&mut self) {
|
||||||
|
enum State {
|
||||||
|
None,
|
||||||
|
Definition(String, Vec<String>),
|
||||||
|
}
|
||||||
|
|
||||||
|
self.inner.trim_text(true);
|
||||||
|
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
|
||||||
|
let mut state = State::None;
|
||||||
|
let mut words = Vec::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
state = match (state, self.inner.read_event(&mut buf)) {
|
||||||
|
(State::None, Ok(Event::Start(ref e))) => match e.name() {
|
||||||
|
b"orth" => {
|
||||||
|
let word = self.inner.read_text(e.name(), &mut Vec::new()).unwrap();
|
||||||
|
|
||||||
|
State::Definition(word, Vec::new())
|
||||||
|
}
|
||||||
|
_ => State::None,
|
||||||
|
},
|
||||||
|
(State::Definition(word, mut translations), Ok(Event::Start(e))) => {
|
||||||
|
match e.name() {
|
||||||
|
b"orth" => {
|
||||||
|
words.push((word, translations));
|
||||||
|
|
||||||
|
let word = self.inner.read_text(e.name(), &mut Vec::new()).unwrap();
|
||||||
|
|
||||||
|
State::Definition(word, Vec::new())
|
||||||
|
}
|
||||||
|
b"quote" => {
|
||||||
|
let translation =
|
||||||
|
self.inner.read_text(e.name(), &mut Vec::new()).unwrap();
|
||||||
|
|
||||||
|
translations.push(translation);
|
||||||
|
|
||||||
|
State::Definition(word, translations)
|
||||||
|
}
|
||||||
|
_ => State::Definition(word, translations),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(_, Err(e)) => panic!(
|
||||||
|
"Error at position {}: {:?}",
|
||||||
|
self.inner.buffer_position(),
|
||||||
|
e
|
||||||
|
),
|
||||||
|
(_, Ok(Event::Eof)) => break,
|
||||||
|
(state, _) => state,
|
||||||
|
};
|
||||||
|
|
||||||
|
buf.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("{:#?}", words);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn base_path<P>(&mut self, path: P)
|
||||||
|
where
|
||||||
|
P: Into<PathBuf>,
|
||||||
|
{
|
||||||
|
self.base_path = Some(path.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn include_handler<F>(&mut self, f: F)
|
||||||
|
where
|
||||||
|
F: Fn(&Path, Option<&Path>) -> Vec<u8> + 'static,
|
||||||
|
{
|
||||||
|
self.include_handler = Box::new(f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: parsing the `data/pol-eng.tei` fixture from memory must not panic.
    ///
    /// Ignored by default because `parse` prints every collected entry of a very
    /// large fixture; run it explicitly with `cargo test -- --ignored`.
    #[test]
    #[ignore]
    fn it_works() {
        let fixture = include_str!("../data/pol-eng.tei");

        let mut reader = Reader::from_str(fixture);

        reader.parse();
    }

    /// Smoke test: a custom include handler can be installed on a file-backed
    /// reader and the document still parses without panicking.
    #[test]
    fn test_include_handler() {
        let mut reader = Reader::from_path("data/pol-eng.tei");

        reader.include_handler(|path, _| {
            println!("path: {:?}", path);

            vec![]
        });

        reader.parse();
    }
}
|
||||||
Reference in New Issue
Block a user