diff --git a/Cargo.lock b/Cargo.lock index 7c13c8f..a21c043 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,6 +3,14 @@ name = "adler32" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "aho-corasick" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "arrayvec" version = "0.4.9" @@ -11,6 +19,33 @@ dependencies = [ "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "autocfg" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "backtrace" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace-sys 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "backtrace-sys" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "base64" version = "0.9.3" @@ -185,6 +220,26 @@ dependencies = [ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "failure" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "backtrace 0.3.13 (registry+https://github.com/rust-lang/crates.io-index)", + "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "failure_derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.23 (registry+https://github.com/rust-lang/crates.io-index)", + "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "fnv" version = "1.0.6" @@ -444,6 +499,16 @@ name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "memchr" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "memoffset" version = "0.2.1" @@ -774,6 +839,26 @@ name = "redox_syscall" version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "remove_dir_all" version = "0.5.1" @@ -808,6 +893,11 @@ dependencies = [ "uuid 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rustc-demangle" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "rustc_version" version = "0.2.3" @@ -1030,6 +1120,17 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "synstructure" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.23 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "tempfile" version = "3.0.5" @@ -1058,6 +1159,14 @@ name = "thin-slice" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "thread_local" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "time" version = "0.1.41" @@ -1228,6 +1337,33 @@ name = "try-lock" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "ucd-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unhtml" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scraper 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unhtml_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.23 (registry+https://github.com/rust-lang/crates.io-index)", + "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", + "unhtml 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "unicase" version = "1.4.2" @@ -1290,6 +1426,11 @@ name = "utf-8" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "utf8-ranges" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "uuid" version = "0.7.1" @@ -1329,10 +1470,14 @@ version = "0.1.0" dependencies = [ "bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "directories 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "scraper 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", + "unhtml 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unhtml_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1375,7 +1520,11 @@ dependencies = [ [metadata] "checksum adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7e522997b529f05601e05166c07ed17789691f562762c7f3b987263d2dedee5c" +"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" "checksum arrayvec 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d18513977c2d8261c448511c5c53dc66b26dfccbc3d4446672dea1e71a7d8a26" +"checksum autocfg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e5f34df7a019573fb8bdc7e24a2bfebe51a2a1d6bfdbaeccedb3c41fc574727" +"checksum backtrace 0.3.13 (registry+https://github.com/rust-lang/crates.io-index)" = "b5b493b66e03090ebc4343eb02f94ff944e0cbc9ac6571491d170ba026741eb5" +"checksum backtrace-sys 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)" = "3fcce89e5ad5c8949caa9434501f7b55415b3e7ad5270cb88c75a8d35e8f1279" "checksum base64 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "489d6c0ed21b11d038c31b6ceccca973e65d73ba3bd8ecb9a2babf5546164643" "checksum bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f2fb9e29e72fd6bc12071533d5dc7664cb01480c59406f656d7ac25c7bd8ff7" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" @@ -1397,6 +1546,8 @@ dependencies = [ "checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2" "checksum ego-tree 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9733f6ada1734cb25b4033b2855ec78feb267971a2dd76012974906ba8780074" "checksum encoding_rs 0.8.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1a8fa54e6689eb2549c4efed8d00d7f3b2b994a064555b0e8df4ae3764bcc4be" +"checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" +"checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" "checksum foreign-types-shared 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" @@ -1427,6 +1578,7 @@ dependencies = [ "checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" "checksum markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "897636f9850c3eef4905a5540683ed53dc9393860f0846cab2c2ddf9939862ff" "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +"checksum memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "db4c41318937f6e76648f42826b1d9ade5c09cafb5aef7e351240a70f39206e9" "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" "checksum mime 0.3.12 (registry+https://github.com/rust-lang/crates.io-index)" = "0a907b83e7b9e987032439a387e187119cddafc92d5c2aaeb1d92580a793f630" "checksum mime_guess 2.0.0-alpha.6 (registry+https://github.com/rust-lang/crates.io-index)" = "30de2e4613efcba1ec63d8133f344076952090c122992a903359be5a4f99c3ed" @@ -1464,8 +1616,11 @@ dependencies = [ "checksum rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "086bd09a33c7044e56bb44d5bdde5a60e7f119a9e95b0775f545de759a32fe05" "checksum rand_xorshift 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "effa3fcaa47e18db002bdde6060944b6d2f9cfd8db471c30e873448ad9187be3" "checksum redox_syscall 0.1.44 (registry+https://github.com/rust-lang/crates.io-index)" = "a84bcd297b87a545980a2d25a0beb72a1f490c31f0a9fde52fca35bfbb1ceb70" +"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" +"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" "checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" "checksum reqwest 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ab52e462d1e15891441aeefadff68bdea005174328ce3da0a314f2ad313ec837" +"checksum rustc-demangle 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "01b90379b8664dd83460d59bdc5dd1fd3172b8913788db483ed1325171eab2f7" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" "checksum safemem 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8dca453248a96cb0749e36ccdfe2b0b4e54a61bfef89fb97ec621eb8e0a93dd9" @@ -1492,9 +1647,11 @@ dependencies = [ "checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" "checksum syn 0.14.9 (registry+https://github.com/rust-lang/crates.io-index)" = "261ae9ecaa397c42b960649561949d69311f08eeaea86a65696e6e46517cf741" "checksum syn 0.15.23 (registry+https://github.com/rust-lang/crates.io-index)" = "9545a6a093a3f0bd59adb472700acc08cad3776f860f16a897dfce8c88721cbc" +"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" "checksum tempfile 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "7e91405c14320e5c79b3d148e1c86f40749a36e490642202a31689cb1a3452b2" "checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" "checksum thin-slice 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum time 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "847da467bf0db05882a9e2375934a8a55cffdc9db0d128af1518200260ba1f6c" "checksum tokio 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "a7817d4c98cc5be21360b3b37d6036fe9b7aefa5b7a201b7b16ff33423822f7d" "checksum tokio-codec 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5c501eceaf96f0e1793cf26beb63da3d11c738c4a943fdf3746d81d64684c39f" @@ -1509,6 +1666,9 @@ dependencies = [ "checksum tokio-udp 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "66268575b80f4a4a710ef83d087fdfeeabdce9b74c797535fbac18a2cb906e92" "checksum tokio-uds 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "99ce87382f6c1a24b513a72c048b2c8efe66cb5161c9061d00bee510f08dc168" "checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382" +"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" +"checksum unhtml 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "15c2aa7fee745952e29bed92d923307d0025ebc2fea82a5128839b3ee69c57c7" +"checksum unhtml_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c413ceac66ab2553890887dbce70183d5bfb16079c86b364e2eafcd0864cd2a6" "checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33" "checksum unicase 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9d3218ea14b4edcaccfa0df0a64a3792a2c32cc706f1b336e48867f9d3147f90" "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" @@ -1518,6 +1678,7 @@ dependencies = [ "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" "checksum utf-8 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bab35f71693630bb1953dce0f2bcd780e7cde025027124a202ac08a45ba25141" +"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" "checksum uuid 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dab5c5526c5caa3d106653401a267fed923e7046f35895ffcb5ca42db64942e6" "checksum vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "def296d3eb3b12371b2c7d0e83bfe1403e4db2d7a0bba324a12b21c4ee13143d" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" diff --git a/Cargo.toml b/Cargo.toml index b903518..3bdc8f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,9 +5,13 @@ authors = ["Anders Olsson "] edition = "2018" [dependencies] +bincode = "1.0" directories = "1.0" +regex = "1.1" reqwest = "0.9" scraper = "0.9" serde = "1.0" serde_derive = "1.0" -bincode = "1.0" +serde_json = "1.0" +unhtml = "0.3" +unhtml_derive = "0.3" diff --git a/src/probe/hitta.rs b/src/probe/hitta.rs index 08dc3f2..6fed71b 100644 --- a/src/probe/hitta.rs +++ b/src/probe/hitta.rs @@ -1,8 +1,42 @@ -use scraper::{Html, Selector}; +use regex::Regex; +use serde_derive::Deserialize; use crate::context::Context; use crate::probe::Probe; +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct Data { + props: Props, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct Props { + page_props: PageProps, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct PageProps { + phone_data: PhoneData, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct PhoneData { + alternative_formats: Vec, + clean_number: String, + comments: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct Comment { + comment: String, + timestamp: u64, +} + // https://www.hitta.se/vem-ringde/{} pub struct Hitta; @@ -20,22 +54,27 @@ impl Probe for Hitta { ctx.cache_set("hitta", &number, body.as_bytes()) .expect("wut?! why not?!"); - let document = Html::parse_document(&body); + let re = Regex::new(r#"