Use build script instead.
Map between macro and independent languages for better coverage.
This commit is contained in:
@@ -109,8 +109,8 @@ fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let mut languages = Vec::new();
|
||||
let mut matchers = Vec::new();
|
||||
let stdout = io::stdout();
|
||||
let mut lock = stdout.lock();
|
||||
|
||||
for (language, mut results) in entries.into_iter() {
|
||||
results.sort_unstable();
|
||||
@@ -170,96 +170,14 @@ fn main() -> Result<()> {
|
||||
|
||||
regex.push_str(")+$");
|
||||
|
||||
let re = Regex::new(&regex).expect("failed to build regex");
|
||||
let _ = Regex::new(&regex).expect("failed to build regex");
|
||||
|
||||
languages.push(language);
|
||||
matchers.push(re);
|
||||
assert!(!regex.contains('\n'));
|
||||
assert!(!regex.contains('\t'));
|
||||
|
||||
writeln!(lock, "{}\t{}", language.to_639_3(), regex).expect("failed to write to stdout");
|
||||
}
|
||||
|
||||
assert_eq!(languages.len(), matchers.len());
|
||||
|
||||
if languages.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut lock = stdout.lock();
|
||||
|
||||
writeln!(lock, "use isolang::Language;").expect("failed to write to stdout");
|
||||
writeln!(lock, "use once_cell::sync::Lazy;").expect("failed to write to stdout");
|
||||
writeln!(lock, "use regex::{{Regex, RegexSet}};").expect("failed to write to stdout");
|
||||
|
||||
writeln!(lock).expect("failed to write to stdout");
|
||||
|
||||
writeln!(
|
||||
lock,
|
||||
"pub static LANGUAGES: [Language; {}] = [",
|
||||
languages.len()
|
||||
)
|
||||
.expect("failed to write to stdout");
|
||||
|
||||
for language in &languages {
|
||||
writeln!(lock, " Language::{},", title(language.to_639_3()))
|
||||
.expect("failed to write to stdout");
|
||||
}
|
||||
|
||||
writeln!(lock, "];").expect("failed to write to stdout");
|
||||
|
||||
writeln!(lock).expect("failed to write to stdout");
|
||||
|
||||
writeln!(
|
||||
lock,
|
||||
"pub static REGEX_SET: Lazy<RegexSet> = Lazy::new(|| {{",
|
||||
)
|
||||
.expect("failed to write to stdout");
|
||||
writeln!(lock, " RegexSet::new(vec![",).expect("failed to write to stdout");
|
||||
|
||||
for matcher in &matchers {
|
||||
writeln!(lock, " \"{}\",", matcher).expect("failed to write to stdout");
|
||||
}
|
||||
|
||||
writeln!(lock, " ]).unwrap()",).expect("failed to write to stdout");
|
||||
|
||||
writeln!(lock, "}});").expect("failed to write to stdout");
|
||||
|
||||
writeln!(lock).expect("failed to write to stdout");
|
||||
|
||||
for (language, matcher) in languages.iter().zip(matchers.iter()) {
|
||||
writeln!(
|
||||
lock,
|
||||
"pub static RE_{}: Lazy<Regex> = Lazy::new(|| Regex::new(\"{}\").unwrap());",
|
||||
language.to_639_3().to_ascii_uppercase(),
|
||||
matcher,
|
||||
)
|
||||
.expect("failed to write to stdout");
|
||||
}
|
||||
|
||||
writeln!(lock).expect("failed to write to stdout");
|
||||
|
||||
writeln!(lock, "impl super::LanguageExt for Language {{",).expect("failed to write to stdout");
|
||||
writeln!(
|
||||
lock,
|
||||
" fn is_match(&self, word: &str) -> Option<bool> {{",
|
||||
)
|
||||
.expect("failed to write to stdout");
|
||||
writeln!(lock, " match self {{",).expect("failed to write to stdout");
|
||||
|
||||
for language in &languages {
|
||||
writeln!(
|
||||
lock,
|
||||
" Language::{} => Some(RE_{}.is_match(word)),",
|
||||
title(language.to_639_3()),
|
||||
language.to_639_3().to_ascii_uppercase(),
|
||||
)
|
||||
.expect("failed to write to stdout");
|
||||
}
|
||||
|
||||
writeln!(lock, " _ => None,",).expect("failed to write to stdout");
|
||||
|
||||
writeln!(lock, " }}",).expect("failed to write to stdout");
|
||||
writeln!(lock, " }}",).expect("failed to write to stdout");
|
||||
writeln!(lock, "}}",).expect("failed to write to stdout");
|
||||
|
||||
lock.flush().expect("failed to flush to stdout");
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -3,6 +3,7 @@ name = "wordlang"
|
||||
version = "0.1.0"
|
||||
authors = ["Anders Olsson <anders.e.olsson@gmail.com>"]
|
||||
edition = "2018"
|
||||
build = "build.rs"
|
||||
|
||||
[dependencies]
|
||||
isolang = "1.0"
|
||||
|
||||
180
wordlang/build.rs
Normal file
180
wordlang/build.rs
Normal file
@@ -0,0 +1,180 @@
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, BufWriter, Write};
|
||||
use std::path::Path;
|
||||
|
||||
/// Returns a copy of `s` whose first character has been upper-cased.
///
/// The remaining characters are copied through untouched, and an empty
/// input yields an empty string. The upper-casing is Unicode-aware, so a
/// single leading character may expand to several (e.g. `ß` becomes `SS`).
fn title(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();

    if let Some(head) = rest.next() {
        // `to_uppercase` yields an iterator because one char can map to many.
        out.extend(head.to_uppercase());
        // Whatever the iterator has not consumed is the unchanged tail.
        out.push_str(rest.as_str());
    }

    out
}
|
||||
|
||||
/// Loads the language-code → regex table from `languages.txt`.
///
/// Every non-empty line must contain a language code and a regular
/// expression separated by the *first* tab on the line; any further tabs
/// stay part of the regex. When the same code appears more than once the
/// last occurrence wins.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read, or if a non-empty
/// line contains no tab separator. Read errors are propagated instead of the
/// offending lines being silently dropped, and malformed lines are reported
/// with their line number instead of panicking on an out-of-bounds index.
fn read_languages() -> Result<HashMap<String, String>, Box<dyn Error>> {
    let reader = BufReader::new(File::open("languages.txt")?);

    let mut entries = HashMap::new();

    for (index, line) in reader.lines().enumerate() {
        let line = line?;

        // Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if line.is_empty() {
            continue;
        }

        let mut parts = line.splitn(2, '\t');

        match (parts.next(), parts.next()) {
            (Some(lang), Some(regex)) => {
                entries.insert(lang.to_string(), regex.to_string());
            }
            _ => {
                return Err(format!(
                    "languages.txt:{}: expected `<language>\\t<regex>`",
                    index + 1
                )
                .into());
            }
        }
    }

    Ok(entries)
}
|
||||
|
||||
/// Loads the macrolanguage table from `iso-639-3-macrolanguages.tab`.
///
/// Each data row holds tab-separated fields; only the first two are used:
/// the macrolanguage code and the code of one of its member languages. Any
/// third field (the status column) is ignored.
///
/// Returns a pair of maps:
///
/// * macrolanguage code → member codes, in file order;
/// * member code → macrolanguage code.
///
/// The header row (first field `M_Id`) and blank lines are skipped so that
/// the column titles do not end up as a bogus mapping.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read, or if a non-empty
/// line has fewer than two tab-separated fields. Read errors are propagated
/// rather than silently discarding the affected lines.
fn read_mappings() -> Result<(HashMap<String, Vec<String>>, HashMap<String, String>), Box<dyn Error>>
{
    let reader = BufReader::new(File::open("iso-639-3-macrolanguages.tab")?);

    let mut m_to_l: HashMap<String, Vec<String>> = HashMap::new();
    let mut l_to_m = HashMap::new();

    for (index, line) in reader.lines().enumerate() {
        let line = line?;

        if line.is_empty() {
            continue;
        }

        let mut parts = line.splitn(3, '\t');

        let (m, l) = match (parts.next(), parts.next()) {
            (Some(m), Some(l)) => (m, l),
            _ => {
                return Err(format!(
                    "iso-639-3-macrolanguages.tab:{}: expected at least two tab-separated fields",
                    index + 1
                )
                .into());
            }
        };

        // The table starts with a `M_Id  I_Id  I_Status` header row.
        if m == "M_Id" {
            continue;
        }

        m_to_l.entry(m.to_string()).or_default().push(l.to_string());
        l_to_m.insert(l.to_string(), m.to_string());
    }

    Ok((m_to_l, l_to_m))
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
let languages = read_languages()?;
|
||||
let (m_to_l, l_to_m) = read_mappings()?;
|
||||
|
||||
let path = Path::new(&env::var("OUT_DIR")?).join("languages.rs");
|
||||
let mut file = BufWriter::new(File::create(&path)?);
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"pub static LANGUAGES: [Language; {}] = [",
|
||||
languages.len()
|
||||
)?;
|
||||
|
||||
for (language, _) in &languages {
|
||||
writeln!(file, " Language::{},", title(language))?;
|
||||
}
|
||||
|
||||
writeln!(file, "];")?;
|
||||
|
||||
writeln!(file)?;
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"pub static REGEX_SET: Lazy<RegexSet> = Lazy::new(|| {{",
|
||||
)?;
|
||||
|
||||
writeln!(file, " RegexSet::new(vec![",)?;
|
||||
|
||||
for (_, regex) in &languages {
|
||||
writeln!(file, " \"{}\",", regex)?;
|
||||
}
|
||||
|
||||
writeln!(file, " ]).unwrap()",)?;
|
||||
|
||||
writeln!(file, "}});")?;
|
||||
|
||||
writeln!(file)?;
|
||||
|
||||
for (language, regex) in &languages {
|
||||
writeln!(
|
||||
file,
|
||||
"pub static RE_{}: Lazy<Regex> = Lazy::new(|| Regex::new(\"{}\").unwrap());",
|
||||
language.to_ascii_uppercase(),
|
||||
regex,
|
||||
)?;
|
||||
}
|
||||
|
||||
writeln!(file)?;
|
||||
|
||||
writeln!(file, "impl LanguageExt for Language {{",)?;
|
||||
writeln!(
|
||||
file,
|
||||
" fn is_match(&self, word: &str) -> Option<bool> {{",
|
||||
)?;
|
||||
writeln!(file, " match self {{",)?;
|
||||
|
||||
let mut table = HashMap::new();
|
||||
|
||||
for (language, _) in &languages {
|
||||
let mut sub = vec![language];
|
||||
|
||||
if let Some(ls) = m_to_l.get(language) {
|
||||
for l in ls {
|
||||
if !languages.contains_key(l) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !sub.contains(&l) {
|
||||
sub.push(l);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(m) = l_to_m.get(language) {
|
||||
if languages.contains_key(m) && !sub.contains(&m) {
|
||||
sub.push(m);
|
||||
}
|
||||
}
|
||||
|
||||
table.insert(language, sub);
|
||||
}
|
||||
|
||||
for (language, _) in &languages {
|
||||
if let Some(m) = l_to_m.get(language) {
|
||||
table
|
||||
.entry(m)
|
||||
.and_modify(|sub: &mut Vec<&String>| {
|
||||
if !sub.contains(&language) {
|
||||
sub.push(language);
|
||||
}
|
||||
})
|
||||
.or_insert_with(|| vec![language]);
|
||||
}
|
||||
}
|
||||
|
||||
for (parent, children) in &table {
|
||||
writeln!(file, " Language::{} => {{", title(parent))?;
|
||||
|
||||
let statment = children
|
||||
.iter()
|
||||
.map(|child| format!("RE_{}.is_match(word)", child.to_ascii_uppercase()))
|
||||
.collect::<Vec<_>>()
|
||||
.join(" | ");
|
||||
|
||||
writeln!(file, " Some({})", statment)?;
|
||||
writeln!(file, " }}")?;
|
||||
}
|
||||
|
||||
writeln!(file, " _ => None,",)?;
|
||||
|
||||
writeln!(file, " }}",)?;
|
||||
writeln!(file, " }}",)?;
|
||||
writeln!(file, "}}",)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
454
wordlang/iso-639-3-macrolanguages.tab
Normal file
454
wordlang/iso-639-3-macrolanguages.tab
Normal file
@@ -0,0 +1,454 @@
|
||||
M_Id I_Id I_Status
|
||||
aka fat A
|
||||
aka twi A
|
||||
ara aao A
|
||||
ara abh A
|
||||
ara abv A
|
||||
ara acm A
|
||||
ara acq A
|
||||
ara acw A
|
||||
ara acx A
|
||||
ara acy A
|
||||
ara adf A
|
||||
ara aeb A
|
||||
ara aec A
|
||||
ara afb A
|
||||
ara ajp A
|
||||
ara apc A
|
||||
ara apd A
|
||||
ara arb A
|
||||
ara arq A
|
||||
ara ars A
|
||||
ara ary A
|
||||
ara arz A
|
||||
ara auz A
|
||||
ara avl A
|
||||
ara ayh A
|
||||
ara ayl A
|
||||
ara ayn A
|
||||
ara ayp A
|
||||
ara bbz R
|
||||
ara pga A
|
||||
ara shu A
|
||||
ara ssh A
|
||||
aym ayc A
|
||||
aym ayr A
|
||||
aze azb A
|
||||
aze azj A
|
||||
bal bcc A
|
||||
bal bgn A
|
||||
bal bgp A
|
||||
bik bcl A
|
||||
bik bhk R
|
||||
bik bln A
|
||||
bik bto A
|
||||
bik cts A
|
||||
bik fbl A
|
||||
bik lbl A
|
||||
bik rbl A
|
||||
bik ubl A
|
||||
bnc ebk A
|
||||
bnc lbk A
|
||||
bnc obk A
|
||||
bnc rbk A
|
||||
bnc vbk A
|
||||
bua bxm A
|
||||
bua bxr A
|
||||
bua bxu A
|
||||
chm mhr A
|
||||
chm mrj A
|
||||
cre crj A
|
||||
cre crk A
|
||||
cre crl A
|
||||
cre crm A
|
||||
cre csw A
|
||||
cre cwd A
|
||||
del umu A
|
||||
del unm A
|
||||
den scs A
|
||||
den xsl A
|
||||
din dib A
|
||||
din dik A
|
||||
din dip A
|
||||
din diw A
|
||||
din dks A
|
||||
doi dgo A
|
||||
doi xnr A
|
||||
est ekk A
|
||||
est vro A
|
||||
fas pes A
|
||||
fas prs A
|
||||
ful ffm A
|
||||
ful fub A
|
||||
ful fuc A
|
||||
ful fue A
|
||||
ful fuf A
|
||||
ful fuh A
|
||||
ful fui A
|
||||
ful fuq A
|
||||
ful fuv A
|
||||
gba bdt A
|
||||
gba gbp A
|
||||
gba gbq A
|
||||
gba gmm A
|
||||
gba gso A
|
||||
gba gya A
|
||||
gba mdo R
|
||||
gon esg A
|
||||
gon ggo R
|
||||
gon gno A
|
||||
gon wsg A
|
||||
grb gbo A
|
||||
grb gec A
|
||||
grb grj A
|
||||
grb grv A
|
||||
grb gry A
|
||||
grn gnw A
|
||||
grn gug A
|
||||
grn gui A
|
||||
grn gun A
|
||||
grn nhd A
|
||||
hai hax A
|
||||
hai hdn A
|
||||
hbs bos A
|
||||
hbs cnr A
|
||||
hbs hrv A
|
||||
hbs srp A
|
||||
hmn blu R
|
||||
hmn cqd A
|
||||
hmn hea A
|
||||
hmn hma A
|
||||
hmn hmc A
|
||||
hmn hmd A
|
||||
hmn hme A
|
||||
hmn hmg A
|
||||
hmn hmh A
|
||||
hmn hmi A
|
||||
hmn hmj A
|
||||
hmn hml A
|
||||
hmn hmm A
|
||||
hmn hmp A
|
||||
hmn hmq A
|
||||
hmn hms A
|
||||
hmn hmw A
|
||||
hmn hmy A
|
||||
hmn hmz A
|
||||
hmn hnj A
|
||||
hmn hrm A
|
||||
hmn huj A
|
||||
hmn mmr A
|
||||
hmn muq A
|
||||
hmn mww A
|
||||
hmn sfm A
|
||||
iku ike A
|
||||
iku ikt A
|
||||
ipk esi A
|
||||
ipk esk A
|
||||
jrb ajt A
|
||||
jrb aju A
|
||||
jrb jye A
|
||||
jrb yhd A
|
||||
jrb yud A
|
||||
kau kby A
|
||||
kau knc A
|
||||
kau krt A
|
||||
kln enb A
|
||||
kln eyo A
|
||||
kln niq A
|
||||
kln oki A
|
||||
kln pko A
|
||||
kln sgc A
|
||||
kln spy A
|
||||
kln tec A
|
||||
kln tuy A
|
||||
kok gom A
|
||||
kok knn A
|
||||
kom koi A
|
||||
kom kpv A
|
||||
kon kng A
|
||||
kon kwy A
|
||||
kon ldi A
|
||||
kpe gkp A
|
||||
kpe xpe A
|
||||
kur ckb A
|
||||
kur kmr A
|
||||
kur sdh A
|
||||
lah hnd A
|
||||
lah hno A
|
||||
lah jat A
|
||||
lah phr A
|
||||
lah pmu R
|
||||
lah pnb A
|
||||
lah skr A
|
||||
lah xhe A
|
||||
lav ltg A
|
||||
lav lvs A
|
||||
luy bxk A
|
||||
luy ida A
|
||||
luy lkb A
|
||||
luy lko A
|
||||
luy lks A
|
||||
luy lri A
|
||||
luy lrm A
|
||||
luy lsm A
|
||||
luy lto A
|
||||
luy lts A
|
||||
luy lwg A
|
||||
luy nle A
|
||||
luy nyd A
|
||||
luy rag A
|
||||
man emk A
|
||||
man mku A
|
||||
man mlq A
|
||||
man mnk A
|
||||
man msc A
|
||||
man mwk A
|
||||
man myq R
|
||||
mlg bhr A
|
||||
mlg bjq R
|
||||
mlg bmm A
|
||||
mlg bzc A
|
||||
mlg msh A
|
||||
mlg plt A
|
||||
mlg skg A
|
||||
mlg tdx A
|
||||
mlg tkg A
|
||||
mlg txy A
|
||||
mlg xmv A
|
||||
mlg xmw A
|
||||
mon khk A
|
||||
mon mvf A
|
||||
msa bjn A
|
||||
msa btj A
|
||||
msa bve A
|
||||
msa bvu A
|
||||
msa coa A
|
||||
msa dup A
|
||||
msa hji A
|
||||
msa ind A
|
||||
msa jak A
|
||||
msa jax A
|
||||
msa kvb A
|
||||
msa kvr A
|
||||
msa kxd A
|
||||
msa lce A
|
||||
msa lcf A
|
||||
msa liw A
|
||||
msa max A
|
||||
msa meo A
|
||||
msa mfa A
|
||||
msa mfb A
|
||||
msa min A
|
||||
msa mly R
|
||||
msa mqg A
|
||||
msa msi A
|
||||
msa mui A
|
||||
msa orn A
|
||||
msa ors A
|
||||
msa pel A
|
||||
msa pse A
|
||||
msa tmw A
|
||||
msa urk A
|
||||
msa vkk A
|
||||
msa vkt A
|
||||
msa xmm A
|
||||
msa zlm A
|
||||
msa zmi A
|
||||
msa zsm A
|
||||
mwr dhd A
|
||||
mwr mtr A
|
||||
mwr mve A
|
||||
mwr rwr A
|
||||
mwr swv A
|
||||
mwr wry A
|
||||
nep dty A
|
||||
nep npi A
|
||||
nor nno A
|
||||
nor nob A
|
||||
oji ciw A
|
||||
oji ojb A
|
||||
oji ojc A
|
||||
oji ojg A
|
||||
oji ojs A
|
||||
oji ojw A
|
||||
oji otw A
|
||||
ori ory A
|
||||
ori spv A
|
||||
orm gax A
|
||||
orm gaz A
|
||||
orm hae A
|
||||
orm orc A
|
||||
pus pbt A
|
||||
pus pbu A
|
||||
pus pst A
|
||||
que cqu R
|
||||
que qub A
|
||||
que qud A
|
||||
que quf A
|
||||
que qug A
|
||||
que quh A
|
||||
que quk A
|
||||
que qul A
|
||||
que qup A
|
||||
que qur A
|
||||
que qus A
|
||||
que quw A
|
||||
que qux A
|
||||
que quy A
|
||||
que quz A
|
||||
que qva A
|
||||
que qvc A
|
||||
que qve A
|
||||
que qvh A
|
||||
que qvi A
|
||||
que qvj A
|
||||
que qvl A
|
||||
que qvm A
|
||||
que qvn A
|
||||
que qvo A
|
||||
que qvp A
|
||||
que qvs A
|
||||
que qvw A
|
||||
que qvz A
|
||||
que qwa A
|
||||
que qwc A
|
||||
que qwh A
|
||||
que qws A
|
||||
que qxa A
|
||||
que qxc A
|
||||
que qxh A
|
||||
que qxl A
|
||||
que qxn A
|
||||
que qxo A
|
||||
que qxp A
|
||||
que qxr A
|
||||
que qxt A
|
||||
que qxu A
|
||||
que qxw A
|
||||
raj bgq A
|
||||
raj gda A
|
||||
raj gju A
|
||||
raj hoj A
|
||||
raj mup A
|
||||
raj wbr A
|
||||
rom rmc A
|
||||
rom rmf A
|
||||
rom rml A
|
||||
rom rmn A
|
||||
rom rmo A
|
||||
rom rmw A
|
||||
rom rmy A
|
||||
sqi aae A
|
||||
sqi aat A
|
||||
sqi aln A
|
||||
sqi als A
|
||||
srd sdc A
|
||||
srd sdn A
|
||||
srd src A
|
||||
srd sro A
|
||||
swa swc A
|
||||
swa swh A
|
||||
syr aii A
|
||||
syr cld A
|
||||
tmh taq A
|
||||
tmh thv A
|
||||
tmh thz A
|
||||
tmh ttq A
|
||||
uzb uzn A
|
||||
uzb uzs A
|
||||
yid ydd A
|
||||
yid yih A
|
||||
zap zaa A
|
||||
zap zab A
|
||||
zap zac A
|
||||
zap zad A
|
||||
zap zae A
|
||||
zap zaf A
|
||||
zap zai A
|
||||
zap zam A
|
||||
zap zao A
|
||||
zap zaq A
|
||||
zap zar A
|
||||
zap zas A
|
||||
zap zat A
|
||||
zap zav A
|
||||
zap zaw A
|
||||
zap zax A
|
||||
zap zca A
|
||||
zap zoo A
|
||||
zap zpa A
|
||||
zap zpb A
|
||||
zap zpc A
|
||||
zap zpd A
|
||||
zap zpe A
|
||||
zap zpf A
|
||||
zap zpg A
|
||||
zap zph A
|
||||
zap zpi A
|
||||
zap zpj A
|
||||
zap zpk A
|
||||
zap zpl A
|
||||
zap zpm A
|
||||
zap zpn A
|
||||
zap zpo A
|
||||
zap zpp A
|
||||
zap zpq A
|
||||
zap zpr A
|
||||
zap zps A
|
||||
zap zpt A
|
||||
zap zpu A
|
||||
zap zpv A
|
||||
zap zpw A
|
||||
zap zpx A
|
||||
zap zpy A
|
||||
zap zpz A
|
||||
zap zsr A
|
||||
zap ztc R
|
||||
zap zte A
|
||||
zap ztg A
|
||||
zap ztl A
|
||||
zap ztm A
|
||||
zap ztn A
|
||||
zap ztp A
|
||||
zap ztq A
|
||||
zap zts A
|
||||
zap ztt A
|
||||
zap ztu A
|
||||
zap ztx A
|
||||
zap zty A
|
||||
zha ccx R
|
||||
zha ccy R
|
||||
zha zch A
|
||||
zha zeh A
|
||||
zha zgb A
|
||||
zha zgm A
|
||||
zha zgn A
|
||||
zha zhd A
|
||||
zha zhn A
|
||||
zha zlj A
|
||||
zha zln A
|
||||
zha zlq A
|
||||
zha zqe A
|
||||
zha zyb A
|
||||
zha zyg A
|
||||
zha zyj A
|
||||
zha zyn A
|
||||
zha zzj A
|
||||
zho cdo A
|
||||
zho cjy A
|
||||
zho cmn A
|
||||
zho cnp A
|
||||
zho cpx A
|
||||
zho csp A
|
||||
zho czh A
|
||||
zho czo A
|
||||
zho gan A
|
||||
zho hak A
|
||||
zho hsn A
|
||||
zho lzh A
|
||||
zho mnp A
|
||||
zho nan A
|
||||
zho wuu A
|
||||
zho yue A
|
||||
zza diq A
|
||||
zza kiu A
|
||||
211
wordlang/languages.txt
Normal file
211
wordlang/languages.txt
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,16 +1,18 @@
|
||||
pub use isolang::Language;
|
||||
use isolang::Language;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::{Regex, RegexSet};
|
||||
|
||||
mod data;
|
||||
include!(concat!(env!("OUT_DIR"), "/languages.rs"));
|
||||
|
||||
/// Extension trait that adds per-language word matching to a type.
///
/// The implementation for `Language` is not written by hand: it is part of
/// the build-script output pulled in through the `include!` of
/// `languages.rs` in this file.
pub trait LanguageExt {
|
||||
/// Tests `word` against the regular expression(s) known for this language.
///
/// In the generated implementation this is `Some(result)` for languages
/// that have a match arm and `None` for every other language.
fn is_match(&self, word: &str) -> Option<bool>;
|
||||
}
|
||||
|
||||
pub fn matches(word: &str) -> Vec<Language> {
|
||||
data::REGEX_SET
|
||||
REGEX_SET
|
||||
.matches(word)
|
||||
.into_iter()
|
||||
.map(|idx| data::LANGUAGES[idx])
|
||||
.map(|idx| LANGUAGES[idx])
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
@@ -26,7 +28,15 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_match() {
|
||||
assert_eq!(Language::Swe.is_match("hello"), Some(true));
|
||||
fn test_ben_is_match() {
|
||||
assert_eq!(Language::Ben.is_match("আফার"), Some(true));
|
||||
assert_eq!(Language::Ben.is_match("\u{09BC}"), Some(true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nor_is_match() {
|
||||
assert_eq!(Language::Nor.is_match("solnedgang"), Some(true));
|
||||
assert_eq!(Language::Nno.is_match("solnedgang"), Some(true));
|
||||
assert_eq!(Language::Nob.is_match("solnedgang"), Some(true));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user