From baf2f93b3002c2a0769bbd53f37d845c7717d95b Mon Sep 17 00:00:00 2001 From: JJ Date: Wed, 4 Jan 2023 15:56:30 -0800 Subject: pretty terrible printing implementation --- .cargo/config.toml | 1 + Cargo.lock | 5 +++ Cargo.toml | 2 ++ src/lib.rs | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 1fe801c..a32fc23 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -3,3 +3,4 @@ [env] index_path = "data/enwiktionary-20221220-pages-articles-multistream-index.txt.bz2" dictionary_path = "data/enwiktionary-20221220-pages-articles-multistream.xml.bz2" +RUST_BACKTRACE = "1" diff --git a/Cargo.lock b/Cargo.lock index 71adf65..299db1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,6 +34,7 @@ name = "dictionarium" version = "0.1.0" dependencies = [ "bzip2", + "parse_wiki_text", ] [[package]] @@ -42,6 +43,10 @@ version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +[[package]] +name = "parse_wiki_text" +version = "0.1.5" + [[package]] name = "pkg-config" version = "0.3.26" diff --git a/Cargo.toml b/Cargo.toml index f743eff..2fe11fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,5 @@ edition = "2021" [dependencies] bzip2 = "0.4.3" +parse_wiki_text = "0.1.5" +# peg = "0.8.1" diff --git a/src/lib.rs b/src/lib.rs index 96116fe..569088c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,12 @@ #![allow(non_upper_case_globals)] #![allow(unused_variables)] +#![feature(let_chains)] use std::io::*; use std::fs::File; // note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder use bzip2::bufread::*; +use parse_wiki_text::*; // https://github.com/rust-lang/rfcs/issues/1349 const version: &str = env!("CARGO_PKG_VERSION"); @@ -86,8 +88,97 @@ fn correct(word: &str) -> Option<&str> { // but it's fine because we're working with MUCH smaller strings lol fn display(definition: String) { // todo: implement - for line in definition.lines() { - println!("{}", line); + let definition = Configuration::default().parse(&definition); + // dbg!(definition.warnings); + // dbg!(&definition.nodes); + + // this is really quite terrible code + let mut inside_heading = false; + let mut english_heading = false; + for node in &definition.nodes { + if !inside_heading { + if let Node::Heading { nodes, level, .. } = node { + if *level == 2 { + if let Node::Text { value, .. } = nodes.get(0).unwrap() { // :-( + if *value == "English" { + inside_heading = true; + english_heading = true; + println!("{}", value); + } + } + } + } + } else { + if let Node::Heading { nodes, level, .. } = node && *level == 2 { + inside_heading = false; + } else { + display_ii(node); + } + } + } + if !english_heading { + assert!(inside_heading == false); + for node in &definition.nodes { + if !inside_heading { + if let Node::Heading { nodes, level, .. } = node { + if *level == 2 { + if let Node::Text { value, .. } = nodes.get(0).unwrap() { + inside_heading = true; + println!("{}", value); + } + } + } + } else { + if let Node::Heading { nodes, level, .. } = node && *level == 2 { + inside_heading = false; + } else { + display_ii(node); + } + } + } + } +} + +// no overloading?? O_O +fn display_ii(node: &Node) { + match node { + Node::CharacterEntity { character, .. } => print!("{}", character), + Node::Text { value, .. } => print!("{}", value), + Node::Link { text, target, .. } => { + assert!(text.len() == 1); + display_ii(text.get(0).unwrap()); + }, + + Node::Heading { nodes, level, .. } => { + assert!(nodes.len() == 1); + display_ii(nodes.get(0).unwrap()); + println!(); + }, + Node::HorizontalDivider { end, start } => println!("\n------"), + Node::ParagraphBreak { .. } => print!(" "), + Node::Template { name, parameters, .. } => { + for parameter in parameters.iter().rev() { + if let Some(name) = ¶meter.name { + // print!("("); + // for value in ¶meter.value { + // display_ii(&value); + // } + // print!(") "); + } else { + for value in ¶meter.value { + display_ii(&value); + } + break; + } + } + } + Node::OrderedList { items, .. } => (), + Node::UnorderedList { items, .. } => (), + + Node::Preformatted { nodes, .. } => (), + Node::Category { .. } => (), + Node::Italic { .. } => (), + _ => todo!() } } -- cgit v1.2.3-70-g09d2