From 6aeacb7abca05bac190d093cbefc3a88c46052b6 Mon Sep 17 00:00:00 2001 From: JJ Date: Sat, 17 Jun 2023 14:56:58 -0700 Subject: split lib.rs functionality into separate modules --- src/correct.rs | 4 ++ src/display.rs | 67 +++++++++++++++++++++++ src/lib.rs | 165 ++++----------------------------------------------------- src/lookup.rs | 68 ++++++++++++++++++++++++ src/main.rs | 7 ++- src/state.rs | 14 +++++ 6 files changed, 167 insertions(+), 158 deletions(-) create mode 100644 src/correct.rs create mode 100644 src/display.rs create mode 100644 src/lookup.rs create mode 100644 src/state.rs diff --git a/src/correct.rs b/src/correct.rs new file mode 100644 index 0000000..333aba5 --- /dev/null +++ b/src/correct.rs @@ -0,0 +1,4 @@ +// http://norvig.com/spell-correct.html +pub fn correct(word: &str) -> Option<&str> { + todo!(); +} diff --git a/src/display.rs b/src/display.rs new file mode 100644 index 0000000..3c372d2 --- /dev/null +++ b/src/display.rs @@ -0,0 +1,67 @@ +#![allow(unused_variables)] + +use crate::state::*; +use parse_wiki_text::*; + +// i really miss static blocks +const skippable_headers: &[&str; 15] = + &["Synonyms", "Antonyms", "Hyponyms", "Anagrams", "Translations", + "Pronunciation", "Declension", "Inflection", "Descendants", + "Derived terms", "Related terms", "See also", "Further reading", + "References", "Alternative forms"]; + +// now we do somewhat inefficient string manipulation +// but it's fine because we're working with MUCH smaller strings lol +pub fn display(definition: String, state: &State) { + let definition = Configuration::default().parse(&definition); + + // impl display on that shit then + // definition.display(&state.lang); + + // this is really quite terrible code + if !display_language(&definition, &state.lang) { + display_language(&definition, ""); + } +} + +// no overloading?? O_O +// matching on an enum of structs SUCKS +// functions as parameters is too hard +pub fn display_language(definition: &Output, lang: &str) -> bool { + let mut inside_heading = false; + let mut correct_language = false; + let mut skipping_heading = false; + for (i, node) in definition.nodes.iter().enumerate() { + + if let Node::Heading { nodes, level, .. } = node + && let Some(Node::Text { value, .. }) = nodes.get(0) { + if inside_heading { + if *level == 2 { + inside_heading = false; + } else if skippable_headers.contains(value) { + skipping_heading = true; + } else { + if skipping_heading && !skippable_headers.contains(value) { + skipping_heading = false; + } + print!("\n{}\n", node); + } + } else if *level == 2 && *value == lang { + inside_heading = true; + correct_language = true; + print!("{}", node); + } + } else if inside_heading && !skipping_heading { + if let Node::OrderedList { .. } | Node::UnorderedList { .. } | Node::DefinitionList { .. } = node { + print!("{}", format!("{}", node).trim()); + } else { + print!("{}", node); + } + } + } + if correct_language { + println!(); + } + return correct_language; +} + diff --git a/src/lib.rs b/src/lib.rs index cf2249c..bf08223 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,24 +2,24 @@ #![allow(unused_variables)] #![feature(let_chains)] -use std::{io::*, fs::File}; -// note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder -use bzip2::bufread::*; -use parse_wiki_text::*; +pub mod correct; +pub mod display; +pub mod lookup; +pub mod state; // https://github.com/rust-lang/rfcs/issues/1349 const version: &str = env!("CARGO_PKG_VERSION"); const index_path: &str = env!("index_path"); const dictionary_path: &str = env!("dictionary_path"); -pub fn handle_word(word: String, state: &State) { +pub fn handle_word(word: String, state: &state::State) { // if lets are kinda clunky - if let Some(definition) = lookup(&word).unwrap() { - display(definition, &state); - } else if let Some(corrected) = correct(&word) { + if let Some(definition) = lookup::lookup(&word).unwrap() { + display::display(definition, &state); + } else if let Some(corrected) = correct::correct(&word) { println!("Could not find word {}, continuing with {}...", word, corrected); - if let Some(definition) = lookup(&corrected).unwrap() { - display(definition, &state); + if let Some(definition) = lookup::lookup(&corrected).unwrap() { + display::display(definition, &state); } else { println!("Could not find corrected word {}.", corrected); } @@ -28,151 +28,8 @@ pub fn handle_word(word: String, state: &State) { } } -// i don't like that there are multiple result types -// that seems Bad -// also having to explicitly box dyn Error sucks, fine fuck you it's the rust way -type Lookup = std::result::Result, Box>; - -// WHY can you not implement traits on external types, like what?? -// fortunately we needed to copy-paste the parse_wiki_text library to fix some bugs anyhow -fn lookup(word: &str) -> Lookup { - if let Ok(file) = File::open(index_path) { - return lookup_local(word, file); - } else { - return lookup_online(word); - } -} - -fn lookup_local(word: &str, file: File) -> Lookup { - let reader = BufReader::new(MultiBzDecoder::new(BufReader::new(file))); - for line in reader.lines() { - let line = line.expect("Failed to read line"); - - // format: file-offset:page-id:page-title - let line = line.splitn(3, ":").collect::>(); - assert!(line.len() == 3, "Failed to parse line. Is your index file valid?"); - - let offset = line.get(0).unwrap().parse::()?; - let id = line.get(1).unwrap().parse::()?; - let title = *line.get(2).unwrap(); // this dereference now makes sense - - if title == word { - let file = File::open(dictionary_path)?; - let mut reader = BufReader::new(file); - - // note: our chunk contains multiple pages - let offset = reader.seek(SeekFrom::Start(offset)) - .expect("Bad offset. Is your index file valid?"); - let reader = BufReader::new(BzDecoder::new(reader)); - - let mut buffer = String::new(); - let mut page = false; - for line in reader.lines() { - let line = line.unwrap(); - if line == format!(" {}", title) { - buffer.push_str(" "); - buffer.push_str("\n"); - page = true; - } - if page { - buffer.push_str(&line); - buffer.push_str("\n"); - if line == " " { - break; - } - } - } - return Ok(Some(buffer)); - } - } - return Ok(None); -} - -fn lookup_online(word: &str) -> Lookup { - todo!(); -} - -// http://norvig.com/spell-correct.html -fn correct(word: &str) -> Option<&str> { - todo!(); -} - -// now we do somewhat inefficient string manipulation -// but it's fine because we're working with MUCH smaller strings lol -fn display(definition: String, state: &State) { - let definition = Configuration::default().parse(&definition); - - // this is really quite terrible code - if !display_language(&definition, &state.lang) { - display_language(&definition, ""); - } -} - -// i really miss static blocks -const skippable_headers: &[&str; 15] = - &["Synonyms", "Antonyms", "Hyponyms", "Anagrams", "Translations", - "Pronunciation", "Declension", "Inflection", "Descendants", - "Derived terms", "Related terms", "See also", "Further reading", - "References", "Alternative forms"]; - -// no overloading?? O_O -// matching on an enum of structs SUCKS -// functions as parameters is too hard -fn display_language(definition: &Output, lang: &str) -> bool { - let mut inside_heading = false; - let mut correct_language = false; - let mut skipping_heading = false; - for (i, node) in definition.nodes.iter().enumerate() { - - if let Node::Heading { nodes, level, .. } = node - && let Some(Node::Text { value, .. }) = nodes.get(0) { - if inside_heading { - if *level == 2 { - inside_heading = false; - } else if skippable_headers.contains(value) { - skipping_heading = true; - } else { - if skipping_heading && !skippable_headers.contains(value) { - skipping_heading = false; - } - print!("\n{}\n", node); - } - } else if *level == 2 && *value == lang { - inside_heading = true; - correct_language = true; - print!("{}", node); - } - } else if inside_heading && !skipping_heading { - if let Node::OrderedList { .. } | Node::UnorderedList { .. } | Node::DefinitionList { .. } = node { - print!("{}", format!("{}", node).trim()); - } else { - print!("{}", node); - } - } - } - if correct_language { - println!(); - } - return correct_language; -} - -// default values on structs please ;_; -pub struct State { - pub full: bool, - pub lang: String, -} - -impl State { - pub fn new() -> State { - return State { - full: false, - lang: String::from("English"), - } - } -} - // mut state: State, yet state: &mut State?? huh?? -pub fn handle_parameter(word: &str, state: &mut State) { +pub fn handle_parameter(word: &str, state: &mut state::State) { match word { // todo: extend "--help" => { println!("dictionarium {}\n", version); diff --git a/src/lookup.rs b/src/lookup.rs new file mode 100644 index 0000000..e9882a4 --- /dev/null +++ b/src/lookup.rs @@ -0,0 +1,68 @@ +use std::{io::*, fs::File}; +// note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder +use bzip2::bufread::*; + +// i don't like that there are multiple result types +// that seems Bad +// also having to explicitly box dyn Error sucks, fine fuck you it's the rust way +type Lookup = std::result::Result, Box>; + +// WHY can you not implement traits on external types, like what?? +// fortunately we needed to copy-paste the parse_wiki_text library to fix some bugs anyhow +pub fn lookup(word: &str) -> Lookup { + if let Ok(file) = File::open(crate::index_path) { + return lookup_local(word, file); + } else { + return lookup_online(word); + } +} + +fn lookup_local(word: &str, file: File) -> Lookup { + let reader = BufReader::new(MultiBzDecoder::new(BufReader::new(file))); + for line in reader.lines() { + let line = line.expect("Failed to read line"); + + // format: file-offset:page-id:page-title + let line = line.splitn(3, ":").collect::>(); + assert!(line.len() == 3, "Failed to parse line. Is your index file valid?"); + + let offset = line.get(0).unwrap().parse::()?; + let id = line.get(1).unwrap().parse::()?; + let title = *line.get(2).unwrap(); // this dereference now makes sense + + if title == word { + let file = File::open(crate::dictionary_path)?; + let mut reader = BufReader::new(file); + + // note: our chunk contains multiple pages + let offset = reader.seek(SeekFrom::Start(offset)) + .expect("Bad offset. Is your index file valid?"); + let reader = BufReader::new(BzDecoder::new(reader)); + + let mut buffer = String::new(); + let mut page = false; + for line in reader.lines() { + let line = line.unwrap(); + if line == format!(" {}", title) { + buffer.push_str(" "); + buffer.push_str("\n"); + page = true; + } + if page { + buffer.push_str(&line); + buffer.push_str("\n"); + if line == " " { + break; + } + } + } + return Ok(Some(buffer)); + } + } + return Ok(None); +} + +fn lookup_online(word: &str) -> Lookup { + todo!(); +} + diff --git a/src/main.rs b/src/main.rs index e31b384..4c2174d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,8 @@ -use std::env; -use dictionarium; +use dictionarium::*; fn main() { - let mut state = dictionarium::State::new(); - let args: Vec = env::args().skip(1).collect(); + let mut state = state::State::new(); + let args: Vec = std::env::args().skip(1).collect(); if args.len() == 0 { dictionarium::handle_parameter("--help", &mut state); diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..8b8fa52 --- /dev/null +++ b/src/state.rs @@ -0,0 +1,14 @@ +// default values on structs please ;_; +pub struct State { + pub full: bool, + pub lang: String, +} + +impl State { + pub fn new() -> State { + return State { + full: false, + lang: String::from("English"), + } + } +} -- cgit v1.2.3-70-g09d2