diff options
author | JJ | 2022-12-24 04:09:40 +0000 |
---|---|---|
committer | JJ | 2023-01-01 00:18:45 +0000 |
commit | e2c4f8c46e27ba182471b945911b21ac6546bd4e (patch) | |
tree | 611ed768f8200b7d1e4a93e50d14856dbe0d323f /src |
Initial implementation
Diffstat (limited to 'src')
-rw-r--r-- | src/lib.rs | 113 | ||||
-rw-r--r-- | src/main.rs | 24 |
2 files changed, 137 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c3f15e6 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,113 @@ +#![allow(non_upper_case_globals)] +#![allow(unused_variables)] + +use std::io::*; +use std::fs::File; +// note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder +use bzip2::bufread::*; + +// https://github.com/rust-lang/rfcs/issues/1349 +const version: &str = env!("CARGO_PKG_VERSION"); +const index_path: &str = env!("index_path"); +const dictionary_path: &str = env!("dictionary_path"); + +pub fn handle(word: String) { + // if lets are kinda clunky + if let Some(definition) = lookup(&word) { + display(definition); + } else if let Some(corrected) = correct(&word) { + println!("Could not find word {}, continuing with {}...", word, corrected); + if let Some(definition) = lookup(&corrected) { + display(definition); + } else { + println!("Could not find corrected word {}.", corrected); + } + } else { + println!("Could not find word {}. Check your spelling?", word); + } +} + +fn lookup(word: &str) -> Option<String> { + let file = File::open(index_path).expect("Failed to open index file"); + let reader = BufReader::new(MultiBzDecoder::new(BufReader::new(file))); + for line in reader.lines() { + let line = line.expect("Failed to read line"); + + if line.len() > word.len() { + let diff = line.len() - word.len(); + + if line.get(diff..).unwrap_or_default() == word { + let line = line.splitn(3, ":").collect::<Vec<&str>>(); + + // format: file-offset:page-id:page-title + assert!(line.len() == 3); + let offset = line.get(0).unwrap().parse::<u64>() + .expect("Failed to parse offset. Is your index file valid?"); + let id = line.get(1).unwrap().parse::<u64>() + .expect("Failed to parse id. Is your index file valid?"); + let title = *line.get(2).unwrap(); // this dereference now makes sense + assert!(word == title); + + let file = File::open(dictionary_path) + .expect("Failed to open dictionary file"); + let mut reader = BufReader::new(file); + + // note: our chunk contains multiple pages + let offset = reader.seek(SeekFrom::Start(offset)) + .expect("Bad offset. Is your index file valid?"); + let reader = BufReader::new(BzDecoder::new(reader)); + + let mut buffer = String::new(); + let mut page = false; + for line in reader.lines() { + let line = line.unwrap(); + if line == format!(" <title>{}</title>", title) { + buffer.push_str(" <page>"); + buffer.push_str("\n"); + page = true; + } + if page { + buffer.push_str(&line); + buffer.push_str("\n"); + if line == " </page>" { + break; + } + } + } + return Some(buffer); + } + } + } + return None; +} + +// http://norvig.com/spell-correct.html +fn correct(word: &str) -> Option<&str> { + // todo: implement + return None; +} + +// now we do inefficient string manipulation +// but it's fine because we're working with MUCH smaller strings lol +fn display(definition: String) { + // todo: implement + for line in definition.lines() { + println!("{}", line); + } +} + +pub fn param(word: String) { + match word.as_str() { // curious about this + "--help" => { + println!("dictionarium {}", version); + println!(""); + println!("Usage: dictionarium <word>"); + }, + "--full" => { // set some global variable + + }, + _ => { + println!("Unknown flag \"{}\".", word); + } + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..62da4a5 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,24 @@ +use std::env; +use dictionarium; + +fn main() { + let args: Vec<String> = env::args().skip(1).collect(); + + if args.len() == 0 { + dictionarium::param(String::from("--help")); + } else { + let mut words = Vec::<String>::new(); + for word in args { + if word.len() > 2 && word.get(0..2).unwrap_or_default() == "--" { + dictionarium::param(word); + } else { + words.push(word); + } + } + + // we accept multiple words gladly + for word in words { + dictionarium::handle(word); + } + } +} |