aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author JJ 2023-06-17 21:56:58 +0000
committer JJ 2023-06-17 21:56:58 +0000
commit 6aeacb7abca05bac190d093cbefc3a88c46052b6 (patch)
tree 10aacf4927f9456b9f18b290d81b17203ce7c00b
parent 4d7f63203724d246d5b58a50a57b6e7d42ab7951 (diff)
split lib.rs functionality into separate modules
-rw-r--r-- src/correct.rs 4
-rw-r--r-- src/display.rs 67
-rw-r--r-- src/lib.rs 165
-rw-r--r-- src/lookup.rs 68
-rw-r--r-- src/main.rs 7
-rw-r--r-- src/state.rs 14
6 files changed, 167 insertions, 158 deletions
diff --git a/src/correct.rs b/src/correct.rs
new file mode 100644
index 0000000..333aba5
--- /dev/null
+++ b/src/correct.rs
@@ -0,0 +1,4 @@
+// Spelling corrector stub (not implemented yet). Reference: http://norvig.com/spell-correct.html
+pub fn correct(word: &str) -> Option<&str> {
+ todo!(); // panics if called
+}
diff --git a/src/display.rs b/src/display.rs
new file mode 100644
index 0000000..3c372d2
--- /dev/null
+++ b/src/display.rs
@@ -0,0 +1,67 @@
+#![allow(unused_variables)]
+
+use crate::state::*;
+use parse_wiki_text::*;
+
+// Sub-section headings that display_language leaves out of its output, body included.
+const skippable_headers: &[&str; 15] =
+ &["Synonyms", "Antonyms", "Hyponyms", "Anagrams", "Translations",
+ "Pronunciation", "Declension", "Inflection", "Descendants",
+ "Derived terms", "Related terms", "See also", "Further reading",
+ "References", "Alternative forms"];
+
+// Parses `definition` and prints the section for state.lang, retrying with "" if there is none.
+// The string handling is somewhat inefficient, but fine: these strings are MUCH smaller.
+pub fn display(definition: String, state: &State) {
+ let definition = Configuration::default().parse(&definition);
+
+ // TODO: implement a display method on the parsed output and call that instead:
+ // definition.display(&state.lang);
+
+ // Fallback: if no level-2 heading matched state.lang, try again with an empty language name.
+ if !display_language(&definition, &state.lang) {
+ display_language(&definition, "");
+ }
+}
+
+// Prints the part of `definition` under the level-2 heading whose first text node equals `lang`.
+// Sub-sections named in skippable_headers are left out; a later level-2 heading ends the section.
+// Returns true if a matching level-2 heading was found, false otherwise.
+pub fn display_language(definition: &Output, lang: &str) -> bool {
+ let mut inside_heading = false; // true while inside the matched language section
+ let mut correct_language = false; // set once a matching heading is seen; this is the return value
+ let mut skipping_heading = false; // true while inside a skippable sub-section
+ for (i, node) in definition.nodes.iter().enumerate() { // `i` is not used in the loop body
+
+ if let Node::Heading { nodes, level, .. } = node
+ && let Some(Node::Text { value, .. }) = nodes.get(0) { // a heading whose first child is text
+ if inside_heading {
+ if *level == 2 { // another level-2 heading: the matched section is over
+ inside_heading = false;
+ } else if skippable_headers.contains(value) { // skippable heading: not printed itself
+ skipping_heading = true;
+ } else { // any other heading ends skipping and is printed on its own line
+ if skipping_heading && !skippable_headers.contains(value) { // contains() is already known false in this branch
+ skipping_heading = false;
+ }
+ print!("\n{}\n", node);
+ }
+ } else if *level == 2 && *value == lang { // start of the requested language section
+ inside_heading = true;
+ correct_language = true;
+ print!("{}", node);
+ }
+ } else if inside_heading && !skipping_heading { // non-heading node, or a heading without leading text
+ if let Node::OrderedList { .. } | Node::UnorderedList { .. } | Node::DefinitionList { .. } = node { // lists are printed with surrounding whitespace trimmed
+ print!("{}", format!("{}", node).trim());
+ } else {
+ print!("{}", node);
+ }
+ }
+ }
+ if correct_language { // end the output with a newline only if a section was matched
+ println!();
+ }
+ return correct_language;
+}
+
diff --git a/src/lib.rs b/src/lib.rs
index cf2249c..bf08223 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,24 +2,24 @@
#![allow(unused_variables)]
#![feature(let_chains)]
-use std::{io::*, fs::File};
-// note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder
-use bzip2::bufread::*;
-use parse_wiki_text::*;
+pub mod correct;
+pub mod display;
+pub mod lookup;
+pub mod state;
// https://github.com/rust-lang/rfcs/issues/1349
const version: &str = env!("CARGO_PKG_VERSION");
const index_path: &str = env!("index_path");
const dictionary_path: &str = env!("dictionary_path");
-pub fn handle_word(word: String, state: &State) {
+pub fn handle_word(word: String, state: &state::State) {
// if lets are kinda clunky
- if let Some(definition) = lookup(&word).unwrap() {
- display(definition, &state);
- } else if let Some(corrected) = correct(&word) {
+ if let Some(definition) = lookup::lookup(&word).unwrap() {
+ display::display(definition, &state);
+ } else if let Some(corrected) = correct::correct(&word) {
println!("Could not find word {}, continuing with {}...", word, corrected);
- if let Some(definition) = lookup(&corrected).unwrap() {
- display(definition, &state);
+ if let Some(definition) = lookup::lookup(&corrected).unwrap() {
+ display::display(definition, &state);
} else {
println!("Could not find corrected word {}.", corrected);
}
@@ -28,151 +28,8 @@ pub fn handle_word(word: String, state: &State) {
}
}
-// i don't like that there are multiple result types
-// that seems Bad
-// also having to explicitly box dyn Error sucks, fine fuck you it's the rust way
-type Lookup = std::result::Result<Option<String>, Box<dyn std::error::Error>>;
-
-// WHY can you not implement traits on external types, like what??
-// fortunately we needed to copy-paste the parse_wiki_text library to fix some bugs anyhow
-fn lookup(word: &str) -> Lookup {
- if let Ok(file) = File::open(index_path) {
- return lookup_local(word, file);
- } else {
- return lookup_online(word);
- }
-}
-
-fn lookup_local(word: &str, file: File) -> Lookup {
- let reader = BufReader::new(MultiBzDecoder::new(BufReader::new(file)));
- for line in reader.lines() {
- let line = line.expect("Failed to read line");
-
- // format: file-offset:page-id:page-title
- let line = line.splitn(3, ":").collect::<Vec<&str>>();
- assert!(line.len() == 3, "Failed to parse line. Is your index file valid?");
-
- let offset = line.get(0).unwrap().parse::<u64>()?;
- let id = line.get(1).unwrap().parse::<u64>()?;
- let title = *line.get(2).unwrap(); // this dereference now makes sense
-
- if title == word {
- let file = File::open(dictionary_path)?;
- let mut reader = BufReader::new(file);
-
- // note: our chunk contains multiple pages
- let offset = reader.seek(SeekFrom::Start(offset))
- .expect("Bad offset. Is your index file valid?");
- let reader = BufReader::new(BzDecoder::new(reader));
-
- let mut buffer = String::new();
- let mut page = false;
- for line in reader.lines() {
- let line = line.unwrap();
- if line == format!(" <title>{}</title>", title) {
- buffer.push_str(" <page>");
- buffer.push_str("\n");
- page = true;
- }
- if page {
- buffer.push_str(&line);
- buffer.push_str("\n");
- if line == " </page>" {
- break;
- }
- }
- }
- return Ok(Some(buffer));
- }
- }
- return Ok(None);
-}
-
-fn lookup_online(word: &str) -> Lookup {
- todo!();
-}
-
-// http://norvig.com/spell-correct.html
-fn correct(word: &str) -> Option<&str> {
- todo!();
-}
-
-// now we do somewhat inefficient string manipulation
-// but it's fine because we're working with MUCH smaller strings lol
-fn display(definition: String, state: &State) {
- let definition = Configuration::default().parse(&definition);
-
- // this is really quite terrible code
- if !display_language(&definition, &state.lang) {
- display_language(&definition, "");
- }
-}
-
-// i really miss static blocks
-const skippable_headers: &[&str; 15] =
- &["Synonyms", "Antonyms", "Hyponyms", "Anagrams", "Translations",
- "Pronunciation", "Declension", "Inflection", "Descendants",
- "Derived terms", "Related terms", "See also", "Further reading",
- "References", "Alternative forms"];
-
-// no overloading?? O_O
-// matching on an enum of structs SUCKS
-// functions as parameters is too hard
-fn display_language(definition: &Output, lang: &str) -> bool {
- let mut inside_heading = false;
- let mut correct_language = false;
- let mut skipping_heading = false;
- for (i, node) in definition.nodes.iter().enumerate() {
-
- if let Node::Heading { nodes, level, .. } = node
- && let Some(Node::Text { value, .. }) = nodes.get(0) {
- if inside_heading {
- if *level == 2 {
- inside_heading = false;
- } else if skippable_headers.contains(value) {
- skipping_heading = true;
- } else {
- if skipping_heading && !skippable_headers.contains(value) {
- skipping_heading = false;
- }
- print!("\n{}\n", node);
- }
- } else if *level == 2 && *value == lang {
- inside_heading = true;
- correct_language = true;
- print!("{}", node);
- }
- } else if inside_heading && !skipping_heading {
- if let Node::OrderedList { .. } | Node::UnorderedList { .. } | Node::DefinitionList { .. } = node {
- print!("{}", format!("{}", node).trim());
- } else {
- print!("{}", node);
- }
- }
- }
- if correct_language {
- println!();
- }
- return correct_language;
-}
-
-// default values on structs please ;_;
-pub struct State {
- pub full: bool,
- pub lang: String,
-}
-
-impl State {
- pub fn new() -> State {
- return State {
- full: false,
- lang: String::from("English"),
- }
- }
-}
-
// mut state: State, yet state: &mut State?? huh??
-pub fn handle_parameter(word: &str, state: &mut State) {
+pub fn handle_parameter(word: &str, state: &mut state::State) {
match word { // todo: extend
"--help" => {
println!("dictionarium {}\n", version);
diff --git a/src/lookup.rs b/src/lookup.rs
new file mode 100644
index 0000000..e9882a4
--- /dev/null
+++ b/src/lookup.rs
@@ -0,0 +1,68 @@
+use std::{io::*, fs::File};
+// note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder
+use bzip2::bufread::*;
+
+// Result of a lookup: Ok(Some(text)) when a page was extracted, Ok(None) when the word
+// is not in the index, Err(..) on failure. The error is a boxed dyn Error so that
+// different error types (integer parsing, file I/O) can all be propagated with `?`.
+type Lookup = std::result::Result<Option<String>, Box<dyn std::error::Error>>;
+
+// Looks up `word`: uses the local index at crate::index_path if it can be opened, else goes online.
+// (Author's note: the parse_wiki_text library was copy-pasted into this project to fix some bugs.)
+pub fn lookup(word: &str) -> Lookup {
+ if let Ok(file) = File::open(crate::index_path) {
+ return lookup_local(word, file);
+ } else {
+ return lookup_online(word); // any failure to open the index falls through to here
+ }
+}
+
+fn lookup_local(word: &str, file: File) -> Lookup { // scans the index `file` for `word`; on a match, extracts that page from the dictionary
+ let reader = BufReader::new(MultiBzDecoder::new(BufReader::new(file))); // the index is bzip2-compressed and read line by line
+ for line in reader.lines() {
+ let line = line.expect("Failed to read line"); // panics on a read/decode error
+
+ // format: file-offset:page-id:page-title
+ let line = line.splitn(3, ":").collect::<Vec<&str>>(); // at most 3 pieces, so colons inside the title are kept
+ assert!(line.len() == 3, "Failed to parse line. Is your index file valid?"); // panics on a malformed line
+
+ let offset = line.get(0).unwrap().parse::<u64>()?;
+ let id = line.get(1).unwrap().parse::<u64>()?; // parsed (so a bad id is an error) but not used afterwards
+ let title = *line.get(2).unwrap(); // get() yields a &&str; dereference it to a plain &str
+
+ if title == word {
+ let file = File::open(crate::dictionary_path)?;
+ let mut reader = BufReader::new(file);
+
+ // note: our chunk contains multiple pages, so the wanted one still has to be found inside it
+ let offset = reader.seek(SeekFrom::Start(offset))
+ .expect("Bad offset. Is your index file valid?"); // shadows `offset` with the position returned by seek
+ let reader = BufReader::new(BzDecoder::new(reader)); // decompress starting at the offset just seeked to
+
+ let mut buffer = String::new();
+ let mut page = false; // true once the wanted page's title line has been seen
+ for line in reader.lines() {
+ let line = line.unwrap(); // panics on a read/decode error
+ if line == format!(" <title>{}</title>", title) { // the whole line must equal the title element
+ buffer.push_str(" <page>"); // presumably re-adds the opening tag that precedes the title line -- TODO confirm
+ buffer.push_str("\n");
+ page = true;
+ }
+ if page {
+ buffer.push_str(&line);
+ buffer.push_str("\n");
+ if line == " </page>" { // closing tag: the page is complete
+ break;
+ }
+ }
+ }
+ return Ok(Some(buffer)); // NOTE(review): returned even if the title line was never found (buffer is then empty)
+ }
+ }
+ return Ok(None); // the index has no entry whose title equals `word`
+}
+
+fn lookup_online(word: &str) -> Lookup { // online lookup stub (not implemented yet)
+ todo!(); // panics if reached, i.e. whenever lookup cannot open the local index
+}
+
diff --git a/src/main.rs b/src/main.rs
index e31b384..4c2174d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,9 +1,8 @@
-use std::env;
-use dictionarium;
+use dictionarium::*;
fn main() {
- let mut state = dictionarium::State::new();
- let args: Vec<String> = env::args().skip(1).collect();
+ let mut state = state::State::new();
+ let args: Vec<String> = std::env::args().skip(1).collect();
if args.len() == 0 {
dictionarium::handle_parameter("--help", &mut state);
diff --git a/src/state.rs b/src/state.rs
new file mode 100644
index 0000000..8b8fa52
--- /dev/null
+++ b/src/state.rs
@@ -0,0 +1,14 @@
+// Runtime options. State::new() supplies the defaults: full = false, lang = "English".
+pub struct State {
+ pub full: bool, // not read in the visible code; presumably toggles fuller output -- TODO confirm
+ pub lang: String, // language name that display passes to display_language
+}
+
+impl State {
+ pub fn new() -> State { // builds a State holding the default values
+ return State {
+ full: false,
+ lang: String::from("English"),
+ }
+ }
+}