1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
#![allow(non_upper_case_globals)]
#![allow(unused_variables)]
use std::io::*;
use std::fs::File;
// note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder
use bzip2::bufread::*;
// https://github.com/rust-lang/rfcs/issues/1349
/// Crate version string, taken from Cargo's `CARGO_PKG_VERSION` at compile time.
const version: &str = env!("CARGO_PKG_VERSION");
/// Path of the index file opened by `lookup`. Read from the `index_path`
/// environment variable at compile time; `env!` fails the build if it is unset.
const index_path: &str = env!("index_path");
/// Path of the dictionary file opened by `lookup`. Read from the
/// `dictionary_path` environment variable at compile time; `env!` fails the
/// build if it is unset.
const dictionary_path: &str = env!("dictionary_path");
/// Looks up `word` and prints its definition.
///
/// If the word itself is not found, a spelling correction is requested from
/// `correct` and the lookup is retried once with the corrected word. Every
/// outcome (definition, retry notice, or failure) is reported on stdout.
pub fn handle(word: String) {
    // Fast path: the word exists exactly as typed.
    if let Some(definition) = lookup(&word) {
        display(definition);
        return;
    }

    // No direct hit: without a correction there is nothing more to try.
    let corrected = match correct(&word) {
        Some(suggestion) => suggestion,
        None => {
            println!("Could not find word {}. Check your spelling?", word);
            return;
        }
    };

    println!("Could not find word {}, continuing with {}...", word, corrected);
    match lookup(corrected) {
        Some(definition) => display(definition),
        None => println!("Could not find corrected word {}.", corrected),
    }
}
/// Finds `word` in the index and returns the raw XML of its page.
///
/// The index file (`index_path`) is read through a multi-stream bzip2 decoder
/// and scanned line by line for an entry whose title equals `word` exactly.
/// On a match, the dictionary file (`dictionary_path`) is opened, positioned
/// at the entry's byte offset, and the single bzip2 stream starting there is
/// decompressed and searched for the page.
///
/// Returns `Some(page_xml)` when the page was found, and `None` when the word
/// is not in the index or when the indexed chunk does not contain its page.
///
/// # Panics
///
/// Panics if either file cannot be opened or read, if an index line is not of
/// the form `file-offset:page-id:page-title`, or if seeking to the offset
/// fails.
fn lookup(word: &str) -> Option<String> {
    let file = File::open(index_path).expect("Failed to open index file");
    let reader = BufReader::new(MultiBzDecoder::new(BufReader::new(file)));
    for line in reader.lines() {
        let line = line.expect("Failed to read line");
        // Every line is validated, not only the matching one, so a corrupt
        // index is reported as soon as it is encountered.
        let (offset, _id, title) = parse_index_line(&line);
        if title != word {
            continue;
        }

        let file = File::open(dictionary_path).expect("Failed to open dictionary file");
        let mut reader = BufReader::new(file);
        reader
            .seek(SeekFrom::Start(offset))
            .expect("Bad offset. Is your index file valid?");
        // note: our chunk contains multiple pages, so it still has to be
        // searched for the one we want.
        let reader = BufReader::new(BzDecoder::new(reader));
        return extract_page(reader, title);
    }
    None
}

/// Splits one index line of the form `file-offset:page-id:page-title` into
/// its three fields. Only the first two `:` separate fields, so the title may
/// itself contain `:`. Panics if the line is malformed.
fn parse_index_line(line: &str) -> (u64, u64, &str) {
    let mut fields = line.splitn(3, ':');
    match (fields.next(), fields.next(), fields.next()) {
        (Some(offset), Some(id), Some(title)) => {
            let offset = offset
                .parse::<u64>()
                .expect("Failed to parse offset. Is your index file valid?");
            let id = id
                .parse::<u64>()
                .expect("Failed to parse id. Is your index file valid?");
            (offset, id, title)
        }
        _ => panic!("Failed to parse line. Is your index file valid?"),
    }
}

/// Scans decompressed chunk text for the page whose title line matches
/// `title` and returns that page's text, from a re-created opening page tag
/// through the closing page tag (or to the end of the chunk if the closing tag
/// never appears). Returns `None` if no matching title line is present.
fn extract_page<R: BufRead>(reader: R, title: &str) -> Option<String> {
    // NOTE(review): the literals below are compared against whole lines, so
    // they must match the dump's layout exactly, including leading
    // whitespace — confirm against the dictionary file.
    // Built once here rather than once per line of the chunk.
    let title_line = format!(" <title>{}</title>", title);
    let mut buffer = String::new();
    let mut in_page = false;
    for line in reader.lines() {
        let line = line.expect("Failed to read dictionary chunk. Is your dictionary file valid?");
        if line == title_line {
            // The opening tag precedes the title line and has already been
            // consumed, so it is re-inserted by hand.
            buffer.push_str(" <page>\n");
            in_page = true;
        }
        if in_page {
            buffer.push_str(&line);
            buffer.push('\n');
            if line == " </page>" {
                break;
            }
        }
    }
    // An index hit without a matching page is a miss, not an empty definition.
    if in_page {
        Some(buffer)
    } else {
        None
    }
}
/// Suggests a spelling correction for `word`.
///
/// Placeholder: no corrector exists yet, so this always yields `None`.
/// Intended approach: http://norvig.com/spell-correct.html
fn correct(word: &str) -> Option<&str> {
    // todo: implement
    None
}
/// Prints `definition` to stdout, one line at a time.
///
/// Currently a pass-through: the text is emitted unchanged apart from line
/// endings being normalised by `str::lines` + `println!`. Per-line string
/// work is acceptable here because a single page is small compared to the
/// dump it was extracted from.
fn display(definition: String) {
    // todo: implement
    definition
        .lines()
        .for_each(|text| println!("{}", text));
}
/// Handles a command-line flag given in `word`.
///
/// `--help` prints the program name, version and usage; `--full` is accepted
/// but currently does nothing; any other value is reported as an unknown flag.
pub fn param(word: String) {
    if word == "--help" {
        println!("dictionarium {}", version);
        println!();
        println!("Usage: dictionarium <word>");
    } else if word == "--full" {
        // Reserved: meant to set some global option; intentionally a no-op for now.
    } else {
        println!("Unknown flag \"{}\".", word);
    }
}
|