aboutsummaryrefslogtreecommitdiff
path: root/src/lookup.rs
blob: e9882a4d4c4c608e9bbafb656736d57fc80c71d4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
use std::{io::*, fs::File};
// note that bufread::MultiBzDecoder is _distinct_ from read::MultiBzDecoder
use bzip2::bufread::*;

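// `use std::io::*` glob-imports `io::Result` (and `io::Error`), which shadow the prelude
// names, so the general two-parameter `Result` has to be spelled `std::result::Result` here.
// The error side is an explicitly boxed `dyn std::error::Error` so `?` can propagate any error.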
type Lookup = std::result::Result<Option<String>, Box<dyn std::error::Error>>;

// WHY can you not implement traits on external types, like what??
// fortunately we needed to copy-paste the parse_wiki_text library to fix some bugs anyhow
/// Looks up `word`, preferring the local dump over the network.
///
/// Tries to open the index file at `crate::index_path`. If that succeeds, the
/// opened file is handed to `lookup_local`; if opening fails for any reason,
/// the lookup is delegated to `lookup_online` instead.
pub fn lookup(word: &str) -> Lookup {
    match File::open(crate::index_path) {
        Ok(index) => lookup_local(word, index),
        // Any failure to open the index (not only "file missing") selects the online path.
        Err(_) => lookup_online(word),
    }
}

/// Searches the local index for `word` and, on a hit, extracts its page from the dictionary dump.
///
/// `file` is the opened index: a multi-stream bzip2 file whose decompressed lines have the
/// form `file-offset:page-id:page-title`. Lines are scanned in order until one whose title
/// equals `word` exactly. The dictionary at `crate::dictionary_path` is then opened, seeked to
/// that line's offset, and the bzip2 stream starting there is searched with `extract_page`.
///
/// Returns `Ok(None)` when no index line carries the title, or when the indexed chunk turns
/// out not to contain a page with that title.
///
/// # Errors
///
/// Returns an error (instead of panicking) when the index or dictionary cannot be read or
/// decompressed, when an index line does not have three `:`-separated fields, when the offset
/// or page id is not a valid `u64`, or when the dictionary cannot be opened or seeked.
fn lookup_local(word: &str, file: File) -> Lookup {
    let index = BufReader::new(MultiBzDecoder::new(BufReader::new(file)));
    for line in index.lines() {
        let line = line?;

        // format: file-offset:page-id:page-title
        // Only the first two ':' are separators; the title itself may contain ':'.
        let mut fields = line.splitn(3, ':');
        let (offset, id, title) = match (fields.next(), fields.next(), fields.next()) {
            (Some(offset), Some(id), Some(title)) => (offset, id, title),
            _ => return Err("Failed to parse line. Is your index file valid?".into()),
        };

        let offset: u64 = offset.parse()?;
        // The page id is not needed for the lookup; it is parsed only to validate the line.
        let _id: u64 = id.parse()?;

        if title != word {
            continue;
        }

        let mut dictionary = BufReader::new(File::open(crate::dictionary_path)?);
        dictionary
            .seek(SeekFrom::Start(offset))
            .map_err(|e| format!("Bad offset ({}). Is your index file valid?", e))?;

        // note: our chunk contains multiple pages, so the right one still has to be found
        return extract_page(BufReader::new(BzDecoder::new(dictionary)), title);
    }
    Ok(None)
}

/// Copies the `<page>` element whose title line is `title` out of a decompressed chunk.
///
/// Lines are skipped until one equal to `    <title>{title}</title>` is seen. From there a
/// `  <page>` opening line followed by every line up to and including `  </page>` is
/// collected, each terminated by `\n`.
///
/// Returns `Ok(None)` if the title line never appears. If the input ends before the closing
/// `  </page>` line, whatever was collected so far is returned. Read errors are propagated.
fn extract_page<R: BufRead>(chunk: R, title: &str) -> Lookup {
    // Built once: comparing against a fresh `format!` per line would allocate on every line.
    let title_line = format!("    <title>{}</title>", title);

    let mut buffer = String::new();
    let mut in_page = false;
    for line in chunk.lines() {
        let line = line?;
        if line == title_line {
            // The `<page>` opening line has already gone by when the title is seen,
            // so it is re-created here to keep the extracted element well-formed.
            buffer.push_str("  <page>\n");
            in_page = true;
        }
        if in_page {
            buffer.push_str(&line);
            buffer.push('\n');
            if line == "  </page>" {
                return Ok(Some(buffer));
            }
        }
    }

    // Reaching the end without ever seeing the title means the page is not in this chunk;
    // report that as "not found" rather than handing back an empty string.
    Ok(if in_page { Some(buffer) } else { None })
}

/// Online fallback for looking up `word`.
///
/// # Panics
///
/// Not implemented yet: the body is a bare `todo!()`, so every call panics.
fn lookup_online(word: &str) -> Lookup {
    todo!()
}