use multipeek::multipeek;

/// **Basic** syntax tokens.
///
/// These are the flat, context-free units produced by the lexer; any further
/// structure is left to later stages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// An identifier.
    Word(String),
    /// A literal value, e.g. the contents of a string or a comment.
    Lit(String),
    /// Punctuation, i.e. any non-word token. Newlines and the delimiters
    /// around strings and comments are emitted as separators too.
    Sep(char),
    /// Indentation: the number of leading spaces on a line.
    Ind(usize),
}

/// Lexes a file into a Vec of fundamental Tokens.
pub fn tokenize(input: &str) -> Vec<Token> {
    // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
    // Tokens are matched by looping within their case until complete.
    // This then eliminates the need for almost all global parser state.

    use Token::*;
    let mut start_of_line = true;   // state
    let mut buf = String::new();    // buffer
    let mut res = Vec::new();       // result

    // `char` in rust is four bytes it's fine
    let mut input = multipeek(input.chars());
    while let Some(c) = input.next() {
        match c {
            ' ' => {
                if start_of_line { // indentation, to be dealt with later
                    let mut indendation_level = 1;
                    while let Some(x) = input.peek() {
                        match x {
                            ' ' => indendation_level += 1,
                            '\n' => break, // empty line
                            _ => {
                                res.push(Ind(indendation_level));
                                break;
                            }
                        }
                    }
                } else { // get rid of whitespace
                    while input.peek() == Some(&' ') { input.next(); }
                }
            },
            '\n' => { // newlines are separators
                start_of_line = true;
                res.push(Sep('\n'))
            },
            c if c.is_whitespace() => (), // tabs etc are not supported
            '\'' => { // single quoted strings, i.e. chars
                res.push(Sep('\''));
                while let Some(x) = input.next() {
                    match x {
                        '\'' => break,
                        '\\' => if let Some(y) = input.next() { buf.push(y) },
                        _ => buf.push(x)
                    }
                }
                res.push(Lit(String::from(&buf)));
                res.push(Sep('\''));
            },
            '"' => { // triple quoted strings
                if input.peek_nth(0) == Some(&'"') &&
                   input.peek_nth(1) == Some(&'"') {
                    input.next(); input.next();
                    res.push(Sep('"')); res.push(Sep('"')); res.push(Sep('"'));
                    while let Some(x) = input.next() {
                        match x {
                            '"' if input.peek_nth(1) == Some(&'"') &&
                                   input.peek_nth(2) == Some(&'"') => {
                                break;
                           },
                           _ => buf.push(x)
                        }
                    }
                    res.push(Lit(String::from(&buf)));
                    input.next(); input.next();
                    res.push(Sep('"')); res.push(Sep('"')); res.push(Sep('"'));
                } else { // regular strings
                    res.push(Sep('"'));
                    while let Some(x) = input.next() {
                        match x {
                            '"' => break,
                            '\\' => if let Some(y) = input.next() { buf.push(y) },
                            _ => buf.push(x)
                        }
                    }
                    res.push(Lit(String::from(&buf)));
                    res.push(Sep('"'));
                }
            },
            '#' => { // block comment, can be nested
                if input.peek() == Some(&'[') {
                    input.next();
                    res.push(Sep('#')); res.push(Sep('['));
                    let mut comment_level = 1;
                    while let Some(x) = input.next() && comment_level > 0 {
                        match x {
                            '#' if input.peek() == Some(&'[') => {
                                comment_level += 1;
                                input.next();
                            },
                            ']' if input.peek() == Some(&'#') => {
                                comment_level -= 1;
                                input.next();
                            },
                            _ => buf.push(x)
                        }
                    }
                    res.push(Lit(String::from(&buf)));
                    res.push(Sep(']')); res.push(Sep('#'));
                } else { // standard comment, runs until eol
                    res.push(Sep('#'));
                    while let Some(x) = input.peek() {
                        match x {
                            '\n' => break,
                            _ => {
                                buf.push(*x);
                                input.next();
                            }
                        }
                    }
                    res.push(Lit(String::from(&buf)));
                }
            },
            'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { // valid identifier
                while let Some(x) = input.peek() {
                    match x {
                        'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => {
                            buf.push(*x);
                            input.next();
                        },
                        _ => break
                    }
                }
                res.push(Word(String::from(&buf)));
            },
            '.' | ',' | ':' | ';' |
            '(' | ')' | '[' | ']' | '{' | '}' => res.push(Sep(c)),  // Sep
            _ => res.push(Sep(c))   // for now: treat unknown chars as Sep
        }
        buf.clear();
    }
    return res;
}