use multipeek::multipeek;

/// Result type used throughout the lexer: the error side is any boxed
/// `std::error::Error`, so plain string messages convert with `.into()`.
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
/// An ordered sequence of [`Token`]s; this is what [`tokenize`] returns on success.
pub type TokenStream = Vec<Token>;

/// **Basic** syntax tokens. Form an unambiguous TokenStream.
///
/// `Debug` and `Eq` are derived in addition to `Clone`/`PartialEq` so that
/// token streams can be printed, compared with `assert_eq!`, and unwrapped
/// from a `Result` in tests and diagnostics.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
    /// An identifier (letters, digits and underscores).
    Word(String),
    /// A literal value, e.g. the body of a string or comment.
    Lit(String),
    /// Punctuation: any non-word token, including newlines and the
    /// delimiters that surround a `Lit`.
    Sep(char),
    /// Opens a scope (indentation increased by one level).
    Begin,
    /// Closes a scope (indentation decreased by one level).
    End,
}

/// Keywords after which a statement may carry on onto the next line.
/// The lexer consults this list to decide whether a line break is a real
/// separator or merely a split in the middle of an expression.
const valid_continuations: [&str; 7] = [
    "and",
    "or",
    "xor",
    "in",
    "notin",
    "is",
    "isnot",
];

/// Parses whitespace-sensitive code into an unambiguous TokenStream.
/// Also useful for formatting.
///
/// Output shape:
/// - identifiers become `Word`;
/// - string, char and comment bodies become `Lit`, surrounded by the `Sep`
///   characters of their delimiters (a line comment only has the leading `#`);
/// - every other character, including `'\n'`, becomes a `Sep`;
/// - a line indented one level deeper than the previous one is preceded by
///   `Begin`; a line indented less is preceded by one `End` per closed level.
///
/// The width of one indentation level is taken from the first indented line.
/// An indented line that follows a line ending in punctuation or in one of
/// the `valid_continuations` keywords is treated as a continuation: the
/// newline token is removed and the indentation is ignored.
///
/// Known limitations, kept on purpose: a backslash inside a quoted literal is
/// dropped and the character after it is kept verbatim; unterminated literals
/// and comments simply run to the end of the input; blocks still open at the
/// end of the input are not closed with `End`; the delimiters of *nested*
/// block comments are not recorded in the `Lit`.
///
/// # Errors
///
/// Returns an error when
/// - whitespace other than `' '` and `'\n'` is encountered outside of a
///   literal or comment,
/// - a line's indentation is not a multiple of the indentation width, or
/// - indentation increases by more than one level at once.
// todo: support indentation within expressions
// nim: "As a rule of thumb, indentation within expressions is
// allowed after operators, an open parenthesis and after commas."
pub fn tokenize(input: &str) -> Result<TokenStream> {
    // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
    // Tokens are matched by looping within their case until complete.
    // This then eliminates the need for almost all global parser state.

    use Token::*;
    let mut start_of_line = true;               // state: no token seen on this line yet
    let mut indent_level = 0;                   // state: indentation of the current block, in columns
    let mut indent_width: Option<usize> = None; // state: columns per indentation level
    let mut res: TokenStream = Vec::new();      // result

    // `char` in rust is four bytes it's fine
    let mut input = multipeek(input.chars());
    while let Some(c) = input.next() {
        // A line whose first character is not a space sits in column zero:
        // close every block that is still open before lexing the token itself.
        if start_of_line && c != ' ' && c != '\n' {
            start_of_line = false;
            apply_indentation(&mut res, &mut indent_level, &mut indent_width, 0)?;
        }

        match c {
            ' ' if start_of_line => { // indentation
                let mut current_indent_level = 1;
                // `peek` does not advance, so every counted space must be consumed.
                while input.peek() == Some(&' ') {
                    input.next();
                    current_indent_level += 1;
                }
                match input.peek() {
                    // whitespace-only line (or trailing spaces at end of input):
                    // carries no indentation information
                    None | Some(&'\n') => (),
                    Some(_) => { // indentation ends
                        start_of_line = false;
                        if is_line_continuation(&res) {
                            // drop the newline so the two lines read as one
                            res.pop();
                        } else {
                            apply_indentation(
                                &mut res,
                                &mut indent_level,
                                &mut indent_width,
                                current_indent_level,
                            )?;
                        }
                    }
                }
            },
            ' ' => { // get rid of excess (all) whitespace
                while input.peek() == Some(&' ') { input.next(); }
            },
            '\n' => { // newlines are separators
                start_of_line = true;
                res.push(Sep('\n'))
            },
            c if c.is_whitespace() => return Err("tabs etc are not supported".into()),
            '\'' => { // single quoted strings, i.e. chars
                res.push(Sep('\''));
                res.push(Lit(read_quoted(&mut input, '\'')));
                res.push(Sep('\''));
            },
            '"' => {
                if input.peek_nth(0) == Some(&'"') &&
                   input.peek_nth(1) == Some(&'"') { // triple quoted strings
                    input.next(); input.next();
                    for _ in 0..3 { res.push(Sep('"')); }
                    let mut text = String::new();
                    while let Some(x) = input.next() {
                        // `x` is already consumed, so the two quotes that complete the
                        // terminator are the *next* two characters: offsets 0 and 1.
                        if x == '"' &&
                           input.peek_nth(0) == Some(&'"') &&
                           input.peek_nth(1) == Some(&'"') {
                            input.next(); input.next();
                            break;
                        }
                        text.push(x);
                    }
                    res.push(Lit(text));
                    for _ in 0..3 { res.push(Sep('"')); }
                } else { // regular strings
                    res.push(Sep('"'));
                    res.push(Lit(read_quoted(&mut input, '"')));
                    res.push(Sep('"'));
                }
            },
            '#' => {
                let mut text = String::new();
                if input.peek() == Some(&'[') { // block comment, can be nested
                    input.next();
                    res.push(Sep('#')); res.push(Sep('['));
                    let mut comment_level = 1;
                    // Test the nesting level *before* pulling the next character so
                    // nothing after the closing `]#` is swallowed.
                    while comment_level > 0 {
                        let Some(x) = input.next() else { break };
                        match x {
                            '#' if input.peek() == Some(&'[') => {
                                comment_level += 1;
                                input.next();
                            },
                            ']' if input.peek() == Some(&'#') => {
                                comment_level -= 1;
                                input.next();
                            },
                            _ => text.push(x)
                        }
                    }
                    res.push(Lit(text));
                    res.push(Sep(']')); res.push(Sep('#'));
                } else { // standard comment, runs until eol
                    res.push(Sep('#'));
                    while let Some(&x) = input.peek() {
                        if x == '\n' { break; } // the newline is lexed as its own token
                        text.push(x);
                        input.next();
                    }
                    res.push(Lit(text));
                }
            },
            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => { // valid identifier
                // `c` is the identifier's first character and is part of the word
                let mut word = String::new();
                word.push(c);
                while let Some(&x) = input.peek() {
                    if !(x.is_ascii_alphanumeric() || x == '_') { break; }
                    word.push(x);
                    input.next();
                }
                res.push(Word(word));
            },
            // punctuation. for now: unknown chars are treated as Sep as well
            _ => res.push(Sep(c))
        }
    }
    Ok(res)
}

/// Reads the body of a quoted literal whose opening `quote` has already been
/// consumed, up to and including the closing `quote` (or the end of input).
/// A backslash is dropped and the character following it is taken verbatim.
fn read_quoted(input: &mut impl Iterator<Item = char>, quote: char) -> String {
    let mut text = String::new();
    while let Some(x) = input.next() {
        match x {
            x if x == quote => break,
            '\\' => if let Some(y) = input.next() { text.push(y) },
            _ => text.push(x)
        }
    }
    text
}

/// Whether the tokens lexed so far end in a newline that directly follows
/// punctuation or a `valid_continuations` keyword, i.e. whether an indented
/// line that comes next continues the previous line.
fn is_line_continuation(res: &[Token]) -> bool {
    // Slice patterns never index out of bounds, unlike `res.len() - 2`.
    match res {
        [.., Token::Word(word), Token::Sep('\n')] => valid_continuations.contains(&word.as_str()),
        [.., Token::Sep(_), Token::Sep('\n')] => true,
        _ => false
    }
}

/// Emits the `Begin`/`End` tokens needed to move from `indent_level` to
/// `current` columns of indentation and records the new level.
fn apply_indentation(
    res: &mut TokenStream,
    indent_level: &mut usize,
    indent_width: &mut Option<usize>,
    current: usize,
) -> Result<()> {
    if current == *indent_level {
        return Ok(()); // same level of indentation
    }

    // Will only set the width once. Allows us to support X number of spaces so
    // long as it's consistent. While the width is unset `indent_level` is still
    // zero, so `current` is non-zero here and the width can never become zero.
    let width = *indent_width.get_or_insert(current);
    if current % width != 0 {
        return Err("indentation is offset".into());
    }

    if current > *indent_level {
        // new level of indentation: only one step at a time is meaningful
        if current - *indent_level != width {
            return Err("indentation stepped by too much in one go".into());
        }
        res.push(Token::Begin);
    } else {
        // old level(s) of indentation: one `End` per block that was closed
        let closed = (*indent_level - current) / width;
        for _ in 0..closed {
            res.push(Token::End);
        }
    }
    *indent_level = current;
    Ok(())
}