use multipeek::multipeek;

/// `Result` alias used by this module: the error side is a boxed
/// `std::error::Error` trait object, so any error type implementing that
/// trait (such as `LexicalError`) can be returned via `.into()` or `?`.
pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
/// An ordered sequence of `Token`s, as returned by `tokenize`.
/// The inner vector is private to this module.
pub struct TokenStream(Vec<Token>);

/// Errors reported by the lexer.
#[derive(Clone, PartialEq, Debug)]
pub enum LexicalError {
    /// A tab character was found where the lexer does not accept one.
    InvalidIndentation,
    /// A `)` was found with no matching open parenthesis.
    MismatchedParens,
    /// A `]` was found with no matching open bracket.
    MismatchedBrackets,
    /// A character was found that the lexer does not recognize.
    UnknownPunctuation,
}

impl std::fmt::Display for LexicalError {
    /// Writes the bare variant name (the same text the derived `Debug` produces).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant forces this table to be updated.
        let variant_name = match self {
            Self::InvalidIndentation => "InvalidIndentation",
            Self::MismatchedParens => "MismatchedParens",
            Self::MismatchedBrackets => "MismatchedBrackets",
            Self::UnknownPunctuation => "UnknownPunctuation",
        };
        f.write_str(variant_name)
    }
}

// Marker impl so the error can be boxed as `Box<dyn std::error::Error>`.
impl std::error::Error for LexicalError {}

/// **Basic** syntax tokens. Form an unambiguous TokenStream.
///
/// Every variant except `Indent` carries the token's text (or a typed
/// wrapper around it); no value is parsed or evaluated at this stage.
#[derive(Clone, PartialEq)]
pub enum Token {
    /// An identifier.
    Word(String),   // identifiers.
    /// The raw text of a numeric literal, ex. 413, 0b101011, 0xabcd. Kept as a string, not parsed.
    Num(String),    // numeric value, ex. 413, 0b101011, 0xabcd
    /// A quoted literal or a comment; see `Literal` for the individual kinds.
    Lit(Literal),   // literal value, ex. for strings/comments.
    /// A punctuation character; see `Punctuation`.
    Sep(Punctuation),   // punctuation. non-word tokens. operators are lexed as this and later transformed to words.
    /// A line break followed by the given number of leading spaces on the new line.
    Indent(usize),      // indentation. denotes line breaks and scope at which a line starts.
}

/// Quoted literals and comments. Each variant stores the text found between
/// its delimiters; the delimiters themselves are re-added when displayed.
#[derive(Clone, PartialEq)]
pub enum Literal {
    /// Text between single quotes: `'...'`.
    Char(String),
    /// Text between double quotes: `"..."`.
    SingleLineString(String),
    /// Text between triple double quotes: `"""..."""`.
    MultiLineString(String),
    /// Text following a single `#`, up to the end of the line.
    Comment(String),
    /// Text following `##`, up to the end of the line.
    DocComment(String),
    /// Text between `#[` and `]#`.
    MultiLineComment(String),
}

/// All punctuation recognized by the lexer.
/// Note the distinction between FuncLeftParen and TupleLeftParen.
///
/// Parentheses and square brackets each come in two flavours that share a
/// source character: the `Func*`/`Generic*` variants mark a paren/bracket
/// lexed as part of a call or generic-argument list (e.g. one written
/// directly after an identifier), while the `Tuple*`/`Array*` variants mark
/// a free-standing one. A closing variant always mirrors the flavour of the
/// opener it matches.
#[derive(Clone, PartialEq)]
pub enum Punctuation {
    Comma,               // ,
    Period,              // .
    Semicolon,           // ;
    Colon,               // :
    BackTick,            // `
    SingleQuote,         // '
    DoubleQuote,         // "
    FuncLeftParen,       // (
    FuncRightParen,      // )
    TupleLeftParen,      // (
    TupleRightParen,     // )
    GenericLeftBracket,  // [
    GenericRightBracket, // ]
    ArrayLeftBracket,    // [
    ArrayRightBracket,   // ]
    StructLeftBrace,     // {
    StructRightBrace,    // }
    Equals,      // =
    Plus,        // +
    Minus,       // - followed by a space. distinct from Negative below.
    Negative,    // - with no space following: negative binds tightly.
    Times,       // *
    Slash,       // /
    LessThan,    // <
    GreaterThan, // >
    At,          // @
    Sha,         // $
    Tilde,       // ~
    And,         // &
    Percent,     // %
    Or,          // |
    Exclamation, // !
    Question,    // ?
    Caret,       // ^
    Backslash,   // \
}

/// Parses whitespace-sensitive code into an unambiguous TokenStream.
/// Also useful for formatting.
///
/// The input is scanned one `char` at a time. Each token kind is recognised
/// by the arm for its first character, which then peeks ahead and consumes
/// only the characters that belong to that token; whatever follows is left
/// in the stream for the main loop. Literal and comment text is stored
/// verbatim (escape sequences keep their backslash, nested block-comment
/// markers are kept) so that displaying the resulting stream reproduces it.
///
/// # Errors
///
/// * `LexicalError::InvalidIndentation` - a tab reaches the main loop (tabs
///   inside literals/comments, or in a whitespace run that starts with a
///   space, are not rejected).
/// * `LexicalError::MismatchedParens` - a `)` has no open parenthesis to close.
/// * `LexicalError::MismatchedBrackets` - a `]` has no open bracket to close.
/// * `LexicalError::UnknownPunctuation` - any other unrecognised character.
///
/// Unterminated literals/comments and unclosed parens/brackets at end of
/// input are accepted silently.
pub fn tokenize(input: &str) -> Result<TokenStream> {
    // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
    // Tokens are matched by looping within their case until complete.
    // This then eliminates the need for most global parser state: all that is
    // tracked across tokens is which flavour of paren/bracket is currently open.

    use Token::*;
    use Literal::*;
    use Punctuation::*;
    use LexicalError::*;
    enum Paren { Func, Tuple }
    enum Bracket { Generic, Array }

    let mut paren_stack: Vec<Paren> = Vec::new();
    let mut bracket_stack: Vec<Bracket> = Vec::new();
    let mut res: Vec<Token> = Vec::new();

    let mut input = multipeek(input.chars());
    while let Some(c) = input.next() {
        match c {
            ' ' => { // indentation! and whitespace
                if matches!(res.last(), Some(Indent(_))) { // indentation!
                    res.pop(); // discard previous empty or useless Indent token
                    let mut current_indent_level = 1;
                    while let Some(x) = input.peek().copied() {
                        if x == ' ' {
                            current_indent_level += 1;
                            input.next();
                            continue;
                        }
                        // Indentation ends. A line that continues an open expression
                        // (after an infix word, an opening delimiter or a comma) gets no
                        // Indent token, so it is joined to the line before it.
                        let continues_previous_line = match res.last() {
                            Some(Word(a)) => {
                                matches!(a.as_str(), "==" | "and" | "or" | "xor" | "in" | "is")
                            }
                            Some(Sep(sep)) => matches!(
                                sep,
                                FuncLeftParen | TupleLeftParen | GenericLeftBracket |
                                ArrayLeftBracket | StructLeftBrace | Comma
                            ),
                            _ => false,
                        };
                        if !continues_previous_line {
                            res.push(Indent(current_indent_level));
                        }
                        break;
                    }
                } else { // get rid of excess (all) whitespace between words/operators
                    while input.peek().is_some_and(|x| x.is_whitespace() && *x != '\n') {
                        input.next();
                    }
                }
            },
            '\t' => return Err(InvalidIndentation.into()),
            '\n' => res.push(Indent(0)),
            '\'' => { // chars!
                let mut text = String::new();
                while let Some(x) = input.next() {
                    match x {
                        '\'' => break,
                        '\\' => {
                            // Keep the escape verbatim: dropping the backslash would turn
                            // `\n` into `n` and make `\'` impossible to print back.
                            text.push(x);
                            if let Some(y) = input.next() { text.push(y); }
                        },
                        _ => text.push(x)
                    }
                }
                res.push(Lit(Char(text)));
            },
            '"' => { // strings!
                let mut text = String::new();
                if input.peek_nth(0) == Some(&'"') && input.peek_nth(1) == Some(&'"') {
                    // triple quoted strings: raw text up to the next `"""`
                    input.next(); input.next();
                    while let Some(x) = input.next() {
                        if x == '"' && input.peek_nth(0) == Some(&'"') &&
                                       input.peek_nth(1) == Some(&'"') {
                            input.next(); input.next();
                            break;
                        }
                        text.push(x);
                    }
                    res.push(Lit(MultiLineString(text)));
                } else { // single quoted strings
                    while let Some(x) = input.next() {
                        match x {
                            '"' => break,
                            '\\' => { // escapes are kept verbatim, as for chars
                                text.push(x);
                                if let Some(y) = input.next() { text.push(y); }
                            },
                            _ => text.push(x)
                        }
                    }
                    res.push(Lit(SingleLineString(text)));
                }
            },
            '#' => { // comments!
                let mut text = String::new();
                match input.peek().copied() {
                    Some('[') => { // block comment, can be nested
                        input.next();
                        let mut comment_level = 1;
                        // Test the level *before* pulling the next char, so nothing
                        // after the closing `]#` is consumed.
                        while comment_level > 0 {
                            let Some(x) = input.next() else { break };
                            match x {
                                '#' if input.peek() == Some(&'[') => {
                                    comment_level += 1;
                                    input.next();
                                    text.push_str("#["); // nested markers are part of the text
                                },
                                ']' if input.peek() == Some(&'#') => {
                                    comment_level -= 1;
                                    input.next();
                                    if comment_level > 0 { text.push_str("]#"); }
                                },
                                _ => text.push(x)
                            }
                        }
                        res.push(Lit(MultiLineComment(text)));
                    },
                    Some('#') => { // documentation comment, runs til EOL
                        input.next();
                        // The newline is left in the stream so it still yields an Indent.
                        while let Some(x) = input.peek().copied() {
                            if x == '\n' { break; }
                            text.push(x);
                            input.next();
                        }
                        res.push(Lit(DocComment(text)));
                    },
                    _ => { // standard comment, runs til EOL
                        // The newline is left in the stream so it still yields an Indent.
                        while let Some(x) = input.peek().copied() {
                            if x == '\n' { break; }
                            text.push(x);
                            input.next();
                        }
                        res.push(Lit(Comment(text)));
                    }
                }
            },
            c if c.is_alphabetic() || c == '_' => { // valid identifiers!
                let mut text = String::from(c);
                // Peek rather than consume: the character ending the identifier
                // belongs to the next token.
                while let Some(x) = input.peek().copied() {
                    if !(x.is_alphanumeric() || x == '_') { break; }
                    text.push(x);
                    input.next();
                }
                res.push(Word(text)); // pushed here so a word at end of input is kept
                match input.peek().copied() { // () and [] denote both parameters/generics and tuples/arrays
                    Some('(') => { // we must disambiguate by treating those *directly* after words as such
                        input.next();
                        res.push(Sep(FuncLeftParen));
                        paren_stack.push(Paren::Func);
                    },
                    Some('[') => {
                        input.next();
                        res.push(Sep(GenericLeftBracket));
                        bracket_stack.push(Bracket::Generic);
                    },
                    _ => {},
                }
            },
            '0'..='9' => { // numeric literals!
                let mut text = String::from(c);
                // Letters are accepted for prefixes/digits such as 0b, 0x, abcd.
                while let Some(x) = input.peek().copied() {
                    if !(x.is_ascii_alphanumeric() || x == '_') { break; }
                    text.push(x);
                    input.next();
                }
                res.push(Num(text))
            },
            '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
                let sep = if input.peek() == Some(&' ') { Minus } else { Negative };
                res.push(Sep(sep));
            },
            '(' => { // note: FuncParens were matched above, directly after identifiers
                res.push(Sep(TupleLeftParen));
                paren_stack.push(Paren::Tuple);
            },
            '[' => { // note: GenericBrackets were matched above, directly after identifiers
                res.push(Sep(ArrayLeftBracket));
                bracket_stack.push(Bracket::Array);
            },
            ')' => {
                match paren_stack.pop() {
                    Some(Paren::Func) => res.push(Sep(FuncRightParen)),
                    Some(Paren::Tuple) => res.push(Sep(TupleRightParen)),
                    None => return Err(MismatchedParens.into()),
                }
            },
            ']' => {
                match bracket_stack.pop() {
                    Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)),
                    Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)),
                    None => return Err(MismatchedBrackets.into()),
                }
                if input.peek() == Some(&'(') { // parameters following generics
                    res.push(Sep(FuncLeftParen));
                    paren_stack.push(Paren::Func);
                    input.next();
                }
            },
            ',' => res.push(Sep(Comma)),
            '.' => res.push(Sep(Period)),
            ';' => res.push(Sep(Semicolon)),
            ':' => res.push(Sep(Colon)),
            '`' => res.push(Sep(BackTick)),
            '{' => res.push(Sep(StructLeftBrace)),
            '}' => res.push(Sep(StructRightBrace)),
            '=' => res.push(Sep(Equals)),
            '+' => res.push(Sep(Plus)),
            '*' => res.push(Sep(Times)),
            '/' => res.push(Sep(Slash)),
            '<' => res.push(Sep(LessThan)),
            '>' => res.push(Sep(GreaterThan)),
            '@' => res.push(Sep(At)),
            '$' => res.push(Sep(Sha)),
            '~' => res.push(Sep(Tilde)),
            '&' => res.push(Sep(And)),
            '%' => res.push(Sep(Percent)),
            '|' => res.push(Sep(Or)),
            '!' => res.push(Sep(Exclamation)),
            '?' => res.push(Sep(Question)),
            '^' => res.push(Sep(Caret)),
            '\\' => res.push(Sep(Backslash)),
            _ => return Err(UnknownPunctuation.into())
        }
    }
    Ok(TokenStream(res))
}

impl std::fmt::Display for TokenStream {
    /// Writes every token in order, inserting a single space only between two
    /// adjacent word-like tokens (`Word`/`Num`), which would otherwise fuse
    /// into one. All other spacing comes from the tokens' own `Display` impls.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use Token::*;
        // Borrow the previous token instead of cloning it: a clone would
        // allocate a new String for every Word/Num/Lit just to inspect its variant.
        let mut prev: Option<&Token> = None;
        for token in &self.0 {
            let needs_separator = matches!(
                (prev, token),
                (Some(Word(_) | Num(_)), Word(_) | Num(_))
            );
            if needs_separator {
                f.write_str(" ")?;
            }
            write!(f, "{}", token)?;
            prev = Some(token);
        }
        Ok(())
    }
}

impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use Token::*;
        match self {
            Word(token) => write!(f, "{}", token),
            Num(token) => write!(f, "{}", token),
            Lit(lit) => write!(f, "{}", lit),
            Sep(sep) => write!(f, "{}", sep),
            Indent(i) => write!(f, "\n{}", " ".repeat(*i)),
        }
    }
}

impl std::fmt::Display for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use Literal::*;
        match self {
            Char(token) => write!(f, "'{}'", token),
            SingleLineString(token) => write!(f, "\"{}\"", token),
            MultiLineString(token) => write!(f, "\"\"\"{}\"\"\"", token),
            Comment(token) => write!(f, "#{}", token),
            DocComment(token) => write!(f, "##{}", token),
            MultiLineComment(token) => write!(f, "#[{}]#", token),
        }
    }
}

impl std::fmt::Display for Punctuation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use Punctuation::*;
        match self {
            Comma => write!(f, ","),
            Period => write!(f, "."),
            Semicolon => write!(f, ";"),
            Colon => write!(f, ":"),
            BackTick => write!(f, "`"),
            SingleQuote => write!(f, "'"),
            DoubleQuote => write!(f, "\""),
            FuncLeftParen => write!(f, "("),
            FuncRightParen => write!(f, ")"),
            TupleLeftParen => write!(f, " ("),
            TupleRightParen => write!(f, ")"),
            GenericLeftBracket => write!(f, "["),
            GenericRightBracket => write!(f, "]"),
            ArrayLeftBracket => write!(f, " ["),
            ArrayRightBracket => write!(f, "]"),
            StructLeftBrace => write!(f, "{{"),
            StructRightBrace => write!(f, "}}"),
            Equals => write!(f, "="),
            Plus => write!(f, "+"),
            Minus => write!(f, "- "),
            Negative => write!(f, "-"),
            Times => write!(f, "*"),
            Slash => write!(f, "/"),
            LessThan => write!(f, "<"),
            GreaterThan => write!(f, ">"),
            At => write!(f, "@"),
            Sha => write!(f, "$"),
            Tilde => write!(f, "~"),
            And => write!(f, "&"),
            Percent => write!(f, "%"),
            Or => write!(f, "|"),
            Exclamation => write!(f, "!"),
            Question => write!(f, "?"),
            Caret => write!(f, "^"),
            Backslash => write!(f, "\\"),
        }
    }
}