1 files changed, 372 insertions, 310 deletions
diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs
index e0aa76d..287ed9c 100644
--- a/src/frontend/lex.rs
+++ b/src/frontend/lex.rs
@@ -2,25 +2,41 @@ use multipeek::multipeek;
 
 use crate::*;
 
-pub struct TokenStream(Vec<Token>);
+/// We lex input into a TokenStream containing the position of the token and the Token itself.
+pub struct TokenStream(Vec<(usize, Token)>);
 
+/// Iterating over a TokenStream pops both the Tokens and the spans.
+/// The parser only *really* cares about Tokens... spans are for use in error reporting.
 impl IntoIterator for TokenStream {
-    type Item = Token;
-    type IntoIter = std::vec::IntoIter<Token>;
+    type Item = (usize, Token);
+    type IntoIter = std::vec::IntoIter<(usize, Token)>;
 
     fn into_iter(self) -> Self::IntoIter {
         self.0.into_iter()
     }
 }
 
+/// Basic syntax tokens. Form an unambiguous TokenStream.
+#[derive(Clone, PartialEq)]
+pub enum Token {
+    Key(Keyword),       // keyword identifiers. ex. for, use, func...
+    Lit(Literal),       // literal values. ex. for strings/comments.
+    Sep(Punctuation),   // punctuation. non-word tokens.
+    Op(String),         // operators. lexographically separate so the parser can parse infix notation
+    Num(String),        // numeric values. ex. 413, 0b101011, 0xabcd, 612.1024
+    Ident(String),      // identifiers. variable names, function names, method calls...
+    Indent(usize),      // indentation. denotes line breaks and scope at which a line starts
+    End                 // explicit scope end marker. inserted in normalization.
+}
+
 #[derive(Clone, PartialEq, Debug)]
 pub enum LexicalError {
-    InvalidIndentation,
-    MismatchedParens,
-    MismatchedBrackets,
-    UnknownPunctuation,
+    InvalidIndentation(usize),
+    MismatchedParens(usize),
+    MismatchedBrackets(usize),
+    UnknownPunctuation(usize),
+    EarlyEndOfInput,
 }
-
 impl std::fmt::Display for LexicalError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{:?}", self)
@@ -28,18 +44,7 @@ impl std::fmt::Display for LexicalError {
 }
 impl std::error::Error for LexicalError {}
 
-/// **Basic** syntax tokens. Form an unambiguous TokenStream.
-#[derive(Clone, PartialEq)]
-pub enum Token {
-    Key(Keyword),     // keyword identifiers.
-    Word(String),     // non-keyword identifiers.
-    Op(String),       // operators. separate tokens lexographically so the parser knows to parse infix notation.
-    Num(String),      // numeric value, ex. 413, 0b101011, 0xabcd
-    Lit(Literal),     // literal value, ex. for strings/comments.
-    Sep(Punctuation), // punctuation. non-word tokens. ~~operators are lexed as this and later transformed to words.~~
-    Indent(usize),    // indentation. denotes line breaks and scope at which a line starts.
-}
-
+/// Literal text. This can be chars, strings, and comments.
 #[derive(Clone, PartialEq)]
 pub enum Literal {
     Char(String),
@@ -51,7 +56,7 @@ pub enum Literal {
 }
 
 /// Keywords, made explicit for easier use with Rust.
-/// (strings inside match patterns are fucky!!)
+/// (static strings inside match patterns are fucky!!)
 #[derive(Clone, PartialEq)]
 pub enum Keyword {
     Pub, Let, Var, Const,
@@ -86,8 +91,8 @@ pub enum Punctuation {
     GenericRightBracket, // ]
     ArrayLeftBracket,    // [
     ArrayRightBracket,   // ]
-    ScopeLeftBrace,      // {
-    ScopeRightBrace,     // }
+    DependentLeftBrace,  // { (not currently used)
+    DependentRightBrace, // }
     StructLeftBrace,     // {
     StructRightBrace,    // }
     Equals,      // =
@@ -110,320 +115,371 @@ pub enum Punctuation {
     Backslash,   // \
 }
 
-/// Parses whitespace-sensitive code into an unambiguous TokenStream.
-/// Also useful for formatting.
-pub fn tokenize(input: &str) -> Result<TokenStream> {
-    // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
-    // Tokens are matched by looping within their case until complete.
-    // This then eliminates the need for most global parser state. (i hate state)
+struct Input<'a>(multipeek::MultiPeek<std::str::CharIndices<'a>>);
 
-    use Token::*;
-    use Literal::*;
-    use Punctuation::*;
-    use LexicalError::*;
-    enum Paren { Func, Tuple }
-    enum Bracket { Generic, Array }
-    struct State {
-        start_of_line: bool,
-        paren_stack: Vec<Paren>,
-        bracket_stack: Vec<Bracket>,
+impl<'a> Input<'a> {
+    /// Map input.next() to return Results for use with the propagation operator
+    fn next(&mut self) -> Result<char> {
+        self.0.next().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+    }
+
+    fn peek_eat(&mut self, expected: char) -> bool {
+        if self.peek().is_ok_and(|x| x == expected) {
+            self.0.next();
+            true
+        } else {
+            false
+        }
     }
 
+    fn peek(&mut self) -> Result<char> {
+        self.0.peek().copied().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+    }
+
+    fn peek_nth(&mut self, n: usize) -> Result<char> {
+        self.0.peek_nth(n).copied().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+    }
+
+    fn consume_indentation(&mut self) -> usize {
+        let mut count = 0;
+        while self.peek_eat(' ') {
+            count += 1;
+        }
+        if self.peek_eat('\n') {
+            self.consume_indentation()
+        } else {
+            count
+        }
+    }
+
+    fn strip(&mut self) {
+        loop {
+            if !self.peek_eat(' ') {
+                break
+            }
+        }
+    }
+}
+
+use Token::*;
+use Literal::*;
+use Punctuation::*;
+use LexicalError::*;
+enum Paren { Func, Tuple }
+enum Bracket { Generic, Array }
+struct State {
+    start_of_line: bool,
+    paren_stack: Vec<Paren>,
+    bracket_stack: Vec<Bracket>,
+    next_token_is_parameter_punctuation: bool,
+}
+
+/// Parses whitespace-sensitive code into an unambiguous TokenStream.
+pub fn lex(input: &str) -> Result<TokenStream> {
+    let mut input = Input(multipeek(input.char_indices()));
+    let mut res = TokenStream(Vec::new());
     let mut state = State {
         start_of_line: true,
         paren_stack: vec!(),
         bracket_stack: vec!(),
+        next_token_is_parameter_punctuation: false,
     };
+    res.0.push((0, Indent(input.consume_indentation())));
+    loop {
+        input.strip();
+        match input.0.next() {
+            Some((pos, c)) => res.0.push((pos, lex_token(&mut input, &mut state, pos, c)?)),
+            None => break
+        }
+    }
+    Ok(res)
+}
 
-    let mut buf = String::new();
-    let mut res = Vec::new();
-
-    // `char` in rust is four bytes it's fine
-    let mut input = multipeek(input.chars());
-    while let Some(c) = input.next() {
-        match c {
-            ' ' => { // indentation! and whitespace
-                match res.last() {
-                    Some(Indent(_)) => { // indentation!
-                        res.pop(); // discard previous empty or useless Indent token
-                        let mut current_indent_level = 1;
-                        while let Some(x) = input.peek() {
-                            match x {
-                                ' ' => { current_indent_level += 1; input.next(); },
-                                _ => match res.last() { // indentation ends
-                                    Some(Word(a)) if a == "==" || a == "and" || a == "or" ||
-                                                     a == "xor" || a == "in" || a == "is" => break,
-                                    Some(Sep(FuncLeftParen)) | Some(Sep(TupleLeftParen)) |
-                                    Some(Sep(GenericLeftBracket)) | Some(Sep(ArrayLeftBracket)) |
-                                    Some(Sep(StructLeftBrace)) | Some(Sep(Comma)) => break,
-                                    _ => {
-                                        res.push(Indent(current_indent_level));
-                                        break;
-                                    }
-                                }
-                            }
-                        }
-                    },
-                    _ => { // get rid of excess (all) whitespace between words/operators
-                        while input.peek().is_some_and(|x| x.is_whitespace() && x != &'\n') { input.next(); }
+fn lex_token(input: &mut Input, state: &mut State, pos: usize, c: char) -> Result<Token> {
+    Ok(match c {
+        '\n' => Indent(input.consume_indentation()),
+        '\t' => return Err(InvalidIndentation(pos).into()),
+        ' ' => unreachable!(),
+        ':' => Sep(Colon),
+        '=' => Sep(Equals),
+        '\'' => lex_char(input)?,
+        '"' => lex_string(input)?,
+        '#' => lex_comment(input)?,
+        c if c.is_alphabetic() || c == '_' => lex_identifier(input, state, c), // valid identifiers!
+        c if c.is_digit(10) => lex_number(input, c), // numeric literals!
+        '-' => {
+            // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
+            match input.peek()? {
+                ' ' => Sep(Minus),
+                _ => Sep(Negative)
+            }
+        },
+        '(' => {
+            if state.next_token_is_parameter_punctuation {
+                state.next_token_is_parameter_punctuation = false;
+                state.paren_stack.push(Paren::Func);
+                Sep(FuncLeftParen)
+            } else {
+                state.paren_stack.push(Paren::Tuple);
+                Sep(TupleLeftParen)
+            }
+        },
+        '[' => {
+            if state.next_token_is_parameter_punctuation {
+                state.next_token_is_parameter_punctuation = false;
+                state.bracket_stack.push(Bracket::Generic);
+                Sep(GenericLeftBracket)
+            } else {
+                state.bracket_stack.push(Bracket::Array);
+                Sep(ArrayLeftBracket)
+            }
+        },
+        ')' => {
+            match state.paren_stack.pop() {
+                Some(Paren::Func) => Sep(FuncRightParen),
+                Some(Paren::Tuple) => Sep(TupleRightParen),
+                None => return Err(MismatchedParens(pos).into()),
+            }
+        },
+        ']' => {
+            match state.bracket_stack.pop() {
+                Some(Bracket::Generic) => {
+                    if let Ok('(') = input.peek() {
+                        state.next_token_is_parameter_punctuation = true;
                     }
+                    Sep(GenericRightBracket)
+                },
+                Some(Bracket::Array) => Sep(ArrayRightBracket),
+                None => return Err(MismatchedBrackets(pos).into()),
+            }
+        },
+        '`' => { // backticks are used for injections, so generics/parameters may follow
+            // todo: SERIOUS ambiguity issue with `injection`[]
+            match input.peek() {
+                Ok('(') | Ok('[') => {
+                    state.next_token_is_parameter_punctuation = true;
+                },
+                _ => ()
+            }
+            Sep(BackTick)
+        },
+        ',' => Sep(Comma),
+        '.' => Sep(Period),
+        ';' => Sep(Semicolon),
+        '{' => Sep(StructLeftBrace),
+        '}' => Sep(StructRightBrace),
+        '+' => Sep(Plus),
+        '*' => Sep(Times),
+        '/' => Sep(Slash),
+        '<' => Sep(LessThan),
+        '>' => Sep(GreaterThan),
+        '@' => Sep(At),
+        '$' => Sep(Sha),
+        '~' => Sep(Tilde),
+        '&' => Sep(And),
+        '|' => Sep(Or),
+        '!' => Sep(Exclamation),
+        '?' => Sep(Question),
+        '^' => Sep(Caret),
+        '\\' => Sep(Backslash),
+        _ => return Err(UnknownPunctuation(pos).into())
+    })
+}
+
+/// Char ::= '\'' <anything> '\''
+fn lex_char(input: &mut Input) -> Result<Token> {
+    let mut buf = String::new();
+    loop {
+        match input.next()? {
+            '\'' => break,
+            '\\' => {
+                if let y = input.next()? {
+                    buf.push('\\');
+                    buf.push(y);
                 }
             },
-            '\t' => return Err(InvalidIndentation.into()),
-            '\n' => res.push(Indent(0)),
-            '\'' => { // chars!
-                while let Some(x) = input.next() {
-                    match x {
-                        '\'' => break,
-                        '\\' => if let Some(y) = input.next() { buf.push(y) },
-                        _ => buf.push(x)
-                    }
+            x => buf.push(x)
+        }
+    }
+    Ok(Lit(Char(buf)))
+}
+
+/// String ::= '"' <anything> '"' | '"""' <anything> '"""'
+fn lex_string(input: &mut Input) -> Result<Token> {
+    let mut buf = String::new();
+    match (input.peek_nth(0), input.peek_nth(1)) {
+        (Ok('"'), Ok('"')) => { // triple quoted strings
+            input.next()?; input.next()?;
+            loop {
+                match input.next()? {
+                    '"' if input.peek_nth(0)? == '"' &&
+                           input.peek_nth(1)? == '"' => {
+                        input.next()?; input.next()?;
+                        break;
+                   },
+                   x => buf.push(x)
                 }
-                res.push(Lit(Char(String::from(&buf))));
-            },
-            '"' => { // strings!
-                match (input.peek_nth(0).copied(), input.peek_nth(1).copied()) {
-                    (Some('"'), Some('"')) => { // triple quoted strings
-                        input.next(); input.next();
-                        while let Some(x) = input.next() {
-                            match x {
-                                '"' if input.peek_nth(0) == Some(&'"') &&
-                                       input.peek_nth(1) == Some(&'"') => {
-                                    input.next(); input.next();
-                                    break;
-                               },
-                               _ => buf.push(x)
-                            }
-                        }
-                        res.push(Lit(MultiLineString(String::from(&buf))));
+            }
+            Ok(Lit(MultiLineString(buf)))
+        },
+        _ => { // single quoted strings
+            loop {
+                match input.next()? {
+                    '"' => break,
+                    '\\' => if let y = input.next()? {
+                        buf.push(y) // FIXME
                     },
-                    (_, _) => { // single quoted strings
-                        while let Some(x) = input.next() {
-                            match x {
-                                '"' => break,
-                                '\\' => if let Some(y) = input.next() { buf.push(y) },
-                                _ => buf.push(x)
-                            }
-                        }
-                        res.push(Lit(SingleLineString(String::from(&buf))));
-                    }
+                    x => buf.push(x)
                 }
-            },
-            '#' => { // comments!
-                match input.peek() {
-                    Some('[') => { // block comment, can be nested
-                        input.next();
-                        let mut comment_level = 1;
-                        while let Some(x) = input.next() && comment_level > 0 {
-                            match x {
-                                '#' if input.peek() == Some(&'[') => {
-                                    comment_level += 1;
-                                    input.next();
-                                },
-                                ']' if input.peek() == Some(&'#') => {
-                                    comment_level -= 1;
-                                    input.next();
-                                },
-                                _ => buf.push(x)
-                            }
-                        }
-                        res.push(Lit(MultiLineComment(String::from(&buf))));
+            }
+            Ok(Lit(SingleLineString(buf)))
+        }
+    }
+}
+
+/// Comment ::= '#' <anything> '\n' | '#[' Comment | <anything> ']#'
+fn lex_comment(input: &mut Input) -> Result<Token> {
+    let mut buf = String::new();
+    match input.peek() {
+        Ok('[') => { // block comment, can be nested
+            input.next()?;
+            let mut comment_level = 1;
+            while comment_level > 0 {
+                match input.next()? {
+                    '#' if input.peek_eat('[') => {
+                        comment_level += 1;
+                        buf.push_str("#[");
                     },
-                    Some(&'#') => { // documentation comment
-                        input.next();
-                        while let Some(x) = input.peek() {
-                            match x {
-                                '\n' => break,
-                                _ => {
-                                    buf.push(*x);
-                                }
-                            }
-                            input.next();
-                        }
-                        res.push(Lit(DocComment(String::from(&buf))));
+                    ']' if input.peek_eat('#') => {
+                        comment_level -= 1;
+                        buf.push_str("]#");
                     },
-                    _ => { // standard comment, runs til EOL
-                        while let Some(x) = input.peek() {
-                            match x {
-                                '\n' => break,
-                                _ => {
-                                    buf.push(*x);
-                                }
-                            }
-                            input.next();
-                        }
-                        res.push(Lit(Comment(String::from(&buf))));
-                    }
-                }
-            },
-            c if c.is_alphabetic() || c == '_' => { // valid identifiers!
-                buf.push(c);
-                while let Some(x) = input.peek() {
-                    match x {
-                        x if x.is_alphanumeric() || x == &'_' => {
-                            buf.push(*x);
-                            input.next();
-                        },
-                        _ => {
-                            use Keyword::*;
-                            match buf.as_str() { // keywords!
-                                "pub" => res.push(Key(Pub)),
-                                "let" => res.push(Key(Let)),
-                                "var" => res.push(Key(Var)),
-                                "const" => res.push(Key(Const)),
-                                "func"  => res.push(Key(Func)),
-                                "macro" => res.push(Key(Macro)),
-                                "type"  => res.push(Key(Type)),
-                                "mod" => res.push(Key(Mod)),
-                                "use" => res.push(Key(Use)),
-                                "as"  => res.push(Key(As)),
-                                "for"    => res.push(Key(For)),
-                                "while"  => res.push(Key(While)),
-                                "loop"   => res.push(Key(Loop)),
-                                "block"  => res.push(Key(Block)),
-                                "static" => res.push(Key(Static)),
-                                "if"    => res.push(Key(If)),
-                                "when"  => res.push(Key(When)),
-                                "elif"  => res.push(Key(Elif)),
-                                "else"  => res.push(Key(Else)),
-                                "match" => res.push(Key(Match)),
-                                "where" => res.push(Key(Where)),
-                                "try"     => res.push(Key(Try)),
-                                "catch"   => res.push(Key(Catch)),
-                                "finally" => res.push(Key(Finally)),
-                                "struct"    => res.push(Key(Struct)),
-                                "tuple"     => res.push(Key(Tuple)),
-                                "enum"      => res.push(Key(Enum)),
-                                "union"     => res.push(Key(Union)),
-                                "interface" => res.push(Key(Interface)),
-                                "distinct" => res.push(Key(Distinct)),
-                                "ref"      => res.push(Key(Ref)),
-                                "ptr"      => res.push(Key(Ptr)),
-                                "mut"      => res.push(Key(Mut)),
-                                "break"    => res.push(Key(Break)),
-                                "continue" => res.push(Key(Continue)),
-                                "return"   => res.push(Key(Return)),
-                                "in" => res.push(Key(In)),
-                                "is" => res.push(Key(Is)),
-                                "of" => res.push(Key(Of)),
-                                _ => res.push(Word(String::from(&buf)))
-                            }
-                            match x { // () and [] denote both parameters/generics and tuples/arrays
-                                '(' => { // we must disambiguate by treating those *directly* after words as such
-                                    res.push(Sep(FuncLeftParen));
-                                    state.paren_stack.push(Paren::Func);
-                                    input.next();
-                                },
-                                '[' => {
-                                    res.push(Sep(GenericLeftBracket));
-                                    state.bracket_stack.push(Bracket::Generic);
-                                    input.next();
-                                },
-                                _ => {},
-                            }
-                            break;
-                        }
+                    x => buf.push(x)
+                };
+            }
+            Ok(Lit(MultiLineComment(buf)))
+        },
+        Ok('#') => { // documentation comment
+            input.next()?;
+            while let Ok(x) = input.peek() {
+                match x {
+                    '\n' => break,
+                    _ => {
+                        buf.push(x);
+                        input.next()?;
                     }
                 }
-            },
-            '0'..='9' => { // numeric literals!
-                buf.push(c);
-                while let Some(x) = input.peek() {
-                    match x {
-                        'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {
-                            buf.push(*x);
-                            input.next();
-                        },
-                        _ => break
+            }
+            Ok(Lit(DocComment(buf)))
+        },
+        _ => { // standard comment, runs til EOL
+            while let Ok(x) = input.peek() {
+                match x {
+                    '\n' => break,
+                    _ => {
+                        buf.push(x);
+                        input.next()?;
                     }
                 }
-                res.push(Num(String::from(&buf)))
-            },
-            '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
-                match input.peek() {
-                    Some(' ') => res.push(Sep(Minus)),
-                    _ => res.push(Sep(Negative))
-                }
-            },
-            '(' => { // note: FuncParens were matched above, directly after identifiers
-                res.push(Sep(TupleLeftParen));
-                state.paren_stack.push(Paren::Tuple);
-            },
-            '[' => { // note: GenericBrackets were matched above, directly after identifiers
-                res.push(Sep(ArrayLeftBracket));
-                state.bracket_stack.push(Bracket::Array);
-            },
-            ')' => { // match parens
-                match state.paren_stack.pop() {
-                    Some(Paren::Func) => res.push(Sep(FuncRightParen)),
-                    Some(Paren::Tuple) => res.push(Sep(TupleRightParen)),
-                    None => return Err(MismatchedParens.into()),
-                }
-            },
-            ']' => { // match brackets
-                match state.bracket_stack.pop() {
-                    Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)),
-                    Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)),
-                    None => return Err(MismatchedBrackets.into()),
-                }
-                if input.peek() == Some(&'(') { // parameters following generics
-                    res.push(Sep(FuncLeftParen));
-                    state.paren_stack.push(Paren::Func);
-                    input.next();
-                }
+            }
+            Ok(Lit(Comment(buf)))
+        }
+    }
+}
+
+#[allow(unused_assignments)]
+fn lex_identifier(input: &mut Input, state: &mut State, c: char) -> Token {
+    use Keyword::*;
+    let mut buf = String::new();
+    let mut keyword = None;
+    buf.push(c);
+    loop {
+        match input.peek() {
+            Ok(x) if x.is_alphanumeric() || x == '_' => {
+                buf.push(x);
+                let _ = input.next();
             },
-            '`' => { // backticks are used for operators, so generics/parameters may follow
-                res.push(Sep(BackTick)); // todo: backticks could like not be used for operators
-                match input.peek() {
-                    Some('(') => {
-                        res.push(Sep(FuncLeftParen));
-                        state.paren_stack.push(Paren::Func);
-                        input.next();
-                    },
-                    Some('[') => {
-                        res.push(Sep(GenericLeftBracket));
-                        state.bracket_stack.push(Bracket::Generic);
-                        input.next();
-                    },
-                    _ => {}
-                }
+            _ => {
+                keyword = Some(match buf.as_str() { // keywords!
+                    "pub" => Key(Pub),
+                    "let" => Key(Let),
+                    "var" => Key(Var),
+                    "const" => Key(Const),
+                    "func"  => Key(Func),
+                    "macro" => Key(Macro),
+                    "type"  => Key(Type),
+                    "mod" => Key(Mod),
+                    "use" => Key(Use),
+                    "as"  => Key(As),
+                    "for"    => Key(For),
+                    "while"  => Key(While),
+                    "loop"   => Key(Loop),
+                    "block"  => Key(Block),
+                    "static" => Key(Static),
+                    "if"    => Key(If),
+                    "when"  => Key(When),
+                    "elif"  => Key(Elif),
+                    "else"  => Key(Else),
+                    "match" => Key(Match),
+                    "where" => Key(Where),
+                    "try"     => Key(Try),
+                    "catch"   => Key(Catch),
+                    "finally" => Key(Finally),
+                    "struct"    => Key(Struct),
+                    "tuple"     => Key(Tuple),
+                    "enum"      => Key(Enum),
+                    "union"     => Key(Union),
+                    "interface" => Key(Interface),
+                    "distinct" => Key(Distinct),
+                    "ref"      => Key(Ref),
+                    "ptr"      => Key(Ptr),
+                    "mut"      => Key(Mut),
+                    "in" => Key(In),
+                    "is" => Key(Is),
+                    "of" => Key(Of),
+                    "break"    => Key(Break),
+                    "continue" => Key(Continue),
+                    "return"   => Key(Return),
+                    _ => Ident(String::from(&buf))
+                });
+                break
+            }
+        }
+    }
+    // () and [] denote both parameters/generics and tuples/arrays
+    // we must disambiguate by treating those *directly* after words as such
+    match input.peek() {
+        Ok('(') | Ok('[') => state.next_token_is_parameter_punctuation = true,
+        _ => (),
+    }
+    keyword.unwrap()
+}
+
+fn lex_number(input: &mut Input, c: char) -> Token {
+    let mut buf = String::new();
+    buf.push(c);
+    loop {
+        match input.peek() {
+            Ok(x) if x.is_alphanumeric() => { // FIXME
+                buf.push(x);
+                let _ = input.next();
             },
-            ',' => res.push(Sep(Comma)),
-            '.' => res.push(Sep(Period)),
-            ';' => res.push(Sep(Semicolon)),
-            ':' => res.push(Sep(Colon)),
-            '{' => res.push(Sep(StructLeftBrace)),
-            '}' => res.push(Sep(StructRightBrace)),
-            '=' => res.push(Sep(Equals)),
-            '+' => res.push(Sep(Plus)),
-            '*' => res.push(Sep(Times)),
-            '/' => res.push(Sep(Slash)),
-            '<' => res.push(Sep(LessThan)),
-            '>' => res.push(Sep(GreaterThan)),
-            '@' => res.push(Sep(At)),
-            '$' => res.push(Sep(Sha)),
-            '~' => res.push(Sep(Tilde)),
-            '&' => res.push(Sep(And)),
-            '|' => res.push(Sep(Or)),
-            '!' => res.push(Sep(Exclamation)),
-            '?' => res.push(Sep(Question)),
-            '^' => res.push(Sep(Caret)),
-            '\\' => res.push(Sep(Backslash)),
-            _ => return Err(UnknownPunctuation.into())
+            _ => break
         }
-        buf.clear();
     }
-    Ok(TokenStream(res))
+    Num(buf)
 }
 
 impl std::fmt::Display for TokenStream {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         use Token::*;
         let mut prev_token = Indent(0);
-        for token in &self.0 {
-            match (&prev_token, &token) {
-                (Word(_), Word(_)) | (Word(_), Num(_)) |
-                (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?,
+        for (_, token) in &self.0 {
+            match (prev_token, token) { // fixme
+                (Ident(_), Ident(_)) | (Ident(_), Num(_)) |
+                (Num(_), Ident(_)) | (Num(_), Num(_)) |
+                (Key(_), Key(_)) | (Key(_), Ident(_)) | (Ident(_), Key(_))
+                    => write!(f, " {}", token)?,
                 _ => write!(f, "{}", token)?,
             }
             prev_token = token.clone();
@@ -437,11 +493,11 @@ impl std::fmt::Display for Token {
         use Token::*;
         match self {
             Key(word) => write!(f, "{}", word),
-            Word(val) => write!(f, "{}", val),
-            Num(val) => write!(f, "{}", val),
+            Ident(val) | Op(val) | Num(val) => write!(f, "{}", val),
             Lit(lit) => write!(f, "{}", lit),
             Sep(sep) => write!(f, "{}", sep),
             Indent(i) => write!(f, "\n{}", " ".repeat(*i)),
+            End => write!(f, "end"),
         }
     }
 }
@@ -526,8 +582,8 @@ impl std::fmt::Display for Punctuation {
             GenericRightBracket => write!(f, "]"),
             ArrayLeftBracket => write!(f, " ["),
             ArrayRightBracket => write!(f, "]"),
-            ScopeLeftBrace => write!(f, "{{"),
-            ScopeRightBrace => write!(f, "}}"),
+            DependentLeftBrace => write!(f, "{{"),
+            DependentRightBrace => write!(f, "}}"),
             StructLeftBrace => write!(f, "{{"),
             StructRightBrace => write!(f, "}}"),
             Equals => write!(f, "="),
@@ -551,3 +607,9 @@ impl std::fmt::Display for Punctuation {
         }
     }
 }
+
+pub fn test() {
+    let input = std::fs::read_to_string("std/ast.pk").unwrap();
+    let lexed = lex(&input).unwrap();
+    println!("{}", lexed);
+}