Diffstat (limited to 'src/frontend/lex.rs')
-rw-r--r--  src/frontend/lex.rs  682
1 file changed, 372 insertions, 310 deletions
diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs
index e0aa76d..287ed9c 100644
--- a/src/frontend/lex.rs
+++ b/src/frontend/lex.rs
@@ -2,25 +2,41 @@ use multipeek::multipeek;
use crate::*;
-pub struct TokenStream(Vec<Token>);
+/// We lex input into a TokenStream containing each Token and the byte offset at which it starts.
+pub struct TokenStream(Vec<(usize, Token)>);
+/// Iterating over a TokenStream pops both the Tokens and the spans.
+/// The parser only *really* cares about Tokens... spans are for use in error reporting.
impl IntoIterator for TokenStream {
- type Item = Token;
- type IntoIter = std::vec::IntoIter<Token>;
+ type Item = (usize, Token);
+ type IntoIter = std::vec::IntoIter<(usize, Token)>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
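+// Sketch of downstream use (hypothetical caller): offsets ride along for diagnostics.
+//   for (pos, token) in lex(source)? {
+//       println!("byte {}: {}", pos, token);
+//   }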
+/// Basic syntax tokens. Form an unambiguous TokenStream.
+#[derive(Clone, PartialEq)]
+pub enum Token {
+ Key(Keyword), // keyword identifiers. ex. for, use, func...
+ Lit(Literal), // literal values. ex. for strings/comments.
+ Sep(Punctuation), // punctuation. non-word tokens.
+ Op(String), // operators. lexically distinct so the parser can parse infix notation
+ Num(String), // numeric values. ex. 413, 0b101011, 0xabcd, 612.1024
+ Ident(String), // identifiers. variable names, function names, method calls...
+ Indent(usize), // indentation. denotes line breaks and scope at which a line starts
+ End // explicit scope end marker. inserted in normalization.
+}
+
#[derive(Clone, PartialEq, Debug)]
pub enum LexicalError {
- InvalidIndentation,
- MismatchedParens,
- MismatchedBrackets,
- UnknownPunctuation,
+ InvalidIndentation(usize),
+ MismatchedParens(usize),
+ MismatchedBrackets(usize),
+ UnknownPunctuation(usize),
+ EarlyEndOfInput,
}
-
impl std::fmt::Display for LexicalError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
@@ -28,18 +44,7 @@ impl std::fmt::Display for LexicalError {
}
impl std::error::Error for LexicalError {}
-/// **Basic** syntax tokens. Form an unambiguous TokenStream.
-#[derive(Clone, PartialEq)]
-pub enum Token {
- Key(Keyword), // keyword identifiers.
- Word(String), // non-keyword identifiers.
- Op(String), // operators. separate tokens lexographically so the parser knows to parse infix notation.
- Num(String), // numeric value, ex. 413, 0b101011, 0xabcd
- Lit(Literal), // literal value, ex. for strings/comments.
- Sep(Punctuation), // punctuation. non-word tokens. ~~operators are lexed as this and later transformed to words.~~
- Indent(usize), // indentation. denotes line breaks and scope at which a line starts.
-}
-
+/// Literal text. This can be chars, strings, and comments.
#[derive(Clone, PartialEq)]
pub enum Literal {
Char(String),
@@ -51,7 +56,7 @@ pub enum Literal {
}
/// Keywords, made explicit for easier use with Rust.
-/// (strings inside match patterns are fucky!!)
+/// (static strings inside match patterns are fucky!!)
#[derive(Clone, PartialEq)]
pub enum Keyword {
Pub, Let, Var, Const,
@@ -86,8 +91,8 @@ pub enum Punctuation {
GenericRightBracket, // ]
ArrayLeftBracket, // [
ArrayRightBracket, // ]
- ScopeLeftBrace, // {
- ScopeRightBrace, // }
+ DependentLeftBrace, // { (not currently used)
+ DependentRightBrace, // }
StructLeftBrace, // {
StructRightBrace, // }
Equals, // =
@@ -110,320 +115,371 @@ pub enum Punctuation {
Backslash, // \
}
-/// Parses whitespace-sensitive code into an unambiguous TokenStream.
-/// Also useful for formatting.
-pub fn tokenize(input: &str) -> Result<TokenStream> {
- // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
- // Tokens are matched by looping within their case until complete.
- // This then eliminates the need for most global parser state. (i hate state)
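+/// Thin wrapper over a multipeek char iterator: yields byte offsets for spans
+/// and turns end-of-input into LexicalError::EarlyEndOfInput for use with `?`.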
+struct Input<'a>(multipeek::MultiPeek<std::str::CharIndices<'a>>);
- use Token::*;
- use Literal::*;
- use Punctuation::*;
- use LexicalError::*;
- enum Paren { Func, Tuple }
- enum Bracket { Generic, Array }
- struct State {
- start_of_line: bool,
- paren_stack: Vec<Paren>,
- bracket_stack: Vec<Bracket>,
+impl<'a> Input<'a> {
+ /// Map input.next() to return Results for use with the propagation operator
+ fn next(&mut self) -> Result<char> {
+ self.0.next().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+ }
+
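+ /// Consume the next char if and only if it matches `expected`.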
+ fn peek_eat(&mut self, expected: char) -> bool {
+ if self.peek().is_ok_and(|x| x == expected) {
+ self.0.next();
+ true
+ } else {
+ false
+ }
}
+ fn peek(&mut self) -> Result<char> {
+ self.0.peek().copied().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+ }
+
+ fn peek_nth(&mut self, n: usize) -> Result<char> {
+ self.0.peek_nth(n).copied().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+ }
+
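+ /// Count the spaces at the start of a line. Wholly blank lines are skipped,
+ /// so this returns the indentation of the next non-empty line.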
+ fn consume_indentation(&mut self) -> usize {
+ let mut count = 0;
+ while self.peek_eat(' ') {
+ count += 1;
+ }
+ if self.peek_eat('\n') {
+ self.consume_indentation()
+ } else {
+ count
+ }
+ }
+
+ fn strip(&mut self) {
+ while self.peek_eat(' ') {} // eat inline whitespace between tokens
+ }
+}
+
+use Token::*;
+use Literal::*;
+use Punctuation::*;
+use LexicalError::*;
+enum Paren { Func, Tuple }
+enum Bracket { Generic, Array }
+struct State {
+ start_of_line: bool,
+ paren_stack: Vec<Paren>,
+ bracket_stack: Vec<Bracket>,
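+ // set when a `(` or `[` must open parameters/generics: directly after an
+ // identifier, a closing generic bracket, or a backtick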
+ next_token_is_parameter_punctuation: bool,
+}
+
+/// Parses whitespace-sensitive code into an unambiguous TokenStream.
+pub fn lex(input: &str) -> Result<TokenStream> {
+ let mut input = Input(multipeek(input.char_indices()));
+ let mut res = TokenStream(Vec::new());
let mut state = State {
start_of_line: true,
paren_stack: vec!(),
bracket_stack: vec!(),
+ next_token_is_parameter_punctuation: false,
};
+ res.0.push((0, Indent(input.consume_indentation())));
+ loop {
+ input.strip();
+ match input.0.next() {
+ Some((pos, c)) => res.0.push((pos, lex_token(&mut input, &mut state, pos, c)?)),
+ None => break
+ }
+ }
+ Ok(res)
+}
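+// For example, lexing "if x:\n  y" yields (byte offsets first):
+//   (0, Indent(0)), (0, Key(If)), (3, Ident("x")), (4, Sep(Colon)),
+//   (5, Indent(2)), (8, Ident("y"))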
- let mut buf = String::new();
- let mut res = Vec::new();
-
- // `char` in rust is four bytes it's fine
- let mut input = multipeek(input.chars());
- while let Some(c) = input.next() {
- match c {
- ' ' => { // indentation! and whitespace
- match res.last() {
- Some(Indent(_)) => { // indentation!
- res.pop(); // discard previous empty or useless Indent token
- let mut current_indent_level = 1;
- while let Some(x) = input.peek() {
- match x {
- ' ' => { current_indent_level += 1; input.next(); },
- _ => match res.last() { // indentation ends
- Some(Word(a)) if a == "==" || a == "and" || a == "or" ||
- a == "xor" || a == "in" || a == "is" => break,
- Some(Sep(FuncLeftParen)) | Some(Sep(TupleLeftParen)) |
- Some(Sep(GenericLeftBracket)) | Some(Sep(ArrayLeftBracket)) |
- Some(Sep(StructLeftBrace)) | Some(Sep(Comma)) => break,
- _ => {
- res.push(Indent(current_indent_level));
- break;
- }
- }
- }
- }
- },
- _ => { // get rid of excess (all) whitespace between words/operators
- while input.peek().is_some_and(|x| x.is_whitespace() && x != &'\n') { input.next(); }
+fn lex_token(input: &mut Input, state: &mut State, pos: usize, c: char) -> Result<Token> {
+ Ok(match c {
+ '\n' => Indent(input.consume_indentation()),
+ '\t' => return Err(InvalidIndentation(pos).into()),
+ ' ' => unreachable!(), // lex() strips spaces before dispatching
+ ':' => Sep(Colon),
+ '=' => Sep(Equals),
+ '\'' => lex_char(input)?,
+ '"' => lex_string(input)?,
+ '#' => lex_comment(input)?,
+ c if c.is_alphabetic() || c == '_' => lex_identifier(input, state, c), // valid identifiers!
+ c if c.is_digit(10) => lex_number(input, c), // numeric literals!
+ '-' => {
+ // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
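+ // e.g. `a - b` lexes Sep(Minus); `-b` and `a -b` lex Sep(Negative).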
+ match input.peek()? {
+ ' ' => Sep(Minus),
+ _ => Sep(Negative)
+ }
+ },
+ '(' => {
+ if state.next_token_is_parameter_punctuation {
+ state.next_token_is_parameter_punctuation = false;
+ state.paren_stack.push(Paren::Func);
+ Sep(FuncLeftParen)
+ } else {
+ state.paren_stack.push(Paren::Tuple);
+ Sep(TupleLeftParen)
+ }
+ },
+ '[' => {
+ if state.next_token_is_parameter_punctuation {
+ state.next_token_is_parameter_punctuation = false;
+ state.bracket_stack.push(Bracket::Generic);
+ Sep(GenericLeftBracket)
+ } else {
+ state.bracket_stack.push(Bracket::Array);
+ Sep(ArrayLeftBracket)
+ }
+ },
+ ')' => {
+ match state.paren_stack.pop() {
+ Some(Paren::Func) => Sep(FuncRightParen),
+ Some(Paren::Tuple) => Sep(TupleRightParen),
+ None => return Err(MismatchedParens(pos).into()),
+ }
+ },
+ ']' => {
+ match state.bracket_stack.pop() {
+ Some(Bracket::Generic) => {
+ if let Ok('(') = input.peek() {
+ state.next_token_is_parameter_punctuation = true;
}
+ Sep(GenericRightBracket)
+ },
+ Some(Bracket::Array) => Sep(ArrayRightBracket),
+ None => return Err(MismatchedBrackets(pos).into()),
+ }
+ },
+ '`' => { // backticks are used for injections, so generics/parameters may follow
+ // todo: SERIOUS ambiguity issue with `injection`[]
+ match input.peek() {
+ Ok('(') | Ok('[') => {
+ state.next_token_is_parameter_punctuation = true;
+ },
+ _ => ()
+ }
+ Sep(BackTick)
+ },
+ ',' => Sep(Comma),
+ '.' => Sep(Period),
+ ';' => Sep(Semicolon),
+ '{' => Sep(StructLeftBrace),
+ '}' => Sep(StructRightBrace),
+ '+' => Sep(Plus),
+ '*' => Sep(Times),
+ '/' => Sep(Slash),
+ '<' => Sep(LessThan),
+ '>' => Sep(GreaterThan),
+ '@' => Sep(At),
+ '$' => Sep(Sha),
+ '~' => Sep(Tilde),
+ '&' => Sep(And),
+ '|' => Sep(Or),
+ '!' => Sep(Exclamation),
+ '?' => Sep(Question),
+ '^' => Sep(Caret),
+ '\\' => Sep(Backslash),
+ _ => return Err(UnknownPunctuation(pos).into())
+ })
+}
+
+/// Char ::= '\'' <anything> '\''
+fn lex_char(input: &mut Input) -> Result<Token> {
+ let mut buf = String::new();
+ loop {
+ match input.next()? {
+ '\'' => break,
+ '\\' => { // keep escapes verbatim: '\n' lexes to Char("\\n")
+ buf.push('\\');
+ buf.push(input.next()?);
+ },
- '\t' => return Err(InvalidIndentation.into()),
- '\n' => res.push(Indent(0)),
- '\'' => { // chars!
- while let Some(x) = input.next() {
- match x {
- '\'' => break,
- '\\' => if let Some(y) = input.next() { buf.push(y) },
- _ => buf.push(x)
- }
+ x => buf.push(x)
+ }
+ }
+ Ok(Lit(Char(buf)))
+}
+
+/// String ::= '"' <anything> '"' | '"""' <anything> '"""'
+fn lex_string(input: &mut Input) -> Result<Token> {
+ let mut buf = String::new();
+ match (input.peek_nth(0), input.peek_nth(1)) {
+ (Ok('"'), Ok('"')) => { // triple quoted strings
+ input.next()?; input.next()?;
+ loop {
+ match input.next()? {
+ '"' if input.peek_nth(0)? == '"' &&
+ input.peek_nth(1)? == '"' => {
+ input.next()?; input.next()?;
+ break;
+ },
+ x => buf.push(x)
}
- res.push(Lit(Char(String::from(&buf))));
- },
- '"' => { // strings!
- match (input.peek_nth(0).copied(), input.peek_nth(1).copied()) {
- (Some('"'), Some('"')) => { // triple quoted strings
- input.next(); input.next();
- while let Some(x) = input.next() {
- match x {
- '"' if input.peek_nth(0) == Some(&'"') &&
- input.peek_nth(1) == Some(&'"') => {
- input.next(); input.next();
- break;
- },
- _ => buf.push(x)
- }
- }
- res.push(Lit(MultiLineString(String::from(&buf))));
+ }
+ Ok(Lit(MultiLineString(buf)))
+ },
+ _ => { // single quoted strings
+ loop {
+ match input.next()? {
+ '"' => break,
+ '\\' => { // keep escapes verbatim, as lex_char does
+ buf.push('\\');
+ buf.push(input.next()?);
+ },
- (_, _) => { // single quoted strings
- while let Some(x) = input.next() {
- match x {
- '"' => break,
- '\\' => if let Some(y) = input.next() { buf.push(y) },
- _ => buf.push(x)
- }
- }
- res.push(Lit(SingleLineString(String::from(&buf))));
- }
+ x => buf.push(x)
}
- },
- '#' => { // comments!
- match input.peek() {
- Some('[') => { // block comment, can be nested
- input.next();
- let mut comment_level = 1;
- while let Some(x) = input.next() && comment_level > 0 {
- match x {
- '#' if input.peek() == Some(&'[') => {
- comment_level += 1;
- input.next();
- },
- ']' if input.peek() == Some(&'#') => {
- comment_level -= 1;
- input.next();
- },
- _ => buf.push(x)
- }
- }
- res.push(Lit(MultiLineComment(String::from(&buf))));
+ }
+ Ok(Lit(SingleLineString(buf)))
+ }
+ }
+}
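+// e.g. `"""a"b"""` lexes to MultiLineString(`a"b`): lone quotes are kept verbatim.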
+
+/// Comment ::= '#' <anything> '\n' | '##' <anything> '\n' | '#[' (Comment | <anything>)* ']#'
+fn lex_comment(input: &mut Input) -> Result<Token> {
+ let mut buf = String::new();
+ match input.peek() {
+ Ok('[') => { // block comment, can be nested
+ input.next()?;
+ let mut comment_level = 1;
+ while comment_level > 0 {
+ match input.next()? {
+ '#' if input.peek_eat('[') => {
+ comment_level += 1;
+ buf.push_str("#[");
},
- Some(&'#') => { // documentation comment
- input.next();
- while let Some(x) = input.peek() {
- match x {
- '\n' => break,
- _ => {
- buf.push(*x);
- }
- }
- input.next();
- }
- res.push(Lit(DocComment(String::from(&buf))));
+ ']' if input.peek_eat('#') => {
+ comment_level -= 1;
+ buf.push_str("]#");
},
- _ => { // standard comment, runs til EOL
- while let Some(x) = input.peek() {
- match x {
- '\n' => break,
- _ => {
- buf.push(*x);
- }
- }
- input.next();
- }
- res.push(Lit(Comment(String::from(&buf))));
- }
- }
- },
- c if c.is_alphabetic() || c == '_' => { // valid identifiers!
- buf.push(c);
- while let Some(x) = input.peek() {
- match x {
- x if x.is_alphanumeric() || x == &'_' => {
- buf.push(*x);
- input.next();
- },
- _ => {
- use Keyword::*;
- match buf.as_str() { // keywords!
- "pub" => res.push(Key(Pub)),
- "let" => res.push(Key(Let)),
- "var" => res.push(Key(Var)),
- "const" => res.push(Key(Const)),
- "func" => res.push(Key(Func)),
- "macro" => res.push(Key(Macro)),
- "type" => res.push(Key(Type)),
- "mod" => res.push(Key(Mod)),
- "use" => res.push(Key(Use)),
- "as" => res.push(Key(As)),
- "for" => res.push(Key(For)),
- "while" => res.push(Key(While)),
- "loop" => res.push(Key(Loop)),
- "block" => res.push(Key(Block)),
- "static" => res.push(Key(Static)),
- "if" => res.push(Key(If)),
- "when" => res.push(Key(When)),
- "elif" => res.push(Key(Elif)),
- "else" => res.push(Key(Else)),
- "match" => res.push(Key(Match)),
- "where" => res.push(Key(Where)),
- "try" => res.push(Key(Try)),
- "catch" => res.push(Key(Catch)),
- "finally" => res.push(Key(Finally)),
- "struct" => res.push(Key(Struct)),
- "tuple" => res.push(Key(Tuple)),
- "enum" => res.push(Key(Enum)),
- "union" => res.push(Key(Union)),
- "interface" => res.push(Key(Interface)),
- "distinct" => res.push(Key(Distinct)),
- "ref" => res.push(Key(Ref)),
- "ptr" => res.push(Key(Ptr)),
- "mut" => res.push(Key(Mut)),
- "break" => res.push(Key(Break)),
- "continue" => res.push(Key(Continue)),
- "return" => res.push(Key(Return)),
- "in" => res.push(Key(In)),
- "is" => res.push(Key(Is)),
- "of" => res.push(Key(Of)),
- _ => res.push(Word(String::from(&buf)))
- }
- match x { // () and [] denote both parameters/generics and tuples/arrays
- '(' => { // we must disambiguate by treating those *directly* after words as such
- res.push(Sep(FuncLeftParen));
- state.paren_stack.push(Paren::Func);
- input.next();
- },
- '[' => {
- res.push(Sep(GenericLeftBracket));
- state.bracket_stack.push(Bracket::Generic);
- input.next();
- },
- _ => {},
- }
- break;
- }
+ x => buf.push(x)
+ };
+ }
+ Ok(Lit(MultiLineComment(buf)))
+ },
+ Ok('#') => { // documentation comment
+ input.next()?;
+ while let Ok(x) = input.peek() {
+ match x {
+ '\n' => break,
+ _ => {
+ buf.push(x);
+ input.next()?;
}
}
- },
- '0'..='9' => { // numeric literals!
- buf.push(c);
- while let Some(x) = input.peek() {
- match x {
- 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {
- buf.push(*x);
- input.next();
- },
- _ => break
+ }
+ Ok(Lit(DocComment(buf)))
+ },
+ _ => { // standard comment, runs til EOL
+ while let Ok(x) = input.peek() {
+ match x {
+ '\n' => break,
+ _ => {
+ buf.push(x);
+ input.next()?;
}
}
- res.push(Num(String::from(&buf)))
- },
- '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
- match input.peek() {
- Some(' ') => res.push(Sep(Minus)),
- _ => res.push(Sep(Negative))
- }
- },
- '(' => { // note: FuncParens were matched above, directly after identifiers
- res.push(Sep(TupleLeftParen));
- state.paren_stack.push(Paren::Tuple);
- },
- '[' => { // note: GenericBrackets were matched above, directly after identifiers
- res.push(Sep(ArrayLeftBracket));
- state.bracket_stack.push(Bracket::Array);
- },
- ')' => { // match parens
- match state.paren_stack.pop() {
- Some(Paren::Func) => res.push(Sep(FuncRightParen)),
- Some(Paren::Tuple) => res.push(Sep(TupleRightParen)),
- None => return Err(MismatchedParens.into()),
- }
- },
- ']' => { // match brackets
- match state.bracket_stack.pop() {
- Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)),
- Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)),
- None => return Err(MismatchedBrackets.into()),
- }
- if input.peek() == Some(&'(') { // parameters following generics
- res.push(Sep(FuncLeftParen));
- state.paren_stack.push(Paren::Func);
- input.next();
- }
+ }
+ Ok(Lit(Comment(buf)))
+ }
+ }
+}
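+// e.g. "#[ a #[ b ]# c ]#" lexes to MultiLineComment(" a #[ b ]# c "):
+// nested delimiters stay in the text, the outermost pair does not.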
+
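+/// Ident ::= (Alpha | '_') (AlphaNumeric | '_')*, with keywords split off by lookup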
+fn lex_identifier(input: &mut Input, state: &mut State, c: char) -> Token {
+ use Keyword::*;
+ let mut buf = String::new();
+ buf.push(c);
+ loop {
+ match input.peek() {
+ Ok(x) if x.is_alphanumeric() || x == '_' => {
+ buf.push(x);
+ let _ = input.next();
},
- '`' => { // backticks are used for operators, so generics/parameters may follow
- res.push(Sep(BackTick)); // todo: backticks could like not be used for operators
- match input.peek() {
- Some('(') => {
- res.push(Sep(FuncLeftParen));
- state.paren_stack.push(Paren::Func);
- input.next();
- },
- Some('[') => {
- res.push(Sep(GenericLeftBracket));
- state.bracket_stack.push(Bracket::Generic);
- input.next();
- },
- _ => {}
- }
+ _ => break
+ }
+ }
+ let token = match buf.as_str() { // keywords!
+ "pub" => Key(Pub),
+ "let" => Key(Let),
+ "var" => Key(Var),
+ "const" => Key(Const),
+ "func" => Key(Func),
+ "macro" => Key(Macro),
+ "type" => Key(Type),
+ "mod" => Key(Mod),
+ "use" => Key(Use),
+ "as" => Key(As),
+ "for" => Key(For),
+ "while" => Key(While),
+ "loop" => Key(Loop),
+ "block" => Key(Block),
+ "static" => Key(Static),
+ "if" => Key(If),
+ "when" => Key(When),
+ "elif" => Key(Elif),
+ "else" => Key(Else),
+ "match" => Key(Match),
+ "where" => Key(Where),
+ "try" => Key(Try),
+ "catch" => Key(Catch),
+ "finally" => Key(Finally),
+ "struct" => Key(Struct),
+ "tuple" => Key(Tuple),
+ "enum" => Key(Enum),
+ "union" => Key(Union),
+ "interface" => Key(Interface),
+ "distinct" => Key(Distinct),
+ "ref" => Key(Ref),
+ "ptr" => Key(Ptr),
+ "mut" => Key(Mut),
+ "in" => Key(In),
+ "is" => Key(Is),
+ "of" => Key(Of),
+ "break" => Key(Break),
+ "continue" => Key(Continue),
+ "return" => Key(Return),
+ _ => Ident(String::from(&buf))
+ };
+ // () and [] denote both parameters/generics and tuples/arrays
+ // we must disambiguate by treating those *directly* after words as such
+ match input.peek() {
+ Ok('(') | Ok('[') => state.next_token_is_parameter_punctuation = true,
+ _ => (),
+ }
+ token
+}
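+// e.g. `foo(` lexes to Ident("foo") then Sep(FuncLeftParen),
+// while `foo (` lexes to Ident("foo") then Sep(TupleLeftParen).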
+
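+/// Num ::= [0-9] ([a-zA-Z0-9_] | '.' [0-9])*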
+fn lex_number(input: &mut Input, c: char) -> Token {
+ let mut buf = String::new();
+ buf.push(c);
+ loop {
+ match input.peek() {
+ Ok(x) if x.is_alphanumeric() || x == '_' => { // FIXME: permissive, accepts e.g. 123abc
+ buf.push(x);
+ let _ = input.next();
+ },
+ Ok('.') if input.peek_nth(1).is_ok_and(|x| x.is_digit(10)) => {
+ buf.push('.'); // a digit after '.' continues the number: 612.1024, but not 1..5 or 413.foo
+ let _ = input.next();
+ },
- ',' => res.push(Sep(Comma)),
- '.' => res.push(Sep(Period)),
- ';' => res.push(Sep(Semicolon)),
- ':' => res.push(Sep(Colon)),
- '{' => res.push(Sep(StructLeftBrace)),
- '}' => res.push(Sep(StructRightBrace)),
- '=' => res.push(Sep(Equals)),
- '+' => res.push(Sep(Plus)),
- '*' => res.push(Sep(Times)),
- '/' => res.push(Sep(Slash)),
- '<' => res.push(Sep(LessThan)),
- '>' => res.push(Sep(GreaterThan)),
- '@' => res.push(Sep(At)),
- '$' => res.push(Sep(Sha)),
- '~' => res.push(Sep(Tilde)),
- '&' => res.push(Sep(And)),
- '|' => res.push(Sep(Or)),
- '!' => res.push(Sep(Exclamation)),
- '?' => res.push(Sep(Question)),
- '^' => res.push(Sep(Caret)),
- '\\' => res.push(Sep(Backslash)),
- _ => return Err(UnknownPunctuation.into())
+ _ => break
}
- buf.clear();
}
- Ok(TokenStream(res))
+ Num(buf)
}
impl std::fmt::Display for TokenStream {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Token::*;
let mut prev_token = Indent(0);
- for token in &self.0 {
- match (&prev_token, &token) {
- (Word(_), Word(_)) | (Word(_), Num(_)) |
- (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?,
+ for (_, token) in &self.0 {
+ match (&prev_token, token) { // insert a space between adjacent word-like tokens
+ (Ident(_), Ident(_)) | (Ident(_), Num(_)) |
+ (Num(_), Ident(_)) | (Num(_), Num(_)) |
+ (Key(_), Key(_)) | (Key(_), Ident(_)) | (Ident(_), Key(_))
+ => write!(f, " {}", token)?,
_ => write!(f, "{}", token)?,
}
prev_token = token.clone();
@@ -437,11 +493,11 @@ impl std::fmt::Display for Token {
use Token::*;
match self {
Key(word) => write!(f, "{}", word),
- Word(val) => write!(f, "{}", val),
- Num(val) => write!(f, "{}", val),
+ Ident(val) | Op(val) | Num(val) => write!(f, "{}", val),
Lit(lit) => write!(f, "{}", lit),
Sep(sep) => write!(f, "{}", sep),
Indent(i) => write!(f, "\n{}", " ".repeat(*i)),
+ End => write!(f, "end"),
}
}
}
@@ -526,8 +582,8 @@ impl std::fmt::Display for Punctuation {
GenericRightBracket => write!(f, "]"),
ArrayLeftBracket => write!(f, " ["),
ArrayRightBracket => write!(f, "]"),
- ScopeLeftBrace => write!(f, "{{"),
- ScopeRightBrace => write!(f, "}}"),
+ DependentLeftBrace => write!(f, "{{"),
+ DependentRightBrace => write!(f, "}}"),
StructLeftBrace => write!(f, "{{"),
StructRightBrace => write!(f, "}}"),
Equals => write!(f, "="),
@@ -551,3 +607,9 @@ impl std::fmt::Display for Punctuation {
}
}
}
+
+pub fn test() {
+ let input = std::fs::read_to_string("std/ast.pk").unwrap();
+ let lexed = lex(&input).unwrap();
+ println!("{}", lexed);
+}