compiler: impl Display for TokenStream

author: JJ 2023-10-26 20:58:17 +0000
committer: JJ 2023-10-26 20:59:32 +0000
commit: a323c5cfcadcfd464db5583136d5e149e5345da2 (patch)
tree: da72955af68d8dc1d05da63bb0991239600b573c
parent: 292d4c2748e06a639545ae23c9ee31360c3b76f3 (diff)
1 files changed, 91 insertions, 5 deletions
diff --git a/src/lex.rs b/src/lex.rs
index 7e31476..49be668 100644
--- a/src/lex.rs
+++ b/src/lex.rs
@@ -1,7 +1,7 @@
 use multipeek::multipeek;
 
 pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
-pub type TokenStream = Vec<Token>;
+pub struct TokenStream(Vec<Token>); // newtype (not an alias) so this module can `impl Display` for it
 
 #[derive(Clone, PartialEq, Debug)]
 pub enum LexicalError {
@@ -35,6 +35,7 @@ pub enum Literal {
     MultiLineString(String),
     Comment(String),
     DocComment(String),
+    MultiLineComment(String),
 }
 
 /// All punctuation recognized by the lexer.
@@ -80,7 +81,6 @@ pub enum Punctuation {
 
 /// Parses whitespace-sensitive code into an unambiguous TokenStream.
 /// Also useful for formatting.
-// todo: rewrite indentation parsing to do what nim does, annotate tokens with indentation preceding
 pub fn tokenize(input: &str) -> Result<TokenStream> {
     // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
     // Tokens are matched by looping within their case until complete.
@@ -118,7 +118,7 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
                         let mut current_indent_level = 1;
                         while let Some(x) = input.peek() {
                             match x {
-                                ' ' => current_indent_level += 1,
+                                ' ' => { current_indent_level += 1; input.next(); },
                                 _ => match res.last() { // indentation ends
                                     Some(Word(a)) if a == "==" || a == "and" || a == "or" ||
                                                      a == "xor" || a == "in" || a == "is" => break,
@@ -196,7 +196,7 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
                                 _ => buf.push(x)
                             }
                         }
-                        res.push(Lit(Comment(String::from(&buf))));
+                        res.push(Lit(MultiLineComment(String::from(&buf))));
                     },
                     Some(&'#') => { // documentation comment
                         input.next();
@@ -318,5 +318,91 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
         }
         buf.clear();
     }
-    return Ok(res);
+    Ok(TokenStream(res))
+}
+
+/// Renders the tokens back into source text by writing each one in order,
+/// inserting a single space wherever two word/number tokens are adjacent.
+impl std::fmt::Display for TokenStream {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut prev_wordlike = false;
+        for token in &self.0 {
+            let wordlike = matches!(token, Token::Word(_) | Token::Num(_));
+            // Only the previous token's kind matters, so a flag replaces a cloned token.
+            let gap = if prev_wordlike && wordlike { " " } else { "" };
+            write!(f, "{}{}", gap, token)?;
+            prev_wordlike = wordlike;
+        }
+        Ok(())
+    }
+}
+
+/// Writes a single token; an `Indent(n)` becomes a newline followed by `n` spaces.
+impl std::fmt::Display for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Word(text) => write!(f, "{}", text),
+            Self::Num(digits) => write!(f, "{}", digits),
+            Self::Lit(literal) => write!(f, "{}", literal),
+            Self::Sep(separator) => write!(f, "{}", separator),
+            Self::Indent(width) => write!(f, "\n{}", " ".repeat(*width)),
+        }
+    }
+}
+
+/// Writes a literal's stored text wrapped in the delimiters chosen per variant below.
+impl std::fmt::Display for Literal {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Char(body) => write!(f, "'{}'", body),
+            Self::SingleLineString(body) => write!(f, "\"{}\"", body),
+            Self::MultiLineString(body) => write!(f, "\"\"\"{}\"\"\"", body),
+            Self::Comment(body) => write!(f, "#{}", body),
+            Self::DocComment(body) => write!(f, "##{}", body),
+            Self::MultiLineComment(body) => write!(f, "#[{}]#", body),
+        }
+    }
+}
+
+/// Writes the fixed text for a punctuation token; a few variants carry a built-in space.
+impl std::fmt::Display for Punctuation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Comma => ",",
+            Self::Period => ".",
+            Self::Semicolon => ";",
+            Self::Colon => ":",
+            Self::BackTick => "`",
+            Self::SingleQuote => "'",
+            Self::DoubleQuote => "\"",
+            Self::FuncLeftParen => "(",
+            Self::FuncRightParen => ")",
+            Self::TupleLeftParen => " (",
+            Self::TupleRightParen => ")",
+            Self::GenericLeftBracket => "[",
+            Self::GenericRightBracket => "]",
+            Self::ArrayLeftBracket => " [",
+            Self::ArrayRightBracket => "]",
+            Self::StructLeftBrace => "{",
+            Self::StructRightBrace => "}",
+            Self::Equals => "=",
+            Self::Plus => "+",
+            Self::Minus => "- ",
+            Self::Negative => "-",
+            Self::Times => "*",
+            Self::Slash => "/",
+            Self::LessThan => "<",
+            Self::GreaterThan => ">",
+            Self::At => "@",
+            Self::Sha => "$",
+            Self::Tilde => "~",
+            Self::And => "&",
+            Self::Percent => "%",
+            Self::Or => "|",
+            Self::Exclamation => "!",
+            Self::Question => "?",
+            Self::Caret => "^",
+            Self::Backslash => "\\",
+        })
+    }
 }
author	JJ	2023-10-26 20:58:17 +0000
committer	JJ	2023-10-26 20:59:32 +0000
commit	a323c5cfcadcfd464db5583136d5e149e5345da2 (patch)
tree	da72955af68d8dc1d05da63bb0991239600b573c
parent	292d4c2748e06a639545ae23c9ee31360c3b76f3 (diff)