about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/lex.rs 96
1 files changed, 91 insertions, 5 deletions
diff --git a/src/lex.rs b/src/lex.rs
index 7e31476..49be668 100644
--- a/src/lex.rs
+++ b/src/lex.rs
@@ -1,7 +1,7 @@
use multipeek::multipeek;
pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
-pub type TokenStream = Vec<Token>;
+pub struct TokenStream(Vec<Token>); // newtype over the token list; `tokenize` returns it and it implements `Display`
#[derive(Clone, PartialEq, Debug)]
pub enum LexicalError {
@@ -35,6 +35,7 @@ pub enum Literal {
MultiLineString(String),
Comment(String),
DocComment(String),
+ MultiLineComment(String),
}
/// All punctuation recognized by the lexer.
@@ -80,7 +81,6 @@ pub enum Punctuation {
/// Parses whitespace-sensitive code into an unambiguous TokenStream.
/// Also useful for formatting.
-// todo: rewrite indentation parsing to do what nim does, annotate tokens with indentation preceding
pub fn tokenize(input: &str) -> Result<TokenStream> {
// The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
// Tokens are matched by looping within their case until complete.
@@ -118,7 +118,7 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
let mut current_indent_level = 1;
while let Some(x) = input.peek() {
match x {
- ' ' => current_indent_level += 1,
+ ' ' => { current_indent_level += 1; input.next(); },
_ => match res.last() { // indentation ends
Some(Word(a)) if a == "==" || a == "and" || a == "or" ||
a == "xor" || a == "in" || a == "is" => break,
@@ -196,7 +196,7 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
_ => buf.push(x)
}
}
- res.push(Lit(Comment(String::from(&buf))));
+ res.push(Lit(MultiLineComment(String::from(&buf))));
},
Some(&'#') => { // documentation comment
input.next();
@@ -318,5 +318,91 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
}
buf.clear();
}
- return Ok(res);
+ Ok(TokenStream(res))
+}
+
+/// Writes every token of the stream in order, using each token's own `Display` output.
+///
+/// A single space is emitted before a `Word` or `Num` token that directly follows
+/// another `Word` or `Num` token, so adjacent words and numbers stay separated.
+/// Every other pair of tokens is written back-to-back.
+impl std::fmt::Display for TokenStream {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use Token::*;
+        // Borrow the previous token instead of cloning it on every iteration.
+        // `None` (nothing written yet) never matches a `Word`/`Num` arm, so the
+        // first token is written without a leading space.
+        let mut prev_token: Option<&Token> = None;
+        for token in &self.0 {
+            match (prev_token, token) {
+                (Some(Word(_)), Word(_)) | (Some(Word(_)), Num(_)) |
+                (Some(Num(_)), Word(_)) | (Some(Num(_)), Num(_)) => write!(f, " {}", token)?,
+                _ => write!(f, "{}", token)?,
+            }
+            prev_token = Some(token);
+        }
+        Ok(())
+    }
+}
+
+/// Renders a single token.
+///
+/// `Word`, `Num`, `Lit` and `Sep` are written through their payload's `Display`
+/// output; `Indent(n)` is written as a newline followed by `n` spaces.
+impl std::fmt::Display for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Pick the payload to print; indentation is the only variant with its own layout.
+        let payload: &dyn std::fmt::Display = match self {
+            Token::Word(text) => text,
+            Token::Num(digits) => digits,
+            Token::Lit(literal) => literal,
+            Token::Sep(mark) => mark,
+            Token::Indent(depth) => return write!(f, "\n{}", " ".repeat(*depth)),
+        };
+        write!(f, "{}", payload)
+    }
+}
+
+/// Renders a literal wrapped in the delimiters chosen for its variant:
+/// `'…'` for characters, `"…"` and `"""…"""` for strings, `#…` and `##…` for
+/// line and doc comments, and `#[…]#` for multi-line comments.
+impl std::fmt::Display for Literal {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // (opening delimiter, payload, closing delimiter) for each variant.
+        let (open, body, close): (&str, &dyn std::fmt::Display, &str) = match self {
+            Literal::Char(ch) => ("'", ch, "'"),
+            Literal::SingleLineString(text) => ("\"", text, "\""),
+            Literal::MultiLineString(text) => ("\"\"\"", text, "\"\"\""),
+            Literal::Comment(text) => ("#", text, ""),
+            Literal::DocComment(text) => ("##", text, ""),
+            Literal::MultiLineComment(text) => ("#[", text, "]#"),
+        };
+        write!(f, "{}{}{}", open, body, close)
+    }
+}
+
+/// Renders a punctuation token as its fixed textual symbol.
+///
+/// A few symbols carry spacing of their own: `TupleLeftParen` and
+/// `ArrayLeftBracket` are written with a leading space, and `Minus` with a
+/// trailing one, while `FuncLeftParen`, `GenericLeftBracket` and `Negative`
+/// are written bare.
+impl std::fmt::Display for Punctuation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let symbol: &'static str = match self {
+            // separators and quoting
+            Punctuation::Comma => ",",
+            Punctuation::Period => ".",
+            Punctuation::Semicolon => ";",
+            Punctuation::Colon => ":",
+            Punctuation::BackTick => "`",
+            Punctuation::SingleQuote => "'",
+            Punctuation::DoubleQuote => "\"",
+            // grouping
+            Punctuation::FuncLeftParen => "(",
+            Punctuation::FuncRightParen => ")",
+            Punctuation::TupleLeftParen => " (",
+            Punctuation::TupleRightParen => ")",
+            Punctuation::GenericLeftBracket => "[",
+            Punctuation::GenericRightBracket => "]",
+            Punctuation::ArrayLeftBracket => " [",
+            Punctuation::ArrayRightBracket => "]",
+            Punctuation::StructLeftBrace => "{",
+            Punctuation::StructRightBrace => "}",
+            // operators
+            Punctuation::Equals => "=",
+            Punctuation::Plus => "+",
+            Punctuation::Minus => "- ",
+            Punctuation::Negative => "-",
+            Punctuation::Times => "*",
+            Punctuation::Slash => "/",
+            Punctuation::LessThan => "<",
+            Punctuation::GreaterThan => ">",
+            Punctuation::At => "@",
+            // NOTE(review): `Sha` is rendered as `$`; confirm this matches what the lexer reads for it.
+            Punctuation::Sha => "$",
+            Punctuation::Tilde => "~",
+            Punctuation::And => "&",
+            Punctuation::Percent => "%",
+            Punctuation::Or => "|",
+            Punctuation::Exclamation => "!",
+            Punctuation::Question => "?",
+            Punctuation::Caret => "^",
+            Punctuation::Backslash => "\\",
+        };
+        f.write_str(symbol)
+    }
}