diff options
-rw-r--r-- | src/lex.rs | 96 |
1 files changed, 91 insertions, 5 deletions
@@ -1,7 +1,7 @@ use multipeek::multipeek; pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>; -pub type TokenStream = Vec<Token>; +pub struct TokenStream(Vec<Token>); #[derive(Clone, PartialEq, Debug)] pub enum LexicalError { @@ -35,6 +35,7 @@ pub enum Literal { MultiLineString(String), Comment(String), DocComment(String), + MultiLineComment(String), } /// All punctuation recognized by the lexer. @@ -80,7 +81,6 @@ pub enum Punctuation { /// Parses whitespace-sensitive code into an unambiguous TokenStream. /// Also useful for formatting. -// todo: rewrite indentation parsing to do what nim does, annotate tokens with indentation preceding pub fn tokenize(input: &str) -> Result<TokenStream> { // The design of this lexer utilizes to great extent multipeek's arbitrary peeking. // Tokens are matched by looping within their case until complete. @@ -118,7 +118,7 @@ pub fn tokenize(input: &str) -> Result<TokenStream> { let mut current_indent_level = 1; while let Some(x) = input.peek() { match x { - ' ' => current_indent_level += 1, + ' ' => { current_indent_level += 1; input.next(); }, _ => match res.last() { // indentation ends Some(Word(a)) if a == "==" || a == "and" || a == "or" || a == "xor" || a == "in" || a == "is" => break, @@ -196,7 +196,7 @@ pub fn tokenize(input: &str) -> Result<TokenStream> { _ => buf.push(x) } } - res.push(Lit(Comment(String::from(&buf)))); + res.push(Lit(MultiLineComment(String::from(&buf)))); }, Some(&'#') => { // documentation comment input.next(); @@ -318,5 +318,91 @@ pub fn tokenize(input: &str) -> Result<TokenStream> { } buf.clear(); } - return Ok(res); + Ok(TokenStream(res)) +} + +impl std::fmt::Display for TokenStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Token::*; + let mut prev_token = Indent(0); + for token in &self.0 { + match (&prev_token, &token) { + (Word(_), Word(_)) | (Word(_), Num(_)) | + (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?, + _ => write!(f, "{}", token)?, + } + prev_token = token.clone(); + } + Ok(()) + } +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Token::*; + match self { + Word(token) => write!(f, "{}", token), + Num(token) => write!(f, "{}", token), + Lit(lit) => write!(f, "{}", lit), + Sep(sep) => write!(f, "{}", sep), + Indent(i) => write!(f, "\n{}", " ".repeat(*i)), + } + } +} + +impl std::fmt::Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Literal::*; + match self { + Char(token) => write!(f, "'{}'", token), + SingleLineString(token) => write!(f, "\"{}\"", token), + MultiLineString(token) => write!(f, "\"\"\"{}\"\"\"", token), + Comment(token) => write!(f, "#{}", token), + DocComment(token) => write!(f, "##{}", token), + MultiLineComment(token) => write!(f, "#[{}]#", token), + } + } +} + +impl std::fmt::Display for Punctuation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Punctuation::*; + match self { + Comma => write!(f, ","), + Period => write!(f, "."), + Semicolon => write!(f, ";"), + Colon => write!(f, ":"), + BackTick => write!(f, "`"), + SingleQuote => write!(f, "'"), + DoubleQuote => write!(f, "\""), + FuncLeftParen => write!(f, "("), + FuncRightParen => write!(f, ")"), + TupleLeftParen => write!(f, " ("), + TupleRightParen => write!(f, ")"), + GenericLeftBracket => write!(f, "["), + GenericRightBracket => write!(f, "]"), + ArrayLeftBracket => write!(f, " ["), + ArrayRightBracket => write!(f, "]"), + StructLeftBrace => write!(f, "{{"), + StructRightBrace => write!(f, "}}"), + Equals => write!(f, "="), + Plus => write!(f, "+"), + Minus => write!(f, "- "), + Negative => write!(f, "-"), + Times => write!(f, "*"), + Slash => write!(f, "/"), + LessThan => write!(f, "<"), + GreaterThan => write!(f, ">"), + At => write!(f, "@"), + Sha => write!(f, "$"), + Tilde => write!(f, "~"), + And => write!(f, "&"), + Percent => write!(f, "%"), + Or => write!(f, "|"), + Exclamation => write!(f, "!"), + Question => write!(f, "?"), + Caret => write!(f, "^"), + Backslash => write!(f, "\\"), + } + } } |