diff options
Diffstat (limited to 'src/lex.rs')
-rw-r--r-- | src/lex.rs | 107 |
1 files changed, 105 insertions, 2 deletions
@@ -30,7 +30,8 @@ impl std::error::Error for LexicalError {}
 /// **Basic** syntax tokens. Form an unambiguous TokenStream.
 #[derive(Clone, PartialEq)]
 pub enum Token {
-    Word(String), // identifiers.
+    Key(Keyword), // keyword identifiers.
+    Word(String), // non-keyword identifiers.
     Num(String), // numeric value, ex. 413, 0b101011, 0xabcd
     Lit(Literal), // literal value, ex. for strings/comments.
     Sep(Punctuation), // punctuation. non-word tokens. operators are lexed as this and later transformed to words.
@@ -47,6 +48,23 @@ pub enum Literal {
     MultiLineComment(String),
 }
 
+/// Keywords, made explicit for easier use with Rust.
+/// (strings inside match patterns are fucky!!)
+#[derive(Clone, PartialEq)]
+pub enum Keyword {
+    Pub, Let, Var, Const,
+    Func, Macro, Type,
+    Mod, From, Import,
+    For, While, Loop,
+    Block, Static,
+    If, When, Elif, Else, Match,
+    Try, Catch, Finally,
+    Struct, Tuple, Enum, Union, Interface,
+    Distinct, Ref, // todo: Mut once figured out
+    Break, Continue, Return,
+    In, Is, Of, As,
+}
+
 /// All punctuation recognized by the lexer.
 /// Note the distinction between FuncLeftParen and TupleLeftParen.
 #[derive(Clone, PartialEq)]
@@ -243,7 +261,47 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
                         input.next();
                     },
                     _ => {
-                        res.push(Word(String::from(&buf)));
+                        use Keyword::*;
+                        match buf.as_str() { // keywords!
+                            "pub" => res.push(Key(Pub)),
+                            "let" => res.push(Key(Let)),
+                            "var" => res.push(Key(Var)),
+                            "const" => res.push(Key(Const)),
+                            "func" => res.push(Key(Func)),
+                            "macro" => res.push(Key(Macro)),
+                            "type" => res.push(Key(Type)),
+                            "mod" => res.push(Key(Mod)),
+                            "from" => res.push(Key(From)),
+                            "import" => res.push(Key(Import)),
+                            "for" => res.push(Key(For)),
+                            "while" => res.push(Key(While)),
+                            "loop" => res.push(Key(Loop)),
+                            "block" => res.push(Key(Block)),
+                            "static" => res.push(Key(Static)),
+                            "if" => res.push(Key(If)),
+                            "when" => res.push(Key(When)),
+                            "elif" => res.push(Key(Elif)),
+                            "else" => res.push(Key(Else)),
+                            "match" => res.push(Key(Match)),
+                            "try" => res.push(Key(Try)),
+                            "catch" => res.push(Key(Catch)),
+                            "finally" => res.push(Key(Finally)),
+                            "struct" => res.push(Key(Struct)),
+                            "tuple" => res.push(Key(Tuple)),
+                            "enum" => res.push(Key(Enum)),
+                            "union" => res.push(Key(Union)),
+                            "interface" => res.push(Key(Interface)),
+                            "distinct" => res.push(Key(Distinct)),
+                            "ref" => res.push(Key(Ref)),
+                            "break" => res.push(Key(Break)),
+                            "continue" => res.push(Key(Continue)),
+                            "return" => res.push(Key(Return)),
+                            "in" => res.push(Key(In)),
+                            "is" => res.push(Key(Is)),
+                            "of" => res.push(Key(Of)),
+                            "as" => res.push(Key(As)),
+                            _ => res.push(Word(String::from(&buf)))
+                        }
                         match x { // () and [] denote both parameters/generics and tuples/arrays
                             '(' => { // we must disambiguate by treating those *directly* after words as such
                                 res.push(Sep(FuncLeftParen));
@@ -372,6 +430,7 @@ impl std::fmt::Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         use Token::*;
         match self {
+            Key(word) => write!(f, "{}", word),
             Word(val) => write!(f, "{}", val),
             Num(val) => write!(f, "{}", val),
             Lit(lit) => write!(f, "{}", lit),
@@ -395,6 +454,50 @@ impl std::fmt::Display for Literal {
     }
 }
 
+impl std::fmt::Display for Keyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use Keyword::*;
+        match self {
+            Pub => write!(f, "pub"),
+            Let => write!(f, "let"),
+            Var => write!(f, "var"),
+            Const => write!(f, "const"),
+            Func => write!(f, "func"),
+            Macro => write!(f, "macro"),
+            Type => write!(f, "type"),
+            Mod => write!(f, "mod"),
+            From => write!(f, "from"),
+            Import => write!(f, "import"),
+            For => write!(f, "for"),
+            While => write!(f, "while"),
+            Loop => write!(f, "loop"),
+            Block => write!(f, "block"),
+            Static => write!(f, "static"),
+            If => write!(f, "if"),
+            When => write!(f, "when"),
+            Elif => write!(f, "elif"),
+            Else => write!(f, "else"),
+            Match => write!(f, "match"),
+            Try => write!(f, "try"),
+            Catch => write!(f, "catch"),
+            Finally => write!(f, "finally"),
+            Struct => write!(f, "struct"),
+            Tuple => write!(f, "tuple"),
+            Enum => write!(f, "enum"),
+            Union => write!(f, "union"),
+            Interface => write!(f, "interface"),
+            Distinct => write!(f, "distinct"),
+            Ref => write!(f, "ref"),
+            Break => write!(f, "break"),
+            Continue => write!(f, "continue"),
+            Return => write!(f, "return"),
+            In => write!(f, "in"),
+            Is => write!(f, "is"),
+            Of => write!(f, "of"),
+            As => write!(f, "as"),
+        }
+    }
+}
 impl std::fmt::Display for Punctuation {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         use Punctuation::*; |