diff options
author | JJ | 2023-10-31 09:49:41 +0000 |
---|---|---|
committer | JJ | 2023-10-31 09:50:21 +0000 |
commit | 1c14500ed698f1dc21b4b634a174af89b6318b07 (patch) | |
tree | 9dad300eec0585e6ee0d41cd0f8740da2e269a28 /src/frontend | |
parent | 87d74952a614daa7075aeecef462ff51c4dc46e0 (diff) |
compiler: restructure codebase
Diffstat (limited to 'src/frontend')
-rw-r--r-- | src/frontend/ast.rs | 116 | ||||
-rw-r--r-- | src/frontend/lex.rs | 542 | ||||
-rw-r--r-- | src/frontend/mod.rs | 3 | ||||
-rw-r--r-- | src/frontend/parse.rs | 297 |
4 files changed, 958 insertions, 0 deletions
diff --git a/src/frontend/ast.rs b/src/frontend/ast.rs new file mode 100644 index 0000000..6c7963e --- /dev/null +++ b/src/frontend/ast.rs @@ -0,0 +1,116 @@ +/// Representation of Tokens, function names, the like... +pub type Id = String; + +/// Puck's fundamental types. +pub enum Type { + Void, Never, + Integer, Float, String, // char et al are defined later + Func{from: Box<Type>, to: Box<Type>}, // todo: multiple params, effects + Struct(Vec<(Id, Box<Type>)>), + Tuple(Vec<(Option<String>, Box<Type>)>), + Union(Vec<(Id, Box<Type>)>), + Interface { + funcs: Vec<Sig>, + for_type: Option<Box<Type>>, + }, + Array{size: usize, kind: Box<Type>}, + List(Box<Type>), + Slice(Box<Type>), // todo: plus ownership + Reference(Box<Type>), + Mutable(Box<Type>), // parameters only + Static(Box<Type>), // parameters only + Alias{ id: Id, params: Vec<Type> }, // todo: this is wrong +} + +/// Function signatures. +pub struct Sig { + effect: Option<Id>, + id: Id, + generics: Vec<(Id, Option<Type>)>, + params: Vec<Type>, + result: Option<Type> +} + +/// Patterns are recognizable given zero context. +/// This is why there is a generic Number term and no Bool term. +/// Also can be considered to be a Term/Value. +pub enum Pattern { + Ident(Id), // type aliases, union variants, calls... + Number(i64), Float(f64), + Char(char), String(String), + Struct(Vec<StructPattern>), + Tuple(Vec<TuplePattern>), + List(Vec<Expr>), // arrays, slices, lists +} + +pub struct StructPattern { field: Id, value: Expr } +pub struct TuplePattern { field: Option<Id>, value: Expr } + +/// Expressions introduce a new binding or bindings, in some regard. +pub enum Binding { + Let { + id: Pattern, // id: Pattern supports ex. `let (a, b) = ...` + kind: Option<Type>, + value: Box<Expr> + }, + Var { + id: Pattern, + kind: Option<Type>, + value: Option<Box<Expr>> // variable bindings can be delayed + }, + Const { + public: bool, + id: Pattern, + kind: Option<Type>, + value: Box<Expr> + }, + FuncDecl { + public: bool, + effect: Option<Id>, + id: Id, + generics: Vec<GenericDecl>, + params: Vec<ParamDecl>, + kind: Type, + body: Vec<Expr> + }, + TypeDecl { id: Id, generics: Vec<Id>, alias: Type }, + Import { from: Option<Id>, imports: Vec<Id>, alias: Option<Id> }, + Module { id: Id, body: Vec<Expr> }, +} + +pub struct GenericDecl { id: Id, kind: Option<Type> } +pub struct ParamDecl { id: Id, kind: Type } + +/// Expressions related to control flow. +pub enum Control { + Call { id: Id, params: Vec<Expr> }, // function calls, macro invocations, field access... + If { + branches: Vec<CondBranch>, + else_body: Option<Vec<Expr>> + }, + Try { + body: Vec<Expr>, + catches: Vec<CatchBranch>, + finally: Option<Vec<Expr>> + }, + Match { + item: Pattern, + branches: Vec<MatchBranch> + }, + Block { id: Option<Id>, body: Vec<Expr> }, + Static { body: Vec<Expr> }, + For { binding: Pattern, range: Box<Expr>, body: Vec<Expr> }, + While { cond: Box<Expr>, body: Vec<Expr> }, + Loop { body: Vec<Expr> }, +} + +pub struct CondBranch { cond: Expr, body: Vec<Expr> } +pub struct CatchBranch { exceptions: Vec<Id>, binding: Option<Id>, body: Vec<Expr> } +pub struct MatchBranch { pattern: Pattern, guard: Option<Expr>, body: Vec<Expr> } + +/// Expressions are either Patterns, Bindings, or Control flow constructs. +pub enum Expr { + Pattern(Pattern), + Binding(Binding), + Control(Control), +} diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs new file mode 100644 index 0000000..771ba38 --- /dev/null +++ b/src/frontend/lex.rs @@ -0,0 +1,542 @@ +use multipeek::multipeek; + +pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>; +pub struct TokenStream(Vec<Token>); + +impl IntoIterator for TokenStream { + type Item = Token; + type IntoIter = std::vec::IntoIter<Token>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +#[derive(Clone, PartialEq, Debug)] +pub enum LexicalError { + InvalidIndentation, + MismatchedParens, + MismatchedBrackets, + UnknownPunctuation, +} + +impl std::fmt::Display for LexicalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} +impl std::error::Error for LexicalError {} + +/// **Basic** syntax tokens. Form an unambiguous TokenStream. +#[derive(Clone, PartialEq)] +pub enum Token { + Key(Keyword), // keyword identifiers. + Word(String), // non-keyword identifiers. + Num(String), // numeric value, ex. 413, 0b101011, 0xabcd + Lit(Literal), // literal value, ex. for strings/comments. + Sep(Punctuation), // punctuation. non-word tokens. operators are lexed as this and later transformed to words. + Indent(usize), // indentation. denotes line breaks and scope at which a line starts. +} + +#[derive(Clone, PartialEq)] +pub enum Literal { + Char(String), + SingleLineString(String), + MultiLineString(String), + Comment(String), + DocComment(String), + MultiLineComment(String), +} + +/// Keywords, made explicit for easier use with Rust. +/// (strings inside match patterns are fucky!!) +#[derive(Clone, PartialEq)] +pub enum Keyword { + Pub, Let, Var, Const, + Func, Macro, Type, + Mod, From, Import, + For, While, Loop, + Block, Static, + If, When, Elif, Else, Match, + Try, Catch, Finally, + Struct, Tuple, Enum, Union, Interface, + Distinct, Ref, // todo: Mut once figured out + Break, Continue, Return, + In, Is, Of, As, +} + +/// All punctuation recognized by the lexer. +/// Note the distinction between FuncLeftParen and TupleLeftParen. +#[derive(Clone, PartialEq)] +pub enum Punctuation { + Comma, // , + Period, // . + Semicolon, // ; + Colon, // : + BackTick, // ` + SingleQuote, // ' + DoubleQuote, // " + FuncLeftParen, // ( + FuncRightParen, // ) + TupleLeftParen, // ( + TupleRightParen, // ) + GenericLeftBracket, // [ + GenericRightBracket, // ] + ArrayLeftBracket, // [ + ArrayRightBracket, // ] + StructLeftBrace, // } + StructRightBrace, // } + Equals, // = + Plus, // + + Minus, // distinction between minus and negative. + Negative, // negative binds tightly: there is no whitespace following. + Times, // * + Slash, // / + LessThan, // < + GreaterThan, // > + At, // @ + Sha, // $ + Tilde, // ~ + And, // & + Percent, // % + Or, // | + Exclamation, // ! + Question, // ? + Caret, // ^ + Backslash, // \ +} + +/// Parses whitespace-sensitive code into an unambiguous TokenStream. +/// Also useful for formatting. +pub fn tokenize(input: &str) -> Result<TokenStream> { + // The design of this lexer utilizes to great extent multipeek's arbitrary peeking. + // Tokens are matched by looping within their case until complete. + // This then eliminates the need for most global parser state. (i hate state) + + use Token::*; + use Literal::*; + use Punctuation::*; + use LexicalError::*; + enum Paren { Func, Tuple } + enum Bracket { Generic, Array } + struct State { + start_of_line: bool, + paren_stack: Vec<Paren>, + bracket_stack: Vec<Bracket>, + } + + let mut state = State { + start_of_line: true, + paren_stack: vec!(), + bracket_stack: vec!(), + }; + + let mut buf = String::new(); + let mut res = Vec::new(); + + // `char` in rust is four bytes it's fine + let mut input = multipeek(input.chars()); + while let Some(c) = input.next() { + match c { + ' ' => { // indentation! and whitespace + match res.last() { + Some(Indent(_)) => { // indentation! + res.pop(); // discard previous empty or useless Indent token + let mut current_indent_level = 1; + while let Some(x) = input.peek() { + match x { + ' ' => { current_indent_level += 1; input.next(); }, + _ => match res.last() { // indentation ends + Some(Word(a)) if a == "==" || a == "and" || a == "or" || + a == "xor" || a == "in" || a == "is" => break, + Some(Sep(FuncLeftParen)) | Some(Sep(TupleLeftParen)) | + Some(Sep(GenericLeftBracket)) | Some(Sep(ArrayLeftBracket)) | + Some(Sep(StructLeftBrace)) | Some(Sep(Comma)) => break, + _ => { + res.push(Indent(current_indent_level)); + break; + } + } + } + } + }, + _ => { // get rid of excess (all) whitespace between words/operators + while input.peek().is_some_and(|x| x.is_whitespace() && x != &'\n') { input.next(); } + } + } + }, + '\t' => return Err(InvalidIndentation.into()), + '\n' => res.push(Indent(0)), + '\'' => { // chars! + while let Some(x) = input.next() { + match x { + '\'' => break, + '\\' => if let Some(y) = input.next() { buf.push(y) }, + _ => buf.push(x) + } + } + res.push(Lit(Char(String::from(&buf)))); + }, + '"' => { // strings! + match (input.peek_nth(0).copied(), input.peek_nth(1).copied()) { + (Some('"'), Some('"')) => { // triple quoted strings + input.next(); input.next(); + while let Some(x) = input.next() { + match x { + '"' if input.peek_nth(0) == Some(&'"') && + input.peek_nth(1) == Some(&'"') => { + input.next(); input.next(); + break; + }, + _ => buf.push(x) + } + } + res.push(Lit(MultiLineString(String::from(&buf)))); + }, + (_, _) => { // single quoted strings + while let Some(x) = input.next() { + match x { + '"' => break, + '\\' => if let Some(y) = input.next() { buf.push(y) }, + _ => buf.push(x) + } + } + res.push(Lit(SingleLineString(String::from(&buf)))); + } + } + }, + '#' => { // comments! + match input.peek() { + Some('[') => { // block comment, can be nested + input.next(); + let mut comment_level = 1; + while let Some(x) = input.next() && comment_level > 0 { + match x { + '#' if input.peek() == Some(&'[') => { + comment_level += 1; + input.next(); + }, + ']' if input.peek() == Some(&'#') => { + comment_level -= 1; + input.next(); + }, + _ => buf.push(x) + } + } + res.push(Lit(MultiLineComment(String::from(&buf)))); + }, + Some(&'#') => { // documentation comment + input.next(); + while let Some(x) = input.peek() { + match x { + '\n' => break, + _ => { + buf.push(*x); + } + } + input.next(); + } + res.push(Lit(DocComment(String::from(&buf)))); + }, + _ => { // standard comment, runs til EOL + while let Some(x) = input.peek() { + match x { + '\n' => break, + _ => { + buf.push(*x); + } + } + input.next(); + } + res.push(Lit(Comment(String::from(&buf)))); + } + } + }, + c if c.is_alphabetic() || c == '_' => { // valid identifiers! + buf.push(c); + while let Some(x) = input.peek() { + match x { + x if x.is_alphanumeric() || x == &'_' => { + buf.push(*x); + input.next(); + }, + _ => { + use Keyword::*; + match buf.as_str() { // keywords! + "pub" => res.push(Key(Pub)), + "let" => res.push(Key(Let)), + "var" => res.push(Key(Var)), + "const" => res.push(Key(Const)), + "func" => res.push(Key(Func)), + "macro" => res.push(Key(Macro)), + "type" => res.push(Key(Type)), + "mod" => res.push(Key(Mod)), + "from" => res.push(Key(From)), + "import" => res.push(Key(Import)), + "for" => res.push(Key(For)), + "while" => res.push(Key(While)), + "loop" => res.push(Key(Loop)), + "block" => res.push(Key(Block)), + "static" => res.push(Key(Static)), + "if" => res.push(Key(If)), + "when" => res.push(Key(When)), + "elif" => res.push(Key(Elif)), + "else" => res.push(Key(Else)), + "match" => res.push(Key(Match)), + "try" => res.push(Key(Try)), + "catch" => res.push(Key(Catch)), + "finally" => res.push(Key(Finally)), + "struct" => res.push(Key(Struct)), + "tuple" => res.push(Key(Tuple)), + "enum" => res.push(Key(Enum)), + "union" => res.push(Key(Union)), + "interface" => res.push(Key(Interface)), + "distinct" => res.push(Key(Distinct)), + "ref" => res.push(Key(Ref)), + "break" => res.push(Key(Break)), + "continue" => res.push(Key(Continue)), + "return" => res.push(Key(Return)), + "in" => res.push(Key(In)), + "is" => res.push(Key(Is)), + "of" => res.push(Key(Of)), + "as" => res.push(Key(As)), + _ => res.push(Word(String::from(&buf))) + } + match x { // () and [] denote both parameters/generics and tuples/arrays + '(' => { // we must disambiguate by treating those *directly* after words as such + res.push(Sep(FuncLeftParen)); + state.paren_stack.push(Paren::Func); + input.next(); + }, + '[' => { + res.push(Sep(GenericLeftBracket)); + state.bracket_stack.push(Bracket::Generic); + input.next(); + }, + _ => {}, + } + break; + } + } + } + }, + '0'..='9' => { // numeric literals! + buf.push(c); + while let Some(x) = input.peek() { + match x { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => { + buf.push(*x); + input.next(); + }, + _ => break + } + } + res.push(Num(String::from(&buf))) + }, + '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator. + match input.peek() { + Some(' ') => res.push(Sep(Minus)), + _ => res.push(Sep(Negative)) + } + }, + '(' => { // note: FuncParens were matched above, directly after identifiers + res.push(Sep(TupleLeftParen)); + state.paren_stack.push(Paren::Tuple); + }, + '[' => { // note: GenericBrackets were matched above, directly after identifiers + res.push(Sep(ArrayLeftBracket)); + state.bracket_stack.push(Bracket::Array); + }, + ')' => { + match state.paren_stack.pop() { + Some(Paren::Func) => res.push(Sep(FuncRightParen)), + Some(Paren::Tuple) => res.push(Sep(TupleRightParen)), + None => return Err(MismatchedParens.into()), + } + }, + ']' => { + match state.bracket_stack.pop() { + Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)), + Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)), + None => return Err(MismatchedBrackets.into()), + } + if input.peek() == Some(&'(') { // parameters following generics + res.push(Sep(FuncLeftParen)); + state.paren_stack.push(Paren::Func); + input.next(); + } + }, + '`' => { + res.push(Sep(BackTick)); + match input.peek() { + Some('(') => { + res.push(Sep(FuncLeftParen)); + state.paren_stack.push(Paren::Func); + input.next(); + }, + Some('[') => { + res.push(Sep(GenericLeftBracket)); + state.bracket_stack.push(Bracket::Generic); + input.next(); + }, + _ => {} + } + }, + ',' => res.push(Sep(Comma)), + '.' => res.push(Sep(Period)), + ';' => res.push(Sep(Semicolon)), + ':' => res.push(Sep(Colon)), + '{' => res.push(Sep(StructLeftBrace)), + '}' => res.push(Sep(StructRightBrace)), + '=' => res.push(Sep(Equals)), + '+' => res.push(Sep(Plus)), + '*' => res.push(Sep(Times)), + '/' => res.push(Sep(Slash)), + '<' => res.push(Sep(LessThan)), + '>' => res.push(Sep(GreaterThan)), + '@' => res.push(Sep(At)), + '$' => res.push(Sep(Sha)), + '~' => res.push(Sep(Tilde)), + '&' => res.push(Sep(And)), + '|' => res.push(Sep(Or)), + '!' => res.push(Sep(Exclamation)), + '?' => res.push(Sep(Question)), + '^' => res.push(Sep(Caret)), + '\\' => res.push(Sep(Backslash)), + _ => return Err(UnknownPunctuation.into()) + } + buf.clear(); + } + Ok(TokenStream(res)) +} + +impl std::fmt::Display for TokenStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Token::*; + let mut prev_token = Indent(0); + for token in &self.0 { + match (&prev_token, &token) { + (Word(_), Word(_)) | (Word(_), Num(_)) | + (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?, + _ => write!(f, "{}", token)?, + } + prev_token = token.clone(); + } + Ok(()) + } +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Token::*; + match self { + Key(word) => write!(f, "{}", word), + Word(val) => write!(f, "{}", val), + Num(val) => write!(f, "{}", val), + Lit(lit) => write!(f, "{}", lit), + Sep(sep) => write!(f, "{}", sep), + Indent(i) => write!(f, "\n{}", " ".repeat(*i)), + } + } +} + +impl std::fmt::Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Literal::*; + match self { + Char(val) => write!(f, "'{}'", val), + SingleLineString(val) => write!(f, "\"{}\"", val), + MultiLineString(val) => write!(f, "\"\"\"{}\"\"\"", val), + Comment(val) => write!(f, "#{}", val), + DocComment(val) => write!(f, "##{}", val), + MultiLineComment(val) => write!(f, "#[{}]#", val), + } + } +} + +impl std::fmt::Display for Keyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Keyword::*; + match self { + Pub => write!(f, "pub"), + Let => write!(f, "let"), + Var => write!(f, "var"), + Const => write!(f, "const"), + Func => write!(f, "func"), + Macro => write!(f, "macro"), + Type => write!(f, "type"), + Mod => write!(f, "mod"), + From => write!(f, "from"), + Import => write!(f, "import"), + For => write!(f, "for"), + While => write!(f, "while"), + Loop => write!(f, "loop"), + Block => write!(f, "block"), + Static => write!(f, "static"), + If => write!(f, "if"), + When => write!(f, "when"), + Elif => write!(f, "elif"), + Else => write!(f, "else"), + Match => write!(f, "match"), + Try => write!(f, "try"), + Catch => write!(f, "catch"), + Finally => write!(f, "finally"), + Struct => write!(f, "struct"), + Tuple => write!(f, "tuple"), + Enum => write!(f, "enum"), + Union => write!(f, "union"), + Interface => write!(f, "interface"), + Distinct => write!(f, "distinct"), + Ref => write!(f, "ref"), + Break => write!(f, "break"), + Continue => write!(f, "continue"), + Return => write!(f, "return"), + In => write!(f, "in"), + Is => write!(f, "is"), + Of => write!(f, "of"), + As => write!(f, "as"), + } + } +} +impl std::fmt::Display for Punctuation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Punctuation::*; + match self { + Comma => write!(f, ","), + Period => write!(f, "."), + Semicolon => write!(f, ";"), + Colon => write!(f, ":"), + BackTick => write!(f, "`"), + SingleQuote => write!(f, "'"), + DoubleQuote => write!(f, "\""), + FuncLeftParen => write!(f, "("), + FuncRightParen => write!(f, ")"), + TupleLeftParen => write!(f, " ("), + TupleRightParen => write!(f, ")"), + GenericLeftBracket => write!(f, "["), + GenericRightBracket => write!(f, "]"), + ArrayLeftBracket => write!(f, " ["), + ArrayRightBracket => write!(f, "]"), + StructLeftBrace => write!(f, "{{"), + StructRightBrace => write!(f, "}}"), + Equals => write!(f, "="), + Plus => write!(f, "+"), + Minus => write!(f, "- "), + Negative => write!(f, "-"), + Times => write!(f, "*"), + Slash => write!(f, "/"), + LessThan => write!(f, "<"), + GreaterThan => write!(f, ">"), + At => write!(f, "@"), + Sha => write!(f, "$"), + Tilde => write!(f, "~"), + And => write!(f, "&"), + Percent => write!(f, "%"), + Or => write!(f, "|"), + Exclamation => write!(f, "!"), + Question => write!(f, "?"), + Caret => write!(f, "^"), + Backslash => write!(f, "\\"), + } + } +} diff --git a/src/frontend/mod.rs b/src/frontend/mod.rs new file mode 100644 index 0000000..d437c73 --- /dev/null +++ b/src/frontend/mod.rs @@ -0,0 +1,3 @@ +pub mod ast; +pub mod lex; +pub mod parse; diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs new file mode 100644 index 0000000..c525982 --- /dev/null +++ b/src/frontend/parse.rs @@ -0,0 +1,297 @@ +use std::fmt; + +use crate::frontend::lex::*; +use crate::frontend::ast::*; +use crate::frontend::ast::Binding::*; +use crate::frontend::ast::Control::*; +use crate::frontend::ast::Pattern::*; +use Token::*; +use Literal::*; +use Punctuation::*; + +type Input = std::iter::Peekable<std::vec::IntoIter<Token>>; + +#[derive(Clone, Copy)] +struct State { + depth: usize, + step: usize +} + +impl State { + fn indent(&self) -> State { + State { depth: self.depth + self.step, step: self.step } + } + fn dedent(&self) -> State { + State { depth: self.depth - self.step, step: self.step } + } +} + +/// Convert a basic TokenStream into an AbstractSyntaxTree +pub fn astify(input: TokenStream, name: &str) -> Result<Expr> { + let mut input = input.into_iter().peekable(); + let body = parse_body(&mut input, State { depth: 0, step: 0 })?; + Ok(Expr::Binding(Module{ id: name.to_string(), body })) +} + +/// Parse a series of Exprs, for ex. the body of a function. +fn parse_body(input: &mut Input, state: State) -> Result<Vec<Expr>> { + let mut res = Vec::new(); + while input.peek() == Some(&Indent(state.depth)) { + input.next(); + res.push(parse_expr(input, state)?); + } + Ok(res) +} + +/// Expr ::= Let | Var | Const | Func | Type | +/// Mod | Import | Block | Static | +/// For | While | Loop | If | When | Try | Match +fn parse_expr(input: &mut Input, state: State) -> Result<Expr> { + use Keyword::*; + match input.next() { + Some(Key(word)) => match word { + Pub => { + match input.next() { + Some(Key(word)) => match word { + Const => parse_const(input, state, true), + Func => parse_funcdecl(input, state, true), + Type => parse_typedecl(input, state, true), + Mod => parse_mod(input, state, true), + _ => return Err("unrecognized keyword following pub".into()), + } + Some(_) => return Err("unrecognized thing following pub".into()), + None => return Err("end of input".into()), + } + }, + Let => parse_let(input, state), + Var => parse_var(input, state), + Const => parse_const(input, state, false), + Func => parse_funcdecl(input, state, false), + Type => parse_typedecl(input, state, false), + Mod => parse_mod(input, state, false), + From => parse_import(input, state, true), + Import => parse_import(input, state, false), + Block => parse_block(input, state), + Static => parse_static(input, state), + For => parse_for(input, state), + While => parse_while(input, state), + Loop => parse_loop(input, state), + If => parse_if(input, state), + When => parse_when(input, state), + Try => parse_try(input, state), + Match => parse_match(input, state), + _ => return Err("invalid keyword starting expression".into()), + }, + _ => todo!(), // what can i do with this?? match line here + } +} + +/// Let ::= 'let' Pattern Annotation? '=' Expr +fn parse_let(input: &mut Input, state: State) -> Result<Expr> { + let id = parse_pattern(input, state)?; + let mut kind = None; + if let Some(Sep(Colon)) = input.peek() { + input.next(); + kind = Some(parse_typedesc(input, state)?); + } + if input.next() != Some(Sep(Equals)) { + return Err("= not following binding".into()) + } + let value = Box::new(parse_expr(input, state)?); + Ok(Expr::Binding(Let { id, kind, value })) +} +/// Var ::= 'var' Pattern Annotation? ('=' Expr)? +fn parse_var(input: &mut Input, state: State) -> Result<Expr> { + let id = parse_pattern(input, state)?; + let mut kind = None; + if let Some(Sep(Colon)) = input.peek() { + input.next(); + kind = Some(parse_typedesc(input, state)?); + } + let mut value = None; + if input.next() != Some(Sep(Equals)) { + value = Some(Box::new(parse_expr(input, state)?)); + } + Ok(Expr::Binding(Var { id, kind, value })) +} +// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr +fn parse_const(input: &mut Input, state: State, public: bool) -> Result<Expr> { + let id = parse_pattern(input, state)?; + let mut kind = None; + if let Some(Sep(Colon)) = input.peek() { + input.next(); + kind = Some(parse_typedesc(input, state)?); + } + if input.next() != Some(Sep(Equals)) { + return Err("= not following binding".into()) + } + let value = Box::new(parse_expr(input, state)?); + Ok(Expr::Binding(Const { public, id, kind, value })) +} +// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body +fn parse_funcdecl(input: &mut Input, state: State, public: bool) -> Result<Expr> { todo!() } +// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc +fn parse_typedecl(input: &mut Input, state: State, public: bool) -> Result<Expr> { + let pattern = parse_pattern(input, state)?; + todo!() +} +// Mod ::= 'pub'? 'mod' Ident ':' Body +fn parse_mod(input: &mut Input, state: State, public: bool) -> Result<Expr> { + match input.next() { + Some(Word(id)) => { + match input.next() { + Some(Sep(Colon)) => { + let body = parse_body(input, state.indent())?; + Ok(Expr::Binding(Module { id, body })) + }, + _ => return Err("unexpected token following mod label".into()), + } + }, + _ => return Err("unexpected thing following mod keyword".into()), + } +} + +// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)? +fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result<Expr> { + let mut from = None; + if from_scope { + match input.next() { + Some(Word(id)) => from = Some(id), + _ => return Err("identifier not following from keyword".into()) + } + if input.next() != Some(Key(Keyword::Import)) { + return Err("expected import to follow from".into()) + } + } + todo!() +} +// Block ::= 'block' Ident? ':' Body +fn parse_block(input: &mut Input, state: State) -> Result<Expr> { // todo: body + offset + match input.next() { + Some(Sep(Colon)) => { + let id = None; + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Block { id, body })) + }, + Some(Word(label)) => { + match input.next() { + Some(Sep(Colon)) => { + let id = Some(label); + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Block { id, body })) + }, + _ => return Err("unexpected token following block label".into()), + } + }, + _ => return Err("unexpected thing following block keyword".into()), + } +} +// Static ::= 'static' ':' Body +fn parse_static(input: &mut Input, state: State) -> Result<Expr> { + if input.next() != Some(Sep(Colon)) { + return Err("colon must follow static invocation".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Static { body })) +} + +// For ::= 'for' Pattern 'in' Expr ':' Body +fn parse_for(input: &mut Input, state: State) -> Result<Expr> { + let binding = parse_pattern(input, state)?; + if input.next() != Some(Key(Keyword::In)) { + return Err("expected in keyword after for pattern".into()); + } + let range = Box::new(parse_expr(input, state)?); + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after in expression".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(For { binding, range, body })) +} +// While ::= 'while' Expr ':' Body +fn parse_while(input: &mut Input, state: State) -> Result<Expr> { + let cond = Box::new(parse_expr(input, state)?); + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after while keyword".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(While { cond, body })) +} +// Loop ::= 'loop' ':' Body +fn parse_loop(input: &mut Input, state: State) -> Result<Expr> { + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after loop keyword".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Loop { body })) +} + +// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? +fn parse_if(input: &mut Input, state: State) -> Result<Expr> { + let mut branches = Vec::new(); + branches.push(parse_cond_branch(input, state)?); + while input.peek() == Some(&Key(Keyword::Elif)) { + input.next(); + branches.push(parse_cond_branch(input, state)?); + } + let mut else_body = None; + if input.peek() == Some(&Key(Keyword::Else)) { + input.next(); + else_body = Some(parse_body(input, state.indent())?); + } + Ok(Expr::Control(If { branches, else_body })) +} +// When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? +fn parse_when(input: &mut Input, state: State) -> Result<Expr> { + let mut branches = Vec::new(); + branches.push(parse_cond_branch(input, state)?); + while input.peek() == Some(&Key(Keyword::Elif)) { + input.next(); + branches.push(parse_cond_branch(input, state)?); + } + let mut else_body = None; + if input.peek() == Some(&Key(Keyword::Else)) { + input.next(); + else_body = Some(parse_body(input, state.indent())?); + } + let mut body = Vec::new(); + body.push(Expr::Control(If { branches, else_body })); + Ok(Expr::Control(Static { body })) +} +// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)? +fn parse_try(input: &mut Input, state: State) -> Result<Expr> { + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after try keyword".into()); + } + let body = parse_body(input, state.indent())?; + let catches = Vec::new(); + while input.peek() == Some(&Key(Keyword::Catch)) { + input.next(); + todo!(); + } + let mut finally = None; + if input.peek() == Some(&Key(Keyword::Finally)) { + input.next(); + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after try keyword".into()); + } + finally = Some(parse_body(input, state.indent())?); + } + Ok(Expr::Control(Try { body, catches, finally })) +} +// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+ +fn parse_match(input: &mut Input, state: State) -> Result<Expr> { + let item = parse_pattern(input, state)?; + let mut branches = Vec::new(); + while input.peek() == Some(&Key(Keyword::Of)) { + input.next(); + todo!(); + } + Ok(Expr::Control(Match { item, branches })) +} + +fn parse_typedesc(input: &mut Input, state: State) -> Result<Type> { todo!() } +fn parse_pattern(input: &mut Input, state: State) -> Result<Pattern> { todo!() } +fn parse_cond_branch(input: &mut Input, state: State) -> Result<CondBranch> { todo!() } + +// lex, parse, expand, compile? |