From dc1640fd403649f6e54146754b30dbfa3145fba9 Mon Sep 17 00:00:00 2001 From: JJ Date: Sun, 5 Nov 2023 17:54:53 -0800 Subject: compiler: progress on parser --- src/frontend/ast.rs | 34 +++++++------ src/frontend/lex.rs | 15 ++++-- src/frontend/parse.rs | 134 +++++++++++++++++++++++++++++++++++++------------- 3 files changed, 127 insertions(+), 56 deletions(-) diff --git a/src/frontend/ast.rs b/src/frontend/ast.rs index 6c7963e..ed7a01a 100644 --- a/src/frontend/ast.rs +++ b/src/frontend/ast.rs @@ -5,9 +5,12 @@ pub type Id = String; pub enum Type { Void, Never, Integer, Float, String, // char et al are defined later - Func{from: Box, to: Box}, // todo: multiple params, effects + Func { // todo: multiple params, effects + from: Box, + to: Box + }, Struct(Vec<(Id, Box)>), - Tuple(Vec<(Option, Box)>), + Tuple(Vec<(Option, Box)>), Union(Vec<(Id, Box)>), Interface { funcs: Vec, @@ -17,9 +20,14 @@ pub enum Type { List(Box), Slice(Box), // todo: plus ownership Reference(Box), + Pointer(Box), + Distinct(Box), // todo: not sure Mutable(Box), // parameters only Static(Box), // parameters only - Alias{ id: Id, params: Vec }, // todo: this is wrong + Alias { // todo: this is wrong + id: Id, + generics: Vec + } } /// Function signatures. @@ -38,14 +46,11 @@ pub enum Pattern { Ident(Id), // type aliases, union variants, calls... Number(i64), Float(f64), Char(char), String(String), - Struct(Vec), - Tuple(Vec), + Struct(Vec<(Id, Expr)>), // field, value + Tuple(Vec<(Option, Expr)>), // field, value List(Vec), // arrays, slices, lists } -pub struct StructPattern { field: Id, value: Expr } -pub struct TuplePattern { field: Option, value: Expr } - /// Expressions introduce a new binding or bindings, in some regard. pub enum Binding { Let { @@ -68,8 +73,8 @@ pub enum Binding { public: bool, effect: Option, id: Id, - generics: Vec, - params: Vec, + generics: Vec<(Id, Option)>, // id, kind + params: Vec<(Id, Type)>, // id, kind kind: Type, body: Vec }, @@ -78,9 +83,6 @@ pub enum Binding { Module { id: Id, body: Vec }, } -pub struct GenericDecl { id: Id, kind: Option } -pub struct ParamDecl { id: Id, kind: Type } - /// Expressions related to control flow. pub enum Control { Call { id: Id, params: Vec }, // function calls, macro invocations, field access... @@ -104,9 +106,9 @@ pub enum Control { Loop { body: Vec }, } -pub struct CondBranch { cond: Expr, body: Vec } -pub struct CatchBranch { exceptions: Vec, binding: Option, body: Vec } -pub struct MatchBranch { pattern: Pattern, guard: Option, body: Vec } +pub struct CondBranch { pub cond: Expr, pub body: Vec } +pub struct CatchBranch { pub exceptions: Vec, pub binding: Option, pub body: Vec } +pub struct MatchBranch { pub pattern: Pattern, pub guard: Option, pub body: Vec } /// Expressions are either Patterns, Bindings, or Control flow constructs. pub enum Expr { diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs index 771ba38..0d9fd22 100644 --- a/src/frontend/lex.rs +++ b/src/frontend/lex.rs @@ -60,7 +60,7 @@ pub enum Keyword { If, When, Elif, Else, Match, Try, Catch, Finally, Struct, Tuple, Enum, Union, Interface, - Distinct, Ref, // todo: Mut once figured out + Distinct, Ref, Ptr, Mut, Break, Continue, Return, In, Is, Of, As, } @@ -293,6 +293,8 @@ pub fn tokenize(input: &str) -> Result { "interface" => res.push(Key(Interface)), "distinct" => res.push(Key(Distinct)), "ref" => res.push(Key(Ref)), + "ptr" => res.push(Key(Ptr)), + "mut" => res.push(Key(Mut)), "break" => res.push(Key(Break)), "continue" => res.push(Key(Continue)), "return" => res.push(Key(Return)), @@ -347,14 +349,14 @@ pub fn tokenize(input: &str) -> Result { res.push(Sep(ArrayLeftBracket)); state.bracket_stack.push(Bracket::Array); }, - ')' => { + ')' => { // match parens match state.paren_stack.pop() { Some(Paren::Func) => res.push(Sep(FuncRightParen)), Some(Paren::Tuple) => res.push(Sep(TupleRightParen)), None => return Err(MismatchedParens.into()), } }, - ']' => { + ']' => { // match brackets match state.bracket_stack.pop() { Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)), Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)), @@ -366,8 +368,8 @@ pub fn tokenize(input: &str) -> Result { input.next(); } }, - '`' => { - res.push(Sep(BackTick)); + '`' => { // backticks are used for operators, so generics/parameters may follow + res.push(Sep(BackTick)); // todo: backticks could like not be used for operators match input.peek() { Some('(') => { res.push(Sep(FuncLeftParen)); @@ -488,6 +490,8 @@ impl std::fmt::Display for Keyword { Interface => write!(f, "interface"), Distinct => write!(f, "distinct"), Ref => write!(f, "ref"), + Ptr => write!(f, "ptr"), + Mut => write!(f, "mut"), Break => write!(f, "break"), Continue => write!(f, "continue"), Return => write!(f, "return"), @@ -498,6 +502,7 @@ impl std::fmt::Display for Keyword { } } } + impl std::fmt::Display for Punctuation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use Punctuation::*; diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs index c525982..43de16e 100644 --- a/src/frontend/parse.rs +++ b/src/frontend/parse.rs @@ -1,5 +1,3 @@ -use std::fmt; - use crate::frontend::lex::*; use crate::frontend::ast::*; use crate::frontend::ast::Binding::*; @@ -43,9 +41,8 @@ fn parse_body(input: &mut Input, state: State) -> Result> { Ok(res) } -/// Expr ::= Let | Var | Const | Func | Type | -/// Mod | Import | Block | Static | -/// For | While | Loop | If | When | Try | Match +/// Expr ::= Let | Var | Const | Func | Type | Mod | Import | +/// Block | Static | For | While | Loop | If | When | Try | Match fn parse_expr(input: &mut Input, state: State) -> Result { use Keyword::*; match input.next() { @@ -69,7 +66,7 @@ fn parse_expr(input: &mut Input, state: State) -> Result { Func => parse_funcdecl(input, state, false), Type => parse_typedecl(input, state, false), Mod => parse_mod(input, state, false), - From => parse_import(input, state, true), + From => parse_import(input, state, true), // todo: probably rework imports Import => parse_import(input, state, false), Block => parse_block(input, state), Static => parse_static(input, state), @@ -86,14 +83,19 @@ fn parse_expr(input: &mut Input, state: State) -> Result { } } -/// Let ::= 'let' Pattern Annotation? '=' Expr -fn parse_let(input: &mut Input, state: State) -> Result { - let id = parse_pattern(input, state)?; +/// Annotation ::= (':' TypeDesc)? +fn parse_annotation(input: &mut Input, state: State) -> Result> { let mut kind = None; if let Some(Sep(Colon)) = input.peek() { input.next(); - kind = Some(parse_typedesc(input, state)?); + kind = Some(parse_type(input, state)?); } + Ok(kind) +} +/// Let ::= 'let' Pattern Annotation? '=' Expr +fn parse_let(input: &mut Input, state: State) -> Result { + let id = parse_pattern(input, state)?; + let kind = parse_annotation(input, state)?; if input.next() != Some(Sep(Equals)) { return Err("= not following binding".into()) } @@ -103,39 +105,31 @@ fn parse_let(input: &mut Input, state: State) -> Result { /// Var ::= 'var' Pattern Annotation? ('=' Expr)? fn parse_var(input: &mut Input, state: State) -> Result { let id = parse_pattern(input, state)?; - let mut kind = None; - if let Some(Sep(Colon)) = input.peek() { - input.next(); - kind = Some(parse_typedesc(input, state)?); - } + let kind = parse_annotation(input, state)?; let mut value = None; if input.next() != Some(Sep(Equals)) { value = Some(Box::new(parse_expr(input, state)?)); } Ok(Expr::Binding(Var { id, kind, value })) } -// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr +/// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr fn parse_const(input: &mut Input, state: State, public: bool) -> Result { let id = parse_pattern(input, state)?; - let mut kind = None; - if let Some(Sep(Colon)) = input.peek() { - input.next(); - kind = Some(parse_typedesc(input, state)?); - } + let kind = parse_annotation(input, state)?; if input.next() != Some(Sep(Equals)) { return Err("= not following binding".into()) } let value = Box::new(parse_expr(input, state)?); Ok(Expr::Binding(Const { public, id, kind, value })) } -// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body +/// Func ::= 'pub'? 'func' Ident Generics? Parameters? (':' TypeDesc) '=' Body fn parse_funcdecl(input: &mut Input, state: State, public: bool) -> Result { todo!() } -// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc +/// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc fn parse_typedecl(input: &mut Input, state: State, public: bool) -> Result { let pattern = parse_pattern(input, state)?; todo!() } -// Mod ::= 'pub'? 'mod' Ident ':' Body +/// Mod ::= 'pub'? 'mod' Ident ':' Body fn parse_mod(input: &mut Input, state: State, public: bool) -> Result { match input.next() { Some(Word(id)) => { @@ -151,7 +145,7 @@ fn parse_mod(input: &mut Input, state: State, public: bool) -> Result { } } -// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)? +/// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)? fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result { let mut from = None; if from_scope { @@ -165,7 +159,7 @@ fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result Result { // todo: body + offset match input.next() { Some(Sep(Colon)) => { @@ -186,7 +180,7 @@ fn parse_block(input: &mut Input, state: State) -> Result { // todo: body _ => return Err("unexpected thing following block keyword".into()), } } -// Static ::= 'static' ':' Body +/// Static ::= 'static' ':' Body fn parse_static(input: &mut Input, state: State) -> Result { if input.next() != Some(Sep(Colon)) { return Err("colon must follow static invocation".into()); @@ -195,7 +189,7 @@ fn parse_static(input: &mut Input, state: State) -> Result { Ok(Expr::Control(Static { body })) } -// For ::= 'for' Pattern 'in' Expr ':' Body +/// For ::= 'for' Pattern 'in' Expr ':' Body fn parse_for(input: &mut Input, state: State) -> Result { let binding = parse_pattern(input, state)?; if input.next() != Some(Key(Keyword::In)) { @@ -208,7 +202,7 @@ fn parse_for(input: &mut Input, state: State) -> Result { let body = parse_body(input, state.indent())?; Ok(Expr::Control(For { binding, range, body })) } -// While ::= 'while' Expr ':' Body +/// While ::= 'while' Expr ':' Body fn parse_while(input: &mut Input, state: State) -> Result { let cond = Box::new(parse_expr(input, state)?); if input.next() != Some(Sep(Colon)) { @@ -217,7 +211,7 @@ fn parse_while(input: &mut Input, state: State) -> Result { let body = parse_body(input, state.indent())?; Ok(Expr::Control(While { cond, body })) } -// Loop ::= 'loop' ':' Body +/// Loop ::= 'loop' ':' Body fn parse_loop(input: &mut Input, state: State) -> Result { if input.next() != Some(Sep(Colon)) { return Err("expected colon after loop keyword".into()); @@ -226,7 +220,7 @@ fn parse_loop(input: &mut Input, state: State) -> Result { Ok(Expr::Control(Loop { body })) } -// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? +/// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? fn parse_if(input: &mut Input, state: State) -> Result { let mut branches = Vec::new(); branches.push(parse_cond_branch(input, state)?); @@ -258,7 +252,8 @@ fn parse_when(input: &mut Input, state: State) -> Result { body.push(Expr::Control(If { branches, else_body })); Ok(Expr::Control(Static { body })) } -// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)? +fn parse_cond_branch(input: &mut Input, state: State) -> Result { todo!() } +/// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)? fn parse_try(input: &mut Input, state: State) -> Result { if input.next() != Some(Sep(Colon)) { return Err("expected colon after try keyword".into()); @@ -279,7 +274,7 @@ fn parse_try(input: &mut Input, state: State) -> Result { } Ok(Expr::Control(Try { body, catches, finally })) } -// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+ +/// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+ fn parse_match(input: &mut Input, state: State) -> Result { let item = parse_pattern(input, state)?; let mut branches = Vec::new(); @@ -290,8 +285,77 @@ fn parse_match(input: &mut Input, state: State) -> Result { Ok(Expr::Control(Match { item, branches })) } -fn parse_typedesc(input: &mut Input, state: State) -> Result { todo!() } +/// Type ::= +/// ('ref' | 'ptr' | 'mut' | 'static' | 'struct' | 'tuple' | 'enum' | 'union' | 'interface' | 'concept') | +/// ('ref' WrappedType) | ('ptr' WrappedType) | ('mut' WrappedType) | ('static' WrappedType) | ('distinct' WrappedType) | +/// StructType | TupleType | EnumType | UnionType | InterfaceType +/// The input stream must be normalized before attempting to parse types, because otherwise it's just a little bit hellish. +/// In particular: ref, ptr, mut, static, distinct must wrap their parameters in '[' ']' and all type declarations must be on one line. +fn parse_type(input: &mut Input, state: State) -> Result { + use Type::*; + match input.next() { + Some(Key(word)) => { + match input.peek() { // todo: check if the type is a special typeclass + Some(Sep(GenericLeftBracket)) => (), + _ => todo!() // ref, ptr, mut, static, struct, tuple, enum, union, interface, concept + } + match word { + Keyword::Distinct => Ok(Distinct(Box::new(parse_wrapped_type(input, state)?))), + Keyword::Ref => Ok(Reference(Box::new(parse_wrapped_type(input, state)?))), + Keyword::Ptr => Ok(Pointer(Box::new(parse_wrapped_type(input, state)?))), + Keyword::Var => Ok(Mutable(Box::new(parse_wrapped_type(input, state)?))), + Keyword::Const => Ok(Static(Box::new(parse_wrapped_type(input, state)?))), + Keyword::Struct => parse_struct_type(input, state), + Keyword::Tuple => parse_tuple_type(input, state), + Keyword::Enum => parse_enum_type(input, state), + Keyword::Union => parse_union_type(input, state), + Keyword::Interface => parse_interface(input, state), + _ => return Err("invalid keyword present in type!".into()) + } + }, + Some(Word(id)) => { + let mut generics = Vec::new(); + if let Some(Sep(GenericLeftBracket)) = input.peek() { + generics = parse_generics(input, state)?; + } + Ok(Alias { id, generics }) + }, + _ => return Err("error".into()) + } +} + +/// `StructType ::= ('struct' '[' Ident ':' Type (',' Ident ':' Type)* ']'` +fn parse_struct_type(input: &mut Input, state: State) -> Result { todo!() } +/// `TupleType ::= 'tuple' '[' (Ident ':')? Type (',' (Ident ':')? Type)* ']'` +fn parse_tuple_type(input: &mut Input, state: State) -> Result { todo!() } +/// `EnumType ::= 'enum' '[' Ident ('=' Pattern)? (Ident ('=' Pattern)?)* ']'` +fn parse_enum_type(input: &mut Input, state: State) -> Result { todo!() } +/// `UnionType ::= 'union' '[' Ident (':' Type)? (',' Ident (':' Type)?)* ']'` +fn parse_union_type(input: &mut Input, state: State) -> Result { todo!() } +/// `Interface ::= 'interface' '[' Signature (',' Signature)* ']'` +fn parse_interface(input: &mut Input, state: State) -> Result { todo!() } +/// `Signature ::= Ident ('[' Ident (':' Type)? (',' Ident (':' Type)?)* ']')? ('(' Type (',' Type)* ')')? (':' Type)?` +fn parse_signature(input: &mut Input, state: State) -> Result { todo!() } + +/// `WrappedType ::= Type | ('[' Type ']')` +fn parse_wrapped_type(input: &mut Input, state: State) -> Result { + if let Some(Sep(GenericLeftBracket)) = input.next() { + let result = parse_type(input, state)?; + if let Some(Sep(GenericRightBracket)) = input.next() { + Ok(result) + } else { + Err("could not find closing generic bracket!".into()) + } + } else { + parse_type(input, state) + } +} + +/// `GenericType ::= '[' Type (',' Type)* ']'` +fn parse_generics(input: &mut Input, state: State) -> Result> { todo!() } + +/// Pattern ::= Literal | Ident | '(' Pattern (',' Pattern)* ')' | Ident '(' Pattern (',' Pattern)* ')' +/// Literal ::= Char | String | Number | Float fn parse_pattern(input: &mut Input, state: State) -> Result { todo!() } -fn parse_cond_branch(input: &mut Input, state: State) -> Result { todo!() } // lex, parse, expand, compile? -- cgit v1.2.3-70-g09d2