From 0121346e894fb0e7f60312b16986d82109e4d86b Mon Sep 17 00:00:00 2001 From: JJ Date: Sat, 25 Nov 2023 21:07:23 -0800 Subject: compiler: preemptively fix EOF bugs in parser --- src/frontend/lex.rs | 3 +- src/frontend/parse.rs | 215 ++++++++++++++++++++++---------------------------- src/main.rs | 2 + 3 files changed, 97 insertions(+), 123 deletions(-) diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs index 67c4ae3..f751c63 100644 --- a/src/frontend/lex.rs +++ b/src/frontend/lex.rs @@ -1,6 +1,7 @@ use multipeek::multipeek; -pub type Result = core::result::Result>; +use crate::*; + pub struct TokenStream(Vec); impl IntoIterator for TokenStream { diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs index eee3911..8616346 100644 --- a/src/frontend/parse.rs +++ b/src/frontend/parse.rs @@ -1,9 +1,10 @@ use multipeek::multipeek; -use crate::frontend::lex::*; -use crate::frontend::ast::*; -use crate::frontend::ast::Binding::*; -use crate::frontend::ast::Control::*; +use crate::*; +use super::lex::*; +use super::ast::*; +use super::ast::Binding::*; +use super::ast::Control::*; use Token::*; use Literal::*; use Punctuation::*; @@ -16,13 +17,24 @@ impl Input { self.0.next().ok_or("end of input".into()) } - /// Map input.peek() to return Results for use with the propagation operator - fn peek(&mut self) -> Result<&Token> { - self.0.peek().ok_or("end of input".into()) + /// Check if the next character is an expected token, and if so, advance the iterator and return true + fn peek(&mut self, expected: Token) -> bool { + if self.0.peek() == Some(&expected) { + self.0.next(); + true + } else { + false + } + } + + /// Expose input.peek() (we don't want EOF to trigger an error when peeking) + fn peek_opt(&mut self) -> Option<&Token> { + self.0.peek() } - fn peek_nth(&mut self, n: usize) -> Result<&Token> { - self.0.peek_nth(n).ok_or("end of input".into()) + /// Expose input.peek_nth() + fn peek_nth(&mut self, n: usize) -> Option<&Token> { + self.0.peek_nth(n) } /// Asserts the next character to be a known token @@ -32,12 +44,20 @@ impl Input { token => Err(format!("expected token {} but found {}", expected, token).into()) } } + + /// Allow usage of `len` + fn len(&self) -> usize { + self.0.len() + } } /// Convert a basic TokenStream into an AbstractSyntaxTree pub fn astify(input: TokenStream, name: &str) -> Result { let mut input = Input(multipeek(input)); let body = parse_body(&mut input)?; + if input.len() > 0 { + return Err(format!("additional tokens remaining after the body!").into()); + } Ok(Expr::Binding(Module{ id: name.to_string(), body })) } @@ -45,18 +65,16 @@ pub fn astify(input: TokenStream, name: &str) -> Result { /// Body ::= Expr | ('{' Expr (';' Expr)* '}') fn parse_body(input: &mut Input) -> Result> { let mut res = Vec::new(); - if input.peek()? != &Sep(ScopeLeftBrace) { + if !input.peek(Sep(ScopeLeftBrace)) { res.push(parse_expr(input)?); return Ok(res); } - input.next()?; - while input.peek()? != &Sep(ScopeRightBrace) { + while !input.peek(Sep(ScopeRightBrace)) { res.push(parse_expr(input)?); - if input.peek()? == &Sep(Semicolon) { - input.next()?; - } + // consume semicolons. there doesn't *have* to be a semicolon though. + // this should probably be checked to be a semicolon or a right brace. + input.peek(Sep(Semicolon)); } - input.next()?; Ok(res) } @@ -64,6 +82,8 @@ fn parse_body(input: &mut Input) -> Result> { /// Block | Static | For | While | Loop | If | When | Try | Match fn parse_expr(input: &mut Input) -> Result { use Keyword::*; + // Note that this match consumes, as peeking is redundant. + // This is why subsequent functions do not check for their leading keyword, i.e. 'let' match input.next()? { Key(word) => match word { Pub => { @@ -116,7 +136,7 @@ fn parse_var(input: &mut Input) -> Result { let id = parse_pattern(input)?; let kind = parse_annotation(input)?; let mut value = None; - if input.next()? != Sep(Equals) { + if input.peek(Sep(Equals)) { value = Some(Box::new(parse_expr(input)?)); } Ok(Expr::Binding(Var { id, kind, value })) @@ -131,29 +151,26 @@ fn parse_const(input: &mut Input, public: bool) -> Result { Ok(Expr::Binding(Const { public, id, kind, value })) } -/// Annotation ::= (':' TypeDesc)? +/// Annotation ::= (':' Type)? fn parse_annotation(input: &mut Input) -> Result> { - let mut kind = None; - if input.peek()? == &Sep(Colon) { - input.next()?; - kind = Some(parse_type(input)?); + if input.peek(Sep(Colon)) { + Ok(Some(parse_type(input)?)) + } else { + Ok(None) } - Ok(kind) } -/// `Func ::= 'pub'? 'func' Ident ('[' Parameters ']')? ('(' Parameters ')')? (':' TypeDesc) '=' Body` +/// `Func ::= 'pub'? 'func' Ident ('[' Parameters ']')? ('(' Parameters ')')? Annotation '=' Body` fn parse_func(input: &mut Input, public: bool) -> Result { let effect = None; let id = parse_ident(input)?; let mut generics = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { generics = parse_parameters(input)?; input.then(Sep(GenericRightBracket))?; } let mut parameters = Vec::new(); - if input.peek()? == &Sep(FuncLeftParen) { - input.next()?; // todo: rewrite to map over an input + if input.peek(Sep(FuncLeftParen)) { // todo: rewrite to map over an input // let temp_parameters = parse_parameters(input)?; // if temp_parameters.last().is_none() { // return Err("expected a type annotation on the last function parameter".into()); @@ -166,8 +183,7 @@ fn parse_func(input: &mut Input, public: bool) -> Result { } else { stack.push(id); } - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { let (id, kind) = parse_parameter(input)?; stack.push(id); if kind.is_some() { @@ -183,8 +199,7 @@ fn parse_func(input: &mut Input, public: bool) -> Result { input.then(Sep(FuncRightParen))?; } let mut kind = Type::Void; - if input.peek()? == &Sep(Colon) { - input.next()?; + if input.peek(Sep(Colon)) { kind = parse_type(input)?; } input.then(Sep(Equals))?; @@ -192,24 +207,21 @@ fn parse_func(input: &mut Input, public: bool) -> Result { Ok(Expr::Binding(Func { public, effect, id, generics, parameters, kind, body })) } -/// `Macro ::= 'pub'? 'macro' Ident ('[' Paremeters ']')? ('(' Paremeters ')')? (':' TypeDesc) '=' Body` +/// `Macro ::= 'pub'? 'macro' Ident ('[' Parameters ']')? ('(' Parameters ')')? (':' Type) '=' Body` fn parse_macro(input: &mut Input, public: bool) -> Result { let id = parse_ident(input)?; let mut generics = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { generics = parse_parameters(input)?; input.then(Sep(GenericRightBracket))?; } let mut parameters = Vec::new(); - if input.peek()? == &Sep(FuncLeftParen) { - input.next()?; + if input.peek(Sep(FuncLeftParen)) { parameters = parse_parameters(input)?; input.then(Sep(FuncRightParen))?; } let mut kind = None; - if input.peek()? == &Sep(Colon) { - input.next()?; + if input.peek(Sep(Colon)) { kind = Some(parse_type(input)?); } input.then(Sep(Equals))?; @@ -221,8 +233,7 @@ fn parse_macro(input: &mut Input, public: bool) -> Result { fn parse_typedecl(input: &mut Input, public: bool) -> Result { let id = parse_ident(input)?; let mut generics = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { generics = parse_parameters(input)?; input.then(Sep(GenericRightBracket))?; } @@ -235,8 +246,7 @@ fn parse_typedecl(input: &mut Input, public: bool) -> Result { fn parse_parameter(input: &mut Input) -> Result<(Id, Option)> { let id = parse_ident(input)?; let mut kind = None; - if input.peek()? == &Sep(Colon) { - input.next()?; + if input.peek(Sep(Colon)) { kind = Some(parse_type(input)?); } Ok((id, kind)) @@ -246,8 +256,7 @@ fn parse_parameter(input: &mut Input) -> Result<(Id, Option)> { fn parse_parameters(input: &mut Input) -> Result)>> { let mut res = Vec::new(); res.push(parse_parameter(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { res.push(parse_parameter(input)?); } Ok(res) @@ -255,18 +264,10 @@ fn parse_parameters(input: &mut Input) -> Result)>> { /// `Mod ::= 'pub'? 'mod' Ident ':' Body` fn parse_mod(input: &mut Input, public: bool) -> Result { - match input.next()? { - Word(id) => { - match input.next()? { - Sep(Colon) => { - let body = parse_body(input)?; - Ok(Expr::Binding(Module { id, body })) - }, - _ => return Err("unexpected token following mod label".into()), - } - }, - _ => return Err("unexpected thing following mod keyword".into()), - } + let id = parse_ident(input)?; + input.then(Sep(Colon))?; + let body = parse_body(input)?; + Ok(Expr::Binding(Module { id, body })) } /// `Use ::= 'use' Ident ('/' Ident)* ('/' (('[' Ident (',' Ident)* ']') | '*'))?` @@ -326,13 +327,11 @@ fn parse_loop(input: &mut Input) -> Result { fn parse_if(input: &mut Input) -> Result { let mut branches = Vec::new(); branches.push(parse_cond_branch(input)?); - while input.peek()? == &Key(Keyword::Elif) { - input.next()?; + while input.peek(Key(Keyword::Elif)) { branches.push(parse_cond_branch(input)?); } let mut else_body = None; - if input.peek()? == &Key(Keyword::Else) { - input.next()?; + if input.peek(Key(Keyword::Else)) { else_body = Some(parse_body(input)?); } Ok(Expr::Control(If { branches, else_body })) @@ -342,13 +341,11 @@ fn parse_if(input: &mut Input) -> Result { fn parse_when(input: &mut Input) -> Result { let mut branches = Vec::new(); branches.push(parse_cond_branch(input)?); - while input.peek()? == &Key(Keyword::Elif) { - input.next()?; + while input.peek(Key(Keyword::Elif)) { branches.push(parse_cond_branch(input)?); } let mut else_body = None; - if input.peek()? == &Key(Keyword::Else) { - input.next()?; + if input.peek(Key(Keyword::Else)) { input.then(Sep(Colon))?; else_body = Some(parse_body(input)?); } @@ -365,17 +362,15 @@ fn parse_cond_branch(input: &mut Input) -> Result { Ok(CondBranch { cond, body }) } -/// `Try ::= 'try' ':' Body ('except' Exception (',' Exception)* ':' Body) ('finally' ':' Body)?` +/// `Try ::= 'try' ':' Body ('except' Exception (',' Exception)* ':' Body)* ('finally' ':' Body)?` fn parse_try(input: &mut Input) -> Result { input.then(Sep(Colon))?; let body = parse_body(input)?; let mut catches = Vec::new(); - while input.peek()? == &Key(Keyword::Catch) { - input.next()?; + while input.peek(Key(Keyword::Catch)) { let mut exceptions = Vec::new(); exceptions.push(parse_catch_exception(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { exceptions.push(parse_catch_exception(input)?); } input.then(Sep(Colon))?; @@ -383,8 +378,7 @@ fn parse_try(input: &mut Input) -> Result { catches.push(CatchBranch { exceptions, body }); } let mut finally = None; - if input.peek()? == &Key(Keyword::Finally) { - input.next()?; + if input.peek(Key(Keyword::Finally)) { input.then(Sep(Colon))?; finally = Some(parse_body(input)?); } @@ -395,8 +389,7 @@ fn parse_try(input: &mut Input) -> Result { fn parse_catch_exception(input: &mut Input) -> Result<(Id, Option)> { let id = parse_ident(input)?; let mut alias = None; - if input.peek()? == &Key(Keyword::As) { - input.next()?; + if input.peek(Key(Keyword::As)) { alias = Some(parse_ident(input)?); } Ok((id, alias)) @@ -406,17 +399,14 @@ fn parse_catch_exception(input: &mut Input) -> Result<(Id, Option)> { fn parse_match(input: &mut Input) -> Result { let item = parse_pattern(input)?; // fixme let mut branches = Vec::new(); - while input.peek()? == &Key(Keyword::Of) { - input.next()?; + while input.peek(Key(Keyword::Of)) { let mut patterns = Vec::new(); patterns.push(parse_pattern(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { patterns.push(parse_pattern(input)?); } let mut guard = None; - if input.peek()? == &Key(Keyword::Where) { - input.next()?; + if input.peek(Key(Keyword::Where)) { guard = Some(parse_expr(input)?) } input.then(Sep(Colon))?; @@ -449,11 +439,9 @@ fn parse_type(input: &mut Input) -> Result { }, Word(id) => { let mut generics = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { generics.push(parse_type(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { generics.push(parse_type(input)?); } input.then(Sep(GenericRightBracket))?; @@ -467,11 +455,9 @@ fn parse_type(input: &mut Input) -> Result { /// `StructType ::= 'struct' ('[' Ident ':' Type (',' Ident ':' Type)* ']')?` fn parse_struct_type(input: &mut Input) -> Result { let mut res = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { res.push(parse_struct_field(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { res.push(parse_struct_field(input)?); } input.then(Sep(GenericRightBracket))?; @@ -489,11 +475,9 @@ fn parse_struct_field(input: &mut Input) -> Result<(Id, Box)> { /// `TupleType ::= 'tuple' ('[' (Ident ':')? Type (',' (Ident ':')? Type)* ']')?` fn parse_tuple_type(input: &mut Input) -> Result { let mut res = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { res.push(parse_tuple_field(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { res.push(parse_tuple_field(input)?); } input.then(Sep(GenericRightBracket))?; @@ -501,10 +485,12 @@ fn parse_tuple_type(input: &mut Input) -> Result { Ok(Type::Tuple(res)) } +// annoyingly complex to parse. `TupleField ::= (Ident ':')? Type` fn parse_tuple_field(input: &mut Input) -> Result<(Option, Box)> { - match input.peek()?.clone() { // huh??? - Word(id) if input.peek_nth(1)? == &Sep(Colon) => { + match input.peek_opt().clone() { + Some(Word(id)) if input.peek_nth(1) == Some(&Sep(Colon)) => { input.next()?; + input.then(Sep(Colon))?; Ok((Some(id.to_string()), Box::new(parse_type(input)?))) }, _ => Ok((None, Box::new(parse_type(input)?))) @@ -514,11 +500,9 @@ fn parse_tuple_field(input: &mut Input) -> Result<(Option, Box)> { /// `EnumType ::= 'enum' ('[' Ident ('=' Pattern)? (Ident ('=' Pattern)?)* ']')?` fn parse_enum_type(input: &mut Input) -> Result { let mut res = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { res.push(parse_enum_variant(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { res.push(parse_enum_variant(input)?); } input.then(Sep(GenericRightBracket))?; @@ -529,8 +513,7 @@ fn parse_enum_type(input: &mut Input) -> Result { fn parse_enum_variant(input: &mut Input) -> Result<(Id, Option)> { let id = parse_ident(input)?; let mut kind = None; - if input.peek()? == &Sep(Equals) { - input.next()?; + if input.peek(Sep(Equals)) { kind = Some(parse_pattern(input)?); } Ok((id, kind)) @@ -539,11 +522,9 @@ fn parse_enum_variant(input: &mut Input) -> Result<(Id, Option)> { /// `UnionType ::= 'union' ('[' Ident (':' Type)? (',' Ident (':' Type)?)* ']')?` fn parse_union_type(input: &mut Input) -> Result { let mut res = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { res.push(parse_union_variant(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { res.push(parse_union_variant(input)?); } input.then(Sep(GenericRightBracket))?; @@ -554,8 +535,7 @@ fn parse_union_type(input: &mut Input) -> Result { fn parse_union_variant(input: &mut Input) -> Result<(Id, Box)> { let id = parse_ident(input)?; let mut kind = Box::new(Type::Alias { id: "unit".to_string(), generics: Vec::new() }); - if input.peek()? == &Sep(Colon) { - input.next()?; + if input.peek(Sep(Colon)) { kind = Box::new(parse_type(input)?); } Ok((id, kind)) @@ -564,11 +544,9 @@ fn parse_union_variant(input: &mut Input) -> Result<(Id, Box)> { /// `Interface ::= 'interface' ('[' Signature (',' Signature)* ']')?` fn parse_interface(input: &mut Input) -> Result { let mut res = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { res.push(parse_signature(input)?); - while input.peek()? == &Sep(Comma) { - input.next()?; + while input.peek(Sep(Comma)) { res.push(parse_signature(input)?); } input.then(Sep(GenericRightBracket))?; @@ -581,24 +559,20 @@ fn parse_signature(input: &mut Input) -> Result { let effect = None; let id = parse_ident(input)?; let mut generics = Vec::new(); - if input.peek()? == &Sep(GenericLeftBracket) { - input.next()?; + if input.peek(Sep(GenericLeftBracket)) { generics = parse_parameters(input)?; input.then(Sep(GenericRightBracket))?; } let mut parameters = Vec::new(); - if input.peek()? == &Sep(FuncLeftParen) { - input.next()?; + if input.peek(Sep(FuncLeftParen)) { parameters.push(parse_type(input)?); - if input.peek()? == &Sep(Comma) { - input.next()?; + if input.peek(Sep(Comma)) { parameters.push(parse_type(input)?); } input.then(Sep(FuncRightParen))?; } let mut kind = None; - if input.peek()? == &Sep(Colon) { - input.next()?; + if input.peek(Sep(Colon)) { kind = Some(parse_type(input)?); } Ok(Sig { effect, id, generics, parameters, kind }) @@ -606,7 +580,7 @@ fn parse_signature(input: &mut Input) -> Result { /// `WrappedType ::= Type | ('[' Type ']')` fn parse_wrapped_type(input: &mut Input) -> Result { - if input.next()? == Sep(GenericLeftBracket) { + if input.peek(Sep(GenericLeftBracket)) { let result = parse_type(input)?; input.then(Sep(GenericRightBracket))?; Ok(result) @@ -618,9 +592,6 @@ fn parse_wrapped_type(input: &mut Input) -> Result { /// Pattern ::= Literal | Ident | '(' Pattern (',' Pattern)* ')' | Ident '(' Pattern (',' Pattern)* ')' fn parse_pattern(input: &mut Input) -> Result { todo!() } -/// Literal ::= Char | String | Number | Float -fn parse_literal(input: &mut Input) -> Result { todo!() } - fn parse_ident(input: &mut Input) -> Result { match input.next()? { Word(id) => Ok(id), diff --git a/src/main.rs b/src/main.rs index d7c995c..ad8b5cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,4 +3,6 @@ mod frontend; +pub type Result = core::result::Result>; + fn main() {} -- cgit v1.2.3-70-g09d2