about summary refs log tree commit diff
diff options
context:
space:
mode:
author JJ 2023-11-26 05:07:23 +0000
committer JJ 2023-11-26 05:08:39 +0000
commit 0121346e894fb0e7f60312b16986d82109e4d86b (patch)
tree e9939bcb02e1a331f687b4b89e155e2c97cf7b1f
parent 3851012eeb1420bef0db7cd3e9a76affdb6145b9 (diff)
compiler: preemptively fix EOF bugs in parser
-rw-r--r-- src/frontend/lex.rs 3
-rw-r--r-- src/frontend/parse.rs 215
-rw-r--r-- src/main.rs 2
3 files changed, 97 insertions, 123 deletions
diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs
index 67c4ae3..f751c63 100644
--- a/src/frontend/lex.rs
+++ b/src/frontend/lex.rs
@@ -1,6 +1,7 @@
use multipeek::multipeek;
-pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
+use crate::*;
+
pub struct TokenStream(Vec<Token>);
impl IntoIterator for TokenStream {
diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs
index eee3911..8616346 100644
--- a/src/frontend/parse.rs
+++ b/src/frontend/parse.rs
@@ -1,9 +1,10 @@
use multipeek::multipeek;
-use crate::frontend::lex::*;
-use crate::frontend::ast::*;
-use crate::frontend::ast::Binding::*;
-use crate::frontend::ast::Control::*;
+use crate::*;
+use super::lex::*;
+use super::ast::*;
+use super::ast::Binding::*;
+use super::ast::Control::*;
use Token::*;
use Literal::*;
use Punctuation::*;
@@ -16,13 +17,24 @@ impl Input {
self.0.next().ok_or("end of input".into())
}
- /// Map input.peek() to return Results for use with the propagation operator
- fn peek(&mut self) -> Result<&Token> {
- self.0.peek().ok_or("end of input".into())
+ /// Check if the next character is an expected token, and if so, advance the iterator and return true
+ fn peek(&mut self, expected: Token) -> bool {
+ if self.0.peek() == Some(&expected) {
+ self.0.next();
+ true
+ } else {
+ false
+ }
+ }
+
+ /// Expose input.peek() (we don't want EOF to trigger an error when peeking)
+ fn peek_opt(&mut self) -> Option<&Token> {
+ self.0.peek()
}
- fn peek_nth(&mut self, n: usize) -> Result<&Token> {
- self.0.peek_nth(n).ok_or("end of input".into())
+ /// Expose input.peek_nth()
+ fn peek_nth(&mut self, n: usize) -> Option<&Token> {
+ self.0.peek_nth(n)
}
/// Asserts the next character to be a known token
@@ -32,12 +44,20 @@ impl Input {
token => Err(format!("expected token {} but found {}", expected, token).into())
}
}
+
+ /// Allow usage of `len`
+ fn len(&self) -> usize {
+ self.0.len()
+ }
}
/// Convert a basic TokenStream into an AbstractSyntaxTree
pub fn astify(input: TokenStream, name: &str) -> Result<Expr> {
let mut input = Input(multipeek(input));
let body = parse_body(&mut input)?;
+ if input.len() > 0 {
+ return Err(format!("additional tokens remaining after the body!").into());
+ }
Ok(Expr::Binding(Module{ id: name.to_string(), body }))
}
@@ -45,18 +65,16 @@ pub fn astify(input: TokenStream, name: &str) -> Result<Expr> {
/// Body ::= Expr | ('{' Expr (';' Expr)* '}')
fn parse_body(input: &mut Input) -> Result<Vec<Expr>> {
let mut res = Vec::new();
- if input.peek()? != &Sep(ScopeLeftBrace) {
+ if !input.peek(Sep(ScopeLeftBrace)) {
res.push(parse_expr(input)?);
return Ok(res);
}
- input.next()?;
- while input.peek()? != &Sep(ScopeRightBrace) {
+ while !input.peek(Sep(ScopeRightBrace)) {
res.push(parse_expr(input)?);
- if input.peek()? == &Sep(Semicolon) {
- input.next()?;
- }
+ // consume semicolons. there doesn't *have* to be a semicolon though.
+ // this should probably be checked to be a semicolon or a right brace.
+ input.peek(Sep(Semicolon));
}
- input.next()?;
Ok(res)
}
@@ -64,6 +82,8 @@ fn parse_body(input: &mut Input) -> Result<Vec<Expr>> {
/// Block | Static | For | While | Loop | If | When | Try | Match
fn parse_expr(input: &mut Input) -> Result<Expr> {
use Keyword::*;
+ // Note that this match consumes, as peeking is redundant.
+ // This is why subsequent functions do not check for their leading keyword, i.e. 'let'
match input.next()? {
Key(word) => match word {
Pub => {
@@ -116,7 +136,7 @@ fn parse_var(input: &mut Input) -> Result<Expr> {
let id = parse_pattern(input)?;
let kind = parse_annotation(input)?;
let mut value = None;
- if input.next()? != Sep(Equals) {
+ if input.peek(Sep(Equals)) {
value = Some(Box::new(parse_expr(input)?));
}
Ok(Expr::Binding(Var { id, kind, value }))
@@ -131,29 +151,26 @@ fn parse_const(input: &mut Input, public: bool) -> Result<Expr> {
Ok(Expr::Binding(Const { public, id, kind, value }))
}
-/// Annotation ::= (':' TypeDesc)?
+/// Annotation ::= (':' Type)?
fn parse_annotation(input: &mut Input) -> Result<Option<Type>> {
- let mut kind = None;
- if input.peek()? == &Sep(Colon) {
- input.next()?;
- kind = Some(parse_type(input)?);
+ if input.peek(Sep(Colon)) {
+ Ok(Some(parse_type(input)?))
+ } else {
+ Ok(None)
}
- Ok(kind)
}
-/// `Func ::= 'pub'? 'func' Ident ('[' Parameters ']')? ('(' Parameters ')')? (':' TypeDesc) '=' Body`
+/// `Func ::= 'pub'? 'func' Ident ('[' Parameters ']')? ('(' Parameters ')')? Annotation '=' Body`
fn parse_func(input: &mut Input, public: bool) -> Result<Expr> {
let effect = None;
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
let mut parameters = Vec::new();
- if input.peek()? == &Sep(FuncLeftParen) {
- input.next()?; // todo: rewrite to map over an input
+ if input.peek(Sep(FuncLeftParen)) { // todo: rewrite to map over an input
// let temp_parameters = parse_parameters(input)?;
// if temp_parameters.last().is_none() {
// return Err("expected a type annotation on the last function parameter".into());
@@ -166,8 +183,7 @@ fn parse_func(input: &mut Input, public: bool) -> Result<Expr> {
} else {
stack.push(id);
}
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
let (id, kind) = parse_parameter(input)?;
stack.push(id);
if kind.is_some() {
@@ -183,8 +199,7 @@ fn parse_func(input: &mut Input, public: bool) -> Result<Expr> {
input.then(Sep(FuncRightParen))?;
}
let mut kind = Type::Void;
- if input.peek()? == &Sep(Colon) {
- input.next()?;
+ if input.peek(Sep(Colon)) {
kind = parse_type(input)?;
}
input.then(Sep(Equals))?;
@@ -192,24 +207,21 @@ fn parse_func(input: &mut Input, public: bool) -> Result<Expr> {
Ok(Expr::Binding(Func { public, effect, id, generics, parameters, kind, body }))
}
-/// `Macro ::= 'pub'? 'macro' Ident ('[' Paremeters ']')? ('(' Paremeters ')')? (':' TypeDesc) '=' Body`
+/// `Macro ::= 'pub'? 'macro' Ident ('[' Parameters ']')? ('(' Parameters ')')? (':' Type) '=' Body`
fn parse_macro(input: &mut Input, public: bool) -> Result<Expr> {
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
let mut parameters = Vec::new();
- if input.peek()? == &Sep(FuncLeftParen) {
- input.next()?;
+ if input.peek(Sep(FuncLeftParen)) {
parameters = parse_parameters(input)?;
input.then(Sep(FuncRightParen))?;
}
let mut kind = None;
- if input.peek()? == &Sep(Colon) {
- input.next()?;
+ if input.peek(Sep(Colon)) {
kind = Some(parse_type(input)?);
}
input.then(Sep(Equals))?;
@@ -221,8 +233,7 @@ fn parse_macro(input: &mut Input, public: bool) -> Result<Expr> {
fn parse_typedecl(input: &mut Input, public: bool) -> Result<Expr> {
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
@@ -235,8 +246,7 @@ fn parse_typedecl(input: &mut Input, public: bool) -> Result<Expr> {
fn parse_parameter(input: &mut Input) -> Result<(Id, Option<Type>)> {
let id = parse_ident(input)?;
let mut kind = None;
- if input.peek()? == &Sep(Colon) {
- input.next()?;
+ if input.peek(Sep(Colon)) {
kind = Some(parse_type(input)?);
}
Ok((id, kind))
@@ -246,8 +256,7 @@ fn parse_parameter(input: &mut Input) -> Result<(Id, Option<Type>)> {
fn parse_parameters(input: &mut Input) -> Result<Vec<(Id, Option<Type>)>> {
let mut res = Vec::new();
res.push(parse_parameter(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
res.push(parse_parameter(input)?);
}
Ok(res)
@@ -255,18 +264,10 @@ fn parse_parameters(input: &mut Input) -> Result<Vec<(Id, Option<Type>)>> {
/// `Mod ::= 'pub'? 'mod' Ident ':' Body`
fn parse_mod(input: &mut Input, public: bool) -> Result<Expr> {
- match input.next()? {
- Word(id) => {
- match input.next()? {
- Sep(Colon) => {
- let body = parse_body(input)?;
- Ok(Expr::Binding(Module { id, body }))
- },
- _ => return Err("unexpected token following mod label".into()),
- }
- },
- _ => return Err("unexpected thing following mod keyword".into()),
- }
+ let id = parse_ident(input)?;
+ input.then(Sep(Colon))?;
+ let body = parse_body(input)?;
+ Ok(Expr::Binding(Module { id, body }))
}
/// `Use ::= 'use' Ident ('/' Ident)* ('/' (('[' Ident (',' Ident)* ']') | '*'))?`
@@ -326,13 +327,11 @@ fn parse_loop(input: &mut Input) -> Result<Expr> {
fn parse_if(input: &mut Input) -> Result<Expr> {
let mut branches = Vec::new();
branches.push(parse_cond_branch(input)?);
- while input.peek()? == &Key(Keyword::Elif) {
- input.next()?;
+ while input.peek(Key(Keyword::Elif)) {
branches.push(parse_cond_branch(input)?);
}
let mut else_body = None;
- if input.peek()? == &Key(Keyword::Else) {
- input.next()?;
+ if input.peek(Key(Keyword::Else)) {
else_body = Some(parse_body(input)?);
}
Ok(Expr::Control(If { branches, else_body }))
@@ -342,13 +341,11 @@ fn parse_if(input: &mut Input) -> Result<Expr> {
fn parse_when(input: &mut Input) -> Result<Expr> {
let mut branches = Vec::new();
branches.push(parse_cond_branch(input)?);
- while input.peek()? == &Key(Keyword::Elif) {
- input.next()?;
+ while input.peek(Key(Keyword::Elif)) {
branches.push(parse_cond_branch(input)?);
}
let mut else_body = None;
- if input.peek()? == &Key(Keyword::Else) {
- input.next()?;
+ if input.peek(Key(Keyword::Else)) {
input.then(Sep(Colon))?;
else_body = Some(parse_body(input)?);
}
@@ -365,17 +362,15 @@ fn parse_cond_branch(input: &mut Input) -> Result<CondBranch> {
Ok(CondBranch { cond, body })
}
-/// `Try ::= 'try' ':' Body ('except' Exception (',' Exception)* ':' Body) ('finally' ':' Body)?`
+/// `Try ::= 'try' ':' Body ('except' Exception (',' Exception)* ':' Body)* ('finally' ':' Body)?`
fn parse_try(input: &mut Input) -> Result<Expr> {
input.then(Sep(Colon))?;
let body = parse_body(input)?;
let mut catches = Vec::new();
- while input.peek()? == &Key(Keyword::Catch) {
- input.next()?;
+ while input.peek(Key(Keyword::Catch)) {
let mut exceptions = Vec::new();
exceptions.push(parse_catch_exception(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
exceptions.push(parse_catch_exception(input)?);
}
input.then(Sep(Colon))?;
@@ -383,8 +378,7 @@ fn parse_try(input: &mut Input) -> Result<Expr> {
catches.push(CatchBranch { exceptions, body });
}
let mut finally = None;
- if input.peek()? == &Key(Keyword::Finally) {
- input.next()?;
+ if input.peek(Key(Keyword::Finally)) {
input.then(Sep(Colon))?;
finally = Some(parse_body(input)?);
}
@@ -395,8 +389,7 @@ fn parse_try(input: &mut Input) -> Result<Expr> {
fn parse_catch_exception(input: &mut Input) -> Result<(Id, Option<Id>)> {
let id = parse_ident(input)?;
let mut alias = None;
- if input.peek()? == &Key(Keyword::As) {
- input.next()?;
+ if input.peek(Key(Keyword::As)) {
alias = Some(parse_ident(input)?);
}
Ok((id, alias))
@@ -406,17 +399,14 @@ fn parse_catch_exception(input: &mut Input) -> Result<(Id, Option<Id>)> {
fn parse_match(input: &mut Input) -> Result<Expr> {
let item = parse_pattern(input)?; // fixme
let mut branches = Vec::new();
- while input.peek()? == &Key(Keyword::Of) {
- input.next()?;
+ while input.peek(Key(Keyword::Of)) {
let mut patterns = Vec::new();
patterns.push(parse_pattern(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
patterns.push(parse_pattern(input)?);
}
let mut guard = None;
- if input.peek()? == &Key(Keyword::Where) {
- input.next()?;
+ if input.peek(Key(Keyword::Where)) {
guard = Some(parse_expr(input)?)
}
input.then(Sep(Colon))?;
@@ -449,11 +439,9 @@ fn parse_type(input: &mut Input) -> Result<Type> {
},
Word(id) => {
let mut generics = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
generics.push(parse_type(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
generics.push(parse_type(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -467,11 +455,9 @@ fn parse_type(input: &mut Input) -> Result<Type> {
/// `StructType ::= 'struct' ('[' Ident ':' Type (',' Ident ':' Type)* ']')?`
fn parse_struct_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
res.push(parse_struct_field(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
res.push(parse_struct_field(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -489,11 +475,9 @@ fn parse_struct_field(input: &mut Input) -> Result<(Id, Box<Type>)> {
/// `TupleType ::= 'tuple' ('[' (Ident ':')? Type (',' (Ident ':')? Type)* ']')?`
fn parse_tuple_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
res.push(parse_tuple_field(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
res.push(parse_tuple_field(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -501,10 +485,12 @@ fn parse_tuple_type(input: &mut Input) -> Result<Type> {
Ok(Type::Tuple(res))
}
+// annoyingly complex to parse. `TupleField ::= (Ident ':')? Type`
fn parse_tuple_field(input: &mut Input) -> Result<(Option<Id>, Box<Type>)> {
- match input.peek()?.clone() { // huh???
- Word(id) if input.peek_nth(1)? == &Sep(Colon) => {
+ match input.peek_opt().clone() {
+ Some(Word(id)) if input.peek_nth(1) == Some(&Sep(Colon)) => {
input.next()?;
+ input.then(Sep(Colon))?;
Ok((Some(id.to_string()), Box::new(parse_type(input)?)))
},
_ => Ok((None, Box::new(parse_type(input)?)))
@@ -514,11 +500,9 @@ fn parse_tuple_field(input: &mut Input) -> Result<(Option<Id>, Box<Type>)> {
/// `EnumType ::= 'enum' ('[' Ident ('=' Pattern)? (Ident ('=' Pattern)?)* ']')?`
fn parse_enum_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
res.push(parse_enum_variant(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
res.push(parse_enum_variant(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -529,8 +513,7 @@ fn parse_enum_type(input: &mut Input) -> Result<Type> {
fn parse_enum_variant(input: &mut Input) -> Result<(Id, Option<Pattern>)> {
let id = parse_ident(input)?;
let mut kind = None;
- if input.peek()? == &Sep(Equals) {
- input.next()?;
+ if input.peek(Sep(Equals)) {
kind = Some(parse_pattern(input)?);
}
Ok((id, kind))
@@ -539,11 +522,9 @@ fn parse_enum_variant(input: &mut Input) -> Result<(Id, Option<Pattern>)> {
/// `UnionType ::= 'union' ('[' Ident (':' Type)? (',' Ident (':' Type)?)* ']')?`
fn parse_union_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
res.push(parse_union_variant(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
res.push(parse_union_variant(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -554,8 +535,7 @@ fn parse_union_type(input: &mut Input) -> Result<Type> {
fn parse_union_variant(input: &mut Input) -> Result<(Id, Box<Type>)> {
let id = parse_ident(input)?;
let mut kind = Box::new(Type::Alias { id: "unit".to_string(), generics: Vec::new() });
- if input.peek()? == &Sep(Colon) {
- input.next()?;
+ if input.peek(Sep(Colon)) {
kind = Box::new(parse_type(input)?);
}
Ok((id, kind))
@@ -564,11 +544,9 @@ fn parse_union_variant(input: &mut Input) -> Result<(Id, Box<Type>)> {
/// `Interface ::= 'interface' ('[' Signature (',' Signature)* ']')?`
fn parse_interface(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
res.push(parse_signature(input)?);
- while input.peek()? == &Sep(Comma) {
- input.next()?;
+ while input.peek(Sep(Comma)) {
res.push(parse_signature(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -581,24 +559,20 @@ fn parse_signature(input: &mut Input) -> Result<Sig> {
let effect = None;
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek()? == &Sep(GenericLeftBracket) {
- input.next()?;
+ if input.peek(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
let mut parameters = Vec::new();
- if input.peek()? == &Sep(FuncLeftParen) {
- input.next()?;
+ if input.peek(Sep(FuncLeftParen)) {
parameters.push(parse_type(input)?);
- if input.peek()? == &Sep(Comma) {
- input.next()?;
+ if input.peek(Sep(Comma)) {
parameters.push(parse_type(input)?);
}
input.then(Sep(FuncRightParen))?;
}
let mut kind = None;
- if input.peek()? == &Sep(Colon) {
- input.next()?;
+ if input.peek(Sep(Colon)) {
kind = Some(parse_type(input)?);
}
Ok(Sig { effect, id, generics, parameters, kind })
@@ -606,7 +580,7 @@ fn parse_signature(input: &mut Input) -> Result<Sig> {
/// `WrappedType ::= Type | ('[' Type ']')`
fn parse_wrapped_type(input: &mut Input) -> Result<Type> {
- if input.next()? == Sep(GenericLeftBracket) {
+ if input.peek(Sep(GenericLeftBracket)) {
let result = parse_type(input)?;
input.then(Sep(GenericRightBracket))?;
Ok(result)
@@ -618,9 +592,6 @@ fn parse_wrapped_type(input: &mut Input) -> Result<Type> {
/// Pattern ::= Literal | Ident | '(' Pattern (',' Pattern)* ')' | Ident '(' Pattern (',' Pattern)* ')'
fn parse_pattern(input: &mut Input) -> Result<Pattern> { todo!() }
-/// Literal ::= Char | String | Number | Float
-fn parse_literal(input: &mut Input) -> Result<Pattern> { todo!() }
-
fn parse_ident(input: &mut Input) -> Result<Id> {
match input.next()? {
Word(id) => Ok(id),
diff --git a/src/main.rs b/src/main.rs
index d7c995c..ad8b5cf 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,4 +3,6 @@
mod frontend;
+pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
+
fn main() {}