aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJJ2023-10-31 07:47:54 +0000
committerJJ2023-10-31 07:47:54 +0000
commit87d74952a614daa7075aeecef462ff51c4dc46e0 (patch)
tree3ab7269e32dbbbfc2adbf7061e886159cb90f968
parentf3efbfc03fabae11df0554e67769327d4dc57a83 (diff)
compiler: implement most of the parser
-rw-r--r--src/ast.rs5
-rw-r--r--src/lex.rs107
-rw-r--r--src/parse.rs322
3 files changed, 367 insertions, 67 deletions
diff --git a/src/ast.rs b/src/ast.rs
index e55b473..6c7963e 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -59,6 +59,7 @@ pub enum Binding {
value: Option<Box<Expr>> // variable bindings can be delayed
},
Const {
+ public: bool,
id: Pattern,
kind: Option<Type>,
value: Box<Expr>
@@ -83,7 +84,7 @@ pub struct ParamDecl { id: Id, kind: Type }
/// Expressions related to control flow.
pub enum Control {
Call { id: Id, params: Vec<Expr> }, // function calls, macro invocations, field access...
- Cond {
+ If {
branches: Vec<CondBranch>,
else_body: Option<Vec<Expr>>
},
@@ -93,7 +94,7 @@ pub enum Control {
finally: Option<Vec<Expr>>
},
Match {
- item: Box<Expr>,
+ item: Pattern,
branches: Vec<MatchBranch>
},
Block { id: Option<Id>, body: Vec<Expr> },
diff --git a/src/lex.rs b/src/lex.rs
index 396c06d..771ba38 100644
--- a/src/lex.rs
+++ b/src/lex.rs
@@ -30,7 +30,8 @@ impl std::error::Error for LexicalError {}
/// **Basic** syntax tokens. Form an unambiguous TokenStream.
#[derive(Clone, PartialEq)]
pub enum Token {
- Word(String), // identifiers.
+ Key(Keyword), // keyword identifiers.
+ Word(String), // non-keyword identifiers.
Num(String), // numeric value, ex. 413, 0b101011, 0xabcd
Lit(Literal), // literal value, ex. for strings/comments.
Sep(Punctuation), // punctuation. non-word tokens. operators are lexed as this and later transformed to words.
@@ -47,6 +48,23 @@ pub enum Literal {
MultiLineComment(String),
}
+/// Keywords, made explicit for easier use with Rust.
+/// (strings inside match patterns are fucky!!)
+#[derive(Clone, PartialEq)]
+pub enum Keyword {
+ Pub, Let, Var, Const,
+ Func, Macro, Type,
+ Mod, From, Import,
+ For, While, Loop,
+ Block, Static,
+ If, When, Elif, Else, Match,
+ Try, Catch, Finally,
+ Struct, Tuple, Enum, Union, Interface,
+ Distinct, Ref, // todo: Mut once figured out
+ Break, Continue, Return,
+ In, Is, Of, As,
+}
+
/// All punctuation recognized by the lexer.
/// Note the distinction between FuncLeftParen and TupleLeftParen.
#[derive(Clone, PartialEq)]
@@ -243,7 +261,47 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
input.next();
},
_ => {
- res.push(Word(String::from(&buf)));
+ use Keyword::*;
+ match buf.as_str() { // keywords!
+ "pub" => res.push(Key(Pub)),
+ "let" => res.push(Key(Let)),
+ "var" => res.push(Key(Var)),
+ "const" => res.push(Key(Const)),
+ "func" => res.push(Key(Func)),
+ "macro" => res.push(Key(Macro)),
+ "type" => res.push(Key(Type)),
+ "mod" => res.push(Key(Mod)),
+ "from" => res.push(Key(From)),
+ "import" => res.push(Key(Import)),
+ "for" => res.push(Key(For)),
+ "while" => res.push(Key(While)),
+ "loop" => res.push(Key(Loop)),
+ "block" => res.push(Key(Block)),
+ "static" => res.push(Key(Static)),
+ "if" => res.push(Key(If)),
+ "when" => res.push(Key(When)),
+ "elif" => res.push(Key(Elif)),
+ "else" => res.push(Key(Else)),
+ "match" => res.push(Key(Match)),
+ "try" => res.push(Key(Try)),
+ "catch" => res.push(Key(Catch)),
+ "finally" => res.push(Key(Finally)),
+ "struct" => res.push(Key(Struct)),
+ "tuple" => res.push(Key(Tuple)),
+ "enum" => res.push(Key(Enum)),
+ "union" => res.push(Key(Union)),
+ "interface" => res.push(Key(Interface)),
+ "distinct" => res.push(Key(Distinct)),
+ "ref" => res.push(Key(Ref)),
+ "break" => res.push(Key(Break)),
+ "continue" => res.push(Key(Continue)),
+ "return" => res.push(Key(Return)),
+ "in" => res.push(Key(In)),
+ "is" => res.push(Key(Is)),
+ "of" => res.push(Key(Of)),
+ "as" => res.push(Key(As)),
+ _ => res.push(Word(String::from(&buf)))
+ }
match x { // () and [] denote both parameters/generics and tuples/arrays
'(' => { // we must disambiguate by treating those *directly* after words as such
res.push(Sep(FuncLeftParen));
@@ -372,6 +430,7 @@ impl std::fmt::Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Token::*;
match self {
+ Key(word) => write!(f, "{}", word),
Word(val) => write!(f, "{}", val),
Num(val) => write!(f, "{}", val),
Lit(lit) => write!(f, "{}", lit),
@@ -395,6 +454,50 @@ impl std::fmt::Display for Literal {
}
}
+impl std::fmt::Display for Keyword {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ use Keyword::*;
+ match self {
+ Pub => write!(f, "pub"),
+ Let => write!(f, "let"),
+ Var => write!(f, "var"),
+ Const => write!(f, "const"),
+ Func => write!(f, "func"),
+ Macro => write!(f, "macro"),
+ Type => write!(f, "type"),
+ Mod => write!(f, "mod"),
+ From => write!(f, "from"),
+ Import => write!(f, "import"),
+ For => write!(f, "for"),
+ While => write!(f, "while"),
+ Loop => write!(f, "loop"),
+ Block => write!(f, "block"),
+ Static => write!(f, "static"),
+ If => write!(f, "if"),
+ When => write!(f, "when"),
+ Elif => write!(f, "elif"),
+ Else => write!(f, "else"),
+ Match => write!(f, "match"),
+ Try => write!(f, "try"),
+ Catch => write!(f, "catch"),
+ Finally => write!(f, "finally"),
+ Struct => write!(f, "struct"),
+ Tuple => write!(f, "tuple"),
+ Enum => write!(f, "enum"),
+ Union => write!(f, "union"),
+ Interface => write!(f, "interface"),
+ Distinct => write!(f, "distinct"),
+ Ref => write!(f, "ref"),
+ Break => write!(f, "break"),
+ Continue => write!(f, "continue"),
+ Return => write!(f, "return"),
+ In => write!(f, "in"),
+ Is => write!(f, "is"),
+ Of => write!(f, "of"),
+ As => write!(f, "as"),
+ }
+ }
+}
impl std::fmt::Display for Punctuation {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Punctuation::*;
diff --git a/src/parse.rs b/src/parse.rs
index 9a60863..1dabd47 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -1,101 +1,297 @@
-use multipeek::*;
+use std::fmt;
+
use crate::lex::*;
-use crate::ast::Expr;
+use crate::ast::*;
use crate::ast::Binding::*;
use crate::ast::Control::*;
use crate::ast::Pattern::*;
+use Token::*;
+use Literal::*;
+use Punctuation::*;
+
+type Input = std::iter::Peekable<std::vec::IntoIter<Token>>;
+
+#[derive(Clone, Copy)]
+struct State {
+ depth: usize,
+ step: usize
+}
+
+impl State {
+ fn indent(&self) -> State {
+ State { depth: self.depth + self.step, step: self.step }
+ }
+ fn dedent(&self) -> State {
+ State { depth: self.depth - self.step, step: self.step }
+ }
+}
/// Convert a basic TokenStream into an AbstractSyntaxTree
pub fn astify(input: TokenStream, name: &str) -> Result<Expr> {
- use Token::*;
- use Literal::*;
- use Punctuation::*;
+ let mut input = input.into_iter().peekable();
+ let body = parse_body(&mut input, State { depth: 0, step: 0 })?;
+ Ok(Expr::Binding(Module{ id: name.to_string(), body }))
+}
- let mut input = multipeek(input);
+/// Parse a series of Exprs, for ex. the body of a function.
+fn parse_body(input: &mut Input, state: State) -> Result<Vec<Expr>> {
let mut res = Vec::new();
- while let Some(x) = input.peek() {
- res.push(parse(&mut input, 0)?);
+ while input.peek() == Some(&Indent(state.depth)) {
+ input.next();
+ res.push(parse_expr(input, state)?);
}
- Ok(Expr::Binding(Module{ id: name.to_string(), body: res }))
+ Ok(res)
}
-fn parse(input: &mut MultiPeek<std::vec::IntoIter<Token>>, depth: usize) -> Result<Expr> {
- use Token::*;
- use Literal::*;
- use Punctuation::*;
- let mut input = input;
- match input.peek() {
- Some(Word(val)) => match val.as_str() {
- "pub" => {
- input.next();
- if let Some(Word(val)) = input.peek() {
- match val.as_str() {
- "const" => parse_const(&mut input, true),
- "func" => parse_func(&mut input, true),
- "type" => parse_type(&mut input, true),
- "mod" => parse_mod(&mut input, true),
+/// Expr ::= Let | Var | Const | Func | Type |
+/// Mod | Import | Block | Static |
+/// For | While | Loop | If | When | Try | Match
+fn parse_expr(input: &mut Input, state: State) -> Result<Expr> {
+ use Keyword::*;
+ match input.next() {
+ Some(Key(word)) => match word {
+ Pub => {
+ match input.next() {
+ Some(Key(word)) => match word {
+ Const => parse_const(input, state, true),
+ Func => parse_funcdecl(input, state, true),
+ Type => parse_typedecl(input, state, true),
+ Mod => parse_mod(input, state, true),
_ => return Err("unrecognized keyword following pub".into()),
}
- } else {
- return Err("unrecognized thing following pub".into());
+ Some(_) => return Err("unrecognized thing following pub".into()),
+ None => return Err("end of input".into()),
}
},
- "let" => parse_let(&mut input),
- "var" => parse_var(&mut input),
- "const" => parse_const(&mut input, false),
- "func" => parse_func(&mut input, false),
- "type" => parse_type(&mut input, false),
- "mod" => parse_mod(&mut input, false),
- "from" | "import" => parse_import(&mut input),
- "block" => parse_block(&mut input),
- "static" => parse_static(&mut input),
- "for" => parse_for(&mut input),
- "while" => parse_while(&mut input),
- "loop" => parse_loop(&mut input),
- "if" => parse_if(&mut input),
- "when" => parse_when(&mut input),
- "try" => parse_try(&mut input),
- "match" => parse_match(&mut input),
- _ => parse_line(&mut input),
+ Let => parse_let(input, state),
+ Var => parse_var(input, state),
+ Const => parse_const(input, state, false),
+ Func => parse_funcdecl(input, state, false),
+ Type => parse_typedecl(input, state, false),
+ Mod => parse_mod(input, state, false),
+ From => parse_import(input, state, true),
+ Import => parse_import(input, state, false),
+ Block => parse_block(input, state),
+ Static => parse_static(input, state),
+ For => parse_for(input, state),
+ While => parse_while(input, state),
+ Loop => parse_loop(input, state),
+ If => parse_if(input, state),
+ When => parse_when(input, state),
+ Try => parse_try(input, state),
+ Match => parse_match(input, state),
+ _ => return Err("invalid keyword starting expression".into()),
},
- _ => parse_line(&mut input),
+ _ => todo!(), // what can i do with this?? match line here
}
}
+/// Let ::= 'let' Pattern Annotation? '=' Expr
+fn parse_let(input: &mut Input, state: State) -> Result<Expr> {
+ let id = parse_pattern(input, state)?;
+ let mut kind = None;
+ if let Some(Sep(Colon)) = input.peek() {
+ input.next();
+ kind = Some(parse_typedesc(input, state)?);
+ }
+ if input.next() != Some(Sep(Equals)) {
+ return Err("= not following binding".into())
+ }
+ let value = Box::new(parse_expr(input, state)?);
+ Ok(Expr::Binding(Let { id, kind, value }))
+}
+/// Var ::= 'var' Pattern Annotation? ('=' Expr)?
+fn parse_var(input: &mut Input, state: State) -> Result<Expr> {
+ let id = parse_pattern(input, state)?;
+ let mut kind = None;
+ if let Some(Sep(Colon)) = input.peek() {
+ input.next();
+ kind = Some(parse_typedesc(input, state)?);
+ }
+ let mut value = None;
+ if input.next() != Some(Sep(Equals)) {
+ value = Some(Box::new(parse_expr(input, state)?));
+ }
+ Ok(Expr::Binding(Var { id, kind, value }))
+}
// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr
-fn parse_const(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+fn parse_const(input: &mut Input, state: State, public: bool) -> Result<Expr> {
+ let id = parse_pattern(input, state)?;
+ let mut kind = None;
+ if let Some(Sep(Colon)) = input.peek() {
+ input.next();
+ kind = Some(parse_typedesc(input, state)?);
+ }
+ if input.next() != Some(Sep(Equals)) {
+ return Err("= not following binding".into())
+ }
+ let value = Box::new(parse_expr(input, state)?);
+ Ok(Expr::Binding(Const { public, id, kind, value }))
+}
// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body
-fn parse_func(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+fn parse_funcdecl(input: &mut Input, state: State, public: bool) -> Result<Expr> { todo!() }
// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc
-fn parse_type(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+fn parse_typedecl(input: &mut Input, state: State, public: bool) -> Result<Expr> {
+ let pattern = parse_pattern(input, state)?;
+ todo!()
+}
// Mod ::= 'pub'? 'mod' Ident ':' Body
-fn parse_mod(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+fn parse_mod(input: &mut Input, state: State, public: bool) -> Result<Expr> {
+ match input.next() {
+ Some(Word(id)) => {
+ match input.next() {
+ Some(Sep(Colon)) => {
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Binding(Module { id, body }))
+ },
+ _ => return Err("unexpected token following mod label".into()),
+ }
+ },
+ _ => return Err("unexpected thing following mod keyword".into()),
+ }
+}
-// Let ::= 'let' Pattern Annotation? '=' Expr
-fn parse_let(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
-// Var ::= 'var' Pattern Annotation? ('=' Expr)?
-fn parse_var(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)?
-fn parse_import(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result<Expr> {
+ let mut from = None;
+ if from_scope {
+ match input.next() {
+ Some(Word(id)) => from = Some(id),
+ _ => return Err("identifier not following from keyword".into())
+ }
+ if input.next() != Some(Key(Keyword::Import)) {
+ return Err("expected import to follow from".into())
+ }
+ }
+ todo!()
+}
// Block ::= 'block' Ident? ':' Body
-fn parse_block(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_block(input: &mut Input, state: State) -> Result<Expr> { // todo: body + offset
+ match input.next() {
+ Some(Sep(Colon)) => {
+ let id = None;
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Block { id, body }))
+ },
+ Some(Word(label)) => {
+ match input.next() {
+ Some(Sep(Colon)) => {
+ let id = Some(label);
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Block { id, body }))
+ },
+ _ => return Err("unexpected token following block label".into()),
+ }
+ },
+ _ => return Err("unexpected thing following block keyword".into()),
+ }
+}
// Static ::= 'static' ':' Body
-fn parse_static(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_static(input: &mut Input, state: State) -> Result<Expr> {
+ if input.next() != Some(Sep(Colon)) {
+ return Err("colon must follow static invocation".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Static { body }))
+}
+
// For ::= 'for' Pattern 'in' Expr ':' Body
-fn parse_for(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_for(input: &mut Input, state: State) -> Result<Expr> {
+ let binding = parse_pattern(input, state)?;
+ if input.next() != Some(Key(Keyword::In)) {
+ return Err("expected in keyword after for pattern".into());
+ }
+ let range = Box::new(parse_expr(input, state)?);
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after in expression".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(For { binding, range, body }))
+}
// While ::= 'while' Expr ':' Body
-fn parse_while(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_while(input: &mut Input, state: State) -> Result<Expr> {
+ let cond = Box::new(parse_expr(input, state)?);
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after while keyword".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(While { cond, body }))
+}
// Loop ::= 'loop' ':' Body
-fn parse_loop(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_loop(input: &mut Input, state: State) -> Result<Expr> {
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after loop keyword".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Loop { body }))
+}
+
// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?
-fn parse_if(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_if(input: &mut Input, state: State) -> Result<Expr> {
+ let mut branches = Vec::new();
+ branches.push(parse_cond_branch(input, state)?);
+ while input.peek() == Some(&Key(Keyword::Elif)) {
+ input.next();
+ branches.push(parse_cond_branch(input, state)?);
+ }
+ let mut else_body = None;
+ if input.peek() == Some(&Key(Keyword::Else)) {
+ input.next();
+ else_body = Some(parse_body(input, state.indent())?);
+ }
+ Ok(Expr::Control(If { branches, else_body }))
+}
// When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?
-fn parse_when(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_when(input: &mut Input, state: State) -> Result<Expr> {
+ let mut branches = Vec::new();
+ branches.push(parse_cond_branch(input, state)?);
+ while input.peek() == Some(&Key(Keyword::Elif)) {
+ input.next();
+ branches.push(parse_cond_branch(input, state)?);
+ }
+ let mut else_body = None;
+ if input.peek() == Some(&Key(Keyword::Else)) {
+ input.next();
+ else_body = Some(parse_body(input, state.indent())?);
+ }
+ let mut body = Vec::new();
+ body.push(Expr::Control(If { branches, else_body }));
+ Ok(Expr::Control(Static { body }))
+}
// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)?
-fn parse_try(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_try(input: &mut Input, state: State) -> Result<Expr> {
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after try keyword".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ let catches = Vec::new();
+ while input.peek() == Some(&Key(Keyword::Catch)) {
+ input.next();
+ todo!();
+ }
+ let mut finally = None;
+ if input.peek() == Some(&Key(Keyword::Finally)) {
+ input.next();
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after try keyword".into());
+ }
+ finally = Some(parse_body(input, state.indent())?);
+ }
+ Ok(Expr::Control(Try { body, catches, finally }))
+}
// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+
-fn parse_match(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_match(input: &mut Input, state: State) -> Result<Expr> {
+ let item = parse_pattern(input, state)?;
+ let mut branches = Vec::new();
+ while input.peek() == Some(&Key(Keyword::Of)) {
+ input.next();
+ todo!();
+ }
+ Ok(Expr::Control(Match { item, branches }))
+}
-fn parse_line(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+fn parse_typedesc(input: &mut Input, state: State) -> Result<Type> { todo!() }
+fn parse_pattern(input: &mut Input, state: State) -> Result<Pattern> { todo!() }
+fn parse_cond_branch(input: &mut Input, state: State) -> Result<CondBranch> { todo!() }
// lex, parse, expand, compile?