aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJJ2023-11-06 01:54:53 +0000
committerJJ2023-11-06 01:54:53 +0000
commitdc1640fd403649f6e54146754b30dbfa3145fba9 (patch)
tree1039640a797d369dee3acb6f2b47ec1b7e575031
parentbfe0a6fa21f1124c60dd4cc4fd60cdd9c2ec6d4f (diff)
compiler: progress on parser
-rw-r--r--src/frontend/ast.rs34
-rw-r--r--src/frontend/lex.rs15
-rw-r--r--src/frontend/parse.rs134
3 files changed, 127 insertions, 56 deletions
diff --git a/src/frontend/ast.rs b/src/frontend/ast.rs
index 6c7963e..ed7a01a 100644
--- a/src/frontend/ast.rs
+++ b/src/frontend/ast.rs
@@ -5,9 +5,12 @@ pub type Id = String;
pub enum Type {
Void, Never,
Integer, Float, String, // char et al are defined later
- Func{from: Box<Type>, to: Box<Type>}, // todo: multiple params, effects
+ Func { // todo: multiple params, effects
+ from: Box<Type>,
+ to: Box<Type>
+ },
Struct(Vec<(Id, Box<Type>)>),
- Tuple(Vec<(Option<String>, Box<Type>)>),
+ Tuple(Vec<(Option<Id>, Box<Type>)>),
Union(Vec<(Id, Box<Type>)>),
Interface {
funcs: Vec<Sig>,
@@ -17,9 +20,14 @@ pub enum Type {
List(Box<Type>),
Slice(Box<Type>), // todo: plus ownership
Reference(Box<Type>),
+ Pointer(Box<Type>),
+ Distinct(Box<Type>), // todo: not sure
Mutable(Box<Type>), // parameters only
Static(Box<Type>), // parameters only
- Alias{ id: Id, params: Vec<Type> }, // todo: this is wrong
+ Alias { // todo: this is wrong
+ id: Id,
+ generics: Vec<Type>
+ }
}
/// Function signatures.
@@ -38,14 +46,11 @@ pub enum Pattern {
Ident(Id), // type aliases, union variants, calls...
Number(i64), Float(f64),
Char(char), String(String),
- Struct(Vec<StructPattern>),
- Tuple(Vec<TuplePattern>),
+ Struct(Vec<(Id, Expr)>), // field, value
+ Tuple(Vec<(Option<Id>, Expr)>), // field, value
List(Vec<Expr>), // arrays, slices, lists
}
-pub struct StructPattern { field: Id, value: Expr }
-pub struct TuplePattern { field: Option<Id>, value: Expr }
-
/// Expressions introduce a new binding or bindings, in some regard.
pub enum Binding {
Let {
@@ -68,8 +73,8 @@ pub enum Binding {
public: bool,
effect: Option<Id>,
id: Id,
- generics: Vec<GenericDecl>,
- params: Vec<ParamDecl>,
+ generics: Vec<(Id, Option<Type>)>, // id, kind
+ params: Vec<(Id, Type)>, // id, kind
kind: Type,
body: Vec<Expr>
},
@@ -78,9 +83,6 @@ pub enum Binding {
Module { id: Id, body: Vec<Expr> },
}
-pub struct GenericDecl { id: Id, kind: Option<Type> }
-pub struct ParamDecl { id: Id, kind: Type }
-
/// Expressions related to control flow.
pub enum Control {
Call { id: Id, params: Vec<Expr> }, // function calls, macro invocations, field access...
@@ -104,9 +106,9 @@ pub enum Control {
Loop { body: Vec<Expr> },
}
-pub struct CondBranch { cond: Expr, body: Vec<Expr> }
-pub struct CatchBranch { exceptions: Vec<Id>, binding: Option<Id>, body: Vec<Expr> }
-pub struct MatchBranch { pattern: Pattern, guard: Option<Expr>, body: Vec<Expr> }
+pub struct CondBranch { pub cond: Expr, pub body: Vec<Expr> }
+pub struct CatchBranch { pub exceptions: Vec<Id>, pub binding: Option<Id>, pub body: Vec<Expr> }
+pub struct MatchBranch { pub pattern: Pattern, pub guard: Option<Expr>, pub body: Vec<Expr> }
/// Expressions are either Patterns, Bindings, or Control flow constructs.
pub enum Expr {
diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs
index 771ba38..0d9fd22 100644
--- a/src/frontend/lex.rs
+++ b/src/frontend/lex.rs
@@ -60,7 +60,7 @@ pub enum Keyword {
If, When, Elif, Else, Match,
Try, Catch, Finally,
Struct, Tuple, Enum, Union, Interface,
- Distinct, Ref, // todo: Mut once figured out
+ Distinct, Ref, Ptr, Mut,
Break, Continue, Return,
In, Is, Of, As,
}
@@ -293,6 +293,8 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
"interface" => res.push(Key(Interface)),
"distinct" => res.push(Key(Distinct)),
"ref" => res.push(Key(Ref)),
+ "ptr" => res.push(Key(Ptr)),
+ "mut" => res.push(Key(Mut)),
"break" => res.push(Key(Break)),
"continue" => res.push(Key(Continue)),
"return" => res.push(Key(Return)),
@@ -347,14 +349,14 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
res.push(Sep(ArrayLeftBracket));
state.bracket_stack.push(Bracket::Array);
},
- ')' => {
+ ')' => { // match parens
match state.paren_stack.pop() {
Some(Paren::Func) => res.push(Sep(FuncRightParen)),
Some(Paren::Tuple) => res.push(Sep(TupleRightParen)),
None => return Err(MismatchedParens.into()),
}
},
- ']' => {
+ ']' => { // match brackets
match state.bracket_stack.pop() {
Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)),
Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)),
@@ -366,8 +368,8 @@ pub fn tokenize(input: &str) -> Result<TokenStream> {
input.next();
}
},
- '`' => {
- res.push(Sep(BackTick));
+ '`' => { // backticks are used for operators, so generics/parameters may follow
+ res.push(Sep(BackTick)); // todo: consider not using backticks for operators at all
match input.peek() {
Some('(') => {
res.push(Sep(FuncLeftParen));
@@ -488,6 +490,8 @@ impl std::fmt::Display for Keyword {
Interface => write!(f, "interface"),
Distinct => write!(f, "distinct"),
Ref => write!(f, "ref"),
+ Ptr => write!(f, "ptr"),
+ Mut => write!(f, "mut"),
Break => write!(f, "break"),
Continue => write!(f, "continue"),
Return => write!(f, "return"),
@@ -498,6 +502,7 @@ impl std::fmt::Display for Keyword {
}
}
}
+
impl std::fmt::Display for Punctuation {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Punctuation::*;
diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs
index c525982..43de16e 100644
--- a/src/frontend/parse.rs
+++ b/src/frontend/parse.rs
@@ -1,5 +1,3 @@
-use std::fmt;
-
use crate::frontend::lex::*;
use crate::frontend::ast::*;
use crate::frontend::ast::Binding::*;
@@ -43,9 +41,8 @@ fn parse_body(input: &mut Input, state: State) -> Result<Vec<Expr>> {
Ok(res)
}
-/// Expr ::= Let | Var | Const | Func | Type |
-/// Mod | Import | Block | Static |
-/// For | While | Loop | If | When | Try | Match
+/// Expr ::= Let | Var | Const | Func | Type | Mod | Import |
+/// Block | Static | For | While | Loop | If | When | Try | Match
fn parse_expr(input: &mut Input, state: State) -> Result<Expr> {
use Keyword::*;
match input.next() {
@@ -69,7 +66,7 @@ fn parse_expr(input: &mut Input, state: State) -> Result<Expr> {
Func => parse_funcdecl(input, state, false),
Type => parse_typedecl(input, state, false),
Mod => parse_mod(input, state, false),
- From => parse_import(input, state, true),
+ From => parse_import(input, state, true), // todo: probably rework imports
Import => parse_import(input, state, false),
Block => parse_block(input, state),
Static => parse_static(input, state),
@@ -86,14 +83,19 @@ fn parse_expr(input: &mut Input, state: State) -> Result<Expr> {
}
}
-/// Let ::= 'let' Pattern Annotation? '=' Expr
-fn parse_let(input: &mut Input, state: State) -> Result<Expr> {
- let id = parse_pattern(input, state)?;
+/// Annotation ::= (':' TypeDesc)?
+fn parse_annotation(input: &mut Input, state: State) -> Result<Option<Type>> {
let mut kind = None;
if let Some(Sep(Colon)) = input.peek() {
input.next();
- kind = Some(parse_typedesc(input, state)?);
+ kind = Some(parse_type(input, state)?);
}
+ Ok(kind)
+}
+/// Let ::= 'let' Pattern Annotation? '=' Expr
+fn parse_let(input: &mut Input, state: State) -> Result<Expr> {
+ let id = parse_pattern(input, state)?;
+ let kind = parse_annotation(input, state)?;
if input.next() != Some(Sep(Equals)) {
return Err("= not following binding".into())
}
@@ -103,39 +105,31 @@ fn parse_let(input: &mut Input, state: State) -> Result<Expr> {
/// Var ::= 'var' Pattern Annotation? ('=' Expr)?
fn parse_var(input: &mut Input, state: State) -> Result<Expr> {
let id = parse_pattern(input, state)?;
- let mut kind = None;
- if let Some(Sep(Colon)) = input.peek() {
- input.next();
- kind = Some(parse_typedesc(input, state)?);
- }
+ let kind = parse_annotation(input, state)?;
let mut value = None;
if input.next() != Some(Sep(Equals)) {
value = Some(Box::new(parse_expr(input, state)?));
}
Ok(Expr::Binding(Var { id, kind, value }))
}
-// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr
+/// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr
fn parse_const(input: &mut Input, state: State, public: bool) -> Result<Expr> {
let id = parse_pattern(input, state)?;
- let mut kind = None;
- if let Some(Sep(Colon)) = input.peek() {
- input.next();
- kind = Some(parse_typedesc(input, state)?);
- }
+ let kind = parse_annotation(input, state)?;
if input.next() != Some(Sep(Equals)) {
return Err("= not following binding".into())
}
let value = Box::new(parse_expr(input, state)?);
Ok(Expr::Binding(Const { public, id, kind, value }))
}
-// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body
+/// Func ::= 'pub'? 'func' Ident Generics? Parameters? (':' TypeDesc) '=' Body
fn parse_funcdecl(input: &mut Input, state: State, public: bool) -> Result<Expr> { todo!() }
-// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc
+/// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc
fn parse_typedecl(input: &mut Input, state: State, public: bool) -> Result<Expr> {
let pattern = parse_pattern(input, state)?;
todo!()
}
-// Mod ::= 'pub'? 'mod' Ident ':' Body
+/// Mod ::= 'pub'? 'mod' Ident ':' Body
fn parse_mod(input: &mut Input, state: State, public: bool) -> Result<Expr> {
match input.next() {
Some(Word(id)) => {
@@ -151,7 +145,7 @@ fn parse_mod(input: &mut Input, state: State, public: bool) -> Result<Expr> {
}
}
-// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)?
+/// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)?
fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result<Expr> {
let mut from = None;
if from_scope {
@@ -165,7 +159,7 @@ fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result<Exp
}
todo!()
}
-// Block ::= 'block' Ident? ':' Body
+/// Block ::= 'block' Ident? ':' Body
fn parse_block(input: &mut Input, state: State) -> Result<Expr> { // todo: body + offset
match input.next() {
Some(Sep(Colon)) => {
@@ -186,7 +180,7 @@ fn parse_block(input: &mut Input, state: State) -> Result<Expr> { // todo: body
_ => return Err("unexpected thing following block keyword".into()),
}
}
-// Static ::= 'static' ':' Body
+/// Static ::= 'static' ':' Body
fn parse_static(input: &mut Input, state: State) -> Result<Expr> {
if input.next() != Some(Sep(Colon)) {
return Err("colon must follow static invocation".into());
@@ -195,7 +189,7 @@ fn parse_static(input: &mut Input, state: State) -> Result<Expr> {
Ok(Expr::Control(Static { body }))
}
-// For ::= 'for' Pattern 'in' Expr ':' Body
+/// For ::= 'for' Pattern 'in' Expr ':' Body
fn parse_for(input: &mut Input, state: State) -> Result<Expr> {
let binding = parse_pattern(input, state)?;
if input.next() != Some(Key(Keyword::In)) {
@@ -208,7 +202,7 @@ fn parse_for(input: &mut Input, state: State) -> Result<Expr> {
let body = parse_body(input, state.indent())?;
Ok(Expr::Control(For { binding, range, body }))
}
-// While ::= 'while' Expr ':' Body
+/// While ::= 'while' Expr ':' Body
fn parse_while(input: &mut Input, state: State) -> Result<Expr> {
let cond = Box::new(parse_expr(input, state)?);
if input.next() != Some(Sep(Colon)) {
@@ -217,7 +211,7 @@ fn parse_while(input: &mut Input, state: State) -> Result<Expr> {
let body = parse_body(input, state.indent())?;
Ok(Expr::Control(While { cond, body }))
}
-// Loop ::= 'loop' ':' Body
+/// Loop ::= 'loop' ':' Body
fn parse_loop(input: &mut Input, state: State) -> Result<Expr> {
if input.next() != Some(Sep(Colon)) {
return Err("expected colon after loop keyword".into());
@@ -226,7 +220,7 @@ fn parse_loop(input: &mut Input, state: State) -> Result<Expr> {
Ok(Expr::Control(Loop { body }))
}
-// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?
+/// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?
fn parse_if(input: &mut Input, state: State) -> Result<Expr> {
let mut branches = Vec::new();
branches.push(parse_cond_branch(input, state)?);
@@ -258,7 +252,8 @@ fn parse_when(input: &mut Input, state: State) -> Result<Expr> {
body.push(Expr::Control(If { branches, else_body }));
Ok(Expr::Control(Static { body }))
}
-// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)?
+fn parse_cond_branch(input: &mut Input, state: State) -> Result<CondBranch> { todo!() }
+/// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)?
fn parse_try(input: &mut Input, state: State) -> Result<Expr> {
if input.next() != Some(Sep(Colon)) {
return Err("expected colon after try keyword".into());
@@ -279,7 +274,7 @@ fn parse_try(input: &mut Input, state: State) -> Result<Expr> {
}
Ok(Expr::Control(Try { body, catches, finally }))
}
-// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+
+/// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+
fn parse_match(input: &mut Input, state: State) -> Result<Expr> {
let item = parse_pattern(input, state)?;
let mut branches = Vec::new();
@@ -290,8 +285,77 @@ fn parse_match(input: &mut Input, state: State) -> Result<Expr> {
Ok(Expr::Control(Match { item, branches }))
}
-fn parse_typedesc(input: &mut Input, state: State) -> Result<Type> { todo!() }
+/// Type ::=
+/// ('ref' | 'ptr' | 'mut' | 'static' | 'struct' | 'tuple' | 'enum' | 'union' | 'interface' | 'concept') |
+/// ('ref' WrappedType) | ('ptr' WrappedType) | ('mut' WrappedType) | ('static' WrappedType) | ('distinct' WrappedType) |
+/// StructType | TupleType | EnumType | UnionType | InterfaceType
+/// The input stream must be normalized before types are parsed, because parsing un-normalized type syntax is considerably harder.
+/// In particular: ref, ptr, mut, static, and distinct must wrap their parameters in '[' ']', and every type declaration must be on a single line.
+fn parse_type(input: &mut Input, state: State) -> Result<Type> {
+ use Type::*;
+ match input.next() {
+ Some(Key(word)) => {
+ match input.peek() { // todo: check if the type is a special typeclass
+ Some(Sep(GenericLeftBracket)) => (),
+ _ => todo!() // ref, ptr, mut, static, struct, tuple, enum, union, interface, concept
+ }
+ match word {
+ Keyword::Distinct => Ok(Distinct(Box::new(parse_wrapped_type(input, state)?))),
+ Keyword::Ref => Ok(Reference(Box::new(parse_wrapped_type(input, state)?))),
+ Keyword::Ptr => Ok(Pointer(Box::new(parse_wrapped_type(input, state)?))),
+ Keyword::Var => Ok(Mutable(Box::new(parse_wrapped_type(input, state)?))),
+ Keyword::Const => Ok(Static(Box::new(parse_wrapped_type(input, state)?))),
+ Keyword::Struct => parse_struct_type(input, state),
+ Keyword::Tuple => parse_tuple_type(input, state),
+ Keyword::Enum => parse_enum_type(input, state),
+ Keyword::Union => parse_union_type(input, state),
+ Keyword::Interface => parse_interface(input, state),
+ _ => return Err("invalid keyword present in type!".into())
+ }
+ },
+ Some(Word(id)) => {
+ let mut generics = Vec::new();
+ if let Some(Sep(GenericLeftBracket)) = input.peek() {
+ generics = parse_generics(input, state)?;
+ }
+ Ok(Alias { id, generics })
+ },
+ _ => return Err("error".into())
+ }
+}
+
+/// `StructType ::= 'struct' '[' Ident ':' Type (',' Ident ':' Type)* ']'`
+fn parse_struct_type(input: &mut Input, state: State) -> Result<Type> { todo!() }
+/// `TupleType ::= 'tuple' '[' (Ident ':')? Type (',' (Ident ':')? Type)* ']'`
+fn parse_tuple_type(input: &mut Input, state: State) -> Result<Type> { todo!() }
+/// `EnumType ::= 'enum' '[' Ident ('=' Pattern)? (Ident ('=' Pattern)?)* ']'`
+fn parse_enum_type(input: &mut Input, state: State) -> Result<Type> { todo!() }
+/// `UnionType ::= 'union' '[' Ident (':' Type)? (',' Ident (':' Type)?)* ']'`
+fn parse_union_type(input: &mut Input, state: State) -> Result<Type> { todo!() }
+/// `Interface ::= 'interface' '[' Signature (',' Signature)* ']'`
+fn parse_interface(input: &mut Input, state: State) -> Result<Type> { todo!() }
+/// `Signature ::= Ident ('[' Ident (':' Type)? (',' Ident (':' Type)?)* ']')? ('(' Type (',' Type)* ')')? (':' Type)?`
+fn parse_signature(input: &mut Input, state: State) -> Result<Sig> { todo!() }
+
+/// `WrappedType ::= Type | ('[' Type ']')`
+fn parse_wrapped_type(input: &mut Input, state: State) -> Result<Type> {
+ if let Some(Sep(GenericLeftBracket)) = input.next() {
+ let result = parse_type(input, state)?;
+ if let Some(Sep(GenericRightBracket)) = input.next() {
+ Ok(result)
+ } else {
+ Err("could not find closing generic bracket!".into())
+ }
+ } else {
+ parse_type(input, state)
+ }
+}
+
+/// `GenericType ::= '[' Type (',' Type)* ']'`
+fn parse_generics(input: &mut Input, state: State) -> Result<Vec<Type>> { todo!() }
+
+/// Pattern ::= Literal | Ident | '(' Pattern (',' Pattern)* ')' | Ident '(' Pattern (',' Pattern)* ')'
+/// Literal ::= Char | String | Number | Float
fn parse_pattern(input: &mut Input, state: State) -> Result<Pattern> { todo!() }
-fn parse_cond_branch(input: &mut Input, state: State) -> Result<CondBranch> { todo!() }
// lex, parse, expand, compile?