aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJJ2024-04-06 23:48:19 +0000
committerJJ2024-04-07 17:43:08 +0000
commit51c8b349a77a8d8b1b34ce8e03518dad6e3cba00 (patch)
treeef42507e7eca659272d7c702d66945d8c5a1c335
parent22c4776f9fddef47a6ce3f309e4eafa2fbdc3a65 (diff)
compiler: rewrite lexer, clean up ast
-rw-r--r--src/frontend/ast.rs65
-rw-r--r--src/frontend/lex.rs682
-rw-r--r--src/frontend/parse.rs218
3 files changed, 544 insertions, 421 deletions
diff --git a/src/frontend/ast.rs b/src/frontend/ast.rs
index ed44712..b6ae7be 100644
--- a/src/frontend/ast.rs
+++ b/src/frontend/ast.rs
@@ -4,24 +4,25 @@ pub type Id = String;
/// Puck's fundamental types.
#[derive(Clone, PartialEq)]
pub enum Type {
- Void, Never,
+ Void, Never, Any,
Integer, Float, String, // char et al are defined later
- Func { // todo: multiple params, effects
- from: Box<Type>,
- to: Box<Type>
+ Func { // future: effects
+ from: Vec<Type>,
+ to: Vec<Type>
},
- Struct(Vec<(Id, Box<Type>)>),
- Tuple(Vec<(Option<Id>, Box<Type>)>),
- Union(Vec<(Id, Box<Type>)>),
+ Struct(Vec<(Id, Type)>),
+ Tuple(Vec<(Option<Id>, Type)>),
+ Union(Vec<(Id, Type)>),
Interface(Vec<Sig>),
- Array{size: usize, kind: Box<Type>},
+ Array {
+ size: usize,
+ kind: Box<Type>
+ },
List(Box<Type>),
Slice(Box<Type>), // todo: plus ownership
- Reference(Box<Type>),
- Pointer(Box<Type>),
+ Reference(Box<Type>), Pointer(Box<Type>),
Distinct(Box<Type>), // todo: not sure
- Mutable(Box<Type>), // parameters only
- Static(Box<Type>), // parameters only
+ Mutable(Box<Type>), Static(Box<Type>), // parameters only
Alias { // todo: this is wrong
id: Id,
generics: Vec<Type>
@@ -31,11 +32,10 @@ pub enum Type {
/// Function signatures.
#[derive(Clone, PartialEq)]
pub struct Sig {
- pub effect: Option<Id>,
pub id: Id,
pub generics: Vec<(Id, Option<Type>)>,
pub parameters: Vec<Type>,
- pub kind: Option<Type>
+ pub kind: Type
}
/// Patterns are recognizable given zero context.
@@ -50,8 +50,8 @@ pub enum Pattern {
List(Vec<Expr>), // arrays, slices, lists
}
-/// Expressions introduce a new binding or bindings, in some regard.
-pub enum Binding {
+/// Expressions introduce a new binding or bindings.
+pub enum Binding { // todo: excessive use of Option<Type>
Let {
id: Pattern, // id: Pattern supports ex. `let (a, b) = ...`
kind: Option<Type>,
@@ -70,23 +70,22 @@ pub enum Binding {
},
Func {
public: bool,
- effect: Option<Id>,
id: Id,
- generics: Vec<(Id, Option<Type>)>, // id, kind
- parameters: Vec<(Id, Type)>, // id, kind
+ generics: Vec<(Id, Option<Type>)>,
+ parameters: Vec<(Id, Type)>,
kind: Type,
body: Vec<Expr>
},
Macro {
public: bool,
id: Id,
- generics: Vec<(Id, Option<Type>)>, // id, kind
- parameters: Vec<(Id, Option<Type>)>, // id, kind
+ generics: Vec<(Id, Option<Type>)>,
+ parameters: Vec<(Id, Option<Type>)>,
kind: Option<Type>,
body: Vec<Expr>
},
TypeDecl { id: Id, generics: Vec<(Id, Option<Type>)>, alias: Type },
- Import { from: Option<Id>, imports: Vec<Id>, alias: Option<Id> },
+ Use { modules: Vec<Id> }, // todo: aliases? anything else fancy?
Module { id: Id, body: Vec<Expr> },
}
@@ -95,12 +94,12 @@ pub enum Control {
Call { id: Id, params: Vec<Expr> }, // function calls, macro invocations, field access...
If {
branches: Vec<CondBranch>,
- else_body: Option<Vec<Expr>>
+ else_body: Vec<Expr>
},
Try {
body: Vec<Expr>,
catches: Vec<CatchBranch>,
- finally: Option<Vec<Expr>>
+ finally: Vec<Expr>
},
Match {
item: Pattern,
@@ -113,9 +112,19 @@ pub enum Control {
Loop { body: Vec<Expr> },
}
-pub struct CondBranch { pub cond: Expr, pub body: Vec<Expr> }
-pub struct CatchBranch { pub exceptions: Vec<(Id, Option<Id>)>, pub body: Vec<Expr> }
-pub struct MatchBranch { pub patterns: Vec<Pattern>, pub guard: Option<Expr>, pub body: Vec<Expr> }
+pub struct CondBranch {
+ pub cond: Expr,
+ pub body: Vec<Expr>
+}
+pub struct CatchBranch {
+ pub exceptions: Vec<(Id, Option<Id>)>,
+ pub body: Vec<Expr>
+}
+pub struct MatchBranch {
+ pub patterns: Vec<Pattern>,
+ pub guard: Option<Expr>,
+ pub body: Vec<Expr>
+}
/// Expressions are either Patterns, Bindings, or Control flow constructs.
pub enum Expr {
@@ -123,3 +132,5 @@ pub enum Expr {
Binding(Binding),
Control(Control),
}
+
+pub type Ast = Vec<Expr>;
diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs
index e0aa76d..287ed9c 100644
--- a/src/frontend/lex.rs
+++ b/src/frontend/lex.rs
@@ -2,25 +2,41 @@ use multipeek::multipeek;
use crate::*;
-pub struct TokenStream(Vec<Token>);
+/// We lex input into a TokenStream containing the position of the token and the Token itself.
+pub struct TokenStream(Vec<(usize, Token)>);
+/// Iterating over a TokenStream pops both the Tokens and the spans.
+/// The parser only *really* cares about Tokens... spans are for use in error reporting.
impl IntoIterator for TokenStream {
- type Item = Token;
- type IntoIter = std::vec::IntoIter<Token>;
+ type Item = (usize, Token);
+ type IntoIter = std::vec::IntoIter<(usize, Token)>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
+/// Basic syntax tokens. Form an unambiguous TokenStream.
+#[derive(Clone, PartialEq)]
+pub enum Token {
+ Key(Keyword), // keyword identifiers. ex. for, use, func...
+ Lit(Literal), // literal values. ex. for strings/comments.
+ Sep(Punctuation), // punctuation. non-word tokens.
+ Op(String), // operators. lexicographically separate so the parser can parse infix notation
+ Num(String), // numeric values. ex. 413, 0b101011, 0xabcd, 612.1024
+ Ident(String), // identifiers. variable names, function names, method calls...
+ Indent(usize), // indentation. denotes line breaks and scope at which a line starts
+ End // explicit scope end marker. inserted in normalization.
+}
+
#[derive(Clone, PartialEq, Debug)]
pub enum LexicalError {
- InvalidIndentation,
- MismatchedParens,
- MismatchedBrackets,
- UnknownPunctuation,
+ InvalidIndentation(usize),
+ MismatchedParens(usize),
+ MismatchedBrackets(usize),
+ UnknownPunctuation(usize),
+ EarlyEndOfInput,
}
-
impl std::fmt::Display for LexicalError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
@@ -28,18 +44,7 @@ impl std::fmt::Display for LexicalError {
}
impl std::error::Error for LexicalError {}
-/// **Basic** syntax tokens. Form an unambiguous TokenStream.
-#[derive(Clone, PartialEq)]
-pub enum Token {
- Key(Keyword), // keyword identifiers.
- Word(String), // non-keyword identifiers.
- Op(String), // operators. separate tokens lexographically so the parser knows to parse infix notation.
- Num(String), // numeric value, ex. 413, 0b101011, 0xabcd
- Lit(Literal), // literal value, ex. for strings/comments.
- Sep(Punctuation), // punctuation. non-word tokens. ~~operators are lexed as this and later transformed to words.~~
- Indent(usize), // indentation. denotes line breaks and scope at which a line starts.
-}
-
+/// Literal text. This can be chars, strings, and comments.
#[derive(Clone, PartialEq)]
pub enum Literal {
Char(String),
@@ -51,7 +56,7 @@ pub enum Literal {
}
/// Keywords, made explicit for easier use with Rust.
-/// (strings inside match patterns are fucky!!)
+/// (static strings inside match patterns are fucky!!)
#[derive(Clone, PartialEq)]
pub enum Keyword {
Pub, Let, Var, Const,
@@ -86,8 +91,8 @@ pub enum Punctuation {
GenericRightBracket, // ]
ArrayLeftBracket, // [
ArrayRightBracket, // ]
- ScopeLeftBrace, // {
- ScopeRightBrace, // }
+ DependentLeftBrace, // { (not currently used)
+ DependentRightBrace, // }
StructLeftBrace, // {
StructRightBrace, // }
Equals, // =
@@ -110,320 +115,371 @@ pub enum Punctuation {
Backslash, // \
}
-/// Parses whitespace-sensitive code into an unambiguous TokenStream.
-/// Also useful for formatting.
-pub fn tokenize(input: &str) -> Result<TokenStream> {
- // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
- // Tokens are matched by looping within their case until complete.
- // This then eliminates the need for most global parser state. (i hate state)
+struct Input<'a>(multipeek::MultiPeek<std::str::CharIndices<'a>>);
- use Token::*;
- use Literal::*;
- use Punctuation::*;
- use LexicalError::*;
- enum Paren { Func, Tuple }
- enum Bracket { Generic, Array }
- struct State {
- start_of_line: bool,
- paren_stack: Vec<Paren>,
- bracket_stack: Vec<Bracket>,
+impl<'a> Input<'a> {
+ /// Map input.next() to return Results for use with the propagation operator
+ fn next(&mut self) -> Result<char> {
+ self.0.next().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+ }
+
+ fn peek_eat(&mut self, expected: char) -> bool {
+ if self.peek().is_ok_and(|x| x == expected) {
+ self.0.next();
+ true
+ } else {
+ false
+ }
}
+ fn peek(&mut self) -> Result<char> {
+ self.0.peek().copied().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+ }
+
+ fn peek_nth(&mut self, n: usize) -> Result<char> {
+ self.0.peek_nth(n).copied().map(|x| x.1).ok_or(EarlyEndOfInput.into())
+ }
+
+ fn consume_indentation(&mut self) -> usize {
+ let mut count = 0;
+ while self.peek_eat(' ') {
+ count += 1;
+ }
+ if self.peek_eat('\n') {
+ self.consume_indentation()
+ } else {
+ count
+ }
+ }
+
+ fn strip(&mut self) {
+ loop {
+ if !self.peek_eat(' ') {
+ break
+ }
+ }
+ }
+}
+
+use Token::*;
+use Literal::*;
+use Punctuation::*;
+use LexicalError::*;
+enum Paren { Func, Tuple }
+enum Bracket { Generic, Array }
+struct State {
+ start_of_line: bool,
+ paren_stack: Vec<Paren>,
+ bracket_stack: Vec<Bracket>,
+ next_token_is_parameter_punctuation: bool,
+}
+
+/// Parses whitespace-sensitive code into an unambiguous TokenStream.
+pub fn lex(input: &str) -> Result<TokenStream> {
+ let mut input = Input(multipeek(input.char_indices()));
+ let mut res = TokenStream(Vec::new());
let mut state = State {
start_of_line: true,
paren_stack: vec!(),
bracket_stack: vec!(),
+ next_token_is_parameter_punctuation: false,
};
+ res.0.push((0, Indent(input.consume_indentation())));
+ loop {
+ input.strip();
+ match input.0.next() {
+ Some((pos, c)) => res.0.push((pos, lex_token(&mut input, &mut state, pos, c)?)),
+ None => break
+ }
+ }
+ Ok(res)
+}
- let mut buf = String::new();
- let mut res = Vec::new();
-
- // `char` in rust is four bytes it's fine
- let mut input = multipeek(input.chars());
- while let Some(c) = input.next() {
- match c {
- ' ' => { // indentation! and whitespace
- match res.last() {
- Some(Indent(_)) => { // indentation!
- res.pop(); // discard previous empty or useless Indent token
- let mut current_indent_level = 1;
- while let Some(x) = input.peek() {
- match x {
- ' ' => { current_indent_level += 1; input.next(); },
- _ => match res.last() { // indentation ends
- Some(Word(a)) if a == "==" || a == "and" || a == "or" ||
- a == "xor" || a == "in" || a == "is" => break,
- Some(Sep(FuncLeftParen)) | Some(Sep(TupleLeftParen)) |
- Some(Sep(GenericLeftBracket)) | Some(Sep(ArrayLeftBracket)) |
- Some(Sep(StructLeftBrace)) | Some(Sep(Comma)) => break,
- _ => {
- res.push(Indent(current_indent_level));
- break;
- }
- }
- }
- }
- },
- _ => { // get rid of excess (all) whitespace between words/operators
- while input.peek().is_some_and(|x| x.is_whitespace() && x != &'\n') { input.next(); }
+fn lex_token(input: &mut Input, state: &mut State, pos: usize, c: char) -> Result<Token> {
+ Ok(match c {
+ '\n' => Indent(input.consume_indentation()),
+ '\t' => return Err(InvalidIndentation(pos).into()),
+ ' ' => unreachable!(),
+ ':' => Sep(Colon),
+ '=' => Sep(Equals),
+ '\'' => lex_char(input)?,
+ '"' => lex_string(input)?,
+ '#' => lex_comment(input)?,
+ c if c.is_alphabetic() || c == '_' => lex_identifier(input, state, c), // valid identifiers!
+ c if c.is_digit(10) => lex_number(input, c), // numeric literals!
+ '-' => {
+ // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
+ match input.peek()? {
+ ' ' => Sep(Minus),
+ _ => Sep(Negative)
+ }
+ },
+ '(' => {
+ if state.next_token_is_parameter_punctuation {
+ state.next_token_is_parameter_punctuation = false;
+ state.paren_stack.push(Paren::Func);
+ Sep(FuncLeftParen)
+ } else {
+ state.paren_stack.push(Paren::Tuple);
+ Sep(TupleLeftParen)
+ }
+ },
+ '[' => {
+ if state.next_token_is_parameter_punctuation {
+ state.next_token_is_parameter_punctuation = false;
+ state.bracket_stack.push(Bracket::Generic);
+ Sep(GenericLeftBracket)
+ } else {
+ state.bracket_stack.push(Bracket::Array);
+ Sep(ArrayLeftBracket)
+ }
+ },
+ ')' => {
+ match state.paren_stack.pop() {
+ Some(Paren::Func) => Sep(FuncRightParen),
+ Some(Paren::Tuple) => Sep(TupleRightParen),
+ None => return Err(MismatchedParens(pos).into()),
+ }
+ },
+ ']' => {
+ match state.bracket_stack.pop() {
+ Some(Bracket::Generic) => {
+ if let Ok('(') = input.peek() {
+ state.next_token_is_parameter_punctuation = true;
}
+ Sep(GenericRightBracket)
+ },
+ Some(Bracket::Array) => Sep(ArrayRightBracket),
+ None => return Err(MismatchedBrackets(pos).into()),
+ }
+ },
+ '`' => { // backticks are used for injections, so generics/parameters may follow
+ // todo: SERIOUS ambiguity issue with `injection`[]
+ match input.peek() {
+ Ok('(') | Ok('[') => {
+ state.next_token_is_parameter_punctuation = true;
+ },
+ _ => ()
+ }
+ Sep(BackTick)
+ },
+ ',' => Sep(Comma),
+ '.' => Sep(Period),
+ ';' => Sep(Semicolon),
+ '{' => Sep(StructLeftBrace),
+ '}' => Sep(StructRightBrace),
+ '+' => Sep(Plus),
+ '*' => Sep(Times),
+ '/' => Sep(Slash),
+ '<' => Sep(LessThan),
+ '>' => Sep(GreaterThan),
+ '@' => Sep(At),
+ '$' => Sep(Sha),
+ '~' => Sep(Tilde),
+ '&' => Sep(And),
+ '|' => Sep(Or),
+ '!' => Sep(Exclamation),
+ '?' => Sep(Question),
+ '^' => Sep(Caret),
+ '\\' => Sep(Backslash),
+ _ => return Err(UnknownPunctuation(pos).into())
+ })
+}
+
+/// Char ::= '\'' <anything> '\''
+fn lex_char(input: &mut Input) -> Result<Token> {
+ let mut buf = String::new();
+ loop {
+ match input.next()? {
+ '\'' => break,
+ '\\' => {
+ if let y = input.next()? {
+ buf.push('\\');
+ buf.push(y);
}
},
- '\t' => return Err(InvalidIndentation.into()),
- '\n' => res.push(Indent(0)),
- '\'' => { // chars!
- while let Some(x) = input.next() {
- match x {
- '\'' => break,
- '\\' => if let Some(y) = input.next() { buf.push(y) },
- _ => buf.push(x)
- }
+ x => buf.push(x)
+ }
+ }
+ Ok(Lit(Char(buf)))
+}
+
+/// String ::= '"' <anything> '"' | '"""' <anything> '"""'
+fn lex_string(input: &mut Input) -> Result<Token> {
+ let mut buf = String::new();
+ match (input.peek_nth(0), input.peek_nth(1)) {
+ (Ok('"'), Ok('"')) => { // triple quoted strings
+ input.next()?; input.next()?;
+ loop {
+ match input.next()? {
+ '"' if input.peek_nth(0)? == '"' &&
+ input.peek_nth(1)? == '"' => {
+ input.next()?; input.next()?;
+ break;
+ },
+ x => buf.push(x)
}
- res.push(Lit(Char(String::from(&buf))));
- },
- '"' => { // strings!
- match (input.peek_nth(0).copied(), input.peek_nth(1).copied()) {
- (Some('"'), Some('"')) => { // triple quoted strings
- input.next(); input.next();
- while let Some(x) = input.next() {
- match x {
- '"' if input.peek_nth(0) == Some(&'"') &&
- input.peek_nth(1) == Some(&'"') => {
- input.next(); input.next();
- break;
- },
- _ => buf.push(x)
- }
- }
- res.push(Lit(MultiLineString(String::from(&buf))));
+ }
+ Ok(Lit(MultiLineString(buf)))
+ },
+ _ => { // single quoted strings
+ loop {
+ match input.next()? {
+ '"' => break,
+ '\\' => if let y = input.next()? {
+ buf.push(y) // FIXME
},
- (_, _) => { // single quoted strings
- while let Some(x) = input.next() {
- match x {
- '"' => break,
- '\\' => if let Some(y) = input.next() { buf.push(y) },
- _ => buf.push(x)
- }
- }
- res.push(Lit(SingleLineString(String::from(&buf))));
- }
+ x => buf.push(x)
}
- },
- '#' => { // comments!
- match input.peek() {
- Some('[') => { // block comment, can be nested
- input.next();
- let mut comment_level = 1;
- while let Some(x) = input.next() && comment_level > 0 {
- match x {
- '#' if input.peek() == Some(&'[') => {
- comment_level += 1;
- input.next();
- },
- ']' if input.peek() == Some(&'#') => {
- comment_level -= 1;
- input.next();
- },
- _ => buf.push(x)
- }
- }
- res.push(Lit(MultiLineComment(String::from(&buf))));
+ }
+ Ok(Lit(SingleLineString(buf)))
+ }
+ }
+}
+
+/// Comment ::= '#' <anything> '\n' | '#[' Comment | <anything> ']#'
+fn lex_comment(input: &mut Input) -> Result<Token> {
+ let mut buf = String::new();
+ match input.peek() {
+ Ok('[') => { // block comment, can be nested
+ input.next()?;
+ let mut comment_level = 1;
+ while comment_level > 0 {
+ match input.next()? {
+ '#' if input.peek_eat('[') => {
+ comment_level += 1;
+ buf.push_str("#[");
},
- Some(&'#') => { // documentation comment
- input.next();
- while let Some(x) = input.peek() {
- match x {
- '\n' => break,
- _ => {
- buf.push(*x);
- }
- }
- input.next();
- }
- res.push(Lit(DocComment(String::from(&buf))));
+ ']' if input.peek_eat('#') => {
+ comment_level -= 1;
+ buf.push_str("]#");
},
- _ => { // standard comment, runs til EOL
- while let Some(x) = input.peek() {
- match x {
- '\n' => break,
- _ => {
- buf.push(*x);
- }
- }
- input.next();
- }
- res.push(Lit(Comment(String::from(&buf))));
- }
- }
- },
- c if c.is_alphabetic() || c == '_' => { // valid identifiers!
- buf.push(c);
- while let Some(x) = input.peek() {
- match x {
- x if x.is_alphanumeric() || x == &'_' => {
- buf.push(*x);
- input.next();
- },
- _ => {
- use Keyword::*;
- match buf.as_str() { // keywords!
- "pub" => res.push(Key(Pub)),
- "let" => res.push(Key(Let)),
- "var" => res.push(Key(Var)),
- "const" => res.push(Key(Const)),
- "func" => res.push(Key(Func)),
- "macro" => res.push(Key(Macro)),
- "type" => res.push(Key(Type)),
- "mod" => res.push(Key(Mod)),
- "use" => res.push(Key(Use)),
- "as" => res.push(Key(As)),
- "for" => res.push(Key(For)),
- "while" => res.push(Key(While)),
- "loop" => res.push(Key(Loop)),
- "block" => res.push(Key(Block)),
- "static" => res.push(Key(Static)),
- "if" => res.push(Key(If)),
- "when" => res.push(Key(When)),
- "elif" => res.push(Key(Elif)),
- "else" => res.push(Key(Else)),
- "match" => res.push(Key(Match)),
- "where" => res.push(Key(Where)),
- "try" => res.push(Key(Try)),
- "catch" => res.push(Key(Catch)),
- "finally" => res.push(Key(Finally)),
- "struct" => res.push(Key(Struct)),
- "tuple" => res.push(Key(Tuple)),
- "enum" => res.push(Key(Enum)),
- "union" => res.push(Key(Union)),
- "interface" => res.push(Key(Interface)),
- "distinct" => res.push(Key(Distinct)),
- "ref" => res.push(Key(Ref)),
- "ptr" => res.push(Key(Ptr)),
- "mut" => res.push(Key(Mut)),
- "break" => res.push(Key(Break)),
- "continue" => res.push(Key(Continue)),
- "return" => res.push(Key(Return)),
- "in" => res.push(Key(In)),
- "is" => res.push(Key(Is)),
- "of" => res.push(Key(Of)),
- _ => res.push(Word(String::from(&buf)))
- }
- match x { // () and [] denote both parameters/generics and tuples/arrays
- '(' => { // we must disambiguate by treating those *directly* after words as such
- res.push(Sep(FuncLeftParen));
- state.paren_stack.push(Paren::Func);
- input.next();
- },
- '[' => {
- res.push(Sep(GenericLeftBracket));
- state.bracket_stack.push(Bracket::Generic);
- input.next();
- },
- _ => {},
- }
- break;
- }
+ x => buf.push(x)
+ };
+ }
+ Ok(Lit(MultiLineComment(buf)))
+ },
+ Ok('#') => { // documentation comment
+ input.next()?;
+ while let Ok(x) = input.peek() {
+ match x {
+ '\n' => break,
+ _ => {
+ buf.push(x);
+ input.next()?;
}
}
- },
- '0'..='9' => { // numeric literals!
- buf.push(c);
- while let Some(x) = input.peek() {
- match x {
- 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {
- buf.push(*x);
- input.next();
- },
- _ => break
+ }
+ Ok(Lit(DocComment(buf)))
+ },
+ _ => { // standard comment, runs til EOL
+ while let Ok(x) = input.peek() {
+ match x {
+ '\n' => break,
+ _ => {
+ buf.push(x);
+ input.next()?;
}
}
- res.push(Num(String::from(&buf)))
- },
- '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
- match input.peek() {
- Some(' ') => res.push(Sep(Minus)),
- _ => res.push(Sep(Negative))
- }
- },
- '(' => { // note: FuncParens were matched above, directly after identifiers
- res.push(Sep(TupleLeftParen));
- state.paren_stack.push(Paren::Tuple);
- },
- '[' => { // note: GenericBrackets were matched above, directly after identifiers
- res.push(Sep(ArrayLeftBracket));
- state.bracket_stack.push(Bracket::Array);
- },
- ')' => { // match parens
- match state.paren_stack.pop() {
- Some(Paren::Func) => res.push(Sep(FuncRightParen)),
- Some(Paren::Tuple) => res.push(Sep(TupleRightParen)),
- None => return Err(MismatchedParens.into()),
- }
- },
- ']' => { // match brackets
- match state.bracket_stack.pop() {
- Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)),
- Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)),
- None => return Err(MismatchedBrackets.into()),
- }
- if input.peek() == Some(&'(') { // parameters following generics
- res.push(Sep(FuncLeftParen));
- state.paren_stack.push(Paren::Func);
- input.next();
- }
+ }
+ Ok(Lit(Comment(buf)))
+ }
+ }
+}
+
+#[allow(unused_assignments)]
+fn lex_identifier(input: &mut Input, state: &mut State, c: char) -> Token {
+ use Keyword::*;
+ let mut buf = String::new();
+ let mut keyword = None;
+ buf.push(c);
+ loop {
+ match input.peek() {
+ Ok(x) if x.is_alphanumeric() || x == '_' => {
+ buf.push(x);
+ let _ = input.next();
},
- '`' => { // backticks are used for operators, so generics/parameters may follow
- res.push(Sep(BackTick)); // todo: backticks could like not be used for operators
- match input.peek() {
- Some('(') => {
- res.push(Sep(FuncLeftParen));
- state.paren_stack.push(Paren::Func);
- input.next();
- },
- Some('[') => {
- res.push(Sep(GenericLeftBracket));
- state.bracket_stack.push(Bracket::Generic);
- input.next();
- },
- _ => {}
- }
+ _ => {
+ keyword = Some(match buf.as_str() { // keywords!
+ "pub" => Key(Pub),
+ "let" => Key(Let),
+ "var" => Key(Var),
+ "const" => Key(Const),
+ "func" => Key(Func),
+ "macro" => Key(Macro),
+ "type" => Key(Type),
+ "mod" => Key(Mod),
+ "use" => Key(Use),
+ "as" => Key(As),
+ "for" => Key(For),
+ "while" => Key(While),
+ "loop" => Key(Loop),
+ "block" => Key(Block),
+ "static" => Key(Static),
+ "if" => Key(If),
+ "when" => Key(When),
+ "elif" => Key(Elif),
+ "else" => Key(Else),
+ "match" => Key(Match),
+ "where" => Key(Where),
+ "try" => Key(Try),
+ "catch" => Key(Catch),
+ "finally" => Key(Finally),
+ "struct" => Key(Struct),
+ "tuple" => Key(Tuple),
+ "enum" => Key(Enum),
+ "union" => Key(Union),
+ "interface" => Key(Interface),
+ "distinct" => Key(Distinct),
+ "ref" => Key(Ref),
+ "ptr" => Key(Ptr),
+ "mut" => Key(Mut),
+ "in" => Key(In),
+ "is" => Key(Is),
+ "of" => Key(Of),
+ "break" => Key(Break),
+ "continue" => Key(Continue),
+ "return" => Key(Return),
+ _ => Ident(String::from(&buf))
+ });
+ break
+ }
+ }
+ }
+ // () and [] denote both parameters/generics and tuples/arrays
+ // we must disambiguate by treating those *directly* after words as such
+ match input.peek() {
+ Ok('(') | Ok('[') => state.next_token_is_parameter_punctuation = true,
+ _ => (),
+ }
+ keyword.unwrap()
+}
+
+fn lex_number(input: &mut Input, c: char) -> Token {
+ let mut buf = String::new();
+ buf.push(c);
+ loop {
+ match input.peek() {
+ Ok(x) if x.is_alphanumeric() => { // FIXME
+ buf.push(x);
+ let _ = input.next();
},
- ',' => res.push(Sep(Comma)),
- '.' => res.push(Sep(Period)),
- ';' => res.push(Sep(Semicolon)),
- ':' => res.push(Sep(Colon)),
- '{' => res.push(Sep(StructLeftBrace)),
- '}' => res.push(Sep(StructRightBrace)),
- '=' => res.push(Sep(Equals)),
- '+' => res.push(Sep(Plus)),
- '*' => res.push(Sep(Times)),
- '/' => res.push(Sep(Slash)),
- '<' => res.push(Sep(LessThan)),
- '>' => res.push(Sep(GreaterThan)),
- '@' => res.push(Sep(At)),
- '$' => res.push(Sep(Sha)),
- '~' => res.push(Sep(Tilde)),
- '&' => res.push(Sep(And)),
- '|' => res.push(Sep(Or)),
- '!' => res.push(Sep(Exclamation)),
- '?' => res.push(Sep(Question)),
- '^' => res.push(Sep(Caret)),
- '\\' => res.push(Sep(Backslash)),
- _ => return Err(UnknownPunctuation.into())
+ _ => break
}
- buf.clear();
}
- Ok(TokenStream(res))
+ Num(buf)
}
impl std::fmt::Display for TokenStream {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Token::*;
let mut prev_token = Indent(0);
- for token in &self.0 {
- match (&prev_token, &token) {
- (Word(_), Word(_)) | (Word(_), Num(_)) |
- (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?,
+ for (_, token) in &self.0 {
+ match (prev_token, token) { // fixme
+ (Ident(_), Ident(_)) | (Ident(_), Num(_)) |
+ (Num(_), Ident(_)) | (Num(_), Num(_)) |
+ (Key(_), Key(_)) | (Key(_), Ident(_)) | (Ident(_), Key(_))
+ => write!(f, " {}", token)?,
_ => write!(f, "{}", token)?,
}
prev_token = token.clone();
@@ -437,11 +493,11 @@ impl std::fmt::Display for Token {
use Token::*;
match self {
Key(word) => write!(f, "{}", word),
- Word(val) => write!(f, "{}", val),
- Num(val) => write!(f, "{}", val),
+ Ident(val) | Op(val) | Num(val) => write!(f, "{}", val),
Lit(lit) => write!(f, "{}", lit),
Sep(sep) => write!(f, "{}", sep),
Indent(i) => write!(f, "\n{}", " ".repeat(*i)),
+ End => write!(f, "end"),
}
}
}
@@ -526,8 +582,8 @@ impl std::fmt::Display for Punctuation {
GenericRightBracket => write!(f, "]"),
ArrayLeftBracket => write!(f, " ["),
ArrayRightBracket => write!(f, "]"),
- ScopeLeftBrace => write!(f, "{{"),
- ScopeRightBrace => write!(f, "}}"),
+ DependentLeftBrace => write!(f, "{{"),
+ DependentRightBrace => write!(f, "}}"),
StructLeftBrace => write!(f, "{{"),
StructRightBrace => write!(f, "}}"),
Equals => write!(f, "="),
@@ -551,3 +607,9 @@ impl std::fmt::Display for Punctuation {
}
}
}
+
+pub fn test() {
+ let input = std::fs::read_to_string("std/ast.pk").unwrap();
+ let lexed = lex(&input).unwrap();
+ println!("{}", lexed);
+}
diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs
index 8616346..352599b 100644
--- a/src/frontend/parse.rs
+++ b/src/frontend/parse.rs
@@ -9,17 +9,17 @@ use Token::*;
use Literal::*;
use Punctuation::*;
-struct Input(multipeek::MultiPeek<std::vec::IntoIter<Token>>);
+struct Input(multipeek::MultiPeek<std::vec::IntoIter<(usize, Token)>>);
impl Input {
/// Map input.next() to return Results for use with the propagation operator
fn next(&mut self) -> Result<Token> {
- self.0.next().ok_or("end of input".into())
+ self.0.next().map(|x| x.1).ok_or("end of input".into()) // FIXME
}
/// Check if the next character is an expected token, and if so, advance the iterator and return true
- fn peek(&mut self, expected: Token) -> bool {
- if self.0.peek() == Some(&expected) {
+ fn peek_eat(&mut self, expected: Token) -> bool {
+ if self.peek().is_some_and(|x| x == expected) {
self.0.next();
true
} else {
@@ -28,13 +28,13 @@ impl Input {
}
/// Expose input.peek() (we don't want EOF to trigger an error when peeking)
- fn peek_opt(&mut self) -> Option<&Token> {
- self.0.peek()
+ fn peek(&mut self) -> Option<Token> {
+ self.0.peek().map(|x| x.1.clone())
}
/// Expose input.peek_nth()
- fn peek_nth(&mut self, n: usize) -> Option<&Token> {
- self.0.peek_nth(n)
+ fn peek_nth(&mut self, n: usize) -> Option<Token> {
+ self.0.peek_nth(n).map(|x| x.1.clone())
}
/// Asserts the next character to be a known token
@@ -51,9 +51,13 @@ impl Input {
}
}
+fn normalize(input: TokenStream) -> TokenStream {
+ todo!()
+}
+
/// Convert a basic TokenStream into an AbstractSyntaxTree
-pub fn astify(input: TokenStream, name: &str) -> Result<Expr> {
- let mut input = Input(multipeek(input));
+pub fn parse(input: TokenStream, name: &str) -> Result<Expr> {
+ let mut input = Input(multipeek(normalize(input)));
let body = parse_body(&mut input)?;
if input.len() > 0 {
return Err(format!("additional tokens remaining after the body!").into());
@@ -62,18 +66,18 @@ pub fn astify(input: TokenStream, name: &str) -> Result<Expr> {
}
/// Parse a series of Exprs, for ex. the body of a function.
-/// Body ::= Expr | ('{' Expr (';' Expr)* '}')
-fn parse_body(input: &mut Input) -> Result<Vec<Expr>> {
+/// Body ::= Expr (';' Expr)*
+fn parse_body(input: &mut Input) -> Result<Ast> {
let mut res = Vec::new();
- if !input.peek(Sep(ScopeLeftBrace)) {
+ if !input.peek_eat(Sep(Colon)) && !input.peek_eat(Sep(Equals)) {
res.push(parse_expr(input)?);
return Ok(res);
}
- while !input.peek(Sep(ScopeRightBrace)) {
+ while !input.peek_eat(End) {
res.push(parse_expr(input)?);
// consume semicolons. there doesn't *have* to be a semicolon though.
// this should probably be checked to be a semicolon or a right brace.
- input.peek(Sep(Semicolon));
+ input.peek_eat(Sep(Semicolon));
}
Ok(res)
}
@@ -136,7 +140,7 @@ fn parse_var(input: &mut Input) -> Result<Expr> {
let id = parse_pattern(input)?;
let kind = parse_annotation(input)?;
let mut value = None;
- if input.peek(Sep(Equals)) {
+ if input.peek_eat(Sep(Equals)) {
value = Some(Box::new(parse_expr(input)?));
}
Ok(Expr::Binding(Var { id, kind, value }))
@@ -153,7 +157,7 @@ fn parse_const(input: &mut Input, public: bool) -> Result<Expr> {
/// Annotation ::= (':' Type)?
fn parse_annotation(input: &mut Input) -> Result<Option<Type>> {
- if input.peek(Sep(Colon)) {
+ if input.peek_eat(Sep(Colon)) {
Ok(Some(parse_type(input)?))
} else {
Ok(None)
@@ -162,15 +166,14 @@ fn parse_annotation(input: &mut Input) -> Result<Option<Type>> {
/// `Func ::= 'pub'? 'func' Ident ('[' Parameters ']')? ('(' Parameters ')')? Annotation '=' Body`
fn parse_func(input: &mut Input, public: bool) -> Result<Expr> {
- let effect = None;
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
let mut parameters = Vec::new();
- if input.peek(Sep(FuncLeftParen)) { // todo: rewrite to map over an input
+ if input.peek_eat(Sep(FuncLeftParen)) { // todo: rewrite to map over an input
// let temp_parameters = parse_parameters(input)?;
// if temp_parameters.last().is_none() {
// return Err("expected a type annotation on the last function parameter".into());
@@ -183,7 +186,7 @@ fn parse_func(input: &mut Input, public: bool) -> Result<Expr> {
} else {
stack.push(id);
}
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
let (id, kind) = parse_parameter(input)?;
stack.push(id);
if kind.is_some() {
@@ -199,29 +202,29 @@ fn parse_func(input: &mut Input, public: bool) -> Result<Expr> {
input.then(Sep(FuncRightParen))?;
}
let mut kind = Type::Void;
- if input.peek(Sep(Colon)) {
+ if input.peek_eat(Sep(Colon)) {
kind = parse_type(input)?;
}
input.then(Sep(Equals))?;
let body = parse_body(input)?;
- Ok(Expr::Binding(Func { public, effect, id, generics, parameters, kind, body }))
+ Ok(Expr::Binding(Func { public, id, generics, parameters, kind, body }))
}
/// `Macro ::= 'pub'? 'macro' Ident ('[' Parameters ']')? ('(' Parameters ')')? (':' Type) '=' Body`
fn parse_macro(input: &mut Input, public: bool) -> Result<Expr> {
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
let mut parameters = Vec::new();
- if input.peek(Sep(FuncLeftParen)) {
+ if input.peek_eat(Sep(FuncLeftParen)) {
parameters = parse_parameters(input)?;
input.then(Sep(FuncRightParen))?;
}
let mut kind = None;
- if input.peek(Sep(Colon)) {
+ if input.peek_eat(Sep(Colon)) {
kind = Some(parse_type(input)?);
}
input.then(Sep(Equals))?;
@@ -233,7 +236,7 @@ fn parse_macro(input: &mut Input, public: bool) -> Result<Expr> {
fn parse_typedecl(input: &mut Input, public: bool) -> Result<Expr> {
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
@@ -246,7 +249,7 @@ fn parse_typedecl(input: &mut Input, public: bool) -> Result<Expr> {
fn parse_parameter(input: &mut Input) -> Result<(Id, Option<Type>)> {
let id = parse_ident(input)?;
let mut kind = None;
- if input.peek(Sep(Colon)) {
+ if input.peek_eat(Sep(Colon)) {
kind = Some(parse_type(input)?);
}
Ok((id, kind))
@@ -256,7 +259,7 @@ fn parse_parameter(input: &mut Input) -> Result<(Id, Option<Type>)> {
fn parse_parameters(input: &mut Input) -> Result<Vec<(Id, Option<Type>)>> {
let mut res = Vec::new();
res.push(parse_parameter(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
res.push(parse_parameter(input)?);
}
Ok(res)
@@ -281,7 +284,7 @@ fn parse_block(input: &mut Input) -> Result<Expr> {
let body = parse_body(input)?;
Ok(Expr::Control(Block { id, body }))
},
- Word(label) => {
+ Ident(label) => {
input.then(Sep(Colon))?;
let id = Some(label);
let body = parse_body(input)?;
@@ -327,12 +330,12 @@ fn parse_loop(input: &mut Input) -> Result<Expr> {
fn parse_if(input: &mut Input) -> Result<Expr> {
let mut branches = Vec::new();
branches.push(parse_cond_branch(input)?);
- while input.peek(Key(Keyword::Elif)) {
+ while input.peek_eat(Key(Keyword::Elif)) {
branches.push(parse_cond_branch(input)?);
}
- let mut else_body = None;
- if input.peek(Key(Keyword::Else)) {
- else_body = Some(parse_body(input)?);
+ let mut else_body = Vec::new();
+ if input.peek_eat(Key(Keyword::Else)) {
+ else_body = parse_body(input)?;
}
Ok(Expr::Control(If { branches, else_body }))
}
@@ -341,13 +344,13 @@ fn parse_if(input: &mut Input) -> Result<Expr> {
fn parse_when(input: &mut Input) -> Result<Expr> {
let mut branches = Vec::new();
branches.push(parse_cond_branch(input)?);
- while input.peek(Key(Keyword::Elif)) {
+ while input.peek_eat(Key(Keyword::Elif)) {
branches.push(parse_cond_branch(input)?);
}
- let mut else_body = None;
- if input.peek(Key(Keyword::Else)) {
+ let mut else_body = Vec::new();
+ if input.peek_eat(Key(Keyword::Else)) {
input.then(Sep(Colon))?;
- else_body = Some(parse_body(input)?);
+ else_body = parse_body(input)?;
}
let mut body = Vec::new();
body.push(Expr::Control(If { branches, else_body }));
@@ -367,20 +370,20 @@ fn parse_try(input: &mut Input) -> Result<Expr> {
input.then(Sep(Colon))?;
let body = parse_body(input)?;
let mut catches = Vec::new();
- while input.peek(Key(Keyword::Catch)) {
+ while input.peek_eat(Key(Keyword::Catch)) {
let mut exceptions = Vec::new();
exceptions.push(parse_catch_exception(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
exceptions.push(parse_catch_exception(input)?);
}
input.then(Sep(Colon))?;
let body = parse_body(input)?;
catches.push(CatchBranch { exceptions, body });
}
- let mut finally = None;
- if input.peek(Key(Keyword::Finally)) {
+ let mut finally = Vec::new();
+ if input.peek_eat(Key(Keyword::Finally)) {
input.then(Sep(Colon))?;
- finally = Some(parse_body(input)?);
+ finally = parse_body(input)?;
}
Ok(Expr::Control(Try { body, catches, finally }))
}
@@ -389,7 +392,7 @@ fn parse_try(input: &mut Input) -> Result<Expr> {
fn parse_catch_exception(input: &mut Input) -> Result<(Id, Option<Id>)> {
let id = parse_ident(input)?;
let mut alias = None;
- if input.peek(Key(Keyword::As)) {
+ if input.peek_eat(Key(Keyword::As)) {
alias = Some(parse_ident(input)?);
}
Ok((id, alias))
@@ -399,14 +402,14 @@ fn parse_catch_exception(input: &mut Input) -> Result<(Id, Option<Id>)> {
fn parse_match(input: &mut Input) -> Result<Expr> {
let item = parse_pattern(input)?; // fixme
let mut branches = Vec::new();
- while input.peek(Key(Keyword::Of)) {
+ while input.peek_eat(Key(Keyword::Of)) {
let mut patterns = Vec::new();
patterns.push(parse_pattern(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
patterns.push(parse_pattern(input)?);
}
let mut guard = None;
- if input.peek(Key(Keyword::Where)) {
+ if input.peek_eat(Key(Keyword::Where)) {
guard = Some(parse_expr(input)?)
}
input.then(Sep(Colon))?;
@@ -437,11 +440,11 @@ fn parse_type(input: &mut Input) -> Result<Type> {
Keyword::Interface => parse_interface(input),
_ => return Err("invalid keyword present in type!".into())
},
- Word(id) => {
+ Ident(id) => {
let mut generics = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
generics.push(parse_type(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
generics.push(parse_type(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -455,9 +458,9 @@ fn parse_type(input: &mut Input) -> Result<Type> {
/// `StructType ::= 'struct' ('[' Ident ':' Type (',' Ident ':' Type)* ']')?`
fn parse_struct_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
res.push(parse_struct_field(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
res.push(parse_struct_field(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -465,19 +468,19 @@ fn parse_struct_type(input: &mut Input) -> Result<Type> {
Ok(Type::Struct(res))
}
-fn parse_struct_field(input: &mut Input) -> Result<(Id, Box<Type>)> {
+fn parse_struct_field(input: &mut Input) -> Result<(Id, Type)> {
let id = parse_ident(input)?;
input.then(Sep(Colon))?;
- let kind = Box::new(parse_type(input)?);
+ let kind = parse_type(input)?;
Ok((id, kind))
}
/// `TupleType ::= 'tuple' ('[' (Ident ':')? Type (',' (Ident ':')? Type)* ']')?`
fn parse_tuple_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
res.push(parse_tuple_field(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
res.push(parse_tuple_field(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -486,23 +489,23 @@ fn parse_tuple_type(input: &mut Input) -> Result<Type> {
}
// annoyingly complex to parse. `TupleField ::= (Ident ':')? Type`
-fn parse_tuple_field(input: &mut Input) -> Result<(Option<Id>, Box<Type>)> {
- match input.peek_opt().clone() {
- Some(Word(id)) if input.peek_nth(1) == Some(&Sep(Colon)) => {
+fn parse_tuple_field(input: &mut Input) -> Result<(Option<Id>, Type)> {
+ match input.peek().clone() {
+ Some(Ident(id)) if input.peek_nth(1) == Some(Sep(Colon)) => {
input.next()?;
input.then(Sep(Colon))?;
- Ok((Some(id.to_string()), Box::new(parse_type(input)?)))
+ Ok((Some(id.to_string()), parse_type(input)?))
},
- _ => Ok((None, Box::new(parse_type(input)?)))
+ _ => Ok((None, parse_type(input)?))
}
}
/// `EnumType ::= 'enum' ('[' Ident ('=' Pattern)? (Ident ('=' Pattern)?)* ']')?`
fn parse_enum_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
res.push(parse_enum_variant(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
res.push(parse_enum_variant(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -513,7 +516,7 @@ fn parse_enum_type(input: &mut Input) -> Result<Type> {
fn parse_enum_variant(input: &mut Input) -> Result<(Id, Option<Pattern>)> {
let id = parse_ident(input)?;
let mut kind = None;
- if input.peek(Sep(Equals)) {
+ if input.peek_eat(Sep(Equals)) {
kind = Some(parse_pattern(input)?);
}
Ok((id, kind))
@@ -522,9 +525,9 @@ fn parse_enum_variant(input: &mut Input) -> Result<(Id, Option<Pattern>)> {
/// `UnionType ::= 'union' ('[' Ident (':' Type)? (',' Ident (':' Type)?)* ']')?`
fn parse_union_type(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
res.push(parse_union_variant(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
res.push(parse_union_variant(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -532,11 +535,11 @@ fn parse_union_type(input: &mut Input) -> Result<Type> {
Ok(Type::Union(res))
}
-fn parse_union_variant(input: &mut Input) -> Result<(Id, Box<Type>)> {
+fn parse_union_variant(input: &mut Input) -> Result<(Id, Type)> {
let id = parse_ident(input)?;
- let mut kind = Box::new(Type::Alias { id: "unit".to_string(), generics: Vec::new() });
- if input.peek(Sep(Colon)) {
- kind = Box::new(parse_type(input)?);
+ let mut kind = Type::Alias { id: "unit".to_string(), generics: Vec::new() };
+ if input.peek_eat(Sep(Colon)) {
+ kind = parse_type(input)?;
}
Ok((id, kind))
}
@@ -544,9 +547,9 @@ fn parse_union_variant(input: &mut Input) -> Result<(Id, Box<Type>)> {
/// `Interface ::= 'interface' ('[' Signature (',' Signature)* ']')?`
fn parse_interface(input: &mut Input) -> Result<Type> {
let mut res = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
res.push(parse_signature(input)?);
- while input.peek(Sep(Comma)) {
+ while input.peek_eat(Sep(Comma)) {
res.push(parse_signature(input)?);
}
input.then(Sep(GenericRightBracket))?;
@@ -556,31 +559,30 @@ fn parse_interface(input: &mut Input) -> Result<Type> {
/// `Signature ::= Ident ('[' Parameters ']')? ('(' Type (',' Type)* ')')? (':' Type)?`
fn parse_signature(input: &mut Input) -> Result<Sig> {
- let effect = None;
let id = parse_ident(input)?;
let mut generics = Vec::new();
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
generics = parse_parameters(input)?;
input.then(Sep(GenericRightBracket))?;
}
let mut parameters = Vec::new();
- if input.peek(Sep(FuncLeftParen)) {
+ if input.peek_eat(Sep(FuncLeftParen)) {
parameters.push(parse_type(input)?);
- if input.peek(Sep(Comma)) {
+ if input.peek_eat(Sep(Comma)) {
parameters.push(parse_type(input)?);
}
input.then(Sep(FuncRightParen))?;
}
- let mut kind = None;
- if input.peek(Sep(Colon)) {
- kind = Some(parse_type(input)?);
+ let mut kind = Type::Void;
+ if input.peek_eat(Sep(Colon)) {
+ kind = parse_type(input)?;
}
- Ok(Sig { effect, id, generics, parameters, kind })
+ Ok(Sig { id, generics, parameters, kind })
}
/// `WrappedType ::= Type | ('[' Type ']')`
fn parse_wrapped_type(input: &mut Input) -> Result<Type> {
- if input.peek(Sep(GenericLeftBracket)) {
+ if input.peek_eat(Sep(GenericLeftBracket)) {
let result = parse_type(input)?;
input.then(Sep(GenericRightBracket))?;
Ok(result)
@@ -589,12 +591,60 @@ fn parse_wrapped_type(input: &mut Input) -> Result<Type> {
}
}
-/// Pattern ::= Literal | Ident | '(' Pattern (',' Pattern)* ')' | Ident '(' Pattern (',' Pattern)* ')'
-fn parse_pattern(input: &mut Input) -> Result<Pattern> { todo!() }
+/// Pattern ::= Char | String | Number | Float | Ident |
+/// '(' Pattern (',' Pattern)* ')' | Ident '(' Pattern (',' Pattern)* ')'
+fn parse_pattern(input: &mut Input) -> Result<Pattern> {
+ // use Pattern::*;
+ match input.next()? {
+ Key(_) => todo!(),
+ Ident(id) => {
+ if input.peek() == Some(Sep(FuncLeftParen)) {
+ Ok(Pattern::Struct(todo!()))
+ } else {
+ Ok(Pattern::Ident(id))
+ }
+ },
+ Sep(FuncLeftParen) => { // fixme
+ parse_pattern(input)?;
+ while input.peek() == Some(Sep(Comma)) {
+ parse_pattern(input)?;
+ }
+ input.then(Sep(FuncRightParen))?;
+ todo!()
+ },
+ Num(value) => Ok(Pattern::Number(todo!())),
+ Lit(val) => match val {
+ Literal::Char(val) => Ok(Pattern::Char(val.parse::<char>()?)),
+ SingleLineString(val) | MultiLineString(val) => Ok(Pattern::String(val)),
+ token => Err(format!("expected literal but found token {}", token).into())
+ },
+ Sep(_) => todo!(),
+ _ => todo!()
+ }
+}
+
+// fine to not parse operators until here tbh
+fn parse_operator(input: &mut Input) -> Result<Id> {
+ match input.next()? {
+ Key(word) => Ok(word.to_string()),
+ Sep(tok) => {
+ let mut buf = String::new();
+ // buf.push(tok);
+ loop {
+ match input.peek() {
+ Some(Key(_)) => todo!(),
+ _ => break
+ }
+ }
+ Ok(buf)
+ },
+ token => Err(format!("expected operator but found token {}", token).into())
+ }
+}
fn parse_ident(input: &mut Input) -> Result<Id> {
match input.next()? {
- Word(id) => Ok(id),
+ Ident(id) => Ok(id),
token => Err(format!("expected identifier but found token {}", token).into())
}
}