aboutsummaryrefslogtreecommitdiff
path: root/src/frontend
diff options
context:
space:
mode:
authorJJ2023-10-31 09:49:41 +0000
committerJJ2023-10-31 09:50:21 +0000
commit1c14500ed698f1dc21b4b634a174af89b6318b07 (patch)
tree9dad300eec0585e6ee0d41cd0f8740da2e269a28 /src/frontend
parent87d74952a614daa7075aeecef462ff51c4dc46e0 (diff)
compiler: restructure codebase
Diffstat (limited to 'src/frontend')
-rw-r--r--src/frontend/ast.rs116
-rw-r--r--src/frontend/lex.rs542
-rw-r--r--src/frontend/mod.rs3
-rw-r--r--src/frontend/parse.rs297
4 files changed, 958 insertions, 0 deletions
diff --git a/src/frontend/ast.rs b/src/frontend/ast.rs
new file mode 100644
index 0000000..6c7963e
--- /dev/null
+++ b/src/frontend/ast.rs
@@ -0,0 +1,116 @@
+/// Representation of Tokens, function names, the like...
+pub type Id = String;
+
+/// Puck's fundamental types.
+pub enum Type {
+ Void, Never,
+ Integer, Float, String, // char et al are defined later
+ Func{from: Box<Type>, to: Box<Type>}, // todo: multiple params, effects
+ Struct(Vec<(Id, Box<Type>)>),
+ Tuple(Vec<(Option<String>, Box<Type>)>),
+ Union(Vec<(Id, Box<Type>)>),
+ Interface {
+ funcs: Vec<Sig>,
+ for_type: Option<Box<Type>>,
+ },
+ Array{size: usize, kind: Box<Type>},
+ List(Box<Type>),
+ Slice(Box<Type>), // todo: plus ownership
+ Reference(Box<Type>),
+ Mutable(Box<Type>), // parameters only
+ Static(Box<Type>), // parameters only
+ Alias{ id: Id, params: Vec<Type> }, // todo: this is wrong
+}
+
+/// Function signatures.
+pub struct Sig {
+ effect: Option<Id>,
+ id: Id,
+ generics: Vec<(Id, Option<Type>)>,
+ params: Vec<Type>,
+ result: Option<Type>
+}
+
+/// Patterns are recognizable given zero context.
+/// This is why there is a generic Number term and no Bool term.
+/// Also can be considered to be a Term/Value.
+pub enum Pattern {
+ Ident(Id), // type aliases, union variants, calls...
+ Number(i64), Float(f64),
+ Char(char), String(String),
+ Struct(Vec<StructPattern>),
+ Tuple(Vec<TuplePattern>),
+ List(Vec<Expr>), // arrays, slices, lists
+}
+
+pub struct StructPattern { field: Id, value: Expr }
+pub struct TuplePattern { field: Option<Id>, value: Expr }
+
+/// Expressions introduce a new binding or bindings, in some regard.
+pub enum Binding {
+ Let {
+ id: Pattern, // id: Pattern supports ex. `let (a, b) = ...`
+ kind: Option<Type>,
+ value: Box<Expr>
+ },
+ Var {
+ id: Pattern,
+ kind: Option<Type>,
+ value: Option<Box<Expr>> // variable bindings can be delayed
+ },
+ Const {
+ public: bool,
+ id: Pattern,
+ kind: Option<Type>,
+ value: Box<Expr>
+ },
+ FuncDecl {
+ public: bool,
+ effect: Option<Id>,
+ id: Id,
+ generics: Vec<GenericDecl>,
+ params: Vec<ParamDecl>,
+ kind: Type,
+ body: Vec<Expr>
+ },
+ TypeDecl { id: Id, generics: Vec<Id>, alias: Type },
+ Import { from: Option<Id>, imports: Vec<Id>, alias: Option<Id> },
+ Module { id: Id, body: Vec<Expr> },
+}
+
+pub struct GenericDecl { id: Id, kind: Option<Type> }
+pub struct ParamDecl { id: Id, kind: Type }
+
+/// Expressions related to control flow.
+pub enum Control {
+ Call { id: Id, params: Vec<Expr> }, // function calls, macro invocations, field access...
+ If {
+ branches: Vec<CondBranch>,
+ else_body: Option<Vec<Expr>>
+ },
+ Try {
+ body: Vec<Expr>,
+ catches: Vec<CatchBranch>,
+ finally: Option<Vec<Expr>>
+ },
+ Match {
+ item: Pattern,
+ branches: Vec<MatchBranch>
+ },
+ Block { id: Option<Id>, body: Vec<Expr> },
+ Static { body: Vec<Expr> },
+ For { binding: Pattern, range: Box<Expr>, body: Vec<Expr> },
+ While { cond: Box<Expr>, body: Vec<Expr> },
+ Loop { body: Vec<Expr> },
+}
+
+pub struct CondBranch { cond: Expr, body: Vec<Expr> }
+pub struct CatchBranch { exceptions: Vec<Id>, binding: Option<Id>, body: Vec<Expr> }
+pub struct MatchBranch { pattern: Pattern, guard: Option<Expr>, body: Vec<Expr> }
+
+/// Expressions are either Patterns, Bindings, or Control flow constructs.
+pub enum Expr {
+ Pattern(Pattern),
+ Binding(Binding),
+ Control(Control),
+}
diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs
new file mode 100644
index 0000000..771ba38
--- /dev/null
+++ b/src/frontend/lex.rs
@@ -0,0 +1,542 @@
+use multipeek::multipeek;
+
+pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
+pub struct TokenStream(Vec<Token>);
+
+impl IntoIterator for TokenStream {
+ type Item = Token;
+ type IntoIter = std::vec::IntoIter<Token>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.0.into_iter()
+ }
+}
+
+#[derive(Clone, PartialEq, Debug)]
+pub enum LexicalError {
+ InvalidIndentation,
+ MismatchedParens,
+ MismatchedBrackets,
+ UnknownPunctuation,
+}
+
+impl std::fmt::Display for LexicalError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{:?}", self)
+ }
+}
+impl std::error::Error for LexicalError {}
+
+/// **Basic** syntax tokens. Form an unambiguous TokenStream.
+#[derive(Clone, PartialEq)]
+pub enum Token {
+ Key(Keyword), // keyword identifiers.
+ Word(String), // non-keyword identifiers.
+ Num(String), // numeric value, ex. 413, 0b101011, 0xabcd
+ Lit(Literal), // literal value, ex. for strings/comments.
+ Sep(Punctuation), // punctuation. non-word tokens. operators are lexed as this and later transformed to words.
+ Indent(usize), // indentation. denotes line breaks and scope at which a line starts.
+}
+
+#[derive(Clone, PartialEq)]
+pub enum Literal {
+ Char(String),
+ SingleLineString(String),
+ MultiLineString(String),
+ Comment(String),
+ DocComment(String),
+ MultiLineComment(String),
+}
+
+/// Keywords, made explicit for easier use with Rust.
+/// (strings inside match patterns are fucky!!)
+#[derive(Clone, PartialEq)]
+pub enum Keyword {
+ Pub, Let, Var, Const,
+ Func, Macro, Type,
+ Mod, From, Import,
+ For, While, Loop,
+ Block, Static,
+ If, When, Elif, Else, Match,
+ Try, Catch, Finally,
+ Struct, Tuple, Enum, Union, Interface,
+ Distinct, Ref, // todo: Mut once figured out
+ Break, Continue, Return,
+ In, Is, Of, As,
+}
+
+/// All punctuation recognized by the lexer.
+/// Note the distinction between FuncLeftParen and TupleLeftParen.
+#[derive(Clone, PartialEq)]
+pub enum Punctuation {
+ Comma, // ,
+ Period, // .
+ Semicolon, // ;
+ Colon, // :
+ BackTick, // `
+ SingleQuote, // '
+ DoubleQuote, // "
+ FuncLeftParen, // (
+ FuncRightParen, // )
+ TupleLeftParen, // (
+ TupleRightParen, // )
+ GenericLeftBracket, // [
+ GenericRightBracket, // ]
+ ArrayLeftBracket, // [
+ ArrayRightBracket, // ]
+ StructLeftBrace, // }
+ StructRightBrace, // }
+ Equals, // =
+ Plus, // +
+ Minus, // distinction between minus and negative.
+ Negative, // negative binds tightly: there is no whitespace following.
+ Times, // *
+ Slash, // /
+ LessThan, // <
+ GreaterThan, // >
+ At, // @
+ Sha, // $
+ Tilde, // ~
+ And, // &
+ Percent, // %
+ Or, // |
+ Exclamation, // !
+ Question, // ?
+ Caret, // ^
+ Backslash, // \
+}
+
+/// Parses whitespace-sensitive code into an unambiguous TokenStream.
+/// Also useful for formatting.
+pub fn tokenize(input: &str) -> Result<TokenStream> {
+ // The design of this lexer utilizes to great extent multipeek's arbitrary peeking.
+ // Tokens are matched by looping within their case until complete.
+ // This then eliminates the need for most global parser state. (i hate state)
+
+ use Token::*;
+ use Literal::*;
+ use Punctuation::*;
+ use LexicalError::*;
+ enum Paren { Func, Tuple }
+ enum Bracket { Generic, Array }
+ struct State {
+ start_of_line: bool,
+ paren_stack: Vec<Paren>,
+ bracket_stack: Vec<Bracket>,
+ }
+
+ let mut state = State {
+ start_of_line: true,
+ paren_stack: vec!(),
+ bracket_stack: vec!(),
+ };
+
+ let mut buf = String::new();
+ let mut res = Vec::new();
+
+ // `char` in rust is four bytes it's fine
+ let mut input = multipeek(input.chars());
+ while let Some(c) = input.next() {
+ match c {
+ ' ' => { // indentation! and whitespace
+ match res.last() {
+ Some(Indent(_)) => { // indentation!
+ res.pop(); // discard previous empty or useless Indent token
+ let mut current_indent_level = 1;
+ while let Some(x) = input.peek() {
+ match x {
+ ' ' => { current_indent_level += 1; input.next(); },
+ _ => match res.last() { // indentation ends
+ Some(Word(a)) if a == "==" || a == "and" || a == "or" ||
+ a == "xor" || a == "in" || a == "is" => break,
+ Some(Sep(FuncLeftParen)) | Some(Sep(TupleLeftParen)) |
+ Some(Sep(GenericLeftBracket)) | Some(Sep(ArrayLeftBracket)) |
+ Some(Sep(StructLeftBrace)) | Some(Sep(Comma)) => break,
+ _ => {
+ res.push(Indent(current_indent_level));
+ break;
+ }
+ }
+ }
+ }
+ },
+ _ => { // get rid of excess (all) whitespace between words/operators
+ while input.peek().is_some_and(|x| x.is_whitespace() && x != &'\n') { input.next(); }
+ }
+ }
+ },
+ '\t' => return Err(InvalidIndentation.into()),
+ '\n' => res.push(Indent(0)),
+ '\'' => { // chars!
+ while let Some(x) = input.next() {
+ match x {
+ '\'' => break,
+ '\\' => if let Some(y) = input.next() { buf.push(y) },
+ _ => buf.push(x)
+ }
+ }
+ res.push(Lit(Char(String::from(&buf))));
+ },
+ '"' => { // strings!
+ match (input.peek_nth(0).copied(), input.peek_nth(1).copied()) {
+ (Some('"'), Some('"')) => { // triple quoted strings
+ input.next(); input.next();
+ while let Some(x) = input.next() {
+ match x {
+ '"' if input.peek_nth(0) == Some(&'"') &&
+ input.peek_nth(1) == Some(&'"') => {
+ input.next(); input.next();
+ break;
+ },
+ _ => buf.push(x)
+ }
+ }
+ res.push(Lit(MultiLineString(String::from(&buf))));
+ },
+ (_, _) => { // single quoted strings
+ while let Some(x) = input.next() {
+ match x {
+ '"' => break,
+ '\\' => if let Some(y) = input.next() { buf.push(y) },
+ _ => buf.push(x)
+ }
+ }
+ res.push(Lit(SingleLineString(String::from(&buf))));
+ }
+ }
+ },
+ '#' => { // comments!
+ match input.peek() {
+ Some('[') => { // block comment, can be nested
+ input.next();
+ let mut comment_level = 1;
+ while let Some(x) = input.next() && comment_level > 0 {
+ match x {
+ '#' if input.peek() == Some(&'[') => {
+ comment_level += 1;
+ input.next();
+ },
+ ']' if input.peek() == Some(&'#') => {
+ comment_level -= 1;
+ input.next();
+ },
+ _ => buf.push(x)
+ }
+ }
+ res.push(Lit(MultiLineComment(String::from(&buf))));
+ },
+ Some(&'#') => { // documentation comment
+ input.next();
+ while let Some(x) = input.peek() {
+ match x {
+ '\n' => break,
+ _ => {
+ buf.push(*x);
+ }
+ }
+ input.next();
+ }
+ res.push(Lit(DocComment(String::from(&buf))));
+ },
+ _ => { // standard comment, runs til EOL
+ while let Some(x) = input.peek() {
+ match x {
+ '\n' => break,
+ _ => {
+ buf.push(*x);
+ }
+ }
+ input.next();
+ }
+ res.push(Lit(Comment(String::from(&buf))));
+ }
+ }
+ },
+ c if c.is_alphabetic() || c == '_' => { // valid identifiers!
+ buf.push(c);
+ while let Some(x) = input.peek() {
+ match x {
+ x if x.is_alphanumeric() || x == &'_' => {
+ buf.push(*x);
+ input.next();
+ },
+ _ => {
+ use Keyword::*;
+ match buf.as_str() { // keywords!
+ "pub" => res.push(Key(Pub)),
+ "let" => res.push(Key(Let)),
+ "var" => res.push(Key(Var)),
+ "const" => res.push(Key(Const)),
+ "func" => res.push(Key(Func)),
+ "macro" => res.push(Key(Macro)),
+ "type" => res.push(Key(Type)),
+ "mod" => res.push(Key(Mod)),
+ "from" => res.push(Key(From)),
+ "import" => res.push(Key(Import)),
+ "for" => res.push(Key(For)),
+ "while" => res.push(Key(While)),
+ "loop" => res.push(Key(Loop)),
+ "block" => res.push(Key(Block)),
+ "static" => res.push(Key(Static)),
+ "if" => res.push(Key(If)),
+ "when" => res.push(Key(When)),
+ "elif" => res.push(Key(Elif)),
+ "else" => res.push(Key(Else)),
+ "match" => res.push(Key(Match)),
+ "try" => res.push(Key(Try)),
+ "catch" => res.push(Key(Catch)),
+ "finally" => res.push(Key(Finally)),
+ "struct" => res.push(Key(Struct)),
+ "tuple" => res.push(Key(Tuple)),
+ "enum" => res.push(Key(Enum)),
+ "union" => res.push(Key(Union)),
+ "interface" => res.push(Key(Interface)),
+ "distinct" => res.push(Key(Distinct)),
+ "ref" => res.push(Key(Ref)),
+ "break" => res.push(Key(Break)),
+ "continue" => res.push(Key(Continue)),
+ "return" => res.push(Key(Return)),
+ "in" => res.push(Key(In)),
+ "is" => res.push(Key(Is)),
+ "of" => res.push(Key(Of)),
+ "as" => res.push(Key(As)),
+ _ => res.push(Word(String::from(&buf)))
+ }
+ match x { // () and [] denote both parameters/generics and tuples/arrays
+ '(' => { // we must disambiguate by treating those *directly* after words as such
+ res.push(Sep(FuncLeftParen));
+ state.paren_stack.push(Paren::Func);
+ input.next();
+ },
+ '[' => {
+ res.push(Sep(GenericLeftBracket));
+ state.bracket_stack.push(Bracket::Generic);
+ input.next();
+ },
+ _ => {},
+ }
+ break;
+ }
+ }
+ }
+ },
+ '0'..='9' => { // numeric literals!
+ buf.push(c);
+ while let Some(x) = input.peek() {
+ match x {
+ 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {
+ buf.push(*x);
+ input.next();
+ },
+ _ => break
+ }
+ }
+ res.push(Num(String::from(&buf)))
+ },
+ '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator.
+ match input.peek() {
+ Some(' ') => res.push(Sep(Minus)),
+ _ => res.push(Sep(Negative))
+ }
+ },
+ '(' => { // note: FuncParens were matched above, directly after identifiers
+ res.push(Sep(TupleLeftParen));
+ state.paren_stack.push(Paren::Tuple);
+ },
+ '[' => { // note: GenericBrackets were matched above, directly after identifiers
+ res.push(Sep(ArrayLeftBracket));
+ state.bracket_stack.push(Bracket::Array);
+ },
+ ')' => {
+ match state.paren_stack.pop() {
+ Some(Paren::Func) => res.push(Sep(FuncRightParen)),
+ Some(Paren::Tuple) => res.push(Sep(TupleRightParen)),
+ None => return Err(MismatchedParens.into()),
+ }
+ },
+ ']' => {
+ match state.bracket_stack.pop() {
+ Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)),
+ Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)),
+ None => return Err(MismatchedBrackets.into()),
+ }
+ if input.peek() == Some(&'(') { // parameters following generics
+ res.push(Sep(FuncLeftParen));
+ state.paren_stack.push(Paren::Func);
+ input.next();
+ }
+ },
+ '`' => {
+ res.push(Sep(BackTick));
+ match input.peek() {
+ Some('(') => {
+ res.push(Sep(FuncLeftParen));
+ state.paren_stack.push(Paren::Func);
+ input.next();
+ },
+ Some('[') => {
+ res.push(Sep(GenericLeftBracket));
+ state.bracket_stack.push(Bracket::Generic);
+ input.next();
+ },
+ _ => {}
+ }
+ },
+ ',' => res.push(Sep(Comma)),
+ '.' => res.push(Sep(Period)),
+ ';' => res.push(Sep(Semicolon)),
+ ':' => res.push(Sep(Colon)),
+ '{' => res.push(Sep(StructLeftBrace)),
+ '}' => res.push(Sep(StructRightBrace)),
+ '=' => res.push(Sep(Equals)),
+ '+' => res.push(Sep(Plus)),
+ '*' => res.push(Sep(Times)),
+ '/' => res.push(Sep(Slash)),
+ '<' => res.push(Sep(LessThan)),
+ '>' => res.push(Sep(GreaterThan)),
+ '@' => res.push(Sep(At)),
+ '$' => res.push(Sep(Sha)),
+ '~' => res.push(Sep(Tilde)),
+ '&' => res.push(Sep(And)),
+ '|' => res.push(Sep(Or)),
+ '!' => res.push(Sep(Exclamation)),
+ '?' => res.push(Sep(Question)),
+ '^' => res.push(Sep(Caret)),
+ '\\' => res.push(Sep(Backslash)),
+ _ => return Err(UnknownPunctuation.into())
+ }
+ buf.clear();
+ }
+ Ok(TokenStream(res))
+}
+
+impl std::fmt::Display for TokenStream {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ use Token::*;
+ let mut prev_token = Indent(0);
+ for token in &self.0 {
+ match (&prev_token, &token) {
+ (Word(_), Word(_)) | (Word(_), Num(_)) |
+ (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?,
+ _ => write!(f, "{}", token)?,
+ }
+ prev_token = token.clone();
+ }
+ Ok(())
+ }
+}
+
+impl std::fmt::Display for Token {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ use Token::*;
+ match self {
+ Key(word) => write!(f, "{}", word),
+ Word(val) => write!(f, "{}", val),
+ Num(val) => write!(f, "{}", val),
+ Lit(lit) => write!(f, "{}", lit),
+ Sep(sep) => write!(f, "{}", sep),
+ Indent(i) => write!(f, "\n{}", " ".repeat(*i)),
+ }
+ }
+}
+
+impl std::fmt::Display for Literal {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ use Literal::*;
+ match self {
+ Char(val) => write!(f, "'{}'", val),
+ SingleLineString(val) => write!(f, "\"{}\"", val),
+ MultiLineString(val) => write!(f, "\"\"\"{}\"\"\"", val),
+ Comment(val) => write!(f, "#{}", val),
+ DocComment(val) => write!(f, "##{}", val),
+ MultiLineComment(val) => write!(f, "#[{}]#", val),
+ }
+ }
+}
+
+impl std::fmt::Display for Keyword {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ use Keyword::*;
+ match self {
+ Pub => write!(f, "pub"),
+ Let => write!(f, "let"),
+ Var => write!(f, "var"),
+ Const => write!(f, "const"),
+ Func => write!(f, "func"),
+ Macro => write!(f, "macro"),
+ Type => write!(f, "type"),
+ Mod => write!(f, "mod"),
+ From => write!(f, "from"),
+ Import => write!(f, "import"),
+ For => write!(f, "for"),
+ While => write!(f, "while"),
+ Loop => write!(f, "loop"),
+ Block => write!(f, "block"),
+ Static => write!(f, "static"),
+ If => write!(f, "if"),
+ When => write!(f, "when"),
+ Elif => write!(f, "elif"),
+ Else => write!(f, "else"),
+ Match => write!(f, "match"),
+ Try => write!(f, "try"),
+ Catch => write!(f, "catch"),
+ Finally => write!(f, "finally"),
+ Struct => write!(f, "struct"),
+ Tuple => write!(f, "tuple"),
+ Enum => write!(f, "enum"),
+ Union => write!(f, "union"),
+ Interface => write!(f, "interface"),
+ Distinct => write!(f, "distinct"),
+ Ref => write!(f, "ref"),
+ Break => write!(f, "break"),
+ Continue => write!(f, "continue"),
+ Return => write!(f, "return"),
+ In => write!(f, "in"),
+ Is => write!(f, "is"),
+ Of => write!(f, "of"),
+ As => write!(f, "as"),
+ }
+ }
+}
+impl std::fmt::Display for Punctuation {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ use Punctuation::*;
+ match self {
+ Comma => write!(f, ","),
+ Period => write!(f, "."),
+ Semicolon => write!(f, ";"),
+ Colon => write!(f, ":"),
+ BackTick => write!(f, "`"),
+ SingleQuote => write!(f, "'"),
+ DoubleQuote => write!(f, "\""),
+ FuncLeftParen => write!(f, "("),
+ FuncRightParen => write!(f, ")"),
+ TupleLeftParen => write!(f, " ("),
+ TupleRightParen => write!(f, ")"),
+ GenericLeftBracket => write!(f, "["),
+ GenericRightBracket => write!(f, "]"),
+ ArrayLeftBracket => write!(f, " ["),
+ ArrayRightBracket => write!(f, "]"),
+ StructLeftBrace => write!(f, "{{"),
+ StructRightBrace => write!(f, "}}"),
+ Equals => write!(f, "="),
+ Plus => write!(f, "+"),
+ Minus => write!(f, "- "),
+ Negative => write!(f, "-"),
+ Times => write!(f, "*"),
+ Slash => write!(f, "/"),
+ LessThan => write!(f, "<"),
+ GreaterThan => write!(f, ">"),
+ At => write!(f, "@"),
+ Sha => write!(f, "$"),
+ Tilde => write!(f, "~"),
+ And => write!(f, "&"),
+ Percent => write!(f, "%"),
+ Or => write!(f, "|"),
+ Exclamation => write!(f, "!"),
+ Question => write!(f, "?"),
+ Caret => write!(f, "^"),
+ Backslash => write!(f, "\\"),
+ }
+ }
+}
diff --git a/src/frontend/mod.rs b/src/frontend/mod.rs
new file mode 100644
index 0000000..d437c73
--- /dev/null
+++ b/src/frontend/mod.rs
@@ -0,0 +1,3 @@
+pub mod ast;
+pub mod lex;
+pub mod parse;
diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs
new file mode 100644
index 0000000..c525982
--- /dev/null
+++ b/src/frontend/parse.rs
@@ -0,0 +1,297 @@
+use std::fmt;
+
+use crate::frontend::lex::*;
+use crate::frontend::ast::*;
+use crate::frontend::ast::Binding::*;
+use crate::frontend::ast::Control::*;
+use crate::frontend::ast::Pattern::*;
+use Token::*;
+use Literal::*;
+use Punctuation::*;
+
+type Input = std::iter::Peekable<std::vec::IntoIter<Token>>;
+
+#[derive(Clone, Copy)]
+struct State {
+ depth: usize,
+ step: usize
+}
+
+impl State {
+ fn indent(&self) -> State {
+ State { depth: self.depth + self.step, step: self.step }
+ }
+ fn dedent(&self) -> State {
+ State { depth: self.depth - self.step, step: self.step }
+ }
+}
+
+/// Convert a basic TokenStream into an AbstractSyntaxTree
+pub fn astify(input: TokenStream, name: &str) -> Result<Expr> {
+ let mut input = input.into_iter().peekable();
+ let body = parse_body(&mut input, State { depth: 0, step: 0 })?;
+ Ok(Expr::Binding(Module{ id: name.to_string(), body }))
+}
+
+/// Parse a series of Exprs, for ex. the body of a function.
+fn parse_body(input: &mut Input, state: State) -> Result<Vec<Expr>> {
+ let mut res = Vec::new();
+ while input.peek() == Some(&Indent(state.depth)) {
+ input.next();
+ res.push(parse_expr(input, state)?);
+ }
+ Ok(res)
+}
+
+/// Expr ::= Let | Var | Const | Func | Type |
+/// Mod | Import | Block | Static |
+/// For | While | Loop | If | When | Try | Match
+fn parse_expr(input: &mut Input, state: State) -> Result<Expr> {
+ use Keyword::*;
+ match input.next() {
+ Some(Key(word)) => match word {
+ Pub => {
+ match input.next() {
+ Some(Key(word)) => match word {
+ Const => parse_const(input, state, true),
+ Func => parse_funcdecl(input, state, true),
+ Type => parse_typedecl(input, state, true),
+ Mod => parse_mod(input, state, true),
+ _ => return Err("unrecognized keyword following pub".into()),
+ }
+ Some(_) => return Err("unrecognized thing following pub".into()),
+ None => return Err("end of input".into()),
+ }
+ },
+ Let => parse_let(input, state),
+ Var => parse_var(input, state),
+ Const => parse_const(input, state, false),
+ Func => parse_funcdecl(input, state, false),
+ Type => parse_typedecl(input, state, false),
+ Mod => parse_mod(input, state, false),
+ From => parse_import(input, state, true),
+ Import => parse_import(input, state, false),
+ Block => parse_block(input, state),
+ Static => parse_static(input, state),
+ For => parse_for(input, state),
+ While => parse_while(input, state),
+ Loop => parse_loop(input, state),
+ If => parse_if(input, state),
+ When => parse_when(input, state),
+ Try => parse_try(input, state),
+ Match => parse_match(input, state),
+ _ => return Err("invalid keyword starting expression".into()),
+ },
+ _ => todo!(), // what can i do with this?? match line here
+ }
+}
+
+/// Let ::= 'let' Pattern Annotation? '=' Expr
+fn parse_let(input: &mut Input, state: State) -> Result<Expr> {
+ let id = parse_pattern(input, state)?;
+ let mut kind = None;
+ if let Some(Sep(Colon)) = input.peek() {
+ input.next();
+ kind = Some(parse_typedesc(input, state)?);
+ }
+ if input.next() != Some(Sep(Equals)) {
+ return Err("= not following binding".into())
+ }
+ let value = Box::new(parse_expr(input, state)?);
+ Ok(Expr::Binding(Let { id, kind, value }))
+}
+/// Var ::= 'var' Pattern Annotation? ('=' Expr)?
+fn parse_var(input: &mut Input, state: State) -> Result<Expr> {
+ let id = parse_pattern(input, state)?;
+ let mut kind = None;
+ if let Some(Sep(Colon)) = input.peek() {
+ input.next();
+ kind = Some(parse_typedesc(input, state)?);
+ }
+ let mut value = None;
+ if input.next() != Some(Sep(Equals)) {
+ value = Some(Box::new(parse_expr(input, state)?));
+ }
+ Ok(Expr::Binding(Var { id, kind, value }))
+}
+// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr
+fn parse_const(input: &mut Input, state: State, public: bool) -> Result<Expr> {
+ let id = parse_pattern(input, state)?;
+ let mut kind = None;
+ if let Some(Sep(Colon)) = input.peek() {
+ input.next();
+ kind = Some(parse_typedesc(input, state)?);
+ }
+ if input.next() != Some(Sep(Equals)) {
+ return Err("= not following binding".into())
+ }
+ let value = Box::new(parse_expr(input, state)?);
+ Ok(Expr::Binding(Const { public, id, kind, value }))
+}
+// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body
+fn parse_funcdecl(input: &mut Input, state: State, public: bool) -> Result<Expr> { todo!() }
+// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc
+fn parse_typedecl(input: &mut Input, state: State, public: bool) -> Result<Expr> {
+ let pattern = parse_pattern(input, state)?;
+ todo!()
+}
+// Mod ::= 'pub'? 'mod' Ident ':' Body
+fn parse_mod(input: &mut Input, state: State, public: bool) -> Result<Expr> {
+ match input.next() {
+ Some(Word(id)) => {
+ match input.next() {
+ Some(Sep(Colon)) => {
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Binding(Module { id, body }))
+ },
+ _ => return Err("unexpected token following mod label".into()),
+ }
+ },
+ _ => return Err("unexpected thing following mod keyword".into()),
+ }
+}
+
+// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)?
+fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result<Expr> {
+ let mut from = None;
+ if from_scope {
+ match input.next() {
+ Some(Word(id)) => from = Some(id),
+ _ => return Err("identifier not following from keyword".into())
+ }
+ if input.next() != Some(Key(Keyword::Import)) {
+ return Err("expected import to follow from".into())
+ }
+ }
+ todo!()
+}
+// Block ::= 'block' Ident? ':' Body
+fn parse_block(input: &mut Input, state: State) -> Result<Expr> { // todo: body + offset
+ match input.next() {
+ Some(Sep(Colon)) => {
+ let id = None;
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Block { id, body }))
+ },
+ Some(Word(label)) => {
+ match input.next() {
+ Some(Sep(Colon)) => {
+ let id = Some(label);
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Block { id, body }))
+ },
+ _ => return Err("unexpected token following block label".into()),
+ }
+ },
+ _ => return Err("unexpected thing following block keyword".into()),
+ }
+}
+// Static ::= 'static' ':' Body
+fn parse_static(input: &mut Input, state: State) -> Result<Expr> {
+ if input.next() != Some(Sep(Colon)) {
+ return Err("colon must follow static invocation".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Static { body }))
+}
+
+// For ::= 'for' Pattern 'in' Expr ':' Body
+fn parse_for(input: &mut Input, state: State) -> Result<Expr> {
+ let binding = parse_pattern(input, state)?;
+ if input.next() != Some(Key(Keyword::In)) {
+ return Err("expected in keyword after for pattern".into());
+ }
+ let range = Box::new(parse_expr(input, state)?);
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after in expression".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(For { binding, range, body }))
+}
+// While ::= 'while' Expr ':' Body
+fn parse_while(input: &mut Input, state: State) -> Result<Expr> {
+ let cond = Box::new(parse_expr(input, state)?);
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after while keyword".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(While { cond, body }))
+}
+// Loop ::= 'loop' ':' Body
+fn parse_loop(input: &mut Input, state: State) -> Result<Expr> {
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after loop keyword".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ Ok(Expr::Control(Loop { body }))
+}
+
+// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?
+fn parse_if(input: &mut Input, state: State) -> Result<Expr> {
+ let mut branches = Vec::new();
+ branches.push(parse_cond_branch(input, state)?);
+ while input.peek() == Some(&Key(Keyword::Elif)) {
+ input.next();
+ branches.push(parse_cond_branch(input, state)?);
+ }
+ let mut else_body = None;
+ if input.peek() == Some(&Key(Keyword::Else)) {
+ input.next();
+ else_body = Some(parse_body(input, state.indent())?);
+ }
+ Ok(Expr::Control(If { branches, else_body }))
+}
+// When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?
+fn parse_when(input: &mut Input, state: State) -> Result<Expr> {
+ let mut branches = Vec::new();
+ branches.push(parse_cond_branch(input, state)?);
+ while input.peek() == Some(&Key(Keyword::Elif)) {
+ input.next();
+ branches.push(parse_cond_branch(input, state)?);
+ }
+ let mut else_body = None;
+ if input.peek() == Some(&Key(Keyword::Else)) {
+ input.next();
+ else_body = Some(parse_body(input, state.indent())?);
+ }
+ let mut body = Vec::new();
+ body.push(Expr::Control(If { branches, else_body }));
+ Ok(Expr::Control(Static { body }))
+}
+// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)?
+fn parse_try(input: &mut Input, state: State) -> Result<Expr> {
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after try keyword".into());
+ }
+ let body = parse_body(input, state.indent())?;
+ let catches = Vec::new();
+ while input.peek() == Some(&Key(Keyword::Catch)) {
+ input.next();
+ todo!();
+ }
+ let mut finally = None;
+ if input.peek() == Some(&Key(Keyword::Finally)) {
+ input.next();
+ if input.next() != Some(Sep(Colon)) {
+ return Err("expected colon after try keyword".into());
+ }
+ finally = Some(parse_body(input, state.indent())?);
+ }
+ Ok(Expr::Control(Try { body, catches, finally }))
+}
+// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+
+fn parse_match(input: &mut Input, state: State) -> Result<Expr> {
+ let item = parse_pattern(input, state)?;
+ let mut branches = Vec::new();
+ while input.peek() == Some(&Key(Keyword::Of)) {
+ input.next();
+ todo!();
+ }
+ Ok(Expr::Control(Match { item, branches }))
+}
+
+fn parse_typedesc(input: &mut Input, state: State) -> Result<Type> { todo!() }
+fn parse_pattern(input: &mut Input, state: State) -> Result<Pattern> { todo!() }
+fn parse_cond_branch(input: &mut Input, state: State) -> Result<CondBranch> { todo!() }
+
+// lex, parse, expand, compile?