From 1c14500ed698f1dc21b4b634a174af89b6318b07 Mon Sep 17 00:00:00 2001 From: JJ Date: Tue, 31 Oct 2023 02:49:41 -0700 Subject: compiler: restructure codebase --- src/ast.rs | 116 ----------- src/frontend/ast.rs | 116 +++++++++++ src/frontend/lex.rs | 542 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/frontend/mod.rs | 3 + src/frontend/parse.rs | 297 +++++++++++++++++++++++++++ src/lex.rs | 542 -------------------------------------------------- src/main.rs | 5 +- src/parse.rs | 297 --------------------------- 8 files changed, 959 insertions(+), 959 deletions(-) delete mode 100644 src/ast.rs create mode 100644 src/frontend/ast.rs create mode 100644 src/frontend/lex.rs create mode 100644 src/frontend/mod.rs create mode 100644 src/frontend/parse.rs delete mode 100644 src/lex.rs delete mode 100644 src/parse.rs (limited to 'src') diff --git a/src/ast.rs b/src/ast.rs deleted file mode 100644 index 6c7963e..0000000 --- a/src/ast.rs +++ /dev/null @@ -1,116 +0,0 @@ -/// Representation of Tokens, function names, the like... -pub type Id = String; - -/// Puck's fundamental types. -pub enum Type { - Void, Never, - Integer, Float, String, // char et al are defined later - Func{from: Box, to: Box}, // todo: multiple params, effects - Struct(Vec<(Id, Box)>), - Tuple(Vec<(Option, Box)>), - Union(Vec<(Id, Box)>), - Interface { - funcs: Vec, - for_type: Option>, - }, - Array{size: usize, kind: Box}, - List(Box), - Slice(Box), // todo: plus ownership - Reference(Box), - Mutable(Box), // parameters only - Static(Box), // parameters only - Alias{ id: Id, params: Vec }, // todo: this is wrong -} - -/// Function signatures. -pub struct Sig { - effect: Option, - id: Id, - generics: Vec<(Id, Option)>, - params: Vec, - result: Option -} - -/// Patterns are recognizable given zero context. -/// This is why there is a generic Number term and no Bool term. -/// Also can be considered to be a Term/Value. -pub enum Pattern { - Ident(Id), // type aliases, union variants, calls... - Number(i64), Float(f64), - Char(char), String(String), - Struct(Vec), - Tuple(Vec), - List(Vec), // arrays, slices, lists -} - -pub struct StructPattern { field: Id, value: Expr } -pub struct TuplePattern { field: Option, value: Expr } - -/// Expressions introduce a new binding or bindings, in some regard. -pub enum Binding { - Let { - id: Pattern, // id: Pattern supports ex. `let (a, b) = ...` - kind: Option, - value: Box - }, - Var { - id: Pattern, - kind: Option, - value: Option> // variable bindings can be delayed - }, - Const { - public: bool, - id: Pattern, - kind: Option, - value: Box - }, - FuncDecl { - public: bool, - effect: Option, - id: Id, - generics: Vec, - params: Vec, - kind: Type, - body: Vec - }, - TypeDecl { id: Id, generics: Vec, alias: Type }, - Import { from: Option, imports: Vec, alias: Option }, - Module { id: Id, body: Vec }, -} - -pub struct GenericDecl { id: Id, kind: Option } -pub struct ParamDecl { id: Id, kind: Type } - -/// Expressions related to control flow. -pub enum Control { - Call { id: Id, params: Vec }, // function calls, macro invocations, field access... - If { - branches: Vec, - else_body: Option> - }, - Try { - body: Vec, - catches: Vec, - finally: Option> - }, - Match { - item: Pattern, - branches: Vec - }, - Block { id: Option, body: Vec }, - Static { body: Vec }, - For { binding: Pattern, range: Box, body: Vec }, - While { cond: Box, body: Vec }, - Loop { body: Vec }, -} - -pub struct CondBranch { cond: Expr, body: Vec } -pub struct CatchBranch { exceptions: Vec, binding: Option, body: Vec } -pub struct MatchBranch { pattern: Pattern, guard: Option, body: Vec } - -/// Expressions are either Patterns, Bindings, or Control flow constructs. -pub enum Expr { - Pattern(Pattern), - Binding(Binding), - Control(Control), -} diff --git a/src/frontend/ast.rs b/src/frontend/ast.rs new file mode 100644 index 0000000..6c7963e --- /dev/null +++ b/src/frontend/ast.rs @@ -0,0 +1,116 @@ +/// Representation of Tokens, function names, the like... +pub type Id = String; + +/// Puck's fundamental types. +pub enum Type { + Void, Never, + Integer, Float, String, // char et al are defined later + Func{from: Box, to: Box}, // todo: multiple params, effects + Struct(Vec<(Id, Box)>), + Tuple(Vec<(Option, Box)>), + Union(Vec<(Id, Box)>), + Interface { + funcs: Vec, + for_type: Option>, + }, + Array{size: usize, kind: Box}, + List(Box), + Slice(Box), // todo: plus ownership + Reference(Box), + Mutable(Box), // parameters only + Static(Box), // parameters only + Alias{ id: Id, params: Vec }, // todo: this is wrong +} + +/// Function signatures. +pub struct Sig { + effect: Option, + id: Id, + generics: Vec<(Id, Option)>, + params: Vec, + result: Option +} + +/// Patterns are recognizable given zero context. +/// This is why there is a generic Number term and no Bool term. +/// Also can be considered to be a Term/Value. +pub enum Pattern { + Ident(Id), // type aliases, union variants, calls... + Number(i64), Float(f64), + Char(char), String(String), + Struct(Vec), + Tuple(Vec), + List(Vec), // arrays, slices, lists +} + +pub struct StructPattern { field: Id, value: Expr } +pub struct TuplePattern { field: Option, value: Expr } + +/// Expressions introduce a new binding or bindings, in some regard. +pub enum Binding { + Let { + id: Pattern, // id: Pattern supports ex. `let (a, b) = ...` + kind: Option, + value: Box + }, + Var { + id: Pattern, + kind: Option, + value: Option> // variable bindings can be delayed + }, + Const { + public: bool, + id: Pattern, + kind: Option, + value: Box + }, + FuncDecl { + public: bool, + effect: Option, + id: Id, + generics: Vec, + params: Vec, + kind: Type, + body: Vec + }, + TypeDecl { id: Id, generics: Vec, alias: Type }, + Import { from: Option, imports: Vec, alias: Option }, + Module { id: Id, body: Vec }, +} + +pub struct GenericDecl { id: Id, kind: Option } +pub struct ParamDecl { id: Id, kind: Type } + +/// Expressions related to control flow. +pub enum Control { + Call { id: Id, params: Vec }, // function calls, macro invocations, field access... + If { + branches: Vec, + else_body: Option> + }, + Try { + body: Vec, + catches: Vec, + finally: Option> + }, + Match { + item: Pattern, + branches: Vec + }, + Block { id: Option, body: Vec }, + Static { body: Vec }, + For { binding: Pattern, range: Box, body: Vec }, + While { cond: Box, body: Vec }, + Loop { body: Vec }, +} + +pub struct CondBranch { cond: Expr, body: Vec } +pub struct CatchBranch { exceptions: Vec, binding: Option, body: Vec } +pub struct MatchBranch { pattern: Pattern, guard: Option, body: Vec } + +/// Expressions are either Patterns, Bindings, or Control flow constructs. +pub enum Expr { + Pattern(Pattern), + Binding(Binding), + Control(Control), +} diff --git a/src/frontend/lex.rs b/src/frontend/lex.rs new file mode 100644 index 0000000..771ba38 --- /dev/null +++ b/src/frontend/lex.rs @@ -0,0 +1,542 @@ +use multipeek::multipeek; + +pub type Result = core::result::Result>; +pub struct TokenStream(Vec); + +impl IntoIterator for TokenStream { + type Item = Token; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +#[derive(Clone, PartialEq, Debug)] +pub enum LexicalError { + InvalidIndentation, + MismatchedParens, + MismatchedBrackets, + UnknownPunctuation, +} + +impl std::fmt::Display for LexicalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} +impl std::error::Error for LexicalError {} + +/// **Basic** syntax tokens. Form an unambiguous TokenStream. +#[derive(Clone, PartialEq)] +pub enum Token { + Key(Keyword), // keyword identifiers. + Word(String), // non-keyword identifiers. + Num(String), // numeric value, ex. 413, 0b101011, 0xabcd + Lit(Literal), // literal value, ex. for strings/comments. + Sep(Punctuation), // punctuation. non-word tokens. operators are lexed as this and later transformed to words. + Indent(usize), // indentation. denotes line breaks and scope at which a line starts. +} + +#[derive(Clone, PartialEq)] +pub enum Literal { + Char(String), + SingleLineString(String), + MultiLineString(String), + Comment(String), + DocComment(String), + MultiLineComment(String), +} + +/// Keywords, made explicit for easier use with Rust. +/// (strings inside match patterns are fucky!!) +#[derive(Clone, PartialEq)] +pub enum Keyword { + Pub, Let, Var, Const, + Func, Macro, Type, + Mod, From, Import, + For, While, Loop, + Block, Static, + If, When, Elif, Else, Match, + Try, Catch, Finally, + Struct, Tuple, Enum, Union, Interface, + Distinct, Ref, // todo: Mut once figured out + Break, Continue, Return, + In, Is, Of, As, +} + +/// All punctuation recognized by the lexer. +/// Note the distinction between FuncLeftParen and TupleLeftParen. +#[derive(Clone, PartialEq)] +pub enum Punctuation { + Comma, // , + Period, // . + Semicolon, // ; + Colon, // : + BackTick, // ` + SingleQuote, // ' + DoubleQuote, // " + FuncLeftParen, // ( + FuncRightParen, // ) + TupleLeftParen, // ( + TupleRightParen, // ) + GenericLeftBracket, // [ + GenericRightBracket, // ] + ArrayLeftBracket, // [ + ArrayRightBracket, // ] + StructLeftBrace, // } + StructRightBrace, // } + Equals, // = + Plus, // + + Minus, // distinction between minus and negative. + Negative, // negative binds tightly: there is no whitespace following. + Times, // * + Slash, // / + LessThan, // < + GreaterThan, // > + At, // @ + Sha, // $ + Tilde, // ~ + And, // & + Percent, // % + Or, // | + Exclamation, // ! + Question, // ? + Caret, // ^ + Backslash, // \ +} + +/// Parses whitespace-sensitive code into an unambiguous TokenStream. +/// Also useful for formatting. +pub fn tokenize(input: &str) -> Result { + // The design of this lexer utilizes to great extent multipeek's arbitrary peeking. + // Tokens are matched by looping within their case until complete. + // This then eliminates the need for most global parser state. (i hate state) + + use Token::*; + use Literal::*; + use Punctuation::*; + use LexicalError::*; + enum Paren { Func, Tuple } + enum Bracket { Generic, Array } + struct State { + start_of_line: bool, + paren_stack: Vec, + bracket_stack: Vec, + } + + let mut state = State { + start_of_line: true, + paren_stack: vec!(), + bracket_stack: vec!(), + }; + + let mut buf = String::new(); + let mut res = Vec::new(); + + // `char` in rust is four bytes it's fine + let mut input = multipeek(input.chars()); + while let Some(c) = input.next() { + match c { + ' ' => { // indentation! and whitespace + match res.last() { + Some(Indent(_)) => { // indentation! + res.pop(); // discard previous empty or useless Indent token + let mut current_indent_level = 1; + while let Some(x) = input.peek() { + match x { + ' ' => { current_indent_level += 1; input.next(); }, + _ => match res.last() { // indentation ends + Some(Word(a)) if a == "==" || a == "and" || a == "or" || + a == "xor" || a == "in" || a == "is" => break, + Some(Sep(FuncLeftParen)) | Some(Sep(TupleLeftParen)) | + Some(Sep(GenericLeftBracket)) | Some(Sep(ArrayLeftBracket)) | + Some(Sep(StructLeftBrace)) | Some(Sep(Comma)) => break, + _ => { + res.push(Indent(current_indent_level)); + break; + } + } + } + } + }, + _ => { // get rid of excess (all) whitespace between words/operators + while input.peek().is_some_and(|x| x.is_whitespace() && x != &'\n') { input.next(); } + } + } + }, + '\t' => return Err(InvalidIndentation.into()), + '\n' => res.push(Indent(0)), + '\'' => { // chars! + while let Some(x) = input.next() { + match x { + '\'' => break, + '\\' => if let Some(y) = input.next() { buf.push(y) }, + _ => buf.push(x) + } + } + res.push(Lit(Char(String::from(&buf)))); + }, + '"' => { // strings! + match (input.peek_nth(0).copied(), input.peek_nth(1).copied()) { + (Some('"'), Some('"')) => { // triple quoted strings + input.next(); input.next(); + while let Some(x) = input.next() { + match x { + '"' if input.peek_nth(0) == Some(&'"') && + input.peek_nth(1) == Some(&'"') => { + input.next(); input.next(); + break; + }, + _ => buf.push(x) + } + } + res.push(Lit(MultiLineString(String::from(&buf)))); + }, + (_, _) => { // single quoted strings + while let Some(x) = input.next() { + match x { + '"' => break, + '\\' => if let Some(y) = input.next() { buf.push(y) }, + _ => buf.push(x) + } + } + res.push(Lit(SingleLineString(String::from(&buf)))); + } + } + }, + '#' => { // comments! + match input.peek() { + Some('[') => { // block comment, can be nested + input.next(); + let mut comment_level = 1; + while let Some(x) = input.next() && comment_level > 0 { + match x { + '#' if input.peek() == Some(&'[') => { + comment_level += 1; + input.next(); + }, + ']' if input.peek() == Some(&'#') => { + comment_level -= 1; + input.next(); + }, + _ => buf.push(x) + } + } + res.push(Lit(MultiLineComment(String::from(&buf)))); + }, + Some(&'#') => { // documentation comment + input.next(); + while let Some(x) = input.peek() { + match x { + '\n' => break, + _ => { + buf.push(*x); + } + } + input.next(); + } + res.push(Lit(DocComment(String::from(&buf)))); + }, + _ => { // standard comment, runs til EOL + while let Some(x) = input.peek() { + match x { + '\n' => break, + _ => { + buf.push(*x); + } + } + input.next(); + } + res.push(Lit(Comment(String::from(&buf)))); + } + } + }, + c if c.is_alphabetic() || c == '_' => { // valid identifiers! + buf.push(c); + while let Some(x) = input.peek() { + match x { + x if x.is_alphanumeric() || x == &'_' => { + buf.push(*x); + input.next(); + }, + _ => { + use Keyword::*; + match buf.as_str() { // keywords! + "pub" => res.push(Key(Pub)), + "let" => res.push(Key(Let)), + "var" => res.push(Key(Var)), + "const" => res.push(Key(Const)), + "func" => res.push(Key(Func)), + "macro" => res.push(Key(Macro)), + "type" => res.push(Key(Type)), + "mod" => res.push(Key(Mod)), + "from" => res.push(Key(From)), + "import" => res.push(Key(Import)), + "for" => res.push(Key(For)), + "while" => res.push(Key(While)), + "loop" => res.push(Key(Loop)), + "block" => res.push(Key(Block)), + "static" => res.push(Key(Static)), + "if" => res.push(Key(If)), + "when" => res.push(Key(When)), + "elif" => res.push(Key(Elif)), + "else" => res.push(Key(Else)), + "match" => res.push(Key(Match)), + "try" => res.push(Key(Try)), + "catch" => res.push(Key(Catch)), + "finally" => res.push(Key(Finally)), + "struct" => res.push(Key(Struct)), + "tuple" => res.push(Key(Tuple)), + "enum" => res.push(Key(Enum)), + "union" => res.push(Key(Union)), + "interface" => res.push(Key(Interface)), + "distinct" => res.push(Key(Distinct)), + "ref" => res.push(Key(Ref)), + "break" => res.push(Key(Break)), + "continue" => res.push(Key(Continue)), + "return" => res.push(Key(Return)), + "in" => res.push(Key(In)), + "is" => res.push(Key(Is)), + "of" => res.push(Key(Of)), + "as" => res.push(Key(As)), + _ => res.push(Word(String::from(&buf))) + } + match x { // () and [] denote both parameters/generics and tuples/arrays + '(' => { // we must disambiguate by treating those *directly* after words as such + res.push(Sep(FuncLeftParen)); + state.paren_stack.push(Paren::Func); + input.next(); + }, + '[' => { + res.push(Sep(GenericLeftBracket)); + state.bracket_stack.push(Bracket::Generic); + input.next(); + }, + _ => {}, + } + break; + } + } + } + }, + '0'..='9' => { // numeric literals! + buf.push(c); + while let Some(x) = input.peek() { + match x { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => { + buf.push(*x); + input.next(); + }, + _ => break + } + } + res.push(Num(String::from(&buf))) + }, + '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator. + match input.peek() { + Some(' ') => res.push(Sep(Minus)), + _ => res.push(Sep(Negative)) + } + }, + '(' => { // note: FuncParens were matched above, directly after identifiers + res.push(Sep(TupleLeftParen)); + state.paren_stack.push(Paren::Tuple); + }, + '[' => { // note: GenericBrackets were matched above, directly after identifiers + res.push(Sep(ArrayLeftBracket)); + state.bracket_stack.push(Bracket::Array); + }, + ')' => { + match state.paren_stack.pop() { + Some(Paren::Func) => res.push(Sep(FuncRightParen)), + Some(Paren::Tuple) => res.push(Sep(TupleRightParen)), + None => return Err(MismatchedParens.into()), + } + }, + ']' => { + match state.bracket_stack.pop() { + Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)), + Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)), + None => return Err(MismatchedBrackets.into()), + } + if input.peek() == Some(&'(') { // parameters following generics + res.push(Sep(FuncLeftParen)); + state.paren_stack.push(Paren::Func); + input.next(); + } + }, + '`' => { + res.push(Sep(BackTick)); + match input.peek() { + Some('(') => { + res.push(Sep(FuncLeftParen)); + state.paren_stack.push(Paren::Func); + input.next(); + }, + Some('[') => { + res.push(Sep(GenericLeftBracket)); + state.bracket_stack.push(Bracket::Generic); + input.next(); + }, + _ => {} + } + }, + ',' => res.push(Sep(Comma)), + '.' => res.push(Sep(Period)), + ';' => res.push(Sep(Semicolon)), + ':' => res.push(Sep(Colon)), + '{' => res.push(Sep(StructLeftBrace)), + '}' => res.push(Sep(StructRightBrace)), + '=' => res.push(Sep(Equals)), + '+' => res.push(Sep(Plus)), + '*' => res.push(Sep(Times)), + '/' => res.push(Sep(Slash)), + '<' => res.push(Sep(LessThan)), + '>' => res.push(Sep(GreaterThan)), + '@' => res.push(Sep(At)), + '$' => res.push(Sep(Sha)), + '~' => res.push(Sep(Tilde)), + '&' => res.push(Sep(And)), + '|' => res.push(Sep(Or)), + '!' => res.push(Sep(Exclamation)), + '?' => res.push(Sep(Question)), + '^' => res.push(Sep(Caret)), + '\\' => res.push(Sep(Backslash)), + _ => return Err(UnknownPunctuation.into()) + } + buf.clear(); + } + Ok(TokenStream(res)) +} + +impl std::fmt::Display for TokenStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Token::*; + let mut prev_token = Indent(0); + for token in &self.0 { + match (&prev_token, &token) { + (Word(_), Word(_)) | (Word(_), Num(_)) | + (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?, + _ => write!(f, "{}", token)?, + } + prev_token = token.clone(); + } + Ok(()) + } +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Token::*; + match self { + Key(word) => write!(f, "{}", word), + Word(val) => write!(f, "{}", val), + Num(val) => write!(f, "{}", val), + Lit(lit) => write!(f, "{}", lit), + Sep(sep) => write!(f, "{}", sep), + Indent(i) => write!(f, "\n{}", " ".repeat(*i)), + } + } +} + +impl std::fmt::Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Literal::*; + match self { + Char(val) => write!(f, "'{}'", val), + SingleLineString(val) => write!(f, "\"{}\"", val), + MultiLineString(val) => write!(f, "\"\"\"{}\"\"\"", val), + Comment(val) => write!(f, "#{}", val), + DocComment(val) => write!(f, "##{}", val), + MultiLineComment(val) => write!(f, "#[{}]#", val), + } + } +} + +impl std::fmt::Display for Keyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Keyword::*; + match self { + Pub => write!(f, "pub"), + Let => write!(f, "let"), + Var => write!(f, "var"), + Const => write!(f, "const"), + Func => write!(f, "func"), + Macro => write!(f, "macro"), + Type => write!(f, "type"), + Mod => write!(f, "mod"), + From => write!(f, "from"), + Import => write!(f, "import"), + For => write!(f, "for"), + While => write!(f, "while"), + Loop => write!(f, "loop"), + Block => write!(f, "block"), + Static => write!(f, "static"), + If => write!(f, "if"), + When => write!(f, "when"), + Elif => write!(f, "elif"), + Else => write!(f, "else"), + Match => write!(f, "match"), + Try => write!(f, "try"), + Catch => write!(f, "catch"), + Finally => write!(f, "finally"), + Struct => write!(f, "struct"), + Tuple => write!(f, "tuple"), + Enum => write!(f, "enum"), + Union => write!(f, "union"), + Interface => write!(f, "interface"), + Distinct => write!(f, "distinct"), + Ref => write!(f, "ref"), + Break => write!(f, "break"), + Continue => write!(f, "continue"), + Return => write!(f, "return"), + In => write!(f, "in"), + Is => write!(f, "is"), + Of => write!(f, "of"), + As => write!(f, "as"), + } + } +} +impl std::fmt::Display for Punctuation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Punctuation::*; + match self { + Comma => write!(f, ","), + Period => write!(f, "."), + Semicolon => write!(f, ";"), + Colon => write!(f, ":"), + BackTick => write!(f, "`"), + SingleQuote => write!(f, "'"), + DoubleQuote => write!(f, "\""), + FuncLeftParen => write!(f, "("), + FuncRightParen => write!(f, ")"), + TupleLeftParen => write!(f, " ("), + TupleRightParen => write!(f, ")"), + GenericLeftBracket => write!(f, "["), + GenericRightBracket => write!(f, "]"), + ArrayLeftBracket => write!(f, " ["), + ArrayRightBracket => write!(f, "]"), + StructLeftBrace => write!(f, "{{"), + StructRightBrace => write!(f, "}}"), + Equals => write!(f, "="), + Plus => write!(f, "+"), + Minus => write!(f, "- "), + Negative => write!(f, "-"), + Times => write!(f, "*"), + Slash => write!(f, "/"), + LessThan => write!(f, "<"), + GreaterThan => write!(f, ">"), + At => write!(f, "@"), + Sha => write!(f, "$"), + Tilde => write!(f, "~"), + And => write!(f, "&"), + Percent => write!(f, "%"), + Or => write!(f, "|"), + Exclamation => write!(f, "!"), + Question => write!(f, "?"), + Caret => write!(f, "^"), + Backslash => write!(f, "\\"), + } + } +} diff --git a/src/frontend/mod.rs b/src/frontend/mod.rs new file mode 100644 index 0000000..d437c73 --- /dev/null +++ b/src/frontend/mod.rs @@ -0,0 +1,3 @@ +pub mod ast; +pub mod lex; +pub mod parse; diff --git a/src/frontend/parse.rs b/src/frontend/parse.rs new file mode 100644 index 0000000..c525982 --- /dev/null +++ b/src/frontend/parse.rs @@ -0,0 +1,297 @@ +use std::fmt; + +use crate::frontend::lex::*; +use crate::frontend::ast::*; +use crate::frontend::ast::Binding::*; +use crate::frontend::ast::Control::*; +use crate::frontend::ast::Pattern::*; +use Token::*; +use Literal::*; +use Punctuation::*; + +type Input = std::iter::Peekable>; + +#[derive(Clone, Copy)] +struct State { + depth: usize, + step: usize +} + +impl State { + fn indent(&self) -> State { + State { depth: self.depth + self.step, step: self.step } + } + fn dedent(&self) -> State { + State { depth: self.depth - self.step, step: self.step } + } +} + +/// Convert a basic TokenStream into an AbstractSyntaxTree +pub fn astify(input: TokenStream, name: &str) -> Result { + let mut input = input.into_iter().peekable(); + let body = parse_body(&mut input, State { depth: 0, step: 0 })?; + Ok(Expr::Binding(Module{ id: name.to_string(), body })) +} + +/// Parse a series of Exprs, for ex. the body of a function. +fn parse_body(input: &mut Input, state: State) -> Result> { + let mut res = Vec::new(); + while input.peek() == Some(&Indent(state.depth)) { + input.next(); + res.push(parse_expr(input, state)?); + } + Ok(res) +} + +/// Expr ::= Let | Var | Const | Func | Type | +/// Mod | Import | Block | Static | +/// For | While | Loop | If | When | Try | Match +fn parse_expr(input: &mut Input, state: State) -> Result { + use Keyword::*; + match input.next() { + Some(Key(word)) => match word { + Pub => { + match input.next() { + Some(Key(word)) => match word { + Const => parse_const(input, state, true), + Func => parse_funcdecl(input, state, true), + Type => parse_typedecl(input, state, true), + Mod => parse_mod(input, state, true), + _ => return Err("unrecognized keyword following pub".into()), + } + Some(_) => return Err("unrecognized thing following pub".into()), + None => return Err("end of input".into()), + } + }, + Let => parse_let(input, state), + Var => parse_var(input, state), + Const => parse_const(input, state, false), + Func => parse_funcdecl(input, state, false), + Type => parse_typedecl(input, state, false), + Mod => parse_mod(input, state, false), + From => parse_import(input, state, true), + Import => parse_import(input, state, false), + Block => parse_block(input, state), + Static => parse_static(input, state), + For => parse_for(input, state), + While => parse_while(input, state), + Loop => parse_loop(input, state), + If => parse_if(input, state), + When => parse_when(input, state), + Try => parse_try(input, state), + Match => parse_match(input, state), + _ => return Err("invalid keyword starting expression".into()), + }, + _ => todo!(), // what can i do with this?? match line here + } +} + +/// Let ::= 'let' Pattern Annotation? '=' Expr +fn parse_let(input: &mut Input, state: State) -> Result { + let id = parse_pattern(input, state)?; + let mut kind = None; + if let Some(Sep(Colon)) = input.peek() { + input.next(); + kind = Some(parse_typedesc(input, state)?); + } + if input.next() != Some(Sep(Equals)) { + return Err("= not following binding".into()) + } + let value = Box::new(parse_expr(input, state)?); + Ok(Expr::Binding(Let { id, kind, value })) +} +/// Var ::= 'var' Pattern Annotation? ('=' Expr)? +fn parse_var(input: &mut Input, state: State) -> Result { + let id = parse_pattern(input, state)?; + let mut kind = None; + if let Some(Sep(Colon)) = input.peek() { + input.next(); + kind = Some(parse_typedesc(input, state)?); + } + let mut value = None; + if input.next() != Some(Sep(Equals)) { + value = Some(Box::new(parse_expr(input, state)?)); + } + Ok(Expr::Binding(Var { id, kind, value })) +} +// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr +fn parse_const(input: &mut Input, state: State, public: bool) -> Result { + let id = parse_pattern(input, state)?; + let mut kind = None; + if let Some(Sep(Colon)) = input.peek() { + input.next(); + kind = Some(parse_typedesc(input, state)?); + } + if input.next() != Some(Sep(Equals)) { + return Err("= not following binding".into()) + } + let value = Box::new(parse_expr(input, state)?); + Ok(Expr::Binding(Const { public, id, kind, value })) +} +// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body +fn parse_funcdecl(input: &mut Input, state: State, public: bool) -> Result { todo!() } +// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc +fn parse_typedecl(input: &mut Input, state: State, public: bool) -> Result { + let pattern = parse_pattern(input, state)?; + todo!() +} +// Mod ::= 'pub'? 'mod' Ident ':' Body +fn parse_mod(input: &mut Input, state: State, public: bool) -> Result { + match input.next() { + Some(Word(id)) => { + match input.next() { + Some(Sep(Colon)) => { + let body = parse_body(input, state.indent())?; + Ok(Expr::Binding(Module { id, body })) + }, + _ => return Err("unexpected token following mod label".into()), + } + }, + _ => return Err("unexpected thing following mod keyword".into()), + } +} + +// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)? +fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result { + let mut from = None; + if from_scope { + match input.next() { + Some(Word(id)) => from = Some(id), + _ => return Err("identifier not following from keyword".into()) + } + if input.next() != Some(Key(Keyword::Import)) { + return Err("expected import to follow from".into()) + } + } + todo!() +} +// Block ::= 'block' Ident? ':' Body +fn parse_block(input: &mut Input, state: State) -> Result { // todo: body + offset + match input.next() { + Some(Sep(Colon)) => { + let id = None; + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Block { id, body })) + }, + Some(Word(label)) => { + match input.next() { + Some(Sep(Colon)) => { + let id = Some(label); + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Block { id, body })) + }, + _ => return Err("unexpected token following block label".into()), + } + }, + _ => return Err("unexpected thing following block keyword".into()), + } +} +// Static ::= 'static' ':' Body +fn parse_static(input: &mut Input, state: State) -> Result { + if input.next() != Some(Sep(Colon)) { + return Err("colon must follow static invocation".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Static { body })) +} + +// For ::= 'for' Pattern 'in' Expr ':' Body +fn parse_for(input: &mut Input, state: State) -> Result { + let binding = parse_pattern(input, state)?; + if input.next() != Some(Key(Keyword::In)) { + return Err("expected in keyword after for pattern".into()); + } + let range = Box::new(parse_expr(input, state)?); + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after in expression".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(For { binding, range, body })) +} +// While ::= 'while' Expr ':' Body +fn parse_while(input: &mut Input, state: State) -> Result { + let cond = Box::new(parse_expr(input, state)?); + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after while keyword".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(While { cond, body })) +} +// Loop ::= 'loop' ':' Body +fn parse_loop(input: &mut Input, state: State) -> Result { + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after loop keyword".into()); + } + let body = parse_body(input, state.indent())?; + Ok(Expr::Control(Loop { body })) +} + +// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? +fn parse_if(input: &mut Input, state: State) -> Result { + let mut branches = Vec::new(); + branches.push(parse_cond_branch(input, state)?); + while input.peek() == Some(&Key(Keyword::Elif)) { + input.next(); + branches.push(parse_cond_branch(input, state)?); + } + let mut else_body = None; + if input.peek() == Some(&Key(Keyword::Else)) { + input.next(); + else_body = Some(parse_body(input, state.indent())?); + } + Ok(Expr::Control(If { branches, else_body })) +} +// When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? +fn parse_when(input: &mut Input, state: State) -> Result { + let mut branches = Vec::new(); + branches.push(parse_cond_branch(input, state)?); + while input.peek() == Some(&Key(Keyword::Elif)) { + input.next(); + branches.push(parse_cond_branch(input, state)?); + } + let mut else_body = None; + if input.peek() == Some(&Key(Keyword::Else)) { + input.next(); + else_body = Some(parse_body(input, state.indent())?); + } + let mut body = Vec::new(); + body.push(Expr::Control(If { branches, else_body })); + Ok(Expr::Control(Static { body })) +} +// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)? +fn parse_try(input: &mut Input, state: State) -> Result { + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after try keyword".into()); + } + let body = parse_body(input, state.indent())?; + let catches = Vec::new(); + while input.peek() == Some(&Key(Keyword::Catch)) { + input.next(); + todo!(); + } + let mut finally = None; + if input.peek() == Some(&Key(Keyword::Finally)) { + input.next(); + if input.next() != Some(Sep(Colon)) { + return Err("expected colon after try keyword".into()); + } + finally = Some(parse_body(input, state.indent())?); + } + Ok(Expr::Control(Try { body, catches, finally })) +} +// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+ +fn parse_match(input: &mut Input, state: State) -> Result { + let item = parse_pattern(input, state)?; + let mut branches = Vec::new(); + while input.peek() == Some(&Key(Keyword::Of)) { + input.next(); + todo!(); + } + Ok(Expr::Control(Match { item, branches })) +} + +fn parse_typedesc(input: &mut Input, state: State) -> Result { todo!() } +fn parse_pattern(input: &mut Input, state: State) -> Result { todo!() } +fn parse_cond_branch(input: &mut Input, state: State) -> Result { todo!() } + +// lex, parse, expand, compile? diff --git a/src/lex.rs b/src/lex.rs deleted file mode 100644 index 771ba38..0000000 --- a/src/lex.rs +++ /dev/null @@ -1,542 +0,0 @@ -use multipeek::multipeek; - -pub type Result = core::result::Result>; -pub struct TokenStream(Vec); - -impl IntoIterator for TokenStream { - type Item = Token; - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -#[derive(Clone, PartialEq, Debug)] -pub enum LexicalError { - InvalidIndentation, - MismatchedParens, - MismatchedBrackets, - UnknownPunctuation, -} - -impl std::fmt::Display for LexicalError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self) - } -} -impl std::error::Error for LexicalError {} - -/// **Basic** syntax tokens. Form an unambiguous TokenStream. -#[derive(Clone, PartialEq)] -pub enum Token { - Key(Keyword), // keyword identifiers. - Word(String), // non-keyword identifiers. - Num(String), // numeric value, ex. 413, 0b101011, 0xabcd - Lit(Literal), // literal value, ex. for strings/comments. - Sep(Punctuation), // punctuation. non-word tokens. operators are lexed as this and later transformed to words. - Indent(usize), // indentation. denotes line breaks and scope at which a line starts. -} - -#[derive(Clone, PartialEq)] -pub enum Literal { - Char(String), - SingleLineString(String), - MultiLineString(String), - Comment(String), - DocComment(String), - MultiLineComment(String), -} - -/// Keywords, made explicit for easier use with Rust. -/// (strings inside match patterns are fucky!!) -#[derive(Clone, PartialEq)] -pub enum Keyword { - Pub, Let, Var, Const, - Func, Macro, Type, - Mod, From, Import, - For, While, Loop, - Block, Static, - If, When, Elif, Else, Match, - Try, Catch, Finally, - Struct, Tuple, Enum, Union, Interface, - Distinct, Ref, // todo: Mut once figured out - Break, Continue, Return, - In, Is, Of, As, -} - -/// All punctuation recognized by the lexer. -/// Note the distinction between FuncLeftParen and TupleLeftParen. -#[derive(Clone, PartialEq)] -pub enum Punctuation { - Comma, // , - Period, // . - Semicolon, // ; - Colon, // : - BackTick, // ` - SingleQuote, // ' - DoubleQuote, // " - FuncLeftParen, // ( - FuncRightParen, // ) - TupleLeftParen, // ( - TupleRightParen, // ) - GenericLeftBracket, // [ - GenericRightBracket, // ] - ArrayLeftBracket, // [ - ArrayRightBracket, // ] - StructLeftBrace, // } - StructRightBrace, // } - Equals, // = - Plus, // + - Minus, // distinction between minus and negative. - Negative, // negative binds tightly: there is no whitespace following. - Times, // * - Slash, // / - LessThan, // < - GreaterThan, // > - At, // @ - Sha, // $ - Tilde, // ~ - And, // & - Percent, // % - Or, // | - Exclamation, // ! - Question, // ? - Caret, // ^ - Backslash, // \ -} - -/// Parses whitespace-sensitive code into an unambiguous TokenStream. -/// Also useful for formatting. -pub fn tokenize(input: &str) -> Result { - // The design of this lexer utilizes to great extent multipeek's arbitrary peeking. - // Tokens are matched by looping within their case until complete. - // This then eliminates the need for most global parser state. (i hate state) - - use Token::*; - use Literal::*; - use Punctuation::*; - use LexicalError::*; - enum Paren { Func, Tuple } - enum Bracket { Generic, Array } - struct State { - start_of_line: bool, - paren_stack: Vec, - bracket_stack: Vec, - } - - let mut state = State { - start_of_line: true, - paren_stack: vec!(), - bracket_stack: vec!(), - }; - - let mut buf = String::new(); - let mut res = Vec::new(); - - // `char` in rust is four bytes it's fine - let mut input = multipeek(input.chars()); - while let Some(c) = input.next() { - match c { - ' ' => { // indentation! and whitespace - match res.last() { - Some(Indent(_)) => { // indentation! - res.pop(); // discard previous empty or useless Indent token - let mut current_indent_level = 1; - while let Some(x) = input.peek() { - match x { - ' ' => { current_indent_level += 1; input.next(); }, - _ => match res.last() { // indentation ends - Some(Word(a)) if a == "==" || a == "and" || a == "or" || - a == "xor" || a == "in" || a == "is" => break, - Some(Sep(FuncLeftParen)) | Some(Sep(TupleLeftParen)) | - Some(Sep(GenericLeftBracket)) | Some(Sep(ArrayLeftBracket)) | - Some(Sep(StructLeftBrace)) | Some(Sep(Comma)) => break, - _ => { - res.push(Indent(current_indent_level)); - break; - } - } - } - } - }, - _ => { // get rid of excess (all) whitespace between words/operators - while input.peek().is_some_and(|x| x.is_whitespace() && x != &'\n') { input.next(); } - } - } - }, - '\t' => return Err(InvalidIndentation.into()), - '\n' => res.push(Indent(0)), - '\'' => { // chars! - while let Some(x) = input.next() { - match x { - '\'' => break, - '\\' => if let Some(y) = input.next() { buf.push(y) }, - _ => buf.push(x) - } - } - res.push(Lit(Char(String::from(&buf)))); - }, - '"' => { // strings! - match (input.peek_nth(0).copied(), input.peek_nth(1).copied()) { - (Some('"'), Some('"')) => { // triple quoted strings - input.next(); input.next(); - while let Some(x) = input.next() { - match x { - '"' if input.peek_nth(0) == Some(&'"') && - input.peek_nth(1) == Some(&'"') => { - input.next(); input.next(); - break; - }, - _ => buf.push(x) - } - } - res.push(Lit(MultiLineString(String::from(&buf)))); - }, - (_, _) => { // single quoted strings - while let Some(x) = input.next() { - match x { - '"' => break, - '\\' => if let Some(y) = input.next() { buf.push(y) }, - _ => buf.push(x) - } - } - res.push(Lit(SingleLineString(String::from(&buf)))); - } - } - }, - '#' => { // comments! - match input.peek() { - Some('[') => { // block comment, can be nested - input.next(); - let mut comment_level = 1; - while let Some(x) = input.next() && comment_level > 0 { - match x { - '#' if input.peek() == Some(&'[') => { - comment_level += 1; - input.next(); - }, - ']' if input.peek() == Some(&'#') => { - comment_level -= 1; - input.next(); - }, - _ => buf.push(x) - } - } - res.push(Lit(MultiLineComment(String::from(&buf)))); - }, - Some(&'#') => { // documentation comment - input.next(); - while let Some(x) = input.peek() { - match x { - '\n' => break, - _ => { - buf.push(*x); - } - } - input.next(); - } - res.push(Lit(DocComment(String::from(&buf)))); - }, - _ => { // standard comment, runs til EOL - while let Some(x) = input.peek() { - match x { - '\n' => break, - _ => { - buf.push(*x); - } - } - input.next(); - } - res.push(Lit(Comment(String::from(&buf)))); - } - } - }, - c if c.is_alphabetic() || c == '_' => { // valid identifiers! - buf.push(c); - while let Some(x) = input.peek() { - match x { - x if x.is_alphanumeric() || x == &'_' => { - buf.push(*x); - input.next(); - }, - _ => { - use Keyword::*; - match buf.as_str() { // keywords! - "pub" => res.push(Key(Pub)), - "let" => res.push(Key(Let)), - "var" => res.push(Key(Var)), - "const" => res.push(Key(Const)), - "func" => res.push(Key(Func)), - "macro" => res.push(Key(Macro)), - "type" => res.push(Key(Type)), - "mod" => res.push(Key(Mod)), - "from" => res.push(Key(From)), - "import" => res.push(Key(Import)), - "for" => res.push(Key(For)), - "while" => res.push(Key(While)), - "loop" => res.push(Key(Loop)), - "block" => res.push(Key(Block)), - "static" => res.push(Key(Static)), - "if" => res.push(Key(If)), - "when" => res.push(Key(When)), - "elif" => res.push(Key(Elif)), - "else" => res.push(Key(Else)), - "match" => res.push(Key(Match)), - "try" => res.push(Key(Try)), - "catch" => res.push(Key(Catch)), - "finally" => res.push(Key(Finally)), - "struct" => res.push(Key(Struct)), - "tuple" => res.push(Key(Tuple)), - "enum" => res.push(Key(Enum)), - "union" => res.push(Key(Union)), - "interface" => res.push(Key(Interface)), - "distinct" => res.push(Key(Distinct)), - "ref" => res.push(Key(Ref)), - "break" => res.push(Key(Break)), - "continue" => res.push(Key(Continue)), - "return" => res.push(Key(Return)), - "in" => res.push(Key(In)), - "is" => res.push(Key(Is)), - "of" => res.push(Key(Of)), - "as" => res.push(Key(As)), - _ => res.push(Word(String::from(&buf))) - } - match x { // () and [] denote both parameters/generics and tuples/arrays - '(' => { // we must disambiguate by treating those *directly* after words as such - res.push(Sep(FuncLeftParen)); - state.paren_stack.push(Paren::Func); - input.next(); - }, - '[' => { - res.push(Sep(GenericLeftBracket)); - state.bracket_stack.push(Bracket::Generic); - input.next(); - }, - _ => {}, - } - break; - } - } - } - }, - '0'..='9' => { // numeric literals! - buf.push(c); - while let Some(x) = input.peek() { - match x { - 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => { - buf.push(*x); - input.next(); - }, - _ => break - } - } - res.push(Num(String::from(&buf))) - }, - '-' => { // `-` is special. it can be the *prefix* operator "Negative", or part of a regular operator. - match input.peek() { - Some(' ') => res.push(Sep(Minus)), - _ => res.push(Sep(Negative)) - } - }, - '(' => { // note: FuncParens were matched above, directly after identifiers - res.push(Sep(TupleLeftParen)); - state.paren_stack.push(Paren::Tuple); - }, - '[' => { // note: GenericBrackets were matched above, directly after identifiers - res.push(Sep(ArrayLeftBracket)); - state.bracket_stack.push(Bracket::Array); - }, - ')' => { - match state.paren_stack.pop() { - Some(Paren::Func) => res.push(Sep(FuncRightParen)), - Some(Paren::Tuple) => res.push(Sep(TupleRightParen)), - None => return Err(MismatchedParens.into()), - } - }, - ']' => { - match state.bracket_stack.pop() { - Some(Bracket::Generic) => res.push(Sep(GenericRightBracket)), - Some(Bracket::Array) => res.push(Sep(ArrayRightBracket)), - None => return Err(MismatchedBrackets.into()), - } - if input.peek() == Some(&'(') { // parameters following generics - res.push(Sep(FuncLeftParen)); - state.paren_stack.push(Paren::Func); - input.next(); - } - }, - '`' => { - res.push(Sep(BackTick)); - match input.peek() { - Some('(') => { - res.push(Sep(FuncLeftParen)); - state.paren_stack.push(Paren::Func); - input.next(); - }, - Some('[') => { - res.push(Sep(GenericLeftBracket)); - state.bracket_stack.push(Bracket::Generic); - input.next(); - }, - _ => {} - } - }, - ',' => res.push(Sep(Comma)), - '.' => res.push(Sep(Period)), - ';' => res.push(Sep(Semicolon)), - ':' => res.push(Sep(Colon)), - '{' => res.push(Sep(StructLeftBrace)), - '}' => res.push(Sep(StructRightBrace)), - '=' => res.push(Sep(Equals)), - '+' => res.push(Sep(Plus)), - '*' => res.push(Sep(Times)), - '/' => res.push(Sep(Slash)), - '<' => res.push(Sep(LessThan)), - '>' => res.push(Sep(GreaterThan)), - '@' => res.push(Sep(At)), - '$' => res.push(Sep(Sha)), - '~' => res.push(Sep(Tilde)), - '&' => res.push(Sep(And)), - '|' => res.push(Sep(Or)), - '!' => res.push(Sep(Exclamation)), - '?' => res.push(Sep(Question)), - '^' => res.push(Sep(Caret)), - '\\' => res.push(Sep(Backslash)), - _ => return Err(UnknownPunctuation.into()) - } - buf.clear(); - } - Ok(TokenStream(res)) -} - -impl std::fmt::Display for TokenStream { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use Token::*; - let mut prev_token = Indent(0); - for token in &self.0 { - match (&prev_token, &token) { - (Word(_), Word(_)) | (Word(_), Num(_)) | - (Num(_), Word(_)) | (Num(_), Num(_)) => write!(f, " {}", token)?, - _ => write!(f, "{}", token)?, - } - prev_token = token.clone(); - } - Ok(()) - } -} - -impl std::fmt::Display for Token { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use Token::*; - match self { - Key(word) => write!(f, "{}", word), - Word(val) => write!(f, "{}", val), - Num(val) => write!(f, "{}", val), - Lit(lit) => write!(f, "{}", lit), - Sep(sep) => write!(f, "{}", sep), - Indent(i) => write!(f, "\n{}", " ".repeat(*i)), - } - } -} - -impl std::fmt::Display for Literal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use Literal::*; - match self { - Char(val) => write!(f, "'{}'", val), - SingleLineString(val) => write!(f, "\"{}\"", val), - MultiLineString(val) => write!(f, "\"\"\"{}\"\"\"", val), - Comment(val) => write!(f, "#{}", val), - DocComment(val) => write!(f, "##{}", val), - MultiLineComment(val) => write!(f, "#[{}]#", val), - } - } -} - -impl std::fmt::Display for Keyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use Keyword::*; - match self { - Pub => write!(f, "pub"), - Let => write!(f, "let"), - Var => write!(f, "var"), - Const => write!(f, "const"), - Func => write!(f, "func"), - Macro => write!(f, "macro"), - Type => write!(f, "type"), - Mod => write!(f, "mod"), - From => write!(f, "from"), - Import => write!(f, "import"), - For => write!(f, "for"), - While => write!(f, "while"), - Loop => write!(f, "loop"), - Block => write!(f, "block"), - Static => write!(f, "static"), - If => write!(f, "if"), - When => write!(f, "when"), - Elif => write!(f, "elif"), - Else => write!(f, "else"), - Match => write!(f, "match"), - Try => write!(f, "try"), - Catch => write!(f, "catch"), - Finally => write!(f, "finally"), - Struct => write!(f, "struct"), - Tuple => write!(f, "tuple"), - Enum => write!(f, "enum"), - Union => write!(f, "union"), - Interface => write!(f, "interface"), - Distinct => write!(f, "distinct"), - Ref => write!(f, "ref"), - Break => write!(f, "break"), - Continue => write!(f, "continue"), - Return => write!(f, "return"), - In => write!(f, "in"), - Is => write!(f, "is"), - Of => write!(f, "of"), - As => write!(f, "as"), - } - } -} -impl std::fmt::Display for Punctuation { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use Punctuation::*; - match self { - Comma => write!(f, ","), - Period => write!(f, "."), - Semicolon => write!(f, ";"), - Colon => write!(f, ":"), - BackTick => write!(f, "`"), - SingleQuote => write!(f, "'"), - DoubleQuote => write!(f, "\""), - FuncLeftParen => write!(f, "("), - FuncRightParen => write!(f, ")"), - TupleLeftParen => write!(f, " ("), - TupleRightParen => write!(f, ")"), - GenericLeftBracket => write!(f, "["), - GenericRightBracket => write!(f, "]"), - ArrayLeftBracket => write!(f, " ["), - ArrayRightBracket => write!(f, "]"), - StructLeftBrace => write!(f, "{{"), - StructRightBrace => write!(f, "}}"), - Equals => write!(f, "="), - Plus => write!(f, "+"), - Minus => write!(f, "- "), - Negative => write!(f, "-"), - Times => write!(f, "*"), - Slash => write!(f, "/"), - LessThan => write!(f, "<"), - GreaterThan => write!(f, ">"), - At => write!(f, "@"), - Sha => write!(f, "$"), - Tilde => write!(f, "~"), - And => write!(f, "&"), - Percent => write!(f, "%"), - Or => write!(f, "|"), - Exclamation => write!(f, "!"), - Question => write!(f, "?"), - Caret => write!(f, "^"), - Backslash => write!(f, "\\"), - } - } -} diff --git a/src/main.rs b/src/main.rs index 7b635f9..d7c995c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,6 @@ #![allow(non_upper_case_globals)] #![feature(exclusive_range_pattern, let_chains)] -mod ast; -mod lex; -mod parse; -mod tree; +mod frontend; fn main() {} diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index 1dabd47..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,297 +0,0 @@ -use std::fmt; - -use crate::lex::*; -use crate::ast::*; -use crate::ast::Binding::*; -use crate::ast::Control::*; -use crate::ast::Pattern::*; -use Token::*; -use Literal::*; -use Punctuation::*; - -type Input = std::iter::Peekable>; - -#[derive(Clone, Copy)] -struct State { - depth: usize, - step: usize -} - -impl State { - fn indent(&self) -> State { - State { depth: self.depth + self.step, step: self.step } - } - fn dedent(&self) -> State { - State { depth: self.depth - self.step, step: self.step } - } -} - -/// Convert a basic TokenStream into an AbstractSyntaxTree -pub fn astify(input: TokenStream, name: &str) -> Result { - let mut input = input.into_iter().peekable(); - let body = parse_body(&mut input, State { depth: 0, step: 0 })?; - Ok(Expr::Binding(Module{ id: name.to_string(), body })) -} - -/// Parse a series of Exprs, for ex. the body of a function. -fn parse_body(input: &mut Input, state: State) -> Result> { - let mut res = Vec::new(); - while input.peek() == Some(&Indent(state.depth)) { - input.next(); - res.push(parse_expr(input, state)?); - } - Ok(res) -} - -/// Expr ::= Let | Var | Const | Func | Type | -/// Mod | Import | Block | Static | -/// For | While | Loop | If | When | Try | Match -fn parse_expr(input: &mut Input, state: State) -> Result { - use Keyword::*; - match input.next() { - Some(Key(word)) => match word { - Pub => { - match input.next() { - Some(Key(word)) => match word { - Const => parse_const(input, state, true), - Func => parse_funcdecl(input, state, true), - Type => parse_typedecl(input, state, true), - Mod => parse_mod(input, state, true), - _ => return Err("unrecognized keyword following pub".into()), - } - Some(_) => return Err("unrecognized thing following pub".into()), - None => return Err("end of input".into()), - } - }, - Let => parse_let(input, state), - Var => parse_var(input, state), - Const => parse_const(input, state, false), - Func => parse_funcdecl(input, state, false), - Type => parse_typedecl(input, state, false), - Mod => parse_mod(input, state, false), - From => parse_import(input, state, true), - Import => parse_import(input, state, false), - Block => parse_block(input, state), - Static => parse_static(input, state), - For => parse_for(input, state), - While => parse_while(input, state), - Loop => parse_loop(input, state), - If => parse_if(input, state), - When => parse_when(input, state), - Try => parse_try(input, state), - Match => parse_match(input, state), - _ => return Err("invalid keyword starting expression".into()), - }, - _ => todo!(), // what can i do with this?? match line here - } -} - -/// Let ::= 'let' Pattern Annotation? '=' Expr -fn parse_let(input: &mut Input, state: State) -> Result { - let id = parse_pattern(input, state)?; - let mut kind = None; - if let Some(Sep(Colon)) = input.peek() { - input.next(); - kind = Some(parse_typedesc(input, state)?); - } - if input.next() != Some(Sep(Equals)) { - return Err("= not following binding".into()) - } - let value = Box::new(parse_expr(input, state)?); - Ok(Expr::Binding(Let { id, kind, value })) -} -/// Var ::= 'var' Pattern Annotation? ('=' Expr)? -fn parse_var(input: &mut Input, state: State) -> Result { - let id = parse_pattern(input, state)?; - let mut kind = None; - if let Some(Sep(Colon)) = input.peek() { - input.next(); - kind = Some(parse_typedesc(input, state)?); - } - let mut value = None; - if input.next() != Some(Sep(Equals)) { - value = Some(Box::new(parse_expr(input, state)?)); - } - Ok(Expr::Binding(Var { id, kind, value })) -} -// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr -fn parse_const(input: &mut Input, state: State, public: bool) -> Result { - let id = parse_pattern(input, state)?; - let mut kind = None; - if let Some(Sep(Colon)) = input.peek() { - input.next(); - kind = Some(parse_typedesc(input, state)?); - } - if input.next() != Some(Sep(Equals)) { - return Err("= not following binding".into()) - } - let value = Box::new(parse_expr(input, state)?); - Ok(Expr::Binding(Const { public, id, kind, value })) -} -// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body -fn parse_funcdecl(input: &mut Input, state: State, public: bool) -> Result { todo!() } -// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc -fn parse_typedecl(input: &mut Input, state: State, public: bool) -> Result { - let pattern = parse_pattern(input, state)?; - todo!() -} -// Mod ::= 'pub'? 'mod' Ident ':' Body -fn parse_mod(input: &mut Input, state: State, public: bool) -> Result { - match input.next() { - Some(Word(id)) => { - match input.next() { - Some(Sep(Colon)) => { - let body = parse_body(input, state.indent())?; - Ok(Expr::Binding(Module { id, body })) - }, - _ => return Err("unexpected token following mod label".into()), - } - }, - _ => return Err("unexpected thing following mod keyword".into()), - } -} - -// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)? -fn parse_import(input: &mut Input, state: State, from_scope: bool) -> Result { - let mut from = None; - if from_scope { - match input.next() { - Some(Word(id)) => from = Some(id), - _ => return Err("identifier not following from keyword".into()) - } - if input.next() != Some(Key(Keyword::Import)) { - return Err("expected import to follow from".into()) - } - } - todo!() -} -// Block ::= 'block' Ident? ':' Body -fn parse_block(input: &mut Input, state: State) -> Result { // todo: body + offset - match input.next() { - Some(Sep(Colon)) => { - let id = None; - let body = parse_body(input, state.indent())?; - Ok(Expr::Control(Block { id, body })) - }, - Some(Word(label)) => { - match input.next() { - Some(Sep(Colon)) => { - let id = Some(label); - let body = parse_body(input, state.indent())?; - Ok(Expr::Control(Block { id, body })) - }, - _ => return Err("unexpected token following block label".into()), - } - }, - _ => return Err("unexpected thing following block keyword".into()), - } -} -// Static ::= 'static' ':' Body -fn parse_static(input: &mut Input, state: State) -> Result { - if input.next() != Some(Sep(Colon)) { - return Err("colon must follow static invocation".into()); - } - let body = parse_body(input, state.indent())?; - Ok(Expr::Control(Static { body })) -} - -// For ::= 'for' Pattern 'in' Expr ':' Body -fn parse_for(input: &mut Input, state: State) -> Result { - let binding = parse_pattern(input, state)?; - if input.next() != Some(Key(Keyword::In)) { - return Err("expected in keyword after for pattern".into()); - } - let range = Box::new(parse_expr(input, state)?); - if input.next() != Some(Sep(Colon)) { - return Err("expected colon after in expression".into()); - } - let body = parse_body(input, state.indent())?; - Ok(Expr::Control(For { binding, range, body })) -} -// While ::= 'while' Expr ':' Body -fn parse_while(input: &mut Input, state: State) -> Result { - let cond = Box::new(parse_expr(input, state)?); - if input.next() != Some(Sep(Colon)) { - return Err("expected colon after while keyword".into()); - } - let body = parse_body(input, state.indent())?; - Ok(Expr::Control(While { cond, body })) -} -// Loop ::= 'loop' ':' Body -fn parse_loop(input: &mut Input, state: State) -> Result { - if input.next() != Some(Sep(Colon)) { - return Err("expected colon after loop keyword".into()); - } - let body = parse_body(input, state.indent())?; - Ok(Expr::Control(Loop { body })) -} - -// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? -fn parse_if(input: &mut Input, state: State) -> Result { - let mut branches = Vec::new(); - branches.push(parse_cond_branch(input, state)?); - while input.peek() == Some(&Key(Keyword::Elif)) { - input.next(); - branches.push(parse_cond_branch(input, state)?); - } - let mut else_body = None; - if input.peek() == Some(&Key(Keyword::Else)) { - input.next(); - else_body = Some(parse_body(input, state.indent())?); - } - Ok(Expr::Control(If { branches, else_body })) -} -// When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? -fn parse_when(input: &mut Input, state: State) -> Result { - let mut branches = Vec::new(); - branches.push(parse_cond_branch(input, state)?); - while input.peek() == Some(&Key(Keyword::Elif)) { - input.next(); - branches.push(parse_cond_branch(input, state)?); - } - let mut else_body = None; - if input.peek() == Some(&Key(Keyword::Else)) { - input.next(); - else_body = Some(parse_body(input, state.indent())?); - } - let mut body = Vec::new(); - body.push(Expr::Control(If { branches, else_body })); - Ok(Expr::Control(Static { body })) -} -// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)? -fn parse_try(input: &mut Input, state: State) -> Result { - if input.next() != Some(Sep(Colon)) { - return Err("expected colon after try keyword".into()); - } - let body = parse_body(input, state.indent())?; - let catches = Vec::new(); - while input.peek() == Some(&Key(Keyword::Catch)) { - input.next(); - todo!(); - } - let mut finally = None; - if input.peek() == Some(&Key(Keyword::Finally)) { - input.next(); - if input.next() != Some(Sep(Colon)) { - return Err("expected colon after try keyword".into()); - } - finally = Some(parse_body(input, state.indent())?); - } - Ok(Expr::Control(Try { body, catches, finally })) -} -// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+ -fn parse_match(input: &mut Input, state: State) -> Result { - let item = parse_pattern(input, state)?; - let mut branches = Vec::new(); - while input.peek() == Some(&Key(Keyword::Of)) { - input.next(); - todo!(); - } - Ok(Expr::Control(Match { item, branches })) -} - -fn parse_typedesc(input: &mut Input, state: State) -> Result { todo!() } -fn parse_pattern(input: &mut Input, state: State) -> Result { todo!() } -fn parse_cond_branch(input: &mut Input, state: State) -> Result { todo!() } - -// lex, parse, expand, compile? -- cgit v1.2.3-70-g09d2