aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJJ2023-10-27 07:53:32 +0000
committerJJ2023-10-27 07:53:32 +0000
commitf3efbfc03fabae11df0554e67769327d4dc57a83 (patch)
tree73c372eaa118015b0c0cefe28ecad4b1e1e88c03
parent6017d62db7600af491592e4f0d78611f33dc6b5e (diff)
compiler: basic outline of the parser
-rw-r--r--src/lex.rs25
-rw-r--r--src/main.rs1
-rw-r--r--src/parse.rs101
3 files changed, 119 insertions, 8 deletions
diff --git a/src/lex.rs b/src/lex.rs
index 01b24c5..396c06d 100644
--- a/src/lex.rs
+++ b/src/lex.rs
@@ -3,6 +3,15 @@ use multipeek::multipeek;
pub type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
pub struct TokenStream(Vec<Token>);
+/// Consumes the stream and hands out the wrapped tokens by value.
+impl IntoIterator for TokenStream {
+    type Item = Token;
+    type IntoIter = std::vec::IntoIter<Self::Item>;
+    fn into_iter(self) -> Self::IntoIter {
+        Vec::into_iter(self.0)
+    }
+}
+
#[derive(Clone, PartialEq, Debug)]
pub enum LexicalError {
InvalidIndentation,
@@ -363,8 +372,8 @@ impl std::fmt::Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Token::*;
match self {
- Word(token) => write!(f, "{}", token),
- Num(token) => write!(f, "{}", token),
+ Word(val) => write!(f, "{}", val),
+ Num(val) => write!(f, "{}", val),
Lit(lit) => write!(f, "{}", lit),
Sep(sep) => write!(f, "{}", sep),
Indent(i) => write!(f, "\n{}", " ".repeat(*i)),
@@ -376,12 +385,12 @@ impl std::fmt::Display for Literal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Literal::*;
match self {
- Char(token) => write!(f, "'{}'", token),
- SingleLineString(token) => write!(f, "\"{}\"", token),
- MultiLineString(token) => write!(f, "\"\"\"{}\"\"\"", token),
- Comment(token) => write!(f, "#{}", token),
- DocComment(token) => write!(f, "##{}", token),
- MultiLineComment(token) => write!(f, "#[{}]#", token),
+ Char(val) => write!(f, "'{}'", val),
+ SingleLineString(val) => write!(f, "\"{}\"", val),
+ MultiLineString(val) => write!(f, "\"\"\"{}\"\"\"", val),
+ Comment(val) => write!(f, "#{}", val),
+ DocComment(val) => write!(f, "##{}", val),
+ MultiLineComment(val) => write!(f, "#[{}]#", val),
}
}
}
diff --git a/src/main.rs b/src/main.rs
index 837dc1c..7b635f9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,6 +3,7 @@
mod ast;
mod lex;
+mod parse;
mod tree;
fn main() {}
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..9a60863
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,101 @@
+use multipeek::*;
+use crate::lex::*;
+use crate::ast::Expr;
+use crate::ast::Binding::*;
+use crate::ast::Control::*;
+use crate::ast::Pattern::*;
+
+/// Convert a basic TokenStream into an AbstractSyntaxTree.
+///
+/// Calls `parse` until no tokens remain and wraps the resulting expressions in a
+/// `Module` binding whose id is `name`. Returns the first error `parse` reports.
+pub fn astify(input: TokenStream, name: &str) -> Result<Expr> {
+    let mut input = multipeek(input);
+    let mut res = Vec::new();
+    // Only the presence of another token matters here; `parse` peeks for itself.
+    while input.peek().is_some() {
+        res.push(parse(&mut input, 0)?);
+    }
+    Ok(Expr::Binding(Module { id: name.to_string(), body: res }))
+}
+
+/// Parse one expression, dispatching on the keyword at the front of `input`.
+///
+/// A leading `pub` is consumed here and must be followed by `const`, `func`, `type` or `mod`;
+/// all other keywords stay in the stream. Anything else goes to `parse_line`. `depth` is unused.
+fn parse(input: &mut MultiPeek<std::vec::IntoIter<Token>>, depth: usize) -> Result<Expr> {
+    use Token::*;
+    match input.peek() {
+        Some(Word(val)) => match val.as_str() {
+            "pub" => {
+                input.next(); // drop `pub`; the keyword after it selects the sub-parser
+                match input.peek() {
+                    Some(Word(val)) => match val.as_str() {
+                        "const" => parse_const(input, true),
+                        "func" => parse_func(input, true),
+                        "type" => parse_type(input, true),
+                        "mod" => parse_mod(input, true),
+                        _ => Err("unrecognized keyword following pub".into()),
+                    },
+                    _ => Err("unrecognized thing following pub".into()),
+                }
+            }
+            "let" => parse_let(input),
+            "var" => parse_var(input),
+            "const" => parse_const(input, false),
+            "func" => parse_func(input, false),
+            "type" => parse_type(input, false),
+            "mod" => parse_mod(input, false),
+            "from" | "import" => parse_import(input),
+            "block" => parse_block(input),
+            "static" => parse_static(input),
+            "for" => parse_for(input),
+            "while" => parse_while(input),
+            "loop" => parse_loop(input),
+            "if" => parse_if(input),
+            "when" => parse_when(input),
+            "try" => parse_try(input),
+            "match" => parse_match(input),
+            _ => parse_line(input),
+        },
+        _ => parse_line(input),
+    }
+}
+
+/// Stub (panics via `todo!()`) for `Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr`; `public` is true after `pub`.
+fn parse_const(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body`; `public` is true after `pub`.
+fn parse_func(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc`; `public` is true after `pub`.
+fn parse_type(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Mod ::= 'pub'? 'mod' Ident ':' Body`; `public` is true after `pub`.
+fn parse_mod(input: &mut MultiPeek<std::vec::IntoIter<Token>>, public: bool) -> Result<Expr> { todo!() }
+
+/// Stub (panics via `todo!()`) for `Let ::= 'let' Pattern Annotation? '=' Expr`.
+fn parse_let(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Var ::= 'var' Pattern Annotation? ('=' Expr)?`.
+fn parse_var(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)?`.
+fn parse_import(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Block ::= 'block' Ident? ':' Body`.
+fn parse_block(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Static ::= 'static' ':' Body`.
+fn parse_static(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `For ::= 'for' Pattern 'in' Expr ':' Body`.
+fn parse_for(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `While ::= 'while' Expr ':' Body`.
+fn parse_while(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Loop ::= 'loop' ':' Body`.
+fn parse_loop(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?`.
+fn parse_if(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)?`.
+fn parse_when(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)?`.
+fn parse_try(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+/// Stub (panics via `todo!()`) for `Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+`.
+fn parse_match(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+
+/// Stub (panics via `todo!()`); `parse` falls back to this when the next token is not a recognised keyword.
+fn parse_line(input: &mut MultiPeek<std::vec::IntoIter<Token>>) -> Result<Expr> { todo!() }
+
+// lex, parse, expand, compile?