From f3efbfc03fabae11df0554e67769327d4dc57a83 Mon Sep 17 00:00:00 2001 From: JJ Date: Fri, 27 Oct 2023 00:53:32 -0700 Subject: compiler: basic outline of the parser --- src/lex.rs | 25 ++++++++++----- src/main.rs | 1 + src/parse.rs | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 8 deletions(-) create mode 100644 src/parse.rs (limited to 'src') diff --git a/src/lex.rs b/src/lex.rs index 01b24c5..396c06d 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -3,6 +3,15 @@ use multipeek::multipeek; pub type Result = core::result::Result>; pub struct TokenStream(Vec); +impl IntoIterator for TokenStream { + type Item = Token; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + #[derive(Clone, PartialEq, Debug)] pub enum LexicalError { InvalidIndentation, @@ -363,8 +372,8 @@ impl std::fmt::Display for Token { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use Token::*; match self { - Word(token) => write!(f, "{}", token), - Num(token) => write!(f, "{}", token), + Word(val) => write!(f, "{}", val), + Num(val) => write!(f, "{}", val), Lit(lit) => write!(f, "{}", lit), Sep(sep) => write!(f, "{}", sep), Indent(i) => write!(f, "\n{}", " ".repeat(*i)), @@ -376,12 +385,12 @@ impl std::fmt::Display for Literal { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use Literal::*; match self { - Char(token) => write!(f, "'{}'", token), - SingleLineString(token) => write!(f, "\"{}\"", token), - MultiLineString(token) => write!(f, "\"\"\"{}\"\"\"", token), - Comment(token) => write!(f, "#{}", token), - DocComment(token) => write!(f, "##{}", token), - MultiLineComment(token) => write!(f, "#[{}]#", token), + Char(val) => write!(f, "'{}'", val), + SingleLineString(val) => write!(f, "\"{}\"", val), + MultiLineString(val) => write!(f, "\"\"\"{}\"\"\"", val), + Comment(val) => write!(f, "#{}", val), + DocComment(val) => write!(f, "##{}", val), + MultiLineComment(val) => write!(f, "#[{}]#", val), } } } diff --git a/src/main.rs b/src/main.rs index 837dc1c..7b635f9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ mod ast; mod lex; +mod parse; mod tree; fn main() {} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..9a60863 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,101 @@ +use multipeek::*; +use crate::lex::*; +use crate::ast::Expr; +use crate::ast::Binding::*; +use crate::ast::Control::*; +use crate::ast::Pattern::*; + +/// Convert a basic TokenStream into an AbstractSyntaxTree +pub fn astify(input: TokenStream, name: &str) -> Result { + use Token::*; + use Literal::*; + use Punctuation::*; + + let mut input = multipeek(input); + let mut res = Vec::new(); + while let Some(x) = input.peek() { + res.push(parse(&mut input, 0)?); + } + Ok(Expr::Binding(Module{ id: name.to_string(), body: res })) +} + +fn parse(input: &mut MultiPeek>, depth: usize) -> Result { + use Token::*; + use Literal::*; + use Punctuation::*; + let mut input = input; + match input.peek() { + Some(Word(val)) => match val.as_str() { + "pub" => { + input.next(); + if let Some(Word(val)) = input.peek() { + match val.as_str() { + "const" => parse_const(&mut input, true), + "func" => parse_func(&mut input, true), + "type" => parse_type(&mut input, true), + "mod" => parse_mod(&mut input, true), + _ => return Err("unrecognized keyword following pub".into()), + } + } else { + return Err("unrecognized thing following pub".into()); + } + }, + "let" => parse_let(&mut input), + "var" => parse_var(&mut input), + "const" => parse_const(&mut input, false), + "func" => parse_func(&mut input, false), + "type" => parse_type(&mut input, false), + "mod" => parse_mod(&mut input, false), + "from" | "import" => parse_import(&mut input), + "block" => parse_block(&mut input), + "static" => parse_static(&mut input), + "for" => parse_for(&mut input), + "while" => parse_while(&mut input), + "loop" => parse_loop(&mut input), + "if" => parse_if(&mut input), + "when" => parse_when(&mut input), + "try" => parse_try(&mut input), + "match" => parse_match(&mut input), + _ => parse_line(&mut input), + }, + _ => parse_line(&mut input), + } +} + +// Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr +fn parse_const(input: &mut MultiPeek>, public: bool) -> Result { todo!() } +// Func ::= 'pub'? ('func' | 'proc') Ident Generics? Parameters? (':' TypeDesc) '=' Body +fn parse_func(input: &mut MultiPeek>, public: bool) -> Result { todo!() } +// TypeDecl ::= 'pub'? 'type' Pattern Generics? '=' 'distinct'? 'ref'? TypeDesc +fn parse_type(input: &mut MultiPeek>, public: bool) -> Result { todo!() } +// Mod ::= 'pub'? 'mod' Ident ':' Body +fn parse_mod(input: &mut MultiPeek>, public: bool) -> Result { todo!() } + +// Let ::= 'let' Pattern Annotation? '=' Expr +fn parse_let(input: &mut MultiPeek>) -> Result { todo!() } +// Var ::= 'var' Pattern Annotation? ('=' Expr)? +fn parse_var(input: &mut MultiPeek>) -> Result { todo!() } +// Import ::= ('from' Ident)? 'import' Ident (',' Ident)* ('as' Ident)? +fn parse_import(input: &mut MultiPeek>) -> Result { todo!() } +// Block ::= 'block' Ident? ':' Body +fn parse_block(input: &mut MultiPeek>) -> Result { todo!() } +// Static ::= 'static' ':' Body +fn parse_static(input: &mut MultiPeek>) -> Result { todo!() } +// For ::= 'for' Pattern 'in' Expr ':' Body +fn parse_for(input: &mut MultiPeek>) -> Result { todo!() } +// While ::= 'while' Expr ':' Body +fn parse_while(input: &mut MultiPeek>) -> Result { todo!() } +// Loop ::= 'loop' ':' Body +fn parse_loop(input: &mut MultiPeek>) -> Result { todo!() } +// If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? +fn parse_if(input: &mut MultiPeek>) -> Result { todo!() } +// When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? +fn parse_when(input: &mut MultiPeek>) -> Result { todo!() } +// Try ::= 'try' ':' Body ('except' Ident (',' Ident)* ':' Body) ('finally' ':' Body)? +fn parse_try(input: &mut MultiPeek>) -> Result { todo!() } +// Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+ +fn parse_match(input: &mut MultiPeek>) -> Result { todo!() } + +fn parse_line(input: &mut MultiPeek>) -> Result { todo!() } + +// lex, parse, expand, compile? -- cgit v1.2.3-70-g09d2