use crate::ast::*; // (λx:T.y): T z pub fn parse(input: &str) -> Expression { return parse_str(input).expect("invalid expression"); } /// Parses a lambda-calculus-like language into an AST. pub fn parse_str(input: &str) -> Result> { // this is kinda awful // i miss my nim pegs peg::parser!{ grammar lambda() for str { rule identifier() -> String = i:['a'..='z' | 'A'..='Z' | '0'..='9']+ { i.iter().collect::() } rule constant() -> Expression = p:"-"? c:['0'..='9']+ { let value = c.iter().collect::().parse::().unwrap(); Expression::Constant { term: Term { val: if let Some(_) = p { value.wrapping_neg() } else { value }, kind: Type::Empty } } } // fucking awful but i don't know another way // k:("empty" / "unit" / etc) returns () // and i can't seem to match and raise a parse error // so ¯\_(ツ)_/¯ rule empty() -> Type = k:"empty" {Type::Empty} rule unit() -> Type = k:"unit" {Type::Unit} rule boolean() -> Type = k:"bool" {Type::Boolean} rule natural() -> Type = k:"nat" {Type::Natural} rule integer() -> Type = k:"int" {Type::Integer} rule kind() -> Type = k:(empty() / unit() / boolean() / natural() / integer()) { k } rule annotation() -> Expression = e:(conditional() / abstraction() / application() / constant() / variable()) " "* ":" " "* k:kind() { Expression::Annotation { expr: Box::new(e), kind: k } } rule variable() -> Expression = v:identifier() { Expression::Variable { id: v } } rule abstraction() -> Expression = ("λ" / "lambda ") " "* p:identifier() " "* "." " "* f:expression() { Expression::Abstraction { param: p, func: Box::new(f) } } // fixme: more cases should parse, but how? rule application() -> Expression = "(" f:(annotation() / abstraction()) ")" " "* a:expression() { Expression::Application { func: Box::new(f), arg: Box::new(a) } } rule conditional() -> Expression = "if" " "+ c:expression() " "+ "then" " "+ t:expression() " "+ "else" " "+ e:expression() { Expression::Conditional { if_cond: Box::new(c), if_then: Box::new(t), if_else: Box::new(e) } } pub rule expression() -> Expression = e:(conditional() / annotation() / abstraction() / application() / constant() / variable()) { e } pub rule ast() -> Vec = expression() ** ("\n"+) } } // assert_eq!(lambda::expression("(λx:bool.x)").unwrap(), lambda::expression("(λx: bool . x)").unwrap()); return lambda::expression(input.trim()); } /// Parses a Nim-like language into an AST. #[allow(unused_variables)] pub fn parse_file(path: &str) -> Vec { todo!(); } /// Converts a whitespace-indented language into a regular bracketed language for matching with PEGs /// Then, tokens are known to be separated by [\n ]+ (except strings. problem for later.) pub fn lex(input: &str) -> Result { #[derive(Eq, PartialEq)] enum Previous { Start, Block, Line, } struct State { blank: bool, // is the line entirely whitespace so far? level: usize, // current indentation level count: usize, // current whitespace count previous: Previous, } let indent_size: usize = 2; let mut state = State { blank: true, level: 0, count: 0, previous: Previous::Start }; let mut buffer = String::new(); let mut result = String::new(); for c in input.chars() { match c { '\n' => { if !buffer.is_empty() { if state.count == state.level { if state.previous != Previous::Start { result.push(';'); result.push('\n'); } state.previous = Previous::Line; } else if state.level + indent_size == state.count { result.push(' '); result.push('{'); result.push('\n'); state.level = state.count; state.previous = Previous::Line; } else if state.count > state.level + indent_size { return Err("invalid jump in indentation"); } else if state.count % indent_size != 0 { return Err("incorrect indentation offset, must be a multiple of indent_size"); } else if state.level > state.count { while state.level > state.count { if state.previous == Previous::Line { result.push(';'); } state.level -= indent_size; result.push('\n'); result.push_str(" ".repeat(state.level).as_str()); result.push('}'); state.previous = Previous::Block; } result.push('\n'); } else { return Err("unknown indentation error"); } result.push_str(" ".repeat(state.count).as_str()); result.push_str(&buffer); state.count = 0; buffer.clear(); } state.blank = true; }, ' ' if state.blank => { state.count += 1; }, _ => { if state.blank { state.blank = false; } buffer.push(c); }, } } if state.previous == Previous::Line { result.push(';'); } while state.level != 0 { state.level -= 2; result.push('\n'); result.push_str(" ".repeat(state.level).as_str()); result.push('}'); } return Ok(result); }