also bidirectional typechecking and subtyping
ten slides, five minutes
pub type Value = u8;
pub struct Term {
pub val: Value,
pub kind: Type, // for casting
}
pub type Identifier = String;
pub type Context = HashMap<Identifier, Term>;
#[derive(Clone, PartialEq, Eq)]
pub enum Expression {
Annotation {
expr: Box<Expression>,
kind: Type
},
Constant {
term: Term
},
Variable {
id: Identifier
},
Abstraction {
param: Identifier,
func: Box<Expression>
},
Application {
func: Box<Expression>,
arg: Box<Expression>
},
Conditional {
if_cond: Box<Expression>,
if_then: Box<Expression>,
if_else: Box<Expression>
}
}
pegs are magic
very nice for simple stuff
/// Parses a lambda-calculus-like language into an AST.
pub fn parse_str(input: &str) -> Result<Expression, peg::error::ParseError<peg::str::LineCol>>{
// this is kinda gross
// i miss my nim pegs
peg::parser!{
grammar lambda() for str {
rule identifier() -> String
= i:['a'..='z' | 'A'..='Z' | '0'..='9']+ {
i.iter().collect::<String>()
}
rule constant() -> Expression
= p:"-"? c:['0'..='9']+ {
let value = c.iter().collect::<String>().parse::<Value>().unwrap();
Expression::Constant {
term: Term {
val: if let Some(_) = p {
value.wrapping_neg()
} else {
value
},
kind: Type::Empty
}
}
}
rule primitive() -> Type
= k:$("empty" / "unit" / "bool" / "nat" / "int") {
match k {
"empty" => Type::Empty,
"unit" => Type::Unit,
"bool" => Type::Boolean,
"nat" => Type::Natural,
"int" => Type::Integer,
_ => Type::Empty
}
}
rule function() -> Type = "(" f:kind() " "* "->" " "* t:kind() ")" {
Type::Function { from: Box::new(f), to: Box::new(t) }
}
rule kind() -> Type
= k:(function() / primitive()) {
k
}
rule annotation() -> Expression
= e:(conditional() / abstraction() / application() / constant() / variable()) " "* ":" " "* k:kind() {
Expression::Annotation {
expr: Box::new(e),
kind: k
}
}
rule variable() -> Expression
= v:identifier() {
Expression::Variable {
id: v
}
}
rule abstraction() -> Expression
= ("λ" / "lambda") " "* p:identifier() " "* "." " "* f:expression() {
Expression::Abstraction {
param: p,
func: Box::new(f)
}
}
rule application() -> Expression
= "(" f:(annotation() / abstraction()) ")" " "* a:expression() {
Expression::Application {
func: Box::new(f),
arg: Box::new(a)
}
}
rule conditional() -> Expression
= "if" " "+ c:expression() " "+ "then" " "+ t:expression() " "+ "else" " "+ e:expression() {
Expression::Conditional {
if_cond: Box::new(c),
if_then: Box::new(t),
if_else: Box::new(e)
}
}
pub rule expression() -> Expression
= e:(conditional() / annotation() / abstraction() / application() / constant() / variable()) {
e
}
pub rule ast() -> Vec<Expression>
= expression() ** ("\n"+)
}
}
return lambda::expression(input.trim());
}
why did i decide to do this
/// Parses a simple language with bracket-based indentation and end-of-term semicolons.
#[allow(unused_variables)]
pub fn parse_lang(input: &str) -> Result<Vec<Expression>, peg::error::ParseError<peg::str::LineCol>> {
peg::parser! {
grammar puck() for str {
// whitespace
rule w() = ("\n" / " ")+
// todo: multiple parameters pls
rule abs() -> Expression
= "func" w() n:ident() w()? "(" p:ident() ")"
w()? ":" w()? k:function() w() "=" w() "{" w() f:expr() w() "}" {
Expression::Annotation {
expr: Box::new(Expression::Abstraction { param: p, func: Box::new(f) }),
kind: k
}
}
// fixme: this requires, uh, refactoring the ast...
rule app() -> Expression
= f:ident() "(" a:expr() ")" {
Expression::Application {
func: Box::new(Expression::Variable {
id: f
}),
arg: Box::new(a)
}
}
rule cond() -> Expression
= "if" w() c:expr() w() "=" w() "{" w() t:expr() w() "};" w()
"else" w() "=" w() "{" w() e:expr() w() "}" {
Expression::Conditional {
if_cond: Box::new(c),
if_then: Box::new(t),
if_else: Box::new(e)
}
}
// fixme: cannot say e:(expr()), left-recursion issue
rule ann() -> Expression
= e:(cond() / abs() / app() / cons() / var()) w()? ":" w() k:kind() {
Expression::Annotation {
expr: Box::new(e),
kind: k
}
}
// identifiers
rule ident() -> String = i:['a'..='z' | 'A'..='Z' | '0'..='9']+ {
i.iter().collect::<String>()
}
rule var() -> Expression
= v:ident() {
Expression::Variable {
id: v
}
}
// constants
rule cons() -> Expression = p:"-"? c:['0'..='9']+ {
let value = c.iter().collect::<String>().parse::<Value>().unwrap();
Expression::Constant {
term: Term {
val: if let Some(_) = p {
value.wrapping_neg()
} else {
value
},
kind: Type::Empty // fixme
}
}
}
// types
rule primitive() -> Type = k:$("empty" / "unit" / "bool" / "nat" / "int") {
match k {
"empty" => Type::Empty,
"unit" => Type::Unit,
"bool" => Type::Boolean,
"nat" => Type::Natural,
"int" => Type::Integer,
_ => Type::Empty // never happens
}
}
// fixme: parenthesis necessary, left-recursion issue
rule function() -> Type = "(" w()? f:kind() w()? "->" w()? t:kind() w()? ")" {
Type::Function { from: Box::new(f), to: Box::new(t) }
}
// todo: records, etc
rule kind() -> Type
= k:(function() / primitive()) {
k
}
pub rule expr() -> Expression
= e:(ann() / cond() / abs() / app() / cons() / var()) ";" {
e
}
pub rule file() -> Vec<Expression>
= expr() ++ "\n"
}
}
return puck::file(input);
}
func negate(x): (bool -> bool) =
if x: 0
else: 1
func fib(x): (int -> int) =
if eq(x, 0):
0
else:
if eq(x, 1):
1
else: # comment
add(fib(sub(x, 1)),
fib(sub(x, 2)))
negate(negate(1))
fib(5)
/// Converts a whitespace-indented language
/// into a bracketed language for PEG matching.
pub fn lex(input: &str) ->
Result<String, &'static str> {
#[derive(Eq, PartialEq)]
enum Previous {
Start,
Block,
Line,
}
struct State {
blank: bool, // entirely whitespace so far?
level: usize, // current indentation level
count: usize, // current whitespace count
previous: Previous,
comment: bool // current line a comment?
}
let indent_size: usize = 2;
let mut state = State {
blank: true,
level: 0,
count: 0,
previous: Previous::Start,
comment: false
};
let mut buffer = String::new();
let mut result = String::new();
// wow lexers are hard
for c in input.chars() {
match c {
'\n' => {
if !buffer.is_empty() {
if state.count == state.level {
if state.previous !=
Previous::Start {
result.push(';');
result.push('\n');
}
state.previous = Previous::Line;
} else if state.level + indent_size
== state.count {
result.push(' ');
result.push('{');
result.push('\n');
state.level = state.count;
state.previous = Previous::Line;
} else if state.count >
state.level + indent_size {
return Err("invalid indent jump");
} else if state.count % indent_size
!= 0 {
return Err("incorrect indent offset");
} else if state.level > state.count {
while state.level > state.count {
if state.previous ==
Previous::Line {
result.push(';');
}
state.level -= indent_size;
result.push('\n');
result.push_str(
&" ".repeat(state.level));
result.push('}');
result.push(';');
state.previous = Previous::Block;
}
result.push('\n');
} else {
return Err("unknown indentation");
}
result.push_str(
&" ".repeat(state.count));
result.push_str(&buffer);
state.count = 0;
state.comment = false;
buffer.clear();
}
state.blank = true;
},
' ' if state.blank => {
state.count += 1;
},
'#' => {
state.blank = false;
state.comment = true;
},
_ => {
if state.blank {
state.blank = false;
}
if !state.comment {
buffer.push(c);
}
},
}
}
if state.previous == Previous::Line {
result.push(';');
}
while state.level != 0 {
state.level -= 2;
result.push('\n');
result.push_str(&" ".repeat(state.level));
result.push('}');
result.push(';');
}
return Ok(result);
}
i now fully appreciate the role of lexers, whitespace and PEGs do *not* mix
/// The subtyping relation between any two types.
pub fn subtype(is: &Type, of: &Type) -> bool {
match (is, of) {
(Type::Record(is_fields), Type::Record(of_fields)) => {
// width, depth, and permutation
for (key, of_value) in of_fields {
match is_fields.get(key) {
Some(is_value) => if !subtype(is_value, of_value) {
return false;
},
None => return false
}
}
return true;
},
(Type::Function { from: is_from, to: is_to },
Type::Function { from: of_from, to: of_to }) => {
subtype(of_from, is_from) && subtype(is_to, of_to)
}
(Type::Natural, Type::Integer) => true, // obviously not, but let's pretend
(_, Type::Empty) => true,
(Type::Error, _) => true,
(_, _) if is == of => true,
(_, _) => false,
}
}
pub type Identifier = String;
pub type Context =
HashMap<Identifier, Term>;
pub type Value = u64;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Type {
Empty, // top
Error, // bottom
Unit,
Boolean,
Natural,
Integer,
// Float,
// String,
Enum(Vec<Type>),
Record(HashMap<Identifier, Type>),
Function{from: Box<Type>,
to: Box<Type>
},
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Term {
pub val: Value,
pub kind: Type,
}
no