typeclasses

+ +

~~also~~ bidirectional typechecking and subtyping

ten slides, five minutes

the lambda calculus

no time, others also did this
rust good bottom text
algebraic data types <<3
bidirectional typechecking
infer, check, execute
bounce back and forth between infer and check
scaling, first class functions: a problem for the future

pub type Value = u8;
+
+pub struct Term {
+  pub val: Value,
+  pub kind: Type, // for casting
+}
+
+pub type Identifier = String;
+pub type Context = HashMap<Identifier, Term>;
+
+#[derive(Clone, PartialEq, Eq)]
+pub enum Expression {
+  Annotation {
+    expr: Box<Expression>, 
+    kind: Type
+  },
+  Constant {
+    term: Term
+  },
+  Variable {
+    id: Identifier
+  },
+  Abstraction {
+    param: Identifier, 
+    func: Box<Expression>
+  },
+  Application {
+    func: Box<Expression>, 
+    arg: Box<Expression>
+  },
+  Conditional {
+    if_cond: Box<Expression>, 
+    if_then: Box<Expression>, 
+    if_else: Box<Expression>
+  }
+}
+

parsing

+ +

pegs are magic

+ +

very nice for simple stuff

+ +

/// Parses a lambda-calculus-like language into an AST.
+pub fn parse_str(input: &str) -> Result<Expression, peg::error::ParseError<peg::str::LineCol>>{
+    // this is kinda gross
+    // i miss my nim pegs
+    peg::parser!{
+        grammar lambda() for str {
+            rule identifier() -> String
+            = i:['a'..='z' | 'A'..='Z' | '0'..='9']+ {
+                i.iter().collect::<String>()
+            }
+            rule constant() -> Expression
+            = p:"-"? c:['0'..='9']+ {
+                let value = c.iter().collect::<String>().parse::<Value>().unwrap();
+                Expression::Constant {
+                    term: Term {
+                        val: if let Some(_) = p {
+                            value.wrapping_neg()
+                        } else {
+                            value
+                        },
+                        kind: Type::Empty
+                    }
+                }
+            }
+            rule primitive() -> Type
+            = k:$("empty" / "unit" / "bool" / "nat" / "int") {
+                match k {
+                    "empty" => Type::Empty,
+                    "unit" => Type::Unit,
+                    "bool" => Type::Boolean,
+                    "nat" => Type::Natural,
+                    "int" => Type::Integer,
+                    _ => Type::Empty
+                }
+            }
+            rule function() -> Type = "(" f:kind() " "* "->" " "* t:kind() ")" {
+                Type::Function { from: Box::new(f), to: Box::new(t) }
+            }
+            rule kind() -> Type
+             = k:(function() / primitive()) {
+                k
+            }
+            rule annotation() -> Expression
+            = e:(conditional() / abstraction() / application() / constant() / variable()) " "* ":" " "* k:kind() {
+                Expression::Annotation {
+                    expr: Box::new(e),
+                    kind: k
+                }
+            }
+            rule variable() -> Expression
+            = v:identifier() {
+                Expression::Variable {
+                    id: v
+                }
+            }
+            rule abstraction() -> Expression
+            = ("λ" / "lambda") " "* p:identifier() " "* "." " "* f:expression() {
+                Expression::Abstraction {
+                    param: p,
+                    func: Box::new(f)
+                }
+            }
+            rule application() -> Expression
+            = "(" f:(annotation() / abstraction()) ")" " "* a:expression() {
+                Expression::Application {
+                    func: Box::new(f),
+                    arg: Box::new(a)
+                }
+            }
+            rule conditional() -> Expression
+            = "if" " "+ c:expression() " "+ "then" " "+ t:expression() " "+ "else" " "+ e:expression() {
+                Expression::Conditional {
+                    if_cond: Box::new(c),
+                    if_then: Box::new(t),
+                    if_else: Box::new(e)
+                }
+            }
+            pub rule expression() -> Expression
+            = e:(conditional() / annotation() / abstraction() / application() / constant() / variable()) {
+                e
+            }
+            pub rule ast() -> Vec<Expression>
+            = expression() ** ("\n"+)
+        }
+    }
+    return lambda::expression(input.trim());
+}

oh god

why did i decide to do this

+ +

/// Parses a simple language with bracket-based indentation and end-of-term semicolons.
+#[allow(unused_variables)]
+pub fn parse_lang(input: &str) -> Result<Vec<Expression>, peg::error::ParseError<peg::str::LineCol>> {
+  peg::parser! {
+    grammar puck() for str {
+      // whitespace
+      rule w() = ("\n" / " ")+
+      // todo: multiple parameters pls
+      rule abs() -> Expression
+      = "func" w() n:ident() w()? "(" p:ident() ")"
+        w()? ":" w()? k:function() w() "=" w() "{" w() f:expr() w() "}" {
+        Expression::Annotation {
+          expr: Box::new(Expression::Abstraction { param: p, func: Box::new(f) }),
+          kind: k
+        }
+      }
+      // fixme: this requires, uh, refactoring the ast...
+      rule app() -> Expression
+      = f:ident() "(" a:expr() ")" {
+        Expression::Application {
+          func: Box::new(Expression::Variable { 
+            id: f 
+          }),
+          arg: Box::new(a)
+        }
+      }
+      rule cond() -> Expression
+      = "if" w() c:expr() w() "=" w() "{" w() t:expr() w() "};" w() 
+        "else" w() "=" w() "{" w() e:expr() w() "}" {
+        Expression::Conditional {
+          if_cond: Box::new(c),
+          if_then: Box::new(t),
+          if_else: Box::new(e)
+        }
+      }
+      // fixme: cannot say e:(expr()), left-recursion issue
+      rule ann() -> Expression
+      = e:(cond() / abs() / app() / cons() / var()) w()? ":" w() k:kind() {
+        Expression::Annotation {
+          expr: Box::new(e),
+          kind: k
+        }
+      }
+      // identifiers
+      rule ident() -> String = i:['a'..='z' | 'A'..='Z' | '0'..='9']+ {
+        i.iter().collect::<String>()
+      }
+      rule var() -> Expression
+      = v:ident() {
+        Expression::Variable {
+          id: v
+        }
+      }
+      // constants
+      rule cons() -> Expression = p:"-"? c:['0'..='9']+ {
+        let value = c.iter().collect::<String>().parse::<Value>().unwrap();
+        Expression::Constant {
+          term: Term {
+            val: if let Some(_) = p {
+              value.wrapping_neg()
+            } else {
+              value
+            },
+            kind: Type::Empty // fixme
+          }
+        }
+      }
+      // types
+      rule primitive() -> Type = k:$("empty" / "unit" / "bool" / "nat" / "int") {
+        match k {
+          "empty" => Type::Empty,
+          "unit" => Type::Unit,
+          "bool" => Type::Boolean,
+          "nat" => Type::Natural,
+          "int" => Type::Integer,
+          _ => Type::Empty // never happens
+        }
+      }
+      // fixme: parenthesis necessary, left-recursion issue
+      rule function() -> Type = "(" w()? f:kind() w()? "->" w()? t:kind() w()? ")" {
+        Type::Function { from: Box::new(f), to: Box::new(t) }
+      }
+      // todo: records, etc
+      rule kind() -> Type
+       = k:(function() / primitive()) {
+        k
+      }
+      pub rule expr() -> Expression
+      = e:(ann() / cond() / abs() / app() / cons() / var()) ";" {
+        e
+      }
+      pub rule file() -> Vec<Expression>
+      = expr() ++ "\n"
+    }
+  }
+  return puck::file(input);
+}
+

func negate(x): (bool -> bool) =
+  if x: 0
+  else: 1
+
+func fib(x): (int -> int) =
+  if eq(x, 0):
+    0
+  else:
+    if eq(x, 1):
+      1
+    else:       # comment
+      add(fib(sub(x, 1)), 
+          fib(sub(x, 2)))
+
+negate(negate(1))
+fib(5)
+

/// Converts a whitespace-indented language 
+/// into a bracketed language for PEG matching.
+pub fn lex(input: &str) -> 
+    Result<String, &'static str> {
+  #[derive(Eq, PartialEq)]
+  enum Previous {
+    Start,
+    Block,
+    Line,
+  }
+  struct State {
+    blank: bool, // entirely whitespace so far?
+    level: usize, // current indentation level
+    count: usize, // current whitespace count
+    previous: Previous,
+    comment: bool // current line a comment?
+  }
+  let indent_size: usize = 2;
+
+  let mut state = State { 
+    blank: true, 
+    level: 0, 
+    count: 0, 
+    previous: Previous::Start, 
+    comment: false 
+  };
+  let mut buffer = String::new();
+  let mut result = String::new();
+
+  // wow lexers are hard
+  for c in input.chars() {
+    match c {
+      '\n' => {
+      if !buffer.is_empty() {
+        if state.count == state.level {
+          if state.previous != 
+            Previous::Start {
+            result.push(';');
+            result.push('\n');
+          }
+          state.previous = Previous::Line;
+        } else if state.level + indent_size
+               == state.count {
+          result.push(' ');
+          result.push('{');
+          result.push('\n');
+          state.level = state.count;
+          state.previous = Previous::Line;
+        } else if state.count > 
+                  state.level + indent_size {
+          return Err("invalid indent jump");
+        } else if state.count % indent_size 
+                                       != 0 {
+          return Err("incorrect indent offset");
+        } else if state.level > state.count {
+          while state.level > state.count {
+            if state.previous == 
+               Previous::Line {
+              result.push(';');
+            }
+            state.level -= indent_size;
+            result.push('\n');
+            result.push_str(
+                   &" ".repeat(state.level));
+            result.push('}');
+            result.push(';');
+            state.previous = Previous::Block;
+          }
+          result.push('\n');
+        } else {
+          return Err("unknown indentation");
+        }
+
+        result.push_str(
+               &" ".repeat(state.count));
+        result.push_str(&buffer);
+
+        state.count = 0;
+        state.comment = false;
+        buffer.clear();
+        }
+        state.blank = true;
+      },
+      ' ' if state.blank => {
+        state.count += 1;
+      },
+      '#' => {
+        state.blank = false;
+        state.comment = true;
+      },
+      _ => {
+        if state.blank {
+          state.blank = false;
+        }
+        if !state.comment {
+          buffer.push(c);
+        }
+      },
+    }
+  }
+  if state.previous == Previous::Line {
+    result.push(';');
+  }
+  while state.level != 0 {
+    state.level -= 2;
+    result.push('\n');
+    result.push_str(&" ".repeat(state.level));
+    result.push('}');
+    result.push(';');
+  }
+  return Ok(result);
+}

don't write a parser

i now fully appreciate the role of lexers, whitespace and PEGs do *not* mix

subtyping

/// The subtyping relation between any two types.
+pub fn subtype(is: &Type, of: &Type) -> bool {
+    match (is, of) {
+        (Type::Record(is_fields), Type::Record(of_fields)) => {
+            // width, depth, and permutation
+            for (key, of_value) in of_fields {
+                match is_fields.get(key) {
+                    Some(is_value) => if !subtype(is_value, of_value) {
+                        return false;
+                    },
+                    None => return false
+                }
+            }
+            return true;
+        },
+        (Type::Function { from: is_from, to: is_to }, 
+         Type::Function { from: of_from, to: of_to }) => {
+            subtype(of_from, is_from) && subtype(is_to, of_to)
+        }
+        (Type::Natural, Type::Integer) => true, // obviously not, but let's pretend
+        (_, Type::Empty) => true,
+        (Type::Error, _) => true,
+        (_, _) if is == of => true,
+        (_, _) => false,
+    }
+}
+

advanced ish types

function types
structs/records/products: three types for the price of one! (note: remove this, this isn't funny) +
subtyping determined with a subtype function
drop-in replacement for type equality (==) in impl
if branches must be mutual subtypes

pub type Identifier = String;
+pub type Context = 
+    HashMap<Identifier, Term>;
+
+pub type Value = u64;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Type {
+    Empty,  // top
+    Error,  // bottom
+    Unit,
+    Boolean,
+    Natural,
+    Integer,
+    // Float,
+    // String,
+    Enum(Vec<Type>),
+    Record(HashMap<Identifier, Type>),
+    Function{from: Box<Type>, 
+             to: Box<Type>
+    },
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Term {
+    pub val: Value,
+    pub kind: Type,
+}
+

future plans
+

complete and test subtyping
extend system with additional higher-level types +
- enums
- actually distinct naturals and integers
- floats
- strings (ugh), vecs, collections of data
+
type classes (still really want to implement), needs: +
- a global context + first class functions
- resolving functions via identifiers
+
then, write another parser

Questions?
+

+ +