typeclasses

also bidirectional typechecking and subtyping

ten slides, five minutes

the lambda calculus

  • no time, others also did this
  • rust good bottom text
  • algebraic data types <<3
  • bidirectional typechecking
  • infer, check, execute
  • bounce back and forth between infer and check
  • scaling, first class functions:   a problem for the future
pub type Value = u8;

pub struct Term {
  pub val: Value,
  pub kind: Type, // for casting
}

pub type Identifier = String;
pub type Context = HashMap<Identifier, Term>;

#[derive(Clone, PartialEq, Eq)]
pub enum Expression {
  Annotation {
    expr: Box<Expression>, 
    kind: Type
  },
  Constant {
    term: Term
  },
  Variable {
    id: Identifier
  },
  Abstraction {
    param: Identifier, 
    func: Box<Expression>
  },
  Application {
    func: Box<Expression>, 
    arg: Box<Expression>
  },
  Conditional {
    if_cond: Box<Expression>, 
    if_then: Box<Expression>, 
    if_else: Box<Expression>
  }
}

parsing

pegs are magic

very nice for simple stuff

/// Parses a lambda-calculus-like language into an AST.
pub fn parse_str(input: &str) -> Result<Expression, peg::error::ParseError<peg::str::LineCol>>{
    // this is kinda gross
    // i miss my nim pegs
    peg::parser!{
        grammar lambda() for str {
            rule identifier() -> String
            = i:['a'..='z' | 'A'..='Z' | '0'..='9']+ {
                i.iter().collect::<String>()
            }
            rule constant() -> Expression
            = p:"-"? c:['0'..='9']+ {
                let value = c.iter().collect::<String>().parse::<Value>().unwrap();
                Expression::Constant {
                    term: Term {
                        val: if let Some(_) = p {
                            value.wrapping_neg()
                        } else {
                            value
                        },
                        kind: Type::Empty
                    }
                }
            }
            rule primitive() -> Type
            = k:$("empty" / "unit" / "bool" / "nat" / "int") {
                match k {
                    "empty" => Type::Empty,
                    "unit" => Type::Unit,
                    "bool" => Type::Boolean,
                    "nat" => Type::Natural,
                    "int" => Type::Integer,
                    _ => Type::Empty
                }
            }
            rule function() -> Type = "(" f:kind() " "* "->" " "* t:kind() ")" {
                Type::Function { from: Box::new(f), to: Box::new(t) }
            }
            rule kind() -> Type
             = k:(function() / primitive()) {
                k
            }
            rule annotation() -> Expression
            = e:(conditional() / abstraction() / application() / constant() / variable()) " "* ":" " "* k:kind() {
                Expression::Annotation {
                    expr: Box::new(e),
                    kind: k
                }
            }
            rule variable() -> Expression
            = v:identifier() {
                Expression::Variable {
                    id: v
                }
            }
            rule abstraction() -> Expression
            = ("λ" / "lambda") " "* p:identifier() " "* "." " "* f:expression() {
                Expression::Abstraction {
                    param: p,
                    func: Box::new(f)
                }
            }
            rule application() -> Expression
            = "(" f:(annotation() / abstraction()) ")" " "* a:expression() {
                Expression::Application {
                    func: Box::new(f),
                    arg: Box::new(a)
                }
            }
            rule conditional() -> Expression
            = "if" " "+ c:expression() " "+ "then" " "+ t:expression() " "+ "else" " "+ e:expression() {
                Expression::Conditional {
                    if_cond: Box::new(c),
                    if_then: Box::new(t),
                    if_else: Box::new(e)
                }
            }
            pub rule expression() -> Expression
            = e:(conditional() / annotation() / abstraction() / application() / constant() / variable()) {
                e
            }
            pub rule ast() -> Vec<Expression>
            = expression() ** ("\n"+)
        }
    }
    return lambda::expression(input.trim());
}

oh god

why did i decide to do this

/// Parses a simple language with bracket-based indentation and end-of-term semicolons.
#[allow(unused_variables)]
pub fn parse_lang(input: &str) -> Result<Vec<Expression>, peg::error::ParseError<peg::str::LineCol>> {
  peg::parser! {
    grammar puck() for str {
      // whitespace
      rule w() = ("\n" / " ")+
      // todo: multiple parameters pls
      rule abs() -> Expression
      = "func" w() n:ident() w()? "(" p:ident() ")"
        w()? ":" w()? k:function() w() "=" w() "{" w() f:expr() w() "}" {
        Expression::Annotation {
          expr: Box::new(Expression::Abstraction { param: p, func: Box::new(f) }),
          kind: k
        }
      }
      // fixme: this requires, uh, refactoring the ast...
      rule app() -> Expression
      = f:ident() "(" a:expr() ")" {
        Expression::Application {
          func: Box::new(Expression::Variable { 
            id: f 
          }),
          arg: Box::new(a)
        }
      }
      rule cond() -> Expression
      = "if" w() c:expr() w() "=" w() "{" w() t:expr() w() "};" w() 
        "else" w() "=" w() "{" w() e:expr() w() "}" {
        Expression::Conditional {
          if_cond: Box::new(c),
          if_then: Box::new(t),
          if_else: Box::new(e)
        }
      }
      // fixme: cannot say e:(expr()), left-recursion issue
      rule ann() -> Expression
      = e:(cond() / abs() / app() / cons() / var()) w()? ":" w() k:kind() {
        Expression::Annotation {
          expr: Box::new(e),
          kind: k
        }
      }
      // identifiers
      rule ident() -> String = i:['a'..='z' | 'A'..='Z' | '0'..='9']+ {
        i.iter().collect::<String>()
      }
      rule var() -> Expression
      = v:ident() {
        Expression::Variable {
          id: v
        }
      }
      // constants
      rule cons() -> Expression = p:"-"? c:['0'..='9']+ {
        let value = c.iter().collect::<String>().parse::<Value>().unwrap();
        Expression::Constant {
          term: Term {
            val: if let Some(_) = p {
              value.wrapping_neg()
            } else {
              value
            },
            kind: Type::Empty // fixme
          }
        }
      }
      // types
      rule primitive() -> Type = k:$("empty" / "unit" / "bool" / "nat" / "int") {
        match k {
          "empty" => Type::Empty,
          "unit" => Type::Unit,
          "bool" => Type::Boolean,
          "nat" => Type::Natural,
          "int" => Type::Integer,
          _ => Type::Empty // never happens
        }
      }
      // fixme: parenthesis necessary, left-recursion issue
      rule function() -> Type = "(" w()? f:kind() w()? "->" w()? t:kind() w()? ")" {
        Type::Function { from: Box::new(f), to: Box::new(t) }
      }
      // todo: records, etc
      rule kind() -> Type
       = k:(function() / primitive()) {
        k
      }
      pub rule expr() -> Expression
      = e:(ann() / cond() / abs() / app() / cons() / var()) ";" {
        e
      }
      pub rule file() -> Vec<Expression>
      = expr() ++ "\n"
    }
  }
  return puck::file(input);
}
func negate(x): (bool -> bool) =
  if x: 0
  else: 1

func fib(x): (int -> int) =
  if eq(x, 0):
    0
  else:
    if eq(x, 1):
      1
    else:       # comment
      add(fib(sub(x, 1)), 
          fib(sub(x, 2)))

negate(negate(1))
fib(5)
/// Converts a whitespace-indented language 
/// into a bracketed language for PEG matching.
pub fn lex(input: &str) -> 
    Result<String, &'static str> {
  #[derive(Eq, PartialEq)]
  enum Previous {
    Start,
    Block,
    Line,
  }
  struct State {
    blank: bool, // entirely whitespace so far?
    level: usize, // current indentation level
    count: usize, // current whitespace count
    previous: Previous,
    comment: bool // current line a comment?
  }
  let indent_size: usize = 2;

  let mut state = State { 
    blank: true, 
    level: 0, 
    count: 0, 
    previous: Previous::Start, 
    comment: false 
  };
  let mut buffer = String::new();
  let mut result = String::new();

  // wow lexers are hard
  for c in input.chars() {
    match c {
      '\n' => {
      if !buffer.is_empty() {
        if state.count == state.level {
          if state.previous != 
            Previous::Start {
            result.push(';');
            result.push('\n');
          }
          state.previous = Previous::Line;
        } else if state.level + indent_size
               == state.count {
          result.push(' ');
          result.push('{');
          result.push('\n');
          state.level = state.count;
          state.previous = Previous::Line;
        } else if state.count > 
                  state.level + indent_size {
          return Err("invalid indent jump");
        } else if state.count % indent_size 
                                       != 0 {
          return Err("incorrect indent offset");
        } else if state.level > state.count {
          while state.level > state.count {
            if state.previous == 
               Previous::Line {
              result.push(';');
            }
            state.level -= indent_size;
            result.push('\n');
            result.push_str(
                   &" ".repeat(state.level));
            result.push('}');
            result.push(';');
            state.previous = Previous::Block;
          }
          result.push('\n');
        } else {
          return Err("unknown indentation");
        }

        result.push_str(
               &" ".repeat(state.count));
        result.push_str(&buffer);

        state.count = 0;
        state.comment = false;
        buffer.clear();
        }
        state.blank = true;
      },
      ' ' if state.blank => {
        state.count += 1;
      },
      '#' => {
        state.blank = false;
        state.comment = true;
      },
      _ => {
        if state.blank {
          state.blank = false;
        }
        if !state.comment {
          buffer.push(c);
        }
      },
    }
  }
  if state.previous == Previous::Line {
    result.push(';');
  }
  while state.level != 0 {
    state.level -= 2;
    result.push('\n');
    result.push_str(&" ".repeat(state.level));
    result.push('}');
    result.push(';');
  }
  return Ok(result);
}

don't write a parser

i now fully appreciate the role of lexers, whitespace and PEGs do *not* mix

subtyping

/// The subtyping relation between any two types.
pub fn subtype(is: &Type, of: &Type) -> bool {
    match (is, of) {
        (Type::Record(is_fields), Type::Record(of_fields)) => {
            // width, depth, and permutation
            for (key, of_value) in of_fields {
                match is_fields.get(key) {
                    Some(is_value) => if !subtype(is_value, of_value) {
                        return false;
                    },
                    None => return false
                }
            }
            return true;
        },
        (Type::Function { from: is_from, to: is_to }, 
         Type::Function { from: of_from, to: of_to }) => {
            subtype(of_from, is_from) && subtype(is_to, of_to)
        }
        (Type::Natural, Type::Integer) => true, // obviously not, but let's pretend
        (_, Type::Empty) => true,
        (Type::Error, _) => true,
        (_, _) if is == of => true,
        (_, _) => false,
    }
}

advanced ish types

  • function types
  • structs/records/products: three types for the price of one! (note: remove this, this isn't funny)
  • subtyping determined with a subtype function
  • drop-in replacement for type equality (==) in impl
  • if branches must be mutual subtypes
pub type Identifier = String;
pub type Context = 
    HashMap<Identifier, Term>;

pub type Value = u64;

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Type {
    Empty,  // top
    Error,  // bottom
    Unit,
    Boolean,
    Natural,
    Integer,
    // Float,
    // String,
    Enum(Vec<Type>),
    Record(HashMap<Identifier, Type>),
    Function{from: Box<Type>, 
             to: Box<Type>
    },
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Term {
    pub val: Value,
    pub kind: Type,
}

future plans

  • complete and test subtyping
  • extend system with additional higher-level types
    • enums
    • actually distinct naturals and integers
    • floats
    • strings (ugh), vecs, collections of data
  • type classes (still really want to implement), needs:
    • a global context + first class functions
    • resolving functions via identifiers
  • then, write another parser

Questions?
 

no