diff options
author | JJ | 2024-05-17 00:40:34 +0000 |
---|---|---|
committer | JJ | 2024-05-17 00:40:34 +0000 |
commit | e04af86491d97b297406cc4cd0d77fbbfc3a94c4 (patch) | |
tree | 6a97523e328f6070ae201ea325beb26d09e5c430 /docs/book/print.html | |
parent | 2d531db8eda6dfb62c2710296b5aaa3de190ac35 (diff) |
docs: update website
Diffstat (limited to 'docs/book/print.html')
-rw-r--r-- | docs/book/print.html | 1105 |
1 files changed, 691 insertions, 414 deletions
diff --git a/docs/book/print.html b/docs/book/print.html index be7e7c8..8803537 100644 --- a/docs/book/print.html +++ b/docs/book/print.html @@ -8,7 +8,7 @@ <!-- Custom HTML head --> - + <meta name="description" content=""> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="theme-color" content="#ffffff"> @@ -177,39 +177,55 @@ <h1 id="-puck---an-experimental-programming-language"><a class="header" href="#-puck---an-experimental-programming-language">🧚 puck - an experimental programming language</a></h1> <p>A place where I can make some bad decisions.</p> <p>Puck is an experimental, memory safe, structurally typed, interface-first, imperative programming language. -It aims to be clean and succinct while performant: inspired by the syntax and metaprogramming of <a href="https://nim-lang.org/">Nim</a>, the error handling of <a href="https://www.swift.org/">Swift</a>, the performance and safety guarantees of <a href="https://www.rust-lang.org/">Rust</a>, the async/await and comptime of <a href="https://ziglang.org/">Zig</a>, and the module system of <a href="https://ocaml.org/">OCaml</a>.</p> +It aims to be consistent and succinct while performant: inspired by the syntax and metaprogramming of <a href="https://nim-lang.org/">Nim</a>, the error handling of <a href="https://www.swift.org/">Swift</a>, the memory management of <a href="https://www.rust-lang.org/">Rust</a> and <a href="https://koka-lang.github.io/">Koka</a>, the async/await and comptime of <a href="https://ziglang.org/">Zig</a>, and the module system of <a href="https://ocaml.org/">OCaml</a>.</p> <details> -<summary><b>Example: Interfaces</b></summary> -<pre><code class="language-nim"># Note: These declarations are adapted from the standard prelude. +<summary><b>Example: Type Classes</b></summary> +<pre><code class="language-puck"># Note: These declarations are adapted from the standard prelude. ## The Result type. Represents either success or failure. 
pub type Result[T, E] = union Okay(T) Error(E) -## The Err interface. Useful for dynamically dispatching errors. -pub type Err = interface +## The Err class. Useful for dynamically dispatching errors. +pub type Err = class str(Self): str dbg(Self): str ## A Result type that uses dynamically dispatched errors. -## The Error may be any type implementing Err. +## The Error may be any type implementing the Err class. pub type Result[T] = Result[T, ref Err] -## Implements the dbg function for strings. -## As the str function is already defined for strings, +## Implements the `dbg` function for strings. +## As the `str` function is already defined for strings, ## this in turn means strings now implicitly implement Err. -pub func dbg(self: str) = "\"" & self & "\"" +pub func dbg(self: str) = "\"" & self & "\"" +</code></pre> +</details> +<details> +<summary><b>Example: Metaprogramming</b></summary> +<pre><code class="language-puck"># Note: These declarations are adapted from the standard prelude. + +## Syntactic sugar for dynamic result type declarations. +pub macro !(T: type) = + quote Result[`T`] + +## Indirect access. Propagates `Error`. +pub macro ?[T, E](self: Result[T, E]) = + quote + match `self` + of Okay(x) then x + of Error(e) then return Error(e) </code></pre> </details> <details open> <summary><b>Example: Pattern Matching</b></summary> -<pre><code class="language-nim">## Opens the std.tables module for unqualified use. +<pre><code class="language-puck">## Opens the std.tables module for unqualified use. use std.tables -pub type Value = string -pub type Ident = string -pub type Expr = ref union +pub type Value = str +pub type Ident = str +pub type Expr = ref union # tagged, algebraic unions Literal(Value) Variable(Ident) Abstraction(param: Ident, body: Expr) @@ -218,29 +234,50 @@ pub type Expr = ref union then_branch: Expr, else_branch: Expr) ## Evaluate an Expr down to a Value, or return an Error. 
-pub func eval(context: mut HashTable[Ident, Value], expr: Expr): Result[Value] - match expr - of Literal(value): Okay(value) - of Variable(ident): - context.get(ident) - .err("Could not find variable {} in context!".fmt(ident)) - of Application(body, arg): - if body of Abstraction(param, body as inner_body): +pub func eval(context: mut Table[Ident, Value], expr: lent Expr): Value! = + match expr # structural pattern matching and guards are supported but not shown + of Literal(value) then + Okay(value.clone) # ownership necessitates we explicitly clone + of Variable(ident) then + context.get(ident) # whitespace is significant but flexible + .err("Could not find variable {} in context!" + .fmt(ident)) # uniform function call syntax allows arbitrary piping/chaining + of Application(body, arg) then + if body of Abstraction(param, body as inner_body) then # compact matching with if context.set(param, context.clone.eval(arg)?) - context.eval(inner_body) - else: - Error("Expected Abstraction, found body {} and argument {}".fmt(body, arg)) - of Conditional(condition, then_branch, else_branch): - if context.clone.eval(condition)? == "true": + context.eval(inner_body) # all values must be handled: returns are implicit + else + Error("Expected Abstraction, found body {} and arg {}".fmt(body.clone, arg.clone)) + of Conditional(condition, then_branch, else_branch) then + if context.clone.eval(condition)? == "true" then context.eval(then_case) - else: + else context.eval(else_case) - of _: Error("Invalid expression {}".fmt(expr)) + of _ then Error("Invalid expression {}".fmt(expr)) </code></pre> </details> <details> <summary><b>Example: Modules</b></summary> -<pre><code class="language-nim">... +<pre><code class="language-puck"># The top-level module declaration can be elided if the file shares the same name. +pub mod tables = + ## The Table class. Any sort of table - no matter the underlying + ## representation - must implement these methods. 
+ pub type Table[K, V] = class + get(lent Self, lent K): lent V? + get(mut Self, lent K): mut V? + set(mut Self, lent K, V): V? + pop(mut Self, lent K): V? + clear(mut Self) + size(lent Self): uint + init(varargs (K, V)): Self + + ... + + pub mod hashtable = + use std.hashes + + pub type HashTable[K, V] = struct + ... </code></pre> </details> <h2 id="why-puck"><a class="header" href="#why-puck">Why Puck?</a></h2> @@ -249,8 +286,8 @@ Don't use it. Everything is unimplemented and it will break underneath your feet <p>That said: in the future, once somewhat stabilized, reasons why you <em>would</em> use it would be for:</p> <ul> <li>The <strong>syntax</strong>, aiming to be flexible, predictable, and succinct, through the use of <em>uniform function call syntax</em> and significant whitespace</li> -<li>The <strong>type system</strong>, being modern and powerful with a strong emphasis on safety, optional and result types, algebraic data types, interfaces, and modules</li> -<li>The <strong>memory management system</strong>, implementing a model of strict ownership while allowing individual fallbacks to reference counts if so desired</li> +<li>The <strong>type system</strong>, being modern and powerful with a strong emphasis on safety, algebraic data types, optional and result types, first-class functions, generics, interfaces, and modules</li> +<li>The <strong>memory management system</strong>, implementing a model of strict ownership with an optimized reference counting escape hatch</li> <li>The <strong>metaprogramming</strong>, providing integrated macros capable of rewriting the abstract syntax tree before or after typechecking</li> <li>The <strong>interop system</strong>, allowing foreign functions to be usable with native semantics from a bevy of languages</li> </ul> @@ -278,14 +315,15 @@ Don't use it. 
Everything is unimplemented and it will break underneath your feet <li><a href="https://matklad.github.io/2023/01/25/next-rust-compiler.html">Notes on the next Rust compiler</a></li> </ul> <div style="break-before: page; page-break-before: always;"></div><h1 id="an-overview-of-puck"><a class="header" href="#an-overview-of-puck">An Overview of Puck</a></h1> -<p>Puck is an experimental, high-level, memory-safe, statically-typed, whitespace-sensitive, interface-oriented, imperative programming language with functional underpinnings. </p> -<p>It attempts to explore designs in making functional programming paradigms comfortable to those familiar with imperative and object-oriented languages, as well as deal with some more technical problems along the way, such as integrated refinement types and typesafe interop. </p> +<p>Puck is an experimental, high-level, memory-safe, statically-typed, whitespace-sensitive, interface-oriented, imperative programming language with functional underpinnings.</p> +<p>It attempts to explore designs in making functional programming paradigms comfortable to those familiar with imperative and object-oriented languages, as well as deal with some more technical problems along the way, such as integrated refinement types and typesafe interop.</p> <p>This is the language I keep in my head. It reflects the way I think and reason about code.</p> <p>I do hope others enjoy it.</p> +<h2 id="declarations-and-comments"><a class="header" href="#declarations-and-comments">Declarations and Comments</a></h2> <pre><code class="language-puck">let ident: int = 413 # type annotations are optional -var phrase = "Hello, world!" -const compile_time = when linux: "linux" else: "windows" +var phrase = "Hello, world!" +const compile_time = when linux then "linux" else "windows" </code></pre> <p>Variables may be mutable (<code>var</code>), immutable (<code>let</code>), or compile-time evaluated and immutable (<code>const</code>). 
Type annotations on variables and other bindings follow the name of the binding (with <code>: Type</code>), and are typically optional. @@ -294,8 +332,7 @@ The type system is comprehensive, and complex enough to warrant delaying full co <ul> <li><code>int</code>, <code>uint</code>: signed and unsigned integers <ul> -<li><code>i8</code>/<code>i16</code>/<code>i32</code>/<code>i64</code>/<code>i128</code>: their fixed-size counterparts</li> -<li><code>u8</code>/<code>u16</code>/<code>u32</code>/<code>u64</code>/<code>u128</code>: their fixed-size counterparts</li> +<li><code>i[\d+]</code>, <code>u[\d+]</code>: arbitrary fixed-size counterparts</li> </ul> </li> <li><code>float</code>, <code>decimal</code>: floating-point numbers @@ -305,30 +342,33 @@ The type system is comprehensive, and complex enough to warrant delaying full co </ul> </li> <li><code>byte</code>: an alias to <code>u8</code>, representing one byte</li> -<li><code>chr</code>: an alias to <code>u32</code>, representing one Unicode character</li> +<li><code>char</code>: an alias to <code>u32</code>, representing one Unicode character</li> <li><code>bool</code>: defined as <code>union[false, true]</code></li> -<li><code>array[T, S]</code>: primitive fixed-size (<code>S</code>) arrays</li> +<li><code>array[T, size]</code>: primitive fixed-size arrays</li> <li><code>list[T]</code>: dynamic lists</li> -<li><code>str</code>: mutable strings. internally a <code>list[byte]</code>, externally a <code>list[chr]</code></li> -<li><code>slice[T]</code>: borrowed "views" into the three types above</li> +<li><code>str</code>: mutable strings. internally a <code>list[byte]</code>, externally a <code>list[char]</code></li> +<li><code>slice[T]</code>: borrowed "views" into the three types above</li> </ul> <p>Comments are declared with <code>#</code> and run until the end of the line. Documentation comments are declared with <code>##</code> and may be parsed by language servers and other tooling. 
Multi-line comments are declared with <code>#[ ]#</code> and may be nested. Taking cues from the Lisp family of languages, any expression may be commented out with a preceding <code>#;</code>.</p> +<h2 id="functions-and-indentation"><a class="header" href="#functions-and-indentation">Functions and Indentation</a></h2> <pre><code class="language-puck"></code></pre> -<p>Functions are declared with the <code>func</code> keyword. They take an (optional) list of generic parameters (in brackets), an (optional) list of parameters (in parentheses), and <strong>must</strong> be annotated with a return type if they return a type. Every function parameter must be annotated with a type. Their type may optionally be prefixed with either <code>mut</code> or <code>static</code>: denoting a <em>mutable</em> type (types are copied into functions and thus immutable by default), or a <em>static</em> type (known to the compiler at compile time, and usable in <code>const</code> exprs). Generic parameters may each be optionally annotated with a type functioning as a <em>constraint</em>.</p> +<p>Functions are declared with the <code>func</code> keyword. They take an (optional) list of generic parameters (in brackets), an (optional) list of parameters (in parentheses), and <strong>must</strong> be annotated with a return type if they return a type. Every function parameter must be annotated with a type. Their type may optionally be prefixed with either <code>lent</code>, <code>mut</code> or <code>const</code>: denoting an immutable or mutable borrow (more on these later), or a <em>constant</em> type (known to the compiler at compile time, and usable in <code>const</code> exprs). Generic parameters may each be optionally annotated with a type functioning as a <em>constraint</em>.</p> <!-- Functions, constants, types, and modules may be optionally prefixed with a `pub` modifier denoting visibility outside the current scope / module. More on the module system later. 
--> -<p>Whitespace is significant but flexible: functions may be declared entirely on one line if so desired. A new level of indentation after certain tokens (<code>:</code>, <code>=</code>) denotes a new level of scope. There are some places where arbitrary indentation and line breaks are allowed - as a general rule of thumb, after operators, commas, and opening parentheses. The particular rules governing indentation may be found in the <a href="SYNTAX.html#indentation-rules">syntax guide</a>.</p> +<p>Whitespace is significant but flexible: functions may be declared entirely on one line if so desired. A new level of indentation after certain tokens (<code>=</code>, <code>do</code>, <code>then</code>) denotes a new level of scope. There are some places where arbitrary indentation and line breaks are allowed - as a general rule of thumb, after operators, commas, and opening parentheses. The particular rules governing indentation may be found in the <a href="SYNTAX.html#indentation-rules">syntax guide</a>.</p> +<h2 id="uniform-function-call-syntax"><a class="header" href="#uniform-function-call-syntax">Uniform Function Call Syntax</a></h2> <pre><code class="language-puck">func inc(self: list[int], by: int): list[int] = self.map(x => x + by) -print inc([1, 2, 3], len("four")) # 5, 6, 7 +print inc([1, 2, 3], len("four")) # 5, 6, 7 print [1, 2, 3].inc(1) # 2, 3, 4 print [1].len # 1 </code></pre> <p>Puck supports <em>uniform function call syntax</em>: and so any function may be called using the typical syntax for method calls, that is, the first parameter of any function may be appended with a <code>.</code> and moved to precede it, in the style of a typical method. (There are no methods in Puck. All functions are statically dispatched. This may change in the future.)</p> <p>This allows for a number of syntactic cleanups. Arbitrary functions with compatible types may be chained with no need for a special pipe operator. 
Object field access, module member access, and function calls are unified, reducing the need for getters and setters. Given a first type, IDEs using dot-autocomplete can fill in all the functions defined for that type. Programmers from object-oriented languages may find the lack of classes more bearable. UFCS is implemented in shockingly few languages, and so Puck joins the tiny club that previously consisted of just D and Nim.</p> +<h2 id="basic-types"><a class="header" href="#basic-types">Basic Types</a></h2> <pre><code class="language-puck"></code></pre> <p>Boolean logic and integer operations are standard and as one would expect out of a typed language: <code>and</code>, <code>or</code>, <code>xor</code>, <code>not</code>, <code>shl</code>, <code>shr</code>, <code>+</code>, <code>-</code>, <code>*</code>, <code>/</code>, <code><</code>, <code>></code>, <code><=</code>, <code>>=</code>, <code>div</code>, <code>mod</code>, <code>rem</code>. Notably:</p> <ul> @@ -336,88 +376,142 @@ print [1].len # 1 <li>integer division is expressed with the keyword <code>div</code> while floating point division uses <code>/</code></li> <li><code>%</code> is absent and replaced with distinct modulus and remainder operators</li> <li>boolean operators are bitwise and also apply to integers and floats</li> -<li>more operators are available via the standard library</li> +<li>more operators are available via the standard library (<code>exp</code> and <code>log</code>)</li> </ul> <p>The above operations are performed with <em>operators</em>, special functions that take a prefixed first argument and (often) a suffixed second argument. 
Custom operators may be implemented, but they must consist of only a combination of the symbols <code>=</code> <code>+</code> <code>-</code> <code>*</code> <code>/</code> <code><</code> <code>></code> <code>@</code> <code>$</code> <code>~</code> <code>&</code> <code>%</code> <code>|</code> <code>!</code> <code>?</code> <code>^</code> <code>\</code> for the purpose of keeping the grammar context-free. They are declared identically to functions.</p> -<p>Term (in)equality is expressed with the <code>==</code> and <code>!=</code> operators. Type equality is expressed with <code>is</code>. Subtyping relations may be queried with <code>of</code>, which has the additional property of introducing new bindings in the current scope (more on this in the <a href="TYPES.html">types document</a>). <!-- Membership of collections is expressed with `in`, and is overloaded for most types. --></p> -<pre><code class="language-puck">let phrase: str = "I am a string! Wheeee! ✨" -for c in phrase: +<p>Term (in)equality is expressed with the <code>==</code> and <code>!=</code> operators. Type equality is expressed with <code>is</code>. Subtyping relations may be queried with <code>of</code>, which has the additional property of introducing new bindings to the current scope in certain contexts (more on this in the <a href="TYPES.html">types document</a>).</p> +<pre><code class="language-puck">let phrase: str = "I am a string! Wheeee! ✨" +for c in phrase do stdout.write(c) # I am a string! Wheeee! ✨ -for b in phrase.bytes(): - stdout.write(b.chr) # Error: cannot convert between u8 and chr +for b in phrase.bytes() do + stdout.write(b.char) # Error: cannot convert from byte to char print phrase.last() # ✨ </code></pre> <p>String concatenation uses a distinct <code>&</code> operator rather than overloading the <code>+</code> operator (as the complement <code>-</code> has no natural meaning for strings). 
Strings are unified, mutable, internally a byte array, externally a char array, and are stored as a pointer to heap data after their length and capacity (fat pointer). Chars are four bytes and represent a Unicode character in UTF-8 encoding. Slices of strings are stored as a length followed by a pointer to string data, and have non-trivial interactions with the memory management system. More details can be found in the <a href="TYPES.html">type system overview</a>.</p> +<h2 id="conditionals-and-pattern-matching"><a class="header" href="#conditionals-and-pattern-matching">Conditionals and Pattern Matching</a></h2> <pre><code class="language-puck"></code></pre> -<p>Basic conditional control flow uses standard <code>if</code>/<code>elif</code>/<code>else</code> statements. The <code>when</code> statement provides a compile-time <code>if</code>. It also takes <code>elif</code> and <code>else</code> branches and is syntactic sugar for an <code>if</code> statement within a <code>static</code> block (more on those later).</p> -<p>All values in Puck must be handled, or explicitly discarded. This allows for conditional statements and many other control flow constructs to function as <em>expressions</em>, and evaluate to a value, when an unbound value is left at the end of each of their branches' scopes. This is particularly relevant for <em>functions</em>, where it is often idiomatic to omit an explicit <code>return</code> statement. There is no attempt made to differentiate without context, and so expressions and statements often look identical in syntax.</p> +<p>Basic conditional control flow uses standard <code>if</code>/<code>elif</code>/<code>else</code> statements. The <code>when</code> statement provides a compile-time <code>if</code>. 
It also takes <code>elif</code> and <code>else</code> branches and is syntactic sugar for an <code>if</code> statement within a <code>const</code> expression (more on those later).</p> +<p>All values in Puck must be handled, or explicitly discarded. This allows for conditional statements and many other control flow constructs to function as <em>expressions</em>: and evaluate to a value when an unbound value is left at the end of each of their branches' scopes. This is particularly relevant for <em>functions</em>, where it is often idiomatic to omit an explicit <code>return</code> statement. There is no attempt made to differentiate without context, and so expressions and statements often look identical in syntax.</p> <pre><code class="language-puck"></code></pre> <p>Exhaustive structural pattern matching is available with the <code>match</code>/<code>of</code> statement, and is particularly useful for the <code>struct</code> and <code>union</code> types. <code>of</code> branches of a <code>match</code> statement take a <em>pattern</em>, of which the unbound identifiers within will be injected into the branch's scope. Multiple patterns may be used for one branch provided they all bind the same identifiers of the same type. Branches may be <em>guarded</em> with the <code>where</code> keyword, which takes a conditional, and will necessarily remove the branch from exhaustivity checks.</p> <!-- todo: structural matching of lists and arrays --> -<p>The <code>of</code> statement also stands on its own as an operator for querying subtype equality. Used as a conditional in <code>if</code> statements or <code>while</code> loops, it retains the variable injection properties of its <code>match</code> counterpart. 
This allows it to be used as a compact <!-- and coherent --> alternative to <code>if let</code> statements in other languages.</p> -<pre><code class="language-puck">func may_fail: Result[T, ref Err] +<p>The <code>of</code> statement also stands on its own as an operator for querying subtype equality. Used as a conditional in <code>if</code> statements or <code>while</code> loops, it retains the variable injection properties of its <code>match</code> counterpart. This allows it to be used as a compact and coherent alternative to <code>if let</code> statements in other languages.</p> +<h2 id="error-handling"><a class="header" href="#error-handling">Error Handling</a></h2> +<pre><code class="language-puck">type Result[T] = Result[T, ref Err] +func may_fail: Result[T] = ... </code></pre> -<p>Error handling is done via a fusion of imperative <code>try</code>/<code>catch</code> statements and functional <code>Option</code>/<code>Result</code> types, with much syntactic sugar. Functions may <code>raise</code> errors, but should return <code>Option[T]</code> or <code>Result[T, E]</code> types instead by convention. The compiler will note functions that <code>raise</code> errors, and force explicit qualification of them via <code>try</code>/<code>catch</code> statements.</p> -<p>A bevy of helper functions and macros are available for <code>Option</code>/<code>Result</code> types, and are documented and available in the <code>std.options</code> and <code>std.results</code> modules (included in the prelude by default). Two in particular are of note: the <code>?</code> macro accesses the inner value of a <code>Result[T, E]</code> or propagates (returns in context) the <code>Error(e)</code>, and the <code>!</code> accesses the inner value of an <code>Option[T]</code> / <code>Result[T, E]</code> or raises an error on <code>None</code> / the specific <code>Error(e)</code>. Both operators take one parameter and so are postfix. 
(There is additionally another <code>?</code> postfix macro, taking in a type, as a shorthand for <code>Option[T]</code>)</p> -<p>The utility of the <code>?</code> macro is readily apparent to anyone who has written code in Rust or Swift. The utility of the <code>!</code> function is perhaps less so obvious. These errors raised by <code>!</code>, however, are known to the compiler: and they may be comprehensively caught by a single or sequence of <code>catch</code> statements. This allows for users used to a <code>try</code>/<code>catch</code> error handling style to do so with ease, with only the need to add one additional character to a function call.</p> +<p>Error handling is done via a fusion of functional monadic types and imperative exceptions, with much syntactic sugar. Functions may <code>raise</code> exceptions, but by convention should return <code>Option[T]</code> or <code>Result[T, E]</code> types instead: these may be handled in <code>match</code> or <code>if</code>/<code>of</code> statements. The compiler will track functions that <code>raise</code> errors, and warn on those that are not handled explicitly via <code>try</code>/<code>with</code> statements.</p> +<p>A bevy of helper functions and macros are available for <code>Option</code>/<code>Result</code> types, and are documented and available in the <code>std.options</code> and <code>std.results</code> modules (included in the prelude by default). Two in particular are of note: the <code>?</code> macro accesses the inner value of a <code>Result[T, E]</code> or propagates (returns in context) the <code>Error(e)</code>, and the <code>!</code> accesses the inner value of an <code>Option[T]</code> / <code>Result[T, E]</code> or raises an error on <code>None</code> / the specific <code>Error(e)</code>. Both operators take one parameter and so are postfix. 
The <code>?</code> and <code>!</code> macros are overloaded and additionally function on types as shorthand for <code>Option[T]</code> and <code>Result[T]</code> respectively.</p> +<p>The utility of the <code>?</code> macro is readily apparent to anyone who has written code in Rust or Swift. The utility of the <code>!</code> function is perhaps less obvious. These errors raised by <code>!</code>, however, are known to the compiler: and they may be comprehensively caught by a single <code>catch</code> statement or a sequence of them. This allows for users used to a <code>try</code>/<code>with</code> error handling style to do so with ease, with only the need to add one additional character to a function call.</p> <p>More details may be found in the <a href="ERRORS.html">error handling overview</a>.</p> -<pre><code class="language-puck">loop: - print "This will never normally exit." +<h2 id="blocks-and-loops"><a class="header" href="#blocks-and-loops">Blocks and Loops</a></h2> +<pre><code class="language-puck">loop + print "This will never normally exit." break -for i in 0 .. 3: # exclusive - for j in 0 ..= 3: # inclusive - print "{} {}".fmt(i, j) +for i in 0 .. 3 do # exclusive + for j in 0 ..= 3 do # inclusive + print "{} {}".fmt(i, j) </code></pre> <p>Three types of loops are available: <code>for</code> loops, <code>while</code> loops, and infinite loops (<code>loop</code> loops). For loops take a binding (which may be structural, see pattern matching) and an iterable object and will loop until the iterable object is spent. While loops take a condition that is executed upon the beginning of each iteration to determine whether to keep looping. 
Infinite loops are infinite are infinite are infinite are infinite are infinite are infinite and must be manually broken out of.</p> -<p>There is no special concept of iterators: iterable objects are any object that implements the <code>Iter[T]</code> interface (more on those in <a href="TYPES.html">the type system document</a>), that is, provides a <code>self.next()</code> function returning an <code>Option[T]</code>. As such, iterators are first-class constructs. For loops can be thought of as while loops that unwrap the result of the <code>next()</code> function and end iteration upon a <code>None</code> value. While loops, in turn, can be thought of as infinite loops with an explicit conditional break.</p> +<p>There is no special concept of iterators: iterable objects are any object that implements the <code>Iter[T]</code> class (more on those in <a href="TYPES.html">the type system document</a>), that is, provides a <code>self.next()</code> function returning an <code>Option[T]</code>. As such, iterators are first-class constructs. For loops can be thought of as while loops that unwrap the result of the <code>next()</code> function and end iteration upon a <code>None</code> value. While loops, in turn, can be thought of as infinite loops with an explicit conditional break.</p> <p>The <code>break</code> keyword immediately breaks out of the current loop, and the <code>continue</code> keyword immediately jumps to the next iteration of the current loop. Loops may be used in conjunction with blocks for more fine-grained control flow manipulation.</p> -<pre><code class="language-puck">block: +<pre><code class="language-puck">block statement let x = block: let y = read_input() transform_input(y) -block foo: - for i in 0 ..= 100: - block bar: - if i == 10: break foo +block foo + for i in 0 ..= 100 do + block bar + if i == 10 then break foo print i </code></pre> <p>Blocks provide arbitrary scope manipulation. They may be labelled or unlabelled. 
The <code>break</code> keyword additionally functions inside of blocks and without any parameters will jump out of the current enclosing block (or loop). It may also take a block label as a parameter for fine-grained scope control.</p> +<h2 id="module-system"><a class="header" href="#module-system">Module System</a></h2> <pre><code class="language-puck"></code></pre> <p>Code is segmented into modules. Modules may be made explicit with the <code>mod</code> keyword followed by a name, but there is also an implicit module structure in every codebase that follows the structure and naming of the local filesystem. For compatibility with filesystems, and for consistency, module names are exclusively lowercase (following the same rules as Windows).</p> <p>A module can be imported into another module by use of the <code>use</code> keyword, taking a path to a module or modules. Contrary to the majority of languages ex. Python, unqualified imports are <em>encouraged</em> - in fact, are idiomatic (and the default) - type-based disambiguation and official LSP support are intended to remove any ambiguity.</p> -<p>Within a module, functions, types, constants, and other modules may be <em>exported</em> for use by other modules with the <code>pub</code> keyword. All such identifiers are private by default and only accessible module-locally without. Modules are first-class and may be bound, inspected, modified, and returned. As such, imported modules may be <em>re-exported</em> for use by other modules by binding them to a public constant, i.e. <code>use my_module; pub const my_module = my_module</code>.</p> +<p>Within a module, functions, types, constants, and other modules may be <em>exported</em> for use by other modules with the <code>pub</code> keyword. All such identifiers are private by default and only accessible module-locally without. Modules are first-class and may be bound, inspected, modified, and returned. 
As such, imported modules may be <em>re-exported</em> for use by other modules by binding them to a public constant.</p> <p>More details may be found in the <a href="MODULES.html">modules document</a>.</p> +<h2 id="compile-time-programming"><a class="header" href="#compile-time-programming">Compile-time Programming</a></h2> <pre><code class="language-puck"></code></pre> -<p>Compile-time programming may be done via the previously-mentioned <code>const</code> keyword and <code>when</code> statements: or via <code>const</code> <em>blocks</em>. All code within a <code>const</code> block is evaluated at compile-time and all assignments and allocations made are propagated to the compiled binary as static data.</p> -<p>Further compile-time programming may be done via metaprogramming: compile-time manipulation of the abstract syntax tree. The macro system is complex, and a description may be found in the <a href="METAPROGRAMMING.html">metaprogramming document</a>.</p> +<p>Compile-time programming may be done via the previously-mentioned <code>const</code> keyword and <code>when</code> statements: or via <code>const</code> <em>blocks</em>. All code within a <code>const</code> block is evaluated at compile-time and all assignments and allocations made are propagated to the compiled binary as static data. Further compile-time programming may be done via macros: compile-time manipulation of the abstract syntax tree. The macro system is complex, and a description may be found in the <a href="METAPROGRAMMING.html">metaprogramming document</a>.</p> +<h2 id="async-system-and-threading"><a class="header" href="#async-system-and-threading">Async System and Threading</a></h2> <pre><code class="language-puck"></code></pre> <p>The async system is <em>colourblind</em>: the special <code>async</code> macro will turn any function <em>call</em> returning a <code>T</code> into an asynchronous call returning a <code>Future[T]</code>. 
The special <code>await</code> function will wait for any <code>Future[T]</code> and return a <code>T</code> (or an error). Async support is included in the standard library in <code>std.async</code> in order to allow for competing implementations. More details may be found in the <a href="ASYNC.html">async document</a>.</p> <p>Threading support is complex and also relegated to external libraries. OS-provided primitives will likely provide a <code>spawn</code> function, and there will be substantial restrictions for memory safety. I really haven't given much thought to this.</p> -<pre><code class="language-puck"></code></pre> -<p>Details on memory safety, references and pointers, and deep optimizations may be found in the <a href="MEMORY_MANAGEMENT.html">memory management overview</a>. -The memory model intertwines deeply with the type system. <!-- todo --></p> -<pre><code class="language-puck"></code></pre> -<p>Finally, a few notes on the type system are in order.</p> -<p>Types are declared with the <code>type</code> keyword and are transparent aliases. -That is, <code>type Foo = Bar</code> means that any function defined for <code>Bar</code> is defined for <code>Foo</code> - that is, objects of type <code>Foo</code> can be used any time an object of type <code>Bar</code> is called for. -If such behavior is not desired, the <code>distinct</code> keyword forces explicit qualification and conversion of types. <code>type Foo = distinct Baz</code> will force a type <code>Foo</code> to be wrapped in a call to the constructor <code>Baz()</code> before being passed to such functions.</p> -<p>Types, like functions, can be <em>generic</em>: declared with "holes" that may be filled in with other types upon usage. A type must have all its holes filled before it can be constructed.
The syntax for generics in types much resembles the syntax for generics in functions, and <em>constraints</em> and the like also apply.</p> +<h2 id="memory-management"><a class="header" href="#memory-management">Memory Management</a></h2> +<pre><code class="language-puck"># Differences in Puck and Rust types in declarations and at call sites. +func foo(a: + lent T → &'a T + mut T → &'a mut T + T → T +): + lent T → &'a T + mut T → &'a mut T + T → T + +let t: T = ... +foo( # this is usually elided + lent t → &t + mut t → &mut t + t → t +) +</code></pre> +<p>Puck copies Rust-style ownership near verbatim. <code>&T</code> corresponds to <code>lent T</code>, <code>&mut T</code> to <code>mut T</code>, and <code>T</code> to <code>T</code>: with <code>T</code> implicitly convertible to <code>lent T</code> and <code>mut T</code> at call sites. A major goal of Puck is for all lifetimes to be inferred: there is no overt support for lifetime annotations, and it is likely code with strange lifetimes will be rejected before it can be inferred. (Total inference, however, <em>is</em> a goal.)</p> +<p>Another major difference is the consolidation of <code>Box</code>, <code>Rc</code>, <code>Arc</code>, <code>Cell</code>, <code>RefCell</code> into just two (magic) types: <code>ref</code> and <code>refc</code>. <code>ref</code> takes the role of <code>Box</code>, and <code>refc</code> both the role of <code>Rc</code> and <code>Arc</code>: while <code>Cell</code> and <code>RefCell</code> are disregarded. The underlying motivation for compiler-izing these types is to make deeper compiler optimizations accessible: particularly with <code>refc</code>, where the existing ownership framework is used to eliminate counts.
Details on memory safety, references and pointers, and deep optimizations may be found in the <a href="MEMORY_MANAGEMENT.html">memory management overview</a>.</p> +<h2 id="types-system"><a class="header" href="#types-system">Type System</a></h2> +<pre><code class="language-puck"># The type Foo is defined here as an alias to a list of bytes. +type Foo = list[byte] + +# implicit conversion to Foo in declarations +let foo: Foo = [1, 2, 3] + +func fancy_dbg(self: Foo) = + print "Foo:" + # iteration is defined for list[byte] + # so self is implicitly converted from Foo to list[byte] + for elem in self do + dbg(elem) + +# NO implicit conversion to Foo on calls +[4, 5, 6].fancy_dbg # this fails! + +Foo([4, 5, 6]).fancy_dbg # prints: Foo:\n 4\n 5\n 6\n +</code></pre> +<p>Finally, a few notes on the type system are in order. Types are declared with the <code>type</code> keyword and are aliases: all functions defined on a type carry over to its alias, though the opposite is not true. Functions defined on the alias <em>must</em> take an object known to be a type of that alias: exceptions are made for type declarations, but at call sites this means that conversion must be explicit.</p> +<pre><code class="language-puck"># We do not want functions defined on list[byte] to carry over, +# as strings function differently (operating on chars). +# So we declare `str` as a struct, rather than a type alias. +pub type str = struct + data: list[byte] + +# However, the underlying `empty` function is still useful. +# So we expose it in a one-liner alias. +# In the future, a `with` macro may be available to ease carryover. +pub func empty(self: str): bool = self.data.empty + +# Alternatively, if we want total transparent type aliasing, we can use constants.
+pub const MyAlias: type = VeryLongExampleType +</code></pre> +<p>If one wishes to define a new type <em>without</em> previous methods accessible, the newtype paradigm is preferred: declaring a single-field <code>struct</code>, and manually implementing functions that carry over. It can also be useful to have <em>transparent</em> type aliases, that is, simply a shorter name to refer to an existing type. These do not require type conversion, implicit or explicit, and can be used freely and interchangeably with their alias. This is done with constants.</p> +<p>Types, like functions, can be <em>generic</em>: declared with "holes" that may be filled in with other types upon usage. A type must have all its holes filled before it can be constructed. The syntax for generics in types much resembles the syntax for generics in functions, and generic <em>constraints</em> and the like also apply.</p> +<h2 id="structs-and-tuples"><a class="header" href="#structs-and-tuples">Structs and Tuples</a></h2> <pre><code class="language-puck">type MyStruct = struct a: str b: str -type MyTuple = tuple[str, b: str] +type MyTuple = (str, b: str) -let a: MyTuple = ("hello", "world") +let a: MyTuple = ("hello", "world") print a.1 # world print a.b # world </code></pre> -<p>Struct and tuple types are declared with <code>struct[<fields>]</code> and <code>tuple[<fields>]</code>, respectively. Their declarations make them look similar at a glance, but they differ fairly fundamentally. Structs are <em>unordered</em>, and every field must be named. They may be constructed with <code>{}</code> brackets. Tuples are <em>ordered</em> and so field names are optional - names are just syntactic sugar for positional access. Tuples may be constructed with <code>()</code> parenthesis.</p> -<p>I am undecided whether to allow <em>structural subtyping</em>: that is, <code>{a: Type, b: Type, c: Type}</code> being valid in a context expecting <code>{a: Type, b: Type}</code>. 
This has benefits (multiple inheritance with no boilerplate) but also downsides (obvious).</p> -<p>It is worth noting that there is no concept of <code>pub</code> at a field level on structs - a type is either fully transparent, or fully opaque. This is because such partial transparency breaks with structural initialization (how could one provide for hidden fields?). An idiomatic workaround is to model the desired field structure with a public-facing interface.</p> +<p>Struct and tuple types are declared with <code>struct[<fields>]</code> and <code>tuple[<fields>]</code>, respectively. Their declarations make them look similar at a glance, but they differ fairly fundamentally. Structs are <em>unordered</em>, and every field must be named. They may be constructed with <code>{}</code> brackets. Tuples are <em>ordered</em> and so field names are optional - names are just syntactic sugar for positional access (<code>foo.0</code>, <code>bar.1</code>, ...). Tuples are constructed with <code>()</code> parentheses: and also may be <em>declared</em> with such, as syntactic sugar for <code>tuple[...]</code>.</p> +<p>It is worth noting that there is no concept of <code>pub</code> at a field level on structs - a type is either fully transparent, or fully opaque. This is because such partial transparency breaks with structural initialization (how could one provide for hidden fields?). However, the <code>@[opaque]</code> attribute allows for expressing that the internal fields of a struct are not to be accessed or initialized: this, however, is only a compiler warning and can be totally suppressed with <code>@[allow(opaque)]</code>.</p> +<h2 id="unions-and-enums"><a class="header" href="#unions-and-enums">Unions and Enums</a></h2> <pre><code class="language-puck">type Expr = union Literal(int) Variable(str) @@ -427,53 +521,228 @@ print a.b # world <p>Union types are composed of a list of <em>variants</em>. 
Each variant has a <em>tag</em> and an <em>inner type</em> the union wraps over. Before the inner type can be accessed, the tag must be pattern matched upon, in order to handle all possible values. These are also known as <em>sum types</em> or <em>tagged unions</em> in other languages.</p> <p>Union types are the bread and butter of structural pattern matching. Composed with structs and tuples, unions provide for a very general programming construct commonly referred to as an <em>algebraic data type</em>. This is often useful as an idiomatic and safer replacement for inheritance.</p> -<pre><code class="language-puck">pub type Iter[T] = interface +<pre><code class="language-puck"></code></pre> +<p>Enum types are similarly composed of a list of <em>variants</em>. These variants, however, are static values: assigned at compile-time, and represented under the hood by a single integer. They function similarly to unions, and can be passed through to functions and pattern matched upon, however their underlying simplicity and default values mean they are much more useful for collecting constants and acting as flags than anything else.</p> +<h2 id="classes"><a class="header" href="#classes">Classes</a></h2> +<pre><code class="language-puck">pub type Iter[T] = class next(mut Self): T? -pub type Peek[T] = interface +pub type Peek[T] = class next(mut Self): T? peek(mut Self): T? peek_nth(mut Self, int): T? </code></pre> -<p>Interface types function much as type classes in Haskell or traits in Rust do. They are not concrete types, and cannot be constructed - instead, their utility is via indirection, as parameters or as <code>ref</code> types, providing constraints that some concrete type must meet. 
They consist of a list of function signatures, implementations of which must exist for the given type in order to compile.</p> -<p>Their major difference, however, is that Puck's interfaces are <em>implicit</em>: there is no <code>impl</code> block that implementations of their associated functions have to go under. If functions for a concrete type exist satisfying some interface, the type implements that interface. This does run the risk of accidentally implementing an interface one does not desire to, but the author believes such situations are few and far between, well worth the decreased syntactic and semantic complexity, and mitigatable with tactical usage of the <code>distinct</code> keyword.</p> -<p>As the compiler makes no such distinction between fields and single-argument functions on a type when determining identifier conflicts, interfaces similarly make no such distinction. They <em>do</em> distinguish mutable and immutable parameters, those being part of the type signature.</p> -<p>Interfaces are widely used throughout the standard library to provide general implementations of such conveniences like iteration, debug and display printing, generic error handling, and much more.</p> +<p>Class types function much as type classes in Haskell or traits in Rust do. They are not concrete types, and cannot be constructed - instead, their utility is via indirection, as parameters in functions or as <code>ref</code> types in structures, providing constraints that some concrete type must meet. They consist of a list of function signatures, implementations of which must exist for the given type passed in in order to compile.</p> +<p>Their major difference, however, is that Puck's classes are <em>implicit</em>: there is no <code>impl</code> block that implementations of their associated functions have to go under. If functions for a concrete type exist satisfying some class, the type implements that class. 
This does run the risk of accidentally implementing a class one does not desire to, but the author believes such situations are few and far between and well worth the decreased syntactic and semantic complexity. As a result, however, classes are entirely unable to guarantee any invariants hold (like <code>PartialOrd</code> or <code>Ord</code> in Rust do).</p> +<p>As the compiler makes no such distinction between fields and single-argument functions on a type when determining identifier conflicts, classes similarly make no such distinction. They <em>do</em> distinguish borrowed/mutable/owned parameters, those being part of the type signature.</p> +<p>Classes are widely used throughout the standard library to provide general implementations of such conveniences like iteration, debug and display printing, generic error handling, and much more.</p> <div style="break-before: page; page-break-before: always;"></div><h1 id="syntax-a-casual-and-formal-look"><a class="header" href="#syntax-a-casual-and-formal-look">Syntax: A Casual and Formal Look</a></h1> -<blockquote> -<p>! This section is <strong>incomplete</strong>. Proceed with caution.</p> -</blockquote> +<h2 id="call-syntax"><a class="header" href="#call-syntax">Call Syntax</a></h2> +<p>There is little difference between a function, macro, and operator call. There are only a few forms such calls can take, too, though notably more than most other languages (due to, among other things, uniform function call syntax): hence this section.</p> +<pre><code># The standard, unambiguous call. +routine(1, 2, 3, 4) +# The method call syntax equivalent. +1.routine(2, 3, 4) +# A block-based call. This is only really useful for macros taking in a body. +routine + 1 + 2 + 3 + 4 +# A parentheses-less call. This is only really useful for `print` and `dbg`. +# Only valid at the start of a line. +routine 1, 2, 3, 4 +</code></pre> +<p>Binary operators have some special rules.</p> +<pre><code># Valid call syntaxes for binary operators. 
What can constitute a binary +# operator is constrained for parsing's sake. Whitespace is optional. +1 + 2 +1+2 ++ 1, 2 # Only valid at the start of a line. Also, don't do this. ++(1, 2) +</code></pre> +<p>As do unary operators.</p> +<pre><code># The standard call for unary operators. Postfix. +1? +?(1) +</code></pre> +<p>Method call syntax has a number of advantages: notably that it can be <em>chained</em>: acting as a natural pipe operator. Redundant parentheses can also be omitted.</p> +<pre><code># The following statements are equivalent: +foo.bar.baz +foo().bar().baz() +baz(bar(foo)) +baz + bar + foo +baz bar(foo) +baz foo.bar +</code></pre> +<h2 id="indentation-rules"><a class="header" href="#indentation-rules">Indentation Rules</a></h2> +<p>The tokens <code>=</code>, <code>then</code>, <code>do</code>, <code>of</code>, <code>else</code>, <code>block</code>, <code>const</code>, <code>block X</code>, and <code>X</code> (where <code>X</code> is an identifier) are <em>scope tokens</em>. They denote a new scope for their associated expressions (functions/macros/declarations, control flow, loops). The tokens <code>,</code>, <code>.</code> (notably not <code>...</code>), and all default binary operators (notably not <code>not</code>) are <em>continuation tokens</em>. An expression beginning or ending in one of them would always be a syntactic error.</p> +<p>Line breaks are treated as the end of a statement, with several exceptions.</p> +<pre><code class="language-puck">pub func foo() = + print "Hello, world!" + print "This is from a function." + +pub func inline_decl() = print "Hello, world!" +</code></pre> +<p>Indented lines following a line ending in a <em>scope token</em> are treated as belonging to a new scope. That is, indented lines following a line ending in a scope token form the body of the expression associated with the scope token.</p> +<p>Indentation is not obligatory after a scope token.
However, this necessarily constrains the body of the associated expression to one line: no lines following will be treated as an extension of the body, only the expression associated with the original scope token. (This may change in the future.)</p> +<pre><code class="language-puck">pub func foo(really_long_parameter: ReallyLongType, +another_really_long_parameter: AnotherReallyLongType) = # no indentation! this is ok + print really_long_parameter # this line is indented relative to the first line + print another_really_long_parameter +</code></pre> +<p>Lines following a line ending in a <em>continuation token</em> (and, additionally <code>not</code> and <code>(</code>) are treated as a continuation of that line and can have any level of indentation (even negative). If they end in a scope token, however, the following lines must be indented relative to the indentation of the previous line.</p> +<pre><code class="language-puck">let really_long_parameter: ReallyLongType = ... +let another_really_long_parameter: AnotherReallyLongType = ... + +really_long_parameter + .foo(another_really_long_parameter) # some indentation! this is ok +</code></pre> +<p>Lines <em>beginning</em> in a continuation token (and, additionally <code>)</code>), too, are treated as a continuation of the previous line and can have any level of indentation. If they end in a scope token, the following lines must be indented relative to the indentation of the previous line.</p> +<pre><code class="language-puck">pub func foo() = + print "Hello, world!" +pub func bar() = # this line is no longer in the above scope. + print "Another function declaration." +</code></pre> +<p>Dedented lines <em>not</em> beginning or ending with a continuation token are treated as no longer in the previous scope, returning to the scope of the corresponding indentation level.</p> +<pre><code class="language-puck">if cond then this +else that + +match cond +of this then ... +of that then ...
+</code></pre> +<p>A line beginning with a scope token is treated as attached to the previous expression.</p> +<pre><code># Technically allowed. Please don't do this. +let foo += ... + +if cond then if cond then this +else that + +for i +in iterable +do ... + +match foo of this then ... +of that then ... + +match foo of this +then ... +of that then ... +</code></pre> +<p>This <em>can</em> lead to some ugly possibilities for formatting that are best avoided.</p> +<pre><code># Much preferred. + +let foo = + ... +let foo = ... + +if cond then + if cond then + this +else that +if cond then + if cond then this +else that + +for i in iterable do + ... +for i in iterable do ... + +match foo +of this then ... +of that then ... +</code></pre> +<p>The indentation rules are complex, but the effect is such that long statements can be broken <em>almost</em> anywhere.</p> +<h2 id="expression-rules"><a class="header" href="#expression-rules">Expression Rules</a></h2> +<p>First, a word on the distinction between <em>expressions</em> and <em>statements</em>. Expressions return a value. Statements do not. That is all.</p> +<p>There are some syntactic constructs unambiguously recognizable as statements: all declarations, modules, and <code>use</code> statements. There are no syntactic constructs unambiguously recognizable as expressions. As calls returning <code>void</code> are treated as statements, and expressions that return a type could possibly return <code>void</code>, there is no explicit distinction between expressions and statements made in the parser: or anywhere before type-checking.</p> +<p>Expressions can go almost anywhere. Our indentation rules above allow for it.</p> +<pre><code># Some different formulations of valid expressions. 
+ +if cond then + this +else + that + +if cond then this +else that + +if cond +then this +else that + +if cond then this else that + +let foo = + if cond then + this + else + that +</code></pre> +<pre><code># Some different formulations of *invalid* expressions. +# These primarily break the rule that everything following a scope token +# (ex. `=`, `do`, `then`) not at the end of the line must be self-contained. + +let foo = if cond then + this + else + that + +let foo = if cond then this + else that + +let foo = if cond then this +else that + +# todo: how to handle this? +if cond then if cond then that +else that + +# shrimple +if cond then + if cond then that +else that + +# this should be ok +if cond then this +else that + +match foo of +this then ... +of that then ... +</code></pre> <h2 id="reserved-keywords"><a class="header" href="#reserved-keywords">Reserved Keywords</a></h2> <p>The following keywords are reserved:</p> <ul> <li>variables: <code>let</code> <code>var</code> <code>const</code></li> -<li>control flow: <code>if</code> <code>elif</code> <code>else</code></li> +<li>control flow: <code>if</code> <code>then</code> <code>elif</code> <code>else</code></li> <li>pattern matching: <code>match</code> <code>of</code></li> -<li>loops: <code>loop</code> <code>while</code> <code>for</code> <code>in</code></li> -<li>blocks: <code>block</code> <code>break</code> <code>continue</code> <code>return</code></li> -<li>functions: <code>func</code> <code>mut</code> <code>static</code> <code>varargs</code></li> +<li>error handling: <code>try</code> <code>with</code> <code>finally</code></li> +<li>loops: <code>while</code> <code>do</code> <code>for</code> <code>in</code></li> +<li>blocks: <code>loop</code> <code>block</code> <code>break</code> <code>continue</code> <code>return</code></li> <li>modules: <code>pub</code> <code>mod</code> <code>use</code> <code>as</code></li> -<li>error handling: <code>try</code> <code>catch</code> <code>finally</code></li> +<li>functions: 
<code>func</code> <code>varargs</code></li> <li>metaprogramming: <code>macro</code> <code>quote</code> <code>when</code></li> -<li>types: <code>type</code> <code>distinct</code> <code>ref</code></li> -<li>types: <code>struct</code> <code>tuple</code> <code>union</code> <code>enum</code> <code>interface</code></li> -<li>reserved: +<li>ownership: <code>lent</code> <code>mut</code> <code>ref</code> <code>refc</code></li> +<li>types: <code>type</code> <code>struct</code> <code>tuple</code> <code>union</code> <code>enum</code> <code>class</code></li> +</ul> +<p>The following keywords are not reserved, but liable to become so.</p> <ul> -<li><code>impl</code> <code>object</code> <code>class</code> <code>concept</code> <code>auto</code> <code>empty</code> <code>effect</code> <code>case</code></li> -<li><code>suspend</code> <code>resume</code> <code>spawn</code> <code>pool</code> <code>thread</code> <code>closure</code></li> +<li><code>impl</code> <code>object</code> <code>interface</code> <code>concept</code> <code>auto</code> <code>effect</code> <code>case</code></li> +<li><code>suspend</code> <code>resume</code> <code>spawn</code> <code>pool</code> <code>thread</code> <code>closure</code> <code>static</code></li> <li><code>cyclic</code> <code>acyclic</code> <code>sink</code> <code>move</code> <code>destroy</code> <code>copy</code> <code>trace</code> <code>deepcopy</code></li> </ul> -</li> -</ul> <p>The following identifiers are in use by the standard prelude:</p> <ul> <li>logic: <code>not</code> <code>and</code> <code>or</code> <code>xor</code> <code>shl</code> <code>shr</code> <code>div</code> <code>mod</code> <code>rem</code></li> <li>logic: <code>+</code> <code>-</code> <code>*</code> <code>/</code> <code><</code> <code>></code> <code><=</code> <code>>=</code> <code>==</code> <code>!=</code> <code>is</code></li> <li>async: <code>async</code> <code>await</code></li> -<li>types: <code>int</code> <code>uint</code> <code>float</code> +<li>types: <code>int</code> 
<code>uint</code> <code>float</code> <code>i\d+</code> <code>u\d+</code> <ul> -<li><code>i8</code> <code>i16</code> <code>i32</code> <code>i64</code> <code>i128</code></li> -<li><code>u8</code> <code>u16</code> <code>u32</code> <code>u64</code> <code>u128</code></li> <li><code>f32</code> <code>f64</code> <code>f128</code></li> <li><code>dec64</code> <code>dec128</code></li> </ul> @@ -486,23 +755,28 @@ pub type Peek[T] = interface <ul> <li><code>=</code> (assignment)</li> <li><code>.</code> (chaining)</li> -<li><code>,</code> (params)</li> +<li><code>,</code> (parameters)</li> <li><code>;</code> (statements)</li> <li><code>:</code> (types)</li> <li><code>#</code> (comment)</li> +<li><code>@</code> (attributes)</li> <li><code>_</code> (unused bindings)</li> <li><code>|</code> (generics)</li> <li><code>\</code> (string/char escaping)</li> -<li><code>()</code> (params, tuples)</li> -<li><code>{}</code> (scope, structs)</li> +<li><code>()</code> (parameters, tuples)</li> <li><code>[]</code> (generics, lists)</li> -<li><code>""</code> (strings)</li> +<li><code>{}</code> (scope, structs)</li> +<li><code>""</code> (strings)</li> <li><code>''</code> (chars)</li> <li><code>``</code> (unquoting)</li> -<li>unused: <code>~</code> <code>@</code> <code>$</code> <code>%</code></li> +<li>unused on qwerty: <code>~</code> <code>%</code> <code>^</code> <code>$</code> +<ul> +<li>perhaps leave <code>$</code> unused. but <code>~</code>, <code>%</code>, and <code>^</code> totally could be...</li> +</ul> +</li> </ul> <h2 id="a-formal-grammar"><a class="header" href="#a-formal-grammar">A Formal Grammar</a></h2> -<p>We now shall take a look at a more formal description of Puck's syntax. 
</p> +<p>We now shall take a look at a more formal description of Puck's syntax.</p> <p>Syntax rules are described in <a href="https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form">extended Backus–Naur form</a> (EBNF): however, most rules surrounding whitespace, and scope, and line breaks, are modified to how they would appear after a lexing step.</p> <h3 id="identifiers"><a class="header" href="#identifiers">Identifiers</a></h3> <pre><code>Ident ::= (Letter | '_') (Letter | Digit | '_')* @@ -524,81 +798,84 @@ HexDigit ::= Digit | 'A'..'F' | 'a'..'f' <pre><code>CHAR ::= '\'' (PRINT - '\'' | '\\\'')* '\'' STRING ::= SINGLE_LINE_STRING | MULTI_LINE_STRING COMMENT ::= SINGLE_LINE_COMMENT | MULTI_LINE_COMMENT | EXPRESSION_COMMENT -SINGLE_LINE_STRING ::= '"' (PRINT - '"' | '\\"')* '"' -MULTI_LINE_STRING ::= '"""' (PRINT | '\n' | '\r')* '"""' +SINGLE_LINE_STRING ::= '"' (PRINT - '"' | '\\"')* '"' +MULTI_LINE_STRING ::= '"""' (PRINT | '\n' | '\r')* '"""' SINGLE_LINE_COMMENT ::= '#' PRINT* MULTI_LINE_COMMENT ::= '#[' (PRINT | '\n' | '\r' | MULTI_LINE_COMMENT)* ']#' EXPRESSION_COMMENT ::= '#;' SINGLE_STMT PRINT ::= LETTER | DIGIT | OPR | - '"' | '#' | "'" | '(' | ')' | # notably the dual of OPR + '"' | '#' | "'" | '(' | ')' | # notably the dual of OPR ',' | ';' | '[' | ']' | '_' | '`' | '{' | '}' | ' ' | '\t' </code></pre> <h3 id="values"><a class="header" href="#values">Values</a></h3> <pre><code>Value ::= Int | Float | String | Char | Array | Tuple | Struct Array ::= '[' (Expr (',' Expr)*)? ']' -Tuple ::= '(' (Ident ':')? Expr (',' (Ident ':')? Expr)* ')' -Struct ::= '{' Ident ':' Expr (',' Ident ':' Expr)* '}' +Tuple ::= '(' (Ident '=')? Expr (',' (Ident '=')? Expr)* ')' +Struct ::= '{' Ident '=' Expr (',' Ident '=' Expr)* '}' </code></pre> <h3 id="variables"><a class="header" href="#variables">Variables</a></h3> <pre><code>Decl ::= Let | Var | Const | Func | Type -Let ::= 'let' Pattern Annotation? '=' Expr -Var ::= 'var' Pattern Annotation? ('=' Expr)? 
-Const ::= 'pub'? 'const' Pattern Annotation? '=' Expr -Pattern ::= Char | String | Number | Float | Ident | '(' Pattern (',' Pattern)* ')' - Ident '(' Pattern (',' Pattern)* ')' +Let ::= 'let' Pattern (':' Type)? '=' Expr +Var ::= 'var' Pattern (':' Type)? ('=' Expr)? +Const ::= 'pub'? 'const' Pattern (':' Type)? '=' Expr +Pattern ::= (Ident ('as' Ident)?) | Char | String | Number | Float | + Ident? '(' Pattern (',' Pattern)* ')' </code></pre> <h3 id="declarations"><a class="header" href="#declarations">Declarations</a></h3> -<pre><code>Func ::= 'pub'? 'func' Ident Generics? Parameters? Annotation? '=' Body -Macro ::= 'pub'? 'macro' Ident Generics? Parameters? Annotation? '=' Body -Generics ::= '[' Ident Annotation? (',' Ident Annotation?)* ']' -Parameters ::= '(' Ident Annotation? (',' Ident Annotation?)* ')' -Annotation ::= ':' Type +<pre><code>Func ::= 'pub'? 'func' Ident Generics? Parameters? (':' Type)? '=' Body +Macro ::= 'pub'? 'macro' Ident Generics? Parameters? (':' Type)? '=' Body +Generics ::= '[' Ident (':' Type)? (',' Ident (':' Type)?)* ']' +Parameters ::= '(' Ident (':' Type)? (',' Ident (':' Type)?)* ')' </code></pre> +<p>All arguments to functions must have a type. This is resolved at the semantic level, however. (Arguments to macros may lack types. This signifies a generic node.)</p> <h3 id="types"><a class="header" href="#types">Types</a></h3> <pre><code>TypeDecl ::= 'pub'? 'type' Ident Generics? '=' Type -Type ::= StructType | TupleType | EnumType | UnionType | Interface | - (('distinct' | 'ref' | 'ptr' | 'mut' | 'static') (Type | ('[' Type ']'))?) -StructType ::= 'struct' ('[' Ident ':' Type (',' Ident ':' Type)* ']')? -UnionType ::= 'union' ('[' Ident ':' Type (',' Ident ':' Type)* ']')? -TupleType ::= 'tuple' ('[' (Ident ':')? Type (',' (Ident ':')? Type)* ']')? -EnumType ::= 'enum' ('[' Ident ('=' Expr)? (',' Ident ('=' Expr)?)* ']')? -Interface ::= 'interface' ('[' Signature (',' Signature)* ']')? -Signature ::= Ident Generics? 
('(' Type (',' Type)* ')')? Annotation? +Type ::= TypeStruct | TypeTuple | TypeEnum | TypeUnion | SugarUnion | + TypeClass | (Modifier* (Type | ('[' Type ']'))) +TypeStruct ::= 'struct' ('[' Ident ':' Type (',' Ident ':' Type)* ']')? +TypeUnion ::= 'union' ('[' Ident ':' Type (',' Ident ':' Type)* ']')? +SugarUnion ::= '(' Ident ':' Type (',' Ident ':' Type)* ')' +TypeTuple ::= 'tuple' ('[' (Ident ':')? Type (',' (Ident ':')? Type)* ']')? +TypeEnum ::= 'enum' ('[' Ident ('=' Expr)? (',' Ident ('=' Expr)?)* ']')? +TypeClass ::= 'class' ('[' Signature (',' Signature)* ']')? +Modifier ::= 'ref' | 'refc' | 'ptr' | 'lent' | 'mut' | 'const' +Signature ::= Ident Generics? ('(' Type (',' Type)* ')')? (':' Type)? </code></pre> <h2 id="control-flow"><a class="header" href="#control-flow">Control Flow</a></h2> -<pre><code>If ::= 'if' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? -When ::= 'when' Expr ':' Body ('elif' Expr ':' Body)* ('else' ':' Body)? -Try ::= 'try' ':' Body - ('except' Ident ('as' Ident)? (',' Ident ('as' Ident)?)*) ':' Body)* - ('finally' ':' Body)? -Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? ':' Body)+ -Block ::= 'block' Ident? ':' Body -Block ::= 'static' ':' Body -Loop ::= 'loop' ':' Body -While ::= 'while' Expr ':' Body -For ::= 'for' Pattern 'in' Expr Body +<pre><code>If ::= 'if' Expr 'then' Body ('elif' Expr 'then' Body)* ('else' Body)? +When ::= 'when' Expr 'then' Body ('elif' Expr 'then' Body)* ('else' Body)? +Try ::= 'try' Body + ('except' Ident ('as' Ident)? (',' Ident ('as' Ident)?)*) 'then' Body)+ + ('finally' Body)? +Match ::= 'match' Expr ('of' Pattern (',' Pattern)* ('where' Expr)? 'then' Body)+ +While ::= 'while' Expr 'do' Body +For ::= 'for' Pattern 'in' Expr 'do' Body +Loop ::= 'loop' Body +Block ::= 'block' Ident? Body +Const ::= 'const' Body +Quote ::= 'quote' QuoteBody </code></pre> <h2 id="modules"><a class="header" href="#modules">Modules</a></h2> -<pre><code>Mod ::= 'pub'? 
'mod' Ident ':' Body -Use ::= 'use' Ident ('/' Ident)* ('/' ('[' Ident (',' Ident)* ']'))? +<pre><code>Mod ::= 'pub'? 'mod' Ident '=' Body +Use ::= 'use' Ident ('.' Ident)* ('.' ('[' Ident (',' Ident)* ']'))? </code></pre> <h3 id="operators"><a class="header" href="#operators">Operators</a></h3> <pre><code>Operator ::= 'and' | 'or' | 'not' | 'xor' | 'shl' | 'shr' | - 'div' | 'mod' | 'rem' | 'is' | 'in' | - Opr+ + 'div' | 'mod' | 'rem' | 'is' | 'in' | Opr+ Opr ::= '=' | '+' | '-' | '*' | '/' | '<' | '>' | '@' | '$' | '~' | '&' | '%' | '|' | '!' | '?' | '^' | '.' | ':' | '\\' </code></pre> <h2 id="calls-and-expressions"><a class="header" href="#calls-and-expressions">Calls and Expressions</a></h2> +<p>This section is (quite) inaccurate due to complexities with respect to significant indentation. Heed caution.</p> <pre><code>Call ::= Ident ('[' Call (',' Call)* ']')? ('(' (Ident '=')? Call (',' (Ident '=')? Call)* ')')? | Ident Call (',' Call)* | Call Operator Call? | - Call ':' Body -Expr ::= Let | Var | Const | Func | Type | Mod | Use | Block | Static | - For | While | Loop | If | When | Try | Match | Call -Body ::= Expr | ('{' Expr (';' Expr)* '}') + Call Body +Stmt ::= Let | Var | Const | Func | Type | Mod | Use | Expr +Expr ::= Block | Const | For | While | Loop | If | When | Try | Match | Call +Body ::= (Stmt ';')* Expr </code></pre> <hr /> <p>References:</p> @@ -613,15 +890,14 @@ Body ::= Expr | ('{' Expr (';' Expr)* '}') <p>! This section <strong>needs a rewrite</strong>. Proceed with low standards.</p> </blockquote> <p>Puck has a comprehensive static type system, inspired by the likes of Nim, Rust, and Swift.</p> -<h2 id="basic-types"><a class="header" href="#basic-types">Basic types</a></h2> +<h2 id="basic-types-1"><a class="header" href="#basic-types-1">Basic types</a></h2> <p>Basic types can be one-of:</p> <ul> <li><code>bool</code>: internally an enum.</li> -<li><code>int</code>: integer number. x bits of precision by default. 
<!-- - overflow into bigints for safety and ease of cryptographical code. --> +<li><code>int</code>: integer number. x bits of precision by default. <ul> <li><code>uint</code>: same as <code>int</code>, but unsigned for more precision.</li> -<li><code>i8</code>, <code>i16</code>, <code>i32</code>, <code>i64</code>, <code>i128</code>: specified integer size</li> -<li><code>u8</code>, <code>u16</code>, <code>u32</code>, <code>u64</code>, <code>u128</code>: specified integer size</li> +<li><code>i[\d+]</code>, <code>u[\d+]</code>: arbitrarily sized integers</li> </ul> </li> <li><code>float</code>: floating-point number. @@ -629,19 +905,19 @@ Body ::= Expr | ('{' Expr (';' Expr)* '}') <li><code>f32</code>, <code>f64</code>: specified float sizes</li> </ul> </li> -<li><code>decimal</code>: precision decimal number. <!-- https://en.wikipedia.org/wiki/IEEE_754 --> +<li><code>decimal</code>: precision decimal number. <ul> <li><code>dec32</code>, <code>dec64</code>, <code>dec128</code>: specified decimal sizes</li> </ul> </li> <li><code>byte</code>: an alias to <code>u8</code>.</li> -<li><code>char</code>: a distinct alias to <code>u32</code>. For working with Unicode. <!-- - these are *packed* when part of a string: and so indexing directly into a string is a no-op. string access is O(n), swift-style. --></li> -<li><code>str</code>: a string type. mutable. internally a byte-array: externally a char-array.</li> -<li><code>void</code>: an internal type designating the absence of a value. often elided. <!-- - possibly, the empty tuple. then would `empty` be better? or `unit`? --></li> -<li><code>never</code>: a type that denotes functions that do not return. distinct from returning nothing. <!-- - the bottom type. --></li> +<li><code>char</code>: an alias to <code>u32</code>. For working with Unicode.</li> +<li><code>str</code>: a string type. mutable. 
packed: internally a byte-array, externally a char-array.</li> +<li><code>void</code>: an internal type designating the absence of a value. often elided.</li> +<li><code>never</code>: a type that denotes functions that do not return. distinct from returning nothing.</li> </ul> <p><code>bool</code> and <code>int</code>/<code>uint</code>/<code>float</code> and siblings (and subsequently <code>byte</code> and <code>char</code>) are all considered <strong>primitive types</strong> and are <em>always</em> copied (unless passed as mutable). More on when parameters are passed by value vs. passed by reference can be found in the <a href="MEMORY_MANAGEMENT.html">memory management document</a>.</p> -<p>Primitive types combine with <code>str</code>, <code>void</code>, and <code>never</code> to form <strong>basic types</strong>. <code>void</code> and <code>never</code> will rarely be referenced by name: instead, the absence of a type typically implicitly denotes one or the other. Still, having a name is helpful in some situations.</p> +<p>Primitive types, alongside <code>str</code>, <code>void</code>, and <code>never</code>, form <strong>basic types</strong>. <code>void</code> and <code>never</code> will rarely be referenced by name: instead, the absence of a type typically implicitly denotes one or the other. Still, having a name is helpful in some situations.</p> <h3 id="integers"><a class="header" href="#integers">integers</a></h3> <p>todo</p> <h3 id="strings"><a class="header" href="#strings">strings</a></h3> @@ -651,18 +927,18 @@ Body ::= Expr | ('{' Expr (';' Expr)* '}') <li>internally a byte array</li> <li>externally a char (four bytes) array</li> <li>prefixed with their length and capacity</li> -<li>automatically resize like a list</li> +<li>automatically resize</li> </ul> <p>They are also quite complicated. Puck has full support for Unicode and wishes to be intuitive, performant, and safe, as all languages wish to be. 
Strings present a problem that much effort has been expended on in (primarily) Swift and Rust to solve.</p> <h2 id="abstract-types"><a class="header" href="#abstract-types">Abstract Types</a></h2> -<p>Abstract types, broadly speaking, are types described by their <em>behavior</em> rather than their <em>implementation</em>. They are more commonly know as abstract <em>data</em> types: which is confusingly similar to "algebraic data types", another term for the <a href="TYPES.html#advanced-types">advanced types</a> they are built out of under the hood. We refer to them here as "abstract types" to mitigate some confusion.</p> +<p>Abstract types, broadly speaking, are types described by their <em>behavior</em> rather than their <em>implementation</em>. They are more commonly known as abstract <em>data</em> types: which is confusingly similar to "algebraic data types", another term for the <a href="TYPES.html#advanced-types">advanced types</a> they are built out of under the hood. We refer to them here as "abstract types" to mitigate some confusion.</p> <h3 id="iterable-types"><a class="header" href="#iterable-types">iterable types</a></h3> <p>Iterable types can be one-of:</p> <ul> -<li><code>array[S, T]</code>: Fixed-size arrays. Can only contain one type <code>T</code>. Of a fixed size <code>S</code> and cannot grow/shrink, but can mutate. Initialized in-place with <code>[a, b, c]</code>.</li> -<li><code>list[T]</code>: Dynamic arrays. Can only contain one type <code>T</code>. May grow/shrink dynamically. Initialized in-place with <code>[a, b, c]</code>. (this is the same as arrays!) <!-- Disambiguated from arrays in much the same way uints are disambiguated from ints. --></li> -<li><code>slice[T]</code>: Slices. Used to represent a "view" into some sequence of elements of type <code>T</code>. Cannot be directly constructed: they are <strong>unsized</strong>. Cannot grow/shrink, but their elements may be accessed and mutated. 
As they are underlyingly a reference to an array or list, they <strong>must not</strong> outlive the data they reference: this is non-trivial, and so slices interact in complex ways with the memory management system. <!-- possible syntax sugar: `[T]` --></li> -<li><code>str</code>: Strings. Described above. They are alternatively treated as either <code>list[byte]</code> or <code>list[char]</code>, depending on who's asking. Initialized in-place with <code>"abc"</code>.</li> +<li><code>array[T, size]</code>: Fixed-size arrays. Can only contain one type <code>T</code>. Of a fixed size <code>size</code> and cannot grow/shrink, but can mutate. Initialized in-place with <code>[a, b, c]</code>.</li> +<li><code>list[T]</code>: Dynamic arrays. Can only contain one type <code>T</code>. May grow/shrink dynamically. Initialized in-place with <code>[a, b, c]</code>. (this is the same as arrays!)</li> +<li><code>slice[T]</code>: Slices. Used to represent a "view" into some sequence of elements of type <code>T</code>. Cannot be directly constructed: they are <strong>unsized</strong>. Cannot grow/shrink, but their elements may be accessed and mutated. As they are underlyingly a reference to an array or list, they <strong>must not</strong> outlive the data they reference: this is non-trivial, and so slices interact in complex ways with the memory management system.</li> +<li><code>str</code>: Strings. Described above. They are alternatively treated as either <code>list[byte]</code> or <code>list[char]</code>, depending on who's asking. Initialized in-place with <code>"abc"</code>.</li> </ul> <p>These iterable types are commonly used, and bits and pieces of compiler magic are used here and there (mostly around initialization, and ownership) to ease use. 
All of these types are some sort of sequence: and implement the <code>Iter</code> interface, and so can be iterated (hence the name).</p> <h3 id="other-abstract-types"><a class="header" href="#other-abstract-types">other abstract types</a></h3> @@ -690,12 +966,12 @@ These are monomorphized into more specific functions at compile-time if needed.< <p>Parameter types can be one-of:</p> <ul> <li>mutable: <code>func foo(a: mut str)</code>: Marks a parameter as mutable (parameters are immutable by default). Passed as a <code>ref</code> if not one already.</li> -<li>static: <code>func foo(a: static str)</code>: Denotes a parameter whose value must be known at compile-time. Useful in macros, and with <code>when</code> for writing generic code.</li> +<li>constant: <code>func foo(a: const str)</code>: Denotes a parameter whose value must be known at compile-time. Useful in macros, and with <code>when</code> for writing generic code.</li> <li>generic: <code>func foo[T](a: list[T], b: T)</code>: The standard implementation of generics, where a parameter's exact type is not listed, and instead statically dispatched based on usage.</li> <li>constrained: <code>func foo(a: str | int | float)</code>: A basic implementation of generics, where a parameter can be one-of several listed types. The only allowed operations on such parameters are those shared by each type. Makes for particularly straightforward monomorphization. <!-- - Separated with the bitwise or operator `|` rather than the symbolic or `||` or a raw `or` to give the impression that there isn't a corresponding "and" operation (the `&` operator is preoccupied with strings). --></li> <li>functions: <code>func foo(a: (int, int) -> int)</code>: First-class functions. All functions are first class - function declarations implicitly have this type, and may be bound in variable declarations. 
However, the function <em>type</em> is only terribly useful as a parameter type.</li> <li>slices: <code>func foo(a: slice[...])</code>: Slices of existing lists, strings, and arrays. Generic over length. These are references under the hood, may be either immutable or mutable (with <code>mut</code>), and interact non-trivially with Puck's <a href="MEMORY_MANAGEMENT.html">ownership system</a>.</li> -<li>interfaces: <code>func foo(a: Stack[int])</code>: Implicit typeclasses. More in the <a href="TYPES.html#interfaces">interfaces section</a>. +<li>classes: <code>func foo(a: Stack[int])</code>: Implicit typeclasses. More in the <a href="TYPES.html#classes">classes section</a>. <ul> <li>ex. for above: <code>type Stack[T] = interface[push(mut Self, T); pop(mut Self): T]</code></li> </ul> @@ -705,33 +981,34 @@ These are monomorphized into more specific functions at compile-time if needed.< <p>Several of these parameter types - specifically, slices, functions, and interfaces - share a common trait: they are not <em>sized</em>. The exact size of the type is not generally known until compilation - and in some cases, not even during compilation! As the size is not always rigorously known, problems arise when attempting to construct these parameter types or compose them with other types: and so this is disallowed. They may still be used with <em>indirection</em>, however - detailed in the <a href="TYPES.html#reference-types">section on reference types</a>.</p> <h3 id="generic-types"><a class="header" href="#generic-types">generic types</a></h3> <p>Functions can take a <em>generic</em> type, that is, be defined for a number of types at once:</p> -<pre><code class="language-puck">func add[T](a: list[T], b: T) = - return a.add(b) +<pre><code class="language-puck"># fully generic. monomorphizes based on usage. +func add[T](a: list[T], b: T) = a.push(b) -func length[T](a: T) = - return a.len # monomorphizes based on usage. 
- # lots of things use .len, but only a few called by this do. - # throws a warning if exported for lack of specitivity. +# constrained generics. restricts possible operations to the intersection +# of defined methods on each type. +func length[T](a: str | list[T]) = + a.len # both strings and lists have a `len` method -func length(a: str | list) = - return a.len +# alternative formulation: place the constraint on a generic parameter. +# this ensures both a and b are of the *same* type. +func add[T: int | float](a: T, b: T) = a + b </code></pre> -<p>The syntax for generics is <code>func</code>, <code>ident</code>, followed by the names of the generic parameters in brackets <code>[T, U, V]</code>, followed by the function's parameters (which may then refer to the generic types). -Generics are replaced with concrete types at compile time (monomorphization) based on their usage in function calls within the main function body.</p> +<p>The syntax for generics is <code>func</code>, <code>ident</code>, followed by the names of the generic parameters in brackets <code>[T, U, V]</code>, followed by the function's parameters (which may then refer to the generic types). Generics are replaced with concrete types at compile time (monomorphization) based on their usage in function calls within the main function body.</p> <p>Constrained generics have two syntaxes: the constraint can be defined directly on a parameter, leaving off the <code>[T]</code> box, or it may be defined within the box as <code>[T: int | float]</code> for easy reuse in the parameters.</p> -<p>Other constructions like modules and type declarations themselves may also be generic.</p> +<p>Other constructions like type declarations themselves may also be generic over types. 
In the future, modules also may be generic: whether that is to be over types or over other modules is to be determined.</p> <h2 id="reference-types"><a class="header" href="#reference-types">Reference Types</a></h2> -<p>Types are typically constructed by value on the stack. That is, without any level of indirection: and so type declarations that recursively refer to one another, or involve unsized types (notably including parameter types), would not be allowed. However, Puck provides two avenues for indirection.</p> +<p>Types are typically constructed by value on the stack. That is, without any level of indirection: and so type declarations that recursively refer to one another, or involve unsized types (notably including parameter types), would not be allowed. However, Puck provides several avenues for indirection.</p> <p>Reference types can be one-of:</p> <ul> -<li><code>ref T</code>: An automatically-managed reference to type <code>T</code>. This is a pointer of size <code>uint</code> (native).</li> -<li><code>ptr T</code>: A manually-managed pointer to type <code>T</code>. (very) unsafe. The compiler will yell at you.</li> +<li><code>ref T</code>: An owned reference to a type <code>T</code>. This is a pointer of size <code>uint</code> (native).</li> +<li><code>refc T</code>: A reference-counted reference to a type <code>T</code>. This allows escaping the borrow checker.</li> +<li><code>ptr T</code>: A manually-managed pointer to a type <code>T</code>. (very) unsafe. 
The compiler will yell at you.</li> </ul> <pre><code class="language-puck">type BinaryTree = ref struct left: BinaryTree right: BinaryTree -type AbstractTree[T] = interface +type AbstractTree[T] = class func left(self: Self): Option[AbstractTree[T]] func right(self: Self): Option[AbstractTree[T]] func data(self: Self): T @@ -746,57 +1023,61 @@ type UnsafeTree = struct right: ptr UnsafeTree </code></pre> <p>The <code>ref</code> prefix may be placed at the top level of type declarations, or inside on a field of a structural type. <code>ref</code> types may often be more efficient when dealing with large data structures. They also provide for the usage of unsized types (functions, interfaces, slices) within type declarations.</p> -<p>The compiler abstracts over <code>ref</code> types to provide optimization for reference counts: and so a distinction between <code>Rc</code>/<code>Arc</code>/<code>Box</code> is not needed. Furthermore, access implicitly dereferences (with address access available via <code>.addr</code>), and so a <code>*</code> dereference operator is also not needed. Much care has been given to make references efficient and safe, and so <code>ptr</code> should be avoided if at all possible. The compiler will yell at you if you use it (or any other unsafe features).</p> -<p>The implementation of <code>ref</code> is delved into in further detail in the <a href="MEMORY_MANAGEMENT.html">memory management document</a>.</p> +<p>The compiler abstracts over <code>ref</code> types to provide optimization for reference counts: and so a distinction between <code>Rc</code>/<code>Arc</code>/<code>Box</code> is not needed. Furthermore, access implicitly dereferences (with address access available via <code>.addr</code>), and so a <code>*</code> dereference operator is also not needed.</p> +<p>Much care has been given to make references efficient and safe, and so <code>ptr</code> should be avoided if at all possible. 
They are only usable inside functions explicitly marked with <code>#[safe]</code>.</p> +<p>The implementations of reference types are delved into in further detail in the <a href="MEMORY_MANAGEMENT.html">memory management document</a>.</p> <h2 id="advanced-types"><a class="header" href="#advanced-types">Advanced Types</a></h2> -<p>The <code>type</code> keyword is used to declare aliases to custom data types. These types are <em>algebraic</em>: they function by composition. Algebraic data types can be one-of:</p> +<p>The <code>type</code> keyword is used to declare aliases to custom data types. These types are <em>algebraic</em>: they function by <em>composition</em>. Such <em>algebraic data types</em> can be one-of:</p> <ul> <li><code>struct</code>: An unordered, named collection of types. May have default values.</li> <li><code>tuple</code>: An ordered collection of types. Optionally named.</li> <li><code>enum</code>: Ordinal labels, that may hold values. Their default values are their ordinality.</li> <li><code>union</code>: Powerful matchable tagged unions a la Rust. Sum types.</li> -<li><code>interface</code>: Implicit typeclasses. User-defined duck typing.</li> +<li><code>class</code>: Implicit type classes. User-defined duck typing.</li> </ul> -<p>There also exist <code>distinct</code> types: while <code>type</code> declarations define an alias to an existing or new type, <code>distinct</code> types define a type that must be explicitly converted to/from. This is useful for having some level of separation from the implicit interfaces that abound.</p> +<p>All functions defined on the original type carry over. 
If this is not desired, the newtype paradigm is preferred: declaring a single-field <code>struct</code> and copying function declarations over.</p> +<p>Types may be explicitly converted to and from one another via the <code>Coerce</code> and <code>Convert</code> classes and provided <code>from</code> and <code>to</code> functions.</p> <h3 id="structs"><a class="header" href="#structs">structs</a></h3> <p>Structs are an <em>unordered</em> collection of named types.</p> -<p>They are declared with <code>struct[identifier: Type, ...]</code> and initialized with brackets: <code>{field: "value", another: 500}</code>.</p> -<pre><code class="language-puck">type LinkedNode[T] = struct - previous, next: Option[ref LinkedNode[T]] +<p>They are declared with <code>struct[identifier: Type, ...]</code> and initialized with brackets: <code>{ field = "value", another = 500}</code>. Structs are <em>structural</em>: while the type system is fundamentally nominal, and different type declarations are treated as distinct, a struct object initialized with <code>{}</code> is usable in any context that expects a struct with the same fields.</p> <pre><code class="language-puck">type LinkedNode[T] = ref struct + previous: Option[LinkedNode[T]] + next: Option[LinkedNode[T]] data: T -let node = { - previous: None, next: None - data: 413 +let node = { # inferred type: LinkedNode[int], from prints_data call + previous = None, next = None + data = 413 } func pretty_print(node: LinkedNode[int]) = print node.data - if node.next of Some(node): + if node.next of Some(node) then node.pretty_print() # structural typing! prints_data(node) </code></pre> -<p>Structs are <em>structural</em> and so structs composed entirely of fields with the same signature (identical in name and type) are considered <em>equivalent</em>. 
-This is part of a broader structural trend in the type system, and is discussed in detail in the section on <a href="TYPES.html#subtyping">subtyping</a>.</p> <h3 id="tuples"><a class="header" href="#tuples">tuples</a></h3> <p>Tuples are an <em>ordered</em> collection of either named and/or unnamed types.</p> -<p>They are declared with <code>tuple[Type, identifier: Type, ...]</code> and initialized with parentheses: <code>(413, "hello", value: 40000)</code>. Syntax sugar allows for them to be declared with <code>()</code> as well.</p> -<p>They are exclusively ordered - named types within tuples are just syntax sugar for positional access. Passing a fully unnamed tuple into a context that expects a tuple with a named parameter is allowed so long as the types line up in order.</p> +<p>They are declared with <code>tuple[Type, identifier: Type, ...]</code> and initialized with parentheses: <code>(413, "hello", value = 40000)</code>. Syntactic sugar allows for them to be declared with <code>()</code> as well.</p> +<p>They are exclusively ordered - named types within tuples are just syntactic sugar for positional access. Passing a fully unnamed tuple into a context that expects a tuple with a named parameter is allowed (so long as the types line up).</p> <pre><code class="language-puck">let grouping = (1, 2, 3) -func foo: tuple[string, string] = ("hello", "world") +func foo: tuple[str, str] = ("hello", "world") +dbg grouping.foo # prints '("hello", "world")' + +func bar(a: (str, str)) = a.1 +dbg grouping.bar # prints '"world"' </code></pre> -<p>Tuples are particularly useful for "on-the-fly" types. Creating type aliases to tuples is discouraged - structs are generally a better choice for custom type declarations.</p> +<p>Tuples are particularly useful for "on-the-fly" types. 
Creating type declarations to tuples is discouraged - structs are generally a better choice, as they are fully named, support default values, and may have their layout optimized by the compiler.</p> <h3 id="enums"><a class="header" href="#enums">enums</a></h3> <p>Enums are <em>ordinal labels</em> that may have <em>associated values</em>.</p> -<p>They are declared with <code>enum[Label, AnotherLabel = 4, ...]</code> and are never initialized (their values are known statically). -Enums may be accessed directly by their label, and are ordinal and iterable regardless of their associated value. They are useful in collecting large numbers of "magic values", that would otherwise be constants.</p> +<p>They are declared with <code>enum[Label, AnotherLabel = 4, ...]</code> and are never initialized (their values are known statically). Enums may be accessed directly by their label, and are ordinal and iterable regardless of their associated value. They are useful in collecting large numbers of "magic values" that would otherwise be constants.</p> <pre><code class="language-puck">type Keys = enum Left, Right, Up, Down - A = "a" - B = "b" + A = "a" + B = "b" </code></pre> <p>In the case of an identifier conflict (with other enum labels, or types, or...) they must be prefixed with the name of their associated type (separated by a dot). 
This is standard for identifier conflicts: and is discussed in more detail in the <a href="MODULES.html">modules document</a>.</p> <h3 id="unions"><a class="header" href="#unions">unions</a></h3> @@ -822,83 +1103,90 @@ type Expr = ref union func eval(context: mut HashTable[Ident, Value], expr: Expr): Result[Value] match expr - of Literal(value): Okay(value) - of Variable(ident): - context.get(ident).err("Variable not in context") - of Application(body, arg): + of Literal(value) then Okay(value) + of Variable(ident) then + context.get(ident).err("Variable not in context") + of Application(body, arg) then if body of Abstraction(param, body as inner_body): context.set(param, context.eval(arg)?) # from std.tables context.eval(inner_body) - else: - Error("Expected Abstraction, found {}".fmt(body)) + else + Error("Expected Abstraction, found {}".fmt(body)) of Conditional(condition, then_case, else_case): - if context.eval(condition)? == "true": + if context.eval(condition)? == "true" then context.eval(then_case) else: context.eval(else_case) - of expr: - Error("Invalid expression {}".fmt(expr)) + of expr then + Error("Invalid expression {}".fmt(expr)) </code></pre> <p>The match statement takes exclusively a list of <code>of</code> sub-expressions, and checks for exhaustivity. The <code>expr of Type(binding)</code> syntax can be reused as a conditional, in <code>if</code> statements and elsewhere.</p> <p>The <code>of</code> <em>operator</em> is similar to the <code>is</code> operator in that it queries type equality, returning a boolean. However, unbound identifiers within <code>of</code> expressions are bound to appropriate values (if matched) and injected into the scope. This allows for succinct handling of <code>union</code> types in situations where <code>match</code> is overkill.</p> -<p>Each branch of a match expression can also have a <em>guard</em>: an arbitrary conditional that must be met in order for it to match. 
Guards are written as <code>where cond</code> and immediately follow the last pattern in an <code>of</code> branch, preceding the colon.</p> -<h3 id="interfaces"><a class="header" href="#interfaces">interfaces</a></h3> -<p>Interfaces can be thought of as analogous to Rust's traits, without explicit <code>impl</code> blocks and without need for the <code>derive</code> macro. Types that have functions fulfilling the interface requirements implicitly implement the associated interface.</p> -<p>The <code>interface</code> type is composed of a list of function signatures that refer to the special type <code>Self</code> that must exist for a type to be valid. The special type <code>Self</code> is replaced with the concrete type at compile time in order to typecheck. They are declared with <code>interface[signature, ...]</code>.</p> -<pre><code class="language-puck">type Stack[T] = interface +<p>Each branch of a match expression can also have a <em>guard</em>: an arbitrary conditional that must be met in order for it to match. Guards are written as <code>where cond</code> and immediately follow the last pattern in an <code>of</code> branch, preceding <code>then</code>.</p> +<h3 id="classes-1"><a class="header" href="#classes-1">classes</a></h3> +<p>Classes can be thought of as analogous to Rust's traits: without explicit <code>impl</code> blocks and without need for the <code>derive</code> macro. Types that have functions defined on them fulfilling the class requirements implicitly implement the associated class.</p> +<p>The <code>class</code> type is composed of a list of function signatures that refer to the special type <code>Self</code> that must exist for a type to be valid. The special type <code>Self</code> is replaced with the concrete type at compile time in order to typecheck. 
They are declared with <code>class[signature, ...]</code>.</p> +<pre><code class="language-puck">type Stack[T] = class push(self: mut Self, val: T) pop(self: mut Self): T - peek(self: Self): T + peek(self: lent Self): lent T func takes_any_stack(stack: Stack[int]) = - # only stack.push, stack.pop, and stack.peek are available methods -</code></pre> -<p>Differing from Rust, Haskell, and many others, there is no explicit <code>impl</code> block. If there exist functions for a type that satisfy all of an interface's signatures, it is considered to match and the interface typechecks. This may seem strange and ambiguous - but again, static typing and uniform function call syntax help make this a more reasonable design. The purpose of explicit <code>impl</code> blocks in ex. Rust is three-fold: to provide a limited form of uniform function call syntax; to explicitly group together associated code; and to disambiguate. UFCS provides for the first, the module system provides for the second, and the third is proposed to not matter.</p> -<p>Interfaces cannot be constructed because they are <strong>unsized</strong>. They serve purely as a list of valid operations on a type within a context: no information about their memory layout is relevant. The concrete type fulfilling an interface is known at compile time, however, and so there are no issues surrounding interfaces as parameters, just when attempted to be used as (part of) a concrete type. They can be used as part of a concrete type with <em>indirection</em>, however: <code>type Foo = struct[a: int, b: ref interface[...]]</code> is perfectly valid.</p> -<p>Interfaces also <em>cannot</em> extend or rely upon other interfaces in any way. There is no concept of an interface extending an interface. There is no concept of a parameter satisfying two interfaces. 
In the author's experience, while such constructions are powerful, they are also an immense source of complexity, leading to less-than-useful interface hierarchies seen in languages like Java, and yes, Rust.</p> -<p>Instead, if one wishes to form an interface that <em>also</em> satisfies another interface, they must include all of the other interface's associated functions within the new interface. Given that interfaces overwhelmingly only have a handful of associated functions, and if you're using more than one interface you <em>really</em> should be using a concrete type, the hope is that this will provide explicitness.</p> -<!-- While functions are the primary way of performing operations on types, they are not the only way, and listing all explicitly can be painful - instead, it can be desired to be able to *associate a type* and any field access or existing functions on that type with the interface. todo: i have not decided on the syntax for this yet. --> -<p>Interfaces compose with <a href="MODULES.html">modules</a> to offer fine grained access control.</p> -<!-- todo: I have not decided whether the names of parameters is / should be relevant, or enforcable, or present. I'm leaning towards them not being present. But if they are enforcable, it makes it harder to implicitly implement the wrong interface. Design notes to consider: https://blog.rust-lang.org/2015/05/11/traits.html --> -<h3 id="type-aliases-and-distinct-types"><a class="header" href="#type-aliases-and-distinct-types">type aliases and distinct types</a></h3> -<p>Any type can be declared as an <em>alias</em> to a type simply by assigning it to such. All functions defined on the original type carry over, and functions expecting one type may receive the other with no issues.</p> -<pre><code class="language-puck">type Float = float -</code></pre> -<p>It is no more than an alias. 
When explicit conversion between types is desired and functions carrying over is undesired, <code>distinct</code> types may be used.</p> -<pre><code class="language-puck">type MyFloat = distinct float -let foo: MyFloat = MyFloat(192.68) -</code></pre> -<p>Types then must be explicitly converted via constructors.</p> + # only stack.push, stack.pop, and stack.peek are available, regardless of the concrete type passed +</code></pre> +<p>Differing from Rust, Haskell, and many others, there is no explicit <code>impl</code> block. If there exist functions for a type that satisfy all of a class's signatures, it is considered to match and the class typechecks. This may seem strange and ambiguous - but again, static typing and uniform function call syntax help make this a more reasonable design. The purpose of explicit <code>impl</code> blocks in ex. Rust is three-fold: to provide a limited form of uniform function call syntax; to explicitly group together associated code; and to disambiguate. UFCS provides for the first, the module system provides for the second, and type-based disambiguation provides for the third, with such information exposed to the user via the language server protocol.</p> +<pre><code class="language-puck">type Set[T] = class + in(lent Self, T): bool + add(mut Self, T) + remove(mut Self, T): Option[T] + +type Foo = struct + a: int + b: ref Set[int] # indirection: now perfectly valid +</code></pre> +<p>Classes cannot be constructed, as they are <strong>unsized</strong>. They serve purely as a list of valid operations on a type: no information about their memory layout is relevant. The <em>concrete type</em> fulfilling a class is known at compile time, however, and so there are no issues surrounding the use of classes as parameters, just when attempted to be used as (part of) a concrete type in ex. a struct. 
They can be used with <em>indirection</em>, however: as references are sized (consisting of a memory address).</p> +<pre><code class="language-puck">## The Display class. Any type implementing `str` is printable. +## Any type that is Display must necessarily also implement Debug. +pub type Display = class + str(Self): str + dbg(Self): str + +## The Debug class. Broadly implemented for every type with compiler magic. +## Types can (and should) override the generic implementations. +pub type Debug = class + dbg(Self): str +</code></pre> +<p>Classes also <em>cannot</em> extend or rely upon other classes in any way, nor is there any concept of a parameter satisfying two classes. In the author's experience, while such constructions are powerful, they are also an immense source of complexity, leading to less-than-useful hierarchies seen in languages like Java, and yes, Rust. Instead, if one wishes to form a class that <em>also</em> satisfies another class, they must name a new class that explicitly includes all of the other class's associated functions. Given that classes in Puck overwhelmingly only have a small handful of associated functions, and if you're using more than one class you <em>really</em> should be using a concrete type: the hope is that this will provide for explicitness and reduce complexity.</p> +<p>Classes compose well with <a href="MODULES.html">modules</a> to offer fine grained access control.</p> <h2 id="errata"><a class="header" href="#errata">Errata</a></h2> <h3 id="default-values"><a class="header" href="#default-values">default values</a></h3> <p>Puck does not have any concept of <code>null</code>: all values <em>must</em> be initialized. 
-But always explicitly initializing types is syntactically verbose, and so most types have an associated "default value".</p> +But always explicitly initializing types is syntactically verbose, and so most types have an associated "default value".</p> <p><strong>Default values</strong>:</p> <ul> <li><code>bool</code>: <code>false</code></li> <li><code>int</code>, <code>uint</code>, etc: <code>0</code></li> <li><code>float</code>, etc: <code>0.0</code></li> <li><code>char</code>: <code>'\0'</code></li> -<li><code>str</code>: <code>""</code></li> +<li><code>str</code>: <code>""</code></li> <li><code>void</code>, <code>never</code>: unconstructable</li> <li><code>array[T]</code>, <code>list[T]</code>: <code>[]</code></li> <li><code>set[T]</code>, <code>table[T, U]</code>: <code>{}</code></li> <li><code>tuple[T, U, ...]</code>: <code>(default values of its fields)</code></li> <li><code>struct[T, U, ...]</code>: <code>{default values of its fields}</code></li> -<li><code>enum[One, Two, ...]</code>: <code><first label></code></li> +<li><code>enum[One, Two, ...]</code>: <strong>disallowed</strong></li> <li><code>union[T, U, ...]</code>: <strong>disallowed</strong></li> <li><code>slice[T]</code>, <code>func</code>: <strong>disallowed</strong></li> -<li><code>ref</code>, <code>ptr</code>: <strong>disallowed</strong></li> +<li><code>ref</code>, <code>refc</code>, <code>ptr</code>: <strong>disallowed</strong></li> </ul> -<p>For unions, slices, references, and pointers, this is a bit trickier. They all have no reasonable "default" for these types <em>aside from</em> null. +<p>For unions, slices, references, and pointers, this is a bit trickier. They all have no reasonable "default" for these types <em>aside from</em> null. Instead of giving in, the compiler instead disallows any non-initializations or other cases in which a default value would be inserted.</p> <p>todo: consider user-defined defaults (ex. 
structs)</p> <h3 id="signatures-and-overloading"><a class="header" href="#signatures-and-overloading">signatures and overloading</a></h3> -<p>Puck supports <em>overloading</em> - that is, there may exist multiple functions, or multiple types, or multiple modules, so long as they have the same <em>signature</em>. -The signature of a function / type / module is important. Interfaces, among other constructs, depend on the user having some understanding of what the compiler considers to be a signature. -So, it is stated here explicitly:</p> +<p>Puck supports <em>overloading</em> - that is, there may exist multiple functions, or multiple types, or multiple modules, with the same name - so long as they have a different <em>signature</em>. +The signature of a function/type/module is important. Classes, among other constructs, depend on the user having some understanding of what the compiler considers to be a signature. So we state it here explicitly:</p> <ul> -<li>The signature of a function is its name and the <em>types</em> of each of its parameters, in order. Optional parameters are ignored. Generic parameters are ??? +<li>The signature of a function is its name and the <em>types</em> of each of its parameters, in order, ignoring optional parameters. Generic parameters are ??? <ul> <li>ex. ...</li> </ul> @@ -910,144 +1198,136 @@ So, it is stated here explicitly:</p> </li> <li>The signature of a module is just its name. This may change in the future.</li> </ul> -<h3 id="subtyping"><a class="header" href="#subtyping">subtyping</a></h3> +<h3 id="structural-subtyping"><a class="header" href="#structural-subtyping">structural subtyping</a></h3> <p>Mention of subtyping has been on occasion in contexts surrounding structural type systems, particularly the section on distinct types, but no explicit description of what the subtyping rules are have been given.</p> -<p>Subtyping is the implicit conversion of compatible types, usually in a one-way direction. 
The following types are implicitly convertible:</p> -<ul> -<li><code>uint</code> ==> <code>int</code></li> -<li><code>int</code> ==> <code>float</code></li> -<li><code>uint</code> ==> <code>float</code></li> -<li><code>string</code> ==> <code>list[char]</code> (the opposite no, use <code>pack</code>)</li> -<li><code>array[T; n]</code> ==> <code>list[T]</code></li> -<li><code>struct[a: T, b: U, ...]</code> ==> <code>struct[a: T, b: U]</code></li> -<li><code>union[A: T, B: U]</code> ==> <code>union[A: T, B: U, ...]</code></li> -</ul> -<h3 id="inheritance"><a class="header" href="#inheritance">inheritance</a></h3> -<p>Puck is not an object-oriented language. Idiomatic design patterns in object-oriented languages are harder to accomplish and not idiomatic here.</p> -<p>But, Puck has a number of features that somewhat support the object-oriented paradigm, including:</p> -<ul> -<li>uniform function call syntax</li> -<li>structural typing / subtyping</li> -<li>interfaces</li> -</ul> -<pre><code class="language-puck">type Building = struct - size: struct[length, width: uint] - color: enum[Red, Blue, Green] - location: tuple[longitude, latitude: float] - -type House = struct - size: struct[length, width: uint] - color: enum[Red, Blue, Green] - location: tuple[longitude, latitude: float] - occupant: str - -func init(_: type[House]): House = - { size: {length, width: 500}, color: Red - location: (0.0, 0.0), occupant: "Barry" } - -func address(building: Building): str = - let number = int(building.location.0 / building.location.1).abs - let street = "Logan Lane" - return number.str & " " & street - -# subtyping! methods! -print House.init().address() - -func address(house: House): str = - let number = int(house.location.0 - house.location.1).abs - let street = "Logan Lane" - return number.str & " " & street - -# overriding! (will warn) -print address(House.init()) - -# abstract types! inheritance! 
-type Addressable = interface for Building - func address(self: Self) -</code></pre> -<p>These features may <em>compose</em> into code that closely resembles its object-oriented counterpart. But make no mistake! Puck is static first and functional somewhere in there: dynamic dispatch and the like are not accessible (currently).</p> <div style="break-before: page; page-break-before: always;"></div><h1 id="modules-and-namespacing"><a class="header" href="#modules-and-namespacing">Modules and Namespacing</a></h1> <blockquote> <p>! This section is <strong>incomplete</strong>. Proceed with caution.</p> </blockquote> <p>Puck has a first-class module system, inspired by such expressive designs in the ML family.</p> -<h2 id="using-modules"><a class="header" href="#using-modules">Using Modules</a></h2> -<pre><code class="language-puck"></code></pre> -<p>Modules package up code for use by others. Identifiers known at compile time may be part of a <em>module signature</em>: these being constants, functions, macros, types, and other modules themselves. They may be made accessible to external users by prefixing them with the <code>pub</code> keyword. Files are modules, named with their filename. The <code>mod</code> keyword followed by an identifier and an indented block of code explicitly defines a module, inside of the current module. Modules are first class: they may be bound to constants (having the type <code>: mod</code>) and publicly exported, or bound to local variables and passed into functions for who knows what purpose.</p> -<p>The <code>use</code> keyword lets you use other modules. The <code>use</code> keyword imports public symbols from the specified module into the current scope <em>unqualified</em>. This runs contrary to expectations coming from most other languages: from Python to Standard ML, the standard notion of an "import" usually puts the imported symbols behind another symbol to avoid "polluting the namespace". 
As Puck is strongly typed and allows overloading, however, the author sees no reason for namespace pollution to be of concern. These unqualified imports have the added benefit of making uniform function call syntax more widely accessible. It is inevitable that identifier conflicts will exist on occasion, of course: when this happens, the compiler will force qualification (this then does restrict uniform function call syntax).</p> -<pre><code class="language-puck"></code></pre> +<h2 id="what-are-modules"><a class="header" href="#what-are-modules">What are modules?</a></h2> +<pre><code class="language-puck">pub mod stack = + pub type Stack[T] = class + init(static type Self): Stack[T] + push(mut Self, val: T) + pop(mut Self): T? + peek(lent Self): lent T? + + pub mod list = + type ListStack[T] = list[T] + + pub func init[T](self: static type ListStack[T]): Stack[T] = [] + pub func push[T](self: mut ListStack[T], val: T) = self.push(T) + pub func pop[T](self: mut ListStack[T]): T? = self.pop + pub func peek[T](self: lent ListStack[T]): lent T? = + if self.len == 0 then None else Some(self.last) + +use stack.list + +let a = ListStack[int].init +print a.len # error: unable to access method on private type outside its module + +a.push(5) +print a.pop # Some(5) +</code></pre> +<p>Modules package up code for use by others. Identifiers known at compile time may be part of a module: these being constants, functions, macros, types, and other modules themselves. Such identifiers may be made accessible outside of the module by prefixing them with the <code>pub</code> keyword.</p> +<p>Importantly, <em>files</em> are implicitly modules, public and named with their filename. The <code>mod</code> keyword followed by an identifier and an indented block of code explicitly defines a module, inside of the current module. 
Modules are first class: they may be bound to constants (having the type <code>: mod</code>) and publicly exported, or bound to local variables and passed into functions for who knows what purpose.</p> +<h2 id="using-modules"><a class="header" href="#using-modules">Using modules</a></h2> +<p>The <code>use</code> keyword lets you use other modules.</p> +<p>The <code>use</code> keyword imports public symbols from the specified module into the current scope <em>unqualified</em>. This runs contrary to expectations coming from most other languages: from Python to Standard ML, the standard notion of an "import" puts the imported symbols behind another symbol to avoid "polluting the namespace". As Puck is strongly typed and allows overloading, however, we see no reason for namespace pollution to be of concern. These unqualified imports have the added benefit of making <em>uniform function call syntax</em> more widely accessible. It is inevitable that identifier conflicts will exist on occasion, of course: when this happens, the compiler will force qualification (this then does restrict uniform function call syntax). We discuss this more later.</p> <p>Nonetheless, if qualification of imports is so desired, an alternative approach is available - binding a module to a constant. Both the standard library and external libraries are available behind identifiers without use of <code>use</code>: <code>std</code> and <code>lib</code>, respectively. (FFI and local modules will likely use more identifiers, but this is not hammered out yet.) A submodule - for example, <code>std.net</code> - may be bound in a constant as <code>const net = std.net</code>, providing all of the modules' public identifiers for use, as fields of the constant <code>net</code>. We will see this construction to be extraordinarily helpful in crafting high-level public APIs for libraries later on.</p> -<p>Multiple modules can be imported at once, i.e. 
<code>use std.[logs, tests]</code>, <code>use lib.crypto, lib.http</code>. The standard namespaces (<code>std</code>, <code>lib</code>) deserve more than a passing mention. There are several of these: <code>std</code> for the standard library, <code>lib</code> for all external libraries, <code>crate</code> for the top-level namespace of a project (subject to change), <code>this</code> for the current containing module (subject to change)... In addition: there are a suite of <em>language</em> namespaces, for FFI - <code>rust</code>, <code>nim</code>, and <code>swift</code> preliminarily - that give access to libraries from other languages. Recall that imports are unqualified - so <code>use std</code> will allow use of the standard library without the <code>std</code> qualifier (not recommended: several modules have common names), and <code>use lib</code> will dump every library it can find into the global namespace (even less recommended). </p> +<pre><code class="language-puck">use std.[logs, test] +use lib.crypto, lib.http +</code></pre> +<p>Multiple modules can be imported at once. The standard namespaces deserve more than a passing mention. There are several of these: <code>std</code> for the standard library, <code>lib</code> for all external libraries, <code>pkg</code> for the top-level namespace of a project, <code>this</code> for the current containing module... In addition: there is a suite of <em>language</em> namespaces, for FFI - <code>rust</code>, <code>nim</code>, and <code>swift</code> preliminarily - that give access to libraries from other languages. 
Recall that imports are unqualified - so <code>use std</code> will allow use of the standard library without the <code>std</code> qualifier (not recommended: several modules have common names), and <code>use lib</code> will dump the name of every library it can find into the global namespace (even less recommended).</p> <h2 id="implicit-modules"><a class="header" href="#implicit-modules">Implicit Modules</a></h2> -<p>A major goal of Puck's module system is to allow the same level of expressiveness as the ML family, while cutting down on the extraneous syntax and boilerplate needed to do so. As such, access modifiers are written directly inline with their declaration, and the file system structure is reused to form an implicit module system for internal use. This - particularly the former - <em>limits</em> the structure a module can expose at first glance, but we will see later that interfaces recoup much of this lost specificity.</p> -<p>We mentioned that the filesystem forms an implicit module structure. This begets a couple of design choices. Module names <strong>must</strong> be lowercase, for compatibility with case-insensitive filesystems. Both a file and a folder with the same name can exist. Files within the aforementioned folder are treated as submodules of the aforementioned file. This again restricts the sorts of module structures we can build, but we will again see later that this restriction can be bypassed.</p> -<p>The <code>this</code> and <code>crate</code> modules are useful for this implicit structure...</p> -<h2 id="defining-interfaces"><a class="header" href="#defining-interfaces">Defining Interfaces</a></h2> +<p>A major goal of Puck's module system is to allow the same level of expressiveness as the ML family, while cutting down on the extraneous syntax and boilerplate needed to do so. 
As such, access modifiers are written directly inline with their declaration, and the file system structure is reused to form an implicit module system for internal use. This - particularly the former - <em>limits</em> the structure a module can expose at first glance, but we will see later that classes recoup much of this lost specificity.</p> +<p>We mentioned that the filesystem forms an implicit module structure. This begets a couple of design choices. Module names <strong>must</strong> be lowercase, for compatibility with case-insensitive filesystems. Both a file and a folder with the same name can exist. Files within the aforementioned folder are treated as submodules of the aforementioned file. This again restricts the sorts of module structures we can build, but we will see later that this restriction can be bypassed.</p> +<p>The <code>this</code> and <code>pkg</code> modules are useful for this implicit structure...</p> +<h2 id="defining-interfaces"><a class="header" href="#defining-interfaces">Defining interfaces</a></h2> <p>...</p> -<h2 id="defining-an-external-api"><a class="header" href="#defining-an-external-api">Defining an External API</a></h2> +<h2 id="defining-an-external-api"><a class="header" href="#defining-an-external-api">Defining an external API</a></h2> <p>The filesystem provides an implicit module structure, but it may not be the one you want to expose to users.</p> <p>...</p> -<div style="break-before: page; page-break-before: always;"></div><h1 id="error-handling"><a class="header" href="#error-handling">Error Handling</a></h1> -<p>Puck's error handling is shamelessly stolen from Swift. It uses a combination of <code>Option</code>/<code>Result</code> types and <code>try</code>/<code>catch</code> statements, and leans somewhat on Puck's metaprogramming capabilities.</p> -<p>There are several ways to handle errors in Puck. 
If the error is encoded in the type, one can:</p> +<div style="break-before: page; page-break-before: always;"></div><h1 id="error-handling-1"><a class="header" href="#error-handling-1">Error Handling</a></h1> +<p>Puck's error handling is heavily inspired syntactically by Swift and semantically by the underlying effects system. It uses a combination of monadic error handling and effectful error propagation, with much in the way of syntactic sugar for conversion between the two, and leans somewhat heavily on Puck's metaprogramming capabilities. In comparison to Rust, it is considerably more dynamic by default.</p> +<p>There are several ways to handle errors in Puck. If the error is encoded in the type (as an <code>Option</code> or <code>Result</code> type), one can:</p> <ol> <li><code>match</code> on the error</li> <li>compactly match on the error with <code>if ... of</code></li> <li>propagate the error with <code>?</code></li> <li>throw the error with <code>!</code></li> </ol> -<p>If an error is thrown, one <strong>must</strong> explicitly handle (or disregard) it with a <code>try/catch</code> block or risk runtime failure. This method of error handling may feel more familiar to Java programmers.</p> -<h2 id="errors-as-monads"><a class="header" href="#errors-as-monads">Errors as Monads</a></h2> -<p>Puck provides <a href="std/default/options.pk"><code>Option[T]</code></a> and a <a href="std/default/results.pk"><code>Result[T, E]</code></a> types, imported by default. These are <code>union</code> types and so must be pattern matched upon to be useful: but the standard library provides <a href="std/default/results.pk">a bevy of helper functions</a>. 
+<p>If the error is thrown (encoded as an effect), one can:</p> +<ol> +<li>ignore the error, propagating it up the call stack</li> +<li>recover from the error in a <code>try</code> block</li> +<li>convert the error to a <code>Result[T]</code> (monadic form)</li> +</ol> +<p>If an error is thrown, one <em>must</em> explicitly handle it at some level of the stack, or risk runtime failure. This method of error handling may feel more familiar to Java programmers. The compiler will warn on - but not enforce catching - such unhandled errors.</p> +<h2 id="errors-as-monads"><a class="header" href="#errors-as-monads">Errors as monads</a></h2> +<p>Puck provides <a href="std/default/options.pk"><code>Option[T]</code></a> and <a href="std/default/results.pk"><code>Result[T, E]</code></a> types, imported by default. These are <code>union</code> types under the hood and so must be pattern matched upon to be useful: but the standard library provides <a href="std/default/results.pk">a bevy of helper functions</a>. Two in particular are of note. The <code>?</code> operator unwraps a Result or propagates its error up a function call (and may only be used in type-appropriate contexts). 
The <code>!</code> operator unwraps an Option or Result directly or throws an exception in the case of None or Error.</p> -<pre><code class="language-puck">pub macro `?`[T, E](self: Result[T, E]) = - quote: +<pre><code class="language-puck">pub macro ?[T, E](self: Result[T, E]) = + quote match `self` - of Okay(x): x - of Error(e): return Error(e) + of Okay(x) then x + of Error(e) then return Error(e) </code></pre> -<pre><code class="language-puck">pub func `!`[T](self: Option[T]): T = +<pre><code class="language-puck">pub func ![T](self: Option[T]): T = match self - of Some(x): x - of None: raise EmptyValue - -pub func `!`[T, E](self: Result[T, E]): T = - of Okay(x): x - of Error(e): raise e -</code></pre> -<p>The utility of the provided helpers in <a href="std/default/options.pk"><code>std.options</code></a> and <a href="std/default/results.pk"><code>std.results</code></a> should not be understated. While encoding errors into the type system may appear restrictive at first glance, some syntactic sugar goes a long way in writing compact and idiomatic code. Java programmers in particular are urged to give type-first errors a try, before falling back on unwraps and <code>try</code>/<code>catch</code>.</p> -<p>A notable helpful type is the aliasing of <code>Result[T]</code> to <code>Result[T, ref Err]</code>, for when the particular error does not matter. This breaks <code>try</code>/<code>catch</code> exhaustion (as <code>ref Err</code> denotes a reference to <em>any</em> Error), but is particularly useful when used in conjunction with the propagation operator.</p> -<h2 id="errors-as-catchable-exceptions"><a class="header" href="#errors-as-catchable-exceptions">Errors as Catchable Exceptions</a></h2> -<p>Errors raised by <code>raise</code>/<code>throw</code> (or subsequently the <code>!</code> operator) must be explicitly caught and handled via a <code>try</code>/<code>catch</code>/<code>finally</code> statement. 
-If an exception is not handled within a function body, the function must be explicitly marked as a throwing function via the <code>yeet</code> prefix (name to be determined). The compiler will statically determine which exceptions in particular are thrown from any given function, and enforce them to be explicitly handled or explicitly ignored.</p> -<p>Despite functioning here as exceptions: errors remain types. An error thrown from an unwrapped <code>Result[T, E]</code> is of type <code>E</code>. <code>catch</code> statements, then, may pattern match upon possible errors, behaving similarly to <code>of</code> branches.</p> -<pre><code class="language-puck">try: - ... -catch "Error": - ... -finally: - ... + of Some(x) then x + of None then raise "empty value" + +pub func ![T, E](self: Result[T, E]): T = + match self + of Okay(x) then x + of Error(e) then raise e </code></pre> -<p>This creates a distinction between two types of error handling, working in sync: functional error handling with <a href="https://en.wikipedia.org/wiki/Option_type">Option</a> and <a href="https://en.wikipedia.org/wiki/Result_type">Result</a> types, and object-oriented error handling with <a href="https://en.wikipedia.org/wiki/Exception_handling">catchable exceptions</a>. These styles may be swapped between with minimal syntactic overhead. Libraries, however, should universally use <code>Option</code>/<code>Result</code>, as this provides the best support for both styles.</p> -<!-- [nullable types](https://en.wikipedia.org/wiki/Nullable_type)?? --> -<h2 id="errors-and-void-functions"><a class="header" href="#errors-and-void-functions">Errors and Void Functions</a></h2> -<p>Some functions do not return a value but can still fail: for example, setters. 
-This can make it difficult to do monadic error handling elegantly: one could return a <code>Result[void, E]</code>, but...</p> -<pre><code class="language-puck">pub func set[T](self: list[T], i: uint, val: T) = - if i > self.length: +<p>The utility of the provided helpers in <a href="std/default/options.pk"><code>std.options</code></a> and <a href="std/default/results.pk"><code>std.results</code></a> should not be understated. While encoding errors into the type system may appear restrictive at first glance, some syntactic sugar goes a long way in writing compact and idiomatic code. Java programmers in particular are urged to give type-first errors a try, before falling back on unwraps and <code>try</code>/<code>with</code>.</p> +<p>A notable helpful type is the aliasing of <code>Result[T]</code> to <code>Result[T, ref Err]</code>, for when the particular error does not matter. This breaks <code>match</code> exhaustion (as <code>ref Err</code> denotes a reference to <em>any</em> Error), but is particularly useful when used in conjunction with the propagation operator.</p> +<h2 id="errors-as-checked-exceptions"><a class="header" href="#errors-as-checked-exceptions">Errors as checked exceptions</a></h2> +<p>Some functions do not return a value but can still fail: for example, setters. This can make it difficult to do monadic error handling elegantly. One could return a <code>type Success[E] = Result[void, E]</code>, but such an approach is somewhat inelegant. Instead: we treat an <code>assert</code> within a function as having an <em>effect</em>: a possible failure, that can be handled and recovered from at any point in the call stack. 
If a possible exception is not handled within a function body, the function is implicitly marked by the compiler as throwing that exception.</p> +<pre><code class="language-puck">pub type list[T] = struct + data: ptr T + capacity: uint + length: uint + +@[safe] +pub func set[T](self: list[T], i: uint, val: T) = + if i > self.length then raise IndexOutOfBounds - self.data.raw_set(offset = i, val) + self.data.set(offset = i, val) + +var foo = ["Hello", "world"] +foo.set(0, "Goodbye") # set can panic +# this propagates an IndexOutOfBounds effect up the call stack. </code></pre> -<h2 id="unrecoverable-exceptions"><a class="header" href="#unrecoverable-exceptions">Unrecoverable Exceptions</a></h2> +<p>Despite functioning here as exceptions: errors remain types. An error thrown from an unwrapped <code>Result[T, E]</code> is of type <code>E</code>. <code>with</code> statements, then, may pattern match upon possible errors, behaving semantically and syntactically similarly to <code>of</code> branches: though notably not requiring exhaustion.</p> +<pre><code class="language-puck">try + foo.set(0, "Goodbye") +with IndexOutOfBounds(index) then + dbg "Index out of bounds at {}".fmt(index) + panic +finally + ... +</code></pre> +<p>This creates a distinction between two types of error handling, working in sync: functional error handling with <a href="https://en.wikipedia.org/wiki/Option_type">Option</a> and <a href="https://en.wikipedia.org/wiki/Result_type">Result</a> types, and <a href="https://en.wikipedia.org/wiki/Exception_handling">object-oriented error handling</a> with <a href="...">algebraic effects</a>. These styles may be swapped between with minimal syntactic overhead. It is up to libraries to determine which classes of errors are exceptional and best given the effect treatment and which should be explicitly handled monadically. 
Libraries should tend towards using <code>Option</code>/<code>Result</code> as this provides the best support for both styles (thanks to the <code>!</code> operator).</p> +<h2 id="unrecoverable-exceptions"><a class="header" href="#unrecoverable-exceptions">Unrecoverable exceptions</a></h2> <p>There exist errors from which a program can not reasonably recover. These are the following:</p> <ul> -<li><code>Assertation Failure</code>: a call to an <code>assert</code> function has returned false at runtime.</li> +<li><code>Assertion Failure</code>: a call to an unhandled <code>assert</code> function has returned false at runtime.</li> <li><code>Out of Memory</code>: the executable is out of memory.</li> <li><code>Stack Overflow</code>: the executable has overflowed the stack.</li> <li>any others?</li> </ul> -<p>They are not recoverable, but the user should be aware of them as possible failure conditions.</p> -<p>References: <a href="https://docs.swift.org/swift-book/documentation/the-swift-programming-language/errorhandling">Error Handling in Swift</a></p> +<p>They are not recoverable, and not handled within the effects system, but the user should be aware of them as possible failure conditions.</p> +<hr /> +<p>References</p> +<ul> +<li><a href="https://docs.swift.org/swift-book/documentation/the-swift-programming-language/errorhandling">Error Handling in Swift</a></li> +<li><a href="https://overreacted.io/algebraic-effects-for-the-rest-of-us/">Algebraic Effects for the rest of us</a></li> +</ul> <div style="break-before: page; page-break-before: always;"></div><h1 id="asynchronous-programming"><a class="header" href="#asynchronous-programming">Asynchronous Programming</a></h1> <blockquote> <p>! This section is a <strong>draft</strong>. Many important details have yet to be ironed out.</p> @@ -1065,12 +1345,12 @@ let c: T = await async fetch_html() ... todo ... 
</code></pre> <pre><code class="language-puck">pub func await[T](self: Future[T]): T = - while not self.ready: - block + while not self.ready do + # block self.value! # apply callbacks? </code></pre> <p>This implementation differs from standard async/await implementations quite a bit. -In particular, this means there is no concept of an "async function" - any block of computation that resolves to a value can be made asynchronous. This allows for "anonymous" async functions, among other things.</p> +In particular, this means there is no concept of an "async function" - any block of computation that resolves to a value can be made asynchronous. This allows for "anonymous" async functions, among other things.</p> <p>This (packaging up blocks of code to suspend and resume arbitrarily) is <em>hard</em>, and requires particular portable intermediate structures out of the compiler. Luckily, Zig is doing all of the R&D here. Some design decisions to consider revolve around <em>APIs</em>. The Linux kernel interface (among other things) provides both synchronous and asynchronous versions of its API, and fast code will use one or the other, depending if it is in an async context. Zig works around this by way of a known global constant that low-level functions read at compile time to determine whether to operate on synchronous APIs or asynchronous APIs. This is... not great. But what's better?</p> <!-- Asynchronous programming is hard to design and hard to use. Even Rust doesn't do a great job. It *shouldn't* need built-in language support - we should be able to encode it as a type and provide any special syntax via macros. Note that async is not just threading! threading is solved well by Rust's rayon and Go's (blugh) goroutines. 
--> <h2 id="threading"><a class="header" href="#threading">Threading</a></h2> @@ -1079,7 +1359,7 @@ In particular, this means there is no concept of an "async function" - <p>References:</p> <ul> <li><a href="https://journal.stuffwithstuff.com/2015/02/01/what-color-is-your-function/">What color is your function?</a></li> -<li><a href="https://kristoff.it/blog/zig-colorblind-async-await/">What is Zig's "colorblind" async/await?</a></li> +<li><a href="https://kristoff.it/blog/zig-colorblind-async-await/">What is Zig's "colorblind" async/await?</a></li> <li><a href="https://ziglearn.org/chapter-5/">Zig Learn: Async</a></li> <li><a href="https://morestina.net/blog/1686/rust-async-is-colored">Rust async is colored and that's not a big deal</a></li> <li><a href="https://old.reddit.com/r/elixir/np688d/">Why is there no need for async/await in Elixir?</a></li> @@ -1091,78 +1371,91 @@ In particular, this means there is no concept of an "async function" - </ul> <p>Is async worth having separate from effect handlers? I think so...</p> <div style="break-before: page; page-break-before: always;"></div><h1 id="metaprogramming"><a class="header" href="#metaprogramming">Metaprogramming</a></h1> -<p>Puck has rich metaprogramming support, heavily inspired by Nim. Many features that would have to be at the compiler level in most languages (error propagation <code>?</code>, <code>std.fmt.print</code>, <code>async</code>/<code>await</code>) are instead implemented as macros within the standard library.</p> -<p>Macros take in fragments of the AST within their scope, transform them with arbitrary compile-time code, and spit back out transformed AST fragments to be injected and checked for validity. This is similar to what Nim and the Lisp family of languages do. -By keeping an intentionally minimal AST, some things not possible to express in literal code may be expressible in the AST: in particular, bindings can be injected in many places they could not be injected in ordinarily. 
(A minimal AST also has the benefit of being quite predictable.)</p> -<p>Macros may not change Puck's syntax: the syntax is flexible enough. Code is syntactically checked (parsed), but <em>not</em> semantically checked (typechecked) before being passed to macros. This may change in the future<!-- (to require arguments to be semantically correct)-->. Macros have the same scope as other routines, that is:</p> +<p>Puck has rich metaprogramming support, heavily inspired by Nim. Many features that would have to be at the compiler level in most languages (error propagation <code>?</code>, <code>std.fmt.print</code>, <code>?</code>, <code>!</code>, <code>-></code> type sugar, <code>=></code> closure sugar, <code>async</code>/<code>await</code>) are instead implemented as macros within the standard library.</p> +<p>Macros take in fragments of the AST within their scope, transform them with arbitrary compile-time code, and spit back out transformed AST fragments to be injected and checked for validity. This is similar to what the Lisp family of languages do. It has a number of benefits: there is no separate metaprogramming language, it is syntactically and semantically hygienic, and the underlying framework can be reused for all kinds of compile-time code execution.</p> +<p>By keeping an intentionally minimal AST, some things not possible to express in literal code may be expressible in the AST: in particular, bindings can be injected in many places they could not be injected in ordinarily. (A minimal AST also has the benefit of being quite predictable.)</p> +<p>Macros may not change Puck's syntax: the syntax is flexible enough. 
They have the same scope as other routines, that is:</p> <p><strong>function scope</strong>: takes the arguments within or following a function call</p> <pre><code class="language-puck">macro print(params: varargs) = - for param in params: - result.add(quote(stdout.write(`params`.str))) + var res = Call("write", [stdout]) + for param in params do + res.params.add(param) print(1, 2, 3, 4) -print "hello", " ", "world", "!" +print "hello", " ", "world", "!" </code></pre> <p><strong>block scope</strong>: takes the expression following a colon as a single argument</p> <pre><code class="language-puck">macro my_macro(body) -my_macro: +my_macro 1 2 3 4 </code></pre> -<p><strong>operator scope</strong>: takes one or two parameters either as a postfix (one parameter) or an infix (two parameters) operator</p> -<pre><code class="language-puck">macro +=(a, b) = - quote: - `a` = `a` + `b` +<p><strong>operator scope</strong>: takes one or two parameters either as an infix (two parameters) or a postfix (one parameter) operator</p> +<pre><code class="language-puck"># operators are restricted to punctuation +macro +=(a, b) = + Call("=", [a, Call("+", [a, b])]) a += b </code></pre> <p>Macros typically take a list of parameters <em>without</em> types, but they optionally may be given a type to constrain the usage of a macro. Regardless: as macros operate at compile time, their parameters are not instances of a type, but rather an <code>Expr</code> expression representing a portion of the <em>abstract syntax tree</em>. Similarly, macros always return an <code>Expr</code> to be injected into the abstract syntax tree despite the usual absence of an explicit return type, but the return type may be specified to additionally typecheck the returned <code>Expr</code>.</p> <pre><code class="language-puck"></code></pre> -<p>As macros operate at compile time, they may not inspect the <em>values</em> that their parameters evaluate to. 
However, parameters may be marked with <code>static[T]</code>: in which case they will be treated like parameters in functions: as values. (note static parameters may be written as <code>static[T]</code> or <code>static T</code>.) There are many restrictions on what might be <code>static</code> parameters. Currently, it is constrained to literals i.e. <code>1</code>, <code>"hello"</code>, etc, though this will hopefully be expanded to any function that may be evaluated statically in the future.</p> +<p>As macros operate at compile time, they may not inspect the <em>values</em> that their parameters evaluate to. However, parameters may be marked <code>const</code>: in which case they will be treated like parameters in functions: as values. (note constant parameters may be written as <code>const[T]</code> or <code>const T</code>.)</p> <pre><code class="language-puck">macro ?[T, E](self: Result[T, E]) = - quote: - match self - of Okay(x): x - of Error(e): return Error(e) + quote + match `self` + of Okay(x) then x + of Error(e) then return Error(e) func meow: Result[bool, ref Err] = let a = stdin.get()? </code></pre> -<p>The <code>quote</code> macro is special. It takes in literal code and returns that code <strong>as the AST</strong>. Within quoted data, backticks may be used to break out in order to evaluate and inject arbitrary code: though the code must evaluate to an expression of type <code>Expr</code>. <!-- Variables (of type `Expr`) may be *injected* into the literal code by wrapping them in backticks. This reuse of backticks does mean that defining new operators is impossible within quoted code. --></p> +<p>The <code>quote</code> macro is special. It takes in literal code and returns that code <strong>as the AST</strong>. Within quoted data, backticks may be used to break out in order to evaluate and inject arbitrary code: though the code must evaluate to an expression of type <code>Expr</code>. 
Thus, quoting is <em>structured</em>: one cannot simply quote any arbitrary section. Quoting is very powerful: most macros are implemented using it.</p> <pre><code class="language-puck"></code></pre> <p>The <code>Expr</code> type is available from <code>std.ast</code>, as are many helpers, and combined they provide the construction of arbitrary syntax trees (indeed, <code>quote</code> relies on and emits types of it). It is a <code>union</code> type with its variants directly corresponding to the variants of the internal AST of Puck.</p> <pre><code class="language-puck"></code></pre> -<p>Construction of macros can be difficult: and so several helpers are provided to ease debugging. The <code>Debug</code> and <code>Display</code> interfaces are implemented for abstract syntax trees: <code>dbg</code> will print a representation of the passed syntax tree as an object, and <code>print</code> will print a best-effort representation as literal code. Together with <code>quote</code> and optionally with <code>static</code>, these can be used to quickly get the representation of arbitrary code.</p> +<p>Construction of macros can be difficult: and so several helpers are provided to ease debugging. The <code>Debug</code> and <code>Display</code> interfaces are implemented for abstract syntax trees: <code>dbg</code> will print a representation of the passed syntax tree as an object, and <code>print</code> will print a best-effort representation as literal code. Together with <code>quote</code> and optionally with <code>const</code>, these can be used to quickly get the representation of arbitrary code.</p> <div style="break-before: page; page-break-before: always;"></div><h1 id="interop-with-other-languages"><a class="header" href="#interop-with-other-languages">Interop with Other Languages</a></h1> <blockquote> <p>! This section is a <strong>draft</strong>. 
Many important details have yet to be ironed out.</p> </blockquote> <p>A major goal of Puck is <em>minimal-overhead language interoperability</em> while maintaining type safety.</p> +<h2 id="the-problems-of-interop"><a class="header" href="#the-problems-of-interop">The problems of interop</a></h2> <p>There are three issues that complicate language interop:</p> <ol> -<li>Conflicting memory management systems, i.e. Boehm GC vs. reference counting</li> -<li>Conflicting type systems, i.e. Python vs. Rust</li> <li>The language of communication, i.e. the C ABI.</li> +<li>Conflicting type systems, i.e. Python vs. Rust</li> +<li>Conflicting memory management systems, i.e. tracing / reference counting vs. ownership</li> </ol> -<p>For the first, Puck uses what amounts to a combination of ownership and reference counting: and thus it is exchangeable in this regard with Nim (same system), Rust (ownership), Swift (reference counting), and many others. (It should be noted that ownership systems are broadly compatible with reference counting systems).</p> -<p>For the second, Puck has a type system of similar capability to that of Rust, Nim, and Swift: and thus interop with those languages should be straightforward for the user. Its type system is strictly more powerful than that of Python or C, and so interop requires additional help. Its type system is equally as powerful as but somewhat orthogonal to Java's, and so interop is a little more difficult.</p> -<p>For the third, Puck is being written at the same time as the crABI ABI spec is in development. crABI promises a C-ABI-compatible, cross-language ABI spec, which would <em>dramatically</em> simplify the task of linking to object files produced by other languages. It is being led by the Rust language team, and both the Nim and Swift teams have expressed interest in it, which bodes quite well for its future.</p> -<p>Languages often focus on interop from purely technical details. 
This <em>is</em> very important: but typically no thought is given to usability (and often none can be, for necessity of compiler support), and so using foreign function interfaces very much feel like using <em>foreign</em> interfaces. Puck attempts to change that.</p> +<p>For the first, Puck is being written at the same time as the crABI ABI spec is in development. crABI promises a C-ABI-compatible, cross-language ABI spec: which would <em>dramatically</em> simplify the task of linking to object files produced by other languages (so long as languages actually conform to the ABI). It is being led by the Rust language team, and both Nim and Swift developers have expressed interest in it, which bodes quite well for its future.</p> +<p>For the second, Puck has a type system of similar capability to that of Rust, Nim, and Swift: and thus interop with those languages should be a straightforward exchange of types. Its type system is strictly more powerful than that of Python or C, and so interop requires additional help. Its type system is equally as powerful as but somewhat orthogonal to Java's, and so interop will be a little more difficult.</p> +<p>For the third: Puck uses what amounts to a combination of ownership and reference counting: and thus it is exchangeable in this regard with Rust. Nim and Swift, by contrast, use reference counting: which is not directly compatible with ownership, as attempting to use an owned type as a GC'd reference will immediately lead to a use-after-free. Puck may have to explore some form of gradual typing at linking-time to accommodate making its functions available for use. Using functions from GC'd languages, however, is perfectly doable with the <code>refc</code> type: though this may necessitate copying object graphs over the call boundary.</p> +<p>There is additional significant work being put into the use of Wasm as a language runtime. 
Wasm allows for - among other things - the <em>sharing</em> of garbage collectors, which means that any garbage-collected language compiling to it can simply use the primitive <code>refc</code> type to denote a garbage-collected reference. This does not, however, immediately work off the bat with ownership: as ownership necessitates certain invariants that garbage collection does not preserve. There is active research into fixing this: notably RichWasm, which retrofits a structural type system with ownership atop Wasm. Such extensions necessitate the runtime environment to implement them, however, and so Puck may have to explore some form of gradual typing for the broader Wasm ecosystem.</p> +<h2 id="usability"><a class="header" href="#usability">Usability</a></h2> +<pre><code class="language-puck">use std.io +use rust.os.linux +use nim.os.sleep +... +</code></pre> +<p>Languages often focus on interop from purely technical details. This <em>is</em> very important: but typically little thought is given to usability (and often none can be, for necessity of compiler support), and so using foreign function interfaces very much feels like using <em>foreign</em> function interfaces. Puck attempts to change that.</p> +<pre><code class="language-puck">@[form(this-function)] +pub func this_function() = ... +</code></pre> +<p>A trivial concern is that identifiers are not always the same across languages: for example, in Racket <code>this-function</code> is a valid identifier, while in Puck the <code>-</code> character is disallowed outright. Matters of convention are issues, too: in Puck, <code>snake_case</code> is preferred for functions and <code>PascalCase</code> for types, but this is certainly not always the case. 
Puck addresses this at an individual level by attributes allowing for rewriting: and at a language level by consistent rewrite rules.</p> <p>...todo...</p> +<hr /> <p>Existing systems to learn from:</p> <ul> <li><a href="https://doc.rust-lang.org/reference/abi.html">The Rust ABI</a></li> -<li>https://www.hobofan.com/rust-interop/</li> +<li><a href="https://www.hobofan.com/rust-interop/">rust-interop</a></li> <li><a href="https://github.com/eqrion/cbindgen">CBindGen</a></li> -<li>https://github.com/chinedufn/swift-bridge</li> -<li>https://kotlinlang.org/docs/native-c-interop.html</li> -<li>https://github.com/crackcomm/rust-lang-interop</li> -<li>https://doc.rust-lang.org/reference/abi.html</li> -<li>https://doc.rust-lang.org/reference/items/functions.html#extern-function-qualifier</li> +<li><a href="https://github.com/chinedufn/swift-bridge">swift-bridge</a></li> +<li><a href="https://kotlinlang.org/docs/native-c-interop.html">Kotlin C interop</a></li> +<li><a href="https://github.com/crackcomm/rust-lang-interop">rust-lang-interop</a></li> +<li><a href="https://doc.rust-lang.org/reference/items/functions.html#extern-function-qualifier">extern in Rust</a></li> <li><a href="https://github.com/yglukhov/nimpy">NimPy</a></li> <li><a href="https://github.com/yglukhov/jnim">JNim</a></li> <li><a href="https://github.com/PMunch/futhark">Futhark</a></li> @@ -1186,22 +1479,6 @@ func meow: Result[bool, ref Err] = </div> - <!-- Livereload script (if served using the cli tool) --> - <script> - const wsProtocol = location.protocol === 'https:' ? 'wss:' : 'ws:'; - const wsAddress = wsProtocol + "//" + location.host + "/" + "__livereload"; - const socket = new WebSocket(wsAddress); - socket.onmessage = function (event) { - if (event.data === "reload") { - socket.close(); - location.reload(); - } - }; - - window.onbeforeunload = function() { - socket.close(); - } - </script> |