aboutsummaryrefslogtreecommitdiff
path: root/docs/SYNTAX.md
blob: 49fdd599762b89a8edcfca543f500edbae49abfe (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Syntax: A Casual and Formal Look

...

## A Formal Look

We shall now take a look at a more formal description of Puck's syntax. Syntax rules are described in extended Backus–Naur form (EBNF), but most rules concerning whitespace, scope, and line breaks are written as they would appear after a lexing step (whitespace is removed, line breaks are normalized, and scope is delimited with `{` and `}`).

### Identifiers
```
IDENT  ::= LETTER (LETTER | DIGIT | '_')* # todo: support _
LETTER ::= 'A'..'Z' | 'a'..'z' | '\x80'..'\xff' # todo
DIGIT  ::= '0'..'9'
```

### Literals
```
INT_LIT ::= '-'? (DEC_LIT | HEX_LIT | OCT_LIT | BIN_LIT)
BIN_LIT ::= '0b' BIN_DIGIT ('_'? BIN_DIGIT)*
OCT_LIT ::= '0o' OCT_DIGIT ('_'? OCT_DIGIT)*
HEX_LIT ::= '0x' HEX_DIGIT ('_'? HEX_DIGIT)*
DEC_LIT ::= DIGIT ('_'? DIGIT)*
BIN_DIGIT ::= '0'..'1'
OCT_DIGIT ::= '0'..'7'
HEX_DIGIT ::= DIGIT | 'A'..'F' | 'a'..'f'
```

### Operators
```
OPERATOR ::= 'and' | 'or' | 'not' | 'xor' | 'shl' | 'shr' | # todo: more?
             'div' | 'mod' | 'rem' | 'is' | 'isnot' | OPR+
OPR ::= '=' | '+' | '-' | '*' | '/' | '<' | '>' | # todo: more?
        '@' | '$' | '~' | '&' | '%' | '|' |
        '!' | '?' | '^' | '.' | ':' | '\\'
```

### Chars, Strings, and Comments
```
CHAR    ::= '\'' (PRINT - '\'' | '\\\'')* '\''
STRING  ::= SINGLE_LINE_STRING | MULTI_LINE_STRING
COMMENT ::= SINGLE_LINE_COMMENT | MULTI_LINE_COMMENT | EXPRESSION_COMMENT
SINGLE_LINE_STRING  ::= '"' (PRINT - '"' | '\\"')* '"'
MULTI_LINE_STRING   ::= '"""' (PRINT | '\n' | '\r')* '"""'
SINGLE_LINE_COMMENT ::= '#' PRINT*
MULTI_LINE_COMMENT  ::= '#[' (PRINT | '\n' | '\r' | MULTI_LINE_COMMENT)* ']#'
EXPRESSION_COMMENT  ::= '#;' SINGLE_STMT
PRINT ::= LETTER | DIGIT | OPR |
          '"' | '#' | "'" | '(' | ')' | # notably the dual of OPR
          ',' | ';' | '[' | ']' | '_' |
          '`' | '{' | '}' | ' ' | '\t'
```

### Values
```
VALUE ::= INT_LIT | STRING | CHAR | LIST_DECL | ARRAY_DECL | TUPLE_DECL | STRUCT_DECL
LIST_DECL   ::= '[' (EXPR (',' EXPR)*)? ']'
ARRAY_DECL  ::= '[' (EXPR (',' EXPR)*)? ']'
TUPLE_DECL  ::= '(' (IDENT '=')? EXPR (',' (IDENT '=')? EXPR)* ')'
STRUCT_DECL ::= '{' IDENT '=' EXPR (',' IDENT '=' EXPR)* '}'
# note: no union or enum. should struct exist? only in a structural system.
```

### Variables
```
DECL       ::= LET_DECL | VAR_DECL | CONST_DECL | FUNC_DECL | TYPE_DECL
LET_DECL   ::= 'let' GROUP ANNOTATION? '=' EXPR
VAR_DECL   ::= 'var' GROUP ANNOTATION? ('=' EXPR)?
CONST_DECL ::= 'pub'? 'const' GROUP ANNOTATION? '=' EXPR
GROUP      ::= ('(' IDENT (',' IDENT)* ')') | IDENT
```

### Functions
```
FUNC_DECL  ::= SIGNATURE '=' (EXPR | STMT)
SIGNATURE  ::= 'pub'? ('pure' | 'yeet' | IDENT)? 'func' IDENT GENERICS? PARAMETERS?
PARAMETERS ::= '(' (PARAMETER (',' PARAMETER)*)? ')'
PARAMETER  ::= ('var' | 'static')? IDENT ANNOTATION?
GENERICS   ::= '[' IDENT ANNOTATION? (',' IDENT ANNOTATION?)* ']'
ANNOTATION ::= ':' TYPE_DESC
```

### Types
```
TYPE_DECL   ::= 'pub'? 'type' IDENT GENERICS? '=' 'ref'? 'distinct'? TYPE_DESC
TYPE_DESC   ::= TUPLE_TYPE | STRUCT_TYPE | UNION_TYPE | ENUM_TYPE | FUNC_TYPE | INTERFACE | IDENT |
                (TYPE_DESC ('|' TYPE_DESC)+)
TUPLE_TYPE  ::= 'tuple' '[' (IDENT ':')? TYPE_DESC (',' (IDENT ':')? TYPE_DESC)* ']'
STRUCT_TYPE ::= 'struct' '[' IDENT ANNOTATION (',' IDENT ANNOTATION)* ']'
UNION_TYPE  ::= 'union'  '[' IDENT ANNOTATION (',' IDENT ANNOTATION)* ']'
ENUM_TYPE   ::= 'enum' '[' IDENT ('=' EXPR)? (',' IDENT ('=' EXPR)?)* ']'
FUNC_TYPE   ::= 'func' GENERICS? PARAMETERS? ANNOTATION?
INTERFACE   ::= 'interface' '[' SIGNATURE (',' SIGNATURE)* ('for' TYPE_DESC)? ']'
```

### Control Flow
```
IF_EXPR    ::= 'if' EXPR '{' EXPR '}' ('elif' EXPR '{' EXPR '}')* 'else' '{' EXPR '}'
IF_STMT    ::= 'if' EXPR '{' STMT '}' ('elif' EXPR '{' STMT '}')* ('else' '{' STMT '}')?
WHEN_EXPR  ::= 'when' EXPR '{' EXPR '}' ('elif' EXPR '{' EXPR '}')* 'else' '{' EXPR '}'
WHEN_STMT  ::= 'when' EXPR '{' STMT '}' ('elif' EXPR '{' STMT '}')* ('else' '{' STMT '}')?
BLOCK_EXPR ::= 'block' IDENT? '{' EXPR '}'
BLOCK_STMT ::= 'block' IDENT? '{' STMT '}'
MATCH_EXPR ::= 'match' EXPR '{'
               ('case' EXPR ('where' EXPR)? (',' EXPR ('where' EXPR)?)* '{' EXPR '}')+ '}'
MATCH_STMT ::= 'match' EXPR '{'
               ('case' EXPR ('where' EXPR)? (',' EXPR ('where' EXPR)?)* '{' STMT '}')+ '}'
LOOP_STMT  ::= 'loop' '{' STMT '}'
WHILE_STMT ::= 'while' EXPR '{' STMT '}'
FOR_STMT   ::= 'for' GROUP 'in' EXPR '{' STMT '}'
```

### Modules
```
IMPORT_STMT ::= 'import' IDENT_AS?
                ('/' (IDENT_AS | '[' (IDENT_AS (',' IDENT_AS)*)? ']'))*
EXPORT_STMT ::= 'export' IDENT_AS?
                ('/' (IDENT_AS | '[' (IDENT_AS (',' IDENT_AS)*)? ']'))*
MODULE_STMT ::= 'module' IDENT '{' STMT '}'
IDENT_AS    ::= IDENT ('as' IDENT)?
```

### Macros
```
MACRO_FUNC  ::= IDENT '(' EXPR ')'
MACRO_BLOCK ::= IDENT '{' EXPR '}' # todo
```

### Calls, Statements, and Expressions
```
OPERATION   ::= EXPR OPERATOR EXPR
PREFIX      ::= OPERATOR EXPR
SUFFIX      ::= EXPR OPERATOR
APPLICATION ::= IDENT PARAMS? | IDENT EXPR | (APPLICATION | PREFIX | SUFFIX) '.' IDENT PARAMS?
PARAMS      ::= '(' ((IDENT '=')? EXPR (',' (IDENT '=')? EXPR)*)? ')'
```

```
EXPR ::= IF_EXPR | WHEN_EXPR | BLOCK_EXPR | MATCH_EXPR |
         MACRO_FUNC | MACRO_BLOCK |
         APPLICATION | OPERATION | PREFIX | SUFFIX |
         VALUE | (STMT EXPR) # todo
STMT ::= IF_STMT | WHEN_STMT | BLOCK_STMT | MATCH_STMT |
         LOOP_STMT | WHILE_STMT | FOR_STMT |
         IMPORT_STMT | EXPORT_STMT | MODULE_STMT |
         ((APPLICATION | OPERATION | PREFIX | SUFFIX | DECL) ';') | (STMT+ STMT)
```