aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJJ2023-07-18 05:11:40 +0000
committerJJ2023-07-18 06:48:33 +0000
commitfbfeffad5d85b73102a0f4e5ed607c33319f09e6 (patch)
tree6c2a9e53e838cbcc78a3638ca7b9347c88853f0c /src
parent4a3796c1ff54b44b1493f3afed18d0dd71b5f19f (diff)
compiler: implement a borrow checker free tree and begin lexing
Diffstat (limited to 'src')
-rw-r--r--src/lex.rs121
-rw-r--r--src/main.rs8
-rw-r--r--src/tree.rs133
3 files changed, 262 insertions, 0 deletions
diff --git a/src/lex.rs b/src/lex.rs
new file mode 100644
index 0000000..7c0cc21
--- /dev/null
+++ b/src/lex.rs
@@ -0,0 +1,121 @@
+use std::todo;
+use crate::tree::Tree;
+use multipeek::multipeek;
+
+/// **Basic** syntax tokens.
+pub enum Token {
+ Word(String),
+ Lit(String),
+ Sep(char)
+}
+
+/// Parses whitespace-sensitive code into an unambiguous syntax tree.
+/// Also useful for formatting.
+pub fn tokenize(input: &str) -> Tree<Vec<Token>> {
+ use Token::*;
+ let mut indendation_level = 0;
+ let mut buffer = String::new();
+ let mut result = Tree::new(Vec::new());
+ let ctx = result.data(result.root());
+
+ // `char` in rust is four bytes it's fine
+ let mut input = multipeek(input.chars());
+ while let Some(c) = input.next() {
+ match c {
+ ' ' => todo!(),
+ '\n' => todo!(),
+ '\t' => todo!(),
+ '\'' => {
+ ctx.push(Sep('\''));
+ while let Some(x) = input.next() {
+ if x == '\\' {
+ buffer.push(x);
+ continue;
+ } else if x == '\'' {
+ break;
+ } else {
+ buffer.push(x);
+ }
+ }
+ ctx.push(Lit(String::from(&buffer)));
+ ctx.push(Sep('\''));
+ },
+ '"' => { // triple quoted strings and regular strings
+ if input.peek_nth(0) == Some(&'"') &&
+ input.peek_nth(1) == Some(&'"') {
+ input.next(); input.next();
+ ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"'));
+ while let Some(c) = input.next() {
+ if c == '"' &&
+ input.peek_nth(1) == Some(&'"') &&
+ input.peek_nth(2) == Some(&'"') {
+ input.next(); input.next();
+ break;
+ }
+ buffer.push(c);
+ }
+ ctx.push(Lit(String::from(&buffer)));
+ ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"'));
+ } else {
+ ctx.push(Sep('"'));
+ while let Some(x) = input.next() {
+ if x == '\\' {
+ buffer.push(x);
+ continue;
+ } else if x == '"' {
+ break;
+ }
+ buffer.push(c);
+ }
+ ctx.push(Lit(String::from(&buffer)));
+ ctx.push(Sep('"'));
+ }
+ },
+ '#' => {
+ if input.peek() == Some(&'[') {
+ input.next();
+ ctx.push(Sep('#')); ctx.push(Sep('['));
+ let mut comment_level = 1;
+ while let Some(x) = input.next() && comment_level > 0 {
+ if x == '#' && input.peek() == Some(&'[') {
+ comment_level += 1;
+ input.next();
+ } else if x == ']' && input.peek() == Some(&'#') {
+ comment_level -= 1;
+ input.next();
+ } else {
+ buffer.push(x);
+ }
+ }
+ ctx.push(Lit(String::from(&buffer)));
+ ctx.push(Sep(']')); ctx.push(Sep('#'));
+ } else {
+ ctx.push(Sep('#'));
+ while let Some(x) = input.next() && x != '\n' {
+ buffer.push(x);
+ }
+ ctx.push(Lit(String::from(&buffer)));
+ }
+ },
+ 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => {
+ while let Some(x) = input.peek() {
+ match x {
+ 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => {
+ buffer.push(c);
+ input.next();
+ },
+ _ => break
+ }
+ }
+ ctx.push(Word(String::from(&buffer)));
+ },
+ '.' | ',' | ':' | ';' |
+ '(' | ')' | '[' | ']' | '{' | '}' => ctx.push(Sep(c)),
+ _ => ctx.push(Sep(c))
+ }
+ buffer.clear();
+ }
+ return result;
+}
+
+// note: we can't have a TokenStream because there is significant whitespace. so, we construct the tree structure here, although we don't do anything fancy with the tokens.
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e61c2ec
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,8 @@
+#![feature(exclusive_range_pattern, let_chains)]
+
+mod lex;
+// mod parse;
+// mod check;
+mod tree;
+
+fn main() { }
diff --git a/src/tree.rs b/src/tree.rs
new file mode 100644
index 0000000..02dc749
--- /dev/null
+++ b/src/tree.rs
@@ -0,0 +1,133 @@
+/// A simple append-only tree data structure. Represented as a Vec.
+pub struct Tree<T>(Vec<Node<T>>);
+
+/// The associated Node to the Tree.
+/// Somewhat optimized for memory usage rather than CPU cycles.
+/// firstborn is accessed as oldest_child and youngest_child is computed.
+pub struct Node<T> {
+ parent: Option<NodeId>,
+ previous_sibling: Option<NodeId>,
+ next_sibling: Option<NodeId>,
+ firstborn: Option<NodeId>,
+ data: T
+}
+
+#[derive(Copy, Clone)]
+pub struct NodeId(usize);
+
+impl<T> Tree<T> {
+ /// Create a new Tree with a root element.
+ pub fn new(data: T) -> Tree<T> {
+ Tree(vec![Node {
+ parent: None, previous_sibling: None,
+ next_sibling: None, firstborn: None,
+ data
+ }])
+ }
+
+ /// Get a raw Node for viewing familial relations.
+ /// This unwrap() should be safe with one Tree
+ /// instance as NodeIds may not be directly constructed.
+ fn get(&self, id: NodeId) -> &Node<T> {
+ &self.0[id.0]
+ }
+
+ /// Get a raw Node for modifying familial relations.
+ /// This unwrap() should be safe with one Tree
+ /// instance as NodeIds may not be directly constructed.
+ fn get_mut(&mut self, id: NodeId) -> &mut Node<T> {
+ &mut self.0[id.0]
+ }
+
+ /// Get the data associated with a Node.
+ pub fn data(&mut self, id: NodeId) -> &mut T {
+ &mut self.get_mut(id).data
+ }
+
+ /// Get the data associated with a Node.
+ // pub fn data(&self, id: NodeId) -> Option<T> {
+ // match self.0.get(id.0) {
+ // Some(Node{ data, .. }) => Some(*data),
+ // None => None
+ // }
+ // }
+
+ /// Get the root node of the Tree.
+ pub fn root(&self) -> NodeId { NodeId(0) }
+
+ /// Get the root node(s) of the Tree.
+ // pub fn root(&self) -> Vec<NodeId> {
+ // let mut roots = Vec::new();
+ // for (i, node) in self.0.iter().enumerate() {
+ // if let Node{ parent, .. } = node && let None = parent {
+ // roots.push(NodeId(i));
+ // }
+ // }
+ // return roots;
+ // }
+
+ /// Get all the direct children of a Node.
+ pub fn children(&self, id: NodeId) -> Vec<NodeId> {
+ let mut result = Vec::new();
+ if let Some(node) = self.0.get(id.0) {
+ let mut child = node.firstborn;
+ while let Some(id) = child {
+ result.push(id);
+ child = self.0.get(id.0).map(|x| x.next_sibling).flatten();
+ }
+ }
+ return result;
+ }
+
+ /// Get the eldest child of a Node.
+ pub fn eldest_child(&self, id: NodeId) -> Option<NodeId> {
+ self.get(id).firstborn
+ }
+
+ /// Get the youngest child of a Node.
+ pub fn youngest_child(&self, id: NodeId) -> Option<NodeId> {
+ let mut result = None;
+ if let Some(node) = self.0.get(id.0) {
+ let mut child = node.firstborn;
+ while let Some(id) = child {
+ result = Some(id);
+ child = self.0.get(id.0).map(|x| x.next_sibling).flatten();
+ }
+ }
+ return result;
+ }
+
+ /// Get the previous sibling of a Node.
+ pub fn previous_sibling(&self, id: NodeId) -> Option<NodeId> {
+ self.get(id).parent
+ }
+
+ /// Get the next sibling of a Node.
+ pub fn next_sibling(&self, id: NodeId) -> Option<NodeId> {
+ self.get(id).parent
+ }
+
+ /// Get the parent of a Node.
+ pub fn parent(&self, id: NodeId) -> Option<NodeId> {
+ self.get(id).parent
+ }
+
+ /// Add a Node as a direct child of another Node in the tree.
+ pub fn add_child(&mut self, parent: NodeId, data: T) -> NodeId {
+ let id = NodeId(self.0.len() - 1);
+ let previous_sibling = self.youngest_child(parent);
+ if let Some(node) = previous_sibling {
+ self.get_mut(node).next_sibling = Some(id);
+ } else {
+ self.get_mut(parent).firstborn = Some(id);
+ }
+ self.0.push(Node {
+ parent: Some(parent),
+ previous_sibling,
+ next_sibling: None,
+ firstborn: None,
+ data: data
+ });
+ return id;
+ }
+}