diff options
author | JJ | 2023-07-18 05:11:40 +0000 |
---|---|---|
committer | JJ | 2023-07-18 06:48:33 +0000 |
commit | fbfeffad5d85b73102a0f4e5ed607c33319f09e6 (patch) | |
tree | 6c2a9e53e838cbcc78a3638ca7b9347c88853f0c /src | |
parent | 4a3796c1ff54b44b1493f3afed18d0dd71b5f19f (diff) |
compiler: implement a borrow checker free tree and begin lexing
Diffstat (limited to 'src')
-rw-r--r-- | src/lex.rs | 121 | ||||
-rw-r--r-- | src/main.rs | 8 | ||||
-rw-r--r-- | src/tree.rs | 133 |
3 files changed, 262 insertions, 0 deletions
diff --git a/src/lex.rs b/src/lex.rs new file mode 100644 index 0000000..7c0cc21 --- /dev/null +++ b/src/lex.rs @@ -0,0 +1,121 @@ +use std::todo; +use crate::tree::Tree; +use multipeek::multipeek; + +/// **Basic** syntax tokens. +pub enum Token { + Word(String), + Lit(String), + Sep(char) +} + +/// Parses whitespace-sensitive code into an unambiguous syntax tree. +/// Also useful for formatting. +pub fn tokenize(input: &str) -> Tree<Vec<Token>> { + use Token::*; + let mut indendation_level = 0; + let mut buffer = String::new(); + let mut result = Tree::new(Vec::new()); + let ctx = result.data(result.root()); + + // `char` in rust is four bytes it's fine + let mut input = multipeek(input.chars()); + while let Some(c) = input.next() { + match c { + ' ' => todo!(), + '\n' => todo!(), + '\t' => todo!(), + '\'' => { + ctx.push(Sep('\'')); + while let Some(x) = input.next() { + if x == '\\' { + buffer.push(x); + continue; + } else if x == '\'' { + break; + } else { + buffer.push(x); + } + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep('\'')); + }, + '"' => { // triple quoted strings and regular strings + if input.peek_nth(0) == Some(&'"') && + input.peek_nth(1) == Some(&'"') { + input.next(); input.next(); + ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"')); + while let Some(c) = input.next() { + if c == '"' && + input.peek_nth(1) == Some(&'"') && + input.peek_nth(2) == Some(&'"') { + input.next(); input.next(); + break; + } + buffer.push(c); + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"')); + } else { + ctx.push(Sep('"')); + while let Some(x) = input.next() { + if x == '\\' { + buffer.push(x); + continue; + } else if x == '"' { + break; + } + buffer.push(c); + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep('"')); + } + }, + '#' => { + if input.peek() == Some(&'[') { + input.next(); + ctx.push(Sep('#')); ctx.push(Sep('[')); + let mut comment_level = 1; + while let Some(x) = input.next() && comment_level > 0 { + if x == '#' && input.peek() == Some(&'[') { + comment_level += 1; + input.next(); + } else if x == ']' && input.peek() == Some(&'#') { + comment_level -= 1; + input.next(); + } else { + buffer.push(x); + } + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep(']')); ctx.push(Sep('#')); + } else { + ctx.push(Sep('#')); + while let Some(x) = input.next() && x != '\n' { + buffer.push(x); + } + ctx.push(Lit(String::from(&buffer))); + } + }, + 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { + while let Some(x) = input.peek() { + match x { + 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { + buffer.push(c); + input.next(); + }, + _ => break + } + } + ctx.push(Word(String::from(&buffer))); + }, + '.' | ',' | ':' | ';' | + '(' | ')' | '[' | ']' | '{' | '}' => ctx.push(Sep(c)), + _ => ctx.push(Sep(c)) + } + buffer.clear(); + } + return result; +} + +// note: we can't have a TokenStream because there is significant whitespace. so, we construct the tree structure here, although we don't do anything fancy with the tokens. diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e61c2ec --- /dev/null +++ b/src/main.rs @@ -0,0 +1,8 @@ +#![feature(exclusive_range_pattern, let_chains)] + +mod lex; +// mod parse; +// mod check; +mod tree; + +fn main() { } diff --git a/src/tree.rs b/src/tree.rs new file mode 100644 index 0000000..02dc749 --- /dev/null +++ b/src/tree.rs @@ -0,0 +1,133 @@ +/// A simple append-only tree data structure. Represented as a Vec. +pub struct Tree<T>(Vec<Node<T>>); + +/// The associated Node to the Tree. +/// Somewhat optimized for memory usage rather than CPU cycles. +/// firstborn is accessed as oldest_child and youngest_child is computed. +pub struct Node<T> { + parent: Option<NodeId>, + previous_sibling: Option<NodeId>, + next_sibling: Option<NodeId>, + firstborn: Option<NodeId>, + data: T +} + +#[derive(Copy, Clone)] +pub struct NodeId(usize); + +impl<T> Tree<T> { + /// Create a new Tree with a root element. + pub fn new(data: T) -> Tree<T> { + Tree(vec![Node { + parent: None, previous_sibling: None, + next_sibling: None, firstborn: None, + data + }]) + } + + /// Get a raw Node for viewing familial relations. + /// This unwrap() should be safe with one Tree + /// instance as NodeIds may not be directly constructed. + fn get(&self, id: NodeId) -> &Node<T> { + &self.0[id.0] + } + + /// Get a raw Node for modifying familial relations. + /// This unwrap() should be safe with one Tree + /// instance as NodeIds may not be directly constructed. + fn get_mut(&mut self, id: NodeId) -> &mut Node<T> { + &mut self.0[id.0] + } + + /// Get the data associated with a Node. + pub fn data(&mut self, id: NodeId) -> &mut T { + &mut self.get_mut(id).data + } + + /// Get the data associated with a Node. + // pub fn data(&self, id: NodeId) -> Option<T> { + // match self.0.get(id.0) { + // Some(Node{ data, .. }) => Some(*data), + // None => None + // } + // } + + /// Get the root node of the Tree. + pub fn root(&self) -> NodeId { NodeId(0) } + + /// Get the root node(s) of the Tree. + // pub fn root(&self) -> Vec<NodeId> { + // let mut roots = Vec::new(); + // for (i, node) in self.0.iter().enumerate() { + // if let Node{ parent, .. } = node && let None = parent { + // roots.push(NodeId(i)); + // } + // } + // return roots; + // } + + /// Get all the direct children of a Node. + pub fn children(&self, id: NodeId) -> Vec<NodeId> { + let mut result = Vec::new(); + if let Some(node) = self.0.get(id.0) { + let mut child = node.firstborn; + while let Some(id) = child { + result.push(id); + child = self.0.get(id.0).map(|x| x.next_sibling).flatten(); + } + } + return result; + } + + /// Get the eldest child of a Node. + pub fn eldest_child(&self, id: NodeId) -> Option<NodeId> { + self.get(id).firstborn + } + + /// Get the youngest child of a Node. + pub fn youngest_child(&self, id: NodeId) -> Option<NodeId> { + let mut result = None; + if let Some(node) = self.0.get(id.0) { + let mut child = node.firstborn; + while let Some(id) = child { + result = Some(id); + child = self.0.get(id.0).map(|x| x.next_sibling).flatten(); + } + } + return result; + } + + /// Get the previous sibling of a Node. + pub fn previous_sibling(&self, id: NodeId) -> Option<NodeId> { + self.get(id).parent + } + + /// Get the next sibling of a Node. + pub fn next_sibling(&self, id: NodeId) -> Option<NodeId> { + self.get(id).parent + } + + /// Get the parent of a Node. + pub fn parent(&self, id: NodeId) -> Option<NodeId> { + self.get(id).parent + } + + /// Add a Node as a direct child of another Node in the tree. + pub fn add_child(&mut self, parent: NodeId, data: T) -> NodeId { + let id = NodeId(self.0.len() - 1); + let previous_sibling = self.youngest_child(parent); + if let Some(node) = previous_sibling { + self.get_mut(node).next_sibling = Some(id); + } else { + self.get_mut(parent).firstborn = Some(id); + } + self.0.push(Node { + parent: Some(parent), + previous_sibling, + next_sibling: None, + firstborn: None, + data: data + }); + return id; + } +} |