From fbfeffad5d85b73102a0f4e5ed607c33319f09e6 Mon Sep 17 00:00:00 2001 From: JJ Date: Mon, 17 Jul 2023 22:11:40 -0700 Subject: compiler: implement a borrow checker free tree and begin lexing --- src/lex.rs | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 8 ++++ src/tree.rs | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+) create mode 100644 src/lex.rs create mode 100644 src/main.rs create mode 100644 src/tree.rs (limited to 'src') diff --git a/src/lex.rs b/src/lex.rs new file mode 100644 index 0000000..7c0cc21 --- /dev/null +++ b/src/lex.rs @@ -0,0 +1,121 @@ +use std::todo; +use crate::tree::Tree; +use multipeek::multipeek; + +/// **Basic** syntax tokens. +pub enum Token { + Word(String), + Lit(String), + Sep(char) +} + +/// Parses whitespace-sensitive code into an unambiguous syntax tree. +/// Also useful for formatting. +pub fn tokenize(input: &str) -> Tree> { + use Token::*; + let mut indendation_level = 0; + let mut buffer = String::new(); + let mut result = Tree::new(Vec::new()); + let ctx = result.data(result.root()); + + // `char` in rust is four bytes it's fine + let mut input = multipeek(input.chars()); + while let Some(c) = input.next() { + match c { + ' ' => todo!(), + '\n' => todo!(), + '\t' => todo!(), + '\'' => { + ctx.push(Sep('\'')); + while let Some(x) = input.next() { + if x == '\\' { + buffer.push(x); + continue; + } else if x == '\'' { + break; + } else { + buffer.push(x); + } + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep('\'')); + }, + '"' => { // triple quoted strings and regular strings + if input.peek_nth(0) == Some(&'"') && + input.peek_nth(1) == Some(&'"') { + input.next(); input.next(); + ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"')); + while let Some(c) = input.next() { + if c == '"' && + input.peek_nth(1) == Some(&'"') && + input.peek_nth(2) == Some(&'"') { + input.next(); input.next(); + break; + } + buffer.push(c); + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"')); + } else { + ctx.push(Sep('"')); + while let Some(x) = input.next() { + if x == '\\' { + buffer.push(x); + continue; + } else if x == '"' { + break; + } + buffer.push(c); + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep('"')); + } + }, + '#' => { + if input.peek() == Some(&'[') { + input.next(); + ctx.push(Sep('#')); ctx.push(Sep('[')); + let mut comment_level = 1; + while let Some(x) = input.next() && comment_level > 0 { + if x == '#' && input.peek() == Some(&'[') { + comment_level += 1; + input.next(); + } else if x == ']' && input.peek() == Some(&'#') { + comment_level -= 1; + input.next(); + } else { + buffer.push(x); + } + } + ctx.push(Lit(String::from(&buffer))); + ctx.push(Sep(']')); ctx.push(Sep('#')); + } else { + ctx.push(Sep('#')); + while let Some(x) = input.next() && x != '\n' { + buffer.push(x); + } + ctx.push(Lit(String::from(&buffer))); + } + }, + 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { + while let Some(x) = input.peek() { + match x { + 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { + buffer.push(c); + input.next(); + }, + _ => break + } + } + ctx.push(Word(String::from(&buffer))); + }, + '.' | ',' | ':' | ';' | + '(' | ')' | '[' | ']' | '{' | '}' => ctx.push(Sep(c)), + _ => ctx.push(Sep(c)) + } + buffer.clear(); + } + return result; +} + +// note: we can't have a TokenStream because there is significant whitespace. so, we construct the tree structure here, although we don't do anything fancy with the tokens. diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e61c2ec --- /dev/null +++ b/src/main.rs @@ -0,0 +1,8 @@ +#![feature(exclusive_range_pattern, let_chains)] + +mod lex; +// mod parse; +// mod check; +mod tree; + +fn main() { } diff --git a/src/tree.rs b/src/tree.rs new file mode 100644 index 0000000..02dc749 --- /dev/null +++ b/src/tree.rs @@ -0,0 +1,133 @@ +/// A simple append-only tree data structure. Represented as a Vec. +pub struct Tree(Vec>); + +/// The associated Node to the Tree. +/// Somewhat optimized for memory usage rather than CPU cycles. +/// firstborn is accessed as oldest_child and youngest_child is computed. +pub struct Node { + parent: Option, + previous_sibling: Option, + next_sibling: Option, + firstborn: Option, + data: T +} + +#[derive(Copy, Clone)] +pub struct NodeId(usize); + +impl Tree { + /// Create a new Tree with a root element. + pub fn new(data: T) -> Tree { + Tree(vec![Node { + parent: None, previous_sibling: None, + next_sibling: None, firstborn: None, + data + }]) + } + + /// Get a raw Node for viewing familial relations. + /// This unwrap() should be safe with one Tree + /// instance as NodeIds may not be directly constructed. + fn get(&self, id: NodeId) -> &Node { + &self.0[id.0] + } + + /// Get a raw Node for modifying familial relations. + /// This unwrap() should be safe with one Tree + /// instance as NodeIds may not be directly constructed. + fn get_mut(&mut self, id: NodeId) -> &mut Node { + &mut self.0[id.0] + } + + /// Get the data associated with a Node. + pub fn data(&mut self, id: NodeId) -> &mut T { + &mut self.get_mut(id).data + } + + /// Get the data associated with a Node. + // pub fn data(&self, id: NodeId) -> Option { + // match self.0.get(id.0) { + // Some(Node{ data, .. }) => Some(*data), + // None => None + // } + // } + + /// Get the root node of the Tree. + pub fn root(&self) -> NodeId { NodeId(0) } + + /// Get the root node(s) of the Tree. + // pub fn root(&self) -> Vec { + // let mut roots = Vec::new(); + // for (i, node) in self.0.iter().enumerate() { + // if let Node{ parent, .. } = node && let None = parent { + // roots.push(NodeId(i)); + // } + // } + // return roots; + // } + + /// Get all the direct children of a Node. + pub fn children(&self, id: NodeId) -> Vec { + let mut result = Vec::new(); + if let Some(node) = self.0.get(id.0) { + let mut child = node.firstborn; + while let Some(id) = child { + result.push(id); + child = self.0.get(id.0).map(|x| x.next_sibling).flatten(); + } + } + return result; + } + + /// Get the eldest child of a Node. + pub fn eldest_child(&self, id: NodeId) -> Option { + self.get(id).firstborn + } + + /// Get the youngest child of a Node. + pub fn youngest_child(&self, id: NodeId) -> Option { + let mut result = None; + if let Some(node) = self.0.get(id.0) { + let mut child = node.firstborn; + while let Some(id) = child { + result = Some(id); + child = self.0.get(id.0).map(|x| x.next_sibling).flatten(); + } + } + return result; + } + + /// Get the previous sibling of a Node. + pub fn previous_sibling(&self, id: NodeId) -> Option { + self.get(id).parent + } + + /// Get the next sibling of a Node. + pub fn next_sibling(&self, id: NodeId) -> Option { + self.get(id).parent + } + + /// Get the parent of a Node. + pub fn parent(&self, id: NodeId) -> Option { + self.get(id).parent + } + + /// Add a Node as a direct child of another Node in the tree. + pub fn add_child(&mut self, parent: NodeId, data: T) -> NodeId { + let id = NodeId(self.0.len() - 1); + let previous_sibling = self.youngest_child(parent); + if let Some(node) = previous_sibling { + self.get_mut(node).next_sibling = Some(id); + } else { + self.get_mut(parent).firstborn = Some(id); + } + self.0.push(Node { + parent: Some(parent), + previous_sibling, + next_sibling: None, + firstborn: None, + data: data + }); + return id; + } +} -- cgit v1.2.3-70-g09d2