use std::todo; use crate::tree::Tree; use multipeek::multipeek; /// **Basic** syntax tokens. pub enum Token { Word(String), Lit(String), Sep(char) } /// Parses whitespace-sensitive code into an unambiguous syntax tree. /// Also useful for formatting. pub fn tokenize(input: &str) -> Tree> { use Token::*; let mut indendation_level = 0; let mut buffer = String::new(); let mut result = Tree::new(Vec::new()); let ctx = result.data(result.root()); // `char` in rust is four bytes it's fine let mut input = multipeek(input.chars()); while let Some(c) = input.next() { match c { ' ' => todo!(), '\n' => todo!(), '\t' => todo!(), '\'' => { ctx.push(Sep('\'')); while let Some(x) = input.next() { if x == '\\' { buffer.push(x); continue; } else if x == '\'' { break; } else { buffer.push(x); } } ctx.push(Lit(String::from(&buffer))); ctx.push(Sep('\'')); }, '"' => { // triple quoted strings and regular strings if input.peek_nth(0) == Some(&'"') && input.peek_nth(1) == Some(&'"') { input.next(); input.next(); ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"')); while let Some(c) = input.next() { if c == '"' && input.peek_nth(1) == Some(&'"') && input.peek_nth(2) == Some(&'"') { input.next(); input.next(); break; } buffer.push(c); } ctx.push(Lit(String::from(&buffer))); ctx.push(Sep('"')); ctx.push(Sep('"')); ctx.push(Sep('"')); } else { ctx.push(Sep('"')); while let Some(x) = input.next() { if x == '\\' { buffer.push(x); continue; } else if x == '"' { break; } buffer.push(c); } ctx.push(Lit(String::from(&buffer))); ctx.push(Sep('"')); } }, '#' => { if input.peek() == Some(&'[') { input.next(); ctx.push(Sep('#')); ctx.push(Sep('[')); let mut comment_level = 1; while let Some(x) = input.next() && comment_level > 0 { if x == '#' && input.peek() == Some(&'[') { comment_level += 1; input.next(); } else if x == ']' && input.peek() == Some(&'#') { comment_level -= 1; input.next(); } else { buffer.push(x); } } ctx.push(Lit(String::from(&buffer))); ctx.push(Sep(']')); ctx.push(Sep('#')); } else { ctx.push(Sep('#')); while let Some(x) = input.next() && x != '\n' { buffer.push(x); } ctx.push(Lit(String::from(&buffer))); } }, 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { while let Some(x) = input.peek() { match x { 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { buffer.push(c); input.next(); }, _ => break } } ctx.push(Word(String::from(&buffer))); }, '.' | ',' | ':' | ';' | '(' | ')' | '[' | ']' | '{' | '}' => ctx.push(Sep(c)), _ => ctx.push(Sep(c)) } buffer.clear(); } return result; } // note: we can't have a TokenStream because there is significant whitespace. so, we construct the tree structure here, although we don't do anything fancy with the tokens.