diff options
Diffstat (limited to 'src/main/model/util')
-rw-r--r-- | src/main/model/util/AbstractTree.java | 39 | ||||
-rw-r--r-- | src/main/model/util/Lexer.java | 72 |
2 files changed, 111 insertions, 0 deletions
diff --git a/src/main/model/util/AbstractTree.java b/src/main/model/util/AbstractTree.java
new file mode 100644
index 0000000..4c74732
--- /dev/null
+++ b/src/main/model/util/AbstractTree.java
@@ -0,0 +1,39 @@
+package model.util;
+
+import org.javatuples.*;
+
+import java.util.*;
+
+// Utility class for a general tree: we'll be using these a lot
+public abstract class AbstractTree<T> {
+
+    // An AbstractTree holds some kind of data; we'll want this to be generic
+    // e.g. a tag, attributes, a tag and attributes, etc
+    private final T data;
+    // Since it's a tree every node also has children.
+    private final ArrayList<AbstractTree<T>> children;
+
+    // future implementations may want to consider adding an Optional<> parent; or an Optional<> prevSibling
+
+    // Returns the data stored in this node.
+    public T getData() {
+        return data;
+    }
+
+    // Returns the live child list (not a copy), so changes made through it affect this node.
+    public ArrayList<AbstractTree<T>> getChildren() {
+        return children;
+    }
+
+    // Creates a node holding data. The children list is stored as-is (not copied); it should not be
+    // null, because addChild() calls add() on it directly.
+    public AbstractTree(T data, ArrayList<AbstractTree<T>> children) {
+        this.data = data;
+        this.children = children;
+    }
+
+    // Appends child to the end of this node's child list.
+    public void addChild(AbstractTree<T> child) {
+        this.children.add(child);
+    }
+}
diff --git a/src/main/model/util/Lexer.java b/src/main/model/util/Lexer.java
new file mode 100644
index 0000000..b35caa6
--- /dev/null
+++ b/src/main/model/util/Lexer.java
@@ -0,0 +1,72 @@
+package model.util;
+
+import java.util.*;
+
+// General-purpose Lexer
+public class Lexer {
+
+    // private static final Set<String> whitespace = new HashSet<String>(" ", "\n");
+
+    // unused, helper function for if we implement finding identifiers longer than a character
+    // Returns the length of the longest string in delimiters, or 0 if the set is empty.
+    private static int longestDelimiter(Set<String> delimiters) {
+        int longestDelimiter = 0;
+        for (String delimiter : delimiters) {
+            if (delimiter.length() > longestDelimiter) {
+                longestDelimiter = delimiter.length();
+            }
+        }
+        return longestDelimiter;
+    }
+
+    /**
+     * Lexes a "free-form" language. "free-form" has a specific meaning here that's important to preserve:
+     * "free-form" means that _additional_ whitespace characters do not affect the language: e.g. two newlines
+     * instead of one, four spaces instead of two, etc. They are _not_ "whitespace-insensitive", which is usually
+     * a misnomer.
+     * The name's a bit of a joke: free-form languages are generally referred to as whitespace-insensitive -->
+     * insensitive == rude. Jokes are funnier when you have to explain them.
+     * Also, insensitiveLex() and freeformLex() aren't really that good of names.
+     *
+     * NOTE: This lexer only works with single-character delimiters.
+     * TODO: deduplicate whitespace
+     */
+    // public static ArrayList<String> rudeLex(String input, Set<Character> delimiters) {}
+
+    /**
+     * We might as well implement a lexer for non-free-form languages, but whatever. We won't use it.
+     *
+     * Splits input at single-character delimiters. Every delimiter character becomes its own
+     * one-character token, and each non-empty run of characters between delimiters becomes one token,
+     * including a final run that reaches the end of the input. Empty runs produce no token.
+     *
+     * @param input the text to lex
+     * @param delimiters characters that end the current token and are themselves emitted as tokens
+     * @return the tokens in input order; an empty list if input is empty
+     */
+    public static ArrayList<String> sensitiveLex(String input, Set<Character> delimiters) {
+        // int longestDelimiter = longestDelimiter(delimiters);
+
+        ArrayList<String> tokens = new ArrayList<String>();
+        // StringBuilder instead of String += so that building a long token stays linear
+        StringBuilder currentToken = new StringBuilder();
+        // terrible c-style for loop because we may need to manipulate the index in the future
+        for (int i = 0; i < input.length(); i++) {
+            char nextChar = input.charAt(i);
+            if (delimiters.contains(nextChar)) {
+                if (currentToken.length() > 0) {
+                    tokens.add(currentToken.toString());
+                    currentToken.setLength(0);
+                }
+                tokens.add(Character.toString(nextChar));
+            } else {
+                currentToken.append(nextChar);
+            }
+        }
+        // Flush the trailing token: input that does not end in a delimiter used to lose it
+        if (currentToken.length() > 0) {
+            tokens.add(currentToken.toString());
+        }
+        return tokens;
+    }
+}