aboutsummaryrefslogtreecommitdiff
path: root/src/main/model/util
diff options
context:
space:
mode:
author j-james 2022-10-17 06:25:45 +0000
committer j-james 2022-10-17 06:27:55 +0000
commit 3e9bb5fae16c35938bc1f7f7669c12cc355c9331 (patch)
tree 82e1ab837579e7762071ea97c064c0750a38c106 /src/main/model/util
parent 0845be5ec0215fb43f9dbdef00b22a733d4080b3 (diff)
Basic prototypes of HTML/CSS lexers
Diffstat (limited to 'src/main/model/util')
-rw-r--r-- src/main/model/util/AbstractTree.java 35
-rw-r--r-- src/main/model/util/Lexer.java 58
2 files changed, 93 insertions, 0 deletions
diff --git a/src/main/model/util/AbstractTree.java b/src/main/model/util/AbstractTree.java
new file mode 100644
index 0000000..4c74732
--- /dev/null
+++ b/src/main/model/util/AbstractTree.java
@@ -0,0 +1,35 @@
+package model.util;
+
+import org.javatuples.*;
+
+import java.util.*;
+
+/**
+ * Utility class for a general tree: one payload of type {@code T} plus an ordered list of child nodes.
+ * The class is abstract, so it can only be used through a concrete subclass.
+ *
+ * @param <T> type of the data held by each node, e.g. a tag, attributes, a tag and attributes, etc
+ */
+public abstract class AbstractTree<T> {
+
+    // The data held by this node.
+    private final T data;
+    // Since it's a tree, every node also has children. Never null (see the constructor).
+    private final ArrayList<AbstractTree<T>> children;
+
+    // Future implementations may want to consider adding an Optional<> parent or an Optional<> prevSibling.
+
+    /**
+     * Creates a node holding {@code data} with the given children.
+     *
+     * @param data     payload of this node; stored as-is
+     * @param children initial children. The list is stored by reference (not copied), so it is the same
+     *                 list later returned by {@link #getChildren()}. {@code null} is treated as
+     *                 "no children" and replaced by a fresh empty list, so that
+     *                 {@link #addChild(AbstractTree)} cannot fail with a NullPointerException.
+     */
+    public AbstractTree(T data, ArrayList<AbstractTree<T>> children) {
+        this.data = data;
+        this.children = (children != null) ? children : new ArrayList<AbstractTree<T>>();
+    }
+
+    /**
+     * @return the payload passed to the constructor
+     */
+    public T getData() {
+        return data;
+    }
+
+    /**
+     * @return the live, mutable list of children (not a copy); never {@code null}
+     */
+    public ArrayList<AbstractTree<T>> getChildren() {
+        return children;
+    }
+
+    /**
+     * Appends {@code child} to the end of this node's children.
+     *
+     * @param child subtree to append
+     */
+    public void addChild(AbstractTree<T> child) {
+        this.children.add(child);
+    }
+}
diff --git a/src/main/model/util/Lexer.java b/src/main/model/util/Lexer.java
new file mode 100644
index 0000000..b35caa6
--- /dev/null
+++ b/src/main/model/util/Lexer.java
@@ -0,0 +1,58 @@
+package model.util;
+
+import java.util.*;
+
+/**
+ * General-purpose lexer: splits a string into tokens around single-character delimiters.
+ */
+public class Lexer {
+
+    // private static final Set<String> whitespace = new HashSet<String>(" ", "\n");
+
+    /**
+     * Returns the length of the longest string in {@code delimiters}, or 0 if the set is empty.
+     * Unused for now: helper function for if we implement finding identifiers longer than a character.
+     */
+    private static int longestDelimiter(Set<String> delimiters) {
+        int longest = 0;
+        for (String delimiter : delimiters) {
+            longest = Math.max(longest, delimiter.length());
+        }
+        return longest;
+    }
+
+    /*
+     * Planned, not yet implemented: rudeLex.
+     *
+     * Lexes a "free-form" language. "free-form" has a specific meaning here that's important to preserve:
+     * "free-form" means that _additional_ whitespace characters do not affect the language: e.g. two newlines
+     * instead of one, four spaces instead of two, etc. They are _not_ "whitespace-insensitive", which is usually
+     * a misnomer.
+     * The name's a bit of a joke: free-form languages are generally referred to as whitespace-insensitive -->
+     * insensitive == rude. Jokes are funnier when you have to explain them.
+     * Also, insensitiveLex() and freeformLex() aren't really that good of names.
+     *
+     * NOTE: This lexer only works with single-character delimiters.
+     * TODO: deduplicate whitespace
+     */
+    // public static ArrayList<String> rudeLex(String input, Set<Character> delimiters) {}
+
+    /**
+     * Lexer for non-free-form languages: splits {@code input} around single-character delimiters without
+     * giving whitespace any special treatment. We might as well implement it, but we won't use it.
+     *
+     * <p>Every delimiter character becomes its own one-character token. Every non-empty run of
+     * non-delimiter characters becomes one token, including a run at the very end of the input.
+     * Adjacent delimiters produce no empty token between them.
+     *
+     * @param input      text to lex
+     * @param delimiters characters that end the current token and are themselves emitted as tokens
+     * @return the tokens in input order; an empty list if {@code input} is empty
+     */
+    public static ArrayList<String> sensitiveLex(String input, Set<Character> delimiters) {
+        ArrayList<String> tokens = new ArrayList<String>();
+        StringBuilder currentToken = new StringBuilder();
+        // C-style for loop because we may need to manipulate the index in the future
+        for (int i = 0; i < input.length(); i++) {
+            char nextChar = input.charAt(i);
+            if (delimiters.contains(nextChar)) {
+                flushToken(currentToken, tokens);
+                tokens.add(Character.toString(nextChar));
+            } else {
+                currentToken.append(nextChar);
+            }
+        }
+        // Input that does not end in a delimiter still has a pending token: emit it instead of dropping it.
+        flushToken(currentToken, tokens);
+        return tokens;
+    }
+
+    // Appends the pending token to tokens if it is non-empty, then resets the buffer.
+    private static void flushToken(StringBuilder pending, ArrayList<String> tokens) {
+        if (pending.length() > 0) {
+            tokens.add(pending.toString());
+            pending.setLength(0);
+        }
+    }
+}