aboutsummaryrefslogtreecommitdiff
path: root/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'src/main')
-rw-r--r-- src/main/model/css/CssLexer.java 63
-rw-r--r-- src/main/model/css/CssTree.java 54
-rw-r--r-- src/main/model/html/HtmlLexer.java 68
-rw-r--r-- src/main/model/html/HtmlTree.java 33
-rw-r--r-- src/main/model/util/AbstractTree.java 35
-rw-r--r-- src/main/model/util/Lexer.java 58
6 files changed, 0 insertions, 311 deletions
diff --git a/src/main/model/css/CssLexer.java b/src/main/model/css/CssLexer.java
deleted file mode 100644
index 657d3e1..0000000
--- a/src/main/model/css/CssLexer.java
+++ /dev/null
@@ -1,63 +0,0 @@
-package model.css;
-
-import java.util.ArrayList;
-
-/**
- * Tokenizer for CSS source text.
- * <br>
- * The input is split on whitespace, braces, colons and semicolons. Braces, colons and semicolons are
- * emitted as single-character tokens of their own; whitespace only separates tokens and is never emitted.
- * <br>
- * Text inside single or double quotes is kept together in one token, including any delimiter characters
- * it contains. The quote characters themselves stay part of the token.
- * <br>
- * CSS, thankfully, is far more rigid and less forgiving of errors than HTML. It also has multiple layers
- * of fallback for errors, ranging from "ignore this property", to "ignore this rule", to "this is not CSS
- * at all, ignore everything".
- */
-public class CssLexer {
-
-    /**
-     * Splits raw CSS into a flat list of tokens.
-     *
-     * @param input the CSS text to tokenize; must not be {@code null} (a {@code null} input fails with a
-     *              {@link NullPointerException})
-     * @return the tokens in source order; empty when the input holds nothing but whitespace
-     */
-    public static ArrayList<String> lex(String input) {
-        ArrayList<String> tokens = new ArrayList<>();
-        StringBuilder token = new StringBuilder();
-        boolean inSingleQuotes = false;
-        boolean inDoubleQuotes = false;
-        char previous = '\0';
-
-        for (char c : input.toCharArray()) {
-            boolean quoted = inSingleQuotes || inDoubleQuotes;
-            switch (c) {
-                case '{': case '}': case ';': case ':':
-                    if (quoted) {
-                        token.append(c);
-                    } else {
-                        flush(token, tokens);
-                        tokens.add(Character.toString(c));
-                    }
-                    break;
-                // CR and FF count as whitespace too, so files with Windows line endings lex cleanly.
-                case ' ': case '\n': case '\t': case '\r': case '\f':
-                    if (quoted) {
-                        token.append(c);
-                    } else {
-                        flush(token, tokens);
-                    }
-                    break;
-                case '"':
-                    // A double quote inside a single-quoted string is plain text, as is an escaped one.
-                    // NOTE(review): an escaped backslash directly before the quote is not recognised.
-                    if (!inSingleQuotes && previous != '\\') {
-                        inDoubleQuotes = !inDoubleQuotes;
-                    }
-                    token.append(c);
-                    break;
-                case '\'':
-                    // Mirror image of the double-quote case.
-                    if (!inDoubleQuotes && previous != '\\') {
-                        inSingleQuotes = !inSingleQuotes;
-                    }
-                    token.append(c);
-                    break;
-                default:
-                    token.append(c);
-                    break;
-            }
-            previous = c;
-        }
-        // Input that does not end in a delimiter still carries a final token.
-        flush(token, tokens);
-        return tokens;
-    }
-
-    // Moves the pending token, if there is one, onto the output list and resets the buffer.
-    private static void flush(StringBuilder token, ArrayList<String> tokens) {
-        if (token.length() > 0) {
-            tokens.add(token.toString());
-            token.setLength(0);
-        }
-    }
-}
diff --git a/src/main/model/css/CssTree.java b/src/main/model/css/CssTree.java
deleted file mode 100644
index 1829327..0000000
--- a/src/main/model/css/CssTree.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package model.css;
-
-import java.util.ArrayList;
-
-/**
- * In-memory model of a stylesheet. Despite the name this is not really a tree: it is a flat list of
- * rules, each of which holds a selector string and a list of properties.
- * <br>
- * {@link CssRule} and {@link CssProperty} are inner (non-static) classes, so an instance of either can
- * only be created through an existing {@code CssTree}, e.g. {@code tree.new CssRule(...)}.
- */
-public class CssTree {
-
-    // The rules of the stylesheet, stored exactly as handed to the constructor (no copy is made).
-    private final ArrayList<CssRule> rules;
-
-    /**
-     * @param rules the list of rules backing this stylesheet
-     */
-    public CssTree(ArrayList<CssRule> rules) {
-        this.rules = rules;
-    }
-
-    /**
-     * @return the same list instance that was passed to the constructor
-     */
-    public ArrayList<CssRule> getRules() {
-        return rules;
-    }
-
-    /**
-     * One rule: a selector string together with the properties declared for it.
-     */
-    public class CssRule {
-        private final String selectors;
-        private final ArrayList<CssProperty> properties;
-
-        /**
-         * @param selectors  the selector text of the rule, kept as a single string
-         * @param properties the properties of the rule; the list is stored as-is, not copied
-         */
-        public CssRule(String selectors, ArrayList<CssProperty> properties) {
-            this.selectors = selectors;
-            this.properties = properties;
-        }
-
-        /**
-         * @return the selector text given at construction
-         */
-        public String getSelectors() {
-            return selectors;
-        }
-
-        /**
-         * @return the same property list instance given at construction
-         */
-        public ArrayList<CssProperty> getProperties() {
-            return properties;
-        }
-    }
-
-    /**
-     * One declaration inside a rule: an attribute name paired with its value.
-     */
-    public class CssProperty {
-        private final String attribute;
-        private final String value;
-
-        /**
-         * @param attribute the attribute name
-         * @param value     the value assigned to the attribute
-         */
-        public CssProperty(String attribute, String value) {
-            this.attribute = attribute;
-            this.value = value;
-        }
-
-        /**
-         * @return the attribute name given at construction
-         */
-        public String getAttribute() {
-            return attribute;
-        }
-
-        /**
-         * @return the value given at construction
-         */
-        public String getValue() {
-            return value;
-        }
-    }
-}
diff --git a/src/main/model/html/HtmlLexer.java b/src/main/model/html/HtmlLexer.java
deleted file mode 100644
index 8cad425..0000000
--- a/src/main/model/html/HtmlLexer.java
+++ /dev/null
@@ -1,68 +0,0 @@
-package model.html;
-
-import java.util.ArrayList;
-
-/**
- * Tokenizer for HTML source text.
- * <br>
- * HTML is tokenized by tags: every tag, from its opening {@code <} to its closing {@code >} and including
- * any attributes, becomes one token, and every run of text between two tags becomes one token. The inside
- * of a tag is not broken down any further at this stage.
- * <br>
- * Whitespace is passed through untouched; collapsing duplicate whitespace is left to later stages.
- */
-public class HtmlLexer {
-
-    /**
-     * Takes a String of raw HTML and tokenizes it for the parser.
-     * <br>
-     * Quotes are only significant inside a tag, where they delimit attribute values: a {@code <} or
-     * {@code >} inside a quoted attribute value does not start or end a tag. In text between tags, quote
-     * characters and apostrophes are ordinary characters.
-     * <br>
-     * Suspicious input (a {@code <} inside a quoted attribute value, or a {@code >} with no open tag)
-     * prints a diagnostic to standard output and lexing carries on.
-     *
-     * @param input the HTML text to tokenize; must not be {@code null} (a {@code null} input fails with a
-     *              {@link NullPointerException})
-     * @return the tag and text tokens in source order; empty for empty input
-     */
-    public static ArrayList<String> lex(String input) {
-        ArrayList<String> tokens = new ArrayList<>();
-        StringBuilder token = new StringBuilder();
-        boolean inTag = false;
-        boolean inSingleQuotes = false;
-        boolean inDoubleQuotes = false;
-
-        for (char c : input.toCharArray()) {
-            boolean quoted = inSingleQuotes || inDoubleQuotes;
-            switch (c) {
-                case '<':
-                    if (quoted) {
-                        // Quote state is only ever set inside a tag, so this is a '<' within an
-                        // attribute value.
-                        System.out.print("Probably failing parser");
-                    } else {
-                        // Whatever was collected so far (text, or an unterminated tag) ends here.
-                        if (token.length() > 0) {
-                            tokens.add(token.toString());
-                            token.setLength(0);
-                        }
-                        inTag = true;
-                    }
-                    token.append(c);
-                    break;
-                case '>':
-                    token.append(c);
-                    if (!quoted) {
-                        if (!inTag) {
-                            System.out.print("Probably failing parser");
-                        }
-                        inTag = false;
-                        tokens.add(token.toString());
-                        token.setLength(0);
-                    }
-                    break;
-                case '"':
-                    token.append(c);
-                    // Only attribute values are quoted; a quote in running text must not hide tags.
-                    if (inTag && !inSingleQuotes) {
-                        inDoubleQuotes = !inDoubleQuotes;
-                    }
-                    break;
-                case '\'':
-                    token.append(c);
-                    if (inTag && !inDoubleQuotes) {
-                        inSingleQuotes = !inSingleQuotes;
-                    }
-                    break;
-                default:
-                    token.append(c);
-                    break;
-            }
-        }
-
-        // When lexing invalid HTML we may end up with trailing garbage: either an unfinished tag or extra
-        // text (those are the only two options since this is just the lex step). An unfinished tag is
-        // closed artificially so later stages always see well-delimited tag tokens.
-        if (token.length() > 0) {
-            if (inTag) {
-                token.append('>');
-            }
-            tokens.add(token.toString());
-        }
-        return tokens;
-    }
-}
diff --git a/src/main/model/html/HtmlTree.java b/src/main/model/html/HtmlTree.java
deleted file mode 100644
index 1aae0a8..0000000
--- a/src/main/model/html/HtmlTree.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package model.html;
-
-import model.util.AbstractTree;
-import org.javatuples.Pair;
-
-import java.util.ArrayList;
-import java.util.Optional;
-
-/**
- * One node of an HTML document represented as a tree. The payload handed to {@link AbstractTree} is a
- * pair of the tag and its list of (name, value) attribute pairs -- hence the heavily nested generics.
- */
-public class HtmlTree extends AbstractTree<Pair<String, ArrayList<Pair<String, String>>>> {
-    private String tag;
-    private ArrayList<Pair<String, String>> attributes;
-    // Both links are always assigned by the constructor; Optional.empty() stands for "none".
-    private Optional<HtmlTree> parent;
-    private Optional<HtmlTree> sibling;
-
-    /**
-     * Creates a node without children, parent or sibling.
-     *
-     * @param tag        the tag of this node
-     * @param attributes the (name, value) attribute pairs of this node
-     */
-    public HtmlTree(String tag, ArrayList<Pair<String, String>> attributes) {
-        this(tag, attributes, new ArrayList<>(), Optional.empty(), Optional.empty());
-    }
-
-    /**
-     * Creates a fully specified node.
-     * <br>
-     * The children list is typed against {@code AbstractTree<...>} rather than {@code HtmlTree} because
-     * Java generics are invariant: an {@code ArrayList<HtmlTree>} is not an
-     * {@code ArrayList<AbstractTree<...>>}, which is what the superclass constructor asks for.
-     *
-     * @param tag        the tag of this node
-     * @param attributes the (name, value) attribute pairs of this node
-     * @param children   the child nodes, passed straight to the superclass
-     * @param parent     the parent node, or empty
-     * @param sibling    the sibling node, or empty
-     */
-    public HtmlTree(String tag, ArrayList<Pair<String, String>> attributes,
-                    ArrayList<AbstractTree<Pair<String, ArrayList<Pair<String, String>>>>> children,
-                    Optional<HtmlTree> parent, Optional<HtmlTree> sibling) {
-        super(new Pair<>(tag, attributes), children);
-        this.tag = tag;
-        this.attributes = attributes;
-        this.parent = parent;
-        this.sibling = sibling;
-    }
-}
diff --git a/src/main/model/util/AbstractTree.java b/src/main/model/util/AbstractTree.java
deleted file mode 100644
index 4c74732..0000000
--- a/src/main/model/util/AbstractTree.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package model.util;
-
-import org.javatuples.*;
-
-import java.util.*;
-
-/**
- * Utility base class for a general tree; used as the foundation for the concrete tree types.
- *
- * @param <T> the kind of data stored in each node, e.g. a tag, attributes, a tag and attributes, etc.
- */
-public abstract class AbstractTree<T> {
-
-    // The payload of this node.
-    private final T data;
-    // The child nodes; this is the very list handed to the constructor, not a copy.
-    private final ArrayList<AbstractTree<T>> children;
-
-    // Future implementations may want to consider adding an Optional<> parent or an Optional<> prevSibling.
-
-    /**
-     * @param data     the payload of this node
-     * @param children the initial child nodes; later {@link #addChild} calls append to this same list
-     */
-    public AbstractTree(T data, ArrayList<AbstractTree<T>> children) {
-        this.data = data;
-        this.children = children;
-    }
-
-    /**
-     * @return the payload given at construction
-     */
-    public T getData() {
-        return this.data;
-    }
-
-    /**
-     * @return the live list of children (the instance given at construction)
-     */
-    public ArrayList<AbstractTree<T>> getChildren() {
-        return this.children;
-    }
-
-    /**
-     * Appends a node to the end of the children list.
-     *
-     * @param child the node to append
-     */
-    public void addChild(AbstractTree<T> child) {
-        children.add(child);
-    }
-}
diff --git a/src/main/model/util/Lexer.java b/src/main/model/util/Lexer.java
deleted file mode 100644
index b35caa6..0000000
--- a/src/main/model/util/Lexer.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package model.util;
-
-import java.util.*;
-
-/**
- * General-purpose lexer helpers that split a string on single-character delimiters.
- */
-public class Lexer {
-
-    /*
-     * Design note for a planned but not yet implemented rudeLex(String input, Set<Character> delimiters):
-     *
-     * It would lex a "free-form" language. "Free-form" has a specific meaning here that is important to
-     * preserve: _additional_ whitespace characters do not affect the language (two newlines instead of
-     * one, four spaces instead of two, etc.). Such languages are _not_ "whitespace-insensitive", which is
-     * usually a misnomer. The name is a pun: whitespace-insensitive --> insensitive == rude.
-     *
-     * That lexer would also be limited to single-character delimiters.
-     * TODO: implement it, including deduplication of whitespace.
-     */
-
-    // Currently unused: helper for a future version that recognises delimiters longer than one character.
-    // Returns the length of the longest delimiter, or 0 for an empty set.
-    private static int longestDelimiter(Set<String> delimiters) {
-        int longest = 0;
-        for (String delimiter : delimiters) {
-            longest = Math.max(longest, delimiter.length());
-        }
-        return longest;
-    }
-
-    /**
-     * Lexes a language that is not free-form: nothing is discarded. Every delimiter character becomes a
-     * one-character token of its own, and every run of characters between delimiters becomes one token.
-     * Concatenating the returned tokens yields the original input.
-     *
-     * @param input      the text to split; must not be {@code null}
-     * @param delimiters the characters to split on; must not be {@code null}
-     * @return the tokens in source order, including the text after the last delimiter; empty for empty
-     *         input
-     */
-    public static ArrayList<String> sensitiveLex(String input, Set<Character> delimiters) {
-        ArrayList<String> tokens = new ArrayList<>();
-        StringBuilder current = new StringBuilder();
-        // Index-based loop on purpose: multi-character delimiters would need to move the index around.
-        for (int i = 0; i < input.length(); i++) {
-            char c = input.charAt(i);
-            if (delimiters.contains(c)) {
-                if (current.length() > 0) {
-                    tokens.add(current.toString());
-                    current.setLength(0);
-                }
-                tokens.add(Character.toString(c));
-            } else {
-                current.append(c);
-            }
-        }
-        // Input that does not end in a delimiter still carries a final token.
-        if (current.length() > 0) {
-            tokens.add(current.toString());
-        }
-        return tokens;
-    }
-}
-}