diff options
Diffstat (limited to 'src/main')
-rw-r--r-- | src/main/model/css/CssLexer.java | 63 | ||||
-rw-r--r-- | src/main/model/css/CssTree.java | 54 | ||||
-rw-r--r-- | src/main/model/html/HtmlLexer.java | 68 | ||||
-rw-r--r-- | src/main/model/html/HtmlTree.java | 33 | ||||
-rw-r--r-- | src/main/model/util/AbstractTree.java | 35 | ||||
-rw-r--r-- | src/main/model/util/Lexer.java | 58 |
6 files changed, 0 insertions, 311 deletions
/**
 * Lexer that splits CSS source text into a flat list of string tokens.
 * <br>
 * The input is split on whitespace and on the punctuation characters: opening and closing
 * curly brace, semicolon and colon. Each punctuation character is emitted as its own
 * one-character token; whitespace only separates tokens and is never emitted.
 * Note that every colon outside a quoted string is a separator, so a selector such as
 * {@code a:hover} is lexed as {@code a}, {@code :}, {@code hover}.
 * <br>
 * Quoted strings (single or double quotes) are kept intact, quotes included, as part of the
 * surrounding token: separators inside a string do not split it. A quote preceded by an
 * unescaped backslash does not open or close a string.
 * <br>
 * CSS is far more rigid and less forgiving of errors than HTML, so no recovery is attempted
 * here; the lexer is written as an explicit loop rather than with {@code split()} so that it
 * can be extended later.
 */
public class CssLexer {

    /**
     * Tokenizes a string of raw CSS.
     *
     * @param input the CSS source text; must not be {@code null}
     * @return the tokens in source order; empty when the input holds no tokens. Any text left
     *         over at the end of the input (including an unterminated string) is returned as
     *         the final token rather than being dropped.
     */
    public static ArrayList<String> lex(String input) {
        ArrayList<String> tokens = new ArrayList<>();
        StringBuilder token = new StringBuilder();
        boolean inSingleQuotes = false;
        boolean inDoubleQuotes = false;
        // True when the previous character was a backslash that was not itself escaped.
        boolean escaped = false;

        for (char c : input.toCharArray()) {
            boolean inQuotes = inSingleQuotes || inDoubleQuotes;
            switch (c) {
                case '{': case '}': case ';': case ':':
                    if (inQuotes) {
                        token.append(c);
                    } else {
                        flush(tokens, token);
                        tokens.add(Character.toString(c));
                    }
                    break;
                case ' ': case '\n': case '\t': case '\r': case '\f':
                    if (inQuotes) {
                        token.append(c);
                    } else {
                        flush(tokens, token);
                    }
                    break;
                case '"':
                    // A double quote inside a single-quoted string is ordinary text.
                    if (!escaped && !inSingleQuotes) {
                        inDoubleQuotes = !inDoubleQuotes;
                    }
                    token.append(c);
                    break;
                case '\'':
                    // A single quote inside a double-quoted string is ordinary text.
                    if (!escaped && !inDoubleQuotes) {
                        inSingleQuotes = !inSingleQuotes;
                    }
                    token.append(c);
                    break;
                default:
                    token.append(c);
                    break;
            }
            // Two backslashes in a row cancel out, so the character after them is not escaped.
            escaped = (c == '\\') && !escaped;
        }
        // Do not lose text that is not followed by a separator.
        flush(tokens, token);
        return tokens;
    }

    // Moves the pending token, if any, into the token list and resets the buffer.
    private static void flush(ArrayList<String> tokens, StringBuilder token) {
        if (token.length() > 0) {
            tokens.add(token.toString());
            token.setLength(0);
        }
    }
}
/**
 * Container for a parsed stylesheet: an ordered list of rules, each of which pairs a selector
 * string with a list of attribute/value properties. Despite the name this is a flat list of
 * rules rather than a real tree.
 */
public class CssTree {

    private final ArrayList<CssRule> rules;

    /**
     * @param rules the rules of the stylesheet; the list is stored as given, not copied
     */
    public CssTree(ArrayList<CssRule> rules) {
        this.rules = rules;
    }

    /**
     * @return the list of rules passed to the constructor
     */
    public ArrayList<CssRule> getRules() {
        return rules;
    }

    /**
     * A single declaration inside a rule, e.g. attribute {@code color} with value {@code red}.
     * This is an inner (non-static) class, so it is created through a {@code CssTree} instance.
     */
    public class CssProperty {
        private final String attribute;
        private final String value;

        public CssProperty(String attribute, String value) {
            this.attribute = attribute;
            this.value = value;
        }

        /**
         * @return the attribute (property name) given at construction
         */
        public String getAttribute() {
            return attribute;
        }

        /**
         * @return the value given at construction
         */
        public String getValue() {
            return value;
        }
    }

    /**
     * A rule: the raw selector text together with the properties it declares.
     * This is an inner (non-static) class, so it is created through a {@code CssTree} instance.
     */
    public class CssRule {
        private final String selectors;
        private final ArrayList<CssProperty> properties;

        /**
         * @param selectors  the selector text of the rule
         * @param properties the properties of the rule; the list is stored as given, not copied
         */
        public CssRule(String selectors, ArrayList<CssProperty> properties) {
            this.selectors = selectors;
            this.properties = properties;
        }

        /**
         * @return the selector text given at construction
         */
        public String getSelectors() {
            return selectors;
        }

        /**
         * @return the property list given at construction
         */
        public ArrayList<CssProperty> getProperties() {
            return properties;
        }
    }
}
/**
 * Lexer that tokenizes HTML by tags. Each tag, from its opening angle bracket to its closing
 * one, becomes one token with its attributes left unparsed inside it; each run of text between
 * tags becomes one token as well.
 * The input is treated as free-form: duplicated whitespace is not significant to later stages.
 */
public class HtmlLexer {

    /**
     * Takes a String of raw HTML and tokenizes it for the parser.
     * <br>
     * Quotes are only meaningful inside a tag, where they delimit attribute values: angle
     * brackets inside a quoted attribute value do not start or end a tag. Quotes and apostrophes
     * in text content between tags are ordinary characters.
     * <br>
     * Suspicious input (an opening bracket inside a quoted attribute value, or a closing bracket
     * with no open tag) prints the diagnostic "Probably failing parser" to standard output and
     * lexing continues.
     *
     * @param input the HTML source text; must not be {@code null}
     * @return the tokens in source order. Trailing text is returned as a final token; a tag
     *         that is still open at the end of the input is closed with a closing bracket.
     */
    public static ArrayList<String> lex(String input) {
        ArrayList<String> tokens = new ArrayList<>();
        StringBuilder token = new StringBuilder();
        boolean inTag = false;
        boolean inSingleQuotes = false;
        boolean inDoubleQuotes = false;

        for (char c : input.toCharArray()) {
            token.append(c);
            switch (c) {
                case '<':
                    if (!inSingleQuotes && !inDoubleQuotes) {
                        inTag = true;
                        // Anything collected before this bracket is a finished token.
                        if (token.length() > 1) {
                            tokens.add(token.substring(0, token.length() - 1));
                            token.setLength(0);
                            token.append('<');
                        }
                    } else {
                        // Quotes are only tracked inside tags, so this is a bracket inside a
                        // quoted attribute value.
                        System.out.printf("Probably failing parser");
                    }
                    break;
                case '>':
                    if (!inSingleQuotes && !inDoubleQuotes) {
                        if (!inTag) {
                            System.out.printf("Probably failing parser");
                        }
                        inTag = false;
                        tokens.add(token.toString());
                        token.setLength(0);
                    }
                    break;
                case '"':
                    // Only attribute values are quoted; a quote in text content is plain text.
                    if (inTag && !inSingleQuotes) {
                        inDoubleQuotes = !inDoubleQuotes;
                    }
                    break;
                case '\'':
                    if (inTag && !inDoubleQuotes) {
                        inSingleQuotes = !inSingleQuotes;
                    }
                    break;
                default:
                    break;
            }
        }

        // When lexing invalid HTML we may end up with trailing garbage: either an unfinished
        // tag or extra text (the only two options, since this is just the lex step).
        if (token.length() > 0) {
            if (inTag) {
                token.append('>');
            }
            tokens.add(token.toString());
        }
        return tokens;
    }
}
- */ -public class HtmlTree extends AbstractTree<Pair<String, ArrayList<Pair<String, String>>>> { - private String tag; - private ArrayList<Pair<String, String>> attributes; - private Optional<HtmlTree> parent = Optional.empty(); - private Optional<HtmlTree> sibling = Optional.empty(); - - // I don't quite know why I can't say ArrayList<HtmlTree> children. - public HtmlTree(String tag, ArrayList<Pair<String, String>> attributes, - ArrayList<AbstractTree<Pair<String, ArrayList<Pair<String, String>>>>> children, - Optional<HtmlTree> parent, Optional<HtmlTree> sibling) { - super(new Pair<>(tag, attributes), children); - this.tag = tag; - this.attributes = attributes; - this.parent = parent; - this.sibling = sibling; - } - - public HtmlTree(String tag, ArrayList<Pair<String, String>> attributes) { - this(tag, attributes, new ArrayList<AbstractTree<Pair<String, ArrayList<Pair<String, String>>>>>(), - Optional.empty(), Optional.empty()); - } -} diff --git a/src/main/model/util/AbstractTree.java b/src/main/model/util/AbstractTree.java deleted file mode 100644 index 4c74732..0000000 --- a/src/main/model/util/AbstractTree.java +++ /dev/null @@ -1,35 +0,0 @@ -package model.util; - -import org.javatuples.*; - -import java.util.*; - -// Utility class for a general tree: we'll be using these a lot -public abstract class AbstractTree<T> { - - // An AbstractTree holds some kind of data; we'll want this to be generic - // e.g. a tag, attributes, a tag and attributes, etc - private T data; - // Since it's a tree every node also has children. 
/**
 * Utility base class for a general tree: every node holds one piece of data and a list of
 * child nodes of the same kind.
 *
 * @param <T> the type of data stored in each node, e.g. a tag, attributes, or both
 */
public abstract class AbstractTree<T> {

    // The payload of this node.
    private final T data;
    // Since it's a tree, every node also has children. Never null.
    private final ArrayList<AbstractTree<T>> children;

    // Future implementations may want to consider adding an Optional<> parent or an
    // Optional<> prevSibling.

    /**
     * Creates a node.
     *
     * @param data     the data held by this node
     * @param children the initial child list. The list is stored as given, not copied, so
     *                 later changes to it are visible through this node. A {@code null}
     *                 argument is treated as "no children" and replaced by a new empty list
     *                 so that {@link #addChild} always works.
     */
    public AbstractTree(T data, ArrayList<AbstractTree<T>> children) {
        this.data = data;
        this.children = (children != null) ? children : new ArrayList<AbstractTree<T>>();
    }

    /**
     * @return the data held by this node
     */
    public T getData() {
        return data;
    }

    /**
     * @return the live list of children of this node; never {@code null}
     */
    public ArrayList<AbstractTree<T>> getChildren() {
        return children;
    }

    /**
     * Appends a child to the end of this node's child list.
     *
     * @param child the node to add
     */
    public void addChild(AbstractTree<T> child) {
        this.children.add(child);
    }
}
// General-purpose Lexer
public class Lexer {

    // Unused: helper for when we implement finding delimiters longer than one character.
    // Returns the length of the longest delimiter, or 0 for an empty set.
    private static int longestDelimiter(Set<String> delimiters) {
        int longest = 0;
        for (String delimiter : delimiters) {
            longest = Math.max(longest, delimiter.length());
        }
        return longest;
    }

    /*
     * Planned but not implemented: rudeLex(String input, Set<Character> delimiters), a lexer
     * for "free-form" languages.
     *
     * "Free-form" has a specific meaning here that is important to preserve: _additional_
     * whitespace characters do not affect the language, e.g. two newlines instead of one, four
     * spaces instead of two, etc. Such languages are _not_ "whitespace-insensitive", which is
     * usually a misnomer. The name is a bit of a joke: free-form languages are generally
     * referred to as whitespace-insensitive, and insensitive == rude.
     *
     * NOTE: it would only work with single-character delimiters.
     * TODO: deduplicate whitespace
     */

    /**
     * Lexes a non-free-form language: the input is split at every delimiter character, and
     * each delimiter is kept as its own one-character token, so no character of the input is
     * lost and repeated delimiters are all reported.
     *
     * @param input      the text to split; must not be {@code null}
     * @param delimiters the single-character delimiters; must not be {@code null}
     * @return the tokens in source order, including the text after the last delimiter;
     *         empty when the input is empty
     */
    public static ArrayList<String> sensitiveLex(String input, Set<Character> delimiters) {
        ArrayList<String> tokens = new ArrayList<String>();
        StringBuilder currentToken = new StringBuilder();
        // C-style loop because we may need to manipulate the index in the future
        // (multi-character delimiters).
        for (int i = 0; i < input.length(); i++) {
            char next = input.charAt(i);
            if (delimiters.contains(next)) {
                if (currentToken.length() > 0) {
                    tokens.add(currentToken.toString());
                    currentToken.setLength(0);
                }
                tokens.add(Character.toString(next));
            } else {
                currentToken.append(next);
            }
        }
        // Text after the last delimiter is a token too.
        if (currentToken.length() > 0) {
            tokens.add(currentToken.toString());
        }
        return tokens;
    }
}