From bfa72127cf120f0e98410b45a043b95ad522b729 Mon Sep 17 00:00:00 2001
From: j-james
Date: Mon, 17 Oct 2022 06:54:35 -0700
Subject: Sweeping refactor to check CheckStyle boxes
---
src/main/model/css/CssParser.java | 368 +++++++++++++++-----------
src/main/model/html/ElementNode.java | 30 ++-
src/main/model/html/HtmlParser.java | 490 ++++++++++++++++++++---------------
src/main/model/html/TextNode.java | 13 +-
src/main/model/util/Node.java | 6 +-
src/test/model/CssParserTest.java | 3 +-
src/test/model/HtmlParserTest.java | 17 +-
7 files changed, 554 insertions(+), 373 deletions(-)
diff --git a/src/main/model/css/CssParser.java b/src/main/model/css/CssParser.java
index 8d57bdc..25b6752 100644
--- a/src/main/model/css/CssParser.java
+++ b/src/main/model/css/CssParser.java
@@ -14,13 +14,6 @@ import java.util.*;
* ATTRIBUTE ::= 'color' | 'text' | ...
* VALUE ::= ??? idk lol
*/
-
-/**
- * This class assumes that it is getting _valid CSS_: that is, the style between two tags
- * of a style block, or the raw content of a .css file.
- * Making sure this assumption holds is extremely important for program robustness.
- * We do not check for validity, i.e. throw any exceptions - the driving principle of web standards is to "fail softly".
- */
public class CssParser {
/**
@@ -32,156 +25,60 @@ public class CssParser {
SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES
}
+    // essentially the csstree type, only we don't need it to be a tree:
+    // a list of (selector, rule) pairs, where a rule is a list of (property, value) pairs
+    private ArrayList<Pair<String, ArrayList<Pair<String, String>>>> result;
+    // a bunch of useful buffers: optimizations in the future could likely come from tweaking these
+    // (Strings are probably _not_ the right tool for the job here, lol)
+    private String currentSelector;
+    private ArrayList<Pair<String, String>> currentRule;
+    private String currentProperty;
+    private String currentValue;
+    // important for quote escapes
+    private char previousChar;
+
+    private ParserState state;
+
+    /// Creates a parser with empty buffers, positioned before the first selector.
+    public CssParser() {
+        // A stylesheet opens with a selector, so that is the state we begin in.
+        this.state = ParserState.SELECTORS;
+        this.previousChar = '\0';
+
+        this.currentSelector = "";
+        this.currentProperty = "";
+        this.currentValue = "";
+        this.currentRule = new ArrayList<>();
+        this.result = new ArrayList<>();
+    }
+
/**
- * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style.
+ * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style. No additional lookahead is needed;
+ * however, we do keep a previousChar value for dealing with (annoying) escaped quotes.
* It should be fast - I'd say something about time complexity if I knew anything about time complexity.
* No guarantees are made about invalid CSS files. Also, no guarantees are made about valid CSS files, lol.
+ *
+ * REQUIRES: A valid CSS file, as a raw String.
+ * MODIFIES: this
+ * EFFECTS: Returns a parsed CSS representation as several nested ArrayLists and Pairs of Strings.
*/
- public static ArrayList>>> parseLL(String input) {
-
- // parser buffers
- // essentially the CssTree type
- var result = new ArrayList>>>();
- var currentSelector = "";
- var currentRule = new ArrayList>();
- var currentProperty = "";
- var currentValue = "";
- var previousChar = '\0';
-
- // We safely assume to start by reading a selector.
- ParserState state = ParserState.SELECTORS;
+ public ArrayList>>> parseCSS(String input) {
for (char c : input.toCharArray()) {
// System.out.print(state);
// System.out.println(" " + c);
switch (state) {
- case SELECTORS:
- switch (c) {
- case '@':
- if (currentSelector.equals("")) {
- state = ParserState.MEDIA_SELECTORS;
- } else {
- currentSelector += c;
- }
- break;
- case '{':
- state = ParserState.ATTRIBUTE;
- break;
- case ' ': case '\n':
- break;
- // todo: do better than blindly create a string; pattern match on css selectors
- default:
- currentSelector += c;
- break;
- }
+ case SELECTORS: caseSelectors(c);
break;
- case MEDIA_SELECTORS:
- switch (c) {
- // todo: don't entirely disregard media queries, also split between @media/@...
- case '{':
- state = ParserState.SELECTORS;
- // discard currentSelector
- currentSelector = "";
- break;
- default:
- currentSelector += c;
- break;
- }
+ case MEDIA_SELECTORS: caseMediaSelectors(c);
break;
- case ATTRIBUTE:
- switch (c) {
- case ':':
- state = ParserState.VALUE;
- break;
- case '}':
- state = ParserState.SELECTORS;
- if (!currentValue.equals("") || !currentProperty.equals("")) {
- System.out.println("something's wrong");
- currentProperty = "";
- currentValue = "";
- }
- result.add(new Pair<>(currentSelector, currentRule));
- System.out.println(currentRule);
- currentSelector = "";
- currentRule = new ArrayList<>();
- break;
- case ' ': case '\n':
- break;
- default:
- currentProperty += c;
- break;
- }
+ case ATTRIBUTE: caseAttribute(c);
break;
- case VALUE:
- switch (c) {
- case ';':
- state = ParserState.ATTRIBUTE;
- currentRule.add(new Pair<>(currentProperty, currentValue));
- currentProperty = "";
- currentValue = "";
- break;
- case '}':
- state = ParserState.SELECTORS;
- if (!currentValue.equals("") || !currentProperty.equals("")) {
- currentRule.add(new Pair<>(currentProperty, currentValue));
- currentProperty = "";
- currentValue = "";
- }
- result.add(new Pair<>(currentSelector, currentRule));
- currentSelector = "";
- currentRule = new ArrayList<>();
- break;
- case '\'':
- state = ParserState.SINGLE_QUOTES;
- currentValue += c;
- break;
- case '\"':
- state = ParserState.DOUBLE_QUOTES;
- currentValue += c;
- break;
- case ' ': case '\n':
- break;
- default:
- currentValue += c;
- break;
- }
+ case VALUE: caseValue(c);
break;
- // quotes in css are exclusively? for paths: so we want to include the quotes themselves
- case SINGLE_QUOTES:
- switch (c) {
- case '\'':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- currentValue += c;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- break;
- default:
- currentValue += c;
- break;
- }
+ case SINGLE_QUOTES: caseSingleQuotes(c);
break;
- case DOUBLE_QUOTES:
- switch (c) {
- case '\"':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- currentValue += c;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- break;
- default:
- currentValue += c;
- break;
- }
+ case DOUBLE_QUOTES: caseDoubleQuotes(c);
break;
}
}
@@ -189,12 +86,179 @@ public class CssParser {
}
/**
- * Takes an input string with units and returns out the value in pixels.
- * This is a fault-tolerant system.
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the SELECTORS state.
+ * See also: the (slightly wrong) context-free grammar commented at the start of this file.
+ * MODIFIES: this
+ */
+    private void caseSelectors(char c) {
+        // whitespace never contributes to a selector; in valid CSS a '}' met here can only close an
+        // at-rule block (e.g. @media), and must not end up glued to the front of the next selector
+        if (c == ' ' || c == '\n' || c == '}') {
+            return;
+        }
+
+        if (c == '{') {
+            state = ParserState.ATTRIBUTE;
+            return;
+        }
+
+        // a leading '@' opens an at-rule; anywhere else it is just part of the selector
+        if (c == '@' && currentSelector.isEmpty()) {
+            state = ParserState.MEDIA_SELECTORS;
+            return;
+        }
+        // todo: do better than blindly create a string; pattern match on css selectors
+        currentSelector += c;
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the MEDIA_SELECTORS state: buffers the at-rule's
+     *          prelude until its opening brace, then discards it and goes back to reading selectors.
+     * MODIFIES: this
+     */
+    private void caseMediaSelectors(char c) {
+        // todo: don't entirely disregard media queries, also split between @media/@...
+        if (c != '{') {
+            currentSelector += c;
+            return;
+        }
+
+        // the at-rule is not represented in the result: throw its text away so that it
+        // does not end up prefixed to the first selector inside the block
+        currentSelector = "";
+        state = ParserState.SELECTORS;
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the ATTRIBUTE state: accumulates the property
+     *          name, hands over to the VALUE state on ':' and finishes the current rule on '}'.
+     *          Whitespace is skipped.
+     * MODIFIES: this
+     */
+    private void caseAttribute(char c) {
+        if (c == ' ' || c == '\n') {
+            return;
+        }
+
+        if (c == ':') {
+            state = ParserState.VALUE;
+        } else if (c == '}') {
+            state = ParserState.SELECTORS;
+            // a property name that never got a value cannot be stored: whatever is still
+            // sitting in the buffers is dropped before the rule is recorded
+            currentProperty = "";
+            currentValue = "";
+            // the rule keeps whatever declarations were completed so far
+            result.add(new Pair<>(currentSelector, currentRule));
+            currentRule = new ArrayList<>();
+            currentSelector = "";
+        } else {
+            currentProperty += c;
+        }
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the VALUE state: builds up the value of a declaration.
+     * MODIFIES: this
+     */
+    private void caseValue(char c) {
+        // todo: handle spaces better: they're actually important inside values
+        if (c == ' ' || c == '\n') {
+            return;
+        }
+        if (c == ';') {
+            state = ParserState.ATTRIBUTE;
+            updateCurrentRule();
+        } else if (c == '}') {
+            state = ParserState.SELECTORS;
+            // the last declaration of a rule may omit its trailing semicolon
+            if (!currentValue.isEmpty() || !currentProperty.isEmpty()) {
+                updateCurrentRule();
+            }
+            result.add(new Pair<>(currentSelector, currentRule));
+            currentRule = new ArrayList<>();
+            currentSelector = "";
+        } else {
+            // an opening quote switches state, but is itself kept as part of the value
+            if (c == '\'') {
+                state = ParserState.SINGLE_QUOTES;
+            } else if (c == '\"') {
+                state = ParserState.DOUBLE_QUOTES;
+            }
+            currentValue += c;
+        }
+    }
+
+    /**
+     * EFFECTS: Appends the buffered (property, value) pair to the current rule, then clears both buffers.
+     * MODIFIES: this
+     */
+    private void updateCurrentRule() {
+        var declaration = new Pair<>(currentProperty, currentValue);
+        currentValue = "";
+        currentProperty = "";
+        currentRule.add(declaration);
+    }
+
+ // todo: handle additional escaped characters, though what we have right now isn't bad
+
+    /**
+     * EFFECTS: Handles and updates parser state/buffers for a single character while in the SINGLE_QUOTES state.
+     *          A quote directly preceded by a backslash is an escaped quote and does not end the string.
+     * MODIFIES: this
+     */
+    private void caseSingleQuotes(char c) {
+        if (c == '\'' && previousChar != '\\') {
+            // quotes in css are exclusively? for paths: so we want to include the quotes themselves
+            state = ParserState.VALUE;
+            currentValue += c;
+            previousChar = '\0';
+            return;
+        }
+
+        if (c == '\'') {
+            // escaped quote: the buffer ends with the escaping backslash, which the quote replaces
+            currentValue = currentValue.substring(0, currentValue.length() - 1);
+        }
+
+        currentValue += c;
+        // remember this character so that the next one can tell whether it is escaped; a backslash
+        // that was itself escaped ("\\") must not escape whatever follows it
+        previousChar = (c == '\\' && previousChar == '\\') ? '\0' : c;
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the DOUBLE_QUOTES state; \" does not end the string.
+     * MODIFIES: this
+     */
+    private void caseDoubleQuotes(char c) {
+        if (c == '\"' && previousChar != '\\') {
+            // unescaped quote: the string is over, and the closing quote is kept in the value
+            state = ParserState.VALUE;
+            currentValue += c;
+            previousChar = '\0';
+            return;
+        }
+        if (c == '\"') {
+            // escaped quote: the buffer ends with the escaping backslash, which the quote replaces
+            currentValue = currentValue.substring(0, currentValue.length() - 1);
+        }
+
+        currentValue += c;
+        // remember this character so that the next one can tell whether it is escaped; a backslash
+        // that was itself escaped ("\\") must not escape whatever follows it
+        previousChar = (c == '\\' && previousChar == '\\') ? '\0' : c;
+    }
+
+ /**
+ * Takes an input string with units and returns out the value in pixels. This is a fault-tolerant system.
* When given an invalid string (i.e. "12p53x"), it will produce an invalid result instead of throwing.
* However, it should parse every valid string correctly.
+ *
+ * REQUIRES: A string of the form [NUMBER][VALIDUNIT]
+ * EFFECTS: Returns a number, in pixels, that has been converted appropriately
*/
- private double parseUnits(String input) {
+ private static double parseUnits(String input) {
String numbers = "";
String units = "";
// imagine making a language without iterable strings, fml
@@ -210,9 +274,17 @@ public class CssParser {
try {
value = Float.parseFloat(numbers);
} catch (NumberFormatException e) {
- System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers);
+ // System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers);
value = 0.0;
}
+ return convertUnits(units, value);
+ }
+
+ /**
+ * REQUIRES: a String that is a unit, otherwise defaults to pixels
+ * EFFECTS: converts a value in some units to a value in pixels
+ */
+ private static double convertUnits(String units, double value) {
// god case/break is such a fault-provoking design i hate it
// good thing we avoid breaks entirely here lmao
switch (units) {
@@ -226,7 +298,7 @@ public class CssParser {
case "in": return value * 96;
// not handled: % em ex ch rem lh rlh vw vh vmin vmax vb vi svw svh lvw lvh dvw dvh
default:
- System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value);
+ // System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value);
return value;
}
}
diff --git a/src/main/model/html/ElementNode.java b/src/main/model/html/ElementNode.java
index 1d427e8..a1ad90c 100644
--- a/src/main/model/html/ElementNode.java
+++ b/src/main/model/html/ElementNode.java
@@ -12,28 +12,42 @@ public class ElementNode implements Node {
private ArrayList children;
- public String getTag() {
- return this.tag;
- }
-
- public ArrayList getChildren() {
- return this.children;
- }
-
+ /**
+ * EFFECTS: Constructs a new ElementNode from the arguments provided.
+ * MODIFIES: this
+ */
public ElementNode(String tag, ArrayList> attributes, ArrayList children) {
this.tag = tag;
this.attributes = attributes;
this.children = children;
}
+ /**
+ * Overloads the constructor for ease of use. We often don't provide children, at first.
+ * EFFECTS: Constructs a new ElementNode from the arguments provided.
+ * MODIFIES: this
+ */
public ElementNode(String tag, ArrayList> attributes) {
this(tag, attributes, new ArrayList());
}
+ /**
+ * EFFECTS: Adds a child to the children ArrayList.
+ * MODIFIES: this
+ */
public void addChild(Node child) {
this.children.add(child);
}
+    public String getTag() {
+        return tag;
+    }
+
+    public ArrayList<Node> getChildren() {
+        return this.children;
+    }
+
+ // We implement this method for easy debugging.
public String getData() {
return getTag();
}
diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java
index d6b4ff1..e9dc0c4 100644
--- a/src/main/model/html/HtmlParser.java
+++ b/src/main/model/html/HtmlParser.java
@@ -17,229 +17,317 @@ import org.javatuples.*;
*/
public class HtmlParser {
+ /**
+ * HTML is not nice to parse. We manage to get away with a relatively small number of parser states regardless.
+ */
private enum ParserState {
HTML, IGNORED,
- OPENING_TAG, KEY, VALUE,
- SINGLE_QUOTE, DOUBLE_QUOTE,
+ OPENING_TAG, KEY, VALUE, // TAG::OPENING_TAG, TAG::KEY, TAG::VALUE
+ SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES
UNKNOWN_TAG, CLOSING_TAG,
}
- public static ArrayList parseHtmlLL(String input) {
+    // HTML documents are uniquely a list of Nodes rather than a Node themselves
+    private ArrayList<Node> result;
+    // a bunch of useful buffers. see CssParser for commentary.
+    private ArrayDeque<ElementNode> unfinished;
+    private String currentTag;
+    private ArrayList<Pair<String, String>> currentAttributes;
+    private String currentKey;
+    private String currentValue;
+    private String currentText;
+    // important for quote escapes, and multiple whitespace chars
+    private char previousChar;
- var result = new ArrayList();
- var unfinished = new ArrayDeque();
- var currentTag = "";
- var currentAttributes = new ArrayList>();
- var currentKey = "";
- var currentValue = "";
- var currentText = "";
- var previousChar = '\0'; // important for quote escapes, and multiple whitespace chars
+ private ParserState state;
+
+ public HtmlParser() {
+ result = new ArrayList<>();
+ unfinished = new ArrayDeque<>();
+ currentTag = "";
+ currentAttributes = new ArrayList<>();
+ currentKey = "";
+ currentValue = "";
+ currentText = "";
+ previousChar = '\0';
// We safely? assume to start outside of all nodes.
- ParserState state = ParserState.HTML;
+ state = ParserState.HTML;
+ }
+
+ public ArrayList parseHtml(String input) {
for (char c : input.toCharArray()) {
// System.out.print(state);
// System.out.println(" " + c + " " + currentText);
switch (state) {
- case HTML:
- switch (c) {
- case '<':
- state = ParserState.UNKNOWN_TAG;
- if (!currentText.equals("")) {
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(new TextNode(currentText));
- } else {
- result.add(new TextNode(currentText));
- }
- currentText = "";
- previousChar = '\0';
- }
- break; // FOOTGUN LANGUAGE DESIGN
- case ' ': case '\n':
- if (previousChar != ' ') {
- currentText += ' ';
- }
- previousChar = ' ';
- break;
- default:
- currentText += c;
- previousChar = c;
- break;
- }
+ case HTML: caseHtml(c);
break;
- case UNKNOWN_TAG:
- switch (c) {
- case '/':
- state = ParserState.CLOSING_TAG;
- break;
- case '>': // Why would you put <> in your HTML??? go away
- state = ParserState.HTML;
- currentText += "<>";
- System.out.println("Why would you put <> in your HTML??? go away");
- break;
- // For now, we'll straight-up ignore anything matching the syntax:
- // i.e. comments, and
- case '!':
- state = ParserState.IGNORED;
- break;
- default:
- state = ParserState.OPENING_TAG;
- currentTag += c;
- break;
- }
+ case UNKNOWN_TAG: caseUnknownTag(c);
break; // FOOTGUN LANGUAGE DESIGN STRIKES AGAIN
- case IGNORED:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- break;
- default:
- break;
- }
+ case IGNORED: caseIgnored(c);
break;
- case OPENING_TAG:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- var node = new ElementNode(currentTag, currentAttributes);
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(node);
- unfinished.add(node);
- } else {
- result.add(node);
- unfinished.add((ElementNode) result.get(result.size() - 1));
- }
- currentTag = "";
- currentAttributes = new ArrayList<>();
- break;
- case ' ': case '\n':
- state = ParserState.KEY;
- break;
- default:
- currentTag += c;
- break;
- }
+ case OPENING_TAG: caseOpeningTag(c);
break;
- case CLOSING_TAG:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- // IMPORTANT: we don't validate that closing tags correspond to an open tag
- if (!isSelfClosingTag(currentTag)) {
- if (unfinished.size() != 0) {
- unfinished.removeLast();
- }
- }
- currentTag = "";
- break;
- case ' ': case '\n':
- break;
- default:
- currentTag += c;
- break;
- }
+ case CLOSING_TAG: caseClosingTag(c);
break;
- case KEY:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- var node = new ElementNode(currentTag, currentAttributes);
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(node);
- unfinished.add(node);
- } else {
- result.add(node);
- unfinished.add((ElementNode) result.get(result.size() - 1));
- }
- currentTag = "";
- currentAttributes = new ArrayList<>();
- break;
- case '=':
- state = ParserState.VALUE;
- break;
- case ' ': case '\n':
- break;
- default:
- currentKey += c;
- break;
- }
+ case KEY: caseKey(c);
break;
- case VALUE:
- switch (c) {
- case '\'':
- state = ParserState.SINGLE_QUOTE;
- break;
- case '\"':
- state = ParserState.DOUBLE_QUOTE;
- break;
- case ' ': case '\n':
- currentAttributes.add(new Pair<>(currentKey, currentValue));
- currentKey = "";
- currentValue = "";
- case '>':
- if (!currentKey.equals("") || !currentValue.equals("")) {
- currentAttributes.add(new Pair<>(currentKey, currentValue));
- currentKey = "";
- currentValue = "";
- }
- state = ParserState.HTML;
- var node = new ElementNode(currentTag, currentAttributes);
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(node);
- unfinished.add(node);
- } else {
- result.add(node);
- unfinished.add((ElementNode) result.get(result.size() - 1));
- }
- currentTag = "";
- currentAttributes = new ArrayList<>();
- break;
- default:
- currentValue += c;
- break;
- }
+ case VALUE: caseValue(c);
break;
- case SINGLE_QUOTE:
- switch (c) {
- case '\'':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- break;
- default:
- currentValue += c;
- previousChar = c;
- break;
- }
+ case SINGLE_QUOTES: caseSingleQuotes(c);
break;
- case DOUBLE_QUOTE:
- switch (c) {
- case '\"':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- default:
- currentValue += c;
- previousChar = c;
- break;
- }
+ case DOUBLE_QUOTES: caseDoubleQuotes(c);
break;
}
}
return result;
}
+    /**
+     * EFFECTS: Handles a single character while in the HTML state, i.e. outside of any tag: gathers text
+     *          (collapsing runs of whitespace) until a '<' announces the next tag.
+     * MODIFIES: this
+     */
+    private void caseHtml(char c) {
+        if (c == '<') {
+            state = ParserState.UNKNOWN_TAG;
+            // flush whatever text was gathered before the tag begins
+            if (!currentText.isEmpty()) {
+                addNewTextNode();
+            }
+        } else if (c == ' ' || c == '\n') {
+            // a run of spaces/newlines contributes one single space to the text
+            if (previousChar != ' ') {
+                currentText += ' ';
+            }
+            previousChar = ' ';
+        } else {
+            // any other character is text, verbatim
+            currentText += c;
+            previousChar = c;
+        }
+    }
+
+    /**
+     * EFFECTS: Handles the single character that directly follows a '<' (the UNKNOWN_TAG state)
+     *          and decides from it what kind of tag is being read.
+     *          Whitespace is not special-cased here: it starts a tag name like any other character.
+     * MODIFIES: this
+     */
+    private void caseUnknownTag(char c) {
+        if (c == '/') {
+            state = ParserState.CLOSING_TAG;
+        } else if (c == '!') {
+            // For now, we'll straight-up ignore anything that starts with "<!":
+            // i.e. comments, and presumably declarations such as the doctype
+            state = ParserState.IGNORED;
+        } else if (c == '>') {
+            // Why would you put <> in your HTML??? go away
+            // (it is kept as literal text)
+            currentText += "<>";
+            state = ParserState.HTML;
+        } else {
+            // anything else is the first character of an opening tag's name
+            currentTag += c;
+            state = ParserState.OPENING_TAG;
+        }
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the IGNORED state: skips everything up to and
+     *          including the first '>', wherever it appears, then returns to the HTML state.
+     * MODIFIES: this
+     */
+    private void caseIgnored(char c) {
+        if (c != '>') {
+            // still inside the ignored construct: the character is dropped
+            return;
+        }
+
+        state = ParserState.HTML;
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the OPENING_TAG state, i.e. while reading a tag name.
+     *          The name ends at whitespace (attributes may follow) or at '>' (the tag is complete).
+     * MODIFIES: this
+     */
+    private void caseOpeningTag(char c) {
+        if (c == '>') {
+            // a tag without attributes is complete as soon as it closes
+            addNewElementNode();
+        } else if (c == ' ' || c == '\n') {
+            // whitespace ends the tag name; attributes (if any) follow
+            state = ParserState.KEY;
+        } else {
+            // still reading the name
+            currentTag += c;
+        }
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the CLOSING_TAG state: reads the tag name and,
+     *          on '>', closes the innermost open element.
+     * MODIFIES: this
+     */
+    private void caseClosingTag(char c) {
+        if (c == ' ' || c == '\n') {
+            return;
+        }
+        if (c != '>') {
+            currentTag += c;
+            return;
+        }
+
+        state = ParserState.HTML;
+        // IMPORTANT: we don't validate that closing tags correspond to an open tag
+        // self-closing tags are never pushed as open, so closing one must not pop anything
+        if (!isSelfClosingTag(currentTag) && !unfinished.isEmpty()) {
+            unfinished.removeLast();
+        }
+
+        currentTag = "";
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the KEY state, i.e. while reading an attribute name.
+     *          A name that reaches '>' without a value (e.g. "disabled") is stored with an empty value.
+     * MODIFIES: this
+     */
+    private void caseKey(char c) {
+        if (c == '=') {
+            state = ParserState.VALUE;
+        } else if (c == '>') {
+            // flush a valueless key so it cannot leak into the next tag; a lone "/" is just "<br />"
+            if (!currentKey.isEmpty() && !currentKey.equals("/")) {
+                currentAttributes.add(new Pair<>(currentKey, ""));
+            }
+            currentKey = "";
+            addNewElementNode();
+        } else if (c != ' ' && c != '\n') {
+            currentKey += c;
+        }
+    }
+
+    /**
+     * EFFECTS: Handles a single character while in the VALUE state, i.e. while reading an attribute value.
+     *          Whitespace ends the value and moves on to the next key; '>' ends the whole tag.
+     * MODIFIES: this
+     */
+    private void caseValue(char c) {
+        if (c == '\'') {
+            state = ParserState.SINGLE_QUOTES;
+        } else if (c == '\"') {
+            state = ParserState.DOUBLE_QUOTES;
+        } else if (c == ' ' || c == '\n') {
+            // the attribute is complete, the tag is not: more attributes may follow, so we must
+            // NOT fall through to the '>' handling (that would end the tag after one attribute)
+            currentAttributes.add(new Pair<>(currentKey, currentValue));
+            currentKey = "";
+            currentValue = "";
+            state = ParserState.KEY;
+        } else if (c == '>') {
+            // flush the attribute still sitting in the buffers, if any, before finishing the tag
+            if (!currentKey.isEmpty() || !currentValue.isEmpty()) {
+                currentAttributes.add(new Pair<>(currentKey, currentValue));
+                currentKey = "";
+                currentValue = "";
+            }
+            addNewElementNode();
+        } else {
+            currentValue += c;
+        }
+    }
+
+    /**
+     * EFFECTS: Handles and updates parser state/buffers for a single character while in the SINGLE_QUOTES state.
+     *          A quote directly preceded by a backslash is kept as a literal quote instead of ending the value.
+     * MODIFIES: this
+     */
+    private void caseSingleQuotes(char c) {
+        if (c == '\'' && previousChar != '\\') {
+            // unescaped quote: the quoted section is over (the quotes themselves are not stored)
+            state = ParserState.VALUE;
+            previousChar = '\0';
+            return;
+        }
+
+        if (c == '\'') {
+            // escaped quote: drop only the escaping backslash that ends the buffer
+            // (length - 2 would also eat the character before it, and throws on a value of just "\")
+            currentValue = currentValue.substring(0, currentValue.length() - 1);
+        }
+
+        currentValue += c;
+        previousChar = c;
+    }
+
+    /**
+     * EFFECTS: Handles and updates parser state/buffers for a single character while in the DOUBLE_QUOTES state.
+     *          A quote directly preceded by a backslash is kept as a literal quote instead of ending the value.
+     * MODIFIES: this
+     */
+    private void caseDoubleQuotes(char c) {
+        if (c == '\"' && previousChar != '\\') {
+            // unescaped quote: the quoted section is over (the quotes themselves are not stored)
+            state = ParserState.VALUE;
+            previousChar = '\0';
+            return;
+        }
+
+        if (c == '\"') {
+            // escaped quote: drop only the escaping backslash that ends the buffer
+            // (length - 2 would also eat the character before it, and throws on a value of just "\")
+            currentValue = currentValue.substring(0, currentValue.length() - 1);
+        }
+
+        currentValue += c;
+        previousChar = c;
+    }
+
+    /**
+     * Helper function to remove code duplication.
+     * EFFECTS: Builds an ElementNode from the tag/attribute buffers, attaches it to the tree, marks it as
+     *          open unless it is self-closing, resets the buffers and returns to the HTML state.
+     * MODIFIES: this
+     */
+    private void addNewElementNode() {
+        state = ParserState.HTML;
+        var node = new ElementNode(currentTag, currentAttributes);
+        // attach to the innermost open element, or to the document itself when nothing is open
+        if (unfinished.isEmpty()) {
+            result.add(node);
+        } else {
+            unfinished.getLast().addChild(node);
+        }
+        // self-closing tags never get children, so they are never pushed as open
+        if (!isSelfClosingTag(currentTag)) {
+            unfinished.add(node);
+        }
+        currentTag = "";
+        currentAttributes = new ArrayList<>();
+    }
+
+    /**
+     * Helper function to check method length boxes.
+     * EFFECTS: Adds the buffered text as a TextNode to the innermost open element (else to the document).
+     * MODIFIES: this
+     */
+    private void addNewTextNode() {
+        if (unfinished.isEmpty()) {
+            result.add(new TextNode(currentText));
+        } else {
+            unfinished.getLast().addChild(new TextNode(currentText));
+        }
+        previousChar = '\0';
+        currentText = "";
+    }
+
+ /**
+ * Simple helper function to check if a tag is self-closing.
+ * EFFECTS: Returns whether a String tag is a self-closing tag.
+ */
private static boolean isSelfClosingTag(String tag) {
switch (tag) {
case "input": case "param":
@@ -281,7 +369,7 @@ public class HtmlParser {
-
+ qui officia deserunt mollit anim id est laborum.