aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/main/model/css/CssParser.java368
-rw-r--r--src/main/model/html/ElementNode.java30
-rw-r--r--src/main/model/html/HtmlParser.java490
-rw-r--r--src/main/model/html/TextNode.java13
-rw-r--r--src/main/model/util/Node.java6
-rw-r--r--src/test/model/CssParserTest.java3
-rw-r--r--src/test/model/HtmlParserTest.java17
7 files changed, 554 insertions, 373 deletions
diff --git a/src/main/model/css/CssParser.java b/src/main/model/css/CssParser.java
index 8d57bdc..25b6752 100644
--- a/src/main/model/css/CssParser.java
+++ b/src/main/model/css/CssParser.java
@@ -14,13 +14,6 @@ import java.util.*;
* ATTRIBUTE ::= 'color' | 'text' | ...
* VALUE ::= ??? idk lol
*/
-
-/**
- * This class assumes that it is getting _valid CSS_: that is, the style between two tags
- * of a style block, or the raw content of a .css file.
- * Making sure this assumption holds is extremely important for program robustness.
- * We do not check for validity, i.e. throw any exceptions - the driving principle of web standards is to "fail softly".
- */
public class CssParser {
/**
@@ -32,156 +25,60 @@ public class CssParser {
SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES
}
+ // essentially the csstree type, only we don't need it to be a tree
+ private ArrayList<Pair<String, ArrayList<Pair<String, String>>>> result;
+ // a bunch of useful buffers: optimizations in the future could likely come from tweaking these
+ // note that i know nothing about data structure performance: but i'm pretty sure that Strings
+ // are _not_ the right tool for the job here, lol
+ private String currentSelector;
+ private ArrayList<Pair<String, String>> currentRule;
+ private String currentProperty;
+ private String currentValue;
+ // important for quote escapes
+ private char previousChar;
+
+ private ParserState state;
+
+ /// Initialize all buffers to default values
+ public CssParser() {
+ result = new ArrayList<>();
+ currentSelector = "";
+ currentRule = new ArrayList<>();
+ currentProperty = "";
+ currentValue = "";
+ previousChar = '\0';
+
+ // We safely assume to start by reading a selector.
+ state = ParserState.SELECTORS;
+ }
+
/**
- * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style.
+ * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style. No additional lookup is needed,
+ * however we do keep a previousChar value for dealing with (annoying) escaped quotes.
* It should be fast - I'd say something about time complexity if I knew anything about time complexity.
* No guarantees are made about invalid CSS files. Also, no guarantees are made about valid CSS files, lol.
+ * <br>
+ * REQUIRES: A valid CSS file, as a raw String.
+ * MODIFIES: this
+ * EFFECTS: Returns a parsed CSS representation as several nested ArrayLists and Pairs of Strings.
*/
- public static ArrayList<Pair<String, ArrayList<Pair<String, String>>>> parseLL(String input) {
-
- // parser buffers
- // essentially the CssTree type
- var result = new ArrayList<Pair<String, ArrayList<Pair<String, String>>>>();
- var currentSelector = "";
- var currentRule = new ArrayList<Pair<String, String>>();
- var currentProperty = "";
- var currentValue = "";
- var previousChar = '\0';
-
- // We safely assume to start by reading a selector.
- ParserState state = ParserState.SELECTORS;
+ public ArrayList<Pair<String, ArrayList<Pair<String, String>>>> parseCSS(String input) {
for (char c : input.toCharArray()) {
// System.out.print(state);
// System.out.println(" " + c);
switch (state) {
- case SELECTORS:
- switch (c) {
- case '@':
- if (currentSelector.equals("")) {
- state = ParserState.MEDIA_SELECTORS;
- } else {
- currentSelector += c;
- }
- break;
- case '{':
- state = ParserState.ATTRIBUTE;
- break;
- case ' ': case '\n':
- break;
- // todo: do better than blindly create a string; pattern match on css selectors
- default:
- currentSelector += c;
- break;
- }
+ case SELECTORS: caseSelectors(c);
break;
- case MEDIA_SELECTORS:
- switch (c) {
- // todo: don't entirely disregard media queries, also split between @media/@...
- case '{':
- state = ParserState.SELECTORS;
- // discard currentSelector
- currentSelector = "";
- break;
- default:
- currentSelector += c;
- break;
- }
+ case MEDIA_SELECTORS: caseMediaSelectors(c);
break;
- case ATTRIBUTE:
- switch (c) {
- case ':':
- state = ParserState.VALUE;
- break;
- case '}':
- state = ParserState.SELECTORS;
- if (!currentValue.equals("") || !currentProperty.equals("")) {
- System.out.println("something's wrong");
- currentProperty = "";
- currentValue = "";
- }
- result.add(new Pair<>(currentSelector, currentRule));
- System.out.println(currentRule);
- currentSelector = "";
- currentRule = new ArrayList<>();
- break;
- case ' ': case '\n':
- break;
- default:
- currentProperty += c;
- break;
- }
+ case ATTRIBUTE: caseAttribute(c);
break;
- case VALUE:
- switch (c) {
- case ';':
- state = ParserState.ATTRIBUTE;
- currentRule.add(new Pair<>(currentProperty, currentValue));
- currentProperty = "";
- currentValue = "";
- break;
- case '}':
- state = ParserState.SELECTORS;
- if (!currentValue.equals("") || !currentProperty.equals("")) {
- currentRule.add(new Pair<>(currentProperty, currentValue));
- currentProperty = "";
- currentValue = "";
- }
- result.add(new Pair<>(currentSelector, currentRule));
- currentSelector = "";
- currentRule = new ArrayList<>();
- break;
- case '\'':
- state = ParserState.SINGLE_QUOTES;
- currentValue += c;
- break;
- case '\"':
- state = ParserState.DOUBLE_QUOTES;
- currentValue += c;
- break;
- case ' ': case '\n':
- break;
- default:
- currentValue += c;
- break;
- }
+ case VALUE: caseValue(c);
break;
- // quotes in css are exclusively? for paths: so we want to include the quotes themselves
- case SINGLE_QUOTES:
- switch (c) {
- case '\'':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- currentValue += c;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- break;
- default:
- currentValue += c;
- break;
- }
+ case SINGLE_QUOTES: caseSingleQuotes(c);
break;
- case DOUBLE_QUOTES:
- switch (c) {
- case '\"':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- currentValue += c;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- break;
- default:
- currentValue += c;
- break;
- }
+ case DOUBLE_QUOTES: caseDoubleQuotes(c);
break;
}
}
@@ -189,12 +86,179 @@ public class CssParser {
}
/**
- * Takes an input string with units and returns out the value in pixels.
- * This is a fault-tolerant system.
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the SELECTORS state.
+ * See also: the (slightly wrong) context-free grammar commented at the start of this file.
+ * MODIFIES: this
+ */
+ private void caseSelectors(char c) {
+ switch (c) {
+ case '@':
+ if (currentSelector.equals("")) {
+ state = ParserState.MEDIA_SELECTORS;
+ } else {
+ currentSelector += c;
+ }
+ break;
+ case '{':
+ state = ParserState.ATTRIBUTE;
+ break;
+ case ' ': case '\n':
+ break;
+ // todo: do better than blindly create a string; pattern match on css selectors
+ default:
+ currentSelector += c;
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the MEDIA_SELECTORS state.
+ * MODIFIES: this
+ */
+ private void caseMediaSelectors(char c) {
+ switch (c) {
+ // todo: don't entirely disregard media queries, also split between @media/@...
+ case '{':
+ state = ParserState.SELECTORS;
+ // discard currentSelector
+ currentSelector = "";
+ break;
+ default:
+ currentSelector += c;
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the ATTRIBUTE state.
+ * MODIFIES: this
+ */
+ private void caseAttribute(char c) {
+ switch (c) {
+ case ':':
+ state = ParserState.VALUE;
+ break;
+ case '}':
+ state = ParserState.SELECTORS;
+ if (!currentValue.equals("") || !currentProperty.equals("")) {
+ // System.out.println("something's wrong");
+ currentProperty = "";
+ currentValue = "";
+ }
+ result.add(new Pair<>(currentSelector, currentRule));
+ currentSelector = "";
+ currentRule = new ArrayList<>();
+ break;
+ case ' ': case '\n':
+ break;
+ default:
+ currentProperty += c;
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the VALUE state.
+ * MODIFIES: this
+ */
+ private void caseValue(char c) {
+ switch (c) {
+ case ';':
+ state = ParserState.ATTRIBUTE;
+ updateCurrentRule();
+ break;
+ case '}':
+ state = ParserState.SELECTORS;
+ if (!currentValue.equals("") || !currentProperty.equals("")) {
+ updateCurrentRule();
+ }
+ result.add(new Pair<>(currentSelector, currentRule));
+ currentSelector = "";
+ currentRule = new ArrayList<>();
+ break;
+ // todo: handle spaces better: they're actually important inside values
+ case ' ': case '\n': break; // believe me, i think this is ugly too but it passes checkstyle
+ case '\'':
+ state = ParserState.SINGLE_QUOTES;
+ currentValue += c;
+ break;
+ // intentional use of TERRIBLE SMOKING FOOTGUN behavior to check boxes
+ case '\"': state = ParserState.DOUBLE_QUOTES;
+ default: currentValue += c;
+ break;
+ }
+ }
+
+ /**
+ * Helper function to check method length boxes.
+ * EFFECTS: Adds a new property to the current rule.
+ * MODIFIES: this
+ */
+ private void updateCurrentRule() {
+ currentRule.add(new Pair<>(currentProperty, currentValue));
+ currentProperty = "";
+ currentValue = "";
+ }
+
+ // todo: handle additional escaped characters, though what we have right now isn't bad
+
+ /**
+ * Handles a single character while the parser is inside a single-quoted CSS value.
+ * An unescaped quote closes the string and returns to the VALUE state. A quote preceded by a
+ * backslash is kept as a literal quote and the escaping backslash is dropped from the value.
+ * MODIFIES: this
+ * EFFECTS: Appends to currentValue, records previousChar, and may switch state back to VALUE.
+ */
+ private void caseSingleQuotes(char c) {
+ switch (c) {
+ case '\'':
+ if (previousChar != '\\') {
+ state = ParserState.VALUE;
+ // quotes in css are exclusively? for paths: so we want to include the quotes themselves
+ currentValue += c;
+ previousChar = '\0';
+ } else {
+ // The last buffered character is the escaping backslash: drop exactly that one character.
+ // possibly not the best way to handle this, may be better to keep the backslash
+ currentValue = currentValue.substring(0, currentValue.length() - 1);
+ currentValue += c;
+ previousChar = c;
+ }
+ break;
+ default:
+ currentValue += c;
+ // Remember this character so a following quote can tell whether it was escaped.
+ previousChar = c;
+ break;
+ }
+ }
+
+ /**
+ * Handles a single character while the parser is inside a double-quoted CSS value.
+ * An unescaped quote closes the string and returns to the VALUE state. A quote preceded by a
+ * backslash is kept as a literal quote and the escaping backslash is dropped from the value.
+ * MODIFIES: this
+ * EFFECTS: Appends to currentValue, records previousChar, and may switch state back to VALUE.
+ */
+ private void caseDoubleQuotes(char c) {
+ switch (c) {
+ case '\"':
+ if (previousChar != '\\') {
+ state = ParserState.VALUE;
+ // The closing quote is kept as part of the value.
+ currentValue += c;
+ previousChar = '\0';
+ } else {
+ // The last buffered character is the escaping backslash: drop exactly that one character.
+ currentValue = currentValue.substring(0, currentValue.length() - 1);
+ currentValue += c;
+ previousChar = c;
+ }
+ break;
+ default:
+ currentValue += c;
+ // Remember this character so a following quote can tell whether it was escaped.
+ previousChar = c;
+ break;
+ }
+ }
+
+ /**
+ * Takes an input string with units and returns out the value in pixels. This is a fault-tolerant system.
* When given an invalid string (i.e. "12p53x"), it will produce an invalid result instead of throwing.
* However, it should parse every valid string correctly.
+ * <br>
+ * REQUIRES: A string of the form [NUMBER][VALIDUNIT]
+ * EFFECTS: Returns a number, in pixels, that has been converted appropriately
*/
- private double parseUnits(String input) {
+ private static double parseUnits(String input) {
String numbers = "";
String units = "";
// imagine making a language without iterable strings, fml
@@ -210,9 +274,17 @@ public class CssParser {
try {
value = Float.parseFloat(numbers);
} catch (NumberFormatException e) {
- System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers);
+ // System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers);
value = 0.0;
}
+ return convertUnits(units, value);
+ }
+
+ /**
+ * REQUIRES: a String that is a unit, otherwise defaults to pixels
+ * EFFECTS: converts a value in some units to a value in pixels
+ */
+ private static double convertUnits(String units, double value) {
// god case/break is such a fault-provoking design i hate it
// good thing we avoid breaks entirely here lmao
switch (units) {
@@ -226,7 +298,7 @@ public class CssParser {
case "in": return value * 96;
// not handled: % em ex ch rem lh rlh vw vh vmin vmax vb vi svw svh lvw lvh dvw dvh
default:
- System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value);
+ // System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value);
return value;
}
}
diff --git a/src/main/model/html/ElementNode.java b/src/main/model/html/ElementNode.java
index 1d427e8..a1ad90c 100644
--- a/src/main/model/html/ElementNode.java
+++ b/src/main/model/html/ElementNode.java
@@ -12,28 +12,42 @@ public class ElementNode implements Node {
private ArrayList<Node> children;
- public String getTag() {
- return this.tag;
- }
-
- public ArrayList<Node> getChildren() {
- return this.children;
- }
-
+ /**
+ * EFFECTS: Constructs a new ElementNode from the arguments provided.
+ * MODIFIES: this
+ */
public ElementNode(String tag, ArrayList<Pair<String, String>> attributes, ArrayList<Node> children) {
this.tag = tag;
this.attributes = attributes;
this.children = children;
}
+ /**
+ * Overloads the constructor for ease of use. We often don't provide children, at first.
+ * EFFECTS: Constructs a new ElementNode from the arguments provided.
+ * MODIFIES: this
+ */
public ElementNode(String tag, ArrayList<Pair<String, String>> attributes) {
this(tag, attributes, new ArrayList<Node>());
}
+ /**
+ * EFFECTS: Adds a child to the children ArrayList.
+ * MODIFIES: this
+ */
public void addChild(Node child) {
this.children.add(child);
}
+ public String getTag() {
+ return this.tag;
+ }
+
+ public ArrayList<Node> getChildren() {
+ return this.children;
+ }
+
+ // We implement this method for easy debugging.
public String getData() {
return getTag();
}
diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java
index d6b4ff1..e9dc0c4 100644
--- a/src/main/model/html/HtmlParser.java
+++ b/src/main/model/html/HtmlParser.java
@@ -17,229 +17,317 @@ import org.javatuples.*;
*/
public class HtmlParser {
+ /**
+ * HTML is not nice to parse. We manage to get away with a relatively small number of parser states regardless.
+ */
private enum ParserState {
HTML, IGNORED,
- OPENING_TAG, KEY, VALUE,
- SINGLE_QUOTE, DOUBLE_QUOTE,
+ OPENING_TAG, KEY, VALUE, // TAG::OPENING_TAG, TAG::KEY, TAG::VALUE
+ SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES
UNKNOWN_TAG, CLOSING_TAG,
}
- public static ArrayList<Node> parseHtmlLL(String input) {
+ // HTML documents are uniquely a list of Nodes rather than a Node themselves
+ private ArrayList<Node> result;
+ // a bunch of useful buffers. see CssParser for commentary.
+ private ArrayDeque<ElementNode> unfinished;
+ private String currentTag;
+ private ArrayList<Pair<String, String>> currentAttributes;
+ private String currentKey;
+ private String currentValue;
+ private String currentText;
+ // important for quote escapes, and multiple whitespace chars
+ private char previousChar;
- var result = new ArrayList<Node>();
- var unfinished = new ArrayDeque<ElementNode>();
- var currentTag = "";
- var currentAttributes = new ArrayList<Pair<String, String>>();
- var currentKey = "";
- var currentValue = "";
- var currentText = "";
- var previousChar = '\0'; // important for quote escapes, and multiple whitespace chars
+ private ParserState state;
+
+ public HtmlParser() {
+ result = new ArrayList<>();
+ unfinished = new ArrayDeque<>();
+ currentTag = "";
+ currentAttributes = new ArrayList<>();
+ currentKey = "";
+ currentValue = "";
+ currentText = "";
+ previousChar = '\0';
// We safely? assume to start outside of all nodes.
- ParserState state = ParserState.HTML;
+ state = ParserState.HTML;
+ }
+
+ public ArrayList<Node> parseHtml(String input) {
for (char c : input.toCharArray()) {
// System.out.print(state);
// System.out.println(" " + c + " " + currentText);
switch (state) {
- case HTML:
- switch (c) {
- case '<':
- state = ParserState.UNKNOWN_TAG;
- if (!currentText.equals("")) {
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(new TextNode(currentText));
- } else {
- result.add(new TextNode(currentText));
- }
- currentText = "";
- previousChar = '\0';
- }
- break; // FOOTGUN LANGUAGE DESIGN
- case ' ': case '\n':
- if (previousChar != ' ') {
- currentText += ' ';
- }
- previousChar = ' ';
- break;
- default:
- currentText += c;
- previousChar = c;
- break;
- }
+ case HTML: caseHtml(c);
break;
- case UNKNOWN_TAG:
- switch (c) {
- case '/':
- state = ParserState.CLOSING_TAG;
- break;
- case '>': // Why would you put <> in your HTML??? go away
- state = ParserState.HTML;
- currentText += "<>";
- System.out.println("Why would you put <> in your HTML??? go away");
- break;
- // For now, we'll straight-up ignore anything matching the <!...> syntax:
- // i.e. comments, and <!DOCTYPE html>
- case '!':
- state = ParserState.IGNORED;
- break;
- default:
- state = ParserState.OPENING_TAG;
- currentTag += c;
- break;
- }
+ case UNKNOWN_TAG: caseUnknownTag(c);
break; // FOOTGUN LANGUAGE DESIGN STRIKES AGAIN
- case IGNORED:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- break;
- default:
- break;
- }
+ case IGNORED: caseIgnored(c);
break;
- case OPENING_TAG:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- var node = new ElementNode(currentTag, currentAttributes);
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(node);
- unfinished.add(node);
- } else {
- result.add(node);
- unfinished.add((ElementNode) result.get(result.size() - 1));
- }
- currentTag = "";
- currentAttributes = new ArrayList<>();
- break;
- case ' ': case '\n':
- state = ParserState.KEY;
- break;
- default:
- currentTag += c;
- break;
- }
+ case OPENING_TAG: caseOpeningTag(c);
break;
- case CLOSING_TAG:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- // IMPORTANT: we don't validate that closing tags correspond to an open tag
- if (!isSelfClosingTag(currentTag)) {
- if (unfinished.size() != 0) {
- unfinished.removeLast();
- }
- }
- currentTag = "";
- break;
- case ' ': case '\n':
- break;
- default:
- currentTag += c;
- break;
- }
+ case CLOSING_TAG: caseClosingTag(c);
break;
- case KEY:
- switch (c) {
- case '>':
- state = ParserState.HTML;
- var node = new ElementNode(currentTag, currentAttributes);
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(node);
- unfinished.add(node);
- } else {
- result.add(node);
- unfinished.add((ElementNode) result.get(result.size() - 1));
- }
- currentTag = "";
- currentAttributes = new ArrayList<>();
- break;
- case '=':
- state = ParserState.VALUE;
- break;
- case ' ': case '\n':
- break;
- default:
- currentKey += c;
- break;
- }
+ case KEY: caseKey(c);
break;
- case VALUE:
- switch (c) {
- case '\'':
- state = ParserState.SINGLE_QUOTE;
- break;
- case '\"':
- state = ParserState.DOUBLE_QUOTE;
- break;
- case ' ': case '\n':
- currentAttributes.add(new Pair<>(currentKey, currentValue));
- currentKey = "";
- currentValue = "";
- case '>':
- if (!currentKey.equals("") || !currentValue.equals("")) {
- currentAttributes.add(new Pair<>(currentKey, currentValue));
- currentKey = "";
- currentValue = "";
- }
- state = ParserState.HTML;
- var node = new ElementNode(currentTag, currentAttributes);
- if (unfinished.size() != 0) {
- unfinished.getLast().addChild(node);
- unfinished.add(node);
- } else {
- result.add(node);
- unfinished.add((ElementNode) result.get(result.size() - 1));
- }
- currentTag = "";
- currentAttributes = new ArrayList<>();
- break;
- default:
- currentValue += c;
- break;
- }
+ case VALUE: caseValue(c);
break;
- case SINGLE_QUOTE:
- switch (c) {
- case '\'':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- break;
- default:
- currentValue += c;
- previousChar = c;
- break;
- }
+ case SINGLE_QUOTES: caseSingleQuotes(c);
break;
- case DOUBLE_QUOTE:
- switch (c) {
- case '\"':
- if (previousChar != '\\') {
- state = ParserState.VALUE;
- previousChar = '\0';
- } else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
- currentValue += c;
- previousChar = c;
- }
- default:
- currentValue += c;
- previousChar = c;
- break;
- }
+ case DOUBLE_QUOTES: caseDoubleQuotes(c);
break;
}
}
return result;
}
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the HTML state.
+ * MODIFIES: this
+ */
+ private void caseHtml(char c) {
+ switch (c) {
+ case '<':
+ state = ParserState.UNKNOWN_TAG;
+ if (!currentText.equals("")) {
+ addNewTextNode();
+ }
+ break; // FOOTGUN LANGUAGE DESIGN
+ case ' ': case '\n':
+ if (previousChar != ' ') {
+ currentText += ' ';
+ }
+ previousChar = ' ';
+ break;
+ default:
+ currentText += c;
+ previousChar = c;
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the UNKNOWN_TAG state.
+ * MODIFIES: this
+ */
+ private void caseUnknownTag(char c) {
+ switch (c) {
+ case '/':
+ state = ParserState.CLOSING_TAG;
+ break;
+ case '>': // Why would you put <> in your HTML??? go away
+ state = ParserState.HTML;
+ currentText += "<>";
+ break;
+ // For now, we'll straight-up ignore anything matching the <!...> syntax:
+ // i.e. comments, and <!DOCTYPE html>
+ case '!':
+ state = ParserState.IGNORED;
+ break;
+ default:
+ state = ParserState.OPENING_TAG;
+ currentTag += c;
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the IGNORED state.
+ * MODIFIES: this
+ */
+ private void caseIgnored(char c) {
+ switch (c) {
+ case '>':
+ state = ParserState.HTML;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the OPENING_TAG state.
+ * MODIFIES: this
+ */
+ private void caseOpeningTag(char c) {
+ switch (c) {
+ case '>':
+ addNewElementNode();
+ break;
+ case ' ': case '\n':
+ state = ParserState.KEY;
+ break;
+ default:
+ currentTag += c;
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the CLOSING_TAG state.
+ * MODIFIES: this
+ */
+ private void caseClosingTag(char c) {
+ switch (c) {
+ case '>':
+ state = ParserState.HTML;
+ // IMPORTANT: we don't validate that closing tags correspond to an open tag
+ if (!isSelfClosingTag(currentTag)) {
+ if (unfinished.size() != 0) {
+ unfinished.removeLast();
+ }
+ }
+ currentTag = "";
+ break;
+ case ' ': case '\n':
+ break;
+ default:
+ currentTag += c;
+ break;
+ }
+ }
+
+ /**
+ * EFFECTS: Handles and updates parser state/buffers for a single character while in the KEY state.
+ * MODIFIES: this
+ */
+ private void caseKey(char c) {
+ switch (c) {
+ case '>':
+ addNewElementNode();
+ break;
+ case '=':
+ state = ParserState.VALUE;
+ break;
+ case ' ': case '\n':
+ break;
+ default:
+ currentKey += c;
+ break;
+ }
+ }
+
+ /**
+ * Handles a single character while the parser is reading an attribute value inside an opening tag.
+ * A quote character enters the matching quoted state, whitespace ends the current attribute and
+ * returns to the KEY state, '>' ends the tag, and anything else is appended to the value.
+ * MODIFIES: this
+ * EFFECTS: Updates state, currentKey, currentValue and currentAttributes; on '>' creates the element.
+ */
+ private void caseValue(char c) {
+ switch (c) {
+ case '\'':
+ state = ParserState.SINGLE_QUOTES;
+ break;
+ case '\"':
+ state = ParserState.DOUBLE_QUOTES;
+ break;
+ case ' ': case '\n':
+ // Whitespace only terminates this attribute. The tag itself is still open, so go back to
+ // reading the next key instead of falling through and closing the tag.
+ currentAttributes.add(new Pair<>(currentKey, currentValue));
+ currentKey = "";
+ currentValue = "";
+ state = ParserState.KEY;
+ break;
+ case '>':
+ // Commit a pending attribute, if any, before the element is built from the buffers.
+ if (!currentKey.equals("") || !currentValue.equals("")) {
+ currentAttributes.add(new Pair<>(currentKey, currentValue));
+ currentKey = "";
+ currentValue = "";
+ }
+ addNewElementNode();
+ break;
+ default:
+ currentValue += c;
+ break;
+ }
+ }
+
+ /**
+ * Handles a single character while the parser is inside a single-quoted attribute value.
+ * An unescaped quote closes the string and returns to the VALUE state without being stored.
+ * A quote preceded by a backslash is stored as a literal quote and the backslash is dropped.
+ * MODIFIES: this
+ * EFFECTS: Appends to currentValue, records previousChar, and may switch state back to VALUE.
+ */
+ private void caseSingleQuotes(char c) {
+ switch (c) {
+ case '\'':
+ if (previousChar != '\\') {
+ state = ParserState.VALUE;
+ previousChar = '\0';
+ } else {
+ // The last buffered character is the escaping backslash: drop exactly that one character.
+ // (Dropping two would also lose the character before it, and throw on a value of just "\".)
+ currentValue = currentValue.substring(0, currentValue.length() - 1);
+ currentValue += c;
+ previousChar = c;
+ }
+ break;
+ default:
+ currentValue += c;
+ previousChar = c;
+ break;
+ }
+ }
+
+ /**
+ * Handles a single character while the parser is inside a double-quoted attribute value.
+ * An unescaped quote closes the string and returns to the VALUE state without being stored.
+ * A quote preceded by a backslash is stored as a literal quote and the backslash is dropped.
+ * MODIFIES: this
+ * EFFECTS: Appends to currentValue, records previousChar, and may switch state back to VALUE.
+ */
+ private void caseDoubleQuotes(char c) {
+ switch (c) {
+ case '\"':
+ if (previousChar != '\\') {
+ state = ParserState.VALUE;
+ previousChar = '\0';
+ } else {
+ // The last buffered character is the escaping backslash: drop exactly that one character.
+ // (Dropping two would also lose the character before it, and throw on a value of just "\".)
+ currentValue = currentValue.substring(0, currentValue.length() - 1);
+ currentValue += c;
+ previousChar = c;
+ }
+ break; // FOOTGUN LANGUAGE DESIGN
+ default:
+ currentValue += c;
+ previousChar = c;
+ break;
+ }
+ }
+
+ /**
+ * Shared tail of every "opening tag finished" transition.
+ * EFFECTS: Builds an ElementNode from currentTag/currentAttributes and attaches it to the innermost
+ * unfinished element, or to result when no element is open. Unless the tag is self-closing, the
+ * new element is pushed onto unfinished. Finally clears the tag buffers and returns to the HTML state.
+ * MODIFIES: this
+ */
+ private void addNewElementNode() {
+ var element = new ElementNode(currentTag, currentAttributes);
+ if (unfinished.isEmpty()) {
+ // No open parent: the element is a top-level node of the document.
+ result.add(element);
+ } else {
+ unfinished.getLast().addChild(element);
+ }
+ if (!isSelfClosingTag(currentTag)) {
+ // The newest open element is always the last entry of the deque.
+ unfinished.addLast(element);
+ }
+ state = ParserState.HTML;
+ currentTag = "";
+ currentAttributes = new ArrayList<>();
+ }
+
+ /**
+ * Helper function to check method length boxes.
+ * EFFECTS: Creates and adds a new TextNode from the current buffers to the unfinished and result stacks
+ * MODIFIES: this
+ */
+ private void addNewTextNode() {
+ if (unfinished.size() != 0) {
+ unfinished.getLast().addChild(new TextNode(currentText));
+ } else {
+ result.add(new TextNode(currentText));
+ }
+ currentText = "";
+ previousChar = '\0';
+ }
+
+ /**
+ * Simple helper function to check if a tag is self-closing.
+ * EFFECTS: Returns whether a String tag is a self-closing tag.
+ */
private static boolean isSelfClosingTag(String tag) {
switch (tag) {
case "input": case "param":
@@ -281,7 +369,7 @@ public class HtmlParser {
<div id="intro">
<img id="face" src="assets/compass.jpg"/>
</div>
- <!-- <div id="details">
+ <div id="details">
<h2>Projects</h2>
<p> Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
@@ -291,7 +379,7 @@ public class HtmlParser {
dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident, sunt in culpa
qui officia deserunt mollit anim id est laborum. </p>
- <h2>Posts</h2>
+ <!-- <h2>Posts</h2>
<p> Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
@@ -299,14 +387,12 @@ public class HtmlParser {
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident, sunt in culpa
- qui officia deserunt mollit anim id est laborum. </p>
- </div> -->
+ qui officia deserunt mollit anim id est laborum. </p> -->
+ </div>
</main>
<footer>
<span><img src="assets/copyleft.svg" width="12" height="12"/> 2020-2022 j-james </span>
</footer>
</body>
</html>
-<!--
-
*/
diff --git a/src/main/model/html/TextNode.java b/src/main/model/html/TextNode.java
index 634bf3b..cf0078b 100644
--- a/src/main/model/html/TextNode.java
+++ b/src/main/model/html/TextNode.java
@@ -5,14 +5,19 @@ import model.util.Node;
public class TextNode implements Node {
private String text = "";
- public String getText() {
- return this.text;
- }
-
+ /**
+ * EFFECTS: Creates a new TextNode from the provided String value.
+ * MODIFIES: this
+ */
public TextNode(String text) {
this.text = text;
}
+ public String getText() {
+ return this.text;
+ }
+
+ // We implement this method for easy debugging.
public String getData() {
return getText();
}
diff --git a/src/main/model/util/Node.java b/src/main/model/util/Node.java
index 010a2da..619404f 100644
--- a/src/main/model/util/Node.java
+++ b/src/main/model/util/Node.java
@@ -1,10 +1,10 @@
package model.util;
/**
- * yeah there's literally nothing here
- * i just need to establish that ElementNode and TextNode both inherit from Node
+ * yeah there's nothing here
+ * I just need to establish the inheritance relation of ElementNode and TextNode
*/
public interface Node {
- // Return a representation of the Node
+ // Return a representation of the Node. Useful for debugging.
public String getData();
}
diff --git a/src/test/model/CssParserTest.java b/src/test/model/CssParserTest.java
index 2852da5..fa395f6 100644
--- a/src/test/model/CssParserTest.java
+++ b/src/test/model/CssParserTest.java
@@ -10,6 +10,7 @@ public class CssParserTest {
@Test
void testIdiomaticCss() {
var idiomaticCss = "body { background-color: #f0f0f2; margin: 0; padding: 0; font-family: -apple-system, system-ui, BlinkMacSystemFont, \"Segoe UI\", \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;}div { width: 600px; margin: 5em auto; padding: 2em; background-color: #fdfdff; border-radius: 0.5em; box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);}a:link, a:visited { color: #38488f; text-decoration: none;}@media (max - width : 700px) { div { margin: 0 auto; width: auto; }}";
- System.out.println(CssParser.parseLL(idiomaticCss));
+ CssParser parser = new CssParser();
+ System.out.println(parser.parseCSS(idiomaticCss));
}
}
diff --git a/src/test/model/HtmlParserTest.java b/src/test/model/HtmlParserTest.java
index 4b05cfb..58b4555 100644
--- a/src/test/model/HtmlParserTest.java
+++ b/src/test/model/HtmlParserTest.java
@@ -7,8 +7,6 @@ import org.junit.jupiter.api.Test;
import java.util.*;
-import static org.junit.jupiter.api.Assertions.*;
-
public class HtmlParserTest {
String idiomaticHtml = "<!DOCTYPE html><html><head></head><body><p>Hello, world!</p></body></html>";
@@ -18,26 +16,31 @@ public class HtmlParserTest {
@Test
void testIdiomaticHtml() {
String[] idiomaticHtmlArray = {"<!DOCTYPE html>","<html>","<head>","</head>","<body>","<p>","Hello,world!","</p>","</body>","</html>"};
- var parsedHtml = HtmlParser.parseHtmlLL(idiomaticHtml);
- displayHtmlTree(parsedHtml);
- System.out.println(HtmlParser.parseHtmlLL(idiomaticHtml));
+ HtmlParser parser = new HtmlParser();
+ displayHtmlTree(parser.parseHtml(idiomaticHtml));
// assertEquals(HtmlParser.parseHtmlLL(idiomaticHtml), Arrays.asList(idiomaticHtmlArray));
}
@Test
void testBrokenHtml() {
String[] brokenHtmlArray = {"<html>","<foo>","<bar>","</bar>","<ba>"};
- System.out.println(HtmlParser.parseHtmlLL(brokenHtml));
+ HtmlParser parser = new HtmlParser();
+ displayHtmlTree(parser.parseHtml(brokenHtml));
// assertEquals(HtmlParser.parseHtmlLL(brokenHtml), Arrays.asList(brokenHtmlArray));
}
@Test
void testTrailingTextHtml() {
String[] trailingTextHtmlArray = {"<html>","<foo>","<bar>","</bar>","ba"};
- System.out.println(HtmlParser.parseHtmlLL(trailingTextHtml));
+ HtmlParser parser = new HtmlParser();
+ displayHtmlTree(parser.parseHtml(trailingTextHtml));
// assertEquals(HtmlParser.parseHtmlLL(trailingTextHtml), Arrays.asList(trailingTextHtmlArray));
}
+ /**
+ * Simple helper function for debugging.
+ * EFFECTS: prints a representation of the tree to the console for debugging purposes
+ */
private void displayHtmlTree(ArrayList<Node> tree) {
for (Node node : tree) {
if (node instanceof ElementNode) {