From bfa72127cf120f0e98410b45a043b95ad522b729 Mon Sep 17 00:00:00 2001 From: j-james Date: Mon, 17 Oct 2022 06:54:35 -0700 Subject: Sweeping refactor to check CheckStyle boxes --- src/main/model/css/CssParser.java | 368 +++++++++++++++----------- src/main/model/html/ElementNode.java | 30 ++- src/main/model/html/HtmlParser.java | 490 ++++++++++++++++++++--------------- src/main/model/html/TextNode.java | 13 +- src/main/model/util/Node.java | 6 +- src/test/model/CssParserTest.java | 3 +- src/test/model/HtmlParserTest.java | 17 +- 7 files changed, 554 insertions(+), 373 deletions(-) diff --git a/src/main/model/css/CssParser.java b/src/main/model/css/CssParser.java index 8d57bdc..25b6752 100644 --- a/src/main/model/css/CssParser.java +++ b/src/main/model/css/CssParser.java @@ -14,13 +14,6 @@ import java.util.*; * ATTRIBUTE ::= 'color' | 'text' | ... * VALUE ::= ??? idk lol */ - -/** - * This class assumes that it is getting _valid CSS_: that is, the style between two tags - * of a style block, or the raw content of a .css file. - * Making sure this assumption holds is extremely important for program robustness. - * We do not check for validity, i.e. throw any exceptions - the driving principle of web standards is to "fail softly". - */ public class CssParser { /** @@ -32,156 +25,60 @@ public class CssParser { SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES } + // essentially the csstree type, only we don't need it to be a tree + private ArrayList>>> result; + // a bunch of useful buffers: optimizations in the future could likely come from tweaking these + // note that i know nothing about data structure performance: but i'm pretty sure that Strings + // are _not_ the right tool for the job here, lol + private String currentSelector; + private ArrayList> currentRule; + private String currentProperty; + private String currentValue; + // important for quote escapes + private char previousChar; + + private ParserState state; + + /// Initialize all buffers to default values + public CssParser() { + result = new ArrayList<>(); + currentSelector = ""; + currentRule = new ArrayList<>(); + currentProperty = ""; + currentValue = ""; + previousChar = '\0'; + + // We safely assume to start by reading a selector. + state = ParserState.SELECTORS; + } + /** - * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style. + * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style. No additional lookup is needed, + * however we do keep a previousChar value for dealing with (annoying) escaped quotes. * It should be fast - I'd say something about time complexity if I knew anything about time complexity. * No guarantees are made about invalid CSS files. Also, no guarantees are made about valid CSS files, lol. + *
+ * REQUIRES: A valid CSS file, as a raw String. + * MODIFIES: this + * EFFECTS: Returns a parsed CSS representation as several nested ArrayLists and Pairs of Strings. */ - public static ArrayList>>> parseLL(String input) { - - // parser buffers - // essentially the CssTree type - var result = new ArrayList>>>(); - var currentSelector = ""; - var currentRule = new ArrayList>(); - var currentProperty = ""; - var currentValue = ""; - var previousChar = '\0'; - - // We safely assume to start by reading a selector. - ParserState state = ParserState.SELECTORS; + public ArrayList>>> parseCSS(String input) { for (char c : input.toCharArray()) { // System.out.print(state); // System.out.println(" " + c); switch (state) { - case SELECTORS: - switch (c) { - case '@': - if (currentSelector.equals("")) { - state = ParserState.MEDIA_SELECTORS; - } else { - currentSelector += c; - } - break; - case '{': - state = ParserState.ATTRIBUTE; - break; - case ' ': case '\n': - break; - // todo: do better than blindly create a string; pattern match on css selectors - default: - currentSelector += c; - break; - } + case SELECTORS: caseSelectors(c); break; - case MEDIA_SELECTORS: - switch (c) { - // todo: don't entirely disregard media queries, also split between @media/@... - case '{': - state = ParserState.SELECTORS; - // discard currentSelector - currentSelector = ""; - break; - default: - currentSelector += c; - break; - } + case MEDIA_SELECTORS: caseMediaSelectors(c); break; - case ATTRIBUTE: - switch (c) { - case ':': - state = ParserState.VALUE; - break; - case '}': - state = ParserState.SELECTORS; - if (!currentValue.equals("") || !currentProperty.equals("")) { - System.out.println("something's wrong"); - currentProperty = ""; - currentValue = ""; - } - result.add(new Pair<>(currentSelector, currentRule)); - System.out.println(currentRule); - currentSelector = ""; - currentRule = new ArrayList<>(); - break; - case ' ': case '\n': - break; - default: - currentProperty += c; - break; - } + case ATTRIBUTE: caseAttribute(c); break; - case VALUE: - switch (c) { - case ';': - state = ParserState.ATTRIBUTE; - currentRule.add(new Pair<>(currentProperty, currentValue)); - currentProperty = ""; - currentValue = ""; - break; - case '}': - state = ParserState.SELECTORS; - if (!currentValue.equals("") || !currentProperty.equals("")) { - currentRule.add(new Pair<>(currentProperty, currentValue)); - currentProperty = ""; - currentValue = ""; - } - result.add(new Pair<>(currentSelector, currentRule)); - currentSelector = ""; - currentRule = new ArrayList<>(); - break; - case '\'': - state = ParserState.SINGLE_QUOTES; - currentValue += c; - break; - case '\"': - state = ParserState.DOUBLE_QUOTES; - currentValue += c; - break; - case ' ': case '\n': - break; - default: - currentValue += c; - break; - } + case VALUE: caseValue(c); break; - // quotes in css are exclusively? for paths: so we want to include the quotes themselves - case SINGLE_QUOTES: - switch (c) { - case '\'': - if (previousChar != '\\') { - state = ParserState.VALUE; - currentValue += c; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - break; - default: - currentValue += c; - break; - } + case SINGLE_QUOTES: caseSingleQuotes(c); break; - case DOUBLE_QUOTES: - switch (c) { - case '\"': - if (previousChar != '\\') { - state = ParserState.VALUE; - currentValue += c; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - break; - default: - currentValue += c; - break; - } + case DOUBLE_QUOTES: caseDoubleQuotes(c); break; } } @@ -189,12 +86,179 @@ public class CssParser { } /** - * Takes an input string with units and returns out the value in pixels. - * This is a fault-tolerant system. + * EFFECTS: Handles and updates parser state/buffers for a single character while in the SELECTORS state. + * See also: the (slightly wrong) context-free grammar commented at the start of this file. + * MODIFIES: this + */ + private void caseSelectors(char c) { + switch (c) { + case '@': + if (currentSelector.equals("")) { + state = ParserState.MEDIA_SELECTORS; + } else { + currentSelector += c; + } + break; + case '{': + state = ParserState.ATTRIBUTE; + break; + case ' ': case '\n': + break; + // todo: do better than blindly create a string; pattern match on css selectors + default: + currentSelector += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the MEDIA_SELECTORS state. + * MODIFIES: this + */ + private void caseMediaSelectors(char c) { + switch (c) { + // todo: don't entirely disregard media queries, also split between @media/@... + case '{': + state = ParserState.SELECTORS; + // discard currentSelector + currentSelector = ""; + break; + default: + currentSelector += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the ATTRIBUTE state. + * MODIFIES: this + */ + private void caseAttribute(char c) { + switch (c) { + case ':': + state = ParserState.VALUE; + break; + case '}': + state = ParserState.SELECTORS; + if (!currentValue.equals("") || !currentProperty.equals("")) { + // System.out.println("something's wrong"); + currentProperty = ""; + currentValue = ""; + } + result.add(new Pair<>(currentSelector, currentRule)); + currentSelector = ""; + currentRule = new ArrayList<>(); + break; + case ' ': case '\n': + break; + default: + currentProperty += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the VALUE state. + * MODIFIES: this + */ + private void caseValue(char c) { + switch (c) { + case ';': + state = ParserState.ATTRIBUTE; + updateCurrentRule(); + break; + case '}': + state = ParserState.SELECTORS; + if (!currentValue.equals("") || !currentProperty.equals("")) { + updateCurrentRule(); + } + result.add(new Pair<>(currentSelector, currentRule)); + currentSelector = ""; + currentRule = new ArrayList<>(); + break; + // todo: handle spaces better: they're actually important inside values + case ' ': case '\n': break; // believe me, i think this is ugly too but it passes checkstyle + case '\'': + state = ParserState.SINGLE_QUOTES; + currentValue += c; + break; + // intentional use of TERRIBLE SMOKING FOOTGUN behavior to check boxes + case '\"': state = ParserState.DOUBLE_QUOTES; + default: currentValue += c; + break; + } + } + + /** + * Helper function to check method length boxes. + * EFFECTS: Adds a new property to the current rule. + * MODIFIES: this + */ + private void updateCurrentRule() { + currentRule.add(new Pair<>(currentProperty, currentValue)); + currentProperty = ""; + currentValue = ""; + } + + // todo: handle additional escaped characters, though what we have right now isn't bad + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the SINGLE_QUOTES state. + * MODIFIES: this + */ + private void caseSingleQuotes(char c) { + switch (c) { + case '\'': + if (previousChar != '\\') { + state = ParserState.VALUE; + // quotes in css are exclusively? for paths: so we want to include the quotes themselves + currentValue += c; + previousChar = '\0'; + } else { + // possibly not the best way to handle this, may be better to keep the backslash + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; + default: + currentValue += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the DOUBLE_QUOTES state. + * MODIFIES: this + */ + private void caseDoubleQuotes(char c) { + switch (c) { + case '\"': + if (previousChar != '\\') { + state = ParserState.VALUE; + currentValue += c; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; + default: + currentValue += c; + break; + } + } + + /** + * Takes an input string with units and returns out the value in pixels. This is a fault-tolerant system. * When given an invalid string (i.e. "12p53x"), it will produce an invalid result instead of throwing. * However, it should parse every valid string correctly. + *
+ * REQUIRES: A string of the form [NUMBER][VALIDUNIT] + * EFFECTS: Returns a number, in pixels, that has been converted appropriately */ - private double parseUnits(String input) { + private static double parseUnits(String input) { String numbers = ""; String units = ""; // imagine making a language without iterable strings, fml @@ -210,9 +274,17 @@ public class CssParser { try { value = Float.parseFloat(numbers); } catch (NumberFormatException e) { - System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers); + // System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers); value = 0.0; } + return convertUnits(units, value); + } + + /** + * REQUIRES: a String that is a unit, otherwise defaults to pixels + * EFFECTS: converts a value in some units to a value in pixels + */ + private static double convertUnits(String units, double value) { // god case/break is such a fault-provoking design i hate it // good thing we avoid breaks entirely here lmao switch (units) { @@ -226,7 +298,7 @@ public class CssParser { case "in": return value * 96; // not handled: % em ex ch rem lh rlh vw vh vmin vmax vb vi svw svh lvw lvh dvw dvh default: - System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value); + // System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value); return value; } } diff --git a/src/main/model/html/ElementNode.java b/src/main/model/html/ElementNode.java index 1d427e8..a1ad90c 100644 --- a/src/main/model/html/ElementNode.java +++ b/src/main/model/html/ElementNode.java @@ -12,28 +12,42 @@ public class ElementNode implements Node { private ArrayList children; - public String getTag() { - return this.tag; - } - - public ArrayList getChildren() { - return this.children; - } - + /** + * EFFECTS: Constructs a new ElementNode from the arguments provided. + * MODIFIES: this + */ public ElementNode(String tag, ArrayList> attributes, ArrayList children) { this.tag = tag; this.attributes = attributes; this.children = children; } + /** + * Overloads the constructor for ease of use. We often don't provide children, at first. + * EFFECTS: Constructs a new ElementNode from the arguments provided. + * MODIFIES: this + */ public ElementNode(String tag, ArrayList> attributes) { this(tag, attributes, new ArrayList()); } + /** + * EFFECTS: Adds a child to the children ArrayList. + * MODIFIES: this + */ public void addChild(Node child) { this.children.add(child); } + public String getTag() { + return this.tag; + } + + public ArrayList getChildren() { + return this.children; + } + + // We implement this method for easy debugging. public String getData() { return getTag(); } diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java index d6b4ff1..e9dc0c4 100644 --- a/src/main/model/html/HtmlParser.java +++ b/src/main/model/html/HtmlParser.java @@ -17,229 +17,317 @@ import org.javatuples.*; */ public class HtmlParser { + /** + * HTML is not nice to parse. We manage to get away with a relatively small number of parser states regardless. + */ private enum ParserState { HTML, IGNORED, - OPENING_TAG, KEY, VALUE, - SINGLE_QUOTE, DOUBLE_QUOTE, + OPENING_TAG, KEY, VALUE, // TAG::OPENING_TAG, TAG::KEY, TAG::VALUE + SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES UNKNOWN_TAG, CLOSING_TAG, } - public static ArrayList parseHtmlLL(String input) { + // HTML documents are uniquely a list of Nodes rather than a Node themselves + private ArrayList result; + // a bunch of useful buffers. see CssParser for commentary. + private ArrayDeque unfinished; + private String currentTag; + private ArrayList> currentAttributes; + private String currentKey; + private String currentValue; + private String currentText; + // important for quote escapes, and multiple whitespace chars + private char previousChar; - var result = new ArrayList(); - var unfinished = new ArrayDeque(); - var currentTag = ""; - var currentAttributes = new ArrayList>(); - var currentKey = ""; - var currentValue = ""; - var currentText = ""; - var previousChar = '\0'; // important for quote escapes, and multiple whitespace chars + private ParserState state; + + public HtmlParser() { + result = new ArrayList<>(); + unfinished = new ArrayDeque<>(); + currentTag = ""; + currentAttributes = new ArrayList<>(); + currentKey = ""; + currentValue = ""; + currentText = ""; + previousChar = '\0'; // We safely? assume to start outside of all nodes. - ParserState state = ParserState.HTML; + state = ParserState.HTML; + } + + public ArrayList parseHtml(String input) { for (char c : input.toCharArray()) { // System.out.print(state); // System.out.println(" " + c + " " + currentText); switch (state) { - case HTML: - switch (c) { - case '<': - state = ParserState.UNKNOWN_TAG; - if (!currentText.equals("")) { - if (unfinished.size() != 0) { - unfinished.getLast().addChild(new TextNode(currentText)); - } else { - result.add(new TextNode(currentText)); - } - currentText = ""; - previousChar = '\0'; - } - break; // FOOTGUN LANGUAGE DESIGN - case ' ': case '\n': - if (previousChar != ' ') { - currentText += ' '; - } - previousChar = ' '; - break; - default: - currentText += c; - previousChar = c; - break; - } + case HTML: caseHtml(c); break; - case UNKNOWN_TAG: - switch (c) { - case '/': - state = ParserState.CLOSING_TAG; - break; - case '>': // Why would you put <> in your HTML??? go away - state = ParserState.HTML; - currentText += "<>"; - System.out.println("Why would you put <> in your HTML??? go away"); - break; - // For now, we'll straight-up ignore anything matching the syntax: - // i.e. comments, and - case '!': - state = ParserState.IGNORED; - break; - default: - state = ParserState.OPENING_TAG; - currentTag += c; - break; - } + case UNKNOWN_TAG: caseUnknownTag(c); break; // FOOTGUN LANGUAGE DESIGN STRIKES AGAIN - case IGNORED: - switch (c) { - case '>': - state = ParserState.HTML; - break; - default: - break; - } + case IGNORED: caseIgnored(c); break; - case OPENING_TAG: - switch (c) { - case '>': - state = ParserState.HTML; - var node = new ElementNode(currentTag, currentAttributes); - if (unfinished.size() != 0) { - unfinished.getLast().addChild(node); - unfinished.add(node); - } else { - result.add(node); - unfinished.add((ElementNode) result.get(result.size() - 1)); - } - currentTag = ""; - currentAttributes = new ArrayList<>(); - break; - case ' ': case '\n': - state = ParserState.KEY; - break; - default: - currentTag += c; - break; - } + case OPENING_TAG: caseOpeningTag(c); break; - case CLOSING_TAG: - switch (c) { - case '>': - state = ParserState.HTML; - // IMPORTANT: we don't validate that closing tags correspond to an open tag - if (!isSelfClosingTag(currentTag)) { - if (unfinished.size() != 0) { - unfinished.removeLast(); - } - } - currentTag = ""; - break; - case ' ': case '\n': - break; - default: - currentTag += c; - break; - } + case CLOSING_TAG: caseClosingTag(c); break; - case KEY: - switch (c) { - case '>': - state = ParserState.HTML; - var node = new ElementNode(currentTag, currentAttributes); - if (unfinished.size() != 0) { - unfinished.getLast().addChild(node); - unfinished.add(node); - } else { - result.add(node); - unfinished.add((ElementNode) result.get(result.size() - 1)); - } - currentTag = ""; - currentAttributes = new ArrayList<>(); - break; - case '=': - state = ParserState.VALUE; - break; - case ' ': case '\n': - break; - default: - currentKey += c; - break; - } + case KEY: caseKey(c); break; - case VALUE: - switch (c) { - case '\'': - state = ParserState.SINGLE_QUOTE; - break; - case '\"': - state = ParserState.DOUBLE_QUOTE; - break; - case ' ': case '\n': - currentAttributes.add(new Pair<>(currentKey, currentValue)); - currentKey = ""; - currentValue = ""; - case '>': - if (!currentKey.equals("") || !currentValue.equals("")) { - currentAttributes.add(new Pair<>(currentKey, currentValue)); - currentKey = ""; - currentValue = ""; - } - state = ParserState.HTML; - var node = new ElementNode(currentTag, currentAttributes); - if (unfinished.size() != 0) { - unfinished.getLast().addChild(node); - unfinished.add(node); - } else { - result.add(node); - unfinished.add((ElementNode) result.get(result.size() - 1)); - } - currentTag = ""; - currentAttributes = new ArrayList<>(); - break; - default: - currentValue += c; - break; - } + case VALUE: caseValue(c); break; - case SINGLE_QUOTE: - switch (c) { - case '\'': - if (previousChar != '\\') { - state = ParserState.VALUE; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - break; - default: - currentValue += c; - previousChar = c; - break; - } + case SINGLE_QUOTES: caseSingleQuotes(c); break; - case DOUBLE_QUOTE: - switch (c) { - case '\"': - if (previousChar != '\\') { - state = ParserState.VALUE; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - default: - currentValue += c; - previousChar = c; - break; - } + case DOUBLE_QUOTES: caseDoubleQuotes(c); break; } } return result; } + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the HTML state. + * MODIFIES: this + */ + private void caseHtml(char c) { + switch (c) { + case '<': + state = ParserState.UNKNOWN_TAG; + if (!currentText.equals("")) { + addNewTextNode(); + } + break; // FOOTGUN LANGUAGE DESIGN + case ' ': case '\n': + if (previousChar != ' ') { + currentText += ' '; + } + previousChar = ' '; + break; + default: + currentText += c; + previousChar = c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the UNKNOWN_TAG state. + * MODIFIES: this + */ + private void caseUnknownTag(char c) { + switch (c) { + case '/': + state = ParserState.CLOSING_TAG; + break; + case '>': // Why would you put <> in your HTML??? go away + state = ParserState.HTML; + currentText += "<>"; + break; + // For now, we'll straight-up ignore anything matching the syntax: + // i.e. comments, and + case '!': + state = ParserState.IGNORED; + break; + default: + state = ParserState.OPENING_TAG; + currentTag += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the IGNORED state. + * MODIFIES: this + */ + private void caseIgnored(char c) { + switch (c) { + case '>': + state = ParserState.HTML; + break; + default: + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the OPENING_TAG state. + * MODIFIES: this + */ + private void caseOpeningTag(char c) { + switch (c) { + case '>': + addNewElementNode(); + break; + case ' ': case '\n': + state = ParserState.KEY; + break; + default: + currentTag += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the CLOSING_TAG state. + * MODIFIES: this + */ + private void caseClosingTag(char c) { + switch (c) { + case '>': + state = ParserState.HTML; + // IMPORTANT: we don't validate that closing tags correspond to an open tag + if (!isSelfClosingTag(currentTag)) { + if (unfinished.size() != 0) { + unfinished.removeLast(); + } + } + currentTag = ""; + break; + case ' ': case '\n': + break; + default: + currentTag += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the KEY state. + * MODIFIES: this + */ + private void caseKey(char c) { + switch (c) { + case '>': + addNewElementNode(); + break; + case '=': + state = ParserState.VALUE; + break; + case ' ': case '\n': + break; + default: + currentKey += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the VALUE state. + * MODIFIES: this + */ + private void caseValue(char c) { + switch (c) { + case '\'': + state = ParserState.SINGLE_QUOTES; + break; + case '\"': + state = ParserState.DOUBLE_QUOTES; + break; + case ' ': case '\n': + currentAttributes.add(new Pair<>(currentKey, currentValue)); + currentKey = ""; + currentValue = ""; + case '>': + if (!currentKey.equals("") || !currentValue.equals("")) { + currentAttributes.add(new Pair<>(currentKey, currentValue)); + currentKey = ""; + currentValue = ""; + } + addNewElementNode(); + break; + default: + currentValue += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the SINGLE_QUOTES state. + * MODIFIES: this + */ + private void caseSingleQuotes(char c) { + switch (c) { + case '\'': + if (previousChar != '\\') { + state = ParserState.VALUE; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; + default: + currentValue += c; + previousChar = c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the DOUBLE_QUOTES state. + * MODIFIES: this + */ + private void caseDoubleQuotes(char c) { + switch (c) { + case '\"': + if (previousChar != '\\') { + state = ParserState.VALUE; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; // FOOTGUN LANGUAGE DESIGN + default: + currentValue += c; + previousChar = c; + break; + } + } + + /** + * Helper function to remove code duplication. + * EFFECTS: Creates and adds a new ElementNode from the current buffers to the unfinished and result stacks + * MODIFIES: this + */ + private void addNewElementNode() { + state = ParserState.HTML; + var node = new ElementNode(currentTag, currentAttributes); + if (unfinished.size() != 0) { + unfinished.getLast().addChild(node); + if (!isSelfClosingTag(currentTag)) { + unfinished.add(node); + } + } else { + result.add(node); + if (!isSelfClosingTag(currentTag)) { + unfinished.add((ElementNode) result.get(result.size() - 1)); + } + } + currentTag = ""; + currentAttributes = new ArrayList<>(); + } + + /** + * Helper function to check method length boxes. + * EFFECTS: Creates and adds a new TextNode from the current buffers to the unfinished and result stacks + * MODIFIES: this + */ + private void addNewTextNode() { + if (unfinished.size() != 0) { + unfinished.getLast().addChild(new TextNode(currentText)); + } else { + result.add(new TextNode(currentText)); + } + currentText = ""; + previousChar = '\0'; + } + + /** + * Simple helper function to check if a tag is self-closing. + * EFFECTS: Returns whether a String tag is a self-closing tag. + */ private static boolean isSelfClosingTag(String tag) { switch (tag) { case "input": case "param": @@ -281,7 +369,7 @@ public class HtmlParser {

- + qui officia deserunt mollit anim id est laborum.

--> + -