diff options
-rw-r--r-- | src/main/model/css/CssParser.java | 368 | ||||
-rw-r--r-- | src/main/model/html/ElementNode.java | 30 | ||||
-rw-r--r-- | src/main/model/html/HtmlParser.java | 490 | ||||
-rw-r--r-- | src/main/model/html/TextNode.java | 13 | ||||
-rw-r--r-- | src/main/model/util/Node.java | 6 | ||||
-rw-r--r-- | src/test/model/CssParserTest.java | 3 | ||||
-rw-r--r-- | src/test/model/HtmlParserTest.java | 17 |
7 files changed, 554 insertions, 373 deletions
diff --git a/src/main/model/css/CssParser.java b/src/main/model/css/CssParser.java index 8d57bdc..25b6752 100644 --- a/src/main/model/css/CssParser.java +++ b/src/main/model/css/CssParser.java @@ -14,13 +14,6 @@ import java.util.*; * ATTRIBUTE ::= 'color' | 'text' | ... * VALUE ::= ??? idk lol */ - -/** - * This class assumes that it is getting _valid CSS_: that is, the style between two tags - * of a style block, or the raw content of a .css file. - * Making sure this assumption holds is extremely important for program robustness. - * We do not check for validity, i.e. throw any exceptions - the driving principle of web standards is to "fail softly". - */ public class CssParser { /** @@ -32,156 +25,60 @@ public class CssParser { SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES } + // essentially the csstree type, only we don't need it to be a tree + private ArrayList<Pair<String, ArrayList<Pair<String, String>>>> result; + // a bunch of useful buffers: optimizations in the future could likely come from tweaking these + // note that i know nothing about data structure performance: but i'm pretty sure that Strings + // are _not_ the right tool for the job here, lol + private String currentSelector; + private ArrayList<Pair<String, String>> currentRule; + private String currentProperty; + private String currentValue; + // important for quote escapes + private char previousChar; + + private ParserState state; + + /// Initialize all buffers to default values + public CssParser() { + result = new ArrayList<>(); + currentSelector = ""; + currentRule = new ArrayList<>(); + currentProperty = ""; + currentValue = ""; + previousChar = '\0'; + + // We safely assume to start by reading a selector. + state = ParserState.SELECTORS; + } + /** - * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style. + * Parses a (valid) CSS file in a left-to-right, leftmost-derivation style. No additional lookup is needed, + * however we do keep a previousChar value for dealing with (annoying) escaped quotes. * It should be fast - I'd say something about time complexity if I knew anything about time complexity. * No guarantees are made about invalid CSS files. Also, no guarantees are made about valid CSS files, lol. + * <br> + * REQUIRES: A valid CSS file, as a raw String. + * MODIFIES: this + * EFFECTS: Returns a parsed CSS representation as several nested ArrayLists and Pairs of Strings. */ - public static ArrayList<Pair<String, ArrayList<Pair<String, String>>>> parseLL(String input) { - - // parser buffers - // essentially the CssTree type - var result = new ArrayList<Pair<String, ArrayList<Pair<String, String>>>>(); - var currentSelector = ""; - var currentRule = new ArrayList<Pair<String, String>>(); - var currentProperty = ""; - var currentValue = ""; - var previousChar = '\0'; - - // We safely assume to start by reading a selector. - ParserState state = ParserState.SELECTORS; + public ArrayList<Pair<String, ArrayList<Pair<String, String>>>> parseCSS(String input) { for (char c : input.toCharArray()) { // System.out.print(state); // System.out.println(" " + c); switch (state) { - case SELECTORS: - switch (c) { - case '@': - if (currentSelector.equals("")) { - state = ParserState.MEDIA_SELECTORS; - } else { - currentSelector += c; - } - break; - case '{': - state = ParserState.ATTRIBUTE; - break; - case ' ': case '\n': - break; - // todo: do better than blindly create a string; pattern match on css selectors - default: - currentSelector += c; - break; - } + case SELECTORS: caseSelectors(c); break; - case MEDIA_SELECTORS: - switch (c) { - // todo: don't entirely disregard media queries, also split between @media/@... - case '{': - state = ParserState.SELECTORS; - // discard currentSelector - currentSelector = ""; - break; - default: - currentSelector += c; - break; - } + case MEDIA_SELECTORS: caseMediaSelectors(c); break; - case ATTRIBUTE: - switch (c) { - case ':': - state = ParserState.VALUE; - break; - case '}': - state = ParserState.SELECTORS; - if (!currentValue.equals("") || !currentProperty.equals("")) { - System.out.println("something's wrong"); - currentProperty = ""; - currentValue = ""; - } - result.add(new Pair<>(currentSelector, currentRule)); - System.out.println(currentRule); - currentSelector = ""; - currentRule = new ArrayList<>(); - break; - case ' ': case '\n': - break; - default: - currentProperty += c; - break; - } + case ATTRIBUTE: caseAttribute(c); break; - case VALUE: - switch (c) { - case ';': - state = ParserState.ATTRIBUTE; - currentRule.add(new Pair<>(currentProperty, currentValue)); - currentProperty = ""; - currentValue = ""; - break; - case '}': - state = ParserState.SELECTORS; - if (!currentValue.equals("") || !currentProperty.equals("")) { - currentRule.add(new Pair<>(currentProperty, currentValue)); - currentProperty = ""; - currentValue = ""; - } - result.add(new Pair<>(currentSelector, currentRule)); - currentSelector = ""; - currentRule = new ArrayList<>(); - break; - case '\'': - state = ParserState.SINGLE_QUOTES; - currentValue += c; - break; - case '\"': - state = ParserState.DOUBLE_QUOTES; - currentValue += c; - break; - case ' ': case '\n': - break; - default: - currentValue += c; - break; - } + case VALUE: caseValue(c); break; - // quotes in css are exclusively? for paths: so we want to include the quotes themselves - case SINGLE_QUOTES: - switch (c) { - case '\'': - if (previousChar != '\\') { - state = ParserState.VALUE; - currentValue += c; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - break; - default: - currentValue += c; - break; - } + case SINGLE_QUOTES: caseSingleQuotes(c); break; - case DOUBLE_QUOTES: - switch (c) { - case '\"': - if (previousChar != '\\') { - state = ParserState.VALUE; - currentValue += c; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - break; - default: - currentValue += c; - break; - } + case DOUBLE_QUOTES: caseDoubleQuotes(c); break; } } @@ -189,12 +86,179 @@ public class CssParser { } /** - * Takes an input string with units and returns out the value in pixels. - * This is a fault-tolerant system. + * EFFECTS: Handles and updates parser state/buffers for a single character while in the SELECTORS state. + * See also: the (slightly wrong) context-free grammar commented at the start of this file. + * MODIFIES: this + */ + private void caseSelectors(char c) { + switch (c) { + case '@': + if (currentSelector.equals("")) { + state = ParserState.MEDIA_SELECTORS; + } else { + currentSelector += c; + } + break; + case '{': + state = ParserState.ATTRIBUTE; + break; + case ' ': case '\n': + break; + // todo: do better than blindly create a string; pattern match on css selectors + default: + currentSelector += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the MEDIA_SELECTORS state. + * MODIFIES: this + */ + private void caseMediaSelectors(char c) { + switch (c) { + // todo: don't entirely disregard media queries, also split between @media/@... + case '{': + state = ParserState.SELECTORS; + // discard currentSelector + currentSelector = ""; + break; + default: + currentSelector += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the ATTRIBUTE state. + * MODIFIES: this + */ + private void caseAttribute(char c) { + switch (c) { + case ':': + state = ParserState.VALUE; + break; + case '}': + state = ParserState.SELECTORS; + if (!currentValue.equals("") || !currentProperty.equals("")) { + // System.out.println("something's wrong"); + currentProperty = ""; + currentValue = ""; + } + result.add(new Pair<>(currentSelector, currentRule)); + currentSelector = ""; + currentRule = new ArrayList<>(); + break; + case ' ': case '\n': + break; + default: + currentProperty += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the VALUE state. + * MODIFIES: this + */ + private void caseValue(char c) { + switch (c) { + case ';': + state = ParserState.ATTRIBUTE; + updateCurrentRule(); + break; + case '}': + state = ParserState.SELECTORS; + if (!currentValue.equals("") || !currentProperty.equals("")) { + updateCurrentRule(); + } + result.add(new Pair<>(currentSelector, currentRule)); + currentSelector = ""; + currentRule = new ArrayList<>(); + break; + // todo: handle spaces better: they're actually important inside values + case ' ': case '\n': break; // believe me, i think this is ugly too but it passes checkstyle + case '\'': + state = ParserState.SINGLE_QUOTES; + currentValue += c; + break; + // intentional use of TERRIBLE SMOKING FOOTGUN behavior to check boxes + case '\"': state = ParserState.DOUBLE_QUOTES; + default: currentValue += c; + break; + } + } + + /** + * Helper function to check method length boxes. + * EFFECTS: Adds a new property to the current rule. + * MODIFIES: this + */ + private void updateCurrentRule() { + currentRule.add(new Pair<>(currentProperty, currentValue)); + currentProperty = ""; + currentValue = ""; + } + + // todo: handle additional escaped characters, though what we have right now isn't bad + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the SINGLE_QUOTES state. + * MODIFIES: this + */ + private void caseSingleQuotes(char c) { + switch (c) { + case '\'': + if (previousChar != '\\') { + state = ParserState.VALUE; + // quotes in css are exclusively? for paths: so we want to include the quotes themselves + currentValue += c; + previousChar = '\0'; + } else { + // possibly not the best way to handle this, may be better to keep the backslash + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; + default: + currentValue += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the DOUBLE_QUOTES state. + * MODIFIES: this + */ + private void caseDoubleQuotes(char c) { + switch (c) { + case '\"': + if (previousChar != '\\') { + state = ParserState.VALUE; + currentValue += c; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; + default: + currentValue += c; + break; + } + } + + /** + * Takes an input string with units and returns out the value in pixels. This is a fault-tolerant system. * When given an invalid string (i.e. "12p53x"), it will produce an invalid result instead of throwing. * However, it should parse every valid string correctly. + * <br> + * REQUIRES: A string of the form [NUMBER][VALIDUNIT] + * EFFECTS: Returns a number, in pixels, that has been converted appropriately */ - private double parseUnits(String input) { + private static double parseUnits(String input) { String numbers = ""; String units = ""; // imagine making a language without iterable strings, fml @@ -210,9 +274,17 @@ public class CssParser { try { value = Float.parseFloat(numbers); } catch (NumberFormatException e) { - System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers); + // System.out.printf("Did not parse a float from %s, proceeding with value 0.0...%n", numbers); value = 0.0; } + return convertUnits(units, value); + } + + /** + * REQUIRES: a String that is a unit, otherwise defaults to pixels + * EFFECTS: converts a value in some units to a value in pixels + */ + private static double convertUnits(String units, double value) { // god case/break is such a fault-provoking design i hate it // good thing we avoid breaks entirely here lmao switch (units) { @@ -226,7 +298,7 @@ public class CssParser { case "in": return value * 96; // not handled: % em ex ch rem lh rlh vw vh vmin vmax vb vi svw svh lvw lvh dvw dvh default: - System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value); + // System.out.printf("Unit %s not implemented, defaulting to %s in pixels...%n", units, value); return value; } } diff --git a/src/main/model/html/ElementNode.java b/src/main/model/html/ElementNode.java index 1d427e8..a1ad90c 100644 --- a/src/main/model/html/ElementNode.java +++ b/src/main/model/html/ElementNode.java @@ -12,28 +12,42 @@ public class ElementNode implements Node { private ArrayList<Node> children; - public String getTag() { - return this.tag; - } - - public ArrayList<Node> getChildren() { - return this.children; - } - + /** + * EFFECTS: Constructs a new ElementNode from the arguments provided. + * MODIFIES: this + */ public ElementNode(String tag, ArrayList<Pair<String, String>> attributes, ArrayList<Node> children) { this.tag = tag; this.attributes = attributes; this.children = children; } + /** + * Overloads the constructor for ease of use. We often don't provide children, at first. + * EFFECTS: Constructs a new ElementNode from the arguments provided. + * MODIFIES: this + */ public ElementNode(String tag, ArrayList<Pair<String, String>> attributes) { this(tag, attributes, new ArrayList<Node>()); } + /** + * EFFECTS: Adds a child to the children ArrayList. + * MODIFIES: this + */ public void addChild(Node child) { this.children.add(child); } + public String getTag() { + return this.tag; + } + + public ArrayList<Node> getChildren() { + return this.children; + } + + // We implement this method for easy debugging. public String getData() { return getTag(); } diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java index d6b4ff1..e9dc0c4 100644 --- a/src/main/model/html/HtmlParser.java +++ b/src/main/model/html/HtmlParser.java @@ -17,229 +17,317 @@ import org.javatuples.*; */ public class HtmlParser { + /** + * HTML is not nice to parse. We manage to get away with a relatively small number of parser states regardless. + */ private enum ParserState { HTML, IGNORED, - OPENING_TAG, KEY, VALUE, - SINGLE_QUOTE, DOUBLE_QUOTE, + OPENING_TAG, KEY, VALUE, // TAG::OPENING_TAG, TAG::KEY, TAG::VALUE + SINGLE_QUOTES, DOUBLE_QUOTES, // VALUE::SINGLE_QUOTES, VALUE::DOUBLE_QUOTES UNKNOWN_TAG, CLOSING_TAG, } - public static ArrayList<Node> parseHtmlLL(String input) { + // HTML documents are uniquely a list of Nodes rather than a Node themselves + private ArrayList<Node> result; + // a bunch of useful buffers. see CssParser for commentary. + private ArrayDeque<ElementNode> unfinished; + private String currentTag; + private ArrayList<Pair<String, String>> currentAttributes; + private String currentKey; + private String currentValue; + private String currentText; + // important for quote escapes, and multiple whitespace chars + private char previousChar; - var result = new ArrayList<Node>(); - var unfinished = new ArrayDeque<ElementNode>(); - var currentTag = ""; - var currentAttributes = new ArrayList<Pair<String, String>>(); - var currentKey = ""; - var currentValue = ""; - var currentText = ""; - var previousChar = '\0'; // important for quote escapes, and multiple whitespace chars + private ParserState state; + + public HtmlParser() { + result = new ArrayList<>(); + unfinished = new ArrayDeque<>(); + currentTag = ""; + currentAttributes = new ArrayList<>(); + currentKey = ""; + currentValue = ""; + currentText = ""; + previousChar = '\0'; // We safely? assume to start outside of all nodes. - ParserState state = ParserState.HTML; + state = ParserState.HTML; + } + + public ArrayList<Node> parseHtml(String input) { for (char c : input.toCharArray()) { // System.out.print(state); // System.out.println(" " + c + " " + currentText); switch (state) { - case HTML: - switch (c) { - case '<': - state = ParserState.UNKNOWN_TAG; - if (!currentText.equals("")) { - if (unfinished.size() != 0) { - unfinished.getLast().addChild(new TextNode(currentText)); - } else { - result.add(new TextNode(currentText)); - } - currentText = ""; - previousChar = '\0'; - } - break; // FOOTGUN LANGUAGE DESIGN - case ' ': case '\n': - if (previousChar != ' ') { - currentText += ' '; - } - previousChar = ' '; - break; - default: - currentText += c; - previousChar = c; - break; - } + case HTML: caseHtml(c); break; - case UNKNOWN_TAG: - switch (c) { - case '/': - state = ParserState.CLOSING_TAG; - break; - case '>': // Why would you put <> in your HTML??? go away - state = ParserState.HTML; - currentText += "<>"; - System.out.println("Why would you put <> in your HTML??? go away"); - break; - // For now, we'll straight-up ignore anything matching the <!...> syntax: - // i.e. comments, and <!DOCTYPE html> - case '!': - state = ParserState.IGNORED; - break; - default: - state = ParserState.OPENING_TAG; - currentTag += c; - break; - } + case UNKNOWN_TAG: caseUnknownTag(c); break; // FOOTGUN LANGUAGE DESIGN STRIKES AGAIN - case IGNORED: - switch (c) { - case '>': - state = ParserState.HTML; - break; - default: - break; - } + case IGNORED: caseIgnored(c); break; - case OPENING_TAG: - switch (c) { - case '>': - state = ParserState.HTML; - var node = new ElementNode(currentTag, currentAttributes); - if (unfinished.size() != 0) { - unfinished.getLast().addChild(node); - unfinished.add(node); - } else { - result.add(node); - unfinished.add((ElementNode) result.get(result.size() - 1)); - } - currentTag = ""; - currentAttributes = new ArrayList<>(); - break; - case ' ': case '\n': - state = ParserState.KEY; - break; - default: - currentTag += c; - break; - } + case OPENING_TAG: caseOpeningTag(c); break; - case CLOSING_TAG: - switch (c) { - case '>': - state = ParserState.HTML; - // IMPORTANT: we don't validate that closing tags correspond to an open tag - if (!isSelfClosingTag(currentTag)) { - if (unfinished.size() != 0) { - unfinished.removeLast(); - } - } - currentTag = ""; - break; - case ' ': case '\n': - break; - default: - currentTag += c; - break; - } + case CLOSING_TAG: caseClosingTag(c); break; - case KEY: - switch (c) { - case '>': - state = ParserState.HTML; - var node = new ElementNode(currentTag, currentAttributes); - if (unfinished.size() != 0) { - unfinished.getLast().addChild(node); - unfinished.add(node); - } else { - result.add(node); - unfinished.add((ElementNode) result.get(result.size() - 1)); - } - currentTag = ""; - currentAttributes = new ArrayList<>(); - break; - case '=': - state = ParserState.VALUE; - break; - case ' ': case '\n': - break; - default: - currentKey += c; - break; - } + case KEY: caseKey(c); break; - case VALUE: - switch (c) { - case '\'': - state = ParserState.SINGLE_QUOTE; - break; - case '\"': - state = ParserState.DOUBLE_QUOTE; - break; - case ' ': case '\n': - currentAttributes.add(new Pair<>(currentKey, currentValue)); - currentKey = ""; - currentValue = ""; - case '>': - if (!currentKey.equals("") || !currentValue.equals("")) { - currentAttributes.add(new Pair<>(currentKey, currentValue)); - currentKey = ""; - currentValue = ""; - } - state = ParserState.HTML; - var node = new ElementNode(currentTag, currentAttributes); - if (unfinished.size() != 0) { - unfinished.getLast().addChild(node); - unfinished.add(node); - } else { - result.add(node); - unfinished.add((ElementNode) result.get(result.size() - 1)); - } - currentTag = ""; - currentAttributes = new ArrayList<>(); - break; - default: - currentValue += c; - break; - } + case VALUE: caseValue(c); break; - case SINGLE_QUOTE: - switch (c) { - case '\'': - if (previousChar != '\\') { - state = ParserState.VALUE; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - break; - default: - currentValue += c; - previousChar = c; - break; - } + case SINGLE_QUOTES: caseSingleQuotes(c); break; - case DOUBLE_QUOTE: - switch (c) { - case '\"': - if (previousChar != '\\') { - state = ParserState.VALUE; - previousChar = '\0'; - } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); - currentValue += c; - previousChar = c; - } - default: - currentValue += c; - previousChar = c; - break; - } + case DOUBLE_QUOTES: caseDoubleQuotes(c); break; } } return result; } + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the HTML state. + * MODIFIES: this + */ + private void caseHtml(char c) { + switch (c) { + case '<': + state = ParserState.UNKNOWN_TAG; + if (!currentText.equals("")) { + addNewTextNode(); + } + break; // FOOTGUN LANGUAGE DESIGN + case ' ': case '\n': + if (previousChar != ' ') { + currentText += ' '; + } + previousChar = ' '; + break; + default: + currentText += c; + previousChar = c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the UNKNOWN_TAG state. + * MODIFIES: this + */ + private void caseUnknownTag(char c) { + switch (c) { + case '/': + state = ParserState.CLOSING_TAG; + break; + case '>': // Why would you put <> in your HTML??? go away + state = ParserState.HTML; + currentText += "<>"; + break; + // For now, we'll straight-up ignore anything matching the <!...> syntax: + // i.e. comments, and <!DOCTYPE html> + case '!': + state = ParserState.IGNORED; + break; + default: + state = ParserState.OPENING_TAG; + currentTag += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the IGNORED state. + * MODIFIES: this + */ + private void caseIgnored(char c) { + switch (c) { + case '>': + state = ParserState.HTML; + break; + default: + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the OPENING_TAG state. + * MODIFIES: this + */ + private void caseOpeningTag(char c) { + switch (c) { + case '>': + addNewElementNode(); + break; + case ' ': case '\n': + state = ParserState.KEY; + break; + default: + currentTag += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the CLOSING_TAG state. + * MODIFIES: this + */ + private void caseClosingTag(char c) { + switch (c) { + case '>': + state = ParserState.HTML; + // IMPORTANT: we don't validate that closing tags correspond to an open tag + if (!isSelfClosingTag(currentTag)) { + if (unfinished.size() != 0) { + unfinished.removeLast(); + } + } + currentTag = ""; + break; + case ' ': case '\n': + break; + default: + currentTag += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the KEY state. + * MODIFIES: this + */ + private void caseKey(char c) { + switch (c) { + case '>': + addNewElementNode(); + break; + case '=': + state = ParserState.VALUE; + break; + case ' ': case '\n': + break; + default: + currentKey += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the VALUE state. + * MODIFIES: this + */ + private void caseValue(char c) { + switch (c) { + case '\'': + state = ParserState.SINGLE_QUOTES; + break; + case '\"': + state = ParserState.DOUBLE_QUOTES; + break; + case ' ': case '\n': + currentAttributes.add(new Pair<>(currentKey, currentValue)); + currentKey = ""; + currentValue = ""; + case '>': + if (!currentKey.equals("") || !currentValue.equals("")) { + currentAttributes.add(new Pair<>(currentKey, currentValue)); + currentKey = ""; + currentValue = ""; + } + addNewElementNode(); + break; + default: + currentValue += c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the SINGLE_QUOTES state. + * MODIFIES: this + */ + private void caseSingleQuotes(char c) { + switch (c) { + case '\'': + if (previousChar != '\\') { + state = ParserState.VALUE; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; + default: + currentValue += c; + previousChar = c; + break; + } + } + + /** + * EFFECTS: Handles and updates parser state/buffers for a single character while in the DOUBLE_QUOTES state. + * MODIFIES: this + */ + private void caseDoubleQuotes(char c) { + switch (c) { + case '\"': + if (previousChar != '\\') { + state = ParserState.VALUE; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } + break; // FOOTGUN LANGUAGE DESIGN + default: + currentValue += c; + previousChar = c; + break; + } + } + + /** + * Helper function to remove code duplication. + * EFFECTS: Creates and adds a new ElementNode from the current buffers to the unfinished and result stacks + * MODIFIES: this + */ + private void addNewElementNode() { + state = ParserState.HTML; + var node = new ElementNode(currentTag, currentAttributes); + if (unfinished.size() != 0) { + unfinished.getLast().addChild(node); + if (!isSelfClosingTag(currentTag)) { + unfinished.add(node); + } + } else { + result.add(node); + if (!isSelfClosingTag(currentTag)) { + unfinished.add((ElementNode) result.get(result.size() - 1)); + } + } + currentTag = ""; + currentAttributes = new ArrayList<>(); + } + + /** + * Helper function to check method length boxes. + * EFFECTS: Creates and adds a new TextNode from the current buffers to the unfinished and result stacks + * MODIFIES: this + */ + private void addNewTextNode() { + if (unfinished.size() != 0) { + unfinished.getLast().addChild(new TextNode(currentText)); + } else { + result.add(new TextNode(currentText)); + } + currentText = ""; + previousChar = '\0'; + } + + /** + * Simple helper function to check if a tag is self-closing. + * EFFECTS: Returns whether a String tag is a self-closing tag. + */ private static boolean isSelfClosingTag(String tag) { switch (tag) { case "input": case "param": @@ -281,7 +369,7 @@ public class HtmlParser { <div id="intro"> <img id="face" src="assets/compass.jpg"/> </div> - <!-- <div id="details"> + <div id="details"> <h2>Projects</h2> <p> Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. @@ -291,7 +379,7 @@ public class HtmlParser { dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. </p> - <h2>Posts</h2> + <!-- <h2>Posts</h2> <p> Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris @@ -299,14 +387,12 @@ public class HtmlParser { Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa - qui officia deserunt mollit anim id est laborum. </p> - </div> --> + qui officia deserunt mollit anim id est laborum. </p> --> + </div> </main> <footer> <span><img src="assets/copyleft.svg" width="12" height="12"/> 2020-2022 j-james </span> </footer> </body> </html> -<!-- - */ diff --git a/src/main/model/html/TextNode.java b/src/main/model/html/TextNode.java index 634bf3b..cf0078b 100644 --- a/src/main/model/html/TextNode.java +++ b/src/main/model/html/TextNode.java @@ -5,14 +5,19 @@ import model.util.Node; public class TextNode implements Node { private String text = ""; - public String getText() { - return this.text; - } - + /** + * EFFECTS: Creates a new TextNode from the provided String value. + * MODIFIES: this + */ public TextNode(String text) { this.text = text; } + public String getText() { + return this.text; + } + + // We implement this method for easy debugging. public String getData() { return getText(); } diff --git a/src/main/model/util/Node.java b/src/main/model/util/Node.java index 010a2da..619404f 100644 --- a/src/main/model/util/Node.java +++ b/src/main/model/util/Node.java @@ -1,10 +1,10 @@ package model.util; /** - * yeah there's literally nothing here - * i just need to establish that ElementNode and TextNode both inherit from Node + * yeah there's nothing here + * I just need to establish the inheritance relation of ElementNode and TextNode */ public interface Node { - // Return a representation of the Node + // Return a representation of the Node. Useful for debugging. public String getData(); } diff --git a/src/test/model/CssParserTest.java b/src/test/model/CssParserTest.java index 2852da5..fa395f6 100644 --- a/src/test/model/CssParserTest.java +++ b/src/test/model/CssParserTest.java @@ -10,6 +10,7 @@ public class CssParserTest { @Test void testIdiomaticCss() { var idiomaticCss = "body { background-color: #f0f0f2; margin: 0; padding: 0; font-family: -apple-system, system-ui, BlinkMacSystemFont, \"Segoe UI\", \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;}div { width: 600px; margin: 5em auto; padding: 2em; background-color: #fdfdff; border-radius: 0.5em; box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);}a:link, a:visited { color: #38488f; text-decoration: none;}@media (max - width : 700px) { div { margin: 0 auto; width: auto; }}"; - System.out.println(CssParser.parseLL(idiomaticCss)); + CssParser parser = new CssParser(); + System.out.println(parser.parseCSS(idiomaticCss)); } } diff --git a/src/test/model/HtmlParserTest.java b/src/test/model/HtmlParserTest.java index 4b05cfb..58b4555 100644 --- a/src/test/model/HtmlParserTest.java +++ b/src/test/model/HtmlParserTest.java @@ -7,8 +7,6 @@ import org.junit.jupiter.api.Test; import java.util.*; -import static org.junit.jupiter.api.Assertions.*; - public class HtmlParserTest { String idiomaticHtml = "<!DOCTYPE html><html><head></head><body><p>Hello, world!</p></body></html>"; @@ -18,26 +16,31 @@ public class HtmlParserTest { @Test void testIdiomaticHtml() { String[] idiomaticHtmlArray = {"<!DOCTYPE html>","<html>","<head>","</head>","<body>","<p>","Hello,world!","</p>","</body>","</html>"}; - var parsedHtml = HtmlParser.parseHtmlLL(idiomaticHtml); - displayHtmlTree(parsedHtml); - System.out.println(HtmlParser.parseHtmlLL(idiomaticHtml)); + HtmlParser parser = new HtmlParser(); + displayHtmlTree(parser.parseHtml(idiomaticHtml)); // assertEquals(HtmlParser.parseHtmlLL(idiomaticHtml), Arrays.asList(idiomaticHtmlArray)); } @Test void testBrokenHtml() { String[] brokenHtmlArray = {"<html>","<foo>","<bar>","</bar>","<ba>"}; - System.out.println(HtmlParser.parseHtmlLL(brokenHtml)); + HtmlParser parser = new HtmlParser(); + displayHtmlTree(parser.parseHtml(brokenHtml)); // assertEquals(HtmlParser.parseHtmlLL(brokenHtml), Arrays.asList(brokenHtmlArray)); } @Test void testTrailingTextHtml() { String[] trailingTextHtmlArray = {"<html>","<foo>","<bar>","</bar>","ba"}; - System.out.println(HtmlParser.parseHtmlLL(trailingTextHtml)); + HtmlParser parser = new HtmlParser(); + displayHtmlTree(parser.parseHtml(trailingTextHtml)); // assertEquals(HtmlParser.parseHtmlLL(trailingTextHtml), Arrays.asList(trailingTextHtmlArray)); } + /** + * Simple helper function for debugging. + * EFFECTS: prints a representation of the tree to the console for debugging purposes + */ private void displayHtmlTree(ArrayList<Node> tree) { for (Node node : tree) { if (node instanceof ElementNode) { |