diff options
Diffstat (limited to 'src/main/model/html')
-rw-r--r-- | src/main/model/html/ElementNode.java | 17 | ||||
-rw-r--r-- | src/main/model/html/HtmlParser.java | 25 |
2 files changed, 29 insertions, 13 deletions
diff --git a/src/main/model/html/ElementNode.java b/src/main/model/html/ElementNode.java index a1ad90c..98995d0 100644 --- a/src/main/model/html/ElementNode.java +++ b/src/main/model/html/ElementNode.java @@ -28,7 +28,16 @@ public class ElementNode implements Node { * MODIFIES: this */ public ElementNode(String tag, ArrayList<Pair<String, String>> attributes) { - this(tag, attributes, new ArrayList<Node>()); + this(tag, attributes, new ArrayList<>()); + } + + /** + * Overloads the constructor for ease of use. Should probably only be used for tests. + * EFFECTS: Constructs a new ElementNode from the arguments provided. + * MODIFIES: this + */ + public ElementNode(String tag) { + this(tag, new ArrayList<>(), new ArrayList<>()); } /** @@ -43,12 +52,16 @@ public class ElementNode implements Node { return this.tag; } + public ArrayList<Pair<String, String>> getAttributes() { + return this.attributes; + } + public ArrayList<Node> getChildren() { return this.children; } // We implement this method for easy debugging. public String getData() { - return getTag(); + return getTag() + " " + getAttributes().toString(); } } diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java index e9dc0c4..bfdd57c 100644 --- a/src/main/model/html/HtmlParser.java +++ b/src/main/model/html/HtmlParser.java @@ -5,15 +5,18 @@ import java.util.*; import model.util.Node; import org.javatuples.*; -/* +/** + * This class represents the state of and implements an LL(1) HTML parser. + * For convenience, the following (defo wrong) context-free grammar for HTML is below. + * <br> * HTML ::= '<!DOCTYPE html>' (NODE)* - * NODE ::= '<'TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* '>' (NODE)* '</' TAG '>' - * | '<'SINGLE_TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* ('>'|'/>') + * NODE ::= '<'TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* '>' (NODE)* '<\/' TAG '>' + * | '<'SELF_CLOSING_TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* ('>'|'/>') * | (TEXT | NODE)* * TEXT ::= UNICODE - {'"'} + {'\"'} * TAG ::= 'body' | 'div' | ... - * SINGLE_TAG ::= 'img' | ... - * (note that \forall T \in SINGLE_TAG, T \notin TAG) + * SELF_CLOSING_TAG ::= 'img' | ... + * (note that \forall T \in SELF_CLOSING_TAG, T \notin TAG) */ public class HtmlParser { @@ -216,16 +219,16 @@ public class HtmlParser { */ private void caseValue(char c) { switch (c) { - case '\'': - state = ParserState.SINGLE_QUOTES; + case '\'': state = ParserState.SINGLE_QUOTES; break; - case '\"': - state = ParserState.DOUBLE_QUOTES; + case '\"': state = ParserState.DOUBLE_QUOTES; break; case ' ': case '\n': + state = ParserState.KEY; currentAttributes.add(new Pair<>(currentKey, currentValue)); currentKey = ""; currentValue = ""; + break; // THE FOOTGUN DESIGN STRIKES AGAIN case '>': if (!currentKey.equals("") || !currentValue.equals("")) { currentAttributes.add(new Pair<>(currentKey, currentValue)); @@ -251,7 +254,7 @@ public class HtmlParser { state = ParserState.VALUE; previousChar = '\0'; } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue = currentValue.substring(0, currentValue.length() - 1); currentValue += c; previousChar = c; } @@ -274,7 +277,7 @@ public class HtmlParser { state = ParserState.VALUE; previousChar = '\0'; } else { - currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue = currentValue.substring(0, currentValue.length() - 1); currentValue += c; previousChar = c; } |