aboutsummaryrefslogtreecommitdiff
path: root/src/main/model/html
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/model/html')
-rw-r--r-- src/main/model/html/ElementNode.java 17
-rw-r--r-- src/main/model/html/HtmlParser.java 25
2 files changed, 29 insertions, 13 deletions
diff --git a/src/main/model/html/ElementNode.java b/src/main/model/html/ElementNode.java
index a1ad90c..98995d0 100644
--- a/src/main/model/html/ElementNode.java
+++ b/src/main/model/html/ElementNode.java
@@ -28,7 +28,16 @@ public class ElementNode implements Node {
* MODIFIES: this
*/
public ElementNode(String tag, ArrayList<Pair<String, String>> attributes) {
- this(tag, attributes, new ArrayList<Node>());
+ this(tag, attributes, new ArrayList<>());
+ }
+
+ /**
+ * Convenience overload that builds an element with empty attribute and child lists. Intended mainly for tests.
+ * EFFECTS: Constructs a new ElementNode from the arguments provided.
+ * MODIFIES: this
+ */
+ public ElementNode(String tag) {
+ this(tag, new ArrayList<>(), new ArrayList<>());
}
/**
@@ -43,12 +52,16 @@ public class ElementNode implements Node {
return this.tag;
}
+ public ArrayList<Pair<String, String>> getAttributes() {
+ return this.attributes;
+ }
+
public ArrayList<Node> getChildren() {
return this.children;
}
// We implement this method for easy debugging.
public String getData() {
- return getTag();
+ return getTag() + " " + getAttributes().toString();
}
}
diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java
index e9dc0c4..bfdd57c 100644
--- a/src/main/model/html/HtmlParser.java
+++ b/src/main/model/html/HtmlParser.java
@@ -5,15 +5,18 @@ import java.util.*;
import model.util.Node;
import org.javatuples.*;
-/*
+/**
+ * This class represents the state of and implements an LL(1) HTML parser.
+ * For convenience, an approximate (definitely not exact) context-free grammar for HTML is given below.
+ * <br>
* HTML ::= '<!DOCTYPE html>' (NODE)*
- * NODE ::= '<'TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* '>' (NODE)* '</' TAG '>'
- * | '<'SINGLE_TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* ('>'|'/>')
+ * NODE ::= '<'TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* '>' (NODE)* '<\/' TAG '>'
+ * | '<'SELF_CLOSING_TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* ('>'|'/>')
* | (TEXT | NODE)*
* TEXT ::= UNICODE - {'"'} + {'\"'}
* TAG ::= 'body' | 'div' | ...
- * SINGLE_TAG ::= 'img' | ...
- * (note that \forall T \in SINGLE_TAG, T \notin TAG)
+ * SELF_CLOSING_TAG ::= 'img' | ...
+ * (note that \forall T \in SELF_CLOSING_TAG, T \notin TAG)
*/
public class HtmlParser {
@@ -216,16 +219,16 @@ public class HtmlParser {
*/
private void caseValue(char c) {
switch (c) {
- case '\'':
- state = ParserState.SINGLE_QUOTES;
+ case '\'': state = ParserState.SINGLE_QUOTES;
break;
- case '\"':
- state = ParserState.DOUBLE_QUOTES;
+ case '\"': state = ParserState.DOUBLE_QUOTES;
break;
case ' ': case '\n':
+ state = ParserState.KEY;
currentAttributes.add(new Pair<>(currentKey, currentValue));
currentKey = "";
currentValue = "";
+ break; // without this break, control falls through into the '>' case below
case '>':
if (!currentKey.equals("") || !currentValue.equals("")) {
currentAttributes.add(new Pair<>(currentKey, currentValue));
@@ -251,7 +254,7 @@ public class HtmlParser {
state = ParserState.VALUE;
previousChar = '\0';
} else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
+ currentValue = currentValue.substring(0, currentValue.length() - 1);
currentValue += c;
previousChar = c;
}
@@ -274,7 +277,7 @@ public class HtmlParser {
state = ParserState.VALUE;
previousChar = '\0';
} else {
- currentValue = currentValue.substring(0, currentValue.length() - 2);
+ currentValue = currentValue.substring(0, currentValue.length() - 1);
currentValue += c;
previousChar = c;
}