diff options
author | j-james | 2022-10-17 08:33:03 +0000 |
---|---|---|
committer | j-james | 2022-10-17 08:33:03 +0000 |
commit | 2bb1c153b693095b6bbafdfad139791817280af4 (patch) | |
tree | 22ab1ec74b50abbd0dc066e88a29f2ab36942db9 /src/main/model | |
parent | 0caf1994dae8e88f7c219bedd87b65190b88aa89 (diff) |
Refactor HtmlParser to construct a list of Nodes
Diffstat (limited to 'src/main/model')
-rw-r--r-- | src/main/model/html/ElementNode.java | 40 | ||||
-rw-r--r-- | src/main/model/html/HtmlParser.java | 158 | ||||
-rw-r--r-- | src/main/model/html/TextNode.java | 19 | ||||
-rw-r--r-- | src/main/model/util/Node.java | 10 |
4 files changed, 147 insertions, 80 deletions
diff --git a/src/main/model/html/ElementNode.java b/src/main/model/html/ElementNode.java new file mode 100644 index 0000000..1d427e8 --- /dev/null +++ b/src/main/model/html/ElementNode.java @@ -0,0 +1,40 @@ +package model.html; + +import model.util.Node; +import org.javatuples.Pair; + +import java.util.ArrayList; +import java.util.Optional; + +public class ElementNode implements Node { + private String tag; + private ArrayList<Pair<String,String>> attributes; + + private ArrayList<Node> children; + + public String getTag() { + return this.tag; + } + + public ArrayList<Node> getChildren() { + return this.children; + } + + public ElementNode(String tag, ArrayList<Pair<String, String>> attributes, ArrayList<Node> children) { + this.tag = tag; + this.attributes = attributes; + this.children = children; + } + + public ElementNode(String tag, ArrayList<Pair<String, String>> attributes) { + this(tag, attributes, new ArrayList<Node>()); + } + + public void addChild(Node child) { + this.children.add(child); + } + + public String getData() { + return getTag(); + } +} diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java index 5109e62..6ad5af4 100644 --- a/src/main/model/html/HtmlParser.java +++ b/src/main/model/html/HtmlParser.java @@ -2,12 +2,10 @@ package model.html; import java.util.*; -import model.html.HtmlTree; +import model.util.Node; import org.javatuples.*; - /* - <!DOCTYPE html> <html> <head> @@ -74,28 +72,37 @@ public class HtmlParser { UNKNOWN_TAG, CLOSING_TAG, } - public static ArrayList<HtmlTree> parseHtmlLL(String input) { + public static ArrayList<Node> parseHtmlLL(String input) { - var result = new ArrayList<HtmlTree>(); - var unfinished = new ArrayDeque<HtmlTree>(); + var result = new ArrayList<Node>(); + var unfinished = new ArrayDeque<ElementNode>(); var currentTag = ""; var currentAttributes = new ArrayList<Pair<String, String>>(); var currentKey = ""; var currentValue = ""; var currentText = ""; + var previousChar = '\0'; // We safely? assume to start outside of all nodes. ParserState state = ParserState.HTML; for (char c : input.toCharArray()) { + // System.out.print(state); + // System.out.println(" " + c + " " + currentText); switch (state) { case HTML: switch (c) { case '<': + state = ParserState.UNKNOWN_TAG; if (!currentText.equals("")) { - // unfinished.add(text) idk + if (unfinished.size() != 0) { + unfinished.getLast().addChild(new TextNode(currentText)); + } else { + result.add(new TextNode(currentText)); + } + currentText = ""; } - + break; // FOOTGUN LANGUAGE DESIGN default: currentText += c; break; @@ -106,21 +113,34 @@ public class HtmlParser { case '/': state = ParserState.CLOSING_TAG; break; - case '>': + case '>': // Why would you put <> in your HTML??? go away state = ParserState.HTML; + currentText += "<>"; System.out.println("Why would you put <> in your HTML??? go away"); break; - default: + // Currently doesn't handle <!DOCTYPE> different from any other tag + case '!': default: state = ParserState.OPENING_TAG; currentTag += c; break; } + break; // FOOTGUN LANGUAGE DESIGN STRIKES AGAIN case OPENING_TAG: switch (c) { case '>': state = ParserState.HTML; - // unfinished.add(new HtmlTree(tag) + var node = new ElementNode(currentTag, currentAttributes); + System.out.println("Adding ElementNode " + currentTag); + System.out.println("Current size of unfinished: " + unfinished.size()); + if (unfinished.size() != 0) { + unfinished.getLast().addChild(node); + unfinished.add(node); + } else { + result.add(node); + unfinished.add((ElementNode) result.get(result.size() - 1)); + } currentTag = ""; + currentAttributes = new ArrayList<>(); break; case ' ': case '\n': state = ParserState.KEY; @@ -133,10 +153,14 @@ public class HtmlParser { case CLOSING_TAG: switch (c) { case '>': + state = ParserState.HTML; // IMPORTANT: we don't validate that closing tags correspond to an open tag if (!isSelfClosingTag(currentTag)) { - //unknown.pop + if (unfinished.size() != 0) { + unfinished.removeLast(); + } } + currentTag = ""; break; case ' ': case '\n': break; @@ -149,13 +173,18 @@ public class HtmlParser { switch (c) { case '>': state = ParserState.HTML; - if (currentAttributes.size() != 0) { - // unfinished.something idk new HtmlTree(tag=currentTag, attributes=currentAttributes) - currentAttributes.clear(); + var node = new ElementNode(currentTag, currentAttributes); + System.out.println("Adding ElementNode " + currentTag); + System.out.println("Current size of unfinished: " + unfinished.size()); + if (unfinished.size() != 0) { + unfinished.getLast().addChild(node); + unfinished.add(node); } else { - // unfinished.add(new HtmlTree(tag) + result.add(node); + unfinished.add((ElementNode) result.get(result.size() - 1)); } currentTag = ""; + currentAttributes = new ArrayList<>(); break; case '=': state = ParserState.VALUE; @@ -185,8 +214,20 @@ public class HtmlParser { currentKey = ""; currentValue = ""; } - // unfinished.something idk new HtmlTree(tag=currentTag, attributes=currentAttributes) - currentAttributes.clear(); + state = ParserState.HTML; + var node = new ElementNode(currentTag, currentAttributes); + System.out.println("Adding ElementNode " + currentTag); + System.out.println("Current size of unfinished: " + unfinished.size()); + if (unfinished.size() != 0) { + unfinished.getLast().addChild(node); + unfinished.add(node); + } else { + result.add(node); + unfinished.add((ElementNode) result.get(result.size() - 1)); + } + currentTag = ""; + currentAttributes = new ArrayList<>(); + break; default: currentValue += c; break; @@ -195,15 +236,33 @@ public class HtmlParser { case SINGLE_QUOTE: switch (c) { case '\'': - state = ParserState.VALUE; + if (previousChar != '\\') { + state = ParserState.VALUE; + previousChar = '\0'; + } else { + currentValue += c; + previousChar = c; + } + break; default: currentValue += c; + previousChar = c; break; } break; case DOUBLE_QUOTE: switch (c) { + case '\"': + if (previousChar != '\\') { + state = ParserState.VALUE; + previousChar = '\0'; + } else { + currentValue += c; + previousChar = c; + } default: + currentValue += c; + previousChar = c; break; } break; @@ -212,7 +271,6 @@ public class HtmlParser { return result; } - private static boolean isSelfClosingTag(String tag) { switch (tag) { case "input": case "param": @@ -225,64 +283,4 @@ public class HtmlParser { return false; } } - - /* - public static void parseHtmlLL(String input) { - String tag = ""; - ArrayList<Pair<String, String>> attributes = new ArrayList<>(); - boolean inTag = false; - boolean inAttribute = false; // for checking if we're in quotes - - for (int i = 0; i < input.length(); i++) { - if (inTag) { - if (inAttribute) { - switch (input.charAt(i)) { - case '\"' - } - } else { - switch (input.charAt(i)) { - - } - } - - - } else { - switch (input.charAt(i)) { - case '<': - } - } - } - } - - private static void parseAttribute(String input) { - - } -*/ - -/* - public static void parseHTML(ArrayList<String> input) { - String data = ""; - ArrayList<ParseTree> children = new ArrayList<ParseTree>(); - - boolean inTag = false; - boolean tagComplete = false; - - for (String i : input) { - if (inTag) { - if (i.equals(">")) { - inTag = false; - tagComplete = true; - // remove ending tags and recursively parse out children - } else { - data += i; - } - } else { - if (i.equals("<")) { - inTag = true; - } - } - - } - - }*/ } diff --git a/src/main/model/html/TextNode.java b/src/main/model/html/TextNode.java new file mode 100644 index 0000000..634bf3b --- /dev/null +++ b/src/main/model/html/TextNode.java @@ -0,0 +1,19 @@ +package model.html; + +import model.util.Node; + +public class TextNode implements Node { + private String text = ""; + + public String getText() { + return this.text; + } + + public TextNode(String text) { + this.text = text; + } + + public String getData() { + return getText(); + } +} diff --git a/src/main/model/util/Node.java b/src/main/model/util/Node.java new file mode 100644 index 0000000..010a2da --- /dev/null +++ b/src/main/model/util/Node.java @@ -0,0 +1,10 @@ +package model.util; + +/** + * yeah there's literally nothing here + * i just need to establish that ElementNode and TextNode both inherit from Node + */ +public interface Node { + // Return a representation of the Node + public String getData(); +} |