diff options
-rw-r--r-- | src/main/model/css/CssParser.java | 81 | ||||
-rw-r--r-- | src/main/model/html/HtmlParser.java | 142 | ||||
-rw-r--r-- | src/test/model/HtmlParserTest.java | 25 |
3 files changed, 155 insertions, 93 deletions
diff --git a/src/main/model/css/CssParser.java b/src/main/model/css/CssParser.java index 5f78f0a..8d57bdc 100644 --- a/src/main/model/css/CssParser.java +++ b/src/main/model/css/CssParser.java @@ -15,35 +15,6 @@ import java.util.*; * VALUE ::= ??? idk lol */ -/* - * body { - * background-color: #f0f0f2; - * margin: 0; - * padding: 0; - * font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", - * "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; - * - * } - * div { - * width: 600px; - * margin: 5em auto; - * padding: 2em; - * background-color: #fdfdff; - * border-radius: 0.5em; - * box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02); - * } - * a:link, a:visited { - * color: #38488f; - * text-decoration: none; - * } - * @media (max - width : 700px) { - * div { - * margin: 0 auto; - * width: auto; - * } - * } - */ - /** * This class assumes that it is getting _valid CSS_: that is, the style between two tags * of a style block, or the raw content of a .css file. @@ -75,6 +46,7 @@ public class CssParser { var currentRule = new ArrayList<Pair<String, String>>(); var currentProperty = ""; var currentValue = ""; + var previousChar = '\0'; // We safely assume to start by reading a selector. ParserState state = ParserState.SELECTORS; @@ -178,8 +150,15 @@ public class CssParser { case SINGLE_QUOTES: switch (c) { case '\'': - state = ParserState.VALUE; - currentValue += c; + if (previousChar != '\\') { + state = ParserState.VALUE; + currentValue += c; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } break; default: currentValue += c; @@ -189,8 +168,15 @@ public class CssParser { case DOUBLE_QUOTES: switch (c) { case '\"': - state = ParserState.VALUE; - currentValue += c; + if (previousChar != '\\') { + state = ParserState.VALUE; + currentValue += c; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } break; default: currentValue += c; @@ -245,3 +231,32 @@ public class CssParser { } } } + +/* + * body { + * background-color: #f0f0f2; + * margin: 0; + * padding: 0; + * font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", + * "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; + * + * } + * div { + * width: 600px; + * margin: 5em auto; + * padding: 2em; + * background-color: #fdfdff; + * border-radius: 0.5em; + * box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02); + * } + * a:link, a:visited { + * color: #38488f; + * text-decoration: none; + * } + * @media (max - width : 700px) { + * div { + * margin: 0 auto; + * width: auto; + * } + * } + */ diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java index 6ad5af4..d6b4ff1 100644 --- a/src/main/model/html/HtmlParser.java +++ b/src/main/model/html/HtmlParser.java @@ -6,54 +6,6 @@ import model.util.Node; import org.javatuples.*; /* -<!DOCTYPE html> -<html> -<head> - <title>j-james</title> - <meta charset="utf-8"/> - <meta name="viewport" content="width=device-width"/> - <link rel="icon" type="image/jpg" href="assets/compass.jpg"/> - <link rel="stylesheet" href="css/normalize.css"/> - <link rel="stylesheet" href="css/style.css"/> -</head> -<body> - <header> - <h1> - <a href="https://j-james.me">j-james</a> - </h1> - <nav> - <a href="https://j-james.me/about">about</a> - <a href="https://j-james.me/resume">resume</a> - <a href="https://j-james.me/posts">posts</a> - <a href="https://j-james.me/writeups">writeups</a> - </nav> - </header> - <main> - <div id="intro"> - <img id="face" src="assets/compass.jpg"/> - <div id="profile"> - <p> Hello, I'm JJ, and I go by j-james on the Internet. </p> - <p> I'm a second-year student at the <a href="https://ubc.ca">University of British Columbia</a>, flag hunter for <a href="https://ubcctf.github.io">Maple Bacon</a>, embedded programmer on <a href="https://ubcbionics.com/">UBC Bionics</a>, and occasional ultimate frisbee and roller/ice hockey player.</p> - <p> Outside of school, sports, and social life, I enjoy building and contributing to <a href="https://www.gnu.org/philosophy/free-sw">free-and-open-source</a> projects. The majority of my work can either be found on <a href="https://github.com/j-james">GitHub</a> or at <a href="https://sr.ht/~j-james">SourceHut</a>. </p> - </div> - </div> - <!-- <div id="details"> - <h2>Projects</h2> - <p> Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. </p> - <h2>Posts</h2> - <p> Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. </p> - </div> --> - </main> - <footer> - <span><img src="assets/copyleft.svg" width="12" height="12"/> 2020-2022 j-james </span> - </footer> -</body> -</html> -<!-- - -*/ - -/* * HTML ::= '<!DOCTYPE html>' (NODE)* * NODE ::= '<'TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* '>' (NODE)* '</' TAG '>' * | '<'SINGLE_TAG (' ' WORD '=' ('"'TEXT'"' | TEXT))* ('>'|'/>') @@ -66,7 +18,7 @@ import org.javatuples.*; public class HtmlParser { private enum ParserState { - HTML, + HTML, IGNORED, OPENING_TAG, KEY, VALUE, SINGLE_QUOTE, DOUBLE_QUOTE, UNKNOWN_TAG, CLOSING_TAG, @@ -81,7 +33,7 @@ public class HtmlParser { var currentKey = ""; var currentValue = ""; var currentText = ""; - var previousChar = '\0'; + var previousChar = '\0'; // important for quote escapes, and multiple whitespace chars // We safely? assume to start outside of all nodes. ParserState state = ParserState.HTML; @@ -101,10 +53,18 @@ public class HtmlParser { result.add(new TextNode(currentText)); } currentText = ""; + previousChar = '\0'; } break; // FOOTGUN LANGUAGE DESIGN + case ' ': case '\n': + if (previousChar != ' ') { + currentText += ' '; + } + previousChar = ' '; + break; default: currentText += c; + previousChar = c; break; } break; @@ -118,20 +78,31 @@ public class HtmlParser { currentText += "<>"; System.out.println("Why would you put <> in your HTML??? go away"); break; - // Currently doesn't handle <!DOCTYPE> different from any other tag - case '!': default: + // For now, we'll straight-up ignore anything matching the <!...> syntax: + // i.e. comments, and <!DOCTYPE html> + case '!': + state = ParserState.IGNORED; + break; + default: state = ParserState.OPENING_TAG; currentTag += c; break; } break; // FOOTGUN LANGUAGE DESIGN STRIKES AGAIN + case IGNORED: + switch (c) { + case '>': + state = ParserState.HTML; + break; + default: + break; + } + break; case OPENING_TAG: switch (c) { case '>': state = ParserState.HTML; var node = new ElementNode(currentTag, currentAttributes); - System.out.println("Adding ElementNode " + currentTag); - System.out.println("Current size of unfinished: " + unfinished.size()); if (unfinished.size() != 0) { unfinished.getLast().addChild(node); unfinished.add(node); @@ -174,8 +145,6 @@ public class HtmlParser { case '>': state = ParserState.HTML; var node = new ElementNode(currentTag, currentAttributes); - System.out.println("Adding ElementNode " + currentTag); - System.out.println("Current size of unfinished: " + unfinished.size()); if (unfinished.size() != 0) { unfinished.getLast().addChild(node); unfinished.add(node); @@ -216,8 +185,6 @@ public class HtmlParser { } state = ParserState.HTML; var node = new ElementNode(currentTag, currentAttributes); - System.out.println("Adding ElementNode " + currentTag); - System.out.println("Current size of unfinished: " + unfinished.size()); if (unfinished.size() != 0) { unfinished.getLast().addChild(node); unfinished.add(node); @@ -240,6 +207,7 @@ public class HtmlParser { state = ParserState.VALUE; previousChar = '\0'; } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); currentValue += c; previousChar = c; } @@ -257,6 +225,7 @@ public class HtmlParser { state = ParserState.VALUE; previousChar = '\0'; } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); currentValue += c; previousChar = c; } @@ -284,3 +253,60 @@ public class HtmlParser { } } } + +/* +<!DOCTYPE html> +<html> +<head> + <title>j-james</title> + <meta charset="utf-8"/> + <meta name="viewport" content="width=device-width"/> + <link rel="icon" type="image/jpg" href="assets/compass.jpg"/> + <link rel="stylesheet" href="css/normalize.css"/> + <link rel="stylesheet" href="css/style.css"/> +</head> +<body> + <header> + <h1> + <a href="https://j-james.me">j-james</a> + </h1> + <nav> + <a href="https://j-james.me/about">about</a> + <a href="https://j-james.me/resume">resume</a> + <a href="https://j-james.me/posts">posts</a> + <a href="https://j-james.me/writeups">writeups</a> + </nav> + </header> + <main> + <div id="intro"> + <img id="face" src="assets/compass.jpg"/> + </div> + <!-- <div id="details"> + <h2>Projects</h2> + <p> Lorem ipsum dolor sit amet, consectetur adipiscing elit, + sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris + nisi ut aliquip ex ea commodo consequat. + Duis aute irure dolor in reprehenderit in voluptate velit esse cillum + dolore eu fugiat nulla pariatur. + Excepteur sint occaecat cupidatat non proident, sunt in culpa + qui officia deserunt mollit anim id est laborum. </p> + <h2>Posts</h2> + <p> Lorem ipsum dolor sit amet, consectetur adipiscing elit, + sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris + nisi ut aliquip ex ea commodo consequat. + Duis aute irure dolor in reprehenderit in voluptate velit esse cillum + dolore eu fugiat nulla pariatur. + Excepteur sint occaecat cupidatat non proident, sunt in culpa + qui officia deserunt mollit anim id est laborum. </p> + </div> --> + </main> + <footer> + <span><img src="assets/copyleft.svg" width="12" height="12"/> 2020-2022 j-james </span> + </footer> +</body> +</html> +<!-- + +*/ diff --git a/src/test/model/HtmlParserTest.java b/src/test/model/HtmlParserTest.java index e83c857..4b05cfb 100644 --- a/src/test/model/HtmlParserTest.java +++ b/src/test/model/HtmlParserTest.java @@ -1,21 +1,25 @@ package model; +import model.html.ElementNode; import model.html.HtmlParser; +import model.util.Node; import org.junit.jupiter.api.Test; -import java.util.Arrays; +import java.util.*; import static org.junit.jupiter.api.Assertions.*; public class HtmlParserTest { - String idiomaticHtml = "<!DOCTYPE html><html><head></head><body><p>Hello,world!</p></body></html>"; + String idiomaticHtml = "<!DOCTYPE html><html><head></head><body><p>Hello, world!</p></body></html>"; String brokenHtml = "<html><foo><bar></bar><ba"; String trailingTextHtml = "<html><foo><bar></bar>ba"; @Test void testIdiomaticHtml() { String[] idiomaticHtmlArray = {"<!DOCTYPE html>","<html>","<head>","</head>","<body>","<p>","Hello,world!","</p>","</body>","</html>"}; + var parsedHtml = HtmlParser.parseHtmlLL(idiomaticHtml); + displayHtmlTree(parsedHtml); System.out.println(HtmlParser.parseHtmlLL(idiomaticHtml)); // assertEquals(HtmlParser.parseHtmlLL(idiomaticHtml), Arrays.asList(idiomaticHtmlArray)); } @@ -23,12 +27,29 @@ public class HtmlParserTest { @Test void testBrokenHtml() { String[] brokenHtmlArray = {"<html>","<foo>","<bar>","</bar>","<ba>"}; + System.out.println(HtmlParser.parseHtmlLL(brokenHtml)); // assertEquals(HtmlParser.parseHtmlLL(brokenHtml), Arrays.asList(brokenHtmlArray)); } @Test void testTrailingTextHtml() { String[] trailingTextHtmlArray = {"<html>","<foo>","<bar>","</bar>","ba"}; + System.out.println(HtmlParser.parseHtmlLL(trailingTextHtml)); // assertEquals(HtmlParser.parseHtmlLL(trailingTextHtml), Arrays.asList(trailingTextHtmlArray)); } + + private void displayHtmlTree(ArrayList<Node> tree) { + for (Node node : tree) { + if (node instanceof ElementNode) { + System.out.print(((ElementNode) node).getTag() + ": "); + for (Node n : ((ElementNode) node).getChildren()) { + System.out.print(n.getData() + " "); + } + System.out.println(); + displayHtmlTree(((ElementNode) node).getChildren()); + } else { + System.out.println("Text: " + node.getData()); + } + } + } } |