From 453372247c8c173c16fa2234b9645bf7a542ed8d Mon Sep 17 00:00:00 2001 From: j-james Date: Mon, 17 Oct 2022 02:10:50 -0700 Subject: Some edge cases: check for escaped quotes and ignore multiple whitespace chars --- src/main/model/css/CssParser.java | 81 +++++++++++--------- src/main/model/html/HtmlParser.java | 142 +++++++++++++++++++++--------------- src/test/model/HtmlParserTest.java | 25 ++++++- 3 files changed, 155 insertions(+), 93 deletions(-) diff --git a/src/main/model/css/CssParser.java b/src/main/model/css/CssParser.java index 5f78f0a..8d57bdc 100644 --- a/src/main/model/css/CssParser.java +++ b/src/main/model/css/CssParser.java @@ -15,35 +15,6 @@ import java.util.*; * VALUE ::= ??? idk lol */ -/* - * body { - * background-color: #f0f0f2; - * margin: 0; - * padding: 0; - * font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", - * "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; - * - * } - * div { - * width: 600px; - * margin: 5em auto; - * padding: 2em; - * background-color: #fdfdff; - * border-radius: 0.5em; - * box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02); - * } - * a:link, a:visited { - * color: #38488f; - * text-decoration: none; - * } - * @media (max - width : 700px) { - * div { - * margin: 0 auto; - * width: auto; - * } - * } - */ - /** * This class assumes that it is getting _valid CSS_: that is, the style between two tags * of a style block, or the raw content of a .css file. @@ -75,6 +46,7 @@ public class CssParser { var currentRule = new ArrayList>(); var currentProperty = ""; var currentValue = ""; + var previousChar = '\0'; // We safely assume to start by reading a selector. ParserState state = ParserState.SELECTORS; @@ -178,8 +150,15 @@ public class CssParser { case SINGLE_QUOTES: switch (c) { case '\'': - state = ParserState.VALUE; - currentValue += c; + if (previousChar != '\\') { + state = ParserState.VALUE; + currentValue += c; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } break; default: currentValue += c; @@ -189,8 +168,15 @@ public class CssParser { case DOUBLE_QUOTES: switch (c) { case '\"': - state = ParserState.VALUE; - currentValue += c; + if (previousChar != '\\') { + state = ParserState.VALUE; + currentValue += c; + previousChar = '\0'; + } else { + currentValue = currentValue.substring(0, currentValue.length() - 2); + currentValue += c; + previousChar = c; + } break; default: currentValue += c; @@ -245,3 +231,32 @@ public class CssParser { } } } + +/* + * body { + * background-color: #f0f0f2; + * margin: 0; + * padding: 0; + * font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", + * "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; + * + * } + * div { + * width: 600px; + * margin: 5em auto; + * padding: 2em; + * background-color: #fdfdff; + * border-radius: 0.5em; + * box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02); + * } + * a:link, a:visited { + * color: #38488f; + * text-decoration: none; + * } + * @media (max - width : 700px) { + * div { + * margin: 0 auto; + * width: auto; + * } + * } + */ diff --git a/src/main/model/html/HtmlParser.java b/src/main/model/html/HtmlParser.java index 6ad5af4..d6b4ff1 100644 --- a/src/main/model/html/HtmlParser.java +++ b/src/main/model/html/HtmlParser.java @@ -5,54 +5,6 @@ import java.util.*; import model.util.Node; import org.javatuples.*; -/* - - - - j-james - - - - - - - -
-

- j-james -

- -
-
-
- -
-

Hello, I'm JJ, and I go by j-james on the Internet.

-

I'm a second-year student at the University of British Columbia, flag hunter for Maple Bacon, embedded programmer on UBC Bionics, and occasional ultimate frisbee and roller/ice hockey player.

-

Outside of school, sports, and social life, I enjoy building and contributing to free-and-open-source projects. The majority of my work can either be found on GitHub or at SourceHut.

-
-
- -
- - - - + + + + +