From 3e9bb5fae16c35938bc1f7f7669c12cc355c9331 Mon Sep 17 00:00:00 2001 From: j-james Date: Sun, 16 Oct 2022 23:25:45 -0700 Subject: Basic prototypes of HTML/CSS lexers --- .idea/checkstyle-idea.xml | 26 +++---- .idea/libraries/javatuples.xml | 12 ++++ .idea/modules.xml | 2 +- .idea/uiDesigner.xml | 124 ++++++++++++++++++++++++++++++++++ Project-Starter.iml | 33 --------- apus.iml | 32 +++++++++ data/example.css | 25 +++++++ data/example.html | 46 +++++++++++++ data/example.md | 39 +++++++++++ data/tobs.jpg | Bin 314309 -> 0 bytes src/main/model/MyModel.java | 5 -- src/main/model/css/CssLexer.java | 63 +++++++++++++++++ src/main/model/html/HtmlLexer.java | 68 +++++++++++++++++++ src/main/model/util/AbstractTree.java | 35 ++++++++++ src/main/model/util/Lexer.java | 58 ++++++++++++++++ src/test/model/CssLexerTest.java | 67 ++++++++++++++++++ src/test/model/HtmlLexerTest.java | 69 +++++++++++++++++++ src/test/model/MyModelTest.java | 7 -- 18 files changed, 652 insertions(+), 59 deletions(-) create mode 100644 .idea/libraries/javatuples.xml create mode 100644 .idea/uiDesigner.xml delete mode 100644 Project-Starter.iml create mode 100644 apus.iml create mode 100644 data/example.css create mode 100644 data/example.html create mode 100644 data/example.md delete mode 100644 data/tobs.jpg delete mode 100644 src/main/model/MyModel.java create mode 100644 src/main/model/css/CssLexer.java create mode 100644 src/main/model/html/HtmlLexer.java create mode 100644 src/main/model/util/AbstractTree.java create mode 100644 src/main/model/util/Lexer.java create mode 100644 src/test/model/CssLexerTest.java create mode 100644 src/test/model/HtmlLexerTest.java delete mode 100644 src/test/model/MyModelTest.java diff --git a/.idea/checkstyle-idea.xml b/.idea/checkstyle-idea.xml index 551b800..eda52d4 100644 --- a/.idea/checkstyle-idea.xml +++ b/.idea/checkstyle-idea.xml @@ -1,18 +1,18 @@ - - \ No newline at end of file diff --git a/.idea/libraries/javatuples.xml 
b/.idea/libraries/javatuples.xml new file mode 100644 index 0000000..adca311 --- /dev/null +++ b/.idea/libraries/javatuples.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index 7ac8932..7045e98 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 0000000..2b63946 --- /dev/null +++ b/.idea/uiDesigner.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Project-Starter.iml b/Project-Starter.iml deleted file mode 100644 index 08bb910..0000000 --- a/Project-Starter.iml +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/apus.iml b/apus.iml new file mode 100644 index 0000000..19129c3 --- /dev/null +++ b/apus.iml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/data/example.css b/data/example.css new file mode 100644 index 0000000..dd93d52 --- /dev/null +++ b/data/example.css @@ -0,0 +1,25 @@ +body { + background-color: #f0f0f2; + margin: 0; + padding: 0; + font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; + +} +div { + width: 600px; + margin: 5em auto; + padding: 2em; + background-color: #fdfdff; + border-radius: 0.5em; + box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02); +} +a:link, a:visited { + color: #38488f; + text-decoration: none; +} +@media (max-width: 700px) { + div { + margin: 0 auto; + width: auto; + } +} diff --git a/data/example.html b/data/example.html new file mode 
100644 index 0000000..b62fc42 --- /dev/null +++ b/data/example.html @@ -0,0 +1,46 @@ + + + + Example Domain + + + + + + + + +
+

Example Domain

+

This domain is for use in illustrative examples in documents. You may use this + domain in literature without prior coordination or asking for permission.

+

More information...

+
+ + diff --git a/data/example.md b/data/example.md new file mode 100644 index 0000000..45b29de --- /dev/null +++ b/data/example.md @@ -0,0 +1,39 @@ +Heading +======= + +Sub-heading +----------- + +# Alternative heading # + +Paragraphs are separated +by a blank line. + +Two spaces at the end of a line +produce a line break. + +Text attributes _italic_, **bold**, `monospace`. + +Horizontal rule: + +--- + +Bullet lists nested within numbered list: + +1. fruits + * apple + * banana +2. vegetables + - carrot + - broccoli + +A [link](http://example.com). + +![Image](Icon-pictures.png "icon") + +> Markdown uses email-style +characters for blockquoting. +> +> Multiple paragraphs need to be prepended individually. + +Most inline HTML tags are supported. \ No newline at end of file diff --git a/data/tobs.jpg b/data/tobs.jpg deleted file mode 100644 index f1652ef..0000000 Binary files a/data/tobs.jpg and /dev/null differ diff --git a/src/main/model/MyModel.java b/src/main/model/MyModel.java deleted file mode 100644 index f9a3dd7..0000000 --- a/src/main/model/MyModel.java +++ /dev/null @@ -1,5 +0,0 @@ -package model; - -public class MyModel { - // delete or rename this class! -} diff --git a/src/main/model/css/CssLexer.java b/src/main/model/css/CssLexer.java new file mode 100644 index 0000000..657d3e1 --- /dev/null +++ b/src/main/model/css/CssLexer.java @@ -0,0 +1,63 @@ +package model.css; + +import java.util.ArrayList; + +/** + * This lexer splits an input by whitespace, brackets, and semicolons. + * Brackets and semicolons are included in the lexed output, whitespace is not. + *
+ * CSS, thankfully, is far more rigid and less forgiving of errors than HTML. + * It also has multiple layers of fallback for errors, ranging from "ignore this + * property", to "ignore this rule", to "this isn't fucking CSS" and ignore it all. + *
+ * Still, even though we don't have to deal with garbage like escaped quotes (future edit: whoops, yes we do) and + * what not, we'll still implement our lexer with a for loop instead of split() for future optimizations. + */ +public class CssLexer { + + public static ArrayList lex(String input) { + String token = ""; + ArrayList tokens = new ArrayList<>(); + boolean inSingleQuotes = false; + boolean inDoubleQuotes = false; + char previous = '\0'; + + for (char i : input.toCharArray()) { + // i HATE fallthrough switch statements + switch (i) { + case '{': case '}': case ';': case ':': + case ' ': case '\n': case '\t': + if (!inSingleQuotes && !inDoubleQuotes) { + if (!token.equals("")) { + tokens.add(token); + token = ""; + } + switch (i) { + case '{': case '}': case ';': case ':': + tokens.add(Character.toString(i)); + break; + case ' ': case '\n': case '\t': + break; + } + } else { + token += i; + } + break; + // intentional use of footgun behavior + case '"': + if (previous != '\\') { + inDoubleQuotes = !inDoubleQuotes; + } + case '\'': + if (previous != '\\') { + inSingleQuotes = !inSingleQuotes; + } + default: + token += i; + break; + } + previous = i; + } + return tokens; + } +} diff --git a/src/main/model/html/HtmlLexer.java b/src/main/model/html/HtmlLexer.java new file mode 100644 index 0000000..8cad425 --- /dev/null +++ b/src/main/model/html/HtmlLexer.java @@ -0,0 +1,68 @@ +package model.html; + +import java.util.ArrayList; + +/** + * We'll tokenize HTML by tags: disregarding the contents of the tag and attributes within the tag. + * The file is also considered to be free-form here: whitespace duplicates are disregarded. + */ +public class HtmlLexer { + + // Takes a String of raw HTML, and tokenizes it for our parser. 
+ public static ArrayList lex(String input) { + String token = ""; + ArrayList tokens = new ArrayList<>(); + boolean inTag = false; + boolean inSingleQuotes = false; + boolean inDoubleQuotes = false; + + for (char i : input.toCharArray()) { + token += i; + switch (i) { + case '<': + if (!inSingleQuotes && !inDoubleQuotes) { + inTag = true; + if (!token.equals("<")) { + tokens.add(token.substring(0, token.length() - 1)); + token = "<"; + } + } else if (inTag) { + System.out.printf("Probably failing parser"); + } + break; + case '>': + if (!inSingleQuotes && !inDoubleQuotes) { + if (!inTag) { + System.out.printf("Probably failing parser"); + } + inTag = false; + tokens.add(token); + token = ""; + } + break; + case '"': + if (!inSingleQuotes) { + inDoubleQuotes = !inDoubleQuotes; + } + break; + case '\'': + if (!inDoubleQuotes) { + inSingleQuotes = !inSingleQuotes; + } + break; + } + } + /** + * When lexing invalid HTML: we may end up with trailing garbage: either an unfinished tag or extra text + * (those are the only two options since this is just the lex step) + */ + if (!token.equals("")) { + if (inTag) { + tokens.add(token + ">"); + } else { + tokens.add(token); + } + } + return tokens; + } +} diff --git a/src/main/model/util/AbstractTree.java b/src/main/model/util/AbstractTree.java new file mode 100644 index 0000000..4c74732 --- /dev/null +++ b/src/main/model/util/AbstractTree.java @@ -0,0 +1,35 @@ +package model.util; + +import org.javatuples.*; + +import java.util.*; + +// Utility class for a general tree: we'll be using these a lot +public abstract class AbstractTree { + + // An AbstractTree holds some kind of data; we'll want this to be generic + // e.g. a tag, attributes, a tag and attributes, etc + private T data; + // Since it's a tree every node also has children. 
+ private ArrayList> children; + + // future implementations may want to consider adding an Optional<> parent; or an Optional<> prevSibling + + public T getData() { + return data; + } + + public ArrayList> getChildren() { + return children; + } + + // god so much boilerplate + public AbstractTree(T data, ArrayList> children) { + this.data = data; + this.children = children; + } + + public void addChild(AbstractTree child) { + this.children.add(child); + } +} diff --git a/src/main/model/util/Lexer.java b/src/main/model/util/Lexer.java new file mode 100644 index 0000000..b35caa6 --- /dev/null +++ b/src/main/model/util/Lexer.java @@ -0,0 +1,58 @@ +package model.util; + +import java.util.*; + +// General-purpose Lexer +public class Lexer { + + // private static final Set whitespace = new HashSet(" ", "\n"); + + // unused, helper function for if we implement finding identifers longer than a character + private static int longestDelimiter(Set delimiters) { + int longestDelimiter = 0; + for (String delimiter : delimiters) { + if (delimiter.length() > longestDelimiter) { + longestDelimiter = delimiter.length(); + } + } + return longestDelimiter; + } + + /** + * Lexes a "free-form" language. "free-form" has a specific meaning here that's important to preserve: + * "free-form" means that _additional_ whitespace characters do not affect the language: e.g. two newlines + * instead of one, four spaces instead of two, etc. They are _not_ "whitespace-insensitive", which is usually + * a misnomer. + * The name's a bit of a joke: free-form languages are generally referred to as whitespace-insensitive --> + * insensitive == rude. Jokes are funnier when you have to explain them. + * Also, insensitiveLex() and freeformLex() aren't really that good of names. + * + * NOTE: This lexer only works with single-character deliminators. 
+ * TODO: deduplicate whitespace + */ + // public static ArrayList rudeLex(String input, Set delimiters) {} + + /** + * We might as well implement a lexer for non-free-form languages, but whatever. We won't use it. + */ + public static ArrayList sensitiveLex(String input, Set delimiters) { + // int longestDelimiter = longestDelimiter(delimiters); + + ArrayList tokens = new ArrayList(); + String currentToken = ""; + // terrible c-style for loop because we may need to manipulate the index in the future + for (int i = 0; i < input.length(); i++) { + char nextToken = input.charAt(i); + if (delimiters.contains(nextToken)) { + if (!currentToken.equals("")) { + tokens.add(currentToken); + } + tokens.add(Character.toString(nextToken)); + currentToken = ""; + } else { + currentToken += input.charAt(i); + } + } + return tokens; + } +} diff --git a/src/test/model/CssLexerTest.java b/src/test/model/CssLexerTest.java new file mode 100644 index 0000000..4ed28e2 --- /dev/null +++ b/src/test/model/CssLexerTest.java @@ -0,0 +1,67 @@ +package model; + +import model.css.CssLexer; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class CssLexerTest { + + @Test + void testIdiomaticHtml() { + try { + String idiomaticCss = Files.readString(Path.of("data/example.css")); + String[] expected = {"body", "{", "background-color", ":", "#f0f0f2", ";", "margin", ":", "0", ";", "padding", ":", "0", ";", "font-family", ":", "-apple-system,", "system-ui,", "BlinkMacSystemFont,", "\"Segoe UI\",", "\"Open Sans\",", "\"Helvetica Neue\",", "Helvetica,", "Arial,", "sans-serif", ";", "}", "div", "{", "width", ":", "600px", ";", "margin", ":", "5em", "auto", ";", "padding", ":", "2em", ";", "background-color", ":", "#fdfdff", ";", "border-radius", ":", "0.5em", ";", "box-shadow", ":", "2px", "3px", "7px", "2px", "rgba(0,0,0,0.02)", ";", "}", 
"a", ":", "link,", "a", ":", "visited", "{", "color", ":", "#38488f", ";", "text-decoration", ":", "none", ";", "}", "@media", "(max-width", ":", "700px)", "{", "div", "{", "margin", ":", "0", "auto", ";", "width", ":", "auto", ";", "}", "}"}; + + assertEquals(CssLexer.lex(idiomaticCss), Arrays.asList(expected)); + for (String i : CssLexer.lex(idiomaticCss)) { + System.out.print("\""); + System.out.print(i); + System.out.print("\", "); + } + } catch (IOException e) { + System.out.printf("fuck %s\n", e.toString()); + System.out.println(System.getProperty("user.dir")); + } + } +/** + FoodServicesCard c1; + FoodServicesCard c2; + FoodServicesCard c3; + + @BeforeEach + void runBefore() { + c1 = new FoodServicesCard(0); + c2 = new FoodServicesCard(100); + c3 = new FoodServicesCard(2000); + } + + @Test + void testReloadingAndPurchasing() { + assertFalse(c1.makePurchase(100)); + assertEquals(c1.getBalance(), 0); + c2.reload(10); + assertEquals(c2.getBalance(), 110); + assertTrue(c3.makePurchase(1400)); + assertEquals(c3.getBalance(), 600); + } + + @Test + void testRewardPoints() { + if (c1.makePurchase(c1.POINTS_NEEDED_FOR_CASH_BACK / 2)) { + assertEquals(c1.getRewardPoints(), (c1.POINTS_NEEDED_FOR_CASH_BACK / 2)); + } else { + assertEquals(c1.getRewardPoints(), 0); + } + c2.makePurchase(c2.POINTS_NEEDED_FOR_CASH_BACK); + assertEquals(c2.getRewardPoints(), 0); + c3.makePurchase(1200); + assertEquals(c3.getRewardPoints(), 1200 % c3.POINTS_NEEDED_FOR_CASH_BACK); + } + */ +} \ No newline at end of file diff --git a/src/test/model/HtmlLexerTest.java b/src/test/model/HtmlLexerTest.java new file mode 100644 index 0000000..9dd5574 --- /dev/null +++ b/src/test/model/HtmlLexerTest.java @@ -0,0 +1,69 @@ +package model; + +import model.html.HtmlLexer; + +import org.junit.jupiter.api.*; + +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.*; + +class HtmlLexerTest { + String idiomaticHtml = "

Hello,world!

"; + String brokenHtml = "","","","","","

","Hello,world!","

","",""}; + assertEquals(HtmlLexer.lex(idiomaticHtml), Arrays.asList(idiomaticHtmlArray)); + } + + @Test + void testBrokenHtml() { + String[] brokenHtmlArray = {"","","","",""}; + assertEquals(HtmlLexer.lex(brokenHtml), Arrays.asList(brokenHtmlArray)); + } + + @Test + void testTrailingTextHtml() { + String[] trailingTextHtmlArray = {"","","","","ba"}; + assertEquals(HtmlLexer.lex(trailingTextHtml), Arrays.asList(trailingTextHtmlArray)); + } + +/** + FoodServicesCard c1; + FoodServicesCard c2; + FoodServicesCard c3; + + @BeforeEach + void runBefore() { + c1 = new FoodServicesCard(0); + c2 = new FoodServicesCard(100); + c3 = new FoodServicesCard(2000); + } + + @Test + void testReloadingAndPurchasing() { + assertFalse(c1.makePurchase(100)); + assertEquals(c1.getBalance(), 0); + c2.reload(10); + assertEquals(c2.getBalance(), 110); + assertTrue(c3.makePurchase(1400)); + assertEquals(c3.getBalance(), 600); + } + + @Test + void testRewardPoints() { + if (c1.makePurchase(c1.POINTS_NEEDED_FOR_CASH_BACK / 2)) { + assertEquals(c1.getRewardPoints(), (c1.POINTS_NEEDED_FOR_CASH_BACK / 2)); + } else { + assertEquals(c1.getRewardPoints(), 0); + } + c2.makePurchase(c2.POINTS_NEEDED_FOR_CASH_BACK); + assertEquals(c2.getRewardPoints(), 0); + c3.makePurchase(1200); + assertEquals(c3.getRewardPoints(), 1200 % c3.POINTS_NEEDED_FOR_CASH_BACK); + } + */ +} \ No newline at end of file diff --git a/src/test/model/MyModelTest.java b/src/test/model/MyModelTest.java deleted file mode 100644 index c41f32e..0000000 --- a/src/test/model/MyModelTest.java +++ /dev/null @@ -1,7 +0,0 @@ -package model; - -import static org.junit.jupiter.api.Assertions.*; - -class MyModelTest { - // delete or rename this class! -} \ No newline at end of file -- cgit v1.2.3-70-g09d2