From 3e9bb5fae16c35938bc1f7f7669c12cc355c9331 Mon Sep 17 00:00:00 2001
From: j-james
Date: Sun, 16 Oct 2022 23:25:45 -0700
Subject: Basic prototypes of HTML/CSS lexers
---
.idea/checkstyle-idea.xml | 26 +++----
.idea/libraries/javatuples.xml | 12 ++++
.idea/modules.xml | 2 +-
.idea/uiDesigner.xml | 124 ++++++++++++++++++++++++++++++++++
Project-Starter.iml | 33 ---------
apus.iml | 32 +++++++++
data/example.css | 25 +++++++
data/example.html | 46 +++++++++++++
data/example.md | 39 +++++++++++
data/tobs.jpg | Bin 314309 -> 0 bytes
src/main/model/MyModel.java | 5 --
src/main/model/css/CssLexer.java | 63 +++++++++++++++++
src/main/model/html/HtmlLexer.java | 68 +++++++++++++++++++
src/main/model/util/AbstractTree.java | 35 ++++++++++
src/main/model/util/Lexer.java | 58 ++++++++++++++++
src/test/model/CssLexerTest.java | 67 ++++++++++++++++++
src/test/model/HtmlLexerTest.java | 69 +++++++++++++++++++
src/test/model/MyModelTest.java | 7 --
18 files changed, 652 insertions(+), 59 deletions(-)
create mode 100644 .idea/libraries/javatuples.xml
create mode 100644 .idea/uiDesigner.xml
delete mode 100644 Project-Starter.iml
create mode 100644 apus.iml
create mode 100644 data/example.css
create mode 100644 data/example.html
create mode 100644 data/example.md
delete mode 100644 data/tobs.jpg
delete mode 100644 src/main/model/MyModel.java
create mode 100644 src/main/model/css/CssLexer.java
create mode 100644 src/main/model/html/HtmlLexer.java
create mode 100644 src/main/model/util/AbstractTree.java
create mode 100644 src/main/model/util/Lexer.java
create mode 100644 src/test/model/CssLexerTest.java
create mode 100644 src/test/model/HtmlLexerTest.java
delete mode 100644 src/test/model/MyModelTest.java
diff --git a/.idea/checkstyle-idea.xml b/.idea/checkstyle-idea.xml
index 551b800..eda52d4 100644
--- a/.idea/checkstyle-idea.xml
+++ b/.idea/checkstyle-idea.xml
@@ -1,18 +1,18 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ 8.18
+ JavaOnly
+
+
+
+
+
+
+ (bundled)
+ (bundled)
+ $PROJECT_DIR$/checkstyle.xml
+
\ No newline at end of file
diff --git a/.idea/libraries/javatuples.xml b/.idea/libraries/javatuples.xml
new file mode 100644
index 0000000..adca311
--- /dev/null
+++ b/.idea/libraries/javatuples.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
index 7ac8932..7045e98 100644
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -2,7 +2,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml
new file mode 100644
index 0000000..2b63946
--- /dev/null
+++ b/.idea/uiDesigner.xml
@@ -0,0 +1,124 @@
+
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+
+
+
\ No newline at end of file
diff --git a/Project-Starter.iml b/Project-Starter.iml
deleted file mode 100644
index 08bb910..0000000
--- a/Project-Starter.iml
+++ /dev/null
@@ -1,33 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/apus.iml b/apus.iml
new file mode 100644
index 0000000..19129c3
--- /dev/null
+++ b/apus.iml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/data/example.css b/data/example.css
new file mode 100644
index 0000000..dd93d52
--- /dev/null
+++ b/data/example.css
@@ -0,0 +1,25 @@
+body {
+ background-color: #f0f0f2;
+ margin: 0;
+ padding: 0;
+ font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
+
+}
+div {
+ width: 600px;
+ margin: 5em auto;
+ padding: 2em;
+ background-color: #fdfdff;
+ border-radius: 0.5em;
+ box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
+}
+a:link, a:visited {
+ color: #38488f;
+ text-decoration: none;
+}
+@media (max-width: 700px) {
+ div {
+ margin: 0 auto;
+ width: auto;
+ }
+}
diff --git a/data/example.html b/data/example.html
new file mode 100644
index 0000000..b62fc42
--- /dev/null
+++ b/data/example.html
@@ -0,0 +1,46 @@
+
+
+
+ Example Domain
+
+
+
+
+
+
+
+
+
+
Example Domain
+
This domain is for use in illustrative examples in documents. You may use this
+ domain in literature without prior coordination or asking for permission.
+
More information...
+
+
+
diff --git a/data/example.md b/data/example.md
new file mode 100644
index 0000000..45b29de
--- /dev/null
+++ b/data/example.md
@@ -0,0 +1,39 @@
+Heading
+=======
+
+Sub-heading
+-----------
+
+# Alternative heading #
+
+Paragraphs are separated
+by a blank line.
+
+Two spaces at the end of a line
+produce a line break.
+
+Text attributes _italic_, **bold**, `monospace`.
+
+Horizontal rule:
+
+---
+
+Bullet lists nested within numbered list:
+
+1. fruits
+ * apple
+ * banana
+2. vegetables
+ - carrot
+ - broccoli
+
+A [link](http://example.com).
+
+![Image](Icon-pictures.png "icon")
+
+> Markdown uses email-style
+characters for blockquoting.
+>
+> Multiple paragraphs need to be prepended individually.
+
+Most inline HTML tags are supported.
\ No newline at end of file
diff --git a/data/tobs.jpg b/data/tobs.jpg
deleted file mode 100644
index f1652ef..0000000
Binary files a/data/tobs.jpg and /dev/null differ
diff --git a/src/main/model/MyModel.java b/src/main/model/MyModel.java
deleted file mode 100644
index f9a3dd7..0000000
--- a/src/main/model/MyModel.java
+++ /dev/null
@@ -1,5 +0,0 @@
-package model;
-
-public class MyModel {
- // delete or rename this class!
-}
diff --git a/src/main/model/css/CssLexer.java b/src/main/model/css/CssLexer.java
new file mode 100644
index 0000000..657d3e1
--- /dev/null
+++ b/src/main/model/css/CssLexer.java
@@ -0,0 +1,63 @@
+package model.css;
+
+import java.util.ArrayList;
+
+/**
+ * This lexer splits an input by whitespace, braces, semicolons, and colons.
+ * Braces, semicolons, and colons are included in the lexed output; unquoted whitespace is not.
+ *
+ * CSS, thankfully, is far more rigid and less forgiving of errors than HTML. It also has multiple
+ * layers of fallback for errors: "ignore this property", "ignore this rule", or "ignore it all".
+ * Quoted strings may contain delimiters and backslash-escaped quotes, so the lexer is a hand-written
+ * loop instead of split(); that also leaves room for future optimizations.
+ */
+public class CssLexer {
+
+    // Takes a String of raw CSS and tokenizes it; text after the last delimiter is kept as a final token.
+    public static ArrayList<String> lex(String input) {
+        StringBuilder token = new StringBuilder();
+        ArrayList<String> tokens = new ArrayList<>();
+        boolean inSingleQuotes = false;
+        boolean inDoubleQuotes = false;
+        char previous = '\0';
+        for (char i : input.toCharArray()) {
+            switch (i) {
+                case '{': case '}': case ';': case ':':
+                case ' ': case '\n': case '\t':
+                    if (inSingleQuotes || inDoubleQuotes) {
+                        token.append(i); // delimiters are literal text inside a quoted string
+                        break;
+                    }
+                    if (token.length() > 0) {
+                        tokens.add(token.toString());
+                        token.setLength(0);
+                    }
+                    if (i != ' ' && i != '\n' && i != '\t') {
+                        tokens.add(Character.toString(i)); // punctuation is emitted, whitespace dropped
+                    }
+                    break;
+                case '"':
+                    // an escaped quote, or one inside the other kind of quotes, is literal text
+                    if (previous != '\\' && !inSingleQuotes) {
+                        inDoubleQuotes = !inDoubleQuotes;
+                    }
+                    token.append(i);
+                    break;
+                case '\'':
+                    if (previous != '\\' && !inDoubleQuotes) {
+                        inSingleQuotes = !inSingleQuotes;
+                    }
+                    token.append(i);
+                    break;
+                default:
+                    token.append(i);
+                    break;
+            }
+            previous = i;
+        }
+        if (token.length() > 0) {
+            tokens.add(token.toString()); // flush a final token that no delimiter terminated
+        }
+        return tokens;
+    }
+}
diff --git a/src/main/model/html/HtmlLexer.java b/src/main/model/html/HtmlLexer.java
new file mode 100644
index 0000000..8cad425
--- /dev/null
+++ b/src/main/model/html/HtmlLexer.java
@@ -0,0 +1,68 @@
+package model.html;
+
+import java.util.ArrayList;
+
+/**
+ * We'll tokenize HTML by tags: disregarding the contents of the tag and attributes within the tag.
+ * Text between tags is emitted verbatim (whitespace included): nothing is collapsed at this step.
+ */
+public class HtmlLexer {
+
+    // Takes a String of raw HTML, and tokenizes it for our parser: every tag ("<...>") and every run of
+    // text between tags becomes one token, in source order.
+    public static ArrayList<String> lex(String input) {
+        StringBuilder token = new StringBuilder();
+        ArrayList<String> tokens = new ArrayList<>();
+        boolean inTag = false;
+        boolean inSingleQuotes = false;
+        boolean inDoubleQuotes = false;
+
+        for (char i : input.toCharArray()) {
+            token.append(i);
+            switch (i) {
+                case '<':
+                    if (!inSingleQuotes && !inDoubleQuotes) {
+                        inTag = true;
+                        if (token.length() > 1) {
+                            // flush the text (or unfinished tag) collected before this '<'
+                            tokens.add(token.substring(0, token.length() - 1));
+                            token.setLength(0);
+                            token.append('<');
+                        }
+                    } else if (inTag) {
+                        System.out.printf("Probably failing parser");
+                    }
+                    break;
+                case '>':
+                    if (!inSingleQuotes && !inDoubleQuotes) {
+                        if (!inTag) {
+                            System.out.printf("Probably failing parser");
+                        }
+                        inTag = false;
+                        tokens.add(token.toString());
+                        token.setLength(0);
+                    }
+                    break;
+                case '"':
+                    // quotes only delimit attribute values; in text content they are ordinary characters
+                    if (inTag && !inSingleQuotes) {
+                        inDoubleQuotes = !inDoubleQuotes;
+                    }
+                    break;
+                case '\'':
+                    if (inTag && !inDoubleQuotes) {
+                        inSingleQuotes = !inSingleQuotes;
+                    }
+                    break;
+                default:
+                    break;
+            }
+        }
+        // When lexing invalid HTML we may end up with trailing garbage: either an unfinished tag
+        // (closed here with a synthetic '>') or extra text. Those are the only two options at the lex step.
+        if (token.length() > 0) {
+            tokens.add(inTag ? token + ">" : token.toString());
+        }
+        return tokens;
+    }
+}
diff --git a/src/main/model/util/AbstractTree.java b/src/main/model/util/AbstractTree.java
new file mode 100644
index 0000000..4c74732
--- /dev/null
+++ b/src/main/model/util/AbstractTree.java
@@ -0,0 +1,35 @@
+package model.util;
+
+import org.javatuples.*;
+
+import java.util.*;
+
+// Utility class for a general tree: we'll be using these a lot
+public abstract class AbstractTree<T> {
+
+    // An AbstractTree holds some kind of data, generic in T: e.g. a tag, attributes, a tag and attributes, etc
+    private T data;
+    // Since it's a tree every node also has children.
+    private ArrayList<AbstractTree<T>> children;
+    // future implementations may want to consider adding an Optional<> parent; or an Optional<> prevSibling
+
+    public T getData() {
+        return data;
+    }
+
+    // Returns the stored list of children itself (not a copy): changes to it change this node.
+    public ArrayList<AbstractTree<T>> getChildren() {
+        return children;
+    }
+
+    // Stores the given list as-is (not copied); it must be non-null for addChild to work.
+    public AbstractTree(T data, ArrayList<AbstractTree<T>> children) {
+        this.data = data;
+        this.children = children;
+    }
+
+    // Appends a child to the end of this node's list of children.
+    public void addChild(AbstractTree<T> child) {
+        this.children.add(child);
+    }
+}
diff --git a/src/main/model/util/Lexer.java b/src/main/model/util/Lexer.java
new file mode 100644
index 0000000..b35caa6
--- /dev/null
+++ b/src/main/model/util/Lexer.java
@@ -0,0 +1,58 @@
+package model.util;
+
+import java.util.*;
+
+// General-purpose Lexer
+public class Lexer {
+
+    // unused, helper function for if we implement finding identifiers longer than a character
+    private static int longestDelimiter(Set<String> delimiters) {
+        return delimiters.stream().mapToInt(String::length).max().orElse(0);
+    }
+
+    /**
+     * Lexes a "free-form" language. "free-form" has a specific meaning here that's important to preserve:
+     * "free-form" means that _additional_ whitespace characters do not affect the language: e.g. two newlines
+     * instead of one, four spaces instead of two, etc. They are _not_ "whitespace-insensitive", which is usually
+     * a misnomer.
+     * The name's a bit of a joke: free-form languages are generally referred to as whitespace-insensitive -->
+     * insensitive == rude. Jokes are funnier when you have to explain them.
+     * Also, insensitiveLex() and freeformLex() aren't really that good of names.
+     *
+     * NOTE: This lexer only works with single-character delimiters.
+     * TODO: deduplicate whitespace
+     */
+    // public static ArrayList rudeLex(String input, Set delimiters) {}
+
+    /**
+     * We might as well implement a lexer for non-free-form languages, but whatever. We won't use it.
+     * Splits the input at every delimiter character; delimiters are emitted as their own tokens and
+     * nothing (whitespace included) is dropped.
+     *
+     * @param input the text to split
+     * @param delimiters single-character delimiters, given as Characters or one-character Strings
+     * @return the tokens in source order, including the text after the last delimiter
+     */
+    public static ArrayList<String> sensitiveLex(String input, Set<?> delimiters) {
+        ArrayList<String> tokens = new ArrayList<>();
+        StringBuilder currentToken = new StringBuilder();
+        // terrible c-style for loop because we may need to manipulate the index in the future
+        for (int i = 0; i < input.length(); i++) {
+            char next = input.charAt(i);
+            // accept delimiters given either as Characters or as one-character Strings
+            if (delimiters.contains(next) || delimiters.contains(Character.toString(next))) {
+                if (currentToken.length() > 0) {
+                    tokens.add(currentToken.toString());
+                    currentToken.setLength(0);
+                }
+                tokens.add(Character.toString(next));
+            } else {
+                currentToken.append(next);
+            }
+        }
+        if (currentToken.length() > 0) {
+            tokens.add(currentToken.toString()); // keep the text after the last delimiter
+        }
+        return tokens;
+    }
+}
diff --git a/src/test/model/CssLexerTest.java b/src/test/model/CssLexerTest.java
new file mode 100644
index 0000000..4ed28e2
--- /dev/null
+++ b/src/test/model/CssLexerTest.java
@@ -0,0 +1,67 @@
+package model;
+
+import model.css.CssLexer;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class CssLexerTest {
+
+    @Test
+    void testIdiomaticCss() throws IOException {
+        // let an unreadable fixture fail the test instead of being printed and silently ignored
+        String idiomaticCss = Files.readString(Path.of("data/example.css"));
+        String[] expected = {
+            "body", "{", "background-color", ":", "#f0f0f2", ";", "margin", ":", "0", ";", "padding", ":", "0", ";",
+            "font-family", ":", "-apple-system,", "system-ui,", "BlinkMacSystemFont,", "\"Segoe UI\",", "\"Open Sans\",",
+            "\"Helvetica Neue\",", "Helvetica,", "Arial,", "sans-serif", ";", "}",
+            "div", "{", "width", ":", "600px", ";", "margin", ":", "5em", "auto", ";", "padding", ":", "2em", ";",
+            "background-color", ":", "#fdfdff", ";", "border-radius", ":", "0.5em", ";",
+            "box-shadow", ":", "2px", "3px", "7px", "2px", "rgba(0,0,0,0.02)", ";", "}",
+            "a", ":", "link,", "a", ":", "visited", "{", "color", ":", "#38488f", ";", "text-decoration", ":", "none", ";", "}",
+            "@media", "(max-width", ":", "700px)", "{",
+            "div", "{", "margin", ":", "0", "auto", ";", "width", ":", "auto", ";", "}", "}"
+        };
+        assertEquals(Arrays.asList(expected), CssLexer.lex(idiomaticCss));
+    }
+/**
+ FoodServicesCard c1;
+ FoodServicesCard c2;
+ FoodServicesCard c3;
+
+ @BeforeEach
+ void runBefore() {
+ c1 = new FoodServicesCard(0);
+ c2 = new FoodServicesCard(100);
+ c3 = new FoodServicesCard(2000);
+ }
+
+ @Test
+ void testReloadingAndPurchasing() {
+ assertFalse(c1.makePurchase(100));
+ assertEquals(c1.getBalance(), 0);
+ c2.reload(10);
+ assertEquals(c2.getBalance(), 110);
+ assertTrue(c3.makePurchase(1400));
+ assertEquals(c3.getBalance(), 600);
+ }
+
+ @Test
+ void testRewardPoints() {
+ if (c1.makePurchase(c1.POINTS_NEEDED_FOR_CASH_BACK / 2)) {
+ assertEquals(c1.getRewardPoints(), (c1.POINTS_NEEDED_FOR_CASH_BACK / 2));
+ } else {
+ assertEquals(c1.getRewardPoints(), 0);
+ }
+ c2.makePurchase(c2.POINTS_NEEDED_FOR_CASH_BACK);
+ assertEquals(c2.getRewardPoints(), 0);
+ c3.makePurchase(1200);
+ assertEquals(c3.getRewardPoints(), 1200 % c3.POINTS_NEEDED_FOR_CASH_BACK);
+ }
+ */
+}
\ No newline at end of file
diff --git a/src/test/model/HtmlLexerTest.java b/src/test/model/HtmlLexerTest.java
new file mode 100644
index 0000000..9dd5574
--- /dev/null
+++ b/src/test/model/HtmlLexerTest.java
@@ -0,0 +1,69 @@
+package model;
+
+import model.html.HtmlLexer;
+
+import org.junit.jupiter.api.*;
+
+import java.util.Arrays;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class HtmlLexerTest {
+ String idiomaticHtml = "Hello,world!
";
+ String brokenHtml = "","","","","","","Hello,world!","
","",""};
+ assertEquals(HtmlLexer.lex(idiomaticHtml), Arrays.asList(idiomaticHtmlArray));
+ }
+
+ @Test
+ void testBrokenHtml() {
+ String[] brokenHtmlArray = {"","",""," ",""};
+ assertEquals(HtmlLexer.lex(brokenHtml), Arrays.asList(brokenHtmlArray));
+ }
+
+ @Test
+ void testTrailingTextHtml() {
+ String[] trailingTextHtmlArray = {"","",""," ","ba"};
+ assertEquals(HtmlLexer.lex(trailingTextHtml), Arrays.asList(trailingTextHtmlArray));
+ }
+
+/**
+ FoodServicesCard c1;
+ FoodServicesCard c2;
+ FoodServicesCard c3;
+
+ @BeforeEach
+ void runBefore() {
+ c1 = new FoodServicesCard(0);
+ c2 = new FoodServicesCard(100);
+ c3 = new FoodServicesCard(2000);
+ }
+
+ @Test
+ void testReloadingAndPurchasing() {
+ assertFalse(c1.makePurchase(100));
+ assertEquals(c1.getBalance(), 0);
+ c2.reload(10);
+ assertEquals(c2.getBalance(), 110);
+ assertTrue(c3.makePurchase(1400));
+ assertEquals(c3.getBalance(), 600);
+ }
+
+ @Test
+ void testRewardPoints() {
+ if (c1.makePurchase(c1.POINTS_NEEDED_FOR_CASH_BACK / 2)) {
+ assertEquals(c1.getRewardPoints(), (c1.POINTS_NEEDED_FOR_CASH_BACK / 2));
+ } else {
+ assertEquals(c1.getRewardPoints(), 0);
+ }
+ c2.makePurchase(c2.POINTS_NEEDED_FOR_CASH_BACK);
+ assertEquals(c2.getRewardPoints(), 0);
+ c3.makePurchase(1200);
+ assertEquals(c3.getRewardPoints(), 1200 % c3.POINTS_NEEDED_FOR_CASH_BACK);
+ }
+ */
+}
\ No newline at end of file
diff --git a/src/test/model/MyModelTest.java b/src/test/model/MyModelTest.java
deleted file mode 100644
index c41f32e..0000000
--- a/src/test/model/MyModelTest.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package model;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-class MyModelTest {
- // delete or rename this class!
-}
\ No newline at end of file
--
cgit v1.2.3-70-g09d2