Basic prototypes of HTML/CSS lexers

author: j-james 2022-10-17 06:25:45 +0000
committer: j-james 2022-10-17 06:27:55 +0000
commit: 3e9bb5fae16c35938bc1f7f7669c12cc355c9331 (patch)
tree: 82e1ab837579e7762071ea97c064c0750a38c106 /src/main/model/css
parent: 0845be5ec0215fb43f9dbdef00b22a733d4080b3 (diff)
1 files changed, 63 insertions, 0 deletions
diff --git a/src/main/model/css/CssLexer.java b/src/main/model/css/CssLexer.java
new file mode 100644
index 0000000..657d3e1
--- /dev/null
+++ b/src/main/model/css/CssLexer.java
@@ -0,0 +1,63 @@
+package model.css;
+
+import java.util.ArrayList;
+
+/**
+ * This lexer splits an input by whitespace, braces, colons, and semicolons.
+ * Braces, colons, and semicolons are included in the lexed output, whitespace is not.
+ * <br>
+ * CSS, thankfully, is far more rigid and less forgiving of errors than HTML.
+ * It also has multiple layers of fallback for errors, ranging from "ignore this
+ * property", to "ignore this rule", to "this isn't fucking CSS" and ignore it all.
+ * <br>
+ * Still, even though we don't have to deal with garbage like escaped quotes (future edit: whoops, yes we do) and
+ * what not, we'll still implement our lexer with a for loop instead of split() for future optimizations.
+ */
+public class CssLexer {
+    /**
+     * Splits CSS text into tokens. Braces, colons, and semicolons are emitted as their
+     * own tokens; whitespace only separates tokens. Inside a quoted string every
+     * character is kept in the current token, and any trailing token is emitted too.
+     *
+     * @param input the CSS source text to lex
+     * @return the tokens in source order
+     */
+    public static ArrayList<String> lex(String input) {
+        ArrayList<String> tokens = new ArrayList<>();
+        StringBuilder token = new StringBuilder();
+        char quote = '\0'; // quote char of the open string, or '\0' when outside one
+        boolean escaped = false; // true when the current char follows an unescaped backslash
+        for (char c : input.toCharArray()) {
+            switch (c) {
+                case '{': case '}': case ';': case ':':
+                case ' ': case '\n': case '\t': case '\r': case '\f':
+                    if (quote != '\0') {
+                        token.append(c);
+                        break;
+                    }
+                    if (token.length() > 0) {
+                        tokens.add(token.toString());
+                        token.setLength(0);
+                    }
+                    if (!Character.isWhitespace(c)) {
+                        tokens.add(Character.toString(c));
+                    }
+                    break;
+                case '"': case '\'':
+                    // Only an unescaped quote of the kind that opened a string closes it.
+                    if (!escaped && (quote == '\0' || quote == c)) {
+                        quote = (quote == '\0') ? c : '\0';
+                    }
+                    // deliberate fall through: the quote character itself stays in the token
+                default:
+                    token.append(c);
+                    break;
+            }
+            escaped = c == '\\' && !escaped;
+        }
+        if (token.length() > 0) {
+            tokens.add(token.toString()); // the last token has no delimiter after it to flush it
+        }
+        return tokens;
+    }
+}
author	j-james	2022-10-17 06:25:45 +0000
committer	j-james	2022-10-17 06:27:55 +0000
commit	3e9bb5fae16c35938bc1f7f7669c12cc355c9331 (patch)
tree	82e1ab837579e7762071ea97c064c0750a38c106 /src/main/model/css
parent	0845be5ec0215fb43f9dbdef00b22a733d4080b3 (diff)