src/main/model/css/CssLexer.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

package model.css;

import java.util.ArrayList;

/**
 * This lexer splits an input by whitespace, braces, colons, and semicolons.
 * Braces, colons, and semicolons are included in the lexed output; whitespace
 * (outside of quoted strings) is not.
 * <br>
 * CSS, thankfully, is far more rigid and less forgiving of errors than HTML.
 * It also has multiple layers of fallback for errors, ranging from "ignore this
 * property", to "ignore this rule", to "this isn't CSS at all" and ignoring everything.
 * <br>
 * Still, even though we don't have to deal with much garbage (future edit: whoops, we do have to
 * deal with escaped quotes), the lexer is implemented with a for loop instead of split() to leave
 * room for future optimizations.
 */
public class CssLexer {

    /**
     * Splits CSS source text into a flat list of string tokens.
     * <br>
     * Outside of quoted strings, whitespace (space, tab, newline, carriage return, form feed)
     * separates tokens and is discarded, while each of <code>{ } ; :</code> ends the current
     * token and is emitted as a one-character token of its own. Inside a single- or
     * double-quoted string every character, including those delimiters, is kept verbatim;
     * the surrounding quote characters stay part of the token.
     * <br>
     * A quote preceded by an unescaped backslash neither opens nor closes a string, and a quote
     * of the other kind inside a string is treated as an ordinary character. If the input ends
     * in the middle of a token (or an unterminated string), that token is still emitted.
     *
     * @param input the CSS text to tokenize
     * @return the tokens in the order they appear in {@code input}; empty if there are none
     * @throws NullPointerException if {@code input} is null
     */
    public static ArrayList<String> lex(String input) {
        ArrayList<String> tokens = new ArrayList<>();
        StringBuilder token = new StringBuilder();
        // Quote character that opened the current string, or '\0' when outside any string.
        char openQuote = '\0';
        // True when the current character directly follows an unescaped backslash.
        boolean escaped = false;

        for (int idx = 0; idx < input.length(); idx++) {
            char c = input.charAt(idx);
            boolean inString = openQuote != '\0';

            switch (c) {
                case '{': case '}': case ';': case ':':
                    if (inString) {
                        token.append(c);
                    } else {
                        flush(token, tokens);
                        tokens.add(Character.toString(c));
                    }
                    break;
                case ' ': case '\n': case '\t': case '\r': case '\f':
                    if (inString) {
                        token.append(c);
                    } else {
                        flush(token, tokens);
                    }
                    break;
                case '"': case '\'':
                    if (!escaped) {
                        if (!inString) {
                            openQuote = c;
                        } else if (openQuote == c) {
                            // Only the same kind of quote that opened the string closes it.
                            openQuote = '\0';
                        }
                    }
                    token.append(c);
                    break;
                default:
                    token.append(c);
                    break;
            }

            // A backslash escapes the next character only if it is not itself escaped,
            // so that the quote in \\" still terminates the string.
            escaped = !escaped && c == '\\';
        }

        // Emit whatever was still being built when the input ended.
        flush(token, tokens);
        return tokens;
    }

    /** Appends the pending token to {@code tokens} if it is non-empty, then clears it. */
    private static void flush(StringBuilder token, ArrayList<String> tokens) {
        if (token.length() > 0) {
            tokens.add(token.toString());
            token.setLength(0);
        }
    }
}