package model.css;
import java.util.ArrayList;
/**
* This lexer splits an input by whitespace, brackets, and semicolons.
* Brackets and semicolons are included in the lexed output, whitespace is not.
*
* CSS, thankfully, is far more rigid and less-forgiving of errors than HTMl.
* It also has multiple layers of fallback for errors: ranging from: "ignore this
* property", to "ignore this rule", to "this isn't fucking CSS" and ignore it all.
*
* Still, even though we don't have to deal with garbage like escaped quotes (future edit: whoops, yes we do) and
* what not, we'll still implement our lexer with a for loop instead of split() for future optimizations.
*/
public class CssLexer {
public static ArrayList lex(String input) {
String token = "";
ArrayList tokens = new ArrayList<>();
boolean inSingleQuotes = false;
boolean inDoubleQuotes = false;
char previous = '\0';
for (char i : input.toCharArray()) {
// i HATE fallthrough switch statements
switch (i) {
case '{': case '}': case ';': case ':':
case ' ': case '\n': case '\t':
if (!inSingleQuotes && !inDoubleQuotes) {
if (!token.equals("")) {
tokens.add(token);
token = "";
}
switch (i) {
case '{': case '}': case ';': case ':':
tokens.add(Character.toString(i));
break;
case ' ': case '\n': case '\t':
break;
}
} else {
token += i;
}
break;
// intentional use of footgun behavior
case '"':
if (previous != '\\') {
inDoubleQuotes = !inDoubleQuotes;
}
case '\'':
if (previous != '\\') {
inSingleQuotes = !inSingleQuotes;
}
default:
token += i;
break;
}
previous = i;
}
return tokens;
}
}