aboutsummaryrefslogtreecommitdiff
path: root/src/main/model/util/Lexer.java
blob: b35caa6e5276b8c67868b9cb36d967a6624f3f4c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
package model.util;

import java.util.*;

// General-purpose Lexer
public class Lexer {

    // private static final Set<String> whitespace = new HashSet<String>(" ", "\n");

    // unused, helper function for if we implement finding identifers longer than a character
    private static int longestDelimiter(Set<String> delimiters) {
        int longestDelimiter = 0;
        for (String delimiter : delimiters) {
            if (delimiter.length() > longestDelimiter) {
                longestDelimiter = delimiter.length();
            }
        }
        return longestDelimiter;
    }

    /**
     * Lexes a "free-form" language. "free-form" has a specific meaning here that's important to preserve:
     * "free-form" means that _additional_ whitespace characters do not affect the language: e.g. two newlines
     * instead of one, four spaces instead of two, etc. They are _not_ "whitespace-insensitive", which is usually
     * a misnomer.
     * The name's a bit of a joke: free-form languages are generally referred to as whitespace-insensitive -->
     * insensitive == rude. Jokes are funnier when you have to explain them.
     * Also, insensitiveLex() and freeformLex() aren't really that good of names.
     *
     * NOTE: This lexer only works with single-character deliminators.
     * TODO: deduplicate whitespace
     */
    // public static ArrayList<String> rudeLex(String input, Set<Character> delimiters) {}

    /**
     * We might as well implement a lexer for non-free-form languages, but whatever. We won't use it.
     */
    public static ArrayList<String> sensitiveLex(String input, Set<Character> delimiters) {
        // int longestDelimiter = longestDelimiter(delimiters);

        ArrayList<String> tokens = new ArrayList<String>();
        String currentToken = "";
        // terrible c-style for loop because we may need to manipulate the index in the future
        for (int i = 0; i < input.length(); i++) {
            char nextToken = input.charAt(i);
            if (delimiters.contains(nextToken)) {
                if (!currentToken.equals("")) {
                    tokens.add(currentToken);
                }
                tokens.add(Character.toString(nextToken));
                currentToken = "";
            } else {
                currentToken += input.charAt(i);
            }
        }
        return tokens;
    }
}