diff --git a/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/Lexer.java b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/Lexer.java
new file mode 100644
index 00000000..ea0f5ccc
--- /dev/null
+++ b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/Lexer.java
@@ -0,0 +1,170 @@
+package it.cavallium.warppi.math.rules.dsl.frontend;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
+
+/**
+ * Converts the source string to a list of tokens.
+ */
+public class Lexer {
+    /**
+     * Maps each keyword's spelling (the lower-cased name of its token type) to that type.
+     * Lower-casing uses Locale.ROOT: with the default locale, a Turkish environment would
+     * map 'I' to a dotless i and keywords such as "pi" or "sin" would never match.
+     */
+    private static final Map<String, TokenType> keywords = Stream.of(
+        REDUCTION, EXPANSION, CALCULATION, EXISTENCE,
+        ARCCOS, ARCSIN, ARCTAN, COS, SIN, TAN, ROOT, SQRT, LOG,
+        UNDEFINED, PI, E
+    ).collect(Collectors.toMap(
+        tokenType -> tokenType.name().toLowerCase(Locale.ROOT),
+        Function.identity()
+    ));
+
+    private final String source;
+    private final List<Token> tokens = new ArrayList<>();
+    /** Index in {@code source} of the first character of the token being scanned. */
+    private int startOfLexeme = 0;
+    /** Index in {@code source} of the next character to be consumed. */
+    private int curPosition = 0;
+
+    public Lexer(final String source) {
+        this.source = source;
+    }
+
+    /**
+     * Scans the source and returns the tokens found, followed by an EOF token whose
+     * position is the length of the source.
+     *
+     * @return the token list accumulated by this lexer
+     * @throws RuntimeException on an unexpected character, or on a decimal separator that
+     *                          is not followed by at least one digit
+     */
+    public List<Token> lex() {
+        while (!atEnd()) {
+            startOfLexeme = curPosition;
+            lexToken();
+        }
+        tokens.add(new Token(EOF, "", source.length()));
+        return tokens;
+    }
+
+    /**
+     * Consumes one character and, unless it is whitespace, the rest of the token it starts.
+     */
+    private void lexToken() {
+        char current = popChar();
+        switch (current) {
+            case ':': emitToken(COLON); break;
+            case ',': emitToken(COMMA); break;
+            case '(': emitToken(LEFT_PAREN); break;
+            case ')': emitToken(RIGHT_PAREN); break;
+            case '[': emitToken(LEFT_BRACKET); break;
+            case ']': emitToken(RIGHT_BRACKET); break;
+            case '=': emitToken(EQUALS); break;
+            case '*': emitToken(TIMES); break;
+            case '/': emitToken(DIVIDE); break;
+            case '^': emitToken(POWER); break;
+
+            case '+':
+                if (matchChar('-')) {
+                    emitToken(PLUS_MINUS);
+                } else {
+                    emitToken(PLUS);
+                }
+                break;
+
+            case '-':
+                if (matchChar('>')) {
+                    emitToken(ARROW);
+                } else {
+                    emitToken(MINUS);
+                }
+                break;
+
+            default:
+                if (isAsciiDigit(current)) {
+                    number();
+                } else if (Character.isJavaIdentifierStart(current)) {
+                    keywordOrIdentifier();
+                } else if (!Character.isWhitespace(current)) {
+                    throw new RuntimeException("Unexpected character " + current);
+                }
+                // Whitespace produces no token and is simply skipped.
+        }
+    }
+
+    /**
+     * Scans the rest of a number whose first digit has already been consumed: further
+     * digits, optionally followed by a '.' and at least one more digit.
+     */
+    private void number() {
+        matchWhile(Lexer::isAsciiDigit);
+        if (matchChar('.') && matchWhile(Lexer::isAsciiDigit) == 0) {
+            throw new RuntimeException("Expected digits after decimal separator");
+        }
+        emitToken(NUMBER);
+    }
+
+    /**
+     * Scans the rest of a word and emits it with the matching keyword type if it is a
+     * known keyword, otherwise as an IDENTIFIER.
+     */
+    private void keywordOrIdentifier() {
+        matchWhile(Character::isJavaIdentifierPart);
+        TokenType type = keywords.getOrDefault(currentLexeme(), IDENTIFIER);
+        emitToken(type);
+    }
+
+    /** Returns the next character and advances past it; must not be called at the end. */
+    private char popChar() {
+        char current = source.charAt(curPosition);
+        curPosition++;
+        return current;
+    }
+
+    /** Consumes the next character only if it equals {@code expected}; tells whether it did. */
+    private boolean matchChar(char expected) {
+        if (atEnd() || source.charAt(curPosition) != expected) {
+            return false;
+        }
+        curPosition++;
+        return true;
+    }
+
+    /** Consumes characters while {@code predicate} accepts them; returns how many it consumed. */
+    private int matchWhile(Predicate<Character> predicate) {
+        int matched = 0;
+        while (!atEnd() && predicate.test(source.charAt(curPosition))) {
+            curPosition++;
+            matched++;
+        }
+        return matched;
+    }
+
+    /** Appends a token of the given type built from the current lexeme and its start index. */
+    private void emitToken(TokenType type) {
+        tokens.add(new Token(type, currentLexeme(), startOfLexeme));
+    }
+
+    private String currentLexeme() {
+        return source.substring(startOfLexeme, curPosition);
+    }
+
+    private boolean atEnd() {
+        return curPosition >= source.length();
+    }
+
+    // Character.isDigit also allows various Unicode digits
+    private static boolean isAsciiDigit(char c) {
+        return '0' <= c && c <= '9';
+    }
+}
diff --git a/core/src/test/java/it/cavallium/warppi/math/rules/dsl/frontend/LexerTest.java b/core/src/test/java/it/cavallium/warppi/math/rules/dsl/frontend/LexerTest.java
new file mode 100644
index 00000000..5eea2ccf
--- /dev/null
+++ b/core/src/test/java/it/cavallium/warppi/math/rules/dsl/frontend/LexerTest.java
@@ -0,0 +1,87 @@
+package it.cavallium.warppi.math.rules.dsl.frontend;
+
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
+import static org.junit.Assert.*;
+
+/**
+ * Tests for {@link Lexer}.
+ */
+public class LexerTest {
+    @Test
+    public void validRule() {
+        // The indentation inside the literals is significant: the expected token positions
+        // below are character offsets into this exact string.
+        final Lexer lexer = new Lexer(
+            "reduction TestRule_123:\n" +
+            "  x + y * z = -(a_123 +- 3 / 2.2) -> [\n" +
+            "    x^a_123 = cos(pi) - log(e, e),\n" +
+            "    undefined,\n" +
+            "]\n"
+        );
+        final List<Token> expected = Arrays.asList(
+            new Token(REDUCTION, "reduction", 0),
+            new Token(IDENTIFIER, "TestRule_123", 10),
+            new Token(COLON, ":", 22),
+            new Token(IDENTIFIER, "x", 26),
+            new Token(PLUS, "+", 28),
+            new Token(IDENTIFIER, "y", 30),
+            new Token(TIMES, "*", 32),
+            new Token(IDENTIFIER, "z", 34),
+            new Token(EQUALS, "=", 36),
+            new Token(MINUS, "-", 38),
+            new Token(LEFT_PAREN, "(", 39),
+            new Token(IDENTIFIER, "a_123", 40),
+            new Token(PLUS_MINUS, "+-", 46),
+            new Token(NUMBER, "3", 49),
+            new Token(DIVIDE, "/", 51),
+            new Token(NUMBER, "2.2", 53),
+            new Token(RIGHT_PAREN, ")", 56),
+            new Token(ARROW, "->", 58),
+            new Token(LEFT_BRACKET, "[", 61),
+            new Token(IDENTIFIER, "x", 67),
+            new Token(POWER, "^", 68),
+            new Token(IDENTIFIER, "a_123", 69),
+            new Token(EQUALS, "=", 75),
+            new Token(COS, "cos", 77),
+            new Token(LEFT_PAREN, "(", 80),
+            new Token(PI, "pi", 81),
+            new Token(RIGHT_PAREN, ")", 83),
+            new Token(MINUS, "-", 85),
+            new Token(LOG, "log", 87),
+            new Token(LEFT_PAREN, "(", 90),
+            new Token(E, "e", 91),
+            new Token(COMMA, ",", 92),
+            new Token(E, "e", 94),
+            new Token(RIGHT_PAREN, ")", 95),
+            new Token(COMMA, ",", 96),
+            new Token(UNDEFINED, "undefined", 102),
+            new Token(COMMA, ",", 111),
+            new Token(RIGHT_BRACKET, "]", 113),
+            new Token(EOF, "", 115)
+        );
+        assertEquals(expected, lexer.lex());
+    }
+
+    @Test(expected = RuntimeException.class)
+    public void incompleteNumberOtherChar() {
+        final Lexer lexer = new Lexer("2. 5");
+        lexer.lex();
+    }
+
+    @Test(expected = RuntimeException.class)
+    public void incompleteNumberEof() {
+        final Lexer lexer = new Lexer("2.");
+        lexer.lex();
+    }
+
+    @Test(expected = RuntimeException.class)
+    public void meaninglessCharacter() {
+        final Lexer lexer = new Lexer("@");
+        lexer.lex();
+    }
+}