Implement multiple error reporting and recovery in Lexer
This commit is contained in:
parent
1e0d2e5a0e
commit
f0d2cdc1ab
@ -5,6 +5,7 @@ import it.cavallium.warppi.math.rules.dsl.frontend.Lexer;
|
||||
import it.cavallium.warppi.math.rules.dsl.frontend.Parser;
|
||||
import it.cavallium.warppi.math.rules.dsl.patterns.SubFunctionPattern;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
@ -13,7 +14,9 @@ public class RulesDsl {
|
||||
private RulesDsl() {}
|
||||
|
||||
public static List<Rule> makeRules(final String source) {
|
||||
final Lexer lexer = new Lexer(source);
|
||||
final List<DslException> errors = new ArrayList<>();
|
||||
|
||||
final Lexer lexer = new Lexer(source, errors::add);
|
||||
final Parser parser = new Parser(lexer.lex());
|
||||
final List<PatternRule> rules = parser.parse();
|
||||
|
||||
@ -21,6 +24,10 @@ public class RulesDsl {
|
||||
checkSubFunctionsDefined(rule);
|
||||
}
|
||||
|
||||
if (!errors.isEmpty()) {
|
||||
throw new RuntimeException("Errors in DSL source code");
|
||||
}
|
||||
|
||||
return Collections.unmodifiableList(rules);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,51 @@
|
||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||
|
||||
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Thrown when DSL source code contains a number literal with a decimal separator which is not followed by digits.
|
||||
* <p>
|
||||
* An example of an incomplete literal is <code>2.</code>, while <code>2</code> and <code>2.2</code> are valid.
|
||||
*/
|
||||
public class IncompleteNumberLiteralException extends DslException {
|
||||
private final int position;
|
||||
private final String literal;
|
||||
|
||||
public IncompleteNumberLiteralException(final int position, final String literal) {
|
||||
this.position = position;
|
||||
this.literal = literal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getLength() {
|
||||
return literal.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The incomplete number literal.
|
||||
*/
|
||||
public String getLiteral() {
|
||||
return literal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (!(o instanceof IncompleteNumberLiteralException)) {
|
||||
return false;
|
||||
}
|
||||
final IncompleteNumberLiteralException other = (IncompleteNumberLiteralException) o;
|
||||
return this.position == other.position && this.literal.equals(other.literal);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(position, literal);
|
||||
}
|
||||
}
|
@ -1,8 +1,11 @@
|
||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||
|
||||
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
@ -24,24 +27,56 @@ public class Lexer {
|
||||
));
|
||||
|
||||
private final String source;
|
||||
private final Consumer<? super DslException> errorReporter;
|
||||
|
||||
private final List<Token> tokens = new ArrayList<>();
|
||||
private int startOfLexeme = 0;
|
||||
private int curPosition = 0;
|
||||
private UnexpectedCharactersException unexpectedCharacters = null;
|
||||
|
||||
public Lexer(final String source) {
|
||||
public Lexer(final String source, final Consumer<? super DslException> errorReporter) {
|
||||
this.source = source;
|
||||
this.errorReporter = errorReporter;
|
||||
}
|
||||
|
||||
public List<Token> lex() {
|
||||
while (!atEnd()) {
|
||||
startOfLexeme = curPosition;
|
||||
lexToken();
|
||||
lexAndHandleErrors();
|
||||
}
|
||||
// lexAndHandleErrors reports unexpected characters when they're followed by expected ones:
|
||||
// if there are unexpected characters at the end of the source, they have to be reported here
|
||||
reportAndClearUnexpectedCharacters();
|
||||
tokens.add(new Token(EOF, "", source.length()));
|
||||
return tokens;
|
||||
}
|
||||
|
||||
private void lexToken() {
|
||||
private void lexAndHandleErrors() {
|
||||
try {
|
||||
lexToken();
|
||||
reportAndClearUnexpectedCharacters(); // After finding some expected characters
|
||||
} catch (UnexpectedCharactersException e) {
|
||||
if (unexpectedCharacters == null) {
|
||||
unexpectedCharacters = e;
|
||||
} else {
|
||||
unexpectedCharacters = unexpectedCharacters.concat(e);
|
||||
}
|
||||
} catch (IncompleteNumberLiteralException e) {
|
||||
// If there are multiple errors, report them in the order in which they occur in the source
|
||||
reportAndClearUnexpectedCharacters();
|
||||
errorReporter.accept(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void reportAndClearUnexpectedCharacters() {
|
||||
if (unexpectedCharacters == null) {
|
||||
return;
|
||||
}
|
||||
errorReporter.accept(unexpectedCharacters);
|
||||
unexpectedCharacters = null;
|
||||
}
|
||||
|
||||
private void lexToken() throws UnexpectedCharactersException, IncompleteNumberLiteralException {
|
||||
char current = popChar();
|
||||
switch (current) {
|
||||
case ':': emitToken(COLON); break;
|
||||
@ -86,7 +121,7 @@ public class Lexer {
|
||||
} else if (Character.isJavaIdentifierStart(current)) {
|
||||
keywordOrIdentifier();
|
||||
} else if (!Character.isWhitespace(current)) {
|
||||
throw new RuntimeException("Unexpected character " + current);
|
||||
throw new UnexpectedCharactersException(curPosition - 1, String.valueOf(current));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -101,10 +136,10 @@ public class Lexer {
|
||||
}
|
||||
}
|
||||
|
||||
private void number() {
|
||||
private void number() throws IncompleteNumberLiteralException {
|
||||
matchWhile(Lexer::isAsciiDigit);
|
||||
if (matchChar('.') && matchWhile(Lexer::isAsciiDigit) == 0) {
|
||||
throw new RuntimeException("Expected digits after decimal separator");
|
||||
throw new IncompleteNumberLiteralException(startOfLexeme, currentLexeme());
|
||||
}
|
||||
emitToken(NUMBER);
|
||||
}
|
||||
|
@ -0,0 +1,53 @@
|
||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||
|
||||
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Thrown when DSL source code contains one or more (consecutive) characters which are not expected by the lexer.
|
||||
*/
|
||||
public class UnexpectedCharactersException extends DslException {
|
||||
private final int position;
|
||||
private final String unexpectedCharacters;
|
||||
|
||||
public UnexpectedCharactersException(final int position, final String unexpectedCharacters) {
|
||||
this.position = position;
|
||||
this.unexpectedCharacters = unexpectedCharacters;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getLength() {
|
||||
return unexpectedCharacters.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The string of one or more consecutive unexpected characters.
|
||||
*/
|
||||
public String getUnexpectedCharacters() {
|
||||
return unexpectedCharacters;
|
||||
}
|
||||
|
||||
UnexpectedCharactersException concat(UnexpectedCharactersException other) {
|
||||
return new UnexpectedCharactersException(this.position, this.unexpectedCharacters + other.unexpectedCharacters);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (!(o instanceof UnexpectedCharactersException)) {
|
||||
return false;
|
||||
}
|
||||
final UnexpectedCharactersException other = (UnexpectedCharactersException) o;
|
||||
return this.position == other.position && this.unexpectedCharacters.equals(other.unexpectedCharacters);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(position, unexpectedCharacters);
|
||||
}
|
||||
}
|
@ -54,7 +54,7 @@ public class RulesDslTest {
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
public void lexerError() {
|
||||
RulesDsl.makeRules("2. 5");
|
||||
RulesDsl.makeRules("reduction test: 2. 5 -> 1");
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
|
@ -1,14 +1,25 @@
|
||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||
|
||||
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class LexerTest {
|
||||
private final List<DslException> errors = new ArrayList<>();
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
errors.clear();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void validRule() {
|
||||
final Lexer lexer = new Lexer(
|
||||
@ -16,7 +27,8 @@ public class LexerTest {
|
||||
" x + y * z = -(a_123 +- 3 / 2.2) -> [\n" +
|
||||
" x^a_123 = cos(pi) - log(e, e), // comment\n" +
|
||||
" undefined, /*\n" +
|
||||
"comment */ ]\n"
|
||||
"comment */ ]\n",
|
||||
errors::add
|
||||
);
|
||||
final List<Token> expected = Arrays.asList(
|
||||
new Token(REDUCTION, "reduction", 0),
|
||||
@ -60,23 +72,75 @@ public class LexerTest {
|
||||
new Token(EOF, "", 140)
|
||||
);
|
||||
assertEquals(expected, lexer.lex());
|
||||
assertTrue(errors.isEmpty());
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
@Test
|
||||
public void incompleteNumberOtherChar() {
|
||||
final Lexer lexer = new Lexer("2. 5");
|
||||
lexer.lex();
|
||||
final Lexer lexer = new Lexer("2. 5 + 3", errors::add);
|
||||
|
||||
final List<Token> expectedTokens = Arrays.asList(
|
||||
new Token(NUMBER, "5", 3),
|
||||
new Token(PLUS, "+", 5),
|
||||
new Token(NUMBER, "3", 7),
|
||||
new Token(EOF, "", 8)
|
||||
);
|
||||
assertEquals(expectedTokens, lexer.lex());
|
||||
|
||||
final List<DslException> expectedErrors = Collections.singletonList(
|
||||
new IncompleteNumberLiteralException(0, "2.")
|
||||
);
|
||||
assertEquals(expectedErrors, errors);
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
@Test
|
||||
public void incompleteNumberEof() {
|
||||
final Lexer lexer = new Lexer("2.");
|
||||
lexer.lex();
|
||||
final Lexer lexer = new Lexer("2.", errors::add);
|
||||
|
||||
final List<Token> expectedTokens = Collections.singletonList(
|
||||
new Token(EOF, "", 2)
|
||||
);
|
||||
assertEquals(expectedTokens, lexer.lex());
|
||||
|
||||
final List<DslException> expectedErrors = Collections.singletonList(
|
||||
new IncompleteNumberLiteralException(0, "2.")
|
||||
);
|
||||
assertEquals(expectedErrors, errors);
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
public void meaninglessCharacter() {
|
||||
final Lexer lexer = new Lexer("@");
|
||||
lexer.lex();
|
||||
@Test
|
||||
public void unexpectedCharacters() {
|
||||
final Lexer lexer = new Lexer("reduction @| .: {}", errors::add);
|
||||
|
||||
final List<Token> expectedTokens = Arrays.asList(
|
||||
new Token(REDUCTION, "reduction", 0),
|
||||
new Token(COLON, ":", 14),
|
||||
new Token(EOF, "", 18)
|
||||
);
|
||||
assertEquals(expectedTokens, lexer.lex());
|
||||
|
||||
final List<DslException> expectedErrors = Arrays.asList(
|
||||
new UnexpectedCharactersException(10, "@|"),
|
||||
new UnexpectedCharactersException(13, "."),
|
||||
new UnexpectedCharactersException(16, "{}")
|
||||
);
|
||||
assertEquals(expectedErrors, errors);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void errorOrder() {
|
||||
final Lexer lexer = new Lexer(".2. @", errors::add);
|
||||
|
||||
final List<Token> expectedTokens = Collections.singletonList(
|
||||
new Token(EOF, "", 5)
|
||||
);
|
||||
assertEquals(expectedTokens, lexer.lex());
|
||||
|
||||
final List<DslException> expectedErrors = Arrays.asList(
|
||||
new UnexpectedCharactersException(0, "."),
|
||||
new IncompleteNumberLiteralException(1, "2."),
|
||||
new UnexpectedCharactersException(4, "@")
|
||||
);
|
||||
assertEquals(expectedErrors, errors);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user