Implement multiple error reporting and recovery in Lexer

This commit is contained in:
Riccardo Azzolini 2019-01-29 12:08:35 +01:00
parent 1e0d2e5a0e
commit f0d2cdc1ab
6 changed files with 229 additions and 19 deletions

View File

@ -5,6 +5,7 @@ import it.cavallium.warppi.math.rules.dsl.frontend.Lexer;
import it.cavallium.warppi.math.rules.dsl.frontend.Parser;
import it.cavallium.warppi.math.rules.dsl.patterns.SubFunctionPattern;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
@ -13,7 +14,9 @@ public class RulesDsl {
private RulesDsl() {}
public static List<Rule> makeRules(final String source) {
final Lexer lexer = new Lexer(source);
final List<DslException> errors = new ArrayList<>();
final Lexer lexer = new Lexer(source, errors::add);
final Parser parser = new Parser(lexer.lex());
final List<PatternRule> rules = parser.parse();
@ -21,6 +24,10 @@ public class RulesDsl {
checkSubFunctionsDefined(rule);
}
if (!errors.isEmpty()) {
throw new RuntimeException("Errors in DSL source code");
}
return Collections.unmodifiableList(rules);
}

View File

@ -0,0 +1,51 @@
package it.cavallium.warppi.math.rules.dsl.frontend;
import it.cavallium.warppi.math.rules.dsl.DslException;
import java.util.Objects;
/**
 * Thrown when DSL source code contains a number literal with a decimal separator which is not followed by digits.
 * <p>
 * An example of an incomplete literal is <code>2.</code>, while <code>2</code> and <code>2.2</code> are valid.
 * <p>
 * Two instances are equal when they have the same position and the same literal text.
 */
public class IncompleteNumberLiteralException extends DslException {
    private final int position;
    private final String literal;

    /**
     * @param position The position of the incomplete literal in the source (the lexer passes the start of the lexeme).
     * @param literal  The text of the incomplete literal; must not be <code>null</code>.
     * @throws NullPointerException If <code>literal</code> is <code>null</code>.
     */
    public IncompleteNumberLiteralException(final int position, final String literal) {
        this.position = position;
        // Fail fast: getLength() and equals() dereference the literal, so a null value
        // would otherwise only surface later, far away from the faulty call site.
        this.literal = Objects.requireNonNull(literal, "literal");
    }

    /**
     * @return The position which was passed to the constructor.
     */
    @Override
    public int getPosition() {
        return position;
    }

    /**
     * @return The number of characters in the incomplete literal.
     */
    @Override
    public int getLength() {
        return literal.length();
    }

    /**
     * @return The incomplete number literal.
     */
    public String getLiteral() {
        return literal;
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof IncompleteNumberLiteralException)) {
            return false;
        }
        final IncompleteNumberLiteralException other = (IncompleteNumberLiteralException) o;
        return this.position == other.position && this.literal.equals(other.literal);
    }

    @Override
    public int hashCode() {
        return Objects.hash(position, literal);
    }
}

View File

@ -1,8 +1,11 @@
package it.cavallium.warppi.math.rules.dsl.frontend;
import it.cavallium.warppi.math.rules.dsl.DslException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
@ -24,24 +27,56 @@ public class Lexer {
));
private final String source;
private final Consumer<? super DslException> errorReporter;
private final List<Token> tokens = new ArrayList<>();
private int startOfLexeme = 0;
private int curPosition = 0;
private UnexpectedCharactersException unexpectedCharacters = null;
public Lexer(final String source) {
public Lexer(final String source, final Consumer<? super DslException> errorReporter) {
this.source = source;
this.errorReporter = errorReporter;
}
public List<Token> lex() {
while (!atEnd()) {
startOfLexeme = curPosition;
lexToken();
lexAndHandleErrors();
}
// lexAndHandleErrors reports unexpected characters when they're followed by expected ones:
// if there are unexpected characters at the end of the source, they have to be reported here
reportAndClearUnexpectedCharacters();
tokens.add(new Token(EOF, "", source.length()));
return tokens;
}
private void lexToken() {
/**
 * Lexes one token via {@code lexToken()}, turning lexical errors into reports instead of propagating them.
 * <p>
 * Consecutive unexpected characters are merged into a single pending error, which is only handed to the
 * error reporter once {@code lexToken()} completes normally or a different kind of error is found.
 */
private void lexAndHandleErrors() {
    try {
        lexToken();
        // lexToken() returned normally, so the pending run of unexpected characters (if any) has ended
        reportAndClearUnexpectedCharacters();
    } catch (UnexpectedCharactersException unexpected) {
        // Either extend the pending run of unexpected characters or start a new one
        unexpectedCharacters = (unexpectedCharacters != null)
                ? unexpectedCharacters.concat(unexpected)
                : unexpected;
    } catch (IncompleteNumberLiteralException incompleteNumber) {
        // Flush the pending unexpected characters first, so that errors reach the reporter
        // in the same order in which they appear in the source
        reportAndClearUnexpectedCharacters();
        errorReporter.accept(incompleteNumber);
    }
}
/**
 * Hands the pending unexpected-characters error (if there is one) to the error reporter and then forgets it.
 * Does nothing when no such error is pending.
 */
private void reportAndClearUnexpectedCharacters() {
    final UnexpectedCharactersException pending = unexpectedCharacters;
    if (pending != null) {
        errorReporter.accept(pending);
        unexpectedCharacters = null;
    }
}
private void lexToken() throws UnexpectedCharactersException, IncompleteNumberLiteralException {
char current = popChar();
switch (current) {
case ':': emitToken(COLON); break;
@ -86,7 +121,7 @@ public class Lexer {
} else if (Character.isJavaIdentifierStart(current)) {
keywordOrIdentifier();
} else if (!Character.isWhitespace(current)) {
throw new RuntimeException("Unexpected character " + current);
throw new UnexpectedCharactersException(curPosition - 1, String.valueOf(current));
}
}
}
@ -101,10 +136,10 @@ public class Lexer {
}
}
private void number() {
private void number() throws IncompleteNumberLiteralException {
matchWhile(Lexer::isAsciiDigit);
if (matchChar('.') && matchWhile(Lexer::isAsciiDigit) == 0) {
throw new RuntimeException("Expected digits after decimal separator");
throw new IncompleteNumberLiteralException(startOfLexeme, currentLexeme());
}
emitToken(NUMBER);
}

View File

@ -0,0 +1,53 @@
package it.cavallium.warppi.math.rules.dsl.frontend;
import it.cavallium.warppi.math.rules.dsl.DslException;
import java.util.Objects;
/**
 * Thrown when DSL source code contains one or more (consecutive) characters which are not expected by the lexer.
 * <p>
 * Two instances are equal when they have the same position and the same string of unexpected characters.
 */
public class UnexpectedCharactersException extends DslException {
    private final int position;
    private final String unexpectedCharacters;

    /**
     * @param position             The position of the first unexpected character in the source.
     * @param unexpectedCharacters The string of unexpected characters; must not be <code>null</code>.
     * @throws NullPointerException If <code>unexpectedCharacters</code> is <code>null</code>.
     */
    public UnexpectedCharactersException(final int position, final String unexpectedCharacters) {
        this.position = position;
        // Fail fast: getLength(), concat() and equals() all rely on a non-null string, so a null
        // value would otherwise only surface later, far away from the faulty call site.
        this.unexpectedCharacters = Objects.requireNonNull(unexpectedCharacters, "unexpectedCharacters");
    }

    /**
     * @return The position which was passed to the constructor.
     */
    @Override
    public int getPosition() {
        return position;
    }

    /**
     * @return The number of unexpected characters.
     */
    @Override
    public int getLength() {
        return unexpectedCharacters.length();
    }

    /**
     * @return The string of one or more consecutive unexpected characters.
     */
    public String getUnexpectedCharacters() {
        return unexpectedCharacters;
    }

    /**
     * Merges this error with one for the characters which come right after it.
     * <p>
     * Neither this instance nor <code>other</code> is modified.
     *
     * @param other The error for the following characters. Its position is ignored.
     * @return A new exception which keeps this exception's position and whose characters are this exception's
     *         characters followed by those of <code>other</code>.
     */
    UnexpectedCharactersException concat(UnexpectedCharactersException other) {
        return new UnexpectedCharactersException(this.position, this.unexpectedCharacters + other.unexpectedCharacters);
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof UnexpectedCharactersException)) {
            return false;
        }
        final UnexpectedCharactersException other = (UnexpectedCharactersException) o;
        return this.position == other.position && this.unexpectedCharacters.equals(other.unexpectedCharacters);
    }

    @Override
    public int hashCode() {
        return Objects.hash(position, unexpectedCharacters);
    }
}

View File

@ -54,7 +54,7 @@ public class RulesDslTest {
@Test(expected = RuntimeException.class)
public void lexerError() {
RulesDsl.makeRules("2. 5");
RulesDsl.makeRules("reduction test: 2. 5 -> 1");
}
@Test(expected = RuntimeException.class)

View File

@ -1,14 +1,25 @@
package it.cavallium.warppi.math.rules.dsl.frontend;
import it.cavallium.warppi.math.rules.dsl.DslException;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
import static org.junit.Assert.*;
public class LexerTest {
private final List<DslException> errors = new ArrayList<>();
/**
 * Empties the shared list of collected lexer errors before each test, so that every test
 * only sees the errors reported by its own lexer.
 */
// NOTE(review): JUnit 4 normally creates a new test class instance for each test method, in which
// case the list is already empty here and this is just a safeguard - confirm before removing.
@Before
public void setUp() {
errors.clear();
}
@Test
public void validRule() {
final Lexer lexer = new Lexer(
@ -16,7 +27,8 @@ public class LexerTest {
" x + y * z = -(a_123 +- 3 / 2.2) -> [\n" +
" x^a_123 = cos(pi) - log(e, e), // comment\n" +
" undefined, /*\n" +
"comment */ ]\n"
"comment */ ]\n",
errors::add
);
final List<Token> expected = Arrays.asList(
new Token(REDUCTION, "reduction", 0),
@ -60,23 +72,75 @@ public class LexerTest {
new Token(EOF, "", 140)
);
assertEquals(expected, lexer.lex());
assertTrue(errors.isEmpty());
}
@Test(expected = RuntimeException.class)
@Test
public void incompleteNumberOtherChar() {
final Lexer lexer = new Lexer("2. 5");
lexer.lex();
final Lexer lexer = new Lexer("2. 5 + 3", errors::add);
final List<Token> expectedTokens = Arrays.asList(
new Token(NUMBER, "5", 3),
new Token(PLUS, "+", 5),
new Token(NUMBER, "3", 7),
new Token(EOF, "", 8)
);
assertEquals(expectedTokens, lexer.lex());
final List<DslException> expectedErrors = Collections.singletonList(
new IncompleteNumberLiteralException(0, "2.")
);
assertEquals(expectedErrors, errors);
}
@Test(expected = RuntimeException.class)
@Test
public void incompleteNumberEof() {
final Lexer lexer = new Lexer("2.");
lexer.lex();
final Lexer lexer = new Lexer("2.", errors::add);
final List<Token> expectedTokens = Collections.singletonList(
new Token(EOF, "", 2)
);
assertEquals(expectedTokens, lexer.lex());
final List<DslException> expectedErrors = Collections.singletonList(
new IncompleteNumberLiteralException(0, "2.")
);
assertEquals(expectedErrors, errors);
}
@Test(expected = RuntimeException.class)
public void meaninglessCharacter() {
final Lexer lexer = new Lexer("@");
lexer.lex();
@Test
public void unexpectedCharacters() {
final Lexer lexer = new Lexer("reduction @| .: {}", errors::add);
final List<Token> expectedTokens = Arrays.asList(
new Token(REDUCTION, "reduction", 0),
new Token(COLON, ":", 14),
new Token(EOF, "", 18)
);
assertEquals(expectedTokens, lexer.lex());
final List<DslException> expectedErrors = Arrays.asList(
new UnexpectedCharactersException(10, "@|"),
new UnexpectedCharactersException(13, "."),
new UnexpectedCharactersException(16, "{}")
);
assertEquals(expectedErrors, errors);
}
/**
 * Errors of different kinds must be reported in the order in which they occur in the source.
 */
@Test
public void errorOrder() {
    final List<Token> actualTokens = new Lexer(".2. @", errors::add).lex();

    // Only the EOF token is expected, positioned at the end of the 5-character source
    assertEquals(Collections.singletonList(new Token(EOF, "", 5)), actualTokens);

    assertEquals(
            Arrays.asList(
                    new UnexpectedCharactersException(0, "."),
                    new IncompleteNumberLiteralException(1, "2."),
                    new UnexpectedCharactersException(4, "@")
            ),
            errors
    );
}
}