Implement multiple error reporting and recovery in Lexer
This commit is contained in:
parent
1e0d2e5a0e
commit
f0d2cdc1ab
@ -5,6 +5,7 @@ import it.cavallium.warppi.math.rules.dsl.frontend.Lexer;
|
|||||||
import it.cavallium.warppi.math.rules.dsl.frontend.Parser;
|
import it.cavallium.warppi.math.rules.dsl.frontend.Parser;
|
||||||
import it.cavallium.warppi.math.rules.dsl.patterns.SubFunctionPattern;
|
import it.cavallium.warppi.math.rules.dsl.patterns.SubFunctionPattern;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
@ -13,7 +14,9 @@ public class RulesDsl {
|
|||||||
private RulesDsl() {}
|
private RulesDsl() {}
|
||||||
|
|
||||||
public static List<Rule> makeRules(final String source) {
|
public static List<Rule> makeRules(final String source) {
|
||||||
final Lexer lexer = new Lexer(source);
|
final List<DslException> errors = new ArrayList<>();
|
||||||
|
|
||||||
|
final Lexer lexer = new Lexer(source, errors::add);
|
||||||
final Parser parser = new Parser(lexer.lex());
|
final Parser parser = new Parser(lexer.lex());
|
||||||
final List<PatternRule> rules = parser.parse();
|
final List<PatternRule> rules = parser.parse();
|
||||||
|
|
||||||
@ -21,6 +24,10 @@ public class RulesDsl {
|
|||||||
checkSubFunctionsDefined(rule);
|
checkSubFunctionsDefined(rule);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!errors.isEmpty()) {
|
||||||
|
throw new RuntimeException("Errors in DSL source code");
|
||||||
|
}
|
||||||
|
|
||||||
return Collections.unmodifiableList(rules);
|
return Collections.unmodifiableList(rules);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,51 @@
|
|||||||
|
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||||
|
|
||||||
|
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thrown when DSL source code contains a number literal with a decimal separator which is not followed by digits.
|
||||||
|
* <p>
|
||||||
|
* An example of an incomplete literal is <code>2.</code>, while <code>2</code> and <code>2.2</code> are valid.
|
||||||
|
*/
|
||||||
|
public class IncompleteNumberLiteralException extends DslException {
|
||||||
|
private final int position;
|
||||||
|
private final String literal;
|
||||||
|
|
||||||
|
public IncompleteNumberLiteralException(final int position, final String literal) {
|
||||||
|
this.position = position;
|
||||||
|
this.literal = literal;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getPosition() {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLength() {
|
||||||
|
return literal.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The incomplete number literal.
|
||||||
|
*/
|
||||||
|
public String getLiteral() {
|
||||||
|
return literal;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(final Object o) {
|
||||||
|
if (!(o instanceof IncompleteNumberLiteralException)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
final IncompleteNumberLiteralException other = (IncompleteNumberLiteralException) o;
|
||||||
|
return this.position == other.position && this.literal.equals(other.literal);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(position, literal);
|
||||||
|
}
|
||||||
|
}
|
@ -1,8 +1,11 @@
|
|||||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||||
|
|
||||||
|
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.function.Consumer;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@ -24,24 +27,56 @@ public class Lexer {
|
|||||||
));
|
));
|
||||||
|
|
||||||
private final String source;
|
private final String source;
|
||||||
|
private final Consumer<? super DslException> errorReporter;
|
||||||
|
|
||||||
private final List<Token> tokens = new ArrayList<>();
|
private final List<Token> tokens = new ArrayList<>();
|
||||||
private int startOfLexeme = 0;
|
private int startOfLexeme = 0;
|
||||||
private int curPosition = 0;
|
private int curPosition = 0;
|
||||||
|
private UnexpectedCharactersException unexpectedCharacters = null;
|
||||||
|
|
||||||
public Lexer(final String source) {
|
public Lexer(final String source, final Consumer<? super DslException> errorReporter) {
|
||||||
this.source = source;
|
this.source = source;
|
||||||
|
this.errorReporter = errorReporter;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Token> lex() {
|
public List<Token> lex() {
|
||||||
while (!atEnd()) {
|
while (!atEnd()) {
|
||||||
startOfLexeme = curPosition;
|
startOfLexeme = curPosition;
|
||||||
lexToken();
|
lexAndHandleErrors();
|
||||||
}
|
}
|
||||||
|
// lexAndHandleErrors reports unexpected characters when they're followed by expected ones:
|
||||||
|
// if there are unexpected characters at the end of the source, they have to be reported here
|
||||||
|
reportAndClearUnexpectedCharacters();
|
||||||
tokens.add(new Token(EOF, "", source.length()));
|
tokens.add(new Token(EOF, "", source.length()));
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void lexToken() {
|
private void lexAndHandleErrors() {
|
||||||
|
try {
|
||||||
|
lexToken();
|
||||||
|
reportAndClearUnexpectedCharacters(); // After finding some expected characters
|
||||||
|
} catch (UnexpectedCharactersException e) {
|
||||||
|
if (unexpectedCharacters == null) {
|
||||||
|
unexpectedCharacters = e;
|
||||||
|
} else {
|
||||||
|
unexpectedCharacters = unexpectedCharacters.concat(e);
|
||||||
|
}
|
||||||
|
} catch (IncompleteNumberLiteralException e) {
|
||||||
|
// If there are multiple errors, report them in the order in which they occur in the source
|
||||||
|
reportAndClearUnexpectedCharacters();
|
||||||
|
errorReporter.accept(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void reportAndClearUnexpectedCharacters() {
|
||||||
|
if (unexpectedCharacters == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
errorReporter.accept(unexpectedCharacters);
|
||||||
|
unexpectedCharacters = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void lexToken() throws UnexpectedCharactersException, IncompleteNumberLiteralException {
|
||||||
char current = popChar();
|
char current = popChar();
|
||||||
switch (current) {
|
switch (current) {
|
||||||
case ':': emitToken(COLON); break;
|
case ':': emitToken(COLON); break;
|
||||||
@ -86,7 +121,7 @@ public class Lexer {
|
|||||||
} else if (Character.isJavaIdentifierStart(current)) {
|
} else if (Character.isJavaIdentifierStart(current)) {
|
||||||
keywordOrIdentifier();
|
keywordOrIdentifier();
|
||||||
} else if (!Character.isWhitespace(current)) {
|
} else if (!Character.isWhitespace(current)) {
|
||||||
throw new RuntimeException("Unexpected character " + current);
|
throw new UnexpectedCharactersException(curPosition - 1, String.valueOf(current));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -101,10 +136,10 @@ public class Lexer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void number() {
|
private void number() throws IncompleteNumberLiteralException {
|
||||||
matchWhile(Lexer::isAsciiDigit);
|
matchWhile(Lexer::isAsciiDigit);
|
||||||
if (matchChar('.') && matchWhile(Lexer::isAsciiDigit) == 0) {
|
if (matchChar('.') && matchWhile(Lexer::isAsciiDigit) == 0) {
|
||||||
throw new RuntimeException("Expected digits after decimal separator");
|
throw new IncompleteNumberLiteralException(startOfLexeme, currentLexeme());
|
||||||
}
|
}
|
||||||
emitToken(NUMBER);
|
emitToken(NUMBER);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,53 @@
|
|||||||
|
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||||
|
|
||||||
|
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thrown when DSL source code contains one or more (consecutive) characters which are not expected by the lexer.
|
||||||
|
*/
|
||||||
|
public class UnexpectedCharactersException extends DslException {
|
||||||
|
private final int position;
|
||||||
|
private final String unexpectedCharacters;
|
||||||
|
|
||||||
|
public UnexpectedCharactersException(final int position, final String unexpectedCharacters) {
|
||||||
|
this.position = position;
|
||||||
|
this.unexpectedCharacters = unexpectedCharacters;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getPosition() {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLength() {
|
||||||
|
return unexpectedCharacters.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The string of one or more consecutive unexpected characters.
|
||||||
|
*/
|
||||||
|
public String getUnexpectedCharacters() {
|
||||||
|
return unexpectedCharacters;
|
||||||
|
}
|
||||||
|
|
||||||
|
UnexpectedCharactersException concat(UnexpectedCharactersException other) {
|
||||||
|
return new UnexpectedCharactersException(this.position, this.unexpectedCharacters + other.unexpectedCharacters);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(final Object o) {
|
||||||
|
if (!(o instanceof UnexpectedCharactersException)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
final UnexpectedCharactersException other = (UnexpectedCharactersException) o;
|
||||||
|
return this.position == other.position && this.unexpectedCharacters.equals(other.unexpectedCharacters);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(position, unexpectedCharacters);
|
||||||
|
}
|
||||||
|
}
|
@ -54,7 +54,7 @@ public class RulesDslTest {
|
|||||||
|
|
||||||
@Test(expected = RuntimeException.class)
|
@Test(expected = RuntimeException.class)
|
||||||
public void lexerError() {
|
public void lexerError() {
|
||||||
RulesDsl.makeRules("2. 5");
|
RulesDsl.makeRules("reduction test: 2. 5 -> 1");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = RuntimeException.class)
|
@Test(expected = RuntimeException.class)
|
||||||
|
@ -1,14 +1,25 @@
|
|||||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||||
|
|
||||||
|
import it.cavallium.warppi.math.rules.dsl.DslException;
|
||||||
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
public class LexerTest {
|
public class LexerTest {
|
||||||
|
private final List<DslException> errors = new ArrayList<>();
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
errors.clear();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void validRule() {
|
public void validRule() {
|
||||||
final Lexer lexer = new Lexer(
|
final Lexer lexer = new Lexer(
|
||||||
@ -16,7 +27,8 @@ public class LexerTest {
|
|||||||
" x + y * z = -(a_123 +- 3 / 2.2) -> [\n" +
|
" x + y * z = -(a_123 +- 3 / 2.2) -> [\n" +
|
||||||
" x^a_123 = cos(pi) - log(e, e), // comment\n" +
|
" x^a_123 = cos(pi) - log(e, e), // comment\n" +
|
||||||
" undefined, /*\n" +
|
" undefined, /*\n" +
|
||||||
"comment */ ]\n"
|
"comment */ ]\n",
|
||||||
|
errors::add
|
||||||
);
|
);
|
||||||
final List<Token> expected = Arrays.asList(
|
final List<Token> expected = Arrays.asList(
|
||||||
new Token(REDUCTION, "reduction", 0),
|
new Token(REDUCTION, "reduction", 0),
|
||||||
@ -60,23 +72,75 @@ public class LexerTest {
|
|||||||
new Token(EOF, "", 140)
|
new Token(EOF, "", 140)
|
||||||
);
|
);
|
||||||
assertEquals(expected, lexer.lex());
|
assertEquals(expected, lexer.lex());
|
||||||
|
assertTrue(errors.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = RuntimeException.class)
|
@Test
|
||||||
public void incompleteNumberOtherChar() {
|
public void incompleteNumberOtherChar() {
|
||||||
final Lexer lexer = new Lexer("2. 5");
|
final Lexer lexer = new Lexer("2. 5 + 3", errors::add);
|
||||||
lexer.lex();
|
|
||||||
|
final List<Token> expectedTokens = Arrays.asList(
|
||||||
|
new Token(NUMBER, "5", 3),
|
||||||
|
new Token(PLUS, "+", 5),
|
||||||
|
new Token(NUMBER, "3", 7),
|
||||||
|
new Token(EOF, "", 8)
|
||||||
|
);
|
||||||
|
assertEquals(expectedTokens, lexer.lex());
|
||||||
|
|
||||||
|
final List<DslException> expectedErrors = Collections.singletonList(
|
||||||
|
new IncompleteNumberLiteralException(0, "2.")
|
||||||
|
);
|
||||||
|
assertEquals(expectedErrors, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = RuntimeException.class)
|
@Test
|
||||||
public void incompleteNumberEof() {
|
public void incompleteNumberEof() {
|
||||||
final Lexer lexer = new Lexer("2.");
|
final Lexer lexer = new Lexer("2.", errors::add);
|
||||||
lexer.lex();
|
|
||||||
|
final List<Token> expectedTokens = Collections.singletonList(
|
||||||
|
new Token(EOF, "", 2)
|
||||||
|
);
|
||||||
|
assertEquals(expectedTokens, lexer.lex());
|
||||||
|
|
||||||
|
final List<DslException> expectedErrors = Collections.singletonList(
|
||||||
|
new IncompleteNumberLiteralException(0, "2.")
|
||||||
|
);
|
||||||
|
assertEquals(expectedErrors, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = RuntimeException.class)
|
@Test
|
||||||
public void meaninglessCharacter() {
|
public void unexpectedCharacters() {
|
||||||
final Lexer lexer = new Lexer("@");
|
final Lexer lexer = new Lexer("reduction @| .: {}", errors::add);
|
||||||
lexer.lex();
|
|
||||||
|
final List<Token> expectedTokens = Arrays.asList(
|
||||||
|
new Token(REDUCTION, "reduction", 0),
|
||||||
|
new Token(COLON, ":", 14),
|
||||||
|
new Token(EOF, "", 18)
|
||||||
|
);
|
||||||
|
assertEquals(expectedTokens, lexer.lex());
|
||||||
|
|
||||||
|
final List<DslException> expectedErrors = Arrays.asList(
|
||||||
|
new UnexpectedCharactersException(10, "@|"),
|
||||||
|
new UnexpectedCharactersException(13, "."),
|
||||||
|
new UnexpectedCharactersException(16, "{}")
|
||||||
|
);
|
||||||
|
assertEquals(expectedErrors, errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void errorOrder() {
|
||||||
|
final Lexer lexer = new Lexer(".2. @", errors::add);
|
||||||
|
|
||||||
|
final List<Token> expectedTokens = Collections.singletonList(
|
||||||
|
new Token(EOF, "", 5)
|
||||||
|
);
|
||||||
|
assertEquals(expectedTokens, lexer.lex());
|
||||||
|
|
||||||
|
final List<DslException> expectedErrors = Arrays.asList(
|
||||||
|
new UnexpectedCharactersException(0, "."),
|
||||||
|
new IncompleteNumberLiteralException(1, "2."),
|
||||||
|
new UnexpectedCharactersException(4, "@")
|
||||||
|
);
|
||||||
|
assertEquals(expectedErrors, errors);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user