From f0d2cdc1aba58796310ab43d2c87f67f1b808c54 Mon Sep 17 00:00:00 2001 From: Riccardo Azzolini Date: Tue, 29 Jan 2019 12:08:35 +0100 Subject: [PATCH] Implement multiple error reporting and recovery in Lexer --- .../warppi/math/rules/dsl/RulesDsl.java | 9 +- .../IncompleteNumberLiteralException.java | 51 +++++++++++ .../warppi/math/rules/dsl/frontend/Lexer.java | 47 ++++++++-- .../UnexpectedCharactersException.java | 53 ++++++++++++ .../warppi/math/rules/dsl/RulesDslTest.java | 2 +- .../math/rules/dsl/frontend/LexerTest.java | 86 ++++++++++++++++--- 6 files changed, 229 insertions(+), 19 deletions(-) create mode 100644 core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/IncompleteNumberLiteralException.java create mode 100644 core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/UnexpectedCharactersException.java diff --git a/core/src/main/java/it/cavallium/warppi/math/rules/dsl/RulesDsl.java b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/RulesDsl.java index 6ba89d8a..c2dbec75 100644 --- a/core/src/main/java/it/cavallium/warppi/math/rules/dsl/RulesDsl.java +++ b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/RulesDsl.java @@ -5,6 +5,7 @@ import it.cavallium.warppi.math.rules.dsl.frontend.Lexer; import it.cavallium.warppi.math.rules.dsl.frontend.Parser; import it.cavallium.warppi.math.rules.dsl.patterns.SubFunctionPattern; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Set; @@ -13,7 +14,9 @@ public class RulesDsl { private RulesDsl() {} public static List makeRules(final String source) { - final Lexer lexer = new Lexer(source); + final List errors = new ArrayList<>(); + + final Lexer lexer = new Lexer(source, errors::add); final Parser parser = new Parser(lexer.lex()); final List rules = parser.parse(); @@ -21,6 +24,10 @@ public class RulesDsl { checkSubFunctionsDefined(rule); } + if (!errors.isEmpty()) { + throw new RuntimeException("Errors in DSL source code"); + } + return 
Collections.unmodifiableList(rules); } diff --git a/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/IncompleteNumberLiteralException.java b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/IncompleteNumberLiteralException.java new file mode 100644 index 00000000..0414e8e3 --- /dev/null +++ b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/IncompleteNumberLiteralException.java @@ -0,0 +1,51 @@ +package it.cavallium.warppi.math.rules.dsl.frontend; + +import it.cavallium.warppi.math.rules.dsl.DslException; + +import java.util.Objects; + +/** + * Thrown when DSL source code contains a number literal with a decimal separator which is not followed by digits. + * <p>
+ * An example of an incomplete literal is 2., while 2 and 2.2 are valid. + */ +public class IncompleteNumberLiteralException extends DslException { + private final int position; + private final String literal; + + public IncompleteNumberLiteralException(final int position, final String literal) { + this.position = position; + this.literal = literal; + } + + @Override + public int getPosition() { + return position; + } + + @Override + public int getLength() { + return literal.length(); + } + + /** + * @return The incomplete number literal. + */ + public String getLiteral() { + return literal; + } + + @Override + public boolean equals(final Object o) { + if (!(o instanceof IncompleteNumberLiteralException)) { + return false; + } + final IncompleteNumberLiteralException other = (IncompleteNumberLiteralException) o; + return this.position == other.position && this.literal.equals(other.literal); + } + + @Override + public int hashCode() { + return Objects.hash(position, literal); + } +} diff --git a/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/Lexer.java b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/Lexer.java index 7a79ff7a..eea89648 100644 --- a/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/Lexer.java +++ b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/Lexer.java @@ -1,8 +1,11 @@ package it.cavallium.warppi.math.rules.dsl.frontend; +import it.cavallium.warppi.math.rules.dsl.DslException; + import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -24,24 +27,56 @@ public class Lexer { )); private final String source; + private final Consumer errorReporter; + private final List tokens = new ArrayList<>(); private int startOfLexeme = 0; private int curPosition = 0; + private UnexpectedCharactersException unexpectedCharacters = null; - 
public Lexer(final String source) { + public Lexer(final String source, final Consumer errorReporter) { this.source = source; + this.errorReporter = errorReporter; } public List lex() { while (!atEnd()) { startOfLexeme = curPosition; - lexToken(); + lexAndHandleErrors(); } + // lexAndHandleErrors reports unexpected characters when they're followed by expected ones: + // if there are unexpected characters at the end of the source, they have to be reported here + reportAndClearUnexpectedCharacters(); tokens.add(new Token(EOF, "", source.length())); return tokens; } - private void lexToken() { + private void lexAndHandleErrors() { + try { + lexToken(); + reportAndClearUnexpectedCharacters(); // After finding some expected characters + } catch (UnexpectedCharactersException e) { + if (unexpectedCharacters == null) { + unexpectedCharacters = e; + } else { + unexpectedCharacters = unexpectedCharacters.concat(e); + } + } catch (IncompleteNumberLiteralException e) { + // If there are multiple errors, report them in the order in which they occur in the source + reportAndClearUnexpectedCharacters(); + errorReporter.accept(e); + } + } + + private void reportAndClearUnexpectedCharacters() { + if (unexpectedCharacters == null) { + return; + } + errorReporter.accept(unexpectedCharacters); + unexpectedCharacters = null; + } + + private void lexToken() throws UnexpectedCharactersException, IncompleteNumberLiteralException { char current = popChar(); switch (current) { case ':': emitToken(COLON); break; @@ -86,7 +121,7 @@ public class Lexer { } else if (Character.isJavaIdentifierStart(current)) { keywordOrIdentifier(); } else if (!Character.isWhitespace(current)) { - throw new RuntimeException("Unexpected character " + current); + throw new UnexpectedCharactersException(curPosition - 1, String.valueOf(current)); } } } @@ -101,10 +136,10 @@ public class Lexer { } } - private void number() { + private void number() throws IncompleteNumberLiteralException { 
matchWhile(Lexer::isAsciiDigit); if (matchChar('.') && matchWhile(Lexer::isAsciiDigit) == 0) { - throw new RuntimeException("Expected digits after decimal separator"); + throw new IncompleteNumberLiteralException(startOfLexeme, currentLexeme()); } emitToken(NUMBER); } diff --git a/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/UnexpectedCharactersException.java b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/UnexpectedCharactersException.java new file mode 100644 index 00000000..2c989112 --- /dev/null +++ b/core/src/main/java/it/cavallium/warppi/math/rules/dsl/frontend/UnexpectedCharactersException.java @@ -0,0 +1,53 @@ +package it.cavallium.warppi.math.rules.dsl.frontend; + +import it.cavallium.warppi.math.rules.dsl.DslException; + +import java.util.Objects; + +/** + * Thrown when DSL source code contains one or more (consecutive) characters which are not expected by the lexer. + */ +public class UnexpectedCharactersException extends DslException { + private final int position; + private final String unexpectedCharacters; + + public UnexpectedCharactersException(final int position, final String unexpectedCharacters) { + this.position = position; + this.unexpectedCharacters = unexpectedCharacters; + } + + @Override + public int getPosition() { + return position; + } + + @Override + public int getLength() { + return unexpectedCharacters.length(); + } + + /** + * @return The string of one or more consecutive unexpected characters. 
+ */ + public String getUnexpectedCharacters() { + return unexpectedCharacters; + } + + UnexpectedCharactersException concat(UnexpectedCharactersException other) { + return new UnexpectedCharactersException(this.position, this.unexpectedCharacters + other.unexpectedCharacters); + } + + @Override + public boolean equals(final Object o) { + if (!(o instanceof UnexpectedCharactersException)) { + return false; + } + final UnexpectedCharactersException other = (UnexpectedCharactersException) o; + return this.position == other.position && this.unexpectedCharacters.equals(other.unexpectedCharacters); + } + + @Override + public int hashCode() { + return Objects.hash(position, unexpectedCharacters); + } +} diff --git a/core/src/test/java/it/cavallium/warppi/math/rules/dsl/RulesDslTest.java b/core/src/test/java/it/cavallium/warppi/math/rules/dsl/RulesDslTest.java index fa62223a..1f2227af 100644 --- a/core/src/test/java/it/cavallium/warppi/math/rules/dsl/RulesDslTest.java +++ b/core/src/test/java/it/cavallium/warppi/math/rules/dsl/RulesDslTest.java @@ -54,7 +54,7 @@ public class RulesDslTest { @Test(expected = RuntimeException.class) public void lexerError() { - RulesDsl.makeRules("2. 5"); + RulesDsl.makeRules("reduction test: 2. 
5 -> 1"); } @Test(expected = RuntimeException.class) diff --git a/core/src/test/java/it/cavallium/warppi/math/rules/dsl/frontend/LexerTest.java b/core/src/test/java/it/cavallium/warppi/math/rules/dsl/frontend/LexerTest.java index d90bceb3..06458ea0 100644 --- a/core/src/test/java/it/cavallium/warppi/math/rules/dsl/frontend/LexerTest.java +++ b/core/src/test/java/it/cavallium/warppi/math/rules/dsl/frontend/LexerTest.java @@ -1,14 +1,25 @@ package it.cavallium.warppi.math.rules.dsl.frontend; +import it.cavallium.warppi.math.rules.dsl.DslException; +import org.junit.Before; import org.junit.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*; import static org.junit.Assert.*; public class LexerTest { + private final List errors = new ArrayList<>(); + + @Before + public void setUp() { + errors.clear(); + } + @Test public void validRule() { final Lexer lexer = new Lexer( @@ -16,7 +27,8 @@ public class LexerTest { " x + y * z = -(a_123 +- 3 / 2.2) -> [\n" + " x^a_123 = cos(pi) - log(e, e), // comment\n" + " undefined, /*\n" + - "comment */ ]\n" + "comment */ ]\n", + errors::add ); final List expected = Arrays.asList( new Token(REDUCTION, "reduction", 0), @@ -60,23 +72,75 @@ public class LexerTest { new Token(EOF, "", 140) ); assertEquals(expected, lexer.lex()); + assertTrue(errors.isEmpty()); } - @Test(expected = RuntimeException.class) + @Test public void incompleteNumberOtherChar() { - final Lexer lexer = new Lexer("2. 5"); - lexer.lex(); + final Lexer lexer = new Lexer("2. 
5 + 3", errors::add); + + final List expectedTokens = Arrays.asList( + new Token(NUMBER, "5", 3), + new Token(PLUS, "+", 5), + new Token(NUMBER, "3", 7), + new Token(EOF, "", 8) + ); + assertEquals(expectedTokens, lexer.lex()); + + final List expectedErrors = Collections.singletonList( + new IncompleteNumberLiteralException(0, "2.") + ); + assertEquals(expectedErrors, errors); } - @Test(expected = RuntimeException.class) + @Test public void incompleteNumberEof() { - final Lexer lexer = new Lexer("2."); - lexer.lex(); + final Lexer lexer = new Lexer("2.", errors::add); + + final List expectedTokens = Collections.singletonList( + new Token(EOF, "", 2) + ); + assertEquals(expectedTokens, lexer.lex()); + + final List expectedErrors = Collections.singletonList( + new IncompleteNumberLiteralException(0, "2.") + ); + assertEquals(expectedErrors, errors); } - @Test(expected = RuntimeException.class) - public void meaninglessCharacter() { - final Lexer lexer = new Lexer("@"); - lexer.lex(); + @Test + public void unexpectedCharacters() { + final Lexer lexer = new Lexer("reduction @| .: {}", errors::add); + + final List expectedTokens = Arrays.asList( + new Token(REDUCTION, "reduction", 0), + new Token(COLON, ":", 14), + new Token(EOF, "", 18) + ); + assertEquals(expectedTokens, lexer.lex()); + + final List expectedErrors = Arrays.asList( + new UnexpectedCharactersException(10, "@|"), + new UnexpectedCharactersException(13, "."), + new UnexpectedCharactersException(16, "{}") + ); + assertEquals(expectedErrors, errors); + } + + @Test + public void errorOrder() { + final Lexer lexer = new Lexer(".2. @", errors::add); + + final List expectedTokens = Collections.singletonList( + new Token(EOF, "", 5) + ); + assertEquals(expectedTokens, lexer.lex()); + + final List expectedErrors = Arrays.asList( + new UnexpectedCharactersException(0, "."), + new IncompleteNumberLiteralException(1, "2."), + new UnexpectedCharactersException(4, "@") + ); + assertEquals(expectedErrors, errors); } }