Improve DSL front-end documentation

This commit is contained in:
Riccardo Azzolini 2019-08-11 19:32:29 +02:00
parent e297d3592f
commit c8656d1b30
5 changed files with 74 additions and 15 deletions

View File

@ -57,6 +57,4 @@ public class RulesDsl {
undefined.removeAll(defined); undefined.removeAll(defined);
return undefined; return undefined;
} }
} }

View File

@ -9,7 +9,7 @@ import java.util.function.Predicate;
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*; import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
/** /**
* Converts the source string to a list of tokens. * Converts the source <code>String</code> to a list of {@link Token}s.
*/ */
public class Lexer { public class Lexer {
private static final Map<String, TokenType> KEYWORDS; private static final Map<String, TokenType> KEYWORDS;
@ -34,11 +34,28 @@ public class Lexer {
private int curPosition = 0; private int curPosition = 0;
private UnexpectedCharacters unexpectedCharacters = null; private UnexpectedCharacters unexpectedCharacters = null;
/**
* Constructs a <code>Lexer</code> that will split the given source code into {@link Token}s.
*
* @param source a <code>String</code> containing the DSL source code to process.
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
* <code>Lexer</code> finds within the source string.
*/
public Lexer(final String source, final Consumer<? super DslError> errorReporter) { public Lexer(final String source, final Consumer<? super DslError> errorReporter) {
this.source = source; this.source = source;
this.errorReporter = errorReporter; this.errorReporter = errorReporter;
} }
/**
* Runs the <code>Lexer</code>.
* <p>
* This method can only be called once per instance.
*
* @return the list of <code>Token</code>s extracted from the source string.
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
* but it can still be parsed to potentially find additional errors (which may allow the user to fix more
* errors before having to rerun the <code>Lexer</code>).
*/
public List<Token> lex() { public List<Token> lex() {
while (!atEnd()) { while (!atEnd()) {
startOfLexeme = curPosition; startOfLexeme = curPosition;

View File

@ -17,7 +17,7 @@ import java.util.function.Function;
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*; import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
/** /**
* Converts a list of tokens to a list of <code>PatternRule</code>s. * Converts a list of {@link Token}s to a list of {@link PatternRule}s.
*/ */
public class Parser { public class Parser {
private static final Map<TokenType, RuleType> RULE_TYPES = MapFactory.fromEntries( private static final Map<TokenType, RuleType> RULE_TYPES = MapFactory.fromEntries(
@ -29,7 +29,7 @@ public class Parser {
private final List<Token> tokens; private final List<Token> tokens;
private final Consumer<? super DslError> errorReporter; private final Consumer<? super DslError> errorReporter;
private int current = 0; private int currentIndex = 0;
// For error reporting // For error reporting
private Map<SubFunctionPattern, List<Token>> ruleSubFunctionIdentifiers; private Map<SubFunctionPattern, List<Token>> ruleSubFunctionIdentifiers;
@ -37,11 +37,29 @@ public class Parser {
private final IdentityHashMap<PatternRule, Map<SubFunctionPattern, List<Token>>> subFunctionIdentifiers = private final IdentityHashMap<PatternRule, Map<SubFunctionPattern, List<Token>>> subFunctionIdentifiers =
new IdentityHashMap<>(); new IdentityHashMap<>();
/**
 * Constructs a <code>Parser</code> that will produce a list of {@link PatternRule}s from the given list of {@link Token}s.
*
* @param tokens the list of <code>Token</code>s to process.
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
* <code>Parser</code> finds within the source string.
*/
public Parser(final List<Token> tokens, final Consumer<? super DslError> errorReporter) { public Parser(final List<Token> tokens, final Consumer<? super DslError> errorReporter) {
this.tokens = tokens; this.tokens = tokens;
this.errorReporter = errorReporter; this.errorReporter = errorReporter;
} }
/**
* Runs the <code>Parser</code>.
* <p>
* This method can only be called once per instance.
*
* @return the list of all valid <code>PatternRule</code>s constructed from the given tokens.
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
* but each rule can still be analyzed to look for undefined sub-functions in replacement patterns and
* report them (which may allow the user to fix more errors before having to rerun the <code>Lexer</code>
* and <code>Parser</code>).
*/
public List<PatternRule> parse() { public List<PatternRule> parse() {
return rules(); return rules();
} }
@ -264,42 +282,42 @@ public class Parser {
final Token matched = match(expectedType); final Token matched = match(expectedType);
if (matched == null) { if (matched == null) {
throw new SyntaxException( throw new SyntaxException(
new UnexpectedToken(tokens.get(current), expectedType) new UnexpectedToken(tokens.get(currentIndex), expectedType)
); );
} }
return matched; return matched;
} }
private Token match(final TokenType expectedType) { private Token match(final TokenType expectedType) {
final Token curToken = tokens.get(current); final Token curToken = tokens.get(currentIndex);
if (curToken.type != expectedType) { if (curToken.type != expectedType) {
return null; return null;
} }
current++; currentIndex++;
return curToken; return curToken;
} }
private void synchronizeTo(final Set<TokenType> types) { private void synchronizeTo(final Set<TokenType> types) {
while (!atEnd() && !types.contains(tokens.get(current).type)) { while (!atEnd() && !types.contains(tokens.get(currentIndex).type)) {
current++; currentIndex++;
} }
} }
private Token pop() throws SyntaxException { private Token pop() throws SyntaxException {
final Token curToken = tokens.get(current); final Token curToken = tokens.get(currentIndex);
if (atEnd()) { if (atEnd()) {
throw new SyntaxException(new UnexpectedToken(curToken)); // Avoid popping EOF throw new SyntaxException(new UnexpectedToken(curToken)); // Avoid popping EOF
} }
current++; currentIndex++;
return curToken; return curToken;
} }
private Token peek() { private Token peek() {
return tokens.get(current); return tokens.get(currentIndex);
} }
private boolean atEnd() { private boolean atEnd() {
return tokens.get(current).type == EOF; return tokens.get(currentIndex).type == EOF;
} }
@FunctionalInterface @FunctionalInterface

View File

@ -2,14 +2,37 @@ package it.cavallium.warppi.math.rules.dsl.frontend;
import java.util.Objects; import java.util.Objects;
/**
* Represents a single token extracted from DSL source code.
* <p>
* <code>Token</code>s are produced by the {@link Lexer} and consumed by the {@link Parser}.
*/
public class Token { public class Token {
/** The type of the token. */ /** The type of the token. */
public final TokenType type; public final TokenType type;
/** The source string which corresponds to the token. */ /**
* The part of the source code which corresponds to the token.
* <p>
* Some types of token always have the same lexemes (for example, <code>PLUS</code> is always represented by
* <code>"+"</code>), while others have variable lexemes (like <code>IDENTIFIER</code>, which may correspond to any
* valid identifier).
* <p>
* As a special case, tokens of type <code>EOF</code> (which signal the end of the source code) have empty lexemes
* (<code>""</code>). Such tokens only exist to simplify the parser code, by allowing the end of the input to be
* treated like any other token (which is especially useful for error handling, because an unexpected end of input
* just becomes an "unexpected token" error).
*/
public final String lexeme; public final String lexeme;
/** The index at which the token starts in the source string. */ /** The index at which the token starts in the source string. */
public final int position; public final int position;
/**
* Constructs a <code>Token</code>.
*
* @param type the type of the token.
* @param lexeme the part of the source string which corresponds to the token.
* @param position the index at which the token starts in the source string.
*/
public Token(final TokenType type, final String lexeme, final int position) { public Token(final TokenType type, final String lexeme, final int position) {
this.type = type; this.type = type;
this.lexeme = lexeme; this.lexeme = lexeme;

View File

@ -1,5 +1,8 @@
package it.cavallium.warppi.math.rules.dsl.frontend; package it.cavallium.warppi.math.rules.dsl.frontend;
/**
* Specifies the type of a <code>Token</code>.
*/
public enum TokenType { public enum TokenType {
EOF, EOF,
// Separators and grouping // Separators and grouping