Improve DSL front-end documentation

This commit is contained in:
Riccardo Azzolini 2019-08-11 19:32:29 +02:00
parent e297d3592f
commit c8656d1b30
5 changed files with 74 additions and 15 deletions

View File

@ -57,6 +57,4 @@ public class RulesDsl {
undefined.removeAll(defined);
return undefined;
}
}

View File

@ -9,7 +9,7 @@ import java.util.function.Predicate;
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
/**
* Converts the source string to a list of tokens.
* Converts the source <code>String</code> to a list of {@link Token}s.
*/
public class Lexer {
private static final Map<String, TokenType> KEYWORDS;
@ -34,11 +34,28 @@ public class Lexer {
private int curPosition = 0;
private UnexpectedCharacters unexpectedCharacters = null;
/**
* Constructs a <code>Lexer</code> that will split the given source code into {@link Token}s.
*
* @param source a <code>String</code> containing the DSL source code to process.
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
* <code>Lexer</code> finds within the source string.
*/
public Lexer(final String source, final Consumer<? super DslError> errorReporter) {
this.source = source;
this.errorReporter = errorReporter;
}
/**
* Runs the <code>Lexer</code>.
* <p>
* This method can only be called once per instance.
*
* @return the list of <code>Token</code>s extracted from the source string.
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
* but it can still be parsed to potentially find additional errors (which may allow the user to fix more
* errors before having to rerun the <code>Lexer</code>).
*/
public List<Token> lex() {
while (!atEnd()) {
startOfLexeme = curPosition;

View File

@ -17,7 +17,7 @@ import java.util.function.Function;
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
/**
* Converts a list of tokens to a list of <code>PatternRule</code>s.
* Converts a list of {@link Token}s to a list of {@link PatternRule}s.
*/
public class Parser {
private static final Map<TokenType, RuleType> RULE_TYPES = MapFactory.fromEntries(
@ -29,7 +29,7 @@ public class Parser {
private final List<Token> tokens;
private final Consumer<? super DslError> errorReporter;
private int current = 0;
private int currentIndex = 0;
// For error reporting
private Map<SubFunctionPattern, List<Token>> ruleSubFunctionIdentifiers;
@ -37,11 +37,29 @@ public class Parser {
private final IdentityHashMap<PatternRule, Map<SubFunctionPattern, List<Token>>> subFunctionIdentifiers =
new IdentityHashMap<>();
/**
* Constructs a <code>Parser</code> that will produce a list of {@link PatternRule}s from the given list of {@link Token}s.
*
* @param tokens the list of <code>Token</code>s to process.
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
* <code>Parser</code> finds within the source string.
*/
public Parser(final List<Token> tokens, final Consumer<? super DslError> errorReporter) {
this.tokens = tokens;
this.errorReporter = errorReporter;
}
/**
* Runs the <code>Parser</code>.
* <p>
* This method can only be called once per instance.
*
* @return the list of all valid <code>PatternRule</code>s constructed from the given tokens.
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
* but each rule can still be analyzed to look for undefined sub-functions in replacement patterns and
* report them (which may allow the user to fix more errors before having to rerun the <code>Lexer</code>
* and <code>Parser</code>).
*/
public List<PatternRule> parse() {
return rules();
}
@ -264,42 +282,42 @@ public class Parser {
final Token matched = match(expectedType);
if (matched == null) {
throw new SyntaxException(
new UnexpectedToken(tokens.get(current), expectedType)
new UnexpectedToken(tokens.get(currentIndex), expectedType)
);
}
return matched;
}
private Token match(final TokenType expectedType) {
final Token curToken = tokens.get(current);
final Token curToken = tokens.get(currentIndex);
if (curToken.type != expectedType) {
return null;
}
current++;
currentIndex++;
return curToken;
}
private void synchronizeTo(final Set<TokenType> types) {
while (!atEnd() && !types.contains(tokens.get(current).type)) {
current++;
while (!atEnd() && !types.contains(tokens.get(currentIndex).type)) {
currentIndex++;
}
}
private Token pop() throws SyntaxException {
final Token curToken = tokens.get(current);
final Token curToken = tokens.get(currentIndex);
if (atEnd()) {
throw new SyntaxException(new UnexpectedToken(curToken)); // Avoid popping EOF
}
current++;
currentIndex++;
return curToken;
}
private Token peek() {
return tokens.get(current);
return tokens.get(currentIndex);
}
private boolean atEnd() {
return tokens.get(current).type == EOF;
return tokens.get(currentIndex).type == EOF;
}
@FunctionalInterface

View File

@ -2,14 +2,37 @@ package it.cavallium.warppi.math.rules.dsl.frontend;
import java.util.Objects;
/**
* Represents a single token extracted from DSL source code.
* <p>
* <code>Token</code>s are produced by the {@link Lexer} and consumed by the {@link Parser}.
*/
public class Token {
/** The type of the token. */
public final TokenType type;
/** The source string which corresponds to the token. */
/**
* The part of the source code which corresponds to the token.
* <p>
* Some types of token always have the same lexemes (for example, <code>PLUS</code> is always represented by
* <code>"+"</code>), while others have variable lexemes (like <code>IDENTIFIER</code>, which may correspond to any
* valid identifier).
* <p>
* As a special case, tokens of type <code>EOF</code> (which signal the end of the source code) have empty lexemes
* (<code>""</code>). Such tokens only exist to simplify the parser code, by allowing the end of the input to be
* treated like any other token (which is especially useful for error handling, because an unexpected end of input
* just becomes an "unexpected token" error).
*/
public final String lexeme;
/** The index at which the token starts in the source string. */
public final int position;
/**
* Constructs a <code>Token</code>.
*
* @param type the type of the token.
* @param lexeme the part of the source string which corresponds to the token.
* @param position the index at which the token starts in the source string.
*/
public Token(final TokenType type, final String lexeme, final int position) {
this.type = type;
this.lexeme = lexeme;

View File

@ -1,5 +1,8 @@
package it.cavallium.warppi.math.rules.dsl.frontend;
/**
* Specifies the type of a <code>Token</code>.
*/
public enum TokenType {
EOF,
// Separators and grouping