Improve DSL front-end documentation
This commit is contained in:
parent
e297d3592f
commit
c8656d1b30
@ -57,6 +57,4 @@ public class RulesDsl {
|
||||
undefined.removeAll(defined);
|
||||
return undefined;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ import java.util.function.Predicate;
|
||||
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
||||
|
||||
/**
|
||||
* Converts the source string to a list of tokens.
|
||||
* Converts the source <code>String</code> to a list of {@link Token}s.
|
||||
*/
|
||||
public class Lexer {
|
||||
private static final Map<String, TokenType> KEYWORDS;
|
||||
@ -34,11 +34,28 @@ public class Lexer {
|
||||
private int curPosition = 0;
|
||||
private UnexpectedCharacters unexpectedCharacters = null;
|
||||
|
||||
/**
|
||||
* Constructs a <code>Lexer</code> that will split the given source code into {@link Token}s.
|
||||
*
|
||||
* @param source a <code>String</code> containing the DSL source code to process.
|
||||
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
|
||||
* <code>Lexer</code> finds within the source string.
|
||||
*/
|
||||
public Lexer(final String source, final Consumer<? super DslError> errorReporter) {
|
||||
this.source = source;
|
||||
this.errorReporter = errorReporter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the <code>Lexer</code>.
|
||||
* <p>
|
||||
* This method can only be called once per instance.
|
||||
*
|
||||
* @return the list of <code>Token</code>s extracted from the source string.
|
||||
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
|
||||
* but it can still be parsed to potentially find additional errors (which may allow the user to fix more
|
||||
* errors before having to rerun the <code>Lexer</code>).
|
||||
*/
|
||||
public List<Token> lex() {
|
||||
while (!atEnd()) {
|
||||
startOfLexeme = curPosition;
|
||||
|
@ -17,7 +17,7 @@ import java.util.function.Function;
|
||||
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
||||
|
||||
/**
|
||||
* Converts a list of tokens to a list of <code>PatternRule</code>s.
|
||||
* Converts a list of {@link Token}s to a list of {@link PatternRule}s.
|
||||
*/
|
||||
public class Parser {
|
||||
private static final Map<TokenType, RuleType> RULE_TYPES = MapFactory.fromEntries(
|
||||
@ -29,7 +29,7 @@ public class Parser {
|
||||
|
||||
private final List<Token> tokens;
|
||||
private final Consumer<? super DslError> errorReporter;
|
||||
private int current = 0;
|
||||
private int currentIndex = 0;
|
||||
|
||||
// For error reporting
|
||||
private Map<SubFunctionPattern, List<Token>> ruleSubFunctionIdentifiers;
|
||||
@ -37,11 +37,29 @@ public class Parser {
|
||||
private final IdentityHashMap<PatternRule, Map<SubFunctionPattern, List<Token>>> subFunctionIdentifiers =
|
||||
new IdentityHashMap<>();
|
||||
|
||||
/**
|
||||
* Constructs a <code>Parser</code> that will produce a list of {@link PatternRule}s from the given list of {@link Token}s.
|
||||
*
|
||||
* @param tokens the list of <code>Token</code>s to process.
|
||||
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
|
||||
* <code>Parser</code> finds while processing the given tokens.
|
||||
*/
|
||||
public Parser(final List<Token> tokens, final Consumer<? super DslError> errorReporter) {
|
||||
this.tokens = tokens;
|
||||
this.errorReporter = errorReporter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the <code>Parser</code>.
|
||||
* <p>
|
||||
* This method can only be called once per instance.
|
||||
*
|
||||
* @return the list of all valid <code>PatternRule</code>s constructed from the given tokens.
|
||||
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
|
||||
* but each rule can still be analyzed to look for undefined sub-functions in replacement patterns and
|
||||
* report them (which may allow the user to fix more errors before having to rerun the <code>Lexer</code>
|
||||
* and <code>Parser</code>).
|
||||
*/
|
||||
public List<PatternRule> parse() {
|
||||
return rules();
|
||||
}
|
||||
@ -264,42 +282,42 @@ public class Parser {
|
||||
final Token matched = match(expectedType);
|
||||
if (matched == null) {
|
||||
throw new SyntaxException(
|
||||
new UnexpectedToken(tokens.get(current), expectedType)
|
||||
new UnexpectedToken(tokens.get(currentIndex), expectedType)
|
||||
);
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
|
||||
private Token match(final TokenType expectedType) {
|
||||
final Token curToken = tokens.get(current);
|
||||
final Token curToken = tokens.get(currentIndex);
|
||||
if (curToken.type != expectedType) {
|
||||
return null;
|
||||
}
|
||||
current++;
|
||||
currentIndex++;
|
||||
return curToken;
|
||||
}
|
||||
|
||||
private void synchronizeTo(final Set<TokenType> types) {
|
||||
while (!atEnd() && !types.contains(tokens.get(current).type)) {
|
||||
current++;
|
||||
while (!atEnd() && !types.contains(tokens.get(currentIndex).type)) {
|
||||
currentIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
private Token pop() throws SyntaxException {
|
||||
final Token curToken = tokens.get(current);
|
||||
final Token curToken = tokens.get(currentIndex);
|
||||
if (atEnd()) {
|
||||
throw new SyntaxException(new UnexpectedToken(curToken)); // Avoid popping EOF
|
||||
}
|
||||
current++;
|
||||
currentIndex++;
|
||||
return curToken;
|
||||
}
|
||||
|
||||
private Token peek() {
|
||||
return tokens.get(current);
|
||||
return tokens.get(currentIndex);
|
||||
}
|
||||
|
||||
private boolean atEnd() {
|
||||
return tokens.get(current).type == EOF;
|
||||
return tokens.get(currentIndex).type == EOF;
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
|
@ -2,14 +2,37 @@ package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a single token extracted from DSL source code.
|
||||
* <p>
|
||||
* <code>Token</code>s are produced by the {@link Lexer} and consumed by the {@link Parser}.
|
||||
*/
|
||||
public class Token {
|
||||
/** The type of the token. */
|
||||
public final TokenType type;
|
||||
/** The source string which corresponds to the token. */
|
||||
/**
|
||||
* The part of the source code which corresponds to the token.
|
||||
* <p>
|
||||
* Some types of token always have the same lexemes (for example, <code>PLUS</code> is always represented by
|
||||
* <code>"+"</code>), while others have variable lexemes (like <code>IDENTIFIER</code>, which may correspond to any
|
||||
* valid identifier).
|
||||
* <p>
|
||||
* As a special case, tokens of type <code>EOF</code> (which signal the end of the source code) have empty lexemes
|
||||
* (<code>""</code>). Such tokens only exist to simplify the parser code, by allowing the end of the input to be
|
||||
* treated like any other token (which is especially useful for error handling, because an unexpected end of input
|
||||
* just becomes an "unexpected token" error).
|
||||
*/
|
||||
public final String lexeme;
|
||||
/** The index at which the token starts in the source string. */
|
||||
public final int position;
|
||||
|
||||
/**
|
||||
* Constructs a <code>Token</code>.
|
||||
*
|
||||
* @param type the type of the token.
|
||||
* @param lexeme the part of the source string which corresponds to the token.
|
||||
* @param position the index at which the token starts in the source string.
|
||||
*/
|
||||
public Token(final TokenType type, final String lexeme, final int position) {
|
||||
this.type = type;
|
||||
this.lexeme = lexeme;
|
||||
|
@ -1,5 +1,8 @@
|
||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||
|
||||
/**
|
||||
* Specifies the type of a <code>Token</code>.
|
||||
*/
|
||||
public enum TokenType {
|
||||
EOF,
|
||||
// Separators and grouping
|
||||
|
Loading…
Reference in New Issue
Block a user