Improve DSL front-end documentation
This commit is contained in:
parent
e297d3592f
commit
c8656d1b30
@ -57,6 +57,4 @@ public class RulesDsl {
|
|||||||
undefined.removeAll(defined);
|
undefined.removeAll(defined);
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@ import java.util.function.Predicate;
|
|||||||
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts the source string to a list of tokens.
|
* Converts the source <code>String</code> to a list of {@link Token}s.
|
||||||
*/
|
*/
|
||||||
public class Lexer {
|
public class Lexer {
|
||||||
private static final Map<String, TokenType> KEYWORDS;
|
private static final Map<String, TokenType> KEYWORDS;
|
||||||
@ -34,11 +34,28 @@ public class Lexer {
|
|||||||
private int curPosition = 0;
|
private int curPosition = 0;
|
||||||
private UnexpectedCharacters unexpectedCharacters = null;
|
private UnexpectedCharacters unexpectedCharacters = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a <code>Lexer</code> that will split the given source code into {@link Token}s.
|
||||||
|
*
|
||||||
|
* @param source a <code>String</code> containing the DSL source code to process.
|
||||||
|
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
|
||||||
|
* <code>Lexer</code> finds within the source string.
|
||||||
|
*/
|
||||||
public Lexer(final String source, final Consumer<? super DslError> errorReporter) {
|
public Lexer(final String source, final Consumer<? super DslError> errorReporter) {
|
||||||
this.source = source;
|
this.source = source;
|
||||||
this.errorReporter = errorReporter;
|
this.errorReporter = errorReporter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs the <code>Lexer</code>.
|
||||||
|
* <p>
|
||||||
|
* This method can only be called once per instance.
|
||||||
|
*
|
||||||
|
* @return the list of <code>Token</code>s extracted from the source string.
|
||||||
|
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
|
||||||
|
* but it can still be parsed to potentially find additional errors (which may allow the user to fix more
|
||||||
|
* errors before having to rerun the <code>Lexer</code>).
|
||||||
|
*/
|
||||||
public List<Token> lex() {
|
public List<Token> lex() {
|
||||||
while (!atEnd()) {
|
while (!atEnd()) {
|
||||||
startOfLexeme = curPosition;
|
startOfLexeme = curPosition;
|
||||||
|
@ -17,7 +17,7 @@ import java.util.function.Function;
|
|||||||
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
import static it.cavallium.warppi.math.rules.dsl.frontend.TokenType.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a list of tokens to a list of <code>PatternRule</code>s.
|
* Converts a list of {@link Token}s to a list of {@link PatternRule}s.
|
||||||
*/
|
*/
|
||||||
public class Parser {
|
public class Parser {
|
||||||
private static final Map<TokenType, RuleType> RULE_TYPES = MapFactory.fromEntries(
|
private static final Map<TokenType, RuleType> RULE_TYPES = MapFactory.fromEntries(
|
||||||
@ -29,7 +29,7 @@ public class Parser {
|
|||||||
|
|
||||||
private final List<Token> tokens;
|
private final List<Token> tokens;
|
||||||
private final Consumer<? super DslError> errorReporter;
|
private final Consumer<? super DslError> errorReporter;
|
||||||
private int current = 0;
|
private int currentIndex = 0;
|
||||||
|
|
||||||
// For error reporting
|
// For error reporting
|
||||||
private Map<SubFunctionPattern, List<Token>> ruleSubFunctionIdentifiers;
|
private Map<SubFunctionPattern, List<Token>> ruleSubFunctionIdentifiers;
|
||||||
@ -37,11 +37,29 @@ public class Parser {
|
|||||||
private final IdentityHashMap<PatternRule, Map<SubFunctionPattern, List<Token>>> subFunctionIdentifiers =
|
private final IdentityHashMap<PatternRule, Map<SubFunctionPattern, List<Token>>> subFunctionIdentifiers =
|
||||||
new IdentityHashMap<>();
|
new IdentityHashMap<>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a <code>Parser</code> that will produce a list of {@link PatternRule}s from the given list of {@link Token}s.
|
||||||
|
*
|
||||||
|
* @param tokens the list of <code>Token</code>s to process.
|
||||||
|
* @param errorReporter a <code>Consumer</code> used to report each <code>DslError</code> that the
|
||||||
|
* <code>Parser</code> finds within the given list of tokens.
|
||||||
|
*/
|
||||||
public Parser(final List<Token> tokens, final Consumer<? super DslError> errorReporter) {
|
public Parser(final List<Token> tokens, final Consumer<? super DslError> errorReporter) {
|
||||||
this.tokens = tokens;
|
this.tokens = tokens;
|
||||||
this.errorReporter = errorReporter;
|
this.errorReporter = errorReporter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs the <code>Parser</code>.
|
||||||
|
* <p>
|
||||||
|
* This method can only be called once per instance.
|
||||||
|
*
|
||||||
|
* @return the list of all valid <code>PatternRule</code>s constructed from the given tokens.
|
||||||
|
* If any errors are reported, this list should not be considered to represent a valid set of DSL rules,
|
||||||
|
* but each rule can still be analyzed to look for undefined sub-functions in replacement patterns and
|
||||||
|
* report them (which may allow the user to fix more errors before having to rerun the <code>Lexer</code>
|
||||||
|
* and <code>Parser</code>).
|
||||||
|
*/
|
||||||
public List<PatternRule> parse() {
|
public List<PatternRule> parse() {
|
||||||
return rules();
|
return rules();
|
||||||
}
|
}
|
||||||
@ -264,42 +282,42 @@ public class Parser {
|
|||||||
final Token matched = match(expectedType);
|
final Token matched = match(expectedType);
|
||||||
if (matched == null) {
|
if (matched == null) {
|
||||||
throw new SyntaxException(
|
throw new SyntaxException(
|
||||||
new UnexpectedToken(tokens.get(current), expectedType)
|
new UnexpectedToken(tokens.get(currentIndex), expectedType)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return matched;
|
return matched;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Token match(final TokenType expectedType) {
|
private Token match(final TokenType expectedType) {
|
||||||
final Token curToken = tokens.get(current);
|
final Token curToken = tokens.get(currentIndex);
|
||||||
if (curToken.type != expectedType) {
|
if (curToken.type != expectedType) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
current++;
|
currentIndex++;
|
||||||
return curToken;
|
return curToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void synchronizeTo(final Set<TokenType> types) {
|
private void synchronizeTo(final Set<TokenType> types) {
|
||||||
while (!atEnd() && !types.contains(tokens.get(current).type)) {
|
while (!atEnd() && !types.contains(tokens.get(currentIndex).type)) {
|
||||||
current++;
|
currentIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Token pop() throws SyntaxException {
|
private Token pop() throws SyntaxException {
|
||||||
final Token curToken = tokens.get(current);
|
final Token curToken = tokens.get(currentIndex);
|
||||||
if (atEnd()) {
|
if (atEnd()) {
|
||||||
throw new SyntaxException(new UnexpectedToken(curToken)); // Avoid popping EOF
|
throw new SyntaxException(new UnexpectedToken(curToken)); // Avoid popping EOF
|
||||||
}
|
}
|
||||||
current++;
|
currentIndex++;
|
||||||
return curToken;
|
return curToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Token peek() {
|
private Token peek() {
|
||||||
return tokens.get(current);
|
return tokens.get(currentIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean atEnd() {
|
private boolean atEnd() {
|
||||||
return tokens.get(current).type == EOF;
|
return tokens.get(currentIndex).type == EOF;
|
||||||
}
|
}
|
||||||
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
|
@ -2,14 +2,37 @@ package it.cavallium.warppi.math.rules.dsl.frontend;
|
|||||||
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a single token extracted from DSL source code.
|
||||||
|
* <p>
|
||||||
|
* <code>Token</code>s are produced by the {@link Lexer} and consumed by the {@link Parser}.
|
||||||
|
*/
|
||||||
public class Token {
|
public class Token {
|
||||||
/** The type of the token. */
|
/** The type of the token. */
|
||||||
public final TokenType type;
|
public final TokenType type;
|
||||||
/** The source string which corresponds to the token. */
|
/**
|
||||||
|
* The part of the source code which corresponds to the token.
|
||||||
|
* <p>
|
||||||
|
* Some types of token always have the same lexemes (for example, <code>PLUS</code> is always represented by
|
||||||
|
* <code>"+"</code>), while others have variable lexemes (like <code>IDENTIFIER</code>, which may correspond to any
|
||||||
|
* valid identifier).
|
||||||
|
* <p>
|
||||||
|
* As a special case, tokens of type <code>EOF</code> (which signal the end of the source code) have empty lexemes
|
||||||
|
* (<code>""</code>). Such tokens only exist to simplify the parser code, by allowing the end of the input to be
|
||||||
|
* treated like any other token (which is especially useful for error handling, because an unexpected end of input
|
||||||
|
* just becomes an "unexpected token" error).
|
||||||
|
*/
|
||||||
public final String lexeme;
|
public final String lexeme;
|
||||||
/** The index at which the token starts in the source string. */
|
/** The index at which the token starts in the source string. */
|
||||||
public final int position;
|
public final int position;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a <code>Token</code>.
|
||||||
|
*
|
||||||
|
* @param type the type of the token.
|
||||||
|
* @param lexeme the part of the source string which corresponds to the token.
|
||||||
|
* @param position the index at which the token starts in the source string.
|
||||||
|
*/
|
||||||
public Token(final TokenType type, final String lexeme, final int position) {
|
public Token(final TokenType type, final String lexeme, final int position) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this.lexeme = lexeme;
|
this.lexeme = lexeme;
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
package it.cavallium.warppi.math.rules.dsl.frontend;
|
package it.cavallium.warppi.math.rules.dsl.frontend;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specifies the type of a <code>Token</code>.
|
||||||
|
*/
|
||||||
public enum TokenType {
|
public enum TokenType {
|
||||||
EOF,
|
EOF,
|
||||||
// Separators and grouping
|
// Separators and grouping
|
||||||
|
Loading…
Reference in New Issue
Block a user