370 lines
18 KiB
Java
370 lines
18 KiB
Java
package it.cavallium.dbengine.client.query;
|
|
|
|
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
|
|
import it.cavallium.dbengine.client.query.current.data.BoostQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.BoxedQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.ConstantScoreQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.DoubleNDPointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.DoubleNDPointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.DoubleNDTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.DoublePointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.DoublePointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.DoublePointSetQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.DoubleTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FieldExistsQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FloatNDPointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FloatNDPointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FloatNDTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FloatPointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FloatPointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FloatPointSetQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.FloatTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.IntNDPointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.IntNDPointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.IntNDTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.IntPointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.IntPointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.IntPointSetQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.IntTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.LongNDPointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.LongNDPointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.LongNDTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.LongPointExactQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.LongPointRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.LongPointSetQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.LongTermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.NumericSort;
|
|
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.PointConfig;
|
|
import it.cavallium.dbengine.client.query.current.data.PointType;
|
|
import it.cavallium.dbengine.client.query.current.data.SortedDocFieldExistsQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.SortedNumericDocValuesFieldSlowRangeQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
|
|
import it.cavallium.dbengine.client.query.current.data.TermPosition;
|
|
import it.cavallium.dbengine.client.query.current.data.TermQuery;
|
|
import it.cavallium.dbengine.client.query.current.data.WildcardQuery;
|
|
import it.cavallium.dbengine.lucene.RandomSortField;
|
|
import java.text.DecimalFormat;
|
|
import java.text.NumberFormat;
|
|
import java.util.Map;
|
|
import java.util.function.Function;
|
|
import java.util.stream.Collectors;
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
|
import org.apache.lucene.analysis.StopFilter;
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
import org.apache.lucene.analysis.Tokenizer;
|
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
|
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
|
|
import org.apache.lucene.analysis.en.PorterStemFilter;
|
|
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
|
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|
import org.apache.lucene.document.DoublePoint;
|
|
import org.apache.lucene.document.FloatPoint;
|
|
import org.apache.lucene.document.IntPoint;
|
|
import org.apache.lucene.document.LongPoint;
|
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
|
import org.apache.lucene.index.Term;
|
|
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
|
|
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
|
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
|
|
import org.apache.lucene.search.BooleanClause.Occur;
|
|
import org.apache.lucene.search.BooleanQuery.Builder;
|
|
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
|
import org.apache.lucene.search.FuzzyQuery;
|
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
|
import org.apache.lucene.search.Query;
|
|
import org.apache.lucene.search.Sort;
|
|
import org.apache.lucene.search.SortField;
|
|
import org.apache.lucene.search.SortField.Type;
|
|
import org.apache.lucene.search.SortedNumericSortField;
|
|
|
|
public class QueryParser {
|
|
|
|
public static Query toQuery(it.cavallium.dbengine.client.query.current.data.Query query, Analyzer analyzer) {
|
|
if (query == null) {
|
|
return null;
|
|
}
|
|
switch (query.getBasicType$()) {
|
|
case StandardQuery:
|
|
var standardQuery = (it.cavallium.dbengine.client.query.current.data.StandardQuery) query;
|
|
|
|
// Fix the analyzer
|
|
Map<String, Analyzer> customAnalyzers = standardQuery
|
|
.termFields()
|
|
.stream()
|
|
.collect(Collectors.toMap(Function.identity(), term -> new NoOpAnalyzer()));
|
|
analyzer = new PerFieldAnalyzerWrapper(analyzer, customAnalyzers);
|
|
|
|
var standardQueryParser = new StandardQueryParser(analyzer);
|
|
|
|
standardQueryParser.setPointsConfigMap(standardQuery
|
|
.pointsConfig()
|
|
.stream()
|
|
.collect(Collectors.toMap(
|
|
PointConfig::field,
|
|
pointConfig -> new PointsConfig(
|
|
toNumberFormat(pointConfig.data().numberFormat()),
|
|
toType(pointConfig.data().type())
|
|
)
|
|
)));
|
|
var defaultFields = standardQuery.defaultFields();
|
|
try {
|
|
Query parsed;
|
|
if (defaultFields.size() > 1) {
|
|
standardQueryParser.setMultiFields(defaultFields.toArray(String[]::new));
|
|
parsed = standardQueryParser.parse(standardQuery.query(), null);
|
|
} else if (defaultFields.size() == 1) {
|
|
parsed = standardQueryParser.parse(standardQuery.query(), defaultFields.get(0));
|
|
} else {
|
|
throw new IllegalStateException("Can't parse a standard query expression that has 0 default fields");
|
|
}
|
|
return parsed;
|
|
} catch (QueryNodeException e) {
|
|
throw new IllegalStateException("Can't parse query expression \"" + standardQuery.query() + "\"", e);
|
|
}
|
|
case BooleanQuery:
|
|
var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query;
|
|
var bq = new Builder();
|
|
for (BooleanQueryPart part : booleanQuery.parts()) {
|
|
Occur occur = switch (part.occur().getBasicType$()) {
|
|
case OccurFilter -> Occur.FILTER;
|
|
case OccurMust -> Occur.MUST;
|
|
case OccurShould -> Occur.SHOULD;
|
|
case OccurMustNot -> Occur.MUST_NOT;
|
|
default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBasicType$());
|
|
};
|
|
bq.add(toQuery(part.query(), analyzer), occur);
|
|
}
|
|
bq.setMinimumNumberShouldMatch(booleanQuery.minShouldMatch());
|
|
return bq.build();
|
|
case IntPointExactQuery:
|
|
var intPointExactQuery = (IntPointExactQuery) query;
|
|
return IntPoint.newExactQuery(intPointExactQuery.field(), intPointExactQuery.value());
|
|
case IntNDPointExactQuery:
|
|
var intndPointExactQuery = (IntNDPointExactQuery) query;
|
|
var intndValues = intndPointExactQuery.value().toIntArray();
|
|
return IntPoint.newRangeQuery(intndPointExactQuery.field(), intndValues, intndValues);
|
|
case LongPointExactQuery:
|
|
var longPointExactQuery = (LongPointExactQuery) query;
|
|
return LongPoint.newExactQuery(longPointExactQuery.field(), longPointExactQuery.value());
|
|
case FloatPointExactQuery:
|
|
var floatPointExactQuery = (FloatPointExactQuery) query;
|
|
return FloatPoint.newExactQuery(floatPointExactQuery.field(), floatPointExactQuery.value());
|
|
case DoublePointExactQuery:
|
|
var doublePointExactQuery = (DoublePointExactQuery) query;
|
|
return DoublePoint.newExactQuery(doublePointExactQuery.field(), doublePointExactQuery.value());
|
|
case LongNDPointExactQuery:
|
|
var longndPointExactQuery = (LongNDPointExactQuery) query;
|
|
var longndValues = longndPointExactQuery.value().toLongArray();
|
|
return LongPoint.newRangeQuery(longndPointExactQuery.field(), longndValues, longndValues);
|
|
case FloatNDPointExactQuery:
|
|
var floatndPointExactQuery = (FloatNDPointExactQuery) query;
|
|
var floatndValues = floatndPointExactQuery.value().toFloatArray();
|
|
return FloatPoint.newRangeQuery(floatndPointExactQuery.field(), floatndValues, floatndValues);
|
|
case DoubleNDPointExactQuery:
|
|
var doublendPointExactQuery = (DoubleNDPointExactQuery) query;
|
|
var doublendValues = doublendPointExactQuery.value().toDoubleArray();
|
|
return DoublePoint.newRangeQuery(doublendPointExactQuery.field(), doublendValues, doublendValues);
|
|
case IntPointSetQuery:
|
|
var intPointSetQuery = (IntPointSetQuery) query;
|
|
return IntPoint.newSetQuery(intPointSetQuery.field(), intPointSetQuery.values().toIntArray());
|
|
case LongPointSetQuery:
|
|
var longPointSetQuery = (LongPointSetQuery) query;
|
|
return LongPoint.newSetQuery(longPointSetQuery.field(), longPointSetQuery.values().toLongArray());
|
|
case FloatPointSetQuery:
|
|
var floatPointSetQuery = (FloatPointSetQuery) query;
|
|
return FloatPoint.newSetQuery(floatPointSetQuery.field(), floatPointSetQuery.values().toFloatArray());
|
|
case DoublePointSetQuery:
|
|
var doublePointSetQuery = (DoublePointSetQuery) query;
|
|
return DoublePoint.newSetQuery(doublePointSetQuery.field(), doublePointSetQuery.values().toDoubleArray());
|
|
case TermQuery:
|
|
var termQuery = (TermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(toTerm(termQuery.term()));
|
|
case IntTermQuery:
|
|
var intTermQuery = (IntTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(intTermQuery.field(),
|
|
IntPoint.pack(intTermQuery.value())
|
|
));
|
|
case IntNDTermQuery:
|
|
var intNDTermQuery = (IntNDTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(intNDTermQuery.field(),
|
|
IntPoint.pack(intNDTermQuery.value().toIntArray())
|
|
));
|
|
case LongTermQuery:
|
|
var longTermQuery = (LongTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(longTermQuery.field(),
|
|
LongPoint.pack(longTermQuery.value())
|
|
));
|
|
case LongNDTermQuery:
|
|
var longNDTermQuery = (LongNDTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(longNDTermQuery.field(),
|
|
LongPoint.pack(longNDTermQuery.value().toLongArray())
|
|
));
|
|
case FloatTermQuery:
|
|
var floatTermQuery = (FloatTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(floatTermQuery.field(),
|
|
FloatPoint.pack(floatTermQuery.value())
|
|
));
|
|
case FloatNDTermQuery:
|
|
var floatNDTermQuery = (FloatNDTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(floatNDTermQuery.field(),
|
|
FloatPoint.pack(floatNDTermQuery.value().toFloatArray())
|
|
));
|
|
case DoubleTermQuery:
|
|
var doubleTermQuery = (DoubleTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(doubleTermQuery.field(),
|
|
DoublePoint.pack(doubleTermQuery.value())
|
|
));
|
|
case DoubleNDTermQuery:
|
|
var doubleNDTermQuery = (DoubleNDTermQuery) query;
|
|
return new org.apache.lucene.search.TermQuery(new Term(doubleNDTermQuery.field(),
|
|
DoublePoint.pack(doubleNDTermQuery.value().toDoubleArray())
|
|
));
|
|
case FieldExistsQuery:
|
|
var fieldExistQuery = (FieldExistsQuery) query;
|
|
return new org.apache.lucene.search.FieldExistsQuery(fieldExistQuery.field());
|
|
case BoostQuery:
|
|
var boostQuery = (BoostQuery) query;
|
|
return new org.apache.lucene.search.BoostQuery(toQuery(boostQuery.query(), analyzer), boostQuery.scoreBoost());
|
|
case ConstantScoreQuery:
|
|
var constantScoreQuery = (ConstantScoreQuery) query;
|
|
return new org.apache.lucene.search.ConstantScoreQuery(toQuery(constantScoreQuery.query(), analyzer));
|
|
case BoxedQuery:
|
|
return toQuery(((BoxedQuery) query).query(), analyzer);
|
|
case FuzzyQuery:
|
|
var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
|
|
return new FuzzyQuery(toTerm(fuzzyQuery.term()),
|
|
fuzzyQuery.maxEdits(),
|
|
fuzzyQuery.prefixLength(),
|
|
fuzzyQuery.maxExpansions(),
|
|
fuzzyQuery.transpositions()
|
|
);
|
|
case IntPointRangeQuery:
|
|
var intPointRangeQuery = (IntPointRangeQuery) query;
|
|
return IntPoint.newRangeQuery(intPointRangeQuery.field(), intPointRangeQuery.min(), intPointRangeQuery.max());
|
|
case IntNDPointRangeQuery:
|
|
var intndPointRangeQuery = (IntNDPointRangeQuery) query;
|
|
return IntPoint.newRangeQuery(intndPointRangeQuery.field(),
|
|
intndPointRangeQuery.min().toIntArray(),
|
|
intndPointRangeQuery.max().toIntArray()
|
|
);
|
|
case LongPointRangeQuery:
|
|
var longPointRangeQuery = (LongPointRangeQuery) query;
|
|
return LongPoint.newRangeQuery(longPointRangeQuery.field(),
|
|
longPointRangeQuery.min(),
|
|
longPointRangeQuery.max()
|
|
);
|
|
case FloatPointRangeQuery:
|
|
var floatPointRangeQuery = (FloatPointRangeQuery) query;
|
|
return FloatPoint.newRangeQuery(floatPointRangeQuery.field(),
|
|
floatPointRangeQuery.min(),
|
|
floatPointRangeQuery.max()
|
|
);
|
|
case DoublePointRangeQuery:
|
|
var doublePointRangeQuery = (DoublePointRangeQuery) query;
|
|
return DoublePoint.newRangeQuery(doublePointRangeQuery.field(),
|
|
doublePointRangeQuery.min(),
|
|
doublePointRangeQuery.max()
|
|
);
|
|
case LongNDPointRangeQuery:
|
|
var longndPointRangeQuery = (LongNDPointRangeQuery) query;
|
|
return LongPoint.newRangeQuery(longndPointRangeQuery.field(),
|
|
longndPointRangeQuery.min().toLongArray(),
|
|
longndPointRangeQuery.max().toLongArray()
|
|
);
|
|
case FloatNDPointRangeQuery:
|
|
var floatndPointRangeQuery = (FloatNDPointRangeQuery) query;
|
|
return FloatPoint.newRangeQuery(floatndPointRangeQuery.field(),
|
|
floatndPointRangeQuery.min().toFloatArray(),
|
|
floatndPointRangeQuery.max().toFloatArray()
|
|
);
|
|
case DoubleNDPointRangeQuery:
|
|
var doublendPointRangeQuery = (DoubleNDPointRangeQuery) query;
|
|
return DoublePoint.newRangeQuery(doublendPointRangeQuery.field(),
|
|
doublendPointRangeQuery.min().toDoubleArray(),
|
|
doublendPointRangeQuery.max().toDoubleArray()
|
|
);
|
|
case MatchAllDocsQuery:
|
|
return new MatchAllDocsQuery();
|
|
case MatchNoDocsQuery:
|
|
return new MatchNoDocsQuery();
|
|
case PhraseQuery:
|
|
var phraseQuery = (PhraseQuery) query;
|
|
var pqb = new org.apache.lucene.search.PhraseQuery.Builder();
|
|
for (TermPosition phrase : phraseQuery.phrase()) {
|
|
pqb.add(toTerm(phrase.term()), phrase.position());
|
|
}
|
|
pqb.setSlop(phraseQuery.slop());
|
|
return pqb.build();
|
|
case SortedDocFieldExistsQuery:
|
|
var sortedDocFieldExistsQuery = (SortedDocFieldExistsQuery) query;
|
|
return new DocValuesFieldExistsQuery(sortedDocFieldExistsQuery.field());
|
|
case SynonymQuery:
|
|
var synonymQuery = (SynonymQuery) query;
|
|
var sqb = new org.apache.lucene.search.SynonymQuery.Builder(synonymQuery.field());
|
|
for (TermAndBoost part : synonymQuery.parts()) {
|
|
sqb.addTerm(toTerm(part.term()), part.boost());
|
|
}
|
|
return sqb.build();
|
|
case SortedNumericDocValuesFieldSlowRangeQuery:
|
|
var sortedNumericDocValuesFieldSlowRangeQuery = (SortedNumericDocValuesFieldSlowRangeQuery) query;
|
|
return SortedNumericDocValuesField.newSlowRangeQuery(sortedNumericDocValuesFieldSlowRangeQuery.field(),
|
|
sortedNumericDocValuesFieldSlowRangeQuery.min(),
|
|
sortedNumericDocValuesFieldSlowRangeQuery.max()
|
|
);
|
|
case WildcardQuery:
|
|
var wildcardQuery = (WildcardQuery) query;
|
|
return new org.apache.lucene.search.WildcardQuery(new Term(wildcardQuery.field(), wildcardQuery.pattern()));
|
|
default:
|
|
throw new IllegalStateException("Unexpected value: " + query.getBasicType$());
|
|
}
|
|
}
|
|
|
|
private static NumberFormat toNumberFormat(it.cavallium.dbengine.client.query.current.data.NumberFormat numberFormat) {
|
|
return switch (numberFormat.getBasicType$()) {
|
|
case NumberFormatDecimal -> new DecimalFormat();
|
|
default -> throw new UnsupportedOperationException("Unsupported type: " + numberFormat.getBasicType$());
|
|
};
|
|
}
|
|
|
|
private static Class<? extends Number> toType(PointType type) {
|
|
return switch (type.getBasicType$()) {
|
|
case PointTypeInt -> Integer.class;
|
|
case PointTypeLong -> Long.class;
|
|
case PointTypeFloat -> Float.class;
|
|
case PointTypeDouble -> Double.class;
|
|
default -> throw new UnsupportedOperationException("Unsupported type: " + type.getBasicType$());
|
|
};
|
|
}
|
|
|
|
private static Term toTerm(it.cavallium.dbengine.client.query.current.data.Term term) {
|
|
return new Term(term.field(), term.value());
|
|
}
|
|
|
|
public static Sort toSort(it.cavallium.dbengine.client.query.current.data.Sort sort) {
|
|
switch (sort.getBasicType$()) {
|
|
case NoSort:
|
|
return null;
|
|
case ScoreSort:
|
|
return new Sort(SortField.FIELD_SCORE);
|
|
case DocSort:
|
|
return new Sort(SortField.FIELD_DOC);
|
|
case NumericSort:
|
|
NumericSort numericSort = (NumericSort) sort;
|
|
return new Sort(new SortedNumericSortField(numericSort.field(), Type.LONG, numericSort.reverse()));
|
|
case RandomSort:
|
|
return new Sort(new RandomSortField());
|
|
default:
|
|
throw new IllegalStateException("Unexpected value: " + sort.getBasicType$());
|
|
}
|
|
}
|
|
|
|
public static it.cavallium.dbengine.client.query.current.data.Term toQueryTerm(Term term) {
|
|
return it.cavallium.dbengine.client.query.current.data.Term.of(term.field(), term.text());
|
|
}
|
|
}
|