diff --git a/pom.xml b/pom.xml
index 6ef1a3e..474bd9f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -206,11 +206,11 @@
lucene-core
${lucene.version}
-
- org.apache.lucene
- lucene-join
- ${lucene.version}
-
+
+ org.apache.lucene
+ lucene-join
+ ${lucene.version}
+
org.apache.lucene
lucene-analysis-common
diff --git a/src/main/data-generator/lucene-query.yaml b/src/main/data-generator/lucene-query.yaml
index 83e4c3c..06054e9 100644
--- a/src/main/data-generator/lucene-query.yaml
+++ b/src/main/data-generator/lucene-query.yaml
@@ -6,7 +6,7 @@ superTypesData:
Query: [
BoxedQuery, TermQuery, IntTermQuery, IntNDTermQuery, LongTermQuery, LongNDTermQuery, FloatTermQuery,
FloatNDTermQuery, DoubleTermQuery, DoubleNDTermQuery,
- PhraseQuery, WildcardQuery, SynonymQuery, FuzzyQuery, MatchAllDocsQuery, MatchNoDocsQuery,
+ PhraseQuery, SolrTextQuery, WildcardQuery, SynonymQuery, FuzzyQuery, MatchAllDocsQuery, MatchNoDocsQuery,
BooleanQuery, SortedNumericDocValuesFieldSlowRangeQuery, SortedDocFieldExistsQuery,
ConstantScoreQuery, BoostQuery, IntPointRangeQuery, IntNDPointRangeQuery, LongPointRangeQuery,
FloatPointRangeQuery, DoublePointRangeQuery, LongNDPointRangeQuery, FloatNDPointRangeQuery,
@@ -136,6 +136,14 @@ baseTypesData:
# counted as characters from the beginning of the phrase.
phrase: TermPosition[]
slop: int
+ # Query that matches a phrase. (Solr)
+ SolrTextQuery:
+ data:
+ # Field name
+ field: String
+ # Text query
+ phrase: String
+ slop: int
# Advanced query that matches text allowing asterisks in the query
WildcardQuery:
data:
diff --git a/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java b/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java
index 36bb21f..2195bb8 100644
--- a/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java
+++ b/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java
@@ -1,5 +1,8 @@
package it.cavallium.dbengine.client.query;
+import com.google.common.xml.XmlEscapers;
+import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
+import it.cavallium.dbengine.client.query.current.data.BooleanQueryBuilder;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
import it.cavallium.dbengine.client.query.current.data.BoostQuery;
import it.cavallium.dbengine.client.query.current.data.BoxedQuery;
@@ -34,9 +37,12 @@ import it.cavallium.dbengine.client.query.current.data.LongPointRangeQuery;
import it.cavallium.dbengine.client.query.current.data.LongPointSetQuery;
import it.cavallium.dbengine.client.query.current.data.LongTermQuery;
import it.cavallium.dbengine.client.query.current.data.NumericSort;
+import it.cavallium.dbengine.client.query.current.data.OccurMust;
+import it.cavallium.dbengine.client.query.current.data.OccurShould;
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
import it.cavallium.dbengine.client.query.current.data.PointConfig;
import it.cavallium.dbengine.client.query.current.data.PointType;
+import it.cavallium.dbengine.client.query.current.data.SolrTextQuery;
import it.cavallium.dbengine.client.query.current.data.SortedDocFieldExistsQuery;
import it.cavallium.dbengine.client.query.current.data.SortedNumericDocValuesFieldSlowRangeQuery;
import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
@@ -47,9 +53,13 @@ import it.cavallium.dbengine.client.query.current.data.WildcardQuery;
import it.cavallium.dbengine.lucene.RandomSortField;
import java.text.DecimalFormat;
import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
+import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.DoublePoint;
@@ -61,6 +71,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
+import org.apache.lucene.queryparser.xml.builders.UserInputQueryBuilder;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
@@ -72,9 +83,13 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.SortedNumericSortField;
+import org.jetbrains.annotations.Nullable;
public class QueryParser {
+ // Substrings replaced by escapeQueryStringValue and, index-aligned, their replacements:
+ // a backslash becomes two backslashes, a double quote becomes backslash + double quote.
+ private static final String[] QUERY_STRING_FIND = {"\\", "\""};
+ private static final String[] QUERY_STRING_REPLACE = {"\\\\", "\\\""};
+
public static Query toQuery(it.cavallium.dbengine.client.query.current.data.Query query, Analyzer analyzer) {
if (query == null) {
return null;
@@ -350,6 +365,389 @@ public class QueryParser {
}
}
+ /**
+ * Appends the XML representation of {@code query} to {@code out}.
+ *
+ * Wrapper queries (boxed, boost, constant-score) and the point-set polyfills are serialized by
+ * recursing into this method. A {@code null} query appends nothing. If an exception is thrown,
+ * {@code out} may already contain the partial output written before the failure.
+ *
+ * @param out destination buffer; text is only ever appended to it
+ * @param query query to serialize, may be {@code null}
+ * @param boost boost inherited from an enclosing boost query, or {@code null} when there is none
+ * @throws UnsupportedOperationException if the query type, or one of its options, is not supported here
+ * @throws IllegalStateException if the base type of the query or of a boolean clause is not recognized
+ */
+ public static void toQueryXML(StringBuilder out,
+ it.cavallium.dbengine.client.query.current.data.Query query,
+ @Nullable Float boost) {
+ if (query == null) {
+ return;
+ }
+ switch (query.getBaseType$()) {
+ case StandardQuery -> {
+ var standardQuery = (it.cavallium.dbengine.client.query.current.data.StandardQuery) query;
+
+ out.append(" 1) {
+ throw new UnsupportedOperationException("Maximum supported default fields count: 1");
+ }
+ if (boost != null) {
+ out.append(" boost=\"").append(boost).append("\"");
+ }
+ if (standardQuery.defaultFields().size() == 1) {
+ out
+ .append(" fieldName=\"")
+ .append(XmlEscapers.xmlAttributeEscaper().escape(standardQuery.defaultFields().get(0)))
+ .append("\"");
+ }
+ if (!standardQuery.termFields().isEmpty()) {
+ throw new UnsupportedOperationException("Term fields unsupported");
+ }
+ if (!standardQuery.pointsConfig().isEmpty()) {
+ throw new UnsupportedOperationException("Points config unsupported");
+ }
+ out.append(">");
+ out.append(XmlEscapers.xmlContentEscaper().escape(standardQuery.query()));
+ out.append("\n");
+ }
+ case BooleanQuery -> {
+ var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query;
+
+ out.append("\n");
+
+ for (BooleanQueryPart part : booleanQuery.parts()) {
+ out.append(" "filter";
+ case OccurMust -> "must";
+ case OccurShould -> "should";
+ case OccurMustNot -> "mustNot";
+ default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBaseType$());
+ }).append("\"");
+ out.append(">\n");
+ toQueryXML(out, part.query(), null);
+ out.append("\n");
+ }
+ out.append("\n");
+ }
+ case IntPointExactQuery -> {
+ var intPointExactQuery = (IntPointExactQuery) query;
+ out.append("\n");
+ }
+ case IntNDPointExactQuery -> {
+ // No cast here: the query in this arm is not an IntPointExactQuery, so casting to that
+ // type would fail with ClassCastException before the intended exception is thrown.
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case LongPointExactQuery -> {
+ var longPointExactQuery = (LongPointExactQuery) query;
+ out.append("\n");
+ }
+ case FloatPointExactQuery -> {
+ var floatPointExactQuery = (FloatPointExactQuery) query;
+ out.append("\n");
+ }
+ case DoublePointExactQuery -> {
+ var doublePointExactQuery = (DoublePointExactQuery) query;
+ out.append("\n");
+ }
+ case LongNDPointExactQuery -> {
+ var longndPointExactQuery = (LongNDPointExactQuery) query;
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case FloatNDPointExactQuery -> {
+ var floatndPointExactQuery = (FloatNDPointExactQuery) query;
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case DoubleNDPointExactQuery -> {
+ var doublendPointExactQuery = (DoubleNDPointExactQuery) query;
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case IntPointSetQuery -> {
+ var intPointSetQuery = (IntPointSetQuery) query;
+ // Polyfill
+ toQueryXML(out, BooleanQuery.of(intPointSetQuery.values().intStream()
+ .mapToObj(val -> IntPointExactQuery.of(intPointSetQuery.field(), val))
+ .map(q -> BooleanQueryPart.of(q, OccurShould.of()))
+ .toList(), 1), boost);
+ }
+ case LongPointSetQuery -> {
+ var longPointSetQuery = (LongPointSetQuery) query;
+ // Polyfill
+ toQueryXML(out, BooleanQuery.of(longPointSetQuery.values().longStream()
+ .mapToObj(val -> LongPointExactQuery.of(longPointSetQuery.field(), val))
+ .map(q -> BooleanQueryPart.of(q, OccurShould.of()))
+ .toList(), 1), boost);
+ }
+ case FloatPointSetQuery -> {
+ var floatPointSetQuery = (FloatPointSetQuery) query;
+ // Polyfill
+ toQueryXML(out, BooleanQuery.of(floatPointSetQuery.values().stream()
+ .map(val -> FloatPointExactQuery.of(floatPointSetQuery.field(), val))
+ .map(q -> BooleanQueryPart.of(q, OccurShould.of()))
+ .toList(), 1), boost);
+ }
+ case DoublePointSetQuery -> {
+ var doublePointSetQuery = (DoublePointSetQuery) query;
+ // Polyfill
+ toQueryXML(out, BooleanQuery.of(doublePointSetQuery.values().doubleStream()
+ .mapToObj(val -> DoublePointExactQuery.of(doublePointSetQuery.field(), val))
+ .map(q -> BooleanQueryPart.of(q, OccurShould.of()))
+ .toList(), 1), boost);
+ }
+ case TermQuery -> {
+ var termQuery = (TermQuery) query;
+ out
+ .append("");
+ out.append(XmlEscapers.xmlContentEscaper().escape(termQuery.term().value()));
+ out.append("\n");
+ }
+ case IntTermQuery -> {
+ var intTermQuery = (IntTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case IntNDTermQuery -> {
+ var intNDTermQuery = (IntNDTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case LongTermQuery -> {
+ var longTermQuery = (LongTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case LongNDTermQuery -> {
+ var longNDTermQuery = (LongNDTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case FloatTermQuery -> {
+ var floatTermQuery = (FloatTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case FloatNDTermQuery -> {
+ var floatNDTermQuery = (FloatNDTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case DoubleTermQuery -> {
+ var doubleTermQuery = (DoubleTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case DoubleNDTermQuery -> {
+ var doubleNDTermQuery = (DoubleNDTermQuery) query;
+ throw new UnsupportedOperationException("Non-string term fields are not supported");
+ }
+ case FieldExistsQuery -> {
+ var fieldExistQuery = (FieldExistsQuery) query;
+ out.append("");
+ ensureValidField(fieldExistQuery.field());
+ out.append(fieldExistQuery.field());
+ out.append(":[* TO *]");
+ out.append("\n");
+ }
+ case SolrTextQuery -> {
+ var solrTextQuery = (SolrTextQuery) query;
+ out.append("");
+ ensureValidField(solrTextQuery.field());
+ out.append(solrTextQuery.field());
+ out.append(":");
+ // Two escaping layers: first the query-string layer (backslash and double quote), then the
+ // XML content layer, because the phrase is arbitrary text and may contain '<', '>' or '&'.
+ out
+ .append("\"")
+ .append(XmlEscapers.xmlContentEscaper().escape(escapeQueryStringValue(solrTextQuery.phrase())))
+ .append("\"");
+ if (solrTextQuery.slop() > 0) {
+ out.append("~").append(solrTextQuery.slop());
+ }
+ out.append("\n");
+ }
+ case BoostQuery -> {
+ var boostQuery = (BoostQuery) query;
+ toQueryXML(out, boostQuery.query(), boostQuery.scoreBoost());
+ }
+ case ConstantScoreQuery -> {
+ var constantScoreQuery = (ConstantScoreQuery) query;
+ out.append("\n");
+ // Serialize the wrapped query; passing "query" itself would recurse without end.
+ toQueryXML(out, constantScoreQuery.query(), null);
+ out.append("\n");
+ }
+ case BoxedQuery -> {
+ toQueryXML(out, ((BoxedQuery) query).query(), boost);
+ }
+ case FuzzyQuery -> {
+ var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
+ throw new UnsupportedOperationException("Fuzzy query is not supported, use span queries");
+ }
+ case IntPointRangeQuery -> {
+ var intPointRangeQuery = (IntPointRangeQuery) query;
+ out.append("\n");
+ }
+ case IntNDPointRangeQuery -> {
+ var intndPointRangeQuery = (IntNDPointRangeQuery) query;
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case LongPointRangeQuery -> {
+ var longPointRangeQuery = (LongPointRangeQuery) query;
+ out.append("\n");
+ }
+ case FloatPointRangeQuery -> {
+ var floatPointRangeQuery = (FloatPointRangeQuery) query;
+ out.append("\n");
+ }
+ case DoublePointRangeQuery -> {
+ var doublePointRangeQuery = (DoublePointRangeQuery) query;
+ out.append("\n");
+ }
+ case LongNDPointRangeQuery -> {
+ var longndPointRangeQuery = (LongNDPointRangeQuery) query;
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case FloatNDPointRangeQuery -> {
+ var floatndPointRangeQuery = (FloatNDPointRangeQuery) query;
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case DoubleNDPointRangeQuery -> {
+ var doublendPointRangeQuery = (DoubleNDPointRangeQuery) query;
+ throw new UnsupportedOperationException("N-dimensional point queries are not supported");
+ }
+ case MatchAllDocsQuery -> {
+ out.append("");
+ out.append("*:*");
+ out.append("\n");
+ }
+ case MatchNoDocsQuery -> {
+ out.append("");
+ //todo: check if it's correct
+ out.append("!*:*");
+ out.append("\n");
+ }
+ case PhraseQuery -> {
+ //todo: check if it's correct
+
+ var phraseQuery = (PhraseQuery) query;
+ out.append("\n");
+ phraseQuery.phrase().stream().sorted(Comparator.comparingInt(TermPosition::position)).forEach(term -> {
+ out
+ .append("")
+ .append(XmlEscapers.xmlContentEscaper().escape(term.term().value()))
+ .append("\n");
+ });
+ out.append("\n");
+ }
+ case SortedDocFieldExistsQuery -> {
+ var sortedDocFieldExistsQuery = (SortedDocFieldExistsQuery) query;
+ throw new UnsupportedOperationException("Field existence query is not supported");
+ }
+ case SynonymQuery -> {
+ var synonymQuery = (SynonymQuery) query;
+ throw new UnsupportedOperationException("Synonym query is not supported");
+ }
+ case SortedNumericDocValuesFieldSlowRangeQuery -> {
+ throw new UnsupportedOperationException("Slow range query is not supported");
+ }
+ case WildcardQuery -> {
+ var wildcardQuery = (WildcardQuery) query;
+ throw new UnsupportedOperationException("Wildcard query is not supported");
+ }
+ default -> throw new IllegalStateException("Unexpected value: " + query.getBaseType$());
+ }
+ }
+
+ /**
+ * Escapes {@code text} so it can be placed inside a double-quoted query-string value: every
+ * entry of {@code QUERY_STRING_FIND} is replaced by the entry of {@code QUERY_STRING_REPLACE}
+ * at the same index.
+ *
+ * @param text raw value to escape
+ * @return the escaped value, as returned by {@code StringUtils.replaceEach}
+ */
+ private static String escapeQueryStringValue(String text) {
+ final String escaped = StringUtils.replaceEach(text, QUERY_STRING_FIND, QUERY_STRING_REPLACE);
+ return escaped;
+ }
+
+ /**
+ * Checks that {@code field} can be written verbatim into a query string: it must be non-empty
+ * and consist only of letters, digits and underscores.
+ *
+ * @param field field name to validate
+ * @throws UnsupportedOperationException if the name is empty or contains any other character
+ */
+ private static void ensureValidField(String field) {
+ if (field.isEmpty()) {
+ // An empty name would produce a query such as ":[* TO *]", which has no field to match.
+ throw new UnsupportedOperationException("Empty field name");
+ }
+ var firstInvalid = field
+ .codePoints()
+ .filter(codePoint -> !Character.isLetterOrDigit(codePoint) && codePoint != '_')
+ .findFirst();
+ if (firstInvalid.isPresent()) {
+ // Character.toString(int) renders the character itself; concatenating the int directly
+ // would print its numeric code point value instead.
+ throw new UnsupportedOperationException(
+ "Invalid character \"" + Character.toString(firstInvalid.getAsInt()) + "\" in field name \"" + field + "\"");
+ }
+ }
+
private static NumberFormat toNumberFormat(it.cavallium.dbengine.client.query.current.data.NumberFormat numberFormat) {
return switch (numberFormat.getBaseType$()) {
case NumberFormatDecimal -> new DecimalFormat();
diff --git a/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java b/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java
index 28294e3..757d612 100644
--- a/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java
+++ b/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java
@@ -38,12 +38,19 @@ public class QueryUtils {
return transformQuery(field, luceneQuery);
}
+ /**
+ * Builds a phrase query for {@code text} on {@code field}: the text is turned into a phrase
+ * query using the analyzer resolved from {@code preferredAnalyzer}, and the result is then
+ * converted by {@code transformQuery}.
+ *
+ * @param preferredAnalyzer analyzer selector passed to {@code LuceneUtils.getAnalyzer}
+ * @param field name of the field to search
+ * @param text text of the phrase
+ * @param slop slop value forwarded to {@code createPhraseQuery}
+ * @return the result of {@code transformQuery} for the generated phrase query
+ * @deprecated use {@code SolrTextQuery} instead
+ */
+ @Deprecated
public static Query phraseSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text, int slop) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createPhraseQuery(field, text, slop);
return transformQuery(field, luceneQuery);
}
+ /**
+ * Deprecated: use {@code SolrTextQuery} instead.
+ */
public static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createPhraseQuery(field, text);