Fix query parser
This commit is contained in:
parent
e66bc6ce53
commit
dec229ac78
@ -1,6 +1,8 @@
|
|||||||
package it.cavallium.dbengine.client.query;
|
package it.cavallium.dbengine.client.query;
|
||||||
|
|
||||||
import com.google.common.xml.XmlEscapers;
|
import com.google.common.xml.XmlEscapers;
|
||||||
|
import com.ibm.icu.text.BreakIterator;
|
||||||
|
import com.ibm.icu.util.ULocale;
|
||||||
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
|
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
|
||||||
import it.cavallium.dbengine.client.query.current.data.BooleanQueryBuilder;
|
import it.cavallium.dbengine.client.query.current.data.BooleanQueryBuilder;
|
||||||
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
|
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
|
||||||
@ -38,6 +40,7 @@ import it.cavallium.dbengine.client.query.current.data.LongPointSetQuery;
|
|||||||
import it.cavallium.dbengine.client.query.current.data.LongTermQuery;
|
import it.cavallium.dbengine.client.query.current.data.LongTermQuery;
|
||||||
import it.cavallium.dbengine.client.query.current.data.NumericSort;
|
import it.cavallium.dbengine.client.query.current.data.NumericSort;
|
||||||
import it.cavallium.dbengine.client.query.current.data.OccurMust;
|
import it.cavallium.dbengine.client.query.current.data.OccurMust;
|
||||||
|
import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
|
||||||
import it.cavallium.dbengine.client.query.current.data.OccurShould;
|
import it.cavallium.dbengine.client.query.current.data.OccurShould;
|
||||||
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
|
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
|
||||||
import it.cavallium.dbengine.client.query.current.data.PointConfig;
|
import it.cavallium.dbengine.client.query.current.data.PointConfig;
|
||||||
@ -51,17 +54,23 @@ import it.cavallium.dbengine.client.query.current.data.TermPosition;
|
|||||||
import it.cavallium.dbengine.client.query.current.data.TermQuery;
|
import it.cavallium.dbengine.client.query.current.data.TermQuery;
|
||||||
import it.cavallium.dbengine.client.query.current.data.WildcardQuery;
|
import it.cavallium.dbengine.client.query.current.data.WildcardQuery;
|
||||||
import it.cavallium.dbengine.lucene.RandomSortField;
|
import it.cavallium.dbengine.lucene.RandomSortField;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.DecimalFormat;
|
import java.text.DecimalFormat;
|
||||||
import java.text.NumberFormat;
|
import java.text.NumberFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
|
||||||
|
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
|
||||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.document.DoublePoint;
|
import org.apache.lucene.document.DoublePoint;
|
||||||
import org.apache.lucene.document.FloatPoint;
|
import org.apache.lucene.document.FloatPoint;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
@ -71,6 +80,8 @@ import org.apache.lucene.index.Term;
|
|||||||
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
|
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
|
||||||
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
||||||
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
|
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
|
||||||
|
import org.apache.lucene.queryparser.xml.CoreParser;
|
||||||
|
import org.apache.lucene.queryparser.xml.ParserException;
|
||||||
import org.apache.lucene.queryparser.xml.builders.UserInputQueryBuilder;
|
import org.apache.lucene.queryparser.xml.builders.UserInputQueryBuilder;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery.Builder;
|
import org.apache.lucene.search.BooleanQuery.Builder;
|
||||||
@ -400,28 +411,32 @@ public class QueryParser {
|
|||||||
}
|
}
|
||||||
case BooleanQuery -> {
|
case BooleanQuery -> {
|
||||||
var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query;
|
var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query;
|
||||||
|
if (booleanQuery.parts().size() == 1
|
||||||
out.append("<BooleanQuery");
|
&& booleanQuery.parts().get(0).occur().getBaseType$() == BaseType.OccurMust) {
|
||||||
if (boost != null) {
|
toQueryXML(out, booleanQuery.parts().get(0).query(), boost);
|
||||||
out.append(" boost=\"").append(boost).append("\"");
|
} else {
|
||||||
}
|
out.append("<BooleanQuery");
|
||||||
out.append(" minimumNumberShouldMatch=\"").append(booleanQuery.minShouldMatch()).append("\"");
|
if (boost != null) {
|
||||||
out.append(">\n");
|
out.append(" boost=\"").append(boost).append("\"");
|
||||||
|
}
|
||||||
for (BooleanQueryPart part : booleanQuery.parts()) {
|
out.append(" minimumNumberShouldMatch=\"").append(booleanQuery.minShouldMatch()).append("\"");
|
||||||
out.append("<Clause");
|
|
||||||
out.append(" occurs=\"").append(switch (part.occur().getBaseType$()) {
|
|
||||||
case OccurFilter -> "filter";
|
|
||||||
case OccurMust -> "must";
|
|
||||||
case OccurShould -> "should";
|
|
||||||
case OccurMustNot -> "mustNot";
|
|
||||||
default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBaseType$());
|
|
||||||
}).append("\"");
|
|
||||||
out.append(">\n");
|
out.append(">\n");
|
||||||
toQueryXML(out, part.query(), null);
|
|
||||||
out.append("</Clause>\n");
|
for (BooleanQueryPart part : booleanQuery.parts()) {
|
||||||
|
out.append("<Clause");
|
||||||
|
out.append(" occurs=\"").append(switch (part.occur().getBaseType$()) {
|
||||||
|
case OccurFilter -> "filter";
|
||||||
|
case OccurMust -> "must";
|
||||||
|
case OccurShould -> "should";
|
||||||
|
case OccurMustNot -> "mustNot";
|
||||||
|
default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBaseType$());
|
||||||
|
}).append("\"");
|
||||||
|
out.append(">\n");
|
||||||
|
toQueryXML(out, part.query(), null);
|
||||||
|
out.append("</Clause>\n");
|
||||||
|
}
|
||||||
|
out.append("</BooleanQuery>\n");
|
||||||
}
|
}
|
||||||
out.append("</BooleanQuery>\n");
|
|
||||||
}
|
}
|
||||||
case IntPointExactQuery -> {
|
case IntPointExactQuery -> {
|
||||||
var intPointExactQuery = (IntPointExactQuery) query;
|
var intPointExactQuery = (IntPointExactQuery) query;
|
||||||
@ -585,7 +600,7 @@ public class QueryParser {
|
|||||||
out.append(solrTextQuery.field());
|
out.append(solrTextQuery.field());
|
||||||
out.append(":");
|
out.append(":");
|
||||||
out.append("\"").append(escapeQueryStringValue(solrTextQuery.phrase())).append("\"");
|
out.append("\"").append(escapeQueryStringValue(solrTextQuery.phrase())).append("\"");
|
||||||
if (solrTextQuery.slop() > 0) {
|
if (solrTextQuery.slop() > 0 && hasMoreThanOneWord(solrTextQuery.phrase())) {
|
||||||
out.append("~").append(solrTextQuery.slop());
|
out.append("~").append(solrTextQuery.slop());
|
||||||
}
|
}
|
||||||
out.append("</UserQuery>\n");
|
out.append("</UserQuery>\n");
|
||||||
@ -735,6 +750,24 @@ public class QueryParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean hasMoreThanOneWord(String sentence) {
|
||||||
|
BreakIterator iterator = BreakIterator.getWordInstance(ULocale.ENGLISH);
|
||||||
|
iterator.setText(sentence);
|
||||||
|
|
||||||
|
boolean firstWord = false;
|
||||||
|
iterator.first();
|
||||||
|
int end = iterator.next();
|
||||||
|
while (end != BreakIterator.DONE) {
|
||||||
|
if (!firstWord) {
|
||||||
|
firstWord = true;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
end = iterator.next();
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private static String escapeQueryStringValue(String text) {
|
private static String escapeQueryStringValue(String text) {
|
||||||
return StringUtils.replaceEach(text, QUERY_STRING_FIND, QUERY_STRING_REPLACE);
|
return StringUtils.replaceEach(text, QUERY_STRING_FIND, QUERY_STRING_REPLACE);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user