CavalliumDBEngine/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java

128 lines
5.5 KiB
Java
Raw Normal View History

2021-03-02 01:53:36 +01:00
package it.cavallium.dbengine.client.query;
2021-02-03 13:48:30 +01:00
2021-03-02 01:53:36 +01:00
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
import it.cavallium.dbengine.client.query.current.data.Occur;
import it.cavallium.dbengine.client.query.current.data.OccurFilter;
import it.cavallium.dbengine.client.query.current.data.OccurMust;
import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
import it.cavallium.dbengine.client.query.current.data.OccurShould;
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
import it.cavallium.dbengine.client.query.current.data.TermPosition;
import it.cavallium.dbengine.client.query.current.data.TermQuery;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
2021-02-03 13:48:30 +01:00
import java.io.IOException;
import java.io.StringReader;
2021-02-12 21:55:10 +01:00
import java.util.ArrayList;
2021-02-03 13:48:30 +01:00
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
2021-02-12 21:55:10 +01:00
import org.apache.lucene.search.BooleanClause;
2021-02-12 19:39:02 +01:00
import org.apache.lucene.util.QueryBuilder;
2021-02-12 21:55:10 +01:00
import org.jetbrains.annotations.NotNull;
2021-02-03 13:48:30 +01:00
2021-03-02 01:53:36 +01:00
public class QueryUtils {
2021-02-03 13:48:30 +01:00
2021-03-02 01:53:36 +01:00
public static Query approximateSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createMinShouldMatchQuery(field, text, 0.75f);
return transformQuery(field, luceneQuery);
2021-02-03 13:48:30 +01:00
}
2021-03-02 01:53:36 +01:00
public static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createPhraseQuery(field, text);
return transformQuery(field, luceneQuery);
2021-02-03 13:48:30 +01:00
}
2021-02-12 21:55:10 +01:00
@NotNull
private static Query transformQuery(String field, org.apache.lucene.search.Query luceneQuery) {
if (luceneQuery == null) {
2021-03-02 01:53:36 +01:00
return TermQuery.of(it.cavallium.dbengine.client.query.current.data.Term.of(field, ""));
2021-02-12 21:55:10 +01:00
}
if (luceneQuery instanceof org.apache.lucene.search.TermQuery) {
2021-03-02 01:53:36 +01:00
return TermQuery.of(QueryParser.toQueryTerm(((org.apache.lucene.search.TermQuery) luceneQuery).getTerm()));
2021-02-12 21:55:10 +01:00
}
if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) {
var booleanQuery = (org.apache.lucene.search.BooleanQuery) luceneQuery;
var queryParts = new ArrayList<BooleanQueryPart>();
for (BooleanClause booleanClause : booleanQuery) {
org.apache.lucene.search.Query queryPartQuery = booleanClause.getQuery();
Occur occur;
switch (booleanClause.getOccur()) {
case MUST:
2021-03-02 01:53:36 +01:00
occur = OccurMust.of();
2021-02-12 21:55:10 +01:00
break;
case FILTER:
2021-03-02 01:53:36 +01:00
occur = OccurFilter.of();
2021-02-12 21:55:10 +01:00
break;
case SHOULD:
2021-03-02 01:53:36 +01:00
occur = OccurShould.of();
2021-02-12 21:55:10 +01:00
break;
case MUST_NOT:
2021-03-02 01:53:36 +01:00
occur = OccurMustNot.of();
2021-02-12 21:55:10 +01:00
break;
default:
throw new IllegalArgumentException();
}
2021-03-02 01:53:36 +01:00
queryParts.add(BooleanQueryPart.of(transformQuery(field, queryPartQuery), occur));
2021-02-12 21:55:10 +01:00
}
2021-03-02 01:53:36 +01:00
return BooleanQuery.of(queryParts.toArray(BooleanQueryPart[]::new), booleanQuery.getMinimumNumberShouldMatch());
2021-02-12 21:55:10 +01:00
}
2021-02-16 23:15:56 +01:00
if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery) {
var phraseQuery = (org.apache.lucene.search.PhraseQuery) luceneQuery;
int slop = phraseQuery.getSlop();
var terms = phraseQuery.getTerms();
var positions = phraseQuery.getPositions();
TermPosition[] termPositions = new TermPosition[terms.length];
for (int i = 0; i < terms.length; i++) {
var term = terms[i];
var position = positions[i];
2021-03-02 01:53:36 +01:00
termPositions[i] = TermPosition.of(QueryParser.toQueryTerm(term), position);
2021-02-16 23:15:56 +01:00
}
2021-03-02 01:53:36 +01:00
return PhraseQuery.of(termPositions, slop);
2021-02-16 23:15:56 +01:00
}
2021-02-12 21:55:10 +01:00
org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery;
2021-03-02 01:53:36 +01:00
return SynonymQuery.of(field,
synonymQuery
.getTerms()
.stream()
.map(term -> TermAndBoost.of(QueryParser.toQueryTerm(term), 1))
.toArray(TermAndBoost[]::new)
2021-02-12 21:55:10 +01:00
);
}
2021-02-12 19:39:02 +01:00
private static List<TermPosition> getTerms(TextFieldsAnalyzer preferredAnalyzer, String field, String text) throws IOException {
Analyzer analyzer = LuceneUtils.getAnalyzer(preferredAnalyzer);
2021-02-03 13:48:30 +01:00
TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
return getTerms(ts, field);
}
private static List<TermPosition> getTerms(TokenStream ts, String field) throws IOException {
TermToBytesRefAttribute charTermAttr = ts.addAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute positionIncrementTermAttr = ts.addAttribute(PositionIncrementAttribute.class);
List<TermPosition> terms = new LinkedList<>();
try (ts) {
ts.reset(); // Resets this stream to the beginning. (Required)
int termPosition = -1;
while (ts.incrementToken()) {
var tokenPositionIncrement = positionIncrementTermAttr.getPositionIncrement();
termPosition += tokenPositionIncrement;
2021-03-02 01:53:36 +01:00
terms.add(TermPosition.of(QueryParser.toQueryTerm(new Term(field, charTermAttr.getBytesRef())), termPosition));
2021-02-03 13:48:30 +01:00
}
ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
}
// Release resources associated with this stream.
return terms;
}
2020-12-07 22:15:18 +01:00
}