2021-01-30 22:14:48 +01:00
|
|
|
package it.cavallium.dbengine.lucene.serializer;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-02-03 13:48:30 +01:00
|
|
|
import static it.cavallium.dbengine.lucene.serializer.QueryParser.USE_PHRASE_QUERY;
|
2021-02-12 19:39:02 +01:00
|
|
|
import static it.cavallium.dbengine.lucene.serializer.QueryParser.USE_QUERY_BUILDER;
|
2021-02-03 13:48:30 +01:00
|
|
|
|
2021-02-04 22:42:57 +01:00
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
|
|
|
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
|
2021-02-03 13:48:30 +01:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.StringReader;
|
2021-02-12 21:55:10 +01:00
|
|
|
import java.util.ArrayList;
|
2021-02-03 13:48:30 +01:00
|
|
|
import java.util.LinkedList;
|
|
|
|
import java.util.List;
|
|
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
|
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
|
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
|
|
|
import org.apache.lucene.index.Term;
|
2021-02-12 21:55:10 +01:00
|
|
|
import org.apache.lucene.search.BooleanClause;
|
2021-02-12 19:39:02 +01:00
|
|
|
import org.apache.lucene.util.QueryBuilder;
|
2021-02-12 21:55:10 +01:00
|
|
|
import org.jetbrains.annotations.NotNull;
|
2021-02-03 13:48:30 +01:00
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
public interface Query extends SerializedQueryObject {
|
|
|
|
|
2021-02-12 19:39:02 +01:00
|
|
|
static Query approximateSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
|
|
|
|
if (USE_QUERY_BUILDER) {
|
|
|
|
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
|
|
|
|
var luceneQuery = qb.createMinShouldMatchQuery(field, text, 0.75f);
|
2021-02-12 21:55:10 +01:00
|
|
|
return transformQuery(field, luceneQuery);
|
2021-02-12 19:39:02 +01:00
|
|
|
}
|
|
|
|
|
2021-02-03 13:48:30 +01:00
|
|
|
try {
|
2021-02-12 19:39:02 +01:00
|
|
|
var terms = getTerms(preferredAnalyzer, field, text);
|
2021-02-03 13:48:30 +01:00
|
|
|
|
|
|
|
List<BooleanQueryPart> booleanQueryParts = new LinkedList<>();
|
|
|
|
for (TermPosition term : terms) {
|
|
|
|
booleanQueryParts.add(new BooleanQueryPart(new TermQuery(term.getTerm()), Occur.MUST));
|
|
|
|
booleanQueryParts.add(new BooleanQueryPart(new PhraseQuery(terms.toArray(TermPosition[]::new)), Occur.SHOULD));
|
|
|
|
}
|
|
|
|
return new BooleanQuery(booleanQueryParts);
|
|
|
|
} catch (IOException e) {
|
|
|
|
e.printStackTrace();
|
2021-02-12 19:39:02 +01:00
|
|
|
return exactSearch(preferredAnalyzer, field, text);
|
2021-02-03 13:48:30 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-12 19:39:02 +01:00
|
|
|
static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
|
|
|
|
if (USE_QUERY_BUILDER) {
|
|
|
|
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
|
|
|
|
var luceneQuery = qb.createPhraseQuery(field, text);
|
2021-02-12 21:55:10 +01:00
|
|
|
return transformQuery(field, luceneQuery);
|
2021-02-12 19:39:02 +01:00
|
|
|
}
|
|
|
|
|
2021-02-03 13:48:30 +01:00
|
|
|
try {
|
2021-02-12 19:39:02 +01:00
|
|
|
var terms = getTerms(preferredAnalyzer, field, text);
|
2021-02-03 13:48:30 +01:00
|
|
|
|
|
|
|
if (USE_PHRASE_QUERY) {
|
|
|
|
return new PhraseQuery(terms.toArray(TermPosition[]::new));
|
|
|
|
} else {
|
|
|
|
List<BooleanQueryPart> booleanQueryParts = new LinkedList<>();
|
|
|
|
for (TermPosition term : terms) {
|
|
|
|
booleanQueryParts.add(new BooleanQueryPart(new TermQuery(term.getTerm()), Occur.MUST));
|
|
|
|
}
|
|
|
|
booleanQueryParts.add(new BooleanQueryPart(new PhraseQuery(terms.toArray(TermPosition[]::new)), Occur.FILTER));
|
|
|
|
return new BooleanQuery(booleanQueryParts);
|
|
|
|
}
|
|
|
|
} catch (IOException exception) {
|
|
|
|
throw new RuntimeException(exception);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-12 21:55:10 +01:00
|
|
|
@NotNull
|
|
|
|
private static Query transformQuery(String field, org.apache.lucene.search.Query luceneQuery) {
|
|
|
|
if (luceneQuery == null) {
|
|
|
|
return new TermQuery(field, "");
|
|
|
|
}
|
|
|
|
if (luceneQuery instanceof org.apache.lucene.search.TermQuery) {
|
|
|
|
return new TermQuery(((org.apache.lucene.search.TermQuery) luceneQuery).getTerm());
|
|
|
|
}
|
|
|
|
if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) {
|
|
|
|
var booleanQuery = (org.apache.lucene.search.BooleanQuery) luceneQuery;
|
|
|
|
var queryParts = new ArrayList<BooleanQueryPart>();
|
|
|
|
for (BooleanClause booleanClause : booleanQuery) {
|
|
|
|
org.apache.lucene.search.Query queryPartQuery = booleanClause.getQuery();
|
|
|
|
|
|
|
|
Occur occur;
|
|
|
|
switch (booleanClause.getOccur()) {
|
|
|
|
case MUST:
|
|
|
|
occur = Occur.MUST;
|
|
|
|
break;
|
|
|
|
case FILTER:
|
|
|
|
occur = Occur.FILTER;
|
|
|
|
break;
|
|
|
|
case SHOULD:
|
|
|
|
occur = Occur.SHOULD;
|
|
|
|
break;
|
|
|
|
case MUST_NOT:
|
|
|
|
occur = Occur.MUST_NOT;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw new IllegalArgumentException();
|
|
|
|
}
|
|
|
|
queryParts.add(new BooleanQueryPart(transformQuery(field, queryPartQuery), occur));
|
|
|
|
}
|
|
|
|
return new BooleanQuery(queryParts).setMinShouldMatch(booleanQuery.getMinimumNumberShouldMatch());
|
|
|
|
}
|
2021-02-16 23:15:56 +01:00
|
|
|
if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery) {
|
|
|
|
var phraseQuery = (org.apache.lucene.search.PhraseQuery) luceneQuery;
|
|
|
|
int slop = phraseQuery.getSlop();
|
|
|
|
var terms = phraseQuery.getTerms();
|
|
|
|
var positions = phraseQuery.getPositions();
|
|
|
|
TermPosition[] termPositions = new TermPosition[terms.length];
|
|
|
|
for (int i = 0; i < terms.length; i++) {
|
|
|
|
var term = terms[i];
|
|
|
|
var position = positions[i];
|
|
|
|
termPositions[i] = new TermPosition(term, position);
|
|
|
|
}
|
|
|
|
return new PhraseQuery(termPositions).setSlop(slop);
|
|
|
|
}
|
2021-02-12 21:55:10 +01:00
|
|
|
org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery;
|
|
|
|
return new SynonymQuery(field,
|
|
|
|
synonymQuery.getTerms().stream().map(TermQuery::new).toArray(TermQuery[]::new)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2021-02-12 19:39:02 +01:00
|
|
|
private static List<TermPosition> getTerms(TextFieldsAnalyzer preferredAnalyzer, String field, String text) throws IOException {
|
|
|
|
Analyzer analyzer = LuceneUtils.getAnalyzer(preferredAnalyzer);
|
2021-02-03 13:48:30 +01:00
|
|
|
TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
|
|
|
|
return getTerms(ts, field);
|
|
|
|
}
|
|
|
|
|
|
|
|
private static List<TermPosition> getTerms(TokenStream ts, String field) throws IOException {
|
|
|
|
TermToBytesRefAttribute charTermAttr = ts.addAttribute(TermToBytesRefAttribute.class);
|
|
|
|
PositionIncrementAttribute positionIncrementTermAttr = ts.addAttribute(PositionIncrementAttribute.class);
|
|
|
|
List<TermPosition> terms = new LinkedList<>();
|
|
|
|
try (ts) {
|
|
|
|
ts.reset(); // Resets this stream to the beginning. (Required)
|
|
|
|
int termPosition = -1;
|
|
|
|
while (ts.incrementToken()) {
|
|
|
|
var tokenPositionIncrement = positionIncrementTermAttr.getPositionIncrement();
|
|
|
|
termPosition += tokenPositionIncrement;
|
|
|
|
terms.add(new TermPosition(new Term(field, charTermAttr.getBytesRef()), termPosition));
|
|
|
|
}
|
|
|
|
ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
|
|
|
|
}
|
|
|
|
// Release resources associated with this stream.
|
|
|
|
return terms;
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|