CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/serializer/Query.java

98 lines
4.1 KiB
Java
Raw Normal View History

2021-01-30 22:14:48 +01:00
package it.cavallium.dbengine.lucene.serializer;
2020-12-07 22:15:18 +01:00
2021-02-03 13:48:30 +01:00
import static it.cavallium.dbengine.lucene.serializer.QueryParser.USE_PHRASE_QUERY;
2021-02-12 19:39:02 +01:00
import static it.cavallium.dbengine.lucene.serializer.QueryParser.USE_QUERY_BUILDER;
2021-02-03 13:48:30 +01:00
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
2021-02-03 13:48:30 +01:00
import java.io.IOException;
import java.io.StringReader;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
2021-02-12 19:39:02 +01:00
import org.apache.lucene.util.QueryBuilder;
2021-02-03 13:48:30 +01:00
2020-12-07 22:15:18 +01:00
public interface Query extends SerializedQueryObject {
2021-02-12 19:39:02 +01:00
static Query approximateSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
if (USE_QUERY_BUILDER) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createMinShouldMatchQuery(field, text, 0.75f);
org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery;
return new SynonymQuery(field,
synonymQuery.getTerms().stream().map(TermQuery::new).toArray(TermQuery[]::new)
);
}
2021-02-03 13:48:30 +01:00
try {
2021-02-12 19:39:02 +01:00
var terms = getTerms(preferredAnalyzer, field, text);
2021-02-03 13:48:30 +01:00
List<BooleanQueryPart> booleanQueryParts = new LinkedList<>();
for (TermPosition term : terms) {
booleanQueryParts.add(new BooleanQueryPart(new TermQuery(term.getTerm()), Occur.MUST));
booleanQueryParts.add(new BooleanQueryPart(new PhraseQuery(terms.toArray(TermPosition[]::new)), Occur.SHOULD));
}
return new BooleanQuery(booleanQueryParts);
} catch (IOException e) {
e.printStackTrace();
2021-02-12 19:39:02 +01:00
return exactSearch(preferredAnalyzer, field, text);
2021-02-03 13:48:30 +01:00
}
}
2021-02-12 19:39:02 +01:00
static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
if (USE_QUERY_BUILDER) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createPhraseQuery(field, text);
org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery;
return new SynonymQuery(field,
synonymQuery.getTerms().stream().map(TermQuery::new).toArray(TermQuery[]::new)
);
}
2021-02-03 13:48:30 +01:00
try {
2021-02-12 19:39:02 +01:00
var terms = getTerms(preferredAnalyzer, field, text);
2021-02-03 13:48:30 +01:00
if (USE_PHRASE_QUERY) {
return new PhraseQuery(terms.toArray(TermPosition[]::new));
} else {
List<BooleanQueryPart> booleanQueryParts = new LinkedList<>();
for (TermPosition term : terms) {
booleanQueryParts.add(new BooleanQueryPart(new TermQuery(term.getTerm()), Occur.MUST));
}
booleanQueryParts.add(new BooleanQueryPart(new PhraseQuery(terms.toArray(TermPosition[]::new)), Occur.FILTER));
return new BooleanQuery(booleanQueryParts);
}
} catch (IOException exception) {
throw new RuntimeException(exception);
}
}
2021-02-12 19:39:02 +01:00
private static List<TermPosition> getTerms(TextFieldsAnalyzer preferredAnalyzer, String field, String text) throws IOException {
Analyzer analyzer = LuceneUtils.getAnalyzer(preferredAnalyzer);
2021-02-03 13:48:30 +01:00
TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
return getTerms(ts, field);
}
private static List<TermPosition> getTerms(TokenStream ts, String field) throws IOException {
TermToBytesRefAttribute charTermAttr = ts.addAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute positionIncrementTermAttr = ts.addAttribute(PositionIncrementAttribute.class);
List<TermPosition> terms = new LinkedList<>();
try (ts) {
ts.reset(); // Resets this stream to the beginning. (Required)
int termPosition = -1;
while (ts.incrementToken()) {
var tokenPositionIncrement = positionIncrementTermAttr.getPositionIncrement();
termPosition += tokenPositionIncrement;
terms.add(new TermPosition(new Term(field, charTermAttr.getBytesRef()), termPosition));
}
ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
}
// Release resources associated with this stream.
return terms;
}
2020-12-07 22:15:18 +01:00
}