Improve performance
This commit is contained in:
parent
43439c6f10
commit
ff9ee54857
@ -1,5 +1,6 @@
|
||||
package it.cavallium.dbengine.client;
|
||||
|
||||
import it.cavallium.dbengine.client.query.current.data.NoSort;
|
||||
import it.cavallium.dbengine.client.query.current.data.NumericSort;
|
||||
import it.cavallium.dbengine.client.query.current.data.RandomSort;
|
||||
import it.cavallium.dbengine.client.query.current.data.ScoreSort;
|
||||
@ -66,6 +67,10 @@ public class MultiSort<T> {
|
||||
return new MultiSort<>(ScoreSort.of());
|
||||
}
|
||||
|
||||
/**
 * Creates a {@code MultiSort} for {@code SearchResultItem<T, U>} elements, constructed from {@code NoSort.of()}.
 *
 * @param <T> first type argument of the {@code SearchResultItem} elements
 * @param <U> second type argument of the {@code SearchResultItem} elements
 * @return a new {@code MultiSort} built with {@code NoSort.of()}
 */
public static <T, U> MultiSort<SearchResultItem<T, U>> noScoreNoSortWithValues() {
|
||||
return new MultiSort<>(NoSort.of());
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code querySort} value held by this instance.
 *
 * @return the stored {@code querySort}
 */
public Sort getQuerySort() {
|
||||
return querySort;
|
||||
}
|
||||
|
@ -38,6 +38,7 @@ public final record ClientQueryParams<T>(@Nullable CompositeSnapshot snapshot,
|
||||
case COMPLETE -> ScoreMode.of(false, true);
|
||||
case COMPLETE_NO_SCORES -> ScoreMode.of(false, false);
|
||||
case TOP_SCORES -> ScoreMode.of(true, true);
|
||||
case NO_SCORES -> ScoreMode.of(true, false);
|
||||
//noinspection UnnecessaryDefault
|
||||
default -> throw new IllegalArgumentException();
|
||||
};
|
||||
|
@ -158,7 +158,7 @@ public class QueryParser {
|
||||
} else if (scoreMode.computeScores() && !scoreMode.onlyTopScores()) {
|
||||
return ScoreMode.COMPLETE;
|
||||
} else if (!scoreMode.computeScores() && scoreMode.onlyTopScores()) {
|
||||
throw new IllegalStateException("Conflicting score mode options: [computeScores = false, onlyTopScore = true]");
|
||||
return ScoreMode.TOP_DOCS;
|
||||
} else if (!scoreMode.computeScores() && !scoreMode.onlyTopScores()) {
|
||||
return ScoreMode.COMPLETE_NO_SCORES;
|
||||
} else {
|
||||
|
@ -18,5 +18,11 @@ public enum LLScoreMode {
|
||||
* hits using the {@link Scorer#setMinCompetitiveScore(float)} API.
|
||||
* This can reduce time if using setMinCompetitiveScore.
|
||||
*/
|
||||
TOP_SCORES
|
||||
TOP_SCORES,
|
||||
/**
|
||||
* Produced scorers will allow visiting some matches but scores won't be
|
||||
* available.
|
||||
* Much faster in multi-lucene indices than complete, because it will not need global scores calculation.
|
||||
*/
|
||||
NO_SCORES
|
||||
}
|
||||
|
@ -27,7 +27,9 @@ import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
@ -48,6 +50,7 @@ import org.apache.lucene.search.similarities.BooleanSimilarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.novasearch.lucene.search.similarities.BM25Similarity;
|
||||
import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model;
|
||||
@ -55,11 +58,16 @@ import org.novasearch.lucene.search.similarities.LdpSimilarity;
|
||||
import org.novasearch.lucene.search.similarities.LtcSimilarity;
|
||||
import org.novasearch.lucene.search.similarities.RobertsonSimilarity;
|
||||
import org.reactivestreams.Publisher;
|
||||
import org.warp.commonutils.log.Logger;
|
||||
import org.warp.commonutils.log.LoggerFactory;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Scheduler;
|
||||
|
||||
public class LuceneUtils {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(LuceneUtils.class);
|
||||
|
||||
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 4, 4);
|
||||
private static final Analyzer lucene4GramStringAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(false, 4, 4);
|
||||
private static final Analyzer lucene4GramWordsAnalyzerInstance = new NCharGramAnalyzer(true, 4, 4);
|
||||
@ -168,35 +176,29 @@ public class LuceneUtils {
|
||||
return minCompetitiveScore == null || score >= minCompetitiveScore;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
/**
|
||||
* @throws NoSuchElementException when the key is not found
|
||||
* @throws IOException when an error occurs when reading the document
|
||||
*/
|
||||
@NotNull
|
||||
public static String keyOfTopDoc(int docId, IndexReader indexReader,
|
||||
String keyFieldName) throws IOException {
|
||||
String keyFieldName) throws IOException, NoSuchElementException {
|
||||
if (docId > indexReader.maxDoc()) {
|
||||
throw new IOException("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")");
|
||||
}
|
||||
Document d = indexReader.document(docId, Set.of(keyFieldName));
|
||||
if (d.getFields().isEmpty()) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("The document docId: ").append(docId).append(" is empty.");
|
||||
var realFields = indexReader.document(docId).getFields();
|
||||
if (!realFields.isEmpty()) {
|
||||
sb.append("\n");
|
||||
sb.append("Present fields:\n");
|
||||
boolean first = true;
|
||||
for (IndexableField field : realFields) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append(" - ").append(field.name());
|
||||
}
|
||||
}
|
||||
throw new IOException(sb.toString());
|
||||
throw new NoSuchElementException(
|
||||
"Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: []");
|
||||
} else {
|
||||
var field = d.getField(keyFieldName);
|
||||
if (field == null) {
|
||||
throw new IOException("Can't get key of document docId: " + docId);
|
||||
throw new NoSuchElementException(
|
||||
"Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: " + d
|
||||
.getFields()
|
||||
.stream()
|
||||
.map(IndexableField::name)
|
||||
.collect(Collectors.joining(",", "[", "]")));
|
||||
} else {
|
||||
return field.stringValue();
|
||||
}
|
||||
@ -353,8 +355,11 @@ public class LuceneUtils {
|
||||
float score = hit.score;
|
||||
var indexSearcher = indexSearchers.shard(shardIndex);
|
||||
try {
|
||||
@Nullable String collectedDoc = keyOfTopDoc(shardDocId, indexSearcher.getIndexReader(), keyFieldName);
|
||||
return new LLKeyScore(shardDocId, score, Mono.justOrEmpty(collectedDoc));
|
||||
String collectedDoc = keyOfTopDoc(shardDocId, indexSearcher.getIndexReader(), keyFieldName);
|
||||
return new LLKeyScore(shardDocId, score, Mono.just(collectedDoc));
|
||||
} catch (NoSuchElementException ex) {
|
||||
logger.debug("Error: document " + shardDocId + " key is not present!");
|
||||
return null;
|
||||
} catch (Exception ex) {
|
||||
return new LLKeyScore(shardDocId, score, Mono.error(ex));
|
||||
}
|
||||
@ -425,4 +430,8 @@ public class LuceneUtils {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Returns the single, shared value to use as the {@code totalHitsThreshold} argument
 * when building top-docs collectors. It is currently the constant {@code 0}.
 * <p>
 * NOTE(review): presumably a threshold of 0 lets Lucene stop counting total hits as early
 * as possible, making the reported total hit count a lower bound only; confirm against the
 * Lucene collector documentation before relying on total hit counts.
 *
 * @return the total hits threshold, always {@code 0}
 */
public static int totalHitsThreshold() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,6 @@ import org.apache.lucene.search.ScoreDoc;
|
||||
|
||||
/**
 * Immutable holder of the pagination settings of a search.
 *
 * @param totalLimit      overall limit of the search (presumably the maximum number of results to return; verify against callers)
 * @param firstPageOffset offset applied to the first page
 * @param firstPageLimit  number of results requested for the first page
 * @param forceSinglePage flag carried with the pagination settings (presumably forces the whole search into one page; verify against callers)
 */
public record PaginationInfo(long totalLimit, long firstPageOffset, long firstPageLimit, boolean forceSinglePage) {
|
||||
|
||||
// NOTE(review): MAX_SINGLE_SEARCH_LIMIT is declared twice below (1000 and 256). This looks like the
// before/after pair of a diff line rather than two real declarations; confirm which value is current.
public static final int MAX_SINGLE_SEARCH_LIMIT = 1000;
|
||||
public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
|
||||
// Constant named as the first-page limit (presumably the default value for firstPageLimit; verify against callers).
public static final int FIRST_PAGE_LIMIT = 10;
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ public class ScoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
}
|
||||
CollectorManager<TopFieldCollector, TopDocs> sharedManager = new ScoringShardsCollectorManager(luceneSort,
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||
null, 1000, LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
||||
null, LuceneUtils.totalHitsThreshold(), LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()));
|
||||
return new ScoredSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo);
|
||||
});
|
||||
|
@ -100,7 +100,7 @@ class ScoredSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
}
|
||||
CollectorManager<TopFieldCollector, TopDocs> sharedManager
|
||||
= new ScoringShardsCollectorManager(luceneSort, s.currentPageLimit(),
|
||||
(FieldDoc) s.last(), 1000, 0, s.currentPageLimit());
|
||||
(FieldDoc) s.last(), LuceneUtils.totalHitsThreshold(), 0, s.currentPageLimit());
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
TopDocs pageTopDocs = Flux
|
||||
.fromIterable(indexSearchersArray)
|
||||
|
@ -9,7 +9,9 @@ import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Scheduler;
|
||||
@ -31,15 +33,19 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
} else {
|
||||
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
|
||||
}
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
TopDocs firstPageTopDocs = TopDocsSearcher.getTopDocs(indexSearcher,
|
||||
queryParams.query(),
|
||||
queryParams.sort(),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||
null,
|
||||
queryParams.scoreMode().needsScores(),
|
||||
1000,
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()));
|
||||
TopDocs firstPageTopDocs;
|
||||
{
|
||||
TopDocsCollector<ScoreDoc> firstPageCollector = TopDocsSearcher.getTopDocsCollector(
|
||||
queryParams.sort(),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||
null,
|
||||
LuceneUtils.totalHitsThreshold());
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
indexSearcher.search(queryParams.query(), firstPageCollector);
|
||||
firstPageTopDocs = firstPageCollector.topDocs(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit())
|
||||
);
|
||||
}
|
||||
Flux<LLKeyScore> firstPageMono = LuceneUtils
|
||||
.convertHits(
|
||||
firstPageTopDocs.scoreDocs,
|
||||
@ -61,9 +67,14 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
if (s.last() != null && s.remainingLimit() > 0) {
|
||||
TopDocs pageTopDocs;
|
||||
try {
|
||||
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||
s.currentPageLimit(),
|
||||
s.last(),
|
||||
LuceneUtils.totalHitsThreshold()
|
||||
);
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
pageTopDocs = TopDocsSearcher.getTopDocs(indexSearcher, queryParams.query(),
|
||||
queryParams.sort(), s.currentPageLimit(), s.last(), queryParams.scoreMode().needsScores(), 1000);
|
||||
indexSearcher.search(queryParams.query(), collector);
|
||||
pageTopDocs = collector.topDocs();
|
||||
} catch (IOException e) {
|
||||
sink.error(e);
|
||||
return EMPTY_STATUS;
|
||||
|
@ -1,96 +1,29 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.misc.search.DiversifiedTopDocsCollector;
|
||||
import org.apache.lucene.search.BulkScorer;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.HitQueue;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.MultiCollectorManager.Collectors;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.search.TotalHits.Relation;
|
||||
|
||||
class TopDocsSearcher {
|
||||
|
||||
private final boolean doDocScores;
|
||||
private final IndexSearcher indexSearcher;
|
||||
private final Query query;
|
||||
private final Sort luceneSort;
|
||||
private final int limit;
|
||||
private final FieldDoc after;
|
||||
private final int totalHitsThreshold;
|
||||
|
||||
/**
 * Stores the search parameters that the instance {@code getTopDocs} methods later forward
 * to the static {@code getTopDocs} overloads. No search is executed here.
 *
 * @param indexSearcher      searcher on which the query will be run
 * @param query              query to execute
 * @param luceneSort         sort forwarded to the collector factory
 * @param limit              collector size forwarded to the collector factory
 * @param after              document forwarded to the collector factory as the "after" position
 * @param doDocScores        whether scores are populated on the returned hits
 * @param totalHitsThreshold total hits threshold forwarded to the collector factory
 */
@Deprecated
|
||||
public TopDocsSearcher(IndexSearcher indexSearcher,
|
||||
Query query,
|
||||
Sort luceneSort,
|
||||
int limit,
|
||||
FieldDoc after,
|
||||
boolean doDocScores,
|
||||
int totalHitsThreshold) {
|
||||
this.indexSearcher = indexSearcher;
|
||||
this.query = query;
|
||||
this.luceneSort = luceneSort;
|
||||
this.limit = limit;
|
||||
this.after = after;
|
||||
this.doDocScores = doDocScores;
|
||||
this.totalHitsThreshold = totalHitsThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method must not be called more than once!
* <p>
* Runs the stored query through the static nine-argument {@code getTopDocs} overload and
* returns the requested window of results.
* <p>
* NOTE(review): the {@code limit} parameter shadows the {@code limit} field, so the value
* passed as the collector size is this parameter (the same value used as the window length),
* not the limit given to the constructor. Confirm this is intended.
*
* @param offset index of the first hit to return from the collected results
* @param limit  number of hits to return; also used as the collector size (see note above)
* @return the selected window of top documents
* @throws IOException if the underlying search fails
|
||||
*/
|
||||
@Deprecated
|
||||
public TopDocs getTopDocs(int offset, int limit) throws IOException {
|
||||
return getTopDocs(indexSearcher, query, luceneSort, limit, after, doDocScores, totalHitsThreshold, offset, limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method must not be called more than once!
* <p>
* Runs the stored query through the static seven-argument {@code getTopDocs} overload,
* using the parameters given to the constructor, and returns all collected top documents.
*
* @return the collected top documents
* @throws IOException if the underlying search fails
|
||||
*/
|
||||
@Deprecated
|
||||
public TopDocs getTopDocs() throws IOException {
|
||||
return getTopDocs(indexSearcher, query, luceneSort, limit, after, doDocScores, totalHitsThreshold);
|
||||
}
|
||||
|
||||
/**
 * Executes {@code query} on {@code indexSearcher} with a collector obtained from
 * {@code getTopDocsCollector}, then returns the window of results selected by
 * {@code topDocsStartOffset} and {@code topDocsHowMany}.
 *
 * @param indexSearcher      searcher on which the query is run
 * @param query              query to execute
 * @param luceneSort         sort forwarded to {@code getTopDocsCollector}
 * @param limit              collector size forwarded to {@code getTopDocsCollector}
 * @param after              document forwarded to {@code getTopDocsCollector} as the "after" position
 * @param doDocScores        when {@code true}, scores are populated on the returned hits
 * @param totalHitsThreshold total hits threshold forwarded to {@code getTopDocsCollector}
 * @param topDocsStartOffset start offset passed to {@code collector.topDocs(int, int)}
 * @param topDocsHowMany     number of hits passed to {@code collector.topDocs(int, int)}
 * @return the selected window of top documents
 * @throws IOException if the search or the score population fails
 */
public static TopDocs getTopDocs(IndexSearcher indexSearcher,
|
||||
Query query,
|
||||
Sort luceneSort,
|
||||
int limit,
|
||||
ScoreDoc after,
|
||||
boolean doDocScores,
|
||||
int totalHitsThreshold,
|
||||

||||
int topDocsStartOffset,
|
||||
int topDocsHowMany) throws IOException {
|
||||
TopDocsCollector<?> collector = getTopDocsCollector(luceneSort, limit, after, totalHitsThreshold);
|
||||
indexSearcher.search(query, collector);
|
||||
TopDocs topDocs = collector.topDocs(topDocsStartOffset, topDocsHowMany);
|
||||
// Scores are filled in afterwards, and only for the hits that are actually returned.
if (doDocScores) {
|
||||
TopFieldCollector.populateScores(topDocs.scoreDocs, indexSearcher, query);
|
||||
}
|
||||
return topDocs;
|
||||
}
|
||||
|
||||
/**
 * Executes {@code query} on {@code indexSearcher} with a collector obtained from
 * {@code getTopDocsCollector} and returns everything the collector gathered
 * (via the no-argument {@code collector.topDocs()}).
 *
 * @param indexSearcher      searcher on which the query is run
 * @param query              query to execute
 * @param luceneSort         sort forwarded to {@code getTopDocsCollector}
 * @param limit              collector size forwarded to {@code getTopDocsCollector}
 * @param after              document forwarded to {@code getTopDocsCollector} as the "after" position
 * @param doDocScores        when {@code true}, scores are populated on the returned hits
 * @param totalHitsThreshold total hits threshold forwarded to {@code getTopDocsCollector}
 * @return the collected top documents
 * @throws IOException if the search or the score population fails
 */
public static TopDocs getTopDocs(IndexSearcher indexSearcher,
|
||||
Query query,
|
||||
Sort luceneSort,
|
||||
int limit,
|
||||
ScoreDoc after,
|
||||
boolean doDocScores,
|
||||
int totalHitsThreshold) throws IOException {
|
||||
TopDocsCollector<?> collector = getTopDocsCollector(luceneSort, limit, after, totalHitsThreshold);
|
||||
indexSearcher.search(query, collector);
|
||||
TopDocs topDocs = collector.topDocs();
|
||||
// Scores are filled in afterwards, and only for the hits that are actually returned.
if (doDocScores) {
|
||||
TopFieldCollector.populateScores(topDocs.scoreDocs, indexSearcher, query);
|
||||
}
|
||||
return topDocs;
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
public static TopDocsCollector<ScoreDoc> getTopDocsCollector(Sort luceneSort,
|
||||
int limit,
|
||||
|
@ -24,7 +24,7 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||
null,
|
||||
1000
|
||||
LuceneUtils.totalHitsThreshold()
|
||||
), queryParams.offset(), queryParams.limit(), queryParams.sort());
|
||||
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
|
||||
});
|
||||
|
@ -92,7 +92,7 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
Query luceneQuery = queryParams.query();
|
||||
UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
|
||||
() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
|
||||
s.last(), 1000), 0, s.currentPageLimit(), queryParams.sort());
|
||||
s.last(), LuceneUtils.totalHitsThreshold()), 0, s.currentPageLimit(), queryParams.sort());
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
TopDocs pageTopDocs = Flux
|
||||
.fromIterable(indexSearchersArray)
|
||||
|
Loading…
Reference in New Issue
Block a user