Optimizations
This commit is contained in:
parent
50b3c897ff
commit
adbbbaa646
@ -2,51 +2,126 @@ package it.cavallium.dbengine.lucene.collector;
|
||||
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
|
||||
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.collector.UnscoredCollector;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import org.apache.commons.lang3.NotImplementedException;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
public class OptimizedTopDocsCollector {
|
||||
public class OptimizedTopDocsCollector implements CollectorMultiManager<TopDocs, TopDocs> {
|
||||
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
public static TopDocsCollector<ScoreDoc> create(Sort luceneSort,
|
||||
private final Sort luceneSort;
|
||||
private final int limit;
|
||||
private final ScoreDoc after;
|
||||
private final int totalHitsThreshold;
|
||||
private final boolean allowPagination;
|
||||
private final boolean computeScores;
|
||||
|
||||
private final int topDocsOffset;
|
||||
private final int topDocsCount;
|
||||
|
||||
public OptimizedTopDocsCollector(Sort luceneSort,
|
||||
int limit,
|
||||
ScoreDoc after,
|
||||
int totalHitsThreshold,
|
||||
boolean allowPagination,
|
||||
boolean computeScores) {
|
||||
TopDocsCollector<ScoreDoc> collector;
|
||||
if (after != null && !allowPagination) {
|
||||
throw new IllegalArgumentException("\"allowPagination\" is false, but \"after\" is set");
|
||||
}
|
||||
if (luceneSort == null) {
|
||||
if (after == null) {
|
||||
if (computeScores || allowPagination || !ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
|
||||
} else {
|
||||
collector = new UnscoredCollector(limit);
|
||||
boolean computeScores,
|
||||
int topDocsOffset,
|
||||
int topDocsCount) {
|
||||
this.luceneSort = luceneSort;
|
||||
this.limit = limit;
|
||||
this.after = after;
|
||||
this.totalHitsThreshold = totalHitsThreshold;
|
||||
this.allowPagination = allowPagination;
|
||||
this.computeScores = computeScores;
|
||||
|
||||
this.topDocsOffset = topDocsOffset;
|
||||
this.topDocsCount = topDocsCount;
|
||||
}
|
||||
|
||||
public CollectorManager<TopDocsCollector<?>, TopDocs> get(@NotNull Query query, IndexSearcher indexSearcher) {
|
||||
return new CollectorManager<>() {
|
||||
@Override
|
||||
public TopDocsCollector<?> newCollector() throws IOException {
|
||||
TopDocsCollector<?> collector;
|
||||
if (after != null && !allowPagination) {
|
||||
throw new IllegalArgumentException("\"allowPagination\" is false, but \"after\" is set");
|
||||
}
|
||||
} else {
|
||||
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
|
||||
if (luceneSort == null) {
|
||||
if (after == null) {
|
||||
if (computeScores || allowPagination || !ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
|
||||
} else {
|
||||
collector = new UnscoredCollector(limit);
|
||||
}
|
||||
} else {
|
||||
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
|
||||
}
|
||||
} else {
|
||||
if (after == null) {
|
||||
collector = TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
|
||||
} else if (after instanceof FieldDoc afterFieldDoc) {
|
||||
collector = TopFieldCollector.create(luceneSort, limit, afterFieldDoc, totalHitsThreshold);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("GetTopDocs with \"luceneSort\" != null requires \"after\" to be a FieldDoc");
|
||||
}
|
||||
}
|
||||
return collector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TopDocs reduce(Collection<TopDocsCollector<?>> collectors) throws IOException {
|
||||
TopDocs[] docsArray;
|
||||
boolean needsSort = luceneSort != null;
|
||||
boolean needsScores = luceneSort != null && luceneSort.needsScores();
|
||||
if (needsSort) {
|
||||
docsArray = new TopFieldDocs[collectors.size()];
|
||||
} else {
|
||||
docsArray = new TopDocs[collectors.size()];
|
||||
}
|
||||
int i = 0;
|
||||
for (TopDocsCollector<?> collector : collectors) {
|
||||
docsArray[i] = collector.topDocs();
|
||||
i++;
|
||||
}
|
||||
var merged = LuceneUtils.mergeTopDocs(luceneSort, null, null, docsArray);
|
||||
if (needsScores) {
|
||||
TopFieldCollector.populateScores(merged.scoreDocs, indexSearcher, query);
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScoreMode scoreMode() {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
@SuppressWarnings({"SuspiciousToArrayCall", "IfStatementWithIdenticalBranches"})
|
||||
@Override
|
||||
public TopDocs reduce(List<TopDocs> topDocs) {
|
||||
TopDocs[] arr;
|
||||
if (luceneSort != null) {
|
||||
arr = topDocs.toArray(TopFieldDocs[]::new);
|
||||
} else {
|
||||
if (after == null) {
|
||||
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
|
||||
} else if (after instanceof FieldDoc afterFieldDoc) {
|
||||
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, afterFieldDoc, totalHitsThreshold);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("GetTopDocs with \"luceneSort\" != null requires \"after\" to be a FieldDoc");
|
||||
}
|
||||
arr = topDocs.toArray(TopDocs[]::new);
|
||||
}
|
||||
return collector;
|
||||
return LuceneUtils.mergeTopDocs(luceneSort, topDocsOffset, topDocsCount, arr);
|
||||
}
|
||||
}
|
||||
|
@ -32,8 +32,6 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
|
||||
private final int totalHitsThreshold;
|
||||
private final @Nullable Integer startN;
|
||||
private final @Nullable Integer topN;
|
||||
private final @Nullable Integer internalStartN;
|
||||
private final @Nullable Integer internalTopN;
|
||||
private final CollectorManager<TopFieldCollector, TopFieldDocs> sharedCollectorManager;
|
||||
|
||||
public ScoringShardsCollectorMultiManager(Query query,
|
||||
@ -83,22 +81,6 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
|
||||
} else {
|
||||
this.topN = topN;
|
||||
}
|
||||
if (this.topN != null && this.startN != null) {
|
||||
if (this.topN >= 2147483630) {
|
||||
this.internalTopN = this.topN;
|
||||
} else {
|
||||
this.internalTopN = this.startN + this.topN;
|
||||
}
|
||||
} else if (this.topN == null && this.startN != null) {
|
||||
this.internalTopN = null;
|
||||
} else {
|
||||
this.internalTopN = this.topN;
|
||||
}
|
||||
if (this.internalTopN != null) {
|
||||
this.internalStartN = 0;
|
||||
} else {
|
||||
this.internalStartN = null;
|
||||
}
|
||||
this.sharedCollectorManager = TopFieldCollector.createSharedManager(sort == null ? Sort.RELEVANCE : sort, numHits, after, totalHitsThreshold);
|
||||
}
|
||||
|
||||
@ -120,7 +102,7 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
|
||||
for (TopFieldCollector collector : collectors) {
|
||||
topDocs[i++] = collector.topDocs();
|
||||
}
|
||||
var result = LuceneUtils.mergeTopDocs(sort, 0, numHits, topDocs);
|
||||
var result = LuceneUtils.mergeTopDocs(sort, null, null, topDocs);
|
||||
|
||||
if (sort != null && sort.needsScores()) {
|
||||
TopFieldCollector.populateScores(result.scoreDocs, indexSearcher, query);
|
||||
@ -128,9 +110,9 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
|
||||
|
||||
return result;
|
||||
} else {
|
||||
TopDocs result;
|
||||
TopDocs[] topDocs;
|
||||
if (sort != null) {
|
||||
TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
|
||||
topDocs = new TopFieldDocs[collectors.size()];
|
||||
var i = 0;
|
||||
for (TopFieldCollector collector : collectors) {
|
||||
topDocs[i] = collector.topDocs();
|
||||
@ -145,9 +127,8 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
|
||||
}
|
||||
i++;
|
||||
}
|
||||
result = LuceneUtils.mergeTopDocs(sort, internalStartN, internalTopN, topDocs);
|
||||
} else {
|
||||
TopDocs[] topDocs = new TopDocs[collectors.size()];
|
||||
topDocs = new TopDocs[collectors.size()];
|
||||
var i = 0;
|
||||
for (TopFieldCollector collector : collectors) {
|
||||
topDocs[i] = collector.topDocs();
|
||||
@ -156,9 +137,8 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
|
||||
}
|
||||
i++;
|
||||
}
|
||||
result = LuceneUtils.mergeTopDocs(null, internalStartN, internalTopN, topDocs);
|
||||
}
|
||||
return result;
|
||||
return LuceneUtils.mergeTopDocs(sort, null, null, topDocs);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -184,25 +184,13 @@ public class PagedLocalSearcher implements LocalSearcher {
|
||||
} else if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) {
|
||||
TopDocs pageTopDocs;
|
||||
try {
|
||||
TopDocsCollector<ScoreDoc> collector = OptimizedTopDocsCollector.create(queryParams.sort(),
|
||||
var cmm = new OptimizedTopDocsCollector(queryParams.sort(),
|
||||
currentPageLimit, s.last(), queryParams.getTotalHitsThresholdInt(),
|
||||
allowPagination, queryParams.needsScores());
|
||||
assert queryParams.complete() == collector.scoreMode().isExhaustive();
|
||||
assert currentPageLimit < Integer.MAX_VALUE || queryParams
|
||||
.getScoreModeOptional()
|
||||
.map(scoreMode -> scoreMode == collector.scoreMode())
|
||||
.orElse(true);
|
||||
allowPagination, queryParams.needsScores(), resultsOffset, currentPageLimit);
|
||||
|
||||
indexSearchers.get(0).search(queryParams.query(), collector);
|
||||
if (resultsOffset > 0) {
|
||||
pageTopDocs = collector.topDocs(resultsOffset, currentPageLimit);
|
||||
} else {
|
||||
pageTopDocs = collector.topDocs();
|
||||
}
|
||||
// Populate the scores of TopFieldCollector results. By default it doesn't populate the scores
|
||||
if (queryParams.needsScores() && ((Collector) collector) instanceof TopFieldCollector) {
|
||||
TopFieldCollector.populateScores(pageTopDocs.scoreDocs, indexSearchers.get(0), queryParams.query());
|
||||
}
|
||||
pageTopDocs = cmm.reduce(List.of(indexSearchers
|
||||
.get(0)
|
||||
.search(queryParams.query(), cmm.get(queryParams.query(), indexSearchers.get(0)))));
|
||||
} catch (IOException e) {
|
||||
sink.error(e);
|
||||
return EMPTY_STATUS;
|
||||
|
Loading…
x
Reference in New Issue
Block a user