Optimizations

This commit is contained in:
Andrea Cavalli 2021-11-09 01:13:47 +01:00
parent 50b3c897ff
commit adbbbaa646
3 changed files with 109 additions and 66 deletions

View File

@ -2,51 +2,126 @@ package it.cavallium.dbengine.lucene.collector;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.UnscoredCollector;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.jetbrains.annotations.NotNull;
public class OptimizedTopDocsCollector {
public class OptimizedTopDocsCollector implements CollectorMultiManager<TopDocs, TopDocs> {
@SuppressWarnings({"unchecked", "rawtypes"})
public static TopDocsCollector<ScoreDoc> create(Sort luceneSort,
private final Sort luceneSort;
private final int limit;
private final ScoreDoc after;
private final int totalHitsThreshold;
private final boolean allowPagination;
private final boolean computeScores;
private final int topDocsOffset;
private final int topDocsCount;
public OptimizedTopDocsCollector(Sort luceneSort,
int limit,
ScoreDoc after,
int totalHitsThreshold,
boolean allowPagination,
boolean computeScores) {
TopDocsCollector<ScoreDoc> collector;
if (after != null && !allowPagination) {
throw new IllegalArgumentException("\"allowPagination\" is false, but \"after\" is set");
}
if (luceneSort == null) {
if (after == null) {
if (computeScores || allowPagination || !ALLOW_UNSCORED_PAGINATION_MODE) {
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
} else {
collector = new UnscoredCollector(limit);
boolean computeScores,
int topDocsOffset,
int topDocsCount) {
this.luceneSort = luceneSort;
this.limit = limit;
this.after = after;
this.totalHitsThreshold = totalHitsThreshold;
this.allowPagination = allowPagination;
this.computeScores = computeScores;
this.topDocsOffset = topDocsOffset;
this.topDocsCount = topDocsCount;
}
public CollectorManager<TopDocsCollector<?>, TopDocs> get(@NotNull Query query, IndexSearcher indexSearcher) {
return new CollectorManager<>() {
@Override
public TopDocsCollector<?> newCollector() throws IOException {
TopDocsCollector<?> collector;
if (after != null && !allowPagination) {
throw new IllegalArgumentException("\"allowPagination\" is false, but \"after\" is set");
}
} else {
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
if (luceneSort == null) {
if (after == null) {
if (computeScores || allowPagination || !ALLOW_UNSCORED_PAGINATION_MODE) {
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
} else {
collector = new UnscoredCollector(limit);
}
} else {
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
}
} else {
if (after == null) {
collector = TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
} else if (after instanceof FieldDoc afterFieldDoc) {
collector = TopFieldCollector.create(luceneSort, limit, afterFieldDoc, totalHitsThreshold);
} else {
throw new UnsupportedOperationException("GetTopDocs with \"luceneSort\" != null requires \"after\" to be a FieldDoc");
}
}
return collector;
}
@Override
public TopDocs reduce(Collection<TopDocsCollector<?>> collectors) throws IOException {
TopDocs[] docsArray;
boolean needsSort = luceneSort != null;
boolean needsScores = luceneSort != null && luceneSort.needsScores();
if (needsSort) {
docsArray = new TopFieldDocs[collectors.size()];
} else {
docsArray = new TopDocs[collectors.size()];
}
int i = 0;
for (TopDocsCollector<?> collector : collectors) {
docsArray[i] = collector.topDocs();
i++;
}
var merged = LuceneUtils.mergeTopDocs(luceneSort, null, null, docsArray);
if (needsScores) {
TopFieldCollector.populateScores(merged.scoreDocs, indexSearcher, query);
}
return merged;
}
};
}
/**
 * Score mode accessor required by the interface this class implements.
 *
 * <p>This implementation does not report a score mode: the method
 * unconditionally throws, so callers must not rely on it.
 *
 * @return never returns normally
 * @throws NotImplementedException always
 */
@Override
public ScoreMode scoreMode() {
throw new NotImplementedException();
}
@SuppressWarnings({"SuspiciousToArrayCall", "IfStatementWithIdenticalBranches"})
@Override
public TopDocs reduce(List<TopDocs> topDocs) {
TopDocs[] arr;
if (luceneSort != null) {
arr = topDocs.toArray(TopFieldDocs[]::new);
} else {
if (after == null) {
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
} else if (after instanceof FieldDoc afterFieldDoc) {
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, afterFieldDoc, totalHitsThreshold);
} else {
throw new UnsupportedOperationException("GetTopDocs with \"luceneSort\" != null requires \"after\" to be a FieldDoc");
}
arr = topDocs.toArray(TopDocs[]::new);
}
return collector;
return LuceneUtils.mergeTopDocs(luceneSort, topDocsOffset, topDocsCount, arr);
}
}

View File

@ -32,8 +32,6 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
private final int totalHitsThreshold;
private final @Nullable Integer startN;
private final @Nullable Integer topN;
private final @Nullable Integer internalStartN;
private final @Nullable Integer internalTopN;
private final CollectorManager<TopFieldCollector, TopFieldDocs> sharedCollectorManager;
public ScoringShardsCollectorMultiManager(Query query,
@ -83,22 +81,6 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
} else {
this.topN = topN;
}
if (this.topN != null && this.startN != null) {
if (this.topN >= 2147483630) {
this.internalTopN = this.topN;
} else {
this.internalTopN = this.startN + this.topN;
}
} else if (this.topN == null && this.startN != null) {
this.internalTopN = null;
} else {
this.internalTopN = this.topN;
}
if (this.internalTopN != null) {
this.internalStartN = 0;
} else {
this.internalStartN = null;
}
this.sharedCollectorManager = TopFieldCollector.createSharedManager(sort == null ? Sort.RELEVANCE : sort, numHits, after, totalHitsThreshold);
}
@ -120,7 +102,7 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
for (TopFieldCollector collector : collectors) {
topDocs[i++] = collector.topDocs();
}
var result = LuceneUtils.mergeTopDocs(sort, 0, numHits, topDocs);
var result = LuceneUtils.mergeTopDocs(sort, null, null, topDocs);
if (sort != null && sort.needsScores()) {
TopFieldCollector.populateScores(result.scoreDocs, indexSearcher, query);
@ -128,9 +110,9 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
return result;
} else {
TopDocs result;
TopDocs[] topDocs;
if (sort != null) {
TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
topDocs = new TopFieldDocs[collectors.size()];
var i = 0;
for (TopFieldCollector collector : collectors) {
topDocs[i] = collector.topDocs();
@ -145,9 +127,8 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
}
i++;
}
result = LuceneUtils.mergeTopDocs(sort, internalStartN, internalTopN, topDocs);
} else {
TopDocs[] topDocs = new TopDocs[collectors.size()];
topDocs = new TopDocs[collectors.size()];
var i = 0;
for (TopFieldCollector collector : collectors) {
topDocs[i] = collector.topDocs();
@ -156,9 +137,8 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager
}
i++;
}
result = LuceneUtils.mergeTopDocs(null, internalStartN, internalTopN, topDocs);
}
return result;
return LuceneUtils.mergeTopDocs(sort, null, null, topDocs);
}
}
};

View File

@ -184,25 +184,13 @@ public class PagedLocalSearcher implements LocalSearcher {
} else if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) {
TopDocs pageTopDocs;
try {
TopDocsCollector<ScoreDoc> collector = OptimizedTopDocsCollector.create(queryParams.sort(),
var cmm = new OptimizedTopDocsCollector(queryParams.sort(),
currentPageLimit, s.last(), queryParams.getTotalHitsThresholdInt(),
allowPagination, queryParams.needsScores());
assert queryParams.complete() == collector.scoreMode().isExhaustive();
assert currentPageLimit < Integer.MAX_VALUE || queryParams
.getScoreModeOptional()
.map(scoreMode -> scoreMode == collector.scoreMode())
.orElse(true);
allowPagination, queryParams.needsScores(), resultsOffset, currentPageLimit);
indexSearchers.get(0).search(queryParams.query(), collector);
if (resultsOffset > 0) {
pageTopDocs = collector.topDocs(resultsOffset, currentPageLimit);
} else {
pageTopDocs = collector.topDocs();
}
// Populate the scores of the TopFieldCollector results. By default it doesn't populate the scores
if (queryParams.needsScores() && ((Collector) collector) instanceof TopFieldCollector) {
TopFieldCollector.populateScores(pageTopDocs.scoreDocs, indexSearchers.get(0), queryParams.query());
}
pageTopDocs = cmm.reduce(List.of(indexSearchers
.get(0)
.search(queryParams.query(), cmm.get(queryParams.query(), indexSearchers.get(0)))));
} catch (IOException e) {
sink.error(e);
return EMPTY_STATUS;