Improve performance with UnscoredCollector

This commit is contained in:
Andrea Cavalli 2021-07-26 19:21:17 +02:00
parent ee70ece70f
commit ea86bf7a43
7 changed files with 185 additions and 7 deletions

View File

@ -0,0 +1,154 @@
package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.ints.IntLists;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.jetbrains.annotations.Nullable;
/**
 * A {@link TopDocsCollector} that records matching document ids without computing scores.
 *
 * <p>Documents are stored as global ids ({@code docBase + localDocId}) in the order in which
 * {@link #collect(int)} receives them. Every hit is counted in {@code totalHits}, but only hits
 * that pass the optional "after" filter are stored, and only while the optional limit has not
 * been reached. Each returned {@link ScoreDoc} carries the constant score {@code 1.0f}.
 *
 * <p>The collector is its own {@link LeafCollector}: {@link #getLeafCollector(LeafReaderContext)}
 * only remembers the current leaf and returns {@code this}.
 */
public class UnscoredCollector extends TopDocsCollector<ScoreDoc> implements LeafCollector {

	/** Sentinel stored in {@link #limit} when the number of stored documents is unbounded. */
	private static final int NO_LIMIT = -1;

	/** Global ids of the stored documents, in collection order. */
	private final IntArrayList docIds = new IntArrayList();
	/** Maximum number of stored documents, or {@link #NO_LIMIT}. */
	private final int limit;
	/** Whether only documents with a global id greater than {@link #afterDocId} are stored. */
	private final boolean doAfterDocCalculation;
	/** Global id of the pagination cursor; {@code -1} when no cursor was given. */
	private final int afterDocId;
	/** Leaf currently being collected; set by {@link #getLeafCollector(LeafReaderContext)}. */
	private LeafReaderContext currentLeafReaderContext;

	/**
	 * Creates a collector that stores at most {@code limit} documents.
	 *
	 * @param afterDocId global doc id of the cursor; only documents with a greater global id are
	 *                   stored. {@code null} disables the filter.
	 * @param limit      maximum number of documents to store, must be positive
	 * @throws UnsupportedOperationException if {@code ALLOW_UNSCORED_PAGINATION_MODE} is false
	 * @throws IllegalArgumentException      if {@code limit <= 0}
	 */
	public UnscoredCollector(@Nullable Integer afterDocId, int limit) {
		super(null);
		if (!ALLOW_UNSCORED_PAGINATION_MODE) {
			throw new UnsupportedOperationException();
		}
		if (limit <= 0) {
			throw new IllegalArgumentException("limit must be positive, got " + limit);
		}
		this.limit = limit;
		this.doAfterDocCalculation = afterDocId != null;
		this.afterDocId = afterDocId != null ? afterDocId : -1;
	}

	/**
	 * Creates a collector without a limit on the number of stored documents.
	 *
	 * @param afterDocId global doc id of the cursor; only documents with a greater global id are
	 *                   stored. {@code null} disables the filter.
	 */
	public UnscoredCollector(@Nullable Integer afterDocId) {
		super(null);
		this.limit = NO_LIMIT;
		this.doAfterDocCalculation = afterDocId != null;
		this.afterDocId = afterDocId != null ? afterDocId : -1;
	}

	/** Ignored: this collector never reads scores. */
	@Override
	public void setScorer(Scorable scorable) {
	}

	/**
	 * Counts the hit and, if it passes the limit and "after" filters, stores its global doc id.
	 *
	 * @param localDocId doc id relative to the current leaf's {@code docBase}
	 */
	@Override
	public void collect(int localDocId) {
		// Every hit is counted, including the ones that are filtered out below.
		totalHits++;
		if (limit != NO_LIMIT && docIds.size() >= limit) {
			return;
		}
		int docBase = currentLeafReaderContext.docBase;
		// Same comparison as (docBase + localDocId) > afterDocId, done in leaf-local ids.
		if (doAfterDocCalculation && localDocId <= afterDocId - docBase) {
			return;
		}
		docIds.add(docBase + localDocId);
	}

	/** Remembers the leaf so that local doc ids can be rebased, then returns this collector. */
	@Override
	public LeafCollector getLeafCollector(LeafReaderContext leafReaderContext) {
		this.currentLeafReaderContext = leafReaderContext;
		return this;
	}

	/**
	 * @return an unmodifiable view of the stored global doc ids, in collection order
	 */
	public IntList unscoredDocs() {
		return IntLists.unmodifiable(this.docIds);
	}

	@Override
	public ScoreMode scoreMode() {
		return ScoreMode.COMPLETE_NO_SCORES;
	}

	/**
	 * @return the number of documents that can be returned: the smaller of the hit count and the
	 *         number of stored documents
	 */
	@Override
	protected int topDocsSize() {
		return Math.min(this.totalHits, this.docIds.size());
	}

	/**
	 * Returns the stored documents in the window {@code [start, start + howMany)}.
	 *
	 * <p>The window is clamped to the number of stored documents. Within the returned page the
	 * document with the largest id is placed in the last slot; the other documents keep their
	 * collection order except for the one that is swapped with it.
	 *
	 * @param start   index of the first stored document to return
	 * @param howMany maximum number of documents to return
	 * @return the requested page, or the result of {@code newTopDocs(null, start)} when the window
	 *         is empty
	 * @throws IllegalArgumentException if {@code howMany} or {@code start} is negative
	 */
	@Override
	public TopDocs topDocs(int start, int howMany) {
		int size = this.topDocsSize();
		if (howMany < 0) {
			throw new IllegalArgumentException("Number of hits requested must be greater than 0 but value was " + howMany);
		}
		if (start < 0) {
			throw new IllegalArgumentException("Expected value of starting position is between 0 and " + size + ", got " + start);
		}
		if (start >= size || howMany == 0) {
			return this.newTopDocs((ScoreDoc[]) null, start);
		}
		int pageSize = Math.min(size - start, howMany);
		ScoreDoc[] results = new ScoreDoc[pageSize];
		this.populateResults(results, start, pageSize);
		return this.newTopDocs(results, start);
	}

	@Override
	protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
		return super.newTopDocs(results, start);
	}

	/**
	 * Fills {@code results} with the stored documents {@code [start, start + howMany)} and moves
	 * the document with the largest id in that window to the last slot.
	 *
	 * <p>The largest id is searched inside the window itself: an index into the full
	 * {@link #docIds} list is not a valid index into {@code results} when {@code start > 0} or
	 * when the window does not reach the end of the list.
	 *
	 * <p>NOTE(review): the swap presumably exists so that the last {@link ScoreDoc} of a page can
	 * be used as the "after" cursor of the next search, since {@link #collect(int)} only stores
	 * documents whose global id is greater than the cursor — confirm against the callers.
	 *
	 * @param results destination array; its length must be at least {@code howMany}
	 * @param start   index of the first stored document to copy
	 * @param howMany number of documents to copy, at least 1
	 */
	private void populateResults(ScoreDoc[] results, int start, int howMany) {
		int biggestIndex = 0;
		for (int i = 0; i < howMany; i++) {
			int docId = docIds.getInt(start + i);
			results[i] = new ScoreDoc(docId, 1.0f);
			if (docId > results[biggestIndex].doc) {
				biggestIndex = i;
			}
		}
		int lastIndex = howMany - 1;
		if (biggestIndex != lastIndex) {
			ScoreDoc previousLastDoc = results[lastIndex];
			results[lastIndex] = results[biggestIndex];
			results[biggestIndex] = previousLastDoc;
		}
	}

	/** Not supported: results are produced only through {@link #topDocs(int, int)}. */
	@Override
	protected void populateResults(ScoreDoc[] results, int howMany) {
		throw new UnsupportedOperationException();
	}
}

View File

@ -7,4 +7,8 @@ public record PaginationInfo(long totalLimit, long firstPageOffset, long firstPa
public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
public static final int FIRST_PAGE_LIMIT = 10;
/**
* Use true to allow a custom unscored collector when possible
*/
public static final boolean ALLOW_UNSCORED_PAGINATION_MODE = true;
}

View File

@ -39,7 +39,8 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
queryParams.sort(),
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
null,
LuceneUtils.totalHitsThreshold());
LuceneUtils.totalHitsThreshold(),
queryParams.isScored());
//noinspection BlockingMethodInNonBlockingContext
indexSearcher.search(queryParams.query(), firstPageCollector);
firstPageTopDocs = firstPageCollector.topDocs(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
@ -71,7 +72,8 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
s.currentPageLimit(),
s.last(),
LuceneUtils.totalHitsThreshold()
LuceneUtils.totalHitsThreshold(),
queryParams.isScored()
);
//noinspection BlockingMethodInNonBlockingContext
indexSearcher.search(queryParams.query(), collector);

View File

@ -1,5 +1,8 @@
package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
import it.cavallium.dbengine.lucene.UnscoredCollector;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
@ -28,15 +31,27 @@ class TopDocsSearcher {
public static TopDocsCollector<ScoreDoc> getTopDocsCollector(Sort luceneSort,
int limit,
ScoreDoc after,
int totalHitsThreshold) {
int totalHitsThreshold,
boolean computeScores) {
TopDocsCollector<ScoreDoc> collector;
if (luceneSort == null) {
if (after == null) {
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
if (computeScores || !ALLOW_UNSCORED_PAGINATION_MODE) {
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
} else {
collector = new UnscoredCollector(null, limit);
}
} else {
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
if (computeScores || !ALLOW_UNSCORED_PAGINATION_MODE) {
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
} else {
collector = new UnscoredCollector(after.doc, limit);
}
}
} else {
if (!computeScores) {
throw new IllegalArgumentException("ComputeScores must be true if sort is set");
}
if (after == null) {
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
} else if (after instanceof FieldDoc afterFieldDoc) {

View File

@ -1,6 +1,7 @@
package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
import it.cavallium.dbengine.lucene.LuceneUtils;
import java.io.IOException;

View File

@ -24,7 +24,8 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
null,
LuceneUtils.totalHitsThreshold()
LuceneUtils.totalHitsThreshold(),
queryParams.isScored()
), queryParams.offset(), queryParams.limit(), queryParams.sort());
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
});

View File

@ -92,7 +92,8 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
Query luceneQuery = queryParams.query();
UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
s.last(), LuceneUtils.totalHitsThreshold()), 0, s.currentPageLimit(), queryParams.sort());
s.last(), LuceneUtils.totalHitsThreshold(), queryParams.isScored()),
0, s.currentPageLimit(), queryParams.sort());
//noinspection BlockingMethodInNonBlockingContext
TopDocs pageTopDocs = Flux
.fromIterable(indexSearchersArray)