Improve performance with UnscoredCollector
This commit is contained in:
parent
ee70ece70f
commit
ea86bf7a43
@ -0,0 +1,154 @@
|
||||
package it.cavallium.dbengine.lucene;
|
||||
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntLists;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
|
||||
import org.apache.lucene.search.SimpleCollector;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public class UnscoredCollector extends TopDocsCollector<ScoreDoc> implements LeafCollector {
|
||||
private final IntArrayList docIds = new IntArrayList();
|
||||
private final int limit;
|
||||
private final boolean doAfterDocCalculation;
|
||||
private final int afterDocId;
|
||||
private LeafReaderContext currentLeafReaderContext;
|
||||
|
||||
private boolean isLastElementOrdered = true;
|
||||
private int biggestDocId = -1;
|
||||
private int biggestDocIdIndex;
|
||||
|
||||
public UnscoredCollector(@Nullable Integer afterDocId, int limit) {
|
||||
super(null);
|
||||
if (!ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (limit <= 0) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
this.limit = limit;
|
||||
if (afterDocId != null) {
|
||||
this.doAfterDocCalculation = true;
|
||||
this.afterDocId = afterDocId;
|
||||
} else {
|
||||
this.doAfterDocCalculation = false;
|
||||
this.afterDocId = -1;
|
||||
}
|
||||
}
|
||||
|
||||
public UnscoredCollector(@Nullable Integer afterDocId) {
|
||||
super(null);
|
||||
this.limit = -1;
|
||||
if (afterDocId != null) {
|
||||
this.doAfterDocCalculation = true;
|
||||
this.afterDocId = afterDocId;
|
||||
} else {
|
||||
this.doAfterDocCalculation = false;
|
||||
this.afterDocId = -1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorable) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int localDocId) {
|
||||
totalHits++;
|
||||
boolean canCollect;
|
||||
if (limit == -1 || docIds.size() < limit) {
|
||||
if (doAfterDocCalculation) {
|
||||
canCollect = localDocId > (this.afterDocId - currentLeafReaderContext.docBase);
|
||||
} else {
|
||||
canCollect = true;
|
||||
}
|
||||
} else {
|
||||
canCollect = false;
|
||||
}
|
||||
if (canCollect) {
|
||||
int docId = currentLeafReaderContext.docBase + localDocId;
|
||||
if (docIds.add(docId)) {
|
||||
if (docId > biggestDocId) {
|
||||
isLastElementOrdered = true;
|
||||
int docIndex = docIds.size() - 1;
|
||||
biggestDocId = docId;
|
||||
biggestDocIdIndex = docIndex;
|
||||
} else {
|
||||
isLastElementOrdered = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext leafReaderContext) {
|
||||
this.currentLeafReaderContext = leafReaderContext;
|
||||
return this;
|
||||
}
|
||||
|
||||
public IntList unscoredDocs() {
|
||||
return IntLists.unmodifiable(this.docIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScoreMode scoreMode() {
|
||||
return ScoreMode.COMPLETE_NO_SCORES;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int topDocsSize() {
|
||||
return Math.min(this.totalHits, this.docIds.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TopDocs topDocs(int start, int howMany) {
|
||||
int size = this.topDocsSize();
|
||||
if (howMany < 0) {
|
||||
throw new IllegalArgumentException("Number of hits requested must be greater than 0 but value was " + howMany);
|
||||
} else if (start < 0) {
|
||||
throw new IllegalArgumentException("Expected value of starting position is between 0 and " + size + ", got " + start);
|
||||
} else if (start < size && howMany != 0) {
|
||||
howMany = Math.min(size - start, howMany);
|
||||
ScoreDoc[] results = new ScoreDoc[howMany];
|
||||
|
||||
this.populateResults(results, start, howMany);
|
||||
return this.newTopDocs(results, start);
|
||||
} else {
|
||||
return this.newTopDocs((ScoreDoc[])null, start);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
|
||||
return super.newTopDocs(results, start);
|
||||
}
|
||||
|
||||
private void populateResults(ScoreDoc[] results, int start, int howMany) {
|
||||
int i = 0;
|
||||
for (int docId : docIds.subList(start, start + howMany)) {
|
||||
results[i] = new ScoreDoc(docId, 1.0f);
|
||||
i++;
|
||||
}
|
||||
if (!isLastElementOrdered || start + howMany < docIds.size()) {
|
||||
int lastIndex = results.length - 1;
|
||||
var previousLastDoc = results[lastIndex];
|
||||
var biggestDoc = results[biggestDocIdIndex];
|
||||
results[lastIndex] = biggestDoc;
|
||||
results[biggestDocIdIndex] = previousLastDoc;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void populateResults(ScoreDoc[] results, int howMany) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
@ -7,4 +7,8 @@ public record PaginationInfo(long totalLimit, long firstPageOffset, long firstPa
|
||||
|
||||
public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
|
||||
public static final int FIRST_PAGE_LIMIT = 10;
|
||||
/**
|
||||
* When true, unsorted queries that do not need scores may use the custom unscored collector instead of a scoring collector
|
||||
*/
|
||||
public static final boolean ALLOW_UNSCORED_PAGINATION_MODE = true;
|
||||
}
|
||||
|
@ -39,7 +39,8 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
queryParams.sort(),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||
null,
|
||||
LuceneUtils.totalHitsThreshold());
|
||||
LuceneUtils.totalHitsThreshold(),
|
||||
queryParams.isScored());
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
indexSearcher.search(queryParams.query(), firstPageCollector);
|
||||
firstPageTopDocs = firstPageCollector.topDocs(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
||||
@ -71,7 +72,8 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||
s.currentPageLimit(),
|
||||
s.last(),
|
||||
LuceneUtils.totalHitsThreshold()
|
||||
LuceneUtils.totalHitsThreshold(),
|
||||
queryParams.isScored()
|
||||
);
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
indexSearcher.search(queryParams.query(), collector);
|
||||
|
@ -1,5 +1,8 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
|
||||
|
||||
import it.cavallium.dbengine.lucene.UnscoredCollector;
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
@ -28,15 +31,27 @@ class TopDocsSearcher {
|
||||
public static TopDocsCollector<ScoreDoc> getTopDocsCollector(Sort luceneSort,
|
||||
int limit,
|
||||
ScoreDoc after,
|
||||
int totalHitsThreshold) {
|
||||
int totalHitsThreshold,
|
||||
boolean computeScores) {
|
||||
TopDocsCollector<ScoreDoc> collector;
|
||||
if (luceneSort == null) {
|
||||
if (after == null) {
|
||||
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
|
||||
if (computeScores || !ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
|
||||
} else {
|
||||
collector = new UnscoredCollector(null, limit);
|
||||
}
|
||||
} else {
|
||||
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
|
||||
if (computeScores || !ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
|
||||
} else {
|
||||
collector = new UnscoredCollector(after.doc, limit);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!computeScores) {
|
||||
throw new IllegalArgumentException("ComputeScores must be true if sort is set");
|
||||
}
|
||||
if (after == null) {
|
||||
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
|
||||
} else if (after instanceof FieldDoc afterFieldDoc) {
|
||||
|
@ -1,6 +1,7 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
|
||||
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import java.io.IOException;
|
||||
|
@ -24,7 +24,8 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||
null,
|
||||
LuceneUtils.totalHitsThreshold()
|
||||
LuceneUtils.totalHitsThreshold(),
|
||||
queryParams.isScored()
|
||||
), queryParams.offset(), queryParams.limit(), queryParams.sort());
|
||||
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
|
||||
});
|
||||
|
@ -92,7 +92,8 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
Query luceneQuery = queryParams.query();
|
||||
UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
|
||||
() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
|
||||
s.last(), LuceneUtils.totalHitsThreshold()), 0, s.currentPageLimit(), queryParams.sort());
|
||||
s.last(), LuceneUtils.totalHitsThreshold(), queryParams.isScored()),
|
||||
0, s.currentPageLimit(), queryParams.sort());
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
TopDocs pageTopDocs = Flux
|
||||
.fromIterable(indexSearchersArray)
|
||||
|
Loading…
x
Reference in New Issue
Block a user