Improve performance with UnscoredCollector
This commit is contained in:
parent
ee70ece70f
commit
ea86bf7a43
@ -0,0 +1,154 @@
|
|||||||
|
package it.cavallium.dbengine.lucene;
|
||||||
|
|
||||||
|
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
|
||||||
|
|
||||||
|
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||||
|
import it.unimi.dsi.fastutil.ints.IntList;
|
||||||
|
import it.unimi.dsi.fastutil.ints.IntLists;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.LeafCollector;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.SimpleCollector;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.search.TopDocsCollector;
|
||||||
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
|
||||||
|
public class UnscoredCollector extends TopDocsCollector<ScoreDoc> implements LeafCollector {
|
||||||
|
private final IntArrayList docIds = new IntArrayList();
|
||||||
|
private final int limit;
|
||||||
|
private final boolean doAfterDocCalculation;
|
||||||
|
private final int afterDocId;
|
||||||
|
private LeafReaderContext currentLeafReaderContext;
|
||||||
|
|
||||||
|
private boolean isLastElementOrdered = true;
|
||||||
|
private int biggestDocId = -1;
|
||||||
|
private int biggestDocIdIndex;
|
||||||
|
|
||||||
|
public UnscoredCollector(@Nullable Integer afterDocId, int limit) {
|
||||||
|
super(null);
|
||||||
|
if (!ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
if (limit <= 0) {
|
||||||
|
throw new IllegalArgumentException();
|
||||||
|
}
|
||||||
|
this.limit = limit;
|
||||||
|
if (afterDocId != null) {
|
||||||
|
this.doAfterDocCalculation = true;
|
||||||
|
this.afterDocId = afterDocId;
|
||||||
|
} else {
|
||||||
|
this.doAfterDocCalculation = false;
|
||||||
|
this.afterDocId = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public UnscoredCollector(@Nullable Integer afterDocId) {
|
||||||
|
super(null);
|
||||||
|
this.limit = -1;
|
||||||
|
if (afterDocId != null) {
|
||||||
|
this.doAfterDocCalculation = true;
|
||||||
|
this.afterDocId = afterDocId;
|
||||||
|
} else {
|
||||||
|
this.doAfterDocCalculation = false;
|
||||||
|
this.afterDocId = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorable) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int localDocId) {
|
||||||
|
totalHits++;
|
||||||
|
boolean canCollect;
|
||||||
|
if (limit == -1 || docIds.size() < limit) {
|
||||||
|
if (doAfterDocCalculation) {
|
||||||
|
canCollect = localDocId > (this.afterDocId - currentLeafReaderContext.docBase);
|
||||||
|
} else {
|
||||||
|
canCollect = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
canCollect = false;
|
||||||
|
}
|
||||||
|
if (canCollect) {
|
||||||
|
int docId = currentLeafReaderContext.docBase + localDocId;
|
||||||
|
if (docIds.add(docId)) {
|
||||||
|
if (docId > biggestDocId) {
|
||||||
|
isLastElementOrdered = true;
|
||||||
|
int docIndex = docIds.size() - 1;
|
||||||
|
biggestDocId = docId;
|
||||||
|
biggestDocIdIndex = docIndex;
|
||||||
|
} else {
|
||||||
|
isLastElementOrdered = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafCollector getLeafCollector(LeafReaderContext leafReaderContext) {
|
||||||
|
this.currentLeafReaderContext = leafReaderContext;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntList unscoredDocs() {
|
||||||
|
return IntLists.unmodifiable(this.docIds);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return ScoreMode.COMPLETE_NO_SCORES;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int topDocsSize() {
|
||||||
|
return Math.min(this.totalHits, this.docIds.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TopDocs topDocs(int start, int howMany) {
|
||||||
|
int size = this.topDocsSize();
|
||||||
|
if (howMany < 0) {
|
||||||
|
throw new IllegalArgumentException("Number of hits requested must be greater than 0 but value was " + howMany);
|
||||||
|
} else if (start < 0) {
|
||||||
|
throw new IllegalArgumentException("Expected value of starting position is between 0 and " + size + ", got " + start);
|
||||||
|
} else if (start < size && howMany != 0) {
|
||||||
|
howMany = Math.min(size - start, howMany);
|
||||||
|
ScoreDoc[] results = new ScoreDoc[howMany];
|
||||||
|
|
||||||
|
this.populateResults(results, start, howMany);
|
||||||
|
return this.newTopDocs(results, start);
|
||||||
|
} else {
|
||||||
|
return this.newTopDocs((ScoreDoc[])null, start);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
|
||||||
|
return super.newTopDocs(results, start);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void populateResults(ScoreDoc[] results, int start, int howMany) {
|
||||||
|
int i = 0;
|
||||||
|
for (int docId : docIds.subList(start, start + howMany)) {
|
||||||
|
results[i] = new ScoreDoc(docId, 1.0f);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
if (!isLastElementOrdered || start + howMany < docIds.size()) {
|
||||||
|
int lastIndex = results.length - 1;
|
||||||
|
var previousLastDoc = results[lastIndex];
|
||||||
|
var biggestDoc = results[biggestDocIdIndex];
|
||||||
|
results[lastIndex] = biggestDoc;
|
||||||
|
results[biggestDocIdIndex] = previousLastDoc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void populateResults(ScoreDoc[] results, int howMany) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
@ -7,4 +7,8 @@ public record PaginationInfo(long totalLimit, long firstPageOffset, long firstPa
|
|||||||
|
|
||||||
public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
|
public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
|
||||||
public static final int FIRST_PAGE_LIMIT = 10;
|
public static final int FIRST_PAGE_LIMIT = 10;
|
||||||
|
/**
|
||||||
|
* Use true to allow a custom unscored collector when possible
|
||||||
|
*/
|
||||||
|
public static final boolean ALLOW_UNSCORED_PAGINATION_MODE = true;
|
||||||
}
|
}
|
||||||
|
@ -39,7 +39,8 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
|||||||
queryParams.sort(),
|
queryParams.sort(),
|
||||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||||
null,
|
null,
|
||||||
LuceneUtils.totalHitsThreshold());
|
LuceneUtils.totalHitsThreshold(),
|
||||||
|
queryParams.isScored());
|
||||||
//noinspection BlockingMethodInNonBlockingContext
|
//noinspection BlockingMethodInNonBlockingContext
|
||||||
indexSearcher.search(queryParams.query(), firstPageCollector);
|
indexSearcher.search(queryParams.query(), firstPageCollector);
|
||||||
firstPageTopDocs = firstPageCollector.topDocs(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
firstPageTopDocs = firstPageCollector.topDocs(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
||||||
@ -71,7 +72,8 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
|||||||
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||||
s.currentPageLimit(),
|
s.currentPageLimit(),
|
||||||
s.last(),
|
s.last(),
|
||||||
LuceneUtils.totalHitsThreshold()
|
LuceneUtils.totalHitsThreshold(),
|
||||||
|
queryParams.isScored()
|
||||||
);
|
);
|
||||||
//noinspection BlockingMethodInNonBlockingContext
|
//noinspection BlockingMethodInNonBlockingContext
|
||||||
indexSearcher.search(queryParams.query(), collector);
|
indexSearcher.search(queryParams.query(), collector);
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
package it.cavallium.dbengine.lucene.searcher;
|
package it.cavallium.dbengine.lucene.searcher;
|
||||||
|
|
||||||
|
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
|
||||||
|
|
||||||
|
import it.cavallium.dbengine.lucene.UnscoredCollector;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
@ -28,15 +31,27 @@ class TopDocsSearcher {
|
|||||||
public static TopDocsCollector<ScoreDoc> getTopDocsCollector(Sort luceneSort,
|
public static TopDocsCollector<ScoreDoc> getTopDocsCollector(Sort luceneSort,
|
||||||
int limit,
|
int limit,
|
||||||
ScoreDoc after,
|
ScoreDoc after,
|
||||||
int totalHitsThreshold) {
|
int totalHitsThreshold,
|
||||||
|
boolean computeScores) {
|
||||||
TopDocsCollector<ScoreDoc> collector;
|
TopDocsCollector<ScoreDoc> collector;
|
||||||
if (luceneSort == null) {
|
if (luceneSort == null) {
|
||||||
if (after == null) {
|
if (after == null) {
|
||||||
|
if (computeScores || !ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||||
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
|
collector = TopScoreDocCollector.create(limit, totalHitsThreshold);
|
||||||
} else {
|
} else {
|
||||||
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
|
collector = new UnscoredCollector(null, limit);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (computeScores || !ALLOW_UNSCORED_PAGINATION_MODE) {
|
||||||
|
collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
|
||||||
|
} else {
|
||||||
|
collector = new UnscoredCollector(after.doc, limit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!computeScores) {
|
||||||
|
throw new IllegalArgumentException("ComputeScores must be true if sort is set");
|
||||||
|
}
|
||||||
if (after == null) {
|
if (after == null) {
|
||||||
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
|
collector = (TopDocsCollector<ScoreDoc>) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, totalHitsThreshold);
|
||||||
} else if (after instanceof FieldDoc afterFieldDoc) {
|
} else if (after instanceof FieldDoc afterFieldDoc) {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package it.cavallium.dbengine.lucene.searcher;
|
package it.cavallium.dbengine.lucene.searcher;
|
||||||
|
|
||||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
||||||
|
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE;
|
||||||
|
|
||||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -24,7 +24,8 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
|||||||
UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||||
null,
|
null,
|
||||||
LuceneUtils.totalHitsThreshold()
|
LuceneUtils.totalHitsThreshold(),
|
||||||
|
queryParams.isScored()
|
||||||
), queryParams.offset(), queryParams.limit(), queryParams.sort());
|
), queryParams.offset(), queryParams.limit(), queryParams.sort());
|
||||||
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
|
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
|
||||||
});
|
});
|
||||||
|
@ -92,7 +92,8 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
|||||||
Query luceneQuery = queryParams.query();
|
Query luceneQuery = queryParams.query();
|
||||||
UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
|
UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
|
||||||
() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
|
() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
|
||||||
s.last(), LuceneUtils.totalHitsThreshold()), 0, s.currentPageLimit(), queryParams.sort());
|
s.last(), LuceneUtils.totalHitsThreshold(), queryParams.isScored()),
|
||||||
|
0, s.currentPageLimit(), queryParams.sort());
|
||||||
//noinspection BlockingMethodInNonBlockingContext
|
//noinspection BlockingMethodInNonBlockingContext
|
||||||
TopDocs pageTopDocs = Flux
|
TopDocs pageTopDocs = Flux
|
||||||
.fromIterable(indexSearchersArray)
|
.fromIterable(indexSearchersArray)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user