Better page limits
This commit is contained in:
parent
b8adbf452e
commit
bd97704a0b
@ -68,7 +68,7 @@ public interface LLIndexSearchers extends Resource<LLIndexSearchers> {
|
||||
}
|
||||
|
||||
public LLIndexSearcher shard() {
|
||||
return this.shard(0);
|
||||
return this.shard(-1);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -20,7 +20,9 @@ import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
|
||||
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis;
|
||||
import it.cavallium.dbengine.lucene.searcher.ExponentialPageLimits;
|
||||
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
||||
import it.cavallium.dbengine.lucene.searcher.PageLimits;
|
||||
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
@ -113,6 +115,8 @@ public class LuceneUtils {
|
||||
private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f);
|
||||
private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity();
|
||||
private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity();
|
||||
// TODO: remove these default page limits and make the limits configurable via QueryParams
|
||||
private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits();
|
||||
|
||||
@SuppressWarnings("DuplicatedCode")
|
||||
public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) {
|
||||
@ -364,6 +368,7 @@ public class LuceneUtils {
|
||||
return new LocalQueryParams(QueryParser.toQuery(queryParams.query()),
|
||||
safeLongToInt(queryParams.offset()),
|
||||
safeLongToInt(queryParams.limit()),
|
||||
DEFAULT_PAGE_LIMITS,
|
||||
queryParams.minCompetitiveScore().getNullable(),
|
||||
QueryParser.toSort(queryParams.sort()),
|
||||
QueryParser.toScoreMode(queryParams.scoreMode())
|
||||
@ -541,6 +546,7 @@ public class LuceneUtils {
|
||||
return new LocalQueryParams(new MatchNoDocsQuery(),
|
||||
localQueryParams.offset(),
|
||||
localQueryParams.limit(),
|
||||
DEFAULT_PAGE_LIMITS,
|
||||
localQueryParams.minCompetitiveScore(),
|
||||
localQueryParams.sort(),
|
||||
localQueryParams.scoreMode()
|
||||
@ -585,6 +591,7 @@ public class LuceneUtils {
|
||||
return new LocalQueryParams(luceneQuery,
|
||||
localQueryParams.offset(),
|
||||
localQueryParams.limit(),
|
||||
DEFAULT_PAGE_LIMITS,
|
||||
localQueryParams.minCompetitiveScore(),
|
||||
localQueryParams.sort(),
|
||||
localQueryParams.scoreMode()
|
||||
|
@ -8,17 +8,6 @@ import org.jetbrains.annotations.Nullable;
|
||||
|
||||
record CurrentPageInfo(@Nullable ScoreDoc last, long remainingLimit, int pageIndex) {
|
||||
|
||||
private static final int MAX_ITEMS_PER_PAGE = 500;
|
||||
|
||||
public static final Comparator<ScoreDoc> TIE_BREAKER = Comparator.comparingInt((d) -> d.shardIndex);
|
||||
public static final CurrentPageInfo EMPTY_STATUS = new CurrentPageInfo(null, 0, 0);
|
||||
|
||||
int currentPageLimit() {
|
||||
if (pageIndex >= 10) { // safety
|
||||
return MAX_ITEMS_PER_PAGE;
|
||||
}
|
||||
var min = Math.min(MAX_ITEMS_PER_PAGE, LuceneUtils.safeLongToInt(pageIndex * (0b1L << pageIndex)));
|
||||
assert min > 0;
|
||||
return min;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,34 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
|
||||
public class ExponentialPageLimits implements PageLimits {
|
||||
|
||||
private final int firstPageLimit;
|
||||
private final int maxItemsPerPage;
|
||||
|
||||
public ExponentialPageLimits() {
|
||||
this(DEFAULT_MIN_ITEMS_PER_PAGE);
|
||||
}
|
||||
|
||||
public ExponentialPageLimits(int firstPageLimit) {
|
||||
this(firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE);
|
||||
}
|
||||
|
||||
public ExponentialPageLimits(int firstPageLimit, int maxItemsPerPage) {
|
||||
this.firstPageLimit = firstPageLimit;
|
||||
this.maxItemsPerPage = maxItemsPerPage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPageLimit(int pageIndex) {
|
||||
if (pageIndex >= 10) { // safety
|
||||
return maxItemsPerPage;
|
||||
}
|
||||
var limitedPageIndex = Math.max(1, pageIndex);
|
||||
var min = Math.max(firstPageLimit, Math.min(maxItemsPerPage,
|
||||
LuceneUtils.safeLongToInt(limitedPageIndex * (0b1L << limitedPageIndex))));
|
||||
assert min > 0;
|
||||
return min;
|
||||
}
|
||||
}
|
@ -6,7 +6,7 @@ import org.apache.lucene.search.Sort;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public record LocalQueryParams(@NotNull Query query, int offset, int limit,
|
||||
public record LocalQueryParams(@NotNull Query query, int offset, int limit, @NotNull PageLimits pageLimits,
|
||||
@Nullable Float minCompetitiveScore, @Nullable Sort sort,
|
||||
@NotNull ScoreMode scoreMode) {
|
||||
|
||||
|
@ -0,0 +1,11 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
|
||||
/**
 * Strategy that decides how many items are requested for each page of a paginated search.
 *
 * <p>The interface has a single abstract method, so it can be implemented with a lambda or a
 * method reference.
 */
@FunctionalInterface
public interface PageLimits {

	/** Default size of the first page, used by the implementations in this package. */
	int DEFAULT_MIN_ITEMS_PER_PAGE = 10;

	/** Default upper bound for the size of a page, used by the implementations in this package. */
	int DEFAULT_MAX_ITEMS_PER_PAGE = 500;

	/**
	 * Returns the number of items to request for the given page.
	 *
	 * @param pageIndex zero-based index of the page
	 * @return the maximum number of items of that page
	 */
	int getPageLimit(int pageIndex);
}
|
@ -3,7 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
|
||||
import java.util.Comparator;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
|
||||
public record PaginationInfo(long totalLimit, long firstPageOffset, long firstPageLimit, boolean forceSinglePage) {
|
||||
public record PaginationInfo(long totalLimit, long firstPageOffset, PageLimits pageLimits, boolean forceSinglePage) {
|
||||
|
||||
public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
|
||||
public static final int FIRST_PAGE_LIMIT = 10;
|
||||
|
@ -56,9 +56,9 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
|
||||
*/
|
||||
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
|
||||
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true);
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
|
||||
} else {
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -68,12 +68,13 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
|
||||
private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers,
|
||||
LocalQueryParams queryParams,
|
||||
PaginationInfo paginationInfo) {
|
||||
var limit = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit());
|
||||
var limit = paginationInfo.totalLimit();
|
||||
var pageLimits = paginationInfo.pageLimits();
|
||||
var pagination = !paginationInfo.forceSinglePage();
|
||||
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
|
||||
return Mono
|
||||
.fromSupplier(() -> new CurrentPageInfo(null, limit, 0))
|
||||
.flatMap(s -> this.searchPage(queryParams, indexSearchers, pagination, resultsOffset, s));
|
||||
.flatMap(s -> this.searchPage(queryParams, indexSearchers, pagination, pageLimits, resultsOffset, s));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -120,8 +121,9 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
|
||||
return Flux
|
||||
.defer(() -> {
|
||||
AtomicReference<CurrentPageInfo> currentPageInfoRef = new AtomicReference<>(secondPageInfo);
|
||||
return Flux
|
||||
.defer(() -> searchPage(queryParams, indexSearchers, true, 0, currentPageInfoRef.get()))
|
||||
return this
|
||||
.searchPage(queryParams, indexSearchers, true, queryParams.pageLimits(),
|
||||
0, currentPageInfoRef.get())
|
||||
.doOnNext(s -> currentPageInfoRef.set(s.nextPageInfo()))
|
||||
.repeatWhen(s -> s.takeWhile(n -> n > 0));
|
||||
})
|
||||
@ -140,6 +142,7 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
|
||||
private Mono<PageData> searchPage(LocalQueryParams queryParams,
|
||||
LLIndexSearchers indexSearchers,
|
||||
boolean allowPagination,
|
||||
PageLimits pageLimits,
|
||||
int resultsOffset,
|
||||
CurrentPageInfo s) {
|
||||
return Mono
|
||||
@ -150,10 +153,10 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
|
||||
}
|
||||
if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) {
|
||||
var sort = getSort(queryParams);
|
||||
var limit = s.currentPageLimit();
|
||||
var pageLimit = pageLimits.getPageLimit(s.pageIndex());
|
||||
var totalHitsThreshold = LuceneUtils.totalHitsThreshold();
|
||||
return new ScoringShardsCollectorManager(sort, limit, null,
|
||||
totalHitsThreshold, resultsOffset, s.currentPageLimit());
|
||||
return new ScoringShardsCollectorManager(sort, pageLimit, null,
|
||||
totalHitsThreshold, resultsOffset, pageLimit);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
@ -171,7 +174,7 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
|
||||
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
|
||||
long nextRemainingLimit;
|
||||
if (allowPagination) {
|
||||
nextRemainingLimit = s.remainingLimit() - s.currentPageLimit();
|
||||
nextRemainingLimit = s.remainingLimit() - pageLimits.getPageLimit(s.pageIndex());
|
||||
} else {
|
||||
nextRemainingLimit = 0L;
|
||||
}
|
||||
|
@ -53,9 +53,9 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
*/
|
||||
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
|
||||
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true);
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
|
||||
} else {
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
|
||||
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -65,7 +65,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers,
|
||||
LocalQueryParams queryParams,
|
||||
PaginationInfo paginationInfo) {
|
||||
var limit = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit());
|
||||
var limit = paginationInfo.totalLimit();
|
||||
var pagination = !paginationInfo.forceSinglePage();
|
||||
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
|
||||
return Mono
|
||||
@ -148,15 +148,16 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
} else {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
var currentPageLimit = queryParams.pageLimits().getPageLimit(s.pageIndex());
|
||||
if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) {
|
||||
TopDocs pageTopDocs;
|
||||
try {
|
||||
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||
s.currentPageLimit(), s.last(), LuceneUtils.totalHitsThreshold(), allowPagination,
|
||||
queryParams.isScored());
|
||||
currentPageLimit, s.last(), LuceneUtils.totalHitsThreshold(),
|
||||
allowPagination, queryParams.isScored());
|
||||
unshardedIndexSearchers.shard().getIndexSearcher().search(queryParams.query(), collector);
|
||||
if (resultsOffset > 0) {
|
||||
pageTopDocs = collector.topDocs(resultsOffset, s.currentPageLimit());
|
||||
pageTopDocs = collector.topDocs(resultsOffset, currentPageLimit);
|
||||
} else {
|
||||
pageTopDocs = collector.topDocs();
|
||||
}
|
||||
@ -167,7 +168,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
|
||||
long nextRemainingLimit;
|
||||
if (allowPagination) {
|
||||
nextRemainingLimit = s.remainingLimit() - s.currentPageLimit();
|
||||
nextRemainingLimit = s.remainingLimit() - currentPageLimit;
|
||||
} else {
|
||||
nextRemainingLimit = 0L;
|
||||
}
|
||||
|
@ -0,0 +1,23 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
public class SinglePageLimits implements PageLimits {
|
||||
|
||||
private final int firstPageLimit;
|
||||
|
||||
public SinglePageLimits() {
|
||||
this(DEFAULT_MIN_ITEMS_PER_PAGE);
|
||||
}
|
||||
|
||||
public SinglePageLimits(int firstPageLimit) {
|
||||
this.firstPageLimit = firstPageLimit;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPageLimit(int pageIndex) {
|
||||
if (pageIndex == 0) {
|
||||
return firstPageLimit;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user