Better page limits

This commit is contained in:
Andrea Cavalli 2021-09-20 18:20:59 +02:00
parent b8adbf452e
commit bd97704a0b
10 changed files with 99 additions and 31 deletions

View File

@ -68,7 +68,7 @@ public interface LLIndexSearchers extends Resource<LLIndexSearchers> {
}
public LLIndexSearcher shard() {
return this.shard(0);
return this.shard(-1);
}
@Override

View File

@ -20,7 +20,9 @@ import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis;
import it.cavallium.dbengine.lucene.searcher.ExponentialPageLimits;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.PageLimits;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import java.io.EOFException;
import java.io.IOException;
@ -113,6 +115,8 @@ public class LuceneUtils {
private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f);
private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity();
private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity();
// TODO: remove these default page limits and make the limits configurable via QueryParams
private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits();
@SuppressWarnings("DuplicatedCode")
public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) {
@ -364,6 +368,7 @@ public class LuceneUtils {
return new LocalQueryParams(QueryParser.toQuery(queryParams.query()),
safeLongToInt(queryParams.offset()),
safeLongToInt(queryParams.limit()),
DEFAULT_PAGE_LIMITS,
queryParams.minCompetitiveScore().getNullable(),
QueryParser.toSort(queryParams.sort()),
QueryParser.toScoreMode(queryParams.scoreMode())
@ -541,6 +546,7 @@ public class LuceneUtils {
return new LocalQueryParams(new MatchNoDocsQuery(),
localQueryParams.offset(),
localQueryParams.limit(),
DEFAULT_PAGE_LIMITS,
localQueryParams.minCompetitiveScore(),
localQueryParams.sort(),
localQueryParams.scoreMode()
@ -585,6 +591,7 @@ public class LuceneUtils {
return new LocalQueryParams(luceneQuery,
localQueryParams.offset(),
localQueryParams.limit(),
DEFAULT_PAGE_LIMITS,
localQueryParams.minCompetitiveScore(),
localQueryParams.sort(),
localQueryParams.scoreMode()

View File

@ -8,17 +8,6 @@ import org.jetbrains.annotations.Nullable;
record CurrentPageInfo(@Nullable ScoreDoc last, long remainingLimit, int pageIndex) {
private static final int MAX_ITEMS_PER_PAGE = 500;
public static final Comparator<ScoreDoc> TIE_BREAKER = Comparator.comparingInt((d) -> d.shardIndex);
public static final CurrentPageInfo EMPTY_STATUS = new CurrentPageInfo(null, 0, 0);
int currentPageLimit() {
if (pageIndex >= 10) { // safety
return MAX_ITEMS_PER_PAGE;
}
var min = Math.min(MAX_ITEMS_PER_PAGE, LuceneUtils.safeLongToInt(pageIndex * (0b1L << pageIndex)));
assert min > 0;
return min;
}
}

View File

@ -0,0 +1,34 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.lucene.LuceneUtils;
/**
 * {@link PageLimits} implementation whose page size grows with the page index.
 *
 * <p>For a page index {@code i} the raw size is {@code max(1, i) * 2^max(1, i)}
 * (2, 2, 8, 24, 64, 160, 384, ...). The raw size is raised to at least
 * {@code firstPageLimit} and then capped at {@code maxItemsPerPage}, so the returned
 * limit is always in the range {@code [1, maxItemsPerPage]}. From page index
 * {@value #MAX_EXPONENTIAL_PAGE_INDEX} onwards the maximum is returned directly.
 */
public class ExponentialPageLimits implements PageLimits {

	/** First page index for which the growth formula is skipped and the maximum is returned. */
	private static final int MAX_EXPONENTIAL_PAGE_INDEX = 10;

	private final int firstPageLimit;
	private final int maxItemsPerPage;

	/** Creates limits using the default minimum and maximum page sizes of {@link PageLimits}. */
	public ExponentialPageLimits() {
		this(DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	/**
	 * Creates limits with a custom lower bound and the default maximum page size.
	 *
	 * @param firstPageLimit lower bound applied to every computed page size
	 */
	public ExponentialPageLimits(int firstPageLimit) {
		this(firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE);
	}

	/**
	 * Creates limits with custom lower and upper bounds.
	 *
	 * @param firstPageLimit lower bound applied to every computed page size; values below
	 *                       the growth formula's result have no effect
	 * @param maxItemsPerPage upper bound of every page size, must be positive
	 * @throws IllegalArgumentException if {@code maxItemsPerPage} is not positive
	 */
	public ExponentialPageLimits(int firstPageLimit, int maxItemsPerPage) {
		// Fail fast: a non-positive maximum would otherwise surface as a non-positive page
		// limit, which was previously guarded only by an assert (disabled unless -ea is set).
		if (maxItemsPerPage <= 0) {
			throw new IllegalArgumentException("maxItemsPerPage must be positive: " + maxItemsPerPage);
		}
		this.firstPageLimit = firstPageLimit;
		this.maxItemsPerPage = maxItemsPerPage;
	}

	/**
	 * Computes the maximum number of items for the given page.
	 *
	 * @param pageIndex index of the page; values below 1 are treated as 1 by the growth formula
	 * @return a limit in the range {@code [1, maxItemsPerPage]}
	 */
	@Override
	public int getPageLimit(int pageIndex) {
		if (pageIndex >= MAX_EXPONENTIAL_PAGE_INDEX) { // safety: stop growing, use the cap
			return maxItemsPerPage;
		}
		int limitedPageIndex = Math.max(1, pageIndex);
		// limitedPageIndex is in [1, 9] here, so the product is at most 9 * 512 = 4608
		// and plain int arithmetic cannot overflow.
		int grownLimit = limitedPageIndex * (1 << limitedPageIndex);
		// Apply the lower bound first and the cap last, so the result never exceeds
		// maxItemsPerPage even when firstPageLimit is larger than it.
		return Math.min(maxItemsPerPage, Math.max(firstPageLimit, grownLimit));
	}
}

View File

@ -6,7 +6,7 @@ import org.apache.lucene.search.Sort;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public record LocalQueryParams(@NotNull Query query, int offset, int limit,
public record LocalQueryParams(@NotNull Query query, int offset, int limit, @NotNull PageLimits pageLimits,
@Nullable Float minCompetitiveScore, @Nullable Sort sort,
@NotNull ScoreMode scoreMode) {

View File

@ -0,0 +1,11 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.lucene.LuceneUtils;
/**
 * Strategy that decides how many items a single result page may contain.
 *
 * <p>Implementations are asked for the limit of each page by its index.
 */
public interface PageLimits {
/** Default lower bound for a page size; used as the default first-page limit by the implementations in this package. */
int DEFAULT_MIN_ITEMS_PER_PAGE = 10;
/** Default upper bound for a page size. */
int DEFAULT_MAX_ITEMS_PER_PAGE = 500;
/**
 * Returns the maximum number of items for the given page.
 *
 * @param pageIndex index of the page (the searchers in this project appear to start at 0 - confirm against callers)
 * @return the item limit for that page
 */
int getPageLimit(int pageIndex);
}

View File

@ -3,7 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
import java.util.Comparator;
import org.apache.lucene.search.ScoreDoc;
public record PaginationInfo(long totalLimit, long firstPageOffset, long firstPageLimit, boolean forceSinglePage) {
public record PaginationInfo(long totalLimit, long firstPageOffset, PageLimits pageLimits, boolean forceSinglePage) {
public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
public static final int FIRST_PAGE_LIMIT = 10;

View File

@ -56,9 +56,9 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
*/
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true);
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
} else {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
}
}
@ -68,12 +68,13 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers,
LocalQueryParams queryParams,
PaginationInfo paginationInfo) {
var limit = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit());
var limit = paginationInfo.totalLimit();
var pageLimits = paginationInfo.pageLimits();
var pagination = !paginationInfo.forceSinglePage();
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
return Mono
.fromSupplier(() -> new CurrentPageInfo(null, limit, 0))
.flatMap(s -> this.searchPage(queryParams, indexSearchers, pagination, resultsOffset, s));
.flatMap(s -> this.searchPage(queryParams, indexSearchers, pagination, pageLimits, resultsOffset, s));
}
/**
@ -120,8 +121,9 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
return Flux
.defer(() -> {
AtomicReference<CurrentPageInfo> currentPageInfoRef = new AtomicReference<>(secondPageInfo);
return Flux
.defer(() -> searchPage(queryParams, indexSearchers, true, 0, currentPageInfoRef.get()))
return this
.searchPage(queryParams, indexSearchers, true, queryParams.pageLimits(),
0, currentPageInfoRef.get())
.doOnNext(s -> currentPageInfoRef.set(s.nextPageInfo()))
.repeatWhen(s -> s.takeWhile(n -> n > 0));
})
@ -140,6 +142,7 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
private Mono<PageData> searchPage(LocalQueryParams queryParams,
LLIndexSearchers indexSearchers,
boolean allowPagination,
PageLimits pageLimits,
int resultsOffset,
CurrentPageInfo s) {
return Mono
@ -150,10 +153,10 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
}
if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) {
var sort = getSort(queryParams);
var limit = s.currentPageLimit();
var pageLimit = pageLimits.getPageLimit(s.pageIndex());
var totalHitsThreshold = LuceneUtils.totalHitsThreshold();
return new ScoringShardsCollectorManager(sort, limit, null,
totalHitsThreshold, resultsOffset, s.currentPageLimit());
return new ScoringShardsCollectorManager(sort, pageLimit, null,
totalHitsThreshold, resultsOffset, pageLimit);
} else {
return null;
}
@ -171,7 +174,7 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
long nextRemainingLimit;
if (allowPagination) {
nextRemainingLimit = s.remainingLimit() - s.currentPageLimit();
nextRemainingLimit = s.remainingLimit() - pageLimits.getPageLimit(s.pageIndex());
} else {
nextRemainingLimit = 0L;
}

View File

@ -53,9 +53,9 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
*/
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true);
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
} else {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
}
}
@ -65,7 +65,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers,
LocalQueryParams queryParams,
PaginationInfo paginationInfo) {
var limit = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit());
var limit = paginationInfo.totalLimit();
var pagination = !paginationInfo.forceSinglePage();
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
return Mono
@ -148,15 +148,16 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
} else {
throw new IllegalArgumentException();
}
var currentPageLimit = queryParams.pageLimits().getPageLimit(s.pageIndex());
if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) {
TopDocs pageTopDocs;
try {
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
s.currentPageLimit(), s.last(), LuceneUtils.totalHitsThreshold(), allowPagination,
queryParams.isScored());
currentPageLimit, s.last(), LuceneUtils.totalHitsThreshold(),
allowPagination, queryParams.isScored());
unshardedIndexSearchers.shard().getIndexSearcher().search(queryParams.query(), collector);
if (resultsOffset > 0) {
pageTopDocs = collector.topDocs(resultsOffset, s.currentPageLimit());
pageTopDocs = collector.topDocs(resultsOffset, currentPageLimit);
} else {
pageTopDocs = collector.topDocs();
}
@ -167,7 +168,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
long nextRemainingLimit;
if (allowPagination) {
nextRemainingLimit = s.remainingLimit() - s.currentPageLimit();
nextRemainingLimit = s.remainingLimit() - currentPageLimit;
} else {
nextRemainingLimit = 0L;
}

View File

@ -0,0 +1,23 @@
package it.cavallium.dbengine.lucene.searcher;
/**
 * {@link PageLimits} implementation that grants items to the first page only:
 * page index 0 gets the configured limit, every other page index gets 0.
 */
public class SinglePageLimits implements PageLimits {

	private final int firstPageLimit;

	/** Creates limits whose first page holds {@code DEFAULT_MIN_ITEMS_PER_PAGE} items. */
	public SinglePageLimits() {
		this(DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	/**
	 * Creates limits with a custom size for the first page.
	 *
	 * @param firstPageLimit number of items allowed on page index 0
	 */
	public SinglePageLimits(int firstPageLimit) {
		this.firstPageLimit = firstPageLimit;
	}

	/**
	 * Returns the limit of the requested page.
	 *
	 * @param pageIndex index of the page
	 * @return {@code firstPageLimit} for page index 0, otherwise 0
	 */
	@Override
	public int getPageLimit(int pageIndex) {
		return pageIndex == 0 ? firstPageLimit : 0;
	}
}