Better page limits

This commit is contained in:
Andrea Cavalli 2021-09-20 18:20:59 +02:00
parent b8adbf452e
commit bd97704a0b
10 changed files with 99 additions and 31 deletions

View File

@ -68,7 +68,7 @@ public interface LLIndexSearchers extends Resource<LLIndexSearchers> {
} }
public LLIndexSearcher shard() { public LLIndexSearcher shard() {
return this.shard(0); return this.shard(-1);
} }
@Override @Override

View File

@ -20,7 +20,9 @@ import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer; import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis; import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis;
import it.cavallium.dbengine.lucene.searcher.ExponentialPageLimits;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.PageLimits;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity; import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import java.io.EOFException; import java.io.EOFException;
import java.io.IOException; import java.io.IOException;
@ -113,6 +115,8 @@ public class LuceneUtils {
private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f); private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f);
private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity(); private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity();
private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity(); private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity();
// TODO: remove this default page limits and make the limits configurable into QueryParams
private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits();
@SuppressWarnings("DuplicatedCode") @SuppressWarnings("DuplicatedCode")
public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) { public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) {
@ -364,6 +368,7 @@ public class LuceneUtils {
return new LocalQueryParams(QueryParser.toQuery(queryParams.query()), return new LocalQueryParams(QueryParser.toQuery(queryParams.query()),
safeLongToInt(queryParams.offset()), safeLongToInt(queryParams.offset()),
safeLongToInt(queryParams.limit()), safeLongToInt(queryParams.limit()),
DEFAULT_PAGE_LIMITS,
queryParams.minCompetitiveScore().getNullable(), queryParams.minCompetitiveScore().getNullable(),
QueryParser.toSort(queryParams.sort()), QueryParser.toSort(queryParams.sort()),
QueryParser.toScoreMode(queryParams.scoreMode()) QueryParser.toScoreMode(queryParams.scoreMode())
@ -541,6 +546,7 @@ public class LuceneUtils {
return new LocalQueryParams(new MatchNoDocsQuery(), return new LocalQueryParams(new MatchNoDocsQuery(),
localQueryParams.offset(), localQueryParams.offset(),
localQueryParams.limit(), localQueryParams.limit(),
DEFAULT_PAGE_LIMITS,
localQueryParams.minCompetitiveScore(), localQueryParams.minCompetitiveScore(),
localQueryParams.sort(), localQueryParams.sort(),
localQueryParams.scoreMode() localQueryParams.scoreMode()
@ -585,6 +591,7 @@ public class LuceneUtils {
return new LocalQueryParams(luceneQuery, return new LocalQueryParams(luceneQuery,
localQueryParams.offset(), localQueryParams.offset(),
localQueryParams.limit(), localQueryParams.limit(),
DEFAULT_PAGE_LIMITS,
localQueryParams.minCompetitiveScore(), localQueryParams.minCompetitiveScore(),
localQueryParams.sort(), localQueryParams.sort(),
localQueryParams.scoreMode() localQueryParams.scoreMode()

View File

@ -8,17 +8,6 @@ import org.jetbrains.annotations.Nullable;
record CurrentPageInfo(@Nullable ScoreDoc last, long remainingLimit, int pageIndex) { record CurrentPageInfo(@Nullable ScoreDoc last, long remainingLimit, int pageIndex) {
private static final int MAX_ITEMS_PER_PAGE = 500;
public static final Comparator<ScoreDoc> TIE_BREAKER = Comparator.comparingInt((d) -> d.shardIndex); public static final Comparator<ScoreDoc> TIE_BREAKER = Comparator.comparingInt((d) -> d.shardIndex);
public static final CurrentPageInfo EMPTY_STATUS = new CurrentPageInfo(null, 0, 0); public static final CurrentPageInfo EMPTY_STATUS = new CurrentPageInfo(null, 0, 0);
int currentPageLimit() {
if (pageIndex >= 10) { // safety
return MAX_ITEMS_PER_PAGE;
}
var min = Math.min(MAX_ITEMS_PER_PAGE, LuceneUtils.safeLongToInt(pageIndex * (0b1L << pageIndex)));
assert min > 0;
return min;
}
} }

View File

@ -0,0 +1,34 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.lucene.LuceneUtils;
/**
 * {@link PageLimits} implementation whose page size grows with the page index until it reaches a cap.
 *
 * <p>For page indexes below 10 the size is {@code i * 2^i}, where {@code i} is the page index raised
 * to at least 1; the result is capped at {@code maxItemsPerPage} and then raised to at least
 * {@code firstPageLimit}. From page index 10 onwards the size is always {@code maxItemsPerPage}.
 *
 * <p>Instances are immutable.
 */
public class ExponentialPageLimits implements PageLimits {

	/** Lower bound applied to the size of every page whose index is below 10. */
	private final int firstPageLimit;

	/** Cap applied to the growth formula, and the size returned from page index 10 onwards. */
	private final int maxItemsPerPage;

	/**
	 * Creates limits that use {@link PageLimits#DEFAULT_MIN_ITEMS_PER_PAGE} as first page limit and
	 * {@link PageLimits#DEFAULT_MAX_ITEMS_PER_PAGE} as cap.
	 */
	public ExponentialPageLimits() {
		this(DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	/**
	 * Creates limits with the given first page limit and
	 * {@link PageLimits#DEFAULT_MAX_ITEMS_PER_PAGE} as cap.
	 *
	 * @param firstPageLimit lower bound for the size of the pages whose index is below 10
	 */
	public ExponentialPageLimits(int firstPageLimit) {
		this(firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE);
	}

	/**
	 * Creates limits with the given first page limit and cap.
	 *
	 * @param firstPageLimit lower bound for the size of the pages whose index is below 10; a value
	 *                       that is zero or negative has no effect, a value greater than
	 *                       {@code maxItemsPerPage} takes precedence over the cap for those pages
	 * @param maxItemsPerPage cap of the growth formula, returned as-is from page index 10 onwards
	 * @throws IllegalArgumentException if {@code maxItemsPerPage} is not positive
	 */
	public ExponentialPageLimits(int firstPageLimit, int maxItemsPerPage) {
		// Fail fast: a non-positive cap would be handed out unchanged as a page size by the
		// "pageIndex >= 10" branch, which is not covered by the assertion in getPageLimit and
		// would not be caught at all when assertions are disabled.
		if (maxItemsPerPage <= 0) {
			throw new IllegalArgumentException("maxItemsPerPage must be positive: " + maxItemsPerPage);
		}
		this.firstPageLimit = firstPageLimit;
		this.maxItemsPerPage = maxItemsPerPage;
	}

	/**
	 * Returns the number of items to request for the given page.
	 *
	 * @param pageIndex index of the page; 0 and negative values are treated like index 1 by the
	 *                  growth formula
	 * @return the page size, always positive
	 */
	@Override
	public int getPageLimit(int pageIndex) {
		if (pageIndex >= 10) { // safety: skip the formula once the page index is large
			return maxItemsPerPage;
		}
		// Index 0 would make the product below zero, so the formula starts from 1.
		var limitedPageIndex = Math.max(1, pageIndex);
		// limitedPageIndex is in [1, 9] here, so the product is at most 9 * 2^9 = 4608.
		var min = Math.max(firstPageLimit, Math.min(maxItemsPerPage,
				LuceneUtils.safeLongToInt(limitedPageIndex * (0b1L << limitedPageIndex))));
		assert min > 0;
		return min;
	}
}

View File

@ -6,7 +6,7 @@ import org.apache.lucene.search.Sort;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Nullable;
public record LocalQueryParams(@NotNull Query query, int offset, int limit, public record LocalQueryParams(@NotNull Query query, int offset, int limit, @NotNull PageLimits pageLimits,
@Nullable Float minCompetitiveScore, @Nullable Sort sort, @Nullable Float minCompetitiveScore, @Nullable Sort sort,
@NotNull ScoreMode scoreMode) { @NotNull ScoreMode scoreMode) {

View File

@ -0,0 +1,11 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.lucene.LuceneUtils;
/**
 * Strategy that decides how many items each page of a paginated search should contain.
 */
public interface PageLimits {

	/**
	 * Default first page limit, used by the no-argument constructors of the implementations in this
	 * package.
	 */
	int DEFAULT_MIN_ITEMS_PER_PAGE = 10;

	/**
	 * Default upper bound for the size of a page.
	 */
	int DEFAULT_MAX_ITEMS_PER_PAGE = 500;

	/**
	 * Returns the number of items to request for a page.
	 *
	 * @param pageIndex index of the page; the first page has index 0
	 * @return the size of the page
	 */
	int getPageLimit(int pageIndex);
}

View File

@ -3,7 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
import java.util.Comparator; import java.util.Comparator;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
public record PaginationInfo(long totalLimit, long firstPageOffset, long firstPageLimit, boolean forceSinglePage) { public record PaginationInfo(long totalLimit, long firstPageOffset, PageLimits pageLimits, boolean forceSinglePage) {
public static final int MAX_SINGLE_SEARCH_LIMIT = 256; public static final int MAX_SINGLE_SEARCH_LIMIT = 256;
public static final int FIRST_PAGE_LIMIT = 10; public static final int FIRST_PAGE_LIMIT = 10;

View File

@ -56,9 +56,9 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
*/ */
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) { private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) { if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true); return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
} else { } else {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false); return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
} }
} }
@ -68,12 +68,13 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers, private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers,
LocalQueryParams queryParams, LocalQueryParams queryParams,
PaginationInfo paginationInfo) { PaginationInfo paginationInfo) {
var limit = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()); var limit = paginationInfo.totalLimit();
var pageLimits = paginationInfo.pageLimits();
var pagination = !paginationInfo.forceSinglePage(); var pagination = !paginationInfo.forceSinglePage();
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()); var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
return Mono return Mono
.fromSupplier(() -> new CurrentPageInfo(null, limit, 0)) .fromSupplier(() -> new CurrentPageInfo(null, limit, 0))
.flatMap(s -> this.searchPage(queryParams, indexSearchers, pagination, resultsOffset, s)); .flatMap(s -> this.searchPage(queryParams, indexSearchers, pagination, pageLimits, resultsOffset, s));
} }
/** /**
@ -120,8 +121,9 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
return Flux return Flux
.defer(() -> { .defer(() -> {
AtomicReference<CurrentPageInfo> currentPageInfoRef = new AtomicReference<>(secondPageInfo); AtomicReference<CurrentPageInfo> currentPageInfoRef = new AtomicReference<>(secondPageInfo);
return Flux return this
.defer(() -> searchPage(queryParams, indexSearchers, true, 0, currentPageInfoRef.get())) .searchPage(queryParams, indexSearchers, true, queryParams.pageLimits(),
0, currentPageInfoRef.get())
.doOnNext(s -> currentPageInfoRef.set(s.nextPageInfo())) .doOnNext(s -> currentPageInfoRef.set(s.nextPageInfo()))
.repeatWhen(s -> s.takeWhile(n -> n > 0)); .repeatWhen(s -> s.takeWhile(n -> n > 0));
}) })
@ -140,6 +142,7 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
private Mono<PageData> searchPage(LocalQueryParams queryParams, private Mono<PageData> searchPage(LocalQueryParams queryParams,
LLIndexSearchers indexSearchers, LLIndexSearchers indexSearchers,
boolean allowPagination, boolean allowPagination,
PageLimits pageLimits,
int resultsOffset, int resultsOffset,
CurrentPageInfo s) { CurrentPageInfo s) {
return Mono return Mono
@ -150,10 +153,10 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
} }
if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) { if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) {
var sort = getSort(queryParams); var sort = getSort(queryParams);
var limit = s.currentPageLimit(); var pageLimit = pageLimits.getPageLimit(s.pageIndex());
var totalHitsThreshold = LuceneUtils.totalHitsThreshold(); var totalHitsThreshold = LuceneUtils.totalHitsThreshold();
return new ScoringShardsCollectorManager(sort, limit, null, return new ScoringShardsCollectorManager(sort, pageLimit, null,
totalHitsThreshold, resultsOffset, s.currentPageLimit()); totalHitsThreshold, resultsOffset, pageLimit);
} else { } else {
return null; return null;
} }
@ -171,7 +174,7 @@ public class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs); var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
long nextRemainingLimit; long nextRemainingLimit;
if (allowPagination) { if (allowPagination) {
nextRemainingLimit = s.remainingLimit() - s.currentPageLimit(); nextRemainingLimit = s.remainingLimit() - pageLimits.getPageLimit(s.pageIndex());
} else { } else {
nextRemainingLimit = 0L; nextRemainingLimit = 0L;
} }

View File

@ -53,9 +53,9 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
*/ */
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) { private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) { if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true); return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
} else { } else {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false); return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
} }
} }
@ -65,7 +65,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers, private Mono<PageData> searchFirstPage(LLIndexSearchers indexSearchers,
LocalQueryParams queryParams, LocalQueryParams queryParams,
PaginationInfo paginationInfo) { PaginationInfo paginationInfo) {
var limit = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()); var limit = paginationInfo.totalLimit();
var pagination = !paginationInfo.forceSinglePage(); var pagination = !paginationInfo.forceSinglePage();
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()); var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
return Mono return Mono
@ -148,15 +148,16 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
} else { } else {
throw new IllegalArgumentException(); throw new IllegalArgumentException();
} }
var currentPageLimit = queryParams.pageLimits().getPageLimit(s.pageIndex());
if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) { if ((s.pageIndex() == 0 || s.last() != null) && s.remainingLimit() > 0) {
TopDocs pageTopDocs; TopDocs pageTopDocs;
try { try {
TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(), TopDocsCollector<ScoreDoc> collector = TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
s.currentPageLimit(), s.last(), LuceneUtils.totalHitsThreshold(), allowPagination, currentPageLimit, s.last(), LuceneUtils.totalHitsThreshold(),
queryParams.isScored()); allowPagination, queryParams.isScored());
unshardedIndexSearchers.shard().getIndexSearcher().search(queryParams.query(), collector); unshardedIndexSearchers.shard().getIndexSearcher().search(queryParams.query(), collector);
if (resultsOffset > 0) { if (resultsOffset > 0) {
pageTopDocs = collector.topDocs(resultsOffset, s.currentPageLimit()); pageTopDocs = collector.topDocs(resultsOffset, currentPageLimit);
} else { } else {
pageTopDocs = collector.topDocs(); pageTopDocs = collector.topDocs();
} }
@ -167,7 +168,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs); var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
long nextRemainingLimit; long nextRemainingLimit;
if (allowPagination) { if (allowPagination) {
nextRemainingLimit = s.remainingLimit() - s.currentPageLimit(); nextRemainingLimit = s.remainingLimit() - currentPageLimit;
} else { } else {
nextRemainingLimit = 0L; nextRemainingLimit = 0L;
} }

View File

@ -0,0 +1,23 @@
package it.cavallium.dbengine.lucene.searcher;
/**
 * {@link PageLimits} implementation that gives a size to the first page only: page index 0 gets the
 * configured limit, every other page index gets 0.
 *
 * <p>Instances are immutable.
 */
public class SinglePageLimits implements PageLimits {

	/** Size returned for page index 0. */
	private final int firstPageLimit;

	/**
	 * Creates limits whose first page has {@link PageLimits#DEFAULT_MIN_ITEMS_PER_PAGE} items.
	 */
	public SinglePageLimits() {
		this(DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	/**
	 * Creates limits whose first page has the given number of items.
	 *
	 * @param firstPageLimit size returned for page index 0; stored as-is, without validation
	 */
	public SinglePageLimits(int firstPageLimit) {
		this.firstPageLimit = firstPageLimit;
	}

	/**
	 * Returns the size of the requested page.
	 *
	 * @param pageIndex index of the page
	 * @return the first page limit when {@code pageIndex} is 0, otherwise 0
	 */
	@Override
	public int getPageLimit(int pageIndex) {
		return pageIndex == 0 ? firstPageLimit : 0;
	}
}