CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/searcher/ScoredPagedMultiSearcher.java

222 lines
8.3 KiB
Java
Raw Normal View History

package it.cavallium.dbengine.lucene.searcher;
2021-09-19 19:59:37 +02:00
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_SCHEDULER;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
2023-02-22 22:31:36 +01:00
import static it.cavallium.dbengine.utils.StreamUtils.streamWhileNonNull;
import static it.cavallium.dbengine.utils.StreamUtils.toListOn;
import com.google.common.collect.Streams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
2021-09-19 19:59:37 +02:00
import it.cavallium.dbengine.database.LLUtils;
2021-09-20 12:51:27 +02:00
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.PageLimits;
2021-11-09 00:54:09 +01:00
import it.cavallium.dbengine.lucene.collector.ScoringShardsCollectorMultiManager;
import it.cavallium.dbengine.utils.DBException;
import it.cavallium.dbengine.utils.StreamUtils;
2022-01-28 21:12:10 +01:00
import java.io.IOException;
2021-09-10 01:13:39 +02:00
import java.util.Arrays;
import java.util.Collection;
2021-09-22 11:03:39 +02:00
import java.util.List;
import java.util.Objects;
2021-09-19 19:59:37 +02:00
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
2021-09-22 11:03:39 +02:00
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.jetbrains.annotations.Nullable;
public class ScoredPagedMultiSearcher implements MultiSearcher {
2021-09-19 19:59:37 +02:00
2022-06-14 13:10:38 +02:00
protected static final Logger LOG = LogManager.getLogger(ScoredPagedMultiSearcher.class);
2021-09-23 15:37:13 +02:00
public ScoredPagedMultiSearcher() {
}
@Override
public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers,
LocalQueryParams queryParams,
2022-02-26 03:28:20 +01:00
@Nullable String keyFieldName,
GlobalQueryRewrite transformer,
Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
2022-07-02 11:44:13 +02:00
if (transformer != GlobalQueryRewrite.NO_REWRITE) {
return LuceneUtils.rewriteMulti(this, indexSearchers, queryParams, keyFieldName, transformer, filterer);
2022-07-02 11:44:13 +02:00
}
2022-06-14 13:10:38 +02:00
2022-07-02 11:44:13 +02:00
PaginationInfo paginationInfo = getPaginationInfo(queryParams);
// Search first page results
var firstPageTopDocs = this.searchFirstPage(indexSearchers.shards(), queryParams, paginationInfo);
2022-07-02 11:44:13 +02:00
// Compute the results of the first page
var firstResult = this.computeFirstPageResults(firstPageTopDocs, indexSearchers, keyFieldName, queryParams);
2022-07-02 11:44:13 +02:00
// Compute other results
return this.computeOtherResults(firstResult,
indexSearchers.shards(),
queryParams,
keyFieldName,
filterer
);
}
2021-09-19 19:59:37 +02:00
private Sort getSort(LocalQueryParams queryParams) {
return queryParams.sort();
2021-09-19 19:59:37 +02:00
}
/**
* Get the pagination info
*/
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
2021-10-15 22:03:53 +02:00
if (queryParams.limitInt() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), true);
2021-09-19 19:59:37 +02:00
} else {
2021-10-15 22:03:53 +02:00
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), false);
}
2021-09-19 19:59:37 +02:00
}
/**
* Search effectively the raw results of the first page
*/
private PageData searchFirstPage(List<IndexSearcher> indexSearchers,
2021-09-19 19:59:37 +02:00
LocalQueryParams queryParams,
PaginationInfo paginationInfo) {
2021-09-20 18:20:59 +02:00
var limit = paginationInfo.totalLimit();
var pageLimits = paginationInfo.pageLimits();
2021-09-19 19:59:37 +02:00
var pagination = !paginationInfo.forceSinglePage();
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
return this.searchPage(queryParams,
indexSearchers,
pagination,
pageLimits,
resultsOffset,
new CurrentPageInfo(null, limit, 0)
);
2021-09-19 19:59:37 +02:00
}
/**
* Compute the results of the first page, extracting useful data
*/
private FirstPageResults computeFirstPageResults(PageData firstPageData,
2021-09-20 12:51:27 +02:00
LLIndexSearchers indexSearchers,
2021-09-19 19:59:37 +02:00
String keyFieldName,
LocalQueryParams queryParams) {
var totalHitsCount = LuceneUtils.convertTotalHitsCount(firstPageData.topDocs().totalHits);
var scoreDocs = firstPageData.topDocs().scoreDocs;
assert LLUtils.isSet(scoreDocs);
2021-09-19 19:59:37 +02:00
Stream<LLKeyScore> firstPageHitsFlux = LuceneUtils
.convertHits(Stream.of(scoreDocs), indexSearchers.shards(), keyFieldName)
.limit(queryParams.limitInt());
2021-09-19 19:59:37 +02:00
CurrentPageInfo nextPageInfo = firstPageData.nextPageInfo();
2021-09-19 19:59:37 +02:00
return new FirstPageResults(totalHitsCount, firstPageHitsFlux, nextPageInfo);
2021-09-19 19:59:37 +02:00
}
2021-11-08 11:17:52 +01:00
private LuceneSearchResult computeOtherResults(FirstPageResults firstResult,
2021-09-22 11:03:39 +02:00
List<IndexSearcher> indexSearchers,
2021-09-19 19:59:37 +02:00
LocalQueryParams queryParams,
2021-09-22 11:03:39 +02:00
String keyFieldName,
Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
2021-09-22 11:03:39 +02:00
var totalHitsCount = firstResult.totalHitsCount();
var firstPageHitsStream = firstResult.firstPageHitsStream();
2021-09-22 11:03:39 +02:00
var secondPageInfo = firstResult.nextPageInfo();
2021-09-19 19:59:37 +02:00
Stream<LLKeyScore> nextHitsFlux = searchOtherPages(indexSearchers, queryParams, keyFieldName, secondPageInfo);
2021-09-19 19:59:37 +02:00
Stream<LLKeyScore> combinedStream = Stream.concat(firstPageHitsStream, nextHitsFlux);
return new LuceneSearchResult(totalHitsCount, StreamUtils.collect(filterer.apply(combinedStream), fastListing()));
2021-09-19 19:59:37 +02:00
}
/**
* Search effectively the merged raw results of the next pages
*/
private Stream<LLKeyScore> searchOtherPages(List<IndexSearcher> indexSearchers,
2021-09-19 19:59:37 +02:00
LocalQueryParams queryParams, String keyFieldName, CurrentPageInfo secondPageInfo) {
AtomicReference<CurrentPageInfo> currentPageInfoRef = new AtomicReference<>(secondPageInfo);
2023-02-22 22:31:36 +01:00
Stream<ScoreDoc> topFieldDocStream = streamWhileNonNull(() -> {
var currentPageInfo = currentPageInfoRef.getPlain();
if (currentPageInfo == null) return null;
LOG.trace("Current page info: {}", currentPageInfo);
var result = this.searchPage(queryParams, indexSearchers, true, queryParams.pageLimits(), 0, currentPageInfo);
LOG.trace("Next page info: {}", result != null ? result.nextPageInfo() : null);
currentPageInfoRef.setPlain(result != null ? result.nextPageInfo() : null);
if (result == null || result.topDocs().scoreDocs.length == 0) {
return null;
} else {
return Arrays.asList(result.topDocs().scoreDocs);
}
2023-02-22 22:31:36 +01:00
}).flatMap(Collection::stream);
return LuceneUtils.convertHits(topFieldDocStream, indexSearchers, keyFieldName);
2021-09-19 19:59:37 +02:00
}
/**
*
* @param resultsOffset offset of the resulting topDocs. Useful if you want to
* skip the first n results in the first page
*/
private PageData searchPage(LocalQueryParams queryParams,
2021-10-15 22:03:53 +02:00
List<IndexSearcher> indexSearchers,
2021-09-19 19:59:37 +02:00
boolean allowPagination,
2021-09-20 18:20:59 +02:00
PageLimits pageLimits,
2021-09-19 19:59:37 +02:00
int resultsOffset,
CurrentPageInfo s) {
if (resultsOffset < 0) {
throw new IndexOutOfBoundsException(resultsOffset);
}
ScoringShardsCollectorMultiManager cmm;
if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) {
var query = queryParams.query();
@Nullable var sort = getSort(queryParams);
var pageLimit = pageLimits.getPageLimit(s.pageIndex());
var after = (FieldDoc) s.last();
var totalHitsThreshold = queryParams.getTotalHitsThresholdInt();
cmm = new ScoringShardsCollectorMultiManager(query, sort, pageLimit, after, totalHitsThreshold,
resultsOffset, pageLimit);
} else {
return null;
};
record IndexedShard(IndexSearcher indexSearcher, long shardIndex) {}
List<TopDocs> shardResults = toListOn(LUCENE_SCHEDULER,
Streams.mapWithIndex(indexSearchers.stream(), IndexedShard::new).map(shardWithIndex -> {
var index = (int) shardWithIndex.shardIndex();
var shard = shardWithIndex.indexSearcher();
var cm = cmm.get(shard, index);
try {
return shard.search(queryParams.query(), cm);
} catch (IOException e) {
throw new DBException(e);
}
})
);
var pageTopDocs = cmm.reduce(shardResults);
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
long nextRemainingLimit;
if (allowPagination) {
nextRemainingLimit = s.remainingLimit() - pageLimits.getPageLimit(s.pageIndex());
} else {
nextRemainingLimit = 0L;
}
var nextPageIndex = s.pageIndex() + 1;
var nextPageInfo = new CurrentPageInfo(pageLastDoc, nextRemainingLimit, nextPageIndex);
return new PageData(pageTopDocs, nextPageInfo);
}
@Override
public String getName() {
return "scored paged multi";
}
}