From adbbbaa6469b068bf8c15f791efff03a9eb2372a Mon Sep 17 00:00:00 2001 From: Andrea Cavalli Date: Tue, 9 Nov 2021 01:13:47 +0100 Subject: [PATCH] Optimizations --- .../collector/OptimizedTopDocsCollector.java | 123 ++++++++++++++---- .../ScoringShardsCollectorMultiManager.java | 30 +---- .../lucene/searcher/PagedLocalSearcher.java | 22 +--- 3 files changed, 109 insertions(+), 66 deletions(-) diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/OptimizedTopDocsCollector.java b/src/main/java/it/cavallium/dbengine/lucene/collector/OptimizedTopDocsCollector.java index 79ffca3..5fd9fab 100644 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/OptimizedTopDocsCollector.java +++ b/src/main/java/it/cavallium/dbengine/lucene/collector/OptimizedTopDocsCollector.java @@ -2,51 +2,126 @@ package it.cavallium.dbengine.lucene.collector; import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE; +import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.collector.UnscoredCollector; import java.io.IOException; import java.util.Collection; +import java.util.List; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TopScoreDocCollector; +import org.jetbrains.annotations.NotNull; -public class OptimizedTopDocsCollector { +public class OptimizedTopDocsCollector implements CollectorMultiManager { - @SuppressWarnings({"unchecked", "rawtypes"}) - public static TopDocsCollector create(Sort luceneSort, + private final Sort luceneSort; + private final int limit; + private final ScoreDoc after; + private final int totalHitsThreshold; + private final boolean allowPagination; + private final boolean computeScores; + + private final int topDocsOffset; + private final int topDocsCount; + + public OptimizedTopDocsCollector(Sort luceneSort, int limit, ScoreDoc after, int totalHitsThreshold, boolean allowPagination, - boolean computeScores) { - TopDocsCollector collector; - if (after != null && !allowPagination) { - throw new IllegalArgumentException("\"allowPagination\" is false, but \"after\" is set"); - } - if (luceneSort == null) { - if (after == null) { - if (computeScores || allowPagination || !ALLOW_UNSCORED_PAGINATION_MODE) { - collector = TopScoreDocCollector.create(limit, totalHitsThreshold); - } else { - collector = new UnscoredCollector(limit); + boolean computeScores, + int topDocsOffset, + int topDocsCount) { + this.luceneSort = luceneSort; + this.limit = limit; + this.after = after; + this.totalHitsThreshold = totalHitsThreshold; + this.allowPagination = allowPagination; + this.computeScores = computeScores; + + this.topDocsOffset = topDocsOffset; + this.topDocsCount = topDocsCount; + } + + public CollectorManager, TopDocs> get(@NotNull Query query, IndexSearcher indexSearcher) { + return new CollectorManager<>() { + @Override + public TopDocsCollector newCollector() throws IOException { + TopDocsCollector collector; + if (after != null && !allowPagination) { + throw new IllegalArgumentException("\"allowPagination\" is false, but \"after\" is set"); } - } else { - collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold); + if (luceneSort == null) { + if (after == null) { + if (computeScores || allowPagination || !ALLOW_UNSCORED_PAGINATION_MODE) { + collector = TopScoreDocCollector.create(limit, totalHitsThreshold); + } else { + collector = new UnscoredCollector(limit); + } + } else { + collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold); + } + } else { + if (after == null) { + collector = TopFieldCollector.create(luceneSort, limit, totalHitsThreshold); + } else if (after instanceof FieldDoc afterFieldDoc) { + collector = TopFieldCollector.create(luceneSort, limit, afterFieldDoc, totalHitsThreshold); + } else { + throw new UnsupportedOperationException("GetTopDocs with \"luceneSort\" != null requires \"after\" to be a FieldDoc"); + } + } + return collector; } + + @Override + public TopDocs reduce(Collection> collectors) throws IOException { + TopDocs[] docsArray; + boolean needsSort = luceneSort != null; + boolean needsScores = luceneSort != null && luceneSort.needsScores(); + if (needsSort) { + docsArray = new TopFieldDocs[collectors.size()]; + } else { + docsArray = new TopDocs[collectors.size()]; + } + int i = 0; + for (TopDocsCollector collector : collectors) { + docsArray[i] = collector.topDocs(); + i++; + } + var merged = LuceneUtils.mergeTopDocs(luceneSort, null, null, docsArray); + if (needsScores) { + TopFieldCollector.populateScores(merged.scoreDocs, indexSearcher, query); + } + return merged; + } + }; + } + + @Override + public ScoreMode scoreMode() { + throw new NotImplementedException(); + } + + @SuppressWarnings({"SuspiciousToArrayCall", "IfStatementWithIdenticalBranches"}) + @Override + public TopDocs reduce(List topDocs) { + TopDocs[] arr; + if (luceneSort != null) { + arr = topDocs.toArray(TopFieldDocs[]::new); } else { - if (after == null) { - collector = (TopDocsCollector) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, totalHitsThreshold); - } else if (after instanceof FieldDoc afterFieldDoc) { - collector = (TopDocsCollector) (TopDocsCollector) TopFieldCollector.create(luceneSort, limit, afterFieldDoc, totalHitsThreshold); - } else { - throw new UnsupportedOperationException("GetTopDocs with \"luceneSort\" != null requires \"after\" to be a FieldDoc"); - } + arr = topDocs.toArray(TopDocs[]::new); } - return collector; + return LuceneUtils.mergeTopDocs(luceneSort, topDocsOffset, topDocsCount, arr); } } diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/ScoringShardsCollectorMultiManager.java b/src/main/java/it/cavallium/dbengine/lucene/collector/ScoringShardsCollectorMultiManager.java index bd40051..876a09e 100644 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/ScoringShardsCollectorMultiManager.java +++ b/src/main/java/it/cavallium/dbengine/lucene/collector/ScoringShardsCollectorMultiManager.java @@ -32,8 +32,6 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager private final int totalHitsThreshold; private final @Nullable Integer startN; private final @Nullable Integer topN; - private final @Nullable Integer internalStartN; - private final @Nullable Integer internalTopN; private final CollectorManager sharedCollectorManager; public ScoringShardsCollectorMultiManager(Query query, @@ -83,22 +81,6 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager } else { this.topN = topN; } - if (this.topN != null && this.startN != null) { - if (this.topN >= 2147483630) { - this.internalTopN = this.topN; - } else { - this.internalTopN = this.startN + this.topN; - } - } else if (this.topN == null && this.startN != null) { - this.internalTopN = null; - } else { - this.internalTopN = this.topN; - } - if (this.internalTopN != null) { - this.internalStartN = 0; - } else { - this.internalStartN = null; - } this.sharedCollectorManager = TopFieldCollector.createSharedManager(sort == null ? Sort.RELEVANCE : sort, numHits, after, totalHitsThreshold); } @@ -120,7 +102,7 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager for (TopFieldCollector collector : collectors) { topDocs[i++] = collector.topDocs(); } - var result = LuceneUtils.mergeTopDocs(sort, 0, numHits, topDocs); + var result = LuceneUtils.mergeTopDocs(sort, null, null, topDocs); if (sort != null && sort.needsScores()) { TopFieldCollector.populateScores(result.scoreDocs, indexSearcher, query); @@ -128,9 +110,9 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager return result; } else { - TopDocs result; + TopDocs[] topDocs; if (sort != null) { - TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()]; + topDocs = new TopFieldDocs[collectors.size()]; var i = 0; for (TopFieldCollector collector : collectors) { topDocs[i] = collector.topDocs(); @@ -145,9 +127,8 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager } i++; } - result = LuceneUtils.mergeTopDocs(sort, internalStartN, internalTopN, topDocs); } else { - TopDocs[] topDocs = new TopDocs[collectors.size()]; + topDocs = new TopDocs[collectors.size()]; var i = 0; for (TopFieldCollector collector : collectors) { topDocs[i] = collector.topDocs(); @@ -156,9 +137,8 @@ public class ScoringShardsCollectorMultiManager implements CollectorMultiManager } i++; } - result = LuceneUtils.mergeTopDocs(null, internalStartN, internalTopN, topDocs); } - return result; + return LuceneUtils.mergeTopDocs(sort, null, null, topDocs); } } }; diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/PagedLocalSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/PagedLocalSearcher.java index 715bbc3..a6af023 100644 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/PagedLocalSearcher.java +++ b/src/main/java/it/cavallium/dbengine/lucene/searcher/PagedLocalSearcher.java @@ -184,25 +184,13 @@ public class PagedLocalSearcher implements LocalSearcher { } else if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) { TopDocs pageTopDocs; try { - TopDocsCollector collector = OptimizedTopDocsCollector.create(queryParams.sort(), + var cmm = new OptimizedTopDocsCollector(queryParams.sort(), currentPageLimit, s.last(), queryParams.getTotalHitsThresholdInt(), - allowPagination, queryParams.needsScores()); - assert queryParams.complete() == collector.scoreMode().isExhaustive(); - assert currentPageLimit < Integer.MAX_VALUE || queryParams - .getScoreModeOptional() - .map(scoreMode -> scoreMode == collector.scoreMode()) - .orElse(true); + allowPagination, queryParams.needsScores(), resultsOffset, currentPageLimit); - indexSearchers.get(0).search(queryParams.query(), collector); - if (resultsOffset > 0) { - pageTopDocs = collector.topDocs(resultsOffset, currentPageLimit); - } else { - pageTopDocs = collector.topDocs(); - } - // Populate scores of topfieldcollector. By default it doesn't popupate the scores - if (queryParams.needsScores() && ((Collector) collector) instanceof TopFieldCollector) { - TopFieldCollector.populateScores(pageTopDocs.scoreDocs, indexSearchers.get(0), queryParams.query()); - } + pageTopDocs = cmm.reduce(List.of(indexSearchers + .get(0) + .search(queryParams.query(), cmm.get(queryParams.query(), indexSearchers.get(0))))); } catch (IOException e) { sink.error(e); return EMPTY_STATUS;