Improve performance of infinite queries

Andrea Cavalli 2021-09-25 13:07:52 +02:00
parent d06c9184bd
commit 8e15020f5b
2 changed files with 174 additions and 4 deletions

it/cavallium/dbengine/lucene/searcher/AdaptiveLuceneMultiSearcher.java

@@ -1,9 +1,7 @@
 package it.cavallium.dbengine.lucene.searcher;
 
 import io.net5.buffer.api.Send;
-import it.cavallium.dbengine.database.disk.LLIndexSearcher;
 import it.cavallium.dbengine.database.disk.LLIndexSearchers;
-import reactor.core.publisher.Flux;
 import reactor.core.publisher.Mono;
 
 public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
@@ -14,9 +12,12 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
 	private static final LuceneMultiSearcher scoredSimpleLuceneShardSearcher
 			= new ScoredSimpleLuceneShardSearcher();
 
-	private static final LuceneMultiSearcher unscoredPagedLuceneMultiSearcher
+	private static final LuceneMultiSearcher unsortedUnscoredPagedLuceneMultiSearcher
 			= new SimpleUnsortedUnscoredLuceneMultiSearcher(new SimpleLuceneLocalSearcher());
 
+	private static final LuceneMultiSearcher unsortedUnscoredContinuousLuceneMultiSearcher
+			= new UnsortedUnscoredContinuousLuceneMultiSearcher();
+
 	@Override
 	public Mono<Send<LuceneSearchResult>> collectMulti(Mono<Send<LLIndexSearchers>> indexSearchersMono,
 			LocalQueryParams queryParams,
@@ -27,7 +28,13 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
 		} else if (queryParams.isSorted() || queryParams.isScored()) {
 			return scoredSimpleLuceneShardSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
 		} else {
-			return unscoredPagedLuceneMultiSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
+			if (queryParams.offset() + queryParams.limit() <= queryParams.pageLimits().getPageLimit(0)) {
+				// Run single-page searches using the paged multi searcher
+				return unsortedUnscoredPagedLuceneMultiSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
+			} else {
+				// Run large/unbounded searches using the continuous multi searcher
+				return unsortedUnscoredContinuousLuceneMultiSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
+			}
 		}
 	}
 }
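
The new branch only falls back to the continuous searcher when the requested window cannot be served by a single page. A minimal standalone sketch of that routing rule, with a hypothetical first-page capacity of 1000 (in the real code the threshold comes from queryParams.pageLimits().getPageLimit(0)):

// Sketch only: mirrors the routing condition added above, outside the real classes.
public class RoutingSketch {

	// Hypothetical first-page capacity; the real value comes from PageLimits.getPageLimit(0).
	private static final long FIRST_PAGE_LIMIT = 1000;

	static String chooseMultiSearcher(long offset, long limit) {
		// Same rule as AdaptiveLuceneMultiSearcher: if the whole requested window
		// (offset + limit) fits in the first page, a single paged search is enough.
		if (offset + limit <= FIRST_PAGE_LIMIT) {
			return "unsortedUnscoredPagedLuceneMultiSearcher";
		} else {
			return "unsortedUnscoredContinuousLuceneMultiSearcher";
		}
	}

	public static void main(String[] args) {
		System.out.println(chooseMultiSearcher(0, 500));                // paged: 500 <= 1000
		System.out.println(chooseMultiSearcher(900, 500));              // continuous: 1400 > 1000
		System.out.println(chooseMultiSearcher(0, Integer.MAX_VALUE));  // continuous ("infinite" query)
	}
}

An effectively unbounded limit always exceeds the first page, so the "infinite" queries named in the commit message always take the new continuous path.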

it/cavallium/dbengine/lucene/searcher/UnsortedUnscoredContinuousLuceneMultiSearcher.java (new file)

@@ -0,0 +1,163 @@
package it.cavallium.dbengine.lucene.searcher;

import io.net5.buffer.api.Resource;
import io.net5.buffer.api.Send;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.lucene.LuceneUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.Phaser;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.LockSupport;
import java.util.function.Supplier;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.publisher.Sinks;
import reactor.core.publisher.Sinks.EmitResult;
import reactor.core.publisher.Sinks.Many;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;
import reactor.util.concurrent.Queues;

public class UnsortedUnscoredContinuousLuceneMultiSearcher implements LuceneMultiSearcher {

	private static final Scheduler UNSCORED_UNSORTED_EXECUTOR = Schedulers.newBoundedElastic(Runtime
			.getRuntime()
			.availableProcessors(), Schedulers.DEFAULT_BOUNDED_ELASTIC_QUEUESIZE, "UnscoredUnsortedExecutor");
	private static final Supplier<Queue<ScoreDoc>> QUEUE_SUPPLIER = Queues.get(1024);

	@Override
	public Mono<Send<LuceneSearchResult>> collectMulti(Mono<Send<LLIndexSearchers>> indexSearchersMono,
			LocalQueryParams queryParams,
			String keyFieldName,
			LLSearchTransformer transformer) {
		var indexSearchersSendResource = Mono
				.fromRunnable(() -> {
					LLUtils.ensureBlocking();
					if (queryParams.isSorted() && queryParams.limit() > 0) {
						throw new UnsupportedOperationException("Sorted queries are not supported"
								+ " by UnsortedUnscoredContinuousLuceneMultiSearcher");
					}
					if (queryParams.isScored() && queryParams.limit() > 0) {
						throw new UnsupportedOperationException("Scored queries are not supported"
								+ " by UnsortedUnscoredContinuousLuceneMultiSearcher");
					}
				})
				.then(indexSearchersMono);
		var localQueryParams = getLocalQueryParams(queryParams);

		return LLUtils.usingSendResource(indexSearchersSendResource,
				indexSearchers -> Mono.fromCallable(() -> {
					Many<ScoreDoc> scoreDocsSink = Sinks.many().unicast().onBackpressureBuffer(QUEUE_SUPPLIER.get());

					var cm = new CollectorManager<Collector, Void>() {

						class IterableCollector extends SimpleCollector {

							private int shardIndex;
							private LeafReaderContext context;

							@Override
							public void collect(int i) {
								if (Schedulers.isInNonBlockingThread()) {
									throw new UnsupportedOperationException("Called collect in a nonblocking thread");
								}
								var scoreDoc = new ScoreDoc(context.docBase + i, 0, shardIndex);
								synchronized (scoreDocsSink) {
									while (scoreDocsSink.tryEmitNext(scoreDoc) == EmitResult.FAIL_OVERFLOW) {
										LockSupport.parkNanos(10);
									}
								}
							}

							@Override
							protected void doSetNextReader(LeafReaderContext context) {
								this.context = context;
							}

							@Override
							public ScoreMode scoreMode() {
								return ScoreMode.COMPLETE_NO_SCORES;
							}

							public void setShardIndex(int shardIndex) {
								this.shardIndex = shardIndex;
							}
						}

						@Override
						public IterableCollector newCollector() {
							return new IterableCollector();
						}

						@Override
						public Void reduce(Collection<Collector> collection) {
							throw new UnsupportedOperationException();
						}
					};

					AtomicInteger runningTasks = new AtomicInteger(0);
					var shards = indexSearchers.shards();

					runningTasks.addAndGet(shards.size());
					int mutableShardIndex = 0;
					for (IndexSearcher shard : shards) {
						int shardIndex = mutableShardIndex++;
						UNSCORED_UNSORTED_EXECUTOR.schedule(() -> {
							try {
								var collector = cm.newCollector();
								collector.setShardIndex(shardIndex);
								shard.search(localQueryParams.query(), collector);
							} catch (Throwable e) {
								while (scoreDocsSink.tryEmitError(e) == EmitResult.FAIL_NON_SERIALIZED) {
									LockSupport.parkNanos(10);
								}
							} finally {
								if (runningTasks.decrementAndGet() <= 0) {
									while (scoreDocsSink.tryEmitComplete() == EmitResult.FAIL_NON_SERIALIZED) {
										LockSupport.parkNanos(10);
									}
								}
							}
						});
					}

					Flux<LLKeyScore> resultsFlux = LuceneUtils.convertHits(scoreDocsSink.asFlux(), shards, keyFieldName, false);

					var totalHitsCount = new TotalHitsCount(0, false);
					Flux<LLKeyScore> mergedFluxes = resultsFlux
							.skip(queryParams.offset())
							.take(queryParams.limit(), true);

					return new LuceneSearchResult(totalHitsCount, mergedFluxes, d -> {
						indexSearchers.close();
					}).send();
				}), false);
	}

	private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) {
		return new LocalQueryParams(queryParams.query(),
				0,
				LuceneUtils.safeLongToInt((long) queryParams.offset() + (long) queryParams.limit()),
				queryParams.pageLimits(),
				queryParams.minCompetitiveScore(),
				queryParams.sort(),
				queryParams.scoreMode()
		);
	}
}
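
The continuous searcher never materializes a result page: each shard runs on the dedicated bounded-elastic scheduler and its collector pushes ScoreDocs into a unicast Sinks.Many backed by a bounded queue, busy-waiting with LockSupport.parkNanos whenever tryEmitNext returns FAIL_OVERFLOW (collect() cannot suspend, and the thread check guarantees it never runs on a non-blocking thread). The per-shard query parameters are rewritten with offset 0 and limit offset + limit, the offset/limit window is applied on the result Flux with skip(offset).take(limit, true), and the reported TotalHitsCount(0, false) reflects that nothing is counted up front. A minimal standalone sketch of the same emit pattern, using plain integers instead of Lucene hits and hypothetical offset/limit values (requires only reactor-core):

// Sketch only: reproduces the sink/backpressure pattern used by the collector above.
import java.util.concurrent.locks.LockSupport;
import reactor.core.publisher.Sinks;
import reactor.core.publisher.Sinks.EmitResult;
import reactor.util.concurrent.Queues;

public class ContinuousEmitSketch {

	public static void main(String[] args) {
		// Bounded buffer, like QUEUE_SUPPLIER = Queues.get(1024) in the searcher.
		Sinks.Many<Integer> sink = Sinks.many().unicast().onBackpressureBuffer(Queues.<Integer>get(1024).get());

		// Producer thread: emit 10_000 "hits", parking briefly when the buffer is full,
		// then complete, mirroring the collector and the runningTasks bookkeeping.
		Thread producer = new Thread(() -> {
			for (int i = 0; i < 10_000; i++) {
				while (sink.tryEmitNext(i) == EmitResult.FAIL_OVERFLOW) {
					LockSupport.parkNanos(10);
				}
			}
			while (sink.tryEmitComplete() == EmitResult.FAIL_NON_SERIALIZED) {
				LockSupport.parkNanos(10);
			}
		});
		producer.start();

		// Consumer side: the searcher applies the same skip(offset).take(limit, true) window.
		long received = sink.asFlux()
				.skip(100)        // hypothetical offset
				.take(500, true)  // hypothetical limit
				.count()
				.block();
		System.out.println("received " + received + " hits"); // prints 500
	}
}

Parking instead of blocking on a full queue keeps the producer cheap to resume, and because the parked thread belongs to the searcher's own bounded-elastic scheduler it never stalls Reactor's non-blocking event loops.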