CavalliumDBEngine/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java

883 lines
29 KiB
Java
Raw Normal View History

2020-12-07 22:15:18 +01:00
package it.cavallium.dbengine.database.disk;
2021-09-10 12:13:52 +02:00
import static it.cavallium.dbengine.database.LLUtils.MARKER_LUCENE;
import static it.cavallium.dbengine.database.LLUtils.toDocument;
import static it.cavallium.dbengine.database.LLUtils.toFields;
2022-01-28 21:12:10 +01:00
import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;
import static it.cavallium.dbengine.lucene.searcher.LuceneSearchResult.EMPTY_COUNT;
import static it.cavallium.dbengine.utils.StreamUtils.collect;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
2022-06-08 16:45:54 +02:00
import static java.util.Objects.requireNonNull;
2021-09-10 12:13:52 +02:00
2022-01-28 19:31:25 +01:00
import com.google.common.collect.Multimap;
2021-12-30 17:28:06 +01:00
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
2022-06-13 23:25:43 +02:00
import io.micrometer.core.instrument.Tag;
2021-12-30 17:28:06 +01:00
import io.micrometer.core.instrument.Timer;
2022-08-15 23:07:17 +02:00
import it.cavallium.dbengine.client.Backuppable;
import it.cavallium.dbengine.client.IBackuppable;
2021-11-19 19:03:31 +01:00
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.Query;
2021-03-02 01:53:36 +01:00
import it.cavallium.dbengine.client.query.current.data.QueryParams;
2022-07-02 11:44:13 +02:00
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
2021-11-07 17:46:40 +01:00
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
2021-12-30 17:28:06 +01:00
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
import it.cavallium.dbengine.database.LLTerm;
2021-12-30 17:28:06 +01:00
import it.cavallium.dbengine.database.LLUpdateDocument;
2021-11-07 17:46:40 +01:00
import it.cavallium.dbengine.database.LLUpdateFields;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler;
2021-12-30 17:28:06 +01:00
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
2021-11-19 19:03:31 +01:00
import it.cavallium.dbengine.lucene.collector.Buckets;
2022-04-30 21:56:42 +02:00
import it.cavallium.dbengine.lucene.directory.Lucene91CodecWithNoFieldCompression;
2022-01-28 19:31:25 +01:00
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
2021-11-18 17:13:53 +01:00
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
2021-07-06 01:30:37 +02:00
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
2022-07-02 11:44:13 +02:00
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
2022-03-05 15:46:40 +01:00
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
2022-06-30 13:54:55 +02:00
import it.cavallium.dbengine.utils.SimpleResource;
2020-12-07 22:15:18 +01:00
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
2022-03-30 23:44:55 +02:00
import java.time.Duration;
2021-11-19 19:03:31 +01:00
import java.util.ArrayList;
2021-11-18 17:13:53 +01:00
import java.util.List;
import java.util.Map.Entry;
2022-03-19 00:08:23 +01:00
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executors;
2021-09-06 18:52:21 +02:00
import java.util.concurrent.Phaser;
import java.util.concurrent.ScheduledExecutorService;
2020-12-07 22:15:18 +01:00
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
import java.util.function.Supplier;
2022-03-19 00:08:23 +01:00
import java.util.logging.Level;
import java.util.stream.Stream;
2021-12-31 00:58:47 +01:00
import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
2021-06-25 20:06:58 +02:00
import org.apache.lucene.index.ConcurrentMergeScheduler;
2022-06-28 13:52:21 +02:00
import org.apache.lucene.index.IndexDeletionPolicy;
2020-12-07 22:15:18 +01:00
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
2021-07-06 14:33:47 +02:00
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.NoMergePolicy;
2021-06-25 20:06:58 +02:00
import org.apache.lucene.index.SerialMergeScheduler;
2020-12-07 22:15:18 +01:00
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.search.similarities.Similarity;
2020-12-07 22:15:18 +01:00
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.IOSupplier;
2021-11-19 19:03:31 +01:00
import org.jetbrains.annotations.NotNull;
2020-12-07 22:15:18 +01:00
import org.jetbrains.annotations.Nullable;
2022-08-15 23:07:17 +02:00
public class LLLocalLuceneIndex extends SimpleResource implements IBackuppable, LLLuceneIndex, LuceneCloseable {
2020-12-07 22:15:18 +01:00
protected static final Logger logger = LogManager.getLogger(LLLocalLuceneIndex.class);
2022-01-26 14:22:54 +01:00
2022-04-10 20:15:05 +02:00
private final ReentrantLock shutdownLock = new ReentrantLock();
/**
* Global lucene index scheduler.
* There is only a single thread globally to not overwhelm the disk with
2021-02-03 14:37:02 +01:00
* concurrent commits or concurrent refreshes.
*/
private static final ScheduledExecutorService luceneHeavyTasksScheduler = Executors.newScheduledThreadPool(4,
new LuceneThreadFactory("heavy-tasks").setDaemon(true).withGroup(new ThreadGroup("lucene-heavy-tasks"))
);
private static final ScheduledExecutorService luceneWriteScheduler = Executors.newScheduledThreadPool(8,
new LuceneThreadFactory("lucene-write").setDaemon(true).withGroup(new ThreadGroup("lucene-write"))
);
private static final ScheduledExecutorService bulkScheduler = luceneWriteScheduler;
2022-06-29 01:14:05 +02:00
private static final boolean ENABLE_SNAPSHOTS
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.snapshot.enable", "true"));
private static final boolean CACHE_SEARCHER_MANAGER
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.cachedsearchermanager.enable", "true"));
private static final LLSnapshot DUMMY_SNAPSHOT = new LLSnapshot(-1);
2022-01-26 14:22:54 +01:00
private final LocalSearcher localSearcher;
private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();
2021-12-30 17:28:06 +01:00
private final Counter startedDocIndexings;
private final Counter endeddDocIndexings;
private final Timer docIndexingTime;
private final Timer snapshotTime;
private final Timer flushTime;
private final Timer commitTime;
private final Timer mergeTime;
private final Timer refreshTime;
2022-03-05 15:46:40 +01:00
private final String shardName;
2020-12-07 22:15:18 +01:00
private final IndexWriter indexWriter;
2021-09-06 15:06:51 +02:00
private final SnapshotsManager snapshotsManager;
2021-09-18 18:34:21 +02:00
private final IndexSearcherManager searcherManager;
private final PerFieldAnalyzerWrapper luceneAnalyzer;
private final Similarity luceneSimilarity;
2020-12-07 22:15:18 +01:00
private final Directory directory;
2022-08-15 23:07:17 +02:00
private final LuceneBackuppable backuppable;
2020-12-07 22:15:18 +01:00
private final boolean lowMemory;
2021-09-06 18:52:21 +02:00
private final Phaser activeTasks = new Phaser(1);
/**
 * Opens the Lucene index of a single shard, registers its metrics and starts the periodic commit.
 *
 * @param meterRegistry registry that receives the counters, timers and gauges of this index
 * @param clusterName name of the index, must not be blank; it is also the value of the
 *                    "index.name" metric tag
 * @param shardIndex index of the shard, combined with {@code clusterName} to build the shard name
 * @param indicizerAnalyzers analyzers configuration, converted to a per-field analyzer
 * @param indicizerSimilarities similarities configuration, converted to a per-field similarity
 * @param luceneOptions directory, writer and refresh/commit options
 * @param luceneHacks optional overrides; if it provides a custom local searcher, that one is used
 * @throws DBException if the name is blank, or if the directory or the index writer can't be opened
 */
public LLLocalLuceneIndex(MeterRegistry meterRegistry,
		@NotNull String clusterName,
		int shardIndex,
		IndicizerAnalyzers indicizerAnalyzers,
		IndicizerSimilarities indicizerSimilarities,
		LuceneOptions luceneOptions,
		@Nullable LuceneHacks luceneHacks) {

	if (clusterName.isBlank()) {
		throw new DBException("Empty lucene database name");
	}
	if (!MMapDirectory.UNMAP_SUPPORTED) {
		logger.error("Unmap is unsupported, lucene will run slower: {}", MMapDirectory.UNMAP_NOT_SUPPORTED_REASON);
	} else {
		logger.debug("Lucene MMap is supported");
	}
	this.lowMemory = luceneOptions.lowMemory();
	this.shardName = LuceneUtils.getStandardName(clusterName, shardIndex);
	try {
		this.directory = LuceneUtils.createLuceneDirectory(luceneOptions.directoryOptions(), shardName);
	} catch (IOException e) {
		throw new DBException(e);
	}
	boolean isFilesystemCompressed = LuceneUtils.getIsFilesystemCompressed(luceneOptions.directoryOptions());

	this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
	this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);

	var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries();
	if (luceneHacks != null && luceneHacks.customLocalSearcher() != null) {
		localSearcher = luceneHacks.customLocalSearcher().get();
	} else {
		localSearcher = new AdaptiveLocalSearcher(maxInMemoryResultEntries);
	}

	var indexWriterConfig = new IndexWriterConfig(luceneAnalyzer);
	// Wrap the default deletion policy only when snapshots are enabled
	IndexDeletionPolicy deletionPolicy = requireNonNull(indexWriterConfig.getIndexDeletionPolicy());
	if (ENABLE_SNAPSHOTS) {
		deletionPolicy = new SnapshotDeletionPolicy(deletionPolicy);
	}
	indexWriterConfig.setIndexDeletionPolicy(deletionPolicy);
	indexWriterConfig.setCommitOnClose(true);

	int writerSchedulerMaxThreadCount;
	MergeScheduler mergeScheduler;
	if (lowMemory) {
		mergeScheduler = new SerialMergeScheduler();
		writerSchedulerMaxThreadCount = 1;
	} else {
		//noinspection resource
		ConcurrentMergeScheduler concurrentMergeScheduler = new LuceneConcurrentMergeScheduler();
		// false means SSD, true means HDD
		boolean spins = false;
		concurrentMergeScheduler.setDefaultMaxMergesAndThreads(spins);
		// It's true by default, but this makes sure it's true if it's a managed path
		if (LuceneUtils.getManagedPath(luceneOptions.directoryOptions()).isPresent()) {
			concurrentMergeScheduler.enableAutoIOThrottle();
		}
		writerSchedulerMaxThreadCount = concurrentMergeScheduler.getMaxThreadCount();
		mergeScheduler = concurrentMergeScheduler;
	}
	if (isFilesystemCompressed) {
		indexWriterConfig.setUseCompoundFile(false);
		indexWriterConfig.setCodec(new Lucene91CodecWithNoFieldCompression());
	}
	logger.trace("WriterSchedulerMaxThreadCount: {}", writerSchedulerMaxThreadCount);
	indexWriterConfig.setMergeScheduler(mergeScheduler);
	indexWriterConfig.setMergePolicy(LuceneUtils.getMergePolicy(luceneOptions));
	if (luceneOptions.indexWriterRAMBufferSizeMB().isPresent()) {
		indexWriterConfig.setRAMBufferSizeMB(luceneOptions.indexWriterRAMBufferSizeMB().get());
	}
	if (luceneOptions.indexWriterMaxBufferedDocs().isPresent()) {
		indexWriterConfig.setMaxBufferedDocs(luceneOptions.indexWriterMaxBufferedDocs().get());
	}
	if (luceneOptions.indexWriterReaderPooling().isPresent()) {
		indexWriterConfig.setReaderPooling(luceneOptions.indexWriterReaderPooling().get());
	}
	indexWriterConfig.setSimilarity(getLuceneSimilarity());
	try {
		this.indexWriter = new IndexWriter(directory, indexWriterConfig);
	} catch (IOException e) {
		// The directory has been opened above and nothing else owns it yet: release it before failing
		try {
			directory.close();
		} catch (IOException closeEx) {
			e.addSuppressed(closeEx);
		}
		throw new DBException(e);
	}
	if (ENABLE_SNAPSHOTS) {
		this.snapshotsManager = new SnapshotsManager(indexWriter, (SnapshotDeletionPolicy) deletionPolicy);
	} else {
		this.snapshotsManager = null;
	}

	// Both values of CACHE_SEARCHER_MANAGER selected this exact same construction,
	// so the manager is built unconditionally.
	var simpleSearcherManager = new SimpleIndexSearcherManager(indexWriter,
			snapshotsManager,
			luceneHeavyTasksScheduler,
			getLuceneSimilarity(),
			luceneOptions.applyAllDeletes().orElse(true),
			luceneOptions.writeAllDeletes().orElse(false),
			luceneOptions.queryRefreshDebounceTime()
	);
	this.searcherManager = simpleSearcherManager;

	// Assign the last field before "this" is handed to the gauges and to the scheduler
	this.backuppable = new LuceneBackuppable();

	this.startedDocIndexings = meterRegistry.counter("index.write.doc.started.counter", "index.name", clusterName);
	this.endeddDocIndexings = meterRegistry.counter("index.write.doc.ended.counter", "index.name", clusterName);
	this.docIndexingTime = registerIndexTimer(meterRegistry, "index.write.doc.timer", clusterName);
	this.snapshotTime = registerIndexTimer(meterRegistry, "index.write.snapshot.timer", clusterName);
	this.flushTime = registerIndexTimer(meterRegistry, "index.write.flush.timer", clusterName);
	this.commitTime = registerIndexTimer(meterRegistry, "index.write.commit.timer", clusterName);
	this.mergeTime = registerIndexTimer(meterRegistry, "index.write.merge.timer", clusterName);
	this.refreshTime = registerIndexTimer(meterRegistry, "index.search.refresh.timer", clusterName);

	List<Tag> indexTags = List.of(Tag.of("index.name", clusterName));
	meterRegistry.gauge("index.snapshot.counter", indexTags, this, LLLocalLuceneIndex::getSnapshotsCount);
	meterRegistry.gauge("index.write.flushing.bytes", indexTags, this, LLLocalLuceneIndex::getIndexWriterFlushingBytes);
	meterRegistry.gauge("index.write.sequence.completed.max", indexTags, this, LLLocalLuceneIndex::getIndexWriterMaxCompletedSequenceNumber);
	meterRegistry.gauge("index.write.doc.pending.counter", indexTags, this, LLLocalLuceneIndex::getIndexWriterPendingNumDocs);
	meterRegistry.gauge("index.write.segment.merging.counter", indexTags, this, LLLocalLuceneIndex::getIndexWriterMergingSegmentsSize);
	meterRegistry.gauge("index.directory.deletion.pending.counter", indexTags, this, LLLocalLuceneIndex::getDirectoryPendingDeletionsCount);
	meterRegistry.gauge("index.doc.counter", indexTags, this, LLLocalLuceneIndex::getDocCount);
	meterRegistry.gauge("index.doc.max", indexTags, this, LLLocalLuceneIndex::getMaxDoc);
	meterRegistry.gauge("index.searcher.refreshes.active.count",
			indexTags,
			simpleSearcherManager,
			SimpleIndexSearcherManager::getActiveRefreshes
	);
	meterRegistry.gauge("index.searcher.searchers.active.count",
			indexTags,
			simpleSearcherManager,
			SimpleIndexSearcherManager::getActiveSearchers
	);

	// Start scheduled tasks
	var commitMillis = luceneOptions.commitDebounceTime().toMillis();
	luceneHeavyTasksScheduler.scheduleAtFixedRate(this::scheduledCommit, commitMillis, commitMillis,
			TimeUnit.MILLISECONDS);
}

/** Registers a timer tagged with the index name, publishing the 0.2/0.5/0.95 percentiles and a histogram. */
private static Timer registerIndexTimer(MeterRegistry meterRegistry, String timerName, String clusterName) {
	return Timer
			.builder(timerName)
			.publishPercentiles(0.2, 0.5, 0.95)
			.publishPercentileHistogram()
			.tag("index.name", clusterName)
			.register(meterRegistry);
}
/** Returns the similarity configured for this index. */
private Similarity getLuceneSimilarity() {
	return this.luceneSimilarity;
}
2020-12-07 22:15:18 +01:00
@Override
public String getLuceneIndexName() {
2022-03-05 15:46:40 +01:00
return shardName;
2020-12-07 22:15:18 +01:00
}
/**
 * Takes a snapshot of the index, recording the elapsed time in the snapshot timer.
 *
 * @return the new snapshot, or the dummy snapshot if there is no snapshots manager
 * @throws DBException if the snapshot can't be taken
 * @throws IllegalStateException if the index is closed
 */
@Override
public LLSnapshot takeSnapshot() {
	Supplier<LLSnapshot> snapshotTask = () -> {
		if (snapshotsManager != null) {
			try {
				return snapshotTime.recordCallable(snapshotsManager::takeSnapshot);
			} catch (Exception e) {
				throw new DBException("Failed to take snapshot", e);
			}
		}
		return DUMMY_SNAPSHOT;
	};
	return runTask(snapshotTask);
}
/**
 * Runs a task while it is registered on {@code activeTasks}, so that {@link #onClose()} waits for it.
 *
 * @param supplier the task to run
 * @return the value returned by the task
 * @throws IllegalStateException if the index is already closed
 */
private <V> V runTask(Supplier<V> supplier) {
	if (isClosed()) {
		throw new IllegalStateException("Lucene index is closed");
	}
	activeTasks.register();
	try {
		return supplier.get();
	} finally {
		// Always leave the phaser, even if the task failed
		activeTasks.arriveAndDeregister();
	}
}
2020-12-07 22:15:18 +01:00
@Override
public void releaseSnapshot(LLSnapshot snapshot) {
2022-06-29 01:14:05 +02:00
if (snapshotsManager == null) {
if (snapshot != null && !Objects.equals(snapshot, DUMMY_SNAPSHOT)) {
throw new IllegalStateException("Can't release snapshot " + snapshot);
2022-06-29 01:14:05 +02:00
}
return;
2022-06-29 01:14:05 +02:00
}
snapshotsManager.releaseSnapshot(snapshot);
2020-12-07 22:15:18 +01:00
}
@Override
public void addDocument(LLTerm key, LLUpdateDocument doc) {
runTask(() -> {
try {
docIndexingTime.recordCallable(() -> {
startedDocIndexings.increment();
try {
indexWriter.addDocument(toDocument(doc));
} finally {
endeddDocIndexings.increment();
}
return null;
});
} catch (Exception e) {
throw new DBException("Failed to add document", e);
}
2022-03-19 00:08:23 +01:00
logger.trace(MARKER_LUCENE, "Added document {}: {}", key, doc);
return null;
});
2020-12-07 22:15:18 +01:00
}
@Override
public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
return this.runTask(() -> {
if (!atomic) {
LongAdder count = new LongAdder();
documents.forEach(document -> {
count.increment();
LLUpdateDocument value = document.getValue();
startedDocIndexings.increment();
try {
docIndexingTime.recordCallable(() -> {
indexWriter.addDocument(toDocument(value));
return null;
});
} catch (Exception ex) {
throw new CompletionException("Failed to add document", ex);
} finally {
endeddDocIndexings.increment();
}
logger.trace(MARKER_LUCENE, "Added document: {}", document);
});
return count.sum();
} else {
var documentsList = collect(documents, fastListing());
assert documentsList != null;
var count = documentsList.size();
StopWatch stopWatch = StopWatch.createStarted();
try {
startedDocIndexings.increment(count);
try {
indexWriter.addDocuments(LLUtils.toDocumentsFromEntries(documentsList));
} catch (IOException e) {
throw new DBException(e);
} finally {
endeddDocIndexings.increment(count);
}
} finally {
docIndexingTime.record(stopWatch.getTime(TimeUnit.MILLISECONDS) / Math.max(count, 1),
TimeUnit.MILLISECONDS
);
}
return (long) documentsList.size();
}
});
2020-12-07 22:15:18 +01:00
}
2020-12-07 22:15:18 +01:00
@Override
public void deleteDocument(LLTerm id) {
this.runTask(() -> {
2021-12-30 17:28:06 +01:00
try {
return docIndexingTime.recordCallable(() -> {
startedDocIndexings.increment();
try {
indexWriter.deleteDocuments(LLUtils.toTerm(id));
} finally {
endeddDocIndexings.increment();
}
return null;
});
} catch (Exception e) {
throw new DBException("Failed to delete document", e);
}
});
}
@Override
public void update(LLTerm id, LLIndexRequest request) {
this.runTask(() -> {
try {
docIndexingTime.recordCallable(() -> {
startedDocIndexings.increment();
try {
if (request instanceof LLUpdateDocument updateDocument) {
indexWriter.updateDocument(LLUtils.toTerm(id), toDocument(updateDocument));
} else if (request instanceof LLSoftUpdateDocument softUpdateDocument) {
indexWriter.softUpdateDocument(LLUtils.toTerm(id),
toDocument(softUpdateDocument.items()),
toFields(softUpdateDocument.softDeleteItems())
);
} else if (request instanceof LLUpdateFields updateFields) {
indexWriter.updateDocValues(LLUtils.toTerm(id), toFields(updateFields.items()));
} else {
throw new UnsupportedOperationException("Unexpected request type: " + request);
}
} finally {
endeddDocIndexings.increment();
}
return null;
});
} catch (Exception e) {
throw new DBException("Failed to update document", e);
2021-12-30 17:28:06 +01:00
}
logger.trace(MARKER_LUCENE, "Updated document {}: {}", id, request);
return null;
});
2020-12-07 22:15:18 +01:00
}
@Override
public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
return runTask(() -> {
var count = new LongAdder();
documents.forEach(document -> {
count.increment();
LLTerm key = document.getKey();
LLUpdateDocument value = document.getValue();
2022-03-19 00:08:23 +01:00
startedDocIndexings.increment();
try {
docIndexingTime.recordCallable(() -> {
indexWriter.updateDocument(LLUtils.toTerm(key), toDocument(value));
return null;
});
logger.trace(MARKER_LUCENE, "Updated document {}: {}", key, value);
} catch (Exception ex) {
throw new CompletionException(ex);
2022-03-19 00:08:23 +01:00
} finally {
endeddDocIndexings.increment();
2022-02-25 15:46:32 +01:00
}
2022-03-19 00:08:23 +01:00
});
return count.sum();
});
}
2020-12-07 22:15:18 +01:00
@Override
public void deleteAll() {
this.runTask(() -> {
shutdownLock.lock();
try {
indexWriter.deleteAll();
indexWriter.forceMergeDeletes(true);
indexWriter.commit();
2022-06-23 00:14:36 +02:00
indexWriter.deleteUnusedFiles();
} catch (IOException e) {
throw new DBException(e);
} finally {
shutdownLock.unlock();
}
return null;
});
2020-12-07 22:15:18 +01:00
}
@Override
public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
2021-03-02 01:53:36 +01:00
QueryParams queryParams,
2022-02-26 03:28:20 +01:00
@Nullable String keyFieldName,
2022-01-28 19:31:25 +01:00
Multimap<String, String> mltDocumentFieldsFlux) {
2021-11-16 23:19:23 +01:00
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
var searcher = this.searcherManager.retrieveSearcher(snapshot);
2022-01-28 19:31:25 +01:00
var transformer = new MoreLikeThisTransformer(mltDocumentFieldsFlux, luceneAnalyzer, luceneSimilarity);
2021-07-06 01:30:37 +02:00
var result = localSearcher.collect(searcher, localQueryParams, keyFieldName, transformer, Function.identity());
return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount()));
2021-07-06 01:30:37 +02:00
}
2020-12-07 22:15:18 +01:00
@Override
public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams,
2022-02-26 03:28:20 +01:00
@Nullable String keyFieldName) {
var result = searchInternal(snapshot, queryParams, keyFieldName);
var shard = new LLSearchResultShard(result.results(), result.totalHitsCount());
return Stream.of(shard);
2022-07-02 11:44:13 +02:00
}
public LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot, QueryParams queryParams,
2022-07-02 11:44:13 +02:00
@Nullable String keyFieldName) {
2021-11-16 23:19:23 +01:00
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
2023-02-22 23:31:05 +01:00
try (var searcher = searcherManager.retrieveSearcher(snapshot)) {
if (searcher != null) {
return localSearcher.collect(searcher, localQueryParams, keyFieldName, NO_REWRITE, Function.identity());
} else {
return LuceneSearchResult.EMPTY;
}
2023-02-22 23:31:05 +01:00
}
2022-07-02 11:44:13 +02:00
}
@Override
public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
2022-07-02 11:44:13 +02:00
var params = LuceneUtils.getCountQueryParams(query);
var result = this.searchInternal(snapshot, params, null);
if (result != null) {
return result.totalHitsCount();
} else {
return EMPTY_COUNT;
}
2021-11-18 17:13:53 +01:00
}
@Override
public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
2021-11-19 19:03:31 +01:00
@NotNull List<Query> queries,
@Nullable Query normalizationQuery,
2021-11-18 17:13:53 +01:00
BucketParams bucketParams) {
2021-11-19 19:03:31 +01:00
List<org.apache.lucene.search.Query> localQueries = new ArrayList<>(queries.size());
for (Query query : queries) {
localQueries.add(QueryParser.toQuery(query, luceneAnalyzer));
}
var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
try (LLIndexSearchers searchers = LLIndexSearchers.unsharded(searcherManager.retrieveSearcher(snapshot))) {
2021-11-18 17:13:53 +01:00
return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery);
}
}
public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
2022-01-26 14:22:54 +01:00
return searcherManager.retrieveSearcher(snapshot);
2020-12-07 22:15:18 +01:00
}
@Override
2022-06-30 13:54:55 +02:00
protected void onClose() {
logger.debug("Waiting IndexWriter tasks...");
activeTasks.arriveAndAwaitAdvance();
logger.debug("IndexWriter tasks ended");
shutdownLock.lock();
try {
logger.debug("Closing searcher manager...");
searcherManager.close();
logger.debug("Searcher manager closed");
logger.debug("Closing IndexWriter...");
indexWriter.close();
directory.close();
logger.debug("IndexWriter closed");
} catch (IOException ex) {
throw new DBException(ex);
2022-06-30 13:54:55 +02:00
} finally {
shutdownLock.unlock();
}
2020-12-07 22:15:18 +01:00
}
2021-02-03 13:48:30 +01:00
@Override
public void flush() {
runTask(() -> {
if (activeTasks.isTerminated()) return null;
shutdownLock.lock();
try {
if (isClosed()) {
return null;
}
flushTime.recordCallable(() -> {
indexWriter.flush();
2021-02-03 13:48:30 +01:00
return null;
});
} catch (Exception e) {
throw new DBException("Failed to flush", e);
} finally {
shutdownLock.unlock();
}
return null;
});
2021-02-03 13:48:30 +01:00
}
/**
 * Waits for the merges of a concurrent merge scheduler to complete.
 * Nothing is done with other schedulers, or if the index has been closed in the meantime.
 *
 * @throws IllegalStateException if the index is already closed when called
 */
@Override
public void waitForMerges() {
	runTask(() -> {
		if (!activeTasks.isTerminated()) {
			shutdownLock.lock();
			try {
				if (!isClosed()) {
					var scheduler = indexWriter.getConfig().getMergeScheduler();
					if (scheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) {
						concurrentMergeScheduler.sync();
					}
				}
			} finally {
				shutdownLock.unlock();
			}
		}
		return null;
	});
}
/**
 * Switches the writer to {@link NoMergePolicy}, waits for the merges of a concurrent merge
 * scheduler to complete, then deletes the unused files.
 * Nothing is done if the index has been closed in the meantime.
 *
 * @throws DBException if deleting the unused files fails with an I/O error
 * @throws IllegalStateException if the index is already closed when called
 */
@Override
public void waitForLastMerges() {
	runTask(() -> {
		if (!activeTasks.isTerminated()) {
			shutdownLock.lock();
			try {
				if (!isClosed()) {
					// No new merge is selected from now on
					indexWriter.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
					var scheduler = indexWriter.getConfig().getMergeScheduler();
					if (scheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) {
						concurrentMergeScheduler.sync();
					}
					indexWriter.deleteUnusedFiles();
				}
			} catch (IOException e) {
				throw new DBException(e);
			} finally {
				shutdownLock.unlock();
			}
		}
		return null;
	});
}
2021-02-03 13:48:30 +01:00
@Override
public void refresh(boolean force) {
runTask(() -> {
activeTasks.register();
try {
if (activeTasks.isTerminated()) return null;
shutdownLock.lock();
try {
if (isClosed()) {
return null;
2021-02-03 13:48:30 +01:00
}
refreshTime.recordCallable(() -> {
if (force) {
searcherManager.maybeRefreshBlocking();
} else {
searcherManager.maybeRefresh();
}
return null;
});
} catch (Exception e) {
throw new DBException("Failed to refresh", e);
} finally {
shutdownLock.unlock();
}
} finally {
activeTasks.arriveAndDeregister();
}
return null;
});
2021-02-03 13:48:30 +01:00
}
2022-02-28 00:40:17 +01:00
/**
* Internal method, do not use
*/
public void scheduledCommit() {
shutdownLock.lock();
2020-12-07 22:15:18 +01:00
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
2022-02-28 00:40:17 +01:00
return;
}
2021-12-30 17:28:06 +01:00
commitTime.recordCallable(() -> {
indexWriter.commit();
2022-06-23 00:14:36 +02:00
indexWriter.deleteUnusedFiles();
2021-12-30 17:28:06 +01:00
return null;
});
} catch (Exception ex) {
2021-09-10 12:13:52 +02:00
logger.error(MARKER_LUCENE, "Failed to execute a scheduled commit", ex);
} finally {
shutdownLock.unlock();
2020-12-07 22:15:18 +01:00
}
}
2022-02-28 00:40:17 +01:00
/**
* Internal method, do not use
*/
public void scheduledMerge() { // Do not use. Merges are done automatically by merge policies
shutdownLock.lock();
2021-12-15 16:47:59 +01:00
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
2022-02-28 00:40:17 +01:00
return;
}
2021-12-30 17:28:06 +01:00
mergeTime.recordCallable(() -> {
indexWriter.maybeMerge();
return null;
});
} catch (Exception ex) {
2021-12-15 16:47:59 +01:00
logger.error(MARKER_LUCENE, "Failed to execute a scheduled merge", ex);
} finally {
shutdownLock.unlock();
2021-12-15 16:47:59 +01:00
}
}
2020-12-07 22:15:18 +01:00
/** Returns the low memory flag read from the lucene options when this index was created. */
@Override
public boolean isLowMemoryMode() {
	return this.lowMemory;
}
private double getSnapshotsCount() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
2022-06-29 01:14:05 +02:00
if (snapshotsManager == null) return 0d;
return snapshotsManager.getSnapshotsCount();
} finally {
shutdownLock.unlock();
}
}
private double getIndexWriterFlushingBytes() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
return indexWriter.getFlushingBytes();
} finally {
shutdownLock.unlock();
}
}
private double getIndexWriterMaxCompletedSequenceNumber() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
return indexWriter.getMaxCompletedSequenceNumber();
} finally {
shutdownLock.unlock();
}
}
private double getIndexWriterPendingNumDocs() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
return indexWriter.getPendingNumDocs();
} finally {
shutdownLock.unlock();
}
}
private double getIndexWriterMergingSegmentsSize() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
return indexWriter.getMergingSegments().size();
} finally {
shutdownLock.unlock();
}
}
private double getDirectoryPendingDeletionsCount() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
return indexWriter.getDirectory().getPendingDeletions().size();
} catch (IOException e) {
return 0d;
} finally {
shutdownLock.unlock();
}
}
private double getDocCount() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
var docStats = indexWriter.getDocStats();
if (docStats != null) {
return docStats.numDocs;
} else {
return 0d;
}
} finally {
shutdownLock.unlock();
}
}
private double getMaxDoc() {
shutdownLock.lock();
try {
2022-06-30 13:54:55 +02:00
if (isClosed()) {
return 0d;
}
var docStats = indexWriter.getDocStats();
if (docStats != null) {
return docStats.maxDoc;
} else {
return 0d;
}
} finally {
shutdownLock.unlock();
}
}
2022-03-19 00:08:23 +01:00
/** Two indexes are equal when they have exactly the same class and the same shard name. */
@Override
public boolean equals(Object o) {
	if (o == this) {
		return true;
	}
	if (o == null || o.getClass() != getClass()) {
		return false;
	}
	var other = (LLLocalLuceneIndex) o;
	return Objects.equals(this.shardName, other.shardName);
}
/** Hash of the shard name, the only field compared by {@link #equals(Object)}. */
@Override
public int hashCode() {
	return this.shardName.hashCode();
}
2022-08-15 23:07:17 +02:00
@Override
public void pauseForBackup() {
backuppable.pauseForBackup();
2022-08-15 23:07:17 +02:00
}
@Override
public void resumeAfterBackup() {
backuppable.resumeAfterBackup();
2022-08-15 23:07:17 +02:00
}
/** Returns the paused state reported by the internal backuppable. */
@Override
public boolean isPaused() {
	return this.backuppable.isPaused();
}
private class LuceneBackuppable extends Backuppable {
private LLSnapshot snapshot;
@Override
protected void onPauseForBackup() {
var snapshot = LLLocalLuceneIndex.this.takeSnapshot();
if (snapshot == null) {
logger.error("Can't pause index \"{}\" because snapshots are not enabled!", shardName);
}
this.snapshot = snapshot;
2022-08-15 23:07:17 +02:00
}
@Override
protected void onResumeAfterBackup() {
2022-08-15 23:07:17 +02:00
if (snapshot == null) {
return;
2022-08-15 23:07:17 +02:00
}
LLLocalLuceneIndex.this.releaseSnapshot(snapshot);
2022-08-15 23:07:17 +02:00
}
}
2020-12-07 22:15:18 +01:00
}