diff --git a/pom.xml b/pom.xml index d1d01ef..4fcf1a7 100644 --- a/pom.xml +++ b/pom.xml @@ -13,8 +13,7 @@ 0-SNAPSHOT false 1.10.4 - 9.11.0 - 9.2.1 + 9.5.2 5.9.0 1.0.26 @@ -48,7 +47,12 @@ false - + + maven_central + Maven Central + https://repo.maven.apache.org/maven2/ + + mchv-release @@ -171,7 +175,7 @@ org.apache.logging.log4j log4j-slf4j2-impl - 2.22.1 + 2.23.1 test @@ -195,17 +199,17 @@ org.slf4j slf4j-api - 2.0.6 + 2.0.12 org.apache.logging.log4j log4j-api - 2.20.0 + 2.23.1 com.lmax disruptor - 3.4.4 + 4.0.0 test @@ -213,67 +217,6 @@ rocksdbjni ${rocksdb.version} - - org.apache.lucene - lucene-core - ${lucene.version} - - - org.apache.lucene - lucene-join - ${lucene.version} - - - org.apache.lucene - lucene-analysis-common - ${lucene.version} - - - org.apache.lucene - lucene-analysis-icu - ${lucene.version} - - - org.apache.lucene - lucene-codecs - ${lucene.version} - - - org.apache.lucene - lucene-backward-codecs - ${lucene.version} - - - org.apache.lucene - lucene-queries - ${lucene.version} - - - org.apache.lucene - lucene-queryparser - ${lucene.version} - - - org.apache.lucene - lucene-misc - ${lucene.version} - - - org.apache.lucene - lucene-facet - ${lucene.version} - - - org.apache.lucene - lucene-monitor - ${lucene.version} - - - org.apache.lucene - lucene-test-framework - ${lucene.version} - test - org.jetbrains annotations @@ -311,17 +254,6 @@ bcpkix-jdk15on 1.70 - - org.novasearch - lucene-relevance - 9.0.1.0.0-SNAPSHOT - - - org.apache.lucene - lucene-core - - - it.cavallium datagen diff --git a/src/example/java/it.cavallium.dbengine.client/IndicizationExample.java b/src/example/java/it.cavallium.dbengine.client/IndicizationExample.java deleted file mode 100644 index b9ba410..0000000 --- a/src/example/java/it.cavallium.dbengine.client/IndicizationExample.java +++ /dev/null @@ -1,191 +0,0 @@ -package it.cavallium.dbengine.client; - -import it.cavallium.dbengine.client.query.QueryUtils; -import it.cavallium.dbengine.client.query.current.data.QueryParams; -import it.cavallium.dbengine.client.query.current.data.ScoreMode; -import it.cavallium.dbengine.client.query.current.data.ScoreSort; -import it.cavallium.dbengine.database.LLDocument; -import it.cavallium.dbengine.database.LLItem; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.database.LLSignal; -import it.cavallium.dbengine.database.LLTerm; -import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.time.Duration; -import java.util.Comparator; -import java.util.StringJoiner; -import java.util.concurrent.CompletionException; -import org.apache.lucene.document.Field.Store; -import reactor.core.publisher.Mono; -import reactor.core.scheduler.Schedulers; - -public class IndicizationExample { - - public static void main(String[] args) { - tempIndex(true) - .flatMap(index -> index - .addDocument(new LLTerm("id", "123"), - new LLDocument(new LLItem[]{ - LLItem.newStringField("id", "123", Store.YES), - LLItem.newTextField("name", "Mario", Store.NO), - LLItem.newStringField("surname", "Rossi", Store.NO) - }) - ) - .then(index.refresh()) - .then(index.search(null, - QueryParams - .builder() - .query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario")) - .limit(1) - .sort(ScoreSort.of()) - .scoreMode(ScoreMode.of(false, true)) - .build(), - "id" - )) - .flatMap(results -> Mono.from(results - .results() - .flatMap(r -> r) - .doOnNext(signal -> { - if (signal.isValue()) { - System.out.println("Value: " + signal.getValue()); - } - }) - .filter(LLSignal::isTotalHitsCount)) - ) - .doOnNext(count -> System.out.println("Total hits: " + count)) - .doOnTerminate(() -> System.out.println("Completed")) - .then(index.close()) - ) - .subscribeOn(Schedulers.parallel()) - .block(); - tempIndex(true) - .flatMap(index -> - index - .addDocument(new LLTerm("id", "126"), - new LLDocument(new LLItem[]{ - LLItem.newStringField("id", "126", Store.YES), - LLItem.newTextField("name", "Marioxq", Store.NO), - LLItem.newStringField("surname", "Rossi", Store.NO) - }) - ) - .then(index - .addDocument(new LLTerm("id", "123"), - new LLDocument(new LLItem[]{ - LLItem.newStringField("id", "123", Store.YES), - LLItem.newTextField("name", "Mario", Store.NO), - LLItem.newStringField("surname", "Rossi", Store.NO) - }) - )) - .then(index - .addDocument(new LLTerm("id", "124"), - new LLDocument(new LLItem[]{ - LLItem.newStringField("id", "124", Store.YES), - LLItem.newTextField("name", "Mariossi", Store.NO), - LLItem.newStringField("surname", "Rossi", Store.NO) - }) - )) - .then(index - .addDocument(new LLTerm("id", "125"), - new LLDocument(new LLItem[]{ - LLItem.newStringField("id", "125", Store.YES), - LLItem.newTextField("name", "Mario marios", Store.NO), - LLItem.newStringField("surname", "Rossi", Store.NO) - }) - )) - .then(index - .addDocument(new LLTerm("id", "128"), - new LLDocument(new LLItem[]{ - LLItem.newStringField("id", "128", Store.YES), - LLItem.newTextField("name", "Marion", Store.NO), - LLItem.newStringField("surname", "Rossi", Store.NO) - }) - )) - .then(index - .addDocument(new LLTerm("id", "127"), - new LLDocument(new LLItem[]{ - LLItem.newStringField("id", "127", Store.YES), - LLItem.newTextField("name", "Mariotto", Store.NO), - LLItem.newStringField("surname", "Rossi", Store.NO) - }) - )) - .then(index.refresh()) - .then(index.search(null, - QueryParams - .builder() - .query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario")) - .limit(10) - .sort(MultiSort.topScore().getQuerySort()) - .scoreMode(ScoreMode.of(false, true)) - .build(), - "id" - )) - .flatMap(results -> LuceneUtils.mergeSignalStreamRaw(results - .results(), MultiSort.topScoreRaw(), 10L) - .doOnNext(value -> System.out.println("Value: " + value)) - .then(Mono.from(results - .results() - .flatMap(part -> part) - .filter(LLSignal::isTotalHitsCount) - .map(LLSignal::getTotalHitsCount))) - ) - .doOnNext(count -> System.out.println("Total hits: " + count)) - .doOnTerminate(() -> System.out.println("Completed")) - .then(index.close()) - ) - .subscribeOn(Schedulers.parallel()) - .block(); - } - - public static final class CurrentCustomType { - - private final int number; - - public CurrentCustomType(int number) { - this.number = number; - } - - public int getNumber() { - return number; - } - - @Override - public String toString() { - return new StringJoiner(", ", CurrentCustomType.class.getSimpleName() + "[", "]") - .add("number=" + number) - .toString(); - } - } - - private static Mono tempIndex(boolean delete) { - var wrkspcPath = Path.of("/tmp/tempdb/"); - return Mono - .fromCallable(() -> { - if (delete && Files.exists(wrkspcPath)) { - Files.walk(wrkspcPath).sorted(Comparator.reverseOrder()).forEach(file -> { - try { - Files.delete(file); - } catch (IOException ex) { - throw new CompletionException(ex); - } - }); - } - Files.createDirectories(wrkspcPath); - return null; - }) - .subscribeOn(Schedulers.boundedElastic()) - .then(new LLLocalDatabaseConnection(wrkspcPath, true).connect()) - .flatMap(conn -> conn.getLuceneIndex("testindices", - 10, - TextFieldsAnalyzer.N4GramPartialString, - TextFieldsSimilarity.NGramBM25Plus, - Duration.ofSeconds(5), - Duration.ofSeconds(5), - false - )); - } -} diff --git a/src/main/data-generator/lucene-query.yaml b/src/main/data-generator/lucene-query.yaml index c9aac47..7186365 100644 --- a/src/main/data-generator/lucene-query.yaml +++ b/src/main/data-generator/lucene-query.yaml @@ -368,7 +368,7 @@ baseTypesData: DocSort: data: { } TotalHitsCount: - stringRepresenter: "it.cavallium.dbengine.lucene.LuceneUtils.toHumanReadableString" + stringRepresenter: "it.cavallium.dbengine.client.query.QueryUtil.toHumanReadableString" data: value: long exact: boolean diff --git a/src/main/data-generator/quic-rpc.yaml b/src/main/data-generator/quic-rpc.yaml index 4b27277..ca904ee 100644 --- a/src/main/data-generator/quic-rpc.yaml +++ b/src/main/data-generator/quic-rpc.yaml @@ -1,10 +1,6 @@ # A type that starts with "-" is an optional type, otherwise it can't be null currentVersion: "0.0.0" interfacesData: - StandardFSDirectoryOptions: - extendInterfaces: [PathDirectoryOptions] - PathDirectoryOptions: - extendInterfaces: [LuceneDirectoryOptions] ClientBoundRequest: extendInterfaces: [RPCEvent] ClientBoundResponse: @@ -21,7 +17,6 @@ superTypesData: SingletonUpdateOldData, GeneratedEntityId, GetDatabase, - GetLuceneIndex, Disconnect, GetSingleton, SingletonGet, @@ -29,19 +24,16 @@ superTypesData: SingletonUpdateInit, SingletonUpdateEnd, RPCCrash, - CloseDatabase, - CloseLuceneIndex + CloseDatabase ] ServerBoundRequest: [ GetDatabase, - GetLuceneIndex, Disconnect, GetSingleton, SingletonGet, SingletonSet, SingletonUpdateInit, - CloseDatabase, - CloseLuceneIndex + CloseDatabase ] ClientBoundResponse: [ Empty, @@ -57,25 +49,6 @@ superTypesData: Empty, SingletonUpdateEnd ] - LuceneDirectoryOptions: [ - ByteBuffersDirectory, - MemoryMappedFSDirectory, - NIOFSDirectory, - RAFFSDirectory, - DirectIOFSDirectory, - NRTCachingDirectory - ] - StandardFSDirectoryOptions: [ - MemoryMappedFSDirectory, - NIOFSDirectory, - RAFFSDirectory - ] - PathDirectoryOptions: [ - MemoryMappedFSDirectory, - NIOFSDirectory, - RAFFSDirectory, - StandardFSDirectoryOptions - ] Filter: [ NoFilter, BloomFilter @@ -87,12 +60,6 @@ customTypesData: Compression: javaClass: it.cavallium.dbengine.client.Compression serializer: it.cavallium.dbengine.database.remote.CompressionSerializer - TextFieldsAnalyzer: - javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer - serializer: it.cavallium.dbengine.database.remote.TextFieldsAnalyzerSerializer - TextFieldsSimilarity: - javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity - serializer: it.cavallium.dbengine.database.remote.TextFieldsSimilaritySerializer Duration: javaClass: java.time.Duration serializer: it.cavallium.dbengine.database.remote.DurationSerializer @@ -102,9 +69,6 @@ customTypesData: ColumnFamilyHandle: javaClass: org.rocksdb.ColumnFamilyHandle serializer: it.cavallium.dbengine.database.remote.ColumnFamilyHandleSerializer - LuceneHacks: - javaClass: it.cavallium.dbengine.lucene.LuceneHacks - serializer: it.cavallium.dbengine.database.remote.LuceneHacksSerializer UpdateReturnMode: javaClass: it.cavallium.dbengine.database.UpdateReturnMode serializer: it.cavallium.dbengine.database.remote.UpdateReturnModeSerializer @@ -118,12 +82,6 @@ customTypesData: StringMap: javaClass: java.util.Map serializer: it.cavallium.dbengine.database.remote.StringMapSerializer - String2FieldAnalyzerMap: - javaClass: java.util.Map - serializer: it.cavallium.dbengine.database.remote.String2FieldAnalyzerMapSerializer - String2FieldSimilarityMap: - javaClass: java.util.Map - serializer: it.cavallium.dbengine.database.remote.String2FieldSimilarityMapSerializer String2ColumnFamilyHandleMap: javaClass: java.util.Map serializer: it.cavallium.dbengine.database.remote.String2ColumnFamilyHandleMapSerializer @@ -139,13 +97,6 @@ baseTypesData: name: String columns: Column[] databaseOptions: DatabaseOptions - GetLuceneIndex: - data: - clusterName: String - structure: LuceneIndexStructure - indicizerAnalyzers: IndicizerAnalyzers - indicizerSimilarities: IndicizerSimilarities - luceneOptions: LuceneOptions Disconnect: { data: { } } GetSingleton: data: @@ -172,9 +123,6 @@ baseTypesData: CloseDatabase: data: databaseId: long - CloseLuceneIndex: - data: - luceneIndexId: long # Client-bound responses @@ -198,17 +146,6 @@ baseTypesData: # Data - LuceneIndexStructure: - data: - totalShards: int - activeShards: int[] - SingleIndex: - data: - name: String - ClusteredShardIndex: - data: - clusterName: String - shard: int BinaryOptional: data: val: -Binary @@ -277,58 +214,6 @@ baseTypesData: data: maxDictBytes: int compression: Compression - IndicizerAnalyzers: - data: - defaultAnalyzer: TextFieldsAnalyzer - fieldAnalyzer: String2FieldAnalyzerMap - IndicizerSimilarities: - data: - defaultSimilarity: TextFieldsSimilarity - fieldSimilarity: String2FieldSimilarityMap - LuceneOptions: - data: - extraFlags: StringMap - queryRefreshDebounceTime: Duration - commitDebounceTime: Duration - lowMemory: boolean - directoryOptions: LuceneDirectoryOptions - indexWriterReaderPooling: -boolean - indexWriterRAMBufferSizeMB: -double - indexWriterMaxBufferedDocs: -int - applyAllDeletes: -boolean - writeAllDeletes: -boolean - maxInMemoryResultEntries: int - mergePolicy: TieredMergePolicy - TieredMergePolicy: - data: - forceMergeDeletesPctAllowed: -double - deletesPctAllowed: -double - maxMergeAtOnce: -int - maxMergedSegmentBytes: -long - floorSegmentBytes: -long - segmentsPerTier: -double - maxCFSSegmentSizeBytes: -long - noCFSRatio: -double - ByteBuffersDirectory: { data: { } } - MemoryMappedFSDirectory: - data: - managedPath: Path - NIOFSDirectory: - data: - managedPath: Path - RAFFSDirectory: - data: - managedPath: Path - DirectIOFSDirectory: - data: - delegate: StandardFSDirectoryOptions - mergeBufferSize: -int - minBytesDirect: -long - NRTCachingDirectory: - data: - delegate: LuceneDirectoryOptions - maxMergeSizeBytes: long - maxCachedBytes: long versions: 0.0.0: details: diff --git a/src/main/java/it/cavallium/dbengine/client/CompositeDatabasePartLocation.java b/src/main/java/it/cavallium/dbengine/client/CompositeDatabasePartLocation.java index c1f6b42..3bb64ef 100644 --- a/src/main/java/it/cavallium/dbengine/client/CompositeDatabasePartLocation.java +++ b/src/main/java/it/cavallium/dbengine/client/CompositeDatabasePartLocation.java @@ -18,8 +18,7 @@ public class CompositeDatabasePartLocation { } public enum CompositeDatabasePartType { - KV_DATABASE, - LUCENE_INDEX + KV_DATABASE } public CompositeDatabasePartType getPartType() { diff --git a/src/main/java/it/cavallium/dbengine/client/CompositeSnapshot.java b/src/main/java/it/cavallium/dbengine/client/CompositeSnapshot.java index f477989..0b98e6c 100644 --- a/src/main/java/it/cavallium/dbengine/client/CompositeSnapshot.java +++ b/src/main/java/it/cavallium/dbengine/client/CompositeSnapshot.java @@ -2,7 +2,6 @@ package it.cavallium.dbengine.client; import it.cavallium.dbengine.client.CompositeDatabasePartLocation.CompositeDatabasePartType; import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure; -import it.cavallium.dbengine.database.LLLuceneIndex; import it.cavallium.dbengine.database.LLSnapshot; import java.util.Map; import java.util.Objects; @@ -20,12 +19,6 @@ public class CompositeSnapshot { )), () -> "No snapshot for database with name \"" + database.getDatabaseName() + "\""); } - public LLSnapshot getSnapshot(LLLuceneIndex luceneIndex) { - return Objects.requireNonNull(snapshots.get(CompositeDatabasePartLocation.of(CompositeDatabasePartType.LUCENE_INDEX, - luceneIndex.getLuceneIndexName() - )), () -> "No snapshot for lucene index with name \"" + luceneIndex.getLuceneIndexName() + "\""); - } - public Map getAllSnapshots() { return snapshots; } diff --git a/src/main/java/it/cavallium/dbengine/client/ConnectionSettings.java b/src/main/java/it/cavallium/dbengine/client/ConnectionSettings.java index 59e96a5..241f2c4 100644 --- a/src/main/java/it/cavallium/dbengine/client/ConnectionSettings.java +++ b/src/main/java/it/cavallium/dbengine/client/ConnectionSettings.java @@ -29,8 +29,6 @@ public sealed interface ConnectionSettings { sealed interface ConnectionPart { - record ConnectionPartLucene(@Nullable String name) implements ConnectionPart {} - record ConnectionPartRocksDB(@Nullable String name) implements ConnectionPart {} } } diff --git a/src/main/java/it/cavallium/dbengine/client/Hits.java b/src/main/java/it/cavallium/dbengine/client/Hits.java index aab76b1..5ef563f 100644 --- a/src/main/java/it/cavallium/dbengine/client/Hits.java +++ b/src/main/java/it/cavallium/dbengine/client/Hits.java @@ -6,7 +6,6 @@ import it.cavallium.dbengine.database.DiscardingCloseable; import it.cavallium.dbengine.database.LLUtils; import it.cavallium.dbengine.database.SafeCloseable; import it.cavallium.dbengine.database.collections.ValueGetter; -import it.cavallium.dbengine.lucene.LuceneCloseable; import it.cavallium.dbengine.utils.SimpleResource; import java.util.ArrayList; import java.util.List; diff --git a/src/main/java/it/cavallium/dbengine/client/Indicizer.java b/src/main/java/it/cavallium/dbengine/client/Indicizer.java deleted file mode 100644 index 9d6118e..0000000 --- a/src/main/java/it/cavallium/dbengine/client/Indicizer.java +++ /dev/null @@ -1,49 +0,0 @@ -package it.cavallium.dbengine.client; - -import com.google.common.collect.Multimap; -import com.google.common.collect.Multimaps; -import it.cavallium.dbengine.database.LLIndexRequest; -import it.cavallium.dbengine.database.LLSoftUpdateDocument; -import it.cavallium.dbengine.database.LLTerm; -import it.cavallium.dbengine.database.LLUpdateDocument; -import it.cavallium.dbengine.database.LLUpdateFields; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import java.util.Map; -import org.apache.lucene.index.IndexableField; -import org.jetbrains.annotations.NotNull; - -public abstract class Indicizer { - - /** - * Transform a value to an IndexRequest. - */ - public abstract @NotNull LLIndexRequest toIndexRequest(@NotNull T key, @NotNull U value); - - public final @NotNull LLUpdateDocument toDocument(@NotNull T key, @NotNull U value) { - var req = toIndexRequest(key, value); - if (req instanceof LLUpdateFields updateFields) { - return new LLUpdateDocument(updateFields.items()); - } else if (req instanceof LLUpdateDocument updateDocument) { - return updateDocument; - } else if (req instanceof LLSoftUpdateDocument softUpdateDocument) { - return new LLUpdateDocument(softUpdateDocument.items()); - } else { - throw new UnsupportedOperationException("Unexpected request type: " + req); - } - } - - public abstract @NotNull LLTerm toIndex(@NotNull T key); - - public abstract @NotNull String getKeyFieldName(); - - public abstract @NotNull T getKey(IndexableField key); - - public abstract IndicizerAnalyzers getPerFieldAnalyzer(); - - public abstract IndicizerSimilarities getPerFieldSimilarity(); - - public Multimap getMoreLikeThisDocumentFields(T key, U value) { - return Multimaps.forMap(Map.of()); - } -} diff --git a/src/main/java/it/cavallium/dbengine/client/IndicizerAnalyzers.java b/src/main/java/it/cavallium/dbengine/client/IndicizerAnalyzers.java deleted file mode 100644 index 5b6a404..0000000 --- a/src/main/java/it/cavallium/dbengine/client/IndicizerAnalyzers.java +++ /dev/null @@ -1,19 +0,0 @@ -package it.cavallium.dbengine.client; - -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import java.util.Map; - -public class IndicizerAnalyzers { - - public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of() { - return of(TextFieldsAnalyzer.ICUCollationKey); - } - - public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer) { - return of(defaultAnalyzer, Map.of()); - } - - public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer, Map fieldAnalyzer) { - return new it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers(defaultAnalyzer, fieldAnalyzer); - } -} diff --git a/src/main/java/it/cavallium/dbengine/client/IndicizerSimilarities.java b/src/main/java/it/cavallium/dbengine/client/IndicizerSimilarities.java deleted file mode 100644 index 11be179..0000000 --- a/src/main/java/it/cavallium/dbengine/client/IndicizerSimilarities.java +++ /dev/null @@ -1,20 +0,0 @@ -package it.cavallium.dbengine.client; - -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import java.util.Map; - -public class IndicizerSimilarities { - - public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of() { - return of(TextFieldsSimilarity.BM25Standard); - } - - public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity) { - return of(defaultSimilarity, Map.of()); - } - - public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity, - Map fieldSimilarity) { - return it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities.of(defaultSimilarity, fieldSimilarity); - } -} diff --git a/src/main/java/it/cavallium/dbengine/client/LuceneIndex.java b/src/main/java/it/cavallium/dbengine/client/LuceneIndex.java deleted file mode 100644 index 2387bc0..0000000 --- a/src/main/java/it/cavallium/dbengine/client/LuceneIndex.java +++ /dev/null @@ -1,71 +0,0 @@ -package it.cavallium.dbengine.client; - -import it.cavallium.dbengine.client.query.ClientQueryParams; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.Delta; -import it.cavallium.dbengine.database.LLSnapshottable; -import it.cavallium.dbengine.lucene.collector.Buckets; -import it.cavallium.dbengine.lucene.searcher.BucketParams; -import java.util.List; -import java.util.Map.Entry; -import java.util.stream.Stream; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public interface LuceneIndex extends LLSnapshottable, AutoCloseable { - - void addDocument(T key, U value); - - long addDocuments(boolean atomic, Stream> entries); - - void deleteDocument(T key); - - void updateDocument(T key, @NotNull U value); - - long updateDocuments(Stream> entries); - - default void updateOrDeleteDocument(T key, @Nullable U value) { - if (value == null) { - deleteDocument(key); - } else { - updateDocument(key, value); - } - } - - default void updateOrDeleteDocumentIfModified(T key, @NotNull Delta delta) { - updateOrDeleteDocumentIfModified(key, delta.current(), delta.isModified()); - } - - default void updateOrDeleteDocumentIfModified(T key, @Nullable U currentValue, boolean modified) { - if (modified) { - updateOrDeleteDocument(key, currentValue); - } - } - - void deleteAll(); - - Hits> moreLikeThis(ClientQueryParams queryParams, T key, - U mltDocumentValue); - - Hits> search(ClientQueryParams queryParams); - - Buckets computeBuckets(@Nullable CompositeSnapshot snapshot, - @NotNull List queries, - @Nullable Query normalizationQuery, - BucketParams bucketParams); - - TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query); - - boolean isLowMemoryMode(); - - void close(); - - void flush(); - - void waitForMerges(); - - void waitForLastMerges(); - - void refresh(boolean force); -} diff --git a/src/main/java/it/cavallium/dbengine/client/LuceneIndexImpl.java b/src/main/java/it/cavallium/dbengine/client/LuceneIndexImpl.java deleted file mode 100644 index 53c68dc..0000000 --- a/src/main/java/it/cavallium/dbengine/client/LuceneIndexImpl.java +++ /dev/null @@ -1,215 +0,0 @@ -package it.cavallium.dbengine.client; - -import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL; -import static it.cavallium.dbengine.utils.StreamUtils.collectOn; -import static java.util.stream.Collectors.collectingAndThen; -import static java.util.stream.Collectors.toList; - -import it.cavallium.dbengine.client.query.ClientQueryParams; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.database.LLSearchResultShard; -import it.cavallium.dbengine.database.LLSnapshot; -import it.cavallium.dbengine.database.LLTerm; -import it.cavallium.dbengine.database.LLUtils; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.collector.Buckets; -import it.cavallium.dbengine.lucene.searcher.BucketParams; -import it.cavallium.dbengine.utils.StreamUtils; -import it.unimi.dsi.fastutil.objects.ObjectArrayList; -import java.time.Duration; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.function.Function; -import java.util.stream.Stream; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class LuceneIndexImpl implements LuceneIndex { - - private static final Duration MAX_COUNT_TIME = Duration.ofSeconds(30); - private final LLLuceneIndex luceneIndex; - private final Indicizer indicizer; - - public LuceneIndexImpl(LLLuceneIndex luceneIndex, Indicizer indicizer) { - this.luceneIndex = luceneIndex; - this.indicizer = indicizer; - } - - private LLSnapshot resolveSnapshot(CompositeSnapshot snapshot) { - if (snapshot == null) { - return null; - } else { - return snapshot.getSnapshot(luceneIndex); - } - } - - @Override - public void addDocument(T key, U value) { - luceneIndex.addDocument(indicizer.toIndex(key), indicizer.toDocument(key, value)); - } - - @Override - public long addDocuments(boolean atomic, Stream> entries) { - return luceneIndex.addDocuments(atomic, entries.map(entry -> - Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue())))); - } - - @Override - public void deleteDocument(T key) { - LLTerm id = indicizer.toIndex(key); - luceneIndex.deleteDocument(id); - } - - @Override - public void updateDocument(T key, @NotNull U value) { - luceneIndex.update(indicizer.toIndex(key), indicizer.toIndexRequest(key, value)); - } - - @Override - public long updateDocuments(Stream> entries) { - return luceneIndex.updateDocuments(entries.map(entry -> - Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue())))); - } - - @Override - public void deleteAll() { - luceneIndex.deleteAll(); - } - - @Override - public Hits> moreLikeThis(ClientQueryParams queryParams, - T key, - U mltDocumentValue) { - var mltDocumentFields - = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue); - - return collectOn(LUCENE_POOL, luceneIndex.moreLikeThis(resolveSnapshot(queryParams.snapshot()), - queryParams.toQueryParams(), - indicizer.getKeyFieldName(), - mltDocumentFields), - collectingAndThen(toList(), toHitsCollector(queryParams))); - } - - @Override - public Hits> search(ClientQueryParams queryParams) { - return collectOn(LUCENE_POOL, luceneIndex.search(resolveSnapshot(queryParams.snapshot()), - queryParams.toQueryParams(), - indicizer.getKeyFieldName()), - collectingAndThen(toList(), toHitsCollector(queryParams))); - } - - @Override - public Buckets computeBuckets(@Nullable CompositeSnapshot snapshot, - @NotNull List query, - @Nullable Query normalizationQuery, - BucketParams bucketParams) { - return luceneIndex.computeBuckets(resolveSnapshot(snapshot), query, normalizationQuery, bucketParams); - } - - private Hits> mapResults(LLSearchResultShard llSearchResult) { - List> scoresWithKeys = LLUtils.mapList(llSearchResult.results(), - hit -> new HitKey<>(indicizer.getKey(hit.key()), hit.score()) - ); - return new Hits<>(scoresWithKeys, llSearchResult.totalHitsCount()); - } - - @Override - public TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query) { - return luceneIndex.count(resolveSnapshot(snapshot), query, MAX_COUNT_TIME); - } - - @Override - public boolean isLowMemoryMode() { - return luceneIndex.isLowMemoryMode(); - } - - @Override - public void close() { - luceneIndex.close(); - } - - /** - * Flush writes to disk - */ - @Override - public void flush() { - luceneIndex.flush(); - } - - @Override - public void waitForMerges() { - luceneIndex.waitForMerges(); - } - - @Override - public void waitForLastMerges() { - luceneIndex.waitForLastMerges(); - } - - /** - * Refresh index searcher - */ - @Override - public void refresh(boolean force) { - luceneIndex.refresh(force); - } - - @Override - public LLSnapshot takeSnapshot() { - return luceneIndex.takeSnapshot(); - } - - @Override - public void releaseSnapshot(LLSnapshot snapshot) { - luceneIndex.releaseSnapshot(snapshot); - } - - private Function, Hits>> toHitsCollector(ClientQueryParams queryParams) { - return (List results) -> resultsToHits(mergeResults(queryParams, results)); - } - - private Hits> resultsToHits(LLSearchResultShard resultShard) { - if (resultShard != null) { - return mapResults(resultShard); - } else { - return Hits.empty(); - } - } - - @SuppressWarnings({"unchecked", "rawtypes"}) - @Nullable - private static LLSearchResultShard mergeResults(ClientQueryParams queryParams, List shards) { - if (shards.size() == 0) { - return null; - } else if (shards.size() == 1) { - return shards.get(0); - } - TotalHitsCount count = null; - ObjectArrayList> results = new ObjectArrayList<>(shards.size()); - var maxLimit = queryParams.offset() + queryParams.limit(); - for (LLSearchResultShard shard : shards) { - if (count == null) { - count = shard.totalHitsCount(); - } else { - count = LuceneUtils.sum(count, shard.totalHitsCount()); - } - results.add(shard.results().stream().limit(maxLimit)); - } - Objects.requireNonNull(count); - Stream resultsFlux; - if (results.size() == 0) { - resultsFlux = Stream.empty(); - } else if (results.size() == 1) { - resultsFlux = results.get(0); - } else { - resultsFlux = results.stream().flatMap(Function.identity()).limit(maxLimit); - } - return new LLSearchResultShard(StreamUtils.toList(resultsFlux), count); - } - -} diff --git a/src/main/java/it/cavallium/dbengine/client/query/ClientQueryParams.java b/src/main/java/it/cavallium/dbengine/client/query/ClientQueryParams.java deleted file mode 100644 index 0e04007..0000000 --- a/src/main/java/it/cavallium/dbengine/client/query/ClientQueryParams.java +++ /dev/null @@ -1,50 +0,0 @@ -package it.cavallium.dbengine.client.query; - -import io.soabase.recordbuilder.core.RecordBuilder; -import it.cavallium.dbengine.client.CompositeSnapshot; -import it.cavallium.dbengine.client.Sort; -import it.cavallium.dbengine.client.query.current.data.NoSort; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.QueryParams; -import it.cavallium.dbengine.client.query.current.data.QueryParamsBuilder; -import java.time.Duration; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -@RecordBuilder -public record ClientQueryParams(@Nullable CompositeSnapshot snapshot, - @NotNull Query query, - long offset, - long limit, - @Nullable Sort sort, - boolean computePreciseHitsCount, - @NotNull Duration timeout) { - - public static ClientQueryParamsBuilder builder() { - return ClientQueryParamsBuilder - .builder() - .snapshot(null) - .offset(0) - .limit(Long.MAX_VALUE) - .sort(null) - // Default timeout: 4 minutes - .timeout(Duration.ofMinutes(4)) - .computePreciseHitsCount(true); - } - - public boolean isSorted() { - return sort != null && sort.isSorted(); - } - - public QueryParams toQueryParams() { - return QueryParamsBuilder - .builder() - .query(query()) - .sort(sort != null ? sort.querySort() : new NoSort()) - .offset(offset()) - .limit(limit()) - .computePreciseHitsCount(computePreciseHitsCount()) - .timeoutMilliseconds(timeout.toMillis()) - .build(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/client/query/NoOpAnalyzer.java b/src/main/java/it/cavallium/dbengine/client/query/NoOpAnalyzer.java deleted file mode 100644 index 7b65e73..0000000 --- a/src/main/java/it/cavallium/dbengine/client/query/NoOpAnalyzer.java +++ /dev/null @@ -1,17 +0,0 @@ -package it.cavallium.dbengine.client.query; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.core.KeywordTokenizer; - -public class NoOpAnalyzer extends Analyzer { - - public static final Analyzer INSTANCE = new NoOpAnalyzer(); - - public NoOpAnalyzer() { - } - - @Override - protected TokenStreamComponents createComponents(String fieldName) { - return new TokenStreamComponents(new KeywordTokenizer()); - } -} diff --git a/src/main/java/it/cavallium/dbengine/client/query/QueryMoshi.java b/src/main/java/it/cavallium/dbengine/client/query/QueryMoshi.java deleted file mode 100644 index f2e79d5..0000000 --- a/src/main/java/it/cavallium/dbengine/client/query/QueryMoshi.java +++ /dev/null @@ -1,91 +0,0 @@ -package it.cavallium.dbengine.client.query; - -import com.squareup.moshi.JsonAdapter; -import it.cavallium.buffer.Buf; -import it.cavallium.dbengine.client.IntOpenHashSetJsonAdapter; -import it.cavallium.dbengine.client.query.current.CurrentVersion; -import it.cavallium.dbengine.client.query.current.IBaseType; -import it.cavallium.dbengine.client.query.current.IType; -import it.cavallium.dbengine.utils.BooleanListJsonAdapter; -import it.cavallium.dbengine.utils.BufJsonAdapter; -import it.cavallium.dbengine.utils.ByteListJsonAdapter; -import it.cavallium.dbengine.utils.CharListJsonAdapter; -import it.cavallium.dbengine.utils.IntListJsonAdapter; -import it.cavallium.dbengine.utils.LongListJsonAdapter; -import it.cavallium.dbengine.utils.MoshiPolymorphic; -import it.cavallium.dbengine.utils.ShortListJsonAdapter; -import it.unimi.dsi.fastutil.booleans.BooleanList; -import it.unimi.dsi.fastutil.bytes.ByteList; -import it.unimi.dsi.fastutil.chars.CharList; -import it.unimi.dsi.fastutil.ints.IntList; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.longs.LongList; -import it.unimi.dsi.fastutil.objects.Object2ObjectMap; -import it.unimi.dsi.fastutil.objects.Object2ObjectMaps; -import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap; -import it.unimi.dsi.fastutil.shorts.ShortList; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -public class QueryMoshi extends MoshiPolymorphic { - - private final Set> abstractClasses; - private final Set> concreteClasses; - private final Map, JsonAdapter> extraAdapters; - - @SuppressWarnings({"unchecked", "RedundantCast", "rawtypes"}) - public QueryMoshi() { - super(true, GetterStyle.RECORDS_GETTERS); - HashSet> abstractClasses = new HashSet<>(); - HashSet> concreteClasses = new HashSet<>(); - - // Add all super types with their implementations - for (var superTypeClass : CurrentVersion.getSuperTypeClasses()) { - for (Class superTypeSubtypesClass : CurrentVersion.getSuperTypeSubtypesClasses( - superTypeClass)) { - concreteClasses.add((Class) (Class) superTypeSubtypesClass); - } - abstractClasses.add((Class) (Class) superTypeClass); - } - - // Add IBaseType with all basic types - abstractClasses.add((Class) (Class) IBaseType.class); - for (BaseType BaseType : BaseType.values()) { - concreteClasses.add((Class) (Class) CurrentVersion.getClass(BaseType)); - } - - this.abstractClasses = abstractClasses; - this.concreteClasses = concreteClasses; - Object2ObjectMap, JsonAdapter> extraAdapters = new Object2ObjectOpenHashMap<>(); - extraAdapters.put(BooleanList.class, new BooleanListJsonAdapter()); - extraAdapters.put(ByteList.class, new ByteListJsonAdapter()); - extraAdapters.put(Buf.class, new BufJsonAdapter()); - extraAdapters.put(ShortList.class, new ShortListJsonAdapter()); - extraAdapters.put(CharList.class, new CharListJsonAdapter()); - extraAdapters.put(IntList.class, new IntListJsonAdapter()); - extraAdapters.put(LongList.class, new LongListJsonAdapter()); - extraAdapters.put(IntOpenHashSet.class, new IntOpenHashSetJsonAdapter()); - this.extraAdapters = Object2ObjectMaps.unmodifiable(extraAdapters); - } - - @Override - public Map, JsonAdapter> getExtraAdapters() { - return extraAdapters; - } - - @Override - protected Set> getAbstractClasses() { - return abstractClasses; - } - - @Override - protected Set> getConcreteClasses() { - return concreteClasses; - } - - @Override - protected boolean shouldIgnoreField(String fieldName) { - return fieldName.contains("$"); - } -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java b/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java index c7e490f..c48175b 100644 --- a/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java +++ b/src/main/java/it/cavallium/dbengine/client/query/QueryParser.java @@ -1,10 +1,7 @@ package it.cavallium.dbengine.client.query; import com.google.common.xml.XmlEscapers; -import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.util.ULocale; import it.cavallium.dbengine.client.query.current.data.BooleanQuery; -import it.cavallium.dbengine.client.query.current.data.BooleanQueryBuilder; import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart; import it.cavallium.dbengine.client.query.current.data.BoostQuery; import it.cavallium.dbengine.client.query.current.data.BoxedQuery; @@ -24,7 +21,6 @@ import it.cavallium.dbengine.client.query.current.data.FloatPointExactQuery; import it.cavallium.dbengine.client.query.current.data.FloatPointRangeQuery; import it.cavallium.dbengine.client.query.current.data.FloatPointSetQuery; import it.cavallium.dbengine.client.query.current.data.FloatTermQuery; -import it.cavallium.dbengine.client.query.current.data.IntNDPointExactQuery; import it.cavallium.dbengine.client.query.current.data.IntNDPointRangeQuery; import it.cavallium.dbengine.client.query.current.data.IntNDTermQuery; import it.cavallium.dbengine.client.query.current.data.IntPointExactQuery; @@ -38,62 +34,18 @@ import it.cavallium.dbengine.client.query.current.data.LongPointExactQuery; import it.cavallium.dbengine.client.query.current.data.LongPointRangeQuery; import it.cavallium.dbengine.client.query.current.data.LongPointSetQuery; import it.cavallium.dbengine.client.query.current.data.LongTermQuery; -import it.cavallium.dbengine.client.query.current.data.NumericSort; -import it.cavallium.dbengine.client.query.current.data.OccurMust; -import it.cavallium.dbengine.client.query.current.data.OccurMustNot; import it.cavallium.dbengine.client.query.current.data.OccurShould; import it.cavallium.dbengine.client.query.current.data.PhraseQuery; -import it.cavallium.dbengine.client.query.current.data.PointConfig; -import it.cavallium.dbengine.client.query.current.data.PointType; import it.cavallium.dbengine.client.query.current.data.SolrTextQuery; import it.cavallium.dbengine.client.query.current.data.SortedDocFieldExistsQuery; -import it.cavallium.dbengine.client.query.current.data.SortedNumericDocValuesFieldSlowRangeQuery; import it.cavallium.dbengine.client.query.current.data.SynonymQuery; -import it.cavallium.dbengine.client.query.current.data.TermAndBoost; import it.cavallium.dbengine.client.query.current.data.TermPosition; import it.cavallium.dbengine.client.query.current.data.TermQuery; import it.cavallium.dbengine.client.query.current.data.WildcardQuery; -import it.cavallium.dbengine.lucene.RandomSortField; -import java.io.ByteArrayInputStream; -import java.nio.charset.StandardCharsets; -import java.text.DecimalFormat; -import java.text.NumberFormat; -import java.util.ArrayList; +import java.text.BreakIterator; import java.util.Comparator; -import java.util.List; import java.util.Locale; -import java.util.Map; -import java.util.function.Function; -import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig; -import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer; -import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.DoublePoint; -import org.apache.lucene.document.FloatPoint; -import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryparser.flexible.core.QueryNodeException; -import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; -import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; -import org.apache.lucene.queryparser.xml.CoreParser; -import org.apache.lucene.queryparser.xml.ParserException; -import org.apache.lucene.queryparser.xml.builders.UserInputQueryBuilder; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery.Builder; -import org.apache.lucene.search.DocValuesFieldExistsQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.SortField.Type; -import org.apache.lucene.search.SortedNumericSortField; import org.jetbrains.annotations.Nullable; public class QueryParser { @@ -101,281 +53,6 @@ public class QueryParser { private static final String[] QUERY_STRING_FIND = {"\\", "\""}; private static final String[] QUERY_STRING_REPLACE = {"\\\\", "\\\""}; - public static Query toQuery(it.cavallium.dbengine.client.query.current.data.Query query, Analyzer analyzer) { - if (query == null) { - return null; - } - switch (query.getBaseType$()) { - case StandardQuery -> { - var standardQuery = (it.cavallium.dbengine.client.query.current.data.StandardQuery) query; - - // Fix the analyzer - Map customAnalyzers = standardQuery - .termFields() - .stream() - .collect(Collectors.toMap(Function.identity(), term -> new NoOpAnalyzer())); - analyzer = new PerFieldAnalyzerWrapper(analyzer, customAnalyzers); - var standardQueryParser = new StandardQueryParser(analyzer); - standardQueryParser.setPointsConfigMap(standardQuery.pointsConfig().stream().collect( - Collectors.toMap(PointConfig::field, pointConfig -> - new PointsConfig(toNumberFormat(pointConfig.data().numberFormat()), toType(pointConfig.data().type())) - )) - ); - var defaultFields = standardQuery.defaultFields(); - try { - Query parsed; - if (defaultFields.size() > 1) { - standardQueryParser.setMultiFields(defaultFields.toArray(String[]::new)); - parsed = standardQueryParser.parse(standardQuery.query(), null); - } else if (defaultFields.size() == 1) { - parsed = standardQueryParser.parse(standardQuery.query(), defaultFields.get(0)); - } else { - throw new IllegalStateException("Can't parse a standard query expression that has 0 default fields"); - } - return parsed; - } catch (QueryNodeException e) { - throw new IllegalStateException("Can't parse query expression \"" + standardQuery.query() + "\"", e); - } - } - case BooleanQuery -> { - var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query; - var bq = new Builder(); - for (BooleanQueryPart part : booleanQuery.parts()) { - Occur occur = switch (part.occur().getBaseType$()) { - case OccurFilter -> Occur.FILTER; - case OccurMust -> Occur.MUST; - case OccurShould -> Occur.SHOULD; - case OccurMustNot -> Occur.MUST_NOT; - default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBaseType$()); - }; - bq.add(toQuery(part.query(), analyzer), occur); - } - bq.setMinimumNumberShouldMatch(booleanQuery.minShouldMatch()); - return bq.build(); - } - case IntPointExactQuery -> { - var intPointExactQuery = (IntPointExactQuery) query; - return IntPoint.newExactQuery(intPointExactQuery.field(), intPointExactQuery.value()); - } - case IntNDPointExactQuery -> { - var intndPointExactQuery = (IntNDPointExactQuery) query; - var intndValues = intndPointExactQuery.value().toIntArray(); - return IntPoint.newRangeQuery(intndPointExactQuery.field(), intndValues, intndValues); - } - case LongPointExactQuery -> { - var longPointExactQuery = (LongPointExactQuery) query; - return LongPoint.newExactQuery(longPointExactQuery.field(), longPointExactQuery.value()); - } - case FloatPointExactQuery -> { - var floatPointExactQuery = (FloatPointExactQuery) query; - return FloatPoint.newExactQuery(floatPointExactQuery.field(), floatPointExactQuery.value()); - } - case DoublePointExactQuery -> { - var doublePointExactQuery = (DoublePointExactQuery) query; - return DoublePoint.newExactQuery(doublePointExactQuery.field(), doublePointExactQuery.value()); - } - case LongNDPointExactQuery -> { - var longndPointExactQuery = (LongNDPointExactQuery) query; - var longndValues = longndPointExactQuery.value().toLongArray(); - return LongPoint.newRangeQuery(longndPointExactQuery.field(), longndValues, longndValues); - } - case FloatNDPointExactQuery -> { - var floatndPointExactQuery = (FloatNDPointExactQuery) query; - var floatndValues = floatndPointExactQuery.value().toFloatArray(); - return FloatPoint.newRangeQuery(floatndPointExactQuery.field(), floatndValues, floatndValues); - } - case DoubleNDPointExactQuery -> { - var doublendPointExactQuery = (DoubleNDPointExactQuery) query; - var doublendValues = doublendPointExactQuery.value().toDoubleArray(); - return DoublePoint.newRangeQuery(doublendPointExactQuery.field(), doublendValues, doublendValues); - } - case IntPointSetQuery -> { - var intPointSetQuery = (IntPointSetQuery) query; - return IntPoint.newSetQuery(intPointSetQuery.field(), intPointSetQuery.values().toIntArray()); - } - case LongPointSetQuery -> { - var longPointSetQuery = (LongPointSetQuery) query; - return LongPoint.newSetQuery(longPointSetQuery.field(), longPointSetQuery.values().toLongArray()); - } - case FloatPointSetQuery -> { - var floatPointSetQuery = (FloatPointSetQuery) query; - return FloatPoint.newSetQuery(floatPointSetQuery.field(), floatPointSetQuery.values().toFloatArray()); - } - case DoublePointSetQuery -> { - var doublePointSetQuery = (DoublePointSetQuery) query; - return DoublePoint.newSetQuery(doublePointSetQuery.field(), doublePointSetQuery.values().toDoubleArray()); - } - case TermQuery -> { - var termQuery = (TermQuery) query; - return new org.apache.lucene.search.TermQuery(toTerm(termQuery.term())); - } - case IntTermQuery -> { - var intTermQuery = (IntTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(intTermQuery.field(), - IntPoint.pack(intTermQuery.value()) - )); - } - case IntNDTermQuery -> { - var intNDTermQuery = (IntNDTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(intNDTermQuery.field(), - IntPoint.pack(intNDTermQuery.value().toIntArray()) - )); - } - case LongTermQuery -> { - var longTermQuery = (LongTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(longTermQuery.field(), - LongPoint.pack(longTermQuery.value()) - )); - } - case LongNDTermQuery -> { - var longNDTermQuery = (LongNDTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(longNDTermQuery.field(), - LongPoint.pack(longNDTermQuery.value().toLongArray()) - )); - } - case FloatTermQuery -> { - var floatTermQuery = (FloatTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(floatTermQuery.field(), - FloatPoint.pack(floatTermQuery.value()) - )); - } - case FloatNDTermQuery -> { - var floatNDTermQuery = (FloatNDTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(floatNDTermQuery.field(), - FloatPoint.pack(floatNDTermQuery.value().toFloatArray()) - )); - } - case DoubleTermQuery -> { - var doubleTermQuery = (DoubleTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(doubleTermQuery.field(), - DoublePoint.pack(doubleTermQuery.value()) - )); - } - case DoubleNDTermQuery -> { - var doubleNDTermQuery = (DoubleNDTermQuery) query; - return new org.apache.lucene.search.TermQuery(new Term(doubleNDTermQuery.field(), - DoublePoint.pack(doubleNDTermQuery.value().toDoubleArray()) - )); - } - case FieldExistsQuery -> { - var fieldExistQuery = (FieldExistsQuery) query; - return new org.apache.lucene.search.FieldExistsQuery(fieldExistQuery.field()); - } - case BoostQuery -> { - var boostQuery = (BoostQuery) query; - return new org.apache.lucene.search.BoostQuery(toQuery(boostQuery.query(), analyzer), boostQuery.scoreBoost()); - } - case ConstantScoreQuery -> { - var constantScoreQuery = (ConstantScoreQuery) query; - return new org.apache.lucene.search.ConstantScoreQuery(toQuery(constantScoreQuery.query(), analyzer)); - } - case BoxedQuery -> { - return toQuery(((BoxedQuery) query).query(), analyzer); - } - case FuzzyQuery -> { - var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query; - return new FuzzyQuery(toTerm(fuzzyQuery.term()), - fuzzyQuery.maxEdits(), - fuzzyQuery.prefixLength(), - fuzzyQuery.maxExpansions(), - fuzzyQuery.transpositions() - ); - } - case IntPointRangeQuery -> { - var intPointRangeQuery = (IntPointRangeQuery) query; - return IntPoint.newRangeQuery(intPointRangeQuery.field(), intPointRangeQuery.min(), intPointRangeQuery.max()); - } - case IntNDPointRangeQuery -> { - var intndPointRangeQuery = (IntNDPointRangeQuery) query; - return IntPoint.newRangeQuery(intndPointRangeQuery.field(), - intndPointRangeQuery.min().toIntArray(), - intndPointRangeQuery.max().toIntArray() - ); - } - case LongPointRangeQuery -> { - var longPointRangeQuery = (LongPointRangeQuery) query; - return LongPoint.newRangeQuery(longPointRangeQuery.field(), - longPointRangeQuery.min(), - longPointRangeQuery.max() - ); - } - case FloatPointRangeQuery -> { - var floatPointRangeQuery = (FloatPointRangeQuery) query; - return FloatPoint.newRangeQuery(floatPointRangeQuery.field(), - floatPointRangeQuery.min(), - floatPointRangeQuery.max() - ); - } - case DoublePointRangeQuery -> { - var doublePointRangeQuery = (DoublePointRangeQuery) query; - return DoublePoint.newRangeQuery(doublePointRangeQuery.field(), - doublePointRangeQuery.min(), - doublePointRangeQuery.max() - ); - } - case LongNDPointRangeQuery -> { - var longndPointRangeQuery = (LongNDPointRangeQuery) query; - return LongPoint.newRangeQuery(longndPointRangeQuery.field(), - longndPointRangeQuery.min().toLongArray(), - longndPointRangeQuery.max().toLongArray() - ); - } - case FloatNDPointRangeQuery -> { - var floatndPointRangeQuery = (FloatNDPointRangeQuery) query; - return FloatPoint.newRangeQuery(floatndPointRangeQuery.field(), - floatndPointRangeQuery.min().toFloatArray(), - floatndPointRangeQuery.max().toFloatArray() - ); - } - case DoubleNDPointRangeQuery -> { - var doublendPointRangeQuery = (DoubleNDPointRangeQuery) query; - return DoublePoint.newRangeQuery(doublendPointRangeQuery.field(), - doublendPointRangeQuery.min().toDoubleArray(), - doublendPointRangeQuery.max().toDoubleArray() - ); - } - case MatchAllDocsQuery -> { - return new MatchAllDocsQuery(); - } - case MatchNoDocsQuery -> { - return new MatchNoDocsQuery(); - } - case PhraseQuery -> { - var phraseQuery = (PhraseQuery) query; - var pqb = new org.apache.lucene.search.PhraseQuery.Builder(); - for (TermPosition phrase : phraseQuery.phrase()) { - pqb.add(toTerm(phrase.term()), phrase.position()); - } - pqb.setSlop(phraseQuery.slop()); - return pqb.build(); - } - case SortedDocFieldExistsQuery -> { - var sortedDocFieldExistsQuery = (SortedDocFieldExistsQuery) query; - return new DocValuesFieldExistsQuery(sortedDocFieldExistsQuery.field()); - } - case SynonymQuery -> { - var synonymQuery = (SynonymQuery) query; - var sqb = new org.apache.lucene.search.SynonymQuery.Builder(synonymQuery.field()); - for (TermAndBoost part : synonymQuery.parts()) { - sqb.addTerm(toTerm(part.term()), part.boost()); - } - return sqb.build(); - } - case SortedNumericDocValuesFieldSlowRangeQuery -> { - var sortedNumericDocValuesFieldSlowRangeQuery = (SortedNumericDocValuesFieldSlowRangeQuery) query; - return SortedNumericDocValuesField.newSlowRangeQuery(sortedNumericDocValuesFieldSlowRangeQuery.field(), - sortedNumericDocValuesFieldSlowRangeQuery.min(), - sortedNumericDocValuesFieldSlowRangeQuery.max() - ); - } - case WildcardQuery -> { - var wildcardQuery = (WildcardQuery) query; - return new org.apache.lucene.search.WildcardQuery(new Term(wildcardQuery.field(), wildcardQuery.pattern())); - } - default -> throw new IllegalStateException("Unexpected value: " + query.getBaseType$()); - } - } - public static void toQueryXML(StringBuilder out, it.cavallium.dbengine.client.query.current.data.Query query, @Nullable Float boost) { @@ -623,13 +300,6 @@ public class QueryParser { toQueryXML(out, ((BoxedQuery) query).query(), boost); } case FuzzyQuery -> { - var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query; - new FuzzyQuery(toTerm(fuzzyQuery.term()), - fuzzyQuery.maxEdits(), - fuzzyQuery.prefixLength(), - fuzzyQuery.maxExpansions(), - fuzzyQuery.transpositions() - ); throw new UnsupportedOperationException("Fuzzy query is not supported, use span queries"); } case IntPointRangeQuery -> { @@ -751,7 +421,7 @@ public class QueryParser { } private static boolean hasMoreThanOneWord(String sentence) { - BreakIterator iterator = BreakIterator.getWordInstance(ULocale.ENGLISH); + BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH); iterator.setText(sentence); boolean firstWord = false; @@ -781,46 +451,4 @@ public class QueryParser { }); } - private static NumberFormat toNumberFormat(it.cavallium.dbengine.client.query.current.data.NumberFormat numberFormat) { - return switch (numberFormat.getBaseType$()) { - case NumberFormatDecimal -> new DecimalFormat(); - default -> throw new UnsupportedOperationException("Unsupported type: " + numberFormat.getBaseType$()); - }; - } - - private static Class toType(PointType type) { - return switch (type.getBaseType$()) { - case PointTypeInt -> Integer.class; - case PointTypeLong -> Long.class; - case PointTypeFloat -> Float.class; - case PointTypeDouble -> Double.class; - default -> throw new UnsupportedOperationException("Unsupported type: " + type.getBaseType$()); - }; - } - - private static Term toTerm(it.cavallium.dbengine.client.query.current.data.Term term) { - return new Term(term.field(), term.value()); - } - - public static Sort toSort(it.cavallium.dbengine.client.query.current.data.Sort sort) { - switch (sort.getBaseType$()) { - case NoSort: - return null; - case ScoreSort: - return new Sort(SortField.FIELD_SCORE); - case DocSort: - return new Sort(SortField.FIELD_DOC); - case NumericSort: - NumericSort numericSort = (NumericSort) sort; - return new Sort(new SortedNumericSortField(numericSort.field(), Type.LONG, numericSort.reverse())); - case RandomSort: - return new Sort(new RandomSortField()); - default: - throw new IllegalStateException("Unexpected value: " + sort.getBaseType$()); - } - } - - public static it.cavallium.dbengine.client.query.current.data.Term toQueryTerm(Term term) { - return it.cavallium.dbengine.client.query.current.data.Term.of(term.field(), term.text()); - } } diff --git a/src/main/java/it/cavallium/dbengine/client/query/QueryUtil.java b/src/main/java/it/cavallium/dbengine/client/query/QueryUtil.java new file mode 100644 index 0000000..93ecc13 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/client/query/QueryUtil.java @@ -0,0 +1,16 @@ +package it.cavallium.dbengine.client.query; + +import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; + +public class QueryUtil { + + @SuppressWarnings("unused") + public static String toHumanReadableString(TotalHitsCount totalHitsCount) { + if (totalHitsCount.exact()) { + return Long.toString(totalHitsCount.value()); + } else { + return totalHitsCount.value() + "+"; + } + } + +} diff --git a/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java b/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java deleted file mode 100644 index 757d612..0000000 --- a/src/main/java/it/cavallium/dbengine/client/query/QueryUtils.java +++ /dev/null @@ -1,101 +0,0 @@ -package it.cavallium.dbengine.client.query; - -import static it.cavallium.dbengine.database.LLUtils.mapList; - -import it.cavallium.dbengine.client.query.current.data.BooleanQuery; -import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart; -import it.cavallium.dbengine.client.query.current.data.Occur; -import it.cavallium.dbengine.client.query.current.data.OccurFilter; -import it.cavallium.dbengine.client.query.current.data.OccurMust; -import it.cavallium.dbengine.client.query.current.data.OccurMustNot; -import it.cavallium.dbengine.client.query.current.data.OccurShould; -import it.cavallium.dbengine.client.query.current.data.PhraseQuery; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.SynonymQuery; -import it.cavallium.dbengine.client.query.current.data.TermAndBoost; -import it.cavallium.dbengine.client.query.current.data.TermPosition; -import it.cavallium.dbengine.client.query.current.data.TermQuery; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import java.util.ArrayList; -import java.util.List; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.util.QueryBuilder; -import org.jetbrains.annotations.NotNull; - -@SuppressWarnings("unused") -public class QueryUtils { - - /** - * @param fraction of query terms [0..1] that should match - */ - public static Query sparseWordsSearch(TextFieldsAnalyzer preferredAnalyzer, - String field, - String text, - float fraction) { - var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer)); - var luceneQuery = qb.createMinShouldMatchQuery(field, text, fraction); - return transformQuery(field, luceneQuery); - } - - /** - * Deprecated: use solr SolrTextQuery - */ - @Deprecated - public static Query phraseSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text, int slop) { - var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer)); - var luceneQuery = qb.createPhraseQuery(field, text, slop); - return transformQuery(field, luceneQuery); - } - - /** - * Deprecated: use solr SolrTextQuery - */ - public static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) { - var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer)); - var luceneQuery = qb.createPhraseQuery(field, text); - return transformQuery(field, luceneQuery); - } - - @NotNull - private static Query transformQuery(String field, org.apache.lucene.search.Query luceneQuery) { - if (luceneQuery == null) { - return TermQuery.of(it.cavallium.dbengine.client.query.current.data.Term.of(field, "")); - } - if (luceneQuery instanceof org.apache.lucene.search.TermQuery) { - return TermQuery.of(QueryParser.toQueryTerm(((org.apache.lucene.search.TermQuery) luceneQuery).getTerm())); - } - if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) { - var booleanQuery = (org.apache.lucene.search.BooleanQuery) luceneQuery; - var queryParts = new ArrayList(); - for (BooleanClause booleanClause : booleanQuery) { - org.apache.lucene.search.Query queryPartQuery = booleanClause.getQuery(); - - Occur occur = switch (booleanClause.getOccur()) { - case MUST -> OccurMust.of(); - case FILTER -> OccurFilter.of(); - case SHOULD -> OccurShould.of(); - case MUST_NOT -> OccurMustNot.of(); - }; - queryParts.add(BooleanQueryPart.of(transformQuery(field, queryPartQuery), occur)); - } - return BooleanQuery.of(List.copyOf(queryParts), booleanQuery.getMinimumNumberShouldMatch()); - } - if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery phraseQuery) { - int slop = phraseQuery.getSlop(); - var terms = phraseQuery.getTerms(); - var positions = phraseQuery.getPositions(); - TermPosition[] termPositions = new TermPosition[terms.length]; - for (int i = 0; i < terms.length; i++) { - var term = terms[i]; - var position = positions[i]; - termPositions[i] = TermPosition.of(QueryParser.toQueryTerm(term), position); - } - return PhraseQuery.of(List.of(termPositions), slop); - } - org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery; - return SynonymQuery.of(field, - mapList(synonymQuery.getTerms(), term -> TermAndBoost.of(QueryParser.toQueryTerm(term), 1)) - ); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java index 7c762a3..253bdf4 100644 --- a/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java +++ b/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java @@ -1,16 +1,9 @@ package it.cavallium.dbengine.database; import io.micrometer.core.instrument.MeterRegistry; -import it.cavallium.dbengine.lucene.LuceneHacks; import it.cavallium.dbengine.rpc.current.data.Column; import it.cavallium.dbengine.rpc.current.data.DatabaseOptions; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; -import java.io.IOException; import java.util.List; -import org.jetbrains.annotations.Nullable; @SuppressWarnings("UnusedReturnValue") public interface LLDatabaseConnection { @@ -23,12 +16,5 @@ public interface LLDatabaseConnection { List columns, DatabaseOptions databaseOptions); - LLLuceneIndex getLuceneIndex(String clusterName, - LuceneIndexStructure indexStructure, - IndicizerAnalyzers indicizerAnalyzers, - IndicizerSimilarities indicizerSimilarities, - LuceneOptions luceneOptions, - @Nullable LuceneHacks luceneHacks); - void disconnect(); } diff --git a/src/main/java/it/cavallium/dbengine/database/LLIndexRequest.java b/src/main/java/it/cavallium/dbengine/database/LLIndexRequest.java deleted file mode 100644 index 4a8f884..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLIndexRequest.java +++ /dev/null @@ -1,3 +0,0 @@ -package it.cavallium.dbengine.database; - -public sealed interface LLIndexRequest permits LLSoftUpdateDocument, LLUpdateDocument, LLUpdateFields {} diff --git a/src/main/java/it/cavallium/dbengine/database/LLItem.java b/src/main/java/it/cavallium/dbengine/database/LLItem.java deleted file mode 100644 index 7ed2787..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLItem.java +++ /dev/null @@ -1,246 +0,0 @@ -package it.cavallium.dbengine.database; - -import java.nio.ByteBuffer; -import java.util.Objects; -import java.util.StringJoiner; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.util.BytesRef; - -public class LLItem { - - private final LLType type; - private final String name; - private final Object data; - - public LLItem(LLType type, String name, ByteBuffer data) { - this.type = type; - this.name = name; - this.data = data; - } - - public LLItem(LLType type, String name, BytesRef data) { - this.type = type; - this.name = name; - this.data = data; - } - - public LLItem(LLType type, String name, KnnFieldData data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, String data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, int data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, float data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, long data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, int... data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, float... data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, double... data) { - this.type = type; - this.name = name; - this.data = data; - } - - private LLItem(LLType type, String name, long... data) { - this.type = type; - this.name = name; - this.data = data; - } - - public static LLItem newIntPoint(String name, int data) { - return new LLItem(LLType.IntPoint, name, data); - } - - public static LLItem newIntPointND(String name, int... data) { - return new LLItem(LLType.IntPointND, name, data); - } - - public static LLItem newLongPoint(String name, long data) { - return new LLItem(LLType.LongPoint, name, data); - } - - public static LLItem newFloatPoint(String name, float data) { - return new LLItem(LLType.FloatPoint, name, data); - } - - public static LLItem newDoublePoint(String name, double data) { - return new LLItem(LLType.DoublePoint, name, data); - } - - public static LLItem newLongPointND(String name, long... data) { - return new LLItem(LLType.LongPointND, name, data); - } - - public static LLItem newFloatPointND(String name, float... data) { - return new LLItem(LLType.FloatPointND, name, data); - } - - public static LLItem newDoublePointND(String name, double... data) { - return new LLItem(LLType.DoublePointND, name, data); - } - - public static LLItem newLongStoredField(String name, long data) { - return new LLItem(LLType.LongStoredField, name, data); - } - - public static LLItem newLongStoredFieldND(String name, long... data) { - BytesRef packed = LongPoint.pack(data); - return new LLItem(LLType.BytesStoredField, name, packed); - } - - public static LLItem newTextField(String name, String data, Field.Store store) { - if (store == Field.Store.YES) { - return new LLItem(LLType.TextFieldStored, name, data); - } else { - return new LLItem(LLType.TextField, name, data); - } - } - - public static LLItem newStringField(String name, String data, Field.Store store) { - if (store == Field.Store.YES) { - return new LLItem(LLType.StringFieldStored, name, data); - } else { - return new LLItem(LLType.StringField, name, data); - } - } - - public static LLItem newStringField(String name, BytesRef bytesRef, Field.Store store) { - if (store == Field.Store.YES) { - return new LLItem(LLType.StringFieldStored, name, bytesRef); - } else { - return new LLItem(LLType.StringField, name, bytesRef); - } - } - - public static LLItem newSortedNumericDocValuesField(String name, long data) { - return new LLItem(LLType.SortedNumericDocValuesField, name, data); - } - - public static LLItem newNumericDocValuesField(String name, long data) { - return new LLItem(LLType.NumericDocValuesField, name, data); - } - - public static LLItem newKnnField(String name, KnnFieldData knnFieldData) { - return new LLItem(LLType.NumericDocValuesField, name, knnFieldData); - } - - public String getName() { - return name; - } - - public LLType getType() { - return type; - } - - public Object getData() { - return data; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - LLItem llItem = (LLItem) o; - - if (type != llItem.type) { - return false; - } - return Objects.equals(name, llItem.name); - } - - @Override - public int hashCode() { - int result = type != null ? type.hashCode() : 0; - result = 31 * result + (name != null ? name.hashCode() : 0); - return result; - } - - @Override - public String toString() { - return new StringJoiner(", ", LLItem.class.getSimpleName() + "[", "]") - .add("type=" + type) - .add("name='" + name + "'") - .add("data=" + data) - .toString(); - } - - public int intData() { - return (int) data; - } - - public int[] intArrayData() { - return (int[]) data; - } - - public long longData() { - return (long) data; - } - - public long[] longArrayData() { - return (long[]) data; - } - - public float floatData() { - return (float) data; - } - - public float[] floatArrayData() { - return (float[]) data; - } - - public double doubleData() { - return (double) data; - } - - public double[] doubleArrayData() { - return (double[]) data; - } - - public KnnFieldData knnFieldData() { - return (KnnFieldData) data; - } - - public String stringValue() { - return (String) data; - } - - public record KnnFieldData(float[] data, VectorSimilarityFunction vectorSimilarityFunction) {} -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLKeyScore.java b/src/main/java/it/cavallium/dbengine/database/LLKeyScore.java deleted file mode 100644 index 147e02f..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLKeyScore.java +++ /dev/null @@ -1,6 +0,0 @@ -package it.cavallium.dbengine.database; - -import org.apache.lucene.index.IndexableField; -import org.jetbrains.annotations.Nullable; - -public record LLKeyScore(int docId, int shardId, float score, @Nullable IndexableField key) {} diff --git a/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java deleted file mode 100644 index be3eae1..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java +++ /dev/null @@ -1,105 +0,0 @@ -package it.cavallium.dbengine.database; - -import static it.cavallium.dbengine.utils.StreamUtils.collectOn; -import static it.cavallium.dbengine.utils.StreamUtils.fastReducing; - -import com.google.common.collect.Multimap; -import it.cavallium.dbengine.client.IBackuppable; -import it.cavallium.dbengine.client.query.current.data.NoSort; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.QueryParams; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.lucene.collector.Buckets; -import it.cavallium.dbengine.lucene.searcher.BucketParams; -import it.cavallium.dbengine.utils.StreamUtils; -import java.time.Duration; -import java.util.List; -import java.util.Map.Entry; -import java.util.stream.Stream; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public interface LLLuceneIndex extends LLSnapshottable, IBackuppable, SafeCloseable { - - String getLuceneIndexName(); - - void addDocument(LLTerm id, LLUpdateDocument doc); - - long addDocuments(boolean atomic, Stream> documents); - - void deleteDocument(LLTerm id); - - void update(LLTerm id, LLIndexRequest request); - - long updateDocuments(Stream> documents); - - void deleteAll(); - - // todo: add a filterer parameter? - /** - * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements - * returned can be at most limit * 15. - *

- * The additional query will be used with the moreLikeThis query: "mltQuery AND additionalQuery" - * @return the collection has one or more flux - */ - Stream moreLikeThis(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - @Nullable String keyFieldName, - Multimap mltDocumentFields); - - // todo: add a filterer parameter? - /** - * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements - * returned can be at most limit * 15 - * @return the collection has one or more flux - */ - Stream search(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - @Nullable String keyFieldName); - - /** - * @return buckets with each value collected into one of the buckets - */ - Buckets computeBuckets(@Nullable LLSnapshot snapshot, - @NotNull List queries, - @Nullable Query normalizationQuery, - BucketParams bucketParams); - - default TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) { - QueryParams params = QueryParams.of(query, - 0, - 0, - NoSort.of(), - false, - timeout == null ? Long.MAX_VALUE : timeout.toMillis() - ); - return collectOn(StreamUtils.LUCENE_POOL, - this.search(snapshot, params, null).map(LLSearchResultShard::totalHitsCount), - fastReducing(TotalHitsCount.of(0, true), - (a, b) -> TotalHitsCount.of(a.value() + b.value(), a.exact() && b.exact()) - ) - ); - } - - boolean isLowMemoryMode(); - - /** - * Flush writes to disk. - * This does not commit, it syncs the data to the disk - */ - void flush(); - - void waitForMerges(); - - /** - * Wait for the latest pending merge - * This disables future merges until shutdown! - */ - void waitForLastMerges(); - - /** - * Refresh index searcher - */ - void refresh(boolean force); -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLMultiDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/LLMultiDatabaseConnection.java index 7a08f93..7779fa3 100644 --- a/src/main/java/it/cavallium/dbengine/database/LLMultiDatabaseConnection.java +++ b/src/main/java/it/cavallium/dbengine/database/LLMultiDatabaseConnection.java @@ -1,23 +1,14 @@ package it.cavallium.dbengine.database; import static it.cavallium.dbengine.utils.StreamUtils.collect; -import static it.cavallium.dbengine.utils.StreamUtils.collectOn; import static it.cavallium.dbengine.utils.StreamUtils.executing; import com.google.common.collect.Multimap; import io.micrometer.core.instrument.MeterRegistry; import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart; -import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartLucene; import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartRocksDB; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.rpc.current.data.Column; import it.cavallium.dbengine.rpc.current.data.DatabaseOptions; -import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.ints.IntSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -28,31 +19,21 @@ import java.util.Set; import java.util.StringJoiner; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; public class LLMultiDatabaseConnection implements LLDatabaseConnection { private static final Logger LOG = LogManager.getLogger(LLMultiDatabaseConnection.class); private final Map databaseShardConnections = new HashMap<>(); - private final Map luceneShardConnections = new HashMap<>(); private final Set allConnections = new HashSet<>(); private final LLDatabaseConnection defaultDatabaseConnection; - private final LLDatabaseConnection defaultLuceneConnection; private final LLDatabaseConnection anyConnection; public LLMultiDatabaseConnection(Multimap subConnections) { LLDatabaseConnection defaultDatabaseConnection = null; - LLDatabaseConnection defaultLuceneConnection = null; for (Entry entry : subConnections.entries()) { var subConnectionSettings = entry.getKey(); var connectionPart = entry.getValue(); - if (connectionPart instanceof ConnectionPartLucene connectionPartLucene) { - if (connectionPartLucene.name() == null) { - defaultLuceneConnection = subConnectionSettings; - } else { - luceneShardConnections.put(connectionPartLucene.name(), subConnectionSettings); - } - } else if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) { + if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) { if (connectionPartRocksDB.name() == null) { defaultDatabaseConnection = subConnectionSettings; } else { @@ -63,21 +44,14 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection { } } this.defaultDatabaseConnection = defaultDatabaseConnection; - this.defaultLuceneConnection = defaultLuceneConnection; if (defaultDatabaseConnection != null) { anyConnection = defaultDatabaseConnection; - } else if (defaultLuceneConnection != null) { - anyConnection = defaultLuceneConnection; } else { anyConnection = subConnections.keySet().stream().findAny().orElse(null); } if (defaultDatabaseConnection != null) { allConnections.add(defaultDatabaseConnection); } - if (defaultLuceneConnection != null) { - allConnections.add(defaultLuceneConnection); - } - allConnections.addAll(luceneShardConnections.values()); allConnections.addAll(databaseShardConnections.values()); } @@ -107,63 +81,6 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection { return conn.getDatabase(name, columns, databaseOptions); } - @Override - public LLLuceneIndex getLuceneIndex(String clusterName, - LuceneIndexStructure indexStructure, - it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers indicizerAnalyzers, - it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities indicizerSimilarities, - LuceneOptions luceneOptions, - @Nullable LuceneHacks luceneHacks) { - IntSet registeredShards = new IntOpenHashSet(); - Map connectionToShardMap = new HashMap<>(); - for (int activeShard : indexStructure.activeShards()) { - if (activeShard >= indexStructure.totalShards()) { - throw new IllegalArgumentException( - "ActiveShard " + activeShard + " is bigger than total shards count " + indexStructure.totalShards()); - } - if (!registeredShards.add(activeShard)) { - throw new IllegalArgumentException("ActiveShard " + activeShard + " has been specified twice"); - } - var shardName = LuceneUtils.getStandardName(clusterName, activeShard); - var connection = luceneShardConnections.getOrDefault(shardName, defaultLuceneConnection); - Objects.requireNonNull(connection, "Null connection"); - connectionToShardMap.computeIfAbsent(connection, k -> new IntOpenHashSet()).add(activeShard); - } - if (connectionToShardMap.keySet().size() == 1) { - return connectionToShardMap - .keySet() - .stream() - .findFirst() - .orElseThrow() - .getLuceneIndex(clusterName, - indexStructure, - indicizerAnalyzers, - indicizerSimilarities, - luceneOptions, - luceneHacks - ); - } else { - record ShardToIndex(int shard, LLLuceneIndex connIndex) {} - var luceneIndices = new LLLuceneIndex[indexStructure.totalShards()]; - connectionToShardMap.entrySet().stream().flatMap(entry -> { - var connectionIndexStructure = indexStructure.setActiveShards(new IntArrayList(entry.getValue())); - - LLLuceneIndex connIndex = entry.getKey().getLuceneIndex(clusterName, connectionIndexStructure, - indicizerAnalyzers, indicizerSimilarities, luceneOptions, luceneHacks); - - return entry.getValue().intStream().mapToObj(shard -> new ShardToIndex(shard, connIndex)); - }).forEach(index -> luceneIndices[index.shard] = index.connIndex); - return new LLMultiLuceneIndex(clusterName, - indexStructure, - indicizerAnalyzers, - indicizerSimilarities, - luceneOptions, - luceneHacks, - luceneIndices - ); - } - } - @Override public void disconnect() { collect(allConnections.stream(), executing(connection -> { @@ -179,10 +96,8 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection { public String toString() { return new StringJoiner(", ", LLMultiDatabaseConnection.class.getSimpleName() + "[", "]") .add("databaseShardConnections=" + databaseShardConnections) - .add("luceneShardConnections=" + luceneShardConnections) .add("allConnections=" + allConnections) .add("defaultDatabaseConnection=" + defaultDatabaseConnection) - .add("defaultLuceneConnection=" + defaultLuceneConnection) .add("anyConnection=" + anyConnection) .toString(); } diff --git a/src/main/java/it/cavallium/dbengine/database/LLMultiLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/LLMultiLuceneIndex.java deleted file mode 100644 index 8c2f6cb..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLMultiLuceneIndex.java +++ /dev/null @@ -1,244 +0,0 @@ -package it.cavallium.dbengine.database; - -import static it.cavallium.dbengine.database.LLUtils.mapList; -import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId; -import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL; -import static it.cavallium.dbengine.utils.StreamUtils.collectOn; -import static it.cavallium.dbengine.utils.StreamUtils.executing; -import static it.cavallium.dbengine.utils.StreamUtils.fastListing; -import static it.cavallium.dbengine.utils.StreamUtils.fastReducing; -import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong; -import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt; -import static java.util.stream.Collectors.groupingBy; - -import com.google.common.collect.Multimap; -import it.cavallium.dbengine.client.IBackuppable; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.QueryParams; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.dbengine.lucene.collector.Buckets; -import it.cavallium.dbengine.lucene.searcher.BucketParams; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; -import it.unimi.dsi.fastutil.doubles.DoubleArrayList; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Stream; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class LLMultiLuceneIndex implements LLLuceneIndex { - - - private final ConcurrentHashMap> registeredSnapshots = new ConcurrentHashMap<>(); - private final AtomicLong nextSnapshotNumber = new AtomicLong(1); - - private final String clusterName; - private final LuceneIndexStructure indexStructure; - private final IndicizerAnalyzers indicizerAnalyzers; - private final IndicizerSimilarities indicizerSimilarities; - private final LuceneOptions luceneOptions; - private final LuceneHacks luceneHacks; - private final LLLuceneIndex[] luceneIndicesById; - private final List luceneIndicesSet; - private final int totalShards; - - public LLMultiLuceneIndex(String clusterName, - LuceneIndexStructure indexStructure, - IndicizerAnalyzers indicizerAnalyzers, - IndicizerSimilarities indicizerSimilarities, - LuceneOptions luceneOptions, - LuceneHacks luceneHacks, - LLLuceneIndex[] luceneIndices) { - this.clusterName = clusterName; - this.indexStructure = indexStructure; - this.indicizerAnalyzers = indicizerAnalyzers; - this.indicizerSimilarities = indicizerSimilarities; - this.luceneOptions = luceneOptions; - this.luceneHacks = luceneHacks; - this.luceneIndicesById = luceneIndices; - this.totalShards = indexStructure.totalShards(); - var luceneIndicesSet = new HashSet(); - for (LLLuceneIndex luceneIndex : luceneIndices) { - if (luceneIndex != null) { - luceneIndicesSet.add(luceneIndex); - } - } - this.luceneIndicesSet = new ArrayList<>(luceneIndicesSet); - } - - @Override - public String getLuceneIndexName() { - return clusterName; - } - - private LLLuceneIndex getLuceneIndex(LLTerm id) { - return luceneIndicesById[getLuceneIndexId(id, totalShards)]; - } - - @Override - public void addDocument(LLTerm id, LLUpdateDocument doc) { - getLuceneIndex(id).addDocument(id, doc); - } - - @Override - public long addDocuments(boolean atomic, Stream> documents) { - return collectOn(LUCENE_POOL, - partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents) - .map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())), - fastSummingLong() - ); - } - - @Override - public void deleteDocument(LLTerm id) { - getLuceneIndex(id).deleteDocument(id); - } - - @Override - public void update(LLTerm id, LLIndexRequest request) { - getLuceneIndex(id).update(id, request); - } - - @Override - public long updateDocuments(Stream> documents) { - return collectOn(LUCENE_POOL, - partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents) - .map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())), - fastSummingLong() - ); - } - - @Override - public void deleteAll() { - luceneIndicesSet.forEach(LLLuceneIndex::deleteAll); - } - - @Override - public Stream moreLikeThis(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - @Nullable String keyFieldName, - Multimap mltDocumentFields) { - return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.moreLikeThis(snapshot, - queryParams, - keyFieldName, - mltDocumentFields - )); - } - - private Buckets mergeShards(List shards) { - List seriesValues = new ArrayList<>(); - DoubleArrayList totals = new DoubleArrayList(shards.get(0).totals()); - - for (Buckets shard : shards) { - if (seriesValues.isEmpty()) { - seriesValues.addAll(shard.seriesValues()); - } else { - for (int serieIndex = 0; serieIndex < seriesValues.size(); serieIndex++) { - DoubleArrayList mergedSerieValues = seriesValues.get(serieIndex); - for (int dataIndex = 0; dataIndex < mergedSerieValues.size(); dataIndex++) { - mergedSerieValues.set(dataIndex, mergedSerieValues.getDouble(dataIndex) - + shard.seriesValues().get(serieIndex).getDouble(dataIndex) - ); - } - } - } - for (int i = 0; i < totals.size(); i++) { - totals.set(i, totals.getDouble(i) + shard.totals().getDouble(i)); - } - } - return new Buckets(seriesValues, totals); - } - - @Override - public Stream search(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - @Nullable String keyFieldName) { - return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.search(snapshot, - queryParams, - keyFieldName - )); - } - - @Override - public Buckets computeBuckets(@Nullable LLSnapshot snapshot, - @NotNull List queries, - @Nullable Query normalizationQuery, - BucketParams bucketParams) { - return mergeShards(mapList(luceneIndicesSet, luceneIndex -> luceneIndex.computeBuckets(snapshot, - queries, - normalizationQuery, - bucketParams - ))); - } - - @Override - public boolean isLowMemoryMode() { - return luceneOptions.lowMemory(); - } - - @Override - public void close() { - collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::close)); - } - - @Override - public void flush() { - collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush)); - } - - @Override - public void waitForMerges() { - collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges)); - } - - @Override - public void waitForLastMerges() { - collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges)); - } - - @Override - public void refresh(boolean force) { - collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force))); - } - - @Override - public LLSnapshot takeSnapshot() { - // Generate next snapshot index - var snapshotIndex = nextSnapshotNumber.getAndIncrement(); - var snapshot = collectOn(LUCENE_POOL, luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot), fastListing()); - registeredSnapshots.put(snapshotIndex, snapshot); - return new LLSnapshot(snapshotIndex); - } - - @Override - public void releaseSnapshot(LLSnapshot snapshot) { - var list = registeredSnapshots.remove(snapshot.getSequenceNumber()); - for (int shardIndex = 0; shardIndex < list.size(); shardIndex++) { - var luceneIndex = luceneIndicesSet.get(shardIndex); - LLSnapshot instanceSnapshot = list.get(shardIndex); - luceneIndex.releaseSnapshot(instanceSnapshot); - } - } - - @Override - public void pauseForBackup() { - collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup)); - } - - @Override - public void resumeAfterBackup() { - collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup)); - } - - @Override - public boolean isPaused() { - return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLScoreMode.java b/src/main/java/it/cavallium/dbengine/database/LLScoreMode.java index 7674e0b..459b33e 100644 --- a/src/main/java/it/cavallium/dbengine/database/LLScoreMode.java +++ b/src/main/java/it/cavallium/dbengine/database/LLScoreMode.java @@ -1,7 +1,5 @@ package it.cavallium.dbengine.database; -import org.apache.lucene.search.Scorer; - public enum LLScoreMode { /** * Produced scorers will allow visiting all matches and get their score. @@ -15,7 +13,7 @@ public enum LLScoreMode { COMPLETE_NO_SCORES, /** * Produced scorers will optionally allow skipping over non-competitive - * hits using the {@link Scorer#setMinCompetitiveScore(float)} API. + * hits using the {@link org.apache.lucene.search.Scorer#setMinCompetitiveScore(float)} API. * This can reduce time if using setMinCompetitiveScore. */ TOP_SCORES, diff --git a/src/main/java/it/cavallium/dbengine/database/LLSearchResult.java b/src/main/java/it/cavallium/dbengine/database/LLSearchResult.java deleted file mode 100644 index d206cc5..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLSearchResult.java +++ /dev/null @@ -1,13 +0,0 @@ -package it.cavallium.dbengine.database; - -import java.util.function.BiFunction; -import java.util.stream.Stream; -import org.jetbrains.annotations.NotNull; - -public record LLSearchResult(Stream results) { - - @NotNull - public static BiFunction accumulator() { - return (a, b) -> new LLSearchResult(Stream.concat(a.results, b.results)); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLSearchResultShard.java b/src/main/java/it/cavallium/dbengine/database/LLSearchResultShard.java deleted file mode 100644 index 9bdbd81..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLSearchResultShard.java +++ /dev/null @@ -1,51 +0,0 @@ -package it.cavallium.dbengine.database; - -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.utils.SimpleResource; -import java.util.List; -import java.util.Objects; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -public class LLSearchResultShard { - - private static final Logger LOG = LogManager.getLogger(LLSearchResultShard.class); - - private final List results; - private final TotalHitsCount totalHitsCount; - - public LLSearchResultShard(List results, TotalHitsCount totalHitsCount) { - this.results = results; - this.totalHitsCount = totalHitsCount; - } - - public List results() { - return results; - } - - public TotalHitsCount totalHitsCount() { - return totalHitsCount; - } - - @Override - public boolean equals(Object obj) { - if (obj == this) - return true; - if (obj == null || obj.getClass() != this.getClass()) - return false; - var that = (LLSearchResultShard) obj; - return Objects.equals(this.results, that.results) && Objects.equals(this.totalHitsCount, that.totalHitsCount); - } - - @Override - public int hashCode() { - return Objects.hash(results, totalHitsCount); - } - - @Override - public String toString() { - return "LLSearchResultShard[" + "results=" + results + ", " + "totalHitsCount=" + totalHitsCount + ']'; - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLSoftUpdateDocument.java b/src/main/java/it/cavallium/dbengine/database/LLSoftUpdateDocument.java deleted file mode 100644 index b11bcba..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLSoftUpdateDocument.java +++ /dev/null @@ -1,5 +0,0 @@ -package it.cavallium.dbengine.database; - -import java.util.List; - -public record LLSoftUpdateDocument(List items, List softDeleteItems) implements LLIndexRequest {} diff --git a/src/main/java/it/cavallium/dbengine/database/LLTerm.java b/src/main/java/it/cavallium/dbengine/database/LLTerm.java deleted file mode 100644 index e171449..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLTerm.java +++ /dev/null @@ -1,58 +0,0 @@ -package it.cavallium.dbengine.database; - -import java.util.Objects; -import org.apache.lucene.util.BytesRef; - -public class LLTerm { - - private final String key; - private final BytesRef value; - - public LLTerm(String key, String value) { - this.key = key; - this.value = new BytesRef(value); - } - - public LLTerm(String key, BytesRef value) { - this.key = key; - this.value = value; - } - - public String getKey() { - return key; - } - - public String getValueUTF8() { - return value.utf8ToString(); - } - - public BytesRef getValueBytesRef() { - return value; - } - - @Override - public String toString() { - return "LLTerm{" + - "key='" + key + '\'' + - ", value='" + value + '\'' + - '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - LLTerm llTerm = (LLTerm) o; - return Objects.equals(key, llTerm.key) && - Objects.equals(value, llTerm.value); - } - - @Override - public int hashCode() { - return Objects.hash(key, value); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLTopKeys.java b/src/main/java/it/cavallium/dbengine/database/LLTopKeys.java deleted file mode 100644 index 28dd3ac..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLTopKeys.java +++ /dev/null @@ -1,52 +0,0 @@ -package it.cavallium.dbengine.database; - -import java.util.Arrays; -import java.util.Objects; - -@SuppressWarnings("unused") -public class LLTopKeys { - - private final long totalHitsCount; - private final LLKeyScore[] hits; - - public LLTopKeys(long totalHitsCount, LLKeyScore[] hits) { - this.totalHitsCount = totalHitsCount; - this.hits = hits; - } - - public long getTotalHitsCount() { - return totalHitsCount; - } - - public LLKeyScore[] getHits() { - return hits; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - LLTopKeys llTopKeys = (LLTopKeys) o; - return totalHitsCount == llTopKeys.totalHitsCount && - Arrays.equals(hits, llTopKeys.hits); - } - - @Override - public int hashCode() { - int result = Objects.hash(totalHitsCount); - result = 31 * result + Arrays.hashCode(hits); - return result; - } - - @Override - public String toString() { - return "LLTopKeys{" + - "totalHitsCount=" + totalHitsCount + - ", hits=" + Arrays.toString(hits) + - '}'; - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/LLUpdateDocument.java b/src/main/java/it/cavallium/dbengine/database/LLUpdateDocument.java deleted file mode 100644 index 3023125..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLUpdateDocument.java +++ /dev/null @@ -1,5 +0,0 @@ -package it.cavallium.dbengine.database; - -import java.util.List; - -public record LLUpdateDocument(List items) implements LLIndexRequest {} diff --git a/src/main/java/it/cavallium/dbengine/database/LLUpdateFields.java b/src/main/java/it/cavallium/dbengine/database/LLUpdateFields.java deleted file mode 100644 index 86de268..0000000 --- a/src/main/java/it/cavallium/dbengine/database/LLUpdateFields.java +++ /dev/null @@ -1,5 +0,0 @@ -package it.cavallium.dbengine.database; - -import java.util.List; - -public record LLUpdateFields(List items) implements LLIndexRequest {} diff --git a/src/main/java/it/cavallium/dbengine/database/LLUtils.java b/src/main/java/it/cavallium/dbengine/database/LLUtils.java index b6f9fa9..941345c 100644 --- a/src/main/java/it/cavallium/dbengine/database/LLUtils.java +++ b/src/main/java/it/cavallium/dbengine/database/LLUtils.java @@ -5,13 +5,8 @@ import static org.apache.commons.lang3.ArrayUtils.EMPTY_BYTE_ARRAY; import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import it.cavallium.buffer.Buf; -import it.cavallium.dbengine.client.HitEntry; -import it.cavallium.dbengine.client.HitKey; import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions; import it.cavallium.dbengine.database.serialization.SerializationFunction; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.RandomSortField; import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles.Lookup; @@ -22,49 +17,25 @@ import java.util.Collection; import java.util.HexFormat; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.function.Consumer; import java.util.function.Function; -import java.util.stream.Stream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Marker; import org.apache.logging.log4j.MarkerManager; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.DoublePoint; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.FloatPoint; -import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.SortedNumericSortField; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.rocksdb.AbstractImmutableNativeReference; import org.rocksdb.AbstractNativeReference; -import org.rocksdb.ReadOptions; @SuppressWarnings("unused") public class LLUtils { private static final Logger logger = LogManager.getLogger(LLUtils.class); public static final Marker MARKER_ROCKSDB = MarkerManager.getMarker("ROCKSDB"); - public static final Marker MARKER_LUCENE = MarkerManager.getMarker("LUCENE"); public static final int INITIAL_DIRECT_READ_BYTE_BUF_SIZE_BYTES = 4096; public static final ByteBuffer EMPTY_BYTE_BUFFER = ByteBuffer.allocateDirect(0).asReadOnlyBuffer(); @@ -144,116 +115,6 @@ public class LLUtils { return bool ? BUF_TRUE : BUF_FALSE; } - @Nullable - public static Sort toSort(@Nullable LLSort sort) { - if (sort == null) { - return null; - } - if (sort.getType() == LLSortType.LONG) { - return new Sort(new SortedNumericSortField(sort.getFieldName(), SortField.Type.LONG, sort.isReverse())); - } else if (sort.getType() == LLSortType.RANDOM) { - return new Sort(new RandomSortField()); - } else if (sort.getType() == LLSortType.SCORE) { - return new Sort(SortField.FIELD_SCORE); - } else if (sort.getType() == LLSortType.DOC) { - return new Sort(SortField.FIELD_DOC); - } - return null; - } - - public static ScoreMode toScoreMode(LLScoreMode scoreMode) { - return switch (scoreMode) { - case COMPLETE -> ScoreMode.COMPLETE; - case TOP_SCORES -> ScoreMode.TOP_SCORES; - case COMPLETE_NO_SCORES -> ScoreMode.COMPLETE_NO_SCORES; - case NO_SCORES -> ScoreMode.TOP_DOCS; - }; - } - - public static Term toTerm(LLTerm term) { - var valueRef = new FakeBytesRefBuilder(term); - return new Term(term.getKey(), valueRef); - } - - public static Document toDocument(LLUpdateDocument document) { - return toDocument(document.items()); - } - - public static Document toDocument(List document) { - Document d = new Document(); - for (LLItem item : document) { - if (item != null) { - d.add(LLUtils.toField(item)); - } - } - return d; - } - - public static Field[] toFields(List fields) { - Field[] d = new Field[fields.size()]; - for (int i = 0; i < fields.size(); i++) { - d[i] = LLUtils.toField(fields.get(i)); - } - return d; - } - - public static Collection toDocuments(Collection document) { - List d = new ArrayList<>(document.size()); - for (LLUpdateDocument doc : document) { - d.add(LLUtils.toDocument(doc)); - } - return d; - } - - public static Collection toDocumentsFromEntries(Collection> documentsList) { - ArrayList results = new ArrayList<>(documentsList.size()); - for (Entry entry : documentsList) { - results.add(LLUtils.toDocument(entry.getValue())); - } - return results; - } - - public static Iterable toTerms(Iterable terms) { - List d = new ArrayList<>(); - for (LLTerm term : terms) { - d.add(LLUtils.toTerm(term)); - } - return d; - } - - private static Field toField(LLItem item) { - return switch (item.getType()) { - case IntPoint -> new IntPoint(item.getName(), item.intData()); - case DoublePoint -> new DoublePoint(item.getName(), item.doubleData()); - case IntPointND -> new IntPoint(item.getName(), item.intArrayData()); - case LongPoint -> new LongPoint(item.getName(), item.longData()); - case LongPointND -> new LongPoint(item.getName(), item.longArrayData()); - case FloatPointND -> new FloatPoint(item.getName(), item.floatArrayData()); - case DoublePointND -> new DoublePoint(item.getName(), item.doubleArrayData()); - case LongStoredField -> new StoredField(item.getName(), item.longData()); - case BytesStoredField -> new StoredField(item.getName(), (BytesRef) item.getData()); - case FloatPoint -> new FloatPoint(item.getName(), item.floatData()); - case TextField -> new TextField(item.getName(), item.stringValue(), Store.NO); - case TextFieldStored -> new TextField(item.getName(), item.stringValue(), Store.YES); - case SortedNumericDocValuesField -> new SortedNumericDocValuesField(item.getName(), item.longData()); - case NumericDocValuesField -> new NumericDocValuesField(item.getName(), item.longData()); - case StringField -> { - if (item.getData() instanceof BytesRef bytesRef) { - yield new StringField(item.getName(), bytesRef, Store.NO); - } else { - yield new StringField(item.getName(), item.stringValue(), Store.NO); - } - } - case StringFieldStored -> { - if (item.getData() instanceof BytesRef bytesRef) { - yield new StringField(item.getName(), bytesRef, Store.YES); - } else { - yield new StringField(item.getName(), item.stringValue(), Store.YES); - } - } - }; - } - private static int[] getIntArray(byte[] data) { var count = data.length / Integer.BYTES; var items = new int[count]; @@ -284,10 +145,6 @@ public class LLUtils { return items; } - public static it.cavallium.dbengine.database.LLKeyScore toKeyScore(LLKeyScore hit) { - return new it.cavallium.dbengine.database.LLKeyScore(hit.docId(), hit.shardId(), hit.score(), hit.key()); - } - public static String toStringSafe(byte @Nullable[] key) { if (key != null) { return toString(key); @@ -451,15 +308,6 @@ public class LLUtils { return buf.hashCode(); } - public static boolean isSet(ScoreDoc[] scoreDocs) { - for (ScoreDoc scoreDoc : scoreDocs) { - if (scoreDoc == null) { - return false; - } - } - return true; - } - public static boolean isBoundedRange(LLRange rangeShared) { return rangeShared.hasMin() && rangeShared.hasMax(); } @@ -625,11 +473,7 @@ public class LLUtils { private static void closeResource(Object next, boolean manual) { if (next instanceof SafeCloseable closeable) { if (manual || closeable instanceof DiscardingCloseable) { - if (!manual && !LuceneUtils.isLuceneThread() && closeable instanceof LuceneCloseable luceneCloseable) { - luceneCloseable.close(); - } else { - closeable.close(); - } + closeable.close(); } } else if (next instanceof List iterable) { iterable.forEach(obj -> closeResource(obj, manual)); @@ -680,18 +524,4 @@ public class LLUtils { public static Buf wrapNullable(byte[] array) { return array != null ? Buf.wrap(array) : null; } - - private static class FakeBytesRefBuilder extends BytesRefBuilder { - - private final LLTerm term; - - public FakeBytesRefBuilder(LLTerm term) { - this.term = term; - } - - @Override - public BytesRef toBytesRef() { - return term.getValueBytesRef(); - } - } } diff --git a/src/main/java/it/cavallium/dbengine/database/disk/CachedIndexSearcherManager.java b/src/main/java/it/cavallium/dbengine/database/disk/CachedIndexSearcherManager.java deleted file mode 100644 index 30cb399..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/CachedIndexSearcherManager.java +++ /dev/null @@ -1,249 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import it.cavallium.dbengine.database.LLSnapshot; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.utils.SimpleResource; -import java.io.IOException; -import it.cavallium.dbengine.utils.DBException; -import java.time.Duration; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.LockSupport; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.SearcherFactory; -import org.apache.lucene.search.SearcherManager; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.AlreadyClosedException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -// todo: deduplicate code between Cached and Simple searcher managers -public class CachedIndexSearcherManager extends SimpleResource implements IndexSearcherManager, LuceneCloseable { - - private static final Logger LOG = LogManager.getLogger(SimpleIndexSearcherManager.class); - private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool( - Runtime.getRuntime().availableProcessors(), - new LuceneThreadFactory("lucene-search") - .setDaemon(true).withGroup(new ThreadGroup("lucene-search")) - ); - private static final SearcherFactory SEARCHER_FACTORY = new ExecutorSearcherFactory(SEARCH_EXECUTOR); - - @Nullable - private final SnapshotsManager snapshotsManager; - private final Similarity similarity; - private final SearcherManager searcherManager; - - private final AtomicLong activeSearchers = new AtomicLong(0); - private final AtomicLong activeRefreshes = new AtomicLong(0); - - private final LoadingCache cachedSnapshotSearchers; - private final ScheduledFuture refreshSubscription; - - public CachedIndexSearcherManager(IndexWriter indexWriter, - @Nullable SnapshotsManager snapshotsManager, - ScheduledExecutorService luceneHeavyTasksScheduler, - Similarity similarity, - boolean applyAllDeletes, - boolean writeAllDeletes, - Duration queryRefreshDebounceTime) { - this.snapshotsManager = snapshotsManager; - this.similarity = similarity; - - try { - this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY); - } catch (IOException e) { - throw new DBException(e); - } - - refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> { - try { - maybeRefresh(); - } catch (Exception ex) { - LOG.error("Failed to refresh the searcher manager", ex); - } - }, - queryRefreshDebounceTime.toMillis(), - queryRefreshDebounceTime.toMillis(), - TimeUnit.MILLISECONDS - ); - - this.cachedSnapshotSearchers = CacheBuilder.newBuilder() - .expireAfterWrite(queryRefreshDebounceTime) - // Max 3 cached non-main index writers - .maximumSize(3) - .build(new CacheLoader<>() { - @Override - public LLIndexSearcher load(@NotNull LLSnapshot snapshot) { - return CachedIndexSearcherManager.this.generateCachedSearcher(snapshot); - } - }); - } - - private LLIndexSearcher generateCachedSearcher(@Nullable LLSnapshot snapshot) { - if (isClosed()) { - return null; - } - activeSearchers.incrementAndGet(); - try { - IndexSearcher indexSearcher; - boolean fromSnapshot; - if (snapshotsManager == null || snapshot == null) { - try { - indexSearcher = searcherManager.acquire(); - } catch (IOException ex) { - throw new DBException(ex); - } - fromSnapshot = false; - } else { - indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR); - fromSnapshot = true; - } - indexSearcher.setSimilarity(similarity); - assert indexSearcher.getIndexReader().getRefCount() > 0; - LLIndexSearcher llIndexSearcher; - if (fromSnapshot) { - llIndexSearcher = new SnapshotIndexSearcher(indexSearcher); - } else { - llIndexSearcher = new MainIndexSearcher(indexSearcher, searcherManager); - } - return llIndexSearcher; - } catch (Throwable ex) { - activeSearchers.decrementAndGet(); - throw ex; - } - } - - private void dropCachedIndexSearcher() { - // This shouldn't happen more than once per searcher. - activeSearchers.decrementAndGet(); - } - - @Override - public void maybeRefreshBlocking() { - try { - activeRefreshes.incrementAndGet(); - searcherManager.maybeRefreshBlocking(); - } catch (AlreadyClosedException ignored) { - - } catch (IOException e) { - throw new DBException(e); - } finally { - activeRefreshes.decrementAndGet(); - } - } - - @Override - public void maybeRefresh() { - try { - activeRefreshes.incrementAndGet(); - searcherManager.maybeRefresh(); - } catch (AlreadyClosedException ignored) { - - } catch (IOException e) { - throw new DBException(e); - } finally { - activeRefreshes.decrementAndGet(); - } - } - - @Override - public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) { - if (snapshot == null) { - return this.generateCachedSearcher(null); - } else { - return this.cachedSnapshotSearchers.getUnchecked(snapshot); - } - } - - @Override - protected void onClose() { - LOG.debug("Closing IndexSearcherManager..."); - long initTime = System.nanoTime(); - refreshSubscription.cancel(false); - while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 240000000000L) { - LockSupport.parkNanos(50000000); - } - refreshSubscription.cancel(true); - LOG.debug("Closed IndexSearcherManager"); - LOG.debug("Closing refreshes..."); - initTime = System.nanoTime(); - while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) { - LockSupport.parkNanos(50000000); - } - LOG.debug("Closed refreshes..."); - LOG.debug("Closing active searchers..."); - initTime = System.nanoTime(); - while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) { - LockSupport.parkNanos(50000000); - } - LOG.debug("Closed active searchers"); - LOG.debug("Stopping searcher executor..."); - cachedSnapshotSearchers.invalidateAll(); - cachedSnapshotSearchers.cleanUp(); - SEARCH_EXECUTOR.shutdown(); - try { - if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) { - SEARCH_EXECUTOR.shutdownNow(); - } - } catch (InterruptedException e) { - LOG.error("Failed to stop executor", e); - } - LOG.debug("Stopped searcher executor"); - } - - public long getActiveSearchers() { - return activeSearchers.get(); - } - - public long getActiveRefreshes() { - return activeRefreshes.get(); - } - - private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable { - - public MainIndexSearcher(IndexSearcher indexSearcher, SearcherManager searcherManager) { - super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher)); - } - - private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) { - try { - LOG.warn("An index searcher was not closed!"); - searcherManager.release(indexSearcher); - } catch (IOException ex) { - LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex); - } - } - - @Override - public void onClose() { - dropCachedIndexSearcher(); - try { - searcherManager.release(indexSearcher); - } catch (IOException ex) { - throw new DBException(ex); - } - } - } - - private class SnapshotIndexSearcher extends LLIndexSearcherImpl { - - public SnapshotIndexSearcher(IndexSearcher indexSearcher) { - super(indexSearcher); - } - - @Override - public void onClose() { - dropCachedIndexSearcher(); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/ExecutorSearcherFactory.java b/src/main/java/it/cavallium/dbengine/database/disk/ExecutorSearcherFactory.java deleted file mode 100644 index 28ab6d9..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/ExecutorSearcherFactory.java +++ /dev/null @@ -1,20 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import java.util.concurrent.Executor; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.SearcherFactory; - -public class ExecutorSearcherFactory extends SearcherFactory { - - private final Executor executor; - - public ExecutorSearcherFactory(Executor executor) { - this.executor = executor; - } - - @Override - public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) { - return new IndexSearcher(reader, executor); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/IndexSearcherManager.java b/src/main/java/it/cavallium/dbengine/database/disk/IndexSearcherManager.java deleted file mode 100644 index 465bacb..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/IndexSearcherManager.java +++ /dev/null @@ -1,16 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import it.cavallium.dbengine.database.LLSnapshot; -import it.cavallium.dbengine.database.SafeCloseable; -import java.io.IOException; -import java.util.function.Supplier; -import org.jetbrains.annotations.Nullable; - -public interface IndexSearcherManager extends SafeCloseable { - - void maybeRefreshBlocking(); - - void maybeRefresh(); - - LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot); -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearcher.java b/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearcher.java deleted file mode 100644 index 722aa13..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearcher.java +++ /dev/null @@ -1,28 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import it.cavallium.dbengine.utils.SimpleResource; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.IndexSearcher; - -public abstract class LLIndexSearcher extends SimpleResource implements DiscardingCloseable { - - protected static final Logger LOG = LogManager.getLogger(LLIndexSearcher.class); - - public LLIndexSearcher() { - super(); - } - - public LLIndexSearcher(Runnable cleanAction) { - super(cleanAction); - } - - public IndexSearcher getIndexSearcher() { - ensureOpen(); - return getIndexSearcherInternal(); - } - - protected abstract IndexSearcher getIndexSearcherInternal(); -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearcherImpl.java b/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearcherImpl.java deleted file mode 100644 index 9610598..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearcherImpl.java +++ /dev/null @@ -1,27 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.IndexSearcher; - -public abstract class LLIndexSearcherImpl extends LLIndexSearcher { - - protected static final Logger LOG = LogManager.getLogger(LLIndexSearcherImpl.class); - - protected final IndexSearcher indexSearcher; - - public LLIndexSearcherImpl(IndexSearcher indexSearcher) { - super(); - this.indexSearcher = indexSearcher; - } - - public LLIndexSearcherImpl(IndexSearcher indexSearcher, Runnable cleanAction) { - super(cleanAction); - this.indexSearcher = indexSearcher; - } - - public IndexSearcher getIndexSearcherInternal() { - return indexSearcher; - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearchers.java b/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearchers.java deleted file mode 100644 index 4ea39ee..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLIndexSearchers.java +++ /dev/null @@ -1,128 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher; -import it.cavallium.dbengine.utils.SimpleResource; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import org.apache.lucene.search.IndexSearcher; - -public interface LLIndexSearchers extends DiscardingCloseable { - - static LLIndexSearchers of(List indexSearchers) { - return new ShardedIndexSearchers(indexSearchers); - } - - static UnshardedIndexSearchers unsharded(LLIndexSearcher indexSearcher) { - return new UnshardedIndexSearchers(indexSearcher); - } - - List shards(); - - List llShards(); - - IndexSearcher shard(int shardIndex); - - LLIndexSearcher llShard(int shardIndex); - - class UnshardedIndexSearchers implements LLIndexSearchers, LuceneCloseable { - - private final LLIndexSearcher indexSearcher; - - public UnshardedIndexSearchers(LLIndexSearcher indexSearcher) { - Objects.requireNonNull(indexSearcher); - this.indexSearcher = indexSearcher; - } - - @Override - public List shards() { - return List.of(indexSearcher.getIndexSearcher()); - } - - @Override - public List llShards() { - return Collections.singletonList(indexSearcher); - } - - @Override - public IndexSearcher shard(int shardIndex) { - if (shardIndex != -1) { - throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is a unsharded index"); - } - return indexSearcher.getIndexSearcher(); - } - - @Override - public LLIndexSearcher llShard(int shardIndex) { - if (shardIndex != -1) { - throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is a unsharded index"); - } - return indexSearcher; - } - - public IndexSearcher shard() { - return this.shard(-1); - } - - public LLIndexSearcher llShard() { - return this.llShard(-1); - } - - @Override - public void close() { - indexSearcher.close(); - } - } - - class ShardedIndexSearchers implements LLIndexSearchers, LuceneCloseable { - - private final List indexSearchers; - private final List indexSearchersVals; - - public ShardedIndexSearchers(List indexSearchers) { - List shardedIndexSearchersVals = new ArrayList<>(indexSearchers.size()); - for (LLIndexSearcher indexSearcher : indexSearchers) { - shardedIndexSearchersVals.add(indexSearcher.getIndexSearcher()); - } - shardedIndexSearchersVals = ShardIndexSearcher.create(shardedIndexSearchersVals); - this.indexSearchers = indexSearchers; - this.indexSearchersVals = shardedIndexSearchersVals; - } - - @Override - public List shards() { - return Collections.unmodifiableList(indexSearchersVals); - } - - @Override - public List llShards() { - return Collections.unmodifiableList(indexSearchers); - } - - @Override - public IndexSearcher shard(int shardIndex) { - if (shardIndex < 0) { - throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid"); - } - return indexSearchersVals.get(shardIndex); - } - - @Override - public LLIndexSearcher llShard(int shardIndex) { - if (shardIndex < 0) { - throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid"); - } - return indexSearchers.get(shardIndex); - } - - @Override - public void close() { - for (LLIndexSearcher indexSearcher : indexSearchers) { - indexSearcher.close(); - } - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDatabaseConnection.java index 7b842d7..9845bf3 100644 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDatabaseConnection.java +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDatabaseConnection.java @@ -2,14 +2,8 @@ package it.cavallium.dbengine.database.disk; import io.micrometer.core.instrument.MeterRegistry; import it.cavallium.dbengine.database.LLDatabaseConnection; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.lucene.LuceneHacks; import it.cavallium.dbengine.rpc.current.data.Column; import it.cavallium.dbengine.rpc.current.data.DatabaseOptions; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; import it.cavallium.dbengine.utils.DBException; import java.io.IOException; import java.nio.file.Files; @@ -18,7 +12,6 @@ import java.util.LinkedList; import java.util.List; import java.util.StringJoiner; import java.util.concurrent.atomic.AtomicBoolean; -import org.jetbrains.annotations.Nullable; public class LLLocalDatabaseConnection implements LLDatabaseConnection { @@ -75,38 +68,6 @@ public class LLLocalDatabaseConnection implements LLDatabaseConnection { return basePath.resolve("database_" + databaseName); } - @Override - public LLLuceneIndex getLuceneIndex(String clusterName, - LuceneIndexStructure indexStructure, - IndicizerAnalyzers indicizerAnalyzers, - IndicizerSimilarities indicizerSimilarities, - LuceneOptions luceneOptions, - @Nullable LuceneHacks luceneHacks) { - if (clusterName == null) { - throw new IllegalArgumentException("Cluster name must be set"); - } - if (indexStructure.activeShards().size() != 1) { - return new LLLocalMultiLuceneIndex(meterRegistry, - clusterName, - indexStructure.activeShards(), - indexStructure.totalShards(), - indicizerAnalyzers, - indicizerSimilarities, - luceneOptions, - luceneHacks - ); - } else { - return new LLLocalLuceneIndex(meterRegistry, - clusterName, - indexStructure.activeShards().getInt(0), - indicizerAnalyzers, - indicizerSimilarities, - luceneOptions, - luceneHacks - ); - } - } - @Override public void disconnect() { if (connected.compareAndSet(true, false)) { diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java deleted file mode 100644 index db505d2..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java +++ /dev/null @@ -1,882 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import static it.cavallium.dbengine.database.LLUtils.MARKER_LUCENE; -import static it.cavallium.dbengine.database.LLUtils.toDocument; -import static it.cavallium.dbengine.database.LLUtils.toFields; -import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE; -import static it.cavallium.dbengine.lucene.searcher.LuceneSearchResult.EMPTY_COUNT; -import static it.cavallium.dbengine.utils.StreamUtils.collect; -import static it.cavallium.dbengine.utils.StreamUtils.fastListing; -import static java.util.Objects.requireNonNull; - -import com.google.common.collect.Multimap; -import io.micrometer.core.instrument.Counter; -import io.micrometer.core.instrument.MeterRegistry; -import io.micrometer.core.instrument.Tag; -import io.micrometer.core.instrument.Timer; -import it.cavallium.dbengine.client.Backuppable; -import it.cavallium.dbengine.client.IBackuppable; -import it.cavallium.dbengine.client.query.QueryParser; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.QueryParams; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLIndexRequest; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.database.LLSearchResultShard; -import it.cavallium.dbengine.database.LLSnapshot; -import it.cavallium.dbengine.database.LLSoftUpdateDocument; -import it.cavallium.dbengine.database.LLTerm; -import it.cavallium.dbengine.database.LLUpdateDocument; -import it.cavallium.dbengine.database.LLUpdateFields; -import it.cavallium.dbengine.database.LLUtils; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.collector.Buckets; -import it.cavallium.dbengine.lucene.directory.Lucene91CodecWithNoFieldCompression; -import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer; -import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher; -import it.cavallium.dbengine.lucene.searcher.BucketParams; -import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; -import it.cavallium.dbengine.lucene.searcher.LocalSearcher; -import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; -import it.cavallium.dbengine.utils.SimpleResource; -import java.io.IOException; -import it.cavallium.dbengine.utils.DBException; -import java.time.Duration; -import java.util.ArrayList; -import java.util.List; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletionException; -import java.util.concurrent.Executors; -import java.util.concurrent.Phaser; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.LongAdder; -import java.util.concurrent.locks.ReentrantLock; -import java.util.function.Function; -import java.util.function.Supplier; -import java.util.logging.Level; -import java.util.stream.Stream; -import org.apache.commons.lang3.time.StopWatch; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; -import org.apache.lucene.index.ConcurrentMergeScheduler; -import org.apache.lucene.index.IndexDeletionPolicy; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.MergeScheduler; -import org.apache.lucene.index.NoMergePolicy; -import org.apache.lucene.index.SerialMergeScheduler; -import org.apache.lucene.index.SnapshotDeletionPolicy; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MMapDirectory; -import org.apache.lucene.util.IOSupplier; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class LLLocalLuceneIndex extends SimpleResource implements IBackuppable, LLLuceneIndex, LuceneCloseable { - - protected static final Logger logger = LogManager.getLogger(LLLocalLuceneIndex.class); - - private final ReentrantLock shutdownLock = new ReentrantLock(); - /** - * Global lucene index scheduler. - * There is only a single thread globally to not overwhelm the disk with - * concurrent commits or concurrent refreshes. - */ - private static final ScheduledExecutorService luceneHeavyTasksScheduler = Executors.newScheduledThreadPool(4, - new LuceneThreadFactory("heavy-tasks").setDaemon(true).withGroup(new ThreadGroup("lucene-heavy-tasks")) - ); - private static final ScheduledExecutorService luceneWriteScheduler = Executors.newScheduledThreadPool(8, - new LuceneThreadFactory("lucene-write").setDaemon(true).withGroup(new ThreadGroup("lucene-write")) - ); - private static final ScheduledExecutorService bulkScheduler = luceneWriteScheduler; - - private static final boolean ENABLE_SNAPSHOTS - = Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.snapshot.enable", "true")); - - private static final boolean CACHE_SEARCHER_MANAGER - = Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.cachedsearchermanager.enable", "true")); - - private static final LLSnapshot DUMMY_SNAPSHOT = new LLSnapshot(-1); - - private final LocalSearcher localSearcher; - private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher(); - - private final Counter startedDocIndexings; - private final Counter endeddDocIndexings; - private final Timer docIndexingTime; - private final Timer snapshotTime; - private final Timer flushTime; - private final Timer commitTime; - private final Timer mergeTime; - private final Timer refreshTime; - - private final String shardName; - private final IndexWriter indexWriter; - private final SnapshotsManager snapshotsManager; - private final IndexSearcherManager searcherManager; - private final PerFieldAnalyzerWrapper luceneAnalyzer; - private final Similarity luceneSimilarity; - private final Directory directory; - private final LuceneBackuppable backuppable; - private final boolean lowMemory; - - private final Phaser activeTasks = new Phaser(1); - - public LLLocalLuceneIndex(MeterRegistry meterRegistry, - @NotNull String clusterName, - int shardIndex, - IndicizerAnalyzers indicizerAnalyzers, - IndicizerSimilarities indicizerSimilarities, - LuceneOptions luceneOptions, - @Nullable LuceneHacks luceneHacks) { - - if (clusterName.isBlank()) { - throw new DBException("Empty lucene database name"); - } - if (!MMapDirectory.UNMAP_SUPPORTED) { - logger.error("Unmap is unsupported, lucene will run slower: {}", MMapDirectory.UNMAP_NOT_SUPPORTED_REASON); - } else { - logger.debug("Lucene MMap is supported"); - } - this.lowMemory = luceneOptions.lowMemory(); - this.shardName = LuceneUtils.getStandardName(clusterName, shardIndex); - try { - this.directory = LuceneUtils.createLuceneDirectory(luceneOptions.directoryOptions(), shardName); - } catch (IOException e) { - throw new DBException(e); - } - boolean isFilesystemCompressed = LuceneUtils.getIsFilesystemCompressed(luceneOptions.directoryOptions()); - - this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers); - this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities); - - var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries(); - if (luceneHacks != null && luceneHacks.customLocalSearcher() != null) { - localSearcher = luceneHacks.customLocalSearcher().get(); - } else { - localSearcher = new AdaptiveLocalSearcher(maxInMemoryResultEntries); - } - - var indexWriterConfig = new IndexWriterConfig(luceneAnalyzer); - IndexDeletionPolicy deletionPolicy; - deletionPolicy = requireNonNull(indexWriterConfig.getIndexDeletionPolicy()); - if (ENABLE_SNAPSHOTS) { - deletionPolicy = new SnapshotDeletionPolicy(deletionPolicy); - } - indexWriterConfig.setIndexDeletionPolicy(deletionPolicy); - indexWriterConfig.setCommitOnClose(true); - int writerSchedulerMaxThreadCount; - MergeScheduler mergeScheduler; - if (lowMemory) { - mergeScheduler = new SerialMergeScheduler(); - writerSchedulerMaxThreadCount = 1; - } else { - //noinspection resource - ConcurrentMergeScheduler concurrentMergeScheduler = new LuceneConcurrentMergeScheduler(); - // false means SSD, true means HDD - boolean spins = false; - concurrentMergeScheduler.setDefaultMaxMergesAndThreads(spins); - // It's true by default, but this makes sure it's true if it's a managed path - if (LuceneUtils.getManagedPath(luceneOptions.directoryOptions()).isPresent()) { - concurrentMergeScheduler.enableAutoIOThrottle(); - } - writerSchedulerMaxThreadCount = concurrentMergeScheduler.getMaxThreadCount(); - mergeScheduler = concurrentMergeScheduler; - } - if (isFilesystemCompressed) { - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(new Lucene91CodecWithNoFieldCompression()); - } - logger.trace("WriterSchedulerMaxThreadCount: {}", writerSchedulerMaxThreadCount); - indexWriterConfig.setMergeScheduler(mergeScheduler); - indexWriterConfig.setMergePolicy(LuceneUtils.getMergePolicy(luceneOptions)); - if (luceneOptions.indexWriterRAMBufferSizeMB().isPresent()) { - indexWriterConfig.setRAMBufferSizeMB(luceneOptions.indexWriterRAMBufferSizeMB().get()); - } - if (luceneOptions.indexWriterMaxBufferedDocs().isPresent()) { - indexWriterConfig.setMaxBufferedDocs(luceneOptions.indexWriterMaxBufferedDocs().get()); - } - if (luceneOptions.indexWriterReaderPooling().isPresent()) { - indexWriterConfig.setReaderPooling(luceneOptions.indexWriterReaderPooling().get()); - } - indexWriterConfig.setSimilarity(getLuceneSimilarity()); - try { - this.indexWriter = new IndexWriter(directory, indexWriterConfig); - } catch (IOException e) { - throw new DBException(e); - } - if (ENABLE_SNAPSHOTS) { - this.snapshotsManager = new SnapshotsManager(indexWriter, (SnapshotDeletionPolicy) deletionPolicy); - } else { - this.snapshotsManager = null; - } - SimpleIndexSearcherManager searcherManager; - if (CACHE_SEARCHER_MANAGER) { - searcherManager = new SimpleIndexSearcherManager(indexWriter, - snapshotsManager, - luceneHeavyTasksScheduler, - getLuceneSimilarity(), - luceneOptions.applyAllDeletes().orElse(true), - luceneOptions.writeAllDeletes().orElse(false), - luceneOptions.queryRefreshDebounceTime() - ); - } else { - searcherManager = new SimpleIndexSearcherManager(indexWriter, - snapshotsManager, - luceneHeavyTasksScheduler, - getLuceneSimilarity(), - luceneOptions.applyAllDeletes().orElse(true), - luceneOptions.writeAllDeletes().orElse(false), - luceneOptions.queryRefreshDebounceTime()); - } - this.searcherManager = searcherManager; - - this.startedDocIndexings = meterRegistry.counter("index.write.doc.started.counter", "index.name", clusterName); - this.endeddDocIndexings = meterRegistry.counter("index.write.doc.ended.counter", "index.name", clusterName); - this.docIndexingTime = Timer.builder("index.write.doc.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry); - this.snapshotTime = Timer.builder("index.write.snapshot.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry); - this.flushTime = Timer.builder("index.write.flush.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry); - this.commitTime = Timer.builder("index.write.commit.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry); - this.mergeTime = Timer.builder("index.write.merge.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry); - this.refreshTime = Timer.builder("index.search.refresh.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry); - meterRegistry.gauge("index.snapshot.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getSnapshotsCount); - meterRegistry.gauge("index.write.flushing.bytes", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterFlushingBytes); - meterRegistry.gauge("index.write.sequence.completed.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMaxCompletedSequenceNumber); - meterRegistry.gauge("index.write.doc.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterPendingNumDocs); - meterRegistry.gauge("index.write.segment.merging.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMergingSegmentsSize); - meterRegistry.gauge("index.directory.deletion.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDirectoryPendingDeletionsCount); - meterRegistry.gauge("index.doc.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDocCount); - meterRegistry.gauge("index.doc.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getMaxDoc); - meterRegistry.gauge("index.searcher.refreshes.active.count", - List.of(Tag.of("index.name", clusterName)), - searcherManager, - SimpleIndexSearcherManager::getActiveRefreshes - ); - meterRegistry.gauge("index.searcher.searchers.active.count", - List.of(Tag.of("index.name", clusterName)), - searcherManager, - SimpleIndexSearcherManager::getActiveSearchers - ); - - // Start scheduled tasks - var commitMillis = luceneOptions.commitDebounceTime().toMillis(); - luceneHeavyTasksScheduler.scheduleAtFixedRate(this::scheduledCommit, commitMillis, commitMillis, - TimeUnit.MILLISECONDS); - - this.backuppable = new LuceneBackuppable(); - } - - private Similarity getLuceneSimilarity() { - return luceneSimilarity; - } - - @Override - public String getLuceneIndexName() { - return shardName; - } - - @Override - public LLSnapshot takeSnapshot() { - return runTask(() -> { - if (snapshotsManager == null) { - return DUMMY_SNAPSHOT; - } - try { - return snapshotTime.recordCallable(snapshotsManager::takeSnapshot); - } catch (Exception e) { - throw new DBException("Failed to take snapshot", e); - } - }); - } - - private V runTask(Supplier supplier) { - if (isClosed()) { - throw new IllegalStateException("Lucene index is closed"); - } else { - activeTasks.register(); - try { - return supplier.get(); - } finally { - activeTasks.arriveAndDeregister(); - } - } - } - - @Override - public void releaseSnapshot(LLSnapshot snapshot) { - if (snapshotsManager == null) { - if (snapshot != null && !Objects.equals(snapshot, DUMMY_SNAPSHOT)) { - throw new IllegalStateException("Can't release snapshot " + snapshot); - } - return; - } - snapshotsManager.releaseSnapshot(snapshot); - } - - @Override - public void addDocument(LLTerm key, LLUpdateDocument doc) { - runTask(() -> { - try { - docIndexingTime.recordCallable(() -> { - startedDocIndexings.increment(); - try { - indexWriter.addDocument(toDocument(doc)); - } finally { - endeddDocIndexings.increment(); - } - return null; - }); - } catch (Exception e) { - throw new DBException("Failed to add document", e); - } - logger.trace(MARKER_LUCENE, "Added document {}: {}", key, doc); - return null; - }); - } - - @Override - public long addDocuments(boolean atomic, Stream> documents) { - return this.runTask(() -> { - if (!atomic) { - LongAdder count = new LongAdder(); - documents.forEach(document -> { - count.increment(); - LLUpdateDocument value = document.getValue(); - startedDocIndexings.increment(); - try { - docIndexingTime.recordCallable(() -> { - indexWriter.addDocument(toDocument(value)); - return null; - }); - } catch (Exception ex) { - throw new CompletionException("Failed to add document", ex); - } finally { - endeddDocIndexings.increment(); - } - logger.trace(MARKER_LUCENE, "Added document: {}", document); - }); - return count.sum(); - } else { - var documentsList = collect(documents, fastListing()); - assert documentsList != null; - var count = documentsList.size(); - StopWatch stopWatch = StopWatch.createStarted(); - try { - startedDocIndexings.increment(count); - try { - indexWriter.addDocuments(LLUtils.toDocumentsFromEntries(documentsList)); - } catch (IOException e) { - throw new DBException(e); - } finally { - endeddDocIndexings.increment(count); - } - } finally { - docIndexingTime.record(stopWatch.getTime(TimeUnit.MILLISECONDS) / Math.max(count, 1), - TimeUnit.MILLISECONDS - ); - } - return (long) documentsList.size(); - } - }); - } - - - @Override - public void deleteDocument(LLTerm id) { - this.runTask(() -> { - try { - return docIndexingTime.recordCallable(() -> { - startedDocIndexings.increment(); - try { - indexWriter.deleteDocuments(LLUtils.toTerm(id)); - } finally { - endeddDocIndexings.increment(); - } - return null; - }); - } catch (Exception e) { - throw new DBException("Failed to delete document", e); - } - }); - } - - @Override - public void update(LLTerm id, LLIndexRequest request) { - this.runTask(() -> { - try { - docIndexingTime.recordCallable(() -> { - startedDocIndexings.increment(); - try { - if (request instanceof LLUpdateDocument updateDocument) { - indexWriter.updateDocument(LLUtils.toTerm(id), toDocument(updateDocument)); - } else if (request instanceof LLSoftUpdateDocument softUpdateDocument) { - indexWriter.softUpdateDocument(LLUtils.toTerm(id), - toDocument(softUpdateDocument.items()), - toFields(softUpdateDocument.softDeleteItems()) - ); - } else if (request instanceof LLUpdateFields updateFields) { - indexWriter.updateDocValues(LLUtils.toTerm(id), toFields(updateFields.items())); - } else { - throw new UnsupportedOperationException("Unexpected request type: " + request); - } - } finally { - endeddDocIndexings.increment(); - } - return null; - }); - } catch (Exception e) { - throw new DBException("Failed to update document", e); - } - logger.trace(MARKER_LUCENE, "Updated document {}: {}", id, request); - return null; - }); - } - - @Override - public long updateDocuments(Stream> documents) { - return runTask(() -> { - var count = new LongAdder(); - documents.forEach(document -> { - count.increment(); - LLTerm key = document.getKey(); - LLUpdateDocument value = document.getValue(); - startedDocIndexings.increment(); - try { - docIndexingTime.recordCallable(() -> { - indexWriter.updateDocument(LLUtils.toTerm(key), toDocument(value)); - return null; - }); - logger.trace(MARKER_LUCENE, "Updated document {}: {}", key, value); - } catch (Exception ex) { - throw new CompletionException(ex); - } finally { - endeddDocIndexings.increment(); - } - }); - return count.sum(); - }); - } - - @Override - public void deleteAll() { - this.runTask(() -> { - shutdownLock.lock(); - try { - indexWriter.deleteAll(); - indexWriter.forceMergeDeletes(true); - indexWriter.commit(); - indexWriter.deleteUnusedFiles(); - } catch (IOException e) { - throw new DBException(e); - } finally { - shutdownLock.unlock(); - } - return null; - }); - } - - @Override - public Stream moreLikeThis(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - @Nullable String keyFieldName, - Multimap mltDocumentFieldsFlux) { - LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer); - var searcher = this.searcherManager.retrieveSearcher(snapshot); - var transformer = new MoreLikeThisTransformer(mltDocumentFieldsFlux, luceneAnalyzer, luceneSimilarity); - - var result = localSearcher.collect(searcher, localQueryParams, keyFieldName, transformer, Function.identity()); - return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount())); - } - - @Override - public Stream search(@Nullable LLSnapshot snapshot, QueryParams queryParams, - @Nullable String keyFieldName) { - var result = searchInternal(snapshot, queryParams, keyFieldName); - var shard = new LLSearchResultShard(result.results(), result.totalHitsCount()); - return Stream.of(shard); - } - - public LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot, QueryParams queryParams, - @Nullable String keyFieldName) { - LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer); - try (var searcher = searcherManager.retrieveSearcher(snapshot)) { - if (searcher != null) { - return localSearcher.collect(searcher, localQueryParams, keyFieldName, NO_REWRITE, Function.identity()); - } else { - return LuceneSearchResult.EMPTY; - } - } - } - - @Override - public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) { - var params = LuceneUtils.getCountQueryParams(query); - var result = this.searchInternal(snapshot, params, null); - if (result != null) { - return result.totalHitsCount(); - } else { - return EMPTY_COUNT; - } - } - - @Override - public Buckets computeBuckets(@Nullable LLSnapshot snapshot, - @NotNull List queries, - @Nullable Query normalizationQuery, - BucketParams bucketParams) { - List localQueries = new ArrayList<>(queries.size()); - for (Query query : queries) { - localQueries.add(QueryParser.toQuery(query, luceneAnalyzer)); - } - var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer); - try (LLIndexSearchers searchers = LLIndexSearchers.unsharded(searcherManager.retrieveSearcher(snapshot))) { - - return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery); - } - } - - public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) { - return searcherManager.retrieveSearcher(snapshot); - } - - @Override - protected void onClose() { - logger.debug("Waiting IndexWriter tasks..."); - activeTasks.arriveAndAwaitAdvance(); - logger.debug("IndexWriter tasks ended"); - shutdownLock.lock(); - try { - logger.debug("Closing searcher manager..."); - searcherManager.close(); - logger.debug("Searcher manager closed"); - logger.debug("Closing IndexWriter..."); - indexWriter.close(); - directory.close(); - logger.debug("IndexWriter closed"); - } catch (IOException ex) { - throw new DBException(ex); - } finally { - shutdownLock.unlock(); - } - } - - @Override - public void flush() { - runTask(() -> { - if (activeTasks.isTerminated()) return null; - shutdownLock.lock(); - try { - if (isClosed()) { - return null; - } - flushTime.recordCallable(() -> { - indexWriter.flush(); - return null; - }); - } catch (Exception e) { - throw new DBException("Failed to flush", e); - } finally { - shutdownLock.unlock(); - } - return null; - }); - } - - @Override - public void waitForMerges() { - runTask(() -> { - if (activeTasks.isTerminated()) return null; - shutdownLock.lock(); - try { - if (isClosed()) { - return null; - } - var mergeScheduler = indexWriter.getConfig().getMergeScheduler(); - if (mergeScheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) { - concurrentMergeScheduler.sync(); - } - } finally { - shutdownLock.unlock(); - } - return null; - }); - } - - @Override - public void waitForLastMerges() { - runTask(() -> { - if (activeTasks.isTerminated()) return null; - shutdownLock.lock(); - try { - if (isClosed()) { - return null; - } - indexWriter.getConfig().setMergePolicy(NoMergePolicy.INSTANCE); - var mergeScheduler = indexWriter.getConfig().getMergeScheduler(); - if (mergeScheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) { - concurrentMergeScheduler.sync(); - } - indexWriter.deleteUnusedFiles(); - } catch (IOException e) { - throw new DBException(e); - } finally { - shutdownLock.unlock(); - } - return null; - }); - } - - @Override - public void refresh(boolean force) { - runTask(() -> { - activeTasks.register(); - try { - if (activeTasks.isTerminated()) return null; - shutdownLock.lock(); - try { - if (isClosed()) { - return null; - } - refreshTime.recordCallable(() -> { - if (force) { - searcherManager.maybeRefreshBlocking(); - } else { - searcherManager.maybeRefresh(); - } - return null; - }); - } catch (Exception e) { - throw new DBException("Failed to refresh", e); - } finally { - shutdownLock.unlock(); - } - } finally { - activeTasks.arriveAndDeregister(); - } - return null; - }); - } - - /** - * Internal method, do not use - */ - public void scheduledCommit() { - shutdownLock.lock(); - try { - if (isClosed()) { - return; - } - commitTime.recordCallable(() -> { - indexWriter.commit(); - indexWriter.deleteUnusedFiles(); - return null; - }); - } catch (Exception ex) { - logger.error(MARKER_LUCENE, "Failed to execute a scheduled commit", ex); - } finally { - shutdownLock.unlock(); - } - } - - /** - * Internal method, do not use - */ - public void scheduledMerge() { // Do not use. Merges are done automatically by merge policies - shutdownLock.lock(); - try { - if (isClosed()) { - return; - } - mergeTime.recordCallable(() -> { - indexWriter.maybeMerge(); - return null; - }); - } catch (Exception ex) { - logger.error(MARKER_LUCENE, "Failed to execute a scheduled merge", ex); - } finally { - shutdownLock.unlock(); - } - } - - @Override - public boolean isLowMemoryMode() { - return lowMemory; - } - - private double getSnapshotsCount() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - if (snapshotsManager == null) return 0d; - return snapshotsManager.getSnapshotsCount(); - } finally { - shutdownLock.unlock(); - } - } - - private double getIndexWriterFlushingBytes() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - return indexWriter.getFlushingBytes(); - } finally { - shutdownLock.unlock(); - } - } - - private double getIndexWriterMaxCompletedSequenceNumber() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - return indexWriter.getMaxCompletedSequenceNumber(); - } finally { - shutdownLock.unlock(); - } - } - - private double getIndexWriterPendingNumDocs() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - return indexWriter.getPendingNumDocs(); - } finally { - shutdownLock.unlock(); - } - } - - private double getIndexWriterMergingSegmentsSize() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - return indexWriter.getMergingSegments().size(); - } finally { - shutdownLock.unlock(); - } - } - - private double getDirectoryPendingDeletionsCount() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - return indexWriter.getDirectory().getPendingDeletions().size(); - } catch (IOException e) { - return 0d; - } finally { - shutdownLock.unlock(); - } - } - - private double getDocCount() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - var docStats = indexWriter.getDocStats(); - if (docStats != null) { - return docStats.numDocs; - } else { - return 0d; - } - } finally { - shutdownLock.unlock(); - } - } - - private double getMaxDoc() { - shutdownLock.lock(); - try { - if (isClosed()) { - return 0d; - } - var docStats = indexWriter.getDocStats(); - if (docStats != null) { - return docStats.maxDoc; - } else { - return 0d; - } - } finally { - shutdownLock.unlock(); - } - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - LLLocalLuceneIndex that = (LLLocalLuceneIndex) o; - - return Objects.equals(shardName, that.shardName); - } - - @Override - public int hashCode() { - return shardName.hashCode(); - } - - @Override - public void pauseForBackup() { - backuppable.pauseForBackup(); - } - - @Override - public void resumeAfterBackup() { - backuppable.resumeAfterBackup(); - } - - @Override - public boolean isPaused() { - return backuppable.isPaused(); - } - - private class LuceneBackuppable extends Backuppable { - - private LLSnapshot snapshot; - - @Override - protected void onPauseForBackup() { - var snapshot = LLLocalLuceneIndex.this.takeSnapshot(); - if (snapshot == null) { - logger.error("Can't pause index \"{}\" because snapshots are not enabled!", shardName); - } - this.snapshot = snapshot; - } - - @Override - protected void onResumeAfterBackup() { - if (snapshot == null) { - return; - } - LLLocalLuceneIndex.this.releaseSnapshot(snapshot); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java deleted file mode 100644 index e5add52..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java +++ /dev/null @@ -1,345 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId; -import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL; -import static it.cavallium.dbengine.utils.StreamUtils.collectOn; -import static it.cavallium.dbengine.utils.StreamUtils.executing; -import static it.cavallium.dbengine.utils.StreamUtils.fastListing; -import static it.cavallium.dbengine.utils.StreamUtils.fastReducing; -import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong; -import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt; -import static java.util.stream.Collectors.groupingBy; - -import com.google.common.collect.Multimap; -import com.google.common.collect.Streams; -import io.micrometer.core.instrument.MeterRegistry; -import it.cavallium.dbengine.client.IBackuppable; -import it.cavallium.dbengine.client.query.QueryParser; -import it.cavallium.dbengine.client.query.current.data.Query; -import it.cavallium.dbengine.client.query.current.data.QueryParams; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLIndexRequest; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.database.LLSearchResultShard; -import it.cavallium.dbengine.database.LLSnapshot; -import it.cavallium.dbengine.database.LLSnapshottable; -import it.cavallium.dbengine.database.LLTerm; -import it.cavallium.dbengine.database.LLUpdateDocument; -import it.cavallium.dbengine.database.SafeCloseable; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.collector.Buckets; -import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer; -import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.BucketParams; -import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite; -import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; -import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult; -import it.cavallium.dbengine.lucene.searcher.MultiSearcher; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; -import it.cavallium.dbengine.utils.DBException; -import it.cavallium.dbengine.utils.SimpleResource; -import it.cavallium.dbengine.utils.StreamUtils; -import it.unimi.dsi.fastutil.ints.IntList; -import java.io.Closeable; -import java.io.IOException; -import java.time.Duration; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; -import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class LLLocalMultiLuceneIndex extends SimpleResource implements LLLuceneIndex, LuceneCloseable { - - private static final Logger LOG = LogManager.getLogger(LLLuceneIndex.class); - - private final String clusterName; - private final boolean lowMemory; - private final MeterRegistry meterRegistry; - private final ConcurrentHashMap> registeredSnapshots = new ConcurrentHashMap<>(); - private final AtomicLong nextSnapshotNumber = new AtomicLong(1); - private final LLLocalLuceneIndex[] luceneIndicesById; - private final List luceneIndicesSet; - private final int totalShards; - private final PerFieldAnalyzerWrapper luceneAnalyzer; - private final PerFieldSimilarityWrapper luceneSimilarity; - - private final MultiSearcher multiSearcher; - private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher(); - - public LLLocalMultiLuceneIndex(MeterRegistry meterRegistry, - String clusterName, - IntList activeShards, - int totalShards, - IndicizerAnalyzers indicizerAnalyzers, - IndicizerSimilarities indicizerSimilarities, - LuceneOptions luceneOptions, - @Nullable LuceneHacks luceneHacks) { - - if (totalShards <= 1 || totalShards > 100) { - throw new DBException("Unsupported instances count: " + totalShards); - } - - this.meterRegistry = meterRegistry; - LLLocalLuceneIndex[] luceneIndices = new LLLocalLuceneIndex[totalShards]; - for (int i = 0; i < totalShards; i++) { - if (!activeShards.contains(i)) { - continue; - } - luceneIndices[i] = new LLLocalLuceneIndex(meterRegistry, - clusterName, - i, - indicizerAnalyzers, - indicizerSimilarities, - luceneOptions, - luceneHacks - ); - } - this.clusterName = clusterName; - this.totalShards = totalShards; - this.luceneIndicesById = luceneIndices; - var luceneIndicesSet = new HashSet(); - for (var luceneIndex : luceneIndices) { - if (luceneIndex != null) { - luceneIndicesSet.add(luceneIndex); - } - } - this.luceneIndicesSet = new ArrayList<>(luceneIndicesSet); - this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers); - this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities); - this.lowMemory = luceneOptions.lowMemory(); - - var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries(); - if (luceneHacks != null && luceneHacks.customMultiSearcher() != null) { - multiSearcher = luceneHacks.customMultiSearcher().get(); - } else { - multiSearcher = new AdaptiveMultiSearcher(maxInMemoryResultEntries); - } - } - - private LLLocalLuceneIndex getLuceneIndex(LLTerm id) { - return Objects.requireNonNull(luceneIndicesById[LuceneUtils.getLuceneIndexId(id, totalShards)]); - } - - @Override - public String getLuceneIndexName() { - return clusterName; - } - - private LLIndexSearchers getIndexSearchers(LLSnapshot snapshot) { - // Resolve the snapshot of each shard - return LLIndexSearchers.of(StreamUtils.toListOn(StreamUtils.LUCENE_POOL, - Streams.mapWithIndex(this.luceneIndicesSet.stream(), (luceneIndex, index) -> { - var subSnapshot = resolveSnapshot(snapshot, (int) index); - return luceneIndex.retrieveSearcher(subSnapshot); - }) - )); - } - - @Override - public void addDocument(LLTerm id, LLUpdateDocument doc) { - getLuceneIndex(id).addDocument(id, doc); - } - - @Override - public long addDocuments(boolean atomic, Stream> documents) { - return collectOn(LUCENE_POOL, - partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents) - .map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())), - fastSummingLong() - ); - } - - @Override - public void deleteDocument(LLTerm id) { - getLuceneIndex(id).deleteDocument(id); - } - - @Override - public void update(LLTerm id, LLIndexRequest request) { - getLuceneIndex(id).update(id, request); - } - - @Override - public long updateDocuments(Stream> documents) { - return collectOn(LUCENE_POOL, - partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents) - .map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())), - fastSummingLong() - ); - } - - @Override - public void deleteAll() { - luceneIndicesSet.forEach(LLLuceneIndex::deleteAll); - } - - private LLSnapshot resolveSnapshot(LLSnapshot multiSnapshot, int instanceId) { - if (multiSnapshot != null) { - return registeredSnapshots.get(multiSnapshot.getSequenceNumber()).get(instanceId); - } else { - return null; - } - } - - @Override - public Stream moreLikeThis(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - String keyFieldName, - Multimap mltDocumentFields) { - LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer); - try (var searchers = this.getIndexSearchers(snapshot)) { - var transformer = new MoreLikeThisTransformer(mltDocumentFields, luceneAnalyzer, luceneSimilarity); - - // Collect all the shards results into a single global result - LuceneSearchResult result = multiSearcher.collectMulti(searchers, - localQueryParams, - keyFieldName, - transformer, - Function.identity() - ); - - // Transform the result type - return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount())); - } - } - - @Override - public Stream search(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - @Nullable String keyFieldName) { - LuceneSearchResult result = searchInternal(snapshot, queryParams, keyFieldName); - // Transform the result type - var shard = new LLSearchResultShard(result.results(), result.totalHitsCount()); - return Stream.of(shard); - } - - private LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot, - QueryParams queryParams, - @Nullable String keyFieldName) { - LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer); - try (var searchers = getIndexSearchers(snapshot)) { - - // Collect all the shards results into a single global result - return multiSearcher.collectMulti(searchers, - localQueryParams, - keyFieldName, - GlobalQueryRewrite.NO_REWRITE, - Function.identity() - ); - } - } - - @Override - public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) { - var params = LuceneUtils.getCountQueryParams(query); - var result = this.searchInternal(snapshot, params, null); - return result != null ? result.totalHitsCount() : TotalHitsCount.of(0, true); - } - - @Override - public Buckets computeBuckets(@Nullable LLSnapshot snapshot, - @NotNull List queries, - @Nullable Query normalizationQuery, - BucketParams bucketParams) { - List localQueries = new ArrayList<>(queries.size()); - for (Query query : queries) { - localQueries.add(QueryParser.toQuery(query, luceneAnalyzer)); - } - var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer); - try (var searchers = getIndexSearchers(snapshot)) { - - // Collect all the shards results into a single global result - return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery); - } - } - - @Override - protected void onClose() { - collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(SafeCloseable::close)); - if (multiSearcher instanceof Closeable closeable) { - try { - closeable.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - - @Override - public void flush() { - collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush)); - } - - @Override - public void waitForMerges() { - collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges)); - } - - @Override - public void waitForLastMerges() { - collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges)); - } - - @Override - public void refresh(boolean force) { - collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force))); - } - - @Override - public LLSnapshot takeSnapshot() { - // Generate next snapshot index - var snapshotIndex = nextSnapshotNumber.getAndIncrement(); - var snapshot = collectOn(StreamUtils.LUCENE_POOL, - luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot), - fastListing() - ); - registeredSnapshots.put(snapshotIndex, snapshot); - return new LLSnapshot(snapshotIndex); - } - - @Override - public void releaseSnapshot(LLSnapshot snapshot) { - var list = registeredSnapshots.remove(snapshot.getSequenceNumber()); - for (int shardIndex = 0; shardIndex < list.size(); shardIndex++) { - var luceneIndex = luceneIndicesSet.get(shardIndex); - LLSnapshot instanceSnapshot = list.get(shardIndex); - luceneIndex.releaseSnapshot(instanceSnapshot); - } - } - - @Override - public boolean isLowMemoryMode() { - return lowMemory; - } - - @Override - public void pauseForBackup() { - collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup)); - } - - @Override - public void resumeAfterBackup() { - collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup)); - } - - @Override - public boolean isPaused() { - return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LuceneIndexSnapshot.java b/src/main/java/it/cavallium/dbengine/database/disk/LuceneIndexSnapshot.java deleted file mode 100644 index 8610722..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/LuceneIndexSnapshot.java +++ /dev/null @@ -1,75 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.utils.SimpleResource; -import java.io.IOException; -import it.cavallium.dbengine.utils.DBException; -import java.util.concurrent.Executor; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexCommit; -import org.apache.lucene.search.IndexSearcher; -import org.jetbrains.annotations.Nullable; - -public class LuceneIndexSnapshot extends SimpleResource implements DiscardingCloseable, LuceneCloseable { - private final IndexCommit snapshot; - - private boolean initialized; - private boolean failed; - private boolean closed; - - private DirectoryReader indexReader; - private IndexSearcher indexSearcher; - - public LuceneIndexSnapshot(IndexCommit snapshot) { - this.snapshot = snapshot; - } - - public IndexCommit getSnapshot() { - return snapshot; - } - - /** - * Can be called only if the snapshot has not been closed - * @throws IllegalStateException if closed or failed - */ - public synchronized IndexSearcher getIndexSearcher(@Nullable Executor searchExecutor) throws IllegalStateException { - openDirectoryIfNeeded(searchExecutor); - return indexSearcher; - } - - private synchronized void openDirectoryIfNeeded(@Nullable Executor searchExecutor) throws IllegalStateException { - if (closed) { - throw new IllegalStateException("Snapshot is closed"); - } - if (failed) { - throw new IllegalStateException("Snapshot failed to open"); - } - if (!initialized) { - try { - var indexReader = DirectoryReader.open(snapshot); - this.indexReader = indexReader; - indexSearcher = new IndexSearcher(indexReader, searchExecutor); - - initialized = true; - } catch (IOException e) { - failed = true; - throw new RuntimeException(e); - } - } - } - - @Override - protected synchronized void onClose() { - closed = true; - - if (initialized && !failed) { - try { - indexReader.close(); - } catch (IOException e) { - throw new DBException(e); - } - indexSearcher = null; - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LuceneThreadFactory.java b/src/main/java/it/cavallium/dbengine/database/disk/LuceneThreadFactory.java deleted file mode 100644 index 7ad0f3c..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/LuceneThreadFactory.java +++ /dev/null @@ -1,27 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import it.cavallium.dbengine.lucene.LuceneThread; -import it.cavallium.dbengine.utils.ShortNamedThreadFactory; -import java.util.Locale; -import org.jetbrains.annotations.NotNull; - -public class LuceneThreadFactory extends ShortNamedThreadFactory { - - /** - * Creates a new {@link ShortNamedThreadFactory} instance - * - * @param threadNamePrefix the name prefix assigned to each thread created. - */ - public LuceneThreadFactory(String threadNamePrefix) { - super(threadNamePrefix); - } - - @Override - public Thread newThread(@NotNull Runnable r) { - final Thread t = new LuceneThread(group, r, String.format(Locale.ROOT, "%s-%d", - this.threadNamePrefix, threadNumber.getAndIncrement()), 0); - t.setDaemon(daemon); - t.setPriority(Thread.NORM_PRIORITY); - return t; - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/OptimisticRocksDBColumn.java b/src/main/java/it/cavallium/dbengine/database/disk/OptimisticRocksDBColumn.java index e462876..1dd1c06 100644 --- a/src/main/java/it/cavallium/dbengine/database/disk/OptimisticRocksDBColumn.java +++ b/src/main/java/it/cavallium/dbengine/database/disk/OptimisticRocksDBColumn.java @@ -10,9 +10,8 @@ import it.cavallium.dbengine.database.LLUtils; import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions; import it.cavallium.dbengine.database.disk.rocksdb.LLWriteOptions; import it.cavallium.dbengine.database.serialization.SerializationFunction; -import it.cavallium.dbengine.lucene.ExponentialPageLimits; +import it.cavallium.dbengine.utils.ExponentialLimits; import it.cavallium.dbengine.utils.DBException; -import java.io.IOException; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.locks.LockSupport; @@ -21,13 +20,11 @@ import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.OptimisticTransactionDB; -import org.rocksdb.ReadOptions; import org.rocksdb.RocksDBException; import org.rocksdb.Status.Code; import org.rocksdb.Transaction; import org.rocksdb.TransactionOptions; import org.rocksdb.WriteBatch; -import org.rocksdb.WriteOptions; public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn { @@ -95,7 +92,7 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn refreshSubscription; - - public SimpleIndexSearcherManager(IndexWriter indexWriter, - @Nullable SnapshotsManager snapshotsManager, - ScheduledExecutorService luceneHeavyTasksScheduler, - Similarity similarity, - boolean applyAllDeletes, - boolean writeAllDeletes, - Duration queryRefreshDebounceTime) { - this.snapshotsManager = snapshotsManager; - this.luceneHeavyTasksScheduler = luceneHeavyTasksScheduler; - this.similarity = similarity; - this.queryRefreshDebounceTime = queryRefreshDebounceTime; - - try { - this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY); - } catch (IOException e) { - throw new DBException(e); - } - - refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> { - try { - maybeRefresh(); - } catch (Exception ex) { - LOG.error("Failed to refresh the searcher manager", ex); - } - }, queryRefreshDebounceTime.toMillis(), queryRefreshDebounceTime.toMillis(), TimeUnit.MILLISECONDS); - } - - private void dropCachedIndexSearcher() { - // This shouldn't happen more than once per searcher. - activeSearchers.decrementAndGet(); - } - - @Override - public void maybeRefreshBlocking() { - try { - activeRefreshes.incrementAndGet(); - searcherManager.maybeRefreshBlocking(); - } catch (AlreadyClosedException ignored) { - - } catch (IOException e) { - throw new DBException(e); - } finally { - activeRefreshes.decrementAndGet(); - } - } - - @Override - public void maybeRefresh() { - try { - activeRefreshes.incrementAndGet(); - searcherManager.maybeRefresh(); - } catch (AlreadyClosedException ignored) { - - } catch (IOException e) { - throw new DBException(e); - } finally { - activeRefreshes.decrementAndGet(); - } - } - - @Override - public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) { - if (snapshot == null) { - return retrieveSearcherInternal(null); - } else { - return retrieveSearcherInternal(snapshot); - } - } - - private LLIndexSearcher retrieveSearcherInternal(@Nullable LLSnapshot snapshot) { - if (isClosed()) { - return null; - } - try { - if (snapshotsManager == null || snapshot == null) { - return new OnDemandIndexSearcher(searcherManager, similarity); - } else { - activeSearchers.incrementAndGet(); - IndexSearcher indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR); - indexSearcher.setSimilarity(similarity); - assert indexSearcher.getIndexReader().getRefCount() > 0; - return new SnapshotIndexSearcher(indexSearcher); - } - } catch (Throwable ex) { - activeSearchers.decrementAndGet(); - throw ex; - } - } - - @Override - protected void onClose() { - LOG.debug("Closing IndexSearcherManager..."); - refreshSubscription.cancel(false); - long initTime = System.nanoTime(); - while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 15000000000L) { - LockSupport.parkNanos(50000000); - } - refreshSubscription.cancel(true); - LOG.debug("Closed IndexSearcherManager"); - LOG.debug("Closing refresh tasks..."); - initTime = System.nanoTime(); - while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) { - LockSupport.parkNanos(50000000); - } - if (activeRefreshes.get() > 0) { - LOG.warn("Some refresh tasks remained active after shutdown: {}", activeRefreshes.get()); - } - LOG.debug("Closed refresh tasks"); - LOG.debug("Closing active searchers..."); - initTime = System.nanoTime(); - while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) { - LockSupport.parkNanos(50000000); - } - if (activeSearchers.get() > 0) { - LOG.warn("Some searchers remained active after shutdown: {}", activeSearchers.get()); - } - LOG.debug("Closed active searchers"); - LOG.debug("Stopping searcher executor..."); - SEARCH_EXECUTOR.shutdown(); - try { - if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) { - SEARCH_EXECUTOR.shutdownNow(); - } - } catch (InterruptedException e) { - LOG.error("Failed to stop executor", e); - } - LOG.debug("Stopped searcher executor"); - } - - public long getActiveSearchers() { - return activeSearchers.get(); - } - - public long getActiveRefreshes() { - return activeRefreshes.get(); - } - - private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable { - - public MainIndexSearcher(IndexSearcher indexSearcher) { - super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher)); - } - - private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) { - try { - LOG.warn("An index searcher was not closed!"); - searcherManager.release(indexSearcher); - } catch (IOException ex) { - LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex); - } - } - - @Override - public void onClose() { - dropCachedIndexSearcher(); - try { - searcherManager.release(indexSearcher); - } catch (IOException ex) { - throw new DBException(ex); - } - } - } - - private class SnapshotIndexSearcher extends LLIndexSearcherImpl { - - public SnapshotIndexSearcher(IndexSearcher indexSearcher) { - super(indexSearcher); - } - - @Override - public void onClose() { - dropCachedIndexSearcher(); - } - } - - private class OnDemandIndexSearcher extends LLIndexSearcher implements LuceneCloseable { - - private final SearcherManager searcherManager; - private final Similarity similarity; - - private IndexSearcher indexSearcher = null; - - public OnDemandIndexSearcher(SearcherManager searcherManager, - Similarity similarity) { - super(); - this.searcherManager = searcherManager; - this.similarity = similarity; - } - - @Override - protected IndexSearcher getIndexSearcherInternal() { - if (indexSearcher != null) { - return indexSearcher; - } - synchronized (this) { - try { - var indexSearcher = searcherManager.acquire(); - indexSearcher.setSimilarity(similarity); - activeSearchers.incrementAndGet(); - this.indexSearcher = indexSearcher; - return indexSearcher; - } catch (IOException e) { - throw new IllegalStateException("Failed to acquire the index searcher", e); - } - } - } - - @Override - protected void onClose() { - try { - synchronized (this) { - if (indexSearcher != null) { - dropCachedIndexSearcher(); - searcherManager.release(indexSearcher); - } - } - } catch (IOException ex) { - throw new DBException(ex); - } - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/SnapshotsManager.java b/src/main/java/it/cavallium/dbengine/database/disk/SnapshotsManager.java deleted file mode 100644 index 975fde9..0000000 --- a/src/main/java/it/cavallium/dbengine/database/disk/SnapshotsManager.java +++ /dev/null @@ -1,110 +0,0 @@ -package it.cavallium.dbengine.database.disk; - -import it.cavallium.dbengine.database.LLSnapshot; -import it.cavallium.dbengine.utils.SimpleResource; -import java.io.IOException; -import it.cavallium.dbengine.utils.DBException; -import java.util.Objects; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.Phaser; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.lucene.index.IndexCommit; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.SnapshotDeletionPolicy; -import org.jetbrains.annotations.Nullable; - -public class SnapshotsManager extends SimpleResource { - - private final IndexWriter indexWriter; - private final SnapshotDeletionPolicy snapshotter; - private final Phaser activeTasks = new Phaser(1); - /** - * Last snapshot sequence number. 0 is not used - */ - private final AtomicLong lastSnapshotSeqNo = new AtomicLong(0); - /** - * LLSnapshot seq no to index commit point - */ - private final ConcurrentHashMap snapshots = new ConcurrentHashMap<>(); - - public SnapshotsManager(IndexWriter indexWriter, - SnapshotDeletionPolicy snapshotter) { - this.indexWriter = indexWriter; - this.snapshotter = snapshotter; - } - - public LuceneIndexSnapshot resolveSnapshot(@Nullable LLSnapshot snapshot) { - if (snapshot == null) { - return null; - } - return Objects.requireNonNull(snapshots.get(snapshot.getSequenceNumber()), - () -> "Can't resolve snapshot " + snapshot.getSequenceNumber() - ); - } - - public LLSnapshot takeSnapshot() { - return takeLuceneSnapshot(); - } - - /** - * Use internally. This method commits before taking the snapshot if there are no commits in a new database, - * avoiding the exception. - */ - private LLSnapshot takeLuceneSnapshot() { - activeTasks.register(); - try { - if (snapshotter.getSnapshots().isEmpty()) { - indexWriter.commit(); - } - var snapshotSeqNo = lastSnapshotSeqNo.incrementAndGet(); - IndexCommit snapshot = snapshotter.snapshot(); - var prevSnapshot = this.snapshots.put(snapshotSeqNo, new LuceneIndexSnapshot(snapshot)); - - // Unexpectedly found a snapshot - if (prevSnapshot != null) { - try { - prevSnapshot.close(); - } catch (DBException e) { - throw new IllegalStateException("Can't close snapshot", e); - } - } - - return new LLSnapshot(snapshotSeqNo); - } catch (IOException e) { - throw new DBException(e); - } finally { - activeTasks.arriveAndDeregister(); - } - } - - public void releaseSnapshot(LLSnapshot snapshot) { - activeTasks.register(); - try { - var indexSnapshot = this.snapshots.remove(snapshot.getSequenceNumber()); - if (indexSnapshot == null) { - throw new DBException("LLSnapshot " + snapshot.getSequenceNumber() + " not found!"); - } - - var luceneIndexSnapshot = indexSnapshot.getSnapshot(); - snapshotter.release(luceneIndexSnapshot); - } catch (IOException e) { - throw new DBException(e); - } finally { - activeTasks.arriveAndDeregister(); - } - } - - /** - * Returns the total number of snapshots currently held. - */ - public int getSnapshotsCount() { - return Math.max(snapshots.size(), snapshotter.getSnapshotCount()); - } - - @Override - protected void onClose() { - if (!activeTasks.isTerminated()) { - activeTasks.arriveAndAwaitAdvance(); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/memory/LLMemoryDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/memory/LLMemoryDatabaseConnection.java index d3e8c1b..0fac207 100644 --- a/src/main/java/it/cavallium/dbengine/database/memory/LLMemoryDatabaseConnection.java +++ b/src/main/java/it/cavallium/dbengine/database/memory/LLMemoryDatabaseConnection.java @@ -3,23 +3,11 @@ package it.cavallium.dbengine.database.memory; import io.micrometer.core.instrument.MeterRegistry; import it.cavallium.dbengine.database.LLDatabaseConnection; import it.cavallium.dbengine.database.LLKeyValueDatabase; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.database.disk.LLLocalLuceneIndex; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory; import it.cavallium.dbengine.rpc.current.data.Column; import it.cavallium.dbengine.rpc.current.data.DatabaseOptions; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; -import it.cavallium.dbengine.rpc.current.data.LuceneOptionsBuilder; import java.util.List; import java.util.StringJoiner; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import org.jetbrains.annotations.Nullable; public class LLMemoryDatabaseConnection implements LLDatabaseConnection { @@ -50,27 +38,6 @@ public class LLMemoryDatabaseConnection implements LLDatabaseConnection { return new LLMemoryKeyValueDatabase(meterRegistry, name, columns); } - @Override - public LLLuceneIndex getLuceneIndex(String clusterName, - LuceneIndexStructure indexStructure, - IndicizerAnalyzers indicizerAnalyzers, - IndicizerSimilarities indicizerSimilarities, - LuceneOptions luceneOptions, - @Nullable LuceneHacks luceneHacks) { - var memoryLuceneOptions = LuceneOptionsBuilder - .builder(luceneOptions) - .directoryOptions(new ByteBuffersDirectory()) - .build(); - return new LLLocalLuceneIndex(meterRegistry, - clusterName, - 0, - indicizerAnalyzers, - indicizerSimilarities, - memoryLuceneOptions, - luceneHacks - ); - } - @Override public void disconnect() { connected.compareAndSet(true, false); diff --git a/src/main/java/it/cavallium/dbengine/database/remote/LuceneHacksSerializer.java b/src/main/java/it/cavallium/dbengine/database/remote/LuceneHacksSerializer.java deleted file mode 100644 index e9255b2..0000000 --- a/src/main/java/it/cavallium/dbengine/database/remote/LuceneHacksSerializer.java +++ /dev/null @@ -1,25 +0,0 @@ -package it.cavallium.dbengine.database.remote; - -import it.cavallium.datagen.DataSerializer; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.stream.SafeDataInput; -import it.cavallium.stream.SafeDataOutput; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; - -public class LuceneHacksSerializer implements DataSerializer { - - @Override - public void serialize(SafeDataOutput dataOutput, @NotNull LuceneHacks luceneHacks) { - if (luceneHacks.customLocalSearcher() != null || luceneHacks.customMultiSearcher() != null) { - throw new UnsupportedOperationException("Can't encode this type"); - } - } - - @Override - public @NotNull LuceneHacks deserialize(SafeDataInput dataInput) { - return new LuceneHacks(null, null); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/String2FieldAnalyzerMapSerializer.java b/src/main/java/it/cavallium/dbengine/database/remote/String2FieldAnalyzerMapSerializer.java deleted file mode 100644 index 06b359e..0000000 --- a/src/main/java/it/cavallium/dbengine/database/remote/String2FieldAnalyzerMapSerializer.java +++ /dev/null @@ -1,38 +0,0 @@ -package it.cavallium.dbengine.database.remote; - -import it.cavallium.datagen.DataSerializer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import it.cavallium.stream.SafeDataInput; -import it.cavallium.stream.SafeDataOutput; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import org.jetbrains.annotations.NotNull; - -public class String2FieldAnalyzerMapSerializer implements DataSerializer> { - - private static final TextFieldsAnalyzerSerializer TEXT_FIELDS_ANALYZER_SERIALIZER = new TextFieldsAnalyzerSerializer(); - - @Override - public void serialize(SafeDataOutput dataOutput, @NotNull Map stringTextFieldsAnalyzerMap) { - dataOutput.writeInt(stringTextFieldsAnalyzerMap.size()); - for (Entry entry : stringTextFieldsAnalyzerMap.entrySet()) { - dataOutput.writeUTF(entry.getKey()); - TEXT_FIELDS_ANALYZER_SERIALIZER.serialize(dataOutput, entry.getValue()); - } - } - - @Override - public @NotNull Map deserialize(SafeDataInput dataInput) { - var size = dataInput.readInt(); - var result = new HashMap(size); - for (int i = 0; i < size; i++) { - result.put(dataInput.readUTF(), TEXT_FIELDS_ANALYZER_SERIALIZER.deserialize(dataInput)); - } - return Collections.unmodifiableMap(result); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/String2FieldSimilarityMapSerializer.java b/src/main/java/it/cavallium/dbengine/database/remote/String2FieldSimilarityMapSerializer.java deleted file mode 100644 index cacf7a7..0000000 --- a/src/main/java/it/cavallium/dbengine/database/remote/String2FieldSimilarityMapSerializer.java +++ /dev/null @@ -1,38 +0,0 @@ -package it.cavallium.dbengine.database.remote; - -import it.cavallium.datagen.DataSerializer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import it.cavallium.stream.SafeDataInput; -import it.cavallium.stream.SafeDataOutput; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import org.jetbrains.annotations.NotNull; - -public class String2FieldSimilarityMapSerializer implements DataSerializer> { - - private static final TextFieldsSimilaritySerializer TEXT_FIELDS_SIMILARITY_SERIALIZER = new TextFieldsSimilaritySerializer(); - - @Override - public void serialize(SafeDataOutput dataOutput, @NotNull Map stringTextFieldsSimilarityMap) { - dataOutput.writeInt(stringTextFieldsSimilarityMap.size()); - for (Entry entry : stringTextFieldsSimilarityMap.entrySet()) { - dataOutput.writeUTF(entry.getKey()); - TEXT_FIELDS_SIMILARITY_SERIALIZER.serialize(dataOutput, entry.getValue()); - } - } - - @Override - public @NotNull Map deserialize(SafeDataInput dataInput) { - var size = dataInput.readInt(); - var result = new HashMap(size); - for (int i = 0; i < size; i++) { - result.put(dataInput.readUTF(), TEXT_FIELDS_SIMILARITY_SERIALIZER.deserialize(dataInput)); - } - return Collections.unmodifiableMap(result); - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/TextFieldsAnalyzerSerializer.java b/src/main/java/it/cavallium/dbengine/database/remote/TextFieldsAnalyzerSerializer.java deleted file mode 100644 index d968125..0000000 --- a/src/main/java/it/cavallium/dbengine/database/remote/TextFieldsAnalyzerSerializer.java +++ /dev/null @@ -1,23 +0,0 @@ -package it.cavallium.dbengine.database.remote; - -import it.cavallium.datagen.DataSerializer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import it.cavallium.stream.SafeDataInput; -import it.cavallium.stream.SafeDataOutput; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; - -public class TextFieldsAnalyzerSerializer implements DataSerializer { - - @Override - public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsAnalyzer textFieldsAnalyzer) { - dataOutput.writeInt(textFieldsAnalyzer.ordinal()); - } - - @Override - public @NotNull TextFieldsAnalyzer deserialize(SafeDataInput dataInput) { - return TextFieldsAnalyzer.values()[dataInput.readInt()]; - } -} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/TextFieldsSimilaritySerializer.java b/src/main/java/it/cavallium/dbengine/database/remote/TextFieldsSimilaritySerializer.java deleted file mode 100644 index f9771e2..0000000 --- a/src/main/java/it/cavallium/dbengine/database/remote/TextFieldsSimilaritySerializer.java +++ /dev/null @@ -1,23 +0,0 @@ -package it.cavallium.dbengine.database.remote; - -import it.cavallium.datagen.DataSerializer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import it.cavallium.stream.SafeDataInput; -import it.cavallium.stream.SafeDataOutput; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; - -public class TextFieldsSimilaritySerializer implements DataSerializer { - - @Override - public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsSimilarity textFieldsSimilarity) { - dataOutput.writeInt(textFieldsSimilarity.ordinal()); - } - - @Override - public @NotNull TextFieldsSimilarity deserialize(SafeDataInput dataInput) { - return TextFieldsSimilarity.values()[dataInput.readInt()]; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/ArrayIndexComparator.java b/src/main/java/it/cavallium/dbengine/lucene/ArrayIndexComparator.java deleted file mode 100644 index 06c6f4c..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/ArrayIndexComparator.java +++ /dev/null @@ -1,25 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.util.Comparator; -import org.apache.lucene.index.IndexReader; - -public class ArrayIndexComparator implements Comparator { - - private final Comparator comp; - - public ArrayIndexComparator(IndexReader[] indexReaders) { - this.comp = Comparator.comparingInt(reader -> { - for (int i = 0; i < indexReaders.length; i++) { - if (indexReaders[i] == reader) { - return i; - } - } - throw new IllegalStateException(); - }); - } - - @Override - public int compare(IndexReader o1, IndexReader o2) { - return comp.compare(o1, o2); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/CheckIndexInput.java b/src/main/java/it/cavallium/dbengine/lucene/CheckIndexInput.java deleted file mode 100644 index 8ce336b..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/CheckIndexInput.java +++ /dev/null @@ -1,116 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread; - -import java.io.IOException; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.RandomAccessInput; - -public class CheckIndexInput extends IndexInput { - - private final IndexInput input; - - public CheckIndexInput(IndexInput input) { - super(input.toString()); - this.input = input; - } - - private static void checkThread() { - warnLuceneThread(); - } - - @Override - public void close() throws IOException { - warnLuceneThread(); - input.close(); - } - - @Override - public long getFilePointer() { - checkThread(); - return input.getFilePointer(); - } - - @Override - public void seek(long pos) throws IOException { - checkThread(); - input.seek(pos); - } - - @Override - public long length() { - checkThread(); - return input.length(); - } - - @Override - public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { - checkThread(); - return input.slice(sliceDescription, offset, length); - } - - @Override - public byte readByte() throws IOException { - checkThread(); - return input.readByte(); - } - - @Override - public void readBytes(byte[] b, int offset, int len) throws IOException { - checkThread(); - input.readBytes(b, offset, len); - } - - @Override - public void skipBytes(long numBytes) throws IOException { - checkThread(); - input.skipBytes(numBytes); - } - - @Override - public IndexInput clone() { - return new CheckIndexInput(input.clone()); - } - - @Override - public String toString() { - checkThread(); - return input.toString(); - } - - @Override - public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException { - var ras = input.randomAccessSlice(offset, length); - return new RandomAccessInput() { - @Override - public long length() { - checkThread(); - return ras.length(); - } - - @Override - public byte readByte(long pos) throws IOException { - checkThread(); - return ras.readByte(pos); - } - - @Override - public short readShort(long pos) throws IOException { - checkThread(); - return ras.readShort(pos); - } - - @Override - public int readInt(long pos) throws IOException { - checkThread(); - return ras.readInt(pos); - } - - @Override - public long readLong(long pos) throws IOException { - checkThread(); - return ras.readLong(pos); - } - }; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/CheckIndexOutput.java b/src/main/java/it/cavallium/dbengine/lucene/CheckIndexOutput.java deleted file mode 100644 index fe5fe71..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/CheckIndexOutput.java +++ /dev/null @@ -1,60 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread; - -import java.io.IOException; -import org.apache.lucene.store.IndexOutput; - -public class CheckIndexOutput extends IndexOutput { - - private final IndexOutput output; - - public CheckIndexOutput(IndexOutput output) { - super(output.toString(), output.getName()); - this.output = output; - } - - private static void checkThread() { - LuceneUtils.warnLuceneThread(); - } - - @Override - public void close() throws IOException { - warnLuceneThread(); - output.close(); - } - - @Override - public long getFilePointer() { - checkThread(); - return output.getFilePointer(); - } - - @Override - public long getChecksum() throws IOException { - checkThread(); - return output.getChecksum(); - } - - @Override - public void writeByte(byte b) throws IOException { - checkThread(); - output.writeByte(b); - } - - @Override - public void writeBytes(byte[] b, int offset, int length) throws IOException { - checkThread(); - output.writeBytes(b, offset, length); - } - - @Override - public String getName() { - return output.getName(); - } - - @Override - public String toString() { - return output.toString(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/CheckOutputDirectory.java b/src/main/java/it/cavallium/dbengine/lucene/CheckOutputDirectory.java deleted file mode 100644 index d4e5574..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/CheckOutputDirectory.java +++ /dev/null @@ -1,138 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread; - -import it.cavallium.dbengine.utils.DBException; -import java.io.IOException; -import java.util.Collection; -import java.util.Set; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.Lock; - -public class CheckOutputDirectory extends Directory { - - private final Directory directory; - - public CheckOutputDirectory(Directory directory) { - this.directory = directory; - } - - @Override - public String[] listAll() { - try { - return directory.listAll(); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public void deleteFile(String name) { - try { - directory.deleteFile(name); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public long fileLength(String name) { - try { - return directory.fileLength(name); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public IndexOutput createOutput(String name, IOContext context) { - LuceneUtils.warnLuceneThread(); - try { - return new CheckIndexOutput(directory.createOutput(name, context)); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) { - LuceneUtils.warnLuceneThread(); - try { - return new CheckIndexOutput(directory.createTempOutput(prefix, suffix, context)); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public void sync(Collection names) { - LuceneUtils.warnLuceneThread(); - try { - directory.sync(names); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public void syncMetaData() { - LuceneUtils.warnLuceneThread(); - try { - directory.syncMetaData(); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public void rename(String source, String dest) { - LuceneUtils.warnLuceneThread(); - try { - directory.rename(source, dest); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public IndexInput openInput(String name, IOContext context) { - LuceneUtils.warnLuceneThread(); - try { - return new CheckIndexInput(directory.openInput(name, context)); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public Lock obtainLock(String name) { - LuceneUtils.warnLuceneThread(); - try { - return directory.obtainLock(name); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public void close() { - warnLuceneThread(); - try { - directory.close(); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public Set getPendingDeletions() { - try { - return directory.getPendingDeletions(); - } catch (IOException e) { - throw new DBException(e); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/CloseableIterable.java b/src/main/java/it/cavallium/dbengine/lucene/CloseableIterable.java deleted file mode 100644 index 133ac92..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/CloseableIterable.java +++ /dev/null @@ -1,15 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import java.util.Iterator; -import org.jetbrains.annotations.NotNull; - -public interface CloseableIterable extends Iterable, DiscardingCloseable { - - @Override - void close(); - - @NotNull - @Override - Iterator iterator(); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/DirectNIOFSDirectory.java b/src/main/java/it/cavallium/dbengine/lucene/DirectNIOFSDirectory.java deleted file mode 100644 index 5181507..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/DirectNIOFSDirectory.java +++ /dev/null @@ -1,143 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import static it.cavallium.dbengine.lucene.LuceneUtils.alignUnsigned; -import static it.cavallium.dbengine.lucene.LuceneUtils.readInternalAligned; - -import it.cavallium.dbengine.utils.DBException; -import java.io.Closeable; -import java.io.EOFException; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.file.OpenOption; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import org.apache.lucene.store.BufferedIndexInput; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.FSLockFactory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.LockFactory; -import org.apache.lucene.util.IOUtils; - -@SuppressWarnings({"RedundantArrayCreation", "unused", "unused", "RedundantCast"}) -public class DirectNIOFSDirectory extends FSDirectory { - - @SuppressWarnings("sunapi") - private final OpenOption[] openOptions = {StandardOpenOption.READ, com.sun.nio.file.ExtendedOpenOption.DIRECT}; - - public DirectNIOFSDirectory(Path path, LockFactory lockFactory) throws IOException { - super(path, lockFactory); - } - - public DirectNIOFSDirectory(Path path) throws IOException { - this(path, FSLockFactory.getDefault()); - } - - @Override - public IndexInput openInput(String name, IOContext context) throws IOException { - this.ensureOpen(); - this.ensureCanRead(name); - Path path = this.getDirectory().resolve(name); - FileChannel fc = FileChannel.open(path, openOptions); - boolean success = false; - - DirectNIOFSDirectory.NIOFSIndexInput var7; - try { - DirectNIOFSDirectory.NIOFSIndexInput indexInput = new DirectNIOFSDirectory.NIOFSIndexInput("NIOFSIndexInput(path=\"" + path + "\")", fc, context); - success = true; - var7 = indexInput; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(new Closeable[]{fc}); - } - - } - - return var7; - } - - static final class NIOFSIndexInput extends BufferedIndexInput { - private static final int CHUNK_SIZE = 16384; - private final FileChannel channel; - boolean isClone = false; - private final long off; - private final long end; - - public NIOFSIndexInput(String resourceDesc, FileChannel fc, IOContext context) throws IOException { - super(resourceDesc, context); - this.channel = fc; - this.off = 0L; - this.end = fc.size(); - } - - public NIOFSIndexInput(String resourceDesc, FileChannel fc, long off, long length, int bufferSize) { - super(resourceDesc, bufferSize); - this.channel = fc; - this.off = off; - this.end = off + length; - this.isClone = true; - } - - public void close() throws IOException { - if (!this.isClone) { - this.channel.close(); - } - - } - - public DirectNIOFSDirectory.NIOFSIndexInput clone() { - DirectNIOFSDirectory.NIOFSIndexInput clone = (DirectNIOFSDirectory.NIOFSIndexInput)super.clone(); - clone.isClone = true; - return clone; - } - - public IndexInput slice(String sliceDescription, long offset, long length) { - if (offset >= 0L && length >= 0L && offset + length <= this.length()) { - return new DirectNIOFSDirectory.NIOFSIndexInput(this.getFullSliceDescription(sliceDescription), this.channel, this.off + offset, length, this.getBufferSize()); - } else { - throw new IllegalArgumentException("slice() " + sliceDescription + " out of bounds: offset=" + offset + ",length=" + length + ",fileLength=" + this.length() + ": " + this); - } - } - - public long length() { - return this.end - this.off; - } - - protected void readInternal(ByteBuffer b) throws EOFException { - long pos = this.getFilePointer() + this.off; - if (pos + (long)b.remaining() > this.end) { - throw new EOFException("read past EOF: " + this); - } - - try { - if (pos % 4096 == 0 && b.remaining() % 4096 == 0) { - readInternalAligned(this, this.channel, pos, b, b.remaining(), b.remaining(), end); - } else { - long startOffsetAligned = alignUnsigned(pos, false); - int size = b.remaining(); - long endOffsetAligned = alignUnsigned(pos + size, true); - long expectedTempBufferSize = endOffsetAligned - startOffsetAligned; - if (expectedTempBufferSize > Integer.MAX_VALUE || expectedTempBufferSize < 0) { - throw new IllegalStateException("Invalid temp buffer size: " + expectedTempBufferSize); - } - ByteBuffer alignedBuf = ByteBuffer.allocate((int) expectedTempBufferSize); - int sliceStartOffset = (int) (pos - startOffsetAligned); - int sliceEndOffset = sliceStartOffset + (int) size; - readInternalAligned(this, this.channel, startOffsetAligned, alignedBuf, (int) expectedTempBufferSize, sliceEndOffset, end); - var slice = alignedBuf.slice(sliceStartOffset, sliceEndOffset - sliceStartOffset); - b.put(slice.array(), slice.arrayOffset(), sliceEndOffset - sliceStartOffset); - b.limit(b.position()); - } - } catch (IOException var7) { - throw new DBException(var7.getMessage() + ": " + this, var7); - } - } - - protected void seekInternal(long pos) throws EOFException { - if (pos > this.length()) { - throw new EOFException("read past EOF: pos=" + pos + " vs length=" + this.length() + ": " + this); - } - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/DocumentStoredSingleFieldVisitor.java b/src/main/java/it/cavallium/dbengine/lucene/DocumentStoredSingleFieldVisitor.java deleted file mode 100644 index 41eb8ce..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/DocumentStoredSingleFieldVisitor.java +++ /dev/null @@ -1,59 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.io.IOException; -import java.util.Objects; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.StoredFieldVisitor; - -public class DocumentStoredSingleFieldVisitor extends StoredFieldVisitor { - private final Document doc = new Document(); - private final String fieldToAdd; - - public DocumentStoredSingleFieldVisitor(String fieldToAdd) { - this.fieldToAdd = fieldToAdd; - } - - public DocumentStoredSingleFieldVisitor() { - this.fieldToAdd = null; - } - - public void binaryField(FieldInfo fieldInfo, byte[] value) { - this.doc.add(new StoredField(fieldInfo.name, value)); - } - - public void stringField(FieldInfo fieldInfo, String value) { - FieldType ft = new FieldType(TextField.TYPE_STORED); - ft.setStoreTermVectors(fieldInfo.hasVectors()); - ft.setOmitNorms(fieldInfo.omitsNorms()); - ft.setIndexOptions(fieldInfo.getIndexOptions()); - this.doc.add(new StoredField(fieldInfo.name, (String)Objects.requireNonNull(value, "String value should not be null"), ft)); - } - - public void intField(FieldInfo fieldInfo, int value) { - this.doc.add(new StoredField(fieldInfo.name, value)); - } - - public void longField(FieldInfo fieldInfo, long value) { - this.doc.add(new StoredField(fieldInfo.name, value)); - } - - public void floatField(FieldInfo fieldInfo, float value) { - this.doc.add(new StoredField(fieldInfo.name, value)); - } - - public void doubleField(FieldInfo fieldInfo, double value) { - this.doc.add(new StoredField(fieldInfo.name, value)); - } - - public Status needsField(FieldInfo fieldInfo) { - return Objects.equals(this.fieldToAdd, fieldInfo.name) ? Status.YES : Status.NO; - } - - public Document getDocument() { - return this.doc; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/EmptyPriorityQueue.java b/src/main/java/it/cavallium/dbengine/lucene/EmptyPriorityQueue.java deleted file mode 100644 index 49aa750..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/EmptyPriorityQueue.java +++ /dev/null @@ -1,52 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.util.stream.Stream; - -public class EmptyPriorityQueue implements PriorityQueue { - - @Override - public void add(T element) { - throw new UnsupportedOperationException(); - } - - @Override - public T top() { - return null; - } - - @Override - public T pop() { - return null; - } - - @Override - public void replaceTop(T oldTop, T newTop) { - assert oldTop == null; - assert newTop == null; - } - - @Override - public long size() { - return 0; - } - - @Override - public void clear() { - - } - - @Override - public boolean remove(T element) { - throw new UnsupportedOperationException(); - } - - @Override - public Stream iterate() { - return Stream.empty(); - } - - @Override - public void close() { - - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/FieldValueHitQueue.java b/src/main/java/it/cavallium/dbengine/lucene/FieldValueHitQueue.java deleted file mode 100644 index d8cfc5d..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/FieldValueHitQueue.java +++ /dev/null @@ -1,20 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.io.IOException; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.LeafFieldComparator; -import org.apache.lucene.search.SortField; - -public interface FieldValueHitQueue { - - FieldComparator[] getComparators(); - - int[] getReverseMul(); - - LeafFieldComparator[] getComparators(LeafReaderContext context); - - LLFieldDoc fillFields(LLSlotDoc entry); - - SortField[] getFields(); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/FullDocs.java b/src/main/java/it/cavallium/dbengine/lucene/FullDocs.java deleted file mode 100644 index 4d88e30..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/FullDocs.java +++ /dev/null @@ -1,201 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import static it.cavallium.dbengine.lucene.LLDocElementScoreComparator.SCORE_DOC_SCORE_ELEM_COMPARATOR; -import static it.cavallium.dbengine.utils.StreamUtils.mergeComparing; -import static org.apache.lucene.search.TotalHits.Relation.EQUAL_TO; -import static org.apache.lucene.search.TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; - -import it.cavallium.dbengine.lucene.collector.FullFieldDocs; -import it.cavallium.dbengine.utils.SimpleResource; -import java.util.Comparator; -import java.util.stream.Stream; -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.Pruning; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.TotalHits; -import org.apache.lucene.search.TotalHits.Relation; -import org.jetbrains.annotations.Nullable; - -public interface FullDocs extends ResourceIterable { - - Comparator SHARD_INDEX_TIE_BREAKER = Comparator.comparingInt(LLDoc::shardIndex); - Comparator DOC_ID_TIE_BREAKER = Comparator.comparingInt(LLDoc::doc); - Comparator DEFAULT_TIE_BREAKER = SHARD_INDEX_TIE_BREAKER.thenComparing(DOC_ID_TIE_BREAKER); - - @Override - Stream iterate(); - - @Override - Stream iterate(long skips); - - TotalHits totalHits(); - - static FullDocs merge(@Nullable Sort sort, FullDocs[] fullDocs) { - ResourceIterable mergedIterable = mergeResourceIterable(sort, fullDocs); - TotalHits mergedTotalHits = mergeTotalHits(fullDocs); - FullDocs docs = new MergedFullDocs<>(mergedIterable, mergedTotalHits); - if (sort != null) { - return new FullFieldDocs<>(docs, sort.getSort()); - } else { - return docs; - } - } - - static int tieBreakCompare( - T firstDoc, - T secondDoc, - Comparator tieBreaker) { - assert tieBreaker != null; - - int value = tieBreaker.compare(firstDoc, secondDoc); - if (value == 0) { - throw new IllegalStateException(); - } else { - return value; - } - } - - static ResourceIterable mergeResourceIterable( - @Nullable Sort sort, - FullDocs[] fullDocs) { - return new MergedResourceIterable<>(fullDocs, sort); - } - - static TotalHits mergeTotalHits(FullDocs[] fullDocs) { - long totalCount = 0; - Relation totalRelation = EQUAL_TO; - for (FullDocs fullDoc : fullDocs) { - var totalHits = fullDoc.totalHits(); - totalCount += totalHits.value; - totalRelation = switch (totalHits.relation) { - case EQUAL_TO -> totalRelation; - case GREATER_THAN_OR_EQUAL_TO -> totalRelation == EQUAL_TO ? GREATER_THAN_OR_EQUAL_TO : totalRelation; - }; - } - return new TotalHits(totalCount, totalRelation); - } - - class MergedResourceIterable extends SimpleResource implements ResourceIterable { - - private final FullDocs[] fullDocs; - private final @Nullable Sort sort; - - public MergedResourceIterable(FullDocs[] fullDocs, @Nullable Sort sort) { - this.fullDocs = fullDocs; - this.sort = sort; - } - - @Override - protected void onClose() { - for (FullDocs fullDoc : fullDocs) { - fullDoc.close(); - } - } - - @Override - public Stream iterate() { - @SuppressWarnings("unchecked") Stream[] iterables = new Stream[fullDocs.length]; - - for (int i = 0; i < fullDocs.length; i++) { - var singleFullDocs = fullDocs[i].iterate(); - iterables[i] = singleFullDocs; - } - - Comparator comp; - if (sort == null) { - // Merge maintaining sorting order (Algorithm taken from TopDocs.ScoreMergeSortQueue) - - comp = SCORE_DOC_SCORE_ELEM_COMPARATOR.thenComparing(DEFAULT_TIE_BREAKER); - } else { - // Merge maintaining sorting order (Algorithm taken from TopDocs.MergeSortQueue) - - SortField[] sortFields = sort.getSort(); - var comparators = new FieldComparator[sortFields.length]; - var reverseMul = new int[sortFields.length]; - - for (int compIDX = 0; compIDX < sortFields.length; ++compIDX) { - SortField sortField = sortFields[compIDX]; - comparators[compIDX] = sortField.getComparator(1, Pruning.NONE); - reverseMul[compIDX] = sortField.getReverse() ? -1 : 1; - } - - comp = (first, second) -> { - assert first != second; - - LLFieldDoc firstFD = (LLFieldDoc) first; - LLFieldDoc secondFD = (LLFieldDoc) second; - - for (int compIDX = 0; compIDX < comparators.length; ++compIDX) { - //noinspection rawtypes - FieldComparator fieldComp = comparators[compIDX]; - //noinspection unchecked - int cmp = reverseMul[compIDX] * fieldComp.compareValues(firstFD.fields().get(compIDX), - secondFD.fields().get(compIDX) - ); - if (cmp != 0) { - return cmp; - } - } - - return tieBreakCompare(first, second, DEFAULT_TIE_BREAKER); - }; - } - - @SuppressWarnings("unchecked") Stream[] fluxes = new Stream[fullDocs.length]; - for (int i = 0; i < iterables.length; i++) { - var shardIndex = i; - fluxes[i] = iterables[i].map(shard -> { - if (shard instanceof LLScoreDoc scoreDoc) { - //noinspection unchecked - return (T) new LLScoreDoc(scoreDoc.doc(), scoreDoc.score(), shardIndex); - } else if (shard instanceof LLFieldDoc fieldDoc) { - //noinspection unchecked - return (T) new LLFieldDoc(fieldDoc.doc(), fieldDoc.score(), shardIndex, fieldDoc.fields()); - } else if (shard instanceof LLSlotDoc slotDoc) { - //noinspection unchecked - return (T) new LLSlotDoc(slotDoc.doc(), slotDoc.score(), shardIndex, slotDoc.slot()); - } else { - throw new UnsupportedOperationException("Unsupported type " + (shard == null ? null : shard.getClass())); - } - }); - if (fullDocs[i].totalHits().relation == EQUAL_TO) { - fluxes[i] = fluxes[i].limit(fullDocs[i].totalHits().value); - } - } - - return mergeComparing(comp, fluxes); - } - } - - class MergedFullDocs extends SimpleResource implements FullDocs { - - private final ResourceIterable mergedIterable; - private final TotalHits mergedTotalHits; - - public MergedFullDocs(ResourceIterable mergedIterable, TotalHits mergedTotalHits) { - this.mergedIterable = mergedIterable; - this.mergedTotalHits = mergedTotalHits; - } - - @Override - public void onClose() { - mergedIterable.close(); - } - - @Override - public Stream iterate() { - return mergedIterable.iterate(); - } - - @Override - public Stream iterate(long skips) { - return mergedIterable.iterate(skips); - } - - @Override - public TotalHits totalHits() { - return mergedTotalHits; - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/IArray.java b/src/main/java/it/cavallium/dbengine/lucene/IArray.java deleted file mode 100644 index 0621809..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/IArray.java +++ /dev/null @@ -1,20 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.util.Objects; -import org.jetbrains.annotations.Nullable; - -public interface IArray { - - @Nullable T get(long index); - - void set(long index, @Nullable T value); - - void reset(long index); - - long size(); - - default T getOrDefault(int slot, T defaultValue) { - return Objects.requireNonNullElse(get(slot), defaultValue); - } - -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/IntSmear.java b/src/main/java/it/cavallium/dbengine/lucene/IntSmear.java deleted file mode 100644 index b3b770e..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/IntSmear.java +++ /dev/null @@ -1,29 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.unimi.dsi.fastutil.ints.IntHash; - -public class IntSmear implements IntHash.Strategy { - - @Override - public int hashCode(int e) { - return smear(e); - } - - /* - * This method was written by Doug Lea with assistance from members of JCP - * JSR-166 Expert Group and released to the public domain, as explained at - * http://creativecommons.org/licenses/publicdomain - * - * As of 2010/06/11, this method is identical to the (package private) hash - * method in OpenJDK 7's java.util.HashMap class. - */ - static int smear(int hashCode) { - hashCode ^= (hashCode >>> 20) ^ (hashCode >>> 12); - return hashCode ^ (hashCode >>> 7) ^ (hashCode >>> 4); - } - - @Override - public boolean equals(int a, int b) { - return a == b; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LLDoc.java b/src/main/java/it/cavallium/dbengine/lucene/LLDoc.java deleted file mode 100644 index 95162fb..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LLDoc.java +++ /dev/null @@ -1,10 +0,0 @@ -package it.cavallium.dbengine.lucene; - -public sealed interface LLDoc permits LLSlotDoc, LLFieldDoc, LLScoreDoc { - - int doc(); - - float score(); - - int shardIndex(); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LLDocElementScoreComparator.java b/src/main/java/it/cavallium/dbengine/lucene/LLDocElementScoreComparator.java deleted file mode 100644 index a8b21bd..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LLDocElementScoreComparator.java +++ /dev/null @@ -1,13 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.util.Comparator; - -class LLDocElementScoreComparator implements Comparator { - - public static final Comparator SCORE_DOC_SCORE_ELEM_COMPARATOR = new LLDocElementScoreComparator(); - - @Override - public int compare(LLDoc hitA, LLDoc hitB) { - return Float.compare(hitB.score(), hitA.score()); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LLFieldDoc.java b/src/main/java/it/cavallium/dbengine/lucene/LLFieldDoc.java deleted file mode 100644 index 7c396d6..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LLFieldDoc.java +++ /dev/null @@ -1,19 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; -import org.apache.lucene.search.FieldDoc; - -public record LLFieldDoc(int doc, float score, int shardIndex, List fields) implements LLDoc { - - @Override - public String toString() { - return "doc=" + doc + " score=" + score + " shardIndex=" + shardIndex + " fields="+ fields.stream() - .map(Objects::toString).collect(Collectors.joining(",", "[", "]")); - } - - public FieldDoc toFieldDoc() { - return new FieldDoc(doc, score, fields.toArray(Object[]::new), shardIndex); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LLScoreDoc.java b/src/main/java/it/cavallium/dbengine/lucene/LLScoreDoc.java deleted file mode 100644 index 71ba36d..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LLScoreDoc.java +++ /dev/null @@ -1,10 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import org.apache.lucene.search.ScoreDoc; - -public record LLScoreDoc(int doc, float score, int shardIndex) implements LLDoc { - - public ScoreDoc toScoreDoc() { - return new ScoreDoc(doc, score, shardIndex); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LLSlotDoc.java b/src/main/java/it/cavallium/dbengine/lucene/LLSlotDoc.java deleted file mode 100644 index 79dc5b3..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LLSlotDoc.java +++ /dev/null @@ -1,24 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.FieldValueHitQueue.Entry; -import org.apache.lucene.search.ScoreDoc; - -/** Extension of ScoreDoc to also store the {@link FieldComparator} slot. */ -public record LLSlotDoc(int doc, float score, int shardIndex, int slot) implements LLDoc { - - public ScoreDoc toScoreDoc() { - return new ScoreDoc(doc, score, shardIndex); - } - - public ScoreDoc toEntry() { - var entry = new Entry(doc, slot); - entry.shardIndex = shardIndex; - return entry; - } - - @Override - public String toString() { - return "slot:" + slot + " doc=" + doc + " score=" + score + " shardIndex=" + shardIndex; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LazyFullDocs.java b/src/main/java/it/cavallium/dbengine/lucene/LazyFullDocs.java deleted file mode 100644 index 2bdbd58..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LazyFullDocs.java +++ /dev/null @@ -1,36 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.utils.SimpleResource; -import java.util.stream.Stream; -import org.apache.lucene.search.TotalHits; - -public class LazyFullDocs extends SimpleResource implements FullDocs { - - private final ResourceIterable pq; - private final TotalHits totalHits; - - public LazyFullDocs(ResourceIterable pq, TotalHits totalHits) { - this.pq = pq; - this.totalHits = totalHits; - } - - @Override - public Stream iterate() { - return pq.iterate(); - } - - @Override - public Stream iterate(long skips) { - return pq.iterate(skips); - } - - @Override - public TotalHits totalHits() { - return totalHits; - } - - @Override - protected void onClose() { - pq.close(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LinearPageLimits.java b/src/main/java/it/cavallium/dbengine/lucene/LinearPageLimits.java deleted file mode 100644 index 6850d07..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LinearPageLimits.java +++ /dev/null @@ -1,38 +0,0 @@ -package it.cavallium.dbengine.lucene; - -/** - *
y = (x * factor) + firstPageLimit
- */ -public class LinearPageLimits implements PageLimits { - - private static final double DEFAULT_FACTOR = 0.5d; - - private final double factor; - private final double firstPageLimit; - private final double maxItemsPerPage; - - public LinearPageLimits() { - this(DEFAULT_FACTOR, DEFAULT_MIN_ITEMS_PER_PAGE); - } - - public LinearPageLimits(double factor) { - this(factor, DEFAULT_MIN_ITEMS_PER_PAGE); - } - - public LinearPageLimits(double factor, int firstPageLimit) { - this(factor, firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE); - } - - public LinearPageLimits(double factor, int firstPageLimit, int maxItemsPerPage) { - this.factor = factor; - this.firstPageLimit = firstPageLimit; - this.maxItemsPerPage = maxItemsPerPage; - } - - @Override - public int getPageLimit(int pageIndex) { - double min = Math.min(maxItemsPerPage, firstPageLimit + (pageIndex * factor)); - assert min > 0d; - return (int) min; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LuceneCloseable.java b/src/main/java/it/cavallium/dbengine/lucene/LuceneCloseable.java deleted file mode 100644 index b4c968d..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LuceneCloseable.java +++ /dev/null @@ -1,8 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.database.SafeCloseable; - -/** - * This closeable should be run on a lucene thread - */ -public interface LuceneCloseable extends SafeCloseable {} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LuceneConcurrentMergeScheduler.java b/src/main/java/it/cavallium/dbengine/lucene/LuceneConcurrentMergeScheduler.java deleted file mode 100644 index ca7006d..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LuceneConcurrentMergeScheduler.java +++ /dev/null @@ -1,33 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.io.IOException; -import org.apache.lucene.index.ConcurrentMergeScheduler; -import org.apache.lucene.index.MergePolicy.OneMerge; - -public class LuceneConcurrentMergeScheduler extends ConcurrentMergeScheduler { - - public LuceneConcurrentMergeScheduler() { - super(); - } - - @Override - protected synchronized MergeThread getMergeThread(MergeSource mergeSource, OneMerge merge) { - final MergeThread thread = new LuceneMergeThread(mergeSource, merge); - thread.setDaemon(true); - thread.setName("lucene-merge-" + mergeThreadCount++); - return thread; - } - - public class LuceneMergeThread extends MergeThread { - - /** - * Sole constructor. - * - * @param mergeSource - * @param merge - */ - public LuceneMergeThread(MergeSource mergeSource, OneMerge merge) { - super(mergeSource, merge); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LuceneHacks.java b/src/main/java/it/cavallium/dbengine/lucene/LuceneHacks.java deleted file mode 100644 index e8a10e7..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LuceneHacks.java +++ /dev/null @@ -1,10 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.lucene.searcher.LocalSearcher; -import it.cavallium.dbengine.lucene.searcher.MultiSearcher; -import java.util.function.Supplier; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public record LuceneHacks(@Nullable Supplier<@NotNull LocalSearcher> customLocalSearcher, - @Nullable Supplier<@NotNull MultiSearcher> customMultiSearcher) {} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LuceneThread.java b/src/main/java/it/cavallium/dbengine/lucene/LuceneThread.java deleted file mode 100644 index 323450b..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LuceneThread.java +++ /dev/null @@ -1,10 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import org.jetbrains.annotations.NotNull; - -public class LuceneThread extends Thread { - - public LuceneThread(ThreadGroup group, @NotNull Runnable runnable, String name, int stackSize) { - super(group, runnable, name, stackSize); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/LuceneUtils.java b/src/main/java/it/cavallium/dbengine/lucene/LuceneUtils.java deleted file mode 100644 index f82dbf9..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/LuceneUtils.java +++ /dev/null @@ -1,772 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE; - -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Multimap; -import it.cavallium.datagen.nativedata.Nullabledouble; -import it.cavallium.datagen.nativedata.Nullableint; -import it.cavallium.datagen.nativedata.Nullablelong; -import it.cavallium.dbengine.client.CompositeSnapshot; -import it.cavallium.dbengine.client.query.QueryParser; -import it.cavallium.dbengine.client.query.current.data.NoSort; -import it.cavallium.dbengine.client.query.current.data.QueryParams; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.LLTerm; -import it.cavallium.dbengine.database.LLUtils; -import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep; -import it.cavallium.dbengine.database.collections.DatabaseStageEntry; -import it.cavallium.dbengine.database.collections.DatabaseStageMap; -import it.cavallium.dbengine.database.collections.ValueGetter; -import it.cavallium.dbengine.database.disk.LLIndexSearcher; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler.LuceneMergeThread; -import it.cavallium.dbengine.lucene.analyzer.LegacyWordAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.NCharGramAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer; -import it.cavallium.dbengine.lucene.mlt.BigCompositeReader; -import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis; -import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite; -import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; -import it.cavallium.dbengine.lucene.searcher.LocalSearcher; -import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult; -import it.cavallium.dbengine.lucene.searcher.MultiSearcher; -import it.cavallium.dbengine.lucene.similarity.NGramSimilarity; -import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory; -import it.cavallium.dbengine.rpc.current.data.DirectIOFSDirectory; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import it.cavallium.dbengine.rpc.current.data.LuceneDirectoryOptions; -import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; -import it.cavallium.dbengine.rpc.current.data.MemoryMappedFSDirectory; -import it.cavallium.dbengine.rpc.current.data.NIOFSDirectory; -import it.cavallium.dbengine.rpc.current.data.NRTCachingDirectory; -import it.cavallium.dbengine.rpc.current.data.RAFFSDirectory; -import it.cavallium.dbengine.utils.DBException; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; -import it.unimi.dsi.fastutil.objects.Object2ObjectSortedMap; -import java.io.EOFException; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.file.Path; -import java.time.Duration; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.NoSuchElementException; -import java.util.Optional; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.analysis.en.EnglishAnalyzer; -import org.apache.lucene.analysis.it.ItalianAnalyzer; -import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.TieredMergePolicy; -import org.apache.lucene.misc.store.DirectIODirectory; -import org.apache.lucene.misc.store.RAFDirectory; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery.Builder; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TimeLimitingCollector; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopFieldDocs; -import org.apache.lucene.search.TotalHits; -import org.apache.lucene.search.similarities.BooleanSimilarity; -import org.apache.lucene.search.similarities.ClassicSimilarity; -import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.TFIDFSimilarity; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.MMapDirectory; -import org.apache.lucene.util.Constants; -import org.apache.lucene.util.StringHelper; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; -import org.novasearch.lucene.search.similarities.BM25Similarity; -import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model; -import org.novasearch.lucene.search.similarities.LdpSimilarity; -import org.novasearch.lucene.search.similarities.LtcSimilarity; -import org.novasearch.lucene.search.similarities.RobertsonSimilarity; - -public class LuceneUtils { - - private static final Logger logger = LogManager.getLogger(LuceneUtils.class); - - private static final Analyzer luceneEdge4GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(4, 4); - private static final Analyzer lucene4GramAnalyzerInstance = new NCharGramAnalyzer(4, 4); - private static final Analyzer luceneEdge3To5GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(3, 5); - private static final Analyzer lucene3To5GramAnalyzerInstance = new NCharGramAnalyzer(3, 5); - private static final Analyzer luceneStandardAnalyzerInstance = new StandardAnalyzer(); - private static final Analyzer luceneWordAnalyzerLegacy1Instance = new LegacyWordAnalyzer(false, true, true); - private static final Analyzer luceneWordAnalyzerLegacy2Instance = new LegacyWordAnalyzer(false, false, true); - private static final Analyzer luceneWordAnalyzerLegacy3Instance = new LegacyWordAnalyzer(false, true, true); - private static final Analyzer luceneWordAnalyzerStemInstance = new WordAnalyzer(false,true); - private static final Analyzer luceneWordAnalyzerSimpleInstance = new WordAnalyzer(false, false); - private static final Analyzer luceneICUCollationKeyInstance = new WordAnalyzer(true, true); - private static final Similarity luceneBM25StandardSimilarityInstance = new org.apache.lucene.search.similarities.BM25Similarity(); - private static final Similarity luceneBM25ClassicSimilarityInstance = new BM25Similarity(BM25Model.CLASSIC); - private static final Similarity luceneBM25PlusSimilarityInstance = new BM25Similarity(BM25Model.PLUS); - private static final Similarity luceneBM25LSimilarityInstance = new BM25Similarity(BM25Model.L); - private static final Similarity luceneBM15PlusSimilarityInstance = new BM25Similarity(1.2f, 0.0f, 0.5f, BM25Model.PLUS); - private static final Similarity luceneBM11PlusSimilarityInstance = new BM25Similarity(1.2f, 1.0f, 0.5f, BM25Model.PLUS); - private static final Similarity luceneBM25ClassicNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.CLASSIC); - private static final Similarity luceneBM25PlusNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.PLUS); - private static final Similarity luceneBM25LNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.L); - private static final Similarity luceneBM15PlusNGramSimilarityInstance = NGramSimilarity.bm15(BM25Model.PLUS); - private static final Similarity luceneBM11PlusNGramSimilarityInstance = NGramSimilarity.bm11(BM25Model.PLUS); - private static final Similarity luceneClassicSimilarityInstance = new ClassicSimilarity(); - private static final Similarity luceneClassicNGramSimilarityInstance = NGramSimilarity.classic(); - private static final Similarity luceneLTCSimilarityInstance = new LtcSimilarity(); - private static final Similarity luceneLDPSimilarityInstance = new LdpSimilarity(); - private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f); - private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity(); - private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity(); - // TODO: remove this default page limits and make the limits configurable into QueryParams - private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits(); - private static final CharArraySet ENGLISH_AND_ITALIAN_STOP_WORDS; - private static final LuceneIndexStructure SINGLE_STRUCTURE = new LuceneIndexStructure(1, IntList.of(0)); - private static final it.cavallium.dbengine.rpc.current.data.TieredMergePolicy DEFAULT_MERGE_POLICY = new it.cavallium.dbengine.rpc.current.data.TieredMergePolicy( - Nullabledouble.empty(), - Nullabledouble.empty(), - Nullableint.empty(), - Nullablelong.empty(), - Nullablelong.empty(), - Nullabledouble.empty(), - Nullablelong.empty(), - Nullabledouble.empty() - ); - - static { - var cas = new CharArraySet( - EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.size() + ItalianAnalyzer.getDefaultStopSet().size(), true); - cas.addAll(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); - cas.addAll(ItalianAnalyzer.getDefaultStopSet()); - ENGLISH_AND_ITALIAN_STOP_WORDS = CharArraySet.unmodifiableSet(cas); - } - - @SuppressWarnings("DuplicatedCode") - public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) { - return switch (analyzer) { - case N4Gram -> lucene4GramAnalyzerInstance; - case N4GramEdge -> luceneEdge4GramAnalyzerEdgeInstance; - case N3To5Gram -> lucene3To5GramAnalyzerInstance; - case N3To5GramEdge -> luceneEdge3To5GramAnalyzerEdgeInstance; - case Standard -> luceneStandardAnalyzerInstance; - case StandardMultilanguage -> luceneWordAnalyzerStemInstance; - case LegacyFullText -> luceneWordAnalyzerLegacy1Instance; - case LegacyWordWithStemming -> luceneWordAnalyzerLegacy2Instance; - case LegacyICU -> luceneWordAnalyzerLegacy3Instance; - case StandardSimple -> luceneWordAnalyzerSimpleInstance; - case ICUCollationKey -> luceneICUCollationKeyInstance; - //noinspection UnnecessaryDefault - default -> throw new UnsupportedOperationException("Unknown analyzer: " + analyzer); - }; - } - - @SuppressWarnings("DuplicatedCode") - public static Similarity getSimilarity(TextFieldsSimilarity similarity) { - return switch (similarity) { - case BM25Standard -> luceneBM25StandardSimilarityInstance; - case BM25Classic -> luceneBM25ClassicSimilarityInstance; - case NGramBM25Classic -> luceneBM25ClassicNGramSimilarityInstance; - case BM25L -> luceneBM25LSimilarityInstance; - case NGramBM25L -> luceneBM25LNGramSimilarityInstance; - case Classic -> luceneClassicSimilarityInstance; - case NGramClassic -> luceneClassicNGramSimilarityInstance; - case BM25Plus -> luceneBM25PlusSimilarityInstance; - case NGramBM25Plus -> luceneBM25PlusNGramSimilarityInstance; - case BM15Plus -> luceneBM15PlusSimilarityInstance; - case NGramBM15Plus -> luceneBM15PlusNGramSimilarityInstance; - case BM11Plus -> luceneBM11PlusSimilarityInstance; - case NGramBM11Plus -> luceneBM11PlusNGramSimilarityInstance; - case LTC -> luceneLTCSimilarityInstance; - case LDP -> luceneLDPSimilarityInstance; - case LDPNoLength -> luceneLDPNoLengthSimilarityInstance; - case Robertson -> luceneRobertsonSimilarityInstance; - case Boolean -> luceneBooleanSimilarityInstance; - //noinspection UnnecessaryDefault - default -> throw new IllegalStateException("Unknown similarity: " + similarity); - }; - } - - /** - * @throws NoSuchElementException when the key is not found - * @throws IOException when an error occurs when reading the document - */ - @NotNull - public static IndexableField keyOfTopDoc(int docId, IndexReader indexReader, - String keyFieldName) throws NoSuchElementException, IOException { - if (LLUtils.isInNonBlockingThread()) { - throw new UnsupportedOperationException("Called keyOfTopDoc in a nonblocking thread"); - } - if (docId > indexReader.maxDoc()) { - throw new DBException("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")"); - } - DocumentStoredSingleFieldVisitor visitor = new DocumentStoredSingleFieldVisitor(keyFieldName); - indexReader.document(docId, visitor); - Document d = visitor.getDocument(); - if (d.getFields().isEmpty()) { - throw new NoSuchElementException( - "Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: []"); - } else { - var field = d.getField(keyFieldName); - if (field == null) { - throw new NoSuchElementException( - "Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: " + d - .getFields() - .stream() - .map(IndexableField::name) - .collect(Collectors.joining(",", "[", "]"))); - } else { - return field; - } - } - } - - public static ValueGetter, V> getAsyncDbValueGetterDeep( - CompositeSnapshot snapshot, - DatabaseMapDictionaryDeep, ? extends DatabaseStageMap>> dictionaryDeep) { - return entry -> dictionaryDeep.at(snapshot, entry.getKey()).getValue(snapshot, entry.getValue()); - } - - public static PerFieldAnalyzerWrapper toPerFieldAnalyzerWrapper(IndicizerAnalyzers indicizerAnalyzers) { - HashMap perFieldAnalyzer = new HashMap<>(); - indicizerAnalyzers - .fieldAnalyzer() - .forEach((key, value) -> perFieldAnalyzer.put(key, LuceneUtils.getAnalyzer(value))); - return new PerFieldAnalyzerWrapper(LuceneUtils.getAnalyzer(indicizerAnalyzers.defaultAnalyzer()), perFieldAnalyzer); - } - - public static PerFieldSimilarityWrapper toPerFieldSimilarityWrapper(IndicizerSimilarities indicizerSimilarities) { - HashMap perFieldSimilarity = new HashMap<>(); - indicizerSimilarities - .fieldSimilarity() - .forEach((key, value) -> perFieldSimilarity.put(key, LuceneUtils.getSimilarity(value))); - var defaultSimilarity = LuceneUtils.getSimilarity(indicizerSimilarities.defaultSimilarity()); - return new PerFieldSimilarityWrapper() { - - @Override - public Similarity get(String name) { - return perFieldSimilarity.getOrDefault(name, defaultSimilarity); - } - }; - } - - public static int alignUnsigned(int number, boolean expand) { - if (number % 4096 != 0) { - if (expand) { - return number + (4096 - (number % 4096)); - } else { - return number - (number % 4096); - } - } else { - return number; - } - } - - public static long alignUnsigned(long number, boolean expand) { - if (number % 4096L != 0) { - if (expand) { - return number + (4096L - (number % 4096L)); - } else { - return number - (number % 4096L); - } - } else { - return number; - } - } - - public static void readInternalAligned(Object ref, - FileChannel channel, - long pos, - ByteBuffer b, - int readLength, - int usefulLength, - long end) throws IOException { - if (LLUtils.isInNonBlockingThread()) { - throw new UnsupportedOperationException("Called readInternalAligned in a nonblocking thread"); - } - int startBufPosition = b.position(); - int readData = 0; - int i; - for(; readLength > 0; readLength -= i) { - int toRead = readLength; - b.limit(b.position() + toRead); - - assert b.remaining() == toRead; - - var beforeReadBufPosition = b.position(); - channel.read(b, pos); - b.limit(Math.min(startBufPosition + usefulLength, b.position() + toRead)); - var afterReadBufPosition = b.position(); - i = (afterReadBufPosition - beforeReadBufPosition); - readData += i; - - if (i < toRead && i > 0) { - if (readData < usefulLength) { - throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end); - } - if (readData == usefulLength) { - b.limit(b.position()); - // File end reached - return; - } - } - - if (i < 0) { - throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end); - } - - assert i > 0 : "FileChannel.read with non zero-length bb.remaining() must always read at least one byte (FileChannel is in blocking mode, see spec of ReadableByteChannel)"; - - pos += i; - } - - assert readLength == 0; - } - - public static int safeLongToInt(long l) { - if (l > 2147483630) { - return 2147483630; - } else if (l < -2147483630) { - return -2147483630; - } else { - return (int) l; - } - } - - @Nullable - public static ScoreDoc getLastScoreDoc(ScoreDoc[] scoreDocs) { - if (scoreDocs == null) { - return null; - } - if (scoreDocs.length == 0) { - return null; - } - return scoreDocs[scoreDocs.length - 1]; - } - - public static LocalQueryParams toLocalQueryParams(QueryParams queryParams, Analyzer analyzer) { - return new LocalQueryParams(QueryParser.toQuery(queryParams.query(), analyzer), - queryParams.offset(), - queryParams.limit(), - DEFAULT_PAGE_LIMITS, - QueryParser.toSort(queryParams.sort()), - queryParams.computePreciseHitsCount(), - Duration.ofMillis(queryParams.timeoutMilliseconds()) - ); - } - - public static Stream convertHits(Stream hitsFlux, - List indexSearchers, - @Nullable String keyFieldName) { - return hitsFlux.mapMulti((hit, sink) -> { - var mapped = mapHitBlocking(hit, indexSearchers, keyFieldName); - if (mapped != null) { - sink.accept(mapped); - } - }); - } - - @Nullable - private static LLKeyScore mapHitBlocking(ScoreDoc hit, - List indexSearchers, - @Nullable String keyFieldName) { - assert !LLUtils.isInNonBlockingThread(); - int shardDocId = hit.doc; - int shardIndex = hit.shardIndex; - float score = hit.score; - IndexSearcher indexSearcher; - if (shardIndex == -1 && indexSearchers.size() == 1) { - indexSearcher = indexSearchers.get(0); - } else { - indexSearcher = indexSearchers.get(shardIndex); - } - try { - IndexableField collectedDoc; - if (keyFieldName != null) { - collectedDoc = keyOfTopDoc(shardDocId, indexSearcher.getIndexReader(), keyFieldName); - } else { - collectedDoc = null; - } - return new LLKeyScore(shardDocId, shardIndex, score, collectedDoc); - } catch (NoSuchElementException ex) { - logger.debug("Error: document {} key is not present!", shardDocId); - return null; - } catch (Exception ex) { - logger.error("Failed to read document {}", shardDocId, ex); - return new LLKeyScore(shardDocId, shardIndex, score, null); - } - } - - public static TopDocs mergeTopDocs( - @Nullable Sort sort, - @Nullable Integer startN, - @Nullable Integer topN, - TopDocs[] topDocs) { - if ((startN == null) != (topN == null)) { - throw new IllegalArgumentException("You must pass startN and topN together or nothing"); - } - TopDocs result; - if (sort != null) { - if (!(topDocs instanceof TopFieldDocs[])) { - throw new IllegalStateException("Expected TopFieldDocs[], got TopDocs[]"); - } - if (startN == null) { - int defaultTopN = 0; - for (TopDocs td : topDocs) { - int length = td.scoreDocs.length; - defaultTopN += length; - } - result = TopDocs.merge(sort, 0, defaultTopN, - (TopFieldDocs[]) topDocs - ); - } else { - result = TopDocs.merge(sort, startN, - topN, - (TopFieldDocs[]) topDocs - ); - } - } else { - if (startN == null) { - int defaultTopN = 0; - for (TopDocs td : topDocs) { - int length = td.scoreDocs.length; - defaultTopN += length; - } - result = TopDocs.merge(0, - defaultTopN, - topDocs - ); - } else { - result = TopDocs.merge(startN, - topN, - topDocs - ); - } - } - return result; - } - - public static int totalHitsThreshold(@Nullable Boolean complete) { - return complete == null || complete ? Integer.MAX_VALUE : 1; - } - - public static long totalHitsThresholdLong(@Nullable Boolean complete) { - return complete == null || complete ? Long.MAX_VALUE : 1; - } - - public static TotalHitsCount convertTotalHitsCount(TotalHits totalHits) { - return switch (totalHits.relation) { - case EQUAL_TO -> TotalHitsCount.of(totalHits.value, true); - case GREATER_THAN_OR_EQUAL_TO -> TotalHitsCount.of(totalHits.value, false); - }; - } - - public static TotalHitsCount sum(TotalHitsCount totalHitsCount, TotalHitsCount totalHitsCount1) { - return TotalHitsCount.of(totalHitsCount.value() + totalHitsCount1.value(), - totalHitsCount.exact() && totalHitsCount1.exact() - ); - } - - @SuppressWarnings("unused") - public static String toHumanReadableString(TotalHitsCount totalHitsCount) { - if (totalHitsCount.exact()) { - return Long.toString(totalHitsCount.value()); - } else { - return totalHitsCount.value() + "+"; - } - } - - public static Query getMoreLikeThisQuery(LLIndexSearchers inputIndexSearchers, - LocalQueryParams localQueryParams, - Analyzer analyzer, - Similarity similarity, - Multimap mltDocumentFieldsMultimap) { - List indexSearchers = inputIndexSearchers.shards(); - Query luceneAdditionalQuery = localQueryParams.query(); - // Create the mutable version of the input - Map> mltDocumentFields = HashMultimap.create(mltDocumentFieldsMultimap).asMap(); - - mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty()); - if (mltDocumentFields.isEmpty()) { - return new MatchNoDocsQuery(); - } - MultiMoreLikeThis mlt; - if (indexSearchers.size() == 1) { - mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexSearchers.get(0).getIndexReader(), IndexReader[]::new), - null - ); - } else { - IndexReader[] indexReaders = new IndexReader[indexSearchers.size()]; - for (int i = 0, size = indexSearchers.size(); i < size; i++) { - indexReaders[i] = indexSearchers.get(i).getIndexReader(); - } - mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexReaders, new ArrayIndexComparator(indexReaders)), null); - } - mlt.setAnalyzer(analyzer); - mlt.setFieldNames(mltDocumentFields.keySet().toArray(String[]::new)); - mlt.setMinTermFreq(1); - mlt.setMinDocFreq(3); - mlt.setMaxDocFreqPct(20); - mlt.setBoost(localQueryParams.needsScores()); - mlt.setStopWords(ENGLISH_AND_ITALIAN_STOP_WORDS); - if (similarity instanceof TFIDFSimilarity tfidfSimilarity) { - mlt.setSimilarity(tfidfSimilarity); - } else { - mlt.setSimilarity(new ClassicSimilarity()); - } - - // Get the reference docId and apply it to MoreLikeThis, to generate the query - Query mltQuery = null; - try { - mltQuery = mlt.like(mltDocumentFields); - } catch (IOException e) { - throw new DBException(e); - } - Query luceneQuery; - if (!(luceneAdditionalQuery instanceof MatchAllDocsQuery)) { - luceneQuery = new Builder() - .add(mltQuery, Occur.MUST) - .add(new ConstantScoreQuery(luceneAdditionalQuery), Occur.MUST) - .build(); - } else { - luceneQuery = mltQuery; - } - return luceneQuery; - } - - public static Collector withTimeout(Collector collector, Duration timeout) { - return new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeout.toMillis()); - } - - public static String getStandardName(String clusterName, int shardIndex) { - return clusterName + "-shard" + shardIndex; - } - - public static int getLuceneIndexId(LLTerm id, int totalShards) { - return Math.abs(StringHelper.murmurhash3_x86_32(id.getValueBytesRef(), 7) % totalShards); - } - - public static CheckOutputDirectory createLuceneDirectory(LuceneDirectoryOptions directoryOptions, String directoryName) - throws IOException { - return new CheckOutputDirectory(createLuceneDirectoryInternal(directoryOptions, directoryName)); - } - - private static Directory createLuceneDirectoryInternal(LuceneDirectoryOptions directoryOptions, String directoryName) - throws IOException { - Directory directory; - if (directoryOptions instanceof ByteBuffersDirectory) { - directory = new org.apache.lucene.store.ByteBuffersDirectory(); - } else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) { - FSDirectory delegateDirectory = (FSDirectory) createLuceneDirectoryInternal(directIOFSDirectory.delegate(), - directoryName - ); - if (Constants.LINUX || Constants.MAC_OS_X) { - try { - int mergeBufferSize = directIOFSDirectory.mergeBufferSize().orElse(DirectIODirectory.DEFAULT_MERGE_BUFFER_SIZE); - long minBytesDirect = directIOFSDirectory.minBytesDirect().orElse(DirectIODirectory.DEFAULT_MIN_BYTES_DIRECT); - directory = new DirectIODirectory(delegateDirectory, mergeBufferSize, minBytesDirect); - } catch (UnsupportedOperationException ex) { - logger.warn("Failed to open FSDirectory with DIRECT flag", ex); - directory = delegateDirectory; - } - } else { - logger.warn("Failed to open FSDirectory with DIRECT flag because the operating system is Windows"); - directory = delegateDirectory; - } - } else if (directoryOptions instanceof MemoryMappedFSDirectory memoryMappedFSDirectory) { - directory = new MMapDirectory(memoryMappedFSDirectory.managedPath().resolve(directoryName + ".lucene.db")); - } else if (directoryOptions instanceof NIOFSDirectory niofsDirectory) { - directory = new org.apache.lucene.store.NIOFSDirectory(niofsDirectory - .managedPath() - .resolve(directoryName + ".lucene.db")); - } else if (directoryOptions instanceof RAFFSDirectory rafFsDirectory) { - directory = new RAFDirectory(rafFsDirectory.managedPath().resolve(directoryName + ".lucene.db")); - } else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) { - var delegateDirectory = createLuceneDirectoryInternal(nrtCachingDirectory.delegate(), directoryName); - directory = new org.apache.lucene.store.NRTCachingDirectory(delegateDirectory, - toMB(nrtCachingDirectory.maxMergeSizeBytes()), - toMB(nrtCachingDirectory.maxCachedBytes()) - ); - } else { - throw new UnsupportedOperationException("Unsupported directory: " + directoryName + ", " + directoryOptions); - } - return directory; - } - - public static Optional getManagedPath(LuceneDirectoryOptions directoryOptions) { - if (directoryOptions instanceof ByteBuffersDirectory) { - return Optional.empty(); - } else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) { - return getManagedPath(directIOFSDirectory.delegate()); - } else if (directoryOptions instanceof MemoryMappedFSDirectory memoryMappedFSDirectory) { - return Optional.of(memoryMappedFSDirectory.managedPath()); - } else if (directoryOptions instanceof NIOFSDirectory niofsDirectory) { - return Optional.of(niofsDirectory.managedPath()); - } else if (directoryOptions instanceof RAFFSDirectory raffsDirectory) { - return Optional.of(raffsDirectory.managedPath()); - } else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) { - return getManagedPath(nrtCachingDirectory.delegate()); - } else { - throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions); - } - } - - public static boolean getIsFilesystemCompressed(LuceneDirectoryOptions directoryOptions) { - if (directoryOptions instanceof ByteBuffersDirectory) { - return false; - } else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) { - return getIsFilesystemCompressed(directIOFSDirectory.delegate()); - } else if (directoryOptions instanceof MemoryMappedFSDirectory) { - return false; - } else if (directoryOptions instanceof NIOFSDirectory) { - return false; - } else if (directoryOptions instanceof RAFFSDirectory) { - return false; - } else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) { - return getIsFilesystemCompressed(nrtCachingDirectory.delegate()); - } else { - throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions); - } - } - - public static IntList intListTo(int to) { - var il = new IntArrayList(to); - for (int i = 0; i < to; i++) { - il.add(i); - } - return il; - } - - public static LuceneIndexStructure singleStructure() { - return SINGLE_STRUCTURE; - } - - public static LuceneIndexStructure shardsStructure(int count) { - return new LuceneIndexStructure(count, intListTo(count)); - } - - public static MergePolicy getMergePolicy(LuceneOptions luceneOptions) { - var mergePolicy = new TieredMergePolicy(); - var mergePolicyOptions = luceneOptions.mergePolicy(); - if (mergePolicyOptions.deletesPctAllowed().isPresent()) { - mergePolicy.setDeletesPctAllowed(mergePolicyOptions.deletesPctAllowed().get()); - } - if (mergePolicyOptions.forceMergeDeletesPctAllowed().isPresent()) { - mergePolicy.setForceMergeDeletesPctAllowed(mergePolicyOptions.forceMergeDeletesPctAllowed().get()); - } - if (mergePolicyOptions.maxMergeAtOnce().isPresent()) { - mergePolicy.setMaxMergeAtOnce(mergePolicyOptions.maxMergeAtOnce().get()); - } - if (mergePolicyOptions.maxMergedSegmentBytes().isPresent()) { - mergePolicy.setMaxMergedSegmentMB(toMB(mergePolicyOptions.maxMergedSegmentBytes().get())); - } - if (mergePolicyOptions.floorSegmentBytes().isPresent()) { - mergePolicy.setFloorSegmentMB(toMB(mergePolicyOptions.floorSegmentBytes().get())); - } - if (mergePolicyOptions.segmentsPerTier().isPresent()) { - mergePolicy.setSegmentsPerTier(mergePolicyOptions.segmentsPerTier().get()); - } - if (mergePolicyOptions.maxCFSSegmentSizeBytes().isPresent()) { - mergePolicy.setMaxCFSSegmentSizeMB(toMB(mergePolicyOptions.maxCFSSegmentSizeBytes().get())); - } - if (mergePolicyOptions.noCFSRatio().isPresent()) { - mergePolicy.setNoCFSRatio(mergePolicyOptions.noCFSRatio().get()); - } - return mergePolicy; - } - - public static double toMB(long bytes) { - if (bytes == Long.MAX_VALUE) return Double.MAX_VALUE; - return ((double) bytes) / 1024D / 1024D; - } - - public static it.cavallium.dbengine.rpc.current.data.TieredMergePolicy getDefaultMergePolicy() { - return DEFAULT_MERGE_POLICY; - } - - public static QueryParams getCountQueryParams(it.cavallium.dbengine.client.query.current.data.Query query) { - return QueryParams.of(query, 0, 0, NoSort.of(), false, Long.MAX_VALUE); - } - - /** - * Rewrite a lucene query of a local searcher, then call the local searcher again with the rewritten query - */ - public static LuceneSearchResult rewrite(LocalSearcher localSearcher, - LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - var indexSearchers = LLIndexSearchers.unsharded(indexSearcher); - var queryParams2 = transformer.rewrite(indexSearchers, queryParams); - return localSearcher.collect(indexSearcher, queryParams2, keyFieldName, NO_REWRITE, filterer); - } - - /** - * Rewrite a lucene query of a multi searcher, then call the multi searcher again with the rewritten query - */ - public static LuceneSearchResult rewriteMulti(MultiSearcher multiSearcher, - LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - var queryParams2 = transformer.rewrite(indexSearchers, queryParams); - return multiSearcher.collectMulti(indexSearchers, queryParams2, keyFieldName, NO_REWRITE, filterer); - } - - public static void checkLuceneThread() { - var thread = Thread.currentThread(); - if (!isLuceneThread()) { - throw printLuceneThreadWarning(thread); - } - } - - @SuppressWarnings("ThrowableNotThrown") - public static void warnLuceneThread() { - var thread = Thread.currentThread(); - if (!isLuceneThread()) { - printLuceneThreadWarning(thread); - } - } - - private static IllegalStateException printLuceneThreadWarning(Thread thread) { - var error = new IllegalStateException("Current thread is not a lucene thread: " + thread.getId() + " " + thread - + ". Schedule it using LuceneUtils.luceneScheduler()"); - logger.warn("Current thread is not a lucene thread: {} {}", thread.getId(), thread, error); - return error; - } - - public static boolean isLuceneThread() { - var thread = Thread.currentThread(); - return thread instanceof LuceneThread || thread instanceof LuceneMergeThread; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/MaxScoreAccumulator.java b/src/main/java/it/cavallium/dbengine/lucene/MaxScoreAccumulator.java deleted file mode 100644 index 8d7c92c..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/MaxScoreAccumulator.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package it.cavallium.dbengine.lucene; - -import java.util.Objects; -import java.util.concurrent.atomic.LongAccumulator; - -/** - * Maintains the maximum score and its corresponding document id concurrently - * - * This class must mirror this changes: - * - * Lucene MaxScoreAccumulator changes on GitHub - */ -public final class MaxScoreAccumulator { - // we use 2^10-1 to check the remainder with a bitwise operation - static final int DEFAULT_INTERVAL = 0x3ff; - - // scores are always positive - final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE); - - // non-final and visible for tests - public long modInterval; - - public MaxScoreAccumulator() { - this.modInterval = DEFAULT_INTERVAL; - } - - /** - * Return the max encoded DocAndScore in a way that is consistent with {@link - * DocAndScore#compareTo}. - */ - private static long maxEncode(long v1, long v2) { - float score1 = Float.intBitsToFloat((int) (v1 >> 32)); - float score2 = Float.intBitsToFloat((int) (v2 >> 32)); - int cmp = Float.compare(score1, score2); - if (cmp == 0) { - // tie-break on the minimum doc base - return (int) v1 < (int) v2 ? v1 : v2; - } else if (cmp > 0) { - return v1; - } - return v2; - } - - public void accumulate(int docBase, float score) { - assert docBase >= 0 && score >= 0; - long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase; - acc.accumulate(encode); - } - - public DocAndScore get() { - long value = acc.get(); - if (value == Long.MIN_VALUE) { - return null; - } - float score = Float.intBitsToFloat((int) (value >> 32)); - int docBase = (int) value; - return new DocAndScore(docBase, score); - } - - public static class DocAndScore implements Comparable { - public final int docBase; - public final float score; - - public DocAndScore(int docBase, float score) { - this.docBase = docBase; - this.score = score; - } - - @Override - public int compareTo(DocAndScore o) { - int cmp = Float.compare(score, o.score); - if (cmp == 0) { - // tie-break on the minimum doc base - // For a given minimum competitive score, we want to know the first segment - // where this score occurred, hence the reverse order here. - // On segments with a lower docBase, any document whose score is greater - // than or equal to this score would be competitive, while on segments with a - // higher docBase, documents need to have a strictly greater score to be - // competitive since we tie break on doc ID. - return Integer.compare(o.docBase, docBase); - } - return cmp; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DocAndScore result = (DocAndScore) o; - return docBase == result.docBase && Float.compare(result.score, score) == 0; - } - - @Override - public int hashCode() { - return Objects.hash(docBase, score); - } - - @Override - public String toString() { - return "DocAndScore{" + "docBase=" + docBase + ", score=" + score + '}'; - } - } -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/lucene/PageLimits.java b/src/main/java/it/cavallium/dbengine/lucene/PageLimits.java deleted file mode 100644 index 8664670..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/PageLimits.java +++ /dev/null @@ -1,9 +0,0 @@ -package it.cavallium.dbengine.lucene; - -public interface PageLimits { - - int DEFAULT_MIN_ITEMS_PER_PAGE = 10; - int DEFAULT_MAX_ITEMS_PER_PAGE = 250; - - int getPageLimit(int pageIndex); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/PriorityQueue.java b/src/main/java/it/cavallium/dbengine/lucene/PriorityQueue.java deleted file mode 100644 index 9b8e8ff..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/PriorityQueue.java +++ /dev/null @@ -1,44 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.database.DiscardingCloseable; - -public interface PriorityQueue extends ResourceIterable, DiscardingCloseable { - - /** - * Adds an Object to a PriorityQueue in log(size) time. If one tries to add more objects than maxSize from initialize - * an {@link ArrayIndexOutOfBoundsException} is thrown. - */ - void add(T element); - - /** - * Returns the least element of the PriorityQueue in constant time. - */ - T top(); - - /** - * Removes and returns the least element of the PriorityQueue in log(size) time. - */ - T pop(); - - /** - * Replace the top of the pq with {@code newTop} - */ - void replaceTop(T oldTop, T newTop); - - /** - * Returns the number of elements currently stored in the PriorityQueue. - */ - long size(); - - /** - * Removes all entries from the PriorityQueue. - */ - void clear(); - - /** - * Removes an existing element currently stored in the PriorityQueue. Cost is linear with the size of the queue. (A - * specialization of PriorityQueue which tracks element positions would provide a constant remove time but the - * trade-off would be extra cost to all additions/insertions) - */ - boolean remove(T element); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/RandomFieldComparator.java b/src/main/java/it/cavallium/dbengine/lucene/RandomFieldComparator.java deleted file mode 100644 index 2a7b6e6..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/RandomFieldComparator.java +++ /dev/null @@ -1,113 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.utils.LFSR.LFSRIterator; -import java.io.IOException; -import java.math.BigInteger; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.LeafFieldComparator; -import org.apache.lucene.search.Scorable; -import org.apache.lucene.search.ScoreCachingWrappingScorer; -import org.jetbrains.annotations.NotNull; - -//todo: fix -public class RandomFieldComparator extends FieldComparator implements LeafFieldComparator { - - private final @NotNull LFSRIterator rand; - private final float[] scores; - private float bottom; - private Scorable scorer; - private float topValue; - - /** Creates a new comparator based on relevance for {@code numHits}. */ - public RandomFieldComparator(@NotNull LFSRIterator rand, int numHits) { - this.rand = rand; - scores = new float[numHits]; - } - - @Override - public int compare(int slot1, int slot2) { - return Float.compare(scores[slot2], scores[slot1]); - } - - @Override - public int compareBottom(int doc) throws IOException { - float score = scorer.score(); - assert !Float.isNaN(score); - return Float.compare(score, bottom); - } - - @Override - public void copy(int slot, int doc) throws IOException { - scores[slot] = scorer.score(); - assert !Float.isNaN(scores[slot]); - } - - @Override - public LeafFieldComparator getLeafComparator(LeafReaderContext context) { - return this; - } - - @Override - public void setBottom(final int bottom) { - this.bottom = scores[bottom]; - } - - @Override - public void setTopValue(Float value) { - topValue = Float.MAX_VALUE; - } - - @Override - public void setScorer(Scorable scorer) { - // wrap with a ScoreCachingWrappingScorer so that successive calls to - // score() will not incur score computation over and - // over again. - var randomizedScorer = new Scorable() { - - @Override - public float score() { - return randomize(scorer.docID()); - } - - @Override - public int docID() { - return scorer.docID(); - } - }; - this.scorer = ScoreCachingWrappingScorer.wrap(randomizedScorer); - } - - @SuppressWarnings("RedundantCast") - @Override - public Float value(int slot) { - return (float) scores[slot]; - } - - // Override because we sort reverse of natural Float order: - @Override - public int compareValues(Float first, Float second) { - // Reversed intentionally because relevance by default - // sorts descending: - return second.compareTo(first); - } - - @Override - public int compareTop(int doc) throws IOException { - float docValue = scorer.score(); - assert !Float.isNaN(docValue); - return Float.compare(docValue, topValue); - } - - private float randomize(int num) { - int val = rand.next(BigInteger.valueOf(num)).intValueExact(); - int pow24 = 1 << 24; - if (val >= pow24) { - throw new IndexOutOfBoundsException(); - } - if (val < 0) { - throw new IndexOutOfBoundsException(); - } - return (val & 0x00FFFFFF) / (float)(1 << 24); // only use the lower 24 bits to construct a float from 0.0-1.0 - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/RandomFieldComparatorSource.java b/src/main/java/it/cavallium/dbengine/lucene/RandomFieldComparatorSource.java deleted file mode 100644 index c06821b..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/RandomFieldComparatorSource.java +++ /dev/null @@ -1,21 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.utils.LFSR; -import java.util.concurrent.ThreadLocalRandom; -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.FieldComparatorSource; -import org.apache.lucene.search.Pruning; - -public class RandomFieldComparatorSource extends FieldComparatorSource { - - private final LFSR rand; - - public RandomFieldComparatorSource() { - this.rand = LFSR.random(24, ThreadLocalRandom.current().nextInt(1 << 24)); - } - - @Override - public FieldComparator newComparator(String fieldname, int numHits, Pruning pruning, boolean reversed) { - return new RandomFieldComparator(rand.iterator(), numHits); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/RandomSortField.java b/src/main/java/it/cavallium/dbengine/lucene/RandomSortField.java deleted file mode 100644 index f690589..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/RandomSortField.java +++ /dev/null @@ -1,15 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import org.apache.lucene.search.SortField; - -public class RandomSortField extends SortField { - - public RandomSortField() { - super("", new RandomFieldComparatorSource()); - } - - @Override - public boolean needsScores() { - return false; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/ResourceIterable.java b/src/main/java/it/cavallium/dbengine/lucene/ResourceIterable.java deleted file mode 100644 index 8c7df87..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/ResourceIterable.java +++ /dev/null @@ -1,23 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import java.util.stream.Stream; - -public interface ResourceIterable extends DiscardingCloseable { - - /** - * Iterate this PriorityQueue - */ - Stream iterate(); - - /** - * Iterate this PriorityQueue - */ - default Stream iterate(long skips) { - if (skips == 0) { - return iterate(); - } else { - return iterate().skip(skips); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/Reversable.java b/src/main/java/it/cavallium/dbengine/lucene/Reversable.java deleted file mode 100644 index 247cf08..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/Reversable.java +++ /dev/null @@ -1,6 +0,0 @@ -package it.cavallium.dbengine.lucene; - -public interface Reversable> { - - T reverse(); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/ReversableResourceIterable.java b/src/main/java/it/cavallium/dbengine/lucene/ReversableResourceIterable.java deleted file mode 100644 index 7ce6943..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/ReversableResourceIterable.java +++ /dev/null @@ -1,3 +0,0 @@ -package it.cavallium.dbengine.lucene; - -public interface ReversableResourceIterable extends ResourceIterable, Reversable> {} diff --git a/src/main/java/it/cavallium/dbengine/lucene/ScoreDocPartialComparator.java b/src/main/java/it/cavallium/dbengine/lucene/ScoreDocPartialComparator.java deleted file mode 100644 index ecaab48..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/ScoreDocPartialComparator.java +++ /dev/null @@ -1,18 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.util.Comparator; -import org.apache.lucene.search.ScoreDoc; - -class ScoreDocPartialComparator implements Comparator { - - public static final Comparator SCORE_DOC_PARTIAL_COMPARATOR = new ScoreDocPartialComparator(); - - @Override - public int compare(ScoreDoc hitA, ScoreDoc hitB) { - if (hitA.score == hitB.score) { - return Integer.compare(hitB.doc, hitA.doc); - } else { - return Float.compare(hitA.score, hitB.score); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/ScoreDocShardComparator.java b/src/main/java/it/cavallium/dbengine/lucene/ScoreDocShardComparator.java deleted file mode 100644 index 84203df..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/ScoreDocShardComparator.java +++ /dev/null @@ -1,21 +0,0 @@ -package it.cavallium.dbengine.lucene; - -import java.util.Comparator; - -class ScoreDocShardComparator implements Comparator { - - public static final Comparator SCORE_DOC_SHARD_COMPARATOR = new ScoreDocShardComparator(); - - @Override - public int compare(LLScoreDoc hitA, LLScoreDoc hitB) { - if (hitA.score() == hitB.score()) { - if (hitA.doc() == hitB.doc()) { - return Integer.compare(hitA.shardIndex(), hitB.shardIndex()); - } else { - return Integer.compare(hitB.doc(), hitA.doc()); - } - } else { - return Float.compare(hitA.score(), hitB.score()); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/SinglePageLimits.java b/src/main/java/it/cavallium/dbengine/lucene/SinglePageLimits.java deleted file mode 100644 index 6dbd5e1..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/SinglePageLimits.java +++ /dev/null @@ -1,23 +0,0 @@ -package it.cavallium.dbengine.lucene; - -public class SinglePageLimits implements PageLimits { - - private final int firstPageLimit; - - public SinglePageLimits() { - this(DEFAULT_MIN_ITEMS_PER_PAGE); - } - - public SinglePageLimits(int firstPageLimit) { - this.firstPageLimit = firstPageLimit; - } - - @Override - public int getPageLimit(int pageIndex) { - if (pageIndex == 0) { - return firstPageLimit; - } else { - return 0; - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/analyzer/ItaEngStopWords.java b/src/main/java/it/cavallium/dbengine/lucene/analyzer/ItaEngStopWords.java deleted file mode 100644 index 66058cc..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/analyzer/ItaEngStopWords.java +++ /dev/null @@ -1,341 +0,0 @@ -package it.cavallium.dbengine.lucene.analyzer; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.lucene.analysis.CharArraySet; - -public class ItaEngStopWords { - - /** - * An unmodifiable set containing some common English words that are not usually useful for - * searching. - */ - public static final CharArraySet ENGLISH_STOP_WORDS_SET; - - public static final CharArraySet ITA_DEFAULT_ARTICLES; - - public static final CharArraySet ITA_STOP_WORDS_SET; - - public static final CharArraySet STOP_WORDS_SET; - - static { - final List stopWords = - Arrays.asList( - "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", - "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", - "these", "they", "this", "to", "was", "will", "with"); - final CharArraySet stopSet = new CharArraySet(stopWords, false); - ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); - - ITA_DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList( - "c", - "l", - "all", - "dall", - "dell", - "nell", - "sull", - "coll", - "pell", - "gl", - "agl", - "dagl", - "degl", - "negl", - "sugl", - "un", - "m", - "t", - "s", - "v", - "d" - ), true)); - - ITA_STOP_WORDS_SET = CharArraySet.unmodifiableSet(new CharArraySet(List.of("ad", - "al", - "allo", - "ai", - "agli", - "all", - "agl", - "alla", - "alle", - "con", - "col", - "coi", - "da", - "dal", - "dallo", - "dai", - "dagli", - "dall", - "dagl", - "dalla", - "dalle", - "di", - "del", - "dello", - "dei", - "degli", - "dell", - "degl", - "della", - "delle", - "in", - "nel", - "nello", - "nei", - "negli", - "nell", - "negl", - "nella", - "nelle", - "su", - "sul", - "sullo", - "sui", - "sugli", - "sull", - "sugl", - "sulla", - "sulle", - "per", - "tra", - "contro", - "io", - "tu", - "lui", - "lei", - "noi", - "voi", - "loro", - "mio", - "mia", - "miei", - "mie", - "tuo", - "tua", - "tuoi", - "tue", - "suo", - "sua", - "suoi", - "sue", - "nostro", - "nostra", - "nostri", - "nostre", - "vostro", - "vostra", - "vostri", - "vostre", - "mi", - "ti", - "ci", - "vi", - "lo", - "la", - "li", - "le", - "gli", - "ne", - "il", - "un", - "uno", - "una", - "ma", - "ed", - "se", - "perché", - "anche", - "come", - "dov", - "dove", - "che", - "chi", - "cui", - "non", - "più", - "quale", - "quanto", - "quanti", - "quanta", - "quante", - "quello", - "quelli", - "quella", - "quelle", - "questo", - "questi", - "questa", - "queste", - "si", - "tutto", - "tutti", - "a", - "c", - "e", - "i", - "l", - "o", - "ho", - "hai", - "ha", - "abbiamo", - "avete", - "hanno", - "abbia", - "abbiate", - "abbiano", - "avrò", - "avrai", - "avrà", - "avremo", - "avrete", - "avranno", - "avrei", - "avresti", - "avrebbe", - "avremmo", - "avreste", - "avrebbero", - "avevo", - "avevi", - "aveva", - "avevamo", - "avevate", - "avevano", - "ebbi", - "avesti", - "ebbe", - "avemmo", - "aveste", - "ebbero", - "avessi", - "avesse", - "avessimo", - "avessero", - "avendo", - "avuto", - "avuta", - "avuti", - "avute", - "sono", - "sei", - "è", - "siamo", - "siete", - "sia", - "siate", - "siano", - "sarò", - "sarai", - "sarà", - "saremo", - "sarete", - "saranno", - "sarei", - "saresti", - "sarebbe", - "saremmo", - "sareste", - "sarebbero", - "ero", - "eri", - "era", - "eravamo", - "eravate", - "erano", - "fui", - "fosti", - "fu", - "fummo", - "foste", - "furono", - "fossi", - "fosse", - "fossimo", - "fossero", - "essendo", - "faccio", - "fai", - "facciamo", - "fanno", - "faccia", - "facciate", - "facciano", - "farò", - "farai", - "farà", - "faremo", - "farete", - "faranno", - "farei", - "faresti", - "farebbe", - "faremmo", - "fareste", - "farebbero", - "facevo", - "facevi", - "faceva", - "facevamo", - "facevate", - "facevano", - "feci", - "facesti", - "fece", - "facemmo", - "faceste", - "fecero", - "facessi", - "facesse", - "facessimo", - "facessero", - "facendo", - "sto", - "stai", - "sta", - "stiamo", - "stanno", - "stia", - "stiate", - "stiano", - "starò", - "starai", - "starà", - "staremo", - "starete", - "staranno", - "starei", - "staresti", - "starebbe", - "staremmo", - "stareste", - "starebbero", - "stavo", - "stavi", - "stava", - "stavamo", - "stavate", - "stavano", - "stetti", - "stesti", - "stette", - "stemmo", - "steste", - "stettero", - "stessi", - "stesse", - "stessimo", - "stessero", - "stando" - ), true)); - - var mergedSet = new ArrayList<>(); - mergedSet.addAll(ITA_STOP_WORDS_SET); - mergedSet.addAll(ENGLISH_STOP_WORDS_SET); - STOP_WORDS_SET = new CharArraySet(mergedSet, true); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/analyzer/LegacyWordAnalyzer.java b/src/main/java/it/cavallium/dbengine/lucene/analyzer/LegacyWordAnalyzer.java deleted file mode 100644 index 24c5149..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/analyzer/LegacyWordAnalyzer.java +++ /dev/null @@ -1,1095 +0,0 @@ -package it.cavallium.dbengine.lucene.analyzer; - -import com.ibm.icu.text.Collator; -import com.ibm.icu.util.ULocale; -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.analysis.LowerCaseFilter; -import org.apache.lucene.analysis.StopFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.en.EnglishPossessiveFilter; -import org.apache.lucene.analysis.en.KStemFilter; -import org.apache.lucene.analysis.icu.ICUCollationAttributeFactory; -import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; -import org.apache.lucene.analysis.miscellaneous.LengthFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; - -public class LegacyWordAnalyzer extends Analyzer { - - private final boolean icu; - private final boolean removeStopWords; - private final boolean stem; - - public LegacyWordAnalyzer(boolean icu, boolean removeStopWords, boolean stem) { - this.icu = icu; - this.removeStopWords = removeStopWords; - this.stem = stem; - } - - @Override - protected TokenStreamComponents createComponents(final String fieldName) { - Tokenizer tokenizer; - if (icu) { - tokenizer = new StandardTokenizer(new ICUCollationAttributeFactory(Collator.getInstance(ULocale.ROOT))); - } else { - tokenizer = new StandardTokenizer(); - } - TokenStream tokenStream = tokenizer; - if (stem) { - tokenStream = new LengthFilter(tokenStream, 1, 120); - } - if (!icu) { - tokenStream = newCommonFilter(tokenStream, stem); - } - if (removeStopWords) { - tokenStream = new EnglishItalianStopFilter(tokenStream); - } - - return new TokenStreamComponents(tokenizer, tokenStream); - } - - @Override - protected TokenStream normalize(String fieldName, TokenStream in) { - TokenStream tokenStream = in; - tokenStream = newCommonNormalizer(tokenStream); - return tokenStream; - } - - /** - * - * @param stem Enable stem filters on words. - * Pass false if it will be used with a n-gram filter - */ - public static TokenStream newCommonFilter(TokenStream tokenStream, boolean stem) { - tokenStream = newCommonNormalizer(tokenStream); - if (stem) { - tokenStream = new KStemFilter(tokenStream); - tokenStream = new EnglishPossessiveFilter(tokenStream); - } - return tokenStream; - } - - public static TokenStream newCommonNormalizer(TokenStream tokenStream) { - tokenStream = new ASCIIFoldingFilter(tokenStream); - tokenStream = new LowerCaseFilter(tokenStream); - return tokenStream; - } - - public static class EnglishItalianStopFilter extends StopFilter { - - private static final CharArraySet stopWords; - - private static final Set stopWordsString; - - /** - * Constructs a filter which removes words from the input TokenStream that are named in the Set. - * - * @param in Input stream - * @see #makeStopSet(String...) - */ - public EnglishItalianStopFilter(TokenStream in) { - super(in, stopWords); - } - - static { - var englishStopWords = Set.of("a", - "an", - "and", - "are", - "as", - "at", - "be", - "but", - "by", - "for", - "if", - "in", - "into", - "is", - "it", - "no", - "not", - "of", - "on", - "or", - "such", - "that", - "the", - "their", - "then", - "there", - "these", - "they", - "this", - "to", - "was", - "will", - "with" - ); - var oldItalianStopWords = Set.of("a", - "abbastanza", - "abbia", - "abbiamo", - "abbiano", - "abbiate", - "accidenti", - "ad", - "adesso", - "affinché", - "agl", - "agli", - "ahime", - "ahimè", - "ai", - "al", - "alcuna", - "alcuni", - "alcuno", - "all", - "alla", - "alle", - "allo", - "allora", - "altre", - "altri", - "altrimenti", - "altro", - "altrove", - "altrui", - "anche", - "ancora", - "anni", - "anno", - "ansa", - "anticipo", - "assai", - "attesa", - "attraverso", - "avanti", - "avemmo", - "avendo", - "avente", - "aver", - "avere", - "averlo", - "avesse", - "avessero", - "avessi", - "avessimo", - "aveste", - "avesti", - "avete", - "aveva", - "avevamo", - "avevano", - "avevate", - "avevi", - "avevo", - "avrai", - "avranno", - "avrebbe", - "avrebbero", - "avrei", - "avremmo", - "avremo", - "avreste", - "avresti", - "avrete", - "avrà", - "avrò", - "avuta", - "avute", - "avuti", - "avuto", - "basta", - "ben", - "bene", - "benissimo", - "brava", - "bravo", - "buono", - "c", - "caso", - "cento", - "certa", - "certe", - "certi", - "certo", - "che", - "chi", - "chicchessia", - "chiunque", - "ci", - "ciascuna", - "ciascuno", - "cima", - "cinque", - "cio", - "cioe", - "cioè", - "circa", - "citta", - "città", - "ciò", - "co", - "codesta", - "codesti", - "codesto", - "cogli", - "coi", - "col", - "colei", - "coll", - "coloro", - "colui", - "come", - "cominci", - "comprare", - "comunque", - "con", - "concernente", - "conclusione", - "consecutivi", - "consecutivo", - "consiglio", - "contro", - "cortesia", - "cos", - "cosa", - "cosi", - "così", - "cui", - "d", - "da", - "dagl", - "dagli", - "dai", - "dal", - "dall", - "dalla", - "dalle", - "dallo", - "dappertutto", - "davanti", - "degl", - "degli", - "dei", - "del", - "dell", - "della", - "delle", - "dello", - "dentro", - "detto", - "deve", - "devo", - "di", - "dice", - "dietro", - "dire", - "dirimpetto", - "diventa", - "diventare", - "diventato", - "dopo", - "doppio", - "dov", - "dove", - "dovra", - "dovrà", - "dovunque", - "due", - "dunque", - "durante", - "e", - "ebbe", - "ebbero", - "ebbi", - "ecc", - "ecco", - "ed", - "effettivamente", - "egli", - "ella", - "entrambi", - "eppure", - "era", - "erano", - "eravamo", - "eravate", - "eri", - "ero", - "esempio", - "esse", - "essendo", - "esser", - "essere", - "essi", - "ex", - "fa", - "faccia", - "facciamo", - "facciano", - "facciate", - "faccio", - "facemmo", - "facendo", - "facesse", - "facessero", - "facessi", - "facessimo", - "faceste", - "facesti", - "faceva", - "facevamo", - "facevano", - "facevate", - "facevi", - "facevo", - "fai", - "fanno", - "farai", - "faranno", - "fare", - "farebbe", - "farebbero", - "farei", - "faremmo", - "faremo", - "fareste", - "faresti", - "farete", - "farà", - "farò", - "fatto", - "favore", - "fece", - "fecero", - "feci", - "fin", - "finalmente", - "finche", - "fine", - "fino", - "forse", - "forza", - "fosse", - "fossero", - "fossi", - "fossimo", - "foste", - "fosti", - "fra", - "frattempo", - "fu", - "fui", - "fummo", - "fuori", - "furono", - "futuro", - "generale", - "gente", - "gia", - "giacche", - "giorni", - "giorno", - "giu", - "già", - "gli", - "gliela", - "gliele", - "glieli", - "glielo", - "gliene", - "grande", - "grazie", - "gruppo", - "ha", - "haha", - "hai", - "hanno", - "ho", - "i", - "ie", - "ieri", - "il", - "improvviso", - "in", - "inc", - "indietro", - "infatti", - "inoltre", - "insieme", - "intanto", - "intorno", - "invece", - "io", - "l", - "la", - "lasciato", - "lato", - "le", - "lei", - "li", - "lo", - "lontano", - "loro", - "lui", - "lungo", - "luogo", - "là", - "ma", - "macche", - "magari", - "maggior", - "mai", - "male", - "malgrado", - "malissimo", - "me", - "medesimo", - "mediante", - "meglio", - "meno", - "mentre", - "mesi", - "mezzo", - "mi", - "mia", - "mie", - "miei", - "mila", - "miliardi", - "milioni", - "minimi", - "mio", - "modo", - "molta", - "molti", - "moltissimo", - "molto", - "momento", - "mondo", - "ne", - "negl", - "negli", - "nei", - "nel", - "nell", - "nella", - "nelle", - "nello", - "nemmeno", - "neppure", - "nessun", - "nessuna", - "nessuno", - "niente", - "no", - "noi", - "nome", - "non", - "nondimeno", - "nonostante", - "nonsia", - "nostra", - "nostre", - "nostri", - "nostro", - "novanta", - "nove", - "nulla", - "nuovi", - "nuovo", - "o", - "od", - "oggi", - "ogni", - "ognuna", - "ognuno", - "oltre", - "oppure", - "ora", - "ore", - "osi", - "ossia", - "ottanta", - "otto", - "paese", - "parecchi", - "parecchie", - "parecchio", - "parte", - "partendo", - "peccato", - "peggio", - "per", - "perche", - "perchè", - "perché", - "percio", - "perciò", - "perfino", - "pero", - "persino", - "persone", - "però", - "piedi", - "pieno", - "piglia", - "piu", - "piuttosto", - "più", - "po", - "pochissimo", - "poco", - "poi", - "poiche", - "possa", - "possedere", - "posteriore", - "posto", - "potrebbe", - "preferibilmente", - "presa", - "press", - "prima", - "primo", - "principalmente", - "probabilmente", - "promesso", - "proprio", - "puo", - "pure", - "purtroppo", - "può", - "qua", - "qualche", - "qualcosa", - "qualcuna", - "qualcuno", - "quale", - "quali", - "qualunque", - "quando", - "quanta", - "quante", - "quanti", - "quanto", - "quantunque", - "quarto", - "quasi", - "quattro", - "quel", - "quella", - "quelle", - "quelli", - "quello", - "quest", - "questa", - "queste", - "questi", - "questo", - "qui", - "quindi", - "quinto", - "realmente", - "recente", - "recentemente", - "registrazione", - "relativo", - "riecco", - "rispetto", - "salvo", - "sara", - "sarai", - "saranno", - "sarebbe", - "sarebbero", - "sarei", - "saremmo", - "saremo", - "sareste", - "saresti", - "sarete", - "sarà", - "sarò", - "scola", - "scopo", - "scorso", - "se", - "secondo", - "seguente", - "seguito", - "sei", - "sembra", - "sembrare", - "sembrato", - "sembrava", - "sembri", - "sempre", - "senza", - "sette", - "si", - "sia", - "siamo", - "siano", - "siate", - "siete", - "sig", - "solito", - "solo", - "soltanto", - "sono", - "sopra", - "soprattutto", - "sotto", - "spesso", - "sta", - "stai", - "stando", - "stanno", - "starai", - "staranno", - "starebbe", - "starebbero", - "starei", - "staremmo", - "staremo", - "stareste", - "staresti", - "starete", - "starà", - "starò", - "stata", - "state", - "stati", - "stato", - "stava", - "stavamo", - "stavano", - "stavate", - "stavi", - "stavo", - "stemmo", - "stessa", - "stesse", - "stessero", - "stessi", - "stessimo", - "stesso", - "steste", - "stesti", - "stette", - "stettero", - "stetti", - "stia", - "stiamo", - "stiano", - "stiate", - "sto", - "su", - "sua", - "subito", - "successivamente", - "successivo", - "sue", - "sugl", - "sugli", - "sui", - "sul", - "sull", - "sulla", - "sulle", - "sullo", - "suo", - "suoi", - "tale", - "tali", - "talvolta", - "tanto", - "te", - "tempo", - "terzo", - "th", - "ti", - "titolo", - "tra", - "tranne", - "tre", - "trenta", - "triplo", - "troppo", - "trovato", - "tu", - "tua", - "tue", - "tuo", - "tuoi", - "tutta", - "tuttavia", - "tutte", - "tutti", - "tutto", - "uguali", - "ulteriore", - "ultimo", - "un", - "una", - "uno", - "uomo", - "va", - "vai", - "vale", - "vari", - "varia", - "varie", - "vario", - "verso", - "vi", - "vicino", - "visto", - "vita", - "voi", - "volta", - "volte", - "vostra", - "vostre", - "vostri", - "vostro", - "è"); - var italianStopWords = Set.of("a", - "abbia", - "abbiamo", - "abbiano", - "abbiate", - "ad", - "adesso", - "agl", - "agli", - "ai", - "al", - "all", - "alla", - "alle", - "allo", - "allora", - "altre", - "altri", - "altro", - "anche", - "ancora", - "avemmo", - "avendo", - "avere", - "avesse", - "avessero", - "avessi", - "avessimo", - "aveste", - "avesti", - "avete", - "aveva", - "avevamo", - "avevano", - "avevate", - "avevi", - "avevo", - "avrai", - "avranno", - "avrebbe", - "avrebbero", - "avrei", - "avremmo", - "avremo", - "avreste", - "avresti", - "avrete", - "avrà", - "avrò", - "avuta", - "avute", - "avuti", - "avuto", - "c", - "che", - "chi", - "ci", - "coi", - "col", - "come", - "con", - "contro", - "cui", - "da", - "dagl", - "dagli", - "dai", - "dal", - "dall", - "dalla", - "dalle", - "dallo", - "degl", - "degli", - "dei", - "del", - "dell", - "della", - "delle", - "dello", - "dentro", - "di", - "dov", - "dove", - "e", - "ebbe", - "ebbero", - "ebbi", - "ecco", - "ed", - "era", - "erano", - "eravamo", - "eravate", - "eri", - "ero", - "essendo", - "faccia", - "facciamo", - "facciano", - "facciate", - "faccio", - "facemmo", - "facendo", - "facesse", - "facessero", - "facessi", - "facessimo", - "faceste", - "facesti", - "faceva", - "facevamo", - "facevano", - "facevate", - "facevi", - "facevo", - "fai", - "fanno", - "farai", - "faranno", - "fare", - "farebbe", - "farebbero", - "farei", - "faremmo", - "faremo", - "fareste", - "faresti", - "farete", - "farà", - "farò", - "fece", - "fecero", - "feci", - "fino", - "fosse", - "fossero", - "fossi", - "fossimo", - "foste", - "fosti", - "fra", - "fu", - "fui", - "fummo", - "furono", - "giù", - "gli", - "ha", - "hai", - "hanno", - "ho", - "i", - "il", - "in", - "io", - "l", - "la", - "le", - "lei", - "li", - "lo", - "loro", - "lui", - "ma", - "me", - "mi", - "mia", - "mie", - "miei", - "mio", - "ne", - "negl", - "negli", - "nei", - "nel", - "nell", - "nella", - "nelle", - "nello", - "no", - "noi", - "non", - "nostra", - "nostre", - "nostri", - "nostro", - "o", - "per", - "perché", - "però", - "più", - "pochi", - "poco", - "qua", - "quale", - "quanta", - "quante", - "quanti", - "quanto", - "quasi", - "quella", - "quelle", - "quelli", - "quello", - "questa", - "queste", - "questi", - "questo", - "qui", - "quindi", - "sarai", - "saranno", - "sarebbe", - "sarebbero", - "sarei", - "saremmo", - "saremo", - "sareste", - "saresti", - "sarete", - "sarà", - "sarò", - "se", - "sei", - "senza", - "si", - "sia", - "siamo", - "siano", - "siate", - "siete", - "sono", - "sopra", - "sotto", - "sta", - "stai", - "stando", - "stanno", - "starai", - "staranno", - "stare", - "starebbe", - "starebbero", - "starei", - "staremmo", - "staremo", - "stareste", - "staresti", - "starete", - "starà", - "starò", - "stava", - "stavamo", - "stavano", - "stavate", - "stavi", - "stavo", - "stemmo", - "stesse", - "stessero", - "stessi", - "stessimo", - "stesso", - "steste", - "stesti", - "stette", - "stettero", - "stetti", - "stia", - "stiamo", - "stiano", - "stiate", - "sto", - "su", - "sua", - "sue", - "sugl", - "sugli", - "sui", - "sul", - "sull", - "sulla", - "sulle", - "sullo", - "suo", - "suoi", - "te", - "ti", - "tra", - "tu", - "tua", - "tue", - "tuo", - "tuoi", - "tutti", - "tutto", - "un", - "una", - "uno", - "vai", - "vi", - "voi", - "vostra", - "vostre", - "vostri", - "vostro", - "è" - ); - var stopWordsString2 = new HashSet<>(englishStopWords); - stopWordsString2.addAll(italianStopWords); - stopWordsString = Collections.unmodifiableSet(stopWordsString2); - stopWords = CharArraySet.copy(Stream - .concat(englishStopWords.stream(), oldItalianStopWords.stream()) - .map(String::toCharArray) - .collect(Collectors.toSet())); - } - - @SuppressWarnings("unused") - public static CharArraySet getStopWords() { - return stopWords; - } - - public static Set getStopWordsString() { - return stopWordsString; - } - } - -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/analyzer/NCharGramAnalyzer.java b/src/main/java/it/cavallium/dbengine/lucene/analyzer/NCharGramAnalyzer.java deleted file mode 100644 index 2e7212d..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/analyzer/NCharGramAnalyzer.java +++ /dev/null @@ -1,22 +0,0 @@ -package it.cavallium.dbengine.lucene.analyzer; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.ngram.NGramTokenizer; - -public class NCharGramAnalyzer extends Analyzer { - - private final int minGram; - private final int maxGram; - - public NCharGramAnalyzer(int minGram, int maxGram) { - this.minGram = minGram; - this.maxGram = maxGram; - } - - @Override - protected TokenStreamComponents createComponents(final String fieldName) { - Tokenizer tokenizer = new NGramTokenizer(minGram, maxGram); - return new TokenStreamComponents(tokenizer); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/analyzer/NCharGramEdgeAnalyzer.java b/src/main/java/it/cavallium/dbengine/lucene/analyzer/NCharGramEdgeAnalyzer.java deleted file mode 100644 index f3c55ba..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/analyzer/NCharGramEdgeAnalyzer.java +++ /dev/null @@ -1,23 +0,0 @@ -package it.cavallium.dbengine.lucene.analyzer; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; - -public class NCharGramEdgeAnalyzer extends Analyzer { - - private final int minGram; - private final int maxGram; - - public NCharGramEdgeAnalyzer(int minGram, int maxGram) { - this.minGram = minGram; - this.maxGram = maxGram; - } - - @Override - protected TokenStreamComponents createComponents(final String fieldName) { - Tokenizer tokenizer = new EdgeNGramTokenizer(minGram, maxGram); - return new TokenStreamComponents(tokenizer); - } - -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/analyzer/TextFieldsAnalyzer.java b/src/main/java/it/cavallium/dbengine/lucene/analyzer/TextFieldsAnalyzer.java deleted file mode 100644 index 92023df..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/analyzer/TextFieldsAnalyzer.java +++ /dev/null @@ -1,15 +0,0 @@ -package it.cavallium.dbengine.lucene.analyzer; - -public enum TextFieldsAnalyzer { - N4Gram, - N4GramEdge, - N3To5Gram, - N3To5GramEdge, - Standard, - StandardSimple, - ICUCollationKey, - StandardMultilanguage, - LegacyFullText, - LegacyWordWithStemming, - LegacyICU -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/analyzer/TextFieldsSimilarity.java b/src/main/java/it/cavallium/dbengine/lucene/analyzer/TextFieldsSimilarity.java deleted file mode 100644 index eb32c18..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/analyzer/TextFieldsSimilarity.java +++ /dev/null @@ -1,22 +0,0 @@ -package it.cavallium.dbengine.lucene.analyzer; - -public enum TextFieldsSimilarity { - BM25Standard, - BM25Classic, - NGramBM25Classic, - BM25L, - NGramBM25L, - BM25Plus, - NGramBM25Plus, - BM15Plus, - NGramBM15Plus, - BM11Plus, - NGramBM11Plus, - Classic, - NGramClassic, - LTC, - LDP, - LDPNoLength, - Robertson, - Boolean -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/analyzer/WordAnalyzer.java b/src/main/java/it/cavallium/dbengine/lucene/analyzer/WordAnalyzer.java deleted file mode 100644 index 5aa50b1..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/analyzer/WordAnalyzer.java +++ /dev/null @@ -1,76 +0,0 @@ -package it.cavallium.dbengine.lucene.analyzer; - -import com.ibm.icu.text.Collator; -import com.ibm.icu.util.ULocale; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseFilter; -import org.apache.lucene.analysis.StopFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; -import org.apache.lucene.analysis.en.PorterStemFilter; -import org.apache.lucene.analysis.icu.ICUCollationAttributeFactory; -import org.apache.lucene.analysis.icu.ICUFoldingFilter; -import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig; -import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer; -import org.apache.lucene.analysis.it.ItalianLightStemFilter; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.analysis.util.ElisionFilter; - -public class WordAnalyzer extends Analyzer { - - private static final Collator ROOT_COLLATOR = Collator.getInstance(ULocale.ROOT); - private static final ICUCollationAttributeFactory ROOT_ICU_ATTRIBUTE_FACTORY = new ICUCollationAttributeFactory(ROOT_COLLATOR); - - private final boolean icu; - private final boolean stem; - - public WordAnalyzer(boolean icu, boolean stem) { - this.icu = icu; - this.stem = stem; - if (icu) { - if (!stem) { - throw new IllegalArgumentException("stem must be true if icu is true"); - } - } - } - - @Override - protected TokenStreamComponents createComponents(final String fieldName) { - if (icu) { - var tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(false, false)); - TokenStream tokenStream; - tokenStream = new ElisionFilter(tokenizer, ItaEngStopWords.ITA_DEFAULT_ARTICLES); - tokenStream = new LowerCaseFilter(tokenStream); - tokenStream = new StopFilter(tokenStream, ItaEngStopWords.STOP_WORDS_SET); - tokenStream = new ItalianLightStemFilter(tokenStream); - tokenStream = new PorterStemFilter(tokenStream); - tokenStream = new ICUFoldingFilter(tokenStream); - return new TokenStreamComponents(tokenizer, tokenStream); - } else { - var maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; - var standardTokenizer = new StandardTokenizer(new ICUCollationAttributeFactory(Collator.getInstance(ULocale.ROOT))); - standardTokenizer.setMaxTokenLength(maxTokenLength); - TokenStream tokenStream = standardTokenizer; - tokenStream = new LowerCaseFilter(tokenStream); - if (stem) { - tokenStream = new ItalianLightStemFilter(new EnglishMinimalStemFilter(tokenStream)); - } - return new TokenStreamComponents(r -> { - standardTokenizer.setMaxTokenLength(maxTokenLength); - standardTokenizer.setReader(r); - }, tokenStream); - } - } - - @Override - protected TokenStream normalize(String fieldName, TokenStream tokenStream) { - if (icu) { - tokenStream = new LowerCaseFilter(tokenStream); - tokenStream = new ElisionFilter(tokenStream, ItaEngStopWords.ITA_DEFAULT_ARTICLES); - return new ICUFoldingFilter(tokenStream); - } else { - return new LowerCaseFilter(tokenStream); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/BucketValueSource.java b/src/main/java/it/cavallium/dbengine/lucene/collector/BucketValueSource.java deleted file mode 100644 index d56c815..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/BucketValueSource.java +++ /dev/null @@ -1,17 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import org.apache.lucene.search.DoubleValuesSource; -import org.apache.lucene.search.LongValuesSource; - -public sealed interface BucketValueSource permits BucketValueSource.DoubleBucketValueSource, - BucketValueSource.LongBucketValueSource, - BucketValueSource.ConstantValueSource, BucketValueSource.NullValueSource { - - record ConstantValueSource(Number constant) implements BucketValueSource {} - - record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {} - - record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {} - - record NullValueSource() implements BucketValueSource {} -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/Buckets.java b/src/main/java/it/cavallium/dbengine/lucene/collector/Buckets.java deleted file mode 100644 index 60a867e..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/Buckets.java +++ /dev/null @@ -1,28 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import it.unimi.dsi.fastutil.doubles.DoubleArrayList; -import java.util.ArrayList; -import java.util.List; - -public record Buckets(List seriesValues, DoubleArrayList totals) { - - public Buckets { - for (DoubleArrayList values : seriesValues) { - if (values.size() != totals.size()) { - throw new IllegalArgumentException("Buckets size mismatch"); - } - } - } - - public List normalized() { - var normalizedSeries = new ArrayList(seriesValues.size()); - for (DoubleArrayList values : seriesValues) { - DoubleArrayList normalized = new DoubleArrayList(values.size()); - for (int i = 0; i < values.size(); i++) { - normalized.add(values.getDouble(i) / totals.getDouble(i)); - } - normalizedSeries.add(normalized); - } - return normalizedSeries; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/CollectorMultiManager.java b/src/main/java/it/cavallium/dbengine/lucene/collector/CollectorMultiManager.java deleted file mode 100644 index a17f16d..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/CollectorMultiManager.java +++ /dev/null @@ -1,12 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import java.io.IOException; -import java.util.List; -import org.apache.lucene.search.ScoreMode; - -public interface CollectorMultiManager { - - ScoreMode scoreMode(); - - U reduce(List results); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/DecimalBucketMultiCollectorManager.java b/src/main/java/it/cavallium/dbengine/lucene/collector/DecimalBucketMultiCollectorManager.java deleted file mode 100644 index 2715a56..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/DecimalBucketMultiCollectorManager.java +++ /dev/null @@ -1,289 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import it.unimi.dsi.fastutil.doubles.DoubleArrayList; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.commons.lang3.NotImplementedException; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsConfig; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.RandomSamplingFacetsCollector; -import org.apache.lucene.facet.range.DoubleRange; -import org.apache.lucene.facet.range.DoubleRangeFacetCounts; -import org.apache.lucene.facet.range.LongRange; -import org.apache.lucene.facet.range.LongRangeFacetCounts; -import org.apache.lucene.facet.range.Range; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DoubleValuesSource; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.LongValuesSource; -import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.jetbrains.annotations.Nullable; - -public class DecimalBucketMultiCollectorManager implements CollectorMultiManager { - - private static final boolean USE_SINGLE_FACET_COLLECTOR = true; - private static final boolean AMORTIZE = true; - private final boolean randomSamplingEnabled; - private final FastFacetsCollectorManager facetsCollectorManager; - private final FastRandomSamplingFacetsCollector randomSamplingFacetsCollector; - private final Range[] bucketRanges; - - private final List queries; - private final @Nullable Query normalizationQuery; - private final @Nullable Integer collectionRate; - private final @Nullable Integer sampleSize; - - private final String bucketField; - private final BucketValueSource bucketValueSource; - - private final double totalLength; - private final double bucketLength; - private final double minimum; - private final double maximum; - private final int buckets; - - // todo: replace with an argument - private static final boolean USE_LONGS = true; - - public DecimalBucketMultiCollectorManager(double minimum, - double maximum, - double buckets, - String bucketField, - BucketValueSource bucketValueSource, - List queries, - @Nullable Query normalizationQuery, - @Nullable Integer collectionRate, - @Nullable Integer sampleSize) { - this.queries = queries; - this.normalizationQuery = normalizationQuery; - var bucketsInt = (int) Math.ceil(buckets); - this.minimum = minimum; - this.maximum = maximum; - this.buckets = bucketsInt; - this.bucketLength = (maximum - minimum) / bucketsInt; - this.totalLength = bucketLength * bucketsInt; - this.bucketField = bucketField; - this.bucketValueSource = bucketValueSource; - this.collectionRate = collectionRate; - this.sampleSize = sampleSize; - - if (USE_LONGS) { - this.bucketRanges = new LongRange[bucketsInt]; - } else { - this.bucketRanges = new DoubleRange[bucketsInt]; - } - for (int i = 0; i < bucketsInt; i++) { - double offsetMin = minimum + (bucketLength * i); - double offsetMax = minimum + (bucketLength * (i + 1)); - if (USE_LONGS) { - this.bucketRanges[i] = new LongRange(Integer.toString(i), - (long) offsetMin, - true, - (long) offsetMax, - i == bucketsInt - 1 - ); - } else { - this.bucketRanges[i] = new DoubleRange(Integer.toString(i), - offsetMin, - true, - offsetMax, - i == bucketsInt - 1 - ); - } - } - - this.randomSamplingEnabled = sampleSize != null; - int intCollectionRate = this.collectionRate == null ? 1 : this.collectionRate; - if (randomSamplingEnabled) { - randomSamplingFacetsCollector = new FastRandomSamplingFacetsCollector(intCollectionRate, sampleSize, 0); - this.facetsCollectorManager = null; - } else { - this.randomSamplingFacetsCollector = null; - this.facetsCollectorManager = new FastFacetsCollectorManager(intCollectionRate); - } - } - - public double[] newBuckets() { - return new double[buckets]; - } - - public Buckets search(IndexSearcher indexSearcher) throws IOException { - Query query; - if (USE_SINGLE_FACET_COLLECTOR && normalizationQuery != null) { - query = normalizationQuery; - } else if (queries.size() == 0) { - query = new MatchNoDocsQuery(); - } else if (queries.size() == 1) { - query = queries.get(0); - } else { - var booleanQueryBuilder = new BooleanQuery.Builder(); - for (Query queryEntry : queries) { - booleanQueryBuilder.add(queryEntry, Occur.SHOULD); - } - booleanQueryBuilder.setMinimumNumberShouldMatch(1); - query = booleanQueryBuilder.build(); - } - it.cavallium.dbengine.lucene.collector.FacetsCollector queryFacetsCollector; - if (randomSamplingEnabled) { - indexSearcher.search(query, randomSamplingFacetsCollector); - queryFacetsCollector = randomSamplingFacetsCollector; - } else { - queryFacetsCollector = indexSearcher.search(query, facetsCollectorManager); - } - double[] reducedNormalizationBuckets = newBuckets(); - List seriesReducedBuckets = new ArrayList<>(queries.size()); - for (int i = 0; i < queries.size(); i++) { - var buckets = newBuckets(); - seriesReducedBuckets.add(DoubleArrayList.wrap(buckets)); - } - int serieIndex = 0; - for (Query queryEntry : queries) { - var reducedBuckets = seriesReducedBuckets.get(serieIndex); - Facets facets; - if (USE_LONGS) { - LongValuesSource valuesSource; - if (bucketValueSource instanceof BucketValueSource.NullValueSource) { - - valuesSource = null; - } else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) { - valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue()); - } else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) { - valuesSource = longBucketValueSource.source(); - } else { - throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource); - } - facets = new LongRangeFacetCounts(bucketField, - valuesSource, - queryFacetsCollector.getLuceneFacetsCollector(), - USE_SINGLE_FACET_COLLECTOR && normalizationQuery != null || queries.size() > 1 ? queryEntry : null, - (LongRange[]) bucketRanges - ); - } else { - DoubleValuesSource valuesSource; - if (bucketValueSource instanceof BucketValueSource.NullValueSource) { - valuesSource = null; - } else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) { - valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue()); - } else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) { - valuesSource = doubleBucketValueSource.source(); - } else { - throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource); - } - facets = new DoubleRangeFacetCounts(bucketField, - valuesSource, - queryFacetsCollector.getLuceneFacetsCollector(), - USE_SINGLE_FACET_COLLECTOR && normalizationQuery != null || queries.size() > 1 ? queryEntry : null, - (DoubleRange[]) bucketRanges - ); - } - FacetResult children = facets.getTopChildren(1, bucketField); - if (AMORTIZE && randomSamplingEnabled) { - var cfg = new FacetsConfig(); - for (Range bucketRange : bucketRanges) { - cfg.setIndexFieldName(bucketRange.label, bucketField); - } - ((RandomSamplingFacetsCollector) queryFacetsCollector.getLuceneFacetsCollector()).amortizeFacetCounts(children, cfg, indexSearcher); - } - for (LabelAndValue labelAndValue : children.labelValues) { - var index = Integer.parseInt(labelAndValue.label); - reducedBuckets.set(index, reducedBuckets.getDouble(index) + labelAndValue.value.doubleValue()); - } - serieIndex++; - } - - it.cavallium.dbengine.lucene.collector.FacetsCollector normalizationFacetsCollector; - Facets normalizationFacets; - if (normalizationQuery != null) { - if (USE_SINGLE_FACET_COLLECTOR) { - normalizationFacetsCollector = queryFacetsCollector; - } else if (randomSamplingEnabled) { - indexSearcher.search(normalizationQuery, randomSamplingFacetsCollector); - normalizationFacetsCollector = randomSamplingFacetsCollector; - } else { - normalizationFacetsCollector = indexSearcher.search(normalizationQuery, facetsCollectorManager); - } - if (USE_LONGS) { - LongValuesSource valuesSource; - if (bucketValueSource instanceof BucketValueSource.NullValueSource) { - valuesSource = null; - } else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) { - valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue()); - } else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) { - valuesSource = longBucketValueSource.source(); - } else { - throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource); - } - normalizationFacets = new LongRangeFacetCounts(bucketField, - valuesSource, - normalizationFacetsCollector.getLuceneFacetsCollector(), - null, - (LongRange[]) bucketRanges - ); - } else { - DoubleValuesSource valuesSource; - if (bucketValueSource instanceof BucketValueSource.NullValueSource) { - valuesSource = null; - } else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) { - valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue()); - } else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) { - valuesSource = doubleBucketValueSource.source(); - } else { - throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource); - } - normalizationFacets = new DoubleRangeFacetCounts(bucketField, - valuesSource, - normalizationFacetsCollector.getLuceneFacetsCollector(), - null, - (DoubleRange[]) bucketRanges - ); - } - var normalizationChildren = normalizationFacets.getTopChildren(0, bucketField); - if (AMORTIZE && randomSamplingEnabled) { - var cfg = new FacetsConfig(); - for (Range bucketRange : bucketRanges) { - cfg.setIndexFieldName(bucketRange.label, bucketField); - } - ((RandomSamplingFacetsCollector) normalizationFacetsCollector.getLuceneFacetsCollector()).amortizeFacetCounts(normalizationChildren, cfg, indexSearcher); - } - for (LabelAndValue labelAndValue : normalizationChildren.labelValues) { - var index = Integer.parseInt(labelAndValue.label); - reducedNormalizationBuckets[index] += labelAndValue.value.doubleValue(); - } - } else { - Arrays.fill(reducedNormalizationBuckets, 1); - } - return new Buckets(seriesReducedBuckets, DoubleArrayList.wrap(reducedNormalizationBuckets)); - } - - @Override - public ScoreMode scoreMode() { - throw new NotImplementedException(); - } - - @Override - public Buckets reduce(List reducedBucketsList) { - List seriesReducedValues = new ArrayList<>(); - double[] reducedTotals = newBuckets(); - for (var seriesBuckets : reducedBucketsList) { - for (DoubleArrayList values : seriesBuckets.seriesValues()) { - double[] reducedValues = newBuckets(); - for (int i = 0; i < values.size(); i++) { - reducedValues[i] += values.getDouble(i); - } - seriesReducedValues.add(DoubleArrayList.wrap(reducedValues)); - } - var totals = seriesBuckets.totals(); - for (int i = 0; i < totals.size(); i++) { - reducedTotals[i] += totals.getDouble(i); - } - } - return new Buckets(seriesReducedValues, DoubleArrayList.wrap(reducedTotals)); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/FacetsCollector.java b/src/main/java/it/cavallium/dbengine/lucene/collector/FacetsCollector.java deleted file mode 100644 index 2888e22..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/FacetsCollector.java +++ /dev/null @@ -1,32 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import java.io.IOException; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.ScoreMode; - -public interface FacetsCollector extends Collector { - - static FacetsCollector wrap(org.apache.lucene.facet.FacetsCollector facetsCollector) { - return new FacetsCollector() { - - @Override - public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() { - return facetsCollector; - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return facetsCollector.getLeafCollector(context); - } - - @Override - public ScoreMode scoreMode() { - return facetsCollector.scoreMode(); - } - }; - } - - org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector(); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/FastFacetsCollectorManager.java b/src/main/java/it/cavallium/dbengine/lucene/collector/FastFacetsCollectorManager.java deleted file mode 100644 index b4436ac..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/FastFacetsCollectorManager.java +++ /dev/null @@ -1,92 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import static it.cavallium.dbengine.database.LLUtils.mapList; - -import it.cavallium.dbengine.lucene.IntSmear; -import it.unimi.dsi.fastutil.ints.IntHash; -import java.io.IOException; -import java.util.Collection; -import org.apache.lucene.facet.FacetsCollectorManager; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; -import org.apache.lucene.search.ScoreMode; - -public class FastFacetsCollectorManager implements CollectorManager { - - private final int collectionRate; - private final IntHash.Strategy hash; - private final FacetsCollectorManager facetsCollectorManager; - - public FastFacetsCollectorManager(int collectionRate) { - this.collectionRate = collectionRate; - this.hash = new IntSmear(); - this.facetsCollectorManager = new FacetsCollectorManager(); - } - - @Override - public FacetsCollector newCollector() { - return new FastFacetsCollector(collectionRate, hash); - } - - @Override - public FacetsCollector reduce(Collection collectors) throws IOException { - return FacetsCollector.wrap(facetsCollectorManager.reduce(mapList(collectors, - facetsCollector -> facetsCollector.getLuceneFacetsCollector() - ))); - } - - private static class FastFacetsCollector implements FacetsCollector { - - private final org.apache.lucene.facet.FacetsCollector collector; - private final int collectionRate; - private final IntHash.Strategy hash; - - public FastFacetsCollector(int collectionRate, IntHash.Strategy hash) { - this.collectionRate = collectionRate; - this.hash = hash; - this.collector = new org.apache.lucene.facet.FacetsCollector(false) { - @Override - public ScoreMode scoreMode() { - return ScoreMode.COMPLETE_NO_SCORES; - } - }; - } - - - @Override - public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() { - return collector; - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - var leafCollector = collector.getLeafCollector(context); - return new LeafCollector() { - @Override - public void setScorer(Scorable scorer) throws IOException { - leafCollector.setScorer(scorer); - } - - @Override - public void collect(int doc) throws IOException { - if (collectionRate == 1 || hash.hashCode(doc) % collectionRate == 0) { - leafCollector.collect(doc); - } - } - - @Override - public DocIdSetIterator competitiveIterator() throws IOException { - return leafCollector.competitiveIterator(); - } - }; - } - - @Override - public ScoreMode scoreMode() { - return collector.scoreMode(); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/FastRandomSamplingFacetsCollector.java b/src/main/java/it/cavallium/dbengine/lucene/collector/FastRandomSamplingFacetsCollector.java deleted file mode 100644 index c893eb2..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/FastRandomSamplingFacetsCollector.java +++ /dev/null @@ -1,63 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import it.cavallium.dbengine.lucene.IntSmear; -import it.unimi.dsi.fastutil.ints.IntHash; -import java.io.IOException; -import org.apache.lucene.facet.RandomSamplingFacetsCollector; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Scorable; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.SimpleCollector; - -public class FastRandomSamplingFacetsCollector extends SimpleCollector implements FacetsCollector { - - private final RandomSamplingFacetsCollector collector; - private final int collectionRate; - private final IntHash.Strategy hash; - - /** - * @param collectionRate collect 1 document every n collectable documents - */ - public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize) { - this(collectionRate, sampleSize, 0); - } - - public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize, long seed) { - this.collectionRate = collectionRate; - this.hash = new IntSmear(); - this.collector = new RandomSamplingFacetsCollector(sampleSize, seed) { - @Override - public ScoreMode scoreMode() { - return ScoreMode.COMPLETE_NO_SCORES; - } - }; - } - - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { - collector.getLeafCollector(context); - } - - @Override - public void setScorer(Scorable scorer) throws IOException { - collector.setScorer(scorer); - } - - @Override - public void collect(int doc) throws IOException { - if (collectionRate == 1 || hash.hashCode(doc) % collectionRate == 0) { - collector.collect(doc); - } - } - - - @Override - public ScoreMode scoreMode() { - return collector.scoreMode(); - } - - @Override - public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() { - return collector; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/FullDocsCollector.java b/src/main/java/it/cavallium/dbengine/lucene/collector/FullDocsCollector.java deleted file mode 100644 index 15c7354..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/FullDocsCollector.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package it.cavallium.dbengine.lucene.collector; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import it.cavallium.dbengine.lucene.FullDocs; -import it.cavallium.dbengine.lucene.LLDoc; -import it.cavallium.dbengine.lucene.LazyFullDocs; -import it.cavallium.dbengine.lucene.PriorityQueue; -import it.cavallium.dbengine.lucene.ResourceIterable; -import it.cavallium.dbengine.lucene.Reversable; -import it.cavallium.dbengine.lucene.ReversableResourceIterable; -import it.cavallium.dbengine.utils.SimpleResource; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TotalHits; - -/** - * A base class for all collectors that return a {@link TopDocs} output. This collector allows easy - * extension by providing a single constructor which accepts a {@link PriorityQueue} as well as - * protected members for that priority queue and a counter of the number of total hits.
- * Extending classes can override any of the methods to provide their own implementation, as well as - * avoid the use of the priority queue entirely by passing null to {@link - * #FullDocsCollector(PriorityQueue)}. In that case however, you might want to consider overriding - * all methods, in order to avoid a NullPointerException. - */ -public abstract class FullDocsCollector & Reversable>, INTERNAL extends LLDoc, - EXTERNAL extends LLDoc> extends SimpleResource implements Collector, DiscardingCloseable { - - /** - * The priority queue which holds the top documents. Note that different implementations of - * PriorityQueue give different meaning to 'top documents'. HitQueue for example aggregates the - * top scoring documents, while other PQ implementations may hold documents sorted by other - * criteria. - */ - protected final PQ pq; - - /** The total number of documents that the collector encountered. */ - protected int totalHits; - - /** Whether {@link #totalHits} is exact or a lower bound. */ - protected TotalHits.Relation totalHitsRelation = TotalHits.Relation.EQUAL_TO; - - protected FullDocsCollector(PQ pq) { - this.pq = pq; - } - - /** The total number of documents that matched this query. */ - public int getTotalHits() { - return totalHits; - } - - /** Returns the top docs that were collected by this collector. */ - public FullDocs fullDocs() { - return new LazyFullDocs<>(mapResults(this.pq.reverse()), new TotalHits(totalHits, totalHitsRelation)); - } - - public abstract ResourceIterable mapResults(ResourceIterable it); - - @Override - public void onClose() { - pq.close(); - } -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/FullFieldDocs.java b/src/main/java/it/cavallium/dbengine/lucene/collector/FullFieldDocs.java deleted file mode 100644 index a67286f..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/FullFieldDocs.java +++ /dev/null @@ -1,44 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import it.cavallium.dbengine.lucene.FullDocs; -import it.cavallium.dbengine.lucene.LLDoc; -import it.cavallium.dbengine.utils.SimpleResource; -import java.util.stream.Stream; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.TotalHits; - -public class FullFieldDocs extends SimpleResource implements FullDocs, DiscardingCloseable { - - private final FullDocs fullDocs; - private final SortField[] fields; - - public FullFieldDocs(FullDocs fullDocs, SortField[] fields) { - this.fullDocs = fullDocs; - this.fields = fields; - } - - @Override - public Stream iterate() { - return fullDocs.iterate(); - } - - @Override - public Stream iterate(long skips) { - return fullDocs.iterate(skips); - } - - @Override - public TotalHits totalHits() { - return fullDocs.totalHits(); - } - - public SortField[] fields() { - return fields; - } - - @Override - protected void onClose() { - fullDocs.close(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/ScoringShardsCollectorMultiManager.java b/src/main/java/it/cavallium/dbengine/lucene/collector/ScoringShardsCollectorMultiManager.java deleted file mode 100644 index 179340b..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/ScoringShardsCollectorMultiManager.java +++ /dev/null @@ -1,160 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import it.cavallium.dbengine.database.LLUtils; -import it.cavallium.dbengine.lucene.LuceneUtils; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import org.apache.commons.lang3.NotImplementedException; -import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopFieldCollector; -import org.apache.lucene.search.TopFieldDocs; -import org.jetbrains.annotations.Nullable; - -public class ScoringShardsCollectorMultiManager implements CollectorMultiManager { - - private static final boolean USE_CLASSIC_REDUCE = false; - private final Query query; - @Nullable - private final Sort sort; - private final int numHits; - private final FieldDoc after; - private final int totalHitsThreshold; - private final @Nullable Integer startN; - private final @Nullable Integer topN; - private final CollectorManager sharedCollectorManager; - - public ScoringShardsCollectorMultiManager(Query query, - @Nullable final Sort sort, - final int numHits, - final FieldDoc after, - final int totalHitsThreshold, - int startN, - int topN) { - this(query, sort, numHits, after, totalHitsThreshold, (Integer) startN, (Integer) topN); - } - - public ScoringShardsCollectorMultiManager(Query query, - @Nullable final Sort sort, - final int numHits, - final FieldDoc after, - final int totalHitsThreshold, - int startN) { - this(query, sort, numHits, after, totalHitsThreshold, (Integer) startN, (Integer) 2147483630); - } - - public ScoringShardsCollectorMultiManager(Query query, - @Nullable final Sort sort, - final int numHits, - final FieldDoc after, - final int totalHitsThreshold) { - this(query, sort, numHits, after, totalHitsThreshold, null, null); - } - - private ScoringShardsCollectorMultiManager(Query query, - @Nullable final Sort sort, - final int numHits, - final FieldDoc after, - final int totalHitsThreshold, - @Nullable Integer startN, - @Nullable Integer topN) { - this.query = query; - this.sort = sort; - this.numHits = numHits; - this.after = after; - this.totalHitsThreshold = totalHitsThreshold; - this.startN = startN; - if (topN != null && startN != null && (long) topN + (long) startN > 2147483630) { - this.topN = 2147483630 - startN; - } else if (topN != null && topN > 2147483630) { - this.topN = 2147483630; - } else { - this.topN = topN; - } - this.sharedCollectorManager = TopFieldCollector.createSharedManager(sort == null ? Sort.RELEVANCE : sort, numHits, after, totalHitsThreshold); - } - - public CollectorManager get(IndexSearcher indexSearcher, int shardIndex) { - return new CollectorManager<>() { - @Override - public TopFieldCollector newCollector() throws IOException { - return sharedCollectorManager.newCollector(); - } - - @Override - public TopDocs reduce(Collection collectors) throws IOException { - if (LLUtils.isInNonBlockingThread()) { - throw new UnsupportedOperationException("Called reduce in a nonblocking thread"); - } - if (USE_CLASSIC_REDUCE) { - final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()]; - int i = 0; - for (TopFieldCollector collector : collectors) { - topDocs[i++] = collector.topDocs(); - } - var result = LuceneUtils.mergeTopDocs(sort, null, null, topDocs); - - if (sort != null && sort.needsScores()) { - TopFieldCollector.populateScores(result.scoreDocs, indexSearcher, query); - } - - return result; - } else { - TopDocs[] topDocs; - if (sort != null) { - topDocs = new TopFieldDocs[collectors.size()]; - var i = 0; - for (TopFieldCollector collector : collectors) { - topDocs[i] = collector.topDocs(); - - // Populate scores of topfieldcollector. By default it doesn't popupate the scores - if (topDocs[i].scoreDocs.length > 0 && Float.isNaN(topDocs[i].scoreDocs[0].score) && sort.needsScores()) { - TopFieldCollector.populateScores(topDocs[i].scoreDocs, indexSearcher, query); - } - - for (ScoreDoc scoreDoc : topDocs[i].scoreDocs) { - scoreDoc.shardIndex = shardIndex; - } - i++; - } - } else { - topDocs = new TopDocs[collectors.size()]; - var i = 0; - for (TopFieldCollector collector : collectors) { - topDocs[i] = collector.topDocs(); - for (ScoreDoc scoreDoc : topDocs[i].scoreDocs) { - scoreDoc.shardIndex = shardIndex; - } - i++; - } - } - return LuceneUtils.mergeTopDocs(sort, null, null, topDocs); - } - } - }; - } - - @Override - public ScoreMode scoreMode() { - throw new NotImplementedException(); - } - - @SuppressWarnings({"SuspiciousToArrayCall", "IfStatementWithIdenticalBranches"}) - @Override - public TopDocs reduce(List topDocs) { - TopDocs[] arr; - if (sort != null) { - arr = topDocs.toArray(TopFieldDocs[]::new); - } else { - arr = topDocs.toArray(TopDocs[]::new); - } - return LuceneUtils.mergeTopDocs(sort, startN, topN, arr); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/TopDocsCollectorMultiManager.java b/src/main/java/it/cavallium/dbengine/lucene/collector/TopDocsCollectorMultiManager.java deleted file mode 100644 index f2a0b6e..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/TopDocsCollectorMultiManager.java +++ /dev/null @@ -1,125 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE; - -import it.cavallium.dbengine.lucene.LuceneUtils; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import org.apache.commons.lang3.NotImplementedException; -import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopDocsCollector; -import org.apache.lucene.search.TopFieldCollector; -import org.apache.lucene.search.TopFieldDocs; -import org.apache.lucene.search.TopScoreDocCollector; -import org.jetbrains.annotations.NotNull; - -public class TopDocsCollectorMultiManager implements CollectorMultiManager { - - private final Sort luceneSort; - private final int limit; - private final ScoreDoc after; - private final int totalHitsThreshold; - private final boolean allowPagination; - private final boolean computeScores; - - private final int topDocsOffset; - private final int topDocsCount; - - public TopDocsCollectorMultiManager(Sort luceneSort, - int limit, - ScoreDoc after, - int totalHitsThreshold, - boolean allowPagination, - boolean computeScores, - int topDocsOffset, - int topDocsCount) { - this.luceneSort = luceneSort; - this.limit = limit; - this.after = after; - this.totalHitsThreshold = totalHitsThreshold; - this.allowPagination = allowPagination; - this.computeScores = computeScores; - - this.topDocsOffset = topDocsOffset; - this.topDocsCount = topDocsCount; - } - - public CollectorManager, TopDocs> get(@NotNull Query query, IndexSearcher indexSearcher) { - return new CollectorManager<>() { - @Override - public TopDocsCollector newCollector() { - TopDocsCollector collector; - if (after != null && !allowPagination) { - throw new IllegalArgumentException("\"allowPagination\" is false, but \"after\" is set"); - } - if (luceneSort == null) { - if (after == null) { - if (computeScores || allowPagination || !ALLOW_UNSCORED_PAGINATION_MODE) { - collector = TopScoreDocCollector.create(limit, totalHitsThreshold); - } else { - collector = new UnscoredCollector(limit); - } - } else { - collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold); - } - } else { - if (after == null) { - collector = TopFieldCollector.create(luceneSort, limit, totalHitsThreshold); - } else if (after instanceof FieldDoc afterFieldDoc) { - collector = TopFieldCollector.create(luceneSort, limit, afterFieldDoc, totalHitsThreshold); - } else { - throw new UnsupportedOperationException("GetTopDocs with \"luceneSort\" != null requires \"after\" to be a FieldDoc"); - } - } - return collector; - } - - @Override - public TopDocs reduce(Collection> collectors) throws IOException { - TopDocs[] docsArray; - boolean needsSort = luceneSort != null; - boolean needsScores = luceneSort != null && luceneSort.needsScores(); - if (needsSort) { - docsArray = new TopFieldDocs[collectors.size()]; - } else { - docsArray = new TopDocs[collectors.size()]; - } - int i = 0; - for (TopDocsCollector collector : collectors) { - docsArray[i] = collector.topDocs(); - i++; - } - var merged = LuceneUtils.mergeTopDocs(luceneSort, null, null, docsArray); - if (needsScores) { - TopFieldCollector.populateScores(merged.scoreDocs, indexSearcher, query); - } - return merged; - } - }; - } - - @Override - public ScoreMode scoreMode() { - throw new NotImplementedException(); - } - - @SuppressWarnings({"SuspiciousToArrayCall", "IfStatementWithIdenticalBranches"}) - @Override - public TopDocs reduce(List topDocs) { - TopDocs[] arr; - if (luceneSort != null) { - arr = topDocs.toArray(TopFieldDocs[]::new); - } else { - arr = topDocs.toArray(TopDocs[]::new); - } - return LuceneUtils.mergeTopDocs(luceneSort, topDocsOffset, topDocsCount, arr); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/TotalHitCountCollectorManager.java b/src/main/java/it/cavallium/dbengine/lucene/collector/TotalHitCountCollectorManager.java deleted file mode 100644 index 952dd8c..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/TotalHitCountCollectorManager.java +++ /dev/null @@ -1,64 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.collector.TotalHitCountCollectorManager.TimeLimitingTotalHitCountCollector; -import java.io.IOException; -import java.time.Duration; -import java.util.Collection; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.TotalHitCountCollector; - -public class TotalHitCountCollectorManager implements CollectorManager { - - private final Duration timeout; - - public TotalHitCountCollectorManager(Duration timeout) { - this.timeout = timeout; - } - - @Override - public TimeLimitingTotalHitCountCollector newCollector() { - var totalHitCountCollector = new TotalHitCountCollector(); - var timeLimitingCollector = LuceneUtils.withTimeout(totalHitCountCollector, timeout); - return new TimeLimitingTotalHitCountCollector(totalHitCountCollector, timeLimitingCollector); - } - - @Override - public Long reduce(Collection collectors) { - long totalHits = 0; - for (var collector : collectors) { - totalHits += collector.getTotalHits(); - } - return totalHits; - } - - public static final class TimeLimitingTotalHitCountCollector implements Collector { - - private final TotalHitCountCollector totalHitCountCollector; - private final Collector timeLimitingCollector; - - private TimeLimitingTotalHitCountCollector(TotalHitCountCollector totalHitCountCollector, - Collector timeLimitingCollector) { - this.totalHitCountCollector = totalHitCountCollector; - this.timeLimitingCollector = timeLimitingCollector; - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return timeLimitingCollector.getLeafCollector(context); - } - - @Override - public ScoreMode scoreMode() { - return timeLimitingCollector.scoreMode(); - } - - public long getTotalHits() { - return totalHitCountCollector.getTotalHits(); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/collector/UnscoredCollector.java b/src/main/java/it/cavallium/dbengine/lucene/collector/UnscoredCollector.java deleted file mode 100644 index 632cfef..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/collector/UnscoredCollector.java +++ /dev/null @@ -1,101 +0,0 @@ -package it.cavallium.dbengine.lucene.collector; - -import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.ALLOW_UNSCORED_PAGINATION_MODE; - -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; -import it.unimi.dsi.fastutil.ints.IntLists; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopDocsCollector; - -public class UnscoredCollector extends TopDocsCollector implements LeafCollector { - private final IntArrayList docIds = new IntArrayList(); - private final int limit; - private LeafReaderContext currentLeafReaderContext; - - public UnscoredCollector(int limit) { - super(null); - if (!ALLOW_UNSCORED_PAGINATION_MODE) { - throw new UnsupportedOperationException(); - } - if (limit <= 0) { - throw new IllegalArgumentException(); - } - this.limit = limit; - } - - @Override - public void setScorer(Scorable scorable) { - } - - @Override - public void collect(int localDocId) { - totalHits++; - boolean canCollect = limit == -1 || docIds.size() < limit; - if (canCollect) { - int docId = currentLeafReaderContext.docBase + localDocId; - docIds.add(docId); - } - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext leafReaderContext) { - this.currentLeafReaderContext = leafReaderContext; - return this; - } - - public IntList unscoredDocs() { - return IntLists.unmodifiable(this.docIds); - } - - @Override - public ScoreMode scoreMode() { - return ScoreMode.COMPLETE_NO_SCORES; - } - - @Override - protected int topDocsSize() { - return Math.min(this.totalHits, this.docIds.size()); - } - - @Override - public TopDocs topDocs(int start, int howMany) { - int size = this.topDocsSize(); - if (howMany < 0) { - throw new IllegalArgumentException("Number of hits requested must be greater than 0 but value was " + howMany); - } else if (start < 0) { - throw new IllegalArgumentException("Expected value of starting position is between 0 and " + size + ", got " + start); - } else if (start < size && howMany != 0) { - howMany = Math.min(size - start, howMany); - ScoreDoc[] results = new ScoreDoc[howMany]; - - this.populateResults(results, start, howMany); - return this.newTopDocs(results, start); - } else { - return this.newTopDocs((ScoreDoc[])null, start); - } - } - - @Override - protected TopDocs newTopDocs(ScoreDoc[] results, int start) { - return super.newTopDocs(results, start); - } - - private void populateResults(ScoreDoc[] results, int start, int howMany) { - int i = 0; - for (int docId : docIds.subList(start, start + howMany)) { - results[i] = new ScoreDoc(docId, Float.NaN); - i++; - } - } - - @Override - protected void populateResults(ScoreDoc[] results, int howMany) { - throw new UnsupportedOperationException(); - } -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/lucene/directory/Lucene90NoCompressionStoredFieldsFormat.java b/src/main/java/it/cavallium/dbengine/lucene/directory/Lucene90NoCompressionStoredFieldsFormat.java deleted file mode 100644 index b0ead5b..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/directory/Lucene90NoCompressionStoredFieldsFormat.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package it.cavallium.dbengine.lucene.directory; - -import java.io.IOException; -import org.apache.lucene.codecs.StoredFieldsFormat; -import org.apache.lucene.codecs.StoredFieldsReader; -import org.apache.lucene.codecs.StoredFieldsWriter; -import org.apache.lucene.codecs.compressing.CompressionMode; -import org.apache.lucene.codecs.compressing.Compressor; -import org.apache.lucene.codecs.compressing.Decompressor; -import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.store.ByteBuffersDataInput; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; - -public class Lucene90NoCompressionStoredFieldsFormat extends StoredFieldsFormat { - - public static final CompressionMode DUMMY = new CompressionMode() { - - @Override - public Compressor newCompressor() { - return DUMMY_COMPRESSOR; - } - - @Override - public Decompressor newDecompressor() { - return DUMMY_DECOMPRESSOR; - } - - @Override - public String toString() { - return "DUMMY"; - } - }; - - private static final Decompressor DUMMY_DECOMPRESSOR = new Decompressor() { - - @Override - public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) - throws IOException { - assert offset + length <= originalLength; - if (bytes.bytes.length < originalLength) { - bytes.bytes = new byte[ArrayUtil.oversize(originalLength, 1)]; - } - in.readBytes(bytes.bytes, 0, offset + length); - bytes.offset = offset; - bytes.length = length; - } - - @Override - public Decompressor clone() { - return this; - } - }; - - private static final Compressor DUMMY_COMPRESSOR = new Compressor() { - - @Override - public void compress(ByteBuffersDataInput byteBuffersDataInput, DataOutput dataOutput) throws IOException { - dataOutput.copyBytes(byteBuffersDataInput, byteBuffersDataInput.size()); - } - - @Override - public void close() { - } - }; - - public Lucene90NoCompressionStoredFieldsFormat() { - } - - @Override - public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) - throws IOException { - return impl().fieldsReader(directory, si, fn, context); - } - - @Override - public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { - return impl().fieldsWriter(directory, si, context); - } - - StoredFieldsFormat impl() { - return new Lucene90CompressingStoredFieldsFormat("Lucene90StoredFieldsFastData", - DUMMY, - BEST_SPEED_BLOCK_LENGTH, - 1024, - 10 - ); - } - - - // Shoot for 10 sub blocks of 8kB each. - private static final int BEST_SPEED_BLOCK_LENGTH = 10 * 8 * 1024; - -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/directory/Lucene91CodecWithNoFieldCompression.java b/src/main/java/it/cavallium/dbengine/lucene/directory/Lucene91CodecWithNoFieldCompression.java deleted file mode 100644 index 31ffb00..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/directory/Lucene91CodecWithNoFieldCompression.java +++ /dev/null @@ -1,20 +0,0 @@ -package it.cavallium.dbengine.lucene.directory; - -import org.apache.lucene.backward_codecs.lucene90.Lucene90Codec; -import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.StoredFieldsFormat; - -public final class Lucene91CodecWithNoFieldCompression extends FilterCodec { - - private final StoredFieldsFormat storedFieldsFormat; - - public Lucene91CodecWithNoFieldCompression() { - super("Lucene410CodecWithNoFieldCompression", new Lucene90Codec()); - storedFieldsFormat = new Lucene90NoCompressionStoredFieldsFormat(); - } - - @Override - public StoredFieldsFormat storedFieldsFormat() { - return storedFieldsFormat; - } -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/lucene/hugepq/search/CustomHitsThresholdChecker.java b/src/main/java/it/cavallium/dbengine/lucene/hugepq/search/CustomHitsThresholdChecker.java deleted file mode 100644 index 6621a6a..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/hugepq/search/CustomHitsThresholdChecker.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package it.cavallium.dbengine.lucene.hugepq.search; - -import java.util.concurrent.atomic.AtomicLong; -import org.apache.lucene.search.ScoreMode; - -/** Used for defining custom algorithms to allow searches to early terminate */ -public abstract class CustomHitsThresholdChecker { - /** Implementation of CustomHitsThresholdChecker which allows global hit counting */ - private static class GlobalHitsThresholdChecker extends CustomHitsThresholdChecker { - private final long totalHitsThreshold; - private final AtomicLong globalHitCount; - - public GlobalHitsThresholdChecker(long totalHitsThreshold) { - - if (totalHitsThreshold < 0) { - throw new IllegalArgumentException( - "totalHitsThreshold must be >= 0, got " + totalHitsThreshold); - } - - this.totalHitsThreshold = totalHitsThreshold; - this.globalHitCount = new AtomicLong(); - } - - @Override - public void incrementHitCount() { - globalHitCount.incrementAndGet(); - } - - @Override - public boolean isThresholdReached(boolean supports64Bit) { - if (supports64Bit) { - return globalHitCount.getAcquire() > totalHitsThreshold; - } else { - return Math.min(globalHitCount.getAcquire(), Integer.MAX_VALUE) > Math.min(totalHitsThreshold, Integer.MAX_VALUE); - } - } - - @Override - public ScoreMode scoreMode() { - if (totalHitsThreshold == Long.MAX_VALUE) { - return ScoreMode.COMPLETE; - } - return ScoreMode.TOP_SCORES; - } - - @Override - public long getHitsThreshold(boolean supports64Bit) { - if (supports64Bit) { - return totalHitsThreshold; - } else { - return Math.min(totalHitsThreshold, Integer.MAX_VALUE); - } - } - } - - /** Default implementation of CustomHitsThresholdChecker to be used for single threaded execution */ - private static class LocalHitsThresholdChecker extends CustomHitsThresholdChecker { - private final long totalHitsThreshold; - private long hitCount; - - public LocalHitsThresholdChecker(long totalHitsThreshold) { - - if (totalHitsThreshold < 0) { - throw new IllegalArgumentException( - "totalHitsThreshold must be >= 0, got " + totalHitsThreshold); - } - - this.totalHitsThreshold = totalHitsThreshold; - } - - @Override - public void incrementHitCount() { - ++hitCount; - } - - @Override - public boolean isThresholdReached(boolean supports64Bit) { - if (supports64Bit) { - return hitCount > totalHitsThreshold; - } else { - return Math.min(hitCount, Integer.MAX_VALUE) > Math.min(totalHitsThreshold, Integer.MAX_VALUE); - } - } - - @Override - public ScoreMode scoreMode() { - if (totalHitsThreshold == Long.MAX_VALUE) { - return ScoreMode.COMPLETE; - } - return ScoreMode.TOP_SCORES; - } - - @Override - public long getHitsThreshold(boolean supports64Bit) { - if (supports64Bit) { - return totalHitsThreshold; - } else { - return Math.min(totalHitsThreshold, Integer.MAX_VALUE); - } - } - } - - /* - * Returns a threshold checker that is useful for single threaded searches - */ - public static CustomHitsThresholdChecker create(final long totalHitsThreshold) { - return new LocalHitsThresholdChecker(totalHitsThreshold); - } - - /* - * Returns a threshold checker that is based on a shared counter - */ - public static CustomHitsThresholdChecker createShared(final long totalHitsThreshold) { - return new GlobalHitsThresholdChecker(totalHitsThreshold); - } - - public abstract void incrementHitCount(); - - public abstract ScoreMode scoreMode(); - - public abstract long getHitsThreshold(boolean supports64Bit); - - public abstract boolean isThresholdReached(boolean supports64Bit); -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/lucene/mlt/BigCompositeReader.java b/src/main/java/it/cavallium/dbengine/lucene/mlt/BigCompositeReader.java deleted file mode 100644 index bafecaa..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/mlt/BigCompositeReader.java +++ /dev/null @@ -1,201 +0,0 @@ -package it.cavallium.dbengine.lucene.mlt; - -import java.io.IOException; -import java.math.BigInteger; -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.IntFunction; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.DocumentStoredFieldVisitor; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.StoredFieldVisitor; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.AlreadyClosedException; - -public class BigCompositeReader { - - private static final long ACTUAL_MAX_DOCS = Long.MAX_VALUE - 10; - private final R[] subReaders; - protected final Comparator subReadersSorter; - private final long[] starts; - private final long maxDoc; - private final AtomicLong numDocs = new AtomicLong(-1); - private final List subReadersList; - - public BigCompositeReader(R subReader, IntFunction arrayInstantiation) { - this(toArray(subReader, arrayInstantiation), null); - } - - private static R[] toArray(R subReader, IntFunction arrayInstantiation) { - var arr = arrayInstantiation.apply(1); - arr[0] = subReader; - return arr; - } - - public BigCompositeReader(R[] subReaders, Comparator subReadersSorter) { - if (subReadersSorter != null) { - Arrays.sort(subReaders, subReadersSorter); - } - - this.subReaders = subReaders; - this.subReadersSorter = subReadersSorter; - this.subReadersList = List.of(subReaders); - this.starts = new long[subReaders.length + 1]; - BigInteger maxDoc = BigInteger.ZERO; - - for(int i = 0; i < subReaders.length; ++i) { - this.starts[i] = maxDoc.longValue(); - IndexReader r = subReaders[i]; - maxDoc = maxDoc.add(BigInteger.valueOf(r.maxDoc())); - } - - if (maxDoc.compareTo(BigInteger.ZERO) < 0 || maxDoc.compareTo(BigInteger.valueOf(ACTUAL_MAX_DOCS)) > 0) { - throw new IllegalArgumentException("Too many documents: composite IndexReaders cannot exceed " - + ACTUAL_MAX_DOCS + " but readers have total maxDoc=" + maxDoc); - } else { - this.maxDoc = maxDoc.longValueExact(); - this.starts[subReaders.length] = this.maxDoc; - } - } - - public static Collection getIndexedFields(BigCompositeReader readers) { - return readers.subReadersList - .stream() - .map(IndexReader::getContext) - .flatMap(l -> l.leaves().stream()) - .flatMap((l) -> StreamSupport - .stream(l.reader().getFieldInfos().spliterator(), false) - .filter((fi) -> fi.getIndexOptions() != IndexOptions.NONE)) - .map((fi) -> fi.name) - .collect(Collectors.toSet()); - } - - private void ensureOpen() { - for (R subReader : subReaders) { - if (subReader.getRefCount() <= 0) { - throw new AlreadyClosedException("this IndexReader is closed"); - } - } - } - - public long getDocCount(String field) throws IOException { - this.ensureOpen(); - long total = 0; - - for (R reader : this.subReaders) { - int sub = reader.getDocCount(field); - - assert sub >= 0; - - assert sub <= reader.maxDoc(); - - total += sub; - } - - return total; - } - - public long docFreq(Term term) throws IOException { - this.ensureOpen(); - long total = 0; - - for (R subReader : this.subReaders) { - int sub = subReader.docFreq(term); - - assert sub >= 0; - - assert sub <= subReader.getDocCount(term.field()); - - total += sub; - } - - return total; - } - - public long numDocs() { - long numDocs = this.numDocs.getOpaque(); - if (numDocs == -1L) { - numDocs = 0L; - - for (IndexReader r : this.subReaders) { - numDocs += r.numDocs(); - } - - assert numDocs >= 0L; - - this.numDocs.set(numDocs); - } - - return numDocs; - } - - public Fields getTermVectors(long docID) throws IOException { - this.ensureOpen(); - int i = this.readerIndex(docID); - return this.subReaders[i].getTermVectors(Math.toIntExact(docID - this.starts[i])); - } - - protected final int readerIndex(long docID) { - if (docID >= 0 && docID < this.maxDoc) { - return subIndex(docID, this.starts); - } else { - throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + this.maxDoc + " (got docID=" + docID + ")"); - } - } - - public static int subIndex(long n, long[] docStarts) { - int size = docStarts.length; - int lo = 0; - int hi = size - 1; - - while(hi >= lo) { - int mid = lo + hi >>> 1; - long midValue = docStarts[mid]; - if (n < midValue) { - hi = mid - 1; - } else { - if (n <= midValue) { - while(mid + 1 < size && docStarts[mid + 1] == midValue) { - ++mid; - } - - return mid; - } - - lo = mid + 1; - } - } - - return hi; - } - - public final void document(long docID, StoredFieldVisitor visitor) throws IOException { - this.ensureOpen(); - int i = this.readerIndex(docID); - this.subReaders[i].document(Math.toIntExact(docID - this.starts[i]), visitor); - } - - public final Document document(long docID) throws IOException { - DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); - this.document(docID, visitor); - return visitor.getDocument(); - } - - public final Document document(long docID, Set fieldsToLoad) throws IOException { - DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad); - this.document(docID, visitor); - return visitor.getDocument(); - } - - public long maxDoc() { - return this.maxDoc; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/mlt/MoreLikeThisTransformer.java b/src/main/java/it/cavallium/dbengine/lucene/mlt/MoreLikeThisTransformer.java deleted file mode 100644 index bb69bcf..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/mlt/MoreLikeThisTransformer.java +++ /dev/null @@ -1,43 +0,0 @@ -package it.cavallium.dbengine.lucene.mlt; - -import com.google.common.collect.Multimap; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite; -import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; -import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; -import org.apache.lucene.search.similarities.Similarity; - -public class MoreLikeThisTransformer implements GlobalQueryRewrite { - - private final Multimap mltDocumentFields; - private final PerFieldAnalyzerWrapper luceneAnalyzer; - private final Similarity luceneSimilarity; - - public MoreLikeThisTransformer(Multimap mltDocumentFields, - PerFieldAnalyzerWrapper luceneAnalyzer, - Similarity luceneSimilarity) { - this.mltDocumentFields = mltDocumentFields; - this.luceneAnalyzer = luceneAnalyzer; - this.luceneSimilarity = luceneSimilarity; - } - - @Override - public LocalQueryParams rewrite(LLIndexSearchers indexSearchers, LocalQueryParams queryParams) { - var rewrittenQuery = LuceneUtils.getMoreLikeThisQuery(indexSearchers, - queryParams, - luceneAnalyzer, - luceneSimilarity, - mltDocumentFields - ); - return new LocalQueryParams(rewrittenQuery, - queryParams.offsetLong(), - queryParams.limitLong(), - queryParams.pageLimits(), - queryParams.sort(), - queryParams.computePreciseHitsCount(), - queryParams.timeout() - ); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/mlt/MultiMoreLikeThis.java b/src/main/java/it/cavallium/dbengine/lucene/mlt/MultiMoreLikeThis.java deleted file mode 100644 index 3f658a4..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/mlt/MultiMoreLikeThis.java +++ /dev/null @@ -1,972 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package it.cavallium.dbengine.lucene.mlt; - -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BoostQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.similarities.ClassicSimilarity; -import org.apache.lucene.search.similarities.TFIDFSimilarity; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.util.PriorityQueue; - -/** - * Generate "more like this" similarity queries. Based on this mail: - * - *

- * Lucene does let you access the document frequency of terms, with IndexReader.docFreq().
- * Term frequencies can be computed by re-tokenizing the text, which, for a single document,
- * is usually fast enough.  But looking up the docFreq() of every term in the document is
- * probably too slow.
- *
- * You can use some heuristics to prune the set of terms, to avoid calling docFreq() too much,
- * or at all.  Since you're trying to maximize a tf*idf score, you're probably most interested
- * in terms with a high tf. Choosing a tf threshold even as low as two or three will radically
- * reduce the number of terms under consideration.  Another heuristic is that terms with a
- * high idf (i.e., a low df) tend to be longer.  So you could threshold the terms by the
- * number of characters, not selecting anything less than, e.g., six or seven characters.
- * With these sorts of heuristics you can usually find small set of, e.g., ten or fewer terms
- * that do a pretty good job of characterizing a document.
- *
- * It all depends on what you're trying to do.  If you're trying to eek out that last percent
- * of precision and recall regardless of computational difficulty so that you can win a TREC
- * competition, then the techniques I mention above are useless.  But if you're trying to
- * provide a "more like this" button on a search results page that does a decent job and has
- * good performance, such techniques might be useful.
- *
- * An efficient, effective "more-like-this" query generator would be a great contribution, if
- * anyone's interested.  I'd imagine that it would take a Reader or a String (the document's
- * text), analyzer Analyzer, and return a set of representative terms using heuristics like those
- * above.  The frequency and length thresholds could be parameters, etc.
- *
- * Doug
- * 
- * - *

Initial Usage

- * - *

This class has lots of options to try to make it efficient and flexible. The simplest possible - * usage is as follows. The bold fragment is specific to this class.
- * - *

- * IndexReader ir = ...
- * IndexSearcher is = ...
- * 

- * MoreLikeThis mlt = new MoreLikeThis(ir); - * Reader target = ... // orig source of doc you want to find similarities to - * Query query = mlt.like( target); - *

- * Hits hits = is.search(query); - * // now the usual iteration thru 'hits' - the only thing to watch for is to make sure - * //you ignore the doc if it matches your 'target' document, as it should be similar to itself - * - *

- * - *

Thus you: - * - *

    - *
  1. do your normal, Lucene setup for searching, - *
  2. create a MoreLikeThis, - *
  3. get the text of the doc you want to find similarities to - *
  4. then call one of the like() calls to generate a similarity query - *
  5. call the searcher to find the similar docs - *
- * - *
- * - *

More Advanced Usage

- * - *

You may want to use {@link #setFieldNames setFieldNames(...)} so you can examine multiple - * fields (e.g. body and title) for similarity. - * - *

Depending on the size of your index and the size and makeup of your documents you may want to - * call the other set methods to control how the similarity queries are generated: - * - *

    - *
  • {@link #setMinTermFreq setMinTermFreq(...)} - *
  • {@link #setMinDocFreq setMinDocFreq(...)} - *
  • {@link #setMaxDocFreq setMaxDocFreq(...)} - *
  • {@link #setMaxDocFreqPct setMaxDocFreqPct(...)} - *
  • {@link #setMinWordLen setMinWordLen(...)} - *
  • {@link #setMaxWordLen setMaxWordLen(...)} - *
  • {@link #setMaxQueryTerms setMaxQueryTerms(...)} - *
  • {@link #setMaxNumTokensParsed setMaxNumTokensParsed(...)} - *
  • {@link #setStopWords setStopWord(...)} - *
- * - *
- *
- * - *
- * Changes: Mark Harwood 29/02/04
- * Some bugfixing, some refactoring, some optimisation.
- * - bugfix: retrieveTerms(long docNum) was not working for indexes without a termvector -added missing code
- * - bugfix: No significant terms being created for fields with a termvector - because
- * was only counting one occurrence per term/field pair in calculations(ie not including frequency info from TermVector)
- * - refactor: moved common code into isNoiseWord()
- * - optimise: when no termvector support available - used maxNumTermsParsed to limit amount of tokenization
- * 
- */ -@SuppressWarnings("unused") -public final class MultiMoreLikeThis { - - /** - * Default maximum number of tokens to parse in each example doc field that is not stored with - * TermVector support. - * - * @see #getMaxNumTokensParsed - */ - public static final long DEFAULT_MAX_NUM_TOKENS_PARSED = 5000; - - /** - * Ignore terms with less than this frequency in the source doc. - * - * @see #getMinTermFreq - * @see #setMinTermFreq - */ - public static final long DEFAULT_MIN_TERM_FREQ = 2; - - /** - * Ignore words which do not occur in at least this many docs. - * - * @see #getMinDocFreq - * @see #setMinDocFreq - */ - public static final long DEFAULT_MIN_DOC_FREQ = 5; - - /** - * Ignore words which occur in more than this many docs. - * - * @see #getMaxDocFreq - * @see #setMaxDocFreq - * @see #setMaxDocFreqPct - */ - public static final long DEFAULT_MAX_DOC_FREQ = java.lang.Long.MAX_VALUE; - - /** - * Boost terms in query based on score. - * - * @see #isBoost - * @see #setBoost - */ - public static final boolean DEFAULT_BOOST = false; - - /** - * Default field names. Null is used to specify that the field names should be looked up at - * runtime from the provided reader. - */ - public static final String[] DEFAULT_FIELD_NAMES = new String[] {"contents"}; - - /** - * Ignore words less than this length or if 0 then this has no effect. - * - * @see #getMinWordLen - * @see #setMinWordLen - */ - public static final long DEFAULT_MIN_WORD_LENGTH = 0; - - /** - * Ignore words greater than this length or if 0 then this has no effect. - * - * @see #getMaxWordLen - * @see #setMaxWordLen - */ - public static final long DEFAULT_MAX_WORD_LENGTH = 0; - - /** - * Default set of stopwords. If null means to allow stop words. - * - * @see #setStopWords - * @see #getStopWords - */ - public static final Set DEFAULT_STOP_WORDS = null; - - /** Current set of stop words. */ - private Set stopWords = DEFAULT_STOP_WORDS; - - /** - * Return a Query with no more than this many terms. - * - * @see IndexSearcher#getMaxClauseCount - * @see #getMaxQueryTerms - * @see #setMaxQueryTerms - */ - public static final long DEFAULT_MAX_QUERY_TERMS = 25; - - /** Analyzer that will be used to parse the doc. */ - private Analyzer analyzer = null; - - /** Ignore words less frequent that this. */ - private long minTermFreq = DEFAULT_MIN_TERM_FREQ; - - /** Ignore words which do not occur in at least this many docs. */ - private long minDocFreq = DEFAULT_MIN_DOC_FREQ; - - /** Ignore words which occur in more than this many docs. */ - private long maxDocFreq = DEFAULT_MAX_DOC_FREQ; - - /** Should we apply a boost to the Query based on the scores? */ - private boolean boost = DEFAULT_BOOST; - - /** Field name we'll analyze. */ - private String[] fieldNames = DEFAULT_FIELD_NAMES; - - /** - * The maximum number of tokens to parse in each example doc field that is not stored with - * TermVector support - */ - private long maxNumTokensParsed = DEFAULT_MAX_NUM_TOKENS_PARSED; - - /** Ignore words if less than this len. */ - private long minWordLen = DEFAULT_MIN_WORD_LENGTH; - - /** Ignore words if greater than this len. */ - private long maxWordLen = DEFAULT_MAX_WORD_LENGTH; - - /** Don't return a query longer than this. */ - private long maxQueryTerms = DEFAULT_MAX_QUERY_TERMS; - - /** For idf() calculations. */ - private TFIDFSimilarity similarity; // = new DefaultSimilarity(); - - /** IndexReader to use */ - private final BigCompositeReader ir; - - /** Boost factor to use when boosting the terms */ - private float boostFactor = 1; - - /** - * Returns the boost factor used when boosting terms - * - * @return the boost factor used when boosting terms - * @see #setBoostFactor(float) - */ - public float getBoostFactor() { - return boostFactor; - } - - /** - * Sets the boost factor to use when boosting terms - * - * @see #getBoostFactor() - */ - public void setBoostFactor(float boostFactor) { - this.boostFactor = boostFactor; - } - - /** Constructor requiring an IndexReader. */ - public MultiMoreLikeThis(BigCompositeReader ir) { - this(ir, new ClassicSimilarity()); - } - - public MultiMoreLikeThis(BigCompositeReader ir, TFIDFSimilarity sim) { - this.ir = ir; - this.similarity = sim; - } - - public TFIDFSimilarity getSimilarity() { - return similarity; - } - - public void setSimilarity(TFIDFSimilarity similarity) { - this.similarity = similarity; - } - - /** - * Returns an analyzer that will be used to parse source doc with. The default analyzer is not - * set. - * - * @return the analyzer that will be used to parse source doc with. - */ - public Analyzer getAnalyzer() { - return analyzer; - } - - /** - * Sets the analyzer to use. An analyzer is not required for generating a query with the {@link - * #like(long)} method, all other 'like' methods require an analyzer. - * - * @param analyzer the analyzer to use to tokenize text. - */ - public void setAnalyzer(Analyzer analyzer) { - this.analyzer = analyzer; - } - - /** - * Returns the frequency below which terms will be ignored in the source doc. The default - * frequency is the {@link #DEFAULT_MIN_TERM_FREQ}. - * - * @return the frequency below which terms will be ignored in the source doc. - */ - public long getMinTermFreq() { - return minTermFreq; - } - - /** - * Sets the frequency below which terms will be ignored in the source doc. - * - * @param minTermFreq the frequency below which terms will be ignored in the source doc. - */ - public void setMinTermFreq(long minTermFreq) { - this.minTermFreq = minTermFreq; - } - - /** - * Returns the frequency at which words will be ignored which do not occur in at least this many - * docs. The default frequency is {@link #DEFAULT_MIN_DOC_FREQ}. - * - * @return the frequency at which words will be ignored which do not occur in at least this many - * docs. - */ - public long getMinDocFreq() { - return minDocFreq; - } - - /** - * Sets the frequency at which words will be ignored which do not occur in at least this many - * docs. - * - * @param minDocFreq the frequency at which words will be ignored which do not occur in at least - * this many docs. - */ - public void setMinDocFreq(long minDocFreq) { - this.minDocFreq = minDocFreq; - } - - /** - * Returns the maximum frequency in which words may still appear. Words that appear in more than - * this many docs will be ignored. The default frequency is {@link #DEFAULT_MAX_DOC_FREQ}. - * - * @return get the maximum frequency at which words are still allowed, words which occur in more - * docs than this are ignored. - */ - public long getMaxDocFreq() { - return maxDocFreq; - } - - /** - * Set the maximum frequency in which words may still appear. Words that appear in more than this - * many docs will be ignored. - * - * @param maxFreq the maximum count of documents that a term may appear in to be still considered - * relevant - */ - public void setMaxDocFreq(long maxFreq) { - this.maxDocFreq = maxFreq; - } - - /** - * Set the maximum percentage in which words may still appear. Words that appear in more than this - * many percent of all docs will be ignored. - * - *

This method calls {@link #setMaxDocFreq(long)} internally (both conditions cannot be used at - * the same time). - * - * @param maxPercentage the maximum percentage of documents (0-100) that a term may appear in to - * be still considered relevant. - */ - public void setMaxDocFreqPct(long maxPercentage) { - setMaxDocFreq(maxPercentage * ir.maxDoc() / 100L); - } - - /** - * Returns whether to boost terms in query based on "score" or not. The default is {@link - * #DEFAULT_BOOST}. - * - * @return whether to boost terms in query based on "score" or not. - * @see #setBoost - */ - public boolean isBoost() { - return boost; - } - - /** - * Sets whether to boost terms in query based on "score" or not. - * - * @param boost true to boost terms in query based on "score", false otherwise. - * @see #isBoost - */ - public void setBoost(boolean boost) { - this.boost = boost; - } - - /** - * Returns the field names that will be used when generating the 'More Like This' query. The - * default field names that will be used is {@link #DEFAULT_FIELD_NAMES}. - * - * @return the field names that will be used when generating the 'More Like This' query. - */ - public String[] getFieldNames() { - return fieldNames; - } - - /** - * Sets the field names that will be used when generating the 'More Like This' query. Set this to - * null for the field names to be determined at runtime from the IndexReader provided in the - * constructor. - * - * @param fieldNames the field names that will be used when generating the 'More Like This' query. - */ - public void setFieldNames(String[] fieldNames) { - this.fieldNames = fieldNames; - } - - /** - * Returns the minimum word length below which words will be ignored. Set this to 0 for no minimum - * word length. The default is {@link #DEFAULT_MIN_WORD_LENGTH}. - * - * @return the minimum word length below which words will be ignored. - */ - public long getMinWordLen() { - return minWordLen; - } - - /** - * Sets the minimum word length below which words will be ignored. - * - * @param minWordLen the minimum word length below which words will be ignored. - */ - public void setMinWordLen(long minWordLen) { - this.minWordLen = minWordLen; - } - - /** - * Returns the maximum word length above which words will be ignored. Set this to 0 for no maximum - * word length. The default is {@link #DEFAULT_MAX_WORD_LENGTH}. - * - * @return the maximum word length above which words will be ignored. - */ - public long getMaxWordLen() { - return maxWordLen; - } - - /** - * Sets the maximum word length above which words will be ignored. - * - * @param maxWordLen the maximum word length above which words will be ignored. - */ - public void setMaxWordLen(long maxWordLen) { - this.maxWordLen = maxWordLen; - } - - /** - * Set the set of stopwords. Any word in this set is considered "uninteresting" and ignored. Even - * if your Analyzer allows stopwords, you might want to tell the MoreLikeThis code to ignore them, - * as for the purposes of document similarity it seems reasonable to assume that "a stop word is - * never interesting". - * - * @param stopWords set of stopwords, if null it means to allow stop words - * @see #getStopWords - */ - public void setStopWords(Set stopWords) { - this.stopWords = stopWords; - } - - /** - * Get the current stop words being used. - * - * @see #setStopWords - */ - public Set getStopWords() { - return stopWords; - } - - /** - * Returns the maximum number of query terms that will be included in any generated query. The - * default is {@link #DEFAULT_MAX_QUERY_TERMS}. - * - * @return the maximum number of query terms that will be included in any generated query. - */ - public long getMaxQueryTerms() { - return maxQueryTerms; - } - - /** - * Sets the maximum number of query terms that will be included in any generated query. - * - * @param maxQueryTerms the maximum number of query terms that will be included in any generated - * query. - */ - public void setMaxQueryTerms(long maxQueryTerms) { - this.maxQueryTerms = maxQueryTerms; - } - - /** - * @return The maximum number of tokens to parse in each example doc field that is not stored with - * TermVector support - * @see #DEFAULT_MAX_NUM_TOKENS_PARSED - */ - public long getMaxNumTokensParsed() { - return maxNumTokensParsed; - } - - /** - * @param i The maximum number of tokens to parse in each example doc field that is not stored - * with TermVector support - */ - public void setMaxNumTokensParsed(long i) { - maxNumTokensParsed = i; - } - - /** - * Return a query that will return docs like the passed lucene document ID. - * - * @param docNum the documentID of the lucene doc to generate the 'More Like This" query for. - * @return a query that will return docs like the passed lucene document ID. - */ - public Query like(long docNum) throws IOException { - if (fieldNames == null) { - // gather list of valid fields from lucene - Collection fields; - fields = BigCompositeReader.getIndexedFields(ir); - fieldNames = fields.toArray(String[]::new); - } - - return createQuery(retrieveTerms(docNum)); - } - - /** - * @param filteredDocument Document with field values extracted for selected fields. - * @return More Like This query for the passed document. - */ - public Query like(Map> filteredDocument) throws IOException { - if (fieldNames == null) { - // gather list of valid fields from lucene - Collection fields = BigCompositeReader.getIndexedFields(ir); - fieldNames = fields.toArray(String[]::new); - } - return createQuery(retrieveTerms(filteredDocument)); - } - - /** - * Return a query that will return docs like the passed Readers. This was added in order to treat - * multi-value fields. - * - * @return a query that will return docs like the passed Readers. - */ - public Query like(String fieldName, Reader... readers) throws IOException { - Map> perFieldTermFrequencies = new HashMap<>(); - for (Reader r : readers) { - addTermFrequencies(r, perFieldTermFrequencies, fieldName); - } - return createQuery(createQueue(perFieldTermFrequencies)); - } - - /** Create the More like query from a PriorityQueue */ - private Query createQuery(PriorityQueue q) { - BooleanQuery.Builder query = new BooleanQuery.Builder(); - ScoreTerm scoreTerm; - float bestScore = -1; - - while ((scoreTerm = q.pop()) != null) { - Query tq = new TermQuery(new Term(scoreTerm.topField, scoreTerm.word)); - - if (boost) { - if (bestScore == -1) { - bestScore = (scoreTerm.score); - } - float myScore = (scoreTerm.score); - tq = new BoostQuery(tq, boostFactor * myScore / bestScore); - } - - try { - query.add(tq, BooleanClause.Occur.SHOULD); - } catch ( - @SuppressWarnings("unused") - IndexSearcher.TooManyClauses ignore) { - break; - } - } - return query.build(); - } - - /** - * Create a PriorityQueue from a word->tf map. - * - * @param perFieldTermFrequencies a per field map of words keyed on the word(String) with Int - * objects as the values. - */ - private PriorityQueue createQueue( - Map> perFieldTermFrequencies) throws IOException { - // have collected all words in doc and their freqs - final long limit = Math.min(maxQueryTerms, this.getTermsCount(perFieldTermFrequencies)); - FreqQ queue = new FreqQ(Math.toIntExact(limit)); // will order words by score - for (Map.Entry> entry : perFieldTermFrequencies.entrySet()) { - Map perWordTermFrequencies = entry.getValue(); - String fieldName = entry.getKey(); - - long numDocs = ir.getDocCount(fieldName); - if (numDocs == -1) { - numDocs = ir.numDocs(); - } - - for (Map.Entry tfEntry : perWordTermFrequencies.entrySet()) { // for every word - String word = tfEntry.getKey(); - long tf = tfEntry.getValue().x; // term freq in the source doc - if (minTermFreq > 0 && tf < minTermFreq) { - continue; // filter out words that don't occur enough times in the source - } - - var fieldTerm = new Term(fieldName, word); - long docFreq = ir.docFreq(fieldTerm); - - if (minDocFreq > 0L && docFreq < minDocFreq) { - continue; // filter out words that don't occur in enough docs - } - - if (docFreq > maxDocFreq) { - continue; // filter out words that occur in too many docs - } - - if (docFreq == 0) { - continue; // index update problem? - } - - float idf = similarity.idf(docFreq, numDocs); - float score = tf * idf; - - if (queue.size() < limit) { - // there is still space in the queue - queue.add(new ScoreTerm(word, fieldName, score)); - } else { - ScoreTerm term = queue.top(); - // update the smallest in the queue in place and update the queue. - if (term.score < score) { - term.update(word, fieldName, score); - queue.updateTop(); - } - } - } - } - return queue; - } - - private long getTermsCount(Map> perFieldTermFrequencies) { - long totalTermsCount = 0; - Collection> values = perFieldTermFrequencies.values(); - for (Map perWordTermFrequencies : values) { - totalTermsCount += perWordTermFrequencies.size(); - } - return totalTermsCount; - } - - /** Describe the parameters that control how the "more like this" query is formed. */ - public String describeParams() { - StringBuilder sb = new StringBuilder(); - sb.append("\t").append("maxQueryTerms : ").append(maxQueryTerms).append("\n"); - sb.append("\t").append("minWordLen : ").append(minWordLen).append("\n"); - sb.append("\t").append("maxWordLen : ").append(maxWordLen).append("\n"); - sb.append("\t").append("fieldNames : "); - String delim = ""; - for (String fieldName : fieldNames) { - sb.append(delim).append(fieldName); - delim = ", "; - } - sb.append("\n"); - sb.append("\t").append("boost : ").append(boost).append("\n"); - sb.append("\t").append("minTermFreq : ").append(minTermFreq).append("\n"); - sb.append("\t").append("minDocFreq : ").append(minDocFreq).append("\n"); - return sb.toString(); - } - - /** - * Find words for a more-like-this query former. - * - * @param docNum the id of the lucene document from which to find terms - */ - private PriorityQueue retrieveTerms(long docNum) throws IOException { - Map> field2termFreqMap = new HashMap<>(); - retrieveTermsOfIndexReader(ir, docNum, field2termFreqMap); - - return createQueue(field2termFreqMap); - } - - private void retrieveTermsOfIndexReader(BigCompositeReader ir, long docNum, Map> field2termFreqMap) - throws IOException { - for (String fieldName : fieldNames) { - final Fields vectors = ir.getTermVectors(docNum); - final Terms vector; - if (vectors != null) { - vector = vectors.terms(fieldName); - } else { - vector = null; - } - - // field does not store term vector info - if (vector == null) { - Document d = ir.document(docNum); - IndexableField[] fields = d.getFields(fieldName); - for (IndexableField field : fields) { - final String stringValue = field.stringValue(); - if (stringValue != null) { - addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName); - } - } - } else { - addTermFrequencies(field2termFreqMap, vector, fieldName); - } - } - } - - private PriorityQueue retrieveTerms(Map> field2fieldValues) - throws IOException { - Map> field2termFreqMap = new HashMap<>(); - for (String fieldName : fieldNames) { - Collection fieldValues = field2fieldValues.get(fieldName); - if (fieldValues == null) { - continue; - } - for (Object fieldValue : fieldValues) { - if (fieldValue != null) { - addTermFrequencies( - new StringReader(String.valueOf(fieldValue)), field2termFreqMap, fieldName); - } - } - } - return createQueue(field2termFreqMap); - } - /** - * Adds terms and frequencies found in vector into the Map termFreqMap - * - * @param field2termFreqMap a Map of terms and their frequencies per field - * @param vector List of terms and their frequencies for a doc/field - */ - private void addTermFrequencies( - Map> field2termFreqMap, Terms vector, String fieldName) - throws IOException { - Map termFreqMap = - field2termFreqMap.computeIfAbsent(fieldName, k -> new HashMap<>()); - final TermsEnum termsEnum = vector.iterator(); - final CharsRefBuilder spare = new CharsRefBuilder(); - BytesRef text; - while ((text = termsEnum.next()) != null) { - spare.copyUTF8Bytes(text); - final String term = spare.toString(); - if (isNoiseWord(term)) { - continue; - } - final long freq = termsEnum.totalTermFreq(); - - // increment frequency - Long cnt = termFreqMap.get(term); - if (cnt == null) { - cnt = new Long(); - termFreqMap.put(term, cnt); - cnt.x = freq; - } else { - cnt.x += freq; - } - } - } - - /** - * Adds term frequencies found by tokenizing text from reader into the Map words - * - * @param r a source of text to be tokenized - * @param perFieldTermFrequencies a Map of terms and their frequencies per field - * @param fieldName Used by analyzer for any special per-field analysis - */ - private void addTermFrequencies( - Reader r, Map> perFieldTermFrequencies, String fieldName) - throws IOException { - if (analyzer == null) { - throw new UnsupportedOperationException( - "To use MoreLikeThis without " + "term vectors, you must provide an Analyzer"); - } - Map termFreqMap = - perFieldTermFrequencies.computeIfAbsent(fieldName, k -> new HashMap<>()); - try (TokenStream ts = analyzer.tokenStream(fieldName, r)) { - long tokenCount = 0; - // for every token - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - TermFrequencyAttribute tfAtt = ts.addAttribute(TermFrequencyAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - String word = termAtt.toString(); - tokenCount++; - if (tokenCount > maxNumTokensParsed) { - break; - } - if (isNoiseWord(word)) { - continue; - } - - // increment frequency - Long cnt = termFreqMap.get(word); - if (cnt == null) { - termFreqMap.put(word, new Long(tfAtt.getTermFrequency())); - } else { - cnt.x += tfAtt.getTermFrequency(); - } - } - ts.end(); - } - } - - /** - * determines if the passed term is likely to be of interest in "more like" comparisons - * - * @param term The word being considered - * @return true if should be ignored, false if should be used in further analysis - */ - private boolean isNoiseWord(String term) { - long len = term.length(); - if (minWordLen > 0 && len < minWordLen) { - return true; - } - if (maxWordLen > 0 && len > maxWordLen) { - return true; - } - return stopWords != null && stopWords.contains(term); - } - - /** - * Find words for a more-like-this query former. The result is a priority queue of arrays with one - * entry for every word in the document. Each array has 6 elements. The elements are: - * - *

    - *
  1. The word (String) - *
  2. The top field that this word comes from (String) - *
  3. The score for this word (Float) - *
  4. The IDF value (Float) - *
  5. The frequency of this word in the index (Integer) - *
  6. The frequency of this word in the source document (Integer) - *
- * - * This is a somewhat "advanced" routine, and in general only the 1st entry in the array is of - * interest. This method is exposed so that you can identify the "interesting words" in a - * document. For an easier method to call see {@link #retrieveInterestingTerms - * retrieveInterestingTerms()}. - * - * @param r the reader that has the content of the document - * @param fieldName field passed to the analyzer to use when analyzing the content - * @return the most interesting words in the document ordered by score, with the highest scoring, - * or best entry, first - * @see #retrieveInterestingTerms - */ - private PriorityQueue retrieveTerms(Reader r, String fieldName) throws IOException { - Map> field2termFreqMap = new HashMap<>(); - addTermFrequencies(r, field2termFreqMap, fieldName); - return createQueue(field2termFreqMap); - } - - /** @see #retrieveInterestingTerms(java.io.Reader, String) */ - public String[] retrieveInterestingTerms(long docNum) throws IOException { - ArrayList al = new ArrayList<>(Math.toIntExact(maxQueryTerms)); - PriorityQueue pq = retrieveTerms(docNum); - ScoreTerm scoreTerm; - // have to be careful, retrieveTerms returns all words but that's probably not useful to our - // caller... - long lim = maxQueryTerms; - // we just want to return the top words - while (((scoreTerm = pq.pop()) != null) && lim-- > 0) { - al.add(scoreTerm.word); // the 1st entry is the interesting word - } - String[] res = new String[al.size()]; - return al.toArray(res); - } - - /** - * Convenience routine to make it easy to return the most interesting words in a document. More - * advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly. - * - * @param r the source document - * @param fieldName field passed to analyzer to use when analyzing the content - * @return the most interesting words in the document - * @see #retrieveTerms(java.io.Reader, String) - * @see #setMaxQueryTerms - */ - public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException { - ArrayList al = new ArrayList<>(Math.toIntExact(maxQueryTerms)); - PriorityQueue pq = retrieveTerms(r, fieldName); - ScoreTerm scoreTerm; - // have to be careful, retrieveTerms returns all words but that's probably not useful to our - // caller... - long lim = maxQueryTerms; - // we just want to return the top words - while (((scoreTerm = pq.pop()) != null) && lim-- > 0) { - al.add(scoreTerm.word); // the 1st entry is the interesting word - } - String[] res = new String[al.size()]; - return al.toArray(res); - } - - /** PriorityQueue that orders words by score. */ - private static class FreqQ extends PriorityQueue { - FreqQ(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(ScoreTerm a, ScoreTerm b) { - return a.score < b.score; - } - } - - private static class ScoreTerm { - // only really need 1st 3 entries, other ones are for troubleshooting - String word; - String topField; - float score; - - ScoreTerm(String word, String topField, float score) { - this.word = word; - this.topField = topField; - this.score = score; - } - - void update(String word, String topField, float score) { - this.word = word; - this.topField = topField; - this.score = score; - } - } - - /** Use for frequencies and to avoid renewing Integers. */ - private static class Long { - long x; - - Long() { - this(1L); - } - - Long(long initialValue) { - x = initialValue; - } - } -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/AdaptiveLocalSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/AdaptiveLocalSearcher.java deleted file mode 100644 index 916cce0..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/AdaptiveLocalSearcher.java +++ /dev/null @@ -1,85 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE; - -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearcher; -import it.cavallium.dbengine.lucene.LuceneUtils; -import java.io.IOException; -import java.util.function.Function; -import java.util.stream.Stream; -import org.jetbrains.annotations.Nullable; - -public class AdaptiveLocalSearcher implements LocalSearcher { - - private static final StandardSearcher standardSearcher = new StandardSearcher(); - - private static final LocalSearcher scoredPaged = new PagedLocalSearcher(); - - private static final LocalSearcher countSearcher = new CountMultiSearcher(); - - private static final MultiSearcher unsortedUnscoredContinuous = new UnsortedStreamingMultiSearcher(); - - /** - * Use in-memory collectors if the expected results count is lower or equal than this limit - */ - private final int maxInMemoryResultEntries; - - public AdaptiveLocalSearcher(int maxInMemoryResultEntries) { - this.maxInMemoryResultEntries = maxInMemoryResultEntries; - } - - @Override - public LuceneSearchResult collect(LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != NO_REWRITE) { - return LuceneUtils.rewrite(this, indexSearcher, queryParams, keyFieldName, transformer, filterer); - } - return transformedCollect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } - - @Override - public String toString() { - return "adaptivelocal"; - } - - // Remember to change also AdaptiveMultiSearcher - public LuceneSearchResult transformedCollect(LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - // offset + limit - long realLimit = queryParams.offsetLong() + queryParams.limitLong(); - long maxAllowedInMemoryLimit - = Math.max(maxInMemoryResultEntries, (long) queryParams.pageLimits().getPageLimit(0)); - - if (queryParams.limitLong() == 0) { - return countSearcher.collect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } else if (realLimit <= maxInMemoryResultEntries) { - return standardSearcher.collect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } else if (queryParams.isSorted()) { - if (realLimit <= maxAllowedInMemoryLimit) { - return scoredPaged.collect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } else { - if (queryParams.isSortedByScore()) { - if (queryParams.limitLong() < maxInMemoryResultEntries) { - throw new UnsupportedOperationException("Allowed limit is " + maxInMemoryResultEntries + " or greater"); - } - return scoredPaged.collect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } else { - if (queryParams.limitLong() < maxInMemoryResultEntries) { - throw new UnsupportedOperationException("Allowed limit is " + maxInMemoryResultEntries + " or greater"); - } - return scoredPaged.collect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } - } - } else { - // Run large/unbounded searches using the continuous multi searcher - return unsortedUnscoredContinuous.collect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/AdaptiveMultiSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/AdaptiveMultiSearcher.java deleted file mode 100644 index da41025..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/AdaptiveMultiSearcher.java +++ /dev/null @@ -1,84 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE; - -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneUtils; -import java.util.function.Function; -import java.util.stream.Stream; -import org.jetbrains.annotations.Nullable; - -public class AdaptiveMultiSearcher implements MultiSearcher { - - private static final StandardSearcher standardSearcher = new StandardSearcher(); - - private static final MultiSearcher count = new CountMultiSearcher(); - - private static final MultiSearcher scoredPaged = new ScoredPagedMultiSearcher(); - - private static final MultiSearcher unsortedUnscoredContinuous = new UnsortedStreamingMultiSearcher(); - - /** - * Use in-memory collectors if the expected results count is lower or equal than this limit - */ - private final int maxInMemoryResultEntries; - - public AdaptiveMultiSearcher(int maxInMemoryResultEntries) { - this.maxInMemoryResultEntries = maxInMemoryResultEntries; - } - - @Override - public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != NO_REWRITE) { - return LuceneUtils.rewriteMulti(this, indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - return transformedCollectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - - // Remember to change also AdaptiveLocalSearcher - public LuceneSearchResult transformedCollectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - // offset + limit - long realLimit = queryParams.offsetLong() + queryParams.limitLong(); - long maxAllowedInMemoryLimit - = Math.max(maxInMemoryResultEntries, (long) queryParams.pageLimits().getPageLimit(0)); - - if (queryParams.limitLong() == 0) { - return count.collectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } else if (realLimit <= maxInMemoryResultEntries) { - return standardSearcher.collectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } else if (queryParams.isSorted()) { - if (realLimit <= maxAllowedInMemoryLimit) { - return scoredPaged.collectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } else { - if (queryParams.isSortedByScore()) { - if (queryParams.limitLong() < maxInMemoryResultEntries) { - throw new UnsupportedOperationException("Allowed limit is " + maxInMemoryResultEntries + " or greater"); - } - return scoredPaged.collectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } else { - if (queryParams.limitLong() < maxInMemoryResultEntries) { - throw new UnsupportedOperationException("Allowed limit is " + maxInMemoryResultEntries + " or greater"); - } - return scoredPaged.collectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - } - } else { - // Run large/unbounded searches using the continuous multi searcher - return unsortedUnscoredContinuous.collectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - } - - @Override - public String toString() { - return "adaptive local"; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/BucketParams.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/BucketParams.java deleted file mode 100644 index 717410e..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/BucketParams.java +++ /dev/null @@ -1,9 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.lucene.collector.BucketValueSource; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public record BucketParams(double min, double max, int buckets, String bucketFieldName, - @NotNull BucketValueSource valueSource, @Nullable Integer collectionRate, - @Nullable Integer sampleSize) {} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/CountMultiSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/CountMultiSearcher.java deleted file mode 100644 index 47a1e40..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/CountMultiSearcher.java +++ /dev/null @@ -1,82 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.database.LLUtils.mapList; - -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.LLUtils; -import it.cavallium.dbengine.database.disk.LLIndexSearcher; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.utils.DBException; -import java.io.IOException; -import java.util.List; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.index.QueryTimeout; -import org.apache.lucene.index.QueryTimeoutImpl; -import org.jetbrains.annotations.Nullable; - -public class CountMultiSearcher implements MultiSearcher { - - protected static final Logger LOG = LogManager.getLogger(CountMultiSearcher.class); - - @Override - public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != GlobalQueryRewrite.NO_REWRITE) { - return LuceneUtils.rewriteMulti(this, indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - if (queryParams.isSorted() && queryParams.limitLong() > 0) { - throw new UnsupportedOperationException( - "Sorted queries are not supported by SimpleUnsortedUnscoredLuceneMultiSearcher"); - } - if (queryParams.needsScores() && queryParams.limitLong() > 0) { - throw new UnsupportedOperationException( - "Scored queries are not supported by SimpleUnsortedUnscoredLuceneMultiSearcher"); - } - - var results = mapList(indexSearchers.llShards(), - searcher -> this.collect(searcher, queryParams, keyFieldName, transformer, f -> filterer.apply(f).limit(0)) - ); - boolean exactTotalHitsCount = true; - long totalHitsCountValue = 0; - for (LuceneSearchResult result : results) { - exactTotalHitsCount &= result.totalHitsCount().exact(); - totalHitsCountValue += result.totalHitsCount().value(); - } - - var totalHitsCount = new TotalHitsCount(totalHitsCountValue, exactTotalHitsCount); - - return new LuceneSearchResult(totalHitsCount, List.of()); - } - - @Override - public LuceneSearchResult collect(LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != GlobalQueryRewrite.NO_REWRITE) { - return LuceneUtils.rewrite(this, indexSearcher, queryParams, keyFieldName, transformer, filterer); - } - try { - var is = indexSearcher.getIndexSearcher(); - is.setTimeout(new QueryTimeoutImpl(queryParams.timeout().toMillis())); - var count = is.count(queryParams.query()); - return new LuceneSearchResult(TotalHitsCount.of(count, true), List.of()); - } catch (IOException e) { - throw new DBException(e); - } - } - - @Override - public String toString() { - return "count"; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/CurrentPageInfo.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/CurrentPageInfo.java deleted file mode 100644 index dc991f3..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/CurrentPageInfo.java +++ /dev/null @@ -1,13 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import java.util.Comparator; -import org.apache.lucene.search.ScoreDoc; -import org.jetbrains.annotations.Nullable; - -public record CurrentPageInfo(@Nullable ScoreDoc last, long remainingLimit, int pageIndex) { - - public static final Comparator TIE_BREAKER = Comparator - .comparingInt((d) -> d.shardIndex) - .thenComparingInt(d -> -d.doc); - public static final CurrentPageInfo EMPTY_STATUS = new CurrentPageInfo(null, 0, 0); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/DecimalBucketMultiSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/DecimalBucketMultiSearcher.java deleted file mode 100644 index e01d6e5..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/DecimalBucketMultiSearcher.java +++ /dev/null @@ -1,59 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL; -import static it.cavallium.dbengine.utils.StreamUtils.collectOn; -import static it.cavallium.dbengine.utils.StreamUtils.fastListing; - -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.collector.Buckets; -import it.cavallium.dbengine.lucene.collector.DecimalBucketMultiCollectorManager; -import it.cavallium.dbengine.utils.DBException; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class DecimalBucketMultiSearcher { - - protected static final Logger logger = LogManager.getLogger(DecimalBucketMultiSearcher.class); - - public Buckets collectMulti(LLIndexSearchers indexSearchers, - BucketParams bucketParams, - @NotNull List queries, - @Nullable Query normalizationQuery) { - try { - // Search results - return this.search(indexSearchers.shards(), bucketParams, queries, normalizationQuery); - } finally { - indexSearchers.close(); - } - } - - private Buckets search(Collection indexSearchers, - BucketParams bucketParams, - @NotNull List queries, - @Nullable Query normalizationQuery) { - var cmm = new DecimalBucketMultiCollectorManager(bucketParams.min(), - bucketParams.max(), - bucketParams.buckets(), - bucketParams.bucketFieldName(), - bucketParams.valueSource(), - queries, - normalizationQuery, - bucketParams.collectionRate(), - bucketParams.sampleSize() - ); - return cmm.reduce(collectOn(LUCENE_POOL, indexSearchers.stream().map(indexSearcher -> { - try { - return cmm.search(indexSearcher); - } catch (IOException e) { - throw new DBException(e); - } - }), fastListing())); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/FirstPageResults.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/FirstPageResults.java deleted file mode 100644 index 4c10ce3..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/FirstPageResults.java +++ /dev/null @@ -1,8 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLKeyScore; -import java.util.stream.Stream; - -record FirstPageResults(TotalHitsCount totalHitsCount, Stream firstPageHitsStream, - CurrentPageInfo nextPageInfo) {} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/GlobalQueryRewrite.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/GlobalQueryRewrite.java deleted file mode 100644 index 1fba95a..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/GlobalQueryRewrite.java +++ /dev/null @@ -1,11 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import java.io.IOException; - -public interface GlobalQueryRewrite { - - GlobalQueryRewrite NO_REWRITE = (indexSearchers, queryParamsMono) -> queryParamsMono; - - LocalQueryParams rewrite(LLIndexSearchers indexSearchers, LocalQueryParams localQueryParams); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/LocalQueryParams.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/LocalQueryParams.java deleted file mode 100644 index 255b0c5..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/LocalQueryParams.java +++ /dev/null @@ -1,104 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.lucene.LuceneUtils.safeLongToInt; - -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.PageLimits; -import java.time.Duration; -import java.util.Objects; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Sort; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public record LocalQueryParams(@NotNull Query query, int offsetInt, long offsetLong, int limitInt, long limitLong, - @NotNull PageLimits pageLimits, @Nullable Sort sort, - @Nullable Boolean computePreciseHitsCount, Duration timeout) { - - /** - * Sorted params with long offsets - */ - public LocalQueryParams(@NotNull Query query, - long offsetLong, - long limitLong, - @NotNull PageLimits pageLimits, - @Nullable Sort sort, - @Nullable Boolean computePreciseHitsCount, - Duration timeout) { - this(query, - safeLongToInt(offsetLong), - offsetLong, - safeLongToInt(limitLong), - limitLong, - pageLimits, - sort, - computePreciseHitsCount, - timeout - ); - } - - /** - * Sorted params with int offsets - */ - public LocalQueryParams(@NotNull Query query, - int offsetInt, - int limitInt, - @NotNull PageLimits pageLimits, - @Nullable Sort sort, - boolean computePreciseHitsCount, - Duration timeout) { - this(query, offsetInt, offsetInt, limitInt, limitInt, pageLimits, sort, computePreciseHitsCount, timeout); - } - - /** - * Unsorted params with int offsets - */ - public LocalQueryParams(@NotNull Query query, - int offsetInt, - int limitInt, - @NotNull PageLimits pageLimits, - Duration timeout) { - this(query, offsetInt, offsetInt, limitInt, limitInt, pageLimits, null, null, timeout); - } - - - /** - * Unsorted params with long offsets - */ - public LocalQueryParams(@NotNull Query query, - long offsetLong, - long limitLong, - @NotNull PageLimits pageLimits, - Duration timeout) { - this(query, - safeLongToInt(offsetLong), - offsetLong, - safeLongToInt(limitLong), - limitLong, - pageLimits, - null, - null, - timeout - ); - } - - public boolean isSorted() { - return sort != null; - } - - public boolean isSortedByScore() { - return Objects.equals(sort, Sort.RELEVANCE); - } - - public boolean needsScores() { - return sort != null && sort.needsScores(); - } - - public int getTotalHitsThresholdInt() { - return LuceneUtils.totalHitsThreshold(this.computePreciseHitsCount); - } - - public long getTotalHitsThresholdLong() { - return LuceneUtils.totalHitsThresholdLong(this.computePreciseHitsCount); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/LocalSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/LocalSearcher.java deleted file mode 100644 index 0af0d6b..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/LocalSearcher.java +++ /dev/null @@ -1,30 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearcher; -import java.util.function.Function; -import java.util.stream.Stream; -import org.jetbrains.annotations.Nullable; - -public interface LocalSearcher { - - /** - * @param indexSearcher Lucene index searcher - * @param queryParams the query parameters - * @param keyFieldName the name of the key field - * @param transformer the search query transformer - * @param filterer the search result filterer - */ - LuceneSearchResult collect(LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer); - - /** - * Get the name of this searcher type - * @return searcher type name - */ - @Override - String toString(); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneGenerator.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneGenerator.java deleted file mode 100644 index 9eb7f26..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneGenerator.java +++ /dev/null @@ -1,148 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.utils.StreamUtils.streamWhileNonNull; - -import java.io.IOException; -import it.cavallium.dbengine.utils.DBException; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.Bits; -import org.jetbrains.annotations.Nullable; - -public class LuceneGenerator implements Supplier { - - private final IndexSearcher shard; - private final int shardIndex; - private final Query query; - private final Iterator leavesIterator; - private final boolean computeScores; - - private long remainingOffset; - private long remainingAllowedResults; - private Weight weight; - - private LeafReaderContext leaf; - private DocIdSetIterator docIdSetIterator; - private Scorer scorer; - - LuceneGenerator(IndexSearcher shard, LocalQueryParams localQueryParams, int shardIndex) { - this.shard = shard; - this.shardIndex = shardIndex; - this.query = localQueryParams.query(); - this.remainingOffset = localQueryParams.offsetLong(); - this.remainingAllowedResults = localQueryParams.limitLong(); - this.computeScores = localQueryParams.needsScores(); - List leaves = shard.getTopReaderContext().leaves(); - this.leavesIterator = leaves.iterator(); - } - - public static Stream reactive(IndexSearcher shard, LocalQueryParams localQueryParams, int shardIndex) { - if (localQueryParams.sort() != null) { - throw new IllegalArgumentException("Sorting is not allowed"); - } - var lg = new LuceneGenerator(shard, localQueryParams, shardIndex); - return streamWhileNonNull(lg); - } - - @Override - public ScoreDoc get() { - while (remainingOffset > 0) { - skipNext(); - } - if (remainingAllowedResults == 0) { - return null; - } else { - remainingAllowedResults--; - } - return getNext(); - } - - public void skipNext() { - getNext(); - remainingOffset--; - } - - private Weight createWeight() throws IOException { - ScoreMode scoreMode = computeScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES; - return shard.createWeight(shard.rewrite(query), scoreMode, 1f); - } - - public ScoreDoc getNext() { - if (weight == null) { - try { - weight = createWeight(); - } catch (IOException e) { - throw new DBException(e); - } - } - - try { - return getWeightedNext(); - } catch (IOException e) { - throw new DBException(e); - } - } - - private ScoreDoc getWeightedNext() throws IOException { - while (tryAdvanceDocIdSetIterator()) { - LeafReader reader = leaf.reader(); - Bits liveDocs = reader.getLiveDocs(); - int doc; - while ((doc = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - if (docDeleted(liveDocs, doc)) { - continue; - } - return transformDoc(doc); - } - docIdSetIterator = null; - } - clearState(); - return null; - } - private boolean tryAdvanceDocIdSetIterator() throws IOException { - if (docIdSetIterator != null) { - return true; - } - while (leavesIterator.hasNext()) { - LeafReaderContext leaf = leavesIterator.next(); - Scorer scorer = weight.scorer(leaf); - if (scorer == null) { - continue; - } - this.scorer = scorer; - this.leaf = leaf; - this.docIdSetIterator = scorer.iterator(); - return true; - } - return false; - } - - private ScoreDoc transformDoc(int doc) throws IOException { - return new ScoreDoc(leaf.docBase + doc, scorer.score(), shardIndex); - } - - private static boolean docDeleted(@Nullable Bits liveDocs, int doc) { - if (liveDocs == null) { - return false; - } - return !liveDocs.get(doc); - } - - private void clearState() { - docIdSetIterator = null; - scorer = null; - leaf = null; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneMultiGenerator.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneMultiGenerator.java deleted file mode 100644 index a9561a6..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneMultiGenerator.java +++ /dev/null @@ -1,54 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import java.util.Iterator; -import java.util.List; -import java.util.function.Supplier; -import java.util.stream.IntStream; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; - -public class LuceneMultiGenerator implements Supplier { - - private final Iterator> generators; - private Supplier luceneGenerator; - - public LuceneMultiGenerator(List shards, LocalQueryParams localQueryParams) { - this.generators = IntStream - .range(0, shards.size()) - .mapToObj(shardIndex -> { - IndexSearcher shard = shards.get(shardIndex); - return (Supplier) new LuceneGenerator(shard, - localQueryParams, - shardIndex - ); - }) - .iterator(); - tryAdvanceGenerator(); - } - - private void tryAdvanceGenerator() { - if (generators.hasNext()) { - luceneGenerator = generators.next(); - } else { - luceneGenerator = null; - } - } - - @Override - public ScoreDoc get() { - if (luceneGenerator == null) { - return null; - } - ScoreDoc item; - do { - item = luceneGenerator.get(); - if (item == null) { - tryAdvanceGenerator(); - if (luceneGenerator == null) { - return null; - } - } - } while (item == null); - return item; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneSearchResult.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneSearchResult.java deleted file mode 100644 index 58737ee..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/LuceneSearchResult.java +++ /dev/null @@ -1,56 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.DiscardingCloseable; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.utils.SimpleResource; -import java.util.List; -import java.util.Objects; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -public class LuceneSearchResult { - - public static final TotalHitsCount EMPTY_COUNT = new TotalHitsCount(0, true); - - public static final LuceneSearchResult EMPTY = new LuceneSearchResult(EMPTY_COUNT, List.of()); - private static final Logger logger = LogManager.getLogger(LuceneSearchResult.class); - - private final TotalHitsCount totalHitsCount; - private final List results; - - public LuceneSearchResult(TotalHitsCount totalHitsCount, List results) { - this.totalHitsCount = totalHitsCount; - this.results = results; - } - - public TotalHitsCount totalHitsCount() { - return totalHitsCount; - } - - public List results() { - return results; - } - - @Override - public boolean equals(Object obj) { - if (obj == this) - return true; - if (obj == null || obj.getClass() != this.getClass()) - return false; - var that = (LuceneSearchResult) obj; - return this.totalHitsCount == that.totalHitsCount && Objects.equals(this.results, that.results); - } - - @Override - public int hashCode() { - return Objects.hash(totalHitsCount, results); - } - - @Override - public String toString() { - return "LuceneSearchResult[" + "totalHitsCount=" + totalHitsCount + ", " + "results=" + results + ']'; - } - -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/MultiSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/MultiSearcher.java deleted file mode 100644 index d7f5c76..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/MultiSearcher.java +++ /dev/null @@ -1,43 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearcher; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import java.io.IOException; -import java.util.function.Function; -import java.util.stream.Stream; -import org.jetbrains.annotations.Nullable; - -public interface MultiSearcher extends LocalSearcher { - - /** - * @param indexSearchers Lucene index searcher - * @param queryParams the query parameters - * @param keyFieldName the name of the key field - * @param transformer the search query transformer - * @param filterer the search result filterer - */ - LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer); - - /** - * @param indexSearcher Lucene index searcher - * @param queryParams the query parameters - * @param keyFieldName the name of the key field - * @param transformer the search query transformer - * @param filterer the search result filterer - */ - @Override - default LuceneSearchResult collect(LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - LLIndexSearchers searchers = LLIndexSearchers.unsharded(indexSearcher); - return this.collectMulti(searchers, queryParams, keyFieldName, transformer, filterer); - } - -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/PageData.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/PageData.java deleted file mode 100644 index f5be5ee..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/PageData.java +++ /dev/null @@ -1,5 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import org.apache.lucene.search.TopDocs; - -record PageData(TopDocs topDocs, CurrentPageInfo nextPageInfo) {} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/PageIterationStepResult.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/PageIterationStepResult.java deleted file mode 100644 index 37950bb..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/PageIterationStepResult.java +++ /dev/null @@ -1,5 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import org.jetbrains.annotations.Nullable; - -record PageIterationStepResult(CurrentPageInfo nextPageToIterate, @Nullable PageData pageData) {} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/PagedLocalSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/PagedLocalSearcher.java deleted file mode 100644 index 510d8f5..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/PagedLocalSearcher.java +++ /dev/null @@ -1,192 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS; -import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT; -import static it.cavallium.dbengine.utils.StreamUtils.fastListing; -import static it.cavallium.dbengine.utils.StreamUtils.streamWhileNonNull; - -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.LLUtils; -import it.cavallium.dbengine.database.disk.LLIndexSearcher; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.collector.TopDocsCollectorMultiManager; -import it.cavallium.dbengine.utils.DBException; -import it.cavallium.dbengine.utils.StreamUtils; -import java.io.IOException; -import java.util.List; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TotalHits; -import org.apache.lucene.search.TotalHits.Relation; -import org.jetbrains.annotations.Nullable; - -public class PagedLocalSearcher implements LocalSearcher { - - private static final Logger LOG = LogManager.getLogger(PagedLocalSearcher.class); - - @Override - public LuceneSearchResult collect(LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != GlobalQueryRewrite.NO_REWRITE) { - return LuceneUtils.rewrite(this, indexSearcher, queryParams, keyFieldName, transformer, filterer); - } - PaginationInfo paginationInfo = getPaginationInfo(queryParams); - - var indexSearchers = LLIndexSearchers.unsharded(indexSearcher); - - // Search first page results - var firstPageTopDocs = this.searchFirstPage(indexSearchers.shards(), queryParams, paginationInfo); - // Compute the results of the first page - var firstResult = this.computeFirstPageResults(firstPageTopDocs, - indexSearchers.shards(), - keyFieldName, - queryParams - ); - return this.computeOtherResults(firstResult, indexSearchers.shards(), queryParams, keyFieldName, filterer); - } - - @Override - public String toString() { - return "paged local"; - } - - /** - * Get the pagination info - */ - private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) { - if (queryParams.limitInt() <= MAX_SINGLE_SEARCH_LIMIT) { - return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), true); - } else { - return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), false); - } - } - - /** - * Search effectively the raw results of the first page - */ - private PageData searchFirstPage(List indexSearchers, - LocalQueryParams queryParams, - PaginationInfo paginationInfo) { - var limit = paginationInfo.totalLimit(); - var pagination = !paginationInfo.forceSinglePage(); - var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()); - var currentPageInfo = new CurrentPageInfo(null, limit, 0); - return this.searchPageSync(queryParams, indexSearchers, pagination, resultsOffset, currentPageInfo).pageData(); - } - - /** - * Compute the results of the first page, extracting useful data - */ - private FirstPageResults computeFirstPageResults(PageData firstPageData, - List indexSearchers, - String keyFieldName, - LocalQueryParams queryParams) { - var totalHitsCount = LuceneUtils.convertTotalHitsCount(firstPageData.topDocs().totalHits); - var scoreDocs = firstPageData.topDocs().scoreDocs; - assert LLUtils.isSet(scoreDocs); - - Stream firstPageHitsFlux = LuceneUtils.convertHits(Stream.of(scoreDocs), - indexSearchers, keyFieldName - ) - .limit(queryParams.limitLong()); - - CurrentPageInfo nextPageInfo = firstPageData.nextPageInfo(); - - return new FirstPageResults(totalHitsCount, firstPageHitsFlux, nextPageInfo); - } - - private LuceneSearchResult computeOtherResults(FirstPageResults firstResult, - List indexSearchers, - LocalQueryParams queryParams, - String keyFieldName, - Function, Stream> filterer) { - var totalHitsCount = firstResult.totalHitsCount(); - var firstPageHitsStream = firstResult.firstPageHitsStream(); - var secondPageInfo = firstResult.nextPageInfo(); - - Stream nextHitsFlux = searchOtherPages(indexSearchers, queryParams, keyFieldName, secondPageInfo); - - Stream combinedFlux = Stream.concat(firstPageHitsStream, nextHitsFlux); - return new LuceneSearchResult(totalHitsCount, StreamUtils.collect(filterer.apply(combinedFlux), fastListing())); - } - - /** - * Search effectively the merged raw results of the next pages - */ - private Stream searchOtherPages(List indexSearchers, - LocalQueryParams queryParams, String keyFieldName, CurrentPageInfo secondPageInfo) { - AtomicReference pageInfo = new AtomicReference<>(secondPageInfo); - Object lock = new Object(); - Stream topFieldDocFlux = streamWhileNonNull(() -> { - synchronized (lock) { - var currentPageInfo = pageInfo.getPlain(); - var result = searchPageSync(queryParams, indexSearchers, true, 0, currentPageInfo); - pageInfo.setPlain(result.nextPageToIterate()); - return result.pageData(); - } - }).flatMap(pd -> Stream.of(pd.topDocs().scoreDocs)); - return LuceneUtils.convertHits(topFieldDocFlux, indexSearchers, keyFieldName); - } - - /** - * - * @param resultsOffset offset of the resulting topDocs. Useful if you want to - * skip the first n results in the first page - */ - private PageIterationStepResult searchPageSync(LocalQueryParams queryParams, - List indexSearchers, - boolean allowPagination, - int resultsOffset, - CurrentPageInfo s) { - if (resultsOffset < 0) { - throw new IndexOutOfBoundsException(resultsOffset); - } - PageData result = null; - var currentPageLimit = queryParams.pageLimits().getPageLimit(s.pageIndex()); - if (s.pageIndex() == 0 && s.remainingLimit() == 0) { - int count; - try { - count = indexSearchers.get(0).count(queryParams.query()); - } catch (IOException e) { - throw new DBException(e); - } - var nextPageInfo = new CurrentPageInfo(null, 0, 1); - return new PageIterationStepResult(EMPTY_STATUS, new PageData(new TopDocs(new TotalHits(count, Relation.EQUAL_TO), new ScoreDoc[0]), nextPageInfo)); - } else if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) { - TopDocs pageTopDocs; - try { - var cmm = new TopDocsCollectorMultiManager(queryParams.sort(), - currentPageLimit, s.last(), queryParams.getTotalHitsThresholdInt(), - allowPagination, queryParams.needsScores(), resultsOffset, currentPageLimit); - - pageTopDocs = cmm.reduce(List.of(indexSearchers - .get(0) - .search(queryParams.query(), cmm.get(queryParams.query(), indexSearchers.get(0))))); - } catch (IOException e) { - throw new DBException(e); - } - var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs); - long nextRemainingLimit; - if (allowPagination) { - nextRemainingLimit = s.remainingLimit() - currentPageLimit; - } else { - nextRemainingLimit = 0L; - } - var nextPageIndex = s.pageIndex() + 1; - var nextPageInfo = new CurrentPageInfo(pageLastDoc, nextRemainingLimit, nextPageIndex); - return new PageIterationStepResult(nextPageInfo, new PageData(pageTopDocs, nextPageInfo)); - } else { - return new PageIterationStepResult(EMPTY_STATUS, null); - } - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/PaginationInfo.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/PaginationInfo.java deleted file mode 100644 index 2d37dd8..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/PaginationInfo.java +++ /dev/null @@ -1,13 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.lucene.PageLimits; - -public record PaginationInfo(long totalLimit, long firstPageOffset, PageLimits pageLimits, boolean forceSinglePage) { - - public static final int MAX_SINGLE_SEARCH_LIMIT = 256; - public static final int FIRST_PAGE_LIMIT = 10; - /** - * Use true to allow a custom unscored collector when possible - */ - public static final boolean ALLOW_UNSCORED_PAGINATION_MODE = true; -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/ScoredPagedMultiSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/ScoredPagedMultiSearcher.java deleted file mode 100644 index 85f4d93..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/ScoredPagedMultiSearcher.java +++ /dev/null @@ -1,218 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT; -import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL; -import static it.cavallium.dbengine.utils.StreamUtils.fastListing; -import static it.cavallium.dbengine.utils.StreamUtils.streamWhileNonNull; -import static it.cavallium.dbengine.utils.StreamUtils.toListOn; - -import com.google.common.collect.Streams; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.LLUtils; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.PageLimits; -import it.cavallium.dbengine.lucene.collector.ScoringShardsCollectorMultiManager; -import it.cavallium.dbengine.utils.DBException; -import it.cavallium.dbengine.utils.StreamUtils; -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TopDocs; -import org.jetbrains.annotations.Nullable; - -public class ScoredPagedMultiSearcher implements MultiSearcher { - - protected static final Logger LOG = LogManager.getLogger(ScoredPagedMultiSearcher.class); - - public ScoredPagedMultiSearcher() { - } - - @Override - public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != GlobalQueryRewrite.NO_REWRITE) { - return LuceneUtils.rewriteMulti(this, indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - - PaginationInfo paginationInfo = getPaginationInfo(queryParams); - // Search first page results - var firstPageTopDocs = this.searchFirstPage(indexSearchers.shards(), queryParams, paginationInfo); - // Compute the results of the first page - var firstResult = this.computeFirstPageResults(firstPageTopDocs, indexSearchers, keyFieldName, queryParams); - // Compute other results - return this.computeOtherResults(firstResult, - indexSearchers.shards(), - queryParams, - keyFieldName, - filterer - ); - } - - private Sort getSort(LocalQueryParams queryParams) { - return queryParams.sort(); - } - - /** - * Get the pagination info - */ - private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) { - if (queryParams.limitInt() <= MAX_SINGLE_SEARCH_LIMIT) { - return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), true); - } else { - return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), false); - } - } - - /** - * Search effectively the raw results of the first page - */ - private PageData searchFirstPage(List indexSearchers, - LocalQueryParams queryParams, - PaginationInfo paginationInfo) { - var limit = paginationInfo.totalLimit(); - var pageLimits = paginationInfo.pageLimits(); - var pagination = !paginationInfo.forceSinglePage(); - var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()); - return this.searchPage(queryParams, - indexSearchers, - pagination, - pageLimits, - resultsOffset, - new CurrentPageInfo(null, limit, 0) - ); - } - - /** - * Compute the results of the first page, extracting useful data - */ - private FirstPageResults computeFirstPageResults(PageData firstPageData, - LLIndexSearchers indexSearchers, - String keyFieldName, - LocalQueryParams queryParams) { - var totalHitsCount = LuceneUtils.convertTotalHitsCount(firstPageData.topDocs().totalHits); - var scoreDocs = firstPageData.topDocs().scoreDocs; - assert LLUtils.isSet(scoreDocs); - - Stream firstPageHitsFlux = LuceneUtils - .convertHits(Stream.of(scoreDocs), indexSearchers.shards(), keyFieldName) - .limit(queryParams.limitInt()); - - CurrentPageInfo nextPageInfo = firstPageData.nextPageInfo(); - - return new FirstPageResults(totalHitsCount, firstPageHitsFlux, nextPageInfo); - } - - private LuceneSearchResult computeOtherResults(FirstPageResults firstResult, - List indexSearchers, - LocalQueryParams queryParams, - String keyFieldName, - Function, Stream> filterer) { - var totalHitsCount = firstResult.totalHitsCount(); - var firstPageHitsStream = firstResult.firstPageHitsStream(); - var secondPageInfo = firstResult.nextPageInfo(); - - Stream nextHitsFlux = searchOtherPages(indexSearchers, queryParams, keyFieldName, secondPageInfo); - - Stream combinedStream = Stream.concat(firstPageHitsStream, nextHitsFlux); - return new LuceneSearchResult(totalHitsCount, StreamUtils.collect(filterer.apply(combinedStream), fastListing())); - } - - /** - * Search effectively the merged raw results of the next pages - */ - private Stream searchOtherPages(List indexSearchers, - LocalQueryParams queryParams, String keyFieldName, CurrentPageInfo secondPageInfo) { - AtomicReference currentPageInfoRef = new AtomicReference<>(secondPageInfo); - Stream topFieldDocStream = streamWhileNonNull(() -> { - var currentPageInfo = currentPageInfoRef.getPlain(); - if (currentPageInfo == null) return null; - LOG.trace("Current page info: {}", currentPageInfo); - var result = this.searchPage(queryParams, indexSearchers, true, queryParams.pageLimits(), 0, currentPageInfo); - LOG.trace("Next page info: {}", result != null ? result.nextPageInfo() : null); - currentPageInfoRef.setPlain(result != null ? result.nextPageInfo() : null); - if (result == null || result.topDocs().scoreDocs.length == 0) { - return null; - } else { - return Arrays.asList(result.topDocs().scoreDocs); - } - }).flatMap(Collection::stream); - - return LuceneUtils.convertHits(topFieldDocStream, indexSearchers, keyFieldName); - } - - /** - * - * @param resultsOffset offset of the resulting topDocs. Useful if you want to - * skip the first n results in the first page - */ - private PageData searchPage(LocalQueryParams queryParams, - List indexSearchers, - boolean allowPagination, - PageLimits pageLimits, - int resultsOffset, - CurrentPageInfo s) { - if (resultsOffset < 0) { - throw new IndexOutOfBoundsException(resultsOffset); - } - ScoringShardsCollectorMultiManager cmm; - if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) { - var query = queryParams.query(); - @Nullable var sort = getSort(queryParams); - var pageLimit = pageLimits.getPageLimit(s.pageIndex()); - var after = (FieldDoc) s.last(); - var totalHitsThreshold = queryParams.getTotalHitsThresholdInt(); - cmm = new ScoringShardsCollectorMultiManager(query, sort, pageLimit, after, totalHitsThreshold, - resultsOffset, pageLimit); - } else { - return null; - }; - record IndexedShard(IndexSearcher indexSearcher, long shardIndex) {} - List shardResults = toListOn(LUCENE_POOL, - Streams.mapWithIndex(indexSearchers.stream(), IndexedShard::new).map(shardWithIndex -> { - var index = (int) shardWithIndex.shardIndex(); - var shard = shardWithIndex.indexSearcher(); - - var cm = cmm.get(shard, index); - - try { - return shard.search(queryParams.query(), cm); - } catch (IOException e) { - throw new DBException(e); - } - }) - ); - - var pageTopDocs = cmm.reduce(shardResults); - - var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs); - long nextRemainingLimit; - if (allowPagination) { - nextRemainingLimit = s.remainingLimit() - pageLimits.getPageLimit(s.pageIndex()); - } else { - nextRemainingLimit = 0L; - } - var nextPageIndex = s.pageIndex() + 1; - var nextPageInfo = new CurrentPageInfo(pageLastDoc, nextRemainingLimit, nextPageIndex); - return new PageData(pageTopDocs, nextPageInfo); - } - - @Override - public String toString() { - return "scored paged multi"; - } - -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/ShardIndexSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/ShardIndexSearcher.java deleted file mode 100644 index 822b349..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/ShardIndexSearcher.java +++ /dev/null @@ -1,271 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Set; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; -import org.apache.lucene.search.CollectionStatistics; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryVisitor; -import org.apache.lucene.search.TermStatistics; -import org.jetbrains.annotations.Nullable; - -public class ShardIndexSearcher extends IndexSearcher { - public final int myNodeID; - private final IndexSearcher[] searchers; - - private final Map collectionStatsCache; - private final Map termStatsCache; - - public ShardIndexSearcher(SharedShardStatistics sharedShardStatistics, List searchers, int nodeID) { - super(searchers.get(nodeID).getIndexReader(), searchers.get(nodeID).getExecutor()); - this.collectionStatsCache = sharedShardStatistics.collectionStatsCache; - this.termStatsCache = sharedShardStatistics.termStatsCache; - this.searchers = searchers.toArray(IndexSearcher[]::new); - myNodeID = nodeID; - } - - public static List create(Iterable indexSearchersIterable) { - var it = indexSearchersIterable.iterator(); - if (it.hasNext()) { - var is = it.next(); - if (!it.hasNext()) { - return List.of(is); - } - if (is instanceof ShardIndexSearcher) { - if (indexSearchersIterable instanceof List list) { - return list; - } else { - var result = new ArrayList(); - result.add(is); - do { - result.add(it.next()); - } while (it.hasNext()); - return result; - } - } - } - List indexSearchers; - if (indexSearchersIterable instanceof List collection) { - indexSearchers = collection; - } else if (indexSearchersIterable instanceof Collection collection) { - indexSearchers = List.copyOf(collection); - } else { - indexSearchers = new ArrayList<>(); - for (IndexSearcher i : indexSearchersIterable) { - indexSearchers.add(i); - } - } - if (indexSearchers.size() == 0) { - return List.of(); - } else { - var sharedShardStatistics = new SharedShardStatistics(); - List result = new ArrayList<>(indexSearchers.size()); - for (int nodeId = 0; nodeId < indexSearchers.size(); nodeId++) { - result.add(new ShardIndexSearcher(sharedShardStatistics, indexSearchers, nodeId)); - } - return result; - } - } - - @Override - public Query rewrite(Query original) throws IOException { - final IndexSearcher localSearcher = new IndexSearcher(getIndexReader()); - original = localSearcher.rewrite(original); - final Set terms = new HashSet<>(); - original.visit(QueryVisitor.termCollector(terms)); - - // Make a single request to remote nodes for term - // stats: - for (int nodeID = 0; nodeID < searchers.length; nodeID++) { - if (nodeID == myNodeID) { - continue; - } - - final Set missing = new HashSet<>(); - for (Term term : terms) { - final TermAndShard key = new TermAndShard(nodeID, term); - if (!termStatsCache.containsKey(key)) { - missing.add(term); - } - } - if (missing.size() != 0) { - for (Map.Entry ent : getNodeTermStats(missing, nodeID).entrySet()) { - if (ent.getValue() != null) { - final TermAndShard key = new TermAndShard(nodeID, ent.getKey()); - termStatsCache.put(key, ent.getValue()); - } - } - } - } - - return original; - } - - // Mock: in a real env, this would hit the wire and get - // term stats from remote node - Map getNodeTermStats(Set terms, int nodeID) throws IOException { - var s = searchers[nodeID]; - final Map stats = new HashMap<>(); - if (s == null) { - throw new NoSuchElementException("node=" + nodeID); - } - for (Term term : terms) { - final TermStates ts = TermStates.build(s, term, true); - if (ts.docFreq() > 0) { - stats.put(term, s.termStatistics(term, ts.docFreq(), ts.totalTermFreq())); - } - } - return stats; - } - - @Override - public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) - throws IOException { - assert term != null; - long distributedDocFreq = 0; - long distributedTotalTermFreq = 0; - for (int nodeID = 0; nodeID < searchers.length; nodeID++) { - - final TermStatistics subStats; - if (nodeID == myNodeID) { - subStats = super.termStatistics(term, docFreq, totalTermFreq); - } else { - final TermAndShard key = new TermAndShard(nodeID, term); - subStats = termStatsCache.get(key); - if (subStats == null) { - continue; // term not found - } - } - - long nodeDocFreq = subStats.docFreq(); - distributedDocFreq += nodeDocFreq; - - long nodeTotalTermFreq = subStats.totalTermFreq(); - distributedTotalTermFreq += nodeTotalTermFreq; - } - assert distributedDocFreq > 0; - return new TermStatistics(term.bytes(), distributedDocFreq, distributedTotalTermFreq); - } - - @Override - public CollectionStatistics collectionStatistics(String field) throws IOException { - // TODO: we could compute this on init and cache, - // since we are re-inited whenever any nodes have a - // new reader - long docCount = 0; - long sumTotalTermFreq = 0; - long sumDocFreq = 0; - long maxDoc = 0; - - for (int nodeID = 0; nodeID < searchers.length; nodeID++) { - final FieldAndShar key = new FieldAndShar(nodeID, field); - final CollectionStatistics nodeStats; - if (nodeID == myNodeID) { - nodeStats = super.collectionStatistics(field); - collectionStatsCache.put(key, new CachedCollectionStatistics(nodeStats)); - } else { - var nodeStatsOptional = collectionStatsCache.get(key); - if (nodeStatsOptional == null) { - nodeStatsOptional = new CachedCollectionStatistics(computeNodeCollectionStatistics(key)); - collectionStatsCache.put(key, nodeStatsOptional); - } - nodeStats = nodeStatsOptional.collectionStatistics(); - } - if (nodeStats == null) { - continue; // field not in sub at all - } - - long nodeDocCount = nodeStats.docCount(); - docCount += nodeDocCount; - - long nodeSumTotalTermFreq = nodeStats.sumTotalTermFreq(); - sumTotalTermFreq += nodeSumTotalTermFreq; - - long nodeSumDocFreq = nodeStats.sumDocFreq(); - sumDocFreq += nodeSumDocFreq; - - assert nodeStats.maxDoc() >= 0; - maxDoc += nodeStats.maxDoc(); - } - - if (maxDoc == 0) { - return null; // field not found across any node whatsoever - } else { - return new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq); - } - } - - private CollectionStatistics computeNodeCollectionStatistics(FieldAndShar fieldAndShard) throws IOException { - var searcher = searchers[fieldAndShard.nodeID]; - return searcher.collectionStatistics(fieldAndShard.field); - } - - public record CachedCollectionStatistics(@Nullable CollectionStatistics collectionStatistics) {} - - public static class TermAndShard { - private final int nodeID; - private final Term term; - - public TermAndShard(int nodeID, Term term) { - this.nodeID = nodeID; - this.term = term; - } - - @Override - public int hashCode() { - return (nodeID + term.hashCode()); - } - - @Override - public boolean equals(Object _other) { - if (!(_other instanceof final TermAndShard other)) { - return false; - } - - return term.equals(other.term) && nodeID == other.nodeID; - } - } - - - public static class FieldAndShar { - private final int nodeID; - private final String field; - - public FieldAndShar(int nodeID, String field) { - this.nodeID = nodeID; - this.field = field; - } - - @Override - public int hashCode() { - return (nodeID + field.hashCode()); - } - - @Override - public boolean equals(Object _other) { - if (!(_other instanceof final FieldAndShar other)) { - return false; - } - - return field.equals(other.field) && nodeID == other.nodeID; - } - - @Override - public String toString() { - return "FieldAndShardVersion(field=" - + field - + " nodeID=" - + nodeID - + ")"; - } - } -} \ No newline at end of file diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/SharedShardStatistics.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/SharedShardStatistics.java deleted file mode 100644 index 523fbe0..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/SharedShardStatistics.java +++ /dev/null @@ -1,14 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.CachedCollectionStatistics; -import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.FieldAndShar; -import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.TermAndShard; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.lucene.search.TermStatistics; - -public class SharedShardStatistics { - - public final Map collectionStatsCache = new ConcurrentHashMap<>(); - public final Map termStatsCache = new ConcurrentHashMap<>(); -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/StandardSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/StandardSearcher.java deleted file mode 100644 index 638d787..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/StandardSearcher.java +++ /dev/null @@ -1,137 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static it.cavallium.dbengine.database.LLUtils.mapList; -import static it.cavallium.dbengine.utils.StreamUtils.toList; -import static java.util.Objects.requireNonNull; - -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.utils.DBException; -import it.cavallium.dbengine.utils.StreamUtils; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopDocsCollector; -import org.apache.lucene.search.TopFieldCollector; -import org.apache.lucene.search.TopFieldDocs; -import org.apache.lucene.search.TopScoreDocCollector; -import org.jetbrains.annotations.Nullable; - -public class StandardSearcher implements MultiSearcher { - - protected static final Logger LOG = LogManager.getLogger(StandardSearcher.class); - - public StandardSearcher() { - } - - @Override - public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != GlobalQueryRewrite.NO_REWRITE) { - return LuceneUtils.rewriteMulti(this, indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - // Search results - var fullDocs = this.search(indexSearchers.shards(), queryParams); - // Compute the results - return this.computeResults(fullDocs, indexSearchers, keyFieldName, queryParams, filterer); - } - - /** - * Search effectively the raw results - */ - @SuppressWarnings({"rawtypes"}) - private TopDocs search(Collection indexSearchers, LocalQueryParams queryParams) { - var totalHitsThreshold = queryParams.getTotalHitsThresholdInt(); - CollectorManager, ? extends TopDocs> sharedManager; - if (queryParams.isSorted() && !queryParams.isSortedByScore()) { - sharedManager = TopFieldCollector.createSharedManager(queryParams.sort(), - queryParams.limitInt(), null, totalHitsThreshold - ); - } else { - sharedManager = TopScoreDocCollector.createSharedManager(queryParams.limitInt(), null, totalHitsThreshold); - } - ; - var collectors = mapList(indexSearchers, shard -> { - try { - TopDocsCollector collector; - collector = sharedManager.newCollector(); - assert queryParams.computePreciseHitsCount() == null || (queryParams.computePreciseHitsCount() == collector - .scoreMode() - .isExhaustive()); - - shard.search(queryParams.query(), LuceneUtils.withTimeout(collector, queryParams.timeout())); - return collector; - } catch (IOException e) { - throw new DBException(e); - } - }); - - try { - if (collectors.size() <= 1) { - //noinspection unchecked - return sharedManager.reduce((List) collectors); - } else if (queryParams.isSorted() && !queryParams.isSortedByScore()) { - final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()]; - int i = 0; - for (var collector : collectors) { - var topFieldDocs = ((TopFieldCollector) collector).topDocs(); - for (ScoreDoc scoreDoc : topFieldDocs.scoreDocs) { - scoreDoc.shardIndex = i; - } - topDocs[i++] = topFieldDocs; - } - return TopDocs.merge(requireNonNull(queryParams.sort()), 0, queryParams.limitInt(), topDocs); - } else { - final TopDocs[] topDocs = new TopDocs[collectors.size()]; - int i = 0; - for (var collector : collectors) { - var topScoreDocs = collector.topDocs(); - for (ScoreDoc scoreDoc : topScoreDocs.scoreDocs) { - scoreDoc.shardIndex = i; - } - topDocs[i++] = topScoreDocs; - } - return TopDocs.merge(0, queryParams.limitInt(), topDocs); - } - } catch (IOException ex) { - throw new DBException(ex); - } - } - - /** - * Compute the results, extracting useful data - */ - private LuceneSearchResult computeResults(TopDocs data, - LLIndexSearchers indexSearchers, - String keyFieldName, - LocalQueryParams queryParams, - Function, Stream> filterer) { - var totalHitsCount = LuceneUtils.convertTotalHitsCount(data.totalHits); - - Stream hitsStream = LuceneUtils - .convertHits(Stream.of(data.scoreDocs), indexSearchers.shards(), keyFieldName) - .skip(queryParams.offsetLong()) - .limit(queryParams.limitLong()); - - return new LuceneSearchResult(totalHitsCount, toList(filterer.apply(hitsStream))); - } - - @Override - public String toString() { - return "standard"; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/searcher/UnsortedStreamingMultiSearcher.java b/src/main/java/it/cavallium/dbengine/lucene/searcher/UnsortedStreamingMultiSearcher.java deleted file mode 100644 index 37d75eb..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/searcher/UnsortedStreamingMultiSearcher.java +++ /dev/null @@ -1,73 +0,0 @@ -package it.cavallium.dbengine.lucene.searcher; - -import static com.google.common.collect.Streams.mapWithIndex; -import static it.cavallium.dbengine.utils.StreamUtils.toList; - -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.lucene.LuceneUtils; -import java.io.IOException; -import java.util.List; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.jetbrains.annotations.Nullable; - -public class UnsortedStreamingMultiSearcher implements MultiSearcher { - - - protected static final Logger LOG = LogManager.getLogger(UnsortedStreamingMultiSearcher.class); - - @Override - public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != GlobalQueryRewrite.NO_REWRITE) { - return LuceneUtils.rewriteMulti(this, indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - if (queryParams.isSorted() && queryParams.limitLong() > 0) { - throw new UnsupportedOperationException("Sorted queries are not supported" + " by UnsortedContinuousLuceneMultiSearcher"); - } - var localQueryParams = getLocalQueryParams(queryParams); - - var shards = indexSearchers.shards(); - - Stream scoreDocsFlux = getScoreDocs(localQueryParams, shards); - - Stream resultsFlux = LuceneUtils.convertHits(scoreDocsFlux, shards, keyFieldName); - - var totalHitsCount = new TotalHitsCount(0, false); - Stream mergedFluxes = resultsFlux.skip(queryParams.offsetLong()).limit(queryParams.limitLong()); - - return new LuceneSearchResult(totalHitsCount, toList(filterer.apply(mergedFluxes))); - } - - private Stream getScoreDocs(LocalQueryParams localQueryParams, List shards) { - return mapWithIndex(shards.stream(), - (shard, shardIndex) -> LuceneGenerator.reactive(shard, localQueryParams, (int) shardIndex)) - .flatMap(Function.identity()); - } - - private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) { - return new LocalQueryParams(queryParams.query(), - 0L, - queryParams.offsetLong() + queryParams.limitLong(), - queryParams.pageLimits(), - queryParams.sort(), - queryParams.computePreciseHitsCount(), - queryParams.timeout() - ); - } - - @Override - public String toString() { - return "unsorted streaming multi"; - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/similarity/NGramSimilarity.java b/src/main/java/it/cavallium/dbengine/lucene/similarity/NGramSimilarity.java deleted file mode 100644 index a8c6b9e..0000000 --- a/src/main/java/it/cavallium/dbengine/lucene/similarity/NGramSimilarity.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package it.cavallium.dbengine.lucene.similarity; - - -import org.apache.lucene.search.similarities.ClassicSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.novasearch.lucene.search.similarities.BM25Similarity; -import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model; - -public class NGramSimilarity { - - private NGramSimilarity() { - - } - - public static Similarity classic() { - var instance = new ClassicSimilarity(); - instance.setDiscountOverlaps(false); - return instance; - } - - public static Similarity bm25(BM25Model model) { - var instance = new BM25Similarity(model); - instance.setDiscountOverlaps(false); - return instance; - } - - public static Similarity bm15(BM25Model model) { - var instance = new BM25Similarity(1.2f, 0.0f, 0.5f, model); - instance.setDiscountOverlaps(false); - return instance; - } - - public static Similarity bm11(BM25Model model) { - var instance = new BM25Similarity(1.2f, 1.0f, 0.5f, model); - instance.setDiscountOverlaps(false); - return instance; - } -} diff --git a/src/main/java/it/cavallium/dbengine/utils/BooleanListJsonAdapter.java b/src/main/java/it/cavallium/dbengine/utils/BooleanListJsonAdapter.java deleted file mode 100644 index ae77160..0000000 --- a/src/main/java/it/cavallium/dbengine/utils/BooleanListJsonAdapter.java +++ /dev/null @@ -1,39 +0,0 @@ -package it.cavallium.dbengine.utils; - -import com.squareup.moshi.JsonAdapter; -import com.squareup.moshi.JsonReader; -import com.squareup.moshi.JsonWriter; -import it.unimi.dsi.fastutil.booleans.BooleanArrayList; -import it.unimi.dsi.fastutil.booleans.BooleanList; -import it.unimi.dsi.fastutil.booleans.BooleanLists; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class BooleanListJsonAdapter extends JsonAdapter { - - @Override - public @NotNull BooleanList fromJson(@NotNull JsonReader reader) throws IOException { - reader.beginArray(); - BooleanArrayList modifiableOutput = new BooleanArrayList(); - while (reader.hasNext()) { - modifiableOutput.add(reader.nextBoolean()); - } - reader.endArray(); - return BooleanLists.unmodifiable(modifiableOutput); - } - - @Override - public void toJson(@NotNull JsonWriter writer, @Nullable BooleanList value) throws IOException { - if (value == null) { - writer.nullValue(); - return; - } - - writer.beginArray(); - for (int i = 0; i < value.size(); i++) { - writer.value(value.getBoolean(i)); - } - writer.endArray(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/utils/BufJsonAdapter.java b/src/main/java/it/cavallium/dbengine/utils/BufJsonAdapter.java deleted file mode 100644 index cb11c67..0000000 --- a/src/main/java/it/cavallium/dbengine/utils/BufJsonAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -package it.cavallium.dbengine.utils; - -import com.squareup.moshi.JsonAdapter; -import com.squareup.moshi.JsonReader; -import com.squareup.moshi.JsonWriter; -import it.cavallium.buffer.Buf; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class BufJsonAdapter extends JsonAdapter { - - @Override - public @NotNull Buf fromJson(@NotNull JsonReader reader) throws IOException { - reader.beginArray(); - var modifiableOutput = Buf.create(); - while (reader.hasNext()) { - modifiableOutput.add((byte) reader.nextInt()); - } - reader.endArray(); - return modifiableOutput; - } - - @Override - public void toJson(@NotNull JsonWriter writer, @Nullable Buf value) throws IOException { - if (value == null) { - writer.nullValue(); - return; - } - - writer.beginArray(); - for (int i = 0; i < value.size(); i++) { - writer.value((long) value.getByte(i)); - } - writer.endArray(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/utils/ByteListJsonAdapter.java b/src/main/java/it/cavallium/dbengine/utils/ByteListJsonAdapter.java deleted file mode 100644 index 134b981..0000000 --- a/src/main/java/it/cavallium/dbengine/utils/ByteListJsonAdapter.java +++ /dev/null @@ -1,39 +0,0 @@ -package it.cavallium.dbengine.utils; - -import com.squareup.moshi.JsonAdapter; -import com.squareup.moshi.JsonReader; -import com.squareup.moshi.JsonWriter; -import it.unimi.dsi.fastutil.bytes.ByteArrayList; -import it.unimi.dsi.fastutil.bytes.ByteList; -import it.unimi.dsi.fastutil.bytes.ByteLists; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class ByteListJsonAdapter extends JsonAdapter { - - @Override - public @NotNull ByteList fromJson(@NotNull JsonReader reader) throws IOException { - reader.beginArray(); - ByteArrayList modifiableOutput = new ByteArrayList(); - while (reader.hasNext()) { - modifiableOutput.add((byte) reader.nextInt()); - } - reader.endArray(); - return ByteLists.unmodifiable(modifiableOutput); - } - - @Override - public void toJson(@NotNull JsonWriter writer, @Nullable ByteList value) throws IOException { - if (value == null) { - writer.nullValue(); - return; - } - - writer.beginArray(); - for (int i = 0; i < value.size(); i++) { - writer.value((long) value.getByte(i)); - } - writer.endArray(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/utils/CharListJsonAdapter.java b/src/main/java/it/cavallium/dbengine/utils/CharListJsonAdapter.java deleted file mode 100644 index 56ad070..0000000 --- a/src/main/java/it/cavallium/dbengine/utils/CharListJsonAdapter.java +++ /dev/null @@ -1,39 +0,0 @@ -package it.cavallium.dbengine.utils; - -import com.squareup.moshi.JsonAdapter; -import com.squareup.moshi.JsonReader; -import com.squareup.moshi.JsonWriter; -import it.unimi.dsi.fastutil.chars.CharArrayList; -import it.unimi.dsi.fastutil.chars.CharList; -import it.unimi.dsi.fastutil.chars.CharLists; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class CharListJsonAdapter extends JsonAdapter { - - @Override - public @NotNull CharList fromJson(@NotNull JsonReader reader) throws IOException { - reader.beginArray(); - CharArrayList modifiableOutput = new CharArrayList(); - while (reader.hasNext()) { - modifiableOutput.add((char) reader.nextInt()); - } - reader.endArray(); - return CharLists.unmodifiable(modifiableOutput); - } - - @Override - public void toJson(@NotNull JsonWriter writer, @Nullable CharList value) throws IOException { - if (value == null) { - writer.nullValue(); - return; - } - - writer.beginArray(); - for (int i = 0; i < value.size(); i++) { - writer.value((long) value.getChar(i)); - } - writer.endArray(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/lucene/ExponentialPageLimits.java b/src/main/java/it/cavallium/dbengine/utils/ExponentialLimits.java similarity index 54% rename from src/main/java/it/cavallium/dbengine/lucene/ExponentialPageLimits.java rename to src/main/java/it/cavallium/dbengine/utils/ExponentialLimits.java index 470e49c..08e940b 100644 --- a/src/main/java/it/cavallium/dbengine/lucene/ExponentialPageLimits.java +++ b/src/main/java/it/cavallium/dbengine/utils/ExponentialLimits.java @@ -1,9 +1,9 @@ -package it.cavallium.dbengine.lucene; +package it.cavallium.dbengine.utils; /** *
y = 2 ^ (x + pageIndexOffset) + firstPageLimit
*/ -public class ExponentialPageLimits implements PageLimits { +public class ExponentialLimits { private static final int DEFAULT_PAGE_INDEX_OFFSET = 0; @@ -11,25 +11,12 @@ public class ExponentialPageLimits implements PageLimits { private final int firstPageLimit; private final int maxItemsPerPage; - public ExponentialPageLimits() { - this(DEFAULT_PAGE_INDEX_OFFSET); - } - - public ExponentialPageLimits(int pageIndexOffset) { - this(pageIndexOffset, DEFAULT_MIN_ITEMS_PER_PAGE); - } - - public ExponentialPageLimits(int pageIndexOffset, int firstPageLimit) { - this(pageIndexOffset, firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE); - } - - public ExponentialPageLimits(int pageIndexOffset, int firstPageLimit, int maxItemsPerPage) { + public ExponentialLimits(int pageIndexOffset, int firstPageLimit, int maxItemsPerPage) { this.pageIndexOffset = pageIndexOffset; this.firstPageLimit = firstPageLimit; this.maxItemsPerPage = maxItemsPerPage; } - @Override public int getPageLimit(int pageIndex) { var offsetedIndex = pageIndex + pageIndexOffset; var power = 0b1L << offsetedIndex; @@ -40,6 +27,16 @@ public class ExponentialPageLimits implements PageLimits { var min = Math.max(firstPageLimit, Math.min(maxItemsPerPage, firstPageLimit + power)); assert min > 0; - return LuceneUtils.safeLongToInt(min); + return safeLongToInt(min); + } + + private static int safeLongToInt(long l) { + if (l > 2147483630) { + return 2147483630; + } else if (l < -2147483630) { + return -2147483630; + } else { + return (int) l; + } } } diff --git a/src/main/java/it/cavallium/dbengine/utils/IntListJsonAdapter.java b/src/main/java/it/cavallium/dbengine/utils/IntListJsonAdapter.java deleted file mode 100644 index dfd6933..0000000 --- a/src/main/java/it/cavallium/dbengine/utils/IntListJsonAdapter.java +++ /dev/null @@ -1,39 +0,0 @@ -package it.cavallium.dbengine.utils; - -import com.squareup.moshi.JsonAdapter; -import com.squareup.moshi.JsonReader; -import com.squareup.moshi.JsonWriter; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; -import it.unimi.dsi.fastutil.ints.IntLists; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class IntListJsonAdapter extends JsonAdapter { - - @Override - public @NotNull IntList fromJson(@NotNull JsonReader reader) throws IOException { - reader.beginArray(); - IntArrayList modifiableOutput = new IntArrayList(); - while (reader.hasNext()) { - modifiableOutput.add(reader.nextInt()); - } - reader.endArray(); - return IntLists.unmodifiable(modifiableOutput); - } - - @Override - public void toJson(@NotNull JsonWriter writer, @Nullable IntList value) throws IOException { - if (value == null) { - writer.nullValue(); - return; - } - - writer.beginArray(); - for (int i = 0; i < value.size(); i++) { - writer.value((long) value.getInt(i)); - } - writer.endArray(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/utils/LongListJsonAdapter.java b/src/main/java/it/cavallium/dbengine/utils/LongListJsonAdapter.java deleted file mode 100644 index cd2f8f2..0000000 --- a/src/main/java/it/cavallium/dbengine/utils/LongListJsonAdapter.java +++ /dev/null @@ -1,39 +0,0 @@ -package it.cavallium.dbengine.utils; - -import com.squareup.moshi.JsonAdapter; -import com.squareup.moshi.JsonReader; -import com.squareup.moshi.JsonWriter; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import it.unimi.dsi.fastutil.longs.LongList; -import it.unimi.dsi.fastutil.longs.LongLists; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class LongListJsonAdapter extends JsonAdapter { - - @Override - public @NotNull LongList fromJson(@NotNull JsonReader reader) throws IOException { - reader.beginArray(); - LongArrayList modifiableOutput = new LongArrayList(); - while (reader.hasNext()) { - modifiableOutput.add(reader.nextLong()); - } - reader.endArray(); - return LongLists.unmodifiable(modifiableOutput); - } - - @Override - public void toJson(@NotNull JsonWriter writer, @Nullable LongList value) throws IOException { - if (value == null) { - writer.nullValue(); - return; - } - - writer.beginArray(); - for (int i = 0; i < value.size(); i++) { - writer.value(value.getLong(i)); - } - writer.endArray(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/utils/ShortListJsonAdapter.java b/src/main/java/it/cavallium/dbengine/utils/ShortListJsonAdapter.java deleted file mode 100644 index a0f30f3..0000000 --- a/src/main/java/it/cavallium/dbengine/utils/ShortListJsonAdapter.java +++ /dev/null @@ -1,39 +0,0 @@ -package it.cavallium.dbengine.utils; - -import com.squareup.moshi.JsonAdapter; -import com.squareup.moshi.JsonReader; -import com.squareup.moshi.JsonWriter; -import it.unimi.dsi.fastutil.shorts.ShortArrayList; -import it.unimi.dsi.fastutil.shorts.ShortList; -import it.unimi.dsi.fastutil.shorts.ShortLists; -import java.io.IOException; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -public class ShortListJsonAdapter extends JsonAdapter { - - @Override - public @NotNull ShortList fromJson(@NotNull JsonReader reader) throws IOException { - reader.beginArray(); - ShortArrayList modifiableOutput = new ShortArrayList(); - while (reader.hasNext()) { - modifiableOutput.add((short) reader.nextInt()); - } - reader.endArray(); - return ShortLists.unmodifiable(modifiableOutput); - } - - @Override - public void toJson(@NotNull JsonWriter writer, @Nullable ShortList value) throws IOException { - if (value == null) { - writer.nullValue(); - return; - } - - writer.beginArray(); - for (int i = 0; i < value.size(); i++) { - writer.value((long) value.getShort(i)); - } - writer.endArray(); - } -} diff --git a/src/main/java/it/cavallium/dbengine/utils/StreamUtils.java b/src/main/java/it/cavallium/dbengine/utils/StreamUtils.java index 58d6dc3..953ea12 100644 --- a/src/main/java/it/cavallium/dbengine/utils/StreamUtils.java +++ b/src/main/java/it/cavallium/dbengine/utils/StreamUtils.java @@ -42,8 +42,6 @@ import org.jetbrains.annotations.Nullable; public class StreamUtils { - public static final ForkJoinPool LUCENE_POOL = newNamedForkJoinPool("Lucene", false); - private static final Collector TO_LIST_FAKE_COLLECTOR = new FakeCollector(); private static final Collector COUNT_FAKE_COLLECTOR = new FakeCollector(); private static final Collector FIRST_FAKE_COLLECTOR = new FakeCollector(); diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java index e49b9f9..e49ff1e 100644 --- a/src/main/java/module-info.java +++ b/src/main/java/module-info.java @@ -1,5 +1,4 @@ module dbengine { - exports it.cavallium.dbengine.lucene; exports it.cavallium.dbengine.database; exports it.cavallium.dbengine.rpc.current.data; exports it.cavallium.dbengine.database.remote; @@ -8,16 +7,12 @@ module dbengine { exports it.cavallium.dbengine.database.serialization; exports it.cavallium.dbengine.client; exports it.cavallium.dbengine.client.query.current.data; - exports it.cavallium.dbengine.lucene.collector; - exports it.cavallium.dbengine.lucene.searcher; exports it.cavallium.dbengine.database.collections; - exports it.cavallium.dbengine.lucene.analyzer; exports it.cavallium.dbengine.client.query; exports it.cavallium.dbengine.database.memory; opens it.cavallium.dbengine.database.remote; exports it.cavallium.dbengine.utils; exports it.cavallium.dbengine.database.disk.rocksdb; - exports it.cavallium.dbengine.lucene.hugepq.search; requires org.jetbrains.annotations; requires com.google.common; requires micrometer.core; @@ -27,19 +22,9 @@ module dbengine { requires it.unimi.dsi.fastutil; requires it.cavallium.datagen; requires java.logging; - requires org.apache.lucene.core; requires org.apache.commons.lang3; requires java.compiler; - requires org.apache.lucene.analysis.common; - requires org.apache.lucene.misc; - requires org.apache.lucene.codecs; - requires org.apache.lucene.backward_codecs; - requires lucene.relevance; - requires org.apache.lucene.facet; requires java.management; - requires com.ibm.icu; - requires org.apache.lucene.analysis.icu; - requires org.apache.lucene.queryparser; requires okio; requires moshi.records.reflect; requires moshi; diff --git a/src/test/java/it/cavallium/dbengine/tests/DbTestUtils.java b/src/test/java/it/cavallium/dbengine/tests/DbTestUtils.java index 1519de2..0da6b60 100644 --- a/src/test/java/it/cavallium/dbengine/tests/DbTestUtils.java +++ b/src/test/java/it/cavallium/dbengine/tests/DbTestUtils.java @@ -4,12 +4,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import it.cavallium.buffer.BufDataInput; import it.cavallium.buffer.BufDataOutput; -import it.cavallium.dbengine.client.LuceneIndex; -import it.cavallium.dbengine.client.LuceneIndexImpl; import it.cavallium.dbengine.database.LLDatabaseConnection; import it.cavallium.dbengine.database.LLDictionary; import it.cavallium.dbengine.database.LLKeyValueDatabase; -import it.cavallium.dbengine.database.LLLuceneIndex; import it.cavallium.dbengine.database.UpdateMode; import it.cavallium.dbengine.database.collections.DatabaseMapDictionary; import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep; @@ -26,12 +23,16 @@ import java.io.IOException; import java.nio.file.Path; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; -import org.apache.lucene.util.IOSupplier; import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.Assertions; public class DbTestUtils { + @FunctionalInterface + public interface IOSupplier { + T get() throws IOException; + } + public static final String BIG_STRING = generateBigString(); public static final int MAX_IN_MEMORY_RESULT_ENTRIES = 8192; @@ -72,9 +73,6 @@ public class DbTestUtils { } public record TempDb(LLDatabaseConnection connection, LLKeyValueDatabase db, - LLLuceneIndex luceneSingle, - LLLuceneIndex luceneMulti, - SwappableLuceneSearcher swappableLuceneSearcher, Path path) {} public static void ensureNoLeaks() { @@ -91,10 +89,6 @@ public class DbTestUtils { return database.getDictionary(name, updateMode); } - public static LuceneIndex tempLuceneIndex(LLLuceneIndex index) { - return new LuceneIndexImpl<>(index, new StringIndicizer()); - } - public enum MapType { MAP, diff --git a/src/test/java/it/cavallium/dbengine/tests/LocalTemporaryDbGenerator.java b/src/test/java/it/cavallium/dbengine/tests/LocalTemporaryDbGenerator.java index bd5f51b..cb17e0b 100644 --- a/src/test/java/it/cavallium/dbengine/tests/LocalTemporaryDbGenerator.java +++ b/src/test/java/it/cavallium/dbengine/tests/LocalTemporaryDbGenerator.java @@ -1,32 +1,18 @@ package it.cavallium.dbengine.tests; -import static it.cavallium.dbengine.tests.DbTestUtils.MAX_IN_MEMORY_RESULT_ENTRIES; import static it.cavallium.dbengine.tests.DbTestUtils.ensureNoLeaks; import io.micrometer.core.instrument.simple.SimpleMeterRegistry; -import it.cavallium.datagen.nativedata.Nullableboolean; -import it.cavallium.datagen.nativedata.Nullabledouble; -import it.cavallium.datagen.nativedata.Nullableint; import it.cavallium.dbengine.tests.DbTestUtils.TempDb; import it.cavallium.dbengine.client.DefaultDatabaseOptions; -import it.cavallium.dbengine.client.IndicizerAnalyzers; -import it.cavallium.dbengine.client.IndicizerSimilarities; import it.cavallium.dbengine.database.ColumnUtils; import it.cavallium.dbengine.database.LLDatabaseConnection; import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.time.Duration; import java.util.Comparator; import java.util.List; -import java.util.Map; import java.util.concurrent.CompletionException; import java.util.concurrent.atomic.AtomicInteger; @@ -34,19 +20,6 @@ public class LocalTemporaryDbGenerator implements TemporaryDbGenerator { private static final AtomicInteger dbId = new AtomicInteger(0); - private static final LuceneOptions LUCENE_OPTS = new LuceneOptions(Map.of(), - Duration.ofSeconds(5), - Duration.ofSeconds(5), - false, - new ByteBuffersDirectory(), - Nullableboolean.empty(), - Nullabledouble.empty(), - Nullableint.empty(), - Nullableboolean.empty(), - Nullableboolean.empty(), - MAX_IN_MEMORY_RESULT_ENTRIES, - LuceneUtils.getDefaultMergePolicy() - ); @Override public TempDb openTempDb() throws IOException { @@ -69,28 +42,11 @@ public class LocalTemporaryDbGenerator implements TemporaryDbGenerator { true ).connect(); - SwappableLuceneSearcher searcher = new SwappableLuceneSearcher(); - var luceneHacks = new LuceneHacks(() -> searcher, () -> searcher); return new TempDb(conn, conn.getDatabase("testdb", List.of(ColumnUtils.dictionary("testmap"), ColumnUtils.special("ints"), ColumnUtils.special("longs")), DefaultDatabaseOptions.builder().build() ), - conn.getLuceneIndex("testluceneindex1", - LuceneUtils.singleStructure(), - IndicizerAnalyzers.of(TextFieldsAnalyzer.ICUCollationKey), - IndicizerSimilarities.of(TextFieldsSimilarity.Boolean), - LUCENE_OPTS, - luceneHacks - ), - conn.getLuceneIndex("testluceneindex16", - LuceneUtils.shardsStructure(3), - IndicizerAnalyzers.of(TextFieldsAnalyzer.ICUCollationKey), - IndicizerSimilarities.of(TextFieldsSimilarity.Boolean), - LUCENE_OPTS, - luceneHacks - ), - searcher, wrkspcPath ); } @@ -99,9 +55,6 @@ public class LocalTemporaryDbGenerator implements TemporaryDbGenerator { public void closeTempDb(TempDb tempDb) throws IOException { tempDb.db().close(); tempDb.connection().disconnect(); - tempDb.swappableLuceneSearcher().close(); - tempDb.luceneMulti().close(); - tempDb.luceneSingle().close(); ensureNoLeaks(); if (Files.exists(tempDb.path())) { try (var walk = Files.walk(tempDb.path())) { diff --git a/src/test/java/it/cavallium/dbengine/tests/LuceneGeneratorTest.java b/src/test/java/it/cavallium/dbengine/tests/LuceneGeneratorTest.java deleted file mode 100644 index 307c125..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/LuceneGeneratorTest.java +++ /dev/null @@ -1,221 +0,0 @@ -package it.cavallium.dbengine.tests; - -import it.cavallium.dbengine.lucene.ExponentialPageLimits; -import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; -import it.cavallium.dbengine.lucene.searcher.LuceneGenerator; -import it.unimi.dsi.fastutil.longs.LongList; -import java.io.IOException; -import java.time.Duration; -import java.util.Comparator; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.classic.QueryParser; -import it.cavallium.dbengine.lucene.hugepq.search.CustomHitsThresholdChecker; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.ByteBuffersDirectory; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -public class LuceneGeneratorTest { - - private static IndexSearcher is; - private static ExponentialPageLimits pageLimits; - private static IndexWriter iw; - - @BeforeAll - public static void beforeAll() throws IOException { - ByteBuffersDirectory dir = new ByteBuffersDirectory(); - Analyzer analyzer = new StandardAnalyzer(); - iw = new IndexWriter(dir, new IndexWriterConfig(analyzer)); - iw.addDocument(List.of(new LongPoint("position", 1, 1))); - iw.addDocument(List.of(new LongPoint("position", 2, 3))); - iw.addDocument(List.of(new LongPoint("position", 4, -1))); - iw.addDocument(List.of(new LongPoint("position", 3, -54))); - // Exactly 4 dummies - iw.addDocument(List.of(new StringField("dummy", "dummy", Store.NO))); - iw.addDocument(List.of(new StringField("dummy", "dummy", Store.NO))); - iw.addDocument(List.of(new StringField("dummy", "dummy", Store.NO))); - iw.addDocument(List.of(new StringField("dummy", "dummy", Store.NO))); - // texts - iw.addDocument(List.of(new TextField("text", "prova abc", Store.YES))); - iw.addDocument(List.of(new TextField("text", "prova mario", Store.YES))); - iw.addDocument(List.of(new TextField("text", "luigi provi def", Store.YES))); - iw.addDocument(List.of(new TextField("text", "abc provo prova", Store.YES))); - iw.addDocument(List.of(new TextField("text", "prova abd", Store.YES))); - iw.addDocument(List.of(new TextField("text", "la prova abc def", Store.YES))); - iw.commit(); - - DirectoryReader ir = DirectoryReader.open(iw); - is = new IndexSearcher(ir); - pageLimits = new ExponentialPageLimits(); - } - - @Test - public void testPosition() throws IOException { - var query = LongPoint.newRangeQuery("position", - LongList.of(1, -1).toLongArray(), - LongList.of(2, 3).toLongArray() - ); - int limit = Integer.MAX_VALUE; - var localQueryParams = new LocalQueryParams(query, 0, limit, pageLimits, Duration.ofDays(1)); - - var expectedResults = fixResults(localQueryParams.isSorted(), - localQueryParams.needsScores(), List.of(is.search(query, limit).scoreDocs)); - - var reactiveGenerator = LuceneGenerator.reactive(is, localQueryParams, -1); - var results = fixResults(localQueryParams.isSorted(), - localQueryParams.needsScores(), reactiveGenerator.toList()); - - Assertions.assertNotEquals(0, results.size()); - - Assertions.assertEquals(expectedResults, results); - } - - @Test - public void testTextSearch() throws IOException, ParseException { - QueryParser queryParser = new QueryParser("text", iw.getAnalyzer()); - Query query = queryParser.parse("prova~10 abc~10"); - int limit = Integer.MAX_VALUE; - var localQueryParams = new LocalQueryParams(query, 0, limit, pageLimits, Duration.ofDays(1)); - - var expectedResults = fixResults(localQueryParams.isSorted(), - localQueryParams.needsScores(), List.of(is.search(query, limit).scoreDocs)); - - var reactiveGenerator = LuceneGenerator.reactive(is, localQueryParams, -1); - var results = fixResults(localQueryParams.isSorted(), localQueryParams.needsScores(), reactiveGenerator.toList()); - - Assertions.assertNotEquals(0, results.size()); - - Assertions.assertEquals(expectedResults, results); - } - - @Test - public void testLimit0() { - var query = LongPoint.newRangeQuery("position", - LongList.of(1, -1).toLongArray(), - LongList.of(2, 3).toLongArray() - ); - var limitThresholdChecker = CustomHitsThresholdChecker.create(0); - var reactiveGenerator = LuceneGenerator.reactive(is, - new LocalQueryParams(query, - 0L, - 0, - pageLimits, - null, - null, - Duration.ofDays(1) - ), - -1 - ); - var results = reactiveGenerator.toList(); - - Assertions.assertNotNull(results); - Assertions.assertEquals(0, results.size()); - } - - @Test - public void testLimitRestrictingResults() { - var query = new TermQuery(new Term("dummy", "dummy")); - int limit = 3; // the number of dummies - 1 - var reactiveGenerator = LuceneGenerator.reactive(is, - new LocalQueryParams(query, - 0L, - limit, - pageLimits, - null, - null, - Duration.ofDays(1) - ), - -1 - ); - var results = reactiveGenerator.toList(); - - Assertions.assertNotNull(results); - Assertions.assertEquals(limit, results.size()); - } - - @Test - public void testDummies() throws IOException { - var query = new TermQuery(new Term("dummy", "dummy")); - int limit = Integer.MAX_VALUE; - var localQueryParams = new LocalQueryParams(query, 0, limit, pageLimits, null, true, Duration.ofDays(1)); - - var expectedResults = fixResults(localQueryParams.isSorted(), localQueryParams.needsScores(), - List.of(is.search(query, limit, Sort.INDEXORDER, false).scoreDocs)); - - var reactiveGenerator = LuceneGenerator.reactive(is, localQueryParams, -1); - var results = fixResults(localQueryParams.isSorted(), - localQueryParams.needsScores(), reactiveGenerator.toList()); - - Assertions.assertEquals(4, results.size()); - Assertions.assertEquals(expectedResults, results); - } - - private List fixResults(boolean sorted, boolean needsScores, List results) { - Assertions.assertNotNull(results); - var s = results.stream().map(scoreDoc -> needsScores ? new MyScoreDoc(scoreDoc) : new UnscoredScoreDoc(scoreDoc)); - - if (!sorted) { - s = s.sorted(Comparator.comparingInt(d -> d.doc)); - } - return s.collect(Collectors.toList()); - } - - private static class MyScoreDoc extends ScoreDoc { - - public MyScoreDoc(ScoreDoc scoreDoc) { - super(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex); - } - - @Override - public boolean equals(Object obj) { - if (obj == this) { - return true; - } - if (obj instanceof ScoreDoc sd) { - return Objects.equals(this.score, sd.score) - && Objects.equals(this.doc, sd.doc) - && this.shardIndex == sd.shardIndex; - } - return false; - } - } - - private static class UnscoredScoreDoc extends ScoreDoc { - - public UnscoredScoreDoc(ScoreDoc scoreDoc) { - super(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex); - } - - @Override - public boolean equals(Object obj) { - if (obj == this) { - return true; - } - if (obj instanceof ScoreDoc sd) { - return Objects.equals(this.doc, sd.doc) - && this.shardIndex == sd.shardIndex; - } - return false; - } - } - -} diff --git a/src/test/java/it/cavallium/dbengine/tests/MemoryTemporaryDbGenerator.java b/src/test/java/it/cavallium/dbengine/tests/MemoryTemporaryDbGenerator.java index d7335f8..26af874 100644 --- a/src/test/java/it/cavallium/dbengine/tests/MemoryTemporaryDbGenerator.java +++ b/src/test/java/it/cavallium/dbengine/tests/MemoryTemporaryDbGenerator.java @@ -1,70 +1,24 @@ package it.cavallium.dbengine.tests; -import static it.cavallium.dbengine.tests.DbTestUtils.MAX_IN_MEMORY_RESULT_ENTRIES; - import io.micrometer.core.instrument.simple.SimpleMeterRegistry; -import it.cavallium.datagen.nativedata.Nullableboolean; -import it.cavallium.datagen.nativedata.Nullabledouble; -import it.cavallium.datagen.nativedata.Nullableint; import it.cavallium.dbengine.tests.DbTestUtils.TempDb; import it.cavallium.dbengine.client.DefaultDatabaseOptions; -import it.cavallium.dbengine.client.IndicizerAnalyzers; -import it.cavallium.dbengine.client.IndicizerSimilarities; import it.cavallium.dbengine.database.ColumnUtils; import it.cavallium.dbengine.database.memory.LLMemoryDatabaseConnection; -import it.cavallium.dbengine.lucene.LuceneHacks; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory; -import it.cavallium.dbengine.rpc.current.data.LuceneOptions; import java.io.IOException; -import java.time.Duration; import java.util.List; -import java.util.Map; public class MemoryTemporaryDbGenerator implements TemporaryDbGenerator { - private static final LuceneOptions LUCENE_OPTS = new LuceneOptions(Map.of(), - Duration.ofSeconds(5), - Duration.ofSeconds(5), - false, - new ByteBuffersDirectory(), - Nullableboolean.empty(), - Nullabledouble.empty(), - Nullableint.empty(), - Nullableboolean.empty(), - Nullableboolean.empty(), - MAX_IN_MEMORY_RESULT_ENTRIES, - LuceneUtils.getDefaultMergePolicy() - ); - @Override public TempDb openTempDb() { var conn = new LLMemoryDatabaseConnection(new SimpleMeterRegistry()); - SwappableLuceneSearcher searcher = new SwappableLuceneSearcher(); - var luceneHacks = new LuceneHacks(() -> searcher, () -> searcher); return new TempDb(conn, conn.getDatabase("testdb", List.of(ColumnUtils.dictionary("testmap"), ColumnUtils.special("ints"), ColumnUtils.special("longs")), DefaultDatabaseOptions.builder().build() ), - conn.getLuceneIndex("testluceneindex1", - LuceneUtils.singleStructure(), - IndicizerAnalyzers.of(TextFieldsAnalyzer.ICUCollationKey), - IndicizerSimilarities.of(TextFieldsSimilarity.Boolean), - LUCENE_OPTS, - luceneHacks - ), - conn.getLuceneIndex("testluceneindex16", - LuceneUtils.shardsStructure(3), - IndicizerAnalyzers.of(TextFieldsAnalyzer.ICUCollationKey), - IndicizerSimilarities.of(TextFieldsSimilarity.Boolean), - LUCENE_OPTS, - luceneHacks - ), - searcher, null ); } @@ -73,8 +27,5 @@ public class MemoryTemporaryDbGenerator implements TemporaryDbGenerator { public void closeTempDb(TempDb tempDb) throws IOException { tempDb.db().close(); tempDb.connection().disconnect(); - tempDb.swappableLuceneSearcher().close(); - tempDb.luceneMulti().close(); - tempDb.luceneSingle().close(); } } diff --git a/src/test/java/it/cavallium/dbengine/tests/PriorityQueueAdaptor.java b/src/test/java/it/cavallium/dbengine/tests/PriorityQueueAdaptor.java deleted file mode 100644 index fa9903f..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/PriorityQueueAdaptor.java +++ /dev/null @@ -1,81 +0,0 @@ -package it.cavallium.dbengine.tests; - -import it.cavallium.dbengine.database.DiscardingCloseable; -import it.cavallium.dbengine.lucene.PriorityQueue; -import it.cavallium.dbengine.utils.SimpleResource; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.stream.Stream; -import org.apache.lucene.search.HitQueue; - -public class PriorityQueueAdaptor extends SimpleResource implements PriorityQueue, DiscardingCloseable { - - private final org.apache.lucene.util.PriorityQueue hitQueue; - - public PriorityQueueAdaptor(org.apache.lucene.util.PriorityQueue hitQueue) { - this.hitQueue = hitQueue; - } - - @Override - public void add(T element) { - hitQueue.add(element); - hitQueue.updateTop(); - } - - @Override - public T top() { - hitQueue.updateTop(); - return hitQueue.top(); - } - - @Override - public T pop() { - var popped = hitQueue.pop(); - hitQueue.updateTop(); - return popped; - } - - @Override - public void replaceTop(T oldTop, T newTop) { - assert Objects.equals(oldTop, hitQueue.top()); - hitQueue.updateTop(newTop); - } - - @Override - public long size() { - return hitQueue.size(); - } - - @Override - public void clear() { - hitQueue.clear(); - } - - @Override - public boolean remove(T element) { - var removed = hitQueue.remove(element); - hitQueue.updateTop(); - return removed; - } - - @Override - public Stream iterate() { - List items = new ArrayList<>(hitQueue.size()); - T item; - while ((item = hitQueue.pop()) != null) { - items.add(item); - } - for (T t : items) { - hitQueue.insertWithOverflow(t); - } - return items.stream(); - } - - @Override - protected void onClose() { - hitQueue.clear(); - hitQueue.updateTop(); - } -} diff --git a/src/test/java/it/cavallium/dbengine/tests/StringIndicizer.java b/src/test/java/it/cavallium/dbengine/tests/StringIndicizer.java deleted file mode 100644 index 7d35916..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/StringIndicizer.java +++ /dev/null @@ -1,66 +0,0 @@ -package it.cavallium.dbengine.tests; - -import com.google.common.primitives.Ints; -import com.google.common.primitives.Longs; -import it.cavallium.dbengine.client.Indicizer; -import it.cavallium.dbengine.database.LLUpdateDocument; -import it.cavallium.dbengine.database.LLItem; -import it.cavallium.dbengine.database.LLTerm; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; -import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; -import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; -import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; -import java.util.LinkedList; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.util.BytesRef; -import org.jetbrains.annotations.NotNull; - -public class StringIndicizer extends Indicizer { - - @Override - public @NotNull LLUpdateDocument toIndexRequest(@NotNull String key, @NotNull String value) { - var fields = new LinkedList(); - fields.add(LLItem.newStringField("uid", key, Field.Store.YES)); - fields.add(LLItem.newTextField("text", value, Store.NO)); - @SuppressWarnings("UnstableApiUsage") - var numInt = Ints.tryParse(value); - if (numInt != null) { - fields.add(LLItem.newIntPoint("intpoint", numInt)); - fields.add(LLItem.newNumericDocValuesField("intsort", numInt)); - } - @SuppressWarnings("UnstableApiUsage") - var numLong = Longs.tryParse(value); - if (numLong != null) { - fields.add(LLItem.newLongPoint("longpoint", numLong)); - fields.add(LLItem.newNumericDocValuesField("longsort", numLong)); - } - return new LLUpdateDocument(fields); - } - - @Override - public @NotNull LLTerm toIndex(@NotNull String key) { - return new LLTerm("uid", key); - } - - @Override - public @NotNull String getKeyFieldName() { - return "uid"; - } - - @Override - public @NotNull String getKey(IndexableField key) { - return key.stringValue(); - } - - @Override - public IndicizerAnalyzers getPerFieldAnalyzer() { - return it.cavallium.dbengine.client.IndicizerAnalyzers.of(TextFieldsAnalyzer.ICUCollationKey); - } - - @Override - public IndicizerSimilarities getPerFieldSimilarity() { - return it.cavallium.dbengine.client.IndicizerSimilarities.of(TextFieldsSimilarity.BM25Standard); - } -} diff --git a/src/test/java/it/cavallium/dbengine/tests/SwappableLuceneSearcher.java b/src/test/java/it/cavallium/dbengine/tests/SwappableLuceneSearcher.java deleted file mode 100644 index 7530a25..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/SwappableLuceneSearcher.java +++ /dev/null @@ -1,85 +0,0 @@ -package it.cavallium.dbengine.tests; - -import static java.util.Objects.requireNonNull; - -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearcher; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite; -import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; -import it.cavallium.dbengine.lucene.searcher.LocalSearcher; -import it.cavallium.dbengine.lucene.searcher.MultiSearcher; -import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult; -import java.io.Closeable; -import java.io.IOException; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Function; -import java.util.stream.Stream; -import org.jetbrains.annotations.Nullable; - -public class SwappableLuceneSearcher implements LocalSearcher, MultiSearcher, Closeable { - - private final AtomicReference single = new AtomicReference<>(null); - private final AtomicReference multi = new AtomicReference<>(null); - - public SwappableLuceneSearcher() { - - } - - @Override - public LuceneSearchResult collect(LLIndexSearcher indexSearcher, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - var single = this.single.get(); - if (single == null) { - single = this.multi.get(); - } - requireNonNull(single, "LuceneLocalSearcher not set"); - return single.collect(indexSearcher, queryParams, keyFieldName, transformer, filterer); - } - - @Override - public String toString() { - var single = this.single.get(); - var multi = this.multi.get(); - if (single == multi) { - if (single == null) { - return "swappable"; - } else { - return single.toString(); - } - } else { - return "swappable[single=" + single.toString() + ",multi=" + multi.toString() + "]"; - } - } - - @Override - public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - var multi = requireNonNull(this.multi.get(), "LuceneMultiSearcher not set"); - return multi.collectMulti(indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - - public void setSingle(LocalSearcher single) { - this.single.set(single); - } - - public void setMulti(MultiSearcher multi) { - this.multi.set(multi); - } - - @Override - public void close() throws IOException { - if (this.single.get() instanceof Closeable closeable) { - closeable.close(); - } - if (this.multi.get() instanceof Closeable closeable) { - closeable.close(); - } - } -} diff --git a/src/test/java/it/cavallium/dbengine/tests/TestAlignedRead.java b/src/test/java/it/cavallium/dbengine/tests/TestAlignedRead.java deleted file mode 100644 index e903c34..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/TestAlignedRead.java +++ /dev/null @@ -1,22 +0,0 @@ -package it.cavallium.dbengine.tests; - -import it.cavallium.dbengine.lucene.DirectNIOFSDirectory; -import it.cavallium.dbengine.lucene.LuceneUtils; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.file.Files; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -public class TestAlignedRead { - @Test - public void testAlignment() { - Assertions.assertEquals(0, LuceneUtils.alignUnsigned(0, true)); - Assertions.assertEquals(0, LuceneUtils.alignUnsigned(0, false)); - Assertions.assertEquals(4096, LuceneUtils.alignUnsigned(1, true)); - Assertions.assertEquals(0, LuceneUtils.alignUnsigned(1, false)); - Assertions.assertEquals(4096, LuceneUtils.alignUnsigned(4096, true)); - Assertions.assertEquals(4096, LuceneUtils.alignUnsigned(4096, false)); - } -} diff --git a/src/test/java/it/cavallium/dbengine/tests/TestLuceneIndex.java b/src/test/java/it/cavallium/dbengine/tests/TestLuceneIndex.java deleted file mode 100644 index c9bbd0f..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/TestLuceneIndex.java +++ /dev/null @@ -1,239 +0,0 @@ -package it.cavallium.dbengine.tests; - -import static it.cavallium.dbengine.tests.DbTestUtils.MAX_IN_MEMORY_RESULT_ENTRIES; -import static it.cavallium.dbengine.tests.DbTestUtils.ensureNoLeaks; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.fail; - -import it.cavallium.dbengine.tests.DbTestUtils.TempDb; -import it.cavallium.buffer.Buf; -import it.cavallium.dbengine.client.LuceneIndex; -import it.cavallium.dbengine.client.Sort; -import it.cavallium.dbengine.client.query.current.data.MatchAllDocsQuery; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.database.LLScoreMode; -import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher; -import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.CountMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.LocalSearcher; -import it.cavallium.dbengine.lucene.searcher.MultiSearcher; -import java.io.IOException; -import java.util.List; -import java.util.Objects; -import java.util.stream.IntStream; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class TestLuceneIndex { - - private final Logger log = LogManager.getLogger(this.getClass()); - private TempDb tempDb; - private LLLuceneIndex luceneSingle; - private LLLuceneIndex luceneMulti; - - protected TemporaryDbGenerator getTempDbGenerator() { - return new MemoryTemporaryDbGenerator(); - } - - @BeforeAll - public static void beforeAll() throws IOException { - } - - @BeforeEach - public void beforeEach() throws IOException { - ensureNoLeaks(); - tempDb = Objects.requireNonNull(getTempDbGenerator().openTempDb(), "TempDB"); - luceneSingle = tempDb.luceneSingle(); - luceneMulti = tempDb.luceneMulti(); - } - - public static Stream provideArguments() { - return Stream.of(false, true).map(Arguments::of); - } - - private static final List multi = List.of(false, true); - private static final List scoreModes = List.of(LLScoreMode.NO_SCORES, - LLScoreMode.TOP_SCORES, - LLScoreMode.COMPLETE_NO_SCORES, - LLScoreMode.COMPLETE - ); - private static final List multiSort = List.of(Sort.score(), - Sort.random(), - Sort.no(), - Sort.doc(), - Sort.numeric("longsort", false), - Sort.numeric("longsort", true) - ); - - record Tuple2(X getT1, Y getT2) { - - public Object[] toArray() { - return new Object[] {getT1, getT2}; - } - } - record Tuple3(X getT1, Y getT2, Z getT3) { - - public Object[] toArray() { - return new Object[] {getT1, getT2, getT3}; - } - } - record Tuple4(X getT1, Y getT2, Z getT3, W getT4) { - - public Object[] toArray() { - return new Object[] {getT1, getT2, getT3, getT4}; - } - } - record Tuple5(X getT1, Y getT2, Z getT3, W getT4, X1 getT5) { - - public Object[] toArray() { - return new Object[] {getT1, getT2, getT3, getT4, getT5}; - } - } - - public static Stream provideQueryArgumentsScoreMode() { - return multi.stream() - .flatMap(shard -> scoreModes.stream().map(scoreMode -> new Tuple2<>(shard, scoreMode))) - .map(tuple -> Arguments.of(tuple.toArray())); - } - - public static Stream provideQueryArgumentsSort() { - return multi.stream() - .flatMap(shard -> multiSort.stream().map(multiSort -> new Tuple2<>(shard, multiSort))) - .map(tuple -> Arguments.of(tuple.toArray())); - } - - public static Stream provideQueryArgumentsScoreModeAndSort() { - return multi.stream() - .flatMap(shard -> scoreModes.stream().map(scoreMode -> new Tuple2<>(shard, scoreMode))) - .flatMap(tuple -> multiSort.stream().map(multiSort -> new Tuple3<>(tuple.getT1(), tuple.getT2(), multiSort))) - .map(tuple -> Arguments.of(tuple.toArray())); - } - - @AfterEach - public void afterEach() throws IOException { - getTempDbGenerator().closeTempDb(tempDb); - ensureNoLeaks(); - } - - @AfterAll - public static void afterAll() throws IOException { - } - - private LuceneIndex getLuceneIndex(boolean shards, @Nullable LocalSearcher customSearcher) { - LuceneIndex index = DbTestUtils.tempLuceneIndex(shards ? luceneSingle : luceneMulti); - index.updateDocument("test-key-1", "0123456789"); - index.updateDocument("test-key-2", "test 0123456789 test word"); - index.updateDocument("test-key-3", "0123456789 test example string"); - index.updateDocument("test-key-4", "hello world the quick brown fox jumps over the lazy dog"); - index.updateDocument("test-key-5", "hello the quick brown fox jumps over the lazy dog"); - index.updateDocument("test-key-6", "hello the quick brown fox jumps over the world dog"); - index.updateDocument("test-key-7", "the quick brown fox jumps over the world dog"); - index.updateDocument("test-key-8", "the quick brown fox jumps over the lazy dog"); - index.updateDocument("test-key-9", "Example1"); - index.updateDocument("test-key-10", "Example2"); - index.updateDocument("test-key-11", "Example3"); - index.updateDocument("test-key-12", "-234"); - index.updateDocument("test-key-13", "2111"); - index.updateDocument("test-key-14", "2999"); - index.updateDocument("test-key-15", "3902"); - IntStream.rangeClosed(1, 1000).forEach(i -> index.updateDocument("test-key-" + (15 + i), "" + i)); - tempDb.swappableLuceneSearcher().setSingle(new CountMultiSearcher()); - tempDb.swappableLuceneSearcher().setMulti(new CountMultiSearcher()); - assertCount(index, 1000 + 15); - if (customSearcher != null) { - tempDb.swappableLuceneSearcher().setSingle(customSearcher); - if (shards) { - if (customSearcher instanceof MultiSearcher multiSearcher) { - tempDb.swappableLuceneSearcher().setMulti(multiSearcher); - } else { - throw new IllegalArgumentException("Expected a LuceneMultiSearcher, got a LuceneLocalSearcher: " + customSearcher.toString()); - } - } - } else { - tempDb.swappableLuceneSearcher().setSingle(new AdaptiveLocalSearcher(MAX_IN_MEMORY_RESULT_ENTRIES)); - tempDb.swappableLuceneSearcher().setMulti(new AdaptiveMultiSearcher(MAX_IN_MEMORY_RESULT_ENTRIES)); - } - return index; - } - - private void assertCount(LuceneIndex luceneIndex, long expected) { - Assertions.assertEquals(expected, getCount(luceneIndex)); - } - - private long getCount(LuceneIndex luceneIndex) { - luceneIndex.refresh(true); - var totalHitsCount = luceneIndex.count(null, new MatchAllDocsQuery()); - Assertions.assertTrue(totalHitsCount.exact(), "Can't get count because the total hits count is not exact"); - return totalHitsCount.value(); - } - - @Test - public void testNoOp() { - } - - @Test - public void testNoOpAllocation() { - for (int i = 0; i < 10; i++) { - var a = Buf.create(i * 512); - } - } - - @ParameterizedTest - @MethodSource("provideArguments") - public void testGetLuceneIndex(boolean shards) { - try (var luceneIndex = getLuceneIndex(shards, null)) { - Assertions.assertNotNull(luceneIndex); - } - } - - @ParameterizedTest - @MethodSource("provideArguments") - public void testDeleteAll(boolean shards) { - try (var luceneIndex = getLuceneIndex(shards, null)) { - luceneIndex.deleteAll(); - assertCount(luceneIndex, 0); - } - } - - @ParameterizedTest - @MethodSource("provideArguments") - public void testDelete(boolean shards) { - try (var luceneIndex = getLuceneIndex(shards, null)) { - var prevCount = getCount(luceneIndex); - luceneIndex.deleteDocument("test-key-1"); - assertCount(luceneIndex, prevCount - 1); - } - } - - @ParameterizedTest - @MethodSource("provideArguments") - public void testUpdateSameDoc(boolean shards) { - try (var luceneIndex = getLuceneIndex(shards, null)) { - var prevCount = getCount(luceneIndex); - luceneIndex.updateDocument("test-key-1", "new-value"); - assertCount(luceneIndex, prevCount); - } - } - - @ParameterizedTest - @MethodSource("provideArguments") - public void testUpdateNewDoc(boolean shards) { - try (var luceneIndex = getLuceneIndex(shards, null)) { - var prevCount = getCount(luceneIndex); - luceneIndex.updateDocument("test-key-new", "new-value"); - assertCount(luceneIndex, prevCount + 1); - } - } - -} diff --git a/src/test/java/it/cavallium/dbengine/tests/TestLuceneSearches.java b/src/test/java/it/cavallium/dbengine/tests/TestLuceneSearches.java deleted file mode 100644 index d84d111..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/TestLuceneSearches.java +++ /dev/null @@ -1,345 +0,0 @@ -package it.cavallium.dbengine.tests; - -import static it.cavallium.dbengine.tests.DbTestUtils.MAX_IN_MEMORY_RESULT_ENTRIES; -import static it.cavallium.dbengine.tests.DbTestUtils.ensureNoLeaks; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.fail; - -import it.cavallium.dbengine.tests.DbTestUtils.TempDb; -import it.cavallium.dbengine.tests.TestLuceneIndex.Tuple2; -import it.cavallium.dbengine.client.HitKey; -import it.cavallium.dbengine.client.Hits; -import it.cavallium.dbengine.client.LuceneIndex; -import it.cavallium.dbengine.client.Sort; -import it.cavallium.dbengine.client.query.ClientQueryParams; -import it.cavallium.dbengine.client.query.ClientQueryParamsBuilder; -import it.cavallium.dbengine.client.query.current.data.BooleanQuery; -import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart; -import it.cavallium.dbengine.client.query.current.data.BoostQuery; -import it.cavallium.dbengine.client.query.current.data.MatchAllDocsQuery; -import it.cavallium.dbengine.client.query.current.data.MatchNoDocsQuery; -import it.cavallium.dbengine.client.query.current.data.OccurMust; -import it.cavallium.dbengine.client.query.current.data.OccurShould; -import it.cavallium.dbengine.client.query.current.data.Term; -import it.cavallium.dbengine.client.query.current.data.TermQuery; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLLuceneIndex; -import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher; -import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.CountMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.LocalSearcher; -import it.cavallium.dbengine.lucene.searcher.MultiSearcher; -import it.cavallium.dbengine.lucene.searcher.StandardSearcher; -import it.cavallium.dbengine.lucene.searcher.ScoredPagedMultiSearcher; -import it.cavallium.dbengine.lucene.searcher.PagedLocalSearcher; -import it.cavallium.dbengine.lucene.searcher.UnsortedStreamingMultiSearcher; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; -import org.apache.commons.lang3.function.FailableConsumer; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class TestLuceneSearches { - - private static final Logger log = LogManager.getLogger(TestLuceneSearches.class); - private static final MemoryTemporaryDbGenerator TEMP_DB_GENERATOR = new MemoryTemporaryDbGenerator(); - - private static TempDb tempDb; - private static LLLuceneIndex luceneSingle; - private static LLLuceneIndex luceneMulti; - private static LuceneIndex multiIndex; - private static LuceneIndex localIndex; - - private static final Map ELEMENTS; - static { - var modifiableElements = new LinkedHashMap(); - modifiableElements.put("test-key-1", "0123456789"); - modifiableElements.put("test-key-2", "test 0123456789 test word"); - modifiableElements.put("test-key-3", "0123456789 test example string"); - modifiableElements.put("test-key-4", "hello world the quick brown fox jumps over the lazy dog"); - modifiableElements.put("test-key-5", "hello the quick brown fox jumps over the lazy dog"); - modifiableElements.put("test-key-6", "hello the quick brown fox jumps over the world dog"); - modifiableElements.put("test-key-7", "the quick brown fox jumps over the world dog"); - modifiableElements.put("test-key-8", "the quick brown fox jumps over the lazy dog"); - modifiableElements.put("test-key-9", "Example1"); - modifiableElements.put("test-key-10", "Example2"); - modifiableElements.put("test-key-11", "Example3"); - modifiableElements.put("test-key-12", "-234"); - modifiableElements.put("test-key-13", "2111"); - modifiableElements.put("test-key-14", "2999"); - modifiableElements.put("test-key-15", "3902"); - IntStream.rangeClosed(1, 1000).forEach(i -> modifiableElements.put("test-key-" + (15 + i), "" + i)); - ELEMENTS = Collections.unmodifiableMap(modifiableElements); - } - - @BeforeAll - public static void beforeAll() throws IOException { - ensureNoLeaks(); - tempDb = Objects.requireNonNull(TEMP_DB_GENERATOR.openTempDb(), "TempDB"); - luceneSingle = tempDb.luceneSingle(); - luceneMulti = tempDb.luceneMulti(); - - setUpIndex(true); - setUpIndex(false); - } - - private static void setUpIndex(boolean shards) { - LuceneIndex index = DbTestUtils.tempLuceneIndex(shards ? luceneSingle : luceneMulti); - - ELEMENTS.forEach(index::updateDocument); - tempDb.swappableLuceneSearcher().setSingle(new CountMultiSearcher()); - tempDb.swappableLuceneSearcher().setMulti(new CountMultiSearcher()); - assertCount(index, 1000 + 15); - if (shards) { - multiIndex = index; - } else { - localIndex = index; - } - } - - public static Stream provideArguments() { - return Stream.of(false, true).map(Arguments::of); - } - - private static final List multi = List.of(false, true); - private static final List multiSort = List.of( - Sort.score(), - //todo: fix random sort field - //Sort.randomSortField(), - Sort.no(), - Sort.doc(), - Sort.numeric("longsort", false), - Sort.numeric("longsort", true), - Sort.numeric("intsort", false), - Sort.numeric("intsort", true) - ); - - private static List getSearchers(ExpectedQueryType info) { - var sink = new ArrayList(); - if (info.shard()) { - if (info.onlyCount()) { - sink.add(new CountMultiSearcher()); - } else { - sink.add(new ScoredPagedMultiSearcher()); - if (!info.sorted()) { - sink.add(new UnsortedUnscoredSimpleMultiSearcher(new PagedLocalSearcher())); - sink.add(new UnsortedStreamingMultiSearcher()); - } - } - sink.add(new AdaptiveMultiSearcher(MAX_IN_MEMORY_RESULT_ENTRIES)); - } else { - if (info.onlyCount()) { - sink.add(new CountMultiSearcher()); - } else { - sink.add(new PagedLocalSearcher()); - } - sink.add(new AdaptiveLocalSearcher(MAX_IN_MEMORY_RESULT_ENTRIES)); - } - return sink; - } - - public static Stream provideQueryArgumentsScoreMode() { - return multi.stream().map(tuple -> Arguments.of(multi)); - } - - public static Stream provideQueryArgumentsScoreModeAndSort() { - return multi.stream() - .flatMap(multi -> multiSort.stream().map(multiSort -> new Tuple2<>(multi, multiSort))) - .map(tuple -> Arguments.of(tuple.toArray())); - } - - private static void runSearchers(ExpectedQueryType expectedQueryType, FailableConsumer consumer) - throws Throwable { - var searchers = getSearchers(expectedQueryType); - for (LocalSearcher searcher : searchers) { - log.info("Using searcher \"{}\"", searcher.toString()); - consumer.accept(searcher); - } - } - - @BeforeEach - public void beforeEach() { - } - - @AfterEach - public void afterEach() { - } - - @AfterAll - public static void afterAll() throws IOException { - TEMP_DB_GENERATOR.closeTempDb(tempDb); - ensureNoLeaks(); - } - - private LuceneIndex getLuceneIndex(boolean shards, @Nullable LocalSearcher customSearcher) { - if (customSearcher != null) { - tempDb.swappableLuceneSearcher().setSingle(customSearcher); - if (shards) { - if (customSearcher instanceof MultiSearcher multiSearcher) { - tempDb.swappableLuceneSearcher().setMulti(multiSearcher); - } else { - throw new IllegalArgumentException("Expected a LuceneMultiSearcher, got a LuceneLocalSearcher: " + customSearcher.toString()); - } - } - } else { - tempDb.swappableLuceneSearcher().setSingle(new AdaptiveLocalSearcher(MAX_IN_MEMORY_RESULT_ENTRIES)); - tempDb.swappableLuceneSearcher().setMulti(new AdaptiveMultiSearcher(MAX_IN_MEMORY_RESULT_ENTRIES)); - } - return shards ? multiIndex : localIndex; - } - - private static void assertCount(LuceneIndex luceneIndex, long expected) { - Assertions.assertEquals(expected, getCount(luceneIndex)); - } - - private static long getCount(LuceneIndex luceneIndex) { - luceneIndex.refresh(true); - var totalHitsCount = luceneIndex.count(null, new MatchAllDocsQuery()); - Assertions.assertTrue(totalHitsCount.exact(), "Can't get count because the total hits count is not exact"); - return totalHitsCount.value(); - } - - private boolean supportsPreciseHitsCount(LocalSearcher searcher, - ClientQueryParams query) { - var sorted = query.isSorted(); - if (searcher instanceof UnsortedStreamingMultiSearcher) { - return false; - } else if (!sorted) { - return !(searcher instanceof AdaptiveMultiSearcher) && !(searcher instanceof AdaptiveLocalSearcher); - } else { - return true; - } - } - - public void testSearch(ClientQueryParamsBuilder queryParamsBuilder, - ExpectedQueryType expectedQueryType) throws Throwable { - - runSearchers(expectedQueryType, searcher -> { - try (var luceneIndex1 = getLuceneIndex(expectedQueryType.shard(), searcher)) { - var query = queryParamsBuilder.build(); - var results = luceneIndex1.search(query); - var hits = results.totalHitsCount(); - var keys = getResults(results); - if (hits.exact()) { - Assertions.assertEquals(keys.size(), hits.value()); - } else { - Assertions.assertTrue(keys.size() >= hits.value()); - } - - var standardSearcher = new StandardSearcher(); - try (var luceneIndex2 = getLuceneIndex(expectedQueryType.shard(), standardSearcher)) { - var officialQuery = queryParamsBuilder.limit(ELEMENTS.size() * 2L).build(); - var officialResults = luceneIndex2.search(officialQuery); - var officialHits = officialResults.totalHitsCount(); - var officialKeys = getResults(officialResults); - if (officialHits.exact()) { - Assertions.assertEquals(officialKeys.size(), officialHits.value()); - } else { - Assertions.assertTrue(officialKeys.size() >= officialHits.value()); - } - - if (hits.exact() && officialHits.exact()) { - assertExactHits(officialHits.value(), hits); - } - - Assertions.assertEquals(officialKeys.size(), keys.size()); - - assertResults(officialKeys, keys, expectedQueryType.sorted(), expectedQueryType.sortedByScore()); - } - } - }); - } - - @ParameterizedTest - @MethodSource("provideQueryArgumentsScoreModeAndSort") - public void testSearchNoDocs(boolean shards, Sort multiSort) throws Throwable { - var queryBuilder = ClientQueryParams - .>builder() - .query(new MatchNoDocsQuery()) - .snapshot(null) - .computePreciseHitsCount(true) - .sort(multiSort); - - ExpectedQueryType expectedQueryType = new ExpectedQueryType(shards, multiSort, true, false); - testSearch(queryBuilder, expectedQueryType); - } - - @ParameterizedTest - @MethodSource("provideQueryArgumentsScoreModeAndSort") - public void testSearchAllDocs(boolean shards, Sort multiSort) throws Throwable { - var queryBuilder = ClientQueryParams - .>builder() - .query(new MatchAllDocsQuery()) - .snapshot(null) - .computePreciseHitsCount(true) - .sort(multiSort); - - ExpectedQueryType expectedQueryType = new ExpectedQueryType(shards, multiSort, true, false); - testSearch(queryBuilder, expectedQueryType); - } - - @ParameterizedTest - @MethodSource("provideQueryArgumentsScoreModeAndSort") - public void testSearchAdvancedText(boolean shards, Sort multiSort) throws Throwable { - var queryBuilder = ClientQueryParams - .builder() - .query(new BooleanQuery(List.of( - new BooleanQueryPart(new BoostQuery(new TermQuery(new Term("text", "hello")), 3), new OccurShould()), - new BooleanQueryPart(new TermQuery(new Term("text", "world")), new OccurShould()), - new BooleanQueryPart(new BoostQuery(new TermQuery(new Term("text", "hello")), 2), new OccurShould()), - new BooleanQueryPart(new BoostQuery(new TermQuery(new Term("text", "hello")), 100), new OccurShould()), - new BooleanQueryPart(new TermQuery(new Term("text", "hello")), new OccurMust()) - ), 1)) - .snapshot(null) - .computePreciseHitsCount(true) - .sort(multiSort); - - ExpectedQueryType expectedQueryType = new ExpectedQueryType(shards, multiSort, true, false); - testSearch(queryBuilder, expectedQueryType); - } - - private void assertResults(List expectedKeys, List resultKeys, boolean sorted, boolean sortedByScore) { - if (sortedByScore) { - Assertions.assertEquals(expectedKeys, resultKeys); - } else if (sorted) { - var results = resultKeys.stream().map(Scored::key).toList(); - Assertions.assertEquals(expectedKeys.stream().map(Scored::key).toList(), results); - } else { - var results = resultKeys.stream().map(Scored::key).collect(Collectors.toSet()); - Assertions.assertEquals(new HashSet<>(expectedKeys.stream().map(Scored::key).toList()), results); - } - } - - private void assertHitsIfPossible(long expectedCount, TotalHitsCount hits) { - if (hits.exact()) { - assertEquals(new TotalHitsCount(expectedCount, true), hits); - } - } - - private void assertExactHits(long expectedCount, TotalHitsCount hits) { - assertEquals(new TotalHitsCount(expectedCount, true), hits); - } - - private List getResults(Hits> results) { - return results.results().stream().map(key -> new Scored(key.key(), key.score())).toList(); - } - -} diff --git a/src/test/java/it/cavallium/dbengine/tests/UnsortedUnscoredSimpleMultiSearcher.java b/src/test/java/it/cavallium/dbengine/tests/UnsortedUnscoredSimpleMultiSearcher.java deleted file mode 100644 index 3270013..0000000 --- a/src/test/java/it/cavallium/dbengine/tests/UnsortedUnscoredSimpleMultiSearcher.java +++ /dev/null @@ -1,91 +0,0 @@ -package it.cavallium.dbengine.tests; - -import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE; - -import com.google.common.collect.Streams; -import it.cavallium.dbengine.client.query.current.data.TotalHitsCount; -import it.cavallium.dbengine.database.LLKeyScore; -import it.cavallium.dbengine.database.disk.LLIndexSearchers; -import it.cavallium.dbengine.lucene.LuceneCloseable; -import it.cavallium.dbengine.lucene.LuceneUtils; -import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite; -import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; -import it.cavallium.dbengine.lucene.searcher.LocalSearcher; -import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult; -import it.cavallium.dbengine.lucene.searcher.MultiSearcher; -import it.cavallium.dbengine.utils.SimpleResource; -import java.io.UncheckedIOException; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; - -public class UnsortedUnscoredSimpleMultiSearcher implements MultiSearcher { - - private static final Logger LOG = LogManager.getLogger(UnsortedUnscoredSimpleMultiSearcher.class); - - private final LocalSearcher localSearcher; - - public UnsortedUnscoredSimpleMultiSearcher(LocalSearcher localSearcher) { - this.localSearcher = localSearcher; - } - - @Override - public LuceneSearchResult collectMulti(LLIndexSearchers indexSearchers, - LocalQueryParams queryParams, - @Nullable String keyFieldName, - GlobalQueryRewrite transformer, - Function, Stream> filterer) { - if (transformer != NO_REWRITE) { - return LuceneUtils.rewriteMulti(this, indexSearchers, queryParams, keyFieldName, transformer, filterer); - } - if (queryParams.isSorted() && queryParams.limitLong() > 0) { - throw new UnsupportedOperationException( - "Sorted queries are not supported" + " by SimpleUnsortedUnscoredLuceneMultiSearcher"); - } - if (queryParams.needsScores() && queryParams.limitLong() > 0) { - throw new UnsupportedOperationException( - "Scored queries are not supported" + " by SimpleUnsortedUnscoredLuceneMultiSearcher"); - } - - var localQueryParams = getLocalQueryParams(queryParams); - var results = indexSearchers.llShards().stream() - .map(searcher -> localSearcher.collect(searcher, localQueryParams, keyFieldName, transformer, filterer)) - .toList(); - List> resultsFluxes = new ArrayList<>(results.size()); - boolean exactTotalHitsCount = true; - long totalHitsCountValue = 0; - for (LuceneSearchResult result : results) { - resultsFluxes.add(result.results().stream()); - exactTotalHitsCount &= result.totalHitsCount().exact(); - totalHitsCountValue += result.totalHitsCount().value(); - } - - var totalHitsCount = new TotalHitsCount(totalHitsCountValue, exactTotalHitsCount); - //noinspection unchecked - Stream mergedFluxes = (Stream) (Stream) Streams.concat(resultsFluxes.toArray(Stream[]::new)) - .skip(queryParams.offsetLong()) - .limit(queryParams.limitLong()); - - return new LuceneSearchResult(totalHitsCount, mergedFluxes.toList()); - } - - private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) { - return new LocalQueryParams(queryParams.query(), - 0L, - queryParams.offsetLong() + queryParams.limitLong(), - queryParams.pageLimits(), - queryParams.sort(), - queryParams.computePreciseHitsCount(), - queryParams.timeout() - ); - } - - @Override - public String toString() { - return "unsorted unscored simple multi"; - } -} diff --git a/src/test/java/module-info.java b/src/test/java/module-info.java index 3f8ad69..eb40ec5 100644 --- a/src/test/java/module-info.java +++ b/src/test/java/module-info.java @@ -3,14 +3,13 @@ module dbengine.tests { requires dbengine; requires it.cavallium.datagen; requires org.assertj.core; - requires org.apache.lucene.core; requires it.unimi.dsi.fastutil; - requires org.apache.lucene.queryparser; requires org.jetbrains.annotations; requires micrometer.core; requires org.junit.jupiter.params; requires com.google.common; requires org.apache.logging.log4j; + requires org.apache.logging.log4j.core; requires org.apache.commons.lang3; requires rocksdbjni; opens it.cavallium.dbengine.tests;