diff --git a/README.md b/README.md
index 38f9ecd..5e62867 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,55 @@
-# CavalliumDBEngine
+CavalliumDB Engine
+==================
-Database engine for Java
\ No newline at end of file
+A very simple wrapper for RocksDB and Lucene, with gRPC and direct connections.
+
+This is not a database, but only a wrapper for Lucene Core and RocksDB, with a bit of abstraction.
+
+# Features
+## RocksDB Key-Value NoSQL database engine
+- Snapshots
+- Multi-column databases
+- WAL and corruption recovery strategies
+- Multiple data types:
+  - Bytes (Singleton)
+  - Maps of bytes (Dictionary)
+  - Maps of maps of bytes (Deep dictionary)
+  - Sets of bytes (Dictionary without values)
+  - Maps of sets of bytes (Deep dictionary without values)
+
+## Apache Lucene Core indexing library
+- Document structure
+- Sorting
+  - Ascending and descending
+  - Numeric or non-numeric
+- Searching
+  - Nested search terms
+  - Combined search terms
+  - Fuzzy text search
+  - Coordinates, integers, longs, strings, text
+- Indexing and analysis
+  - N-gram
+  - Edge N-gram
+  - English words
+  - Stemming
+  - Stopword removal
+- Result filtering
+- Snapshots
+
+# F.A.Q.
+## Why is it so difficult?
+This is not a DBMS.
+
+It is an engine on which a DBMS can be built. For this reason it is difficult to use directly: it is meant to be accessed through abstraction layers.
+
+## Can I use objects in the database?
+Yes, but you must serialize/deserialize them using a library of your choice.
+
+## Why is there a snapshot function for each database part?
+Since RocksDB and Lucene indices are separate instances, each instance has its own snapshot function.
+
+To obtain a single snapshot you must implement it as a collection of sub-snapshots in your DBMS.
+
+## Is CavalliumDB Engine suitable for your project?
+No.
+This engine is largely undocumented, and it does not provide extensive tests for its methods.
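A minimal sketch of what the two F.A.Q. answers above mean in practice, against the `LLDictionary`, `LLSnapshot`, and `LLSnapshottable` interfaces added later in this diff. The `ExampleDbmsLayer` class, its method names, and the UTF-8 string codec are hypothetical stand-ins for a real DBMS layer and a real serialization library:

```java
import it.cavallium.dbengine.database.LLDictionary;
import it.cavallium.dbengine.database.LLDictionaryResultType;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLSnapshottable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

// Hypothetical caller-side helper; not part of this changeset.
public class ExampleDbmsLayer {

	// "Can I use objects in the database?" - the engine only stores bytes, so the
	// DBMS layer serializes. A UTF-8 string codec stands in for Jackson/Kryo/etc.
	public void putSerialized(LLDictionary dict, String key, String serializedValue)
			throws IOException {
		dict.put(key.getBytes(StandardCharsets.UTF_8),
				serializedValue.getBytes(StandardCharsets.UTF_8),
				LLDictionaryResultType.VOID);
	}

	// "Why is there a snapshot function for each database part?" - a global snapshot
	// is simply the collection of the sub-snapshots of every snapshottable part.
	public List<LLSnapshot> takeCompositeSnapshot(List<? extends LLSnapshottable> parts)
			throws IOException {
		List<LLSnapshot> snapshots = new ArrayList<>(parts.size());
		for (LLSnapshottable part : parts) {
			snapshots.add(part.takeSnapshot());
		}
		return snapshots;
	}
}
```

Releasing is symmetric: each `LLSnapshot` must be handed back to the `releaseSnapshot` of the same part that created it.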
diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..6c701c7 --- /dev/null +++ b/pom.xml @@ -0,0 +1,276 @@ + + 4.0.0 + + CavalliumDBEngine + + it.cavallium + dbengine + 2.0.1 + + jar + + UTF-8 + 1.29.0 + 3.11.4 + 3.11.4 + 2.0.30.Final + + 11 + 11 + + + + protoarch + protoarch + http://home.apache.org/~aajisaka/repository + + + mchv-release + MCHV Release Apache Maven Packages + https://mvn.mchv.eu/repository/mchv + + + mchv-snapshot + MCHV Snapshot Apache Maven Packages + https://mvn.mchv.eu/repository/mchv-snapshot + + + + + org.warp + common-utils + 1.1.1 + + + javax.xml.bind + jaxb-api + 2.2.11 + + + com.sun.xml.bind + jaxb-core + 2.2.11 + + + com.sun.xml.bind + jaxb-impl + 2.2.11 + + + javax.activation + activation + 1.1.1 + + + it.cavallium + concurrent-locks + 1.0.8 + + + org.yaml + snakeyaml + 1.24 + + + io.grpc + grpc-netty-shaded + ${grpc.version} + + + io.grpc + grpc-protobuf + ${grpc.version} + + + io.grpc + grpc-stub + ${grpc.version} + + + io.grpc + grpc-alts + ${grpc.version} + + + javax.annotation + javax.annotation-api + 1.2 + + + io.grpc + grpc-testing + ${grpc.version} + test + + + + + io.grpc + grpc-netty + ${grpc.version} + + + io.netty + netty-tcnative-boringssl-static + ${netty.tcnative.version} + + + + com.google.api.grpc + proto-google-common-protos + 1.0.0 + + + com.google.protobuf + protobuf-java-util + ${protobuf.version} + + + org.mockito + mockito-core + 1.9.5 + test + + + it.unimi.dsi + fastutil + 8.3.1 + + + org.junit.jupiter + junit-jupiter-api + RELEASE + test + + + org.hamcrest + hamcrest-core + + + + + + org.hamcrest + hamcrest-library + 1.3 + test + + + org.rocksdb + rocksdbjni + 6.11.4 + + + org.apache.lucene + lucene-core + 8.6.2 + + + org.apache.lucene + lucene-analyzers-common + 8.6.2 + + + org.apache.lucene + lucene-codecs + 8.6.2 + + + org.apache.lucene + lucene-backward-codecs + 8.6.2 + + + org.apache.lucene + lucene-queries + 8.6.2 + + + org.jetbrains + annotations + 19.0.0 + + + io.projectreactor + reactor-core + 3.4.0 + + + io.projectreactor + reactor-tools + 3.4.0 + + + + + src/test/java + + + ../src/main/libs + + **/*.jar + + + + + + kr.motd.maven + os-maven-plugin + 1.5.0.Final + + + + + org.apache.maven.plugins + maven-install-plugin + 3.0.0-M1 + + + org.xolstice.maven.plugins + protobuf-maven-plugin + 0.5.1 + + com.google.protobuf:protoc:${protoc.version}:exe:${os.detected.classifier} + grpc-java + io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier} + ${basedir}/src/main/proto + + + + generate-sources + + compile + compile-custom + + + + + + maven-dependency-plugin + + + compile + + copy-dependencies + + + ${project.build.directory}/lib + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 11 + 11 + 11 + + + + + diff --git a/src/main/java/it/cavallium/dbengine/database/Column.java b/src/main/java/it/cavallium/dbengine/database/Column.java new file mode 100644 index 0000000..09943b8 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/Column.java @@ -0,0 +1,58 @@ +package it.cavallium.dbengine.database; + +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.StringJoiner; + +public class Column { + + private final String name; + + private Column(String name) { + this.name = name; + } + + public static Column hashMap(String name) { + return new Column("hash_map_" + name); + } + + public static Column fixedSet(String name) { + return new Column("hash_set_" + name); + } + + public static Column special(String name) { + return new Column(name); 
+ } + + public static String toString(byte[] name) { + return new String(name, StandardCharsets.US_ASCII); + } + + public String getName() { + return name; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Column)) { + return false; + } + Column column = (Column) o; + return Objects.equals(name, column.name); + } + + @Override + public int hashCode() { + return Objects.hash(name); + } + + @Override + public String toString() { + return new StringJoiner(", ", Column.class.getSimpleName() + "[", "]") + .add("name='" + name + "'") + .toString(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/EnglishItalianStopFilter.java b/src/main/java/it/cavallium/dbengine/database/EnglishItalianStopFilter.java new file mode 100644 index 0000000..dfacbca --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/EnglishItalianStopFilter.java @@ -0,0 +1,1005 @@ +package it.cavallium.dbengine.database; + +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.TokenStream; + +public class EnglishItalianStopFilter extends StopFilter { + + private static final CharArraySet stopWords; + + /** + * Constructs a filter which removes words from the input TokenStream that are named in the Set. + * + * @param in Input stream + * @see #makeStopSet(String...) + */ + public EnglishItalianStopFilter(TokenStream in) { + super(in, stopWords); + } + + static { + var englishStopWords = Set.of("a", + "an", + "and", + "are", + "as", + "at", + "be", + "but", + "by", + "for", + "if", + "in", + "into", + "is", + "it", + "no", + "not", + "of", + "on", + "or", + "such", + "that", + "the", + "their", + "then", + "there", + "these", + "they", + "this", + "to", + "was", + "will", + "with" + ); + var oldItalianStopWords = Set.of("a", + "abbastanza", + "abbia", + "abbiamo", + "abbiano", + "abbiate", + "accidenti", + "ad", + "adesso", + "affinché", + "agl", + "agli", + "ahime", + "ahimè", + "ai", + "al", + "alcuna", + "alcuni", + "alcuno", + "all", + "alla", + "alle", + "allo", + "allora", + "altre", + "altri", + "altrimenti", + "altro", + "altrove", + "altrui", + "anche", + "ancora", + "anni", + "anno", + "ansa", + "anticipo", + "assai", + "attesa", + "attraverso", + "avanti", + "avemmo", + "avendo", + "avente", + "aver", + "avere", + "averlo", + "avesse", + "avessero", + "avessi", + "avessimo", + "aveste", + "avesti", + "avete", + "aveva", + "avevamo", + "avevano", + "avevate", + "avevi", + "avevo", + "avrai", + "avranno", + "avrebbe", + "avrebbero", + "avrei", + "avremmo", + "avremo", + "avreste", + "avresti", + "avrete", + "avrà", + "avrò", + "avuta", + "avute", + "avuti", + "avuto", + "basta", + "ben", + "bene", + "benissimo", + "brava", + "bravo", + "buono", + "c", + "caso", + "cento", + "certa", + "certe", + "certi", + "certo", + "che", + "chi", + "chicchessia", + "chiunque", + "ci", + "ciascuna", + "ciascuno", + "cima", + "cinque", + "cio", + "cioe", + "cioè", + "circa", + "citta", + "città", + "ciò", + "co", + "codesta", + "codesti", + "codesto", + "cogli", + "coi", + "col", + "colei", + "coll", + "coloro", + "colui", + "come", + "cominci", + "comprare", + "comunque", + "con", + "concernente", + "conclusione", + "consecutivi", + "consecutivo", + "consiglio", + "contro", + "cortesia", + "cos", + "cosa", + "cosi", + "così", + "cui", + "d", + "da", + "dagl", + "dagli", + "dai", + 
"dal", + "dall", + "dalla", + "dalle", + "dallo", + "dappertutto", + "davanti", + "degl", + "degli", + "dei", + "del", + "dell", + "della", + "delle", + "dello", + "dentro", + "detto", + "deve", + "devo", + "di", + "dice", + "dietro", + "dire", + "dirimpetto", + "diventa", + "diventare", + "diventato", + "dopo", + "doppio", + "dov", + "dove", + "dovra", + "dovrà", + "dovunque", + "due", + "dunque", + "durante", + "e", + "ebbe", + "ebbero", + "ebbi", + "ecc", + "ecco", + "ed", + "effettivamente", + "egli", + "ella", + "entrambi", + "eppure", + "era", + "erano", + "eravamo", + "eravate", + "eri", + "ero", + "esempio", + "esse", + "essendo", + "esser", + "essere", + "essi", + "ex", + "fa", + "faccia", + "facciamo", + "facciano", + "facciate", + "faccio", + "facemmo", + "facendo", + "facesse", + "facessero", + "facessi", + "facessimo", + "faceste", + "facesti", + "faceva", + "facevamo", + "facevano", + "facevate", + "facevi", + "facevo", + "fai", + "fanno", + "farai", + "faranno", + "fare", + "farebbe", + "farebbero", + "farei", + "faremmo", + "faremo", + "fareste", + "faresti", + "farete", + "farà", + "farò", + "fatto", + "favore", + "fece", + "fecero", + "feci", + "fin", + "finalmente", + "finche", + "fine", + "fino", + "forse", + "forza", + "fosse", + "fossero", + "fossi", + "fossimo", + "foste", + "fosti", + "fra", + "frattempo", + "fu", + "fui", + "fummo", + "fuori", + "furono", + "futuro", + "generale", + "gente", + "gia", + "giacche", + "giorni", + "giorno", + "giu", + "già", + "gli", + "gliela", + "gliele", + "glieli", + "glielo", + "gliene", + "grande", + "grazie", + "gruppo", + "ha", + "haha", + "hai", + "hanno", + "ho", + "i", + "ie", + "ieri", + "il", + "improvviso", + "in", + "inc", + "indietro", + "infatti", + "inoltre", + "insieme", + "intanto", + "intorno", + "invece", + "io", + "l", + "la", + "lasciato", + "lato", + "le", + "lei", + "li", + "lo", + "lontano", + "loro", + "lui", + "lungo", + "luogo", + "là", + "ma", + "macche", + "magari", + "maggior", + "mai", + "male", + "malgrado", + "malissimo", + "me", + "medesimo", + "mediante", + "meglio", + "meno", + "mentre", + "mesi", + "mezzo", + "mi", + "mia", + "mie", + "miei", + "mila", + "miliardi", + "milioni", + "minimi", + "mio", + "modo", + "molta", + "molti", + "moltissimo", + "molto", + "momento", + "mondo", + "ne", + "negl", + "negli", + "nei", + "nel", + "nell", + "nella", + "nelle", + "nello", + "nemmeno", + "neppure", + "nessun", + "nessuna", + "nessuno", + "niente", + "no", + "noi", + "nome", + "non", + "nondimeno", + "nonostante", + "nonsia", + "nostra", + "nostre", + "nostri", + "nostro", + "novanta", + "nove", + "nulla", + "nuovi", + "nuovo", + "o", + "od", + "oggi", + "ogni", + "ognuna", + "ognuno", + "oltre", + "oppure", + "ora", + "ore", + "osi", + "ossia", + "ottanta", + "otto", + "paese", + "parecchi", + "parecchie", + "parecchio", + "parte", + "partendo", + "peccato", + "peggio", + "per", + "perche", + "perchè", + "perché", + "percio", + "perciò", + "perfino", + "pero", + "persino", + "persone", + "però", + "piedi", + "pieno", + "piglia", + "piu", + "piuttosto", + "più", + "po", + "pochissimo", + "poco", + "poi", + "poiche", + "possa", + "possedere", + "posteriore", + "posto", + "potrebbe", + "preferibilmente", + "presa", + "press", + "prima", + "primo", + "principalmente", + "probabilmente", + "promesso", + "proprio", + "puo", + "pure", + "purtroppo", + "può", + "qua", + "qualche", + "qualcosa", + "qualcuna", + "qualcuno", + "quale", + "quali", + "qualunque", + "quando", + "quanta", + "quante", + "quanti", + 
"quanto", + "quantunque", + "quarto", + "quasi", + "quattro", + "quel", + "quella", + "quelle", + "quelli", + "quello", + "quest", + "questa", + "queste", + "questi", + "questo", + "qui", + "quindi", + "quinto", + "realmente", + "recente", + "recentemente", + "registrazione", + "relativo", + "riecco", + "rispetto", + "salvo", + "sara", + "sarai", + "saranno", + "sarebbe", + "sarebbero", + "sarei", + "saremmo", + "saremo", + "sareste", + "saresti", + "sarete", + "sarà", + "sarò", + "scola", + "scopo", + "scorso", + "se", + "secondo", + "seguente", + "seguito", + "sei", + "sembra", + "sembrare", + "sembrato", + "sembrava", + "sembri", + "sempre", + "senza", + "sette", + "si", + "sia", + "siamo", + "siano", + "siate", + "siete", + "sig", + "solito", + "solo", + "soltanto", + "sono", + "sopra", + "soprattutto", + "sotto", + "spesso", + "sta", + "stai", + "stando", + "stanno", + "starai", + "staranno", + "starebbe", + "starebbero", + "starei", + "staremmo", + "staremo", + "stareste", + "staresti", + "starete", + "starà", + "starò", + "stata", + "state", + "stati", + "stato", + "stava", + "stavamo", + "stavano", + "stavate", + "stavi", + "stavo", + "stemmo", + "stessa", + "stesse", + "stessero", + "stessi", + "stessimo", + "stesso", + "steste", + "stesti", + "stette", + "stettero", + "stetti", + "stia", + "stiamo", + "stiano", + "stiate", + "sto", + "su", + "sua", + "subito", + "successivamente", + "successivo", + "sue", + "sugl", + "sugli", + "sui", + "sul", + "sull", + "sulla", + "sulle", + "sullo", + "suo", + "suoi", + "tale", + "tali", + "talvolta", + "tanto", + "te", + "tempo", + "terzo", + "th", + "ti", + "titolo", + "tra", + "tranne", + "tre", + "trenta", + "triplo", + "troppo", + "trovato", + "tu", + "tua", + "tue", + "tuo", + "tuoi", + "tutta", + "tuttavia", + "tutte", + "tutti", + "tutto", + "uguali", + "ulteriore", + "ultimo", + "un", + "una", + "uno", + "uomo", + "va", + "vai", + "vale", + "vari", + "varia", + "varie", + "vario", + "verso", + "vi", + "vicino", + "visto", + "vita", + "voi", + "volta", + "volte", + "vostra", + "vostre", + "vostri", + "vostro", + "è"); + var italianStopWords = Set.of("a", + "abbia", + "abbiamo", + "abbiano", + "abbiate", + "ad", + "adesso", + "agl", + "agli", + "ai", + "al", + "all", + "alla", + "alle", + "allo", + "allora", + "altre", + "altri", + "altro", + "anche", + "ancora", + "avemmo", + "avendo", + "avere", + "avesse", + "avessero", + "avessi", + "avessimo", + "aveste", + "avesti", + "avete", + "aveva", + "avevamo", + "avevano", + "avevate", + "avevi", + "avevo", + "avrai", + "avranno", + "avrebbe", + "avrebbero", + "avrei", + "avremmo", + "avremo", + "avreste", + "avresti", + "avrete", + "avrà", + "avrò", + "avuta", + "avute", + "avuti", + "avuto", + "c", + "che", + "chi", + "ci", + "coi", + "col", + "come", + "con", + "contro", + "cui", + "da", + "dagl", + "dagli", + "dai", + "dal", + "dall", + "dalla", + "dalle", + "dallo", + "degl", + "degli", + "dei", + "del", + "dell", + "della", + "delle", + "dello", + "dentro", + "di", + "dov", + "dove", + "e", + "ebbe", + "ebbero", + "ebbi", + "ecco", + "ed", + "era", + "erano", + "eravamo", + "eravate", + "eri", + "ero", + "essendo", + "faccia", + "facciamo", + "facciano", + "facciate", + "faccio", + "facemmo", + "facendo", + "facesse", + "facessero", + "facessi", + "facessimo", + "faceste", + "facesti", + "faceva", + "facevamo", + "facevano", + "facevate", + "facevi", + "facevo", + "fai", + "fanno", + "farai", + "faranno", + "fare", + "farebbe", + "farebbero", + "farei", + "faremmo", + "faremo", + 
"fareste", + "faresti", + "farete", + "farà", + "farò", + "fece", + "fecero", + "feci", + "fino", + "fosse", + "fossero", + "fossi", + "fossimo", + "foste", + "fosti", + "fra", + "fu", + "fui", + "fummo", + "furono", + "giù", + "gli", + "ha", + "hai", + "hanno", + "ho", + "i", + "il", + "in", + "io", + "l", + "la", + "le", + "lei", + "li", + "lo", + "loro", + "lui", + "ma", + "me", + "mi", + "mia", + "mie", + "miei", + "mio", + "ne", + "negl", + "negli", + "nei", + "nel", + "nell", + "nella", + "nelle", + "nello", + "no", + "noi", + "non", + "nostra", + "nostre", + "nostri", + "nostro", + "o", + "per", + "perché", + "però", + "più", + "pochi", + "poco", + "qua", + "quale", + "quanta", + "quante", + "quanti", + "quanto", + "quasi", + "quella", + "quelle", + "quelli", + "quello", + "questa", + "queste", + "questi", + "questo", + "qui", + "quindi", + "sarai", + "saranno", + "sarebbe", + "sarebbero", + "sarei", + "saremmo", + "saremo", + "sareste", + "saresti", + "sarete", + "sarà", + "sarò", + "se", + "sei", + "senza", + "si", + "sia", + "siamo", + "siano", + "siate", + "siete", + "sono", + "sopra", + "sotto", + "sta", + "stai", + "stando", + "stanno", + "starai", + "staranno", + "stare", + "starebbe", + "starebbero", + "starei", + "staremmo", + "staremo", + "stareste", + "staresti", + "starete", + "starà", + "starò", + "stava", + "stavamo", + "stavano", + "stavate", + "stavi", + "stavo", + "stemmo", + "stesse", + "stessero", + "stessi", + "stessimo", + "stesso", + "steste", + "stesti", + "stette", + "stettero", + "stetti", + "stia", + "stiamo", + "stiano", + "stiate", + "sto", + "su", + "sua", + "sue", + "sugl", + "sugli", + "sui", + "sul", + "sull", + "sulla", + "sulle", + "sullo", + "suo", + "suoi", + "te", + "ti", + "tra", + "tu", + "tua", + "tue", + "tuo", + "tuoi", + "tutti", + "tutto", + "un", + "una", + "uno", + "vai", + "vi", + "voi", + "vostra", + "vostre", + "vostri", + "vostro", + "è" + ); + stopWords = CharArraySet.copy(Stream + .concat(englishStopWords.stream(), oldItalianStopWords.stream()) + .map(String::toCharArray) + .collect(Collectors.toSet())); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java new file mode 100644 index 0000000..3caca79 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java @@ -0,0 +1,26 @@ +package it.cavallium.dbengine.database; + +import it.cavallium.dbengine.database.analyzer.TextFieldsAnalyzer; +import java.io.IOException; +import java.time.Duration; +import java.util.List; + +public interface LLDatabaseConnection { + + void connect() throws IOException; + + LLKeyValueDatabase getDatabase(String name, List columns, boolean lowMemory) throws IOException; + + LLLuceneIndex getLuceneIndex(String name, + int instancesCount, + TextFieldsAnalyzer textFieldsAnalyzer, + Duration queryRefreshDebounceTime, + Duration commitDebounceTime, + boolean lowMemory) throws IOException; + + void disconnect() throws IOException; + + void ping() throws IOException; + + double getMediumLatencyMillis() throws IOException; +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLDeepDictionary.java b/src/main/java/it/cavallium/dbengine/database/LLDeepDictionary.java new file mode 100644 index 0000000..3e25275 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLDeepDictionary.java @@ -0,0 +1,69 @@ +package it.cavallium.dbengine.database; + +import java.io.IOException; +import java.util.Map.Entry; +import 
java.util.Optional; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import org.apache.commons.lang3.tuple.ImmutableTriple; +import org.jetbrains.annotations.Nullable; +import org.warp.commonutils.concurrency.atomicity.NotAtomic; +import org.warp.commonutils.functional.TriConsumer; +import org.warp.commonutils.functional.TriFunction; +import org.warp.commonutils.type.Bytes; +import org.warp.commonutils.type.UnmodifiableIterableMap; +import org.warp.commonutils.type.UnmodifiableMap; + +@NotAtomic +public interface LLDeepDictionary extends LLKeyValueDatabaseStructure { + + UnmodifiableIterableMap get(@Nullable LLSnapshot snapshot, byte[] key1) throws IOException; + + Optional get(@Nullable LLSnapshot snapshot, byte[] key1, byte[] key2) throws IOException; + + + boolean isEmpty(@Nullable LLSnapshot snapshot, byte[] key1); + + boolean contains(@Nullable LLSnapshot snapshot, byte[] key1, byte[] key2) throws IOException; + + /** + * Note: this will remove previous elements because it replaces the entire map of key + */ + void put(byte[] key1, UnmodifiableIterableMap value) throws IOException; + + Optional put(byte[] key1, byte[] key2, byte[] value, LLDictionaryResultType resultType) throws IOException; + + + void putMulti(byte[][] keys1, UnmodifiableIterableMap[] values) throws IOException; + + void putMulti(byte[] key1, byte[][] keys2, byte[][] values, LLDictionaryResultType resultType, Consumer responses) throws IOException; + + void putMulti(byte[][] keys1, byte[][] keys2, byte[][] values, LLDictionaryResultType resultType, Consumer responses) throws IOException; + + + void clear() throws IOException; + + Optional> clear(byte[] key1, LLDictionaryResultType resultType) throws IOException; + + Optional remove(byte[] key1, byte[] key2, LLDictionaryResultType resultType) throws IOException; + + + void forEach(@Nullable LLSnapshot snapshot, int parallelism, TriConsumer consumer); + + void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer> consumer); + + void forEach(@Nullable LLSnapshot snapshot, int parallelism, byte[] key1, BiConsumer consumer); + + + void replaceAll(int parallelism, boolean replaceKeys, TriFunction> consumer) throws IOException; + + void replaceAll(int parallelism, boolean replaceKeys, BiFunction, Entry>> consumer) throws IOException; + + void replaceAll(int parallelism, boolean replaceKeys, byte[] key1, BiFunction> consumer) throws IOException; + + + long size(@Nullable LLSnapshot snapshot, boolean fast) throws IOException; + + long exactSize(@Nullable LLSnapshot snapshot, byte[] key1); +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLDictionary.java b/src/main/java/it/cavallium/dbengine/database/LLDictionary.java new file mode 100644 index 0000000..4fc782e --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLDictionary.java @@ -0,0 +1,44 @@ +package it.cavallium.dbengine.database; + +import java.io.IOException; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import org.jetbrains.annotations.Nullable; +import org.warp.commonutils.concurrency.atomicity.NotAtomic; + +@NotAtomic +public interface LLDictionary extends LLKeyValueDatabaseStructure { + + Optional get(@Nullable LLSnapshot snapshot, byte[] key) throws IOException; + + boolean contains(@Nullable LLSnapshot snapshot, byte[] key) throws IOException; + + Optional put(byte[] key, byte[] 
value, LLDictionaryResultType resultType) + throws IOException; + + void putMulti(byte[][] key, byte[][] value, LLDictionaryResultType resultType, + Consumer responses) throws IOException; + + Optional remove(byte[] key, LLDictionaryResultType resultType) throws IOException; + + /** + * This method can call the consumer from different threads in parallel + */ + void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer consumer); + + /** + * This method can call the consumer from different threads in parallel + */ + void replaceAll(int parallelism, boolean replaceKeys, BiFunction> consumer) throws IOException; + + void clear() throws IOException; + + long size(@Nullable LLSnapshot snapshot, boolean fast) throws IOException; + + boolean isEmpty(@Nullable LLSnapshot snapshot) throws IOException; + + Optional> removeOne() throws IOException; +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLDictionaryResultType.java b/src/main/java/it/cavallium/dbengine/database/LLDictionaryResultType.java new file mode 100644 index 0000000..317505c --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLDictionaryResultType.java @@ -0,0 +1,36 @@ +package it.cavallium.dbengine.database; + +import org.jetbrains.annotations.Nullable; + +public enum LLDictionaryResultType { + VOID, VALUE_CHANGED, PREVIOUS_VALUE; + + public static LLDictionaryResultType valueOf(@Nullable it.cavallium.dbengine.proto.LLDictionaryResultType resultType) { + if (resultType == null || resultType == it.cavallium.dbengine.proto.LLDictionaryResultType.UNRECOGNIZED) { + return VOID; + } + + switch (resultType) { + case PREVIOUS_VALUE: + return PREVIOUS_VALUE; + case VALUE_CHANGED: + return VALUE_CHANGED; + case VOID: + return VOID; + } + return VOID; + } + + public it.cavallium.dbengine.proto.LLDictionaryResultType toProto() { + switch (this) { + case VALUE_CHANGED: + return it.cavallium.dbengine.proto.LLDictionaryResultType.VALUE_CHANGED; + case PREVIOUS_VALUE: + return it.cavallium.dbengine.proto.LLDictionaryResultType.PREVIOUS_VALUE; + case VOID: + return it.cavallium.dbengine.proto.LLDictionaryResultType.VOID; + } + + return it.cavallium.dbengine.proto.LLDictionaryResultType.UNRECOGNIZED; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLDocument.java b/src/main/java/it/cavallium/dbengine/database/LLDocument.java new file mode 100644 index 0000000..3b9ce9a --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLDocument.java @@ -0,0 +1,49 @@ +package it.cavallium.dbengine.database; + +import java.util.Arrays; +import org.jetbrains.annotations.Nullable; + +public class LLDocument { + + private final LLItem[] items; + + public LLDocument(LLItem[] items) { + this.items = items; + } + + public LLItem[] getItems() { + return items; + } + + @Override + public String toString() { + return Arrays.toString(items); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLDocument that = (LLDocument) o; + return Arrays.equals(items, that.items); + } + + @Override + public int hashCode() { + return Arrays.hashCode(items); + } + + @Nullable + public LLItem getField(String uid) { + for (LLItem item : items) { + if (item.getName().equals(uid)) { + return item; + } + } + return null; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLItem.java b/src/main/java/it/cavallium/dbengine/database/LLItem.java new file mode 100644 index 0000000..55f1c8c --- /dev/null +++ 
b/src/main/java/it/cavallium/dbengine/database/LLItem.java @@ -0,0 +1,144 @@ +package it.cavallium.dbengine.database; + +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Objects; +import java.util.StringJoiner; +import org.apache.lucene.document.Field; +import org.jetbrains.annotations.Nullable; + +public class LLItem { + + private final LLType type; + private final String name; + private final byte[] data; + @Nullable + private final byte[] data2; + + public LLItem(LLType type, String name, byte[] data, @Nullable byte[] data2) { + this.type = type; + this.name = name; + this.data = data; + this.data2 = data2; + } + + private LLItem(LLType type, String name, String data) { + this.type = type; + this.name = name; + this.data = data.getBytes(StandardCharsets.UTF_8); + this.data2 = null; + } + + private LLItem(LLType type, String name, int data) { + this.type = type; + this.name = name; + this.data = Ints.toByteArray(data); + this.data2 = null; + } + + private LLItem(LLType type, String name, float data) { + this.type = type; + this.name = name; + this.data = ByteBuffer.allocate(4).putFloat(data).array(); + this.data2 = null; + } + + private LLItem(LLType type, String name, long data) { + this.type = type; + this.name = name; + this.data = Longs.toByteArray(data); + this.data2 = null; + } + + public static LLItem newIntPoint(String name, int data) { + return new LLItem(LLType.IntPoint, name, data); + } + + public static LLItem newLongPoint(String name, long data) { + return new LLItem(LLType.LongPoint, name, data); + } + + public static LLItem newFloatPoint(String name, float data) { + return new LLItem(LLType.FloatPoint, name, data); + } + + public static LLItem newTextField(String name, String data, Field.Store store) { + if (store == Field.Store.YES) { + return new LLItem(LLType.TextFieldStored, name, data); + } else { + return new LLItem(LLType.TextField, name, data); + } + } + + public static LLItem newStringField(String name, String data, Field.Store store) { + if (store == Field.Store.YES) { + return new LLItem(LLType.StringFieldStored, name, data); + } else { + return new LLItem(LLType.StringField, name, data); + } + } + + public static LLItem newSortedNumericDocValuesField(String name, long data) { + return new LLItem(LLType.SortedNumericDocValuesField, name, data); + } + + public String getName() { + return name; + } + + public LLType getType() { + return type; + } + + public byte[] getData() { + return data; + } + + @Nullable + public byte[] getData2() { + return data2; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLItem llItem = (LLItem) o; + return type == llItem.type && + Objects.equals(name, llItem.name) && + Arrays.equals(data, llItem.data) && + Arrays.equals(data2, llItem.data2); + } + + @Override + public int hashCode() { + int result = Objects.hash(type, name); + result = 31 * result + Arrays.hashCode(data); + result = 31 * result + Arrays.hashCode(data2); + return result; + } + + @Override + public String toString() { + var sj = new StringJoiner(", ", "[", "]") + .add("type=" + type) + .add("name='" + name + "'"); + if (data != null && data.length > 0) { + sj.add("data=" + new String(data)); + } + if (data2 != null && data2.length > 0) { + sj.add("data2=" + new String(data2)); + } + return sj.toString(); + } + +
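+ // Hypothetical usage sketch (editorial; the field names "title" and "timestamp"
+ // are examples, not part of the API): items are built through the factories above,
+ // then grouped into an LLDocument and indexed through LLLuceneIndex.addDocument.
+ //   LLItem title = LLItem.newTextField("title", "Hello world", Field.Store.YES);
+ //   LLItem timestamp = LLItem.newSortedNumericDocValuesField("timestamp", 1620000000L);
+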
public String stringValue() { + return new String(data, StandardCharsets.UTF_8); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLKeyScore.java b/src/main/java/it/cavallium/dbengine/database/LLKeyScore.java new file mode 100644 index 0000000..41bdc57 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLKeyScore.java @@ -0,0 +1,48 @@ +package it.cavallium.dbengine.database; + +import java.util.Objects; + +public class LLKeyScore { + + private final String key; + private final float score; + + public LLKeyScore(String key, float score) { + this.key = key; + this.score = score; + } + + public String getKey() { + return key; + } + + public float getScore() { + return score; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLKeyScore that = (LLKeyScore) o; + return score == that.score && + Objects.equals(key, that.key); + } + + @Override + public int hashCode() { + return Objects.hash(key, score); + } + + @Override + public String toString() { + return "LLKeyScore{" + + "key=" + key + + ", score=" + score + + '}'; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLKeyValueDatabase.java b/src/main/java/it/cavallium/dbengine/database/LLKeyValueDatabase.java new file mode 100644 index 0000000..8372ae3 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLKeyValueDatabase.java @@ -0,0 +1,65 @@ +package it.cavallium.dbengine.database; + +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import it.cavallium.dbengine.database.structures.LLDeepMap; +import java.io.Closeable; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import it.cavallium.dbengine.database.structures.LLFixedDeepSet; +import it.cavallium.dbengine.database.structures.LLInt; +import it.cavallium.dbengine.database.structures.LLLong; +import it.cavallium.dbengine.database.structures.LLMap; +import it.cavallium.dbengine.database.structures.LLSet; + +public interface LLKeyValueDatabase extends Closeable, LLSnapshottable, LLKeyValueDatabaseStructure { + + LLSingleton getSingleton(byte[] singletonListColumnName, byte[] name, byte[] defaultValue) + throws IOException; + + LLDictionary getDictionary(byte[] columnName) throws IOException; + + LLDeepDictionary getDeepDictionary(byte[] columnName, int keySize, int key2Size) throws IOException; + + default LLSet getSet(String name) throws IOException { + LLDictionary dictionary = getDictionary( + Column.fixedSet(name).getName().getBytes(StandardCharsets.US_ASCII)); + return new LLSet(dictionary); + } + + default LLMap getMap(String name) throws IOException { + LLDictionary dictionary = getDictionary( + Column.hashMap(name).getName().getBytes(StandardCharsets.US_ASCII)); + return new LLMap(dictionary); + } + + default LLFixedDeepSet getDeepSet(String name, int keySize, int key2Size) throws IOException { + LLDeepDictionary deepDictionary = getDeepDictionary( + Column.fixedSet(name).getName().getBytes(StandardCharsets.US_ASCII), keySize, key2Size); + return new LLFixedDeepSet(deepDictionary); + } + + default LLDeepMap getDeepMap(String name, int keySize, int key2Size) throws IOException { + LLDeepDictionary deepDictionary = getDeepDictionary( + Column.hashMap(name).getName().getBytes(StandardCharsets.US_ASCII), keySize, key2Size); + return new LLDeepMap(deepDictionary); + } + + default LLInt getInteger(String singletonListName, String name, int defaultValue) + throws 
IOException { + LLSingleton singleton = getSingleton( + Column.special(singletonListName).getName().getBytes(StandardCharsets.US_ASCII), + name.getBytes(StandardCharsets.US_ASCII), Ints.toByteArray(defaultValue)); + return new LLInt(singleton); + } + + default LLLong getLong(String singletonListName, String name, long defaultValue) + throws IOException { + LLSingleton singleton = getSingleton( + Column.special(singletonListName).getName().getBytes(StandardCharsets.US_ASCII), + name.getBytes(StandardCharsets.US_ASCII), Longs.toByteArray(defaultValue)); + return new LLLong(singleton); + } + + long getProperty(String propertyName) throws IOException; +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLKeyValueDatabaseStructure.java b/src/main/java/it/cavallium/dbengine/database/LLKeyValueDatabaseStructure.java new file mode 100644 index 0000000..fe9470d --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLKeyValueDatabaseStructure.java @@ -0,0 +1,6 @@ +package it.cavallium.dbengine.database; + +public interface LLKeyValueDatabaseStructure { + + String getDatabaseName(); +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java new file mode 100644 index 0000000..9078b84 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java @@ -0,0 +1,55 @@ +package it.cavallium.dbengine.database; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import org.jetbrains.annotations.Nullable; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.util.function.Tuple2; + +public interface LLLuceneIndex extends Closeable, LLSnapshottable { + + String getLuceneIndexName(); + + void addDocument(LLTerm id, LLDocument doc) throws IOException; + + void addDocuments(Iterable keys, Iterable documents) throws IOException; + + void deleteDocument(LLTerm id) throws IOException; + + void updateDocument(LLTerm id, LLDocument document) throws IOException; + + void updateDocuments(Iterable ids, Iterable documents) throws IOException; + + void deleteAll() throws IOException; + + Collection search(@Nullable LLSnapshot snapshot, String query, int limit, @Nullable LLSort sort, String keyFieldName) + throws IOException; + + Collection moreLikeThis(@Nullable LLSnapshot snapshot, + Map> mltDocumentFields, + int limit, + String keyFieldName) throws IOException; + + /** + * @param snapshot the snapshot to search in, or null to search the current data + * @param query the query string + * @param limit the maximum number of results + * @param sort the sort order, or null to sort by relevance + * @param keyFieldName the name of the field containing the document key + * @return the total hits count and the search results; the collection has one or more flux + */ + Tuple2, Collection>> searchStream(@Nullable LLSnapshot snapshot, + String query, + int limit, + @Nullable LLSort sort, + String keyFieldName); + + long count(@Nullable LLSnapshot snapshot, String query) throws IOException; + + boolean isLowMemoryMode(); +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLSingleton.java b/src/main/java/it/cavallium/dbengine/database/LLSingleton.java new file mode 100644 index 0000000..60a7b7d --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLSingleton.java @@ -0,0 +1,11 @@ +package it.cavallium.dbengine.database; + +import java.io.IOException; +import org.jetbrains.annotations.Nullable; + +public interface LLSingleton extends LLKeyValueDatabaseStructure { + + byte[] get(@Nullable LLSnapshot snapshot) throws IOException; + + void set(byte[] value) throws IOException; +} diff --git 
a/src/main/java/it/cavallium/dbengine/database/LLSnapshot.java b/src/main/java/it/cavallium/dbengine/database/LLSnapshot.java new file mode 100644 index 0000000..f15d37d --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLSnapshot.java @@ -0,0 +1,41 @@ +package it.cavallium.dbengine.database; + +import java.util.StringJoiner; + +public class LLSnapshot { + private final long sequenceNumber; + + public LLSnapshot(long sequenceNumber) { + this.sequenceNumber = sequenceNumber; + } + + public long getSequenceNumber() { + return sequenceNumber; + } + + @Override + public String toString() { + return new StringJoiner(", ", LLSnapshot.class.getSimpleName() + "[", "]") + .add("sequenceNumber=" + sequenceNumber) + .toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + LLSnapshot that = (LLSnapshot) o; + + return sequenceNumber == that.sequenceNumber; + } + + @Override + public int hashCode() { + return (int) (sequenceNumber ^ (sequenceNumber >>> 32)); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLSnapshottable.java b/src/main/java/it/cavallium/dbengine/database/LLSnapshottable.java new file mode 100644 index 0000000..1e5df8a --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLSnapshottable.java @@ -0,0 +1,10 @@ +package it.cavallium.dbengine.database; + +import java.io.IOException; + +public interface LLSnapshottable { + + LLSnapshot takeSnapshot() throws IOException; + + void releaseSnapshot(LLSnapshot snapshot) throws IOException; +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLSort.java b/src/main/java/it/cavallium/dbengine/database/LLSort.java new file mode 100644 index 0000000..0fbd1a0 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLSort.java @@ -0,0 +1,64 @@ +package it.cavallium.dbengine.database; + +import java.util.Objects; + +public class LLSort { + + private final String fieldName; + private final LLSortType type; + private final boolean reverse; + + public LLSort(String fieldName, LLSortType type, boolean reverse) { + this.fieldName = fieldName; + this.type = type; + this.reverse = reverse; + } + + public static LLSort newSortedNumericSortField(String fieldName, boolean reverse) { + return new LLSort(fieldName, LLSortType.LONG, reverse); + } + + public static LLSort newRandomSortField() { + return new LLSort(null, LLSortType.RANDOM, false); + } + + public String getFieldName() { + return fieldName; + } + + public LLSortType getType() { + return type; + } + + public boolean isReverse() { + return reverse; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLSort llSort = (LLSort) o; + return reverse == llSort.reverse && + Objects.equals(fieldName, llSort.fieldName) && + type == llSort.type; + } + + @Override + public int hashCode() { + return Objects.hash(fieldName, type, reverse); + } + + @Override + public String toString() { + return "LLSort{" + + "fieldName='" + fieldName + '\'' + + ", type=" + type + + ", reverse=" + reverse + + '}'; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLSortType.java b/src/main/java/it/cavallium/dbengine/database/LLSortType.java new file mode 100644 index 0000000..43f5f87 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLSortType.java @@ -0,0 +1,6 @@ +package it.cavallium.dbengine.database; + +public enum 
LLSortType { + LONG, + RANDOM +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLTerm.java b/src/main/java/it/cavallium/dbengine/database/LLTerm.java new file mode 100644 index 0000000..c529a52 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLTerm.java @@ -0,0 +1,48 @@ +package it.cavallium.dbengine.database; + +import java.util.Objects; + +public class LLTerm { + + private final String key; + private final String value; + + public LLTerm(String key, String value) { + this.key = key; + this.value = value; + } + + public String getKey() { + return key; + } + + public String getValue() { + return value; + } + + @Override + public String toString() { + return "LLTerm{" + + "key='" + key + '\'' + + ", value='" + value + '\'' + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLTerm llTerm = (LLTerm) o; + return Objects.equals(key, llTerm.key) && + Objects.equals(value, llTerm.value); + } + + @Override + public int hashCode() { + return Objects.hash(key, value); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLTopKeys.java b/src/main/java/it/cavallium/dbengine/database/LLTopKeys.java new file mode 100644 index 0000000..6c926e6 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLTopKeys.java @@ -0,0 +1,51 @@ +package it.cavallium.dbengine.database; + +import java.util.Arrays; +import java.util.Objects; + +public class LLTopKeys { + + private final long totalHitsCount; + private final LLKeyScore[] hits; + + public LLTopKeys(long totalHitsCount, LLKeyScore[] hits) { + this.totalHitsCount = totalHitsCount; + this.hits = hits; + } + + public long getTotalHitsCount() { + return totalHitsCount; + } + + public LLKeyScore[] getHits() { + return hits; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLTopKeys llTopKeys = (LLTopKeys) o; + return totalHitsCount == llTopKeys.totalHitsCount && + Arrays.equals(hits, llTopKeys.hits); + } + + @Override + public int hashCode() { + int result = Objects.hash(totalHitsCount); + result = 31 * result + Arrays.hashCode(hits); + return result; + } + + @Override + public String toString() { + return "LLTopKeys{" + + "totalHitsCount=" + totalHitsCount + + ", hits=" + Arrays.toString(hits) + + '}'; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLType.java b/src/main/java/it/cavallium/dbengine/database/LLType.java new file mode 100644 index 0000000..67b248a --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLType.java @@ -0,0 +1,12 @@ +package it.cavallium.dbengine.database; + +public enum LLType { + StringField, + StringFieldStored, + IntPoint, + LongPoint, + FloatPoint, + SortedNumericDocValuesField, + TextField, + TextFieldStored +} diff --git a/src/main/java/it/cavallium/dbengine/database/LLUtils.java b/src/main/java/it/cavallium/dbengine/database/LLUtils.java new file mode 100644 index 0000000..787b519 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LLUtils.java @@ -0,0 +1,200 @@ +package it.cavallium.dbengine.database; + +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import com.google.protobuf.ByteString; +import it.cavallium.dbengine.database.utils.RandomSortField; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import 
java.util.stream.Collectors; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; +import org.jetbrains.annotations.Nullable; +import it.cavallium.dbengine.proto.LLKeyScore; +import it.cavallium.dbengine.proto.LLType; + +public class LLUtils { + + private static final byte[] RESPONSE_TRUE = new byte[]{1}; + private static final byte[] RESPONSE_FALSE = new byte[]{0}; + + public static boolean responseToBoolean(byte[] response) { + return response[0] == 1; + } + + public static byte[] booleanToResponse(boolean bool) { + return bool ? RESPONSE_TRUE : RESPONSE_FALSE; + } + + @Nullable + public static Sort toSort(@Nullable LLSort sort) { + if (sort == null) { + return null; + } + if (sort.getType() == LLSortType.LONG) { + return new Sort(new SortedNumericSortField(sort.getFieldName(), SortField.Type.LONG, sort.isReverse())); + } else if (sort.getType() == LLSortType.RANDOM) { + return new Sort(new RandomSortField()); + } + return null; + } + + public static Term toTerm(LLTerm term) { + return new Term(term.getKey(), term.getValue()); + } + + public static Document toDocument(LLDocument document) { + Document d = new Document(); + for (LLItem item : document.getItems()) { + d.add(LLUtils.toField(item)); + } + return d; + } + + public static Iterable toDocuments(Iterable document) { + List d = new LinkedList<>(); + for (LLDocument doc : document) { + d.add(LLUtils.toDocument(doc)); + } + return d; + } + + public static Iterable toTerms(Iterable terms) { + List d = new LinkedList<>(); + for (LLTerm term : terms) { + d.add(LLUtils.toTerm(term)); + } + return d; + } + + private static IndexableField toField(LLItem item) { + switch (item.getType()) { + case IntPoint: + return new IntPoint(item.getName(), Ints.fromByteArray(item.getData())); + case LongPoint: + return new LongPoint(item.getName(), Longs.fromByteArray(item.getData())); + case FloatPoint: + return new FloatPoint(item.getName(), ByteBuffer.wrap(item.getData()).getFloat()); + case TextField: + return new TextField(item.getName(), item.stringValue(), Field.Store.NO); + case TextFieldStored: + return new TextField(item.getName(), item.stringValue(), Field.Store.YES); + case SortedNumericDocValuesField: + return new SortedNumericDocValuesField(item.getName(), Longs.fromByteArray(item.getData())); + case StringField: + return new StringField(item.getName(), item.stringValue(), Field.Store.NO); + case StringFieldStored: + return new StringField(item.getName(), item.stringValue(), Field.Store.YES); + } + throw new UnsupportedOperationException("Unsupported field type"); + } + + public static Iterable toGrpc(LLItem[] items) { + var list = new ArrayList(items.length); + for (LLItem item : items) { + list.add(LLUtils.toGrpc(item)); + } + return list; + } + + public static it.cavallium.dbengine.proto.LLItem toGrpc(LLItem item) { + var builder = it.cavallium.dbengine.proto.LLItem.newBuilder() + .setType(LLType.valueOf(item.getType().toString())) + .setName(item.getName()) + 
.setData1(ByteString.copyFrom(item.getData())); + if (item.getData2() != null) { + builder.setData2(ByteString.copyFrom(item.getData2())); + } + return builder.build(); + } + + public static it.cavallium.dbengine.proto.LLDocument toGrpc(LLDocument doc) { + var builder = it.cavallium.dbengine.proto.LLDocument.newBuilder() + .addAllItems(toGrpc(doc.getItems())); + return builder.build(); + } + + public static Iterable toGrpc( + Iterable document) { + LinkedList docs = new LinkedList<>(); + document.forEach((doc) -> docs.add(toGrpc(doc))); + return docs; + } + + public static Iterable toGrpcKey(Iterable term) { + LinkedList terms = new LinkedList<>(); + term.forEach((t) -> terms.add(toGrpc(t))); + return terms; + } + + public static it.cavallium.dbengine.proto.LLTerm toGrpc(LLTerm term) { + return it.cavallium.dbengine.proto.LLTerm.newBuilder() + .setKey(term.getKey()) + .setValue(term.getValue()) + .build(); + } + + public static it.cavallium.dbengine.proto.LLSort toGrpc(LLSort sort) { + return it.cavallium.dbengine.proto.LLSort.newBuilder() + .setFieldName(sort.getFieldName()) + .setType(it.cavallium.dbengine.proto.LLSortType.valueOf(sort.getType().toString())) + .setReverse(sort.isReverse()) + .build(); + } + + public static it.cavallium.dbengine.database.LLKeyScore toKeyScore(LLKeyScore hit) { + return new it.cavallium.dbengine.database.LLKeyScore(hit.getKey(), hit.getScore()); + } + + public static LLDocument toLocal(List documentItemsList) { + return new LLDocument(documentItemsList.stream().map(LLUtils::toLocal).toArray(LLItem[]::new)); + } + + public static LLDocument toLocal(it.cavallium.dbengine.proto.LLDocument document) { + return toLocal(document.getItemsList()); + } + + public static List toLocalDocuments( + List documentItemsList) { + return documentItemsList.stream().map(LLUtils::toLocal).collect(Collectors.toList()); + } + + public static List toLocalTerms(List termItemsList) { + return termItemsList.stream().map(LLUtils::toLocal).collect(Collectors.toList()); + } + + private static LLItem toLocal(it.cavallium.dbengine.proto.LLItem item) { + var data2 = item.getData2() != null ? 
item.getData2().toByteArray() : null; + return new LLItem(it.cavallium.dbengine.database.LLType.valueOf(item.getType().toString()), + item.getName(), item.getData1().toByteArray(), data2); + } + + public static LLTerm toLocal(it.cavallium.dbengine.proto.LLTerm key) { + return new LLTerm(key.getKey(), key.getValue()); + } + + public static LLSort toLocal(it.cavallium.dbengine.proto.LLSort sort) { + return new LLSort(sort.getFieldName(), LLSortType.valueOf(sort.getType().toString()), + sort.getReverse()); + } + + public static LLKeyScore toGrpc(it.cavallium.dbengine.database.LLKeyScore hit) { + return LLKeyScore.newBuilder() + .setKey(hit.getKey()) + .setScore(hit.getScore()) + .build(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/LuceneUtils.java b/src/main/java/it/cavallium/dbengine/database/LuceneUtils.java new file mode 100644 index 0000000..4929b35 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/LuceneUtils.java @@ -0,0 +1,60 @@ +package it.cavallium.dbengine.database; + +import it.cavallium.dbengine.database.analyzer.N4CharGramAnalyzer; +import it.cavallium.dbengine.database.analyzer.N4CharGramEdgeAnalyzer; +import it.cavallium.dbengine.database.analyzer.TextFieldsAnalyzer; +import it.cavallium.dbengine.database.analyzer.WordAnalyzer; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.en.EnglishPossessiveFilter; +import org.apache.lucene.analysis.en.KStemFilter; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; + +public class LuceneUtils { + private static final Analyzer lucene4CharGramAnalyzerEdgeInstance = new N4CharGramEdgeAnalyzer(); + private static final Analyzer lucene4CharGramAnalyzerInstance = new N4CharGramAnalyzer(); + private static final Analyzer luceneWordAnalyzerStopWordsAndStemInstance = new WordAnalyzer(true, true); + private static final Analyzer luceneWordAnalyzerStopWordsInstance = new WordAnalyzer(true, false); + private static final Analyzer luceneWordAnalyzerStemInstance = new WordAnalyzer(false, true); + private static final Analyzer luceneWordAnalyzerSimpleInstance = new WordAnalyzer(false, false); + + public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) { + switch (analyzer) { + case PartialWordsEdge: + return lucene4CharGramAnalyzerEdgeInstance; + case PartialWords: + return lucene4CharGramAnalyzerInstance; + case FullText: + return luceneWordAnalyzerStopWordsAndStemInstance; + case WordWithStopwordsStripping: + return luceneWordAnalyzerStopWordsInstance; + case WordWithStemming: + return luceneWordAnalyzerStemInstance; + case WordSimple: + return luceneWordAnalyzerSimpleInstance; + default: + throw new UnsupportedOperationException("Unknown analyzer: " + analyzer); + } + } + + /** + * + * @param stem Enable stem filters on words. 
+ * Pass false if it will be used with a n-gram filter + */ + public static TokenStream newCommonFilter(TokenStream tokenStream, boolean stem) { + tokenStream = newCommonNormalizer(tokenStream); + if (stem) { + tokenStream = new KStemFilter(tokenStream); + tokenStream = new EnglishPossessiveFilter(tokenStream); + } + return tokenStream; + } + + public static TokenStream newCommonNormalizer(TokenStream tokenStream) { + tokenStream = new ASCIIFoldingFilter(tokenStream); + tokenStream = new LowerCaseFilter(tokenStream); + return tokenStream; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/analyzer/N4CharGramAnalyzer.java b/src/main/java/it/cavallium/dbengine/database/analyzer/N4CharGramAnalyzer.java new file mode 100644 index 0000000..017a308 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/analyzer/N4CharGramAnalyzer.java @@ -0,0 +1,32 @@ +package it.cavallium.dbengine.database.analyzer; + +import it.cavallium.dbengine.database.LuceneUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.ngram.NGramTokenFilter; + +public class N4CharGramAnalyzer extends Analyzer { + + public N4CharGramAnalyzer() { + + } + + @Override + protected TokenStreamComponents createComponents(final String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + TokenStream tokenStream = tokenizer; + tokenStream = LuceneUtils.newCommonFilter(tokenStream, false); + tokenStream = new NGramTokenFilter(tokenStream, 4, 4, false); + + return new TokenStreamComponents(tokenizer, tokenStream); + } + + @Override + protected TokenStream normalize(String fieldName, TokenStream in) { + TokenStream tokenStream = in; + tokenStream = LuceneUtils.newCommonNormalizer(tokenStream); + return tokenStream; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/analyzer/N4CharGramEdgeAnalyzer.java b/src/main/java/it/cavallium/dbengine/database/analyzer/N4CharGramEdgeAnalyzer.java new file mode 100644 index 0000000..2e60676 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/analyzer/N4CharGramEdgeAnalyzer.java @@ -0,0 +1,32 @@ +package it.cavallium.dbengine.database.analyzer; + +import it.cavallium.dbengine.database.LuceneUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; + +public class N4CharGramEdgeAnalyzer extends Analyzer { + + public N4CharGramEdgeAnalyzer() { + + } + + @Override + protected TokenStreamComponents createComponents(final String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + TokenStream tokenStream = tokenizer; + tokenStream = LuceneUtils.newCommonFilter(tokenStream, false); + tokenStream = new EdgeNGramTokenFilter(tokenStream, 4, 4, false); + + return new TokenStreamComponents(tokenizer, tokenStream); + } + + @Override + protected TokenStream normalize(String fieldName, TokenStream in) { + TokenStream tokenStream = in; + tokenStream = LuceneUtils.newCommonNormalizer(tokenStream); + return tokenStream; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/analyzer/TextFieldsAnalyzer.java b/src/main/java/it/cavallium/dbengine/database/analyzer/TextFieldsAnalyzer.java new file mode 100644 index 0000000..ee24de9 --- /dev/null +++ 
b/src/main/java/it/cavallium/dbengine/database/analyzer/TextFieldsAnalyzer.java @@ -0,0 +1,10 @@ +package it.cavallium.dbengine.database.analyzer; + +public enum TextFieldsAnalyzer { + PartialWordsEdge, + PartialWords, + WordSimple, + WordWithStopwordsStripping, + WordWithStemming, + FullText, +} diff --git a/src/main/java/it/cavallium/dbengine/database/analyzer/WordAnalyzer.java b/src/main/java/it/cavallium/dbengine/database/analyzer/WordAnalyzer.java new file mode 100644 index 0000000..aaa9af0 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/analyzer/WordAnalyzer.java @@ -0,0 +1,39 @@ +package it.cavallium.dbengine.database.analyzer; + +import it.cavallium.dbengine.database.EnglishItalianStopFilter; +import it.cavallium.dbengine.database.LuceneUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardTokenizer; + +public class WordAnalyzer extends Analyzer { + + private final boolean removeStopWords; + private final boolean stem; + + public WordAnalyzer(boolean removeStopWords, boolean stem) { + this.removeStopWords = removeStopWords; + this.stem = stem; + } + + @Override + protected TokenStreamComponents createComponents(final String fieldName) { + Tokenizer tokenizer = new StandardTokenizer(); + TokenStream tokenStream = tokenizer; + //tokenStream = new LengthFilter(tokenStream, 1, 100); + if (removeStopWords) { + tokenStream = new EnglishItalianStopFilter(tokenStream); + } + tokenStream = LuceneUtils.newCommonFilter(tokenStream, stem); + + return new TokenStreamComponents(tokenizer, tokenStream); + } + + @Override + protected TokenStream normalize(String fieldName, TokenStream in) { + TokenStream tokenStream = in; + tokenStream = LuceneUtils.newCommonNormalizer(tokenStream); + return tokenStream; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/CappedWriteBatch.java b/src/main/java/it/cavallium/dbengine/database/disk/CappedWriteBatch.java new file mode 100644 index 0000000..bebc6ae --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/CappedWriteBatch.java @@ -0,0 +1,186 @@ +package it.cavallium.dbengine.database.disk; + +import java.nio.ByteBuffer; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.WriteBatch; +import org.rocksdb.WriteBatchInterface; +import org.rocksdb.WriteOptions; +import org.warp.commonutils.concurrency.atomicity.NotAtomic; + +@NotAtomic +public class CappedWriteBatch implements WriteBatchInterface, AutoCloseable { + + private final RocksDB db; + private final int cap; + private final WriteOptions writeOptions; + + private final WriteBatch writeBatch; + + /** + * + * @param cap The limit of operations + */ + public CappedWriteBatch(RocksDB db, int cap, int reservedWriteBatchSize, long maxWriteBatchSize, WriteOptions writeOptions) { + this.db = db; + this.cap = cap; + this.writeOptions = writeOptions; + this.writeBatch = new WriteBatch(reservedWriteBatchSize); + this.writeBatch.setMaxBytes(maxWriteBatchSize); + } + + private void flushIfNeeded(boolean force) throws RocksDBException { + if (this.writeBatch.count() >= (force ? 
1 : cap)) { + db.write(writeOptions, this.writeBatch); + this.writeBatch.clear(); + } + } + + @Override + public int count() { + return writeBatch.count(); + } + + @Override + public void put(byte[] key, byte[] value) throws RocksDBException { + writeBatch.put(key, value); + flushIfNeeded(false); + } + + @Override + public void put(ColumnFamilyHandle columnFamilyHandle, byte[] key, byte[] value) throws RocksDBException { + writeBatch.put(columnFamilyHandle, key, value); + flushIfNeeded(false); + } + + @Override + public void put(ByteBuffer key, ByteBuffer value) throws RocksDBException { + writeBatch.put(key, value); + flushIfNeeded(false); + } + + @Override + public void put(ColumnFamilyHandle columnFamilyHandle, ByteBuffer key, ByteBuffer value) throws RocksDBException { + writeBatch.put(columnFamilyHandle, key, value); + flushIfNeeded(false); + } + + @Override + public void merge(byte[] key, byte[] value) throws RocksDBException { + writeBatch.merge(key, value); + flushIfNeeded(false); + } + + @Override + public void merge(ColumnFamilyHandle columnFamilyHandle, byte[] key, byte[] value) throws RocksDBException { + writeBatch.merge(columnFamilyHandle, key, value); + flushIfNeeded(false); + } + + @Override + public void remove(byte[] key) throws RocksDBException { + writeBatch.remove(key); + flushIfNeeded(false); + } + + @Override + public void remove(ColumnFamilyHandle columnFamilyHandle, byte[] key) throws RocksDBException { + writeBatch.remove(columnFamilyHandle, key); + flushIfNeeded(false); + } + + @Override + public void delete(byte[] key) throws RocksDBException { + writeBatch.delete(key); + flushIfNeeded(false); + } + + @Override + public void delete(ColumnFamilyHandle columnFamilyHandle, byte[] key) throws RocksDBException { + writeBatch.delete(columnFamilyHandle, key); + flushIfNeeded(false); + } + + @Override + public void singleDelete(byte[] key) throws RocksDBException { + writeBatch.singleDelete(key); + flushIfNeeded(false); + } + + @Override + public void singleDelete(ColumnFamilyHandle columnFamilyHandle, byte[] key) throws RocksDBException { + writeBatch.singleDelete(columnFamilyHandle, key); + flushIfNeeded(false); + } + + @Override + public void remove(ByteBuffer key) throws RocksDBException { + writeBatch.remove(key); + flushIfNeeded(false); + } + + @Override + public void remove(ColumnFamilyHandle columnFamilyHandle, ByteBuffer key) throws RocksDBException { + writeBatch.remove(columnFamilyHandle, key); + flushIfNeeded(false); + } + + @Override + public void deleteRange(byte[] beginKey, byte[] endKey) throws RocksDBException { + writeBatch.deleteRange(beginKey, endKey); + flushIfNeeded(false); + } + + @Override + public void deleteRange(ColumnFamilyHandle columnFamilyHandle, byte[] beginKey, byte[] endKey) + throws RocksDBException { + writeBatch.deleteRange(columnFamilyHandle, beginKey, endKey); + flushIfNeeded(false); + } + + @Override + public void putLogData(byte[] blob) throws RocksDBException { + writeBatch.putLogData(blob); + flushIfNeeded(false); + } + + @Override + public void clear() { + writeBatch.clear(); + } + + @Override + public void setSavePoint() { + writeBatch.setSavePoint(); + } + + @Override + public void rollbackToSavePoint() throws RocksDBException { + writeBatch.rollbackToSavePoint(); + } + + @Override + public void popSavePoint() throws RocksDBException { + writeBatch.popSavePoint(); + } + + @Override + public void setMaxBytes(long maxBytes) { + writeBatch.setMaxBytes(maxBytes); + } + + @Override + public WriteBatch getWriteBatch() { + return 
writeBatch; + } + + public void writeToDbAndClose() throws RocksDBException { + flushIfNeeded(true); + } + + @Override + public void close() { + writeBatch.close(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDatabaseConnection.java new file mode 100644 index 0000000..f2d42a4 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDatabaseConnection.java @@ -0,0 +1,78 @@ +package it.cavallium.dbengine.database.disk; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.LinkedList; +import java.util.List; +import it.cavallium.dbengine.database.Column; +import it.cavallium.dbengine.database.LLDatabaseConnection; +import it.cavallium.dbengine.database.LLLuceneIndex; +import it.cavallium.dbengine.database.analyzer.TextFieldsAnalyzer; + +public class LLLocalDatabaseConnection implements LLDatabaseConnection { + + private final Path basePath; + private final boolean crashIfWalError; + + public LLLocalDatabaseConnection(Path basePath, boolean crashIfWalError) { + this.basePath = basePath; + this.crashIfWalError = crashIfWalError; + } + + @Override + public void connect() throws IOException { + if (Files.notExists(basePath)) { + Files.createDirectories(basePath); + } + } + + @Override + public LLLocalKeyValueDatabase getDatabase(String name, List columns, boolean lowMemory) throws IOException { + return new LLLocalKeyValueDatabase(name, basePath.resolve("database_" + name), columns, new LinkedList<>(), + crashIfWalError, lowMemory); + } + + @Override + public LLLuceneIndex getLuceneIndex(String name, + int instancesCount, + TextFieldsAnalyzer textFieldsAnalyzer, + Duration queryRefreshDebounceTime, + Duration commitDebounceTime, + boolean lowMemory) throws IOException { + if (instancesCount != 1) { + return new LLLocalMultiLuceneIndex(basePath.resolve("lucene"), + name, + instancesCount, + textFieldsAnalyzer, + queryRefreshDebounceTime, + commitDebounceTime, + lowMemory + ); + } else { + return new LLLocalLuceneIndex(basePath.resolve("lucene"), + name, + textFieldsAnalyzer, + queryRefreshDebounceTime, + commitDebounceTime, + lowMemory + ); + } + } + + @Override + public void disconnect() throws IOException { + + } + + @Override + public void ping() { + + } + + @Override + public double getMediumLatencyMillis() { + return 0; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDeepDictionary.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDeepDictionary.java new file mode 100644 index 0000000..9280822 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDeepDictionary.java @@ -0,0 +1,865 @@ +package it.cavallium.dbengine.database.disk; + +import it.unimi.dsi.fastutil.objects.ObjectArrayList; +import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import java.util.function.Function; +import org.apache.commons.lang3.tuple.ImmutableTriple; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.FlushOptions; +import 
org.rocksdb.Holder; +import org.rocksdb.ReadOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.RocksIterator; +import org.rocksdb.Snapshot; +import org.rocksdb.WriteBatchInterface; +import org.warp.commonutils.concurrency.atomicity.NotAtomic; +import org.warp.commonutils.error.IndexOutOfBoundsException; +import org.warp.commonutils.functional.TriConsumer; +import org.warp.commonutils.functional.TriFunction; +import org.warp.commonutils.type.Bytes; +import org.warp.commonutils.type.UnmodifiableIterableMap; +import org.warp.commonutils.type.UnmodifiableMap; +import it.cavallium.dbengine.database.LLDeepDictionary; +import it.cavallium.dbengine.database.LLDictionaryResultType; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.database.LLUtils; + +@NotAtomic +public class LLLocalDeepDictionary implements LLDeepDictionary { + + private static final byte[] NO_DATA = new byte[0]; + private static final byte[][] NO_DATA_MAP = new byte[0][0]; + private static final ReadOptions EMPTY_READ_OPTIONS = new ReadOptions(); + private final RocksDB db; + private final ColumnFamilyHandle cfh; + private final String databaseName; + private final Function snapshotResolver; + private final int key1Size; + private final int key2Size; + private final int key1Position; + private final int key2Position; + private final int key1EndPosition; + private final int key2EndPosition; + private final int combinedKeySize; + + public LLLocalDeepDictionary(@NotNull RocksDB db, @NotNull ColumnFamilyHandle columnFamilyHandle, + String databaseName, + Function snapshotResolver, int keySize, int key2Size) { + Objects.requireNonNull(db); + this.db = db; + Objects.requireNonNull(columnFamilyHandle); + this.cfh = columnFamilyHandle; + this.databaseName = databaseName; + this.snapshotResolver = snapshotResolver; + this.key1Size = keySize; + this.key2Size = key2Size; + this.key1Position = 0; + this.key2Position = key1Size; + this.key1EndPosition = key1Position + key1Size; + this.key2EndPosition = key2Position + key2Size; + this.combinedKeySize = keySize + key2Size; + } + + @Override + public String getDatabaseName() { + return databaseName; + } + + @SuppressWarnings("BooleanMethodIsAlwaysInverted") + private boolean isSubKey(byte[] key1, byte[] combinedKey) { + if (key1 == null || combinedKey == null || key1.length != key1Size || combinedKey.length != combinedKeySize) { + return false; + } + + return Arrays.equals(key1, 0, key1Size, combinedKey, key1Position, key1EndPosition); + } + + private byte[] getStartSeekKey(byte[] key1) { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + return Arrays.copyOf(key1, combinedKeySize); + } + + private byte[] getEndSeekKey(byte[] key1) { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + byte[] endSeekKey = Arrays.copyOf(key1, combinedKeySize); + Arrays.fill(endSeekKey, key2Position, key2EndPosition, (byte) 0xFF); + return endSeekKey; + } + + @NotNull + private byte[] getKey1(@NotNull byte[] combinedKey) { + if (combinedKey.length != combinedKeySize) { + throw new IndexOutOfBoundsException(combinedKey.length, combinedKeySize, combinedKeySize); + } + return Arrays.copyOfRange(combinedKey, key1Position, key1EndPosition); + } + + @NotNull + private byte[] getKey2(@NotNull byte[] combinedKey) { + return Arrays.copyOfRange(combinedKey, key2Position, key2EndPosition); + } + + @NotNull + private byte[] 
getCombinedKey(@NotNull byte[] key1, @NotNull byte[] key2) { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + if (key2.length != key2Size) { + throw new IndexOutOfBoundsException(key2.length, key2Size, key2Size); + } + var combinedKey = new byte[combinedKeySize]; + System.arraycopy(key1, 0, combinedKey, key1Position, key1Size); + System.arraycopy(key2, 0, combinedKey, key2Position, key2Size); + return combinedKey; + } + + private ReadOptions resolveSnapshot(LLSnapshot snapshot) { + if (snapshot != null) { + return new ReadOptions().setSnapshot(snapshotResolver.apply(snapshot)); + } else { + return EMPTY_READ_OPTIONS; + } + } + + @Override + public UnmodifiableIterableMap get(@Nullable LLSnapshot snapshot, byte[] key) throws IOException { + if (key.length != key1Size) { + throw new IndexOutOfBoundsException(key.length, key1Size, key1Size); + } + ObjectArrayList keys = new ObjectArrayList<>(); + ObjectArrayList values = new ObjectArrayList<>(); + try (var iterator = db.newIterator(cfh, resolveSnapshot(snapshot))) { + iterator.seek(key); + while (iterator.isValid()) { + + byte[] combinedKey = iterator.key(); + + if (!isSubKey(key, combinedKey)) { + break; + } + + byte[] key2 = getKey2(combinedKey); + byte[] value = iterator.value(); + keys.add(key2); + values.add(value); + + iterator.next(); + } + } + + return UnmodifiableIterableMap.of(keys.toArray(byte[][]::new), values.toArray(byte[][]::new)); + } + + @Override + public Optional get(@Nullable LLSnapshot snapshot, byte[] key1, byte[] key2) throws IOException { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + if (key2.length != key2Size) { + throw new IndexOutOfBoundsException(key2.length, key2Size, key2Size); + } + try { + Holder data = new Holder<>(); + byte[] combinedKey = getCombinedKey(key1, key2); + if (db.keyMayExist(cfh, resolveSnapshot(snapshot), combinedKey, data)) { + if (data.getValue() != null) { + return Optional.of(data.getValue()); + } else { + byte[] value = db.get(cfh, resolveSnapshot(snapshot), combinedKey); + return Optional.ofNullable(value); + } + } else { + return Optional.empty(); + } + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public boolean isEmpty(@Nullable LLSnapshot snapshot, byte[] key1) { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + byte[] startSeekKey = getStartSeekKey(key1); + try (var iterator = db.newIterator(cfh, resolveSnapshot(snapshot))) { + iterator.seek(startSeekKey); + if (!iterator.isValid()) { + return true; + } + byte[] startKey = iterator.key(); + return !isSubKey(key1, startKey); + } + } + + @Override + public boolean contains(@Nullable LLSnapshot snapshot, byte[] key1, byte[] key2) throws IOException { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + if (key2.length != key2Size) { + throw new IndexOutOfBoundsException(key2.length, key2Size, key2Size); + } + try { + var combinedKey = getCombinedKey(key1, key2); + int size = RocksDB.NOT_FOUND; + Holder data = new Holder<>(); + if (db.keyMayExist(cfh, resolveSnapshot(snapshot), combinedKey, data)) { + if (data.getValue() != null) { + size = data.getValue().length; + } else { + size = db.get(cfh, resolveSnapshot(snapshot), combinedKey, NO_DATA); + } + } + return size != RocksDB.NOT_FOUND; + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + 
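+	// Note on the read path above: keyMayExist() first consults the memtable and the
+	// bloom filters, so a negative answer avoids touching the SST files at all; the
+	// Holder is filled only when the value happens to be available in memory,
+	// otherwise a regular get() is issued to actually read (or size-check) the value.
+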
//todo: use WriteBatch to enhance performance
+	@Override
+	public void put(byte[] key1, UnmodifiableIterableMap<byte[], byte[]> value) throws IOException {
+		if (key1.length != key1Size) {
+			throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size);
+		}
+		try {
+			var bytesValue = Bytes.ofMap(value);
+			var alreadyEditedKeys = new ObjectOpenHashSet<Bytes>();
+
+			// Delete old keys and change keys that are already present
+			try (var iterator = db.newIterator(cfh)) {
+				iterator.seek(getStartSeekKey(key1));
+				while (iterator.isValid()) {
+					byte[] combinedKey = iterator.key();
+
+					if (!isSubKey(key1, combinedKey)) {
+						// The key is outside of key1: exit from the iteration
+						break;
+					}
+
+					byte[] key2 = getKey2(combinedKey);
+					var valueToSetHere = bytesValue.get(key2);
+					if (valueToSetHere == null) {
+						// key not present in the new data: remove it from the database
+						db.delete(cfh, combinedKey);
+					} else {
+						// key present in the new data: replace it in the database
+						alreadyEditedKeys.add(new Bytes(key2));
+						db.put(cfh, combinedKey, valueToSetHere.data);
+					}
+
+					iterator.next();
+				}
+			}
+
+			// Add the new keys, skipping the keys that were already replaced above
+			var mapIterator = bytesValue.fastIterator();
+			while (mapIterator.hasNext()) {
+				var mapEntry = mapIterator.next();
+				var key2 = mapEntry.getKey();
+				if (key2.data.length != key2Size) {
+					throw new IndexOutOfBoundsException(key2.data.length, key2Size, key2Size);
+				}
+
+				if (!alreadyEditedKeys.contains(key2)) {
+					var value2 = mapEntry.getValue();
+					db.put(cfh, getCombinedKey(key1, key2.data), value2.data);
+				}
+			}
+		} catch (RocksDBException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	//todo: use WriteBatch to enhance performance
+	@Override
+	public void putMulti(byte[][] keys1, UnmodifiableIterableMap<byte[], byte[]>[] values) throws IOException {
+		if (keys1.length == values.length) {
+			for (int i = 0; i < keys1.length; i++) {
+				put(keys1[i], values[i]);
+			}
+		} else {
+			throw new IOException("Wrong parameters count");
+		}
+	}
+
+	@Override
+	public Optional<byte[]> put(byte[] key1, byte[] key2, byte[] value, LLDictionaryResultType resultType)
+			throws IOException {
+		if (key1.length != key1Size) {
+			throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size);
+		}
+		if (key2.length != key2Size) {
+			throw new IndexOutOfBoundsException(key2.length, key2Size, key2Size);
+		}
+		try {
+			byte[] response = null;
+			var combinedKey = getCombinedKey(key1, key2);
+			switch (resultType) {
+				case VALUE_CHANGED:
+					response = LLUtils.booleanToResponse(!this.contains(null, key1, key2));
+					break;
+				case PREVIOUS_VALUE:
+					var data = new Holder<byte[]>();
+					if (db.keyMayExist(cfh, combinedKey, data)) {
+						if (data.getValue() != null) {
+							response = data.getValue();
+						} else {
+							response = db.get(cfh, combinedKey);
+						}
+					} else {
+						response = null;
+					}
+					break;
+			}
+			db.put(cfh, combinedKey, value);
+			return Optional.ofNullable(response);
+		} catch (RocksDBException e) {
+			throw new IOException(e);
+		}
+	}
+
+	//todo: use WriteBatch to enhance performance
+	@Override
+	public void putMulti(byte[] key1,
+			byte[][] keys2,
+			byte[][] values2,
+			LLDictionaryResultType resultType,
+			Consumer<byte[]> responses) throws IOException {
+		if (keys2.length == values2.length) {
+			for (int i = 0; i < keys2.length; i++) {
+				var result = put(key1, keys2[i], values2[i], resultType);
+				if (resultType != LLDictionaryResultType.VOID) {
+					responses.accept(result.orElse(NO_DATA));
+				}
+			}
+		} else {
+			throw new IOException("Wrong parameters count");
+		}
+	}
+
+	//todo: use WriteBatch to enhance performance
+	@Override
+	public void 
putMulti(byte[][] keys1, + byte[][] keys2, + byte[][] values2, + LLDictionaryResultType resultType, + Consumer responses) throws IOException { + if (keys1.length == keys2.length && keys2.length == values2.length) { + for (int i = 0; i < keys1.length; i++) { + var result = put(keys1[i], keys2[i], values2[i], resultType); + if (resultType != LLDictionaryResultType.VOID) { + responses.accept(result.orElse(NO_DATA)); + } + } + } else { + throw new IOException("Wrong parameters count"); + } + } + + @Override + public Optional remove(byte[] key1, byte[] key2, LLDictionaryResultType resultType) throws IOException { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + if (key2.length != key2Size) { + throw new IndexOutOfBoundsException(key2.length, key2Size, key2Size); + } + try { + byte[] response = null; + var combinedKey = getCombinedKey(key1, key2); + switch (resultType) { + case VALUE_CHANGED: + response = LLUtils.booleanToResponse(this.contains(null, key1, key2)); + break; + case PREVIOUS_VALUE: + var data = new Holder(); + if (db.keyMayExist(cfh, combinedKey, data)) { + if (data.getValue() != null) { + response = data.getValue(); + } else { + response = db.get(cfh, combinedKey); + } + } else { + response = null; + } + break; + } + db.delete(cfh, combinedKey); + return Optional.ofNullable(response); + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, TriConsumer consumer) { + forEach_(consumer, snapshot == null ? null : snapshotResolver.apply(snapshot), parallelism); + } + + //todo: implement parallel execution + private void forEach_(TriConsumer consumer, @Nullable Snapshot snapshot, int parallelism) { + try (RocksIterator iterator = (snapshot != null ? db.newIterator(cfh, new ReadOptions().setSnapshot(snapshot)) + : db.newIterator(cfh))) { + iterator.seekToFirst(); + while (iterator.isValid()) { + var combinedKey = iterator.key(); + var key1 = getKey1(combinedKey); + var key2 = getKey2(combinedKey); + + consumer.accept(key1, key2, iterator.value()); + + iterator.next(); + } + } + } + + @Override + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer> consumer) { + forEach_(consumer, snapshot == null ? null : snapshotResolver.apply(snapshot), parallelism); + } + + //todo: implement parallel execution + private void forEach_(BiConsumer> consumer, @Nullable Snapshot snapshot, int parallelism) { + try (RocksIterator iterator = (snapshot != null ? db.newIterator(cfh, new ReadOptions().setSnapshot(snapshot)) + : db.newIterator(cfh))) { + iterator.seekToFirst(); + byte[] currentKey1 = null; + // only append or iterate on this object! byte[].equals() and hash is not trustworthy! + List key2Keys = null; + // only append or iterate on this object! byte[].equals() and hash is not trustworthy! 
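+			// Combined keys are iterated in sorted order, so all the entries that share
+			// the same key1 are contiguous: the loop below accumulates the key2/value
+			// pairs of the current key1 and emits the whole group to the consumer every
+			// time key1 changes.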
+ List key2Values = null; + while (iterator.isValid()) { + var combinedKey = iterator.key(); + var key1 = getKey1(combinedKey); + + if (currentKey1 == null || !Arrays.equals(currentKey1, key1)) { + if (currentKey1 != null && !key2Values.isEmpty()) { + consumer.accept(currentKey1, UnmodifiableIterableMap.of(key2Keys.toArray(byte[][]::new), key2Values.toArray(byte[][]::new))); + } + currentKey1 = key1; + key2Keys = new ArrayList<>(); + key2Values = new ArrayList<>(); + } + + key2Keys.add(getKey2(combinedKey)); + key2Values.add(iterator.value()); + + iterator.next(); + } + if (currentKey1 != null && !key2Values.isEmpty()) { + consumer.accept(currentKey1, UnmodifiableIterableMap.of(key2Keys.toArray(byte[][]::new), key2Values.toArray(byte[][]::new))); + } + } + } + + @Override + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, byte[] key, BiConsumer consumer) { + forEach_(key, consumer, snapshot == null ? null : snapshotResolver.apply(snapshot), parallelism); + } + + //todo: implement parallel execution + private void forEach_(byte[] key1, BiConsumer consumer, @Nullable Snapshot snapshot, int parallelism) { + try (RocksIterator iterator = (snapshot != null ? db.newIterator(cfh, new ReadOptions().setSnapshot(snapshot)) + : db.newIterator(cfh))) { + iterator.seek(getStartSeekKey(key1)); + while (iterator.isValid()) { + byte[] combinedKey = iterator.key(); + + if (!isSubKey(key1, combinedKey)) { + // The key is outside of key1: exit from the iteration + break; + } + + byte[] key2 = getKey2(combinedKey); + byte[] value2 = iterator.value(); + consumer.accept(key2, value2); + + iterator.next(); + } + } + } + + //todo: implement parallel execution + //todo: implement replaceKeys = false optimization (like in LLLocalDictionary), check if it's feasible + @Override + public void replaceAll(int parallelism, boolean replaceKeys, TriFunction> consumer) throws IOException { + var snapshot = db.getSnapshot(); + try { + try (RocksIterator iter = db.newIterator(cfh, new ReadOptions().setSnapshot(snapshot)); + CappedWriteBatch writeBatch = new CappedWriteBatch(db, LLLocalDictionary.CAPPED_WRITE_BATCH_CAP, LLLocalDictionary.RESERVED_WRITE_BATCH_SIZE, LLLocalDictionary.MAX_WRITE_BATCH_SIZE, LLLocalDictionary.BATCH_WRITE_OPTIONS)) { + + iter.seekToFirst(); + + while (iter.isValid()) { + + writeBatch.delete(cfh, iter.key()); + + iter.next(); + } + + iter.seekToFirst(); + + while (iter.isValid()) { + var combinedKey = iter.key(); + var key1 = getKey1(combinedKey); + var key2 = getKey2(combinedKey); + + var result = consumer.apply(key1, key2, iter.value()); + if (result.getLeft().length != key1Size) { + throw new IndexOutOfBoundsException(result.getLeft().length, key1Size, key1Size); + } + if (result.getMiddle().length != key2Size) { + throw new IndexOutOfBoundsException(result.getMiddle().length, key2Size, key2Size); + } + + writeBatch.put(cfh, getCombinedKey(result.getLeft(), result.getMiddle()), result.getRight()); + + iter.next(); + } + + writeBatch.writeToDbAndClose(); + } + } catch (RocksDBException ex) { + throw new IOException(ex); + } finally { + db.releaseSnapshot(snapshot); + snapshot.close(); + } + } + + //todo: implement parallel execution + //todo: implement replaceKeys = false optimization (like in LLLocalDictionary), check if it's feasible + @Override + public void replaceAll(int parallelism, boolean replaceKeys, BiFunction, Entry>> consumer) + throws IOException { + try { + var snapshot = db.getSnapshot(); + try (RocksIterator iter = db.newIterator(cfh, new 
ReadOptions().setSnapshot(snapshot)); + CappedWriteBatch writeBatch = new CappedWriteBatch(db, LLLocalDictionary.CAPPED_WRITE_BATCH_CAP, LLLocalDictionary.RESERVED_WRITE_BATCH_SIZE, LLLocalDictionary.MAX_WRITE_BATCH_SIZE, LLLocalDictionary.BATCH_WRITE_OPTIONS)) { + + iter.seekToFirst(); + + while (iter.isValid()) { + + writeBatch.delete(cfh, iter.key()); + + iter.next(); + } + + iter.seekToFirst(); + + byte[] currentKey1 = null; + // only append or iterate on this object! byte[].equals() and hash is not trustworthy! + ObjectArrayList key2Keys = null; + // only append or iterate on this object! byte[].equals() and hash is not trustworthy! + ObjectArrayList key2Values = null; + while (iter.isValid()) { + var combinedKey = iter.key(); + var key1 = getKey1(combinedKey); + + if (currentKey1 == null || !Arrays.equals(currentKey1, key1)) { + if (currentKey1 != null && !key2Values.isEmpty()) { + replaceAll_(writeBatch, + currentKey1, + key2Keys.toArray(byte[][]::new), + key2Values.toArray(byte[][]::new), + consumer + ); + } + currentKey1 = key1; + key2Keys = new ObjectArrayList<>(); + key2Values = new ObjectArrayList<>(); + } + + key2Keys.add(getKey2(combinedKey)); + key2Values.add(iter.value()); + + iter.next(); + } + if (currentKey1 != null && !key2Values.isEmpty()) { + replaceAll_(writeBatch, + currentKey1, + key2Keys.toArray(byte[][]::new), + key2Values.toArray(byte[][]::new), + consumer + ); + } + + writeBatch.writeToDbAndClose(); + } finally { + db.releaseSnapshot(snapshot); + snapshot.close(); + } + } catch (RocksDBException exception) { + throw new IOException(exception); + } + } + + private void replaceAll_(WriteBatchInterface writeBatch, + byte[] key1, + byte[][] key2Keys, + byte[][] key2Values, + BiFunction, Entry>> consumer) + throws RocksDBException { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + var previousValues = UnmodifiableMap.of(key2Keys, key2Values); + var result = consumer.apply(key1, previousValues); + + var resultKey1 = result.getKey(); + if (resultKey1.length != key1Size) { + throw new IndexOutOfBoundsException(resultKey1.length, key1Size, key1Size); + } + var resultValues = result.getValue(); + + var mapIterator = resultValues.fastIterator(); + while (mapIterator.hasNext()) { + var mapEntry = mapIterator.next(); + var key2 = mapEntry.getKey(); + if (key2.data.length != key2Size) { + throw new IndexOutOfBoundsException(key2.data.length, key2Size, key2Size); + } + + var value2 = mapEntry.getValue(); + writeBatch.put(cfh, getCombinedKey(key1, key2.data), value2); + } + } + + //todo: implement parallel execution + //todo: implement replaceKeys = false optimization (like in LLLocalDictionary), check if it's feasible + @Override + public void replaceAll(int parallelism, boolean replaceKeys, byte[] key1, BiFunction> consumer) throws IOException { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + try { + var snapshot = db.getSnapshot(); + try (RocksIterator iter = db.newIterator(cfh, new ReadOptions().setSnapshot(snapshot)); + CappedWriteBatch writeBatch = new CappedWriteBatch(db, LLLocalDictionary.CAPPED_WRITE_BATCH_CAP, LLLocalDictionary.RESERVED_WRITE_BATCH_SIZE, LLLocalDictionary.MAX_WRITE_BATCH_SIZE, LLLocalDictionary.BATCH_WRITE_OPTIONS)) { + + iter.seek(getStartSeekKey(key1)); + + while (iter.isValid()) { + byte[] combinedKey = iter.key(); + + if (!isSubKey(key1, combinedKey)) { + // The key is outside of key1: exit from the iteration + break; + } + 
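+				// First pass: stage a delete for every entry under key1. The iterator reads
+				// from the snapshot taken above, so it is unaffected by these deletes, which
+				// the capped batch flushes to the database in bounded chunks.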
+				writeBatch.delete(cfh, combinedKey);
+
+				iter.next();
+			}
+
+			iter.seek(getStartSeekKey(key1));
+
+			while (iter.isValid()) {
+				byte[] combinedKey = iter.key();
+
+				if (!isSubKey(key1, combinedKey)) {
+					// The key is outside of key1: exit from the iteration
+					break;
+				}
+
+				byte[] key2 = getKey2(combinedKey);
+				byte[] value2 = iter.value();
+
+				var result = consumer.apply(key2, value2);
+				if (result.getKey().length != key2Size) {
+					throw new IndexOutOfBoundsException(result.getKey().length, key2Size, key2Size);
+				}
+
+				// The consumer returns only the new key2: recombine it with key1 before
+				// writing it back, otherwise the combined-key layout would be corrupted
+				writeBatch.put(cfh, getCombinedKey(key1, result.getKey()), result.getValue());
+
+				iter.next();
+			}
+
+			writeBatch.writeToDbAndClose();
+			} finally {
+				db.releaseSnapshot(snapshot);
+				snapshot.close();
+			}
+		} catch (RocksDBException e) {
+			throw new IOException(e);
+		}
+	}
+
+	// Keep this method in sync with LLLocalDictionary.clear()
+	@Override
+	public void clear() throws IOException {
+		try {
+			List<byte[]> ranges = new ArrayList<>();
+			byte[] firstKey = null;
+			byte[] lastKey = null;
+			boolean empty = false;
+			while (!empty) {
+				// retrieve the range extremities
+				try (RocksIterator iter = db.newIterator(cfh)) {
+					iter.seekToFirst();
+					if (iter.isValid()) {
+						firstKey = iter.key();
+						iter.seekToLast();
+						lastKey = iter.key();
+						ranges.add(firstKey);
+						ranges.add(lastKey);
+					} else {
+						empty = true;
+					}
+				}
+
+				if (!empty) {
+					if (Arrays.equals(firstKey, lastKey)) {
+						// Delete single key
+						db.delete(cfh, lastKey);
+					} else {
+						// Delete all
+						db.deleteRange(cfh, firstKey, lastKey);
+						// Delete the end key too, because it's not included in the deleteRange domain
+						db.delete(cfh, lastKey);
+					}
+				}
+			}
+
+			// Delete the SST files that are fully contained in the deleted ranges
+			db.deleteFilesInRanges(cfh, ranges, true);
+
+			// Compact range
+			db.compactRange(cfh);
+
+			db.flush(new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true), cfh);
+			db.flushWal(true);
+
+			var finalSize = exactSize(null);
+			if (finalSize != 0) {
+				throw new IllegalStateException("The dictionary is not empty after calling clear()");
+			}
+		} catch (RocksDBException e) {
+			throw new IOException(e);
+		}
+	}
+
+	@Override
+	public Optional<UnmodifiableIterableMap<byte[], byte[]>> clear(byte[] key1, LLDictionaryResultType resultType)
+			throws IOException {
+		if (key1.length != key1Size) {
+			throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size);
+		}
+		try {
+			Optional<UnmodifiableIterableMap<byte[], byte[]>> result;
+			switch (resultType) {
+				case PREVIOUS_VALUE:
+					List<byte[]> keys = new ArrayList<>();
+					List<byte[]> values = new ArrayList<>();
+					try (RocksIterator iter = db.newIterator(cfh)) {
+						iter.seek(getStartSeekKey(key1));
+						while (iter.isValid()) {
+							var combinedKey = iter.key();
+
+							if (!isSubKey(key1, combinedKey)) {
+								break;
+							}
+
+							keys.add(getKey2(combinedKey));
+							values.add(iter.value());
+							// advance the iterator, otherwise this loop would never terminate
+							iter.next();
+						}
+					}
+					result = Optional.of(UnmodifiableIterableMap.of(keys.toArray(byte[][]::new), values.toArray(byte[][]::new)));
+					break;
+				case VALUE_CHANGED:
+					if (isEmpty(null, key1)) {
+						result = Optional.empty();
+					} else {
+						result = Optional.of(UnmodifiableIterableMap.of(NO_DATA_MAP, NO_DATA_MAP));
+					}
+					break;
+				case VOID:
+				default:
+					result = Optional.empty();
+					break;
+			}
+			db.deleteRange(cfh, getStartSeekKey(key1), getEndSeekKey(key1));
+			return result;
+		} catch (RocksDBException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public long size(@Nullable LLSnapshot snapshot, boolean fast) {
+		return fast ? 
fastSize(snapshot) : exactSize(snapshot); + } + + public long fastSize(@Nullable LLSnapshot snapshot) { + try { + if (snapshot != null) { + return this.exactSize(snapshot); + } + return db.getLongProperty(cfh, "rocksdb.estimate-num-keys"); + } catch (RocksDBException e) { + e.printStackTrace(); + return 0; + } + } + + public long exactSize(@Nullable LLSnapshot snapshot) { + long count = 0; + byte[] currentKey1 = null; + try (RocksIterator iter = db.newIterator(cfh, resolveSnapshot(snapshot))) { + iter.seekToFirst(); + while (iter.isValid()) { + byte[] combinedKey = iter.key(); + + if (!isSubKey(currentKey1, combinedKey)) { + count++; + currentKey1 = getKey1(combinedKey); + } + iter.next(); + } + return count; + } + } + + @Override + public long exactSize(@Nullable LLSnapshot snapshot, byte[] key1) { + if (key1.length != key1Size) { + throw new IndexOutOfBoundsException(key1.length, key1Size, key1Size); + } + long count = 0; + try (RocksIterator iterator = db.newIterator(cfh, resolveSnapshot(snapshot))) { + iterator.seek(getStartSeekKey(key1)); + while (iterator.isValid()) { + byte[] combinedKey = iterator.key(); + + if (!isSubKey(key1, combinedKey)) { + // The key is outside of key1: exit from the iteration + break; + } + + count++; + iterator.next(); + } + } + return count; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDictionary.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDictionary.java new file mode 100644 index 0000000..5d4fe90 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalDictionary.java @@ -0,0 +1,373 @@ +package it.cavallium.dbengine.database.disk; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import java.util.function.Function; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.FlushOptions; +import org.rocksdb.Holder; +import org.rocksdb.ReadOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.RocksIterator; +import org.rocksdb.Snapshot; +import org.rocksdb.WriteBatch; +import org.rocksdb.WriteOptions; +import org.warp.commonutils.concurrency.atomicity.NotAtomic; +import it.cavallium.dbengine.database.LLDictionary; +import it.cavallium.dbengine.database.LLDictionaryResultType; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.database.LLUtils; + +@NotAtomic +public class LLLocalDictionary implements LLDictionary { + + private static final boolean USE_CURRENT_FASTSIZE_FOR_OLD_SNAPSHOTS = true; + static final int RESERVED_WRITE_BATCH_SIZE = 2 * 1024 * 1024; // 2MiB + static final long MAX_WRITE_BATCH_SIZE = 1024L * 1024L * 1024L; // 1GiB + static final int CAPPED_WRITE_BATCH_CAP = 50000; // 50K operations + static final WriteOptions BATCH_WRITE_OPTIONS = new WriteOptions().setLowPri(true); + + private static final byte[] NO_DATA = new byte[0]; + private static final ReadOptions EMPTY_READ_OPTIONS = new ReadOptions(); + private static final List EMPTY_UNMODIFIABLE_LIST = List.of(); + private final RocksDB db; + private final ColumnFamilyHandle cfh; + private final String databaseName; + private final Function snapshotResolver; + + public 
LLLocalDictionary(@NotNull RocksDB db, + @NotNull ColumnFamilyHandle columnFamilyHandle, + String databaseName, + Function snapshotResolver) { + Objects.requireNonNull(db); + this.db = db; + Objects.requireNonNull(columnFamilyHandle); + this.cfh = columnFamilyHandle; + this.databaseName = databaseName; + this.snapshotResolver = snapshotResolver; + } + + @Override + public String getDatabaseName() { + return databaseName; + } + + private ReadOptions resolveSnapshot(LLSnapshot snapshot) { + if (snapshot != null) { + return getReadOptions(snapshotResolver.apply(snapshot)); + } else { + return EMPTY_READ_OPTIONS; + } + } + + private ReadOptions getReadOptions(Snapshot snapshot) { + if (snapshot != null) { + return new ReadOptions().setSnapshot(snapshot); + } else { + return EMPTY_READ_OPTIONS; + } + } + + @Override + public Optional get(@Nullable LLSnapshot snapshot, byte[] key) throws IOException { + try { + Holder data = new Holder<>(); + if (db.keyMayExist(cfh, resolveSnapshot(snapshot), key, data)) { + if (data.getValue() != null) { + return Optional.of(data.getValue()); + } else { + byte[] value = db.get(cfh, resolveSnapshot(snapshot), key); + return Optional.ofNullable(value); + } + } else { + return Optional.empty(); + } + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public boolean contains(@Nullable LLSnapshot snapshot, byte[] key) throws IOException { + return contains_(snapshot, key); + } + + private boolean contains_(@Nullable LLSnapshot snapshot, byte[] key) throws IOException { + try { + int size = RocksDB.NOT_FOUND; + Holder data = new Holder<>(); + if (db.keyMayExist(cfh, resolveSnapshot(snapshot), key, data)) { + if (data.getValue() != null) { + size = data.getValue().length; + } else { + size = db.get(cfh, resolveSnapshot(snapshot), key, NO_DATA); + } + } + return size != RocksDB.NOT_FOUND; + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public Optional put(byte[] key, byte[] value, LLDictionaryResultType resultType) throws IOException { + try { + byte[] response = null; + switch (resultType) { + case VALUE_CHANGED: + response = LLUtils.booleanToResponse(!contains_(null, key)); + break; + case PREVIOUS_VALUE: + var data = new Holder(); + if (db.keyMayExist(cfh, key, data)) { + if (data.getValue() != null) { + response = data.getValue(); + } else { + response = db.get(cfh, key); + } + } else { + response = null; + } + break; + } + db.put(cfh, key, value); + return Optional.ofNullable(response); + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public void putMulti(byte[][] key, byte[][] value, LLDictionaryResultType resultType, Consumer responsesConsumer) + throws IOException { + if (key.length == value.length) { + List responses; + try (WriteBatch writeBatch = new WriteBatch(RESERVED_WRITE_BATCH_SIZE)) { + + if (resultType == LLDictionaryResultType.VOID) { + responses = EMPTY_UNMODIFIABLE_LIST; + } else { + responses = db.multiGetAsList(newCfhList(cfh, key.length), Arrays.asList(key)); + } + + for (int i = 0; i < key.length; i++) { + writeBatch.put(cfh, key[i], value[i]); + } + + db.write(BATCH_WRITE_OPTIONS, writeBatch); + } catch (RocksDBException e) { + throw new IOException(e); + } + + for (byte[] response : responses) { + responsesConsumer.accept(response); + } + } else { + throw new IOException("Wrong parameters count"); + } + } + + private static List newCfhList(ColumnFamilyHandle cfh, int size) { + var list = new ArrayList(size); + for (int i = 0; i < size; 
i++) {
+			list.add(cfh);
+		}
+		return list;
+	}
+
+	@Override
+	public Optional<byte[]> remove(byte[] key, LLDictionaryResultType resultType) throws IOException {
+		try {
+			byte[] response = null;
+			switch (resultType) {
+				case VALUE_CHANGED:
+					response = LLUtils.booleanToResponse(contains_(null, key));
+					break;
+				case PREVIOUS_VALUE:
+					var data = new Holder<byte[]>();
+					if (db.keyMayExist(cfh, key, data)) {
+						if (data.getValue() != null) {
+							response = data.getValue();
+						} else {
+							response = db.get(cfh, key);
+						}
+					} else {
+						response = null;
+					}
+					break;
+			}
+			db.delete(cfh, key);
+			return Optional.ofNullable(response);
+		} catch (RocksDBException e) {
+			throw new IOException(e);
+		}
+	}
+
+	//todo: implement parallel forEach
+	@Override
+	public void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer<byte[], byte[]> consumer) {
+		try (RocksIterator iter = db.newIterator(cfh, resolveSnapshot(snapshot))) {
+			iter.seekToFirst();
+			while (iter.isValid()) {
+				consumer.accept(iter.key(), iter.value());
+				iter.next();
+			}
+		}
+	}
+
+	//todo: implement parallel replace
+	@Override
+	public void replaceAll(int parallelism, boolean replaceKeys, BiFunction<byte[], byte[], Entry<byte[], byte[]>> consumer) throws IOException {
+		try {
+			try (var snapshot = replaceKeys ? db.getSnapshot() : null) {
+				try (RocksIterator iter = db.newIterator(cfh, getReadOptions(snapshot));
+						CappedWriteBatch writeBatch = new CappedWriteBatch(db, CAPPED_WRITE_BATCH_CAP, RESERVED_WRITE_BATCH_SIZE, MAX_WRITE_BATCH_SIZE, BATCH_WRITE_OPTIONS)) {
+
+					iter.seekToFirst();
+
+					if (replaceKeys) {
+						while (iter.isValid()) {
+							writeBatch.delete(cfh, iter.key());
+
+							iter.next();
+						}
+					}
+
+					iter.seekToFirst();
+
+					while (iter.isValid()) {
+
+						var result = consumer.apply(iter.key(), iter.value());
+						boolean keyDiffers = !Arrays.equals(iter.key(), result.getKey());
+						if (!replaceKeys && keyDiffers) {
+							throw new IOException("Tried to replace a key");
+						}
+
+						// put if changed or if keys can be swapped/replaced
+						if (replaceKeys || !Arrays.equals(iter.value(), result.getValue())) {
+							writeBatch.put(cfh, result.getKey(), result.getValue());
+						}
+
+						iter.next();
+					}
+
+					writeBatch.writeToDbAndClose();
+				} finally {
+					db.releaseSnapshot(snapshot);
+				}
+			}
+		} catch (RocksDBException e) {
+			throw new IOException(e);
+		}
+	}
+
+	// Keep this method in sync with LLLocalDeepDictionary.clear()
+	@Override
+	public void clear() throws IOException {
+		try (RocksIterator iter = db.newIterator(cfh);
+				CappedWriteBatch writeBatch = new CappedWriteBatch(db, CAPPED_WRITE_BATCH_CAP, RESERVED_WRITE_BATCH_SIZE, MAX_WRITE_BATCH_SIZE, BATCH_WRITE_OPTIONS)) {
+
+			iter.seekToFirst();
+
+			while (iter.isValid()) {
+				writeBatch.delete(cfh, iter.key());
+
+				iter.next();
+			}
+
+			writeBatch.writeToDbAndClose();
+
+			// Compact range
+			db.compactRange(cfh);
+
+			db.flush(new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true), cfh);
+			db.flushWal(true);
+
+			var finalSize = exactSize(null);
+			if (finalSize != 0) {
+				throw new IllegalStateException("The dictionary is not empty after calling clear()");
+			}
+		} catch (RocksDBException e) {
+			throw new IOException(e);
+		}
+	}
+
+	@Override
+	public long size(@Nullable LLSnapshot snapshot, boolean fast) throws IOException {
+		return fast ? 
fastSize(snapshot) : exactSize(snapshot); + } + + public long fastSize(@Nullable LLSnapshot snapshot) { + var rocksdbSnapshot = resolveSnapshot(snapshot); + if (USE_CURRENT_FASTSIZE_FOR_OLD_SNAPSHOTS || rocksdbSnapshot.snapshot() == null) { + try { + return db.getLongProperty(cfh, "rocksdb.estimate-num-keys"); + } catch (RocksDBException e) { + e.printStackTrace(); + return 0; + } + } else { + long count = 0; + try (RocksIterator iter = db.newIterator(cfh, rocksdbSnapshot)) { + iter.seekToFirst(); + // If it's a fast size of a snapshot, count only up to 1000 elements + while (iter.isValid() && count < 1000) { + count++; + iter.next(); + } + return count; + } + } + } + + public long exactSize(@Nullable LLSnapshot snapshot) { + long count = 0; + try (RocksIterator iter = db.newIterator(cfh, resolveSnapshot(snapshot))) { + iter.seekToFirst(); + while (iter.isValid()) { + count++; + iter.next(); + } + return count; + } + } + + @Override + public boolean isEmpty(@Nullable LLSnapshot snapshot) { + try (RocksIterator iter = db.newIterator(cfh, resolveSnapshot(snapshot))) { + iter.seekToFirst(); + if (iter.isValid()) { + return false; + } + } + return true; + } + + @Override + public Optional> removeOne() throws IOException { + try (RocksIterator iter = db.newIterator(cfh)) { + iter.seekToFirst(); + if (iter.isValid()) { + byte[] key = iter.key(); + byte[] value = iter.value(); + db.delete(cfh, key); + return Optional.of(Map.entry(key, value)); + } + } catch (RocksDBException e) { + throw new IOException(e); + } + return Optional.empty(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalKeyValueDatabase.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalKeyValueDatabase.java new file mode 100644 index 0000000..2b16135 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalKeyValueDatabase.java @@ -0,0 +1,431 @@ +package it.cavallium.dbengine.database.disk; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import org.rocksdb.BlockBasedTableConfig; +import org.rocksdb.BloomFilter; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.CompactionStyle; +import org.rocksdb.CompressionType; +import org.rocksdb.DBOptions; +import org.rocksdb.DbPath; +import org.rocksdb.FlushOptions; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.Snapshot; +import org.rocksdb.WALRecoveryMode; +import it.cavallium.dbengine.database.Column; +import it.cavallium.dbengine.database.LLDeepDictionary; +import it.cavallium.dbengine.database.LLDictionary; +import it.cavallium.dbengine.database.LLKeyValueDatabase; +import it.cavallium.dbengine.database.LLSingleton; +import it.cavallium.dbengine.database.LLSnapshot; + +public class LLLocalKeyValueDatabase implements LLKeyValueDatabase { + + static { + RocksDB.loadLibrary(); + } + + private static final ColumnFamilyDescriptor DEFAULT_COLUMN_FAMILY = new ColumnFamilyDescriptor( + RocksDB.DEFAULT_COLUMN_FAMILY); + + 
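+	// Snapshot bookkeeping: callers receive an LLSnapshot that wraps a plain
+	// sequence number; snapshotsHandles maps that number back to the native
+	// RocksDB Snapshot so the dictionaries can resolve it and release it later.
+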
private final Path dbPath; + private final String name; + private RocksDB db; + private final Map handles; + private final ConcurrentHashMap snapshotsHandles = new ConcurrentHashMap<>(); + private final AtomicLong nextSnapshotNumbers = new AtomicLong(1); + + public LLLocalKeyValueDatabase(String name, Path path, List columns, List handles, + boolean crashIfWalError, boolean lowMemory) throws IOException { + Options options = openRocksDb(path, crashIfWalError, lowMemory); + try { + List descriptors = new LinkedList<>(); + for (Column column : columns) { + descriptors + .add(new ColumnFamilyDescriptor(column.getName().getBytes(StandardCharsets.US_ASCII))); + } + + // Get databases directory path + Path databasesDirPath = path.toAbsolutePath().getParent(); + + String dbPathString = databasesDirPath.toString() + File.separatorChar + path.getFileName(); + Path dbPath = Paths.get(dbPathString); + this.dbPath = dbPath; + this.name = name; + + createIfNotExists(descriptors, options, dbPath, dbPathString); + // Create all column families that don't exist + createAllColumns(descriptors, options, dbPathString); + + // a factory method that returns a RocksDB instance + this.db = RocksDB.open(new DBOptions(options), dbPathString, descriptors, handles); + this.handles = new HashMap<>(); + for (int i = 0; i < columns.size(); i++) { + this.handles.put(columns.get(i), handles.get(i)); + } + + /* + System.out.println("----Data----"); + this.handles.forEach((Column column, ColumnFamilyHandle hnd) -> { + System.out.println("Column: " + column.getName()); + if (!column.getName().contains("hash")) { + var val = new ArrayList(); + var iter = db.newIterator(hnd); + iter.seekToFirst(); + while (iter.isValid()) { + val.add(Column.toString(iter.key())); + System.out.println(" " + Column.toString(iter.key())); + iter.next(); + } + } + }); + */ + + /* + System.out.println("----Columns----"); + this.handles.forEach((Column column, ColumnFamilyHandle hnd) -> { + System.out.println("Column: " + column.getName()); + }); + */ + + flushDb(db, handles); + } catch (RocksDBException ex) { + throw new IOException(ex); + } + } + + @Override + public String getDatabaseName() { + return name; + } + + private void flushAndCloseDb(RocksDB db, List handles) + throws RocksDBException { + flushDb(db, handles); + + for (ColumnFamilyHandle handle : handles) { + handle.close(); + } + + db.closeE(); + } + + private void flushDb(RocksDB db, List handles) throws RocksDBException { + // force flush the database + for (int i = 0; i < 2; i++) { + db.flush(new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true), handles); + db.flushWal(true); + db.syncWal(); + } + // end force flush + } + + private static Options openRocksDb(Path path, boolean crashIfWalError, boolean lowMemory) + throws IOException { + // Get databases directory path + Path databasesDirPath = path.toAbsolutePath().getParent(); + // Create base directories + if (Files.notExists(databasesDirPath)) { + Files.createDirectories(databasesDirPath); + } + + // the Options class contains a set of configurable DB options + // that determines the behaviour of the database. 
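+		// Two tuning profiles follow: a low-memory profile (1MiB write buffers, at
+		// most 2 open files, one background thread) and a high-memory profile
+		// (128MiB write buffers, one background thread per core). Both profiles
+		// split the SST files between a "_hot" path (up to 400GiB) and a "_cold"
+		// path (up to a further 600GiB).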
+ var options = new Options(); + options.setCreateIfMissing(true); + options.setCompactionStyle(CompactionStyle.LEVEL); + options.setLevelCompactionDynamicLevelBytes(true); + options.setTargetFileSizeBase(64 * 1024 * 1024); // 64MiB sst file + options.setMaxBytesForLevelBase(4 * 256 * 1024 * 1024); // 4 times the sst file + options.setCompressionType(CompressionType.SNAPPY_COMPRESSION); + options.setManualWalFlush(false); + options.setMinWriteBufferNumberToMerge(3); + options.setMaxWriteBufferNumber(4); + options.setWalTtlSeconds(30); // flush wal after 30 seconds + options.setAvoidFlushDuringShutdown(false); // Flush all WALs during shutdown + options.setAvoidFlushDuringRecovery(false); // Flush all WALs during startup + options.setWalRecoveryMode(crashIfWalError ? WALRecoveryMode.AbsoluteConsistency + : WALRecoveryMode.PointInTimeRecovery); // Crash if the WALs are corrupted. Default: TolerateCorruptedTailRecords + options.setDeleteObsoleteFilesPeriodMicros(20 * 1000000); // 20 seconds + options.setPreserveDeletes(false); + options.setKeepLogFileNum(10); + // Direct I/O parameters. Removed because they use too much disk. + //options.setUseDirectReads(true); + //options.setUseDirectIoForFlushAndCompaction(true); + //options.setCompactionReadaheadSize(2 * 1024 * 1024); // recommend at least 2MB + //options.setWritableFileMaxBufferSize(1024 * 1024); // 1MB by default + if (lowMemory) { + // LOW MEMORY + options + .setBytesPerSync(1024 * 1024) + .setWalBytesPerSync(1024 * 1024) + .setIncreaseParallelism(1) + .setMaxOpenFiles(2) + .optimizeLevelStyleCompaction(1024 * 1024) // 1MiB of ram will be used for level style compaction + .setWriteBufferSize(1024 * 1024) // 1MB + .setWalSizeLimitMB(16) // 16MB + .setMaxTotalWalSize(1024L * 1024L * 1024L) // 1GiB max wal directory size + .setDbPaths(List.of(new DbPath(databasesDirPath.resolve(path.getFileName() + "_hot"), + 400L * 1024L * 1024L * 1024L), // 400GiB + new DbPath(databasesDirPath.resolve(path.getFileName() + "_cold"), + 600L * 1024L * 1024L * 1024L))) // 600GiB + ; + } else { + // HIGH MEMORY + options + .setAllowConcurrentMemtableWrite(true) + .setEnableWriteThreadAdaptiveYield(true) + .setIncreaseParallelism(Runtime.getRuntime().availableProcessors()) + .setBytesPerSync(10 * 1024 * 1024) + .setWalBytesPerSync(10 * 1024 * 1024) + .optimizeLevelStyleCompaction( + 128 * 1024 * 1024) // 128MiB of ram will be used for level style compaction + .setWriteBufferSize(128 * 1024 * 1024) // 128MB + .setWalSizeLimitMB(1024) // 1024MB + .setMaxTotalWalSize(8L * 1024L * 1024L * 1024L) // 8GiB max wal directory size + .setDbPaths(List.of(new DbPath(databasesDirPath.resolve(path.getFileName() + "_hot"), + 400L * 1024L * 1024L * 1024L), // 400GiB + new DbPath(databasesDirPath.resolve(path.getFileName() + "_cold"), + 600L * 1024L * 1024L * 1024L))) // 600GiB + ; + } + + final org.rocksdb.BloomFilter bloomFilter = new BloomFilter(10, false); + final BlockBasedTableConfig tableOptions = new BlockBasedTableConfig(); + tableOptions.setFilterPolicy(bloomFilter); + options.setTableFormatConfig(tableOptions); + + return options; + } + + private void createAllColumns(List totalDescriptors, Options options, + String dbPathString) throws RocksDBException { + List columnFamiliesToCreate = new LinkedList<>(); + + for (ColumnFamilyDescriptor descriptor : totalDescriptors) { + columnFamiliesToCreate.add(descriptor.getName()); + } + + List existingColumnFamilies = RocksDB.listColumnFamilies(options, dbPathString); + + columnFamiliesToCreate.removeIf((columnFamilyName) -> 
{ + for (byte[] cfn : existingColumnFamilies) { + if (Arrays.equals(cfn, columnFamilyName)) { + return true; + } + } + return false; + }); + + List descriptors = new LinkedList<>(); + for (byte[] existingColumnFamily : existingColumnFamilies) { + descriptors.add(new ColumnFamilyDescriptor(existingColumnFamily)); + } + + var handles = new LinkedList(); + + /** + * SkipStatsUpdateOnDbOpen = true because this RocksDB.open session is used only to add just some columns + */ + //var dbOptionsFastLoadSlowEdit = new DBOptions(options.setSkipStatsUpdateOnDbOpen(true)); + + this.db = RocksDB.open(new DBOptions(options), dbPathString, descriptors, handles); + + for (byte[] name : columnFamiliesToCreate) { + db.createColumnFamily(new ColumnFamilyDescriptor(name)).close(); + } + + flushAndCloseDb(db, handles); + } + + private void createIfNotExists(List descriptors, Options options, + Path dbPath, String dbPathString) throws RocksDBException { + if (Files.notExists(dbPath)) { + // Check if handles are all different + var descriptorsSet = new HashSet<>(descriptors); + if (descriptorsSet.size() != descriptors.size()) { + throw new IllegalArgumentException("Descriptors must be unique!"); + } + + List descriptorsToCreate = new LinkedList<>(descriptors); + descriptorsToCreate + .removeIf((cf) -> Arrays.equals(cf.getName(), DEFAULT_COLUMN_FAMILY.getName())); + + /** + * SkipStatsUpdateOnDbOpen = true because this RocksDB.open session is used only to add just some columns + */ + //var dbOptionsFastLoadSlowEdit = options.setSkipStatsUpdateOnDbOpen(true); + + LinkedList handles = new LinkedList<>(); + + this.db = RocksDB.open(options, dbPathString); + for (ColumnFamilyDescriptor columnFamilyDescriptor : descriptorsToCreate) { + handles.add(db.createColumnFamily(columnFamilyDescriptor)); + } + + flushAndCloseDb(db, handles); + } + } + + @Override + public LLSingleton getSingleton(byte[] singletonListColumnName, byte[] name, byte[] defaultValue) + throws IOException { + try { + return new LLLocalSingleton(db, + handles.get(Column.special(Column.toString(singletonListColumnName))), + (snapshot) -> snapshotsHandles.get(snapshot.getSequenceNumber()), + LLLocalKeyValueDatabase.this.name, + name, + defaultValue); + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public LLDictionary getDictionary(byte[] columnName) { + return new LLLocalDictionary(db, + handles.get(Column.special(Column.toString(columnName))), + name, + (snapshot) -> snapshotsHandles.get(snapshot.getSequenceNumber()) + ); + } + + @Override + public LLDeepDictionary getDeepDictionary(byte[] columnName, int keySize, int key2Size) { + return new LLLocalDeepDictionary(db, + handles.get(Column.special(Column.toString(columnName))), + name, + (snapshot) -> snapshotsHandles.get(snapshot.getSequenceNumber()), + keySize, + key2Size + ); + } + + @Override + public long getProperty(String propertyName) throws IOException { + try { + return db.getAggregatedLongProperty(propertyName); + } catch (RocksDBException exception) { + throw new IOException(exception); + } + } + + @Override + public LLSnapshot takeSnapshot() { + var snapshot = db.getSnapshot(); + long currentSnapshotSequenceNumber = nextSnapshotNumbers.getAndIncrement(); + this.snapshotsHandles.put(currentSnapshotSequenceNumber, snapshot); + return new LLSnapshot(currentSnapshotSequenceNumber); + } + + @Override + public void releaseSnapshot(LLSnapshot snapshot) throws IOException { + Snapshot dbSnapshot = this.snapshotsHandles.remove(snapshot.getSequenceNumber()); + if 
(dbSnapshot == null) { + throw new IOException("Snapshot " + snapshot.getSequenceNumber() + " not found!"); + } + db.releaseSnapshot(dbSnapshot); + } + + @Override + public void close() throws IOException { + try { + flushAndCloseDb(db, new ArrayList<>(handles.values())); + deleteUnusedOldLogFiles(); + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + /** + * Call this method ONLY AFTER flushing completely a db and closing it! + */ + private void deleteUnusedOldLogFiles() { + Path basePath = dbPath; + try { + Files + .walk(basePath, 1) + .filter(p -> !p.equals(basePath)) + .filter(p -> { + var fileName = p.getFileName().toString(); + if (fileName.startsWith("LOG.old.")) { + var parts = fileName.split("\\."); + if (parts.length == 3) { + try { + long nameSuffix = Long.parseUnsignedLong(parts[2]); + return true; + } catch (NumberFormatException ex) { + return false; + } + } + } + if (fileName.endsWith(".log")) { + var parts = fileName.split("\\."); + if (parts.length == 2) { + try { + int name = Integer.parseUnsignedInt(parts[0]); + return true; + } catch (NumberFormatException ex) { + return false; + } + } + } + return false; + }) + .filter(p -> { + try { + BasicFileAttributes attrs = Files.readAttributes(p, BasicFileAttributes.class); + if (attrs.isRegularFile() && !attrs.isSymbolicLink() && !attrs.isDirectory()) { + long ctime = attrs.creationTime().toMillis(); + long atime = attrs.lastAccessTime().toMillis(); + long mtime = attrs.lastModifiedTime().toMillis(); + long lastTime = Math.max(Math.max(ctime, atime), mtime); + long safeTime; + if (p.getFileName().toString().startsWith("LOG.old.")) { + safeTime = System.currentTimeMillis() - Duration.ofHours(24).toMillis(); + } else { + safeTime = System.currentTimeMillis() - Duration.ofHours(12).toMillis(); + } + if (lastTime < safeTime) { + return true; + } + } + } catch (IOException ex) { + ex.printStackTrace(); + return false; + } + return false; + }) + .forEach(path -> { + try { + Files.deleteIfExists(path); + System.out.println("Deleted log file \"" + path + "\""); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } catch (IOException ex) { + ex.printStackTrace(); + } + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java new file mode 100644 index 0000000..8144fbf --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java @@ -0,0 +1,431 @@ +package it.cavallium.dbengine.database.disk; + +import it.cavallium.dbengine.database.luceneutil.AdaptiveStreamSearcher; +import it.unimi.dsi.fastutil.objects.ObjectArrayList; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Duration; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; +import org.apache.lucene.index.SnapshotDeletionPolicy; 
+import org.apache.lucene.queries.mlt.MoreLikeThis; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.jetbrains.annotations.Nullable; +import org.warp.commonutils.functional.IOFunction; +import org.warp.commonutils.type.ShortNamedThreadFactory; +import it.cavallium.dbengine.database.LLDocument; +import it.cavallium.dbengine.database.LLKeyScore; +import it.cavallium.dbengine.database.LLLuceneIndex; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.database.LLSort; +import it.cavallium.dbengine.database.LLTerm; +import it.cavallium.dbengine.database.LLTopKeys; +import it.cavallium.dbengine.database.LLUtils; +import it.cavallium.dbengine.database.LuceneUtils; +import it.cavallium.dbengine.database.analyzer.TextFieldsAnalyzer; +import it.cavallium.dbengine.database.luceneutil.LuceneStreamSearcher; +import it.cavallium.luceneserializer.luceneserializer.ParseException; +import it.cavallium.luceneserializer.luceneserializer.QueryParser; +import reactor.core.publisher.EmitterProcessor; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.core.scheduler.Schedulers; +import reactor.util.function.Tuple2; +import reactor.util.function.Tuples; + +public class LLLocalLuceneIndex implements LLLuceneIndex { + + private static final LuceneStreamSearcher streamSearcher = new AdaptiveStreamSearcher(); + private final Duration queryRefreshDebounceTime; + private final Duration commitDebounceTime; + private final String luceneIndexName; + private final SnapshotDeletionPolicy snapshotter; + private final IndexWriter indexWriter; + private final SearcherManager searcherManager; + private final Directory directory; + private final AtomicLong lastSearcherRefresh = new AtomicLong(0); + /** + * Last snapshot sequence number. 
0 is not used + */ + private final AtomicLong lastSnapshotSeqNo = new AtomicLong(0); + /** + * Maps each snapshot sequence number to its index commit point + */ + private final ConcurrentHashMap<Long, LuceneIndexSnapshot> snapshots = new ConcurrentHashMap<>(); + private final ScheduledExecutorService scheduler; + private final boolean lowMemory; + + public LLLocalLuceneIndex(Path luceneBasePath, + String name, + TextFieldsAnalyzer analyzer, + Duration queryRefreshDebounceTime, + Duration commitDebounceTime, + boolean lowMemory) throws IOException { + if (name.length() == 0) { + throw new IOException("Empty lucene database name"); + } + Path directoryPath = luceneBasePath.resolve(name + ".lucene.db"); + this.directory = FSDirectory.open(directoryPath); + this.luceneIndexName = name; + this.snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); + this.lowMemory = lowMemory; + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneUtils.getAnalyzer(analyzer)); + indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); + indexWriterConfig.setIndexDeletionPolicy(snapshotter); + indexWriterConfig.setCommitOnClose(true); + if (lowMemory) { + indexWriterConfig.setRAMBufferSizeMB(32); + indexWriterConfig.setRAMPerThreadHardLimitMB(32); + } else { + indexWriterConfig.setRAMBufferSizeMB(128); + indexWriterConfig.setRAMPerThreadHardLimitMB(512); + } + this.indexWriter = new IndexWriter(directory, indexWriterConfig); + this.searcherManager = new SearcherManager(indexWriter, false, false, null); + this.queryRefreshDebounceTime = queryRefreshDebounceTime; + this.commitDebounceTime = commitDebounceTime; + this.lastSearcherRefresh.set(System.currentTimeMillis()); + this.scheduler = Executors.newSingleThreadScheduledExecutor(new ShortNamedThreadFactory("Lucene")); + scheduler.scheduleAtFixedRate(this::scheduledCommit, + commitDebounceTime.toMillis(), + commitDebounceTime.toMillis(), + TimeUnit.MILLISECONDS + ); + scheduler.scheduleAtFixedRate(this::scheduledQueryRefresh, + queryRefreshDebounceTime.toMillis(), + queryRefreshDebounceTime.toMillis(), + TimeUnit.MILLISECONDS + ); + } + + @Override + public String getLuceneIndexName() { + return luceneIndexName; + } + + @Override + public LLSnapshot takeSnapshot() throws IOException { + long snapshotSeqNo = lastSnapshotSeqNo.incrementAndGet(); + IndexCommit snapshot = takeLuceneSnapshot(); + this.snapshots.put(snapshotSeqNo, new LuceneIndexSnapshot(snapshot)); + return new LLSnapshot(snapshotSeqNo); + } + + /** + * For internal use only. If a new index has no commits yet, this method performs an initial commit + * before taking the snapshot, avoiding the "No index commit to snapshot" exception. 
+ */ + private IndexCommit takeLuceneSnapshot() throws IOException { + try { + return snapshotter.snapshot(); + } catch (IllegalStateException ex) { + if ("No index commit to snapshot".equals(ex.getMessage())) { + indexWriter.commit(); + return snapshotter.snapshot(); + } else { + throw ex; + } + } + } + + @Override + public void releaseSnapshot(LLSnapshot snapshot) throws IOException { + var indexSnapshot = this.snapshots.remove(snapshot.getSequenceNumber()); + if (indexSnapshot == null) { + throw new IOException("Snapshot " + snapshot.getSequenceNumber() + " not found!"); + } + + indexSnapshot.close(); + + var luceneIndexSnapshot = indexSnapshot.getSnapshot(); + snapshotter.release(luceneIndexSnapshot); + // Delete unused files after releasing the snapshot + indexWriter.deleteUnusedFiles(); + } + + @Override + public void addDocument(LLTerm key, LLDocument doc) throws IOException { + indexWriter.addDocument(LLUtils.toDocument(doc)); + } + + @Override + public void addDocuments(Iterable keys, Iterable docs) throws IOException { + indexWriter.addDocuments(LLUtils.toDocuments(docs)); + } + + @Override + public void deleteDocument(LLTerm id) throws IOException { + indexWriter.deleteDocuments(LLUtils.toTerm(id)); + } + + @Override + public void updateDocument(LLTerm id, LLDocument document) throws IOException { + indexWriter.updateDocument(LLUtils.toTerm(id), LLUtils.toDocument(document)); + } + + @Override + public void updateDocuments(Iterable ids, Iterable documents) + throws IOException { + var idIt = ids.iterator(); + var docIt = documents.iterator(); + while (idIt.hasNext()) { + var id = idIt.next(); + var doc = docIt.next(); + + indexWriter.updateDocument(LLUtils.toTerm(id), LLUtils.toDocument(doc)); + } + } + + @Override + public void deleteAll() throws IOException { + indexWriter.deleteAll(); + indexWriter.commit(); + indexWriter.forceMergeDeletes(true); + indexWriter.flush(); + indexWriter.commit(); + } + + @Override + public Collection search(@Nullable LLSnapshot snapshot, String queryString, int limit, @Nullable LLSort sort, + String keyFieldName) + throws IOException { + try { + var luceneIndexSnapshot = resolveSnapshot(snapshot); + + Query query = QueryParser.parse(queryString); + Sort luceneSort = LLUtils.toSort(sort); + + return Collections.singleton(runSearch(luceneIndexSnapshot, (indexSearcher) -> { + return blockingSearch(indexSearcher, limit, query, luceneSort, keyFieldName); + })); + } catch (ParseException e) { + throw new IOException("Error during query count!", e); + } + } + + @Override + public Collection moreLikeThis(@Nullable LLSnapshot snapshot, Map> mltDocumentFields, int limit, + String keyFieldName) + throws IOException { + var luceneIndexSnapshot = resolveSnapshot(snapshot); + + if (mltDocumentFields.isEmpty()) { + return Collections.singleton(new LLTopKeys(0, new LLKeyScore[0])); + } + + return Collections.singleton(runSearch(luceneIndexSnapshot, (indexSearcher) -> { + + var mlt = new MoreLikeThis(indexSearcher.getIndexReader()); + mlt.setAnalyzer(indexWriter.getAnalyzer()); + mlt.setFieldNames(mltDocumentFields.keySet().toArray(String[]::new)); + mlt.setMinTermFreq(1); + //mlt.setMinDocFreq(1); + mlt.setBoost(true); + + // Get the reference doc and apply it to MoreLikeThis, to generate the query + @SuppressWarnings({"unchecked", "rawtypes"}) + Query query = mlt.like((Map) mltDocumentFields); + + // Search + return blockingSearch(indexSearcher, limit, query, null, keyFieldName); + })); + } + + private static LLTopKeys blockingSearch(IndexSearcher 
indexSearcher, + int limit, + Query query, + Sort luceneSort, + String keyFieldName) throws IOException { + TopDocs results; + List keyScores; + + results = luceneSort != null ? indexSearcher.search(query, limit, luceneSort) + : indexSearcher.search(query, limit); + var hits = ObjectArrayList.wrap(results.scoreDocs); + keyScores = new LinkedList<>(); + for (ScoreDoc hit : hits) { + int docId = hit.doc; + float score = hit.score; + Document d = indexSearcher.doc(docId, Set.of(keyFieldName)); + if (d.getFields().isEmpty()) { + System.err.println("The document docId:" + docId + ",score:" + score + " is empty."); + var realFields = indexSearcher.doc(docId).getFields(); + if (!realFields.isEmpty()) { + System.err.println("Present fields:"); + for (IndexableField field : realFields) { + System.err.println(" - " + field.name()); + } + } + } else { + var field = d.getField(keyFieldName); + if (field == null) { + System.err.println("Can't get key of document docId:" + docId + ",score:" + score); + } else { + keyScores.add(new LLKeyScore(field.stringValue(), score)); + } + } + } + return new LLTopKeys(results.totalHits.value, keyScores.toArray(new LLKeyScore[0])); + } + + @SuppressWarnings("UnnecessaryLocalVariable") + @Override + public Tuple2, Collection>> searchStream(@Nullable LLSnapshot snapshot, String queryString, int limit, + @Nullable LLSort sort, String keyFieldName) { + try { + Query query = QueryParser.parse(queryString); + Sort luceneSort = LLUtils.toSort(sort); + + var acquireSearcherWrappedBlocking = Mono + .fromCallable(() -> { + if (snapshot == null) { + return searcherManager.acquire(); + } else { + return resolveSnapshot(snapshot).getIndexSearcher(); + } + }) + .subscribeOn(Schedulers.boundedElastic()); + + EmitterProcessor countProcessor = EmitterProcessor.create(); + EmitterProcessor resultsProcessor = EmitterProcessor.create(); + + var publisher = acquireSearcherWrappedBlocking.flatMapMany(indexSearcher -> { + return Flux.push(sink -> { + try { + Long approximatedTotalResultsCount = streamSearcher.streamSearch(indexSearcher, + query, + limit, + luceneSort, + keyFieldName, + sink::next + ); + sink.next(approximatedTotalResultsCount); + sink.complete(); + } catch (IOException e) { + sink.error(e); + } + }).subscribeOn(Schedulers.boundedElastic()) + .doOnTerminate(() -> { + if (snapshot == null) { + try { + searcherManager.release(indexSearcher); + } catch (IOException e) { + e.printStackTrace(); + } + } + }); + }).publish(); + + publisher.filter(item -> item instanceof Long).cast(Long.class).subscribe(countProcessor); + publisher.filter(item -> item instanceof String).cast(String.class).subscribe(resultsProcessor); + + publisher.connect(); + + return Tuples.of(countProcessor.single(0L), Collections.singleton(resultsProcessor)); + } catch (ParseException e) { + var error = new IOException("Error during query count!", e); + return Tuples.of(Mono.error(error), Collections.singleton(Flux.error(error))); + } + } + + @Override + public long count(@Nullable LLSnapshot snapshot, String queryString) throws IOException { + try { + var luceneIndexSnapshot = resolveSnapshot(snapshot); + + Query query = QueryParser.parse(queryString); + + return (long) runSearch(luceneIndexSnapshot, (indexSearcher) -> indexSearcher.count(query)); + } catch (ParseException e) { + throw new IOException("Error during query count!", e); + } + } + + @Override + public void close() throws IOException { + scheduler.shutdown(); + try { + scheduler.awaitTermination(10, TimeUnit.MINUTES); + } catch 
(InterruptedException e) { + e.printStackTrace(); + } + if (!scheduler.isTerminated()) { + System.err.println("Terminating lucene scheduler"); + scheduler.shutdownNow(); + } + indexWriter.commit(); + indexWriter.close(); + directory.close(); + } + + private void scheduledCommit() { + try { + if (indexWriter.hasUncommittedChanges()) { + indexWriter.commit(); + } + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + private void scheduledQueryRefresh() { + try { + searcherManager.maybeRefreshBlocking(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + private U runSearch(@Nullable LuceneIndexSnapshot snapshot, IOFunction searchExecutor) + throws IOException { + if (snapshot != null) { + return searchExecutor.apply(snapshot.getIndexSearcher()); + } else { + var indexSearcher = searcherManager.acquire(); + try { + return searchExecutor.apply(indexSearcher); + } finally { + searcherManager.release(indexSearcher); + } + } + } + + private LuceneIndexSnapshot resolveSnapshot(@Nullable LLSnapshot snapshot) { + if (snapshot == null) { + return null; + } + return Objects.requireNonNull(snapshots.get(snapshot.getSequenceNumber()), + () -> "Can't resolve snapshot " + snapshot.getSequenceNumber() + ); + } + + @Override + public boolean isLowMemoryMode() { + return lowMemory; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java new file mode 100644 index 0000000..36c3e48 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java @@ -0,0 +1,335 @@ +package it.cavallium.dbengine.database.disk; + +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import it.unimi.dsi.fastutil.longs.Long2ObjectMap; +import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap; +import java.io.Closeable; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; +import org.jetbrains.annotations.Nullable; +import org.warp.commonutils.batch.ParallelUtils; +import org.warp.commonutils.functional.IOBiConsumer; +import org.warp.commonutils.functional.IOConsumer; +import org.warp.commonutils.functional.IOTriConsumer; +import org.warp.commonutils.locks.LockUtils; +import it.cavallium.dbengine.database.LLDocument; +import it.cavallium.dbengine.database.LLKeyScore; +import it.cavallium.dbengine.database.LLLuceneIndex; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.database.LLSort; +import it.cavallium.dbengine.database.LLTerm; +import it.cavallium.dbengine.database.LLTopKeys; +import it.cavallium.dbengine.database.analyzer.TextFieldsAnalyzer; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.util.function.Tuple2; +import reactor.util.function.Tuples; + +public class LLLocalMultiLuceneIndex implements LLLuceneIndex { + + private final Long2ObjectMap registeredSnapshots = new Long2ObjectOpenHashMap<>(); + private final AtomicLong nextSnapshotNumber = new AtomicLong(1); + private final 
LLLocalLuceneIndex[] luceneIndices; + private final ReentrantReadWriteLock access = new ReentrantReadWriteLock(); + + private final int maxQueueSize = 1000; + + public LLLocalMultiLuceneIndex(Path lucene, + String name, + int instancesCount, + TextFieldsAnalyzer textFieldsAnalyzer, + Duration queryRefreshDebounceTime, + Duration commitDebounceTime, + boolean lowMemory) throws IOException { + + if (instancesCount <= 1 || instancesCount > 100) { + throw new IOException("Unsupported instances count: " + instancesCount); + } + + LLLocalLuceneIndex[] luceneIndices = new LLLocalLuceneIndex[instancesCount]; + for (int i = 0; i < instancesCount; i++) { + String instanceName; + if (i == 0) { + instanceName = name; + } else { + instanceName = name + "_" + String.format("%03d", i); + } + luceneIndices[i] = new LLLocalLuceneIndex(lucene, + instanceName, + textFieldsAnalyzer, + queryRefreshDebounceTime, + commitDebounceTime, + lowMemory + ); + } + this.luceneIndices = luceneIndices; + } + + private LLLocalLuceneIndex getLuceneIndex(LLTerm id) { + return luceneIndices[getLuceneIndexId(id)]; + } + + private int getLuceneIndexId(LLTerm id) { + return Math.abs(id.getValue().hashCode()) % luceneIndices.length; + } + + @Override + public String getLuceneIndexName() { + return LockUtils.lock(access.readLock(), () -> luceneIndices[0].getLuceneIndexName()); + } + + @Override + public void addDocument(LLTerm id, LLDocument doc) throws IOException { + LockUtils.lockIO(access.readLock(), () -> getLuceneIndex(id).addDocument(id, doc)); + } + + @Override + public void addDocuments(Iterable keys, Iterable documents) throws IOException { + LockUtils.lockIO(access.readLock(), () -> { + ParallelUtils.parallelizeIO(s -> runPerInstance(keys, documents, s), + maxQueueSize, + luceneIndices.length, + 1, + LLLuceneIndex::addDocuments + ); + }); + } + + private void runPerInstance(Iterable keys, + Iterable documents, + IOTriConsumer, Iterable> consumer) throws IOException { + var keysIt = keys.iterator(); + var docsIt = documents.iterator(); + + Int2ObjectMap> perInstanceKeys = new Int2ObjectOpenHashMap<>(); + Int2ObjectMap> perInstanceDocs = new Int2ObjectOpenHashMap<>(); + + while (keysIt.hasNext()) { + LLTerm key = keysIt.next(); + LLDocument doc = docsIt.next(); + var instanceId = getLuceneIndexId(key); + + perInstanceKeys.computeIfAbsent(instanceId, iid -> new ArrayList<>()).add(key); + perInstanceDocs.computeIfAbsent(instanceId, iid -> new ArrayList<>()).add(doc); + } + + for (Int2ObjectMap.Entry> currentInstanceEntry : perInstanceKeys.int2ObjectEntrySet()) { + int instanceId = currentInstanceEntry.getIntKey(); + List currentInstanceKeys = currentInstanceEntry.getValue(); + consumer.accept(this.luceneIndices[instanceId], currentInstanceKeys, perInstanceDocs.get(instanceId)); + } + } + + @Override + public void deleteDocument(LLTerm id) throws IOException { + LockUtils.lockIO(access.readLock(), () -> getLuceneIndex(id).deleteDocument(id)); + } + + @Override + public void updateDocument(LLTerm id, LLDocument document) throws IOException { + LockUtils.lockIO(access.readLock(), () -> getLuceneIndex(id).updateDocument(id, document)); + } + + @Override + public void updateDocuments(Iterable keys, Iterable documents) throws IOException { + LockUtils.lockIO(access.readLock(), () -> { + ParallelUtils.parallelizeIO(s -> runPerInstance(keys, documents, s), + maxQueueSize, + luceneIndices.length, + 1, + LLLuceneIndex::updateDocuments + ); + }); + } + + @Override + public void deleteAll() throws IOException { + 
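// Clearing all sub-indices must be exclusive: hold the write lock while deleteAll runs on every instance in parallel +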
LockUtils.lockIO(access.writeLock(), () -> { + ParallelUtils.parallelizeIO((IOConsumer s) -> { + for (LLLocalLuceneIndex luceneIndex : luceneIndices) { + s.consume(luceneIndex); + } + }, maxQueueSize, luceneIndices.length, 1, LLLuceneIndex::deleteAll); + }); + } + + @Override + public Collection search(@Nullable LLSnapshot snapshot, + String query, + int limit, + @Nullable LLSort sort, + String keyFieldName) throws IOException { + return LockUtils.lockIO(access.readLock(), () -> { + Collection> result = new ConcurrentLinkedQueue<>(); + + ParallelUtils.parallelizeIO((IOBiConsumer s) -> { + for (int i = 0; i < luceneIndices.length; i++) { + s.consume(luceneIndices[i], resolveSnapshot(snapshot, i)); + } + }, maxQueueSize, luceneIndices.length, 1, (instance, instanceSnapshot) -> { + result.add(instance.search(instanceSnapshot, query, limit, sort, keyFieldName)); + }); + return result; + }).stream().flatMap(Collection::stream).collect(Collectors.toList()); + } + + private LLTopKeys mergeTopKeys(Collection multi) { + long totalHitsCount = 0; + LLKeyScore[] hits; + int hitsArraySize = 0; + for (LLTopKeys llTopKeys : multi) { + totalHitsCount += llTopKeys.getTotalHitsCount(); + hitsArraySize += llTopKeys.getHits().length; + } + hits = new LLKeyScore[hitsArraySize]; + + int offset = 0; + for (LLTopKeys llTopKeys : multi) { + var part = llTopKeys.getHits(); + System.arraycopy(part, 0, hits, offset, part.length); + offset += part.length; + } + + return new LLTopKeys(totalHitsCount, hits); + } + + private LLSnapshot resolveSnapshot(LLSnapshot multiSnapshot, int instanceId) { + if (multiSnapshot != null) { + return registeredSnapshots.get(multiSnapshot.getSequenceNumber())[instanceId]; + } else { + return null; + } + } + + @Override + public Collection moreLikeThis(@Nullable LLSnapshot snapshot, + Map> mltDocumentFields, + int limit, + String keyFieldName) throws IOException { + return LockUtils.lockIO(access.readLock(), () -> { + Collection> result = new ConcurrentLinkedQueue<>(); + + ParallelUtils.parallelizeIO((IOBiConsumer s) -> { + for (int i = 0; i < luceneIndices.length; i++) { + s.consume(luceneIndices[i], resolveSnapshot(snapshot, i)); + } + }, maxQueueSize, luceneIndices.length, 1, (instance, instanceSnapshot) -> { + result.add(instance.moreLikeThis(instanceSnapshot, mltDocumentFields, limit, keyFieldName)); + }); + return result; + }).stream().flatMap(Collection::stream).collect(Collectors.toList()); + } + + @Override + public Tuple2, Collection>> searchStream(@Nullable LLSnapshot snapshot, + String query, + int limit, + @Nullable LLSort sort, + String keyFieldName) { + Collection, Collection>>> multi = LockUtils.lock(access.readLock(), () -> { + Collection, Collection>>> result = new ConcurrentLinkedQueue<>(); + + ParallelUtils.parallelize((BiConsumer s) -> { + for (int i = 0; i < luceneIndices.length; i++) { + s.accept(luceneIndices[i], resolveSnapshot(snapshot, i)); + } + }, maxQueueSize, luceneIndices.length, 1, (instance, instanceSnapshot) -> { + result.add(instance.searchStream(instanceSnapshot, query, limit, sort, keyFieldName)); + }); + return result; + }); + + Mono result1; + Collection> result2; + + result1 = Mono.zip(multi.stream().map(Tuple2::getT1).collect(Collectors.toList()), (items) -> { + long total = 0; + for (Object item : items) { + total += (Long) item; + } + return total; + }); + + result2 = multi.stream().map(Tuple2::getT2).flatMap(Collection::stream).collect(Collectors.toList()); + + return Tuples.of(result1, result2); + } + + @Override + public long count(@Nullable 
LLSnapshot snapshot, String query) throws IOException { + return LockUtils.lockIO(access.readLock(), () -> { + AtomicLong result = new AtomicLong(0); + + ParallelUtils.parallelizeIO((IOBiConsumer s) -> { + for (int i = 0; i < luceneIndices.length; i++) { + s.consume(luceneIndices[i], resolveSnapshot(snapshot, i)); + } + }, maxQueueSize, luceneIndices.length, 1, (instance, instanceSnapshot) -> { + result.addAndGet(instance.count(instanceSnapshot, query)); + }); + return result.get(); + }); + } + + @Override + public void close() throws IOException { + LockUtils.lockIO(access.writeLock(), () -> { + ParallelUtils.parallelizeIO((IOConsumer s) -> { + for (LLLocalLuceneIndex luceneIndex : luceneIndices) { + s.consume(luceneIndex); + } + }, maxQueueSize, luceneIndices.length, 1, Closeable::close); + }); + } + + @Override + public LLSnapshot takeSnapshot() throws IOException { + return LockUtils.lockIO(access.writeLock(), () -> { + CopyOnWriteArrayList instancesSnapshots = new CopyOnWriteArrayList<>(new LLSnapshot[luceneIndices.length]); + var snapIndex = nextSnapshotNumber.getAndIncrement(); + + ParallelUtils.parallelizeIO((IOBiConsumer s) -> { + for (int i = 0; i < luceneIndices.length; i++) { + s.consume(luceneIndices[i], i); + } + }, maxQueueSize, luceneIndices.length, 1, (instance, i) -> { + var instanceSnapshot = instance.takeSnapshot(); + instancesSnapshots.set(i, instanceSnapshot); + }); + + LLSnapshot[] instancesSnapshotsArray = instancesSnapshots.toArray(LLSnapshot[]::new); + registeredSnapshots.put(snapIndex, instancesSnapshotsArray); + + return new LLSnapshot(snapIndex); + }); + } + + @Override + public void releaseSnapshot(LLSnapshot snapshot) throws IOException { + LockUtils.lockIO(access.writeLock(), () -> { + LLSnapshot[] instancesSnapshots = registeredSnapshots.remove(snapshot.getSequenceNumber()); + for (int i = 0; i < luceneIndices.length; i++) { + LLLocalLuceneIndex luceneIndex = luceneIndices[i]; + luceneIndex.releaseSnapshot(instancesSnapshots[i]); + } + }); + } + + @Override + public boolean isLowMemoryMode() { + return luceneIndices[0].isLowMemoryMode(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalSingleton.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalSingleton.java new file mode 100644 index 0000000..023f0c5 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalSingleton.java @@ -0,0 +1,68 @@ +package it.cavallium.dbengine.database.disk; + +import java.io.IOException; +import java.util.function.Function; +import org.jetbrains.annotations.Nullable; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.ReadOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.Snapshot; +import it.cavallium.dbengine.database.LLSingleton; +import it.cavallium.dbengine.database.LLSnapshot; + +public class LLLocalSingleton implements LLSingleton { + + private static final ReadOptions EMPTY_READ_OPTIONS = new ReadOptions(); + private final RocksDB db; + private final ColumnFamilyHandle cfh; + private final Function snapshotResolver; + private final byte[] name; + private final String databaseName; + + public LLLocalSingleton(RocksDB db, ColumnFamilyHandle singletonListColumn, + Function snapshotResolver, + String databaseName, + byte[] name, + byte[] defaultValue) throws RocksDBException { + this.db = db; + this.cfh = singletonListColumn; + this.databaseName = databaseName; + this.snapshotResolver = snapshotResolver; + this.name = name; + if (db.get(cfh, this.name) == null) { 
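+ // First open: initialize the singleton with its default value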
+ db.put(cfh, this.name, defaultValue); + } + } + + private ReadOptions resolveSnapshot(LLSnapshot snapshot) { + if (snapshot != null) { + return new ReadOptions().setSnapshot(snapshotResolver.apply(snapshot)); + } else { + return EMPTY_READ_OPTIONS; + } + } + + @Override + public byte[] get(@Nullable LLSnapshot snapshot) throws IOException { + try { + return db.get(cfh, resolveSnapshot(snapshot), name); + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public void set(byte[] value) throws IOException { + try { + db.put(cfh, name, value); + } catch (RocksDBException e) { + throw new IOException(e); + } + } + + @Override + public String getDatabaseName() { + return databaseName; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LuceneIndexSnapshot.java b/src/main/java/it/cavallium/dbengine/database/disk/LuceneIndexSnapshot.java new file mode 100644 index 0000000..93ae43f --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/disk/LuceneIndexSnapshot.java @@ -0,0 +1,74 @@ +package it.cavallium.dbengine.database.disk; + +import java.io.IOException; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.search.IndexSearcher; + +public class LuceneIndexSnapshot { + private final IndexCommit snapshot; + + private boolean initialized; + private boolean failed; + private boolean closed; + + private DirectoryReader indexReader; + private IndexSearcher indexSearcher; + + public LuceneIndexSnapshot(IndexCommit snapshot) { + this.snapshot = snapshot; + } + + public IndexCommit getSnapshot() { + return snapshot; + } + + /** + * Can be called only if the snapshot has not been closed + * @throws IllegalStateException if closed or failed + */ + public synchronized DirectoryReader getIndexReader() throws IllegalStateException { + openDirectoryIfNeeded(); + return indexReader; + } + + /** + * Can be called only if the snapshot has not been closed + * @throws IllegalStateException if closed or failed + */ + public synchronized IndexSearcher getIndexSearcher() throws IllegalStateException { + openDirectoryIfNeeded(); + return indexSearcher; + } + + private synchronized void openDirectoryIfNeeded() throws IllegalStateException { + if (closed) { + throw new IllegalStateException("Snapshot is closed"); + } + if (failed) { + throw new IllegalStateException("Snapshot failed to open"); + } + if (!initialized) { + try { + indexReader = DirectoryReader.open(snapshot); + indexSearcher = new IndexSearcher(indexReader); + + initialized = true; + } catch (IOException e) { + failed = true; + throw new RuntimeException(e); + } + } + } + + public synchronized void close() throws IOException { + closed = true; + + if (initialized && !failed) { + indexReader.close(); + + indexReader = null; + indexSearcher = null; + } + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/luceneutil/AdaptiveStreamSearcher.java b/src/main/java/it/cavallium/dbengine/database/luceneutil/AdaptiveStreamSearcher.java new file mode 100644 index 0000000..d718fde --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/luceneutil/AdaptiveStreamSearcher.java @@ -0,0 +1,42 @@ +package it.cavallium.dbengine.database.luceneutil; + +import java.io.IOException; +import java.util.function.Consumer; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.jetbrains.annotations.Nullable; + +/** + * Use a different searcher based on 
the query: unsorted searches use the parallel collector, sorted searches above the page size are paged, and small sorted searches use the simple searcher + */ +public class AdaptiveStreamSearcher implements LuceneStreamSearcher { + + private final SimpleStreamSearcher simpleStreamSearcher; + private final ParallelCollectorStreamSearcher parallelCollectorStreamSearcher; + private final PagedStreamSearcher pagedStreamSearcher; + + public AdaptiveStreamSearcher() { + this.simpleStreamSearcher = new SimpleStreamSearcher(); + this.parallelCollectorStreamSearcher = new ParallelCollectorStreamSearcher(); + this.pagedStreamSearcher = new PagedStreamSearcher(simpleStreamSearcher); + } + + @Override + public Long streamSearch(IndexSearcher indexSearcher, + Query query, + int limit, + @Nullable Sort luceneSort, + String keyFieldName, + Consumer<String> consumer) throws IOException { + if (luceneSort == null) { + return parallelCollectorStreamSearcher.streamSearch(indexSearcher, query, limit, null, keyFieldName, consumer); + } else if (limit > PagedStreamSearcher.MAX_ITEMS_PER_PAGE) { + return pagedStreamSearcher.streamSearch(indexSearcher, query, limit, luceneSort, keyFieldName, consumer); + } else { + return simpleStreamSearcher.streamSearch(indexSearcher, query, limit, luceneSort, keyFieldName, consumer); + } + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/luceneutil/LuceneStreamSearcher.java b/src/main/java/it/cavallium/dbengine/database/luceneutil/LuceneStreamSearcher.java new file mode 100644 index 0000000..aa31895 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/luceneutil/LuceneStreamSearcher.java @@ -0,0 +1,29 @@ +package it.cavallium.dbengine.database.luceneutil; + +import java.io.IOException; +import java.util.function.Consumer; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.jetbrains.annotations.Nullable; + +public interface LuceneStreamSearcher { + + /** + * Runs a Lucene query, streaming each result key to the given consumer + * @param indexSearcher the index searcher, which contains all the lucene data + * @param query the query + * @param limit the maximum number of results + * @param luceneSort the sorting method used for the search (null for unsorted searches) + * @param keyFieldName the name of the key field + * @param consumer the consumer that receives the key of each result + * @return the approximate total count of results + * @throws IOException if an I/O error occurs + */ + Long streamSearch(IndexSearcher indexSearcher, + Query query, + int limit, + @Nullable Sort luceneSort, + String keyFieldName, + Consumer<String> consumer) throws IOException; +} diff --git a/src/main/java/it/cavallium/dbengine/database/luceneutil/PagedStreamSearcher.java b/src/main/java/it/cavallium/dbengine/database/luceneutil/PagedStreamSearcher.java new file mode 100644 index 0000000..cbf78ff --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/luceneutil/PagedStreamSearcher.java @@ -0,0 +1,100 @@ +package it.cavallium.dbengine.database.luceneutil; + +import java.io.IOException; +import java.util.Set; +import java.util.function.Consumer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TopDocs; +import org.jetbrains.annotations.Nullable; +import org.warp.commonutils.type.IntWrapper; + +/** + * Sorted paged search (the most memory-efficient stream searcher for big queries) + */ +public class PagedStreamSearcher implements 
LuceneStreamSearcher { + + public static final int MAX_ITEMS_PER_PAGE = 1000; + private final LuceneStreamSearcher baseStreamSearcher; + + public PagedStreamSearcher(LuceneStreamSearcher baseStreamSearcher) { + this.baseStreamSearcher = baseStreamSearcher; + } + + @Override + public Long streamSearch(IndexSearcher indexSearcher, + Query query, + int limit, + @Nullable Sort luceneSort, + String keyFieldName, + Consumer consumer) throws IOException { + if (limit < MAX_ITEMS_PER_PAGE) { + // Use a normal search method because the limit is low + return baseStreamSearcher.streamSearch(indexSearcher, query, limit, luceneSort, keyFieldName, consumer); + } + IntWrapper currentAllowedResults = new IntWrapper(limit); + + // Run the first page search + TopDocs lastTopDocs = indexSearcher.search(query, MAX_ITEMS_PER_PAGE, luceneSort); + if (lastTopDocs.scoreDocs.length > 0) { + ScoreDoc lastScoreDoc = getLastItem(lastTopDocs.scoreDocs); + consumeHits(currentAllowedResults, lastTopDocs.scoreDocs, indexSearcher, keyFieldName, consumer); + + // Run the searches for each page until the end + boolean finished = currentAllowedResults.var <= 0; + while (!finished) { + lastTopDocs = indexSearcher.searchAfter(lastScoreDoc, query, MAX_ITEMS_PER_PAGE, luceneSort); + if (lastTopDocs.scoreDocs.length > 0) { + lastScoreDoc = getLastItem(lastTopDocs.scoreDocs); + consumeHits(currentAllowedResults, lastTopDocs.scoreDocs, indexSearcher, keyFieldName, consumer); + } + if (lastTopDocs.scoreDocs.length < MAX_ITEMS_PER_PAGE || currentAllowedResults.var <= 0) { + finished = true; + } + } + } + return lastTopDocs.totalHits.value; + } + + private void consumeHits(IntWrapper currentAllowedResults, + ScoreDoc[] hits, + IndexSearcher indexSearcher, + String keyFieldName, + Consumer consumer) throws IOException { + for (ScoreDoc hit : hits) { + int docId = hit.doc; + float score = hit.score; + + if (currentAllowedResults.var-- > 0) { + Document d = indexSearcher.doc(docId, Set.of(keyFieldName)); + if (d.getFields().isEmpty()) { + System.err.println("The document docId:" + docId + ",score:" + score + " is empty."); + var realFields = indexSearcher.doc(docId).getFields(); + if (!realFields.isEmpty()) { + System.err.println("Present fields:"); + for (IndexableField field : realFields) { + System.err.println(" - " + field.name()); + } + } + } else { + var field = d.getField(keyFieldName); + if (field == null) { + System.err.println("Can't get key of document docId:" + docId + ",score:" + score); + } else { + consumer.accept(field.stringValue()); + } + } + } else { + break; + } + } + } + + private static ScoreDoc getLastItem(ScoreDoc[] scoreDocs) { + return scoreDocs[scoreDocs.length - 1]; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/luceneutil/ParallelCollectorStreamSearcher.java b/src/main/java/it/cavallium/dbengine/database/luceneutil/ParallelCollectorStreamSearcher.java new file mode 100644 index 0000000..171e89d --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/luceneutil/ParallelCollectorStreamSearcher.java @@ -0,0 +1,67 @@ +package it.cavallium.dbengine.database.luceneutil; + +import it.cavallium.dbengine.database.utils.LuceneParallelStreamCollectorManager; +import java.io.IOException; +import java.util.Set; +import java.util.concurrent.CompletionException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.IndexSearcher; +import 
org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.jetbrains.annotations.Nullable; + +/** + * Unsorted search (low latency and constant memory usage) + */ +public class ParallelCollectorStreamSearcher implements LuceneStreamSearcher { + + @Override + public Long streamSearch(IndexSearcher indexSearcher, + Query query, + int limit, + @Nullable Sort luceneSort, + String keyFieldName, + Consumer consumer) throws IOException { + if (luceneSort != null) { + throw new IllegalArgumentException("ParallelCollectorStreamSearcher doesn't support sorted searches"); + } + + AtomicInteger currentCount = new AtomicInteger(); + + var result = indexSearcher.search(query, LuceneParallelStreamCollectorManager.fromConsumer(docId -> { + if (currentCount.getAndIncrement() >= limit) { + return false; + } else { + try { + Document d = indexSearcher.doc(docId, Set.of(keyFieldName)); + if (d.getFields().isEmpty()) { + System.err.println("The document docId:" + docId + " is empty."); + var realFields = indexSearcher.doc(docId).getFields(); + if (!realFields.isEmpty()) { + System.err.println("Present fields:"); + for (IndexableField field : realFields) { + System.err.println(" - " + field.name()); + } + } + } else { + var field = d.getField(keyFieldName); + if (field == null) { + System.err.println("Can't get key of document docId:" + docId); + } else { + consumer.accept(field.stringValue()); + } + } + } catch (IOException e) { + e.printStackTrace(); + throw new CompletionException(e); + } + return true; + } + })); + //todo: check the accuracy of our hits counter! + return result.getTotalHitsCount(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/luceneutil/SimpleStreamSearcher.java b/src/main/java/it/cavallium/dbengine/database/luceneutil/SimpleStreamSearcher.java new file mode 100644 index 0000000..25b7103 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/luceneutil/SimpleStreamSearcher.java @@ -0,0 +1,54 @@ +package it.cavallium.dbengine.database.luceneutil; + +import it.unimi.dsi.fastutil.objects.ObjectArrayList; +import java.io.IOException; +import java.util.Set; +import java.util.function.Consumer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TopDocs; +import org.jetbrains.annotations.Nullable; + +/** + * Sorted search (slower and more memory-intensive) + */ +public class SimpleStreamSearcher implements LuceneStreamSearcher { + + @Override + public Long streamSearch(IndexSearcher indexSearcher, + Query query, + int limit, + @Nullable Sort luceneSort, + String keyFieldName, + Consumer consumer) throws IOException { + TopDocs topDocs = indexSearcher.search(query, limit, luceneSort); + var hits = ObjectArrayList.wrap(topDocs.scoreDocs); + for (ScoreDoc hit : hits) { + int docId = hit.doc; + float score = hit.score; + Document d = indexSearcher.doc(docId, Set.of(keyFieldName)); + if (d.getFields().isEmpty()) { + System.err.println("The document docId:" + docId + ",score:" + score + " is empty."); + var realFields = indexSearcher.doc(docId).getFields(); + if (!realFields.isEmpty()) { + System.err.println("Present fields:"); + for (IndexableField field : realFields) { + System.err.println(" - " + field.name()); + } + } + } else { + var field = d.getField(keyFieldName); + if (field == null) { + 
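// The document exists but has no stored key field: log the anomaly and skip this hit +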
System.err.println("Can't get key of document docId:" + docId + ",score:" + score); + } else { + consumer.accept(field.stringValue()); + } + } + } + return topDocs.totalHits.value; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/client/DbClientFunctions.java b/src/main/java/it/cavallium/dbengine/database/remote/client/DbClientFunctions.java new file mode 100644 index 0000000..35ca144 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/remote/client/DbClientFunctions.java @@ -0,0 +1,76 @@ +package it.cavallium.dbengine.database.remote.client; + +import io.grpc.ManagedChannel; +import io.grpc.netty.GrpcSslContexts; +import io.grpc.netty.NettyChannelBuilder; +import io.netty.handler.ssl.SslContext; +import io.netty.handler.ssl.SslContextBuilder; +import it.cavallium.dbengine.proto.CavalliumDBEngineServiceGrpc; +import java.nio.file.Path; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; +import javax.net.ssl.SSLException; + +public class DbClientFunctions extends CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceImplBase { + + private static final Logger logger = Logger.getLogger(DbClientFunctions.class.getName()); + private static final boolean SSL = false; + + private final ManagedChannel channel; + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub blockingStub; + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceStub stub; + + public CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub getBlockingStub() { + return blockingStub; + } + + public CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceStub getStub() { + return stub; + } + + public static SslContext buildSslContext(Path trustCertCollectionFilePath, + Path clientCertChainFilePath, + Path clientPrivateKeyFilePath) throws SSLException { + SslContextBuilder builder = GrpcSslContexts.forClient(); + if (trustCertCollectionFilePath != null) { + builder.trustManager(trustCertCollectionFilePath.toFile()); + } + if (clientCertChainFilePath != null && clientPrivateKeyFilePath != null) { + builder.keyManager(clientCertChainFilePath.toFile(), clientPrivateKeyFilePath.toFile()); + } + return builder.build(); + } + + /** + * Construct client connecting to HelloWorld server at {@code host:port}. + */ + public DbClientFunctions(String host, + int port, + SslContext sslContext) throws SSLException { + + this(generateThis(host, port, sslContext)); + } + + private static ManagedChannel generateThis(String host, int port, SslContext sslContext) { + var builder = NettyChannelBuilder.forAddress(host, port); + if (SSL) { + builder.sslContext(sslContext); + } else { + builder.usePlaintext(); + } + return builder.build(); + } + + /** + * Construct client for accessing RouteGuide server using the existing channel. 
+ */ + DbClientFunctions(ManagedChannel channel) { + this.channel = channel; + blockingStub = CavalliumDBEngineServiceGrpc.newBlockingStub(channel); + stub = CavalliumDBEngineServiceGrpc.newStub(channel); + } + + public void shutdown() throws InterruptedException { + channel.shutdown().awaitTermination(5, TimeUnit.SECONDS); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteDatabaseConnection.java new file mode 100644 index 0000000..ad292bf --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteDatabaseConnection.java @@ -0,0 +1,131 @@ +package it.cavallium.dbengine.database.remote.client; + +import com.google.protobuf.ByteString; +import io.grpc.StatusRuntimeException; +import it.cavallium.dbengine.proto.CavalliumDBEngineServiceGrpc; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.time.Duration; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.LongStream; +import javax.net.ssl.SSLException; +import it.cavallium.dbengine.database.Column; +import it.cavallium.dbengine.database.LLDatabaseConnection; +import it.cavallium.dbengine.database.LLKeyValueDatabase; +import it.cavallium.dbengine.database.LLLuceneIndex; +import it.cavallium.dbengine.database.analyzer.TextFieldsAnalyzer; +import it.cavallium.dbengine.proto.DatabaseOpenRequest; +import it.cavallium.dbengine.proto.Empty; +import it.cavallium.dbengine.proto.LuceneIndexOpenRequest; +import it.cavallium.dbengine.proto.ResetConnectionRequest; + +public class LLRemoteDatabaseConnection implements LLDatabaseConnection { + + private final String address; + private final int port; + private final Path trustCertCollectionFilePath; + private final Path clientCertChainFilePath; + private final Path clientPrivateKeyFilePath; + private DbClientFunctions client; + private CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub blockingStub; + + public LLRemoteDatabaseConnection(String address, int port, Path trustCertCollectionFilePath, + Path clientCertChainFilePath, + Path clientPrivateKeyFilePath) { + this.address = address; + this.port = port; + this.trustCertCollectionFilePath = trustCertCollectionFilePath; + this.clientCertChainFilePath = clientCertChainFilePath; + this.clientPrivateKeyFilePath = clientPrivateKeyFilePath; + } + + @Override + public void connect() throws IOException { + try { + this.client = new DbClientFunctions(address, port, + DbClientFunctions.buildSslContext(trustCertCollectionFilePath, clientCertChainFilePath, + clientPrivateKeyFilePath)); + this.blockingStub = client.getBlockingStub(); + //noinspection ResultOfMethodCallIgnored + blockingStub.resetConnection(ResetConnectionRequest.newBuilder().build()); + } catch (SSLException | StatusRuntimeException e) { + throw new IOException(e); + } + } + + @Override + public LLKeyValueDatabase getDatabase(String name, List columns, boolean lowMemory) throws IOException { + try { + var response = blockingStub.databaseOpen(DatabaseOpenRequest.newBuilder() + .setName(ByteString.copyFrom(name, StandardCharsets.US_ASCII)) + .addAllColumnName(columns.stream().map( + (column) -> ByteString.copyFrom(column.getName().getBytes(StandardCharsets.US_ASCII))) + .collect(Collectors.toList())) + .setLowMemory(lowMemory) + .build()); + int handle = response.getHandle(); + return new LLRemoteKeyValueDatabase(name, client, handle); + 
} catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public LLLuceneIndex getLuceneIndex(String name, + int instancesCount, + TextFieldsAnalyzer textFieldsAnalyzer, + Duration queryRefreshDebounceTime, + java.time.Duration commitDebounceTime, + boolean lowMemory) throws IOException { + try { + var response = blockingStub.luceneIndexOpen(LuceneIndexOpenRequest.newBuilder() + .setName(name) + .setTextFieldsAnalyzer(textFieldsAnalyzer.ordinal()) + .setQueryRefreshDebounceTime((int) queryRefreshDebounceTime.toMillis()) + .setCommitDebounceTime((int) commitDebounceTime.toMillis()) + .setLowMemory(lowMemory) + .setInstancesCount(instancesCount) + .build()); + int handle = response.getHandle(); + return new LLRemoteLuceneIndex(client, name, handle, lowMemory, instancesCount); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public void disconnect() throws IOException { + try { + //noinspection ResultOfMethodCallIgnored + blockingStub.resetConnection(ResetConnectionRequest.newBuilder().build()); + client.shutdown(); + } catch (InterruptedException | StatusRuntimeException e) { + throw new IOException(e); + } + } + + @Override + public void ping() throws IOException { + try { + blockingStub.ping(Empty.newBuilder().build()); + } catch (StatusRuntimeException e) { + throw new IOException(e); + } + } + + @Override + public double getMediumLatencyMillis() throws IOException { + int cap = 3; + + long[] results = new long[cap]; + for (int i = 0; i < cap; i++) { + long time1 = System.nanoTime(); + ping(); + long time2 = System.nanoTime(); + results[i] = time2 - time1; + } + return LongStream.of(results).average().orElseThrow() / 1000000; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteDictionary.java b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteDictionary.java new file mode 100644 index 0000000..687fd3e --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteDictionary.java @@ -0,0 +1,269 @@ +package it.cavallium.dbengine.database.remote.client; + +import com.google.protobuf.ByteString; +import io.grpc.StatusRuntimeException; +import it.cavallium.dbengine.database.LLDictionary; +import it.cavallium.dbengine.database.LLDictionaryResultType; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.proto.CavalliumDBEngineServiceGrpc; +import it.cavallium.dbengine.proto.DictionaryMethodClearRequest; +import it.cavallium.dbengine.proto.DictionaryMethodContainsRequest; +import it.cavallium.dbengine.proto.DictionaryMethodForEachRequest; +import it.cavallium.dbengine.proto.DictionaryMethodGetRequest; +import it.cavallium.dbengine.proto.DictionaryMethodIsEmptyRequest; +import it.cavallium.dbengine.proto.DictionaryMethodPutMultiRequest; +import it.cavallium.dbengine.proto.DictionaryMethodPutRequest; +import it.cavallium.dbengine.proto.DictionaryMethodRemoveOneRequest; +import it.cavallium.dbengine.proto.DictionaryMethodRemoveRequest; +import it.cavallium.dbengine.proto.DictionaryMethodReplaceAllRequest; +import it.cavallium.dbengine.proto.DictionaryMethodSizeRequest; +import java.io.IOError; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import 
org.jetbrains.annotations.Nullable; +import org.warp.commonutils.concurrency.atomicity.NotAtomic; + +@NotAtomic +public class LLRemoteDictionary implements LLDictionary { + + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub blockingStub; + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceStub stub; + private final int handle; + private final String name; + + public LLRemoteDictionary(DbClientFunctions clientFunctions, int handle, String name) { + this.blockingStub = clientFunctions.getBlockingStub(); + this.stub = clientFunctions.getStub(); + this.handle = handle; + this.name = name; + } + + @Override + public Optional get(@Nullable LLSnapshot snapshot, byte[] key) throws IOException { + try { + var request = DictionaryMethodGetRequest.newBuilder() + .setDictionaryHandle(handle) + .setKey(ByteString.copyFrom(key)); + if (snapshot != null) { + request.setSequenceNumber(snapshot.getSequenceNumber()); + } + var response = blockingStub.dictionaryMethodGet(request.build()); + var value = response.getValue(); + if (value != null) { + return Optional.of(value.toByteArray()); + } else { + return Optional.empty(); + } + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public boolean contains(@Nullable LLSnapshot snapshot, byte[] key) throws IOException { + try { + var request = DictionaryMethodContainsRequest.newBuilder() + .setDictionaryHandle(handle) + .setKey(ByteString.copyFrom(key)); + if (snapshot != null) { + request.setSequenceNumber(snapshot.getSequenceNumber()); + } + var response = blockingStub.dictionaryMethodContains(request.build()); + return response.getValue(); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public Optional put(byte[] key, byte[] value, LLDictionaryResultType resultType) + throws IOException { + try { + return put_(key, value, resultType); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + private Optional put_(byte[] key, byte[] value, LLDictionaryResultType resultType) { + var response = blockingStub.dictionaryMethodPut(DictionaryMethodPutRequest.newBuilder() + .setDictionaryHandle(handle) + .setKey(ByteString.copyFrom(key)) + .setValue(ByteString.copyFrom(value)) + .setResultType(resultType.toProto()) + .build()); + var bytes = response.getValue(); + if (bytes != null) { + return Optional.of(bytes.toByteArray()); + } else { + return Optional.empty(); + } + } + + @Override + public void putMulti(byte[][] key, byte[][] value, LLDictionaryResultType resultType, + Consumer responses) throws IOException { + try { + var response = blockingStub + .dictionaryMethodPutMulti(DictionaryMethodPutMultiRequest.newBuilder() + .setDictionaryHandle(handle) + .addAllKey( + List.of(key).stream().map(ByteString::copyFrom).collect(Collectors.toList())) + .addAllValue( + List.of(value).stream().map(ByteString::copyFrom).collect(Collectors.toList())) + .setResultType(resultType.toProto()) + .build()); + if (response.getValueList() != null) { + for (ByteString byteString : response.getValueList()) { + responses.accept(byteString.toByteArray()); + } + } + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public Optional remove(byte[] key, LLDictionaryResultType resultType) throws IOException { + try { + return remove_(key, resultType); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + private Optional remove_(byte[] key, LLDictionaryResultType 
resultType) { + var response = blockingStub.dictionaryMethodRemove(DictionaryMethodRemoveRequest.newBuilder() + .setDictionaryHandle(handle) + .setKey(ByteString.copyFrom(key)) + .setResultType(resultType.toProto()) + .build()); + var bytes = response.getValue(); + if (bytes != null) { + return Optional.of(bytes.toByteArray()); + } else { + return Optional.empty(); + } + } + + @Override + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer consumer) { + try { + var request = DictionaryMethodForEachRequest.newBuilder().setDictionaryHandle(handle); + if (snapshot != null) { + request.setSequenceNumber(snapshot.getSequenceNumber()); + } + var response = blockingStub.dictionaryMethodForEach(request.build()); + response.forEachRemaining((entry) -> { + var key = entry.getKey().toByteArray(); + var value = entry.getValue().toByteArray(); + consumer.accept(key, value); + }); + } catch (StatusRuntimeException ex) { + throw new IOError(ex); + } + } + + @Override + public void replaceAll(int parallelism, boolean replaceKeys, BiFunction> consumer) throws IOException { + try { + var response = blockingStub + .dictionaryMethodReplaceAll(DictionaryMethodReplaceAllRequest.newBuilder() + .setDictionaryHandle(handle) + .setReplaceKeys(replaceKeys) + .build()); + response.forEachRemaining((entry) -> { + var key = entry.getKey().toByteArray(); + var value = entry.getValue().toByteArray(); + var singleResponse = consumer.apply(key, value); + boolean keyDiffers = false; + if (!Arrays.equals(key, singleResponse.getKey())) { + remove_(key, LLDictionaryResultType.VOID); + keyDiffers = true; + } + + // put if changed + if (keyDiffers || !Arrays.equals(value, singleResponse.getValue())) { + put_(singleResponse.getKey(), singleResponse.getValue(), LLDictionaryResultType.VOID); + } + }); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public void clear() throws IOException { + try { + //noinspection ResultOfMethodCallIgnored + blockingStub.dictionaryMethodClear(DictionaryMethodClearRequest.newBuilder() + .setDictionaryHandle(handle) + .build()); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public long size(@Nullable LLSnapshot snapshot, boolean fast) throws IOException { + try { + var request = DictionaryMethodSizeRequest.newBuilder().setDictionaryHandle(handle); + if (snapshot != null) { + request.setSequenceNumber(snapshot.getSequenceNumber()); + } + var response = fast ? 
blockingStub.dictionaryMethodFastSize(request.build()) + : blockingStub.dictionaryMethodExactSize(request.build()); + return response.getSize(); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public boolean isEmpty(@Nullable LLSnapshot snapshot) throws IOException { + try { + var request = DictionaryMethodIsEmptyRequest + .newBuilder() + .setDictionaryHandle(handle); + if (snapshot != null) { + request.setSequenceNumber(snapshot.getSequenceNumber()); + } + var response = blockingStub.dictionaryMethodIsEmpty(request.build()); + return response.getEmpty(); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public Optional> removeOne() throws IOException { + try { + var response = blockingStub.dictionaryMethodRemoveOne(DictionaryMethodRemoveOneRequest + .newBuilder() + .setDictionaryHandle(handle) + .build()); + var keyBytes = response.getKey(); + var valueBytes = response.getValue(); + if (keyBytes != null && valueBytes != null) { + return Optional.of(Map.entry(keyBytes.toByteArray(), valueBytes.toByteArray())); + } else { + return Optional.empty(); + } + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public String getDatabaseName() { + return name; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteKeyValueDatabase.java b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteKeyValueDatabase.java new file mode 100644 index 0000000..7f82402 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteKeyValueDatabase.java @@ -0,0 +1,124 @@ +package it.cavallium.dbengine.database.remote.client; + +import com.google.protobuf.ByteString; +import io.grpc.StatusRuntimeException; +import java.io.IOException; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.database.LLDeepDictionary; +import it.cavallium.dbengine.database.LLDictionary; +import it.cavallium.dbengine.database.LLKeyValueDatabase; +import it.cavallium.dbengine.database.LLSingleton; +import it.cavallium.dbengine.proto.DatabaseCloseRequest; +import it.cavallium.dbengine.proto.DatabaseSnapshotReleaseRequest; +import it.cavallium.dbengine.proto.DatabaseSnapshotTakeRequest; +import it.cavallium.dbengine.proto.DictionaryOpenRequest; +import it.cavallium.dbengine.proto.SingletonOpenRequest; +import it.cavallium.dbengine.proto.CavalliumDBEngineServiceGrpc; + +public class LLRemoteKeyValueDatabase implements LLKeyValueDatabase { + + private final String name; + private final DbClientFunctions clientFunctions; + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub blockingStub; + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceStub stub; + private final int handle; + + public LLRemoteKeyValueDatabase(String name, DbClientFunctions clientFunctions, int handle) { + this.name = name; + this.clientFunctions = clientFunctions; + this.blockingStub = clientFunctions.getBlockingStub(); + this.stub = clientFunctions.getStub(); + this.handle = handle; + } + + @Override + public String getDatabaseName() { + return name; + } + + @Override + public LLSingleton getSingleton(byte[] singletonListColumnName, byte[] name, byte[] defaultValue) + throws IOException { + try { + var response = blockingStub.singletonOpen(SingletonOpenRequest.newBuilder() + .setDatabaseHandle(this.handle) + .setSingletonListColumnName(ByteString.copyFrom(singletonListColumnName)) + 
.setName(ByteString.copyFrom(name))
+					.setDefaultValue(ByteString.copyFrom(defaultValue))
+					.build());
+			int handle = response.getHandle();
+			return new LLRemoteSingleton(LLRemoteKeyValueDatabase.this.name, blockingStub, handle);
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public LLDictionary getDictionary(byte[] columnName) throws IOException {
+		try {
+			var response = blockingStub.dictionaryOpen(DictionaryOpenRequest.newBuilder()
+					.setDatabaseHandle(this.handle)
+					.setColumnName(ByteString.copyFrom(columnName))
+					.build());
+			int handle = response.getHandle();
+			return new LLRemoteDictionary(clientFunctions, handle, name);
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public LLDeepDictionary getDeepDictionary(byte[] columnName, int keySize, int key2Size) throws IOException {
+		// Opening the dictionary remotely and then throwing would leak the server-side handle,
+		// so this fails before performing any RPC
+		throw new UnsupportedOperationException("Deep dictionaries are not implemented in remote databases!"); //todo: implement
+	}
+
+	@Override
+	public LLSnapshot takeSnapshot() throws IOException {
+		try {
+			var response = blockingStub.databaseSnapshotTake(DatabaseSnapshotTakeRequest.newBuilder()
+					.setDatabaseHandle(this.handle)
+					.build());
+			long sequenceNumber = response.getSequenceNumber();
+			return new LLSnapshot(sequenceNumber);
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public void releaseSnapshot(LLSnapshot snapshot) throws IOException {
+		try {
+			//noinspection ResultOfMethodCallIgnored
+			blockingStub.databaseSnapshotRelease(DatabaseSnapshotReleaseRequest.newBuilder()
+					.setDatabaseHandle(this.handle)
+					.setSequenceNumber(snapshot.getSequenceNumber())
+					.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public long getProperty(String propertyName) throws IOException {
+		throw new UnsupportedOperationException("Not implemented"); //todo: implement
+	}
+
+	@Override
+	public void close() throws IOException {
+		try {
+			//noinspection ResultOfMethodCallIgnored
+			blockingStub.databaseClose(DatabaseCloseRequest.newBuilder()
+					.setDatabaseHandle(this.handle)
+					.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteLuceneIndex.java
new file mode 100644
index 0000000..617f8b8
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteLuceneIndex.java
+package it.cavallium.dbengine.database.remote.client;
+
+import io.grpc.StatusRuntimeException;
+import it.cavallium.dbengine.database.LLDocument;
+import it.cavallium.dbengine.database.LLKeyScore;
+import it.cavallium.dbengine.database.LLLuceneIndex;
+import it.cavallium.dbengine.database.LLSnapshot;
+import it.cavallium.dbengine.database.LLSort;
+import it.cavallium.dbengine.database.LLTerm;
+import it.cavallium.dbengine.database.LLTopKeys;
+import it.cavallium.dbengine.database.LLUtils;
+import it.cavallium.dbengine.proto.CavalliumDBEngineServiceGrpc;
+import it.cavallium.dbengine.proto.LuceneIndexCloseRequest;
+import it.cavallium.dbengine.proto.LuceneIndexMethodAddDocumentMultiRequest;
+import it.cavallium.dbengine.proto.LuceneIndexMethodAddDocumentRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodCountRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodDeleteAllRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodDeleteDocumentRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodMoreLikeThisRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchResponse; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchStreamRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodUpdateDocumentMultiRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodUpdateDocumentRequest; +import it.cavallium.dbengine.proto.LuceneIndexSnapshotReleaseRequest; +import it.cavallium.dbengine.proto.LuceneIndexSnapshotTakeRequest; +import it.cavallium.dbengine.proto.MltField; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; +import org.jetbrains.annotations.Nullable; +import org.warp.commonutils.batch.ParallelUtils; +import org.warp.commonutils.functional.IOConsumer; +import reactor.core.publisher.EmitterProcessor; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.util.function.Tuple2; +import reactor.util.function.Tuples; + +public class LLRemoteLuceneIndex implements LLLuceneIndex { + + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub blockingStub; + private final String luceneIndexName; + private final int handle; + private final boolean lowMemory; + private final int instancesCount; + + public LLRemoteLuceneIndex(DbClientFunctions clientFunctions, + String name, + int handle, + boolean lowMemory, + int instancesCount) { + this.blockingStub = clientFunctions.getBlockingStub(); + this.luceneIndexName = name; + this.handle = handle; + this.lowMemory = lowMemory; + this.instancesCount = instancesCount; + } + + @Override + public String getLuceneIndexName() { + return luceneIndexName; + } + + @Override + public LLSnapshot takeSnapshot() throws IOException { + try { + var searchResult = blockingStub + .luceneIndexSnapshotTake(LuceneIndexSnapshotTakeRequest.newBuilder() + .setHandle(handle).build()); + + return new LLSnapshot(searchResult.getSequenceNumber()); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public void releaseSnapshot(LLSnapshot snapshot) throws IOException { + try { + //noinspection ResultOfMethodCallIgnored + blockingStub.luceneIndexSnapshotRelease(LuceneIndexSnapshotReleaseRequest.newBuilder() + .setHandle(handle) + .setSequenceNumber(snapshot.getSequenceNumber()) + .build()); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public void addDocument(LLTerm key, LLDocument doc) throws IOException { + try { + //noinspection ResultOfMethodCallIgnored + blockingStub.luceneIndexMethodAddDocument(LuceneIndexMethodAddDocumentRequest.newBuilder() + .setHandle(handle) + .setKey(LLUtils.toGrpc(key)) + .addAllDocumentItems(LLUtils.toGrpc(doc.getItems())) + .build()); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public void addDocuments(Iterable keys, Iterable docs) throws IOException { + try { + //noinspection ResultOfMethodCallIgnored + blockingStub + 
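+					// keys and documents are paired by position and sent as one batched request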
.luceneIndexMethodAddDocumentMulti(LuceneIndexMethodAddDocumentMultiRequest.newBuilder()
+							.setHandle(handle)
+							.addAllKey(LLUtils.toGrpcKey(keys))
+							.addAllDocuments(LLUtils.toGrpc(docs))
+							.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public void deleteDocument(LLTerm id) throws IOException {
+		try {
+			//noinspection ResultOfMethodCallIgnored
+			blockingStub
+					.luceneIndexMethodDeleteDocument(LuceneIndexMethodDeleteDocumentRequest.newBuilder()
+							.setHandle(handle)
+							.setKey(LLUtils.toGrpc(id))
+							.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public void updateDocument(LLTerm id, LLDocument document) throws IOException {
+		try {
+			//noinspection ResultOfMethodCallIgnored
+			blockingStub
+					.luceneIndexMethodUpdateDocument(LuceneIndexMethodUpdateDocumentRequest.newBuilder()
+							.setHandle(handle)
+							.setKey(LLUtils.toGrpc(id))
+							.addAllDocumentItems(LLUtils.toGrpc(document.getItems()))
+							.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public void updateDocuments(Iterable<LLTerm> ids, Iterable<LLDocument> documents)
+			throws IOException {
+		try {
+			//noinspection ResultOfMethodCallIgnored
+			blockingStub.luceneIndexMethodUpdateDocumentMulti(
+					LuceneIndexMethodUpdateDocumentMultiRequest.newBuilder()
+							.setHandle(handle)
+							.addAllKey(LLUtils.toGrpcKey(ids))
+							.addAllDocuments(LLUtils.toGrpc(documents))
+							.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public void deleteAll() throws IOException {
+		try {
+			//noinspection ResultOfMethodCallIgnored
+			blockingStub.luceneIndexMethodDeleteAll(LuceneIndexMethodDeleteAllRequest.newBuilder()
+					.setHandle(handle)
+					.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public Collection<LLTopKeys> search(@Nullable LLSnapshot snapshot,
+			String query,
+			int limit,
+			@Nullable LLSort sort,
+			String keyFieldName) throws IOException {
+		try {
+			ConcurrentLinkedQueue<LLTopKeys> multiResult = new ConcurrentLinkedQueue<>();
+
+			ParallelUtils.parallelizeIO((IOConsumer<Integer> c) -> {
+				for (int shardIndex = 0; shardIndex < instancesCount; shardIndex++) {
+					c.consume(shardIndex);
+				}
+			}, 0, instancesCount, 1, shardIndex -> {
+				var request = LuceneIndexMethodSearchRequest.newBuilder()
+						.setHandle(handle)
+						.setQuery(query)
+						.setLimit(limit)
+						.setKeyFieldName(keyFieldName);
+				if (snapshot != null) {
+					request.setSequenceNumber(snapshot.getSequenceNumber());
+				}
+				if (sort != null) {
+					request.setSort(LLUtils.toGrpc(sort));
+				}
+
+				var searchMultiResults = blockingStub.luceneIndexMethodSearch(request.build());
+
+				for (LuceneIndexMethodSearchResponse response : searchMultiResults.getResponseList()) {
+					var result = new LLTopKeys(response.getTotalHitsCount(),
+							response.getHitsList().stream().map(LLUtils::toKeyScore).toArray(LLKeyScore[]::new)
+					);
+					multiResult.add(result);
+				}
+			});
+
+			return multiResult;
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public Collection<LLTopKeys> moreLikeThis(@Nullable LLSnapshot snapshot,
+			Map<String, Set<String>> mltDocumentFields,
+			int limit,
+			String keyFieldName) throws IOException {
+		try {
+			ConcurrentLinkedQueue<LLTopKeys> multiResult = new ConcurrentLinkedQueue<>();
+
+			ParallelUtils.parallelizeIO((IOConsumer<Integer> c) -> {
+				for (int shardIndex = 0; shardIndex < instancesCount; shardIndex++) {
+					c.consume(shardIndex);
+				}
+			}, 0, instancesCount, 1, shardIndex -> {
+				var request =
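+						// each shard receives the same more-like-this fields; the caller merges the per-shard results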
LuceneIndexMethodMoreLikeThisRequest.newBuilder()
+								.setHandle(handle)
+								.addAllMltFields(mltDocumentFields
+										.entrySet()
+										.stream()
+										.map(entry -> MltField.newBuilder().setKey(entry.getKey()).addAllValues(entry.getValue()).build())
+										.collect(Collectors.toList()))
+								.setLimit(limit)
+								.setKeyFieldName(keyFieldName);
+				if (snapshot != null) {
+					request.setSequenceNumber(snapshot.getSequenceNumber());
+				}
+
+				var searchMultiResult = blockingStub.luceneIndexMethodMoreLikeThis(request.build());
+
+				for (LuceneIndexMethodSearchResponse response : searchMultiResult.getResponseList()) {
+					var result = new LLTopKeys(response.getTotalHitsCount(),
+							response.getHitsList().stream().map(LLUtils::toKeyScore).toArray(LLKeyScore[]::new)
+					);
+					multiResult.add(result);
+				}
+			});
+
+			return multiResult;
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public Tuple2<Mono<Long>, Collection<Flux<String>>> searchStream(@Nullable LLSnapshot snapshot,
+			String query,
+			int limit,
+			@Nullable LLSort sort,
+			String keyFieldName) {
+		try {
+			var request = LuceneIndexMethodSearchStreamRequest.newBuilder()
+					.setHandle(handle)
+					.setQuery(query)
+					.setLimit(limit)
+					.setKeyFieldName(keyFieldName);
+			if (snapshot != null) {
+				request.setSequenceNumber(snapshot.getSequenceNumber());
+			}
+			if (sort != null) {
+				request.setSort(LLUtils.toGrpc(sort));
+			}
+
+			var searchResult = blockingStub.luceneIndexMethodSearchStream(request.build());
+
+			EmitterProcessor<Long> approximatedTotalHitsCount = EmitterProcessor.create();
+			ArrayList<EmitterProcessor<String>> results = new ArrayList<>();
+			for (int shardIndex = 0; shardIndex < instancesCount; shardIndex++) {
+				results.add(EmitterProcessor.create());
+			}
+			searchResult.forEachRemaining((result) -> {
+				if (result.getIsKey()) {
+					results.get(result.getShardIndex()).onNext(result.getKey());
+				} else {
+					approximatedTotalHitsCount.onNext(result.getApproximatedTotalCount());
+				}
+			});
+
+			return Tuples.of(approximatedTotalHitsCount.single(0L),
+					results.stream().map(EmitterProcessor::asFlux).collect(Collectors.toList())
+			);
+		} catch (RuntimeException ex) {
+			var error = new IOException(ex);
+			return Tuples.of(Mono.error(error), Collections.singleton(Flux.error(error)));
+		}
+	}
+
+	@Override
+	public long count(@Nullable LLSnapshot snapshot, String query) throws IOException {
+		try {
+			var request = LuceneIndexMethodCountRequest.newBuilder()
+					.setHandle(handle)
+					.setQuery(query);
+			if (snapshot != null) {
+				request.setSequenceNumber(snapshot.getSequenceNumber());
+			}
+
+			var searchResult = blockingStub
+					.luceneIndexMethodCount(request.build());
+
+			return searchResult.getCount();
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public void close() throws IOException {
+		try {
+			//noinspection ResultOfMethodCallIgnored
+			blockingStub.luceneIndexClose(LuceneIndexCloseRequest.newBuilder()
+					.setHandle(handle)
+					.build());
+		} catch (StatusRuntimeException ex) {
+			throw new IOException(ex);
+		}
+	}
+
+	@Override
+	public boolean isLowMemoryMode() {
+		return lowMemory;
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteSingleton.java b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteSingleton.java
new file mode 100644
index 0000000..62c90ec
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/remote/client/LLRemoteSingleton.java
+package it.cavallium.dbengine.database.remote.client;
+
+import com.google.protobuf.ByteString;
+import io.grpc.StatusRuntimeException;
+import
it.cavallium.dbengine.database.LLSingleton; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.proto.CavalliumDBEngineServiceGrpc; +import it.cavallium.dbengine.proto.SingletonMethodGetRequest; +import it.cavallium.dbengine.proto.SingletonMethodSetRequest; +import java.io.IOException; +import org.jetbrains.annotations.Nullable; + +public class LLRemoteSingleton implements LLSingleton { + + private final CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub blockingStub; + private final int handle; + private final String databaseName; + + public LLRemoteSingleton( + String databaseName, + CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceBlockingStub blockingStub, int handle) { + this.databaseName = databaseName; + this.blockingStub = blockingStub; + this.handle = handle; + } + + @Override + public byte[] get(@Nullable LLSnapshot snapshot) throws IOException { + try { + var request = SingletonMethodGetRequest.newBuilder() + .setSingletonHandle(handle); + if (snapshot != null) { + request.setSequenceNumber(snapshot.getSequenceNumber()); + } + var response = blockingStub.singletonMethodGet(request.build()); + return response.getValue().toByteArray(); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public void set(byte[] value) throws IOException { + try { + //noinspection ResultOfMethodCallIgnored + blockingStub.singletonMethodSet(SingletonMethodSetRequest.newBuilder() + .setSingletonHandle(handle) + .setValue(ByteString.copyFrom(value)) + .build()); + } catch (StatusRuntimeException ex) { + throw new IOException(ex); + } + } + + @Override + public String getDatabaseName() { + return databaseName; + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/server/DbServerFunctions.java b/src/main/java/it/cavallium/dbengine/database/remote/server/DbServerFunctions.java new file mode 100644 index 0000000..cdd9f83 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/remote/server/DbServerFunctions.java @@ -0,0 +1,840 @@ +package it.cavallium.dbengine.database.remote.server; + +import com.google.protobuf.ByteString; +import io.grpc.stub.StreamObserver; +import it.cavallium.dbengine.database.Column; +import it.cavallium.dbengine.database.LLDictionary; +import it.cavallium.dbengine.database.LLDictionaryResultType; +import it.cavallium.dbengine.database.LLKeyValueDatabase; +import it.cavallium.dbengine.database.LLLuceneIndex; +import it.cavallium.dbengine.database.LLSingleton; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.database.LLTopKeys; +import it.cavallium.dbengine.database.LLUtils; +import it.cavallium.dbengine.database.analyzer.TextFieldsAnalyzer; +import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection; +import it.cavallium.dbengine.proto.CavalliumDBEngineServiceGrpc; +import it.cavallium.dbengine.proto.DatabaseCloseRequest; +import it.cavallium.dbengine.proto.DatabaseOpenRequest; +import it.cavallium.dbengine.proto.DatabaseSnapshotReleaseRequest; +import it.cavallium.dbengine.proto.DatabaseSnapshotTakeRequest; +import it.cavallium.dbengine.proto.DatabaseSnapshotTakeResult; +import it.cavallium.dbengine.proto.DictionaryMethodClearRequest; +import it.cavallium.dbengine.proto.DictionaryMethodContainsRequest; +import it.cavallium.dbengine.proto.DictionaryMethodContainsResponse; +import it.cavallium.dbengine.proto.DictionaryMethodForEachRequest; +import it.cavallium.dbengine.proto.DictionaryMethodGetRequest; +import 
it.cavallium.dbengine.proto.DictionaryMethodGetResponse; +import it.cavallium.dbengine.proto.DictionaryMethodIsEmptyRequest; +import it.cavallium.dbengine.proto.DictionaryMethodIsEmptyResponse; +import it.cavallium.dbengine.proto.DictionaryMethodMultiStandardResult; +import it.cavallium.dbengine.proto.DictionaryMethodPutMultiRequest; +import it.cavallium.dbengine.proto.DictionaryMethodPutRequest; +import it.cavallium.dbengine.proto.DictionaryMethodRemoveOneRequest; +import it.cavallium.dbengine.proto.DictionaryMethodRemoveRequest; +import it.cavallium.dbengine.proto.DictionaryMethodSizeRequest; +import it.cavallium.dbengine.proto.DictionaryMethodSizeResponse; +import it.cavallium.dbengine.proto.DictionaryMethodStandardEntityResponse; +import it.cavallium.dbengine.proto.DictionaryMethodStandardResult; +import it.cavallium.dbengine.proto.DictionaryOpenRequest; +import it.cavallium.dbengine.proto.Empty; +import it.cavallium.dbengine.proto.HandleResult; +import it.cavallium.dbengine.proto.LLDocument; +import it.cavallium.dbengine.proto.LLTerm; +import it.cavallium.dbengine.proto.LuceneIndexCloseRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodAddDocumentMultiRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodAddDocumentRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodCountRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodCountResponse; +import it.cavallium.dbengine.proto.LuceneIndexMethodDeleteAllRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodDeleteDocumentRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodMoreLikeThisRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchMultiResponse; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchResponse; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchStreamItem; +import it.cavallium.dbengine.proto.LuceneIndexMethodSearchStreamRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodUpdateDocumentMultiRequest; +import it.cavallium.dbengine.proto.LuceneIndexMethodUpdateDocumentRequest; +import it.cavallium.dbengine.proto.LuceneIndexOpenRequest; +import it.cavallium.dbengine.proto.LuceneIndexSnapshotReleaseRequest; +import it.cavallium.dbengine.proto.LuceneIndexSnapshotTakeRequest; +import it.cavallium.dbengine.proto.LuceneIndexSnapshotTakeResult; +import it.cavallium.dbengine.proto.MltField; +import it.cavallium.dbengine.proto.ResetConnectionRequest; +import it.cavallium.dbengine.proto.SingletonMethodGetRequest; +import it.cavallium.dbengine.proto.SingletonMethodGetResponse; +import it.cavallium.dbengine.proto.SingletonMethodSetRequest; +import it.cavallium.dbengine.proto.SingletonOpenRequest; +import it.unimi.dsi.fastutil.objects.ObjectArrayList; +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +public class DbServerFunctions extends CavalliumDBEngineServiceGrpc.CavalliumDBEngineServiceImplBase { + + private final AtomicInteger firstFreeDbHandle = new AtomicInteger(0); + private final AtomicInteger firstFreeLuceneHandle = new AtomicInteger(0); + private final AtomicInteger firstFreeStructureHandle = new 
AtomicInteger(0); + private final ConcurrentHashMap databases = new ConcurrentHashMap<>(); + private final ConcurrentHashMap luceneIndices = new ConcurrentHashMap<>(); + private final ConcurrentHashMap> databasesRelatedHandles = new ConcurrentHashMap<>(); + + private final ConcurrentHashMap singletons = new ConcurrentHashMap<>(); + private final ConcurrentHashMap dictionaries = new ConcurrentHashMap<>(); + private final LLLocalDatabaseConnection localDatabaseConnection; + + public DbServerFunctions(LLLocalDatabaseConnection localDatabaseConnection) { + this.localDatabaseConnection = localDatabaseConnection; + } + + @Override + public void resetConnection(ResetConnectionRequest request, + StreamObserver responseObserver) { + System.out.println("Resetting connection..."); + int lastHandle = firstFreeDbHandle.get(); + databases.forEach((handle, db) -> { + System.out.println("Closing db " + handle); + try { + db.close(); + } catch (IOException e) { + e.printStackTrace(); + } + }); + for (int i = 0; i < lastHandle; i++) { + var relatedHandles = databasesRelatedHandles.remove(i); + if (relatedHandles != null) { + for (Integer relatedHandle : relatedHandles) { + singletons.remove(relatedHandle); + dictionaries.remove(relatedHandle); + } + } + databases.remove(i); + } + responseObserver.onNext(Empty.newBuilder().build()); + responseObserver.onCompleted(); + System.out.println("Connection reset."); + } + + @Override + public void databaseOpen(DatabaseOpenRequest request, + StreamObserver responseObserver) { + var response = HandleResult.newBuilder(); + + int handle = firstFreeDbHandle.getAndIncrement(); + + System.out.println("Opening db " + handle + "."); + + String dbName = Column.toString(request.getName().toByteArray()); + List columns = request.getColumnNameList().stream() + .map((nameBinary) -> Column.special(Column.toString(nameBinary.toByteArray()))) + .collect(Collectors.toList()); + boolean lowMemory = request.getLowMemory(); + + try { + var database = localDatabaseConnection.getDatabase(dbName, columns, lowMemory); + databases.put(handle, database); + response.setHandle(handle); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void databaseClose(DatabaseCloseRequest request, StreamObserver responseObserver) { + try { + System.out.println("Closing db " + request.getDatabaseHandle() + "."); + var db = databases.remove(request.getDatabaseHandle()); + db.close(); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (Exception e) { + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexOpen(LuceneIndexOpenRequest request, + StreamObserver responseObserver) { + var response = HandleResult.newBuilder(); + + int handle = firstFreeLuceneHandle.getAndIncrement(); + + System.out.println("Opening lucene " + handle + "."); + + String name = request.getName(); + TextFieldsAnalyzer textFieldsAnalyzer = TextFieldsAnalyzer.values()[request.getTextFieldsAnalyzer()]; + var queryRefreshDebounceTime = Duration.ofMillis(request.getQueryRefreshDebounceTime()); + var commitDebounceTime = Duration.ofMillis(request.getCommitDebounceTime()); + var lowMemory = request.getLowMemory(); + var instancesCount = request.getInstancesCount(); + + try { + var luceneIndex = localDatabaseConnection.getLuceneIndex(name, + instancesCount, + textFieldsAnalyzer, + queryRefreshDebounceTime, + commitDebounceTime, + 
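+					// the debounce durations presumably throttle how often the index is refreshed and committed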
lowMemory + ); + luceneIndices.put(handle, luceneIndex); + response.setHandle(handle); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexClose(LuceneIndexCloseRequest request, + StreamObserver responseObserver) { + try { + System.out.println("Closing lucene " + request.getHandle() + "."); + var luceneIndex = luceneIndices.remove(request.getHandle()); + luceneIndex.close(); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (Exception e) { + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexSnapshotTake(LuceneIndexSnapshotTakeRequest request, StreamObserver responseObserver) { + var response = LuceneIndexSnapshotTakeResult.newBuilder(); + + int handle = request.getHandle(); + + try { + var snapshot = luceneIndices.get(handle).takeSnapshot(); + response.setSequenceNumber(snapshot.getSequenceNumber()); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @SuppressWarnings("DuplicatedCode") + @Override + public void luceneIndexSnapshotRelease(LuceneIndexSnapshotReleaseRequest request, StreamObserver responseObserver) { + var response = Empty.newBuilder(); + + int handle = request.getHandle(); + long sequenceNumber = request.getSequenceNumber(); + + try { + luceneIndices.get(handle).releaseSnapshot(new LLSnapshot(sequenceNumber)); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void singletonOpen(SingletonOpenRequest request, + StreamObserver responseObserver) { + var response = HandleResult.newBuilder(); + + int handle = firstFreeStructureHandle.getAndIncrement(); + + int dbHandle = request.getDatabaseHandle(); + byte[] singletonListColumnName = request.getSingletonListColumnName().toByteArray(); + byte[] name = request.getName().toByteArray(); + byte[] defaultValue = request.getDefaultValue().toByteArray(); + + try { + var singleton = databases.get(dbHandle) + .getSingleton(singletonListColumnName, name, defaultValue); + singletons.put(handle, singleton); + response.setHandle(handle); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryOpen(DictionaryOpenRequest request, + StreamObserver responseObserver) { + var response = HandleResult.newBuilder(); + + int handle = firstFreeStructureHandle.getAndIncrement(); + + int dbHandle = request.getDatabaseHandle(); + byte[] columnName = request.getColumnName().toByteArray(); + + try { + var dict = databases.get(dbHandle).getDictionary(columnName); + dictionaries.put(handle, dict); + response.setHandle(handle); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void databaseSnapshotTake(DatabaseSnapshotTakeRequest request, StreamObserver responseObserver) { + var response = DatabaseSnapshotTakeResult.newBuilder(); + + int dbHandle = request.getDatabaseHandle(); + + try { + var snapshot = databases.get(dbHandle).takeSnapshot(); + 
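+			// only the sequence number is returned to the client; the snapshot itself stays registered on the server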
response.setSequenceNumber(snapshot.getSequenceNumber()); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void databaseSnapshotRelease(DatabaseSnapshotReleaseRequest request, StreamObserver responseObserver) { + var response = Empty.newBuilder(); + + int dbHandle = request.getDatabaseHandle(); + long sequenceNumber = request.getSequenceNumber(); + + try { + databases.get(dbHandle).releaseSnapshot(new LLSnapshot(sequenceNumber)); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodGet(DictionaryMethodGetRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodGetResponse.newBuilder(); + + int handle = request.getDictionaryHandle(); + long sequenceNumber = request.getSequenceNumber(); + LLSnapshot snapshot = sequenceNumber == 0 ? null : new LLSnapshot(sequenceNumber); + byte[] key = request.getKey().toByteArray(); + + try { + var dict = dictionaries.get(handle); + Optional value = dict.get(snapshot, key); + value.ifPresent(bytes -> response.setValue(ByteString.copyFrom(bytes))); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodContains(DictionaryMethodContainsRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodContainsResponse.newBuilder(); + + int handle = request.getDictionaryHandle(); + long sequenceNumber = request.getSequenceNumber(); + LLSnapshot snapshot = sequenceNumber == 0 ? 
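+				// 0 is the proto3 default: no snapshot requested, read the latest data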
null : new LLSnapshot(sequenceNumber); + byte[] key = request.getKey().toByteArray(); + + try { + var dict = dictionaries.get(handle); + boolean value = dict.contains(snapshot, key); + response.setValue(value); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodPut(DictionaryMethodPutRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodStandardResult.newBuilder(); + + int handle = request.getDictionaryHandle(); + byte[] key = request.getKey().toByteArray(); + byte[] value = request.getValue().toByteArray(); + var resultType = LLDictionaryResultType + .valueOf(it.cavallium.dbengine.proto.LLDictionaryResultType.forNumber(request.getResultTypeValue())); + + try { + var dict = dictionaries.get(handle); + Optional result = dict.put(key, value, resultType); + result.ifPresent((data) -> response.setValue(ByteString.copyFrom(data))); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodPutMulti(DictionaryMethodPutMultiRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodMultiStandardResult.newBuilder(); + + int handle = request.getDictionaryHandle(); + byte[][] key = request.getKeyList().stream().map(ByteString::toByteArray) + .toArray(byte[][]::new); + byte[][] value = request.getValueList().stream().map(ByteString::toByteArray) + .toArray(byte[][]::new); + var resultType = LLDictionaryResultType + .valueOf(it.cavallium.dbengine.proto.LLDictionaryResultType.forNumber(request.getResultTypeValue())); + + try { + var dict = dictionaries.get(handle); + List responses = new LinkedList<>(); + dict.putMulti(key, value, resultType, (bytes) -> responses.add(ByteString.copyFrom(bytes))); + if (!responses.isEmpty()) { + response.addAllValue(responses); + } + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodRemove(DictionaryMethodRemoveRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodStandardResult.newBuilder(); + + int handle = request.getDictionaryHandle(); + byte[] key = request.getKey().toByteArray(); + var resultType = LLDictionaryResultType + .valueOf(it.cavallium.dbengine.proto.LLDictionaryResultType.forNumber(request.getResultTypeValue())); + + try { + var dict = dictionaries.get(handle); + Optional result = dict.remove(key, resultType); + result.ifPresent((data) -> response.setValue(ByteString.copyFrom(data))); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodClear(DictionaryMethodClearRequest request, + StreamObserver responseObserver) { + int handle = request.getDictionaryHandle(); + + try { + var dict = dictionaries.get(handle); + dict.clear(); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodFastSize(DictionaryMethodSizeRequest request, + StreamObserver responseObserver) { + var 
response = DictionaryMethodSizeResponse.newBuilder(); + + int handle = request.getDictionaryHandle(); + long sequenceNumber = request.getSequenceNumber(); + LLSnapshot snapshot = sequenceNumber == 0 ? null : new LLSnapshot(sequenceNumber); + + try { + var dict = dictionaries.get(handle); + long result = dict.size(snapshot, true); + response.setSize(result); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodExactSize(DictionaryMethodSizeRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodSizeResponse.newBuilder(); + + int handle = request.getDictionaryHandle(); + long sequenceNumber = request.getSequenceNumber(); + LLSnapshot snapshot = sequenceNumber == 0 ? null : new LLSnapshot(sequenceNumber); + + try { + var dict = dictionaries.get(handle); + long result = dict.size(snapshot, false); + response.setSize(result); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodIsEmpty(DictionaryMethodIsEmptyRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodIsEmptyResponse.newBuilder(); + + int handle = request.getDictionaryHandle(); + long sequenceNumber = request.getSequenceNumber(); + LLSnapshot snapshot = sequenceNumber == 0 ? null : new LLSnapshot(sequenceNumber); + + try { + var dict = dictionaries.get(handle); + boolean result = dict.isEmpty(snapshot); + response.setEmpty(result); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodRemoveOne(DictionaryMethodRemoveOneRequest request, + StreamObserver responseObserver) { + var response = DictionaryMethodStandardEntityResponse.newBuilder(); + + int handle = request.getDictionaryHandle(); + + try { + var dict = dictionaries.get(handle); + Optional> result = dict.removeOne(); + result.ifPresent((data) -> { + response.setKey(ByteString.copyFrom(data.getKey())); + response.setValue(ByteString.copyFrom(data.getValue())); + }); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void dictionaryMethodForEach(DictionaryMethodForEachRequest request, + StreamObserver responseObserver) { + + int handle = request.getDictionaryHandle(); + long sequenceNumber = request.getSequenceNumber(); + LLSnapshot snapshot = sequenceNumber == 0 ? null : new LLSnapshot(sequenceNumber); + + var dict = dictionaries.get(handle); + dict.forEach(snapshot, 1, (key, val) -> { + var response = DictionaryMethodStandardEntityResponse.newBuilder(); + response.setKey(ByteString.copyFrom(key)); + response.setValue(ByteString.copyFrom(val)); + responseObserver.onNext(response.build()); + }); + responseObserver.onCompleted(); + } + + @Override + public void singletonMethodGet(SingletonMethodGetRequest request, + StreamObserver responseObserver) { + var response = SingletonMethodGetResponse.newBuilder(); + + int handle = request.getSingletonHandle(); + long sequenceNumber = request.getSequenceNumber(); + LLSnapshot snapshot = sequenceNumber == 0 ? 
null : new LLSnapshot(sequenceNumber); + + try { + var singleton = singletons.get(handle); + byte[] result = singleton.get(snapshot); + response.setValue(ByteString.copyFrom(result)); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void singletonMethodSet(SingletonMethodSetRequest request, + StreamObserver responseObserver) { + int handle = request.getSingletonHandle(); + byte[] value = request.getValue().toByteArray(); + + try { + var singleton = singletons.get(handle); + singleton.set(value); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + + @Override + public void luceneIndexMethodAddDocument(LuceneIndexMethodAddDocumentRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + var documentKey = request.getKey(); + var documentItemsList = request.getDocumentItemsList(); + + try { + var luceneIndex = luceneIndices.get(handle); + luceneIndex.addDocument(LLUtils.toLocal(documentKey), LLUtils.toLocal(documentItemsList)); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodAddDocumentMulti(LuceneIndexMethodAddDocumentMultiRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + List keyList = request.getKeyList(); + List documentItemsList = request.getDocumentsList(); + + try { + var luceneIndex = luceneIndices.get(handle); + luceneIndex.addDocuments(LLUtils.toLocalTerms(keyList), LLUtils.toLocalDocuments(documentItemsList)); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodDeleteDocument(LuceneIndexMethodDeleteDocumentRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + var key = request.getKey(); + + try { + var luceneIndex = luceneIndices.get(handle); + luceneIndex.deleteDocument(LLUtils.toLocal(key)); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodUpdateDocument(LuceneIndexMethodUpdateDocumentRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + var key = request.getKey(); + var documentItemsList = request.getDocumentItemsList(); + + try { + var luceneIndex = luceneIndices.get(handle); + luceneIndex.updateDocument(LLUtils.toLocal(key), LLUtils.toLocal(documentItemsList)); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodUpdateDocumentMulti( + LuceneIndexMethodUpdateDocumentMultiRequest request, StreamObserver responseObserver) { + int handle = request.getHandle(); + List keyList = request.getKeyList(); + List documentItemsList = request.getDocumentsList(); + + try { + var luceneIndex = luceneIndices.get(handle); + luceneIndex.updateDocuments(LLUtils.toLocalTerms(keyList), + 
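+					// terms and documents are paired by position; each pair replaces the document matching that term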
LLUtils.toLocalDocuments(documentItemsList)); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodDeleteAll(LuceneIndexMethodDeleteAllRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + + try { + var luceneIndex = luceneIndices.get(handle); + luceneIndex.deleteAll(); + responseObserver.onNext(Empty.newBuilder().build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodSearch(LuceneIndexMethodSearchRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + var snapshot = request.getSequenceNumber() == 0 ? null : new LLSnapshot(request.getSequenceNumber()); + var query = request.getQuery(); + var limit = request.getLimit(); + var sort = request.hasSort() ? LLUtils.toLocal(request.getSort()) : null; + var keyFieldName = request.getKeyFieldName(); + + try { + var luceneIndex = luceneIndices.get(handle); + var multiResults = luceneIndex.search(snapshot, query, limit, sort, keyFieldName); + List responses = new ArrayList<>(); + for (LLTopKeys result : multiResults) { + var response = LuceneIndexMethodSearchResponse.newBuilder() + .setTotalHitsCount(result.getTotalHitsCount()) + .addAllHits(ObjectArrayList.wrap(result.getHits()).stream().map(LLUtils::toGrpc) + .collect(Collectors.toList())); + responses.add(response.build()); + } + responseObserver.onNext(LuceneIndexMethodSearchMultiResponse.newBuilder().addAllResponse(responses).build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodMoreLikeThis(LuceneIndexMethodMoreLikeThisRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + var snapshot = request.getSequenceNumber() == 0 ? null : new LLSnapshot(request.getSequenceNumber()); + var mltFieldsList = request.getMltFieldsList(); + var limit = request.getLimit(); + var keyFieldName = request.getKeyFieldName(); + + try { + var luceneIndex = luceneIndices.get(handle); + + var mltFields = new HashMap>(); + for (MltField mltField : mltFieldsList) { + mltFields.put(mltField.getKey(), new HashSet<>(mltField.getValuesList())); + } + + var multiResults = luceneIndex.moreLikeThis(snapshot, mltFields, limit, keyFieldName); + List responses = new ArrayList<>(); + for (LLTopKeys result : multiResults) { + var response = LuceneIndexMethodSearchResponse + .newBuilder() + .setTotalHitsCount(result.getTotalHitsCount()) + .addAllHits(ObjectArrayList.wrap(result.getHits()).stream().map(LLUtils::toGrpc).collect(Collectors.toList())); + responses.add(response.build()); + } + responseObserver.onNext(LuceneIndexMethodSearchMultiResponse.newBuilder().addAllResponse(responses).build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void luceneIndexMethodSearchStream(LuceneIndexMethodSearchStreamRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + var snapshot = request.getSequenceNumber() == 0 ? null : new LLSnapshot(request.getSequenceNumber()); + var query = request.getQuery(); + var limit = request.getLimit(); + var sort = request.hasSort() ? 
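+				// sort is optional; when absent, Lucene's default relevance ordering presumably applies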
LLUtils.toLocal(request.getSort()) : null; + var keyFieldName = request.getKeyFieldName(); + + try { + var luceneIndex = luceneIndices.get(handle); + var results = luceneIndex.searchStream(snapshot, query, limit, sort, keyFieldName); + int shardIndex = 0; + for (var flux : results.getT2()) { + int shardIndexF = shardIndex; + flux.subscribe(resultKey -> responseObserver.onNext(LuceneIndexMethodSearchStreamItem + .newBuilder() + .setShardIndex(shardIndexF) + .setIsKey(true) + .setKey(resultKey) + .build()), responseObserver::onError, responseObserver::onCompleted); + + shardIndex++; + } + results + .getT1() + .subscribe(count -> responseObserver.onNext(LuceneIndexMethodSearchStreamItem + .newBuilder() + .setIsKey(false) + .setApproximatedTotalCount(count) + .build()), responseObserver::onError, responseObserver::onCompleted); + } catch (Exception e) { + e.printStackTrace(); + responseObserver.onError(e); + } + } + + @Override + public void luceneIndexMethodCount(LuceneIndexMethodCountRequest request, + StreamObserver responseObserver) { + int handle = request.getHandle(); + var snapshot = request.getSequenceNumber() == 0 ? null : new LLSnapshot(request.getSequenceNumber()); + var query = request.getQuery(); + + try { + var luceneIndex = luceneIndices.get(handle); + var result = luceneIndex.count(snapshot, query); + var response = LuceneIndexMethodCountResponse.newBuilder() + .setCount(result); + responseObserver.onNext(response.build()); + } catch (IOException e) { + e.printStackTrace(); + responseObserver.onError(e); + } + responseObserver.onCompleted(); + } + + @Override + public void ping(Empty request, StreamObserver responseObserver) { + responseObserver.onNext(Empty.newBuilder().build()); + responseObserver.onCompleted(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/remote/server/DbServerManager.java b/src/main/java/it/cavallium/dbengine/database/remote/server/DbServerManager.java new file mode 100644 index 0000000..8ced05c --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/remote/server/DbServerManager.java @@ -0,0 +1,95 @@ +package it.cavallium.dbengine.database.remote.server; + +import io.grpc.Server; +import io.grpc.ServerBuilder; +import io.grpc.netty.GrpcSslContexts; +import io.netty.handler.ssl.ClientAuth; +import io.netty.handler.ssl.SslContextBuilder; +import io.netty.handler.ssl.SslProvider; +import java.io.IOException; +import java.nio.file.Path; +import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection; + +public class DbServerManager { + + private Server server; + + public boolean stopped; + + private final LLLocalDatabaseConnection databaseConnection; + private final String host; + private final int port; + private final Path certChainFilePath; + private final Path privateKeyFilePath; + private final Path trustCertCollectionFilePath; + + public DbServerManager(LLLocalDatabaseConnection databaseConnection, String host, int port, + Path certChainFilePath, Path privateKeyFilePath, Path trustCertCollectionFilePath) { + this.databaseConnection = databaseConnection; + this.host = host; + this.port = port; + this.certChainFilePath = certChainFilePath; + this.privateKeyFilePath = privateKeyFilePath; + this.trustCertCollectionFilePath = trustCertCollectionFilePath; + } + + private SslContextBuilder getSslContextBuilder() { + SslContextBuilder sslClientContextBuilder = SslContextBuilder + .forServer(certChainFilePath.toFile(), + privateKeyFilePath.toFile()); + if (trustCertCollectionFilePath != null) { + 
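+			// with a trust collection configured, clients must authenticate too (mutual TLS)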
sslClientContextBuilder.trustManager(trustCertCollectionFilePath.toFile());
+			sslClientContextBuilder.clientAuth(ClientAuth.REQUIRE);
+		}
+		return GrpcSslContexts.configure(sslClientContextBuilder,
+				SslProvider.OPENSSL);
+	}
+
+	public void start() throws IOException {
+		// The certificate paths passed to the constructor (and the netty-tcnative dependency)
+		// indicate TLS was intended, but the SSL context was built and never applied; it is
+		// wired in here. NettyServerBuilder is referenced fully qualified to avoid touching
+		// the import block above.
+		var srvBuilder = io.grpc.netty.NettyServerBuilder.forPort(port)
+				.sslContext(getSslContextBuilder().build())
+				.addService(new DbServerFunctions(databaseConnection));
+		server = srvBuilder.build()
+				.start();
+		Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+			if (!stopped) {
+				// Use stderr here since the logger may have been reset by its JVM shutdown hook.
+				System.err.println("*** shutting down gRPC server since JVM is shutting down");
+				this.stop();
+				try {
+					databaseConnection.disconnect();
+				} catch (IOException e) {
+					e.printStackTrace();
+				}
+				System.err.println("*** server shut down");
+			}
+		}));
+		System.out.println("Server started, listening on " + port);
+	}
+
+	public void stop() {
+		stopped = true;
+		if (server != null) {
+			try {
+				server.shutdown();
+			} catch (Exception ex) {
+				ex.printStackTrace();
+			}
+			try {
+				blockUntilShutdown();
+			} catch (InterruptedException ex) {
+				ex.printStackTrace();
+			}
+		}
+		System.out.println("Server stopped.");
+	}
+
+	/**
+	 * Await termination on the main thread since the grpc library uses daemon threads.
+	 */
+	void blockUntilShutdown() throws InterruptedException {
+		if (server != null) {
+			server.awaitTermination();
+		}
+	}
+
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/remote/server/Main.java b/src/main/java/it/cavallium/dbengine/database/remote/server/Main.java
new file mode 100644
index 0000000..4f82399
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/remote/server/Main.java
+package it.cavallium.dbengine.database.remote.server;
+
+import java.io.IOException;
+import java.nio.file.Paths;
+import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection;
+
+public class Main {
+
+	/**
+	 * @param args [database-path] [host] [port] [cert-chain-file-path] [private-key-file-path]
+	 *             [trust-cert-collection-file-path] [boolean-flag]
+	 *             (seven arguments are required; the last is parsed as a boolean and passed to
+	 *             the LLLocalDatabaseConnection constructor, whose parameter name is not
+	 *             visible here)
+	 */
+	public static void main(String[] args) throws IOException, InterruptedException {
+		if (args.length != 7) {
+			System.out.println(
+					"Usage: java -jar dataserver.jar <database-path> <host> <port> <cert-chain-file-path> <private-key-file-path> <trust-cert-collection-file-path> <boolean-flag>");
+		} else {
+			System.out.println("Database server starting...");
+			var dbConnection = new LLLocalDatabaseConnection(Paths.get(args[0]),
+					Boolean.parseBoolean(args[6]));
+			dbConnection.connect();
+			var serverManager = new DbServerManager(dbConnection, args[1], Integer.parseInt(args[2]),
+					Paths.get(args[3]), Paths.get(args[4]), Paths.get(args[5]));
+			serverManager.start();
+			serverManager.blockUntilShutdown();
+			System.out.println("Database has been terminated.");
+		}
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/structures/LLDeepMap.java b/src/main/java/it/cavallium/dbengine/database/structures/LLDeepMap.java
new file mode 100644
index 0000000..fd6088c
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/structures/LLDeepMap.java
+package it.cavallium.dbengine.database.structures;
+
+import java.io.IOException;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.StringJoiner;
+import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
+import java.util.function.Consumer;
+import org.apache.commons.lang3.tuple.ImmutableTriple;
+import org.jetbrains.annotations.Nullable;
+import org.warp.commonutils.functional.TriConsumer;
+import org.warp.commonutils.functional.TriFunction;
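+// LLDeepMap exposes an LLDeepDictionary as a two-level map: key1 -> (key2 -> value).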
+import org.warp.commonutils.type.Bytes; +import org.warp.commonutils.type.UnmodifiableIterableMap; +import org.warp.commonutils.type.UnmodifiableMap; +import it.cavallium.dbengine.database.LLDeepDictionary; +import it.cavallium.dbengine.database.LLDictionaryResultType; +import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure; +import it.cavallium.dbengine.database.LLSnapshot; + +public class LLDeepMap implements LLKeyValueDatabaseStructure { + + private final LLDeepDictionary dictionary; + + public LLDeepMap(LLDeepDictionary dictionary) { + this.dictionary = dictionary; + } + + public UnmodifiableIterableMap get(@Nullable LLSnapshot snapshot, byte[] key) throws IOException { + return dictionary.get(snapshot, key); + } + + public Optional get(@Nullable LLSnapshot snapshot, byte[] key1, byte[] key2) throws IOException { + return dictionary.get(snapshot, key1, key2); + } + + public boolean isEmpty(@Nullable LLSnapshot snapshot, byte[] key1) { + return dictionary.isEmpty(snapshot, key1); + } + + public boolean contains(@Nullable LLSnapshot snapshot, byte[] key1, byte[] key2) throws IOException { + return dictionary.contains(snapshot, key1, key2); + } + + /** + * Note: this will remove previous elements because it replaces the entire map of key + */ + public void put(byte[] key1, UnmodifiableIterableMap value) throws IOException { + dictionary.put(key1, value); + } + + public Optional put(byte[] key1, byte[] key2, byte[] value, LLDeepMapResultType resultType) throws IOException { + return dictionary.put(key1, key2, value, resultType.getDictionaryResultType()); + } + + public void putMulti(byte[][] keys1, UnmodifiableIterableMap[] values) throws IOException { + dictionary.putMulti(keys1, values); + } + + public void putMulti(byte[] key1, byte[][] keys2, byte[][] values, LLDeepMapResultType resultType, Consumer responses) throws IOException { + dictionary.putMulti(key1, keys2, values, resultType.getDictionaryResultType(), responses); + } + + public void putMulti(byte[][] keys1, byte[][] keys2, byte[][] values, LLDeepMapResultType resultType, Consumer responses) throws IOException { + dictionary.putMulti(keys1, keys2, values, resultType.getDictionaryResultType(), responses); + } + + public void clear() throws IOException { + dictionary.clear(); + } + + public Optional> clear(byte[] key1, LLDeepMapResultType resultType) throws IOException { + return dictionary.clear(key1, resultType.getDictionaryResultType()); + } + + public Optional remove(byte[] key1, byte[] key2, LLDeepMapResultType resultType) throws IOException { + return dictionary.remove(key1, key2, resultType.getDictionaryResultType()); + } + + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer> consumer) { + dictionary.forEach(snapshot, parallelism, consumer); + } + + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, byte[] key1, BiConsumer consumer) { + dictionary.forEach(snapshot, parallelism, key1, consumer); + } + + public void replaceAll(int parallelism, boolean replaceKeys, BiFunction, Entry>> consumer) throws IOException { + dictionary.replaceAll(parallelism, replaceKeys, consumer); + } + + public void replaceAll(int parallelism, boolean replaceKeys, byte[] key1, BiFunction> consumer) throws IOException { + dictionary.replaceAll(parallelism, replaceKeys, key1, consumer); + } + + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, TriConsumer consumer) { + dictionary.forEach(snapshot, parallelism, consumer); + } + + public void replaceAll(int parallelism, 
boolean replaceKeys, TriFunction> consumer) throws IOException { + dictionary.replaceAll(parallelism, replaceKeys, consumer); + } + + public long size(@Nullable LLSnapshot snapshot, boolean fast) throws IOException { + return dictionary.size(snapshot, fast); + } + + public long exactSize(@Nullable LLSnapshot snapshot, byte[] key1) { + return dictionary.exactSize(snapshot, key1); + } + + @Override + public String getDatabaseName() { + return dictionary.getDatabaseName(); + } + + public enum LLDeepMapResultType { + VOID, + VALUE_CHANGED, + PREVIOUS_VALUE; + + public LLDictionaryResultType getDictionaryResultType() { + switch (this) { + case VOID: + return LLDictionaryResultType.VOID; + case VALUE_CHANGED: + return LLDictionaryResultType.VALUE_CHANGED; + case PREVIOUS_VALUE: + return LLDictionaryResultType.PREVIOUS_VALUE; + } + + return LLDictionaryResultType.VOID; + } + } + + @Override + public String toString() { + return new StringJoiner(", ", LLDeepMap.class.getSimpleName() + "[", "]") + .add("dictionary=" + dictionary) + .toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLDeepMap llMap = (LLDeepMap) o; + return Objects.equals(dictionary, llMap.dictionary); + } + + @Override + public int hashCode() { + return Objects.hash(dictionary); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/structures/LLFixedDeepSet.java b/src/main/java/it/cavallium/dbengine/database/structures/LLFixedDeepSet.java new file mode 100644 index 0000000..0a269b7 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/structures/LLFixedDeepSet.java @@ -0,0 +1,209 @@ +package it.cavallium.dbengine.database.structures; + +import it.cavallium.dbengine.database.LLDeepDictionary; +import it.cavallium.dbengine.database.LLDictionaryResultType; +import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure; +import it.cavallium.dbengine.database.LLSnapshot; +import it.cavallium.dbengine.database.LLUtils; +import it.unimi.dsi.fastutil.objects.ObjectSets.UnmodifiableSet; +import java.io.IOException; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; +import java.util.StringJoiner; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import java.util.function.Function; +import org.jetbrains.annotations.Nullable; +import org.warp.commonutils.type.Bytes; +import org.warp.commonutils.type.UnmodifiableIterableMap; +import org.warp.commonutils.type.UnmodifiableIterableSet; +import org.warp.commonutils.type.UnmodifiableMap; + +/** + * A set in which keys and values must have a fixed size + */ +public class LLFixedDeepSet implements LLKeyValueDatabaseStructure { + + private static final byte[] EMPTY_VALUE = new byte[0]; + private static final Bytes EMPTY_VALUE_BYTES = new Bytes(EMPTY_VALUE); + private final LLDeepDictionary dictionary; + + public LLFixedDeepSet(LLDeepDictionary dictionary) { + this.dictionary = dictionary; + } + + private byte[][] generateEmptyArray(int length) { + byte[][] data = new byte[length][]; + for (int i = 0; i < length; i++) { + data[i] = EMPTY_VALUE; + } + return data; + } + + private Bytes[] generateEmptyBytesArray(int length) { + Bytes[] data = new Bytes[length]; + for (int i = 0; i < length; i++) { + data[i] = EMPTY_VALUE_BYTES; + } + return data; + } + + public UnmodifiableIterableSet get(@Nullable LLSnapshot snapshot, byte[] key1) 
throws IOException { + return dictionary.get(snapshot, key1).toUnmodifiableIterableKeysSet(byte[][]::new); + } + + public boolean contains(@Nullable LLSnapshot snapshot, byte[] key1, byte[] value) throws IOException { + return dictionary.contains(snapshot, key1, value); + } + + public boolean isEmpty(@Nullable LLSnapshot snapshot, byte[] key1) { + return dictionary.isEmpty(snapshot, key1); + } + + public boolean add(byte[] key1, byte[] value, LLDeepSetItemResultType resultType) throws IOException { + Optional response = dictionary.put(key1, value, EMPTY_VALUE, resultType.getDictionaryResultType()); + if (resultType == LLDeepSetItemResultType.VALUE_CHANGED) { + return LLUtils.responseToBoolean(response.orElseThrow()); + } + return false; + } + + public void addMulti(byte[] key1, byte[][] values) throws IOException { + dictionary.putMulti(key1, values, generateEmptyArray(values.length), LLDictionaryResultType.VOID, (x) -> {}); + } + + /** + * Note: this will remove previous elements because it replaces the entire set + */ + public void put(byte[] key1, UnmodifiableIterableSet values) throws IOException { + dictionary.put(key1, values.toUnmodifiableIterableMapSetValues(generateEmptyArray(values.size()))); + } + + public void putMulti(byte[][] keys1, UnmodifiableIterableSet[] values) throws IOException { + var fixedValues = new UnmodifiableIterableMap[values.length]; + for (int i = 0; i < values.length; i++) { + fixedValues[i] = values[i].toUnmodifiableIterableMapSetValues(generateEmptyArray(values[i].size())); + } + //noinspection unchecked + dictionary.putMulti(keys1, fixedValues); + } + + public void clear() throws IOException { + dictionary.clear(); + } + + public Optional> clear(byte[] key1, LLDeepSetResultType resultType) throws IOException { + Optional> response = dictionary.clear(key1, resultType.getDictionaryResultType()); + if (response.isEmpty()) { + return Optional.empty(); + } else { + return Optional.of(response.get().toUnmodifiableIterableKeysSet(byte[][]::new)); + } + } + + public boolean remove(byte[] key1, byte[] value, LLDeepSetItemResultType resultType) throws IOException { + Optional response = dictionary.remove(key1, value, resultType.getDictionaryResultType()); + if (resultType == LLDeepSetItemResultType.VALUE_CHANGED) { + return LLUtils.responseToBoolean(response.orElseThrow()); + } + return false; + } + + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer> consumer) { + dictionary.forEach(snapshot, parallelism, (key1, entries) -> consumer.accept(key1, entries.toUnmodifiableIterableKeysSet(byte[][]::new))); + } + + public void forEach(@Nullable LLSnapshot snapshot, int parallelism, byte[] key1, Consumer consumer) { + dictionary.forEach(snapshot, parallelism, key1, (value, empty) -> consumer.accept(value)); + } + + public void replaceAll(int parallelism, BiFunction, Entry>> consumer) throws IOException { + dictionary.replaceAll(parallelism, true, (key1, entries) -> { + var result = consumer.apply(key1, entries.toUnmodifiableIterableKeysSet(byte[][]::new)); + var resultItems = result.getValue().toArray(Bytes[]::new); + return Map.entry(result.getKey(), UnmodifiableMap.of(resultItems, generateEmptyArray(resultItems.length))); + }); + } + + public void replaceAll(int parallelism, byte[] key1, Function consumer) throws IOException { + dictionary.replaceAll(parallelism, true, key1, (value, empty) -> { + var changedValue = consumer.apply(value); + return Map.entry(changedValue, EMPTY_VALUE); + }); + } + + public long size(@Nullable LLSnapshot 
snapshot, boolean fast) throws IOException { + return dictionary.size(snapshot, fast); + } + + public long exactSize(@Nullable LLSnapshot snapshot, byte[] key1) { + return dictionary.exactSize(snapshot, key1); + } + + @Override + public String getDatabaseName() { + return dictionary.getDatabaseName(); + } + + public enum LLDeepSetResultType { + VOID, + VALUE_CHANGED, + PREVIOUS_VALUE; + + public LLDictionaryResultType getDictionaryResultType() { + switch (this) { + case VOID: + return LLDictionaryResultType.VOID; + case VALUE_CHANGED: + return LLDictionaryResultType.VALUE_CHANGED; + case PREVIOUS_VALUE: + return LLDictionaryResultType.PREVIOUS_VALUE; + } + + return LLDictionaryResultType.VOID; + } + } + + public enum LLDeepSetItemResultType { + VOID, + VALUE_CHANGED; + + public LLDictionaryResultType getDictionaryResultType() { + switch (this) { + case VOID: + return LLDictionaryResultType.VOID; + case VALUE_CHANGED: + return LLDictionaryResultType.VALUE_CHANGED; + } + + return LLDictionaryResultType.VOID; + } + } + + @Override + public String toString() { + return new StringJoiner(", ", LLFixedDeepSet.class.getSimpleName() + "[", "]") + .add("dictionary=" + dictionary) + .toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LLFixedDeepSet llMap = (LLFixedDeepSet) o; + return Objects.equals(dictionary, llMap.dictionary); + } + + @Override + public int hashCode() { + return Objects.hash(dictionary); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/structures/LLInt.java b/src/main/java/it/cavallium/dbengine/database/structures/LLInt.java new file mode 100644 index 0000000..030b0d8 --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/structures/LLInt.java @@ -0,0 +1,30 @@ +package it.cavallium.dbengine.database.structures; + +import com.google.common.primitives.Ints; +import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure; +import it.cavallium.dbengine.database.LLSingleton; +import it.cavallium.dbengine.database.LLSnapshot; +import java.io.IOException; +import org.jetbrains.annotations.Nullable; + +public class LLInt implements LLKeyValueDatabaseStructure { + + private final LLSingleton singleton; + + public LLInt(LLSingleton singleton) { + this.singleton = singleton; + } + + public int get(@Nullable LLSnapshot snapshot) throws IOException { + return Ints.fromByteArray(singleton.get(snapshot)); + } + + public void set(int value) throws IOException { + singleton.set(Ints.toByteArray(value)); + } + + @Override + public String getDatabaseName() { + return singleton.getDatabaseName(); + } +} diff --git a/src/main/java/it/cavallium/dbengine/database/structures/LLLong.java b/src/main/java/it/cavallium/dbengine/database/structures/LLLong.java new file mode 100644 index 0000000..13682fb --- /dev/null +++ b/src/main/java/it/cavallium/dbengine/database/structures/LLLong.java @@ -0,0 +1,36 @@ +package it.cavallium.dbengine.database.structures; + +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure; +import it.cavallium.dbengine.database.LLSingleton; +import it.cavallium.dbengine.database.LLSnapshot; +import java.io.IOException; +import org.jetbrains.annotations.Nullable; + +public class LLLong implements LLKeyValueDatabaseStructure { + + private final LLSingleton singleton; + + public LLLong(LLSingleton singleton) { + this.singleton = singleton; 
+	}
+
+	public long get(@Nullable LLSnapshot snapshot) throws IOException {
+		var array = singleton.get(snapshot);
+		// Compatibility: a 4-byte value (previously stored as an int) is widened to a long
+		if (array.length == 4) {
+			return Ints.fromByteArray(array);
+		} else {
+			return Longs.fromByteArray(array);
+		}
+	}
+
+	public void set(long value) throws IOException {
+		singleton.set(Longs.toByteArray(value));
+	}
+
+	@Override
+	public String getDatabaseName() {
+		return singleton.getDatabaseName();
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/structures/LLMap.java b/src/main/java/it/cavallium/dbengine/database/structures/LLMap.java
new file mode 100644
index 0000000..c2bbb10
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/structures/LLMap.java
@@ -0,0 +1,117 @@
+package it.cavallium.dbengine.database.structures;
+
+import it.cavallium.dbengine.database.LLDictionary;
+import it.cavallium.dbengine.database.LLDictionaryResultType;
+import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure;
+import it.cavallium.dbengine.database.LLSnapshot;
+import java.io.IOException;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.StringJoiner;
+import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
+import java.util.function.Consumer;
+import org.jetbrains.annotations.Nullable;
+
+public class LLMap implements LLKeyValueDatabaseStructure {
+
+	private final LLDictionary dictionary;
+
+	public LLMap(LLDictionary dictionary) {
+		this.dictionary = dictionary;
+	}
+
+	public Optional<byte[]> get(@Nullable LLSnapshot snapshot, byte[] key) throws IOException {
+		return dictionary.get(snapshot, key);
+	}
+
+	public Optional<byte[]> put(byte[] key, byte[] value, LLMapResultType resultType)
+			throws IOException {
+		return dictionary.put(key, value, resultType.getDictionaryResultType());
+	}
+
+	public void putMulti(byte[][] key, byte[][] value, LLMapResultType resultType,
+			Consumer<Optional<byte[]>> results) throws IOException {
+		dictionary.putMulti(key, value, resultType.getDictionaryResultType(),
+				(result) -> results.accept(Optional.ofNullable(result.length == 0 ? null : result)));
+	}
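+
+	// A minimal usage sketch (the LLDictionary instance is assumed to come from an
+	// LLKeyValueDatabase; the variable names are illustrative only):
+	//
+	//   var map = new LLMap(dictionary);
+	//   map.put(key, value, LLMapResultType.VOID);
+	//   Optional<byte[]> read = map.get(null, key); // null snapshot = latest data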
+
+	public boolean contains(@Nullable LLSnapshot snapshot, byte[] key) throws IOException {
+		return dictionary.contains(snapshot, key);
+	}
+
+	public Optional<byte[]> remove(byte[] key, LLMapResultType resultType) throws IOException {
+		return dictionary.remove(key, resultType.getDictionaryResultType());
+	}
+
+	public void clear() throws IOException {
+		dictionary.clear();
+	}
+
+	public long size(@Nullable LLSnapshot snapshot, boolean fast) throws IOException {
+		return dictionary.size(snapshot, fast);
+	}
+
+	/**
+	 * The consumer may be called from multiple threads
+	 */
+	public void forEach(@Nullable LLSnapshot snapshot, int parallelism, BiConsumer<byte[], byte[]> consumer) {
+		dictionary.forEach(snapshot, parallelism, consumer);
+	}
+
+	/**
+	 * The consumer may be called from multiple threads
+	 */
+	public void replaceAll(int parallelism, boolean replaceKeys, BiFunction<byte[], byte[], Entry<byte[], byte[]>> consumer) throws IOException {
+		dictionary.replaceAll(parallelism, replaceKeys, consumer);
+	}
+
+	@Override
+	public String getDatabaseName() {
+		return dictionary.getDatabaseName();
+	}
+
+	public enum LLMapResultType {
+		VOID,
+		VALUE_CHANGED,
+		PREVIOUS_VALUE;
+
+		public LLDictionaryResultType getDictionaryResultType() {
+			switch (this) {
+				case VOID:
+					return LLDictionaryResultType.VOID;
+				case VALUE_CHANGED:
+					return LLDictionaryResultType.VALUE_CHANGED;
+				case PREVIOUS_VALUE:
+					return LLDictionaryResultType.PREVIOUS_VALUE;
+			}
+
+			return LLDictionaryResultType.VOID;
+		}
+	}
+
+	@Override
+	public String toString() {
+		return new StringJoiner(", ", LLMap.class.getSimpleName() + "[", "]")
+				.add("dictionary=" + dictionary)
+				.toString();
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (o == null || getClass() != o.getClass()) {
+			return false;
+		}
+		LLMap llMap = (LLMap) o;
+		return Objects.equals(dictionary, llMap.dictionary);
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(dictionary);
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/structures/LLSet.java b/src/main/java/it/cavallium/dbengine/database/structures/LLSet.java
new file mode 100644
index 0000000..0df1385
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/structures/LLSet.java
@@ -0,0 +1,101 @@
+package it.cavallium.dbengine.database.structures;
+
+import it.cavallium.dbengine.database.LLDictionary;
+import it.cavallium.dbengine.database.LLDictionaryResultType;
+import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure;
+import it.cavallium.dbengine.database.LLSnapshot;
+import it.cavallium.dbengine.database.LLUtils;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import org.jetbrains.annotations.Nullable;
+
+public class LLSet implements LLKeyValueDatabaseStructure {
+
+	private static final byte[] EMPTY_VALUE = new byte[0];
+	private final LLDictionary dictionary;
+
+	public LLSet(LLDictionary dictionary) {
+		this.dictionary = dictionary;
+	}
+
+	@Override
+	public String getDatabaseName() {
+		return dictionary.getDatabaseName();
+	}
+
+	private byte[][] generateEmptyArray(int length) {
+		byte[][] data = new byte[length][];
+		for (int i = 0; i < length; i++) {
+			data[i] = EMPTY_VALUE;
+		}
+		return data;
+	}
+
+	public boolean contains(@Nullable LLSnapshot snapshot, byte[] value) throws IOException {
+		return dictionary.contains(snapshot, value);
+	}
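+
+	// Each element is stored as a dictionary key mapped to EMPTY_VALUE, so membership
+	// tests, insertions and removals are plain key operations on the LLDictionary.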
+
+	public boolean add(byte[] value, LLSetResultType resultType) throws IOException {
+		Optional<byte[]> response = dictionary.put(value, EMPTY_VALUE, resultType.getDictionaryResultType());
+		if (resultType == LLSetResultType.VALUE_CHANGED) {
+			return LLUtils.responseToBoolean(response.orElseThrow());
+		}
+		return false;
+	}
+
+	public void addMulti(byte[][] values) throws IOException {
+		dictionary.putMulti(values, generateEmptyArray(values.length), LLDictionaryResultType.VOID, (x) -> {});
+	}
+
+	public boolean remove(byte[] value, LLSetResultType resultType) throws IOException {
+		Optional<byte[]> response = dictionary.remove(value, resultType.getDictionaryResultType());
+		if (resultType == LLSetResultType.VALUE_CHANGED) {
+			return LLUtils.responseToBoolean(response.orElseThrow());
+		}
+		return false;
+	}
+
+	public void clearUnsafe() throws IOException {
+		dictionary.clear();
+	}
+
+	public void forEach(@Nullable LLSnapshot snapshot, int parallelism, Consumer<byte[]> consumer) {
+		dictionary.forEach(snapshot, parallelism, (key, emptyValue) -> consumer.accept(key));
+	}
+
+	public void replaceAll(int parallelism, Function<byte[], byte[]> consumer) throws IOException {
+		dictionary.replaceAll(parallelism, true, (key, emptyValue) -> Map.entry(consumer.apply(key), emptyValue));
+	}
+
+	public long size(@Nullable LLSnapshot snapshot, boolean fast) throws IOException {
+		return dictionary.size(snapshot, fast);
+	}
+
+	public boolean isEmptyUnsafe(@Nullable LLSnapshot snapshot) throws IOException {
+		return dictionary.isEmpty(snapshot);
+	}
+
+	public Optional<byte[]> removeOneUnsafe() throws IOException {
+		return dictionary.removeOne().map(Entry::getKey);
+	}
+
+	public enum LLSetResultType {
+		VOID,
+		VALUE_CHANGED;
+
+		public LLDictionaryResultType getDictionaryResultType() {
+			switch (this) {
+				case VOID:
+					return LLDictionaryResultType.VOID;
+				case VALUE_CHANGED:
+					return LLDictionaryResultType.VALUE_CHANGED;
+			}
+
+			return LLDictionaryResultType.VOID;
+		}
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollector.java b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollector.java
new file mode 100644
index 0000000..758e569
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollector.java
@@ -0,0 +1,60 @@
+package it.cavallium.dbengine.database.utils;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreMode;
+
+public class LuceneParallelStreamCollector implements Collector, LeafCollector {
+
+	private final LuceneParallelStreamConsumer streamConsumer;
+	private final AtomicBoolean stopped;
+	private final AtomicLong totalHitsCounter;
+	private final ReentrantLock lock;
+	private final int base;
+
+	public LuceneParallelStreamCollector(int base, LuceneParallelStreamConsumer streamConsumer,
+			AtomicBoolean stopped, AtomicLong totalHitsCounter, ReentrantLock lock) {
+		this.base = base;
+		this.streamConsumer = streamConsumer;
+		this.stopped = stopped;
+		this.totalHitsCounter = totalHitsCounter;
+		this.lock = lock;
+	}
+
+	@Override
+	public final LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+		return new LuceneParallelStreamCollector(context.docBase, streamConsumer, stopped, totalHitsCounter, lock);
+	}
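+
+	// One collector is created per index segment (leaf), but all of them share the same
+	// stop flag, total-hits counter and lock, so a single consumer can stop the whole
+	// search. A minimal usage sketch through the manager (searcher/query are assumed):
+	//
+	//   var manager = LuceneParallelStreamCollectorManager.fromConsumer(docId -> {
+	//     process(docId);
+	//     return true; // return false to stop collecting early
+	//   });
+	//   LuceneParallelStreamCollectorResult result = indexSearcher.search(query, manager);
+	//   long totalHits = result.getTotalHitsCount();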
+
+	@Override
+	public void setScorer(Scorable scorer) throws IOException {
+		// scores are not needed, see scoreMode()
+	}
+
+	@Override
+	public void collect(int doc) throws IOException {
+		// convert the segment-local document id to a global one
+		doc += base;
+		totalHitsCounter.incrementAndGet();
+		lock.lock();
+		try {
+			if (!stopped.get()) {
+				if (!streamConsumer.consume(doc)) {
+					stopped.set(true);
+				}
+			}
+		} finally {
+			lock.unlock();
+		}
+	}
+
+	@Override
+	public ScoreMode scoreMode() {
+		return ScoreMode.COMPLETE_NO_SCORES;
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollectorManager.java b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollectorManager.java
new file mode 100644
index 0000000..caa08db
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollectorManager.java
@@ -0,0 +1,42 @@
+package it.cavallium.dbengine.database.utils;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.lucene.search.CollectorManager;
+
+public class LuceneParallelStreamCollectorManager implements
+		CollectorManager<LuceneParallelStreamCollector, LuceneParallelStreamCollectorResult> {
+
+	private final LuceneParallelStreamConsumer streamConsumer;
+	private final AtomicBoolean stopped;
+	private final AtomicLong totalHitsCounter;
+	private final ReentrantLock lock;
+
+	public static LuceneParallelStreamCollectorManager fromConsumer(
+			LuceneParallelStreamConsumer streamConsumer) {
+		return new LuceneParallelStreamCollectorManager(streamConsumer);
+	}
+
+	public LuceneParallelStreamCollectorManager(LuceneParallelStreamConsumer streamConsumer) {
+		this.streamConsumer = streamConsumer;
+		this.stopped = new AtomicBoolean();
+		this.totalHitsCounter = new AtomicLong();
+		this.lock = new ReentrantLock();
+	}
+
+	@Override
+	public LuceneParallelStreamCollector newCollector() throws IOException {
+		return new LuceneParallelStreamCollector(0, streamConsumer, stopped, totalHitsCounter, lock);
+	}
+
+	@Override
+	public LuceneParallelStreamCollectorResult reduce(
+			Collection<LuceneParallelStreamCollector> collectors) throws IOException {
+		// every collector shares the same counter, so the global total is already here
+		return new LuceneParallelStreamCollectorResult(totalHitsCounter.get());
+	}
+
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollectorResult.java b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollectorResult.java
new file mode 100644
index 0000000..79265fa
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamCollectorResult.java
@@ -0,0 +1,14 @@
+package it.cavallium.dbengine.database.utils;
+
+public class LuceneParallelStreamCollectorResult {
+
+	private final long totalHitsCount;
+
+	public LuceneParallelStreamCollectorResult(long totalHitsCount) {
+		this.totalHitsCount = totalHitsCount;
+	}
+
+	public long getTotalHitsCount() {
+		return totalHitsCount;
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamConsumer.java b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamConsumer.java
new file mode 100644
index 0000000..11347c9
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/utils/LuceneParallelStreamConsumer.java
@@ -0,0 +1,10 @@
+package it.cavallium.dbengine.database.utils;
+
+public interface LuceneParallelStreamConsumer {
+
+	/**
+	 * @param docId the global Lucene document id of the matched document
+	 * @return true to continue, false to stop the execution
+	 */
+	boolean consume(int docId);
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/utils/RandomFieldComparator.java b/src/main/java/it/cavallium/dbengine/database/utils/RandomFieldComparator.java
new file mode 100644
index 0000000..1afd850
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/utils/RandomFieldComparator.java
@@ -0,0 +1,108 @@
+package it.cavallium.dbengine.database.utils;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreCachingWrappingScorer;
+import org.jetbrains.annotations.NotNull;
+import org.warp.commonutils.random.LFSR.LFSRIterator;
+
+public class RandomFieldComparator extends FieldComparator<Float> implements LeafFieldComparator {
+
+	private final @NotNull LFSRIterator<BigInteger> rand;
+	private final float[] scores;
+	private float bottom;
+	private Scorable scorer;
+	private float topValue;
+
+	/** Creates a new comparator based on relevance for {@code numHits}. */
+	public RandomFieldComparator(@NotNull LFSRIterator<BigInteger> rand, int numHits) {
+		this.rand = rand;
+		scores = new float[numHits];
+	}
+
+	@Override
+	public int compare(int slot1, int slot2) {
+		return Float.compare(scores[slot2], scores[slot1]);
+	}
+
+	@Override
+	public int compareBottom(int doc) throws IOException {
+		float score = scorer.score();
+		assert !Float.isNaN(score);
+		return Float.compare(score, bottom);
+	}
+
+	@Override
+	public void copy(int slot, int doc) throws IOException {
+		scores[slot] = scorer.score();
+		assert !Float.isNaN(scores[slot]);
+	}
+
+	@Override
+	public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
+		return this;
+	}
+
+	@Override
+	public void setBottom(final int bottom) {
+		this.bottom = scores[bottom];
+	}
+
+	@Override
+	public void setTopValue(Float value) {
+		topValue = Float.MAX_VALUE;
+	}
+
+	@Override
+	public void setScorer(Scorable scorer) {
+		// wrap with a ScoreCachingWrappingScorer so that successive calls to
+		// score() will not incur score computation over and
+		// over again.
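+		// The wrapper below ignores the real relevance score entirely: it derives a
+		// deterministic pseudo-random score from the document id via the shared LFSR
+		// sequence, which is what makes the resulting sort order random but stable.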
+		var randomizedScorer = new Scorable() {
+
+			@Override
+			public float score() throws IOException {
+				return randomize(scorer.docID());
+			}
+
+			@Override
+			public int docID() {
+				return scorer.docID();
+			}
+		};
+		if (!(scorer instanceof ScoreCachingWrappingScorer)) {
+			this.scorer = new ScoreCachingWrappingScorer(randomizedScorer);
+		} else {
+			this.scorer = randomizedScorer;
+		}
+	}
+
+	@Override
+	public Float value(int slot) {
+		return Float.valueOf(scores[slot]);
+	}
+
+	// Override because we sort reverse of natural Float order:
+	@Override
+	public int compareValues(Float first, Float second) {
+		// Reversed intentionally because relevance by default
+		// sorts descending:
+		return second.compareTo(first);
+	}
+
+	@Override
+	public int compareTop(int doc) throws IOException {
+		float docValue = scorer.score();
+		assert !Float.isNaN(docValue);
+		return Float.compare(docValue, topValue);
+	}
+
+	private float randomize(int num) {
+		int val = rand.next(BigInteger.valueOf(num)).intValue();
+		return (val & 0x00FFFFFF) / (float) (1 << 24); // only use the lower 24 bits to construct a float from 0.0-1.0
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/utils/RandomFieldComparatorSource.java b/src/main/java/it/cavallium/dbengine/database/utils/RandomFieldComparatorSource.java
new file mode 100644
index 0000000..7cc0991
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/utils/RandomFieldComparatorSource.java
@@ -0,0 +1,20 @@
+package it.cavallium.dbengine.database.utils;
+
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.FieldComparatorSource;
+import org.warp.commonutils.random.LFSR;
+
+public class RandomFieldComparatorSource extends FieldComparatorSource {
+
+	private final LFSR rand;
+
+	public RandomFieldComparatorSource() {
+		this.rand = LFSR.random(24, ThreadLocalRandom.current().nextInt(1 << 24));
+	}
+
+	@Override
+	public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
+		return new RandomFieldComparator(rand.iterator(), numHits);
+	}
+}
diff --git a/src/main/java/it/cavallium/dbengine/database/utils/RandomSortField.java b/src/main/java/it/cavallium/dbengine/database/utils/RandomSortField.java
new file mode 100644
index 0000000..0f3dfa3
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/database/utils/RandomSortField.java
@@ -0,0 +1,15 @@
+package it.cavallium.dbengine.database.utils;
+
+import org.apache.lucene.search.SortField;
+
+public class RandomSortField extends SortField {
+
+	public RandomSortField() {
+		super("", new RandomFieldComparatorSource());
+	}
+
+	@Override
+	public boolean needsScores() {
+		return false;
+	}
+}
diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQuery.java
new file mode 100644
index 0000000..10c38b8
--- /dev/null
+++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQuery.java
@@ -0,0 +1,35 @@
+package it.cavallium.luceneserializer.luceneserializer;
+
+import java.util.Collection;
+
+public class BooleanQuery implements Query {
+
+	private final BooleanQueryPart[] parts;
+	private int minShouldMatch;
+
+	public BooleanQuery(BooleanQueryPart... parts) {
+		this.parts = parts;
+	}
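+
+	// Hypothetical round-trip through this serializer (all names are from this package):
+	//
+	//   var query = new BooleanQuery(
+	//       new BooleanQueryPart(new TermQuery("title", "hello"), Occur.MUST),
+	//       new BooleanQueryPart(new TermQuery("body", "world"), Occur.MUST));
+	//   String serialized = QueryParser.stringify(query);
+	//   org.apache.lucene.search.Query parsed = QueryParser.parse(serialized);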
+
+	public BooleanQuery(Collection<BooleanQueryPart> parts) {
+		this.parts = parts.toArray(BooleanQueryPart[]::new);
+	}
+
+	public BooleanQuery setMinShouldMatch(int minShouldMatch) {
+		this.minShouldMatch = minShouldMatch;
+		return this;
+	}
+
+	@Override
+	public void stringify(StringBuilder output) {
+		StringBuilder data = new StringBuilder();
+		StringifyUtils.stringifyInt(data, minShouldMatch);
+		StringBuilder listData = new StringBuilder();
+		listData.append(parts.length).append('|');
+		for (int i = 0; i < parts.length; i++) {
+			parts[i].stringify(listData);
+		}
+		StringifyUtils.writeHeader(data, QueryConstructorType.BOOLEAN_QUERY_INFO_LIST, listData);
+		StringifyUtils.writeHeader(output, QueryConstructorType.BOOLEAN_QUERY, data);
+	}
+}
diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQueryInfo.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQueryInfo.java
new file mode 100644
index 0000000..94ac98e
--- /dev/null
+++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQueryInfo.java
@@ -0,0 +1,42 @@
+package it.cavallium.luceneserializer.luceneserializer;
+
+import java.util.Objects;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.Query;
+
+public class BooleanQueryInfo {
+
+	public final Query query;
+	public final BooleanClause.Occur occur;
+
+	public BooleanQueryInfo(Query query, BooleanClause.Occur occur) {
+		this.query = query;
+		this.occur = occur;
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (o == null || getClass() != o.getClass()) {
+			return false;
+		}
+		BooleanQueryInfo that = (BooleanQueryInfo) o;
+		return Objects.equals(query, that.query) &&
+				occur == that.occur;
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(query, occur);
+	}
+
+	@Override
+	public String toString() {
+		return "BooleanQueryInfo{" +
+				"query=" + query +
+				", occur=" + occur +
+				'}';
+	}
+}
diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQueryPart.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQueryPart.java
new file mode 100644
index 0000000..6904c82
--- /dev/null
+++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BooleanQueryPart.java
@@ -0,0 +1,20 @@
+package it.cavallium.luceneserializer.luceneserializer;
+
+public class BooleanQueryPart implements SerializedQueryObject {
+
+	private final Query query;
+	private final Occur occur;
+
+	public BooleanQueryPart(Query query, Occur occur) {
+		this.query = query;
+		this.occur = occur;
+	}
+
+	@Override
+	public void stringify(StringBuilder output) {
+		StringBuilder data = new StringBuilder();
+		query.stringify(data);
+		occur.stringify(data);
+		StringifyUtils.writeHeader(output, QueryConstructorType.BOOLEAN_QUERY_INFO, data);
+	}
+}
diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/BoostQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BoostQuery.java
new file mode 100644
index 0000000..2d11c94
--- /dev/null
+++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/BoostQuery.java
@@ -0,0 +1,20 @@
+package it.cavallium.luceneserializer.luceneserializer;
+
+public class BoostQuery implements Query {
+
+	private final Query query;
+	private final float boostValue;
+
+	public BoostQuery(Query query, float boostValue) {
+		this.query = query;
+		this.boostValue = boostValue;
+	}
+
+	@Override
+	public void stringify(StringBuilder
output) { + StringBuilder data = new StringBuilder(); + query.stringify(data); + StringifyUtils.stringifyFloat(data, boostValue); + StringifyUtils.writeHeader(output, QueryConstructorType.BOOST_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/FuzzyQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/FuzzyQuery.java new file mode 100644 index 0000000..6656b50 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/FuzzyQuery.java @@ -0,0 +1,49 @@ +package it.cavallium.luceneserializer.luceneserializer; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.util.automaton.LevenshteinAutomata; + +public class FuzzyQuery implements Query { + + private final Term term; + private final int val1; + private final int val2; + private final int val3; + private final boolean bool; + + /** + * Create a new FuzzyQuery that will match terms with an edit distance of at most + * maxEdits to term. If a prefixLength > 0 is + * specified, a common prefix of that length is also required. + * + * @param term the term to search for + * @param maxEdits must be {@code >= 0} and {@code <=} {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. + * @param prefixLength length of common (non-fuzzy) prefix + * @param maxExpansions the maximum number of terms to match. If this number is greater than + * {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, then + * the maxClauseCount will be used instead. + * @param transpositions true if transpositions should be treated as a primitive edit operation. + * If this is false, comparisons will implement the classic Levenshtein + * algorithm. + */ + public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, + boolean transpositions) { + this.term = term; + this.val1 = maxEdits; + this.val2 = prefixLength; + this.val3 = maxExpansions; + this.bool = transpositions; + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.stringifyTerm(data, term); + StringifyUtils.stringifyInt(data, val1); + StringifyUtils.stringifyInt(data, val2); + StringifyUtils.stringifyInt(data, val3); + StringifyUtils.stringifyBool(data, bool); + StringifyUtils.writeHeader(output, QueryConstructorType.FUZZY_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/IntPointExactQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/IntPointExactQuery.java new file mode 100644 index 0000000..d39b49c --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/IntPointExactQuery.java @@ -0,0 +1,20 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class IntPointExactQuery implements Query { + + private final String name; + private final int value; + + public IntPointExactQuery(String name, int value) { + this.name = name; + this.value = value; + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.stringifyString(data, name); + StringifyUtils.stringifyInt(data, value); + StringifyUtils.writeHeader(output, QueryConstructorType.INT_POINT_EXACT_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/IntPointRangeQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/IntPointRangeQuery.java new file mode 100644 index 0000000..44cb21f --- 
/dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/IntPointRangeQuery.java @@ -0,0 +1,23 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class IntPointRangeQuery implements Query { + + private final String name; + private final int min; + private final int max; + + public IntPointRangeQuery(String name, int min, int max) { + this.name = name; + this.min = min; + this.max = max; + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.stringifyString(data, name); + StringifyUtils.stringifyInt(data, min); + StringifyUtils.stringifyInt(data, max); + StringifyUtils.writeHeader(output, QueryConstructorType.INT_POINT_RANGE_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/LongPointExactQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/LongPointExactQuery.java new file mode 100644 index 0000000..67fa972 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/LongPointExactQuery.java @@ -0,0 +1,20 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class LongPointExactQuery implements Query { + + private final String name; + private final long value; + + public LongPointExactQuery(String name, long value) { + this.name = name; + this.value = value; + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.stringifyString(data, name); + StringifyUtils.stringifyLong(data, value); + StringifyUtils.writeHeader(output, QueryConstructorType.LONG_POINT_EXACT_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/LongPointRangeQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/LongPointRangeQuery.java new file mode 100644 index 0000000..9c83763 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/LongPointRangeQuery.java @@ -0,0 +1,23 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class LongPointRangeQuery implements Query { + + private final String name; + private final long min; + private final long max; + + public LongPointRangeQuery(String name, long min, long max) { + this.name = name; + this.min = min; + this.max = max; + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.stringifyString(data, name); + StringifyUtils.stringifyLong(data, min); + StringifyUtils.stringifyLong(data, max); + StringifyUtils.writeHeader(output, QueryConstructorType.LONG_POINT_RANGE_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/MatchAllDocsQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/MatchAllDocsQuery.java new file mode 100644 index 0000000..6dc4640 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/MatchAllDocsQuery.java @@ -0,0 +1,13 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class MatchAllDocsQuery implements Query { + + public MatchAllDocsQuery() { + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.writeHeader(output, QueryConstructorType.MATCH_ALL_DOCS_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/Occur.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/Occur.java new 
file mode 100644 index 0000000..f3fcfbb --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/Occur.java @@ -0,0 +1,35 @@ +package it.cavallium.luceneserializer.luceneserializer; + +import org.apache.lucene.search.BooleanClause; + +public class Occur implements SerializedQueryObject { + + private final BooleanClause.Occur occur; + + public Occur(BooleanClause.Occur occur) { + this.occur = occur; + } + + public static Occur MUST = new Occur(BooleanClause.Occur.MUST); + public static Occur FILTER = new Occur(BooleanClause.Occur.FILTER); + public static Occur SHOULD = new Occur(BooleanClause.Occur.SHOULD); + public static Occur MUST_NOT = new Occur(BooleanClause.Occur.MUST_NOT); + + @Override + public void stringify(StringBuilder output) { + switch (occur) { + case MUST: + StringifyUtils.writeHeader(output, QueryConstructorType.OCCUR_MUST, new StringBuilder()); + break; + case FILTER: + StringifyUtils.writeHeader(output, QueryConstructorType.OCCUR_FILTER, new StringBuilder()); + break; + case SHOULD: + StringifyUtils.writeHeader(output, QueryConstructorType.OCCUR_SHOULD, new StringBuilder()); + break; + case MUST_NOT: + StringifyUtils.writeHeader(output, QueryConstructorType.OCCUR_MUST_NOT, new StringBuilder()); + break; + } + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/ParseException.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/ParseException.java new file mode 100644 index 0000000..7a75117 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/ParseException.java @@ -0,0 +1,8 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class ParseException extends Exception { + + public ParseException(Exception e) { + super(e); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/PhraseQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/PhraseQuery.java new file mode 100644 index 0000000..78f6909 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/PhraseQuery.java @@ -0,0 +1,23 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class PhraseQuery implements Query { + + // some terms can be null + private final TermPosition[] parts; + + public PhraseQuery(TermPosition... 
parts) { + this.parts = parts; + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringBuilder listData = new StringBuilder(); + listData.append(parts.length).append('|'); + for (int i = 0; i < parts.length; i++) { + StringifyUtils.stringifyTermPosition(listData, parts[i]); + } + StringifyUtils.writeHeader(data, QueryConstructorType.TERM_POSITION_LIST, listData); + StringifyUtils.writeHeader(output, QueryConstructorType.PHRASE_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/Query.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/Query.java new file mode 100644 index 0000000..77b9e12 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/Query.java @@ -0,0 +1,5 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public interface Query extends SerializedQueryObject { + +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/QueryConstructorType.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/QueryConstructorType.java new file mode 100644 index 0000000..04a9e59 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/QueryConstructorType.java @@ -0,0 +1,30 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public enum QueryConstructorType { + BOOLEAN_QUERY, + BOOLEAN_QUERY_INFO, + BOOST_QUERY, + SORTED_SLOW_RANGE_QUERY, + INT_POINT_EXACT_QUERY, + LONG_POINT_EXACT_QUERY, + TERM_QUERY, + FUZZY_QUERY, + PHRASE_QUERY, + STRING, + OCCUR_MUST, + OCCUR_SHOULD, + OCCUR_FILTER, + OCCUR_MUST_NOT, + FLOAT, + LONG, + INT, + TERM, + NULL, + BOOLEAN, + TERM_POSITION_LIST, + TERM_POSITION, + BOOLEAN_QUERY_INFO_LIST, + LONG_POINT_RANGE_QUERY, + INT_POINT_RANGE_QUERY, + MATCH_ALL_DOCS_QUERY +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/QueryParser.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/QueryParser.java new file mode 100644 index 0000000..5c0c686 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/QueryParser.java @@ -0,0 +1,214 @@ +package it.cavallium.luceneserializer.luceneserializer; + +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.PrimitiveIterator; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + +public class QueryParser { + + public static Query parse(String text) throws ParseException { + try { + var builtQuery = (Query) parse(text, new AtomicInteger(0)); + return builtQuery; + } catch (Exception e) { + throw new ParseException(e); + } + } + + private static Object parse(String completeText, AtomicInteger position) { + String text = completeText.substring(position.get()); + if (text.length() <= 2) { + return null; + } + PrimitiveIterator.OfInt iterator = text.chars().iterator(); + StringBuilder numberBuilder = new StringBuilder(); + int index = 0; + while (iterator.hasNext()) { + char 
character = (char) iterator.nextInt(); + index++; + if (character == '|') { + break; + } else { + numberBuilder.append(character); + } + } + int len = Integer.parseInt(numberBuilder.toString(), 16); + StringBuilder typeBuilder = new StringBuilder(); + while (iterator.hasNext()) { + char character = (char) iterator.nextInt(); + index++; + if (character == '|') { + break; + } else { + typeBuilder.append(character); + } + } + QueryConstructorType type = QueryConstructorType.values()[Integer.parseInt(typeBuilder.toString())]; + + position.addAndGet(index); + + String toParse = text.substring(index, index + len); + switch (type) { + case TERM_QUERY: + Term term = (Term) parse(completeText, position); + return new TermQuery(term); + case BOOST_QUERY: + Query query = (Query) parse(completeText, position); + Float numb = (Float) parse(completeText, position); + return new BoostQuery(query, numb); + case FUZZY_QUERY: + Term fqTerm = (Term) parse(completeText, position); + Integer numb1 = (Integer) parse(completeText, position); + Integer numb2 = (Integer) parse(completeText, position); + Integer numb3 = (Integer) parse(completeText, position); + Boolean bool1 = (Boolean) parse(completeText, position); + return new FuzzyQuery(fqTerm, numb1, numb2, numb3, bool1); + case PHRASE_QUERY: + //noinspection unchecked + TermPosition[] pqTerms = (TermPosition[]) parse(completeText, position); + var pqB = new PhraseQuery.Builder(); + for (TermPosition pqTerm : pqTerms) { + if (pqTerm != null) { + pqB.add(pqTerm.getTerm(), pqTerm.getPosition()); + } + } + return pqB.build(); + case BOOLEAN_QUERY: + var bqB = new BooleanQuery.Builder(); + //noinspection ConstantConditions + int minShouldMatch = (Integer) parse(completeText, position); + bqB.setMinimumNumberShouldMatch(minShouldMatch); + //noinspection unchecked + BooleanQueryInfo[] bqTerms = (BooleanQueryInfo[]) parse(completeText, position); + assert bqTerms != null; + for (BooleanQueryInfo bqTerm : bqTerms) { + bqB.add(bqTerm.query, bqTerm.occur); + } + return bqB.build(); + case BOOLEAN_QUERY_INFO: + Query query1 = (Query) parse(completeText, position); + BooleanClause.Occur occur = (BooleanClause.Occur) parse(completeText, position); + return new BooleanQueryInfo(query1, occur); + case INT_POINT_EXACT_QUERY: + String string1 = (String) parse(completeText, position); + Integer int1 = (Integer) parse(completeText, position); + return IntPoint.newExactQuery(string1, int1); + case LONG_POINT_EXACT_QUERY: + String string5 = (String) parse(completeText, position); + Long long3 = (Long) parse(completeText, position); + return LongPoint.newExactQuery(string5, long3); + case SORTED_SLOW_RANGE_QUERY: + String string2 = (String) parse(completeText, position); + Long long1 = (Long) parse(completeText, position); + Long long2 = (Long) parse(completeText, position); + return SortedNumericDocValuesField.newSlowRangeQuery(string2, long1, long2); + case LONG_POINT_RANGE_QUERY: + String stringx2 = (String) parse(completeText, position); + Long longx1 = (Long) parse(completeText, position); + Long longx2 = (Long) parse(completeText, position); + return LongPoint.newRangeQuery(stringx2, longx1, longx2); + case INT_POINT_RANGE_QUERY: + String stringx3 = (String) parse(completeText, position); + Integer intx1 = (Integer) parse(completeText, position); + Integer intx2 = (Integer) parse(completeText, position); + return IntPoint.newRangeQuery(stringx3, intx1, intx2); + case INT: + position.addAndGet(toParse.length()); + return Integer.parseInt(toParse); + case LONG: + 
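+				// Same as the INT case above: the numeric payload is inline in the data
+				// section, so advance the cursor past it and parse the primitive directly.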
position.addAndGet(toParse.length()); + return Long.parseLong(toParse); + case TERM: + String string3 = (String) parse(completeText, position); + String string4 = (String) parse(completeText, position); + return new Term(string3, string4); + case TERM_POSITION: + Term term1 = (Term) parse(completeText, position); + Integer intx3 = (Integer) parse(completeText, position); + return new TermPosition(term1, intx3); + case FLOAT: + position.addAndGet(toParse.length()); + return Float.parseFloat(toParse); + case STRING: + position.addAndGet(toParse.length()); + return new String(Base64.getDecoder().decode(toParse), StandardCharsets.UTF_8); + case BOOLEAN: + position.addAndGet(toParse.length()); + return Boolean.parseBoolean(toParse); + case NULL: + position.addAndGet(toParse.length()); + return null; + case TERM_POSITION_LIST: + int termsCount; + StringBuilder termsCountBuilder = new StringBuilder(); + var it = toParse.chars().iterator(); + while (it.hasNext()) { + char character = (char) it.nextInt(); + position.incrementAndGet(); + if (character == '|') { + break; + } else { + termsCountBuilder.append(character); + } + } + termsCount = Integer.parseInt(termsCountBuilder.toString()); + + var result1 = new TermPosition[termsCount]; + for (int i = 0; i < termsCount; i++) { + result1[i] = (TermPosition) parse(completeText, position); + } + return result1; + case BOOLEAN_QUERY_INFO_LIST: + int termsCount2; + StringBuilder termsCountBuilder2 = new StringBuilder(); + var it2 = toParse.chars().iterator(); + while (it2.hasNext()) { + char character = (char) it2.nextInt(); + position.incrementAndGet(); + if (character == '|') { + break; + } else { + termsCountBuilder2.append(character); + } + } + termsCount2 = Integer.parseInt(termsCountBuilder2.toString()); + + var result2 = new BooleanQueryInfo[termsCount2]; + for (int i = 0; i < termsCount2; i++) { + result2[i] = (BooleanQueryInfo) parse(completeText, position); + } + return result2; + case OCCUR_MUST: + return BooleanClause.Occur.MUST; + case OCCUR_FILTER: + return BooleanClause.Occur.FILTER; + case OCCUR_SHOULD: + return BooleanClause.Occur.SHOULD; + case OCCUR_MUST_NOT: + return BooleanClause.Occur.MUST_NOT; + case MATCH_ALL_DOCS_QUERY: + return new MatchAllDocsQuery(); + default: + throw new UnsupportedOperationException("Unknown query constructor type"); + } + } + + public static String stringify(SerializedQueryObject query) { + StringBuilder sb = new StringBuilder(); + query.stringify(sb); + return sb.toString(); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/SerializedQueryObject.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/SerializedQueryObject.java new file mode 100644 index 0000000..2aff5f8 --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/SerializedQueryObject.java @@ -0,0 +1,9 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public interface SerializedQueryObject { + + /** + * @return length|type|---data--- + */ + void stringify(StringBuilder output); +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/SortedNumericDocValuesFieldSlowRangeQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/SortedNumericDocValuesFieldSlowRangeQuery.java new file mode 100644 index 0000000..bc21cca --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/SortedNumericDocValuesFieldSlowRangeQuery.java @@ -0,0 +1,23 @@ +package it.cavallium.luceneserializer.luceneserializer; + +public class 
SortedNumericDocValuesFieldSlowRangeQuery implements Query { + + private final String name; + private final long min; + private final long max; + + public SortedNumericDocValuesFieldSlowRangeQuery(String name, long min, long max) { + this.name = name; + this.min = min; + this.max = max; + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.stringifyString(data, name); + StringifyUtils.stringifyLong(data, min); + StringifyUtils.stringifyLong(data, max); + StringifyUtils.writeHeader(output, QueryConstructorType.SORTED_SLOW_RANGE_QUERY, data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/StringifyUtils.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/StringifyUtils.java new file mode 100644 index 0000000..0ae254a --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/StringifyUtils.java @@ -0,0 +1,54 @@ +package it.cavallium.luceneserializer.luceneserializer; + +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import org.apache.lucene.index.Term; + +public class StringifyUtils { + + public static void stringifyFloat(StringBuilder output, float value) { + writeHeader(output, QueryConstructorType.FLOAT, new StringBuilder().append(value)); + } + + public static void stringifyInt(StringBuilder output, int value) { + writeHeader(output, QueryConstructorType.INT, new StringBuilder().append(value)); + } + + public static void stringifyLong(StringBuilder output, long value) { + writeHeader(output, QueryConstructorType.LONG, new StringBuilder().append(value)); + } + + public static void stringifyBool(StringBuilder output, boolean value) { + writeHeader(output, QueryConstructorType.BOOLEAN, new StringBuilder().append(value)); + } + + public static void stringifyString(StringBuilder output, String value) { + writeHeader(output, QueryConstructorType.STRING, new StringBuilder() + .append(Base64.getEncoder().encodeToString(value.getBytes(StandardCharsets.UTF_8)))); + } + + public static void stringifyTerm(StringBuilder output, Term value) { + var data = new StringBuilder(); + stringifyString(data, value.field()); + stringifyString(data, value.text()); + writeHeader(output, QueryConstructorType.TERM, data); + } + + public static void stringifyTermPosition(StringBuilder output, TermPosition value) { + var data = new StringBuilder(); + stringifyTerm(data, value.getTerm()); + stringifyInt(data, value.getPosition()); + writeHeader(output, QueryConstructorType.TERM_POSITION, data); + } + + public static void stringifyNullTerm(StringBuilder output) { + writeHeader(output, QueryConstructorType.NULL, new StringBuilder()); + } + + public static void writeHeader(StringBuilder output, QueryConstructorType type, + StringBuilder data) { + output.append(Integer.toHexString(data.length())).append('|'); + output.append(type.ordinal()).append('|'); + output.append(data); + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/TermPosition.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/TermPosition.java new file mode 100644 index 0000000..310068e --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/TermPosition.java @@ -0,0 +1,27 @@ +package it.cavallium.luceneserializer.luceneserializer; + +import org.apache.lucene.index.Term; + +public class TermPosition implements Query { + + private final Term term; + private final int position; + + public TermPosition(Term term, int position) 
{ + this.term = term; + this.position = position; + } + + public Term getTerm() { + return term; + } + + public int getPosition() { + return position; + } + + @Override + public void stringify(StringBuilder output) { + + } +} diff --git a/src/main/java/it/cavallium/luceneserializer/luceneserializer/TermQuery.java b/src/main/java/it/cavallium/luceneserializer/luceneserializer/TermQuery.java new file mode 100644 index 0000000..2d036ea --- /dev/null +++ b/src/main/java/it/cavallium/luceneserializer/luceneserializer/TermQuery.java @@ -0,0 +1,23 @@ +package it.cavallium.luceneserializer.luceneserializer; + +import org.apache.lucene.index.Term; + +public class TermQuery implements Query { + + private final Term term; + + public TermQuery(Term term) { + this.term = term; + } + + public TermQuery(String name, String val) { + this.term = new Term(name, val); + } + + @Override + public void stringify(StringBuilder output) { + StringBuilder data = new StringBuilder(); + StringifyUtils.stringifyTerm(data, term); + StringifyUtils.writeHeader(output, QueryConstructorType.TERM_QUERY, data); + } +} diff --git a/src/main/proto/database.proto b/src/main/proto/database.proto new file mode 100644 index 0000000..bc641a0 --- /dev/null +++ b/src/main/proto/database.proto @@ -0,0 +1,387 @@ +syntax = "proto3"; + +option java_multiple_files = true; +option java_package = "it.cavallium.dbengine.proto"; +option java_outer_classname = "CavalliumDBEngineProto"; +option objc_class_prefix = "CDBE"; + +package cavallium.dbengine; + +service CavalliumDBEngineService { + rpc ResetConnection (ResetConnectionRequest) returns (Empty) {} + rpc Ping (Empty) returns (Empty) {} + + rpc DatabaseOpen (DatabaseOpenRequest) returns (HandleResult) {} + rpc DatabaseClose (DatabaseCloseRequest) returns (Empty) {} + rpc DatabaseSnapshotTake (DatabaseSnapshotTakeRequest) returns (DatabaseSnapshotTakeResult) {} + rpc DatabaseSnapshotRelease (DatabaseSnapshotReleaseRequest) returns (Empty) {} + rpc SingletonOpen (SingletonOpenRequest) returns (HandleResult) {} + rpc DictionaryOpen (DictionaryOpenRequest) returns (HandleResult) {} + rpc DictionaryMethodGet (DictionaryMethodGetRequest) returns (DictionaryMethodGetResponse) {} + rpc DictionaryMethodContains (DictionaryMethodContainsRequest) returns (DictionaryMethodContainsResponse) {} + rpc DictionaryMethodPut (DictionaryMethodPutRequest) returns (DictionaryMethodStandardResult) {} + rpc DictionaryMethodPutMulti (DictionaryMethodPutMultiRequest) returns (DictionaryMethodMultiStandardResult) {} + rpc DictionaryMethodRemove (DictionaryMethodRemoveRequest) returns (DictionaryMethodStandardResult) {} + rpc DictionaryMethodClear (DictionaryMethodClearRequest) returns (Empty) {} + rpc DictionaryMethodFastSize (DictionaryMethodSizeRequest) returns (DictionaryMethodSizeResponse) {} + rpc DictionaryMethodExactSize (DictionaryMethodSizeRequest) returns (DictionaryMethodSizeResponse) {} + rpc DictionaryMethodIsEmpty (DictionaryMethodIsEmptyRequest) returns (DictionaryMethodIsEmptyResponse) {} + rpc DictionaryMethodRemoveOne (DictionaryMethodRemoveOneRequest) returns (DictionaryMethodStandardEntityResponse) {} + rpc DictionaryMethodForEach (DictionaryMethodForEachRequest) returns (stream DictionaryMethodStandardEntityResponse) {} + rpc DictionaryMethodReplaceAll (DictionaryMethodReplaceAllRequest) returns (stream DictionaryMethodReplaceAllResponse) {} + rpc SingletonMethodGet (SingletonMethodGetRequest) returns (SingletonMethodGetResponse) {} + rpc SingletonMethodSet (SingletonMethodSetRequest) returns 
(Empty) {} + + rpc LuceneIndexOpen (LuceneIndexOpenRequest) returns (HandleResult) {} + rpc LuceneIndexClose (LuceneIndexCloseRequest) returns (Empty) {} + rpc LuceneIndexSnapshotTake (LuceneIndexSnapshotTakeRequest) returns (LuceneIndexSnapshotTakeResult) {} + rpc LuceneIndexSnapshotRelease (LuceneIndexSnapshotReleaseRequest) returns (Empty) {} + rpc LuceneIndexMethodAddDocument (LuceneIndexMethodAddDocumentRequest) returns (Empty) {} + rpc LuceneIndexMethodAddDocumentMulti (LuceneIndexMethodAddDocumentMultiRequest) returns (Empty) {} + rpc LuceneIndexMethodDeleteDocument (LuceneIndexMethodDeleteDocumentRequest) returns (Empty) {} + rpc LuceneIndexMethodUpdateDocument (LuceneIndexMethodUpdateDocumentRequest) returns (Empty) {} + rpc LuceneIndexMethodUpdateDocumentMulti (LuceneIndexMethodUpdateDocumentMultiRequest) returns (Empty) {} + rpc LuceneIndexMethodDeleteAll (LuceneIndexMethodDeleteAllRequest) returns (Empty) {} + rpc LuceneIndexMethodSearch (LuceneIndexMethodSearchRequest) returns (LuceneIndexMethodSearchMultiResponse) {} + rpc LuceneIndexMethodMoreLikeThis (LuceneIndexMethodMoreLikeThisRequest) returns (LuceneIndexMethodSearchMultiResponse) {} + rpc LuceneIndexMethodSearchStream (LuceneIndexMethodSearchStreamRequest) returns (stream LuceneIndexMethodSearchStreamItem) {} + rpc LuceneIndexMethodCount (LuceneIndexMethodCountRequest) returns (LuceneIndexMethodCountResponse) {} + +} + +enum LLDictionaryResultType { + VOID = 0; + VALUE_CHANGED = 1; + PREVIOUS_VALUE = 2; +} + +message Empty { + +} + +message HandleResult { + int32 handle = 1; +} + +message ResetConnectionRequest { +} + +message DatabaseOpenRequest { + bytes name = 1; + repeated bytes columnName = 2; + bool lowMemory = 3; +} + +message DatabaseCloseRequest { + int32 databaseHandle = 1; +} + +message SingletonOpenRequest { + int32 databaseHandle = 1; + bytes singletonListColumnName = 2; + bytes name = 3; + bytes defaultValue = 4; +} + +message DictionaryOpenRequest { + int32 databaseHandle = 1; + bytes columnName = 2; +} + +message DatabaseSnapshotTakeRequest { + int32 databaseHandle = 1; +} + +message DatabaseSnapshotReleaseRequest { + int32 databaseHandle = 1; + int64 sequenceNumber = 2; +} + +message DatabaseSnapshotTakeResult { + int64 sequenceNumber = 1; +} + +message DictionaryMethodGetRequest { + int32 dictionaryHandle = 1; + int64 sequenceNumber = 3; + + bytes key = 2; +} + +message DictionaryMethodGetResponse { + bytes value = 1; +} + +message DictionaryMethodContainsRequest { + int32 dictionaryHandle = 1; + int64 sequenceNumber = 3; + + bytes key = 2; +} + +message DictionaryMethodContainsResponse { + bool value = 1; +} + +message DictionaryMethodPutRequest { + int32 dictionaryHandle = 1; + bytes key = 2; + bytes value = 3; + LLDictionaryResultType resultType = 4; +} + +message DictionaryMethodPutMultiRequest { + int32 dictionaryHandle = 1; + repeated bytes key = 2; + repeated bytes value = 3; + LLDictionaryResultType resultType = 4; +} + +message DictionaryMethodRemoveRequest { + int32 dictionaryHandle = 1; + bytes key = 2; + LLDictionaryResultType resultType = 3; +} + +message DictionaryMethodClearRequest { + int32 dictionaryHandle = 1; +} + +message DictionaryMethodSizeRequest { + int32 dictionaryHandle = 1; + int64 sequenceNumber = 2; +} + +message DictionaryMethodIsEmptyRequest { + int32 dictionaryHandle = 1; + int64 sequenceNumber = 2; +} + +message DictionaryMethodRemoveOneRequest { + int32 dictionaryHandle = 1; +} + +message DictionaryMethodSizeResponse { + int64 size = 1; +} + +message 
+
+enum LLDictionaryResultType {
+  VOID = 0;
+  VALUE_CHANGED = 1;
+  PREVIOUS_VALUE = 2;
+}
+
+message Empty {
+
+}
+
+message HandleResult {
+  int32 handle = 1;
+}
+
+message ResetConnectionRequest {
+}
+
+message DatabaseOpenRequest {
+  bytes name = 1;
+  repeated bytes columnName = 2;
+  bool lowMemory = 3;
+}
+
+message DatabaseCloseRequest {
+  int32 databaseHandle = 1;
+}
+
+message SingletonOpenRequest {
+  int32 databaseHandle = 1;
+  bytes singletonListColumnName = 2;
+  bytes name = 3;
+  bytes defaultValue = 4;
+}
+
+message DictionaryOpenRequest {
+  int32 databaseHandle = 1;
+  bytes columnName = 2;
+}
+
+message DatabaseSnapshotTakeRequest {
+  int32 databaseHandle = 1;
+}
+
+message DatabaseSnapshotReleaseRequest {
+  int32 databaseHandle = 1;
+  int64 sequenceNumber = 2;
+}
+
+message DatabaseSnapshotTakeResult {
+  int64 sequenceNumber = 1;
+}
+
+message DictionaryMethodGetRequest {
+  int32 dictionaryHandle = 1;
+  int64 sequenceNumber = 3;
+
+  bytes key = 2;
+}
+
+message DictionaryMethodGetResponse {
+  bytes value = 1;
+}
+
+message DictionaryMethodContainsRequest {
+  int32 dictionaryHandle = 1;
+  int64 sequenceNumber = 3;
+
+  bytes key = 2;
+}
+
+message DictionaryMethodContainsResponse {
+  bool value = 1;
+}
+
+message DictionaryMethodPutRequest {
+  int32 dictionaryHandle = 1;
+  bytes key = 2;
+  bytes value = 3;
+  LLDictionaryResultType resultType = 4;
+}
+
+message DictionaryMethodPutMultiRequest {
+  int32 dictionaryHandle = 1;
+  repeated bytes key = 2;
+  repeated bytes value = 3;
+  LLDictionaryResultType resultType = 4;
+}
+
+message DictionaryMethodRemoveRequest {
+  int32 dictionaryHandle = 1;
+  bytes key = 2;
+  LLDictionaryResultType resultType = 3;
+}
+
+message DictionaryMethodClearRequest {
+  int32 dictionaryHandle = 1;
+}
+
+message DictionaryMethodSizeRequest {
+  int32 dictionaryHandle = 1;
+  int64 sequenceNumber = 2;
+}
+
+message DictionaryMethodIsEmptyRequest {
+  int32 dictionaryHandle = 1;
+  int64 sequenceNumber = 2;
+}
+
+message DictionaryMethodRemoveOneRequest {
+  int32 dictionaryHandle = 1;
+}
+
+message DictionaryMethodSizeResponse {
+  int64 size = 1;
+}
+
+message DictionaryMethodIsEmptyResponse {
+  bool empty = 1;
+}
+
+message DictionaryMethodStandardResult {
+  bytes value = 1;
+}
+
+message DictionaryMethodMultiStandardResult {
+  repeated bytes value = 1;
+}
+
+message DictionaryMethodForEachRequest {
+  int32 dictionaryHandle = 1;
+  int64 sequenceNumber = 2;
+}
+
+message DictionaryMethodStandardEntityResponse {
+  bytes key = 1;
+  bytes value = 2;
+}
+
+message DictionaryMethodForEachSnapshotRequest {
+  int32 dictionaryHandle = 1;
+}
+
+message DictionaryMethodForEachSnapshotResponse {
+  bytes key = 1;
+  bytes value = 2;
+}
+
+message DictionaryMethodReplaceAllRequest {
+  int32 dictionaryHandle = 1;
+  bool replaceKeys = 2;
+}
+
+message DictionaryMethodReplaceAllResponse {
+  bytes key = 1;
+  bytes value = 2;
+}
+
+message SingletonMethodGetRequest {
+  int32 singletonHandle = 1;
+  int64 sequenceNumber = 2;
+}
+
+message SingletonMethodGetResponse {
+  bytes value = 1;
+}
+
+message SingletonMethodSetRequest {
+  int32 singletonHandle = 1;
+  bytes value = 3;
+}
+
+message LuceneIndexOpenRequest {
+  string name = 1;
+  int32 textFieldsAnalyzer = 3;
+  int32 commitDebounceTime = 2;
+  int32 queryRefreshDebounceTime = 4;
+  bool lowMemory = 5;
+  int32 instancesCount = 6;
+}
+
+message LuceneIndexCloseRequest {
+  int32 handle = 1;
+}
+
+message LuceneIndexSnapshotTakeRequest {
+  int32 handle = 1;
+}
+
+message LuceneIndexSnapshotReleaseRequest {
+  int32 handle = 1;
+  int64 sequenceNumber = 2;
+}
+
+message LuceneIndexSnapshotTakeResult {
+  int64 sequenceNumber = 1;
+}
+
+message LuceneIndexMethodAddDocumentRequest {
+  int32 handle = 1;
+  LLTerm key = 3;
+  repeated LLItem documentItems = 2;
+}
+
+message LuceneIndexMethodAddDocumentMultiRequest {
+  int32 handle = 1;
+  repeated LLTerm key = 3;
+  repeated LLDocument documents = 2;
+}
+
+message LuceneIndexMethodDeleteDocumentRequest {
+  int32 handle = 1;
+  LLTerm key = 2;
+}
+
+message LuceneIndexMethodUpdateDocumentRequest {
+  int32 handle = 1;
+  LLTerm key = 2;
+  repeated LLItem documentItems = 3;
+}
+
+message LuceneIndexMethodUpdateDocumentMultiRequest {
+  int32 handle = 1;
+  repeated LLTerm key = 2;
+  repeated LLDocument documents = 3;
+}
+
+message LuceneIndexMethodDeleteAllRequest {
+  int32 handle = 1;
+}
+
+message LuceneIndexMethodSearchRequest {
+  int32 handle = 1;
+  int64 sequenceNumber = 6;
+
+  string query = 2;
+  int32 limit = 3;
+  LLSort sort = 4;
+  string keyFieldName = 5;
+}
+
+message LuceneIndexMethodMoreLikeThisRequest {
+  int32 handle = 1;
+  int64 sequenceNumber = 5;
+
+  repeated MltField mltFields = 2;
+  int32 limit = 3;
+  string keyFieldName = 4;
+}
+
+message MltField {
+  string key = 1;
+  repeated string values = 2;
+}
+
+message LuceneIndexMethodSearchMultiResponse {
+  repeated LuceneIndexMethodSearchResponse response = 1;
+}
+
+message LuceneIndexMethodSearchResponse {
+  int64 totalHitsCount = 1;
+  repeated LLKeyScore hits = 2;
+}
+
+message LuceneIndexMethodSearchStreamRequest {
+  int32 handle = 1;
+  int64 sequenceNumber = 6;
+
+  int32 shardIndex = 7;
+
+  string query = 2;
+  int32 limit = 3;
+  LLSort sort = 4;
+  string keyFieldName = 5;
+}
+
+message LuceneIndexMethodSearchStreamItem {
+  bool isKey = 1;
+  // If isKey == true:
+  string key = 2;
+  int32 shardIndex = 3;
+  // If isKey == false:
+  int64 approximatedTotalCount = 4;
+}
+
+message LLKeyScore {
+  string key = 1;
+  float score = 2;
+}
+
+message LuceneIndexMethodCountRequest {
+  int32 handle = 1;
+  int64 sequenceNumber = 3;
+
+  string query = 2;
+}
+
+message LuceneIndexMethodCountResponse {
+  int64 count = 1;
+}
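Note the isKey flag in LuceneIndexMethodSearchStreamItem: matching keys (tagged with their shard index) and the approximated total count share a single response stream, and the flag tells the client which of the two it just received. A hedged consumer sketch, reusing the blocking stub from the previous example (handle, query string and key field are hypothetical):

import it.cavallium.dbengine.proto.LuceneIndexMethodSearchStreamItem;
import it.cavallium.dbengine.proto.LuceneIndexMethodSearchStreamRequest;
import java.util.Iterator;

// "stub" is the CavalliumDBEngineServiceBlockingStub built above.
Iterator<LuceneIndexMethodSearchStreamItem> items = stub.luceneIndexMethodSearchStream(
    LuceneIndexMethodSearchStreamRequest.newBuilder()
        .setHandle(2)           // hypothetical Lucene index handle
        .setQuery("text:hello") // query string, parsed server-side
        .setLimit(100)
        .setKeyFieldName("id")
        .build());
while (items.hasNext()) {
  LuceneIndexMethodSearchStreamItem item = items.next();
  if (item.getIsKey()) {
    System.out.println("hit: " + item.getKey() + " (shard " + item.getShardIndex() + ")");
  } else {
    System.out.println("approximated total: " + item.getApproximatedTotalCount());
  }
}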
+
+message LLSort {
+  string fieldName = 1;
+  LLSortType type = 2;
+  bool reverse = 3;
+}
+
+enum LLSortType {
+  LONG = 0;
+  RANDOM = 1;
+}
+
+message LLItem {
+  LLType type = 1;
+  string name = 2;
+  bytes data1 = 3;
+  bytes data2 = 4;
+}
+
+message LLDocument {
+  repeated LLItem items = 1;
+}
+
+message LLTerm {
+  string key = 1;
+  string value = 2;
+}
+
+enum LLType {
+  StringField = 0;
+  StringFieldStored = 1;
+  IntPoint = 2;
+  LongPoint = 3;
+  SortedNumericDocValuesField = 4;
+  TextField = 5;
+  TextFieldStored = 6;
+}
\ No newline at end of file
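Finally, the low-level Lucene types above map one-to-one onto the generated protobuf builders. A hedged sketch of assembling an add-document request; the handle and field names are hypothetical, and it assumes data1 carries the field's value as bytes:

import com.google.protobuf.ByteString;
import it.cavallium.dbengine.proto.LLItem;
import it.cavallium.dbengine.proto.LLTerm;
import it.cavallium.dbengine.proto.LLType;
import it.cavallium.dbengine.proto.LuceneIndexMethodAddDocumentRequest;

// Assumption: data1 holds the field value; LLType tells the engine
// which Lucene field class to instantiate server-side.
LLItem titleField = LLItem.newBuilder()
    .setType(LLType.TextFieldStored)
    .setName("title")
    .setData1(ByteString.copyFromUtf8("Hello world"))
    .build();

LuceneIndexMethodAddDocumentRequest request = LuceneIndexMethodAddDocumentRequest.newBuilder()
    .setHandle(2) // hypothetical Lucene index handle
    .setKey(LLTerm.newBuilder().setKey("id").setValue("doc-1").build())
    .addDocumentItems(titleField)
    .build();
// stub.luceneIndexMethodAddDocument(request) returns Empty on success.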