Fix lucene hugepq searcher
This commit is contained in:
parent
dc69bf8e25
commit
6c6263e1d0
@ -6,14 +6,21 @@ import it.cavallium.dbengine.database.SafeCloseable;
|
|||||||
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
|
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
|
||||||
import it.cavallium.dbengine.database.disk.HugePqEnv;
|
import it.cavallium.dbengine.database.disk.HugePqEnv;
|
||||||
import it.cavallium.dbengine.database.disk.StandardRocksDBColumn;
|
import it.cavallium.dbengine.database.disk.StandardRocksDBColumn;
|
||||||
|
import it.cavallium.dbengine.database.disk.UnreleasableReadOptions;
|
||||||
|
import it.cavallium.dbengine.database.disk.UnreleasableWriteOptions;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import org.jetbrains.annotations.Nullable;
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
import org.rocksdb.FlushOptions;
|
||||||
import org.rocksdb.ReadOptions;
|
import org.rocksdb.ReadOptions;
|
||||||
|
import org.rocksdb.RocksDB;
|
||||||
import org.rocksdb.RocksDBException;
|
import org.rocksdb.RocksDBException;
|
||||||
import org.rocksdb.WriteOptions;
|
import org.rocksdb.WriteOptions;
|
||||||
|
|
||||||
public class HugePqArray<V> implements IArray<V>, SafeCloseable {
|
public class HugePqArray<V> implements IArray<V>, SafeCloseable {
|
||||||
|
|
||||||
|
static {
|
||||||
|
RocksDB.loadLibrary();
|
||||||
|
}
|
||||||
|
|
||||||
private final AtomicBoolean closed = new AtomicBoolean();
|
private final AtomicBoolean closed = new AtomicBoolean();
|
||||||
private final HugePqCodec<V> valueCodec;
|
private final HugePqCodec<V> valueCodec;
|
||||||
@ -21,8 +28,11 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
|
|||||||
private final HugePqEnv env;
|
private final HugePqEnv env;
|
||||||
private final int hugePqId;
|
private final int hugePqId;
|
||||||
private final StandardRocksDBColumn rocksDB;
|
private final StandardRocksDBColumn rocksDB;
|
||||||
private WriteOptions writeOptions;
|
private static final UnreleasableWriteOptions writeOptions = new UnreleasableWriteOptions(new WriteOptions()
|
||||||
private ReadOptions readOptions;
|
.setDisableWAL(true)
|
||||||
|
.setSync(false));
|
||||||
|
private static final UnreleasableReadOptions readOptions = new UnreleasableReadOptions(new ReadOptions()
|
||||||
|
.setVerifyChecksums(false));
|
||||||
private final V defaultValue;
|
private final V defaultValue;
|
||||||
|
|
||||||
private final long virtualSize;
|
private final long virtualSize;
|
||||||
@ -33,8 +43,6 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
|
|||||||
this.env = env.getEnv();
|
this.env = env.getEnv();
|
||||||
this.hugePqId = env.allocateDb(null);
|
this.hugePqId = env.allocateDb(null);
|
||||||
this.rocksDB = this.env.openDb(hugePqId);
|
this.rocksDB = this.env.openDb(hugePqId);
|
||||||
this.writeOptions = new WriteOptions().setDisableWAL(true).setSync(false);
|
|
||||||
this.readOptions = new ReadOptions().setVerifyChecksums(false);
|
|
||||||
this.defaultValue = defaultValue;
|
this.defaultValue = defaultValue;
|
||||||
|
|
||||||
this.virtualSize = size;
|
this.virtualSize = size;
|
||||||
@ -57,8 +65,8 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
|
|||||||
ensureBounds(index);
|
ensureBounds(index);
|
||||||
ensureThread();
|
ensureThread();
|
||||||
var keyBuf = allocate(Long.BYTES);
|
var keyBuf = allocate(Long.BYTES);
|
||||||
keyBuf.writeLong(index);
|
|
||||||
try (var valueBuf = valueCodec.serialize(this::allocate, value); keyBuf) {
|
try (var valueBuf = valueCodec.serialize(this::allocate, value); keyBuf) {
|
||||||
|
keyBuf.writeLong(index);
|
||||||
rocksDB.put(writeOptions, keyBuf, valueBuf);
|
rocksDB.put(writeOptions, keyBuf, valueBuf);
|
||||||
} catch (RocksDBException e) {
|
} catch (RocksDBException e) {
|
||||||
throw new IllegalStateException(e);
|
throw new IllegalStateException(e);
|
||||||
@ -85,7 +93,11 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
|
|||||||
|
|
||||||
var keyBuf = allocate(Long.BYTES);
|
var keyBuf = allocate(Long.BYTES);
|
||||||
try (keyBuf) {
|
try (keyBuf) {
|
||||||
|
keyBuf.writeLong(index);
|
||||||
try (var value = rocksDB.get(readOptions, keyBuf)) {
|
try (var value = rocksDB.get(readOptions, keyBuf)) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return valueCodec.deserialize(value);
|
return valueCodec.deserialize(value);
|
||||||
}
|
}
|
||||||
} catch (RocksDBException e) {
|
} catch (RocksDBException e) {
|
||||||
@ -105,17 +117,22 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
readOptions.close();
|
|
||||||
writeOptions.close();
|
|
||||||
if (closed.compareAndSet(false, true)) {
|
if (closed.compareAndSet(false, true)) {
|
||||||
ensureThread();
|
ensureThread();
|
||||||
this.tempEnv.freeDb(hugePqId);
|
this.tempEnv.freeDb(hugePqId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "huge_pq_array[" + virtualSize + "]";
|
return "huge_pq_array[" + virtualSize + "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Object[] toArray() {
|
||||||
|
var result = new Object[Math.toIntExact(virtualSize)];
|
||||||
|
for (int i = 0; i < virtualSize; i++) {
|
||||||
|
result[i] = get(i);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,7 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
|||||||
.setDisableWAL(true)
|
.setDisableWAL(true)
|
||||||
.setSync(false));
|
.setSync(false));
|
||||||
private static final UnreleasableReadOptions readOptions = new UnreleasableReadOptions(new ReadOptions()
|
private static final UnreleasableReadOptions readOptions = new UnreleasableReadOptions(new ReadOptions()
|
||||||
.setVerifyChecksums(false).setTotalOrderSeek(true));
|
.setVerifyChecksums(false));
|
||||||
private final HugePqCodec<T> codec;
|
private final HugePqCodec<T> codec;
|
||||||
|
|
||||||
private long size = 0;
|
private long size = 0;
|
||||||
|
@ -42,7 +42,7 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
|
|||||||
reverseMul[i] = field.getReverse() ? -1 : 1;
|
reverseMul[i] = field.getReverse() ? -1 : 1;
|
||||||
comparators[i] = HugePqComparator.getComparator(env, field, numHits, i);
|
comparators[i] = HugePqComparator.getComparator(env, field, numHits, i);
|
||||||
}
|
}
|
||||||
comparator = new AbstractComparator(new ComparatorOptions()) {
|
comparator = new AbstractComparator(new ComparatorOptions().setMaxReusedBufferSize(0)) {
|
||||||
@Override
|
@Override
|
||||||
public String name() {
|
public String name() {
|
||||||
return "slot-doc-codec-comparator";
|
return "slot-doc-codec-comparator";
|
||||||
@ -51,11 +51,22 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
|
|||||||
@Override
|
@Override
|
||||||
public int compare(ByteBuffer hitA, ByteBuffer hitB) {
|
public int compare(ByteBuffer hitA, ByteBuffer hitB) {
|
||||||
assert hitA != hitB;
|
assert hitA != hitB;
|
||||||
assert getSlot(hitA) != getSlot(hitB);
|
hitA.position(hitA.position() + Float.BYTES);
|
||||||
|
hitB.position(hitB.position() + Float.BYTES);
|
||||||
|
var docA = readDoc(hitA);
|
||||||
|
var docB = readDoc(hitB);
|
||||||
|
if (docA == docB) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
hitA.position(hitA.position() + Integer.BYTES);
|
||||||
|
hitB.position(hitB.position() + Integer.BYTES);
|
||||||
|
var slotA = readSlot(hitA);
|
||||||
|
var slotB = readSlot(hitB);
|
||||||
|
assert slotA != slotB : "Slot " + slotA + " is equal to slot " + slotB;
|
||||||
|
|
||||||
int numComparators = comparators.length;
|
int numComparators = comparators.length;
|
||||||
for (int i = 0; i < numComparators; ++i) {
|
for (int i = 0; i < numComparators; ++i) {
|
||||||
final int c = reverseMul[i] * comparators[i].compare(getSlot(hitA), getSlot(hitB));
|
final int c = reverseMul[i] * comparators[i].compare(slotA, slotB);
|
||||||
if (c != 0) {
|
if (c != 0) {
|
||||||
// Short circuit
|
// Short circuit
|
||||||
return -c;
|
return -c;
|
||||||
@ -63,7 +74,7 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
|
|||||||
}
|
}
|
||||||
|
|
||||||
// avoid random sort order that could lead to duplicates (bug #31241):
|
// avoid random sort order that could lead to duplicates (bug #31241):
|
||||||
return Integer.compare(getDoc(hitB), getDoc(hitA));
|
return Integer.compare(docB, docA);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -71,11 +82,11 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
|
|||||||
@Override
|
@Override
|
||||||
public Buffer serialize(Function<Integer, Buffer> allocator, LLSlotDoc data) {
|
public Buffer serialize(Function<Integer, Buffer> allocator, LLSlotDoc data) {
|
||||||
var buf = allocator.apply(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
|
var buf = allocator.apply(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
|
||||||
|
buf.writerOffset(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
|
||||||
setScore(buf, data.score());
|
setScore(buf, data.score());
|
||||||
setDoc(buf, data.doc());
|
setDoc(buf, data.doc());
|
||||||
setShardIndex(buf, data.shardIndex());
|
setShardIndex(buf, data.shardIndex());
|
||||||
setSlot(buf, data.slot());
|
setSlot(buf, data.slot());
|
||||||
buf.writerOffset(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,8 +108,8 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
|
|||||||
return hit.getInt(Float.BYTES);
|
return hit.getInt(Float.BYTES);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getDoc(ByteBuffer hit) {
|
private static int readDoc(ByteBuffer hit) {
|
||||||
return hit.getInt(Float.BYTES);
|
return hit.getInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getShardIndex(Buffer hit) {
|
private static int getShardIndex(Buffer hit) {
|
||||||
@ -109,8 +120,8 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
|
|||||||
return hit.getInt(Float.BYTES + Integer.BYTES + Integer.BYTES);
|
return hit.getInt(Float.BYTES + Integer.BYTES + Integer.BYTES);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getSlot(ByteBuffer hit) {
|
private static int readSlot(ByteBuffer hit) {
|
||||||
return hit.getInt(Float.BYTES + Integer.BYTES + Integer.BYTES);
|
return hit.getInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void setScore(Buffer hit, float score) {
|
private static void setScore(Buffer hit, float score) {
|
||||||
|
@ -46,7 +46,11 @@ public class DoubleComparator extends NumericComparator<Double> implements SafeC
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(int slot1, int slot2) {
|
public int compare(int slot1, int slot2) {
|
||||||
return Double.compare(values.getOrDefault(slot1, 0d), values.getOrDefault(slot2, 0d));
|
var value1 = values.get(slot1);
|
||||||
|
var value2 = values.get(slot2);
|
||||||
|
assert value1 != null : "Missing value for slot1: " + slot1;
|
||||||
|
assert value2 != null : "Missing value for slot2: " + slot2;
|
||||||
|
return Double.compare(value1, value2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -46,7 +46,11 @@ public class FloatComparator extends NumericComparator<Float> implements SafeClo
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(int slot1, int slot2) {
|
public int compare(int slot1, int slot2) {
|
||||||
return Float.compare(values.getOrDefault(slot1, 0f), values.getOrDefault(slot2, 0f));
|
var value1 = values.get(slot1);
|
||||||
|
var value2 = values.get(slot2);
|
||||||
|
assert value1 != null : "Missing value for slot1: " + slot1;
|
||||||
|
assert value2 != null : "Missing value for slot2: " + slot2;
|
||||||
|
return Float.compare(value1, value2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -46,7 +46,11 @@ public class IntComparator extends NumericComparator<Integer> implements SafeClo
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(int slot1, int slot2) {
|
public int compare(int slot1, int slot2) {
|
||||||
return Integer.compare(values.getOrDefault(slot1, 0), values.getOrDefault(slot2, 0));
|
var value1 = values.get(slot1);
|
||||||
|
var value2 = values.get(slot2);
|
||||||
|
assert value1 != null : "Missing value for slot1: " + slot1;
|
||||||
|
assert value2 != null : "Missing value for slot2: " + slot2;
|
||||||
|
return Integer.compare(value1, value2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -17,12 +17,15 @@
|
|||||||
|
|
||||||
package it.cavallium.dbengine.lucene.comparators;
|
package it.cavallium.dbengine.lucene.comparators;
|
||||||
|
|
||||||
|
import static java.util.Objects.requireNonNull;
|
||||||
|
|
||||||
import it.cavallium.dbengine.database.SafeCloseable;
|
import it.cavallium.dbengine.database.SafeCloseable;
|
||||||
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
|
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
|
||||||
import it.cavallium.dbengine.lucene.IArray;
|
|
||||||
import it.cavallium.dbengine.lucene.HugePqArray;
|
import it.cavallium.dbengine.lucene.HugePqArray;
|
||||||
|
import it.cavallium.dbengine.lucene.IArray;
|
||||||
import it.cavallium.dbengine.lucene.LongCodec;
|
import it.cavallium.dbengine.lucene.LongCodec;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.search.LeafFieldComparator;
|
import org.apache.lucene.search.LeafFieldComparator;
|
||||||
@ -46,7 +49,11 @@ public class LongComparator extends NumericComparator<Long> implements SafeClose
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(int slot1, int slot2) {
|
public int compare(int slot1, int slot2) {
|
||||||
return Long.compare(values.getOrDefault(slot1, 0L), values.getOrDefault(slot2, 0L));
|
var value1 = values.get(slot1);
|
||||||
|
var value2 = values.get(slot2);
|
||||||
|
assert value1 != null : "Missing value for slot1: " + slot1;
|
||||||
|
assert value2 != null : "Missing value for slot2: " + slot2;
|
||||||
|
return Long.compare(value1, value2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -51,7 +51,11 @@ public final class RelevanceComparator extends FieldComparator<Float> implements
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(int slot1, int slot2) {
|
public int compare(int slot1, int slot2) {
|
||||||
return Float.compare(scores.getOrDefault(slot2, 0f), scores.getOrDefault(slot1, 0f));
|
var value1 = scores.get(slot1);
|
||||||
|
var value2 = scores.get(slot2);
|
||||||
|
assert value1 != null : "Missing score for slot1: " + slot1;
|
||||||
|
assert value2 != null : "Missing score for slot2: " + slot2;
|
||||||
|
return Float.compare(value1, value2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -26,7 +26,9 @@ public class TestHugePq {
|
|||||||
this.queue = new HugePqPriorityQueue<>(env, new HugePqCodec<Integer>() {
|
this.queue = new HugePqPriorityQueue<>(env, new HugePqCodec<Integer>() {
|
||||||
@Override
|
@Override
|
||||||
public Buffer serialize(Function<Integer, Buffer> allocator, Integer data) {
|
public Buffer serialize(Function<Integer, Buffer> allocator, Integer data) {
|
||||||
return HugePqCodec.setLexInt(allocator.apply(Integer.BYTES), 0, false, data);
|
var buf = allocator.apply(Integer.BYTES);
|
||||||
|
HugePqCodec.setLexInt(buf, 0, false, data);
|
||||||
|
return buf.writerOffset(Integer.BYTES);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -45,6 +45,7 @@ import java.io.IOException;
|
|||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
@ -85,7 +86,7 @@ public class TestLuceneSearches {
|
|||||||
// Start the pool by creating and deleting a direct buffer
|
// Start the pool by creating and deleting a direct buffer
|
||||||
PooledByteBufAllocator.DEFAULT.directBuffer().release();
|
PooledByteBufAllocator.DEFAULT.directBuffer().release();
|
||||||
|
|
||||||
var modifiableElements = new HashMap<String, String>();
|
var modifiableElements = new LinkedHashMap<String, String>();
|
||||||
modifiableElements.put("test-key-1", "0123456789");
|
modifiableElements.put("test-key-1", "0123456789");
|
||||||
modifiableElements.put("test-key-2", "test 0123456789 test word");
|
modifiableElements.put("test-key-2", "test 0123456789 test word");
|
||||||
modifiableElements.put("test-key-3", "0123456789 test example string");
|
modifiableElements.put("test-key-3", "0123456789 test example string");
|
||||||
@ -123,7 +124,7 @@ public class TestLuceneSearches {
|
|||||||
|
|
||||||
Flux
|
Flux
|
||||||
.fromIterable(ELEMENTS.entrySet())
|
.fromIterable(ELEMENTS.entrySet())
|
||||||
.flatMap(entry -> index.updateDocument(entry.getKey(), entry.getValue()))
|
.concatMap(entry -> index.updateDocument(entry.getKey(), entry.getValue()))
|
||||||
.subscribeOn(Schedulers.boundedElastic())
|
.subscribeOn(Schedulers.boundedElastic())
|
||||||
.blockLast();
|
.blockLast();
|
||||||
tempDb.swappableLuceneSearcher().setSingle(new CountMultiSearcher());
|
tempDb.swappableLuceneSearcher().setSingle(new CountMultiSearcher());
|
||||||
@ -279,7 +280,7 @@ public class TestLuceneSearches {
|
|||||||
var officialQuery = queryParamsBuilder.limit(ELEMENTS.size() * 2L).build();
|
var officialQuery = queryParamsBuilder.limit(ELEMENTS.size() * 2L).build();
|
||||||
try (var officialResults = run(luceneIndex.search(officialQuery))) {
|
try (var officialResults = run(luceneIndex.search(officialQuery))) {
|
||||||
var officialHits = officialResults.totalHitsCount();
|
var officialHits = officialResults.totalHitsCount();
|
||||||
var officialKeys = getResults(officialResults).stream().toList();
|
var officialKeys = getResults(officialResults);
|
||||||
if (officialHits.exact()) {
|
if (officialHits.exact()) {
|
||||||
Assertions.assertEquals(officialKeys.size(), officialHits.value());
|
Assertions.assertEquals(officialKeys.size(), officialHits.value());
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user