Fix lucene hugepq searcher

This commit is contained in:
Andrea Cavalli 2022-04-06 14:25:53 +02:00
parent dc69bf8e25
commit 6c6263e1d0
10 changed files with 82 additions and 28 deletions

View File

@@ -6,14 +6,21 @@ import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
import it.cavallium.dbengine.database.disk.HugePqEnv;
import it.cavallium.dbengine.database.disk.StandardRocksDBColumn;
import it.cavallium.dbengine.database.disk.UnreleasableReadOptions;
import it.cavallium.dbengine.database.disk.UnreleasableWriteOptions;
import java.util.concurrent.atomic.AtomicBoolean;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.FlushOptions;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteOptions;
public class HugePqArray<V> implements IArray<V>, SafeCloseable {
static {
RocksDB.loadLibrary();
}
private final AtomicBoolean closed = new AtomicBoolean();
private final HugePqCodec<V> valueCodec;
@@ -21,8 +28,11 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
private final HugePqEnv env;
private final int hugePqId;
private final StandardRocksDBColumn rocksDB;
private WriteOptions writeOptions;
private ReadOptions readOptions;
private static final UnreleasableWriteOptions writeOptions = new UnreleasableWriteOptions(new WriteOptions()
.setDisableWAL(true)
.setSync(false));
private static final UnreleasableReadOptions readOptions = new UnreleasableReadOptions(new ReadOptions()
.setVerifyChecksums(false));
private final V defaultValue;
private final long virtualSize;
@@ -33,8 +43,6 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
this.env = env.getEnv();
this.hugePqId = env.allocateDb(null);
this.rocksDB = this.env.openDb(hugePqId);
this.writeOptions = new WriteOptions().setDisableWAL(true).setSync(false);
this.readOptions = new ReadOptions().setVerifyChecksums(false);
this.defaultValue = defaultValue;
this.virtualSize = size;
@@ -57,8 +65,8 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
ensureBounds(index);
ensureThread();
var keyBuf = allocate(Long.BYTES);
keyBuf.writeLong(index);
try (var valueBuf = valueCodec.serialize(this::allocate, value); keyBuf) {
keyBuf.writeLong(index);
rocksDB.put(writeOptions, keyBuf, valueBuf);
} catch (RocksDBException e) {
throw new IllegalStateException(e);
@@ -85,7 +93,11 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
var keyBuf = allocate(Long.BYTES);
try (keyBuf) {
keyBuf.writeLong(index);
try (var value = rocksDB.get(readOptions, keyBuf)) {
if (value == null) {
return null;
}
return valueCodec.deserialize(value);
}
} catch (RocksDBException e) {
@@ -105,17 +117,22 @@ public class HugePqArray<V> implements IArray<V>, SafeCloseable {
@Override
public void close() {
readOptions.close();
writeOptions.close();
if (closed.compareAndSet(false, true)) {
ensureThread();
this.tempEnv.freeDb(hugePqId);
}
}
@Override
public String toString() {
return "huge_pq_array[" + virtualSize + "]";
}
public Object[] toArray() {
var result = new Object[Math.toIntExact(virtualSize)];
for (int i = 0; i < virtualSize; i++) {
result[i] = get(i);
}
return result;
}
}

View File

@@ -43,7 +43,7 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
.setDisableWAL(true)
.setSync(false));
private static final UnreleasableReadOptions readOptions = new UnreleasableReadOptions(new ReadOptions()
.setVerifyChecksums(false).setTotalOrderSeek(true));
.setVerifyChecksums(false));
private final HugePqCodec<T> codec;
private long size = 0;

View File

@@ -42,7 +42,7 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
reverseMul[i] = field.getReverse() ? -1 : 1;
comparators[i] = HugePqComparator.getComparator(env, field, numHits, i);
}
comparator = new AbstractComparator(new ComparatorOptions()) {
comparator = new AbstractComparator(new ComparatorOptions().setMaxReusedBufferSize(0)) {
@Override
public String name() {
return "slot-doc-codec-comparator";
@@ -51,11 +51,22 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
@Override
public int compare(ByteBuffer hitA, ByteBuffer hitB) {
assert hitA != hitB;
assert getSlot(hitA) != getSlot(hitB);
hitA.position(hitA.position() + Float.BYTES);
hitB.position(hitB.position() + Float.BYTES);
var docA = readDoc(hitA);
var docB = readDoc(hitB);
if (docA == docB) {
return 0;
}
hitA.position(hitA.position() + Integer.BYTES);
hitB.position(hitB.position() + Integer.BYTES);
var slotA = readSlot(hitA);
var slotB = readSlot(hitB);
assert slotA != slotB : "Slot " + slotA + " is equal to slot " + slotB;
int numComparators = comparators.length;
for (int i = 0; i < numComparators; ++i) {
final int c = reverseMul[i] * comparators[i].compare(getSlot(hitA), getSlot(hitB));
final int c = reverseMul[i] * comparators[i].compare(slotA, slotB);
if (c != 0) {
// Short circuit
return -c;
@@ -63,7 +74,7 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
}
// avoid random sort order that could lead to duplicates (bug #31241):
return Integer.compare(getDoc(hitB), getDoc(hitA));
return Integer.compare(docB, docA);
}
};
}
@@ -71,11 +82,11 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
@Override
public Buffer serialize(Function<Integer, Buffer> allocator, LLSlotDoc data) {
var buf = allocator.apply(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
buf.writerOffset(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
setScore(buf, data.score());
setDoc(buf, data.doc());
setShardIndex(buf, data.shardIndex());
setSlot(buf, data.slot());
buf.writerOffset(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
return buf;
}
@@ -97,8 +108,8 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
return hit.getInt(Float.BYTES);
}
private static int getDoc(ByteBuffer hit) {
return hit.getInt(Float.BYTES);
private static int readDoc(ByteBuffer hit) {
return hit.getInt();
}
private static int getShardIndex(Buffer hit) {
@@ -109,8 +120,8 @@ public class LLSlotDocCodec implements HugePqCodec<LLSlotDoc>, FieldValueHitQueu
return hit.getInt(Float.BYTES + Integer.BYTES + Integer.BYTES);
}
private static int getSlot(ByteBuffer hit) {
return hit.getInt(Float.BYTES + Integer.BYTES + Integer.BYTES);
private static int readSlot(ByteBuffer hit) {
return hit.getInt();
}
private static void setScore(Buffer hit, float score) {

View File

@@ -46,7 +46,11 @@ public class DoubleComparator extends NumericComparator<Double> implements SafeC
@Override
public int compare(int slot1, int slot2) {
return Double.compare(values.getOrDefault(slot1, 0d), values.getOrDefault(slot2, 0d));
var value1 = values.get(slot1);
var value2 = values.get(slot2);
assert value1 != null : "Missing value for slot1: " + slot1;
assert value2 != null : "Missing value for slot2: " + slot2;
return Double.compare(value1, value2);
}
@Override

View File

@@ -46,7 +46,11 @@ public class FloatComparator extends NumericComparator<Float> implements SafeClo
@Override
public int compare(int slot1, int slot2) {
return Float.compare(values.getOrDefault(slot1, 0f), values.getOrDefault(slot2, 0f));
var value1 = values.get(slot1);
var value2 = values.get(slot2);
assert value1 != null : "Missing value for slot1: " + slot1;
assert value2 != null : "Missing value for slot2: " + slot2;
return Float.compare(value1, value2);
}
@Override

View File

@@ -46,7 +46,11 @@ public class IntComparator extends NumericComparator<Integer> implements SafeClo
@Override
public int compare(int slot1, int slot2) {
return Integer.compare(values.getOrDefault(slot1, 0), values.getOrDefault(slot2, 0));
var value1 = values.get(slot1);
var value2 = values.get(slot2);
assert value1 != null : "Missing value for slot1: " + slot1;
assert value2 != null : "Missing value for slot2: " + slot2;
return Integer.compare(value1, value2);
}
@Override

View File

@@ -17,12 +17,15 @@
package it.cavallium.dbengine.lucene.comparators;
import static java.util.Objects.requireNonNull;
import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.HugePqArray;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.LongCodec;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
@@ -46,7 +49,11 @@ public class LongComparator extends NumericComparator<Long> implements SafeClose
@Override
public int compare(int slot1, int slot2) {
return Long.compare(values.getOrDefault(slot1, 0L), values.getOrDefault(slot2, 0L));
var value1 = values.get(slot1);
var value2 = values.get(slot2);
assert value1 != null : "Missing value for slot1: " + slot1;
assert value2 != null : "Missing value for slot2: " + slot2;
return Long.compare(value1, value2);
}
@Override

View File

@@ -51,7 +51,11 @@ public final class RelevanceComparator extends FieldComparator<Float> implements
@Override
public int compare(int slot1, int slot2) {
return Float.compare(scores.getOrDefault(slot2, 0f), scores.getOrDefault(slot1, 0f));
var value1 = scores.get(slot1);
var value2 = scores.get(slot2);
assert value1 != null : "Missing score for slot1: " + slot1;
assert value2 != null : "Missing score for slot2: " + slot2;
return Float.compare(value1, value2);
}
@Override

View File

@@ -26,7 +26,9 @@ public class TestHugePq {
this.queue = new HugePqPriorityQueue<>(env, new HugePqCodec<Integer>() {
@Override
public Buffer serialize(Function<Integer, Buffer> allocator, Integer data) {
return HugePqCodec.setLexInt(allocator.apply(Integer.BYTES), 0, false, data);
var buf = allocator.apply(Integer.BYTES);
HugePqCodec.setLexInt(buf, 0, false, data);
return buf.writerOffset(Integer.BYTES);
}
@Override

View File

@@ -45,6 +45,7 @@ import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -85,7 +86,7 @@ public class TestLuceneSearches {
// Start the pool by creating and deleting a direct buffer
PooledByteBufAllocator.DEFAULT.directBuffer().release();
var modifiableElements = new HashMap<String, String>();
var modifiableElements = new LinkedHashMap<String, String>();
modifiableElements.put("test-key-1", "0123456789");
modifiableElements.put("test-key-2", "test 0123456789 test word");
modifiableElements.put("test-key-3", "0123456789 test example string");
@@ -123,7 +124,7 @@ public class TestLuceneSearches {
Flux
.fromIterable(ELEMENTS.entrySet())
.flatMap(entry -> index.updateDocument(entry.getKey(), entry.getValue()))
.concatMap(entry -> index.updateDocument(entry.getKey(), entry.getValue()))
.subscribeOn(Schedulers.boundedElastic())
.blockLast();
tempDb.swappableLuceneSearcher().setSingle(new CountMultiSearcher());
@@ -279,7 +280,7 @@ public class TestLuceneSearches {
var officialQuery = queryParamsBuilder.limit(ELEMENTS.size() * 2L).build();
try (var officialResults = run(luceneIndex.search(officialQuery))) {
var officialHits = officialResults.totalHitsCount();
var officialKeys = getResults(officialResults).stream().toList();
var officialKeys = getResults(officialResults);
if (officialHits.exact()) {
Assertions.assertEquals(officialKeys.size(), officialHits.value());
} else {