Implement some sort codecs

This commit is contained in:
Andrea Cavalli 2021-10-15 22:03:53 +02:00
parent 17a9b49755
commit e6c0f14fda
45 changed files with 2613 additions and 140 deletions

4
.gitignore vendored
View File

@ -84,7 +84,8 @@ $RECYCLE.BIN/
.LSOverride
# Icon must end with two \r
Icon
Icon
# Thumbnails
._*
@ -179,3 +180,4 @@ fabric.properties
.idea/caches/build_file_checksums.ser
dbengine.iml
/.idea/

View File

@ -0,0 +1,23 @@
package it.cavallium.dbengine.lucene;
import io.net5.buffer.ByteBuf;
import java.util.function.Function;
/**
 * {@link LMDBCodec} for raw byte arrays.
 *
 * <p>Wire format: a 4-byte length written with {@code writeInt}, followed by the array content.
 */
public class ByteArrayCodec implements LMDBCodec<byte[]> {

  /**
   * Writes the array length followed by the array itself into a buffer obtained from
   * {@code allocator}.
   *
   * @param allocator called once with the exact number of bytes that will be written
   * @param data the array to encode
   * @return the buffer returned by {@code allocator}, filled with the encoded array
   */
  @Override
  public ByteBuf serialize(Function<Integer, ByteBuf> allocator, byte[] data) {
    final int payloadLength = data.length;
    final ByteBuf out = allocator.apply(payloadLength + Integer.BYTES);
    out.writeInt(payloadLength);
    out.writeBytes(data);
    return out;
  }

  /**
   * Reads the length prefix and then that many bytes from {@code b}.
   *
   * @param b buffer positioned at the start of an encoded array
   * @return a newly allocated array holding the decoded bytes
   */
  @Override
  public byte[] deserialize(ByteBuf b) {
    final byte[] payload = new byte[b.readInt()];
    b.readBytes(payload);
    return payload;
  }
}

View File

@ -0,0 +1,24 @@
package it.cavallium.dbengine.lucene;
import io.net5.buffer.ByteBuf;
import java.util.function.Function;
import org.apache.lucene.util.BytesRef;
/**
 * {@link LMDBCodec} for Lucene {@link BytesRef} values.
 *
 * <p>Wire format: a 4-byte length written with {@code writeInt}, followed by the referenced bytes.
 */
public class BytesRefCodec implements LMDBCodec<BytesRef> {

  /**
   * Writes {@code data.length} and then the slice {@code [offset, offset + length)} of the
   * backing array.
   *
   * @param allocator called once with the exact number of bytes that will be written
   * @param data the bytes reference to encode
   * @return the buffer returned by {@code allocator}, filled with the encoded slice
   */
  @Override
  public ByteBuf serialize(Function<Integer, ByteBuf> allocator, BytesRef data) {
    final int sliceLength = data.length;
    final ByteBuf out = allocator.apply(sliceLength + Integer.BYTES);
    out.writeInt(sliceLength);
    out.writeBytes(data.bytes, data.offset, sliceLength);
    return out;
  }

  /**
   * Reads the length prefix and then that many bytes into a fresh array.
   *
   * @param b buffer positioned at the start of an encoded value
   * @return a {@link BytesRef} with offset 0 that covers the whole fresh array
   */
  @Override
  public BytesRef deserialize(ByteBuf b) {
    final int sliceLength = b.readInt();
    final byte[] content = new byte[sliceLength];
    b.readBytes(content, 0, sliceLength);
    return new BytesRef(content, 0, sliceLength);
  }
}

View File

@ -0,0 +1,17 @@
package it.cavallium.dbengine.lucene;
import io.net5.buffer.ByteBuf;
import java.util.function.Function;
/** {@link LMDBCodec} that stores a {@link Double} as the 8 bytes written by {@code writeDouble}. */
public class DoubleCodec implements LMDBCodec<Double> {

  /**
   * Encodes {@code data} into a buffer of {@link Double#BYTES} bytes.
   *
   * @param allocator called once with {@link Double#BYTES}
   * @param data the value to encode; unboxed, so {@code null} raises a NullPointerException
   * @return whatever {@code writeDouble} returns for the allocated buffer
   */
  @Override
  public ByteBuf serialize(Function<Integer, ByteBuf> allocator, Double data) {
    final ByteBuf target = allocator.apply(Double.BYTES);
    return target.writeDouble(data.doubleValue());
  }

  /**
   * Decodes one double from {@code b}.
   *
   * @param b buffer positioned at an encoded double
   * @return the boxed value read with {@code readDouble}
   */
  @Override
  public Double deserialize(ByteBuf b) {
    final double decoded = b.readDouble();
    return decoded;
  }
}

View File

@ -0,0 +1,17 @@
package it.cavallium.dbengine.lucene;
import io.net5.buffer.ByteBuf;
import java.util.function.Function;
/** {@link LMDBCodec} that stores a {@link Float} as the 4 bytes written by {@code writeFloat}. */
public class FloatCodec implements LMDBCodec<Float> {

  /**
   * Encodes {@code data} into a buffer of {@link Float#BYTES} bytes.
   *
   * @param allocator called once with {@link Float#BYTES}
   * @param data the value to encode; unboxed, so {@code null} raises a NullPointerException
   * @return whatever {@code writeFloat} returns for the allocated buffer
   */
  @Override
  public ByteBuf serialize(Function<Integer, ByteBuf> allocator, Float data) {
    final ByteBuf target = allocator.apply(Float.BYTES);
    return target.writeFloat(data.floatValue());
  }

  /**
   * Decodes one float from {@code b}.
   *
   * @param b buffer positioned at an encoded float
   * @return the boxed value read with {@code readFloat}
   */
  @Override
  public Float deserialize(ByteBuf b) {
    final float decoded = b.readFloat();
    return decoded;
  }
}

View File

@ -3,6 +3,7 @@ package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.LLDocElementScoreComparator.SCORE_DOC_SCORE_ELEM_COMPARATOR;
import static org.apache.lucene.search.TotalHits.Relation.*;
import it.cavallium.dbengine.lucene.collector.FullFieldDocs;
import java.util.Comparator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Sort;
@ -29,7 +30,7 @@ public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {
static <T extends LLDoc> FullDocs<T> merge(@Nullable Sort sort, FullDocs<T>[] fullDocs) {
ResourceIterable<T> mergedIterable = mergeResourceIterable(sort, fullDocs);
TotalHits mergedTotalHits = mergeTotalHits(fullDocs);
return new FullDocs<>() {
FullDocs<T> docs = new FullDocs<>() {
@Override
public Flux<T> iterate() {
return mergedIterable.iterate();
@ -45,6 +46,11 @@ public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {
return mergedTotalHits;
}
};
if (sort != null) {
return new FullFieldDocs<>(docs, sort.getSort());
} else {
return docs;
}
}
static <T extends LLDoc> int tieBreakCompare(
@ -119,6 +125,12 @@ public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {
if (shard instanceof LLScoreDoc scoreDoc) {
//noinspection unchecked
return (T) new LLScoreDoc(scoreDoc.doc(), scoreDoc.score(), shardIndex);
} else if (shard instanceof LLFieldDoc fieldDoc) {
//noinspection unchecked
return (T) new LLFieldDoc(fieldDoc.doc(), fieldDoc.score(), shardIndex, fieldDoc.fields());
} else if (shard instanceof LLSlotDoc slotDoc) {
//noinspection unchecked
return (T) new LLSlotDoc(slotDoc.doc(), slotDoc.score(), shardIndex, slotDoc.slot());
} else {
throw new UnsupportedOperationException("Unsupported type " + shard.getClass());
}

View File

@ -0,0 +1,20 @@
package it.cavallium.dbengine.lucene;
import java.util.Objects;
import org.jetbrains.annotations.Nullable;
/**
 * Minimal array abstraction addressed by {@code long} indexes, whose slots may be empty.
 *
 * @param <T> element type
 */
public interface IArray<T> {

  /**
   * Reads the element stored at {@code index}.
   *
   * @param index position to read
   * @return the stored element, possibly {@code null}
   */
  @Nullable T get(long index);

  /**
   * Stores {@code value} at {@code index}.
   *
   * @param index position to write
   * @param value element to store, possibly {@code null}
   */
  void set(long index, @Nullable T value);

  /**
   * Resets the slot at {@code index}.
   *
   * @param index position to reset
   */
  void reset(long index);

  /** Returns the size of this array. */
  long size();

  /**
   * Reads the element at {@code slot}, substituting {@code defaultValue} when {@link #get(long)}
   * returns {@code null}.
   *
   * @param slot position to read
   * @param defaultValue replacement for a {@code null} element
   * @return the stored element, or {@code defaultValue} when the stored element is {@code null}
   * @throws NullPointerException if both the stored element and {@code defaultValue} are null
   */
  default T getOrDefault(int slot, T defaultValue) {
    final T stored = get(slot);
    return Objects.requireNonNullElse(stored, defaultValue);
  }
}

View File

@ -0,0 +1,17 @@
package it.cavallium.dbengine.lucene;
import io.net5.buffer.ByteBuf;
import java.util.function.Function;
/** {@link LMDBCodec} that stores an {@link Integer} as the 4 bytes written by {@code writeInt}. */
public class IntCodec implements LMDBCodec<Integer> {

  /**
   * Encodes {@code data} into a buffer of {@link Integer#BYTES} bytes.
   *
   * @param allocator called once with {@link Integer#BYTES}
   * @param data the value to encode; unboxed, so {@code null} raises a NullPointerException
   * @return whatever {@code writeInt} returns for the allocated buffer
   */
  @Override
  public ByteBuf serialize(Function<Integer, ByteBuf> allocator, Integer data) {
    final ByteBuf target = allocator.apply(Integer.BYTES);
    return target.writeInt(data.intValue());
  }

  /**
   * Decodes one int from {@code b}.
   *
   * @param b buffer positioned at an encoded int
   * @return the boxed value read with {@code readInt}
   */
  @Override
  public Integer deserialize(ByteBuf b) {
    final int decoded = b.readInt();
    return decoded;
  }
}

View File

@ -3,6 +3,7 @@ package it.cavallium.dbengine.lucene;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.FieldDoc;
public record LLFieldDoc(int doc, float score, int shardIndex, List<Object> fields) implements LLDoc {
@ -11,4 +12,8 @@ public record LLFieldDoc(int doc, float score, int shardIndex, List<Object> fiel
return "doc=" + doc + " score=" + score + " shardIndex=" + shardIndex + " fields="+ fields.stream()
.map(Objects::toString).collect(Collectors.joining(",", "[", "]"));
}
/**
 * Converts this record into a Lucene {@link FieldDoc} carrying the same doc id, score, shard
 * index and a copy of the field values as an {@code Object[]}.
 *
 * @return a new {@link FieldDoc} built from this record's components
 */
public FieldDoc toFieldDoc() {
  final Object[] sortValues = fields.toArray(Object[]::new);
  return new FieldDoc(doc, score, sortValues, shardIndex);
}
}

View File

@ -11,7 +11,7 @@ public class LLScoreDocCodec implements LMDBSortedCodec<LLScoreDoc> {
setScore(buf, data.score());
setDoc(buf, data.doc());
setShardIndex(buf, data.shardIndex());
buf.writerIndex(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
buf.writerIndex(Float.BYTES + Integer.BYTES + Integer.BYTES);
return buf.asReadOnly();
}

View File

@ -8,8 +8,6 @@ import java.util.List;
import java.util.function.Function;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Query;
@ -23,7 +21,7 @@ public class LLSlotDocCodec implements LMDBSortedCodec<LLSlotDoc>, FieldValueHit
protected final FieldComparator<?>[] comparators;
protected final int[] reverseMul;
public LLSlotDocCodec(LLTempLMDBEnv env, SortField[] fields) {
public LLSlotDocCodec(LLTempLMDBEnv env, int numHits, SortField[] fields) {
// When we get here, fields.length is guaranteed to be > 0, therefore no
// need to check it again.
@ -37,7 +35,7 @@ public class LLSlotDocCodec implements LMDBSortedCodec<LLSlotDoc>, FieldValueHit
for (int i = 0; i < numComparators; ++i) {
SortField field = fields[i];
reverseMul[i] = field.getReverse() ? -1 : 1;
comparators[i] = LMDBComparator.getComparator(env, field, i);
comparators[i] = LMDBComparator.getComparator(env, field, numHits, i);
}
}
@ -48,7 +46,7 @@ public class LLSlotDocCodec implements LMDBSortedCodec<LLSlotDoc>, FieldValueHit
setDoc(buf, data.doc());
setShardIndex(buf, data.shardIndex());
setSlot(buf, data.slot());
buf.writerIndex(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
buf.writerIndex(Float.BYTES + Integer.BYTES + Integer.BYTES + Integer.BYTES);
return buf.asReadOnly();
}
@ -59,10 +57,6 @@ public class LLSlotDocCodec implements LMDBSortedCodec<LLSlotDoc>, FieldValueHit
@Override
public int compare(LLSlotDoc hitA, LLSlotDoc hitB) {
assert hitA != hitB;
assert hitA.slot() != hitB.slot();
int numComparators = comparators.length;
for (int i = 0; i < numComparators; ++i) {
final int c = reverseMul[i] * comparators[i].compare(hitA.slot(), hitB.slot());

View File

@ -0,0 +1,234 @@
package it.cavallium.dbengine.lucene;
import static org.lmdbjava.DbiFlags.MDB_CREATE;
import io.net5.buffer.ByteBuf;
import io.net5.buffer.PooledByteBufAllocator;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.Objects;
import java.util.StringJoiner;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.lmdbjava.CursorIterable;
import org.lmdbjava.CursorIterable.KeyVal;
import org.lmdbjava.Dbi;
import org.lmdbjava.Env;
import org.lmdbjava.GetOp;
import org.lmdbjava.Txn;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Scheduler;
import reactor.util.function.Tuple2;
import reactor.util.function.Tuple3;
import reactor.util.function.Tuples;
/**
 * {@link IArray} implementation that stores its elements in a dedicated LMDB database opened on
 * a shared {@link LLTempLMDBEnv}.
 *
 * <p>Keys are the element indexes written as 8-byte longs; values are encoded with the supplied
 * {@link LMDBCodec}. The array has a fixed "virtual" size used only for bounds checking, while
 * {@link #allocatedSize()} tracks the bookkeeping counter of stored entries.
 *
 * <p>Every public accessor calls {@link LLUtils#ensureBlocking()} first. Transaction state is kept
 * in plain (non-volatile, unsynchronized) fields.
 *
 * @param <V> element type
 */
public class LMDBArray<V> implements IArray<V>, Closeable {

  /** When true, the environment is synced after a write transaction is committed. */
  private static final boolean FORCE_SYNC = false;
  /**
   * When true, no transaction outlives a single operation: {@link #endMode()} commits and closes
   * whatever transaction the operation opened.
   */
  private static final boolean FORCE_THREAD_LOCAL = true;

  /** Source of unique database names ("$array_N"). */
  private static final AtomicLong NEXT_LMDB_ARRAY_ID = new AtomicLong(0);

  private final AtomicBoolean closed = new AtomicBoolean();
  private final Runnable onClose;
  private final LMDBCodec<V> valueCodec;
  private final Env<ByteBuf> env;
  private final Dbi<ByteBuf> lmdb;
  private final V defaultValue;

  /** True while the array is in write mode (or idle), false while a read transaction is active. */
  private boolean writing;
  private Txn<ByteBuf> readTxn;
  private Txn<ByteBuf> rwTxn;

  private long allocatedSize = 0;
  private final long virtualSize;

  /**
   * Creates a new array backed by a freshly named database inside {@code env}.
   *
   * @param env temporary LMDB environment; its reference count is incremented here and
   *     decremented again by {@link #close()}
   * @param codec codec used to encode and decode the elements
   * @param size virtual size used by the bounds checks
   * @param defaultValue value returned by {@link #get(long)} for indexes that hold no entry
   */
  public LMDBArray(LLTempLMDBEnv env, LMDBCodec<V> codec, long size, @Nullable V defaultValue) {
    this.onClose = env::decrementRef;
    var name = "$array_" + NEXT_LMDB_ARRAY_ID.getAndIncrement();
    this.valueCodec = codec;
    this.env = env.getEnvAndIncrementRef();
    this.lmdb = this.env.openDbi(name, MDB_CREATE);
    this.defaultValue = defaultValue;

    this.writing = true;
    if (FORCE_THREAD_LOCAL) {
      this.rwTxn = null;
    } else {
      this.rwTxn = this.env.txnWrite();
    }
    this.readTxn = null;
    this.virtualSize = size;
  }

  /** Returns the codec used to encode and decode the elements. */
  public LMDBCodec<V> getValueCodec() {
    return valueCodec;
  }

  /** Allocates a pooled direct buffer whose initial and maximum capacity are both {@code size}. */
  private ByteBuf allocate(int size) {
    return PooledByteBufAllocator.DEFAULT.directBuffer(size, size);
  }

  /** Releases {@code buf} when it was actually allocated. */
  private static void releaseIfAllocated(@Nullable ByteBuf buf) {
    if (buf != null) {
      buf.release();
    }
  }

  /**
   * Makes sure the transaction matching the requested mode is open, closing or committing the
   * transaction of the other mode first.
   *
   * @param write true to obtain a read-write transaction, false to obtain a read transaction
   */
  private void switchToMode(boolean write) {
    if (write) {
      if (!writing) {
        writing = true;
        readTxn.close();
        readTxn = null;
        assert rwTxn == null;
        rwTxn = env.txnWrite();
      } else if (rwTxn == null) {
        assert readTxn == null;
        rwTxn = env.txnWrite();
      }
    } else {
      if (writing) {
        writing = false;
        if (rwTxn != null) {
          rwTxn.commit();
          rwTxn.close();
          rwTxn = null;
        }
        if (FORCE_SYNC) {
          env.sync(true);
        }
        assert rwTxn == null;
        assert readTxn == null;
        readTxn = env.txnRead();
      }
    }
  }

  /**
   * Ends the current operation. With {@link #FORCE_THREAD_LOCAL} enabled this commits and closes
   * any open transaction and puts the array back in write mode, so the next operation always
   * opens a fresh transaction.
   */
  private void endMode() {
    if (FORCE_THREAD_LOCAL) {
      writing = true;
      if (readTxn != null) {
        readTxn.commit();
        readTxn.close();
        readTxn = null;
      }
      if (rwTxn != null) {
        rwTxn.commit();
        rwTxn.close();
        rwTxn = null;
      }
    }
    assert rwTxn == null;
    assert readTxn == null;
  }

  private static void ensureThread() {
    LLUtils.ensureBlocking();
  }

  private static void ensureItThread() {
    ensureThread();
    //if (!(Thread.currentThread() instanceof LMDBThread)) {
    //	throw new IllegalStateException("Must run in LMDB scheduler");
    //}
  }

  /**
   * Stores {@code value} at {@code index}.
   *
   * <p>The buffers are created inside the guarded region so that a failure while encoding the
   * value still ends the transaction and releases the key buffer.
   *
   * @param index position to write, must be within {@code [0, size())}
   * @param value element to store; it is handed to the codec as-is
   * @throws IndexOutOfBoundsException if {@code index} is out of range
   */
  @Override
  public void set(long index, @Nullable V value) {
    ensureBounds(index);
    ensureThread();
    switchToMode(true);
    ByteBuf keyBuf = null;
    ByteBuf valueBuf = null;
    try {
      keyBuf = allocate(Long.BYTES);
      keyBuf.writeLong(index);
      valueBuf = valueCodec.serialize(this::allocate, value);
      // NOTE(review): put is called without flags; if it also reports true when an existing
      // key is overwritten, allocatedSize over-counts - confirm against lmdbjava.
      if (lmdb.put(rwTxn, keyBuf, valueBuf)) {
        allocatedSize++;
      }
    } finally {
      try {
        endMode();
      } finally {
        releaseIfAllocated(keyBuf);
        releaseIfAllocated(valueBuf);
      }
    }
  }

  /**
   * Deletes the entry stored at {@code index}, if any.
   *
   * @param index position to reset, must be within {@code [0, size())}
   * @throws IndexOutOfBoundsException if {@code index} is out of range
   */
  @Override
  public void reset(long index) {
    ensureBounds(index);
    ensureThread();
    switchToMode(true);
    ByteBuf keyBuf = null;
    try {
      keyBuf = allocate(Long.BYTES);
      keyBuf.writeLong(index);
      if (lmdb.delete(rwTxn, keyBuf)) {
        allocatedSize--;
      }
    } finally {
      try {
        endMode();
      } finally {
        releaseIfAllocated(keyBuf);
      }
    }
  }

  /**
   * Reads the element stored at {@code index}.
   *
   * @param index position to read, must be within {@code [0, size())}
   * @return the decoded element, or the default value given at construction when the index holds
   *     no entry
   * @throws IndexOutOfBoundsException if {@code index} is out of range
   */
  @Override
  public @Nullable V get(long index) {
    ensureBounds(index);
    ensureThread();
    switchToMode(false);
    ByteBuf keyBuf = null;
    try {
      keyBuf = allocate(Long.BYTES);
      keyBuf.writeLong(index);
      var value = lmdb.get(readTxn, keyBuf);
      if (value != null) {
        // Decode before the transaction is ended by the finally block below
        return valueCodec.deserialize(value);
      } else {
        return defaultValue;
      }
    } finally {
      try {
        endMode();
      } finally {
        releaseIfAllocated(keyBuf);
      }
    }
  }

  private void ensureBounds(long index) {
    if (index < 0 || index >= virtualSize) throw new IndexOutOfBoundsException();
  }

  /** Returns the virtual size given at construction. */
  @Override
  public long size() {
    ensureThread();
    return virtualSize;
  }

  /** Returns the internal counter of stored entries maintained by set and reset. */
  public long allocatedSize() {
    return allocatedSize;
  }

  /**
   * Closes any open transaction, drops and closes the backing database, and finally runs the
   * close callback that decrements the environment reference count. Only the first call has any
   * effect.
   */
  @Override
  public void close() throws IOException {
    if (closed.compareAndSet(false, true)) {
      try {
        ensureThread();
        if (rwTxn != null) {
          rwTxn.close();
        }
        if (readTxn != null) {
          readTxn.close();
        }
        try (var txn = env.txnWrite()) {
          lmdb.drop(txn, true);
          txn.commit();
        }
        lmdb.close();
      } finally {
        onClose.run();
      }
    }
  }

  @Override
  public String toString() {
    return "lmdb_array[" + virtualSize + " (allocated=" + allocatedSize + ")]";
  }
}

View File

@ -1,13 +1,119 @@
package it.cavallium.dbengine.lucene;
import static org.apache.lucene.search.SortField.STRING_LAST;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import java.util.Comparator;
import it.cavallium.dbengine.lucene.comparators.DocComparator;
import it.cavallium.dbengine.lucene.comparators.DoubleComparator;
import it.cavallium.dbengine.lucene.comparators.FloatComparator;
import it.cavallium.dbengine.lucene.comparators.IntComparator;
import it.cavallium.dbengine.lucene.comparators.LongComparator;
import it.cavallium.dbengine.lucene.comparators.RelevanceComparator;
import it.cavallium.dbengine.lucene.comparators.TermOrdValComparator;
import java.io.IOException;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
public class LMDBComparator {
public static FieldComparator<?> getComparator(LLTempLMDBEnv env, SortField field, int sortPos) {
throw new UnsupportedOperationException("not implemented");
public static FieldComparator<?> getComparator(LLTempLMDBEnv env, SortField sortField,
int numHits, int sortPos) {
var sortFieldClass = sortField.getClass();
if (sortFieldClass == org.apache.lucene.search.SortedNumericSortField.class) {
var nf = (org.apache.lucene.search.SortedNumericSortField) sortField;
var type = nf.getNumericType();
var missingValue = nf.getMissingValue();
var reverse = nf.getReverse();
var selector = nf.getSelector();
final FieldComparator<?> fieldComparator = switch (type) {
case INT -> new IntComparator(env, numHits, nf.getField(), (Integer) missingValue, reverse, sortPos) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new IntLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
}
};
case FLOAT -> new FloatComparator(env, numHits, nf.getField(), (Float) missingValue, reverse, sortPos) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new FloatLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
}
};
case LONG -> new LongComparator(env, numHits, nf.getField(), (Long) missingValue, reverse, sortPos) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new LongLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
}
};
case DOUBLE -> new DoubleComparator(env, numHits, nf.getField(), (Double) missingValue, reverse, sortPos) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new DoubleLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
throws IOException {
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field), selector, type);
}
};
}
};
case CUSTOM, DOC, REWRITEABLE, STRING_VAL, SCORE, STRING -> throw new AssertionError();
};
if (!nf.getOptimizeSortWithPoints()) {
fieldComparator.disableSkipping();
}
return fieldComparator;
} else if (sortFieldClass == SortField.class) {
var missingValue = sortField.getMissingValue();
var reverse = sortField.getReverse();
var field = sortField.getField();
var comparatorSource = sortField.getComparatorSource();
return switch (sortField.getType()) {
case SCORE -> new RelevanceComparator(env, numHits);
case DOC -> new DocComparator(env, numHits, reverse, sortPos);
case INT -> new IntComparator(env, numHits, field, (Integer) missingValue,
reverse, sortPos);
case FLOAT -> new FloatComparator(env, numHits, field, (Float) missingValue,
reverse, sortPos);
case LONG -> new LongComparator(env, numHits, field, (Long) missingValue,
reverse, sortPos);
case DOUBLE -> new DoubleComparator(env, numHits, field, (Double) missingValue,
reverse, sortPos);
case CUSTOM -> {
assert comparatorSource != null;
yield comparatorSource.newComparator(field, numHits, sortPos, reverse);
}
case STRING -> new TermOrdValComparator(env, numHits, field, missingValue == STRING_LAST);
case STRING_VAL -> throw new NotImplementedException("String val sort field not implemented");
case REWRITEABLE -> throw new IllegalStateException(
"SortField needs to be rewritten through Sort.rewrite(..) and SortField.rewrite(..)");
};
} else {
throw new NotImplementedException("SortField type not implemented: " + sortFieldClass.getName());
}
}
}

View File

@ -9,10 +9,12 @@ import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import java.io.IOException;
import java.util.Iterator;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.lmdbjava.Cursor;
import org.lmdbjava.CursorIterable;
import org.lmdbjava.CursorIterable.KeyVal;
@ -27,7 +29,7 @@ import reactor.util.function.Tuple2;
import reactor.util.function.Tuple3;
import reactor.util.function.Tuples;
public class LMDBPriorityQueue<T> implements PriorityQueue<T> {
public class LMDBPriorityQueue<T> implements PriorityQueue<T>, Reversable<ReversableResourceIterable<T>>, ReversableResourceIterable<T> {
private static final boolean FORCE_SYNC = false;
private static final boolean FORCE_THREAD_LOCAL = true;
@ -40,8 +42,6 @@ public class LMDBPriorityQueue<T> implements PriorityQueue<T> {
private final LMDBSortedCodec<T> codec;
private final Env<ByteBuf> env;
private final Dbi<ByteBuf> lmdb;
private final Scheduler scheduler = Schedulers.newBoundedElastic(1,
Schedulers.DEFAULT_BOUNDED_ELASTIC_QUEUESIZE, LMDBThread::new, Integer.MAX_VALUE);
private boolean writing;
private boolean iterating;
@ -186,6 +186,8 @@ public class LMDBPriorityQueue<T> implements PriorityQueue<T> {
}
} finally {
endMode();
buf.release();
uid.release();
}
assert topSingleValid(element);
@ -309,6 +311,39 @@ public class LMDBPriorityQueue<T> implements PriorityQueue<T> {
}
}
/**
 * Iterates the queue from its last key to its first one.
 *
 * <p>This is the same traversal as {@link #reverseIterate(long)} with nothing skipped, so it
 * delegates to that overload instead of keeping a second copy of the cursor-walking generator.
 *
 * @return a flux emitting every element, starting from the last key
 */
public Flux<T> reverseIterate() {
  return reverseIterate(0L);
}
@Override
public Flux<T> iterate() {
return Flux
@ -379,8 +414,54 @@ public class LMDBPriorityQueue<T> implements PriorityQueue<T> {
cit.close();
iterating = false;
endMode();
})
.subscribeOn(scheduler, false);
});
}
/**
 * Iterates the queue from its last key towards its first one, ignoring the first {@code skips}
 * elements of that reverse order.
 *
 * <p>The generator state is a flag that is true only for the first step: on that step the cursor
 * is moved to the last key and then stepped back {@code skips} times; on every later step it is
 * moved to the previous key. The flux completes as soon as the cursor cannot move.
 *
 * @param skips number of elements to skip, counted from the last key
 * @return a flux emitting the remaining elements in reverse key order
 */
public Flux<T> reverseIterate(long skips) {
  return Flux.generate(() -> {
    ensureItThread();
    switchToMode(false, true);
    iterating = true;
    return true;
  }, (isLastKey, sink) -> {
    try {
      ensureItThread();
      boolean positioned = isLastKey ? cur.last() : cur.prev();
      if (!positioned) {
        sink.complete();
        return false;
      }
      if (isLastKey) {
        // First step only: walk back over the elements that must be skipped
        for (long pending = skips; pending > 0; pending--) {
          if (!cur.prev()) {
            sink.complete();
            return false;
          }
        }
      }
      sink.next(codec.deserialize(cur.key()));
      return false;
    } catch (Throwable ex) {
      sink.error(ex);
      return false;
    }
  }, t -> {
    ensureItThread();
    iterating = false;
    endMode();
  });
}
@Override
@ -406,11 +487,6 @@ public class LMDBPriorityQueue<T> implements PriorityQueue<T> {
onClose.run();
}
}
scheduler.dispose();
}
public Scheduler getScheduler() {
return scheduler;
}
@Override
@ -419,4 +495,24 @@ public class LMDBPriorityQueue<T> implements PriorityQueue<T> {
.add("size=" + size)
.toString();
}
/**
 * Returns a view of this queue whose iteration methods run through {@code reverseIterate}.
 * Reversing the returned view gives back this queue.
 */
@Override
public ReversableResourceIterable<T> reverse() {
  final LMDBPriorityQueue<T> forwardQueue = this;
  return new ReversableResourceIterable<T>() {

    @Override
    public Flux<T> iterate() {
      return forwardQueue.reverseIterate();
    }

    @Override
    public Flux<T> iterate(long skips) {
      return forwardQueue.reverseIterate(skips);
    }

    @Override
    public ReversableResourceIterable<T> reverse() {
      return forwardQueue;
    }
  };
}
}

View File

@ -0,0 +1,17 @@
package it.cavallium.dbengine.lucene;
import io.net5.buffer.ByteBuf;
import java.util.function.Function;
/** {@link LMDBCodec} that stores a {@link Long} as the 8 bytes written by {@code writeLong}. */
public class LongCodec implements LMDBCodec<Long> {

  /**
   * Encodes {@code data} into a buffer of {@link Long#BYTES} bytes.
   *
   * @param allocator called once with {@link Long#BYTES}
   * @param data the value to encode; unboxed, so {@code null} raises a NullPointerException
   * @return whatever {@code writeLong} returns for the allocated buffer
   */
  @Override
  public ByteBuf serialize(Function<Integer, ByteBuf> allocator, Long data) {
    final ByteBuf target = allocator.apply(Long.BYTES);
    return target.writeLong(data.longValue());
  }

  /**
   * Decodes one long from {@code b}.
   *
   * @param b buffer positioned at an encoded long
   * @return the boxed value read with {@code readLong}
   */
  @Override
  public Long deserialize(ByteBuf b) {
    final long decoded = b.readLong();
    return decoded;
  }
}

View File

@ -342,8 +342,8 @@ public class LuceneUtils {
public static LocalQueryParams toLocalQueryParams(QueryParams queryParams) {
return new LocalQueryParams(QueryParser.toQuery(queryParams.query()),
safeLongToInt(queryParams.offset()),
safeLongToInt(queryParams.limit()),
queryParams.offset(),
queryParams.limit(),
DEFAULT_PAGE_LIMITS,
queryParams.minCompetitiveScore().getNullable(),
QueryParser.toSort(queryParams.sort()),
@ -457,6 +457,10 @@ public class LuceneUtils {
return complete ? Integer.MAX_VALUE : 1;
}
/**
 * Returns the 64-bit total-hits threshold for a search.
 *
 * @param complete whether the threshold should be the largest possible value
 * @return {@link Long#MAX_VALUE} when {@code complete} is true, otherwise 1
 */
public static long totalHitsThresholdLong(boolean complete) {
  if (complete) {
    return Long.MAX_VALUE;
  }
  return 1L;
}
public static TotalHitsCount convertTotalHitsCount(TotalHits totalHits) {
return switch (totalHits.relation) {
case EQUAL_TO -> TotalHitsCount.of(totalHits.value, true);
@ -498,8 +502,8 @@ public class LuceneUtils {
mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty());
if (mltDocumentFields.isEmpty()) {
return new LocalQueryParams(new MatchNoDocsQuery(),
localQueryParams.offset(),
localQueryParams.limit(),
localQueryParams.offsetLong(),
localQueryParams.limitLong(),
DEFAULT_PAGE_LIMITS,
localQueryParams.minCompetitiveScore(),
localQueryParams.sort(),
@ -543,8 +547,8 @@ public class LuceneUtils {
}
return new LocalQueryParams(luceneQuery,
localQueryParams.offset(),
localQueryParams.limit(),
localQueryParams.offsetLong(),
localQueryParams.limitLong(),
DEFAULT_PAGE_LIMITS,
localQueryParams.minCompetitiveScore(),
localQueryParams.sort(),

View File

@ -0,0 +1,6 @@
package it.cavallium.dbengine.lucene;
/**
 * Something that can produce a reversed counterpart of itself.
 *
 * @param <T> the type returned by {@link #reverse()}, itself reversable
 */
public interface Reversable<T extends Reversable<T>> {
/**
 * Returns the reversed counterpart of this object.
 *
 * @return the reversed counterpart
 */
T reverse();
}

View File

@ -0,0 +1,3 @@
package it.cavallium.dbengine.lucene;
/**
 * A {@link ResourceIterable} that is also {@link Reversable}: calling {@code reverse()} yields
 * another {@code ReversableResourceIterable} over the same element type. Declares no members of
 * its own.
 *
 * @param <T> element type
 */
public interface ReversableResourceIterable<T> extends ResourceIterable<T>, Reversable<ReversableResourceIterable<T>> {}

View File

@ -0,0 +1,78 @@
package it.cavallium.dbengine.lucene;
import io.net5.buffer.ByteBuf;
import io.net5.buffer.PooledByteBufAllocator;
import java.io.IOException;
import java.util.function.Function;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
/**
 * {@link LMDBCodec} for Lucene {@link SortField}s.
 *
 * <p>Wire format: the provider name of the field's index sorter written with
 * {@code writeString}, followed by whatever that {@link SortField.Provider} writes for the field.
 */
public class SortFieldCodec implements LMDBCodec<SortField> {

  /**
   * Encodes {@code data} through its {@link SortField.Provider}.
   *
   * <p>The encoded size is not known up front, so {@code allocator} is not used: the result is a
   * pooled direct buffer that grows on demand. That buffer is released again if encoding fails.
   *
   * @param allocator ignored
   * @param data the sort field to encode
   * @return a buffer holding the provider name followed by the provider-specific encoding
   * @throws NullPointerException if {@code data} is null or has no index sorter
   * @throws RuntimeException wrapping any {@link IOException} raised while writing
   */
  @Override
  public ByteBuf serialize(Function<Integer, ByteBuf> allocator, SortField data) {
    var out = new ByteBufDataOutput();
    boolean completed = false;
    try {
      var indexSorter = data.getIndexSorter();
      if (indexSorter == null) {
        // Same exception type as the implicit dereference, but with a usable message
        throw new NullPointerException("SortField has no index sorter, it can't be serialized: " + data);
      }
      var provider = indexSorter.getProviderName();
      out.writeString(provider);
      SortField.Provider.forName(provider).writeSortField(data, out);
      completed = true;
      return out.buf;
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      if (!completed) {
        // Nobody else holds a reference to the buffer yet: give it back to the pool
        out.buf.release();
      }
    }
  }

  /**
   * Decodes a sort field: reads the provider name, then lets that provider read the field.
   *
   * @param b buffer positioned at the start of an encoded sort field
   * @return the decoded sort field
   * @throws RuntimeException wrapping any {@link IOException} raised while reading
   */
  @Override
  public SortField deserialize(ByteBuf b) {
    var in = new ByteBufDataInput(b);
    try {
      return SortField.Provider.forName(in.readString()).readSortField(in);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** Lucene {@link DataOutput} that appends to a freshly allocated pooled direct buffer. */
  private static class ByteBufDataOutput extends DataOutput {

    private final ByteBuf buf;

    public ByteBufDataOutput() {
      this.buf = PooledByteBufAllocator.DEFAULT.directBuffer();
    }

    @Override
    public void writeByte(byte b) {
      buf.writeByte(b);
    }

    @Override
    public void writeBytes(byte[] b, int offset, int length) throws IOException {
      buf.writeBytes(b, offset, length);
    }
  }

  /** Lucene {@link DataInput} that reads from an existing buffer, advancing its reader index. */
  private static class ByteBufDataInput extends DataInput {

    private final ByteBuf buf;

    public ByteBufDataInput(ByteBuf b) {
      this.buf = b;
    }

    @Override
    public byte readByte() {
      return buf.readByte();
    }

    @Override
    public void readBytes(byte[] b, int offset, int len) {
      buf.readBytes(b, offset, len);
    }

    /**
     * Skips {@code numBytes} bytes.
     *
     * @throws ArithmeticException if {@code numBytes} does not fit in an int; a plain cast would
     *     wrap around and silently skip the wrong amount
     */
    @Override
    public void skipBytes(long numBytes) {
      buf.skipBytes(Math.toIntExact(numBytes));
    }
  }
}

View File

@ -21,6 +21,9 @@ import it.cavallium.dbengine.lucene.LLDoc;
import it.cavallium.dbengine.lucene.LazyFullDocs;
import it.cavallium.dbengine.lucene.PriorityQueue;
import it.cavallium.dbengine.lucene.ResourceIterable;
import it.cavallium.dbengine.lucene.Reversable;
import it.cavallium.dbengine.lucene.ReversableResourceIterable;
import java.io.Closeable;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
@ -34,7 +37,8 @@ import org.apache.lucene.search.TotalHits;
* #FullDocsCollector(PriorityQueue)}. In that case however, you might want to consider overriding
* all methods, in order to avoid a NullPointerException.
*/
public abstract class FullDocsCollector<INTERNAL extends LLDoc, EXTERNAL extends LLDoc> implements Collector, AutoCloseable {
public abstract class FullDocsCollector<PQ extends PriorityQueue<INTERNAL> & Reversable<ReversableResourceIterable<INTERNAL>>, INTERNAL extends LLDoc,
EXTERNAL extends LLDoc> implements Collector, AutoCloseable {
/**
* The priority queue which holds the top documents. Note that different implementations of
@ -42,7 +46,7 @@ public abstract class FullDocsCollector<INTERNAL extends LLDoc, EXTERNAL extends
* top scoring documents, while other PQ implementations may hold documents sorted by other
* criteria.
*/
protected final PriorityQueue<INTERNAL> pq;
protected final PQ pq;
/** The total number of documents that the collector encountered. */
protected int totalHits;
@ -50,7 +54,7 @@ public abstract class FullDocsCollector<INTERNAL extends LLDoc, EXTERNAL extends
/** Whether {@link #totalHits} is exact or a lower bound. */
protected TotalHits.Relation totalHitsRelation = TotalHits.Relation.EQUAL_TO;
protected FullDocsCollector(PriorityQueue<INTERNAL> pq) {
protected FullDocsCollector(PQ pq) {
this.pq = pq;
}
@ -61,7 +65,7 @@ public abstract class FullDocsCollector<INTERNAL extends LLDoc, EXTERNAL extends
/** Returns the top docs that were collected by this collector. */
public FullDocs<EXTERNAL> fullDocs() {
return new LazyFullDocs<>(mapResults(this.pq), new TotalHits(totalHits, totalHitsRelation));
return new LazyFullDocs<>(mapResults(this.pq.reverse()), new TotalHits(totalHits, totalHitsRelation));
}
public abstract ResourceIterable<EXTERNAL> mapResults(ResourceIterable<INTERNAL> it);

View File

@ -24,10 +24,10 @@ import org.apache.lucene.search.ScoreMode;
abstract class HitsThresholdChecker {
/** Implementation of HitsThresholdChecker which allows global hit counting */
private static class GlobalHitsThresholdChecker extends HitsThresholdChecker {
private final int totalHitsThreshold;
private final long totalHitsThreshold;
private final AtomicLong globalHitCount;
public GlobalHitsThresholdChecker(int totalHitsThreshold) {
public GlobalHitsThresholdChecker(long totalHitsThreshold) {
if (totalHitsThreshold < 0) {
throw new IllegalArgumentException(
@ -44,27 +44,38 @@ abstract class HitsThresholdChecker {
}
@Override
public boolean isThresholdReached() {
return globalHitCount.getAcquire() > totalHitsThreshold;
public boolean isThresholdReached(boolean supports64Bit) {
if (supports64Bit) {
return globalHitCount.getAcquire() > totalHitsThreshold;
} else {
return Math.min(globalHitCount.getAcquire(), Integer.MAX_VALUE) > Math.min(totalHitsThreshold, Integer.MAX_VALUE);
}
}
@Override
public ScoreMode scoreMode() {
return totalHitsThreshold == Integer.MAX_VALUE ? ScoreMode.COMPLETE : ScoreMode.TOP_SCORES;
if (totalHitsThreshold == Long.MAX_VALUE) {
return ScoreMode.COMPLETE;
}
return ScoreMode.TOP_SCORES;
}
@Override
public int getHitsThreshold() {
return totalHitsThreshold;
public long getHitsThreshold(boolean supports64Bit) {
if (supports64Bit) {
return totalHitsThreshold;
} else {
return Math.min(totalHitsThreshold, Integer.MAX_VALUE);
}
}
}
/** Default implementation of HitsThresholdChecker to be used for single threaded execution */
private static class LocalHitsThresholdChecker extends HitsThresholdChecker {
private final int totalHitsThreshold;
private int hitCount;
private final long totalHitsThreshold;
private long hitCount;
public LocalHitsThresholdChecker(int totalHitsThreshold) {
public LocalHitsThresholdChecker(long totalHitsThreshold) {
if (totalHitsThreshold < 0) {
throw new IllegalArgumentException(
@ -80,32 +91,43 @@ abstract class HitsThresholdChecker {
}
@Override
public boolean isThresholdReached() {
return hitCount > totalHitsThreshold;
public boolean isThresholdReached(boolean supports64Bit) {
if (supports64Bit) {
return hitCount > totalHitsThreshold;
} else {
return Math.min(hitCount, Integer.MAX_VALUE) > Math.min(totalHitsThreshold, Integer.MAX_VALUE);
}
}
@Override
public ScoreMode scoreMode() {
return totalHitsThreshold == Integer.MAX_VALUE ? ScoreMode.COMPLETE : ScoreMode.TOP_SCORES;
if (totalHitsThreshold == Long.MAX_VALUE) {
return ScoreMode.COMPLETE;
}
return ScoreMode.TOP_SCORES;
}
@Override
public int getHitsThreshold() {
return totalHitsThreshold;
public long getHitsThreshold(boolean supports64Bit) {
if (supports64Bit) {
return totalHitsThreshold;
} else {
return Math.min(totalHitsThreshold, Integer.MAX_VALUE);
}
}
}
/*
* Returns a threshold checker that is useful for single threaded searches
*/
public static HitsThresholdChecker create(final int totalHitsThreshold) {
public static HitsThresholdChecker create(final long totalHitsThreshold) {
return new LocalHitsThresholdChecker(totalHitsThreshold);
}
/*
* Returns a threshold checker that is based on a shared counter
*/
public static HitsThresholdChecker createShared(final int totalHitsThreshold) {
public static HitsThresholdChecker createShared(final long totalHitsThreshold) {
return new GlobalHitsThresholdChecker(totalHitsThreshold);
}
@ -113,7 +135,7 @@ abstract class HitsThresholdChecker {
public abstract ScoreMode scoreMode();
public abstract int getHitsThreshold();
public abstract long getHitsThreshold(boolean supports64Bit);
public abstract boolean isThresholdReached();
public abstract boolean isThresholdReached(boolean supports64Bit);
}

View File

@ -61,7 +61,8 @@ import reactor.core.publisher.Flux;
* TopFieldCollector.
*
*/
public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlotDoc, LLFieldDoc> {
public abstract class LMDBFullFieldDocCollector
extends FullDocsCollector<LMDBPriorityQueue<LLSlotDoc>, LLSlotDoc, LLFieldDoc> {
// TODO: one optimization we could do is to pre-fill
// the queue with sentinel value that guaranteed to
@ -109,7 +110,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
}
if (scoreMode.isExhaustive() == false
&& totalHitsRelation == TotalHits.Relation.EQUAL_TO
&& hitsThresholdChecker.isThresholdReached()) {
&& hitsThresholdChecker.isThresholdReached(false)) {
// for the first time hitsThreshold is reached, notify comparator about this
comparator.setHitsThresholdReached();
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
@ -122,7 +123,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
// this document is larger than anything else in the queue, and
// therefore not competitive.
if (searchSortPartOfIndexSort) {
if (hitsThresholdChecker.isThresholdReached()) {
if (hitsThresholdChecker.isThresholdReached(false)) {
totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
throw new CollectionTerminatedException();
} else {
@ -209,9 +210,9 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
public SimpleFieldCollector(
Sort sort,
PriorityQueue<LLSlotDoc> queue,
LMDBPriorityQueue<LLSlotDoc> queue,
FieldValueHitQueue fieldValueHitQueue,
int numHits,
long numHits,
HitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
super(queue, fieldValueHitQueue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
@ -257,9 +258,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
}
}
private static final LLFieldDoc[] EMPTY_SCOREDOCS = new LLFieldDoc[0];
final int numHits;
final long numHits;
final HitsThresholdChecker hitsThresholdChecker;
final FieldComparator<?> firstComparator;
final boolean canSetMinScore;
@ -283,9 +282,9 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
// visibility, then anyone will be able to extend the class, which is not what
// we want.
private LMDBFullFieldDocCollector(
PriorityQueue<LLSlotDoc> pq,
LMDBPriorityQueue<LLSlotDoc> pq,
FieldValueHitQueue fieldValueHitQueue,
int numHits,
long numHits,
HitsThresholdChecker hitsThresholdChecker,
boolean needsScores,
MaxScoreAccumulator minScoreAcc) {
@ -299,12 +298,12 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
&& hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
scoreMode = ScoreMode.TOP_SCORES;
canSetMinScore = true;
} else {
canSetMinScore = false;
if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
if (hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
} else {
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
@ -320,7 +319,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
if (canSetMinScore && hitsThresholdChecker.isThresholdReached()) {
if (canSetMinScore && hitsThresholdChecker.isThresholdReached(false)) {
// we can start checking the global maximum score even
// if the local queue is not full because the threshold
// is reached.
@ -334,7 +333,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached()) {
if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached(false)) {
assert pq.top() != null;
float minScore = (float) firstComparator.value(pq.top().slot());
if (minScore > minCompetitiveScore) {
@ -401,7 +400,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
throw new IllegalArgumentException("hitsThresholdChecker should not be null");
}
var fieldValueHitQueue = new LLSlotDocCodec(env, sort.getSort());
var fieldValueHitQueue = new LLSlotDocCodec(env, numHits, sort.getSort());
var queue = new LMDBPriorityQueue<>(env, fieldValueHitQueue);
// inform a comparator that sort is based on this single field
@ -420,7 +419,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LLSlot
* primary sort is by relevancy.
*/
public static CollectorManager<LMDBFullFieldDocCollector, FullFieldDocs<LLFieldDoc>> createSharedManager(
LLTempLMDBEnv env, Sort sort, int numHits, FieldDoc after, int totalHitsThreshold) {
LLTempLMDBEnv env, Sort sort, int numHits, long totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker =

View File

@ -49,7 +49,7 @@ import org.jetbrains.annotations.Nullable;
* <p><b>NOTE</b>: The values {@link Float#NaN} and {@link Float#NEGATIVE_INFINITY} are not valid
* scores. This collector will not properly collect hits with such scores.
*/
public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LLScoreDoc, LLScoreDoc> {
public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPriorityQueue<LLScoreDoc>, LLScoreDoc, LLScoreDoc> {
/** Scorable leaf collector */
public abstract static class ScorerLeafCollector implements LeafCollector {
@ -197,7 +197,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LLScor
public static CollectorManager<LMDBFullScoreDocCollector, FullDocs<LLScoreDoc>> createSharedManager(
LLTempLMDBEnv env,
long numHits,
int totalHitsThreshold) {
long totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker =
@ -222,7 +222,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LLScor
*/
public static CollectorManager<LMDBFullScoreDocCollector, FullDocs<LLScoreDoc>> createSharedManager(
LLTempLMDBEnv env,
int totalHitsThreshold) {
long totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker =
@ -282,7 +282,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LLScor
float score =
docBase > maxMinScore.docID ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
if (score > minCompetitiveScore) {
assert hitsThresholdChecker.isThresholdReached();
assert hitsThresholdChecker.isThresholdReached(true);
scorer.setMinCompetitiveScore(score);
minCompetitiveScore = score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
@ -292,7 +292,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LLScor
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
var pqTop = pq.top();
if (hitsThresholdChecker.isThresholdReached()
if (hitsThresholdChecker.isThresholdReached(true)
&& pqTop != null
&& pqTop.score() != Float.NEGATIVE_INFINITY) { // -Infinity is the score of sentinels
// since we tie-break on doc id and collect in doc id order, we can require

View File

@ -5,8 +5,12 @@ import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import it.cavallium.dbengine.lucene.LuceneUtils;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
@ -17,6 +21,7 @@ import reactor.core.scheduler.Schedulers;
public class ScoringShardsCollectorManager implements CollectorManager<TopFieldCollector, TopDocs> {
private final Query query;
@Nullable
private final Sort sort;
private final int numHits;
@ -25,37 +30,43 @@ public class ScoringShardsCollectorManager implements CollectorManager<TopFieldC
private final @Nullable Integer startN;
private final @Nullable Integer topN;
private final CollectorManager<TopFieldCollector, TopFieldDocs> sharedCollectorManager;
private List<IndexSearcher> indexSearchers;
public ScoringShardsCollectorManager(@Nullable final Sort sort,
public ScoringShardsCollectorManager(Query query,
@Nullable final Sort sort,
final int numHits,
final FieldDoc after,
final int totalHitsThreshold,
int startN,
int topN) {
this(sort, numHits, after, totalHitsThreshold, (Integer) startN, (Integer) topN);
this(query, sort, numHits, after, totalHitsThreshold, (Integer) startN, (Integer) topN);
}
public ScoringShardsCollectorManager(@Nullable final Sort sort,
public ScoringShardsCollectorManager(Query query,
@Nullable final Sort sort,
final int numHits,
final FieldDoc after,
final int totalHitsThreshold,
int startN) {
this(sort, numHits, after, totalHitsThreshold, (Integer) startN, (Integer) 2147483630);
this(query, sort, numHits, after, totalHitsThreshold, (Integer) startN, (Integer) 2147483630);
}
public ScoringShardsCollectorManager(@Nullable final Sort sort,
public ScoringShardsCollectorManager(Query query,
@Nullable final Sort sort,
final int numHits,
final FieldDoc after,
final int totalHitsThreshold) {
this(sort, numHits, after, totalHitsThreshold, null, null);
this(query, sort, numHits, after, totalHitsThreshold, null, null);
}
private ScoringShardsCollectorManager(@Nullable final Sort sort,
private ScoringShardsCollectorManager(Query query,
@Nullable final Sort sort,
final int numHits,
final FieldDoc after,
final int totalHitsThreshold,
@Nullable Integer startN,
@Nullable Integer topN) {
this.query = query;
this.sort = sort;
this.numHits = numHits;
this.after = after;
@ -76,6 +87,10 @@ public class ScoringShardsCollectorManager implements CollectorManager<TopFieldC
return sharedCollectorManager.newCollector();
}
public void setIndexSearchers(List<IndexSearcher> indexSearcher) {
this.indexSearchers = indexSearcher;
}
@Override
public TopDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
if (Schedulers.isInNonBlockingThread()) {
@ -87,6 +102,13 @@ public class ScoringShardsCollectorManager implements CollectorManager<TopFieldC
var i = 0;
for (TopFieldCollector collector : collectors) {
topDocs[i] = collector.topDocs();
// Populate scores of TopFieldCollector. By default it doesn't populate the scores
if (topDocs[i].scoreDocs.length > 0 && Float.isNaN(topDocs[i].scoreDocs[0].score) && sort.needsScores()) {
Objects.requireNonNull(indexSearchers, "You must call setIndexSearchers before calling reduce!");
TopFieldCollector.populateScores(topDocs[i].scoreDocs, indexSearchers.get(i), query);
}
for (ScoreDoc scoreDoc : topDocs[i].scoreDocs) {
scoreDoc.shardIndex = i;
}

View File

@ -92,7 +92,7 @@ public class UnscoredCollector extends TopDocsCollector<ScoreDoc> implements Lea
private void populateResults(ScoreDoc[] results, int start, int howMany) {
int i = 0;
for (int docId : docIds.subList(start, start + howMany)) {
results[i] = new ScoreDoc(docId, 1.0f);
results[i] = new ScoreDoc(docId, Float.NaN);
i++;
}
}

View File

@ -0,0 +1,190 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
import it.cavallium.dbengine.lucene.LongCodec;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
/**
 * Comparator that sorts by ascending {@code _doc}. The document id recorded for each slot is kept
 * in an {@link LMDBArray} created from the supplied {@link LLTempLMDBEnv}.
 */
public class DocComparator extends FieldComparator<Integer> {

  /** Document id ({@code docBase + doc}) stored for every slot. */
  private final IArray<Integer> docIDs;
  /** Whether the skipping functionality is enabled (ascending, primary {@code _doc} sort only). */
  private final boolean enableSkipping;
  private int bottom;
  private int topValue;
  private boolean topValueSet;
  private boolean bottomValueSet;
  private boolean hitsThresholdReached;

  /**
   * Creates a new comparator based on document ids for {@code numHits}.
   *
   * @param env environment handed to the backing {@link LMDBArray}
   * @param numHits size passed to the backing array
   * @param reverse true when the sort is descending
   * @param sortPost position of this comparator among the sort fields; 0 means primary sort
   */
  public DocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
    this.docIDs = new LMDBArray<>(env, new IntCodec(), numHits, 0);
    // Skipping is only possible when _doc is the primary sort and the order is ascending.
    this.enableSkipping = sortPost == 0 && !reverse;
  }

  /** Compares the document ids stored in two slots. */
  @Override
  public int compare(int slot1, int slot2) {
    int firstDoc = docIDs.getOrDefault(slot1, 0);
    int secondDoc = docIDs.getOrDefault(slot2, 0);
    // Document ids are non-negative, so the subtraction cannot overflow.
    return firstDoc - secondDoc;
  }

  /** Creates the per-segment comparator for {@code context}. */
  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
    // TODO: can we "map" our docIDs to the current reader? That would save
    // having to subtract on every compare call.
    return new DocLeafComparator(context);
  }

  /** Records the document id after which the search should start. */
  @Override
  public void setTopValue(Integer value) {
    topValue = value;
    topValueSet = true;
  }

  /** Returns the document id stored in {@code slot}, or 0 when the slot has no value. */
  @Override
  public Integer value(int slot) {
    return docIDs.getOrDefault(slot, 0);
  }

  /**
   * Leaf comparator with skipping functionality. For an ascending {@code _doc} sort, once the top
   * N matches and enough hits have been collected, every following document can be skipped. When
   * a "top" document has been set, the comparator exposes an iterator that jumps straight to it.
   */
  private class DocLeafComparator implements LeafFieldComparator {

    private final int docBase;
    private final int minDoc;
    private final int maxDoc;
    /** Iterator over the documents that may still be competitive; null when skipping is off. */
    private DocIdSetIterator competitiveIterator;

    public DocLeafComparator(LeafReaderContext context) {
      this.docBase = context.docBase;
      if (!enableSkipping) {
        this.minDoc = -1;
        this.maxDoc = -1;
        this.competitiveIterator = null;
      } else {
        // Docs before topValue are skipped, but topValue itself is kept: a sort on
        // [_doc, other fields] in a distributed search may see documents from
        // different indices that share the same docID.
        this.minDoc = topValue;
        this.maxDoc = context.reader().maxDoc();
        this.competitiveIterator = DocIdSetIterator.all(maxDoc);
      }
    }

    @Override
    public void setBottom(int slot) {
      bottom = docIDs.getOrDefault(slot, 0);
      bottomValueSet = true;
      updateIterator();
    }

    @Override
    public int compareBottom(int doc) {
      int globalDoc = docBase + doc;
      // Document ids are non-negative, so the subtraction cannot overflow.
      return bottom - globalDoc;
    }

    @Override
    public int compareTop(int doc) {
      return Integer.compare(topValue, docBase + doc);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      int globalDoc = docBase + doc;
      docIDs.set(slot, globalDoc);
    }

    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // A new segment is starting: refresh the iterator.
      updateIterator();
    }

    /**
     * Returns a view over the current competitive iterator, or null when skipping is disabled.
     * The view delegates to whatever {@code competitiveIterator} is at the time of each call.
     */
    @Override
    public DocIdSetIterator competitiveIterator() {
      if (!enableSkipping) {
        return null;
      }
      return new DocIdSetIterator() {
        private int position = competitiveIterator.docID();

        @Override
        public int nextDoc() throws IOException {
          return advance(position + 1);
        }

        @Override
        public int docID() {
          return position;
        }

        @Override
        public long cost() {
          return competitiveIterator.cost();
        }

        @Override
        public int advance(int target) throws IOException {
          position = competitiveIterator.advance(target);
          return position;
        }
      };
    }

    @Override
    public void setHitsThresholdReached() {
      hitsThresholdReached = true;
      updateIterator();
    }

    /** Narrows the competitive iterator once skipping is enabled and the threshold is reached. */
    private void updateIterator() {
      if (!(enableSkipping && hitsThresholdReached)) {
        return;
      }
      if (bottomValueSet) {
        // The top N matches are collected, so we can terminate early.
        // Early termination on _doc is currently also implemented in TopFieldCollector,
        // but that will be removed once all bulk scorers use collectors' iterators.
        competitiveIterator = DocIdSetIterator.empty();
        return;
      }
      if (!topValueSet) {
        return;
      }
      // Skip to the desired top doc.
      boolean segmentEndsBeforeTop = docBase + maxDoc <= minDoc;
      if (segmentEndsBeforeTop) {
        competitiveIterator = DocIdSetIterator.empty(); // skip this segment
      } else {
        int segmentMinDoc = Math.max(competitiveIterator.docID(), minDoc - docBase);
        competitiveIterator = new MinDocIterator(segmentMinDoc, maxDoc);
      }
    }
  }
}

View File

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.DoubleCodec;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.LMDBArray;
import it.cavallium.dbengine.lucene.SortFieldCodec;
import java.io.IOException;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
/**
 * Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a
 * skipping functionality - an iterator that can skip over non-competitive documents. The per-slot
 * values are kept in an {@link LMDBArray}.
 */
public class DoubleComparator extends NumericComparator<Double> {

  /** Value stored for every slot. */
  private final IArray<Double> values;
  protected double topValue;
  protected double bottom;

  /**
   * @param env environment handed to the backing {@link LMDBArray}
   * @param numHits size passed to the backing array
   * @param field name of the field to sort on
   * @param missingValue value used for documents without the field; {@code null} means 0.0
   * @param reverse true when the sort is descending
   * @param sortPos position of this comparator among the sort fields
   */
  public DoubleComparator(LLTempLMDBEnv env,
      int numHits, String field, Double missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0.0 : missingValue, reverse, sortPos, Double.BYTES);
    values = new LMDBArray<>(env, new DoubleCodec(), numHits, 0d);
  }

  /** Compares the values stored in two slots. */
  @Override
  public int compare(int slot1, int slot2) {
    double first = values.getOrDefault(slot1, 0d);
    double second = values.getOrDefault(slot2, 0d);
    return Double.compare(first, second);
  }

  @Override
  public void setTopValue(Double value) {
    super.setTopValue(value);
    topValue = value;
  }

  /** Returns the value stored in {@code slot}, or 0 when the slot has no value. */
  @Override
  public Double value(int slot) {
    return values.getOrDefault(slot, 0d);
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new DoubleLeafComparator(context);
  }

  /** Leaf comparator for {@link DoubleComparator} that provides skipping functionality */
  public class DoubleLeafComparator extends NumericLeafComparator {

    public DoubleLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Reads the double stored for {@code doc}, falling back to the missing value. */
    private double getValueForDoc(int doc) throws IOException {
      if (!docValues.advanceExact(doc)) {
        return missingValue;
      }
      // The doc value holds the raw long bits of the double.
      return Double.longBitsToDouble(docValues.longValue());
    }

    @Override
    public void setBottom(int slot) throws IOException {
      bottom = values.getOrDefault(slot, 0d);
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      double docValue = getValueForDoc(doc);
      return Double.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      double docValue = getValueForDoc(doc);
      return Double.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      double docValue = getValueForDoc(doc);
      values.set(slot, docValue);
      super.copy(slot, doc);
    }

    @Override
    protected boolean isMissingValueCompetitive() {
      int cmp = Double.compare(missingValue, bottom);
      // Descending sort: competitive when missingValue >= bottom.
      // Ascending sort: competitive when missingValue <= bottom.
      if (reverse) {
        return cmp >= 0;
      }
      return cmp <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      DoublePoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      DoublePoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.DoubleCodec;
import it.cavallium.dbengine.lucene.FloatCodec;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.LMDBArray;
import java.io.IOException;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
/**
 * Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a
 * skipping functionality - an iterator that can skip over non-competitive documents. The per-slot
 * values are kept in an {@link LMDBArray}.
 */
public class FloatComparator extends NumericComparator<Float> {

  /** Value stored for every slot. */
  private final IArray<Float> values;
  protected float topValue;
  protected float bottom;

  /**
   * @param env environment handed to the backing {@link LMDBArray}
   * @param numHits size passed to the backing array
   * @param field name of the field to sort on
   * @param missingValue value used for documents without the field; {@code null} means 0.0f
   * @param reverse true when the sort is descending
   * @param sortPos position of this comparator among the sort fields
   */
  public FloatComparator(LLTempLMDBEnv env,
      int numHits, String field, Float missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0.0f : missingValue, reverse, sortPos, Float.BYTES);
    values = new LMDBArray<>(env, new FloatCodec(), numHits, 0f);
  }

  /** Compares the values stored in two slots. */
  @Override
  public int compare(int slot1, int slot2) {
    float first = values.getOrDefault(slot1, 0f);
    float second = values.getOrDefault(slot2, 0f);
    return Float.compare(first, second);
  }

  @Override
  public void setTopValue(Float value) {
    super.setTopValue(value);
    topValue = value;
  }

  /** Returns the value stored in {@code slot}, or 0 when the slot has no value. */
  @Override
  public Float value(int slot) {
    return values.getOrDefault(slot, 0f);
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new FloatLeafComparator(context);
  }

  /** Leaf comparator for {@link FloatComparator} that provides skipping functionality */
  public class FloatLeafComparator extends NumericLeafComparator {

    public FloatLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Reads the float stored for {@code doc}, falling back to the missing value. */
    private float getValueForDoc(int doc) throws IOException {
      if (!docValues.advanceExact(doc)) {
        return missingValue;
      }
      // The doc value holds the raw int bits of the float.
      return Float.intBitsToFloat((int) docValues.longValue());
    }

    @Override
    public void setBottom(int slot) throws IOException {
      bottom = values.getOrDefault(slot, 0f);
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      float docValue = getValueForDoc(doc);
      return Float.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      float docValue = getValueForDoc(doc);
      return Float.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      float docValue = getValueForDoc(doc);
      values.set(slot, docValue);
      super.copy(slot, doc);
    }

    @Override
    protected boolean isMissingValueCompetitive() {
      int cmp = Float.compare(missingValue, bottom);
      // Descending sort: competitive when missingValue >= bottom.
      // Ascending sort: competitive when missingValue <= bottom.
      if (reverse) {
        return cmp >= 0;
      }
      return cmp <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      FloatPoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      FloatPoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.FloatCodec;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import java.io.IOException;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
/**
 * Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a
 * skipping functionality - an iterator that can skip over non-competitive documents. The per-slot
 * values are kept in an {@link LMDBArray}.
 */
public class IntComparator extends NumericComparator<Integer> {

  /** Value stored for every slot. */
  private final IArray<Integer> values;
  protected int topValue;
  protected int bottom;

  /**
   * @param env environment handed to the backing {@link LMDBArray}
   * @param numHits size passed to the backing array
   * @param field name of the field to sort on
   * @param missingValue value used for documents without the field; {@code null} means 0
   * @param reverse true when the sort is descending
   * @param sortPos position of this comparator among the sort fields
   */
  public IntComparator(LLTempLMDBEnv env,
      int numHits, String field, Integer missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0 : missingValue, reverse, sortPos, Integer.BYTES);
    values = new LMDBArray<>(env, new IntCodec(), numHits, 0);
  }

  /** Compares the values stored in two slots. */
  @Override
  public int compare(int slot1, int slot2) {
    int first = values.getOrDefault(slot1, 0);
    int second = values.getOrDefault(slot2, 0);
    return Integer.compare(first, second);
  }

  @Override
  public void setTopValue(Integer value) {
    super.setTopValue(value);
    topValue = value;
  }

  /** Returns the value stored in {@code slot}, or 0 when the slot has no value. */
  @Override
  public Integer value(int slot) {
    return values.getOrDefault(slot, 0);
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new IntLeafComparator(context);
  }

  /** Leaf comparator for {@link IntComparator} that provides skipping functionality */
  public class IntLeafComparator extends NumericLeafComparator {

    public IntLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Reads the int stored for {@code doc}, falling back to the missing value. */
    private int getValueForDoc(int doc) throws IOException {
      if (!docValues.advanceExact(doc)) {
        return missingValue;
      }
      return (int) docValues.longValue();
    }

    @Override
    public void setBottom(int slot) throws IOException {
      bottom = values.getOrDefault(slot, 0);
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      int docValue = getValueForDoc(doc);
      return Integer.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      int docValue = getValueForDoc(doc);
      return Integer.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      int docValue = getValueForDoc(doc);
      values.set(slot, docValue);
      super.copy(slot, doc);
    }

    @Override
    protected boolean isMissingValueCompetitive() {
      int cmp = Integer.compare(missingValue, bottom);
      // Descending sort: competitive when missingValue >= bottom.
      // Ascending sort: competitive when missingValue <= bottom.
      if (reverse) {
        return cmp >= 0;
      }
      return cmp <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      IntPoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      IntPoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import it.cavallium.dbengine.lucene.LongCodec;
import java.io.IOException;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
/**
 * Comparator based on {@link Long#compare} for {@code numHits}. This comparator provides a
 * skipping functionality - an iterator that can skip over non-competitive documents. The per-slot
 * values are kept in an {@link LMDBArray}.
 */
public class LongComparator extends NumericComparator<Long> {

  /** Value stored for every slot. */
  private final IArray<Long> values;
  protected long topValue;
  protected long bottom;

  /**
   * @param env environment handed to the backing {@link LMDBArray}
   * @param numHits size passed to the backing array
   * @param field name of the field to sort on
   * @param missingValue value used for documents without the field; {@code null} means 0
   * @param reverse true when the sort is descending
   * @param sortPos position of this comparator among the sort fields
   */
  public LongComparator(LLTempLMDBEnv env,
      int numHits, String field, Long missingValue, boolean reverse, int sortPos) {
    super(field, missingValue == null ? 0L : missingValue, reverse, sortPos, Long.BYTES);
    values = new LMDBArray<>(env, new LongCodec(), numHits, 0L);
  }

  /** Compares the values stored in two slots. */
  @Override
  public int compare(int slot1, int slot2) {
    long first = values.getOrDefault(slot1, 0L);
    long second = values.getOrDefault(slot2, 0L);
    return Long.compare(first, second);
  }

  @Override
  public void setTopValue(Long value) {
    super.setTopValue(value);
    topValue = value;
  }

  /** Returns the value stored in {@code slot}, or 0 when the slot has no value. */
  @Override
  public Long value(int slot) {
    return values.getOrDefault(slot, 0L);
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    return new LongLeafComparator(context);
  }

  /** Leaf comparator for {@link LongComparator} that provides skipping functionality */
  public class LongLeafComparator extends NumericLeafComparator {

    public LongLeafComparator(LeafReaderContext context) throws IOException {
      super(context);
    }

    /** Reads the long stored for {@code doc}, falling back to the missing value. */
    private long getValueForDoc(int doc) throws IOException {
      if (!docValues.advanceExact(doc)) {
        return missingValue;
      }
      return docValues.longValue();
    }

    @Override
    public void setBottom(int slot) throws IOException {
      bottom = values.getOrDefault(slot, 0L);
      super.setBottom(slot);
    }

    @Override
    public int compareBottom(int doc) throws IOException {
      long docValue = getValueForDoc(doc);
      return Long.compare(bottom, docValue);
    }

    @Override
    public int compareTop(int doc) throws IOException {
      long docValue = getValueForDoc(doc);
      return Long.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      long docValue = getValueForDoc(doc);
      values.set(slot, docValue);
      super.copy(slot, doc);
    }

    @Override
    protected boolean isMissingValueCompetitive() {
      int cmp = Long.compare(missingValue, bottom);
      // Descending sort: competitive when missingValue >= bottom.
      // Ascending sort: competitive when missingValue <= bottom.
      if (reverse) {
        return cmp >= 0;
      }
      return cmp <= 0;
    }

    @Override
    protected void encodeBottom(byte[] packedValue) {
      LongPoint.encodeDimension(bottom, packedValue, 0);
    }

    @Override
    protected void encodeTop(byte[] packedValue) {
      LongPoint.encodeDimension(topValue, packedValue, 0);
    }
  }
}

View File

@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
/**
 * Docs iterator that starts iterating from a configurable minimum document.
 *
 * <p>The first positioning call jumps to at least {@code segmentMinDoc}; later calls move to
 * the requested target. Any position at or beyond {@code maxDoc} becomes
 * {@code NO_MORE_DOCS}.
 */
public class MinDocIterator extends DocIdSetIterator {

  final int segmentMinDoc;
  final int maxDoc;
  int doc = -1;

  MinDocIterator(int segmentMinDoc, int maxDoc) {
    this.segmentMinDoc = segmentMinDoc;
    this.maxDoc = maxDoc;
  }

  @Override
  public int docID() {
    return doc;
  }

  @Override
  public int nextDoc() throws IOException {
    final int following = doc + 1;
    return advance(following);
  }

  @Override
  public int advance(int target) throws IOException {
    assert target > doc;
    // While unpositioned (doc == -1) skip directly to the minimum doc.
    final boolean unpositioned = doc == -1;
    final int candidate = unpositioned ? Math.max(target, segmentMinDoc) : target;
    doc = candidate >= maxDoc ? NO_MORE_DOCS : candidate;
    return doc;
  }

  @Override
  public long cost() {
    final int span = maxDoc - segmentMinDoc;
    return span;
  }
}

View File

@ -0,0 +1,297 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.DocIdSetBuilder;
/**
* Abstract numeric comparator for comparing numeric values. This comparator provides skipping
* functionality: an iterator that can skip over non-competitive documents.
*
* <p>Skipping is only attempted when this comparator is the primary sort ({@code sortPos == 0}),
* has not been disabled through {@link #disableSkipping()}, and the segment exposes point values
* for the field.
*/
public abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
// value standing in for documents that have no value for the field
protected final T missingValue;
// name of the field being sorted on
protected final String field;
// true for a descending sort
protected final boolean reverse;
private final int bytesCount; // how many bytes are used to encode this number
// unsigned comparator over packed values of bytesCount bytes
private final ByteArrayComparator bytesComparator;
// set once setTopValue has been called
protected boolean topValueSet;
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
// set by the leaf comparator's setHitsThresholdReached
protected boolean hitsThresholdReached;
// set the first time a leaf comparator's setBottom is called
protected boolean queueFull;
// whether leaf comparators may try to build a competitive iterator at all
private boolean canSkipDocuments;
/**
* Stores the sort configuration.
*
* @param field name of the field to sort on
* @param missingValue value to use for documents without a value
* @param reverse whether the sort is descending
* @param sortPos position of this comparator in the sort; skipping is only enabled for 0
* @param bytesCount number of bytes used to encode one value as a point
*/
protected NumericComparator(
String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
this.field = field;
this.missingValue = missingValue;
this.reverse = reverse;
// skipping functionality is only relevant for primary sort
this.canSkipDocuments = (sortPos == 0);
this.bytesCount = bytesCount;
this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount);
}
/** Only flags that a top value exists; this base method does not keep the value itself. */
@Override
public void setTopValue(T value) {
topValueSet = true;
}
/** Marks this comparator as the only sort field, which lets equal values be skipped too. */
@Override
public void setSingleSort() {
singleSort = true;
}
/** Turns off skipping for leaf comparators created after this call. */
@Override
public void disableSkipping() {
canSkipDocuments = false;
}
/** Leaf comparator for {@link NumericComparator} that provides skipping functionality */
public abstract class NumericLeafComparator implements LeafFieldComparator {
// per-segment doc values of the sort field
protected final NumericDocValues docValues;
// per-segment point values of the sort field; null when skipping is not allowed or unavailable
private final PointValues pointValues;
// if skipping functionality should be enabled on this segment
private final boolean enableSkipping;
// maxDoc of the segment, or 0 when skipping is disabled
private final int maxDoc;
// packed lower/upper bounds of the competitive range; null when that side is unbounded
private final byte[] minValueAsBytes;
private final byte[] maxValueAsBytes;
// iterator over the docs currently considered competitive
private DocIdSetIterator competitiveIterator;
// cost of the current iterator, used to decide whether a narrower one is worth building
private long iteratorCost;
// last doc passed to copy(); docs up to it are ignored when rebuilding the iterator
private int maxDocVisited = -1;
// number of iterator update attempts, used for sampling
private int updateCounter = 0;
/**
* Binds the comparator to a segment and decides whether skipping can be used on it.
*
* @throws IllegalStateException if point values exist but FieldInfos reports no points
* @throws IllegalArgumentException if the field has several point dimensions or an unexpected
*     number of bytes per dimension
*/
public NumericLeafComparator(LeafReaderContext context) throws IOException {
this.docValues = getNumericDocValues(context, field);
this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null;
if (pointValues != null) {
FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
if (info == null || info.getPointDimensionCount() == 0) {
throw new IllegalStateException(
"Field "
+ field
+ " doesn't index points according to FieldInfos yet returns non-null PointValues");
} else if (info.getPointDimensionCount() > 1) {
throw new IllegalArgumentException(
"Field " + field + " is indexed with multiple dimensions, sorting is not supported");
} else if (info.getPointNumBytes() != bytesCount) {
throw new IllegalArgumentException(
"Field "
+ field
+ " is indexed with "
+ info.getPointNumBytes()
+ " bytes per dimension, but "
+ NumericComparator.this
+ " expected "
+ bytesCount);
}
this.enableSkipping = true; // skipping is enabled when points are available
this.maxDoc = context.reader().maxDoc();
// ascending sort: the upper bound comes from bottom; a lower bound exists only with a top value
this.maxValueAsBytes =
reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
// descending sort: the lower bound comes from bottom; an upper bound exists only with a top value
this.minValueAsBytes =
reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
this.iteratorCost = maxDoc;
} else {
this.enableSkipping = false;
this.maxDoc = 0;
this.maxValueAsBytes = null;
this.minValueAsBytes = null;
}
}
/** Retrieves the NumericDocValues for the field in this segment */
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
throws IOException {
return DocValues.getNumeric(context.reader(), field);
}
/**
* Base handling of a new bottom slot; subclasses are expected to cache the bottom value before
* delegating here, because the iterator update encodes it.
*/
@Override
public void setBottom(int slot) throws IOException {
queueFull = true; // if we are setting bottom, it means that we have collected enough hits
updateCompetitiveIterator(); // update an iterator if we set a new bottom
}
/** Base handling of a copied hit: remembers the doc as the last one visited. */
@Override
public void copy(int slot, int doc) throws IOException {
maxDocVisited = doc;
}
/** Uses the scorer's iterator cost as the starting cost when a {@link Scorer} is supplied. */
@Override
public void setScorer(Scorable scorer) throws IOException {
if (scorer instanceof Scorer) {
iteratorCost =
((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
updateCompetitiveIterator(); // update an iterator when we have a new segment
}
}
/** Records that the hits threshold was reached and tries to narrow the iterator. */
@Override
public void setHitsThresholdReached() throws IOException {
hitsThresholdReached = true;
updateCompetitiveIterator();
}
// update its iterator to include possibly only docs that are "stronger" than the current bottom
// entry
private void updateCompetitiveIterator() throws IOException {
if (enableSkipping == false || hitsThresholdReached == false || queueFull == false) return;
// if some documents have missing points, check that missing values prohibits optimization
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
return; // we can't filter out documents, as documents with missing values are competitive
}
updateCounter++;
// after 256 attempts only every 32nd call (low five bits all set) goes through
if (updateCounter > 256
&& (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
return;
}
// bottom bounds the range on the "worse" side, top (when set) on the other side
if (reverse == false) {
encodeBottom(maxValueAsBytes);
if (topValueSet) {
encodeTop(minValueAsBytes);
}
} else {
encodeBottom(minValueAsBytes);
if (topValueSet) {
encodeTop(maxValueAsBytes);
}
}
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
PointValues.IntersectVisitor visitor =
new PointValues.IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) {
if (docID <= maxDocVisited) {
return; // Already visited or skipped
}
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
if (docID <= maxDocVisited) {
return; // already visited or skipped
}
if (maxValueAsBytes != null) {
int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0);
// if doc's value is too high or for single sort even equal, it is not competitive
// and the doc can be skipped
if (cmp > 0 || (singleSort && cmp == 0)) return;
}
if (minValueAsBytes != null) {
int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0);
// if doc's value is too low or for single sort even equal, it is not competitive
// and the doc can be skipped
if (cmp < 0 || (singleSort && cmp == 0)) return;
}
adder.add(docID); // doc is competitive
}
@Override
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
// cell entirely above the upper bound
if (maxValueAsBytes != null) {
int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0);
if (cmp > 0 || (singleSort && cmp == 0))
return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
// cell entirely below the lower bound
if (minValueAsBytes != null) {
int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0);
if (cmp < 0 || (singleSort && cmp == 0))
return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
// cell sticks out of the range on at least one side
if ((maxValueAsBytes != null
&& bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0)
|| (minValueAsBytes != null
&& bytesComparator.compare(minPackedValue, 0, minValueAsBytes, 0) < 0)) {
return PointValues.Relation.CELL_CROSSES_QUERY;
}
return PointValues.Relation.CELL_INSIDE_QUERY;
}
};
// one eighth of the current iterator cost
final long threshold = iteratorCost >>> 3;
long estimatedNumberOfMatches =
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
if (estimatedNumberOfMatches >= threshold) {
// the new range is not selective enough to be worth materializing, it doesn't reduce number
// of docs at least 8x
return;
}
pointValues.intersect(visitor);
competitiveIterator = result.build().iterator();
iteratorCost = competitiveIterator.cost();
}
/**
* Returns a wrapper that always delegates to the most recently built competitive iterator, or
* {@code null} when skipping is disabled for this segment.
*/
@Override
public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) return null;
return new DocIdSetIterator() {
private int docID = competitiveIterator.docID();
@Override
public int nextDoc() throws IOException {
return advance(docID + 1);
}
@Override
public int docID() {
return docID;
}
@Override
public long cost() {
return competitiveIterator.cost();
}
@Override
public int advance(int target) throws IOException {
return docID = competitiveIterator.advance(target);
}
};
}
/** Tells whether a document lacking a value could still enter the queue given the bottom. */
protected abstract boolean isMissingValueCompetitive();
/** Writes the current bottom value into {@code packedValue} in point encoding. */
protected abstract void encodeBottom(byte[] packedValue);
/** Writes the current top value into {@code packedValue} in point encoding. */
protected abstract void encodeTop(byte[] packedValue);
}
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.FloatCodec;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.LMDBArray;
import it.cavallium.dbengine.lucene.LongCodec;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
/**
 * Sorts by descending relevance. NOTE: if you are sorting only by descending relevance and then secondarily by
 * ascending docID, performance is faster using {@link org.apache.lucene.search.TopScoreDocCollector} directly (which {@link
 * org.apache.lucene.search.IndexSearcher#search(Query, int)} uses when no {@link org.apache.lucene.search.Sort} is specified).
 *
 * <p>Per-slot scores are kept in an {@link IArray} created from the supplied LMDB environment.
 */
public final class RelevanceComparator extends FieldComparator<Float> implements LeafFieldComparator {

  private final IArray<Float> scores;
  private float bottom;
  private Scorable scorer;
  private float topValue;

  /**
   * Creates a new comparator based on relevance for {@code numHits}.
   */
  public RelevanceComparator(LLTempLMDBEnv env, int numHits) {
    this.scores = new LMDBArray<>(env, new FloatCodec(), numHits, 0f);
  }

  @Override
  public int compare(int slot1, int slot2) {
    // Arguments are swapped on purpose: higher scores sort first.
    final float second = scores.getOrDefault(slot2, 0f);
    final float first = scores.getOrDefault(slot1, 0f);
    return Float.compare(second, first);
  }

  @Override
  public int compareBottom(int doc) throws IOException {
    final float docScore = scorer.score();
    assert !Float.isNaN(docScore);
    return Float.compare(docScore, bottom);
  }

  @Override
  public void copy(int slot, int doc) throws IOException {
    final float docScore = scorer.score();
    scores.set(slot, docScore);
    assert !Float.isNaN(docScore);
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
    // This comparator serves as its own leaf comparator.
    return this;
  }

  @Override
  public void setBottom(final int bottom) {
    final float slotScore = scores.getOrDefault(bottom, 0f);
    this.bottom = slotScore;
  }

  @Override
  public void setTopValue(Float value) {
    this.topValue = value;
  }

  @Override
  public void setScorer(Scorable scorer) {
    // Cache the score so that repeated score() calls for the same doc
    // do not recompute it.
    final Scorable cachingScorer = ScoreCachingWrappingScorer.wrap(scorer);
    this.scorer = cachingScorer;
  }

  @Override
  public Float value(int slot) {
    final var stored = scores.getOrDefault(slot, 0f);
    return stored;
  }

  // Override because we sort reverse of natural Float order:
  @Override
  public int compareValues(Float first, Float second) {
    // Operands are flipped deliberately: relevance sorts descending by default.
    return second.compareTo(first);
  }

  @Override
  public int compareTop(int doc) throws IOException {
    final float docScore = scorer.score();
    assert !Float.isNaN(docScore);
    return Float.compare(docScore, topValue);
  }
}

View File

@ -0,0 +1,298 @@
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.ByteArrayCodec;
import it.cavallium.dbengine.lucene.BytesRefCodec;
import it.cavallium.dbengine.lucene.FloatCodec;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
/**
 * Sorts by field's natural Term sort order, using ordinals. This is functionally equivalent to
 * {@link org.apache.lucene.search.FieldComparator.TermValComparator}, but it first resolves the
 * string to their relative ordinal positions (using the index returned by {@link
 * org.apache.lucene.index.LeafReader#getSortedDocValues(String)}), and does most comparisons
 * using the ordinals. For medium to large results, this comparator will be much faster than
 * {@link org.apache.lucene.search.FieldComparator.TermValComparator}. For very small result sets
 * it may be slower.
 *
 * <p>Per-slot state (ordinals, term bytes and reader generations) is kept in {@link IArray}
 * instances created from the supplied LMDB environment. Term bytes are stored as plain
 * {@code byte[]} copies and are always compared in unsigned byte order, the same order used by
 * {@link BytesRef#compareTo(BytesRef)}.
 */
public class TermOrdValComparator extends FieldComparator<BytesRef> implements LeafFieldComparator {

  /* Ords for each slot.
  @lucene.internal */
  final IArray<Integer> ords;

  /* Values for each slot.
  @lucene.internal */
  final IArray<byte[]> values;

  /* Which reader last copied a value into the slot. When
  we compare two slots, we just compare-by-ord if the
  readerGen is the same; else we must compare the
  values (slower).
  @lucene.internal */
  final IArray<Integer> readerGen;

  /* Gen of current reader we are on.
  @lucene.internal */
  int currentReaderGen = -1;

  /* Current reader's doc ord/values.
  @lucene.internal */
  SortedDocValues termsIndex;

  private final String field;

  /* Bottom slot, or -1 if queue isn't full yet
  @lucene.internal */
  int bottomSlot = -1;

  /* Bottom ord (same as ords[bottomSlot] once bottomSlot
  is set). Cached for faster compares.
  @lucene.internal */
  int bottomOrd;

  /* True if current bottom slot matches the current
  reader.
  @lucene.internal */
  boolean bottomSameReader;

  /* Bottom value (same as values[bottomSlot] once
  bottomSlot is set). Cached for faster compares.
  @lucene.internal */
  byte[] bottomValue;

  /** Set by setTopValue. */
  byte[] topValue;

  boolean topSameReader;
  int topOrd;

  /** -1 if missing values are sorted first, 1 if they are sorted last */
  final int missingSortCmp;

  /** Which ordinal to use for a missing value. */
  final int missingOrd;

  /** Creates this, sorting missing values first. */
  public TermOrdValComparator(LLTempLMDBEnv env, int numHits, String field) {
    this(env, numHits, field, false);
  }

  /**
   * Creates this, with control over how missing values are sorted. Pass sortMissingLast=true to
   * put missing values at the end.
   */
  public TermOrdValComparator(LLTempLMDBEnv env, int numHits, String field, boolean sortMissingLast) {
    ords = new LMDBArray<>(env, new IntCodec(), numHits, 0);
    values = new LMDBArray<>(env, new ByteArrayCodec(), numHits, null);
    readerGen = new LMDBArray<>(env, new IntCodec(), numHits, 0);
    this.field = field;
    if (sortMissingLast) {
      missingSortCmp = 1;
      missingOrd = Integer.MAX_VALUE;
    } else {
      missingSortCmp = -1;
      missingOrd = -1;
    }
  }

  /** Returns the ordinal of {@code doc} in the current segment, or -1 when it has no value. */
  private int getOrdForDoc(int doc) throws IOException {
    if (termsIndex.advanceExact(doc)) {
      return termsIndex.ordValue();
    } else {
      return -1;
    }
  }

  /**
   * Compares two slots: by ordinal when both were filled from the same reader generation,
   * otherwise by their stored term bytes, with missing values placed according to
   * {@code missingSortCmp}.
   */
  @Override
  public int compare(int slot1, int slot2) {
    if ((int) readerGen.getOrDefault(slot2, 0) == readerGen.getOrDefault(slot1, 0)) {
      return ords.getOrDefault(slot1, 0) - ords.getOrDefault(slot2, 0);
    }

    final var val1 = values.get(slot1);
    final var val2 = values.get(slot2);
    if (val1 == null) {
      if (val2 == null) {
        return 0;
      }
      return missingSortCmp;
    } else if (val2 == null) {
      return -missingSortCmp;
    }
    // Terms are ordered by unsigned bytes (as BytesRef.compareTo and the ordinals are);
    // a signed comparison would misorder any term containing a byte >= 0x80.
    return Arrays.compareUnsigned(val1, val2);
  }

  @Override
  public int compareBottom(int doc) throws IOException {
    assert bottomSlot != -1;
    int docOrd = getOrdForDoc(doc);
    if (docOrd == -1) {
      docOrd = missingOrd;
    }
    if (bottomSameReader) {
      // ord is precisely comparable, even in the equal case
      return bottomOrd - docOrd;
    } else if (bottomOrd >= docOrd) {
      // the equals case always means bottom is > doc
      // (because we set bottomOrd to the lower bound in
      // setBottom):
      return 1;
    } else {
      return -1;
    }
  }

  @Override
  public void copy(int slot, int doc) throws IOException {
    int ord = getOrdForDoc(doc);
    if (ord == -1) {
      ord = missingOrd;
      values.reset(slot);
    } else {
      assert ord >= 0;
      values.set(slot, copyBytes(termsIndex.lookupOrd(ord)));
    }
    ords.set(slot, ord);
    readerGen.set(slot, currentReaderGen);
  }

  /**
   * Copies the bytes referenced by {@code lookupOrd} into a new array.
   *
   * @param lookupOrd the reference to copy, may be {@code null}
   * @return a new array holding exactly the {@code length} bytes starting at {@code offset}, or
   *     {@code null} when the input is {@code null}
   */
  private byte[] copyBytes(BytesRef lookupOrd) {
    if (lookupOrd == null) return null;
    // copyOfRange takes an exclusive end index, not a length
    return Arrays.copyOfRange(lookupOrd.bytes, lookupOrd.offset, lookupOrd.offset + lookupOrd.length);
  }

  /** Retrieves the SortedDocValues for the field in this segment */
  protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
      throws IOException {
    return DocValues.getSorted(context.reader(), field);
  }

  /**
   * Switches to a new segment: loads its doc values, bumps the reader generation and
   * re-resolves the top and bottom ordinals against the new segment.
   */
  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    termsIndex = getSortedDocValues(context, field);
    currentReaderGen++;

    if (topValue != null) {
      // Recompute topOrd/SameReader
      int ord = termsIndex.lookupTerm(new BytesRef(topValue));
      if (ord >= 0) {
        topSameReader = true;
        topOrd = ord;
      } else {
        topSameReader = false;
        topOrd = -ord - 2;
      }
    } else {
      topOrd = missingOrd;
      topSameReader = true;
    }
    // System.out.println(" getLeafComparator topOrd=" + topOrd + " topSameReader=" +
    // topSameReader);

    if (bottomSlot != -1) {
      // Recompute bottomOrd/SameReader
      setBottom(bottomSlot);
    }

    return this;
  }

  @Override
  public void setBottom(final int bottom) throws IOException {
    bottomSlot = bottom;

    bottomValue = values.get(bottomSlot);
    if (currentReaderGen == readerGen.getOrDefault(bottomSlot, 0)) {
      bottomOrd = ords.getOrDefault(bottomSlot, 0);
      bottomSameReader = true;
    } else {
      if (bottomValue == null) {
        // missingOrd is null for all segments
        assert ords.getOrDefault(bottomSlot, 0) == missingOrd;
        bottomOrd = missingOrd;
        bottomSameReader = true;
        readerGen.set(bottomSlot, currentReaderGen);
      } else {
        final int ord = termsIndex.lookupTerm(new BytesRef(bottomValue));
        if (ord < 0) {
          bottomOrd = -ord - 2;
          bottomSameReader = false;
        } else {
          bottomOrd = ord;
          // exact value match
          bottomSameReader = true;
          readerGen.set(bottomSlot, currentReaderGen);
          ords.set(bottomSlot, bottomOrd);
        }
      }
    }
  }

  @Override
  public void setTopValue(BytesRef value) {
    // null is fine: it means the last doc of the prior
    // search was missing this value
    topValue = copyBytes(value);
    // System.out.println("setTopValue " + topValue);
  }

  @Override
  public BytesRef value(int slot) {
    return getBytesRef(values.get(slot));
  }

  /** Wraps {@code bytes} in a {@link BytesRef}, passing {@code null} through unchanged. */
  private BytesRef getBytesRef(byte[] bytes) {
    if (bytes == null) return null;
    return new BytesRef(bytes);
  }

  @Override
  public int compareTop(int doc) throws IOException {

    int ord = getOrdForDoc(doc);
    if (ord == -1) {
      ord = missingOrd;
    }

    if (topSameReader) {
      // ord is precisely comparable, even in the equal
      // case
      // System.out.println("compareTop doc=" + doc + " ord=" + ord + " ret=" + (topOrd-ord));
      return topOrd - ord;
    } else if (ord <= topOrd) {
      // the equals case always means doc is < value
      // (because we set lastOrd to the lower bound)
      return 1;
    } else {
      return -1;
    }
  }

  @Override
  public int compareValues(BytesRef val1, BytesRef val2) {
    if (val1 == null) {
      if (val2 == null) {
        return 0;
      }
      return missingSortCmp;
    } else if (val2 == null) {
      return -missingSortCmp;
    }
    return val1.compareTo(val2);
  }

  @Override
  public void setScorer(Scorable scorer) {}
}

View File

@ -43,7 +43,7 @@ public class AdaptiveLocalSearcher implements LocalSearcher {
LocalQueryParams queryParams,
String keyFieldName,
LLSearchTransformer transformer) {
if (queryParams.limit() == 0) {
if (queryParams.limitLong() == 0) {
return countSearcher.collect(indexSearcher, queryParams, keyFieldName, transformer);
} else {
return localSearcher.collect(indexSearcher, queryParams, keyFieldName, transformer);

View File

@ -13,7 +13,7 @@ public class AdaptiveMultiSearcher implements MultiSearcher, Closeable {
private static final MultiSearcher count
= new UnsortedUnscoredSimpleMultiSearcher(new CountLocalSearcher());
private static final MultiSearcher scoredSimple = new ScoredPagedMultiSearcher();
private static final MultiSearcher scoredPaged = new ScoredPagedMultiSearcher();
private static final MultiSearcher unsortedUnscoredPaged
= new UnsortedUnscoredSimpleMultiSearcher(new PagedLocalSearcher());
@ -23,8 +23,11 @@ public class AdaptiveMultiSearcher implements MultiSearcher, Closeable {
private final UnsortedScoredFullMultiSearcher unsortedScoredFull;
private final SortedScoredFullMultiSearcher sortedScoredFull;
public AdaptiveMultiSearcher() throws IOException {
unsortedScoredFull = new UnsortedScoredFullMultiSearcher();
sortedScoredFull = new SortedScoredFullMultiSearcher();
}
@Override
@ -48,17 +51,20 @@ public class AdaptiveMultiSearcher implements MultiSearcher, Closeable {
String keyFieldName,
LLSearchTransformer transformer) {
// offset + limit
long realLimit = ((long) queryParams.offset() + (long) queryParams.limit());
long realLimit = queryParams.offsetLong() + queryParams.limitLong();
return LLUtils.usingSendResource(indexSearchersMono, indexSearchers -> {
if (queryParams.limit() == 0) {
if (queryParams.limitLong() == 0) {
return count.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
} else if (queryParams.isSorted() || queryParams.needsScores()) {
if ((queryParams.isSorted() && !queryParams.isSortedByScore())
|| realLimit <= (long) queryParams.pageLimits().getPageLimit(0)) {
return scoredSimple.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
if (realLimit <= (long) queryParams.pageLimits().getPageLimit(0)) {
return scoredPaged.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
} else {
return unsortedScoredFull.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
if ((queryParams.isSorted() && !queryParams.isSortedByScore())) {
return sortedScoredFull.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
} else {
return unsortedScoredFull.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
}
}
} else if (realLimit <= (long) queryParams.pageLimits().getPageLimit(0)) {
// Run single-page searches using the paged multi searcher
@ -72,6 +78,7 @@ public class AdaptiveMultiSearcher implements MultiSearcher, Closeable {
@Override
public void close() throws IOException {
sortedScoredFull.close();
unsortedScoredFull.close();
}

View File

@ -1,5 +1,7 @@
package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.LuceneUtils.safeLongToInt;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.PageLimits;
import java.util.Objects;
@ -10,9 +12,31 @@ import org.apache.lucene.search.Sort;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public record LocalQueryParams(@NotNull Query query, int offset, int limit, @NotNull PageLimits pageLimits,
public record LocalQueryParams(@NotNull Query query, int offsetInt, long offsetLong, int limitInt, long limitLong,
@NotNull PageLimits pageLimits,
@Nullable Float minCompetitiveScore, @Nullable Sort sort, boolean complete) {
public LocalQueryParams(@NotNull Query query,
long offsetLong,
long limitLong,
@NotNull PageLimits pageLimits,
@Nullable Float minCompetitiveScore,
@Nullable Sort sort,
boolean complete) {
this(query, safeLongToInt(offsetLong), offsetLong, safeLongToInt(limitLong), limitLong, pageLimits,
minCompetitiveScore, sort, complete);
}
public LocalQueryParams(@NotNull Query query,
int offsetInt,
int limitInt,
@NotNull PageLimits pageLimits,
@Nullable Float minCompetitiveScore,
@Nullable Sort sort,
boolean complete) {
this(query, offsetInt, offsetInt, limitInt, limitInt, pageLimits, minCompetitiveScore, sort, complete);
}
public boolean isSorted() {
return sort != null;
}
@ -46,7 +70,11 @@ public record LocalQueryParams(@NotNull Query query, int offset, int limit, @Not
}
}
public int getTotalHitsThreshold() {
public int getTotalHitsThresholdInt() {
return LuceneUtils.totalHitsThreshold(this.complete);
}
public long getTotalHitsThresholdLong() {
return LuceneUtils.totalHitsThresholdLong(this.complete);
}
}

View File

@ -0,0 +1,129 @@
package it.cavallium.dbengine.lucene.searcher;
import io.net5.buffer.api.Send;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.FullDocs;
import it.cavallium.dbengine.lucene.LLFieldDoc;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.LMDBFullFieldDocCollector;
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.warp.commonutils.log.Logger;
import org.warp.commonutils.log.LoggerFactory;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
public class OfficialSearcher implements MultiSearcher, Closeable {
protected static final Logger logger = LoggerFactory.getLogger(OfficialSearcher.class);
private final LLTempLMDBEnv env;
public OfficialSearcher() throws IOException {
this.env = new LLTempLMDBEnv();
}
@Override
public Mono<Send<LuceneSearchResult>> collectMulti(Mono<Send<LLIndexSearchers>> indexSearchersMono,
LocalQueryParams queryParams,
String keyFieldName,
LLSearchTransformer transformer) {
Mono<LocalQueryParams> queryParamsMono;
if (transformer == LLSearchTransformer.NO_TRANSFORMATION) {
queryParamsMono = Mono.just(queryParams);
} else {
queryParamsMono = LLUtils.usingSendResource(indexSearchersMono, indexSearchers -> transformer.transform(Mono
.fromSupplier(() -> new TransformerInput(indexSearchers, queryParams))), true);
}
return queryParamsMono.flatMap(queryParams2 -> LLUtils.usingSendResource(indexSearchersMono, indexSearchers -> this
// Search results
.search(indexSearchers.shards(), queryParams2)
// Compute the results
.transform(fullDocsMono -> this.computeResults(fullDocsMono, indexSearchers,
keyFieldName, queryParams2))
// Ensure that one LuceneSearchResult is always returned
.single(),
false));
}
/**
* Search effectively the raw results
*/
@SuppressWarnings({"unchecked", "rawtypes"})
private Mono<TopDocs> search(Iterable<IndexSearcher> indexSearchers,
LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
LLUtils.ensureBlocking();
var totalHitsThreshold = queryParams.getTotalHitsThresholdInt();
if (queryParams.isSorted() && !queryParams.isSortedByScore()) {
return TopFieldCollector.createSharedManager(queryParams.sort(), queryParams.limitInt(), null,
totalHitsThreshold);
} else {
return TopScoreDocCollector.createSharedManager(queryParams.limitInt(), null, totalHitsThreshold);
}
})
.flatMap(sharedManager -> Flux
.fromIterable(indexSearchers)
.flatMap(shard -> Mono.fromCallable(() -> {
LLUtils.ensureBlocking();
var collector = sharedManager.newCollector();
assert queryParams.complete() == collector.scoreMode().isExhaustive();
queryParams.getScoreModeOptional().ifPresent(scoreMode -> {
assert scoreMode == collector.scoreMode();
});
shard.search(queryParams.query(), collector);
return collector;
}))
.collectList()
.flatMap(collectors -> Mono.fromCallable(() -> {
LLUtils.ensureBlocking();
return sharedManager.reduce((List) collectors);
}))
);
}
/**
* Compute the results, extracting useful data
*/
private Mono<Send<LuceneSearchResult>> computeResults(Mono<TopDocs> dataMono,
LLIndexSearchers indexSearchers,
String keyFieldName,
LocalQueryParams queryParams) {
return dataMono.map(data -> {
var totalHitsCount = LuceneUtils.convertTotalHitsCount(data.totalHits);
Flux<LLKeyScore> hitsFlux = LuceneUtils
.convertHits(Flux.fromArray(data.scoreDocs),
indexSearchers.shards(), keyFieldName, true)
.skip(queryParams.offsetLong())
.take(queryParams.limitLong(), true);
return new LuceneSearchResult(totalHitsCount, hitsFlux, indexSearchers::close).send();
});
}
/**
 * Close this searcher by closing its {@code env}.
 *
 * @throws IOException declared by the signature; presumably raised when closing {@code env} fails
 */
@Override
public void close() throws IOException {
env.close();
}
/**
 * Get the name of this searcher.
 *
 * @return the constant {@code "official"}
 */
@Override
public String getName() {
return "official";
}
}

View File

@ -15,10 +15,12 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TotalHits.Relation;
import reactor.core.publisher.Flux;
@ -71,10 +73,10 @@ public class PagedLocalSearcher implements LocalSearcher {
* Get the pagination info
*/
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
if (queryParams.limitInt() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), true);
} else {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), false);
}
}
@ -109,7 +111,7 @@ public class PagedLocalSearcher implements LocalSearcher {
Flux<LLKeyScore> firstPageHitsFlux = LuceneUtils.convertHits(Flux.fromArray(scoreDocs),
indexSearchers, keyFieldName, true)
.take(queryParams.limit(), true);
.take(queryParams.limitInt(), true);
CurrentPageInfo nextPageInfo = firstPageData.nextPageInfo();
@ -183,7 +185,7 @@ public class PagedLocalSearcher implements LocalSearcher {
TopDocs pageTopDocs;
try {
TopDocsCollector<ScoreDoc> collector = TopDocsCollectorUtils.getTopDocsCollector(queryParams.sort(),
currentPageLimit, s.last(), queryParams.getTotalHitsThreshold(),
currentPageLimit, s.last(), queryParams.getTotalHitsThresholdInt(),
allowPagination, queryParams.needsScores());
assert queryParams.complete() == collector.scoreMode().isExhaustive();
queryParams.getScoreModeOptional().ifPresent(scoreMode -> {
@ -196,6 +198,10 @@ public class PagedLocalSearcher implements LocalSearcher {
} else {
pageTopDocs = collector.topDocs();
}
// Populate the scores of TopFieldCollector hits. By default it doesn't populate the scores
if (queryParams.needsScores() && ((Collector) collector) instanceof TopFieldCollector) {
TopFieldCollector.populateScores(pageTopDocs.scoreDocs, indexSearchers.get(0), queryParams.query());
}
} catch (IOException e) {
sink.error(e);
return EMPTY_STATUS;

View File

@ -12,11 +12,9 @@ import it.cavallium.dbengine.lucene.collector.ScoringShardsCollectorManager;
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.jetbrains.annotations.Nullable;
import org.warp.commonutils.log.Logger;
@ -71,17 +69,17 @@ public class ScoredPagedMultiSearcher implements MultiSearcher {
* Get the pagination info
*/
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), true);
if (queryParams.limitInt() <= MAX_SINGLE_SEARCH_LIMIT) {
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), true);
} else {
return new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.pageLimits(), false);
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), false);
}
}
/**
* Search effectively the raw results of the first page
*/
private Mono<PageData> searchFirstPage(Iterable<IndexSearcher> indexSearchers,
private Mono<PageData> searchFirstPage(List<IndexSearcher> indexSearchers,
LocalQueryParams queryParams,
PaginationInfo paginationInfo) {
var limit = paginationInfo.totalLimit();
@ -107,7 +105,7 @@ public class ScoredPagedMultiSearcher implements MultiSearcher {
Flux<LLKeyScore> firstPageHitsFlux = LuceneUtils.convertHits(Flux.fromArray(scoreDocs),
indexSearchers.shards(), keyFieldName, true)
.take(queryParams.limit(), true);
.take(queryParams.limitInt(), true);
CurrentPageInfo nextPageInfo = firstPageData.nextPageInfo();
@ -160,7 +158,7 @@ public class ScoredPagedMultiSearcher implements MultiSearcher {
* skip the first n results in the first page
*/
private Mono<PageData> searchPage(LocalQueryParams queryParams,
Iterable<IndexSearcher> indexSearchers,
List<IndexSearcher> indexSearchers,
boolean allowPagination,
PageLimits pageLimits,
int resultsOffset,
@ -172,11 +170,12 @@ public class ScoredPagedMultiSearcher implements MultiSearcher {
throw new IndexOutOfBoundsException(resultsOffset);
}
if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) {
var query = queryParams.query();
@Nullable var sort = getSort(queryParams);
var pageLimit = pageLimits.getPageLimit(s.pageIndex());
var after = (FieldDoc) s.last();
var totalHitsThreshold = queryParams.getTotalHitsThreshold();
return new ScoringShardsCollectorManager(sort, pageLimit, after, totalHitsThreshold,
var totalHitsThreshold = queryParams.getTotalHitsThresholdInt();
return new ScoringShardsCollectorManager(query, sort, pageLimit, after, totalHitsThreshold,
resultsOffset);
} else {
return null;
@ -199,7 +198,9 @@ public class ScoredPagedMultiSearcher implements MultiSearcher {
.collectList()
.flatMap(collectors -> Mono.fromCallable(() -> {
LLUtils.ensureBlocking();
sharedManager.setIndexSearchers(indexSearchers);
var pageTopDocs = sharedManager.reduce(collectors);
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
long nextRemainingLimit;
if (allowPagination) {

View File

@ -0,0 +1,119 @@
package it.cavallium.dbengine.lucene.searcher;
import io.net5.buffer.api.Send;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.FullDocs;
import it.cavallium.dbengine.lucene.LLFieldDoc;
import it.cavallium.dbengine.lucene.LLScoreDoc;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.LMDBFullFieldDocCollector;
import it.cavallium.dbengine.lucene.collector.LMDBFullScoreDocCollector;
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.warp.commonutils.log.Logger;
import org.warp.commonutils.log.LoggerFactory;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
/**
 * {@link MultiSearcher} that searches every shard with collectors created by a shared
 * {@link LMDBFullFieldDocCollector} manager, using the sort of the query, and then exposes
 * the requested window of the merged hits.
 */
public class SortedScoredFullMultiSearcher implements MultiSearcher, Closeable {

	protected static final Logger logger = LoggerFactory.getLogger(SortedScoredFullMultiSearcher.class);

	/** Environment handed to the collector manager; created by the constructor, closed by {@link #close()}. */
	private final LLTempLMDBEnv env;

	/**
	 * Create the searcher together with its own {@link LLTempLMDBEnv}.
	 *
	 * @throws IOException declared by the {@link LLTempLMDBEnv} constructor call
	 */
	public SortedScoredFullMultiSearcher() throws IOException {
		env = new LLTempLMDBEnv();
	}

	/**
	 * Apply the transformer (if any) to the query parameters, search all the shards and
	 * build the search result.
	 */
	@Override
	public Mono<Send<LuceneSearchResult>> collectMulti(Mono<Send<LLIndexSearchers>> indexSearchersMono,
			LocalQueryParams queryParams,
			String keyFieldName,
			LLSearchTransformer transformer) {
		Mono<LocalQueryParams> effectiveParamsMono;
		if (transformer != LLSearchTransformer.NO_TRANSFORMATION) {
			// The transformer needs the searchers, so they are opened once just for this step
			effectiveParamsMono = LLUtils.usingSendResource(indexSearchersMono,
					searchers -> transformer.transform(Mono
							.fromSupplier(() -> new TransformerInput(searchers, queryParams))),
					true);
		} else {
			effectiveParamsMono = Mono.just(queryParams);
		}

		return effectiveParamsMono.flatMap(effectiveParams -> LLUtils.usingSendResource(indexSearchersMono,
				searchers -> search(searchers.shards(), effectiveParams)
						// Turn the collected docs into the final result
						.transform(docsMono -> computeResults(docsMono, searchers, keyFieldName, effectiveParams))
						// Exactly one LuceneSearchResult must be returned
						.single(),
				false));
	}

	/**
	 * Run the query on every shard and reduce the per-shard collectors into a single
	 * {@link FullDocs}.
	 */
	private Mono<FullDocs<LLFieldDoc>> search(Iterable<IndexSearcher> indexSearchers,
			LocalQueryParams queryParams) {
		return Mono
				.fromCallable(() -> {
					LLUtils.ensureBlocking();
					var hitsThreshold = queryParams.getTotalHitsThresholdLong();
					var sort = queryParams.sort();
					var limit = queryParams.limitInt();
					return LMDBFullFieldDocCollector.createSharedManager(env, sort, limit, hitsThreshold);
				})
				.flatMap(collectorManager -> Flux
						.fromIterable(indexSearchers)
						// One collector per shard, all created by the same shared manager
						.flatMap(indexSearcher -> Mono.fromCallable(() -> {
							LLUtils.ensureBlocking();
							var shardCollector = collectorManager.newCollector();
							assert queryParams.complete() == shardCollector.scoreMode().isExhaustive();
							queryParams.getScoreModeOptional().ifPresent(expectedScoreMode -> {
								assert expectedScoreMode == shardCollector.scoreMode();
							});
							indexSearcher.search(queryParams.query(), shardCollector);
							return shardCollector;
						}))
						.collectList()
						.flatMap(shardCollectors -> Mono.fromCallable(() -> {
							LLUtils.ensureBlocking();
							return collectorManager.reduce(shardCollectors);
						}))
				);
	}

	/**
	 * Convert the collected docs into a {@link LuceneSearchResult}: iteration starts at the
	 * query offset and at most the query limit of hits is emitted. The index searchers are
	 * closed through the callback given to the result.
	 */
	private Mono<Send<LuceneSearchResult>> computeResults(Mono<FullDocs<LLFieldDoc>> dataMono,
			LLIndexSearchers indexSearchers,
			String keyFieldName,
			LocalQueryParams queryParams) {
		return dataMono.map(fullDocs -> {
			var hitsCount = LuceneUtils.convertTotalHitsCount(fullDocs.totalHits());
			Flux<LLKeyScore> pagedHits = LuceneUtils
					.convertHits(fullDocs.iterate(queryParams.offsetLong()).map(LLFieldDoc::toFieldDoc),
							indexSearchers.shards(), keyFieldName, true)
					.take(queryParams.limitLong(), true);
			var searchResult = new LuceneSearchResult(hitsCount, pagedHits, indexSearchers::close);
			return searchResult.send();
		});
	}

	/**
	 * Close the {@link LLTempLMDBEnv} created by the constructor.
	 */
	@Override
	public void close() throws IOException {
		env.close();
	}

	/**
	 * @return the constant name of this searcher
	 */
	@Override
	public String getName() {
		return "sorted scored full multi";
	}
}

View File

@ -69,8 +69,8 @@ public class UnsortedScoredFullMultiSearcher implements MultiSearcher, Closeable
return Mono
.fromCallable(() -> {
LLUtils.ensureBlocking();
var totalHitsThreshold = queryParams.getTotalHitsThreshold();
return LMDBFullScoreDocCollector.createSharedManager(env, queryParams.limit(), totalHitsThreshold);
var totalHitsThreshold = queryParams.getTotalHitsThresholdLong();
return LMDBFullScoreDocCollector.createSharedManager(env, queryParams.limitLong(), totalHitsThreshold);
})
.flatMap(sharedManager -> Flux
.fromIterable(indexSearchers)
@ -105,9 +105,9 @@ public class UnsortedScoredFullMultiSearcher implements MultiSearcher, Closeable
var totalHitsCount = LuceneUtils.convertTotalHitsCount(data.totalHits());
Flux<LLKeyScore> hitsFlux = LuceneUtils
.convertHits(data.iterate(queryParams.offset()).map(LLScoreDoc::toScoreDoc),
.convertHits(data.iterate(queryParams.offsetLong()).map(LLScoreDoc::toScoreDoc),
indexSearchers.shards(), keyFieldName, true)
.take(queryParams.limit(), true);
.take(queryParams.limitLong(), true);
return new LuceneSearchResult(totalHitsCount, hitsFlux, indexSearchers::close).send();
});

View File

@ -42,11 +42,11 @@ public class UnsortedUnscoredSimpleMultiSearcher implements MultiSearcher {
return Mono
.fromRunnable(() -> {
LLUtils.ensureBlocking();
if (queryParams2.isSorted() && queryParams2.limit() > 0) {
if (queryParams2.isSorted() && queryParams2.limitLong() > 0) {
throw new UnsupportedOperationException("Sorted queries are not supported"
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
}
if (queryParams2.needsScores() && queryParams2.limit() > 0) {
if (queryParams2.needsScores() && queryParams2.limitLong() > 0) {
throw new UnsupportedOperationException("Scored queries are not supported"
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
}
@ -73,8 +73,8 @@ public class UnsortedUnscoredSimpleMultiSearcher implements MultiSearcher {
var totalHitsCount = new TotalHitsCount(totalHitsCountValue, exactTotalHitsCount);
Flux<LLKeyScore> mergedFluxes = Flux
.merge(resultsFluxes)
.skip(queryParams2.offset())
.take(queryParams2.limit(), true);
.skip(queryParams2.offsetLong())
.take(queryParams2.limitLong(), true);
return new LuceneSearchResult(totalHitsCount, mergedFluxes, () -> {
for (LuceneSearchResult luceneSearchResult : resultsToDrop) {
@ -92,8 +92,8 @@ public class UnsortedUnscoredSimpleMultiSearcher implements MultiSearcher {
private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) {
return new LocalQueryParams(queryParams.query(),
0,
LuceneUtils.safeLongToInt((long) queryParams.offset() + (long) queryParams.limit()),
0L,
queryParams.offsetLong() + queryParams.limitLong(),
queryParams.pageLimits(),
queryParams.minCompetitiveScore(),
queryParams.sort(),

View File

@ -50,11 +50,11 @@ public class UnsortedUnscoredStreamingMultiSearcher implements MultiSearcher {
return queryParamsMono
.flatMap(queryParams2 -> {
var localQueryParams = getLocalQueryParams(queryParams2);
if (queryParams2.isSorted() && queryParams2.limit() > 0) {
if (queryParams2.isSorted() && queryParams2.limitLong() > 0) {
return Mono.error(new UnsupportedOperationException("Sorted queries are not supported"
+ " by UnsortedUnscoredContinuousLuceneMultiSearcher"));
}
if (queryParams2.needsScores() && queryParams2.limit() > 0) {
if (queryParams2.needsScores() && queryParams2.limitLong() > 0) {
return Mono.error(new UnsupportedOperationException("Scored queries are not supported"
+ " by UnsortedUnscoredContinuousLuceneMultiSearcher"));
}
@ -101,8 +101,8 @@ public class UnsortedUnscoredStreamingMultiSearcher implements MultiSearcher {
var totalHitsCount = new TotalHitsCount(0, false);
Flux<LLKeyScore> mergedFluxes = resultsFlux
.skip(queryParams2.offset())
.take(queryParams2.limit(), true);
.skip(queryParams2.offsetLong())
.take(queryParams2.limitLong(), true);
return new LuceneSearchResult(totalHitsCount, mergedFluxes, indexSearchers::close).send();
});
@ -112,8 +112,8 @@ public class UnsortedUnscoredStreamingMultiSearcher implements MultiSearcher {
private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) {
return new LocalQueryParams(queryParams.query(),
0,
LuceneUtils.safeLongToInt((long) queryParams.offset() + (long) queryParams.limit()),
0L,
queryParams.offsetLong() + queryParams.limitLong(),
queryParams.pageLimits(),
queryParams.minCompetitiveScore(),
queryParams.sort(),

View File

@ -7,6 +7,7 @@ import static it.cavallium.dbengine.SyncUtils.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
import io.net5.buffer.PooledByteBufAllocator;
import it.cavallium.dbengine.DbTestUtils.TempDb;
import it.cavallium.dbengine.DbTestUtils.TestAllocator;
import it.cavallium.dbengine.client.LuceneIndex;
@ -30,8 +31,10 @@ import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.CountLocalSearcher;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import it.cavallium.dbengine.lucene.searcher.OfficialSearcher;
import it.cavallium.dbengine.lucene.searcher.ScoredPagedMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.PagedLocalSearcher;
import it.cavallium.dbengine.lucene.searcher.SortedScoredFullMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.UnsortedUnscoredSimpleMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.UnsortedScoredFullMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.UnsortedUnscoredStreamingMultiSearcher;
@ -76,6 +79,9 @@ public class TestLuceneSearches {
private static final Map<String, String> ELEMENTS;
static {
// Start the pool by creating and deleting a direct buffer
PooledByteBufAllocator.DEFAULT.directBuffer().release();
var modifiableElements = new HashMap<String, String>();
modifiableElements.put("test-key-1", "0123456789");
modifiableElements.put("test-key-2", "test 0123456789 test word");
@ -148,7 +154,9 @@ public class TestLuceneSearches {
sink.next(new UnsortedUnscoredSimpleMultiSearcher(new CountLocalSearcher()));
} else {
sink.next(new ScoredPagedMultiSearcher());
if (!info.sorted() || info.sortedByScore()) {
if (info.sorted() && !info.sortedByScore()) {
sink.next(new SortedScoredFullMultiSearcher());
} else {
sink.next(new UnsortedScoredFullMultiSearcher());
}
if (!info.sorted()) {
@ -183,15 +191,12 @@ public class TestLuceneSearches {
.toStream();
}
private static void runSearchers(ExpectedQueryType expectedQueryType, FailableConsumer<LocalSearcher, Throwable> consumer) {
private static void runSearchers(ExpectedQueryType expectedQueryType, FailableConsumer<LocalSearcher, Throwable> consumer)
throws Throwable {
var searchers = run(getSearchers(expectedQueryType).collectList());
for (LocalSearcher searcher : searchers) {
log.info("Using searcher \"{}\"", searcher.getName());
try {
consumer.accept(searcher);
} catch (Throwable e) {
Assertions.fail(e);
}
consumer.accept(searcher);
}
}
@ -248,7 +253,7 @@ public class TestLuceneSearches {
@ParameterizedTest
@MethodSource("provideQueryArgumentsScoreModeAndSort")
public void testSearchNoDocs(boolean shards, MultiSort<SearchResultKey<String>> multiSort) {
public void testSearchNoDocs(boolean shards, MultiSort<SearchResultKey<String>> multiSort) throws Throwable {
var sorted = multiSort.isSorted();
var sortedByScore = multiSort.getQuerySort().getBasicType$() == BasicType.ScoreSort;
runSearchers(new ExpectedQueryType(shards, sorted, sortedByScore, true, false), searcher -> {
@ -271,7 +276,7 @@ public class TestLuceneSearches {
@ParameterizedTest
@MethodSource("provideQueryArgumentsScoreModeAndSort")
public void testSearchAllDocs(boolean shards, MultiSort<SearchResultKey<String>> multiSort) {
public void testSearchAllDocs(boolean shards, MultiSort<SearchResultKey<String>> multiSort) throws Throwable {
var sorted = multiSort.isSorted();
var sortedByScore = multiSort.getQuerySort().getBasicType$() == BasicType.ScoreSort;
runSearchers(new ExpectedQueryType(shards, sorted, sortedByScore, true, false), (LocalSearcher searcher) -> {
@ -287,12 +292,21 @@ public class TestLuceneSearches {
assertHitsIfPossible(ELEMENTS.size(), hits);
var keys = getResults(results);
assertResults(ELEMENTS.keySet().stream().toList(), keys, false, sortedByScore);
var officialSearcher = new OfficialSearcher();
luceneIndex = getLuceneIndex(shards, officialSearcher);
var officialQuery = queryBuilder.limit(ELEMENTS.size() * 2L).build();
try (var officialResults = run(luceneIndex.search(officialQuery)).receive()) {
var officialKeys = getResults(officialResults).stream().toList();
assertResults(officialKeys, keys, sorted, sortedByScore);
}
}
});
}
private void assertResults(List<String> expectedKeys, List<Scored> resultKeys, boolean sorted, boolean sortedByScore) {
private void assertResults(List<Scored> expectedKeys, List<Scored> resultKeys, boolean sorted, boolean sortedByScore) {
if (sortedByScore) {
float lastScore = Float.NEGATIVE_INFINITY;
@ -304,12 +318,14 @@ public class TestLuceneSearches {
}
}
if (!sorted) {
var results = resultKeys.stream().map(Scored::key).collect(Collectors.toSet());
Assertions.assertEquals(new HashSet<>(expectedKeys), results);
} else {
if (sortedByScore) {
Assertions.assertEquals(expectedKeys, resultKeys);
} else if (sorted) {
var results = resultKeys.stream().map(Scored::key).toList();
Assertions.assertEquals(expectedKeys, results);
Assertions.assertEquals(expectedKeys.stream().map(Scored::key).toList(), results);
} else {
var results = resultKeys.stream().map(Scored::key).collect(Collectors.toSet());
Assertions.assertEquals(new HashSet<>(expectedKeys.stream().map(Scored::key).toList()), results);
}
}