Test huge queries
This commit is contained in:
parent
4a08a876ca
commit
eb02e0f18d
|
@ -24,7 +24,8 @@ public class EmptyPriorityQueue<T> implements PriorityQueue<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void replaceTop(T newTop) {
|
public void replaceTop(T oldTop, T newTop) {
|
||||||
|
assert oldTop == null;
|
||||||
assert newTop == null;
|
assert newTop == null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,9 +34,7 @@ public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {
|
||||||
FullDocs<T> docs = new FullDocs<>() {
|
FullDocs<T> docs = new FullDocs<>() {
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
for (FullDocs<T> fullDoc : fullDocs) {
|
mergedIterable.close();
|
||||||
fullDoc.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -78,7 +76,7 @@ public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {
|
||||||
static <T extends LLDoc> ResourceIterable<T> mergeResourceIterable(
|
static <T extends LLDoc> ResourceIterable<T> mergeResourceIterable(
|
||||||
@Nullable Sort sort,
|
@Nullable Sort sort,
|
||||||
FullDocs<T>[] fullDocs) {
|
FullDocs<T>[] fullDocs) {
|
||||||
return new ResourceIterable<T>() {
|
return new ResourceIterable<>() {
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
for (FullDocs<T> fullDoc : fullDocs) {
|
for (FullDocs<T> fullDoc : fullDocs) {
|
||||||
|
@ -138,7 +136,7 @@ public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {
|
||||||
@SuppressWarnings("unchecked") Flux<T>[] fluxes = new Flux[fullDocs.length];
|
@SuppressWarnings("unchecked") Flux<T>[] fluxes = new Flux[fullDocs.length];
|
||||||
for (int i = 0; i < iterables.length; i++) {
|
for (int i = 0; i < iterables.length; i++) {
|
||||||
var shardIndex = i;
|
var shardIndex = i;
|
||||||
fluxes[i] = iterables[i].<T>map(shard -> {
|
fluxes[i] = iterables[i].map(shard -> {
|
||||||
if (shard instanceof LLScoreDoc scoreDoc) {
|
if (shard instanceof LLScoreDoc scoreDoc) {
|
||||||
//noinspection unchecked
|
//noinspection unchecked
|
||||||
return (T) new LLScoreDoc(scoreDoc.doc(), scoreDoc.score(), shardIndex);
|
return (T) new LLScoreDoc(scoreDoc.doc(), scoreDoc.score(), shardIndex);
|
||||||
|
|
|
@ -10,7 +10,6 @@ import it.cavallium.dbengine.database.disk.RocksIterWithReadOpts;
|
||||||
import it.cavallium.dbengine.database.disk.StandardRocksDBColumn;
|
import it.cavallium.dbengine.database.disk.StandardRocksDBColumn;
|
||||||
import it.cavallium.dbengine.database.disk.UpdateAtomicResultMode;
|
import it.cavallium.dbengine.database.disk.UpdateAtomicResultMode;
|
||||||
import it.cavallium.dbengine.database.disk.UpdateAtomicResultPrevious;
|
import it.cavallium.dbengine.database.disk.UpdateAtomicResultPrevious;
|
||||||
import it.cavallium.dbengine.database.disk.rocksdb.RocksIteratorObj;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -42,6 +41,9 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
|
|
||||||
private long size = 0;
|
private long size = 0;
|
||||||
|
|
||||||
|
private T cachedTop;
|
||||||
|
private boolean cachedTopSet = false;
|
||||||
|
|
||||||
public HugePqPriorityQueue(LLTempHugePqEnv env, HugePqCodec<T> codec) {
|
public HugePqPriorityQueue(LLTempHugePqEnv env, HugePqCodec<T> codec) {
|
||||||
this.tempEnv = env;
|
this.tempEnv = env;
|
||||||
this.env = env.getEnv();
|
this.env = env.getEnv();
|
||||||
|
@ -74,6 +76,7 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
public void add(T element) {
|
public void add(T element) {
|
||||||
ensureThread();
|
ensureThread();
|
||||||
|
|
||||||
|
cachedTopSet = false;
|
||||||
var keyBuf = serializeKey(element);
|
var keyBuf = serializeKey(element);
|
||||||
try (keyBuf) {
|
try (keyBuf) {
|
||||||
try (var readOptions = newReadOptions(); var writeOptions = newWriteOptions()) {
|
try (var readOptions = newReadOptions(); var writeOptions = newWriteOptions()) {
|
||||||
|
@ -107,6 +110,9 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public T top() {
|
public T top() {
|
||||||
|
if (cachedTopSet) {
|
||||||
|
return cachedTop;
|
||||||
|
}
|
||||||
ensureThread();
|
ensureThread();
|
||||||
return databaseTop();
|
return databaseTop();
|
||||||
}
|
}
|
||||||
|
@ -118,9 +124,14 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
if (it.isValid()) {
|
if (it.isValid()) {
|
||||||
var key = it.key();
|
var key = it.key();
|
||||||
try (var keyBuf = rocksDB.getAllocator().copyOf(key)) {
|
try (var keyBuf = rocksDB.getAllocator().copyOf(key)) {
|
||||||
return deserializeKey(keyBuf);
|
var top = deserializeKey(keyBuf);
|
||||||
|
cachedTop = top;
|
||||||
|
cachedTopSet = true;
|
||||||
|
return top;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
cachedTop = null;
|
||||||
|
cachedTopSet = true;
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
} catch (RocksDBException e) {
|
} catch (RocksDBException e) {
|
||||||
|
@ -131,6 +142,7 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
@Override
|
@Override
|
||||||
public T pop() {
|
public T pop() {
|
||||||
ensureThread();
|
ensureThread();
|
||||||
|
cachedTopSet = false;
|
||||||
try (var readOptions = newReadOptions();
|
try (var readOptions = newReadOptions();
|
||||||
var writeOptions = newWriteOptions();
|
var writeOptions = newWriteOptions();
|
||||||
var it = rocksDB.newRocksIterator(true, readOptions, LLRange.all(), false)) {
|
var it = rocksDB.newRocksIterator(true, readOptions, LLRange.all(), false)) {
|
||||||
|
@ -177,10 +189,36 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void replaceTop(T newTop) {
|
public void replaceTop(T oldTop, T newTop) {
|
||||||
ensureThread();
|
ensureThread();
|
||||||
this.pop();
|
cachedTopSet = false;
|
||||||
this.add(newTop);
|
if (oldTop == null) {
|
||||||
|
add(newTop);
|
||||||
|
cachedTop = newTop;
|
||||||
|
cachedTopSet = true;
|
||||||
|
} else {
|
||||||
|
try (var readOptions = newReadOptions();
|
||||||
|
var writeOptions = newWriteOptions();
|
||||||
|
var oldKeyBuf = serializeKey(oldTop);
|
||||||
|
var newKeyBuf = serializeKey(newTop);
|
||||||
|
var ignored = rocksDB.updateAtomic(readOptions,
|
||||||
|
writeOptions,
|
||||||
|
oldKeyBuf,
|
||||||
|
this::reduceOrRemove,
|
||||||
|
UpdateAtomicResultMode.NOTHING
|
||||||
|
);
|
||||||
|
var ignored2 = rocksDB.updateAtomic(readOptions,
|
||||||
|
writeOptions,
|
||||||
|
newKeyBuf,
|
||||||
|
this::incrementOrAdd,
|
||||||
|
UpdateAtomicResultMode.NOTHING
|
||||||
|
)) {
|
||||||
|
cachedTop = newTop;
|
||||||
|
cachedTopSet = true;
|
||||||
|
} catch (IOException ex) {
|
||||||
|
throw new IllegalStateException(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -192,6 +230,7 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
ensureThread();
|
ensureThread();
|
||||||
|
cachedTopSet = false;
|
||||||
try (var wb = new WriteBatch(); var wo = newWriteOptions()) {
|
try (var wb = new WriteBatch(); var wo = newWriteOptions()) {
|
||||||
wb.deleteRange(rocksDB.getColumnFamilyHandle(), new byte[0], getBiggestKey());
|
wb.deleteRange(rocksDB.getColumnFamilyHandle(), new byte[0], getBiggestKey());
|
||||||
size = 0;
|
size = 0;
|
||||||
|
@ -211,6 +250,7 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
public boolean remove(@NotNull T element) {
|
public boolean remove(@NotNull T element) {
|
||||||
ensureThread();
|
ensureThread();
|
||||||
Objects.requireNonNull(element);
|
Objects.requireNonNull(element);
|
||||||
|
cachedTopSet = false;
|
||||||
try (var readOptions = newReadOptions();
|
try (var readOptions = newReadOptions();
|
||||||
var writeOptions = newWriteOptions();
|
var writeOptions = newWriteOptions();
|
||||||
var keyBuf = serializeKey(element)) {
|
var keyBuf = serializeKey(element)) {
|
||||||
|
@ -294,7 +334,11 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
}
|
}
|
||||||
|
|
||||||
return t;
|
return t;
|
||||||
}, RocksIterWithReadOpts::close).concatMapIterable(item -> item);
|
}, rocksIterWithReadOpts -> {
|
||||||
|
if (rocksIterWithReadOpts != null) {
|
||||||
|
rocksIterWithReadOpts.close();
|
||||||
|
}
|
||||||
|
}).concatMapIterable(item -> item);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -309,7 +353,6 @@ public class HugePqPriorityQueue<T> implements PriorityQueue<T>, Reversable<Reve
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
if (closed.compareAndSet(false, true)) {
|
if (closed.compareAndSet(false, true)) {
|
||||||
ensureThread();
|
|
||||||
this.tempEnv.freeDb(hugePqId);
|
this.tempEnv.freeDb(hugePqId);
|
||||||
if (this.codec instanceof SafeCloseable closeable) {
|
if (this.codec instanceof SafeCloseable closeable) {
|
||||||
closeable.close();
|
closeable.close();
|
||||||
|
|
|
@ -8,6 +8,6 @@ class LLDocElementScoreComparator implements Comparator<LLDoc> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(LLDoc hitA, LLDoc hitB) {
|
public int compare(LLDoc hitA, LLDoc hitB) {
|
||||||
return Float.compare(hitA.score(), hitB.score());
|
return Float.compare(hitB.score(), hitA.score());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ public interface PriorityQueue<T> extends ResourceIterable<T>, SafeCloseable {
|
||||||
/**
|
/**
|
||||||
* Replace the top of the pq with {@code newTop}
|
* Replace the top of the pq with {@code newTop}
|
||||||
*/
|
*/
|
||||||
void replaceTop(T newTop);
|
void replaceTop(T oldTop, T newTop);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of elements currently stored in the PriorityQueue.
|
* Returns the number of elements currently stored in the PriorityQueue.
|
||||||
|
|
|
@ -471,7 +471,7 @@ public abstract class HugePqFullFieldDocCollector extends
|
||||||
final void updateBottom(int doc) {
|
final void updateBottom(int doc) {
|
||||||
// bottom.score is already set to Float.NaN in add().
|
// bottom.score is already set to Float.NaN in add().
|
||||||
var bottom = pq.top();
|
var bottom = pq.top();
|
||||||
pq.replaceTop(new LLSlotDoc(docBase + doc, bottom.score(), bottom.shardIndex(), bottom.slot()));
|
pq.replaceTop(bottom, new LLSlotDoc(docBase + doc, bottom.score(), bottom.shardIndex(), bottom.slot()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.search.CollectorManager;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LeafCollector;
|
import org.apache.lucene.search.LeafCollector;
|
||||||
import org.apache.lucene.search.Scorable;
|
import org.apache.lucene.search.Scorable;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.ScoreMode;
|
import org.apache.lucene.search.ScoreMode;
|
||||||
import org.apache.lucene.search.TotalHits;
|
import org.apache.lucene.search.TotalHits;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
@ -106,8 +107,7 @@ public abstract class HugePqFullScoreDocCollector extends
|
||||||
|
|
||||||
// If there is a limit, and it's reached, use the replacement logic
|
// If there is a limit, and it's reached, use the replacement logic
|
||||||
if (limit != null && pq.size() >= limit) {
|
if (limit != null && pq.size() >= limit) {
|
||||||
var pqTop = pq.top();
|
if (pq.top() != null && score <= pq.top().score()) {
|
||||||
if (pqTop != null && score <= pqTop.score()) {
|
|
||||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||||
// we just reached totalHitsThreshold, we can start setting the min
|
// we just reached totalHitsThreshold, we can start setting the min
|
||||||
// competitive score now
|
// competitive score now
|
||||||
|
@ -119,7 +119,7 @@ public abstract class HugePqFullScoreDocCollector extends
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
// Remove the top element, then add the following element
|
// Remove the top element, then add the following element
|
||||||
pq.replaceTop(new LLScoreDoc(doc + docBase, score, -1));
|
pq.replaceTop(pq.top(), new LLScoreDoc(doc + docBase, score, -1));
|
||||||
// The minimum competitive score will be updated later
|
// The minimum competitive score will be updated later
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -296,13 +296,12 @@ public abstract class HugePqFullScoreDocCollector extends
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
|
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
|
||||||
var pqTop = pq.top();
|
|
||||||
if (hitsThresholdChecker.isThresholdReached(true)
|
if (hitsThresholdChecker.isThresholdReached(true)
|
||||||
&& pqTop != null
|
&& pq.top() != null
|
||||||
&& pqTop.score() != Float.NEGATIVE_INFINITY) { // -Infinity is the score of sentinels
|
&& pq.top().score() != Float.NEGATIVE_INFINITY) { // -Infinity is the score of sentinels
|
||||||
// since we tie-break on doc id and collect in doc id order, we can require
|
// since we tie-break on doc id and collect in doc id order, we can require
|
||||||
// the next float
|
// the next float
|
||||||
float localMinScore = Math.nextUp(pqTop.score());
|
float localMinScore = Math.nextUp(pq.top().score());
|
||||||
if (localMinScore > minCompetitiveScore) {
|
if (localMinScore > minCompetitiveScore) {
|
||||||
scorer.setMinCompetitiveScore(localMinScore);
|
scorer.setMinCompetitiveScore(localMinScore);
|
||||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||||
|
@ -311,7 +310,7 @@ public abstract class HugePqFullScoreDocCollector extends
|
||||||
// we don't use the next float but we register the document
|
// we don't use the next float but we register the document
|
||||||
// id so that other leaves can require it if they are after
|
// id so that other leaves can require it if they are after
|
||||||
// the current maximum
|
// the current maximum
|
||||||
minScoreAcc.accumulate(docBase, pqTop.score());
|
minScoreAcc.accumulate(docBase, pq.top().score());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermStates;
|
import org.apache.lucene.index.TermStates;
|
||||||
|
@ -16,17 +17,16 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryVisitor;
|
import org.apache.lucene.search.QueryVisitor;
|
||||||
import org.apache.lucene.search.TermStatistics;
|
import org.apache.lucene.search.TermStatistics;
|
||||||
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
|
||||||
public class ShardIndexSearcher extends IndexSearcher {
|
public class ShardIndexSearcher extends IndexSearcher {
|
||||||
public final int myNodeID;
|
public final int myNodeID;
|
||||||
private final IndexSearcher[] searchers;
|
private final IndexSearcher[] searchers;
|
||||||
|
|
||||||
private final Map<FieldAndShar, CollectionStatistics> collectionStatsCache;
|
private final Map<FieldAndShar, CachedCollectionStatistics> collectionStatsCache;
|
||||||
private final Map<TermAndShard, TermStatistics> termStatsCache;
|
private final Map<TermAndShard, TermStatistics> termStatsCache;
|
||||||
|
|
||||||
public ShardIndexSearcher(SharedShardStatistics sharedShardStatistics,
|
public ShardIndexSearcher(SharedShardStatistics sharedShardStatistics, List<IndexSearcher> searchers, int nodeID) {
|
||||||
List<IndexSearcher> searchers,
|
|
||||||
int nodeID) {
|
|
||||||
super(searchers.get(nodeID).getIndexReader(), searchers.get(nodeID).getExecutor());
|
super(searchers.get(nodeID).getIndexReader(), searchers.get(nodeID).getExecutor());
|
||||||
this.collectionStatsCache = sharedShardStatistics.collectionStatsCache;
|
this.collectionStatsCache = sharedShardStatistics.collectionStatsCache;
|
||||||
this.termStatsCache = sharedShardStatistics.termStatsCache;
|
this.termStatsCache = sharedShardStatistics.termStatsCache;
|
||||||
|
@ -93,15 +93,13 @@ public class ShardIndexSearcher extends IndexSearcher {
|
||||||
|
|
||||||
final Set<Term> missing = new HashSet<>();
|
final Set<Term> missing = new HashSet<>();
|
||||||
for (Term term : terms) {
|
for (Term term : terms) {
|
||||||
final TermAndShard key =
|
final TermAndShard key = new TermAndShard(nodeID, term);
|
||||||
new TermAndShard(nodeID, term);
|
|
||||||
if (!termStatsCache.containsKey(key)) {
|
if (!termStatsCache.containsKey(key)) {
|
||||||
missing.add(term);
|
missing.add(term);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (missing.size() != 0) {
|
if (missing.size() != 0) {
|
||||||
for (Map.Entry<Term, TermStatistics> ent :
|
for (Map.Entry<Term, TermStatistics> ent : getNodeTermStats(missing, nodeID).entrySet()) {
|
||||||
getNodeTermStats(missing, nodeID).entrySet()) {
|
|
||||||
if (ent.getValue() != null) {
|
if (ent.getValue() != null) {
|
||||||
final TermAndShard key = new TermAndShard(nodeID, ent.getKey());
|
final TermAndShard key = new TermAndShard(nodeID, ent.getKey());
|
||||||
termStatsCache.put(key, ent.getValue());
|
termStatsCache.put(key, ent.getValue());
|
||||||
|
@ -115,8 +113,7 @@ public class ShardIndexSearcher extends IndexSearcher {
|
||||||
|
|
||||||
// Mock: in a real env, this would hit the wire and get
|
// Mock: in a real env, this would hit the wire and get
|
||||||
// term stats from remote node
|
// term stats from remote node
|
||||||
Map<Term, TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID)
|
Map<Term, TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID) throws IOException {
|
||||||
throws IOException {
|
|
||||||
var s = searchers[nodeID];
|
var s = searchers[nodeID];
|
||||||
final Map<Term, TermStatistics> stats = new HashMap<>();
|
final Map<Term, TermStatistics> stats = new HashMap<>();
|
||||||
if (s == null) {
|
if (s == null) {
|
||||||
|
@ -143,8 +140,7 @@ public class ShardIndexSearcher extends IndexSearcher {
|
||||||
if (nodeID == myNodeID) {
|
if (nodeID == myNodeID) {
|
||||||
subStats = super.termStatistics(term, docFreq, totalTermFreq);
|
subStats = super.termStatistics(term, docFreq, totalTermFreq);
|
||||||
} else {
|
} else {
|
||||||
final TermAndShard key =
|
final TermAndShard key = new TermAndShard(nodeID, term);
|
||||||
new TermAndShard(nodeID, term);
|
|
||||||
subStats = termStatsCache.get(key);
|
subStats = termStatsCache.get(key);
|
||||||
if (subStats == null) {
|
if (subStats == null) {
|
||||||
continue; // term not found
|
continue; // term not found
|
||||||
|
@ -176,8 +172,14 @@ public class ShardIndexSearcher extends IndexSearcher {
|
||||||
final CollectionStatistics nodeStats;
|
final CollectionStatistics nodeStats;
|
||||||
if (nodeID == myNodeID) {
|
if (nodeID == myNodeID) {
|
||||||
nodeStats = super.collectionStatistics(field);
|
nodeStats = super.collectionStatistics(field);
|
||||||
|
collectionStatsCache.put(key, new CachedCollectionStatistics(nodeStats));
|
||||||
} else {
|
} else {
|
||||||
nodeStats = collectionStatsCache.get(key);
|
var nodeStatsOptional = collectionStatsCache.get(key);
|
||||||
|
if (nodeStatsOptional == null) {
|
||||||
|
nodeStatsOptional = new CachedCollectionStatistics(computeNodeCollectionStatistics(key));
|
||||||
|
collectionStatsCache.put(key, nodeStatsOptional);
|
||||||
|
}
|
||||||
|
nodeStats = nodeStatsOptional.collectionStatistics();
|
||||||
}
|
}
|
||||||
if (nodeStats == null) {
|
if (nodeStats == null) {
|
||||||
continue; // field not in sub at all
|
continue; // field not in sub at all
|
||||||
|
@ -203,6 +205,13 @@ public class ShardIndexSearcher extends IndexSearcher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private CollectionStatistics computeNodeCollectionStatistics(FieldAndShar fieldAndShard) throws IOException {
|
||||||
|
var searcher = searchers[fieldAndShard.nodeID];
|
||||||
|
return searcher.collectionStatistics(fieldAndShard.field);
|
||||||
|
}
|
||||||
|
|
||||||
|
public record CachedCollectionStatistics(@Nullable CollectionStatistics collectionStatistics) {}
|
||||||
|
|
||||||
public static class TermAndShard {
|
public static class TermAndShard {
|
||||||
private final int nodeID;
|
private final int nodeID;
|
||||||
private final Term term;
|
private final Term term;
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
package it.cavallium.dbengine.lucene.searcher;
|
package it.cavallium.dbengine.lucene.searcher;
|
||||||
|
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.CachedCollectionStatistics;
|
||||||
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.FieldAndShar;
|
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.FieldAndShar;
|
||||||
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.TermAndShard;
|
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher.TermAndShard;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import org.apache.lucene.search.CollectionStatistics;
|
import org.apache.lucene.search.CollectionStatistics;
|
||||||
import org.apache.lucene.search.TermStatistics;
|
import org.apache.lucene.search.TermStatistics;
|
||||||
|
|
||||||
public class SharedShardStatistics {
|
public class SharedShardStatistics {
|
||||||
|
|
||||||
public final Map<FieldAndShar, CollectionStatistics> collectionStatsCache = new ConcurrentHashMap<>();
|
public final Map<FieldAndShar, CachedCollectionStatistics> collectionStatsCache = new ConcurrentHashMap<>();
|
||||||
public final Map<TermAndShard, TermStatistics> termStatsCache = new ConcurrentHashMap<>();
|
public final Map<TermAndShard, TermStatistics> termStatsCache = new ConcurrentHashMap<>();
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ import it.cavallium.dbengine.lucene.PriorityQueue;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.search.HitQueue;
|
import org.apache.lucene.search.HitQueue;
|
||||||
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Flux;
|
||||||
|
|
||||||
|
@ -35,7 +36,8 @@ public class PriorityQueueAdaptor<T> implements PriorityQueue<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void replaceTop(T newTop) {
|
public void replaceTop(T oldTop, T newTop) {
|
||||||
|
assert Objects.equals(oldTop, hitQueue.top());
|
||||||
hitQueue.updateTop(newTop);
|
hitQueue.updateTop(newTop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -311,9 +311,9 @@ public class TestHugePqHitQueue {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void replaceTop(ScoreDoc newTop) {
|
public void replaceTop(ScoreDoc oldTop, ScoreDoc newTop) {
|
||||||
referenceQueue.replaceTop(newTop);
|
referenceQueue.replaceTop(oldTop, newTop);
|
||||||
myQueue.replaceTop(newTop);
|
myQueue.replaceTop(oldTop, newTop);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,174 @@
|
||||||
|
package it.cavallium.dbengine.lucene.hugepq.search;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
|
||||||
|
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
|
||||||
|
import it.cavallium.dbengine.lucene.LLFieldDoc;
|
||||||
|
import it.cavallium.dbengine.lucene.LLScoreDoc;
|
||||||
|
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.SharedShardStatistics;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.document.LongPoint;
|
||||||
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.FieldDoc;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.SortField;
|
||||||
|
import org.apache.lucene.search.SortField.Type;
|
||||||
|
import org.apache.lucene.search.SortedNumericSortField;
|
||||||
|
import org.apache.lucene.search.TopFieldCollector;
|
||||||
|
import org.apache.lucene.search.TopScoreDocCollector;
|
||||||
|
import org.apache.lucene.search.TotalHits.Relation;
|
||||||
|
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||||
|
import org.apache.lucene.util.QueryBuilder;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
public class HugePqFullFieldDocCollectorTest {
|
||||||
|
Sort sort = new Sort(new SortedNumericSortField("number_sort", Type.LONG));
|
||||||
|
Query luceneQuery = LongPoint.newRangeQuery("number", -100, 100);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSingleShard() throws IOException {
|
||||||
|
try (var dir = new ByteBuffersDirectory(); var env = new LLTempHugePqEnv()) {
|
||||||
|
var analyzer = new WordAnalyzer(true, true);
|
||||||
|
var writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
|
||||||
|
writer.updateDocument(new Term("id", "00"), List.of(new SortedNumericDocValuesField("number_sort", 1), new LongPoint("number", 1)));
|
||||||
|
writer.updateDocument(new Term("id", "01"), List.of(new SortedNumericDocValuesField("number_sort", 44), new LongPoint("number", 44)));
|
||||||
|
writer.updateDocument(new Term("id", "02"), List.of(new SortedNumericDocValuesField("number_sort", 203), new LongPoint("number", 203)));
|
||||||
|
writer.updateDocument(new Term("id", "03"), List.of(new SortedNumericDocValuesField("number_sort", 209), new LongPoint("number", 209)));
|
||||||
|
writer.updateDocument(new Term("id", "04"), List.of(new SortedNumericDocValuesField("number_sort", -33), new LongPoint("number", -33)));
|
||||||
|
writer.updateDocument(new Term("id", "05"), List.of(new SortedNumericDocValuesField("number_sort", 0), new LongPoint("number", 0)));
|
||||||
|
writer.updateDocument(new Term("id", "06"), List.of(new SortedNumericDocValuesField("number_sort", 933), new LongPoint("number", 933)));
|
||||||
|
writer.updateDocument(new Term("id", "07"), List.of(new SortedNumericDocValuesField("number_sort", 6), new LongPoint("number", 6)));
|
||||||
|
writer.updateDocument(new Term("id", "08"), List.of(new SortedNumericDocValuesField("number_sort", -11), new LongPoint("number", -11)));
|
||||||
|
writer.updateDocument(new Term("id", "09"), List.of(new SortedNumericDocValuesField("number_sort", 9996), new LongPoint("number", 9996)));
|
||||||
|
writer.updateDocument(new Term("id", "10"), List.of(new SortedNumericDocValuesField("number_sort", 9), new LongPoint("number", 9)));
|
||||||
|
writer.updateDocument(new Term("id", "11"), List.of(new SortedNumericDocValuesField("number_sort", 66), new LongPoint("number", 66)));
|
||||||
|
writer.updateDocument(new Term("id", "12"), List.of(new SortedNumericDocValuesField("number_sort", 88), new LongPoint("number", 88)));
|
||||||
|
writer.updateDocument(new Term("id", "13"), List.of(new SortedNumericDocValuesField("number_sort", 222), new LongPoint("number", 222)));
|
||||||
|
writer.updateDocument(new Term("id", "14"), List.of(new SortedNumericDocValuesField("number_sort", -2), new LongPoint("number", -2)));
|
||||||
|
writer.updateDocument(new Term("id", "15"), List.of(new SortedNumericDocValuesField("number_sort", 7), new LongPoint("number", 7)));
|
||||||
|
writer.updateDocument(new Term("id", "16"), List.of(new SortedNumericDocValuesField("number_sort", 1010912093), new LongPoint("number", 1010912093)));
|
||||||
|
writer.updateDocument(new Term("id", "17"), List.of(new SortedNumericDocValuesField("number_sort", -3894789), new LongPoint("number", -3894789)));
|
||||||
|
writer.updateDocument(new Term("id", "18"), List.of(new SortedNumericDocValuesField("number_sort", 122), new LongPoint("number", 122)));
|
||||||
|
writer.updateDocument(new Term("id", "19"), List.of(new SortedNumericDocValuesField("number_sort", 2), new LongPoint("number", 2)));
|
||||||
|
writer.flush();
|
||||||
|
writer.commit();
|
||||||
|
try (var reader = DirectoryReader.open(writer, true, true)) {
|
||||||
|
var searcher = new IndexSearcher(reader);
|
||||||
|
var expectedResults = searcher.search(luceneQuery, 20, sort, false);
|
||||||
|
var expectedTotalHits = new TotalHitsCount(expectedResults.totalHits.value, expectedResults.totalHits.relation == Relation.EQUAL_TO);
|
||||||
|
var expectedDocs = Arrays
|
||||||
|
.stream(expectedResults.scoreDocs)
|
||||||
|
.map(sd -> (FieldDoc) sd)
|
||||||
|
.map(fieldDoc -> new LLFieldDoc(fieldDoc.doc, fieldDoc.score, fieldDoc.shardIndex, Arrays.asList(fieldDoc.fields)))
|
||||||
|
.toList();
|
||||||
|
try (var collector = HugePqFullFieldDocCollector.create(env, sort, 20, Integer.MAX_VALUE)) {
|
||||||
|
searcher.search(luceneQuery, collector);
|
||||||
|
var docs = collector.fullDocs().iterate().collectList().blockOptional().orElseThrow();
|
||||||
|
System.out.println("Expected docs:");
|
||||||
|
for (var expectedDoc : expectedDocs) {
|
||||||
|
System.out.println(expectedDoc);
|
||||||
|
}
|
||||||
|
System.out.println("");
|
||||||
|
System.out.println("Obtained docs:");
|
||||||
|
for (var doc : docs) {
|
||||||
|
System.out.println(doc);
|
||||||
|
}
|
||||||
|
assertEquals(expectedDocs,
|
||||||
|
docs.stream().map(elem -> new LLFieldDoc(elem.doc(), elem.score(), -1, elem.fields())).toList()
|
||||||
|
);
|
||||||
|
assertEquals(expectedTotalHits, new TotalHitsCount(collector.getTotalHits(), true));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiShard() throws IOException {
|
||||||
|
try (var dir1 = new ByteBuffersDirectory(); var dir2 = new ByteBuffersDirectory(); var env = new LLTempHugePqEnv()) {
|
||||||
|
var analyzer = new WordAnalyzer(true, true);
|
||||||
|
var writer1 = new IndexWriter(dir1, new IndexWriterConfig(analyzer));
|
||||||
|
var writer2 = new IndexWriter(dir2, new IndexWriterConfig(analyzer));
|
||||||
|
writer1.updateDocument(new Term("id", "00"), List.of(new SortedNumericDocValuesField("number_sort", 1), new LongPoint("number", 1)));
|
||||||
|
writer1.updateDocument(new Term("id", "01"), List.of(new SortedNumericDocValuesField("number_sort", 44), new LongPoint("number", 44)));
|
||||||
|
writer1.updateDocument(new Term("id", "02"), List.of(new SortedNumericDocValuesField("number_sort", 203), new LongPoint("number", 203)));
|
||||||
|
writer1.updateDocument(new Term("id", "03"), List.of(new SortedNumericDocValuesField("number_sort", 209), new LongPoint("number", 209)));
|
||||||
|
writer1.updateDocument(new Term("id", "04"), List.of(new SortedNumericDocValuesField("number_sort", -33), new LongPoint("number", -33)));
|
||||||
|
writer1.updateDocument(new Term("id", "05"), List.of(new SortedNumericDocValuesField("number_sort", 0), new LongPoint("number", 0)));
|
||||||
|
writer1.updateDocument(new Term("id", "06"), List.of(new SortedNumericDocValuesField("number_sort", 933), new LongPoint("number", 933)));
|
||||||
|
writer1.updateDocument(new Term("id", "07"), List.of(new SortedNumericDocValuesField("number_sort", 6), new LongPoint("number", 6)));
|
||||||
|
writer1.updateDocument(new Term("id", "08"), List.of(new SortedNumericDocValuesField("number_sort", -11), new LongPoint("number", -11)));
|
||||||
|
writer1.updateDocument(new Term("id", "09"), List.of(new SortedNumericDocValuesField("number_sort", 9996), new LongPoint("number", 9996)));
|
||||||
|
writer2.updateDocument(new Term("id", "10"), List.of(new SortedNumericDocValuesField("number_sort", 9), new LongPoint("number", 9)));
|
||||||
|
writer2.updateDocument(new Term("id", "11"), List.of(new SortedNumericDocValuesField("number_sort", 66), new LongPoint("number", 66)));
|
||||||
|
writer2.updateDocument(new Term("id", "12"), List.of(new SortedNumericDocValuesField("number_sort", 88), new LongPoint("number", 88)));
|
||||||
|
writer2.updateDocument(new Term("id", "13"), List.of(new SortedNumericDocValuesField("number_sort", 222), new LongPoint("number", 222)));
|
||||||
|
writer2.updateDocument(new Term("id", "14"), List.of(new SortedNumericDocValuesField("number_sort", -2), new LongPoint("number", -2)));
|
||||||
|
writer2.updateDocument(new Term("id", "15"), List.of(new SortedNumericDocValuesField("number_sort", 7), new LongPoint("number", 7)));
|
||||||
|
writer2.updateDocument(new Term("id", "16"), List.of(new SortedNumericDocValuesField("number_sort", 1010912093), new LongPoint("number", 1010912093)));
|
||||||
|
writer2.updateDocument(new Term("id", "17"), List.of(new SortedNumericDocValuesField("number_sort", -3894789), new LongPoint("number", -3894789)));
|
||||||
|
writer2.updateDocument(new Term("id", "18"), List.of(new SortedNumericDocValuesField("number_sort", 122), new LongPoint("number", 122)));
|
||||||
|
writer2.updateDocument(new Term("id", "19"), List.of(new SortedNumericDocValuesField("number_sort", 2), new LongPoint("number", 2)));
|
||||||
|
writer1.flush();
|
||||||
|
writer2.flush();
|
||||||
|
writer1.commit();
|
||||||
|
writer2.commit();
|
||||||
|
var sharedStats = new SharedShardStatistics();
|
||||||
|
try (var reader1 = DirectoryReader.open(writer1, true, true);
|
||||||
|
var reader2 = DirectoryReader.open(writer2, true, true)) {
|
||||||
|
var searcher1 = new IndexSearcher(reader1);
|
||||||
|
var searcher2 = new IndexSearcher(reader2);
|
||||||
|
var shardSearcher1 = new ShardIndexSearcher(sharedStats, List.of(searcher1, searcher2), 0);
|
||||||
|
var shardSearcher2 = new ShardIndexSearcher(sharedStats, List.of(searcher1, searcher2), 1);
|
||||||
|
var standardSharedManager = TopFieldCollector.createSharedManager(sort, 20, null, Integer.MAX_VALUE);
|
||||||
|
var standardCollector1 = standardSharedManager.newCollector();
|
||||||
|
var standardCollector2 = standardSharedManager.newCollector();
|
||||||
|
shardSearcher1.search(luceneQuery, standardCollector1);
|
||||||
|
shardSearcher2.search(luceneQuery, standardCollector2);
|
||||||
|
var expectedResults = standardSharedManager.reduce(List.of(standardCollector1, standardCollector2));
|
||||||
|
var expectedTotalHits = new TotalHitsCount(expectedResults.totalHits.value, expectedResults.totalHits.relation == Relation.EQUAL_TO);
|
||||||
|
var expectedDocs = Arrays
|
||||||
|
.stream(expectedResults.scoreDocs)
|
||||||
|
.map(sd -> (FieldDoc) sd)
|
||||||
|
.map(fieldDoc -> new LLFieldDoc(fieldDoc.doc, fieldDoc.score, fieldDoc.shardIndex, Arrays.asList(fieldDoc.fields)))
|
||||||
|
.toList();
|
||||||
|
var collectorManager = HugePqFullFieldDocCollector.createSharedManager(env, sort, 20, Integer.MAX_VALUE);
|
||||||
|
var collector1 = collectorManager.newCollector();
|
||||||
|
var collector2 = collectorManager.newCollector();
|
||||||
|
shardSearcher1.search(luceneQuery, collector1);
|
||||||
|
shardSearcher2.search(luceneQuery, collector2);
|
||||||
|
try (var results = collectorManager.reduce(List.of(collector1, collector2))) {
|
||||||
|
var docs = results.iterate().collectList().blockOptional().orElseThrow();
|
||||||
|
System.out.println("Expected docs:");
|
||||||
|
for (var expectedDoc : expectedDocs) {
|
||||||
|
System.out.println(expectedDoc);
|
||||||
|
}
|
||||||
|
System.out.println("");
|
||||||
|
System.out.println("Obtained docs:");
|
||||||
|
for (var doc : docs) {
|
||||||
|
System.out.println(doc);
|
||||||
|
}
|
||||||
|
assertEquals(expectedDocs,
|
||||||
|
docs.stream().map(elem -> new LLFieldDoc(elem.doc(), elem.score(), -1, elem.fields())).toList()
|
||||||
|
);
|
||||||
|
assertEquals(expectedTotalHits, new TotalHitsCount(results.totalHits().value, results.totalHits().relation == Relation.EQUAL_TO));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,170 @@
|
||||||
|
package it.cavallium.dbengine.lucene.hugepq.search;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import it.cavallium.dbengine.client.query.QueryUtils;
|
||||||
|
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
|
||||||
|
import it.cavallium.dbengine.database.disk.IndexSearcherManager;
|
||||||
|
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
|
||||||
|
import it.cavallium.dbengine.lucene.LLScoreDoc;
|
||||||
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||||
|
import it.cavallium.dbengine.lucene.analyzer.LegacyWordAnalyzer;
|
||||||
|
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
|
||||||
|
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.SharedShardStatistics;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.CollectorManager;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.SearcherManager;
|
||||||
|
import org.apache.lucene.search.SimpleCollector;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.search.TopDocsCollector;
|
||||||
|
import org.apache.lucene.search.TopScoreDocCollector;
|
||||||
|
import org.apache.lucene.search.TotalHits.Relation;
|
||||||
|
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||||
|
import org.apache.lucene.util.QueryBuilder;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
public class HugePqFullScoreDocCollectorTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSingleShard() throws IOException {
|
||||||
|
try (var dir = new ByteBuffersDirectory(); var env = new LLTempHugePqEnv()) {
|
||||||
|
var analyzer = new WordAnalyzer(true, true);
|
||||||
|
var writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
|
||||||
|
writer.updateDocument(new Term("id", "00"), List.of(new TextField("text", "Mario Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "01"), List.of(new TextField("text", "Mario Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "02"), List.of(new TextField("text", "Mario Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "03"), List.of(new TextField("text", "Marios Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "04"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "05"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "06"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "07"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "08"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "09"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "10"), List.of(new TextField("text", "ROSSI UA", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "11"), List.of(new TextField("text", "Mario Barman", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "12"), List.of(new TextField("text", "Mario batman", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "13"), List.of(new TextField("text", "Admin Rossi desk", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "14"), List.of(new TextField("text", "MRI Marios bot", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "15"), List.of(new TextField("text", "Mario Rossi [beta]", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "16"), List.of(new TextField("text", "Mario Music Bot", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "17"), List.of(new TextField("text", "Mario night mode", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "18"), List.of(new TextField("text", "Mario stats bot", Store.YES)));
|
||||||
|
writer.updateDocument(new Term("id", "19"), List.of(new TextField("text", "Very very long text with Mario Giovanni and Rossi inside", Store.YES)));
|
||||||
|
writer.flush();
|
||||||
|
writer.commit();
|
||||||
|
try (var reader = DirectoryReader.open(writer, true, true)) {
|
||||||
|
var searcher = new IndexSearcher(reader);
|
||||||
|
var qb = new QueryBuilder(analyzer);
|
||||||
|
var luceneQuery = qb.createMinShouldMatchQuery("text", "Mario rossi", 0.3f);
|
||||||
|
var expectedResults = searcher.search(luceneQuery, 20);
|
||||||
|
var expectedTotalHits = new TotalHitsCount(expectedResults.totalHits.value, expectedResults.totalHits.relation == Relation.EQUAL_TO);
|
||||||
|
var expectedDocs = Arrays
|
||||||
|
.stream(expectedResults.scoreDocs)
|
||||||
|
.map(scoreDoc -> new LLScoreDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex))
|
||||||
|
.toList();
|
||||||
|
try (var collector = HugePqFullScoreDocCollector.create(env, 20)) {
|
||||||
|
searcher.search(luceneQuery, collector);
|
||||||
|
var docs = collector.fullDocs().iterate().collectList().blockOptional().orElseThrow();
|
||||||
|
System.out.println("Expected docs:");
|
||||||
|
for (LLScoreDoc expectedDoc : expectedDocs) {
|
||||||
|
System.out.println(expectedDoc);
|
||||||
|
}
|
||||||
|
System.out.println("");
|
||||||
|
System.out.println("Obtained docs:");
|
||||||
|
for (LLScoreDoc doc : docs) {
|
||||||
|
System.out.println(doc);
|
||||||
|
}
|
||||||
|
assertEquals(expectedDocs, docs.stream().map(elem -> new LLScoreDoc(elem.doc(), elem.score(), -1)).toList());
|
||||||
|
assertEquals(expectedTotalHits, new TotalHitsCount(collector.getTotalHits(), true));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiShard() throws IOException {
|
||||||
|
try (var dir1 = new ByteBuffersDirectory(); var dir2 = new ByteBuffersDirectory(); var env = new LLTempHugePqEnv()) {
|
||||||
|
var analyzer = new WordAnalyzer(true, true);
|
||||||
|
var writer1 = new IndexWriter(dir1, new IndexWriterConfig(analyzer));
|
||||||
|
var writer2 = new IndexWriter(dir2, new IndexWriterConfig(analyzer));
|
||||||
|
writer1.updateDocument(new Term("id", "00"), List.of(new TextField("text", "Mario Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "01"), List.of(new TextField("text", "Mario Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "02"), List.of(new TextField("text", "Mario Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "03"), List.of(new TextField("text", "Marios Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "04"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "05"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "06"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "07"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "08"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer1.updateDocument(new Term("id", "09"), List.of(new TextField("text", "Rossi", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "10"), List.of(new TextField("text", "ROSSI UA", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "11"), List.of(new TextField("text", "Mario Barman", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "12"), List.of(new TextField("text", "Mario batman", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "13"), List.of(new TextField("text", "Admin Rossi desk", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "14"), List.of(new TextField("text", "MRI Marios bot", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "15"), List.of(new TextField("text", "Mario Rossi [beta]", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "16"), List.of(new TextField("text", "Mario Music Bot", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "17"), List.of(new TextField("text", "Mario night mode", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "18"), List.of(new TextField("text", "Mario stats bot", Store.YES)));
|
||||||
|
writer2.updateDocument(new Term("id", "19"), List.of(new TextField("text", "Very very long text with Mario Giovanni and Rossi inside", Store.YES)));
|
||||||
|
writer1.flush();
|
||||||
|
writer2.flush();
|
||||||
|
writer1.commit();
|
||||||
|
writer2.commit();
|
||||||
|
var sharedStats = new SharedShardStatistics();
|
||||||
|
try (var reader1 = DirectoryReader.open(writer1, true, true);
|
||||||
|
var reader2 = DirectoryReader.open(writer2, true, true)) {
|
||||||
|
var searcher1 = new IndexSearcher(reader1);
|
||||||
|
var searcher2 = new IndexSearcher(reader2);
|
||||||
|
var shardSearcher1 = new ShardIndexSearcher(sharedStats, List.of(searcher1, searcher2), 0);
|
||||||
|
var shardSearcher2 = new ShardIndexSearcher(sharedStats, List.of(searcher1, searcher2), 1);
|
||||||
|
var qb = new QueryBuilder(analyzer);
|
||||||
|
var luceneQuery = qb.createMinShouldMatchQuery("text", "Mario rossi", 0.3f);
|
||||||
|
var standardSharedManager = TopScoreDocCollector.createSharedManager(20, null, Integer.MAX_VALUE);
|
||||||
|
var standardCollector1 = standardSharedManager.newCollector();
|
||||||
|
var standardCollector2 = standardSharedManager.newCollector();
|
||||||
|
shardSearcher1.search(luceneQuery, standardCollector1);
|
||||||
|
shardSearcher2.search(luceneQuery, standardCollector2);
|
||||||
|
var expectedResults = standardSharedManager.reduce(List.of(standardCollector1, standardCollector2));
|
||||||
|
var expectedTotalHits = new TotalHitsCount(expectedResults.totalHits.value, expectedResults.totalHits.relation == Relation.EQUAL_TO);
|
||||||
|
var expectedDocs = Arrays
|
||||||
|
.stream(expectedResults.scoreDocs)
|
||||||
|
.map(scoreDoc -> new LLScoreDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex))
|
||||||
|
.toList();
|
||||||
|
var collectorManager = HugePqFullScoreDocCollector.createSharedManager(env, 20, Integer.MAX_VALUE);
|
||||||
|
var collector1 = collectorManager.newCollector();
|
||||||
|
var collector2 = collectorManager.newCollector();
|
||||||
|
shardSearcher1.search(luceneQuery, collector1);
|
||||||
|
shardSearcher2.search(luceneQuery, collector2);
|
||||||
|
try (var results = collectorManager.reduce(List.of(collector1, collector2))) {
|
||||||
|
var docs = results.iterate().collectList().blockOptional().orElseThrow();
|
||||||
|
System.out.println("Expected docs:");
|
||||||
|
for (LLScoreDoc expectedDoc : expectedDocs) {
|
||||||
|
System.out.println(expectedDoc);
|
||||||
|
}
|
||||||
|
System.out.println("");
|
||||||
|
System.out.println("Obtained docs:");
|
||||||
|
for (LLScoreDoc doc : docs) {
|
||||||
|
System.out.println(doc);
|
||||||
|
}
|
||||||
|
assertEquals(expectedDocs, docs.stream().map(elem -> new LLScoreDoc(elem.doc(), elem.score(), -1)).toList());
|
||||||
|
assertEquals(expectedTotalHits, new TotalHitsCount(results.totalHits().value, results.totalHits().relation == Relation.EQUAL_TO));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user