Code cleanup
This commit is contained in:
parent
cf61636141
commit
8e7f7eaf18
@ -220,7 +220,6 @@ versions:
|
||||
query: Query
|
||||
offset: long
|
||||
limit: long
|
||||
minCompetitiveScore: -float
|
||||
sort: Sort
|
||||
computePreciseHitsCount: boolean
|
||||
timeoutMilliseconds: long
|
||||
|
@ -13,11 +13,10 @@ import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
@RecordBuilder
|
||||
public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
|
||||
public record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
|
||||
@NotNull Query query,
|
||||
long offset,
|
||||
long limit,
|
||||
@Nullable Float minCompetitiveScore,
|
||||
@Nullable Sort sort,
|
||||
boolean computePreciseHitsCount,
|
||||
@NotNull Duration timeout) {
|
||||
@ -28,7 +27,6 @@ public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
|
||||
.snapshot(null)
|
||||
.offset(0)
|
||||
.limit(Long.MAX_VALUE)
|
||||
.minCompetitiveScore(null)
|
||||
.sort(null)
|
||||
// Default timeout: 4 minutes
|
||||
.timeout(Duration.ofMinutes(4))
|
||||
@ -44,7 +42,6 @@ public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
|
||||
.builder()
|
||||
.query(query())
|
||||
.sort(sort != null ? sort.querySort() : new NoSort())
|
||||
.minCompetitiveScore(Nullablefloat.ofNullable(minCompetitiveScore()))
|
||||
.offset(offset())
|
||||
.limit(limit())
|
||||
.computePreciseHitsCount(computePreciseHitsCount())
|
||||
|
@ -70,7 +70,7 @@ public interface LLLuceneIndex extends LLSnapshottable {
|
||||
BucketParams bucketParams);
|
||||
|
||||
default Mono<TotalHitsCount> count(@Nullable LLSnapshot snapshot, Query query) {
|
||||
QueryParams params = QueryParams.of(query, 0, 0, Nullablefloat.empty(), NoSort.of(), false, Long.MAX_VALUE);
|
||||
QueryParams params = QueryParams.of(query, 0, 0, NoSort.of(), false, Long.MAX_VALUE);
|
||||
return Mono.from(this.search(snapshot, params, null)
|
||||
.map(llSearchResultShard -> {
|
||||
try (llSearchResultShard) {
|
||||
|
@ -3,7 +3,6 @@ package it.cavallium.dbengine.lucene;
|
||||
import static org.apache.lucene.search.SortField.STRING_LAST;
|
||||
|
||||
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
|
||||
import it.cavallium.dbengine.lucene.comparators.DocComparator;
|
||||
import it.cavallium.dbengine.lucene.comparators.DoubleComparator;
|
||||
import it.cavallium.dbengine.lucene.comparators.FloatComparator;
|
||||
import it.cavallium.dbengine.lucene.comparators.IntComparator;
|
||||
@ -19,7 +18,7 @@ import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedNumericSelector;
|
||||
import org.apache.lucene.search.SortedNumericSortField;
|
||||
import org.apache.lucene.search.comparators.LMDBDocComparator;
|
||||
|
||||
public class LMDBComparator {
|
||||
|
||||
@ -94,7 +93,7 @@ public class LMDBComparator {
|
||||
var comparatorSource = sortField.getComparatorSource();
|
||||
return switch (sortField.getType()) {
|
||||
case SCORE -> new RelevanceComparator(env, numHits);
|
||||
case DOC -> new DocComparator(env, numHits, reverse, sortPos);
|
||||
case DOC -> new LMDBDocComparator(env, numHits, reverse, sortPos);
|
||||
case INT -> new IntComparator(env, numHits, field, (Integer) missingValue,
|
||||
reverse, sortPos);
|
||||
case FLOAT -> new FloatComparator(env, numHits, field, (Float) missingValue,
|
||||
|
@ -336,7 +336,6 @@ public class LuceneUtils {
|
||||
queryParams.offset(),
|
||||
queryParams.limit(),
|
||||
DEFAULT_PAGE_LIMITS,
|
||||
queryParams.minCompetitiveScore().getNullable(),
|
||||
QueryParser.toSort(queryParams.sort()),
|
||||
queryParams.computePreciseHitsCount(),
|
||||
Duration.ofMillis(queryParams.timeoutMilliseconds())
|
||||
|
@ -1,4 +1,17 @@
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
|
||||
public sealed interface BucketValueSource permits DoubleBucketValueSource, LongBucketValueSource, ConstantValueSource,
|
||||
NullValueSource {}
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
|
||||
public sealed interface BucketValueSource permits BucketValueSource.DoubleBucketValueSource,
|
||||
BucketValueSource.LongBucketValueSource,
|
||||
BucketValueSource.ConstantValueSource, BucketValueSource.NullValueSource {
|
||||
|
||||
record ConstantValueSource(Number constant) implements BucketValueSource {}
|
||||
|
||||
record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}
|
||||
|
||||
record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}
|
||||
|
||||
record NullValueSource() implements BucketValueSource {}
|
||||
}
|
||||
|
@ -1,3 +0,0 @@
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
|
||||
/**
 * Bucket value source that holds a single fixed {@link Number}.
 *
 * @param constant the fixed value carried by this source
 */
public record ConstantValueSource(Number constant) implements BucketValueSource {}
|
@ -1,9 +1,5 @@
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@ -12,8 +8,6 @@ import java.util.List;
|
||||
import org.apache.commons.lang3.NotImplementedException;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsCollectorManager;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
|
||||
@ -24,8 +18,6 @@ import org.apache.lucene.facet.range.LongRangeFacetCounts;
|
||||
import org.apache.lucene.facet.range.Range;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.CachingCollector;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
@ -157,12 +149,12 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
||||
Facets facets;
|
||||
if (USE_LONGS) {
|
||||
LongValuesSource valuesSource;
|
||||
if (bucketValueSource instanceof NullValueSource) {
|
||||
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
|
||||
|
||||
valuesSource = null;
|
||||
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
|
||||
valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue());
|
||||
} else if (bucketValueSource instanceof LongBucketValueSource longBucketValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) {
|
||||
valuesSource = longBucketValueSource.source();
|
||||
} else {
|
||||
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
|
||||
@ -175,11 +167,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
||||
);
|
||||
} else {
|
||||
DoubleValuesSource valuesSource;
|
||||
if (bucketValueSource instanceof NullValueSource) {
|
||||
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
|
||||
valuesSource = null;
|
||||
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
|
||||
valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue());
|
||||
} else if (bucketValueSource instanceof DoubleBucketValueSource doubleBucketValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) {
|
||||
valuesSource = doubleBucketValueSource.source();
|
||||
} else {
|
||||
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
|
||||
@ -219,11 +211,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
||||
}
|
||||
if (USE_LONGS) {
|
||||
LongValuesSource valuesSource;
|
||||
if (bucketValueSource instanceof NullValueSource) {
|
||||
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
|
||||
valuesSource = null;
|
||||
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
|
||||
valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue());
|
||||
} else if (bucketValueSource instanceof LongBucketValueSource longBucketValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) {
|
||||
valuesSource = longBucketValueSource.source();
|
||||
} else {
|
||||
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
|
||||
@ -236,11 +228,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
||||
);
|
||||
} else {
|
||||
DoubleValuesSource valuesSource;
|
||||
if (bucketValueSource instanceof NullValueSource) {
|
||||
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
|
||||
valuesSource = null;
|
||||
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
|
||||
valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue());
|
||||
} else if (bucketValueSource instanceof DoubleBucketValueSource doubleBucketValueSource) {
|
||||
} else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) {
|
||||
valuesSource = doubleBucketValueSource.source();
|
||||
} else {
|
||||
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
|
||||
|
@ -1,5 +0,0 @@
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
|
||||
/**
 * Bucket value source that wraps a Lucene {@link DoubleValuesSource}.
 *
 * @param source the wrapped double values source
 */
public record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}
|
@ -1,5 +0,0 @@
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
|
||||
/**
 * Bucket value source that wraps a Lucene {@link LongValuesSource}.
 *
 * @param source the wrapped long values source
 */
public record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}
|
@ -1,3 +0,0 @@
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
|
||||
/** Bucket value source variant that carries no data. */
public record NullValueSource() implements BucketValueSource {}
|
@ -28,6 +28,7 @@ import java.io.IOException;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.comparators.NumericComparator;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a
|
||||
|
@ -28,6 +28,7 @@ import java.io.IOException;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.comparators.NumericComparator;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a
|
||||
|
@ -28,6 +28,7 @@ import java.io.IOException;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.comparators.NumericComparator;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a
|
||||
|
@ -28,6 +28,7 @@ import java.io.IOException;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.comparators.NumericComparator;
|
||||
|
||||
/**
|
||||
* Comparator based on {@link Long#compare} for {@code numHits}. This comparator provides a skipping
|
||||
|
@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package it.cavallium.dbengine.lucene.comparators;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
/**
|
||||
* Docs iterator that starts iterating from a configurable minimum document
|
||||
* Based on {@link org.apache.lucene.search.comparators.MinDocIterator}
|
||||
* */
|
||||
public class MinDocIterator extends DocIdSetIterator {
|
||||
final int segmentMinDoc;
|
||||
final int maxDoc;
|
||||
int doc = -1;
|
||||
|
||||
MinDocIterator(int segmentMinDoc, int maxDoc) {
|
||||
this.segmentMinDoc = segmentMinDoc;
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert target > doc;
|
||||
if (doc == -1) {
|
||||
// skip directly to minDoc
|
||||
doc = Math.max(target, segmentMinDoc);
|
||||
} else {
|
||||
doc = target;
|
||||
}
|
||||
if (doc >= maxDoc) {
|
||||
doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return maxDoc - segmentMinDoc;
|
||||
}
|
||||
}
|
@ -1,112 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package it.cavallium.dbengine.lucene.comparators;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
|
||||
/**
|
||||
* Based on {@link org.apache.lucene.search.comparators.MinDocIterator}
|
||||
*/
|
||||
public final class MultiLeafFieldComparator implements LeafFieldComparator {
|
||||
|
||||
private final LeafFieldComparator[] comparators;
|
||||
private final int[] reverseMul;
|
||||
// we extract the first comparator to avoid array access in the common case
|
||||
// that the first comparator compares worse than the bottom entry in the queue
|
||||
private final LeafFieldComparator firstComparator;
|
||||
private final int firstReverseMul;
|
||||
|
||||
public MultiLeafFieldComparator(LeafFieldComparator[] comparators, int[] reverseMul) {
|
||||
if (comparators.length != reverseMul.length) {
|
||||
throw new IllegalArgumentException(
|
||||
"Must have the same number of comparators and reverseMul, got "
|
||||
+ comparators.length
|
||||
+ " and "
|
||||
+ reverseMul.length);
|
||||
}
|
||||
this.comparators = comparators;
|
||||
this.reverseMul = reverseMul;
|
||||
this.firstComparator = comparators[0];
|
||||
this.firstReverseMul = reverseMul[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
for (LeafFieldComparator comparator : comparators) {
|
||||
comparator.setBottom(slot);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
int cmp = firstReverseMul * firstComparator.compareBottom(doc);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
for (int i = 1; i < comparators.length; ++i) {
|
||||
cmp = reverseMul[i] * comparators[i].compareBottom(doc);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
int cmp = firstReverseMul * firstComparator.compareTop(doc);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
for (int i = 1; i < comparators.length; ++i) {
|
||||
cmp = reverseMul[i] * comparators[i].compareTop(doc);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
for (LeafFieldComparator comparator : comparators) {
|
||||
comparator.copy(slot, doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
for (LeafFieldComparator comparator : comparators) {
|
||||
comparator.setScorer(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHitsThresholdReached() throws IOException {
|
||||
// this is needed for skipping functionality that is only relevant for the 1st comparator
|
||||
firstComparator.setHitsThresholdReached();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() throws IOException {
|
||||
// this is needed for skipping functionality that is only relevant for the 1st comparator
|
||||
return firstComparator.competitiveIterator();
|
||||
}
|
||||
}
|
@ -1,336 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package it.cavallium.dbengine.lucene.comparators;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
|
||||
/**
|
||||
* Abstract numeric comparator for comparing numeric values. This comparator provides a skipping
|
||||
* functionality – an iterator that can skip over non-competitive documents.
|
||||
*
|
||||
* <p>Parameter {@code field} provided in the constructor is used as a field name in the default
|
||||
* implementations of the methods {@code getNumericDocValues} and {@code getPointValues} to retrieve
|
||||
* doc values and points. You can pass a dummy value for a field name (e.g. when sorting by script),
|
||||
* but in this case you must override both of these methods.
|
||||
*
|
||||
* Based on {@link org.apache.lucene.search.comparators.NumericComparator}
|
||||
*/
|
||||
public abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
|
||||
protected final T missingValue;
|
||||
protected final String field;
|
||||
protected final boolean reverse;
|
||||
private final int bytesCount; // how many bytes are used to encode this number
|
||||
private final ByteArrayComparator bytesComparator;
|
||||
|
||||
protected boolean topValueSet;
|
||||
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
|
||||
protected boolean hitsThresholdReached;
|
||||
protected boolean queueFull;
|
||||
private boolean canSkipDocuments;
|
||||
|
||||
protected NumericComparator(
|
||||
String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
|
||||
this.field = field;
|
||||
this.missingValue = missingValue;
|
||||
this.reverse = reverse;
|
||||
// skipping functionality is only relevant for primary sort
|
||||
this.canSkipDocuments = (sortPos == 0);
|
||||
this.bytesCount = bytesCount;
|
||||
this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(T value) {
|
||||
topValueSet = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSingleSort() {
|
||||
singleSort = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void disableSkipping() {
|
||||
canSkipDocuments = false;
|
||||
}
|
||||
|
||||
/** Leaf comparator for {@link NumericComparator} that provides skipping functionality */
|
||||
public abstract class NumericLeafComparator implements LeafFieldComparator {
|
||||
protected final NumericDocValues docValues;
|
||||
private final PointValues pointValues;
|
||||
// if skipping functionality should be enabled on this segment
|
||||
private final boolean enableSkipping;
|
||||
private final int maxDoc;
|
||||
private final byte[] minValueAsBytes;
|
||||
private final byte[] maxValueAsBytes;
|
||||
|
||||
private DocIdSetIterator competitiveIterator;
|
||||
private long iteratorCost;
|
||||
private int maxDocVisited = -1;
|
||||
private int updateCounter = 0;
|
||||
|
||||
public NumericLeafComparator(LeafReaderContext context) throws IOException {
|
||||
this.docValues = getNumericDocValues(context, field);
|
||||
this.pointValues = canSkipDocuments ? getPointValues(context, field) : null;
|
||||
if (pointValues != null) {
|
||||
FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
|
||||
if (info == null || info.getPointDimensionCount() == 0) {
|
||||
throw new IllegalStateException(
|
||||
"Field "
|
||||
+ field
|
||||
+ " doesn't index points according to FieldInfos yet returns non-null PointValues");
|
||||
} else if (info.getPointDimensionCount() > 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"Field " + field + " is indexed with multiple dimensions, sorting is not supported");
|
||||
} else if (info.getPointNumBytes() != bytesCount) {
|
||||
throw new IllegalArgumentException(
|
||||
"Field "
|
||||
+ field
|
||||
+ " is indexed with "
|
||||
+ info.getPointNumBytes()
|
||||
+ " bytes per dimension, but "
|
||||
+ NumericComparator.this
|
||||
+ " expected "
|
||||
+ bytesCount);
|
||||
}
|
||||
this.enableSkipping = true; // skipping is enabled when points are available
|
||||
this.maxDoc = context.reader().maxDoc();
|
||||
this.maxValueAsBytes =
|
||||
reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
|
||||
this.minValueAsBytes =
|
||||
reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
|
||||
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
|
||||
this.iteratorCost = maxDoc;
|
||||
} else {
|
||||
this.enableSkipping = false;
|
||||
this.maxDoc = 0;
|
||||
this.maxValueAsBytes = null;
|
||||
this.minValueAsBytes = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the NumericDocValues for the field in this segment
|
||||
*
|
||||
* <p>If you override this method, you must also override {@link
|
||||
* #getPointValues(LeafReaderContext, String)} This class uses sort optimization that leverages
|
||||
* points to filter out non-competitive matches, which relies on the assumption that points and
|
||||
* doc values record the same information.
|
||||
*
|
||||
* @param context – reader context
|
||||
* @param field - field name
|
||||
* @return numeric doc values for the field in this segment.
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
|
||||
throws IOException {
|
||||
return DocValues.getNumeric(context.reader(), field);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves point values for the field in this segment
|
||||
*
|
||||
* <p>If you override this method, you must also override {@link
|
||||
* #getNumericDocValues(LeafReaderContext, String)} This class uses sort optimization that
|
||||
* leverages points to filter out non-competitive matches, which relies on the assumption that
|
||||
* points and doc values record the same information. Return {@code null} even if no points
|
||||
* implementation is available, in this case sort optimization with points will be disabled.
|
||||
*
|
||||
* @param context – reader context
|
||||
* @param field - field name
|
||||
* @return point values for the field in this segment if they are available or {@code null} if
|
||||
* sort optimization with points should be disabled.
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
protected PointValues getPointValues(LeafReaderContext context, String field)
|
||||
throws IOException {
|
||||
return context.reader().getPointValues(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) throws IOException {
|
||||
queueFull = true; // if we are setting bottom, it means that we have collected enough hits
|
||||
updateCompetitiveIterator(); // update an iterator if we set a new bottom
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
maxDocVisited = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
if (scorer instanceof Scorer) {
|
||||
iteratorCost =
|
||||
((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
|
||||
updateCompetitiveIterator(); // update an iterator when we have a new segment
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHitsThresholdReached() throws IOException {
|
||||
hitsThresholdReached = true;
|
||||
updateCompetitiveIterator();
|
||||
}
|
||||
|
||||
// update its iterator to include possibly only docs that are "stronger" than the current bottom
|
||||
// entry
|
||||
private void updateCompetitiveIterator() throws IOException {
|
||||
if (enableSkipping == false || hitsThresholdReached == false || queueFull == false) return;
|
||||
// if some documents have missing points, check that missing values prohibits optimization
|
||||
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
|
||||
return; // we can't filter out documents, as documents with missing values are competitive
|
||||
}
|
||||
|
||||
updateCounter++;
|
||||
if (updateCounter > 256
|
||||
&& (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
|
||||
return;
|
||||
}
|
||||
if (reverse == false) {
|
||||
encodeBottom(maxValueAsBytes);
|
||||
if (topValueSet) {
|
||||
encodeTop(minValueAsBytes);
|
||||
}
|
||||
} else {
|
||||
encodeBottom(minValueAsBytes);
|
||||
if (topValueSet) {
|
||||
encodeTop(maxValueAsBytes);
|
||||
}
|
||||
}
|
||||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
|
||||
PointValues.IntersectVisitor visitor =
|
||||
new PointValues.IntersectVisitor() {
|
||||
DocIdSetBuilder.BulkAdder adder;
|
||||
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
adder = result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // Already visited or skipped
|
||||
}
|
||||
adder.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
if (docID <= maxDocVisited) {
|
||||
return; // already visited or skipped
|
||||
}
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0);
|
||||
// if doc's value is too high or for single sort even equal, it is not competitive
|
||||
// and the doc can be skipped
|
||||
if (cmp > 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0);
|
||||
// if doc's value is too low or for single sort even equal, it is not competitive
|
||||
// and the doc can be skipped
|
||||
if (cmp < 0 || (singleSort && cmp == 0)) return;
|
||||
}
|
||||
adder.add(docID); // doc is competitive
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
if (maxValueAsBytes != null) {
|
||||
int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0);
|
||||
if (cmp > 0 || (singleSort && cmp == 0))
|
||||
return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if (minValueAsBytes != null) {
|
||||
int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0);
|
||||
if (cmp < 0 || (singleSort && cmp == 0))
|
||||
return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
if ((maxValueAsBytes != null
|
||||
&& bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0)
|
||||
|| (minValueAsBytes != null
|
||||
&& bytesComparator.compare(minPackedValue, 0, minValueAsBytes, 0) < 0)) {
|
||||
return PointValues.Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
};
|
||||
final long threshold = iteratorCost >>> 3;
|
||||
long estimatedNumberOfMatches =
|
||||
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||||
if (estimatedNumberOfMatches >= threshold) {
|
||||
// the new range is not selective enough to be worth materializing, it doesn't reduce number
|
||||
// of docs at least 8x
|
||||
return;
|
||||
}
|
||||
pointValues.intersect(visitor);
|
||||
competitiveIterator = result.build().iterator();
|
||||
iteratorCost = competitiveIterator.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() {
|
||||
if (enableSkipping == false) return null;
|
||||
return new DocIdSetIterator() {
|
||||
private int docID = competitiveIterator.docID();
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(docID + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return competitiveIterator.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return docID = competitiveIterator.advance(target);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Hook implemented by concrete subclasses.
 *
 * <p>NOTE(review): going by the name, this reports whether documents with no value for the
 * sort field can still be competitive; the call site is outside this view, so confirm.
 *
 * @return {@code true} if a missing value is competitive (semantics defined by the subclass)
 */
protected abstract boolean isMissingValueCompetitive();
|
||||
|
||||
/**
 * Hook implemented by concrete subclasses.
 *
 * <p>NOTE(review): presumably writes the encoded form of the current bottom value into
 * {@code packedValue}; the call site is outside this view, so confirm.
 *
 * @param packedValue byte array handed to the subclass, presumably as the output buffer
 */
protected abstract void encodeBottom(byte[] packedValue);
|
||||
|
||||
/**
 * Hook implemented by concrete subclasses.
 *
 * <p>NOTE(review): presumably writes the encoded form of the current top value into
 * {@code packedValue}; the call site is outside this view, so confirm.
 *
 * @param packedValue byte array handed to the subclass, presumably as the output buffer
 */
protected abstract void encodeTop(byte[] packedValue);
|
||||
}
|
||||
}
|
@ -12,40 +12,36 @@ import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public record LocalQueryParams(@NotNull Query query, int offsetInt, long offsetLong, int limitInt, long limitLong,
|
||||
@NotNull PageLimits pageLimits, @Nullable Float minCompetitiveScore, @Nullable Sort sort,
|
||||
boolean computePreciseHitsCount, Duration timeout) {
|
||||
@NotNull PageLimits pageLimits, @Nullable Sort sort, boolean computePreciseHitsCount,
|
||||
Duration timeout) {
|
||||
|
||||
public LocalQueryParams(@NotNull Query query,
|
||||
long offsetLong,
|
||||
long limitLong,
|
||||
@NotNull PageLimits pageLimits,
|
||||
@Nullable Float minCompetitiveScore,
|
||||
@Nullable Sort sort,
|
||||
boolean computePreciseHitsCount,
|
||||
Duration timeout) {
|
||||
this(query, safeLongToInt(offsetLong), offsetLong, safeLongToInt(limitLong), limitLong, pageLimits,
|
||||
minCompetitiveScore, sort, computePreciseHitsCount, timeout);
|
||||
this(query,
|
||||
safeLongToInt(offsetLong),
|
||||
offsetLong,
|
||||
safeLongToInt(limitLong),
|
||||
limitLong,
|
||||
pageLimits,
|
||||
sort,
|
||||
computePreciseHitsCount,
|
||||
timeout
|
||||
);
|
||||
}
|
||||
|
||||
public LocalQueryParams(@NotNull Query query,
|
||||
int offsetInt,
|
||||
int limitInt,
|
||||
@NotNull PageLimits pageLimits,
|
||||
@Nullable Float minCompetitiveScore,
|
||||
@Nullable Sort sort,
|
||||
boolean computePreciseHitsCount,
|
||||
Duration timeout) {
|
||||
this(query,
|
||||
offsetInt,
|
||||
offsetInt,
|
||||
limitInt,
|
||||
limitInt,
|
||||
pageLimits,
|
||||
minCompetitiveScore,
|
||||
sort,
|
||||
computePreciseHitsCount,
|
||||
timeout
|
||||
);
|
||||
this(query, offsetInt, offsetInt, limitInt, limitInt, pageLimits, sort, computePreciseHitsCount, timeout);
|
||||
}
|
||||
|
||||
public boolean isSorted() {
|
||||
|
@ -1,13 +1,11 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.CustomHitsThresholdChecker;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
|
||||
@ -16,12 +14,20 @@ public class LuceneMultiGenerator implements Supplier<ScoreDoc> {
|
||||
private final Iterator<Supplier<ScoreDoc>> generators;
|
||||
private Supplier<ScoreDoc> luceneGenerator;
|
||||
|
||||
public LuceneMultiGenerator(List<IndexSearcher> shards, LocalQueryParams localQueryParams) {
|
||||
public LuceneMultiGenerator(List<IndexSearcher> shards,
|
||||
LocalQueryParams localQueryParams,
|
||||
CustomHitsThresholdChecker hitsThresholdChecker,
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
this.generators = IntStream
|
||||
.range(0, shards.size())
|
||||
.mapToObj(shardIndex -> {
|
||||
IndexSearcher shard = shards.get(shardIndex);
|
||||
return (Supplier<ScoreDoc>) new LuceneGenerator(shard, localQueryParams, shardIndex);
|
||||
return (Supplier<ScoreDoc>) new LuceneGenerator(shard,
|
||||
localQueryParams,
|
||||
shardIndex,
|
||||
hitsThresholdChecker,
|
||||
minScoreAcc
|
||||
);
|
||||
})
|
||||
.iterator();
|
||||
tryAdvanceGenerator();
|
||||
|
@ -10,7 +10,7 @@ import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.FullDocs;
|
||||
import it.cavallium.dbengine.lucene.LLScoreDoc;
|
||||
import it.cavallium.dbengine.lucene.collector.LMDBFullScoreDocCollector;
|
||||
import org.apache.lucene.search.LMDBFullScoreDocCollector;
|
||||
import java.io.IOException;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
@ -10,7 +10,7 @@ import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
|
||||
import it.cavallium.dbengine.lucene.FullDocs;
|
||||
import it.cavallium.dbengine.lucene.LLFieldDoc;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.collector.LMDBFullFieldDocCollector;
|
||||
import org.apache.lucene.search.LMDBFullFieldDocCollector;
|
||||
import java.io.IOException;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
@ -8,7 +8,9 @@ import it.cavallium.dbengine.database.LLKeyScore;
|
||||
import it.cavallium.dbengine.database.LLUtils;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.search.CustomHitsThresholdChecker;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import reactor.core.publisher.Flux;
|
||||
@ -56,15 +58,15 @@ public class UnsortedStreamingMultiSearcher implements MultiSearcher {
|
||||
}
|
||||
|
||||
private Flux<ScoreDoc> getScoreDocs(LocalQueryParams localQueryParams, List<IndexSearcher> shards) {
|
||||
return Flux
|
||||
.fromIterable(shards)
|
||||
.index()
|
||||
.flatMap(tuple -> {
|
||||
var shardIndex = (int) (long) tuple.getT1();
|
||||
var shard = tuple.getT2();
|
||||
return LuceneGenerator.reactive(shard, localQueryParams, shardIndex);
|
||||
});
|
||||
|
||||
return Flux.defer(() -> {
|
||||
var hitsThreshold = CustomHitsThresholdChecker.createShared(localQueryParams.getTotalHitsThresholdLong());
|
||||
MaxScoreAccumulator maxScoreAccumulator = new MaxScoreAccumulator();
|
||||
return Flux.fromIterable(shards).index().flatMap(tuple -> {
|
||||
var shardIndex = (int) (long) tuple.getT1();
|
||||
var shard = tuple.getT2();
|
||||
return LuceneGenerator.reactive(shard, localQueryParams, shardIndex, hitsThreshold, maxScoreAccumulator);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) {
|
||||
@ -72,7 +74,6 @@ public class UnsortedStreamingMultiSearcher implements MultiSearcher {
|
||||
0L,
|
||||
queryParams.offsetLong() + queryParams.limitLong(),
|
||||
queryParams.pageLimits(),
|
||||
queryParams.minCompetitiveScore(),
|
||||
queryParams.sort(),
|
||||
queryParams.computePreciseHitsCount(),
|
||||
queryParams.timeout()
|
||||
|
@ -15,15 +15,14 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
|
||||
/** Used for defining custom algorithms to allow searches to early terminate */
|
||||
abstract class HitsThresholdChecker {
|
||||
/** Implementation of HitsThresholdChecker which allows global hit counting */
|
||||
private static class GlobalHitsThresholdChecker extends HitsThresholdChecker {
|
||||
public abstract class CustomHitsThresholdChecker {
|
||||
/** Implementation of CustomHitsThresholdChecker which allows global hit counting */
|
||||
private static class GlobalHitsThresholdChecker extends CustomHitsThresholdChecker {
|
||||
private final long totalHitsThreshold;
|
||||
private final AtomicLong globalHitCount;
|
||||
|
||||
@ -70,8 +69,8 @@ abstract class HitsThresholdChecker {
|
||||
}
|
||||
}
|
||||
|
||||
/** Default implementation of HitsThresholdChecker to be used for single threaded execution */
|
||||
private static class LocalHitsThresholdChecker extends HitsThresholdChecker {
|
||||
/** Default implementation of CustomHitsThresholdChecker to be used for single threaded execution */
|
||||
private static class LocalHitsThresholdChecker extends CustomHitsThresholdChecker {
|
||||
private final long totalHitsThreshold;
|
||||
private long hitCount;
|
||||
|
||||
@ -120,14 +119,14 @@ abstract class HitsThresholdChecker {
|
||||
/*
|
||||
* Returns a threshold checker that is useful for single threaded searches
|
||||
*/
|
||||
public static HitsThresholdChecker create(final long totalHitsThreshold) {
|
||||
public static CustomHitsThresholdChecker create(final long totalHitsThreshold) {
|
||||
return new LocalHitsThresholdChecker(totalHitsThreshold);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns a threshold checker that is based on a shared counter
|
||||
*/
|
||||
public static HitsThresholdChecker createShared(final long totalHitsThreshold) {
|
||||
public static CustomHitsThresholdChecker createShared(final long totalHitsThreshold) {
|
||||
return new GlobalHitsThresholdChecker(totalHitsThreshold);
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import it.cavallium.dbengine.database.SafeCloseable;
|
||||
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
|
||||
@ -27,23 +27,12 @@ import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
|
||||
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
|
||||
import it.cavallium.dbengine.lucene.PriorityQueue;
|
||||
import it.cavallium.dbengine.lucene.ResourceIterable;
|
||||
import java.io.Closeable;
|
||||
import it.cavallium.dbengine.lucene.collector.FullDocsCollector;
|
||||
import it.cavallium.dbengine.lucene.collector.FullFieldDocs;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import it.cavallium.dbengine.lucene.comparators.MultiLeafFieldComparator;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TotalHits;
|
||||
import org.apache.lucene.search.TotalHits.Relation;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
@ -57,7 +46,8 @@ import reactor.core.publisher.Flux;
|
||||
* <a href="https://github.com/apache/lucene/commits/main/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java">
|
||||
* Lucene TopFieldCollector changes on GitHub</a>
|
||||
*/
|
||||
public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPriorityQueue<LLSlotDoc>, LLSlotDoc, LLFieldDoc> {
|
||||
public abstract class LMDBFullFieldDocCollector extends
|
||||
FullDocsCollector<LMDBPriorityQueue<LLSlotDoc>, LLSlotDoc, LLFieldDoc> {
|
||||
|
||||
// TODO: one optimization we could do is to pre-fill
|
||||
// the queue with sentinel value that guaranteed to
|
||||
@ -71,10 +61,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
Scorable scorer;
|
||||
boolean collectedAllCompetitiveHits = false;
|
||||
|
||||
TopFieldLeafCollector(PriorityQueue<LLSlotDoc> queue,
|
||||
FieldValueHitQueue fieldValueHitQueue,
|
||||
Sort sort,
|
||||
LeafReaderContext context)
|
||||
TopFieldLeafCollector(FieldValueHitQueue fieldValueHitQueue, Sort sort, LeafReaderContext context)
|
||||
throws IOException {
|
||||
// as all segments are sorted in the same way, enough to check only the 1st segment for
|
||||
// indexSort
|
||||
@ -103,9 +90,9 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
if (scoreMode.isExhaustive() == false
|
||||
if (!scoreMode.isExhaustive()
|
||||
&& totalHitsRelation == TotalHits.Relation.EQUAL_TO
|
||||
&& hitsThresholdChecker.isThresholdReached(false)) {
|
||||
&& hitsThresholdChecker.isThresholdReached()) {
|
||||
// for the first time hitsThreshold is reached, notify comparator about this
|
||||
comparator.setHitsThresholdReached();
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
@ -118,7 +105,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
// this document is larger than anything else in the queue, and
|
||||
// therefore not competitive.
|
||||
if (searchSortPartOfIndexSort) {
|
||||
if (hitsThresholdChecker.isThresholdReached(false)) {
|
||||
if (hitsThresholdChecker.isThresholdReached()) {
|
||||
totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
throw new CollectionTerminatedException();
|
||||
} else {
|
||||
@ -220,7 +207,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
|
||||
return new TopFieldLeafCollector(queue, fieldValueHitQueue, sort, context) {
|
||||
return new TopFieldLeafCollector(fieldValueHitQueue, sort, context) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
@ -298,12 +285,12 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
|
||||
if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
|
||||
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
|
||||
&& hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
|
||||
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
|
||||
scoreMode = ScoreMode.TOP_SCORES;
|
||||
canSetMinScore = true;
|
||||
} else {
|
||||
canSetMinScore = false;
|
||||
if (hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
|
||||
if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
|
||||
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
|
||||
} else {
|
||||
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||
@ -319,7 +306,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
|
||||
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
|
||||
assert minScoreAcc != null;
|
||||
if (canSetMinScore && hitsThresholdChecker.isThresholdReached(false)) {
|
||||
if (canSetMinScore && hitsThresholdChecker.isThresholdReached()) {
|
||||
// we can start checking the global maximum score even
|
||||
// if the local queue is not full because the threshold
|
||||
// is reached.
|
||||
@ -333,7 +320,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
}
|
||||
|
||||
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
|
||||
if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached(false)) {
|
||||
if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached()) {
|
||||
assert pq.top() != null;
|
||||
float minScore = (float) firstComparator.value(pq.top().slot());
|
||||
if (minScore > minCompetitiveScore) {
|
||||
@ -422,17 +409,25 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
||||
LLTempLMDBEnv env, Sort sort, int numHits, long totalHitsThreshold) {
|
||||
return new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker =
|
||||
HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
|
||||
private final HitsThresholdChecker hitsThresholdChecker;
|
||||
|
||||
{
|
||||
if (totalHitsThreshold < Integer.MAX_VALUE) {
|
||||
hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max((int) totalHitsThreshold, numHits));
|
||||
} else {
|
||||
hitsThresholdChecker = HitsThresholdChecker.createShared(Integer.MAX_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
public LMDBFullFieldDocCollector newCollector() throws IOException {
|
||||
public LMDBFullFieldDocCollector newCollector() {
|
||||
return create(env, sort, numHits, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FullFieldDocs<LLFieldDoc> reduce(Collection<LMDBFullFieldDocCollector> collectors) throws IOException {
|
||||
public FullFieldDocs<LLFieldDoc> reduce(Collection<LMDBFullFieldDocCollector> collectors) {
|
||||
return reduceShared(sort, collectors);
|
||||
}
|
||||
};
|
@ -14,28 +14,20 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package it.cavallium.dbengine.lucene.collector;
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import it.cavallium.dbengine.database.LLUtils;
|
||||
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
|
||||
import it.cavallium.dbengine.lucene.FullDocs;
|
||||
import it.cavallium.dbengine.lucene.LLScoreDoc;
|
||||
import it.cavallium.dbengine.lucene.LLScoreDocCodec;
|
||||
import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
|
||||
import it.cavallium.dbengine.lucene.ResourceIterable;
|
||||
import it.cavallium.dbengine.lucene.collector.FullDocsCollector;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import it.cavallium.dbengine.lucene.MaxScoreAccumulator.DocAndScore;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.TotalHits;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
@ -53,7 +45,8 @@ import org.jetbrains.annotations.Nullable;
|
||||
* <a href="https://github.com/apache/lucene/commits/main/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java">
|
||||
* Lucene TopScoreDocCollector changes on GitHub</a>
|
||||
*/
|
||||
public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPriorityQueue<LLScoreDoc>, LLScoreDoc, LLScoreDoc> {
|
||||
public abstract class LMDBFullScoreDocCollector extends
|
||||
FullDocsCollector<LMDBPriorityQueue<LLScoreDoc>, LLScoreDoc, LLScoreDoc> {
|
||||
|
||||
/** Scorable leaf collector */
|
||||
public abstract static class ScorerLeafCollector implements LeafCollector {
|
||||
@ -69,7 +62,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
|
||||
private static class SimpleLMDBFullScoreDocCollector extends LMDBFullScoreDocCollector {
|
||||
|
||||
SimpleLMDBFullScoreDocCollector(LLTempLMDBEnv env, @Nullable Long limit,
|
||||
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
|
||||
CustomHitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
|
||||
super(env, limit, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@ -153,7 +146,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
|
||||
* <code>numHits</code>, and fill the array with sentinel objects.
|
||||
*/
|
||||
public static LMDBFullScoreDocCollector create(LLTempLMDBEnv env, long numHits, int totalHitsThreshold) {
|
||||
return create(env, numHits, HitsThresholdChecker.create(totalHitsThreshold), null);
|
||||
return create(env, numHits, CustomHitsThresholdChecker.create(totalHitsThreshold), null);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -163,12 +156,12 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
|
||||
* but will also likely make query processing slower.
|
||||
*/
|
||||
public static LMDBFullScoreDocCollector create(LLTempLMDBEnv env, int totalHitsThreshold) {
|
||||
return create(env, HitsThresholdChecker.create(totalHitsThreshold), null);
|
||||
return create(env, CustomHitsThresholdChecker.create(totalHitsThreshold), null);
|
||||
}
|
||||
|
||||
static LMDBFullScoreDocCollector create(
|
||||
LLTempLMDBEnv env,
|
||||
HitsThresholdChecker hitsThresholdChecker,
|
||||
CustomHitsThresholdChecker hitsThresholdChecker,
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
|
||||
if (hitsThresholdChecker == null) {
|
||||
@ -181,7 +174,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
|
||||
static LMDBFullScoreDocCollector create(
|
||||
LLTempLMDBEnv env,
|
||||
@NotNull Long numHits,
|
||||
HitsThresholdChecker hitsThresholdChecker,
|
||||
CustomHitsThresholdChecker hitsThresholdChecker,
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
|
||||
if (hitsThresholdChecker == null) {
|
||||
@ -205,8 +198,8 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
|
||||
long totalHitsThreshold) {
|
||||
return new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker =
|
||||
HitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final CustomHitsThresholdChecker hitsThresholdChecker =
|
||||
CustomHitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
@ -230,8 +223,8 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
|
||||
long totalHitsThreshold) {
|
||||
return new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker =
|
||||
HitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final CustomHitsThresholdChecker hitsThresholdChecker =
|
||||
CustomHitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
@ -258,13 +251,13 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
|
||||
|
||||
int docBase;
|
||||
final @Nullable Long limit;
|
||||
final HitsThresholdChecker hitsThresholdChecker;
|
||||
final CustomHitsThresholdChecker hitsThresholdChecker;
|
||||
final MaxScoreAccumulator minScoreAcc;
|
||||
float minCompetitiveScore;
|
||||
|
||||
// prevents instantiation
|
||||
LMDBFullScoreDocCollector(LLTempLMDBEnv env, @Nullable Long limit,
|
||||
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
|
||||
CustomHitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
|
||||
super(new LMDBPriorityQueue<>(env, new LLScoreDocCodec()));
|
||||
assert hitsThresholdChecker != null;
|
||||
this.limit = limit;
|
@ -15,28 +15,25 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package it.cavallium.dbengine.lucene.comparators;
|
||||
package org.apache.lucene.search.comparators;
|
||||
|
||||
import it.cavallium.dbengine.database.SafeCloseable;
|
||||
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
|
||||
import it.cavallium.dbengine.lucene.IArray;
|
||||
import it.cavallium.dbengine.lucene.IntCodec;
|
||||
import it.cavallium.dbengine.lucene.LMDBArray;
|
||||
import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
|
||||
import it.cavallium.dbengine.lucene.LongCodec;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.LeafFieldComparator;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.comparators.MinDocIterator;
|
||||
|
||||
/**
|
||||
* Comparator that sorts by asc _doc
|
||||
* Based on {@link org.apache.lucene.search.comparators.DocComparator}
|
||||
* */
|
||||
public class DocComparator extends FieldComparator<Integer> implements SafeCloseable {
|
||||
public class LMDBDocComparator extends org.apache.lucene.search.comparators.DocComparator implements SafeCloseable {
|
||||
private final IArray<Integer> docIDs;
|
||||
private final boolean enableSkipping; // if skipping functionality should be enabled
|
||||
private int bottom;
|
||||
@ -46,10 +43,11 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
|
||||
private boolean hitsThresholdReached;
|
||||
|
||||
/** Creates a new comparator based on document ids for {@code numHits} */
|
||||
public DocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
|
||||
public LMDBDocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
|
||||
super(0, reverse, sortPost);
|
||||
this.docIDs = new LMDBArray<>(env, new IntCodec(), numHits, 0);
|
||||
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
|
||||
this.enableSkipping = (!reverse && sortPost == 0);
|
||||
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
|
||||
this.enableSkipping = (!reverse && sortPost == 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -145,7 +143,7 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator competitiveIterator() {
|
||||
if (enableSkipping == false) {
|
||||
if (!enableSkipping) {
|
||||
return null;
|
||||
} else {
|
||||
return new DocIdSetIterator() {
|
||||
@ -181,7 +179,7 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
|
||||
}
|
||||
|
||||
private void updateIterator() {
|
||||
if (enableSkipping == false || hitsThresholdReached == false) return;
|
||||
if (!enableSkipping || !hitsThresholdReached) return;
|
||||
if (bottomValueSet) {
|
||||
// since we've collected top N matches, we can early terminate
|
||||
// Currently early termination on _doc is also implemented in TopFieldCollector, but this
|
Loading…
Reference in New Issue
Block a user