Code cleanup

This commit is contained in:
Andrea Cavalli 2022-02-11 13:32:50 +01:00
parent cf61636141
commit 8e7f7eaf18
27 changed files with 131 additions and 670 deletions

View File

@ -220,7 +220,6 @@ versions:
query: Query
offset: long
limit: long
minCompetitiveScore: -float
sort: Sort
computePreciseHitsCount: boolean
timeoutMilliseconds: long

View File

@ -13,11 +13,10 @@ import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
@RecordBuilder
public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
public record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
@NotNull Query query,
long offset,
long limit,
@Nullable Float minCompetitiveScore,
@Nullable Sort sort,
boolean computePreciseHitsCount,
@NotNull Duration timeout) {
@ -28,7 +27,6 @@ public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
.snapshot(null)
.offset(0)
.limit(Long.MAX_VALUE)
.minCompetitiveScore(null)
.sort(null)
// Default timeout: 4 minutes
.timeout(Duration.ofMinutes(4))
@ -44,7 +42,6 @@ public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
.builder()
.query(query())
.sort(sort != null ? sort.querySort() : new NoSort())
.minCompetitiveScore(Nullablefloat.ofNullable(minCompetitiveScore()))
.offset(offset())
.limit(limit())
.computePreciseHitsCount(computePreciseHitsCount())

View File

@ -70,7 +70,7 @@ public interface LLLuceneIndex extends LLSnapshottable {
BucketParams bucketParams);
default Mono<TotalHitsCount> count(@Nullable LLSnapshot snapshot, Query query) {
QueryParams params = QueryParams.of(query, 0, 0, Nullablefloat.empty(), NoSort.of(), false, Long.MAX_VALUE);
QueryParams params = QueryParams.of(query, 0, 0, NoSort.of(), false, Long.MAX_VALUE);
return Mono.from(this.search(snapshot, params, null)
.map(llSearchResultShard -> {
try (llSearchResultShard) {

View File

@ -3,7 +3,6 @@ package it.cavallium.dbengine.lucene;
import static org.apache.lucene.search.SortField.STRING_LAST;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.comparators.DocComparator;
import it.cavallium.dbengine.lucene.comparators.DoubleComparator;
import it.cavallium.dbengine.lucene.comparators.FloatComparator;
import it.cavallium.dbengine.lucene.comparators.IntComparator;
@ -19,7 +18,7 @@ import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.comparators.LMDBDocComparator;
public class LMDBComparator {
@ -94,7 +93,7 @@ public class LMDBComparator {
var comparatorSource = sortField.getComparatorSource();
return switch (sortField.getType()) {
case SCORE -> new RelevanceComparator(env, numHits);
case DOC -> new DocComparator(env, numHits, reverse, sortPos);
case DOC -> new LMDBDocComparator(env, numHits, reverse, sortPos);
case INT -> new IntComparator(env, numHits, field, (Integer) missingValue,
reverse, sortPos);
case FLOAT -> new FloatComparator(env, numHits, field, (Float) missingValue,

View File

@ -336,7 +336,6 @@ public class LuceneUtils {
queryParams.offset(),
queryParams.limit(),
DEFAULT_PAGE_LIMITS,
queryParams.minCompetitiveScore().getNullable(),
QueryParser.toSort(queryParams.sort()),
queryParams.computePreciseHitsCount(),
Duration.ofMillis(queryParams.timeoutMilliseconds())

View File

@ -1,4 +1,17 @@
package it.cavallium.dbengine.lucene.collector;
public sealed interface BucketValueSource permits DoubleBucketValueSource, LongBucketValueSource, ConstantValueSource,
NullValueSource {}
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.LongValuesSource;
/**
 * Closed hierarchy of value-source descriptors. Exactly the four nested records declared below are
 * permitted implementations, so consumers can distinguish them with {@code instanceof} patterns.
 */
public sealed interface BucketValueSource
    permits BucketValueSource.DoubleBucketValueSource,
        BucketValueSource.LongBucketValueSource,
        BucketValueSource.ConstantValueSource,
        BucketValueSource.NullValueSource {

  /** Descriptor wrapping a Lucene {@link DoubleValuesSource}. */
  record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}

  /** Descriptor wrapping a Lucene {@link LongValuesSource}. */
  record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}

  /** Descriptor carrying a fixed numeric constant. */
  record ConstantValueSource(Number constant) implements BucketValueSource {}

  /** Descriptor without any component, standing for "no value source". */
  record NullValueSource() implements BucketValueSource {}
}

View File

@ -1,3 +0,0 @@
package it.cavallium.dbengine.lucene.collector;
/** Bucket value source that carries a fixed numeric constant. */
public record ConstantValueSource(Number constant) implements BucketValueSource {}

View File

@ -1,9 +1,5 @@
package it.cavallium.dbengine.lucene.collector;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.io.IOException;
import java.util.ArrayList;
@ -12,8 +8,6 @@ import java.util.List;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
@ -24,8 +18,6 @@ import org.apache.lucene.facet.range.LongRangeFacetCounts;
import org.apache.lucene.facet.range.Range;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
@ -157,12 +149,12 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
Facets facets;
if (USE_LONGS) {
LongValuesSource valuesSource;
if (bucketValueSource instanceof NullValueSource) {
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
valuesSource = null;
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue());
} else if (bucketValueSource instanceof LongBucketValueSource longBucketValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) {
valuesSource = longBucketValueSource.source();
} else {
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
@ -175,11 +167,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
);
} else {
DoubleValuesSource valuesSource;
if (bucketValueSource instanceof NullValueSource) {
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
valuesSource = null;
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue());
} else if (bucketValueSource instanceof DoubleBucketValueSource doubleBucketValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) {
valuesSource = doubleBucketValueSource.source();
} else {
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
@ -219,11 +211,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
}
if (USE_LONGS) {
LongValuesSource valuesSource;
if (bucketValueSource instanceof NullValueSource) {
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
valuesSource = null;
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue());
} else if (bucketValueSource instanceof LongBucketValueSource longBucketValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) {
valuesSource = longBucketValueSource.source();
} else {
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
@ -236,11 +228,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
);
} else {
DoubleValuesSource valuesSource;
if (bucketValueSource instanceof NullValueSource) {
if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
valuesSource = null;
} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue());
} else if (bucketValueSource instanceof DoubleBucketValueSource doubleBucketValueSource) {
} else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) {
valuesSource = doubleBucketValueSource.source();
} else {
throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);

View File

@ -1,5 +0,0 @@
package it.cavallium.dbengine.lucene.collector;
import org.apache.lucene.search.DoubleValuesSource;
/** Bucket value source backed by a Lucene {@link DoubleValuesSource}. */
public record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}

View File

@ -1,5 +0,0 @@
package it.cavallium.dbengine.lucene.collector;
import org.apache.lucene.search.LongValuesSource;
/** Bucket value source backed by a Lucene {@link LongValuesSource}. */
public record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}

View File

@ -1,3 +0,0 @@
package it.cavallium.dbengine.lucene.collector;
/** Bucket value source with no components; it represents the absence of a value source. */
public record NullValueSource() implements BucketValueSource {}

View File

@ -28,6 +28,7 @@ import java.io.IOException;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.comparators.NumericComparator;
/**
* Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a

View File

@ -28,6 +28,7 @@ import java.io.IOException;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.comparators.NumericComparator;
/**
* Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a

View File

@ -28,6 +28,7 @@ import java.io.IOException;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.comparators.NumericComparator;
/**
* Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a

View File

@ -28,6 +28,7 @@ import java.io.IOException;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.comparators.NumericComparator;
/**
* Comparator based on {@link Long#compare} for {@code numHits}. This comparator provides a skipping

View File

@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
/**
 * A {@link DocIdSetIterator} whose first positioning jumps straight to a configurable minimum
 * document id and which reports {@link #NO_MORE_DOCS} once {@code maxDoc} is reached.
 *
 * <p>Based on {@link org.apache.lucene.search.comparators.MinDocIterator}.
 */
public class MinDocIterator extends DocIdSetIterator {

  final int segmentMinDoc;
  final int maxDoc;
  int doc = -1;

  MinDocIterator(int segmentMinDoc, int maxDoc) {
    this.segmentMinDoc = segmentMinDoc;
    this.maxDoc = maxDoc;
  }

  /** Returns the current document id, or {@code -1} before the first positioning call. */
  @Override
  public int docID() {
    return doc;
  }

  /** Moves to the document following the current one. */
  @Override
  public int nextDoc() throws IOException {
    return advance(doc + 1);
  }

  /**
   * Positions the iterator on {@code target}; the very first call is clamped up to
   * {@code segmentMinDoc}. Any position at or beyond {@code maxDoc} becomes {@link #NO_MORE_DOCS}.
   */
  @Override
  public int advance(int target) throws IOException {
    assert target > doc;
    // Only the first positioning call honours the segment minimum.
    final boolean unpositioned = doc == -1;
    final int candidate = unpositioned ? Math.max(target, segmentMinDoc) : target;
    doc = candidate < maxDoc ? candidate : NO_MORE_DOCS;
    return doc;
  }

  /** Returns the number of document ids between {@code segmentMinDoc} and {@code maxDoc}. */
  @Override
  public long cost() {
    return maxDoc - segmentMinDoc;
  }
}

View File

@ -1,112 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
/**
 * A {@link LeafFieldComparator} that chains several per-field leaf comparators: comparisons are
 * delegated in order, each result multiplied by the matching {@code reverseMul} entry, and the
 * first non-zero result wins.
 *
 * <p>Modeled on Lucene's package-private {@code org.apache.lucene.search.MultiLeafFieldComparator}
 * (NOTE(review): confirm the upstream origin; the previous Javadoc linked {@code MinDocIterator},
 * which is unrelated to this class).
 */
public final class MultiLeafFieldComparator implements LeafFieldComparator {

  private final LeafFieldComparator[] comparators;
  private final int[] reverseMul;
  // we extract the first comparator to avoid array access in the common case
  // that the first comparator compares worse than the bottom entry in the queue
  private final LeafFieldComparator firstComparator;
  private final int firstReverseMul;

  /**
   * Creates a chained comparator.
   *
   * @param comparators leaf comparators in sort-priority order; must contain at least one element
   * @param reverseMul multiplier applied to each comparator's result; same length as
   *     {@code comparators}
   * @throws IllegalArgumentException if the two arrays differ in length or are empty
   */
  public MultiLeafFieldComparator(LeafFieldComparator[] comparators, int[] reverseMul) {
    if (comparators.length != reverseMul.length) {
      throw new IllegalArgumentException(
          "Must have the same number of comparators and reverseMul, got "
              + comparators.length
              + " and "
              + reverseMul.length);
    }
    if (comparators.length == 0) {
      // Fail with a descriptive message instead of an ArrayIndexOutOfBoundsException below.
      throw new IllegalArgumentException("At least one comparator is required");
    }
    this.comparators = comparators;
    this.reverseMul = reverseMul;
    this.firstComparator = comparators[0];
    this.firstReverseMul = reverseMul[0];
  }

  /** Propagates the new bottom slot to every chained comparator. */
  @Override
  public void setBottom(int slot) throws IOException {
    for (LeafFieldComparator comparator : comparators) {
      comparator.setBottom(slot);
    }
  }

  /** Compares {@code doc} against the bottom entry; the first non-zero comparator result wins. */
  @Override
  public int compareBottom(int doc) throws IOException {
    int cmp = firstReverseMul * firstComparator.compareBottom(doc);
    if (cmp != 0) {
      return cmp;
    }
    for (int i = 1; i < comparators.length; ++i) {
      cmp = reverseMul[i] * comparators[i].compareBottom(doc);
      if (cmp != 0) {
        return cmp;
      }
    }
    return 0;
  }

  /** Compares {@code doc} against the top value; the first non-zero comparator result wins. */
  @Override
  public int compareTop(int doc) throws IOException {
    int cmp = firstReverseMul * firstComparator.compareTop(doc);
    if (cmp != 0) {
      return cmp;
    }
    for (int i = 1; i < comparators.length; ++i) {
      cmp = reverseMul[i] * comparators[i].compareTop(doc);
      if (cmp != 0) {
        return cmp;
      }
    }
    return 0;
  }

  /** Copies {@code doc} into {@code slot} on every chained comparator. */
  @Override
  public void copy(int slot, int doc) throws IOException {
    for (LeafFieldComparator comparator : comparators) {
      comparator.copy(slot, doc);
    }
  }

  /** Hands the scorer to every chained comparator. */
  @Override
  public void setScorer(Scorable scorer) throws IOException {
    for (LeafFieldComparator comparator : comparators) {
      comparator.setScorer(scorer);
    }
  }

  @Override
  public void setHitsThresholdReached() throws IOException {
    // this is needed for skipping functionality that is only relevant for the 1st comparator
    firstComparator.setHitsThresholdReached();
  }

  @Override
  public DocIdSetIterator competitiveIterator() throws IOException {
    // this is needed for skipping functionality that is only relevant for the 1st comparator
    return firstComparator.competitiveIterator();
  }
}

View File

@ -1,336 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.DocIdSetBuilder;
/**
* Abstract numeric comparator for comparing numeric values. This comparator provides a skipping
* functionality: an iterator that can skip over non-competitive documents.
*
* <p>Parameter {@code field} provided in the constructor is used as a field name in the default
* implementations of the methods {@code getNumericDocValues} and {@code getPointValues} to retrieve
* doc values and points. You can pass a dummy value for a field name (e.g. when sorting by script),
* but in this case you must override both of these methods.
*
* Based on {@link org.apache.lucene.search.comparators.NumericComparator}
*/
public abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
protected final T missingValue;
protected final String field;
protected final boolean reverse;
private final int bytesCount; // how many bytes are used to encode this number
// unsigned byte-wise comparator obtained for values of bytesCount bytes
private final ByteArrayComparator bytesComparator;
// becomes true once setTopValue has been called
protected boolean topValueSet;
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
// becomes true once a leaf comparator's setHitsThresholdReached has been called
protected boolean hitsThresholdReached;
// becomes true once a leaf comparator's setBottom has been called
protected boolean queueFull;
// true only when sortPos == 0, and cleared for good by disableSkipping()
private boolean canSkipDocuments;
/**
 * Stores the sort configuration shared by all leaf comparators created from this comparator.
 *
 * @param field field name handed to getNumericDocValues/getPointValues
 * @param missingValue stored as-is in {@link #missingValue}
 * @param reverse stored as-is in {@link #reverse}
 * @param sortPos position of this comparator in the sort; skipping is enabled only for 0
 * @param bytesCount number of bytes used to encode one value of this numeric type
 */
protected NumericComparator(
String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
this.field = field;
this.missingValue = missingValue;
this.reverse = reverse;
// skipping functionality is only relevant for primary sort
this.canSkipDocuments = (sortPos == 0);
this.bytesCount = bytesCount;
this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount);
}
/** Records that a top value was supplied; this base class does not store the value itself. */
@Override
public void setTopValue(T value) {
topValueSet = true;
}
@Override
public void setSingleSort() {
singleSort = true;
}
@Override
public void disableSkipping() {
canSkipDocuments = false;
}
/** Leaf comparator for {@link NumericComparator} that provides skipping functionality */
public abstract class NumericLeafComparator implements LeafFieldComparator {
protected final NumericDocValues docValues;
private final PointValues pointValues;
// if skipping functionality should be enabled on this segment
private final boolean enableSkipping;
private final int maxDoc;
private final byte[] minValueAsBytes;
private final byte[] maxValueAsBytes;
private DocIdSetIterator competitiveIterator;
private long iteratorCost;
// doc id most recently passed to copy(); ids <= this are dropped when the iterator is rebuilt
private int maxDocVisited = -1;
// counts updateCompetitiveIterator() calls that got past the early-exit checks; drives sampling
private int updateCounter = 0;
public NumericLeafComparator(LeafReaderContext context) throws IOException {
this.docValues = getNumericDocValues(context, field);
this.pointValues = canSkipDocuments ? getPointValues(context, field) : null;
if (pointValues != null) {
// sanity-check that the field's point configuration matches what this comparator encodes
FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
if (info == null || info.getPointDimensionCount() == 0) {
throw new IllegalStateException(
"Field "
+ field
+ " doesn't index points according to FieldInfos yet returns non-null PointValues");
} else if (info.getPointDimensionCount() > 1) {
throw new IllegalArgumentException(
"Field " + field + " is indexed with multiple dimensions, sorting is not supported");
} else if (info.getPointNumBytes() != bytesCount) {
throw new IllegalArgumentException(
"Field "
+ field
+ " is indexed with "
+ info.getPointNumBytes()
+ " bytes per dimension, but "
+ NumericComparator.this
+ " expected "
+ bytesCount);
}
this.enableSkipping = true; // skipping is enabled when points are available
this.maxDoc = context.reader().maxDoc();
// the bound that receives the encoded bottom value is always allocated (max when not reversed,
// min when reversed); the opposite bound is only allocated when a top value has been set
this.maxValueAsBytes =
reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
this.minValueAsBytes =
reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
this.iteratorCost = maxDoc;
} else {
this.enableSkipping = false;
this.maxDoc = 0;
this.maxValueAsBytes = null;
this.minValueAsBytes = null;
}
}
/**
* Retrieves the NumericDocValues for the field in this segment
*
* <p>If you override this method, you must also override {@link
* #getPointValues(LeafReaderContext, String)} This class uses sort optimization that leverages
* points to filter out non-competitive matches, which relies on the assumption that points and
* doc values record the same information.
*
* @param context reader context
* @param field - field name
* @return numeric doc values for the field in this segment.
* @throws IOException If there is a low-level I/O error
*/
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
throws IOException {
return DocValues.getNumeric(context.reader(), field);
}
/**
* Retrieves point values for the field in this segment
*
* <p>If you override this method, you must also override {@link
* #getNumericDocValues(LeafReaderContext, String)} This class uses sort optimization that
* leverages points to filter out non-competitive matches, which relies on the assumption that
* points and doc values record the same information. Return {@code null} even if no points
* implementation is available, in this case sort optimization with points will be disabled.
*
* @param context reader context
* @param field - field name
* @return point values for the field in this segment if they are available or {@code null} if
* sort optimization with points should be disabled.
* @throws IOException If there is a low-level I/O error
*/
protected PointValues getPointValues(LeafReaderContext context, String field)
throws IOException {
return context.reader().getPointValues(field);
}
@Override
public void setBottom(int slot) throws IOException {
queueFull = true; // if we are setting bottom, it means that we have collected enough hits
updateCompetitiveIterator(); // update an iterator if we set a new bottom
}
@Override
public void copy(int slot, int doc) throws IOException {
maxDocVisited = doc;
}
@Override
public void setScorer(Scorable scorer) throws IOException {
if (scorer instanceof Scorer) {
iteratorCost =
((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
updateCompetitiveIterator(); // update an iterator when we have a new segment
}
}
@Override
public void setHitsThresholdReached() throws IOException {
hitsThresholdReached = true;
updateCompetitiveIterator();
}
// update its iterator to include possibly only docs that are "stronger" than the current bottom
// entry
private void updateCompetitiveIterator() throws IOException {
if (enableSkipping == false || hitsThresholdReached == false || queueFull == false) return;
// if some documents have missing points, check that missing values prohibits optimization
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
return; // we can't filter out documents, as documents with missing values are competitive
}
updateCounter++;
// beyond 256 updates, only calls whose counter has its low five bits all set (every 32nd)
// go on to rebuild the iterator
if (updateCounter > 256
&& (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
return;
}
// the bottom value bounds the competitive range on one side, the top value (if set) on the other
if (reverse == false) {
encodeBottom(maxValueAsBytes);
if (topValueSet) {
encodeTop(minValueAsBytes);
}
} else {
encodeBottom(minValueAsBytes);
if (topValueSet) {
encodeTop(maxValueAsBytes);
}
}
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
PointValues.IntersectVisitor visitor =
new PointValues.IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) {
if (docID <= maxDocVisited) {
return; // Already visited or skipped
}
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
if (docID <= maxDocVisited) {
return; // already visited or skipped
}
if (maxValueAsBytes != null) {
int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0);
// if doc's value is too high or for single sort even equal, it is not competitive
// and the doc can be skipped
if (cmp > 0 || (singleSort && cmp == 0)) return;
}
if (minValueAsBytes != null) {
int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0);
// if doc's value is too low or for single sort even equal, it is not competitive
// and the doc can be skipped
if (cmp < 0 || (singleSort && cmp == 0)) return;
}
adder.add(docID); // doc is competitive
}
@Override
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
// cell lies entirely above the upper bound (or touches it in single-sort mode)
if (maxValueAsBytes != null) {
int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0);
if (cmp > 0 || (singleSort && cmp == 0))
return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
// cell lies entirely below the lower bound (or touches it in single-sort mode)
if (minValueAsBytes != null) {
int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0);
if (cmp < 0 || (singleSort && cmp == 0))
return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
// cell sticks out past either bound, so its values must be checked individually
if ((maxValueAsBytes != null
&& bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0)
|| (minValueAsBytes != null
&& bytesComparator.compare(minPackedValue, 0, minValueAsBytes, 0) < 0)) {
return PointValues.Relation.CELL_CROSSES_QUERY;
}
return PointValues.Relation.CELL_INSIDE_QUERY;
}
};
// one eighth of the current iterator cost
final long threshold = iteratorCost >>> 3;
long estimatedNumberOfMatches =
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
if (estimatedNumberOfMatches >= threshold) {
// the new range is not selective enough to be worth materializing, it doesn't reduce number
// of docs at least 8x
return;
}
pointValues.intersect(visitor);
competitiveIterator = result.build().iterator();
iteratorCost = competitiveIterator.cost();
}
@Override
public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) return null;
// the wrapper reads the competitiveIterator field on every call, so an iterator rebuilt by
// updateCompetitiveIterator() is picked up by a wrapper that was handed out earlier
return new DocIdSetIterator() {
private int docID = competitiveIterator.docID();
@Override
public int nextDoc() throws IOException {
return advance(docID + 1);
}
@Override
public int docID() {
return docID;
}
@Override
public long cost() {
return competitiveIterator.cost();
}
@Override
public int advance(int target) throws IOException {
return docID = competitiveIterator.advance(target);
}
};
}
// consulted when some documents have no point value: returning true disables filtering
protected abstract boolean isMissingValueCompetitive();
// writes the encoded bottom value into the given bytesCount-sized buffer
protected abstract void encodeBottom(byte[] packedValue);
// writes the encoded top value into the given bytesCount-sized buffer
protected abstract void encodeTop(byte[] packedValue);
}
}

View File

@ -12,40 +12,36 @@ import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public record LocalQueryParams(@NotNull Query query, int offsetInt, long offsetLong, int limitInt, long limitLong,
@NotNull PageLimits pageLimits, @Nullable Float minCompetitiveScore, @Nullable Sort sort,
boolean computePreciseHitsCount, Duration timeout) {
@NotNull PageLimits pageLimits, @Nullable Sort sort, boolean computePreciseHitsCount,
Duration timeout) {
public LocalQueryParams(@NotNull Query query,
long offsetLong,
long limitLong,
@NotNull PageLimits pageLimits,
@Nullable Float minCompetitiveScore,
@Nullable Sort sort,
boolean computePreciseHitsCount,
Duration timeout) {
this(query, safeLongToInt(offsetLong), offsetLong, safeLongToInt(limitLong), limitLong, pageLimits,
minCompetitiveScore, sort, computePreciseHitsCount, timeout);
this(query,
safeLongToInt(offsetLong),
offsetLong,
safeLongToInt(limitLong),
limitLong,
pageLimits,
sort,
computePreciseHitsCount,
timeout
);
}
public LocalQueryParams(@NotNull Query query,
int offsetInt,
int limitInt,
@NotNull PageLimits pageLimits,
@Nullable Float minCompetitiveScore,
@Nullable Sort sort,
boolean computePreciseHitsCount,
Duration timeout) {
this(query,
offsetInt,
offsetInt,
limitInt,
limitInt,
pageLimits,
minCompetitiveScore,
sort,
computePreciseHitsCount,
timeout
);
this(query, offsetInt, offsetInt, limitInt, limitInt, pageLimits, sort, computePreciseHitsCount, timeout);
}
public boolean isSorted() {

View File

@ -1,13 +1,11 @@
package it.cavallium.dbengine.lucene.searcher;
import java.util.ArrayList;
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
import java.util.Iterator;
import java.util.List;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CustomHitsThresholdChecker;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
@ -16,12 +14,20 @@ public class LuceneMultiGenerator implements Supplier<ScoreDoc> {
private final Iterator<Supplier<ScoreDoc>> generators;
private Supplier<ScoreDoc> luceneGenerator;
public LuceneMultiGenerator(List<IndexSearcher> shards, LocalQueryParams localQueryParams) {
public LuceneMultiGenerator(List<IndexSearcher> shards,
LocalQueryParams localQueryParams,
CustomHitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
this.generators = IntStream
.range(0, shards.size())
.mapToObj(shardIndex -> {
IndexSearcher shard = shards.get(shardIndex);
return (Supplier<ScoreDoc>) new LuceneGenerator(shard, localQueryParams, shardIndex);
return (Supplier<ScoreDoc>) new LuceneGenerator(shard,
localQueryParams,
shardIndex,
hitsThresholdChecker,
minScoreAcc
);
})
.iterator();
tryAdvanceGenerator();

View File

@ -10,7 +10,7 @@ import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.FullDocs;
import it.cavallium.dbengine.lucene.LLScoreDoc;
import it.cavallium.dbengine.lucene.collector.LMDBFullScoreDocCollector;
import org.apache.lucene.search.LMDBFullScoreDocCollector;
import java.io.IOException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

View File

@ -10,7 +10,7 @@ import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.FullDocs;
import it.cavallium.dbengine.lucene.LLFieldDoc;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.LMDBFullFieldDocCollector;
import org.apache.lucene.search.LMDBFullFieldDocCollector;
import java.io.IOException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

View File

@ -8,7 +8,9 @@ import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
import java.util.List;
import org.apache.lucene.search.CustomHitsThresholdChecker;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import reactor.core.publisher.Flux;
@ -56,15 +58,15 @@ public class UnsortedStreamingMultiSearcher implements MultiSearcher {
}
/**
 * Streams the score docs of all the given shards.
 * Each shard is paired with its position in {@code shards} and handed to
 * {@code LuceneGenerator.reactive}; the per-shard publishers are merged with {@code flatMap}.
 *
 * @param localQueryParams query parameters forwarded unchanged to every shard generator
 * @param shards the index searchers to read from; the list position becomes the shard index
 * @return the score docs emitted by the generators of all shards
 */
private Flux<ScoreDoc> getScoreDocs(LocalQueryParams localQueryParams, List<IndexSearcher> shards) {
return Flux
.fromIterable(shards)
.index()
.flatMap(tuple -> {
var shardIndex = (int) (long) tuple.getT1();
var shard = tuple.getT2();
return LuceneGenerator.reactive(shard, localQueryParams, shardIndex);
});
return Flux.defer(() -> {
// Created inside defer(): each subscription builds its own checker and accumulator,
// and every shard generator of that subscription receives the same two instances.
var hitsThreshold = CustomHitsThresholdChecker.createShared(localQueryParams.getTotalHitsThresholdLong());
MaxScoreAccumulator maxScoreAccumulator = new MaxScoreAccumulator();
return Flux.fromIterable(shards).index().flatMap(tuple -> {
// index() supplies the position as a Long: unbox it, then narrow it to int
var shardIndex = (int) (long) tuple.getT1();
var shard = tuple.getT2();
return LuceneGenerator.reactive(shard, localQueryParams, shardIndex, hitsThreshold, maxScoreAccumulator);
});
});
}
private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) {
@ -72,7 +74,6 @@ public class UnsortedStreamingMultiSearcher implements MultiSearcher {
0L,
queryParams.offsetLong() + queryParams.limitLong(),
queryParams.pageLimits(),
queryParams.minCompetitiveScore(),
queryParams.sort(),
queryParams.computePreciseHitsCount(),
queryParams.timeout()

View File

@ -15,15 +15,14 @@
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.collector;
package org.apache.lucene.search;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.search.ScoreMode;
/** Used for defining custom algorithms to allow searches to early terminate */
abstract class HitsThresholdChecker {
/** Implementation of HitsThresholdChecker which allows global hit counting */
private static class GlobalHitsThresholdChecker extends HitsThresholdChecker {
public abstract class CustomHitsThresholdChecker {
/** Implementation of CustomHitsThresholdChecker which allows global hit counting */
private static class GlobalHitsThresholdChecker extends CustomHitsThresholdChecker {
private final long totalHitsThreshold;
private final AtomicLong globalHitCount;
@ -70,8 +69,8 @@ abstract class HitsThresholdChecker {
}
}
/** Default implementation of HitsThresholdChecker to be used for single threaded execution */
private static class LocalHitsThresholdChecker extends HitsThresholdChecker {
/** Default implementation of CustomHitsThresholdChecker to be used for single threaded execution */
private static class LocalHitsThresholdChecker extends CustomHitsThresholdChecker {
private final long totalHitsThreshold;
private long hitCount;
@ -120,14 +119,14 @@ abstract class HitsThresholdChecker {
/*
 * Returns a threshold checker that is useful for single-threaded searches.
 * The result is a LocalHitsThresholdChecker, which keeps its hit count in a plain long field.
 */
public static HitsThresholdChecker create(final long totalHitsThreshold) {
public static CustomHitsThresholdChecker create(final long totalHitsThreshold) {
return new LocalHitsThresholdChecker(totalHitsThreshold);
}
/*
 * Returns a threshold checker that is based on a shared counter.
 * The result is a GlobalHitsThresholdChecker, which keeps its hit count in an AtomicLong.
 */
public static HitsThresholdChecker createShared(final long totalHitsThreshold) {
public static CustomHitsThresholdChecker createShared(final long totalHitsThreshold) {
return new GlobalHitsThresholdChecker(totalHitsThreshold);
}

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.collector;
package org.apache.lucene.search;
import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
@ -27,23 +27,12 @@ import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
import it.cavallium.dbengine.lucene.PriorityQueue;
import it.cavallium.dbengine.lucene.ResourceIterable;
import java.io.Closeable;
import it.cavallium.dbengine.lucene.collector.FullDocsCollector;
import it.cavallium.dbengine.lucene.collector.FullFieldDocs;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.LeafFieldComparator;
import it.cavallium.dbengine.lucene.comparators.MultiLeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TotalHits.Relation;
import reactor.core.publisher.Flux;
@ -57,7 +46,8 @@ import reactor.core.publisher.Flux;
* <a href="https://github.com/apache/lucene/commits/main/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java">
* Lucene TopFieldCollector changes on GitHub</a>
*/
public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPriorityQueue<LLSlotDoc>, LLSlotDoc, LLFieldDoc> {
public abstract class LMDBFullFieldDocCollector extends
FullDocsCollector<LMDBPriorityQueue<LLSlotDoc>, LLSlotDoc, LLFieldDoc> {
// TODO: one optimization we could do is to pre-fill
// the queue with sentinel value that guaranteed to
@ -71,10 +61,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
Scorable scorer;
boolean collectedAllCompetitiveHits = false;
TopFieldLeafCollector(PriorityQueue<LLSlotDoc> queue,
FieldValueHitQueue fieldValueHitQueue,
Sort sort,
LeafReaderContext context)
TopFieldLeafCollector(FieldValueHitQueue fieldValueHitQueue, Sort sort, LeafReaderContext context)
throws IOException {
// as all segments are sorted in the same way, enough to check only the 1st segment for
// indexSort
@ -103,9 +90,9 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (scoreMode.isExhaustive() == false
if (!scoreMode.isExhaustive()
&& totalHitsRelation == TotalHits.Relation.EQUAL_TO
&& hitsThresholdChecker.isThresholdReached(false)) {
&& hitsThresholdChecker.isThresholdReached()) {
// for the first time hitsThreshold is reached, notify comparator about this
comparator.setHitsThresholdReached();
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
@ -118,7 +105,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
// this document is larger than anything else in the queue, and
// therefore not competitive.
if (searchSortPartOfIndexSort) {
if (hitsThresholdChecker.isThresholdReached(false)) {
if (hitsThresholdChecker.isThresholdReached()) {
totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
throw new CollectionTerminatedException();
} else {
@ -220,7 +207,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase;
return new TopFieldLeafCollector(queue, fieldValueHitQueue, sort, context) {
return new TopFieldLeafCollector(fieldValueHitQueue, sort, context) {
@Override
public void collect(int doc) throws IOException {
@ -298,12 +285,12 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
&& hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
scoreMode = ScoreMode.TOP_SCORES;
canSetMinScore = true;
} else {
canSetMinScore = false;
if (hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
} else {
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
@ -319,7 +306,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
if (canSetMinScore && hitsThresholdChecker.isThresholdReached(false)) {
if (canSetMinScore && hitsThresholdChecker.isThresholdReached()) {
// we can start checking the global maximum score even
// if the local queue is not full because the threshold
// is reached.
@ -333,7 +320,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached(false)) {
if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached()) {
assert pq.top() != null;
float minScore = (float) firstComparator.value(pq.top().slot());
if (minScore > minCompetitiveScore) {
@ -422,17 +409,25 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
LLTempLMDBEnv env, Sort sort, int numHits, long totalHitsThreshold) {
// Anonymous manager: all collectors it creates share one threshold checker and one score accumulator.
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker =
HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
private final HitsThresholdChecker hitsThresholdChecker;
// Instance initializer: the long threshold is narrowed to int only when it is below
// Integer.MAX_VALUE, so the cast cannot overflow; larger values are clamped to
// Integer.MAX_VALUE. In both branches the effective threshold is never below numHits.
// NOTE(review): presumably createShared takes an int here — confirm against the
// HitsThresholdChecker class this name resolves to.
{
if (totalHitsThreshold < Integer.MAX_VALUE) {
hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max((int) totalHitsThreshold, numHits));
} else {
hitsThresholdChecker = HitsThresholdChecker.createShared(Integer.MAX_VALUE);
}
}
// Single accumulator instance handed to every collector returned by newCollector()
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
public LMDBFullFieldDocCollector newCollector() throws IOException {
public LMDBFullFieldDocCollector newCollector() {
return create(env, sort, numHits, hitsThresholdChecker, minScoreAcc);
}
// Merging of the per-collector results is delegated to reduceShared with the same sort
@Override
public FullFieldDocs<LLFieldDoc> reduce(Collection<LMDBFullFieldDocCollector> collectors) throws IOException {
public FullFieldDocs<LLFieldDoc> reduce(Collection<LMDBFullFieldDocCollector> collectors) {
return reduceShared(sort, collectors);
}
};

View File

@ -14,28 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.collector;
package org.apache.lucene.search;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.FullDocs;
import it.cavallium.dbengine.lucene.LLScoreDoc;
import it.cavallium.dbengine.lucene.LLScoreDocCodec;
import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
import it.cavallium.dbengine.lucene.ResourceIterable;
import it.cavallium.dbengine.lucene.collector.FullDocsCollector;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import it.cavallium.dbengine.lucene.MaxScoreAccumulator.DocAndScore;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TotalHits;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
@ -53,7 +45,8 @@ import org.jetbrains.annotations.Nullable;
* <a href="https://github.com/apache/lucene/commits/main/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java">
* Lucene TopScoreDocCollector changes on GitHub</a>
*/
public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPriorityQueue<LLScoreDoc>, LLScoreDoc, LLScoreDoc> {
public abstract class LMDBFullScoreDocCollector extends
FullDocsCollector<LMDBPriorityQueue<LLScoreDoc>, LLScoreDoc, LLScoreDoc> {
/** Scorable leaf collector */
public abstract static class ScorerLeafCollector implements LeafCollector {
@ -69,7 +62,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
private static class SimpleLMDBFullScoreDocCollector extends LMDBFullScoreDocCollector {
/** Passes every argument unchanged to the {@link LMDBFullScoreDocCollector} constructor. */
SimpleLMDBFullScoreDocCollector(LLTempLMDBEnv env, @Nullable Long limit,
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
CustomHitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
super(env, limit, hitsThresholdChecker, minScoreAcc);
}
@ -153,7 +146,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
* <code>numHits</code>, and fill the array with sentinel objects.
*/
public static LMDBFullScoreDocCollector create(LLTempLMDBEnv env, long numHits, int totalHitsThreshold) {
// Delegates to the four-argument overload: numHits is boxed to Long, the checker comes
// from the non-shared create(...) factory, and no MaxScoreAccumulator is supplied (null).
return create(env, numHits, HitsThresholdChecker.create(totalHitsThreshold), null);
return create(env, numHits, CustomHitsThresholdChecker.create(totalHitsThreshold), null);
}
/**
@ -163,12 +156,12 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
* but will also likely make query processing slower.
*/
public static LMDBFullScoreDocCollector create(LLTempLMDBEnv env, int totalHitsThreshold) {
// Delegates to the overload without numHits: the checker comes from the non-shared
// create(...) factory, and no MaxScoreAccumulator is supplied (null).
return create(env, HitsThresholdChecker.create(totalHitsThreshold), null);
return create(env, CustomHitsThresholdChecker.create(totalHitsThreshold), null);
}
static LMDBFullScoreDocCollector create(
LLTempLMDBEnv env,
HitsThresholdChecker hitsThresholdChecker,
CustomHitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
if (hitsThresholdChecker == null) {
@ -181,7 +174,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
static LMDBFullScoreDocCollector create(
LLTempLMDBEnv env,
@NotNull Long numHits,
HitsThresholdChecker hitsThresholdChecker,
CustomHitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
if (hitsThresholdChecker == null) {
@ -205,8 +198,8 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
long totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker =
HitsThresholdChecker.createShared(totalHitsThreshold);
private final CustomHitsThresholdChecker hitsThresholdChecker =
CustomHitsThresholdChecker.createShared(totalHitsThreshold);
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
@ -230,8 +223,8 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
long totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker =
HitsThresholdChecker.createShared(totalHitsThreshold);
private final CustomHitsThresholdChecker hitsThresholdChecker =
CustomHitsThresholdChecker.createShared(totalHitsThreshold);
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
@ -258,13 +251,13 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
int docBase;
final @Nullable Long limit;
final HitsThresholdChecker hitsThresholdChecker;
final CustomHitsThresholdChecker hitsThresholdChecker;
final MaxScoreAccumulator minScoreAcc;
float minCompetitiveScore;
// prevents instantiation
LMDBFullScoreDocCollector(LLTempLMDBEnv env, @Nullable Long limit,
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
CustomHitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
super(new LMDBPriorityQueue<>(env, new LLScoreDocCodec()));
assert hitsThresholdChecker != null;
this.limit = limit;

View File

@ -15,28 +15,25 @@
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
package org.apache.lucene.search.comparators;
import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
import it.cavallium.dbengine.lucene.LongCodec;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.comparators.MinDocIterator;
/**
* Comparator that sorts by asc _doc
* Based on {@link org.apache.lucene.search.comparators.DocComparator}
* */
public class DocComparator extends FieldComparator<Integer> implements SafeCloseable {
public class LMDBDocComparator extends org.apache.lucene.search.comparators.DocComparator implements SafeCloseable {
private final IArray<Integer> docIDs;
private final boolean enableSkipping; // if skipping functionality should be enabled
private int bottom;
@ -46,10 +43,11 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
private boolean hitsThresholdReached;
/**
 * Creates a new comparator based on document ids for {@code numHits}.
 *
 * @param env temporary LMDB environment passed to the {@code LMDBArray} that stores the doc ids
 * @param numHits passed to the {@code LMDBArray} constructor (presumably its size — confirm)
 * @param reverse {@code false} for ascending _doc order; skipping is only enabled in that case
 * @param sortPost position of this comparator in the sort; {@code 0} means primary sort
 */
public DocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
public LMDBDocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
// NOTE(review): the first argument is presumably the parent's numHits, given as 0 because
// the doc ids are kept in the LMDB-backed array below — confirm against the parent constructor.
super(0, reverse, sortPost);
this.docIDs = new LMDBArray<>(env, new IntCodec(), numHits, 0);
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
this.enableSkipping = (!reverse && sortPost == 0);
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
this.enableSkipping = (!reverse && sortPost == 0);
}
@Override
@ -145,7 +143,7 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
@Override
public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) {
if (!enableSkipping) {
return null;
} else {
return new DocIdSetIterator() {
@ -181,7 +179,7 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
}
private void updateIterator() {
if (enableSkipping == false || hitsThresholdReached == false) return;
if (!enableSkipping || !hitsThresholdReached) return;
if (bottomValueSet) {
// since we've collected top N matches, we can early terminate
// Currently early termination on _doc is also implemented in TopFieldCollector, but this