Code cleanup
parent cf61636141
commit 8e7f7eaf18
@@ -220,7 +220,6 @@ versions:
 query: Query
 offset: long
 limit: long
-minCompetitiveScore: -float
 sort: Sort
 computePreciseHitsCount: boolean
 timeoutMilliseconds: long
@@ -13,11 +13,10 @@ import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;

 @RecordBuilder
-public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
+public record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
 @NotNull Query query,
 long offset,
 long limit,
-@Nullable Float minCompetitiveScore,
 @Nullable Sort sort,
 boolean computePreciseHitsCount,
 @NotNull Duration timeout) {
@@ -28,7 +27,6 @@ public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
 .snapshot(null)
 .offset(0)
 .limit(Long.MAX_VALUE)
-.minCompetitiveScore(null)
 .sort(null)
 // Default timeout: 4 minutes
 .timeout(Duration.ofMinutes(4))
@@ -44,7 +42,6 @@ public final record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
 .builder()
 .query(query())
 .sort(sort != null ? sort.querySort() : new NoSort())
-.minCompetitiveScore(Nullablefloat.ofNullable(minCompetitiveScore()))
 .offset(offset())
 .limit(limit())
 .computePreciseHitsCount(computePreciseHitsCount())
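Note (not part of the commit): with minCompetitiveScore gone, a caller only fills in the remaining fields. A minimal sketch, assuming the @RecordBuilder-generated builder is conventionally named ClientQueryParamsBuilder and that a Query named query is already built:

// Hypothetical usage sketch; the builder class name is an assumption, not taken from this commit.
static ClientQueryParams defaultParamsFor(Query query) {
	return ClientQueryParamsBuilder.builder()
			.snapshot(null)
			.query(query)
			.offset(0)
			.limit(Long.MAX_VALUE)
			.sort(null)
			.computePreciseHitsCount(false)
			.timeout(Duration.ofMinutes(4))
			.build();
}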
@@ -70,7 +70,7 @@ public interface LLLuceneIndex extends LLSnapshottable {
 BucketParams bucketParams);

 default Mono<TotalHitsCount> count(@Nullable LLSnapshot snapshot, Query query) {
-QueryParams params = QueryParams.of(query, 0, 0, Nullablefloat.empty(), NoSort.of(), false, Long.MAX_VALUE);
+QueryParams params = QueryParams.of(query, 0, 0, NoSort.of(), false, Long.MAX_VALUE);
 return Mono.from(this.search(snapshot, params, null)
 .map(llSearchResultShard -> {
 try (llSearchResultShard) {
@@ -3,7 +3,6 @@ package it.cavallium.dbengine.lucene;
 import static org.apache.lucene.search.SortField.STRING_LAST;

 import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
-import it.cavallium.dbengine.lucene.comparators.DocComparator;
 import it.cavallium.dbengine.lucene.comparators.DoubleComparator;
 import it.cavallium.dbengine.lucene.comparators.FloatComparator;
 import it.cavallium.dbengine.lucene.comparators.IntComparator;
@@ -19,7 +18,7 @@ import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedNumericSelector;
-import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.comparators.LMDBDocComparator;

 public class LMDBComparator {

@@ -94,7 +93,7 @@ public class LMDBComparator {
 var comparatorSource = sortField.getComparatorSource();
 return switch (sortField.getType()) {
 case SCORE -> new RelevanceComparator(env, numHits);
-case DOC -> new DocComparator(env, numHits, reverse, sortPos);
+case DOC -> new LMDBDocComparator(env, numHits, reverse, sortPos);
 case INT -> new IntComparator(env, numHits, field, (Integer) missingValue,
 reverse, sortPos);
 case FLOAT -> new FloatComparator(env, numHits, field, (Float) missingValue,
@@ -336,7 +336,6 @@ public class LuceneUtils {
 queryParams.offset(),
 queryParams.limit(),
 DEFAULT_PAGE_LIMITS,
-queryParams.minCompetitiveScore().getNullable(),
 QueryParser.toSort(queryParams.sort()),
 queryParams.computePreciseHitsCount(),
 Duration.ofMillis(queryParams.timeoutMilliseconds())
@@ -1,4 +1,17 @@
 package it.cavallium.dbengine.lucene.collector;

-public sealed interface BucketValueSource permits DoubleBucketValueSource, LongBucketValueSource, ConstantValueSource,
-NullValueSource {}
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.LongValuesSource;
+
+public sealed interface BucketValueSource permits BucketValueSource.DoubleBucketValueSource,
+BucketValueSource.LongBucketValueSource,
+BucketValueSource.ConstantValueSource, BucketValueSource.NullValueSource {
+
+record ConstantValueSource(Number constant) implements BucketValueSource {}
+
+record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}
+
+record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}
+
+record NullValueSource() implements BucketValueSource {}
+}
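For orientation, a minimal sketch of how the consolidated variants are consumed; the same instanceof chain appears in DecimalBucketMultiCollectorManager further down. The helper class and method name here are illustrative, not from the commit:

// Sketch: mapping BucketValueSource variants to a Lucene LongValuesSource.
import it.cavallium.dbengine.lucene.collector.BucketValueSource;
import org.apache.lucene.search.LongValuesSource;

final class BucketValueSources {
	static LongValuesSource toLongValuesSource(BucketValueSource bucketValueSource) {
		if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
			return null; // no per-document value source
		} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constant) {
			return LongValuesSource.constant(constant.constant().longValue());
		} else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longSource) {
			return longSource.source();
		} else {
			throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
		}
	}
}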
@@ -1,3 +0,0 @@
-package it.cavallium.dbengine.lucene.collector;
-
-public record ConstantValueSource(Number constant) implements BucketValueSource {}
@@ -1,9 +1,5 @@
 package it.cavallium.dbengine.lucene.collector;

-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
 import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -12,8 +8,6 @@ import java.util.List;
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.lucene.facet.FacetResult;
 import org.apache.lucene.facet.Facets;
-import org.apache.lucene.facet.FacetsCollector;
-import org.apache.lucene.facet.FacetsCollectorManager;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.LabelAndValue;
 import org.apache.lucene.facet.RandomSamplingFacetsCollector;
@@ -24,8 +18,6 @@ import org.apache.lucene.facet.range.LongRangeFacetCounts;
 import org.apache.lucene.facet.range.Range;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.CachingCollector;
-import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.DoubleValuesSource;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LongValuesSource;
@@ -157,12 +149,12 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
 Facets facets;
 if (USE_LONGS) {
 LongValuesSource valuesSource;
-if (bucketValueSource instanceof NullValueSource) {
+if (bucketValueSource instanceof BucketValueSource.NullValueSource) {

 valuesSource = null;
-} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
 valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue());
-} else if (bucketValueSource instanceof LongBucketValueSource longBucketValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) {
 valuesSource = longBucketValueSource.source();
 } else {
 throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
@@ -175,11 +167,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
 );
 } else {
 DoubleValuesSource valuesSource;
-if (bucketValueSource instanceof NullValueSource) {
+if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
 valuesSource = null;
-} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
 valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue());
-} else if (bucketValueSource instanceof DoubleBucketValueSource doubleBucketValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) {
 valuesSource = doubleBucketValueSource.source();
 } else {
 throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
@@ -219,11 +211,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
 }
 if (USE_LONGS) {
 LongValuesSource valuesSource;
-if (bucketValueSource instanceof NullValueSource) {
+if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
 valuesSource = null;
-} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
 valuesSource = LongValuesSource.constant(constantValueSource.constant().longValue());
-} else if (bucketValueSource instanceof LongBucketValueSource longBucketValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.LongBucketValueSource longBucketValueSource) {
 valuesSource = longBucketValueSource.source();
 } else {
 throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
@@ -236,11 +228,11 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
 );
 } else {
 DoubleValuesSource valuesSource;
-if (bucketValueSource instanceof NullValueSource) {
+if (bucketValueSource instanceof BucketValueSource.NullValueSource) {
 valuesSource = null;
-} else if (bucketValueSource instanceof ConstantValueSource constantValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.ConstantValueSource constantValueSource) {
 valuesSource = DoubleValuesSource.constant(constantValueSource.constant().longValue());
-} else if (bucketValueSource instanceof DoubleBucketValueSource doubleBucketValueSource) {
+} else if (bucketValueSource instanceof BucketValueSource.DoubleBucketValueSource doubleBucketValueSource) {
 valuesSource = doubleBucketValueSource.source();
 } else {
 throw new IllegalArgumentException("Wrong value source type: " + bucketValueSource);
@@ -1,5 +0,0 @@
-package it.cavallium.dbengine.lucene.collector;
-
-import org.apache.lucene.search.DoubleValuesSource;
-
-public record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}
@@ -1,5 +0,0 @@
-package it.cavallium.dbengine.lucene.collector;
-
-import org.apache.lucene.search.LongValuesSource;
-
-public record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}
@@ -1,3 +0,0 @@
-package it.cavallium.dbengine.lucene.collector;
-
-public record NullValueSource() implements BucketValueSource {}
@@ -28,6 +28,7 @@ import java.io.IOException;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.comparators.NumericComparator;

 /**
 * Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a
@@ -28,6 +28,7 @@ import java.io.IOException;
 import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.comparators.NumericComparator;

 /**
 * Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a
@@ -28,6 +28,7 @@ import java.io.IOException;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.comparators.NumericComparator;

 /**
 * Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a
@@ -28,6 +28,7 @@ import java.io.IOException;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.comparators.NumericComparator;

 /**
 * Comparator based on {@link Long#compare} for {@code numHits}. This comparator provides a skipping
@@ -1,66 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package it.cavallium.dbengine.lucene.comparators;
-
-import java.io.IOException;
-import org.apache.lucene.search.DocIdSetIterator;
-
-/**
-* Docs iterator that starts iterating from a configurable minimum document
-* Based on {@link org.apache.lucene.search.comparators.MinDocIterator}
-* */
-public class MinDocIterator extends DocIdSetIterator {
-final int segmentMinDoc;
-final int maxDoc;
-int doc = -1;
-
-MinDocIterator(int segmentMinDoc, int maxDoc) {
-this.segmentMinDoc = segmentMinDoc;
-this.maxDoc = maxDoc;
-}
-
-@Override
-public int docID() {
-return doc;
-}
-
-@Override
-public int nextDoc() throws IOException {
-return advance(doc + 1);
-}
-
-@Override
-public int advance(int target) throws IOException {
-assert target > doc;
-if (doc == -1) {
-// skip directly to minDoc
-doc = Math.max(target, segmentMinDoc);
-} else {
-doc = target;
-}
-if (doc >= maxDoc) {
-doc = NO_MORE_DOCS;
-}
-return doc;
-}
-
-@Override
-public long cost() {
-return maxDoc - segmentMinDoc;
-}
-}
@@ -1,112 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package it.cavallium.dbengine.lucene.comparators;
-
-import java.io.IOException;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Scorable;
-
-/**
-* Based on {@link org.apache.lucene.search.comparators.MinDocIterator}
-*/
-public final class MultiLeafFieldComparator implements LeafFieldComparator {
-
-private final LeafFieldComparator[] comparators;
-private final int[] reverseMul;
-// we extract the first comparator to avoid array access in the common case
-// that the first comparator compares worse than the bottom entry in the queue
-private final LeafFieldComparator firstComparator;
-private final int firstReverseMul;
-
-public MultiLeafFieldComparator(LeafFieldComparator[] comparators, int[] reverseMul) {
-if (comparators.length != reverseMul.length) {
-throw new IllegalArgumentException(
-"Must have the same number of comparators and reverseMul, got "
-+ comparators.length
-+ " and "
-+ reverseMul.length);
-}
-this.comparators = comparators;
-this.reverseMul = reverseMul;
-this.firstComparator = comparators[0];
-this.firstReverseMul = reverseMul[0];
-}
-
-@Override
-public void setBottom(int slot) throws IOException {
-for (LeafFieldComparator comparator : comparators) {
-comparator.setBottom(slot);
-}
-}
-
-@Override
-public int compareBottom(int doc) throws IOException {
-int cmp = firstReverseMul * firstComparator.compareBottom(doc);
-if (cmp != 0) {
-return cmp;
-}
-for (int i = 1; i < comparators.length; ++i) {
-cmp = reverseMul[i] * comparators[i].compareBottom(doc);
-if (cmp != 0) {
-return cmp;
-}
-}
-return 0;
-}
-
-@Override
-public int compareTop(int doc) throws IOException {
-int cmp = firstReverseMul * firstComparator.compareTop(doc);
-if (cmp != 0) {
-return cmp;
-}
-for (int i = 1; i < comparators.length; ++i) {
-cmp = reverseMul[i] * comparators[i].compareTop(doc);
-if (cmp != 0) {
-return cmp;
-}
-}
-return 0;
-}
-
-@Override
-public void copy(int slot, int doc) throws IOException {
-for (LeafFieldComparator comparator : comparators) {
-comparator.copy(slot, doc);
-}
-}
-
-@Override
-public void setScorer(Scorable scorer) throws IOException {
-for (LeafFieldComparator comparator : comparators) {
-comparator.setScorer(scorer);
-}
-}
-
-@Override
-public void setHitsThresholdReached() throws IOException {
-// this is needed for skipping functionality that is only relevant for the 1st comparator
-firstComparator.setHitsThresholdReached();
-}
-
-@Override
-public DocIdSetIterator competitiveIterator() throws IOException {
-// this is needed for skipping functionality that is only relevant for the 1st comparator
-return firstComparator.competitiveIterator();
-}
-}
@@ -1,336 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package it.cavallium.dbengine.lucene.comparators;
-
-import java.io.IOException;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.PointValues;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.FieldComparator;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Scorable;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
-import org.apache.lucene.util.DocIdSetBuilder;
-
-/**
-* Abstract numeric comparator for comparing numeric values. This comparator provides a skipping
-* functionality – an iterator that can skip over non-competitive documents.
-*
-* <p>Parameter {@code field} provided in the constructor is used as a field name in the default
-* implementations of the methods {@code getNumericDocValues} and {@code getPointValues} to retrieve
-* doc values and points. You can pass a dummy value for a field name (e.g. when sorting by script),
-* but in this case you must override both of these methods.
-*
-* Based on {@link org.apache.lucene.search.comparators.NumericComparator}
-*/
-public abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
-protected final T missingValue;
-protected final String field;
-protected final boolean reverse;
-private final int bytesCount; // how many bytes are used to encode this number
-private final ByteArrayComparator bytesComparator;
-
-protected boolean topValueSet;
-protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
-protected boolean hitsThresholdReached;
-protected boolean queueFull;
-private boolean canSkipDocuments;
-
-protected NumericComparator(
-String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
-this.field = field;
-this.missingValue = missingValue;
-this.reverse = reverse;
-// skipping functionality is only relevant for primary sort
-this.canSkipDocuments = (sortPos == 0);
-this.bytesCount = bytesCount;
-this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount);
-}
-
-@Override
-public void setTopValue(T value) {
-topValueSet = true;
-}
-
-@Override
-public void setSingleSort() {
-singleSort = true;
-}
-
-@Override
-public void disableSkipping() {
-canSkipDocuments = false;
-}
-
-/** Leaf comparator for {@link NumericComparator} that provides skipping functionality */
-public abstract class NumericLeafComparator implements LeafFieldComparator {
-protected final NumericDocValues docValues;
-private final PointValues pointValues;
-// if skipping functionality should be enabled on this segment
-private final boolean enableSkipping;
-private final int maxDoc;
-private final byte[] minValueAsBytes;
-private final byte[] maxValueAsBytes;
-
-private DocIdSetIterator competitiveIterator;
-private long iteratorCost;
-private int maxDocVisited = -1;
-private int updateCounter = 0;
-
-public NumericLeafComparator(LeafReaderContext context) throws IOException {
-this.docValues = getNumericDocValues(context, field);
-this.pointValues = canSkipDocuments ? getPointValues(context, field) : null;
-if (pointValues != null) {
-FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
-if (info == null || info.getPointDimensionCount() == 0) {
-throw new IllegalStateException(
-"Field "
-+ field
-+ " doesn't index points according to FieldInfos yet returns non-null PointValues");
-} else if (info.getPointDimensionCount() > 1) {
-throw new IllegalArgumentException(
-"Field " + field + " is indexed with multiple dimensions, sorting is not supported");
-} else if (info.getPointNumBytes() != bytesCount) {
-throw new IllegalArgumentException(
-"Field "
-+ field
-+ " is indexed with "
-+ info.getPointNumBytes()
-+ " bytes per dimension, but "
-+ NumericComparator.this
-+ " expected "
-+ bytesCount);
-}
-this.enableSkipping = true; // skipping is enabled when points are available
-this.maxDoc = context.reader().maxDoc();
-this.maxValueAsBytes =
-reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
-this.minValueAsBytes =
-reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
-this.competitiveIterator = DocIdSetIterator.all(maxDoc);
-this.iteratorCost = maxDoc;
-} else {
-this.enableSkipping = false;
-this.maxDoc = 0;
-this.maxValueAsBytes = null;
-this.minValueAsBytes = null;
-}
-}
-
-/**
-* Retrieves the NumericDocValues for the field in this segment
-*
-* <p>If you override this method, you must also override {@link
-* #getPointValues(LeafReaderContext, String)} This class uses sort optimization that leverages
-* points to filter out non-competitive matches, which relies on the assumption that points and
-* doc values record the same information.
-*
-* @param context – reader context
-* @param field - field name
-* @return numeric doc values for the field in this segment.
-* @throws IOException If there is a low-level I/O error
-*/
-protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
-throws IOException {
-return DocValues.getNumeric(context.reader(), field);
-}
-
-/**
-* Retrieves point values for the field in this segment
-*
-* <p>If you override this method, you must also override {@link
-* #getNumericDocValues(LeafReaderContext, String)} This class uses sort optimization that
-* leverages points to filter out non-competitive matches, which relies on the assumption that
-* points and doc values record the same information. Return {@code null} even if no points
-* implementation is available, in this case sort optimization with points will be disabled.
-*
-* @param context – reader context
-* @param field - field name
-* @return point values for the field in this segment if they are available or {@code null} if
-* sort optimization with points should be disabled.
-* @throws IOException If there is a low-level I/O error
-*/
-protected PointValues getPointValues(LeafReaderContext context, String field)
-throws IOException {
-return context.reader().getPointValues(field);
-}
-
-@Override
-public void setBottom(int slot) throws IOException {
-queueFull = true; // if we are setting bottom, it means that we have collected enough hits
-updateCompetitiveIterator(); // update an iterator if we set a new bottom
-}
-
-@Override
-public void copy(int slot, int doc) throws IOException {
-maxDocVisited = doc;
-}
-
-@Override
-public void setScorer(Scorable scorer) throws IOException {
-if (scorer instanceof Scorer) {
-iteratorCost =
-((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
-updateCompetitiveIterator(); // update an iterator when we have a new segment
-}
-}
-
-@Override
-public void setHitsThresholdReached() throws IOException {
-hitsThresholdReached = true;
-updateCompetitiveIterator();
-}
-
-// update its iterator to include possibly only docs that are "stronger" than the current bottom
-// entry
-private void updateCompetitiveIterator() throws IOException {
-if (enableSkipping == false || hitsThresholdReached == false || queueFull == false) return;
-// if some documents have missing points, check that missing values prohibits optimization
-if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
-return; // we can't filter out documents, as documents with missing values are competitive
-}
-
-updateCounter++;
-if (updateCounter > 256
-&& (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
-return;
-}
-if (reverse == false) {
-encodeBottom(maxValueAsBytes);
-if (topValueSet) {
-encodeTop(minValueAsBytes);
-}
-} else {
-encodeBottom(minValueAsBytes);
-if (topValueSet) {
-encodeTop(maxValueAsBytes);
-}
-}
-
-DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
-PointValues.IntersectVisitor visitor =
-new PointValues.IntersectVisitor() {
-DocIdSetBuilder.BulkAdder adder;
-
-@Override
-public void grow(int count) {
-adder = result.grow(count);
-}
-
-@Override
-public void visit(int docID) {
-if (docID <= maxDocVisited) {
-return; // Already visited or skipped
-}
-adder.add(docID);
-}
-
-@Override
-public void visit(int docID, byte[] packedValue) {
-if (docID <= maxDocVisited) {
-return; // already visited or skipped
-}
-if (maxValueAsBytes != null) {
-int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0);
-// if doc's value is too high or for single sort even equal, it is not competitive
-// and the doc can be skipped
-if (cmp > 0 || (singleSort && cmp == 0)) return;
-}
-if (minValueAsBytes != null) {
-int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0);
-// if doc's value is too low or for single sort even equal, it is not competitive
-// and the doc can be skipped
-if (cmp < 0 || (singleSort && cmp == 0)) return;
-}
-adder.add(docID); // doc is competitive
-}
-
-@Override
-public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-if (maxValueAsBytes != null) {
-int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0);
-if (cmp > 0 || (singleSort && cmp == 0))
-return PointValues.Relation.CELL_OUTSIDE_QUERY;
-}
-if (minValueAsBytes != null) {
-int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0);
-if (cmp < 0 || (singleSort && cmp == 0))
-return PointValues.Relation.CELL_OUTSIDE_QUERY;
-}
-if ((maxValueAsBytes != null
-&& bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0)
-|| (minValueAsBytes != null
-&& bytesComparator.compare(minPackedValue, 0, minValueAsBytes, 0) < 0)) {
-return PointValues.Relation.CELL_CROSSES_QUERY;
-}
-return PointValues.Relation.CELL_INSIDE_QUERY;
-}
-};
-final long threshold = iteratorCost >>> 3;
-long estimatedNumberOfMatches =
-pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
-if (estimatedNumberOfMatches >= threshold) {
-// the new range is not selective enough to be worth materializing, it doesn't reduce number
-// of docs at least 8x
-return;
-}
-pointValues.intersect(visitor);
-competitiveIterator = result.build().iterator();
-iteratorCost = competitiveIterator.cost();
-}
-
-@Override
-public DocIdSetIterator competitiveIterator() {
-if (enableSkipping == false) return null;
-return new DocIdSetIterator() {
-private int docID = competitiveIterator.docID();
-
-@Override
-public int nextDoc() throws IOException {
-return advance(docID + 1);
-}
-
-@Override
-public int docID() {
-return docID;
-}
-
-@Override
-public long cost() {
-return competitiveIterator.cost();
-}
-
-@Override
-public int advance(int target) throws IOException {
-return docID = competitiveIterator.advance(target);
-}
-};
-}
-
-protected abstract boolean isMissingValueCompetitive();
-
-protected abstract void encodeBottom(byte[] packedValue);
-
-protected abstract void encodeTop(byte[] packedValue);
-}
-}
@@ -12,40 +12,36 @@ import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;

 public record LocalQueryParams(@NotNull Query query, int offsetInt, long offsetLong, int limitInt, long limitLong,
-@NotNull PageLimits pageLimits, @Nullable Float minCompetitiveScore, @Nullable Sort sort,
-boolean computePreciseHitsCount, Duration timeout) {
+@NotNull PageLimits pageLimits, @Nullable Sort sort, boolean computePreciseHitsCount,
+Duration timeout) {

 public LocalQueryParams(@NotNull Query query,
 long offsetLong,
 long limitLong,
 @NotNull PageLimits pageLimits,
-@Nullable Float minCompetitiveScore,
 @Nullable Sort sort,
 boolean computePreciseHitsCount,
 Duration timeout) {
-this(query, safeLongToInt(offsetLong), offsetLong, safeLongToInt(limitLong), limitLong, pageLimits,
-minCompetitiveScore, sort, computePreciseHitsCount, timeout);
+this(query,
+safeLongToInt(offsetLong),
+offsetLong,
+safeLongToInt(limitLong),
+limitLong,
+pageLimits,
+sort,
+computePreciseHitsCount,
+timeout
+);
 }

 public LocalQueryParams(@NotNull Query query,
 int offsetInt,
 int limitInt,
 @NotNull PageLimits pageLimits,
-@Nullable Float minCompetitiveScore,
 @Nullable Sort sort,
 boolean computePreciseHitsCount,
 Duration timeout) {
-this(query,
-offsetInt,
-offsetInt,
-limitInt,
-limitInt,
-pageLimits,
-minCompetitiveScore,
-sort,
-computePreciseHitsCount,
-timeout
-);
+this(query, offsetInt, offsetInt, limitInt, limitInt, pageLimits, sort, computePreciseHitsCount, timeout);
 }

 public boolean isSorted() {
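A hedged construction sketch (not from the commit): the long-based constructor now derives the int offset/limit itself and no longer takes a minimum competitive score; query and pageLimits are assumed to exist in the caller:

// Illustrative only; unsorted query over the first 250 hits with a 30 second timeout.
LocalQueryParams localParams = new LocalQueryParams(query,
		0L,      // offsetLong
		250L,    // limitLong
		pageLimits,
		null,    // sort (unsorted)
		true,    // computePreciseHitsCount
		Duration.ofSeconds(30));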
@@ -1,13 +1,11 @@
 package it.cavallium.dbengine.lucene.searcher;

-import java.util.ArrayList;
+import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
 import java.util.Iterator;
 import java.util.List;
 import java.util.function.Supplier;
-import java.util.stream.Collectors;
 import java.util.stream.IntStream;
-import java.util.stream.Stream;
-import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.CustomHitsThresholdChecker;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;

@@ -16,12 +14,20 @@ public class LuceneMultiGenerator implements Supplier<ScoreDoc> {
 private final Iterator<Supplier<ScoreDoc>> generators;
 private Supplier<ScoreDoc> luceneGenerator;

-public LuceneMultiGenerator(List<IndexSearcher> shards, LocalQueryParams localQueryParams) {
+public LuceneMultiGenerator(List<IndexSearcher> shards,
+LocalQueryParams localQueryParams,
+CustomHitsThresholdChecker hitsThresholdChecker,
+MaxScoreAccumulator minScoreAcc) {
 this.generators = IntStream
 .range(0, shards.size())
 .mapToObj(shardIndex -> {
 IndexSearcher shard = shards.get(shardIndex);
-return (Supplier<ScoreDoc>) new LuceneGenerator(shard, localQueryParams, shardIndex);
+return (Supplier<ScoreDoc>) new LuceneGenerator(shard,
+localQueryParams,
+shardIndex,
+hitsThresholdChecker,
+minScoreAcc
+);
 })
 .iterator();
 tryAdvanceGenerator();
@@ -10,7 +10,7 @@ import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
 import it.cavallium.dbengine.lucene.LuceneUtils;
 import it.cavallium.dbengine.lucene.FullDocs;
 import it.cavallium.dbengine.lucene.LLScoreDoc;
-import it.cavallium.dbengine.lucene.collector.LMDBFullScoreDocCollector;
+import org.apache.lucene.search.LMDBFullScoreDocCollector;
 import java.io.IOException;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -10,7 +10,7 @@ import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
 import it.cavallium.dbengine.lucene.FullDocs;
 import it.cavallium.dbengine.lucene.LLFieldDoc;
 import it.cavallium.dbengine.lucene.LuceneUtils;
-import it.cavallium.dbengine.lucene.collector.LMDBFullFieldDocCollector;
+import org.apache.lucene.search.LMDBFullFieldDocCollector;
 import java.io.IOException;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -8,7 +8,9 @@ import it.cavallium.dbengine.database.LLKeyScore;
 import it.cavallium.dbengine.database.LLUtils;
 import it.cavallium.dbengine.database.disk.LLIndexSearchers;
 import it.cavallium.dbengine.lucene.LuceneUtils;
+import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
 import java.util.List;
+import org.apache.lucene.search.CustomHitsThresholdChecker;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import reactor.core.publisher.Flux;
@@ -56,15 +58,15 @@ public class UnsortedStreamingMultiSearcher implements MultiSearcher {
 }

 private Flux<ScoreDoc> getScoreDocs(LocalQueryParams localQueryParams, List<IndexSearcher> shards) {
-return Flux
-.fromIterable(shards)
-.index()
-.flatMap(tuple -> {
+return Flux.defer(() -> {
+var hitsThreshold = CustomHitsThresholdChecker.createShared(localQueryParams.getTotalHitsThresholdLong());
+MaxScoreAccumulator maxScoreAccumulator = new MaxScoreAccumulator();
+return Flux.fromIterable(shards).index().flatMap(tuple -> {
 var shardIndex = (int) (long) tuple.getT1();
 var shard = tuple.getT2();
-return LuceneGenerator.reactive(shard, localQueryParams, shardIndex);
+return LuceneGenerator.reactive(shard, localQueryParams, shardIndex, hitsThreshold, maxScoreAccumulator);
+});
 });

 }

 private LocalQueryParams getLocalQueryParams(LocalQueryParams queryParams) {
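A compact sketch of the wiring introduced here (illustrative, not verbatim): one shared CustomHitsThresholdChecker and one MaxScoreAccumulator are created per query and passed to every per-shard generator, so the shards terminate early against a common hit threshold. The draining loop below is an assumption about how a Supplier<ScoreDoc> would typically be consumed:

// Sketch of per-query shared state handed to each shard generator.
var hitsThreshold = CustomHitsThresholdChecker.createShared(localQueryParams.getTotalHitsThresholdLong());
var maxScoreAccumulator = new MaxScoreAccumulator();
for (int shardIndex = 0; shardIndex < shards.size(); shardIndex++) {
	Supplier<ScoreDoc> generator = new LuceneGenerator(shards.get(shardIndex), localQueryParams,
			shardIndex, hitsThreshold, maxScoreAccumulator);
	ScoreDoc scoreDoc;
	while ((scoreDoc = generator.get()) != null) {
		// consume scoreDoc (assumption: the generator signals exhaustion by returning null)
	}
}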
@@ -72,7 +74,6 @@ public class UnsortedStreamingMultiSearcher implements MultiSearcher {
 0L,
 queryParams.offsetLong() + queryParams.limitLong(),
 queryParams.pageLimits(),
-queryParams.minCompetitiveScore(),
 queryParams.sort(),
 queryParams.computePreciseHitsCount(),
 queryParams.timeout()
@@ -15,15 +15,14 @@
 * limitations under the License.
 */

-package it.cavallium.dbengine.lucene.collector;
+package org.apache.lucene.search;

 import java.util.concurrent.atomic.AtomicLong;
-import org.apache.lucene.search.ScoreMode;

 /** Used for defining custom algorithms to allow searches to early terminate */
-abstract class HitsThresholdChecker {
-/** Implementation of HitsThresholdChecker which allows global hit counting */
-private static class GlobalHitsThresholdChecker extends HitsThresholdChecker {
+public abstract class CustomHitsThresholdChecker {
+/** Implementation of CustomHitsThresholdChecker which allows global hit counting */
+private static class GlobalHitsThresholdChecker extends CustomHitsThresholdChecker {
 private final long totalHitsThreshold;
 private final AtomicLong globalHitCount;

@@ -70,8 +69,8 @@ abstract class HitsThresholdChecker {
 }
 }

-/** Default implementation of HitsThresholdChecker to be used for single threaded execution */
-private static class LocalHitsThresholdChecker extends HitsThresholdChecker {
+/** Default implementation of CustomHitsThresholdChecker to be used for single threaded execution */
+private static class LocalHitsThresholdChecker extends CustomHitsThresholdChecker {
 private final long totalHitsThreshold;
 private long hitCount;

@@ -120,14 +119,14 @@ abstract class HitsThresholdChecker {
 /*
 * Returns a threshold checker that is useful for single threaded searches
 */
-public static HitsThresholdChecker create(final long totalHitsThreshold) {
+public static CustomHitsThresholdChecker create(final long totalHitsThreshold) {
 return new LocalHitsThresholdChecker(totalHitsThreshold);
 }

 /*
 * Returns a threshold checker that is based on a shared counter
 */
-public static HitsThresholdChecker createShared(final long totalHitsThreshold) {
+public static CustomHitsThresholdChecker createShared(final long totalHitsThreshold) {
 return new GlobalHitsThresholdChecker(totalHitsThreshold);
 }

@@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package it.cavallium.dbengine.lucene.collector;
+package org.apache.lucene.search;

 import it.cavallium.dbengine.database.SafeCloseable;
 import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
@@ -27,23 +27,12 @@ import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
 import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
 import it.cavallium.dbengine.lucene.PriorityQueue;
 import it.cavallium.dbengine.lucene.ResourceIterable;
-import java.io.Closeable;
+import it.cavallium.dbengine.lucene.collector.FullDocsCollector;
+import it.cavallium.dbengine.lucene.collector.FullFieldDocs;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.CollectionTerminatedException;
-import org.apache.lucene.search.CollectorManager;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.FieldComparator;
-import org.apache.lucene.search.LeafCollector;
-import org.apache.lucene.search.LeafFieldComparator;
-import it.cavallium.dbengine.lucene.comparators.MultiLeafFieldComparator;
-import org.apache.lucene.search.Scorable;
-import org.apache.lucene.search.ScoreMode;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.search.TotalHits.Relation;
 import reactor.core.publisher.Flux;

@@ -57,7 +46,8 @@ import reactor.core.publisher.Flux;
 * <a href="https://github.com/apache/lucene/commits/main/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java">
 * Lucene TopFieldCollector changes on GitHub</a>
 */
-public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPriorityQueue<LLSlotDoc>, LLSlotDoc, LLFieldDoc> {
+public abstract class LMDBFullFieldDocCollector extends
+FullDocsCollector<LMDBPriorityQueue<LLSlotDoc>, LLSlotDoc, LLFieldDoc> {

 // TODO: one optimization we could do is to pre-fill
 // the queue with sentinel value that guaranteed to
@@ -71,10 +61,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
 Scorable scorer;
 boolean collectedAllCompetitiveHits = false;

-TopFieldLeafCollector(PriorityQueue<LLSlotDoc> queue,
-FieldValueHitQueue fieldValueHitQueue,
-Sort sort,
-LeafReaderContext context)
+TopFieldLeafCollector(FieldValueHitQueue fieldValueHitQueue, Sort sort, LeafReaderContext context)
 throws IOException {
 // as all segments are sorted in the same way, enough to check only the 1st segment for
 // indexSort
@ -103,9 +90,9 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
|
|||||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||||
updateGlobalMinCompetitiveScore(scorer);
|
updateGlobalMinCompetitiveScore(scorer);
|
||||||
}
|
}
|
||||||
if (scoreMode.isExhaustive() == false
|
if (!scoreMode.isExhaustive()
|
||||||
&& totalHitsRelation == TotalHits.Relation.EQUAL_TO
|
&& totalHitsRelation == TotalHits.Relation.EQUAL_TO
|
||||||
&& hitsThresholdChecker.isThresholdReached(false)) {
|
&& hitsThresholdChecker.isThresholdReached()) {
|
||||||
// for the first time hitsThreshold is reached, notify comparator about this
|
// for the first time hitsThreshold is reached, notify comparator about this
|
||||||
comparator.setHitsThresholdReached();
|
comparator.setHitsThresholdReached();
|
||||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||||
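Note (not part of this commit): once the hits threshold is reached, the collector stops counting exactly and downgrades the relation to a lower bound. A caller reading the resulting TotalHits would interpret it roughly as in this minimal sketch, which uses only Lucene's public TotalHits API; the class name is made up for illustration.

import org.apache.lucene.search.TotalHits;

public final class TotalHitsExample {

	// EQUAL_TO means the count is exact; GREATER_THAN_OR_EQUAL_TO means the collector
	// stopped counting precisely after the hits threshold was reached, as in the hunk above.
	public static String describe(TotalHits totalHits) {
		return totalHits.relation == TotalHits.Relation.EQUAL_TO
				? totalHits.value + " hits"
				: "at least " + totalHits.value + " hits";
	}

	public static void main(String[] args) {
		System.out.println(describe(new TotalHits(1000, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO)));
	}
}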
@ -118,7 +105,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
 // this document is largest than anything else in the queue, and
 // therefore not competitive.
 if (searchSortPartOfIndexSort) {
-if (hitsThresholdChecker.isThresholdReached(false)) {
+if (hitsThresholdChecker.isThresholdReached()) {
 totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
 throw new CollectionTerminatedException();
 } else {
@ -220,7 +207,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
 public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
 docBase = context.docBase;
 
-return new TopFieldLeafCollector(queue, fieldValueHitQueue, sort, context) {
+return new TopFieldLeafCollector(fieldValueHitQueue, sort, context) {
 
 @Override
 public void collect(int doc) throws IOException {
@ -298,12 +285,12 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
 
 if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
 && reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
-&& hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
+&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
 scoreMode = ScoreMode.TOP_SCORES;
 canSetMinScore = true;
 } else {
 canSetMinScore = false;
-if (hitsThresholdChecker.getHitsThreshold(false) != Integer.MAX_VALUE) {
+if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
 scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
 } else {
 scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
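Note (not part of this commit): the branch above picks the collector's ScoreMode from three inputs. Restated here as a pure function to make the decision table easier to read; the class and method names are made up, only the ScoreMode constants come from Lucene.

import org.apache.lucene.search.ScoreMode;

public final class ScoreModeChoice {

	// "relevanceFirst" stands for: the first comparator is the relevance comparator
	// and reverseMul == 1 (natural descending-relevance order is preserved).
	static ScoreMode choose(boolean relevanceFirst, boolean needsScores, long hitsThreshold) {
		boolean bounded = hitsThreshold != Integer.MAX_VALUE;
		if (relevanceFirst && bounded) {
			return ScoreMode.TOP_SCORES; // the only case where canSetMinScore becomes true
		}
		if (bounded) {
			return needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
		}
		return needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
	}

	public static void main(String[] args) {
		System.out.println(choose(true, true, 1_000));              // TOP_SCORES
		System.out.println(choose(false, true, Integer.MAX_VALUE)); // COMPLETE
	}
}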
@ -319,7 +306,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
 
 protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
 assert minScoreAcc != null;
-if (canSetMinScore && hitsThresholdChecker.isThresholdReached(false)) {
+if (canSetMinScore && hitsThresholdChecker.isThresholdReached()) {
 // we can start checking the global maximum score even
 // if the local queue is not full because the threshold
 // is reached.
@ -333,7 +320,7 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
 }
 
 protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
-if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached(false)) {
+if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached()) {
 assert pq.top() != null;
 float minScore = (float) firstComparator.value(pq.top().slot());
 if (minScore > minCompetitiveScore) {
@ -422,17 +409,25 @@ public abstract class LMDBFullFieldDocCollector extends FullDocsCollector<LMDBPr
 LLTempLMDBEnv env, Sort sort, int numHits, long totalHitsThreshold) {
 return new CollectorManager<>() {
 
-private final HitsThresholdChecker hitsThresholdChecker =
-HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
+private final HitsThresholdChecker hitsThresholdChecker;
+{
+if (totalHitsThreshold < Integer.MAX_VALUE) {
+hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max((int) totalHitsThreshold, numHits));
+} else {
+hitsThresholdChecker = HitsThresholdChecker.createShared(Integer.MAX_VALUE);
+}
+}
 
 private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
 
 @Override
-public LMDBFullFieldDocCollector newCollector() throws IOException {
+public LMDBFullFieldDocCollector newCollector() {
 return create(env, sort, numHits, hitsThresholdChecker, minScoreAcc);
 }
 
 @Override
-public FullFieldDocs<LLFieldDoc> reduce(Collection<LMDBFullFieldDocCollector> collectors) throws IOException {
+public FullFieldDocs<LLFieldDoc> reduce(Collection<LMDBFullFieldDocCollector> collectors) {
 return reduceShared(sort, collectors);
 }
 };
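Note (not part of this commit): the new instance initializer above exists because the manager receives a long totalHitsThreshold while the shared HitsThresholdChecker works with int values. The same saturating conversion, pulled out into a hypothetical standalone helper for clarity.

public final class ThresholdMath {

	private ThresholdMath() {}

	// Clamp a long hits threshold into the int range expected by the shared checker.
	public static int saturatedThreshold(long totalHitsThreshold, int numHits) {
		if (totalHitsThreshold < Integer.MAX_VALUE) {
			// The value is below Integer.MAX_VALUE, so the narrowing cast keeps the
			// intended threshold (thresholds are non-negative in practice).
			return Math.max((int) totalHitsThreshold, numHits);
		}
		// Anything at or above Integer.MAX_VALUE saturates to "count as many as possible".
		return Integer.MAX_VALUE;
	}

	public static void main(String[] args) {
		System.out.println(saturatedThreshold(Long.MAX_VALUE, 10)); // 2147483647
		System.out.println(saturatedThreshold(1_000L, 10));         // 1000
	}
}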
@ -14,28 +14,20 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package it.cavallium.dbengine.lucene.collector;
+package org.apache.lucene.search;
 
-import it.cavallium.dbengine.database.LLUtils;
 import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
 import it.cavallium.dbengine.lucene.FullDocs;
 import it.cavallium.dbengine.lucene.LLScoreDoc;
 import it.cavallium.dbengine.lucene.LLScoreDocCodec;
 import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
-import it.cavallium.dbengine.lucene.LuceneUtils;
 import it.cavallium.dbengine.lucene.MaxScoreAccumulator;
 import it.cavallium.dbengine.lucene.ResourceIterable;
+import it.cavallium.dbengine.lucene.collector.FullDocsCollector;
 import java.io.IOException;
 import java.util.Collection;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.CollectorManager;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.LeafCollector;
 import it.cavallium.dbengine.lucene.MaxScoreAccumulator.DocAndScore;
-import org.apache.lucene.search.Scorable;
-import org.apache.lucene.search.ScoreMode;
-import org.apache.lucene.search.TotalHits;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 
@ -53,7 +45,8 @@ import org.jetbrains.annotations.Nullable;
 * <a href="https://github.com/apache/lucene/commits/main/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java">
 * Lucene TopScoreDocCollector changes on GitHub</a>
 */
-public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPriorityQueue<LLScoreDoc>, LLScoreDoc, LLScoreDoc> {
+public abstract class LMDBFullScoreDocCollector extends
+FullDocsCollector<LMDBPriorityQueue<LLScoreDoc>, LLScoreDoc, LLScoreDoc> {
 
 /** Scorable leaf collector */
 public abstract static class ScorerLeafCollector implements LeafCollector {
@ -69,7 +62,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
 private static class SimpleLMDBFullScoreDocCollector extends LMDBFullScoreDocCollector {
 
 SimpleLMDBFullScoreDocCollector(LLTempLMDBEnv env, @Nullable Long limit,
-HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
+CustomHitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
 super(env, limit, hitsThresholdChecker, minScoreAcc);
 }
 
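Note (not part of this commit): CustomHitsThresholdChecker itself is not shown in this diff. Judging only from the call sites in the surrounding hunks (create(...) and createShared(...) taking a hits threshold, isThresholdReached() with no argument), a long-based checker could look roughly like the sketch below; the name LongHitsThresholdChecker is a placeholder and the real class may differ.

import java.util.concurrent.atomic.AtomicLong;

public abstract class LongHitsThresholdChecker {

	protected final long totalHitsThreshold;

	protected LongHitsThresholdChecker(long totalHitsThreshold) {
		this.totalHitsThreshold = totalHitsThreshold;
	}

	public long getHitsThreshold() {
		return totalHitsThreshold;
	}

	public abstract void incrementHitCount();

	public abstract boolean isThresholdReached();

	// Per-collector checker: a plain counter is enough.
	public static LongHitsThresholdChecker create(long totalHitsThreshold) {
		return new LongHitsThresholdChecker(totalHitsThreshold) {
			private long hitCount;

			@Override
			public void incrementHitCount() {
				hitCount++;
			}

			@Override
			public boolean isThresholdReached() {
				return hitCount >= totalHitsThreshold;
			}
		};
	}

	// Shared between the collectors of one CollectorManager: the counter must be atomic.
	public static LongHitsThresholdChecker createShared(long totalHitsThreshold) {
		AtomicLong hitCount = new AtomicLong();
		return new LongHitsThresholdChecker(totalHitsThreshold) {
			@Override
			public void incrementHitCount() {
				hitCount.incrementAndGet();
			}

			@Override
			public boolean isThresholdReached() {
				return hitCount.get() >= totalHitsThreshold;
			}
		};
	}
}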
@ -153,7 +146,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
 * <code>numHits</code>, and fill the array with sentinel objects.
 */
 public static LMDBFullScoreDocCollector create(LLTempLMDBEnv env, long numHits, int totalHitsThreshold) {
-return create(env, numHits, HitsThresholdChecker.create(totalHitsThreshold), null);
+return create(env, numHits, CustomHitsThresholdChecker.create(totalHitsThreshold), null);
 }
 
 /**
@ -163,12 +156,12 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
 * but will also likely make query processing slower.
 */
 public static LMDBFullScoreDocCollector create(LLTempLMDBEnv env, int totalHitsThreshold) {
-return create(env, HitsThresholdChecker.create(totalHitsThreshold), null);
+return create(env, CustomHitsThresholdChecker.create(totalHitsThreshold), null);
 }
 
 static LMDBFullScoreDocCollector create(
 LLTempLMDBEnv env,
-HitsThresholdChecker hitsThresholdChecker,
+CustomHitsThresholdChecker hitsThresholdChecker,
 MaxScoreAccumulator minScoreAcc) {
 
 if (hitsThresholdChecker == null) {
@ -181,7 +174,7 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
 static LMDBFullScoreDocCollector create(
 LLTempLMDBEnv env,
 @NotNull Long numHits,
-HitsThresholdChecker hitsThresholdChecker,
+CustomHitsThresholdChecker hitsThresholdChecker,
 MaxScoreAccumulator minScoreAcc) {
 
 if (hitsThresholdChecker == null) {
@ -205,8 +198,8 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
 long totalHitsThreshold) {
 return new CollectorManager<>() {
 
-private final HitsThresholdChecker hitsThresholdChecker =
-HitsThresholdChecker.createShared(totalHitsThreshold);
+private final CustomHitsThresholdChecker hitsThresholdChecker =
+CustomHitsThresholdChecker.createShared(totalHitsThreshold);
 private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
 
 @Override
@ -230,8 +223,8 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
 long totalHitsThreshold) {
 return new CollectorManager<>() {
 
-private final HitsThresholdChecker hitsThresholdChecker =
-HitsThresholdChecker.createShared(totalHitsThreshold);
+private final CustomHitsThresholdChecker hitsThresholdChecker =
+CustomHitsThresholdChecker.createShared(totalHitsThreshold);
 private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
 
 @Override
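Note (not part of this commit): both anonymous CollectorManager builders above follow the standard Lucene contract, where IndexSearcher calls newCollector() once per slice and then merges all collectors through reduce(...). A minimal usage sketch against Lucene's public API; the wrapper class and method names are made up.

import java.io.IOException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public final class CollectorManagerUsage {

	private CollectorManagerUsage() {}

	// The searcher creates one collector per slice via manager.newCollector(), runs them
	// (possibly concurrently), and finally combines the results with manager.reduce(...).
	public static <C extends Collector, T> T searchWith(IndexSearcher searcher,
			Query query,
			CollectorManager<C, T> manager) throws IOException {
		return searcher.search(query, manager);
	}
}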
@ -258,13 +251,13 @@ public abstract class LMDBFullScoreDocCollector extends FullDocsCollector<LMDBPr
 
 int docBase;
 final @Nullable Long limit;
-final HitsThresholdChecker hitsThresholdChecker;
+final CustomHitsThresholdChecker hitsThresholdChecker;
 final MaxScoreAccumulator minScoreAcc;
 float minCompetitiveScore;
 
 // prevents instantiation
 LMDBFullScoreDocCollector(LLTempLMDBEnv env, @Nullable Long limit,
-HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
+CustomHitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
 super(new LMDBPriorityQueue<>(env, new LLScoreDocCodec()));
 assert hitsThresholdChecker != null;
 this.limit = limit;
@ -15,28 +15,25 @@
 * limitations under the License.
 */
 
-package it.cavallium.dbengine.lucene.comparators;
+package org.apache.lucene.search.comparators;
 
 import it.cavallium.dbengine.database.SafeCloseable;
 import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
 import it.cavallium.dbengine.lucene.IArray;
 import it.cavallium.dbengine.lucene.IntCodec;
 import it.cavallium.dbengine.lucene.LMDBArray;
-import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
-import it.cavallium.dbengine.lucene.LongCodec;
-import java.io.Closeable;
 import java.io.IOException;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.comparators.MinDocIterator;
 
 /**
 * Comparator that sorts by asc _doc
 * Based on {@link org.apache.lucene.search.comparators.DocComparator}
 * */
-public class DocComparator extends FieldComparator<Integer> implements SafeCloseable {
+public class LMDBDocComparator extends org.apache.lucene.search.comparators.DocComparator implements SafeCloseable {
 private final IArray<Integer> docIDs;
 private final boolean enableSkipping; // if skipping functionality should be enabled
 private int bottom;
@ -46,7 +43,8 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
 private boolean hitsThresholdReached;
 
 /** Creates a new comparator based on document ids for {@code numHits} */
-public DocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
+public LMDBDocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
+super(0, reverse, sortPost);
 this.docIDs = new LMDBArray<>(env, new IntCodec(), numHits, 0);
 // skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
 this.enableSkipping = (!reverse && sortPost == 0);
@ -145,7 +143,7 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
 
 @Override
 public DocIdSetIterator competitiveIterator() {
-if (enableSkipping == false) {
+if (!enableSkipping) {
 return null;
 } else {
 return new DocIdSetIterator() {
@ -181,7 +179,7 @@ public class DocComparator extends FieldComparator<Integer> implements SafeClose
 }
 
 private void updateIterator() {
-if (enableSkipping == false || hitsThresholdReached == false) return;
+if (!enableSkipping || !hitsThresholdReached) return;
 if (bottomValueSet) {
 // since we've collected top N matches, we can early terminate
 // Currently early termination on _doc is also implemented in TopFieldCollector, but this
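Note (not part of this commit): competitiveIterator() above hands the scorer a DocIdSetIterator so that documents which can no longer be competitive are skipped. For reference, the standard consumption pattern for any DocIdSetIterator, using only Lucene's public API; the class name is made up.

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

public final class DocIdSetIteratorExample {

	// Advances the iterator until NO_MORE_DOCS and counts the doc IDs it emits.
	public static int countDocs(DocIdSetIterator iterator) throws IOException {
		int count = 0;
		for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
			count++;
		}
		return count;
	}

	public static void main(String[] args) throws IOException {
		// DocIdSetIterator.all(n) yields the doc IDs 0..n-1.
		System.out.println(countDocs(DocIdSetIterator.all(5))); // prints 5
	}
}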