Facets optimizations
This commit is contained in:
parent
b2216c1b2c
commit
6baa05de51
29
src/main/java/it/cavallium/dbengine/lucene/IntSmear.java
Normal file
29
src/main/java/it/cavallium/dbengine/lucene/IntSmear.java
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
package it.cavallium.dbengine.lucene;
|
||||||
|
|
||||||
|
import it.unimi.dsi.fastutil.ints.IntHash;
|
||||||
|
|
||||||
|
public class IntSmear implements IntHash.Strategy {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode(int e) {
|
||||||
|
return smear(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This method was written by Doug Lea with assistance from members of JCP
|
||||||
|
* JSR-166 Expert Group and released to the public domain, as explained at
|
||||||
|
* http://creativecommons.org/licenses/publicdomain
|
||||||
|
*
|
||||||
|
* As of 2010/06/11, this method is identical to the (package private) hash
|
||||||
|
* method in OpenJDK 7's java.util.HashMap class.
|
||||||
|
*/
|
||||||
|
static int smear(int hashCode) {
|
||||||
|
hashCode ^= (hashCode >>> 20) ^ (hashCode >>> 12);
|
||||||
|
return hashCode ^ (hashCode >>> 7) ^ (hashCode >>> 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(int a, int b) {
|
||||||
|
return a == b;
|
||||||
|
}
|
||||||
|
}
|
@ -1,18 +1,20 @@
|
|||||||
package it.cavallium.dbengine.lucene.collector;
|
package it.cavallium.dbengine.lucene.collector;
|
||||||
|
|
||||||
|
import com.google.common.cache.Cache;
|
||||||
|
import com.google.common.cache.CacheBuilder;
|
||||||
|
import com.google.common.cache.CacheLoader;
|
||||||
|
import com.google.common.cache.LoadingCache;
|
||||||
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
|
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
import org.apache.commons.lang3.NotImplementedException;
|
import org.apache.commons.lang3.NotImplementedException;
|
||||||
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
|
||||||
import org.apache.lucene.facet.FacetResult;
|
import org.apache.lucene.facet.FacetResult;
|
||||||
import org.apache.lucene.facet.Facets;
|
import org.apache.lucene.facet.Facets;
|
||||||
import org.apache.lucene.facet.FacetsCollector;
|
import org.apache.lucene.facet.FacetsCollector;
|
||||||
import org.apache.lucene.facet.FacetsCollectorManager;
|
import org.apache.lucene.facet.FacetsCollectorManager;
|
||||||
|
import org.apache.lucene.facet.FacetsConfig;
|
||||||
import org.apache.lucene.facet.LabelAndValue;
|
import org.apache.lucene.facet.LabelAndValue;
|
||||||
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
|
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
|
||||||
import org.apache.lucene.facet.range.DoubleRange;
|
import org.apache.lucene.facet.range.DoubleRange;
|
||||||
@ -20,29 +22,30 @@ import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
|
|||||||
import org.apache.lucene.facet.range.LongRange;
|
import org.apache.lucene.facet.range.LongRange;
|
||||||
import org.apache.lucene.facet.range.LongRangeFacetCounts;
|
import org.apache.lucene.facet.range.LongRangeFacetCounts;
|
||||||
import org.apache.lucene.facet.range.Range;
|
import org.apache.lucene.facet.range.Range;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.CachingCollector;
|
||||||
import org.apache.lucene.search.Collector;
|
import org.apache.lucene.search.Collector;
|
||||||
import org.apache.lucene.search.CollectorManager;
|
|
||||||
import org.apache.lucene.search.DoubleValuesSource;
|
import org.apache.lucene.search.DoubleValuesSource;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.LeafCollector;
|
|
||||||
import org.apache.lucene.search.LongValuesSource;
|
import org.apache.lucene.search.LongValuesSource;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.Scorable;
|
|
||||||
import org.apache.lucene.search.ScoreMode;
|
import org.apache.lucene.search.ScoreMode;
|
||||||
import org.apache.lucene.search.SimpleCollector;
|
|
||||||
import org.jetbrains.annotations.Nullable;
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
|
||||||
public class DecimalBucketMultiCollectorManager implements CollectorMultiManager<Buckets, Buckets> {
|
public class DecimalBucketMultiCollectorManager implements CollectorMultiManager<Buckets, Buckets> {
|
||||||
|
|
||||||
private final FacetsCollectorManager facetsCollectorManager;
|
private static final boolean USE_SINGLE_FACET_COLLECTOR = false;
|
||||||
|
private static final boolean AMORTIZE = true;
|
||||||
|
private final boolean randomSamplingEnabled;
|
||||||
|
private final FastFacetsCollectorManager facetsCollectorManager;
|
||||||
|
private final FastRandomSamplingFacetsCollector randomSamplingFacetsCollector;
|
||||||
private final Range[] bucketRanges;
|
private final Range[] bucketRanges;
|
||||||
|
|
||||||
private final List<Query> queries;
|
private final List<Query> queries;
|
||||||
private final @Nullable Query normalizationQuery;
|
private final @Nullable Query normalizationQuery;
|
||||||
|
private final @Nullable Integer collectionRate;
|
||||||
private final @Nullable Integer sampleSize;
|
private final @Nullable Integer sampleSize;
|
||||||
|
|
||||||
private final String bucketField;
|
private final String bucketField;
|
||||||
@ -64,6 +67,7 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
BucketValueSource bucketValueSource,
|
BucketValueSource bucketValueSource,
|
||||||
List<Query> queries,
|
List<Query> queries,
|
||||||
@Nullable Query normalizationQuery,
|
@Nullable Query normalizationQuery,
|
||||||
|
@Nullable Integer collectionRate,
|
||||||
@Nullable Integer sampleSize) {
|
@Nullable Integer sampleSize) {
|
||||||
this.queries = queries;
|
this.queries = queries;
|
||||||
this.normalizationQuery = normalizationQuery;
|
this.normalizationQuery = normalizationQuery;
|
||||||
@ -75,6 +79,7 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
this.totalLength = bucketLength * bucketsInt;
|
this.totalLength = bucketLength * bucketsInt;
|
||||||
this.bucketField = bucketField;
|
this.bucketField = bucketField;
|
||||||
this.bucketValueSource = bucketValueSource;
|
this.bucketValueSource = bucketValueSource;
|
||||||
|
this.collectionRate = collectionRate;
|
||||||
this.sampleSize = sampleSize;
|
this.sampleSize = sampleSize;
|
||||||
|
|
||||||
if (USE_LONGS) {
|
if (USE_LONGS) {
|
||||||
@ -102,26 +107,15 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.facetsCollectorManager = new FacetsCollectorManager() {
|
this.randomSamplingEnabled = sampleSize != null;
|
||||||
@Override
|
int intCollectionRate = this.collectionRate == null ? 1 : this.collectionRate;
|
||||||
public FacetsCollector newCollector() {
|
if (randomSamplingEnabled) {
|
||||||
if (sampleSize != null) {
|
randomSamplingFacetsCollector = new FastRandomSamplingFacetsCollector(intCollectionRate, sampleSize, 0);
|
||||||
return new RandomSamplingFacetsCollector(sampleSize) {
|
this.facetsCollectorManager = null;
|
||||||
@Override
|
} else {
|
||||||
public ScoreMode scoreMode() {
|
this.randomSamplingFacetsCollector = null;
|
||||||
return ScoreMode.COMPLETE_NO_SCORES;
|
this.facetsCollectorManager = new FastFacetsCollectorManager(intCollectionRate);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
} else {
|
|
||||||
return new FacetsCollector(false) {
|
|
||||||
@Override
|
|
||||||
public ScoreMode scoreMode() {
|
|
||||||
return ScoreMode.COMPLETE_NO_SCORES;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] newBuckets() {
|
public double[] newBuckets() {
|
||||||
@ -129,18 +123,28 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Buckets search(IndexSearcher indexSearcher) throws IOException {
|
public Buckets search(IndexSearcher indexSearcher) throws IOException {
|
||||||
Query globalQuery;
|
Query query;
|
||||||
if (normalizationQuery != null) {
|
if (USE_SINGLE_FACET_COLLECTOR && normalizationQuery != null) {
|
||||||
globalQuery = normalizationQuery;
|
query = normalizationQuery;
|
||||||
|
} else if (queries.size() == 0) {
|
||||||
|
query = new MatchNoDocsQuery();
|
||||||
|
} else if (queries.size() == 1) {
|
||||||
|
query = queries.get(0);
|
||||||
} else {
|
} else {
|
||||||
var booleanQueryBuilder = new BooleanQuery.Builder();
|
var booleanQueryBuilder = new BooleanQuery.Builder();
|
||||||
for (Query query : queries) {
|
for (Query queryEntry : queries) {
|
||||||
booleanQueryBuilder.add(query, Occur.SHOULD);
|
booleanQueryBuilder.add(queryEntry, Occur.SHOULD);
|
||||||
}
|
}
|
||||||
booleanQueryBuilder.setMinimumNumberShouldMatch(1);
|
booleanQueryBuilder.setMinimumNumberShouldMatch(1);
|
||||||
globalQuery = booleanQueryBuilder.build();
|
query = booleanQueryBuilder.build();
|
||||||
|
}
|
||||||
|
it.cavallium.dbengine.lucene.collector.FacetsCollector queryFacetsCollector;
|
||||||
|
if (randomSamplingEnabled) {
|
||||||
|
indexSearcher.search(query, randomSamplingFacetsCollector);
|
||||||
|
queryFacetsCollector = randomSamplingFacetsCollector;
|
||||||
|
} else {
|
||||||
|
queryFacetsCollector = indexSearcher.search(query, facetsCollectorManager);
|
||||||
}
|
}
|
||||||
var facetsCollector = indexSearcher.search(globalQuery, facetsCollectorManager);
|
|
||||||
double[] reducedNormalizationBuckets = newBuckets();
|
double[] reducedNormalizationBuckets = newBuckets();
|
||||||
List<DoubleArrayList> seriesReducedBuckets = new ArrayList<>(queries.size());
|
List<DoubleArrayList> seriesReducedBuckets = new ArrayList<>(queries.size());
|
||||||
for (int i = 0; i < queries.size(); i++) {
|
for (int i = 0; i < queries.size(); i++) {
|
||||||
@ -148,7 +152,7 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
seriesReducedBuckets.add(DoubleArrayList.wrap(buckets));
|
seriesReducedBuckets.add(DoubleArrayList.wrap(buckets));
|
||||||
}
|
}
|
||||||
int serieIndex = 0;
|
int serieIndex = 0;
|
||||||
for (Query query : queries) {
|
for (Query queryEntry : queries) {
|
||||||
var reducedBuckets = seriesReducedBuckets.get(serieIndex);
|
var reducedBuckets = seriesReducedBuckets.get(serieIndex);
|
||||||
Facets facets;
|
Facets facets;
|
||||||
if (USE_LONGS) {
|
if (USE_LONGS) {
|
||||||
@ -165,8 +169,8 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
}
|
}
|
||||||
facets = new LongRangeFacetCounts(bucketField,
|
facets = new LongRangeFacetCounts(bucketField,
|
||||||
valuesSource,
|
valuesSource,
|
||||||
facetsCollector,
|
queryFacetsCollector.getLuceneFacetsCollector(),
|
||||||
query,
|
USE_SINGLE_FACET_COLLECTOR && normalizationQuery != null || queries.size() > 1 ? queryEntry : null,
|
||||||
(LongRange[]) bucketRanges
|
(LongRange[]) bucketRanges
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
@ -182,12 +186,19 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
}
|
}
|
||||||
facets = new DoubleRangeFacetCounts(bucketField,
|
facets = new DoubleRangeFacetCounts(bucketField,
|
||||||
valuesSource,
|
valuesSource,
|
||||||
facetsCollector,
|
queryFacetsCollector.getLuceneFacetsCollector(),
|
||||||
query,
|
USE_SINGLE_FACET_COLLECTOR && normalizationQuery != null || queries.size() > 1 ? queryEntry : null,
|
||||||
(DoubleRange[]) bucketRanges
|
(DoubleRange[]) bucketRanges
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
FacetResult children = facets.getTopChildren(0, bucketField);
|
FacetResult children = facets.getTopChildren(0, bucketField);
|
||||||
|
if (AMORTIZE && randomSamplingEnabled) {
|
||||||
|
var cfg = new FacetsConfig();
|
||||||
|
for (Range bucketRange : bucketRanges) {
|
||||||
|
cfg.setIndexFieldName(bucketRange.label, bucketField);
|
||||||
|
}
|
||||||
|
((RandomSamplingFacetsCollector) queryFacetsCollector.getLuceneFacetsCollector()).amortizeFacetCounts(children, cfg, indexSearcher);
|
||||||
|
}
|
||||||
for (LabelAndValue labelAndValue : children.labelValues) {
|
for (LabelAndValue labelAndValue : children.labelValues) {
|
||||||
var index = Integer.parseInt(labelAndValue.label);
|
var index = Integer.parseInt(labelAndValue.label);
|
||||||
reducedBuckets.set(index, reducedBuckets.getDouble(index) + labelAndValue.value.doubleValue());
|
reducedBuckets.set(index, reducedBuckets.getDouble(index) + labelAndValue.value.doubleValue());
|
||||||
@ -195,8 +206,17 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
serieIndex++;
|
serieIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
it.cavallium.dbengine.lucene.collector.FacetsCollector normalizationFacetsCollector;
|
||||||
Facets normalizationFacets;
|
Facets normalizationFacets;
|
||||||
if (normalizationQuery != null) {
|
if (normalizationQuery != null) {
|
||||||
|
if (USE_SINGLE_FACET_COLLECTOR) {
|
||||||
|
normalizationFacetsCollector = queryFacetsCollector;
|
||||||
|
} else if (randomSamplingEnabled) {
|
||||||
|
indexSearcher.search(normalizationQuery, randomSamplingFacetsCollector);
|
||||||
|
normalizationFacetsCollector = randomSamplingFacetsCollector;
|
||||||
|
} else {
|
||||||
|
normalizationFacetsCollector = indexSearcher.search(normalizationQuery, facetsCollectorManager);
|
||||||
|
}
|
||||||
if (USE_LONGS) {
|
if (USE_LONGS) {
|
||||||
LongValuesSource valuesSource;
|
LongValuesSource valuesSource;
|
||||||
if (bucketValueSource instanceof NullValueSource) {
|
if (bucketValueSource instanceof NullValueSource) {
|
||||||
@ -210,7 +230,7 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
}
|
}
|
||||||
normalizationFacets = new LongRangeFacetCounts(bucketField,
|
normalizationFacets = new LongRangeFacetCounts(bucketField,
|
||||||
valuesSource,
|
valuesSource,
|
||||||
facetsCollector,
|
normalizationFacetsCollector.getLuceneFacetsCollector(),
|
||||||
null,
|
null,
|
||||||
(LongRange[]) bucketRanges
|
(LongRange[]) bucketRanges
|
||||||
);
|
);
|
||||||
@ -227,12 +247,19 @@ public class DecimalBucketMultiCollectorManager implements CollectorMultiManager
|
|||||||
}
|
}
|
||||||
normalizationFacets = new DoubleRangeFacetCounts(bucketField,
|
normalizationFacets = new DoubleRangeFacetCounts(bucketField,
|
||||||
valuesSource,
|
valuesSource,
|
||||||
facetsCollector,
|
normalizationFacetsCollector.getLuceneFacetsCollector(),
|
||||||
null,
|
null,
|
||||||
(DoubleRange[]) bucketRanges
|
(DoubleRange[]) bucketRanges
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
var normalizationChildren = normalizationFacets.getTopChildren(0, bucketField);
|
var normalizationChildren = normalizationFacets.getTopChildren(0, bucketField);
|
||||||
|
if (AMORTIZE && randomSamplingEnabled) {
|
||||||
|
var cfg = new FacetsConfig();
|
||||||
|
for (Range bucketRange : bucketRanges) {
|
||||||
|
cfg.setIndexFieldName(bucketRange.label, bucketField);
|
||||||
|
}
|
||||||
|
((RandomSamplingFacetsCollector) normalizationFacetsCollector.getLuceneFacetsCollector()).amortizeFacetCounts(normalizationChildren, cfg, indexSearcher);
|
||||||
|
}
|
||||||
for (LabelAndValue labelAndValue : normalizationChildren.labelValues) {
|
for (LabelAndValue labelAndValue : normalizationChildren.labelValues) {
|
||||||
var index = Integer.parseInt(labelAndValue.label);
|
var index = Integer.parseInt(labelAndValue.label);
|
||||||
reducedNormalizationBuckets[index] += labelAndValue.value.doubleValue();
|
reducedNormalizationBuckets[index] += labelAndValue.value.doubleValue();
|
||||||
|
@ -0,0 +1,32 @@
|
|||||||
|
package it.cavallium.dbengine.lucene.collector;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
|
import org.apache.lucene.search.LeafCollector;
|
||||||
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
|
||||||
|
public interface FacetsCollector extends Collector {
|
||||||
|
|
||||||
|
static FacetsCollector wrap(org.apache.lucene.facet.FacetsCollector facetsCollector) {
|
||||||
|
return new FacetsCollector() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() {
|
||||||
|
return facetsCollector;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||||
|
return facetsCollector.getLeafCollector(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return facetsCollector.scoreMode();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector();
|
||||||
|
}
|
@ -0,0 +1,92 @@
|
|||||||
|
package it.cavallium.dbengine.lucene.collector;
|
||||||
|
|
||||||
|
import it.cavallium.dbengine.lucene.IntSmear;
|
||||||
|
import it.unimi.dsi.fastutil.ints.IntHash;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import org.apache.lucene.facet.FacetsCollectorManager;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.CollectorManager;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.LeafCollector;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
|
||||||
|
public class FastFacetsCollectorManager implements CollectorManager<FacetsCollector, FacetsCollector> {
|
||||||
|
|
||||||
|
private final int collectionRate;
|
||||||
|
private final IntHash.Strategy hash;
|
||||||
|
private final FacetsCollectorManager facetsCollectorManager;
|
||||||
|
|
||||||
|
public FastFacetsCollectorManager(int collectionRate) {
|
||||||
|
this.collectionRate = collectionRate;
|
||||||
|
this.hash = new IntSmear();
|
||||||
|
this.facetsCollectorManager = new FacetsCollectorManager();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FacetsCollector newCollector() {
|
||||||
|
return new FastFacetsCollector(collectionRate, hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FacetsCollector reduce(Collection<FacetsCollector> collectors) throws IOException {
|
||||||
|
return FacetsCollector.wrap(facetsCollectorManager.reduce(collectors
|
||||||
|
.stream()
|
||||||
|
.map(FacetsCollector::getLuceneFacetsCollector)
|
||||||
|
.toList()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class FastFacetsCollector implements FacetsCollector {
|
||||||
|
|
||||||
|
private final org.apache.lucene.facet.FacetsCollector collector;
|
||||||
|
private final int collectionRate;
|
||||||
|
private final IntHash.Strategy hash;
|
||||||
|
|
||||||
|
public FastFacetsCollector(int collectionRate, IntHash.Strategy hash) {
|
||||||
|
this.collectionRate = collectionRate;
|
||||||
|
this.hash = hash;
|
||||||
|
this.collector = new org.apache.lucene.facet.FacetsCollector(false) {
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return ScoreMode.COMPLETE_NO_SCORES;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() {
|
||||||
|
return collector;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||||
|
var leafCollector = collector.getLeafCollector(context);
|
||||||
|
return new LeafCollector() {
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
leafCollector.setScorer(scorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
if (hash.hashCode(doc) % collectionRate == 0) {
|
||||||
|
leafCollector.collect(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSetIterator competitiveIterator() throws IOException {
|
||||||
|
return leafCollector.competitiveIterator();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return collector.scoreMode();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,64 @@
|
|||||||
|
package it.cavallium.dbengine.lucene.collector;
|
||||||
|
|
||||||
|
import it.cavallium.dbengine.lucene.IntSmear;
|
||||||
|
import it.unimi.dsi.fastutil.ints.IntHash;
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
|
import org.apache.lucene.search.LeafCollector;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
import org.apache.lucene.search.SimpleCollector;
|
||||||
|
|
||||||
|
public class FastRandomSamplingFacetsCollector extends SimpleCollector implements FacetsCollector {
|
||||||
|
|
||||||
|
private final RandomSamplingFacetsCollector collector;
|
||||||
|
private final int collectionRate;
|
||||||
|
private final IntHash.Strategy hash;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param collectionRate collect 1 document every n collectable documents
|
||||||
|
*/
|
||||||
|
public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize) {
|
||||||
|
this(collectionRate, sampleSize, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize, long seed) {
|
||||||
|
this.collectionRate = collectionRate;
|
||||||
|
this.hash = new IntSmear();
|
||||||
|
this.collector = new RandomSamplingFacetsCollector(sampleSize, seed) {
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return ScoreMode.COMPLETE_NO_SCORES;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||||
|
collector.getLeafCollector(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
collector.setScorer(scorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
if (hash.hashCode(doc) % collectionRate == 0) {
|
||||||
|
collector.collect(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return collector.scoreMode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() {
|
||||||
|
return collector;
|
||||||
|
}
|
||||||
|
}
|
@ -5,4 +5,5 @@ import org.jetbrains.annotations.NotNull;
|
|||||||
import org.jetbrains.annotations.Nullable;
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
|
||||||
public record BucketParams(double min, double max, int buckets, String bucketFieldName,
|
public record BucketParams(double min, double max, int buckets, String bucketFieldName,
|
||||||
@NotNull BucketValueSource valueSource, @Nullable Integer sampleSize) {}
|
@NotNull BucketValueSource valueSource, @Nullable Integer collectionRate,
|
||||||
|
@Nullable Integer sampleSize) {}
|
||||||
|
@ -46,6 +46,7 @@ public class DecimalBucketMultiSearcher {
|
|||||||
bucketParams.valueSource(),
|
bucketParams.valueSource(),
|
||||||
queries,
|
queries,
|
||||||
normalizationQuery,
|
normalizationQuery,
|
||||||
|
bucketParams.collectionRate(),
|
||||||
bucketParams.sampleSize()
|
bucketParams.sampleSize()
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
|
Loading…
Reference in New Issue
Block a user