CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/collector/FastRandomSamplingFacetsCollector.java
2023-02-22 16:59:35 +01:00

64 lines
1.8 KiB
Java

package it.cavallium.dbengine.lucene.collector;
import it.cavallium.dbengine.lucene.IntSmear;
import it.unimi.dsi.fastutil.ints.IntHash;
import java.io.IOException;
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
/**
 * Facets collector that pre-filters documents by a hash of their doc id before handing them to a
 * wrapped Lucene {@link RandomSamplingFacetsCollector}.
 *
 * <p>A document is forwarded to the wrapped collector only when the hash of its doc id is a
 * multiple of {@code collectionRate}; with a rate of {@code 1} every document is forwarded and no
 * hash is computed.
 */
public class FastRandomSamplingFacetsCollector extends SimpleCollector implements FacetsCollector {

    /** Wrapped Lucene collector that receives the documents selected by {@link #collect(int)}. */
    private final RandomSamplingFacetsCollector collector;

    /** Forward 1 document every {@code collectionRate} collectable documents; always {@code >= 1}. */
    private final int collectionRate;

    /** Hash strategy applied to the doc id to decide whether a document is forwarded. */
    private final IntHash.Strategy hash;

    /**
     * Creates a collector that passes seed {@code 0} to the wrapped collector.
     * NOTE(review): Lucene documents a seed of {@code 0} as "a seed will be chosen for you" —
     * confirm against the Lucene version in use.
     *
     * @param collectionRate collect 1 document every n collectable documents; must be {@code >= 1}
     * @param sampleSize sample size passed to the wrapped {@link RandomSamplingFacetsCollector}
     * @throws IllegalArgumentException if {@code collectionRate} is less than 1
     */
    public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize) {
        this(collectionRate, sampleSize, 0);
    }

    /**
     * Creates a collector with an explicit seed for the wrapped collector.
     *
     * @param collectionRate collect 1 document every n collectable documents; must be {@code >= 1}
     * @param sampleSize sample size passed to the wrapped {@link RandomSamplingFacetsCollector}
     * @param seed seed passed to the wrapped {@link RandomSamplingFacetsCollector}
     * @throws IllegalArgumentException if {@code collectionRate} is less than 1
     */
    public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize, long seed) {
        // Fail fast: a rate of 0 would otherwise surface only later, as an ArithmeticException
        // from the modulo in collect(), and a negative rate has no meaning as "1 every n".
        if (collectionRate < 1) {
            throw new IllegalArgumentException("collectionRate must be >= 1, got " + collectionRate);
        }
        this.collectionRate = collectionRate;
        this.hash = new IntSmear();
        this.collector = new RandomSamplingFacetsCollector(sampleSize, seed) {
            @Override
            public ScoreMode scoreMode() {
                // The wrapped collector always reports that scores are not needed.
                return ScoreMode.COMPLETE_NO_SCORES;
            }
        };
    }

    /**
     * Forwards the new segment to the wrapped collector.
     *
     * @param context the segment that is about to be collected
     * @throws IOException if the wrapped collector fails to switch to the segment
     */
    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
        // The returned leaf collector is intentionally discarded: documents are pushed to the
        // wrapped collector directly from collect(). The call is made only to notify the wrapped
        // collector of the new segment (presumably it returns itself, being a SimpleCollector —
        // see Lucene's SimpleCollector#getLeafCollector).
        collector.getLeafCollector(context);
    }

    /**
     * Forwards the scorer to the wrapped collector.
     *
     * @param scorer the scorer for the documents that will be collected
     * @throws IOException if the wrapped collector rejects the scorer
     */
    @Override
    public void setScorer(Scorable scorer) throws IOException {
        collector.setScorer(scorer);
    }

    /**
     * Forwards {@code doc} to the wrapped collector when it is selected by the collection rate.
     *
     * @param doc the doc id handed to this collector by the search
     * @throws IOException if the wrapped collector fails to collect the document
     */
    @Override
    public void collect(int doc) throws IOException {
        // A rate of 1 short-circuits so that no hash is computed when everything is collected.
        // The hash may be negative; "% rate == 0" still selects exactly the multiples of the rate.
        if (collectionRate == 1 || hash.hashCode(doc) % collectionRate == 0) {
            collector.collect(doc);
        }
    }

    /**
     * Returns the score mode reported by the wrapped collector.
     *
     * @return the wrapped collector's score mode
     */
    @Override
    public ScoreMode scoreMode() {
        return collector.scoreMode();
    }

    /**
     * Exposes the wrapped Lucene collector.
     *
     * @return the wrapped {@link RandomSamplingFacetsCollector}
     */
    @Override
    public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() {
        return collector;
    }
}