66 lines
1.8 KiB
Java
66 lines
1.8 KiB
Java
package it.cavallium.dbengine.lucene.collector;
|
|
|
|
import it.cavallium.dbengine.lucene.IntSmear;
|
|
import it.unimi.dsi.fastutil.ints.IntHash;
|
|
import java.io.IOException;
|
|
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
|
|
import org.apache.lucene.index.LeafReaderContext;
|
|
import org.apache.lucene.search.Collector;
|
|
import org.apache.lucene.search.LeafCollector;
|
|
import org.apache.lucene.search.Scorable;
|
|
import org.apache.lucene.search.ScoreMode;
|
|
import org.apache.lucene.search.SimpleCollector;
|
|
|
|
public class FastRandomSamplingFacetsCollector extends SimpleCollector implements FacetsCollector {
|
|
|
|
private final RandomSamplingFacetsCollector collector;
|
|
private final int collectionRate;
|
|
private final IntHash.Strategy hash;
|
|
|
|
/**
|
|
* @param collectionRate collect 1 document every n collectable documents
|
|
*/
|
|
public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize) {
|
|
this(collectionRate, sampleSize, 0);
|
|
}
|
|
|
|
public FastRandomSamplingFacetsCollector(int collectionRate, int sampleSize, long seed) {
|
|
this.collectionRate = collectionRate;
|
|
this.hash = new IntSmear();
|
|
this.collector = new RandomSamplingFacetsCollector(sampleSize, seed) {
|
|
@Override
|
|
public ScoreMode scoreMode() {
|
|
return ScoreMode.COMPLETE_NO_SCORES;
|
|
}
|
|
};
|
|
}
|
|
|
|
@Override
|
|
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
|
collector.getLeafCollector(context);
|
|
}
|
|
|
|
@Override
|
|
public void setScorer(Scorable scorer) throws IOException {
|
|
collector.setScorer(scorer);
|
|
}
|
|
|
|
@Override
|
|
public void collect(int doc) throws IOException {
|
|
if (collectionRate == 1 || hash.hashCode(doc) % collectionRate == 0) {
|
|
collector.collect(doc);
|
|
}
|
|
}
|
|
|
|
|
|
@Override
|
|
public ScoreMode scoreMode() {
|
|
return collector.scoreMode();
|
|
}
|
|
|
|
@Override
|
|
public org.apache.lucene.facet.FacetsCollector getLuceneFacetsCollector() {
|
|
return collector;
|
|
}
|
|
}
|