298 lines
11 KiB
Java
298 lines
11 KiB
Java
/*
|
||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||
* contributor license agreements. See the NOTICE file distributed with
|
||
* this work for additional information regarding copyright ownership.
|
||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||
* (the "License"); you may not use this file except in compliance with
|
||
* the License. You may obtain a copy of the License at
|
||
*
|
||
* http://www.apache.org/licenses/LICENSE-2.0
|
||
*
|
||
* Unless required by applicable law or agreed to in writing, software
|
||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
* See the License for the specific language governing permissions and
|
||
* limitations under the License.
|
||
*/
|
||
|
||
package it.cavallium.dbengine.lucene.comparators;
|
||
|
||
import java.io.IOException;
|
||
import org.apache.lucene.index.DocValues;
|
||
import org.apache.lucene.index.FieldInfo;
|
||
import org.apache.lucene.index.LeafReaderContext;
|
||
import org.apache.lucene.index.NumericDocValues;
|
||
import org.apache.lucene.index.PointValues;
|
||
import org.apache.lucene.search.DocIdSetIterator;
|
||
import org.apache.lucene.search.FieldComparator;
|
||
import org.apache.lucene.search.LeafFieldComparator;
|
||
import org.apache.lucene.search.Scorable;
|
||
import org.apache.lucene.search.Scorer;
|
||
import org.apache.lucene.util.ArrayUtil;
|
||
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
|
||
import org.apache.lucene.util.DocIdSetBuilder;
|
||
|
||
/**
|
||
* Abstract numeric comparator for comparing numeric values. This comparator provides a skipping
|
||
* functionality – an iterator that can skip over non-competitive documents.
|
||
*/
|
||
public abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
|
||
protected final T missingValue;
|
||
protected final String field;
|
||
protected final boolean reverse;
|
||
private final int bytesCount; // how many bytes are used to encode this number
|
||
private final ByteArrayComparator bytesComparator;
|
||
|
||
protected boolean topValueSet;
|
||
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
|
||
protected boolean hitsThresholdReached;
|
||
protected boolean queueFull;
|
||
private boolean canSkipDocuments;
|
||
|
||
protected NumericComparator(
|
||
String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
|
||
this.field = field;
|
||
this.missingValue = missingValue;
|
||
this.reverse = reverse;
|
||
// skipping functionality is only relevant for primary sort
|
||
this.canSkipDocuments = (sortPos == 0);
|
||
this.bytesCount = bytesCount;
|
||
this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount);
|
||
}
|
||
|
||
@Override
|
||
public void setTopValue(T value) {
|
||
topValueSet = true;
|
||
}
|
||
|
||
@Override
|
||
public void setSingleSort() {
|
||
singleSort = true;
|
||
}
|
||
|
||
@Override
|
||
public void disableSkipping() {
|
||
canSkipDocuments = false;
|
||
}
|
||
|
||
/** Leaf comparator for {@link NumericComparator} that provides skipping functionality */
|
||
public abstract class NumericLeafComparator implements LeafFieldComparator {
|
||
protected final NumericDocValues docValues;
|
||
private final PointValues pointValues;
|
||
// if skipping functionality should be enabled on this segment
|
||
private final boolean enableSkipping;
|
||
private final int maxDoc;
|
||
private final byte[] minValueAsBytes;
|
||
private final byte[] maxValueAsBytes;
|
||
|
||
private DocIdSetIterator competitiveIterator;
|
||
private long iteratorCost;
|
||
private int maxDocVisited = -1;
|
||
private int updateCounter = 0;
|
||
|
||
public NumericLeafComparator(LeafReaderContext context) throws IOException {
|
||
this.docValues = getNumericDocValues(context, field);
|
||
this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null;
|
||
if (pointValues != null) {
|
||
FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
|
||
if (info == null || info.getPointDimensionCount() == 0) {
|
||
throw new IllegalStateException(
|
||
"Field "
|
||
+ field
|
||
+ " doesn't index points according to FieldInfos yet returns non-null PointValues");
|
||
} else if (info.getPointDimensionCount() > 1) {
|
||
throw new IllegalArgumentException(
|
||
"Field " + field + " is indexed with multiple dimensions, sorting is not supported");
|
||
} else if (info.getPointNumBytes() != bytesCount) {
|
||
throw new IllegalArgumentException(
|
||
"Field "
|
||
+ field
|
||
+ " is indexed with "
|
||
+ info.getPointNumBytes()
|
||
+ " bytes per dimension, but "
|
||
+ NumericComparator.this
|
||
+ " expected "
|
||
+ bytesCount);
|
||
}
|
||
this.enableSkipping = true; // skipping is enabled when points are available
|
||
this.maxDoc = context.reader().maxDoc();
|
||
this.maxValueAsBytes =
|
||
reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
|
||
this.minValueAsBytes =
|
||
reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
|
||
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
|
||
this.iteratorCost = maxDoc;
|
||
} else {
|
||
this.enableSkipping = false;
|
||
this.maxDoc = 0;
|
||
this.maxValueAsBytes = null;
|
||
this.minValueAsBytes = null;
|
||
}
|
||
}
|
||
|
||
/** Retrieves the NumericDocValues for the field in this segment */
|
||
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
|
||
throws IOException {
|
||
return DocValues.getNumeric(context.reader(), field);
|
||
}
|
||
|
||
@Override
|
||
public void setBottom(int slot) throws IOException {
|
||
queueFull = true; // if we are setting bottom, it means that we have collected enough hits
|
||
updateCompetitiveIterator(); // update an iterator if we set a new bottom
|
||
}
|
||
|
||
@Override
|
||
public void copy(int slot, int doc) throws IOException {
|
||
maxDocVisited = doc;
|
||
}
|
||
|
||
@Override
|
||
public void setScorer(Scorable scorer) throws IOException {
|
||
if (scorer instanceof Scorer) {
|
||
iteratorCost =
|
||
((Scorer) scorer).iterator().cost(); // starting iterator cost is the scorer's cost
|
||
updateCompetitiveIterator(); // update an iterator when we have a new segment
|
||
}
|
||
}
|
||
|
||
@Override
|
||
public void setHitsThresholdReached() throws IOException {
|
||
hitsThresholdReached = true;
|
||
updateCompetitiveIterator();
|
||
}
|
||
|
||
// update its iterator to include possibly only docs that are "stronger" than the current bottom
|
||
// entry
|
||
private void updateCompetitiveIterator() throws IOException {
|
||
if (enableSkipping == false || hitsThresholdReached == false || queueFull == false) return;
|
||
// if some documents have missing points, check that missing values prohibits optimization
|
||
if ((pointValues.getDocCount() < maxDoc) && isMissingValueCompetitive()) {
|
||
return; // we can't filter out documents, as documents with missing values are competitive
|
||
}
|
||
|
||
updateCounter++;
|
||
if (updateCounter > 256
|
||
&& (updateCounter & 0x1f) != 0x1f) { // Start sampling if we get called too much
|
||
return;
|
||
}
|
||
if (reverse == false) {
|
||
encodeBottom(maxValueAsBytes);
|
||
if (topValueSet) {
|
||
encodeTop(minValueAsBytes);
|
||
}
|
||
} else {
|
||
encodeBottom(minValueAsBytes);
|
||
if (topValueSet) {
|
||
encodeTop(maxValueAsBytes);
|
||
}
|
||
}
|
||
|
||
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
|
||
PointValues.IntersectVisitor visitor =
|
||
new PointValues.IntersectVisitor() {
|
||
DocIdSetBuilder.BulkAdder adder;
|
||
|
||
@Override
|
||
public void grow(int count) {
|
||
adder = result.grow(count);
|
||
}
|
||
|
||
@Override
|
||
public void visit(int docID) {
|
||
if (docID <= maxDocVisited) {
|
||
return; // Already visited or skipped
|
||
}
|
||
adder.add(docID);
|
||
}
|
||
|
||
@Override
|
||
public void visit(int docID, byte[] packedValue) {
|
||
if (docID <= maxDocVisited) {
|
||
return; // already visited or skipped
|
||
}
|
||
if (maxValueAsBytes != null) {
|
||
int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0);
|
||
// if doc's value is too high or for single sort even equal, it is not competitive
|
||
// and the doc can be skipped
|
||
if (cmp > 0 || (singleSort && cmp == 0)) return;
|
||
}
|
||
if (minValueAsBytes != null) {
|
||
int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0);
|
||
// if doc's value is too low or for single sort even equal, it is not competitive
|
||
// and the doc can be skipped
|
||
if (cmp < 0 || (singleSort && cmp == 0)) return;
|
||
}
|
||
adder.add(docID); // doc is competitive
|
||
}
|
||
|
||
@Override
|
||
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||
if (maxValueAsBytes != null) {
|
||
int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0);
|
||
if (cmp > 0 || (singleSort && cmp == 0))
|
||
return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||
}
|
||
if (minValueAsBytes != null) {
|
||
int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0);
|
||
if (cmp < 0 || (singleSort && cmp == 0))
|
||
return PointValues.Relation.CELL_OUTSIDE_QUERY;
|
||
}
|
||
if ((maxValueAsBytes != null
|
||
&& bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0)
|
||
|| (minValueAsBytes != null
|
||
&& bytesComparator.compare(minPackedValue, 0, minValueAsBytes, 0) < 0)) {
|
||
return PointValues.Relation.CELL_CROSSES_QUERY;
|
||
}
|
||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||
}
|
||
};
|
||
final long threshold = iteratorCost >>> 3;
|
||
long estimatedNumberOfMatches =
|
||
pointValues.estimatePointCount(visitor); // runs in O(log(numPoints))
|
||
if (estimatedNumberOfMatches >= threshold) {
|
||
// the new range is not selective enough to be worth materializing, it doesn't reduce number
|
||
// of docs at least 8x
|
||
return;
|
||
}
|
||
pointValues.intersect(visitor);
|
||
competitiveIterator = result.build().iterator();
|
||
iteratorCost = competitiveIterator.cost();
|
||
}
|
||
|
||
@Override
|
||
public DocIdSetIterator competitiveIterator() {
|
||
if (enableSkipping == false) return null;
|
||
return new DocIdSetIterator() {
|
||
private int docID = competitiveIterator.docID();
|
||
|
||
@Override
|
||
public int nextDoc() throws IOException {
|
||
return advance(docID + 1);
|
||
}
|
||
|
||
@Override
|
||
public int docID() {
|
||
return docID;
|
||
}
|
||
|
||
@Override
|
||
public long cost() {
|
||
return competitiveIterator.cost();
|
||
}
|
||
|
||
@Override
|
||
public int advance(int target) throws IOException {
|
||
return docID = competitiveIterator.advance(target);
|
||
}
|
||
};
|
||
}
|
||
|
||
protected abstract boolean isMissingValueCompetitive();
|
||
|
||
protected abstract void encodeBottom(byte[] packedValue);
|
||
|
||
protected abstract void encodeTop(byte[] packedValue);
|
||
}
|
||
}
|