// CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/comparators/DocComparator.java
// (191 lines, 6.4 KiB, Java)

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene.comparators;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
import it.cavallium.dbengine.lucene.LongCodec;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
/**
 * Comparator that orders hits by ascending {@code _doc}.
 *
 * <p>The doc id recorded for each slot is the segment-local doc id plus the segment's
 * {@code docBase}, and is kept in an {@link LMDBArray} created from the supplied environment.
 */
public class DocComparator extends FieldComparator<Integer> {

  /** Doc id (docBase + segment-local doc) stored per slot. */
  private final IArray<Integer> docIDs;

  /** Whether the skipping functionality (competitive iterator) is enabled. */
  private final boolean enableSkipping;

  private int bottom;
  private int topValue;
  private boolean topValueSet;
  private boolean bottomValueSet;
  private boolean hitsThresholdReached;

  /** Creates a new comparator based on document ids for {@code numHits}. */
  public DocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
    this.docIDs = new LMDBArray<>(env, new IntCodec(), numHits, 0);
    // Skipping is only enabled when _doc in ascending order is the primary sort.
    this.enableSkipping = sortPost == 0 && !reverse;
  }

  /** Compares the doc ids stored in two slots; a slot without a value is read as {@code 0}. */
  @Override
  public int compare(int slot1, int slot2) {
    int first = docIDs.getOrDefault(slot1, 0);
    int second = docIDs.getOrDefault(slot2, 0);
    // Plain subtraction cannot overflow because docIDs are non-negative.
    return first - second;
  }

  /** Returns a fresh per-segment comparator bound to {@code context}. */
  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
    // TODO: can we "map" our docIDs to the current
    // reader? saves having to then subtract on every
    // compare call
    return new DocLeafComparator(context);
  }

  /** Records the "top" doc id and marks it as set. */
  @Override
  public void setTopValue(Integer value) {
    topValue = value;
    topValueSet = true;
  }

  /** Returns the doc id stored in {@code slot}, or {@code 0} when the slot has no value. */
  @Override
  public Integer value(int slot) {
    return docIDs.getOrDefault(slot, 0);
  }

  /**
   * Per-segment comparator with skipping functionality. When sorting by _doc asc, once the top N
   * matches have been collected and the hits threshold has been reached, all following documents
   * can be skipped. When sorting by _doc asc with a "top" document set, the comparator exposes an
   * iterator that can quickly skip to that "top" document.
   */
  private class DocLeafComparator implements LeafFieldComparator {

    private final int docBase;
    private final int minDoc;
    private final int maxDoc;

    /** Iterator over the docs that are still competitive; replaced by {@link #updateIterator()}. */
    private DocIdSetIterator competitiveIterator;

    public DocLeafComparator(LeafReaderContext context) {
      this.docBase = context.docBase;
      if (!enableSkipping) {
        this.minDoc = -1;
        this.maxDoc = -1;
        this.competitiveIterator = null;
      } else {
        // Skip docs before topValue, but include docs starting with topValue.
        // Including topValue is necessary when doing sort on [_doc, other fields]
        // in a distributed search where there are docs from different indices
        // with the same docID.
        this.minDoc = topValue;
        this.maxDoc = context.reader().maxDoc();
        this.competitiveIterator = DocIdSetIterator.all(this.maxDoc);
      }
    }

    /** Caches the doc id held in {@code slot} as the bottom value and refreshes the iterator. */
    @Override
    public void setBottom(int slot) {
      bottom = docIDs.getOrDefault(slot, 0);
      bottomValueSet = true;
      updateIterator();
    }

    /** Compares the cached bottom against {@code docBase + doc}. */
    @Override
    public int compareBottom(int doc) {
      int globalDoc = docBase + doc;
      // Plain subtraction cannot overflow because docIDs are non-negative.
      return bottom - globalDoc;
    }

    /** Compares the top value against {@code docBase + doc}. */
    @Override
    public int compareTop(int doc) {
      return Integer.compare(topValue, docBase + doc);
    }

    /** Stores {@code docBase + doc} into {@code slot}. */
    @Override
    public void copy(int slot, int doc) throws IOException {
      int globalDoc = docBase + doc;
      docIDs.set(slot, globalDoc);
    }

    /** The scorer itself is not used; the call only refreshes the iterator for a new segment. */
    @Override
    public void setScorer(Scorable scorer) throws IOException {
      updateIterator();
    }

    /**
     * Returns {@code null} when skipping is disabled; otherwise a wrapper that always delegates
     * to whatever iterator this leaf comparator currently holds, so later replacements made by
     * {@link #updateIterator()} are picked up by an already returned wrapper.
     */
    @Override
    public DocIdSetIterator competitiveIterator() {
      if (!enableSkipping) {
        return null;
      }
      return new DocIdSetIterator() {
        private int current = DocLeafComparator.this.competitiveIterator.docID();

        @Override
        public int docID() {
          return current;
        }

        @Override
        public int nextDoc() throws IOException {
          return advance(current + 1);
        }

        @Override
        public int advance(int target) throws IOException {
          current = DocLeafComparator.this.competitiveIterator.advance(target);
          return current;
        }

        @Override
        public long cost() {
          return DocLeafComparator.this.competitiveIterator.cost();
        }
      };
    }

    /** Marks the hits threshold as reached and refreshes the iterator. */
    @Override
    public void setHitsThresholdReached() {
      hitsThresholdReached = true;
      updateIterator();
    }

    /**
     * Replaces the competitive iterator once skipping is enabled and the hits threshold has been
     * reached; does nothing otherwise.
     */
    private void updateIterator() {
      if (!enableSkipping || !hitsThresholdReached) {
        return;
      }
      if (bottomValueSet) {
        // since we've collected top N matches, we can early terminate
        // Currently early termination on _doc is also implemented in TopFieldCollector, but this
        // will be removed once all bulk scores uses collectors' iterators
        competitiveIterator = DocIdSetIterator.empty();
        return;
      }
      if (!topValueSet) {
        return;
      }
      // skip to the desired top doc
      boolean segmentEndsBeforeTop = docBase + maxDoc <= minDoc;
      if (segmentEndsBeforeTop) {
        competitiveIterator = DocIdSetIterator.empty(); // skip this segment
      } else {
        int segmentMinDoc = Math.max(competitiveIterator.docID(), minDoc - docBase);
        competitiveIterator = new MinDocIterator(segmentMinDoc, maxDoc);
      }
    }
  }
}