CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/comparators/DocComparator.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package it.cavallium.dbengine.lucene.comparators;

import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import it.cavallium.dbengine.lucene.LMDBPriorityQueue;
import it.cavallium.dbengine.lucene.LongCodec;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;

/** Comparator that sorts by asc _doc */
public class DocComparator extends FieldComparator<Integer> {
  private final IArray<Integer> docIDs;
  private final boolean enableSkipping; // if skipping functionality should be enabled
  private int bottom;
  private int topValue;
  private boolean topValueSet;
  private boolean bottomValueSet;
  private boolean hitsThresholdReached;

  /** Creates a new comparator based on document ids for {@code numHits} */
  public DocComparator(LLTempLMDBEnv env, int numHits, boolean reverse, int sortPost) {
		this.docIDs = new LMDBArray<>(env, new IntCodec(), numHits, 0);
    // skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
    this.enableSkipping = (!reverse && sortPost == 0);
  }

  @Override
  public int compare(int slot1, int slot2) {
    // No overflow risk because docIDs are non-negative
    return docIDs.getOrDefault(slot1, 0) - docIDs.getOrDefault(slot2, 0);
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
    // TODO: can we "map" our docIDs to the current
    // reader? saves having to then subtract on every
    // compare call
    return new DocLeafComparator(context);
  }

  @Override
  public void setTopValue(Integer value) {
    topValue = value;
    topValueSet = true;
  }

  @Override
  public Integer value(int slot) {
    return docIDs.getOrDefault(slot, 0);
  }

  /**
   * DocLeafComparator with skipping functionality. When sort by _doc asc, after collecting top N
   * matches and enough hits, the comparator can skip all the following documents. When sort by _doc
   * asc and "top" document is set after which search should start, the comparator provides an
   * iterator that can quickly skip to the desired "top" document.
   */
  private class DocLeafComparator implements LeafFieldComparator {
    private final int docBase;
    private final int minDoc;
    private final int maxDoc;
    private DocIdSetIterator competitiveIterator; // iterator that starts from topValue

    public DocLeafComparator(LeafReaderContext context) {
      this.docBase = context.docBase;
      if (enableSkipping) {
        // Skip docs before topValue, but include docs starting with topValue.
        // Including topValue is necessary when doing sort on [_doc, other fields]
        // in a distributed search where there are docs from different indices
        // with the same docID.
        this.minDoc = topValue;
        this.maxDoc = context.reader().maxDoc();
        this.competitiveIterator = DocIdSetIterator.all(maxDoc);
      } else {
        this.minDoc = -1;
        this.maxDoc = -1;
        this.competitiveIterator = null;
      }
    }

    @Override
    public void setBottom(int slot) {
      bottom = docIDs.getOrDefault(slot, 0);
      bottomValueSet = true;
      updateIterator();
    }

    @Override
    public int compareBottom(int doc) {
      // No overflow risk because docIDs are non-negative
      return bottom - (docBase + doc);
    }

    @Override
    public int compareTop(int doc) {
      int docValue = docBase + doc;
      return Integer.compare(topValue, docValue);
    }

    @Override
    public void copy(int slot, int doc) throws IOException {
      docIDs.set(slot, docBase + doc);
    }

    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // update an iterator on a new segment
      updateIterator();
    }

    @Override
    public DocIdSetIterator competitiveIterator() {
      if (enableSkipping == false) {
        return null;
      } else {
        return new DocIdSetIterator() {
          private int docID = competitiveIterator.docID();

          @Override
          public int nextDoc() throws IOException {
            return advance(docID + 1);
          }

          @Override
          public int docID() {
            return docID;
          }

          @Override
          public long cost() {
            return competitiveIterator.cost();
          }

          @Override
          public int advance(int target) throws IOException {
            return docID = competitiveIterator.advance(target);
          }
        };
      }
    }

    @Override
    public void setHitsThresholdReached() {
      hitsThresholdReached = true;
      updateIterator();
    }

    private void updateIterator() {
      if (enableSkipping == false || hitsThresholdReached == false) return;
      if (bottomValueSet) {
        // since we've collected top N matches, we can early terminate
        // Currently early termination on _doc is also implemented in TopFieldCollector, but this
        // will be removed
        // once all bulk scores uses collectors' iterators
        competitiveIterator = DocIdSetIterator.empty();
      } else if (topValueSet) {
        // skip to the desired top doc
        if (docBase + maxDoc <= minDoc) {
          competitiveIterator = DocIdSetIterator.empty(); // skip this segment
        } else {
          int segmentMinDoc = Math.max(competitiveIterator.docID(), minDoc - docBase);
          competitiveIterator = new MinDocIterator(segmentMinDoc, maxDoc);
        }
      }
    }
  }
}