CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/comparators/TermOrdValComparator.java

package it.cavallium.dbengine.lucene.comparators;

import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.ByteArrayCodec;
import it.cavallium.dbengine.lucene.BytesRefCodec;
import it.cavallium.dbengine.lucene.FloatCodec;
import it.cavallium.dbengine.lucene.IArray;
import it.cavallium.dbengine.lucene.IntCodec;
import it.cavallium.dbengine.lucene.LMDBArray;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

/**
 * Sorts by a field's natural term sort order, using ordinals. This is functionally equivalent to
 * {@link org.apache.lucene.search.FieldComparator.TermValComparator}, but it first resolves the
 * strings to their relative ordinal positions (using the index returned by {@link
 * org.apache.lucene.index.LeafReader#getSortedDocValues(String)}) and does most comparisons
 * using the ordinals. For medium to large result sets, this comparator will be much faster than
 * {@link org.apache.lucene.search.FieldComparator.TermValComparator}. For very small result sets
 * it may be slower.
 */
public class TermOrdValComparator extends FieldComparator<BytesRef> implements LeafFieldComparator {
	/* Ords for each slot.
	@lucene.internal */
	final IArray<Integer> ords;

	/* Term bytes for each slot; a slot holding a missing value has no entry (reads back as null).
	@lucene.internal */
	final IArray<byte[]> values;

	/* Which reader last copied a value into the slot. When
	we compare two slots, we just compare-by-ord if the
	readerGen is the same; else we must compare the
	values (slower).
	@lucene.internal */
	final IArray<Integer> readerGen;

	/* Gen of current reader we are on; incremented on every getLeafComparator call.
	@lucene.internal */
	int currentReaderGen = -1;

	/* Current reader's doc ord/values.
	@lucene.internal */
	SortedDocValues termsIndex;

	private final String field;

	/* Bottom slot, or -1 if queue isn't full yet
	@lucene.internal */
	int bottomSlot = -1;

	/* Bottom ord (same as ords[bottomSlot] once bottomSlot
	is set).  Cached for faster compares.
	@lucene.internal */
	int bottomOrd;

	/* True if current bottom slot matches the current
	reader.
	@lucene.internal */
	boolean bottomSameReader;

	/* Bottom value (same as values[bottomSlot] once
	 bottomSlot is set).  Cached for faster compares.
	@lucene.internal */
	byte[] bottomValue;

	/** Set by setTopValue; {@code null} when the top value is "missing". */
	byte[] topValue;

	/** True if {@link #topOrd} is an exact ordinal in the current reader (or the missing ordinal). */
	boolean topSameReader;

	/** Ordinal (or lower bound, when {@link #topSameReader} is false) of the top value. */
	int topOrd;

	/** -1 if missing values are sorted first, 1 if they are sorted last */
	final int missingSortCmp;

	/** Which ordinal to use for a missing value. */
	final int missingOrd;

	/**
	 * Creates this, sorting missing values first.
	 *
	 * @param env temporary LMDB environment backing the per-slot arrays
	 * @param numHits number of slots to allocate
	 * @param field name of the sorted doc-values field to compare on
	 */
	public TermOrdValComparator(LLTempLMDBEnv env, int numHits, String field) {
		this(env, numHits, field, false);
	}

	/**
	 * Creates this, with control over how missing values are sorted. Pass sortMissingLast=true to
	 * put missing values at the end.
	 *
	 * @param env temporary LMDB environment backing the per-slot arrays
	 * @param numHits number of slots to allocate
	 * @param field name of the sorted doc-values field to compare on
	 * @param sortMissingLast {@code true} to sort documents without a value after all others
	 */
	public TermOrdValComparator(LLTempLMDBEnv env, int numHits, String field, boolean sortMissingLast) {
		ords = new LMDBArray<>(env, new IntCodec(), numHits, 0);
		values = new LMDBArray<>(env, new ByteArrayCodec(), numHits, null);
		readerGen = new LMDBArray<>(env, new IntCodec(), numHits, 0);
		this.field = field;
		if (sortMissingLast) {
			missingSortCmp = 1;
			// Larger than every real ordinal, so missing docs compare as greatest.
			missingOrd = Integer.MAX_VALUE;
		} else {
			missingSortCmp = -1;
			// Smaller than every real ordinal (real ordinals are >= 0).
			missingOrd = -1;
		}
	}

	/**
	 * Returns the ordinal of {@code doc} in the current segment, or -1 when the document has no
	 * value for the field.
	 */
	private int getOrdForDoc(int doc) throws IOException {
		if (termsIndex.advanceExact(doc)) {
			return termsIndex.ordValue();
		} else {
			return -1;
		}
	}

	/**
	 * Compares two slots. Ordinals are compared when both slots were filled from the same reader
	 * generation; otherwise the stored term bytes are compared, with missing values ordered
	 * according to {@link #missingSortCmp}.
	 */
	@Override
	public int compare(int slot1, int slot2) {
		final int gen1 = readerGen.getOrDefault(slot1, 0);
		final int gen2 = readerGen.getOrDefault(slot2, 0);
		if (gen1 == gen2) {
			// Same segment: ordinals are directly comparable.
			return Integer.compare(ords.getOrDefault(slot1, 0), ords.getOrDefault(slot2, 0));
		}

		final byte[] val1 = values.get(slot1);
		final byte[] val2 = values.get(slot2);
		if (val1 == null) {
			if (val2 == null) {
				return 0;
			}
			return missingSortCmp;
		} else if (val2 == null) {
			return -missingSortCmp;
		}
		// Term order is unsigned byte order (same as BytesRef.compareTo, used by compareValues);
		// a signed comparison would mis-order any byte >= 0x80.
		return Arrays.compareUnsigned(val1, val2);
	}

	/** Compares the current bottom slot against {@code doc} of the current segment. */
	@Override
	public int compareBottom(int doc) throws IOException {
		assert bottomSlot != -1;
		int docOrd = getOrdForDoc(doc);
		if (docOrd == -1) {
			docOrd = missingOrd;
		}
		if (bottomSameReader) {
			// ord is precisely comparable, even in the equal case
			return Integer.compare(bottomOrd, docOrd);
		} else if (bottomOrd >= docOrd) {
			// the equals case always means bottom is > doc
			// (because we set bottomOrd to the lower bound in
			// setBottom):
			return 1;
		} else {
			return -1;
		}
	}

	/** Stores the ordinal, term bytes and reader generation of {@code doc} into {@code slot}. */
	@Override
	public void copy(int slot, int doc) throws IOException {
		int ord = getOrdForDoc(doc);
		if (ord == -1) {
			ord = missingOrd;
			// A missing value is represented by the absence of stored bytes.
			values.reset(slot);
		} else {
			assert ord >= 0;
			values.set(slot, copyBytes(termsIndex.lookupOrd(ord)));
		}
		ords.set(slot, ord);
		readerGen.set(slot, currentReaderGen);
	}

	/**
	 * Copies the valid region of a {@link BytesRef} into a fresh array.
	 *
	 * @param lookupOrd the reference to copy, may be {@code null}
	 * @return a new array holding bytes {@code [offset, offset + length)}, or {@code null} if the
	 *     input was {@code null}
	 */
	private byte[] copyBytes(BytesRef lookupOrd) {
		if (lookupOrd == null) {
			return null;
		}
		// copyOfRange takes an exclusive END index, not a length.
		return Arrays.copyOfRange(lookupOrd.bytes, lookupOrd.offset, lookupOrd.offset + lookupOrd.length);
	}

	/** Retrieves the SortedDocValues for the field in this segment */
	protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
			throws IOException {
		return DocValues.getSorted(context.reader(), field);
	}

	/**
	 * Switches this comparator to a new segment: loads its doc values, bumps the reader
	 * generation and re-resolves the top and bottom values against the new segment's terms.
	 */
	@Override
	public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
		termsIndex = getSortedDocValues(context, field);
		currentReaderGen++;

		if (topValue != null) {
			// Recompute topOrd/SameReader
			int ord = termsIndex.lookupTerm(new BytesRef(topValue));
			if (ord >= 0) {
				topSameReader = true;
				topOrd = ord;
			} else {
				// Term absent from this segment: keep the lower bound instead of an exact ord.
				topSameReader = false;
				topOrd = -ord - 2;
			}
		} else {
			topOrd = missingOrd;
			topSameReader = true;
		}

		if (bottomSlot != -1) {
			// Recompute bottomOrd/SameReader
			setBottom(bottomSlot);
		}

		return this;
	}

	/**
	 * Sets the bottom slot and caches its value and ordinal relative to the current segment. When
	 * the slot was filled from another segment, its term is looked up in the current one; on an
	 * exact match (or a missing value) the slot is re-tagged with the current reader generation.
	 */
	@Override
	public void setBottom(final int bottom) throws IOException {
		bottomSlot = bottom;

		bottomValue = values.get(bottomSlot);
		if (currentReaderGen == readerGen.getOrDefault(bottomSlot, 0)) {
			bottomOrd = ords.getOrDefault(bottomSlot, 0);
			bottomSameReader = true;
		} else {
			if (bottomValue == null) {
				// a missing value uses missingOrd in every segment, so it is exactly comparable
				assert ords.getOrDefault(bottomSlot, 0) == missingOrd;
				bottomOrd = missingOrd;
				bottomSameReader = true;
				readerGen.set(bottomSlot, currentReaderGen);
			} else {
				final int ord = termsIndex.lookupTerm(new BytesRef(bottomValue));
				if (ord < 0) {
					// not present in this segment: remember the lower bound only
					bottomOrd = -ord - 2;
					bottomSameReader = false;
				} else {
					bottomOrd = ord;
					// exact value match
					bottomSameReader = true;
					readerGen.set(bottomSlot, currentReaderGen);
					ords.set(bottomSlot, bottomOrd);
				}
			}
		}
	}

	/** Records a private copy of the top value used by {@link #compareTop(int)}. */
	@Override
	public void setTopValue(BytesRef value) {
		// null is fine: it means the last doc of the prior
		// search was missing this value
		topValue = copyBytes(value);
	}

	/** Returns the term stored in {@code slot}, or {@code null} if the slot holds a missing value. */
	@Override
	public BytesRef value(int slot) {
		return getBytesRef(values.get(slot));
	}

	/** Wraps {@code bytes} in a {@link BytesRef}, passing {@code null} through. */
	private BytesRef getBytesRef(byte[] bytes) {
		if (bytes == null) {
			return null;
		}
		return new BytesRef(bytes);
	}

	/** Compares the top value against {@code doc} of the current segment. */
	@Override
	public int compareTop(int doc) throws IOException {

		int ord = getOrdForDoc(doc);
		if (ord == -1) {
			ord = missingOrd;
		}

		if (topSameReader) {
			// ord is precisely comparable, even in the equal
			// case
			return Integer.compare(topOrd, ord);
		} else if (ord <= topOrd) {
			// the equals case always means doc is < value
			// (because we set topOrd to the lower bound)
			return 1;
		} else {
			return -1;
		}
	}

	/**
	 * Compares two values, ordering {@code null} (missing) according to {@link #missingSortCmp}
	 * and non-null values by {@link BytesRef#compareTo(BytesRef)}.
	 */
	@Override
	public int compareValues(BytesRef val1, BytesRef val2) {
		if (val1 == null) {
			if (val2 == null) {
				return 0;
			}
			return missingSortCmp;
		} else if (val2 == null) {
			return -missingSortCmp;
		}
		return val1.compareTo(val2);
	}

	/** No-op: this comparator does not use scores. */
	@Override
	public void setScorer(Scorable scorer) {}
}