2021-02-04 22:42:57 +01:00
package it.cavallium.dbengine.lucene ;
2021-09-18 18:34:21 +02:00
import io.net5.buffer.api.Resource ;
import io.net5.buffer.api.Send ;
2021-03-11 14:45:45 +01:00
import it.cavallium.dbengine.client.CompositeSnapshot ;
2021-05-28 16:04:59 +02:00
import it.cavallium.dbengine.client.IndicizerAnalyzers ;
import it.cavallium.dbengine.client.IndicizerSimilarities ;
2021-07-18 19:37:24 +02:00
import it.cavallium.dbengine.client.query.BasicType ;
2021-07-06 01:30:37 +02:00
import it.cavallium.dbengine.client.query.QueryParser ;
import it.cavallium.dbengine.client.query.current.data.QueryParams ;
2021-08-04 01:12:39 +02:00
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount ;
2021-07-08 17:01:56 +02:00
import it.cavallium.dbengine.database.LLKeyScore ;
2021-07-18 19:37:24 +02:00
import it.cavallium.dbengine.database.LLScoreMode ;
2021-03-11 14:45:45 +01:00
import it.cavallium.dbengine.database.collections.DatabaseMapDictionary ;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep ;
2021-07-17 11:52:08 +02:00
import it.cavallium.dbengine.database.collections.ValueGetter ;
2021-02-05 20:34:58 +01:00
import it.cavallium.dbengine.lucene.analyzer.NCharGramAnalyzer ;
import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer ;
2021-02-04 22:42:57 +01:00
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer ;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity ;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer ;
2021-07-08 17:01:56 +02:00
import it.cavallium.dbengine.lucene.searcher.IndexSearchers ;
2021-07-06 01:30:37 +02:00
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams ;
2021-07-08 17:01:56 +02:00
import it.cavallium.dbengine.lucene.searcher.LuceneMultiSearcher ;
2021-02-04 22:42:57 +01:00
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity ;
2021-07-01 21:19:52 +02:00
import java.io.EOFException ;
2021-02-14 13:46:11 +01:00
import java.io.IOException ;
2021-07-01 21:19:52 +02:00
import java.nio.ByteBuffer ;
import java.nio.channels.FileChannel ;
2021-07-08 18:54:53 +02:00
import java.util.Arrays ;
2021-07-08 17:01:56 +02:00
import java.util.Comparator ;
2021-05-28 16:04:59 +02:00
import java.util.HashMap ;
2021-03-11 14:45:45 +01:00
import java.util.Map ;
import java.util.Map.Entry ;
2021-07-17 23:06:26 +02:00
import java.util.NoSuchElementException ;
2021-07-30 14:01:12 +02:00
import java.util.Objects ;
2021-02-14 13:46:11 +01:00
import java.util.Set ;
2021-07-30 14:01:12 +02:00
import java.util.function.Function ;
2021-07-17 23:06:26 +02:00
import java.util.stream.Collectors ;
2021-02-04 22:42:57 +01:00
import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.LowerCaseFilter ;
import org.apache.lucene.analysis.TokenStream ;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter ;
import org.apache.lucene.analysis.en.KStemFilter ;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter ;
2021-05-28 16:04:59 +02:00
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper ;
2021-02-04 22:42:57 +01:00
import org.apache.lucene.analysis.standard.StandardAnalyzer ;
2021-02-14 13:46:11 +01:00
import org.apache.lucene.document.Document ;
2021-07-06 00:30:14 +02:00
import org.apache.lucene.index.IndexReader ;
2021-02-14 13:46:11 +01:00
import org.apache.lucene.index.IndexableField ;
2021-07-06 00:30:14 +02:00
import org.apache.lucene.search.FieldDoc ;
import org.apache.lucene.search.ScoreDoc ;
2021-07-08 17:01:56 +02:00
import org.apache.lucene.search.Sort ;
import org.apache.lucene.search.TopDocs ;
import org.apache.lucene.search.TopFieldDocs ;
2021-08-04 01:12:39 +02:00
import org.apache.lucene.search.TotalHits ;
2021-02-04 22:42:57 +01:00
import org.apache.lucene.search.similarities.BooleanSimilarity ;
import org.apache.lucene.search.similarities.ClassicSimilarity ;
2021-05-28 16:04:59 +02:00
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper ;
2021-02-04 22:42:57 +01:00
import org.apache.lucene.search.similarities.Similarity ;
2021-07-17 23:06:26 +02:00
import org.jetbrains.annotations.NotNull ;
2021-02-04 22:42:57 +01:00
import org.jetbrains.annotations.Nullable ;
import org.novasearch.lucene.search.similarities.BM25Similarity ;
import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model ;
import org.novasearch.lucene.search.similarities.LdpSimilarity ;
import org.novasearch.lucene.search.similarities.LtcSimilarity ;
import org.novasearch.lucene.search.similarities.RobertsonSimilarity ;
2021-07-08 17:01:56 +02:00
import org.reactivestreams.Publisher ;
2021-07-17 23:06:26 +02:00
import org.warp.commonutils.log.Logger ;
import org.warp.commonutils.log.LoggerFactory ;
2021-07-08 17:01:56 +02:00
import reactor.core.publisher.Flux ;
import reactor.core.publisher.Mono ;
import reactor.core.scheduler.Scheduler ;
2021-09-07 11:28:03 +02:00
import reactor.core.scheduler.Schedulers ;
2021-09-18 18:34:21 +02:00
import reactor.util.concurrent.Queues ;
2021-02-04 22:42:57 +01:00
public class LuceneUtils {
2021-07-17 23:06:26 +02:00
private static final Logger logger = LoggerFactory . getLogger ( LuceneUtils . class ) ;
2021-02-05 20:34:58 +01:00
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer ( true , 4 , 4 ) ;
private static final Analyzer lucene4GramStringAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer ( false , 4 , 4 ) ;
private static final Analyzer lucene4GramWordsAnalyzerInstance = new NCharGramAnalyzer ( true , 4 , 4 ) ;
private static final Analyzer lucene4GramStringAnalyzerInstance = new NCharGramAnalyzer ( false , 4 , 4 ) ;
private static final Analyzer lucene3To5GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer ( true , 3 , 5 ) ;
private static final Analyzer lucene3To5GramStringAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer ( false , 3 , 5 ) ;
private static final Analyzer lucene3To5GramWordsAnalyzerInstance = new NCharGramAnalyzer ( true , 3 , 5 ) ;
private static final Analyzer lucene3To5GramStringAnalyzerInstance = new NCharGramAnalyzer ( false , 3 , 5 ) ;
2021-02-04 22:42:57 +01:00
private static final Analyzer luceneStandardAnalyzerInstance = new StandardAnalyzer ( ) ;
2021-05-28 16:04:59 +02:00
private static final Analyzer luceneWordAnalyzerStopWordsAndStemInstance = new WordAnalyzer ( false , true , true ) ;
private static final Analyzer luceneWordAnalyzerStopWordsInstance = new WordAnalyzer ( false , true , false ) ;
private static final Analyzer luceneWordAnalyzerStemInstance = new WordAnalyzer ( false , false , true ) ;
private static final Analyzer luceneWordAnalyzerSimpleInstance = new WordAnalyzer ( false , false , false ) ;
private static final Analyzer luceneICUCollationKeyInstance = new WordAnalyzer ( false , true , true ) ;
2021-02-04 22:42:57 +01:00
private static final Similarity luceneBM25ClassicSimilarityInstance = new BM25Similarity ( BM25Model . CLASSIC ) ;
private static final Similarity luceneBM25PlusSimilarityInstance = new BM25Similarity ( BM25Model . PLUS ) ;
private static final Similarity luceneBM25LSimilarityInstance = new BM25Similarity ( BM25Model . L ) ;
private static final Similarity luceneBM15PlusSimilarityInstance = new BM25Similarity ( 1 . 2f , 0 . 0f , 0 . 5f , BM25Model . PLUS ) ;
private static final Similarity luceneBM11PlusSimilarityInstance = new BM25Similarity ( 1 . 2f , 1 . 0f , 0 . 5f , BM25Model . PLUS ) ;
private static final Similarity luceneBM25ClassicNGramSimilarityInstance = NGramSimilarity . bm25 ( BM25Model . CLASSIC ) ;
private static final Similarity luceneBM25PlusNGramSimilarityInstance = NGramSimilarity . bm25 ( BM25Model . PLUS ) ;
private static final Similarity luceneBM25LNGramSimilarityInstance = NGramSimilarity . bm25 ( BM25Model . L ) ;
private static final Similarity luceneBM15PlusNGramSimilarityInstance = NGramSimilarity . bm15 ( BM25Model . PLUS ) ;
private static final Similarity luceneBM11PlusNGramSimilarityInstance = NGramSimilarity . bm11 ( BM25Model . PLUS ) ;
private static final Similarity luceneClassicSimilarityInstance = new ClassicSimilarity ( ) ;
private static final Similarity luceneClassicNGramSimilarityInstance = NGramSimilarity . classic ( ) ;
private static final Similarity luceneLTCSimilarityInstance = new LtcSimilarity ( ) ;
private static final Similarity luceneLDPSimilarityInstance = new LdpSimilarity ( ) ;
private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity ( 0 , 0 . 5f ) ;
private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity ( ) ;
private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity ( ) ;
2021-05-28 16:04:59 +02:00
@SuppressWarnings ( " DuplicatedCode " )
2021-02-04 22:42:57 +01:00
public static Analyzer getAnalyzer ( TextFieldsAnalyzer analyzer ) {
2021-05-28 16:04:59 +02:00
return switch ( analyzer ) {
case N4GramPartialWords - > lucene4GramWordsAnalyzerInstance ;
case N4GramPartialString - > lucene4GramStringAnalyzerInstance ;
case N4GramPartialWordsEdge - > lucene4GramWordsAnalyzerEdgeInstance ;
case N4GramPartialStringEdge - > lucene4GramStringAnalyzerEdgeInstance ;
case N3To5GramPartialWords - > lucene3To5GramWordsAnalyzerInstance ;
case N3To5GramPartialString - > lucene3To5GramStringAnalyzerInstance ;
case N3To5GramPartialWordsEdge - > lucene3To5GramWordsAnalyzerEdgeInstance ;
case N3To5GramPartialStringEdge - > lucene3To5GramStringAnalyzerEdgeInstance ;
case Standard - > luceneStandardAnalyzerInstance ;
case FullText - > luceneWordAnalyzerStopWordsAndStemInstance ;
case WordWithStopwordsStripping - > luceneWordAnalyzerStopWordsInstance ;
case WordWithStemming - > luceneWordAnalyzerStemInstance ;
case WordSimple - > luceneWordAnalyzerSimpleInstance ;
case ICUCollationKey - > luceneICUCollationKeyInstance ;
//noinspection UnnecessaryDefault
default - > throw new UnsupportedOperationException ( " Unknown analyzer: " + analyzer ) ;
} ;
2021-02-04 22:42:57 +01:00
}
2021-05-28 16:04:59 +02:00
@SuppressWarnings ( " DuplicatedCode " )
2021-02-04 22:42:57 +01:00
public static Similarity getSimilarity ( TextFieldsSimilarity similarity ) {
2021-05-28 16:04:59 +02:00
return switch ( similarity ) {
case BM25Classic - > luceneBM25ClassicSimilarityInstance ;
case NGramBM25Classic - > luceneBM25ClassicNGramSimilarityInstance ;
case BM25L - > luceneBM25LSimilarityInstance ;
case NGramBM25L - > luceneBM25LNGramSimilarityInstance ;
case Classic - > luceneClassicSimilarityInstance ;
case NGramClassic - > luceneClassicNGramSimilarityInstance ;
case BM25Plus - > luceneBM25PlusSimilarityInstance ;
case NGramBM25Plus - > luceneBM25PlusNGramSimilarityInstance ;
case BM15Plus - > luceneBM15PlusSimilarityInstance ;
case NGramBM15Plus - > luceneBM15PlusNGramSimilarityInstance ;
case BM11Plus - > luceneBM11PlusSimilarityInstance ;
case NGramBM11Plus - > luceneBM11PlusNGramSimilarityInstance ;
case LTC - > luceneLTCSimilarityInstance ;
case LDP - > luceneLDPSimilarityInstance ;
case LDPNoLength - > luceneLDPNoLengthSimilarityInstance ;
case Robertson - > luceneRobertsonSimilarityInstance ;
case Boolean - > luceneBooleanSimilarityInstance ;
//noinspection UnnecessaryDefault
default - > throw new IllegalStateException ( " Unknown similarity: " + similarity ) ;
} ;
2021-02-04 22:42:57 +01:00
}
/ * *
*
* @param stem Enable stem filters on words .
* Pass false if it will be used with a n - gram filter
* /
public static TokenStream newCommonFilter ( TokenStream tokenStream , boolean stem ) {
tokenStream = newCommonNormalizer ( tokenStream ) ;
if ( stem ) {
tokenStream = new KStemFilter ( tokenStream ) ;
tokenStream = new EnglishPossessiveFilter ( tokenStream ) ;
}
return tokenStream ;
}
/**
 * Wraps a token stream with an ASCII folding filter followed by a lower-case filter.
 *
 * @param tokenStream the stream to wrap
 * @return the wrapped token stream
 */
public static TokenStream newCommonNormalizer(TokenStream tokenStream) {
  return new LowerCaseFilter(new ASCIIFoldingFilter(tokenStream));
}
2021-07-04 01:34:17 +02:00
/ * *
*
2021-07-05 12:05:45 +02:00
* @return false if the result is not relevant
2021-07-04 01:34:17 +02:00
* /
@Nullable
2021-07-05 12:05:45 +02:00
public static boolean filterTopDoc ( float score , Float minCompetitiveScore ) {
return minCompetitiveScore = = null | | score > = minCompetitiveScore ;
}
2021-07-17 23:06:26 +02:00
/ * *
* @throws NoSuchElementException when the key is not found
* @throws IOException when an error occurs when reading the document
* /
@NotNull
2021-07-08 17:01:56 +02:00
public static String keyOfTopDoc ( int docId , IndexReader indexReader ,
2021-07-17 23:06:26 +02:00
String keyFieldName ) throws IOException , NoSuchElementException {
2021-09-05 14:23:46 +02:00
if ( Schedulers . isInNonBlockingThread ( ) ) {
throw new UnsupportedOperationException ( " Called keyOfTopDoc in a nonblocking thread " ) ;
}
2021-07-06 00:30:14 +02:00
if ( docId > indexReader . maxDoc ( ) ) {
2021-07-08 17:01:56 +02:00
throw new IOException ( " Document " + docId + " > maxDoc ( " + indexReader . maxDoc ( ) + " ) " ) ;
2021-07-06 00:30:14 +02:00
}
2021-07-27 00:32:30 +02:00
DocumentStoredSingleFieldVisitor visitor = new DocumentStoredSingleFieldVisitor ( keyFieldName ) ;
indexReader . document ( docId , visitor ) ;
Document d = visitor . getDocument ( ) ;
2021-07-05 12:05:45 +02:00
if ( d . getFields ( ) . isEmpty ( ) ) {
2021-07-17 23:06:26 +02:00
throw new NoSuchElementException (
" Can't get key (field \" " + keyFieldName + " \" ) of document docId: " + docId + " . Available fields: [] " ) ;
2021-07-04 01:34:17 +02:00
} else {
2021-07-05 12:05:45 +02:00
var field = d . getField ( keyFieldName ) ;
if ( field = = null ) {
2021-07-17 23:06:26 +02:00
throw new NoSuchElementException (
" Can't get key (field \" " + keyFieldName + " \" ) of document docId: " + docId + " . Available fields: " + d
. getFields ( )
. stream ( )
. map ( IndexableField : : name )
. collect ( Collectors . joining ( " , " , " [ " , " ] " ) ) ) ;
2021-07-05 12:05:45 +02:00
} else {
return field . stringValue ( ) ;
}
2021-07-04 01:34:17 +02:00
}
}
2021-03-11 14:45:45 +01:00
public static < T , U , V > ValueGetter < Entry < T , U > , V > getAsyncDbValueGetterDeep (
CompositeSnapshot snapshot ,
DatabaseMapDictionaryDeep < T , Map < U , V > , DatabaseMapDictionary < U , V > > dictionaryDeep ) {
return entry - > dictionaryDeep
. at ( snapshot , entry . getKey ( ) )
2021-05-12 21:41:47 +02:00
. flatMap ( sub - > sub . getValue ( snapshot , entry . getValue ( ) ) . doAfterTerminate ( sub : : release ) ) ;
2021-03-11 14:45:45 +01:00
}
2021-05-28 16:04:59 +02:00
public static PerFieldAnalyzerWrapper toPerFieldAnalyzerWrapper ( IndicizerAnalyzers indicizerAnalyzers ) {
HashMap < String , Analyzer > perFieldAnalyzer = new HashMap < > ( ) ;
indicizerAnalyzers
. fieldAnalyzer ( )
. forEach ( ( key , value ) - > perFieldAnalyzer . put ( key , LuceneUtils . getAnalyzer ( value ) ) ) ;
return new PerFieldAnalyzerWrapper ( LuceneUtils . getAnalyzer ( indicizerAnalyzers . defaultAnalyzer ( ) ) , perFieldAnalyzer ) ;
}
public static PerFieldSimilarityWrapper toPerFieldSimilarityWrapper ( IndicizerSimilarities indicizerSimilarities ) {
HashMap < String , Similarity > perFieldSimilarity = new HashMap < > ( ) ;
indicizerSimilarities
. fieldSimilarity ( )
. forEach ( ( key , value ) - > perFieldSimilarity . put ( key , LuceneUtils . getSimilarity ( value ) ) ) ;
var defaultSimilarity = LuceneUtils . getSimilarity ( indicizerSimilarities . defaultSimilarity ( ) ) ;
return new PerFieldSimilarityWrapper ( ) {
@Override
public Similarity get ( String name ) {
return perFieldSimilarity . getOrDefault ( name , defaultSimilarity ) ;
}
} ;
}
2021-07-01 21:19:52 +02:00
public static int alignUnsigned ( int number , boolean expand ) {
if ( number % 4096 ! = 0 ) {
if ( expand ) {
return number + ( 4096 - ( number % 4096 ) ) ;
} else {
return number - ( number % 4096 ) ;
}
} else {
return number ;
}
}
public static long alignUnsigned ( long number , boolean expand ) {
if ( number % 4096L ! = 0 ) {
if ( expand ) {
return number + ( 4096L - ( number % 4096L ) ) ;
} else {
return number - ( number % 4096L ) ;
}
} else {
return number ;
}
}
2021-09-05 14:23:46 +02:00
public static void readInternalAligned ( Object ref ,
FileChannel channel ,
long pos ,
ByteBuffer b ,
int readLength ,
int usefulLength ,
long end ) throws IOException {
if ( Schedulers . isInNonBlockingThread ( ) ) {
throw new UnsupportedOperationException ( " Called readInternalAligned in a nonblocking thread " ) ;
}
2021-07-01 21:19:52 +02:00
int startBufPosition = b . position ( ) ;
int readData = 0 ;
int i ;
for ( ; readLength > 0 ; readLength - = i ) {
int toRead = readLength ;
b . limit ( b . position ( ) + toRead ) ;
assert b . remaining ( ) = = toRead ;
var beforeReadBufPosition = b . position ( ) ;
channel . read ( b , pos ) ;
b . limit ( Math . min ( startBufPosition + usefulLength , b . position ( ) + toRead ) ) ;
var afterReadBufPosition = b . position ( ) ;
i = ( afterReadBufPosition - beforeReadBufPosition ) ;
readData + = i ;
if ( i < toRead & & i > 0 ) {
if ( readData < usefulLength ) {
throw new EOFException ( " read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end ) ;
}
if ( readData = = usefulLength ) {
b . limit ( b . position ( ) ) ;
// File end reached
return ;
}
}
if ( i < 0 ) {
throw new EOFException ( " read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end ) ;
}
assert i > 0 : " FileChannel.read with non zero-length bb.remaining() must always read at least one byte (FileChannel is in blocking mode, see spec of ReadableByteChannel) " ;
pos + = ( long ) i ;
}
assert readLength = = 0 ;
}
2021-07-06 00:30:14 +02:00
/**
 * Converts a long to an int, clamping it to the range [-2147483630, 2147483630].
 *
 * @param l the value to convert
 * @return {@code l} as an int when it is inside the range, the nearest range bound otherwise
 */
public static int safeLongToInt(long l) {
  long clamped = Math.max(-2147483630L, Math.min(2147483630L, l));
  return (int) clamped;
}
@Nullable
public static FieldDoc getLastFieldDoc ( ScoreDoc [ ] scoreDocs ) {
if ( scoreDocs = = null ) {
return null ;
}
if ( scoreDocs . length = = 0 ) {
return null ;
}
return ( FieldDoc ) scoreDocs [ scoreDocs . length - 1 ] ;
}
@Nullable
public static ScoreDoc getLastScoreDoc ( ScoreDoc [ ] scoreDocs ) {
if ( scoreDocs = = null ) {
return null ;
}
if ( scoreDocs . length = = 0 ) {
return null ;
}
return scoreDocs [ scoreDocs . length - 1 ] ;
}
2021-07-06 01:30:37 +02:00
/**
 * Converts client query parameters into local query parameters.
 *
 * <p>The offset and the limit are narrowed to int with {@code safeLongToInt}; the query, the sort
 * and the score mode are converted with {@code QueryParser}.
 *
 * @param queryParams the client-side query parameters
 * @return the equivalent local query parameters
 */
public static LocalQueryParams toLocalQueryParams(QueryParams queryParams) {
  var localQuery = QueryParser.toQuery(queryParams.query());
  int localOffset = safeLongToInt(queryParams.offset());
  int localLimit = safeLongToInt(queryParams.limit());
  var localMinCompetitiveScore = queryParams.minCompetitiveScore().getNullable();
  var localSort = QueryParser.toSort(queryParams.sort());
  var localScoreMode = QueryParser.toScoreMode(queryParams.scoreMode());
  return new LocalQueryParams(localQuery,
      localOffset,
      localLimit,
      localMinCompetitiveScore,
      localSort,
      localScoreMode
  );
}
2021-07-08 17:01:56 +02:00
2021-09-18 18:34:21 +02:00
public static Flux < LLKeyScore > convertHits ( Flux < ScoreDoc > hitsFlux ,
2021-07-08 17:01:56 +02:00
IndexSearchers indexSearchers ,
String keyFieldName ,
2021-07-30 14:01:12 +02:00
boolean preserveOrder ) {
2021-09-18 18:34:21 +02:00
if ( preserveOrder ) {
return hitsFlux
. publishOn ( Schedulers . boundedElastic ( ) )
. mapNotNull ( hit - > mapHitBlocking ( hit , indexSearchers , keyFieldName ) ) ;
} else {
// Compute parallelism
var availableProcessors = Runtime . getRuntime ( ) . availableProcessors ( ) ;
var min = Queues . XS_BUFFER_SIZE ;
var maxParallelGroups = Math . max ( availableProcessors , min ) ;
2021-07-08 17:01:56 +02:00
2021-09-18 18:34:21 +02:00
return hitsFlux
. groupBy ( hit - > hit . shardIndex % maxParallelGroups ) // Max n groups
. flatMap ( shardHits - > shardHits
. publishOn ( Schedulers . boundedElastic ( ) )
. mapNotNull ( hit - > mapHitBlocking ( hit , indexSearchers , keyFieldName ) ) ,
maxParallelGroups // Max n concurrency. Concurrency must be >= total groups count
) ;
}
2021-07-30 14:01:12 +02:00
}
@Nullable
private static LLKeyScore mapHitBlocking ( ScoreDoc hit ,
IndexSearchers indexSearchers ,
String keyFieldName ) {
2021-09-05 14:23:46 +02:00
if ( Schedulers . isInNonBlockingThread ( ) ) {
throw new UnsupportedOperationException ( " Called mapHitBlocking in a nonblocking thread " ) ;
}
2021-07-30 14:01:12 +02:00
int shardDocId = hit . doc ;
int shardIndex = hit . shardIndex ;
float score = hit . score ;
var indexSearcher = indexSearchers . shard ( shardIndex ) ;
try {
String collectedDoc = keyOfTopDoc ( shardDocId , indexSearcher . getIndexReader ( ) , keyFieldName ) ;
2021-08-24 11:06:25 +02:00
return new LLKeyScore ( shardDocId , score , collectedDoc ) ;
2021-07-30 14:01:12 +02:00
} catch ( NoSuchElementException ex ) {
2021-08-24 11:06:25 +02:00
logger . debug ( " Error: document {} key is not present! " , shardDocId ) ;
2021-07-30 14:01:12 +02:00
return null ;
} catch ( Exception ex ) {
2021-08-24 11:06:25 +02:00
logger . error ( " Failed to read document {} " , shardDocId , ex ) ;
return new LLKeyScore ( shardDocId , score , null ) ;
2021-07-30 14:01:12 +02:00
}
2021-07-08 17:01:56 +02:00
}
/ * *
* Transform a flux of results to take elements while the minimum competitive score is valid
* /
public static Flux < LLKeyScore > filterTopDoc ( Flux < LLKeyScore > flux , LocalQueryParams queryParams ) {
2021-07-08 18:54:53 +02:00
if ( queryParams . scoreMode ( ) . needsScores ( ) & & queryParams . minCompetitiveScore ( ) ! = null ) {
if ( queryParams . sort ( ) ! = null & & queryParams . sort ( ) . needsScores ( ) ) {
return flux . takeWhile ( entry - > LuceneUtils . filterTopDoc ( entry . score ( ) , queryParams . minCompetitiveScore ( ) ) ) ;
} else {
return flux . filter ( entry - > LuceneUtils . filterTopDoc ( entry . score ( ) , queryParams . minCompetitiveScore ( ) ) ) ;
}
2021-07-08 17:01:56 +02:00
} else {
return flux ;
2021-07-08 18:54:53 +02:00
}
2021-07-08 17:01:56 +02:00
}
2021-09-05 14:23:46 +02:00
public static TopDocs mergeTopDocs ( Sort sort ,
@Nullable Integer startN ,
@Nullable Integer topN ,
TopDocs [ ] topDocs ,
Comparator < ScoreDoc > tieBreaker ) {
2021-07-08 18:54:53 +02:00
if ( ( startN = = null ) ! = ( topN = = null ) ) {
throw new IllegalArgumentException ( " You must pass startN and topN together or nothing " ) ;
}
2021-07-08 17:01:56 +02:00
TopDocs result ;
if ( sort ! = null ) {
if ( ! ( topDocs instanceof TopFieldDocs [ ] ) ) {
throw new IllegalStateException ( " Expected TopFieldDocs[], got TopDocs[] " ) ;
}
2021-07-08 18:54:53 +02:00
if ( startN = = null ) {
int defaultTopN = 0 ;
for ( TopDocs td : topDocs ) {
int length = td . scoreDocs . length ;
defaultTopN + = length ;
}
result = TopDocs . merge ( sort , 0 , defaultTopN ,
( TopFieldDocs [ ] ) topDocs ,
tieBreaker
) ;
} else {
result = TopDocs . merge ( sort , startN ,
topN ,
( TopFieldDocs [ ] ) topDocs ,
tieBreaker
) ;
}
2021-07-08 17:01:56 +02:00
} else {
2021-07-08 18:54:53 +02:00
if ( startN = = null ) {
int defaultTopN = 0 ;
for ( TopDocs td : topDocs ) {
int length = td . scoreDocs . length ;
defaultTopN + = length ;
}
result = TopDocs . merge ( 0 ,
defaultTopN ,
topDocs ,
tieBreaker
) ;
} else {
result = TopDocs . merge ( startN ,
topN ,
topDocs ,
tieBreaker
) ;
}
2021-07-08 17:01:56 +02:00
}
return result ;
}
2021-07-17 23:06:26 +02:00
public static int totalHitsThreshold ( ) {
2021-07-18 19:37:24 +02:00
return 1 ;
2021-07-17 23:06:26 +02:00
}
2021-08-04 01:12:39 +02:00
public static TotalHitsCount convertTotalHitsCount ( TotalHits totalHits ) {
return switch ( totalHits . relation ) {
case EQUAL_TO - > TotalHitsCount . of ( totalHits . value , true ) ;
case GREATER_THAN_OR_EQUAL_TO - > TotalHitsCount . of ( totalHits . value , false ) ;
} ;
}
public static TotalHitsCount sum ( TotalHitsCount totalHitsCount , TotalHitsCount totalHitsCount1 ) {
return TotalHitsCount . of ( totalHitsCount . value ( ) + totalHitsCount1 . value ( ) ,
totalHitsCount . exact ( ) & & totalHitsCount1 . exact ( )
) ;
}
2021-08-04 01:16:17 +02:00
/**
 * Formats a total hits count for display.
 *
 * @param totalHitsCount the count to format
 * @return the value alone when the count is exact, the value followed by a plus marker otherwise
 */
@SuppressWarnings(" unused ")
public static String toHumanReadableString(TotalHitsCount totalHitsCount) {
  return totalHitsCount.exact()
      ? Long.toString(totalHitsCount.value())
      : totalHitsCount.value() + " + ";
}
2021-09-08 21:34:52 +02:00
/**
 * Creates a new bounded elastic scheduler for Lucene searchers.
 *
 * <p>The scheduler is created with a thread cap of 4, the default bounded elastic queue size,
 * a TTL of 60 seconds and daemon threads.
 *
 * @param multi true to use the multi-searcher thread name, false to use the shard-searcher one
 * @return the new scheduler
 */
public static Scheduler newLuceneSearcherScheduler(boolean multi) {
  String threadName = multi ? " lucene-searcher-multi " : " lucene-searcher-shard ";
  return Schedulers.newBoundedElastic(
      4,
      Schedulers.DEFAULT_BOUNDED_ELASTIC_QUEUESIZE,
      threadName,
      60,
      true
  );
}
2021-02-04 22:42:57 +01:00
}