the-superpirate 43be16e4bc - [nexus] Update schema
- [nexus] Remove outdated protos
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Refactor views
  - [nexus] Update aiosumma
  - [nexus] Add tags
  - [nexus] Development
  - [nexus] Update repository
  - [nexus] Update repository
  - [nexus] Update dependencies
  - [nexus] Update dependencies
  - [nexus] Fixes for MetaAPI
  - [nexus] Support for new queries
  - [nexus] Adopt new versions of search
  - [nexus] Improving Nexus
  - [nexus] Various fixes
  - [nexus] Add profile
  - [nexus] Fixes for ingestion
  - [nexus] Refactorings and bugfixes
  - [idm] Add profile methods
  - [nexus] Fix stalled nexus-meta bugs
  - [nexus] Various bugfixes
  - [nexus] Restore IDM API functionality

GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a
2022-09-02 19:15:47 +03:00

68 lines
2.3 KiB
Python

import heapq
from typing import List
from summa.proto import search_service_pb2
class TopDocsIterator:
def __init__(self, top_docs_collector: search_service_pb2.TopDocsCollectorOutput):
self.top_docs_collector = top_docs_collector
self._current = 0
def __lt__(self, other: 'TopDocsIterator'):
self_score = self.current().score
other_score = other.current().score
self_score = getattr(self_score, self_score.WhichOneof('score'))
other_score = getattr(other_score, other_score.WhichOneof('score'))
return self_score > other_score
def __iter__(self):
return self
def __next__(self) -> search_service_pb2.ScoredDocument:
if self.has_any():
item = self.current()
self._current += 1
return item
raise StopIteration
def current(self):
return self.top_docs_collector.scored_documents[self._current]
def has_next(self) -> bool:
return self.top_docs_collector.has_next
def has_any(self) -> bool:
return self._current < len(self.top_docs_collector.scored_documents)
class TopDocsMerger:
def __init__(self, top_docs_collectors: List[search_service_pb2.TopDocsCollectorOutput]):
self.top_docs_heap = []
for top_docs_collector in top_docs_collectors:
top_docs_iterator = TopDocsIterator(top_docs_collector)
if top_docs_iterator.has_any():
self.top_docs_heap.append(top_docs_iterator)
heapq.heapify(self.top_docs_heap)
def merge(self) -> search_service_pb2.CollectorOutput:
scored_documents = []
has_next = any([top_docs_iterator for top_docs_iterator in self.top_docs_heap])
position = 0
while self.top_docs_heap:
largest_top_docs_iterator = heapq.heappop(self.top_docs_heap)
largest_item = next(largest_top_docs_iterator)
largest_item.position = position
scored_documents.append(largest_item)
position += 1
if largest_top_docs_iterator.has_any():
heapq.heappush(self.top_docs_heap, largest_top_docs_iterator)
return search_service_pb2.CollectorOutput(
top_docs=search_service_pb2.TopDocsCollectorOutput(
has_next=has_next,
scored_documents=scored_documents,
)
)