Skip to content

Rankings

BaseReranking

Bases: BaseComponent

Source code in libs\kotaemon\kotaemon\indices\rankings\base.py
class BaseReranking(BaseComponent):
    """Abstract interface for components that transform a list of documents
    given a query (re-ranking, filtering, etc)."""

    @abstractmethod
    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Transform `documents` with respect to `query`.

        Concrete subclasses define the transformation (re-ranking,
        filtering, etc).

        Args:
            documents: the documents to process.
            query: the query the documents are processed against.

        Returns:
            The resulting list of documents.
        """
        ...

run abstractmethod

run(documents, query)

Main method to transform a list of documents (re-ranking, filtering, etc.)

Source code in libs\kotaemon\kotaemon\indices\rankings\base.py
@abstractmethod
def run(self, documents: list[Document], query: str) -> list[Document]:
    """Transform `documents` with respect to `query`.

    Concrete subclasses define the transformation (re-ranking,
    filtering, etc).

    Args:
        documents: the documents to process.
        query: the query the documents are processed against.

    Returns:
        The resulting list of documents.
    """
    ...

CohereReranking

Bases: BaseReranking

Source code in libs\kotaemon\kotaemon\indices\rankings\cohere.py
class CohereReranking(BaseReranking):
    """Re-order documents using a Cohere rerank model."""

    # Name of the rerank model sent to the Cohere API.
    model_name: str = "rerank-multilingual-v2.0"
    # NOTE: the environment variable is read once, when this class body is
    # evaluated, not on every call.
    cohere_api_key: str = os.environ.get("COHERE_API_KEY", "")
    # Number of documents requested back from the reranker (sent as `top_n`).
    top_k: int = 1

    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Use Cohere Reranker model to re-order documents
        with their relevance score

        Args:
            documents: candidate documents; their `content` is sent to Cohere.
            query: the query the documents are ranked against.

        Returns:
            The documents selected by the reranker, in the order the API
            returned them. Each returned document has
            `metadata["relevance_score"]` set in place. An empty input yields
            an empty list.

        Raises:
            ImportError: if the `cohere` package is not installed.
        """
        try:
            import cohere
        except ImportError as exc:
            raise ImportError(
                "Please install Cohere `pip install cohere` to use Cohere Reranking"
            ) from exc

        # Bail out before building the client so that an empty input never
        # touches the Cohere SDK at all.
        if not documents:
            return []

        cohere_client = cohere.Client(self.cohere_api_key)
        response = cohere_client.rerank(
            model=self.model_name,
            query=query,
            documents=[d.content for d in documents],
            top_n=self.top_k,
        )

        # Some SDK versions return a response object exposing the hits under
        # `.results` instead of being directly iterable over them; accept both.
        hits = getattr(response, "results", response)

        compressed_docs: list[Document] = []
        for hit in hits:
            doc = documents[hit.index]
            doc.metadata["relevance_score"] = hit.relevance_score
            compressed_docs.append(doc)

        return compressed_docs

run

run(documents, query)

Use the Cohere Reranker model to re-order documents and attach their relevance scores

Source code in libs\kotaemon\kotaemon\indices\rankings\cohere.py
def run(self, documents: list[Document], query: str) -> list[Document]:
    """Use Cohere Reranker model to re-order documents
    with their relevance score

    Args:
        documents: candidate documents; their `content` is sent to Cohere.
        query: the query the documents are ranked against.

    Returns:
        The documents selected by the reranker, in the order the API
        returned them. Each returned document has
        `metadata["relevance_score"]` set in place. An empty input yields
        an empty list.

    Raises:
        ImportError: if the `cohere` package is not installed.
    """
    try:
        import cohere
    except ImportError as exc:
        raise ImportError(
            "Please install Cohere `pip install cohere` to use Cohere Reranking"
        ) from exc

    # Bail out before building the client so that an empty input never
    # touches the Cohere SDK at all.
    if not documents:
        return []

    cohere_client = cohere.Client(self.cohere_api_key)
    response = cohere_client.rerank(
        model=self.model_name,
        query=query,
        documents=[d.content for d in documents],
        top_n=self.top_k,
    )

    # Some SDK versions return a response object exposing the hits under
    # `.results` instead of being directly iterable over them; accept both.
    hits = getattr(response, "results", response)

    compressed_docs: list[Document] = []
    for hit in hits:
        doc = documents[hit.index]
        doc.metadata["relevance_score"] = hit.relevance_score
        compressed_docs.append(doc)

    return compressed_docs

LLMReranking

Bases: BaseReranking

Source code in libs\kotaemon\kotaemon\indices\rankings\llm.py
class LLMReranking(BaseReranking):
    """Filter documents by asking an LLM whether each one is relevant."""

    # Model called once per document with the populated prompt.
    llm: BaseLLM
    # Template populated with `question` and `context` for each document.
    prompt_template: PromptTemplate = PromptTemplate(template=RERANK_PROMPT_TEMPLATE)
    # Only used for the fallback: how many leading documents to return when
    # the LLM rejects every document.
    top_k: int = 3
    # When True, the per-document LLM calls are issued from a thread pool.
    concurrent: bool = True

    def run(
        self,
        documents: list[Document],
        query: str,
    ) -> list[Document]:
        """Filter down documents based on their relevance to the query.

        Args:
            documents: candidate documents, each judged independently.
            query: the question each document is judged against.

        Returns:
            The documents the LLM judged relevant, in their original order.
            If none is judged relevant, the first `top_k` input documents are
            returned instead so the result is not empty (unless the input is).
        """
        output_parser = BooleanOutputParser()

        prompts = [
            self.prompt_template.populate(question=query, context=doc.get_content())
            for doc in documents
        ]

        def ask_llm(prompt):
            # The prompt is passed as an argument on purpose: a zero-argument
            # lambda closing over the loop variable would read whatever prompt
            # the loop had reached by the time the worker thread ran it.
            return self.llm(prompt).text

        if self.concurrent:
            with ThreadPoolExecutor() as executor:
                # `map` yields results in input order.
                answers = list(executor.map(ask_llm, prompts))
        else:
            answers = [ask_llm(prompt) for prompt in prompts]

        # use Boolean parser to extract relevancy output from LLM
        verdicts = [output_parser.parse(answer) for answer in answers]
        filtered_docs = [doc for keep, doc in zip(verdicts, documents) if keep]

        # prevent returning empty result
        if not filtered_docs:
            filtered_docs = documents[: self.top_k]

        return filtered_docs

run

run(documents, query)

Filter down documents based on their relevance to the query.

Source code in libs\kotaemon\kotaemon\indices\rankings\llm.py
def run(
    self,
    documents: list[Document],
    query: str,
) -> list[Document]:
    """Filter down documents based on their relevance to the query.

    Args:
        documents: candidate documents, each judged independently.
        query: the question each document is judged against.

    Returns:
        The documents the LLM judged relevant, in their original order.
        If none is judged relevant, the first `top_k` input documents are
        returned instead so the result is not empty (unless the input is).
    """
    output_parser = BooleanOutputParser()

    prompts = [
        self.prompt_template.populate(question=query, context=doc.get_content())
        for doc in documents
    ]

    def ask_llm(prompt):
        # The prompt is passed as an argument on purpose: a zero-argument
        # lambda closing over the loop variable would read whatever prompt
        # the loop had reached by the time the worker thread ran it.
        return self.llm(prompt).text

    if self.concurrent:
        with ThreadPoolExecutor() as executor:
            # `map` yields results in input order.
            answers = list(executor.map(ask_llm, prompts))
    else:
        answers = [ask_llm(prompt) for prompt in prompts]

    # use Boolean parser to extract relevancy output from LLM
    verdicts = [output_parser.parse(answer) for answer in answers]
    filtered_docs = [doc for keep, doc in zip(verdicts, documents) if keep]

    # prevent returning empty result
    if not filtered_docs:
        filtered_docs = documents[: self.top_k]

    return filtered_docs