Rankings

BaseReranking

Bases: BaseComponent

Source code in libs/kotaemon/kotaemon/indices/rankings/base.py
class BaseReranking(BaseComponent):
    @abstractmethod
    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Main method to transform list of documents
        (re-ranking, filtering, etc)"""
        ...

run abstractmethod

run(documents, query)

Main method to transform a list of documents (re-ranking, filtering, etc.)

Source code in libs/kotaemon/kotaemon/indices/rankings/base.py
@abstractmethod
def run(self, documents: list[Document], query: str) -> list[Document]:
    """Main method to transform list of documents
    (re-ranking, filtering, etc)"""
    ...
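
Any concrete reranker only needs to implement run. A minimal sketch of a custom subclass (the KeywordReranking class and both import paths are illustrative assumptions, not part of kotaemon):

from kotaemon.base import Document  # assumed import path
from kotaemon.indices.rankings import BaseReranking  # assumed import path


class KeywordReranking(BaseReranking):
    """Toy reranker: documents mentioning more query terms come first."""

    def run(self, documents: list[Document], query: str) -> list[Document]:
        terms = query.lower().split()

        def hits(doc: Document) -> int:
            # count how many query terms occur in the document body
            body = doc.content.lower()
            return sum(term in body for term in terms)

        return sorted(documents, key=hits, reverse=True)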

CohereReranking

Bases: BaseReranking

Source code in libs/kotaemon/kotaemon/indices/rankings/cohere.py
class CohereReranking(BaseReranking):
    model_name: str = "rerank-multilingual-v2.0"
    cohere_api_key: str = config("COHERE_API_KEY", "")
    use_key_from_ktem: bool = False

    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Use Cohere Reranker model to re-order documents
        with their relevance score"""
        try:
            import cohere
        except ImportError:
            raise ImportError(
                "Please install Cohere (`pip install cohere`) to use Cohere Reranking"
            )

        # try to get COHERE_API_KEY from embeddings
        if not self.cohere_api_key and self.use_key_from_ktem:
            try:
                from ktem.embeddings.manager import (
                    embedding_models_manager as embeddings,
                )

                cohere_model = embeddings.get("cohere")
                ktem_cohere_api_key = cohere_model._kwargs.get(  # type: ignore
                    "cohere_api_key"
                )
                if ktem_cohere_api_key != "your-key":
                    self.cohere_api_key = ktem_cohere_api_key
            except Exception as e:
                print("Cannot get Cohere API key from `ktem`", e)

        if not self.cohere_api_key:
            print("Cohere API key not found. Skipping rerankings.")
            return documents

        cohere_client = cohere.Client(self.cohere_api_key)
        compressed_docs: list[Document] = []

        if not documents:  # to avoid empty api call
            return compressed_docs

        _docs = [d.content for d in documents]
        response = cohere_client.rerank(
            model=self.model_name, query=query, documents=_docs
        )
        for r in response.results:
            doc = documents[r.index]
            doc.metadata["reranking_score"] = r.relevance_score
            compressed_docs.append(doc)

        return compressed_docs

run

run(documents, query)

Use the Cohere Reranker model to re-order documents by their relevance score

Source code in libs/kotaemon/kotaemon/indices/rankings/cohere.py
def run(self, documents: list[Document], query: str) -> list[Document]:
    """Use Cohere Reranker model to re-order documents
    with their relevance score"""
    try:
        import cohere
    except ImportError:
        raise ImportError(
            "Please install Cohere (`pip install cohere`) to use Cohere Reranking"
        )

    # try to get COHERE_API_KEY from embeddings
    if not self.cohere_api_key and self.use_key_from_ktem:
        try:
            from ktem.embeddings.manager import (
                embedding_models_manager as embeddings,
            )

            cohere_model = embeddings.get("cohere")
            ktem_cohere_api_key = cohere_model._kwargs.get(  # type: ignore
                "cohere_api_key"
            )
            if ktem_cohere_api_key != "your-key":
                self.cohere_api_key = ktem_cohere_api_key
        except Exception as e:
            print("Cannot get Cohere API key from `ktem`", e)

    if not self.cohere_api_key:
        print("Cohere API key not found. Skipping rerankings.")
        return documents

    cohere_client = cohere.Client(self.cohere_api_key)
    compressed_docs: list[Document] = []

    if not documents:  # to avoid empty api call
        return compressed_docs

    _docs = [d.content for d in documents]
    response = cohere_client.rerank(
        model=self.model_name, query=query, documents=_docs
    )
    for r in response.results:
        doc = documents[r.index]
        doc.metadata["reranking_score"] = r.relevance_score
        compressed_docs.append(doc)

    return compressed_docs
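
When no API key can be resolved, run returns the input documents unchanged; otherwise each returned Document carries its Cohere relevance score in metadata["reranking_score"]. A hedged usage sketch (the sample documents and query are invented; a real key must be available via COHERE_API_KEY or the cohere_api_key field):

from kotaemon.base import Document  # assumed import path

docs = [
    Document(content="Vector search finds nearest neighbours in embedding space."),
    Document(content="SQL joins combine rows from two or more tables."),
]

reranker = CohereReranking()  # reads COHERE_API_KEY from the environment
ranked = reranker.run(docs, query="How does vector search work?")
for doc in ranked:
    # reranking_score is present only when the Cohere call actually ran
    print(doc.metadata.get("reranking_score"), doc.content[:50])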

LLMReranking

Bases: BaseReranking

Source code in libs/kotaemon/kotaemon/indices/rankings/llm.py
class LLMReranking(BaseReranking):
    llm: BaseLLM
    prompt_template: PromptTemplate = PromptTemplate(template=RERANK_PROMPT_TEMPLATE)
    top_k: int = 3
    concurrent: bool = True

    def run(
        self,
        documents: list[Document],
        query: str,
    ) -> list[Document]:
        """Filter down documents based on their relevance to the query."""
        filtered_docs = []
        output_parser = BooleanOutputParser()

        if self.concurrent:
            with ThreadPoolExecutor() as executor:
                futures = []
                for doc in documents:
                    _prompt = self.prompt_template.populate(
                        question=query, context=doc.get_content()
                    )
                    # bind the prompt as a default argument; a bare lambda
                    # would capture `_prompt` late and could evaluate every
                    # call against the last prompt built in this loop
                    futures.append(
                        executor.submit(lambda prompt=_prompt: self.llm(prompt).text)
                    )

                results = [future.result() for future in futures]
        else:
            results = []
            for doc in documents:
                _prompt = self.prompt_template.populate(
                    question=query, context=doc.get_content()
                )
                results.append(self.llm(_prompt).text)

        # use Boolean parser to extract relevancy output from LLM
        results = [output_parser.parse(result) for result in results]
        for include_doc, doc in zip(results, documents):
            if include_doc:
                filtered_docs.append(doc)

        # prevent returning empty result
        if len(filtered_docs) == 0:
            filtered_docs = documents[: self.top_k]

        return filtered_docs

run

run(documents, query)

Filter down documents based on their relevance to the query.

Source code in libs/kotaemon/kotaemon/indices/rankings/llm.py
def run(
    self,
    documents: list[Document],
    query: str,
) -> list[Document]:
    """Filter down documents based on their relevance to the query."""
    filtered_docs = []
    output_parser = BooleanOutputParser()

    if self.concurrent:
        with ThreadPoolExecutor() as executor:
            futures = []
            for doc in documents:
                _prompt = self.prompt_template.populate(
                    question=query, context=doc.get_content()
                )
                # bind the prompt as a default argument; a bare lambda
                # would capture `_prompt` late and could evaluate every
                # call against the last prompt built in this loop
                futures.append(
                    executor.submit(lambda prompt=_prompt: self.llm(prompt).text)
                )

            results = [future.result() for future in futures]
    else:
        results = []
        for doc in documents:
            _prompt = self.prompt_template.populate(
                question=query, context=doc.get_content()
            )
            results.append(self.llm(_prompt).text)

    # use Boolean parser to extract relevancy output from LLM
    results = [output_parser.parse(result) for result in results]
    for include_doc, doc in zip(results, documents):
        if include_doc:
            filtered_docs.append(doc)

    # prevent returning empty result
    if len(filtered_docs) == 0:
        filtered_docs = documents[: self.top_k]

    return filtered_docs
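
LLMReranking is a filter rather than a scorer: it asks the LLM a yes/no relevance question per document, keeps only the positives, and falls back to the first top_k documents when everything is rejected. A hedged usage sketch (the ChatOpenAI import path, model name, and sample documents are assumptions):

from kotaemon.base import Document  # assumed import path
from kotaemon.llms import ChatOpenAI  # assumed import path; any BaseLLM works

llm = ChatOpenAI(model="gpt-4o-mini")  # hypothetical model choice
docs = [
    Document(content="The reranking step runs after retrieval."),
    Document(content="Bananas are rich in potassium."),
]

reranker = LLMReranking(llm=llm, top_k=3, concurrent=True)
relevant = reranker.run(docs, query="When does reranking happen?")
# `relevant` keeps only the documents the LLM judged relevant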

LLMScoring

Bases: LLMReranking

Source code in libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
class LLMScoring(LLMReranking):
    def run(
        self,
        documents: list[Document],
        query: str,
    ) -> list[Document]:
        """Filter down documents based on their relevance to the query."""
        filtered_docs: list[Document] = []
        output_parser = BooleanOutputParser()

        if self.concurrent:
            with ThreadPoolExecutor() as executor:
                futures = []
                for doc in documents:
                    _prompt = self.prompt_template.populate(
                        question=query, context=doc.get_content()
                    )
                    # bind the prompt eagerly; a bare lambda would capture
                    # `_prompt` late and could reuse the loop's last prompt
                    futures.append(
                        executor.submit(lambda prompt=_prompt: self.llm(prompt))
                    )

                results = [future.result() for future in futures]
        else:
            results = []
            for doc in documents:
                _prompt = self.prompt_template.populate(
                    question=query, context=doc.get_content()
                )
                results.append(self.llm(_prompt))

        for result, doc in zip(results, documents):
            score = np.exp(np.average(result.logprobs))
            include_doc = output_parser.parse(result.text)
            if include_doc:
                doc.metadata["llm_reranking_score"] = score
            else:
                doc.metadata["llm_reranking_score"] = 1 - score
            filtered_docs.append(doc)

        # prevent returning empty result
        if len(filtered_docs) == 0:
            filtered_docs = documents[: self.top_k]

        return filtered_docs

run

run(documents, query)

Score all documents by their relevance to the query; none are dropped.

Source code in libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
def run(
    self,
    documents: list[Document],
    query: str,
) -> list[Document]:
    """Filter down documents based on their relevance to the query."""
    filtered_docs: list[Document] = []
    output_parser = BooleanOutputParser()

    if self.concurrent:
        with ThreadPoolExecutor() as executor:
            futures = []
            for doc in documents:
                _prompt = self.prompt_template.populate(
                    question=query, context=doc.get_content()
                )
                # bind the prompt eagerly; a bare lambda would capture
                # `_prompt` late and could reuse the loop's last prompt
                futures.append(
                    executor.submit(lambda prompt=_prompt: self.llm(prompt))
                )

            results = [future.result() for future in futures]
    else:
        results = []
        for doc in documents:
            _prompt = self.prompt_template.populate(
                question=query, context=doc.get_content()
            )
            results.append(self.llm(_prompt))

    for result, doc in zip(results, documents):
        score = np.exp(np.average(result.logprobs))
        include_doc = output_parser.parse(result.text)
        if include_doc:
            doc.metadata["llm_reranking_score"] = score
        else:
            doc.metadata["llm_reranking_score"] = 1 - score
        filtered_docs.append(doc)

    # prevent returning empty result
    if len(filtered_docs) == 0:
        filtered_docs = documents[: self.top_k]

    return filtered_docs
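
Unlike LLMReranking, LLMScoring keeps every document and annotates it instead: exp of the mean token logprob approximates the model's confidence in its yes/no answer, and a negative answer is stored as 1 - score. A hedged sketch, reusing the assumed llm and docs from the previous example (the configured LLM must expose token logprobs on its responses):

scorer = LLMScoring(llm=llm, concurrent=False)
scored = scorer.run(docs, query="When does reranking happen?")
for doc in sorted(
    scored, key=lambda d: d.metadata["llm_reranking_score"], reverse=True
):
    print(doc.metadata["llm_reranking_score"], doc.content[:50])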

LLMTrulensScoring

Bases: LLMReranking

Source code in libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
class LLMTrulensScoring(LLMReranking):
    llm: BaseLLM
    system_prompt_template: PromptTemplate = SYSTEM_PROMPT_TEMPLATE
    user_prompt_template: PromptTemplate = USER_PROMPT_TEMPLATE
    concurrent: bool = True
    normalize: float = 10
    trim_func: TokenSplitter = TokenSplitter.withx(
        chunk_size=MAX_CONTEXT_LEN,
        chunk_overlap=0,
        separator=" ",
        tokenizer=partial(
            tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
            allowed_special=set(),
            disallowed_special="all",
        ),
    )

    def run(
        self,
        documents: list[Document],
        query: str,
    ) -> list[Document]:
        """Filter down documents based on their relevance to the query."""
        filtered_docs = []

        documents = sorted(documents, key=lambda doc: doc.get_content())
        if self.concurrent:
            with ThreadPoolExecutor() as executor:
                futures = []
                for doc in documents:
                    chunked_doc_content = self.trim_func(
                        [
                            Document(content=doc.get_content())
                            # skip metadata, which can cause trouble
                        ]
                    )[0].text

                    messages = []
                    messages.append(
                        SystemMessage(self.system_prompt_template.populate())
                    )
                    messages.append(
                        HumanMessage(
                            self.user_prompt_template.populate(
                                question=query, context=chunked_doc_content
                            )
                        )
                    )

                    # bind this document's messages as a default argument so
                    # the submitted call does not pick up a later iteration's
                    # messages through late binding
                    def llm_call(messages=messages):
                        return self.llm(messages).text

                    futures.append(executor.submit(llm_call))

                results = [future.result() for future in futures]
        else:
            results = []
            for doc in documents:
                messages = []
                messages.append(SystemMessage(self.system_prompt_template.populate()))
                messages.append(
                    # the user prompt is a HumanMessage, matching the
                    # concurrent branch above
                    HumanMessage(
                        self.user_prompt_template.populate(
                            question=query, context=doc.get_content()
                        )
                    )
                )
                results.append(self.llm(messages).text)

        # convert each 0-10 rating from the LLM into a normalized score
        results = [
            (r_idx, float(re_0_10_rating(result)) / self.normalize)
            for r_idx, result in enumerate(results)
        ]
        results.sort(key=lambda x: x[1], reverse=True)

        for r_idx, score in results:
            doc = documents[r_idx]
            doc.metadata["llm_trulens_score"] = score
            filtered_docs.append(doc)

        print(
            "LLM rerank scores",
            [doc.metadata["llm_trulens_score"] for doc in filtered_docs],
        )

        return filtered_docs

run

run(documents, query)

Score documents by their relevance to the query and sort them best-first.

Source code in libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
def run(
    self,
    documents: list[Document],
    query: str,
) -> list[Document]:
    """Filter down documents based on their relevance to the query."""
    filtered_docs = []

    documents = sorted(documents, key=lambda doc: doc.get_content())
    if self.concurrent:
        with ThreadPoolExecutor() as executor:
            futures = []
            for doc in documents:
                chunked_doc_content = self.trim_func(
                    [
                        Document(content=doc.get_content())
                        # skip metadata, which can cause trouble
                    ]
                )[0].text

                messages = []
                messages.append(
                    SystemMessage(self.system_prompt_template.populate())
                )
                messages.append(
                    HumanMessage(
                        self.user_prompt_template.populate(
                            question=query, context=chunked_doc_content
                        )
                    )
                )

                # bind this document's messages as a default argument so the
                # submitted call does not pick up a later iteration's messages
                # through late binding
                def llm_call(messages=messages):
                    return self.llm(messages).text

                futures.append(executor.submit(llm_call))

            results = [future.result() for future in futures]
    else:
        results = []
        for doc in documents:
            messages = []
            messages.append(SystemMessage(self.system_prompt_template.populate()))
            messages.append(
                # the user prompt is a HumanMessage, matching the
                # concurrent branch above
                HumanMessage(
                    self.user_prompt_template.populate(
                        question=query, context=doc.get_content()
                    )
                )
            )
            results.append(self.llm(messages).text)

    # convert each 0-10 rating from the LLM into a normalized score
    results = [
        (r_idx, float(re_0_10_rating(result)) / self.normalize)
        for r_idx, result in enumerate(results)
    ]
    results.sort(key=lambda x: x[1], reverse=True)

    for r_idx, score in results:
        doc = documents[r_idx]
        doc.metadata["llm_trulens_score"] = score
        filtered_docs.append(doc)

    print(
        "LLM rerank scores",
        [doc.metadata["llm_trulens_score"] for doc in filtered_docs],
    )

    return filtered_docs
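
LLMTrulensScoring elicits a TruLens-style 0-10 relevance rating per document, divides it by normalize, and returns all documents sorted best-first. A hedged sketch, again reusing the assumed llm and docs from the LLMReranking example:

scorer = LLMTrulensScoring(llm=llm, normalize=10)
ranked = scorer.run(docs, query="When does reranking happen?")
# ranked is sorted by score, highest first; each score is a float in [0, 1]
best = ranked[0]
print(best.metadata["llm_trulens_score"], best.content[:50])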