Skip to content

Base

BaseVectorStore

Bases: ABC

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py
class BaseVectorStore(ABC):
    """Abstract interface that every vector store implementation must provide.

    All five methods below are abstract, so a concrete subclass has to
    override each of them before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, *args, **kwargs):
        """Set up the vector store; accepted arguments are subclass-specific."""

    @abstractmethod
    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ) -> list[str]:
        """Add vector embeddings to the vector store.

        Args:
            embeddings: List of embeddings, given either as raw vectors or as
                `DocumentWithEmbedding` objects
            metadatas: List of metadata of the embeddings
            ids: List of ids of the embeddings

        Returns:
            List of ids of the embeddings
        """

    @abstractmethod
    def delete(self, ids: list[str], **kwargs):
        """Delete vector embeddings from the vector store.

        Args:
            ids: List of ids of the embeddings to be deleted
            kwargs: meant for vectorstore-specific parameters
        """

    @abstractmethod
    def query(
        self,
        embedding: list[float],
        top_k: int = 1,
        ids: Optional[list[str]] = None,
        **kwargs,
    ) -> tuple[list[list[float]], list[float], list[str]]:
        """Return the top k most similar vector embeddings.

        Args:
            embedding: The query embedding vector
            top_k: Number of most similar embeddings to return
            ids: List of ids of the embeddings to be queried
            kwargs: meant for vectorstore-specific parameters

        Returns:
            the matched embeddings, the similarity scores, and the ids
        """

    @abstractmethod
    def drop(self):
        """Drop the vector store."""

add abstractmethod

add(embeddings, metadatas=None, ids=None)

Add vector embeddings to vector stores

Parameters:

Name Type Description Default
embeddings list[list[float]] | list[DocumentWithEmbedding]

List of embeddings

required
metadatas Optional[list[dict]]

List of metadata of the embeddings

None
ids Optional[list[str]]

List of ids of the embeddings

None
kwargs

meant for vectorstore-specific parameters

required

Returns:

Type Description
list[str]

List of ids of the embeddings

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py
@abstractmethod
def add(
    self,
    embeddings: list[list[float]] | list[DocumentWithEmbedding],
    metadatas: Optional[list[dict]] = None,
    ids: Optional[list[str]] = None,
) -> list[str]:
    """Add vector embeddings to the vector store.

    This is an abstract hook: the body here does nothing, and concrete
    vector stores supply the actual implementation.

    Args:
        embeddings: List of embeddings, given either as raw vectors or as
            `DocumentWithEmbedding` objects
        metadatas: List of metadata of the embeddings
        ids: List of ids of the embeddings

    Returns:
        List of ids of the embeddings
    """

delete abstractmethod

delete(ids, **kwargs)

Delete vector embeddings from vector stores

Parameters:

Name Type Description Default
ids list[str]

List of ids of the embeddings to be deleted

required
kwargs

meant for vectorstore-specific parameters

{}
Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py
@abstractmethod
def delete(self, ids: list[str], **kwargs):
    """Delete vector embeddings from the vector store.

    This is an abstract hook: the body here does nothing, and concrete
    vector stores supply the actual implementation.

    Args:
        ids: List of ids of the embeddings to be deleted
        kwargs: meant for vectorstore-specific parameters
    """

query abstractmethod

query(embedding, top_k=1, ids=None, **kwargs)

Return the top k most similar vector embeddings

Parameters:

Name Type Description Default
embedding list[float]

The query embedding vector

required
top_k int

Number of most similar embeddings to return

1
ids Optional[list[str]]

List of ids of the embeddings to be queried

None

Returns:

Type Description
tuple[list[list[float]], list[float], list[str]]

the matched embeddings, the similarity scores, and the ids

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py
@abstractmethod
def query(
    self,
    embedding: list[float],
    top_k: int = 1,
    ids: Optional[list[str]] = None,
    **kwargs,
) -> tuple[list[list[float]], list[float], list[str]]:
    """Return the top k most similar vector embeddings.

    This is an abstract hook: the body here does nothing, and concrete
    vector stores supply the actual implementation.

    Args:
        embedding: The query embedding vector
        top_k: Number of most similar embeddings to return
        ids: List of ids of the embeddings to be queried
        kwargs: meant for vectorstore-specific parameters

    Returns:
        the matched embeddings, the similarity scores, and the ids
    """

drop abstractmethod

drop()

Drop the vector store

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py
@abstractmethod
def drop(self):
    """Drop the vector store.

    This is an abstract hook: the body here does nothing, and concrete
    vector stores supply the actual implementation.
    """

LlamaIndexVectorStore

Bases: BaseVectorStore

Mixin for LlamaIndex based vectorstores

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py
class LlamaIndexVectorStore(BaseVectorStore):
    """Mixin for LlamaIndex based vectorstores

    An instance wraps a LlamaIndex vector store object, kept in
    `self._client`. Attribute access is proxied to that object:

    - setting an attribute whose name does not start with "_" sets it on the
      wrapped client; underscore-prefixed names are stored on this wrapper.
    - reading an attribute that is not found on the wrapper is forwarded to
      the wrapped client.

    Subclasses select the wrapped class either by assigning `_li_class` or by
    overriding `_get_li_class()`.
    """

    # Defaults to None so that subclasses which only override
    # `_get_li_class()` reach that fallback instead of failing with
    # AttributeError when `__init__` reads `self._li_class`.
    _li_class: type[LIVectorStore | BasePydanticVectorStore] | None = None

    def _get_li_class(self):
        """Return the LlamaIndex class to instantiate.

        Only consulted when `_li_class` is falsy.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            "Please return the relevant LlamaIndex class in _get_li_class"
        )

    def __init__(self, *args, **kwargs):
        """Instantiate the wrapped LlamaIndex vector store.

        Args:
            args: positional arguments passed as-is to the LlamaIndex class
            kwargs: keyword arguments passed as-is to the LlamaIndex class
        """
        # get li_class from the method if not set
        li_class = self._li_class or self._get_li_class()

        from dataclasses import fields

        self._client = li_class(*args, **kwargs)

        # Names of the VectorStoreQuery fields that callers may supply through
        # `query(**kwargs)`. The three excluded fields are always populated
        # from the explicit `query()` parameters.
        self._vsq_kwargs = {field.name for field in fields(VectorStoreQuery)} - {
            "query_embedding",
            "similarity_top_k",
            "node_ids",
        }

    def __setattr__(self, name: str, value: Any) -> None:
        """Store private attributes locally; forward the rest to the client."""
        if name.startswith("_"):
            return super().__setattr__(name, value)

        return setattr(self._client, name, value)

    def __getattr__(self, name: str) -> Any:
        """Forward lookups that failed on the wrapper to the wrapped client.

        Raises:
            AttributeError: if the wrapper's own `_li_class` or `_client` is
                missing, or if the client lacks the attribute.
        """
        # `__getattr__` only runs after normal lookup has failed. These two
        # names belong to the wrapper itself, so they must never be forwarded:
        # forwarding `_client` would re-enter this method through
        # `self._client` and recurse until RecursionError (e.g. when
        # `__init__` failed before `_client` was assigned).
        if name in ("_li_class", "_client"):
            return super().__getattribute__(name)

        return getattr(self._client, name)

    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ) -> list[str]:
        """Add vector embeddings to the wrapped vector store.

        Raw vectors are wrapped in new `DocumentWithEmbedding` objects;
        documents passed in directly are used as-is and are modified in place
        when `metadatas` or `ids` are given.

        Args:
            embeddings: List of raw embedding vectors, or list of
                `DocumentWithEmbedding` objects. The first element decides how
                the whole list is interpreted.
            metadatas: List of metadata, paired with the embeddings by
                position
            ids: List of ids, paired with the embeddings by position

        Returns:
            Whatever the wrapped client's `add` returns (the ids of the added
            nodes, per the base class contract); an empty list when
            `embeddings` is empty.
        """
        # Nothing to add; also avoids IndexError on `embeddings[0]` below.
        if not embeddings:
            return []

        if isinstance(embeddings[0], list):
            nodes: list[DocumentWithEmbedding] = [
                DocumentWithEmbedding(embedding=embedding) for embedding in embeddings
            ]
        else:
            nodes = embeddings  # type: ignore

        if metadatas is not None:
            for node, metadata in zip(nodes, metadatas):
                node.metadata = metadata

        if ids is not None:
            for node, id_ in zip(nodes, ids):
                node.id_ = id_
                # Each node references itself as its source. Presumably this
                # lets `delete()` locate it through `ref_doc_id` - confirm
                # against the wrapped store.
                node.relationships = {
                    NodeRelationship.SOURCE: RelatedNodeInfo(node_id=id_)
                }

        return self._client.add(nodes=nodes)

    def delete(self, ids: list[str], **kwargs):
        """Delete vector embeddings from the wrapped vector store.

        Args:
            ids: List of ids of the embeddings to be deleted; the client's
                `delete` is called once per id, with the id as `ref_doc_id`
            kwargs: passed as-is to every client `delete` call
        """
        for id_ in ids:
            self._client.delete(ref_doc_id=id_, **kwargs)

    def query(
        self,
        embedding: list[float],
        top_k: int = 1,
        ids: Optional[list[str]] = None,
        **kwargs,
    ) -> tuple[list[list[float]], list[float], list[str]]:
        """Return the top k most similar vector embeddings

        Args:
            embedding: The query embedding vector
            top_k: Number of most similar embeddings to return
            ids: List of ids of the embeddings to be queried
            kwargs: extra query parameters. Depending on the name, these parameters
                will be used when constructing the VectorStoreQuery object or when
                performing querying of the underlying vector store.

        Returns:
            the matched embeddings, the similarity scores, and the ids
        """
        # Route each extra either into the VectorStoreQuery or into the
        # client's own `query()` call, depending on its name.
        vsq_kwargs = {}
        vs_kwargs = {}
        for key, value in kwargs.items():
            target = vsq_kwargs if key in self._vsq_kwargs else vs_kwargs
            target[key] = value

        output = self._client.query(
            query=VectorStoreQuery(
                query_embedding=embedding,
                similarity_top_k=top_k,
                node_ids=ids,
                **vsq_kwargs,
            ),
            **vs_kwargs,
        )

        # Any of the result fields may be empty or None; normalise to lists.
        embeddings = [node.embedding for node in (output.nodes or [])]
        similarities = output.similarities or []
        out_ids = output.ids or []

        return embeddings, similarities, out_ids

query

query(embedding, top_k=1, ids=None, **kwargs)

Return the top k most similar vector embeddings

Parameters:

Name Type Description Default
embedding list[float]

The query embedding vector

required
top_k int

Number of most similar embeddings to return

1
ids Optional[list[str]]

List of ids of the embeddings to be queried

None
kwargs

extra query parameters. Depending on the name, these parameters will be used when constructing the VectorStoreQuery object or when performing querying of the underlying vector store.

{}

Returns:

Type Description
tuple[list[list[float]], list[float], list[str]]

the matched embeddings, the similarity scores, and the ids

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py
def query(
    self,
    embedding: list[float],
    top_k: int = 1,
    ids: Optional[list[str]] = None,
    **kwargs,
) -> tuple[list[list[float]], list[float], list[str]]:
    """Return the top k most similar vector embeddings

    Args:
        embedding: The query embedding vector
        top_k: Number of most similar embeddings to return
        ids: List of ids of the embeddings to be queried
        kwargs: extra query parameters. Those whose name is listed in
            `self._vsq_kwargs` are used when constructing the VectorStoreQuery
            object; all others are passed to the underlying vector store's
            `query` call.

    Returns:
        the matched embeddings, the similarity scores, and the ids
    """
    # Partition the extras by destination: query object vs. client call.
    query_extras = {
        key: value for key, value in kwargs.items() if key in self._vsq_kwargs
    }
    client_extras = {
        key: value for key, value in kwargs.items() if key not in self._vsq_kwargs
    }

    request = VectorStoreQuery(
        query_embedding=embedding,
        similarity_top_k=top_k,
        node_ids=ids,
        **query_extras,
    )
    result = self._client.query(query=request, **client_extras)

    # Any of the result fields may be empty or None; fall back to empty lists.
    matched = [node.embedding for node in (result.nodes or [])]
    scores = result.similarities or []
    matched_ids = result.ids or []

    return matched, scores, matched_ids