Base

BaseVectorStore ¶

Bases: ABC

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py

class BaseVectorStore(ABC):
    """Abstract interface for vector stores.

    ``__init__``, ``add``, ``delete``, ``query`` and ``drop`` are all declared
    abstract here, so a concrete store has to provide each of them.
    """

    @abstractmethod
    def __init__(self, *args, **kwargs):
        """Set up the store; the accepted arguments are implementation-specific."""
        ...

    @abstractmethod
    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ) -> list[str]:
        """Add vector embeddings to vector stores

        Args:
            embeddings: Either raw embedding vectors or
                ``DocumentWithEmbedding`` objects
            metadatas: List of metadata of the embeddings
            ids: List of ids of the embeddings

        Note:
            Unlike ``delete`` and ``query``, this signature declares no
            ``**kwargs``, so vectorstore-specific parameters cannot be passed
            through it.

        Returns:
            List of ids of the embeddings
        """
        ...

    @abstractmethod
    def delete(self, ids: list[str], **kwargs):
        """Delete vector embeddings from vector stores

        Args:
            ids: List of ids of the embeddings to be deleted
            kwargs: meant for vectorstore-specific parameters
        """
        ...

    @abstractmethod
    def query(
        self,
        embedding: list[float],
        top_k: int = 1,
        ids: Optional[list[str]] = None,
        **kwargs,
    ) -> tuple[list[list[float]], list[float], list[str]]:
        """Return the top k most similar vector embeddings

        Args:
            embedding: The query embedding vector
            top_k: Number of most similar embeddings to return
            ids: List of ids of the embeddings to be queried
            kwargs: meant for vectorstore-specific parameters

        Returns:
            A 3-tuple of the matched embeddings, the similarity scores, and
            the ids
        """
        ...

    @abstractmethod
    def drop(self):
        """Drop the vector store"""
        ...

add `abstractmethod` ¶

add(embeddings, metadatas=None, ids=None)

Add vector embeddings to vector stores

Parameters:

Name	Type	Description	Default
`embeddings`	`list[list[float]] \| list[DocumentWithEmbedding]`	List of embeddings	required
`metadatas`	`Optional[list[dict]]`	List of metadata of the embeddings	`None`
`ids`	`Optional[list[str]]`	List of ids of the embeddings	`None`
`kwargs`		meant for vectorstore-specific parameters (listed in the docstring, but the signature above declares no `**kwargs`)	not accepted

Returns:

Type	Description
`list[str]`	List of ids of the embeddings

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py

@abstractmethod
def add(
    self,
    embeddings: list[list[float]] | list[DocumentWithEmbedding],
    metadatas: Optional[list[dict]] = None,
    ids: Optional[list[str]] = None,
) -> list[str]:
    """Add vector embeddings to vector stores

    Args:
        embeddings: Either raw embedding vectors or
            ``DocumentWithEmbedding`` objects
        metadatas: List of metadata of the embeddings
        ids: List of ids of the embeddings

    Note:
        This signature declares no ``**kwargs``, so vectorstore-specific
        parameters cannot be passed through it.

    Returns:
        List of ids of the embeddings
    """
    ...

delete `abstractmethod` ¶

delete(ids, **kwargs)

Delete vector embeddings from vector stores

Parameters:

Name	Type	Description	Default
`ids`	`list[str]`	List of ids of the embeddings to be deleted	required
`kwargs`		meant for vectorstore-specific parameters	`{}`

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py

@abstractmethod
def delete(self, ids: list[str], **kwargs):
    """Delete vector embeddings from vector stores

    Abstract: the body is a placeholder and implementations supply the logic.

    Args:
        ids: List of ids of the embeddings to be deleted
        kwargs: meant for vectorstore-specific parameters
    """
    ...

query `abstractmethod` ¶

query(embedding, top_k=1, ids=None, **kwargs)

Return the top k most similar vector embeddings

Parameters:

Name	Type	Description	Default
`embedding`	`list[float]`	The query embedding vector	required
`top_k`	`int`	Number of most similar embeddings to return	`1`
`ids`	`Optional[list[str]]`	List of ids of the embeddings to be queried	`None`

Returns:

Type	Description
`tuple[list[list[float]], list[float], list[str]]`	the matched embeddings, the similarity scores, and the ids

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py

@abstractmethod
def query(
    self,
    embedding: list[float],
    top_k: int = 1,
    ids: Optional[list[str]] = None,
    **kwargs,
) -> tuple[list[list[float]], list[float], list[str]]:
    """Return the top k most similar vector embeddings

    Args:
        embedding: The query embedding vector
        top_k: Number of most similar embeddings to return
        ids: List of ids of the embeddings to be queried
        kwargs: meant for vectorstore-specific parameters

    Returns:
        A 3-tuple of the matched embeddings, the similarity scores, and
        the ids
    """
    ...

drop `abstractmethod` ¶

drop()

Drop the vector store

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py

@abstractmethod
def drop(self):
    """Drop the vector store

    Abstract: takes no arguments besides ``self``; what dropping entails is
    left to each implementation.
    """
    ...

LlamaIndexVectorStore ¶

Bases: BaseVectorStore

Mixin for LlamaIndex based vectorstores

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py

class LlamaIndexVectorStore(BaseVectorStore):
    """Mixin for LlamaIndex based vectorstores

    Wraps a LlamaIndex vector store instance (kept in ``self._client``) behind
    the ``BaseVectorStore`` interface. A subclass either sets ``_li_class`` to
    the LlamaIndex class to instantiate, or overrides ``_get_li_class`` to
    return it.

    Attribute access is proxied: names that are not found on the wrapper are
    looked up on the client, and assignments to names without a leading
    underscore are forwarded to the client.
    """

    # Defaults to None so that a subclass which only overrides
    # ``_get_li_class`` works without also having to declare ``_li_class``.
    _li_class: type[LIVectorStore | BasePydanticVectorStore] | None = None

    def _get_li_class(self):
        """Return the LlamaIndex class to instantiate.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this.
        """
        raise NotImplementedError(
            "Please return the relevant LlamaIndex class in in _get_li_class"
        )

    def __init__(self, *args, **kwargs):
        """Instantiate the wrapped LlamaIndex store.

        Args:
            *args: Positional arguments forwarded to the LlamaIndex class.
            **kwargs: Keyword arguments forwarded to the LlamaIndex class.
        """
        from dataclasses import fields

        # get li_class from the method if not set
        li_class = self._li_class or self._get_li_class()
        self._client = li_class(*args, **kwargs)

        # VectorStoreQuery field names that callers may supply through the
        # kwargs of ``query``; the three fields ``query`` fills in itself are
        # excluded so they cannot be passed twice.
        self._vsq_kwargs = {field.name for field in fields(VectorStoreQuery)} - {
            "query_embedding",
            "similarity_top_k",
            "node_ids",
        }

    def __setattr__(self, name: str, value: Any) -> None:
        """Store private names on the wrapper and forward the rest to the client."""
        if name.startswith("_"):
            return super().__setattr__(name, value)

        return setattr(self._client, name, value)

    def __getattr__(self, name: str) -> Any:
        """Fall back to the wrapped client for names missing on the wrapper.

        Python only calls this after normal attribute lookup has failed.

        Raises:
            AttributeError: If ``_li_class`` or ``_client`` itself is missing.
        """
        if name in ("_li_class", "_client"):
            # These belong to the wrapper. Resolving a missing ``_client`` via
            # ``self._client`` would re-enter this method without end (e.g.
            # when ``__init__`` failed or was bypassed by copy/pickle), so let
            # the default lookup raise a plain AttributeError instead.
            return super().__getattribute__(name)

        return getattr(self._client, name)

    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ) -> list[str]:
        """Add vector embeddings to the wrapped vector store

        Args:
            embeddings: Either raw embedding vectors, which are wrapped into
                ``DocumentWithEmbedding`` objects, or such objects directly.
                The kind is decided by inspecting the first element.
            metadatas: List of metadata of the embeddings
            ids: List of ids of the embeddings; each id is also recorded as
                the node's SOURCE relationship

        Returns:
            Whatever the client's ``add`` returns, or an empty list when
            ``embeddings`` is empty
        """
        if not embeddings:
            # Nothing to add; also avoids indexing into an empty list below.
            return []

        if isinstance(embeddings[0], list):
            nodes: list[DocumentWithEmbedding] = [
                DocumentWithEmbedding(embedding=embedding) for embedding in embeddings
            ]
        else:
            nodes = embeddings  # type: ignore

        if metadatas is not None:
            # zip stops at the shorter input, so extra nodes keep their metadata
            for node, metadata in zip(nodes, metadatas):
                node.metadata = metadata

        if ids is not None:
            for node, id_ in zip(nodes, ids):
                node.id_ = id_
                node.relationships = {
                    NodeRelationship.SOURCE: RelatedNodeInfo(node_id=id_)
                }

        return self._client.add(nodes=nodes)

    def delete(self, ids: list[str], **kwargs):
        """Delete vector embeddings from the wrapped vector store

        Calls the client's ``delete`` once per id, passing the id as
        ``ref_doc_id``.

        Args:
            ids: List of ids of the embeddings to be deleted
            kwargs: forwarded unchanged to every client ``delete`` call
        """
        for id_ in ids:
            self._client.delete(ref_doc_id=id_, **kwargs)

    def query(
        self,
        embedding: list[float],
        top_k: int = 1,
        ids: Optional[list[str]] = None,
        **kwargs,
    ) -> tuple[list[list[float]], list[float], list[str]]:
        """Return the top k most similar vector embeddings

        Args:
            embedding: The query embedding vector
            top_k: Number of most similar embeddings to return
            ids: List of ids of the embeddings to be queried
            kwargs: extra query parameters. Depending on the name, these parameters
                will be used when constructing the VectorStoreQuery object or when
                performing querying of the underlying vector store.

        Returns:
            the matched embeddings, the similarity scores, and the ids
        """
        # Route each extra parameter either to the VectorStoreQuery constructor
        # or to the client's ``query`` call.
        vsq_kwargs = {}
        vs_kwargs = {}
        for kwkey, kwvalue in kwargs.items():
            if kwkey in self._vsq_kwargs:
                vsq_kwargs[kwkey] = kwvalue
            else:
                vs_kwargs[kwkey] = kwvalue

        output = self._client.query(
            query=VectorStoreQuery(
                query_embedding=embedding,
                similarity_top_k=top_k,
                node_ids=ids,
                **vsq_kwargs,
            ),
            **vs_kwargs,
        )

        # Any of the three result fields may be empty/None; normalise to lists.
        embeddings = [node.embedding for node in output.nodes] if output.nodes else []
        similarities = output.similarities or []
        out_ids = output.ids or []

        return embeddings, similarities, out_ids

query ¶

query(embedding, top_k=1, ids=None, **kwargs)

Return the top k most similar vector embeddings

Parameters:

Name	Type	Description	Default
`embedding`	`list[float]`	The query embedding vector	required
`top_k`	`int`	Number of most similar embeddings to return	`1`
`ids`	`Optional[list[str]]`	List of ids of the embeddings to be queried	`None`
`kwargs`		extra query parameters. Depending on the name, these parameters will be used when constructing the VectorStoreQuery object or when performing querying of the underlying vector store.	`{}`

Returns:

Type	Description
`tuple[list[list[float]], list[float], list[str]]`	the matched embeddings, the similarity scores, and the ids

Source code in libs/kotaemon/kotaemon/storages/vectorstores/base.py

def query(
    self,
    embedding: list[float],
    top_k: int = 1,
    ids: Optional[list[str]] = None,
    **kwargs,
) -> tuple[list[list[float]], list[float], list[str]]:
    """Look up the ``top_k`` stored embeddings most similar to ``embedding``.

    Args:
        embedding: The query embedding vector
        top_k: Number of most similar embeddings to return
        ids: List of ids of the embeddings to be queried
        kwargs: extra query parameters. Keys found in ``self._vsq_kwargs`` are
            given to the VectorStoreQuery constructor; all remaining keys are
            passed to the client's ``query`` call.

    Returns:
        the matched embeddings, the similarity scores, and the ids
    """
    # Split the extra parameters by destination.
    query_fields = {k: v for k, v in kwargs.items() if k in self._vsq_kwargs}
    store_options = {k: v for k, v in kwargs.items() if k not in self._vsq_kwargs}

    request = VectorStoreQuery(
        query_embedding=embedding,
        similarity_top_k=top_k,
        node_ids=ids,
        **query_fields,
    )
    result = self._client.query(query=request, **store_options)

    # Each result field may be empty/None; fall back to an empty list.
    vectors = [node.embedding for node in result.nodes] if result.nodes else []
    scores = result.similarities or []
    matched_ids = result.ids or []

    return vectors, scores, matched_ids

Base

BaseVectorStore ¶

add abstractmethod ¶

delete abstractmethod ¶

query abstractmethod ¶

drop abstractmethod ¶

LlamaIndexVectorStore ¶

query ¶

add `abstractmethod` ¶

delete `abstractmethod` ¶

query `abstractmethod` ¶

drop `abstractmethod` ¶