Skip to content

In Memory

InMemoryDocumentStore

Bases: BaseDocumentStore

Simple in-memory document store that stores documents in a dictionary

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
class InMemoryDocumentStore(BaseDocumentStore):
    """Simple in-memory document store that keeps documents in a dictionary.

    Documents live in ``self._store``, a plain dict mapping a document id to
    the document object. Nothing is persisted unless ``save`` is called.
    """

    def __init__(self):
        # Maps doc_id -> document object.
        self._store = {}

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Add one or more documents to the store.

        The call is all-or-nothing: every check runs before the store is
        modified, so a failing call leaves the store untouched.

        Args:
            docs: a single document or a list of documents to add.
            ids: id (or list of ids) to store the documents under. When
                omitted or empty, each document's own ``doc_id`` is used.
            exist_ok: keyword-only option read from ``kwargs`` (defaults to
                False). When False, adding an id that is already stored (or
                repeated within this call) raises an error. When True, the
                existing entry is overwritten.

        Raises:
            ValueError: if the number of ids differs from the number of
                documents, or if a duplicate id is found while ``exist_ok``
                is False.
        """
        exist_ok: bool = kwargs.pop("exist_ok", False)

        if ids and not isinstance(ids, list):
            ids = [ids]
        if not isinstance(docs, list):
            docs = [docs]
        doc_ids = ids if ids else [doc.doc_id for doc in docs]

        # zip() would silently drop the surplus ids/documents; fail loudly.
        if len(doc_ids) != len(docs):
            raise ValueError(
                f"Number of ids ({len(doc_ids)}) does not match "
                f"number of documents ({len(docs)})"
            )

        if not exist_ok:
            # Validate the whole batch up front so that a duplicate does not
            # leave the store half-updated. `seen` catches ids repeated inside
            # the batch itself.
            seen = set()
            for doc_id in doc_ids:
                if doc_id in self._store or doc_id in seen:
                    raise ValueError(f"Document with id {doc_id} already exist")
                seen.add(doc_id)

        self._store.update(zip(doc_ids, docs))

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get documents by id.

        Args:
            ids: a single document id or a list of document ids.

        Returns:
            The stored documents, in the same order as the requested ids.

        Raises:
            KeyError: if a requested id is not in the store.
        """
        if not isinstance(ids, list):
            ids = [ids]

        return [self._store[doc_id] for doc_id in ids]

    def get_all(self) -> List[Document]:
        """Return all stored documents as a new list."""
        return list(self._store.values())

    def count(self) -> int:
        """Return the number of stored documents."""
        return len(self._store)

    def delete(self, ids: Union[List[str], str]):
        """Delete documents by id.

        Args:
            ids: a single document id or a list of document ids.

        Raises:
            KeyError: if an id is not in the store. Ids listed before the
                missing one have already been deleted at that point.
        """
        if not isinstance(ids, list):
            ids = [ids]

        for doc_id in ids:
            del self._store[doc_id]

    def save(self, path: Union[str, Path]):
        """Save all documents to ``path`` as a JSON object.

        Each document is serialized with its ``to_dict`` method and stored
        under its id.

        Args:
            path: destination file; it is created or overwritten.
        """
        store = {key: value.to_dict() for key, value in self._store.items()}
        # Explicit encoding: do not depend on the platform's locale default.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(store, f)

    def load(self, path: Union[str, Path]):
        """Load the document store from a JSON file, replacing its content.

        Args:
            path: file to read; expected to hold a JSON object mapping
                document ids to dictionaries accepted by
                ``Document.from_dict``.
        """
        # JSON files are UTF-8; the locale default may decode them wrongly.
        with open(path, encoding="utf-8") as f:
            store = json.load(f)
        # TODO: save and load aren't lossless. A Document-subclass will lose
        # information. Need to edit the `to_dict` and `from_dict` methods in
        # the Document class.
        # For better query support, utilize SQLite as the default document store.
        # Also, for portability, use SQLAlchemy for document store.
        self._store = {key: Document.from_dict(value) for key, value in store.items()}

    def query(
        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
    ) -> List[Document]:
        """Perform full-text search on document store.

        Not implemented for the in-memory store: the arguments are ignored
        and an empty list is always returned.
        """
        return []

    def __persist_flow__(self):
        # This store contributes no settings when a flow is persisted.
        return {}

    def drop(self):
        """Drop the document store by discarding every stored document."""
        self._store = {}

add

add(docs, ids=None, **kwargs)

Add document into document store

Parameters:

Name Type Description Default
docs Union[Document, List[Document]]

list of documents to add

required
ids Optional[Union[List[str], str]]

specify the ids of documents to add or use existing doc.doc_id

None
exist_ok

if False (the default), raise an error when a duplicate doc-id is found in the docstore; if True, overwrite the existing document

False
Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def add(
    self,
    docs: Union[Document, List[Document]],
    ids: Optional[Union[List[str], str]] = None,
    **kwargs,
):
    """Add one or more documents to the store.

    The call is all-or-nothing: every check runs before the store is
    modified, so a failing call leaves the store untouched.

    Args:
        docs: a single document or a list of documents to add.
        ids: id (or list of ids) to store the documents under. When
            omitted or empty, each document's own ``doc_id`` is used.
        exist_ok: keyword-only option read from ``kwargs`` (defaults to
            False). When False, adding an id that is already stored (or
            repeated within this call) raises an error. When True, the
            existing entry is overwritten.

    Raises:
        ValueError: if the number of ids differs from the number of
            documents, or if a duplicate id is found while ``exist_ok``
            is False.
    """
    exist_ok: bool = kwargs.pop("exist_ok", False)

    if ids and not isinstance(ids, list):
        ids = [ids]
    if not isinstance(docs, list):
        docs = [docs]
    doc_ids = ids if ids else [doc.doc_id for doc in docs]

    # zip() would silently drop the surplus ids/documents; fail loudly.
    if len(doc_ids) != len(docs):
        raise ValueError(
            f"Number of ids ({len(doc_ids)}) does not match "
            f"number of documents ({len(docs)})"
        )

    if not exist_ok:
        # Validate the whole batch up front so that a duplicate does not
        # leave the store half-updated. `seen` catches ids repeated inside
        # the batch itself.
        seen = set()
        for doc_id in doc_ids:
            if doc_id in self._store or doc_id in seen:
                raise ValueError(f"Document with id {doc_id} already exist")
            seen.add(doc_id)

    self._store.update(zip(doc_ids, docs))

get

get(ids)

Get document by id

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def get(self, ids: Union[List[str], str]) -> List[Document]:
    """Look up stored documents by id.

    Args:
        ids: a single document id or a list of document ids.

    Returns:
        The matching documents, in the same order as the requested ids.

    Raises:
        KeyError: if a requested id is not in the store.
    """
    # A lone id is treated as a one-element list.
    wanted = ids if isinstance(ids, list) else [ids]

    found = []
    for key in wanted:
        found.append(self._store[key])
    return found

get_all

get_all()

Get all documents

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def get_all(self) -> List[Document]:
    """Return every stored document as a new list"""
    return [doc for doc in self._store.values()]

count

count()

Count number of documents

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def count(self) -> int:
    """Return how many documents are currently in the store"""
    total = len(self._store)
    return total

delete

delete(ids)

Delete document by id

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def delete(self, ids: Union[List[str], str]):
    """Remove documents from the store by id.

    Args:
        ids: a single document id or a list of document ids.

    Raises:
        KeyError: if an id is not in the store. Ids listed before the
            missing one have already been removed at that point.
    """
    # A lone id is treated as a one-element list.
    targets = ids if isinstance(ids, list) else [ids]

    for key in targets:
        self._store.pop(key)

save

save(path)

Save document to path

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def save(self, path: Union[str, Path]):
    """Save all documents to ``path`` as a JSON object.

    Each document is serialized with its ``to_dict`` method and stored
    under its id.

    Args:
        path: destination file; it is created or overwritten.
    """
    store = {key: value.to_dict() for key, value in self._store.items()}
    # Explicit encoding: do not depend on the platform's locale default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(store, f)

load

load(path)

Load document store from path

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def load(self, path: Union[str, Path]):
    """Load the document store from a JSON file, replacing its content.

    Args:
        path: file to read; expected to hold a JSON object mapping
            document ids to dictionaries accepted by
            ``Document.from_dict``.
    """
    # JSON files are UTF-8; the locale default may decode them wrongly.
    with open(path, encoding="utf-8") as f:
        store = json.load(f)
    # TODO: save and load aren't lossless. A Document-subclass will lose
    # information. Need to edit the `to_dict` and `from_dict` methods in
    # the Document class.
    # For better query support, utilize SQLite as the default document store.
    # Also, for portability, use SQLAlchemy for document store.
    self._store = {key: Document.from_dict(value) for key, value in store.items()}

query

query(query, top_k=10, doc_ids=None)

Perform full-text search on document store

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def query(
    self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
) -> List[Document]:
    """Perform full-text search on document store.

    Placeholder implementation: the arguments are ignored and an empty
    list is always returned.
    """
    return list()

drop

drop()

Drop the document store

Source code in libs/kotaemon/kotaemon/storages/docstores/in_memory.py
def drop(self):
    """Discard every stored document by rebinding the store to a new dict"""
    self._store = dict()