Bases: InMemoryDocumentStore
Extends InMemoryDocumentStore by automatically saving to disk whenever the corpus changes
Source code in libs/kotaemon/kotaemon/storages/docstores/simple_file.py
class SimpleFileDocumentStore(InMemoryDocumentStore):
    """InMemoryDocumentStore that auto-saves whenever the corpus is changed.

    The store is persisted to ``<path>/<collection_name>.json``. An existing
    file is loaded on construction, and `add` / `delete` write the store back
    to that file after every call.
    """

    def __init__(self, path: str | Path, collection_name: str = "default"):
        """Create the store and load any previously saved collection.

        Args:
            path: directory holding the collection files; created (with
                parents) if it does not exist
            collection_name: stem of the file the collection is saved to
        """
        super().__init__()
        self._path = path
        self._collection_name = collection_name

        Path(path).mkdir(parents=True, exist_ok=True)
        self._save_path = Path(path) / f"{collection_name}.json"
        # Only load when a previous save exists; a fresh collection has no file.
        if self._save_path.is_file():
            self.load(self._save_path)

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get document by id

        If any requested id is not in memory, the store is reloaded from the
        save file once (when that file exists) before the lookup.

        Args:
            ids: a single doc-id or a list of doc-ids

        Returns:
            the documents, in the same order as `ids`

        Raises:
            KeyError: presumably, when an id is still unknown after the
                reload (assumes `_store` is a dict-like mapping)
        """
        if not isinstance(ids, list):
            ids = [ids]

        has_missing = any(doc_id not in self._store for doc_id in ids)
        # Guard the reload exactly like __init__ does: without a save file
        # (never saved, or just dropped) `load` would fail on the missing file
        # and hide the real problem, which is the unknown doc-id.
        if has_missing and self._save_path.is_file():
            self.load(self._save_path)

        return [self._store[doc_id] for doc_id in ids]

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Add document into document store, then save the store to disk

        Args:
            docs: a document or list of documents to add
            ids: specify the ids of documents to add or
                use existing doc.doc_id
            exist_ok: forwarded to the parent `add` via `**kwargs`; controls
                whether a duplicate doc-id found in the docstore raises an
                error (default to False)
        """
        super().add(docs=docs, ids=ids, **kwargs)
        self.save(self._save_path)

    def delete(self, ids: Union[List[str], str]):
        """Delete document by id, then save the store to disk"""
        super().delete(ids=ids)
        self.save(self._save_path)

    def drop(self):
        """Drop the document store and remove its save file if present"""
        super().drop()
        # missing_ok: the file may never have been written.
        self._save_path.unlink(missing_ok=True)

    def __persist_flow__(self):
        """Return the constructor arguments needed to re-create this store."""
        # Imported lazily, as in the original, so theflow is only needed here.
        from theflow.utils.modules import serialize

        return {
            "path": serialize(self._path),
            "collection_name": self._collection_name,
        }
|
get
Get document by id
Source code in libs/kotaemon/kotaemon/storages/docstores/simple_file.py
def get(self, ids: Union[List[str], str]) -> List[Document]:
    """Get document by id

    If any requested id is not in memory, the store is reloaded from the
    save file once (when that file exists) before the lookup.

    Args:
        ids: a single doc-id or a list of doc-ids

    Returns:
        the documents, in the same order as `ids`

    Raises:
        KeyError: presumably, when an id is still unknown after the
            reload (assumes `_store` is a dict-like mapping)
    """
    if not isinstance(ids, list):
        ids = [ids]

    has_missing = any(doc_id not in self._store for doc_id in ids)
    # Only reload when a save file exists: without one (never saved, or just
    # dropped) `load` would fail on the missing file and hide the real
    # problem, which is the unknown doc-id.
    if has_missing and self._save_path.is_file():
        self.load(self._save_path)

    return [self._store[doc_id] for doc_id in ids]
|
add
| add(docs, ids=None, **kwargs)
|
Add document into document store
Parameters:
Name |
Type |
Description |
Default |
docs
|
Union[Document, List[Document]]
|
a document or list of documents to add
|
required
|
ids
|
Optional[Union[List[str], str]]
|
specify the ids of documents to add or
use existing doc.doc_id
|
None
|
exist_ok
|
|
whether a doc-id that already exists in the docstore is accepted;
when False (the default), a duplicate doc-id raises an error
|
False
|
Source code in libs/kotaemon/kotaemon/storages/docstores/simple_file.py
def add(
    self,
    docs: Union[Document, List[Document]],
    ids: Optional[Union[List[str], str]] = None,
    **kwargs,
):
    """Add documents via the parent store, then write the store to disk.

    Args:
        docs: a document or list of documents to add
        ids: ids to store the documents under; when omitted the
            existing doc.doc_id is used
        exist_ok: passed through `**kwargs` to the parent `add`;
            governs whether a duplicate doc-id in the docstore
            raises an error (default to False)
    """
    # The in-memory update is delegated entirely to the parent class.
    super().add(docs=docs, ids=ids, **kwargs)

    # Persist right away so the file always mirrors the in-memory corpus.
    destination = self._save_path
    self.save(destination)
|
delete
Delete document by id
Source code in libs/kotaemon/kotaemon/storages/docstores/simple_file.py
def delete(self, ids: Union[List[str], str]):
    """Remove the given doc-id(s) via the parent store, then save to disk.

    Args:
        ids: a single doc-id or a list of doc-ids to delete
    """
    # The in-memory removal is delegated entirely to the parent class.
    super().delete(ids=ids)

    # Persist right away so the file always mirrors the in-memory corpus.
    destination = self._save_path
    self.save(destination)
|
drop
Drop the document store
Source code in libs/kotaemon/kotaemon/storages/docstores/simple_file.py
def drop(self):
    """Drop the store via the parent class and delete its save file."""
    super().drop()

    # missing_ok=True: no error if the file was never written.
    saved_file = self._save_path
    saved_file.unlink(missing_ok=True)
|