Skip to content

Base

BaseComponent

Bases: Function

A component is a class that can be used to compose a pipeline.

Benefits of component

  • Auto caching, logging
  • Allow deployment

For each component, the spirit is

  • Tolerate multiple input types, e.g. str, Document, List[str], List[Document]
  • Enforce single output type. Hence, the output type of a component should be
    as generic as possible.

Source code in libs/kotaemon/kotaemon/base/component.py
class BaseComponent(Function):
    """Base class for the building blocks that are composed into a pipeline.

    !!! tip "Benefits of component"
        - Auto caching, logging
        - Allow deployment

    !!! tip "For each component, the spirit is"
        - Tolerate multiple input types, e.g. str, Document, List[str], List[Document]
        - Enforce single output type. Hence, the output type of a component should be
          as generic as possible.
    """

    # Upstream component consumed by `flow()`; unset by default.
    inflow = None

    def flow(self):
        """Call this component on the result of its upstream component's flow.

        Returns:
            Whatever calling this component on `self.inflow.flow()` returns.

        Raises:
            ValueError: if `inflow` is None or is not a `BaseComponent`.
        """
        upstream = self.inflow
        if upstream is None:
            raise ValueError("No inflow provided.")
        if not isinstance(upstream, BaseComponent):
            raise ValueError(
                f"inflow must be a BaseComponent, found {type(upstream)}"
            )

        # Resolve the upstream chain first, then feed its result to this component.
        return self.__call__(upstream.flow())

    def set_output_queue(self, queue):
        """Store `queue` on this component and on every nested `BaseComponent`.

        Args:
            queue: object later used by `report_output` through `put_nowait`.
        """
        self._queue = queue
        # Lazily resolve each name listed in `_ff_nodes`; only attributes that are
        # themselves components receive the queue (recursively).
        children = (getattr(self, node_name) for node_name in self._ff_nodes)
        for child in children:
            if isinstance(child, BaseComponent):
                child.set_output_queue(queue)

    def report_output(self, output: Optional[Document]):
        """Push `output` onto the stored queue without blocking, if a queue is set.

        Args:
            output: the value handed to the queue's `put_nowait`; may be None.
        """
        if self._queue is None:
            return
        self._queue.put_nowait(output)

    def invoke(self, *args, **kwargs) -> Document | list[Document] | None:
        """Signature placeholder; the body is empty here."""

    async def ainvoke(self, *args, **kwargs) -> Document | list[Document] | None:
        """Async signature placeholder; the body is empty here."""

    def stream(self, *args, **kwargs) -> Iterator[Document] | None:
        """Signature placeholder; the body is empty here."""

    def astream(self, *args, **kwargs) -> AsyncGenerator[Document, None] | None:
        """Signature placeholder; the body is empty here."""

    @abstractmethod
    def run(
        self, *args, **kwargs
    ) -> Document | list[Document] | Iterator[Document] | None | Any:
        """Run the component.

        Marked abstract: subclasses are expected to supply the implementation.
        """

run abstractmethod

run(*args, **kwargs)

Run the component.

Source code in libs/kotaemon/kotaemon/base/component.py
@abstractmethod
def run(
    self, *args, **kwargs
) -> Document | list[Document] | Iterator[Document] | None | Any:
    """Run the component.

    Marked abstract: subclasses are expected to supply the implementation.
    The body defined here is empty.
    """

Document

Bases: Document

Base document class, mostly inherited from Document class from llama-index.

This class accepts one positional argument content of an arbitrary type, which will store the raw content of the document. If specified, the class will use content to initialize the base llama_index class.

Attributes:

Name Type Description
content Any

raw content of the document, can be anything

source Optional[str]

id of the source of the Document. Optional.

channel Optional[Literal['chat', 'info', 'index', 'debug', 'plot']]

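the channel in which to show the document. Optional. One of: - chat: show in chat message - info: show in information panel - index: show in index panel - debug: show in debug panel. The type annotation also accepts plot, which the source docstring does not describe.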

Source code in libs/kotaemon/kotaemon/base/schema.py
class Document(BaseDocument):
    """
    Base document class, mostly inherited from Document class from llama-index.

    The constructor takes one positional argument `content` of an arbitrary type,
        which is stored as the raw content of the document. When given, `content`
        is also used to initialize the base llama_index class.

    Attributes:
        content: raw content of the document, can be anything
        source: id of the source of the Document. Optional.
        channel: the channel to show the document. Optional.:
            - chat: show in chat message
            - info: show in information panel
            - index: show in index panel
            - debug: show in debug panel
            - plot: accepted by the type annotation; display target not described
              here
    """

    content: Any = None
    source: Optional[str] = None
    channel: Optional[Literal["chat", "info", "index", "debug", "plot"]] = None

    def __init__(self, content: Optional[Any] = None, *args, **kwargs):
        """Build the keyword arguments for the base class from `content`.

        Args:
            content: raw content. If it is a `Document`, its `dict()` is used as the
                base keyword arguments and explicit `kwargs` override it. Any other
                non-None value is stored as `content`, with `text` set to
                `str(content)` when truthy and `""` otherwise.
            *args: forwarded unchanged to the base class.
            **kwargs: forwarded to the base class. When `content` is None, a
                non-None `text` is copied into `content`; failing that, a non-None
                `embedding` is copied into `content` and `text` becomes
                `"<EMBEDDING>"`.
        """
        if isinstance(content, Document):
            # TODO: simplify the Document class
            merged = content.dict()
            merged.update(kwargs)
            kwargs = merged
        elif content is not None:
            kwargs["content"] = content
            kwargs["text"] = str(content) if content else ""
        elif kwargs.get("text") is not None:
            kwargs["content"] = kwargs["text"]
        elif kwargs.get("embedding") is not None:
            kwargs["content"] = kwargs["embedding"]
            # default text indicating this document only contains embedding
            kwargs["text"] = "<EMBEDDING>"
        super().__init__(*args, **kwargs)

    def __bool__(self):
        """Return the truthiness of the raw `content`."""
        return bool(self.content)

    @classmethod
    def example(cls) -> "Document":
        """Return a sample `Document` built from `SAMPLE_TEXT` with fixed metadata."""
        return Document(
            text=SAMPLE_TEXT,
            metadata={"filename": "README.md", "category": "codebase"},
        )

    def to_haystack_format(self) -> "HaystackDocument":
        """Convert struct to Haystack document format."""
        from haystack.schema import Document as HaystackDocument

        # Falsy metadata is replaced with an empty dict before being handed over.
        return HaystackDocument(content=self.text, meta=self.metadata or {})

    def __str__(self):
        """Return the string form of the raw `content`."""
        return str(self.content)

to_haystack_format

to_haystack_format()

Convert struct to Haystack document format.

Source code in libs/kotaemon/kotaemon/base/schema.py
def to_haystack_format(self) -> "HaystackDocument":
    """Convert struct to Haystack document format.

    Returns:
        A Haystack document whose `content` is this document's `text` and whose
        `meta` is this document's `metadata` (an empty dict when falsy).
    """
    from haystack.schema import Document as HaystackDocument

    return HaystackDocument(content=self.text, meta=self.metadata or {})

DocumentWithEmbedding

Bases: Document

Subclass of Document which must contain an embedding

Use this if you want to enforce that a component's inputs/outputs contain an embedding.

Source code in libs/kotaemon/kotaemon/base/schema.py
class DocumentWithEmbedding(Document):
    """Subclass of Document that is always constructed with an embedding.

    Use this when a component's inputs/outputs are required to carry an embedding.
    """

    def __init__(self, embedding: list[float], *args, **kwargs):
        """Forward `embedding` to the parent constructor as a keyword argument.

        Args:
            embedding: the embedding vector; required, first positional argument.
            *args: forwarded unchanged to the parent constructor.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super().__init__(*args, embedding=embedding, **kwargs)

ExtractorOutput

Bases: Document

Represents the output of an extractor.

Source code in libs/kotaemon/kotaemon/base/schema.py
class ExtractorOutput(Document):
    """
    Represents the output of an extractor.

    Attributes:
        matches: list of strings, declared without a default value.
            NOTE(review): presumably the strings found by the extractor; confirm
            against the extractor implementations.
    """

    matches: list[str]

RetrievedDocument

Bases: Document

Subclass of Document with retrieval-related information

Attributes:

Name Type Description
score float

score of the document (from 0.0 to 1.0)

retrieval_metadata dict

metadata from the retrieval process; different components in a retrieval pipeline can use it to communicate with each other

Source code in libs/kotaemon/kotaemon/base/schema.py
class RetrievedDocument(Document):
    """Subclass of Document with retrieval-related information

    Attributes:
        score (float): score of the document (from 0.0 to 1.0). Defaults to 0.0.
        retrieval_metadata (dict): metadata from the retrieval process, can be used
            by different components in a retrieval pipeline to communicate with
            each other. Defaults to a new empty dict.
    """

    score: float = Field(default=0.0)
    # A factory makes the "fresh dict per instance" intent explicit instead of
    # depending on how the model library treats a mutable literal default.
    retrieval_metadata: dict = Field(default_factory=dict)