Spaces:
Sleeping
Sleeping
| from typing import AsyncIterable, List | |
| from langchain_core.document_loaders.blob_loaders import BlobLoader | |
| from langchain_community.document_loaders.generic import GenericLoader | |
| from langchain_core.documents import Document | |
| from langchain_community.document_loaders.base import BaseBlobParser | |
| # Extend the base GenericLoader class | |
class CustomGenericLoader(GenericLoader):
    """GenericLoader subclass that streams parsed documents asynchronously.

    Blobs are obtained from ``self.blob_loader.yield_blobs()`` and each blob is
    passed to ``self.blob_parser.lazy_parse``; the resulting documents are
    exposed through async generators.
    """

    def __init__(self, blob_loader: BlobLoader, blob_parser: BaseBlobParser):
        """Forward the blob loader and blob parser to the parent initializer.

        Args:
            blob_loader: Source object whose ``yield_blobs()`` supplies blobs.
            blob_parser: Parser whose ``lazy_parse(blob)`` produces documents.
        """
        super().__init__(blob_loader, blob_parser)

    async def _aiter_blobs(self):
        """Yield blobs from the blob loader, whether it iterates sync or async."""
        blobs = self.blob_loader.yield_blobs()
        if hasattr(blobs, "__aiter__"):
            # Blob loader already produces an asynchronous iterable.
            async for blob in blobs:
                yield blob
        else:
            # A plain iterable cannot be consumed with ``async for``
            # (it raises TypeError), so walk it with a regular loop.
            for blob in blobs:
                yield blob

    async def async_load(self) -> AsyncIterable[Document]:
        """Asynchronously load and parse documents from blobs.

        Yields:
            Each document produced by ``blob_parser.lazy_parse`` for each
            blob, in the order the blobs are supplied.
        """
        async for blob in self._aiter_blobs():
            # ``lazy_parse`` is expected to return an iterator of documents
            # for one blob; flatten it so callers receive documents rather
            # than the iterator object itself.
            for document in self.blob_parser.lazy_parse(blob):
                yield document

    async def lazy_load(self) -> AsyncIterable[Document]:
        """Lazily yield documents asynchronously; override to customize.

        This simply re-yields everything from ``async_load``.

        NOTE(review): this is an async generator, so it must be consumed with
        ``async for``. If the parent class offers synchronous helpers that
        iterate ``lazy_load()`` with a plain ``for``, they will not work with
        this override -- confirm against callers.

        Yields:
            The same documents, in the same order, as ``async_load``.
        """
        async for document in self.async_load():
            yield document

    async def load_all(self) -> List[Document]:
        """Asynchronously load every document and return them as a list.

        Returns:
            All documents yielded by ``async_load``, collected in order.
        """
        return [document async for document in self.async_load()]