import os

from upstash_vector import Index
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.schema import Document
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.upstash import UpstashVectorStore

# ✅ Setup embedding (all-MiniLM-L6-v2 emits 384-dim vectors, so the Upstash
# index must have been created with dimension 384)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# ✅ Upstash vector store config
def get_upstash_vector_store() -> UpstashVectorStore:
    return UpstashVectorStore(
        url=os.environ["UPSTASH_VECTOR_REST_URL"],
        token=os.environ["UPSTASH_VECTOR_REST_TOKEN"],
    )

# ✅ File-based ingestion
def build_news_index(data_dir: str) -> VectorStoreIndex:
    documents = SimpleDirectoryReader(data_dir).load_data()
    return get_or_build_index_from_docs(documents)

# ✅ Direct document ingestion: chunk documents into nodes, then embed and
# upsert them into Upstash via the storage context
def get_or_build_index_from_docs(documents: list[Document]) -> VectorStoreIndex:
    nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)
    vector_store = get_upstash_vector_store()
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex(nodes, storage_context=storage_context)

# ✅ Load existing index: reconnect to vectors already stored in Upstash.
# Nothing is persisted locally, so VectorStoreIndex.from_vector_store is used
# to rebuild an index object directly on top of the remote vector store.
def load_news_index() -> VectorStoreIndex:
    # NOTE (assumption): Index.info().vector_count from the upstash-vector SDK
    # is used to detect an empty index; verify against your installed version.
    info = Index(
        url=os.environ["UPSTASH_VECTOR_REST_URL"],
        token=os.environ["UPSTASH_VECTOR_REST_TOKEN"],
    ).info()
    if info.vector_count == 0:
        raise ValueError("Upstash index is empty; ingest documents first.")
    return VectorStoreIndex.from_vector_store(get_upstash_vector_store())

# ✅ Preferred file-based entry point: reuse the remote index when it already
# holds vectors, otherwise ingest from disk
def get_or_build_index(data_dir: str) -> VectorStoreIndex:
    try:
        return load_news_index()
    except Exception:
        return build_news_index(data_dir)
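
# --- Usage sketch (assumptions: a local "./data" folder of news files and the
# two UPSTASH_VECTOR_REST_* environment variables set; the query string and
# top-k value are illustrative, not part of the original module) ---
if __name__ == "__main__":
    index = get_or_build_index("./data")
    # A retriever runs embedding-only similarity search against Upstash and
    # needs no LLM; switch to index.as_query_engine() once Settings.llm is
    # configured if you want synthesized answers instead of raw chunks.
    retriever = index.as_retriever(similarity_top_k=3)
    for hit in retriever.retrieve("What happened in the markets this week?"):
        score = f"{hit.score:.3f}" if hit.score is not None else "n/a"
        print(score, hit.node.get_content()[:120])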