|
from llama_index.vector_stores.upstash import UpstashVectorStore |
|
from llama_index.core.storage.storage_context import StorageContext |
|
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, load_index_from_storage |
|
from llama_index.core.node_parser import SimpleNodeParser |
|
from llama_index.core.settings import Settings |
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding |
|
from llama_index.core.schema import Document |
|
import os |
|
|
|
|
|
# Register the embedding model on llama_index's global ``Settings`` object at
# import time, so code that relies on ``Settings.embed_model`` picks it up.
# NOTE(review): the Upstash index dimension presumably has to match this
# model's output size — confirm against the index configuration.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
|
|
|
|
|
def get_upstash_vector_store():
    """Create an ``UpstashVectorStore`` from environment credentials.

    The REST endpoint and token are read from the ``UPSTASH_VECTOR_REST_URL``
    and ``UPSTASH_VECTOR_REST_TOKEN`` environment variables.

    Returns:
        A new ``UpstashVectorStore`` built with that URL and token.

    Raises:
        KeyError: If either environment variable is not set.
    """
    # Subscript access (rather than ``.get``) so a missing variable fails
    # immediately with the variable name instead of passing ``None`` along.
    rest_url = os.environ["UPSTASH_VECTOR_REST_URL"]
    rest_token = os.environ["UPSTASH_VECTOR_REST_TOKEN"]
    return UpstashVectorStore(url=rest_url, token=rest_token)
|
|
|
|
|
def build_news_index(data_dir: str) -> VectorStoreIndex:
    """Build a vector index from the files found under ``data_dir``.

    Args:
        data_dir: Directory handed to ``SimpleDirectoryReader``.

    Returns:
        The ``VectorStoreIndex`` produced by
        ``get_or_build_index_from_docs`` for the loaded documents.
    """
    reader = SimpleDirectoryReader(data_dir)
    loaded_docs = reader.load_data()
    return get_or_build_index_from_docs(loaded_docs)
|
|
|
|
|
def get_or_build_index_from_docs(documents: list[Document]) -> VectorStoreIndex:
    """Build a ``VectorStoreIndex`` over ``documents`` backed by Upstash.

    Despite the "get or build" name, this function always constructs a new
    index from the given documents; it never looks for an existing one.

    Args:
        documents: Documents to split into nodes and index.

    Returns:
        A ``VectorStoreIndex`` created from the parsed nodes, using a storage
        context whose vector store is the Upstash store.
    """
    # Split the documents into nodes with the parser's default settings.
    parser = SimpleNodeParser.from_defaults()
    parsed_nodes = parser.get_nodes_from_documents(documents)

    # Bind the index to the Upstash vector store through a storage context.
    upstash_context = StorageContext.from_defaults(
        vector_store=get_upstash_vector_store(),
    )
    return VectorStoreIndex(parsed_nodes, storage_context=upstash_context)
|
|
|
|
|
def load_news_index() -> VectorStoreIndex:
    """Return a ``VectorStoreIndex`` over the existing Upstash vector store.

    The index is created directly on top of the remote vector store with
    ``VectorStoreIndex.from_vector_store``. ``load_index_from_storage`` is not
    usable here: it looks for a persisted index struct in the storage
    context's index store, and a context created from only a vector store
    has none, so that call raises ``ValueError`` every time.

    Returns:
        A ``VectorStoreIndex`` backed by the Upstash vector store.

    Raises:
        KeyError: If the Upstash environment variables are not set
            (propagated from ``get_upstash_vector_store``).
        ValueError: If the remote Upstash index reports zero vectors, i.e.
            there is nothing to load yet.
    """
    vector_store = get_upstash_vector_store()

    # Keep the "raises when there is nothing to load" contract so callers
    # that fall back to building on failure still build on a fresh index.
    # NOTE(review): assumes ``UpstashVectorStore.client`` is the underlying
    # ``upstash_vector.Index`` and that ``info().vector_count`` is the number
    # of stored vectors — confirm against the installed package versions.
    if vector_store.client.info().vector_count == 0:
        raise ValueError(
            "No vectors found in the Upstash index; build the index first."
        )

    return VectorStoreIndex.from_vector_store(vector_store=vector_store)
|
|
|
|
|
def get_or_build_index(data_dir: str) -> VectorStoreIndex:
    """Load the news index, or build it from ``data_dir`` if loading fails.

    Args:
        data_dir: Directory passed to ``build_news_index`` when the index
            cannot be loaded.

    Returns:
        The index returned by ``load_news_index()`` when that call succeeds,
        otherwise the index returned by ``build_news_index(data_dir)``.

    Raises:
        Exception: Whatever ``build_news_index`` raises if the fallback build
            also fails.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        return load_news_index()
    except Exception:
        # Best-effort fallback is deliberate, but record why loading failed so
        # configuration or connectivity problems are not silently hidden.
        logging.getLogger(__name__).warning(
            "Could not load the existing news index; rebuilding from %r",
            data_dir,
            exc_info=True,
        )
        return build_news_index(data_dir)
|
|