from llama_index.vector_stores.upstash import UpstashVectorStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.settings import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.schema import Document
import os
# Set up the embedding model
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
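# Note: all-MiniLM-L6-v2 emits 384-dimensional vectors, so the Upstash index
# must be created with dimension 384 (an assumption about your index setup;
# adjust if you swap in a different embedding model).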
# Upstash vector store config
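# Assumes UPSTASH_VECTOR_REST_URL and UPSTASH_VECTOR_REST_TOKEN are exported
# in the environment (both come from the Upstash console); os.environ[...]
# raises KeyError if either is missing.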
def get_upstash_vector_store() -> UpstashVectorStore:
    return UpstashVectorStore(
        url=os.environ["UPSTASH_VECTOR_REST_URL"],
        token=os.environ["UPSTASH_VECTOR_REST_TOKEN"],
    )
# File-based ingestion
def build_news_index(data_dir: str) -> VectorStoreIndex:
    """Read every document under data_dir and index it into Upstash."""
    documents = SimpleDirectoryReader(data_dir).load_data()
    return get_or_build_index_from_docs(documents)
# Direct document ingestion
def get_or_build_index_from_docs(documents: list[Document]) -> VectorStoreIndex:
    """Chunk documents into nodes and build a fresh Upstash-backed index."""
    nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)
    vector_store = get_upstash_vector_store()
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex(nodes, storage_context=storage_context)
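# Building the index embeds each node with the model configured above and
# upserts the vectors into Upstash, so later runs can reconnect to the same
# data without re-ingesting the files.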
# Load the existing index (when the source documents haven't changed)
def load_news_index() -> VectorStoreIndex:
    """Reconnect to the index already stored in Upstash."""
    # load_index_from_storage needs a persisted docstore/index store, which a
    # remote vector store alone never has, so rebuild the index object
    # directly from the vector store instead.
    vector_store = get_upstash_vector_store()
    return VectorStoreIndex.from_vector_store(vector_store=vector_store)
# Preferred file-based entry point
def get_or_build_index(data_dir: str) -> VectorStoreIndex:
    """Reuse the index in Upstash if possible; otherwise rebuild from files."""
    try:
        return load_news_index()
    except Exception:
        # Fall back to a full rebuild, e.g. when the Upstash index is empty
        # or the credentials are missing.
        return build_news_index(data_dir)
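# Usage sketch (illustrative, not part of the original module): assumes a
# local "data/" directory containing documents and valid Upstash credentials.
if __name__ == "__main__":
    index = get_or_build_index("data")
    # Pure retrieval only needs the embedding model configured above;
    # index.as_query_engine() would additionally require an LLM.
    retriever = index.as_retriever(similarity_top_k=3)
    for hit in retriever.retrieve("What are the latest headlines?"):
        print(hit.score, hit.node.get_content()[:120])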