File size: 1,861 Bytes
6d24925
 
 
 
 
 
989b675
6d24925
 
989b675
6d24925
 
989b675
6d24925
 
 
 
 
 
989b675
6d24925
 
989b675
6d24925
989b675
 
 
6d24925
 
 
 
 
989b675
6d24925
 
 
 
 
989b675
6d24925
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import logging
import os

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, load_index_from_storage
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.schema import Document
from llama_index.core.settings import Settings
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.upstash import UpstashVectorStore

# ✅ Set up the embedding model
# Module-level side effect: importing this file assigns a HuggingFace
# sentence-transformers model as the embed model on llama_index's shared
# ``Settings`` object.
# NOTE(review): the Upstash index dimension presumably has to match this
# model's embedding size — confirm against the index configuration.
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ✅ Upstash vector store configuration
def get_upstash_vector_store():
    """Create an ``UpstashVectorStore`` from credentials in the environment.

    Reads ``UPSTASH_VECTOR_REST_URL`` and ``UPSTASH_VECTOR_REST_TOKEN`` from
    ``os.environ``.

    Returns:
        A new ``UpstashVectorStore`` configured with that URL and token.

    Raises:
        KeyError: If either environment variable is not set.
    """
    rest_url = os.environ["UPSTASH_VECTOR_REST_URL"]
    rest_token = os.environ["UPSTASH_VECTOR_REST_TOKEN"]
    return UpstashVectorStore(url=rest_url, token=rest_token)

# ✅ File-based ingestion
def build_news_index(data_dir: str) -> VectorStoreIndex:
    """Load documents from ``data_dir`` and build an index from them.

    Args:
        data_dir: Directory handed to ``SimpleDirectoryReader``.

    Returns:
        The index produced by ``get_or_build_index_from_docs`` for the
        loaded documents.
    """
    reader = SimpleDirectoryReader(data_dir)
    loaded_docs = reader.load_data()
    return get_or_build_index_from_docs(loaded_docs)

# ✅ Direct document ingestion
def get_or_build_index_from_docs(documents: list[Document]) -> VectorStoreIndex:
    """Parse ``documents`` into nodes and index them in the Upstash store.

    Args:
        documents: Documents to split into nodes with the default
            ``SimpleNodeParser`` settings.

    Returns:
        A ``VectorStoreIndex`` built from the parsed nodes, using a storage
        context backed by the Upstash vector store.
    """
    parser = SimpleNodeParser.from_defaults()
    parsed_nodes = parser.get_nodes_from_documents(documents)
    upstash_context = StorageContext.from_defaults(
        vector_store=get_upstash_vector_store()
    )
    return VectorStoreIndex(parsed_nodes, storage_context=upstash_context)

# ✅ Load the existing index (when the documents have not changed)
def load_news_index() -> VectorStoreIndex:
    """Reconnect to the index already stored in Upstash without re-ingesting.

    ``load_index_from_storage`` needs an index store holding persisted index
    structs. A storage context created from only a vector store has none, so
    that call always raised and the existing vectors were never reused. The
    index is therefore created directly on top of the remote vector store.

    Returns:
        A ``VectorStoreIndex`` backed by the vectors already in Upstash.

    Raises:
        KeyError: If the Upstash environment variables are not set.
        ValueError: If the Upstash index holds no vectors yet, so callers
            can fall back to ingestion.
    """
    vector_store = get_upstash_vector_store()
    # Keep the "raise when nothing is stored" contract that fallback callers
    # rely on to trigger a fresh build; ``client`` is the underlying Upstash
    # index handle exposed by the vector store.
    if vector_store.client.info().vector_count == 0:
        raise ValueError("No vectors found in the Upstash index.")
    return VectorStoreIndex.from_vector_store(vector_store=vector_store)

# ✅ Preferred file-based entry point
def get_or_build_index(data_dir: str) -> VectorStoreIndex:
    """Return the stored news index, ingesting ``data_dir`` if loading fails.

    Args:
        data_dir: Directory passed to ``build_news_index`` when the existing
            index cannot be loaded.

    Returns:
        The index from ``load_news_index``, or a freshly built one when that
        call raises.
    """
    try:
        return load_news_index()
    except Exception:
        # The best-effort fallback is deliberate, but record why loading
        # failed so configuration or network errors are not silently hidden.
        logging.getLogger(__name__).warning(
            "Could not load existing news index; rebuilding from %s",
            data_dir,
            exc_info=True,
        )
        return build_news_index(data_dir)