Spaces:

nuseAI
/

fastAPIv2

Running

App Files Files Community

fastAPIv2 / components /indexers /news_indexer.py

ragV98

changes v1

989b675 28 days ago

raw

history blame contribute delete

1.86 kB

	from llama_index.vector_stores.upstash import UpstashVectorStore
	from llama_index.core.storage.storage_context import StorageContext
	from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, load_index_from_storage
	from llama_index.core.node_parser import SimpleNodeParser
	from llama_index.core.settings import Settings
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_index.core.schema import Document
	import os

	# ✅ Setup embedding
	# Register the embedding model on llama_index's global Settings at import
	# time; the model id is kept in a named constant so it is easy to spot.
	_EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
	Settings.embed_model = HuggingFaceEmbedding(model_name=_EMBED_MODEL_NAME)

	# ✅ Upstash vector store config
	def get_upstash_vector_store():
	    """Create an UpstashVectorStore configured from environment variables.

	    Reads ``UPSTASH_VECTOR_REST_URL`` and ``UPSTASH_VECTOR_REST_TOKEN`` from
	    ``os.environ``.

	    Raises:
	        KeyError: If either environment variable is not set.
	    """
	    # Read the URL first, then the token, so a missing URL is reported first.
	    rest_url = os.environ["UPSTASH_VECTOR_REST_URL"]
	    rest_token = os.environ["UPSTASH_VECTOR_REST_TOKEN"]
	    return UpstashVectorStore(url=rest_url, token=rest_token)

	# ✅ File-based ingestion
	def build_news_index(data_dir: str) -> VectorStoreIndex:
	    """Read every file under ``data_dir`` and index the resulting documents.

	    Args:
	        data_dir: Directory handed to ``SimpleDirectoryReader``.

	    Returns:
	        The index produced by ``get_or_build_index_from_docs``.
	    """
	    reader = SimpleDirectoryReader(data_dir)
	    loaded_docs = reader.load_data()
	    return get_or_build_index_from_docs(loaded_docs)

	# ✅ Direct document ingestion
	def get_or_build_index_from_docs(documents: list[Document]) -> VectorStoreIndex:
	    """Split ``documents`` into nodes and index them in the Upstash store.

	    Despite the "get_or_build" name, this function has no load path: every
	    call parses the documents and constructs a new ``VectorStoreIndex``.

	    Args:
	        documents: Documents to parse and index.

	    Returns:
	        A ``VectorStoreIndex`` built over the parsed nodes, using a storage
	        context that wraps the Upstash vector store.
	    """
	    # Parse first, then connect to the store (same order as before, so any
	    # parsing error surfaces before a missing-credentials error).
	    parser = SimpleNodeParser.from_defaults()
	    parsed_nodes = parser.get_nodes_from_documents(documents)
	    upstash_store = get_upstash_vector_store()
	    context = StorageContext.from_defaults(vector_store=upstash_store)
	    return VectorStoreIndex(parsed_nodes, storage_context=context)

	# ✅ Load existing index (if no changes in docs)
	def load_news_index() -> VectorStoreIndex:
	    """Load an index through a storage context backed by the Upstash store.

	    Returns:
	        Whatever ``load_index_from_storage`` yields for that context.

	    NOTE(review): the storage context is created with only a vector store.
	    ``load_index_from_storage`` presumably looks for a persisted index struct
	    in the context's index store, which would be empty here, so this call may
	    always raise. Callers in this module rely on that exception to fall back
	    to a rebuild. Confirm, and consider ``VectorStoreIndex.from_vector_store``
	    together with an explicit "is the store populated?" check.
	    """
	    context = StorageContext.from_defaults(
	        vector_store=get_upstash_vector_store(),
	    )
	    return load_index_from_storage(context)

	# ✅ Preferred file-based entry point
	def get_or_build_index(data_dir: str) -> VectorStoreIndex:
	    """Return the existing news index, rebuilding from ``data_dir`` on failure.

	    First tries ``load_news_index()``. If that raises any ``Exception``, the
	    failure is logged with its traceback and the index is rebuilt with
	    ``build_news_index(data_dir)``.

	    Args:
	        data_dir: Directory of source files used when a rebuild is needed.

	    Returns:
	        The loaded index, or a freshly built one if loading failed.

	    Raises:
	        Exception: Whatever ``build_news_index`` raises if the rebuild fails.
	    """
	    # Local import: keeps this block self-contained without touching the
	    # file-level import list.
	    import logging

	    try:
	        return load_news_index()
	    except Exception:
	        # Deliberate best-effort fallback, but never a silent one: a load
	        # failure can hide real problems (bad credentials, network errors),
	        # so record why we are rebuilding.
	        logging.getLogger(__name__).warning(
	            "Could not load existing news index; rebuilding from %r",
	            data_dir,
	            exc_info=True,
	        )
	        return build_news_index(data_dir)