# Provenance (Hugging Face page header): commit ca00d5c (verified) by
# decodingdatascience, commit message "Create app2.py".
import os
import logging
import sys
import gradio as gr
from pinecone import Pinecone
from llama_index.core import VectorStoreIndex, Settings, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
# --- Logging ---
# Route INFO-and-above log records to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# --- Secrets from Hugging Face Spaces ---
# Both keys are read from the process environment. A missing or empty value
# stops start-up with a ValueError that names the absent secret.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if not PINECONE_API_KEY:
    raise ValueError("Missing PINECONE_API_KEY in Hugging Face Space secrets.")

if not OPENAI_API_KEY:
    raise ValueError("Missing OPENAI_API_KEY in Hugging Face Space secrets.")
# --- LlamaIndex Settings ---
# Global defaults: every LlamaIndex component created below picks up the
# chat model, embedding model and chunking values assigned here.
Settings.llm = OpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
    temperature=0.2,
)

Settings.embed_model = OpenAIEmbedding(
    api_key=OPENAI_API_KEY,
    model="text-embedding-ada-002",
)

# Chunk size and overlap used when text is split into nodes.
Settings.chunk_size = 600
Settings.chunk_overlap = 200
# --- System Prompt ---
# Persona and guard-rails for the HR assistant: answer only HR questions,
# rely solely on the connected documents, always cite the source, and fall
# back to the contact e-mail when the documents do not cover the question.
system_prompt = """
You are Ayesha, the Decoding Data Science (DDS) Enterprise HR Chatbot. Your objective is to interact politely and professionally with employees, answering only HR-related questions. Use only information directly from the connected HR documents to provide your answers. Always provide an explicit citation indicating the document source for every answer. Do not offer information or suggestions beyond what is present in these documents.
If the requested information cannot be found in the connected documents, politely instruct the user to email connect@decodingdatascience.com for further assistance. For questions outside of HR, inform the user that you can only answer HR-related questions. If a question is unclear or possibly HR-related but ambiguous, ask the user to rephrase. Never attempt to answer non-HR, personal, or unrelated questions.
- Remain polite and professional in all interactions.
- Respond exclusively to HR-related topics using only the connected HR documents.
- Always include an explicit citation to the relevant document(s) for every answer.
- If a question is off-topic, state that you can only address HR questions.
- If you are unsure whether the question is HR-related, politely ask for clarification or rephrasing.
- For HR-related questions where the answer is not in the connected documents, kindly direct the user to email connect@decodingdatascience.com.
- Use a friendly and formal tone.
Respond in short, clear paragraphs (2-4 sentences). Do not use markdown or code blocks. Every answer must include the citation showing which document(s) the information is sourced from.
"""
# --- Connect to existing Pinecone index ---
# No documents are ingested here: the script only opens a handle to an index
# that is expected to exist already under this name.
index_name = "quickstart"
pc = Pinecone(api_key=PINECONE_API_KEY)
pinecone_index = pc.Index(index_name)

# --- Connect LlamaIndex to existing Pinecone vector store ---
# Wrap the Pinecone handle in a LlamaIndex vector store, then build the
# index object on top of that store.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(
    storage_context=storage_context,
    vector_store=vector_store,
)
# --- Query Engine ---
# The system prompt is attached to the LLM itself. The retriever query engine
# built by `as_query_engine` does not consume a `system_prompt` keyword (it is
# swallowed by **kwargs), so passing it there would leave the persona and
# citation rules out of every request. The LLM's `system_prompt` field is
# prepended to each call the engine makes through `Settings.llm`.
Settings.llm.system_prompt = system_prompt

query_engine = index.as_query_engine(
    # Number of most-similar chunks retrieved from the vector store per query.
    similarity_top_k=2,
)
# --- Gradio App ---
def query_doc(prompt) -> str:
    """Answer one question through the module-level ``query_engine``.

    Args:
        prompt: Question text submitted from the Gradio textbox.

    Returns:
        The engine's response converted to ``str``. If ``prompt`` is missing,
        blank, or not a string, a reminder to enter a question is returned
        instead. If the query raises, an ``"Error: ..."`` message carrying the
        exception text is returned.
    """
    # Guard clause sits outside the try block so the broad handler below only
    # covers the query itself.
    if not isinstance(prompt, str) or not prompt.strip():
        return "Please enter an HR-related question."

    try:
        response = query_engine.query(prompt)
        return str(response)
    except Exception as exc:
        # UI boundary: the caller expects a string, so the failure is turned
        # into a message, but the traceback is logged rather than discarded.
        logging.getLogger(__name__).exception("Query failed")
        return f"Error: {exc}"
# Single-textbox UI: the submitted text is handed to query_doc and the string
# it returns is shown in the "Answer" box.
demo = gr.Interface(
    title="DDS Enterprise Chatbot",
    description="Ask HR questions based on the indexed HR documents. Powered by LlamaIndex & Pinecone.",
    fn=query_doc,
    inputs=gr.Textbox(label="Ask a question about the document"),
    outputs=gr.Textbox(label="Answer"),
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()