File size: 3,315 Bytes
0848f94 415f853 0848f94 415f853 b74efa4 415f853 b74efa4 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 415f853 0848f94 5de36cd 0848f94 415f853 5de36cd 415f853 0848f94 415f853 0848f94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import os
import gradio as gr
from smolagents import CodeAgent, tool, LiteLLMModel
from pypdf import PdfReader
from dotenv import load_dotenv
# Load environment variables (for local dev); load_dotenv() comes from the
# python-dotenv package imported at the top of the file.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# Fail fast at import time rather than on the first model request: the key is
# handed to the model constructor further down in this file.
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY not set. Please set it in your .env or Hugging Face Space secrets.")
# --------- TOOLS ---------
@tool
def process_pdf(file_path: str) -> str:
    """
    Extract text from a PDF file.

    Args:
        file_path (str): Path to the PDF file.

    Returns:
        str: Extracted text from the PDF, pages separated by newlines. If reading fails, a message beginning with "PDF extraction failed" is returned instead.
    """
    try:
        reader = PdfReader(file_path)
        # A page may yield no text; substitute "" so join() only ever sees
        # strings.
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # Deliberately broad: the failure is reported as text rather than
        # raised, so callers always receive a string.
        return f"PDF extraction failed: {e}"
@tool
def chat_with_pdf(query: str, pdf_text: str) -> str:
    """
    Prepare a question about the PDF, grounded in its text, for answering.

    Args:
        query (str): The user's question.
        pdf_text (str): Text extracted from the PDF.

    Returns:
        str: A prompt pairing the PDF text with the question, or a short notice when the question or the PDF text is empty.
    """
    # The answer itself is still produced by the agent's LLM; this tool only
    # hands back grounded context. It must return a string: the previous body
    # was a bare `pass`, which returned None despite the declared `-> str`.
    if not query or not query.strip():
        return "No question was provided."
    if not pdf_text or not pdf_text.strip():
        return "No PDF text is available. Please upload a PDF first."
    return (
        "Answer the question using only the PDF content below.\n\n"
        f"PDF Content:\n{pdf_text}\n\nUser Question: {query}"
    )
# --------- AGENT SETUP ---------
# Gemini model accessed through LiteLLM, authenticated with GEMINI_API_KEY.
model = LiteLLMModel(
    # Swap in "gemini/gemini-1.5-pro" if you have access
    model_id="gemini/gemini-1.5-flash",
    # Do NOT set api_base for Gemini AI Studio keys!
    api_key=GEMINI_API_KEY,
)
# Code-writing agent that is given both PDF tools and the model above.
agent = CodeAgent(
    model=model,
    tools=[process_pdf, chat_with_pdf],
)
# --------- GRADIO INTERFACE ---------
def process_pdf_ui(file):
    """
    Extract the text of an uploaded PDF for the UI.

    Args:
        file: The uploaded file: either an object exposing its path as
            ``.name`` or a plain path string. Falsy when nothing is uploaded.

    Returns:
        str: Whatever ``process_pdf`` returns for the file, or "" when there
        is no file.
    """
    if not file:
        return ""
    # Accept both upload objects (path stored in .name) and bare path strings,
    # so the handler does not depend on how the upload widget delivers files.
    path = getattr(file, "name", file)
    return process_pdf(path)
def chat_ui(message, history, pdf_text):
    """
    Handle one chat turn and return the updated conversation.

    Args:
        message: The question typed by the user.
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts, or None/empty for a new conversation. It is not mutated.
        pdf_text: Text of the uploaded PDF; falsy when none has been loaded.

    Returns:
        list: A new history list. For a non-blank message it is the prior
        history plus the user's message and one assistant message (the
        agent's answer, a request to upload a PDF, or an ``Error: ...``
        line). For a blank message it is the prior history unchanged.
    """
    # Work on a copy so the caller's list is never modified in place.
    history = list(history or [])

    # Nothing to ask: avoid a pointless model call.
    if not message or not str(message).strip():
        return history

    # Record the user's turn on every path so the transcript stays complete.
    history.append({"role": "user", "content": message})

    if not pdf_text:
        history.append({"role": "assistant", "content": "Please upload a PDF first."})
        return history

    # Compose a prompt for the agent
    prompt = f"PDF Content:\n{pdf_text}\n\nUser Question: {message}"
    try:
        response = agent.run(prompt)
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing the
        # event handler.
        reply = f"Error: {e}"
    else:
        # The agent's result is not guaranteed to be a string; message
        # content is stored as text.
        reply = str(response)

    history.append({"role": "assistant", "content": reply})
    return history
# Page layout and event wiring. Event listeners are registered inside the
# Blocks context so they are attached to this app.
# NOTE(review): the original nesting was lost; placing the note below the row
# (rather than inside the right-hand column) is an assumption — confirm.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Chat with your PDF")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            # Hidden field that carries the extracted PDF text between events.
            pdf_text = gr.Textbox(visible=False)
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="PDF Chat", height=400, type="messages")
            msg = gr.Textbox(label="Ask a question about the PDF", placeholder="Type your question and hit Enter...")
    gr.Markdown("**Note:** This app uses the Gemini AI model to process your PDF and answer questions. Make sure to upload a PDF first and add your Google API in secrets. You can get it from [Gemini AI Studio](https://aistudio.google.com/apikey).")

    # Uploading a file extracts its text into the hidden field.
    pdf_input.upload(
        fn=process_pdf_ui,
        inputs=pdf_input,
        outputs=pdf_text,
    )
    # Submitting a question sends it, the chat so far and the PDF text to the
    # chat handler, whose return value replaces the chat display.
    msg.submit(
        fn=chat_ui,
        inputs=[msg, chatbot, pdf_text],
        outputs=chatbot,
    )
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|