Spaces:
Runtime error
Runtime error
import os
import shutil
import tempfile

import gradio as gr
import nest_asyncio
from dotenv import load_dotenv
from IPython.display import Markdown, display
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.llms.cohere import Cohere
from llama_index.postprocessor.cohere_rerank import CohereRerank
# allows nested access to the event loop
nest_asyncio.apply()

# Pick up variables from a local .env file when one exists; this is a no-op
# otherwise, so hosted deployments can rely on real environment variables.
load_dotenv()

# The Cohere API key must never be committed to source control. Create one at
# https://dashboard.cohere.com/api-keys and expose it through the
# COHERE_API_KEY (or CO_API_KEY) environment variable / deployment secret.
API_KEY = os.getenv("COHERE_API_KEY") or os.getenv("CO_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Cohere API key not found: set the COHERE_API_KEY environment "
        "variable (or add it to a .env file)."
    )

# setup llm & embedding model
llm = Cohere(api_key=API_KEY, model="command-r-plus")
# NOTE(review): input_type is "search_query" although this model is also used
# to embed the indexed documents; confirm against the CohereEmbedding docs
# whether "search_document" is expected for that side.
embed_model = CohereEmbedding(
    cohere_api_key=API_KEY,
    model_name="embed-english-v3.0",
    input_type="search_query",
)
def _upload_source_path(upload):
    """Return the filesystem path associated with one uploaded item.

    Uploads may arrive either as path strings or as file-like wrappers that
    expose the path through a ``name`` attribute.
    """
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def _stage_uploads(pdf_files, target_dir):
    """Copy every uploaded item into ``target_dir`` under its bare file name."""
    for position, upload in enumerate(pdf_files):
        source = _upload_source_path(upload)
        # Only the base name is used: joining an absolute path onto
        # ``target_dir`` would discard ``target_dir`` and overwrite the source.
        file_name = os.path.basename(source)
        destination = os.path.join(target_dir, file_name)
        if os.path.exists(destination):
            # Two uploads sharing a base name must not overwrite each other.
            destination = os.path.join(target_dir, f"{position}_{file_name}")

        if os.path.isfile(source):
            shutil.copyfile(source, destination)
        elif hasattr(upload, "read"):
            # Fall back to the stream when the name is not a real path.
            with open(destination, "wb") as out:
                out.write(upload.read())
        else:
            raise FileNotFoundError(f"Uploaded file not found: {source}")


# Function to load data from uploaded PDF
def process_pdfs(pdf_files):
    """Build a vector index and a reranking query engine over uploaded PDFs.

    Args:
        pdf_files: Non-empty sequence of uploads, each either a path string
            or a file-like object with a ``name`` attribute.

    Returns:
        A ``(index, query_engine)`` tuple. The query engine applies a Cohere
        reranker to the retrieved nodes.

    Raises:
        ValueError: If ``pdf_files`` is empty or ``None``.
        FileNotFoundError: If an upload refers to a path that does not exist
            and offers no readable stream.
    """
    if not pdf_files:
        raise ValueError("No PDF files were provided.")

    # A fresh, self-cleaning directory per call keeps documents from earlier
    # requests out of this index and avoids leaving uploads on disk.
    with tempfile.TemporaryDirectory(prefix="pdf_query_") as staging_dir:
        _stage_uploads(pdf_files, staging_dir)
        # Load data from the staging directory
        loader = SimpleDirectoryReader(
            input_dir=staging_dir,
            required_exts=[".pdf"],
            recursive=True,
        )
        docs = loader.load_data()

    # Create an index over loaded data
    Settings.embed_model = embed_model
    index = VectorStoreIndex.from_documents(docs, show_progress=True)

    # Create a cohere reranker
    cohere_rerank = CohereRerank(api_key=API_KEY)

    # Create the query engine, where we use a cohere reranker on the fetched nodes
    Settings.llm = llm
    query_engine = index.as_query_engine(node_postprocessors=[cohere_rerank])
    return index, query_engine
# Query function
def query_pdfs(pdf_files, question):
    """Answer ``question`` using the content of the uploaded PDF files.

    Args:
        pdf_files: Uploaded PDF files; forwarded unchanged to ``process_pdfs``.
        question: Natural-language question to run against the documents.

    Returns:
        The query engine's response as a string, or a short instruction for
        the user when the upload or the question is missing.
    """
    # Guard clauses: give the user actionable feedback instead of letting the
    # UI surface an opaque exception for an incomplete form.
    if not pdf_files:
        return "Please upload at least one PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # The index itself is not needed here, only the query engine.
    _, query_engine = process_pdfs(pdf_files)
    response = query_engine.query(question)
    return str(response)
# Create Gradio interface
# Components are taken from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` module was deprecated in Gradio 3 and removed in Gradio 4, and
# multi-file upload is selected with ``file_count="multiple"``.
iface = gr.Interface(
    fn=query_pdfs,
    inputs=[
        gr.File(
            label="Upload PDF Files",
            file_count="multiple",
            file_types=[".pdf"],
        ),
        gr.Textbox(
            label="Ask a Question",
            placeholder="Enter your question here...",
        ),
    ],
    outputs="text",
    title="PDF Query System",
    description="Upload PDF files and ask questions to extract information from them.",
)

if __name__ == "__main__":
    iface.launch()