import transformers  # Added since we use transformers.pipeline below
from transformers import AutoModelForCausalLM, AutoTokenizer
from llama_index.llm_predictor import LLMPredictor  # Updated import path for LLMPredictor
from llama_index import (
    PromptHelper,
    StorageContext,
    ServiceContext,
    load_index_from_storage,
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
)
from langchain.llms import HuggingFacePipeline
import torch
import gradio as gr
from ratelimit import limits, sleep_and_retry

# Configure device
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def create_llm_pipeline():
    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1",
        trust_remote_code=True,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1")

    # Create pipeline. device_map="auto" already places the model via
    # accelerate, so no explicit `device` argument is passed here; supplying
    # one alongside device_map="auto" raises an error in transformers.
    pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=2048,  # cap generated tokens; max_length would also count the prompt
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
    )
    return HuggingFacePipeline(pipeline=pipeline)

# Define the rate limit for query processing (applied to data_querying below)
RATE_LIMIT = 3

def create_service_context():
    # Constraint parameters
    max_input_size = 4096
    num_outputs = 2048  # Adjusted for DeepSeek model
    chunk_size_limit = 600

    # Create prompt helper (10% chunk overlap)
    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        chunk_overlap_ratio=0.1,
        chunk_size_limit=chunk_size_limit,
    )

    # Create LLM predictor with DeepSeek model
    llm = create_llm_pipeline()
    llm_predictor = LLMPredictor(llm=llm)

    # Create service context
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )
    return service_context
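
# Building the service context is expensive (it loads the DeepSeek model), so
# cache it in a module-level variable and reuse it across calls. A minimal
# caching sketch; the helper name get_service_context is illustrative.
_SERVICE_CONTEXT = None

def get_service_context():
    global _SERVICE_CONTEXT
    if _SERVICE_CONTEXT is None:
        _SERVICE_CONTEXT = create_service_context()
    return _SERVICE_CONTEXT
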
def data_ingestion_indexing(directory_path):
    # Load documents
    documents = SimpleDirectoryReader(directory_path).load_data()

    # Create index
    index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=get_service_context(),
    )

    # Persist index (defaults to ./storage)
    index.storage_context.persist()
    return index
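
# Enforce RATE_LIMIT on queries via the imported ratelimit decorators;
# the 60-second window here is an assumption, adjust the period as needed.
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=60)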
def data_querying(input_text):
    # Load stored index
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(
        storage_context,
        service_context=get_service_context(),
    )

    # Query the index
    response = index.as_query_engine().query(input_text)
    return response.response

# Create Gradio interface
iface = gr.Interface(
    fn=data_querying,
    inputs=gr.components.Textbox(
        lines=20,
        label="Enter your question",
    ),
    outputs=gr.components.Textbox(
        lines=25,
        label="Response",
    ),
    title="Philosophy QA - Aristotle Complete Works (Using DeepSeek-R1)",
)

# Initialize the system
if __name__ == "__main__":
    # Create initial index
    index = data_ingestion_indexing("books")

    # Launch the interface
    iface.launch()