# philosophy_aristotle/philosophy.py
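"""Philosophy QA over Aristotle's complete works.

Indexes the documents in the local "books" directory with llama_index
(GPTVectorStoreIndex), answers questions with the DeepSeek-R1 model wrapped
in a HuggingFace/LangChain pipeline, and serves the result through a Gradio
text interface.
"""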
import transformers  # transformers.pipeline is used to build the generation pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
from llama_index.llm_predictor import LLMPredictor
from llama_index import (
    PromptHelper,
    StorageContext,
    ServiceContext,
    load_index_from_storage,
    SimpleDirectoryReader,
    GPTVectorStoreIndex
)
from langchain.llms import HuggingFacePipeline
import torch
import gradio as gr
from ratelimit import limits, sleep_and_retry
# Configure device
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
def create_llm_pipeline():
    # Load the DeepSeek-R1 model and tokenizer from the Hugging Face Hub
    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1",
        trust_remote_code=True,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1")
    # Create the text-generation pipeline; device placement is already handled by
    # device_map="auto", so no explicit device index is passed here
    pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=2048,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
    )
    # Wrap the pipeline so llama_index/LangChain can use it as an LLM
    return HuggingFacePipeline(pipeline=pipeline)
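# Quick sanity-check sketch (hypothetical, kept commented out so the model is not
# loaded at import time; assumes the LangChain LLM call interface):
#
#     llm = create_llm_pipeline()
#     print(llm("What is virtue, according to Aristotle?"))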
# Rate limit: at most RATE_LIMIT calls per second to the decorated functions
RATE_LIMIT = 3
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def create_service_context():
    # Constraint parameters for prompt construction and chunking
    max_input_size = 4096
    num_outputs = 2048  # Adjusted for the DeepSeek model
    chunk_size_limit = 600
    # Create prompt helper (chunk overlap is expressed as a ratio of the chunk size)
    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        chunk_overlap_ratio=0.1,
        chunk_size_limit=chunk_size_limit
    )
    # Create LLM predictor wrapping the DeepSeek pipeline
    llm = create_llm_pipeline()
    llm_predictor = LLMPredictor(llm=llm)
    # Create service context
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper
    )
    return service_context
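# Note: create_service_context() is called by both data_ingestion_indexing() and
# data_querying(), so the DeepSeek pipeline is rebuilt (and the model reloaded) on
# each call. A minimal caching sketch, assuming a single shared context is
# acceptable, could look like this (illustrative only, not wired into the code below):
#
#     from functools import lru_cache
#
#     @lru_cache(maxsize=1)
#     def get_service_context():
#         return create_service_context()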
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def data_ingestion_indexing(directory_path):
    # Load documents from the given directory (e.g. the Aristotle texts in "books/")
    documents = SimpleDirectoryReader(directory_path).load_data()
    # Build the vector store index over the documents
    index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=create_service_context()
    )
    # Persist the index to disk (defaults to ./storage, which data_querying loads)
    index.storage_context.persist()
    return index
def data_querying(input_text):
    # Load the stored index from disk
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(
        storage_context,
        service_context=create_service_context()
    )
    # Query the index and return the plain-text answer
    response = index.as_query_engine().query(input_text)
    return response.response
# Create Gradio interface
iface = gr.Interface(
    fn=data_querying,
    inputs=gr.components.Textbox(
        lines=20,
        label="Enter your question"
    ),
    outputs=gr.components.Textbox(
        lines=25,
        label="Response"
    ),
    title="Philosophy QA - Aristotle Complete Works (Using DeepSeek-R1)"
)
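# Usage sketch (assumption: the Aristotle texts are stored as readable files,
# e.g. .txt or .pdf, in a "books/" directory next to this script):
#
#     python philosophy.py
#
# Each run re-indexes "books/", persists the index under ./storage, and then
# launches the Gradio QA interface; data_querying() reloads that index per query.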
# Initialize the system
if __name__ == "__main__":
    # Build the initial index from the "books" directory
    index = data_ingestion_indexing("books")
    # Launch the Gradio interface
    iface.launch()