from llama_index import SimpleDirectoryReader, LLMPredictor, PromptHelper, StorageContext, ServiceContext, GPTVectorStoreIndex, load_index_from_storage
from langchain.chat_models import ChatOpenAI
import gradio as gr
import sys
import os
import openai
from ratelimit import limits, sleep_and_retry

# Set the OpenAI API key
os.environ["OPENAI_API_KEY"] = os.environ.get("openai_key")
openai.api_key = os.environ["OPENAI_API_KEY"]

# Define the rate limit for API calls (requests per second)
RATE_LIMIT = 3

# Implement the rate limiting decorator
#@sleep_and_retry
#@limits(calls=RATE_LIMIT, period=1)
def create_service_context():

    # Constraint parameters ORIGINAL
#    max_input_size = 4096
#    num_outputs = 512
#    max_chunk_overlap = 20
#    chunk_size_limit = 600

    # Allows the user to explicitly set certain constraint parameters
#    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
    max_input_size = 4096
    num_outputs = 512
    max_chunk_overlap = 20
    chunk_size_limit = 600 
    prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio= 0.1, chunk_size_limit=chunk_size_limit)
#    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.7, model_name="gpt-4", max_tokens=num_outputs))

    
    # LLMPredictor is a wrapper class around LangChain's LLMChain that allows easy integration into LlamaIndex
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo", max_tokens=num_outputs))

    # Constructs service_context
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    return service_context

# Implement the rate limiting decorator
#@sleep_and_retry
#@limits(calls=RATE_LIMIT, period=1)
def data_ingestion_indexing(directory_path):

    # Loads data from the specified directory path
    documents = SimpleDirectoryReader(directory_path).load_data()
    
    # When first building the index
    index = GPTVectorStoreIndex.from_documents(
        documents, service_context=create_service_context()
    )

    # Persist index to disk, default "storage" folder
    index.storage_context.persist()

    return index

def data_querying(input_text):

    # Rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir="./storage")

    # Loads index from storage
    index = load_index_from_storage(storage_context, service_context=create_service_context())
    
    # Queries the index with the input text
    response = index.as_query_engine().query(input_text)
    
    return response.response

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()

    def respond(message, chat_history):
        original_message = message
        chat_history_strings = [str(msg) for msg in chat_history]  # Ensure that chat_history only contains strings
        message = "As a therapy chatbot designed specifically to assist teenagers and young adults \
please provide a thorough and detailed response by explaining your capabilities, \
features, and methods for helping individuals in this age group. \
In order to receive the most precise, comprehensive, and high-quality response, \
please provide your answer while keeping in mind the following guidelines: \n1. \
Make sure your response is prompt, without unnecessary delays.\n2. \
Aim for perfection by providing accurate and well-thought-out information. \n3. \
Maintain a supportive and social tone, similar to that of a human conversation. \
Keep all responses at a maximum of 30 words when users tell you how they feel and give them advice.\n4. \
Never say \"Based on the given context information\" or \"as a therapy chatbot.\" \
The patient asks you the following question: " + message + "\n \
Previous questions from chat history: " + ' '.join(chat_history_strings)
        bot_message = data_querying(message) + ' '.join(chat_history_strings)
        chat_history.append((original_message, bot_message))
        
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Passes in data directory
index = data_ingestion_indexing("therapy2")

# Launch the Gradio app
demo.launch()