from llama_index import SimpleDirectoryReader, LLMPredictor, PromptHelper, StorageContext, ServiceContext, GPTVectorStoreIndex, load_index_from_storage from langchain.chat_models import ChatOpenAI import gradio as gr import sys import os import openai from ratelimit import limits, sleep_and_retry # Set the OpenAI API key os.environ["OPENAI_API_KEY"] = os.environ.get("openai_key") openai.api_key = os.environ["OPENAI_API_KEY"] # Define the rate limit for API calls (requests per second) RATE_LIMIT = 3 # Implement the rate limiting decorator #@sleep_and_retry #@limits(calls=RATE_LIMIT, period=1) def create_service_context(): # Constraint parameters ORIGINAL # max_input_size = 4096 # num_outputs = 512 # max_chunk_overlap = 20 # chunk_size_limit = 600 # Allows the user to explicitly set certain constraint parameters # prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit) max_input_size = 4096 num_outputs = 512 max_chunk_overlap = 20 chunk_size_limit = 600 prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio= 0.1, chunk_size_limit=chunk_size_limit) # llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.7, model_name="gpt-4", max_tokens=num_outputs)) # LLMPredictor is a wrapper class around LangChain's LLMChain that allows easy integration into LlamaIndex llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo", max_tokens=num_outputs)) # Constructs service_context service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper) return service_context # Implement the rate limiting decorator #@sleep_and_retry #@limits(calls=RATE_LIMIT, period=1) def data_ingestion_indexing(directory_path): # Loads data from the specified directory path documents = SimpleDirectoryReader(directory_path).load_data() # When first building the index index = GPTVectorStoreIndex.from_documents( documents, service_context=create_service_context() ) # Persist index to disk, default "storage" folder index.storage_context.persist() return index def data_querying(input_text): # Rebuild storage context storage_context = StorageContext.from_defaults(persist_dir="./storage") # Loads index from storage index = load_index_from_storage(storage_context, service_context=create_service_context()) # Queries the index with the input text response = index.as_query_engine().query(input_text) return response.response with gr.Blocks() as demo: chatbot = gr.Chatbot() msg = gr.Textbox() def respond(message, chat_history): original_message = message chat_history_strings = [str(msg) for msg in chat_history] # Ensure that chat_history only contains strings message = "As a therapy chatbot designed specifically to assist teenagers and young adults \ please provide a thorough and detailed response by explaining your capabilities, \ features, and methods for helping individuals in this age group. \ In order to receive the most precise, comprehensive, and high-quality response, \ please provide your answer while keeping in mind the following guidelines: \n1. \ Make sure your response is prompt, without unnecessary delays.\n2. \ Aim for perfection by providing accurate and well-thought-out information. \n3. \ Maintain a supportive and social tone, similar to that of a human conversation. \ Keep all responses at a maximum of 30 words when users tell you how they feel and give them advice.\n4. \ Never say \"Based on the given context information\" or \"as a therapy chatbot.\" \ The patient asks you the following question: " + message + "\n \ Previous questions from chat history: " + ' '.join(chat_history_strings) bot_message = data_querying(message) + ' '.join(chat_history_strings) chat_history.append((original_message, bot_message)) return "", chat_history msg.submit(respond, [msg, chatbot], [msg, chatbot]) # Passes in data directory index = data_ingestion_indexing("therapy2") # Launch the Gradio app demo.launch()