import os
import uuid

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

# Initialize the model and prompt template
chat = Ollama(model="llama3:latest")

prompt = ChatPromptTemplate.from_messages([
    ("system", """
    You are a helpful AI assistant. Your task is to engage in conversation with users,
    answer their questions, and assist them with various tasks.
    Communicate politely and maintain focus on the user's needs.
    Keep responses concise, typically two to three sentences.
    """),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])

runnable = prompt | chat

# Keep one ChatMessageHistory per session id so the conversation persists
# between turns. (Returning a fresh ChatMessageHistory from a lambda would
# silently discard the history on every call.)
message_histories = {}

def get_session_history(session_id):
    if session_id not in message_histories:
        message_histories[session_id] = ChatMessageHistory()
    return message_histories[session_id]

with_message_history = RunnableWithMessageHistory(
    runnable,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

def text_to_speech(text, file_name):
    # Synthesize the response with gTTS and save it in the working directory.
    tts = gTTS(text=text, lang='en', slow=False)
    file_path = os.path.join(os.getcwd(), file_name)
    tts.save(file_path)
    return file_path

def speech_to_text(audio):
    if audio is None:
        return "No audio input received."
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
            print(text)
            return text
        except sr.UnknownValueError:
            return "Speech recognition could not understand the audio"
        except sr.RequestError:
            return "Could not request results from the speech recognition service"
    except Exception as e:
        return f"Error processing audio: {str(e)}"

def chat_function(input_type, text_input=None, audio_input=None, history=None):
    if history is None:
        history = []

    if input_type == "text":
        user_input = text_input
    elif input_type == "audio":
        if audio_input is not None:
            user_input = speech_to_text(audio_input)
        else:
            user_input = "No audio input received."
    else:
        return history, None

    print(f"User input: {user_input}")  # Debug information

    # Get LLM response
    response = with_message_history.invoke(
        {"input": user_input},
        config={"configurable": {"session_id": "chat_history"}},
    )

    # Generate audio for the LLM response
    audio_file = f"response_{uuid.uuid4()}.mp3"
    audio_path = text_to_speech(response, audio_file)

    # Update history in the (user, assistant) tuple format gr.Chatbot expects
    history.append((user_input, response))

    return history, audio_path

# Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        text_input = gr.Textbox(placeholder="Type your message here...")
        audio_input = gr.Audio(sources=['microphone'], type="filepath")
    with gr.Row():
        text_button = gr.Button("Send Text")
        audio_button = gr.Button("Send Audio")
    audio_output = gr.Audio()

    def on_audio_change(audio):
        # Show the transcription in the textbox as soon as a recording arrives.
        if audio is not None:
            return speech_to_text(audio)
        return ""

    audio_input.change(on_audio_change, inputs=[audio_input], outputs=[text_input])

    # gr.State carries the constant input_type flag without rendering a stray
    # Textbox in the layout, and the chatbot only needs to appear once in outputs.
    text_button.click(
        chat_function,
        inputs=[gr.State("text"), text_input, audio_input, chatbot],
        outputs=[chatbot, audio_output],
    )
    audio_button.click(
        chat_function,
        inputs=[gr.State("audio"), text_input, audio_input, chatbot],
        outputs=[chatbot, audio_output],
    )

demo.launch(server_name='0.0.0.0', share=True, max_threads=10)
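
# --- Optional cleanup sketch (an assumption, not part of the original script) ---
# chat_function writes a new response_<uuid>.mp3 into the working directory on
# every turn and nothing removes them. Since demo.launch() blocks until the
# server shuts down, a best-effort sweep here runs on exit. The glob pattern
# below is illustrative; adjust it if you change the file naming above.
import glob

for _path in glob.glob(os.path.join(os.getcwd(), "response_*.mp3")):
    try:
        os.remove(_path)
    except OSError:
        pass  # best-effort cleanup; skip files that cannot be deleted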