import os
import shutil

import gradio as gr
from smolagents import ChatMessageToolCall, ActionStep, FinalAnswerStep

import utils
from agent import VideoChatbot
from configs import settings


# Module-level chatbot agent; every call to `chat` below goes through this one instance.
# The API key is read from the environment at import time, so a missing
# GEMINI_API_KEY raises KeyError before the UI is built.
bot = VideoChatbot(
    model=settings.CHATBOT_MODEL,
    api_base=settings.MODEL_BASE_API,
    api_key=os.environ['GEMINI_API_KEY']
)


def chat(message: dict, history: list[dict]):

    # move the file to the data directory
    message['files'] = [shutil.copy(file, settings.DATA_DIR) for file in message['files']]

    # add the input message to the history
    history.extend([{'role': 'user', 'content': {'path': file}} for file in message['files']])
    history.append({'role': 'user', 'content': message['text']})
    yield history, ''

    for step in bot.chat(message['text'], message['files']):
        match step:
            case ChatMessageToolCall():
                if step.function.name == 'download_video':
                    history.append({
                        'role': 'assistant',
                        'content': f'📥 Downloading video from {step.function.arguments["url"]}'
                    })
                elif step.function.name == 'index_video':
                    video_path = os.path.join(settings.DATA_DIR, step.function.arguments['filename'])
                    video_duration = utils.seconds_to_hms(int(utils.get_media_duration(video_path)))
                    history.append({
                        'role': 'assistant',
                        'content': f'🎥 Indexing video `{step.function.arguments["filename"]}` with length `{video_duration}` '
                                   f'to the knowledge base. This may take a while...'
                    })
                elif step.function.name == 'search_video_segments':
                    filename = os.path.basename(bot.video_rag.videos[step.function.arguments["video_id"]]['video_path'])
                    history.append({
                        'role': 'assistant',
                        'content': f'🔍 Searching video segments in `{filename}` '
                                   f'for query: *{step.function.arguments.get("text_query", step.function.arguments.get("image_query", ""))}*'
                    })
                elif step.function.name == 'read_video_segment':
                    filename = os.path.basename(bot.video_rag.videos[step.function.arguments["video_id"]]['video_path'])
                    history.append({
                        'role': 'assistant',
                        'content': f'📖 Reading video segment `{filename}` '
                                   f'from `{step.function.arguments["start"]}` to `{step.function.arguments["end"]}`'
                    })
                elif step.function.name == 'final_answer':
                    continue
                yield history, ''
            case ActionStep():
                yield history, ''
            case FinalAnswerStep():
                history.append({'role': 'assistant', 'content': step.output})
                yield history, ''


def clear_chat(chatbot):
    """Empty the chat history in place and blank the input textbox.

    Args:
        chatbot: Current chat history (the value of the chatbot component).

    Returns:
        A tuple of the now-empty history and a Gradio update that sets the
        second output's value to an empty string.
    """
    chatbot.clear()
    reset_input = gr.update(value='')
    return chatbot, reset_input


def main():
    """Build the Gradio interface for the video chatbot and launch it."""
    intro = (
        'This demo showcases a video chatbot that can process and search videos using '
        'RAG (Retrieval-Augmented Generation). You can upload videos/images or link to YouTube videos, '
        'ask questions, and get answers based on the video content.'
    )

    with gr.Blocks() as demo:
        # header
        gr.Markdown('# Video Chatbot Demo')
        gr.Markdown(intro)

        # conversation view and multimodal input
        chat_view = gr.Chatbot(type='messages', label='Video Chatbot', height=800, resizable=True)
        input_box = gr.MultimodalTextbox(
            sources=['upload'],
            file_types=['image', '.mp4'],
            show_label=False,
            placeholder='Type a message or upload an image/video...',
        )
        # submitting streams `chat` output into the chat view and the input box
        input_box.submit(fn=chat, inputs=[input_box, chat_view], outputs=[chat_view, input_box])

        clear_button = gr.Button('Clear Chat')
        clear_button.click(fn=clear_chat, inputs=[chat_view], outputs=[chat_view, input_box])

    demo.launch(debug=True)

# Start the demo only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()