import os
import requests
import gradio as gr

# === Model Settings ===
MODEL = "tiiuae/falcon-180B-chat"   # or a smaller model for testing
API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"

# Read the access token from the environment. Surrounding whitespace (e.g. a
# trailing newline from a pasted secret) is stripped so it cannot end up in
# the HTTP header value.
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

# Only send an Authorization header when a token is actually configured;
# otherwise the request would carry "Bearer " with an empty credential.
HEADERS = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

def query_hf(payload: dict):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded reply.

    Args:
        payload: JSON-serialisable request body.

    Returns:
        The decoded JSON body of the response. If the request or the JSON
        decoding raises, a dict of the form ``{"error": "<message>"}`` is
        returned instead of propagating the exception.
    """
    try:
        response = requests.post(
            API_URL,
            headers=HEADERS,
            json=payload,
            timeout=120,
        )
        result = response.json()
    except Exception as exc:
        # Best-effort boundary: hand the failure text back to the caller
        # rather than raising.
        result = {"error": str(exc)}
    return result

def chat_fn(user_message, history):
    """Send one user message to the model and record the exchange.

    Args:
        user_message: Text entered by the user.
        history: List of ``(user_message, bot_message)`` tuples for the
            conversation so far. It is extended in place. ``None`` is
            treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the updated history and an empty string.
        Blank or whitespace-only messages are ignored and the history is
        returned unchanged.
    """
    # Compare against None explicitly: an empty list must stay the very same
    # object so that the in-place append below is visible to the caller.
    if history is None:
        history = []

    # Nothing to ask the model: skip the network round trip entirely.
    if not user_message or not user_message.strip():
        return history, ""

    # Prepare prompt
    payload = {
        "inputs": user_message,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            # Request only the completion so the reply does not start with
            # an echo of the user's own prompt.
            "return_full_text": False,
        },
    }
    output = query_hf(payload)

    # The reply is either a list whose first item holds the text or a single
    # dict; normalise both shapes to one candidate before looking up the key.
    candidate = output[0] if isinstance(output, list) and output else output
    if isinstance(candidate, dict) and "generated_text" in candidate:
        bot_message = candidate["generated_text"]
    else:
        # Unknown shape (including {"error": ...}): show the raw reply.
        bot_message = str(output)

    history.append((user_message, bot_message))
    return history, ""

# Page styling: fixed-height scrollable chat panel, 800px-wide container with
# auto margins, and the footer/logo elements hidden.
_APP_CSS = """
#chatbot {height: 70vh; overflow: auto;}
.gradio-container {max-width: 800px; margin: auto !important;}
footer, .built-with, .logo, .svelte-1ipelgc {display: none !important;}
"""

with gr.Blocks(css=_APP_CSS) as demo:
    gr.Markdown("## 🤖 Falcon Chatbot")

    # Conversation display.
    with gr.Row():
        chatbot = gr.Chatbot(
            elem_id="chatbot",
            bubble_full_width=False,
            show_copy_button=True,
        )

    # Input row: message box plus a send button.
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...", scale=9)
        send = gr.Button("Send", scale=1)

    # Conversation history. It is wired as an input only; chat_fn appends to
    # the list in place and returns it for display in the chatbot.
    state = gr.State([])

    # Clicking "Send" and pressing Enter in the textbox run the same handler:
    # the history goes to the chatbot and the textbox receives "".
    for trigger in (send.click, msg.submit):
        trigger(chat_fn, [msg, state], [chatbot, msg])

demo.queue().launch()