"""Minimal Gradio chat UI backed by the Hugging Face Inference API."""

import os

import gradio as gr
import requests

# === Model Settings ===
MODEL = "tiiuae/falcon-180B-chat"  # or a smaller model for testing
API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"

# Only send an Authorization header when a token is actually configured;
# otherwise the request would carry an empty "Bearer " credential.
_HF_TOKEN = os.environ.get("HF_TOKEN", "")
HEADERS = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

CSS = """
#chatbot {height: 70vh; overflow: auto;}
.gradio-container {max-width: 800px; margin: auto !important;}
footer, .built-with, .logo, .svelte-1ipelgc {display: none !important;}
"""


def query_hf(payload: dict):
    """POST *payload* to ``API_URL`` and return the decoded JSON body.

    Transport failures and undecodable bodies are not raised; they are
    reported as ``{"error": "<message>"}`` so the caller can display them.
    """
    try:
        resp = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
        return resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return {"error": str(e)}


def _extract_reply(output) -> str:
    """Return the generated text from an API response, or its ``str()`` form."""
    if isinstance(output, list) and output:
        first = output[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
    elif isinstance(output, dict) and "generated_text" in output:
        return output["generated_text"]
    # Anything else (typically an {"error": ...} dict) is shown verbatim.
    return str(output)


def chat_fn(user_message, history):
    """Send *user_message* to the model and record the exchange in *history*.

    Args:
        user_message: Text typed by the user.
        history: List of ``(user, bot)`` pairs; it is extended in place.

    Returns:
        ``(history, "")`` -- the updated history for the chatbot component
        and an empty string that clears the input textbox.
    """
    if history is None:
        history = []

    # Do not spend a (slow) API call on an empty or whitespace-only message.
    if not user_message or not user_message.strip():
        return history, ""

    # Prepare prompt
    payload = {
        "inputs": user_message,
        "parameters": {
            "max_new_tokens": 512,
            "temperature": 0.7,
            # Without this the text-generation endpoint returns the prompt
            # followed by the completion, so the bot would echo the user.
            "return_full_text": False,
        },
    }
    bot_message = _extract_reply(query_hf(payload))

    # NOTE(review): the event wiring below does not list `state` as an output,
    # so keeping the conversation between turns appears to rely on this
    # in-place mutation -- do not replace it with a copy without rewiring.
    history.append((user_message, bot_message))
    return history, ""


with gr.Blocks(css=CSS) as demo:
    gr.Markdown("## 🤖 Falcon Chatbot")

    with gr.Row():
        chatbot = gr.Chatbot(
            elem_id="chatbot",
            bubble_full_width=False,
            show_copy_button=True,
        )

    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...", scale=9)
        send = gr.Button("Send", scale=1)

    state = gr.State([])

    # Both the button and pressing Enter in the textbox submit the message.
    send.click(chat_fn, [msg, state], [chatbot, msg])
    msg.submit(chat_fn, [msg, state], [chatbot, msg])


if __name__ == "__main__":
    # Guarded so that importing this module does not start a web server.
    demo.queue().launch()