import time

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("lambdaindie/lambdai")

css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');

body {
    background-color: #111;
    color: #e0e0e0;
    font-family: 'JetBrains Mono', monospace;
    margin: 0;
    padding: 0;
    width: 100vw;
    overflow-x: hidden;
}

.gradio-container, .gr-block.gr-box {
    width: 100vw !important;
    max-width: 100vw !important;
    box-sizing: border-box;
}

.gr-button {
    background: linear-gradient(to right, #2a2a2a, #1f1f1f);
    color: white;
    border-radius: 10px;
    padding: 8px 16px;
    font-weight: bold;
    font-family: 'JetBrains Mono', monospace;
}

.gr-button:hover {
    background: #333;
}

.gr-textbox textarea {
    background-color: #181818 !important;
    color: #fff !important;
    font-family: 'JetBrains Mono', monospace !important;
    border-radius: 8px;
}

.gr-chat-message, .gr-chatbot {
    font-family: 'JetBrains Mono', monospace !important;
    width: 100% !important;
    max-width: 100% !important;
}

.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    font-family: 'JetBrains Mono', monospace;
    animation: pulse 1.5s infinite ease-in-out;
}

@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the conversation so far in chat-completion format.
    messages = [{"role": "system", "content": system_message}] if system_message else []

    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    # Phase 1: ask the model to reason step by step before answering.
    thinking_prompt = messages + [
        {
            "role": "user",
            "content": f"{message}\n\nThink step-by-step before answering."
        }
    ]

    reasoning = ""
    yield '<div class="markdown-think">Thinking...</div>'

    # Stream the reasoning tokens, re-rendering the styled "thinking" box
    # on every chunk so the user sees the thought grow in place.
    for chunk in client.chat_completion(
        thinking_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        reasoning += token
        styled_thought = f'<div class="markdown-think">{reasoning.strip()}</div>'
        yield styled_thought

    time.sleep(0.5)

    # Phase 2: feed the reasoning back as an assistant turn and stream
    # the final answer conditioned on it.
    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning.strip()},
        {"role": "user", "content": "Now answer based on your reasoning above."}
    ]

    final_answer = ""
    for chunk in client.chat_completion(
        final_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        final_answer += token
        yield final_answer.strip()

demo = gr.ChatInterface(
    fn=respond,
    title="λmabdAI",  # Visible title
    theme=gr.themes.Base(primary_hue="gray"),
    css=css,
    additional_inputs=[
        gr.Textbox(
            value="You are a concise, logical AI that explains its reasoning clearly before answering.",
            label="System Message"
        ),
        gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
    ]
)

if __name__ == "__main__":
    demo.launch()
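
# Usage note, a sketch under assumptions rather than part of the original
# Space: assuming this file is saved as app.py, it should run locally with
#
#   pip install gradio huggingface_hub
#   python app.py
#
# If the lambdaindie/lambdai endpoint requires authentication, huggingface_hub
# picks up a token from the HF_TOKEN environment variable or the locally
# saved login (`huggingface-cli login`); no token argument is passed to
# InferenceClient above, so it falls back to those defaults.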