import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Load the 4-bit quantized Phi-4 Mini Reasoning model and its tokenizer.
model_name = "unsloth/Phi-4-mini-reasoning-unsloth-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # place layers on available GPU(s)/CPU automatically
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


def chat_fn(prompt: str) -> str:
    """Generate a response for the given prompt, with basic input/output guards."""
    if not prompt.strip():
        return "Please enter a prompt."
    try:
        output = pipe(
            prompt,
            max_new_tokens=128,
            do_sample=False,                      # greedy decoding for determinism
            pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
            return_full_text=False,               # return only the newly generated text
        )
        generated_text = output[0]["generated_text"]
        # Truncate to 500 characters and drop any characters that cannot be
        # encoded as UTF-8 (e.g. stray surrogates).
        final_result = generated_text[:500].encode("utf-8", "ignore").decode("utf-8")
        if not final_result.strip():
            return "The model did not generate a response. Try a different prompt."
        return final_result
    except Exception as e:
        print(f"An error occurred during text generation: {e}")
        return (
            "[ERROR] An issue occurred while generating the response. "
            f"Please try again or simplify your prompt. Details: {str(e)[:200]}"
        )


demo_interface = gr.Interface(
    fn=chat_fn,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here...", label="Your Prompt"),
    outputs=gr.Textbox(lines=10, label="Generated Response"),
    title="Phi-4 Mini Reasoning Chatbot",
    description="Ask the Phi-4 Mini Reasoning model anything. Responses are limited to prevent errors.",
    allow_flagging="never",  # renamed to flagging_mode in newer Gradio releases
)

demo_interface.launch()
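
# Note: Phi-4-mini-reasoning is an instruct-tuned chat model, so raw prompts may
# produce weaker output than chat-formatted ones. A minimal sketch of how the
# prompt could be wrapped with the tokenizer's chat template before calling the
# pipeline (format_prompt is a hypothetical helper, not part of this script):
#
# def format_prompt(user_prompt: str) -> str:
#     messages = [{"role": "user", "content": user_prompt}]
#     return tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#
# Inside chat_fn, the pipeline would then be called with format_prompt(prompt)
# instead of the raw prompt string.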