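# Phi-4 Mini Reasoning Chatbot — a minimal Gradio demo for the
# unsloth/Phi-4-mini-reasoning-unsloth-bnb-4bit checkpoint.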
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
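
# A pre-quantized 4-bit (bitsandbytes) build of Microsoft's Phi-4-mini-reasoning;
# loading it needs the bitsandbytes package and normally a CUDA GPU.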
model_name = "unsloth/Phi-4-mini-reasoning-unsloth-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
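
# device_map="auto" lets accelerate place the weights across available devices;
# torch.float16 is the compute dtype used alongside the 4-bit weights.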
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
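
# Wrap the model and tokenizer in a text-generation pipeline so the handler
# below can call it with a plain string prompt.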
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
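
# Handler for the Gradio interface: validates the input, generates up to 128
# new tokens greedily, and returns a sanitized, length-capped string.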
def chat_fn(prompt: str) -> str:
    if not prompt.strip():
        return "Please enter a prompt."
    try:
        output = pipe(
            prompt,
            max_new_tokens=128,
            do_sample=False,  # greedy decoding for deterministic answers
            pad_token_id=tokenizer.eos_token_id,
            return_full_text=False,
        )
        generated_text = output[0]["generated_text"]
        # Cap the reply at 500 characters and strip anything that cannot be
        # UTF-8 encoded (e.g. lone surrogates).
        final_result = generated_text[:500].encode("utf-8", "ignore").decode("utf-8")
        if not final_result.strip():
            return "The model did not generate a response. Try a different prompt."
        return final_result
    except Exception as e:
        print(f"An error occurred during text generation: {e}")
        return (
            "[ERROR] An issue occurred while generating the response. "
            f"Please try again or simplify your prompt. Details: {str(e)[:200]}"
        )
demo_interface = gr.Interface(
    fn=chat_fn,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here...", label="Your Prompt"),
    outputs=gr.Textbox(lines=10, label="Generated Response"),
    title="Phi-4 Mini Reasoning Chatbot",
    description="Ask the Phi-4 Mini Reasoning model anything. Responses are limited to 128 new tokens and 500 characters.",
    allow_flagging="never",
)
demo_interface.launch()
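
# On Hugging Face Spaces, launch() with no arguments is enough; when running
# locally, you could pass share=True to get a temporary public link.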