|
import gradio as gr |
|
from transformers import pipeline |
|
import torch |
|
import os |
|
from huggingface_hub import login |
|
|
|
|
|
# Authenticate with the Hugging Face Hub when a token is provided via the
# HF_TOKEN environment variable. A missing variable no longer aborts start-up
# with KeyError; the login step is simply skipped.
# NOTE(review): assumes the checkpoint loaded by this script can be downloaded
# anonymously -- confirm before relying on the token-less path.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
|
|
|
|
|
# Device index handed to the transformers pipeline: 0 (first CUDA GPU) when
# CUDA is available, otherwise -1 (the pipeline's CPU setting).
device = 0 if torch.cuda.is_available() else -1
|
|
|
|
|
# Build the text-generation pipeline once at import time so every request
# served by the UI reuses the same loaded model.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device=device,
)
|
|
|
|
|
def responder(prompt: str) -> str:
    """Generate the assistant's reply to a single user message.

    Wraps ``prompt`` in the script's chat template, samples up to 100 new
    tokens from the module-level ``pipe`` and returns only the newly
    generated text.

    Args:
        prompt: The user's message as entered in the UI.

    Returns:
        The generated reply with surrounding whitespace removed, or an empty
        string when ``prompt`` is empty or contains only whitespace.
    """
    # Blank submissions have nothing to answer; skip the model call entirely.
    if not prompt or not prompt.strip():
        return ""

    # NOTE(review): this template is hand-written. The model's tokenizer ships
    # its own chat template (tokenizer.apply_chat_template) -- confirm whether
    # the separators used here match what the checkpoint expects.
    prompt_chat = f"<|system|>Eres un asistente útil.<|user|>{prompt}<|assistant|>"

    outputs = pipe(
        prompt_chat,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # Ask for the continuation only. Stripping the prompt afterwards with
        # str.replace would also delete any later occurrence of the same text
        # inside the generated reply.
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()
|
|
|
|
|
# Minimal Gradio UI: a title, one input box and one output box.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 AmInside 1.0 – Asistente rápido y conversacional")
    entrada = gr.Textbox(label="Escribe tu mensaje")
    salida = gr.Textbox(label="Respuesta")
    # Submitting the input box sends its text to responder() and writes the
    # returned reply into the output box.
    entrada.submit(fn=responder, inputs=entrada, outputs=salida)

# Start the web server for the interface defined above.
demo.launch()
|
|