import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import warnings

# 1. Fault-tolerant configuration
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"  # Model optimized for Spaces
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
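# Rough sizing note (assumption, not checked at runtime): a 7B-parameter model
# in float16 needs about 14 GB for weights alone (7B params x 2 bytes), so a
# GPU-backed Space is assumed; CPU-only hardware will be very slow.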

# 2. Safe model loading
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",       # requires the `accelerate` package
        low_cpu_mem_usage=True
    )
    # No manual model.to(DEVICE) here: device_map="auto" already places the
    # weights, and calling .to() on an accelerate-dispatched model can error.
except Exception as e:
    # gr.Error is meant for event handlers, not import time
    raise RuntimeError(f"Initialization error: {e}")

# 3. Improved chat function
def generate_response(message, history):
    try:
        # Free cached GPU memory between requests
        if DEVICE == "cuda":
            torch.cuda.empty_cache()

        # Build the prompt from the conversation history plus the new message
        # (ChatInterface passes history as [user, assistant] pairs by default)
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})
        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
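        # For Zephyr, the rendered prompt looks roughly like
        # "<|user|>\nHello</s>\n<|assistant|>\n", per the model's chat template.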
        
        # Generation with conservative parameters
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        # Decode only the newly generated tokens; splitting the full decoded
        # text on "assistant\n" is fragile once special tokens are stripped
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        
    except Exception as e:
        warnings.warn(str(e))
        return f"Error: {str(e)}"

# 4. Fail-safe interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🤖 Chatbot Gerardo - Stable Version")
    chatbot = gr.ChatInterface(
        fn=generate_response,
        examples=["Hello", "How are you?"],
        title="Custom Chatbot",
        description="AI assistant created by Gerardo",
        cache_examples=False
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
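
# Deployment note: on a Hugging Face Space this file is typically saved as
# app.py, and device_map="auto" depends on accelerate, so requirements.txt
# would normally list torch, transformers, accelerate, and gradio.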