Update app.py
app.py CHANGED
@@ -3,38 +3,38 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
 import gradio as gr
 
-# 1. Model configuration
-MODEL_NAME = "
+# 1. Robust model configuration (we use a lighter one)
+MODEL_NAME = "microsoft/phi-2"  # Efficient model for Spaces
 
-# 2. Loading
+# 2. Load with error handling
 try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         torch_dtype=torch.float16,
         device_map="auto"
     )
 except Exception as e:
-    raise gr.Error(f"Error loading the model: {str(e)}")
+    raise gr.Error(f"❌ Error loading the model: {str(e)}")
 
-# 3. Generation function
-def generate_response(message, history):
+# 3. Improved generation function
+def chat_with_gerardo(message, history):
     try:
-        #
-
-
-
-
-            return_tensors="pt"
-        ).to(model.device)
+        # Build the prompt manually
+        prompt = "You are Gerardo, a helpful AI assistant. Answer questions clearly.\n\n"
+        for user_msg, bot_msg in history:
+            prompt += f"User: {user_msg}\nGerardo: {bot_msg}\n"
+        prompt += f"User: {message}\nGerardo:"
 
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         streamer = TextIteratorStreamer(tokenizer)
 
         generation_kwargs = dict(
-            inputs
+            inputs,
             streamer=streamer,
-            max_new_tokens=
-            temperature=0.7
+            max_new_tokens=300,  # Reduced to avoid OOM
+            temperature=0.7,
+            pad_token_id=tokenizer.eos_token_id
         )
 
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
@@ -45,18 +45,21 @@ def generate_response(message, history):
             partial_message += new_token
             yield partial_message
 
+    except torch.cuda.OutOfMemoryError:
+        yield "⚠️ Error: GPU memory exhausted. Try a shorter query."
     except Exception as e:
-
+        yield f"❌ Error: {str(e)}"
 
-# 4.
-with gr.Blocks(title="Chatbot Gerardo") as demo:
-
-
-
-
-
+# 4. Interface with optimized configuration
+with gr.Blocks(title="Chatbot Gerardo v2.0") as demo:
+    gr.Markdown("## 🤖 Chatbot created by Gerardo")
+    gr.ChatInterface(
+        fn=chat_with_gerardo,
+        examples=["Hi Gerardo", "What can you do?"],
+        cache_examples=False  # Important for Spaces
     )
 
-# 5. Launch
+# 5. Hugging Face-specific launch
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
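For a quick local sanity check of the threaded streaming pattern this change relies on (a worker Thread running model.generate while the main thread iterates the TextIteratorStreamer), here is a minimal standalone sketch. The tiny checkpoint sshleifer/tiny-gpt2 is an assumed stand-in so it runs on CPU; note that skip_prompt=True and skip_special_tokens=True are extra decode options not used in app.py above, where the streamer as configured will echo the prompt back into the chat window.

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Assumed stand-in checkpoint so the sketch runs quickly on CPU; any causal LM works.
name = "sshleifer/tiny-gpt2"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

inputs = tokenizer("User: hi\nGerardo:", return_tensors="pt")
# skip_prompt=True keeps the echoed prompt out of the stream;
# skip_special_tokens=True strips EOS/PAD from the decoded chunks.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Same pattern as app.py: generate() runs in a worker thread and pushes
# decoded text into the streamer, which the main thread consumes.
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20,
                         pad_token_id=tokenizer.eos_token_id)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

partial = ""
for chunk in streamer:  # blocks until the worker produces text
    partial += chunk
    print(partial)
thread.join()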
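On dependencies: device_map="auto" in from_pretrained is dispatched through the accelerate library, so the Space needs it installed alongside the obvious packages. A plausible requirements.txt, unpinned and offered as an assumption rather than the repo's actual file:

torch
transformers
accelerate  # needed for device_map="auto" in from_pretrained
gradio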