88ggg committed
Commit ca1817d (verified)
1 Parent(s): e8dba54

Update app.py

Files changed (1)
  1. app.py +42 -61
app.py CHANGED
@@ -1,88 +1,69 @@
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import gradio as gr
- import logging

- # 1. Robust configuration
- MODEL_NAME = "microsoft/phi-2"  # lightweight, stable model
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

- # Configure logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- # 2. Improved model loading
  try:
-     logger.info("Cargando tokenizer...")
-     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-
-     logger.info("Cargando modelo...")
      model = AutoModelForCausalLM.from_pretrained(
          MODEL_NAME,
-         torch_dtype=torch.float32 if DEVICE == "cpu" else torch.float16,
-         device_map="auto"
      )
-
-     # Force the device if necessary
-     if hasattr(model, "device"):
-         logger.info(f"Modelo cargado en: {model.device}")
-     else:
-         model.to(DEVICE)
-         logger.info(f"Modelo movido a: {DEVICE}")
-
  except Exception as e:
-     logger.error(f"Error de carga: {str(e)}")
-     raise gr.Error(f"Error al iniciar el modelo: {str(e)}")

- # 3. Fail-safe generation function
  def generate_response(message, history):
      try:
-         logger.info(f"Generando respuesta para: {message}")
-
-         # Build the prompt by hand
-         prompt = f"Usuario: {message}\nAsistente:"

          inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)

-         # Generation settings
-         generation_config = {
-             "max_new_tokens": 150,
-             "temperature": 0.7,
-             "do_sample": True,
-             "pad_token_id": tokenizer.eos_token_id
-         }
-
-         # Safe generation
-         with torch.no_grad():
-             outputs = model.generate(**inputs, **generation_config)
-
          response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         clean_response = response.split("Asistente:")[-1].strip()
-
-         logger.info(f"Respuesta generada: {clean_response[:50]}...")
-         return clean_response

      except Exception as e:
-         logger.error(f"Error en generación: {str(e)}")
-         return f"⚠️ Error: {str(e)}"

- # 4. Hardened interface
- with gr.Blocks(title="Chatbot Gerardo HD") as demo:
-     gr.Markdown("""
-     ## 🤖 Chatbot de Gerardo
-     Versión estable sin errores de runtime
-     """)
-
-     chat_interface = gr.ChatInterface(
          fn=generate_response,
-         examples=["Hola", "¿Qué es IA?"],
          cache_examples=False
      )

- # 5. Optimized launch
  if __name__ == "__main__":
-     demo.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         show_error=True
-     )
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import gradio as gr
+ import warnings

+ # 1. Error-proof configuration
+ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"  # model optimized for Spaces
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

+ # 2. Safe model loading
  try:
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
      model = AutoModelForCausalLM.from_pretrained(
          MODEL_NAME,
+         torch_dtype=torch.float16,
+         device_map="auto",
+         low_cpu_mem_usage=True
      )
+     if DEVICE == "cuda":
+         model = model.to(DEVICE)
  except Exception as e:
+     raise gr.Error(f"Error inicialización: {str(e)}")

+ # 3. Improved chat function
  def generate_response(message, history):
      try:
+         # Free cached GPU memory between requests
+         if DEVICE == "cuda":
+             torch.cuda.empty_cache()

+         # Format the prompt with the model's own chat template
+         messages = [{"role": "user", "content": message}]
+         prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
+         )

+         # Generation with safe parameters
          inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=256,
+             temperature=0.7,
+             do_sample=True,
+             pad_token_id=tokenizer.eos_token_id
+         )

+         # Safe decoding
          response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return response.split("assistant\n")[-1].strip()

      except Exception as e:
+         warnings.warn(str(e))
+         return f"Error: {str(e)}"

+ # 4. Fail-safe interface
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("## 🤖 Chatbot Gerardo - Versión Estable")
+     chatbot = gr.ChatInterface(
          fn=generate_response,
+         examples=["Hola", "¿Cómo estás?"],
+         title="Chatbot Personalizado",
+         description="Asistente IA creado por Gerardo",
          cache_examples=False
      )

  if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
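
Note on the main change: the hand-built "Usuario:/Asistente:" prompt is replaced by tokenizer.apply_chat_template, which renders the conversation in the format zephyr-7b-beta was fine-tuned on. A minimal sketch of what that call produces for a single user turn (the exact markers come from the model's bundled template; the <|user|>/<|assistant|> tags in the comment below are what zephyr's template is documented to emit, not something guaranteed by this commit):

from transformers import AutoTokenizer

# Sketch: inspect the string the chat template builds for one user turn.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
messages = [{"role": "user", "content": "Hola"}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,             # return the formatted string, not token IDs
    add_generation_prompt=True  # append the assistant marker so the model answers next
)
print(prompt)  # roughly: "<|user|>\nHola</s>\n<|assistant|>\n"

Because the cleanup in generate_response depends on how those markers survive skip_special_tokens=True, the response.split("assistant\n") step is fragile. Slicing the generated IDs past the prompt length, e.g. tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True), isolates only the new reply regardless of the template's markers.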
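One further caveat on the loading path in this version: device_map="auto" already places the weights via accelerate, and calling model.to(DEVICE) on a dispatched model can raise a RuntimeError in recent transformers releases, so one of the two device mechanisms is normally enough. Loading a 7B model in float16 also needs roughly 14 GB of memory, which is worth checking against the Space's hardware tier before switching from phi-2.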