Caikejs committed on
Commit 43d40c7 · verified · 1 Parent(s): 07fea57

Update app.py

Files changed (1)
  1. app.py +126 -50
app.py CHANGED
@@ -1,58 +1,134 @@
 
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
- # Detect the device
  DEVICE = 0 if torch.cuda.is_available() else -1
  TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 
- # Model A: Falcon 7B Instruct (no authentication required)
- model_a = AutoModelForCausalLM.from_pretrained(
-     "tiiuae/falcon-7b-instruct",
-     torch_dtype=TORCH_DTYPE
- )
- tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
-
- pipe_a = pipeline(
-     "text-generation",
-     model=model_a,
-     tokenizer=tokenizer_a,
-     device=DEVICE,
-     return_full_text=False,
-     pad_token_id=tokenizer_a.eos_token_id
- )
-
- # Model B: OpenAssistant Pythia 12B (no authentication required)
- model_b = AutoModelForCausalLM.from_pretrained(
-     "OpenAssistant/oasst-sft-1-pythia-12b",
-     torch_dtype=TORCH_DTYPE
- )
- tokenizer_b = AutoTokenizer.from_pretrained("OpenAssistant/oasst-sft-1-pythia-12b")
-
- pipe_b = pipeline(
-     "text-generation",
-     model=model_b,
-     tokenizer=tokenizer_b,
-     device=DEVICE,
-     return_full_text=False,
-     pad_token_id=tokenizer_b.eos_token_id
- )
-
- # Prompt interface
  def format_prompt(user_input):
-     return f"Responda em português: {user_input.strip()}"
 
  if __name__ == "__main__":
-     while True:
-         prompt = input("\nDigite uma pergunta (ou 'sair'): ").strip()
-         if prompt.lower() == "sair":
-             break
-
-         print("\n=== Resposta do Falcon ===")
-         with torch.no_grad():
-             falcon_response = pipe_a(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
-         print(falcon_response)
-
-         print("\n=== Resposta do OpenAssistant ===")
-         with torch.no_grad():
-             oa_response = pipe_b(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
-         print(oa_response)
 
+ import gradio as gr
  import torch
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 
+ # Memory settings
  DEVICE = 0 if torch.cuda.is_available() else -1
  TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 
+ # Models optimized for 16 GB
+ MODELS = {
+     "Falcon 7B (GPT-2 PT)": {
+         "name": "pierreguillain/gpt2-small-portuguese",
+         "max_tokens": 150
+     },
+     "OpenAssistant (GPT-Neo PT)": {
+         "name": "pierreguillain/gpt-neo-125m-portuguese",
+         "max_tokens": 150
+     }
+ }
+
+ # Load the models only once, at startup
+ loaded_models = {}
+
+ for model_name, config in MODELS.items():
+     try:
+         model = AutoModelForCausalLM.from_pretrained(
+             config["name"],
+             torch_dtype=TORCH_DTYPE,
+             device_map="auto" if DEVICE == 0 else None,
+             low_cpu_mem_usage=True
+         )
+         tokenizer = AutoTokenizer.from_pretrained(config["name"])
+
+         pipe = pipeline(
+             "text-generation",
+             model=model,
+             tokenizer=tokenizer,
+             # with device_map="auto" the model is already placed; only pass a device index on CPU
+             device=DEVICE if DEVICE == -1 else None,
+             return_full_text=False,
+             pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 50256
+         )
+
+         loaded_models[model_name] = {
+             "pipe": pipe,
+             "max_tokens": config["max_tokens"]
+         }
+         print(f"✅ {model_name} carregado com sucesso")
+
+     except Exception as e:
+         print(f"❌ Erro ao carregar {model_name}: {str(e)}")
+         loaded_models[model_name] = None
+
+ # Prompt formatting helper
  def format_prompt(user_input):
+     return f"Responda de forma clara e concisa: {user_input.strip()}"
+
+ # Generate a response with a single model
+ def generate_response(prompt, model_name):
+     if model_name not in loaded_models or not loaded_models[model_name]:
+         return "Modelo não disponível"
+
+     config = loaded_models[model_name]
+     try:
+         response = config["pipe"](
+             format_prompt(prompt),
+             max_new_tokens=config["max_tokens"],
+             temperature=0.7,
+             top_p=0.9,
+             repetition_penalty=1.2
+         )[0]['generated_text'].strip()
+
+         return response
+
+     except Exception as e:
+         return f"Erro na geração: {str(e)}"
+
+ # Gradio callback: one response per model, in the order of MODELS
+ def chatbot(prompt):
+     responses = []
+     for model_name in MODELS:
+         responses.append(generate_response(prompt, model_name))
+     return responses
+
+ # Build the interface
+ with gr.Blocks(title="Chatbot de Comparação") as demo:
+     gr.Markdown("# 🤖 Comparador de Modelos de Linguagem")
+     gr.Markdown("Teste e compare diferentes modelos de IA em português")
+
+     with gr.Row():
+         input_prompt = gr.Textbox(
+             label="Digite sua pergunta:",
+             placeholder="Escreva algo em português...",
+             lines=3
+         )
+
+     submit_btn = gr.Button("Enviar Pergunta", variant="primary")
+
+     # One output textbox per model; collect them so the button can update each one
+     outputs = []
+     with gr.Row():
+         for model_name in MODELS:
+             with gr.Column():
+                 gr.Markdown(f"### {model_name}")
+                 outputs.append(gr.Textbox(label="Resposta:", interactive=False))
+
+     # Wire the components: chatbot returns one answer per output, in the same order
+     submit_btn.click(
+         fn=chatbot,
+         inputs=input_prompt,
+         outputs=outputs
+     )
+
+ # Explicitly free memory when the process exits
+ def cleanup():
+     global loaded_models
+     for model in loaded_models.values():
+         if model:
+             del model["pipe"]
+     torch.cuda.empty_cache()
+
+ import atexit
+ atexit.register(cleanup)
 
  if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
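A minimal local sanity check for the updated script, assuming it is saved as app.py in the working directory and both model repos listed in MODELS can be downloaded (model loading happens at import time, so the first call may take a while):

# Hypothetical smoke test: import the module and call the Gradio callback directly, bypassing the UI.
import app  # both pipelines are loaded here, at import time

answers = app.chatbot("Explique em uma frase o que é aprendizado de máquina.")
for model_name, answer in zip(app.MODELS, answers):
    print(f"=== {model_name} ===\n{answer}\n")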