Update app.py
app.py CHANGED
@@ -1,58 +1,134 @@
Removed from the old version (58 lines; several removed lines are not legible in this view):

 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer

-#  [rest of comment not legible in this view]
 DEVICE = 0 if torch.cuda.is_available() else -1
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

-#  [rest of comment not legible in this view]
-[old lines 9-40 are not legible in this view]

 def format_prompt(user_input):
-    return f"Responda  [rest of line not legible in this view]

 if __name__ == "__main__":
-    [old line 45 not legible in this view]
-        prompt = input("\nDigite uma pergunta (ou 'sair'): ").strip()
-        if prompt.lower() == "sair":
-            break
-
-        print("\n=== Resposta do Falcon ===")
-        with torch.no_grad():
-            falcon_response = pipe_a(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
-        print(falcon_response)
-
-        print("\n=== Resposta do OpenAssistant ===")
-        with torch.no_grad():
-            oa_response = pipe_b(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
-        print(oa_response)
New version of app.py:

import gradio as gr
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Memory settings
DEVICE = 0 if torch.cuda.is_available() else -1
TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

# Models optimized for 16 GB
MODELS = {
    "Falcon 7B (GPT-2 PT)": {
        "name": "pierreguillain/gpt2-small-portuguese",
        "max_tokens": 150
    },
    "OpenAssistant (GPT-Neo PT)": {
        "name": "pierreguillain/gpt-neo-125m-portuguese",
        "max_tokens": 150
    }
}

# Load each model only once, at startup
loaded_models = {}

for model_name, config in MODELS.items():
    try:
        model = AutoModelForCausalLM.from_pretrained(
            config["name"],
            torch_dtype=TORCH_DTYPE,
            device_map="auto" if DEVICE == 0 else None,
            low_cpu_mem_usage=True
        )
        tokenizer = AutoTokenizer.from_pretrained(config["name"])

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device=DEVICE,
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 50256
        )

        loaded_models[model_name] = {
            "pipe": pipe,
            "max_tokens": config["max_tokens"]
        }
        print(f"✅ {model_name} carregado com sucesso")

    except Exception as e:
        print(f"❌ Erro ao carregar {model_name}: {str(e)}")
        loaded_models[model_name] = None

# Prompt formatting helper
def format_prompt(user_input):
    return f"Responda de forma clara e concisa: {user_input.strip()}"

# Generate a response with a single model
def generate_response(prompt, model_name):
    if model_name not in loaded_models or not loaded_models[model_name]:
        return "Modelo não disponível"

    config = loaded_models[model_name]
    try:
        response = config["pipe"](
            format_prompt(prompt),
            max_new_tokens=config["max_tokens"],
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2
        )[0]['generated_text'].strip()

        return response

    except Exception as e:
        return f"Erro na geração: {str(e)}"

# Gradio callback: one answer per model, in MODELS order
def chatbot(prompt):
    return [generate_response(prompt, model_name) for model_name in MODELS]

# Build the interface
with gr.Blocks(title="Chatbot de Comparação") as demo:
    gr.Markdown("# 🤖 Comparador de Modelos de Linguagem")
    gr.Markdown("Teste e compare diferentes modelos de IA em português")

    with gr.Row():
        input_prompt = gr.Textbox(
            label="Digite sua pergunta:",
            placeholder="Escreva algo em português...",
            lines=3
        )

    submit_btn = gr.Button("Enviar Pergunta", variant="primary")

    # One output textbox per model, collected so they can be wired to the button
    outputs = []
    with gr.Row():
        for model_name in MODELS:
            with gr.Column():
                gr.Markdown(f"### {model_name}")
                outputs.append(gr.Textbox(label="Resposta:", interactive=False))

    # Wire the button: the list returned by chatbot fills one textbox per model
    submit_btn.click(
        fn=chatbot,
        inputs=input_prompt,
        outputs=outputs
    )

# Release memory explicitly on shutdown
def cleanup():
    global loaded_models
    for model in loaded_models.values():
        if model:
            del model["pipe"]
    torch.cuda.empty_cache()

import atexit
atexit.register(cleanup)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
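
For a quick sanity check of the generation path without launching the Gradio UI, a minimal sketch along the following lines could be run next to app.py. It assumes the file is importable as app, that the dependencies (torch, transformers, gradio) are installed, and that the model weights download on first use; the example question is made up, not part of the commit.

# smoke_test.py (sketch): call the Space's generation function directly.
# Importing app triggers the module-level model loading but does not launch the UI.
from app import MODELS, generate_response

if __name__ == "__main__":
    question = "O que é aprendizado de máquina?"  # hypothetical example question
    for model_name in MODELS:
        print(f"--- {model_name} ---")
        print(generate_response(question, model_name))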
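
One caveat worth checking before running this on GPU hardware: the loading loop combines device_map="auto" in from_pretrained with device=DEVICE in pipeline(), and recent transformers releases reject that combination for accelerate-dispatched models. Below is a hedged sketch of one way to avoid the clash, passing device only on the CPU path; the helper name build_pipeline and its arguments are illustrative, not part of the commit.

from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

def build_pipeline(repo_id, device, torch_dtype):
    # Sketch: create a text-generation pipeline without mixing
    # device_map="auto" and an explicit pipeline device.
    use_gpu = device == 0
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch_dtype,
        device_map="auto" if use_gpu else None,
        low_cpu_mem_usage=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    extra = {} if use_gpu else {"device": device}  # accelerate already placed the model on GPU
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        return_full_text=False,
        pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 50256,
        **extra,
    )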