import gradio as gr
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Memory configuration
DEVICE = 0 if torch.cuda.is_available() else -1
TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
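# float16 halves the GPU memory footprint of the weights; on CPU we stay in float32,
# which is the safer default when half-precision kernels are not available.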

# Models optimized to fit in 16 GB
MODELS = {
"Falcon 7B (GPT-2 PT)": {
"name": "pierreguillain/gpt2-small-portuguese",
"max_tokens": 150
},
"OpenAssistant (GPT-Neo PT)": {
"name": "pierreguillain/gpt-neo-125m-portuguese",
"max_tokens": 150
}
}
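
# To compare additional models, add entries above with the same shape:
# a Hugging Face repo id under "name" and a per-model "max_tokens" generation budget.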

# Load each model only once, at startup
loaded_models = {}
for model_name, config in MODELS.items():
    try:
        model = AutoModelForCausalLM.from_pretrained(
            config["name"],
            torch_dtype=TORCH_DTYPE,
            device_map="auto" if DEVICE == 0 else None,
            low_cpu_mem_usage=True
        )
        tokenizer = AutoTokenizer.from_pretrained(config["name"])
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            # on GPU the model is already placed by device_map="auto", so only
            # pin the pipeline to a device explicitly when running on CPU
            device=DEVICE if DEVICE == -1 else None,
            # return only the generated continuation, not the echoed prompt
            return_full_text=False,
            # fall back to GPT-2's end-of-text id (50256) if the tokenizer has no eos token
            pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 50256
        )
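        # Register the ready-to-use pipeline together with its generation budget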
        loaded_models[model_name] = {
            "pipe": pipe,
            "max_tokens": config["max_tokens"]
        }
        print(f"✅ {model_name} loaded successfully")
    except Exception as e:
        print(f"❌ Failed to load {model_name}: {str(e)}")
        loaded_models[model_name] = None
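
# Models that failed to load stay registered as None and are reported as unavailable below.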

# Prompt formatting helper
def format_prompt(user_input):
return f"Responda de forma clara e concisa: {user_input.strip()}"

# Generate a response from a single model
def generate_response(prompt, model_name):
    if model_name not in loaded_models or not loaded_models[model_name]:
        return "Modelo não disponível"
    config = loaded_models[model_name]
    try:
        response = config["pipe"](
            format_prompt(prompt),
            max_new_tokens=config["max_tokens"],
            # moderate sampling with nucleus filtering and a penalty against repetition
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2
        )[0]['generated_text'].strip()
        return response
    except Exception as e:
        return f"Erro na geração: {str(e)}"

# Gradio callback: run the prompt through every loaded model
def chatbot(prompt):
    # One response per model, in the same order as the output boxes built below
    return [generate_response(prompt, model_name) for model_name in MODELS]

# Build the interface
with gr.Blocks(title="Chatbot de Comparação") as demo:
gr.Markdown("# 🤖 Comparador de Modelos de Linguagem")
gr.Markdown("Teste e compare diferentes modelos de IA em português")
with gr.Row():
input_prompt = gr.Textbox(
label="Digite sua pergunta:",
placeholder="Escreva algo em português...",
lines=3
)
submit_btn = gr.Button("Enviar Pergunta", variant="primary")
    # One output box per model; collect them so the click handler can fill them in order
    outputs = []
    with gr.Row():
        for model_name in MODELS:
            with gr.Column():
                gr.Markdown(f"### {model_name}")
                outputs.append(gr.Textbox(label="Resposta:", interactive=False))
    # Wire the button: chatbot returns one string per model, filling the boxes in order
    submit_btn.click(
        fn=chatbot,
        inputs=input_prompt,
        outputs=outputs
    )

# Explicitly free model memory on exit
def cleanup():
    global loaded_models
    for model in loaded_models.values():
        if model:
            del model["pipe"]
    torch.cuda.empty_cache()
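
# Run the cleanup hook automatically when the process exits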
import atexit
atexit.register(cleanup)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)