import gradio as gr
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Memory settings
DEVICE = 0 if torch.cuda.is_available() else -1
TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

# Models optimized for 16 GB of memory
MODELS = {
    "Falcon 7B (GPT-2 PT)": {
        "name": "pierreguillain/gpt2-small-portuguese",
        "max_tokens": 150
    },
    "OpenAssistant (GPT-Neo PT)": {
        "name": "pierreguillain/gpt-neo-125m-portuguese",
        "max_tokens": 150
    }
}

# Load each model only once, at startup
loaded_models = {}
for model_name, config in MODELS.items():
    try:
        model = AutoModelForCausalLM.from_pretrained(
            config["name"],
            torch_dtype=TORCH_DTYPE,
            device_map="auto" if DEVICE == 0 else None,
            low_cpu_mem_usage=True
        )
        tokenizer = AutoTokenizer.from_pretrained(config["name"])
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            # With device_map="auto", accelerate has already placed the model,
            # so the pipeline must not receive an explicit device.
            device=DEVICE if DEVICE == -1 else None,
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 50256
        )
        loaded_models[model_name] = {
            "pipe": pipe,
            "max_tokens": config["max_tokens"]
        }
        print(f"✅ {model_name} carregado com sucesso")
    except Exception as e:
        print(f"❌ Erro ao carregar {model_name}: {str(e)}")
        loaded_models[model_name] = None

# Prompt formatting
def format_prompt(user_input):
    return f"Responda de forma clara e concisa: {user_input.strip()}"

# Generate a response with a single model
def generate_response(prompt, model_name):
    if model_name not in loaded_models or not loaded_models[model_name]:
        return "Modelo não disponível"
    config = loaded_models[model_name]
    try:
        response = config["pipe"](
            format_prompt(prompt),
            max_new_tokens=config["max_tokens"],
            do_sample=True,  # temperature/top_p only take effect when sampling
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2
        )[0]["generated_text"].strip()
        return response
    except Exception as e:
        return f"Erro na geração: {str(e)}"
# Gradio callback: one answer per model, in the same order as MODELS
def chatbot(prompt):
    return [generate_response(prompt, model_name) for model_name in MODELS]

# Build the interface
with gr.Blocks(title="Chatbot de Comparação") as demo:
    gr.Markdown("# 🤖 Comparador de Modelos de Linguagem")
    gr.Markdown("Teste e compare diferentes modelos de IA em português")
    with gr.Row():
        input_prompt = gr.Textbox(
            label="Digite sua pergunta:",
            placeholder="Escreva algo em português...",
            lines=3
        )
        submit_btn = gr.Button("Enviar Pergunta", variant="primary")
    outputs = []
    with gr.Row():
        for model_name in MODELS:
            with gr.Column():
                gr.Markdown(f"### {model_name}")
                outputs.append(gr.Textbox(label="Resposta:", interactive=False))
    # Connect the components: chatbot() returns one string per output box, in order
    submit_btn.click(
        fn=chatbot,
        inputs=input_prompt,
        outputs=outputs
    )

# Explicitly release memory on shutdown
def cleanup():
    global loaded_models
    for model in loaded_models.values():
        if model:
            del model["pipe"]
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

import atexit
atexit.register(cleanup)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
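
# Client-side usage sketch (an assumption, not part of the Space itself): once the
# app is running, it can be queried with gradio_client. "/chatbot" is Gradio's
# default api_name for an event bound to a function called `chatbot`; if it
# differs, check the app's "Use via API" page.
#
#   from gradio_client import Client
#   client = Client("http://localhost:7860")
#   respostas = client.predict("Qual é a capital do Brasil?", api_name="/chatbot")
#   print(respostas)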