Spaces:

Caikejs
/

chatbot-em-cascata-fdch

Sleeping

App Files Files Community

Caikejs committed on May 28

Commit

7073dcc

verified ·

1 Parent(s): 44146b1

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -26

app.py CHANGED Viewed

@@ -9,10 +9,9 @@ DEVICE = 0 if torch.cuda.is_available() else -1
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 # Modelo A: Falcon RW 1B
-model_a = AutoModelForCausalLM.from_pretrained(
-    "tiiuae/falcon-rw-1b", torch_dtype=TORCH_DTYPE
-)
 tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
 pipe_a = pipeline(
     "text-generation",
     model=model_a,
@@ -22,6 +21,7 @@ pipe_a = pipeline(
     pad_token_id=tokenizer_a.eos_token_id
 )
 model_b = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", torch_dtype=TORCH_DTYPE)
 tokenizer_b = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
@@ -34,7 +34,7 @@ pipe_b = pipeline(
     pad_token_id=tokenizer_b.eos_token_id
 )
-# Modelo de classificação de sentimento
 sentiment_arbiter = pipeline(
     "text-classification",
     model="nlptown/bert-base-multilingual-uncased-sentiment",
@@ -48,35 +48,40 @@ similarity_model = SentenceTransformer(
 )
 def semantic_similarity(text1, text2):
     embeddings = similarity_model.encode([text1, text2], convert_to_tensor=True)
     cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1])
     return cosine_scores.item()
 def judge_response(question, response_a, response_b):
     sentiment_a = sentiment_arbiter(response_a)[0]
     sentiment_b = sentiment_arbiter(response_b)[0]
     score_sent_a = int(sentiment_a['label'][0])
     score_sent_b = int(sentiment_b['label'][0])
     sim_score_a = semantic_similarity(question, response_a)
     sim_score_b = semantic_similarity(question, response_b)
-    conciseness_a = min(1.0, 50 / len(response_a.split()))
-    conciseness_b = min(1.0, 50 / len(response_b.split()))
     WEIGHT_SENTIMENT = 0.4
     WEIGHT_RELEVANCE = 0.5
     WEIGHT_CONCISENESS = 0.1
     total_a = (WEIGHT_SENTIMENT * score_sent_a +
                WEIGHT_RELEVANCE * sim_score_a +
                WEIGHT_CONCISENESS * conciseness_a)
     total_b = (WEIGHT_SENTIMENT * score_sent_b +
                WEIGHT_RELEVANCE * sim_score_b +
                WEIGHT_CONCISENESS * conciseness_b)
     THRESHOLD = 0.15
     if abs(total_a - total_b) < THRESHOLD:
         winner = "Modelo A" if score_sent_a >= score_sent_b else "Modelo B"
@@ -84,16 +89,17 @@ def judge_response(question, response_a, response_b):
     else:
         winner = "Modelo A" if total_a > total_b else "Modelo B"
         final_response = response_a if total_a > total_b else response_b
     print(f"\nA: S:{score_sent_a:.2f} R:{sim_score_a:.2f} C:{conciseness_a:.2f} T:{total_a:.2f}")
     print(f"B: S:{score_sent_b:.2f} R:{sim_score_b:.2f} C:{conciseness_b:.2f} T:{total_b:.2f}")
     print(f"Vencedor: {winner} Diferença: {abs(total_a - total_b):.2f}")
     return winner, final_response
 def chatbot(prompt):
     prompt_pt = "Responda em português: " + prompt
     response_a = pipe_a(
         prompt_pt,
         max_new_tokens=60,
@@ -102,18 +108,17 @@ def chatbot(prompt):
         top_p=0.9,
         repetition_penalty=1.2,
     )[0]['generated_text'].strip()
     response_b = pipe_b(
-        prompt_pt,
         max_new_tokens=60,
         temperature=0.7,
         top_k=50,
         top_p=0.9,
         repetition_penalty=1.2,
     )[0]['generated_text'].strip()
     winner, final_response = judge_response(prompt, response_a, response_b)
     return prompt, response_a, response_b, winner, final_response
 css = """
@@ -124,29 +129,29 @@ footer {visibility: hidden}
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# 🤖 Chatbot com Julgamento Aprimorado")
     gr.Markdown("Compara respostas de dois modelos usando múltiplos critérios de qualidade")
     with gr.Row():
         inp = gr.Textbox(label="Digite sua pergunta:", lines=2, placeholder="Escreva sua pergunta em português...")
         btn = gr.Button("Enviar")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Modelo A (Falcon RW 1B)")
             out_a = gr.Textbox(label="Resposta", interactive=False)
         with gr.Column():
-            gr.Markdown("### Modelo B (Gemma 2B Instruct)")
             out_b = gr.Textbox(label="Resposta", interactive=False)
     with gr.Row():
         with gr.Column(scale=2):
             winner_out = gr.Textbox(label="🏆 Modelo Vencedor", interactive=False)
         with gr.Column(scale=3):
             final_out = gr.Textbox(label="💡 Resposta Escolhida", interactive=False)
     btn.click(
         fn=chatbot,
         inputs=inp,
         outputs=[inp, out_a, out_b, winner_out, final_out]
     )
-demo.launch()

 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 # Modelo A: Falcon RW 1B
+model_a = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b", torch_dtype=TORCH_DTYPE)
 tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
 pipe_a = pipeline(
     "text-generation",
     model=model_a,
     pad_token_id=tokenizer_a.eos_token_id
 )
+# Modelo B: Mistral 7B Instruct
 model_b = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", torch_dtype=TORCH_DTYPE)
 tokenizer_b = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
     pad_token_id=tokenizer_b.eos_token_id
 )
+# Classificador de sentimento
 sentiment_arbiter = pipeline(
     "text-classification",
     model="nlptown/bert-base-multilingual-uncased-sentiment",
 )
 def semantic_similarity(text1, text2):
+    if not text1.strip() or not text2.strip():
+        return 0.0
     embeddings = similarity_model.encode([text1, text2], convert_to_tensor=True)
     cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1])
     return cosine_scores.item()
+def format_mistral_prompt(user_input):
+    return f"<s>[INST] {user_input.strip()} [/INST]"
 def judge_response(question, response_a, response_b):
     sentiment_a = sentiment_arbiter(response_a)[0]
     sentiment_b = sentiment_arbiter(response_b)[0]
     score_sent_a = int(sentiment_a['label'][0])
     score_sent_b = int(sentiment_b['label'][0])
     sim_score_a = semantic_similarity(question, response_a)
     sim_score_b = semantic_similarity(question, response_b)
+    conciseness_a = min(1.0, 50 / max(1, len(response_a.split())))
+    conciseness_b = min(1.0, 50 / max(1, len(response_b.split())))
     WEIGHT_SENTIMENT = 0.4
     WEIGHT_RELEVANCE = 0.5
     WEIGHT_CONCISENESS = 0.1
     total_a = (WEIGHT_SENTIMENT * score_sent_a +
                WEIGHT_RELEVANCE * sim_score_a +
                WEIGHT_CONCISENESS * conciseness_a)
     total_b = (WEIGHT_SENTIMENT * score_sent_b +
                WEIGHT_RELEVANCE * sim_score_b +
                WEIGHT_CONCISENESS * conciseness_b)
     THRESHOLD = 0.15
     if abs(total_a - total_b) < THRESHOLD:
         winner = "Modelo A" if score_sent_a >= score_sent_b else "Modelo B"
     else:
         winner = "Modelo A" if total_a > total_b else "Modelo B"
         final_response = response_a if total_a > total_b else response_b
     print(f"\nA: S:{score_sent_a:.2f} R:{sim_score_a:.2f} C:{conciseness_a:.2f} T:{total_a:.2f}")
     print(f"B: S:{score_sent_b:.2f} R:{sim_score_b:.2f} C:{conciseness_b:.2f} T:{total_b:.2f}")
     print(f"Vencedor: {winner} Diferença: {abs(total_a - total_b):.2f}")
     return winner, final_response
 def chatbot(prompt):
     prompt_pt = "Responda em português: " + prompt
+    mistral_prompt = format_mistral_prompt(prompt_pt)
     response_a = pipe_a(
         prompt_pt,
         max_new_tokens=60,
         top_p=0.9,
         repetition_penalty=1.2,
     )[0]['generated_text'].strip()
     response_b = pipe_b(
+        mistral_prompt,
         max_new_tokens=60,
         temperature=0.7,
         top_k=50,
         top_p=0.9,
         repetition_penalty=1.2,
     )[0]['generated_text'].strip()
     winner, final_response = judge_response(prompt, response_a, response_b)
     return prompt, response_a, response_b, winner, final_response
 css = """
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# 🤖 Chatbot com Julgamento Aprimorado")
     gr.Markdown("Compara respostas de dois modelos usando múltiplos critérios de qualidade")
     with gr.Row():
         inp = gr.Textbox(label="Digite sua pergunta:", lines=2, placeholder="Escreva sua pergunta em português...")
         btn = gr.Button("Enviar")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Modelo A (Falcon RW 1B)")
             out_a = gr.Textbox(label="Resposta", interactive=False)
         with gr.Column():
+            gr.Markdown("### Modelo B (Mistral 7B Instruct)")
             out_b = gr.Textbox(label="Resposta", interactive=False)
     with gr.Row():
         with gr.Column(scale=2):
             winner_out = gr.Textbox(label="🏆 Modelo Vencedor", interactive=False)
         with gr.Column(scale=3):
             final_out = gr.Textbox(label="💡 Resposta Escolhida", interactive=False)
     btn.click(
         fn=chatbot,
         inputs=inp,
         outputs=[inp, out_a, out_b, winner_out, final_out]
     )
+demo.launch()