Caikejs committed on
Commit
7073dcc
·
verified ·
1 Parent(s): 44146b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -26
app.py CHANGED
@@ -9,10 +9,9 @@ DEVICE = 0 if torch.cuda.is_available() else -1
9
  TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
10
 
11
  # Modelo A: Falcon RW 1B
12
- model_a = AutoModelForCausalLM.from_pretrained(
13
- "tiiuae/falcon-rw-1b", torch_dtype=TORCH_DTYPE
14
- )
15
  tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
 
16
  pipe_a = pipeline(
17
  "text-generation",
18
  model=model_a,
@@ -22,6 +21,7 @@ pipe_a = pipeline(
22
  pad_token_id=tokenizer_a.eos_token_id
23
  )
24
 
 
25
  model_b = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", torch_dtype=TORCH_DTYPE)
26
  tokenizer_b = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
27
 
@@ -34,7 +34,7 @@ pipe_b = pipeline(
34
  pad_token_id=tokenizer_b.eos_token_id
35
  )
36
 
37
- # Modelo de classificação de sentimento
38
  sentiment_arbiter = pipeline(
39
  "text-classification",
40
  model="nlptown/bert-base-multilingual-uncased-sentiment",
@@ -48,35 +48,40 @@ similarity_model = SentenceTransformer(
48
  )
49
 
50
  def semantic_similarity(text1, text2):
 
 
51
  embeddings = similarity_model.encode([text1, text2], convert_to_tensor=True)
52
  cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1])
53
  return cosine_scores.item()
54
 
 
 
 
55
  def judge_response(question, response_a, response_b):
56
  sentiment_a = sentiment_arbiter(response_a)[0]
57
  sentiment_b = sentiment_arbiter(response_b)[0]
58
-
59
  score_sent_a = int(sentiment_a['label'][0])
60
  score_sent_b = int(sentiment_b['label'][0])
61
-
62
  sim_score_a = semantic_similarity(question, response_a)
63
  sim_score_b = semantic_similarity(question, response_b)
64
-
65
- conciseness_a = min(1.0, 50 / len(response_a.split()))
66
- conciseness_b = min(1.0, 50 / len(response_b.split()))
67
-
68
  WEIGHT_SENTIMENT = 0.4
69
  WEIGHT_RELEVANCE = 0.5
70
  WEIGHT_CONCISENESS = 0.1
71
-
72
  total_a = (WEIGHT_SENTIMENT * score_sent_a +
73
  WEIGHT_RELEVANCE * sim_score_a +
74
  WEIGHT_CONCISENESS * conciseness_a)
75
-
76
  total_b = (WEIGHT_SENTIMENT * score_sent_b +
77
  WEIGHT_RELEVANCE * sim_score_b +
78
  WEIGHT_CONCISENESS * conciseness_b)
79
-
80
  THRESHOLD = 0.15
81
  if abs(total_a - total_b) < THRESHOLD:
82
  winner = "Modelo A" if score_sent_a >= score_sent_b else "Modelo B"
@@ -84,16 +89,17 @@ def judge_response(question, response_a, response_b):
84
  else:
85
  winner = "Modelo A" if total_a > total_b else "Modelo B"
86
  final_response = response_a if total_a > total_b else response_b
87
-
88
  print(f"\nA: S:{score_sent_a:.2f} R:{sim_score_a:.2f} C:{conciseness_a:.2f} T:{total_a:.2f}")
89
  print(f"B: S:{score_sent_b:.2f} R:{sim_score_b:.2f} C:{conciseness_b:.2f} T:{total_b:.2f}")
90
  print(f"Vencedor: {winner} Diferença: {abs(total_a - total_b):.2f}")
91
-
92
  return winner, final_response
93
 
94
  def chatbot(prompt):
95
  prompt_pt = "Responda em português: " + prompt
96
-
 
97
  response_a = pipe_a(
98
  prompt_pt,
99
  max_new_tokens=60,
@@ -102,18 +108,17 @@ def chatbot(prompt):
102
  top_p=0.9,
103
  repetition_penalty=1.2,
104
  )[0]['generated_text'].strip()
105
-
106
  response_b = pipe_b(
107
- prompt_pt,
108
  max_new_tokens=60,
109
  temperature=0.7,
110
  top_k=50,
111
  top_p=0.9,
112
  repetition_penalty=1.2,
113
  )[0]['generated_text'].strip()
114
-
115
  winner, final_response = judge_response(prompt, response_a, response_b)
116
-
117
  return prompt, response_a, response_b, winner, final_response
118
 
119
  css = """
@@ -124,29 +129,29 @@ footer {visibility: hidden}
124
  with gr.Blocks(css=css) as demo:
125
  gr.Markdown("# 🤖 Chatbot com Julgamento Aprimorado")
126
  gr.Markdown("Compara respostas de dois modelos usando múltiplos critérios de qualidade")
127
-
128
  with gr.Row():
129
  inp = gr.Textbox(label="Digite sua pergunta:", lines=2, placeholder="Escreva sua pergunta em português...")
130
  btn = gr.Button("Enviar")
131
-
132
  with gr.Row():
133
  with gr.Column():
134
  gr.Markdown("### Modelo A (Falcon RW 1B)")
135
  out_a = gr.Textbox(label="Resposta", interactive=False)
136
  with gr.Column():
137
- gr.Markdown("### Modelo B (Gemma 2B Instruct)")
138
  out_b = gr.Textbox(label="Resposta", interactive=False)
139
-
140
  with gr.Row():
141
  with gr.Column(scale=2):
142
  winner_out = gr.Textbox(label="🏆 Modelo Vencedor", interactive=False)
143
  with gr.Column(scale=3):
144
  final_out = gr.Textbox(label="💡 Resposta Escolhida", interactive=False)
145
-
146
  btn.click(
147
  fn=chatbot,
148
  inputs=inp,
149
  outputs=[inp, out_a, out_b, winner_out, final_out]
150
  )
151
 
152
- demo.launch()
 
9
  TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
10
 
11
  # Modelo A: Falcon RW 1B
12
+ model_a = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b", torch_dtype=TORCH_DTYPE)
 
 
13
  tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
14
+
15
  pipe_a = pipeline(
16
  "text-generation",
17
  model=model_a,
 
21
  pad_token_id=tokenizer_a.eos_token_id
22
  )
23
 
24
+ # Modelo B: Mistral 7B Instruct
25
  model_b = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", torch_dtype=TORCH_DTYPE)
26
  tokenizer_b = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
27
 
 
34
  pad_token_id=tokenizer_b.eos_token_id
35
  )
36
 
37
+ # Classificador de sentimento
38
  sentiment_arbiter = pipeline(
39
  "text-classification",
40
  model="nlptown/bert-base-multilingual-uncased-sentiment",
 
48
  )
49
 
50
  def semantic_similarity(text1, text2):
51
+ if not text1.strip() or not text2.strip():
52
+ return 0.0
53
  embeddings = similarity_model.encode([text1, text2], convert_to_tensor=True)
54
  cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1])
55
  return cosine_scores.item()
56
 
57
+ def format_mistral_prompt(user_input):
58
+ return f"<s>[INST] {user_input.strip()} [/INST]"
59
+
60
  def judge_response(question, response_a, response_b):
61
  sentiment_a = sentiment_arbiter(response_a)[0]
62
  sentiment_b = sentiment_arbiter(response_b)[0]
63
+
64
  score_sent_a = int(sentiment_a['label'][0])
65
  score_sent_b = int(sentiment_b['label'][0])
66
+
67
  sim_score_a = semantic_similarity(question, response_a)
68
  sim_score_b = semantic_similarity(question, response_b)
69
+
70
+ conciseness_a = min(1.0, 50 / max(1, len(response_a.split())))
71
+ conciseness_b = min(1.0, 50 / max(1, len(response_b.split())))
72
+
73
  WEIGHT_SENTIMENT = 0.4
74
  WEIGHT_RELEVANCE = 0.5
75
  WEIGHT_CONCISENESS = 0.1
76
+
77
  total_a = (WEIGHT_SENTIMENT * score_sent_a +
78
  WEIGHT_RELEVANCE * sim_score_a +
79
  WEIGHT_CONCISENESS * conciseness_a)
80
+
81
  total_b = (WEIGHT_SENTIMENT * score_sent_b +
82
  WEIGHT_RELEVANCE * sim_score_b +
83
  WEIGHT_CONCISENESS * conciseness_b)
84
+
85
  THRESHOLD = 0.15
86
  if abs(total_a - total_b) < THRESHOLD:
87
  winner = "Modelo A" if score_sent_a >= score_sent_b else "Modelo B"
 
89
  else:
90
  winner = "Modelo A" if total_a > total_b else "Modelo B"
91
  final_response = response_a if total_a > total_b else response_b
92
+
93
  print(f"\nA: S:{score_sent_a:.2f} R:{sim_score_a:.2f} C:{conciseness_a:.2f} T:{total_a:.2f}")
94
  print(f"B: S:{score_sent_b:.2f} R:{sim_score_b:.2f} C:{conciseness_b:.2f} T:{total_b:.2f}")
95
  print(f"Vencedor: {winner} Diferença: {abs(total_a - total_b):.2f}")
96
+
97
  return winner, final_response
98
 
99
  def chatbot(prompt):
100
  prompt_pt = "Responda em português: " + prompt
101
+ mistral_prompt = format_mistral_prompt(prompt_pt)
102
+
103
  response_a = pipe_a(
104
  prompt_pt,
105
  max_new_tokens=60,
 
108
  top_p=0.9,
109
  repetition_penalty=1.2,
110
  )[0]['generated_text'].strip()
111
+
112
  response_b = pipe_b(
113
+ mistral_prompt,
114
  max_new_tokens=60,
115
  temperature=0.7,
116
  top_k=50,
117
  top_p=0.9,
118
  repetition_penalty=1.2,
119
  )[0]['generated_text'].strip()
120
+
121
  winner, final_response = judge_response(prompt, response_a, response_b)
 
122
  return prompt, response_a, response_b, winner, final_response
123
 
124
  css = """
 
129
  with gr.Blocks(css=css) as demo:
130
  gr.Markdown("# 🤖 Chatbot com Julgamento Aprimorado")
131
  gr.Markdown("Compara respostas de dois modelos usando múltiplos critérios de qualidade")
132
+
133
  with gr.Row():
134
  inp = gr.Textbox(label="Digite sua pergunta:", lines=2, placeholder="Escreva sua pergunta em português...")
135
  btn = gr.Button("Enviar")
136
+
137
  with gr.Row():
138
  with gr.Column():
139
  gr.Markdown("### Modelo A (Falcon RW 1B)")
140
  out_a = gr.Textbox(label="Resposta", interactive=False)
141
  with gr.Column():
142
+ gr.Markdown("### Modelo B (Mistral 7B Instruct)")
143
  out_b = gr.Textbox(label="Resposta", interactive=False)
144
+
145
  with gr.Row():
146
  with gr.Column(scale=2):
147
  winner_out = gr.Textbox(label="🏆 Modelo Vencedor", interactive=False)
148
  with gr.Column(scale=3):
149
  final_out = gr.Textbox(label="💡 Resposta Escolhida", interactive=False)
150
+
151
  btn.click(
152
  fn=chatbot,
153
  inputs=inp,
154
  outputs=[inp, out_a, out_b, winner_out, final_out]
155
  )
156
 
157
+ demo.launch()