Caikejs committed
Commit 2e6de90 · verified · 1 Parent(s): 7073dcc

Update app.py

Files changed (1)
  1. app.py +30 -127
app.py CHANGED
@@ -1,16 +1,16 @@
-import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from sentence_transformers import SentenceTransformer, util
-import numpy as np

 # Configuração de dispositivo
 DEVICE = 0 if torch.cuda.is_available() else -1
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

-# Modelo A: Falcon RW 1B
-model_a = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b", torch_dtype=TORCH_DTYPE)
-tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
+# Carrega modelo A - LLaMA 2
+model_a = AutoModelForCausalLM.from_pretrained(
+    "meta-llama/Llama-2-7b-chat-hf",
+    torch_dtype=TORCH_DTYPE
+)
+tokenizer_a = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

 pipe_a = pipeline(
     "text-generation",
@@ -21,9 +21,12 @@ pipe_a = pipeline(
     pad_token_id=tokenizer_a.eos_token_id
 )

-# Modelo B: Mistral 7B Instruct
-model_b = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", torch_dtype=TORCH_DTYPE)
-tokenizer_b = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
+# Carrega modelo B - Falcon 7B Instruct (sem autenticação)
+model_b = AutoModelForCausalLM.from_pretrained(
+    "tiiuae/falcon-7b-instruct",
+    torch_dtype=TORCH_DTYPE
+)
+tokenizer_b = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")

 pipe_b = pipeline(
     "text-generation",
@@ -34,124 +37,24 @@ pipe_b = pipeline(
     pad_token_id=tokenizer_b.eos_token_id
 )

-# Classificador de sentimento
-sentiment_arbiter = pipeline(
-    "text-classification",
-    model="nlptown/bert-base-multilingual-uncased-sentiment",
-    device=DEVICE
-)
-
-# Modelo de similaridade semântica
-similarity_model = SentenceTransformer(
-    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
-    device="cuda" if torch.cuda.is_available() else "cpu"
-)
-
-def semantic_similarity(text1, text2):
-    if not text1.strip() or not text2.strip():
-        return 0.0
-    embeddings = similarity_model.encode([text1, text2], convert_to_tensor=True)
-    cosine_scores = util.pytorch_cos_sim(embeddings[0], embeddings[1])
-    return cosine_scores.item()
-
-def format_mistral_prompt(user_input):
-    return f"<s>[INST] {user_input.strip()} [/INST]"
-
-def judge_response(question, response_a, response_b):
-    sentiment_a = sentiment_arbiter(response_a)[0]
-    sentiment_b = sentiment_arbiter(response_b)[0]
-
-    score_sent_a = int(sentiment_a['label'][0])
-    score_sent_b = int(sentiment_b['label'][0])
-
-    sim_score_a = semantic_similarity(question, response_a)
-    sim_score_b = semantic_similarity(question, response_b)
-
-    conciseness_a = min(1.0, 50 / max(1, len(response_a.split())))
-    conciseness_b = min(1.0, 50 / max(1, len(response_b.split())))
-
-    WEIGHT_SENTIMENT = 0.4
-    WEIGHT_RELEVANCE = 0.5
-    WEIGHT_CONCISENESS = 0.1
-
-    total_a = (WEIGHT_SENTIMENT * score_sent_a +
-               WEIGHT_RELEVANCE * sim_score_a +
-               WEIGHT_CONCISENESS * conciseness_a)
-
-    total_b = (WEIGHT_SENTIMENT * score_sent_b +
-               WEIGHT_RELEVANCE * sim_score_b +
-               WEIGHT_CONCISENESS * conciseness_b)
-
-    THRESHOLD = 0.15
-    if abs(total_a - total_b) < THRESHOLD:
-        winner = "Modelo A" if score_sent_a >= score_sent_b else "Modelo B"
-        final_response = response_a if score_sent_a >= score_sent_b else response_b
-    else:
-        winner = "Modelo A" if total_a > total_b else "Modelo B"
-        final_response = response_a if total_a > total_b else response_b
-
-    print(f"\nA: S:{score_sent_a:.2f} R:{sim_score_a:.2f} C:{conciseness_a:.2f} T:{total_a:.2f}")
-    print(f"B: S:{score_sent_b:.2f} R:{sim_score_b:.2f} C:{conciseness_b:.2f} T:{total_b:.2f}")
-    print(f"Vencedor: {winner} Diferença: {abs(total_a - total_b):.2f}")
-
-    return winner, final_response
-
-def chatbot(prompt):
-    prompt_pt = "Responda em português: " + prompt
-    mistral_prompt = format_mistral_prompt(prompt_pt)
-
-    response_a = pipe_a(
-        prompt_pt,
-        max_new_tokens=60,
-        temperature=0.7,
-        top_k=50,
-        top_p=0.9,
-        repetition_penalty=1.2,
-    )[0]['generated_text'].strip()
-
-    response_b = pipe_b(
-        mistral_prompt,
-        max_new_tokens=60,
-        temperature=0.7,
-        top_k=50,
-        top_p=0.9,
-        repetition_penalty=1.2,
-    )[0]['generated_text'].strip()
-
-    winner, final_response = judge_response(prompt, response_a, response_b)
-    return prompt, response_a, response_b, winner, final_response
-
-css = """
-footer {visibility: hidden}
-.output-text {font-size: 16px !important}
-"""
-
-with gr.Blocks(css=css) as demo:
-    gr.Markdown("# 🤖 Chatbot com Julgamento Aprimorado")
-    gr.Markdown("Compara respostas de dois modelos usando múltiplos critérios de qualidade")
-
-    with gr.Row():
-        inp = gr.Textbox(label="Digite sua pergunta:", lines=2, placeholder="Escreva sua pergunta em português...")
-        btn = gr.Button("Enviar")
-
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Modelo A (Falcon RW 1B)")
-            out_a = gr.Textbox(label="Resposta", interactive=False)
-        with gr.Column():
-            gr.Markdown("### Modelo B (Mistral 7B Instruct)")
-            out_b = gr.Textbox(label="Resposta", interactive=False)
-
-    with gr.Row():
-        with gr.Column(scale=2):
-            winner_out = gr.Textbox(label="🏆 Modelo Vencedor", interactive=False)
-        with gr.Column(scale=3):
-            final_out = gr.Textbox(label="💡 Resposta Escolhida", interactive=False)
-
-    btn.click(
-        fn=chatbot,
-        inputs=inp,
-        outputs=[inp, out_a, out_b, winner_out, final_out]
-    )
-
-demo.launch()
+# Funções auxiliares para formatar o prompt
+def format_llama_prompt(user_input):
+    return f"[INST] <<SYS>>\nVocê é um assistente útil.\n<</SYS>>\n\n{user_input.strip()} [/INST]"
+
+def format_falcon_prompt(user_input):
+    return f"Responda em português: {user_input.strip()}"
+
+# Interface simples para testar os modelos
+if __name__ == "__main__":
+    while True:
+        prompt = input("\nDigite uma pergunta (ou 'sair'): ").strip()
+        if prompt.lower() == "sair":
+            break
+
+        print("\n=== Resposta do LLaMA 2 ===")
+        llama_response = pipe_a(format_llama_prompt(prompt), max_new_tokens=200)[0]['generated_text']
+        print(llama_response)
+
+        print("\n=== Resposta do Falcon 7B ===")
+        falcon_response = pipe_b(format_falcon_prompt(prompt), max_new_tokens=200)[0]['generated_text']
+        print(falcon_response)
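
For reference, the judge_response logic removed by this commit scored each answer on three signals: a 1-5 star sentiment label from the nlptown classifier, the cosine similarity between question and answer, and a 0-1 conciseness factor, combined with weights 0.4 / 0.5 / 0.1; only when the weighted totals differed by less than 0.15 did it fall back to picking the higher sentiment score. A small worked example of that arithmetic (the per-response scores below are made up for illustration):

    # Worked example of the removed weighted-scoring rule (hypothetical scores).
    WEIGHT_SENTIMENT, WEIGHT_RELEVANCE, WEIGHT_CONCISENESS = 0.4, 0.5, 0.1
    THRESHOLD = 0.15

    score_sent_a, sim_a, conc_a = 4, 0.62, 0.83   # hypothetical scores for response A
    score_sent_b, sim_b, conc_b = 5, 0.48, 1.00   # hypothetical scores for response B

    total_a = WEIGHT_SENTIMENT * score_sent_a + WEIGHT_RELEVANCE * sim_a + WEIGHT_CONCISENESS * conc_a  # 1.993
    total_b = WEIGHT_SENTIMENT * score_sent_b + WEIGHT_RELEVANCE * sim_b + WEIGHT_CONCISENESS * conc_b  # 2.340

    if abs(total_a - total_b) < THRESHOLD:  # difference is 0.347, so the tie-break branch is skipped
        winner = "Modelo A" if score_sent_a >= score_sent_b else "Modelo B"
    else:
        winner = "Modelo A" if total_a > total_b else "Modelo B"  # -> "Modelo B"

Because the sentiment term is an unnormalised 1-5 star count, its 0.4 weight can contribute up to 2.0 while relevance and conciseness together contribute at most 0.6, so sentiment tended to dominate the ranking.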
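
One behaviour to expect when running the new __main__ loop: a transformers text-generation pipeline returns the prompt concatenated with the model's continuation in generated_text by default, so the printed LLaMA and Falcon answers will start by echoing the formatted prompt. A minimal sketch of trimming that echo, assuming the pipelines are built as in this file (completion_only is a hypothetical helper, not part of app.py):

    # Hypothetical helper: keep only the model's continuation, not the echoed prompt.
    def completion_only(pipe, prompt, **gen_kwargs):
        text = pipe(prompt, **gen_kwargs)[0]["generated_text"]
        return text[len(prompt):].strip() if text.startswith(prompt) else text.strip()

    # e.g. answer_a = completion_only(pipe_a, format_llama_prompt(prompt), max_new_tokens=200)

Passing return_full_text=False in the pipeline call achieves the same effect. Note also that meta-llama/Llama-2-7b-chat-hf is a gated checkpoint on the Hugging Face Hub, which is presumably why the comment singles out Falcon 7B Instruct as the model that needs no authentication ("sem autenticação").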