mariusjabami committed on
Commit
a474012
·
verified ·
1 Parent(s): 665b7ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -101
app.py CHANGED
@@ -1,88 +1,23 @@
1
- import gradio as gr
2
- import torch
3
  import time
4
  import threading
 
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 
6
 
7
- # === Carregar modelo local (CPU) ===
8
- model_name = "lambdaindie/lambda-1v-1B" # troque pelo teu
9
- tokenizer = AutoTokenizer.from_pretrained(model_name)
10
- model = AutoModelForCausalLM.from_pretrained(model_name).to("cpu") # <- CPU aqui
11
-
12
- # === Streamer global para interrupção ===
13
- stop_signal = {"stop": False}
14
-
15
- def generate_stream(prompt, max_tokens=512, temperature=0.7, top_p=0.95):
16
- stop_signal["stop"] = False
17
- inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
18
-
19
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
20
-
21
- generation_thread = threading.Thread(
22
- target=model.generate,
23
- kwargs=dict(
24
- input_ids=inputs["input_ids"],
25
- attention_mask=inputs["attention_mask"],
26
- streamer=streamer,
27
- max_new_tokens=max_tokens,
28
- do_sample=True,
29
- temperature=temperature,
30
- top_p=top_p,
31
- pad_token_id=tokenizer.eos_token_id,
32
- )
33
- )
34
- generation_thread.start()
35
-
36
- output = ""
37
- for token in streamer:
38
- if stop_signal["stop"]:
39
- break
40
- output += token
41
- yield output.strip()
42
-
43
- def stop_stream():
44
- stop_signal["stop"] = True
45
-
46
- def respond(message, history, system_message, max_tokens, temperature, top_p):
47
- messages = [{"role": "system", "content": system_message}] if system_message else []
48
-
49
- for user, assistant in history[-3:]: # Limita a 3 interações passadas
50
- if user:
51
- messages.append({"role": "user", "content": user})
52
- if assistant:
53
- messages.append({"role": "assistant", "content": assistant})
54
-
55
- thinking_prompt = messages + [{"role": "user", "content": f"{message}\n\nThink step-by-step before answering."}]
56
- thinking_text = "\n".join([f"{m['role']}: {m['content']}" for m in thinking_prompt])
57
-
58
- reasoning = ""
59
- yield '<div class="markdown-think">Thinking...</div>'
60
-
61
- start = time.time()
62
- for token in generate_stream(thinking_text, max_tokens, temperature, top_p):
63
- reasoning = token
64
- yield f'<div class="markdown-think">{reasoning.strip()}</div>'
65
-
66
- elapsed = time.time() - start
67
- yield f"""
68
- <div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
69
- font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
70
- Pensou por {elapsed:.1f} segundos
71
- </div>
72
- """
73
-
74
- final_prompt = thinking_text + f"\n\nuser: {message}\nassistant: {reasoning.strip()}\nuser: Now answer based on your reasoning above.\nassistant:"
75
- final_answer = ""
76
-
77
- for token in generate_stream(final_prompt, max_tokens, temperature, top_p):
78
- final_answer = token
79
- yield final_answer.strip()
80
-
81
- # === Interface ===
82
 
 
83
  css = """
84
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
85
- * { font-family: 'JetBrains Mono', monospace !important; }
 
 
86
  html, body, .gradio-container {
87
  background-color: #111 !important;
88
  color: #e0e0e0 !important;
@@ -110,36 +45,98 @@ textarea, input, button, select {
110
 
111
  theme = gr.themes.Base(
112
  primary_hue="gray",
113
- font=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"]
 
 
 
114
  ).set(
115
  body_background_fill="#111",
116
  body_text_color="#e0e0e0",
117
- input_background_fill="#222",
118
- input_border_color="#444",
119
  button_primary_background_fill="#333",
120
  button_primary_text_color="#e0e0e0",
 
 
 
121
  )
122
 
123
- chatbot = gr.ChatInterface(
124
- fn=respond,
125
- title="λambdAI",
126
- css=css,
127
- theme=theme,
128
- additional_inputs=[
129
- gr.Textbox(value="", label="System Message"),
130
- gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
131
- gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
132
- gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
133
- ]
134
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- stop_btn = gr.Button("Parar Geração")
137
- stop_btn.click(fn=stop_stream, inputs=[], outputs=[])
138
 
139
- app = gr.Blocks()
140
- with app:
141
- chatbot.render()
142
- stop_btn.render()
143
 
144
- if __name__ == "__main__":
145
- app.launch(share=True)
 
1
+ import os
 
2
  import time
3
  import threading
4
+ import gradio as gr
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
6
+ import torch
7
 
8
+ # Carregar modelo local
9
+ model_id = "lambdaindie/lambda-1v-1B" # Substitua se quiser
10
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
11
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
12
+ model.to("cuda" if torch.cuda.is_available() else "cpu")
13
+ model.eval()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Estilo
16
  css = """
17
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
18
+ * {
19
+ font-family: 'JetBrains Mono', monospace !important;
20
+ }
21
  html, body, .gradio-container {
22
  background-color: #111 !important;
23
  color: #e0e0e0 !important;
 
45
 
46
  theme = gr.themes.Base(
47
  primary_hue="gray",
48
+ font=[
49
+ gr.themes.GoogleFont("JetBrains Mono"),
50
+ "monospace"
51
+ ]
52
  ).set(
53
  body_background_fill="#111",
54
  body_text_color="#e0e0e0",
 
 
55
  button_primary_background_fill="#333",
56
  button_primary_text_color="#e0e0e0",
57
+ input_background_fill="#222",
58
+ input_border_color="#444",
59
+ block_title_text_color="#fff"
60
  )
61
 
62
+ # Flag para parar
63
+ stop_signal = False
64
+
65
+ def stop_stream():
66
+ global stop_signal
67
+ stop_signal = True
68
+
69
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
70
+ global stop_signal
71
+ stop_signal = False
72
+
73
+ # Construção do prompt
74
+ prompt = ""
75
+ if system_message:
76
+ prompt += f"{system_message}\n\n"
77
+
78
+ for msg in history:
79
+ role = msg["role"]
80
+ content = msg["content"]
81
+ if role == "user":
82
+ prompt += f"User: {content}\n"
83
+ elif role == "assistant":
84
+ prompt += f"Assistant: {content}\n"
85
+
86
+ prompt += f"User: {message}\nAssistant:"
87
+
88
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
89
+
90
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
91
+ generation_kwargs = dict(
92
+ **inputs,
93
+ streamer=streamer,
94
+ max_new_tokens=max_tokens,
95
+ temperature=temperature,
96
+ top_p=top_p,
97
+ do_sample=True,
98
+ )
99
+
100
+ thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
101
+ thread.start()
102
+
103
+ output = ""
104
+ start = time.time()
105
+
106
+ for token in streamer:
107
+ if stop_signal:
108
+ break
109
+ output += token
110
+ yield {"role": "assistant", "content": output}
111
+
112
+ end = time.time()
113
+ yield {"role": "system", "content": f"Pensou por {end - start:.1f} segundos"}
114
+
115
+ # Interface
116
+ with gr.Blocks(css=css, theme=theme) as app:
117
+ chatbot = gr.Chatbot(label="λ", type="messages")
118
+
119
+ with gr.Row():
120
+ msg = gr.Textbox(label="Mensagem")
121
+ send_btn = gr.Button("Enviar")
122
+ stop_btn = gr.Button("Parar")
123
+
124
+ with gr.Accordion("Configurações Avançadas", open=False):
125
+ system_message = gr.Textbox(label="System Message", value="")
126
+ max_tokens = gr.Slider(64, 2048, value=256, step=1, label="Max Tokens")
127
+ temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
128
+ top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
129
+
130
+ state = gr.State([])
131
+
132
+ def user_message_submit(user_msg, chat_history):
133
+ if user_msg:
134
+ chat_history = chat_history + [{"role": "user", "content": user_msg}]
135
+ return "", chat_history
136
 
137
+ send_btn.click(fn=user_message_submit, inputs=[msg, state], outputs=[msg, state])\
138
+ .then(fn=respond, inputs=[msg, state, system_message, max_tokens, temperature, top_p], outputs=chatbot)
139
 
140
+ stop_btn.click(fn=stop_stream, inputs=[], outputs=[])
 
 
 
141
 
142
+ app.launch(share=True)