Update app.py
app.py
CHANGED
@@ -1,76 +1,44 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-import threading
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Detect the device automatically
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-# Initialize the model and the tokenizer
 model_name = "lambdaindie/lambda-1v-1B"
-model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-stop_flag = {"stop": False}
-
-def respond(prompt, history):
-    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
-
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    thread = threading.Thread(
-        target=model.generate,
-        kwargs={
-            **inputs,
-            "max_new_tokens": 512,
-            "do_sample": True,
-            "temperature": 0.7,
-            "top_p": 0.9,
-            "pad_token_id": tokenizer.eos_token_id,
-            "streamer": streamer,
-        }
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="float32",
+    low_cpu_mem_usage=True,
+    device_map="auto"
+)
+model.eval()
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+def respond(prompt):
+    full_prompt = f"Think step-by-step.\nQuestion: {prompt}\nAnswer:"
+    inputs = tokenizer(full_prompt, return_tensors="pt", return_attention_mask=False)
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+    output = model.generate(
+        **inputs,
+        max_new_tokens=128,
+        do_sample=False,  # greedy decoding, uses less RAM
+        pad_token_id=tokenizer.eos_token_id,
     )
-    thread.start()
-
-    reasoning = ""
-    for new_text in streamer:
-        if stop_flag["stop"]:
-            return "", history
-        reasoning += new_text
-        yield "", history[:-1] + [(prompt, f"<div class='final-answer'>{reasoning}</div>")]
-
-def stop_generation():
-    stop_flag["stop"] = True
+    answer = tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+    return answer.strip()
 
 with gr.Blocks(css="""
-    font-family: 'JetBrains Mono', monospace !important;
-    font-size: 11px !important;
-}
-.final-answer {
-    background-color: #1e1e1e;
-    color: #ffffff;
-    padding: 10px;
-    border-left: 4px solid #4caf50;
+.gr-button, .gr-textbox {
     font-family: 'JetBrains Mono', monospace !important;
-    white-space: pre-wrap;
     font-size: 11px !important;
 }
 """) as demo:
-    gr.Markdown("## λambdAI — Reasoning
-    txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
-    send_btn = gr.Button("Enviar")
-    stop_btn = gr.Button("Parar")
-
-    txt.submit(respond,
-    stop_btn.click(stop_generation, None, None)
+    gr.Markdown("## λambdAI — Light CPU Reasoning")
+    txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
+    output = gr.Textbox(label="Resposta", lines=6)
+    btn = gr.Button("Enviar")
+
+    btn.click(respond, txt, output)
+    txt.submit(respond, txt, output)
+
+demo.launch(share=True)
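For reference, the removed version streamed partial output through a TextIteratorStreamer fed by model.generate() running in a background thread, with a shared stop_flag dict to abort mid-generation. A minimal self-contained sketch of that pattern, not the exact original code (the helper name stream_reply and the skip_prompt/skip_special_tokens flags are assumptions; the full original call is not visible in the diff):

import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "lambdaindie/lambda-1v-1B"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
stop_flag = {"stop": False}  # shared mutable flag; a "stop" button flips it to True

def stream_reply(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so it runs in a worker thread while this generator drains the streamer
    thread = threading.Thread(
        target=model.generate,
        kwargs={**inputs, "max_new_tokens": 512, "streamer": streamer},
    )
    thread.start()
    text = ""
    for chunk in streamer:  # yields decoded text pieces as they are produced
        if stop_flag["stop"]:
            break
        text += chunk
        yield text
    thread.join()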
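Note that device_map="auto" and low_cpu_mem_usage=True in the new loading code both rely on the accelerate package being installed in the Space. On a CPU-only box the same model can also be loaded without accelerate; a plainer fallback, shown as a sketch (this variant is an assumption, not part of the commit):

import torch
from transformers import AutoModelForCausalLM

# Plain CPU load; float32 is already the default dtype, spelled out for clarity.
model = AutoModelForCausalLM.from_pretrained(
    "lambdaindie/lambda-1v-1B",
    torch_dtype=torch.float32,
)
model.eval()  # inference mode: disables dropout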
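The decode step in respond() slices output[0] from inputs["input_ids"].shape[-1] onward, so only the newly generated tokens are returned rather than the echoed prompt. A quick smoke test outside the UI (the question is a hypothetical placeholder):

if __name__ == "__main__":
    print(respond("What is 2 + 2?"))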
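Because respond() now blocks for the full generation, concurrent users would contend for the same CPU; Gradio's request queue serializes calls instead. An optional one-line addition before launch (not in the commit):

demo.queue(max_size=8)  # queue requests instead of running generations concurrently
demo.launch(share=True)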