mariusjabami committed on
Commit
4f7e40d
·
verified ·
1 Parent(s): 3b6f0da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -25
app.py CHANGED
@@ -1,44 +1,79 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
3
 
4
  model_name = "lambdaindie/lambda-1v-1B"
5
 
 
6
  model = AutoModelForCausalLM.from_pretrained(
7
  model_name,
8
- torch_dtype="float32",
9
- low_cpu_mem_usage=True,
10
- device_map="auto"
11
  )
12
- model.eval()
13
-
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
15
 
16
- def respond(prompt):
17
- full_prompt = f"Think step-by-step.\nQuestion: {prompt}\nAnswer:"
18
- inputs = tokenizer(full_prompt, return_tensors="pt", return_attention_mask=False)
19
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
 
 
 
 
 
20
 
21
- output = model.generate(
22
- **inputs,
23
- max_new_tokens=128,
24
- do_sample=False, # greedy, menos RAM
25
- pad_token_id=tokenizer.eos_token_id,
 
 
 
 
 
 
 
26
  )
27
- answer = tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
28
- return answer.strip()
29
 
 
 
 
 
 
 
 
 
 
 
 
30
  with gr.Blocks(css="""
31
- .gr-button, .gr-textbox {
32
  font-family: 'JetBrains Mono', monospace !important;
33
  font-size: 11px !important;
 
 
 
 
 
 
 
 
34
  }
35
  """) as demo:
36
- gr.Markdown("## λambdAI — Light CPU Reasoning")
37
- txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
38
- output = gr.Textbox(label="Resposta", lines=6)
39
- btn = gr.Button("Enviar")
 
 
 
40
 
41
- btn.click(respond, txt, output)
42
- txt.submit(respond, txt, output)
 
43
 
44
- demo.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
3
+ import torch
4
+ import threading
5
 
6
model_name = "lambdaindie/lambda-1v-1B"

# Load the model for CPU inference.
# BUGFIX: torch.float16 was requested here, but with no device_map the model
# loads on CPU, where half-precision generation is unsupported/unreliable
# (typically raises "addmm_impl_cpu_ not implemented for 'Half'"). Use float32.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,  # stream weights in to keep peak RAM low
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
15
 
16
# Mutable flag shared between respond() and stop_generation(); a dict is used
# so the streaming loop sees updates without a `global` declaration.
stop_flag = {"stop": False}


def respond(prompt, history):
    """Stream a step-by-step answer for *prompt*, yielding Gradio updates.

    Yields ``(textbox_value, chatbot_history)`` pairs so the UI clears the
    input box and shows the answer as it is generated. ``model.generate``
    runs in a background thread feeding a ``TextIteratorStreamer``.
    """
    from html import escape  # local import: used only to sanitize output

    stop_flag["stop"] = False
    history = history[-3:]  # keep only the last 3 (user, bot) pairs

    full_prompt = f"\nThink a bit step-by-step before answering.\nQuestion: {prompt}\nAnswer:"
    inputs = tokenizer(full_prompt, return_tensors="pt")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_thread = threading.Thread(
        target=model.generate,
        kwargs={
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
            "pad_token_id": tokenizer.eos_token_id,
            "streamer": streamer,
        },
    )
    generation_thread.start()

    reasoning = ""
    for new_text in streamer:
        if stop_flag["stop"]:
            # BUGFIX: the original did `return "", history` — a generator's
            # return value is discarded by Gradio, so the UI never got the
            # final state. Yield it first, then stop iterating.
            # NOTE(review): the background generate() thread keeps running to
            # completion; truly aborting it would need a StoppingCriteria.
            yield "", history
            return
        reasoning += new_text
        # Escape the model output so raw '<', '>' or '&' from the model
        # cannot break out of (or inject into) the HTML wrapper below.
        yield "", history + [(prompt, f"<div class='final-answer'>{escape(reasoning)}</div>")]


def stop_generation():
    """Signal the streaming loop in respond() to stop at the next token."""
    stop_flag["stop"] = True
51
+
52
# --- Gradio interface ---------------------------------------------------
# Monospace styling for the chat widgets plus the highlighted answer box.
_CSS = """
#chatbot, .gr-markdown, .gr-button, .gr-textbox {
    font-family: 'JetBrains Mono', monospace !important;
    font-size: 11px !important;
}
.final-answer {
    background-color: #1e1e1e;
    color: #ffffff;
    padding: 10px;
    border-left: 4px solid #4caf50;
    white-space: pre-wrap;
    font-size: 11px !important;
}
"""

with gr.Blocks(css=_CSS) as demo:
    gr.Markdown("## λambdAI — Reasoning Chat")

    chat_window = gr.Chatbot(elem_id="chatbot")
    with gr.Row():
        question_box = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
        submit_button = gr.Button("Enviar")
        halt_button = gr.Button("Parar")

    # Both the send button and Enter in the textbox trigger the same
    # streaming handler; the stop button only flips the shared flag.
    for trigger in (submit_button.click, question_box.submit):
        trigger(respond, [question_box, chat_window], [question_box, chat_window])
    halt_button.click(stop_generation, None, None)

demo.launch(share=True)