wenhu committed · Commit d5d2872 · verified · 1 Parent(s): 31d3f3e

Update app.py

Files changed (1)
  1. app.py +14 -22
app.py CHANGED
@@ -14,14 +14,12 @@ from serve_constants import html_header, bibtext, learn_more_markdown, tos_markd
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 
 MODEL_ID = "TIGER-Lab/PixelReasoner-RL-v1"
-# MODEL_ID = "/home/ma-user/work/haozhe/workspace/lmm-r1/toolckpts/pix17K0506wt-NormalizedPenalizedFixedReweightCont-256-lossvernone-samplevernone-fmtnone-group-n8-ml10000-lr10-sysvcot-8node/global_step24_hf_evalbest"
-example_image = f"{cur_dir}/example_images/1.jpg" # /home/ma-user/work/haozhe/workspace/vlspaces/
-# example_image = "/home/ma-user/work/haozhe/workspace/vlspaces/example_images/1.jpg"
+example_image = f"{cur_dir}/example_images/1.jpg"
+
+print(example_image)
 example_text = "What kind of restaurant is it?"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True,
-                                          # min_pixels=min_pixels,
-                                          max_pixels=512*28*28,
-                                          )
+                                          max_pixels=512*28*28)
 model = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID,
     trust_remote_code=True,
@@ -172,6 +170,7 @@ def model_inference(input_dict, history):
     # Create the full path to the folder
     folder_path = os.path.join(current_path, folder_to_find)
     print('files', files)
+
     imagelist = rawimagelist = current_message_images = [load_image(image) for image in files]
     all_images += current_message_images
     messages.append({
@@ -183,7 +182,7 @@ def model_inference(input_dict, history):
     })
 
     print(messages)
-    # complete_assistant_response_for_gradio = ""
+
     complete_assistant_response_for_gradio = []
     while True:
         """
@@ -199,15 +198,9 @@ def model_inference(input_dict, history):
 
         streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
-        # import pdb; pdb.set_trace()
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
 
-        # buffer = ""
-        # for new_text in streamer:
-        #     buffer += new_text
-        #     yield buffer
-        # print(buffer)
         current_model_output_segment = "" # Text generated in this specific model call
         toolflag = False
         for new_text_chunk in streamer:
@@ -226,18 +219,14 @@ def model_inference(input_dict, history):
         processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
 
         # Append this processed segment to the cumulative display string for Gradio
-        # complete_assistant_response_for_gradio += processed_segment + "\n\n"
         complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
-        # print(f"this one: {complete_assistant_response_for_gradio}")
         yield complete_assistant_response_for_gradio # Ensure the fully processed segment is yielded to Gradio
 
 
         # Check for tool call in the *just generated* segment
         qatext_for_tool_check = processed_segment
         require_tool = tool_end in qatext_for_tool_check and tool_start in qatext_for_tool_check
-
-        # print(f"Segment from model: \"{qatext_for_tool_check[:200]}...\", Requires tool: {require_tool}")
-
+
         if require_tool:
 
             tool_params = parse_last_tool(qatext_for_tool_check)
@@ -252,8 +241,6 @@ def model_inference(input_dict, history):
             print(raw_result)
             proc_img = raw_result
             all_images += [proc_img]
-            # complete_assistant_response_for_gradio += [(proc_img, "Visual Operation Result")]
-            # yield complete_assistant_response_for_gradio # Update Gradio display
 
             new_piece = dict(role='user', content=[
                 dict(type='text', text="\nHere is the cropped image (Image Size: {}x{}):".format(proc_img.size[0], proc_img.size[1])),
@@ -261,7 +248,6 @@ def model_inference(input_dict, history):
                 ]
             )
             messages.append(new_piece)
-            # print(messages)
             # complete_assistant_response_for_gradio += f"\n<b>Analyzing Operation Result ...</b> @region(size={proc_img.size[0]}x{proc_img.size[1]})\n\n"
             complete_assistant_response_for_gradio += [f"\n<b>Analyzing Operation Result ...</b> @region(size={proc_img.size[0]}x{proc_img.size[1]})\n\n"]
             yield complete_assistant_response_for_gradio # Update Gradio display
@@ -272,7 +258,13 @@
 
 with gr.Blocks() as demo:
     examples = [
-        [{"text": example_text, "files": [example_image]}]
+        [
+            {"text": example_text,
+             "files": [
+                 example_image
+             ]
+            }
+        ]
     ]
 
     gr.HTML(html_header)
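
Note on the streaming path the hunks above touch: app.py follows the usual transformers pattern of running model.generate in a background thread while a TextIteratorStreamer hands decoded chunks back to the UI loop. Below is a minimal self-contained sketch of that pattern; the model ID, processor kwargs, and generation parameters are taken from the diff, while the stream_reply helper and its inputs argument are illustrative names, not part of app.py.

    # Minimal sketch (not part of the commit) of the streaming setup used above.
    from threading import Thread

    from transformers import (
        AutoModelForImageTextToText,
        AutoProcessor,
        TextIteratorStreamer,
    )

    MODEL_ID = "TIGER-Lab/PixelReasoner-RL-v1"

    # max_pixels bounds the per-image pixel budget after preprocessing
    # (512*28*28 pixels), matching the value set in this commit.
    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True,
                                              max_pixels=512*28*28)
    model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, trust_remote_code=True)

    def stream_reply(inputs):
        # Hypothetical helper: "inputs" is the dict the processor produced.
        # Generation runs in a worker thread; the streamer yields text chunks
        # as they are produced, so the caller can update the display live.
        streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024,
                                 temperature=0.1, top_p=0.95, top_k=50)
        Thread(target=model.generate, kwargs=generation_kwargs).start()
        buffer = ""
        for new_text_chunk in streamer:
            buffer += new_text_chunk
            yield buffer

Keeping generate off the main thread is what lets the while-loop in model_inference interleave streamed text with visual tool calls: each yield refreshes the Gradio view without blocking generation.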
 