R-PRM-Demo

Running on Zero

App Files Community

kevinpro committed 2 days ago

Commit

31b8285

verified ·

1 Parent(s): 939f8b7

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -20

app.py CHANGED Viewed

@@ -50,7 +50,7 @@ def split_string_into_max_six_chunks(input_str: str) -> list[str]:
         return []
     # Define the maximum number of chunks desired
-    max_chunks = 6
     # If the number of lines is already within the limit, return the lines as they are
     if num_lines <= max_chunks:
@@ -84,10 +84,27 @@ print("Ednd dowload")
 # Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 # Only assign GPU if cache not used
 @spaces.GPU
-def translate(input_question,input_cot):
     all_judge = ""
     reasoning_chunk = split_string_into_max_six_chunks(input_cot)
     previsous_step_string = ""
@@ -96,23 +113,11 @@ def translate(input_question,input_cot):
         cur_step = "Step {}: ".format(index) + r
         input_string = template.format(input_question,previsous_step_string,cur_step)
         print(input_string)
-        input_tokens = (
-            tokenizer(input_string, return_tensors="pt")
-            .input_ids[0]
-            .cpu()
-            .numpy()
-            .tolist()
-        )
-        translated_chunk = model.generate(
-            input_ids=torch.tensor([input_tokens]).to(device),
-            max_length=len(input_tokens) + 2048,
-            num_return_sequences=1,
-        )
-        full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
-        full_output = full_output.replace(input_string,"")
         previsous_step_string += "\n" + input_string
-        all_judge += "Step {}: ".format(index) + full_output + "\n\n"
-        print(full_output)
     return all_judge
@@ -137,7 +142,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         output = gr.Textbox(label="Output Text", lines=6)
     btn.click(
-        translate,
         inputs=[input_question,input_cot],
         outputs=output,
     )

         return []
     # Define the maximum number of chunks desired
+    max_chunks = 5
     # If the number of lines is already within the limit, return the lines as they are
     if num_lines <= max_chunks:
 # Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 # Only assign GPU if cache not used
 @spaces.GPU
+def working(input_text):
+    input_tokens = (
+        tokenizer(input_text, return_tensors="pt")
+        .input_ids[0]
+        .cpu()
+        .numpy()
+        .tolist()
+    )
+    translated_chunk = model.generate(
+        input_ids=torch.tensor([input_tokens]).to(device),
+        max_length=len(input_tokens) + 2048,
+        num_return_sequences=1,
+    )
+    full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
+    full_output = full_output.replace(input_text,"")
+    return full_output
+def Judge(input_question,input_cot):
     all_judge = ""
     reasoning_chunk = split_string_into_max_six_chunks(input_cot)
     previsous_step_string = ""
         cur_step = "Step {}: ".format(index) + r
         input_string = template.format(input_question,previsous_step_string,cur_step)
         print(input_string)
+        output = working(input_string)
         previsous_step_string += "\n" + input_string
+        all_judge += "Step {}: ".format(index) + output + "\n\n"
+        print(output)
+        print("============================\n\n")
     return all_judge
     with gr.Row():
         output = gr.Textbox(label="Output Text", lines=6)
     btn.click(
+        Judge,
         inputs=[input_question,input_cot],
         outputs=output,
     )