Spaces:

witcher23
/

nanoVLM-inference

Running

App Files Community

vidhanm committed 14 days ago

Commit

b0a4224

1 Parent(s): 7d56bcc

Updated parameter names in call_generate_script

Browse files

Files changed (1) hide show

app.py +42 -43

app.py CHANGED Viewed

@@ -22,109 +22,108 @@ MODEL_REPO_ID = "lusxvr/nanoVLM-222M"
 print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
 print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")
 def call_generate_script(image_path: str, prompt_text: str) -> str:
     print(f"\n--- DEBUG (call_generate_script) ---")
     print(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")
     print(f"Calling with image_path='{image_path}', prompt='{prompt_text}'")
-    # Arguments for nanoVLM's generate.py
-    # Using low max_new_tokens for CPU testing.
     cmd_args = [
-        "python", "-u", GENERATE_SCRIPT_PATH, # -u for unbuffered output
         "--hf_model", MODEL_REPO_ID,
-        "--image_path", image_path,      # Corrected: nanoVLM generate.py uses --image_path
         "--prompt", prompt_text,
-        "--num_samples", "1",           # Corrected: Corresponds to --generations
-        "--max_new_tokens", "30",       # Keep it low for testing
-        "--device", "cpu"               # Explicitly set device for generate.py
-        # Optional args for generate.py:
         # "--temperature", "0.7",
-        # "--top_k", "50"
     ]
     print(f"Executing command: {' '.join(cmd_args)}")
-    # Realistic timeout for the subprocess. HF Spaces free tier usually times out requests around 60s.
-    # Set this shorter to catch issues within app.py.
     SCRIPT_TIMEOUT_SECONDS = 55
     start_time = time.time()
-    process_details = "Process details not available." # Placeholder
     try:
         process = subprocess.run(
             cmd_args,
             capture_output=True,
             text=True,
-            check=False,  # Set to False to manually check returncode and log output
             timeout=SCRIPT_TIMEOUT_SECONDS
         )
-        process_details = f"PID {process.pid if hasattr(process, 'pid') else 'N/A'}"
         duration = time.time() - start_time
-        print(f"Subprocess ({process_details}) finished in {duration:.2f} seconds.")
         print(f"generate.py RETURN CODE: {process.returncode}")
         stdout = process.stdout.strip() if process.stdout else "[No STDOUT from generate.py]"
         stderr = process.stderr.strip() if process.stderr else "[No STDERR from generate.py]"
-        print(f"---------- generate.py STDOUT ({process_details}) START ----------\n{stdout}\n---------- generate.py STDOUT ({process_details}) END ----------")
         if stderr or process.returncode != 0:
-            print(f"---------- generate.py STDERR ({process_details}) START ----------\n{stderr}\n---------- generate.py STDERR ({process_details}) END ----------")
         if process.returncode != 0:
             error_message = f"Error: Generation script failed (code {process.returncode})."
-            if "out of memory" in stderr.lower(): error_message += " Potential OOM in script."
-            print(error_message) # Log it before returning
-            return error_message + f" See Space logs for full STDOUT/STDERR from script ({process_details})."
         # --- Parse the output from nanoVLM's generate.py ---
-        # Expected format:
-        # Outputs:
         # > Sample 1: <generated text>
         output_lines = stdout.splitlines()
-        generated_text = "[No parsable output from generate.py]" # Default
         found_output_line = False
         for line_idx, line in enumerate(output_lines):
             stripped_line = line.strip()
-            # print(f"Parsing STDOUT line {line_idx}: '{stripped_line}'") # Can be very verbose
-            if stripped_line.startswith("> Sample 1:") or stripped_line.startswith(">> Generation 1:"):
-                prefix_to_remove = ""
-                if stripped_line.startswith("> Sample 1:"): prefix_to_remove = "> Sample 1:"
-                elif stripped_line.startswith(">> Generation 1:  "): prefix_to_remove = ">> Generation 1:  " # Note double space
-                elif stripped_line.startswith(">> Generation 1: "): prefix_to_remove = ">> Generation 1: " # Note single space
-                if prefix_to_remove:
-                    generated_text = stripped_line.replace(prefix_to_remove, "", 1).strip()
-                    found_output_line = True
-                    print(f"Parsed generated text: '{generated_text}'")
-                    break
         if not found_output_line:
-            print(f"Could not find 'Sample 1' or 'Generation 1' line in generate.py output.")
-            # Return a snippet of STDOUT if parsing fails, to help debug output format
-            generated_text = f"[Parsing failed] STDOUT (first 200 chars): {stdout[:200]}"
         print(f"Returning parsed text: '{generated_text}'")
         return generated_text
     except subprocess.TimeoutExpired as e:
         duration = time.time() - start_time
-        print(f"ERROR: generate.py ({process_details}) timed out after {duration:.2f} seconds (limit: {SCRIPT_TIMEOUT_SECONDS}s).")
-        stdout_on_timeout = e.stdout.strip() if e.stdout else "[No STDOUT on timeout]"
-        stderr_on_timeout = e.stderr.strip() if e.stderr else "[No STDERR on timeout]"
         print(f"STDOUT on timeout:\n{stdout_on_timeout}")
         print(f"STDERR on timeout:\n{stderr_on_timeout}")
         return f"Error: Generation script timed out after {SCRIPT_TIMEOUT_SECONDS}s. Model loading and generation may be too slow for CPU."
     except Exception as e:
         duration = time.time() - start_time
-        print(f"ERROR: An unexpected error occurred ({process_details}) after {duration:.2f}s: {type(e).__name__} - {e}")
         import traceback; traceback.print_exc()
         return f"Unexpected error calling script: {str(e)}"
     finally:
         print(f"--- END (call_generate_script) ---")
 def gradio_interface_fn(image_input_pil: Optional[PILImage.Image], prompt_input_str: Optional[str]) -> str:
     print(f"\nDEBUG (gradio_interface_fn): Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")

 print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
 print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")
+# In app.py
 def call_generate_script(image_path: str, prompt_text: str) -> str:
     print(f"\n--- DEBUG (call_generate_script) ---")
     print(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")
     print(f"Calling with image_path='{image_path}', prompt='{prompt_text}'")
+    # Arguments for nanoVLM's generate.py, VERIFIED against its source code
     cmd_args = [
+        "python", "-u", GENERATE_SCRIPT_PATH,
         "--hf_model", MODEL_REPO_ID,
+        "--image_path", image_path,      # VERIFIED: script expects --image_path
         "--prompt", prompt_text,
+        "--num_samples", "1",           # VERIFIED: script expects --num_samples
+        "--max_new_tokens", "30",       # This was correct
+        "--device", "cpu"               # VERIFIED: script expects --device
+        # Optional args for generate.py that you can add if needed:
         # "--temperature", "0.7",
+        # "--top_k", "200" # Default is 200 in script
     ]
     print(f"Executing command: {' '.join(cmd_args)}")
     SCRIPT_TIMEOUT_SECONDS = 55
     start_time = time.time()
+    process_identifier = "generate.py_process"
     try:
         process = subprocess.run(
             cmd_args,
             capture_output=True,
             text=True,
+            check=False,
             timeout=SCRIPT_TIMEOUT_SECONDS
         )
         duration = time.time() - start_time
+        print(f"Subprocess ({process_identifier}) finished in {duration:.2f} seconds.")
         print(f"generate.py RETURN CODE: {process.returncode}")
         stdout = process.stdout.strip() if process.stdout else "[No STDOUT from generate.py]"
         stderr = process.stderr.strip() if process.stderr else "[No STDERR from generate.py]"
+        print(f"---------- generate.py STDOUT ({process_identifier}) START ----------\n{stdout}\n---------- generate.py STDOUT ({process_identifier}) END ----------")
         if stderr or process.returncode != 0:
+            print(f"---------- generate.py STDERR ({process_identifier}) START ----------\n{stderr}\n---------- generate.py STDERR ({process_identifier}) END ----------")
         if process.returncode != 0:
             error_message = f"Error: Generation script failed (code {process.returncode})."
+            if "unrecognized arguments" in stderr:
+                error_message += " Argument mismatch with script."
+            print(error_message)
+            return error_message + f" STDERR Snippet: {stderr[:300]}" # Show more stderr
         # --- Parse the output from nanoVLM's generate.py ---
+        # The original nanoVLM generate.py prints:
         # > Sample 1: <generated text>
         output_lines = stdout.splitlines()
+        generated_text = "[No parsable output from generate.py]"
         found_output_line = False
         for line_idx, line in enumerate(output_lines):
             stripped_line = line.strip()
+            # The actual generate.py from nanoVLM prints "> Sample 1:"
+            prefix_to_remove = None
+            if stripped_line.startswith("> Sample 1:"):
+                prefix_to_remove = "> Sample 1:"
+            if prefix_to_remove:
+                generated_text = stripped_line.replace(prefix_to_remove, "", 1).strip()
+                found_output_line = True
+                print(f"Parsed generated text: '{generated_text}'")
+                break
         if not found_output_line:
+            print(f"Could not find '> Sample 1:' line in generate.py output. Raw STDOUT was:\n{stdout}")
+            if stdout:
+                generated_text = f"[Parsing failed] STDOUT: {stdout[:500]}"
+            else:
+                generated_text = "[Parsing failed, no STDOUT from script]"
         print(f"Returning parsed text: '{generated_text}'")
         return generated_text
     except subprocess.TimeoutExpired as e:
         duration = time.time() - start_time
+        print(f"ERROR: generate.py ({process_identifier}) timed out after {duration:.2f} seconds (limit: {SCRIPT_TIMEOUT_SECONDS}s).")
+        stdout_on_timeout = e.stdout.strip() if hasattr(e, 'stdout') and e.stdout else "[No STDOUT on timeout]"
+        stderr_on_timeout = e.stderr.strip() if hasattr(e, 'stderr') and e.stderr else "[No STDERR on timeout]"
         print(f"STDOUT on timeout:\n{stdout_on_timeout}")
         print(f"STDERR on timeout:\n{stderr_on_timeout}")
         return f"Error: Generation script timed out after {SCRIPT_TIMEOUT_SECONDS}s. Model loading and generation may be too slow for CPU."
     except Exception as e:
         duration = time.time() - start_time
+        print(f"ERROR: An unexpected error occurred ({process_identifier}) after {duration:.2f}s: {type(e).__name__} - {e}")
         import traceback; traceback.print_exc()
         return f"Unexpected error calling script: {str(e)}"
     finally:
         print(f"--- END (call_generate_script) ---")
+# The rest of your app.py (gradio_interface_fn, Gradio Interface Definition, __main__ block)
+# should remain the same.
 def gradio_interface_fn(image_input_pil: Optional[PILImage.Image], prompt_input_str: Optional[str]) -> str:
     print(f"\nDEBUG (gradio_interface_fn): Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")