# Spaces: Running  (page-capture residue kept as a comment; not part of the program)
import os
import subprocess  # For calling generate.py
import sys
import tempfile  # For handling temporary image files
import time  # For timing
from typing import Optional

import gradio as gr
from PIL import Image as PILImage
# Add the cloned nanoVLM directory to Python's module search path.
NANOVLM_REPO_PATH = "/app/nanoVLM"
if NANOVLM_REPO_PATH not in sys.path:
    print(f"DEBUG: Adding {NANOVLM_REPO_PATH} to sys.path")
    # Insert at the front so this directory is searched before other entries.
    sys.path.insert(0, NANOVLM_REPO_PATH)
print(f"DEBUG: Python sys.path: {sys.path}")
print(f"DEBUG: Gradio version: {gr.__version__}")  # Log Gradio version

# Script that is executed in a subprocess for every request; derived from the
# repo path so the two locations cannot drift apart.
GENERATE_SCRIPT_PATH = f"{NANOVLM_REPO_PATH}/generate.py"
# Model identifier handed to generate.py via its --hf_model flag.
MODEL_REPO_ID = "lusxvr/nanoVLM-222M"
print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")
# In app.py
# Line prefix that marks the generated sample in generate.py's stdout.
_SAMPLE_PREFIX = "> Sample 1:"


def _coerce_text(stream) -> str:
    """Return captured process output as stripped text ('' for None/empty)."""
    if not stream:
        return ""
    if isinstance(stream, bytes):
        # TimeoutExpired may carry raw bytes even when text=True was requested.
        stream = stream.decode("utf-8", errors="replace")
    return stream.strip()


def _parse_generated_text(stdout: str) -> Optional[str]:
    """Return the text after the first '> Sample 1:' line, or None if absent."""
    for line in stdout.splitlines():
        stripped_line = line.strip()
        if stripped_line.startswith(_SAMPLE_PREFIX):
            return stripped_line[len(_SAMPLE_PREFIX):].strip()
    return None


def call_generate_script(image_path: str, prompt_text: str) -> str:
    """Run generate.py in a subprocess and return the text it generated.

    The script at GENERATE_SCRIPT_PATH is executed with the current Python
    interpreter, and its stdout is scanned for a line starting with
    '> Sample 1:'. This function never raises: every failure mode is logged
    and reported through the returned string.

    Args:
        image_path: Path of the image file passed to the script.
        prompt_text: Prompt passed to the script.

    Returns:
        The generated text on success; otherwise a human-readable message
        describing a non-zero exit code, unparsable output, a timeout, or an
        unexpected error.
    """
    print("\n--- DEBUG (call_generate_script) ---")
    print(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Calling with image_path='{image_path}', prompt='{prompt_text}'")

    # NOTE(review): the flag names below were marked "VERIFIED" against
    # generate.py by the original author; that cannot be confirmed from this
    # file. A mismatch surfaces as the "unrecognized arguments" error below.
    cmd_args = [
        # Use the interpreter running this app so the child sees the same
        # environment; fall back to PATH lookup if it is unknown.
        sys.executable or "python", "-u", GENERATE_SCRIPT_PATH,
        "--hf_model", MODEL_REPO_ID,
        "--image_path", image_path,
        "--prompt", prompt_text,
        "--num_samples", "1",
        "--max_new_tokens", "30",
        "--device", "cpu",
    ]
    print(f"Executing command: {' '.join(cmd_args)}")

    SCRIPT_TIMEOUT_SECONDS = 55
    start_time = time.time()
    process_identifier = "generate.py_process"
    try:
        # Arguments are passed as a list (no shell), so the prompt is never
        # interpreted by a shell.
        process = subprocess.run(
            cmd_args,
            capture_output=True,
            text=True,
            check=False,
            timeout=SCRIPT_TIMEOUT_SECONDS,
        )
        duration = time.time() - start_time
        print(f"Subprocess ({process_identifier}) finished in {duration:.2f} seconds.")
        print(f"generate.py RETURN CODE: {process.returncode}")

        # Keep the raw (possibly empty) text for decisions and use the
        # placeholders only for display, so emptiness checks stay meaningful.
        stdout = _coerce_text(process.stdout)
        stderr = _coerce_text(process.stderr)
        stdout_display = stdout or "[No STDOUT from generate.py]"
        stderr_display = stderr or "[No STDERR from generate.py]"

        print(f"---------- generate.py STDOUT ({process_identifier}) START ----------\n{stdout_display}\n---------- generate.py STDOUT ({process_identifier}) END ----------")
        if stderr or process.returncode != 0:
            print(f"---------- generate.py STDERR ({process_identifier}) START ----------\n{stderr_display}\n---------- generate.py STDERR ({process_identifier}) END ----------")

        if process.returncode != 0:
            error_message = f"Error: Generation script failed (code {process.returncode})."
            if "unrecognized arguments" in stderr:
                error_message += " Argument mismatch with script."
            print(error_message)
            return error_message + f" STDERR Snippet: {stderr_display[:300]}"

        generated_text = _parse_generated_text(stdout)
        if generated_text is not None:
            print(f"Parsed generated text: '{generated_text}'")
        else:
            print(f"Could not find '> Sample 1:' line in generate.py output. Raw STDOUT was:\n{stdout_display}")
            if stdout:
                generated_text = f"[Parsing failed] STDOUT: {stdout[:500]}"
            else:
                generated_text = "[Parsing failed, no STDOUT from script]"
        print(f"Returning parsed text: '{generated_text}'")
        return generated_text
    except subprocess.TimeoutExpired as e:
        duration = time.time() - start_time
        print(f"ERROR: generate.py ({process_identifier}) timed out after {duration:.2f} seconds (limit: {SCRIPT_TIMEOUT_SECONDS}s).")
        stdout_on_timeout = _coerce_text(getattr(e, "stdout", None)) or "[No STDOUT on timeout]"
        stderr_on_timeout = _coerce_text(getattr(e, "stderr", None)) or "[No STDERR on timeout]"
        print(f"STDOUT on timeout:\n{stdout_on_timeout}")
        print(f"STDERR on timeout:\n{stderr_on_timeout}")
        return f"Error: Generation script timed out after {SCRIPT_TIMEOUT_SECONDS}s. Model loading and generation may be too slow for CPU."
    except Exception as e:
        # Boundary handler: the caller expects a string, so log and report.
        duration = time.time() - start_time
        print(f"ERROR: An unexpected error occurred ({process_identifier}) after {duration:.2f}s: {type(e).__name__} - {e}")
        import traceback
        traceback.print_exc()
        return f"Unexpected error calling script: {str(e)}"
    finally:
        print("--- END (call_generate_script) ---")
# The rest of app.py (gradio_interface_fn, the Gradio interface definition, and the
# __main__ block) remains the same.
def gradio_interface_fn(image_input_pil: Optional[PILImage.Image], prompt_input_str: Optional[str]) -> str:
    """Handle one Gradio request: validate inputs, run the script, return text.

    The uploaded image is converted to RGB if needed, written to a temporary
    JPEG file, and passed with the prompt to call_generate_script. The
    temporary file is removed before returning.

    Args:
        image_input_pil: Uploaded image, or None when nothing was uploaded.
        prompt_input_str: User prompt; None or whitespace-only is rejected.

    Returns:
        The string from call_generate_script, or a message asking for the
        missing input, or a description of an error raised while processing.
    """
    print(f"\nDEBUG (gradio_interface_fn): Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Received prompt: '{prompt_input_str}', Image type: {type(image_input_pil)}")

    if image_input_pil is None:
        return "Please upload an image."
    cleaned_prompt = prompt_input_str.strip() if prompt_input_str else ""
    if not cleaned_prompt:
        return "Please provide a non-empty prompt."

    tmp_image_path = None
    try:
        if image_input_pil.mode != "RGB":
            print(f"Converting image from {image_input_pil.mode} to RGB.")
            image_input_pil = image_input_pil.convert("RGB")

        # delete=False keeps the file on disk after the handle closes so the
        # subprocess can read it; removal happens in the finally block.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_image_file:
            # Record the path before saving so a failed save() still gets
            # cleaned up instead of leaking the file.
            tmp_image_path = tmp_image_file.name
            image_input_pil.save(tmp_image_file, format="JPEG")
            print(f"Temporary image saved to: {tmp_image_path}")

        result_text = call_generate_script(tmp_image_path, cleaned_prompt)
        print(f"Result from call_generate_script: '{result_text}'")
        return result_text
    except Exception as e:
        # Boundary handler: Gradio shows the returned string to the user.
        print(f"ERROR (gradio_interface_fn): Error processing image or calling script: {type(e).__name__} - {e}")
        import traceback
        traceback.print_exc()
        return f"An error occurred in Gradio interface function: {str(e)}"
    finally:
        if tmp_image_path is not None:
            try:
                os.remove(tmp_image_path)
                print(f"Temporary image {tmp_image_path} removed.")
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except OSError as e_remove:
                print(f"WARN: Could not remove temporary image {tmp_image_path}: {e_remove}")
        print("DEBUG (gradio_interface_fn): Exiting.")
# --- Gradio Interface Definition ---
# Markdown shown under the title of the demo page.
description_md = """
## nanoVLM-222M Interactive Demo (via generate.py)
Upload an image and type a prompt. This interface calls the `generate.py` script from
`huggingface/nanoVLM` under the hood to perform inference.
**Note:** Each request re-loads the model via the script, so it might be slow on CPU.
"""

print("DEBUG: Defining Gradio interface...")
# Stays None if construction fails; the launch block checks for that.
iface = None
try:
    iface = gr.Interface(
        fn=gradio_interface_fn,
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            gr.Textbox(label="Your Prompt / Question", info="e.g., 'describe this image in detail'"),
        ],
        outputs=gr.Textbox(label="Generated Text", show_copy_button=True, lines=5),
        title="nanoVLM-222M Demo (via Script)",
        description=description_md,
        allow_flagging="never",
    )
    print("DEBUG: Gradio interface defined successfully.")
except Exception as e:
    # Top-level boundary: log the failure and leave iface as None rather than
    # aborting the import.
    print(f"CRITICAL ERROR defining Gradio interface: {e}")
    import traceback
    traceback.print_exc()
# --- Launch Gradio App ---
if __name__ == "__main__":
    print("DEBUG: Entered __main__ block for Gradio launch.")

    # Without the script every request would fail, so refuse to launch.
    if not os.path.exists(GENERATE_SCRIPT_PATH):
        print(f"CRITICAL ERROR: The script {GENERATE_SCRIPT_PATH} was not found. Cannot launch app.")
        iface = None

    if iface is not None:
        print("DEBUG: Attempting to launch Gradio interface...")
        try:
            # Bind to all interfaces on port 7860.
            iface.launch(server_name="0.0.0.0", server_port=7860)
            print("DEBUG: Gradio launch command issued. UI should be accessible.")
        except Exception as e:
            # Top-level boundary: log the failure with its traceback.
            print(f"CRITICAL ERROR launching Gradio interface: {e}")
            import traceback
            traceback.print_exc()
    else:
        print("CRITICAL ERROR: Gradio interface (iface) is None or not defined. Cannot launch.")