# app.py
"""Gradio front end for the PDF analyzer.

Exposes a single-page UI where a PDF can be uploaded, hands the upload to
``main_analyzer.analyze_pdf`` and shows the resulting report as JSON.
"""
import io
import json
import os
import tempfile
import traceback
from typing import Any, Optional, Tuple  # Any: the upload payload type varies

import gradio as gr

# Import config first to ensure JAVA_HOME is set early
import config
from main_analyzer import analyze_pdf

# language_tool_python is imported lazily in the __main__ section, where it is
# only used for a start-up smoke test.


def _error_json(message: str, **extra: Any) -> str:
    """Return a pretty-printed JSON error payload for the results panel."""
    return json.dumps({"error": message, **extra}, indent=2)


def process_upload(file_data_binary: Optional[Any]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the report as a JSON string.

    Args:
        file_data_binary: Whatever the ``gr.File`` input hands to the callback.
            Accepted shapes are ``None`` (nothing uploaded), raw ``bytes``,
            a filesystem path (``str`` / ``os.PathLike``) or a file-like
            object exposing ``read``.

    Returns:
        A ``(results_json, annotated_pdf)`` pair. ``results_json`` is either
        the analysis report or an ``{"error": ...}`` object, serialized as
        JSON. ``annotated_pdf`` is always ``None`` because the annotated-PDF
        output is still a placeholder.

    Errors raised by ``analyze_pdf`` are caught, logged to stdout and reported
    in the returned JSON instead of propagating to Gradio.
    """
    if file_data_binary is None:
        return _error_json("No file uploaded or file data is None"), None

    # Normalize the upload into something analyze_pdf can consume.
    # NOTE(review): analyze_pdf is assumed to accept a path or a file-like
    # object with read()/seek() -- confirm against main_analyzer.
    if isinstance(file_data_binary, (bytes, bytearray)):
        # Raw bytes have no read()/seek(); give them a file-like interface.
        pdf_source: Any = io.BytesIO(file_data_binary)
    elif isinstance(file_data_binary, (str, os.PathLike)):
        # Depending on the Gradio version / File `type`, the callback may
        # receive the path of the uploaded temp file rather than an object.
        pdf_source = os.fspath(file_data_binary)
    elif hasattr(file_data_binary, "read"):
        # Already file-like (e.g. a temp-file wrapper); pass it straight on.
        pdf_source = file_data_binary
    else:
        return _error_json(
            f"Unexpected file data type: {type(file_data_binary)}"
        ), None

    try:
        print("App: Processing uploaded file...")
        # Second element of analyze_pdf's result is not used by the UI yet.
        results_dict, _ = analyze_pdf(pdf_source)
        results_json = json.dumps(results_dict, indent=2, ensure_ascii=False)
        return results_json, None
    except Exception as e:  # UI boundary: report the failure, never crash
        trace = traceback.format_exc()
        print(f"Error in process_upload: {e}\n{trace}")
        return _error_json(str(e), traceback=trace), None
    # No cleanup needed here: no temporary file is created in this module.


def create_interface():
    """Build and return the Gradio Blocks UI for the PDF analyzer.

    The layout is: title and description, a PDF upload field, an
    "Analyze PDF" button, a JSON results panel and a read-only file output
    reserved for a future annotated PDF. Clicking the button runs
    ``process_upload`` on the uploaded file.
    """
    with gr.Blocks(title="PDF Analyzer") as interface:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(
            "Upload a PDF document to analyze its structure, references, language, and more. "
            "Language issues are derived from font-filtered text. "
            "Regex issues and general document checks use unfiltered text from the original PDF. "
            "All issue coordinates (if found) are mapped back to the original PDF."
        )

        with gr.Row():
            # `type` is left at the library default; process_upload accepts
            # file-like objects, paths and raw bytes alike.
            file_input = gr.File(
                label="Upload PDF",
                file_types=[".pdf"],
            )

        with gr.Row():
            analyze_btn = gr.Button("Analyze PDF")

        with gr.Row():
            results_output = gr.JSON(
                label="Analysis Results",
                show_label=True,
            )

        with gr.Row():
            pdf_output = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,  # placeholder only, nothing to upload here
            )

        analyze_btn.click(
            fn=process_upload,
            inputs=[file_input],
            outputs=[results_output, pdf_output],
        )

    return interface


if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")
    # JAVA_HOME setup is expected to have happened when config was imported.

    # Optional smoke test: make sure LanguageTool can start before serving.
    try:
        import language_tool_python  # imported here: only needed for this test

        lt_test = language_tool_python.LanguageTool('en-US')
        lt_test.close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as lt_e:
        # Best effort only: the UI is still launched without language checks.
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    app_interface = create_interface()
    app_interface.launch(
        share=False,  # set to True to request a public share link
        # server_port=7860  # optionally pin a port
    )