# app.py
import json
import os
import tempfile  # Not strictly needed by process_upload anymore; kept in case other parts use it.
import traceback
from typing import Any, Optional, Tuple

import gradio as gr

# Import config first to ensure JAVA_HOME is set early
# (it must stay ahead of main_analyzer and language_tool_python).
import config
from main_analyzer import analyze_pdf

# language_tool_python needed for the test in __main__
import language_tool_python


def process_upload(uploaded_file_input: Optional[Any]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the outcome as JSON text.

    Args:
        uploaded_file_input: The value delivered by ``gr.File()``. It may be a
            path-like string (e.g. ``gradio.utils.NamedString``) or a
            file-like object; it is handed to ``analyze_pdf`` unchanged, which
            is responsible for telling the two apart.

    Returns:
        A ``(json_text, annotated_pdf)`` pair. ``json_text`` is the serialized
        analysis result, or a JSON object carrying an ``"error"`` key (plus a
        ``"traceback"`` key when an unexpected exception was caught). The
        second element is always ``None``; the annotated-PDF output is a
        placeholder.
    """
    if uploaded_file_input is None:
        print("App: No file uploaded.")
        return json.dumps({"error": "No file uploaded."}, indent=2), None

    try:
        print(f"App: Received file input of type: {type(uploaded_file_input)}. Passing to analyzer.")
        results_dict, _ = analyze_pdf(uploaded_file_input)

        # analyze_pdf reports failures as a dict with an "error" key; that dict
        # is serialized exactly like a successful result, so only log it here.
        if isinstance(results_dict, dict) and "error" in results_dict:
            print(f"App: Analysis returned an error: {results_dict['error']}")

        return json.dumps(results_dict, indent=2, ensure_ascii=False), None
    except Exception as e:
        # UI boundary: catches unexpected failures in analyze_pdf or in JSON
        # serialization so the interface shows an error instead of crashing.
        tb_text = traceback.format_exc()  # format once, reuse for log and payload
        print(f"App: Error in process_upload: {e}\n{tb_text}")

        error_detail = str(e)
        # Specific error raised when analyze_pdf could not handle the input type.
        if "Invalid PDF input type" in error_detail:
            error_detail = (
                f"Invalid PDF input type received from uploader: {type(uploaded_file_input)}. "
                f"Details: {str(e)}"
            )

        # ensure_ascii=False keeps the error output consistent with the
        # success path (non-ASCII text is emitted verbatim, not escaped).
        error_message = json.dumps(
            {"error": error_detail, "traceback": tb_text},
            indent=2,
            ensure_ascii=False,
        )
        return error_message, None
    # No explicit temp file cleanup needed here in process_upload for the
    # Gradio-provided file object. Gradio manages its own temporary files.
    # analyze_pdf manages any internal temporary files it creates.


def create_interface():
    """Build the Gradio Blocks UI and wire the analyze button to process_upload.

    Returns:
        The constructed ``gr.Blocks`` interface (not yet launched).
    """
    with gr.Blocks(title="PDF Analyzer") as interface:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(
            "Upload a PDF document to analyze its structure, references, language, and more. "
            "Language issues are derived from font-filtered text. "
            "Regex issues and general document checks use unfiltered text from the original PDF. "
            "All issue coordinates (if found) are mapped back to the original PDF."
        )

        with gr.Row():
            file_input = gr.File(
                label="Upload PDF",
                file_types=[".pdf"]
                # Default type: Gradio provides a path-like object (e.g., NamedString)
                # or a TemporaryFileWrapper. Both should be acceptable by analyze_pdf.
            )

        with gr.Row():
            analyze_btn = gr.Button("Analyze PDF")

        with gr.Row():
            results_output = gr.JSON(
                label="Analysis Results",
                show_label=True
            )

        with gr.Row():
            pdf_output = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False
            )

        # process_upload returns (json_text, None), matching the two outputs.
        analyze_btn.click(
            fn=process_upload,
            inputs=[file_input],
            outputs=[results_output, pdf_output]
        )

    return interface


if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")
    # config.set_java_home() is called when config.py is imported.

    # Smoke-test LanguageTool before launching so a broken Java setup is
    # reported up front; a failure here is only a warning, not fatal.
    try:
        lt_test = language_tool_python.LanguageTool('en-US')
        lt_test.close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as lt_e:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}\n{traceback.format_exc(limit=1)}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    app_interface = create_interface()
    app_interface.launch(
        share=False,
    )