File size: 4,831 Bytes
7f823bb 364e0ba d37148b 7f823bb 364e0ba d37148b 364e0ba 93ea2db 6f96666 7f823bb d37148b 364e0ba d37148b 6f96666 d37148b 93ea2db d37148b 6f96666 d37148b 6f96666 93ea2db 4dd18db 6f96666 d37148b 6f96666 d37148b a0e200f 364e0ba 93ea2db 0c80b43 364e0ba d37148b 364e0ba 0c80b43 364e0ba 0c80b43 364e0ba d37148b 364e0ba 0c80b43 364e0ba 93ea2db 6f96666 d37148b 364e0ba 0c80b43 364e0ba 7f823bb 364e0ba 12a89b7 364e0ba 6f96666 93ea2db 6f96666 93ea2db 6f96666 d37148b 93ea2db d37148b 6f96666 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# app.py
import json
import traceback
import tempfile # Not strictly needed by process_upload anymore, but good to keep if other parts use it.
import os
import gradio as gr
from typing import Tuple, Optional, Any
# Import config first to ensure JAVA_HOME is set early
import config
from main_analyzer import analyze_pdf
# language_tool_python needed for the test in __main__
import language_tool_python
def process_upload(uploaded_file_input: Optional[Any]) -> Tuple[str, Optional[str]]:
    """
    Run the PDF analysis for a file handed over by the Gradio upload widget.

    'uploaded_file_input' is whatever gr.File() provides: a path-like string
    (e.g. gradio.utils.NamedString) or a file-like object. It is forwarded
    unchanged to analyze_pdf, which is expected to work out whether it got a
    path or a stream.

    Returns a 2-tuple ``(json_text, None)``:
      * ``json_text`` is the pretty-printed JSON of the analysis result, or a
        JSON object with an "error" key (plus "traceback" when an unexpected
        exception was caught here).
      * The second element is always None; it feeds the placeholder
        "annotated PDF" output of the interface.

    No ``Exception`` raised by the analyzer or by JSON serialisation escapes
    this function; each is reported through the returned JSON instead.
    """
    if uploaded_file_input is None:
        print("App: No file uploaded.")
        return json.dumps({"error": "No file uploaded."}, indent=2), None

    try:
        print(f"App: Received file input of type: {type(uploaded_file_input)}. Passing to analyzer.")
        results_dict, _ = analyze_pdf(uploaded_file_input)

        # analyze_pdf signals a handled failure by returning a dict with an
        # "error" key. It is serialised exactly like a successful result; the
        # only extra step is logging it.
        if isinstance(results_dict, dict) and "error" in results_dict:
            print(f"App: Analysis returned an error: {results_dict['error']}")

        return json.dumps(results_dict, indent=2, ensure_ascii=False), None
    except Exception as e:
        # Boundary handler: the UI must always receive a JSON payload, so any
        # failure in analyze_pdf or in json.dumps is converted into one.
        # Capture the traceback once so the log and the payload agree.
        trace_text = traceback.format_exc()
        print(f"App: Error in process_upload: {e}\n{trace_text}")

        error_detail = str(e)
        # Make the message more actionable when the analyzer rejected the
        # object type Gradio handed us.
        if "Invalid PDF input type" in error_detail:
            error_detail = f"Invalid PDF input type received from uploader: {type(uploaded_file_input)}. Details: {str(e)}"

        # ensure_ascii=False keeps non-ASCII text (file names, localized error
        # messages) readable, consistent with how results are serialised above.
        error_message = json.dumps(
            {"error": error_detail, "traceback": trace_text},
            indent=2,
            ensure_ascii=False,
        )
        return error_message, None
# No explicit temp file cleanup needed here in process_upload for the Gradio-provided file object.
# Gradio manages its own temporary files.
# analyze_pdf manages any internal temporary files it creates.
def create_interface():
    """
    Assemble the Gradio Blocks UI for the PDF analyzer and return it.

    Layout, top to bottom: a title and description, a PDF upload field, an
    "Analyze PDF" button, a JSON panel for the results and a read-only file
    slot reserved for an annotated PDF. Clicking the button calls
    process_upload with the uploaded file and routes its two return values to
    the JSON panel and the file slot respectively.
    """
    description_text = (
        "Upload a PDF document to analyze its structure, references, language, and more. "
        "Language issues are derived from font-filtered text. "
        "Regex issues and general document checks use unfiltered text from the original PDF. "
        "All issue coordinates (if found) are mapped back to the original PDF."
    )

    with gr.Blocks(title="PDF Analyzer") as interface:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(description_text)

        with gr.Row():
            # The File component's 'type' is left at its default; whatever
            # object Gradio produces is handed to process_upload untouched.
            upload_field = gr.File(label="Upload PDF", file_types=[".pdf"])

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            results_panel = gr.JSON(label="Analysis Results", show_label=True)

        with gr.Row():
            annotated_pdf_slot = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        # Wire the button: one input (the upload), two outputs (JSON, file).
        run_button.click(
            fn=process_upload,
            inputs=[upload_field],
            outputs=[results_panel, annotated_pdf_slot],
        )

    return interface
if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")

    # config.set_java_home() is called when config.py is imported, so JAVA_HOME
    # should already be in place before LanguageTool is started below.
    # Startup smoke test: create and immediately close a LanguageTool instance
    # so a broken Java setup is reported up front. A failure only produces a
    # warning; the interface is launched either way.
    try:
        lt_probe = language_tool_python.LanguageTool('en-US')
        lt_probe.close()
    except Exception as probe_error:
        short_trace = traceback.format_exc(limit=1)
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {probe_error}\n{short_trace}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")
    else:
        print("App: LanguageTool initialized successfully for test.")

    app_interface = create_interface()
    app_interface.launch(share=False)