File size: 5,184 Bytes
7f823bb 364e0ba 7f823bb 364e0ba 93ea2db 364e0ba 93ea2db 6f96666 7f823bb 93ea2db 364e0ba 93ea2db 364e0ba 93ea2db 6f96666 93ea2db 6f96666 93ea2db 6f96666 93ea2db 4dd18db 6f96666 93ea2db 6f96666 93ea2db 4dd18db a0e200f 364e0ba 93ea2db 0c80b43 364e0ba 93ea2db 364e0ba 0c80b43 364e0ba 0c80b43 364e0ba 93ea2db 364e0ba 0c80b43 364e0ba 93ea2db 6f96666 93ea2db 364e0ba 0c80b43 364e0ba 7f823bb 364e0ba 12a89b7 364e0ba 6f96666 93ea2db 6f96666 93ea2db 6f96666 93ea2db 6f96666 93ea2db 6f96666 93ea2db 6f96666 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# app.py
import json
import traceback
import tempfile
import os
import gradio as gr
from typing import Tuple, Optional, Any # Added Any for file_data_binary
# Import config first to ensure JAVA_HOME is set early
import config
from main_analyzer import analyze_pdf
# Import language_tool_python only for the test in __main__ if needed
# import language_tool_python
def process_upload(file_data_binary: Optional[Any]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the results as a JSON string.

    Args:
        file_data_binary: Whatever the ``gr.File`` input hands over. Depending
            on the Gradio version and the component's ``type`` setting this
            can be ``None`` (nothing uploaded), raw ``bytes``, a filesystem
            path, or a file-like object exposing ``read``.

    Returns:
        A ``(results_json, annotated_pdf)`` tuple. ``results_json`` is the
        serialized analysis on success, or a JSON object with an ``"error"``
        key (plus ``"traceback"`` when the analysis itself raised).
        ``annotated_pdf`` is always ``None``; that output is a placeholder.
    """
    import io  # Local import: only needed to wrap raw bytes below.

    if file_data_binary is None:
        return json.dumps({"error": "No file uploaded or file data is None"}, indent=2), None

    if isinstance(file_data_binary, bytes):
        # Raw bytes have no read()/seek(); give the analyzer a file-like view.
        pdf_source: Any = io.BytesIO(file_data_binary)
    elif isinstance(file_data_binary, (str, os.PathLike)) or hasattr(file_data_binary, "read"):
        # Paths and file-like objects are handed over untouched.
        # NOTE(review): relies on analyze_pdf accepting "a path or a file-like
        # object", as the original notes in this function stated - confirm.
        pdf_source = file_data_binary
    else:
        return json.dumps(
            {"error": f"Unexpected file data type: {type(file_data_binary)}"},
            indent=2,
        ), None

    # This is the UI boundary: any failure (analysis or serialization) is
    # reported back to the user as JSON instead of crashing the callback.
    try:
        print("App: Processing uploaded file...")
        results_dict, _ = analyze_pdf(pdf_source)
        return json.dumps(results_dict, indent=2, ensure_ascii=False), None
    except Exception as e:
        tb_text = traceback.format_exc()
        print(f"Error in process_upload: {e}\n{tb_text}")
        return json.dumps({"error": str(e), "traceback": tb_text}, indent=2), None
    # No temp-file cleanup here: this function never creates one itself.
def create_interface():
    """Build the Gradio Blocks UI for the PDF analyzer and return it.

    The page is a title, a short description, and four stacked rows: the PDF
    upload input, the "Analyze PDF" button, a JSON viewer for the results,
    and a non-interactive file slot reserved for an annotated PDF. Clicking
    the button calls ``process_upload`` with the uploaded file and sends its
    two return values to the JSON viewer and the file slot, in that order.
    """
    intro_text = (
        "Upload a PDF document to analyze its structure, references, language, and more. "
        "Language issues are derived from font-filtered text. "
        "Regex issues and general document checks use unfiltered text from the original PDF. "
        "All issue coordinates (if found) are mapped back to the original PDF."
    )

    with gr.Blocks(title="PDF Analyzer") as demo:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(intro_text)

        with gr.Row():
            # No explicit `type=`: the component's default delivery mode is used.
            upload_box = gr.File(label="Upload PDF", file_types=[".pdf"])

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(label="Analysis Results", show_label=True)

        with gr.Row():
            # Output-only placeholder, hence not interactive.
            annotated_slot = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        # Event wiring stays inside the Blocks context.
        run_button.click(
            fn=process_upload,
            inputs=[upload_box],
            outputs=[json_view, annotated_slot],
        )

    return demo
if __name__ == "__main__":
    # Script entry point: run an optional LanguageTool smoke test, then start
    # the Gradio server.
    print("\n--- Launching Gradio Interface ---")

    # Optional smoke test: start and stop LanguageTool once so a broken Java
    # setup shows up as a warning at launch rather than during the first
    # analysis. Failure here is deliberately non-fatal.
    # NOTE(review): JAVA_HOME is presumably configured as a side effect of
    # `import config` at the top of the file - confirm in config.py.
    try:
        import language_tool_python  # Imported here because only this check needs it.

        lt_test = language_tool_python.LanguageTool('en-US')
        lt_test.close()
    except Exception as lt_e:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")
    else:
        print("App: LanguageTool initialized successfully for test.")

    app_interface = create_interface()
    app_interface.launch(
        share=False,  # Set to True to request a public share link.
        # server_port=7860,  # Optionally pin the port.
    )