File size: 3,905 Bytes
7f823bb 364e0ba bea5394 7f823bb 364e0ba de1c169 364e0ba 93ea2db 383775a 6f96666 383775a bea5394 383775a 364e0ba de1c169 d37148b de1c169 6f96666 de1c169 6f96666 383775a 4dd18db 6f96666 bea5394 6f96666 de1c169 bea5394 a0e200f 364e0ba 3cee98d de1c169 364e0ba 383775a 364e0ba 383775a 364e0ba 383775a 364e0ba 383775a 93ea2db 6f96666 383775a 364e0ba 383775a 364e0ba 383775a f497fbd 383775a 6f96666 93ea2db 6f96666 bea5394 6f96666 383775a 6f96666 93ea2db 6f96666 bea5394 93ea2db 383775a 93ea2db 383775a de1c169 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# app.py
import json
import traceback
import tempfile
import os
import gradio as gr
from typing import Tuple, Optional, Any # Any for file_data_binary for now, though bytes is expected
# Import config first to ensure JAVA_HOME is set early
import config
from main_analyzer import analyze_pdf
# Import language_tool_python only for the test in __main__ if needed
# import language_tool_python
def _remove_temp_file(path: Optional[str]) -> None:
    """Best-effort removal of the temporary PDF at *path*; never raises."""
    if not path:
        return
    try:
        # Remove directly instead of checking os.path.exists first: the
        # check-then-delete sequence is racy and costs an extra syscall.
        os.remove(path)
    except FileNotFoundError:
        # Already gone; nothing left to clean up.
        return
    except OSError as e_clean:
        # Cleanup runs from a `finally`, so it must not mask the real result.
        print(f"App: Error cleaning up temporary PDF file {path}: {e_clean}")
    else:
        print(f"App: Cleaned up temporary PDF file: {path}")


def process_upload(file_data_binary: Optional[bytes]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the results as a JSON string.

    The bytes are written to a temporary ``.pdf`` file because ``analyze_pdf``
    is called with a filesystem path. The temporary file is always removed
    before this function returns.

    Args:
        file_data_binary: Raw content of the uploaded file, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(json_text, None)`` tuple. ``json_text`` is either the serialized
        first element of ``analyze_pdf``'s result, or a JSON object with an
        ``"error"`` key (plus ``"traceback"`` when an exception was caught).
        The second element is always ``None``.

    Raises:
        Nothing: every ``Exception`` raised while writing or analyzing the
        file is caught and reported in the returned JSON.
    """
    if not isinstance(file_data_binary, bytes):
        if file_data_binary is None:
            error_msg = "No file uploaded or file data is None."
        else:
            error_msg = f"Unexpected file data type: {type(file_data_binary)}. Expected bytes."
        return json.dumps({"error": error_msg}, indent=2), None

    if not file_data_binary:
        # A zero-byte upload can never be a valid PDF; report it directly
        # instead of writing an empty file and invoking the analyzer on it.
        return json.dumps({"error": "Uploaded file is empty."}, indent=2), None

    temp_pdf_path = None
    try:
        # delete=False: the file must outlive the `with` block so that
        # analyze_pdf can reopen it by path; removal happens in `finally`.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_pdf_path = temp_file.name
            temp_file.write(file_data_binary)
        # The handle is closed (and flushed) here, before the analyzer reads it.
        print(f"App: Processing PDF via temporary file: {temp_pdf_path}")
        results_dict, _ = analyze_pdf(temp_pdf_path)
        return json.dumps(results_dict, indent=2, ensure_ascii=False), None
    except Exception as e:
        # Format the traceback once and reuse it for both the log and the payload.
        tb_text = traceback.format_exc()
        print(f"Error in process_upload: {e}\n{tb_text}")
        return json.dumps({"error": str(e), "traceback": tb_text}, indent=2), None
    finally:
        _remove_temp_file(temp_pdf_path)
def create_interface():
    """Build the Gradio Blocks UI for the PDF analyzer and return it.

    The page has three rows: a PDF upload widget, an "Analyze PDF" button,
    and the outputs (a JSON viewer plus a read-only file slot). Clicking the
    button calls ``process_upload`` with the uploaded file's content and
    routes its two return values to the two output components.
    """
    with gr.Blocks(title="PDF Analyzer") as demo:
        with gr.Row():
            # type="binary" makes Gradio hand the handler the file's bytes.
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"], type="binary")

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(label="Analysis Results", show_label=True)
            annotated_slot = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        # Event wiring is registered while the Blocks context is still open.
        run_button.click(
            fn=process_upload,
            inputs=[pdf_upload],
            outputs=[json_view, annotated_slot],
        )

    return demo
if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")

    # Per the original note, config.set_java_home() has already run as a side
    # effect of `import config` at the top of this file.

    # Optional smoke test: start and immediately shut down a LanguageTool
    # instance so a broken Java setup is reported before the UI comes up.
    # A failure here is only a warning; the interface is launched regardless.
    try:
        import language_tool_python

        probe = language_tool_python.LanguageTool('en-US')
        probe.close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as lt_e:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    demo = create_interface()
    # Pass server_port=7860 here to pin the port explicitly.
    demo.launch(share=False)