File size: 3,552 Bytes
7f823bb 364e0ba 7f823bb 364e0ba 7f823bb 364e0ba 7f823bb 6f96666 7f823bb 364e0ba 6f96666 364e0ba 6f96666 7f823bb 6f96666 7f823bb 4dd18db 6f96666 4dd18db a0e200f 364e0ba 6f96666 0c80b43 364e0ba 7f823bb 364e0ba 0c80b43 364e0ba 0c80b43 364e0ba 6f96666 364e0ba 0c80b43 364e0ba 6f96666 364e0ba 0c80b43 364e0ba 7f823bb 364e0ba 12a89b7 364e0ba 6f96666 7f823bb 6f96666 7f823bb 6f96666 7f823bb 6f96666 7f823bb 6f96666 364e0ba 7f823bb 6f96666 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# app.py
import json
import traceback
import tempfile
import os
import gradio as gr
from typing import Tuple, Optional # For type hinting
# Ensure JAVA_HOME is set before LanguageTool might be initialized.
# config.py should be imported early if it handles this.
import config # This will run set_java_home() from config.py
# Import the main analysis function
from main_analyzer import analyze_pdf
import language_tool_python # For the test initialization
def process_upload(file_data_binary: Optional[bytes]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the results as a JSON string.

    The uploaded bytes are written to a temporary ``.pdf`` file, the file's
    path is handed to ``analyze_pdf``, and the file is removed afterwards
    regardless of the outcome.

    Args:
        file_data_binary: Raw bytes of the uploaded PDF, or ``None`` when
            nothing was uploaded.

    Returns:
        A two-element tuple. The first element is the analysis result
        serialized as JSON, or a JSON object with an ``"error"`` key (plus a
        ``"traceback"`` key when an exception was caught). The second
        element is always ``None``.
    """
    if file_data_binary is None:
        return json.dumps({"error": "No file uploaded"}, indent=2), None

    temp_input_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_input_file:
            # Record the path before writing so the file is still cleaned up
            # if the write itself fails (delete=False means nobody else will).
            temp_input_path = temp_input_file.name
            temp_input_file.write(file_data_binary)
        # The file is closed, and therefore flushed, before it is analyzed by path.
        print(f"Temporary PDF for analysis: {temp_input_path}")

        # Only the first element of the analyzer's result pair is used here.
        results_dict, _ = analyze_pdf(temp_input_path)
        results_json = json.dumps(results_dict, indent=2, ensure_ascii=False)
        return results_json, None
    except Exception as e:
        # UI boundary: report any failure as JSON instead of raising.
        print(f"Error in process_upload: {e}")
        error_message = json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
        return error_message, None
    finally:
        if temp_input_path is not None:
            # Cleanup is best-effort: a failure here must not replace the
            # result that is already being returned.
            try:
                os.unlink(temp_input_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f"Warning: could not remove temporary file {temp_input_path}: {cleanup_error}")
            else:
                print(f"Cleaned up temporary file: {temp_input_path}")
def create_interface():
    """Build the Gradio Blocks UI for the PDF analyzer and return it.

    Layout, top to bottom: a heading and description, a PDF upload field
    (passed on as binary), an "Analyze PDF" button, a JSON results panel and
    a file output slot. Clicking the button calls ``process_upload`` with the
    upload and routes its two return values to the JSON panel and the file
    slot, in that order.
    """
    description = (
        "Upload a PDF document to analyze its structure, references, language, and more. "
        "Language issues will include PDF coordinates if found, and are filtered to appear "
        "between 'Abstract' and 'References/Bibliography'."
    )

    with gr.Blocks(title="PDF Analyzer") as interface:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(description)

        # Each widget sits in its own row, in display order.
        with gr.Row():
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"], type="binary")

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_panel = gr.JSON(
                label="Analysis Results (Coordinates for issues in 'issues' list)",
                show_label=True,
            )

        with gr.Row():
            annotated_pdf_slot = gr.File(
                label="Annotated PDF (Functionality Removed - View Coordinates in JSON)",
                show_label=True,
            )

        # Wire the button to the handler; output order matches the handler's return tuple.
        run_button.click(
            fn=process_upload,
            inputs=[pdf_upload],
            outputs=[json_panel, annotated_pdf_slot],
        )

    return interface
if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")

    # JAVA_HOME is handled by config.py at import time.
    # Optional smoke test: create and close a LanguageTool instance so that a
    # broken setup is reported up front. A failure only produces a warning;
    # the interface is launched regardless.
    try:
        lt_probe = language_tool_python.LanguageTool('en-US')
        lt_probe.close()
    except Exception as lt_error:
        print(f"Warning: could not initialize LanguageTool for test. Language checks might fail: {lt_error}".replace("could not", "Could not", 1))
        print("Please ensure Java is installed and JAVA_HOME is correctly set by config.py or environment.")
        print("For example, on Ubuntu with OpenJDK 11: export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64")
    else:
        print("LanguageTool initialized successfully for test.")

    interface = create_interface()
    interface.launch(share=False, server_port=None)