|
|
|
import json |
|
import traceback |
|
import tempfile |
|
import os |
|
import gradio as gr |
|
from typing import Tuple, Optional |
|
|
|
|
|
|
|
import config |
|
|
|
|
|
from main_analyzer import analyze_pdf |
|
import language_tool_python |
|
|
|
def process_upload(file_data_binary: Optional[bytes]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the results as a JSON string.

    The uploaded bytes are written to a temporary ``.pdf`` file, that file's
    path is handed to ``analyze_pdf``, and the temporary file is removed
    afterwards whether or not the analysis succeeded.

    Args:
        file_data_binary: Raw content of the uploaded file, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(json_text, None)`` tuple. ``json_text`` is the serialized
        analysis result on success; otherwise it is a JSON object with an
        ``"error"`` key (plus ``"traceback"`` for unexpected failures).
        The second element is always ``None``.
    """
    if file_data_binary is None:
        return json.dumps({"error": "No file uploaded"}, indent=2), None

    temp_input_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_input_file:
            # Record the path *before* writing: with delete=False the file
            # already exists on disk, so a failed write must still be cleaned
            # up by the ``finally`` clause below.
            temp_input_path = temp_input_file.name
            temp_input_file.write(file_data_binary)
        # The ``with`` block has closed the file here, so every byte is
        # flushed to disk before the analyzer opens the path.
        print(f"Temporary PDF for analysis: {temp_input_path}")

        # Only the first element of the analyzer's result is used.
        results_dict, _ = analyze_pdf(temp_input_path)

        results_json = json.dumps(results_dict, indent=2, ensure_ascii=False)
        return results_json, None

    except Exception as e:
        # UI boundary: report any failure as JSON instead of raising.
        print(f"Error in process_upload: {e}")
        error_message = json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
        return error_message, None
    finally:
        if temp_input_path is not None:
            try:
                os.unlink(temp_input_path)
            except FileNotFoundError:
                # Already removed; nothing left to clean up.
                pass
            except OSError as cleanup_error:
                # Cleanup is best-effort: a failure here must not replace the
                # result (or error report) that is already being returned.
                print(f"Warning: could not remove temporary file {temp_input_path}: {cleanup_error}")
            else:
                print(f"Cleaned up temporary file: {temp_input_path}")
|
|
|
|
|
def create_interface():
    """Assemble and return the Gradio Blocks UI for the PDF analyzer.

    The layout is: a title and description, a PDF upload widget that hands
    the file over as raw bytes, an "Analyze PDF" button, a JSON viewer for
    the results, and a file output slot. Clicking the button calls
    ``process_upload`` with the uploaded bytes and sends its two return
    values to the JSON viewer and the file slot, in that order.
    """
    description = (
        "Upload a PDF document to analyze its structure, references, language, and more. "
        "Language issues will include PDF coordinates if found, and are filtered to appear "
        "between 'Abstract' and 'References/Bibliography'."
    )

    with gr.Blocks(title="PDF Analyzer") as demo:
        # Header text.
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(description)

        # Each widget sits in its own row, top to bottom.
        with gr.Row():
            upload_box = gr.File(type="binary", file_types=[".pdf"], label="Upload PDF")

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(
                show_label=True,
                label="Analysis Results (Coordinates for issues in 'issues' list)",
            )

        with gr.Row():
            file_view = gr.File(
                show_label=True,
                label="Annotated PDF (Functionality Removed - View Coordinates in JSON)",
            )

        # Wire the button to the analysis callback.
        run_button.click(
            fn=process_upload,
            inputs=[upload_box],
            outputs=[json_view, file_view],
        )

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: run a LanguageTool smoke test, then start the UI.
    print("\n--- Launching Gradio Interface ---")

    # Start and immediately close a LanguageTool instance so that setup
    # problems are reported up front. A failure only produces a warning;
    # the interface is launched regardless.
    try:
        lt_probe = language_tool_python.LanguageTool('en-US')
        lt_probe.close()
        print("LanguageTool initialized successfully for test.")
    except Exception as lt_e:
        print(f"Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}")
        setup_hints = (
            "Please ensure Java is installed and JAVA_HOME is correctly set by config.py or environment.",
            "For example, on Ubuntu with OpenJDK 11: export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64",
        )
        for hint in setup_hints:
            print(hint)

    app = create_interface()
    # No public share link; no explicit port is requested.
    app.launch(server_port=None, share=False)