|
|
|
import json |
|
import traceback |
|
import tempfile |
|
import os |
|
import gradio as gr |
|
from typing import Tuple, Optional, Any |
|
|
|
|
|
import config |
|
|
|
from main_analyzer import analyze_pdf |
|
|
|
|
|
|
|
def process_upload(file_data_binary: Optional[Any]) -> Tuple[str, Optional[str]]:
    """Run the PDF analysis on an uploaded file and return the report as JSON text.

    Args:
        file_data_binary: The upload handed over by the UI. Raw ``bytes`` and
            any object exposing a ``read`` attribute are forwarded to
            ``analyze_pdf``; ``None`` and every other type are rejected.

    Returns:
        A two-element tuple. The first element is JSON text: either the
        serialized analysis result, or an object with an ``"error"`` key
        (plus ``"traceback"`` when the analysis itself raised). The second
        element is always ``None``.
    """
    # Nothing was uploaded at all.
    if file_data_binary is None:
        return json.dumps({"error": "No file uploaded or file data is None"}, indent=2), None

    # NOTE(review): a plain ``str`` path has no ``read`` attribute and is
    # rejected here -- confirm the upload widget delivers bytes or a file object.
    is_supported = isinstance(file_data_binary, bytes) or hasattr(file_data_binary, 'read')
    if not is_supported:
        return json.dumps({"error": f"Unexpected file data type: {type(file_data_binary)}"}), None

    try:
        print("App: Processing uploaded file...")
        # Only the first element of analyze_pdf's return value is used.
        analysis, _ = analyze_pdf(file_data_binary)
        return json.dumps(analysis, indent=2, ensure_ascii=False), None
    except Exception as exc:
        # Top-level UI boundary: report the failure as JSON instead of raising.
        tb_text = traceback.format_exc()
        print(f"Error in process_upload: {exc}\n{tb_text}")
        failure = {"error": str(exc), "traceback": tb_text}
        return json.dumps(failure, indent=2), None
|
|
|
|
|
|
|
|
|
def create_interface():
    """Assemble the Gradio Blocks UI for the PDF analyzer.

    The layout is a title and a description followed by four rows: the PDF
    upload widget, the "Analyze PDF" button, a JSON viewer for the results and
    a non-interactive file slot labelled as a placeholder for an annotated
    PDF. Clicking the button calls ``process_upload`` with the uploaded file
    and routes its two return values to the JSON viewer and the file slot.

    Returns:
        The constructed ``gr.Blocks`` object; launching it is left to the caller.
    """
    description = (
        "Upload a PDF document to analyze its structure, references, language, and more. "
        "Language issues are derived from font-filtered text. "
        "Regex issues and general document checks use unfiltered text from the original PDF. "
        "All issue coordinates (if found) are mapped back to the original PDF."
    )

    with gr.Blocks(title="PDF Analyzer") as demo:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(description)

        with gr.Row():
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            report_view = gr.JSON(label="Analysis Results", show_label=True)

        with gr.Row():
            annotated_view = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        # One input (the upload) feeds process_upload; its two return values
        # populate the JSON viewer and the file slot, in that order.
        run_button.click(
            fn=process_upload,
            inputs=[pdf_upload],
            outputs=[report_view, annotated_view],
        )

    return demo
|
|
|
if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")

    # Start-up probe: create and immediately close a LanguageTool instance so a
    # broken setup is reported up front. Any failure only produces a warning;
    # the UI is launched regardless.
    try:
        import language_tool_python

        lt_probe = language_tool_python.LanguageTool('en-US')
        lt_probe.close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as probe_error:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {probe_error}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    # Build the UI and serve it without a public share link.
    app_interface = create_interface()
    app_interface.launch(share=False)