File size: 4,831 Bytes
7f823bb
364e0ba
 
d37148b
7f823bb
364e0ba
d37148b
364e0ba
93ea2db
 
6f96666
7f823bb
d37148b
 
364e0ba
 
d37148b
 
 
 
 
 
 
 
 
 
6f96666
d37148b
 
 
 
93ea2db
d37148b
6f96666
d37148b
 
 
 
 
 
6f96666
93ea2db
4dd18db
6f96666
d37148b
 
 
 
 
 
 
 
6f96666
d37148b
 
 
a0e200f
364e0ba
 
 
93ea2db
 
 
 
 
 
0c80b43
364e0ba
 
 
d37148b
 
 
364e0ba
0c80b43
364e0ba
 
0c80b43
364e0ba
 
d37148b
364e0ba
 
0c80b43
364e0ba
93ea2db
 
6f96666
d37148b
364e0ba
0c80b43
364e0ba
 
 
7f823bb
364e0ba
 
12a89b7
364e0ba
6f96666
93ea2db
6f96666
 
 
 
93ea2db
6f96666
d37148b
93ea2db
 
 
 
d37148b
6f96666
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# app.py
import json
import traceback
import tempfile # Not strictly needed by process_upload anymore, but good to keep if other parts use it.
import os
import gradio as gr
from typing import Tuple, Optional, Any 

# Import config first to ensure JAVA_HOME is set early
import config 

from main_analyzer import analyze_pdf 
# language_tool_python needed for the test in __main__
import language_tool_python


def process_upload(uploaded_file_input: Optional[Any]) -> Tuple[str, Optional[str]]:
    """
    Analyze a file uploaded through Gradio and return the outcome as JSON text.

    Args:
        uploaded_file_input: The value delivered by the gr.File() input, or
            None when nothing was uploaded. It is forwarded to analyze_pdf
            unchanged; this function does not inspect it beyond logging its
            type. (Per the original notes it may be a path-like string such as
            gradio.utils.NamedString or a file-like object -- analyze_pdf is
            expected to cope with either.)

    Returns:
        A 2-tuple ``(json_text, None)``. ``json_text`` is one of:
          * ``{"error": "No file uploaded."}`` when the input is None,
          * the first element returned by analyze_pdf, serialized as JSON
            (this includes error dicts produced by the analyzer itself),
          * ``{"error": ..., "traceback": ...}`` when anything raises.
        The second element is always None; it feeds the placeholder
        "annotated PDF" output of the UI.

    This function does not raise for ordinary failures: every ``Exception`` is
    converted into an error payload so the UI always receives something to show.
    """
    if uploaded_file_input is None:
        print("App: No file uploaded.")
        return json.dumps({"error": "No file uploaded."}, indent=2), None

    try:
        # The input is handed over as-is; deciding whether it is a path or a
        # stream is left to analyze_pdf.
        print(f"App: Received file input of type: {type(uploaded_file_input)}. Passing to analyzer.")

        results_dict, _ = analyze_pdf(uploaded_file_input)

        # An analyzer-side failure arrives as a dict carrying an "error" key.
        # It is logged here and then serialized exactly like a normal result.
        if isinstance(results_dict, dict) and "error" in results_dict:
            print(f"App: Analysis returned an error: {results_dict['error']}")

        return json.dumps(results_dict, indent=2, ensure_ascii=False), None

    except Exception as e:
        # Top-level UI boundary: covers unexpected failures in analyze_pdf as
        # well as JSON serialization problems. Capture the traceback once so
        # the log and the payload are guaranteed to show the same text.
        tb_text = traceback.format_exc()
        print(f"App: Error in process_upload: {e}\n{tb_text}")

        error_detail = str(e)
        # Give a more helpful message when the analyzer rejected the input type.
        if "Invalid PDF input type" in error_detail:
            error_detail = f"Invalid PDF input type received from uploader: {type(uploaded_file_input)}. Details: {str(e)}"

        # ensure_ascii=False keeps non-ASCII error text readable, consistent
        # with how analysis results are serialized above.
        error_message = json.dumps(
            {"error": error_detail, "traceback": tb_text},
            indent=2,
            ensure_ascii=False,
        )
        return error_message, None
    # No temp-file cleanup is done here: this function creates no temporary
    # files of its own. Files backing the Gradio upload, and any temporary
    # files analyze_pdf may create, are left to their respective owners.

def create_interface():
    """
    Assemble the Gradio Blocks UI for the PDF analyzer and return it unlaunched.

    Layout, top to bottom: a heading and description, the PDF upload widget,
    the "Analyze PDF" button, a JSON viewer for the results, and a
    non-interactive file slot labelled as a placeholder for an annotated PDF.
    Clicking the button calls process_upload with the uploaded file and routes
    its two return values to the JSON viewer and the file slot, in that order.
    """
    description_md = (
        "Upload a PDF document to analyze its structure, references, language, and more. "
        "Language issues are derived from font-filtered text. "
        "Regex issues and general document checks use unfiltered text from the original PDF. "
        "All issue coordinates (if found) are mapped back to the original PDF."
    )

    with gr.Blocks(title="PDF Analyzer") as ui:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(description_md)

        with gr.Row():
            # No explicit `type=` is passed, so Gradio's default applies.
            # Original note: that yields a path-like object (e.g. NamedString)
            # or a TemporaryFileWrapper, both expected to be accepted by
            # analyze_pdf -- verify against the installed Gradio version.
            upload_widget = gr.File(label="Upload PDF", file_types=[".pdf"])

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(label="Analysis Results", show_label=True)

        with gr.Row():
            annotated_slot = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        # Output order must match the (json_text, file) tuple of process_upload.
        run_button.click(
            fn=process_upload,
            inputs=[upload_widget],
            outputs=[json_view, annotated_slot],
        )

    return ui

if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")
    # JAVA_HOME is not configured here; per the import-time note at the top of
    # the file, that is expected to happen when config.py is imported.

    # Start-up smoke test: create and immediately close a LanguageTool
    # instance so a broken Java setup is reported before the UI comes up.
    # A failure is only a warning -- the interface is launched regardless.
    try:
        probe = language_tool_python.LanguageTool('en-US')
        probe.close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as exc:
        short_tb = traceback.format_exc(limit=1)
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {exc}\n{short_tb}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    # share=False: do not request a public Gradio share link.
    create_interface().launch(share=False)