File size: 5,184 Bytes
7f823bb
364e0ba
 
 
7f823bb
364e0ba
93ea2db
364e0ba
93ea2db
 
6f96666
7f823bb
93ea2db
 
364e0ba
93ea2db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364e0ba
93ea2db
 
6f96666
93ea2db
 
 
6f96666
93ea2db
 
 
 
 
 
6f96666
 
93ea2db
4dd18db
6f96666
93ea2db
6f96666
 
93ea2db
 
4dd18db
a0e200f
364e0ba
 
 
93ea2db
 
 
 
 
 
0c80b43
364e0ba
 
 
 
93ea2db
364e0ba
0c80b43
364e0ba
 
0c80b43
364e0ba
 
93ea2db
364e0ba
 
0c80b43
364e0ba
93ea2db
 
6f96666
93ea2db
364e0ba
0c80b43
364e0ba
 
 
7f823bb
364e0ba
 
12a89b7
364e0ba
6f96666
93ea2db
6f96666
93ea2db
6f96666
93ea2db
6f96666
 
93ea2db
6f96666
93ea2db
 
 
 
 
 
 
6f96666
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# app.py
import json
import traceback
import tempfile
import os
import gradio as gr
from typing import Tuple, Optional, Any # Added Any for file_data_binary

# Import config first to ensure JAVA_HOME is set early
import config 

from main_analyzer import analyze_pdf 
# language_tool_python is deliberately not imported at module level; it is
# imported inside the __main__ block, where it is only used for a startup self-test.

def process_upload(file_data_binary: Optional[Any]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the results as a JSON string.

    The upload may arrive in several shapes depending on how the Gradio
    File component is configured (see the Gradio docs for its ``type``
    option): a filesystem path, raw ``bytes``, or a file-like object
    exposing ``read``. All three are accepted and normalised before being
    handed to ``analyze_pdf``.

    Args:
        file_data_binary: The value Gradio passes for the upload, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(json_text, annotated_pdf)`` tuple. ``json_text`` is either the
        serialised analysis results or a JSON object with an ``"error"``
        key (plus ``"traceback"`` when the analysis itself raised).
        ``annotated_pdf`` is always ``None``; the second output slot is a
        placeholder.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from io import BytesIO

    if file_data_binary is None:
        return json.dumps({"error": "No file uploaded or file data is None"}, indent=2), None

    if isinstance(file_data_binary, bytes):
        # Raw bytes have no read()/seek(); give analyze_pdf a real stream.
        pdf_source = BytesIO(file_data_binary)
    elif isinstance(file_data_binary, (str, os.PathLike)) or hasattr(file_data_binary, 'read'):
        # Paths and file-like objects are passed through unchanged; the
        # original notes state analyze_pdf accepts "a path or a file-like
        # object with read() and seek()".
        pdf_source = file_data_binary
    else:
        return json.dumps({"error": f"Unexpected file data type: {type(file_data_binary)}"}), None

    try:
        print("App: Processing uploaded file...")
        # analyze_pdf returns a pair; only the first element (the results
        # mapping) is used here.
        results_dict, _ = analyze_pdf(pdf_source)

        results_json = json.dumps(results_dict, indent=2, ensure_ascii=False)
        return results_json, None

    except Exception as e:
        # Top-level UI boundary: report the failure to the user as JSON
        # instead of letting the callback crash.
        tb_text = traceback.format_exc()
        print(f"Error in process_upload: {e}\n{tb_text}")
        error_message = json.dumps({"error": str(e), "traceback": tb_text}, indent=2)
        return error_message, None
    # No temp-file cleanup is done here. NOTE(review): the original author's
    # notes say analyze_pdf cleans up any temp file it creates and Gradio
    # owns the upload's temp file - confirm against main_analyzer.


def create_interface():
    """Assemble and return the Gradio Blocks UI for the PDF analyzer.

    Layout, top to bottom: a title and description, a PDF upload control,
    an "Analyze PDF" button, a JSON results panel and a read-only file
    output. Clicking the button calls ``process_upload`` with the upload
    and routes its two return values to the JSON panel and the file output.
    """
    description = (
        "Upload a PDF document to analyze its structure, references, language, and more. "
        "Language issues are derived from font-filtered text. "
        "Regex issues and general document checks use unfiltered text from the original PDF. "
        "All issue coordinates (if found) are mapped back to the original PDF."
    )

    with gr.Blocks(title="PDF Analyzer") as demo:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(description)

        with gr.Row():
            # `type` is left unset, so Gradio's default for gr.File decides
            # what process_upload receives.
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_panel = gr.JSON(label="Analysis Results", show_label=True)

        with gr.Row():
            # Placeholder output: process_upload always returns None for it.
            annotated_pdf = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        run_button.click(
            fn=process_upload,
            inputs=[pdf_upload],
            outputs=[json_panel, annotated_pdf],
        )

    return demo

if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")
    # JAVA_HOME setup is expected to have happened already as a side effect
    # of `import config` at the top of this file (see config.py).

    # Best-effort startup check: try to start and immediately shut down a
    # LanguageTool instance. A failure is only reported; the UI still launches.
    try:
        import language_tool_python  # imported here because only this check needs it
        language_tool_python.LanguageTool('en-US').close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as init_error:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {init_error}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    # share=False requests no public share link; no server_port is passed,
    # so Gradio's own default applies.
    demo_app = create_interface()
    demo_app.launch(share=False)