samyak152002 committed on
Commit
93ea2db
·
verified ·
1 Parent(s): 070b77e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -37
app.py CHANGED
@@ -4,52 +4,71 @@ import traceback
4
  import tempfile
5
  import os
6
  import gradio as gr
7
- from typing import Tuple, Optional # For type hinting
8
 
9
- # Ensure JAVA_HOME is set before LanguageTool might be initialized.
10
- # config.py should be imported early if it handles this.
11
- import config # This will run set_java_home() from config.py
12
 
13
- # Import the main analysis function
14
  from main_analyzer import analyze_pdf
15
- import language_tool_python # For the test initialization
 
16
 
17
- def process_upload(file_data_binary: bytes) -> Tuple[str, Optional[str]]:
18
- if file_data_binary is None:
19
- return json.dumps({"error": "No file uploaded"}, indent=2), None
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- temp_input_path = None
 
22
  try:
23
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_input_file:
24
- temp_input_file.write(file_data_binary)
25
- temp_input_path = temp_input_file.name
26
- print(f"Temporary PDF for analysis: {temp_input_path}")
27
 
28
- results_dict, _ = analyze_pdf(temp_input_path)
 
 
 
 
 
29
 
30
  results_json = json.dumps(results_dict, indent=2, ensure_ascii=False)
31
- return results_json, None
32
 
33
  except Exception as e:
34
- print(f"Error in process_upload: {e}")
35
  error_message = json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
36
  return error_message, None
37
- finally:
38
- if temp_input_path and os.path.exists(temp_input_path):
39
- os.unlink(temp_input_path)
40
- print(f"Cleaned up temporary file: {temp_input_path}")
41
 
42
 
43
  def create_interface():
44
  with gr.Blocks(title="PDF Analyzer") as interface:
45
  gr.Markdown("# PDF Analyzer")
46
- gr.Markdown("Upload a PDF document to analyze its structure, references, language, and more. Language issues will include PDF coordinates if found, and are filtered to appear between 'Abstract' and 'References/Bibliography'.")
 
 
 
 
 
47
 
48
  with gr.Row():
49
  file_input = gr.File(
50
  label="Upload PDF",
51
  file_types=[".pdf"],
52
- type="binary"
53
  )
54
 
55
  with gr.Row():
@@ -57,14 +76,15 @@ def create_interface():
57
 
58
  with gr.Row():
59
  results_output = gr.JSON(
60
- label="Analysis Results (Coordinates for issues in 'issues' list)",
61
  show_label=True
62
  )
63
 
64
  with gr.Row():
65
- pdf_output = gr.File(
66
- label="Annotated PDF (Functionality Removed - View Coordinates in JSON)",
67
  show_label=True,
 
68
  )
69
 
70
  analyze_btn.click(
@@ -76,20 +96,20 @@ def create_interface():
76
 
77
  if __name__ == "__main__":
78
  print("\n--- Launching Gradio Interface ---")
79
- # config.py handles JAVA_HOME setting upon its import
80
 
81
- # Optional: Check if LanguageTool can be initialized (as in original)
82
  try:
 
83
  lt_test = language_tool_python.LanguageTool('en-US')
84
  lt_test.close()
85
- print("LanguageTool initialized successfully for test.")
86
  except Exception as lt_e:
87
- print(f"Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}")
88
- print("Please ensure Java is installed and JAVA_HOME is correctly set by config.py or environment.")
89
- print("For example, on Ubuntu with OpenJDK 11: export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64")
90
-
91
- interface = create_interface()
92
- interface.launch(
93
- share=False,
94
- server_port=None
95
  )
 
4
  import tempfile
5
  import os
6
  import gradio as gr
7
+ from typing import Tuple, Optional, Any # Added Any for file_data_binary
8
 
9
+ # Import config first to ensure JAVA_HOME is set early
10
+ import config
 
11
 
 
12
  from main_analyzer import analyze_pdf
13
+ # Import language_tool_python only for the test in __main__ if needed
14
+ # import language_tool_python
15
 
16
+ def process_upload(file_data_binary: Optional[Any]) -> Tuple[str, Optional[str]]: # Use Optional[Any] for Gradio File type="binary"
17
+ if file_data_binary is None or not hasattr(file_data_binary, 'read'): # Check if it's a file-like object
18
+ # Gradio's binary type for gr.File returns a tempfile._TemporaryFileWrapper object
19
+ # If it's None, no file was uploaded.
20
+ # If it's not None but doesn't have 'read', it's an unexpected type.
21
+ # However, gradio usually passes the bytes directly if type="binary" was used in older versions
22
+ # or a TemporaryFileWrapper which is file-like.
23
+ # For robustness, let's check if it's bytes.
24
+ if isinstance(file_data_binary, bytes):
25
+ pass # Good, it's bytes
26
+ elif file_data_binary is None:
27
+ return json.dumps({"error": "No file uploaded or file data is None"}, indent=2), None
28
+ elif not hasattr(file_data_binary, 'read'): # It's not None, not bytes, not file-like
29
+ return json.dumps({"error": f"Unexpected file data type: {type(file_data_binary)}"}), None
30
+ # If it has 'read', it's a file-like object, proceed.
31
 
32
+ # analyze_pdf now handles stream-to-temp-file logic internally via original_pdf_access_path
33
+ # So we can pass the file_data_binary (which is a file-like object from Gradio) directly.
34
  try:
35
+ print(f"App: Processing uploaded file...")
36
+ # If file_data_binary is bytes, wrap it in BytesIO for file-like interface
37
+ # analyze_pdf expects a path or a file-like object with read() and seek()
 
38
 
39
+ # Gradio with type="binary" gives a tempfile._TemporaryFileWrapper.
40
+ # This object is already file-like and can be passed directly.
41
+ # No need to create another temp file here in app.py if main_analyzer handles it.
42
+
43
+ # analyze_pdf will create its own temp file if it receives a stream.
44
+ results_dict, _ = analyze_pdf(file_data_binary)
45
 
46
  results_json = json.dumps(results_dict, indent=2, ensure_ascii=False)
47
+ return results_json, None
48
 
49
  except Exception as e:
50
+ print(f"Error in process_upload: {e}\n{traceback.format_exc()}")
51
  error_message = json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
52
  return error_message, None
53
+ # No finally block needed here for temp file, as analyze_pdf handles its own if it creates one
54
+ # and Gradio handles the temp file wrapper it provides.
 
 
55
 
56
 
57
  def create_interface():
58
  with gr.Blocks(title="PDF Analyzer") as interface:
59
  gr.Markdown("# PDF Analyzer")
60
+ gr.Markdown(
61
+ "Upload a PDF document to analyze its structure, references, language, and more. "
62
+ "Language issues are derived from font-filtered text. "
63
+ "Regex issues and general document checks use unfiltered text from the original PDF. "
64
+ "All issue coordinates (if found) are mapped back to the original PDF."
65
+ )
66
 
67
  with gr.Row():
68
  file_input = gr.File(
69
  label="Upload PDF",
70
  file_types=[".pdf"],
71
+ # type="binary" # Gradio's File component returns a TemporaryFileWrapper which is file-like
72
  )
73
 
74
  with gr.Row():
 
76
 
77
  with gr.Row():
78
  results_output = gr.JSON(
79
+ label="Analysis Results", # Simplified label
80
  show_label=True
81
  )
82
 
83
  with gr.Row():
84
+ pdf_output = gr.File(
85
+ label="Annotated PDF (Placeholder - View Coordinates in JSON)",
86
  show_label=True,
87
+ interactive=False # Not interactive as it's a placeholder
88
  )
89
 
90
  analyze_btn.click(
 
96
 
97
  if __name__ == "__main__":
98
  print("\n--- Launching Gradio Interface ---")
99
+ # config.set_java_home() is called when config.py is imported.
100
 
101
+ # Optional: Test LanguageTool initialization
102
  try:
103
+ import language_tool_python # Import here for the test
104
  lt_test = language_tool_python.LanguageTool('en-US')
105
  lt_test.close()
106
+ print("App: LanguageTool initialized successfully for test.")
107
  except Exception as lt_e:
108
+ print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}")
109
+ print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")
110
+
111
+ app_interface = create_interface()
112
+ app_interface.launch(
113
+ share=False, # Set to True for public link if ngrok is installed and desired
114
+ # server_port=7860 # Optionally specify a port
 
115
  )