samyak152002's picture
Update app.py
d37148b verified
raw
history blame
4.83 kB
# app.py
import json
import traceback
import tempfile # Not strictly needed by process_upload anymore, but good to keep if other parts use it.
import os
import gradio as gr
from typing import Tuple, Optional, Any
# Import config first to ensure JAVA_HOME is set early
import config
from main_analyzer import analyze_pdf
# language_tool_python needed for the test in __main__
import language_tool_python
def process_upload(uploaded_file_input: Optional[Any]) -> Tuple[str, Optional[str]]:
    """Run the PDF analyzer on a Gradio upload and return the result as JSON text.

    Args:
        uploaded_file_input: The value delivered by the ``gr.File`` input, or
            ``None`` when nothing was uploaded. It is forwarded untouched to
            ``analyze_pdf``; deciding whether it is a path or a stream is left
            to the analyzer (per the original author's notes, the analyzer
            also manages any temporary files it creates -- not verified here).

    Returns:
        A ``(json_text, pdf_path)`` pair. ``json_text`` is either the
        serialized analysis results or a serialized ``{"error": ...}`` object.
        ``pdf_path`` is always ``None`` in this implementation.

    Any ``Exception`` raised by the analyzer call or by JSON serialization is
    caught and reported inside ``json_text`` (with its traceback) instead of
    propagating. No temporary-file cleanup is performed by this function.
    """
    if uploaded_file_input is None:
        print("App: No file uploaded.")
        return json.dumps({"error": "No file uploaded."}, indent=2, ensure_ascii=False), None

    try:
        print(f"App: Received file input of type: {type(uploaded_file_input)}. Passing to analyzer.")
        # analyze_pdf yields a 2-tuple; only the first item (the results) is used.
        results_dict, _ = analyze_pdf(uploaded_file_input)

        # An analyzer-reported failure arrives as a dict carrying an "error"
        # key. It is logged here and then serialized exactly like a success.
        if isinstance(results_dict, dict) and "error" in results_dict:
            print(f"App: Analysis returned an error: {results_dict['error']}")

        return json.dumps(results_dict, indent=2, ensure_ascii=False), None
    except Exception as e:
        # Capture the traceback once so the log line and the payload agree.
        tb_text = traceback.format_exc()
        print(f"App: Error in process_upload: {e}\n{tb_text}")

        error_detail = str(e)
        # Enrich the analyzer's "invalid input type" failure with the actual
        # type that the uploader handed over.
        if "Invalid PDF input type" in error_detail:
            error_detail = (
                f"Invalid PDF input type received from uploader: {type(uploaded_file_input)}. "
                f"Details: {str(e)}"
            )

        # ensure_ascii=False keeps this path consistent with the success path,
        # so non-ASCII text in the message is not turned into \uXXXX escapes.
        error_message = json.dumps(
            {"error": error_detail, "traceback": tb_text},
            indent=2,
            ensure_ascii=False,
        )
        return error_message, None
def create_interface():
    """Assemble the Gradio Blocks UI for the PDF analyzer and return it.

    The layout is a title, a description, and four rows: the PDF upload
    widget, the "Analyze PDF" button, a JSON viewer for the results, and a
    read-only file slot labelled as a placeholder for an annotated PDF.
    Clicking the button calls ``process_upload`` with the uploaded file and
    routes its two return values to the JSON viewer and the file slot.
    The interface is only built here; launching it is left to the caller.
    """
    description = " ".join(
        (
            "Upload a PDF document to analyze its structure, references, language, and more.",
            "Language issues are derived from font-filtered text.",
            "Regex issues and general document checks use unfiltered text from the original PDF.",
            "All issue coordinates (if found) are mapped back to the original PDF.",
        )
    )

    with gr.Blocks(title="PDF Analyzer") as demo:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(description)

        with gr.Row():
            # No explicit `type=` is passed, so Gradio's default upload
            # representation is what process_upload receives.
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(label="Analysis Results", show_label=True)

        with gr.Row():
            annotated_pdf = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        # Wire the button: one input (the upload), two outputs (JSON, file).
        run_button.click(
            fn=process_upload,
            inputs=[pdf_upload],
            outputs=[json_view, annotated_pdf],
        )

    return demo
if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")

    # Smoke-test LanguageTool before serving: create an instance and close it
    # straight away. A failure is reported as a warning only; the UI is still
    # launched afterwards. JAVA_HOME is expected to have been set already by
    # the `import config` at the top of this file.
    try:
        language_tool_python.LanguageTool('en-US').close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as exc:
        # limit=1 keeps the warning short: only one traceback frame is printed.
        short_tb = traceback.format_exc(limit=1)
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {exc}\n{short_tb}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    # Build the UI and serve it without a public share link.
    demo = create_interface()
    demo.launch(share=False)