Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from io import BytesIO | |
import os | |
import json | |
from datetime import datetime | |
import firebase_admin | |
from firebase_admin import credentials, firestore | |
from dar_processor import preprocess_pdf_text | |
from gemini_utils import get_structured_data_with_gemini, get_harmonised_titles | |
from models import ParsedDARReport, HarmonisedPara | |
# Firebase setup: build Admin SDK credentials, initialise the app, and open
# the Firestore collection used by the request-count helpers.
FIREBASE_CREDENTIALS = os.environ.get("FIREBASE_CREDENTIALS")
if not FIREBASE_CREDENTIALS:
    # No secret in the environment: fall back to a local firebase.json file.
    if not os.path.exists("firebase.json"):
        raise ValueError("firebase.json not found and FIREBASE_CREDENTIALS not set.")
    cred = credentials.Certificate("firebase.json")
else:
    # Preferred for security: the service-account JSON is supplied through
    # the environment and parsed into a dict.
    cred = credentials.Certificate(json.loads(FIREBASE_CREDENTIALS))

firebase_admin.initialize_app(cred)
db = firestore.client()
request_counts = db.collection('request_counts')
def get_request_count():
    """Return today's request count, or 0 when no counter document exists.

    The counter is read from the ``request_counts`` collection, from the
    document named after the current UTC date (``YYYY-MM-DD``).
    """
    day_key = datetime.utcnow().strftime('%Y-%m-%d')
    snapshot = request_counts.document(day_key).get()
    if not snapshot.exists:
        return 0
    return snapshot.to_dict().get('count', 0)
def check_request_limit():
    """Count one request against today's quota if any quota is left.

    Reads the counter document for the current UTC date (``YYYY-MM-DD``) and,
    when fewer than 400 requests have been recorded, increments it by one.

    Returns:
        tuple: ``(True, None)`` when the request was counted, or
        ``(False, message)`` when the daily limit of 400 has been reached.
    """
    today = datetime.utcnow().strftime('%Y-%m-%d')
    doc_ref = request_counts.document(today)
    doc = doc_ref.get()
    # A missing document means no request has been recorded yet today.
    count = doc.to_dict().get('count', 0) if doc.exists else 0
    if count >= 400:
        return False, "Daily request limit of 400 reached. Try again tomorrow."
    # One merged write with a server-side increment both creates the day's
    # document when it is missing and bumps the counter. The previous
    # "set count to 0, then update" sequence could reset a counter that a
    # concurrent first request of the day had already incremented.
    # NOTE(review): the read above and this write are still separate
    # operations, so concurrent requests near the limit can overshoot it
    # slightly; a Firestore transaction would be needed for a hard cap.
    doc_ref.set({'count': firestore.Increment(1)}, merge=True)
    return True, None
def create_html_report(results_with_harmonised: list[dict]) -> str:
    """Render the processed audit paras as a styled HTML table.

    Args:
        results_with_harmonised: One dict per audit para. The keys read are
            ``audit_para_number``, ``audit_para_heading``,
            ``harmonised_audit_para_heading`` and
            ``revenue_involved_lakhs_rs``.

    Returns:
        An HTML fragment (inline ``<style>`` followed by a ``<table>``), or a
        short ``<p>`` placeholder when the list is empty.
    """
    # Function-scope import: only ``escape`` is bound, so the module name
    # ``html`` cannot clash with any local variable.
    from html import escape

    if not results_with_harmonised:
        return "<p>No audit paras found or processed.</p>"

    def cell(value) -> str:
        """Escape one table value; missing or None entries display as N/A."""
        return escape(str('N/A' if value is None else value))

    style = """
    <style>
        body { font-family: sans-serif; }
        .styled-table {
            border-collapse: collapse; margin: 25px 0; font-size: 0.9em;
            min-width: 400px; box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
            border-radius: 8px; overflow: hidden;
        }
        .styled-table thead tr { background-color: #009879; color: #ffffff; text-align: left; }
        .styled-table th, .styled-table td { padding: 12px 15px; border-bottom: 1px solid #dddddd; }
        .styled-table tbody tr:last-of-type { border-bottom: 2px solid #009879; }
    </style>
    """
    table_open = (
        "<table class='styled-table'><thead><tr>"
        "<th>Para No.</th>"
        "<th>Original Audit Para Heading</th>"
        "<th>Harmonised Audit Para Heading</th>"
        "<th>Amount Involved (in Lakhs)</th>"
        "</tr></thead><tbody>"
    )

    rows = []
    for item in results_with_harmonised:
        # ``or 0.0`` also covers a key that is present but holds None, which
        # would otherwise make the numeric format spec raise TypeError.
        revenue = item.get('revenue_involved_lakhs_rs') or 0.0
        amount = f"₹{revenue:,.2f} L"
        # Headings originate from the uploaded document, so every cell is
        # escaped before being placed into markup.
        rows.append(
            "<tr>"
            f"<td>{cell(item.get('audit_para_number'))}</td>"
            f"<td>{cell(item.get('audit_para_heading'))}</td>"
            f"<td>{cell(item.get('harmonised_audit_para_heading'))}</td>"
            f"<td>{amount}</td>"
            "</tr>"
        )

    return f"{style}{table_open}{''.join(rows)}</tbody></table>"
def process_dar_pdf(pdf_file):
    """Run the full DAR pipeline for one uploaded PDF.

    Steps: validate the inputs, count the request against the daily quota,
    extract text from the PDF, ask Gemini for structured audit paras, ask
    Gemini for harmonised headings, then build the HTML table and an Excel
    file for download.

    Args:
        pdf_file: The value of the Gradio ``File`` input; its ``.name``
            attribute is passed to ``preprocess_pdf_text``.

    Returns:
        A 4-tuple matching the Gradio outputs: (status message, HTML table or
        None, Excel ``gr.File`` or None, "Requests today: N/400" label).
    """
    import tempfile  # function-scope import: only this function needs it

    def finish(message, table=None, download=None):
        """Build the output tuple, re-reading the counter for the label."""
        return message, table, download, f"Requests today: {get_request_count()}/400"

    # Validate before touching the quota so that an empty submission or a
    # misconfigured Space does not consume one of the day's requests.
    if not pdf_file:
        return finish("Please upload a PDF file.")
    gemini_api_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_api_key:
        return finish("Error: GEMINI_API_KEY secret not found in Space settings.")

    can_process, error_msg = check_request_limit()
    if not can_process:
        return finish(error_msg)

    # Step 1: Process PDF to text
    full_text = preprocess_pdf_text(pdf_file.name)
    if full_text.startswith("Error"):
        return finish(f"Failed to process PDF: {full_text}")

    # Step 2: Extract structured data
    parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text)
    if parsed_report.parsing_errors or not parsed_report.audit_paras:
        return finish(parsed_report.parsing_errors or "Could not find any audit paras.")

    # Step 3: Get harmonised titles
    original_headings = [
        p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading
    ]
    if not original_headings:
        return finish("Found paras but no headings to harmonise.")
    harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings)
    if not harmonised_results:
        return finish("Failed to generate harmonised titles.")

    # Step 4: Combine and prepare outputs
    harmonised_map = {
        item.original_heading: item.harmonised_heading for item in harmonised_results
    }
    header_info = parsed_report.header.dict() if parsed_report.header else {}
    final_data_list = []
    for para in parsed_report.audit_paras:
        # Para fields win over header fields when both define the same key.
        combined_info = header_info | para.dict()
        combined_info['harmonised_audit_para_heading'] = harmonised_map.get(
            para.audit_para_heading, "N/A"
        )
        final_data_list.append(combined_info)
    html_output = create_html_report(final_data_list)

    # Step 5: Create Excel file for download
    excel_columns = [
        'gstin', 'trade_name', 'category', 'audit_group_number', 'audit_para_number',
        'audit_para_heading', 'harmonised_audit_para_heading', 'revenue_involved_lakhs_rs',
        'revenue_recovered_lakhs_rs', 'status_of_para', 'total_amount_detected_overall_rs',
        'total_amount_recovered_overall_rs',
    ]
    df = pd.DataFrame(final_data_list).reindex(columns=excel_columns).fillna('N/A')
    # Each request gets its own directory so simultaneous users cannot
    # overwrite or download one another's report, while the download keeps
    # its original file name.
    # NOTE(review): nothing removes these directories afterwards; confirm
    # the host clears its temp storage or add cleanup.
    report_dir = tempfile.mkdtemp(prefix="dar_report_")
    excel_file_name = os.path.join(report_dir, "dar_extraction_report.xlsx")
    df.to_excel(excel_file_name, index=False, sheet_name='DAR_Extraction')

    return finish("Processing complete.", html_output, gr.File(value=excel_file_name))
# --- Gradio Interface Definition ---
# Two columns share one page: a login form that is visible at start-up and
# the main tool that is revealed once ``login`` accepts the credentials.
with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo:
    # --- Login UI (visible initially) ---
    with gr.Column(visible=True) as login_ui:
        gr.Markdown("# Mumbai CGST Audit Officer Login")
        gr.Markdown("Please enter the credentials to access the tool.")
        with gr.Row():
            username_input = gr.Textbox(label="Username", placeholder="Enter your username")
            password_input = gr.Textbox(label="Password", type="password", placeholder="Enter your password")
        login_button = gr.Button("Login", variant="primary")
        login_error_msg = gr.Markdown(visible=False)

    # --- Main App UI (hidden initially) ---
    with gr.Column(visible=False) as main_app_ui:
        gr.Markdown("# DAR Draft Audit Report Harmonisation Tool")
        gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate")
        gr.Markdown(
            "Upload a Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law."
        )
        request_count_output = gr.Textbox(label="Requests Made Today", interactive=False, value="Requests today: 0/400")
        with gr.Row():
            with gr.Column(scale=1):
                pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"])
                submit_btn = gr.Button("Process Report", variant="primary")
            with gr.Column(scale=2):
                status_output = gr.Textbox(label="Processing Status", interactive=False)
                excel_output = gr.File(label="Download Excel Report")
        gr.Markdown("## Harmonised Audit Para Titles")
        html_output = gr.HTML()

        # NOTE(review): the login step only toggles component visibility;
        # confirm whether this handler can still be invoked through the
        # Gradio API without logging in, and gate it server-side if so.
        submit_btn.click(
            fn=process_dar_pdf,
            inputs=[pdf_input],
            outputs=[status_output, html_output, excel_output, request_count_output]
        )

    # --- Login Functionality ---
    def login(username, password):
        """Check the submitted credentials and switch the visible UI.

        The expected values are read from the ``APP_USERNAME`` and
        ``APP_PASSWORD`` environment variables.

        Args:
            username: Text from the username box.
            password: Text from the password box.

        Returns:
            A dict of component updates for ``login_ui``, ``main_app_ui``,
            ``login_error_msg`` and ``request_count_output``.
        """
        import hmac  # function-scope import: only the credential check needs it

        auth_username = os.environ.get("APP_USERNAME")
        auth_password = os.environ.get("APP_PASSWORD")

        def matches(supplied, expected):
            """Constant-time comparison that rejects unset or empty secrets."""
            # Fail closed: with plain ``==`` an unset secret (None) would
            # compare equal to a None input and let the caller in.
            if not expected or supplied is None:
                return False
            # Bytes are compared because compare_digest only accepts ASCII
            # when given str arguments.
            return hmac.compare_digest(
                str(supplied).encode("utf-8"), expected.encode("utf-8")
            )

        # Evaluate both checks before combining them so a wrong username
        # does not skip the password comparison.
        username_ok = matches(username, auth_username)
        password_ok = matches(password, auth_password)

        if username_ok and password_ok:
            # Login successful: hide login UI, show main app, display request count
            request_count = get_request_count()
            return {
                login_ui: gr.update(visible=False),
                main_app_ui: gr.update(visible=True),
                login_error_msg: gr.update(visible=False),
                request_count_output: gr.update(value=f"Requests today: {request_count}/400")
            }

        # Login failed: keep login UI visible, show error message
        return {
            login_ui: gr.update(visible=True),
            main_app_ui: gr.update(visible=False),
            login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True),
            request_count_output: gr.update(value="Requests today: 0/400")
        }

    login_button.click(
        login,
        inputs=[username_input, password_input],
        outputs=[login_ui, main_app_ui, login_error_msg, request_count_output]
    )
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script.
    demo.launch(debug=True)
# import gradio as gr
# import pandas as pd | |
# from io import BytesIO | |
# import os | |
# import json | |
# from datetime import datetime | |
# import firebase_admin | |
# from firebase_admin import credentials, firestore | |
# from dar_processor import preprocess_pdf_text | |
# from gemini_utils import get_structured_data_with_gemini, get_harmonised_titles | |
# from models import ParsedDARReport, HarmonisedPara | |
# # Firebase setup | |
# FIREBASE_CREDENTIALS = os.environ.get("FIREBASE_CREDENTIALS") | |
# if FIREBASE_CREDENTIALS: | |
# # Load credentials from environment variable (preferred for security) | |
# cred = credentials.Certificate(json.loads(FIREBASE_CREDENTIALS)) | |
# else: | |
# # Fallback to reading from firebase.json file | |
# if not os.path.exists("firebase.json"): | |
# raise ValueError("firebase.json not found and FIREBASE_CREDENTIALS not set.") | |
# cred = credentials.Certificate("firebase.json") | |
# firebase_admin.initialize_app(cred) | |
# db = firestore.client() | |
# request_counts = db.collection('request_counts') | |
# def get_request_count(): | |
# """Retrieve the current request count for today.""" | |
# today = datetime.utcnow().strftime('%Y-%m-%d') | |
# doc_ref = request_counts.document(today) | |
# doc = doc_ref.get() | |
# count = doc.to_dict().get('count', 0) if doc.exists else 0 | |
# return count | |
# def check_request_limit(): | |
# """Check if the request limit for the day has been reached.""" | |
# today = datetime.utcnow().strftime('%Y-%m-%d') | |
# doc_ref = request_counts.document(today) | |
# doc = doc_ref.get() | |
# if not doc.exists: | |
# # Initialize counter for the new day | |
# doc_ref.set({'count': 0}) | |
# count = 0 | |
# else: | |
# count = doc.to_dict().get('count', 0) | |
# if count >= 400: | |
# return False, "Daily request limit of 400 reached. Try again tomorrow." | |
# # Increment the counter | |
# doc_ref.update({'count': firestore.Increment(1)}) | |
# return True, None | |
# def create_html_report(results_with_harmonised: list[dict]) -> str: | |
# """Generates an HTML string to display the results in a styled table.""" | |
# if not results_with_harmonised: | |
# return "<p>No audit paras found or processed.</p>" | |
# style = """ | |
# <style> | |
# body { font-family: sans-serif; } | |
# .styled-table { | |
# border-collapse: collapse; margin: 25px 0; font-size: 0.9em; | |
# min-width: 400px; box-shadow: 0 0 20px rgba(0, 0, 0, 0.15); | |
# border-radius: 8px; overflow: hidden; | |
# } | |
# .styled-table thead tr { background-color: #009879; color: #ffffff; text-align: left; } | |
# .styled-table th, .styled-table td { padding: 12px 15px; border-bottom: 1px solid #dddddd; } | |
# .styled-table tbody tr:last-of-type { border-bottom: 2px solid #009879; } | |
# </style> | |
# """ | |
# html = f"{style}<table class='styled-table'><thead><tr><th>Para No.</th><th>Original Audit Para Heading</th><th>Harmonised Audit Para Heading</th><th>Amount Involved (in Lakhs)</th></tr></thead><tbody>" | |
# for item in results_with_harmonised: | |
# para_num = item.get('audit_para_number', 'N/A') | |
# original_heading = item.get('audit_para_heading', 'N/A') | |
# harmonised_heading = item.get('harmonised_audit_para_heading', 'N/A') | |
# amount = f"₹{item.get('revenue_involved_lakhs_rs', 0.0):,.2f} L" | |
# html += f"<tr><td>{para_num}</td><td>{original_heading}</td><td>{harmonised_heading}</td><td>{amount}</td></tr>" | |
# html += "</tbody></table>" | |
# return html | |
# def process_dar_pdf(pdf_file): | |
# """The main processing function, called after successful login.""" | |
# # Check request limit before processing | |
# can_process, error_msg = check_request_limit() | |
# if not can_process: | |
# return error_msg, None, None, f"Requests today: {get_request_count()}/400" | |
# gemini_api_key = os.environ.get("GEMINI_API_KEY") | |
# if not pdf_file: | |
# return "Please upload a PDF file.", None, None, f"Requests today: {get_request_count()}/400" | |
# if not gemini_api_key: | |
# return "Error: GEMINI_API_KEY secret not found in Space settings.", None, None, f"Requests today: {get_request_count()}/400" | |
# # Step 1: Process PDF to text | |
# full_text = preprocess_pdf_text(pdf_file.name) | |
# if full_text.startswith("Error"): | |
# return f"Failed to process PDF: {full_text}", None, None, f"Requests today: {get_request_count()}/400" | |
# # Step 2: Extract structured data | |
# parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text) | |
# if parsed_report.parsing_errors or not parsed_report.audit_paras: | |
# error_msg = parsed_report.parsing_errors or "Could not find any audit paras." | |
# return error_msg, None, None, f"Requests today: {get_request_count()}/400" | |
# # Step 3: Get harmonised titles | |
# original_headings = [p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading] | |
# if not original_headings: | |
# return "Found paras but no headings to harmonise.", None, None, f"Requests today: {get_request_count()}/400" | |
# harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings) | |
# if not harmonised_results: | |
# return "Failed to generate harmonised titles.", None, None, f"Requests today: {get_request_count()}/400" | |
# # Step 4: Combine and prepare outputs | |
# harmonised_map = {item.original_heading: item.harmonised_heading for item in harmonised_results} | |
# final_data_list = [] | |
# for para in parsed_report.audit_paras: | |
# combined_info = (parsed_report.header.dict() if parsed_report.header else {}) | para.dict() | |
# combined_info['harmonised_audit_para_heading'] = harmonised_map.get(para.audit_para_heading, "N/A") | |
# final_data_list.append(combined_info) | |
# html_output = create_html_report(final_data_list) | |
# # Step 5: Create Excel file for download | |
# df = pd.DataFrame(final_data_list) | |
# excel_columns = [ | |
# 'gstin', 'trade_name', 'category', 'audit_group_number', 'audit_para_number', | |
# 'audit_para_heading', 'harmonised_audit_para_heading', 'revenue_involved_lakhs_rs', | |
# 'revenue_recovered_lakhs_rs', 'status_of_para', 'total_amount_detected_overall_rs', | |
# 'total_amount_recovered_overall_rs' | |
# ] | |
# df = df.reindex(columns=excel_columns).fillna('N/A') | |
# output_excel = BytesIO() | |
# df.to_excel(output_excel, index=False, sheet_name='DAR_Extraction') | |
# output_excel.seek(0) | |
# excel_file_name = "dar_extraction_report.xlsx" | |
# with open(excel_file_name, "wb") as f: | |
# f.write(output_excel.getbuffer()) | |
# return "Processing complete.", html_output, gr.File(value=excel_file_name), f"Requests today: {get_request_count()}/400" | |
# # --- Gradio Interface Definition --- | |
# with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo: | |
# # --- Login UI (visible initially) --- | |
# with gr.Column(visible=True) as login_ui: | |
# gr.Markdown("# Audit Officer Login") | |
# gr.Markdown("Please enter the credentials to access the tool.") | |
# with gr.Row(): | |
# username_input = gr.Textbox(label="Username", placeholder="Enter your username") | |
# password_input = gr.Textbox(label="Password", type="password", placeholder="Enter your password") | |
# login_button = gr.Button("Login", variant="primary") | |
# login_error_msg = gr.Markdown(visible=False) | |
# # --- Main App UI (hidden initially) --- | |
# with gr.Column(visible=False) as main_app_ui: | |
# gr.Markdown("# DAR Draft Audit Report Harmonisation Tool") | |
# gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate") | |
# gr.Markdown( | |
# "Upload a Observation letter to Taxpayer or Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law." | |
# ) | |
# request_count_output = gr.Textbox(label="Requests Made Today", interactive=False, value="Requests today: 0/400") | |
# with gr.Row(): | |
# with gr.Column(scale=1): | |
# pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"]) | |
# submit_btn = gr.Button("Process Report", variant="primary") | |
# with gr.Column(scale=2): | |
# status_output = gr.Textbox(label="Processing Status", interactive=False) | |
# excel_output = gr.File(label="Download Excel Report") | |
# gr.Markdown("## Harmonised Audit Para Titles") | |
# html_output = gr.HTML() | |
# submit_btn.click( | |
# fn=process_dar_pdf, | |
# inputs=[pdf_input], | |
# outputs=[status_output, html_output, excel_output, request_count_output] | |
# ) | |
# # --- Login Functionality --- | |
# def login(username, password): | |
# """ | |
# Checks user credentials against secrets. | |
# For production, these are loaded from Hugging Face secrets. | |
# """ | |
# auth_username = os.environ.get("APP_USERNAME") | |
# auth_password = os.environ.get("APP_PASSWORD") | |
# is_valid_user = (username == auth_username and password == auth_password) | |
# if is_valid_user: | |
# # Login successful: hide login UI, show main app, display request count | |
# request_count = get_request_count() | |
# return { | |
# login_ui: gr.update(visible=False), | |
# main_app_ui: gr.update(visible=True), | |
# login_error_msg: gr.update(visible=False), | |
# request_count_output: gr.update(value=f"Requests today: {request_count}/400") | |
# } | |
# else: | |
# # Login failed: keep login UI visible, show error message | |
# return { | |
# login_ui: gr.update(visible=True), | |
# main_app_ui: gr.update(visible=False), | |
# login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True), | |
# request_count_output: gr.update(value="Requests today: 0/400") | |
# } | |
# login_button.click( | |
# login, | |
# inputs=[username_input, password_input], | |
# outputs=[login_ui, main_app_ui, login_error_msg, request_count_output] | |
# ) | |
# if __name__ == "__main__": | |
# demo.launch(debug=True) | |
# # import pandas as pd | |
# # from io import BytesIO | |
# # import os | |
# # # These imports assume the other python files (dar_processor.py, etc.) are in the same directory. | |
# # from dar_processor import preprocess_pdf_text | |
# # from gemini_utils import get_structured_data_with_gemini, get_harmonised_titles | |
# # from models import ParsedDARReport, HarmonisedPara | |
# # def create_html_report(results_with_harmonised: list[dict]) -> str: | |
# # """Generates an HTML string to display the results in a styled table.""" | |
# # if not results_with_harmonised: | |
# # return "<p>No audit paras found or processed.</p>" | |
# # style = """ | |
# # <style> | |
# # body { font-family: sans-serif; } | |
# # .styled-table { | |
# # border-collapse: collapse; margin: 25px 0; font-size: 0.9em; | |
# # min-width: 400px; box-shadow: 0 0 20px rgba(0, 0, 0, 0.15); | |
# # border-radius: 8px; overflow: hidden; | |
# # } | |
# # .styled-table thead tr { background-color: #009879; color: #ffffff; text-align: left; } | |
# # .styled-table th, .styled-table td { padding: 12px 15px; border-bottom: 1px solid #dddddd; } | |
# # .styled-table tbody tr:last-of-type { border-bottom: 2px solid #009879; } | |
# # </style> | |
# # """ | |
# # html = f"{style}<table class='styled-table'><thead><tr><th>Para No.</th><th>Original Audit Para Heading</th><th>Harmonised Audit Para Heading</th><th>Amount Involved (in Lakhs)</th></tr></thead><tbody>" | |
# # for item in results_with_harmonised: | |
# # para_num = item.get('audit_para_number', 'N/A') | |
# # original_heading = item.get('audit_para_heading', 'N/A') | |
# # harmonised_heading = item.get('harmonised_audit_para_heading', 'N/A') | |
# # amount = f"₹{item.get('revenue_involved_lakhs_rs', 0.0):,.2f} L" | |
# # html += f"<tr><td>{para_num}</td><td>{original_heading}</td><td>{harmonised_heading}</td><td>{amount}</td></tr>" | |
# # html += "</tbody></table>" | |
# # return html | |
# # def process_dar_pdf(pdf_file): | |
# # """The main processing function, called after successful login.""" | |
# # gemini_api_key = os.environ.get("GEMINI_API_KEY") | |
# # if not pdf_file: | |
# # return "Please upload a PDF file.", None, None | |
# # if not gemini_api_key: | |
# # return "Error: GEMINI_API_KEY secret not found in Space settings.", None, None | |
# # # Step 1: Process PDF to text | |
# # full_text = preprocess_pdf_text(pdf_file.name) | |
# # if full_text.startswith("Error"): | |
# # return f"Failed to process PDF: {full_text}", None, None | |
# # # Step 2: Extract structured data | |
# # parsed_report = get_structured_data_with_gemini(gemini_api_key, full_text) | |
# # if parsed_report.parsing_errors or not parsed_report.audit_paras: | |
# # error_msg = parsed_report.parsing_errors or "Could not find any audit paras." | |
# # return error_msg, None, None | |
# # # Step 3: Get harmonised titles | |
# # original_headings = [p.audit_para_heading for p in parsed_report.audit_paras if p.audit_para_heading] | |
# # if not original_headings: | |
# # return "Found paras but no headings to harmonise.", None, None | |
# # harmonised_results = get_harmonised_titles(gemini_api_key, full_text, original_headings) | |
# # if not harmonised_results: | |
# # return "Failed to generate harmonised titles.", None, None | |
# # # Step 4: Combine and prepare outputs | |
# # harmonised_map = {item.original_heading: item.harmonised_heading for item in harmonised_results} | |
# # final_data_list = [] | |
# # for para in parsed_report.audit_paras: | |
# # combined_info = (parsed_report.header.dict() if parsed_report.header else {}) | para.dict() | |
# # combined_info['harmonised_audit_para_heading'] = harmonised_map.get(para.audit_para_heading, "N/A") | |
# # final_data_list.append(combined_info) | |
# # html_output = create_html_report(final_data_list) | |
# # # Step 5: Create Excel file for download | |
# # df = pd.DataFrame(final_data_list) | |
# # excel_columns = [ | |
# # 'gstin', 'trade_name', 'category', 'audit_group_number', 'audit_para_number', | |
# # 'audit_para_heading', 'harmonised_audit_para_heading', 'revenue_involved_lakhs_rs', | |
# # 'revenue_recovered_lakhs_rs', 'status_of_para', 'total_amount_detected_overall_rs', | |
# # 'total_amount_recovered_overall_rs' | |
# # ] | |
# # df = df.reindex(columns=excel_columns).fillna('N/A') | |
# # output_excel = BytesIO() | |
# # df.to_excel(output_excel, index=False, sheet_name='DAR_Extraction') | |
# # output_excel.seek(0) | |
# # excel_file_name = "dar_extraction_report.xlsx" | |
# # with open(excel_file_name, "wb") as f: | |
# # f.write(output_excel.getbuffer()) | |
# # return "Processing complete.", html_output, gr.File(value=excel_file_name) | |
# # # --- Gradio Interface Definition --- | |
# # with gr.Blocks(theme=gr.themes.Soft(), title="DAR Harmonisation Tool") as demo: | |
# # # --- Login UI (visible initially) --- | |
# # with gr.Column(visible=True) as login_ui: | |
# # gr.Markdown("# Audit Officer Login") | |
# # gr.Markdown("Please enter the credentials to access the tool.") | |
# # with gr.Row(): | |
# # username_input = gr.Textbox(label="Username", placeholder="Enter your username") | |
# # password_input = gr.Textbox(label="Password", type="password", placeholder="Enter your password") | |
# # login_button = gr.Button("Login", variant="primary") | |
# # login_error_msg = gr.Markdown(visible=False) | |
# # # --- Main App UI (hidden initially) --- | |
# # with gr.Column(visible=False) as main_app_ui: | |
# # gr.Markdown("# DAR Draft Audit Report Harmonisation Tool") | |
# # gr.Markdown("## Initiative by Mumbai Audit 1 Commissionerate") | |
# # gr.Markdown( | |
# # "Upload a Observation letter to taxpayer or Departmental Audit Report (DAR) in PDF format. The tool will process it and generate harmonised titles for Audit paras in accordance with GST law." | |
# # ) | |
# # with gr.Row(): | |
# # with gr.Column(scale=1): | |
# # pdf_input = gr.File(label="Upload DAR PDF", file_types=[".pdf"]) | |
# # submit_btn = gr.Button("Process Report", variant="primary") | |
# # with gr.Column(scale=2): | |
# # status_output = gr.Textbox(label="Processing Status", interactive=False) | |
# # excel_output = gr.File(label="Download Excel Report") | |
# # gr.Markdown("## Harmonised Audit Para Titles") | |
# # html_output = gr.HTML() | |
# # submit_btn.click( | |
# # fn=process_dar_pdf, | |
# # inputs=[pdf_input], | |
# # outputs=[status_output, html_output, excel_output] | |
# # ) | |
# # # --- Login Functionality --- | |
# # def login(username, password): | |
# # """ | |
# # Checks user credentials against secrets. | |
# # For production, these are loaded from Hugging Face secrets. | |
# # """ | |
# # # Get credentials from Hugging Face secrets. | |
# # # Fallback to default values for local testing if secrets are not set. | |
# # auth_username = os.environ.get("APP_USERNAME") | |
# # auth_password = os.environ.get("APP_PASSWORD") | |
# # is_valid_user = (username == auth_username and password == auth_password) | |
# # if is_valid_user: | |
# # # Login successful: hide login UI, show main app | |
# # return { | |
# # login_ui: gr.update(visible=False), | |
# # main_app_ui: gr.update(visible=True), | |
# # login_error_msg: gr.update(visible=False) | |
# # } | |
# # else: | |
# # # Login failed: keep login UI visible, show error message | |
# # return { | |
# # login_ui: gr.update(visible=True), | |
# # main_app_ui: gr.update(visible=False), | |
# # login_error_msg: gr.update(value="<p style='color:red;'>Invalid username or password.</p>", visible=True) | |
# # } | |
# # login_button.click( | |
# # login, | |
# # inputs=[username_input, password_input], | |
# # outputs=[login_ui, main_app_ui, login_error_msg] | |
# # ) | |
# # if __name__ == "__main__": | |
# # demo.launch(debug=True) | |