Spaces:

koulsahil
/

Regulatory_Document_Analyzer

Sleeping

App Files Files Community

preview of pdfs added

by koulsahil - opened Apr 10

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+95

-2

Files changed (1) hide show

app.py +95 -2

app.py CHANGED Viewed

@@ -9,6 +9,17 @@ import base64
 import io
 from datetime import datetime
 import json
 # Set page config
 st.set_page_config(
@@ -27,6 +38,49 @@ This application analyzes SEC filings (10-K, 13F, etc.) to extract:
 - Potential violations
 """)
 # Sidebar for model selection and settings
 st.sidebar.header("Analysis Settings")
@@ -79,6 +133,8 @@ risk_keywords_list = [keyword.strip() for keyword in risk_keywords.split(",")]
 # Add confidence threshold slider
 confidence_threshold = st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.5)
 # Function to extract text from PDF
 @st.cache_data
 def extract_text_from_pdf(pdf_file):
@@ -251,9 +307,45 @@ def get_download_link(data, filename, text):
     return href
 # File upload
-uploaded_file = st.file_uploader("Upload SEC Filing (PDF)", type=["pdf"])
 if uploaded_file:
     with st.spinner("Processing PDF file..."):
         # Extract text from PDF
         full_text, text_by_page = extract_text_from_pdf(uploaded_file)
@@ -500,4 +592,5 @@ else:
         st.markdown("Download structured analysis results for review by your legal and compliance teams.")
 # Add footer with information
-st.markdown("---")

 import io
 from datetime import datetime
 import json
+# Libraries below were added to fix the Axios 403 error code
+from pathlib import Path
+import os
+# The code below matches the Dockerfile config. The app worked without it on Hugging Face, so this needs to be investigated further.
+#UPLOAD_FOLDER = os.getenv('UPLOAD_FOLDER', '/tmp/uploads')
+#Path(UPLOAD_FOLDER).mkdir(exist_ok=True)  # Ensure directory exists
 # Set page config
 st.set_page_config(
 - Potential violations
 """)
+# Function to display PDFs
+def display_pdf(file, height=350):
+    # Handle both file paths and file-like objects
+    if isinstance(file, str):
+        # It's a file path
+        if os.path.exists(file):
+            with open(file, "rb") as f:
+                base64_pdf = base64.b64encode(f.read()).decode("utf-8")
+        else:
+            st.error("Selected PDF not found.")
+            return
+    else:
+        # It's a file-like object (e.g., from file uploader)
+        base64_pdf = base64.b64encode(file.read()).decode("utf-8")
+        # Reset the file pointer to the beginning for later processing
+        file.seek(0)
+    pdf_display = f"""
+    <iframe
+        src="data:application/pdf;base64,{base64_pdf}"
+        width="100%"
+        height="{height}px"
+        style="border: 1px solid #ccc; border-radius: 10px;"
+        type="application/pdf">
+    </iframe>
+    """
+    st.markdown(pdf_display, unsafe_allow_html=True)
+# Define sample PDFs: maps the button label shown in the UI to the path of
+# the bundled file. The paths are relative, so they are resolved against the
+# process working directory when opened.
+sample_pdfs = {
+    "📄 Meridian Financial Services, Inc. Annual Report (10-K)": "example.pdf",
+    "📄 Annual Report (10-K)": "Mock_Form_10K.pdf",
+    "📊 Sample Investment Holdings (13F)": "Mock_Form_13F.pdf",
+}
+# Initialize session state for selected PDF. On the first run of a session
+# the selection defaults to the first entry of sample_pdfs.
+if "selected_pdf" not in st.session_state:
+    st.session_state["selected_pdf"] = list(sample_pdfs.values())[0]
 # Sidebar for model selection and settings
 st.sidebar.header("Analysis Settings")
 # Add confidence threshold slider
 confidence_threshold = st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.5)
 # Function to extract text from PDF
 @st.cache_data
 def extract_text_from_pdf(pdf_file):
     return href
 # File upload
+# Create two columns for PDF preview and file uploader
+preview_col, upload_col = st.columns([1, 1])
+# The upload column is filled in first so that `uploaded_file` is already
+# bound when the preview column below decides what to display.
+with upload_col:
+    st.header("Upload Document")
+    uploaded_file = st.file_uploader("Upload SEC Filing (PDF)", type=["pdf"])
+    # Sample PDF selector: one button per sample, each in its own sub-column
+    st.markdown("### Or choose a sample:")
+    sample_cols = st.columns(len(sample_pdfs))
+    for i, (label, file_path) in enumerate(sample_pdfs.items()):
+        with sample_cols[i]:
+            if st.button(label):
+                # Remember the choice in session state for the preview below.
+                st.session_state["selected_pdf"] = file_path
+                # When a sample is selected, set it as if it was uploaded
+                # NOTE(review): `uploaded_file` is only rebound inside this
+                # branch; only the session-state entry above is stored for
+                # later runs.
+                try:
+                    with open(file_path, "rb") as f:
+                        file_bytes = f.read()
+                    # Wrap the bytes in an in-memory stream and give it a
+                    # `name` attribute (presumably to mimic an uploaded file
+                    # object for downstream code; confirm against callers).
+                    uploaded_file = io.BytesIO(file_bytes)
+                    uploaded_file.name = file_path
+                except FileNotFoundError:
+                    st.error(f"Sample file {file_path} not found.")
+with preview_col:
+    st.header("Document Preview")
+    # Display uploaded file or selected sample; an upload (or a sample loaded
+    # above) takes precedence over the path stored in session state.
+    if uploaded_file:
+        display_pdf(uploaded_file, height=400)
+    elif st.session_state["selected_pdf"]:
+        display_pdf(st.session_state["selected_pdf"], height=400)
+    else:
+        # Reached only when the stored selection is falsy.
+        st.info("Upload a PDF or select a sample to preview.")
 if uploaded_file:
+    if hasattr(uploaded_file, 'seek'):
+        uploaded_file.seek(0)
     with st.spinner("Processing PDF file..."):
         # Extract text from PDF
         full_text, text_by_page = extract_text_from_pdf(uploaded_file)
         st.markdown("Download structured analysis results for review by your legal and compliance teams.")
 # Add footer with information
+st.markdown("---")
+st.markdown("Regulatory Report Checker - NLP-powered document analysis for compliance teams")