preview of pdfs added

#1
Files changed (1) hide show
  1. app.py +95 -2
app.py CHANGED
@@ -9,6 +9,17 @@ import base64
9
  import io
10
  from datetime import datetime
11
  import json
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # Set page config
14
  st.set_page_config(
@@ -27,6 +38,49 @@ This application analyzes SEC filings (10-K, 13F, etc.) to extract:
27
  - Potential violations
28
  """)
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Sidebar for model selection and settings
31
  st.sidebar.header("Analysis Settings")
32
 
@@ -79,6 +133,8 @@ risk_keywords_list = [keyword.strip() for keyword in risk_keywords.split(",")]
79
  # Add confidence threshold slider
80
  confidence_threshold = st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.5)
81
 
 
 
82
  # Function to extract text from PDF
83
  @st.cache_data
84
  def extract_text_from_pdf(pdf_file):
@@ -251,9 +307,45 @@ def get_download_link(data, filename, text):
251
  return href
252
 
253
  # File upload
254
- uploaded_file = st.file_uploader("Upload SEC Filing (PDF)", type=["pdf"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
  if uploaded_file:
 
 
257
  with st.spinner("Processing PDF file..."):
258
  # Extract text from PDF
259
  full_text, text_by_page = extract_text_from_pdf(uploaded_file)
@@ -500,4 +592,5 @@ else:
500
  st.markdown("Download structured analysis results for review by your legal and compliance teams.")
501
 
502
  # Add footer with information
503
- st.markdown("---")
 
 
9
  import io
10
  from datetime import datetime
11
  import json
12
+ # The libraries below are imported to fix the axios 403 error
13
+ from pathlib import Path
14
+ import os
15
+
16
+
17
+ # The code below matches the Dockerfile config. The app worked without it on Hugging Face, so this needs further investigation.
18
+
19
+ #UPLOAD_FOLDER = os.getenv('UPLOAD_FOLDER', '/tmp/uploads')
20
+ #Path(UPLOAD_FOLDER).mkdir(exist_ok=True) # Ensure directory exists
21
+
22
+
23
 
24
  # Set page config
25
  st.set_page_config(
 
38
  - Potential violations
39
  """)
40
 
41
+ # Function to display PDFs
42
+ def display_pdf(file, height=350):
43
+ # Handle both file paths and file-like objects
44
+ if isinstance(file, str):
45
+ # It's a file path
46
+ if os.path.exists(file):
47
+ with open(file, "rb") as f:
48
+ base64_pdf = base64.b64encode(f.read()).decode("utf-8")
49
+ else:
50
+ st.error("Selected PDF not found.")
51
+ return
52
+ else:
53
+ # It's a file-like object (e.g., from file uploader)
54
+ base64_pdf = base64.b64encode(file.read()).decode("utf-8")
55
+ # Reset the file pointer to the beginning for later processing
56
+ file.seek(0)
57
+
58
+ pdf_display = f"""
59
+ <iframe
60
+ src="data:application/pdf;base64,{base64_pdf}"
61
+ width="100%"
62
+ height="{height}px"
63
+ style="border: 1px solid #ccc; border-radius: 10px;"
64
+ type="application/pdf">
65
+ </iframe>
66
+ """
67
+ st.markdown(pdf_display, unsafe_allow_html=True)
68
+
69
+ # Define sample PDFs
70
+ sample_pdfs = {
71
+ "📄 Meridian Financial Services, Inc. Annual Report (10-K)": "example.pdf",
72
+ "📄 Annual Report (10-K)": "Mock_Form_10K.pdf",
73
+ "📊 Sample Investment Holdings (13F)": "Mock_Form_13F.pdf",
74
+ }
75
+
76
+ # Initialize session state for selected PDF
77
+ if "selected_pdf" not in st.session_state:
78
+ st.session_state["selected_pdf"] = list(sample_pdfs.values())[0]
79
+
80
+
81
+
82
+
83
+
84
  # Sidebar for model selection and settings
85
  st.sidebar.header("Analysis Settings")
86
 
 
133
  # Add confidence threshold slider
134
  confidence_threshold = st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.5)
135
 
136
+
137
+
138
  # Function to extract text from PDF
139
  @st.cache_data
140
  def extract_text_from_pdf(pdf_file):
 
307
  return href
308
 
309
  # File upload
310
+ # Create two columns for PDF preview and file uploader
311
+ preview_col, upload_col = st.columns([1, 1])
312
+
313
+ with upload_col:
314
+ st.header("Upload Document")
315
+ uploaded_file = st.file_uploader("Upload SEC Filing (PDF)", type=["pdf"])
316
+
317
+ # Sample PDF selector
318
+ st.markdown("### Or choose a sample:")
319
+ sample_cols = st.columns(len(sample_pdfs))
320
+
321
+ for i, (label, file_path) in enumerate(sample_pdfs.items()):
322
+ with sample_cols[i]:
323
+ if st.button(label):
324
+ st.session_state["selected_pdf"] = file_path
325
+ # When a sample is selected, set it as if it was uploaded
326
+ try:
327
+ with open(file_path, "rb") as f:
328
+ file_bytes = f.read()
329
+ uploaded_file = io.BytesIO(file_bytes)
330
+ uploaded_file.name = file_path
331
+ except FileNotFoundError:
332
+ st.error(f"Sample file {file_path} not found.")
333
+
334
+ with preview_col:
335
+ st.header("Document Preview")
336
+ # Display uploaded file or selected sample
337
+ if uploaded_file:
338
+ display_pdf(uploaded_file, height=400)
339
+ elif st.session_state["selected_pdf"]:
340
+ display_pdf(st.session_state["selected_pdf"], height=400)
341
+ else:
342
+ st.info("Upload a PDF or select a sample to preview.")
343
+
344
+
345
 
346
  if uploaded_file:
347
+ if hasattr(uploaded_file, 'seek'):
348
+ uploaded_file.seek(0)
349
  with st.spinner("Processing PDF file..."):
350
  # Extract text from PDF
351
  full_text, text_by_page = extract_text_from_pdf(uploaded_file)
 
592
  st.markdown("Download structured analysis results for review by your legal and compliance teams.")
593
 
594
  # Add footer with information
595
+ st.markdown("---")
596
+ st.markdown("Regulatory Report Checker - NLP-powered document analysis for compliance teams")