Spaces:
Running
Running
import hashlib
import hmac
import json
import os
import textwrap
import time
from io import BytesIO

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_together import Together
from PyPDF2 import PdfReader, PdfWriter
from reportlab.graphics.barcode import code128
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
# ---------------------------------------------------------------------------
# Page bootstrap: environment, page config, global stylesheet and logo.
# ---------------------------------------------------------------------------

# Pull variables from a local .env file into the process environment
# (TOGETHER_API_KEY is read from the environment further down the script).
load_dotenv()

st.set_page_config(page_title="LawGPT", layout="wide")

# Global stylesheet injected as raw HTML. It themes the app shell and buttons
# and declares the layout classes referenced by the markup in this script
# (.logo-center / .logo-img, .judge-badge, .judgment-card, .role-buttons).
_PAGE_CSS = """
<style>
body, .stApp {
background-color: #0f172a;
color: #f8fafc;
font-family: 'Segoe UI', sans-serif;
}
.block-container {
padding: 1rem;
max-width: 100%;
}
.stButton > button {
background-color: #3b82f6;
color: white;
border: none;
border-radius: 8px;
padding: 0.75em 2em;
font-size: 1.1rem;
font-weight: 600;
transition: 0.3s;
width: 100%;
}
.stButton > button:hover {
background-color: #2563eb;
}
@media screen and (max-width: 768px) {
.role-buttons {
flex-direction: column;
gap: 1rem;
}
.logo-img {
width: 70% !important;
}
}
.role-buttons {
display: flex;
justify-content: center;
align-items: center;
gap: 2rem;
margin-top: 3rem;
flex-wrap: wrap;
}
.logo-center {
display: flex;
justify-content: center;
align-items: center;
margin-top: 1rem;
margin-bottom: 2rem;
}
.logo-img {
width: 25%;
max-width: 250px;
height: auto;
}
.judge-badge {
background-color: #991b1b;
color: white;
padding: 5px 10px;
border-radius: 12px;
font-weight: 600;
display: inline-block;
margin-bottom: 10px;
}
.judgment-card {
background-color: #1e293b;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
border-left: 4px solid #991b1b;
}
</style>
"""

# Centered logo shown at the top of every screen.
_LOGO_HTML = """
<div class="logo-center">
<img class="logo-img" src="https://github.com/harshitv804/LawGPT/assets/100853494/ecff5d3c-f105-4ba2-a93a-500282f0bf00" />
</div>
"""

for _html_fragment in (_PAGE_CSS, _LOGO_HTML):
    st.markdown(_html_fragment, unsafe_allow_html=True)
# ---------------------------------------------------------------------------
# Session defaults, role selection and login.
#
# `st.session_state.role` is None until the visitor picks a role.
# `st.session_state.authenticated` gates the stakeholder and judge screens;
# civilians are marked authenticated as soon as they pick their role.
# ---------------------------------------------------------------------------
if "role" not in st.session_state:
    st.session_state.role = None
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False

# Landing screen: three role buttons centred in the middle column.
if st.session_state.role is None:
    st.markdown("<h2 style='text-align: center;'>Who are you?</h2>", unsafe_allow_html=True)
    _left_pad, center_col, _right_pad = st.columns([1, 3, 1])
    with center_col:
        civilian_col, stakeholder_col, judge_col = st.columns(3)
        with civilian_col:
            if st.button("🧑 I am a Civilian"):
                st.session_state.role = "civilian"
                st.session_state.authenticated = True
                st.rerun()
        with stakeholder_col:
            if st.button("⚖️ I am a Court Stakeholder"):
                st.session_state.role = "stakeholder"
                st.rerun()
        with judge_col:
            if st.button("👨⚖️ I am a Judge"):
                st.session_state.role = "judge"
                st.rerun()

# Login screen for the two restricted roles.
if st.session_state.role in ("stakeholder", "judge") and not st.session_state.authenticated:
    login_title = "Judge" if st.session_state.role == "judge" else "Stakeholder"
    st.markdown(f"### 🔐 {login_title} Login")
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")
    if st.button("Login"):
        # SECURITY: credentials come from the environment so deployments can
        # override them. The fallbacks reproduce the previous hard-coded demo
        # values and are NOT safe for real use - set both variables in
        # production.
        expected_username = os.getenv("LAWGPT_ADMIN_USERNAME", "admin")
        expected_password = os.getenv("LAWGPT_ADMIN_PASSWORD", "1234")
        # Compare as bytes (compare_digest rejects non-ASCII str) and in
        # constant time so the check does not leak how much of a guess matched.
        username_ok = hmac.compare_digest(
            username.encode("utf-8"), expected_username.encode("utf-8")
        )
        password_ok = hmac.compare_digest(
            password.encode("utf-8"), expected_password.encode("utf-8")
        )
        if username_ok and password_ok:
            st.success("Login successful!")
            st.session_state.authenticated = True
            st.rerun()
        else:
            st.error("Invalid credentials.")
    # Without this the login screen is a dead end: the only other way back to
    # role selection is reloading the page.
    if st.button("🔙 Back"):
        st.session_state.role = None
        st.session_state.authenticated = False
        st.rerun()
# ---------------------------------------------------------------------------
# Authenticated workspace: navigation, tabs and shared AI resources.
#
# Nothing below this point should run until a role has been chosen and,
# for restricted roles, the user has logged in. Stopping the script here is
# equivalent to wrapping the rest of the file in the original `if`.
# ---------------------------------------------------------------------------
if not (
    st.session_state.role
    and (st.session_state.role == "civilian" or st.session_state.authenticated)
):
    st.stop()

if st.button("🔙 Back to Home"):
    st.session_state.role = None
    st.session_state.authenticated = False
    st.rerun()

# The first tab is common to every role; the remaining tabs depend on role.
# Later sections index `selected_tab` positionally, so the order matters.
tabs = ["📘 LawGPT"]
if st.session_state.role == "judge":
    tabs.extend(["👨⚖️ Judge Console", "📜 Previous Judgments"])
elif st.session_state.role == "stakeholder":
    tabs.extend(["📝 Document Signer", "🔍 Verify Document"])
selected_tab = st.tabs(tabs)


@st.cache_resource(show_spinner=False)
def _load_vector_store():
    """Load the embedding model and the FAISS index once per server process.

    Streamlit re-executes this script on every interaction; without caching
    the model and index would be reloaded on each rerun.

    Returns:
        A ``(embeddings, db)`` tuple.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="nomic-ai/nomic-embed-text-v1",
        # The revision is pinned because trust_remote_code executes code
        # shipped with the model repository.
        model_kwargs={
            "trust_remote_code": True,
            "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7",
        },
    )
    # SECURITY: allow_dangerous_deserialization unpickles the index file.
    # "ipc_vector_db" must be a trusted, locally produced artifact.
    db = FAISS.load_local("ipc_vector_db", embeddings, allow_dangerous_deserialization=True)
    return embeddings, db


@st.cache_resource(show_spinner=False)
def _load_llm():
    """Create the Together LLM client once per server process."""
    return Together(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.5,
        max_tokens=1024,
        together_api_key=os.getenv("TOGETHER_API_KEY"),
    )


# Load embeddings and DB for all roles
embeddings, db = _load_vector_store()
db_retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# Common LLM setup
llm = _load_llm()
# ---------------------------------------------------------------------------
# LawGPT tab (all roles): retrieval-augmented chat over the IPC vector store.
# ---------------------------------------------------------------------------
if "📘 LawGPT" in tabs:
    with selected_tab[0]:
        st.markdown("## 💬 Your Legal AI Lawyer")
        st.markdown("### Ask any legal question related to the Indian Penal Code (IPC)")
        st.markdown("Questions might be of types like: Suppose a 16 year old is drinking and driving, and hit a pedestrian on the road. What are the possible case laws imposed and give any one previous court decisions on the same.")

        def reset_conversation():
            """Clear both the visible transcript and the chain's memory."""
            st.session_state.messages = []
            st.session_state.memory.clear()

        # `messages` is the transcript rendered on screen; `memory` is the
        # windowed history (last 2 exchanges) fed back to the chain.
        if "messages" not in st.session_state:
            st.session_state.messages = []
        if "memory" not in st.session_state:
            st.session_state.memory = ConversationBufferWindowMemory(
                k=2, memory_key="chat_history", return_messages=True
            )

        # Mistral-instruct format: the instruction is wrapped in
        # [INST] ... [/INST]. The template previously ended with "</s>[INST]",
        # which closes the sequence and opens a new, empty instruction.
        prompt_template = (
            "<s>[INST]You are a legal chatbot that answers questions about the Indian Penal Code (IPC).\n"
            "Provide clear, concise, and accurate responses based on context and user's question.\n"
            "Avoid extra details or assumptions. Focus only on legal information.\n"
            "CONTEXT: {context}\n"
            "CHAT HISTORY: {chat_history}\n"
            "QUESTION: {question}\n"
            "ANSWER:\n"
            "[/INST]"
        )
        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question", "chat_history"],
        )
        qa = ConversationalRetrievalChain.from_llm(
            llm=llm,
            memory=st.session_state.memory,
            retriever=db_retriever,
            combine_docs_chain_kwargs={
                "prompt": prompt,
                "document_variable_name": "context",
            },
        )

        # Replay the stored transcript on every rerun.
        chat_placeholder = st.empty()
        with chat_placeholder.container():
            for msg in st.session_state.messages:
                with st.chat_message(msg["role"]):
                    st.write(msg["content"])

        input_prompt = st.chat_input("Ask a legal question...")
        if input_prompt:
            with st.chat_message("user"):
                st.write(input_prompt)
            st.session_state.messages.append({"role": "user", "content": input_prompt})

            with st.chat_message("assistant"):
                with st.status("Thinking 💡", expanded=True):
                    result = qa.invoke(input=input_prompt)
                answer = result["answer"]

                # Typewriter effect: reveal the answer one character at a
                # time behind a disclaimer banner.
                message_placeholder = st.empty()
                full_response = "⚠️ **_Note: Information provided may be inaccurate._**\n\n"
                for chunk in answer:
                    full_response += chunk
                    time.sleep(0.02)
                    message_placeholder.markdown(full_response + " ▌")
                message_placeholder.markdown(full_response)

            # Only the raw answer is stored; the disclaimer is display-only.
            st.session_state.messages.append({"role": "assistant", "content": answer})

        st.button("🔄 Reset Chat", on_click=reset_conversation)
# ---------------------------------------------------------------------------
# Judge workspace: "Judge Console" (tab 1) and "Previous Judgments" (tab 2).
# ---------------------------------------------------------------------------
CASE_TYPES = [
    "Criminal", "Civil", "Family", "Property", "Cyber Crime",
    "Corporate", "Intellectual Property", "Other",
]
JUDGMENTS_FILE = "judgments.json"


def _read_saved_judgments():
    """Return the list stored in JUDGMENTS_FILE, or [] if missing or unreadable."""
    try:
        with open(JUDGMENTS_FILE, "r", encoding="utf-8") as f:
            stored = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return []
    return stored if isinstance(stored, list) else []


def _store_judgment(judgment):
    """Append `judgment` to the session list and persist the merged list.

    The file is re-read first so judgments written by other sessions since
    this session loaded its copy are not overwritten. There is no file
    locking, so two simultaneous saves can still race.
    """
    merged = _read_saved_judgments()
    on_disk_ids = {j.get("id") for j in merged if isinstance(j, dict)}
    merged.extend(
        j for j in st.session_state.judgments if j.get("id") not in on_disk_ids
    )
    merged.append(judgment)
    st.session_state.judgments = merged
    with open(JUDGMENTS_FILE, "w", encoding="utf-8") as f:
        json.dump(merged, f)


def _safe_filename_part(text):
    """Replace every character except letters, digits, '-' and '_' with '_'."""
    return "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in text)


def _judgment_pdf_name(judgment):
    """Return the download file name for `judgment`."""
    return f"judgment_{judgment['id']}_{_safe_filename_part(judgment['case_title'])}.pdf"


def _wrap_judgment_text(text, width=90):
    """Split `text` into lines of at most `width` characters.

    Paragraphs are separated by blank lines; single newlines inside a
    paragraph are treated as spaces. An empty line follows each paragraph.
    """
    lines = []
    for paragraph in str(text or "").split("\n\n"):
        lines.extend(textwrap.wrap(" ".join(paragraph.split()), width=width))
        lines.append("")
    return lines


def _build_judgment_pdf(judgment):
    """Render a stored judgment dict as a PDF and return the PDF bytes.

    Layout: a header with case details, the wrapped judgment text paginated
    across as many pages as needed, then a final signature page carrying a
    Code128 barcode of the judgment id.
    """
    buffer = BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=letter)
    page_width, page_height = letter
    margin = 72
    leading = 12
    # Body text stops above this y so it never collides with the footer
    # (drawn at y=30) or runs off the bottom of the page.
    bottom_limit = 60

    def draw_footer():
        pdf.saveState()
        pdf.setFont("Helvetica", 8)
        pdf.drawString(page_width / 2 - 40, 30, "Generated by LawGPT Judge")
        pdf.restoreState()

    def start_text(top_y):
        text = pdf.beginText(margin, top_y)
        text.setFont("Times-Roman", 10, leading)
        return text

    # Header
    pdf.setFont("Helvetica-Bold", 16)
    pdf.drawString(margin, page_height - 72, f"JUDGMENT #{judgment['id']}")
    # Case details
    pdf.setFont("Helvetica-Bold", 12)
    pdf.drawString(margin, page_height - 100, f"Case: {judgment['case_title']}")
    pdf.setFont("Helvetica", 10)
    pdf.drawString(margin, page_height - 115, f"Case Number: {judgment['case_number']}")
    pdf.drawString(margin, page_height - 130, f"Date: {judgment['timestamp']}")
    pdf.drawString(margin, page_height - 145, f"Plaintiff/Prosecution: {judgment['plaintiff']}")
    pdf.drawString(margin, page_height - 160, f"Defendant/Accused: {judgment['defendant']}")
    pdf.drawString(margin, page_height - 175, f"Case Type: {judgment['case_type']}")
    pdf.line(margin, page_height - 190, page_width - margin, page_height - 190)

    # Body: the first page starts below the header, later pages at the top
    # margin. Tracking y instead of a fixed line count keeps page one, which
    # has less room, inside the page.
    y = page_height - 210
    text = start_text(y)
    for line in _wrap_judgment_text(judgment["judgment"]):
        if y < bottom_limit:
            pdf.drawText(text)
            draw_footer()
            pdf.showPage()
            y = page_height - margin
            text = start_text(y)
        text.textLine(line)
        y -= leading
    pdf.drawText(text)
    draw_footer()

    # Final page with barcode and signature line
    pdf.showPage()
    pdf.setFont("Helvetica-Bold", 12)
    pdf.drawString(margin, page_height - 100, "OFFICIAL JUDGMENT")
    pdf.setFont("Helvetica", 10)
    pdf.drawString(margin, page_height - 130, f"Case #{judgment['id']} - {judgment['case_title']}")
    pdf.drawString(margin, page_height - 150, f"Date: {judgment['timestamp']}")
    barcode = code128.Code128(f"JUDGMENT-{judgment['id']}", barHeight=10 * mm, barWidth=0.4)
    barcode.drawOn(pdf, margin, 100)
    pdf.line(page_width - 200, 70, page_width - margin, 70)
    pdf.drawString(page_width - 180, 60, "Judge's Signature")

    pdf.save()
    return buffer.getvalue()


# The authentication check repeats the workspace gate on purpose, so this
# section is safe even if it is ever moved outside that gate.
if st.session_state.role == "judge" and st.session_state.authenticated:
    # Load previously saved judgments once per session.
    if "judgments" not in st.session_state:
        st.session_state.judgments = _read_saved_judgments()

    # ----- Judge Console ---------------------------------------------------
    with selected_tab[1]:
        st.markdown("## 👨⚖️ Judge's Decision Console")
        st.markdown("### Enter case details for analysis and judgment")

        st.subheader("Case Information")
        case_number = st.text_input("Case Number/ID")
        case_title = st.text_input("Case Title")
        plaintiff = st.text_input("Plaintiff/Prosecution")
        defendant = st.text_input("Defendant/Accused")

        st.subheader("Case Details")
        case_facts = st.text_area("Enter detailed facts of the case:", height=200)
        relevant_laws = st.text_area("Relevant IPC Sections (if known):",
                                     placeholder="e.g. Section 302, Section 376, etc.")
        type_col, priority_col = st.columns(2)
        with type_col:
            case_type = st.selectbox("Case Type", CASE_TYPES)
        with priority_col:
            case_priority = st.select_slider("Case Priority",
                                             options=["Low", "Medium", "High", "Urgent"])

        if st.button("Generate Judgment"):
            if not case_facts:
                st.error("Please enter the case facts to generate a judgment.")
            else:
                with st.status("Analyzing case and formulating judgment...", expanded=True):
                    # Mistral-instruct format: instruction wrapped in
                    # [INST] ... [/INST] (the template used to end with
                    # "</s>[INST]", which left the instruction unclosed).
                    judge_prompt_template = (
                        "<s>[INST]You are an experienced Indian judge making a legal judgment based on the Indian Penal Code (IPC).\n"
                        "Review the case details and provide a comprehensive legal judgment.\n"
                        "CASE NUMBER: {case_number}\n"
                        "CASE TITLE: {case_title}\n"
                        "PLAINTIFF/PROSECUTION: {plaintiff}\n"
                        "DEFENDANT/ACCUSED: {defendant}\n"
                        "CASE TYPE: {case_type}\n"
                        "CASE FACTS: {case_facts}\n"
                        "RELEVANT IPC SECTIONS: {relevant_laws}\n"
                        "Your judgment should follow this structure:\n"
                        "1. Summary of the case\n"
                        "2. Facts of the case\n"
                        "3. Legal issues involved\n"
                        "4. Analysis of applicable laws and precedents\n"
                        "5. Reasoning and findings\n"
                        "6. Final judgment and orders\n"
                        "7. Any remedies or penalties imposed\n"
                        "Be impartial, consider only facts and relevant laws, and make a fair judgment.\n"
                        "[/INST]"
                    )
                    judge_prompt = PromptTemplate(
                        template=judge_prompt_template,
                        input_variables=["case_number", "case_title", "plaintiff", "defendant",
                                         "case_type", "case_facts", "relevant_laws"],
                    )
                    formatted_prompt = judge_prompt.format(
                        case_number=case_number if case_number else "Unassigned",
                        case_title=case_title if case_title else "Unnamed Case",
                        plaintiff=plaintiff if plaintiff else "Unspecified",
                        defendant=defendant if defendant else "Unspecified",
                        case_type=case_type,
                        case_facts=case_facts,
                        relevant_laws=relevant_laws if relevant_laws else "To be determined",
                    )
                    judgment_result = llm.invoke(formatted_prompt)

                    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
                    judgment_data = {
                        # Short display id, not a security token.
                        "id": hashlib.md5(f"{case_title}{timestamp}".encode()).hexdigest()[:8],
                        "case_number": case_number if case_number else "Unassigned",
                        "case_title": case_title if case_title else "Unnamed Case",
                        "plaintiff": plaintiff,
                        "defendant": defendant,
                        "case_type": case_type,
                        "priority": case_priority,
                        "facts": case_facts,
                        "relevant_laws": relevant_laws,
                        "judgment": judgment_result,
                        "timestamp": timestamp,
                    }
                    _store_judgment(judgment_data)

                # Keep the result (and its PDF) in session state. A Streamlit
                # button is True only for the single rerun after its click, so
                # anything rendered inside this branch disappears - and any
                # button nested inside it can never fire - on the next
                # interaction.
                st.session_state.latest_judgment = judgment_data
                st.session_state.latest_judgment_pdf = _build_judgment_pdf(judgment_data)
                st.success("Judgment has been saved to the system.")

        latest_judgment = st.session_state.get("latest_judgment")
        if latest_judgment:
            st.markdown("### Judgment Generated")
            with st.container():
                st.markdown("<div class='judgment-card'>", unsafe_allow_html=True)
                st.markdown(f"<div class='judge-badge'>JUDGMENT #{latest_judgment['id']}</div>", unsafe_allow_html=True)
                st.markdown(f"**Case**: {latest_judgment['case_title']}")
                st.markdown(f"**Date**: {latest_judgment['timestamp']}")
                st.markdown("---")
                st.markdown(latest_judgment['judgment'])
                st.markdown("</div>", unsafe_allow_html=True)
            st.download_button(
                label="📥 Download Judgment as PDF",
                data=st.session_state.latest_judgment_pdf,
                file_name=_judgment_pdf_name(latest_judgment),
                mime="application/pdf",
                key="latest_judgment_pdf_download",
            )

    # ----- Previous Judgments ----------------------------------------------
    with selected_tab[2]:
        st.markdown("## 📜 Previous Judgments")
        st.markdown("### Review and search past judgments")

        search_term = st.text_input("Search judgments:", placeholder="Enter case title, number, plaintiff, etc.")
        filter_col, sort_col = st.columns(2)
        with filter_col:
            filter_type = st.multiselect("Filter by case type:",
                                         options=["All"] + CASE_TYPES,
                                         default=["All"])
        with sort_col:
            sort_by = st.selectbox("Sort by:", options=["Most recent", "Oldest first", "Case title (A-Z)"])

        if not st.session_state.judgments:
            st.info("No judgments recorded yet. Use the Judge Console to create judgments.")
        else:
            filtered_judgments = st.session_state.judgments

            # Case-insensitive substring search over the main text fields.
            if search_term:
                needle = search_term.lower()
                search_fields = ("case_title", "case_number", "plaintiff", "defendant", "relevant_laws")
                filtered_judgments = [
                    j for j in filtered_judgments
                    if any(needle in str(j.get(field, "")).lower() for field in search_fields)
                ]

            if "All" not in filter_type:
                filtered_judgments = [j for j in filtered_judgments if j['case_type'] in filter_type]

            # Timestamps are "%Y-%m-%d %H:%M:%S" strings, so string order is
            # chronological order.
            if sort_by == "Most recent":
                filtered_judgments = sorted(filtered_judgments, key=lambda x: x['timestamp'], reverse=True)
            elif sort_by == "Oldest first":
                filtered_judgments = sorted(filtered_judgments, key=lambda x: x['timestamp'])
            elif sort_by == "Case title (A-Z)":
                filtered_judgments = sorted(filtered_judgments, key=lambda x: x['case_title'])

            for judgment in filtered_judgments:
                with st.expander(f"**{judgment['case_title']}** - {judgment['timestamp']}"):
                    st.markdown(f"<div class='judge-badge'>JUDGMENT #{judgment['id']}</div>", unsafe_allow_html=True)
                    st.markdown(f"**Case Number**: {judgment['case_number']}")
                    st.markdown(f"**Plaintiff**: {judgment['plaintiff']}")
                    st.markdown(f"**Defendant**: {judgment['defendant']}")
                    st.markdown(f"**Case Type**: {judgment['case_type']} (Priority: {judgment['priority']})")
                    st.markdown("#### Case Facts")
                    st.markdown(judgment['facts'])
                    if judgment.get('relevant_laws'):
                        st.markdown("#### Relevant Laws Applied")
                        st.markdown(judgment['relevant_laws'])
                    st.markdown("#### Full Judgment")
                    st.markdown("---")
                    st.markdown(judgment['judgment'])

                    # Two-step download: the PDF is built only when asked for,
                    # so listing many judgments does not render every PDF on
                    # every rerun.
                    if st.button("📥 Download PDF", key=f"download_{judgment['id']}"):
                        st.download_button(
                            label="📥 Download Generated PDF",
                            data=_build_judgment_pdf(judgment),
                            file_name=_judgment_pdf_name(judgment),
                            mime="application/pdf",
                            key=f"pdf_{judgment['id']}",
                        )
# ---------------------------------------------------------------------------
# Stakeholder workspace, tab 1: stamp every page of a PDF with the signer's
# name as a Code128 barcode plus a "Signed by:" caption.
# ---------------------------------------------------------------------------
if st.session_state.role == "stakeholder":
    if "📝 Document Signer" in tabs:
        with selected_tab[1]:
            st.markdown("## 📝 Upload and Sign Document")
            uploaded_file = st.file_uploader("Choose a file to sign", type=["pdf"])
            signer_name = st.text_input("Enter your name (Signer):").strip()

            if uploaded_file and signer_name:
                output_pdf = BytesIO()
                try:
                    # getvalue() returns the full upload regardless of the
                    # stream position, unlike read().
                    reader = PdfReader(BytesIO(uploaded_file.getvalue()))
                    writer = PdfWriter()
                    for page in reader.pages:
                        page_width = float(page.mediabox.width)
                        page_height = float(page.mediabox.height)

                        # Draw the stamp on a blank one-page PDF the size of
                        # this page, then merge it over the original.
                        packet = BytesIO()
                        can = canvas.Canvas(packet, pagesize=(page_width, page_height))
                        barcode = code128.Code128(signer_name, barHeight=10 * mm, barWidth=0.4)
                        barcode.drawOn(can, 50, 50)
                        can.setFont("Helvetica", 10)
                        can.drawString(50, 40, f"Signed by: {signer_name}")
                        can.save()
                        packet.seek(0)

                        overlay = PdfReader(packet).pages[0]
                        page.merge_page(overlay)
                        writer.add_page(page)
                    writer.write(output_pdf)
                except Exception as exc:
                    # UI boundary: malformed or encrypted PDFs, and names the
                    # barcode cannot encode, surface as a message rather than
                    # a traceback.
                    st.error(f"Could not sign the document: {exc}")
                else:
                    st.download_button(
                        "📅 Download Signed Document",
                        output_pdf.getvalue(),
                        file_name=f"signed_{uploaded_file.name}",
                        mime="application/pdf",
                    )
# ---------------------------------------------------------------------------
# Stakeholder workspace, tab 2: heuristic inspection of an uploaded PDF.
#
# NOTE(review): these checks are heuristics. "VERIFIED AUTHENTIC" only means
# the PDF parses, contains signature-like text, and its ModDate is not
# earlier than its CreationDate. No cryptographic signature is validated.
# ---------------------------------------------------------------------------
def _metadata_value(metadata, key):
    """Return ``str(metadata[key])``, or '' if there is no metadata or no key.

    Item access (rather than ``.get``) is used so PyPDF2 resolves indirect
    objects before the value is turned into text.
    """
    if not metadata or key not in metadata:
        return ""
    return str(metadata[key])


if "🔍 Verify Document" in tabs:
    with selected_tab[2]:
        st.markdown("## 🔍 Verify Document Authentication")
        st.markdown("Upload any document to verify its integrity and authenticity.")
        verify_file = st.file_uploader("Upload PDF for verification", type=["pdf"], key="verify")

        if verify_file:
            # getvalue() returns the full upload regardless of stream position.
            content = verify_file.getvalue()
            try:
                # Basic PDF validation: parsing fails on a corrupt file.
                pdf = PdfReader(BytesIO(content))
                # A PDF may carry no info dictionary at all.
                metadata = pdf.metadata

                # Extract text to look for signature markers.
                all_text = "".join(page.extract_text() or "" for page in pdf.pages)
                lowered_text = all_text.lower()
                has_signature_text = any(
                    marker in lowered_text
                    for marker in ("signed by:", "digital signature", "electronic signature")
                )

                # Fingerprint of the exact uploaded bytes.
                doc_hash = hashlib.sha256(content).hexdigest()

                # Metadata check: a modification date earlier than the
                # creation date is treated as suspicious. PDF dates are
                # "D:YYYYMMDDHHmmSS..." strings, so string order matches time
                # order - presumably only when both share a timezone offset.
                metadata_valid = True
                try:
                    creation_date = _metadata_value(metadata, '/CreationDate')
                    mod_date = _metadata_value(metadata, '/ModDate')
                    if mod_date and creation_date:
                        metadata_valid = mod_date >= creation_date
                except Exception:
                    # Unreadable metadata counts as an integrity failure.
                    metadata_valid = False

                # Placeholder: no content-consistency check is implemented.
                content_consistent = True
                checks_passed = metadata_valid and content_consistent

                analysis_col, results_col = st.columns(2)
                with analysis_col:
                    st.subheader("Document Analysis")
                    st.info(f"📄 Pages: {len(pdf.pages)}")
                    st.info(f"🔒 Contains signature markers: {'Yes' if has_signature_text else 'No'}")
                    st.code(f"Document Hash: {doc_hash[:16]}...{doc_hash[-16:]}")
                    file_size = len(content) / 1024  # KB
                    st.info(f"📦 File size: {file_size:.2f} KB")

                with results_col:
                    st.subheader("Verification Results")
                    if has_signature_text:
                        # Case 1: document has signature markers
                        if checks_passed:
                            st.success("✅ Document Status: VERIFIED AUTHENTIC")
                            st.markdown("- ✓ Valid PDF structure")
                            st.markdown("- ✓ Signature information detected")
                            st.markdown("- ✓ No tampering indicators found")
                            st.markdown("- ✓ Metadata consistency verified")
                        else:
                            st.warning("⚠️ Document Status: POTENTIALLY MODIFIED")
                            st.markdown("- ✓ Valid PDF structure")
                            st.markdown("- ✓ Signature information found")
                            st.markdown("- ❌ Some integrity checks failed")
                            if not metadata_valid:
                                st.markdown("- ❌ Metadata inconsistencies detected")
                        # Show the first "Signed by:" line found in the text.
                        signature_line = next(
                            (line for line in all_text.split('\n') if "signed by:" in line.lower()),
                            "",
                        )
                        if signature_line:
                            st.info(f"📝 {signature_line.strip()}")
                    else:
                        # Case 2: document without signature markers
                        if checks_passed:
                            st.success("✅ Document Status: VALID DOCUMENT")
                            st.markdown("- ✓ Valid PDF structure")
                            st.markdown("- ✓ Content integrity verified")
                            st.markdown("- ✓ No tampering indicators found")
                            st.markdown("- ℹ️ No signature information found (this is not an error)")
                        else:
                            st.warning("⚠️ Document Status: POTENTIALLY MODIFIED")
                            st.markdown("- ✓ Valid PDF structure")
                            st.markdown("- ❌ Some integrity checks failed")
                            if not metadata_valid:
                                st.markdown("- ❌ Metadata inconsistencies detected")

                with st.expander("🔬 Advanced Verification Details"):
                    st.markdown("### Document Metadata")
                    if metadata:
                        for key in metadata:
                            value = metadata[key]
                            if key and value and key not in ('/CreationDate', '/ModDate'):
                                st.text(f"{key}: {value}")
                    else:
                        st.text("No metadata available")

                    # Goes through the helper so a PDF without metadata shows
                    # "Not available" instead of raising on None.
                    st.markdown("### Integrity Timeline")
                    st.text(f"Creation Date: {_metadata_value(metadata, '/CreationDate') or 'Not available'}")
                    st.text(f"Last Modified: {_metadata_value(metadata, '/ModDate') or 'Not available'}")

                    st.markdown("### Content Analysis")
                    fonts_used = set()
                    image_count = 0
                    for page in pdf.pages:
                        # Pages are not guaranteed to carry their own
                        # /Resources entry.
                        if "/Resources" not in page:
                            continue
                        resources = page["/Resources"]
                        if "/Font" in resources:
                            fonts_used.update(str(font) for font in resources["/Font"])
                        if "/XObject" in resources:
                            xobjects = resources["/XObject"]
                            for name in xobjects:
                                xobject = xobjects[name]
                                if "/Subtype" in xobject and xobject["/Subtype"] == "/Image":
                                    image_count += 1
                    st.text(f"Fonts detected: {len(fonts_used)}")
                    st.text(f"Images detected: {image_count}")
            except Exception as e:
                # UI boundary: any parsing failure is reported as an invalid
                # document rather than a traceback.
                st.error("❌ Document Status: INVALID OR CORRUPTED")
                st.markdown("Error: Could not process the document properly. The file may be corrupted or not a valid PDF.")
                st.markdown(f"Technical details: {str(e)}")