Upload 9 files
- .env +1 -0
- .gitattributes +2 -0
- Ingest.py +19 -0
- LICENSE +21 -0
- README.md +5 -5
- app.py +378 -0
- data/ipc_law.pdf +3 -0
- ipc_vector_db/index.faiss +3 -0
- ipc_vector_db/index.pkl +3 -0
- requirements.txt +10 -0
.env
ADDED
@@ -0,0 +1 @@
+TOGETHER_API_KEY=tgp_v1_HRziHodmDceCIkIlrzYQ4rFLodMSZ03O_TGuu69NZWk
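For context, app.py (added later in this commit) reads this variable at startup through python-dotenv; a minimal sketch of that lookup, assuming the key name above, is:

import os
from dotenv import load_dotenv

load_dotenv()  # read key=value pairs from .env into the process environment
api_key = os.getenv("TOGETHER_API_KEY")  # None if the variable is missing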
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/ipc_law.pdf filter=lfs diff=lfs merge=lfs -text
+ipc_vector_db/index.faiss filter=lfs diff=lfs merge=lfs -text
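The two added patterns are what a command like git lfs track "data/ipc_law.pdf" would typically append, so the PDF corpus and the FAISS index are stored as Git LFS pointers rather than regular blobs.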
Ingest.py
ADDED
@@ -0,0 +1,19 @@
+from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+
+
+loader = DirectoryLoader('data', glob="./*.pdf", loader_cls=PyPDFLoader)
+documents = loader.load()
+
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
+texts = text_splitter.split_documents(documents)
+
+embeddings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1", model_kwargs={"trust_remote_code": True, "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
+
+# Create vector embeddings and store them in a FAISS index
+faiss_db = FAISS.from_documents(texts, embeddings)
+
+# Save and export the vector embeddings database
+faiss_db.save_local("ipc_vector_db")
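As a quick sanity check (not part of this commit), the index written by Ingest.py can be reloaded with the same embedding model and queried; this mirrors the load path used in app.py below. The query string is only an illustrative placeholder:

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Reload the index produced by Ingest.py with the same embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1",
    model_kwargs={"trust_remote_code": True, "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7"},
)
db = FAISS.load_local("ipc_vector_db", embeddings, allow_dangerous_deserialization=True)

# Hypothetical query, used here only to confirm the index returns IPC passages
for doc in db.similarity_search("punishment for theft under the IPC", k=2):
    print(doc.page_content[:200])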
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Jurispro
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md
CHANGED
@@ -1,10 +1,10 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: LawGPT - RAG based AI Attorney Chatbot
+emoji: ⚖️
+colorFrom: red
+colorTo: pink
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.31.1
 app_file: app.py
 pinned: false
 ---
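This YAML front matter is what Hugging Face Spaces reads to configure the Space: the title and emoji shown on the Hub card, the card colors, the Streamlit SDK version to provision (1.31.1), and app.py as the entry point.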
app.py
ADDED
@@ -0,0 +1,378 @@
+import os
+import time
+import hashlib
+from dotenv import load_dotenv
+import streamlit as st
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.prompts import PromptTemplate
+from langchain_together import Together
+from langchain.memory import ConversationBufferWindowMemory
+from langchain.chains import ConversationalRetrievalChain
+from PyPDF2 import PdfReader, PdfWriter
+from io import BytesIO
+from reportlab.pdfgen import canvas
+from reportlab.graphics.barcode import code128
+from reportlab.lib.pagesizes import letter
+from reportlab.lib.units import mm
+
+load_dotenv()
+
+st.set_page_config(page_title="LawGPT", layout="wide")
+
+st.markdown("""
+<style>
+body, .stApp {
+    background-color: #0f172a;
+    color: #f8fafc;
+    font-family: 'Segoe UI', sans-serif;
+}
+.block-container {
+    padding: 1rem;
+    max-width: 100%;
+}
+.stButton > button {
+    background-color: #3b82f6;
+    color: white;
+    border: none;
+    border-radius: 8px;
+    padding: 0.75em 2em;
+    font-size: 1.1rem;
+    font-weight: 600;
+    transition: 0.3s;
+    width: 100%;
+}
+.stButton > button:hover {
+    background-color: #2563eb;
+}
+@media screen and (max-width: 768px) {
+    .role-buttons {
+        flex-direction: column;
+        gap: 1rem;
+    }
+    .logo-img {
+        width: 70% !important;
+    }
+}
+.role-buttons {
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    gap: 2rem;
+    margin-top: 3rem;
+    flex-wrap: wrap;
+}
+.logo-center {
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    margin-top: 1rem;
+    margin-bottom: 2rem;
+}
+.logo-img {
+    width: 25%;
+    max-width: 250px;
+    height: auto;
+}
+</style>
+""", unsafe_allow_html=True)
+
+st.markdown("""
+<div class="logo-center">
+    <img class="logo-img" src="https://github.com/harshitv804/LawGPT/assets/100853494/ecff5d3c-f105-4ba2-a93a-500282f0bf00" />
+</div>
+""", unsafe_allow_html=True)
+
+if "role" not in st.session_state:
+    st.session_state.role = None
+if "authenticated" not in st.session_state:
+    st.session_state.authenticated = False
+
+if st.session_state.role is None:
+    st.markdown("<h2 style='text-align: center;'>Who are you?</h2>", unsafe_allow_html=True)
+    col1, col2, col3 = st.columns([1, 2, 1])
+    with col2:
+        col_a, col_b = st.columns(2)
+        with col_a:
+            if st.button("🧑 I am a Civilian"):
+                st.session_state.role = "civilian"
+                st.session_state.authenticated = True
+                st.rerun()
+        with col_b:
+            if st.button("⚖️ I am a Court Stakeholder"):
+                st.session_state.role = "stakeholder"
+                st.rerun()
+
+if st.session_state.role == "stakeholder" and not st.session_state.authenticated:
+    st.markdown("### 🔐 Stakeholder Login")
+    username = st.text_input("Username")
+    password = st.text_input("Password", type="password")
+    if st.button("Login"):
+        if username == "admin" and password == "1234":
+            st.success("Login successful!")
+            st.session_state.authenticated = True
+            st.rerun()
+        else:
+            st.error("Invalid credentials.")
+
+if st.session_state.role and (st.session_state.role == "civilian" or st.session_state.authenticated):
+    if st.button("🔙 Back to Home"):
+        st.session_state.role = None
+        st.session_state.authenticated = False
+        st.rerun()
+
+    tabs = ["📘 LawGPT"]
+    if st.session_state.role == "stakeholder":
+        tabs.extend(["📝 Document Signer", "🔍 Verify Document"])
+
+    selected_tab = st.tabs(tabs)
+
+    if "📘 LawGPT" in tabs:
+        with selected_tab[0]:
+            st.markdown("## 💬 Your Legal AI Lawyer")
+            st.markdown("### Ask any legal question related to the Indian Penal Code (IPC)")
+            st.markdown("Example question: Suppose a 16-year-old is drinking and driving and hits a pedestrian on the road. Which provisions could apply, and can you give one previous court decision on the same?")
+
+            def reset_conversation():
+                st.session_state.messages = []
+                st.session_state.memory.clear()
+
+            if "messages" not in st.session_state:
+                st.session_state.messages = []
+            if "memory" not in st.session_state:
+                st.session_state.memory = ConversationBufferWindowMemory(
+                    k=2, memory_key="chat_history", return_messages=True
+                )
+
+            embeddings = HuggingFaceEmbeddings(
+                model_name="nomic-ai/nomic-embed-text-v1",
+                model_kwargs={"trust_remote_code": True, "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7"}
+            )
+
+            db = FAISS.load_local("ipc_vector_db", embeddings, allow_dangerous_deserialization=True)
+            db_retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
+
+            prompt_template = """<s>[INST]You are a legal chatbot that answers questions about the Indian Penal Code (IPC).
+Provide clear, concise, and accurate responses based on context and user's question.
+Avoid extra details or assumptions. Focus only on legal information.
+
+CONTEXT: {context}
+CHAT HISTORY: {chat_history}
+QUESTION: {question}
+
+ANSWER:
+[/INST]"""
+
+            prompt = PromptTemplate(
+                template=prompt_template,
+                input_variables=["context", "question", "chat_history"]
+            )
+
+            llm = Together(
+                model="mistralai/Mistral-7B-Instruct-v0.2",
+                temperature=0.5,
+                max_tokens=1024,
+                together_api_key=os.getenv("TOGETHER_API_KEY")
+            )
+
+            qa = ConversationalRetrievalChain.from_llm(
+                llm=llm,
+                memory=st.session_state.memory,
+                retriever=db_retriever,
+                combine_docs_chain_kwargs={
+                    'prompt': prompt,
+                    'document_variable_name': 'context'
+                }
+            )
+
+            chat_placeholder = st.empty()
+            with chat_placeholder.container():
+                for msg in st.session_state.messages:
+                    with st.chat_message(msg["role"]):
+                        st.write(msg["content"])
+
+            input_prompt = st.chat_input("Ask a legal question...")
+            if input_prompt:
+                with st.chat_message("user"):
+                    st.write(input_prompt)
+                st.session_state.messages.append({"role": "user", "content": input_prompt})
+
+                with st.chat_message("assistant"):
+                    with st.status("Thinking 💡", expanded=True):
+                        result = qa.invoke(input=input_prompt)
+                        message_placeholder = st.empty()
+                        full_response = "⚠️ **_Note: Information provided may be inaccurate._**\n\n"
+                        for chunk in result["answer"]:
+                            full_response += chunk
+                            time.sleep(0.02)
+                            message_placeholder.markdown(full_response + " ▌")
+                    st.session_state.messages.append({"role": "assistant", "content": result["answer"]})
+
+            st.button("🔄 Reset Chat", on_click=reset_conversation)
+
+    if st.session_state.role == "stakeholder":
+        if "📝 Document Signer" in tabs:
+            with selected_tab[1]:
+                st.markdown("## 📝 Upload and Sign Document")
+                uploaded_file = st.file_uploader("Choose a file to sign", type=["pdf"])
+                signer_name = st.text_input("Enter your name (Signer):")
+
+                if uploaded_file and signer_name:
+                    file_content = uploaded_file.read()
+                    input_pdf = BytesIO(file_content)
+                    output_pdf = BytesIO()
+
+                    reader = PdfReader(input_pdf)
+                    writer = PdfWriter()
+
+                    for page in reader.pages:
+                        page_width = float(page.mediabox.width)
+                        page_height = float(page.mediabox.height)
+                        packet = BytesIO()
+                        can = canvas.Canvas(packet, pagesize=(page_width, page_height))
+                        barcode = code128.Code128(signer_name, barHeight=10 * mm, barWidth=0.4)
+                        barcode.drawOn(can, 50, 50)
+                        can.setFont("Helvetica", 10)
+                        can.drawString(50, 40, f"Signed by: {signer_name}")
+                        can.save()
+                        packet.seek(0)
+                        overlay = PdfReader(packet).pages[0]
+                        page.merge_page(overlay)
+                        writer.add_page(page)
+
+                    writer.write(output_pdf)
+                    output_pdf.seek(0)
+
+                    st.download_button("📅 Download Signed Document", output_pdf, file_name=f"signed_{uploaded_file.name}", mime="application/pdf")
+
+        if "🔍 Verify Document" in tabs:
+            with selected_tab[2]:
+                st.markdown("## 🔍 Verify Document Authentication")
+                st.markdown("Upload any document to verify its integrity and authenticity.")
+
+                verify_file = st.file_uploader("Upload PDF for verification", type=["pdf"], key="verify")
+
+                if verify_file:
+                    content = verify_file.read()
+
+                    try:
+                        # Basic PDF validation
+                        pdf = PdfReader(BytesIO(content))
+
+                        # Extract text to look for signature markers
+                        all_text = ""
+                        for page in pdf.pages:
+                            all_text += page.extract_text() or ""
+
+                        # Check for digital signature information
+                        has_signature_text = any(sig_text in all_text.lower() for sig_text in
+                                                 ["signed by:", "digital signature", "electronic signature"])
+
+                        # Create document fingerprint/hash
+                        doc_hash = hashlib.sha256(content).hexdigest()
+
+                        # Calculate metadata integrity
+                        metadata_valid = True
+                        if pdf.metadata:
+                            try:
+                                # Check for suspicious metadata modifications
+                                creation_date = pdf.metadata.get('/CreationDate', '')
+                                mod_date = pdf.metadata.get('/ModDate', '')
+                                if mod_date and creation_date:
+                                    metadata_valid = mod_date >= creation_date
+                            except:
+                                metadata_valid = False
+
+                        # Check for content consistency
+                        content_consistent = True
+
+                        col1, col2 = st.columns(2)
+
+                        with col1:
+                            st.subheader("Document Analysis")
+                            st.info(f"📄 Pages: {len(pdf.pages)}")
+                            st.info(f"🔒 Contains signature markers: {'Yes' if has_signature_text else 'No'}")
+
+                            # Display hash for document tracking
+                            st.code(f"Document Hash: {doc_hash[:16]}...{doc_hash[-16:]}")
+
+                            # Document size and characteristics
+                            file_size = len(content) / 1024  # KB
+                            st.info(f"📦 File size: {file_size:.2f} KB")
+
+                        with col2:
+                            st.subheader("Verification Results")
+
+                            # Case 1: Document has signature markers
+                            if has_signature_text:
+                                if metadata_valid and content_consistent:
+                                    st.success("✅ Document Status: VERIFIED AUTHENTIC")
+                                    st.markdown("- ✓ Valid PDF structure")
+                                    st.markdown("- ✓ Signature information detected")
+                                    st.markdown("- ✓ No tampering indicators found")
+                                    st.markdown("- ✓ Metadata consistency verified")
+                                else:
+                                    st.warning("⚠️ Document Status: POTENTIALLY MODIFIED")
+                                    st.markdown("- ✓ Valid PDF structure")
+                                    st.markdown("- ✓ Signature information found")
+                                    st.markdown("- ❌ Some integrity checks failed")
+
+                                    if not metadata_valid:
+                                        st.markdown("- ❌ Metadata inconsistencies detected")
+
+                                # Display signature extraction if present
+                                signature_line = next((line for line in all_text.split('\n') if "signed by:" in line.lower()), "")
+                                if signature_line:
+                                    st.info(f"📝 {signature_line.strip()}")
+
+                            # Case 2: Document without signatures
+                            else:
+                                if metadata_valid and content_consistent:
+                                    st.success("✅ Document Status: VALID DOCUMENT")
+                                    st.markdown("- ✓ Valid PDF structure")
+                                    st.markdown("- ✓ Content integrity verified")
+                                    st.markdown("- ✓ No tampering indicators found")
+                                    st.markdown("- ℹ️ No signature information found (this is not an error)")
+                                else:
+                                    st.warning("⚠️ Document Status: POTENTIALLY MODIFIED")
+                                    st.markdown("- ✓ Valid PDF structure")
+                                    st.markdown("- ❌ Some integrity checks failed")
+
+                                    if not metadata_valid:
+                                        st.markdown("- ❌ Metadata inconsistencies detected")
+
+                        # Advanced options
+                        with st.expander("🔬 Advanced Verification Details"):
+                            st.markdown("### Document Metadata")
+                            if pdf.metadata:
+                                for key, value in pdf.metadata.items():
+                                    if key and value and key not in ('/CreationDate', '/ModDate'):
+                                        st.text(f"{key}: {value}")
+                            else:
+                                st.text("No metadata available")
+
+                            st.markdown("### Integrity Timeline")
+                            st.text(f"Creation Date: {pdf.metadata.get('/CreationDate', 'Not available')}")
+                            st.text(f"Last Modified: {pdf.metadata.get('/ModDate', 'Not available')}")
+
+                            # Additional verification for content integrity
+                            st.markdown("### Content Analysis")
+                            fonts_used = set()
+                            image_count = 0
+                            for page in pdf.pages:
+                                if "/Font" in page["/Resources"]:
+                                    for font in page["/Resources"]["/Font"]:
+                                        fonts_used.add(str(font))
+                                if "/XObject" in page["/Resources"]:
+                                    for obj in page["/Resources"]["/XObject"]:
+                                        if "/Subtype" in page["/Resources"]["/XObject"][obj] and \
+                                           page["/Resources"]["/XObject"][obj]["/Subtype"] == "/Image":
+                                            image_count += 1
+
+                            st.text(f"Fonts detected: {len(fonts_used)}")
+                            st.text(f"Images detected: {image_count}")
+
+                    except Exception as e:
+                        st.error("❌ Document Status: INVALID OR CORRUPTED")
+                        st.markdown("Error: Could not process the document properly. The file may be corrupted or not a valid PDF.")
+                        st.markdown(f"Technical details: {str(e)}")
data/ipc_law.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e67161633a056f77848221ab30c49b26199c66cc844ee559ac47d2ca5dea9256
+size 20102169
ipc_vector_db/index.faiss
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aff5f96de10392f786dfb37aa8fbc95036d3336e5300633f8c7226ed085d48f4
+size 18253869
ipc_vector_db/index.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ee60525a2098003e37e49f16af6f0d70fb6d960dbfd66678e3bb3bd7fff21bb
+size 5925644
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+python-dotenv
+streamlit
+langchain
+langchain_community
+langchain_together
+PyPDF2
+reportlab
+faiss-cpu
+transformers
+sentence-transformers
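To try the Space locally, the usual Streamlit workflow should apply: install the dependencies with pip install -r requirements.txt, supply TOGETHER_API_KEY via .env, and launch with streamlit run app.py, which the README front matter declares as the entry point.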