Krrish-shetty committed on
Commit
c6b8d44
·
verified ·
1 Parent(s): fefc462

Upload 9 files

Browse files
Files changed (10) hide show
  1. .env +1 -0
  2. .gitattributes +2 -0
  3. Ingest.py +19 -0
  4. LICENSE +21 -0
  5. README.md +5 -5
  6. app.py +378 -0
  7. data/ipc_law.pdf +3 -0
  8. ipc_vector_db/index.faiss +3 -0
  9. ipc_vector_db/index.pkl +3 -0
  10. requirements.txt +10 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ TOGETHER_API_KEY=<redacted-rotate-this-key>
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/ipc_law.pdf filter=lfs diff=lfs merge=lfs -text
37
+ ipc_vector_db/index.faiss filter=lfs diff=lfs merge=lfs -text
Ingest.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Build the FAISS vector index used by the LawGPT app.

Loads every PDF directly inside ``data/``, splits the pages into overlapping
chunks, embeds the chunks and writes the resulting index to
``ipc_vector_db/``.
"""
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
# Imported from langchain_community so this script and app.py use the same
# class; the ``langchain.embeddings`` path is a deprecated re-export.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

DATA_DIR = "data"
INDEX_DIR = "ipc_vector_db"

loader = DirectoryLoader(DATA_DIR, glob="./*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# Stop early with a readable message instead of letting the index build fail
# on an empty document list.
if not documents:
    raise SystemExit(f"No PDF documents found in '{DATA_DIR}/'; nothing to index.")

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# The model name and pinned revision must match the ones app.py passes when it
# loads the index, otherwise query vectors and stored vectors will not agree.
embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1",
    model_kwargs={
        "trust_remote_code": True,
        "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7",
    },
)

# Create the vector embeddings and store them in an in-memory FAISS index.
faiss_db = FAISS.from_documents(texts, embeddings)

# Save the vector index to disk so the app can load it.
faiss_db.save_local(INDEX_DIR)
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Jurispro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Lawgpt Jurispro
3
- emoji: 📊
4
- colorFrom: indigo
5
- colorTo: red
6
  sdk: streamlit
7
- sdk_version: 1.44.1
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: LawGPT - RAG based AI Attorney Chatbot
3
+ emoji: ⚖️
4
+ colorFrom: red
5
+ colorTo: pink
6
  sdk: streamlit
7
+ sdk_version: 1.31.1
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import hashlib
4
+ from dotenv import load_dotenv
5
+ import streamlit as st
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain_together import Together
10
+ from langchain.memory import ConversationBufferWindowMemory
11
+ from langchain.chains import ConversationalRetrievalChain
12
+ from PyPDF2 import PdfReader, PdfWriter
13
+ from io import BytesIO
14
+ from reportlab.pdfgen import canvas
15
+ from reportlab.graphics.barcode import code128
16
+ from reportlab.lib.pagesizes import letter
17
+ from reportlab.lib.units import mm
18
+
# Load variables from a local .env file into the process environment; the
# TOGETHER_API_KEY read later via os.getenv comes from here.
load_dotenv()

# Browser tab title and full-width layout.
st.set_page_config(page_title="LawGPT", layout="wide")

# Global dark theme, button styling and responsive rules for the landing page.
_GLOBAL_CSS = """
<style>
body, .stApp {
background-color: #0f172a;
color: #f8fafc;
font-family: 'Segoe UI', sans-serif;
}
.block-container {
padding: 1rem;
max-width: 100%;
}
.stButton > button {
background-color: #3b82f6;
color: white;
border: none;
border-radius: 8px;
padding: 0.75em 2em;
font-size: 1.1rem;
font-weight: 600;
transition: 0.3s;
width: 100%;
}
.stButton > button:hover {
background-color: #2563eb;
}
@media screen and (max-width: 768px) {
.role-buttons {
flex-direction: column;
gap: 1rem;
}
.logo-img {
width: 70% !important;
}
}
.role-buttons {
display: flex;
justify-content: center;
align-items: center;
gap: 2rem;
margin-top: 3rem;
flex-wrap: wrap;
}
.logo-center {
display: flex;
justify-content: center;
align-items: center;
margin-top: 1rem;
margin-bottom: 2rem;
}
.logo-img {
width: 25%;
max-width: 250px;
height: auto;
}
</style>
"""

# Centered project logo shown at the top of every page.
_LOGO_HTML = """
<div class="logo-center">
<img class="logo-img" src="https://github.com/harshitv804/LawGPT/assets/100853494/ecff5d3c-f105-4ba2-a93a-500282f0bf00" />
</div>
"""

st.markdown(_GLOBAL_CSS, unsafe_allow_html=True)
st.markdown(_LOGO_HTML, unsafe_allow_html=True)
85
+
# Seed the per-session flags on first load: which role the visitor picked and
# whether they are allowed past the landing page.
for _state_key, _state_default in (("role", None), ("authenticated", False)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Landing page: shown until a role has been chosen.
if st.session_state.role is None:
    st.markdown("<h2 style='text-align: center;'>Who are you?</h2>", unsafe_allow_html=True)
    # Only the middle column is used, which centers the two buttons.
    _left_pad, _center, _right_pad = st.columns([1, 2, 1])
    _civilian_col, _stakeholder_col = _center.columns(2)

    # Civilians need no login, so they are marked authenticated immediately.
    if _civilian_col.button("🧑 I am a Civilian"):
        st.session_state.role = "civilian"
        st.session_state.authenticated = True
        st.rerun()

    # Stakeholders stay unauthenticated until the login form succeeds.
    if _stakeholder_col.button("⚖️ I am a Court Stakeholder"):
        st.session_state.role = "stakeholder"
        st.rerun()
105
+
import hmac

# Stakeholder login form, shown after the stakeholder role is picked and until
# the credentials check succeeds.
#
# The expected credentials are read from the STAKEHOLDER_USERNAME and
# STAKEHOLDER_PASSWORD environment variables rather than being hard-coded in
# source. If either variable is unset the login fails closed.
if st.session_state.role == "stakeholder" and not st.session_state.authenticated:
    st.markdown("### 🔐 Stakeholder Login")
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")
    if st.button("Login"):
        expected_username = os.getenv("STAKEHOLDER_USERNAME", "")
        expected_password = os.getenv("STAKEHOLDER_PASSWORD", "")

        if not (expected_username and expected_password):
            st.error("Stakeholder login is not configured.")
        else:
            # Evaluate both comparisons unconditionally and in constant time so
            # the response does not reveal which field was wrong.
            username_ok = hmac.compare_digest(
                username.encode("utf-8"), expected_username.encode("utf-8")
            )
            password_ok = hmac.compare_digest(
                password.encode("utf-8"), expected_password.encode("utf-8")
            )
            if username_ok and password_ok:
                st.success("Login successful!")
                st.session_state.authenticated = True
                st.rerun()
            else:
                st.error("Invalid credentials.")
117
+
# Prompt sent to the LLM for every question. The instruction block is closed
# with [/INST] so the model answers after it.
_PROMPT_TEMPLATE = """<s>[INST]You are a legal chatbot that answers questions about the Indian Penal Code (IPC).
Provide clear, concise, and accurate responses based on context and user's question.
Avoid extra details or assumptions. Focus only on legal information.

CONTEXT: {context}
CHAT HISTORY: {chat_history}
QUESTION: {question}

ANSWER:
[/INST]"""

# Substrings (lower-case) whose presence in the extracted text is treated as a
# signature marker by the verification tab.
_SIGNATURE_MARKERS = ("signed by:", "digital signature", "electronic signature")


@st.cache_resource
def _load_retriever():
    """Load the embedding model and FAISS index once per server process.

    Returns:
        A similarity retriever over ``ipc_vector_db`` returning the top 4 chunks.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="nomic-ai/nomic-embed-text-v1",
        model_kwargs={"trust_remote_code": True, "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7"},
    )
    # NOTE(security): this flag lets FAISS unpickle index.pkl. Only load an
    # index file that was produced by a trusted run of the ingest script.
    db = FAISS.load_local("ipc_vector_db", embeddings, allow_dangerous_deserialization=True)
    return db.as_retriever(search_type="similarity", search_kwargs={"k": 4})


@st.cache_resource
def _load_llm():
    """Create the Together LLM client once per server process."""
    return Together(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.5,
        max_tokens=1024,
        together_api_key=os.getenv("TOGETHER_API_KEY"),
    )


def reset_conversation():
    """Clear the visible chat transcript and the chain's conversation memory."""
    st.session_state.messages = []
    st.session_state.memory.clear()


def _render_chat_tab():
    """Render the LawGPT chat: transcript, question box and reset button."""
    st.markdown("## 💬 Your Legal AI Lawyer")
    st.markdown("### Ask any legal question related to the Indian Penal Code (IPC)")
    st.markdown("Questions might be of types like: Suppose a 16 year old is drinking and driving , and hit a pedestrian on the road . what are the possible case laws imposed and give any one previous court decisions on the same. ")

    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "memory" not in st.session_state:
        # Keep only the last 2 exchanges as chat history for the chain.
        st.session_state.memory = ConversationBufferWindowMemory(
            k=2, memory_key="chat_history", return_messages=True
        )

    prompt = PromptTemplate(
        template=_PROMPT_TEMPLATE,
        input_variables=["context", "question", "chat_history"],
    )

    # The chain itself is rebuilt each run because it is bound to this
    # session's memory; the heavy retriever and LLM come from the cache.
    qa = ConversationalRetrievalChain.from_llm(
        llm=_load_llm(),
        memory=st.session_state.memory,
        retriever=_load_retriever(),
        combine_docs_chain_kwargs={
            'prompt': prompt,
            'document_variable_name': 'context',
        },
    )

    # Replay the stored transcript.
    chat_placeholder = st.empty()
    with chat_placeholder.container():
        for msg in st.session_state.messages:
            with st.chat_message(msg["role"]):
                st.write(msg["content"])

    input_prompt = st.chat_input("Ask a legal question...")
    if input_prompt:
        with st.chat_message("user"):
            st.write(input_prompt)
        st.session_state.messages.append({"role": "user", "content": input_prompt})

        with st.chat_message("assistant"):
            with st.status("Thinking 💡", expanded=True):
                result = qa.invoke(input=input_prompt)
            message_placeholder = st.empty()
            full_response = "⚠️ **_Note: Information provided may be inaccurate._**\n\n"
            # Reveal the answer one character at a time with a typing cursor.
            for chunk in result["answer"]:
                full_response += chunk
                time.sleep(0.02)
                message_placeholder.markdown(full_response + " ▌")
            # Final render without the cursor.
            message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": result["answer"]})

    st.button("🔄 Reset Chat", on_click=reset_conversation)


def _stamp_pdf(file_content, signer_name):
    """Overlay a Code128 barcode and a "Signed by" line on every page.

    Args:
        file_content: Raw bytes of the uploaded PDF.
        signer_name: Text encoded in the barcode and printed beneath it.

    Returns:
        A ``BytesIO`` positioned at 0 containing the stamped PDF.
    """
    reader = PdfReader(BytesIO(file_content))
    writer = PdfWriter()

    for page in reader.pages:
        # Size the overlay canvas to this page's own media box.
        page_width = float(page.mediabox.width)
        page_height = float(page.mediabox.height)
        packet = BytesIO()
        can = canvas.Canvas(packet, pagesize=(page_width, page_height))
        barcode = code128.Code128(signer_name, barHeight=10 * mm, barWidth=0.4)
        barcode.drawOn(can, 50, 50)
        can.setFont("Helvetica", 10)
        can.drawString(50, 40, f"Signed by: {signer_name}")
        can.save()
        packet.seek(0)
        overlay = PdfReader(packet).pages[0]
        page.merge_page(overlay)
        writer.add_page(page)

    output_pdf = BytesIO()
    writer.write(output_pdf)
    output_pdf.seek(0)
    return output_pdf


def _render_signer_tab():
    """Render the upload form and offer the stamped PDF for download."""
    st.markdown("## 📝 Upload and Sign Document")
    uploaded_file = st.file_uploader("Choose a file to sign", type=["pdf"])
    signer_name = st.text_input("Enter your name (Signer):")

    if not (uploaded_file and signer_name):
        return

    try:
        output_pdf = _stamp_pdf(uploaded_file.read(), signer_name)
    except Exception as exc:
        # UI boundary: show a message instead of a traceback when the upload
        # cannot be parsed or stamped.
        st.error(f"Could not sign the document: {exc}")
        return

    st.download_button("📅 Download Signed Document", output_pdf, file_name=f"signed_{uploaded_file.name}", mime="application/pdf")


def _count_fonts_and_images(pdf):
    """Return ``(distinct font resource names, image XObject count)`` for *pdf*.

    Pages without a ``/Resources`` entry are skipped.
    """
    fonts_used = set()
    image_count = 0
    for page in pdf.pages:
        if "/Resources" not in page:
            continue
        resources = page["/Resources"]
        if "/Font" in resources:
            for font in resources["/Font"]:
                fonts_used.add(str(font))
        if "/XObject" in resources:
            xobjects = resources["/XObject"]
            for name in xobjects:
                xobject = xobjects[name]
                if "/Subtype" in xobject and xobject["/Subtype"] == "/Image":
                    image_count += 1
    return len(fonts_used), image_count


def _render_verify_tab():
    """Render the heuristic document check.

    NOTE(review): the checks below are heuristics (text markers and metadata
    date order); no cryptographic signature is validated here.
    """
    st.markdown("## 🔍 Verify Document Authentication")
    st.markdown("Upload any document to verify its integrity and authenticity.")

    verify_file = st.file_uploader("Upload PDF for verification", type=["pdf"], key="verify")
    if not verify_file:
        return

    content = verify_file.read()

    try:
        # Basic PDF validation: parsing fails for files that are not PDFs.
        pdf = PdfReader(BytesIO(content))

        # Extract text to look for signature markers.
        all_text = "".join(page.extract_text() or "" for page in pdf.pages)
        lowered_text = all_text.lower()
        has_signature_text = any(marker in lowered_text for marker in _SIGNATURE_MARKERS)

        # Document fingerprint.
        doc_hash = hashlib.sha256(content).hexdigest()

        # A PDF may have no Info dictionary; use an empty mapping so the
        # lookups below work either way.
        metadata = pdf.metadata or {}

        # Metadata check: the modification date must not precede creation.
        metadata_valid = True
        try:
            creation_date = metadata.get('/CreationDate', '')
            mod_date = metadata.get('/ModDate', '')
            if mod_date and creation_date:
                metadata_valid = mod_date >= creation_date
        except Exception:
            metadata_valid = False

        # Placeholder: no content-consistency check is implemented yet.
        content_consistent = True
        checks_passed = metadata_valid and content_consistent

        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Document Analysis")
            st.info(f"📄 Pages: {len(pdf.pages)}")
            st.info(f"🔒 Contains signature markers: {'Yes' if has_signature_text else 'No'}")

            # Abbreviated hash for document tracking.
            st.code(f"Document Hash: {doc_hash[:16]}...{doc_hash[-16:]}")

            file_size = len(content) / 1024  # KB
            st.info(f"📦 File size: {file_size:.2f} KB")

        with col2:
            st.subheader("Verification Results")

            if has_signature_text:
                # Case 1: document has signature markers.
                if checks_passed:
                    st.success("✅ Document Status: VERIFIED AUTHENTIC")
                    st.markdown("- ✓ Valid PDF structure")
                    st.markdown("- ✓ Signature information detected")
                    st.markdown("- ✓ No tampering indicators found")
                    st.markdown("- ✓ Metadata consistency verified")
                else:
                    st.warning("⚠️ Document Status: POTENTIALLY MODIFIED")
                    st.markdown("- ✓ Valid PDF structure")
                    st.markdown("- ✓ Signature information found")
                    st.markdown("- ❌ Some integrity checks failed")
                    if not metadata_valid:
                        st.markdown("- ❌ Metadata inconsistencies detected")

                # Show the first line that carries the "Signed by:" stamp.
                signature_line = next(
                    (line for line in all_text.split('\n') if "signed by:" in line.lower()), ""
                )
                if signature_line:
                    st.info(f"📝 {signature_line.strip()}")
            else:
                # Case 2: document without signature markers.
                if checks_passed:
                    st.success("✅ Document Status: VALID DOCUMENT")
                    st.markdown("- ✓ Valid PDF structure")
                    st.markdown("- ✓ Content integrity verified")
                    st.markdown("- ✓ No tampering indicators found")
                    st.markdown("- ℹ️ No signature information found (this is not an error)")
                else:
                    st.warning("⚠️ Document Status: POTENTIALLY MODIFIED")
                    st.markdown("- ✓ Valid PDF structure")
                    st.markdown("- ❌ Some integrity checks failed")
                    if not metadata_valid:
                        st.markdown("- ❌ Metadata inconsistencies detected")

        with st.expander("🔬 Advanced Verification Details"):
            st.markdown("### Document Metadata")
            if metadata:
                for key, value in metadata.items():
                    # Dates are shown separately under the timeline heading.
                    if key and value and key not in ('/CreationDate', '/ModDate'):
                        st.text(f"{key}: {value}")
            else:
                st.text("No metadata available")

            st.markdown("### Integrity Timeline")
            st.text(f"Creation Date: {metadata.get('/CreationDate', 'Not available')}")
            st.text(f"Last Modified: {metadata.get('/ModDate', 'Not available')}")

            st.markdown("### Content Analysis")
            font_count, image_count = _count_fonts_and_images(pdf)
            st.text(f"Fonts detected: {font_count}")
            st.text(f"Images detected: {image_count}")

    except Exception as e:
        st.error("❌ Document Status: INVALID OR CORRUPTED")
        st.markdown("Error: Could not process the document properly. The file may be corrupted or not a valid PDF.")
        st.markdown(f"Technical details: {str(e)}")


# Main workspace: shown to civilians right away and to stakeholders after login.
if st.session_state.role and (st.session_state.role == "civilian" or st.session_state.authenticated):
    if st.button("🔙 Back to Home"):
        st.session_state.role = None
        st.session_state.authenticated = False
        st.rerun()

    # Everyone gets the chat tab; stakeholders also get the document tools.
    tabs = ["📘 LawGPT"]
    if st.session_state.role == "stakeholder":
        tabs.extend(["📝 Document Signer", "🔍 Verify Document"])

    selected_tab = st.tabs(tabs)

    with selected_tab[0]:
        _render_chat_tab()

    if st.session_state.role == "stakeholder":
        with selected_tab[1]:
            _render_signer_tab()
        with selected_tab[2]:
            _render_verify_tab()
data/ipc_law.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67161633a056f77848221ab30c49b26199c66cc844ee559ac47d2ca5dea9256
3
+ size 20102169
ipc_vector_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aff5f96de10392f786dfb37aa8fbc95036d3336e5300633f8c7226ed085d48f4
3
+ size 18253869
ipc_vector_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee60525a2098003e37e49f16af6f0d70fb6d960dbfd66678e3bb3bd7fff21bb
3
+ size 5925644
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ streamlit
3
+ langchain
4
+ langchain_community
5
+ langchain_together
6
+ PyPDF2
7
+ reportlab
8
+ faiss-cpu
9
+ transformers
10
+ sentence-transformers