File size: 5,200 Bytes
d93128e 52f7e16 d93128e c23e409 821e62a c23e409 d93128e 821e62a d93128e 821e62a c23e409 821e62a d93128e c23e409 d93128e 821e62a d93128e 821e62a d93128e 821e62a d93128e c23e409 d93128e 821e62a d93128e 821e62a d93128e 821e62a d93128e 821e62a d93128e c23e409 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
os.environ["STREAMLIT_SERVER_PORT"] = "8501"
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import docx
import time
import psutil
from pathlib import Path
import torch
# Page config with wide layout
st.set_page_config(page_title="LexPilot", layout="wide")

# Sidebar: project blurb plus a live system-status readout.
with st.sidebar:
    st.title("LexPilot™")
    st.markdown(
        """
LexPilot™ ingests text, PDF, and Word files to instantly analyze contracts.
It delivers concise summaries and lets you ask targeted questions—
giving fast, precise insights to speed up legal and procurement reviews.
"""
    )
    st.markdown("---")
    st.write("### System Status")
    # Probe for CUDA. Was a bare `except:`, which also swallowed
    # SystemExit/KeyboardInterrupt — narrowed to Exception so only
    # genuine torch misconfiguration falls back to the CPU label.
    try:
        device_status = 'GPU ✅' if torch.cuda.is_available() else 'CPU ⚠️'
    except Exception:
        device_status = 'CPU (torch not configured)'
    st.text(f"Device: {device_status}")
    st.text(f"Memory: {psutil.virtual_memory().percent}% used")
    st.text(f"CPU: {psutil.cpu_percent()}% used")
# Setup cache directory for models
def setup_environment(cache_dir=Path(".cache/models")):
    """Ensure the model cache directory exists and return its path.

    Args:
        cache_dir: Directory to create. Defaults to ``.cache/models``
            (previously hard-coded; parameterized for reuse/testing).

    Returns:
        Path: the cache directory. The path is returned even when
        creation fails — matching the original best-effort behaviour —
        with the failure surfaced to the user via ``st.error``.
    """
    cache_dir = Path(cache_dir)
    try:
        cache_dir.mkdir(exist_ok=True, parents=True)
    except OSError as e:
        # mkdir only raises OSError subclasses; anything else is a bug
        # we want to see, so the catch is intentionally narrow.
        st.error(f"Failed to create cache directory: {e}")
    return cache_dir


cache_dir = setup_environment()
@st.cache_resource(ttl=3600)
def load_models():
    """Load and cache the QA and summarization pipelines.

    Selects GPU (device 0) when CUDA is available, falling back to CPU —
    consistent with the device status shown in the sidebar (the original
    hard-coded ``device=-1``, silently ignoring an available GPU).

    Returns:
        dict: ``{'qa': pipeline, 'summarizer': pipeline}``.

    On failure, reports the error and halts the script via ``st.stop()``.
    """
    try:
        device = 0 if torch.cuda.is_available() else -1
    except Exception:
        device = -1  # torch misconfigured: stay on CPU
    try:
        qa_model = pipeline(
            "question-answering",
            model="distilbert-base-cased-distilled-squad",
            device=device,
        )
        summarizer_model = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-6-6",
            device=device,
        )
        return {'qa': qa_model, 'summarizer': summarizer_model}
    except Exception as e:
        st.error(f"Failed to load models: {e}")
        st.stop()


models = load_models()
def extract_text(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        file: A Streamlit ``UploadedFile`` (exposes ``.type``) or ``None``.

    Returns:
        str: extracted text; ``""`` for ``None`` input, unsupported MIME
        types, or on extraction errors (the original implicitly returned
        ``None`` for unsupported types).
    """
    if file is None:
        return ""
    try:
        if file.type == "application/pdf":
            reader = PdfReader(file)
            # Call extract_text() once per page — the original called it
            # twice per page (once for the filter, once for the join),
            # doubling the parsing work.
            page_texts = (page.extract_text() for page in reader.pages)
            return " ".join(text for text in page_texts if text)
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(file)
            return "\n".join(para.text for para in doc.paragraphs if para.text)
        # Unsupported MIME type: always hand callers a string.
        return ""
    except Exception as e:
        st.error(f"Error processing document: {e}")
        return ""
def generate_summary(text, max_length=150):
    """Summarize *text* with the cached summarization pipeline.

    Inputs longer than 10,000 characters are split into 3,000-character
    chunks summarized independently; each chunk's length budget is the
    overall budget divided by the chunk count, floored at 30.

    Args:
        text: Document text to summarize.
        max_length: Target maximum summary length.

    Returns:
        str: the summary, or ``""`` for empty input or on failure.
    """
    if not text or not text.strip():
        return ""
    try:
        if len(text) <= 10000:
            # Short document: one direct pass.
            return models['summarizer'](text, max_length=max_length)[0]['summary_text']
        step = 3000
        pieces = [text[pos:pos + step] for pos in range(0, len(text), step)]
        per_piece_budget = max(max_length // len(pieces), 30)
        partials = [
            models['summarizer'](
                piece,
                max_length=per_piece_budget,
                min_length=30,
                do_sample=False,
            )[0]['summary_text']
            for piece in pieces
        ]
        return " ".join(partials)
    except Exception as e:
        st.error(f"Summarization failed: {e}")
        return ""
# Main UI title
st.title("📄 LexPilot")

# Document intake: either an uploaded file or pasted text.
with st.expander("📤 Upload Document", expanded=True):
    uploaded_file = st.file_uploader("Choose PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)

# An uploaded file takes precedence over pasted text.
if uploaded_file:
    context = extract_text(uploaded_file)
else:
    context = manual_text

tab1, tab2 = st.tabs(["🔍 Question Answering", "📝 Summarization"])
with tab1:
    # Q&A tab: active only once some document text is available.
    if context and context.strip():
        question = st.text_input("Ask about the document:")
        if question and question.strip():
            with st.spinner("Analyzing..."):
                t0 = time.time()
                try:
                    answer = models['qa'](
                        question=question,
                        context=context[:100000],  # cap context size for inference
                    )
                    st.success(f"Answered in {time.time()-t0:.1f}s")
                    st.markdown(f"**Answer:** {answer['answer']}")
                    st.progress(answer['score'])
                    st.caption(f"Confidence: {answer['score']:.0%}")
                except Exception as e:
                    st.error(f"Question answering failed: {e}")
with tab2:
    # Summarization tab: active only once some document text is available.
    if context and context.strip():
        with st.form("summary_form"):
            length = st.slider("Summary Length", 50, 300, 150)
            submitted = st.form_submit_button("Generate Summary")
            if submitted:
                with st.spinner("Summarizing..."):
                    t0 = time.time()
                    summary = generate_summary(context, length)
                    if summary:
                        st.success(f"Generated in {time.time()-t0:.1f}s")
                        st.markdown(f"**Summary:**\n\n{summary}")
# Show cache dir path in sidebar (optional)
with st.sidebar:
    st.markdown("---")  # divider below the system-status section
    # Surfaces the path returned by setup_environment() for debugging.
    st.write(f"Cache directory: {cache_dir}")
|