File size: 5,200 Bytes
d93128e
 
 
 
52f7e16
d93128e
 
 
 
 
 
 
 
c23e409
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821e62a
c23e409
d93128e
821e62a
 
 
 
 
d93128e
 
 
 
 
 
 
821e62a
 
 
 
 
 
 
c23e409
821e62a
 
 
d93128e
c23e409
d93128e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821e62a
d93128e
 
 
 
 
 
 
 
 
 
 
 
 
821e62a
d93128e
 
 
 
 
 
 
821e62a
d93128e
 
c23e409
 
d93128e
821e62a
d93128e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821e62a
d93128e
 
 
 
821e62a
d93128e
 
 
 
 
 
 
 
 
 
821e62a
d93128e
 
c23e409
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Configure Streamlit's server through environment variables; these are read
# when `streamlit` is imported below, so they must be set first.
# NOTE(review): assumes the deployment environment does not override these — confirm.
import os
os.environ["STREAMLIT_SERVER_PORT"] = "8501"
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"

import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import docx
import time
import psutil
from pathlib import Path
import torch

# Page config with wide layout
st.set_page_config(page_title="LexPilot", layout="wide")

# Sidebar with project info and a live system-status readout.
with st.sidebar:
    st.title("LexPilot™")
    st.markdown(
        """
        LexPilot™ ingests text, PDF, and Word files to instantly analyze contracts.  
        It delivers concise summaries and lets you ask targeted questions—  
        giving fast, precise insights to speed up legal and procurement reviews.
        """
    )
    st.markdown("---")
    st.write("### System Status")
    # Probe for CUDA. Fix: the original used a bare `except:`, which also
    # swallows KeyboardInterrupt/SystemExit; narrow it so only real runtime
    # errors (e.g. a broken torch install) fall back to the CPU label.
    try:
        device_status = 'GPU ✅' if torch.cuda.is_available() else 'CPU ⚠️'
    except Exception:
        device_status = 'CPU (torch not configured)'
    st.text(f"Device: {device_status}")
    st.text(f"Memory: {psutil.virtual_memory().percent}% used")
    st.text(f"CPU: {psutil.cpu_percent()}% used")

# Model cache location: ensure it exists before any model downloads start.
def setup_environment():
    """Create the local model cache directory (if needed) and return its path."""
    model_cache = Path(".cache/models")
    try:
        model_cache.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        # Surface the failure in the UI but keep running; the path is still
        # returned so callers can report or retry.
        st.error(f"Failed to create cache directory: {e}")
    return model_cache

cache_dir = setup_environment()

@st.cache_resource(ttl=3600)
def load_models():
    """Load the QA and summarization pipelines on CPU and cache them.

    Streamlit caches the result for one hour. If either pipeline fails to
    load, the error is shown in the UI and the app is halted via st.stop().
    """
    try:
        loaded = {
            'qa': pipeline(
                "question-answering",
                model="distilbert-base-cased-distilled-squad",
                device=-1,  # -1 pins inference to CPU
            ),
            'summarizer': pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-6-6",
                device=-1,  # -1 pins inference to CPU
            ),
        }
    except Exception as e:
        st.error(f"Failed to load models: {e}")
        st.stop()
    else:
        return loaded

models = load_models()

def extract_text(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Parameters
    ----------
    file : UploadedFile | None
        Streamlit upload object exposing a MIME ``.type`` attribute and a
        file-like read interface. ``None`` is accepted.

    Returns
    -------
    str
        The extracted text. Returns "" for ``None`` input, for unsupported
        MIME types, and on extraction errors (which are also surfaced via
        ``st.error``).
    """
    if file is None:
        return ""
    try:
        if file.type == "application/pdf":
            reader = PdfReader(file)
            # Call extract_text() only once per page (it can be expensive)
            # and skip pages that yield no text (e.g. scanned images).
            page_texts = (page.extract_text() for page in reader.pages)
            return " ".join(text for text in page_texts if text)
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(file)
            return "\n".join(para.text for para in doc.paragraphs if para.text)
        # Fix: the original fell through here and implicitly returned None
        # for unsupported MIME types; callers expect a string.
        return ""
    except Exception as e:
        st.error(f"Error processing document: {e}")
        return ""

def generate_summary(text, max_length=150):
    """Summarize *text* using the cached summarizer pipeline.

    Documents longer than 10,000 characters are split into 3,000-character
    chunks that are summarized independently and joined. Returns "" for
    empty/blank input or when summarization fails (the error is shown via
    ``st.error``).
    """
    if not text or not text.strip():
        return ""
    try:
        if len(text) <= 10000:
            # Short document: one direct pass through the summarizer.
            return models['summarizer'](text, max_length=max_length)[0]['summary_text']
        chunk_size = 3000
        pieces = [text[start:start + chunk_size]
                  for start in range(0, len(text), chunk_size)]
        # Spread the length budget evenly across chunks, but never below
        # the 30-token floor used as min_length.
        per_chunk_max = max(max_length // len(pieces), 30)
        partials = []
        for piece in pieces:
            out = models['summarizer'](
                piece,
                max_length=per_chunk_max,
                min_length=30,
                do_sample=False
            )
            partials.append(out[0]['summary_text'])
        return " ".join(partials)
    except Exception as e:
        st.error(f"Summarization failed: {e}")
        return ""

# Main UI title
st.title("📄 LexPilot")

# Document intake: either a file upload or pasted text. `context` holds the
# raw document text that both tabs below operate on.
with st.expander("📤 Upload Document", expanded=True):
    uploaded_file = st.file_uploader("Choose PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)
    # An uploaded file takes precedence over pasted text.
    context = extract_text(uploaded_file) if uploaded_file else manual_text

# Two workflows over the same context: extractive QA and summarization.
tab1, tab2 = st.tabs(["🔍 Question Answering", "📝 Summarization"])

with tab1:
    # QA controls are only rendered once some document text is available.
    if context and len(context.strip()) > 0:
        question = st.text_input("Ask about the document:")
        if question and len(question.strip()) > 0:
            with st.spinner("Analyzing..."):
                start_time = time.time()
                try:
                    # Cap the context at 100k characters to bound model
                    # latency/memory; text beyond that is silently ignored.
                    result = models['qa'](
                        question=question,
                        context=context[:100000]
                    )
                    st.success(f"Answered in {time.time()-start_time:.1f}s")
                    st.markdown(f"**Answer:** {result['answer']}")
                    # The QA score is in [0, 1], so it serves both as a
                    # progress-bar value and a confidence percentage.
                    st.progress(result['score'])
                    st.caption(f"Confidence: {result['score']:.0%}")
                except Exception as e:
                    st.error(f"Question answering failed: {e}")

with tab2:
    # Summarization controls are only rendered once document text exists.
    if context and len(context.strip()) > 0:
        # A form batches the slider + button so the app reruns only on submit.
        with st.form("summary_form"):
            length = st.slider("Summary Length", 50, 300, 150)
            if st.form_submit_button("Generate Summary"):
                with st.spinner("Summarizing..."):
                    start_time = time.time()
                    summary = generate_summary(context, length)
                    # generate_summary returns "" on failure and has already
                    # shown its own error, so only report success here.
                    if summary:
                        st.success(f"Generated in {time.time()-start_time:.1f}s")
                        st.markdown(f"**Summary:**\n\n{summary}")

# Sidebar footer: show where downloaded models are cached on disk.
with st.sidebar:
    st.markdown("---")
    st.write(f"Cache directory: {cache_dir}")