File size: 5,200 Bytes
d93128e 52f7e16 d93128e c23e409 821e62a c23e409 d93128e 821e62a d93128e 821e62a c23e409 821e62a d93128e c23e409 d93128e 821e62a d93128e 821e62a d93128e 821e62a d93128e c23e409 d93128e 821e62a d93128e 821e62a d93128e 821e62a d93128e 821e62a d93128e c23e409 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
os.environ["STREAMLIT_SERVER_PORT"] = "8501"
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import docx
import time
import psutil
from pathlib import Path
import torch
# Page config with wide layout
st.set_page_config(page_title="LexPilot", layout="wide")

# Sidebar: project blurb plus a live system-status readout.
with st.sidebar:
    st.title("LexPilot™")
    st.markdown(
        """
LexPilot™ ingests text, PDF, and Word files to instantly analyze contracts.
It delivers concise summaries and lets you ask targeted questions—
giving fast, precise insights to speed up legal and procurement reviews.
"""
    )
    st.markdown("---")
    st.write("### System Status")
    # Probe for CUDA. Was a bare `except:`, which also swallowed
    # SystemExit/KeyboardInterrupt — narrowed to Exception so only
    # genuine torch misconfiguration falls back to the CPU label.
    try:
        device_status = 'GPU ✅' if torch.cuda.is_available() else 'CPU ⚠️'
    except Exception:
        device_status = 'CPU (torch not configured)'
    st.text(f"Device: {device_status}")
    st.text(f"Memory: {psutil.virtual_memory().percent}% used")
    st.text(f"CPU: {psutil.cpu_percent()}% used")
# Setup cache directory for models
def setup_environment(cache_dir=Path(".cache/models")):
    """Ensure the model cache directory exists and return its path.

    Args:
        cache_dir: Directory to create. Defaults to ``.cache/models``
            (previously hard-coded; parameterized for reuse/testing).

    Returns:
        Path: the cache directory. The path is returned even when
        creation fails — matching the original best-effort behaviour —
        with the failure surfaced to the user via ``st.error``.
    """
    cache_dir = Path(cache_dir)
    try:
        cache_dir.mkdir(exist_ok=True, parents=True)
    except OSError as e:
        # mkdir only raises OSError subclasses; anything else is a bug
        # we want to see, so the catch is intentionally narrow.
        st.error(f"Failed to create cache directory: {e}")
    return cache_dir


cache_dir = setup_environment()
@st.cache_resource(ttl=3600)
def load_models():
    """Load and cache the QA and summarization pipelines.

    Selects GPU (device 0) when CUDA is available, falling back to CPU —
    consistent with the device status shown in the sidebar (the original
    hard-coded ``device=-1``, silently ignoring an available GPU).

    Returns:
        dict: ``{'qa': pipeline, 'summarizer': pipeline}``.

    On failure, reports the error and halts the script via ``st.stop()``.
    """
    try:
        device = 0 if torch.cuda.is_available() else -1
    except Exception:
        device = -1  # torch misconfigured: stay on CPU
    try:
        qa_model = pipeline(
            "question-answering",
            model="distilbert-base-cased-distilled-squad",
            device=device,
        )
        summarizer_model = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-6-6",
            device=device,
        )
        return {'qa': qa_model, 'summarizer': summarizer_model}
    except Exception as e:
        st.error(f"Failed to load models: {e}")
        st.stop()


models = load_models()
def extract_text(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        file: A Streamlit ``UploadedFile`` (exposes ``.type``) or ``None``.

    Returns:
        str: extracted text; ``""`` for ``None`` input, unsupported MIME
        types, or on extraction errors (the original implicitly returned
        ``None`` for unsupported types).
    """
    if file is None:
        return ""
    try:
        if file.type == "application/pdf":
            reader = PdfReader(file)
            # Call extract_text() once per page — the original called it
            # twice per page (once for the filter, once for the join),
            # doubling the parsing work.
            page_texts = (page.extract_text() for page in reader.pages)
            return " ".join(text for text in page_texts if text)
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(file)
            return "\n".join(para.text for para in doc.paragraphs if para.text)
        # Unsupported MIME type: always hand callers a string.
        return ""
    except Exception as e:
        st.error(f"Error processing document: {e}")
        return ""
def generate_summary(text, max_length=150):
    """Summarize *text* with the cached summarization pipeline.

    Inputs longer than 10,000 characters are split into 3,000-character
    chunks summarized independently; each chunk's length budget is the
    overall budget divided by the chunk count, floored at 30.

    Args:
        text: Document text to summarize.
        max_length: Target maximum summary length.

    Returns:
        str: the summary, or ``""`` for empty input or on failure.
    """
    if not text or not text.strip():
        return ""
    try:
        if len(text) <= 10000:
            # Short document: one direct pass.
            return models['summarizer'](text, max_length=max_length)[0]['summary_text']
        step = 3000
        pieces = [text[pos:pos + step] for pos in range(0, len(text), step)]
        per_piece_budget = max(max_length // len(pieces), 30)
        partials = [
            models['summarizer'](
                piece,
                max_length=per_piece_budget,
                min_length=30,
                do_sample=False,
            )[0]['summary_text']
            for piece in pieces
        ]
        return " ".join(partials)
    except Exception as e:
        st.error(f"Summarization failed: {e}")
        return ""
# Main UI title
st.title("📄 LexPilot")

# Document intake: either an uploaded file or pasted text.
with st.expander("📤 Upload Document", expanded=True):
    uploaded_file = st.file_uploader("Choose PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)

# An uploaded file takes precedence over pasted text.
if uploaded_file:
    context = extract_text(uploaded_file)
else:
    context = manual_text

tab1, tab2 = st.tabs(["🔍 Question Answering", "📝 Summarization"])
with tab1:
    # Q&A tab: active only once some document text is available.
    if context and context.strip():
        question = st.text_input("Ask about the document:")
        if question and question.strip():
            with st.spinner("Analyzing..."):
                t0 = time.time()
                try:
                    answer = models['qa'](
                        question=question,
                        context=context[:100000],  # cap context size for inference
                    )
                    st.success(f"Answered in {time.time()-t0:.1f}s")
                    st.markdown(f"**Answer:** {answer['answer']}")
                    st.progress(answer['score'])
                    st.caption(f"Confidence: {answer['score']:.0%}")
                except Exception as e:
                    st.error(f"Question answering failed: {e}")
with tab2:
    # Summarization tab: active only once some document text is available.
    if context and context.strip():
        with st.form("summary_form"):
            length = st.slider("Summary Length", 50, 300, 150)
            submitted = st.form_submit_button("Generate Summary")
            if submitted:
                with st.spinner("Summarizing..."):
                    t0 = time.time()
                    summary = generate_summary(context, length)
                    if summary:
                        st.success(f"Generated in {time.time()-t0:.1f}s")
                        st.markdown(f"**Summary:**\n\n{summary}")
# Show cache dir path in sidebar (optional)
with st.sidebar:
    st.markdown("---")  # divider below the system-status section
    # Surfaces the path returned by setup_environment() for debugging.
    st.write(f"Cache directory: {cache_dir}")
|