"""LexPilot — Streamlit app for contract analysis.

Ingests text, PDF, and Word files, then provides extractive question
answering and abstractive summarization via local HuggingFace pipelines.
"""

import os

# Must be set before streamlit is imported so the server picks them up.
os.environ["STREAMLIT_SERVER_PORT"] = "8501"
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"

import time
from pathlib import Path

import docx
import psutil
import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import pipeline

# Page config with wide layout
st.set_page_config(page_title="LexPilot", layout="wide")

# Sidebar with project info and live system status
with st.sidebar:
    st.title("LexPilot™")
    st.markdown(
        """
        LexPilot™ ingests text, PDF, and Word files to instantly analyze contracts.
        It delivers concise summaries and lets you ask targeted questions—
        giving fast, precise insights to speed up legal and procurement reviews.
        """
    )
    st.markdown("---")
    st.write("### System Status")
    try:
        device_status = 'GPU ✅' if torch.cuda.is_available() else 'CPU ⚠️'
    except Exception:  # narrowed from bare except: torch may be misconfigured, but don't swallow SystemExit
        device_status = 'CPU (torch not configured)'
    st.text(f"Device: {device_status}")
    st.text(f"Memory: {psutil.virtual_memory().percent}% used")
    st.text(f"CPU: {psutil.cpu_percent()}% used")


def setup_environment():
    """Create (if needed) and return the local model cache directory.

    Returns the Path even if creation fails, after surfacing the error in the UI.
    """
    cache_dir = Path(".cache/models")
    try:
        cache_dir.mkdir(exist_ok=True, parents=True)
    except Exception as e:
        st.error(f"Failed to create cache directory: {e}")
    return cache_dir


cache_dir = setup_environment()


@st.cache_resource(ttl=3600)
def load_models():
    """Load and cache the QA and summarization pipelines on CPU (device=-1).

    Returns a dict with keys 'qa' and 'summarizer'. Halts the app via
    st.stop() if loading fails, after showing the error in the UI.
    """
    try:
        qa_model = pipeline(
            "question-answering",
            model="distilbert-base-cased-distilled-squad",
            device=-1
        )
        summarizer_model = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-6-6",
            device=-1
        )
        return {'qa': qa_model, 'summarizer': summarizer_model}
    except Exception as e:
        st.error(f"Failed to load models: {e}")
        st.stop()


models = load_models()


def extract_text(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        file: a Streamlit UploadedFile (has .type MIME attribute), or None.

    Returns:
        The extracted text, or "" for None input, unsupported MIME types,
        or extraction errors (errors are also surfaced via st.error).
    """
    if file is None:
        return ""
    try:
        if file.type == "application/pdf":
            reader = PdfReader(file)
            # Call extract_text() once per page — it re-parses the page
            # content stream, so calling it twice (condition + value) is wasteful.
            page_texts = (page.extract_text() for page in reader.pages)
            return " ".join(text for text in page_texts if text)
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(file)
            return "\n".join(para.text for para in doc.paragraphs if para.text)
    except Exception as e:
        st.error(f"Error processing document: {e}")
    # Unsupported MIME type or extraction failure: return "" (not None) so
    # callers can rely on a string result.
    return ""


def generate_summary(text, max_length=150):
    """Summarize text, chunking long documents into 3000-char pieces.

    Args:
        text: the document text to summarize.
        max_length: target maximum summary length (split across chunks
            for documents longer than 10,000 characters).

    Returns:
        The summary string, or "" on empty input or failure.
    """
    if not text or len(text.strip()) == 0:
        return ""
    try:
        if len(text) > 10000:
            chunk_size = 3000
            chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
            summaries = []
            for chunk in chunks:
                result = models['summarizer'](
                    chunk,
                    # Split the length budget across chunks, floored at 30 tokens.
                    max_length=max(max_length // len(chunks), 30),
                    min_length=30,
                    do_sample=False
                )
                summaries.append(result[0]['summary_text'])
            return " ".join(summaries)
        return models['summarizer'](text, max_length=max_length)[0]['summary_text']
    except Exception as e:
        st.error(f"Summarization failed: {e}")
        return ""


# Main UI title
st.title("📄 LexPilot")

with st.expander("📤 Upload Document", expanded=True):
    uploaded_file = st.file_uploader("Choose PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)

# Uploaded file wins over pasted text.
context = extract_text(uploaded_file) if uploaded_file else manual_text

tab1, tab2 = st.tabs(["🔍 Question Answering", "📝 Summarization"])

with tab1:
    if context and len(context.strip()) > 0:
        question = st.text_input("Ask about the document:")
        if question and len(question.strip()) > 0:
            with st.spinner("Analyzing..."):
                start_time = time.time()
                try:
                    result = models['qa'](
                        question=question,
                        context=context[:100000]  # cap context size fed to the model
                    )
                    st.success(f"Answered in {time.time()-start_time:.1f}s")
                    st.markdown(f"**Answer:** {result['answer']}")
                    st.progress(result['score'])
                    st.caption(f"Confidence: {result['score']:.0%}")
                except Exception as e:
                    st.error(f"Question answering failed: {e}")

with tab2:
    if context and len(context.strip()) > 0:
        with st.form("summary_form"):
            length = st.slider("Summary Length", 50, 300, 150)
            if st.form_submit_button("Generate Summary"):
                with st.spinner("Summarizing..."):
                    start_time = time.time()
                    summary = generate_summary(context, length)
                    if summary:
                        st.success(f"Generated in {time.time()-start_time:.1f}s")
                        st.markdown(f"**Summary:**\n\n{summary}")

# Show cache dir path in sidebar (optional)
with st.sidebar:
    st.markdown("---")
    st.write(f"Cache directory: {cache_dir}")