import os
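# Streamlit server settings for headless/container deployment (e.g. a Hugging
# Face Space), exported before streamlit is imported.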
os.environ["STREAMLIT_SERVER_PORT"] = "8501"
os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import docx
import time
import psutil
from pathlib import Path
import torch
# Page config with wide layout; st.set_page_config must be the first Streamlit call in the script
st.set_page_config(page_title="LexPilot", layout="wide")
# Sidebar with project info
with st.sidebar:
st.title("LexPilot™")
st.markdown(
"""
LexPilot™ ingests plain text, PDF, and Word files to analyze contracts.
It produces concise summaries and answers targeted questions about the document,
speeding up legal and procurement reviews.
"""
)
st.markdown("---")
st.write("### System Status")
    try:
        device_status = 'GPU ✅' if torch.cuda.is_available() else 'CPU ⚠️'
    except Exception:
        device_status = 'CPU (torch not configured)'
st.text(f"Device: {device_status}")
st.text(f"Memory: {psutil.virtual_memory().percent}% used")
st.text(f"CPU: {psutil.cpu_percent()}% used")
# Setup cache directory for models
def setup_environment():
cache_dir = Path(".cache/models")
try:
cache_dir.mkdir(exist_ok=True, parents=True)
except Exception as e:
st.error(f"Failed to create cache directory: {e}")
return cache_dir
cache_dir = setup_environment()
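# Load both Hugging Face pipelines once and cache them; st.cache_resource keeps
# a single copy across reruns and user sessions, and ttl=3600 refreshes it after
# an hour. device=-1 pins inference to the CPU.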
@st.cache_resource(ttl=3600)
def load_models():
try:
qa_model = pipeline(
"question-answering",
model="distilbert-base-cased-distilled-squad",
device=-1
)
summarizer_model = pipeline(
"summarization",
model="sshleifer/distilbart-cnn-6-6",
device=-1
)
return {'qa': qa_model, 'summarizer': summarizer_model}
except Exception as e:
st.error(f"Failed to load models: {e}")
st.stop()
models = load_models()
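# Expected pipeline output shapes (indexed below):
#   models['qa'](question=..., context=...) -> {'answer': str, 'score': float, 'start': int, 'end': int}
#   models['summarizer'](text, ...)         -> [{'summary_text': str}]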
def extract_text(file):
    """Return plain text from an uploaded PDF or DOCX file ('' on failure)."""
    if file is None:
        return ""
    try:
        if file.type == "application/pdf":
            reader = PdfReader(file)
            # Drop pages where PyPDF2 cannot extract any text (e.g. scanned pages)
            return " ".join(filter(None, (page.extract_text() for page in reader.pages)))
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(file)
            return "\n".join(para.text for para in doc.paragraphs if para.text)
    except Exception as e:
        st.error(f"Error processing document: {e}")
    return ""
def generate_summary(text, max_length=150):
    """Summarize text, chunking long inputs to fit the model's input window."""
    if not text or not text.strip():
        return ""
    try:
        if len(text) > 10000:
            # Chunk by characters (~3000 chars keeps each piece within
            # distilbart's ~1024-token input limit), summarize each chunk,
            # then join the partial summaries.
            chunk_size = 3000
            chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
            per_chunk_max = max(max_length // len(chunks), 30)
            summaries = []
            for chunk in chunks:
                result = models['summarizer'](
                    chunk,
                    max_length=per_chunk_max,
                    min_length=30,
                    do_sample=False
                )
                summaries.append(result[0]['summary_text'])
            return " ".join(summaries)
        # truncation=True guards inputs that exceed the model's token limit but
        # fall under the 10,000-character chunking threshold.
        return models['summarizer'](text, max_length=max_length, truncation=True)[0]['summary_text']
    except Exception as e:
        st.error(f"Summarization failed: {e}")
        return ""
# Main UI title
st.title("📄 LexPilot")
with st.expander("📤 Upload Document", expanded=True):
uploaded_file = st.file_uploader("Choose PDF/DOCX", type=["pdf", "docx"])
manual_text = st.text_area("Or paste text here:", height=150)
context = extract_text(uploaded_file) if uploaded_file else manual_text
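# An uploaded file takes precedence over pasted text; `context` is the raw
# document text that both tabs operate on.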
tab1, tab2 = st.tabs(["🔍 Question Answering", "📝 Summarization"])
with tab1:
if context and len(context.strip()) > 0:
question = st.text_input("Ask about the document:")
if question and len(question.strip()) > 0:
with st.spinner("Analyzing..."):
start_time = time.time()
try:
result = models['qa'](
question=question,
context=context[:100000]
)
st.success(f"Answered in {time.time()-start_time:.1f}s")
st.markdown(f"**Answer:** {result['answer']}")
st.progress(result['score'])
st.caption(f"Confidence: {result['score']:.0%}")
except Exception as e:
st.error(f"Question answering failed: {e}")
with tab2:
if context and len(context.strip()) > 0:
with st.form("summary_form"):
length = st.slider("Summary Length", 50, 300, 150)
if st.form_submit_button("Generate Summary"):
with st.spinner("Summarizing..."):
start_time = time.time()
summary = generate_summary(context, length)
if summary:
st.success(f"Generated in {time.time()-start_time:.1f}s")
st.markdown(f"**Summary:**\n\n{summary}")
# Show cache dir path in sidebar (optional)
with st.sidebar:
st.markdown("---")
st.write(f"Cache directory: {cache_dir}")