# Source: Hugging Face Space "jacob-c/resumescreener_v2" (status: Paused).
# File size: 28,467 bytes.

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from io import BytesIO
import base64
import os
import re
import warnings
warnings.filterwarnings("ignore")

# Page configuration.
# This runs before the guarded imports below because their failure path calls
# st.error(), and set_page_config() has to be the first Streamlit command.
st.set_page_config(
    page_title="🤖 AI Resume Screener",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# ML/NLP imports (optional at import time; the app degrades when missing)
try:
    from sentence_transformers import SentenceTransformer, CrossEncoder
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    import torch
    import faiss
    from rank_bm25 import BM25Okapi
    import nltk
    from nltk.tokenize import word_tokenize
    import pdfplumber
    import PyPDF2
    from docx import Document
    from datasets import load_dataset
    ML_IMPORTS_AVAILABLE = True
except ImportError as e:
    st.error(f"Missing required ML libraries: {e}")
    ML_IMPORTS_AVAILABLE = False

# Download NLTK data.
# Skipped when the imports failed, since `nltk` may not be bound at all then.
if ML_IMPORTS_AVAILABLE:
    # 'punkt_tab' is the tokenizer resource newer NLTK releases look up for
    # word_tokenize(); 'punkt' is kept for older releases.
    for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
        try:
            nltk.download(_nltk_resource, quiet=True)
        except Exception as e:
            # Best effort: BM25 scoring reports its own warning if tokenizing fails.
            print(f"Could not download NLTK resource {_nltk_resource}: {e}")

# Initialize session state: seed each slot once, keeping values from earlier reruns
_SESSION_DEFAULTS = {
    'models_loaded': False,
    'embedding_model': None,
    'cross_encoder': None,
    'llm_tokenizer': None,
    'llm_model': None,
    'model_errors': {},
    'resume_texts': [],
    'resume_filenames': [],
    'results': None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

def load_models():
    """Load the embedding model, cross-encoder and LLM into ``st.session_state``.

    Each model is loaded independently. A failure is recorded in
    ``st.session_state.model_errors`` under ``'embedding'``,
    ``'cross_encoder'`` or ``'llm'`` and does not prevent the remaining
    loads. ``st.session_state.models_loaded`` is set to ``True`` once every
    load has been attempted, which makes later calls return immediately.
    """
    if st.session_state.models_loaded:
        return

    st.info("🔄 Loading AI models... This may take a few minutes on first run.")

    # Load embedding model
    try:
        print("Loading embedding model: BAAI/bge-large-en-v1.5")
        st.text("Loading embedding model...")
        try:
            st.session_state.embedding_model = SentenceTransformer(
                'BAAI/bge-large-en-v1.5',
                device_map="auto"
            )
        except Exception as e:
            # Retry without device_map when the first constructor call fails.
            print(f"Device map failed, falling back to default: {e}")
            st.session_state.embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
        print("✅ Embedding model loaded successfully")
    except Exception as e:
        print(f"❌ Error loading embedding model: {e}")
        st.session_state.model_errors['embedding'] = str(e)

    # Load cross-encoder
    try:
        print("Loading cross-encoder: cross-encoder/ms-marco-MiniLM-L6-v2")
        st.text("Loading cross-encoder...")
        st.session_state.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
        print("✅ Cross-encoder loaded successfully")
    except Exception as e:
        print(f"❌ Error loading cross-encoder: {e}")
        st.session_state.model_errors['cross_encoder'] = str(e)

    # Load LLM for intent analysis
    try:
        print("Loading LLM: Qwen/Qwen2-1.5B")  # Using smaller model for better compatibility
        st.text("Loading LLM for intent analysis...")

        # 4-bit NF4 quantization config
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16
        )

        st.session_state.llm_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B")
        st.session_state.llm_model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2-1.5B",
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True
        )
        print("✅ LLM loaded successfully")
    except Exception as e:
        print(f"❌ Error loading LLM: {e}")
        st.session_state.model_errors['llm'] = str(e)

    st.session_state.models_loaded = True

    # Report what actually happened instead of an unconditional success.
    failed = sorted(st.session_state.model_errors)
    if failed:
        st.warning(
            f"⚠️ Model loading finished with errors: {', '.join(failed)}. "
            "Affected pipeline stages will use fallback behaviour."
        )
    else:
        st.success("✅ All models loaded successfully!")

class ResumeScreener:
    """Multi-stage resume ranking pipeline built on the models in session state.

    Stages: embedding recall through a FAISS index, cross-encoder re-ranking,
    BM25 keyword scoring, LLM-based intent classification, and a weighted
    final score. A model attribute that is ``None`` makes the corresponding
    stage fall back to a neutral result instead of raising.
    """

    # Vector length used for zero-vector fallbacks (embedding size for BGE).
    _EMBEDDING_DIM = 1024

    # Characters treated as part of a skill token, so that 'java' does not
    # match inside 'javascript' and 'c' does not match inside 'c++' / 'c#'.
    _SKILL_TOKEN_CHARS = 'a-z0-9+#'

    # Intent label keyword -> (display label, numeric score).
    _INTENT_SCORES = {
        'yes': ("Yes", 0.9),
        'no': ("No", 0.1),
        'maybe': ("Maybe", 0.5),
    }

    def __init__(self):
        """Bind the models held in ``st.session_state`` and define the skill list."""
        self.embedding_model = st.session_state.embedding_model
        self.cross_encoder = st.session_state.cross_encoder
        self.llm_tokenizer = st.session_state.llm_tokenizer
        self.llm_model = st.session_state.llm_model

        # Predefined skills list (all lower-case; matched against lower-cased text)
        self.skills_list = [
            'python', 'java', 'javascript', 'react', 'angular', 'vue', 'node.js',
            'sql', 'mongodb', 'postgresql', 'mysql', 'aws', 'azure', 'gcp',
            'docker', 'kubernetes', 'git', 'machine learning', 'deep learning',
            'tensorflow', 'pytorch', 'scikit-learn', 'pandas', 'numpy',
            'html', 'css', 'bootstrap', 'tailwind', 'api', 'rest', 'graphql',
            'microservices', 'agile', 'scrum', 'devops', 'ci/cd', 'jenkins',
            'linux', 'bash', 'shell scripting', 'data analysis', 'statistics',
            'excel', 'powerbi', 'tableau', 'spark', 'hadoop', 'kafka',
            'redis', 'elasticsearch', 'nginx', 'apache', 'django', 'flask',
            'spring', 'express', 'fastapi', 'laravel', 'php', 'c++', 'c#',
            'go', 'rust', 'scala', 'r', 'matlab', 'sas', 'spss'
        ]

    def extract_text_from_file(self, file):
        """Extract plain text from an uploaded file, dispatching on ``file.type``.

        Handles PDF (pdfplumber first, PyPDF2 as fallback), DOCX, plain text
        and CSV uploads.

        Returns:
            The extracted text, or ``""`` when the MIME type is unsupported or
            extraction fails (a Streamlit warning is shown in both cases), so
            callers that test ``if text:`` skip the file.
        """
        try:
            if file.type == "application/pdf":
                # Try pdfplumber first
                try:
                    with pdfplumber.open(file) as pdf:
                        return "".join(page.extract_text() or "" for page in pdf.pages)
                except Exception:
                    # Fallback to PyPDF2; rewind because pdfplumber may have consumed the stream
                    file.seek(0)
                    reader = PyPDF2.PdfReader(file)
                    # `or ""` guards against a page yielding None instead of text
                    return "".join(page.extract_text() or "" for page in reader.pages)

            elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                doc = Document(file)
                return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

            elif file.type == "text/plain":
                return str(file.read(), "utf-8")

            elif file.type == "text/csv":
                df = pd.read_csv(file)
                return df.to_string()

            else:
                # Return nothing rather than a placeholder string that would be
                # ingested and ranked as if it were resume content.
                st.warning(f"Unsupported file type for {file.name}: {file.type}")
                return ""

        except Exception as e:
            st.warning(f"Error extracting text from {file.name}: {str(e)}")
            return ""

    def get_embedding(self, text):
        """Return the normalized embedding of ``text``.

        Returns ``None`` when no embedding model is available, and a zero
        vector of length ``_EMBEDDING_DIM`` for empty text or when encoding
        fails. Text longer than 8000 characters is truncated first.
        """
        if self.embedding_model is None:
            return None

        if not text or not text.strip():
            return np.zeros(self._EMBEDDING_DIM)

        # Truncate if too long
        text = text[:8000]

        try:
            return self.embedding_model.encode(text, normalize_embeddings=True)
        except Exception as e:
            st.warning(f"Error getting embedding: {e}")
            return np.zeros(self._EMBEDDING_DIM)

    def calculate_bm25_scores(self, resume_texts, job_description):
        """Score each resume against the job description with BM25.

        Returns one score per entry of ``resume_texts``; all zeros (with a
        warning) if tokenization or scoring fails.
        """
        try:
            tokenized_resumes = [word_tokenize(text.lower()) for text in resume_texts]
            tokenized_job = word_tokenize(job_description.lower())

            bm25 = BM25Okapi(tokenized_resumes)
            return bm25.get_scores(tokenized_job)
        except Exception as e:
            st.warning(f"Error calculating BM25 scores: {e}")
            return np.zeros(len(resume_texts))

    def faiss_recall(self, resume_texts, job_description, top_k=50):
        """Return indices of the ``top_k`` resumes closest to the job description.

        Uses an inner-product FAISS index over the embeddings. Without an
        embedding model, or on any error, the first ``top_k`` indices are
        returned in input order.
        """
        limit = min(top_k, len(resume_texts))
        try:
            if self.embedding_model is None:
                return list(range(limit))

            # Get embeddings
            resume_embeddings = np.array([self.get_embedding(text) for text in resume_texts])
            job_embedding = self.get_embedding(job_description).reshape(1, -1)

            # Build FAISS index (inner product over the embeddings)
            dimension = resume_embeddings.shape[1]
            index = faiss.IndexFlatIP(dimension)
            index.add(resume_embeddings.astype('float32'))

            # Search
            _, indices = index.search(job_embedding.astype('float32'), limit)
            return indices[0].tolist()
        except Exception as e:
            st.warning(f"Error in FAISS recall: {e}")
            return list(range(limit))

    def _cross_encoder_scores(self, resume_texts, job_description, indices):
        """Score (job description, resume) pairs for ``indices`` with the cross-encoder."""
        pairs = [(job_description, resume_texts[i]) for i in indices]
        return self.cross_encoder.predict(pairs)

    def cross_encoder_rerank(self, resume_texts, job_description, candidate_indices, top_k=20):
        """Re-rank ``candidate_indices`` by cross-encoder score and keep the best ``top_k``.

        Without a cross-encoder, or on error, the first ``top_k`` candidates
        are returned in their incoming order.
        """
        try:
            if self.cross_encoder is None:
                return candidate_indices[:top_k]

            scores = self._cross_encoder_scores(resume_texts, job_description, candidate_indices)

            # Sort by score, best first, and keep top_k
            ranked = sorted(zip(candidate_indices, scores), key=lambda pair: pair[1], reverse=True)
            return [idx for idx, _ in ranked[:top_k]]
        except Exception as e:
            st.warning(f"Error in cross-encoder reranking: {e}")
            return candidate_indices[:top_k]

    @classmethod
    def _parse_intent_response(cls, response):
        """Map raw LLM output to ``(label, score)`` using the first whole-word label.

        Whole-word matching keeps words such as "not" or "know" from being
        read as the label "No". Output without a label maps to "Maybe".
        """
        match = re.search(r'\b(yes|no|maybe)\b', response.lower())
        return cls._INTENT_SCORES[match.group(1) if match else 'maybe']

    def analyze_intent(self, resume_text, job_description):
        """Classify candidate intent with the LLM.

        Returns:
            ``("Yes", 0.9)``, ``("No", 0.1)`` or ``("Maybe", 0.5)``. The
            "Maybe" result is also the fallback when the LLM or tokenizer is
            missing, when the reply contains no label, or on error.
        """
        try:
            if self.llm_model is None or self.llm_tokenizer is None:
                return "Maybe", 0.5

            prompt = f"""Analyze if this candidate is genuinely interested in this job based on their resume.

Job Description: {job_description[:500]}...

Resume: {resume_text[:1000]}...

Based on the alignment between the candidate's experience and the job requirements, classify their intent as:
- Yes: Strong alignment and genuine interest
- Maybe: Some alignment but unclear intent  
- No: Poor alignment or likely not interested

Intent:"""

            inputs = self.llm_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
            # The model is loaded with device_map="auto"; generate() needs the
            # input tensors on the same device as the model.
            inputs = inputs.to(self.llm_model.device)

            with torch.no_grad():
                outputs = self.llm_model.generate(
                    **inputs,
                    max_new_tokens=10,
                    temperature=0.1,
                    do_sample=True,
                    pad_token_id=self.llm_tokenizer.eos_token_id
                )

            # Decode only the newly generated tokens, not the echoed prompt
            prompt_length = inputs['input_ids'].shape[1]
            response = self.llm_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

            return self._parse_intent_response(response)

        except Exception as e:
            st.warning(f"Error in intent analysis: {e}")
            return "Maybe", 0.5

    @classmethod
    def _mentions_skill(cls, text_lower, skill):
        """Return True when ``skill`` occurs in ``text_lower`` as a standalone token."""
        chars = cls._SKILL_TOKEN_CHARS
        pattern = rf'(?<![{chars}]){re.escape(skill)}(?![{chars}])'
        return re.search(pattern, text_lower) is not None

    def extract_skills(self, text, job_description):
        """Find known skills in a resume and keywords it shares with the job description.

        Returns:
            A dict with ``'technical_skills'`` (entries of ``self.skills_list``
            that occur as standalone tokens, in list order),
            ``'job_specific_keywords'`` (up to 10 alphabetic words of 3+
            letters present in both texts, alphabetically sorted so the
            selection is deterministic) and ``'total_skills'`` (the combined
            count).
        """
        text_lower = text.lower()
        job_lower = job_description.lower()

        # Token-boundary match instead of substring, so 'r', 'go' or 'java'
        # are not "found" inside unrelated words.
        found_skills = [
            skill for skill in self.skills_list
            if self._mentions_skill(text_lower, skill)
        ]

        # Extract job-specific keywords (simple approach)
        word_pattern = r'\b[a-zA-Z]{3,}\b'
        job_words = set(re.findall(word_pattern, job_lower))
        text_words = set(re.findall(word_pattern, text_lower))
        job_specific = sorted(job_words & text_words)[:10]

        return {
            'technical_skills': found_skills,
            'job_specific_keywords': job_specific,
            'total_skills': len(found_skills) + len(job_specific)
        }

    def add_bm25_scores(self, results_df, resume_texts, job_description):
        """Add a ``'bm25_score'`` column; ``resume_texts`` must be in row order."""
        results_df['bm25_score'] = self.calculate_bm25_scores(resume_texts, job_description)
        return results_df

    def add_intent_scores(self, results_df, resume_texts, job_description):
        """Add ``'intent_label'`` and ``'intent_score'`` columns, showing a progress bar."""
        intent_labels = []
        intent_scores = []

        progress_bar = st.progress(0)
        total = len(resume_texts)
        for position, text in enumerate(resume_texts, start=1):
            label, score = self.analyze_intent(text, job_description)
            intent_labels.append(label)
            intent_scores.append(score)
            progress_bar.progress(position / total)

        results_df['intent_label'] = intent_labels
        results_df['intent_score'] = intent_scores
        return results_df

    @staticmethod
    def _min_max(series):
        """Rescale ``series`` to 0-1; the epsilon avoids division by zero for constant input."""
        low, high = series.min(), series.max()
        return (series - low) / (high - low + 1e-8)

    def calculate_final_scores(self, results_df):
        """Add ``'final_score'`` and return the frame sorted by it, best first.

        ``final = 0.5 * cross-encoder + 0.3 * BM25 + 0.2 * intent``, where the
        cross-encoder and BM25 scores are min-max normalized. A missing score
        column counts as zeros, a missing intent column as 0.5. The returned
        frame keeps the original index labels.
        """
        if 'cross_encoder_score' in results_df.columns:
            ce_scores = self._min_max(results_df['cross_encoder_score'])
        else:
            ce_scores = np.zeros(len(results_df))

        if 'bm25_score' in results_df.columns:
            bm25_scores = self._min_max(results_df['bm25_score'])
        else:
            bm25_scores = np.zeros(len(results_df))

        intent_scores = results_df.get('intent_score', np.ones(len(results_df)) * 0.5)

        # Weighted combination
        results_df['final_score'] = 0.5 * ce_scores + 0.3 * bm25_scores + 0.2 * intent_scores

        return results_df.sort_values('final_score', ascending=False)

    def advanced_pipeline_ranking(self, resume_texts, resume_filenames, job_description):
        """Run the complete pipeline and return the ranked results frame.

        Returns:
            A DataFrame ordered by ``final_score`` (best first) with a fresh
            0..n-1 index and columns ``rank`` (1 = best final score),
            ``filename``, ``resume_index`` (position in ``resume_texts``),
            ``cross_encoder_score``, ``bm25_score``, ``intent_label``,
            ``intent_score``, ``final_score``, ``top_skills``,
            ``job_keywords`` and ``total_skills_count``.
        """
        st.info("🚀 Starting advanced pipeline ranking...")

        # Stage 1: FAISS Recall
        st.text("Stage 1: FAISS-based recall (top 50 candidates)")
        top_50_indices = self.faiss_recall(resume_texts, job_description, top_k=50)

        # Stage 2: Cross-encoder reranking
        st.text("Stage 2: Cross-encoder reranking (top 20 candidates)")
        top_20_indices = self.cross_encoder_rerank(resume_texts, job_description, top_50_indices, top_k=20)

        # Create results dataframe; 'rank' is provisional and rewritten after the final sort
        results_df = pd.DataFrame({
            'rank': range(1, len(top_20_indices) + 1),
            'filename': [resume_filenames[i] for i in top_20_indices],
            'resume_index': top_20_indices
        })

        # Stage 3: Add cross-encoder scores
        st.text("Stage 3: Adding detailed cross-encoder scores")
        # The column is always created so consumers can rely on it; a constant
        # column normalizes to zeros in calculate_final_scores.
        results_df['cross_encoder_score'] = 0.0
        if self.cross_encoder is not None:
            try:
                results_df['cross_encoder_score'] = self._cross_encoder_scores(
                    resume_texts, job_description, top_20_indices
                )
            except Exception as e:
                st.warning(f"Error computing cross-encoder scores: {e}")

        # Stage 4: Add BM25 scores
        st.text("Stage 4: Adding BM25 scores")
        top_20_texts = [resume_texts[i] for i in top_20_indices]
        results_df = self.add_bm25_scores(results_df, top_20_texts, job_description)

        # Stage 5: Add intent analysis
        st.text("Stage 5: Analyzing candidate intent")
        results_df = self.add_intent_scores(results_df, top_20_texts, job_description)

        # Calculate final scores (returns the frame sorted, index labels preserved)
        st.text("Calculating final weighted scores...")
        results_df = self.calculate_final_scores(results_df)

        # Add skills analysis
        st.text("Extracting skills and keywords...")
        skills_data = []
        for i in top_20_indices:
            skills = self.extract_skills(resume_texts[i], job_description)
            skills_data.append({
                'top_skills': ', '.join(skills['technical_skills'][:5]),
                'job_keywords': ', '.join(skills['job_specific_keywords'][:5]),
                'total_skills_count': skills['total_skills']
            })

        # skills_df rows are in candidate order, i.e. the pre-sort index labels
        # of results_df. An index join matches them to the right rows and
        # keeps the final-score ordering of the left frame.
        skills_df = pd.DataFrame(
            skills_data,
            columns=['top_skills', 'job_keywords', 'total_skills_count']
        )
        results_df = results_df.join(skills_df)

        # Rank must reflect the final-score order, not the stage-2 order.
        results_df = results_df.reset_index(drop=True)
        results_df['rank'] = range(1, len(results_df) + 1)

        st.success("✅ Pipeline completed successfully!")
        return results_df

# Startup: load the models once per session, then build the screener on top of them
if ML_IMPORTS_AVAILABLE:
    if not st.session_state.models_loaded:
        load_models()

    # `screener` is only bound when the models are flagged as loaded
    if st.session_state.models_loaded:
        screener = ResumeScreener()

# Sidebar: pipeline overview, per-model load status and the scoring formula
with st.sidebar:
    st.title("🤖 AI Resume Screener")
    st.markdown("---")
    
    st.subheader("📋 Pipeline Stages")
    st.markdown("""
    1. **FAISS Recall**: Semantic similarity search (top 50)
    2. **Cross-Encoder**: Deep reranking (top 20)
    3. **BM25 Scoring**: Keyword-based relevance
    4. **Intent Analysis**: AI-powered candidate intent
    5. **Final Ranking**: Weighted score combination
    """)
    
    st.subheader("🧠 AI Models")
    if st.session_state.models_loaded:
        # Keys match the ones load_models() writes into model_errors, so a
        # model that failed to load is not shown as healthy.
        model_labels = {
            'embedding': "Embedding: BGE-Large-EN",
            'cross_encoder': "Cross-Encoder: MS-Marco-MiniLM",
            'llm': "LLM: Qwen2-1.5B",
        }
        for model_key, model_label in model_labels.items():
            if model_key in st.session_state.model_errors:
                st.error(f"❌ {model_label} (failed to load)")
            else:
                st.success(f"✅ {model_label}")
    else:
        st.warning("⏳ Models loading...")
    
    if st.session_state.model_errors:
        st.error("❌ Model Errors:")
        for model, error in st.session_state.model_errors.items():
            # Only the first 100 characters of each message are shown
            st.text(f"{model}: {error[:100]}...")
    
    st.subheader("📊 Scoring Formula")
    st.markdown("""
    **Final Score = 0.5 × Cross-Encoder + 0.3 × BM25 + 0.2 × Intent**
    
    - Cross-Encoder: Deep semantic matching
    - BM25: Keyword relevance
    - Intent: Candidate interest level
    """)

# Page heading and tagline
st.title("🤖 AI Resume Screener")
st.markdown("Automatically rank candidate resumes against job descriptions using advanced AI")

# Step 1: free-text job description that every resume is ranked against
st.header("📝 Step 1: Job Description")
job_description = st.text_area(
    label="Enter the job description:",
    placeholder="Paste the complete job description here...",
    height=200,
)

# Step 2: Resume Upload
# Each loader replaces st.session_state.resume_texts / resume_filenames and
# clears st.session_state.results, because stored results refer to resumes by
# their position in resume_texts.
st.header("📄 Step 2: Load Resumes")

upload_option = st.radio(
    "Choose how to load resumes:",
    ["Upload Files", "Upload CSV", "Load from Hugging Face Dataset"]
)

if upload_option == "Upload Files":
    uploaded_files = st.file_uploader(
        "Upload resume files",
        type=['pdf', 'docx', 'txt'],
        accept_multiple_files=True
    )
    
    if uploaded_files and st.button("Process Uploaded Files"):
        if not (ML_IMPORTS_AVAILABLE and st.session_state.models_loaded):
            # `screener` only exists when the models were loaded; leave the
            # currently loaded resumes untouched in that case.
            st.error("Models not loaded. Cannot process files.")
        else:
            with st.spinner("Processing files..."):
                texts = []
                filenames = []
                
                for file in uploaded_files:
                    text = screener.extract_text_from_file(file)
                    if text:
                        texts.append(text)
                        filenames.append(file.name)
                
                st.session_state.resume_texts = texts
                st.session_state.resume_filenames = filenames
                st.session_state.results = None
                st.success(f"✅ Processed {len(texts)} resumes")

elif upload_option == "Upload CSV":
    csv_file = st.file_uploader("Upload CSV with resume texts", type=['csv'])
    
    if csv_file:
        df = pd.read_csv(csv_file)
        st.write("CSV Preview:", df.head())
        
        text_column = st.selectbox("Select text column:", df.columns)
        name_column = st.selectbox("Select name/ID column:", df.columns)
        
        if st.button("Load from CSV"):
            # Coerce to str: the downstream stages call string methods on
            # every resume text, and CSV columns may hold numbers.
            st.session_state.resume_texts = df[text_column].fillna("").astype(str).tolist()
            st.session_state.resume_filenames = df[name_column].fillna("Unknown").astype(str).tolist()
            st.session_state.results = None
            st.success(f"✅ Loaded {len(st.session_state.resume_texts)} resumes from CSV")

elif upload_option == "Load from Hugging Face Dataset":
    dataset_name = st.text_input("Dataset name:", "resume-dataset/resume-screening")
    
    if st.button("Load Dataset"):
        try:
            with st.spinner("Loading dataset..."):
                # Keep the dataset in session state: a button is only True on
                # the rerun triggered by its click, so the column pickers
                # below must work on later reruns as well.
                st.session_state.hf_dataset = load_dataset(dataset_name, split="train")
        except Exception as e:
            st.session_state.hf_dataset = None
            st.error(f"Error loading dataset: {e}")
    
    dataset = st.session_state.hf_dataset if "hf_dataset" in st.session_state else None
    if dataset is not None:
        columns = dataset.column_names
        text_col = st.selectbox("Select text column:", columns)
        name_col = st.selectbox("Select name/ID column:", columns)
        
        if st.button("Load from Dataset"):
            try:
                subset = dataset[:100]  # Limit to 100
                st.session_state.resume_texts = [
                    "" if value is None else str(value) for value in subset[text_col]
                ]
                st.session_state.resume_filenames = [
                    "Unknown" if value is None else str(value) for value in subset[name_col]
                ]
                st.session_state.results = None
                st.success(f"✅ Loaded {len(st.session_state.resume_texts)} resumes from dataset")
            except Exception as e:
                st.error(f"Error loading dataset: {e}")

# Display current resume count
if st.session_state.resume_texts:
    st.info(f"📊 Currently loaded: {len(st.session_state.resume_texts)} resumes")

# Step 3: Run Pipeline
st.header("🚀 Step 3: Run Advanced Pipeline")

# bool() so the flag is a real boolean rather than the last operand of the chain
can_run = bool(
    ML_IMPORTS_AVAILABLE and 
    st.session_state.models_loaded and 
    job_description.strip() and 
    st.session_state.resume_texts
)

if st.button("🎯 Run Advanced Ranking Pipeline", disabled=not can_run) and can_run:
    with st.spinner("Running advanced pipeline..."):
        results = screener.advanced_pipeline_ranking(
            st.session_state.resume_texts,
            st.session_state.resume_filenames,
            job_description
        )
        st.session_state.results = results

# A disabled button never reports a click, so the reason it is disabled has to
# be shown outside the click branch.
if not can_run:
    if not ML_IMPORTS_AVAILABLE:
        st.warning("❌ ML libraries not available")
    elif not st.session_state.models_loaded:
        st.warning("❌ Models not loaded")
    elif not job_description.strip():
        st.warning("❌ Please enter a job description")
    elif not st.session_state.resume_texts:
        st.warning("❌ Please load some resumes")

# Display Results
if st.session_state.results is not None:
    results = st.session_state.results
    # The column may be absent when the results were produced without a
    # cross-encoder; fill it on a copy so every view below can rely on it.
    if 'cross_encoder_score' not in results.columns:
        results = results.assign(cross_encoder_score=0.0)
    
    st.header("📊 Results")
    
    # Create tabs for different views
    tab1, tab2, tab3 = st.tabs(["📋 Summary", "🔍 Detailed Analysis", "📈 Visualizations"])
    
    with tab1:
        st.subheader("Top Ranked Candidates")
        
        # Summary table with the score columns rounded for display
        score_columns = ['final_score', 'cross_encoder_score', 'bm25_score', 'intent_score']
        display_df = results[['rank', 'filename', 'final_score', 'cross_encoder_score', 
                              'bm25_score', 'intent_score', 'intent_label', 'top_skills']].copy()
        display_df[score_columns] = display_df[score_columns].round(3)
        
        st.dataframe(display_df, use_container_width=True)
        
        # Native download widget instead of a hand-built base64 <a> link
        st.download_button(
            label="📥 Download Results as CSV",
            data=display_df.to_csv(index=False),
            file_name="resume_rankings.csv",
            mime="text/csv"
        )
    
    with tab2:
        st.subheader("Detailed Candidate Analysis")
        
        for idx, row in results.iterrows():
            with st.expander(f"#{row['rank']} - {row['filename']} (Score: {row['final_score']:.3f})"):
                col1, col2 = st.columns(2)
                
                with col1:
                    st.metric("Final Score", f"{row['final_score']:.3f}")
                    st.metric("Cross-Encoder", f"{row['cross_encoder_score']:.3f}")
                    st.metric("BM25 Score", f"{row['bm25_score']:.3f}")
                
                with col2:
                    st.metric("Intent Score", f"{row['intent_score']:.3f}")
                    st.metric("Intent Label", row['intent_label'])
                    st.metric("Skills Count", int(row['total_skills_count']))
                
                st.write("**Top Skills:**", row['top_skills'])
                st.write("**Job Keywords:**", row['job_keywords'])
                
                # Show resume excerpt; resume_index is a position in resume_texts,
                # which may have changed since these results were computed
                resume_position = int(row['resume_index'])
                if 0 <= resume_position < len(st.session_state.resume_texts):
                    resume_text = st.session_state.resume_texts[resume_position]
                    st.text_area("Resume Excerpt:", resume_text[:500] + "...", height=100, key=f"excerpt_{idx}")
                else:
                    st.caption("Resume text is no longer loaded.")
    
    with tab3:
        st.subheader("Score Visualizations")
        
        top_10 = results.head(10)
        
        # Score distribution
        fig1 = px.bar(
            top_10, 
            x='filename', 
            y='final_score',
            title="Top 10 Candidates - Final Scores",
            color='final_score',
            color_continuous_scale='viridis'
        )
        # Figure exposes update_xaxes (plural); update_xaxis does not exist
        fig1.update_xaxes(tickangle=45)
        st.plotly_chart(fig1, use_container_width=True)
        
        # Score breakdown: one bar group per candidate, one bar per component score
        fig2 = go.Figure()
        for col in ['cross_encoder_score', 'bm25_score', 'intent_score']:
            fig2.add_trace(go.Bar(
                name=col.replace('_', ' ').title(),
                x=top_10['filename'],
                y=top_10[col]
            ))
        
        fig2.update_layout(
            title="Score Breakdown - Top 10 Candidates",
            barmode='group',
            xaxis_tickangle=45
        )
        st.plotly_chart(fig2, use_container_width=True)
        
        # Intent distribution
        intent_counts = results['intent_label'].value_counts()
        fig3 = px.pie(
            values=intent_counts.values,
            names=intent_counts.index,
            title="Candidate Intent Distribution"
        )
        st.plotly_chart(fig3, use_container_width=True)
        
        # Average metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Avg Final Score", f"{results['final_score'].mean():.3f}")
        with col2:
            st.metric("Avg Cross-Encoder", f"{results['cross_encoder_score'].mean():.3f}")
        with col3:
            st.metric("Avg BM25", f"{results['bm25_score'].mean():.3f}")
        with col4:
            st.metric("Avg Intent", f"{results['intent_score'].mean():.3f}")

# Cleanup Controls
st.header("🧹 Cleanup")
col1, col2 = st.columns(2)

with col1:
    if st.button("Clear Resumes Only"):
        # Results index into resume_texts, so they are dropped together
        st.session_state.resume_texts = []
        st.session_state.resume_filenames = []
        st.session_state.results = None
        st.success("✅ Resumes cleared")

with col2:
    if st.button("Reset Entire App"):
        # Clear all session state (models included)
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        
        # Free GPU memory; `torch` is only bound when the guarded ML imports succeeded
        if ML_IMPORTS_AVAILABLE and torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        st.success("✅ App reset complete")
        # st.rerun replaces st.experimental_rerun in newer Streamlit releases;
        # fall back to the old name on versions that predate it.
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()

# Footer: horizontal rule followed by a centered attribution line
st.markdown("---")
footer_html = """
    <div style='text-align: center; color: #666; font-size: 0.8em;'>
    🤖 Powered by BGE-Large-EN, MS-Marco-MiniLM, Qwen2-1.5B | Built with Streamlit
    </div>
    """
st.markdown(footer_html, unsafe_allow_html=True)