# Source: Hugging Face Space page capture (Space status at capture time: Paused).
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from io import BytesIO | |
import base64 | |
import os | |
import re | |
import warnings | |
warnings.filterwarnings("ignore") | |
# ML/NLP imports
# The heavy dependencies are optional at import time: when any of them is
# missing the app still renders, with the ML features disabled. The error is
# remembered and reported *after* st.set_page_config(), which Streamlit
# documents as having to be the first Streamlit command on the page.
ML_IMPORT_ERROR = None
try:
    from sentence_transformers import SentenceTransformer, CrossEncoder
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    import torch
    import faiss
    from rank_bm25 import BM25Okapi
    import nltk
    from nltk.tokenize import word_tokenize
    import pdfplumber
    import PyPDF2
    from docx import Document
    from datasets import load_dataset
    ML_IMPORTS_AVAILABLE = True
except ImportError as e:
    ML_IMPORTS_AVAILABLE = False
    ML_IMPORT_ERROR = str(e)

# Page configuration
st.set_page_config(
    page_title="π€ AI Resume Screener",
    page_icon="π€",
    layout="wide",
    initial_sidebar_state="expanded"
)

if ML_IMPORT_ERROR is not None:
    st.error(f"Missing required ML libraries: {ML_IMPORT_ERROR}")

# Download NLTK data
# Only attempted when nltk was actually imported. Failures are logged instead
# of being silently swallowed; the app keeps running either way.
if ML_IMPORTS_AVAILABLE:
    # NOTE(review): 'punkt_tab' is what newer NLTK releases look up for
    # word_tokenize; confirm against the installed NLTK version.
    for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
        try:
            nltk.download(_nltk_resource, quiet=True)
        except Exception as e:
            print(f"NLTK download failed for {_nltk_resource}: {e}")

# Initialize session state
# The dict literal is rebuilt on every script run, so the mutable defaults
# ({} and []) are fresh objects for each new session.
_SESSION_DEFAULTS = {
    'models_loaded': False,
    'embedding_model': None,
    'cross_encoder': None,
    'llm_tokenizer': None,
    'llm_model': None,
    'model_errors': {},
    'resume_texts': [],
    'resume_filenames': [],
    'results': None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
def load_models():
    """Load the embedding model, cross-encoder and LLM into ``st.session_state``.

    Does nothing when ``st.session_state.models_loaded`` is already set.
    Each model is loaded independently: a failure is printed, recorded in
    ``st.session_state.model_errors`` under ``'embedding'``,
    ``'cross_encoder'`` or ``'llm'``, and the remaining models are still
    attempted. ``models_loaded`` is set to True at the end even if some
    loads failed; the corresponding session entries then keep their
    previous value.
    """
    if st.session_state.models_loaded:
        return
    st.info("π Loading AI models... This may take a few minutes on first run.")

    # Load embedding model
    # The original first passed device_map="auto" and then retried without
    # it; that keyword is not a SentenceTransformer constructor argument, so
    # only the plain call is kept.
    try:
        print("Loading embedding model: BAAI/bge-large-en-v1.5")
        st.text("Loading embedding model...")
        st.session_state.embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
        print("β Embedding model loaded successfully")
    except Exception as e:
        print(f"β Error loading embedding model: {e}")
        st.session_state.model_errors['embedding'] = str(e)

    # Load cross-encoder
    try:
        print("Loading cross-encoder: cross-encoder/ms-marco-MiniLM-L6-v2")
        st.text("Loading cross-encoder...")
        st.session_state.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')
        print("β Cross-encoder loaded successfully")
    except Exception as e:
        print(f"β Error loading cross-encoder: {e}")
        st.session_state.model_errors['cross_encoder'] = str(e)

    # Load LLM for intent analysis
    try:
        print("Loading LLM: Qwen/Qwen2-1.5B")  # Using smaller model for better compatibility
        st.text("Loading LLM for intent analysis...")
        # Quantization config (4-bit NF4 with double quantization)
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16
        )
        st.session_state.llm_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B")
        st.session_state.llm_model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2-1.5B",
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True
        )
        print("β LLM loaded successfully")
    except Exception as e:
        print(f"β Error loading LLM: {e}")
        st.session_state.model_errors['llm'] = str(e)

    st.session_state.models_loaded = True
    # Only claim full success when nothing was recorded as failed.
    if st.session_state.model_errors:
        failed = ", ".join(st.session_state.model_errors)
        st.warning(f"Models loaded with errors: {failed}")
    else:
        st.success("β All models loaded successfully!")
class ResumeScreener:
    """Rank resumes against a job description with a multi-stage pipeline.

    Stages: FAISS embedding recall, cross-encoder reranking, BM25 keyword
    scoring, LLM-based intent classification and a weighted final score.
    The models are read from ``st.session_state`` at construction time; any
    of them may be ``None``, in which case the affected stage falls back to
    a neutral result instead of raising.
    """

    # Length of the zero vector returned when no real embedding is available
    # (the original code notes this as the default embedding size for BGE).
    EMBEDDING_DIM = 1024
    # Texts longer than this many characters are truncated before encoding.
    MAX_EMBED_CHARS = 8000
    # Parsed LLM answer -> (label, score).
    INTENT_SCORES = {
        'yes': ("Yes", 0.9),
        'no': ("No", 0.1),
        'maybe': ("Maybe", 0.5),
    }

    def __init__(self):
        self.embedding_model = st.session_state.embedding_model
        self.cross_encoder = st.session_state.cross_encoder
        self.llm_tokenizer = st.session_state.llm_tokenizer
        self.llm_model = st.session_state.llm_model
        # Predefined skills list
        self.skills_list = [
            'python', 'java', 'javascript', 'react', 'angular', 'vue', 'node.js',
            'sql', 'mongodb', 'postgresql', 'mysql', 'aws', 'azure', 'gcp',
            'docker', 'kubernetes', 'git', 'machine learning', 'deep learning',
            'tensorflow', 'pytorch', 'scikit-learn', 'pandas', 'numpy',
            'html', 'css', 'bootstrap', 'tailwind', 'api', 'rest', 'graphql',
            'microservices', 'agile', 'scrum', 'devops', 'ci/cd', 'jenkins',
            'linux', 'bash', 'shell scripting', 'data analysis', 'statistics',
            'excel', 'powerbi', 'tableau', 'spark', 'hadoop', 'kafka',
            'redis', 'elasticsearch', 'nginx', 'apache', 'django', 'flask',
            'spring', 'express', 'fastapi', 'laravel', 'php', 'c++', 'c#',
            'go', 'rust', 'scala', 'r', 'matlab', 'sas', 'spss'
        ]

    def extract_text_from_file(self, file):
        """Extract text from an uploaded file, dispatching on ``file.type``.

        Handles PDF (pdfplumber, then PyPDF2 as fallback), DOCX, plain text
        and CSV. Returns an empty string, after a Streamlit warning, for
        unsupported types and for any extraction error.
        """
        try:
            if file.type == "application/pdf":
                # Try pdfplumber first
                try:
                    with pdfplumber.open(file) as pdf:
                        return "".join(page.extract_text() or "" for page in pdf.pages)
                except Exception:
                    # Fallback to PyPDF2; rewind because pdfplumber may have
                    # consumed part of the stream.
                    file.seek(0)
                    reader = PyPDF2.PdfReader(file)
                    # `or ""` guards against pages with no extractable text.
                    return "".join(page.extract_text() or "" for page in reader.pages)
            if file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                doc = Document(file)
                return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
            if file.type == "text/plain":
                # Undecodable bytes are replaced rather than discarding the
                # whole resume.
                return file.read().decode("utf-8", errors="replace")
            if file.type == "text/csv":
                return pd.read_csv(file).to_string()
            # Returning a placeholder sentence here would be ranked as if it
            # were resume content, so report and return nothing instead.
            st.warning(f"Unsupported file type for {file.name}: {file.type}")
            return ""
        except Exception as e:
            st.warning(f"Error extracting text from {file.name}: {str(e)}")
            return ""

    def get_embedding(self, text):
        """Return a normalized embedding for ``text``.

        Returns ``None`` when no embedding model is available, and a zero
        vector of length ``EMBEDDING_DIM`` for empty text or when encoding
        fails.
        """
        if not self.embedding_model:
            return None
        if not text or not text.strip():
            return np.zeros(self.EMBEDDING_DIM)
        # Truncate if too long
        text = text[:self.MAX_EMBED_CHARS]
        try:
            return self.embedding_model.encode(text, normalize_embeddings=True)
        except Exception as e:
            st.warning(f"Error getting embedding: {e}")
            return np.zeros(self.EMBEDDING_DIM)

    def calculate_bm25_scores(self, resume_texts, job_description):
        """Return one BM25 score per resume, using the job description as query.

        Falls back to an all-zero array (with a Streamlit warning) on error.
        """
        if not resume_texts:
            return np.zeros(0)
        try:
            tokenized_resumes = [word_tokenize(text.lower()) for text in resume_texts]
            tokenized_job = word_tokenize(job_description.lower())
            bm25 = BM25Okapi(tokenized_resumes)
            return bm25.get_scores(tokenized_job)
        except Exception as e:
            st.warning(f"Error calculating BM25 scores: {e}")
            return np.zeros(len(resume_texts))

    def faiss_recall(self, resume_texts, job_description, top_k=50):
        """Return indices of up to ``top_k`` resumes closest to the job description.

        Without an embedding model, or on error, the first ``top_k`` indices
        are returned in their original order.
        """
        limit = min(top_k, len(resume_texts))
        if limit == 0:
            return []
        try:
            if not self.embedding_model:
                return list(range(limit))
            # Get embeddings
            resume_embeddings = np.array([self.get_embedding(text) for text in resume_texts])
            job_embedding = self.get_embedding(job_description).reshape(1, -1)
            # Build FAISS index
            dimension = resume_embeddings.shape[1]
            index = faiss.IndexFlatIP(dimension)  # Inner product for cosine similarity
            index.add(resume_embeddings.astype('float32'))
            # Search
            _, indices = index.search(job_embedding.astype('float32'), limit)
            return indices[0].tolist()
        except Exception as e:
            st.warning(f"Error in FAISS recall: {e}")
            return list(range(limit))

    def cross_encoder_rerank(self, resume_texts, job_description, candidate_indices, top_k=20):
        """Re-rank ``candidate_indices`` with the cross-encoder; return the best ``top_k``.

        Without a cross-encoder, or on error, the first ``top_k`` candidates
        are returned unchanged.
        """
        try:
            if not self.cross_encoder:
                return candidate_indices[:top_k]
            # Prepare pairs for cross-encoder
            pairs = [(job_description, resume_texts[i]) for i in candidate_indices]
            scores = self.cross_encoder.predict(pairs)
            # Sort by scores and return top_k
            ranked = sorted(zip(candidate_indices, scores), key=lambda item: item[1], reverse=True)
            return [idx for idx, _ in ranked[:top_k]]
        except Exception as e:
            st.warning(f"Error in cross-encoder reranking: {e}")
            return candidate_indices[:top_k]

    @classmethod
    def _parse_intent(cls, response):
        """Map raw LLM output to ``(label, score)``.

        Uses the first standalone "yes" / "no" / "maybe" token, so words that
        merely contain "no" (e.g. "unknown", "not") are not read as a
        rejection. Anything unrecognized is ("Maybe", 0.5).
        """
        match = re.search(r'\b(yes|no|maybe)\b', response.lower())
        if match is None:
            return cls.INTENT_SCORES['maybe']
        return cls.INTENT_SCORES[match.group(1)]

    def analyze_intent(self, resume_text, job_description):
        """Classify candidate intent with the LLM.

        Returns ``(label, score)`` where label is "Yes", "Maybe" or "No".
        Returns ("Maybe", 0.5) when the LLM or tokenizer is missing, or on
        error.
        """
        try:
            if not self.llm_model or not self.llm_tokenizer:
                return "Maybe", 0.5
            prompt = (
                "Analyze if this candidate is genuinely interested in this job based on their resume.\n"
                f"Job Description: {job_description[:500]}...\n"
                f"Resume: {resume_text[:1000]}...\n"
                "Based on the alignment between the candidate's experience and the job requirements, classify their intent as:\n"
                "- Yes: Strong alignment and genuine interest\n"
                "- Maybe: Some alignment but unclear intent\n"
                "- No: Poor alignment or likely not interested\n"
                "Intent:"
            )
            inputs = self.llm_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
            # The tokenizer returns CPU tensors; move them to wherever the
            # model was placed so generate() does not mix devices.
            inputs = inputs.to(self.llm_model.device)
            with torch.no_grad():
                outputs = self.llm_model.generate(
                    **inputs,
                    max_new_tokens=10,
                    temperature=0.1,
                    do_sample=True,
                    pad_token_id=self.llm_tokenizer.eos_token_id
                )
            # Decode only the newly generated tokens, not the echoed prompt.
            prompt_length = inputs['input_ids'].shape[1]
            response = self.llm_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
            return self._parse_intent(response)
        except Exception as e:
            st.warning(f"Error in intent analysis: {e}")
            return "Maybe", 0.5

    def extract_skills(self, text, job_description):
        """Find known skills and shared job keywords in a resume.

        Returns a dict with:
          - 'technical_skills': entries of ``self.skills_list`` that occur in
            ``text`` as whole terms (not as substrings of longer words);
          - 'job_specific_keywords': up to 10 words of 3+ letters that occur
            in both texts, in order of first appearance in the job
            description;
          - 'total_skills': the combined count of both lists.
        """
        text_lower = text.lower()
        job_lower = job_description.lower()
        # Alphanumeric lookarounds instead of \b so that terms ending in
        # punctuation ('c++', 'c#', 'node.js', 'ci/cd') still match, while
        # 'java' no longer matches inside 'javascript', 'go' inside 'google'.
        found_skills = [
            skill for skill in self.skills_list
            if re.search(r'(?<![a-z0-9])' + re.escape(skill) + r'(?![a-z0-9])', text_lower)
        ]
        # Extract job-specific keywords (simple approach)
        text_words = set(re.findall(r'\b[a-zA-Z]{3,}\b', text_lower))
        # dict.fromkeys de-duplicates while keeping job-description order, so
        # the 10-item cut is deterministic.
        job_words = dict.fromkeys(re.findall(r'\b[a-zA-Z]{3,}\b', job_lower))
        job_specific = [word for word in job_words if word in text_words][:10]
        return {
            'technical_skills': found_skills,
            'job_specific_keywords': job_specific,
            'total_skills': len(found_skills) + len(job_specific)
        }

    def add_bm25_scores(self, results_df, resume_texts, job_description):
        """Add a 'bm25_score' column; ``resume_texts`` must be in row order."""
        results_df['bm25_score'] = self.calculate_bm25_scores(resume_texts, job_description)
        return results_df

    def add_intent_scores(self, results_df, resume_texts, job_description):
        """Add 'intent_label' and 'intent_score' columns, showing a progress bar."""
        intent_labels = []
        intent_scores = []
        progress_bar = st.progress(0)
        total = len(resume_texts)
        for position, text in enumerate(resume_texts, start=1):
            label, score = self.analyze_intent(text, job_description)
            intent_labels.append(label)
            intent_scores.append(score)
            progress_bar.progress(position / total)
        results_df['intent_label'] = intent_labels
        results_df['intent_score'] = intent_scores
        return results_df

    @staticmethod
    def _min_max_normalize(values):
        """Scale a Series to roughly 0-1; the epsilon avoids division by zero."""
        lowest = values.min()
        return (values - lowest) / (values.max() - lowest + 1e-8)

    def calculate_final_scores(self, results_df):
        """Add 'final_score' and return the frame sorted by it, best first.

        final = 0.5 * cross-encoder + 0.3 * BM25 + 0.2 * intent, where the
        first two are min-max normalized. Missing score columns count as 0
        (cross-encoder, BM25) or 0.5 (intent). The returned frame keeps the
        input's index labels.
        """
        if 'cross_encoder_score' in results_df.columns:
            ce_scores = self._min_max_normalize(results_df['cross_encoder_score'])
        else:
            ce_scores = np.zeros(len(results_df))
        if 'bm25_score' in results_df.columns:
            bm25_scores = self._min_max_normalize(results_df['bm25_score'])
        else:
            bm25_scores = np.zeros(len(results_df))
        intent_scores = results_df.get('intent_score', np.ones(len(results_df)) * 0.5)
        # Weighted combination
        results_df['final_score'] = 0.5 * ce_scores + 0.3 * bm25_scores + 0.2 * intent_scores
        return results_df.sort_values('final_score', ascending=False)

    def advanced_pipeline_ranking(self, resume_texts, resume_filenames, job_description):
        """Run the full pipeline and return a results DataFrame, best first.

        Columns: rank, filename, resume_index, cross_encoder_score,
        bm25_score, intent_label, intent_score, final_score, top_skills,
        job_keywords, total_skills_count. ``rank`` follows the final score
        order and ``resume_index`` points back into ``resume_texts``.
        """
        st.info("π Starting advanced pipeline ranking...")
        # Stage 1: FAISS Recall
        st.text("Stage 1: FAISS-based recall (top 50 candidates)")
        top_50_indices = self.faiss_recall(resume_texts, job_description, top_k=50)
        # Stage 2: Cross-encoder reranking
        st.text("Stage 2: Cross-encoder reranking (top 20 candidates)")
        top_20_indices = self.cross_encoder_rerank(resume_texts, job_description, top_50_indices, top_k=20)
        top_20_texts = [resume_texts[i] for i in top_20_indices]
        # Create results dataframe ('rank' is provisional until the final sort)
        results_df = pd.DataFrame({
            'rank': range(1, len(top_20_indices) + 1),
            'filename': [resume_filenames[i] for i in top_20_indices],
            'resume_index': top_20_indices
        })
        # Stage 3: Add cross-encoder scores
        st.text("Stage 3: Adding detailed cross-encoder scores")
        if self.cross_encoder and top_20_indices:
            pairs = [(job_description, text) for text in top_20_texts]
            results_df['cross_encoder_score'] = self.cross_encoder.predict(pairs)
        else:
            # Keep the column present so consumers can rely on it.
            results_df['cross_encoder_score'] = 0.0
        # Stage 4: Add BM25 scores
        st.text("Stage 4: Adding BM25 scores")
        results_df = self.add_bm25_scores(results_df, top_20_texts, job_description)
        # Stage 5: Add intent analysis
        st.text("Stage 5: Analyzing candidate intent")
        results_df = self.add_intent_scores(results_df, top_20_texts, job_description)
        # Calculate final scores
        st.text("Calculating final weighted scores...")
        results_df = self.calculate_final_scores(results_df)
        # The sort permutes the index: renumber it and derive 'rank' from the
        # final order so rank 1 is the best final score.
        results_df = results_df.reset_index(drop=True)
        results_df['rank'] = range(1, len(results_df) + 1)
        # Add skills analysis, iterating in final order so rows line up.
        st.text("Extracting skills and keywords...")
        skills_data = []
        for resume_index in results_df['resume_index']:
            skills = self.extract_skills(resume_texts[int(resume_index)], job_description)
            skills_data.append({
                'top_skills': ', '.join(skills['technical_skills'][:5]),
                'job_keywords': ', '.join(skills['job_specific_keywords'][:5]),
                'total_skills_count': skills['total_skills']
            })
        skills_df = pd.DataFrame(skills_data, columns=['top_skills', 'job_keywords', 'total_skills_count'])
        results_df = pd.concat([results_df, skills_df], axis=1)
        st.success("β Pipeline completed successfully!")
        return results_df
# Load models on startup
if ML_IMPORTS_AVAILABLE and not st.session_state.models_loaded:
    load_models()
# Initialize screener (only defined when the ML stack is usable; later code
# guards on the same condition before touching `screener`)
if ML_IMPORTS_AVAILABLE and st.session_state.models_loaded:
    screener = ResumeScreener()

# Sidebar
with st.sidebar:
    st.title("π€ AI Resume Screener")
    st.markdown("---")
    st.subheader("π Pipeline Stages")
    st.markdown("""
1. **FAISS Recall**: Semantic similarity search (top 50)
2. **Cross-Encoder**: Deep reranking (top 20)
3. **BM25 Scoring**: Keyword-based relevance
4. **Intent Analysis**: AI-powered candidate intent
5. **Final Ranking**: Weighted score combination
""")
    st.subheader("π§ AI Models")
    if st.session_state.models_loaded:
        # (key in model_errors, key in session_state, success text)
        model_badges = [
            ('embedding', 'embedding_model', "β Embedding: BGE-Large-EN"),
            ('cross_encoder', 'cross_encoder', "β Cross-Encoder: MS-Marco-MiniLM"),
            ('llm', 'llm_model', "β LLM: Qwen2-1.5B"),
        ]
        for error_key, state_key, success_text in model_badges:
            # A model counts as available only if loading recorded no error
            # and the session actually holds an object for it.
            failed = (
                error_key in st.session_state.model_errors
                or st.session_state[state_key] is None
            )
            if failed:
                st.error(f"{error_key}: failed to load")
            else:
                st.success(success_text)
    else:
        st.warning("β³ Models loading...")
    if st.session_state.model_errors:
        st.error("β Model Errors:")
        for model, error in st.session_state.model_errors.items():
            st.text(f"{model}: {error[:100]}...")
    st.subheader("π Scoring Formula")
    st.markdown("""
**Final Score = 0.5 Γ Cross-Encoder + 0.3 Γ BM25 + 0.2 Γ Intent**
- Cross-Encoder: Deep semantic matching
- BM25: Keyword relevance
- Intent: Candidate interest level
""")
# Main content
st.title("π€ AI Resume Screener")
st.markdown("Automatically rank candidate resumes against job descriptions using advanced AI")

# Step 1: Job Description Input
st.header("π Step 1: Job Description")
job_description = st.text_area(
    "Enter the job description:",
    height=200,
    placeholder="Paste the complete job description here..."
)

# Step 2: Resume Upload
st.header("π Step 2: Load Resumes")
upload_option = st.radio(
    "Choose how to load resumes:",
    ["Upload Files", "Upload CSV", "Load from Hugging Face Dataset"]
)

if upload_option == "Upload Files":
    uploaded_files = st.file_uploader(
        "Upload resume files",
        type=['pdf', 'docx', 'txt'],
        accept_multiple_files=True
    )
    if uploaded_files and st.button("Process Uploaded Files"):
        if not (ML_IMPORTS_AVAILABLE and st.session_state.models_loaded):
            # Checked up front so a failed run does not overwrite resumes
            # that were loaded earlier.
            st.error("Models not loaded. Cannot process files.")
        else:
            with st.spinner("Processing files..."):
                texts = []
                filenames = []
                for file in uploaded_files:
                    text = screener.extract_text_from_file(file)
                    if text:
                        texts.append(text)
                        filenames.append(file.name)
            st.session_state.resume_texts = texts
            st.session_state.resume_filenames = filenames
            st.success(f"β Processed {len(texts)} resumes")

elif upload_option == "Upload CSV":
    csv_file = st.file_uploader("Upload CSV with resume texts", type=['csv'])
    if csv_file:
        try:
            df = pd.read_csv(csv_file)
        except ValueError as e:
            # pandas parser/empty-data errors and UnicodeDecodeError all
            # derive from ValueError.
            df = None
            st.error(f"Error reading CSV: {e}")
        if df is not None:
            st.write("CSV Preview:", df.head())
            text_column = st.selectbox("Select text column:", df.columns)
            name_column = st.selectbox("Select name/ID column:", df.columns)
            if st.button("Load from CSV"):
                # Cast to str: numeric cells would otherwise break the
                # string operations applied to resume texts later on.
                st.session_state.resume_texts = df[text_column].fillna("").astype(str).tolist()
                st.session_state.resume_filenames = df[name_column].fillna("Unknown").astype(str).tolist()
                st.success(f"β Loaded {len(st.session_state.resume_texts)} resumes from CSV")

elif upload_option == "Load from Hugging Face Dataset":
    dataset_name = st.text_input("Dataset name:", "resume-dataset/resume-screening")
    if st.button("Load Dataset"):
        if not ML_IMPORTS_AVAILABLE:
            st.error("Error loading dataset: ML libraries not available")
        else:
            try:
                with st.spinner("Loading dataset..."):
                    # Kept in session state: a button is only True on the
                    # run right after the click, so column pickers nested
                    # under it could never take effect.
                    st.session_state.hf_dataset = load_dataset(dataset_name, split="train")
                    st.session_state.hf_dataset_name = dataset_name
            except Exception as e:
                st.session_state.hf_dataset = None
                st.error(f"Error loading dataset: {e}")
    dataset = st.session_state.get('hf_dataset')
    if dataset is not None:
        st.caption(f"Dataset loaded: {st.session_state.get('hf_dataset_name', '')}")
        columns = dataset.column_names
        text_col = st.selectbox("Select text column:", columns)
        name_col = st.selectbox("Select name/ID column:", columns)
        if st.button("Load Resumes from Dataset"):
            sample = dataset[:100]  # Limit to 100
            st.session_state.resume_texts = [
                "" if value is None else str(value) for value in sample[text_col]
            ]
            # Use the chosen name column, falling back to a positional label.
            st.session_state.resume_filenames = [
                f"Resume_{i}" if value is None else str(value)
                for i, value in enumerate(sample[name_col])
            ]
            st.success(f"β Loaded {len(st.session_state.resume_texts)} resumes from dataset")

# Display current resume count
if st.session_state.resume_texts:
    st.info(f"π Currently loaded: {len(st.session_state.resume_texts)} resumes")
# Step 3: Run Pipeline
st.header("π Step 3: Run Advanced Pipeline")

# Work out the first unmet precondition. It is reported next to the button,
# because a disabled button never fires and so cannot show it from inside
# its own handler.
if not ML_IMPORTS_AVAILABLE:
    blocking_reason = "β ML libraries not available"
elif not st.session_state.models_loaded:
    blocking_reason = "β Models not loaded"
elif not job_description.strip():
    blocking_reason = "β Please enter a job description"
elif not st.session_state.resume_texts:
    blocking_reason = "β Please load some resumes"
else:
    blocking_reason = None

can_run = blocking_reason is None
if not can_run:
    st.warning(blocking_reason)

if st.button("π― Run Advanced Ranking Pipeline", disabled=not can_run):
    with st.spinner("Running advanced pipeline..."):
        results = screener.advanced_pipeline_ranking(
            st.session_state.resume_texts,
            st.session_state.resume_filenames,
            job_description
        )
    st.session_state.results = results
# Display Results
if st.session_state.results is not None:
    results = st.session_state.results
    score_cols = ['cross_encoder_score', 'bm25_score', 'intent_score']
    # Guarantee the score columns exist on a local copy so that one skipped
    # pipeline stage cannot raise KeyError in the views below.
    for missing_col in [col for col in score_cols if col not in results.columns]:
        results = results.assign(**{missing_col: float('nan')})
    top_10 = results.head(10)

    st.header("π Results")
    # Create tabs for different views
    tab1, tab2, tab3 = st.tabs(["π Summary", "π Detailed Analysis", "π Visualizations"])

    with tab1:
        st.subheader("Top Ranked Candidates")
        display_df = results[['rank', 'filename', 'final_score', 'cross_encoder_score',
                              'bm25_score', 'intent_score', 'intent_label', 'top_skills']].copy()
        for col in ['final_score'] + score_cols:
            display_df[col] = display_df[col].round(3)
        st.dataframe(display_df, use_container_width=True)
        # Native download widget instead of a hand-built base64 data: link
        # (the link used the invalid MIME type "file/csv" and raw HTML).
        st.download_button(
            label="π₯ Download Results as CSV",
            data=display_df.to_csv(index=False),
            file_name="resume_rankings.csv",
            mime="text/csv"
        )

    with tab2:
        st.subheader("Detailed Candidate Analysis")
        for idx, row in results.iterrows():
            with st.expander(f"#{row['rank']} - {row['filename']} (Score: {row['final_score']:.3f})"):
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Final Score", f"{row['final_score']:.3f}")
                    st.metric("Cross-Encoder", f"{row['cross_encoder_score']:.3f}")
                    st.metric("BM25 Score", f"{row['bm25_score']:.3f}")
                with col2:
                    st.metric("Intent Score", f"{row['intent_score']:.3f}")
                    st.metric("Intent Label", row['intent_label'])
                    st.metric("Skills Count", row['total_skills_count'])
                st.write("**Top Skills:**", row['top_skills'])
                st.write("**Job Keywords:**", row['job_keywords'])
                # Show resume excerpt. The loaded resumes may have been
                # replaced since these results were computed, so check the
                # index before using it.
                resume_index = int(row['resume_index'])
                if 0 <= resume_index < len(st.session_state.resume_texts):
                    resume_text = st.session_state.resume_texts[resume_index]
                    st.text_area("Resume Excerpt:", resume_text[:500] + "...", height=100, key=f"excerpt_{idx}")

    with tab3:
        st.subheader("Score Visualizations")
        # Score distribution
        fig1 = px.bar(
            top_10,
            x='filename',
            y='final_score',
            title="Top 10 Candidates - Final Scores",
            color='final_score',
            color_continuous_scale='viridis'
        )
        # The Figure method is update_xaxes (plural).
        fig1.update_xaxes(tickangle=45)
        st.plotly_chart(fig1, use_container_width=True)
        # Score breakdown
        fig2 = go.Figure()
        for col in score_cols:
            fig2.add_trace(go.Bar(
                name=col.replace('_', ' ').title(),
                x=top_10['filename'],
                y=top_10[col]
            ))
        fig2.update_layout(
            title="Score Breakdown - Top 10 Candidates",
            barmode='group',
            xaxis_tickangle=45
        )
        st.plotly_chart(fig2, use_container_width=True)
        # Intent distribution
        intent_counts = results['intent_label'].value_counts()
        fig3 = px.pie(
            values=intent_counts.values,
            names=intent_counts.index,
            title="Candidate Intent Distribution"
        )
        st.plotly_chart(fig3, use_container_width=True)
        # Average metrics
        averages = [
            ("Avg Final Score", 'final_score'),
            ("Avg Cross-Encoder", 'cross_encoder_score'),
            ("Avg BM25", 'bm25_score'),
            ("Avg Intent", 'intent_score'),
        ]
        for column, (label, score_col) in zip(st.columns(4), averages):
            with column:
                st.metric(label, f"{results[score_col].mean():.3f}")
# Cleanup Controls
st.header("π§Ή Cleanup")
col1, col2 = st.columns(2)
with col1:
    if st.button("Clear Resumes Only"):
        st.session_state.resume_texts = []
        st.session_state.resume_filenames = []
        st.session_state.results = None
        st.success("β Resumes cleared")
with col2:
    if st.button("Reset Entire App"):
        # Clear all session state (iterate over a copy of the keys because
        # entries are deleted during the loop)
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        # Free GPU memory; `torch` only exists when the ML imports succeeded.
        if ML_IMPORTS_AVAILABLE and torch.cuda.is_available():
            torch.cuda.empty_cache()
        st.success("β App reset complete")
        # Prefer st.rerun where the installed Streamlit provides it and fall
        # back to the older experimental name otherwise.
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()

# Footer
st.markdown("---")
st.markdown(
    """
<div style='text-align: center; color: #666; font-size: 0.8em;'>
π€ Powered by BGE-Large-EN, MS-Marco-MiniLM, Qwen2-1.5B | Built with Streamlit
</div>
""",
    unsafe_allow_html=True
)