"""
Quantum Scrutiny Platform | Groq-Powered
Single-file Streamlit app (refactored, Groq streaming-compatible)
"""
import os
import io
import re
import json
import traceback
from typing import Optional, List
from dotenv import load_dotenv
load_dotenv()
import streamlit as st
import pandas as pd
# File parsing
import fitz # PyMuPDF
from docx import Document # python-docx
# Groq client
from groq import Groq
# Validation
from pydantic import BaseModel, Field, ValidationError
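# Third-party requirements (pip names, assuming current releases):
#   streamlit, pandas, python-dotenv, pymupdf, python-docx, groq, pydantic,
#   and openpyxl (used implicitly by pd.ExcelWriter below).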
# --- Page config ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
# Initialize Groq client (no API key -> UI warning but app still loads)
groq_client = None
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
else:
    st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
# --- Session state defaults ---
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []
if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False
# --- Pydantic schema ---
class ResumeAnalysis(BaseModel):
    name: str = Field(default="Unknown")
    email: str = Field(default="")
    phone: str = Field(default="")
    certifications: List[str] = Field(default_factory=list)
    experience_summary: str = Field(default="")
    education_summary: str = Field(default="")
    communication_skills: str = Field(default="N/A")
    technical_skills: List[str] = Field(default_factory=list)
    aba_therapy_skills: Optional[str] = Field(default="N/A")
    rbt_bcba_certification: Optional[str] = Field(default="N/A")
    autism_care_experience_score: Optional[str] = Field(default="N/A")
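# Illustrative example of a payload this schema accepts; score-like fields are
# deliberately strings so the model can answer 'N/A'. Omitted keys fall back to
# the Field defaults above:
#
#   ResumeAnalysis.parse_obj({
#       "name": "Jane Doe",
#       "email": "jane@example.com",
#       "certifications": ["RBT"],
#       "communication_skills": "8",
#   })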
# --- Helpers: file text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """Extract text from PDF or DOCX. Returns empty string on failure."""
    try:
        content = uploaded_file.read()
        filename = uploaded_file.name.lower()
        if filename.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    text = ""
                    for p in doc:
                        text += p.get_text()
                    return text.strip()
            except Exception:
                return ""
        elif filename.endswith(".docx"):
            try:
                doc = Document(io.BytesIO(content))
                paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(paragraphs).strip()
            except Exception:
                return ""
        else:
            # fallback: decode bytes as text
            try:
                return content.decode('utf-8', errors='ignore')
            except Exception:
                return ""
    except Exception:
        return ""
# --- Groq call with streaming (collects chunks) ---
def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """
    Calls Groq with streaming enabled and collects the textual output.
    Returns the full model text, or None on failure.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None
    try:
        completion = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True
        )
        # completion is an iterator/streamable object; collect chunks
        collected = ""
        # some SDKs yield dict-like chunks, some objects; handle both
        for chunk in completion:
            try:
                # Common pattern: chunk.choices[0].delta.content
                delta = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None
                if delta is None:
                    # fallback for dict-like chunks
                    if isinstance(chunk, dict):
                        delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
                if delta:
                    collected += delta
                else:
                    # Some SDKs return the final message in chunk.choices[0].message.content
                    try:
                        msg = getattr(chunk.choices[0].message, "content", None)
                        if msg:
                            collected += msg
                    except Exception:
                        pass
            except Exception:
                # last resort: append the raw chunk representation
                try:
                    collected += str(chunk)
                except Exception:
                    pass
        return collected.strip()
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return None
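# If incremental output is not needed, a plain (non-streaming) call is simpler.
# Minimal sketch, assuming the same groq SDK surface used above:
#
#   resp = groq_client.chat.completions.create(
#       model="llama-3.3-70b-versatile",
#       messages=[{"role": "user", "content": prompt}],
#       stream=False,
#   )
#   text = resp.choices[0].message.content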
# --- Parsing model output safely to JSON ---
def extract_first_json(text: str) -> Optional[dict]:
    """
    Find the first balanced JSON object in text and parse it; return dict or None.
    """
    if not text:
        return None
    # Python's re module does not support recursive patterns like (?R), so
    # locate the first balanced {...} block by counting braces instead.
    json_text = None
    start = text.find("{")
    if start != -1:
        depth = 0
        for i in range(start, len(text)):
            if text[i] == "{":
                depth += 1
            elif text[i] == "}":
                depth -= 1
                if depth == 0:
                    json_text = text[start:i + 1]
                    break
    if json_text is None:
        # Maybe the model returned bare JSON with no surrounding braces; try the full text.
        json_text = text
    try:
        return json.loads(json_text)
    except Exception:
        # Clean a common model mistake: single quotes instead of double quotes.
        # This is lossy (it breaks apostrophes inside values) but is a last
        # resort before giving up.
        try:
            return json.loads(json_text.replace("'", '"'))
        except Exception:
            return None
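# Example (illustrative):
#   extract_first_json('Here you go: {"name": "Jane", "email": ""} Thanks!')
#   -> {'name': 'Jane', 'email': ''}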
# --- Analyze with Groq (cached by resume text + role) ---
def _failed_analysis() -> ResumeAnalysis:
    """Sentinel ResumeAnalysis returned when extraction or parsing fails."""
    return ResumeAnalysis(name="Extraction Failed")

@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Calls Groq (streaming) and returns a ResumeAnalysis instance.
    Uses caching to avoid duplicate calls for the same resume_text + job_role.
    """
    # Build prompt instructing the JSON structure
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."
    system_user_prompt = (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )
    raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
    if not raw:
        return _failed_analysis()
    parsed = extract_first_json(raw)
    if not parsed:
        # Show raw output for debugging (the admin panel will show it too). Use
        # st.code, a static element: widgets such as st.text_area are not
        # allowed inside st.cache_data functions.
        st.warning("Failed to parse model JSON output. See raw output below for debugging.")
        st.code(raw)
        return _failed_analysis()
    # Ensure all expected keys exist
    defaults = {
        "name": "Unknown", "email": "", "phone": "", "certifications": [],
        "experience_summary": "", "education_summary": "",
        "communication_skills": "N/A", "technical_skills": [],
        "aba_therapy_skills": "N/A", "rbt_bcba_certification": "N/A",
        "autism_care_experience_score": "N/A",
    }
    for key, value in defaults.items():
        parsed.setdefault(key, value)
    # Coerce score-like fields to strings so schema validation does not fail on ints
    for key in ("communication_skills", "aba_therapy_skills",
                "rbt_bcba_certification", "autism_care_experience_score"):
        parsed[key] = str(parsed.get(key) or "N/A")
    # Validate via Pydantic
    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError as ve:
        st.error("Model output failed schema validation.")
        st.code(raw)
        st.exception(ve)
        return _failed_analysis()
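# Note: st.cache_data keys on the function arguments, so re-analyzing the same
# resume text for the same role returns the cached ResumeAnalysis without a
# second Groq call.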
# --- Scoring logic ---
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    total_score = 0.0
    # Experience summary: up to 40 points (length-based proxy, capped at 100 chars)
    exp_len = len(analysis.experience_summary or "")
    exp_factor = min(exp_len / 100.0, 1.0)
    total_score += exp_factor * 40.0
    # Skills count: up to 30 points (capped at 10 skills)
    skills_count = len(analysis.technical_skills or [])
    skills_factor = min(skills_count / 10.0, 1.0)
    total_score += skills_factor * 30.0
    # Communication: up to 20 points (expects a 0-10 rating inside the string;
    # falls back to the midpoint of 5.0 when no number is found, e.g. 'N/A')
    m = re.search(r"(\d+(\.\d+)?)", str(analysis.communication_skills))
    comm_val = float(m.group(1)) if m else 5.0
    comm_val = max(0.0, min(10.0, comm_val))
    total_score += (comm_val / 10.0) * 20.0
    # Certifications: 1 point each, up to 10
    total_score += min(len(analysis.certifications or []), 10) * 1.0
    # Therapist bonus: up to 10 points from the two specialist scores
    if role.lower() == "therapist":
        def safe_score(x):
            m = re.search(r"(\d+(\.\d+)?)", str(x))
            return float(m.group(1)) if m else 0.0
        aba = safe_score(analysis.aba_therapy_skills)
        autism = safe_score(analysis.autism_care_experience_score)
        spec_bonus = ((aba + autism) / 20.0) * 10.0
        total_score += spec_bonus
    final = round(min(total_score, 100))
    return float(final)
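# Worked example (illustrative): a 250-char experience summary (capped -> 40),
# 6 technical skills (0.6 * 30 = 18), communication "8" (0.8 * 20 = 16) and
# 2 certifications (+2) give 40 + 18 + 16 + 2 = 76.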
# --- Append to DataFrame ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    data = analysis.dict()
    tech = ", ".join(data.get("technical_skills") or [])
    certs = ", ".join(data.get("certifications") or [])
    row = {
        'Name': data.get("name") or "",
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get("email") or "",
        'Phone': data.get("phone") or "",
        'Shortlisted': 'No',
        'Experience Summary': data.get("experience_summary") or "",
        'Education Summary': data.get("education_summary") or "",
        'Communication Rating (1-10)': str(data.get("communication_skills") or "N/A"),
        'Skills/Technologies': tech,
        'Certifications': certs,
        'ABA Skills (1-10)': str(data.get("aba_therapy_skills") or "N/A"),
        'RBT/BCBA Cert': str(data.get("rbt_bcba_certification") or "N/A"),
        'Autism-Care Exp (1-10)': str(data.get("autism_care_experience_score") or "N/A"),
    }
    new_df = pd.DataFrame([row])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
# --- Excel export helper ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return output.getvalue()
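# Note: output.getvalue() is read only after the ExcelWriter context exits,
# which is when openpyxl finalizes and flushes the workbook bytes.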
# --- UI Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")
tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])
# --- User Panel ---
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()
    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            continue
                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue
                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # st.exception expects an exception object, not a string;
                        # render the formatted traceback as code instead.
                        st.code(traceback.format_exc())
                    finally:
                        # Advance the bar exactly once per file, whether it
                        # succeeded, was skipped, or raised.
                        progress.progress(idx / total)
            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False
    # Display last results summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
# --- Admin Panel ---
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()
    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()
    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val
        st.markdown("---")
        st.subheader("📥 Download Data")
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)
        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
# --- End of file ---