""" Quantum Scrutiny Platform | Groq-Powered Single-file Streamlit app (refactored, Groq streaming-compatible) """ import os import io import re import json import base64 import traceback from typing import Optional, List from dotenv import load_dotenv load_dotenv() import streamlit as st import pandas as pd # File parsing import fitz # PyMuPDF from docx import Document # python-docx # Groq client from groq import Groq # Validation from pydantic import BaseModel, Field, ValidationError # --- Page config --- st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered") # --- Config / Secrets --- GROQ_API_KEY = os.getenv("GROQ_API_KEY") ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin") # Initialize Groq client (no API key -> UI warning but app still loads) groq_client = None if GROQ_API_KEY: try: groq_client = Groq(api_key=GROQ_API_KEY) except Exception as e: st.error(f"Failed to initialize Groq client: {e}") else: st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.") # --- Session state defaults --- if 'is_admin_logged_in' not in st.session_state: st.session_state.is_admin_logged_in = False if 'analyzed_data' not in st.session_state: initial_cols = [ 'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted', 'Experience Summary', 'Education Summary', 'Communication Rating (1-10)', 'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)' ] st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols) if 'individual_analysis' not in st.session_state: st.session_state.individual_analysis = [] if 'run_analysis' not in st.session_state: st.session_state.run_analysis = False # --- Pydantic schema --- class ResumeAnalysis(BaseModel): name: str = Field(default="Unknown") email: str = Field(default="") phone: str = Field(default="") certifications: List[str] = Field(default_factory=list) experience_summary: str = Field(default="") education_summary: str = Field(default="") communication_skills: str = Field(default="N/A") technical_skills: List[str] = Field(default_factory=list) aba_therapy_skills: Optional[str] = Field(default="N/A") rbt_bcba_certification: Optional[str] = Field(default="N/A") autism_care_experience_score: Optional[str] = Field(default="N/A") # --- Helpers: file text extraction --- def extract_text_from_file(uploaded_file) -> str: """Extract text from PDF or DOCX. Returns empty string on failure.""" try: content = uploaded_file.read() filename = uploaded_file.name.lower() if filename.endswith(".pdf") or content[:5] == b"%PDF-": try: with fitz.open(stream=content, filetype="pdf") as doc: text = "" for p in doc: text += p.get_text() return text.strip() except Exception: return "" elif filename.endswith(".docx"): try: doc = Document(io.BytesIO(content)) paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()] return "\n".join(paragraphs).strip() except Exception: return "" else: # fallback: decode bytes as text try: return content.decode('utf-8', errors='ignore') except Exception: return "" except Exception: return "" # --- Groq call with streaming (collects chunks) --- def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]: """ Calls Groq with streaming enabled and collects the textual output. Returns the full model text, or None on failure. 
""" if not groq_client: st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.") return None try: completion = groq_client.chat.completions.create( model=model_name, messages=[ {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."}, {"role": "user", "content": prompt} ], temperature=temperature, max_completion_tokens=max_completion_tokens, top_p=top_p, stream=True ) # completion is an iterator/streamable object; collect chunks collected = "" # some SDKs yield dict-like chunks, some objects; handle both for chunk in completion: try: # Common pattern: chunk.choices[0].delta.content delta = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None if delta is None: # fallback for dict-like object if isinstance(chunk, dict): delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content") if delta: collected += delta else: # Some SDKs return final message in chunk.choices[0].message.content try: msg = getattr(chunk.choices[0].message, "content", None) if msg: collected += msg except Exception: pass except Exception: # last-resort: append str(chunk) try: collected += str(chunk) except Exception: pass return collected.strip() except Exception as e: st.error(f"Groq API call failed: {e}") return None # --- Parsing model output safely to JSON --- def extract_first_json(text: str) -> Optional[dict]: """ Find the first JSON object in text and parse it; return dict or None. """ if not text: return None # find first balanced braces block # quick heuristic regex for {...} try: match = re.search(r"(\{(?:[^{}]|(?R))*\})", text, re.DOTALL) except re.error: # Python's re doesn't support (?R); fallback to simpler greedy match = re.search(r"(\{.*\})", text, re.DOTALL) if match: json_text = match.group(1) else: # maybe the model returned only JSON-like lines -> try to parse full text json_text = text try: parsed = json.loads(json_text) return parsed except Exception: # try to clean common issues: single quotes -> double quotes try: json_text_fixed = json_text.replace("'", '"') parsed = json.loads(json_text_fixed) return parsed except Exception: return None # --- Analyze with Groq (cached by resume text + role) --- @st.cache_data(show_spinner=False) def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis: """ Calls Groq (streaming) and returns a ResumeAnalysis instance. Uses caching to avoid duplicate calls for same resume_text+role. """ # Build prompt instructing JSON structure therapist_instructions = "" if job_role.lower() == "therapist": therapist_instructions = ( "Because the role is 'Therapist', carefully search for ABA Therapy Skills, " "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'." ) else: therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'." system_user_prompt = ( "Return a single JSON object with the following keys exactly: " "name (string), email (string), phone (string), certifications (array of strings), " "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), " "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), " "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). " f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object." 

# --- Analyze with Groq (cached by resume text + role) ---
@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Call Groq (streaming) and return a ResumeAnalysis instance.
    Caching avoids duplicate calls for the same resume_text + role.
    """
    # Build a prompt instructing the JSON structure
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

    system_user_prompt = (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )

    def failed_analysis() -> ResumeAnalysis:
        """Empty fallback object returned when extraction or validation fails."""
        return ResumeAnalysis(
            name="Extraction Failed", email="", phone="", certifications=[],
            experience_summary="", education_summary="", communication_skills="N/A",
            technical_skills=[], aba_therapy_skills="N/A",
            rbt_bcba_certification="N/A", autism_care_experience_score="N/A"
        )

    raw = call_groq_stream_collect(
        system_user_prompt, model_name="llama-3.3-70b-versatile",
        temperature=0.0, max_completion_tokens=2048
    )
    if not raw:
        return failed_analysis()

    parsed = extract_first_json(raw)
    if not parsed:
        # Show raw output for debugging. Note: st.code rather than st.text_area,
        # because input widgets are not allowed inside st.cache_data functions.
        st.warning("Failed to parse model JSON output. See raw output below for debugging.")
        st.code(raw)
        return failed_analysis()

    # Ensure keys exist
    parsed.setdefault("name", "Unknown")
    parsed.setdefault("email", "")
    parsed.setdefault("phone", "")
    parsed.setdefault("certifications", [])
    parsed.setdefault("experience_summary", "")
    parsed.setdefault("education_summary", "")
    parsed.setdefault("communication_skills", "N/A")
    parsed.setdefault("technical_skills", [])
    parsed.setdefault("aba_therapy_skills", "N/A")
    parsed.setdefault("rbt_bcba_certification", "N/A")
    parsed.setdefault("autism_care_experience_score", "N/A")

    # Coerce score-like fields to strings
    try:
        for key in ("communication_skills", "aba_therapy_skills",
                    "rbt_bcba_certification", "autism_care_experience_score"):
            parsed[key] = str(parsed.get(key) or "N/A")
    except Exception:
        pass

    # Validate via Pydantic
    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError as ve:
        st.error("Model output failed schema validation.")
        st.code(raw)
        st.exception(ve)
        return failed_analysis()


# --- Scoring logic ---
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    total_score = 0.0

    # Experience summary: up to 40 points
    exp_len = len(analysis.experience_summary or "")
    exp_factor = min(exp_len / 100.0, 1.0)
    total_score += exp_factor * 40.0

    # Skills count: up to 30 points
    skills_count = len(analysis.technical_skills or [])
    skills_factor = min(skills_count / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # Communication: up to 20 points (expects 0-10 as a string)
    try:
        m = re.search(r"(\d+(\.\d+)?)", str(analysis.communication_skills))
        comm_val = float(m.group(1)) if m else float(str(analysis.communication_skills))
        comm_val = max(0.0, min(10.0, comm_val))
    except Exception:
        comm_val = 5.0
    total_score += (comm_val / 10.0) * 20.0

    # Certifications: up to 10 points
    total_score += min(len(analysis.certifications or []), 10) * 1.0

    # Therapist bonus: up to 10 points
    if role.lower() == "therapist":
        def safe_score(x) -> float:
            try:
                m = re.search(r"(\d+(\.\d+)?)", str(x))
                return float(m.group(1)) if m else 0.0
            except Exception:
                return 0.0

        aba = safe_score(analysis.aba_therapy_skills)
        autism = safe_score(analysis.autism_care_experience_score)
        spec_bonus = ((aba + autism) / 20.0) * 10.0
        total_score += spec_bonus

    return float(round(min(total_score, 100)))
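
# Worked scoring example for a hypothetical Therapist candidate:
#   experience_summary of 250 chars -> min(250/100, 1.0) * 40 = 40.0
#   6 technical skills              -> min(6/10, 1.0) * 30    = 18.0
#   communication_skills "8"        -> (8/10) * 20            = 16.0
#   3 certifications                -> min(3, 10) * 1.0       =  3.0
#   ABA "7", autism-care "9"        -> ((7+9)/20) * 10        =  8.0
#   total = 85.0 -> min(85.0, 100) -> score 85.0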

# --- Append to DataFrame ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    data = analysis.dict()
    tech = ", ".join(data.get("technical_skills") or [])
    certs = ", ".join(data.get("certifications") or [])
    row = {
        'Name': data.get("name") or "",
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get("email") or "",
        'Phone': data.get("phone") or "",
        'Shortlisted': 'No',
        'Experience Summary': data.get("experience_summary") or "",
        'Education Summary': data.get("education_summary") or "",
        'Communication Rating (1-10)': str(data.get("communication_skills") or "N/A"),
        'Skills/Technologies': tech,
        'Certifications': certs,
        'ABA Skills (1-10)': str(data.get("aba_therapy_skills") or "N/A"),
        'RBT/BCBA Cert': str(data.get("rbt_bcba_certification") or "N/A"),
        'Autism-Care Exp (1-10)': str(data.get("autism_care_experience_score") or "N/A"),
    }
    new_df = pd.DataFrame([row])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)


# --- Excel export helper ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return output.getvalue()
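
# Illustrative round-trip check for df_to_excel_bytes (requires openpyxl):
#
#     demo = pd.DataFrame({"Name": ["Jane Doe"], "Resume Score (100)": [85.0]})
#     blob = df_to_excel_bytes(demo)
#     back = pd.read_excel(io.BytesIO(blob))  # -> the same single-row frame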

# --- UI Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# --- User Panel ---
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            continue
                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue
                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        st.code(traceback.format_exc())
                    finally:
                        # Runs on every path (success, skip, or error), so the
                        # progress bar stays accurate without duplicate calls.
                        progress.progress(idx / total)
            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False

    # Display last results summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

    st.markdown("---")
    st.caption("All analyzed data is stored in the Admin Dashboard.")

# --- Admin Panel ---
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Write the edited Shortlisted column back into session state
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("📥 Download Data")
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)
        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# --- End of file ---