Spaces:
Sleeping
Sleeping
File size: 4,679 Bytes
bb9a1f9 2231637 7132f90 bb9a1f9 6a3a4c5 bb9a1f9 2231637 bb9a1f9 2231637 bb9a1f9 2231637 bb9a1f9 2231637 bb9a1f9 7132f90 bb9a1f9 2acd771 bb9a1f9 2231637 b7ec1f2 7132f90 b7ec1f2 bb9a1f9 7132f90 bb9a1f9 2231637 bb9a1f9 2231637 bb9a1f9 7132f90 bb9a1f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import streamlit as st
import joblib
import re
import PyPDF2
import pandas as pd
import os
import uuid
from datetime import datetime
import tempfile
from io import BytesIO
# Load model and vectorizer.
# Streamlit re-executes this script on every widget interaction, so the
# artifacts are loaded through st.cache_resource and reused across reruns
# instead of being read from disk each time.
@st.cache_resource
def _load_model_artifacts():
    """Return the ``(classifier, vectorizer)`` pair loaded with joblib.

    NOTE(review): joblib files are pickles -- only load artifacts that come
    from a trusted source.
    """
    return joblib.load('resume_classifier'), joblib.load('resume_vectorizer')


classifier_model, resume_vectorizer = _load_model_artifacts()
def transfer_tmp_logs(tmp_log_path="/tmp/corrections_log.csv",
                      main_log_path="corrections_log.csv"):
    """Merge the temporary corrections log into the main log, then delete it.

    Rows are de-duplicated on ``serial_id``. When an id is present in both
    files the row from the temporary log wins, because it is concatenated
    last and ``keep="last"`` is used.

    Args:
        tmp_log_path: CSV holding corrections that have not been transferred
            yet. Defaults to the previously hard-coded location.
        main_log_path: CSV that accumulates all corrections. It is created
            when it does not exist yet.

    Returns:
        None. Nothing happens when ``tmp_log_path`` does not exist.
    """
    if not os.path.exists(tmp_log_path):
        return  # No new logs to transfer

    # serial_id is read as text so ids are compared and written back exactly
    # as they were logged, whatever they look like.
    id_as_text = {"serial_id": str}

    try:
        tmp_df = pd.read_csv(tmp_log_path, dtype=id_as_text)
    except pd.errors.EmptyDataError:
        # A zero-byte temp log has nothing to merge; remove it so it does not
        # make every later call fail.
        os.remove(tmp_log_path)
        return

    if os.path.exists(main_log_path):
        main_df = pd.read_csv(main_log_path, dtype=id_as_text)
        # Merge without duplicates based on serial_id
        combined_df = pd.concat([main_df, tmp_df], ignore_index=True).drop_duplicates(
            subset=["serial_id"], keep="last"
        )
    else:
        combined_df = tmp_df
    combined_df.to_csv(main_log_path, index=False)

    # Remove the temp file only after the main log has been written.
    os.remove(tmp_log_path)
def read_uploaded_file(uploaded_file):
    """Extract plain text from an uploaded ``.pdf`` or ``.txt`` file.

    Args:
        uploaded_file: File-like object exposing ``name`` and ``read()``.
            The extension of ``name`` (case-insensitive) selects the parser.

    Returns:
        The stripped text on success. Failures never raise: any exception
        while parsing, and any unsupported extension, is reported as a string
        starting with ``"Error reading file:"``. The unsupported-type case
        previously returned a message without that prefix, which callers
        looking for an error marker mistook for resume text.
    """
    ext = os.path.splitext(uploaded_file.name)[1].lower()
    try:
        if ext == ".pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            page_texts = (page.extract_text() for page in reader.pages)
            # Pages with no extractable text are skipped.
            return "".join(f"{page_text}\n" for page_text in page_texts if page_text).strip()
        if ext == ".txt":
            # Text uploads are expected to be UTF-8; a decode failure is
            # reported through the except branch below.
            return uploaded_file.read().decode("utf-8").strip()
    except Exception as e:
        return f"Error reading file: {str(e)}"
    return f"Error reading file: unsupported file type '{ext}'."
def clean_resume(text):
    """Normalise raw resume text to lower-case ASCII letters and spaces.

    Each character that is not an ASCII letter is replaced by one space
    (runs are not collapsed), then the whole string is lower-cased.
    """
    letters_and_spaces = re.sub(r'[^a-zA-Z]', ' ', text)
    return letters_and_spaces.lower()
def log_or_update(serial_id, timestamp, resume_text, model_prediction,
                  corrected_prediction, log_file="/tmp/corrections_log.csv"):
    """Insert or update one feedback row in the corrections CSV.

    If a row with ``serial_id`` already exists, only its
    ``corrected_prediction`` is overwritten; the stored timestamp, text and
    model prediction are kept. Otherwise a new row is appended. The file is
    created on first use.

    Args:
        serial_id: Identifier of the upload the feedback belongs to.
        timestamp: Time string stored with a newly created row.
        resume_text: Extracted resume text; only the first 500 characters
            are stored.
        model_prediction: Role predicted by the model.
        corrected_prediction: Role confirmed or supplied by the user.
        log_file: CSV to write to. Defaults to the previously hard-coded
            temporary log path.

    Returns:
        None.
    """
    new_row = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": resume_text[:500],  # Truncate for privacy/log size
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    if not os.path.exists(log_file):
        df = pd.DataFrame([new_row])
    else:
        # Read ids as text: with type inference a numeric-looking id would be
        # parsed as a number and never match, producing duplicate rows.
        df = pd.read_csv(log_file, dtype={"serial_id": str})
        same_upload = df["serial_id"] == str(serial_id)
        if same_upload.any():
            df.loc[same_upload, "corrected_prediction"] = corrected_prediction
        else:
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    df.to_csv(log_file, index=False)
# Streamlit UI
# Flow: upload a resume -> extract and clean its text -> show the predicted
# role -> let the user confirm or correct it -> upsert the outcome in the log.
st.title("📄 Resume Role Classifier")

# Only offer the formats read_uploaded_file() can parse (.pdf and .txt).
# Accepting .doc/.docx here let users upload files the reader cannot handle.
uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT format)",
    type=["pdf", "txt"],
)

if uploaded_file:
    # Reset the file read pointer in case it was read earlier
    uploaded_file.seek(0)

    # Track upload session: a different file name starts a new log entry
    # (fresh serial_id) and clears any correction typed for the previous file.
    if st.session_state.get("uploaded_file_name") != uploaded_file.name:
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None

    extracted_text = read_uploaded_file(uploaded_file)

    # read_uploaded_file() reports failures as message strings. Match those
    # messages specifically: searching for "Error" anywhere in the text
    # rejected every resume that merely contains that word, and missed the
    # unsupported-type message entirely.
    extraction_failed = (
        not extracted_text.strip()
        or extracted_text.startswith("Error reading file:")
        or extracted_text == "Unsupported file type."
    )

    if extraction_failed:
        st.warning("⚠️ Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]
        st.write(f"**Predicted Role:** `{prediction}`")

        feedback = st.radio("Is this prediction correct?", ("Yes", "No"), key="feedback_radio")
        corrected_prediction = prediction
        if feedback == "No":
            corrected_prediction = st.text_input(
                "Please provide the correct role:",
                # The session key exists but holds None right after a new
                # upload, so .get()'s default never applies; coalesce to "".
                value=st.session_state.get("corrected_prediction") or "",
                key="correction_input",
            )
            st.session_state.corrected_prediction = corrected_prediction
        else:
            st.session_state.corrected_prediction = prediction

        # Record once the user confirmed, or rejected and supplied a role.
        if feedback == "Yes" or (feedback == "No" and corrected_prediction):
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction,
            )
            st.success(f"✅ Final role recorded: `{corrected_prediction}`")
else:
    st.info("📤 Please upload a supported file (PDF, TXT).")
|