# Hugging Face Spaces page header captured with the source: "No application file".
import streamlit as st | |
from langchain.chains import create_history_aware_retriever, create_retrieval_chain | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_groq import ChatGroq | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.vectorstores import FAISS | |
import os | |
from dotenv import load_dotenv | |
import google.generativeai as genai | |
import pandas as pd | |
import json | |
from io import BytesIO | |
import tempfile | |
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# HF_TOKEN is read here but is not passed to anything in this section; the
# name is kept in case other code relies on it.
HF_TOKEN = os.getenv('HF_TOKEN')


@st.cache_resource
def _load_embeddings():
    """Build the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the embedding model would be instantiated again on each rerun.
    """
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


# Set up embeddings (used when building the FAISS vector stores below).
embeddings = _load_embeddings()

# API keys
api_key = os.getenv('API_KEY')  # passed to the ChatGroq models further down
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=GEMINI_API_KEY)

# Set up generative model (used by create_job_post).
model = genai.GenerativeModel('gemini-pro')
def create_job_post(job_title, location, exp):
    """Ask the module-level generative model to draft a job-opening post.

    Args:
        job_title: Title of the role to advertise.
        location: Location of the role.
        exp: Required experience, as free text.

    Returns:
        The generated post text, or a string beginning with
        "Error generating response:" when generation raises an exception.
    """
    prompt_parts = [
        "Create a job opening post for platforms like Internshala, LinkedIn, and Naukri.com. ",
        f"The post should include the job title: {job_title}, location: {location}, and required experience: {exp}. ",
        "Make it attractive, include skills (if possible but the skills in boxes and highlight them) required, who can apply, benefits, and other necessary details. ",
        "The post should be 100-200 words.",
    ]
    prompt = "".join(prompt_parts)

    try:
        generated = model.generate_content(prompt)
        return generated.text
    except Exception as err:
        # Failures are reported to the caller as text rather than raised.
        return f"Error generating response: {err}"
# Streamlit app title
st.title("Recruitment AI")

# Job title, location, and experience input fields
job_title = st.text_input("Enter the job title you are looking for")
location = st.text_input("Enter the location you are looking for")
exp = st.text_input("Enter the experience you are looking for")

# Stays None until a post has been generated during this run of the script.
job_post = None

# Button to generate job post
if st.button("Create Job Post"):
    if job_title and location and exp:
        job_post = create_job_post(job_title, location, exp)
        st.write(job_post)
    else:
        st.warning("Please fill in all fields to generate the job post.")

# The post is generated only inside the validated branch above. Calling
# create_job_post() again here would send another model request (possibly with
# empty fields) whose result nothing in this file reads.
# Resume scoring section
# Both chat models are built from the same settings: `llm` scores resumes and
# `llm_2` extracts the job description.
_groq_settings = {"groq_api_key": api_key, "model_name": "gemma-7b-it"}
llm = ChatGroq(**_groq_settings)
llm_2 = ChatGroq(**_groq_settings)

# Optional job-description PDF supplied by the recruiter.
jd = st.file_uploader("Upload Job Description", type="pdf")
def get_jd(jd):
    """Extract a formatted job description from an uploaded PDF.

    The upload is written to a temporary file, loaded with PyPDFLoader,
    split into chunks, indexed in a FAISS vector store, and passed through a
    retrieval chain driven by the module-level ``llm_2`` model.

    Args:
        jd: Uploaded file object exposing ``getvalue()`` that returns the
            PDF bytes.

    Returns:
        The "answer" field of the retrieval chain's response.

    Raises:
        RuntimeError: If the PDF yields no text chunks or the retrieval
            chain fails.
    """
    # Initialised before the try block so cleanup never touches an unbound
    # name, which would otherwise hide the real error.
    temp_pdf_path = None
    try:
        # Use a temporary file to save the uploaded file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
            # Record the path first so the file is removed even if the write fails.
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(jd.getvalue())
        # Load the PDF only after the file has been closed and flushed.
        loader = PyPDFLoader(temp_pdf_path)
        docs = loader.load()
    finally:
        # Remove the temporary file after processing
        if temp_pdf_path is not None and os.path.exists(temp_pdf_path):
            os.remove(temp_pdf_path)

    # Text splitting for embeddings
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    splits = text_splitter.split_documents(docs)
    if not splits:
        # An empty chunk list gives the vector store nothing to index, so fail
        # here with a clear message instead.
        raise RuntimeError(
            "Error retrieving job description: no extractable text found in the uploaded PDF"
        )

    # Create FAISS vectorstore for retrieval
    vectorstore = FAISS.from_documents(splits, embeddings)
    retriever = vectorstore.as_retriever()

    # Define prompt and QA chain
    system_prompt = "Extract the Job description from the uploaded file in proper format."
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{context}\n{input}"),
        ]
    )

    # Create question-answering chain
    question_answer_chain = create_stuff_documents_chain(llm_2, qa_prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    try:
        # Retrieve the job description using the chain
        response = rag_chain.invoke({
            "input": "Describe the job description in proper format"
        })
        return response["answer"]
    except Exception as e:
        # Chain the original exception so its traceback is kept.
        raise RuntimeError(f"Error retrieving job description: {e}") from e
# Default so the name exists even when no job description has been uploaded.
job_description = None
if jd:
    try:
        job_description = get_jd(jd)
    except Exception as e:
        # Report the failure in the page, the same way resume-scoring errors
        # are reported, instead of aborting the script run.
        st.error(f"Error: {e}")

# File uploader for PDF resumes
uploaded_files = st.file_uploader("Choose PDF files", type="pdf", accept_multiple_files=True)
def _escape_template_braces(value):
    """Double curly braces so the prompt template treats them as literal text."""
    return str(value).replace("{", "{{").replace("}", "}}")


def _load_pdf_pages(uploaded_file):
    """Write an uploaded PDF to a temporary file and return its loaded pages."""
    # Initialised first so cleanup never touches an unbound name.
    temp_pdf_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
            # Record the path first so the file is removed even if the write fails.
            temp_pdf_path = temp_pdf.name
            temp_pdf.write(uploaded_file.getvalue())
        return PyPDFLoader(temp_pdf_path).load()
    finally:
        if temp_pdf_path is not None and os.path.exists(temp_pdf_path):
            os.remove(temp_pdf_path)


def _parse_resume_records(raw_answer):
    """Return the list of record dicts contained in a model answer.

    Accepts a bare JSON array, a single JSON object, or an array surrounded
    by extra text such as a Markdown code fence.
    """
    text = raw_answer.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost [...] span when the model added extra text.
        start = text.find("[")
        end = text.rfind("]")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(text[start:end + 1])
    if isinstance(parsed, dict):
        # A lone object is treated as a one-record result.
        parsed = [parsed]
    if not isinstance(parsed, list):
        raise ValueError("Model answer is not a JSON array of records")
    # Keep only dict entries so the DataFrame gets uniform rows.
    return [record for record in parsed if isinstance(record, dict)]


# Function to process PDFs in batches (4 per batch by default)
def process_pdfs_in_batches(files, batch_size=4):
    """Score uploaded resumes with the LLM and display the results as a table.

    Reads the module-level ``job_title``, ``location``, ``exp``,
    ``embeddings`` and ``llm``. Each batch of resumes is loaded, chunked,
    indexed in a FAISS vector store and sent through a retrieval chain that
    is asked for a JSON array of candidate records. Errors raised while
    querying the model or parsing its answer are shown with ``st.error`` and
    the remaining batches are still processed.

    Args:
        files: Sequence of uploaded file objects exposing ``getvalue()``.
        batch_size: Number of resumes sent to the model per request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # The splitter, prompt and QA chain do not depend on the batch, so they
    # are built once.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)

    # The text-input values are escaped because the system message is parsed
    # as a prompt template; a stray "{" typed by the user would otherwise be
    # read as a template variable.
    safe_title = _escape_template_braces(job_title)
    safe_location = _escape_template_braces(location)
    safe_exp = _escape_template_braces(exp)
    system_prompt = (
        "You are a smart AI agent tasked with evaluating resumes against the job description: "
        f"Job Title: {safe_title}, Location: {safe_location}, Experience: {safe_exp}. "
        "Your evaluation should provide a score (0-100) for each resume based on skills, experience, and other factors. "
        "Extract the following details from each uploaded PDF: Name, Contact Number, Email,Address and the calculated Score. "
        "Output must be a JSON array of dictionaries, where each dictionary has the keys: 'Name', 'Contact Number', 'Email', 'Address','pdf link or name' and 'Score' "
    )
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{context}\n{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

    all_json_data = []
    for start in range(0, len(files), batch_size):
        batch = files[start:start + batch_size]  # Select a batch of files

        documents = []  # Pages from every resume in this batch
        for uploaded_file in batch:
            documents.extend(_load_pdf_pages(uploaded_file))

        # Text splitting for embeddings
        splits = text_splitter.split_documents(documents)
        if not splits:
            # Nothing to index (e.g. PDFs without extractable text): skip the
            # batch rather than fail while building the vector store.
            st.warning("No extractable text found in one batch of resumes; it was skipped.")
            continue

        # Create FAISS vectorstore for retrieval
        vectorstore = FAISS.from_documents(splits, embeddings)
        # NOTE(review): the retriever uses its default result count, so long
        # resumes that split into several chunks may not all reach the model;
        # confirm whether search_kwargs should raise it.
        retriever = vectorstore.as_retriever()
        rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        try:
            response = rag_chain.invoke({
                "input": "Evaluate these resumes and provide scores scores should be very accurate and strictly evaluated as it would be used by recruiter, as given by system prompt ,"
                         " just provide the json data only not anything else and make sure to be consistent with the output and generate text only. Output must be a JSON array of dictionaries in text format, "
                         "where each dictionary has the keys: 'Name', 'Contact Number', 'Email', 'Address', 'pdf link or name' and 'Score' .Just provide the json data nothing else."
                         "Also the generated data should be equal to uploaded resumes not more nor less"
            })
            # Append this batch's records to the combined result.
            all_json_data.extend(_parse_resume_records(response["answer"]))
        except Exception as e:
            st.error(f"Error: {e}")

    # Once all batches are processed, show the combined records as a table.
    df = pd.DataFrame(all_json_data)
    st.dataframe(
        df.style.set_table_styles([
            # Table header style
            {'selector': 'thead th', 'props': [('background-color', '#4CAF50'), ('color', 'white')]},
            # Table body style
            {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('padding', '8px')]},
        ])
    )
# Score the resumes whenever at least one PDF has been uploaded.
resumes_were_uploaded = bool(uploaded_files)
if resumes_were_uploaded:
    process_pdfs_in_batches(uploaded_files)