Spaces:

Manas281
/

Resume_Evaluation

No application file

File size: 8,956 Bytes

9a800ac

import streamlit as st
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
import os
from dotenv import load_dotenv
import google.generativeai as genai
import pandas as pd
import json
from io import BytesIO
import tempfile

# Load environment variables
load_dotenv()

# Set up embeddings
HF_TOKEN = os.getenv('HF_TOKEN')
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# API keys
api_key = os.getenv('API_KEY')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=GEMINI_API_KEY)

# Set up generative model
model = genai.GenerativeModel('gemini-pro')


def create_job_post(job_title, location, exp):
    prompt = (
        f"Create a job opening post for platforms like Internshala, LinkedIn, and Naukri.com. "
        f"The post should include the job title: {job_title}, location: {location}, and required experience: {exp}. "
        f"Make it attractive, include skills (if possible but the skills in boxes and highlight them) required, who can apply, benefits, and other necessary details. "
        f"The post should be 100-200 words."
    )

    try:
        # Replace 'model.generate_content' with the actual method to generate content
        response = model.generate_content(prompt)  # assuming 'model' is defined elsewhere
        return response.text
    except Exception as e:
        return f"Error generating response: {e}"


# Streamlit app title
st.title("Recruitment AI")

# Job title, location, and experience input fields
job_title = st.text_input("Enter the job title you are looking for")
location = st.text_input("Enter the location you are looking for")
exp = st.text_input("Enter the experience you are looking for")

# Button to generate job post
if st.button("Create Job Post"):
    if job_title and location and exp:
        job_post = create_job_post(job_title, location, exp)
        st.write(job_post)
    else:
        st.warning("Please fill in all fields to generate the job post.")

job_post = create_job_post(job_title, location, exp)
# Resume scoring section

llm = ChatGroq(groq_api_key=api_key, model_name="gemma-7b-it")
llm_2 = ChatGroq(groq_api_key=api_key, model_name="gemma-7b-it")



jd=st.file_uploader("Upload Job Description", type="pdf")
def get_jd(jd):
    try:
        # Use a temporary file to save the uploaded file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
            temp_pdf.write(jd.getvalue())
            temp_pdf_path = temp_pdf.name

        # Load the PDF using PyPDFLoader
        loader = PyPDFLoader(temp_pdf_path)
        docs = loader.load()

    finally:
        # Remove the temporary file after processing
        if os.path.exists(temp_pdf_path):
            os.remove(temp_pdf_path)

    # Text splitting for embeddings
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    splits = text_splitter.split_documents(docs)

    # Create FAISS vectorstore for retrieval
    vectorstore = FAISS.from_documents(splits, embeddings)
    retriever = vectorstore.as_retriever()

    # Define prompt and QA chain
    system_prompt = (
        f"Extract the Job description from the uploaded file in proper format."
    )

    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{context}\n{input}"),
        ]
    )

    # Create question-answering chain
    question_answer_chain = create_stuff_documents_chain(llm_2, qa_prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    try:
        # Retrieve the job description using the chain
        response = rag_chain.invoke({
            "input": "Describe the job description in proper format"
        })
        job_description = response["answer"]

        return job_description

    except Exception as e:
        raise RuntimeError(f"Error retrieving job description: {e}")

if jd:
    job_description = get_jd(jd)



    # File uploader for PDF resumes
uploaded_files = st.file_uploader("Choose PDF files", type="pdf", accept_multiple_files=True)


# Function to process PDFs in batches of 4




def process_pdfs_in_batches(files):
    batch_size = 4
    num_batches = (len(files) // batch_size) + (1 if len(files) % batch_size != 0 else 0)
    all_json_data = []

    for i in range(num_batches):
        batch = files[i * batch_size: (i + 1) * batch_size]  # Select a batch of files
        documents = []  # List to hold all document contents

        for j, uploaded_file in enumerate(batch):
            try:
                # Use a temporary file to save the uploaded file
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                    temp_pdf.write(uploaded_file.getvalue())
                    temp_pdf_path = temp_pdf.name

                # Load the PDF using PyPDFLoader
                loader = PyPDFLoader(temp_pdf_path)
                docs = loader.load()
                documents.extend(docs)

            finally:
                # Remove the temporary file after processing
                if os.path.exists(temp_pdf_path):
                    os.remove(temp_pdf_path)

        # Text splitting for embeddings
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
        splits = text_splitter.split_documents(documents)

        # Create FAISS vectorstore for retrieval
        vectorstore = FAISS.from_documents(splits, embeddings)
        retriever = vectorstore.as_retriever()

        # Define prompt and QA chain
        system_prompt = (
            f"You are a smart AI agent tasked with evaluating resumes against the job description: "
            f"Job Title: {job_title}, Location: {location}, Experience: {exp}. "
            f"Your evaluation should provide a score (0-100) for each resume based on skills, experience, and other factors. "
            f"Extract the following details from each uploaded PDF: Name, Contact Number, Email,Address and the calculated Score. "
            "Output must be a JSON array of dictionaries, where each dictionary has the keys: 'Name', 'Contact Number', 'Email', 'Address','pdf link or name' and 'Score' "


        )

        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_prompt),
                ("human", "{context}\n{input}"),
            ]
        )

        # Create question-answering chain
        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
        rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        try:
            # Button for scoring resumes
            response = rag_chain.invoke({
                "input": "Evaluate these resumes and provide scores scores should be very accurate and strictly evaluated as it would be used by recruiter, as given by system prompt ,"
                         " just provide the json data only not anything else and make sure to be consistent with the output and generate text only. Output must be a JSON array of dictionaries in text format, "
                         "where each dictionary has the keys: 'Name', 'Contact Number', 'Email','Address and 'Score' .Just provide the json data nothing else."
                         "Also the generated data should be equal to uploaded resumes not more nor less"
            })



            json_data = json.loads(response["answer"])

            # Append the JSON data to the all_json_data list

            all_json_data.extend(json_data)

            #st.write(json_data)

        except Exception as e:
            st.error(f"Error: {e}")

    # Once all batches are processed, you can use all_json_data as needed
    # For example, converting it into a DataFrame and displaying
    df = pd.DataFrame(all_json_data)
    st.dataframe(
        df.style
        # Highlight min values
        .set_table_styles([
            {'selector': 'thead th', 'props': [('background-color', '#4CAF50'), ('color', 'white')]},
            # Table header style
            {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('padding', '8px')]}  # Table body style
        ])
    )


# Call the batch processing function if files are uploaded
if uploaded_files:
    process_pdfs_in_batches(uploaded_files)