from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import pandas as pd
import streamlit as st
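# Streamlit app: upload a CSV of job titles and have Llama 3.1 rank them against a free-text query.
# Assumed setup: the streamlit, pandas, torch, transformers and accelerate packages are installed,
# the Hugging Face token has been granted access to the gated meta-llama/Llama-3.1-8B-Instruct model,
# and the token is stored as HUGGINGFACEHUB_API_TOKEN in .streamlit/secrets.toml.
# Run with: streamlit run app.py  (script name assumed)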

# Your Hugging Face access token, read from Streamlit secrets
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]

# Load the model and tokenizer (device_map="auto" requires accelerate and places the model on GPU when available)
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=huggingface_token, torch_dtype=torch.float16, device_map="auto")

# Use Streamlit to upload the CSV file
uploaded_file = st.file_uploader("Please upload a CSV file:", type="csv")

if uploaded_file is not None:
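    # Read the uploaded CSV and check that it contains the expected 'job_title' column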
    df = pd.read_csv(uploaded_file)
    if 'job_title' not in df.columns:
        st.error("The CSV file must contain a column named 'job_title'.")
    else:
        job_titles = df['job_title'].tolist()
        job_title_text = "\n".join(f"- {title}" for title in job_titles)

        user_query = st.text_input("Enter your query:")

        if user_query:
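            # Build the instruction prompt: list every job title from the CSV and ask the model
            # to score and rank them against the user's query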
            prompt = f"""
            You are an advanced AI assistant trained to process job titles and user queries. I will provide you with a list of job titles, and a user query. Your task is to:
            1. Calculate the cosine similarity score between the query and each job title.
            2. Rank the job titles from the most similar to the least similar based on their semantic meaning.
            3. Return the top 5 job titles with their cosine similarity scores.
            Here is the list of job titles from the CSV:
            {job_title_text}
            The user's query is: "{user_query}"
            Now, compute the similarity scores, rank the job titles, and return the top 5.
            """

            # Tokenize the prompt and move the tensors to the same device as the model
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

            try:
                with torch.no_grad():
                    outputs = model.generate(**inputs, max_new_tokens=200)

                # Decode only the newly generated tokens so the prompt is not echoed back in the answer
                generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
                response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
                
                if response:
                    st.write(response)
                else:
                    st.error("No se generó ninguna respuesta.")
            except Exception as e:
                st.error(f"Error al generar la respuesta: {e}")

        # Free memory after inference (only relevant when running on a GPU)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()