import torch
import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login
from pydantic import BaseModel, model_validator
# Hugging Face token, read from Streamlit secrets
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
login(huggingface_token)
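# Note: st.secrets reads from a .streamlit/secrets.toml file; a minimal entry
# would look like this (placeholder value, not a real token):
#
#   HUGGINGFACEHUB_API_TOKEN = "hf_..."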
# Load Llama 3.2
# model_name = "meta-llama/Llama-3.2-3B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)  # device placement is handled by the pipeline below
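# Suggestion (not part of the original app): Streamlit reruns the whole script
# on every interaction, so the model is reloaded each time. Wrapping the load
# in a function decorated with @st.cache_resource caches it across reruns:
#
#   @st.cache_resource
#   def load_model(name):
#       return AutoModelForCausalLM.from_pretrained(name), AutoTokenizer.from_pretrained(name)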
# Detect whether a GPU is available and set the device accordingly
# (0 = first CUDA device, -1 = CPU, per the transformers pipeline convention)
device = 0 if torch.cuda.is_available() else -1
# Configure the text-generation pipeline; the generation length is set per
# call below, so no max_length is fixed here (setting both would conflict)
pipe = pipeline(task='text-generation', model=model, tokenizer=tokenizer, device=device)
# Wrap the transformers pipeline so it can also be used from LangChain
llm_pipeline = HuggingFacePipeline(pipeline=pipe)
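# The LangChain wrapper is not used further in this script; a minimal
# (hypothetical) call through it would look like:
#
#   answer = llm_pipeline.invoke("Rank these job titles by relevance: ...")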
# Streamlit interface
st.title("Semantic Similarity Ranking with Llama 3.2")
# Initialize the query
query = "aspiring human resources specialist"
# Upload CSV file
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
print("Query: ", query)
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    if 'job_title' not in df.columns:
        st.error("The uploaded CSV must contain a 'job_title' column.")
    else:
        job_titles = df['job_title'].tolist()
        if query:
            st.write("Query:", query)
            prompt = f"""
You are an AI assistant. You have a list of job titles and a search query.
Your task is to rank these job titles by their semantic similarity to the given query.
Please provide the ranking from most relevant to least relevant.
Do not calculate cosine similarity; instead, focus on understanding the semantic relevance of each job title to the query.
Format your response like this:
1. [Most Relevant Job Title]
2. [Second Most Relevant Job Title]
...
N. [Least Relevant Job Title]
Query: "{query}"
Job Titles: {job_titles}
"""
            # Call the model with the prompt (only max_new_tokens is set;
            # passing max_length as well would conflict with it)
            try:
                response = pipe(prompt, max_new_tokens=300, num_return_sequences=1)
                # Display the model's response; note that 'generated_text'
                # includes the prompt unless return_full_text=False is passed
                st.write("Model Answer:")
                st.write(response[0]['generated_text'])
            except Exception as e:
                st.error(f"Error while processing: {str(e)}")
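# Example usage (assuming this file is saved as app.py):
#
#   streamlit run app.py
#
# The uploaded CSV must contain a 'job_title' column, e.g. (illustrative rows):
#
#   job_title
#   HR Generalist
#   Recruiting Coordinator
#   Data Analyst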