"""Streamlit app: rank uploaded job titles against a fixed query with Llama 3.2.

The user uploads a CSV that has a ``job_title`` column.  The titles and a
hard-coded query are placed into a prompt, and a Hugging Face
``text-generation`` pipeline is asked to order the titles from most to
least semantically relevant.  The raw model output is shown on the page.
"""
import torch
import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login
from pydantic import BaseModel, model_validator

# Hugging Face access token, read from Streamlit secrets.
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]

# Llama 3.2 checkpoint used for generation.
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# Use the first GPU when CUDA is available, otherwise run on CPU (-1).
device = 0 if torch.cuda.is_available() else -1


@st.cache_resource
def _load_text_generation_pipeline(name, token, device_index):
    """Log in to the Hugging Face Hub and build the text-generation pipeline.

    Streamlit re-executes the script on every user interaction; caching
    this function keeps the login, the weight loading and the pipeline
    construction from being repeated on each rerun.

    Args:
        name: Hub identifier of the causal-LM checkpoint to load.
        token: Hugging Face access token passed to ``login``.
        device_index: Device passed to ``pipeline`` (``-1`` for CPU).

    Returns:
        The ``transformers`` text-generation pipeline.
    """
    login(token)
    loaded_model = AutoModelForCausalLM.from_pretrained(name)
    # No ``device_map`` here: it is a model-loading option and has no
    # effect on a tokenizer.
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return pipeline(
        task="text-generation",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        max_length=512,
        device=device_index,
    )


def _build_ranking_prompt(search_query, titles):
    """Return the ranking prompt for ``search_query`` over ``titles``."""
    return (
        " You are an AI assistant. You have a list of job titles and a "
        "search query. Your task is to rank these job titles by their "
        "semantic similarity to the given query. Please provide the "
        "ranking from most relevant to least relevant. Do not calculate "
        "cosine similarity; instead, focus on understanding the semantic "
        "relevance of each job title to the query. \n"
        "Format your response like this: 1. [Most Relevant Job Title] "
        "2. [Second Most Relevant Job Title] ... "
        "N. [Least Relevant Job Title] "
        f'Query: "{search_query}" Job Titles: {titles} '
    )


pipe = _load_text_generation_pipeline(model_name, huggingface_token, device)

# Kept as module-level names in case code further down the file uses them.
model, tokenizer = pipe.model, pipe.tokenizer

# LangChain wrapper around the transformers pipeline.
llm_pipeline = HuggingFacePipeline(pipeline=pipe)

# Streamlit interface.  The title names the model family actually loaded.
st.title("Cosine Similarity with Llama 3.2")

# Fixed search query.
query = "aspiring human resources specialist"

# CSV upload widget.
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])

print("Query: ", query)

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    if "job_title" not in df.columns:
        st.error("The uploaded CSV must contain a 'job_title' column.")
    else:
        # Drop missing cells so they do not reach the prompt as ``nan``.
        job_titles = df["job_title"].dropna().tolist()

        if not job_titles:
            st.warning("The 'job_title' column has no values to rank.")
        elif query:
            st.write("Query:", query)
            prompt = _build_ranking_prompt(query, job_titles)

            # Ask the model for the ranking.
            try:
                # ``max_new_tokens`` alone bounds the answer length; a
                # ``max_length`` passed next to it would be overridden.
                # ``return_full_text=False`` returns only the completion
                # instead of the prompt followed by the completion.
                response = pipe(
                    prompt,
                    max_new_tokens=300,
                    num_return_sequences=1,
                    return_full_text=False,
                )
                # Show the model's answer.
                st.write("Model Answer:")
                st.write(response[0]["generated_text"])
            except Exception as e:
                # Top-level UI boundary: report the failure on the page.
                st.error(f"Error while processing: {str(e)}")