import torch
import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login
from pydantic import BaseModel, model_validator


# Hugging Face access token, read from the Streamlit secrets store.
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]

# Llama 3.2 checkpoint to load.
# Alternative checkpoint kept for reference: "meta-llama/Llama-3.2-3B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# Use the first GPU when one is available; -1 tells the pipeline to use the CPU.
device = 0 if torch.cuda.is_available() else -1


@st.cache_resource
def _load_text_generation_pipeline(model_id: str, device_index: int, _hf_token: str):
    """Authenticate, load the model and tokenizer, and build the pipeline.

    Streamlit re-executes the whole script on every user interaction, so
    without caching the weights would be reloaded on each rerun.
    ``st.cache_resource`` keeps one pipeline per ``(model_id, device_index)``
    for the lifetime of the server process.

    Args:
        model_id: Hugging Face Hub identifier of the causal LM checkpoint.
        device_index: Device for the pipeline (GPU ordinal, or -1 for CPU).
        _hf_token: Hugging Face access token. The leading underscore tells
            Streamlit not to hash this argument into the cache key.

    Returns:
        A ``text-generation`` pipeline whose default ``max_length`` is 512.
    """
    # Logging in here means authentication also happens once per process.
    login(_hf_token)

    loaded_model = AutoModelForCausalLM.from_pretrained(model_id)
    # `device_map` is a model-loading option; tokenizers hold no weights, so
    # it is intentionally not passed here.
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)

    return pipeline(
        task='text-generation',
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        max_length=512,
        device=device_index,
    )


# Text-generation pipeline (cached across Streamlit reruns).
pipe = _load_text_generation_pipeline(model_name, device, huggingface_token)

# Keep the module-level handles available for code that refers to them.
model = pipe.model
tokenizer = pipe.tokenizer

# LangChain wrapper around the transformers pipeline.
llm_pipeline = HuggingFacePipeline(pipeline=pipe)

# Streamlit interface
# The title names the model family actually loaded (a Llama 3.2 checkpoint).
st.title("Cosine Similarity with Llama 3.2")


# Fixed search query that the uploaded job titles are ranked against.
query = "aspiring human resources specialist"

# Upload CSV file
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
# Echo the query to the server console (stdout), not to the web page.
print("Query: ", query)

# Rank the job titles from the uploaded CSV against `query` and show the answer.
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        # A malformed or empty upload should produce a message, not a traceback.
        st.error(f"Could not read the uploaded CSV: {e}")
    else:
        if 'job_title' not in df.columns:
            st.error("The uploaded CSV must contain a 'job_title' column.")
        else:
            # Drop missing and blank cells so they do not reach the prompt as "nan" or "".
            cleaned_titles = df['job_title'].dropna().astype(str).str.strip()
            job_titles = cleaned_titles[cleaned_titles != ""].tolist()

            if not job_titles:
                st.warning("The 'job_title' column does not contain any job titles.")
            elif query:
                st.write("Query:", query)

                # Built line by line so the source-code indentation does not
                # end up inside the text sent to the model.
                prompt = (
                    "You are an AI assistant. You have a list of job titles and a search query.\n"
                    "Your task is to rank these job titles by their semantic similarity to the given query.\n"
                    "Please provide the ranking from most relevant to least relevant.\n"
                    "Do not calculate cosine similarity; instead, focus on understanding "
                    "the semantic relevance of each job title to the query.\n"
                    "\n"
                    "Format your response like this:\n"
                    "1. [Most Relevant Job Title]\n"
                    "2. [Second Most Relevant Job Title]\n"
                    "...\n"
                    "N. [Least Relevant Job Title]\n"
                    "\n"
                    f'Query: "{query}"\n'
                    f"Job Titles: {job_titles}\n"
                )

                # Call the model with the prompt.
                try:
                    # Only `max_new_tokens` is passed: it takes precedence over
                    # `max_length`, so also passing `max_length` had no effect
                    # beyond a warning. `return_full_text=False` makes the
                    # pipeline return just the completion, not prompt + completion.
                    response = pipe(
                        prompt,
                        max_new_tokens=300,
                        num_return_sequences=1,
                        return_full_text=False,
                    )
                    answer = response[0]['generated_text']
                except Exception as e:
                    # UI boundary: report any generation failure to the user.
                    st.error(f"Error while processing: {e}")
                else:
                    # Show the model's answer.
                    st.write("Model Answer:")
                    st.write(answer)