Prueba_1 / app.py
JaphetHernandez's picture
Update app.py
235f923 verified
raw
history blame
8.49 kB
import pandas as pd
import streamlit as st
from langchain_huggingface import HuggingFacePipeline # Nueva importación
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from huggingface_hub import login
import torch
import json
from datetime import datetime
# Authenticate against the Hugging Face Hub with the token stored under the
# "FIREWORKS" key of Streamlit's secrets.
huggingface_token = st.secrets["FIREWORKS"]
login(token=huggingface_token)
# Hub repository id of the Fireworks function-calling checkpoint.
model_id = "fireworks-ai/firefunction-v2"


@st.cache_resource
def _load_model_and_tokenizer(checkpoint: str):
    """Load the tokenizer and causal-LM weights for *checkpoint* only once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the tokenizer and the model weights would
    be loaded again on each rerun; ``st.cache_resource`` keeps one shared
    instance for the lifetime of the server process.

    Args:
        checkpoint: Hub repository id of the model to load.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    return loaded_tokenizer, loaded_model


# Same module-level names as before, now backed by the cached loader.
tokenizer, model = _load_model_and_tokenizer(model_id)
# JSON-schema style description of the single function advertised to the
# function-calling model. Key order is kept stable because the serialized
# text below is what gets handed to the chat template.
_similarity_arguments = {
    "query": {
        "type": "string",
        "description": "The main query string for similarity calculation",
    },
    "job_title": {
        "type": "string",
        "description": "The job title to compare with the query",
    },
}

function_spec = [
    dict(
        name="calculate_cosine_similarity",
        description="Calculate the cosine similarity between two strings.",
        parameters=dict(
            type="object",
            properties=_similarity_arguments,
            # Both arguments are mandatory, in declaration order.
            required=list(_similarity_arguments),
        ),
    )
]

# Serialized form of the specification: JSON text indented by 4 spaces.
functions = json.dumps(function_spec, indent=4)
# Build a Transformers text-generation pipeline around the loaded model and
# tokenizer, capping every completion at 128 newly generated tokens.
fireworks_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
)

# Expose that pipeline through LangChain's HuggingFacePipeline wrapper.
llm_pipeline = HuggingFacePipeline(pipeline=fireworks_pipeline)
# Streamlit interface: upload a CSV with a 'job_title' column, send the query
# and the job titles to the function-calling model, and show its raw answer
# together with the (currently simulated) score column.
st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")

# CSV upload widget; returns None until the user provides a file.
uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])

if uploaded_file is not None:
    # Parse the upload. An empty, malformed or badly encoded file is reported
    # in the UI instead of surfacing as an unhandled traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"No se pudo leer el archivo CSV: {e}")
        st.stop()

    if 'job_title' in df.columns:
        query = 'aspiring human resources specialist'
        # Drop missing cells and force strings so the prompt never contains NaN.
        job_titles = df['job_title'].dropna().astype(str).tolist()

        # Prompt carrying both the query and the job titles to the model.
        prompt_template = PromptTemplate(
            template=(
                "Calculate the cosine similarity between the query: '{query}' "
                "and the list of job titles: {job_titles}. "
                "Return the results as 'Job Title: [Job Title], Score: [Cosine Similarity Score]'."
            ),
            input_variables=["query", "job_titles"]
        )

        # Run the generation only when the user asks for it.
        if st.button("Calcular Similitud de Coseno"):
            with st.spinner("Calculando similitudes con Fireworks..."):
                try:
                    # The user turn is rendered from the template so that the
                    # job titles actually reach the model; previously the
                    # message mentioned "job titles" without including them.
                    user_request = prompt_template.format(query=query, job_titles=job_titles)
                    messages = [
                        {'role': 'system', 'content': 'You are a helpful assistant with access to functions. Use them if required.'},
                        {'role': 'user', 'content': user_request}
                    ]

                    # The chat template receives the function specification
                    # and the current timestamp alongside the messages.
                    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    model_inputs = tokenizer.apply_chat_template(
                        messages,
                        functions=functions,
                        datetime=now,
                        return_tensors="pt"
                    ).to(model.device)

                    # Generate and decode the model's answer.
                    generated_ids = model.generate(model_inputs, max_new_tokens=128)
                    decoded = tokenizer.batch_decode(generated_ids)

                    st.write("Respuesta del modelo:")
                    st.write(decoded[0])

                    # TODO: placeholder for the demo. Every row gets the same
                    # fixed score; the model response is not parsed yet.
                    df['Score'] = [0.95] * len(df)

                    # Show the updated DataFrame.
                    st.write("DataFrame con los puntajes de similitud:")
                    st.write(df)
                except Exception as e:
                    # UI boundary: report any generation failure to the user.
                    st.error(f"Error durante la generación: {e}")
    else:
        st.error("La columna 'job_title' no se encuentra en el archivo CSV.")
# NOTE: everything between the triple quotes below is a bare string literal.
# It is never assigned, imported or executed, so it has no effect at runtime.
# It holds a disabled variant of this script (apparently a previous version)
# that loads "meta-llama/Llama-3.2-1B" and sends the prompt through
# `llm_chain.run(...)` instead of calling the chat template directly.
'''
import pandas as pd
import streamlit as st
from langchain.llms import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from huggingface_hub import login
import torch
# API Key de Hugging Face
huggingface_token = st.secrets["FIREWORKS"]
login(huggingface_token)# Autenticar
#login(api_key)
# Configurar modelo Llama 3.1
model_id = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id, truncation=True)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16)
# Crear pipeline con Fireworks
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=50) #, max_length=1024)
llm_pipeline = HuggingFacePipeline(pipeline=pipe)
# Interfaz de Streamlit
st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")
# Subir archivo CSV
uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])
if uploaded_file is not None:
# Cargar el CSV en un DataFrame
df = pd.read_csv(uploaded_file)
print(df)
if 'job_title' in df.columns:
query = 'aspiring human resources specialist'
job_titles = df['job_title'].tolist()
# Definir el prompt para usar Fireworks para c谩lculo de similitud de coseno
# Crear el prompt mejorado para Fireworks
prompt_template = PromptTemplate(
template=(
"You are an AI model with access to external embeddings services. Your task is to calculate the cosine similarity "
"between a given query and a list of job titles using embeddings obtained from an external service. "
"Follow these steps to complete the task:\n\n"
"1. Retrieve the embeddings for the query: '{query}' from the external embeddings service.\n"
"2. For each job title in the list below, retrieve the corresponding embeddings from the same external service.\n"
"3. Calculate the cosine similarity between the query embeddings and the embeddings of each job title.\n"
"4. Return the results in the following format:\n"
" - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
" - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
" ...\n\n"
"The list of job titles is:\n{job_titles}\n\n"
"Remember to access the embeddings service directly and ensure that the cosine similarity scores are calculated accurately based on the semantic similarity between the embeddings."
),
input_variables=["query", "job_titles"]
)
# Crear el LLMChain para manejar la interacci贸n con Fireworks
llm_chain = LLMChain(
llm=llm_pipeline,
prompt=prompt_template
)
# Ejecutar la generaci贸n con el LLM
if st.button("Calcular Similitud de Coseno"):
with st.spinner("Calculando similitudes con Fireworks y Llama 3.1..."):
try:
result = llm_chain.run({"query": query, "job_titles": job_titles})
st.write("Respuesta del modelo:")
st.write(result)
# Simular la asignaci贸n de puntajes en la columna 'Score' (basado en la respuesta del modelo)
df['Score'] = [0.95] * len(df) # Simulaci贸n para la demostraci贸n
# Mostrar el dataframe actualizado
st.write("DataFrame con los puntajes de similitud:")
st.write(df)
except Exception as e:
st.error(f"Error durante la generaci贸n: {e}")
else:
st.error("La columna 'job_title' no se encuentra en el archivo CSV.")
'''