Fer Aguirre
Initial commit
998cded
import streamlit as st
import pandas as pd
from datasets import Dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search
import torch
# Streamlit re-executes this script from the top on every widget interaction.
# Loading the embedding model and downloading/parsing the CSV files on each
# rerun is wasteful, so both are wrapped in cached loaders that run once and
# are reused afterwards.
# `st.cache_resource` is the current name of the API; older Streamlit releases
# expose it as `st.experimental_singleton`. The `or` short-circuits, so the
# legacy attribute is only looked up when the new one is missing.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton

url = "https://gist.githubusercontent.com/fer-aguirre/b6bdcf59ecae41f84765f72114de9fd1/raw/b4e029fe236c1f38275621686429b2c7aaa3d18b/embeddings.csv"


@_cache_resource
def _load_model():
    """Load the sentence-embedding model used to encode incoming requests."""
    return SentenceTransformer("sentence-transformers/gtr-t5-large")


@_cache_resource
def _load_corpus():
    """Read the precomputed embeddings and the FOIA sample.

    Returns:
        A tuple ``(df_emb, df, dataset, dataset_embeddings)`` where
        ``df_emb`` is the embeddings CSV, ``df`` is the local FOIA sample,
        ``dataset`` is ``df_emb`` wrapped as a ``datasets.Dataset`` and
        ``dataset_embeddings`` is the float tensor searched at query time.

    The returned objects are shared between reruns, so callers must treat
    them as read-only.
    """
    # Read files
    embeddings_frame = pd.read_csv(url, index_col=0)
    sample_frame = pd.read_csv('./foia_sample.csv')
    embeddings_dataset = Dataset.from_pandas(embeddings_frame)
    # Same Dataset -> pandas -> numpy -> torch conversion as before, kept
    # unchanged so the tensor layout matches what the search code expects.
    embeddings_tensor = torch.from_numpy(
        embeddings_dataset.to_pandas().to_numpy()
    ).to(torch.float)
    return embeddings_frame, sample_frame, embeddings_dataset, embeddings_tensor


model = _load_model()
df_emb, df, dataset, dataset_embeddings = _load_corpus()
# Number of nearest corpus entries retrieved for each request.
_TOP_K = 3


def _dependency_of(row):
    """Return the dependency label of one ``df`` row.

    The label is the text before the first "/" in the row's first column.

    Args:
        row: A single row of ``df`` as a pandas Series.
    """
    # `.iloc[0]` picks the first column by position explicitly. Plain `[0]`
    # on a Series labelled by column names only works through a positional
    # fallback that pandas has deprecated.
    parts = row.str.split(pat="/").iloc[0]
    return parts[0]


def _recommend(request):
    """Return the distinct dependency labels closest to ``request``.

    Args:
        request: Free-text information request to embed and search for.

    Returns:
        A list of dependency labels, in the order ``semantic_search``
        returned the hits, with repeated labels removed.
    """
    query_embeddings = torch.FloatTensor(model.encode(request))
    hits = semantic_search(query_embeddings, dataset_embeddings, top_k=_TOP_K)
    # Iterate over whatever came back instead of hard-indexing three hits, so
    # a corpus with fewer than `_TOP_K` rows does not raise IndexError.
    labels = [_dependency_of(df.iloc[hit['corpus_id']]) for hit in hits[0]]
    # De-duplicate on the label that is actually displayed, keeping first
    # occurrences in order. Comparing the whole split row could show the same
    # dependency more than once.
    return list(dict.fromkeys(labels))


def _show_recommendations(labels):
    """Render the recommended dependency labels in green."""
    st.markdown('Recomendaciones:')
    for label in labels:
        st.markdown(f':green[{label}]')


st.markdown("**Inserta una solicitud de información para generar recomendaciones de dependencias**")

# The widget keeps a real label for accessibility but hides it, since the
# heading above already describes the field. An empty label makes Streamlit
# emit a warning.
if request := st.text_area(
    "Solicitud de información",
    value="",
    label_visibility="collapsed",
):
    _show_recommendations(_recommend(request))

st.markdown("""---""")

if st.button('Genera un ejemplo random'):
    test_example = df['combined'].sample(n=1)
    # `idx` is an index *label*, so the row is fetched with `.loc`, not `.iloc`.
    idx = test_example.index[0]
    original = _dependency_of(df.loc[idx])
    # Use the raw cell value. `Series.to_string()` produces a display
    # rendering governed by pandas' display options, not the original text.
    request = str(test_example.iloc[0])
    st.text(f'{idx}, {request}')
    _show_recommendations(_recommend(request))
    st.markdown('Dependencia original:')
    st.markdown(f':red[{original}]')