# Spaces: Sleeping
import streamlit as st | |
import pandas as pd | |
from datasets import Dataset | |
from sentence_transformers import SentenceTransformer | |
from sentence_transformers.util import semantic_search | |
import torch | |
# Module-level setup: load the sentence-embedding model, the precomputed
# corpus embeddings, and the sample table the recommendations are read from.
#
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model and both CSV files are reloaded each time. Consider wrapping these
# loads in Streamlit's caching decorators if the installed version has them.
model = SentenceTransformer("sentence-transformers/gtr-t5-large")

# Read files
url = "https://gist.githubusercontent.com/fer-aguirre/b6bdcf59ecae41f84765f72114de9fd1/raw/b4e029fe236c1f38275621686429b2c7aaa3d18b/embeddings.csv"
df_emb = pd.read_csv(url, index_col=0)  # first CSV column is the row index
df = pd.read_csv('./foia_sample.csv')

# Build the corpus tensor straight from the DataFrame values. The previous
# DataFrame -> datasets.Dataset -> DataFrame round trip was redundant, and
# Dataset.from_pandas may store a non-Range index (as produced by index_col=0)
# as an additional column, which would no longer match the query embedding.
dataset_embeddings = torch.tensor(df_emb.to_numpy(), dtype=torch.float)
# Number of nearest corpus entries retrieved for every query.
_TOP_K = 3


def _dependency_of(row):
    """Return the text before the first "/" in the first cell of ``row``.

    ``row`` is a single row of ``df`` (a Series labelled by column name).
    The first cell is selected by position with ``.iloc`` because plain
    ``row[0]`` on a string-labelled Series relies on a deprecated
    positional fallback.
    """
    return row.str.split(pat="/").iloc[0][0]


def _recommend(request):
    """Return the unique dependency names of the closest corpus entries.

    Encodes ``request`` with ``model``, searches ``dataset_embeddings`` for
    up to ``_TOP_K`` hits, and maps each hit's ``corpus_id`` to a row
    position in ``df``. Names keep the order returned by the search, and
    duplicates are removed by name so one dependency is never listed twice.
    """
    query_embedding = torch.as_tensor(model.encode(request), dtype=torch.float)
    hits = semantic_search(query_embedding, dataset_embeddings, top_k=_TOP_K)
    # hits[0] holds the results for the single query; it can contain fewer
    # than _TOP_K entries, so iterate instead of indexing 0..2 directly.
    names = (_dependency_of(df.iloc[hit["corpus_id"]]) for hit in hits[0])
    return list(dict.fromkeys(names))


def _show_recommendations(names):
    """Render the recommendation heading followed by each name in green."""
    st.markdown("Recomendaciones:")
    for name in names:
        st.markdown(f":green[{name}]")


st.markdown("**Inserta una solicitud de información para generar recomendaciones de dependencias**")

# A non-empty label with label_visibility="hidden" keeps the layout of the
# former empty label while avoiding Streamlit's empty-label warning.
if request := st.text_area(
    "Solicitud de información",
    value="",
    label_visibility="hidden",
):
    _show_recommendations(_recommend(request))

st.markdown("""---""")

if st.button('Genera un ejemplo random'):
    test_example = df['combined'].sample(n=1)
    idx = test_example.index[0]  # index label of the sampled row
    # Use the raw cell value; Series.to_string is a display formatter.
    request = str(test_example.iloc[0])
    st.text(f'{idx}, {request}')
    _show_recommendations(_recommend(request))
    st.markdown("Dependencia original:")
    # idx is a label, so look the row up with .loc rather than .iloc.
    st.markdown(f":red[{_dependency_of(df.loc[idx])}]")