"""Streamlit app that recommends agencies ("dependencias") for a request.

The user's text is embedded with a SentenceTransformer model and compared,
via semantic search, against a table of precomputed embeddings. The best
matching rows of the local sample CSV are shown as recommendations.
"""
import streamlit as st
import pandas as pd
from datasets import Dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search
import torch

MODEL_NAME = "sentence-transformers/gtr-t5-large"
EMBEDDINGS_URL = "https://gist.githubusercontent.com/fer-aguirre/b6bdcf59ecae41f84765f72114de9fd1/raw/b4e029fe236c1f38275621686429b2c7aaa3d18b/embeddings.csv"
REQUESTS_PATH = './foia_sample.csv'
TOP_K = 3


@st.cache_resource
def _load_model():
    """Load the sentence-embedding model once per server process."""
    return SentenceTransformer(MODEL_NAME)


@st.cache_resource
def _load_corpus_embeddings():
    """Download the precomputed embeddings and return them as a float tensor."""
    df_emb = pd.read_csv(EMBEDDINGS_URL, index_col=0)
    # NOTE(review): the Dataset round trip is kept exactly as in the original
    # pipeline; it looks like an identity conversion -- confirm before removing.
    dataset = Dataset.from_pandas(df_emb)
    return torch.from_numpy(dataset.to_pandas().to_numpy()).to(torch.float)


@st.cache_data
def _load_requests():
    """Read the local sample CSV of requests."""
    return pd.read_csv(REQUESTS_PATH)


# Read files
# Streamlit re-executes this script on every widget interaction, so the
# expensive loads go through cached loaders instead of running each time.
model = _load_model()
df = _load_requests()
dataset_embeddings = _load_corpus_embeddings()


def _agency_name(row_position):
    """Return the text before the first "/" in the first column of a row.

    Presumably the first column of the sample CSV holds a "/"-separated
    agency path -- verify against the data file.
    """
    # Explicit positional access; integer keys on a label-indexed Series
    # are deprecated in recent pandas.
    return str(df.iloc[row_position, 0]).split("/")[0]


def recommend(request):
    """Return up to TOP_K distinct agency names for *request*, best hit first."""
    query_embeddings = torch.as_tensor(model.encode(request), dtype=torch.float)
    hits = semantic_search(query_embeddings, dataset_embeddings, top_k=TOP_K)
    # Iterate over whatever came back: fewer than TOP_K hits must not crash.
    names = (_agency_name(hit['corpus_id']) for hit in hits[0])
    # dict.fromkeys drops repeated names while preserving ranking order.
    return list(dict.fromkeys(names))


def _show_recommendations(request):
    """Render the recommendations for *request* in green."""
    st.markdown('Recomendaciones:')
    for name in recommend(request):
        st.markdown(f':green[{name}]')


st.markdown("**Inserta una solicitud de información para generar recomendaciones de dependencias**")

# A non-empty label hidden with label_visibility replaces the empty label,
# which Streamlit discourages for accessibility reasons.
if request := st.text_area(
    "Solicitud de información",
    value="",
    label_visibility="collapsed",
):
    _show_recommendations(request)

st.markdown("""---""")

if st.button('Genera un ejemplo random'):
    test_example = df['combined'].sample(n=1)
    # df is read without index_col, so the sampled label is also the
    # row position.
    idx = test_example.index[0]
    request = test_example.to_string(index=False)

    st.text(f'{idx}, {request}')
    _show_recommendations(request)

    st.markdown('Dependencia original:')
    st.markdown(f':red[{_agency_name(idx)}]')