|
from sentence_transformers import SentenceTransformer, util |
|
import numpy as np |
|
import pandas as pd |
|
import gradio as gr |
|
|
|
|
|
# Sentence encoder used to embed incoming search queries.
model = SentenceTransformer("sentence-transformers/msmarco-bert-base-dot-v5")

# Keep only the columns whose header consists solely of numeric characters
# (presumably one column per embedding dimension -- confirm against the file).
doc_emb = pd.read_excel(
    "proposals_emb.xlsx",
    usecols=lambda column: str(column).isnumeric(),
)

# Proposal records. `processing` indexes this frame positionally with row
# labels taken from `doc_emb`, so the two files are expected to be row-aligned.
df = pd.read_excel("proposals_clean.xlsx")
|
|
|
|
|
def cosine(u, v):
    """Return the cosine similarity of vectors *u* and *v*.

    The result is the dot product of the two vectors divided by the product
    of their Euclidean norms. Zero-norm inputs are not special-cased; the
    division is performed as-is.
    """
    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    return np.dot(u, v) / norm_product
|
|
|
|
|
def form_link(post_id):
    """Return the Kusama Polkassembly referendum URL for *post_id*."""
    base_url = "https://kusama.polkassembly.io/referenda"
    return f"{base_url}/{post_id}"
|
|
|
|
|
def processing(query):
    """Return a Markdown list of the five proposals most similar to *query*.

    The query is embedded with the module-level ``model`` and scored by
    cosine similarity against every row of ``doc_emb``. The row labels of the
    five highest-scoring embeddings are then used positionally to look up the
    matching proposals in ``df``.

    Args:
        query: Free-text search string handed to ``model.encode``.

    Returns:
        A Markdown string with one numbered line per match. Each line links
        to the proposal page (see ``form_link``) and shows the proposal's
        ``status`` followed by its rank.
    """
    query_emb = model.encode(query)

    # Keep the scores in a local Series instead of a temporary column on the
    # shared ``doc_emb`` frame. The module-level frame is never mutated, so
    # overlapping requests cannot clobber each other's scores and a failure
    # further down cannot leave a stale column that would break later calls.
    scores = doc_emb.apply(lambda row: cosine(row, query_emb), axis=1)
    top_rows = scores.nlargest(5).index

    # Only the status is rendered; the label of each row doubles as the
    # proposal number shown in the link.
    statuses = df.iloc[top_rows]["status"]

    return "".join(
        f"{rank}. [Proposal #{post_id}]({form_link(post_id)}) - {status}: {rank}\n"
        for rank, (post_id, status) in enumerate(statuses.items(), start=1)
    )
|
|
|
|
|
# Wire `processing` into a Gradio UI: plain-text input, Markdown output.
iface = gr.Interface(fn=processing, inputs="text", outputs="markdown")

# Start the web app when the module is executed.
iface.launch()
|
|