|
import io |
|
import gradio as gr |
|
import torch |
|
from nextus_regressor_class import * |
|
import nltk |
|
from pprint import pprint |
|
import pandas as pd |
|
# Instantiate the regressor and load the trained weights from disk,
# then switch to inference mode (disables dropout/batch-norm updates).
model = NextUsRegressor()

model.load_state_dict(torch.load("./nextus_regressor1012.pt"))

model.eval()

# Placeholder token for ablated text spans.
# NOTE(review): not referenced anywhere else in this file — confirm it is
# used by another module before removing.
mask = "[MASKED]"

# Minimum absolute score shift for a token to receive a "+" / "-" label
# in shap() below; smaller shifts are labeled None.
threshold = 0.05
|
def shap(txt, tok_level):
    """Leave-one-out attribution of the model's Slant score over *txt*.

    Scores the full text once, then re-scores one variant per token with
    that token removed; the score shift caused by each removal is the
    token's SHAP-style contribution.

    Args:
        txt: input article text (str).
        tok_level: "word" or "sentence" — ablation granularity.

    Returns:
        A pair ``(pairs, explanation)`` where ``pairs`` is a list of
        ``(token, label)`` tuples — label "+" when removing the token
        lowers the score by at least ``threshold``, "-" when it raises
        the score by at least ``threshold``, else None — and
        ``explanation`` is a human-readable (Korean) summary string.

    Raises:
        ValueError: if ``tok_level`` is not "word" or "sentence".
    """
    if tok_level == "word":
        tokens = nltk.word_tokenize(txt)
    elif tok_level == "sentence":
        tokens = nltk.sent_tokenize(txt)
    else:
        # The original fell through with `tokens` undefined (NameError);
        # fail fast with a clear message instead.
        raise ValueError("tok_level must be 'word' or 'sentence'")

    # Guard: empty input would make torch.max/argmax below raise.
    if not tokens:
        return [], ""

    # One leave-one-out variant per token.
    # BUGFIX: the original also prepended the full text to this batch,
    # which shifted every SHAP value by one position relative to `tokens`
    # and let torch.argmax index one past the end of `tokens`.
    batch = [
        " ".join(s for j, s in enumerate(tokens) if j != i)
        for i in range(len(tokens))
    ]

    with torch.no_grad():
        y_pred = model(txt)
        y_offs = model(batch)

    deltas = y_offs - y_pred
    shapss = [s[0] for s in deltas.tolist()]

    labels = []
    for s in shapss:
        if s <= -1.0 * threshold:
            labels.append("+")   # removal lowers the score: positive contribution
        elif s >= threshold:
            labels.append("-")   # removal raises the score: negative contribution
        else:
            labels.append(None)  # below threshold: no highlight

    # Debug dump of per-token attributions to the console.
    pprint(list(zip(tokens, shapss)))

    largest_shap = torch.max(deltas).item()
    largest_shap_span = tokens[torch.argmax(deltas).item()]
    # NOTE(review): the original literal was mojibake-garbled; the Korean
    # below is reconstructed ("The most influential text is ... without it
    # the Slant score deviates from X by Y") — confirm wording.
    explanation = ("가장 큰 영향을 미친 텍스트는\n'" + largest_shap_span
                   + "'\n이며, 해당 텍스트가 없을 경우 Slant 스코어\n"
                   + str(round(y_pred.item(), 4)) + "\n에서\n"
                   + str(round(largest_shap, 4)) + "\n만큼 벗어납니다.")

    return list(zip(tokens, labels)), explanation
|
|
|
|
|
|
|
def parse_file_input(f):
    """Score every article in an uploaded CSV/Excel file.

    Reads the first column of the uploaded spreadsheet as the list of
    article texts and runs the Slant regressor over all of them at once.

    Args:
        f: Gradio file object; its ``.name`` attribute holds the path of
           the uploaded temp file.

    Returns:
        The model's scores for the articles (rendered as text by the
        Textbox output component).

    Raises:
        ValueError: if the file is neither CSV nor Excel.
    """
    # endswith instead of substring matching: a path such as
    # "data.csv.bak" must not be treated as CSV. Lower-cased so that
    # ".CSV"/".XLSX" uploads are accepted too.
    name = f.name.lower()

    if name.endswith(".csv"):
        all_articles = pd.read_csv(f.name).iloc[:, 0].to_list()
    elif name.endswith((".xls", ".xlsx")):
        all_articles = pd.read_excel(f.name).iloc[:, 0].to_list()
    else:
        # The original silently fell through and scored an empty list;
        # surface the problem to the user instead.
        raise ValueError("Unsupported file type: upload .csv, .xls or .xlsx")

    scores = model(all_articles)
    return scores
|
|
|
# Gradio UI: a single file-upload input (CSV/Excel of articles) wired to
# parse_file_input, with the scores shown in a Textbox.
# NOTE(review): gr.File(type="file") is deprecated in Gradio 4.x in favor
# of type="filepath" (which passes a str path rather than a file object —
# parse_file_input's use of f.name would need updating too). Confirm the
# installed Gradio version before changing it.
# NOTE(review): the label below is a mojibake-garbled Korean string
# (roughly "upload an article file (csv/excel)"); left byte-identical.
demo = gr.Interface(parse_file_input,
                    [
                        gr.File(file_count="single", file_types=[".csv", ".xls", ".xlsx"], type="file", label="๊ธฐ์ฌ ํ์ผ(csv/excel)์ ์๋ก๋ํ์ธ์")
                    ],
                    gr.Textbox(label="Slant Scores"),
                    theme=gr.themes.Base())

# Start the local web server (blocks until interrupted).
demo.launch()
|
|