import gradio as gr
import re
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import torch
from keybert import KeyBERT
from datasets import load_dataset
import shap
from transformers_interpret import SequenceClassificationExplainer
from ferret import Benchmark
#model_identifier = "karalif/myTestModel"
#model = AutoModelForSequenceClassification.from_pretrained(model_identifier)
#tokenizer = AutoTokenizer.from_pretrained(model_identifier)
name = "karalif/myTestModel"
model = AutoModelForSequenceClassification.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name, normalization=True)
bench = Benchmark(model, tokenizer)
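# ferret's Benchmark wraps the model/tokenizer pair with a suite of post-hoc explainers;
# predict() below keeps only the 'Partition SHAP' explanations it produces.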
#text = "hvað er maðurinn eiginlega að pæla ég fatta ekki??????????"
def get_prediction(text):
    # Tokenise and run the classifier; sigmoid (not softmax) gives independent
    # per-label probabilities for the multi-label setup
    encoding = tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=200)
    encoding = {k: v.to(model.device) for k, v in encoding.items()}

    with torch.no_grad():
        outputs = model(**encoding)

    logits = outputs.logits
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits.squeeze().cpu()).numpy()

    # KeyBERT is (re)initialised per call; extracts up to 5 single-word keywords
    kw_model = KeyBERT()
    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 1), stop_words='english', use_maxsum=True, nr_candidates=20, top_n=5)

    response = ""
    labels = ['Politeness', 'Toxicity', 'Sentiment', 'Formality']
    colors = ['#b8e994', '#f8d7da', '#fff3cd', '#bee5eb']  # Corresponding colors for labels

    for i, label in enumerate(labels):
        response += f"<span style='background-color:{colors[i]}; color:black;'>{label}</span>: {probs[i]*100:.1f}%<br>"

    influential_keywords = "INFLUENTIAL KEYWORDS:<br>"
    for keyword, score in keywords:
        influential_keywords += f"{keyword} (Score: {score:.2f})<br>"

    return response, keywords, influential_keywords
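# Example call (using the demo example text further down):
#   html_probs, keywords, keyword_html = get_prediction("Sæl og blessuð Kristín, hvað er að frella af þér gamla??")
#   html_probs -> per-label probability spans, keywords -> raw (keyword, score) tuples from KeyBERT,
#   keyword_html -> the same keywords formatted as HTML.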
def replace_encoding(tokens):
    # Map byte-level BPE artefacts in the explainer's tokens back to the proper
    # Icelandic characters; tokens[1:-1] drops the special start/end tokens.
    return [token.replace('Ġ', ' ')
                 .replace('Ã°', 'ð')
                 .replace('Ã©', 'é')
                 .replace('Ã¦', 'æ')
                 .replace('Ã½', 'ý')
                 .replace('Ã¡', 'á')
                 .replace('Ãº', 'ú')
                 .replace('ÃŃ', 'í')
                 .replace('Ãö', 'ö')
                 .replace('Ã¾', 'þ')
                 .replace('Ãģ', 'Á')
                 .replace('Ãį', 'Ú')
                 .replace('Ãĵ', 'Ó')
                 .replace('ÃĨ', 'Æ')
                 .replace('ÃIJ', 'Ð')
                 .replace('Ãĸ', 'Ö')
                 .replace('Ãī', 'É')
                 .replace('Ãļ', 'ý')
            for token in tokens[1:-1]]
def predict(text):
    # ferret explanations for each output label (target index = position in the model head)
    explanations_formality = bench.explain(text, target=0)
    explanations_sentiment = bench.explain(text, target=1)
    explanations_politeness = bench.explain(text, target=2)
    explanations_toxicity = bench.explain(text, target=3)

    greeting_pattern = r"^(Halló|Hæ|Sæl|Góðan dag|Kær kveðja|Daginn|Kvöldið|Ágætis|Elsku)"

    prediction_output, keywords, influential_keywords = get_prediction(text)
    greeting_feedback = ""

    # Highlight the KeyBERT keywords in the original input
    modified_input = text
    for keyword, _ in keywords:
        modified_input = modified_input.replace(keyword, f"<span style='color:green;'>{keyword}</span>")

    #if not re.match(greeting_pattern, text, re.IGNORECASE):
    #    greeting_feedback = "OTHER FEEDBACK:<br>Heilsaðu dóninn þinn<br>"

    response = f"INPUT:<br>{modified_input}<br><br>MY PREDICTION:<br>{prediction_output}<br>{influential_keywords}<br>{greeting_feedback}"

    # Influential words
    explanation_lists = [explanations_toxicity, explanations_formality, explanations_sentiment, explanations_politeness]
    labels = ['Toxicity', 'Formality', 'Sentiment', 'Politeness']

    response += "<br>MOST INFLUENTIAL WORDS FOR EACH LABEL:<br>"
    for i, explanations in enumerate(explanation_lists):
        label = labels[i]
        for explanation in explanations:
            if explanation.explainer == 'Partition SHAP':
                tokens = replace_encoding(explanation.tokens)
                token_score_pairs = zip(tokens, explanation.scores)
                formatted_output = ' '.join([f"{token} ({score})" for token, score in token_score_pairs])
                response += f"{label}: {formatted_output}<br>"

    #response += "<br>TOP 2 MOST INFLUENTIAL WORDS FOR EACH LABEL:<br>"
    #for i, explanations in enumerate(explanation_lists):
    #    label = labels[i]
    #    response += f"{label}:<br>"
    #    for explanation in explanations:
    #        if explanation.explainer == 'Partition SHAP':
    #            sorted_scores = sorted(enumerate(explanation.scores), key=lambda x: abs(x[1]), reverse=True)[:2]
    #            tokens = replace_encoding(explanation.tokens)
    #            tokens = [tokens[idx] for idx, _ in sorted_scores]
    #            formatted_output = ' '.join(tokens)
    #            response += f"{formatted_output}<br>"

    return response
description_html = """
<center>
<img src='http://www.ru.is/media/HR_logo_vinstri_transparent.png' width='250' height='auto'>
</center>
"""
demo = gr.Interface(
    fn=predict,
    inputs=gr.TextArea(label="Enter text here:"),
    outputs=gr.HTML(label="Leiðrétt"),
    description=description_html,
    examples=[
        ["Sæl og blessuð Kristín, hvað er að frella af þér gamla??"],
    ],
    theme=gr.themes.Default(primary_hue="red", secondary_hue="pink")
)

demo.launch()
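# To try the app locally (package names below are assumptions; ferret is published on PyPI as ferret-xai):
#   pip install gradio transformers torch keybert ferret-xai
#   python app.py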