sagittariusA committed on
Commit
967e26e
1 Parent(s): 9520738

add application file

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+ from corpy.morphodita import Tokenizer
6
+
7
+ import transformers
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+
10
# Keep the transformers library quiet: only messages at ERROR level are logged.
transformers.logging.set_verbosity(transformers.logging.ERROR)

# Hub identifier passed to AutoTokenizer.from_pretrained further down the file.
model_checkpoint = 'ufal/robeczech-base'

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+
14
def classify_sentence(sent: str):
    """Classify one sentence with the module-level tokenizer and model.

    Args:
        sent: Sentence text. It is encoded with the module-level
            ``tokenizer`` with truncation enabled.

    Returns:
        A CPU tensor with the index of the highest-scoring class, taken
        along dimension 1 of the model's logits.
    """
    encoded = tokenizer(sent, truncation=True, return_tensors="pt")
    # Send the inputs to the device the model's weights are actually on.
    # Using the model's own device (instead of a separately chosen one)
    # guarantees inputs and weights can never end up on different devices.
    encoded = encoded.to(model.device)

    model.eval()
    with torch.no_grad():
        output = model(**encoded)

    # Softmax is monotonic, so argmax over the raw logits selects the same
    # class. The result is moved to the CPU so callers can hand it to NumPy
    # even when inference ran on a GPU.
    return output.logits.argmax(dim=1).cpu()
22
+
23
def classify_text(text: str):
    """Split ``text`` into sentences and classify each one.

    The text is segmented with a Czech MorphoDiTa tokenizer; the tokens of
    every sentence are joined with single spaces and the resulting string
    is passed to ``classify_sentence``.

    Args:
        text: Input text to analyse.

    Returns:
        A NumPy array built from the per-sentence results of
        ``classify_sentence``, in sentence order.
    """
    splitter = Tokenizer("czech")

    # One space-joined string per sentence reported by the tokenizer.
    sentences = np.array(
        [' '.join(tokens) for tokens in splitter.tokenize(text, sents=True)]
    )

    return np.array([classify_sentence(sentence) for sentence in sentences])
34
+
35
def classify_text_wrapper(text: str):
    """Return the share of non-biased and biased sentences in ``text``.

    Args:
        text: Input text; it is classified sentence by sentence through
            ``classify_text``.

    Returns:
        A dict with the keys ``'Non-biased'`` and ``'Biased'``. Each value
        is the fraction of sentences labelled 0 or 1 respectively. Both
        fractions are 0.0 when the text yields no sentences (e.g. empty
        input), instead of raising ``ZeroDivisionError``.
    """
    result = classify_text(text)
    n = len(result)
    if n == 0:
        # Nothing was classified, so there is no ratio to compute.
        return {'Non-biased': 0.0, 'Biased': 0.0}

    non_biased = np.count_nonzero(result == 0)
    biased = np.count_nonzero(result == 1)

    return {'Non-biased': non_biased / n, 'Biased': biased / n}
42
+
43
+
44
def interpret_bias(text: str):
    """Pair every sentence of ``text`` with a signed bias score.

    The text is classified with ``classify_text`` and then split into
    sentences again with a Czech MorphoDiTa tokenizer, so that each
    sentence string can be matched with its label by position.

    Args:
        text: Input text to analyse.

    Returns:
        A list of ``(sentence, score)`` tuples in sentence order, where
        ``score`` is -1 for label 0 (non-biased), 1 for label 1 (biased)
        and 0 for any other label.
    """
    labels = classify_text(text)

    splitter = Tokenizer("czech")
    sentences = np.array(
        [' '.join(tokens) for tokens in splitter.tokenize(text, sents=True)]
    )

    scored = []
    for position, sentence in enumerate(sentences):
        label = labels[position]
        # Label 0 means non-biased, label 1 means biased; anything else
        # stays neutral.
        weight = -1 if label == 0 else (1 if label == 1 else 0)
        scored.append((sentence, weight))

    return scored
67
+
68
# The tokenizer is loaded from the checkpoint named by model_checkpoint; the
# classification weights are loaded from a separate Hub repository.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained("sagittariusA/media_bias_classifier_cs")
# Put the weights on the device chosen above; without this the model stays
# on the CPU even when CUDA is available.
model.to(device)
model.eval()

# NOTE(review): gr.outputs / gr.inputs and the layout / capture_session
# arguments look like the legacy Gradio API -- confirm against the Gradio
# version this app is pinned to before upgrading.
label = gr.outputs.Label(num_top_classes=2)
# NOTE(review): `inputs` is not passed to gr.Interface below, which uses the
# "textbox" shortcut instead.
inputs = gr.inputs.Textbox(placeholder=None, default="", label=None)
app = gr.Interface(
    fn=classify_text_wrapper,
    title='Bias classifier',
    theme='default',
    inputs="textbox",
    layout='unaligned',
    outputs=label,
    capture_session=True,
    interpretation=interpret_bias,
)

app.launch(inbrowser=True)
79
+