Akbartus committed on
Commit
6004edc
·
1 Parent(s): f178121

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import gradio as gr
3
+ from huggingface_hub import hf_hub_download
4
+ from langdetect import detect, DetectorFactory, detect_langs
5
+ import fasttext
6
+ from transformers import pipeline
7
+
8
# Hugging Face Hub model id used for each supported language code.
# The keys must match the language codes returned by detect_lang().
# NOTE(review): the 'uz' entry is named like a news-category classifier rather
# than an NLI model - confirm it works with the zero-shot pipeline.
models = {
    'en': 'Narsil/deberta-large-mnli-zero-cls',  # English
    'ru': 'DeepPavlov/xlm-roberta-large-en-ru-mnli',  # Russian
    'uz': 'coppercitylabs/uzbek-news-category-classifier',  # Uzbek
}
11
+
12
+
13
# Per-language hypothesis template handed to the zero-shot pipeline; the "{}"
# placeholder is filled with each candidate label.
hypothesis_templates = {
    'en': 'This example is {}.',  # English
    'ru': 'Этот пример {}.',  # Russian
    'uz': 'Бу мисол {}.',  # Uzbek
}
16
+
17
+
18
# One zero-shot classification pipeline per supported language, keyed by
# language code. Each pipeline pairs the language's model with its
# hypothesis template.
classifiers = {
    lang: pipeline(
        "zero-shot-classification",
        hypothesis_template=hypothesis_templates[lang],
        model=models[lang],
    )
    for lang in ('en', 'ru', 'uz')
}
26
+
27
# fastText language-identification model: the weights file is fetched from the
# Hugging Face Hub and loaded once at import time.
fasttext_model = fasttext.load_model(
    hf_hub_download(
        repo_id="julien-c/fasttext-language-id",
        filename="lid.176.bin",
    )
)
28
+
29
def prep_examples():
    """Return the example rows shown in the demo UI.

    Returns:
        A list of ``[text, comma_separated_labels]`` pairs, one per supported
        language (English, Russian, Uzbek), in that order.
    """
    english_text = (
        "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most "
        "people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment. "
        "However, some will become seriously ill and require medical attention."
    )
    english_labels = "business,health related,politics,climate change"

    russian_text = (
        "Россия в среду заявила, что военные учения в аннексированном Москвой Крыму закончились "
        "и что солдаты возвращаются в свои гарнизоны, на следующий день после того, как она объявила о первом выводе "
        "войск от границ Украины."
    )
    russian_labels = "новости,комедия"

    uzbek_text = "Алишер Навоий ўзбек классик шоири, буюк ижодкор ва ватанпарвар инсон бўлган."
    uzbek_labels = "шеърият,спорт, санъат"

    # The Uzbek example used to be built but never returned, so the UI showed
    # no example for one of its advertised languages.
    return [
        [english_text, english_labels],
        [russian_text, russian_labels],
        [uzbek_text, uzbek_labels],
    ]
51
+
52
def _predict_lang(text):
    """Return the top fastText language code for ``text`` (e.g. ``'en'``)."""
    # fastText predicts one line at a time and rejects embedded newlines, so
    # multi-line textbox input is folded onto a single line first.
    single_line = " ".join(str(text).splitlines())
    top_label = fasttext_model.predict(single_line, k=1)[0][0]
    return top_label.split("__label__")[1]


def detect_lang(sequence, labels):
    """Pick the language code whose classifier should handle ``sequence``.

    The language of both the text and the candidate labels is detected with
    the fastText model; a mismatch between the two is only logged. The
    sequence language decides the result.

    Args:
        sequence: Text to classify.
        labels: Comma-separated candidate labels, as entered by the user.

    Returns:
        A key of ``models``. Falls back to ``'en'`` when detection fails or
        the detected language has no configured model.
    """
    # Only affects langdetect, which is no longer used for detection here;
    # kept so the module state this function sets is unchanged.
    DetectorFactory.seed = 0
    seq_lang = 'en'
    lbl_lang = None  # stays None when label detection did not succeed

    try:
        seq_lang = _predict_lang(sequence)
        lbl_lang = _predict_lang(labels)
    except Exception:
        # Best effort: detection problems must not break classification.
        # The original message had three placeholders but two arguments,
        # which raised IndexError from inside this handler.
        print("Language detection failed!",
              "Date:{}, Sequence:{}, Labels:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  labels))

    # Skip the comparison when the label language is unknown; previously this
    # line raised NameError after a failed detection.
    if lbl_lang is not None and seq_lang != lbl_lang:
        print("Different languages detected for sequence and labels!",
              "Date:{}, Sequence:{}, Labels:{}, Sequence Language:{}, Label Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  labels,
                  seq_lang,
                  lbl_lang))

    if seq_lang in models:
        print("Sequence Language detected.",
              "Date:{}, Sequence:{}, Sequence Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  seq_lang))
    else:
        print("Language not supported. Defaulting to English!",
              "Date:{}, Sequence:{}, Sequence Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  seq_lang))
        seq_lang = 'en'

    return seq_lang
91
+
92
def sequence_to_classify(sequence, labels):
    """Classify ``sequence`` against user-supplied candidate labels.

    Args:
        sequence: Text to classify.
        labels: Candidate labels as one comma-separated string.

    Returns:
        A dict mapping each label returned by the classifier to its score
        as a ``float``.

    Raises:
        ValueError: If ``labels`` contains no non-empty label.
    """
    # Trim whitespace around each label and drop empty entries, so that
    # "a, b," yields ["a", "b"] instead of ["a", " b", ""].
    candidate_labels = [part.strip() for part in str(labels).split(",") if part.strip()]
    if not candidate_labels:
        raise ValueError("Please provide at least one candidate label.")

    classifier = classifiers[detect_lang(sequence, labels)]
    response = classifier(sequence, candidate_labels, multi_label=True)

    predicted_labels = response['labels']
    # Pair labels with scores directly rather than popping from the score
    # list while the dict is being built.
    clean_output = {
        label: float(score)
        for label, score in zip(predicted_labels, response['scores'])
    }
    print("Date:{}, Sequence:{}, Labels: {}".format(
        str(datetime.datetime.now()),
        sequence,
        predicted_labels))

    return clean_output
107
+
108
# Gradio UI: a large textbox for the text, a small one for the comma-separated
# candidate labels, and a label widget showing up to five scored classes.
iface = gr.Interface(
    fn=sequence_to_classify,
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            label="Please enter the text you would like to classify...",
            placeholder="Text here...",
        ),
        gr.inputs.Textbox(
            lines=2,
            label="Please enter the candidate labels (separated by comma)...",
            placeholder="Labels here separated by comma...",
        ),
    ],
    outputs=gr.outputs.Label(num_top_classes=5),
    # interpretation="default" is intentionally left disabled.
    examples=prep_examples(),
    title="En-Ru-Uz Multi-label Zero-shot Classification",
    description="Supported languages are: English, Russian and Uzbek",
)

iface.launch()