Upload 3 files
Browse files
- app.py +26 -3
- lid.176.ftz +3 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -5,6 +5,7 @@ import random
|
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
import torch
|
|
|
8 |
|
9 |
|
10 |
|
@@ -13,7 +14,21 @@ label2id = {"NEGATIVE": 0, "POSITIVE": 1}
|
|
13 |
|
14 |
|
15 |
title = "Movie Review Score Discriminator"
|
16 |
-
description = "It is a program that classifies whether it is positive or negative by entering movie reviews.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
|
@@ -58,10 +73,18 @@ kor_model = AutoModelForSequenceClassification.from_pretrained(
|
|
58 |
|
59 |
|
60 |
def builder(lang, text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
if lang == 'Eng':
|
62 |
model = eng_model
|
63 |
tokenizer = eng_tokenizer
|
64 |
-
|
65 |
model = kor_model
|
66 |
tokenizer = kor_tokenizer
|
67 |
|
@@ -85,7 +108,7 @@ def builder(lang, text):
|
|
85 |
|
86 |
|
87 |
|
88 |
-
demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"],
|
89 |
outputs=gr.Label(num_top_classes=2, label='Res', color='CadetBlue'),
|
90 |
# outputs='label',
|
91 |
title=title, description=description, examples=examples)
|
|
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
import torch
|
8 |
+
import fasttext
|
9 |
|
10 |
|
11 |
|
|
|
14 |
|
15 |
|
16 |
title = "Movie Review Score Discriminator"
|
17 |
+
description = "It is a program that classifies whether it is positive or negative by entering movie reviews. \
|
18 |
+
You can choose between the Korean version and the English version. \
|
19 |
+
It also provides a version called Any, which determines whether it is Korean or English and predicts it."
|
20 |
+
|
21 |
+
|
22 |
+
class LanguageIdentification:
    """Thin wrapper around a fastText language-identification model.

    The model file ``./lid.176.ftz`` is loaded once, when the instance is
    constructed, via ``fasttext.load_model``.
    """

    def __init__(self):
        pretrained_lang_model = "./lid.176.ftz"
        self.model = fasttext.load_model(pretrained_lang_model)

    def predict_lang(self, text):
        """Return the model's top-2 language predictions for *text*.

        Args:
            text: The string whose language should be identified. It may
                span several lines.

        Returns:
            Whatever ``self.model.predict(..., k=2)`` returns. Callers in
            this file read the best label as ``result[0][0]`` (for example
            ``'__label__ko'``).
        """
        # fastText's predict() works on one line at a time and raises
        # ValueError if the input contains a newline, so a multi-line
        # review is flattened into a single line first.
        single_line = text.replace("\n", " ")
        predictions = self.model.predict(single_line, k=2)  # top 2 matching languages
        return predictions
|
30 |
+
|
31 |
+
LANGUAGE = LanguageIdentification()
|
32 |
|
33 |
|
34 |
|
|
|
73 |
|
74 |
|
75 |
def builder(lang, text):
|
76 |
+
if lang == 'Any':
|
77 |
+
pred = LANGUAGE.predict_lang(text)
|
78 |
+
if pred[0][0] == '__label__ko':
|
79 |
+
lang = 'Kor'
|
80 |
+
else: # '__label__en'
|
81 |
+
lang = 'Eng'
|
82 |
+
# else:
|
83 |
+
# raise NotImplementedError("It's neither Korean nor English.")
|
84 |
if lang == 'Eng':
|
85 |
model = eng_model
|
86 |
tokenizer = eng_tokenizer
|
87 |
+
if lang == 'Kor':
|
88 |
model = kor_model
|
89 |
tokenizer = kor_tokenizer
|
90 |
|
|
|
108 |
|
109 |
|
110 |
|
111 |
+
# Gradio UI: a language selector ('Any', 'Eng' or 'Kor') and a free-text box
# are passed to `builder`; the result is shown as a label limited to the top
# two classes.
demo = gr.Interface(
    builder,
    # `gr.Dropdown` is used instead of the deprecated `gr.inputs.Dropdown`
    # alias, matching the top-level `gr.Label` component used for the output.
    inputs=[gr.Dropdown(['Any', 'Eng', 'Kor']), "text"],
    outputs=gr.Label(num_top_classes=2, label='Res', color='CadetBlue'),
    title=title,
    description=description,
    examples=examples,
)
|
lid.176.ftz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
|
3 |
+
size 938013
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ datasets
|
|
3 |
transformers
|
4 |
torch
|
5 |
pandas
|
6 |
-
numpy
|
|
|
|
3 |
transformers
|
4 |
torch
|
5 |
pandas
|
6 |
+
numpy
|
7 |
+
fasttext
|