Upload 4 files
Browse files- app.py +68 -34
- examples.csv +51 -0
- klue:roberta-small-2400.pt +3 -0
- roberta-base-1900.pt +3 -0
app.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
-
from
|
|
|
3 |
import random
|
|
|
4 |
|
5 |
|
6 |
README = """
|
@@ -12,10 +14,6 @@ README = """
|
|
12 |
"""
|
13 |
|
14 |
|
15 |
-
model_name = "roberta-base"
|
16 |
-
learning_rate = 5e-5
|
17 |
-
batch_size_train = 64
|
18 |
-
step = 1900
|
19 |
|
20 |
|
21 |
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
|
@@ -26,46 +24,82 @@ title = "Movie Review Score Discriminator"
|
|
26 |
description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
|
27 |
|
28 |
|
29 |
-
examples = ["the greatest musicians ", "cold movie "]
|
30 |
-
# imdb_dataset = load_dataset('imdb')
|
31 |
-
# examples = []
|
32 |
-
# for i in range(3):
|
33 |
-
# idx = random.randrange(len(imdb_dataset['train']))
|
34 |
-
# examples.append(imdb_dataset['train'][idx]['text'])
|
35 |
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
def fn(text):
|
39 |
-
return "hello, " + text
|
40 |
|
41 |
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
# title=title, theme="peach",
|
44 |
# allow_flagging="auto",
|
45 |
# description=description, examples=examples)
|
46 |
|
47 |
-
#
|
48 |
-
|
49 |
-
# demo2 = gr.Interface(fn=greet, inputs="text", outputs="text",
|
50 |
# title=title, theme="peach",
|
51 |
# allow_flagging="auto",
|
52 |
# description=description, examples=examples)
|
53 |
-
|
54 |
-
here = gr.Interface(fn,
|
55 |
-
inputs= gr.inputs.Textbox( lines=1, placeholder=None, default="", label=None),
|
56 |
-
outputs='text',
|
57 |
-
title="Sentiment analysis of movie reviews",
|
58 |
-
description=description,
|
59 |
-
theme="peach",
|
60 |
-
allow_flagging="auto",
|
61 |
-
flagging_dir='flagging records')
|
62 |
-
|
63 |
-
|
64 |
-
demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
|
65 |
-
title=title, theme="peach",
|
66 |
-
allow_flagging="auto",
|
67 |
-
description=description, examples=examples)
|
68 |
|
69 |
if __name__ == "__main__":
|
70 |
-
|
71 |
-
demo3.launch()
|
|
|
1 |
import random

import gradio as gr
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
|
6 |
|
7 |
|
8 |
README = """
|
|
|
14 |
"""
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
|
|
|
24 |
description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
|
25 |
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
+
def tokenized_data(tokenizer, inputs):
    """Tokenize review text into fixed-length PyTorch tensors.

    Args:
        tokenizer: Object exposing ``batch_encode_plus``; in this file it is
            the result of ``AutoTokenizer.from_pretrained``.
        inputs: A single review string, or a list/tuple of review strings.

    Returns:
        Whatever ``tokenizer.batch_encode_plus`` returns for the batch, with
        every sequence padded or truncated to 64 tokens.
    """
    # batch_encode_plus works on a batch; a bare string (what the Gradio
    # textbox delivers) has to be wrapped so it is treated as one review.
    if isinstance(inputs, str):
        inputs = [inputs]

    return tokenizer.batch_encode_plus(
        inputs,
        return_tensors="pt",
        padding="max_length",
        max_length=64,
        truncation=True,
    )
|
36 |
|
|
|
|
|
37 |
|
38 |
|
39 |
+
|
40 |
+
# Hand-written fallback examples, one positive and one negative per language.
examples_eng = ["the greatest musicians ", "cold movie "]
examples_kor = ["긍정", "부정"]

# Flat list that alternates an English and a Korean sentence, drawn from two
# random rows of examples.csv (tab-separated; first column is the row index).
examples = []
df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
for _ in range(2):
    # randrange() excludes its upper bound, so the position is always a valid
    # row; randint(0, 50) could return 50, one past the last of the 50 rows.
    idx = random.randrange(len(df))
    examples.append(df.iloc[idx, 0])
    examples.append(df.iloc[idx, 1])


# Hosted models loaded through the Hugging Face hub at import time.
# NOTE(review): model_kor points at an English Twitter sentiment model and is
# not referenced anywhere in the visible code - confirm whether it is needed.
model_kor = gr.Interface.load("models/cardiffnlp/twitter-roberta-base-sentiment")
model_eng = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng")
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
# (model_name, step) -> (tokenizer, model); filled lazily so each checkpoint
# is read from disk once instead of on every request.
_LOADED_MODELS = {}


def _load_model(model_name, step):
    """Return the (tokenizer, model) pair for a fine-tuned checkpoint."""
    key = (model_name, step)
    if key not in _LOADED_MODELS:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Checkpoints are stored next to app.py with the "/" of the hub id
        # written as ":" (e.g. "klue:roberta-small-2400.pt").
        file_name = "{}-{}.pt".format(model_name.replace("/", ":"), step)
        # map_location keeps loading working on CPU-only hosts even if the
        # checkpoint was saved from a GPU.
        state_dict = torch.load(file_name, map_location="cpu")
        # id2label / label2id are module-level mappings defined outside this block.
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=2, id2label=id2label, label2id=label2id,
            state_dict=state_dict
        )
        model.eval()
        _LOADED_MODELS[key] = (tokenizer, model)
    return _LOADED_MODELS[key]


def builder(version, inputs):
    """Classify one movie review as POSITIVE or NEGATIVE.

    Args:
        version: 'Eng' selects the English roberta-base checkpoint; any other
            value selects the Korean klue/roberta-small checkpoint.
        inputs: The review text to classify.

    Returns:
        The label from ``id2label`` for the highest-scoring class.
    """
    if version == 'Eng':
        model_name = "roberta-base"
        step = 1900
    else:
        model_name = "klue/roberta-small"
        step = 2400

    tokenizer, model = _load_model(model_name, step)

    # The tokenizer helper encodes a batch, so a single review is sent as a
    # one-element list.
    batch = [inputs] if isinstance(inputs, str) else inputs
    encoded = tokenized_data(tokenizer, batch)

    with torch.no_grad():
        logits = model(input_ids=encoded['input_ids'],
                       attention_mask=encoded['attention_mask']).logits

    prediction = torch.argmax(logits, dim=1)

    return id2label[prediction.item()]
|
84 |
+
|
85 |
+
|
86 |
+
def builder2(inputs):
    """Run ``inputs`` through the hosted English model bound to ``model_eng``."""
    hosted_result = model_eng(inputs)
    return hosted_result
|
88 |
+
|
89 |
+
|
90 |
+
# The interface has two inputs (version, text), so Gradio needs one
# [version, text] row per example. `examples` is a flat list alternating
# English and Korean sentences; pair each with its dropdown value.
example_rows = []
for eng_text, kor_text in zip(examples[0::2], examples[1::2]):
    example_rows.append(['Eng', eng_text])
    example_rows.append(['Kor', kor_text])

demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"], outputs="text",
                    title=title, description=description, examples=example_rows)

# demo2 = gr.Interface(builder2, inputs="text", outputs="text",
#                      title=title, theme="peach",
#                      allow_flagging="auto",
#                      description=description, examples=examples)

# demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
#                           title=title, theme="peach",
#                           allow_flagging="auto",
#                           description=description, examples=examples)

if __name__ == "__main__":
    demo.launch()
    # demo3.launch()
|
examples.csv
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
eng kor
|
2 |
+
0 of saucy 1μ λμκΉλ€4λͺ
보λ€μ¬λ―Έμμ΄μ2λͺ
λκ°
|
3 |
+
1 cold movie 맀νΈλ¦μ€?γ΄γ΄ μ§μ₯ 묻μ μ‘΄μ
|
4 |
+
2 redundant concept κ°μΈμ μ νμΌλ‘ 1μ μ 쀬μ΅λλ€
|
5 |
+
3 in world cinema 보λλ΄λ΄ λ μκ°λ§ λ¬λ€.
|
6 |
+
4 on all cylinders μν보λ€κ° μ λ μ μ μ²μμ΄λ€μ
|
7 |
+
5 sit through , λ°λ»ν μνμμ~^^μΆμ²ν΄μ!
|
8 |
+
6 heroes λ³λ‘μμ μκ°λ³΄λ€ λ
ΈμΌμ
|
9 |
+
7 sharply μ’μμ κ°μ‘±λ€κ³Ό 보기 μ’μμ
|
10 |
+
8 sometimes dry β‘ μ¬λ°κ² μλ΄€μ΅λλ€γ
γ
|
11 |
+
9 disappointments λ°μ νΈλΉ μ¬λν΄μ~
|
12 |
+
10 the horrors λ₯λ μ΄λ° κ±°λν λ₯μ΄ μμλ€..
|
13 |
+
11 many pointless κ°μ§λ¦½λλ€ λλ§λΉν μμμ§
|
14 |
+
12 a beautifully μ΄κ²λ¬΄μ¨...λ§νλ€ λ§ν γ
γ
γ
|
15 |
+
13 a doa 7κ΄κ΅¬μ μλ²½μ μ΄λ£¨λ λ§μ
|
16 |
+
14 no apparent joy μν 보λ€κ° μ€κ°μ λμμ΅λλ€
|
17 |
+
15 seem fresh μ΅μ
κ·Έλ₯ λ³΄μ§ λ§μΈμμ§μ§ λ
ΈμΌ
|
18 |
+
16 weak and 짱ꡬ κ·Ήμ₯νμ μΈμ λ μ΅κ³ μμ
|
19 |
+
17 skip this dreck , λ΄ μκ°μ μμ€ν κ±°λ€.
|
20 |
+
18 generates κ²λ μ¬λ°λλ,,,,
|
21 |
+
19 funny yet κ·Έλ₯ κ°μ¬λ°μ νμ λ―ΏμΌλ©΄ μλ¨
|
22 |
+
20 in memory μ¬λ°κ² μλ΄£μ΅λλ€ λ무μ’μ΅λλ€μ
|
23 |
+
21 hawaiian shirt λ°₯ λ¨ΉμΌλ©΄μ 보기 μ’μ μν
|
24 |
+
22 grievous but μ¬λ―Έμ κ°λμ κ²ΈλΉν λͺ
μμ
λλ€!!
|
25 |
+
23 hopeless μ¬κ°λ΄ κ°μ¬ν©λλ€.μ λ§λ‘
|
26 |
+
24 bring tissues . λλ μ΄μ μ€λͺ
μ΄ νμν κΉ.
|
27 |
+
25 just too silly μμ 믿보 ν©.μ .λ―Ό λ°°μ°λ~^^
|
28 |
+
26 cinematic bon bons μ°μΆ+μ°κΈ°+μ€ν 리+μμλ―Έ+OST
|
29 |
+
27 irritates and μΆμ΅μ 묻μ΄λμ§ κ·Έλ¬λ
|
30 |
+
28 collapse μ΄μλ μ΅κ³ μ μ½λ―Έλ μν
|
31 |
+
29 no lika da μ¬λ―Έμκ² κ΄λνμμ΅λλ€
|
32 |
+
30 a welcome relief μ€λ§μ°κ·Έλ μμλ μ€λ¦΄μ΄ λ§λ€.
|
33 |
+
31 , compelling μ²μμΌλ‘ κ·Ήμ₯μμ μ€μ΅λλ€
|
34 |
+
32 infectiously λ무λλ μλ΄€μ΄μ κ΅Ώμ
λλ
|
35 |
+
33 imax in short γ
γΉκ² μκΈ°κ³ μΌμλ€.γ
|
36 |
+
34 i hate it . μ°λ§μ 보면 λν΄νλ€ μ λ§
|
37 |
+
35 a good one κ·Έλ₯ κ²μμΌλ‘ λ΄μ§ κ·Έλ¬λ.
|
38 |
+
36 , plodding picture μ§μ§ κ°μΆ μ΅κ³ μ νκ΅μν
|
39 |
+
37 inane and awful μ§μ§μ΅μ
μ
λλ€...λͺ
μ μ보μΈμ
|
40 |
+
38 whole mess λλ§μ 보μ§λ§μΈμ λ μκΉμ
|
41 |
+
39 enjoy the ride μ΄κ±° λ³Ό μκ°μ μΌλμ΄λ λ΄λΌ
|
42 |
+
40 the horror λ무λ무 μ¬λ°μ λ²μ¦ μ΅κ³
|
43 |
+
41 a dim 3μκ°μ΄ μ ν μκΉμ§ μμ
|
44 |
+
42 amazingly lame . μ‘Έμμ΄λ€..
|
45 |
+
43 to spare wildlife λ
Έμ°μΌμ€γ
‘ μ΄λ§μ‘ μ΄μ μ±μ°κΈ°
|
46 |
+
44 carnage and 2022λ
μ΅κ³ νκ΅μν
|
47 |
+
45 second fiddle μ¬λ―Έμλ€λ무μ¬λ―Έμλ€OSTμ§κ²Ήλ€
|
48 |
+
46 a stylish exercise λλ¦ μ¬λ°κ² λ΄ κ°λ³κ² 보기 μ’μλ―
|
49 |
+
47 than this mess μ...κ°λ
νμ΄ λμ’λ€... λκΈ΄λ°
|
50 |
+
48 valuable messages κ°μκΈ° λκ²μ γΉγ
γ
γ
|
51 |
+
49 usual worst λ³μ 1μ λ μ£ΌκΈ°κ° μκΉμ΄ μν..
|
klue:roberta-small-2400.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b572a576888999c3696750507168b1ec8c194b93e3b0a5fb69d5932cb61a410
|
3 |
+
size 272408049
|
roberta-base-1900.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f0dcb5d42751656f47868d0b1cd793c33bd2c497df57dde5514a2b15a791d05
|
3 |
+
size 498658641
|