Spaces:

mdj1412
/

movie_review_score_discriminator

Sleeping

App Files Files Community

mdj1412 committed on Jan 25, 2023

Commit

509d266

•

1 Parent(s): d6fbd75

Upload 4 files

Browse files

Files changed (4) hide show

app.py +68 -34
examples.csv +51 -0
klue:roberta-small-2400.pt +3 -0
roberta-base-1900.pt +3 -0

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import gradio as gr
-from datasets import load_dataset
 import random
 README = """
@@ -12,10 +14,6 @@ README = """
 """
-model_name = "roberta-base"
-learning_rate = 5e-5
-batch_size_train = 64
-step = 1900
 id2label = {0: "NEGATIVE", 1: "POSITIVE"}
@@ -26,46 +24,82 @@ title = "Movie Review Score Discriminator"
 description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
-examples = ["the greatest musicians ", "cold movie "]
-# imdb_dataset = load_dataset('imdb')
-# examples = []
-# for i in range(3):
-#     idx = random.randrange(len(imdb_dataset['train']))
-#     examples.append(imdb_dataset['train'][idx]['text'])
-def fn(text):
-    return "hello, " + text
-# demo1 = gr.Interface.load("models/cardiffnlp/twitter-roberta-base-sentiment", inputs="text", outputs="text",
 #                          title=title, theme="peach",
 #                          allow_flagging="auto",
 #                          description=description, examples=examples)
-# demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-# demo2 = gr.Interface(fn=greet, inputs="text", outputs="text",
 #                          title=title, theme="peach",
 #                          allow_flagging="auto",
 #                          description=description, examples=examples)
-here = gr.Interface(fn,
-                     inputs= gr.inputs.Textbox( lines=1, placeholder=None, default="", label=None),
-                     outputs='text',
-                     title="Sentiment analysis of movie reviews",
-                     description=description,
-                     theme="peach",
-                     allow_flagging="auto",
-                     flagging_dir='flagging records')
-demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
-                         title=title, theme="peach",
-                         allow_flagging="auto",
-                         description=description, examples=examples)
 if __name__ == "__main__":
-    # here.launch()
-    demo3.launch()

 import gradio as gr
+from transformers import AutoModelForSequenceClassification
+from transformers import AutoTokenizer
 import random
+import torch
 README = """
 """
 id2label = {0: "NEGATIVE", 1: "POSITIVE"}
 description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
+def tokenized_data(tokenizer, inputs):
+    """Encode `inputs` with `tokenizer.batch_encode_plus` as PyTorch tensors.
+
+    Every sequence is padded or truncated to exactly 64 tokens.
+    """
+    return tokenizer.batch_encode_plus(
+        inputs,
+        return_tensors="pt",
+        padding="max_length",
+        max_length=64,
+        truncation=True)
+examples_eng = ["the greatest musicians ", "cold movie "]
+examples_kor = ["긍정", "부정"]
+examples = []
+df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
+for i in range(2):
+    idx = random.randint(0, 50)
+    examples.append(df.iloc[idx, 0])
+    examples.append(df.iloc[idx, 1])
+model_kor = gr.Interface.load("models/cardiffnlp/twitter-roberta-base-sentiment")
+model_eng = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng")
+def builder(version, inputs):
+    if version == 'Eng':
+        model_name = "roberta-base"
+        step = 1900
+    else:
+        model_name = "klue/roberta-small"
+        step = 2400
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    inputs = tokenized_data(tokenizer, inputs)
+    file_name = "{}-{}.pt".format(model_name, step)
+    state_dict = torch.load(file_name)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        model_name, num_labels=2, id2label=id2label, label2id=label2id,
+        state_dict=state_dict
+    )
+    model.eval()
+    with torch.no_grad():
+        logits = model(input_ids=inputs['input_ids'],
+            attention_mask=inputs['attention_mask']).logits
+    prediction = torch.argmax(logits, axis=1)
+    return id2label[prediction.item()]
+def builder2(inputs):
+    return model_eng(inputs)
+demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"], outputs="text",
+                            title=title, description=description, examples=[examples])
+# demo2 = gr.Interface(builder2, inputs="text", outputs="text",
 #                          title=title, theme="peach",
 #                          allow_flagging="auto",
 #                          description=description, examples=examples)
+# demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
 #                          title=title, theme="peach",
 #                          allow_flagging="auto",
 #                          description=description, examples=examples)
 if __name__ == "__main__":
+    demo.launch()
+    # demo3.launch()

examples.csv ADDED Viewed

	@@ -0,0 +1,51 @@

+	eng	kor
+0	of saucy 	1점도아깝다4명보다재미없어서2명나감
+1	cold movie 	매트릭스?ㄴㄴ 짜장 묻은 존윅
+2	redundant concept 	개인의 선택으로 1점을 줬습니다
+3	in world cinema 	보는내내 니 생각만 났다.
+4	on all cylinders 	영화보다가 잠든적은 처음이네요
+5	sit through , 	따뜻한 영화에요~^^추천해요!
+6	heroes 	별로에요 생각보다 노잼임
+7	sharply 	좋아요 가족들과 보기 좋아요
+8	sometimes dry 	♡ 재밌게 잘봤습니다ㅎㅎ
+9	disappointments 	반제 호빗 사랑해요~
+10	the horrors 	똥도 이런 거대한 똥이 없었다..
+11	many pointless 	개지립니다 나만당할순없지
+12	a beautifully 	이게무슨...만화네 만화 ㅉㅉㅉ
+13	a doa 	7광구와 쌍벽을 이루는 망작
+14	no apparent joy 	영화 보다가 중간에 나왔습니다
+15	seem fresh 	최악 그냥 보지 마세요진짜 노잼
+16	weak and 	짱구 극장판은 언제나 최고에요
+17	skip this dreck , 	내 시간은 소중한 거다.
+18	generates 	겁나 재밌는디,,,,
+19	funny yet 	그냥 개재밌음 평점 믿으면 안됨
+20	in memory 	재밋게 잘봣습니다 너무좋습니다요
+21	hawaiian shirt 	밥 먹으면서 보기 좋은 영화
+22	grievous but 	재미와 감동을 겸비한 명작입니다!!
+23	hopeless 	재개봉 감사합니다.정말로
+24	bring tissues . 	끝더 이상 설명이 필요할까.
+25	just too silly 	역시 믿보 황.정.민 배우님~^^
+26	cinematic bon bons 	연출+연기+스토리+영상미+OST
+27	irritates and 	추억에 묻어두지 그랬냐
+28	collapse 	이시대 최고의 코미디 영화
+29	no lika da 	재미있게 관람하였습니다
+30	a welcome relief 	스마우그랑 있을땐 스릴이 많다.
+31	, compelling 	처음으로 극장에서 잤습니다
+32	infectiously 	너무나도 잘봤어요 굿입니댜
+33	imax in short 	ㅈㄹ게 웃기고 잼있네.ㅋ
+34	i hate it . 	연말에 보면 뭉클하다 정말
+35	a good one 	그냥 게임으로 내지 그랬냐.
+36	, plodding picture 	진짜 강추 최고의 한국영화
+37	inane and awful 	진짜최악입니다...명절에보세요
+38	whole mess 	대망작 보지마세요 돈 아까움
+39	enjoy the ride 	이거 볼 시간에 야동이나 봐라
+40	the horror 	너무너무 재밌음 버즈 최고
+41	a dim 	3시간이 전혀 아깝지 않은
+42	amazingly lame . 	졸작이다..
+43	to spare wildlife 	노우잼스ㅡ  이만잡 열자 채우기
+44	carnage and 	2022년 최고 한국영화
+45	second fiddle 	재미없다너무재미없다OST지겹다
+46	a stylish exercise 	나름 재밌게 봄 가볍게 보기 좋은듯
+47	than this mess 	와...감독판이 더좋다... 더긴데
+48	valuable messages 	갑자기 도게자 ㄹㅇㅋㅋ
+49	usual worst 	별점 1점도 주기가 아까운 영화..

klue:roberta-small-2400.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b572a576888999c3696750507168b1ec8c194b93e3b0a5fb69d5932cb61a410
+size 272408049

roberta-base-1900.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f0dcb5d42751656f47868d0b1cd793c33bd2c497df57dde5514a2b15a791d05
+size 498658641