sigmadream committed on
Commit
fbdc62b
•
1 Parent(s): ee9cdb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -13
app.py CHANGED
@@ -13,11 +13,12 @@ id2label = {0: "NEGATIVE", 1: "POSITIVE"}
13
  label2id = {"NEGATIVE": 0, "POSITIVE": 1}
14
 
15
 
16
- title = "한국어/영어 감정 분석 예제(네이버 영화 리뷰를 활용)"
17
- description = "์˜ํ™”ํ‰์„ ์ž…๋ ฅํ•˜์—ฌ ๊ธ์ •์ ์ธ์ง€ ๋ถ€์ •์ ์ธ์ง€๋ฅผ ๋ถ„๋ฅ˜ํ•˜๋Š” ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค. \
18
- ํ•œ๊ตญ์–ด์ธ์ง€ ์˜์–ด์ธ์ง€ ํŒ๋‹จํ•˜๊ณ  ์˜ˆ์ธกํ•ด์ฃผ๋Š” ""Default""๋ผ๋Š” ๋ฒ„์ „๋„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค." \
19
- ํ•œ๊ตญ์–ด ๋ฒ„์ „๊ณผ ์˜์–ด ๋ฒ„์ „ ์ค‘์—์„œ ์„ ํƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
20
-
 
21
  class LanguageIdentification:
22
  def __init__(self):
23
  pretrained_lang_model = "./lid.176.ftz"
@@ -40,6 +41,7 @@ def tokenized_data(tokenizer, inputs):
40
  truncation=True)
41
 
42
 
 
43
  examples = []
44
  df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
45
  np.random.seed(100)
@@ -148,29 +150,46 @@ def builder(Lang, Text):
148
  return id2label[prediction.item()]
149
 
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  with gr.Blocks() as demo1:
152
  gr.Markdown(
153
  """
154
  <h1 align="center">
155
- 한국어/영어 감정 분석 예제(네이버 영화 리뷰를 활용)
156
  </h1>
157
  """)
158
 
159
  gr.Markdown(
160
  """
161
- ์˜ํ™” ๋ฆฌ๋ทฐ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด, ๊ธ์ •์ ์ธ ๊ฐ์ •์ธ์ง€ ๋ถ€์ •์ ์ธ ๊ฐ์ •์ธ์ง€ ํŒ๋ณ„ํ•˜๋Š” ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค. \
162
- ์˜์–ด์™€ ํ•œ๊ธ€์„ ์ง€์›ํ•˜๋ฉฐ, ์–ธ์–ด๋ฅผ ์ง์ ‘ ์„ ํƒํ• ์ˆ˜๋„, ํ˜น์€ ๋ชจ๋ธ์ด ์–ธ์–ด๊ฐ์ง€๋ฅผ ์ง์ ‘ ํ•˜๋„๋ก ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
163
  ๋ฆฌ๋ทฐ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด, (1) ๊ฐ์ง€๋œ ์–ธ์–ด, (2) ๊ธ์ • ๋ฆฌ๋ทฐ์ผ ํ™•๋ฅ ๊ณผ ๋ถ€์ • ๋ฆฌ๋ทฐ์ผ ํ™•๋ฅ , (3) ์ž…๋ ฅ๋œ ๋ฆฌ๋ทฐ์˜ ์–ด๋Š ๋‹จ์–ด๊ฐ€ ๊ธ์ •/๋ถ€์ • ๊ฒฐ์ •์— ์˜ํ–ฅ์„ ์ฃผ์—ˆ๋Š”์ง€ \
164
- (๊ธ์ •์ผ ๊ฒฝ์šฐ ๋นจ๊ฐ•์ƒ‰, ๋ถ€์ •์ผ ๊ฒฝ์šฐ ํŒŒ๋ž€์ƒ‰)๋ฅผ ํ™•์ธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
165
  """)
166
 
167
  with gr.Accordion(label="๋ชจ๋ธ์— ๋Œ€ํ•œ ์„ค๋ช… ( ์—ฌ๊ธฐ๋ฅผ ํด๋ฆญ ํ•˜์‹œ์˜ค. )", open=False):
168
  gr.Markdown(
169
  """
170
- ์˜์–ด ๋ชจ๋ธ์€ bert-base-uncased ๊ธฐ๋ฐ˜์œผ๋กœ, ์˜์–ด ์˜ํ™” ๋ฆฌ๋ทฐ ๋ถ„์„ ๋ฐ์ดํ„ฐ์…‹์ธ SST-2๋กœ ํ•™์Šต ๋ฐ ํ‰๊ฐ€๋˜์—ˆ์Šต๋‹ˆ๋‹ค.
171
- ํ•œ๊ธ€ ๋ชจ๋ธ์€ klue/roberta-base ๊ธฐ๋ฐ˜์ด๋‹ค. ๊ธฐ์กด ํ•œ๊ธ€ ์˜ํ™” ๋ฆฌ๋ทฐ ๋ถ„์„ ๋ฐ์ดํ„ฐ์…‹์ด ์กด์žฌํ•˜์ง€ ์•Š์•„, ๋„ค์ด๋ฒ„ ์˜ํ™”์˜ ๋ฆฌ๋ทฐ๋ฅผ ํฌ๋กค๋งํ•ด์„œ ์˜ํ™” ๋ฆฌ๋ทฐ ๋ถ„์„ ๋ฐ์ดํ„ฐ์…‹์„ ์ œ์ž‘ํ•˜๊ณ , ์ด๋ฅผ ์ด์šฉํ•˜์—ฌ ๋ชจ๋ธ์„ ํ•™์Šต ๋ฐ ํ‰๊ฐ€ํ•˜์˜€์Šต๋‹ˆ๋‹ค.
172
- ์˜์–ด ๋ชจ๋ธ์€ SST-2์—์„œ 92.8%, ํ•œ๊ธ€ ๋ชจ๋ธ์€ ๋„ค์ด๋ฒ„ ์˜ํ™” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์…‹์—์„œ 94%์˜ ์ •ํ™•๋„๋ฅผ ๊ฐ€์ง‘๋‹ˆ๋‹ค(test set ๊ธฐ์ค€).
173
- ์–ธ์–ด๊ฐ์ง€๋Š” fasttext์˜ language detector๋ฅผ ์‚ฌ์šฉํ•˜์˜€๋‹ค. ๋ฆฌ๋ทฐ์˜ ๋‹จ์–ด๋ณ„ ์˜ํ–ฅ๋ ฅ์€, ๋‹จ์–ด ๊ฐ๊ฐ์„ ๋ชจ๋ธ์— ๋„ฃ์—ˆ์„ ๋•Œ ๊ฒฐ๊ณผ๊ฐ€ ๊ธ์ •์œผ๋กœ ๋‚˜์˜ค๋Š”์ง€ ๋ถ€์ •์œผ๋กœ ๋‚˜์˜ค๋Š”์ง€๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์ธก์ •ํ•˜์˜€์Šต๋‹ˆ๋‹ค.
174
  """)
175
 
176
  with gr.Row():
 
13
  label2id = {"NEGATIVE": 0, "POSITIVE": 1}
14
 
15
 
16
+ title = "영화 리뷰 점수 판별기"
17
+ description = "์˜ํ™”ํ‰์„ ์ž…๋ ฅํ•˜์—ฌ ๊ธ์ •์ ์ธ์ง€ ๋ถ€์ •์ ์ธ์ง€๋ฅผ ๋ถ„๋ฅ˜ํ•˜๋Š” ํ”„๋กœ๊ทธ๋žจ์ž…๋‹ˆ๋‹ค. \
18
+ ํ•œ๊ตญ์–ด ๋ฒ„์ „๊ณผ ์˜์–ด ๋ฒ„์ „ ์ค‘์—์„œ ์„ ํƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. \
19
+ ํ•œ๊ตญ์–ด์ธ์ง€ ์˜์–ด์ธ์ง€ ํŒ๋‹จํ•˜๊ณ  ์˜ˆ์ธกํ•ด์ฃผ๋Š” ""Default""๋ผ๋Š” ๋ฒ„์ „๋„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค."
20
+
21
+
22
  class LanguageIdentification:
23
  def __init__(self):
24
  pretrained_lang_model = "./lid.176.ftz"
 
41
  truncation=True)
42
 
43
 
44
+
45
  examples = []
46
  df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
47
  np.random.seed(100)
 
150
  return id2label[prediction.item()]
151
 
152
 
153
+ # demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
154
+ # title=title, theme="peach",
155
+ # allow_flagging="auto",
156
+ # description=description, examples=examples)
157
+
158
+
159
+
160
+ # demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Default', 'Eng', 'Kor']), gr.Textbox(placeholder="๋ฆฌ๋ทฐ๋ฅผ ์ž…๋ ฅํ•˜์‹œ์˜ค.")],
161
+ # outputs=[ gr.Label(num_top_classes=3, label='Lang'),
162
+ # gr.Label(num_top_classes=2, label='Result'),
163
+ # gr.HighlightedText(label="Analysis", combine_adjacent=False)
164
+ # .style(color_map={"+++": "#CF0000", "++": "#FF3232", "+": "#FFD4D4", "---": "#0004FE", "--": "#4C47FF", "-": "#BEBDFF"}) ],
165
+ # # outputs='label',
166
+ # title=title, description=description, examples=examples)
167
+
168
+
169
+
170
  with gr.Blocks() as demo1:
171
  gr.Markdown(
172
  """
173
  <h1 align="center">
174
+ 영화 리뷰 점수 판별기
175
  </h1>
176
  """)
177
 
178
  gr.Markdown(
179
  """
180
+ ์˜ํ™” ๋ฆฌ๋ทฐ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด, ๋ฆฌ๋ทฐ๊ฐ€ ๊ธ์ •์ธ์ง€ ๋ถ€์ •์ธ์ง€ ํŒ๋ณ„ํ•ด์ฃผ๋Š” ๋ชจ๋ธ์ด๋‹ค. \
181
+ ์˜์–ด์™€ ํ•œ๊ธ€์„ ์ง€์›ํ•˜๋ฉฐ, ์–ธ์–ด๋ฅผ ์ง์ ‘ ์„ ํƒํ• ์ˆ˜๋„, ํ˜น์€ ๋ชจ๋ธ์ด ์–ธ์–ด๊ฐ์ง€๋ฅผ ์ง์ ‘ ํ•˜๋„๋ก ํ•  ์ˆ˜ ์žˆ๋‹ค.
182
  ๋ฆฌ๋ทฐ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด, (1) ๊ฐ์ง€๋œ ์–ธ์–ด, (2) ๊ธ์ • ๋ฆฌ๋ทฐ์ผ ํ™•๋ฅ ๊ณผ ๋ถ€์ • ๋ฆฌ๋ทฐ์ผ ํ™•๋ฅ , (3) ์ž…๋ ฅ๋œ ๋ฆฌ๋ทฐ์˜ ์–ด๋Š ๋‹จ์–ด๊ฐ€ ๊ธ์ •/๋ถ€์ • ๊ฒฐ์ •์— ์˜ํ–ฅ์„ ์ฃผ์—ˆ๋Š”์ง€ \
183
+ (๊ธ์ •์ผ ๊ฒฝ์šฐ ๋นจ๊ฐ•์ƒ‰, ๋ถ€์ •์ผ ๊ฒฝ์šฐ ํŒŒ๋ž€์ƒ‰)๋ฅผ ํ™•์ธํ•  ์ˆ˜ ์žˆ๋‹ค.
184
  """)
185
 
186
  with gr.Accordion(label="๋ชจ๋ธ์— ๋Œ€ํ•œ ์„ค๋ช… ( ์—ฌ๊ธฐ๋ฅผ ํด๋ฆญ ํ•˜์‹œ์˜ค. )", open=False):
187
  gr.Markdown(
188
  """
189
+ ์˜์–ด ๋ชจ๋ธ์€ bert-base-uncased ๊ธฐ๋ฐ˜์œผ๋กœ, ์˜์–ด ์˜ํ™” ๋ฆฌ๋ทฐ ๋ถ„์„ ๋ฐ์ดํ„ฐ์…‹์ธ SST-2๋กœ ํ•™์Šต ๋ฐ ํ‰๊ฐ€๋˜์—ˆ๋‹ค.
190
+ ํ•œ๊ธ€ ๋ชจ๋ธ์€ klue/roberta-base ๊ธฐ๋ฐ˜์ด๋‹ค. ๊ธฐ์กด ํ•œ๊ธ€ ์˜ํ™” ๋ฆฌ๋ทฐ ๋ถ„์„ ๋ฐ์ดํ„ฐ์…‹์ด ์กด์žฌํ•˜์ง€ ์•Š์•„, ๋„ค์ด๋ฒ„ ์˜ํ™”์˜ ๋ฆฌ๋ทฐ๋ฅผ ํฌ๋กค๋งํ•ด์„œ ์˜ํ™” ๋ฆฌ๋ทฐ ๋ถ„์„ ๋ฐ์ดํ„ฐ์…‹์„ ์ œ์ž‘ํ•˜๊ณ , ์ด๋ฅผ ์ด์šฉํ•˜์—ฌ ๋ชจ๋ธ์„ ํ•™์Šต ๋ฐ ํ‰๊ฐ€ํ•˜์˜€๋‹ค.
191
+ ์˜์–ด ๋ชจ๋ธ์€ SST-2์—์„œ 92.8%, ํ•œ๊ธ€ ๋ชจ๋ธ์€ ๋„ค์ด๋ฒ„ ์˜ํ™” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์…‹์—์„œ 94%์˜ ์ •ํ™•๋„๋ฅผ ๊ฐ€์ง„๋‹ค (test set ๊ธฐ์ค€).
192
+ ์–ธ์–ด๊ฐ์ง€๋Š” fasttext์˜ language detector๋ฅผ ์‚ฌ์šฉํ•˜์˜€๋‹ค. ๋ฆฌ๋ทฐ์˜ ๋‹จ์–ด๋ณ„ ์˜ํ–ฅ๋ ฅ์€, ๋‹จ์–ด ๊ฐ๊ฐ์„ ๋ชจ๋ธ์— ๋„ฃ์—ˆ์„ ๋•Œ ๊ฒฐ๊ณผ๊ฐ€ ๊ธ์ •์œผ๋กœ ๋‚˜์˜ค๋Š”์ง€ ๋ถ€์ •์œผ๋กœ ๋‚˜์˜ค๋Š”์ง€๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์ธก์ •ํ•˜์˜€๋‹ค.
193
  """)
194
 
195
  with gr.Row():