# -*- coding: utf-8 -*-
"""20231115_hf_space์ ์ฌ๋ณธ
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/167WkIz-J7_z4FG65GkVPfkosxIXlKMQq
"""
# Build a simple personal site that summarizes news articles
# Hosting options: GitHub Pages or a Hugging Face Space
import gradio as gr
# The Interface class automatically turns the declared inputs/outputs into web UI elements
from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration
# PreTrainedTokenizerFast: a pretrained tokenizer that converts text into a format the model can understand
# BartForConditionalGeneration: a variant of the BART model, used for summarization, translation, text generation, etc.
# BART is an example of an encoder-decoder model
# Import statements that start with `from transformers import` very often pull in
# the generic AutoTokenizer / AutoModel classes instead, e.g.:
# tokenizer = AutoTokenizer.from_pretrained("model name")
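# A hedged sketch of that Auto-class route (AutoModelForSeq2SeqLM is the generic
# seq2seq loader, which resolves to BartForConditionalGeneration for this checkpoint);
# it is equivalent to the explicit classes used below, so it is left commented out:
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# auto_tokenizer = AutoTokenizer.from_pretrained("ainize/kobart-news")
# auto_model = AutoModelForSeq2SeqLM.from_pretrained("ainize/kobart-news")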
# Load the model and tokenizer
tokenizer = PreTrainedTokenizerFast.from_pretrained("ainize/kobart-news")
model = BartForConditionalGeneration.from_pretrained("ainize/kobart-news")
# Take the original text and return a summary
def summ(txt):
    input_ids = tokenizer.encode(txt, return_tensors="pt")
    summary_text_ids = model.generate(
        input_ids=input_ids,
        bos_token_id=model.config.bos_token_id,  # BOS = Beginning Of Sentence
        eos_token_id=model.config.eos_token_id,  # EOS = End Of Sentence
        length_penalty=2.0,  # exponent on length in beam scoring; values > 1.0 favor longer summaries
        max_length=142,      # upper bound on generated tokens
        min_length=56,       # lower bound on generated tokens
        num_beams=4)         # beam search: think of it as branches; expand 4, expand 4 from each (16 total), keep the best 4, repeat
    return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
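
# A quick sanity check before wiring up the UI (hedged usage sketch; the sample
# below is a placeholder, and kobart-news expects Korean input, so paste a real
# Korean article body to get a meaningful summary):
# sample = "..."  # placeholder for a Korean news article body
# print(summ(sample))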
interface = gr.Interface(summ,
                         [gr.Textbox(label="original text")],
                         [gr.Textbox(label="summary")])
interface.launch()
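# launch() serves the demo locally. In Colab, a temporary public URL can be
# requested with the standard Gradio share option (on a Hugging Face Space the
# platform hosts the app itself, so plain launch() is enough):
# interface.launch(share=True)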