gabrielwithhappy committed
Commit ea6eb55 • 1 Parent(s): 8d585eb

update model and precondition logic

Files changed (1)
  1. app.py +89 -4
app.py CHANGED
@@ -1,15 +1,100 @@
import gradio as gr
+ from newspaper import Article
+ from newspaper import Config
+
+ from transformers import pipeline
+ import requests
+ from bs4 import BeautifulSoup
+ import re
+
+ from bs4 import BeautifulSoup as bs
+ import requests
+
+ from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration
+
+ # Load the model and tokenizer, then summarize the input text.
+ def get_summary(input_text):
+     tokenizer = PreTrainedTokenizerFast.from_pretrained("ainize/kobart-news")
+     summary_model = BartForConditionalGeneration.from_pretrained("ainize/kobart-news")
+     input_ids = tokenizer.encode(input_text, return_tensors="pt")
+     summary_text_ids = summary_model.generate(
+         input_ids=input_ids,
+         bos_token_id=summary_model.config.bos_token_id,
+         eos_token_id=summary_model.config.eos_token_id,
+         length_penalty=2.0,
+         max_length=142,
+         min_length=56,
+         num_beams=4,
+     )
+     return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
+
+
+ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
+ config = Config()
+ config.browser_user_agent = USER_AGENT
+ config.request_timeout = 10
+
+ class news_collector:
+     def __init__(self):
+         self.examples = []
+
+     def get_new_parser(self, url):
+         article = Article(url, language='ko')
+         article.download()
+         article.parse()
+         return article
+
+     def get_news_links(self, page=''):
+         url = "https://news.daum.net/breakingnews/economic"
+         response = requests.get(url)
+         html_text = response.text
+
+         soup = bs(response.text, 'html.parser')
+         news_titles = soup.select("a.link_txt")
+         links = [item.attrs['href'] for item in news_titles]
+         https_links = [item for item in links if item.startswith('https')]
+         return https_links[:2]
+
+     def update_news_examples(self):
+         news_links = self.get_news_links()
+         for news_url in news_links:
+             article = self.get_new_parser(news_url)
+             self.examples.append(get_summary(article.text))
+
+
+ title = "Balanced News Reading"
+
- def greet(name):
-     return "Hello " + name + "!!"

with gr.Blocks() as demo:
+     news = news_collector()
+
    gr.Markdown(
    """
    # Balanced News Reading
+
+     You can read the news while keeping positive and negative articles in balance. When the demo starts, it fetches that day's Daum news into `Example`.
+     Some articles are longer than the model's maximum input length, so each article is summarized first and the summary is added to `Example`.
+
+     Select a news article and press the `Submit` button to see its sentiment classification.
    """)
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
- # iface.launch()
+     news.update_news_examples()
+
+     gr.load("models/gabrielyang/finance_news_classifier-KR_v7",
+             inputs=gr.Textbox(placeholder="Enter the text of a news article."),
+             examples=news.examples)
+
+     # gr.Examples(
+     #     examples=[
+     #         ["images/demo1.jpg"],
+     #         ["images/demo2.jpg"],
+     #         ["images/demo4.jpg"],
+     #     ],
+

  if __name__ == "__main__":
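
One observation on the summarizer added above: get_summary calls from_pretrained on every invocation, so the tokenizer and model are reloaded for each example that gets summarized. A minimal sketch of a load-once variant, using the same model and generation settings as the commit (an editor's suggestion, not part of this commit):

    # Sketch: load the KoBART summarizer once at module import; get_summary
    # then only tokenizes, generates, and decodes.
    from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration

    tokenizer = PreTrainedTokenizerFast.from_pretrained("ainize/kobart-news")
    summary_model = BartForConditionalGeneration.from_pretrained("ainize/kobart-news")

    def get_summary(input_text):
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        summary_text_ids = summary_model.generate(
            input_ids=input_ids,
            bos_token_id=summary_model.config.bos_token_id,
            eos_token_id=summary_model.config.eos_token_id,
            length_penalty=2.0,  # bias generation toward longer summaries
            max_length=142,      # hard cap on summary tokens
            min_length=56,
            num_beams=4,         # beam search for more coherent output
        )
        return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)

With two example articles per launch this saves two full model loads, and it matters more if update_news_examples is ever called repeatedly.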
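A second note on the precondition logic named in the commit message: the commit builds a newspaper Config with a browser user agent and a 10-second timeout, but never passes it to Article, and get_news_links calls requests.get with no headers or timeout either, so both requests run with library defaults. A sketch of wiring the config through (an assumption about intent, not part of the commit):

    # Sketch: actually apply the user agent and timeout that the commit defines.
    from newspaper import Article, Config
    import requests

    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

    config = Config()
    config.browser_user_agent = USER_AGENT
    config.request_timeout = 10

    def get_new_parser(url):
        # Pass the config so downloads use the custom user agent and timeout.
        article = Article(url, language='ko', config=config)
        article.download()
        article.parse()
        return article

    # Apply the same user agent and timeout to the listing-page request.
    response = requests.get(
        "https://news.daum.net/breakingnews/economic",
        headers={"User-Agent": USER_AGENT},
        timeout=10,
    )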