Spaces:
Runtime error
Runtime error
import os | |
import pandas as pd | |
from flask import Flask, jsonify, request, render_template | |
import spacy | |
from spacy import displacy | |
# Module that applies the model (inference)
from modules.inference import Tk_instruct | |
# Stocks Data | |
from dataset_creation.nasdaq_data import get_list, get_data | |
# Create the Flask application object.
# __name__ passes the name of the currently executing module to Flask.
app = Flask(__name__)
# Used by stocks() and by the news data index.
# Pandas DataFrame : ticker, name, sector, industry, diff, open, close, date
# NOTE(review): the column list above is carried over from the original comment;
# only .ticker and .to_dict() are used in this file — confirm against get_list().
demo_dic = get_list()
##### Home #####
# NOTE(review): no route decorator is visible in this view of the file;
# confirm how this handler is registered on `app`.
def home_page():
    """Render index.html, passing the site description as template variable ``embed``."""
    website_description_box = 'This website analyzes stock market news and provides answers to questions related to news articles.'
    return render_template('index.html', embed=website_description_box)
##### Data fetch #####
# Show Ticker's Table
# NOTE(review): no route decorator is visible in this view of the file;
# confirm how this handler is registered on `app`.
def stocks():
    """Return the ticker table held in ``demo_dic`` as JSON under the key ``result``."""
    result = demo_dic.to_dict()  # convert to a plain dictionary
    return jsonify(result=result)
################################################################################################
# Index of the news stored on disk, built once when the module is loaded.
# Resulting shape: {ticker: {date directory: [file name, ...], ...}, ...}
# A ticker that has no directory under ./news keeps an empty list as its value.
# Each ticker gets its OWN list here: dict.fromkeys(keys, []) would make every
# key share one single list object.
ticker_dic = {ticker: [] for ticker in demo_dic.ticker}
news_root = './news'
if not os.path.exists(news_root):
    raise NotImplementedError("Not exists News Data")  # abort deliberately
# Load the list of stored news
for ticker_name in os.listdir(news_root):
    if ticker_name not in ticker_dic:
        raise NotImplementedError("Not exists Ticker")  # abort deliberately
    ticker_dir = os.path.join(news_root, ticker_name)
    titles_by_date = {}
    for date_name in os.listdir(ticker_dir):
        titles = os.listdir(os.path.join(ticker_dir, date_name))
        # A date directory may contain no news at all; such dates are left out.
        if titles:
            titles_by_date[date_name] = titles
    ticker_dic[ticker_name] = titles_by_date
# Show Ticker's Data
# NOTE(review): no route decorator is visible in this view of the file;
# confirm how this handler is registered on `app`.
def chart():
    """Return chart data and stored news titles for one ticker as JSON.

    Reads the ``ticker`` query parameter, takes the first element of
    ``get_data(tickers=[ticker], numOfDay=60)``, rewrites its index as
    ``YYYY-MM-DD`` strings and converts it with ``to_dict()``. It then lists
    the directories under ``./news/<ticker>`` and keeps those that contain at
    least one entry.

    Returns:
        JSON response with ``chart_data`` and ``news_articles``
        ({date directory name: [file names]}, inserted in descending key order).

    Raises:
        ValueError: ``ticker`` would resolve to a path outside ``./news``.
    """
    print("Start /chart ")
    # Value received from the JavaScript side
    ticker = request.args.get('ticker')

    # ticker comes straight from the request, so refuse any value that would
    # make the news directory resolve outside ./news (e.g. "../..").
    news_root = os.path.abspath('./news')
    news_dir = os.path.abspath(os.path.join(news_root, ticker))
    if os.path.commonpath([news_root, news_dir]) != news_root:
        raise ValueError("Invalid ticker")

    # Implement Module
    # assumes get_data returns a sequence of DataFrames with a datetime-like
    # index (strftime is called on every index entry) — TODO confirm
    chart_data = get_data(tickers=[ticker], numOfDay=60)[0]
    # Change the date format
    chart_data.index = [k.strftime("%Y-%m-%d") for k in chart_data.index]
    chart_data = chart_data.to_dict()

    # Collect the news titles of this ticker per date, skipping empty dates.
    article_news_dict = {}
    for date_name in os.listdir(news_dir):
        title_list = os.listdir(os.path.join(news_dir, date_name))
        if title_list:
            article_news_dict[date_name] = title_list

    # Descending key order, intended to show the most recent news first
    # (presumably the directory names sort chronologically — verify).
    news_articles = {
        date_name: article_news_dict[date_name]
        for date_name in sorted(article_news_dict, reverse=True)
    }
    return jsonify(chart_data=chart_data, news_articles=news_articles)
################################################################################################
# URL styles:
# 1. Basic URL
# 2. URL with a query string
#    : request.args.get('name') reads the wanted value out of /user?name=value&name=value&...
# 3. Clean URL
# Show Ticker's Title and News's Title

# Entity labels that are dropped from the NER result.
_NER_SKIPPED_LABELS = frozenset(
    {'DATE', 'TIME', 'CARDINAL', 'MONEY', 'PERCENT', 'ORDINAL', 'PRODUCT'}
)
# Loaded on first use so the model is not re-read from disk on every request.
_ner_pipeline = None


def _get_ner_pipeline():
    """Return the spaCy "en_core_web_sm" pipeline, loading it on the first call only."""
    global _ner_pipeline
    if _ner_pipeline is None:
        _ner_pipeline = spacy.load("en_core_web_sm")
    return _ner_pipeline


# NOTE(review): no route decorator is visible in this view of the file;
# confirm how this handler is registered on `app`.
def news_info_ner():
    """Return the URL and the named entities of one stored news article as JSON.

    Query parameters (read from ``request.args``):
        ticker, date, title: identify the article; its text is read from
            ``./news/<ticker>/<date>/<title>.txt``.
        andSymbolInTitle: comma-separated character indexes at which ``&`` is
            inserted back into ``title``; empty or absent when there are none.

    Returns:
        JSON response with ``ticker``, ``date``, ``title``, ``url`` and
        ``ents``. ``ents`` holds the parallel lists ``text``, ``start_char``,
        ``end_char`` and ``label_`` for every kept entity, plus the full
        article text under ``news``.

    Raises:
        NotImplementedError: the URL table file is missing, or it does not
            contain exactly one row matching (ticker, date, title).
        ValueError: the article path would resolve outside ``./news``.
    """
    # Values received from the JavaScript side
    ticker = request.args.get('ticker')
    date = request.args.get('date')
    title = request.args.get('title')
    and_positions = request.args.get('andSymbolInTitle')
    print(ticker, date, title, and_positions)

    # '&' inside the title is transmitted separately: andSymbolInTitle carries
    # the positions, and each '&' is inserted back into the title in order.
    # A missing parameter (None) is treated like an empty one.
    if and_positions:
        for idx in [int(pos) for pos in and_positions.split(',')]:
            title = title[:idx] + '&' + title[idx:]

    # Look up the URL recorded for this ticker, date and title.
    url_table = "dataset_creation/save_news_url.tsv"
    if not os.path.exists(url_table):
        raise NotImplementedError("Not exists {} directory".format(url_table))
    df = pd.read_csv(url_table, sep='\t', index_col=0)
    filt = (df['ticker'] == ticker) & (df['date'] == date) & (df['title'] == title)
    url = list(df.loc[filt, 'url'].values)
    if len(url) != 1:
        raise NotImplementedError("There exists many URL or empty")
    url = url[0]

    #######################################################
    # Locate the news text on disk. The path parts come from the request, so
    # refuse anything that would resolve outside ./news (e.g. "../..").
    news_root = os.path.abspath('./news')
    news_path = os.path.abspath(os.path.join(news_root, ticker, date, title + '.txt'))
    if os.path.commonpath([news_root, news_path]) != news_root:
        raise ValueError("Invalid news path")
    with open(news_path, 'r') as f:
        news_data = f.read()

    # NER
    doc = _get_ner_pipeline()(news_data)  # News Data Analysis

    # Keep only the entities whose label is not in the skipped set.
    print("=====================================================================")
    ents = {'text': [], 'start_char': [], 'end_char': [], 'label_': []}
    for ent in doc.ents:
        if ent.label_ in _NER_SKIPPED_LABELS:
            continue
        print(ent.text, ent.start_char, ent.end_char, ent.label_)
        ents['text'].append(ent.text)
        ents['start_char'].append(ent.start_char)
        ents['end_char'].append(ent.end_char)
        ents['label_'].append(ent.label_)
    print("=====================================================================")
    ents['news'] = news_data
    print("ents : ", ents)
    return jsonify(ticker=ticker, date=date, title=title, url=url, ents=ents)
# NOTE(review): no route decorator is visible in this view of the file;
# confirm how this handler is registered on `app`.
def newsQuestions():
    """Answer a question about one stored news article and return it as JSON.

    Query parameters (read from ``request.args``):
        ticker, date, title: identify the article; its text is read from
            ``./news/<ticker>/<date>/<title>.txt``.
        andSymbolInTitle: comma-separated character indexes at which ``&`` is
            inserted back into ``title``; empty or absent when there are none.
        questions: passed unchanged as the second argument of ``Tk_instruct``.

    Returns:
        JSON response ``{"result": {"answer": <value returned by Tk_instruct>}}``.

    Raises:
        ValueError: the article path would resolve outside ``./news``.
    """
    # Values received from the JavaScript side
    ticker = request.args.get('ticker')
    date = request.args.get('date')
    title = request.args.get('title')
    and_positions = request.args.get('andSymbolInTitle')
    questions = request.args.get('questions')

    # '&' inside the title is transmitted separately: andSymbolInTitle carries
    # the positions, and each '&' is inserted back into the title in order.
    # A missing parameter (None) is treated like an empty one.
    if and_positions:
        for idx in [int(pos) for pos in and_positions.split(',')]:
            title = title[:idx] + '&' + title[idx:]

    # Locate the news text on disk. The path parts come from the request, so
    # refuse anything that would resolve outside ./news (e.g. "../..").
    news_root = os.path.abspath('./news')
    news_path = os.path.abspath(os.path.join(news_root, ticker, date, title + '.txt'))
    if os.path.commonpath([news_root, news_path]) != news_root:
        raise ValueError("Invalid news path")

    # Read the news text; the file is closed even if reading fails.
    with open(news_path, 'r') as f:
        text = f.read()

    # Apply the model
    answer = Tk_instruct(text, questions)
    return jsonify(result={'answer': answer})
# Terminal : Flask : re-run from the terminal after editing
# Elements : HTML : re-run from the terminal after editing
# Console : javascript : refresh the site
# Sources : File : refresh the site
# CSS : refresh the site
if __name__ == "__main__":
    # run app
    # host : allow access from every IP ( Default. localhost = 127.0.0.1 )
    # port : HTTP port to open ( Default. port = 5000 )
    # The port is given as an int, which is the type Flask documents for it.
    app.run(host='0.0.0.0', port=7860)  # http://0.0.0.0:7860
    # app.run(debug=True)