mdj1412
commit first
c109682
raw
history blame
9.68 kB
import os
from bs4 import BeautifulSoup
import pandas as pd
from flask import Flask, jsonify, request, render_template
import spacy
from spacy import displacy
# Module that applies the model
from modules.inference import Tk_instruct
# Stocks Data
from dataset_creation.nasdaq_data import get_list, get_data
# Create the Flask object.
# __name__ passes the name of the currently executing module to Flask.
app = Flask(__name__)
# Used by stocks() and by the news data index below.
# Pandas DataFrame : ticker, name, sector, industry, diff, open, close, date
demo_dic = get_list()
##### Home #####
@app.route('/')
def home_page():
    """Serve the landing page, handing the template a one-line site description."""
    return render_template(
        'index.html',
        embed='This website analyzes stock market news and provides answers to questions related to news articles.',
    )
##### Data fetch #####
@app.route('/submit', methods=['GET', 'POST'])
def submit():
    """Echo the ``input_text`` query parameter back inside a JSON payload.

    Returns:
        JSON of the form
        ``{"result": {"output": "My output is a summary of: <input_text>"}}``.
        A missing ``input_text`` is treated as an empty string rather than
        failing on ``str + None``.
    """
    input_text = request.args.get('input_text', '')
    return jsonify(result={"output": "My output is a summary of: " + input_text})
@app.route('/model', methods=['GET', 'POST'])
def model():
    """Run ``Tk_instruct`` on the ``text_input`` query parameter and return its output as JSON."""
    print("\t\t Start model !!!")

    # Text sent by the JavaScript side of the page.
    text_input = request.args.get('text_input')
    print(f"Fetch from Javascript /inference, text_input : {text_input}")

    # Apply the model and wrap its answer under the "text_output" key.
    text_output = {"text_output": Tk_instruct(text_input)}
    print(f"Fetch from Javascript /inference, text_output : {text_output}")

    return jsonify(result=text_output)
# Show Ticker's Table
@app.route('/stocks', methods=['GET', 'POST'])
def stocks():
    """Return the ticker table held in ``demo_dic``, converted to a dictionary, as JSON."""
    return jsonify(result=demo_dic.to_dict())
################################################################################################
# {ticker1: [{date1: [title1, title2, ...]}, {date2: [title3, title4, ...]}, ...], ticker2: [{date3: [title5, title6, ...]}, {date4: [title7, title8, ...]}, ...], ... }
# Index of the news stored on disk, built once at import time.
# Tickers with a directory under ./news map to {date: [title, ...]};
# tickers without one keep an empty list.
# A comprehension gives every ticker its own list; dict.fromkeys(keys, [])
# would make all keys share a single list object.
ticker_dic = {ticker_name: [] for ticker_name in demo_dic.ticker}

news_root = './news'
if not os.path.exists(news_root):
    raise NotImplementedError("Not exists News Data")  # fail fast: nothing to serve

# Walk ./news/<ticker>/<date>/ and collect the entries found for each date.
for ticker_name in os.listdir(news_root):
    if ticker_name not in ticker_dic:
        raise NotImplementedError("Not exists Ticker")  # directory for an unknown ticker

    ticker_path = os.path.join(news_root, ticker_name)
    titles_by_date = {}
    for date_name in os.listdir(ticker_path):
        titles = os.listdir(os.path.join(ticker_path, date_name))
        # A date directory may hold no news; such dates are left out.
        if titles:
            titles_by_date[date_name] = titles
    ticker_dic[ticker_name] = titles_by_date
# from IPython import embed; embed()
# Show Ticker's Title
@app.route('/<ticker>', methods=['GET', 'POST'])
def ticker(ticker):
    """Render the chart page, titled after the ticker taken from the URL path."""
    return render_template('chart.html', embed="%s Chart" % ticker)
# Show Ticker's Data
@app.route('/chart', methods=['GET', 'POST'])
def chart():
    """Return ``get_data``'s result for the requested ticker as JSON, indexed by YYYY-MM-DD strings."""
    print("Start /chart ")

    # Ticker sent by the JavaScript side of the page.
    symbol = request.args.get('ticker')

    # get_data is called with a one-element ticker list; use its first result.
    frame = get_data(tickers=[symbol], numOfDay=120)[0]

    # Replace every index entry with its "%Y-%m-%d" string form.
    formatted_index = []
    for stamp in frame.index:
        formatted_index.append(stamp.strftime("%Y-%m-%d"))
    frame.index = formatted_index

    return jsonify(result=frame.to_dict())
@app.route('/news', methods=['GET', 'POST'])
def news():
    """Return the news titles stored for a ticker, grouped by date, newest date first.

    Query parameters:
        ticker: name of the sub-directory of ``./news`` to list.

    Returns:
        JSON ``{"result": {date: [title, ...], ...}}`` holding only the dates
        that have at least one entry. Responds with status 400 and an empty
        result when ``ticker`` is missing or is not a single path component.
    """
    print("Start /news ")

    # Ticker sent by the JavaScript side of the page.
    ticker = request.args.get('ticker')

    # The value comes straight from the query string and is joined into a
    # filesystem path, so refuse anything that could leave ./news.
    if not ticker or ticker in ('.', '..') or os.path.basename(ticker) != ticker:
        return jsonify(result={}), 400

    news_dir = os.path.join('./news', ticker)

    # Collect the titles per date, listing each date directory only once.
    result = {}
    for date_name in os.listdir(news_dir):
        title_list = os.listdir(os.path.join(news_dir, date_name))
        if title_list:
            result[date_name] = title_list

    # Order so the most recent date comes first.
    # NOTE(review): Flask's JSON provider may sort keys when serializing, which
    # would undo this ordering - confirm the app's sort_keys setting.
    sorted_result = dict(sorted(result.items(), reverse=True))
    return jsonify(result=sorted_result)
################################################################################################
# 1. Basic URL
# 2. URL with a query string
# : request.args.get('variable_name') reads the value of the wanted variable from /user?var=value&var=value&...
# 3. clean URL
# Show Ticker's Title and News's Title
@app.route('/info', methods=['GET', 'POST'])
def ticker_title():
    """Render the news-analysis page for one article.

    Query parameters:
        ticker, date, title: identify the article.
        andSymbolInTitle: optional comma-separated character positions at
            which an '&' is inserted back into ``title``.

    Returns:
        The rendered ``news_analysis.html`` template with the ticker, date,
        title and the article URL looked up in the saved URL table.

    Raises:
        NotImplementedError: if the URL table file does not exist, or if the
            lookup does not match exactly one row.
    """
    print("app.py : /info Start ")

    # Values sent by the JavaScript side of the page.
    ticker = request.args.get('ticker')
    date = request.args.get('date')
    title = request.args.get('title')
    andSymbolInTitle = request.args.get('andSymbolInTitle')

    # The title arrives without its '&' characters; their positions come
    # separately and are inserted back here, in the order given.
    # A missing or empty parameter means there is nothing to restore.
    if andSymbolInTitle:
        for idx in map(int, andSymbolInTitle.split(',')):
            title = title[:idx] + '&' + title[idx:]

    # Look up the URL recorded for this ticker, date and title.
    url_dir = "dataset_creation/save_news_url.tsv"
    if not os.path.exists(url_dir):
        raise NotImplementedError("Not exists {} directory".format(url_dir))

    df = pd.read_csv(url_dir, sep='\t', index_col=0)
    filt = (df['ticker'] == ticker) & (df['date'] == date) & (df['title'] == title)
    url = list(df.loc[filt, 'url'].values)

    # Exactly one matching row is expected.
    if len(url) != 1:
        raise NotImplementedError("There exists many URL or empty")

    return render_template(
        'news_analysis.html',
        embed1=ticker,
        embed2="Date: %s" % date,
        embed3="Title: %s" % title,
        embed4=url[0],
    )
@app.route('/ner', methods=['GET', 'POST'])
def ner():
    """Run spaCy named-entity recognition over one stored news article.

    Query parameters:
        ticker, date, title: locate the file ``./news/<ticker>/<date>/<title>.txt``.

    Returns:
        JSON ``{"result": {...}}`` with parallel lists ``text``, ``start_char``,
        ``end_char`` and ``label_`` for the kept entities, plus the article
        text under ``news``. Responds with status 400 when a parameter is
        missing or the resulting path would fall outside ``./news``.
    """
    print("Start /ner")

    # Values sent by the JavaScript side of the page.
    ticker = request.args.get('ticker')
    date = request.args.get('date')
    title = request.args.get('title')
    print(ticker, date, title)

    if not (ticker and date and title):
        return jsonify(error="ticker, date and title are required"), 400

    # Locate the article on disk. The parts come from the query string, so
    # resolve the path and make sure it is still inside the news directory.
    # NOTE(review): unlike /info and /newsQuestions, this route does not
    # restore '&' characters in the title - confirm that is intended.
    news_root = os.path.realpath('./news')
    news_path = os.path.realpath(os.path.join(news_root, ticker, date, title + '.txt'))
    if not news_path.startswith(news_root + os.sep):
        return jsonify(error="invalid news path"), 400

    # Read inside a context manager so the handle is always closed.
    with open(news_path, 'r') as f:
        news_data = f.read()

    # NER
    # NOTE(review): the pipeline is loaded on every request; consider loading
    # it once if request latency matters.
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(news_data)

    # Entity labels that are discarded from the response.
    skipped_labels = {'DATE', 'TIME', 'CARDINAL', 'MONEY', 'PERCENT', 'ORDINAL'}

    print("=====================================================================")
    ents = {'text': [], 'start_char': [], 'end_char': [], 'label_': []}
    for ent in doc.ents:
        if ent.label_ in skipped_labels:
            continue
        print(ent.text, ent.start_char, ent.end_char, ent.label_)
        ents['text'].append(ent.text)
        ents['start_char'].append(ent.start_char)
        ents['end_char'].append(ent.end_char)
        ents['label_'].append(ent.label_)
    print("=====================================================================")

    ents['news'] = news_data
    return jsonify(result=ents)
@app.route('/newsQuestions', methods=['GET', 'POST'])
def newsQuestions():
    """Answer a question about one stored news article using ``Tk_instruct``.

    Query parameters:
        ticker, date, title: locate the file ``./news/<ticker>/<date>/<title>.txt``.
        andSymbolInTitle: optional comma-separated character positions at
            which an '&' is inserted back into ``title``.
        questions: passed to ``Tk_instruct`` together with the article text.

    Returns:
        JSON ``{"result": {"answer": ...}}``. Responds with status 400 when
        ticker, date or title is missing or the resulting path would fall
        outside ``./news``.
    """
    # Values sent by the JavaScript side of the page.
    ticker = request.args.get('ticker')
    date = request.args.get('date')
    title = request.args.get('title')
    andSymbolInTitle = request.args.get('andSymbolInTitle')
    questions = request.args.get('questions')

    if not (ticker and date and title):
        return jsonify(error="ticker, date and title are required"), 400

    # The title arrives without its '&' characters; their positions come
    # separately and are inserted back here, in the order given.
    # A missing or empty parameter means there is nothing to restore.
    if andSymbolInTitle:
        for idx in map(int, andSymbolInTitle.split(',')):
            title = title[:idx] + '&' + title[idx:]

    # Locate the article on disk. The parts come from the query string, so
    # resolve the path and make sure it is still inside the news directory.
    news_root = os.path.realpath('./news')
    news_path = os.path.realpath(os.path.join(news_root, ticker, date, title + '.txt'))
    if not news_path.startswith(news_root + os.sep):
        return jsonify(error="invalid news path"), 400

    # Load the article; the context manager closes the file even if read fails.
    with open(news_path, 'r') as f:
        text = f.read()

    # Apply the model.
    answer = Tk_instruct(text, questions)

    return jsonify(result={'answer': answer})
# Terminal : Flask : restart from the terminal after a change
# Elements : HTML : restart from the terminal after a change
# Console : javascript : refresh (re-sync) the site
# Sources : File : refresh (re-sync) the site
# CSS : refresh (re-sync) the site
if __name__ == "__main__":
    # Run the app.
    # host : '0.0.0.0' allows access from every IP (default: localhost = 127.0.0.1)
    # port : HTTP port opened for connections (default: 5000); given as an int,
    #        which is the type Flask documents for this argument.
    app.run(host='0.0.0.0', port=7860)  # http://0.0.0.0:7860
    # app.run(debug=True)