Spaces:

kristada673
/

roboadvisor

Runtime error

App Files Files Community

roboadvisor / finnlp /data_processors /fx.py

kristada673

Upload 19 files

de6e775 over 1 year ago

raw

history blame

3.23 kB

	import os
	import sys

	import pandas as pd
	from finta import TA


	def add_time_feature(df, symbol, dt_col_name="time"):
	    """Tag a price frame with its symbol and calendar fields of its timestamps.

	    The frame is modified in place and is also returned. The column named by
	    ``dt_col_name`` is parsed with ``pd.to_datetime`` into a new ``dt``
	    column, which additionally becomes the frame's index (the column itself
	    is kept).

	    Columns added, in this order: ``symbol``, ``dt``, ``minute``, ``hour``,
	    ``weekday`` (pandas ``dayofweek``: Monday=0 ... Sunday=6), ``week`` (ISO
	    week number), ``month``, ``year`` and ``day`` (day of the month).

	    Args:
	        df: DataFrame holding a column called ``dt_col_name`` that
	            ``pd.to_datetime`` can parse.
	        symbol: Value written to every row of the ``symbol`` column.
	        dt_col_name: Name of the source timestamp column.

	    Returns:
	        The same DataFrame object with the columns above added.
	    """
	    df["symbol"] = symbol
	    df["dt"] = pd.to_datetime(df[dt_col_name])
	    df.index = df["dt"]

	    # Read every field off the re-indexed ``dt`` column so the assignments
	    # below line up with the new index.
	    stamp = df["dt"].dt
	    calendar_fields = {
	        "minute": stamp.minute,
	        "hour": stamp.hour,
	        "weekday": stamp.dayofweek,
	        "week": stamp.isocalendar().week,
	        "month": stamp.month,
	        "year": stamp.year,
	        "day": stamp.day,
	    }
	    for column, values in calendar_fields.items():
	        df[column] = values
	    return df


	# 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'dx_30','close_30_sma', 'close_60_sma'
	def tech_indictors(df):
	    """Append finta technical-indicator columns to ``df`` and zero-fill NaNs.

	    The columns ``macd``, ``boll_ub``, ``boll_lb``, ``rsi_30``, ``dx_30``,
	    ``close_30_sma`` and ``close_60_sma`` are written onto the frame that
	    was passed in. Every NaN in the frame (not only in the new columns) is
	    then replaced with 0 via ``fillna``, and the first and last three rows
	    are printed.

	    Args:
	        df: Price DataFrame in whatever layout finta's ``TA`` functions
	            accept (presumably OHLC columns -- confirm against finta).

	    Returns:
	        The DataFrame produced by ``fillna(0)``, including the indicator
	        columns.
	    """
	    # NOTE(review): the "macd" column stores the SIGNAL line of TA.MACD, not
	    # the MACD line itself -- confirm this is intended.
	    df["macd"] = TA.MACD(df).SIGNAL

	    # Both Bollinger columns come from one BBANDS result; compute it once.
	    bands = TA.BBANDS(df)
	    df["boll_ub"] = bands.BB_UPPER
	    df["boll_lb"] = bands.BB_LOWER

	    df["rsi_30"] = TA.RSI(df, period=30)
	    df["dx_30"] = TA.ADX(df, period=30)
	    df["close_30_sma"] = TA.SMA(df, period=30)
	    df["close_60_sma"] = TA.SMA(df, period=60)

	    # Indicator warm-up periods leave NaNs at the start; replace them with 0.
	    df = df.fillna(0)
	    print(
	        f"--------df head - tail ----------------\n{df.head(3)}\n{df.tail(3)}\n---------------------------------"
	    )

	    return df


	def split_timeserious(df, key_ts="dt", freq="W", symbol=""):
	    """Split ``df`` into fixed-frequency time buckets, one CSV per bucket.

	    Rows are grouped with ``pd.Grouper(level=key_ts, freq=freq)``. Each
	    non-empty bucket is written to
	    ``./data/split/{symbol}/{hourly|daily|weekly|monthly|Yearly}/``
	    as ``{symbol}_{YYYY}_{count}.csv``, where ``YYYY`` is the year of the
	    bucket label and ``count`` is the bucket's position among all buckets.
	    Empty buckets (for example days without rows at ``freq="D"``) are
	    skipped but still counted, so the numbering can have gaps.

	    Before writing, the bucket's index is replaced by a fresh 0..n-1 range
	    and its ``dt`` column is dropped. The input frame is not modified. The
	    output directory is created up front if it does not exist.

	    Args:
	        df: DataFrame whose index has a datetime level named ``key_ts`` and
	            which contains a ``dt`` column.
	        key_ts: Name of the index level to group on.
	        freq: Bucket size, one of "H", "D", "W", "M", "Y".
	        symbol: Used in the output directory and file names.

	    Raises:
	        KeyError: If ``freq`` is not one of the codes listed above, or if
	            a bucket has no ``dt`` column to drop.
	    """
	    freq_name = {
	        "H": "hourly",
	        "D": "daily",
	        "W": "weekly",
	        "M": "monthly",
	        "Y": "Yearly",
	    }
	    # NOTE(review): these codes are passed straight to pd.Grouper; newer
	    # pandas releases deprecate some of them ("H", "M", "Y") -- confirm
	    # against the pandas version in use.
	    buckets = df.groupby(pd.Grouper(level=key_ts, freq=freq))

	    # The target directory does not depend on the bucket, so build it once.
	    out_dir = f"./data/split/{symbol}/{freq_name[freq]}"
	    os.makedirs(out_dir, exist_ok=True)

	    for count, (label, chunk) in enumerate(buckets):
	        # A frequency grouper also yields bins that contain no rows; writing
	        # them would only produce header-only files.
	        if chunk.empty:
	            continue
	        fn = f"{out_dir}/{symbol}_{label:%Y}_{count}.csv"
	        print(f"save to:{fn}")
	        # Work on copies rather than mutating the groupby slice in place.
	        chunk.reset_index(drop=True).drop(columns=["dt"]).to_csv(fn)
	    return


	"""
	Usage: python ./finnlp/data_processors/fx.py GBPUSD W ./data/raw/GBPUSD_raw.csv
	symbol = "GBPUSD"
	freq = one of H, D, W, M, Y
	file = path to a .csv with column names [time, Open, High, Low, Close, Vol]
	"""
	if __name__ == "__main__":
	    # Command-line entry point: SYMBOL FREQ FILE.csv
	    # Loads the CSV, adds the time and indicator columns, then writes the
	    # per-period split files.
	    if len(sys.argv) < 4:
	        sys.exit(f"usage: python {sys.argv[0]} SYMBOL FREQ FILE.csv")
	    symbol, freq, file = sys.argv[1:4]
	    print(f"processing... symbol:{symbol} freq:{freq} file:{file}")
	    try:
	        df = pd.read_csv(file)
	    except FileNotFoundError:
	        # sys.exit with a message writes it to stderr and exits with status 1,
	        # so callers can tell the run failed.
	        sys.exit(f"No such file or directory: {file}")
	    except (OSError, ValueError) as err:
	        # Other I/O failures and unparsable content (pandas parser errors
	        # derive from ValueError) are reported as what they are.
	        sys.exit(f"Could not read {file}: {err}")
	    df = add_time_feature(df, symbol=symbol, dt_col_name="time")
	    df = tech_indictors(df)
	    split_timeserious(df, freq=freq, symbol=symbol)
	    print("Done!")