import choose from pymongo import MongoClient import pandas as pd import numpy as np import datetime import os client = MongoClient( "mongodb://wth000:wth000@43.159.47.250:27017/dbname?authSource=wth000") db = client["wth000"] # 设置参数 # names = ["可转债","COIN", "股票", "指数", "行业", "ETF",] names = ["COIN",] mubiao = f"{5}日量比" a = 20 # 将数据划分成a个等距离的区间 # 获取当前.py文件的绝对路径 file_path = os.path.abspath(__file__) # 获取当前.py文件所在目录的路径 dir_path = os.path.dirname(file_path) # 获取当前.py文件所在目录的上两级目录的路径 dir_path = os.path.dirname(os.path.dirname(dir_path)) files = os.listdir(dir_path) for file in files: for filename in names: if (filename in file): try: print(f"{mubiao}") # 获取文件名和扩展名 name, extension = os.path.splitext(file) path = os.path.join(dir_path, f"{name}.csv") print(name) df = pd.read_csv(path) # df = df.reset_index() # dflanchou = pd.read_csv(f"每日蓝筹.csv") # dflanchou = dflanchou.iloc[:, :2] # dflanchou.columns = ['日期', '代码'] # # dflanchou.rename(columns={dflanchou.columns[0]: '日期', dflanchou.columns[1]: '代码'}, inplace=True) # df = df.merge(dflanchou[['日期', '代码']], on=['日期', '代码']) # print(df) # dfkaipan = pd.read_csv('非ST股票(分钟)开盘后五分钟.csv') # dfkaipan.columns = ['日期', '代码', '开盘(开盘后五分钟)', '最高(开盘后五分钟)', '最低(开盘后五分钟)', # '收盘(开盘后五分钟)', '昨收(开盘后五分钟)', '成交额(开盘后五分钟)', '涨跌幅(今收今开)(开盘后五分钟)'] # df = df.merge(dfkaipan, on=['日期', '代码']) # 增加设置基本面因子 # dfshoupan = pd.read_csv('非ST股票(分钟)收盘前五分钟.csv') # dfshoupan.columns = ['日期', '代码', '开盘(收盘前五分钟)', '最高(收盘前五分钟)', '最低(收盘前五分钟)', # '收盘(收盘前五分钟)', '昨收(收盘前五分钟)', '成交额(收盘前五分钟)', '涨跌幅(今收今开)(收盘前五分钟)'] # df = df.merge(dfshoupan, on=['日期', '代码']) # 增加设置基本面因子 watchtime = 1999 # start_date = datetime.datetime( # watchtime, int(1), int(1)).strftime("%Y-%m-%d %H:%M:%S") # end_date = datetime.datetime(datetime.datetime.strptime( # start_date, "%Y-%m-%d %H:%M:%S").year + 8, int(1), int(1)).strftime("%Y-%m-%d %H:%M:%S") # df = df[(df["日期"] >= start_date) & (df["日期"] <= end_date)] df = df.dropna() # # 量比达到低点,观察是否反弹 # df = df[df["成交量"] == df[f"{40}日成交量低点"]] df = df[df["成交量"] == df[f"{40}日成交量高点"]] # 价格突破高点,观察量比分布 # df = df[df["收盘"] == df[f"{5}日收盘高点"]] # df = df.groupby("代码", group_keys=False).apply(choose.technology) df, m, n = choose.choose(name, df) if ("股票" in name): for i in range(1, n+1): df = df[df[f"{i}日后总涨跌幅(未来函数)"] <= 3*(1+0.1*n)] else: for i in range(1, n+1): df = df[df[f"{i}日后总涨跌幅(未来函数)"] <= 20*(1+0.1*n)] df = df.dropna() df.to_csv(f"实际统计数据{name}_{mubiao}_{watchtime}年.csv") sorted_data = np.sort(df[f"{mubiao}"]) indices = np.linspace( 0, len(df[f"{mubiao}"]), num=a+1, endpoint=True, dtype=int) # 得到每一个区间的上界,并作为该部分对应的区间范围 ranges = [] for i in range(len(indices) - 1): start_idx = indices[i] end_idx = indices[i+1] if i != len(indices) - \ 2 else len(df[f"{mubiao}"]) # 最后一段需要特殊处理 upper_bound = sorted_data[end_idx-1] # 注意索引从0开始,因此要减1 ranges.append((sorted_data[start_idx], upper_bound)) result_dicts = [] day = n # 观察不同的持仓周期的涨跌分布 for n in range(1, day): for rank_range in ranges: sub_df = df.copy()[(df[f"{mubiao}"] >= rank_range[0]) & (df[f"{mubiao}"] <= rank_range[1])] future_returns = np.array(sub_df[f"{n}日后总涨跌幅(未来函数)"]) # 括号注意大小写的问题,要不就会报错没这个参数 up_rate = len( future_returns[future_returns >= 0]) / len(future_returns) avg_return = np.mean(future_returns) result_dict = { f"{mubiao}": f"from{rank_range[0]}to{rank_range[1]}", f"未来{n}日上涨次数": len(future_returns[future_returns >= 0]), f"未来{n}日上涨概率": up_rate, f"未来{n}日平均涨跌幅": avg_return, } result_dicts.append(result_dict) # 将结果持久化 result_df = pd.DataFrame(result_dicts) for n in range(1, day): cols_to_shift = [f"未来{n}日上涨概率", f"未来{n}日上涨次数", f"未来{n}日平均涨跌幅"] result_df[cols_to_shift] = result_df[cols_to_shift].shift( -a*(n-1)) # result_df = result_df.dropna() # 删除含有空值的行 path = os.path.join(os.path.abspath("."), "资产单指标平均收益分布") if not os.path.exists(path): os.makedirs(path) result_df.round(decimals=6).to_csv( f"{path}/{name}{mubiao}持有{n}日{str(watchtime)}平均收益分布.csv", index=False) print(name, "已完成") except Exception as e: print(f"发生bug: {e}")