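"""Run the 2024 ROI analysis: markets ETL, tools ETL, profitability
analysis, and request-time enrichment of the tools data."""
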
import gc
import logging
import os
import pickle
from datetime import datetime
from functools import partial

import pandas as pd
from web3 import Web3

from markets import (
    etl as mkt_etl,
    DEFAULT_FILENAME as MARKETS_FILENAME,
)
from tools import (
    etl as tools_etl,
    update_tools_accuracy,
)
from pull_data import (
    DATA_DIR,
    parallelize_timestamp_conversion,
    block_number_to_timestamp,
)
from profitability import run_profitability_analysis
from get_mech_info import get_mech_info_2024
from utils import get_question, current_answer

TOOLS_FILENAME = "tools_2024.parquet"

logging.basicConfig(level=logging.INFO)


def roi_analysis():
"""Run ROI analysis for the trades done in 2024."""
rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
web3 = Web3(Web3.HTTPProvider(rpc))

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # Run tools ETL
    logging.info("Running tools ETL")
    # This ETL already saves the tools parquet file
    tools_etl(
        rpcs=[rpc],
        mech_info=get_mech_info_2024(),
        filename=TOOLS_FILENAME,
    )
    logging.info("Tools ETL completed")

    # Run profitability analysis
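    # Remove any stale trades file so the analysis regenerates it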
    if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
        os.remove(DATA_DIR / "fpmmTrades.parquet")
    logging.info("Running profitability analysis")
    date = "2024-01-01"
    datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
    timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
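    # Only trades from 2024-01-01 onward are analyzed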
    run_profitability_analysis(
        rpc=rpc,
        tools_filename=TOOLS_FILENAME,
        trades_filename="fpmmTrades.parquet",
        from_timestamp=timestamp_jan_2024,
    )
    logging.info("Profitability analysis completed")

    # Get currentAnswer from FPMMS
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)
    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(get_question)
    tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))
tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")

    # Convert block number to timestamp
    logging.info("Converting block number to timestamp")
    with open(DATA_DIR / "t_map.pkl", "rb") as f:
        t_map = pickle.load(f)
    tools["request_time"] = tools["request_block"].map(t_map)
    # Identify tools with missing request_time and fill them
    missing_time_indices = tools[tools["request_time"].isna()].index
    if not missing_time_indices.empty:
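        # Fall back to parallel RPC lookups for blocks missing from the cache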
        partial_block_number_to_timestamp = partial(
            block_number_to_timestamp, web3=web3
        )
        missing_timestamps = parallelize_timestamp_conversion(
            tools.loc[missing_time_indices], partial_block_number_to_timestamp
        )
        # Update the original DataFrame with the missing timestamps
        for i, timestamp in zip(missing_time_indices, missing_timestamps):
            tools.at[i, "request_time"] = timestamp
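
    # Derive month and week period columns for downstream aggregation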
tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
"%Y-%m"
)
tools["request_month_year_week"] = (
pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
)

    # Save the tools data after the content updates
    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)

    # Update t_map with new timestamps
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")
        .to_dict()["request_time"]
    )
    t_map.update(new_timestamps)
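    # Persist the enriched mapping under a 2024-specific filename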
    with open(DATA_DIR / "t_map_2024.pkl", "wb") as f:
        pickle.dump(t_map, f)

    # Clean up and release memory
    del tools
    del fpmms
    del t_map
    gc.collect()
    logging.info("ROI analysis files generated and saved")


if __name__ == "__main__":
    roi_analysis()