import gc
import logging
import os
import pickle
from datetime import datetime
from functools import partial

import pandas as pd
from web3 import Web3

from get_mech_info import get_mech_info_2024
from markets import DEFAULT_FILENAME as MARKETS_FILENAME
from markets import etl as mkt_etl
from profitability import run_profitability_analysis
from pull_data import (
    DATA_DIR,
    block_number_to_timestamp,
    parallelize_timestamp_conversion,
)
from tools import etl as tools_etl
from utils import current_answer, get_question

TOOLS_FILENAME = "tools_2024.parquet"

logging.basicConfig(level=logging.INFO)


def roi_analysis():
    """Run ROI analysis for the trades done in 2024."""
    rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
    web3 = Web3(Web3.HTTPProvider(rpc))

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # Run tools ETL; this step already saves the tools parquet file
    logging.info("Running tools ETL")
    tools_etl(
        rpcs=[rpc],
        mech_info=get_mech_info_2024(),
        filename=TOOLS_FILENAME,
    )
    logging.info("Tools ETL completed")

    # Run profitability analysis over trades made since January 2024,
    # removing any stale trades file first
    if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
        os.remove(DATA_DIR / "fpmmTrades.parquet")
    logging.info("Running profitability analysis")
    date = "2024-01-01"
    datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
    timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
    run_profitability_analysis(
        rpc=rpc,
        tools_filename=TOOLS_FILENAME,
        trades_filename="fpmmTrades.parquet",
        from_timestamp=timestamp_jan_2024,
    )
    logging.info("Profitability analysis completed")

    # Get currentAnswer from FPMMs
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)

    # Get the question and current answer for the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(get_question)
    tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")

    # Convert block numbers to timestamps using the cached block -> timestamp map
    logging.info("Converting block number to timestamp")
    with open(DATA_DIR / "t_map.pkl", "rb") as f:
        t_map = pickle.load(f)
    tools["request_time"] = tools["request_block"].map(t_map)

    # Identify rows with a missing request_time and fill them via RPC lookups
    missing_time_indices = tools[tools["request_time"].isna()].index
    if not missing_time_indices.empty:
        partial_block_number_to_timestamp = partial(
            block_number_to_timestamp, web3=web3
        )
        missing_timestamps = parallelize_timestamp_conversion(
            tools.loc[missing_time_indices], partial_block_number_to_timestamp
        )
        # Update the original DataFrame with the missing timestamps
        for i, timestamp in zip(missing_time_indices, missing_timestamps):
            tools.at[i, "request_time"] = timestamp

    # Derive monthly and weekly period columns from the request timestamps
    tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
        "%Y-%m"
    )
    tools["request_month_year_week"] = (
        pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
    )

    # Save the tools data after the content updates
    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)

    # Update t_map with the newly resolved timestamps and persist it
    # under a 2024-specific filename
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")
        .to_dict()["request_time"]
    )
    t_map.update(new_timestamps)
    with open(DATA_DIR / "t_map_2024.pkl", "wb") as f:
        pickle.dump(t_map, f)

    # Release the memory held by the large DataFrames
    del tools
    del fpmms
    del t_map
    gc.collect()
logging.info("ROI analysis files generated and saved") if __name__ == "__main__": roi_analysis()