import sys
sys.path.append("speckleUtils")
import speckle_utils
import os
import json
import pandas as pd
import copy
from functools import wraps
from specklepy.api.client import SpeckleClient
from tripGenerationFunc import *
import gradio as gr
import requests
from huggingface_hub import webhook_endpoint, WebhookPayload
from fastapi import Request
import datetime

"""
Manual webhook test:
curl -X POST -H "Content-Type: application/json" \
  -d '{"payload": {"stream": {"name": "2B_100_batch"}}}' \
  https://serjd-recode-hf-tripgeneration.hf.space/webhooks/update_streams
"""
# https://serjd-recode_hf_tripGeneration.hf.space/webhooks/update_streams
# https://serjd-syncspeckle2notion.hf.space/webhooks/update_streams

current_directory = os.path.dirname(os.path.abspath(__file__))

# Path to the config.json file
config_file_path = os.path.join(current_directory, "config.json")

# Check if the config.json file exists
if os.path.exists(config_file_path):
    # Load the JSON data from config.json
    with open(config_file_path, 'r') as f:
        config = json.load(f)

    # Expose the config entries as module-level variables with the same names as the JSON keys
    locals().update(config)
    print("variables from json")
    # The variables can now be accessed directly
    print(STREAM_ID)
    print(BRANCH_NAME_LAND_USES)
    print(TARGET_TRIP_RATE)
    print(ALPHA_LOW)
    print(F_VALUES_MANUAL)
    print(distance_matrices_of_interest)
    print(redistributeTrips)
    print(DISTANCE_BRACKETS)
    print(XLS_FILE_PATH)
    print("==================")
else:
    print("Error: config.json file not found in the current directory.")

# Check the webhook payload and run the main code only if the webhook was
# triggered by one of the listened streams and one of the listened branches.
listendStreams = ["2B_U100_batch"]
listendBranchNames = [BRANCH_NAME_LAND_USES, BRANCH_NAME_DISTANCE_MATRIX, BRANCH_NAME_METRIC_DIST_MATRIX]
print("branchnames", listendBranchNames)
print("stream", listendStreams)


@webhook_endpoint
async def update_streams(request: Request):
    # Initialize flag
    should_continue = False

    # Read the request body as JSON
    payload = await request.json()
    print("============= payload =============")
    print(payload)
    print("============= payload =============")
    payload = payload["payload"]

    # Check if the payload structure matches the expected format
    if "event" in payload and "data" in payload["event"]:
        event_data = payload["event"]["data"]

        # Check if the event type is one of the specified types
        if "event_name" in payload["event"] and payload["event"]["event_name"] in ["commit_create", "commit_delete", "commit_update"]:
            # Check if the stream name matches the specified list
            if "stream" in payload and payload["stream"]["name"] in listendStreams:
                # Check if the branch name matches the specified list
                if "commit" in event_data and "branchName" in event_data["commit"]:
                    if event_data["commit"]["branchName"] in listendBranchNames:
                        should_continue = True
                else:
                    print("Branch name not found in payload.")
            else:
                print("Stream name not found or not in the specified list.")
        else:
            print("Event type is not one of the specified types.")
    else:
        print("Payload structure does not match the expected format.")

    # If the flag is True, continue running the main part of the code
    if should_continue:
        runAll()
    else:
        print("Flag is False. Skipping further execution.")

    return "Webhook processing complete."
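
# Illustrative only (an assumption, inferred from the checks in update_streams above,
# not taken from the Speckle webhook docs): the minimal payload shape that passes all
# checks and triggers runAll(); stream and branch names are placeholders.
#
# {
#   "payload": {
#     "event": {
#       "event_name": "commit_create",
#       "data": {"commit": {"branchName": "<one of listendBranchNames>"}}
#     },
#     "stream": {"name": "2B_U100_batch"}
#   }
# }
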
def runAll():
    # Read the Speckle token from the environment (set as a secret on the Space)
    speckle_token = os.environ.get("SPECKLE_TOKEN")

    xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
    print("full path", xls_file_path)

    # fetch speckle data
    CLIENT = SpeckleClient(host="https://speckle.xyz/")
    CLIENT.authenticate_with_token(token=speckle_token)

    # get land use stream
    stream_land_use = speckle_utils.getSpeckleStream(STREAM_ID,
                                                     BRANCH_NAME_LAND_USES,
                                                     CLIENT,
                                                     commit_id="")

    # navigate to the list with the speckle objects of interest
    stream_data = stream_land_use["@Data"]["@{0}"]

    # transform stream_data to a dataframe (and keep a backup copy)
    df_speckle_lu = speckle_utils.get_dataframe(stream_data, return_original_df=False)
    df_main = df_speckle_lu.copy()

    # set index column
    df_main = df_main.set_index("ids", drop=False)

    # get distance matrix stream
    stream_distance_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
                                                             BRANCH_NAME_DISTANCE_MATRIX,
                                                             CLIENT,
                                                             commit_id="")
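
    # Illustrative shape (an assumption, inferred from the parsing loops below) of one
    # serialized matrix attribute on a Speckle object; the ids and values are placeholders:
    #   '{"origin_uuid": ["id_a", "id_b"],
    #     "destination_uuid": ["id_a", "id_b"],
    #     "matrix": [[0.0, 120.5], [118.2, 0.0]]}'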
print("metric_matrix_dict", metric_matrix_dict.keys()) except: pass metric_matrices = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest) sourceCommits = { "landuseCommitID": stream_land_use.id, "distanceMatrixCommitID": stream_distance_matrice.id, "metricMatrixCommitID": stream_metric_matrice.id } # READ XLS FILE ====================================== # Read Excel file into Pandas DataFrame #Production # Load Excel file separately #xls_file_path = os.path.join(current_directory, XLS_FILE_PATH) if os.path.exists(xls_file_path): # Production df_production = pd.read_excel(xls_file_path, sheet_name='Production') df_production_transposed = df_production.T df_production = preprocess_dataFrame(df_production, headerRow_idx=2, numRowsStart_idx=3) df_production_transposed = preprocess_dataFrame(df_production_transposed, headerRow_idx=0, numRowsStart_idx=4, numColsStart_idx=4, rowNames_idx=2) # Attraction df_attraction = pd.read_excel(xls_file_path, sheet_name='Attraction') df_attraction = preprocess_dataFrame(df_attraction, headerRow_idx=0, numRowsStart_idx=2) # Distribution_Matrix df_distributionMatrix = pd.read_excel(xls_file_path, sheet_name='Distribution_Matrix') df_distributionMatrix = preprocess_dataFrame(df_distributionMatrix, headerRow_idx=0, numRowsStart_idx=2, numRowsEnd_idx=None, numColsStart_idx=2, numColsEnd_idx=None, rowNames_idx=0) # Alphas df_alphas = pd.read_excel(xls_file_path, sheet_name='Alphas') df_alphas.columns = df_alphas.iloc[1] df_alphas = df_alphas.iloc[0, 2:] # Land use df_lu = pd.read_excel(xls_file_path, sheet_name='Example_Land_Use') df_lu = preprocess_dataFrame(df_lu, headerRow_idx=0, numRowsStart_idx=1) df_lu["nameCombined"] = df_lu.iloc[:, 1].astype(str) + "+" + df_lu.iloc[:, 0].astype(str) # Distance Matrix df_distMatrix = pd.read_excel(xls_file_path, sheet_name='Example_Distance_Matrix') df_distMatrix = preprocess_dataFrame(df_distMatrix, headerRow_idx=0, numRowsStart_idx=1, numRowsEnd_idx=None, numColsStart_idx=1, numColsEnd_idx=None, rowNames_idx=0) else: print("Error: Excel file specified in config.json not found.") # Land use strucutre ======= # THIS IS THE DISTANCE MATRIX THATS USED DOWN THE ROAD df_distances_aligned, df_lu_stream_aligned = align_dataframes(distance_matrices[distanceMatrixName], df_main, 'ids') #Create a df with lanuses lu_cols = [col for col in df_lu_stream_aligned.columns if col.startswith("lu+")] df_lu_stream = df_lu_stream_aligned[lu_cols] # Remove "lu+" from the beginning of column names df_lu_stream.columns = df_lu_stream.columns.str.lstrip('lu+') df_lu_stream = df_lu_stream.T df_lu_stream_t = df_lu_stream.T df_lu_stream_with_nameLu_column = df_lu_stream.reset_index(drop=False).rename(columns={'index': 'nameLu'}) #--- df_lu_names_xlsx = pd.concat([df_lu.iloc[:, 0:2], df_lu.iloc[:, -1]], axis=1) df_lu_names_xlsx.index = df_lu_names_xlsx.iloc[:, 1] column_names = ['nameTripType', 'nameLu', 'nameCombined'] df_lu_names_xlsx.columns = column_names print(f"df_lu_names_xlsx shape: {df_lu_names_xlsx.shape}") df_lu_names_xlsx.head() #-- # Merge DataFrames using an outer join merged_df = pd.merge(df_lu_stream_with_nameLu_column, df_lu_names_xlsx, on='nameLu', how='outer') # Get the unique names and their counts from df_lu_names_xlsx name_counts = df_lu_names_xlsx['nameLu'].value_counts() #print(name_counts) # Identify names in df_lu_stream_with_nameLu_column that are not in df_lu_names_xlsx missing_names = df_lu_stream_with_nameLu_column.loc[~df_lu_stream_with_nameLu_column['nameLu'].isin(df_lu_names_xlsx['nameLu'])] # 

    # Append missing rows to df_lu_stream_with_nameLu_column
    df_lu_stream_duplicated = pd.concat([merged_df, missing_names], ignore_index=True)

    # --
    # Find names in df_lu_names_xlsx that are not in df_lu_stream_with_nameLu_column
    missing_names = df_lu_names_xlsx.loc[~df_lu_names_xlsx['nameLu'].isin(df_lu_stream_with_nameLu_column['nameLu'])]

    # --
    # print existing names (?)
    df_lu_names_sorted = df_lu_names_xlsx.sort_values(by='nameLu')
    df_lu_stream_duplicated_sorted = df_lu_stream_duplicated.sort_values(by='nameLu')

    # --
    # Merge DataFrames to get the order of names
    merged_order = pd.merge(df_lu_names_xlsx[['nameCombined']], df_lu_stream_duplicated[['nameCombined']],
                            on='nameCombined', how='inner')

    # Sort df_lu_stream_duplicated based on the order of names in df_lu_names_xlsx
    df_lu_stream_sorted = df_lu_stream_duplicated.sort_values(
        by='nameCombined',
        key=lambda x: pd.Categorical(x, categories=merged_order['nameCombined'], ordered=True))

    # Reorganize columns
    column_order = ['nameTripType', 'nameCombined'] + [col for col in df_lu_stream_sorted.columns
                                                       if col not in ['nameTripType', 'nameCombined']]

    # Create a new DataFrame with the desired column order
    df_lu_stream_reordered = df_lu_stream_sorted[column_order]
    df_lu_stream_reordered_t = df_lu_stream_reordered.T

    # --
    df_lu_stream_with_index = df_lu_stream_reordered_t.reset_index(drop=False).rename(columns={'index': 'ids'})
    df_lu_stream_with_index.index = df_lu_stream_reordered_t.index
    df_lu_num_t_index = df_lu_stream_with_index.iloc[3:]

    df_distances_aligned_index = df_distances_aligned.reset_index(drop=False).rename(columns={'index': 'ids'})
    df_distances_aligned_index.index = df_distances_aligned.index

    df_lu_namesCombined = df_lu_stream_with_index.loc["nameCombined"].iloc[1:]

    # Sort df_lu_stream_with_index based on the 'ids' column in df_distances_aligned_index
    df_lu_stream_sorted = df_lu_stream_with_index.sort_values(
        by=['ids'],
        key=lambda x: pd.Categorical(x, categories=df_distances_aligned_index['ids'], ordered=True))

    df_lu_num = df_lu_stream_sorted.T.iloc[1:, :-3]
    df_lu_num.index = df_lu_namesCombined

    df_distMatrix_speckle = df_distances_aligned

    df_attraction_num = df_attraction.reset_index().iloc[:-1, 6:]

    # =============================================================================
    # TRIP GENERATION
    # ATTRACTION & PRODUCTION =====================================================
    """
    INPUTS
    df_attraction_num
    df_lu_num
    df_production
    df_lu
    df_production_transposed
    """
    df_attraction_proNode_sum_total = attraction_proNode_full_iter(df_attraction_num, df_lu_num, True)

    # Get the sqm per person
    df_sqmProPerson = df_production.iloc[0, 4:].reset_index()[3]

    # Get the trip rate
    df_tripRate = copy.deepcopy(df_production)  # a deep copy ensures df_tripRate doesn't point to df_production
    df_tripRate.index = df_tripRate.iloc[:, 0]  # set the row names
    df_tripRate = df_tripRate.iloc[1:, 2]

    # Numerical df from production ===============================================
    df_production_num = df_production.iloc[1:, 4:]
    df_production_transposed1 = df_production_num.T

    df_total_trips_allNodes = production_proNode_total(df_lu, df_sqmProPerson, df_tripRate, df_production_num,
                                                       df_production_transposed, df_lu_num, printSteps=False)

    # Convert data types to float
    df_total_trips_allNodes = df_total_trips_allNodes.astype(float)
    df_tripRate = df_tripRate.astype(float)

    df_total_trips_allNodes_sumPerson = df_total_trips_allNodes.div(df_tripRate, axis=0).sum()
    df_total_trips_allNodes_sumPerson_proCat = df_total_trips_allNodes.div(df_tripRate, axis=0)
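
    # Note (an interpretation, assuming the trip rate is expressed as trips per person):
    # dividing the trips by the trip rate recovers person counts per category; the
    # "Tot_Res" and "Tot_tou" sums below are then combined into the total population.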
    df_total_trips_allNodes_sumPerson_proCat_t = df_total_trips_allNodes_sumPerson_proCat.T
    df_total_trips_allNodes_sumPerson_proCat_t_sum = df_total_trips_allNodes_sumPerson_proCat_t.sum()

    # get total population
    total_population = (df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"]
                        + df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"])

    # =============================================================================
    distance_matrices = extract_distance_matrices(stream_distance_matrice, distance_matrices_of_interest)
    metric_matrices_ = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
    metric_matrices = {k: v * 10 for k, v in metric_matrices_.items()}  # scale (speckle issue)

    logs = computeTrips(
        df_distributionMatrix,
        df_total_trips_allNodes,
        df_distMatrix_speckle,
        df_alphas,
        df_attraction_proNode_sum_total,
        df_distances_aligned,
        TARGET_TRIP_RATE,
        SCALING_FACTOR,
        total_population,
        df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"],
        df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"],
        distance_matrices,
        metric_matrices,
        redistributeTrips,
        DISTANCE_BRACKETS,
        ALPHA_LOW,
        ALPHA_MED,
        ALPHA_HIGH,
        ALPHA,
        ALPHA_UNIFORM,
        F_VALUES_MANUAL,
        CLIENT,
        STREAM_ID,
        TARGET_BRANCH_TM,
        sourceCommits
    )

    print(logs)
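

# A minimal sketch for running the pipeline locally without going through the webhook
# endpoint (an addition, not part of the deployed flow; assumes config.json and the
# SPECKLE_TOKEN environment variable are available as set up above):
if __name__ == "__main__":
    runAll()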