# serJD — "Update app.py" (commit bf167f4)
# (scraped page-header text converted to a comment so the file parses as Python)
import sys
sys.path.append("speckleUtils")
import speckle_utils
import os
import json
import pandas as pd
import copy
from functools import wraps
from specklepy.api.client import SpeckleClient
from tripGenerationFunc import *
import gradio as gr
import requests
from huggingface_hub import webhook_endpoint, WebhookPayload
from fastapi import Request
import datetime
"""
curl -X POST -H "Content-Type: application/json" \
-d '{"payload": {"stream": {"name": "2B_100_batch"}}}' \
https://serjd-recode-hf-tripgeneration.hf.space/webhooks/update_streams
"""
#https://serjd-recode_hf_tripGeneration.hf.space/webhooks/update_streams
#https://serjd-syncspeckle2notion.hf.space/webhooks/update_streams
# ---------------------------------------------------------------------------
# Configuration loading
# ---------------------------------------------------------------------------
current_directory = os.path.dirname(os.path.abspath(__file__))

# Path to the config.json file holding all run parameters
# (stream ids, branch names, trip-model constants, xlsx path, ...).
config_file_path = os.path.join(current_directory, "config.json")

if os.path.exists(config_file_path):
    with open(config_file_path, 'r') as f:
        config = json.load(f)
    # Promote every config key to a module-level variable. At module scope
    # locals() is globals(); we call globals() explicitly to make the intent
    # (and the CPython-specific behavior the original relied on) clear.
    globals().update(config)

    print("variables from json")
    # Now the config values are accessible as plain names.
    print(STREAM_ID)
    print(BRANCH_NAME_LAND_USES)
    print(TARGET_TRIP_RATE)
    print(ALPHA_LOW)
    print(F_VALUES_MANUAL)
    print(distance_matrices_of_interest)
    print(redistributeTrips)
    print(DISTANCE_BRACKETS)
    print(XLS_FILE_PATH)
    print("==================")
else:
    # Without the config the names referenced below would raise an opaque
    # NameError anyway -> fail fast with a clear message.
    print("Error: config.json file not found in the current directory.")
    raise SystemExit("config.json is required next to app.py")

# Webhook filtering: only commits on this stream + one of these branches
# trigger the main pipeline (checked in update_streams below).
listendStreams = ["2B_U100_batch"]
listendBranchNames = [BRANCH_NAME_LAND_USES,
                      BRANCH_NAME_DISTANCE_MATRIX,
                      BRANCH_NAME_METRIC_DIST_MATRIX]
print("branchnames", listendBranchNames)
print("stream", listendStreams)
@webhook_endpoint
async def update_streams(request: Request):
    """Webhook entry point.

    Inspects the Speckle webhook payload and, when the event is a commit
    create/delete/update on one of the watched streams/branches
    (listendStreams / listendBranchNames), runs the full trip-generation
    pipeline via runAll(). Always returns a short status string so the
    webhook caller gets a normal response even when the event is skipped.
    """
    # Read the request body as JSON.
    payload = await request.json()
    print("============= payload =============")
    print(payload)
    print("============= payload =============")

    # Speckle wraps the actual event under a top-level "payload" key.
    # Use .get() so a malformed body does not crash the endpoint with a
    # KeyError (the original indexed payload["payload"] directly).
    payload = payload.get("payload", {})

    should_continue = False
    if "event" in payload and "data" in payload["event"]:
        event_data = payload["event"]["data"]
        # Only react to commit lifecycle events.
        if payload["event"].get("event_name") in ["commit_create", "commit_delete", "commit_update"]:
            # Stream must be one of the watched streams.
            if "stream" in payload and payload["stream"]["name"] in listendStreams:
                # Branch must be one of the watched branches.
                if "commit" in event_data and "branchName" in event_data["commit"]:
                    if event_data["commit"]["branchName"] in listendBranchNames:
                        should_continue = True
                    else:
                        print("Branch name not found in payload.")
            else:
                print("Stream name not found or not in the specified list.")
        else:
            print("Event type is not one of the specified types.")
    else:
        print("Payload structure does not match the expected format.")

    if should_continue:
        # Run the main pipeline (fetch Speckle data, compute trips, push back).
        runAll()
    else:
        print("Flag is False. Skipping further execution.")
    return "Webhook processing complete."
def runAll():
    """Run the complete trip-generation pipeline.

    Fetches the land-use, distance-matrix and metric-matrix branches from
    Speckle, reads model parameters from the configured Excel workbook,
    aligns land uses with the distance matrix, computes production /
    attraction and total trips per node, and finally calls computeTrips()
    which pushes the result back to Speckle (TARGET_BRANCH_TM).

    Relies on module-level config variables loaded from config.json
    (STREAM_ID, BRANCH_NAME_*, XLS_FILE_PATH, ALPHA_*, ...) and on helpers
    from tripGenerationFunc / speckle_utils.
    """
    # --- Speckle authentication -------------------------------------------
    # Token comes from the environment; never hard-code credentials in
    # source (the original embedded a literal token here and left the env
    # variable unused).
    speckle_token = os.environ.get("SPECKLE_TOKEN")
    xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
    print("full path", xls_file_path)

    CLIENT = SpeckleClient(host="https://speckle.xyz/")
    CLIENT.authenticate_with_token(token=speckle_token)

    # --- Land-use stream ---------------------------------------------------
    stream_land_use = speckle_utils.getSpeckleStream(STREAM_ID,
                                                     BRANCH_NAME_LAND_USES,
                                                     CLIENT,
                                                     commit_id="")
    # Navigate to the list of Speckle objects of interest.
    stream_data = stream_land_use["@Data"]["@{0}"]

    # Transform stream_data to a dataframe (keep a working copy), indexed
    # by the per-object "ids" column.
    df_speckle_lu = speckle_utils.get_dataframe(stream_data, return_original_df=False)
    df_main = df_speckle_lu.copy()
    df_main = df_main.set_index("ids", drop=False)

    # --- Distance-matrix stream -------------------------------------------
    stream_distance_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
                                                             BRANCH_NAME_DISTANCE_MATRIX,
                                                             CLIENT,
                                                             commit_id="")
    # Collect every attribute whose key looks like "<prefix>+distance_matrix..."
    # as a DataFrame keyed by origin/destination uuids.
    distance_matrices = {}
    for distM in stream_distance_matrice["@Data"]['@{0}']:
        for kk in distM.__dict__.keys():
            try:
                if kk.split("+")[1].startswith("distance_matrix"):
                    distance_matrix_dict = json.loads(distM[kk])
                    origin_ids = distance_matrix_dict["origin_uuid"]
                    destination_ids = distance_matrix_dict["destination_uuid"]
                    distance_matrix = distance_matrix_dict["matrix"]
                    # Convert the raw matrix to a labelled DataFrame.
                    df_distances = pd.DataFrame(distance_matrix, index=origin_ids, columns=destination_ids)
                    distance_matrices[kk] = df_distances
            except Exception:
                # Best-effort scan: keys without the expected
                # "...+distance_matrix..." shape are simply skipped.
                pass

    # --- Metric-matrix stream ---------------------------------------------
    stream_metric_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
                                                           BRANCH_NAME_METRIC_DIST_MATRIX,
                                                           CLIENT,
                                                           commit_id="")
    metric_matrices = {}
    for distM in stream_metric_matrice["@Data"]['@{0}']:
        print(distM.__dict__.keys())
        for kk in distM.__dict__.keys():
            try:
                if kk.split("+")[1].startswith("metric_matrix"):
                    metric_matrix_dict = json.loads(distM[kk])
                    origin_ids = metric_matrix_dict["origin_uuid"]
                    destination_ids = metric_matrix_dict["destination_uuid"]
                    metric_matrix = metric_matrix_dict["matrix"]
                    df_metric_dist = pd.DataFrame(metric_matrix, index=origin_ids, columns=destination_ids)
                    # x10 scale to compensate for a Speckle unit issue
                    # (same scaling is re-applied further below).
                    metric_matrices[kk] = df_metric_dist * 10
                    print("metric_matrix_dict", metric_matrix_dict.keys())
            except Exception:
                pass
    # NOTE(review): this overwrites the loop result above (including its
    # x10 scaling); kept as in the original — the x10 is re-applied before
    # computeTrips().
    metric_matrices = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)

    # Provenance of the inputs, forwarded to computeTrips().
    sourceCommits = {
        "landuseCommitID": stream_land_use.id,
        "distanceMatrixCommitID": stream_distance_matrice.id,
        "metricMatrixCommitID": stream_metric_matrice.id
    }

    # READ XLS FILE ======================================
    if os.path.exists(xls_file_path):
        # Production
        df_production = pd.read_excel(xls_file_path, sheet_name='Production')
        df_production_transposed = df_production.T
        df_production = preprocess_dataFrame(df_production, headerRow_idx=2, numRowsStart_idx=3)
        df_production_transposed = preprocess_dataFrame(df_production_transposed, headerRow_idx=0, numRowsStart_idx=4,
                                                        numColsStart_idx=4, rowNames_idx=2)
        # Attraction
        df_attraction = pd.read_excel(xls_file_path, sheet_name='Attraction')
        df_attraction = preprocess_dataFrame(df_attraction, headerRow_idx=0, numRowsStart_idx=2)
        # Distribution_Matrix
        df_distributionMatrix = pd.read_excel(xls_file_path, sheet_name='Distribution_Matrix')
        df_distributionMatrix = preprocess_dataFrame(df_distributionMatrix, headerRow_idx=0, numRowsStart_idx=2,
                                                     numRowsEnd_idx=None, numColsStart_idx=2, numColsEnd_idx=None,
                                                     rowNames_idx=0)
        # Alphas (single row of per-category alpha factors)
        df_alphas = pd.read_excel(xls_file_path, sheet_name='Alphas')
        df_alphas.columns = df_alphas.iloc[1]
        df_alphas = df_alphas.iloc[0, 2:]
        # Land use
        df_lu = pd.read_excel(xls_file_path, sheet_name='Example_Land_Use')
        df_lu = preprocess_dataFrame(df_lu, headerRow_idx=0, numRowsStart_idx=1)
        df_lu["nameCombined"] = df_lu.iloc[:, 1].astype(str) + "+" + df_lu.iloc[:, 0].astype(str)
        # Distance Matrix (example sheet; the live one comes from Speckle)
        df_distMatrix = pd.read_excel(xls_file_path, sheet_name='Example_Distance_Matrix')
        df_distMatrix = preprocess_dataFrame(df_distMatrix, headerRow_idx=0, numRowsStart_idx=1, numRowsEnd_idx=None,
                                             numColsStart_idx=1, numColsEnd_idx=None, rowNames_idx=0)
    else:
        # Everything below needs the workbook; fail with a clear error
        # instead of the NameError the original would hit later.
        print("Error: Excel file specified in config.json not found.")
        raise FileNotFoundError(xls_file_path)

    # Land use structure ====================================================
    # THIS IS THE DISTANCE MATRIX THAT'S USED DOWN THE ROAD.
    df_distances_aligned, df_lu_stream_aligned = align_dataframes(distance_matrices[distanceMatrixName], df_main, 'ids')

    # Create a df with only the land-use columns ("lu+..." prefix).
    lu_cols = [col for col in df_lu_stream_aligned.columns if col.startswith("lu+")]
    df_lu_stream = df_lu_stream_aligned[lu_cols]
    # Remove the "lu+" prefix from column names.
    # FIX: the original used .str.lstrip('lu+'), which strips the CHARACTERS
    # 'l', 'u', '+' and corrupts names starting with those letters.
    df_lu_stream.columns = df_lu_stream.columns.str.replace(r'^lu\+', '', regex=True)
    df_lu_stream = df_lu_stream.T
    df_lu_stream_t = df_lu_stream.T
    df_lu_stream_with_nameLu_column = df_lu_stream.reset_index(drop=False).rename(columns={'index': 'nameLu'})

    # --- Land-use names from the workbook ---------------------------------
    df_lu_names_xlsx = pd.concat([df_lu.iloc[:, 0:2], df_lu.iloc[:, -1]], axis=1)
    df_lu_names_xlsx.index = df_lu_names_xlsx.iloc[:, 1]
    column_names = ['nameTripType', 'nameLu', 'nameCombined']
    df_lu_names_xlsx.columns = column_names
    print(f"df_lu_names_xlsx shape: {df_lu_names_xlsx.shape}")
    df_lu_names_xlsx.head()

    # Merge stream land uses with workbook names (outer join keeps both sides).
    merged_df = pd.merge(df_lu_stream_with_nameLu_column, df_lu_names_xlsx, on='nameLu', how='outer')
    name_counts = df_lu_names_xlsx['nameLu'].value_counts()
    # Names present in the stream but missing in the workbook.
    missing_names = df_lu_stream_with_nameLu_column.loc[~df_lu_stream_with_nameLu_column['nameLu'].isin(df_lu_names_xlsx['nameLu'])]
    df_lu_stream_duplicated = pd.concat([merged_df, missing_names], ignore_index=True)
    # Names present in the workbook but missing in the stream.
    missing_names = df_lu_names_xlsx.loc[~df_lu_names_xlsx['nameLu'].isin(df_lu_stream_with_nameLu_column['nameLu'])]

    df_lu_names_sorted = df_lu_names_xlsx.sort_values(by='nameLu')
    df_lu_stream_duplicated_sorted = df_lu_stream_duplicated.sort_values(by='nameLu')

    # Sort the stream land uses into the workbook's nameCombined order.
    merged_order = pd.merge(df_lu_names_xlsx[['nameCombined']], df_lu_stream_duplicated[['nameCombined']], on='nameCombined', how='inner')
    df_lu_stream_sorted = df_lu_stream_duplicated.sort_values(by='nameCombined', key=lambda x: pd.Categorical(x, categories=merged_order['nameCombined'], ordered=True))
    # Put the name columns first.
    column_order = ['nameTripType', 'nameCombined'] + [col for col in df_lu_stream_sorted.columns if col not in ['nameTripType', 'nameCombined']]
    df_lu_stream_reordered = df_lu_stream_sorted[column_order]
    df_lu_stream_reordered_t = df_lu_stream_reordered.T

    df_lu_stream_with_index = df_lu_stream_reordered_t.reset_index(drop=False).rename(columns={'index': 'ids'})
    df_lu_stream_with_index.index = df_lu_stream_reordered_t.index
    df_lu_num_t_index = df_lu_stream_with_index.iloc[3:]  # NOTE(review): appears unused below
    df_distances_aligned_index = df_distances_aligned.reset_index(drop=False).rename(columns={'index': 'ids'})
    df_distances_aligned_index.index = df_distances_aligned.index
    df_lu_namesCombined = df_lu_stream_with_index.loc["nameCombined"].iloc[1:]

    # Sort rows to match the distance matrix's id order.
    df_lu_stream_sorted = df_lu_stream_with_index.sort_values(by=['ids'], key=lambda x: pd.Categorical(x, categories=df_distances_aligned_index['ids'], ordered=True))
    df_lu_num = df_lu_stream_sorted.T.iloc[1:, :-3]
    df_lu_num.index = df_lu_namesCombined
    df_distMatrix_speckle = df_distances_aligned
    df_attraction_num = df_attraction.reset_index().iloc[:-1, 6:]

    # =========================================================================
    # TRIP GENERATION
    # ATTRACTION & PRODUCTION =================================================
    # Inputs: df_attraction_num, df_lu_num, df_production, df_lu,
    #         df_production_transposed
    df_attraction_proNode_sum_total = attraction_proNode_full_iter(df_attraction_num, df_lu_num, True)

    # sqm per person (row 0 of the production sheet).
    df_sqmProPerson = df_production.iloc[0, 4:].reset_index()[3]
    # Trip rate per category. deepcopy so df_tripRate does not alias df_production.
    df_tripRate = copy.deepcopy(df_production)
    df_tripRate.index = df_tripRate.iloc[:, 0]  # set the row names
    df_tripRate = df_tripRate.iloc[1:, 2]

    # Numerical part of the production sheet.
    df_production_num = df_production.iloc[1:, 4:]
    df_production_transposed1 = df_production_num.T

    df_total_trips_allNodes = production_proNode_total(df_lu,
                                                       df_sqmProPerson,
                                                       df_tripRate,
                                                       df_production_num,
                                                       df_production_transposed,
                                                       df_lu_num, printSteps=False)

    # Convert to float before the per-person division.
    df_total_trips_allNodes = df_total_trips_allNodes.astype(float)
    df_tripRate = df_tripRate.astype(float)
    df_total_trips_allNodes_sumPerson = df_total_trips_allNodes.div(df_tripRate, axis=0).sum()
    df_total_trips_allNodes_sumPerson_proCat = df_total_trips_allNodes.div(df_tripRate, axis=0)
    df_total_trips_allNodes_sumPerson_proCat_t = df_total_trips_allNodes_sumPerson_proCat.T
    df_total_trips_allNodes_sumPerson_proCat_t_sum = df_total_trips_allNodes_sumPerson_proCat_t.sum()

    # Total population = residents + tourists.
    total_population = df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"] + df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"]

    # =========================================================================
    # Final extraction of the matrices actually fed to computeTrips().
    distance_matrices = extract_distance_matrices(stream_distance_matrice, distance_matrices_of_interest)
    metric_matrices_ = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
    metric_matrices = {k: v * 10 for k, v in metric_matrices_.items()}  # scale (speckle issue)

    logs = computeTrips(
        df_distributionMatrix,
        df_total_trips_allNodes,
        df_distMatrix_speckle,
        df_alphas,
        df_attraction_proNode_sum_total,
        df_distances_aligned,
        TARGET_TRIP_RATE,
        SCALING_FACTOR,
        total_population,
        df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"],
        df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"],
        distance_matrices,
        metric_matrices,
        redistributeTrips,
        DISTANCE_BRACKETS,
        ALPHA_LOW, ALPHA_MED, ALPHA_HIGH, ALPHA, ALPHA_UNIFORM, F_VALUES_MANUAL,
        CLIENT,
        STREAM_ID,
        TARGET_BRANCH_TM,
        sourceCommits
    )
    print(logs)