RECODE_HF_tripGeneration / tripGenerationFunc.py
serJD's picture
Update tripGenerationFunc.py
530140a
raw
history blame
34.6 kB
import numpy as np
import pandas as pd
from numba import jit
import math
import json
import os
import sys
from specklepy.api.client import SpeckleClient
from specklepy.api.credentials import get_default_account, get_local_accounts
from specklepy.transports.server import ServerTransport
from specklepy.api import operations
from specklepy.objects.geometry import Polyline, Point
from specklepy.objects import Base
from specklepy.api import operations, models
from specklepy.transports.server import ServerTransport
import time
from functools import wraps
import sys
sys.path.append("speckleUtils")
import speckle_utils
#https://serjd-syncspeckle2notion.hf.space/webhooks/update_streams
#https://serjd-RECODE_HF_tripGeneration.hf.space/webhooks/update_streams
#serJD/RECODE_HF_tripGeneration
# https://huggingface.co/spaces/serJD/RECODE_HF_tripGeneration
# !!! lots of hard coded values in computeTrips !!!
# UTILS
def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
    """Return a copy of ``original_df`` with the placeholder alphas swapped out.

    The input is expected to contain the three hard-coded placeholder values
    0.00191 (low), 0.0038 (medium) and 0.00767 (high). Every occurrence is
    replaced by the matching new parameter.

    Parameters:
    - alpha_low: replacement for 0.00191.
    - alpha_med: replacement for 0.0038.
    - alpha_high: replacement for 0.00767.
    - original_df: pandas object holding the placeholder values.

    Returns:
    - A new object of the same shape; ``original_df`` is not modified.

    Raises:
    - ValueError: if any of the three placeholders is absent from the input.
    """
    # Mapping from the placeholder constants to the new alpha parameters.
    value_to_alpha = {
        0.00191: alpha_low,
        0.00767: alpha_high,
        0.0038: alpha_med,
    }

    # Locate every placeholder on the *untouched* input first. Replacing one
    # value after another on a running copy would chain: a new alpha that
    # happens to equal a later placeholder (e.g. alpha_low == 0.0038) would
    # be replaced a second time.
    masks = {
        placeholder: original_df == placeholder
        for placeholder in value_to_alpha
    }
    for placeholder, mask in masks.items():
        if not mask.any().any():
            raise ValueError(f"Value {placeholder} not found in the input DataFrame.")

    new_df = original_df.copy()
    for placeholder, mask in masks.items():
        new_df = new_df.mask(mask, value_to_alpha[placeholder])
    return new_df
def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx = None, numRowsEnd_idx=None, numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
    """Promote a header row (and optionally a label column) and slice out the data.

    Parameters:
    - df: raw table; NOTE its ``columns`` (and ``index`` when ``rowNames_idx``
      is given) are reassigned in place on the caller's object.
    - headerRow_idx: positional row whose values become the column labels.
    - numRowsStart_idx, numRowsEnd_idx: positional row window to keep.
    - numColsStart_idx, numColsEnd_idx: positional column window to keep.
    - rowNames_idx: positional column whose values become the row labels.

    Returns:
    - The ``iloc`` slice of ``df`` restricted to the requested window.
    """
    header_values = df.iloc[headerRow_idx]
    df.columns = header_values

    if rowNames_idx is not None:
        df.index = df.iloc[:, rowNames_idx]

    row_window = slice(numRowsStart_idx, numRowsEnd_idx)
    col_window = slice(numColsStart_idx, numColsEnd_idx)
    return df.iloc[row_window, col_window]
def timeit(f):
    """Decorator that prints the wall-clock duration of each call to ``f``.

    The wrapped function's return value is passed through unchanged.
    """
    @wraps(f)
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        # The previous format string had four placeholders but only two
        # values, so every decorated call raised TypeError after running f.
        print('func:%r took: %2.4f sec' % (f.__name__, te - ts))
        return result
    return timed
def timing_decorator(func):
    """Decorator that reports how long ``func`` ran and when it finished.

    Prints one line per call (duration in seconds plus a local-time finish
    stamp) and returns whatever ``func`` returned.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        began = time.time()
        outcome = func(*args, **kwargs)
        ended = time.time()
        duration = ended - began
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ended))
        print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
        return outcome
    return wrapper
# Debug helper: compare two dataframes after float conversion and rounding
def compare_dataframes(df1, df2, decimals=8):
    """Print whether two frames match after casting to float and rounding.

    Parameters:
    - df1: reference frame (labelled "Original" in the output).
    - df2: frame under test (labelled "Numba" / "New method" in the output).
    - decimals: number of decimals both frames are rounded to before comparing.

    Returns:
    - None; the verdict, both shapes and the first two rows are printed.
    """
    rounded_first, rounded_second = (
        frame.astype(float).round(decimals) for frame in (df1, df2)
    )
    are_equal = rounded_first.equals(rounded_second)

    print("Both methods are equal:", are_equal)
    print("Numba shape:", df2.shape)
    print("Original shape:", df1.shape)
    print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df1.iloc[0:5].head(2))
    print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df2.iloc[0:5].head(2))
def align_dataframes(df1, df2, key):
    """
    Restrict two dataframes to the identifiers they have in common.

    ``df1`` is matched through its index, ``df2`` through the values of its
    ``key`` column.

    Parameters:
    - df1: First dataframe (identifiers live in the index).
    - df2: Second dataframe (identifiers live in column ``key``).
    - key: Column name of ``df2`` to align on.

    Returns:
    - df1_aligned, df2_aligned: ``df1`` reduced to the shared identifiers and
      ``df2`` reduced to matching rows, re-indexed by ``key`` (the column is
      kept as well).
    """
    shared_ids = df1.index.intersection(df2[key])
    df1_aligned = df1.loc[shared_ids]

    has_shared_id = df2[key].isin(shared_ids)
    df2_aligned = df2[has_shared_id].set_index(key, drop=False)
    return df1_aligned, df2_aligned
#==================================================================================================
def attraction_proNode(df_attraction_num, df_lu, df_lu_anName=None, printSteps=False):
    """Weight the attraction table by one land-use column and total each column.

    Parameters:
    - df_attraction_num: numeric attraction table; its rows are multiplied by
      the selected land-use values (aligned on the row index).
    - df_lu: land-use table; its index is reset before the column is picked.
    - df_lu_anName: name of the ``df_lu`` column to use.
    - printSteps: when True, print the shapes of the intermediate results.

    Returns:
    - DataFrame with a single column ``'Sum'`` holding the per-column totals.
    """
    lu_values = df_lu.reset_index()[df_lu_anName]
    if printSteps:
        print(lu_values.shape)
        print("df_attraction_num:", df_attraction_num.shape)
        print("df_lu_proNode:", lu_values.shape)

    weighted = df_attraction_num.mul(lu_values, axis=0)
    if printSteps:
        print("df_attraction_proNode:", weighted.shape)

    # Collapse the rows: one total per column of the attraction table
    column_totals = pd.DataFrame(weighted.sum(), columns=['Sum'])
    if printSteps:
        print("df_attraction_proNode_sum:", column_totals.shape)
    return column_totals
#Non vectorized iterative function
def attraction_proNode_full_iter(df_attraction_num, df_lu_num, printSteps=False):
    """Run ``attraction_proNode`` for every column of ``df_lu_num``.

    Parameters:
    - df_attraction_num: numeric attraction table passed through unchanged.
    - df_lu_num: numeric land-use table; one result column is produced per
      column of this table.
    - printSteps: accepted for signature compatibility; it is not forwarded
      to ``attraction_proNode``.

    Returns:
    - DataFrame whose columns carry the labels of ``df_lu_num`` and hold the
      per-column ``'Sum'`` results side by side.
    """
    per_node_sums = [
        attraction_proNode(df_attraction_num, df_lu_num, df_lu_anName=column_name)
        for column_name in df_lu_num.columns
    ]
    if not per_node_sums:
        # pd.concat refuses an empty list; mirror the empty result instead
        return pd.DataFrame(columns=df_lu_num.columns)

    # One concat over all pieces instead of growing a frame column by column
    # (which re-copies the accumulated data on every iteration).
    df_attraction_proNode_sum_total = pd.concat(per_node_sums, axis=1)
    # Every piece is called 'Sum'; relabel with the land-use column names
    df_attraction_proNode_sum_total.columns = df_lu_num.columns
    return df_attraction_proNode_sum_total
# PRODUCTION ================================================
def production_proNode(df_lu, df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed, printSteps=False, df_lu_anName=None):
    """Compute the total produced trips for one land-use column.

    Parameters:
    - df_lu: land-use table; its index is reset before the column is picked.
    - df_sqmProPerson: divisor applied to the land-use values to obtain a
      person count (aligned on the reset 0..n-1 index).
    - df_tripRate: multiplier applied row-wise after transposing.
    - df_production_num: unused by this function.
    - df_production_transposed: production table; its index is reset and each
      row is scaled by the matching person count.
    - printSteps: when True, print intermediate shapes / the person counts.
    - df_lu_anName: name of the ``df_lu`` column to use.

    Returns:
    - Series with the row sums of the scaled, transposed production table.
    """
    lu_values = df_lu.reset_index()[df_lu_anName]
    if printSteps:
        print(lu_values.shape)

    # Person count: land-use values divided element-wise by df_sqmProPerson
    person_count = lu_values.div(df_sqmProPerson)
    if printSteps:
        print(person_count.shape)
        print(person_count)

    # Positional 0..n-1 labels on both sides so the row-wise product lines up
    person_count = person_count.reset_index(drop=True)
    production_rows = df_production_transposed.reset_index(drop=True)

    scaled = production_rows.mul(person_count, axis=0).T
    scaled = scaled.mul(df_tripRate, axis=0)

    # Total trips: sum across the columns of every row
    return scaled.sum(axis=1)
#Non vectorized iterative function
def production_proNode_total(df_lu, df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed, df_lu_num, printSteps=False):
    """Run ``production_proNode`` for every column of ``df_lu_num``.

    Parameters:
    - df_lu: unused; ``df_lu_num`` is what gets passed on as the land-use table.
    - df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed:
      forwarded unchanged to ``production_proNode``.
    - df_lu_num: numeric land-use table; one result column per column of it.
    - printSteps: accepted for signature compatibility; the per-column calls
      always run with ``printSteps=False``.

    Returns:
    - DataFrame whose columns carry the labels of ``df_lu_num`` and hold the
      per-column trip totals side by side.
    """
    per_node_trips = [
        production_proNode(
            df_lu_num,
            df_sqmProPerson,
            df_tripRate,
            df_production_num,
            df_production_transposed,
            printSteps=False,
            df_lu_anName=column_name,
        )
        for column_name in df_lu_num.columns
    ]
    if not per_node_trips:
        # pd.concat refuses an empty list; mirror the empty result instead
        return pd.DataFrame(columns=df_lu_num.columns)

    # One concat over all pieces instead of growing a frame column by column
    # (which re-copies the accumulated data on every iteration).
    df_total_trips_allNodes = pd.concat(per_node_trips, axis=1)
    # Relabel the result columns with the land-use column names
    df_total_trips_allNodes.columns = df_lu_num.columns
    return df_total_trips_allNodes
#df_total_trips_allNodes = production_proNode_total(df_lu, df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed, df_lu_num, printSteps=False)
#df_total_trips_allNodes.head(50)
#==================================================================================================
#STEP 1
def step_1(df_distributionMatrix, df_total_trips_allNodes):
    """Scale the distribution matrix once per column of the trip totals.

    Each column of ``df_total_trips_allNodes`` multiplies the rows of
    ``df_distributionMatrix`` (aligned on the row index).

    Returns:
    - List of DataFrames, one per column, in column order.
    """
    return [
        df_distributionMatrix.mul(node_trips, axis=0)
        for _, node_trips in df_total_trips_allNodes.items()
    ]
#STEP 2
def step_2_vectorized(df_distMatrix, df_alphas):
    """Apply ``exp(-distance * alpha)`` for every alpha and split per origin row.

    Parameters:
    - df_distMatrix: distance matrix; the output frames reuse its index as
      row labels, so it is presumably square — TODO confirm.
    - df_alphas: one alpha per category; its index labels the output columns.

    Returns:
    - List with one DataFrame per row of ``df_distMatrix``; frame ``i`` holds
      the decayed distances of row ``i`` (rows) for every alpha (columns).
    """
    distances = df_distMatrix.values
    alphas = df_alphas.values

    n_rows, n_cols = distances.shape[0], distances.shape[1]
    decay_cube = np.zeros((n_rows, n_cols, len(alphas)))
    # One 2D decay layer per alpha, written into the last axis of the cube
    for layer, alpha in enumerate(alphas):
        decay_cube[:, :, layer] = np.exp(-distances * alpha)

    frames = []
    for origin in range(decay_cube.shape[0]):
        frames.append(
            pd.DataFrame(
                decay_cube[origin, :, :],
                columns=df_alphas.index,
                index=df_distMatrix.index,
            )
        )
    return frames
# Step 3
@jit(nopython=True)
def multiply_and_sum(arr, attraction_arr):
    """Multiply two arrays element-wise and total each column of the product.

    Returns:
    - (product, column_totals): the product and its sum along axis 0.
    """
    product = arr * attraction_arr
    column_totals = product.sum(axis=0)
    return product, column_totals
def step_3_numba(df_attraction_proNode_sum_total, df_step_2):
    """Weight every step-2 frame by the attraction totals and sum its columns.

    Parameters:
    - df_attraction_proNode_sum_total: attraction totals; transposed and cast
      to float64 before being multiplied into every frame.
    - df_step_2: iterable of DataFrames (the output of step 2).

    Returns:
    - (multiplied_results, summed_results): two lists of DataFrames in input
      order. Each summed frame has one ``'Sum'`` column indexed by the source
      frame's column labels.
    """
    attraction = df_attraction_proNode_sum_total.values.T.astype(np.float64)

    weighted_frames, total_frames = [], []
    for frame in df_step_2:
        weighted, totals = multiply_and_sum(frame.values.astype(np.float64), attraction)
        weighted_frames.append(
            pd.DataFrame(weighted, columns=frame.columns, index=frame.index)
        )
        # Column totals become a single-column frame keyed by the column labels
        total_frames.append(
            pd.DataFrame(totals.reshape(-1, 1), index=frame.columns, columns=['Sum'])
        )
    return weighted_frames, total_frames
# step 4
@jit(nopython=True)
def divide_and_sum(arr, divisor_arr):
    """Divide a 3D array by per-(item, column) divisors and total the quotients.

    ``divisor_arr`` is reshaped to ``(items, 1, shape[1])`` so that entry
    ``[i, 0, k]`` is the divisor for every ``arr[i, j, k]``. Cells whose
    divisor is zero keep the value 0.

    Returns:
    - (quotients, totals): ``quotients`` has the shape of ``arr``; ``totals``
      has shape ``(arr.shape[0], arr.shape[2])`` and sums over the middle axis.
    """
    divisors = divisor_arr.reshape((divisor_arr.shape[0], 1, divisor_arr.shape[1]))
    n_items, n_rows, n_cols = arr.shape

    quotients = np.zeros_like(arr)
    totals = np.zeros((n_items, n_cols))
    for i in range(n_items):
        for j in range(n_rows):
            for k in range(n_cols):
                denominator = divisors[i, 0, k]
                if denominator == 0:
                    # leave the zero in place; it adds nothing to the total
                    continue
                quotients[i, j, k] = arr[i, j, k] / denominator
                totals[i, k] += quotients[i, j, k]
    return quotients, totals
def step_4_numba(distAndAreaBasedAttr_step3, distAndAreaBasedAttr_step3_sum):
    """Normalise every step-3 frame by its column sums.

    Parameters:
    - distAndAreaBasedAttr_step3: list of equally shaped DataFrames.
    - distAndAreaBasedAttr_step3_sum: list of the matching sum frames.

    Returns:
    - (normalised_frames, sum_frames): both lists follow the input order. All
      frames reuse the labels of the first input frame; each sum frame has a
      single row labelled ``'Sum'``.
    """
    stacked = np.array([df.values for df in distAndAreaBasedAttr_step3]).astype(np.float64)
    stacked_sums = np.array([df.values for df in distAndAreaBasedAttr_step3_sum]).astype(np.float64)

    divided, summed = divide_and_sum(stacked, stacked_sums)

    normalised_frames = []
    for i in range(divided.shape[0]):
        template = distAndAreaBasedAttr_step3[0]
        normalised_frames.append(
            pd.DataFrame(divided[i], columns=template.columns, index=template.index)
        )

    sum_frames = []
    for i in range(summed.shape[0]):
        # One row of totals, labelled 'Sum', with the original column labels
        as_row = pd.DataFrame(summed[i]).T
        as_row = as_row.set_axis(['Sum'], axis='index')
        as_row = as_row.set_axis(distAndAreaBasedAttr_step3[0].columns, axis='columns')
        sum_frames.append(as_row)

    return normalised_frames, sum_frames
# step 5
@jit(nopython=True)
def tripsPerArctivity_numba(matrix, attrs):
    """Compute ``result[j, i] = sum_k matrix[j, k] * attrs[i, k]`` with loops.

    NOTE(review): the inner index ``k`` runs over ``matrix.shape[0]``, so this
    presumably expects a square ``matrix`` whose size equals
    ``attrs.shape[1]`` — confirm against the caller.

    Returns:
    - float64 array of shape ``(matrix.shape[0], attrs.shape[0])``.
    """
    n_areas = attrs.shape[0]
    n_categories = matrix.shape[0]
    result = np.zeros((n_categories, n_areas), dtype=np.float64)
    for area in range(n_areas):
        for category in range(n_categories):
            acc = 0.0
            for k in range(n_categories):
                acc += matrix[category, k] * attrs[area, k]
            result[category, area] = acc
    return result
def step_5_numba(distributionMatrix_step1, distAndAreaBasedAttr_step4):
    """Combine the step-1 and step-4 frames pairwise into trip tables.

    The two lists are walked in lockstep; every pair is handed to
    ``tripsPerArctivity_numba`` as float64 arrays.

    Returns:
    - List of DataFrames, one per pair, indexed by the step-1 frame's columns
      with the step-4 frame's index as column labels.
    """
    trip_frames = []
    for dist_frame, attr_frame in zip(distributionMatrix_step1, distAndAreaBasedAttr_step4):
        trips = tripsPerArctivity_numba(
            dist_frame.values.astype(np.float64),
            attr_frame.values.astype(np.float64),
        )
        trip_frames.append(
            pd.DataFrame(trips, index=dist_frame.columns, columns=attr_frame.index)
        )
    return trip_frames
# step 6&7
def step_6_7_vectorized(df_trips_proNode_proActivity_total):
    """Collapse a list of per-node trip tables into a single trip matrix.

    Every frame is reduced to its column sums; frame ``i`` becomes row ``i``
    of the result. Row and column labels are both taken from the columns of
    the first frame, so the list length must equal the number of columns.

    Returns:
    - Square DataFrame of summed trips.
    """
    frames = df_trips_proNode_proActivity_total
    stacked = np.array([frame.values for frame in frames])
    # Axis 1 of the stack is the row axis of each individual frame
    per_frame_column_sums = stacked.sum(axis=1)

    labels = frames[0].columns
    return pd.DataFrame(per_frame_column_sums, index=labels, columns=labels)
# step 8
def adjTripRate_adjFactor(tripMatrix,df_total_trips_allNodes_sumPerson, targetRate=1, factor=1 ):
    """Rescale the trip matrix so that trips per person equal ``targetRate``.

    Parameters:
    - tripMatrix: trip matrix to scale.
    - df_total_trips_allNodes_sumPerson: person counts; ``.sum()`` is called
      on it to obtain the total.
    - targetRate: desired ratio of total trips to total persons.
    - factor: unused (the former division by a scale factor is disabled).

    Returns:
    - Tuple holding the adjusted matrix twice; the second slot used to carry
      the additionally scaled variant.
    """
    total_trips = tripMatrix.sum().sum()
    total_persons = df_total_trips_allNodes_sumPerson.sum()

    # Current trips per person, and the multiplier that reaches the target
    tripRateBeforeAdjustment = total_trips / total_persons
    print("tripRateBeforeAdjustment",tripRateBeforeAdjustment)
    adjustmentRate = targetRate / tripRateBeforeAdjustment
    print("adjustmentRate",adjustmentRate)

    adjusted = tripMatrix * adjustmentRate
    return adjusted, adjusted
# Uniform Matrix
def decay(d, alpha):
    """Return the exponential distance decay ``exp(-d * alpha)``."""
    exponent = -(d * alpha)
    return math.exp(exponent)
def distanceDecay(df, alpha):
    """Apply ``exp(-x * alpha)`` to every cell of ``df``.

    Computed in one vectorised NumPy call rather than a per-cell Python
    callback through ``DataFrame.applymap`` (deprecated since pandas 2.1).

    Parameters:
    - df: DataFrame of distances; values are converted to float.
    - alpha: decay coefficient.

    Returns:
    - New float DataFrame with the labels of ``df``.
    """
    distances = df.to_numpy(dtype=float)
    # Sentinel cells (e.g. sys.float_info.max) may overflow to inf in the
    # product; exp(-inf) is 0, which is the wanted result, so stay quiet.
    with np.errstate(over="ignore"):
        decayed = np.exp(distances * alpha * -1)
    return pd.DataFrame(decayed, index=df.index, columns=df.columns)
def matrix_reduce_add(df):
    """Sum every cell of ``df`` except those equal to ``sys.float_info.max``."""
    is_regular = df != sys.float_info.max
    # Sentinel cells become NaN and are skipped by the sums
    return df.where(is_regular).sum().sum()
def replace_maxValue(df):
    """Return ``df`` with every ``sys.float_info.max`` cell replaced by 0."""
    sentinel = sys.float_info.max
    return df.replace(to_replace=sentinel, value=0)
#Trip gen matrix is used to scale the distance matrix
def getUniformMatrix(distanceMatrix, tripGenMatrix, alpha):
    """Build a decay-only trip matrix scaled to the size of ``tripGenMatrix``.

    The distance matrix is decayed with ``alpha`` and then divided by the
    ratio of its own total to the total of ``tripGenMatrix`` (totals taken
    via ``matrix_reduce_add``).

    Returns:
    - The scaled matrix after passing through ``replace_maxValue``.
    """
    decayed = distanceDecay(distanceMatrix, alpha)
    scale = matrix_reduce_add(decayed) / matrix_reduce_add(tripGenMatrix)
    return replace_maxValue(decayed.div(scale))
#==================================================================================================
#Modal Split functions
def computeModalShare(trip_matrix, dist_matrices, alpha, f_values=None):
    """
    Split a trip matrix across modes with a softmax over travel cost.

    For every mode the weight ``exp(-(matrix * alpha + f))`` is computed; the
    mode's share is its weight divided by the sum over all modes, multiplied
    by the trip matrix.

    Args:
        trip_matrix (np.ndarray or pd.DataFrame): The trip matrix.
        dist_matrices (dict of np.ndarray or pd.DataFrame): Distance matrix per mode.
        alpha (float): The alpha coefficient.
        f_values (dict of float, optional): Additive f coefficient per mode.
            Missing keys, or a missing/empty dict, count as 0.

    Returns:
        dict: Trip matrix per mode, keyed like ``dist_matrices``.
    """
    if not f_values:
        f_values = dict.fromkeys(dist_matrices, 0)

    exp_matrices = {}
    for mode, matrix in dist_matrices.items():
        # Work on the raw array so the weights are label-free
        raw = matrix.values if isinstance(matrix, pd.DataFrame) else matrix
        offset = f_values.get(mode, 0)
        exp_matrices[mode] = np.exp(-1 * (raw * alpha + offset))

    sum_exp = sum(exp_matrices.values())

    shares = {}
    for mode, exp_matrix in exp_matrices.items():
        shares[mode] = (exp_matrix / sum_exp) * trip_matrix
    return shares
def redistributeModalShares(dist_matrices, trip_matrices, redistribution_rules, threshold=0.5):
    """
    Move trips between modes wherever their travel times are (nearly) equal.

    For every rule, cells of the "from" mode that still carry trips and whose
    travel time differs from a "to" mode by at most ``threshold`` are added to
    that "to" mode and then zeroed in the "from" mode. Targets are processed
    in list order, so a cell is handed to the first matching target only.

    Args:
        dist_matrices (dict): Travel-time matrices per mode identifier.
        trip_matrices (dict): Trip matrices per mode identifier.
        redistribution_rules (list): Rules with a "from" identifier and a list
            of "to" identifiers.
        threshold (float): Largest absolute travel-time difference treated as similar.

    Returns:
        dict: Copies of the trip matrices with the transfers applied.

    Raises:
        ValueError: if a rule names an identifier missing from either input dict.
    """
    # Validate every rule up front so nothing is transferred on bad input
    for rule in redistribution_rules:
        source_known = rule["from"] in dist_matrices and rule["from"] in trip_matrices
        if not source_known:
            raise ValueError(f"Matrix ID {rule['from']} not found in the inputs.")
        for to_id in rule["to"]:
            if not (to_id in dist_matrices and to_id in trip_matrices):
                raise ValueError(f"Matrix ID {to_id} not found in the inputs.")

    # Work on copies; the caller's matrices stay untouched
    updated_trip_matrices = {}
    for mode, matrix in trip_matrices.items():
        updated_trip_matrices[mode] = matrix.copy()

    for rule in redistribution_rules:
        source_id = rule["from"]
        source_trips = updated_trip_matrices[source_id]
        source_times = dist_matrices[source_id]

        for target_id in rule["to"]:
            time_gap = np.abs(source_times - dist_matrices[target_id])
            # Cells with similar travel time that still have trips to give away
            movable = (time_gap <= threshold) & (source_trips > 0)

            updated_trip_matrices[target_id][movable] += source_trips[movable]
            source_trips[movable] = 0

    return updated_trip_matrices
def computeDistanceBrackets(trip_matrices, metric_dist_matrices, dist_brackets=(800, 2400, 4800)):
    """Return the share of trips falling into each distance bracket.

    Metric matrix keys are mapped onto trip matrix keys by replacing
    ``"metric_matrix"`` with ``"distance_matrix"`` and appending ``"_noEntr"``.
    Trip matrices without a matching metric matrix are skipped.

    Parameters:
    - trip_matrices: dict of trip matrices per mode.
    - metric_dist_matrices: dict of metric distance matrices per mode.
    - dist_brackets: ascending upper bounds. The first bracket counts every
      trip with distance <= bound, later ones the half-open range
      (previous bound, bound]. Trips beyond the last bound are not counted.

    Returns:
    - dict mapping ``str(bound)`` to the bracket's share of all counted trips,
      rounded to 3 decimals. All shares are 0.0 when no trips were counted.
    """
    # Re-key the metric matrices so they can be looked up by trip matrix key
    transformed_metric_keys = {
        key.replace("metric_matrix", "distance_matrix") + "_noEntr": matrix
        for key, matrix in metric_dist_matrices.items()
    }

    bracket_totals = dict.fromkeys(dist_brackets, 0)

    for key, trip_matrix in trip_matrices.items():
        dist_matrix = transformed_metric_keys.get(key)
        if dist_matrix is None:
            print("no matrxi found")
            continue  # no metric distances for this mode

        lower_bound = None
        for bracket in dist_brackets:
            in_bracket = dist_matrix <= bracket
            if lower_bound is not None:
                in_bracket = (dist_matrix > lower_bound) & in_bracket
            bracket_totals[bracket] += trip_matrix[in_bracket].sum().sum()
            lower_bound = bracket

    brackets_sum = sum(bracket_totals.values())
    if not brackets_sum:
        # Nothing counted (no matching matrices or no trips): avoid 0/0
        return {str(bracket): 0.0 for bracket in bracket_totals}

    return {
        str(bracket): round(total / brackets_sum, 3)
        for bracket, total in bracket_totals.items()
    }
def computeTripStats(trip_matrices, distance_matrices, metric_dist_matrices, pop):
    """Aggregate total and mean travel distance / time over all modes.

    Metric matrix keys are mapped onto trip matrix keys by replacing
    ``"metric_matrix"`` with ``"distance_matrix"`` and appending ``"_noEntr"``.
    A mode is skipped when either its metric matrix or its entry in
    ``distance_matrices`` is missing.

    Parameters:
    - trip_matrices: dict of trip matrices per mode.
    - distance_matrices: dict of matrices multiplied with the trips to obtain
      the travel time total.
    - metric_dist_matrices: dict of metric distance matrices per mode.
    - pop: population used for the per-person means.

    Returns:
    - Tuple (totalTravelDistance, totalTravelTime, MeanTripDistance,
      MeanTravelDistancePerPerson, MeanTravelTime, MeanTravelTimePerPerson).
      The per-trip means are 0.0 when no trips were counted.
    """
    # Re-key the metric matrices so they can be looked up by trip matrix key
    transformed_metric_keys = {
        key.replace("metric_matrix", "distance_matrix") + "_noEntr": matrix
        for key, matrix in metric_dist_matrices.items()
    }

    trips = 0
    totalTravelDistance = 0
    totalTravelTime = 0

    for key, trip_matrix in trip_matrices.items():
        metric_dist_matrix = transformed_metric_keys.get(key)
        dist_matrix = distance_matrices.get(key)
        # Both lookups can miss; multiplying None with a matrix would raise
        if metric_dist_matrix is None or dist_matrix is None:
            print("no matrxi found")
            continue

        totalTravelTime += (dist_matrix * trip_matrix).sum().sum()
        trips += trip_matrix.sum().sum()
        totalTravelDistance += (metric_dist_matrix * trip_matrix).sum().sum()

    # Guard the per-trip means against an empty selection (0 / 0)
    MeanTripDistance = totalTravelDistance / trips if trips else 0.0
    MeanTravelTime = totalTravelTime / trips if trips else 0.0
    MeanTravelDistancePerPerson = totalTravelDistance / pop
    MeanTravelTimePerPerson = totalTravelTime / pop

    return totalTravelDistance, totalTravelTime, MeanTripDistance, MeanTravelDistancePerPerson, MeanTravelTime, MeanTravelTimePerPerson
def calculate_relative_mode_share(trip_matrices):
    """
    Return each mode's fraction of the trips summed over all modes.

    Args:
        trip_matrices (dict of np.ndarray or pd.DataFrame): Trip matrix per mode.

    Returns:
        dict: Mode key -> that mode's trip total divided by the grand total.
    """
    trips_by_mode = {}
    for mode, matrix in trip_matrices.items():
        trips_by_mode[mode] = matrix.sum().sum()

    grand_total = sum(trips_by_mode.values())

    return {mode: mode_total / grand_total for mode, mode_total in trips_by_mode.items()}
def extract_distance_matrices(stream, distance_matrices_of_interest):
    """
    Extract distance matrices from the stream and convert them to pandas DataFrames.

    Every object under ``stream["@Data"]["@{0}"]`` is inspected; when it
    carries an attribute named like a requested key, that attribute is parsed
    as JSON with the fields ``origin_uuid``, ``destination_uuid`` and
    ``matrix``. If several objects carry the same key, the last one that
    parses wins. Extraction is best-effort: objects without the attribute are
    skipped silently, malformed payloads are skipped with a printed notice.

    Args:
        stream (dict): Stream data containing distance matrices.
        distance_matrices_of_interest (list of str): Attribute names to extract.

    Returns:
        dict: Key -> DataFrame indexed by origin ids with destination ids as
        columns. Keys that were not found are absent.
    """
    distance_matrices = {}
    for distMK in distance_matrices_of_interest:
        for distM in stream["@Data"]['@{0}']:
            attributes = getattr(distM, "__dict__", None) or {}
            if distMK not in attributes:
                continue  # this object simply does not hold the matrix

            try:
                distance_matrix_dict = json.loads(attributes[distMK])
                df_distances = pd.DataFrame(
                    distance_matrix_dict["matrix"],
                    index=distance_matrix_dict["origin_uuid"],
                    columns=distance_matrix_dict["destination_uuid"],
                )
            except (KeyError, TypeError, ValueError) as e:
                # Invalid JSON, missing field or shape mismatch: report the
                # problem instead of hiding it, then carry on.
                print(f"Skipping malformed distance matrix '{distMK}': {e}")
                continue

            distance_matrices[distMK] = df_distances
    return distance_matrices
#==================================================================================================
def computeTrips(
    df_distributionMatrix,
    df_total_trips_allNodes,
    df_distMatrix_speckle,
    df_alphas,
    df_attraction_proNode_sum_total,
    df_distances_aligned,
    TARGET_TRIP_RATE,
    SCALING_FACTOR,
    total_population,
    tot_res,
    tot_vis,
    distance_matrices,
    metric_matrices,
    redistributeTrips,
    DISTANCE_BRACKETS,
    alpha_low, alpha_med, alpha_high,
    alpha_mode,
    alpha_uniform,
    NEW_F_VALUES,
    CLIENT,
    TARGET_STREAM,
    TARGET_BRANCH,
    sourceInfo="",
    ):
    """Run the full trip-generation pipeline and push the results to Speckle.

    Steps 1-8 build the land-use based trip matrix, which is then split by
    mode, redistributed between modes with similar travel time, summarised
    into indicators, and sent together with a decay-only ("uniform") matrix
    as a new commit.

    Parameters (grouped):
    - df_distributionMatrix, df_total_trips_allNodes, df_distMatrix_speckle,
      df_alphas, df_attraction_proNode_sum_total: inputs of steps 1-5.
    - df_distances_aligned, alpha_uniform: inputs of the uniform matrix.
    - TARGET_TRIP_RATE, SCALING_FACTOR, total_population: inputs of step 8
      (SCALING_FACTOR is passed on but currently unused there).
    - tot_res, tot_vis: stored as commit metadata only.
    - distance_matrices, metric_matrices: per-mode matrices for the modal
      split and the statistics.
    - redistributeTrips: rules handed to ``redistributeModalShares``.
    - DISTANCE_BRACKETS: upper bounds for the distance-bracket shares.
    - alpha_low, alpha_med, alpha_high: replacements for the placeholder
      alphas in ``df_alphas``.
    - alpha_mode, NEW_F_VALUES: modal-split coefficients.
    - CLIENT, TARGET_STREAM, TARGET_BRANCH: Speckle target of the commit.
    - sourceInfo: optional mapping that is listed in the commit message;
      anything without ``.items()`` (such as the default "") is ignored.

    Returns:
    - dict with the model parameters and the computed indicators.

    NOTE: the four modal-share keys read from the mode distribution are
    hard-coded; a KeyError is raised if ``distance_matrices`` lacks them.
    """
    NEW_ALPHAS = reconstruct_dataframe(alpha_low, alpha_med, alpha_high, df_alphas)
    NEW_MODE_ALPHA = alpha_mode

    # ====
    #step 1
    distributionMatrix_step1M = step_1(df_distributionMatrix,
                                       df_total_trips_allNodes)
    #step 2
    df_step_2M = step_2_vectorized(df_distMatrix_speckle,
                                   NEW_ALPHAS)
    #step 3
    distAndAreaBasedAttr_step3M, distAndAreaBasedAttr_step3_sumM = step_3_numba(df_attraction_proNode_sum_total,
                                                                                df_step_2M)
    #step 4
    distAndAreaBasedAttr_step4M, distAndAreaBasedAttr_step4_sumM = step_4_numba(distAndAreaBasedAttr_step3M,
                                                                                distAndAreaBasedAttr_step3_sumM)
    #step 5
    df_trips_proNode_proActivity_totalM = step_5_numba(distributionMatrix_step1M,
                                                       distAndAreaBasedAttr_step4M)
    #step 6 & 7
    df_tripMatrixM = step_6_7_vectorized(df_trips_proNode_proActivity_totalM)
    #step 8
    df_tripMatrix_adjustedM, df_tripMatrix_adjusted_scaledM = adjTripRate_adjFactor(df_tripMatrixM,
                                                                                    total_population,
                                                                                    TARGET_TRIP_RATE,
                                                                                    SCALING_FACTOR )
    #------
    #MAIN 1 compute trip matrice per mode
    trip_matricesM = computeModalShare(df_tripMatrix_adjusted_scaledM,
                                       distance_matrices,
                                       NEW_MODE_ALPHA,
                                       f_values=NEW_F_VALUES)
    #MAIN 2 compute modal shares (redistribute trips in case of identical travel time)
    trip_matrices_redisM = redistributeModalShares(distance_matrices,
                                                   trip_matricesM,
                                                   redistributeTrips)
    #POST 1 compute mode shares
    rel_mode_distributionM = calculate_relative_mode_share(trip_matrices_redisM)
    #POST 2 distance brackets
    dist_sharesM = computeDistanceBrackets(trip_matrices_redisM,
                                           metric_matrices,
                                           DISTANCE_BRACKETS)
    #POST 3 compute more stats
    (totalTravelDistance, totalTravelTime,
     MeanTripDistance, MeanTravelDistancePerPerson,
     MeanTripTime, MeanTravelTimePerPerson) = computeTripStats(trip_matrices_redisM,
                                                               distance_matrices,
                                                               metric_matrices,
                                                               total_population)

    uniform_tripmatrix = getUniformMatrix(df_distances_aligned, df_tripMatrix_adjustedM, alpha_uniform)

    # Parameter and indicator values reported back to the caller
    newdata = {
        # Model Parameter==
        # Alpha - Routing
        "alpha_low": alpha_low,
        "alpha_med": alpha_med,
        "alpha_high": alpha_high,
        "alpha_uniform": alpha_uniform,
        "fvalues": NEW_F_VALUES,
        "alpha_mode": NEW_MODE_ALPHA,
        # Model Indicators ==
        # Modal Shares
        "share_ped_mm_art": rel_mode_distributionM['activity_node+distance_matrix_ped_mm_art_noEntr'],
        "share_ped_mm": rel_mode_distributionM['activity_node+distance_matrix_ped_mm_noEntr'],
        "share_ped": rel_mode_distributionM['activity_node+distance_matrix_ped_noEntr'],
        "share_ped_art": rel_mode_distributionM['activity_node+distance_matrix_ped_art_noEntr'],
        # Tripshares by Distance Brackets: one entry per configured bracket,
        # keyed by str(bound) ("800", "2400", "4800" for the usual setup)
        **dist_sharesM,
        # Travel Time & Distances
        "totalTravelDistance": totalTravelDistance,
        "totalTravelTime": totalTravelTime,
        "MeanTravelTimePerPerson": MeanTravelTimePerPerson,
        # Trip Distances
        "MeanTripDistance": MeanTripDistance,
        "MeanTripTime": MeanTripTime,
        "MeanTravelDistancePerPerson": MeanTravelDistancePerPerson,
    }

    # Matrices to upload: per-mode matrices (before redistribution) renamed
    # from "distance" to "trip", plus the land-use and uniform matrices
    trip_matrice_adjName = {k.replace("distance", "trip"): v for k, v in trip_matricesM.items()}
    trip_matrice_adjName["tripMatrix_landuse"] = df_tripMatrix_adjusted_scaledM
    trip_matrice_adjName["tripMatrix_uniform"] = uniform_tripmatrix

    extraData = {"population": total_population,
                 "residents": tot_res,
                 "visitors": tot_vis,
                 "parameter": newdata,
                 }

    commitMsg = "HuggingFace update"
    # Only mention source commits when a non-empty mapping was supplied; the
    # former bare try/except left a dangling suffix for the default "".
    if sourceInfo and hasattr(sourceInfo, "items"):
        commitMsg += " using these commits: #+ "
        commitMsg += "".join(f" {k}: {v}" for k, v in sourceInfo.items())
    print(commitMsg)

    send_matrices_and_create_commit(
        trip_matrice_adjName,
        CLIENT,
        TARGET_STREAM,
        TARGET_BRANCH,
        commitMsg,
        rows_per_chunk=300,
        containerMetadata=extraData
    )
    print ("===============================")
    return newdata
#==================================================================================================
# speckle send
def send_row_bundle(rows, indices, transport):
    """Send one bundle of matrix rows and return the id reported by the send.

    Parameters:
    - rows: list of row value lists.
    - indices: list of row labels belonging to ``rows``.
    - transport: transport handed to ``operations.send``.
    """
    bundle = Base()
    bundle.rows = rows
    bundle.indices = indices
    return operations.send(base=bundle, transports=[transport])
def send_matrix(matrix_df, transport, rows_per_chunk):
    """Upload a DataFrame in row bundles and return the id of the matrix object.

    Rows are rounded to 4 decimals and sent in bundles of ``rows_per_chunk``;
    a final, shorter bundle carries the remainder. The matrix object only
    stores ``@batch_<n>`` references to the bundles.

    Parameters:
    - matrix_df: DataFrame to upload.
    - transport: transport handed to the send operations.
    - rows_per_chunk: number of rows per bundle.
    """
    matrix_object = Base(metaData="Some metadata")

    pending_rows, pending_labels = [], []
    next_batch = 0  # numbering of the @batch_<n> attributes

    for label, series in matrix_df.iterrows():
        pending_rows.append([round(value, 4) for value in series.tolist()])
        pending_labels.append(str(label))

        if len(pending_rows) != rows_per_chunk:
            continue

        # Bundle is full: ship it and remember the reference
        reference = send_row_bundle(pending_rows, pending_labels, transport)
        setattr(matrix_object, f"@batch_{next_batch}", {"referencedId": reference})
        pending_rows, pending_labels = [], []
        next_batch += 1
        print( str(rows_per_chunk) +" rows has been sent")

    # Ship the remainder that did not fill a whole bundle
    if pending_rows:
        reference = send_row_bundle(pending_rows, pending_labels, transport)
        setattr(matrix_object, f"@batch_{next_batch}", {"referencedId": reference})

    return operations.send(base=matrix_object, transports=[transport])
# Main function to send all matrices and create a commit
def send_matrices_and_create_commit(matrices, client, stream_id, branch_name, commit_message, rows_per_chunk, containerMetadata):
    """Upload all matrices, wrap them in a container object and commit it.

    Parameters:
    - matrices: dict name -> DataFrame; each is uploaded via ``send_matrix``.
    - client: Speckle client used for the transport and the commit.
    - stream_id: target stream.
    - branch_name: target branch.
    - commit_message: message of the created commit.
    - rows_per_chunk: bundle size forwarded to ``send_matrix``.
    - containerMetadata: dict of extra attributes stored on the container.

    Returns:
    - The value returned by ``client.commit.create`` (previously the result
      was dropped and the function returned None).
    """
    transport = ServerTransport(client=client, stream_id=stream_id)

    # Upload every matrix and remember the id of its matrix object
    matrix_ids = {}
    for k, df in matrices.items():
        matrix_ids[k] = send_matrix(df, transport, rows_per_chunk)
        print("object: " + k + " has been sent")

    # Container object: metadata plus one reference per uploaded matrix
    container_object = Base()
    for k, v in containerMetadata.items():
        container_object[k] = v

    for matrix_name, matrix_id in matrix_ids.items():
        print("obj_id", matrix_id)
        # Stored as a reference dict; the plain id written first by the old
        # code was always overwritten by this assignment anyway.
        # You might need to adjust this based on how your Speckle server expects to receive references
        setattr(container_object, matrix_name, {"referencedId": matrix_id})

    container_id = operations.send(base=container_object, transports=[transport])

    # The commit points at the container, not at the individual matrices
    commit_id = client.commit.create(
        stream_id=stream_id,
        object_id=container_id,
        branch_name=branch_name,
        message=commit_message,
    )
    return commit_id