Spaces:
Sleeping
Sleeping
File size: 3,563 Bytes
ce3dfc6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import time
from functools import wraps
def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
    """Return a copy of ``original_df`` with placeholder values swapped for alphas.

    Cells equal to 0.00191, 0.0038 and 0.00767 are replaced by ``alpha_low``,
    ``alpha_med`` and ``alpha_high`` respectively. All other cells are kept and
    ``original_df`` itself is not modified.

    Parameters:
    - alpha_low: replacement for cells equal to 0.00191.
    - alpha_med: replacement for cells equal to 0.0038.
    - alpha_high: replacement for cells equal to 0.00767.
    - original_df: DataFrame containing the placeholder values.

    Returns:
    - A new DataFrame with the placeholders replaced.

    Raises:
    - ValueError: if any of the three placeholder values does not occur at
      least once in ``original_df``.
    """
    # Mapping from the placeholder found in the input to its new alpha value.
    value_to_alpha = {
        0.00191: alpha_low,
        0.00767: alpha_high,
        0.0038: alpha_med,
    }

    # Every placeholder must be present at least once in the DataFrame.
    for original_value in value_to_alpha:
        if not (original_df == original_value).any().any():
            raise ValueError(f"Value {original_value} not found in the input DataFrame.")

    # One dict-based replace matches every placeholder against the ORIGINAL
    # cells. Replacing one value at a time would re-replace cells whenever a
    # new alpha happens to equal a placeholder that is processed later
    # (e.g. alpha_low == 0.00767 would end up as alpha_high).
    return original_df.replace(value_to_alpha)
def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx=None, numRowsEnd_idx=None,
                         numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
    """Label a raw table from its own cells and slice out the data region.

    The row at position ``headerRow_idx`` becomes the column labels and, when
    ``rowNames_idx`` is given, the column at that position becomes the row
    labels. The result is then cut down positionally to
    ``[numRowsStart_idx:numRowsEnd_idx, numColsStart_idx:numColsEnd_idx]``.

    Parameters:
    - df: DataFrame to process; it is not modified.
    - headerRow_idx: position of the row holding the column names.
    - numRowsStart_idx, numRowsEnd_idx: positional row slice bounds
      (``None`` leaves that side of the slice open).
    - numColsStart_idx, numColsEnd_idx: positional column slice bounds
      (``None`` leaves that side of the slice open).
    - rowNames_idx: position of the column holding the row names, or ``None``
      to keep the existing index.

    Returns:
    - The relabelled, sliced DataFrame.
    """
    # Relabel a shallow copy so that the caller's DataFrame keeps its own
    # column/index labels; the underlying data is not duplicated.
    labelled = df.copy(deep=False)
    labelled.columns = labelled.iloc[headerRow_idx]  # Set the header
    if rowNames_idx is not None:
        labelled.index = labelled.iloc[:, rowNames_idx]  # Set the row names
    # Slice the dataset down to the requested rows/columns.
    return labelled.iloc[numRowsStart_idx:numRowsEnd_idx, numColsStart_idx:numColsEnd_idx]
def timeit(f):
    """Decorator that prints the wall-clock time taken by each call to ``f``.

    The printed line contains the function name, the positional and keyword
    arguments of the call, and the elapsed time in seconds. The wrapped
    function's return value is passed through unchanged.
    """
    @wraps(f)
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        # The format string has four placeholders, so all four values must be
        # supplied; passing only the name and the duration raises TypeError.
        print('func:%r args:[%r, %r] took: %2.4f sec' %
              (f.__name__, args, kw, te - ts))
        return result
    return timed
def timing_decorator(func):
    """Wrap ``func`` so that each call reports its duration and finish time.

    After the wrapped function returns, one line is printed with the function
    name, the elapsed wall-clock seconds (four decimals) and the local time at
    which the call finished. The function's return value is passed through.
    """
    @wraps(func)
    def timed_call(*args, **kwargs):
        began_at = time.time()
        outcome = func(*args, **kwargs)
        finished_at = time.time()

        # The same end-of-call reading feeds both the duration and the stamp.
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(finished_at))
        duration = finished_at - began_at
        print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
        return outcome

    return timed_call
def compare_dataframes(df1, df2, decimals=8):
    """Compare two DataFrames after casting to float and rounding.

    Both frames are converted with ``astype(float)``, rounded to ``decimals``
    places and compared with ``DataFrame.equals``. A short report is printed:
    the verdict, both shapes, and the first two rows of the first five
    columns of each frame.

    Parameters:
    - df1: the reference ("original") DataFrame.
    - df2: the DataFrame produced by the new method.
    - decimals: number of decimal places kept before comparing.

    Returns:
    - True when the rounded frames are equal, False otherwise.
    """
    rounded_df1 = df1.astype(float).round(decimals)
    rounded_df2 = df2.astype(float).round(decimals)
    are_equal = bool(rounded_df1.equals(rounded_df2))

    print("Both methods are equal:", are_equal)
    print("Numba shape:", df2.shape)
    print("Original shape:", df1.shape)

    # The headings promise the first 5 columns, so slice columns (not rows)
    # before taking the head.
    print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df1.iloc[:, 0:5].head(2))
    print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df2.iloc[:, 0:5].head(2))

    # Hand the verdict back so callers can act on it instead of parsing stdout.
    return are_equal
def align_dataframes(df1, df2, key):
    """
    Restrict two dataframes to the identifiers they have in common.

    Identifiers are read from the index of df1 and from the ``key`` column of
    df2; only rows whose identifier appears on both sides are kept.

    Parameters:
    - df1: First dataframe, matched on its index.
    - df2: Second dataframe, matched on its ``key`` column.
    - key: Column name in df2 holding the identifiers.

    Returns:
    - df1_aligned, df2_aligned: Tuple of filtered dataframes. df2_aligned is
      indexed by ``key`` (the column is also kept) and its rows stay in df2's
      original order. NOTE(review): df1_aligned follows the order of the
      index intersection, so the two results may not share the same row
      order - confirm whether callers rely on positional alignment.
    """
    shared_ids = df1.index.intersection(df2[key])

    # Boolean mask over df2 marking the rows whose key is shared.
    in_both = df2[key].isin(shared_ids)

    left = df1.loc[shared_ids]
    right = df2[in_both].set_index(key, drop=False)
    return left, right
|