File size: 3,563 Bytes
ce3dfc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

import time
from functools import wraps

def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
    """Return a copy of ``original_df`` with the placeholder values replaced.

    The three hard-coded placeholder values are mapped as follows:

    - ``0.00191`` -> ``alpha_low``
    - ``0.0038``  -> ``alpha_med``
    - ``0.00767`` -> ``alpha_high``

    Parameters:
    - alpha_low, alpha_med, alpha_high: Replacement values for the placeholders.
    - original_df: DataFrame containing the placeholder values. It is not
      modified.

    Returns:
    - A new DataFrame with every placeholder cell replaced.

    Raises:
    - ValueError: If any of the three placeholder values does not occur at
      least once in ``original_df``.
    """
    # Define the mapping from original values to new alpha parameters
    value_to_alpha = {
        0.00191: alpha_low,
        0.00767: alpha_high,
        0.0038: alpha_med,
    }

    # Locate each placeholder on the ORIGINAL frame before anything is changed.
    # Replacing one value after another on the evolving frame would re-replace
    # cells whenever a new alpha happens to equal another placeholder
    # (e.g. alpha_low == 0.0038).
    masks = {value: original_df == value for value in value_to_alpha}

    # Check if each value is present at least once in the DataFrame
    for original_value, mask in masks.items():
        if not mask.any().any():
            raise ValueError(f"Value {original_value} not found in the input DataFrame.")

    # Create a new DataFrame based on the original one
    new_df = original_df.copy()

    # Apply the mapping using the precomputed masks so replacements are
    # independent of each other.
    for original_value, new_value in value_to_alpha.items():
        new_df = new_df.mask(masks[original_value], new_value)

    return new_df

def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx=None, numRowsEnd_idx=None, numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
    """Promote a header row (and optionally a label column), then slice the frame.

    Parameters:
    - df: Input DataFrame. It is left unmodified.
    - headerRow_idx: Positional index of the row whose values become the
      column labels.
    - numRowsStart_idx, numRowsEnd_idx: Positional row bounds of the slice
      (``None`` means open-ended).
    - numColsStart_idx, numColsEnd_idx: Positional column bounds of the slice
      (``None`` means open-ended).
    - rowNames_idx: Positional index of the column whose values become the
      row labels; ``None`` keeps the existing index.

    Returns:
    - The relabelled DataFrame restricted to the requested row/column range.
    """
    # Relabel a shallow copy: assigning .columns/.index directly on the
    # argument would silently change the caller's DataFrame.
    df = df.copy(deep=False)
    df.columns = df.iloc[headerRow_idx]  # Set the header
    if rowNames_idx is not None:
        df.index = df.iloc[:, rowNames_idx]  # Set the row names
    # Slice the dataset to numerical data
    return df.iloc[numRowsStart_idx:numRowsEnd_idx, numColsStart_idx:numColsEnd_idx]




def timeit(f):
    """Decorator that prints how long each call to ``f`` took.

    The wrapped function's return value is passed through unchanged. The
    elapsed time is measured with ``time.time()`` and printed in seconds.
    """
    @wraps(f)
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        # The format string must have exactly one placeholder per value; the
        # previous version kept placeholders for args/kwargs that were no
        # longer supplied, which raised TypeError on every call.
        print('func:%r took: %2.4f sec' % (f.__name__, te - ts))
        return result
    return timed





def timing_decorator(func):
    """Wrap ``func`` so each call reports its duration and completion time.

    After the wrapped call returns, one line is printed with the function
    name, the elapsed seconds (4 decimals) and the local finish time.
    The wrapped function's return value is passed through unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        began = time.time()
        outcome = func(*args, **kwargs)
        finished = time.time()

        # Both figures derive from the same pair of clock readings.
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(finished))
        duration = finished - began

        print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
        return outcome

    return wrapper


# Function to compare two dataframes after converting and rounding
def compare_dataframes(df1, df2, decimals=8):
    """Compare two DataFrames after casting to float and rounding.

    Prints the comparison result, both shapes and a short preview of each
    frame (first 2 rows of the first 5 columns).

    Parameters:
    - df1: First DataFrame (labelled "ORIGINAL" in the printed output).
    - df2: Second DataFrame (labelled "Numba" / "New method" in the output).
    - decimals: Number of decimal places both frames are rounded to before
      the comparison.

    Returns:
    - True if the rounded frames are equal according to ``DataFrame.equals``,
      otherwise False.
    """
    # Function to convert DataFrame columns to float and then round
    def convert_and_round_dataframe(df, decimals):
        # Convert all columns to float
        df_float = df.astype(float)
        # Round to the specified number of decimals
        return df_float.round(decimals)

    rounded_df1 = convert_and_round_dataframe(df1, decimals)
    rounded_df2 = convert_and_round_dataframe(df2, decimals)

    are_equal = rounded_df1.equals(rounded_df2)

    print("Both methods are equal:", are_equal)

    print("Numba shape:", df2.shape)
    print("Original shape:", df1.shape)

    # The labels promise the first 5 *columns*; slice columns (not rows) so the
    # preview matches what is announced.
    print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df1.iloc[:, 0:5].head(2))

    print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df2.iloc[:, 0:5].head(2))

    # Hand the result back so callers can act on it instead of parsing stdout.
    return are_equal


def align_dataframes(df1, df2, key):
    """
    Align two dataframes on a common set of identifiers, ensuring that both
    dataframes have only the rows with matching identifiers, in the same order.

    Identifiers are taken from the index of df1 and from the column `key`
    of df2.

    Parameters:
    - df1: First dataframe; matched on its index.
    - df2: Second dataframe; matched on the values of column `key`.
    - key: Column name in df2 holding the identifiers.

    Returns:
    - df1_aligned, df2_aligned: Tuple of aligned dataframes. df2_aligned is
      indexed by `key` (the column itself is kept as well).
    """
    common_ids = df1.index.intersection(df2[key])
    df1_aligned = df1.loc[common_ids]
    # Select df2's rows by label in the order of common_ids. Filtering with
    # isin() would keep df2's own row order, so the two results could hold the
    # same identifiers but in different positions.
    df2_aligned = df2.set_index(key, drop=False).loc[common_ids]
    return df1_aligned, df2_aligned