File size: 9,472 Bytes
a5bd089
 
 
 
 
b37ce89
a5bd089
 
 
 
 
 
63e4f49
27ebbbd
 
 
a5bd089
 
 
 
f9e67d5
 
a5bd089
 
f9e67d5
a5bd089
 
 
 
 
 
 
 
fa2a5c2
a5bd089
f9e67d5
a5bd089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa2a5c2
 
a5bd089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa2a5c2
b37ce89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5bd089
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205

from funcs.tools import upsample_signal
from funcs.tools import process_signals
from funcs.tools import numpy_to_native
from funcs.plot_func import plot_slices
from funcs.tools import upsample_signal_v2
from funcs.tools import fill_missing_values

import json
import numpy as np
import pandas as pd

def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=20, debug=False):
    """Slice a semicolon-delimited CSV of sensor data into fixed-size segments.

    Peaks are detected per "GZ*" channel on an upsampled, smoothed copy of the
    signal; the mean peak position across channels defines each slice boundary.
    Segments shorter than ``min_slice_size`` rows are discarded; kept segments
    are left-padded with preceding rows up to ``slice_size``, or truncated to
    ``slice_size``. The slices are written to ``output.json``.

    Parameters:
        input_file: path to the CSV (``;``-separated, first column used as index).
        slice_size: target number of rows per slice.
        min_slice_size: minimum row span between two boundaries for a slice to be kept.
        sample_rate: upsampling factor; peak positions are in upsampled
            coordinates and divided by this to recover row indices.
        window_size: smoothing window forwarded to ``process_signals``.
        debug: when True, plot the detected slice points via ``plot_slices``.

    Returns:
        Tuple ``('output.json', number_of_slices, list_of_precise_time_diffs)``
        where the first entry of the diff list is ``None`` (no predecessor).
    """
    data            = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data   = data.copy()  # kept untouched for the debug plot below
    data            = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)  # any gaps still left after interpolation become 0

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    # upsample_factor = sample_rate
    # Accumulator sized from the first channel's upsampled signal; all channels
    # are assumed to upsample to the same length — TODO confirm.
    combined_smoothed_signals_upsampled = np.zeros(upsample_signal(data[gz_columns[0]].values, sample_rate).size, dtype=float)
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        upsampled_smoothed_signal, peaks = process_signals(gz_signal, sample_rate, window_size=window_size)
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    # Directly use the average peaks as precise_slice_points
    # (zip truncates to the channel with the fewest peaks).
    precise_slice_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    slices = []
    start_index = 0
    list_time_diff_for_activation = []
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Peak positions are in upsampled coordinates; map back to row indices.
        end_index = round(precise_slice_point / sample_rate)
        if i == 0:
            # First peak only anchors the start of the first slice.
            start_index = end_index
            continue

        if end_index - start_index >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")

            if i > 1:
                # Compute precise_time_diff based on the differences between current and last precise_timestamps.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff

                # Compute the timestamp by finding the closest index in the original signal.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]

                # Compute time_diff based on the differences between current and last timestamps.
                time_diff = timestamp - slices[-1]["timestamp"]

                # Compute precise_timestamp and precise_time_diff for each GZ channel individually
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # Very first kept slice: no predecessor, so diffs are undefined.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None

                # Initialize precise_timestamp and precise_time_diff for each GZ channel individually
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            list_time_diff_for_activation.append(slice_data["precise_time_diff"])


            if end_index - start_index < slice_size:
                # Too short: left-pad each data column with the rows that
                # immediately precede the slice. Metadata keys are skipped.
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long (or exact): truncate each data column to slice_size rows.
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)

        # Next slice starts where this boundary fell, even if the slice was dropped.
        start_index = end_index

    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    if debug:
        plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])

    return 'output.json', len(slices), list_time_diff_for_activation


def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20, debug=False):
    """Slice a semicolon-delimited CSV into fixed-size segments (v2, with fallback).

    Like ``slice_csv_to_json``, but boundary candidates are refined to the
    argmax of the combined smoothed signal within each inter-peak interval,
    and a fixed-stride fallback is used when peak detection fails.

    Fixes over the previous revision:
      * ``debug`` is now a real parameter (it was referenced but never defined,
        raising NameError whenever a channel had to be skipped);
      * each channel is processed exactly once (the loop body was duplicated,
        appending every channel's peaks twice and double-counting its signal);
      * per-channel peak lists are keyed by column name, so skipped channels no
        longer desynchronize the ``{col}_precise_*`` metadata;
      * fallback boundaries are generated in upsampled coordinates, matching
        the ``/ upsample_factor`` conversion applied later (previously all
        fallback slices collapsed towards row 0).

    Parameters:
        input_file: path to the CSV (``;``-separated, first column used as index).
        slice_size: target number of rows per slice.
        min_slice_size: minimum row span between boundaries for a slice to be kept.
        sample_rate: upsampling factor for peak detection.
        debug: when True, print a note for each skipped channel.

    Returns:
        Tuple ``('output.json', number_of_slices)``.

    Raises:
        ValueError: if the CSV contains no "GZ*" columns.
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    data = fill_missing_values(data)
    data.fillna(0, inplace=True)  # any gaps still left after interpolation become 0

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    if not gz_columns:
        raise ValueError("No 'GZ*' columns found in input CSV")

    upsample_factor = sample_rate
    # Accumulator sized from the first channel's upsampled signal; all channels
    # are assumed to upsample to the same length — TODO confirm.
    combined_smoothed_signals_upsampled = np.zeros(
        upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float
    )

    # Column name -> detected peak positions (upsampled coordinates), only for
    # channels that process_signals handled successfully.
    channel_peaks = {}
    process_signals_failed = False
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        try:
            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
        except Exception:
            # Any hard failure switches the whole run to fixed-stride slicing.
            process_signals_failed = True
            break
        if upsampled_smoothed_signal is None or peaks is None:
            # Soft failure (empty/too-short signal): skip this channel only.
            if debug:
                print(f"Skipping {gz_col} due to empty or too short signal")
            continue
        channel_peaks[gz_col] = peaks
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    all_peaks = list(channel_peaks.values())
    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
        # Fallback: evenly spaced boundaries, expressed in upsampled coordinates
        # so the `/ upsample_factor` conversion below yields slice_size-row steps.
        precise_loop_points = list(range(0, len(data) * upsample_factor, slice_size * upsample_factor))
    else:
        # Average the k-th peak position across channels
        # (zip truncates to the channel with the fewest peaks).
        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    # Refine each boundary to the strongest point of the combined signal within
    # the interval ending at that boundary.
    precise_slice_points = []
    for i in range(len(precise_loop_points) - 1, -1, -1):
        interval_end = int(precise_loop_points[i])
        interval_start = int(precise_loop_points[i - 1]) if i > 0 else 0
        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
        precise_slice_points.append(max_value_index)
    precise_slice_points.reverse()

    metadata_keys = ("timestamp", "time_diff", "precise_timestamp", "precise_time_diff")

    def _is_metadata(key):
        # Keys that carry timing metadata rather than per-row channel data.
        return key in metadata_keys or key.endswith("_precise_timestamp") or key.endswith("_precise_time_diff")

    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Boundaries are in upsampled coordinates; map back to row indices.
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            # First boundary only anchors the start of the first slice.
            start_index = end_index
            continue

        if end_index - start_index >= min_slice_size:
            # Drop a trailing slice that would need padding.
            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
                break

            slice_data = data.iloc[start_index:end_index].to_dict("list")

            if i > 1 and not process_signals_failed:
                # Chain timestamps from the previous slice using peak-to-peak distance.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff

                # Snap to the closest real index value in the source data.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                time_diff = timestamp - slices[-1]["timestamp"]

                for gz_col, peaks in channel_peaks.items():
                    slice_data[f"{gz_col}_precise_time_diff"] = peaks[i] - peaks[i - 1]
            else:
                # First kept slice (or fallback mode): no predecessor to diff against.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None

                for gz_col, peaks in channel_peaks.items():
                    slice_data[f"{gz_col}_precise_timestamp"] = None if process_signals_failed else peaks[0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff

            if end_index - start_index < slice_size:
                # Too short: left-pad each data column with the preceding rows.
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if not _is_metadata(key):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long (or exact): truncate each data column to slice_size rows.
                for key in slice_data:
                    if not _is_metadata(key):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)

        # Next slice starts at this boundary, even if the slice was dropped.
        start_index = end_index

    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    return 'output.json', len(slices)