# NOTE(review): the lines that were here ("Spaces:", "Build error", commit
# hashes, and a line-number gutter) were non-code residue from the web page
# this file was extracted from; they have been reduced to this comment so
# the file parses as valid Python.
from funcs.tools import upsample_signal
from funcs.tools import process_signals
from funcs.tools import numpy_to_native
from funcs.plot_func import plot_slices
from funcs.tools import upsample_signal_v2
from funcs.tools import fill_missing_values
import json
import numpy as np
import pandas as pd
def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=20, debug=False):
    """Slice a semicolon-delimited CSV of sensor signals into fixed-size windows.

    Peaks are detected on every "GZ*" column, averaged across channels, and
    used as slice boundaries. Each slice is padded/truncated to ``slice_size``
    rows, annotated with timestamps and inter-slice time differences, and the
    full list is written to ``output.json``.

    Parameters:
        input_file: path to a ";"-delimited CSV; first column is the index
            (presumably a timestamp — TODO confirm against callers).
        slice_size: target number of rows per slice (pad/truncate to this).
        min_slice_size: slices shorter than this are dropped entirely.
        sample_rate: upsampling factor; upsampled peak positions are divided
            by this to map back to original-row indices.
        window_size: smoothing window passed to ``process_signals``.
        debug: if True, plot the slicing via ``plot_slices``.

    Returns:
        ('output.json', number of slices, list of precise_time_diff values).
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data = data.copy()
    # NOTE(review): window_size=10 is hard-coded here and independent of the
    # ``window_size`` parameter used for smoothing below — confirm intended.
    data = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)
    # Only gyroscope-like channels ("GZ" prefix) drive the slicing.
    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    # upsample_factor = sample_rate
    # Accumulator sized from the first channel's upsampled length; assumes all
    # GZ channels upsample to the same length — TODO confirm.
    combined_smoothed_signals_upsampled = np.zeros(upsample_signal(data[gz_columns[0]].values, sample_rate).size, dtype=float)
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        upsampled_smoothed_signal, peaks = process_signals(gz_signal, sample_rate, window_size=window_size)
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal
    # Directly use the average peaks as precise_slice_points
    # zip(*all_peaks) truncates to the channel with the fewest peaks.
    precise_slice_points = [np.mean(peaks) for peaks in zip(*all_peaks)]
    slices = []
    start_index = 0
    list_time_diff_for_activation = []
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Map the upsampled peak position back to an original-row index.
        end_index = round(precise_slice_point / sample_rate)
        if i == 0:
            # First peak only establishes the starting boundary; no slice yet.
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1:
                # Compute precise_time_diff based on the differences between current and last precise_timestamps.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                # Chained off the previous slice's value, so timestamps stay
                # consistent even when intermediate slices were skipped.
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                # Compute the timestamp by finding the closest index in the original signal.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                # Compute time_diff based on the differences between current and last timestamps.
                time_diff = timestamp - slices[-1]["timestamp"]
                # Compute precise_timestamp and precise_time_diff for each GZ channel individually
                # NOTE(review): only *_precise_time_diff is set here; the
                # *_precise_timestamp keys are set only on the i == 1 branch
                # below — confirm this asymmetry is intentional.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # First emitted slice: anchor timestamps to the data index;
                # no previous slice exists, so diffs are None.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                # Initialize precise_timestamp and precise_time_diff for each GZ channel individually
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None
            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            list_time_diff_for_activation.append(slice_data["precise_time_diff"])
            if end_index - start_index < slice_size:
                # Too short: left-pad each signal column with the rows that
                # precede start_index (metadata keys are excluded).
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long: truncate each signal column to slice_size rows.
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)
        # Boundary advances even when the candidate slice was too short.
        start_index = end_index
    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)
    if debug:
        plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])
    return 'output.json', len(slices), list_time_diff_for_activation
def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20, debug=False):
    """Slice a semicolon-delimited CSV of sensor signals into fixed-size windows.

    Like ``slice_csv_to_json`` but with a fallback: if peak detection fails
    (exception or no peaks), the data is sliced at fixed ``slice_size``
    intervals instead. Slice boundaries are refined to the maximum of the
    combined smoothed signal within each inter-peak interval, then each slice
    is padded/truncated to ``slice_size`` rows and written to ``output.json``.

    Parameters:
        input_file: path to a ";"-delimited CSV; first column is the index
            (presumably a timestamp — TODO confirm against callers).
        slice_size: target number of rows per slice.
        min_slice_size: slices shorter than this are dropped entirely.
        sample_rate: upsampling factor; upsampled positions are divided by
            this to map back to original-row indices.
        debug: if True, print a notice for channels skipped by peak detection.
            (New keyword with a default — backward compatible; previously this
            name was referenced without being defined, raising NameError.)

    Returns:
        ('output.json', number of slices).
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data = data.copy()
    data = fill_missing_values(data)
    data.fillna(0, inplace=True)
    # Only gyroscope-like channels ("GZ" prefix) drive the slicing.
    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    upsample_factor = sample_rate
    # Accumulator sized from the first channel's upsampled length; assumes all
    # GZ channels upsample to the same length — TODO confirm.
    combined_smoothed_signals_upsampled = np.zeros(upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float)
    process_signals_failed = False
    # Single processing pass per channel. (A previous revision processed each
    # channel twice, double-counting peaks and the combined signal.)
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        try:
            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
        except Exception:  # Any failure in process_signals triggers the fallback slicing.
            process_signals_failed = True
            break
        if upsampled_smoothed_signal is None or peaks is None:
            # Channel unusable (empty / too short) — skip it but keep going.
            if debug:
                print(f"Skipping {gz_col} due to empty or too short signal")
            continue
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal
    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
        # Fallback: fixed-size intervals over the original rows.
        precise_loop_points = list(range(0, len(data), slice_size))
    else:
        # Average peaks across channels; zip(*all_peaks) truncates to the
        # channel with the fewest peaks.
        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]
    # Refine each boundary to the combined-signal maximum within the interval
    # ending at that boundary (iterated right-to-left, then reversed back).
    precise_slice_points = []
    for i in range(len(precise_loop_points) - 1, -1, -1):
        interval_end = int(precise_loop_points[i])
        if i > 0:
            interval_start = int(precise_loop_points[i - 1])
        else:
            interval_start = 0
        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
        precise_slice_points.append(max_value_index)
    precise_slice_points.reverse()
    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Map the upsampled position back to an original-row index.
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            # First point only establishes the starting boundary; no slice yet.
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            # Drop a trailing slice that cannot be filled to slice_size.
            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
                break
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1 and not process_signals_failed:
                # Chain timestamps off the previous slice so they stay
                # consistent even when intermediate slices were skipped.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                # Closest original index to the precise (upsampled) timestamp.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                time_diff = timestamp - slices[-1]["timestamp"]
                # NOTE(review): all_peaks is indexed positionally by j over
                # gz_columns; if a channel was skipped above, this indexing
                # misaligns — confirm skipping cannot co-occur with this path.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # First emitted slice (or fallback mode): anchor timestamps to
                # the data index; no previous slice exists, so diffs are None.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0] if not process_signals_failed else None
                    slice_data[f"{gz_col}_precise_time_diff"] = None
            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            if end_index - start_index < slice_size:
                # Too short: left-pad each signal column with the rows that
                # precede start_index (metadata keys are excluded).
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long: truncate each signal column to slice_size rows.
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)
        # Boundary advances even when the candidate slice was too short.
        start_index = end_index
    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)
    return 'output.json', len(slices)