from funcs.tools import upsample_signal
from funcs.tools import process_signals
from funcs.tools import numpy_to_native
from funcs.plot_func import plot_slices
from funcs.tools import upsample_signal_v2
from funcs.tools import fill_missing_values
import json
import numpy as np
import pandas as pd


def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=20, debug=False):
    """Slice a semicolon-delimited CSV into peak-aligned windows over the GZ channels and write them to output.json."""
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data = data.copy()
    data = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    # sample_rate doubles as the upsampling factor in this variant.
    combined_smoothed_signals_upsampled = np.zeros(
        upsample_signal(data[gz_columns[0]].values, sample_rate).size, dtype=float
    )

    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        upsampled_smoothed_signal, peaks = process_signals(gz_signal, sample_rate, window_size=window_size)
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    # Directly use the average peaks as precise_slice_points
    precise_slice_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    slices = []
    start_index = 0
    list_time_diff_for_activation = []
    for i, precise_slice_point in enumerate(precise_slice_points):
        end_index = round(precise_slice_point / sample_rate)
        if i == 0:
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1:
                # Compute precise_time_diff based on the differences between current and last precise_timestamps.
                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                # Compute the timestamp by finding the closest index in the original signal.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                # Compute time_diff based on the differences between current and last timestamps.
                time_diff = timestamp - slices[-1]["timestamp"]
                # Compute precise_time_diff for each GZ channel individually.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                # Initialize precise_timestamp and precise_time_diff for each GZ channel individually.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            list_time_diff_for_activation.append(slice_data["precise_time_diff"])

            if end_index - start_index < slice_size:
                # Pad the slice backwards with preceding samples so every slice has slice_size rows.
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if (
                        key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"]
                        and not key.endswith("_precise_timestamp")
                        and not key.endswith("_precise_time_diff")
                    ):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Truncate oversized slices to exactly slice_size rows.
                for key in slice_data:
                    if (
                        key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"]
                        and not key.endswith("_precise_timestamp")
                        and not key.endswith("_precise_time_diff")
                    ):
                        slice_data[key] = slice_data[key][:slice_size]

            slices.append(slice_data)
            start_index = end_index

    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    if debug:
        plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])

    return 'output.json', len(slices), list_time_diff_for_activation


def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20, debug=False):
    """Variant of slice_csv_to_json that snaps slice points to local maxima and falls back to fixed-size slicing if peak detection fails."""
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data = data.copy()
    data = fill_missing_values(data)
    data.fillna(0, inplace=True)

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    upsample_factor = sample_rate
    combined_smoothed_signals_upsampled = np.zeros(
        upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float
    )

    process_signals_failed = False
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        try:
            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
        except Exception:
            # Any exception from process_signals triggers the fixed-size fallback below.
            process_signals_failed = True
            break
        if upsampled_smoothed_signal is None or peaks is None:
            if debug:
                print(f"Skipping {gz_col} due to empty or too short signal")
            continue
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
        # Fall back to evenly spaced slice points when peak detection is unusable.
        precise_loop_points = list(range(0, len(data), slice_size))
    else:
        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    # Snap each slice point to the strongest combined response within its interval.
    precise_slice_points = []
    for i in range(len(precise_loop_points) - 1, -1, -1):
        interval_end = int(precise_loop_points[i])
        if i > 0:
            interval_start = int(precise_loop_points[i - 1])
        else:
            interval_start = 0
        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
        precise_slice_points.append(max_value_index)
    precise_slice_points.reverse()

    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
                break
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1 and not process_signals_failed:
                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                time_diff = timestamp - slices[-1]["timestamp"]
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0] if not process_signals_failed else None
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff

            if end_index - start_index < slice_size:
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if (
                        key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"]
                        and not key.endswith("_precise_timestamp")
                        and not key.endswith("_precise_time_diff")
                    ):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                for key in slice_data:
                    if (
                        key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"]
                        and not key.endswith("_precise_timestamp")
                        and not key.endswith("_precise_time_diff")
                    ):
                        slice_data[key] = slice_data[key][:slice_size]

            slices.append(slice_data)
            start_index = end_index

    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    return 'output.json', len(slices)
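

# Minimal usage sketch, assuming a semicolon-delimited CSV with a numeric index
# column and one or more "GZ"-prefixed channel columns. The file name
# "recording.csv" is illustrative only; both functions write their slices to
# output.json in the working directory, so the second call overwrites the first.
if __name__ == "__main__":
    output_path, n_slices, time_diffs = slice_csv_to_json("recording.csv", slice_size=64, debug=False)
    print(f"Wrote {n_slices} slices to {output_path}")

    output_path_v2, n_slices_v2 = slice_csv_to_json_v2("recording.csv", slice_size=64)
    print(f"Wrote {n_slices_v2} slices to {output_path_v2}")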