# cabasus/funcs/convertors.py
# Revision fa2a5c2 (arcan3): "added revision"
from funcs.tools import upsample_signal
from funcs.tools import process_signals
from funcs.tools import numpy_to_native
from funcs.plot_func import plot_slices
from funcs.tools import upsample_signal_v2
from funcs.tools import fill_missing_values
import json
import numpy as np
import pandas as pd
def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=20, debug=False):
    """Slice a semicolon-delimited CSV of sensor signals into peak-aligned slices.

    Peaks are detected on every column whose name starts with "GZ"; the
    element-wise mean of the per-channel peak positions (in upsampled units)
    defines the slice boundaries. Each slice is padded (with the rows that
    precede it) or truncated to exactly ``slice_size`` rows, annotated with
    timestamp/diff metadata, and the whole list is dumped to ``output.json``.

    Parameters:
        input_file: path (or buffer) of the ``;``-delimited CSV; the first
            column is used as the index (assumed to hold timestamps — the
            code does arithmetic on ``data.index.values``).
        slice_size: exact number of rows each emitted slice is padded /
            truncated to.
        min_slice_size: boundary intervals shorter than this are skipped
            (merged into the following slice).
        sample_rate: upsampling factor used by the signal helpers; boundary
            positions are divided by it to map back to row indices.
        window_size: smoothing window forwarded to ``process_signals``.
            NOTE(review): ``fill_missing_values`` below uses a hard-coded
            ``window_size=10`` instead of this parameter — confirm intended.
        debug: when True, plot the slicing result via ``plot_slices``.

    Returns:
        tuple: (output filename, number of slices, list of per-slice
        ``precise_time_diff`` values — the first entry is ``None``).
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    # Keep an untouched copy purely for the debug plot at the end.
    original_data = data.copy()
    data = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)
    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    # upsample_factor = sample_rate
    # Accumulator sized to one upsampled channel; every channel is summed in.
    # NOTE(review): this combined signal is never read afterwards in this
    # function (only the v2 variant uses it) — confirm it is intentional.
    combined_smoothed_signals_upsampled = np.zeros(upsample_signal(data[gz_columns[0]].values, sample_rate).size, dtype=float)
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        upsampled_smoothed_signal, peaks = process_signals(gz_signal, sample_rate, window_size=window_size)
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal
    # Directly use the average peaks as precise_slice_points
    # (zip(*all_peaks) pairs the k-th peak of every channel; assumes all
    # channels yielded the same number of peaks).
    precise_slice_points = [np.mean(peaks) for peaks in zip(*all_peaks)]
    slices = []
    start_index = 0
    list_time_diff_for_activation = []
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Boundary positions are in upsampled units; map back to row indices.
        end_index = round(precise_slice_point / sample_rate)
        if i == 0:
            # The first boundary only anchors the start of the first slice.
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1:
                # Compute precise_time_diff based on the differences between current and last precise_timestamps.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                # Compute the timestamp by finding the closest index in the original signal.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                # Compute time_diff based on the differences between current and last timestamps.
                time_diff = timestamp - slices[-1]["timestamp"]
                # Compute precise_timestamp and precise_time_diff for each GZ channel individually
                # NOTE(review): only the per-channel *_precise_time_diff is set
                # here, while the i == 1 branch also sets *_precise_timestamp —
                # confirm the asymmetry is intended.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # First emitted slice: anchor timestamps on the raw index.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                # Initialize precise_timestamp and precise_time_diff for each GZ channel individually
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None
            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            list_time_diff_for_activation.append(slice_data["precise_time_diff"])
            if end_index - start_index < slice_size:
                # Too short: left-pad every signal column with the rows that
                # immediately precede the slice (metadata keys are skipped).
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long: truncate every signal column to slice_size rows.
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)
            # Advance only when a slice was emitted, so undersized intervals
            # merge into the next one (indentation reconstructed — confirm).
            start_index = end_index
    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)
    if debug:
        plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])
    return 'output.json', len(slices), list_time_diff_for_activation
def _is_signal_key(key):
    """Return True for slice-dict keys holding signal data (not metadata)."""
    return (key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"]
            and not key.endswith("_precise_timestamp")
            and not key.endswith("_precise_time_diff"))


def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20, debug=False):
    """Slice a semicolon-delimited CSV of sensor signals into peak-aligned slices.

    Like ``slice_csv_to_json``, but boundaries are refined to the maximum of
    the combined smoothed signal within each inter-peak interval, and the
    function falls back to uniform slicing (every ``slice_size`` rows) when
    peak detection fails or yields no peaks.

    Fixes over the previous revision:
      * ``debug`` was referenced but never defined (NameError) — it is now a
        backward-compatible keyword parameter.
      * each channel was processed twice per loop iteration, so its peaks were
        appended twice and the combined signal was double-counted; the two
        copies are merged into one guarded call.
      * an empty refinement interval no longer crashes ``np.argmax``.

    Parameters:
        input_file: path (or buffer) of the ``;``-delimited CSV; the first
            column is used as the index.
        slice_size: exact number of rows each emitted slice is padded /
            truncated to; also the uniform-slicing step in the fallback.
        min_slice_size: boundary intervals shorter than this are skipped
            (merged into the following slice).
        sample_rate: upsampling factor used by the signal helpers.
        debug: when True, print a note for channels skipped by peak detection.

    Returns:
        tuple: (output filename, number of slices written).
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    data = fill_missing_values(data)
    data.fillna(0, inplace=True)
    gz_columns = [col for col in data.columns if col.startswith("GZ")]

    all_peaks = []
    upsample_factor = sample_rate
    # Accumulator sized to one upsampled channel; every channel is summed in.
    combined_smoothed_signals_upsampled = np.zeros(
        upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float
    )

    process_signals_failed = False
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        try:
            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
        except Exception:
            # Any processing failure switches us to the uniform-slicing fallback.
            process_signals_failed = True
            break
        if upsampled_smoothed_signal is None or peaks is None:
            if debug:
                print(f"Skipping {gz_col} due to empty or too short signal")
            continue
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal
    # NOTE(review): if a channel is skipped above, all_peaks is shorter than
    # gz_columns and the per-channel indexing (all_peaks[j]) below misaligns —
    # behavior preserved from the original; confirm against real data.

    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
        # Fallback: evenly spaced boundaries.
        precise_loop_points = list(range(0, len(data), slice_size))
    else:
        # Element-wise mean of the per-channel peak positions.
        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    # Refine each boundary to the argmax of the combined smoothed signal
    # within the interval ending at that boundary.
    precise_slice_points = []
    for i in range(len(precise_loop_points) - 1, -1, -1):
        interval_end = int(precise_loop_points[i])
        interval_start = int(precise_loop_points[i - 1]) if i > 0 else 0
        if interval_end <= interval_start:
            # Empty interval: keep the raw boundary rather than crash on
            # np.argmax of an empty slice.
            precise_slice_points.append(interval_end)
            continue
        max_value_index = np.argmax(
            combined_smoothed_signals_upsampled[interval_start:interval_end]
        ) + interval_start
        precise_slice_points.append(max_value_index)
    precise_slice_points.reverse()

    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Boundary positions are in upsampled units; map back to row indices.
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            # Drop a trailing slice that cannot be filled to slice_size.
            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
                break
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1 and not process_signals_failed:
                # Chain precise timestamps from the previous slice.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                # Snap to the closest real timestamp in the index.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                time_diff = timestamp - slices[-1]["timestamp"]
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # First emitted slice (or fallback mode): anchor on the raw index.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0] if not process_signals_failed else None
                    slice_data[f"{gz_col}_precise_time_diff"] = None
            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            if end_index - start_index < slice_size:
                # Too short: left-pad signal columns with the preceding rows.
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if _is_signal_key(key):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long: truncate signal columns to slice_size rows.
                for key in slice_data:
                    if _is_signal_key(key):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)
            start_index = end_index

    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)
    return 'output.json', len(slices)