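"""Slice multi-channel CSV sensor data into fixed-size JSON segments.

Slice boundaries are derived from peaks detected in the upsampled, smoothed
GZ channels; per-slice timestamps and time differences are recorded for the
combined signal and for each GZ channel individually. Results are written
to output.json.
"""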
import json

import numpy as np
import pandas as pd

from funcs.plot_func import plot_slices
from funcs.tools import (
    fill_missing_values,
    numpy_to_native,
    process_signals,
    upsample_signal,
    upsample_signal_v2,
)

# Keys in a slice dict that hold metadata rather than signal samples.
_METADATA_KEYS = ("timestamp", "time_diff", "precise_timestamp", "precise_time_diff")


def _is_metadata_key(key):
    return key in _METADATA_KEYS or key.endswith(("_precise_timestamp", "_precise_time_diff"))
def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=20, debug=False):
    """Slice a semicolon-delimited sensor CSV into peak-aligned segments written to output.json."""
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data = data.copy()
    data = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    combined_smoothed_signals_upsampled = np.zeros(
        upsample_signal(data[gz_columns[0]].values, sample_rate).size, dtype=float
    )
    # Smooth and upsample every GZ channel, collecting the peaks detected in each.
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        upsampled_smoothed_signal, peaks = process_signals(gz_signal, sample_rate, window_size=window_size)
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    # Directly use the average peak position across channels as precise_slice_points.
    precise_slice_points = [np.mean(channel_peaks) for channel_peaks in zip(*all_peaks)]
    slices = []
    start_index = 0
    list_time_diff_for_activation = []
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Map the upsampled peak position back to an index in the original signal.
        end_index = round(precise_slice_point / sample_rate)
        if i == 0:
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1:
                # Compute precise_time_diff from the difference between the current
                # and previous precise slice points.
                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                # Compute the timestamp by finding the closest index in the original signal.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                # Compute time_diff from the difference between the current and previous timestamps.
                time_diff = timestamp - slices[-1]["timestamp"]
                # Compute precise_time_diff for each GZ channel individually.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # First slice: anchor timestamps to the original index; there is no
                # previous slice to diff against.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                # Initialize precise_timestamp and precise_time_diff for each GZ channel individually.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None
            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            list_time_diff_for_activation.append(slice_data["precise_time_diff"])
            if end_index - start_index < slice_size:
                # Pad short slices with the samples immediately preceding start_index,
                # clamping at 0 so the padding never wraps around to the end of the data.
                pad_size = slice_size - (end_index - start_index)
                pad_start = max(start_index - pad_size, 0)
                for key in slice_data:
                    if not _is_metadata_key(key):
                        slice_data[key] = data[key].iloc[pad_start:start_index].tolist() + slice_data[key]
            else:
                # Truncate long slices to exactly slice_size samples.
                for key in slice_data:
                    if not _is_metadata_key(key):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)
            start_index = end_index
    with open("output.json", "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    if debug:
        plot_slices(
            original_data[gz_columns[0]],
            data[gz_columns[0]],
            precise_slice_points,
            precise_slice_points,
            sample_rate,
            data.index.values[0],
        )

    return "output.json", len(slices), list_time_diff_for_activation
def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20, debug=False):
    """Like slice_csv_to_json, but falls back to fixed-size slicing when peak detection fails."""
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    data = fill_missing_values(data)
    data.fillna(0, inplace=True)

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    upsample_factor = sample_rate
    combined_smoothed_signals_upsampled = np.zeros(
        upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float
    )

    # Smooth and upsample every GZ channel. A channel that fails (exception, or an
    # empty/too-short signal) triggers the fallback path instead of being skipped,
    # since skipping would misalign all_peaks with gz_columns.
    process_signals_failed = False
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        try:
            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
        except Exception:  # Catch any exception from process_signals.
            process_signals_failed = True
            break
        if upsampled_smoothed_signal is None or peaks is None:
            if debug:
                print(f"Skipping {gz_col} due to empty or too short signal")
            process_signals_failed = True
            break
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    use_fallback_points = process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks)
    if use_fallback_points:
        # Fall back to evenly spaced points, expressed in upsampled coordinates so
        # they stay consistent with the peak positions used below.
        precise_loop_points = list(range(0, len(data) * upsample_factor, slice_size * upsample_factor))
    else:
        precise_loop_points = [np.mean(channel_peaks) for channel_peaks in zip(*all_peaks)]
    # Refine each slice point to the maximum of the combined smoothed signal
    # within the interval that ends at that point.
    precise_slice_points = []
    for i in range(len(precise_loop_points) - 1, -1, -1):
        interval_end = int(precise_loop_points[i])
        interval_start = int(precise_loop_points[i - 1]) if i > 0 else 0
        if interval_end <= interval_start:
            continue  # Skip empty intervals, e.g. a leading point at index 0.
        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
        precise_slice_points.append(max_value_index)
    precise_slice_points.reverse()
    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Map the upsampled slice point back to an index in the original signal.
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            start_index = end_index
            continue
        if end_index - start_index >= min_slice_size:
            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
                # Drop a trailing slice that cannot fill slice_size.
                break
            slice_data = data.iloc[start_index:end_index].to_dict("list")
            if i > 1 and not use_fallback_points:
                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                time_diff = timestamp - slices[-1]["timestamp"]
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                for j, gz_col in enumerate(gz_columns):
                    # Per-channel peaks are unavailable on the fallback path.
                    slice_data[f"{gz_col}_precise_timestamp"] = None if use_fallback_points else all_peaks[j][0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None
            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            if end_index - start_index < slice_size:
                # Pad short slices with preceding samples, clamped at index 0.
                pad_size = slice_size - (end_index - start_index)
                pad_start = max(start_index - pad_size, 0)
                for key in slice_data:
                    if not _is_metadata_key(key):
                        slice_data[key] = data[key].iloc[pad_start:start_index].tolist() + slice_data[key]
            else:
                # Truncate long slices to exactly slice_size samples.
                for key in slice_data:
                    if not _is_metadata_key(key):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)
            start_index = end_index
    with open("output.json", "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    return "output.json", len(slices)
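

# Minimal usage sketch. The CSV path and its expected layout (semicolon-delimited,
# timestamp index column, one or more "GZ*" signal columns) are assumptions for
# illustration; adjust them to the actual recordings. Note that both functions
# write to the same output.json, so the second call overwrites the first.
if __name__ == "__main__":
    json_path, n_slices, activation_diffs = slice_csv_to_json("recording.csv", debug=True)
    print(f"{json_path}: {n_slices} slices, activation diffs: {activation_diffs}")

    json_path_v2, n_slices_v2 = slice_csv_to_json_v2("recording.csv")
    print(f"{json_path_v2}: {n_slices_v2} slices")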