Spaces:
Build error
Build error
File size: 4,580 Bytes
a5bd089 63e4f49 a5bd089 63e4f49 a5bd089 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
from funcs.tools import upsample_signal
from funcs.tools import process_signals
from funcs.tools import numpy_to_native
from funcs.plot_func import plot_slices
from funcs.tools import fill_missing_values
import json
import numpy as np
import pandas as pd
def slice_csv_to_json(
    input_file,
    slice_size=64,
    min_slice_size=16,
    sample_rate=20,
    window_size=20,
    debug=False,
    output_file='output.json',
):
    """Cut a CSV recording into slices at the averaged GZ peaks and dump them as JSON.

    The CSV is read with ``;`` as delimiter and its first column as index.
    Missing values are handed to ``fill_missing_values`` and whatever is still
    NaN afterwards is replaced by 0. Every column whose name starts with
    ``"GZ"`` is passed to ``process_signals``; the k-th peaks of all channels
    are averaged and, divided by ``sample_rate``, used as row positions at
    which the frame is cut.

    Args:
        input_file: Path or buffer accepted by ``pandas.read_csv``.
        slice_size: Target number of rows per slice. Shorter slices are
            left-padded with the rows preceding them, longer ones truncated.
        min_slice_size: Intervals between two consecutive peaks that span
            fewer rows than this are not emitted.
        sample_rate: Upsampling factor handed to ``process_signals``; peak
            positions are divided by it to obtain row positions.
        window_size: Forwarded to ``process_signals``.
        debug: When true, ``plot_slices`` is called for the first GZ column.
        output_file: Destination of the JSON dump.

    Returns:
        Tuple ``(output_file, number_of_slices)``.
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data = data.copy()
    data = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    upsample_factor = sample_rate

    # Only the peak positions are needed; the smoothed signal is discarded.
    all_peaks = []
    for gz_col in gz_columns:
        _, peaks = process_signals(
            data[gz_col].values, upsample_factor, window_size=window_size
        )
        all_peaks.append(peaks)

    # Average the k-th peak of every channel; zip() stops at the channel
    # with the fewest peaks.
    precise_slice_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    # Columns holding per-row signal data, i.e. everything that is neither a
    # per-slice metadata key nor a per-channel metadata key.
    meta_keys = {"timestamp", "time_diff", "precise_timestamp", "precise_time_diff"}
    meta_suffixes = ("_precise_timestamp", "_precise_time_diff")
    signal_columns = [
        col
        for col in data.columns
        if col not in meta_keys and not col.endswith(meta_suffixes)
    ]

    index_values = data.index.values
    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            # The first peak only marks where the first slice begins.
            start_index = end_index
            continue

        length = end_index - start_index
        if length >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")

            if slices:
                # Derive the timing from the previously emitted slice. Testing
                # `slices` (not `i > 1`) keeps this safe when earlier
                # intervals were skipped for being too short.
                previous = slices[-1]
                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
                precise_timestamp = previous["precise_timestamp"] + precise_time_diff
                # Snap to the closest value present in the index.
                timestamp = index_values[
                    np.abs(index_values - precise_timestamp).argmin()
                ]
                time_diff = timestamp - previous["timestamp"]
                # NOTE: "<channel>_precise_timestamp" is only written for the
                # first emitted slice; later slices carry the diff only.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = (
                        all_peaks[j][i] - all_peaks[j][i - 1]
                    )
            else:
                # First emitted slice: nothing to diff against.
                precise_timestamp = index_values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None
                for j, gz_col in enumerate(gz_columns):
                    # Peak that opens this slice (index 0 when i == 1).
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][i - 1]
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff

            if length < slice_size:
                # Left-pad with the rows directly preceding the slice. Clamp
                # at 0: a negative start would make iloc wrap around from the
                # end of the frame.
                pad_start = max(0, start_index - (slice_size - length))
                padding = data.iloc[pad_start:start_index]
                for col in signal_columns:
                    slice_data[col] = padding[col].tolist() + slice_data[col]
            else:
                for col in signal_columns:
                    slice_data[col] = slice_data[col][:slice_size]

            slices.append(slice_data)

        # NOTE(review): the boundary advances on every peak, so intervals
        # shorter than min_slice_size are dropped rather than merged into the
        # next slice - confirm this matches the intended behaviour.
        start_index = end_index

    with open(output_file, "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    if debug:
        plot_slices(
            original_data[gz_columns[0]],
            data[gz_columns[0]],
            precise_slice_points,
            precise_slice_points,
            sample_rate,
            data.index.values[0],
        )

    return output_file, len(slices)