# Captured from a Hugging Face Spaces file view: metricspace / cabasus
# Space status shown at capture time: Build error
# File size: 4,580 Bytes


from funcs.tools import upsample_signal
from funcs.tools import process_signals
from funcs.tools import numpy_to_native
from funcs.plot_func import plot_slices
from funcs.tools import fill_missing_values

import json
import numpy as np
import pandas as pd

def slice_csv_to_json(
    input_file,
    slice_size=64,
    min_slice_size=16,
    sample_rate=20,
    window_size=20,
    debug=False,
    output_file='output.json',
):
    """Cut a sensor CSV into slices between detected peaks and write them as JSON.

    The CSV is read with ``;`` as delimiter and its first column as index.
    Missing values are filled via ``fill_missing_values`` and any remaining
    NaNs are set to 0. Every column whose name starts with ``"GZ"`` is passed
    to ``process_signals``; the second value it returns (``peaks``) is
    collected per channel. The k-th slice boundary is the mean of the k-th
    peak over all GZ channels, divided by ``sample_rate`` and rounded to get a
    row position in the CSV data.

    Each emitted slice is a dict holding every CSV column as a list of
    samples, plus the scalar keys ``precise_timestamp``, ``timestamp``,
    ``time_diff``, ``precise_time_diff`` and per-channel
    ``<GZ>_precise_time_diff`` (and, for the first emitted slice only,
    ``<GZ>_precise_timestamp``).

    Args:
        input_file: Path or file-like object accepted by ``pandas.read_csv``.
        slice_size: Target number of samples per slice. Longer intervals are
            truncated to this length; shorter ones are left-padded with the
            rows that precede them.
        min_slice_size: Intervals with fewer rows than this are skipped.
        sample_rate: Used as the upsample factor handed to
            ``process_signals`` and as the divisor that maps peak positions
            back to row positions.
        window_size: Forwarded to ``process_signals`` as ``window_size``.
        debug: When true, call ``plot_slices`` for the first GZ channel.
        output_file: Path of the JSON file to write.

    Returns:
        A tuple ``(output_file, number_of_slices_written)``.

    Raises:
        ValueError: If the CSV has no column whose name starts with "GZ".
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data = data.copy()
    # NOTE: the fill window is fixed at 10 and is independent of `window_size`.
    data = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    if not gz_columns:
        raise ValueError('input CSV contains no columns starting with "GZ"')

    upsample_factor = sample_rate
    all_peaks = []
    for gz_col in gz_columns:
        _, peaks = process_signals(data[gz_col].values, upsample_factor, window_size=window_size)
        all_peaks.append(peaks)

    # zip() stops at the channel with the fewest peaks, so every index used
    # below is valid for all channels.
    precise_slice_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    index_values = data.index.values
    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            # The first boundary only marks where the first slice begins.
            start_index = end_index
            continue

        length = end_index - start_index
        if length >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")

            # Normalise the sample columns before any scalar metadata is
            # added, so metadata keys never need to be filtered out.
            if length < slice_size:
                # Clamp at 0: a negative start would make iloc count from the
                # end of the frame. Slices close to the beginning of the data
                # may therefore stay shorter than slice_size.
                pad_start = max(0, start_index - (slice_size - length))
                padding = data.iloc[pad_start:start_index].to_dict("list")
                for key in slice_data:
                    slice_data[key] = padding[key] + slice_data[key]
            else:
                for key in slice_data:
                    slice_data[key] = slice_data[key][:slice_size]

            if slices:
                previous = slices[-1]

                # Distance between this boundary and the previous one, added
                # to the previous slice's precise timestamp.
                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
                precise_timestamp = previous["precise_timestamp"] + precise_time_diff

                # Snap to the closest value present in the index.
                timestamp = index_values[np.abs(index_values - precise_timestamp).argmin()]
                time_diff = timestamp - previous["timestamp"]

                # Only the per-channel difference is emitted here; the
                # per-channel precise timestamp is not carried forward.
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # First slice actually emitted. Branching on `slices` rather
                # than on `i` keeps this working when earlier intervals were
                # skipped for being shorter than min_slice_size.
                precise_timestamp = index_values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None

                # Peak i - 1 is the boundary this slice starts at (peak 0 when
                # no interval was skipped).
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][i - 1]
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            slices.append(slice_data)

        # The next interval always starts at this boundary, even when the
        # current interval was skipped.
        start_index = end_index

    with open(output_file, "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    if debug:
        plot_slices(
            original_data[gz_columns[0]],
            data[gz_columns[0]],
            precise_slice_points,
            precise_slice_points,
            sample_rate,
            index_values[0],
        )

    return output_file, len(slices)