File size: 9,472 Bytes
a5bd089
 
 
 
 
b37ce89
a5bd089
 
 
 
 
 
63e4f49
27ebbbd
 
 
a5bd089
 
 
 
f9e67d5
 
a5bd089
 
f9e67d5
a5bd089
 
 
 
 
 
 
 
fa2a5c2
a5bd089
f9e67d5
a5bd089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa2a5c2
 
a5bd089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa2a5c2
b37ce89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5bd089
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205

from funcs.tools import upsample_signal
from funcs.tools import process_signals
from funcs.tools import numpy_to_native
from funcs.plot_func import plot_slices
from funcs.tools import upsample_signal_v2
from funcs.tools import fill_missing_values

import json
import numpy as np
import pandas as pd

def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=20, debug=False):
    """Slice a semicolon-delimited CSV of sensor data into fixed-size segments.

    Peaks are detected per "GZ*" channel on an upsampled, smoothed copy of the
    signal; the mean peak position across channels defines each slice boundary.
    Segments shorter than ``min_slice_size`` rows are discarded; kept segments
    are left-padded with preceding rows up to ``slice_size``, or truncated to
    ``slice_size``. The slices are written to ``output.json``.

    Parameters:
        input_file: path to the CSV (``;``-separated, first column used as index).
        slice_size: target number of rows per slice.
        min_slice_size: minimum row span between two boundaries for a slice to be kept.
        sample_rate: upsampling factor; peak positions are in upsampled
            coordinates and divided by this to recover row indices.
        window_size: smoothing window forwarded to ``process_signals``.
        debug: when True, plot the detected slice points via ``plot_slices``.

    Returns:
        Tuple ``('output.json', number_of_slices, list_of_precise_time_diffs)``
        where the first entry of the diff list is ``None`` (no predecessor).
    """
    data            = pd.read_csv(input_file, delimiter=";", index_col=0)
    original_data   = data.copy()  # kept untouched for the debug plot below
    data            = fill_missing_values(data, window_size=10)
    data.fillna(0, inplace=True)  # any gaps still left after interpolation become 0

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    all_peaks = []
    # upsample_factor = sample_rate
    # Accumulator sized from the first channel's upsampled signal; all channels
    # are assumed to upsample to the same length — TODO confirm.
    combined_smoothed_signals_upsampled = np.zeros(upsample_signal(data[gz_columns[0]].values, sample_rate).size, dtype=float)
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        upsampled_smoothed_signal, peaks = process_signals(gz_signal, sample_rate, window_size=window_size)
        all_peaks.append(peaks)
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    # Directly use the average peaks as precise_slice_points
    # (zip truncates to the channel with the fewest peaks).
    precise_slice_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    slices = []
    start_index = 0
    list_time_diff_for_activation = []
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Peak positions are in upsampled coordinates; map back to row indices.
        end_index = round(precise_slice_point / sample_rate)
        if i == 0:
            # First peak only anchors the start of the first slice.
            start_index = end_index
            continue

        if end_index - start_index >= min_slice_size:
            slice_data = data.iloc[start_index:end_index].to_dict("list")

            if i > 1:
                # Compute precise_time_diff based on the differences between current and last precise_timestamps.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff

                # Compute the timestamp by finding the closest index in the original signal.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]

                # Compute time_diff based on the differences between current and last timestamps.
                time_diff = timestamp - slices[-1]["timestamp"]

                # Compute precise_timestamp and precise_time_diff for each GZ channel individually
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
            else:
                # Very first kept slice: no predecessor, so diffs are undefined.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None

                # Initialize precise_timestamp and precise_time_diff for each GZ channel individually
                for j, gz_col in enumerate(gz_columns):
                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff
            list_time_diff_for_activation.append(slice_data["precise_time_diff"])


            if end_index - start_index < slice_size:
                # Too short: left-pad each data column with the rows that
                # immediately precede the slice. Metadata keys are skipped.
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long (or exact): truncate each data column to slice_size rows.
                for key in slice_data:
                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)

        # Next slice starts where this boundary fell, even if the slice was dropped.
        start_index = end_index

    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    if debug:
        plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])

    return 'output.json', len(slices), list_time_diff_for_activation


def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20, debug=False):
    """Slice a semicolon-delimited CSV into fixed-size segments (v2, with fallback).

    Like ``slice_csv_to_json``, but boundary candidates are refined to the
    argmax of the combined smoothed signal within each inter-peak interval,
    and a fixed-stride fallback is used when peak detection fails.

    Fixes over the previous revision:
      * ``debug`` is now a real parameter (it was referenced but never defined,
        raising NameError whenever a channel had to be skipped);
      * each channel is processed exactly once (the loop body was duplicated,
        appending every channel's peaks twice and double-counting its signal);
      * per-channel peak lists are keyed by column name, so skipped channels no
        longer desynchronize the ``{col}_precise_*`` metadata;
      * fallback boundaries are generated in upsampled coordinates, matching
        the ``/ upsample_factor`` conversion applied later (previously all
        fallback slices collapsed towards row 0).

    Parameters:
        input_file: path to the CSV (``;``-separated, first column used as index).
        slice_size: target number of rows per slice.
        min_slice_size: minimum row span between boundaries for a slice to be kept.
        sample_rate: upsampling factor for peak detection.
        debug: when True, print a note for each skipped channel.

    Returns:
        Tuple ``('output.json', number_of_slices)``.

    Raises:
        ValueError: if the CSV contains no "GZ*" columns.
    """
    data = pd.read_csv(input_file, delimiter=";", index_col=0)
    data = fill_missing_values(data)
    data.fillna(0, inplace=True)  # any gaps still left after interpolation become 0

    gz_columns = [col for col in data.columns if col.startswith("GZ")]
    if not gz_columns:
        raise ValueError("No 'GZ*' columns found in input CSV")

    upsample_factor = sample_rate
    # Accumulator sized from the first channel's upsampled signal; all channels
    # are assumed to upsample to the same length — TODO confirm.
    combined_smoothed_signals_upsampled = np.zeros(
        upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float
    )

    # Column name -> detected peak positions (upsampled coordinates), only for
    # channels that process_signals handled successfully.
    channel_peaks = {}
    process_signals_failed = False
    for gz_col in gz_columns:
        gz_signal = data[gz_col].values
        try:
            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
        except Exception:
            # Any hard failure switches the whole run to fixed-stride slicing.
            process_signals_failed = True
            break
        if upsampled_smoothed_signal is None or peaks is None:
            # Soft failure (empty/too-short signal): skip this channel only.
            if debug:
                print(f"Skipping {gz_col} due to empty or too short signal")
            continue
        channel_peaks[gz_col] = peaks
        combined_smoothed_signals_upsampled += upsampled_smoothed_signal

    all_peaks = list(channel_peaks.values())
    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
        # Fallback: evenly spaced boundaries, expressed in upsampled coordinates
        # so the `/ upsample_factor` conversion below yields slice_size-row steps.
        precise_loop_points = list(range(0, len(data) * upsample_factor, slice_size * upsample_factor))
    else:
        # Average the k-th peak position across channels
        # (zip truncates to the channel with the fewest peaks).
        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]

    # Refine each boundary to the strongest point of the combined signal within
    # the interval ending at that boundary.
    precise_slice_points = []
    for i in range(len(precise_loop_points) - 1, -1, -1):
        interval_end = int(precise_loop_points[i])
        interval_start = int(precise_loop_points[i - 1]) if i > 0 else 0
        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
        precise_slice_points.append(max_value_index)
    precise_slice_points.reverse()

    metadata_keys = ("timestamp", "time_diff", "precise_timestamp", "precise_time_diff")

    def _is_metadata(key):
        # Keys that carry timing metadata rather than per-row channel data.
        return key in metadata_keys or key.endswith("_precise_timestamp") or key.endswith("_precise_time_diff")

    slices = []
    start_index = 0
    for i, precise_slice_point in enumerate(precise_slice_points):
        # Boundaries are in upsampled coordinates; map back to row indices.
        end_index = round(precise_slice_point / upsample_factor)
        if i == 0:
            # First boundary only anchors the start of the first slice.
            start_index = end_index
            continue

        if end_index - start_index >= min_slice_size:
            # Drop a trailing slice that would need padding.
            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
                break

            slice_data = data.iloc[start_index:end_index].to_dict("list")

            if i > 1 and not process_signals_failed:
                # Chain timestamps from the previous slice using peak-to-peak distance.
                precise_time_diff = (precise_slice_point - precise_slice_points[i - 1])
                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff

                # Snap to the closest real index value in the source data.
                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
                time_diff = timestamp - slices[-1]["timestamp"]

                for gz_col, peaks in channel_peaks.items():
                    slice_data[f"{gz_col}_precise_time_diff"] = peaks[i] - peaks[i - 1]
            else:
                # First kept slice (or fallback mode): no predecessor to diff against.
                precise_timestamp = data.index.values[start_index]
                timestamp = precise_timestamp
                time_diff = None
                precise_time_diff = None

                for gz_col, peaks in channel_peaks.items():
                    slice_data[f"{gz_col}_precise_timestamp"] = None if process_signals_failed else peaks[0]
                    slice_data[f"{gz_col}_precise_time_diff"] = None

            slice_data["precise_timestamp"] = precise_timestamp
            slice_data["timestamp"] = timestamp
            slice_data["time_diff"] = time_diff
            slice_data["precise_time_diff"] = precise_time_diff

            if end_index - start_index < slice_size:
                # Too short: left-pad each data column with the preceding rows.
                pad_size = slice_size - (end_index - start_index)
                for key in slice_data:
                    if not _is_metadata(key):
                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
            else:
                # Too long (or exact): truncate each data column to slice_size rows.
                for key in slice_data:
                    if not _is_metadata(key):
                        slice_data[key] = slice_data[key][:slice_size]
            slices.append(slice_data)

        # Next slice starts at this boundary, even if the slice was dropped.
        start_index = end_index

    with open('output.json', "w") as f:
        json.dump(numpy_to_native(slices), f, indent=2)

    return 'output.json', len(slices)