ts-explorations / app.py
laverdes's picture
fix: df_right from state loaded
f5dcaef
raw
history blame
4.84 kB
import streamlit as st
import pathlib
import json
import pandas as pd
# Page title and introductory text shown at the top of the app.
st.header("Time Series Preprocessing Pipeline")
# Adjacent string literals are used instead of a backslash continuation so the
# two sentences are separated by an explicit space when rendered.
st.markdown(
    "Users can load their time-series data and select a set of transformations to prepare a training set for univariate or multivariate time-series classification. "
    "Go ahead and use the sidebar on the left to upload your data files in *.json* format and start exploring and transforming it!"
)
# Two side-by-side columns: the left shows the raw data, the right the
# cleaning / feature actions.
col1, col2 = st.columns(2)

# Names and raw contents of the uploaded files, kept in upload order.
file_names = []
file_bytes = []

with st.sidebar:
    files = st.file_uploader("Load files", accept_multiple_files=True)
    if files:
        for uploaded in files:
            file_names.append(uploaded.name)
            file_bytes.append(uploaded.getvalue())
        # Echo the uploaded file names, one per line.
        st.text("\n".join(file_names))
# Column-oriented accumulator: one list per expected record field.
data_dict = {'trial_id': [], 'pupil_dilation': [], 'baseline': [], 'rating': []}

with st.spinner("Building base dictionary..."):
    for raw_content in file_bytes:
        payload = json.loads(raw_content)
        # Each top-level entry of the payload is iterated as a collection of
        # records; every field of every record is appended to its column.
        # A field that is not one of the columns above raises KeyError.
        for group in payload:
            for record in payload[group]:
                for field, value in record.items():
                    data_dict[field].append(value)
# Start from an empty frame so later sections can test `df_base.empty`
# whether or not anything has been uploaded.
df_base = pd.DataFrame()

with col1:
    if not file_bytes:
        st.caption("Upload your data from the sidebar to start :sunglasses:")
    else:
        with st.spinner("Building base dataframe..."):
            df_base = pd.DataFrame.from_dict(data_dict)
            # Keep only the digit characters of each trial identifier.
            df_base["trial_id"] = df_base["trial_id"].map(
                lambda raw_id: "".join(ch for ch in raw_id if ch.isdigit())
            )
            # Number of samples held by each series.
            df_base["len_pupil_dilation"] = df_base["pupil_dilation"].map(len)
            df_base["len_baseline"] = df_base["baseline"].map(len)
            st.info(f"number of files: {len(file_names)}")
            st.markdown("Your original data")
            st.dataframe(df_base)
# Cleaning actions (right column): count "blink" samples (values equal to 0.0)
# in the pupil_dilation and baseline series, and optionally drop them.
#
# Streamlit re-executes the whole script on every widget interaction, so
# anything that must survive a click is kept in st.session_state:
#   'df'     - the working dataframe used by the sections below
#   'blinks' - the number of 0.0 samples found by the last detection
with col2:
    if not df_base.empty:
        st.markdown("**Cleaning actions**")
        detect_blinking = st.button("Detect blinking ('0.0' values)")
        number_of_blinks = 0
        if detect_blinking:
            number_of_blinks = sum(
                1
                for column in ('pupil_dilation', 'baseline')
                for series in df_base[column]
                for sample in series
                if sample == 0.0
            )
            # Overwrite (not "set once") on every detection, so that uploading
            # new files and detecting again refreshes the stored dataframe and
            # count instead of keeping the ones from the first run. Note that
            # this also restarts cleaning from the uploaded data.
            st.session_state['df'] = df_base
            st.session_state['blinks'] = number_of_blinks

        # Read the count from session state: the local counter is reset to 0
        # on every rerun in which the detect button was not the one clicked.
        blinks_found = st.session_state.get('blinks')
        if blinks_found:
            st.info(f"blinking values (0.0) were found in {blinks_found} time-steps in all your data")
            remove_blinking = st.button("Remove blinking")
            if remove_blinking:
                df_right = st.session_state.df.copy(deep=True)
                for column in ('pupil_dilation', 'baseline'):
                    df_right[column] = df_right[column].map(
                        lambda series: [sample for sample in series if sample != 0.0]
                    )
                # Persist the cleaned frame; later sections reload the
                # dataframe from session state, so a local-only result would
                # be discarded.
                st.session_state['df'] = df_right
                st.success("Blinking values have been removed!")
        elif blinks_found == 0:
            # Detection has run and found nothing.
            st.caption("No blinking values were found in your data!")
def _relative_to(value, reference):
    """Return value minus reference, element-wise when value is a list or tuple."""
    if isinstance(value, (list, tuple)):
        return [sample - reference for sample in value]
    return value - reference


# Baseline merge (right column): offer to derive a "relative_<feature>" column
# by subtracting each row's mean baseline from the selected feature, then show
# the result and offer it as a CSV download.
with col2:
    if 'df' in st.session_state:
        df_right = st.session_state.df.copy(deep=True)
        if "baseline" in df_right.columns:
            st.markdown("A **baseline** feature has been found on your data, do you want to merge it with any of the other features in a new calculated field?")
            candidates = [k for k in df_right.columns if k != 'baseline']
            # Pre-select pupil_dilation by name rather than by position, so
            # the default does not depend on the number or order of columns.
            default_choice = ['pupil_dilation'] if 'pupil_dilation' in candidates else candidates[:1]
            option = st.multiselect('Select a feature to merge', candidates, default_choice)
            # The user may clear the selection; only the first selected
            # feature is used, and nothing is offered when there is none.
            if option:
                relative_key = f"relative_{option[0]}"
                add_relative = st.button(f"Add {relative_key}")
                if add_relative:
                    # Mean of each row's baseline series; NaN for an empty
                    # series instead of a ZeroDivisionError.
                    baseline_mean = [
                        sum(s) / len(s) if len(s) else float("nan")
                        for s in df_right['baseline']
                    ]
                    try:
                        relative_values = [
                            _relative_to(value, mean)
                            for value, mean in zip(df_right[option[0]], baseline_mean)
                        ]
                    except TypeError:
                        # e.g. a text column, which has no numeric difference
                        st.error(f"'{option[0]}' is not numeric and cannot be merged with the baseline.")
                    else:
                        # Name the column after the selected feature, matching
                        # the label shown on the button.
                        df_right[relative_key] = relative_values
                        st.markdown("After adding calculated fields")
                        st.dataframe(df_right)
                        # Serialize the frame in memory; no CSV file exists on
                        # disk to be opened.
                        st.download_button(
                            'Download CSV',
                            df_right.to_csv(index=False),
                            file_name='myfile.csv',
                            mime='text/csv',
                        )
                        st.info("Your data has been downloaded, you can visualize and detect outliers in the 'Plotting' and 'Detect Outliers' pages on the sidebar.")
# Closing hint: once data has been loaded, point the user at outlier detection.
data_loaded = not df_base.empty
if data_loaded:
    st.warning("Consider running outlier detection to clean your data!", icon="⚠️")
# for key, value in st.session_state.items():
# st.success(f"{key}: {value}")
# reloading new samples would damage the st-session_state loading, vars are only written once