import json
import pathlib

import pandas as pd
import streamlit as st

st.set_page_config(layout="wide")

st.header("Time Series Preprocessing Pipeline")
st.markdown(
    "Users can load their time-series data and select a set of transformations "
    "to prepare a training set for univariate or multivariate time-series "
    "classification. Go ahead and use the sidebar on the left to upload your "
    "data files in *.json* format and start exploring and transforming it!"
)
col1, col2 = st.columns(2)

@st.experimental_memo
def convert_df(df):
    # Encode the dataframe as CSV bytes for the download button.
    return df.to_csv(index=False).encode('utf-8')
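# NOTE: st.experimental_memo caches convert_df's output across reruns; in
# recent Streamlit releases it has been superseded by st.cache_data.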
# Load and prepare data
file_names, file_bytes = [], []
with st.sidebar:
    files = st.file_uploader("Load files", accept_multiple_files=True)
    if files:
        file_names = [file.name for file in files]
        file_bytes = [file.getvalue() for file in files]
        st.success("Your data has been successfully loaded! 🤗")
data_dict = {'trial_id': [], 'pupil_dilation': [], 'baseline': [], 'rating': []}
with st.spinner("Building base dictionary..."):
    for file_data in file_bytes:
        data = json.loads(file_data)
        for key in data:
            # Flatten every trial record into per-field lists.
            for record in data[key]:
                for field, value in record.items():
                    data_dict[field].append(value)

df_base = pd.DataFrame()
with col1:
    if file_bytes:
        with st.spinner("Building base dataframe..."):
            df_base = pd.DataFrame.from_dict(data_dict)
            # Keep only the digits of each trial identifier and record the
            # length of every time-series field.
            df_base["trial_id"] = df_base.trial_id.map(lambda s: "".join(c for c in s if c.isdigit()))
            df_base["len_pupil_dilation"] = df_base.pupil_dilation.map(len)
            df_base["len_baseline"] = df_base.baseline.map(len)
        st.info(f"Number of files: {len(file_names)}")
        if 'df_base' not in st.session_state:
            st.session_state['df_base'] = df_base
    else:
        st.caption("Upload your data using the sidebar to start :sunglasses:")
    if 'df_base' in st.session_state:
        st.markdown("Your original data with some extra information about the length of the time-series fields")
        st.dataframe(st.session_state.df_base)
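# Streamlit reruns this script from top to bottom on every widget interaction,
# so intermediate results are kept in st.session_state to survive reruns.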
# Cleaning starts
with col1:
    if not df_base.empty:
        st.markdown("**Cleaning actions**")
        detect_blinking = st.button("I want to clean my data 🤗")
        number_of_blinks = 0
        if detect_blinking:
            # Initialization of session_state
            if 'df' not in st.session_state:
                st.session_state['df'] = df_base
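            # A sample of exactly 0.0 is treated as a blink (signal loss),
            # matching the app's own "blinking values (0.0)" message below.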
            for ser in df_base['pupil_dilation']:
                for f in ser:
                    if f == 0.0:
                        number_of_blinks += 1
            for ser in df_base['baseline']:
                for f in ser:
                    if f == 0.0:
                        number_of_blinks += 1
            # Store the count only when blinks were found, so the "no blinks"
            # caption below remains reachable.
            if 'blinks' not in st.session_state and number_of_blinks:
                st.session_state['blinks'] = number_of_blinks
        if 'blinks' in st.session_state:
            st.info(f"Blinking values (0.0) were found in {st.session_state.blinks} time-steps in all your data")
            remove_blinking = st.button("Remove blinking 🧹")
            # df in column 2
            if remove_blinking:
                df_right = st.session_state.df.copy(deep=True)
                # Drop the 0.0 (blink) samples from both series.
                df_right.pupil_dilation = df_right.pupil_dilation.map(lambda ser: [f for f in ser if f != 0.0])
                df_right.baseline = df_right.baseline.map(lambda ser: [f for f in ser if f != 0.0])
                st.session_state['df'] = df_right.copy(deep=True)
                st.success("Blinking values have been removed!")
                st.session_state.df_base = df_right
        elif detect_blinking and not number_of_blinks:
            st.caption("No blinking values were found in your data!")
# Add calculated fields
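# A relative feature subtracts each trial's mean baseline value from every
# sample of the selected series:
# relative[i][t] = feature[i][t] - mean(baseline[i])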
if 'df' in st.session_state or 'df_right' in st.session_state:
    df_right = st.session_state.df.copy(deep=True)
    if 'baseline' in list(df_right.keys()):
        st.markdown("A **baseline** feature has been found in your data. Do you want to merge it with any of the other features in a new calculated field?")
        feature_keys = [k for k in list(df_right.keys()) if k != 'baseline']
        # The default selection is the fourth-from-last column
        # (pupil_dilation with the expected schema).
        option = st.multiselect('Select a feature to create a relative calculated feature ➕', feature_keys, [feature_keys[-4]])
        if option:  # guard against an empty selection
            relative_key = f"relative_{option[0]}"
            add_relative = st.button(f"Add {relative_key}")
            if add_relative:
                baseline_mean = [sum(s) / len(s) for s in df_right['baseline']]
                df_right[relative_key] = [
                    [value - baseline_mean[i] for value in df_right[option[0]][i]]
                    for i in range(len(df_right))
                ]
st.markdown("After adding calculated fields and removing blinking values (when applied)")
st.dataframe(df_right)
csv = convert_df(df_right)
if 'df_right' not in st.session_state:
st.session_state['df_right'] = df_right
# Save transformations to disk
downl = st.download_button("Download CSV 💾", csv, "file.csv", "text/csv", key='download-csv')
if downl:
st.info("Your data has been downloaded, you can visualize and detect outliers in the 'Plotting' and 'Detect Outliers' pages on the sidebar.")
if not df_base.empty:
    with col1:
        st.warning("Consider running outlier detection to clean your data!", icon="⚠️")