# convertcsv2h5 / app.py
# Repository viewer metadata: avatar "ryo2", commit "Upload 2 files" (412bf28), file size 1.47 kB.
import gradio as gr
import pandas as pd
import os
from itertools import islice
import tempfile
def guarantee_multiindex_rows(df):
    """Normalize the row index of *df* in place.

    Two independent steps are applied:

    1. If the index is not already a ``pd.MultiIndex`` and its labels are
       path-like strings, every label is split on its separator (``/`` when
       the first label contains one, otherwise a backslash) and the parts
       become the levels of a new ``pd.MultiIndex``.
    2. If the index is a ``pd.MultiIndex`` with at least two levels, the
       values of level 1 are cast to ``str``.
       NOTE(review): level 1 is presumably the folder/video name of a
       DeepLabCut-style index -- confirm against the data producer.

    Args:
        df: The DataFrame whose ``index`` is rewritten. It is mutated in
            place.

    Returns:
        None.
    """
    if not isinstance(df.index, pd.MultiIndex):
        # An empty frame has no first label to inspect and nothing to split.
        if len(df.index) == 0:
            return
        path = df.index[0]
        try:
            sep = "/" if "/" in path else "\\"
            splits = tuple(df.index.str.split(sep))
            df.index = pd.MultiIndex.from_tuples(splits)
        except TypeError:
            # Non-string labels (e.g. a numerical index) cannot be split;
            # leave the index untouched.
            pass

    index = df.index
    # Only a MultiIndex has ``levels``, and level 1 exists only when there are
    # at least two levels (labels without any separator yield a single level).
    if isinstance(index, pd.MultiIndex) and index.nlevels > 1:
        df.index = index.set_levels(index.levels[1].astype(str), level=1)
def convertcsv2h5(csv_name):
    """Convert an uploaded CSV of labels into an HDF5 file.

    The CSV is read with a multi-row column header, the values of the column
    level named ``"scorer"`` are replaced by the scorer parsed from the file
    name, the row index is normalized with ``guarantee_multiindex_rows``, and
    the table is written under the key ``"df_with_missing"``.

    Args:
        csv_name: Either an uploaded-file object exposing the path on disk as
            ``.name`` or a plain path (``str`` / ``os.PathLike``). The file
            name must look like ``<prefix>_<scorer>...``; the scorer is the
            second underscore-separated part of the name without extension.

    Returns:
        The path of the written ``.h5`` file: the input path with its
        extension replaced by ``.h5``.

    Raises:
        ValueError: If the file name contains no underscore, or the file has
            fewer than two lines so the header layout cannot be detected.
    """
    # Accept both an upload wrapper (path stored in ``.name``) and a plain
    # path. ``os.PathLike`` is checked explicitly because ``Path.name`` is
    # only the final path component, not the full path.
    if not isinstance(csv_name, (str, os.PathLike)):
        csv_name = csv_name.name
    csv_name = os.fspath(csv_name)
    csv_path = os.path.splitext(csv_name)[0]

    # Parse the scorer from the file name only, so that underscores in parent
    # directories (e.g. a temporary upload folder) cannot shift the result.
    name_parts = os.path.basename(csv_path).split("_")
    if len(name_parts) < 2:
        raise ValueError(
            "Cannot determine the scorer: expected a file name like "
            f"'<prefix>_<scorer>.csv', got {os.path.basename(csv_name)!r}"
        )
    scorer = name_parts[1]

    # Peek at the first lines to detect the layout before the real parse.
    with open(csv_name) as datafile:
        head = list(islice(datafile, 0, 5))
    if len(head) < 2:
        raise ValueError(
            f"{os.path.basename(csv_name)!r} has fewer than two lines; "
            "cannot detect the header layout"
        )

    # An "individuals" row on the second line means four header rows instead
    # of three.
    header = list(range(4)) if "individuals" in head[1] else list(range(3))
    # When the last peeked line starts with a "labeled-data" cell, the first
    # three columns together form the row index.
    if head[-1].split(",")[0] == "labeled-data":
        index_col = [0, 1, 2]
    else:
        index_col = 0

    data = pd.read_csv(csv_name, index_col=index_col, header=header)
    data.columns = data.columns.set_levels([scorer], level="scorer")
    guarantee_multiindex_rows(data)

    # Derive the output path from the extension-less input path: unlike a
    # textual ".csv" replacement this never touches directory names and never
    # resolves to the input file when the extension is spelled differently.
    h5_path = csv_path + ".h5"
    data.to_hdf(h5_path, key="df_with_missing", mode="w")
    return h5_path
# Wire the converter into a minimal web UI: one uploaded file in, one
# downloadable file out.
iface = gr.Interface(
    fn=convertcsv2h5,
    inputs="file",
    outputs="file",
)

# Start serving the interface.
iface.launch()