Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
import os | |
from itertools import islice | |
import tempfile | |
def guarantee_multiindex_rows(df):
    """Turn path-like row labels of *df* into a ``MultiIndex``, in place.

    When the row index is not already a ``pd.MultiIndex``, the first label is
    inspected to pick a separator (a forward slash if the label contains one,
    otherwise a backslash) and every label is split on that separator, giving
    one index level per path component. Labels that do not support the
    separator test (it raises ``TypeError``, e.g. for integer labels) are left
    unchanged. Afterwards, if the index has a level 1, its values are cast to
    ``str``.

    Args:
        df: DataFrame whose ``index`` attribute is rewritten in place.

    Returns:
        None; the function only mutates ``df.index``.
    """
    # An empty frame has no first label to inspect, so there is nothing to
    # split; skipping avoids an IndexError on ``df.index[0]``.
    if not isinstance(df.index, pd.MultiIndex) and len(df.index) > 0:
        first_label = df.index[0]
        try:
            sep = "/" if "/" in first_label else "\\"
            splits = tuple(df.index.str.split(sep))
            df.index = pd.MultiIndex.from_tuples(splits)
        except TypeError:
            # Non-string labels cannot be searched for a separator;
            # keep the index as it is.
            pass

    # Level 1 only exists on a MultiIndex with at least two levels; a flat
    # index (or a single-level MultiIndex) is left alone.
    if isinstance(df.index, pd.MultiIndex) and df.index.nlevels > 1:
        df.index = df.index.set_levels(df.index.levels[1].astype(str), level=1)
def convertcsv2h5(csv_name):
    """Convert a labelled-data CSV file into an HDF5 file next to it.

    The scorer name is taken from the second "_"-separated part of the file
    name (without directory and extension). The first five lines of the file
    are inspected to decide how many header rows to read (four when the second
    line contains "individuals", otherwise three) and whether the first three
    columns form the row index (when the last inspected line starts with
    "labeled-data"). The frame is then written under the key
    ``"df_with_missing"``.

    Args:
        csv_name: Either a path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute holds the path of the CSV file.

    Returns:
        The path of the written ``.h5`` file: the input path with its
        extension replaced by ``.h5``.

    Raises:
        ValueError: If the file name contains no "_" from which to read the
            scorer, or the file has fewer than two lines.
    """
    # Accept a plain path as well as an upload object exposing ``.name``.
    if isinstance(csv_name, (str, os.PathLike)):
        csv_name = os.fspath(csv_name)
    else:
        csv_name = csv_name.name
    csv_path = os.path.splitext(csv_name)[0]

    # Look only at the file name: an underscore in a parent directory must
    # not shift which part is taken as the scorer.
    name_parts = os.path.basename(csv_path).split("_")
    if len(name_parts) < 2:
        raise ValueError(
            f"Cannot determine scorer: file name {os.path.basename(csv_name)!r} "
            "contains no '_' separator."
        )
    scorer = name_parts[1]

    # utf-8 matches the default encoding pandas.read_csv uses below.
    with open(csv_name, encoding="utf-8") as datafile:
        head = list(islice(datafile, 0, 5))
    if len(head) < 2:
        raise ValueError(
            f"{os.path.basename(csv_name)!r} has fewer than two lines; "
            "expected a multi-row header."
        )

    header = list(range(4)) if "individuals" in head[1] else list(range(3))
    if head[-1].split(",")[0] == "labeled-data":
        index_col = [0, 1, 2]
    else:
        index_col = 0

    data = pd.read_csv(csv_name, index_col=index_col, header=header)
    data.columns = data.columns.set_levels([scorer], level="scorer")
    guarantee_multiindex_rows(data)

    # Build the output path from the extension-stripped path so that only the
    # real extension is replaced and the input file can never be overwritten.
    h5_path = csv_path + ".h5"
    data.to_hdf(h5_path, key="df_with_missing", mode="w")
    return h5_path
# Wire the converter into a one-input, one-output Gradio interface
# (file in, file out) and start serving it.
iface = gr.Interface(
    fn=convertcsv2h5,
    inputs="file",
    outputs="file",
)
iface.launch()