"""Gradio app that converts an uploaded labeled-data CSV into an HDF5 file.

The uploaded CSV is read with a multi-row column header, its ``scorer``
column level is replaced by a name taken from the file name, its row index
is normalized, and the result is written next to the CSV under the key
``df_with_missing``.

NOTE(review): the markers checked here (``individuals``, ``labeled-data``,
``scorer``, ``df_with_missing``) look like DeepLabCut's annotation file
conventions -- confirm before relying on that.
"""

import os
import tempfile  # retained so the module namespace is unchanged
from itertools import islice

import gradio as gr
import pandas as pd


def guarantee_multiindex_rows(df):
    """Normalize the row index of *df* in place.

    * A flat index whose first entry is a string is split on ``/`` (or on
      ``\\`` when the first entry contains no ``/``) and replaced by a
      ``pandas.MultiIndex`` built from the pieces.
    * A flat index whose first entry is not a string (e.g. integers) is
      left untouched.
    * When the resulting index is a ``MultiIndex`` with at least two levels,
      the second level is cast to ``str``.

    An empty frame, or an index that splits into a single level, is accepted
    and simply has nothing further done to it.

    Args:
        df: The data frame to modify. Its ``index`` attribute is reassigned.

    Returns:
        None. The frame is modified in place.
    """
    index = df.index
    if not isinstance(index, pd.MultiIndex) and len(index) > 0:
        first_entry = index[0]
        try:
            # `in` raises TypeError for non-string entries such as integers,
            # which is how a purely numerical index is skipped.
            sep = "/" if "/" in first_entry else "\\"
            parts = tuple(index.str.split(sep))
            df.index = pd.MultiIndex.from_tuples(parts)
        except TypeError:
            pass

    # Only a MultiIndex has levels; entries without any separator produce a
    # single level, in which case there is no second level to convert.
    if isinstance(df.index, pd.MultiIndex) and df.index.nlevels > 1:
        second_level = df.index.levels[1].astype(str)
        df.index = df.index.set_levels(second_level, level=1)


def _resolve_upload_path(upload):
    """Return the filesystem path of *upload* (a path or an object with ``.name``)."""
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def _scorer_from_filename(csv_path):
    """Return the second ``_``-separated token of the file's base name, or None."""
    stem = os.path.splitext(os.path.basename(csv_path))[0]
    tokens = stem.split("_")
    return tokens[1] if len(tokens) > 1 else None


def _detect_csv_layout(csv_path):
    """Inspect the first five lines and return ``(header, index_col)`` for read_csv."""
    with open(csv_path) as datafile:
        head = list(islice(datafile, 5))
    if len(head) < 2:
        raise ValueError(
            f"{csv_path!r} has fewer than two lines; cannot detect its header layout"
        )
    # An "individuals" row on the second line means one extra header row.
    n_header_rows = 4 if "individuals" in head[1] else 3
    # If the last sampled line starts with "labeled-data", the row index is
    # spread over the first three columns instead of a single one.
    first_cell = head[-1].split(",")[0]
    index_col = [0, 1, 2] if first_cell == "labeled-data" else 0
    return list(range(n_header_rows)), index_col


def convertcsv2h5(csv_name):
    """Convert an uploaded CSV file to HDF5 and return the new file's path.

    The output is written beside the input, with the input's extension
    replaced by ``.h5``, under the key ``df_with_missing``.

    The ``scorer`` column level is overwritten with the second
    ``_``-separated token of the file's base name (``Name`` for
    ``Prefix_Name.csv``). If the base name contains no underscore, the
    scorer already present in the CSV is kept.

    Args:
        csv_name: Either a path (``str`` / ``os.PathLike``) or an object
            exposing the path as its ``name`` attribute, such as the
            temporary-file wrapper some Gradio versions pass for file inputs.

    Returns:
        The path of the written ``.h5`` file, as a string.

    Raises:
        ValueError: If the file has fewer than two lines.
    """
    csv_path = _resolve_upload_path(csv_name)
    header, index_col = _detect_csv_layout(csv_path)

    data = pd.read_csv(csv_path, index_col=index_col, header=header)

    scorer = _scorer_from_filename(csv_path)
    if scorer is not None:
        data.columns = data.columns.set_levels([scorer], level="scorer")
    guarantee_multiindex_rows(data)

    # splitext swaps only the real extension, so a ".csv" elsewhere in the
    # path is left alone and the output never lands on the input file when
    # the extension is spelled differently (e.g. ".CSV").
    h5_path = os.path.splitext(csv_path)[0] + ".h5"
    data.to_hdf(h5_path, key="df_with_missing", mode="w")
    return h5_path


# NOTE(review): the interface is still built and launched at import time,
# exactly as before, so running or importing this file behaves the same.
iface = gr.Interface(fn=convertcsv2h5, inputs="file", outputs="file")
iface.launch()