File size: 1,473 Bytes
412bf28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import pandas as pd
import os
from itertools import islice
import tempfile


def guarantee_multiindex_rows(df):
   """Turn path-like row labels of *df* into a MultiIndex, in place.

   When the row index is a flat index of strings, every label is split on
   "/" (or on "\\" if the first label contains no "/") and the pieces
   become the levels of a new ``pd.MultiIndex``. Afterwards, if the index
   has at least two levels, the values of level 1 are cast to ``str``.

   Frames whose index cannot be split (empty index, or a first label that
   does not support the ``in`` test, such as an integer) are left with
   their index untouched.

   Args:
      df: DataFrame whose ``index`` attribute is reassigned in place.

   Returns:
      None. The frame is modified in place.
   """
   # An empty index has no first label to inspect; skip the split instead
   # of failing with IndexError on df.index[0].
   if not isinstance(df.index, pd.MultiIndex) and len(df.index) > 0:
      first_label = df.index[0]
      try:
         sep = "/" if "/" in first_label else "\\"
         splits = tuple(df.index.str.split(sep))
         df.index = pd.MultiIndex.from_tuples(splits)
      except TypeError:
         # Raised by the ``in`` test when the first label is not a string
         # (e.g. an integer row index); keep such an index as it is.
         pass

   # Level 1 exists only on an index with two or more levels; labels that
   # contained no separator produce a single-level MultiIndex.
   if isinstance(df.index, pd.MultiIndex) and df.index.nlevels > 1:
      df.index = df.index.set_levels(df.index.levels[1].astype(str), level=1)


def convertcsv2h5(csv_name):
   """Convert an annotation CSV into an HDF5 file written next to it.

   The layout of the CSV is sniffed from its first five lines: four header
   rows are used when the second line contains "individuals" (three
   otherwise), and the first three columns form the row index when the
   last sniffed line starts with "labeled-data" (only the first column
   otherwise). The "scorer" column level is then overwritten with the
   scorer taken from the file name, and the frame is stored under the key
   "df_with_missing".

   NOTE(review): these markers look like DeepLabCut's
   ``CollectedData_<scorer>.csv`` layout; confirm against the producer.

   Args:
      csv_name: Uploaded file object exposing its path as ``.name`` (what
         the Gradio "file" input passes in), or a plain ``str`` /
         ``os.PathLike`` path to the CSV.

   Returns:
      Path of the written ``.h5`` file: the CSV path with its extension
      replaced.
   """
   # pathlib paths also have a ``.name`` (the bare file name), so resolve
   # real path objects first and only then fall back to ``.name``.
   if isinstance(csv_name, os.PathLike):
      csv_name = os.fspath(csv_name)
   else:
      csv_name = getattr(csv_name, "name", csv_name)
   csv_path = os.path.splitext(csv_name)[0]

   # Read the scorer from the file name only: upload/temp directories may
   # contain underscores, which would otherwise shift the token.
   stem_tokens = os.path.basename(csv_path).split("_")
   scorer = stem_tokens[1] if len(stem_tokens) > 1 else None

   # Only the first lines are needed to detect the layout; close the handle
   # before pandas re-opens the file.
   with open(csv_name) as datafile:
      head = list(islice(datafile, 0, 5))

   if len(head) > 1 and "individuals" in head[1]:
      header = list(range(4))
   else:
      header = list(range(3))
   if head and head[-1].split(",")[0] == "labeled-data":
      index_col = [0, 1, 2]
   else:
      index_col = 0

   data = pd.read_csv(csv_name, index_col=index_col, header=header)
   if scorer is not None:
      data.columns = data.columns.set_levels([scorer], level="scorer")
   # else: the file name carries no scorer token, keep the one in the CSV.
   guarantee_multiindex_rows(data)

   # Swap the extension via the splitext stem so a ".csv" elsewhere in the
   # path is not rewritten and a non-".csv" input is never overwritten.
   h5_path = csv_path + ".h5"
   data.to_hdf(h5_path, key="df_with_missing", mode="w")

   return h5_path


# Web front end for the converter: a single file upload as input and the
# file path returned by convertcsv2h5 as the downloadable output.
iface = gr.Interface(
   fn=convertcsv2h5,
   inputs="file",
   outputs="file",
)
iface.launch()