Corey Morris committed on
Commit
abac22e
·
1 Parent(s): 6d41115

truthfulqa data added to dataframe

Browse files
result_data_processor.py CHANGED
@@ -34,6 +34,22 @@ class ResultDataProcessor:
34
  .str.replace('\|5', '', regex=True))
35
  return df[[model_name]]
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  @staticmethod
38
  def _extract_parameters(model_name):
39
  """
@@ -66,6 +82,10 @@ class ResultDataProcessor:
66
  raw_data = self._read_and_transform_data(filename)
67
  model_name = filename.split('/')[2]
68
  cleaned_data = self._cleanup_dataframe(raw_data, model_name)
 
 
 
 
69
  dataframes.append(cleaned_data)
70
 
71
 
 
34
  .str.replace('\|5', '', regex=True))
35
  return df[[model_name]]
36
 
37
+ def _extract_mc1(self, df, model_name):
38
+ df = df.rename(columns={'mc1': model_name})
39
+ # rename row harness|truthfulqa:mc|0 to truthfulqa:mc1
40
+ df.index = (df.index.str.replace('mc\|0', 'mc1', regex=True))
41
+ # just return the harness|truthfulqa:mc1 row
42
+ df = df.loc[['harness|truthfulqa:mc1']]
43
+ return df[[model_name]]
44
+
45
+ def _extract_mc2(self, df, model_name):
46
+ # rename row harness|truthfulqa:mc|0 to truthfulqa:mc2
47
+ df = df.rename(columns={'mc2': model_name})
48
+ df.index = (df.index.str.replace('mc\|0', 'mc2', regex=True))
49
+ df = df.loc[['harness|truthfulqa:mc2']]
50
+ return df[[model_name]]
51
+
52
+
53
  @staticmethod
54
  def _extract_parameters(model_name):
55
  """
 
82
  raw_data = self._read_and_transform_data(filename)
83
  model_name = filename.split('/')[2]
84
  cleaned_data = self._cleanup_dataframe(raw_data, model_name)
85
+ mc1 = self._extract_mc1(raw_data, model_name)
86
+ mc2 = self._extract_mc2(raw_data, model_name)
87
+ cleaned_data = pd.concat([cleaned_data, mc1])
88
+ cleaned_data = pd.concat([cleaned_data, mc2])
89
  dataframes.append(cleaned_data)
90
 
91
 
test_data_processing.py CHANGED
@@ -18,17 +18,17 @@ class TestResultDataProcessor(unittest.TestCase):
18
  self.assertIn('Parameters', data.columns)
19
  self.assertIn('MMLU_average', data.columns)
20
  # check number of columns
21
- self.assertEqual(len(data.columns), 61)
22
 
23
  # check that the number of rows is correct
24
  def test_rows(self):
25
  data = self.processor.data
26
  self.assertEqual(len(data), 992)
27
 
28
- # # check that mc1 column exists
29
- # def test_mc1(self):
30
- # data = self.processor.data
31
- # self.assertIn('mc1', data.columns)
32
 
33
  # test that a column that contains truthfulqa:mc does not exist
34
  def test_truthfulqa_mc(self):
 
18
  self.assertIn('Parameters', data.columns)
19
  self.assertIn('MMLU_average', data.columns)
20
  # check number of columns
21
+ self.assertEqual(len(data.columns), 63)
22
 
23
  # check that the number of rows is correct
24
  def test_rows(self):
25
  data = self.processor.data
26
  self.assertEqual(len(data), 992)
27
 
28
+ # check that mc1 column exists
29
+ def test_mc1(self):
30
+ data = self.processor.data
31
+ self.assertIn('harness|truthfulqa:mc1', data.columns)
32
 
33
  # test that a column that contains truthfulqa:mc does not exist
34
  def test_truthfulqa_mc(self):