Corey Morris
committed on
Commit
·
abac22e
1
Parent(s):
6d41115
truthfulqa data added to dataframe
Browse files
- result_data_processor.py +20 -0
- test_data_processing.py +5 -5
result_data_processor.py
CHANGED
@@ -34,6 +34,22 @@ class ResultDataProcessor:
|
|
34 |
.str.replace('\|5', '', regex=True))
|
35 |
return df[[model_name]]
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
@staticmethod
|
38 |
def _extract_parameters(model_name):
|
39 |
"""
|
@@ -66,6 +82,10 @@ class ResultDataProcessor:
|
|
66 |
raw_data = self._read_and_transform_data(filename)
|
67 |
model_name = filename.split('/')[2]
|
68 |
cleaned_data = self._cleanup_dataframe(raw_data, model_name)
|
|
|
|
|
|
|
|
|
69 |
dataframes.append(cleaned_data)
|
70 |
|
71 |
|
|
|
34 |
.str.replace('\|5', '', regex=True))
|
35 |
return df[[model_name]]
|
36 |
|
37 |
+
def _extract_mc1(self, df, model_name):
|
38 |
+
df = df.rename(columns={'mc1': model_name})
|
39 |
+
# rename row harness|truthfulqa:mc|0 to truthfulqa:mc1
|
40 |
+
df.index = (df.index.str.replace('mc\|0', 'mc1', regex=True))
|
41 |
+
# just return the harness|truthfulqa:mc1 row
|
42 |
+
df = df.loc[['harness|truthfulqa:mc1']]
|
43 |
+
return df[[model_name]]
|
44 |
+
|
45 |
+
def _extract_mc2(self, df, model_name):
|
46 |
+
# rename row harness|truthfulqa:mc|0 to truthfulqa:mc2
|
47 |
+
df = df.rename(columns={'mc2': model_name})
|
48 |
+
df.index = (df.index.str.replace('mc\|0', 'mc2', regex=True))
|
49 |
+
df = df.loc[['harness|truthfulqa:mc2']]
|
50 |
+
return df[[model_name]]
|
51 |
+
|
52 |
+
|
53 |
@staticmethod
|
54 |
def _extract_parameters(model_name):
|
55 |
"""
|
|
|
82 |
raw_data = self._read_and_transform_data(filename)
|
83 |
model_name = filename.split('/')[2]
|
84 |
cleaned_data = self._cleanup_dataframe(raw_data, model_name)
|
85 |
+
mc1 = self._extract_mc1(raw_data, model_name)
|
86 |
+
mc2 = self._extract_mc2(raw_data, model_name)
|
87 |
+
cleaned_data = pd.concat([cleaned_data, mc1])
|
88 |
+
cleaned_data = pd.concat([cleaned_data, mc2])
|
89 |
dataframes.append(cleaned_data)
|
90 |
|
91 |
|
test_data_processing.py
CHANGED
@@ -18,17 +18,17 @@ class TestResultDataProcessor(unittest.TestCase):
|
|
18 |
self.assertIn('Parameters', data.columns)
|
19 |
self.assertIn('MMLU_average', data.columns)
|
20 |
# check number of columns
|
21 |
-
self.assertEqual(len(data.columns),
|
22 |
|
23 |
# check that the number of rows is correct
|
24 |
def test_rows(self):
|
25 |
data = self.processor.data
|
26 |
self.assertEqual(len(data), 992)
|
27 |
|
28 |
-
#
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
|
33 |
# test that a column that contains truthfulqa:mc does not exist
|
34 |
def test_truthfulqa_mc(self):
|
|
|
18 |
self.assertIn('Parameters', data.columns)
|
19 |
self.assertIn('MMLU_average', data.columns)
|
20 |
# check number of columns
|
21 |
+
self.assertEqual(len(data.columns), 63)
|
22 |
|
23 |
# check that the number of rows is correct
|
24 |
def test_rows(self):
|
25 |
data = self.processor.data
|
26 |
self.assertEqual(len(data), 992)
|
27 |
|
28 |
+
# check that mc1 column exists
|
29 |
+
def test_mc1(self):
|
30 |
+
data = self.processor.data
|
31 |
+
self.assertIn('harness|truthfulqa:mc1', data.columns)
|
32 |
|
33 |
# test that a column that contains truthfulqa:mc does not exist
|
34 |
def test_truthfulqa_mc(self):
|