Corey Morris committed on
Commit
a5840fb
1 Parent(s): 916604b

removing models that are known to have training data contaminated with evaluations

Browse files
contaminated_models.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ trurl-2-13b-pl-instruct_unload
2
+ trurl-2-13b
result_data_processor.py CHANGED
@@ -140,8 +140,20 @@ class ResultDataProcessor:
140
  # remove extreme outliers from column harness|truthfulqa:mc1
141
  data = self._remove_mc1_outliers(data)
142
 
 
 
143
  return data
144
 
 
 
 
 
 
 
 
 
 
 
145
  def rank_data(self):
146
  # add rank for each column to the dataframe
147
  # copy the data dataframe to avoid modifying the original dataframe
 
140
  # remove extreme outliers from column harness|truthfulqa:mc1
141
  data = self._remove_mc1_outliers(data)
142
 
143
+ data = self.manual_removal_of_models(data)
144
+
145
  return data
146
 
147
+ def manual_removal_of_models(self, df):
148
+ # remove models verified to be trained on evaluation data
149
+ # load the list of models
150
+ with open('contaminated_models.txt') as f:
151
+ contaminated_models = f.read().splitlines()
152
+ # remove the models from the dataframe
153
+ df = df[~df.index.isin(contaminated_models)]
154
+ return df
155
+
156
+
157
  def rank_data(self):
158
  # add rank for each column to the dataframe
159
  # copy the data dataframe to avoid modifying the original dataframe