Corey Morris committed on
Commit
68bce52
1 Parent(s): ad0b971

Catching exceptions when processing files. As new data is introduced, I want to know which files may have different formats and cause problems, but the application shouldn't halt if it can't process a single file.

Browse files
Files changed (1) hide show
  1. result_data_processor.py +17 -11
result_data_processor.py CHANGED
@@ -20,6 +20,7 @@ class ResultDataProcessor:
20
  if fnmatch.fnmatch(basename, pattern):
21
  filename = os.path.join(root, basename)
22
  matching_files[root] = filename
 
23
  matching_files = {key: value for key, value in matching_files.items() if 'gpt-j-6b' not in key}
24
  matching_files = list(matching_files.values())
25
  return matching_files
@@ -94,17 +95,22 @@ class ResultDataProcessor:
94
  dataframes = []
95
  organization_names = []
96
  for filename in self._find_files(self.directory, self.pattern):
97
- raw_data = self._read_and_transform_data(filename)
98
- split_path = filename.split('/')
99
- model_name = split_path[2]
100
- organization_name = split_path[1]
101
- cleaned_data = self._cleanup_dataframe(raw_data, model_name)
102
- mc1 = self._extract_mc1(raw_data, model_name)
103
- mc2 = self._extract_mc2(raw_data, model_name)
104
- cleaned_data = pd.concat([cleaned_data, mc1])
105
- cleaned_data = pd.concat([cleaned_data, mc2])
106
- organization_names.append(organization_name)
107
- dataframes.append(cleaned_data)
 
 
 
 
 
108
 
109
 
110
  data = pd.concat(dataframes, axis=1).transpose()
 
20
  if fnmatch.fnmatch(basename, pattern):
21
  filename = os.path.join(root, basename)
22
  matching_files[root] = filename
23
+ # TODO decide on removing this since I am catching the error when processing the file
24
  matching_files = {key: value for key, value in matching_files.items() if 'gpt-j-6b' not in key}
25
  matching_files = list(matching_files.values())
26
  return matching_files
 
95
  dataframes = []
96
  organization_names = []
97
  for filename in self._find_files(self.directory, self.pattern):
98
+ try:
99
+ raw_data = self._read_and_transform_data(filename)
100
+ split_path = filename.split('/')
101
+ model_name = split_path[2]
102
+ organization_name = split_path[1]
103
+ cleaned_data = self._cleanup_dataframe(raw_data, model_name)
104
+ mc1 = self._extract_mc1(raw_data, model_name)
105
+ mc2 = self._extract_mc2(raw_data, model_name)
106
+ cleaned_data = pd.concat([cleaned_data, mc1])
107
+ cleaned_data = pd.concat([cleaned_data, mc2])
108
+ organization_names.append(organization_name)
109
+ dataframes.append(cleaned_data)
110
+ except Exception as e:
111
+ print(f'Error processing {filename}')
112
+ print("The error is: ", e)
113
+ continue
114
 
115
 
116
  data = pd.concat(dataframes, axis=1).transpose()