Corey Morris
committed on
Commit
•
68bce52
1
Parent(s):
ad0b971
Catch exceptions when processing files. As new data is introduced, I want to know which files have a different format and cause problems, but the application shouldn't halt just because it can't process a single file.
Browse files- result_data_processor.py +17 -11
result_data_processor.py
CHANGED
@@ -20,6 +20,7 @@ class ResultDataProcessor:
|
|
20 |
if fnmatch.fnmatch(basename, pattern):
|
21 |
filename = os.path.join(root, basename)
|
22 |
matching_files[root] = filename
|
|
|
23 |
matching_files = {key: value for key, value in matching_files.items() if 'gpt-j-6b' not in key}
|
24 |
matching_files = list(matching_files.values())
|
25 |
return matching_files
|
@@ -94,17 +95,22 @@ class ResultDataProcessor:
|
|
94 |
dataframes = []
|
95 |
organization_names = []
|
96 |
for filename in self._find_files(self.directory, self.pattern):
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
|
110 |
data = pd.concat(dataframes, axis=1).transpose()
|
|
|
20 |
if fnmatch.fnmatch(basename, pattern):
|
21 |
filename = os.path.join(root, basename)
|
22 |
matching_files[root] = filename
|
23 |
+
# TODO decide on removing this since I am catching the error when processing the file
|
24 |
matching_files = {key: value for key, value in matching_files.items() if 'gpt-j-6b' not in key}
|
25 |
matching_files = list(matching_files.values())
|
26 |
return matching_files
|
|
|
95 |
dataframes = []
|
96 |
organization_names = []
|
97 |
for filename in self._find_files(self.directory, self.pattern):
|
98 |
+
try:
|
99 |
+
raw_data = self._read_and_transform_data(filename)
|
100 |
+
split_path = filename.split('/')
|
101 |
+
model_name = split_path[2]
|
102 |
+
organization_name = split_path[1]
|
103 |
+
cleaned_data = self._cleanup_dataframe(raw_data, model_name)
|
104 |
+
mc1 = self._extract_mc1(raw_data, model_name)
|
105 |
+
mc2 = self._extract_mc2(raw_data, model_name)
|
106 |
+
cleaned_data = pd.concat([cleaned_data, mc1])
|
107 |
+
cleaned_data = pd.concat([cleaned_data, mc2])
|
108 |
+
organization_names.append(organization_name)
|
109 |
+
dataframes.append(cleaned_data)
|
110 |
+
except Exception as e:
|
111 |
+
print(f'Error processing {filename}')
|
112 |
+
print("The error is: ", e)
|
113 |
+
continue
|
114 |
|
115 |
|
116 |
data = pd.concat(dataframes, axis=1).transpose()
|