Spaces:
Sleeping
Sleeping
Update classification.py
Browse files- classification.py +11 -1
classification.py
CHANGED
@@ -171,6 +171,7 @@ def process_categories(categories, model):
|
|
171 |
|
172 |
|
173 |
def match_categories(df, category_df, treshold=0.45):
|
|
|
174 |
for topic in category_df['topic']:
|
175 |
df[topic] = 0
|
176 |
for index, ebd_content in enumerate(df['Embeddings']):
|
@@ -183,9 +184,18 @@ def match_categories(df, category_df, treshold=0.45):
|
|
183 |
scores_list.append([float(cos_scores[index]) for index in high_score_indices])
|
184 |
for j in high_score_indices:
|
185 |
df.loc[index, category_df.loc[j, 'topic']] = float(cos_scores[j])
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
return df
|
188 |
|
|
|
189 |
def save_data(df, filename):
|
190 |
df = df.drop(columns=['Embeddings'])
|
191 |
new_filename = filename.replace(".", "_classified.")
|
|
|
171 |
|
172 |
|
173 |
def match_categories(df, category_df, treshold=0.45):
|
174 |
+
categories_list, experts_list, topic_list, scores_list = [], [], []
|
175 |
for topic in category_df['topic']:
|
176 |
df[topic] = 0
|
177 |
for index, ebd_content in enumerate(df['Embeddings']):
|
|
|
184 |
scores_list.append([float(cos_scores[index]) for index in high_score_indices])
|
185 |
for j in high_score_indices:
|
186 |
df.loc[index, category_df.loc[j, 'topic']] = float(cos_scores[j])
|
187 |
+
else:
|
188 |
+
categories_list.append(np.nan)
|
189 |
+
experts_list.append(np.nan)
|
190 |
+
topic_list.append(np.nan)
|
191 |
+
scores_list.append('pas interessant')
|
192 |
+
df["Description"] = categories_list
|
193 |
+
df["Expert"] = experts_list
|
194 |
+
df["Topic"] = topic_list
|
195 |
+
df["Score"] = scores_list
|
196 |
return df
|
197 |
|
198 |
+
|
199 |
def save_data(df, filename):
|
200 |
df = df.drop(columns=['Embeddings'])
|
201 |
new_filename = filename.replace(".", "_classified.")
|