Standard_Intelligence_Dev

Sleeping

App Files Files Community

YchKhan committed on May 13, 2024

Commit

533a642

verified ·

1 Parent(s): ee041e5

Update classification.py

Browse files

Files changed (1) hide show

classification.py +5 -34

classification.py CHANGED Viewed

@@ -171,46 +171,17 @@ def process_categories(categories, model):
 def match_categories(df, category_df, treshold=0.45):
-    categories_list, experts_list, topic_list, scores_list = [], [], [], []
-    for ebd_content in df['Embeddings']:
         if isinstance(ebd_content, torch.Tensor):
             cos_scores = util.cos_sim(ebd_content, torch.stack(list(category_df['Embeddings']), dim=0))[0]
             high_score_indices = [i for i, score in enumerate(cos_scores) if score > treshold]
-            # Append the corresponding categories, experts, and topics for each high-scoring index
-            categories_list.append([category_df.loc[index, 'description'] for index in high_score_indices])
-            experts_list.append([category_df.loc[index, 'experts'] for index in high_score_indices])
-            topic_list.append([category_df.loc[index, 'topic'] for index in high_score_indices])
-            scores_list.append([float(cos_scores[index]) for index in high_score_indices])
-        else:
-            categories_list.append(np.nan)
-            experts_list.append(np.nan)
-            topic_list.append(np.nan)
-            scores_list.append('pas interessant')
-    df["Description"] = categories_list
-    df["Expert"] = experts_list
-    df["Topic"] = topic_list
-    df["Score"] = scores_list
     return df
-def flatten_nested_lists(nested_list):
-    """Flatten a list of potentially nested lists into a single list."""
-    flattened_list = []
-    for item in nested_list:
-        if isinstance(item, list):
-            flattened_list.extend(flatten_nested_lists(item))  # Recursively flatten the list
-        else:
-            flattened_list.append(item)
-    return flattened_list
 def save_data(df, filename):
-    # Apply flattening and then join for the 'Expert' column
-    df['Expert'] = df['Expert'].apply(lambda x: ', '.join(flatten_nested_lists(x)) if isinstance(x, list) else x)
-    df['Description'] = df['Description'].apply(lambda x: ', '.join(x) if isinstance(x, list) else x)
-    df['Topic'] = df['Topic'].apply(lambda x: ', '.join(x) if isinstance(x, list) else x)
-    df['Score'] = df['Score'].apply(lambda x: ', '.join(map(str, x)) if isinstance(x, list) else x)
     df = df.drop(columns=['Embeddings'])
     new_filename = filename.replace(".", "_classified.")

 def match_categories(df, category_df, treshold=0.45):
     """Score every row of ``df`` against each category topic.

     One column is added to ``df`` per value of ``category_df['topic']``.
     For each row whose ``'Embeddings'`` entry is a ``torch.Tensor``, the
     cosine similarity to every category embedding is computed with
     ``util.cos_sim``; a similarity strictly greater than ``treshold`` is
     written to the column of the matching topic. Every other cell is 0.0.

     Args:
         df: DataFrame with an ``'Embeddings'`` column. Entries that are not
             ``torch.Tensor`` instances are skipped (their scores stay 0.0).
         category_df: DataFrame with ``'topic'`` and ``'Embeddings'`` columns.
             The embeddings are combined with ``torch.stack``, so they are
             assumed to be tensors of identical shape.
         treshold: Minimum similarity (exclusive) for a score to be recorded.

     Returns:
         The same ``df`` object, with the topic score columns added.
     """
     topics = list(category_df['topic'])
     # Scores are collected by position and assigned as whole columns at the
     # end, so neither frame needs a default RangeIndex and the columns are
     # float from the start (no int -> float upcast on item assignment).
     scores_by_topic = {topic: [0.0] * len(df) for topic in topics}
     # Loop-invariant; built lazily so frames without any tensor row never
     # touch the category embeddings.
     category_matrix = None
     for row_pos, ebd_content in enumerate(df['Embeddings']):
         if not isinstance(ebd_content, torch.Tensor):
             continue
         if category_matrix is None:
             category_matrix = torch.stack(list(category_df['Embeddings']), dim=0)
         cos_scores = util.cos_sim(ebd_content, category_matrix)[0]
         for cat_pos, score in enumerate(cos_scores.tolist()):
             if score > treshold:
                 # If a topic appears several times in category_df, the last
                 # matching category wins.
                 scores_by_topic[topics[cat_pos]][row_pos] = score
     for topic, values in scores_by_topic.items():
         df[topic] = values
     return df
 def save_data(df, filename):
     df = df.drop(columns=['Embeddings'])
     new_filename = filename.replace(".", "_classified.")