import numpy as np
from scipy.sparse import csr_matrix
# Function to find similar projects for single project matching.
# Single Project Matching lets you choose an individual project using
# either the project IATI ID or title, and then shows the top x projects within a filter (filtered_df) that
# bear the closest resemblance to the selected one (p_index).
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
    """
    Return the projects in ``filtered_df`` most similar to one selected project.

    p_index: index of selected project (row of ``similarity_matrix``)
    similarity_matrix: matrix with similarities of all projects; anything that
        is not already a ``csr_matrix`` is converted to one
    filtered_df: df with filter applied; its index labels are used as column
        positions into ``similarity_matrix``
    top_x: top x project which should be displayed

    Returns a new DataFrame holding at most ``top_x`` rows of ``filtered_df``,
    ordered by descending similarity, with an added ``similarity`` column.
    Rows whose similarity is not strictly positive are dropped, so fewer than
    ``top_x`` rows may come back. ``filtered_df`` itself is not modified.
    The selected project is not excluded: if its own column is part of the
    filter it is ranked like any other candidate.
    """
    n_candidates = len(filtered_df)

    # Nothing to rank, or nothing requested. Guarding here also avoids the
    # ``[-0:]`` slice below, which would select *every* candidate.
    if top_x <= 0 or n_candidates == 0:
        return filtered_df.iloc[:0].assign(similarity=np.array([], dtype=float))

    if not isinstance(similarity_matrix, csr_matrix):
        similarity_matrix = csr_matrix(similarity_matrix)

    # Pull out the single row first, then pick the filtered columns from the
    # dense 1-D row; this avoids column-slicing the whole matrix.
    full_row = similarity_matrix.getrow(p_index).toarray().ravel()
    project_row = full_row[filtered_df.index.to_numpy()]

    # Positions (within filtered_df) of the k largest similarities, best first.
    k = min(top_x, n_candidates)
    top_positions = np.argsort(project_row)[-k:][::-1]

    # Positional lookup keeps rows and scores aligned even when the index
    # contains duplicate labels; ``assign`` builds a new frame instead of
    # writing into a selection of ``filtered_df``.
    result_df = filtered_df.iloc[top_positions].assign(
        similarity=project_row[top_positions]
    )
    return result_df[result_df['similarity'] > 0]