Spaces:
Running
Running
import re | |
import numpy as np | |
import pandas as pd | |
from numpy import dot | |
from numpy.linalg import norm | |
from body_shape_lookup import body_shape_lookup | |
# File names of the CSV tables with normalised measures.
BODY_SHAPE_MEASURES = "body_shape_measures_normalised_updated.csv"
VOLUNTEERS_MEASURES = "volunteers_measures_normalised_updated.csv"

# Feature columns used when comparing measures. Kept as a list because it is
# used directly as a DataFrame column selector.
RATIOS_TO_USE = [
    "shoulder_to_hip_distance",
    "hip_to_ankle_distance",
    "thigh_to_torso_ratio_normalised",
    "upper_to_lower_torso_normalised_ratio",
    "shoulder_to_hip_ratio",
    "thigh_to_body_ratio",
    "upper_torso_to_body_ratio",
]
def extract_digits(input_string):
    """Return the first run of digits found in ``input_string`` as an int.

    For IDs such as ``'1A'`` or ``'12B'`` this yields ``1`` and ``12``.
    When the string contains no digit at all, ``-1`` is returned as a
    "not found" marker.
    """
    first_number = re.search(r'\d+', input_string)
    if first_number is None:
        return -1  # not found
    return int(first_number.group())
def is_match(row):
    """Tell whether the record's own class is among its three ranked shapes.

    The expected class is the number that ``extract_digits`` pulls out of
    ``row['Volunteer_ID']``. It is compared with the ``Rank_1_Body_Shape``,
    ``Rank_2_Body_Shape`` and ``Rank_3_Body_Shape`` entries of the same row.
    """
    expected_class = extract_digits(row['Volunteer_ID'])
    rank_columns = ('Rank_1_Body_Shape', 'Rank_2_Body_Shape', 'Rank_3_Body_Shape')
    # The generator keeps the lookups lazy: a later rank is only read when
    # the earlier ones did not match.
    return any(expected_class == row[column] for column in rank_columns)
def select_body_shape(normalised_body_shape_measures):
    """Select the reference body shape most similar to the given measures.

    Each row of ``normalised_body_shape_measures`` is compared, over the
    columns listed in ``RATIOS_TO_USE``, with every row of the reference
    table read from ``BODY_SHAPE_MEASURES``. The comparison metric is
    cosine similarity (higher is more similar). Per-shape scores and the
    three best-scoring shapes are printed for every input row.

    Args:
        normalised_body_shape_measures: Table exposing ``iterrows()`` whose
            rows provide an ``'id'`` entry and the ``RATIOS_TO_USE`` columns.

    Returns:
        The result of ``body_shape_lookup`` for the best-ranked shape of the
        last row processed.

    Raises:
        ValueError: If ``normalised_body_shape_measures`` has no rows.
    """
    # Reference measures, restricted to the ratio columns used for matching.
    body_shape_df = pd.read_csv(BODY_SHAPE_MEASURES)
    body_shape_ratios = body_shape_df[RATIOS_TO_USE]

    volunteers_df = normalised_body_shape_measures

    # Stays None when the input has no rows, so that case can be reported
    # explicitly instead of failing on an unbound name below.
    top_scores = None

    for _, volunteer_row in volunteers_df.iterrows():
        print(f"\nProcessing volunteer {volunteer_row['id']}")
        volunteer_ratios = volunteer_row[RATIOS_TO_USE]

        # Placeholder entries that any real similarity will outrank.
        top_scores = [(-1000, 'n/a')] * 3

        for body_index, body_shape_row in body_shape_ratios.iterrows():
            # Cosine similarity between the volunteer and this reference shape.
            similarity = dot(volunteer_ratios, body_shape_row) / (
                norm(volunteer_ratios) * norm(body_shape_row)
            )

            # Keep the three highest scores in descending order. The strict
            # '>' means that on a tie the earlier reference shape stays ahead.
            for i, (score, _) in enumerate(top_scores):
                if similarity > score:
                    # read_csv assigns a default 0-based index, so +1 gives
                    # the 1-based shape number.
                    top_scores.insert(i, (similarity, body_index + 1))
                    top_scores = top_scores[:3]
                    break

            print(f"Volunteer {volunteer_row['id']} (body shape {body_index + 1}) Similarity:\t{similarity:.3f}")

        # Print the top 3 best body shapes and scores for the current volunteer
        print(f"Volunteer {volunteer_row['id']} top 3 body shapes and scores are:")
        for i, (score, body_shape) in enumerate(top_scores):
            print(f"Rank {i + 1}: Body Shape {body_shape} with score {score:.3f}")

    if top_scores is None:
        raise ValueError("normalised_body_shape_measures contains no rows")

    body_shape_index = top_scores[0][1]
    return body_shape_lookup(body_shape_index)