Spaces:
Running
on
Zero
Running
on
Zero
Update smart_breed_matcher.py
Browse files- smart_breed_matcher.py +25 -163
smart_breed_matcher.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import torch
|
3 |
import re
|
4 |
import numpy as np
|
@@ -81,115 +80,52 @@ class SmartBreedMatcher:
|
|
81 |
return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
|
82 |
|
83 |
|
84 |
-
# def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
|
85 |
-
# """計算兩個品種之間的相似度,包含健康和噪音因素"""
|
86 |
-
# # 計算描述文本的相似度
|
87 |
-
# desc1_embedding = self._get_cached_embedding(breed1_features['description'])
|
88 |
-
# desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
89 |
-
# description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
90 |
-
|
91 |
-
# # 基本特徵相似度
|
92 |
-
# size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
|
93 |
-
# exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5
|
94 |
-
|
95 |
-
# # 性格相似度
|
96 |
-
# temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
97 |
-
# temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
98 |
-
# temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
99 |
-
|
100 |
-
# # 健康分數相似度
|
101 |
-
# health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
102 |
-
# health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
103 |
-
# health_similarity = 1.0 - abs(health_score1 - health_score2)
|
104 |
-
|
105 |
-
# # 噪音水平相似度
|
106 |
-
# noise_similarity = self._calculate_noise_similarity(
|
107 |
-
# breed1_features['breed_name'],
|
108 |
-
# breed2_features['breed_name']
|
109 |
-
# )
|
110 |
-
|
111 |
-
# # 加權計算
|
112 |
-
# weights = {
|
113 |
-
# 'description': 0.25,
|
114 |
-
# 'temperament': 0.20,
|
115 |
-
# 'exercise': 0.2,
|
116 |
-
# 'size': 0.05,
|
117 |
-
# 'health': 0.15,
|
118 |
-
# 'noise': 0.15
|
119 |
-
# }
|
120 |
-
|
121 |
-
# final_similarity = (
|
122 |
-
# description_similarity * weights['description'] +
|
123 |
-
# temperament_similarity * weights['temperament'] +
|
124 |
-
# exercise_similarity * weights['exercise'] +
|
125 |
-
# size_similarity * weights['size'] +
|
126 |
-
# health_similarity * weights['health'] +
|
127 |
-
# noise_similarity * weights['noise']
|
128 |
-
# )
|
129 |
-
|
130 |
-
# return final_similarity
|
131 |
-
|
132 |
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
|
133 |
-
"""
|
134 |
-
#
|
135 |
desc1_embedding = self._get_cached_embedding(breed1_features['description'])
|
136 |
desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
137 |
description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
138 |
-
|
139 |
-
#
|
140 |
-
size_similarity =
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
)
|
145 |
-
|
146 |
-
# 運動需求相似度(加強版)
|
147 |
-
exercise_similarity = self._calculate_exercise_similarity_enhanced(
|
148 |
-
breed1_features['exercise'],
|
149 |
-
breed2_features['exercise']
|
150 |
-
)
|
151 |
-
|
152 |
-
# 美容需求相似度
|
153 |
-
grooming_similarity = self._calculate_grooming_similarity(
|
154 |
-
breed1_features['breed_name'],
|
155 |
-
breed2_features['breed_name']
|
156 |
-
)
|
157 |
-
|
158 |
-
# 其他相似度計算保持不變
|
159 |
temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
160 |
temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
161 |
temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
162 |
-
|
|
|
163 |
health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
164 |
health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
165 |
health_similarity = 1.0 - abs(health_score1 - health_score2)
|
166 |
-
|
|
|
167 |
noise_similarity = self._calculate_noise_similarity(
|
168 |
breed1_features['breed_name'],
|
169 |
breed2_features['breed_name']
|
170 |
)
|
171 |
-
|
172 |
-
#
|
173 |
weights = {
|
174 |
-
'
|
175 |
-
'
|
176 |
-
'
|
177 |
-
'
|
178 |
-
'health': 0.15,
|
179 |
-
'
|
180 |
-
'noise': 0.05 # 保持不變因為不是主要考慮因素
|
181 |
}
|
182 |
-
|
183 |
final_similarity = (
|
184 |
-
size_similarity * weights['size'] +
|
185 |
-
exercise_similarity * weights['exercise'] +
|
186 |
-
grooming_similarity * weights['grooming'] +
|
187 |
-
temperament_similarity * weights['temperament'] +
|
188 |
description_similarity * weights['description'] +
|
|
|
|
|
|
|
189 |
health_similarity * weights['health'] +
|
190 |
noise_similarity * weights['noise']
|
191 |
)
|
192 |
-
|
193 |
return final_similarity
|
194 |
|
195 |
|
@@ -254,80 +190,6 @@ class SmartBreedMatcher:
|
|
254 |
'bonus_score': round(bonus_score, 4),
|
255 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
256 |
}
|
257 |
-
|
258 |
-
def _calculate_size_similarity_enhanced(self, size1: str, size2: str, description: str) -> float:
    """Score how close two breed size labels are, with preference adjustments.

    Both labels are normalized via ``self._normalize_size`` and placed on a
    seven-step ordinal scale (Tiny=0 ... Giant=6). The base score is
    ``1 - distance / 6`` and is then scaled according to ``size2`` and to
    whether ``description`` mentions an apartment.

    Args:
        size1: Size label of the first breed.
        size2: Size label of the second breed; this is the label the
            preference multipliers are keyed on.
        description: Free text checked (case-insensitively) for the word
            "apartment". ``None`` is treated as an empty string.

    Returns:
        The adjusted similarity, capped at 1.0.
    """
    # Ordinal scale for size categories.
    size_map = {
        'Tiny': 0,
        'Small': 1,
        'Small-Medium': 2,
        'Medium': 3,
        'Medium-Large': 4,
        'Large': 5,
        'Giant': 6
    }

    # Unknown categories fall back to 3, i.e. Medium.
    value1 = size_map.get(self._normalize_size(size1), 3)
    value2 = size_map.get(self._normalize_size(size2), 3)

    # Distance on the ordinal scale, rescaled so identical sizes score 1.0.
    base_similarity = 1.0 - (abs(value1 - value2) / 6.0)

    # Preference adjustment keyed on the second breed's size label.
    # NOTE(review): these checks compare the raw ``size2`` string, not the
    # normalized category used above - confirm that is intended.
    if size2 in ['Small', 'Tiny']:
        base_similarity *= 0.5  # strongly penalize small breeds
    elif size2 == 'Giant':
        base_similarity *= 0.6  # strongly penalize giant breeds
    elif size2 in ['Medium', 'Medium-Large']:
        base_similarity *= 1.2  # favor medium and medium-large breeds

    # Large breeds score lower when the description mentions an apartment.
    # ``description or ''`` keeps a missing description from raising.
    if 'apartment' in (description or '').lower() and size2 in ['Large', 'Giant']:
        base_similarity *= 0.8

    # The 1.2 boost can push the score above 1.0, so clamp it.
    return min(1.0, base_similarity)
|
291 |
-
|
292 |
-
def _normalize_size(self, size: str) -> str:
|
293 |
-
"""標準化尺寸分類"""
|
294 |
-
size = size.lower()
|
295 |
-
if 'tiny' in size:
|
296 |
-
return 'Tiny'
|
297 |
-
elif 'small' in size:
|
298 |
-
return 'Small'
|
299 |
-
elif 'medium' in size and 'large' in size:
|
300 |
-
return 'Medium-Large'
|
301 |
-
elif 'medium' in size:
|
302 |
-
return 'Medium'
|
303 |
-
elif 'giant' in size:
|
304 |
-
return 'Giant'
|
305 |
-
elif 'large' in size:
|
306 |
-
return 'Large'
|
307 |
-
return 'Medium' # 預設
|
308 |
-
|
309 |
-
def _calculate_exercise_similarity_enhanced(self, exercise1: str, exercise2: str) -> float:
|
310 |
-
"""增強版運動需求相似度計算"""
|
311 |
-
exercise_map = {
|
312 |
-
'Low': 1,
|
313 |
-
'Moderate': 2,
|
314 |
-
'High': 3,
|
315 |
-
'Very High': 4
|
316 |
-
}
|
317 |
-
|
318 |
-
value1 = exercise_map.get(exercise1, 2)
|
319 |
-
value2 = exercise_map.get(exercise2, 2)
|
320 |
-
|
321 |
-
# 基礎相似度
|
322 |
-
base_similarity = 1.0 - abs(value1 - value2) / 3.0
|
323 |
-
|
324 |
-
# 根據用戶需求調整
|
325 |
-
if exercise2 in ['High', 'Very High']:
|
326 |
-
base_similarity *= 1.2 # 提高高運動量品種的分數
|
327 |
-
elif exercise2 == 'Low':
|
328 |
-
base_similarity *= 0.7 # 降低低運動量品種的分數
|
329 |
-
|
330 |
-
return min(1.0, base_similarity)
|
331 |
|
332 |
def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
|
333 |
"""計算美容需求相似度"""
|
|
|
|
|
1 |
import torch
|
2 |
import re
|
3 |
import numpy as np
|
|
|
80 |
return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
|
81 |
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
    """Compute the similarity between two breeds, including health and noise factors.

    Six component scores are combined as a weighted sum: description and
    temperament text similarity (cosine similarity of cached embeddings),
    exact-match checks on size and exercise, and health and noise
    similarity looked up by breed name.

    Args:
        breed1_features: Feature mapping for the first breed. The keys
            'description', 'temperament', 'size', 'exercise' and
            'breed_name' are read by subscript, so all must be present.
        breed2_features: Feature mapping for the second breed, same keys.

    Returns:
        The weighted sum of the six component scores.
    """
    def text_similarity(field: str) -> float:
        # Cosine similarity between the cached embeddings of one text field.
        first = self._get_cached_embedding(breed1_features[field])
        second = self._get_cached_embedding(breed2_features[field])
        return float(util.pytorch_cos_sim(first, second))

    def match_score(field: str) -> float:
        # Categorical fields: full credit on an exact match, half otherwise.
        return 1.0 if breed1_features[field] == breed2_features[field] else 0.5

    name1 = breed1_features['breed_name']
    name2 = breed2_features['breed_name']

    # Health similarity is one minus the gap between the two health scores.
    health_similarity = 1.0 - abs(
        self._calculate_health_score(name1) - self._calculate_health_score(name2)
    )

    # Insertion order matches the order of the weights below.
    scores = {
        'description': text_similarity('description'),
        'temperament': text_similarity('temperament'),
        'exercise': match_score('exercise'),
        'size': match_score('size'),
        'health': health_similarity,
        'noise': self._calculate_noise_similarity(name1, name2),
    }

    # Component weights; they add up to 1.0.
    weights = {
        'description': 0.25,
        'temperament': 0.20,
        'exercise': 0.2,
        'size': 0.05,
        'health': 0.15,
        'noise': 0.15
    }

    final_similarity = (
        scores['description'] * weights['description'] +
        scores['temperament'] * weights['temperament'] +
        scores['exercise'] * weights['exercise'] +
        scores['size'] * weights['size'] +
        scores['health'] * weights['health'] +
        scores['noise'] * weights['noise']
    )

    return final_similarity
|
130 |
|
131 |
|
|
|
190 |
'bonus_score': round(bonus_score, 4),
|
191 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
192 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
|
194 |
def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
|
195 |
"""計算美容需求相似度"""
|