Spaces:
Running
on
Zero
Running
on
Zero
Update smart_breed_matcher.py
Browse files- smart_breed_matcher.py +129 -107
smart_breed_matcher.py
CHANGED
@@ -130,51 +130,66 @@ class SmartBreedMatcher:
|
|
130 |
# return final_similarity
|
131 |
|
132 |
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
|
133 |
-
"""
|
134 |
-
#
|
135 |
desc1_embedding = self._get_cached_embedding(breed1_features['description'])
|
136 |
desc2_embedding = self._get_cached_embedding(breed2_features['description'])
|
137 |
description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
|
138 |
-
|
139 |
-
#
|
140 |
-
size_similarity = self.
|
141 |
-
breed1_features['size'],
|
142 |
breed2_features['size'],
|
143 |
-
|
144 |
)
|
145 |
-
|
146 |
-
#
|
147 |
-
exercise_similarity = self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
|
149 |
temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
|
150 |
temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
|
|
|
151 |
health_score1 = self._calculate_health_score(breed1_features['breed_name'])
|
152 |
health_score2 = self._calculate_health_score(breed2_features['breed_name'])
|
153 |
health_similarity = 1.0 - abs(health_score1 - health_score2)
|
|
|
154 |
noise_similarity = self._calculate_noise_similarity(
|
155 |
breed1_features['breed_name'],
|
156 |
breed2_features['breed_name']
|
157 |
)
|
158 |
|
159 |
-
#
|
160 |
weights = {
|
161 |
-
'
|
162 |
-
'
|
163 |
-
'
|
164 |
-
'
|
165 |
-
'health': 0.
|
166 |
-
'
|
|
|
167 |
}
|
168 |
-
|
169 |
final_similarity = (
|
170 |
-
description_similarity * weights['description'] +
|
171 |
-
temperament_similarity * weights['temperament'] +
|
172 |
-
exercise_similarity * weights['exercise'] +
|
173 |
size_similarity * weights['size'] +
|
|
|
|
|
|
|
|
|
174 |
health_similarity * weights['health'] +
|
175 |
noise_similarity * weights['noise']
|
176 |
)
|
177 |
-
|
178 |
return final_similarity
|
179 |
|
180 |
|
@@ -240,104 +255,111 @@ class SmartBreedMatcher:
|
|
240 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
241 |
}
|
242 |
|
243 |
-
def
|
244 |
-
"""
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
'
|
250 |
-
'
|
251 |
-
'
|
252 |
-
'
|
253 |
-
'
|
254 |
}
|
255 |
|
256 |
-
#
|
257 |
-
|
258 |
-
|
259 |
-
'large': ['not too large', 'not too big', 'not large'],
|
260 |
-
'giant': ['not giant', 'not huge']
|
261 |
-
}
|
262 |
|
263 |
-
#
|
264 |
-
|
265 |
-
preferred_max = 3 # large
|
266 |
|
267 |
-
#
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
preferred_min, preferred_max = 2, 2
|
275 |
-
elif size == 'medium-large':
|
276 |
-
preferred_min, preferred_max = 2, 3
|
277 |
-
elif size == 'large':
|
278 |
-
preferred_min, preferred_max = 3, 3
|
279 |
-
elif size == 'giant':
|
280 |
-
preferred_min, preferred_max = 3, 4
|
281 |
|
282 |
-
#
|
283 |
-
|
284 |
-
|
285 |
-
if keyword in description:
|
286 |
-
if size == 'small':
|
287 |
-
preferred_min = max(2, preferred_min)
|
288 |
-
elif size == 'large':
|
289 |
-
preferred_max = min(2, preferred_max)
|
290 |
-
elif size == 'giant':
|
291 |
-
preferred_max = min(3, preferred_max)
|
292 |
|
293 |
-
return (
|
294 |
|
295 |
-
def
|
296 |
-
"""
|
297 |
-
|
298 |
-
|
299 |
-
'Tiny'
|
300 |
-
|
301 |
-
'Small
|
302 |
-
|
303 |
-
'Medium-Large'
|
304 |
-
|
305 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
}
|
307 |
|
308 |
-
|
309 |
-
|
310 |
-
value2 = size_map.get(size2, 2)
|
311 |
|
312 |
-
#
|
313 |
-
base_similarity = 1.0 -
|
314 |
|
315 |
-
#
|
316 |
-
if
|
317 |
-
|
318 |
-
|
319 |
-
#
|
320 |
-
in_range = (preferred_min <= value2 <= preferred_max)
|
321 |
-
|
322 |
-
# 如果不在範圍內,根據距離降低分數
|
323 |
-
if not in_range:
|
324 |
-
distance_to_range = min(
|
325 |
-
abs(value2 - preferred_min),
|
326 |
-
abs(value2 - preferred_max)
|
327 |
-
)
|
328 |
-
penalty = distance_to_range * 0.2 # 每單位差異降低20%
|
329 |
-
base_similarity *= (1 - penalty)
|
330 |
|
331 |
-
return
|
332 |
-
|
333 |
-
def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
|
334 |
-
exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}
|
335 |
-
value1 = exercise_map.get(exercise1, 2) # 預設為 'Moderate'
|
336 |
-
value2 = exercise_map.get(exercise2, 2) # 預設為 'Moderate'
|
337 |
|
338 |
-
|
339 |
-
|
340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
def _calculate_health_score(self, breed_name: str) -> float:
|
343 |
"""計算品種的健康分數"""
|
|
|
130 |
# return final_similarity
|
131 |
|
132 |
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
    """Return a weighted similarity score between two breeds.

    Seven component similarities are computed (description, size, exercise,
    grooming, temperament, health, noise) and blended with fixed weights
    that add up to 1.0.

    Args:
        breed1_features: Feature mapping of the first breed. The keys
            'description', 'size', 'exercise', 'temperament' and
            'breed_name' are read.
        breed2_features: Feature mapping of the second breed, same keys.
            Its description is also handed to the size comparison.

    Returns:
        The weighted sum of the component similarities.
    """

    def embedding_similarity(field: str) -> float:
        # Cosine similarity between the cached embeddings of one text field.
        first = self._get_cached_embedding(breed1_features[field])
        second = self._get_cached_embedding(breed2_features[field])
        return float(util.pytorch_cos_sim(first, second))

    components = {}

    # Free-text description similarity.
    components['description'] = embedding_similarity('description')

    # Size similarity; the second breed's description is passed along so the
    # helper can take adaptability hints into account.
    components['size'] = self._calculate_size_similarity_enhanced(
        breed1_features['size'],
        breed2_features['size'],
        breed2_features['description'],
    )

    # Exercise-need similarity.
    components['exercise'] = self._calculate_exercise_similarity_enhanced(
        breed1_features['exercise'],
        breed2_features['exercise'],
    )

    # Grooming-need similarity, looked up by breed name.
    components['grooming'] = self._calculate_grooming_similarity(
        breed1_features['breed_name'],
        breed2_features['breed_name'],
    )

    # Temperament text similarity.
    components['temperament'] = embedding_similarity('temperament')

    # Health similarity: one minus the absolute gap between the two scores.
    components['health'] = 1.0 - abs(
        self._calculate_health_score(breed1_features['breed_name'])
        - self._calculate_health_score(breed2_features['breed_name'])
    )

    # Noise similarity, looked up by breed name.
    components['noise'] = self._calculate_noise_similarity(
        breed1_features['breed_name'],
        breed2_features['breed_name'],
    )

    # (component, weight) pairs; the weights add up to 1.0.
    weighting = (
        ('size', 0.20),
        ('exercise', 0.20),
        ('grooming', 0.15),
        ('temperament', 0.15),
        ('description', 0.10),
        ('health', 0.15),
        ('noise', 0.05),
    )

    blended = 0.0
    for component, weight in weighting:
        blended += components[component] * weight
    return blended
|
194 |
|
195 |
|
|
|
255 |
'scores': {k: round(v, 4) for k, v in scores.items()}
|
256 |
}
|
257 |
|
258 |
+
def _calculate_size_similarity_enhanced(self, size1: str, size2: str, description: str) -> float:
|
259 |
+
"""增強版尺寸相似度計算"""
|
260 |
+
# 更細緻的尺寸映射
|
261 |
+
size_map = {
|
262 |
+
'Tiny': 0,
|
263 |
+
'Small': 1,
|
264 |
+
'Small-Medium': 2,
|
265 |
+
'Medium': 3,
|
266 |
+
'Medium-Large': 4,
|
267 |
+
'Large': 5,
|
268 |
+
'Giant': 6
|
269 |
}
|
270 |
|
271 |
+
# 轉換尺寸到數值
|
272 |
+
value1 = size_map.get(self._normalize_size(size1), 3) # 預設為 Medium
|
273 |
+
value2 = size_map.get(self._normalize_size(size2), 3)
|
|
|
|
|
|
|
274 |
|
275 |
+
# 計算基礎相似度
|
276 |
+
base_similarity = 1.0 - (abs(value1 - value2) / 6.0)
|
|
|
277 |
|
278 |
+
# 根據用戶需求的尺寸偏好調整分數
|
279 |
+
if size2 in ['Small', 'Tiny']:
|
280 |
+
base_similarity *= 0.5 # 顯著降低小型犬的分數
|
281 |
+
elif size2 == 'Giant':
|
282 |
+
base_similarity *= 0.6 # 顯著降低巨型犬的分數
|
283 |
+
elif size2 in ['Medium', 'Medium-Large']:
|
284 |
+
base_similarity *= 1.2 # 提高中型和中大型犬的分數
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
|
286 |
+
# 考慮適應性
|
287 |
+
if 'apartment' in description.lower() and size2 in ['Large', 'Giant']:
|
288 |
+
base_similarity *= 0.8 # 降低大型犬在公寓的適應性分數
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
|
290 |
+
return min(1.0, base_similarity) # 確保不超過1.0
|
291 |
|
292 |
+
def _normalize_size(self, size: str) -> str:
|
293 |
+
"""標準化尺寸分類"""
|
294 |
+
size = size.lower()
|
295 |
+
if 'tiny' in size:
|
296 |
+
return 'Tiny'
|
297 |
+
elif 'small' in size:
|
298 |
+
return 'Small'
|
299 |
+
elif 'medium' in size and 'large' in size:
|
300 |
+
return 'Medium-Large'
|
301 |
+
elif 'medium' in size:
|
302 |
+
return 'Medium'
|
303 |
+
elif 'giant' in size:
|
304 |
+
return 'Giant'
|
305 |
+
elif 'large' in size:
|
306 |
+
return 'Large'
|
307 |
+
return 'Medium' # 預設
|
308 |
+
|
309 |
+
def _calculate_exercise_similarity_enhanced(self, exercise1: str, exercise2: str) -> float:
|
310 |
+
"""增強版運動需求相似度計算"""
|
311 |
+
exercise_map = {
|
312 |
+
'Low': 1,
|
313 |
+
'Moderate': 2,
|
314 |
+
'High': 3,
|
315 |
+
'Very High': 4
|
316 |
}
|
317 |
|
318 |
+
value1 = exercise_map.get(exercise1, 2)
|
319 |
+
value2 = exercise_map.get(exercise2, 2)
|
|
|
320 |
|
321 |
+
# 基礎相似度
|
322 |
+
base_similarity = 1.0 - abs(value1 - value2) / 3.0
|
323 |
|
324 |
+
# 根據用戶需求調整
|
325 |
+
if exercise2 in ['High', 'Very High']:
|
326 |
+
base_similarity *= 1.2 # 提高高運動量品種的分數
|
327 |
+
elif exercise2 == 'Low':
|
328 |
+
base_similarity *= 0.7 # 降低低運動量品種的分數
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
|
330 |
+
return min(1.0, base_similarity)
|
|
|
|
|
|
|
|
|
|
|
331 |
|
332 |
+
def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
|
333 |
+
"""計算美容需求相似度"""
|
334 |
+
grooming_map = {
|
335 |
+
'Low': 1,
|
336 |
+
'Moderate': 2,
|
337 |
+
'High': 3
|
338 |
+
}
|
339 |
+
|
340 |
+
# 從dog_data中獲取美容需求
|
341 |
+
breed1_info = next((dog for dog in self.dog_data if dog[1] == breed1), None)
|
342 |
+
breed2_info = next((dog for dog in self.dog_data if dog[1] == breed2), None)
|
343 |
+
|
344 |
+
if not breed1_info or not breed2_info:
|
345 |
+
return 0.5 # 默認中等相似度
|
346 |
+
|
347 |
+
grooming1 = breed1_info[8] # Grooming_Needs index
|
348 |
+
grooming2 = breed2_info[8]
|
349 |
+
|
350 |
+
value1 = grooming_map.get(grooming1, 2)
|
351 |
+
value2 = grooming_map.get(grooming2, 2)
|
352 |
+
|
353 |
+
# 基礎相似度
|
354 |
+
base_similarity = 1.0 - abs(value1 - value2) / 2.0
|
355 |
+
|
356 |
+
# 根據用戶需求調整
|
357 |
+
if grooming2 == 'Moderate':
|
358 |
+
base_similarity *= 1.1 # 稍微提高中等美容需求的分數
|
359 |
+
elif grooming2 == 'High':
|
360 |
+
base_similarity *= 0.9 # 稍微降低高美容需求的分數
|
361 |
+
|
362 |
+
return min(1.0, base_similarity)
|
363 |
|
364 |
def _calculate_health_score(self, breed_name: str) -> float:
|
365 |
"""計算品種的健康分數"""
|