DawnC committed on
Commit
edb9086
1 Parent(s): 303f94e

Update smart_breed_matcher.py

Browse files
Files changed (1) hide show
  1. smart_breed_matcher.py +102 -21
smart_breed_matcher.py CHANGED
@@ -136,34 +136,34 @@ class SmartBreedMatcher:
136
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
137
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
138
 
139
- # 使用新的精細計算方法
140
- size_similarity = self._calculate_size_similarity(breed1_features['size'], breed2_features['size'])
141
- exercise_similarity = self._calculate_exercise_similarity(breed1_features['exercise'], breed2_features['exercise'])
 
 
 
142
 
143
- # 性格相似度
 
144
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
145
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
146
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
147
-
148
- # 健康分數相似度
149
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
150
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
151
  health_similarity = 1.0 - abs(health_score1 - health_score2)
152
-
153
- # 噪音水平相似度
154
  noise_similarity = self._calculate_noise_similarity(
155
  breed1_features['breed_name'],
156
  breed2_features['breed_name']
157
  )
158
 
159
- # 加權計算
160
  weights = {
161
- 'description': 0.25,
162
  'temperament': 0.20,
163
- 'exercise': 0.2,
164
- 'size': 0.05,
165
- 'health': 0.15,
166
- 'noise': 0.15
167
  }
168
 
169
  final_similarity = (
@@ -240,14 +240,95 @@ class SmartBreedMatcher:
240
  'scores': {k: round(v, 4) for k, v in scores.items()}
241
  }
242
 
243
- def _calculate_size_similarity(self, size1: str, size2: str) -> float:
244
- size_map = {'Small': 1, 'Medium': 2, 'Large': 3, 'Giant': 4}
245
- value1 = size_map.get(size1, 2) # 預設為 'Medium'
246
- value2 = size_map.get(size2, 2) # 預設為 'Medium'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
- # 計算相似度
249
- size_similarity = 1.0 - abs(value1 - value2) / 3
250
- return max(0.0, size_similarity) # 確保相似度在 [0, 1] 範圍內
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
  def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
253
  exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}
 
136
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
137
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
138
 
139
+ # 使用改進後的尺寸相似度計算
140
+ size_similarity = self._calculate_size_similarity(
141
+ breed1_features['size'],
142
+ breed2_features['size'],
143
+ self._get_preferred_size_range(breed1_features['description'])
144
+ )
145
 
146
+ # 其他相似度計算
147
+ exercise_similarity = self._calculate_exercise_similarity(breed1_features['exercise'], breed2_features['exercise'])
148
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
149
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
150
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
 
 
151
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
152
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
153
  health_similarity = 1.0 - abs(health_score1 - health_score2)
 
 
154
  noise_similarity = self._calculate_noise_similarity(
155
  breed1_features['breed_name'],
156
  breed2_features['breed_name']
157
  )
158
 
159
+ # 調整權重,增加尺寸的重要性
160
  weights = {
161
+ 'description': 0.20, # 降低描述權重
162
  'temperament': 0.20,
163
+ 'exercise': 0.20,
164
+ 'size': 0.20, # 顯著提高尺寸權重
165
+ 'health': 0.10, # 略微降低
166
+ 'noise': 0.10 # 略微降低
167
  }
168
 
169
  final_similarity = (
 
240
  'scores': {k: round(v, 4) for k, v in scores.items()}
241
  }
242
 
243
+ def _get_preferred_size_range(self, description: str) -> tuple:
244
+ """分析描述文本,確定用戶偏好的尺寸範圍"""
245
+ description = description.lower()
246
+
247
+ # 定義關鍵詞匹配
248
+ size_indicators = {
249
+ 'small': ['small', 'tiny', 'little'],
250
+ 'medium': ['medium', 'medium-sized', 'moderate size'],
251
+ 'medium-large': ['medium to large', 'slightly larger', 'medium-large'],
252
+ 'large': ['large', 'big'],
253
+ 'giant': ['giant', 'huge', 'very large']
254
+ }
255
+
256
+ # 檢測負面提及
257
+ negative_indicators = {
258
+ 'small': ['not too small', 'not small'],
259
+ 'large': ['not too large', 'not too big', 'not large'],
260
+ 'giant': ['not giant', 'not huge']
261
+ }
262
+
263
+ # 默認為中型
264
+ preferred_min = 2 # medium
265
+ preferred_max = 3 # large
266
+
267
+ # 分析描述中的尺寸偏好
268
+ for size, keywords in size_indicators.items():
269
+ for keyword in keywords:
270
+ if keyword in description:
271
+ if size == 'small':
272
+ preferred_min, preferred_max = 1, 2
273
+ elif size == 'medium':
274
+ preferred_min, preferred_max = 2, 2
275
+ elif size == 'medium-large':
276
+ preferred_min, preferred_max = 2, 3
277
+ elif size == 'large':
278
+ preferred_min, preferred_max = 3, 3
279
+ elif size == 'giant':
280
+ preferred_min, preferred_max = 3, 4
281
+
282
+ # 檢查負面提及並調整
283
+ for size, keywords in negative_indicators.items():
284
+ for keyword in keywords:
285
+ if keyword in description:
286
+ if size == 'small':
287
+ preferred_min = max(2, preferred_min)
288
+ elif size == 'large':
289
+ preferred_max = min(2, preferred_max)
290
+ elif size == 'giant':
291
+ preferred_max = min(3, preferred_max)
292
+
293
+ return (preferred_min, preferred_max)
294
 
295
+ def _calculate_size_similarity(self, size1: str, size2: str, preferred_range: tuple = None) -> float:
296
+ """改進的尺寸相似度計算"""
297
+ # 更細緻的尺寸映射
298
+ size_map = {
299
+ 'Tiny': 0.5,
300
+ 'Small': 1,
301
+ 'Small-Medium': 1.5,
302
+ 'Medium': 2,
303
+ 'Medium-Large': 2.5,
304
+ 'Large': 3,
305
+ 'Giant': 4
306
+ }
307
+
308
+ # 獲取數值
309
+ value1 = size_map.get(size1, 2)
310
+ value2 = size_map.get(size2, 2)
311
+
312
+ # 基礎相似度計算
313
+ base_similarity = 1.0 - (abs(value1 - value2) / 3.5) # 3.5 是最大可能差異
314
+
315
+ # 如果有偏好範圍,進行額外調整
316
+ if preferred_range:
317
+ preferred_min, preferred_max = preferred_range
318
+
319
+ # 檢查是否在偏好範圍內
320
+ in_range = (preferred_min <= value2 <= preferred_max)
321
+
322
+ # 如果不在範圍內,根據距離降低分數
323
+ if not in_range:
324
+ distance_to_range = min(
325
+ abs(value2 - preferred_min),
326
+ abs(value2 - preferred_max)
327
+ )
328
+ penalty = distance_to_range * 0.2 # 每單位差異降低20%
329
+ base_similarity *= (1 - penalty)
330
+
331
+ return max(0.0, min(1.0, base_similarity)) # 確保在 [0, 1] 範圍內
332
 
333
  def _calculate_exercise_similarity(self, exercise1: str, exercise2: str) -> float:
334
  exercise_map = {'Low': 1, 'Moderate': 2, 'High': 3, 'Very High': 4}