DawnC committed on
Commit
0c060b3
1 Parent(s): f03d5fa

Update smart_breed_matcher.py

Browse files
Files changed (1) hide show
  1. smart_breed_matcher.py +25 -163
smart_breed_matcher.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import torch
3
  import re
4
  import numpy as np
@@ -81,115 +80,52 @@ class SmartBreedMatcher:
81
  return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
82
 
83
 
84
- # def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
85
- # """計算兩個品種之間的相似度,包含健康和噪音因素"""
86
- # # 計算描述文本的相似度
87
- # desc1_embedding = self._get_cached_embedding(breed1_features['description'])
88
- # desc2_embedding = self._get_cached_embedding(breed2_features['description'])
89
- # description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
90
-
91
- # # 基本特徵相似度
92
- # size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
93
- # exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5
94
-
95
- # # 性格相似度
96
- # temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
97
- # temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
98
- # temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
99
-
100
- # # 健康分數相似度
101
- # health_score1 = self._calculate_health_score(breed1_features['breed_name'])
102
- # health_score2 = self._calculate_health_score(breed2_features['breed_name'])
103
- # health_similarity = 1.0 - abs(health_score1 - health_score2)
104
-
105
- # # 噪音水平相似度
106
- # noise_similarity = self._calculate_noise_similarity(
107
- # breed1_features['breed_name'],
108
- # breed2_features['breed_name']
109
- # )
110
-
111
- # # 加權計算
112
- # weights = {
113
- # 'description': 0.25,
114
- # 'temperament': 0.20,
115
- # 'exercise': 0.2,
116
- # 'size': 0.05,
117
- # 'health': 0.15,
118
- # 'noise': 0.15
119
- # }
120
-
121
- # final_similarity = (
122
- # description_similarity * weights['description'] +
123
- # temperament_similarity * weights['temperament'] +
124
- # exercise_similarity * weights['exercise'] +
125
- # size_similarity * weights['size'] +
126
- # health_similarity * weights['health'] +
127
- # noise_similarity * weights['noise']
128
- # )
129
-
130
- # return final_similarity
131
-
132
  def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
133
- """增強版品種相似度計算"""
134
- # 基礎相似度計算
135
  desc1_embedding = self._get_cached_embedding(breed1_features['description'])
136
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
137
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
138
-
139
- # 尺寸相似度(加強版)
140
- size_similarity = self._calculate_size_similarity_enhanced(
141
- breed1_features['size'],
142
- breed2_features['size'],
143
- breed2_features['description'] # 加入描述以判斷適應性
144
- )
145
-
146
- # 運動需求相似度(加強版)
147
- exercise_similarity = self._calculate_exercise_similarity_enhanced(
148
- breed1_features['exercise'],
149
- breed2_features['exercise']
150
- )
151
-
152
- # 美容需求相似度
153
- grooming_similarity = self._calculate_grooming_similarity(
154
- breed1_features['breed_name'],
155
- breed2_features['breed_name']
156
- )
157
-
158
- # 其他相似度計算保持不變
159
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
160
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
161
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
162
-
 
163
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
164
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
165
  health_similarity = 1.0 - abs(health_score1 - health_score2)
166
-
 
167
  noise_similarity = self._calculate_noise_similarity(
168
  breed1_features['breed_name'],
169
  breed2_features['breed_name']
170
  )
171
-
172
- # 調整權重分配
173
  weights = {
174
- 'size': 0.20, # 仍然重要但不過分主導
175
- 'exercise': 0.20, # 保持高權重因為這是主要需求
176
- 'temperament': 0.15, # 保持不變因為性格很重要
177
- 'grooming': 0.15, # 保持不變
178
- 'health': 0.15, # 提高一點因為這影響長期生活
179
- 'description': 0.10, # 保持不變
180
- 'noise': 0.05 # 保持不變因為不是主要考慮因素
181
  }
182
-
183
  final_similarity = (
184
- size_similarity * weights['size'] +
185
- exercise_similarity * weights['exercise'] +
186
- grooming_similarity * weights['grooming'] +
187
- temperament_similarity * weights['temperament'] +
188
  description_similarity * weights['description'] +
 
 
 
189
  health_similarity * weights['health'] +
190
  noise_similarity * weights['noise']
191
  )
192
-
193
  return final_similarity
194
 
195
 
@@ -254,80 +190,6 @@ class SmartBreedMatcher:
254
  'bonus_score': round(bonus_score, 4),
255
  'scores': {k: round(v, 4) for k, v in scores.items()}
256
  }
257
-
258
- def _calculate_size_similarity_enhanced(self, size1: str, size2: str, description: str) -> float:
259
- """增強版尺寸相似度計算"""
260
- # 更細緻的尺寸映射
261
- size_map = {
262
- 'Tiny': 0,
263
- 'Small': 1,
264
- 'Small-Medium': 2,
265
- 'Medium': 3,
266
- 'Medium-Large': 4,
267
- 'Large': 5,
268
- 'Giant': 6
269
- }
270
-
271
- # 轉換尺寸到數值
272
- value1 = size_map.get(self._normalize_size(size1), 3) # 預設為 Medium
273
- value2 = size_map.get(self._normalize_size(size2), 3)
274
-
275
- # 計算基礎相似度
276
- base_similarity = 1.0 - (abs(value1 - value2) / 6.0)
277
-
278
- # 根據用戶需求的尺寸偏好調整分數
279
- if size2 in ['Small', 'Tiny']:
280
- base_similarity *= 0.5 # 顯著降低小型犬的分數
281
- elif size2 == 'Giant':
282
- base_similarity *= 0.6 # 顯著降低巨型犬的分數
283
- elif size2 in ['Medium', 'Medium-Large']:
284
- base_similarity *= 1.2 # 提高中型和中大型犬的分數
285
-
286
- # 考慮適應性
287
- if 'apartment' in description.lower() and size2 in ['Large', 'Giant']:
288
- base_similarity *= 0.8 # 降低大型犬在公寓的適應性分數
289
-
290
- return min(1.0, base_similarity) # 確保不超過1.0
291
-
292
- def _normalize_size(self, size: str) -> str:
293
- """標準化尺寸分類"""
294
- size = size.lower()
295
- if 'tiny' in size:
296
- return 'Tiny'
297
- elif 'small' in size:
298
- return 'Small'
299
- elif 'medium' in size and 'large' in size:
300
- return 'Medium-Large'
301
- elif 'medium' in size:
302
- return 'Medium'
303
- elif 'giant' in size:
304
- return 'Giant'
305
- elif 'large' in size:
306
- return 'Large'
307
- return 'Medium' # 預設
308
-
309
- def _calculate_exercise_similarity_enhanced(self, exercise1: str, exercise2: str) -> float:
310
- """增強版運動需求相似度計算"""
311
- exercise_map = {
312
- 'Low': 1,
313
- 'Moderate': 2,
314
- 'High': 3,
315
- 'Very High': 4
316
- }
317
-
318
- value1 = exercise_map.get(exercise1, 2)
319
- value2 = exercise_map.get(exercise2, 2)
320
-
321
- # 基礎相似度
322
- base_similarity = 1.0 - abs(value1 - value2) / 3.0
323
-
324
- # 根據用戶需求調整
325
- if exercise2 in ['High', 'Very High']:
326
- base_similarity *= 1.2 # 提高高運動量品種的分數
327
- elif exercise2 == 'Low':
328
- base_similarity *= 0.7 # 降低低運動量品種的分數
329
-
330
- return min(1.0, base_similarity)
331
 
332
  def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
333
  """計算美容需求相似度"""
 
 
1
  import torch
2
  import re
3
  import numpy as np
 
80
  return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
81
 
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
84
+ """計算兩個品種之間的相似度,包含健康和噪音因素"""
85
+ # 計算描述文本的相似度
86
  desc1_embedding = self._get_cached_embedding(breed1_features['description'])
87
  desc2_embedding = self._get_cached_embedding(breed2_features['description'])
88
  description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))
89
+
90
+ # 基本特徵相似度
91
+ size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
92
+ exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5
93
+
94
+ # 性格相似度
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
96
  temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
97
  temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))
98
+
99
+ # 健康分數相似度
100
  health_score1 = self._calculate_health_score(breed1_features['breed_name'])
101
  health_score2 = self._calculate_health_score(breed2_features['breed_name'])
102
  health_similarity = 1.0 - abs(health_score1 - health_score2)
103
+
104
+ # 噪音水平相似度
105
  noise_similarity = self._calculate_noise_similarity(
106
  breed1_features['breed_name'],
107
  breed2_features['breed_name']
108
  )
109
+
110
+ # 加權計算
111
  weights = {
112
+ 'description': 0.25,
113
+ 'temperament': 0.20,
114
+ 'exercise': 0.2,
115
+ 'size': 0.05,
116
+ 'health': 0.15,
117
+ 'noise': 0.15
 
118
  }
119
+
120
  final_similarity = (
 
 
 
 
121
  description_similarity * weights['description'] +
122
+ temperament_similarity * weights['temperament'] +
123
+ exercise_similarity * weights['exercise'] +
124
+ size_similarity * weights['size'] +
125
  health_similarity * weights['health'] +
126
  noise_similarity * weights['noise']
127
  )
128
+
129
  return final_similarity
130
 
131
 
 
190
  'bonus_score': round(bonus_score, 4),
191
  'scores': {k: round(v, 4) for k, v in scores.items()}
192
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
195
  """計算美容需求相似度"""