DawnC committed on
Commit
b0f8388
1 Parent(s): 6ab66a3

Delete smart_breed_matcher.py

Browse files
Files changed (1) hide show
  1. smart_breed_matcher.py +0 -1001
smart_breed_matcher.py DELETED
@@ -1,1001 +0,0 @@
1
- import torch
2
- import re
3
- import numpy as np
4
- import spaces
5
- from typing import List, Dict, Tuple, Optional
6
- from dataclasses import dataclass
7
- from breed_health_info import breed_health_info
8
- from breed_noise_info import breed_noise_info
9
- from dog_database import dog_data
10
- from scoring_calculation_system import UserPreferences
11
- from sentence_transformers import SentenceTransformer, util
12
- from functools import wraps
13
-
14
def gpu_init_wrapper(func):
    """Return ``func`` wrapped so that every call is routed through ``spaces.GPU``.

    A plain pass-through function is created first, given ``func``'s metadata
    with ``functools.wraps``, and then handed to ``spaces.GPU``.
    """
    def _passthrough(*args, **kwargs):
        return func(*args, **kwargs)

    # Same stacking order as writing @spaces.GPU above @wraps(func).
    return spaces.GPU(wraps(func)(_passthrough))
20
-
21
def safe_prediction(func):
    """Decorator that retries ``func`` once when it fails with a CUDA RuntimeError.

    If ``func`` raises a ``RuntimeError`` whose message contains "CUDA", a
    notice is printed and ``func`` is called a second time with the same
    arguments; the result (or exception) of that second call is propagated.
    Any other ``RuntimeError`` is re-raised unchanged.

    NOTE(review): the retry does not itself move anything to the CPU; it only
    repeats the identical call.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except RuntimeError as err:
            if "CUDA" not in str(err):
                raise
            print("GPU 操作失敗,嘗試使用 CPU")
            return func(*args, **kwargs)
        return outcome

    return wrapper
33
-
34
- class SmartBreedMatcher:
35
-
36
- def __init__(self, dog_data: List[Tuple]):
37
- self.dog_data = dog_data
38
- self.model = None
39
- self._embedding_cache = {}
40
- self._clear_cache()
41
-
42
- def _initialize_model(self):
43
- """延遲初始化模型,只在需要時才創建"""
44
- if self.model is None:
45
- self.model = SentenceTransformer('all-mpnet-base-v2')
46
-
47
- def _clear_cache(self):
48
- self._embedding_cache = {}
49
-
50
-
51
@spaces.GPU
def _get_cached_embedding(self, text: str) -> torch.Tensor:
    """Return the embedding of ``text``, encoding it only on first request.

    The model is created on demand, and results are memoised per exact
    input string in ``self._embedding_cache``.

    NOTE(review): the value is whatever ``self.model.encode(text)`` returns;
    confirm that it matches the ``torch.Tensor`` annotation.
    """
    if self.model is None:
        self._initialize_model()

    cache = self._embedding_cache
    if text in cache:
        return cache[text]

    embedding = self.model.encode(text)
    cache[text] = embedding
    return embedding
60
-
61
- def _categorize_breeds(self) -> Dict:
62
- """自動將狗品種分類"""
63
- categories = {
64
- 'working_dogs': [],
65
- 'herding_dogs': [],
66
- 'hunting_dogs': [],
67
- 'companion_dogs': [],
68
- 'guard_dogs': []
69
- }
70
-
71
- for breed_info in self.dog_data:
72
- description = breed_info[9].lower()
73
- temperament = breed_info[4].lower()
74
-
75
- # 根據描述和性格特徵自動分類
76
- if any(word in description for word in ['herding', 'shepherd', 'cattle', 'flock']):
77
- categories['herding_dogs'].append(breed_info[1])
78
- elif any(word in description for word in ['hunting', 'hunt', 'retriever', 'pointer']):
79
- categories['hunting_dogs'].append(breed_info[1])
80
- elif any(word in description for word in ['companion', 'toy', 'family', 'lap']):
81
- categories['companion_dogs'].append(breed_info[1])
82
- elif any(word in description for word in ['guard', 'protection', 'watchdog']):
83
- categories['guard_dogs'].append(breed_info[1])
84
- elif any(word in description for word in ['working', 'draft', 'cart']):
85
- categories['working_dogs'].append(breed_info[1])
86
-
87
- return categories
88
-
89
- def find_similar_breeds(self, breed_name: str, top_n: int = 5) -> List[Tuple[str, float]]:
90
- """
91
- 找出與指定品種最相似的其他品種
92
-
93
- Args:
94
- breed_name: 目標品種名稱
95
- top_n: 返回的相似品種數量
96
-
97
- Returns:
98
- List[Tuple[str, float]]: 相似品種列表,包含品種名稱和相似度分數
99
- """
100
- try:
101
- if self.model is None:
102
- self._initialize_model()
103
- target_breed = next((breed for breed in self.dog_data if breed[1] == breed_name), None)
104
- if not target_breed:
105
- return []
106
-
107
- # 獲取完整的目標品種特徵
108
- target_features = {
109
- 'breed_name': target_breed[1],
110
- 'size': target_breed[2],
111
- 'temperament': target_breed[4],
112
- 'exercise': target_breed[7],
113
- 'grooming': target_breed[8],
114
- 'description': target_breed[9],
115
- 'good_with_children': target_breed[6] # 添加這個特徵
116
- }
117
-
118
- similarities = []
119
- for breed in self.dog_data:
120
- if breed[1] != breed_name:
121
- breed_features = {
122
- 'breed_name': breed[1],
123
- 'size': breed[2],
124
- 'temperament': breed[4],
125
- 'exercise': breed[7],
126
- 'grooming': breed[8],
127
- 'description': breed[9],
128
- 'good_with_children': breed[6] # 添加這個特徵
129
- }
130
-
131
- try:
132
- similarity_score = self._calculate_breed_similarity(target_features, breed_features)
133
- # 確保分數在有效範圍內
134
- similarity_score = min(1.0, max(0.0, similarity_score))
135
- similarities.append((breed[1], similarity_score))
136
- except Exception as e:
137
- print(f"Error calculating similarity for {breed[1]}: {str(e)}")
138
- continue
139
-
140
- # 根據相似度排序並返回前N個
141
- return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]
142
-
143
- except Exception as e:
144
- print(f"Error in find_similar_breeds: {str(e)}")
145
- return []
146
-
147
-
148
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict, weights: Optional[Dict[str, float]] = None) -> float:
    """Blend six per-feature similarities between two breeds into one score.

    Args:
        breed1_features: Feature dict of the reference breed; 'size',
            'exercise', 'temperament' and 'breed_name' are read.
        breed2_features: Feature dict of the candidate breed; the same keys
            are read, plus 'description' for the size comparison.
        weights: Optional weights keyed by 'size', 'exercise', 'temperament',
            'grooming', 'health' and 'noise'. A missing key counts as 0.1.
            ``None`` (the default) is treated as an empty mapping, so callers
            with no scenario weights get a uniform blend.

    Returns:
        The weighted average clamped to [0.2, 1.0], or 0.5 if anything raises.
    """
    try:
        if weights is None:
            weights = {}

        # 1. Per-feature similarities.
        size_similarity = self._calculate_size_similarity_enhanced(
            breed1_features.get('size', 'Medium'),
            breed2_features.get('size', 'Medium'),
            breed2_features.get('description', '')
        )

        exercise_similarity = self._calculate_exercise_similarity_enhanced(
            breed1_features.get('exercise', 'Moderate'),
            breed2_features.get('exercise', 'Moderate')
        )

        # Temperament is compared by cosine similarity of text embeddings.
        temp1_embedding = self._get_cached_embedding(breed1_features.get('temperament', ''))
        temp2_embedding = self._get_cached_embedding(breed2_features.get('temperament', ''))
        temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))

        name1 = breed1_features.get('breed_name', '')
        name2 = breed2_features.get('breed_name', '')
        grooming_similarity = self._calculate_grooming_similarity(name1, name2)
        health_similarity = self._calculate_health_score_similarity(name1, name2)
        noise_similarity = self._calculate_noise_similarity(name1, name2)

        similarities = {
            'size': size_similarity,
            'exercise': exercise_similarity,
            'temperament': temperament_similarity,
            'grooming': grooming_similarity,
            'health': health_similarity,
            'noise': noise_similarity
        }

        # 2 + 3. Adjust each feature by its importance, then accumulate.
        weighted_sum = 0
        weight_sum = 0
        for feature, similarity in similarities.items():
            feature_weight = weights.get(feature, 0.1)
            if feature_weight > 0.3:
                # High-priority feature: punish a poor match hard and reward
                # a good one slightly.
                score = similarity * 0.5 if similarity < 0.5 else similarity * 1.2
            else:
                score = similarity
            weighted_sum += score * feature_weight
            weight_sum += feature_weight

        final_similarity = weighted_sum / weight_sum if weight_sum > 0 else 0.5

        # Floor of 0.2 so no pairing is reported as completely dissimilar.
        return min(1.0, max(0.2, final_similarity))

    except Exception as e:
        print(f"Error in calculate_breed_similarity: {str(e)}")
        return 0.5
218
-
219
def get_breed_characteristics_score(self, breed_features: Dict, description: str) -> float:
    """Score how well a breed's traits fit keywords found in a free-text description.

    The score starts at 1.0 and is multiplied by one factor per topic that the
    description mentions (activity, apartment/house, family, quiet).

    Args:
        breed_features: Feature dict; 'exercise', 'size', 'good_with_children'
            and optionally 'noise_level' / 'breed_name' are read.
        description: The user's free-text description.

    Returns:
        The product of the applicable multipliers, clamped to [0.3, 1.5].
    """
    description_lower = description.lower()
    breed_score_multipliers = []

    # Activity level: only considered when the user describes an active lifestyle.
    if any(keyword in description_lower for keyword in ('active', 'running', 'energetic', 'athletic')):
        exercise_multipliers = {
            'Very High': 1.5,
            'High': 1.3,
            'Moderate': 0.7,
            'Low': 0.4
        }
        exercise_needs = breed_features.get('exercise', 'Moderate')
        breed_score_multipliers.append(exercise_multipliers.get(exercise_needs, 1.0))

    # Size versus living space; 'apartment' takes precedence over 'house'.
    size = breed_features.get('size', 'Medium')
    size_multipliers = None
    if 'apartment' in description_lower:
        size_multipliers = {
            'Giant': 0.3,
            'Large': 0.6,
            'Medium-Large': 0.8,
            'Medium': 1.4,
            'Small': 1.0,
            'Tiny': 0.9
        }
    elif 'house' in description_lower:
        size_multipliers = {
            'Giant': 0.8,
            'Large': 1.2,
            'Medium-Large': 1.3,
            'Medium': 1.2,
            'Small': 0.9,
            'Tiny': 0.7
        }
    if size_multipliers is not None:
        breed_score_multipliers.append(size_multipliers.get(size, 1.0))

    # Family suitability.
    if any(keyword in description_lower for keyword in ('family', 'children', 'kids')):
        good_with_children = breed_features.get('good_with_children', False)
        breed_score_multipliers.append(1.3 if good_with_children else 0.6)

    # Noise.
    if 'quiet' in description_lower:
        if 'noise_level' in breed_features:
            noise_level = breed_features['noise_level']
        else:
            # The feature dicts built in this file carry no 'noise_level', so
            # this branch used to treat every breed as 'Moderate'. Look the
            # level up by breed name in the noise table instead.
            noise_level = breed_noise_info.get(
                breed_features.get('breed_name', ''), {}
            ).get('noise_level', 'Moderate')
        noise_multipliers = {
            'Low': 1.3,
            'Moderate': 0.9,
            'High': 0.5
        }
        breed_score_multipliers.append(noise_multipliers.get(noise_level, 1.0))

    score = 1.0
    for multiplier in breed_score_multipliers:
        score *= multiplier

    # Keep the result within a sane range.
    return min(1.5, max(0.3, score))
280
-
281
- def _calculate_size_similarity_enhanced(self, size1: str, size2: str, description: str) -> float:
282
- """
283
- 增強版尺寸相似度計算
284
- """
285
- try:
286
- # 更細緻的尺寸映射
287
- size_map = {
288
- 'Tiny': 0,
289
- 'Small': 1,
290
- 'Small-Medium': 2,
291
- 'Medium': 3,
292
- 'Medium-Large': 4,
293
- 'Large': 5,
294
- 'Giant': 6
295
- }
296
-
297
- # 標準化並獲取數值
298
- value1 = size_map.get(self._normalize_size(size1), 3)
299
- value2 = size_map.get(self._normalize_size(size2), 3)
300
-
301
- # 基礎相似度計算
302
- base_similarity = 1.0 - (abs(value1 - value2) / 6.0)
303
-
304
- # 環境適應性調整
305
- if 'apartment' in description.lower():
306
- if size2 in ['Large', 'Giant']:
307
- base_similarity *= 0.7 # 大型犬在公寓降低相似度
308
- elif size2 in ['Medium', 'Medium-Large']:
309
- base_similarity *= 1.2 # 中型犬更適合
310
- elif size2 in ['Small', 'Tiny']:
311
- base_similarity *= 0.8 # 過小的狗也不是最佳選擇
312
-
313
- return min(1.0, base_similarity)
314
- except Exception as e:
315
- print(f"Error in calculate_size_similarity_enhanced: {str(e)}")
316
- return 0.5
317
-
318
- def _normalize_size(self, size: str) -> str:
319
- """
320
- 標準化犬種尺寸分類
321
-
322
- Args:
323
- size: 原始尺寸描述
324
-
325
- Returns:
326
- str: 標準化後的尺寸類別
327
- """
328
- try:
329
- size = size.lower()
330
- if 'tiny' in size:
331
- return 'Tiny'
332
- elif 'small' in size and 'medium' in size:
333
- return 'Small-Medium'
334
- elif 'small' in size:
335
- return 'Small'
336
- elif 'medium' in size and 'large' in size:
337
- return 'Medium-Large'
338
- elif 'medium' in size:
339
- return 'Medium'
340
- elif 'giant' in size:
341
- return 'Giant'
342
- elif 'large' in size:
343
- return 'Large'
344
- return 'Medium' # 默認為 Medium
345
- except Exception as e:
346
- print(f"Error in normalize_size: {str(e)}")
347
- return 'Medium'
348
-
349
- def _calculate_exercise_similarity_enhanced(self, exercise1: str, exercise2: str) -> float:
350
- try:
351
- exercise_values = {
352
- 'Very High': 4,
353
- 'High': 3,
354
- 'Moderate': 2,
355
- 'Low': 1
356
- }
357
-
358
- value1 = exercise_values.get(exercise1, 2)
359
- value2 = exercise_values.get(exercise2, 2)
360
-
361
- # 計算差異
362
- diff = abs(value1 - value2)
363
-
364
- if diff == 0:
365
- return 1.0
366
- elif diff == 1:
367
- return 0.7
368
- elif diff == 2:
369
- return 0.4
370
- else:
371
- return 0.2
372
-
373
- except Exception as e:
374
- print(f"Error in calculate_exercise_similarity_enhanced: {str(e)}")
375
- return 0.5
376
-
377
- def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
378
- """
379
- 計算美容需求相似度
380
-
381
- Args:
382
- breed1: 第一個品種名稱
383
- breed2: 第二個品種名稱
384
-
385
- Returns:
386
- float: 相似度分數 (0-1)
387
- """
388
- try:
389
- grooming_map = {
390
- 'Low': 1,
391
- 'Moderate': 2,
392
- 'High': 3
393
- }
394
-
395
- # 從dog_data中獲取美容需求
396
- breed1_info = next((dog for dog in self.dog_data if dog[1] == breed1), None)
397
- breed2_info = next((dog for dog in self.dog_data if dog[1] == breed2), None)
398
-
399
- if not breed1_info or not breed2_info:
400
- return 0.5 # 數據缺失時返回中等相似度
401
-
402
- grooming1 = breed1_info[8] # Grooming_Needs index
403
- grooming2 = breed2_info[8]
404
-
405
- # 獲取數值,默認為 Moderate
406
- value1 = grooming_map.get(grooming1, 2)
407
- value2 = grooming_map.get(grooming2, 2)
408
-
409
- # 基礎相似度計算
410
- base_similarity = 1.0 - (abs(value1 - value2) / 2.0)
411
-
412
- # 美容需求調整
413
- if grooming2 == 'Moderate':
414
- base_similarity *= 1.1 # 中等美容需求略微加分
415
- elif grooming2 == 'High':
416
- base_similarity *= 0.9 # 高美容需求略微降分
417
-
418
- return min(1.0, base_similarity)
419
- except Exception as e:
420
- print(f"Error in calculate_grooming_similarity: {str(e)}")
421
- return 0.5
422
-
423
def _calculate_health_score_similarity(self, breed1: str, breed2: str) -> float:
    """Return one minus the absolute difference of the two breeds' health scores.

    Returns 0.5 if computing either score raises.
    """
    try:
        gap = abs(self._calculate_health_score(breed1) - self._calculate_health_score(breed2))
        return 1.0 - gap
    except Exception as e:
        print(f"Error in calculate_health_score_similarity: {str(e)}")
        return 0.5
434
-
435
def _calculate_health_score(self, breed_name: str) -> float:
    """Derive a health score for a breed from keywords in its health notes.

    Args:
        breed_name: Key looked up in ``breed_health_info``.

    Returns:
        A score in [0.3, 1.0]: it starts at 1.0 and loses 0.15 per severe
        keyword and 0.05 per moderate keyword found in the notes. Returns 0.5
        for an unknown breed or if anything raises.
    """
    try:
        if breed_name not in breed_health_info:
            return 0.5

        notes = breed_health_info[breed_name]['health_notes'].lower()

        # Keywords treated as serious conditions.
        severe_keywords = (
            'cancer', 'cardiomyopathy', 'epilepsy', 'dysplasia',
            'bloat', 'progressive', 'syndrome',
        )
        # Keywords treated as moderate conditions.
        moderate_keywords = (
            'allergies', 'infections', 'thyroid', 'luxation',
            'skin problems', 'ear',
        )

        severe_hits = len([word for word in severe_keywords if word in notes])
        moderate_hits = len([word for word in moderate_keywords if word in notes])

        health_score = 1.0
        health_score -= (severe_hits * 0.15)
        health_score -= (moderate_hits * 0.05)

        return max(0.3, min(1.0, health_score))
    except Exception as e:
        print(f"Error in calculate_health_score: {str(e)}")
        return 0.5
477
-
478
-
479
def _calculate_noise_similarity(self, breed1: str, breed2: str) -> float:
    """Compare the noise levels of two breeds taken from ``breed_noise_info``.

    Levels 'Low', 'Moderate' and 'High' map to 1-3; a missing breed, a missing
    'noise_level' entry or an unrecognised label counts as 2.

    Returns:
        1.0 for equal levels, 0.5 for one step apart, 0.0 for two steps apart.
    """
    noise_levels = {
        'Low': 1,
        'Moderate': 2,
        'High': 3,
        'Unknown': 2  # treated as moderate
    }

    def level_for(breed):
        label = breed_noise_info.get(breed, {}).get('noise_level', 'Unknown')
        return noise_levels.get(label, 2)

    # The largest possible gap is 2, hence the division by 2.
    return 1.0 - (abs(level_for(breed1) - level_for(breed2)) / 2)
500
-
501
- # bonus score zone
502
- def _calculate_size_bonus(self, size: str, living_space: str) -> float:
503
- """
504
- 計算尺寸匹配的獎勵分數
505
-
506
- Args:
507
- size: 品種尺寸
508
- living_space: 居住空間類型
509
-
510
- Returns:
511
- float: 獎勵分數 (-0.25 到 0.15)
512
- """
513
- try:
514
- if living_space == "apartment":
515
- size_scores = {
516
- 'Tiny': -0.15,
517
- 'Small': 0.10,
518
- 'Medium': 0.15,
519
- 'Large': 0.10,
520
- 'Giant': -0.30
521
- }
522
- else: # house
523
- size_scores = {
524
- 'Tiny': -0.10,
525
- 'Small': 0.05,
526
- 'Medium': 0.15,
527
- 'Large': 0.15,
528
- 'Giant': -0.15
529
- }
530
- return size_scores.get(size, 0.0)
531
- except Exception as e:
532
- print(f"Error in calculate_size_bonus: {str(e)}")
533
- return 0.0
534
-
535
- def _calculate_exercise_bonus(self, exercise_needs: str, exercise_time: int) -> float:
536
- """
537
- 計算運動需求匹配的獎勵分數
538
-
539
- Args:
540
- exercise_needs: 品種運動需求
541
- exercise_time: 用戶可提供的運動時間(分鐘)
542
-
543
- Returns:
544
- float: 獎勵分數 (-0.20 到 0.20)
545
- """
546
- try:
547
- if exercise_time >= 120: # 高運動量需求
548
- exercise_scores = {
549
- 'Low': -0.30,
550
- 'Moderate': -0.10,
551
- 'High': 0.15,
552
- 'Very High': 0.30
553
- }
554
- elif exercise_time >= 60: # 中等運動量需求
555
- exercise_scores = {
556
- 'Low': -0.05,
557
- 'Moderate': 0.15,
558
- 'High': 0.05,
559
- 'Very High': -0.10
560
- }
561
- else: # 低運動量需求
562
- exercise_scores = {
563
- 'Low': 0.15,
564
- 'Moderate': 0.05,
565
- 'High': -0.15,
566
- 'Very High': -0.20
567
- }
568
- return exercise_scores.get(exercise_needs, 0.0)
569
- except Exception as e:
570
- print(f"Error in calculate_exercise_bonus: {str(e)}")
571
- return 0.0
572
-
573
- def _calculate_grooming_bonus(self, grooming: str, commitment: str) -> float:
574
- """
575
- 計算美容需求匹配的獎勵分數
576
-
577
- Args:
578
- grooming: 品種美容需求
579
- commitment: 用戶美容投入程度
580
-
581
- Returns:
582
- float: 獎勵分數 (-0.15 到 0.10)
583
- """
584
- try:
585
- if commitment == "high":
586
- grooming_scores = {
587
- 'Low': -0.05,
588
- 'Moderate': 0.05,
589
- 'High': 0.10
590
- }
591
- else: # medium or low commitment
592
- grooming_scores = {
593
- 'Low': 0.10,
594
- 'Moderate': 0.05,
595
- 'High': -0.20
596
- }
597
- return grooming_scores.get(grooming, 0.0)
598
- except Exception as e:
599
- print(f"Error in calculate_grooming_bonus: {str(e)}")
600
- return 0.0
601
-
602
- def _calculate_family_bonus(self, breed_info: Dict) -> float:
603
- """
604
- 計算家庭適應性的獎勵分數
605
-
606
- Args:
607
- breed_info: 品種信息字典
608
-
609
- Returns:
610
- float: 獎勵分數 (0 到 0.20)
611
- """
612
- try:
613
- bonus = 0.0
614
- temperament = breed_info.get('Temperament', '').lower()
615
- good_with_children = breed_info.get('Good_With_Children', False)
616
-
617
- if good_with_children:
618
- bonus += 0.20
619
- if any(trait in temperament for trait in ['gentle', 'patient', 'friendly']):
620
- bonus += 0.10
621
-
622
- return min(0.20, bonus)
623
- except Exception as e:
624
- print(f"Error in calculate_family_bonus: {str(e)}")
625
- return 0.0
626
-
627
-
628
- def _detect_scenario(self, description: str) -> Dict[str, float]:
629
- """
630
- 檢測場景並返回對應權重
631
- """
632
- # 基礎場景定義
633
- scenarios = {
634
- 'athletic': {
635
- 'keywords': ['active', 'exercise', 'running', 'athletic', 'energetic', 'sports'],
636
- 'weights': {
637
- 'exercise': 0.40,
638
- 'size': 0.25,
639
- 'temperament': 0.20,
640
- 'health': 0.15
641
- }
642
- },
643
- 'apartment': {
644
- 'keywords': ['apartment', 'flat', 'condo'],
645
- 'weights': {
646
- 'size': 0.35,
647
- 'noise': 0.30,
648
- 'exercise': 0.20,
649
- 'temperament': 0.15
650
- }
651
- },
652
- 'family': {
653
- 'keywords': ['family', 'children', 'kids', 'friendly'],
654
- 'weights': {
655
- 'temperament': 0.35,
656
- 'safety': 0.30,
657
- 'noise': 0.20,
658
- 'exercise': 0.15
659
- }
660
- },
661
- 'novice': {
662
- 'keywords': ['first time', 'beginner', 'new owner', 'inexperienced'],
663
- 'weights': {
664
- 'trainability': 0.35,
665
- 'temperament': 0.30,
666
- 'care_level': 0.20,
667
- 'health': 0.15
668
- }
669
- }
670
- }
671
-
672
- # 檢測匹配的場景
673
- matched_scenarios = []
674
- for scenario, config in scenarios.items():
675
- if any(keyword in description.lower() for keyword in config['keywords']):
676
- matched_scenarios.append(scenario)
677
-
678
- # 默認權重
679
- default_weights = {
680
- 'exercise': 0.20,
681
- 'size': 0.20,
682
- 'temperament': 0.20,
683
- 'health': 0.15,
684
- 'noise': 0.10,
685
- 'grooming': 0.10,
686
- 'trainability': 0.05
687
- }
688
-
689
- # 如果沒有匹配場景,返回默認權重
690
- if not matched_scenarios:
691
- return default_weights
692
-
693
- # 合併匹配場景的權重
694
- final_weights = default_weights.copy()
695
- for scenario in matched_scenarios:
696
- scenario_weights = scenarios[scenario]['weights']
697
- for feature, weight in scenario_weights.items():
698
- if feature in final_weights:
699
- final_weights[feature] = max(final_weights[feature], weight)
700
-
701
- return final_weights
702
-
703
-
704
- def _calculate_final_scores(self, breed_name: str, base_scores: Dict,
705
- smart_score: float, is_preferred: bool,
706
- similarity_score: float = 0.0,
707
- characteristics_score: float = 1.0,
708
- weights: Dict[str, float] = None) -> Dict:
709
- try:
710
- # 使用傳入的權重或默認權重
711
- if weights is None:
712
- weights = {
713
- 'base': 0.35,
714
- 'smart': 0.35,
715
- 'bonus': 0.15,
716
- 'characteristics': 0.15
717
- }
718
-
719
- # 確保 base_scores 包含所有必要的鍵
720
- base_scores = {
721
- 'overall': base_scores.get('overall', smart_score),
722
- 'size': base_scores.get('size', 0.0),
723
- 'exercise': base_scores.get('exercise', 0.0),
724
- 'temperament': base_scores.get('temperament', 0.0),
725
- 'grooming': base_scores.get('grooming', 0.0),
726
- 'health': base_scores.get('health', 0.0),
727
- 'noise': base_scores.get('noise', 0.0)
728
- }
729
-
730
- # 計算基礎分數
731
- base_score = base_scores['overall']
732
-
733
- # 計算獎勵分數
734
- bonus_score = 0.0
735
- if is_preferred:
736
- bonus_score = 0.95
737
- elif similarity_score > 0:
738
- bonus_score = min(0.8, similarity_score) * 0.95
739
-
740
- # 特徵匹配度調整
741
- if characteristics_score < 0.5:
742
- base_score *= 0.7 # 降低基礎分數
743
- smart_score *= 0.7 # 降低智能匹配分數
744
-
745
- # 計算最終分數
746
- final_score = (
747
- base_score * weights.get('base', 0.35) +
748
- smart_score * weights.get('smart', 0.35) +
749
- bonus_score * weights.get('bonus', 0.15) +
750
- characteristics_score * weights.get('characteristics', 0.15)
751
- )
752
-
753
- # 確保分數在合理範圍內
754
- final_score = min(1.0, max(0.3, final_score))
755
-
756
- return {
757
- 'final_score': round(final_score, 4),
758
- 'base_score': round(base_score, 4),
759
- 'smart_score': round(smart_score, 4),
760
- 'bonus_score': round(bonus_score, 4),
761
- 'characteristics_score': round(characteristics_score, 4),
762
- 'detailed_scores': base_scores
763
- }
764
-
765
- except Exception as e:
766
- print(f"Error in calculate_final_scores: {str(e)}")
767
- return {
768
- 'final_score': 0.5,
769
- 'base_score': 0.5,
770
- 'smart_score': 0.5,
771
- 'bonus_score': 0.0,
772
- 'characteristics_score': 0.5,
773
- 'detailed_scores': {
774
- 'overall': 0.5,
775
- 'size': 0.5,
776
- 'exercise': 0.5,
777
- 'temperament': 0.5,
778
- 'grooming': 0.5,
779
- 'health': 0.5,
780
- 'noise': 0.5
781
- }
782
- }
783
-
784
def _general_matching(self, description: str, weights: Dict[str, float], top_n: int = 10) -> List[Dict]:
    """Rank every breed against a free-text description.

    For each breed, the description embedding is compared with embeddings of
    the breed's description (position 9) and temperament (position 4); these
    are blended with a noise term and a health term into ``final_score``.

    Args:
        description: The user's free-text description.
        weights: Mapping read for 'description', 'temperament', 'noise' and
            'health' (fallbacks 0.35 / 0.25 / 0.2 / 0.2).
        top_n: Maximum number of matches to return.

    Returns:
        Up to ``top_n`` match dicts ordered by descending
        'characteristics_score', then descending 'final_score'.
        An empty list if anything raises.
    """
    try:
        query_embedding = self._get_cached_embedding(description)
        matches = []

        for breed in self.dog_data:
            breed_name = breed[1]
            breed_features = self._extract_breed_features(breed)

            description_embedding = self._get_cached_embedding(breed[9])
            temperament_embedding = self._get_cached_embedding(breed[4])

            desc_similarity = float(util.pytorch_cos_sim(query_embedding, description_embedding))
            temp_similarity = float(util.pytorch_cos_sim(query_embedding, temperament_embedding))

            # NOTE(review): the breed is compared with itself, so this term
            # does not vary between breeds - confirm intent.
            noise_similarity = self._calculate_noise_similarity(breed_name, breed_name)
            health_score = self._calculate_health_score(breed_name)
            # Closeness of the health score to a reference value of 0.8.
            health_similarity = 1.0 - abs(health_score - 0.8)

            final_score = (
                desc_similarity * weights.get('description', 0.35) +
                temp_similarity * weights.get('temperament', 0.25) +
                noise_similarity * weights.get('noise', 0.2) +
                health_similarity * weights.get('health', 0.2)
            )

            characteristics_score = self.get_breed_characteristics_score(breed_features, description)

            # NOTE(review): the '*_score' keys fall back to 0.0 whenever the
            # feature dict lacks them - verify they are ever populated.
            scores = {
                'overall': final_score,
                'size': breed_features.get('size_score', 0.0),
                'exercise': breed_features.get('exercise_score', 0.0),
                'temperament': temp_similarity,
                'grooming': breed_features.get('grooming_score', 0.0),
                'health': health_score,
                'noise': noise_similarity
            }

            match = {
                'breed': breed_name,
                'scores': scores,
                'final_score': final_score,
                'base_score': final_score,
                'characteristics_score': characteristics_score,
                'bonus_score': 0.0,
                'is_preferred': False,
                'similarity': final_score,
                'health_score': health_score,
                'reason': "Matched based on description and characteristics"
            }
            matches.append(match)

        matches.sort(key=lambda m: (-m['characteristics_score'], -m['final_score']))
        return matches[:top_n]

    except Exception as e:
        print(f"Error in _general_matching: {str(e)}")
        return []
846
-
847
-
848
- def _detect_breed_preference(self, description: str) -> Optional[str]:
849
- """檢測用戶是否提到特定品種"""
850
- description_lower = f" {description.lower()} "
851
-
852
- for breed_info in self.dog_data:
853
- breed_name = breed_info[1]
854
- normalized_breed = breed_name.lower().replace('_', ' ')
855
-
856
- pattern = rf"\b{re.escape(normalized_breed)}\b"
857
-
858
- if re.search(pattern, description_lower):
859
- return breed_name
860
-
861
- return None
862
-
863
- def _extract_breed_features(self, breed_info: Tuple) -> Dict:
864
- """
865
- 從品種信息中提取特徵
866
-
867
- Args:
868
- breed_info: 品種信息元組
869
-
870
- Returns:
871
- Dict: 包含品種特徵的字典
872
- """
873
- try:
874
- return {
875
- 'breed_name': breed_info[1],
876
- 'size': breed_info[2],
877
- 'temperament': breed_info[4],
878
- 'exercise': breed_info[7],
879
- 'grooming': breed_info[8],
880
- 'description': breed_info[9],
881
- 'good_with_children': breed_info[6]
882
- }
883
- except Exception as e:
884
- print(f"Error in extract_breed_features: {str(e)}")
885
- return {
886
- 'breed_name': '',
887
- 'size': 'Medium',
888
- 'temperament': '',
889
- 'exercise': 'Moderate',
890
- 'grooming': 'Moderate',
891
- 'description': '',
892
- 'good_with_children': False
893
- }
894
-
895
@gpu_init_wrapper
@safe_prediction
def match_user_preference(self, description: str, top_n: int = 10) -> List[Dict]:
    """Recommend breeds for a free-text description of what the user wants.

    If the description names a known breed, that breed is added first
    (priority 1), followed by breeds similar to it (priority 2). Remaining
    slots are filled from general description matching (priority 3). The
    combined list is then sorted, truncated and ranked.

    Args:
        description: The user's free-text description.
        top_n: Maximum number of matches to return.

    Returns:
        Up to ``top_n`` match dicts, each with a 1-based 'rank'.
        An empty list if anything raises.
    """
    try:
        if self.model is None:
            self._initialize_model()
        # Scenario-specific feature weights derived from the description.
        weights = self._detect_scenario(description)
        matches = []
        preferred_breed = self._detect_breed_preference(description)

        # Handle a breed the user mentioned explicitly.
        if preferred_breed:
            breed_info = next((breed for breed in self.dog_data if breed[1] == preferred_breed), None)
            if breed_info:
                breed_features = self._extract_breed_features(breed_info)
                # The preferred breed is compared with itself here.
                base_similarity = self._calculate_breed_similarity(breed_features, breed_features, weights)

                # Trait fit against the description.
                characteristics_score = self.get_breed_characteristics_score(breed_features, description)

                # Final score.
                # NOTE(review): `weights` holds scenario feature weights, while
                # _calculate_final_scores reads 'base'/'smart'/'bonus'/
                # 'characteristics' - confirm this is intended.
                scores = self._calculate_final_scores(
                    preferred_breed,
                    {'overall': base_similarity},
                    smart_score=base_similarity,
                    is_preferred=True,
                    similarity_score=1.0,
                    characteristics_score=characteristics_score,
                    weights=weights
                )

                matches.append({
                    'breed': preferred_breed,
                    'scores': scores['detailed_scores'],
                    'final_score': scores['final_score'],
                    'base_score': scores['base_score'],
                    'bonus_score': scores['bonus_score'],
                    'characteristics_score': characteristics_score,
                    'is_preferred': True,
                    'priority': 1,
                    'health_score': self._calculate_health_score(preferred_breed),
                    'reason': "Directly matched your preferred breed"
                })

                # Look for breeds similar to the preferred one.
                similar_breeds = self.find_similar_breeds(preferred_breed, top_n=top_n-1)
                for breed_name, similarity in similar_breeds:
                    if breed_name != preferred_breed:
                        breed_info = next((breed for breed in self.dog_data if breed[1] == breed_name), None)
                        if breed_info:
                            breed_features = self._extract_breed_features(breed_info)
                            characteristics_score = self.get_breed_characteristics_score(breed_features, description)

                            scores = self._calculate_final_scores(
                                breed_name,
                                {'overall': similarity},
                                smart_score=similarity,
                                is_preferred=False,
                                similarity_score=similarity,
                                characteristics_score=characteristics_score,
                                weights=weights
                            )

                            if scores['final_score'] >= 0.4:  # minimum score threshold
                                matches.append({
                                    'breed': breed_name,
                                    'scores': scores['detailed_scores'],
                                    'final_score': scores['final_score'],
                                    'base_score': scores['base_score'],
                                    'bonus_score': scores['bonus_score'],
                                    'characteristics_score': characteristics_score,
                                    'is_preferred': False,
                                    'priority': 2,
                                    'health_score': self._calculate_health_score(breed_name),
                                    'reason': f"Similar to {preferred_breed}"
                                })

        # No preferred breed was found, or more matches are still needed.
        if len(matches) < top_n:
            general_matches = self._general_matching(description, weights, top_n - len(matches))
            for match in general_matches:
                # Skip breeds that are already in the result list.
                if match['breed'] not in [m['breed'] for m in matches]:
                    match['priority'] = 3
                    if match['final_score'] >= 0.4:  # score threshold
                        matches.append(match)

        # Final ordering.
        matches.sort(key=lambda x: (
            -x.get('characteristics_score', 0),  # trait fit first
            -x.get('final_score', 0),            # then the overall score
            -x.get('base_score', 0),             # then the base score
            x.get('breed', '')                   # alphabetical as the last tie-breaker
        ))

        # Keep the top N results.
        final_matches = matches[:top_n]

        # Assign 1-based ranks.
        for i, match in enumerate(final_matches, 1):
            match['rank'] = i

        return final_matches

    except Exception as e:
        print(f"Error in match_user_preference: {str(e)}")
        return []