import re
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional

import numpy as np
import torch
from sentence_transformers import SentenceTransformer, util

from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info
from dog_database import dog_data
from scoring_calculation_system import UserPreferences


class SmartBreedMatcher:
    """Match dog breeds to a free-text description or to a named breed.

    Breed records are consumed positionally. The indices this class reads are:
    ``[1]`` breed name, ``[2]`` size, ``[4]`` temperament, ``[7]`` exercise
    needs, ``[8]`` grooming needs and ``[9]`` description.
    """

    def __init__(self, dog_data: List[Tuple]):
        """Store the breed records and load the sentence-embedding model.

        Args:
            dog_data: Breed records, indexed positionally as described on the
                class.
        """
        self.dog_data = dog_data
        self.model = SentenceTransformer('all-mpnet-base-v2')
        # Text -> embedding; avoids re-encoding the same description or
        # temperament string for every breed pair.
        self._embedding_cache = {}

    def _get_cached_embedding(self, text: str) -> torch.Tensor:
        """Return the embedding of ``text``, encoding it only on first use.

        NOTE(review): ``encode`` is called with its default arguments, so the
        cached value may be a NumPy array rather than a ``torch.Tensor`` --
        confirm against the sentence-transformers version in use. The result
        is only ever passed to ``util.pytorch_cos_sim`` in this class.
        """
        if text not in self._embedding_cache:
            self._embedding_cache[text] = self.model.encode(text)
        return self._embedding_cache[text]

    def _categorize_breeds(self) -> Dict:
        """Bucket breed names into coarse categories by description keywords.

        Each breed lands in at most one category: the first keyword group (in
        the order checked below) that has a substring hit in its lower-cased
        description. Breeds with no hit are left out.

        Returns:
            Mapping of category name to a list of breed names.
        """
        categories = {
            'working_dogs': [],
            'herding_dogs': [],
            'hunting_dogs': [],
            'companion_dogs': [],
            'guard_dogs': []
        }

        for breed_info in self.dog_data:
            description = breed_info[9].lower()

            # Order matters: the first matching keyword group wins.
            if any(word in description for word in ['herding', 'shepherd', 'cattle', 'flock']):
                categories['herding_dogs'].append(breed_info[1])
            elif any(word in description for word in ['hunting', 'hunt', 'retriever', 'pointer']):
                categories['hunting_dogs'].append(breed_info[1])
            elif any(word in description for word in ['companion', 'toy', 'family', 'lap']):
                categories['companion_dogs'].append(breed_info[1])
            elif any(word in description for word in ['guard', 'protection', 'watchdog']):
                categories['guard_dogs'].append(breed_info[1])
            elif any(word in description for word in ['working', 'draft', 'cart']):
                categories['working_dogs'].append(breed_info[1])

        return categories

    def find_similar_breeds(self, breed_name: str, top_n: int = 5) -> List[Tuple[str, float]]:
        """Return the ``top_n`` breeds most similar to ``breed_name``.

        Args:
            breed_name: Exact breed name as stored at index 1 of a record.
            top_n: Maximum number of results.

        Returns:
            ``(breed_name, similarity)`` pairs sorted by similarity, highest
            first. Empty when ``breed_name`` is not in the dataset.
        """
        target_breed = next(
            (breed for breed in self.dog_data if breed[1] == breed_name), None
        )
        if not target_breed:
            return []

        target_features = {
            'breed_name': target_breed[1],
            'size': target_breed[2],
            'temperament': target_breed[4],
            'exercise': target_breed[7],
            'description': target_breed[9]
        }

        similarities = []
        for breed in self.dog_data:
            if breed[1] != breed_name:
                breed_features = {
                    'breed_name': breed[1],
                    'size': breed[2],
                    'temperament': breed[4],
                    'exercise': breed[7],
                    'description': breed[9]
                }
                similarity_score = self._calculate_breed_similarity(
                    target_features, breed_features
                )
                similarities.append((breed[1], similarity_score))

        return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]

    def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
        """Compute a weighted similarity between two breeds.

        Combines size, exercise, grooming, temperament, health, description
        and noise similarity using the fixed weights below (they sum to 1.0).

        Args:
            breed1_features: Feature dict of the reference breed with keys
                ``breed_name``, ``size``, ``temperament``, ``exercise`` and
                ``description``.
            breed2_features: Feature dict of the candidate breed, same keys.

        Returns:
            The weighted similarity score.
        """
        # Semantic similarity of the free-text descriptions.
        desc1_embedding = self._get_cached_embedding(breed1_features['description'])
        desc2_embedding = self._get_cached_embedding(breed2_features['description'])
        description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))

        # The candidate's description is passed along so the size score can
        # account for apartment suitability.
        size_similarity = self._calculate_size_similarity_enhanced(
            breed1_features['size'],
            breed2_features['size'],
            breed2_features['description']
        )

        exercise_similarity = self._calculate_exercise_similarity_enhanced(
            breed1_features['exercise'],
            breed2_features['exercise']
        )

        grooming_similarity = self._calculate_grooming_similarity(
            breed1_features['breed_name'],
            breed2_features['breed_name']
        )

        # Semantic similarity of the temperament strings.
        temp1_embedding = self._get_cached_embedding(breed1_features['temperament'])
        temp2_embedding = self._get_cached_embedding(breed2_features['temperament'])
        temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))

        health_score1 = self._calculate_health_score(breed1_features['breed_name'])
        health_score2 = self._calculate_health_score(breed2_features['breed_name'])
        health_similarity = 1.0 - abs(health_score1 - health_score2)

        noise_similarity = self._calculate_noise_similarity(
            breed1_features['breed_name'],
            breed2_features['breed_name']
        )

        weights = {
            'size': 0.20,
            'exercise': 0.20,
            'temperament': 0.15,
            'grooming': 0.15,
            'health': 0.15,
            'description': 0.10,
            'noise': 0.05
        }

        final_similarity = (
            size_similarity * weights['size'] +
            exercise_similarity * weights['exercise'] +
            grooming_similarity * weights['grooming'] +
            temperament_similarity * weights['temperament'] +
            description_similarity * weights['description'] +
            health_similarity * weights['health'] +
            noise_similarity * weights['noise']
        )

        return final_similarity

    def _calculate_final_scores(self, breed_name: str, base_scores: Dict,
                                smart_score: float, is_preferred: bool,
                                similarity_score: float = 0.0) -> Dict:
        """Combine base, smart-match and bonus scores into a final score.

        Args:
            breed_name: Breed name (not used in the calculation).
            base_scores: Per-category base scores; ``'overall'`` is used as
                the base score and defaults to 0.7 when absent.
            smart_score: Smart-matching score.
            is_preferred: Whether this is the breed the user named.
            similarity_score: Similarity to the named breed (0-1).

        Returns:
            Dict with ``final_score``, ``base_score``, ``bonus_score`` and
            ``scores`` (a boosted copy of ``base_scores``), all rounded to
            four decimals.
        """
        weights = {
            'base': 0.6,
            'smart': 0.25,
            'bonus': 0.15
        }

        base_score = base_scores.get('overall', 0.7)

        bonus_score = 0.0
        if is_preferred:
            # The user's named breed receives the maximum bonus.
            bonus_score = 0.95
        elif similarity_score > 0:
            # Similar breeds get a partial bonus, capped at 80% of the maximum.
            bonus_score = min(0.8, similarity_score) * 0.95

        final_score = (
            base_score * weights['base'] +
            smart_score * weights['smart'] +
            bonus_score * weights['bonus']
        )

        # Work on a copy so the caller's dict is not mutated.
        scores = base_scores.copy()

        if is_preferred:
            # Boost every non-overall score by 10%, capped at 1.0.
            for key in scores:
                if key != 'overall':
                    scores[key] = min(1.0, scores[key] * 1.1)
        elif similarity_score > 0:
            # Boost by up to 5%, capped at 0.95 so similar breeds stay below
            # the preferred breed.
            boost_factor = 1.0 + (similarity_score * 0.05)
            for key in scores:
                if key != 'overall':
                    scores[key] = min(0.95, scores[key] * boost_factor)

        return {
            'final_score': round(final_score, 4),
            'base_score': round(base_score, 4),
            'bonus_score': round(bonus_score, 4),
            'scores': {k: round(v, 4) for k, v in scores.items()}
        }

    def _calculate_size_similarity_enhanced(self, size1: str, size2: str, description: str) -> float:
        """Score how close two size classes are, with fixed size adjustments.

        Both sizes are normalised to a canonical label first. The same
        normalised label drives the adjustment multipliers, so a raw value
        such as ``"small"`` is treated like ``"Small"`` throughout.

        Args:
            size1: Size of the reference breed.
            size2: Size of the candidate breed.
            description: Candidate breed description; a mention of
                "apartment" reduces the score for Large and Giant candidates.

        Returns:
            Similarity, capped at 1.0.
        """
        size_map = {
            'Tiny': 0,
            'Small': 1,
            'Small-Medium': 2,
            'Medium': 3,
            'Medium-Large': 4,
            'Large': 5,
            'Giant': 6
        }

        label1 = self._normalize_size(size1)
        label2 = self._normalize_size(size2)
        value1 = size_map.get(label1, 3)  # default: Medium
        value2 = size_map.get(label2, 3)

        # Linear distance over the 0..6 scale.
        base_similarity = 1.0 - (abs(value1 - value2) / 6.0)

        # Fixed adjustments keyed on the candidate's size class.
        if label2 in ('Small', 'Tiny'):
            base_similarity *= 0.5
        elif label2 == 'Giant':
            base_similarity *= 0.6
        elif label2 in ('Medium', 'Medium-Large'):
            base_similarity *= 1.2

        # Large dogs whose description mentions apartments are scored down.
        if 'apartment' in description.lower() and label2 in ('Large', 'Giant'):
            base_similarity *= 0.8

        return min(1.0, base_similarity)

    def _normalize_size(self, size: str) -> str:
        """Map a free-form size string to a canonical size label.

        Returns one of ``Tiny``, ``Small``, ``Small-Medium``, ``Medium``,
        ``Medium-Large``, ``Large`` or ``Giant``. Unrecognised or empty input
        falls back to ``Medium``.
        """
        size = (size or '').lower()
        has_small = 'small' in size
        has_medium = 'medium' in size
        has_large = 'large' in size

        if 'tiny' in size:
            return 'Tiny'
        # Combined classes must be checked before their single-word parts,
        # otherwise "Small-Medium" would collapse to "Small".
        if has_small and has_medium:
            return 'Small-Medium'
        if has_small:
            return 'Small'
        if has_medium and has_large:
            return 'Medium-Large'
        if has_medium:
            return 'Medium'
        if 'giant' in size:
            return 'Giant'
        if has_large:
            return 'Large'
        return 'Medium'

    def _calculate_exercise_similarity_enhanced(self, exercise1: str, exercise2: str) -> float:
        """Score how close two exercise-need levels are.

        Levels outside ``Low`` / ``Moderate`` / ``High`` / ``Very High`` are
        treated as ``Moderate``.

        Returns:
            Similarity, capped at 1.0.
        """
        exercise_map = {
            'Low': 1,
            'Moderate': 2,
            'High': 3,
            'Very High': 4
        }

        value1 = exercise_map.get(exercise1, 2)
        value2 = exercise_map.get(exercise2, 2)

        # Linear distance over the 1..4 scale.
        base_similarity = 1.0 - abs(value1 - value2) / 3.0

        # Fixed adjustments keyed on the candidate's exercise level.
        if exercise2 in ['High', 'Very High']:
            base_similarity *= 1.2
        elif exercise2 == 'Low':
            base_similarity *= 0.7

        return min(1.0, base_similarity)

    def _calculate_grooming_similarity(self, breed1: str, breed2: str) -> float:
        """Score how close two breeds' grooming needs are.

        Grooming needs are read from index 8 of each breed record.

        Returns:
            Similarity capped at 1.0, or 0.5 when either breed is not in the
            dataset.
        """
        grooming_map = {
            'Low': 1,
            'Moderate': 2,
            'High': 3
        }

        breed1_info = next((dog for dog in self.dog_data if dog[1] == breed1), None)
        breed2_info = next((dog for dog in self.dog_data if dog[1] == breed2), None)

        if not breed1_info or not breed2_info:
            return 0.5  # neutral similarity when data is missing

        grooming1 = breed1_info[8]
        grooming2 = breed2_info[8]

        value1 = grooming_map.get(grooming1, 2)
        value2 = grooming_map.get(grooming2, 2)

        # Linear distance over the 1..3 scale.
        base_similarity = 1.0 - abs(value1 - value2) / 2.0

        # Fixed adjustments keyed on the candidate's grooming level.
        if grooming2 == 'Moderate':
            base_similarity *= 1.1
        elif grooming2 == 'High':
            base_similarity *= 0.9

        return min(1.0, base_similarity)

    def _calculate_health_score(self, breed_name: str) -> float:
        """Derive a health score from keyword hits in a breed's health notes.

        Starts at 1.0 and subtracts 0.1 per severe keyword and 0.05 per
        moderate keyword found in the notes. Further reductions apply when a
        ``user_preferences`` attribute has been set on the instance.

        Returns:
            Score clamped to [0.3, 1.0], or 0.5 when the breed has no entry
            in ``breed_health_info``.
        """
        if breed_name not in breed_health_info:
            return 0.5

        health_notes = breed_health_info[breed_name]['health_notes'].lower()

        severe_conditions = [
            'cancer', 'cardiomyopathy', 'epilepsy', 'dysplasia',
            'bloat', 'progressive', 'syndrome'
        ]

        moderate_conditions = [
            'allergies', 'infections', 'thyroid', 'luxation',
            'skin problems', 'ear'
        ]

        severe_count = sum(1 for condition in severe_conditions if condition in health_notes)
        moderate_count = sum(1 for condition in moderate_conditions if condition in health_notes)

        health_score = 1.0
        health_score -= (severe_count * 0.1)
        health_score -= (moderate_count * 0.05)

        # Optional adjustments; `user_preferences` is not set by __init__, so
        # this only applies when a caller attaches it.
        if hasattr(self, 'user_preferences'):
            if self.user_preferences.has_children:
                if 'requires frequent' in health_notes or 'regular monitoring' in health_notes:
                    health_score *= 0.9

            if self.user_preferences.health_sensitivity == 'high':
                health_score *= 0.9

        return max(0.3, min(1.0, health_score))

    def _calculate_noise_similarity(self, breed1: str, breed2: str) -> float:
        """Score how close two breeds' noise levels are.

        Breeds missing from ``breed_noise_info`` (or with an unrecognised
        level) are treated as Moderate.

        Returns:
            1.0 for identical levels, 0.5 for adjacent levels, 0.0 for
            Low vs High.
        """
        noise_levels = {
            'Low': 1,
            'Moderate': 2,
            'High': 3,
            'Unknown': 2  # treated as Moderate
        }

        noise1 = breed_noise_info.get(breed1, {}).get('noise_level', 'Unknown')
        noise2 = breed_noise_info.get(breed2, {}).get('noise_level', 'Unknown')

        level1 = noise_levels.get(noise1, 2)
        level2 = noise_levels.get(noise2, 2)

        # The largest possible gap is 2 (Low vs High), hence the divisor.
        difference = abs(level1 - level2)
        similarity = 1.0 - (difference / 2)

        return similarity

    def _general_matching(self, description: str, top_n: int = 10) -> List[Dict]:
        """Rank all breeds against a free-text description.

        Used when the description does not name a specific breed.

        Args:
            description: The user's free-text description.
            top_n: Maximum number of results.

        Returns:
            Match dicts (``breed``, ``score``, ``is_preferred``,
            ``similarity``, ``reason``) sorted by score, highest first.
        """
        matches = []
        # Encode the user description once; it is reused for every breed.
        desc_embedding = self._get_cached_embedding(description)

        for breed in self.dog_data:
            breed_name = breed[1]
            breed_description = breed[9]
            temperament = breed[4]

            breed_desc_embedding = self._get_cached_embedding(breed_description)
            breed_temp_embedding = self._get_cached_embedding(temperament)

            desc_similarity = float(util.pytorch_cos_sim(desc_embedding, breed_desc_embedding))
            temp_similarity = float(util.pytorch_cos_sim(desc_embedding, breed_temp_embedding))

            # NOTE(review): this compares the breed with itself, so the noise
            # term is always 1.0 and does not affect the ranking. Kept as-is
            # because no target noise level is available here -- confirm the
            # intended behaviour.
            noise_similarity = self._calculate_noise_similarity(breed_name, breed_name)

            # Closeness of the breed's health score to a fixed target of 0.8.
            health_score = self._calculate_health_score(breed_name)
            health_similarity = 1.0 - abs(health_score - 0.8)

            weights = {
                'description': 0.35,
                'temperament': 0.25,
                'noise': 0.2,
                'health': 0.2
            }

            final_score = (
                desc_similarity * weights['description'] +
                temp_similarity * weights['temperament'] +
                noise_similarity * weights['noise'] +
                health_similarity * weights['health']
            )

            matches.append({
                'breed': breed_name,
                'score': final_score,
                'is_preferred': False,
                'similarity': final_score,
                'reason': "Matched based on description, temperament, noise level, and health score"
            })

        return sorted(matches, key=lambda x: -x['score'])[:top_n]

    def _detect_breed_preference(self, description: str) -> Optional[str]:
        """Return the breed named in ``description``, if any.

        Breed names are compared case-insensitively with underscores treated
        as spaces, and must match on word boundaries. When several breed
        names match, the longest one wins so that a specific name such as
        "french bulldog" is not shadowed by a shorter name contained in it;
        ties keep the first breed in dataset order.

        Returns:
            The breed name as stored in the dataset, or ``None``.
        """
        description_lower = f" {description.lower()} "

        best_name = None
        best_length = 0
        for breed_info in self.dog_data:
            breed_name = breed_info[1]
            normalized_breed = breed_name.lower().replace('_', ' ')

            # Skip names that cannot beat the current best. With the initial
            # best_length of 0 this also skips empty names, whose pattern
            # would match any text.
            if len(normalized_breed) <= best_length:
                continue

            pattern = rf"\b{re.escape(normalized_breed)}\b"
            if re.search(pattern, description_lower):
                best_name = breed_name
                best_length = len(normalized_breed)

        return best_name

    def match_user_preference(self, description: str, top_n: int = 10) -> List[Dict]:
        """Return the best breed matches for a user's description.

        If the description names a breed, that breed is returned first,
        followed by the breeds most similar to it. Otherwise all breeds are
        ranked against the description.

        Args:
            description: The user's free-text description.
            top_n: Maximum number of results.

        Returns:
            Match dicts ordered by priority (1 first), then preferred breed,
            then score (highest first), then breed name.
        """
        preferred_breed = self._detect_breed_preference(description)

        matches = []
        if preferred_breed:
            breed_info = next(
                (breed for breed in self.dog_data if breed[1] == preferred_breed), None
            )
            if breed_info:
                # The named breed gets the maximum base score.
                base_scores = {'overall': 1.0}
                scores = self._calculate_final_scores(
                    preferred_breed,
                    base_scores,
                    smart_score=1.0,
                    is_preferred=True,
                    similarity_score=1.0
                )

                matches.append({
                    'breed': preferred_breed,
                    'score': 1.0,
                    'final_score': scores['final_score'],
                    'base_score': scores['base_score'],
                    'bonus_score': scores['bonus_score'],
                    'is_preferred': True,
                    'priority': 1,
                    'health_score': self._calculate_health_score(preferred_breed),
                    'noise_level': breed_noise_info.get(preferred_breed, {}).get('noise_level', 'Unknown'),
                    'reason': "Directly matched your preferred breed"
                })

                # Fill the remaining slots with similar breeds; clamp at 0 so
                # a non-positive top_n does not become a negative slice bound.
                similar_breeds = self.find_similar_breeds(
                    preferred_breed, top_n=max(0, top_n - 1)
                )
                for breed_name, similarity in similar_breeds:
                    if breed_name != preferred_breed:
                        scores = self._calculate_final_scores(
                            breed_name,
                            {'overall': similarity * 0.9},  # kept below the preferred breed
                            smart_score=similarity,
                            is_preferred=False,
                            similarity_score=similarity
                        )

                        matches.append({
                            'breed': breed_name,
                            'score': min(0.95, similarity),  # never exceeds the preferred breed
                            'final_score': scores['final_score'],
                            'base_score': scores['base_score'],
                            'bonus_score': scores['bonus_score'],
                            'is_preferred': False,
                            'priority': 2,
                            'health_score': self._calculate_health_score(breed_name),
                            'noise_level': breed_noise_info.get(breed_name, {}).get('noise_level', 'Unknown'),
                            'reason': f"Similar to {preferred_breed}"
                        })
        else:
            matches = self._general_matching(description, top_n)
            for match in matches:
                match['priority'] = 3

        # sorted() is ascending, so: priority as-is (1 ranks first), negated
        # flag so preferred breeds come first, negated score so higher scores
        # come first. General matches carry no 'final_score', so fall back to
        # 'score' to keep them ranked by relevance rather than by name.
        final_matches = sorted(
            matches,
            key=lambda x: (
                x.get('priority', 3),
                not x.get('is_preferred', False),
                -float(x.get('final_score', x.get('score', 0))),
                x.get('breed', '')
            )
        )[:top_n]

        return final_matches