AurelioAguirre committed
Commit 1b76f70 · 1 Parent(s): 834f200

added mps and cpu initialization
Files changed (3)
  1. main/.cache/hub/version.txt +1 -0
  2. main/api.py +45 -31
  3. main/app.py +0 -2
main/.cache/hub/version.txt ADDED
@@ -0,0 +1 @@
+1
main/api.py CHANGED
@@ -80,48 +80,62 @@ class LLMApi:
             self.logger.error(f"Failed to download model {model_name}: {str(e)}")
             raise
 
-    def initialize_model(self, model_name: str) -> None:
-        """
-        Initialize a model and tokenizer for text generation.
-
-        Args:
-            model_name: The name of the model to initialize
-        """
-        self.logger.info(f"Initializing generation model: {model_name}")
-        try:
-            self.generation_model_name = model_name
-            local_model_path = self.models_path / model_name.split('/')[-1]
-
-            # Check if model exists locally
-            if local_model_path.exists():
-                self.logger.info(f"Loading model from local path: {local_model_path}")
-                model_path = local_model_path
-            else:
-                self.logger.info(f"Loading model from source: {model_name}")
-                model_path = model_name
-
-            quantization_config = BitsAndBytesConfig(
-                load_in_8bit=True,
-                llm_int8_threshold=3.0
-            )
-
-            self.generation_model = AutoModelForCausalLM.from_pretrained(
-                model_path,
-                device_map="auto",
-                quantization_config=quantization_config,
-                torch_dtype=torch.float16
-            )
-
-            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-            # Update generation config with tokenizer-specific values
-            self.generation_config["eos_token_id"] = self.tokenizer.eos_token_id
-            self.generation_config["pad_token_id"] = self.tokenizer.eos_token_id
-
-            self.logger.info(f"Successfully initialized generation model: {model_name}")
-        except Exception as e:
-            self.logger.error(f"Failed to initialize generation model {model_name}: {str(e)}")
-            raise
+    def initialize_model(self, model_name: str) -> None:
+        """
+        Initialize a model and tokenizer for text generation.
+        Handles different platforms (CUDA, MPS, CPU) appropriately.
+        """
+        self.logger.info(f"Initializing generation model: {model_name}")
+        try:
+            self.generation_model_name = model_name
+            local_model_path = self.models_path / model_name.split('/')[-1]
+
+            # Check if model exists locally
+            if local_model_path.exists():
+                self.logger.info(f"Loading model from local path: {local_model_path}")
+                model_path = local_model_path
+            else:
+                self.logger.info(f"Loading model from source: {model_name}")
+                model_path = model_name
+
+            # Check platform and set appropriate configuration
+            if torch.cuda.is_available():
+                self.logger.info("CUDA detected, using GPU with quantization")
+                quantization_config = BitsAndBytesConfig(
+                    load_in_8bit=True,
+                    llm_int8_threshold=3.0
+                )
+                self.generation_model = AutoModelForCausalLM.from_pretrained(
+                    model_path,
+                    device_map="auto",
+                    quantization_config=quantization_config,
+                    torch_dtype=torch.float16
+                )
+            elif torch.backends.mps.is_available():
+                self.logger.info("Apple Silicon detected, using MPS device")
+                self.generation_model = AutoModelForCausalLM.from_pretrained(
+                    model_path,
+                    device_map="mps",
+                    torch_dtype=torch.float16
+                )
+            else:
+                self.logger.info("No GPU detected, falling back to CPU")
+                self.generation_model = AutoModelForCausalLM.from_pretrained(
+                    model_path,
+                    device_map="cpu",
+                    torch_dtype=torch.float32  # Use full precision for CPU
+                )
+
+            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+            # Update generation config with tokenizer-specific values
+            self.generation_config["eos_token_id"] = self.tokenizer.eos_token_id
+            self.generation_config["pad_token_id"] = self.tokenizer.eos_token_id
+
+            self.logger.info(f"Successfully initialized generation model: {model_name}")
+        except Exception as e:
+            self.logger.error(f"Failed to initialize generation model {model_name}: {str(e)}")
+            raise
 
     def initialize_embedding_model(self, model_name: str) -> None:
         """
main/app.py CHANGED
@@ -5,8 +5,6 @@ from .routes import router, init_router
 from .utils.logging import setup_logger
 from .utils.validation import validate_hf
 
-
-
 def load_config():
     """Load configuration from yaml file"""
     with open("main/config.yaml", "r") as f: