zurd46 committed on
Commit
987aabd
1 Parent(s): 34c466e

Create utils.py

Files changed (1)
  1. utils.py +181 -0
utils.py ADDED
@@ -0,0 +1,181 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
from peft import PeftModel

def get_device_map():
    """
    Determines the device_map to use when loading models.
    Returns:
        str: 'cuda:0' to pin the model to the first GPU when more than one
        GPU is present, otherwise 'auto'.
    """
    num_gpus = torch.cuda.device_count()

    if num_gpus > 1:
        print("More than one GPU found. Setting device_map to use CUDA device 0.")
        return 'cuda:0'
    else:
        return 'auto'

def check_adapter_path(adapters_name):
    """
    Checks if the adapter path is correctly set and not a placeholder.
    Args:
        adapters_name (str): The file path for the adapters.
    Raises:
        ValueError: If adapters_name still contains placeholder characters ('<' or '>').
    """
    if '<' in adapters_name or '>' in adapters_name:
        raise ValueError("The adapter path has not been set correctly.")

def load_tokenizer(model_name):
    """
    Loads and returns a tokenizer for the specified model.
    Args:
        model_name (str): The name of the model for which to load the tokenizer.
    Returns:
        AutoTokenizer: The loaded tokenizer with a [PAD] token added and right padding set.
    """
    tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tok.add_special_tokens({'pad_token': '[PAD]'})  # adds a new token; resize_embeddings must be called on the model
    tok.padding_side = 'right'  # TRL requires right padding
    return tok
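
# A minimal usage sketch. The checkpoint name below is only an illustration,
# not one this module prescribes:
#
#   tokenizer = load_tokenizer("microsoft/phi-2")
#   print(tokenizer.pad_token)     # '[PAD]'
#   print(tokenizer.padding_side)  # 'right'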

def load_model(model_name, torch_dtype, quant_type):
    """
    Loads and returns a causal language model with 4-bit quantization,
    placed according to get_device_map().
    Args:
        model_name (str): The name of the model to load.
        torch_dtype (torch.dtype): The compute dtype for quantized weights (e.g., torch.float16).
        quant_type (str): The bitsandbytes 4-bit quantization type (e.g., 'nf4' or 'fp4').
    Returns:
        AutoModelForCausalLM: The loaded, quantized model.
    Raises:
        RuntimeError: If the model cannot be loaded.
    """
    try:
        model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name_or_path=model_name,
            trust_remote_code=True,
            device_map=get_device_map(),
            torch_dtype=torch_dtype,
            quantization_config=BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch_dtype,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type=quant_type
            ),
        )
        return model
    except Exception as e:
        raise RuntimeError(f"Error loading model: {e}") from e
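
# Hedged example call. torch.bfloat16 and 'nf4' are common choices for
# QLoRA-style 4-bit loading, but they are assumptions here, not values
# mandated by this module:
#
#   model = load_model("microsoft/phi-2", torch_dtype=torch.bfloat16, quant_type="nf4")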

def resize_embeddings(model, tokenizer):
    """
    Resizes the token embeddings in the model to account for new tokens.
    Args:
        model (AutoModelForCausalLM): The model whose token embeddings will be resized.
        tokenizer (AutoTokenizer): The tokenizer corresponding to the model.
    """
    model.resize_token_embeddings(len(tokenizer))

def load_peft_model(model, adapters_name):
    """
    Loads the PEFT model from the pretrained model and specified adapters.
    Args:
        model (AutoModelForCausalLM): The base model.
        adapters_name (str): Path to the adapters file.
    Returns:
        PeftModel: The PEFT model with the loaded adapters.
    """
    return PeftModel.from_pretrained(model, adapters_name)
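
# Sketch of the intended composition (model and adapter paths are hypothetical).
# Embeddings are resized before applying adapters so the base model's vocabulary
# matches the tokenizer that added [PAD]:
#
#   tokenizer = load_tokenizer("microsoft/phi-2")
#   model = load_model("microsoft/phi-2", torch.bfloat16, "nf4")
#   resize_embeddings(model, tokenizer)
#   model = load_peft_model(model, "models/checkpoint-500")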

def get_device():
    """
    Determines and returns the device to use for computations.
    If CUDA is available, returns a CUDA device, otherwise returns a CPU device.
    Prints the number of GPUs available if CUDA is used.
    Returns:
        torch.device: The device to use.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print(f"Number of GPUs available: {torch.cuda.device_count()}")
    else:
        device = torch.device("cpu")
    return device

def run_prompt(model, tokenizer, device, template):
    """
    Runs an interactive prompt where the user can enter text to get generated responses.
    Continues to prompt the user for input until '#end' is entered.
    Args:
        model (AutoModelForCausalLM): The model to use for text generation.
        tokenizer (AutoTokenizer): The tokenizer to use for encoding the input text.
        device (torch.device): The device on which to perform the computation.
        template (str): The template string used to format the input text.
    """
    while True:
        new_input = input("Enter your text (type #end to stop): ")
        if new_input == "#end":
            break

        try:
            _ = generate_text(model, tokenizer, device, new_input, template)
        except Exception as e:
            print(f"An error occurred during text generation: {e}")

def generate_text(model, tokenizer, device, input_text, template):
    """
    Generates text for the given input, streaming tokens to stdout as they are produced.
    Args:
        model (AutoModelForCausalLM): The model to use for text generation.
        tokenizer (AutoTokenizer): The tokenizer to use for encoding the input text.
        device (torch.device): The device on which to perform the computation.
        input_text (str): The input text to generate a response for.
        template (str): The template string used to format the input text.
    Returns:
        torch.Tensor: The generated token IDs.
    """
    inputs = tokenizer(template.format(input_text), return_tensors="pt")
    inputs = inputs.to(device)  # Move input tensors to the device
    streamer = TextStreamer(tokenizer)
    return model.generate(**inputs, streamer=streamer,
                          max_new_tokens=1024,
                          pad_token_id=tokenizer.pad_token_id,
                          eos_token_id=tokenizer.eos_token_id)
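
# Example invocation. The template is an assumption; any format string with a
# single placeholder for the user text works:
#
#   template = "### Question: {}\n### Answer: "
#   output_ids = generate_text(model, tokenizer, get_device(), "What is PEFT?", template)
#   print(tokenizer.decode(output_ids[0], skip_special_tokens=True))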

def get_last_folder_alphabetically(directory_path):
    """
    Finds the last folder in a directory under natural sort order, so that
    numbered folders compare numerically (e.g. 'checkpoint-10' after 'checkpoint-9').

    Args:
        directory_path (str): The path to the directory.

    Returns:
        str: The path to the last folder found.
        If the directory does not exist or contains no folders, a descriptive string is returned.
    """
    if not os.path.exists(directory_path):
        return "Directory does not exist."

    all_files_and_folders = os.listdir(directory_path)
    only_folders = [f for f in all_files_and_folders if os.path.isdir(os.path.join(directory_path, f))]
    if not only_folders:
        return "No folders found in the directory."

    only_folders.sort(key=natural_sort_key)
    last_folder = only_folders[-1]
    return os.path.join(directory_path, last_folder)
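
# Typical use: picking the newest checkpoint directory written by a trainer
# (the path is hypothetical). With folders checkpoint-100, checkpoint-500 and
# checkpoint-1000 present, natural sorting returns .../checkpoint-1000,
# whereas a plain string sort would pick checkpoint-500:
#
#   latest = get_last_folder_alphabetically("models")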

def natural_sort_key(s):
    """
    Generates a key for sorting strings that contain numbers, so that numeric parts
    sort numerically and the remaining text sorts case-insensitively.

    Args:
        s (str): The string to generate a sort key for.

    Returns:
        list: A list of strings and integers derived from the input string.
    """
    return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]
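
if __name__ == "__main__":
    # A minimal end-to-end sketch of how these helpers compose. The base model
    # name and output directory are placeholders, not values defined by this module.
    model_name = "microsoft/phi-2"                             # hypothetical base model
    adapters_name = get_last_folder_alphabetically("models")   # hypothetical checkpoint root

    check_adapter_path(adapters_name)
    tokenizer = load_tokenizer(model_name)
    model = load_model(model_name, torch_dtype=torch.bfloat16, quant_type="nf4")
    resize_embeddings(model, tokenizer)  # account for the added [PAD] token
    model = load_peft_model(model, adapters_name)

    run_prompt(model, tokenizer, get_device(), template="### Question: {}\n### Answer: ")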