root committed
Commit 2032b87 · 1 parent: f43289b

add direct saving safetensor logic to modeling

Files changed (1)
  1. modeling_srv1_tp.py (+50, -15)
modeling_srv1_tp.py CHANGED
@@ -12,7 +12,7 @@ import torch.utils.checkpoint
 from torch import nn
 from torch.nn import CrossEntropyLoss
 from transformers.activations import ACT2FN
-from transformers import AutoTokenizer, AutoConfig
+from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
 from .configuration_srv1 import SRV1Config
 
 from transformers.modeling_outputs import (
@@ -832,9 +832,7 @@ class SRV1ForCausalLM(SRV1PreTrainedModel):
 
 class SRV1ForCausalLMParallel(SRV1ForCausalLM):
     def __init__(self, config, **kwargs):
-        model_id = kwargs.get("local_path", None)
-        if model_id is None:
-            model_id = kwargs.get("pretrained_model_name_or_path", None)
+        local_path = kwargs.get("local_path", None)
         revision = kwargs.get("revision", None)
         trust_remote_code = kwargs.get("trust_remote_code", False)
         quantize = kwargs.get("quantize", None)
@@ -854,9 +852,9 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
         if rank == 0:
             print(config)
             print(f"Final dtype {dtype}")
-            print(f"Will read model dir {model_id}")
+            print(f"Will read model dir {local_path}")
         self.tokenizer = AutoTokenizer.from_pretrained(
-            model_id,
+            local_path,
             revision=revision,
             padding_side="left",
             truncation_side="left",
@@ -865,10 +863,20 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
 
         config.quantize = quantize
         torch.distributed.barrier(group=self.process_group)
-        import glob
-        filenames = glob.glob(f"{model_id}/*.safetensors")
-        if rank == 0:
-            print(f"Will read filename {filenames}")
+        if local_path is not None:
+            import glob
+            filenames = glob.glob(f"{local_path}/safetensors/*.safetensors")
+            if len(filenames) == 0 and rank == 0:
+                print("No file detected. Will make safetensors...")
+                from pathlib import Path
+                Path(f"{local_path}/safetensors").mkdir(parents=True, exist_ok=True)
+                tmp_model = AutoModelForCausalLM.from_pretrained(local_path)
+                SRV1ForCausalLMParallel.save_model_in_distributed_safetensor(tmp_model, f"{local_path}/safetensors")
+                del tmp_model
+                torch.cuda.empty_cache()
+            torch.distributed.barrier(group=self.process_group)
+            filenames = glob.glob(f"{local_path}/safetensors/*.safetensors")
+            print(f"rank{rank} will read {filenames}")
         weights = Weights(filenames=filenames, device=device, dtype=dtype, process_group=self.process_group)
 
         print(f"RANK[{rank}]: Loaded Weights success. device:{device}")
@@ -883,15 +891,42 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, config=None, **kwargs):
+        """
+        pretrained_model_name_or_path is needed for routing AutoModel to the Hugging Face model repo.
+        local_path is then needed for loading the actual weights.
+        """
         config_path = config if config is not None else pretrained_model_name_or_path
-        local_config_path = kwargs.get("local_path", None)
-        if local_config_path is not None:
-            config_path = local_config_path
+        local_path = kwargs.get("local_path", None)
+        if local_path is not None:
+            config_path = local_path
         config = cls.config_class.from_pretrained(
             config_path,
             **kwargs,
         )
-        kwargs.update({"pretrained_model_name_or_path": pretrained_model_name_or_path})
         model = cls(config, *model_args, **kwargs)
 
-        return model
+        return model
+
+    @staticmethod
+    def save_model_in_distributed_safetensor(model, save_dir, n_file=2):
+        from safetensors.torch import save_file
+        from safetensors.torch import safe_open
+        total_params = [torch.numel(model.state_dict()[k]) for k in model.state_dict()]
+        if n_file is None:
+            bound = 5000000000  # 5B
+            n_file = int((sum(total_params) + bound - 1) / bound)
+        params_per_gpu = float(sum(total_params) / n_file)
+        params = [0]
+        tensors = {}
+        for i, (k, v) in enumerate(model.state_dict().items()):
+            cur_params = torch.numel(model.state_dict()[k])
+            params[-1] += cur_params
+            tensors.update({k: v})
+            if params[-1] > params_per_gpu or i == len(model.state_dict()) - 1:
+                name = f"model{len(params) - 1}.safetensors"
+                path = os.path.join(save_dir, name)
+                save_file(tensors, path)
+                params.append(0)
+                del tensors
+                tensors = {}
+        print("SafeTensors Save Success")