Commit 34eb6e3 (1 parent: 7cf213a), committed by root

update modeling with print

Files changed (1)
  1. modeling_srv1_tp.py +14 -7
modeling_srv1_tp.py CHANGED
@@ -839,17 +839,22 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
         trust_remote_code = kwargs.get("trust_remote_code", False)
         quantize = kwargs.get("quantize", None)
         dtype = kwargs.get("dtype", None)
-        print("Start initializing...")
+        if dtype is None:
+            dtype = config.torch_dtype
+
         self.process_group, rank, world_size = initialize_torch_distributed()
-        print(f"RANK[{rank}]: Distributed Initialize Success")
+
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16 if dtype is None else dtype
-            print(f"Use dtype {dtype}")
+
         else:
             raise NotImplementedError("Flash is only available on GPU")
 
-        print(f"Will read model dir {model_id}")
+        if rank == 0:
+            print(config)
+            print(f"Final dtype {dtype}")
+            print(f"Will read model dir {model_id}")
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,
@@ -857,14 +862,15 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
             truncation_side="left",
             trust_remote_code=trust_remote_code,
         )
-        # config already defined in from_pretrained
-        # config = SRV1Config.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code)
+
         config.quantize = quantize
         torch.distributed.barrier(group=self.process_group)
         import glob
         filenames = glob.glob(f"{model_id}/*.safetensors")
-        print(f"Will read filename {filenames}")
+        if rank == 0:
+            print(f"Will read filename {filenames}")
         weights = Weights(filenames=filenames, device=device, dtype=dtype, process_group=self.process_group)
+
         print(f"RANK[{rank}]: Loaded Weights success. device:{device}")
 
         torch.distributed.barrier(group=self.process_group)
@@ -887,4 +893,5 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
         )
         kwargs.update({"pretrained_model_name_or_path": pretrained_model_name_or_path})
         model = cls(config, *model_args, **kwargs)
+
         return model
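
The one functional change in this commit is the dtype resolution order: an explicit dtype kwarg now falls back to config.torch_dtype before defaulting to float16 on CUDA. A minimal sketch of that logic, assuming a Hugging Face-style config object that exposes a torch_dtype attribute (resolve_dtype and its signature are illustrative names, not part of this file):

import torch

def resolve_dtype(kwargs, config):
    # Resolution order after this commit:
    # explicit dtype kwarg > config.torch_dtype > torch.float16 on CUDA.
    dtype = kwargs.get("dtype", None)
    if dtype is None:
        dtype = config.torch_dtype  # may still be None if the config leaves it unset
    if not torch.cuda.is_available():
        raise NotImplementedError("Flash is only available on GPU")
    return torch.float16 if dtype is None else dtype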
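
The remaining changes gate the startup prints behind rank 0, so a multi-GPU job logs each message once rather than world_size times. A minimal sketch of that pattern with plain torch.distributed (log_rank0 is a hypothetical helper, not something this file defines):

import torch.distributed as dist

def log_rank0(*args, **kwargs):
    # Every rank executes the loading code, so an unguarded print appears
    # once per process; gating on rank 0 emits it once per job.
    if not dist.is_initialized() or dist.get_rank() == 0:
        print(*args, **kwargs)

# e.g. log_rank0(f"Will read model dir {model_id}") instead of the
# rank-guarded print blocks in the diff above.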