Commit 34eb6e3 (1 parent: 7cf213a), committed by root

update modeling with print

Files changed (1)
  1. modeling_srv1_tp.py +14 -7
modeling_srv1_tp.py CHANGED
@@ -839,17 +839,22 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
         trust_remote_code = kwargs.get("trust_remote_code", False)
         quantize = kwargs.get("quantize", None)
         dtype = kwargs.get("dtype", None)
-        print("Start initializing...")
+        if dtype is None:
+            dtype = config.torch_dtype
+
         self.process_group, rank, world_size = initialize_torch_distributed()
-        print(f"RANK[{rank}]: Distributed Initialize Success")
+
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16 if dtype is None else dtype
-            print(f"Use dtype {dtype}")
+
         else:
             raise NotImplementedError("Flash is only available on GPU")
 
-        print(f"Will read model dir {model_id}")
+        if rank == 0:
+            print(config)
+            print(f"Final dtype {dtype}")
+            print(f"Will read model dir {model_id}")
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,
@@ -857,14 +862,15 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
             truncation_side="left",
             trust_remote_code=trust_remote_code,
         )
-        # config already defined in from_pretrained
-        # config = SRV1Config.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code)
+
         config.quantize = quantize
         torch.distributed.barrier(group=self.process_group)
         import glob
         filenames = glob.glob(f"{model_id}/*.safetensors")
-        print(f"Will read filename {filenames}")
+        if rank == 0:
+            print(f"Will read filename {filenames}")
         weights = Weights(filenames=filenames, device=device, dtype=dtype, process_group=self.process_group)
+
         print(f"RANK[{rank}]: Loaded Weights success. device:{device}")
 
         torch.distributed.barrier(group=self.process_group)
@@ -887,4 +893,5 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
         )
         kwargs.update({"pretrained_model_name_or_path": pretrained_model_name_or_path})
         model = cls(config, *model_args, **kwargs)
+
         return model
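
The one functional change in this commit is the dtype resolution order: an explicit dtype kwarg now falls back to config.torch_dtype before defaulting to float16 on CUDA. A minimal sketch of that logic, assuming a Hugging Face-style config object that exposes a torch_dtype attribute (resolve_dtype and its signature are illustrative names, not part of this file):

import torch

def resolve_dtype(kwargs, config):
    # Resolution order after this commit:
    # explicit dtype kwarg > config.torch_dtype > torch.float16 on CUDA.
    dtype = kwargs.get("dtype", None)
    if dtype is None:
        dtype = config.torch_dtype  # may still be None if the config leaves it unset
    if not torch.cuda.is_available():
        raise NotImplementedError("Flash is only available on GPU")
    return torch.float16 if dtype is None else dtype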
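
The remaining changes gate the startup prints behind rank 0, so a multi-GPU job logs each message once rather than world_size times. A minimal sketch of that pattern with plain torch.distributed (log_rank0 is a hypothetical helper, not something this file defines):

import torch.distributed as dist

def log_rank0(*args, **kwargs):
    # Every rank executes the loading code, so an unguarded print appears
    # once per process; gating on rank 0 emits it once per job.
    if not dist.is_initialized() or dist.get_rank() == 0:
        print(*args, **kwargs)

# e.g. log_rank0(f"Will read model dir {model_id}") instead of the
# rank-guarded print blocks in the diff above.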