root committed
Commit 34eb6e3 • 1 Parent(s): 7cf213a
update modeling with print

Files changed: modeling_srv1_tp.py (+14 -7)

modeling_srv1_tp.py CHANGED
@@ -839,17 +839,22 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
         trust_remote_code = kwargs.get("trust_remote_code", False)
         quantize = kwargs.get("quantize", None)
         dtype = kwargs.get("dtype", None)
-
+        if dtype is None:
+            dtype = config.torch_dtype
+
         self.process_group, rank, world_size = initialize_torch_distributed()
-
+
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16 if dtype is None else dtype
-
+
         else:
             raise NotImplementedError("Flash is only available on GPU")
 
-
+        if rank == 0:
+            print(config)
+            print(f"Final dtype {dtype}")
+            print(f"Will read model dir {model_id}")
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,
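Note on the dtype change in this hunk: with the new `if dtype is None: dtype = config.torch_dtype` branch, the later `torch.float16` fallback only fires when the config also leaves `torch_dtype` unset. A minimal standalone sketch of the resulting resolution order (the `resolve_dtype` helper is hypothetical, for illustration; it also assumes `config.torch_dtype` is already a `torch.dtype` or `None`):

import torch

def resolve_dtype(requested, config_torch_dtype, cuda_available=True):
    # Mirrors the order in this commit: explicit kwarg wins, then
    # config.torch_dtype, then the CUDA default of float16.
    dtype = requested
    if dtype is None:
        dtype = config_torch_dtype
    if cuda_available:
        return torch.float16 if dtype is None else dtype
    raise NotImplementedError("Flash is only available on GPU")

assert resolve_dtype(torch.bfloat16, torch.float32) is torch.bfloat16  # kwarg wins
assert resolve_dtype(None, torch.float32) is torch.float32             # config fallback
assert resolve_dtype(None, None) is torch.float16                      # CUDA default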
@@ -857,14 +862,15 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
             truncation_side="left",
             trust_remote_code=trust_remote_code,
         )
-
-        # config = SRV1Config.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code)
+
         config.quantize = quantize
         torch.distributed.barrier(group=self.process_group)
         import glob
         filenames = glob.glob(f"{model_id}/*.safetensors")
-
+        if rank == 0:
+            print(f"Will read filename {filenames}")
         weights = Weights(filenames=filenames, device=device, dtype=dtype, process_group=self.process_group)
+
         print(f"RANK[{rank}]: Loaded Weights success. device:{device}")
 
         torch.distributed.barrier(group=self.process_group)
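The prints added here follow the usual pattern of gating output on rank 0, so a tensor-parallel world of N processes does not emit N copies of the same message, while the per-rank `RANK[{rank}]` line still prints from every process. A minimal sketch of that pattern (the `log_rank0` helper is an illustrative assumption, not part of this repo):

import torch.distributed as dist

def log_rank0(*args, **kwargs):
    # Print only on rank 0; outside a distributed run, print unconditionally.
    if not dist.is_initialized() or dist.get_rank() == 0:
        print(*args, **kwargs)

# Inside the loader this would replace the gated prints, e.g.:
# log_rank0(f"Will read filename {filenames}")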
@@ -887,4 +893,5 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
         )
         kwargs.update({"pretrained_model_name_or_path": pretrained_model_name_or_path})
         model = cls(config, *model_args, **kwargs)
+
         return model
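More broadly, the load sequence in the second hunk (barrier, glob for `*.safetensors`, construct `Weights`, barrier) keeps all ranks in step while each one reads the same checkpoint shards. A rough sketch of that shape using the safetensors library directly; unlike the repo's `Weights` helper, which shards tensors across ranks, this loads each tensor whole, and it assumes the process group is already initialized:

import glob
import torch
import torch.distributed as dist
from safetensors import safe_open

def load_checkpoint(model_dir: str, device: torch.device) -> dict:
    """Load all *.safetensors shards under model_dir onto device."""
    dist.barrier()  # no rank starts reading before everyone is ready
    tensors = {}
    for filename in sorted(glob.glob(f"{model_dir}/*.safetensors")):
        with safe_open(filename, framework="pt", device=str(device)) as f:
            for name in f.keys():
                tensors[name] = f.get_tensor(name)
    dist.barrier()  # no rank moves on while others are still loading
    return tensors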