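# Re-fuses the per-expert MoE weights of a fine-tuned DBRX checkpoint
# (transformer.blocks.%b.ffn.experts.mlp_experts.%e.{v1,w1,w2}.weight) into single
# per-block tensors (transformer.blocks.%b.ffn.experts.mlp.{v1,w1,w2}), re-shards
# everything into ~5 GB safetensors files, and rewrites model.safetensors.index.json.
# Intermediate tensors are staged as .pt files in temp_folder to keep memory usage low.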
import json
import os
from safetensors import safe_open
from safetensors.torch import save_file
import torch
def load_index_file(index_path):
    with open(index_path, 'r') as f:
        return json.load(f)
def save_index_file(index_path, index_data):
    with open(index_path, 'w') as f:
        json.dump(index_data, f, indent=2)
def load_tensor(file_path, tensor_name):
    with safe_open(file_path, framework="pt") as f:
        return f.get_tensor(tensor_name)
def save_tensor(file_path, tensor_data):
    save_file(tensor_data, file_path)
def get_tensor_location(tensor_index, tensor_name):
    if tensor_name in tensor_index['weight_map']:
        return tensor_index['weight_map'][tensor_name], tensor_name
    return None, None
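# Gather the 16 per-expert tensors for one block, transpose w2 back into the fused
# orientation, concatenate them along dim 0, and stage the result as a temporary
# .pt file so it can be re-sharded later without holding everything in memory.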
def concatenate_experts(input_path, fine_tuned_index, block_number, tensor_base_name, tensor_fine_name, temp_folder):
    experts_tensors = []
    for expert_number in range(16):  # DBRX uses 16 experts per block
        tensor_name = tensor_fine_name.replace('%b', str(block_number)).replace('%e', str(expert_number))
        shard_file, tensor_location = get_tensor_location(fine_tuned_index, tensor_name)
        if shard_file is None:
            raise ValueError(f"Tensor {tensor_name} not found in the index.")
        tensor_data = load_tensor(os.path.join(input_path, shard_file), tensor_location)
        if tensor_fine_name.endswith(".w2.weight"):
            tensor_data = tensor_data.transpose(0, 1)  # w2 is stored transposed in the per-expert layout; swap dims back before fusing
        experts_tensors.append(tensor_data)
    concatenated_tensor = torch.cat(experts_tensors, dim=0)  # Fuse the experts along dim 0
    temp_file_path = os.path.join(temp_folder, f"temp_concatenated_{block_number}_{tensor_base_name}.pt")
    torch.save(concatenated_tensor, temp_file_path)
    return temp_file_path
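# Pack the staged tensors into safetensors shards of at most shard_size bytes.
# Returns a per-shard manifest plus a tensor-name -> shard-filename map that is
# used below to rebuild the index's weight_map.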
def save_sharded_tensors(output_path, tensors, shard_size=5 * 1024 * 1024 * 1024):  # 5 GB per-shard limit
    shard_data = {}
    shard_count = 1  # Shard numbering starts at 1
    current_shard_size = 0
    shard_index = []
    tensor_to_shard_map = {}
    for tensor_name, tensor_file_path in tensors.items():
        tensor_data = torch.load(tensor_file_path)
        tensor_size = tensor_data.numel() * tensor_data.element_size()  # Tensor size in bytes
        if current_shard_size + tensor_size > shard_size:
            # Flush the current shard before it exceeds the size limit.
            # NOTE: the total shard count in the filename is hardcoded to 61.
            shard_filename = f"model-{shard_count:05d}-of-00061.safetensors"
            save_tensor(os.path.join(output_path, shard_filename), shard_data)
            shard_index.append({
                'filename': shard_filename,
                'tensors': {name: {'shape': tensor.shape, 'dtype': str(tensor.dtype), 'offset': 0} for name, tensor in shard_data.items()}
            })
            shard_data = {}
            shard_count += 1
            current_shard_size = 0
        shard_data[tensor_name] = tensor_data
        tensor_to_shard_map[tensor_name] = f"model-{shard_count:05d}-of-00061.safetensors"
        current_shard_size += tensor_size
    if shard_data:
        # Flush whatever is left as the final shard
        shard_filename = f"model-{shard_count:05d}-of-00061.safetensors"
        save_tensor(os.path.join(output_path, shard_filename), shard_data)
        shard_index.append({
            'filename': shard_filename,
            'tensors': {name: {'shape': tensor.shape, 'dtype': str(tensor.dtype), 'offset': 0} for name, tensor in shard_data.items()}
        })
        # Ensure the last shard is correctly mapped
        for tensor_name in shard_data.keys():
            tensor_to_shard_map[tensor_name] = shard_filename
    return shard_index, tensor_to_shard_map
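# Drive the whole conversion: fuse the expert MLP weights for all 40 blocks, copy
# every other tensor through unchanged, re-shard, and write the updated index.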
def transform_model(input_path, fine_tuned_index_path, output_path, output_index_path, temp_folder):
    fine_tuned_index = load_index_file(fine_tuned_index_path)
    transformed_tensors = {}
    for block_number in range(40):  # DBRX has 40 transformer blocks
        # Fuse the 16 per-expert MLP tensors back into one tensor per projection
        for tensor_base_name, tensor_fine_name in [
            ("transformer.blocks.%b.ffn.experts.mlp.v1", "transformer.blocks.%b.ffn.experts.mlp_experts.%e.v1.weight"),
            ("transformer.blocks.%b.ffn.experts.mlp.w1", "transformer.blocks.%b.ffn.experts.mlp_experts.%e.w1.weight"),
            ("transformer.blocks.%b.ffn.experts.mlp.w2", "transformer.blocks.%b.ffn.experts.mlp_experts.%e.w2.weight"),
        ]:
            tensor_name = tensor_base_name.replace('%b', str(block_number))
            concatenated_tensor_path = concatenate_experts(input_path, fine_tuned_index, block_number, tensor_base_name, tensor_fine_name, temp_folder)
            transformed_tensors[tensor_name] = concatenated_tensor_path
        # All other tensors are copied from the fine-tuned model unchanged.
        # Block-independent entries (lm_head, norm_f, wte) are re-copied on every
        # iteration; this is redundant but harmless, as they overwrite the same key.
        for tensor_name in [
            "lm_head.weight",
            "transformer.blocks.%b.ffn.router.layer.weight",
            "transformer.blocks.%b.norm_attn_norm.attn.Wqkv.weight",
            "transformer.blocks.%b.norm_attn_norm.attn.out_proj.weight",
            "transformer.blocks.%b.norm_attn_norm.norm_1.weight",
            "transformer.blocks.%b.norm_attn_norm.norm_2.weight",
            "transformer.norm_f.weight",
            "transformer.wte.weight"
        ]:
            tensor_name = tensor_name.replace('%b', str(block_number))
            shard_file, tensor_location = get_tensor_location(fine_tuned_index, tensor_name)
            if shard_file is None:
                raise ValueError(f"Tensor {tensor_name} not found in the index.")
            tensor_data = load_tensor(os.path.join(input_path, shard_file), tensor_location)
            temp_file_path = os.path.join(temp_folder, f"temp_{tensor_name}.pt")
            torch.save(tensor_data, temp_file_path)
            transformed_tensors[tensor_name] = temp_file_path
    # Save transformed tensors into shards
    shard_index, tensor_to_shard_map = save_sharded_tensors(output_path, transformed_tensors)
    # Rewrite the weight_map so each tensor points at its new shard, then save the index
    fine_tuned_index['weight_map'] = tensor_to_shard_map
    save_index_file(output_index_path, fine_tuned_index)
# Paths (edit these for your setup)
input_path = '/path/cognitivecomputations_dolphin-2.9.1-dbrx/'
fine_tuned_index_path = '/path/cognitivecomputations_dolphin-2.9.1-dbrx/model.safetensors.index.json'
output_index_path = '/path/dolphin-2.9.1-dbrx-llamacppfix/model.safetensors.index.json'
output_path = '/path/output/'  # make sure there is at least 250 GB free here
temp_folder = '/path/temp/'    # make sure there is at least 250 GB free here
# Ensure the temp and output folders exist
os.makedirs(temp_folder, exist_ok=True)
os.makedirs(output_path, exist_ok=True)
transform_model(input_path, fine_tuned_index_path, output_path, output_index_path, temp_folder)
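# Optional sanity check (a minimal sketch, not part of the original workflow):
# re-open one of the produced shards and print tensor names and shapes to confirm
# the fused expert tensors look right. Adjust the filename to one that exists.
# with safe_open(os.path.join(output_path, "model-00001-of-00061.safetensors"), framework="pt") as f:
#     for name in f.keys():
#         print(name, f.get_tensor(name).shape)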