Spaces:
Running
Running
multi-gpus
Browse files
- app.py +5 -3
- requirements.txt +1 -0
app.py
CHANGED
@@ -47,10 +47,11 @@ if 'int4' in model_path:
|
|
47 |
#model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa')
|
48 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
|
49 |
else:
|
50 |
-
if args.multi_gpus:
|
51 |
from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
|
52 |
with init_empty_weights():
|
53 |
-
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
|
|
|
54 |
device_map = infer_auto_device_map(model, max_memory={0: "10GB", 1: "10GB"},
|
55 |
no_split_module_classes=['SiglipVisionTransformer', 'Qwen2DecoderLayer'])
|
56 |
device_id = device_map["llm.model.embed_tokens"]
|
@@ -69,7 +70,8 @@ else:
|
|
69 |
device_map["llm.model.layers.16"] = device_id2
|
70 |
#print(device_map)
|
71 |
|
72 |
-
model = load_checkpoint_and_dispatch(model, model_path, dtype=torch.bfloat16, device_map=device_map)
|
|
|
73 |
else:
|
74 |
#model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
|
75 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
|
|
|
47 |
#model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa')
|
48 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
|
49 |
else:
|
50 |
+
if True: #args.multi_gpus:
|
51 |
from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
|
52 |
with init_empty_weights():
|
53 |
+
#model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
|
54 |
+
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
|
55 |
device_map = infer_auto_device_map(model, max_memory={0: "10GB", 1: "10GB"},
|
56 |
no_split_module_classes=['SiglipVisionTransformer', 'Qwen2DecoderLayer'])
|
57 |
device_id = device_map["llm.model.embed_tokens"]
|
|
|
70 |
device_map["llm.model.layers.16"] = device_id2
|
71 |
#print(device_map)
|
72 |
|
73 |
+
#model = load_checkpoint_and_dispatch(model, model_path, dtype=torch.bfloat16, device_map=device_map)
|
74 |
+
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map=device_map)
|
75 |
else:
|
76 |
#model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
|
77 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
|
requirements.txt
CHANGED
@@ -8,3 +8,4 @@ opencv-python
|
|
8 |
decord
|
9 |
gradio==4.22.0
|
10 |
http://thunlp.oss-cn-qingdao.aliyuncs.com/multi_modal/never_delete/modelscope_studio-0.4.0.9-py3-none-any.whl
|
|
|
|
8 |
decord
|
9 |
gradio==4.22.0
|
10 |
http://thunlp.oss-cn-qingdao.aliyuncs.com/multi_modal/never_delete/modelscope_studio-0.4.0.9-py3-none-any.whl
|
11 |
+
accelerate
|