MiniCPM-V-2_6

Running

finalf0 committed on Aug 7, 2024

Commit

a4b23f2

1 Parent(s): f1b5dd8

multi-gpus

Files changed (2) hide show

app.py CHANGED Viewed

@@ -47,10 +47,11 @@ if 'int4' in model_path:
     #model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa')
     model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 else:
-    if args.multi_gpus:
         from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
         with init_empty_weights():
-            model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
         device_map = infer_auto_device_map(model, max_memory={0: "10GB", 1: "10GB"},
             no_split_module_classes=['SiglipVisionTransformer', 'Qwen2DecoderLayer'])
         device_id = device_map["llm.model.embed_tokens"]
@@ -69,7 +70,8 @@ else:
         device_map["llm.model.layers.16"] = device_id2
         #print(device_map)
-        model = load_checkpoint_and_dispatch(model, model_path, dtype=torch.bfloat16, device_map=device_map)
     else:
         #model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
         model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)

     #model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa')
     model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 else:
+    if True: #args.multi_gpus:
         from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
         with init_empty_weights():
+            #model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
+            model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
         device_map = infer_auto_device_map(model, max_memory={0: "10GB", 1: "10GB"},
             no_split_module_classes=['SiglipVisionTransformer', 'Qwen2DecoderLayer'])
         device_id = device_map["llm.model.embed_tokens"]
         device_map["llm.model.layers.16"] = device_id2
         #print(device_map)
+        #model = load_checkpoint_and_dispatch(model, model_path, dtype=torch.bfloat16, device_map=device_map)
+        model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map=device_map)
     else:
         #model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
         model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)

requirements.txt CHANGED Viewed

@@ -8,3 +8,4 @@ opencv-python
 decord
 gradio==4.22.0
 http://thunlp.oss-cn-qingdao.aliyuncs.com/multi_modal/never_delete/modelscope_studio-0.4.0.9-py3-none-any.whl

 decord
 gradio==4.22.0
 http://thunlp.oss-cn-qingdao.aliyuncs.com/multi_modal/never_delete/modelscope_studio-0.4.0.9-py3-none-any.whl
+accelerate