Vision-CAIR committed on
Commit
efcffe8
·
verified ·
1 Parent(s): 92fb897

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. modeling_llama_v2.py +0 -1
  2. registry.py +16 -19
  3. utils.py +0 -133
modeling_llama_v2.py CHANGED
@@ -9,7 +9,6 @@ from transformers.utils import add_start_docstrings_to_model_forward, replace_re
9
  from transformers.modeling_outputs import CausalLMOutputWithPast
10
  from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING, _CONFIG_FOR_DOC
11
  from transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig
12
- # from minigpt4_video.models.transformers.src.transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig
13
 
14
  class LlamaForCausalLM(LlamaForCausalLMOrig):
15
 
 
9
  from transformers.modeling_outputs import CausalLMOutputWithPast
10
  from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING, _CONFIG_FOR_DOC
11
  from transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig
 
12
 
13
  class LlamaForCausalLM(LlamaForCausalLMOrig):
14
 
registry.py CHANGED
@@ -26,24 +26,21 @@ class Registry:
26
  Args:
27
  name: Key with which the task will be registered.
28
 
29
- Usage:
30
-
31
- from minigpt4.common.registry import registry
32
  """
33
 
34
  def wrap(model_cls):
35
- # from minigpt4.models import BaseModel
36
 
37
- # assert issubclass(
38
- # model_cls, BaseModel
39
- # ), "All models must inherit BaseModel class"
40
 
41
- # if name in cls.mapping["model_name_mapping"]:
42
- # raise KeyError(
43
- # "Name '{}' already registered for {}.".format(
44
- # name, cls.mapping["model_name_mapping"][name]
45
- # )
46
- # )
47
  cls.mapping["model_name_mapping"][name] = model_cls
48
  return model_cls
49
 
@@ -58,7 +55,7 @@ class Registry:
58
 
59
  Usage:
60
 
61
- from minigpt4.common.registry import registry
62
  """
63
 
64
  def wrap(processor_cls):
@@ -87,7 +84,7 @@ class Registry:
87
 
88
  Usage:
89
 
90
- from minigpt4.common.registry import registry
91
  """
92
 
93
  def wrap(lr_sched_cls):
@@ -111,7 +108,7 @@ class Registry:
111
 
112
  Usage:
113
 
114
- from minigpt4.common.registry import registry
115
  """
116
 
117
  def wrap(runner_cls):
@@ -135,7 +132,7 @@ class Registry:
135
 
136
  Usage:
137
 
138
- from minigpt4.common.registry import registry
139
  """
140
  assert isinstance(path, str), "All path must be str."
141
  if name in cls.mapping["paths"]:
@@ -151,7 +148,7 @@ class Registry:
151
 
152
  Usage::
153
 
154
- from minigpt4.common.registry import registry
155
 
156
  registry.register("config", {})
157
  """
@@ -260,7 +257,7 @@ class Registry:
260
  name: Key which needs to be removed.
261
  Usage::
262
 
263
- from mmf.common.registry import registry
264
 
265
  config = registry.unregister("config")
266
  """
 
26
  Args:
27
  name: Key with which the task will be registered.
28
 
 
 
 
29
  """
30
 
31
  def wrap(model_cls):
32
+ from .base_model import BaseModel
33
 
34
+ assert issubclass(
35
+ model_cls, BaseModel
36
+ ), "All models must inherit BaseModel class"
37
 
38
+ if name in cls.mapping["model_name_mapping"]:
39
+ raise KeyError(
40
+ "Name '{}' already registered for {}.".format(
41
+ name, cls.mapping["model_name_mapping"][name]
42
+ )
43
+ )
44
  cls.mapping["model_name_mapping"][name] = model_cls
45
  return model_cls
46
 
 
55
 
56
  Usage:
57
 
58
+ from .registry import registry
59
  """
60
 
61
  def wrap(processor_cls):
 
84
 
85
  Usage:
86
 
87
+ from .registry import registry
88
  """
89
 
90
  def wrap(lr_sched_cls):
 
108
 
109
  Usage:
110
 
111
+ from .registry import registry
112
  """
113
 
114
  def wrap(runner_cls):
 
132
 
133
  Usage:
134
 
135
+ from .registry import registry
136
  """
137
  assert isinstance(path, str), "All path must be str."
138
  if name in cls.mapping["paths"]:
 
148
 
149
  Usage::
150
 
151
+ from .registry import registry
152
 
153
  registry.register("config", {})
154
  """
 
257
  name: Key which needs to be removed.
258
  Usage::
259
 
260
+ from .registry import registry
261
 
262
  config = registry.unregister("config")
263
  """
utils.py CHANGED
@@ -468,136 +468,3 @@ def merge_vision_embeddings(input_ids: torch.Tensor,
468
  inputs_embeds[mask] = torch.cat(vision_embeddings)
469
 
470
  return inputs_embeds
471
-
472
-
473
- class LayerFn(Protocol):
474
-
475
- def __call__(
476
- self,
477
- prefix="",
478
- ) -> torch.nn.Module:
479
- ...
480
-
481
-
482
- class PPMissingLayer(torch.nn.Identity):
483
- """
484
- A placeholder layer for missing layers in a pipeline parallel model.
485
- """
486
-
487
- def __init__(self, *args, **kwargs):
488
- super().__init__()
489
-
490
-
491
- _CPU_OFFLOAD_BYTES = 0
492
- _CPU_OFFLOAD_MAX_BYTES = 0
493
-
494
-
495
- def set_cpu_offload_max_bytes(max_bytes: int) -> None:
496
- global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
497
- _CPU_OFFLOAD_BYTES = 0
498
- _CPU_OFFLOAD_MAX_BYTES = max_bytes
499
-
500
-
501
- def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
502
- device = next(module.parameters()).device
503
-
504
- if device == torch.device("cpu"):
505
- return module
506
-
507
- global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
508
- if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
509
- return module
510
-
511
- pin_memory = is_pin_memory_available()
512
-
513
- # offload parameters to CPU
514
- # use pin_memory if possible, which helps cudagraph capture speed
515
- for p in module.parameters():
516
- if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
517
- # we use per-parameter offloading
518
- # one module might have some parameters offloaded and some not
519
- break
520
-
521
- # `torch.empty_like` does not support `pin_memory` argument
522
- cpu_data = torch.empty(size=p.data.size(),
523
- dtype=p.data.dtype,
524
- layout=p.data.layout,
525
- device='cpu',
526
- pin_memory=pin_memory)
527
- cpu_data.copy_(p.data)
528
- p.data = cpu_data
529
- _CPU_OFFLOAD_BYTES += p.data.numel() * p.data.element_size()
530
-
531
- state_dict: Dict[str, torch.Tensor] = module.state_dict()
532
-
533
- original_forward = module.forward
534
-
535
- def forward(*args, **kwargs):
536
- module.forward = original_forward
537
- device_state = {
538
- # here we blindly call `to(device)`
539
- # if the parameter is already on the device, it will be a no-op
540
- k: v.to(device, non_blocking=True)
541
- for k, v in state_dict.items()
542
- }
543
- output = functional_call(module,
544
- device_state,
545
- args=args,
546
- kwargs=kwargs)
547
- module.forward = forward
548
- return output
549
-
550
- module.forward = forward
551
-
552
- return module
553
-
554
-
555
- def make_layers(
556
- num_hidden_layers: int,
557
- layer_fn: LayerFn,
558
- prefix: str,
559
- ) -> Tuple[int, int, torch.nn.ModuleList]:
560
- """Make a list of layers with the given layer function, taking
561
- pipeline parallelism into account.
562
- """
563
- from vllm.distributed.parallel_state import get_pp_group
564
- from vllm.distributed.utils import get_pp_indices
565
- start_layer, end_layer = get_pp_indices(num_hidden_layers,
566
- get_pp_group().rank_in_group,
567
- get_pp_group().world_size)
568
- modules = torch.nn.ModuleList(
569
- [PPMissingLayer() for _ in range(start_layer)] + [
570
- maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
571
- for idx in range(start_layer, end_layer)
572
- ] + [PPMissingLayer() for _ in range(end_layer, num_hidden_layers)])
573
- return start_layer, end_layer, modules
574
-
575
-
576
- # NOTE: don't use lru_cache here because it can prevent garbage collection
577
- _model_to_pp_missing_layer_names: Dict[int, List[str]] = {}
578
-
579
-
580
- def get_pp_missing_layer_names(model: torch.nn.Module) -> List[str]:
581
- """Get the names of the missing layers in a pipeline parallel model."""
582
- model_id = id(model)
583
- if model_id in _model_to_pp_missing_layer_names:
584
- return _model_to_pp_missing_layer_names[model_id]
585
-
586
- missing_layer_names = []
587
- for name, module in model.named_modules():
588
- if isinstance(module, PPMissingLayer):
589
- # NOTE: the trailing dot is used to match the prefix of the layer.
590
- # without the dot, we could match a layer that is not missing,
591
- # e.g., 'encoder.layer.1' would match 'encoder.layer.11'
592
- missing_layer_names.append(name + '.')
593
- _model_to_pp_missing_layer_names[model_id] = missing_layer_names
594
-
595
- return missing_layer_names
596
-
597
-
598
- def is_pp_missing_parameter(name: str, model: torch.nn.Module) -> bool:
599
- """Check if a parameter is missing in a pipeline parallel model."""
600
- for missing_layer_name in get_pp_missing_layer_names(model):
601
- if name.startswith(missing_layer_name):
602
- return True
603
- return False
 
468
  inputs_embeds[mask] = torch.cat(vision_embeddings)
469
 
470
  return inputs_embeds