Create layer replacement.
linear_8.py  +87 -0
ADDED
@@ -0,0 +1,87 @@
from diffusers.models.normalization import RMSNorm
import torch
from torch import nn, Tensor


###
# Code from aredden/flux-fp8-api

class Linear8(nn.Module):
    __constants__ = ['in_features', 'out_features']

    def __init__(self,
                 in_features: int, out_features: int, bias: bool = True,
                 device=None, dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs))
        if bias:
            self.bias = nn.Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.does_fp8 = self.supports_fp8_compute()
        self.scale_weight = torch.ones(1, device=device, dtype=torch.float32)
        self.scale_input = torch.ones(1, device=device, dtype=torch.float32)

    def supports_fp8_compute(self, device=None):
        # Float8 matmul needs an Ada (SM 8.9) or Hopper (SM 9.0) class GPU.
        if not torch.cuda.is_available():
            return False
        props = torch.cuda.get_device_properties(device)
        if props.major >= 9 or props.major == 8 and props.minor >= 9:
            return True

        return False

    # def __setattr__(self, key, value):
    #     if isinstance(value, nn.Parameter):
    #         pass

    def forward(self, x: Tensor) -> Tensor:
        if self.does_fp8 is False:
            return torch.nn.functional.linear(x, self.weight, self.bias)
        dims = x.shape[:-1]
        x = x.view(-1, self.in_features)
        # Requires torch 2.4. The transpose gives _scaled_mm the column-major
        # second operand it expects.
        y = torch._scaled_mm(x.to(torch.float8_e4m3fn),
                             torch.transpose(self.weight, 0, 1),
                             scale_a=self.scale_input.to(device=x.device),
                             scale_b=self.scale_weight.to(device=x.device),
                             bias=self.bias.to(torch.bfloat16) if self.bias is not None else None,
                             out_dtype=self.weight.dtype,
                             use_fast_accum=True)[0]

        return y.view(*dims, self.out_features).to(torch.bfloat16)

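# Note: torch._scaled_mm computes with float8 operands on both sides, so this
# layer assumes the weight it is given is already stored as float8_e4m3fn
# (e.g. a transformer checkpoint loaded with torch_dtype=torch.float8_e4m3fn).
# scale_input and scale_weight stay at 1.0, i.e. no dynamic amax-based
# rescaling is performed before the matmul.
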
###
# Code from sayakpaul
# http://github.com/huggingface/diffusers/issues/6500

def replace_regular_linears(module, parent=''):
    for name, child in module.named_children():
        if isinstance(child, torch.nn.Linear):
            in_features = child.in_features
            out_features = child.out_features
            device = child.weight.data.device
            dtype = child.weight.data.dtype
            has_bias = True if child.bias is not None else False
            new_layer = Linear8(in_features,
                                out_features,
                                has_bias,
                                device,
                                dtype)
            new_layer.load_state_dict(child.state_dict())
            new_layer = new_layer.to(device)

            setattr(module, name, new_layer)
        elif isinstance(child, RMSNorm):
            # RMSNorm doesn't support float8.
            rsd = child.state_dict()
            if 'weight' in rsd:
                child.load_state_dict({'weight': rsd['weight'].to(torch.bfloat16)},
                                      assign=True)
        else:
            # Recursively apply to child modules.
            if parent == '':
                replace_regular_linears(child, parent=name)
            else:
                replace_regular_linears(child, parent='.'.join([parent, name]))
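A minimal usage sketch (not part of this commit; the model id, subfolder, and dtype choices are assumptions): load the Flux transformer with float8 weights, swap its Linear layers with replace_regular_linears, then run it inside a bfloat16 pipeline.

# Usage sketch -- assumptions: FLUX.1-dev checkpoint, transformer weights
# loaded as float8_e4m3fn so the Linear8 _scaled_mm path sees fp8 weights.
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel
from linear_8 import replace_regular_linears

transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev", subfolder="transformer",
    torch_dtype=torch.float8_e4m3fn)
replace_regular_linears(transformer)  # nn.Linear -> Linear8, RMSNorm weights -> bfloat16

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", transformer=transformer,
    torch_dtype=torch.bfloat16).to("cuda")
image = pipe("a photo of a cat").images[0]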