Spaces:
Running
on
L40S
Running
on
L40S
File size: 3,404 Bytes
2252f3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
''' Moderator
Input features: body, part (head or hand)
Outputs: fused feature, fusion weight
'''
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F
# MLP + temperature softmax
# w = SoftMax(w^\prime * temperature)
class TempSoftmaxFusion(nn.Module):
    '''Fuse a body feature with a part feature using temperature-softmax weights.

    An MLP maps the concatenated features to logits, which are scaled by a
    learnable temperature and normalised with a softmax over dim 1:
    ``w = softmax(mlp(cat(x, y)) * temperature)``.  Column 0 of ``w`` weights
    ``x`` and column 1 weights ``y``.

    Args:
        channels: widths of the MLP, input width first.  Every linear layer
            except the last is followed by a ReLU.  ``forward`` reads weight
            columns 0 and 1, so the last width must be at least 2 for fusion
            to work.  NOTE: the default ends in 1, so a module built with the
            default can only be called with ``work=False``; pass widths
            ending in 2 to actually fuse.
        detach_inputs: if true, the concatenated MLP input is detached, so
            the weight branch does not back-propagate into ``x`` / ``y``.
        detach_feature: if true, ``x`` and ``y`` are detached before the
            weighted sum, so the fused output only back-propagates into them
            through the weight branch (if at all).
    '''

    def __init__(self,
                 channels=(2048 * 2, 1024, 1),
                 detach_inputs=False,
                 detach_feature=False):
        super(TempSoftmaxFusion, self).__init__()
        self.detach_inputs = detach_inputs
        self.detach_feature = detach_feature

        # weight MLP: Linear layers with a ReLU after all but the last one
        last_linear = len(channels) - 2
        layers = []
        for idx, (c_in, c_out) in enumerate(zip(channels[:-1], channels[1:])):
            layers.append(nn.Linear(c_in, c_out))
            if idx < last_linear:
                layers.append(nn.ReLU())
        self.layers = nn.Sequential(*layers)

        # learnable temperature, initialised to 1
        self.register_parameter('temperature', nn.Parameter(torch.ones(1)))

    def forward(self, x, y, work=True):
        '''
        x: feature from body
        y: feature from part(head/hand)
        work: whether to fuse features

        Returns ``(x_out, y_out, f_weight)``.  When ``work`` is false the
        inputs are returned unchanged and ``f_weight`` is ``None``.  Otherwise
        ``x_out`` and ``y_out`` are the same fused tensor
        ``f_weight[:, [0]] * x + f_weight[:, [1]] * y``.

        Raises IndexError if the MLP produces fewer than two weights.
        '''
        if not work:
            return x, y, None

        # 1. cat input feature, predict the weights
        f_in = torch.cat([x, y], dim=1)
        if self.detach_inputs:
            f_in = f_in.detach()
        f_temp = self.layers(f_in)
        f_weight = F.softmax(f_temp * self.temperature, dim=1)
        if f_weight.shape[1] < 2:
            # Same exception type the bare indexing below would raise, but
            # with a message that points at the real cause.
            raise IndexError(
                'TempSoftmaxFusion needs at least 2 fusion weights '
                '(channels[-1] >= 2), got %d' % f_weight.shape[1])

        # 2. feature fusion
        if self.detach_feature:
            x = x.detach()
            y = y.detach()
        f_out = f_weight[:, [0]] * x + f_weight[:, [1]] * y
        return f_out, f_out, f_weight
# MLP + Gumbel-Softmax trick
# w = w^{\prime} - w^{\prime}\text{.detach()} + w^{\prime}\text{.gt(0.5)}
class GumbelSoftmaxFusion(nn.Module):
    '''Fuse a body feature with a part feature using hard (0/1) weights.

    An MLP ending in a softmax over dim 1 predicts soft weights ``w'`` from
    the concatenated features.  The weights are then hardened as
    ``w = w' - w'.detach() + w'.gt(0.5)``: the forward value is the 0/1
    threshold mask while gradients flow through the soft weights.  No noise
    is sampled in this module.  Column 0 of ``w`` weights ``x`` and column 1
    weights ``y``.

    Args:
        channels: widths of the MLP, input width first.  Every linear layer
            except the last is followed by a ReLU, and a softmax follows the
            last one.  ``forward`` reads weight columns 0 and 1, so the last
            width must be at least 2 for fusion to work.  NOTE: the default
            ends in 1, so a module built with the default can only be called
            with ``work=False``; pass widths ending in 2 to actually fuse.
        detach_inputs: if true, the concatenated MLP input is detached, so
            the weight branch does not back-propagate into ``x`` / ``y``.
        detach_feature: if true, ``x`` and ``y`` are detached before the
            weighted sum, so the fused output only back-propagates into them
            through the weight branch (if at all).
    '''

    def __init__(self,
                 channels=(2048 * 2, 1024, 1),
                 detach_inputs=False,
                 detach_feature=False):
        super(GumbelSoftmaxFusion, self).__init__()
        self.detach_inputs = detach_inputs
        self.detach_feature = detach_feature

        # weight MLP: Linear layers with a ReLU after all but the last one
        last_linear = len(channels) - 2
        layers = []
        for idx, (c_in, c_out) in enumerate(zip(channels[:-1], channels[1:])):
            layers.append(nn.Linear(c_in, c_out))
            if idx < last_linear:
                layers.append(nn.ReLU())
        # Explicit dim: the weights are indexed along dim 1 in forward(), and
        # leaving dim unset triggers PyTorch's implicit-dimension warning.
        layers.append(nn.Softmax(dim=1))
        self.layers = nn.Sequential(*layers)

    def forward(self, x, y, work=True):
        '''
        x: feature from body
        y: feature from part(head/hand)
        work: whether to fuse features

        Inputs are expected to be 2-D, since the weights are taken along
        dim 1 (NOTE(review): confirm against callers).

        Returns ``(x_out, y_out, f_weight)``.  When ``work`` is false the
        inputs are returned unchanged and ``f_weight`` is ``None``.  Otherwise
        ``x_out`` and ``y_out`` are the same fused tensor
        ``f_weight[:, [0]] * x + f_weight[:, [1]] * y``.  If no soft weight
        exceeds 0.5 (e.g. an exact tie) all hard weights are 0.

        Raises IndexError if the MLP produces fewer than two weights.
        '''
        if not work:
            return x, y, None

        # 1. cat input feature, predict the weights
        f_in = torch.cat([x, y], dim=-1)
        if self.detach_inputs:
            f_in = f_in.detach()
        soft_weight = self.layers(f_in)
        if soft_weight.shape[1] < 2:
            # Same exception type the bare indexing below would raise, but
            # with a message that points at the real cause.
            raise IndexError(
                'GumbelSoftmaxFusion needs at least 2 fusion weights '
                '(channels[-1] >= 2), got %d' % soft_weight.shape[1])

        # weight to be hard: forward value is the 0/1 mask, gradient is the
        # soft weight's gradient
        f_weight = soft_weight - soft_weight.detach() + soft_weight.gt(0.5)

        # 2. feature fusion
        if self.detach_feature:
            x = x.detach()
            y = y.detach()
        f_out = f_weight[:, [0]] * x + f_weight[:, [1]] * y
        return f_out, f_out, f_weight
|