File size: 3,404 Bytes
2252f3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
''' Moderator

Input features: a body feature and a part (head or hand) feature.
Output: the fused feature and the fusion weights.
'''
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F

# MLP + temperature-scaled softmax
# w = \text{softmax}(w^{\prime} \cdot \text{temperature})


class TempSoftmaxFusion(nn.Module):
    '''Fuse a body feature and a part feature with learned softmax weights.

    An MLP maps the concatenated features to logits. The logits are
    multiplied by a learnable scalar ``temperature`` (initialised to 1) and
    normalised with a softmax over dim 1; the first two resulting weights
    blend the two inputs.

    Args:
        channels: layer widths of the weight MLP, input width first. One
            ``nn.Linear`` is created per consecutive pair, with ``nn.ReLU``
            between layers (none after the last). The last width must be at
            least 2 for ``forward(..., work=True)`` because one weight per
            input is read; the default ends in 1 and therefore only
            supports ``work=False``.
        detach_inputs: if True, the concatenated MLP input is detached, so
            the weight prediction sends no gradient back to ``x``/``y``.
        detach_feature: if True, ``x`` and ``y`` are detached before they
            are blended, so the fused output sends no gradient back to them.
    '''

    def __init__(self,
                 channels=(2048 * 2, 1024, 1),
                 detach_inputs=False,
                 detach_feature=False):
        super().__init__()
        self.detach_inputs = detach_inputs
        self.detach_feature = detach_feature

        # weight MLP: Linear layers with a ReLU after every layer but the last
        num_linear = len(channels) - 1
        layers = []
        for idx in range(num_linear):
            layers.append(nn.Linear(channels[idx], channels[idx + 1]))
            if idx < num_linear - 1:
                layers.append(nn.ReLU())
        self.layers = nn.Sequential(*layers)

        # learnable temperature that scales the logits before the softmax
        self.register_parameter('temperature', nn.Parameter(torch.ones(1)))

    def forward(self, x, y, work=True):
        '''Fuse ``x`` and ``y``, or pass them through unchanged.

        Args:
            x: feature from body
            y: feature from part(head/hand)
            work: whether to fuse features

        Returns:
            ``(x_out, y_out, f_weight)``. With ``work=True`` both outputs
            are the same fused tensor and ``f_weight`` holds the softmax
            weights. With ``work=False`` the inputs are returned untouched
            and ``f_weight`` is None.

        Raises:
            IndexError: if the MLP produces fewer than two weights.
        '''
        if not work:
            return x, y, None

        # 1. cat input feature, predict the weights
        f_in = torch.cat([x, y], dim=1)
        if self.detach_inputs:
            f_in = f_in.detach()
        f_weight = F.softmax(self.layers(f_in) * self.temperature, dim=1)
        if f_weight.shape[1] < 2:
            # Same exception type the tensor indexing below would raise,
            # but with a message that points at the actual cause.
            raise IndexError(
                'TempSoftmaxFusion needs at least 2 fusion weights (last '
                'entry of `channels`), got {}'.format(f_weight.shape[1]))

        # 2. feature fusion
        if self.detach_feature:
            x = x.detach()
            y = y.detach()
        f_out = f_weight[:, [0]] * x + f_weight[:, [1]] * y
        return f_out, f_out, f_weight


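# MLP + hard (0/1) weights with a straight-through gradient
# NOTE: despite the "Gumbel-Softmax" name, no Gumbel noise is sampled here.
# w = w^{\prime} - w^{\prime}\text{.detach()} + w^{\prime}\text{.gt(0.5)}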


class GumbelSoftmaxFusion(nn.Module):
    '''Fuse a body feature and a part feature with hard (0/1) weights.

    An MLP followed by a softmax over the last dim predicts soft weights
    from the concatenated features. The weights are then thresholded at 0.5;
    the forward value is the hard weight while the gradient is that of the
    soft weight (``soft - soft.detach() + hard``). No Gumbel noise is
    sampled.

    Args:
        channels: layer widths of the weight MLP, input width first. One
            ``nn.Linear`` is created per consecutive pair, with ``nn.ReLU``
            between layers (none after the last), followed by a softmax.
            The last width must be at least 2 for
            ``forward(..., work=True)`` because one weight per input is
            read; the default ends in 1 and therefore only supports
            ``work=False``.
        detach_inputs: if True, the concatenated MLP input is detached, so
            the weight prediction sends no gradient back to ``x``/``y``.
        detach_feature: if True, ``x`` and ``y`` are detached before they
            are blended, so the fused output sends no gradient back to them.
    '''

    def __init__(self,
                 channels=(2048 * 2, 1024, 1),
                 detach_inputs=False,
                 detach_feature=False):
        super().__init__()
        self.detach_inputs = detach_inputs
        self.detach_feature = detach_feature

        # weight MLP: Linear layers with a ReLU after every layer but the last
        num_linear = len(channels) - 1
        layers = []
        for idx in range(num_linear):
            layers.append(nn.Linear(channels[idx], channels[idx + 1]))
            if idx < num_linear - 1:
                layers.append(nn.ReLU())
        # Explicit dim: the implicit choice is deprecated, warns on every
        # call, and picks dim 0 for 1-D/3-D inputs. The last dim matches the
        # concatenation axis used in forward().
        layers.append(nn.Softmax(dim=-1))
        self.layers = nn.Sequential(*layers)

    def forward(self, x, y, work=True):
        '''Fuse ``x`` and ``y``, or pass them through unchanged.

        Args:
            x: feature from body
            y: feature from part(head/hand)
            work: whether to fuse features

        Returns:
            ``(x_out, y_out, f_weight)``. With ``work=True`` both outputs
            are the same fused tensor and ``f_weight`` holds the hard
            weights. With ``work=False`` the inputs are returned untouched
            and ``f_weight`` is None.

        Raises:
            IndexError: if the MLP produces fewer than two weights.
        '''
        if not work:
            return x, y, None

        # 1. cat input feature, predict the weights
        f_in = torch.cat([x, y], dim=-1)
        if self.detach_inputs:
            f_in = f_in.detach()
        soft = self.layers(f_in)
        if soft.shape[-1] < 2:
            raise IndexError(
                'GumbelSoftmaxFusion needs at least 2 fusion weights (last '
                'entry of `channels`), got {}'.format(soft.shape[-1]))

        # weight to be hard: value is 0/1, gradient follows the soft weight.
        # A weight of exactly 0.5 is not greater than 0.5 and maps to 0.
        hard = soft.gt(0.5).to(soft.dtype)
        f_weight = soft - soft.detach() + hard

        # 2. feature fusion (slices keep the last dim so they broadcast)
        if self.detach_feature:
            x = x.detach()
            y = y.detach()
        f_out = f_weight[..., 0:1] * x + f_weight[..., 1:2] * y
        return f_out, f_out, f_weight