MoC-IQA
- app.py +45 -0
- models/__pycache__/WAIQT.cpython-38.pyc +0 -0
- models/__pycache__/gc_loss.cpython-37.pyc +0 -0
- models/__pycache__/gc_loss.cpython-38.pyc +0 -0
- models/__pycache__/monet.cpython-37.pyc +0 -0
- models/__pycache__/monet.cpython-38.pyc +0 -0
- models/gc_loss.py +187 -0
- models/monet.py +250 -0
- utils/__pycache__/__init__.cpython-37.pyc +0 -0
- utils/__pycache__/iqa_solver.cpython-36.pyc +0 -0
- utils/__pycache__/iqa_solver.cpython-37.pyc +0 -0
- utils/__pycache__/iqa_solver.cpython-38.pyc +0 -0
- utils/__pycache__/log_writer.cpython-36.pyc +0 -0
- utils/__pycache__/log_writer.cpython-37.pyc +0 -0
- utils/__pycache__/log_writer.cpython-38.pyc +0 -0
- utils/__pycache__/process.cpython-38.pyc +0 -0
- utils/dataset/__pycache__/__init__.cpython-37.pyc +0 -0
- utils/dataset/__pycache__/data_loader.cpython-37.pyc +0 -0
- utils/dataset/__pycache__/data_loader.cpython-38.pyc +0 -0
- utils/dataset/__pycache__/folders.cpython-37.pyc +0 -0
- utils/dataset/__pycache__/folders.cpython-38.pyc +0 -0
- utils/dataset/__pycache__/process.cpython-38.pyc +0 -0
- utils/dataset/data_loader.py +36 -0
- utils/dataset/dataset_info.json +6 -0
- utils/dataset/folders.py +207 -0
- utils/dataset/process.py +57 -0
- utils/iqa_solver.py +130 -0
- utils/log_writer.py +14 -0
app.py
ADDED
@@ -0,0 +1,45 @@
import torch
import torchvision

import cv2
import numpy as np
from models import monet as MoNet
import argparse
from utils.dataset.process import ToTensor, Normalize
import gradio as gr


def load_image(img_path):
    # Gradio hands predict() a PIL image, so convert it to an OpenCV BGR array first.
    d_img = cv2.cvtColor(np.asarray(img_path), cv2.COLOR_RGB2BGR)
    # d_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    d_img = cv2.resize(d_img, (224, 224), interpolation=cv2.INTER_CUBIC)
    d_img = cv2.cvtColor(d_img, cv2.COLOR_BGR2RGB)
    d_img = np.array(d_img).astype('float32') / 255
    d_img = np.transpose(d_img, (2, 0, 1))

    return d_img


def predict(image):
    parser = argparse.ArgumentParser()
    # model related
    parser.add_argument('--backbone', dest='backbone', type=str, default='vit_base_patch8_224',
                        help='The backbone for MoNet.')
    parser.add_argument('--mal_num', dest='mal_num', type=int, default=3, help='The number of the MAL modules.')
    config = parser.parse_args()

    model = MoNet.MoNet(config).cuda()
    model.load_state_dict(torch.load('./checkpoints/best_model.pkl'))
    model.eval()

    trans = torchvision.transforms.Compose([Normalize(0.5, 0.5), ToTensor()])

    # Run a single prediction on the model.
    img = load_image(image)
    img_tensor = trans(img).unsqueeze(0).cuda()
    iq = model(img_tensor).cpu().detach().numpy().tolist()[0]

    return "The image quality of the image is: {}".format(round(iq, 4))


# os.system("wget -O ./checkpoints/best_model.pkl https://huggingface.co/Zevin2023/MoC-IQA/resolve/main/Koniq10K_570908.pkl")

interface = gr.Interface(fn=predict, inputs="image", outputs="text")
interface.launch(server_name='127.0.0.1', server_port=8088)
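For a quick local smoke test, predict() can also be called directly, bypassing the Gradio UI. A minimal sketch, assuming a CUDA device, an existing ./checkpoints/best_model.pkl, and a hypothetical sample.jpg:

from PIL import Image

img = Image.open('sample.jpg').convert('RGB')  # sample.jpg is a placeholder path
print(predict(img))  # prints e.g. "The image quality of the image is: 0.5321"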
models/__pycache__/WAIQT.cpython-38.pyc
ADDED
Binary file (6.91 kB).
models/__pycache__/gc_loss.cpython-37.pyc
ADDED
Binary file (5.34 kB).
models/__pycache__/gc_loss.cpython-38.pyc
ADDED
Binary file (3.11 kB).
models/__pycache__/monet.cpython-37.pyc
ADDED
Binary file (8.07 kB).
models/__pycache__/monet.cpython-38.pyc
ADDED
Binary file (8.1 kB).
models/gc_loss.py
ADDED
@@ -0,0 +1,187 @@
import torch.nn as nn
import torch
import numpy as np

# class GC_Loss(nn.Module):
#     def __init__(self, queue_len=800):
#         super(GC_Loss, self).__init__()
#         self.pred_queue = list()
#         self.gt_queue = list()
#         self.queue_len = 0

#         self.queue_max_len = queue_len

#         print('CCWD Length: ', queue_len)

#         self.l1_loss = torch.nn.L1Loss().cuda()
#         self.l2_loss = torch.nn.MSELoss().cuda()

#     def enqueue(self, pred, gt):
#         bs = pred.shape[0]
#         self.queue_len = self.queue_len + bs

#         self.pred_queue = self.pred_queue + pred.cpu().detach().numpy().tolist()
#         self.gt_queue = self.gt_queue + gt.cpu().detach().numpy().tolist()

#         if self.queue_len > self.queue_max_len:
#             self.dequeue(self.queue_len - self.queue_max_len)
#             self.queue_len = self.queue_max_len

#     def dequeue(self, n):
#         for index in range(n):
#             self.pred_queue.pop(0)
#             self.gt_queue.pop(0)

#     def clear(self):
#         self.pred_queue.clear()
#         self.gt_queue.clear()

#     def forward(self, x, y):
#         x_queue = self.pred_queue.copy()
#         y_queue = self.gt_queue.copy()

#         # Gather all values in the queue
#         x_all = torch.cat((x, torch.tensor(x_queue).cuda()), dim=0)
#         y_all = torch.cat((y, torch.tensor(y_queue).cuda()), dim=0)

#         # Estimate mean and standard deviation
#         x_bar = torch.mean(x_all, dim=0)
#         x_std = torch.std(x_all, dim=0)

#         y_bar = torch.mean(y_all, dim=0)
#         y_std = torch.std(y_all, dim=0)

#         # Estimate the PLCC of the predictions within the overall values
#         diff_x_plcc = (x - x_bar)  # [bs, 1]
#         diff_y_plcc = (y - y_bar)  # [bs, 1]

#         x1 = torch.sum(torch.mul(diff_x_plcc, diff_y_plcc))
#         x2_1 = torch.sqrt(torch.sum(torch.mul(diff_x_plcc, diff_x_plcc)))
#         x2_2 = torch.sqrt(torch.sum(torch.mul(diff_y_plcc, diff_y_plcc)))

#         # Standardize all values
#         diff_x = (x_all - x_bar) / x_std  # [bs, 1]
#         diff_y = (y_all - y_bar) / y_std  # [bs, 1]

#         rank_x = diff_x.reshape(-1, 1)
#         rank_y = diff_y.reshape(-1, 1)

#         rank_x = rank_x - rank_x.transpose(1, 0)
#         rank_y = rank_y - rank_y.transpose(1, 0)

#         # Estimate ranks over all values
#         rank_x = torch.sum(1 / 2 * (1 + torch.erf(rank_x)), dim=1)
#         rank_y = torch.sum(1 / 2 * (1 + torch.erf(rank_y)), dim=1)

#         # Mean and standard deviation of the ranks
#         rank_x_bar = torch.mean(rank_x, dim=0)
#         rank_x_std = torch.std(rank_x, dim=0)
#         rank_y_bar = torch.mean(rank_y, dim=0)
#         rank_y_std = torch.std(rank_y, dim=0)

#         # Estimate the SROCC of the predictions within the overall values
#         rank_x_ = (x - rank_x_bar) / rank_x_std  # [bs, 1]
#         rank_y_ = (y - rank_y_bar) / rank_y_std  # [bs, 1]

#         x1_rank = torch.sum(torch.mul(rank_x_, rank_y_))
#         x2_1_rank = torch.sqrt(torch.sum(torch.mul(rank_x_, rank_x_)))
#         x2_2_rank = torch.sqrt(torch.sum(torch.mul(rank_y_, rank_y_)))

#         self.enqueue(x, y)

#         return (0.5 * ((1 - x1 / (x2_1 * x2_2)) + (1 - (x1_rank / (x2_1_rank * x2_2_rank)))) + 1) * self.l2_loss(x, y)

class GC_Loss(nn.Module):
    def __init__(self, queue_len=800, alpha=0.5, beta=0.5, gamma=1):
        super(GC_Loss, self).__init__()
        self.pred_queue = list()
        self.gt_queue = list()
        self.queue_len = 0

        self.queue_max_len = queue_len
        print('The queue length is: ', self.queue_max_len)
        self.mse = torch.nn.MSELoss().cuda()

        self.alpha, self.beta, self.gamma = alpha, beta, gamma

    def consistency(self, pred_data, gt_data):
        pred_one_batch, pred_queue = pred_data
        gt_one_batch, gt_queue = gt_data

        pred_mean = torch.mean(pred_queue)
        gt_mean = torch.mean(gt_queue)

        diff_pred = pred_one_batch - pred_mean
        diff_gt = gt_one_batch - gt_mean

        x1 = torch.sum(torch.mul(diff_pred, diff_gt))
        x2_1 = torch.sqrt(torch.sum(torch.mul(diff_pred, diff_pred)))
        x2_2 = torch.sqrt(torch.sum(torch.mul(diff_gt, diff_gt)))

        return x1 / (x2_1 * x2_2)

    def ppra(self, x):
        """
        Pairwise Preference-based Rank Approximation
        """

        x_bar, x_std = torch.mean(x), torch.std(x)
        x_n = (x - x_bar) / x_std
        x_n_T = x_n.reshape(-1, 1)

        rank_x = x_n_T - x_n_T.transpose(1, 0)
        rank_x = torch.sum(1 / 2 * (1 + torch.erf(rank_x / torch.sqrt(torch.tensor(2, dtype=torch.float)))), dim=1)

        return rank_x

    @torch.no_grad()
    def enqueue(self, pred, gt):
        bs = pred.shape[0]
        self.queue_len = self.queue_len + bs

        self.pred_queue = self.pred_queue + pred.tolist()
        self.gt_queue = self.gt_queue + gt.cpu().detach().numpy().tolist()

        if self.queue_len > self.queue_max_len:
            self.dequeue(self.queue_len - self.queue_max_len)
            self.queue_len = self.queue_max_len

    @torch.no_grad()
    def dequeue(self, n):
        for _ in range(n):
            self.pred_queue.pop(0)
            self.gt_queue.pop(0)

    def clear(self):
        self.pred_queue.clear()
        self.gt_queue.clear()

    def forward(self, x, y):
        x_queue = self.pred_queue.copy()
        y_queue = self.gt_queue.copy()

        x_all = torch.cat((x, torch.tensor(x_queue).cuda()), dim=0)
        y_all = torch.cat((y, torch.tensor(y_queue).cuda()), dim=0)

        PLCC = self.consistency((x, x_all), (y, y_all))
        PGC = 1 - PLCC

        rank_x = self.ppra(x_all)
        rank_y = self.ppra(y_all)
        SROCC = self.consistency((rank_x[:x.shape[0]], rank_x), (rank_y[:y.shape[0]], rank_y))
        SGC = 1 - SROCC

        GC = (self.alpha * PGC + self.beta * SGC + self.gamma) * self.mse(x, y)
        self.enqueue(x, y)

        return GC


if __name__ == '__main__':
    gc = GC_Loss().cuda()
    x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float).cuda()
    y = torch.tensor([6, 7, 8, 9, 15], dtype=torch.float).cuda()

    res = gc(x, y)

    print(res)
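Reading the forward pass above as a formula: with the batch predictions x and labels y correlated against the batch-plus-queue pool, the loss is

$$\mathrm{GC}(x, y) = \left(\alpha\,(1 - \mathrm{PLCC}) + \beta\,(1 - \mathrm{SROCC}) + \gamma\right) \cdot \mathrm{MSE}(x, y),$$

where SROCC is computed on the differentiable pairwise rank approximation from ppra(),

$$\widehat{\mathrm{rank}}(x_i) = \sum_j \Phi\!\left(\hat{x}_i - \hat{x}_j\right), \qquad \Phi(z) = \tfrac{1}{2}\left(1 + \operatorname{erf}\!\left(z/\sqrt{2}\right)\right), \qquad \hat{x} = \frac{x - \bar{x}}{\sigma_x}.$$

This is a reading of the code above, not a formula quoted from the MoC-IQA paper.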
models/monet.py
ADDED
@@ -0,0 +1,250 @@
"""
The complete implementation of the Mean-opinion Network (MoNet).
"""
import torch
import torch.nn as nn
import timm

from timm.models.vision_transformer import Block
from einops import rearrange


class Attention_Block(nn.Module):
    def __init__(self, dim, drop=0.1):
        super().__init__()
        self.c_q = nn.Linear(dim, dim)
        self.c_k = nn.Linear(dim, dim)
        self.c_v = nn.Linear(dim, dim)
        self.norm_fact = dim ** -0.5
        self.softmax = nn.Softmax(dim=-1)
        self.proj_drop = nn.Dropout(drop)

    def forward(self, x):
        _x = x
        B, C, N = x.shape
        q = self.c_q(x)
        k = self.c_k(x)
        v = self.c_v(x)

        attn = q @ k.transpose(-2, -1) * self.norm_fact
        attn = self.softmax(attn)
        x = (attn @ v).transpose(1, 2).reshape(B, C, N)
        x = self.proj_drop(x)
        x = x + _x  # residual connection
        return x


class Self_Attention(nn.Module):
    """ Self attention Layer"""

    def __init__(self, in_dim):
        super(Self_Attention, self).__init__()

        self.qConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.kConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
        self.vConv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))  # learnable residual weight

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, inFeature):
        bs, C, w, h = inFeature.size()

        proj_query = self.qConv(inFeature).view(bs, -1, w * h).permute(0, 2, 1)
        proj_key = self.kConv(inFeature).view(bs, -1, w * h)
        energy = torch.bmm(proj_query, proj_key)
        attention = self.softmax(energy)
        proj_value = self.vConv(inFeature).view(bs, -1, w * h)

        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
        out = out.view(bs, C, w, h)

        out = self.gamma * out + inFeature

        return out


class MAL(nn.Module):
    """
    Multi-view Attention Learning (MAL) module
    """

    def __init__(self, in_dim=768, feature_num=4, feature_size=28):
        super().__init__()

        self.channel_attention = Attention_Block(in_dim * feature_num)  # Channel-wise self attention
        self.feature_attention = Attention_Block(feature_size ** 2 * feature_num)  # Pixel-wise self attention

        # Self attention module for each input feature
        self.attention_module = nn.ModuleList()
        for _ in range(feature_num):
            self.attention_module.append(Self_Attention(in_dim))

        self.feature_num = feature_num
        self.in_dim = in_dim

    def forward(self, features):
        feature = torch.tensor([]).cuda()
        for index, _ in enumerate(features):
            feature = torch.cat((feature, self.attention_module[index](features[index]).unsqueeze(0)), dim=0)
        features = feature

        input_tensor = rearrange(features, 'n b c w h -> b (n c) (w h)')  # bs, 768 * feature_num, 28 * 28
        bs, _, _ = input_tensor.shape  # [2, 3072, 784]

        in_feature = rearrange(input_tensor, 'b (w c) h -> b w (c h)', w=self.in_dim, c=self.feature_num)  # bs, 768, 28 * 28 * feature_num
        feature_weight_sum = self.feature_attention(in_feature)  # bs, 768, 28 * 28 * feature_num

        in_channel = input_tensor.permute(0, 2, 1)  # bs, 28 * 28, 768 * feature_num
        channel_weight_sum = self.channel_attention(in_channel)  # bs, 28 * 28, 768 * feature_num

        weight_sum_res = (rearrange(feature_weight_sum, 'b w (c h) -> b (w c) h', w=self.in_dim,
                                    c=self.feature_num) + channel_weight_sum.permute(0, 2, 1)) / 2  # [2, 3072, 784]

        weight_sum_res = torch.mean(weight_sum_res.view(bs, self.feature_num, self.in_dim, -1), dim=1)

        return weight_sum_res  # bs, 768, 28 * 28


class SaveOutput:
    def __init__(self):
        self.outputs = []

    def __call__(self, module, module_in, module_out):
        self.outputs.append(module_out)

    def clear(self):
        self.outputs = []


class MoNet(nn.Module):
    def __init__(self, config, patch_size=8, drop=0.1, dim_mlp=768, img_size=224):
        super().__init__()
        self.img_size = img_size
        self.input_size = img_size // patch_size
        self.dim_mlp = dim_mlp

        self.vit = timm.create_model(config.backbone, pretrained=False)
        self.save_output = SaveOutput()

        # Register hooks so every transformer block's output is recorded
        hook_handles = []
        for layer in self.vit.modules():
            if isinstance(layer, Block):
                handle = layer.register_forward_hook(self.save_output)
                hook_handles.append(handle)

        self.MALs = nn.ModuleList()
        for _ in range(config.mal_num):
            self.MALs.append(MAL())

        # Image Quality Score Regression
        self.fusion_wam = MAL(feature_num=config.mal_num)
        self.block = Block(dim_mlp, 12)
        self.cnn = nn.Sequential(
            nn.Conv2d(dim_mlp, 256, 5),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(256, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((2, 2)),
            nn.Conv2d(128, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((3, 3)),
        )
        self.fc_score = nn.Sequential(
            nn.Linear(128, 128 // 2),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(128 // 2, 1),
            nn.Sigmoid()  # scores land in [0, 1], matching the min-max normalized MOS labels
        )

    def extract_feature(self, save_output, block_index=[2, 5, 8, 11]):
        # Take the patch tokens (dropping the class token) from four transformer blocks
        x1 = save_output.outputs[block_index[0]][:, 1:]
        x2 = save_output.outputs[block_index[1]][:, 1:]
        x3 = save_output.outputs[block_index[2]][:, 1:]
        x4 = save_output.outputs[block_index[3]][:, 1:]
        x = torch.cat((x1, x2, x3, x4), dim=2)
        return x

    def forward(self, x):
        # Multi-level Feature From Different Transformer Blocks
        _x = self.vit(x)  # the ViT forward pass is only needed to trigger the hooks
        x = self.extract_feature(self.save_output)  # bs, 28 * 28, 768 * 4
        self.save_output.outputs.clear()

        x = x.permute(0, 2, 1)  # bs, 768 * 4, 28 * 28
        x = rearrange(x, 'b (d n) (w h) -> b d n w h', d=4, n=self.dim_mlp, w=self.input_size, h=self.input_size)  # bs, 4, 768, 28, 28
        x = x.permute(1, 0, 2, 3, 4)  # 4, bs, 768, 28, 28

        # Different Opinion Features (DOF)
        DOF = torch.tensor([]).cuda()
        for index, _ in enumerate(self.MALs):
            DOF = torch.cat((DOF, self.MALs[index](x).unsqueeze(0)), dim=0)
        DOF = rearrange(DOF, 'n c d (w h) -> n c d w h', w=self.input_size, h=self.input_size)  # 3, bs, 768, 28, 28

        # Image Quality Score Regression
        wam = self.fusion_wam(DOF).permute(0, 2, 1)  # bs, 28 * 28, 768
        wam = self.block(wam).permute(0, 2, 1)
        wam = rearrange(wam, 'c d (w h) -> c d w h', w=self.input_size, h=self.input_size)
        score = self.cnn(wam).squeeze(-1).squeeze(-1)
        score = self.fc_score(score).view(-1)

        return score


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', dest='seed', type=int, default=3407)
    parser.add_argument('--gpu_id', dest='gpu_id', type=str, default='0')

    # model related
    parser.add_argument('--backbone', dest='backbone', type=str, default='vit_base_patch8_224',
                        help='The backbone for MoNet.')
    parser.add_argument('--mal_num', dest='mal_num', type=int, default=3, help='The number of the MAL modules.')

    # data related
    parser.add_argument('--dataset', dest='dataset', type=str, default='livec',
                        help='Support datasets: livec|koniq10k|bid|spaq')
    parser.add_argument('--train_patch_num', dest='train_patch_num', type=int, default=5,
                        help='Number of sample patches from training image')
    parser.add_argument('--test_patch_num', dest='test_patch_num', type=int, default=25,
                        help='Number of sample patches from testing image')
    parser.add_argument('--patch_size', dest='patch_size', type=int, default=224,
                        help='Crop size for training & testing image patches')

    # training related
    parser.add_argument('--lr', dest='lr', type=float, default=1e-5, help='Learning rate')
    parser.add_argument('--weight_decay', dest='weight_decay', type=float, default=1e-5, help='Weight decay')
    parser.add_argument('--batch_size', dest='batch_size', type=int, default=11, help='Batch size')
    parser.add_argument('--epochs', dest='epochs', type=int, default=50, help='Epochs for training')
    parser.add_argument('--T_max', dest='T_max', type=int, default=50, help='Hyper-parameter for CosineAnnealingLR')
    parser.add_argument('--eta_min', dest='eta_min', type=int, default=0, help='Hyper-parameter for CosineAnnealingLR')

    parser.add_argument('--save_path', dest='save_path', type=str, default='./training_for_IQA',
                        help='The path where the model and logs will be saved.')

    config = parser.parse_args()

    # torch.autograd.set_detect_anomaly(True)
    # with torch.autograd.detect_anomaly():
    in_tensor = torch.zeros((2, 3, 224, 224), dtype=torch.float).cuda()
    model = MoNet(config).cuda()
    res = model(in_tensor)

    print('{} : {} [M]'.format('#Params', sum(map(lambda x: x.numel(), model.parameters())) / 10 ** 6))

    # label = torch.tensor([1, 2], dtype=torch.float).cuda()
    # loss = torch.nn.L1Loss().cuda()
    #
    # res = model(in_tensor)
    # # loss = loss_func()
    # l = loss(label, res)
    # print(l)
    # l.backward()
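MoNet only reads config.backbone and config.mal_num, so it can be instantiated without the CLI parser. A minimal sketch (SimpleNamespace stands in for the argparse config; a CUDA device is assumed, since the model hard-codes .cuda() internally):

from types import SimpleNamespace

import torch

from models import monet as MoNet

config = SimpleNamespace(backbone='vit_base_patch8_224', mal_num=3)
model = MoNet.MoNet(config).cuda().eval()
with torch.no_grad():
    score = model(torch.rand(1, 3, 224, 224).cuda())  # one normalized 224x224 RGB image
print(score.item())  # quality score in [0, 1]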
utils/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (258 Bytes).
utils/__pycache__/iqa_solver.cpython-36.pyc
ADDED
Binary file (3.73 kB).
utils/__pycache__/iqa_solver.cpython-37.pyc
ADDED
Binary file (3.81 kB).
utils/__pycache__/iqa_solver.cpython-38.pyc
ADDED
Binary file (3.64 kB).
utils/__pycache__/log_writer.cpython-36.pyc
ADDED
Binary file (799 Bytes).
utils/__pycache__/log_writer.cpython-37.pyc
ADDED
Binary file (809 Bytes).
utils/__pycache__/log_writer.cpython-38.pyc
ADDED
Binary file (825 Bytes).
utils/__pycache__/process.cpython-38.pyc
ADDED
Binary file (1.78 kB).
utils/dataset/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (266 Bytes).
utils/dataset/__pycache__/data_loader.cpython-37.pyc
ADDED
Binary file (1.8 kB).
utils/dataset/__pycache__/data_loader.cpython-38.pyc
ADDED
Binary file (1.44 kB).
utils/dataset/__pycache__/folders.cpython-37.pyc
ADDED
Binary file (6.02 kB).
utils/dataset/__pycache__/folders.cpython-38.pyc
ADDED
Binary file (5.75 kB).
utils/dataset/__pycache__/process.cpython-38.pyc
ADDED
Binary file (1.93 kB).
utils/dataset/data_loader.py
ADDED
@@ -0,0 +1,36 @@
import torch
import torchvision

from utils.dataset import folders
from utils.dataset.process import ToTensor, Normalize, RandHorizontalFlip


class Data_Loader(object):
    """Dataset class for IQA databases"""

    def __init__(self, config, path, img_indx, istrain=True):

        self.batch_size = config.batch_size
        self.istrain = istrain
        dataset = config.dataset
        patch_size = config.patch_size

        # Train transforms
        if istrain:
            transforms = torchvision.transforms.Compose([Normalize(0.5, 0.5), RandHorizontalFlip(prob_aug=0.5), ToTensor()])
        else:
            transforms = torchvision.transforms.Compose([Normalize(0.5, 0.5), ToTensor()])

        if dataset == 'livec':
            self.data = folders.LIVEC(root=path, index=img_indx, transform=transforms)
        elif dataset == 'koniq10k':
            self.data = folders.Koniq10k(root=path, index=img_indx, transform=transforms)
        elif dataset == 'bid':
            self.data = folders.BID(root=path, index=img_indx, transform=transforms)
        elif dataset == 'spaq':
            self.data = folders.SPAQ(root=path, index=img_indx, transform=transforms)
        else:
            raise Exception("Only support livec, koniq10k, bid, spaq.")

    def get_data(self):
        dataloader = torch.utils.data.DataLoader(self.data, batch_size=self.batch_size, shuffle=self.istrain, num_workers=8)
        return dataloader
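Data_Loader only reads batch_size, dataset, and patch_size from the config, so building a training loader can look like the following sketch (the KonIQ-10k path and the index split are hypothetical placeholders):

from types import SimpleNamespace

from utils.dataset.data_loader import Data_Loader

config = SimpleNamespace(batch_size=11, dataset='koniq10k', patch_size=224)
train_loader = Data_Loader(config, '/path/to/koniq-10k', list(range(8000)), istrain=True).get_data()
for img, gt in train_loader:
    break  # img: [bs, 3, 224, 224] FloatTensor, gt: [bs, 1] FloatTensor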
utils/dataset/dataset_info.json
ADDED
@@ -0,0 +1,6 @@
{
    "livec": ["/disk1/chenzewen/sciResLife/ALotOfDataset/IQA/ChallengeDB_release", 1162],
    "koniq10k": ["/disk1/chenzewen/sciResLife/ALotOfDataset/IQA/koniq-10k", 10073],
    "bid": ["/disk1/chenzewen/sciResLife/ALotOfDataset/IQA/BID/ImageDatabase", 586],
    "spaq": ["/home/ssl/Database/ChallengeDB_release/ChallengeDB_release/", 11125]
}
utils/dataset/folders.py
ADDED
@@ -0,0 +1,207 @@
import torch.utils.data as data
import torch

from PIL import Image
import os
import scipy.io
import numpy as np
import csv
from openpyxl import load_workbook
import cv2


class LIVEC(data.Dataset):
    def __init__(self, root, index, transform):
        imgpath = scipy.io.loadmat(os.path.join(root, 'Data', 'AllImages_release.mat'))
        imgpath = imgpath['AllImages_release']
        imgpath = imgpath[7:1169]
        mos = scipy.io.loadmat(os.path.join(root, 'Data', 'AllMOS_release.mat'))
        labels = mos['AllMOS_release'].astype(np.float32)
        labels = labels[0][7:1169]

        sample, gt = [], []
        for i, item in enumerate(index):
            sample.append(os.path.join(root, 'Images', imgpath[item][0][0]))
            gt.append(labels[item])
        gt = normalization(gt)

        self.samples, self.gt = sample, gt
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        img_tensor, gt_tensor = get_item(self.samples, self.gt, index, self.transform)

        return img_tensor, gt_tensor

    def __len__(self):
        length = len(self.samples)
        return length


class Koniq10k(data.Dataset):
    def __init__(self, root, index, transform):
        imgname = []
        mos_all = []
        csv_file = os.path.join(root, 'koniq10k_distributions_sets.csv')
        with open(csv_file) as f:
            reader = csv.DictReader(f)
            for row in reader:
                imgname.append(row['image_name'])
                mos = np.array(float(row['MOS'])).astype(np.float32)
                mos_all.append(mos)

        sample, gt = [], []
        for i, item in enumerate(index):
            sample.append(os.path.join(root, '1024x768', imgname[item]))
            gt.append(mos_all[item])
        gt = normalization(gt)

        self.samples, self.gt = sample, gt
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        img_tensor, gt_tensor = get_item(self.samples, self.gt, index, self.transform)

        return img_tensor, gt_tensor

    def __len__(self):
        length = len(self.samples)
        return length


class SPAQ(data.Dataset):
    def __init__(self, root, index, transform):
        imgname = []
        mos_all = []
        csv_file = os.path.join(root, 'koniq10k_scores_and_distributions.csv')
        with open(csv_file) as f:
            reader = csv.DictReader(f)
            for row in reader:
                imgname.append(row['image_name'])
                mos = np.array(float(row['MOS_zscore'])).astype(np.float32)
                mos_all.append(mos)

        sample, gt = [], []
        for i, item in enumerate(index):
            sample.append(os.path.join(root, '1024x768', imgname[item]))
            gt.append(mos_all[item])
        gt = normalization(gt)

        self.samples, self.gt = sample, gt
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        img_tensor, gt_tensor = get_item(self.samples, self.gt, index, self.transform)

        return img_tensor, gt_tensor

    def __len__(self):
        length = len(self.samples)
        return length


class BID(data.Dataset):
    def __init__(self, root, index, transform):

        imgname = []
        mos_all = []

        xls_file = os.path.join(root, 'DatabaseGrades.xlsx')
        workbook = load_workbook(xls_file)
        booksheet = workbook.active
        rows = booksheet.rows
        count = 1
        for row in rows:
            count += 1
            img_num = booksheet.cell(row=count, column=1).value
            img_name = "DatabaseImage%04d.JPG" % (img_num)
            imgname.append(img_name)
            mos = booksheet.cell(row=count, column=2).value
            mos = np.array(mos)
            mos = mos.astype(np.float32)
            mos_all.append(mos)
            if count == 587:
                break

        sample, gt = [], []
        for i, item in enumerate(index):
            sample.append(os.path.join(root, imgname[item]))
            gt.append(mos_all[item])
        gt = normalization(gt)

        self.samples, self.gt = sample, gt
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        img_tensor, gt_tensor = get_item(self.samples, self.gt, index, self.transform)

        return img_tensor, gt_tensor

    def __len__(self):
        length = len(self.samples)
        return length


def get_item(samples, gt, index, transform):
    path, target = samples[index], gt[index]
    sample = load_image(path)
    samples = {'img': sample, 'gt': target}
    samples = transform(samples)

    return samples['img'], samples['gt'].type(torch.FloatTensor)


def getFileName(path, suffix):
    filename = []
    f_list = os.listdir(path)
    for i in f_list:
        if os.path.splitext(i)[1] == suffix:
            filename.append(i)
    return filename


def load_image(img_path):
    d_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    d_img = cv2.resize(d_img, (224, 224), interpolation=cv2.INTER_CUBIC)
    d_img = cv2.cvtColor(d_img, cv2.COLOR_BGR2RGB)
    d_img = np.array(d_img).astype('float32') / 255
    d_img = np.transpose(d_img, (2, 0, 1))

    return d_img


def normalization(data):
    data = np.array(data)
    data_range = np.max(data) - np.min(data)
    data = (data - np.min(data)) / data_range
    data = list(data.astype('float').reshape(-1, 1))

    return data
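All four dataset classes pass their MOS values through normalization(), which min-max scales them into [0, 1]; this matches the Sigmoid regression head in models/monet.py. A quick sanity check (sketch):

from utils.dataset.folders import normalization

print(normalization([10.0, 30.0, 50.0]))
# [array([0.]), array([0.5]), array([1.])]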
utils/dataset/process.py
ADDED
@@ -0,0 +1,57 @@
import torch
import numpy as np


class Normalize(object):
    def __init__(self, mean, var):
        self.mean = mean
        self.var = var

    def __call__(self, sample):
        if isinstance(sample, dict):
            img = sample['img']
            gt = sample['gt']
            img = (img - self.mean) / self.var
            sample = {'img': img, 'gt': gt}
        else:
            sample = (sample - self.mean) / self.var

        return sample


class RandHorizontalFlip(object):
    def __init__(self, prob_aug):
        self.prob_aug = prob_aug

    def __call__(self, sample):
        p_aug = np.array([self.prob_aug, 1 - self.prob_aug])
        prob_lr = np.random.choice([1, 0], p=p_aug.ravel())

        if isinstance(sample, dict):
            img = sample['img']
            gt = sample['gt']

            if prob_lr > 0.5:
                img = np.fliplr(img).copy()
            sample = {'img': img, 'gt': gt}
        else:
            if prob_lr > 0.5:
                sample = np.fliplr(sample).copy()
        return sample


class ToTensor(object):
    def __init__(self):
        pass

    def __call__(self, sample):
        if isinstance(sample, dict):
            img = sample['img']
            gt = sample['gt']
            img = torch.from_numpy(img).type(torch.FloatTensor)
            gt = torch.from_numpy(gt).type(torch.FloatTensor)
            sample = {'img': img, 'gt': gt}
        else:
            sample = torch.from_numpy(sample).type(torch.FloatTensor)
        return sample
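These transforms accept either a raw array or a dict sample of the form {'img': CHW float array, 'gt': float array}, which is what folders.get_item feeds them. A minimal sketch of composing them by hand:

import numpy as np
import torchvision

from utils.dataset.process import Normalize, RandHorizontalFlip, ToTensor

trans = torchvision.transforms.Compose([Normalize(0.5, 0.5), RandHorizontalFlip(prob_aug=0.5), ToTensor()])
sample = {'img': np.random.rand(3, 224, 224).astype('float32'), 'gt': np.array([0.5], dtype='float32')}
out = trans(sample)  # out['img'] and out['gt'] are now FloatTensors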
utils/iqa_solver.py
ADDED
@@ -0,0 +1,130 @@
import torch
from scipy import stats
import numpy as np
from models import monet as MoNet
from models import gc_loss as GC_Loss
from utils.dataset import data_loader
import json
import random
import os
from tqdm import tqdm


def get_data(dataset, data_path='./utils/dataset/dataset_info.json'):
    with open(data_path, 'r') as data_info:
        data_info = json.load(data_info)
    path, img_num = data_info[dataset]
    img_num = list(range(img_num))

    # Random 80/20 train/test split over the image indices
    random.shuffle(img_num)
    train_index = img_num[0:int(round(0.8 * len(img_num)))]
    test_index = img_num[int(round(0.8 * len(img_num))):len(img_num)]

    return path, train_index, test_index


def cal_srocc_plcc(pred_score, gt_score):
    srocc, _ = stats.spearmanr(pred_score, gt_score)
    plcc, _ = stats.pearsonr(pred_score, gt_score)

    return srocc, plcc


class Solver:
    def __init__(self, config):

        path, train_index, test_index = get_data(dataset=config.dataset)

        train_loader = data_loader.Data_Loader(config, path, train_index, istrain=True)
        test_loader = data_loader.Data_Loader(config, path, test_index, istrain=False)
        self.train_data = train_loader.get_data()
        self.test_data = test_loader.get_data()

        print('Training data number: ', len(train_index))
        print('Testing data number: ', len(test_index))

        if config.loss == 'MAE':
            self.loss = torch.nn.L1Loss().cuda()
        elif config.loss == 'MSE':
            self.loss = torch.nn.MSELoss().cuda()
        elif config.loss == 'GC':
            self.loss = GC_Loss.GC_Loss(queue_len=int(len(train_index) * config.queue_ratio))
        else:
            raise ValueError('Only support MAE, MSE and GC loss.')

        print('Loading MoNet...')
        self.MoNet = MoNet.MoNet(config).cuda()
        self.MoNet.train(True)

        self.epochs = config.epochs
        self.optimizer = torch.optim.Adam(self.MoNet.parameters(), lr=config.lr, weight_decay=config.weight_decay)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=config.T_max, eta_min=config.eta_min)

        self.model_save_path = os.path.join(config.save_path, 'best_model.pkl')

    def train(self):
        """Training"""
        best_srocc = 0.0
        best_plcc = 0.0
        print('----------------------------------')
        print('Epoch\tTrain_Loss\tTrain_SROCC\tTrain_PLCC\tTest_SROCC\tTest_PLCC')
        for t in range(self.epochs):
            epoch_loss = []
            pred_scores = []
            gt_scores = []

            for img, label in tqdm(self.train_data):
                img = img.cuda()
                label = label.view(-1).cuda()

                self.optimizer.zero_grad()

                pred = self.MoNet(img)

                pred_scores = pred_scores + pred.cpu().tolist()
                gt_scores = gt_scores + label.cpu().tolist()

                loss = self.loss(pred.squeeze(), label.float().detach())
                epoch_loss.append(loss.item())

                loss.backward()
                self.optimizer.step()
            self.scheduler.step()

            train_srocc, train_plcc = cal_srocc_plcc(pred_scores, gt_scores)

            test_srocc, test_plcc = self.test()
            if test_srocc + test_plcc > best_srocc + best_plcc:
                best_srocc = test_srocc
                best_plcc = test_plcc
                torch.save(self.MoNet.state_dict(), self.model_save_path)
                print('Model saved in: ', self.model_save_path)

            print('{}\t{}\t{}\t{}\t{}\t{}'.format(t + 1, round(np.mean(epoch_loss), 4), round(train_srocc, 4),
                                                  round(train_plcc, 4), round(test_srocc, 4), round(test_plcc, 4)))

        print('Best test SROCC {}, PLCC {}'.format(round(best_srocc, 4), round(best_plcc, 4)))

        return best_srocc, best_plcc

    def test(self):
        """Testing"""
        self.MoNet.train(False)
        pred_scores = []
        gt_scores = []

        with torch.no_grad():
            for img, label in tqdm(self.test_data):
                # Data.
                img = img.cuda()
                label = label.view(-1).cuda()

                pred = self.MoNet(img)

                pred_scores = pred_scores + pred.cpu().tolist()
                gt_scores = gt_scores + label.cpu().tolist()

        test_srocc, test_plcc = cal_srocc_plcc(pred_scores, gt_scores)

        self.MoNet.train(True)
        return test_srocc, test_plcc
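A hypothetical training driver for Solver: the fields below mirror the argparse defaults in models/monet.py, plus the two loss-related fields the constructor reads (loss and queue_ratio; the 0.8 ratio is an illustrative value, not one taken from this commit). It assumes the dataset path in utils/dataset/dataset_info.json exists:

from types import SimpleNamespace

from utils.iqa_solver import Solver

config = SimpleNamespace(
    dataset='koniq10k', batch_size=11, patch_size=224,
    backbone='vit_base_patch8_224', mal_num=3,
    loss='GC', queue_ratio=0.8,
    lr=1e-5, weight_decay=1e-5, T_max=50, eta_min=0, epochs=50,
    save_path='./training_for_IQA',
)

best_srocc, best_plcc = Solver(config).train()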
utils/log_writer.py
ADDED
@@ -0,0 +1,14 @@
import sys


class Logger(object):
    def __init__(self, filename="Default.log"):
        self.terminal = sys.stdout
        self.log = open(filename, "w")

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)
        self.flush()

    def flush(self):
        self.log.flush()
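Logger tees everything written to stdout into a file while still echoing it to the terminal. Typical use (sketch; train.log is a placeholder filename):

import sys

from utils.log_writer import Logger

sys.stdout = Logger('train.log')
print('this line goes to both the terminal and train.log')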