Raizudeen committed
Commit 6be1ab7 · verified · 1 parent: 4aa9699

Upload 415 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +8 -0
  2. README.md +121 -0
  3. Real-ESRGAN/.pre-commit-config.yaml +46 -0
  4. Real-ESRGAN/MANIFEST.in +8 -0
  5. Real-ESRGAN/README_CN.md +276 -0
  6. Real-ESRGAN/cog.yaml +22 -0
  7. Real-ESRGAN/cog_predict.py +148 -0
  8. Real-ESRGAN/experiments/pretrained_models/README.md +1 -0
  9. Real-ESRGAN/gfpgan/weights/detection_Resnet50_Final.pth +3 -0
  10. Real-ESRGAN/gfpgan/weights/parsing_parsenet.pth +3 -0
  11. Real-ESRGAN/inference_realesrgan.py +166 -0
  12. Real-ESRGAN/inference_realesrgan_video.py +398 -0
  13. Real-ESRGAN/inputs/00003.png +0 -0
  14. Real-ESRGAN/inputs/00017_gray.png +0 -0
  15. Real-ESRGAN/inputs/0014.jpg +0 -0
  16. Real-ESRGAN/inputs/0030.jpg +0 -0
  17. Real-ESRGAN/inputs/ADE_val_00000114.jpg +0 -0
  18. Real-ESRGAN/inputs/OST_009.png +0 -0
  19. Real-ESRGAN/inputs/children-alpha.png +0 -0
  20. Real-ESRGAN/inputs/tree_alpha_16bit.png +0 -0
  21. Real-ESRGAN/inputs/video/onepiece_demo.mp4 +0 -0
  22. Real-ESRGAN/inputs/wolf_gray.jpg +0 -0
  23. Real-ESRGAN/options/finetune_realesrgan_x4plus.yml +188 -0
  24. Real-ESRGAN/options/finetune_realesrgan_x4plus_pairdata.yml +150 -0
  25. Real-ESRGAN/options/train_realesrgan_x2plus.yml +186 -0
  26. Real-ESRGAN/options/train_realesrgan_x4plus.yml +185 -0
  27. Real-ESRGAN/options/train_realesrnet_x2plus.yml +145 -0
  28. Real-ESRGAN/options/train_realesrnet_x4plus.yml +144 -0
  29. Real-ESRGAN/realesrgan/__init__.py +6 -0
  30. Real-ESRGAN/realesrgan/__pycache__/__init__.cpython-311.pyc +0 -0
  31. Real-ESRGAN/realesrgan/__pycache__/utils.cpython-311.pyc +0 -0
  32. Real-ESRGAN/realesrgan/__pycache__/version.cpython-311.pyc +0 -0
  33. Real-ESRGAN/realesrgan/archs/__init__.py +10 -0
  34. Real-ESRGAN/realesrgan/archs/__pycache__/__init__.cpython-311.pyc +0 -0
  35. Real-ESRGAN/realesrgan/archs/__pycache__/discriminator_arch.cpython-311.pyc +0 -0
  36. Real-ESRGAN/realesrgan/archs/__pycache__/srvgg_arch.cpython-311.pyc +0 -0
  37. Real-ESRGAN/realesrgan/archs/discriminator_arch.py +67 -0
  38. Real-ESRGAN/realesrgan/archs/srvgg_arch.py +69 -0
  39. Real-ESRGAN/realesrgan/data/__init__.py +10 -0
  40. Real-ESRGAN/realesrgan/data/__pycache__/__init__.cpython-311.pyc +0 -0
  41. Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_dataset.cpython-311.pyc +0 -0
  42. Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_paired_dataset.cpython-311.pyc +0 -0
  43. Real-ESRGAN/realesrgan/data/realesrgan_dataset.py +192 -0
  44. Real-ESRGAN/realesrgan/data/realesrgan_paired_dataset.py +108 -0
  45. Real-ESRGAN/realesrgan/models/__init__.py +10 -0
  46. Real-ESRGAN/realesrgan/models/__pycache__/__init__.cpython-311.pyc +0 -0
  47. Real-ESRGAN/realesrgan/models/__pycache__/realesrgan_model.cpython-311.pyc +0 -0
  48. Real-ESRGAN/realesrgan/models/__pycache__/realesrnet_model.cpython-311.pyc +0 -0
  49. Real-ESRGAN/realesrgan/models/realesrgan_model.py +258 -0
  50. Real-ESRGAN/realesrgan/models/realesrnet_model.py +188 -0
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoints/pretrained.state filter=lfs diff=lfs merge=lfs -text
37
+ examples/kennedy_hd.mkv filter=lfs diff=lfs merge=lfs -text
38
+ examples/mona_hd.jpg filter=lfs diff=lfs merge=lfs -text
39
+ examples/mona_hd.mkv filter=lfs diff=lfs merge=lfs -text
40
+ input_audios/part_000_RVC_1.wav filter=lfs diff=lfs merge=lfs -text
41
+ input_videos/MC.mp4 filter=lfs diff=lfs merge=lfs -text
42
+ temp/result.avi filter=lfs diff=lfs merge=lfs -text
43
+ temp/temp.wav filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,121 @@
1
+ # Wav2Lip-HD: Improving Wav2Lip to achieve High-Fidelity Videos
2
+
3
+ This repository contains code for achieving high-fidelity lip-syncing in videos, using the [Wav2Lip algorithm](https://github.com/Rudrabha/Wav2Lip) for lip-syncing and the [Real-ESRGAN algorithm](https://github.com/xinntao/Real-ESRGAN) for super-resolution. Combining the two algorithms produces lip-synced videos that are both accurate and of high visual quality.
4
+
5
+ ## Algorithm
6
+
7
+ The algorithm for achieving high-fidelity lip-syncing with Wav2Lip and Real-ESRGAN can be summarized as follows:
8
+
9
+ 1. The input video and audio are fed to the `Wav2Lip` algorithm.
10
+ 2. A Python script extracts the individual frames from the video generated by Wav2Lip.
11
+ 3. The frames are passed to the Real-ESRGAN algorithm, which performs super-resolution to improve their quality.
12
+ 4. The high-quality frames are then re-encoded into a video with ffmpeg, together with the original audio.
13
+ 5. The result is a high-quality lip-synced video (a minimal sketch of this pipeline is shown below).
14
+ 6. The specific steps for running this algorithm are described in the [Testing Model](https://github.com/saifhassan/Wav2Lip-HD#testing-model) section of this README.
15
+
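+ For orientation, here is a minimal Python sketch of that pipeline. It is illustrative only: `run_final.sh` in this repository is the canonical entry point, and the Wav2Lip script name, checkpoint path, and the 25 fps frame rate below are assumptions rather than the exact files shipped here.
+
+ ```python
+ import subprocess
+ from pathlib import Path
+
+ def run(cmd):
+     # Run a shell command and fail loudly if it errors.
+     subprocess.run(cmd, check=True)
+
+ video, audio = 'input_videos/kennedy.mp4', 'input_audios/ai.wav'
+ for d in ('output_videos_wav2lip', 'frames_wav2lip', 'frames_hd', 'output_videos_hd'):
+     Path(d).mkdir(exist_ok=True)
+
+ # 1. Lip-sync with Wav2Lip (hypothetical script/checkpoint names).
+ run(['python', 'inference.py', '--checkpoint_path', 'checkpoints/wav2lip_gan.pth',
+      '--face', video, '--audio', audio, '--outfile', 'output_videos_wav2lip/result.mp4'])
+
+ # 2. Extract the individual frames of the lip-synced video.
+ run(['ffmpeg', '-i', 'output_videos_wav2lip/result.mp4', 'frames_wav2lip/frame_%05d.png'])
+
+ # 3. Super-resolve every frame (Real-ESRGAN saves them as frame_XXXXX_out.png by default).
+ run(['python', 'Real-ESRGAN/inference_realesrgan.py', '-n', 'RealESRGAN_x4plus',
+      '-i', 'frames_wav2lip', '-o', 'frames_hd', '--outscale', '4'])
+
+ # 4. Re-encode the upscaled frames and mux in the original audio.
+ run(['ffmpeg', '-framerate', '25', '-i', 'frames_hd/frame_%05d_out.png', '-i', audio,
+      '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-c:a', 'aac', '-shortest',
+      'output_videos_hd/result_hd.mp4'])
+ ```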
16
+ ## Testing Model
17
+
18
+ To test the "Wav2Lip-HD" model, follow these steps:
19
+
20
+ 1. Clone this repository and install the requirements with the following commands (make sure Python and CUDA are already installed):
21
+
22
+ ```
23
+ git clone https://github.com/saifhassan/Wav2Lip-HD.git
24
+ cd Wav2Lip-HD
25
+ pip install -r requirements.txt
26
+ ```
27
+
28
+ 2. Download the pre-trained weights:
29
+
30
+ | Model | Directory | Download Link |
31
+ | :------------- |:-------------| :-----:|
32
+ | Wav2Lip | [checkpoints/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/checkpoints) | [Link](https://drive.google.com/drive/folders/1tB_uz-TYMePRMZzrDMdShWUZZ0JK3SIZ?usp=sharing) |
33
+ | ESRGAN | [experiments/001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb/models/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/experiments/001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb/models) | [Link](https://drive.google.com/file/d/1Al8lEpnx2K-kDX7zL2DBcAuDnSKXACPb/view?usp=sharing) |
34
+ | Face_Detection | [face_detection/detection/sfd/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/face_detection/detection/sfd) | [Link](https://drive.google.com/file/d/1uNLYCPFFmO-og3WSHyFytJQLLYOwH5uY/view?usp=sharing) |
35
+ | Real-ESRGAN | Real-ESRGAN/gfpgan/weights/ | [Link](https://drive.google.com/drive/folders/1BLx6aMpHgFt41fJ27_cRmT8bt53kVAYG?usp=sharing) |
36
+ | Real-ESRGAN | Real-ESRGAN/weights/ | [Link](https://drive.google.com/file/d/1qNIf8cJl_dQo3ivelPJVWFkApyEAGnLi/view?usp=sharing) |
37
+
38
+
39
+ 3. Put the input video in the `input_videos` directory and the input audio in the `input_audios` directory.
40
+ 4. Open the `run_final.sh` file and modify the following parameters:
41
+
42
+ `filename=kennedy` (the video file name, without extension)
43
+
44
+ `input_audio=input_audios/ai.wav` (the audio file path, with extension)
45
+
46
+ 5. Execute `run_final.sh` with the following command:
47
+
48
+ ```
49
+ bash run_final.sh
50
+ ```
51
+
52
+ 6. Outputs
53
+
54
+ - The `output_videos_wav2lip` directory contains the video generated by the Wav2Lip algorithm.
55
+ - The `frames_wav2lip` directory contains the frames extracted from that video.
56
+ - The `frames_hd` directory contains those frames after super-resolution with Real-ESRGAN.
57
+ - The `output_videos_hd` directory contains the final high-quality video generated by Wav2Lip-HD.
58
+
59
+
60
+ ## Results
61
+ The results produced by Wav2Lip-HD come in two forms: frames and videos. Both are shown below:
62
+
63
+ ### Example output frames
64
+ <table>
65
+ <tr>
66
+ <td>Frame by Wav2Lip</td>
67
+ <td>Optimized Frame</td>
68
+ </tr>
69
+ <tr>
70
+ <td><img src="examples/1_low.jpg" width=500></td>
71
+ <td><img src="examples/1_hd.jpg" width=500></td>
72
+ </tr>
73
+ <tr>
74
+ <td><img src="examples/kennedy_low.jpg" width=500></td>
75
+ <td><img src="examples/kennedy_hd.jpg" width=500></td>
76
+ </tr>
79
+ <tr>
80
+ <td><img src="examples/mona_low.jpg" width=500></td>
81
+ <td><img src="examples/mona_hd.jpg" width=500></td>
82
+ </tr>
83
+ </table>
85
+
86
+ ### Example output videos
87
+
88
+ | Video by Wav2Lip | Optimized Video |
89
+ | ------------- | ------------- |
90
+ | <video src="https://user-images.githubusercontent.com/11873763/229389410-56d96244-8c67-4add-a43e-a4900aa9db88.mp4" width="500"> | <video src="https://user-images.githubusercontent.com/11873763/229389414-d5cb6d33-7772-47a7-b829-9e3d5c3945a1.mp4" width="500">|
91
+ | <video src="https://user-images.githubusercontent.com/11873763/229389751-507669f1-7772-4863-ab23-8df7f206a065.mp4" width="500"> | <video src="https://user-images.githubusercontent.com/11873763/229389962-5373b765-ce3a-4af2-bd6a-8be8543ee933.mp4" width="500">|
92
+
93
+ ## Acknowledgements
94
+
95
+ We would like to thank the following repositories and libraries for their contributions to our work:
96
+
97
+ 1. The [Wav2Lip](https://github.com/Rudrabha/Wav2Lip) repository, which provides the core lip-sync model of our algorithm.
98
+ 2. The [face-parsing.PyTorch](https://github.com/zllrunning/face-parsing.PyTorch) repository, which provides us with a model for face segmentation.
99
+ 3. The [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) repository, which provides the super-resolution component of our algorithm.
100
+ 4. [ffmpeg](https://ffmpeg.org), which we use for converting frames to video.
Real-ESRGAN/.pre-commit-config.yaml ADDED
@@ -0,0 +1,46 @@
1
+ repos:
2
+ # flake8
3
+ - repo: https://github.com/PyCQA/flake8
4
+ rev: 3.8.3
5
+ hooks:
6
+ - id: flake8
7
+ args: ["--config=setup.cfg", "--ignore=W504, W503"]
8
+
9
+ # modify known_third_party
10
+ - repo: https://github.com/asottile/seed-isort-config
11
+ rev: v2.2.0
12
+ hooks:
13
+ - id: seed-isort-config
14
+
15
+ # isort
16
+ - repo: https://github.com/timothycrosley/isort
17
+ rev: 5.2.2
18
+ hooks:
19
+ - id: isort
20
+
21
+ # yapf
22
+ - repo: https://github.com/pre-commit/mirrors-yapf
23
+ rev: v0.30.0
24
+ hooks:
25
+ - id: yapf
26
+
27
+ # codespell
28
+ - repo: https://github.com/codespell-project/codespell
29
+ rev: v2.1.0
30
+ hooks:
31
+ - id: codespell
32
+
33
+ # pre-commit-hooks
34
+ - repo: https://github.com/pre-commit/pre-commit-hooks
35
+ rev: v3.2.0
36
+ hooks:
37
+ - id: trailing-whitespace # Trim trailing whitespace
38
+ - id: check-yaml # Attempt to load all yaml files to verify syntax
39
+ - id: check-merge-conflict # Check for files that contain merge conflict strings
40
+ - id: double-quote-string-fixer # Replace double quoted strings with single quoted strings
41
+ - id: end-of-file-fixer # Make sure files end in a newline and only a newline
42
+ - id: requirements-txt-fixer # Sort entries in requirements.txt and remove incorrect entry for pkg-resources==0.0.0
43
+ - id: fix-encoding-pragma # Remove the coding pragma: # -*- coding: utf-8 -*-
44
+ args: ["--remove"]
45
+ - id: mixed-line-ending # Replace or check mixed line ending
46
+ args: ["--fix=lf"]
Real-ESRGAN/MANIFEST.in ADDED
@@ -0,0 +1,8 @@
1
+ include assets/*
2
+ include inputs/*
3
+ include scripts/*.py
4
+ include inference_realesrgan.py
5
+ include VERSION
6
+ include LICENSE
7
+ include requirements.txt
8
+ include weights/README.md
Real-ESRGAN/README_CN.md ADDED
@@ -0,0 +1,276 @@
+ <p align="center">
+ <img src="assets/realesrgan_logo.png" height=120>
+ </p>
+
+ ## <div align="center"><b><a href="README.md">English</a> | <a href="README_CN.md">简体中文</a></b></div>
+
+ [![download](https://img.shields.io/github/downloads/xinntao/Real-ESRGAN/total.svg)](https://github.com/xinntao/Real-ESRGAN/releases)
+ [![PyPI](https://img.shields.io/pypi/v/realesrgan)](https://pypi.org/project/realesrgan/)
+ [![Open issue](https://img.shields.io/github/issues/xinntao/Real-ESRGAN)](https://github.com/xinntao/Real-ESRGAN/issues)
+ [![Closed issue](https://img.shields.io/github/issues-closed/xinntao/Real-ESRGAN)](https://github.com/xinntao/Real-ESRGAN/issues)
+ [![LICENSE](https://img.shields.io/github/license/xinntao/Real-ESRGAN.svg)](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE)
+ [![python lint](https://github.com/xinntao/Real-ESRGAN/actions/workflows/pylint.yml/badge.svg)](https://github.com/xinntao/Real-ESRGAN/blob/master/.github/workflows/pylint.yml)
+ [![Publish-pip](https://github.com/xinntao/Real-ESRGAN/actions/workflows/publish-pip.yml/badge.svg)](https://github.com/xinntao/Real-ESRGAN/blob/master/.github/workflows/publish-pip.yml)
+
+ :fire: Updated the small model for anime videos, **RealESRGAN AnimeVideo-v3**. See [[anime video models](docs/anime_video_model.md)] and [[comparisons](docs/anime_comparisons_CN.md)] for more details.
+
+ 1. [Colab demo](https://colab.research.google.com/drive/1k2Zod6kSHEvraybHl50Lys0LerhyTMCo?usp=sharing) for Real-ESRGAN | [Colab demo](https://colab.research.google.com/drive/1yNl9ORUxxlL4N0keJa2SEPB61imPQd1B?usp=sharing) for Real-ESRGAN on **anime videos**
+ 2. Portable executable files **supporting Intel/AMD/Nvidia GPUs**: [Windows](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-windows.zip) / [Linux](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-ubuntu.zip) / [macOS](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-macos.zip). See [the section below](#portable-green-executable-files) for details. The NCNN implementation is in [Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan).
+
+ Real-ESRGAN aims to develop **practical algorithms for general image/video restoration**.<br>
+ We extend ESRGAN by training it with purely synthetic data, so that it can be applied to real-world image restoration (hence the name: Real-ESRGAN).
+
+ :art: Real-ESRGAN needs, and welcomes, your contributions, such as new features, models, bug fixes, suggestions and maintenance. See [CONTRIBUTING.md](docs/CONTRIBUTING.md) for details; all contributors are listed [here](README_CN.md#hugs-感谢).
+
+ :milky_way: Thanks for all the valuable feedback, which is gradually being collected in [this document](docs/feedback.md).
+
+ :question: Answers to frequently asked questions can be found in [FAQ.md](docs/FAQ.md). (Well, it is still empty for now =-=||)
+
+ ---
+
+ If Real-ESRGAN is helpful to you, please star this project :star: or recommend it to your friends. Thanks! :blush: <br/>
+ Other recommended projects:<br/>
+ :arrow_forward: [GFPGAN](https://github.com/TencentARC/GFPGAN): a practical algorithm for real-world face restoration <br>
+ :arrow_forward: [BasicSR](https://github.com/xinntao/BasicSR): an open-source image and video restoration toolbox<br>
+ :arrow_forward: [facexlib](https://github.com/xinntao/facexlib): a toolbox of face-related functions<br>
+ :arrow_forward: [HandyView](https://github.com/xinntao/HandyView): a PyQt5-based image viewer, handy for viewing and comparing images <br>
+
+ ---
+
+ <!---------------------------------- Updates --------------------------->
+ <details>
+ <summary>🚩<b>Updates</b></summary>
+
+ - ✅ Updated the small model for anime videos, **RealESRGAN AnimeVideo-v3**. See [anime video models](docs/anime_video_model.md) and [comparisons](docs/anime_comparisons.md) for more details.
+ - ✅ Added small models for anime videos. See [anime video models](docs/anime_video_model.md) for details.
+ - ✅ Added the ncnn implementation: [Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan).
+ - ✅ Added [*RealESRGAN_x4plus_anime_6B.pth*](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth), which is optimized for anime images and has a smaller model size. See [**anime_model.md**](docs/anime_model.md) for details and for comparisons with [waifu2x](https://github.com/nihui/waifu2x-ncnn-vulkan)
+ - ✅ Support fine-tuning on your own dataset: [details](docs/Training.md#Finetune-Real-ESRGAN-on-your-own-dataset)
+ - ✅ Support **enhancing faces** with [GFPGAN](https://github.com/TencentARC/GFPGAN)
+ - ✅ Added to [Huggingface Spaces](https://huggingface.co/spaces) (an online platform for machine-learning apps) via [Gradio](https://github.com/gradio-app/gradio): [Gradio online demo](https://huggingface.co/spaces/akhaliq/Real-ESRGAN). Thanks [@AK391](https://github.com/AK391)
+ - ✅ Support arbitrary output scales with `--outscale` (the output image is further resized with `LANCZOS4`). Added the *RealESRGAN_x2plus.pth* model
+ - ✅ The [inference script](inference_realesrgan.py) supports: 1) **tile** processing; 2) images with **alpha channels**; 3) **gray** images; 4) **16-bit** images.
+ - ✅ The training code has been released. See [Training.md](docs/Training.md) for details.
+
+ </details>
+
+ <!---------------------------------- Projects that use Real-ESRGAN --------------------------->
+ <details>
+ <summary>🧩<b>Projects that use Real-ESRGAN</b></summary>
+
+ &nbsp;&nbsp;&nbsp;&nbsp;👋 If you develop/use/integrate Real-ESRGAN, feel free to contact me so I can add your project here
+
+ - NCNN-Android: [RealSR-NCNN-Android](https://github.com/tumuyan/RealSR-NCNN-Android) by [tumuyan](https://github.com/tumuyan)
+ - VapourSynth: [vs-realesrgan](https://github.com/HolyWu/vs-realesrgan) by [HolyWu](https://github.com/HolyWu)
+ - NCNN: [Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan)
+
+ &nbsp;&nbsp;&nbsp;&nbsp;**Easy-to-use GUIs**
+
+ - [Waifu2x-Extension-GUI](https://github.com/AaronFeng753/Waifu2x-Extension-GUI) by [AaronFeng753](https://github.com/AaronFeng753)
+ - [Squirrel-RIFE](https://github.com/Justin62628/Squirrel-RIFE) by [Justin62628](https://github.com/Justin62628)
+ - [Real-GUI](https://github.com/scifx/Real-GUI) by [scifx](https://github.com/scifx)
+ - [Real-ESRGAN_GUI](https://github.com/net2cn/Real-ESRGAN_GUI) by [net2cn](https://github.com/net2cn)
+ - [Real-ESRGAN-EGUI](https://github.com/WGzeyu/Real-ESRGAN-EGUI) by [WGzeyu](https://github.com/WGzeyu)
+ - [anime_upscaler](https://github.com/shangar21/anime_upscaler) by [shangar21](https://github.com/shangar21)
+ - [RealESRGAN-GUI](https://github.com/Baiyuetribe/paper2gui/blob/main/Video%20Super%20Resolution/RealESRGAN-GUI.md) by [Baiyuetribe](https://github.com/Baiyuetribe)
+
+ </details>
+
+ <details>
+ <summary>👀<b>Demo videos (Bilibili)</b></summary>
+
+ - [Havoc in Heaven clip](https://www.bilibili.com/video/BV1ja41117zb)
+
+ </details>
+
+ ### :book: Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data
+
+ > [[Paper](https://arxiv.org/abs/2107.10833)] &emsp; [Project page] &emsp; [[YouTube video](https://www.youtube.com/watch?v=fxHWoDSSvSc)] &emsp; [[Bilibili video](https://www.bilibili.com/video/BV1H34y1m7sS/)] &emsp; [[Poster](https://xinntao.github.io/projects/RealESRGAN_src/RealESRGAN_poster.pdf)] &emsp; [[PPT](https://docs.google.com/presentation/d/1QtW6Iy8rm8rGLsJ0Ldti6kP-7Qyzy6XL/edit?usp=sharing&ouid=109799856763657548160&rtpof=true&sd=true)]<br>
+ > [Xintao Wang](https://xinntao.github.io/), Liangbin Xie, [Chao Dong](https://scholar.google.com.hk/citations?user=OSDCB0UAAAAJ), [Ying Shan](https://scholar.google.com/citations?user=4oXBp9UAAAAJ&hl=en) <br>
+ > Tencent ARC Lab; Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences
+
+ <p align="center">
+ <img src="assets/teaser.jpg">
+ </p>
+
+ ---
+
+ We provide a trained model (*RealESRGAN_x4plus.pth*) for 4x super-resolution.<br>
+ **Real-ESRGAN may still fail in some cases, since real-world degradations are quite complex.**<br>
+ Moreover, it does not perform well on **human faces and text**, but we will keep improving it.<br>
+
+ Real-ESRGAN will be supported over the long term, and I will keep maintaining and updating it in my spare time.
+
+ These are the new features planned for the future:
+
+ - [ ] Optimize for human faces
+ - [ ] Optimize for text
+ - [x] Optimize for anime images
+ - [ ] Support more scale factors
+ - [ ] Adjustable restoration strength
+
+ If you have good ideas or feature requests, feel free to open an issue or discussion.<br/>
+ If you have photos on which Real-ESRGAN performs poorly, you are also welcome to post them in an issue or discussion. I will keep an eye on them (but may not necessarily be able to fix them :stuck_out_tongue:). If necessary, I will open a dedicated page to record the images that remain to be fixed.
+
+ ---
+
+ ### Portable (green) executable files
+
+ You can download the portable executable files **supporting Intel/AMD/Nvidia GPUs**: [Windows](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-windows.zip) / [Linux](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-ubuntu.zip) / [macOS](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-macos.zip).
+
+ "Portable" means you can run these executables directly (you can even copy them to a USB stick and take them with you), because all the required files and models are already bundled. They do not require CUDA or a PyTorch runtime.<br>
+
+ You can run them with the following command (a Windows example; see the README.md of the corresponding release for more information):
+
+ ```bash
+ ./realesrgan-ncnn-vulkan.exe -i input.jpg -o output.png -n model_name
+ ```
+
+ We provide the following models:
+
+ 1. realesrgan-x4plus (default)
+ 2. realesrnet-x4plus
+ 3. realesrgan-x4plus-anime (optimized for anime images; smaller model size)
+ 4. realesr-animevideov3 (for anime videos)
+
+ You can select other models with the `-n` argument, for example `./realesrgan-ncnn-vulkan.exe -i anime.jpg -o anime_out.png -n realesrgan-x4plus-anime`
+
+ ### Usage of the executable files
+
+ 1. See [Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan#computer-usages) for more details.
+ 2. Note that the executables do not support all the features of the Python script `inference_realesrgan.py`, e.g., the `outscale` option.
+
+ ```console
+ Usage: realesrgan-ncnn-vulkan.exe -i infile -o outfile [options]...
+
+ -h show this help
+ -i input-path input image path (jpg/png/webp) or directory
+ -o output-path output image path (jpg/png/webp) or directory
+ -s scale upscale ratio (can be 2, 3, 4. default=4)
+ -t tile-size tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu
+ -m model-path folder path to the pre-trained models. default=models
+ -n model-name model name (default=realesr-animevideov3, can be realesr-animevideov3 | realesrgan-x4plus | realesrgan-x4plus-anime | realesrnet-x4plus)
+ -g gpu-id gpu device to use (default=auto) can be 0,1,2 for multi-gpu
+ -j load:proc:save thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu
+ -x enable tta mode
+ -f format output image format (jpg/png/webp, default=ext/png)
+ -v verbose output
+ ```
+
+ Because these executables split the image into several tiles, process them separately and then merge the results, the output may show slight seams (and may differ a little from the PyTorch output).
+
+ ---
+
+ ## :wrench: Dependencies and installation
+
+ - Python >= 3.7 ([Anaconda](https://www.anaconda.com/download/#linux) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html) is recommended)
+ - [PyTorch >= 1.7](https://pytorch.org/)
+
+ #### Installation
+
+ 1. Clone the repo
+
+ ```bash
+ git clone https://github.com/xinntao/Real-ESRGAN.git
+ cd Real-ESRGAN
+ ```
+
+ 2. Install the dependencies
+
+ ```bash
+ # Install basicsr - https://github.com/xinntao/BasicSR
+ # We use BasicSR for both training and inference
+ pip install basicsr
+ # facexlib and gfpgan are for face enhancement
+ pip install facexlib
+ pip install gfpgan
+ pip install -r requirements.txt
+ python setup.py develop
+ ```
+
+ ## :zap: Quick inference
+
+ ### General images
+
+ Download the pre-trained model: [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth)
+
+ ```bash
+ wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P weights
+ ```
+
+ Inference!
+
+ ```bash
+ python inference_realesrgan.py -n RealESRGAN_x4plus -i inputs --face_enhance
+ ```
+
+ Results are in the `results` folder.
+
+ ### Anime images
+
+ <p align="center">
+ <img src="https://raw.githubusercontent.com/xinntao/public-figures/master/Real-ESRGAN/cmp_realesrgan_anime_1.png">
+ </p>
+
+ Pre-trained model: [RealESRGAN_x4plus_anime_6B](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth)<br>
+ More details and comparisons with [waifu2x](https://github.com/nihui/waifu2x-ncnn-vulkan) are in [**anime_model.md**](docs/anime_model.md).
+
+ ```bash
+ # download the model
+ wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P weights
+ # inference
+ python inference_realesrgan.py -n RealESRGAN_x4plus_anime_6B -i inputs
+ ```
+
+ Results are in the `results` folder.
+
+ ### Usage of the Python script
+
+ 1. Even though you use an X4 model, you can **output images with arbitrary scale ratios** via the `outscale` argument; the program further resizes the model output to the requested size. A short example follows this section.
+
+ ```console
+ Usage: python inference_realesrgan.py -n RealESRGAN_x4plus -i infile -o outfile [options]...
+
+ A common command: python inference_realesrgan.py -n RealESRGAN_x4plus -i infile --outscale 3.5 --face_enhance
+
+ -h show this help
+ -i --input Input image or folder. Default: inputs
+ -o --output Output folder. Default: results
+ -n --model_name Model name. Default: RealESRGAN_x4plus
+ -s, --outscale The final upsampling scale of the image. Default: 4
+ --suffix Suffix of the restored image. Default: out
+ -t, --tile Tile size, 0 for no tile during testing. Default: 0
+ --face_enhance Whether to use GFPGAN to enhance faces. Default: False
+ --fp32 Whether to use fp32 precision during inference. Default: fp16 (half precision)
+ --ext Image extension. Options: auto | jpg | png, auto means using the same extension as inputs. Default: auto
+ ```
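+
+ As a concrete illustration, here is a minimal Python sketch of arbitrary-scale output, following the `RealESRGANer` usage in this repository's inference scripts (the input/output paths are placeholders):
+
+ ```python
+ import cv2
+ from basicsr.archs.rrdbnet_arch import RRDBNet
+ from realesrgan import RealESRGANer
+
+ # The network itself is x4; RealESRGANer resizes its x4 output to any
+ # requested outscale (e.g. 3.5) with LANCZOS4 interpolation.
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ upsampler = RealESRGANer(scale=4, model_path='weights/RealESRGAN_x4plus.pth', model=model,
+                          tile=0, tile_pad=10, pre_pad=0, half=True)
+
+ img = cv2.imread('inputs/0014.jpg', cv2.IMREAD_UNCHANGED)
+ output, _ = upsampler.enhance(img, outscale=3.5)  # output is 3.5x the input size
+ cv2.imwrite('results/0014_out.png', output)
+ ```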
+
+ ## :european_castle: Model zoo
+
+ Please see [docs/model_zoo.md](docs/model_zoo.md)
+
+ ## :computer: Training and fine-tuning on your own dataset
+
+ A detailed guide can be found in [Training.md](docs/Training.md).
+
+ ## BibTeX citation
+
+     @Article{wang2021realesrgan,
+     title={Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data},
+     author={Xintao Wang and Liangbin Xie and Chao Dong and Ying Shan},
+     journal={arXiv:2107.10833},
+     year={2021}
+     }
+
+ ## :e-mail: Contact
+
+ If you have any questions, please email `xintao.wang@outlook.com` or `xintaowang@tencent.com`.
+
+ ## :hugs: Acknowledgements
+
+ Thanks to all the contributors!
+
+ - [AK391](https://github.com/AK391): added Real-ESRGAN to [Huggingface Spaces](https://huggingface.co/spaces) (an online platform for machine-learning apps) via [Gradio](https://github.com/gradio-app/gradio): [Gradio online demo](https://huggingface.co/spaces/akhaliq/Real-ESRGAN).
+ - [Asiimoviet](https://github.com/Asiimoviet): translated README.md into Chinese.
+ - [2ji3150](https://github.com/2ji3150): thanks for the detailed and valuable [feedback and suggestions](https://github.com/xinntao/Real-ESRGAN/issues/131).
+ - [Jared-02](https://github.com/Jared-02): translated Training.md into Chinese.
Real-ESRGAN/cog.yaml ADDED
@@ -0,0 +1,22 @@
1
+ # This file is used for constructing the Replicate environment
2
+ image: "r8.im/tencentarc/realesrgan"
3
+
4
+ build:
5
+ gpu: true
6
+ python_version: "3.8"
7
+ system_packages:
8
+ - "libgl1-mesa-glx"
9
+ - "libglib2.0-0"
10
+ python_packages:
11
+ - "torch==1.7.1"
12
+ - "torchvision==0.8.2"
13
+ - "numpy==1.21.1"
14
+ - "lmdb==1.2.1"
15
+ - "opencv-python==4.5.3.56"
16
+ - "PyYAML==5.4.1"
17
+ - "tqdm==4.62.2"
18
+ - "yapf==0.31.0"
19
+ - "basicsr==1.4.2"
20
+ - "facexlib==0.2.5"
21
+
22
+ predict: "cog_predict.py:Predictor"
Real-ESRGAN/cog_predict.py ADDED
@@ -0,0 +1,148 @@
1
+ # flake8: noqa
2
+ # This file is used for deploying replicate models
3
+ # running: cog predict -i img=@inputs/00017_gray.png -i version='General - v3' -i scale=2 -i face_enhance=True -i tile=0
4
+ # push: cog push r8.im/xinntao/realesrgan
5
+
6
+ import os
7
+
8
+ os.system('pip install gfpgan')
9
+ os.system('python setup.py develop')
10
+
11
+ import cv2
12
+ import shutil
13
+ import tempfile
14
+ import torch
15
+ from basicsr.archs.rrdbnet_arch import RRDBNet
16
+ from basicsr.archs.srvgg_arch import SRVGGNetCompact
17
+
18
+ from realesrgan.utils import RealESRGANer
19
+
20
+ try:
21
+ from cog import BasePredictor, Input, Path
22
+ from gfpgan import GFPGANer
23
+ except Exception:
24
+ print('please install the cog and gfpgan packages')
25
+
26
+
27
+ class Predictor(BasePredictor):
28
+
29
+ def setup(self):
30
+ os.makedirs('output', exist_ok=True)
31
+ # download weights
32
+ if not os.path.exists('weights/realesr-general-x4v3.pth'):
33
+ os.system(
34
+ 'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth -P ./weights'
35
+ )
36
+ if not os.path.exists('weights/GFPGANv1.4.pth'):
37
+ os.system('wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -P ./weights')
38
+ if not os.path.exists('weights/RealESRGAN_x4plus.pth'):
39
+ os.system(
40
+ 'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P ./weights'
41
+ )
42
+ if not os.path.exists('weights/RealESRGAN_x4plus_anime_6B.pth'):
43
+ os.system(
44
+ 'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P ./weights'
45
+ )
46
+ if not os.path.exists('weights/realesr-animevideov3.pth'):
47
+ os.system(
48
+ 'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth -P ./weights'
49
+ )
50
+
51
+ def choose_model(self, scale, version, tile=0):
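+ # Note: every version below loads an x4 network; the requested output `scale` is applied
+ # afterwards, either by GFPGANer (face path) or by self.upsampler.enhance(outscale=scale).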
52
+ half = True if torch.cuda.is_available() else False
53
+ if version == 'General - RealESRGANplus':
54
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
55
+ model_path = 'weights/RealESRGAN_x4plus.pth'
56
+ self.upsampler = RealESRGANer(
57
+ scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)
58
+ elif version == 'General - v3':
59
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
60
+ model_path = 'weights/realesr-general-x4v3.pth'
61
+ self.upsampler = RealESRGANer(
62
+ scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)
63
+ elif version == 'Anime - anime6B':
64
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
65
+ model_path = 'weights/RealESRGAN_x4plus_anime_6B.pth'
66
+ self.upsampler = RealESRGANer(
67
+ scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)
68
+ elif version == 'AnimeVideo - v3':
69
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
70
+ model_path = 'weights/realesr-animevideov3.pth'
71
+ self.upsampler = RealESRGANer(
72
+ scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)
73
+
74
+ self.face_enhancer = GFPGANer(
75
+ model_path='weights/GFPGANv1.4.pth',
76
+ upscale=scale,
77
+ arch='clean',
78
+ channel_multiplier=2,
79
+ bg_upsampler=self.upsampler)
80
+
81
+ def predict(
82
+ self,
83
+ img: Path = Input(description='Input'),
84
+ version: str = Input(
85
+ description='RealESRGAN version. Please see [Readme] below for more descriptions',
86
+ choices=['General - RealESRGANplus', 'General - v3', 'Anime - anime6B', 'AnimeVideo - v3'],
87
+ default='General - v3'),
88
+ scale: float = Input(description='Rescaling factor', default=2),
89
+ face_enhance: bool = Input(
90
+ description='Enhance faces with GFPGAN. Note that it does not work for anime images/videos', default=False),
91
+ tile: int = Input(
92
+ description=
93
+ 'Tile size. Default is 0, that is no tile. When encountering the out-of-GPU-memory issue, please specify it, e.g., 400 or 200',
94
+ default=0)
95
+ ) -> Path:
96
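+ # Very small tiles are not useful, so requests of <= 100 px are treated as 'no tiling'.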
+ if tile <= 100 or tile is None:
97
+ tile = 0
98
+ print(f'img: {img}. version: {version}. scale: {scale}. face_enhance: {face_enhance}. tile: {tile}.')
99
+ out_path = None  # ensure out_path is defined even if an exception occurs before it is set
+ try:
100
+ extension = os.path.splitext(os.path.basename(str(img)))[1]
101
+ img = cv2.imread(str(img), cv2.IMREAD_UNCHANGED)
102
+ if len(img.shape) == 3 and img.shape[2] == 4:
103
+ img_mode = 'RGBA'
104
+ elif len(img.shape) == 2:
105
+ img_mode = None
106
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
107
+ else:
108
+ img_mode = None
109
+
110
+ h, w = img.shape[0:2]
111
+ if h < 300:
112
+ img = cv2.resize(img, (w * 2, h * 2), interpolation=cv2.INTER_LANCZOS4)
113
+
114
+ self.choose_model(scale, version, tile)
115
+
116
+ try:
117
+ if face_enhance:
118
+ _, _, output = self.face_enhancer.enhance(
119
+ img, has_aligned=False, only_center_face=False, paste_back=True)
120
+ else:
121
+ output, _ = self.upsampler.enhance(img, outscale=scale)
122
+ except RuntimeError as error:
123
+ print('Error', error)
124
+ print('If you encounter CUDA out of memory, try to set "tile" to a smaller size, e.g., 400.')
125
+
126
+ if img_mode == 'RGBA': # RGBA images should be saved in png format
127
+ extension = 'png'
128
+ # save_path = f'output/out.{extension}'
129
+ # cv2.imwrite(save_path, output)
130
+ out_path = Path(tempfile.mkdtemp()) / f'out.{extension}'
131
+ cv2.imwrite(str(out_path), output)
132
+ except Exception as error:
133
+ print('global exception: ', error)
134
+ finally:
135
+ clean_folder('output')
136
+ return out_path
137
+
138
+
139
+ def clean_folder(folder):
140
+ for filename in os.listdir(folder):
141
+ file_path = os.path.join(folder, filename)
142
+ try:
143
+ if os.path.isfile(file_path) or os.path.islink(file_path):
144
+ os.unlink(file_path)
145
+ elif os.path.isdir(file_path):
146
+ shutil.rmtree(file_path)
147
+ except Exception as e:
148
+ print(f'Failed to delete {file_path}. Reason: {e}')
Real-ESRGAN/experiments/pretrained_models/README.md ADDED
@@ -0,0 +1 @@
1
+ # Put downloaded pre-trained models here
Real-ESRGAN/gfpgan/weights/detection_Resnet50_Final.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
3
+ size 109497761
Real-ESRGAN/gfpgan/weights/parsing_parsenet.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
3
+ size 85331193
Real-ESRGAN/inference_realesrgan.py ADDED
@@ -0,0 +1,166 @@
1
+ import argparse
2
+ import cv2
3
+ import glob
4
+ import os
5
+ from basicsr.archs.rrdbnet_arch import RRDBNet
6
+ from basicsr.utils.download_util import load_file_from_url
7
+
8
+ from realesrgan import RealESRGANer
9
+ from realesrgan.archs.srvgg_arch import SRVGGNetCompact
10
+
11
+
12
+ def main():
13
+ """Inference demo for Real-ESRGAN.
14
+ """
15
+ parser = argparse.ArgumentParser()
16
+ parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
17
+ parser.add_argument(
18
+ '-n',
19
+ '--model_name',
20
+ type=str,
21
+ default='RealESRGAN_x4plus',
22
+ help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | '
23
+ 'realesr-animevideov3 | realesr-general-x4v3'))
24
+ parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
25
+ parser.add_argument(
26
+ '-dn',
27
+ '--denoise_strength',
28
+ type=float,
29
+ default=0.5,
30
+ help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
31
+ 'Only used for the realesr-general-x4v3 model'))
32
+ parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
33
+ parser.add_argument(
34
+ '--model_path', type=str, default=None, help='[Option] Model path. Usually, you do not need to specify it')
35
+ parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
36
+ parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
37
+ parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
38
+ parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
39
+ parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
40
+ parser.add_argument(
41
+ '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
42
+ parser.add_argument(
43
+ '--alpha_upsampler',
44
+ type=str,
45
+ default='realesrgan',
46
+ help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
47
+ parser.add_argument(
48
+ '--ext',
49
+ type=str,
50
+ default='auto',
51
+ help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
52
+ parser.add_argument(
53
+ '-g', '--gpu-id', type=int, default=None, help='gpu device to use (default=None) can be 0,1,2 for multi-gpu')
54
+
55
+ args = parser.parse_args()
56
+
57
+ # determine models according to model names
58
+ args.model_name = args.model_name.split('.')[0]
59
+ if args.model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
60
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
61
+ netscale = 4
62
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
63
+ elif args.model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
64
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
65
+ netscale = 4
66
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
67
+ elif args.model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
68
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
69
+ netscale = 4
70
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
71
+ elif args.model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
72
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
73
+ netscale = 2
74
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
75
+ elif args.model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
76
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
77
+ netscale = 4
78
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
79
+ elif args.model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
80
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
81
+ netscale = 4
82
+ file_url = [
83
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
84
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
85
+ ]
86
+
87
+ # determine model paths
88
+ if args.model_path is not None:
89
+ model_path = args.model_path
90
+ else:
91
+ model_path = os.path.join('weights', args.model_name + '.pth')
92
+ if not os.path.isfile(model_path):
93
+ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
94
+ for url in file_url:
95
+ # model_path will be updated
96
+ model_path = load_file_from_url(
97
+ url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
98
+
99
+ # use dni to control the denoise strength
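+ # (DNI: deep network interpolation. The parameters of realesr-general-x4v3 and its
+ # stronger-denoising 'wdn' variant are linearly blended with the weights
+ # [denoise_strength, 1 - denoise_strength] computed below.)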
100
+ dni_weight = None
101
+ if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
102
+ wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
103
+ model_path = [model_path, wdn_model_path]
104
+ dni_weight = [args.denoise_strength, 1 - args.denoise_strength]
105
+
106
+ # restorer
107
+ upsampler = RealESRGANer(
108
+ scale=netscale,
109
+ model_path=model_path,
110
+ dni_weight=dni_weight,
111
+ model=model,
112
+ tile=args.tile,
113
+ tile_pad=args.tile_pad,
114
+ pre_pad=args.pre_pad,
115
+ half=not args.fp32,
116
+ gpu_id=args.gpu_id)
117
+
118
+ if args.face_enhance: # Use GFPGAN for face enhancement
119
+ from gfpgan import GFPGANer
120
+ face_enhancer = GFPGANer(
121
+ model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
122
+ upscale=args.outscale,
123
+ arch='clean',
124
+ channel_multiplier=2,
125
+ bg_upsampler=upsampler)
126
+ os.makedirs(args.output, exist_ok=True)
127
+
128
+ if os.path.isfile(args.input):
129
+ paths = [args.input]
130
+ else:
131
+ paths = sorted(glob.glob(os.path.join(args.input, '*')))
132
+
133
+ for idx, path in enumerate(paths):
134
+ imgname, extension = os.path.splitext(os.path.basename(path))
135
+ print('Testing', idx, imgname)
136
+
137
+ img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
138
+ if len(img.shape) == 3 and img.shape[2] == 4:
139
+ img_mode = 'RGBA'
140
+ else:
141
+ img_mode = None
142
+
143
+ try:
144
+ if args.face_enhance:
145
+ _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
146
+ else:
147
+ output, _ = upsampler.enhance(img, outscale=args.outscale)
148
+ except RuntimeError as error:
149
+ print('Error', error)
150
+ print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
151
+ else:
152
+ if args.ext == 'auto':
153
+ extension = extension[1:]
154
+ else:
155
+ extension = args.ext
156
+ if img_mode == 'RGBA': # RGBA images should be saved in png format
157
+ extension = 'png'
158
+ if args.suffix == '':
159
+ save_path = os.path.join(args.output, f'{imgname}.{extension}')
160
+ else:
161
+ save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
162
+ cv2.imwrite(save_path, output)
163
+
164
+
165
+ if __name__ == '__main__':
166
+ main()
Real-ESRGAN/inference_realesrgan_video.py ADDED
@@ -0,0 +1,398 @@
1
+ import argparse
2
+ import cv2
3
+ import glob
4
+ import mimetypes
5
+ import numpy as np
6
+ import os
7
+ import shutil
8
+ import subprocess
9
+ import torch
10
+ from basicsr.archs.rrdbnet_arch import RRDBNet
11
+ from basicsr.utils.download_util import load_file_from_url
12
+ from os import path as osp
13
+ from tqdm import tqdm
14
+
15
+ from realesrgan import RealESRGANer
16
+ from realesrgan.archs.srvgg_arch import SRVGGNetCompact
17
+
18
+ try:
19
+ import ffmpeg
20
+ except ImportError:
21
+ import pip
22
+ pip.main(['install', '--user', 'ffmpeg-python'])
23
+ import ffmpeg
24
+
25
+
26
+ def get_video_meta_info(video_path):
27
+ ret = {}
28
+ probe = ffmpeg.probe(video_path)
29
+ video_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'video']
30
+ has_audio = any(stream['codec_type'] == 'audio' for stream in probe['streams'])
31
+ ret['width'] = video_streams[0]['width']
32
+ ret['height'] = video_streams[0]['height']
33
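+ # avg_frame_rate is a fraction string such as '30000/1001'; eval() turns it into a number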
+ ret['fps'] = eval(video_streams[0]['avg_frame_rate'])
34
+ ret['audio'] = ffmpeg.input(video_path).audio if has_audio else None
35
+ ret['nb_frames'] = int(video_streams[0]['nb_frames'])
36
+ return ret
37
+
38
+
39
+ def get_sub_video(args, num_process, process_idx):
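+ # Split the input video into num_process equal-duration chunks with ffmpeg,
+ # so that each worker process can upscale its own chunk in parallel.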
40
+ if num_process == 1:
41
+ return args.input
42
+ meta = get_video_meta_info(args.input)
43
+ duration = int(meta['nb_frames'] / meta['fps'])
44
+ part_time = duration // num_process
45
+ print(f'duration: {duration}, part_time: {part_time}')
46
+ os.makedirs(osp.join(args.output, f'{args.video_name}_inp_tmp_videos'), exist_ok=True)
47
+ out_path = osp.join(args.output, f'{args.video_name}_inp_tmp_videos', f'{process_idx:03d}.mp4')
48
+ cmd = [
49
+ args.ffmpeg_bin, f'-i {args.input}', '-ss', f'{part_time * process_idx}',
50
+ f'-to {part_time * (process_idx + 1)}' if process_idx != num_process - 1 else '', '-async 1', out_path, '-y'
51
+ ]
52
+ print(' '.join(cmd))
53
+ subprocess.call(' '.join(cmd), shell=True)
54
+ return out_path
55
+
56
+
57
+ class Reader:
58
+
59
+ def __init__(self, args, total_workers=1, worker_idx=0):
60
+ self.args = args
61
+ input_type = mimetypes.guess_type(args.input)[0]
62
+ self.input_type = 'folder' if input_type is None else input_type
63
+ self.paths = [] # for image&folder type
64
+ self.audio = None
65
+ self.input_fps = None
66
+ if self.input_type.startswith('video'):
67
+ video_path = get_sub_video(args, total_workers, worker_idx)
68
+ self.stream_reader = (
69
+ ffmpeg.input(video_path).output('pipe:', format='rawvideo', pix_fmt='bgr24',
70
+ loglevel='error').run_async(
71
+ pipe_stdin=True, pipe_stdout=True, cmd=args.ffmpeg_bin))
72
+ meta = get_video_meta_info(video_path)
73
+ self.width = meta['width']
74
+ self.height = meta['height']
75
+ self.input_fps = meta['fps']
76
+ self.audio = meta['audio']
77
+ self.nb_frames = meta['nb_frames']
78
+
79
+ else:
80
+ if self.input_type.startswith('image'):
81
+ self.paths = [args.input]
82
+ else:
83
+ paths = sorted(glob.glob(os.path.join(args.input, '*')))
84
+ tot_frames = len(paths)
85
+ num_frame_per_worker = tot_frames // total_workers + (1 if tot_frames % total_workers else 0)
86
+ self.paths = paths[num_frame_per_worker * worker_idx:num_frame_per_worker * (worker_idx + 1)]
87
+
88
+ self.nb_frames = len(self.paths)
89
+ assert self.nb_frames > 0, 'empty folder'
90
+ from PIL import Image
91
+ tmp_img = Image.open(self.paths[0])
92
+ self.width, self.height = tmp_img.size
93
+ self.idx = 0
94
+
95
+ def get_resolution(self):
96
+ return self.height, self.width
97
+
98
+ def get_fps(self):
99
+ if self.args.fps is not None:
100
+ return self.args.fps
101
+ elif self.input_fps is not None:
102
+ return self.input_fps
103
+ return 24
104
+
105
+ def get_audio(self):
106
+ return self.audio
107
+
108
+ def __len__(self):
109
+ return self.nb_frames
110
+
111
+ def get_frame_from_stream(self):
112
+ img_bytes = self.stream_reader.stdout.read(self.width * self.height * 3) # 3 bytes for one pixel
113
+ if not img_bytes:
114
+ return None
115
+ img = np.frombuffer(img_bytes, np.uint8).reshape([self.height, self.width, 3])
116
+ return img
117
+
118
+ def get_frame_from_list(self):
119
+ if self.idx >= self.nb_frames:
120
+ return None
121
+ img = cv2.imread(self.paths[self.idx])
122
+ self.idx += 1
123
+ return img
124
+
125
+ def get_frame(self):
126
+ if self.input_type.startswith('video'):
127
+ return self.get_frame_from_stream()
128
+ else:
129
+ return self.get_frame_from_list()
130
+
131
+ def close(self):
132
+ if self.input_type.startswith('video'):
133
+ self.stream_reader.stdin.close()
134
+ self.stream_reader.wait()
135
+
136
+
137
+ class Writer:
138
+
139
+ def __init__(self, args, audio, height, width, video_save_path, fps):
140
+ out_width, out_height = int(width * args.outscale), int(height * args.outscale)
141
+ if out_height > 2160:
142
+ print('You are generating a video that is larger than 4K, which will be very slow due to IO speed.',
143
+ 'We highly recommend decreasing the outscale (i.e., -s).')
144
+
145
+ if audio is not None:
146
+ self.stream_writer = (
147
+ ffmpeg.input('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{out_width}x{out_height}',
148
+ framerate=fps).output(
149
+ audio,
150
+ video_save_path,
151
+ pix_fmt='yuv420p',
152
+ vcodec='libx264',
153
+ loglevel='error',
154
+ acodec='copy').overwrite_output().run_async(
155
+ pipe_stdin=True, pipe_stdout=True, cmd=args.ffmpeg_bin))
156
+ else:
157
+ self.stream_writer = (
158
+ ffmpeg.input('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{out_width}x{out_height}',
159
+ framerate=fps).output(
160
+ video_save_path, pix_fmt='yuv420p', vcodec='libx264',
161
+ loglevel='error').overwrite_output().run_async(
162
+ pipe_stdin=True, pipe_stdout=True, cmd=args.ffmpeg_bin))
163
+
164
+ def write_frame(self, frame):
165
+ frame = frame.astype(np.uint8).tobytes()
166
+ self.stream_writer.stdin.write(frame)
167
+
168
+ def close(self):
169
+ self.stream_writer.stdin.close()
170
+ self.stream_writer.wait()
171
+
172
+
173
+ def inference_video(args, video_save_path, device=None, total_workers=1, worker_idx=0):
174
+ # ---------------------- determine models according to model names ---------------------- #
175
+ args.model_name = args.model_name.split('.pth')[0]
176
+ if args.model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
177
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
178
+ netscale = 4
179
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
180
+ elif args.model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
181
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
182
+ netscale = 4
183
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
184
+ elif args.model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
185
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
186
+ netscale = 4
187
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
188
+ elif args.model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
189
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
190
+ netscale = 2
191
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
192
+ elif args.model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
193
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
194
+ netscale = 4
195
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
196
+ elif args.model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
197
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
198
+ netscale = 4
199
+ file_url = [
200
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
201
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
202
+ ]
203
+
204
+ # ---------------------- determine model paths ---------------------- #
205
+ model_path = os.path.join('weights', args.model_name + '.pth')
206
+ if not os.path.isfile(model_path):
207
+ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
208
+ for url in file_url:
209
+ # model_path will be updated
210
+ model_path = load_file_from_url(
211
+ url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
212
+
213
+ # use dni to control the denoise strength
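+ # (DNI: deep network interpolation. The parameters of realesr-general-x4v3 and its
+ # stronger-denoising 'wdn' variant are linearly blended with the weights
+ # [denoise_strength, 1 - denoise_strength] computed below.)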
214
+ dni_weight = None
215
+ if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
216
+ wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
217
+ model_path = [model_path, wdn_model_path]
218
+ dni_weight = [args.denoise_strength, 1 - args.denoise_strength]
219
+
220
+ # restorer
221
+ upsampler = RealESRGANer(
222
+ scale=netscale,
223
+ model_path=model_path,
224
+ dni_weight=dni_weight,
225
+ model=model,
226
+ tile=args.tile,
227
+ tile_pad=args.tile_pad,
228
+ pre_pad=args.pre_pad,
229
+ half=not args.fp32,
230
+ device=device,
231
+ )
232
+
233
+ if 'anime' in args.model_name and args.face_enhance:
234
+ print('face_enhance is not supported in anime models, so we have turned this option off for you. '
235
+ 'If you insist on turning it on, please manually comment out the relevant lines of code.')
236
+ args.face_enhance = False
237
+
238
+ if args.face_enhance: # Use GFPGAN for face enhancement
239
+ from gfpgan import GFPGANer
240
+ face_enhancer = GFPGANer(
241
+ model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
242
+ upscale=args.outscale,
243
+ arch='clean',
244
+ channel_multiplier=2,
245
+ bg_upsampler=upsampler) # TODO support custom device
246
+ else:
247
+ face_enhancer = None
248
+
249
+ reader = Reader(args, total_workers, worker_idx)
250
+ audio = reader.get_audio()
251
+ height, width = reader.get_resolution()
252
+ fps = reader.get_fps()
253
+ writer = Writer(args, audio, height, width, video_save_path, fps)
254
+
255
+ pbar = tqdm(total=len(reader), unit='frame', desc='inference')
256
+ while True:
257
+ img = reader.get_frame()
258
+ if img is None:
259
+ break
260
+
261
+ try:
262
+ if args.face_enhance:
263
+ _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
264
+ else:
265
+ output, _ = upsampler.enhance(img, outscale=args.outscale)
266
+ except RuntimeError as error:
267
+ print('Error', error)
268
+ print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
269
+ else:
270
+ writer.write_frame(output)
271
+
272
+ torch.cuda.synchronize(device)
273
+ pbar.update(1)
274
+
275
+ reader.close()
276
+ writer.close()
277
+
278
+
279
+ def run(args):
280
+ args.video_name = osp.splitext(os.path.basename(args.input))[0]
281
+ video_save_path = osp.join(args.output, f'{args.video_name}_{args.suffix}.mp4')
282
+
283
+ if args.extract_frame_first:
284
+ tmp_frames_folder = osp.join(args.output, f'{args.video_name}_inp_tmp_frames')
285
+ os.makedirs(tmp_frames_folder, exist_ok=True)
286
+ os.system(f'ffmpeg -i {args.input} -qscale:v 1 -qmin 1 -qmax 1 -vsync 0 {tmp_frames_folder}/frame%08d.png')
287
+ args.input = tmp_frames_folder
288
+
289
+ num_gpus = torch.cuda.device_count()
290
+ num_process = num_gpus * args.num_process_per_gpu
291
+ if num_process == 1:
292
+ inference_video(args, video_save_path)
293
+ return
294
+
295
+ ctx = torch.multiprocessing.get_context('spawn')
296
+ pool = ctx.Pool(num_process)
297
+ os.makedirs(osp.join(args.output, f'{args.video_name}_out_tmp_videos'), exist_ok=True)
298
+ pbar = tqdm(total=num_process, unit='sub_video', desc='inference')
299
+ for i in range(num_process):
300
+ sub_video_save_path = osp.join(args.output, f'{args.video_name}_out_tmp_videos', f'{i:03d}.mp4')
301
+ pool.apply_async(
302
+ inference_video,
303
+ args=(args, sub_video_save_path, torch.device(i % num_gpus), num_process, i),
304
+ callback=lambda arg: pbar.update(1))
305
+ pool.close()
306
+ pool.join()
307
+
308
+ # combine sub videos
309
+ # prepare vidlist.txt
310
+ with open(f'{args.output}/{args.video_name}_vidlist.txt', 'w') as f:
311
+ for i in range(num_process):
312
+ f.write(f'file \'{args.video_name}_out_tmp_videos/{i:03d}.mp4\'\n')
313
+
314
+ cmd = [
315
+ args.ffmpeg_bin, '-f', 'concat', '-safe', '0', '-i', f'{args.output}/{args.video_name}_vidlist.txt', '-c',
316
+ 'copy', f'{video_save_path}'
317
+ ]
318
+ print(' '.join(cmd))
319
+ subprocess.call(cmd)
320
+ shutil.rmtree(osp.join(args.output, f'{args.video_name}_out_tmp_videos'))
321
+ if osp.exists(osp.join(args.output, f'{args.video_name}_inp_tmp_videos')):
322
+ shutil.rmtree(osp.join(args.output, f'{args.video_name}_inp_tmp_videos'))
323
+ os.remove(f'{args.output}/{args.video_name}_vidlist.txt')
324
+
325
+
326
+ def main():
327
+ """Inference demo for Real-ESRGAN.
328
+ It is mainly for restoring anime videos.
329
+
330
+ """
331
+ parser = argparse.ArgumentParser()
332
+ parser.add_argument('-i', '--input', type=str, default='inputs', help='Input video, image or folder')
333
+ parser.add_argument(
334
+ '-n',
335
+ '--model_name',
336
+ type=str,
337
+ default='realesr-animevideov3',
338
+ help=('Model names: realesr-animevideov3 | RealESRGAN_x4plus_anime_6B | RealESRGAN_x4plus | RealESRNet_x4plus |'
339
+ ' RealESRGAN_x2plus | realesr-general-x4v3. '
340
+ 'Default: realesr-animevideov3'))
341
+ parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
342
+ parser.add_argument(
343
+ '-dn',
344
+ '--denoise_strength',
345
+ type=float,
346
+ default=0.5,
347
+ help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
348
+ 'Only used for the realesr-general-x4v3 model'))
349
+ parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
350
+ parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored video')
351
+ parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
352
+ parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
353
+ parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
354
+ parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
355
+ parser.add_argument(
356
+ '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
357
+ parser.add_argument('--fps', type=float, default=None, help='FPS of the output video')
358
+ parser.add_argument('--ffmpeg_bin', type=str, default='ffmpeg', help='The path to ffmpeg')
359
+ parser.add_argument('--extract_frame_first', action='store_true')
360
+ parser.add_argument('--num_process_per_gpu', type=int, default=1)
361
+
362
+ parser.add_argument(
363
+ '--alpha_upsampler',
364
+ type=str,
365
+ default='realesrgan',
366
+ help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
367
+ parser.add_argument(
368
+ '--ext',
369
+ type=str,
370
+ default='auto',
371
+ help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
372
+ args = parser.parse_args()
373
+
374
+ args.input = args.input.rstrip('/').rstrip('\\')
375
+ os.makedirs(args.output, exist_ok=True)
376
+
377
+ if mimetypes.guess_type(args.input)[0] is not None and mimetypes.guess_type(args.input)[0].startswith('video'):
378
+ is_video = True
379
+ else:
380
+ is_video = False
381
+
382
+ if is_video and args.input.endswith('.flv'):
383
+ mp4_path = args.input.replace('.flv', '.mp4')
384
+ os.system(f'ffmpeg -i {args.input} -codec copy {mp4_path}')
385
+ args.input = mp4_path
386
+
387
+ if args.extract_frame_first and not is_video:
388
+ args.extract_frame_first = False
389
+
390
+ run(args)
391
+
392
+ if args.extract_frame_first:
393
+ tmp_frames_folder = osp.join(args.output, f'{args.video_name}_inp_tmp_frames')
394
+ shutil.rmtree(tmp_frames_folder)
395
+
396
+
397
+ if __name__ == '__main__':
398
+ main()
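The per-frame call above is the same `RealESRGANer.enhance` API that drives single-image restoration. A minimal sketch of that usage (the model choice, input image, and local weight path are assumptions, not part of this upload; it presumes the weights were downloaded beforehand and the script runs from the repo root):

import cv2
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer

# Assumed: RealESRGAN_x4plus weights already downloaded to this path.
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
upsampler = RealESRGANer(
    scale=4,
    model_path='experiments/pretrained_models/RealESRGAN_x4plus.pth',
    model=model,
    tile=0, tile_pad=10, pre_pad=0,
    half=True)  # fp16 inference; pass half=False to mirror the --fp32 flag
img = cv2.imread('inputs/0014.jpg', cv2.IMREAD_UNCHANGED)
output, _ = upsampler.enhance(img, outscale=4)
cv2.imwrite('results/0014_out.png', output)

The video path in this script simply repeats that call once per decoded frame, optionally wrapped by GFPGAN when --face_enhance is set.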
Real-ESRGAN/inputs/00003.png ADDED
Real-ESRGAN/inputs/00017_gray.png ADDED
Real-ESRGAN/inputs/0014.jpg ADDED
Real-ESRGAN/inputs/0030.jpg ADDED
Real-ESRGAN/inputs/ADE_val_00000114.jpg ADDED
Real-ESRGAN/inputs/OST_009.png ADDED
Real-ESRGAN/inputs/children-alpha.png ADDED
Real-ESRGAN/inputs/tree_alpha_16bit.png ADDED
Real-ESRGAN/inputs/video/onepiece_demo.mp4 ADDED
Binary file (593 kB).
Real-ESRGAN/inputs/wolf_gray.jpg ADDED
Real-ESRGAN/options/finetune_realesrgan_x4plus.yml ADDED
@@ -0,0 +1,188 @@
+ # general settings
+ name: finetune_RealESRGANx4plus_400k
+ model_type: RealESRGANModel
+ scale: 4
+ num_gpu: auto
+ manual_seed: 0
+
+ # ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
+ # USM the ground-truth
+ l1_gt_usm: True
+ percep_gt_usm: True
+ gan_gt_usm: False
+
+ # the first degradation process
+ resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
+ resize_range: [0.15, 1.5]
+ gaussian_noise_prob: 0.5
+ noise_range: [1, 30]
+ poisson_scale_range: [0.05, 3]
+ gray_noise_prob: 0.4
+ jpeg_range: [30, 95]
+
+ # the second degradation process
+ second_blur_prob: 0.8
+ resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
+ resize_range2: [0.3, 1.2]
+ gaussian_noise_prob2: 0.5
+ noise_range2: [1, 25]
+ poisson_scale_range2: [0.05, 2.5]
+ gray_noise_prob2: 0.4
+ jpeg_range2: [30, 95]
+
+ gt_size: 256
+ queue_size: 180
+
+ # dataset and data loader settings
+ datasets:
+   train:
+     name: DF2K+OST
+     type: RealESRGANDataset
+     dataroot_gt: datasets/DF2K
+     meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
+     io_backend:
+       type: disk
+
+     blur_kernel_size: 21
+     kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob: 0.1
+     blur_sigma: [0.2, 3]
+     betag_range: [0.5, 4]
+     betap_range: [1, 2]
+
+     blur_kernel_size2: 21
+     kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob2: 0.1
+     blur_sigma2: [0.2, 1.5]
+     betag_range2: [0.5, 4]
+     betap_range2: [1, 2]
+
+     final_sinc_prob: 0.8
+
+     gt_size: 256
+     use_hflip: True
+     use_rot: False
+
+     # data loader
+     use_shuffle: true
+     num_worker_per_gpu: 5
+     batch_size_per_gpu: 12
+     dataset_enlarge_ratio: 1
+     prefetch_mode: ~
+
+   # Uncomment these for validation
+   # val:
+   #   name: validation
+   #   type: PairedImageDataset
+   #   dataroot_gt: path_to_gt
+   #   dataroot_lq: path_to_lq
+   #   io_backend:
+   #     type: disk
+
+ # network structures
+ network_g:
+   type: RRDBNet
+   num_in_ch: 3
+   num_out_ch: 3
+   num_feat: 64
+   num_block: 23
+   num_grow_ch: 32
+
+ network_d:
+   type: UNetDiscriminatorSN
+   num_in_ch: 3
+   num_feat: 64
+   skip_connection: True
+
+ # path
+ path:
+   # use the pre-trained Real-ESRNet model
+   pretrain_network_g: experiments/pretrained_models/RealESRNet_x4plus.pth
+   param_key_g: params_ema
+   strict_load_g: true
+   pretrain_network_d: experiments/pretrained_models/RealESRGAN_x4plus_netD.pth
+   param_key_d: params
+   strict_load_d: true
+   resume_state: ~
+
+ # training settings
+ train:
+   ema_decay: 0.999
+   optim_g:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+   optim_d:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+
+   scheduler:
+     type: MultiStepLR
+     milestones: [400000]
+     gamma: 0.5
+
+   total_iter: 400000
+   warmup_iter: -1  # no warm up
+
+   # losses
+   pixel_opt:
+     type: L1Loss
+     loss_weight: 1.0
+     reduction: mean
+   # perceptual loss (content and style losses)
+   perceptual_opt:
+     type: PerceptualLoss
+     layer_weights:
+       # before relu
+       'conv1_2': 0.1
+       'conv2_2': 0.1
+       'conv3_4': 1
+       'conv4_4': 1
+       'conv5_4': 1
+     vgg_type: vgg19
+     use_input_norm: true
+     perceptual_weight: !!float 1.0
+     style_weight: 0
+     range_norm: false
+     criterion: l1
+   # gan loss
+   gan_opt:
+     type: GANLoss
+     gan_type: vanilla
+     real_label_val: 1.0
+     fake_label_val: 0.0
+     loss_weight: !!float 1e-1
+
+   net_d_iters: 1
+   net_d_init_iters: 0
+
+ # Uncomment these for validation
+ # validation settings
+ # val:
+ #   val_freq: !!float 5e3
+ #   save_img: True
+
+ #   metrics:
+ #     psnr: # metric name
+ #       type: calculate_psnr
+ #       crop_border: 4
+ #       test_y_channel: false
+
+ # logging settings
+ logger:
+   print_freq: 100
+   save_checkpoint_freq: !!float 5e3
+   use_tb_logger: true
+   wandb:
+     project: ~
+     resume_id: ~
+
+ # dist training settings
+ dist_params:
+   backend: nccl
+   port: 29500
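A note on using this option file: the repo's training docs launch fine-tuning with `python realesrgan/train.py -opt options/finetune_realesrgan_x4plus.yml --auto_resume` (wrapped in `torch.distributed.launch` for multi-GPU runs). Since BasicSR parses these files as plain YAML, edited values can be sanity-checked before committing to a 400k-iteration run; a small sketch, assuming PyYAML is installed and the relative path below is valid:

import yaml

with open('options/finetune_realesrgan_x4plus.yml') as f:
    opt = yaml.safe_load(f)

# '!!float 1e-4' resolves through the standard YAML float tag, not as a string.
assert isinstance(opt['train']['optim_g']['lr'], float)
print(opt['scale'], opt['train']['total_iter'])  # 4 400000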
Real-ESRGAN/options/finetune_realesrgan_x4plus_pairdata.yml ADDED
@@ -0,0 +1,150 @@
+ # general settings
+ name: finetune_RealESRGANx4plus_400k_pairdata
+ model_type: RealESRGANModel
+ scale: 4
+ num_gpu: auto
+ manual_seed: 0
+
+ # USM the ground-truth
+ l1_gt_usm: True
+ percep_gt_usm: True
+ gan_gt_usm: False
+
+ high_order_degradation: False  # do not use the high-order degradation generation process
+
+ # dataset and data loader settings
+ datasets:
+   train:
+     name: DIV2K
+     type: RealESRGANPairedDataset
+     dataroot_gt: datasets/DF2K
+     dataroot_lq: datasets/DF2K
+     meta_info: datasets/DF2K/meta_info/meta_info_DIV2K_sub_pair.txt
+     io_backend:
+       type: disk
+
+     gt_size: 256
+     use_hflip: True
+     use_rot: False
+
+     # data loader
+     use_shuffle: true
+     num_worker_per_gpu: 5
+     batch_size_per_gpu: 12
+     dataset_enlarge_ratio: 1
+     prefetch_mode: ~
+
+   # Uncomment these for validation
+   # val:
+   #   name: validation
+   #   type: PairedImageDataset
+   #   dataroot_gt: path_to_gt
+   #   dataroot_lq: path_to_lq
+   #   io_backend:
+   #     type: disk
+
+ # network structures
+ network_g:
+   type: RRDBNet
+   num_in_ch: 3
+   num_out_ch: 3
+   num_feat: 64
+   num_block: 23
+   num_grow_ch: 32
+
+ network_d:
+   type: UNetDiscriminatorSN
+   num_in_ch: 3
+   num_feat: 64
+   skip_connection: True
+
+ # path
+ path:
+   # use the pre-trained Real-ESRNet model
+   pretrain_network_g: experiments/pretrained_models/RealESRNet_x4plus.pth
+   param_key_g: params_ema
+   strict_load_g: true
+   pretrain_network_d: experiments/pretrained_models/RealESRGAN_x4plus_netD.pth
+   param_key_d: params
+   strict_load_d: true
+   resume_state: ~
+
+ # training settings
+ train:
+   ema_decay: 0.999
+   optim_g:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+   optim_d:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+
+   scheduler:
+     type: MultiStepLR
+     milestones: [400000]
+     gamma: 0.5
+
+   total_iter: 400000
+   warmup_iter: -1  # no warm up
+
+   # losses
+   pixel_opt:
+     type: L1Loss
+     loss_weight: 1.0
+     reduction: mean
+   # perceptual loss (content and style losses)
+   perceptual_opt:
+     type: PerceptualLoss
+     layer_weights:
+       # before relu
+       'conv1_2': 0.1
+       'conv2_2': 0.1
+       'conv3_4': 1
+       'conv4_4': 1
+       'conv5_4': 1
+     vgg_type: vgg19
+     use_input_norm: true
+     perceptual_weight: !!float 1.0
+     style_weight: 0
+     range_norm: false
+     criterion: l1
+   # gan loss
+   gan_opt:
+     type: GANLoss
+     gan_type: vanilla
+     real_label_val: 1.0
+     fake_label_val: 0.0
+     loss_weight: !!float 1e-1
+
+   net_d_iters: 1
+   net_d_init_iters: 0
+
+ # Uncomment these for validation
+ # validation settings
+ # val:
+ #   val_freq: !!float 5e3
+ #   save_img: True
+
+ #   metrics:
+ #     psnr: # metric name
+ #       type: calculate_psnr
+ #       crop_border: 4
+ #       test_y_channel: false
+
+ # logging settings
+ logger:
+   print_freq: 100
+   save_checkpoint_freq: !!float 5e3
+   use_tb_logger: true
+   wandb:
+     project: ~
+     resume_id: ~
+
+ # dist training settings
+ dist_params:
+   backend: nccl
+   port: 29500
Real-ESRGAN/options/train_realesrgan_x2plus.yml ADDED
@@ -0,0 +1,186 @@
+ # general settings
+ name: train_RealESRGANx2plus_400k_B12G4
+ model_type: RealESRGANModel
+ scale: 2
+ num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
+ manual_seed: 0
+
+ # ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
+ # USM the ground-truth
+ l1_gt_usm: True
+ percep_gt_usm: True
+ gan_gt_usm: False
+
+ # the first degradation process
+ resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
+ resize_range: [0.15, 1.5]
+ gaussian_noise_prob: 0.5
+ noise_range: [1, 30]
+ poisson_scale_range: [0.05, 3]
+ gray_noise_prob: 0.4
+ jpeg_range: [30, 95]
+
+ # the second degradation process
+ second_blur_prob: 0.8
+ resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
+ resize_range2: [0.3, 1.2]
+ gaussian_noise_prob2: 0.5
+ noise_range2: [1, 25]
+ poisson_scale_range2: [0.05, 2.5]
+ gray_noise_prob2: 0.4
+ jpeg_range2: [30, 95]
+
+ gt_size: 256
+ queue_size: 180
+
+ # dataset and data loader settings
+ datasets:
+   train:
+     name: DF2K+OST
+     type: RealESRGANDataset
+     dataroot_gt: datasets/DF2K
+     meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
+     io_backend:
+       type: disk
+
+     blur_kernel_size: 21
+     kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob: 0.1
+     blur_sigma: [0.2, 3]
+     betag_range: [0.5, 4]
+     betap_range: [1, 2]
+
+     blur_kernel_size2: 21
+     kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob2: 0.1
+     blur_sigma2: [0.2, 1.5]
+     betag_range2: [0.5, 4]
+     betap_range2: [1, 2]
+
+     final_sinc_prob: 0.8
+
+     gt_size: 256
+     use_hflip: True
+     use_rot: False
+
+     # data loader
+     use_shuffle: true
+     num_worker_per_gpu: 5
+     batch_size_per_gpu: 12
+     dataset_enlarge_ratio: 1
+     prefetch_mode: ~
+
+   # Uncomment these for validation
+   # val:
+   #   name: validation
+   #   type: PairedImageDataset
+   #   dataroot_gt: path_to_gt
+   #   dataroot_lq: path_to_lq
+   #   io_backend:
+   #     type: disk
+
+ # network structures
+ network_g:
+   type: RRDBNet
+   num_in_ch: 3
+   num_out_ch: 3
+   num_feat: 64
+   num_block: 23
+   num_grow_ch: 32
+   scale: 2
+
+ network_d:
+   type: UNetDiscriminatorSN
+   num_in_ch: 3
+   num_feat: 64
+   skip_connection: True
+
+ # path
+ path:
+   # use the pre-trained Real-ESRNet model
+   pretrain_network_g: experiments/pretrained_models/RealESRNet_x2plus.pth
+   param_key_g: params_ema
+   strict_load_g: true
+   resume_state: ~
+
+ # training settings
+ train:
+   ema_decay: 0.999
+   optim_g:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+   optim_d:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+
+   scheduler:
+     type: MultiStepLR
+     milestones: [400000]
+     gamma: 0.5
+
+   total_iter: 400000
+   warmup_iter: -1  # no warm up
+
+   # losses
+   pixel_opt:
+     type: L1Loss
+     loss_weight: 1.0
+     reduction: mean
+   # perceptual loss (content and style losses)
+   perceptual_opt:
+     type: PerceptualLoss
+     layer_weights:
+       # before relu
+       'conv1_2': 0.1
+       'conv2_2': 0.1
+       'conv3_4': 1
+       'conv4_4': 1
+       'conv5_4': 1
+     vgg_type: vgg19
+     use_input_norm: true
+     perceptual_weight: !!float 1.0
+     style_weight: 0
+     range_norm: false
+     criterion: l1
+   # gan loss
+   gan_opt:
+     type: GANLoss
+     gan_type: vanilla
+     real_label_val: 1.0
+     fake_label_val: 0.0
+     loss_weight: !!float 1e-1
+
+   net_d_iters: 1
+   net_d_init_iters: 0
+
+ # Uncomment these for validation
+ # validation settings
+ # val:
+ #   val_freq: !!float 5e3
+ #   save_img: True
+
+ #   metrics:
+ #     psnr: # metric name
+ #       type: calculate_psnr
+ #       crop_border: 4
+ #       test_y_channel: false
+
+ # logging settings
+ logger:
+   print_freq: 100
+   save_checkpoint_freq: !!float 5e3
+   use_tb_logger: true
+   wandb:
+     project: ~
+     resume_id: ~
+
+ # dist training settings
+ dist_params:
+   backend: nccl
+   port: 29500
Real-ESRGAN/options/train_realesrgan_x4plus.yml ADDED
@@ -0,0 +1,185 @@
+ # general settings
+ name: train_RealESRGANx4plus_400k_B12G4
+ model_type: RealESRGANModel
+ scale: 4
+ num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
+ manual_seed: 0
+
+ # ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
+ # USM the ground-truth
+ l1_gt_usm: True
+ percep_gt_usm: True
+ gan_gt_usm: False
+
+ # the first degradation process
+ resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
+ resize_range: [0.15, 1.5]
+ gaussian_noise_prob: 0.5
+ noise_range: [1, 30]
+ poisson_scale_range: [0.05, 3]
+ gray_noise_prob: 0.4
+ jpeg_range: [30, 95]
+
+ # the second degradation process
+ second_blur_prob: 0.8
+ resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
+ resize_range2: [0.3, 1.2]
+ gaussian_noise_prob2: 0.5
+ noise_range2: [1, 25]
+ poisson_scale_range2: [0.05, 2.5]
+ gray_noise_prob2: 0.4
+ jpeg_range2: [30, 95]
+
+ gt_size: 256
+ queue_size: 180
+
+ # dataset and data loader settings
+ datasets:
+   train:
+     name: DF2K+OST
+     type: RealESRGANDataset
+     dataroot_gt: datasets/DF2K
+     meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
+     io_backend:
+       type: disk
+
+     blur_kernel_size: 21
+     kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob: 0.1
+     blur_sigma: [0.2, 3]
+     betag_range: [0.5, 4]
+     betap_range: [1, 2]
+
+     blur_kernel_size2: 21
+     kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob2: 0.1
+     blur_sigma2: [0.2, 1.5]
+     betag_range2: [0.5, 4]
+     betap_range2: [1, 2]
+
+     final_sinc_prob: 0.8
+
+     gt_size: 256
+     use_hflip: True
+     use_rot: False
+
+     # data loader
+     use_shuffle: true
+     num_worker_per_gpu: 5
+     batch_size_per_gpu: 12
+     dataset_enlarge_ratio: 1
+     prefetch_mode: ~
+
+   # Uncomment these for validation
+   # val:
+   #   name: validation
+   #   type: PairedImageDataset
+   #   dataroot_gt: path_to_gt
+   #   dataroot_lq: path_to_lq
+   #   io_backend:
+   #     type: disk
+
+ # network structures
+ network_g:
+   type: RRDBNet
+   num_in_ch: 3
+   num_out_ch: 3
+   num_feat: 64
+   num_block: 23
+   num_grow_ch: 32
+
+ network_d:
+   type: UNetDiscriminatorSN
+   num_in_ch: 3
+   num_feat: 64
+   skip_connection: True
+
+ # path
+ path:
+   # use the pre-trained Real-ESRNet model
+   pretrain_network_g: experiments/pretrained_models/RealESRNet_x4plus.pth
+   param_key_g: params_ema
+   strict_load_g: true
+   resume_state: ~
+
+ # training settings
+ train:
+   ema_decay: 0.999
+   optim_g:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+   optim_d:
+     type: Adam
+     lr: !!float 1e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+
+   scheduler:
+     type: MultiStepLR
+     milestones: [400000]
+     gamma: 0.5
+
+   total_iter: 400000
+   warmup_iter: -1  # no warm up
+
+   # losses
+   pixel_opt:
+     type: L1Loss
+     loss_weight: 1.0
+     reduction: mean
+   # perceptual loss (content and style losses)
+   perceptual_opt:
+     type: PerceptualLoss
+     layer_weights:
+       # before relu
+       'conv1_2': 0.1
+       'conv2_2': 0.1
+       'conv3_4': 1
+       'conv4_4': 1
+       'conv5_4': 1
+     vgg_type: vgg19
+     use_input_norm: true
+     perceptual_weight: !!float 1.0
+     style_weight: 0
+     range_norm: false
+     criterion: l1
+   # gan loss
+   gan_opt:
+     type: GANLoss
+     gan_type: vanilla
+     real_label_val: 1.0
+     fake_label_val: 0.0
+     loss_weight: !!float 1e-1
+
+   net_d_iters: 1
+   net_d_init_iters: 0
+
+ # Uncomment these for validation
+ # validation settings
+ # val:
+ #   val_freq: !!float 5e3
+ #   save_img: True
+
+ #   metrics:
+ #     psnr: # metric name
+ #       type: calculate_psnr
+ #       crop_border: 4
+ #       test_y_channel: false
+
+ # logging settings
+ logger:
+   print_freq: 100
+   save_checkpoint_freq: !!float 5e3
+   use_tb_logger: true
+   wandb:
+     project: ~
+     resume_id: ~
+
+ # dist training settings
+ dist_params:
+   backend: nccl
+   port: 29500
Real-ESRGAN/options/train_realesrnet_x2plus.yml ADDED
@@ -0,0 +1,145 @@
+ # general settings
+ name: train_RealESRNetx2plus_1000k_B12G4
+ model_type: RealESRNetModel
+ scale: 2
+ num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
+ manual_seed: 0
+
+ # ----------------- options for synthesizing training data in RealESRNetModel ----------------- #
+ gt_usm: True  # USM the ground-truth
+
+ # the first degradation process
+ resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
+ resize_range: [0.15, 1.5]
+ gaussian_noise_prob: 0.5
+ noise_range: [1, 30]
+ poisson_scale_range: [0.05, 3]
+ gray_noise_prob: 0.4
+ jpeg_range: [30, 95]
+
+ # the second degradation process
+ second_blur_prob: 0.8
+ resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
+ resize_range2: [0.3, 1.2]
+ gaussian_noise_prob2: 0.5
+ noise_range2: [1, 25]
+ poisson_scale_range2: [0.05, 2.5]
+ gray_noise_prob2: 0.4
+ jpeg_range2: [30, 95]
+
+ gt_size: 256
+ queue_size: 180
+
+ # dataset and data loader settings
+ datasets:
+   train:
+     name: DF2K+OST
+     type: RealESRGANDataset
+     dataroot_gt: datasets/DF2K
+     meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
+     io_backend:
+       type: disk
+
+     blur_kernel_size: 21
+     kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob: 0.1
+     blur_sigma: [0.2, 3]
+     betag_range: [0.5, 4]
+     betap_range: [1, 2]
+
+     blur_kernel_size2: 21
+     kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob2: 0.1
+     blur_sigma2: [0.2, 1.5]
+     betag_range2: [0.5, 4]
+     betap_range2: [1, 2]
+
+     final_sinc_prob: 0.8
+
+     gt_size: 256
+     use_hflip: True
+     use_rot: False
+
+     # data loader
+     use_shuffle: true
+     num_worker_per_gpu: 5
+     batch_size_per_gpu: 12
+     dataset_enlarge_ratio: 1
+     prefetch_mode: ~
+
+   # Uncomment these for validation
+   # val:
+   #   name: validation
+   #   type: PairedImageDataset
+   #   dataroot_gt: path_to_gt
+   #   dataroot_lq: path_to_lq
+   #   io_backend:
+   #     type: disk
+
+ # network structures
+ network_g:
+   type: RRDBNet
+   num_in_ch: 3
+   num_out_ch: 3
+   num_feat: 64
+   num_block: 23
+   num_grow_ch: 32
+   scale: 2
+
+ # path
+ path:
+   pretrain_network_g: experiments/pretrained_models/RealESRGAN_x4plus.pth
+   param_key_g: params_ema
+   strict_load_g: False
+   resume_state: ~
+
+ # training settings
+ train:
+   ema_decay: 0.999
+   optim_g:
+     type: Adam
+     lr: !!float 2e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+
+   scheduler:
+     type: MultiStepLR
+     milestones: [1000000]
+     gamma: 0.5
+
+   total_iter: 1000000
+   warmup_iter: -1  # no warm up
+
+   # losses
+   pixel_opt:
+     type: L1Loss
+     loss_weight: 1.0
+     reduction: mean
+
+ # Uncomment these for validation
+ # validation settings
+ # val:
+ #   val_freq: !!float 5e3
+ #   save_img: True
+
+ #   metrics:
+ #     psnr: # metric name
+ #       type: calculate_psnr
+ #       crop_border: 4
+ #       test_y_channel: false
+
+ # logging settings
+ logger:
+   print_freq: 100
+   save_checkpoint_freq: !!float 5e3
+   use_tb_logger: true
+   wandb:
+     project: ~
+     resume_id: ~
+
+ # dist training settings
+ dist_params:
+   backend: nccl
+   port: 29500
Real-ESRGAN/options/train_realesrnet_x4plus.yml ADDED
@@ -0,0 +1,144 @@
+ # general settings
+ name: train_RealESRNetx4plus_1000k_B12G4
+ model_type: RealESRNetModel
+ scale: 4
+ num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
+ manual_seed: 0
+
+ # ----------------- options for synthesizing training data in RealESRNetModel ----------------- #
+ gt_usm: True  # USM the ground-truth
+
+ # the first degradation process
+ resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
+ resize_range: [0.15, 1.5]
+ gaussian_noise_prob: 0.5
+ noise_range: [1, 30]
+ poisson_scale_range: [0.05, 3]
+ gray_noise_prob: 0.4
+ jpeg_range: [30, 95]
+
+ # the second degradation process
+ second_blur_prob: 0.8
+ resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
+ resize_range2: [0.3, 1.2]
+ gaussian_noise_prob2: 0.5
+ noise_range2: [1, 25]
+ poisson_scale_range2: [0.05, 2.5]
+ gray_noise_prob2: 0.4
+ jpeg_range2: [30, 95]
+
+ gt_size: 256
+ queue_size: 180
+
+ # dataset and data loader settings
+ datasets:
+   train:
+     name: DF2K+OST
+     type: RealESRGANDataset
+     dataroot_gt: datasets/DF2K
+     meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
+     io_backend:
+       type: disk
+
+     blur_kernel_size: 21
+     kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob: 0.1
+     blur_sigma: [0.2, 3]
+     betag_range: [0.5, 4]
+     betap_range: [1, 2]
+
+     blur_kernel_size2: 21
+     kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
+     kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
+     sinc_prob2: 0.1
+     blur_sigma2: [0.2, 1.5]
+     betag_range2: [0.5, 4]
+     betap_range2: [1, 2]
+
+     final_sinc_prob: 0.8
+
+     gt_size: 256
+     use_hflip: True
+     use_rot: False
+
+     # data loader
+     use_shuffle: true
+     num_worker_per_gpu: 5
+     batch_size_per_gpu: 12
+     dataset_enlarge_ratio: 1
+     prefetch_mode: ~
+
+   # Uncomment these for validation
+   # val:
+   #   name: validation
+   #   type: PairedImageDataset
+   #   dataroot_gt: path_to_gt
+   #   dataroot_lq: path_to_lq
+   #   io_backend:
+   #     type: disk
+
+ # network structures
+ network_g:
+   type: RRDBNet
+   num_in_ch: 3
+   num_out_ch: 3
+   num_feat: 64
+   num_block: 23
+   num_grow_ch: 32
+
+ # path
+ path:
+   pretrain_network_g: experiments/pretrained_models/ESRGAN_SRx4_DF2KOST_official-ff704c30.pth
+   param_key_g: params_ema
+   strict_load_g: true
+   resume_state: ~
+
+ # training settings
+ train:
+   ema_decay: 0.999
+   optim_g:
+     type: Adam
+     lr: !!float 2e-4
+     weight_decay: 0
+     betas: [0.9, 0.99]
+
+   scheduler:
+     type: MultiStepLR
+     milestones: [1000000]
+     gamma: 0.5
+
+   total_iter: 1000000
+   warmup_iter: -1  # no warm up
+
+   # losses
+   pixel_opt:
+     type: L1Loss
+     loss_weight: 1.0
+     reduction: mean
+
+ # Uncomment these for validation
+ # validation settings
+ # val:
+ #   val_freq: !!float 5e3
+ #   save_img: True
+
+ #   metrics:
+ #     psnr: # metric name
+ #       type: calculate_psnr
+ #       crop_border: 4
+ #       test_y_channel: false
+
+ # logging settings
+ logger:
+   print_freq: 100
+   save_checkpoint_freq: !!float 5e3
+   use_tb_logger: true
+   wandb:
+     project: ~
+     resume_id: ~
+
+ # dist training settings
+ dist_params:
+   backend: nccl
+   port: 29500
Real-ESRGAN/realesrgan/__init__.py ADDED
@@ -0,0 +1,6 @@
+ # flake8: noqa
+ from .archs import *
+ from .data import *
+ from .models import *
+ from .utils import *
+ from .version import *
Real-ESRGAN/realesrgan/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (327 Bytes).
Real-ESRGAN/realesrgan/__pycache__/utils.cpython-311.pyc ADDED
Binary file (17.9 kB).
Real-ESRGAN/realesrgan/__pycache__/version.cpython-311.pyc ADDED
Binary file (278 Bytes).
Real-ESRGAN/realesrgan/archs/__init__.py ADDED
@@ -0,0 +1,10 @@
+ import importlib
+ from basicsr.utils import scandir
+ from os import path as osp
+
+ # automatically scan and import arch modules for registry
+ # scan all the files that end with '_arch.py' under the archs folder
+ arch_folder = osp.dirname(osp.abspath(__file__))
+ arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
+ # import all the arch modules
+ _arch_modules = [importlib.import_module(f'realesrgan.archs.{file_name}') for file_name in arch_filenames]
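This import-time scan is what connects the `type:` fields in the option files to code: every class in a `*_arch.py` file decorated with `@ARCH_REGISTRY.register()` becomes constructible by name. A sketch, assuming the package and its basicsr dependency are importable:

from basicsr.utils.registry import ARCH_REGISTRY

import realesrgan  # noqa: F401 -- importing the package triggers the scan above

disc_cls = ARCH_REGISTRY.get('UNetDiscriminatorSN')  # look up the registered class by name
disc = disc_cls(num_in_ch=3, num_feat=64)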
Real-ESRGAN/realesrgan/archs/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.18 kB).
Real-ESRGAN/realesrgan/archs/__pycache__/discriminator_arch.cpython-311.pyc ADDED
Binary file (4.89 kB).
Real-ESRGAN/realesrgan/archs/__pycache__/srvgg_arch.cpython-311.pyc ADDED
Binary file (4.1 kB).
Real-ESRGAN/realesrgan/archs/discriminator_arch.py ADDED
@@ -0,0 +1,67 @@
+ from basicsr.utils.registry import ARCH_REGISTRY
+ from torch import nn as nn
+ from torch.nn import functional as F
+ from torch.nn.utils import spectral_norm
+
+
+ @ARCH_REGISTRY.register()
+ class UNetDiscriminatorSN(nn.Module):
+     """Defines a U-Net discriminator with spectral normalization (SN).
+
+     It is used in Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+     Args:
+         num_in_ch (int): Channel number of inputs. Default: 3.
+         num_feat (int): Channel number of base intermediate features. Default: 64.
+         skip_connection (bool): Whether to use skip connections between U-Net levels. Default: True.
+     """
+
+     def __init__(self, num_in_ch, num_feat=64, skip_connection=True):
+         super(UNetDiscriminatorSN, self).__init__()
+         self.skip_connection = skip_connection
+         norm = spectral_norm
+         # the first convolution
+         self.conv0 = nn.Conv2d(num_in_ch, num_feat, kernel_size=3, stride=1, padding=1)
+         # downsample
+         self.conv1 = norm(nn.Conv2d(num_feat, num_feat * 2, 4, 2, 1, bias=False))
+         self.conv2 = norm(nn.Conv2d(num_feat * 2, num_feat * 4, 4, 2, 1, bias=False))
+         self.conv3 = norm(nn.Conv2d(num_feat * 4, num_feat * 8, 4, 2, 1, bias=False))
+         # upsample
+         self.conv4 = norm(nn.Conv2d(num_feat * 8, num_feat * 4, 3, 1, 1, bias=False))
+         self.conv5 = norm(nn.Conv2d(num_feat * 4, num_feat * 2, 3, 1, 1, bias=False))
+         self.conv6 = norm(nn.Conv2d(num_feat * 2, num_feat, 3, 1, 1, bias=False))
+         # extra convolutions
+         self.conv7 = norm(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
+         self.conv8 = norm(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
+         self.conv9 = nn.Conv2d(num_feat, 1, 3, 1, 1)
+
+     def forward(self, x):
+         # downsample
+         x0 = F.leaky_relu(self.conv0(x), negative_slope=0.2, inplace=True)
+         x1 = F.leaky_relu(self.conv1(x0), negative_slope=0.2, inplace=True)
+         x2 = F.leaky_relu(self.conv2(x1), negative_slope=0.2, inplace=True)
+         x3 = F.leaky_relu(self.conv3(x2), negative_slope=0.2, inplace=True)
+
+         # upsample
+         x3 = F.interpolate(x3, scale_factor=2, mode='bilinear', align_corners=False)
+         x4 = F.leaky_relu(self.conv4(x3), negative_slope=0.2, inplace=True)
+
+         if self.skip_connection:
+             x4 = x4 + x2
+         x4 = F.interpolate(x4, scale_factor=2, mode='bilinear', align_corners=False)
+         x5 = F.leaky_relu(self.conv5(x4), negative_slope=0.2, inplace=True)
+
+         if self.skip_connection:
+             x5 = x5 + x1
+         x5 = F.interpolate(x5, scale_factor=2, mode='bilinear', align_corners=False)
+         x6 = F.leaky_relu(self.conv6(x5), negative_slope=0.2, inplace=True)
+
+         if self.skip_connection:
+             x6 = x6 + x0
+
+         # extra convolutions
+         out = F.leaky_relu(self.conv7(x6), negative_slope=0.2, inplace=True)
+         out = F.leaky_relu(self.conv8(out), negative_slope=0.2, inplace=True)
+         out = self.conv9(out)
+
+         return out
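A quick shape check (a sketch, assuming torch is installed): the three stride-2 convolutions halve the spatial size three times and the three bilinear upsamples restore it, so the discriminator emits one realness logit per input pixel rather than a single scalar; input sides must therefore be divisible by 8.

import torch
from realesrgan.archs.discriminator_arch import UNetDiscriminatorSN

net = UNetDiscriminatorSN(num_in_ch=3, num_feat=64, skip_connection=True)
with torch.no_grad():
    logits = net(torch.rand(1, 3, 256, 256))
print(logits.shape)  # torch.Size([1, 1, 256, 256])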
Real-ESRGAN/realesrgan/archs/srvgg_arch.py ADDED
@@ -0,0 +1,69 @@
+ from basicsr.utils.registry import ARCH_REGISTRY
+ from torch import nn as nn
+ from torch.nn import functional as F
+
+
+ @ARCH_REGISTRY.register()
+ class SRVGGNetCompact(nn.Module):
+     """A compact VGG-style network structure for super-resolution.
+
+     It is a compact network that performs upsampling in its last layer, so that no convolution is
+     conducted in the HR feature space.
+
+     Args:
+         num_in_ch (int): Channel number of inputs. Default: 3.
+         num_out_ch (int): Channel number of outputs. Default: 3.
+         num_feat (int): Channel number of intermediate features. Default: 64.
+         num_conv (int): Number of convolution layers in the body network. Default: 16.
+         upscale (int): Upsampling factor. Default: 4.
+         act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu.
+     """
+
+     def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu'):
+         super(SRVGGNetCompact, self).__init__()
+         self.num_in_ch = num_in_ch
+         self.num_out_ch = num_out_ch
+         self.num_feat = num_feat
+         self.num_conv = num_conv
+         self.upscale = upscale
+         self.act_type = act_type
+
+         self.body = nn.ModuleList()
+         # the first conv
+         self.body.append(nn.Conv2d(num_in_ch, num_feat, 3, 1, 1))
+         # the first activation
+         if act_type == 'relu':
+             activation = nn.ReLU(inplace=True)
+         elif act_type == 'prelu':
+             activation = nn.PReLU(num_parameters=num_feat)
+         elif act_type == 'leakyrelu':
+             activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+         else:
+             # guard against a silent UnboundLocalError on unknown activation types
+             raise ValueError(f'Unsupported activation type: {act_type}')
+         self.body.append(activation)
+
+         # the body structure
+         for _ in range(num_conv):
+             self.body.append(nn.Conv2d(num_feat, num_feat, 3, 1, 1))
+             # activation
+             if act_type == 'relu':
+                 activation = nn.ReLU(inplace=True)
+             elif act_type == 'prelu':
+                 activation = nn.PReLU(num_parameters=num_feat)
+             else:  # 'leakyrelu', validated above
+                 activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+             self.body.append(activation)
+
+         # the last conv
+         self.body.append(nn.Conv2d(num_feat, num_out_ch * upscale * upscale, 3, 1, 1))
+         # upsample
+         self.upsampler = nn.PixelShuffle(upscale)
+
+     def forward(self, x):
+         out = x
+         for layer in self.body:
+             out = layer(out)
+
+         out = self.upsampler(out)
+         # add the nearest-upsampled image, so that the network learns the residual
+         base = F.interpolate(x, scale_factor=self.upscale, mode='nearest')
+         out += base
+         return out
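Because every convolution runs at the low-resolution scale and PixelShuffle performs the upscaling only at the end, this network is much cheaper at inference time than an RRDB backbone. A minimal shape check (a sketch, assuming torch is installed):

import torch
from realesrgan.archs.srvgg_arch import SRVGGNetCompact

net = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
with torch.no_grad():
    y = net(torch.rand(1, 3, 64, 64))
print(y.shape)  # torch.Size([1, 3, 256, 256])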
Real-ESRGAN/realesrgan/data/__init__.py ADDED
@@ -0,0 +1,10 @@
+ import importlib
+ from basicsr.utils import scandir
+ from os import path as osp
+
+ # automatically scan and import dataset modules for registry
+ # scan all the files that end with '_dataset.py' under the data folder
+ data_folder = osp.dirname(osp.abspath(__file__))
+ dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')]
+ # import all the dataset modules
+ _dataset_modules = [importlib.import_module(f'realesrgan.data.{file_name}') for file_name in dataset_filenames]
Real-ESRGAN/realesrgan/data/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.19 kB).
Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_dataset.cpython-311.pyc ADDED
Binary file (11.4 kB).
Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_paired_dataset.cpython-311.pyc ADDED
Binary file (6.67 kB).
Real-ESRGAN/realesrgan/data/realesrgan_dataset.py ADDED
@@ -0,0 +1,192 @@
+ import cv2
+ import math
+ import numpy as np
+ import os
+ import os.path as osp
+ import random
+ import time
+ import torch
+ from basicsr.data.degradations import circular_lowpass_kernel, random_mixed_kernels
+ from basicsr.data.transforms import augment
+ from basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor
+ from basicsr.utils.registry import DATASET_REGISTRY
+ from torch.utils import data as data
+
+
+ @DATASET_REGISTRY.register()
+ class RealESRGANDataset(data.Dataset):
+     """Dataset used for the Real-ESRGAN model:
+     Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+     It loads gt (Ground-Truth) images and augments them.
+     It also generates blur kernels and sinc kernels for synthesizing low-quality images.
+     Note that the low-quality images are processed in tensors on GPUs for faster processing.
+
+     Args:
+         opt (dict): Config for train datasets. It contains the following keys:
+             dataroot_gt (str): Data root path for gt.
+             meta_info (str): Path for meta information file.
+             io_backend (dict): IO backend type and other kwargs.
+             use_hflip (bool): Use horizontal flips.
+             use_rot (bool): Use rotation (implemented by vertical flips and transposing h and w).
+         See the code for more options.
+     """
+
+     def __init__(self, opt):
+         super(RealESRGANDataset, self).__init__()
+         self.opt = opt
+         self.file_client = None
+         self.io_backend_opt = opt['io_backend']
+         self.gt_folder = opt['dataroot_gt']
+
+         # file client (lmdb io backend)
+         if self.io_backend_opt['type'] == 'lmdb':
+             self.io_backend_opt['db_paths'] = [self.gt_folder]
+             self.io_backend_opt['client_keys'] = ['gt']
+             if not self.gt_folder.endswith('.lmdb'):
+                 raise ValueError(f"'dataroot_gt' should end with '.lmdb', but received {self.gt_folder}")
+             with open(osp.join(self.gt_folder, 'meta_info.txt')) as fin:
+                 self.paths = [line.split('.')[0] for line in fin]
+         else:
+             # disk backend with meta_info
+             # Each line in the meta_info describes the relative path to an image
+             with open(self.opt['meta_info']) as fin:
+                 paths = [line.strip().split(' ')[0] for line in fin]
+             self.paths = [os.path.join(self.gt_folder, v) for v in paths]
+
+         # blur settings for the first degradation
+         self.blur_kernel_size = opt['blur_kernel_size']
+         self.kernel_list = opt['kernel_list']
+         self.kernel_prob = opt['kernel_prob']  # a list for each kernel probability
+         self.blur_sigma = opt['blur_sigma']
+         self.betag_range = opt['betag_range']  # betag used in generalized Gaussian blur kernels
+         self.betap_range = opt['betap_range']  # betap used in plateau blur kernels
+         self.sinc_prob = opt['sinc_prob']  # the probability for sinc filters
+
+         # blur settings for the second degradation
+         self.blur_kernel_size2 = opt['blur_kernel_size2']
+         self.kernel_list2 = opt['kernel_list2']
+         self.kernel_prob2 = opt['kernel_prob2']
+         self.blur_sigma2 = opt['blur_sigma2']
+         self.betag_range2 = opt['betag_range2']
+         self.betap_range2 = opt['betap_range2']
+         self.sinc_prob2 = opt['sinc_prob2']
+
+         # a final sinc filter
+         self.final_sinc_prob = opt['final_sinc_prob']
+
+         self.kernel_range = [2 * v + 1 for v in range(3, 11)]  # kernel size ranges from 7 to 21
+         # TODO: the kernel range is now hard-coded; it should be in the configuration file
+         self.pulse_tensor = torch.zeros(21, 21).float()  # convolving with the pulse tensor brings no blurry effect
+         self.pulse_tensor[10, 10] = 1
+
+     def __getitem__(self, index):
+         if self.file_client is None:
+             self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
+
+         # -------------------------------- Load gt images -------------------------------- #
+         # Shape: (h, w, c); channel order: BGR; image range: [0, 1], float32.
+         gt_path = self.paths[index]
+         # avoid errors caused by high latency in reading files
+         retry = 3
+         while retry > 0:
+             try:
+                 img_bytes = self.file_client.get(gt_path, 'gt')
+             except (IOError, OSError) as e:
+                 logger = get_root_logger()
+                 logger.warning(f'File client error: {e}, remaining retry times: {retry - 1}')
+                 # switch to another file to read (randint is inclusive on both ends)
+                 index = random.randint(0, self.__len__() - 1)
+                 gt_path = self.paths[index]
+                 time.sleep(1)  # sleep 1s for occasional server congestion
+             else:
+                 break
+             finally:
+                 retry -= 1
+         img_gt = imfrombytes(img_bytes, float32=True)
+
+         # -------------------- Do augmentation for training: flip, rotation -------------------- #
+         img_gt = augment(img_gt, self.opt['use_hflip'], self.opt['use_rot'])
+
+         # crop or pad to 400
+         # TODO: 400 is hard-coded. You may change it accordingly
+         h, w = img_gt.shape[0:2]
+         crop_pad_size = 400
+         # pad
+         if h < crop_pad_size or w < crop_pad_size:
+             pad_h = max(0, crop_pad_size - h)
+             pad_w = max(0, crop_pad_size - w)
+             img_gt = cv2.copyMakeBorder(img_gt, 0, pad_h, 0, pad_w, cv2.BORDER_REFLECT_101)
+         # crop
+         if img_gt.shape[0] > crop_pad_size or img_gt.shape[1] > crop_pad_size:
+             h, w = img_gt.shape[0:2]
+             # randomly choose top and left coordinates
+             top = random.randint(0, h - crop_pad_size)
+             left = random.randint(0, w - crop_pad_size)
+             img_gt = img_gt[top:top + crop_pad_size, left:left + crop_pad_size, ...]
+
+         # ------------------------ Generate kernels (used in the first degradation) ------------------------ #
+         kernel_size = random.choice(self.kernel_range)
+         if np.random.uniform() < self.opt['sinc_prob']:
+             # this sinc filter setting is for kernels ranging from [7, 21]
+             if kernel_size < 13:
+                 omega_c = np.random.uniform(np.pi / 3, np.pi)
+             else:
+                 omega_c = np.random.uniform(np.pi / 5, np.pi)
+             kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
+         else:
+             kernel = random_mixed_kernels(
+                 self.kernel_list,
+                 self.kernel_prob,
+                 kernel_size,
+                 self.blur_sigma,
+                 self.blur_sigma, [-math.pi, math.pi],
+                 self.betag_range,
+                 self.betap_range,
+                 noise_range=None)
+         # pad kernel
+         pad_size = (21 - kernel_size) // 2
+         kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))
+
+         # ------------------------ Generate kernels (used in the second degradation) ------------------------ #
+         kernel_size = random.choice(self.kernel_range)
+         if np.random.uniform() < self.opt['sinc_prob2']:
+             if kernel_size < 13:
+                 omega_c = np.random.uniform(np.pi / 3, np.pi)
+             else:
+                 omega_c = np.random.uniform(np.pi / 5, np.pi)
+             kernel2 = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
+         else:
+             kernel2 = random_mixed_kernels(
+                 self.kernel_list2,
+                 self.kernel_prob2,
+                 kernel_size,
+                 self.blur_sigma2,
+                 self.blur_sigma2, [-math.pi, math.pi],
+                 self.betag_range2,
+                 self.betap_range2,
+                 noise_range=None)
+
+         # pad kernel
+         pad_size = (21 - kernel_size) // 2
+         kernel2 = np.pad(kernel2, ((pad_size, pad_size), (pad_size, pad_size)))
+
+         # ------------------------------------- the final sinc kernel ------------------------------------- #
+         if np.random.uniform() < self.opt['final_sinc_prob']:
+             kernel_size = random.choice(self.kernel_range)
+             omega_c = np.random.uniform(np.pi / 3, np.pi)
+             sinc_kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=21)
+             sinc_kernel = torch.FloatTensor(sinc_kernel)
+         else:
+             sinc_kernel = self.pulse_tensor
+
+         # BGR to RGB, HWC to CHW, numpy to tensor
+         img_gt = img2tensor([img_gt], bgr2rgb=True, float32=True)[0]
+         kernel = torch.FloatTensor(kernel)
+         kernel2 = torch.FloatTensor(kernel2)
+
+         return_d = {'gt': img_gt, 'kernel1': kernel, 'kernel2': kernel2, 'sinc_kernel': sinc_kernel, 'gt_path': gt_path}
+         return return_d
+
+     def __len__(self):
+         return len(self.paths)
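The kernel padding in __getitem__ above is what lets kernels of different sampled sizes be collated into one batch tensor: every kernel ends up centered in a fixed 21x21 array, matching the 21x21 pulse tensor. A small sketch of that invariant (numpy assumed; the 7x7 box kernel is a stand-in, not one of the sampled kernel types):

import numpy as np

kernel_size = 7  # any odd size from the sampled range [7, 21]
kernel = np.full((kernel_size, kernel_size), 1.0 / kernel_size**2)
pad = (21 - kernel_size) // 2
kernel21 = np.pad(kernel, ((pad, pad), (pad, pad)))
assert kernel21.shape == (21, 21)  # fixed shape, so the default collate_fn can stack kernels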
Real-ESRGAN/realesrgan/data/realesrgan_paired_dataset.py ADDED
@@ -0,0 +1,108 @@
+ import os
+ from basicsr.data.data_util import paired_paths_from_folder, paired_paths_from_lmdb
+ from basicsr.data.transforms import augment, paired_random_crop
+ from basicsr.utils import FileClient, imfrombytes, img2tensor
+ from basicsr.utils.registry import DATASET_REGISTRY
+ from torch.utils import data as data
+ from torchvision.transforms.functional import normalize
+
+
+ @DATASET_REGISTRY.register()
+ class RealESRGANPairedDataset(data.Dataset):
+     """Paired image dataset for image restoration.
+
+     Read LQ (Low Quality, e.g. LR (Low Resolution), blurry, noisy, etc.) and GT image pairs.
+
+     There are three modes:
+     1. 'lmdb': Use lmdb files. Applies if opt['io_backend'] == lmdb.
+     2. 'meta_info': Use a meta information file to generate paths.
+        Applies if opt['io_backend'] != lmdb and opt['meta_info'] is not None.
+     3. 'folder': Scan folders to generate paths. Applies to the rest.
+
+     Args:
+         opt (dict): Config for train datasets. It contains the following keys:
+             dataroot_gt (str): Data root path for gt.
+             dataroot_lq (str): Data root path for lq.
+             meta_info (str): Path for meta information file.
+             io_backend (dict): IO backend type and other kwargs.
+             filename_tmpl (str): Template for each filename. Note that the template excludes the file extension.
+                 Default: '{}'.
+             gt_size (int): Cropped patch size for gt patches.
+             use_hflip (bool): Use horizontal flips.
+             use_rot (bool): Use rotation (use vertical flip and transposing h and w for implementation).
+
+             scale (int): Scale, which will be added automatically.
+             phase (str): 'train' or 'val'.
+     """
+
+     def __init__(self, opt):
+         super(RealESRGANPairedDataset, self).__init__()
+         self.opt = opt
+         self.file_client = None
+         self.io_backend_opt = opt['io_backend']
+         # mean and std for normalizing the input images
+         self.mean = opt['mean'] if 'mean' in opt else None
+         self.std = opt['std'] if 'std' in opt else None
+
+         self.gt_folder, self.lq_folder = opt['dataroot_gt'], opt['dataroot_lq']
+         self.filename_tmpl = opt['filename_tmpl'] if 'filename_tmpl' in opt else '{}'
+
+         # file client (lmdb io backend)
+         if self.io_backend_opt['type'] == 'lmdb':
+             self.io_backend_opt['db_paths'] = [self.lq_folder, self.gt_folder]
+             self.io_backend_opt['client_keys'] = ['lq', 'gt']
+             self.paths = paired_paths_from_lmdb([self.lq_folder, self.gt_folder], ['lq', 'gt'])
+         elif 'meta_info' in self.opt and self.opt['meta_info'] is not None:
+             # disk backend with meta_info
+             # Each line in the meta_info describes the relative paths to an image pair
+             with open(self.opt['meta_info']) as fin:
+                 paths = [line.strip() for line in fin]
+             self.paths = []
+             for path in paths:
+                 gt_path, lq_path = path.split(', ')
+                 gt_path = os.path.join(self.gt_folder, gt_path)
+                 lq_path = os.path.join(self.lq_folder, lq_path)
+                 self.paths.append(dict([('gt_path', gt_path), ('lq_path', lq_path)]))
+         else:
+             # disk backend
+             # it will scan the whole folder to get meta info
+             # it will be time-consuming for folders with too many files; using an extra meta txt file is recommended
+             self.paths = paired_paths_from_folder([self.lq_folder, self.gt_folder], ['lq', 'gt'], self.filename_tmpl)
+
+     def __getitem__(self, index):
+         if self.file_client is None:
+             self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
+
+         scale = self.opt['scale']
+
+         # Load gt and lq images. Dimension order: HWC; channel order: BGR;
+         # image range: [0, 1], float32.
+         gt_path = self.paths[index]['gt_path']
+         img_bytes = self.file_client.get(gt_path, 'gt')
+         img_gt = imfrombytes(img_bytes, float32=True)
+         lq_path = self.paths[index]['lq_path']
+         img_bytes = self.file_client.get(lq_path, 'lq')
+         img_lq = imfrombytes(img_bytes, float32=True)
+
+         # augmentation for training
+         if self.opt['phase'] == 'train':
+             gt_size = self.opt['gt_size']
+             # random crop
+             img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
+             # flip, rotation
+             img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_hflip'], self.opt['use_rot'])
+
+         # BGR to RGB, HWC to CHW, numpy to tensor
+         img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)
+         # normalize
+         if self.mean is not None or self.std is not None:
+             normalize(img_lq, self.mean, self.std, inplace=True)
+             normalize(img_gt, self.mean, self.std, inplace=True)
+
+         return {'lq': img_lq, 'gt': img_gt, 'lq_path': lq_path, 'gt_path': gt_path}
+
+     def __len__(self):
+         return len(self.paths)
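In the 'meta_info' mode above, each line of the meta file carries a GT relative path and an LQ relative path separated by a comma and a single space, which is exactly what the `path.split(', ')` call expects. A sketch of the format (the file names are illustrative, not taken from this upload):

line = 'DIV2K_train_HR_sub/0001_s001.png, DIV2K_train_LR_bicubic_sub/0001_s001.png'
gt_rel, lq_rel = line.split(', ')  # -> GT relpath, LQ relpath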
Real-ESRGAN/realesrgan/models/__init__.py ADDED
@@ -0,0 +1,10 @@
+ import importlib
+ from basicsr.utils import scandir
+ from os import path as osp
+
+ # automatically scan and import model modules for registry
+ # scan all the files that end with '_model.py' under the model folder
+ model_folder = osp.dirname(osp.abspath(__file__))
+ model_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(model_folder) if v.endswith('_model.py')]
+ # import all the model modules
+ _model_modules = [importlib.import_module(f'realesrgan.models.{file_name}') for file_name in model_filenames]
Real-ESRGAN/realesrgan/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.19 kB).
Real-ESRGAN/realesrgan/models/__pycache__/realesrgan_model.cpython-311.pyc ADDED
Binary file (15.2 kB).
Real-ESRGAN/realesrgan/models/__pycache__/realesrnet_model.cpython-311.pyc ADDED
Binary file (11.9 kB).
Real-ESRGAN/realesrgan/models/realesrgan_model.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+ import torch
4
+ from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
5
+ from basicsr.data.transforms import paired_random_crop
6
+ from basicsr.models.srgan_model import SRGANModel
7
+ from basicsr.utils import DiffJPEG, USMSharp
8
+ from basicsr.utils.img_process_util import filter2D
9
+ from basicsr.utils.registry import MODEL_REGISTRY
10
+ from collections import OrderedDict
11
+ from torch.nn import functional as F
12
+
13
+
14
+ @MODEL_REGISTRY.register()
15
+ class RealESRGANModel(SRGANModel):
16
+ """RealESRGAN Model for Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
17
+
18
+ It mainly performs:
19
+ 1. randomly synthesize LQ images in GPU tensors
20
+ 2. optimize the networks with GAN training.
21
+ """
22
+
23
+ def __init__(self, opt):
24
+ super(RealESRGANModel, self).__init__(opt)
25
+ self.jpeger = DiffJPEG(differentiable=False).cuda() # simulate JPEG compression artifacts
26
+ self.usm_sharpener = USMSharp().cuda() # do usm sharpening
27
+ self.queue_size = opt.get('queue_size', 180)
28
+
29
+ @torch.no_grad()
30
+ def _dequeue_and_enqueue(self):
31
+ """It is the training pair pool for increasing the diversity in a batch.
32
+
33
+ Batch processing limits the diversity of synthetic degradations in a batch. For example, samples in a
34
+ batch could not have different resize scaling factors. Therefore, we employ this training pair pool
35
+ to increase the degradation diversity in a batch.
36
+ """
37
+ # initialize
38
+ b, c, h, w = self.lq.size()
39
+ if not hasattr(self, 'queue_lr'):
40
+ assert self.queue_size % b == 0, f'queue size {self.queue_size} should be divisible by batch size {b}'
41
+ self.queue_lr = torch.zeros(self.queue_size, c, h, w).cuda()
42
+ _, c, h, w = self.gt.size()
43
+ self.queue_gt = torch.zeros(self.queue_size, c, h, w).cuda()
44
+ self.queue_ptr = 0
45
+ if self.queue_ptr == self.queue_size: # the pool is full
46
+ # do dequeue and enqueue
47
+ # shuffle
48
+ idx = torch.randperm(self.queue_size)
49
+ self.queue_lr = self.queue_lr[idx]
50
+ self.queue_gt = self.queue_gt[idx]
51
+ # get first b samples
52
+ lq_dequeue = self.queue_lr[0:b, :, :, :].clone()
53
+ gt_dequeue = self.queue_gt[0:b, :, :, :].clone()
54
+ # update the queue
55
+ self.queue_lr[0:b, :, :, :] = self.lq.clone()
56
+ self.queue_gt[0:b, :, :, :] = self.gt.clone()
57
+
58
+ self.lq = lq_dequeue
59
+ self.gt = gt_dequeue
60
+ else:
61
+ # only do enqueue
62
+ self.queue_lr[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.lq.clone()
63
+ self.queue_gt[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.gt.clone()
64
+ self.queue_ptr = self.queue_ptr + b
65
+
66
+ @torch.no_grad()
67
+ def feed_data(self, data):
68
+ """Accept data from dataloader, and then add two-order degradations to obtain LQ images.
69
+ """
70
+ if self.is_train and self.opt.get('high_order_degradation', True):
71
+ # training data synthesis
72
+ self.gt = data['gt'].to(self.device)
73
+ self.gt_usm = self.usm_sharpener(self.gt)
74
+
+             self.kernel1 = data['kernel1'].to(self.device)
+             self.kernel2 = data['kernel2'].to(self.device)
+             self.sinc_kernel = data['sinc_kernel'].to(self.device)
+
+             ori_h, ori_w = self.gt.size()[2:4]
+
+             # ----------------------- The first degradation process ----------------------- #
+             # blur
+             out = filter2D(self.gt_usm, self.kernel1)
+             # random resize
+             updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob'])[0]
+             if updown_type == 'up':
+                 scale = np.random.uniform(1, self.opt['resize_range'][1])
+             elif updown_type == 'down':
+                 scale = np.random.uniform(self.opt['resize_range'][0], 1)
+             else:
+                 scale = 1
+             mode = random.choice(['area', 'bilinear', 'bicubic'])
+             out = F.interpolate(out, scale_factor=scale, mode=mode)
+             # add noise
+             gray_noise_prob = self.opt['gray_noise_prob']
+             if np.random.uniform() < self.opt['gaussian_noise_prob']:
+                 out = random_add_gaussian_noise_pt(
+                     out, sigma_range=self.opt['noise_range'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+             else:
+                 out = random_add_poisson_noise_pt(
+                     out,
+                     scale_range=self.opt['poisson_scale_range'],
+                     gray_prob=gray_noise_prob,
+                     clip=True,
+                     rounds=False)
+             # JPEG compression
+             jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range'])
+             out = torch.clamp(out, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
+             out = self.jpeger(out, quality=jpeg_p)
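+             # jpeg_p holds one independently sampled quality per image in the batch
+             # (e.g. with a hypothetical jpeg_range of [30, 95], each image gets its
+             # own quality in that interval), so a single batch mixes JPEG strengths.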
+
+             # ----------------------- The second degradation process ----------------------- #
+             # blur
+             if np.random.uniform() < self.opt['second_blur_prob']:
+                 out = filter2D(out, self.kernel2)
+             # random resize
+             updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob2'])[0]
+             if updown_type == 'up':
+                 scale = np.random.uniform(1, self.opt['resize_range2'][1])
+             elif updown_type == 'down':
+                 scale = np.random.uniform(self.opt['resize_range2'][0], 1)
+             else:
+                 scale = 1
+             mode = random.choice(['area', 'bilinear', 'bicubic'])
+             out = F.interpolate(
+                 out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
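+             # Worked example with illustrative numbers: for ori_h = 400, a network
+             # scale of 4 and a sampled scale of 0.75, the height becomes
+             # int(400 / 4 * 0.75) = 75 px; the final resize below restores it to
+             # exactly ori_h // 4 = 100 px.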
+             # add noise
+             gray_noise_prob = self.opt['gray_noise_prob2']
+             if np.random.uniform() < self.opt['gaussian_noise_prob2']:
+                 out = random_add_gaussian_noise_pt(
+                     out, sigma_range=self.opt['noise_range2'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+             else:
+                 out = random_add_poisson_noise_pt(
+                     out,
+                     scale_range=self.opt['poisson_scale_range2'],
+                     gray_prob=gray_noise_prob,
+                     clip=True,
+                     rounds=False)
+
+             # JPEG compression + the final sinc filter
+             # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
+             # as one operation.
+             # We consider two orders:
+             #   1. [resize back + sinc filter] + JPEG compression
+             #   2. JPEG compression + [resize back + sinc filter]
+             # Empirically, we find that other combinations (sinc + JPEG + resize) introduce twisted lines.
+             if np.random.uniform() < 0.5:
+                 # resize back + the final sinc filter
+                 mode = random.choice(['area', 'bilinear', 'bicubic'])
+                 out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                 out = filter2D(out, self.sinc_kernel)
+                 # JPEG compression
+                 jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                 out = torch.clamp(out, 0, 1)
+                 out = self.jpeger(out, quality=jpeg_p)
+             else:
+                 # JPEG compression
+                 jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                 out = torch.clamp(out, 0, 1)
+                 out = self.jpeger(out, quality=jpeg_p)
+                 # resize back + the final sinc filter
+                 mode = random.choice(['area', 'bilinear', 'bicubic'])
+                 out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                 out = filter2D(out, self.sinc_kernel)
+
+             # clamp and round
+             self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.
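+             # This quantizes the tensor to the 256 levels of an 8-bit image: e.g.
+             # 0.5031 becomes round(0.5031 * 255) / 255 = 128 / 255 ≈ 0.50196,
+             # matching what the network would see from a saved 8-bit file.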
+
+             # random crop
+             gt_size = self.opt['gt_size']
+             (self.gt, self.gt_usm), self.lq = paired_random_crop([self.gt, self.gt_usm], self.lq, gt_size,
+                                                                  self.opt['scale'])
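+             # paired_random_crop keeps GT and LQ aligned: with a hypothetical
+             # gt_size of 256 and scale 4, the GT crop is 256x256 and the matching
+             # LQ crop is 64x64 at the corresponding location.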
+
+             # training pair pool
+             self._dequeue_and_enqueue()
+             # sharpen self.gt again, since self._dequeue_and_enqueue may have swapped self.gt for pooled samples
+             self.gt_usm = self.usm_sharpener(self.gt)
+             self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
+         else:
+             # for paired training or validation
+             self.lq = data['lq'].to(self.device)
+             if 'gt' in data:
+                 self.gt = data['gt'].to(self.device)
+                 self.gt_usm = self.usm_sharpener(self.gt)
+
+     def nondist_validation(self, dataloader, current_iter, tb_logger, save_img):
+         # do not use the synthetic process during validation
+         self.is_train = False
+         super(RealESRGANModel, self).nondist_validation(dataloader, current_iter, tb_logger, save_img)
+         self.is_train = True
+
+     def optimize_parameters(self, current_iter):
+         # pick the USM-sharpened or the plain GT for each loss, as configured
+         l1_gt = self.gt_usm
+         percep_gt = self.gt_usm
+         gan_gt = self.gt_usm
+         if self.opt['l1_gt_usm'] is False:
+             l1_gt = self.gt
+         if self.opt['percep_gt_usm'] is False:
+             percep_gt = self.gt
+         if self.opt['gan_gt_usm'] is False:
+             gan_gt = self.gt
+
+         # optimize net_g
+         for p in self.net_d.parameters():
+             p.requires_grad = False
+
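+         # net_d is frozen here so the generator step cannot push gradients into
+         # the discriminator; it is unfrozen again before its own update below.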
+         self.optimizer_g.zero_grad()
+         self.output = self.net_g(self.lq)
+
+         l_g_total = 0
+         loss_dict = OrderedDict()
+         if (current_iter % self.net_d_iters == 0 and current_iter > self.net_d_init_iters):
+             # pixel loss
+             if self.cri_pix:
+                 l_g_pix = self.cri_pix(self.output, l1_gt)
+                 l_g_total += l_g_pix
+                 loss_dict['l_g_pix'] = l_g_pix
+             # perceptual loss
+             if self.cri_perceptual:
+                 l_g_percep, l_g_style = self.cri_perceptual(self.output, percep_gt)
+                 if l_g_percep is not None:
+                     l_g_total += l_g_percep
+                     loss_dict['l_g_percep'] = l_g_percep
+                 if l_g_style is not None:
+                     l_g_total += l_g_style
+                     loss_dict['l_g_style'] = l_g_style
+             # gan loss
+             fake_g_pred = self.net_d(self.output)
+             l_g_gan = self.cri_gan(fake_g_pred, True, is_disc=False)
+             l_g_total += l_g_gan
+             loss_dict['l_g_gan'] = l_g_gan
+
+             l_g_total.backward()
+             self.optimizer_g.step()
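+         # The generator is updated only every net_d_iters iterations and only
+         # after net_d_init_iters warm-up steps; otherwise this iteration trains
+         # the discriminator alone.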
+
+         # optimize net_d
+         for p in self.net_d.parameters():
+             p.requires_grad = True
+
+         self.optimizer_d.zero_grad()
+         # real
+         real_d_pred = self.net_d(gan_gt)
+         l_d_real = self.cri_gan(real_d_pred, True, is_disc=True)
+         loss_dict['l_d_real'] = l_d_real
+         loss_dict['out_d_real'] = torch.mean(real_d_pred.detach())
+         l_d_real.backward()
+         # fake
+         fake_d_pred = self.net_d(self.output.detach().clone())  # clone for pt1.9
+         l_d_fake = self.cri_gan(fake_d_pred, False, is_disc=True)
+         loss_dict['l_d_fake'] = l_d_fake
+         loss_dict['out_d_fake'] = torch.mean(fake_d_pred.detach())
+         l_d_fake.backward()
+         self.optimizer_d.step()
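+         # The real and fake losses are backpropagated separately; their gradients
+         # accumulate in net_d before the single optimizer_d.step() above.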
+
+         if self.ema_decay > 0:
+             self.model_ema(decay=self.ema_decay)
+
+         self.log_dict = self.reduce_loss_dict(loss_dict)
Real-ESRGAN/realesrgan/models/realesrnet_model.py ADDED
@@ -0,0 +1,188 @@
+ import numpy as np
+ import random
+ import torch
+ from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
+ from basicsr.data.transforms import paired_random_crop
+ from basicsr.models.sr_model import SRModel
+ from basicsr.utils import DiffJPEG, USMSharp
+ from basicsr.utils.img_process_util import filter2D
+ from basicsr.utils.registry import MODEL_REGISTRY
+ from torch.nn import functional as F
+
+
+ @MODEL_REGISTRY.register()
+ class RealESRNetModel(SRModel):
+     """RealESRNet Model for Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+     It is trained without GAN losses.
+     It mainly performs two tasks:
+     1. randomly synthesizing LQ images as GPU tensors
+     2. optimizing the network with pixel losses only (no GAN training).
+     """
+
+     def __init__(self, opt):
+         super(RealESRNetModel, self).__init__(opt)
+         self.jpeger = DiffJPEG(differentiable=False).cuda()  # simulates JPEG compression artifacts
+         self.usm_sharpener = USMSharp().cuda()  # does USM sharpening
+         self.queue_size = opt.get('queue_size', 180)
+
+     @torch.no_grad()
+     def _dequeue_and_enqueue(self):
+         """Training pair pool for increasing the degradation diversity in a batch.
+
+         Batch processing limits the diversity of synthetic degradations within a batch. For example,
+         samples in a batch cannot have different resize scaling factors. Therefore, we employ this
+         training pair pool to increase the degradation diversity in a batch.
+         """
+         # initialize
+         b, c, h, w = self.lq.size()
+         if not hasattr(self, 'queue_lr'):
+             assert self.queue_size % b == 0, f'queue size {self.queue_size} should be divisible by batch size {b}'
+             self.queue_lr = torch.zeros(self.queue_size, c, h, w).cuda()
+             _, c, h, w = self.gt.size()
+             self.queue_gt = torch.zeros(self.queue_size, c, h, w).cuda()
+             self.queue_ptr = 0
+         if self.queue_ptr == self.queue_size:  # the pool is full
+             # do dequeue and enqueue
+             # shuffle
+             idx = torch.randperm(self.queue_size)
+             self.queue_lr = self.queue_lr[idx]
+             self.queue_gt = self.queue_gt[idx]
+             # get first b samples
+             lq_dequeue = self.queue_lr[0:b, :, :, :].clone()
+             gt_dequeue = self.queue_gt[0:b, :, :, :].clone()
+             # update the queue
+             self.queue_lr[0:b, :, :, :] = self.lq.clone()
+             self.queue_gt[0:b, :, :, :] = self.gt.clone()
+
+             self.lq = lq_dequeue
+             self.gt = gt_dequeue
+         else:
+             # only do enqueue
+             self.queue_lr[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.lq.clone()
+             self.queue_gt[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.gt.clone()
+             self.queue_ptr = self.queue_ptr + b
+
+     @torch.no_grad()
+     def feed_data(self, data):
+         """Accept data from the dataloader, then apply the two-stage degradation pipeline to obtain LQ images."""
+         if self.is_train and self.opt.get('high_order_degradation', True):
+             # training data synthesis
+             self.gt = data['gt'].to(self.device)
+             # USM sharpen the GT images
+             if self.opt['gt_usm'] is True:
+                 self.gt = self.usm_sharpener(self.gt)
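+             # Unlike RealESRGANModel, which keeps separate gt / gt_usm copies for
+             # its different losses, this model optionally sharpens self.gt in place
+             # and uses that single tensor throughout.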
+
+             self.kernel1 = data['kernel1'].to(self.device)
+             self.kernel2 = data['kernel2'].to(self.device)
+             self.sinc_kernel = data['sinc_kernel'].to(self.device)
+
+             ori_h, ori_w = self.gt.size()[2:4]
+
+             # ----------------------- The first degradation process ----------------------- #
+             # blur
+             out = filter2D(self.gt, self.kernel1)
+             # random resize
+             updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob'])[0]
+             if updown_type == 'up':
+                 scale = np.random.uniform(1, self.opt['resize_range'][1])
+             elif updown_type == 'down':
+                 scale = np.random.uniform(self.opt['resize_range'][0], 1)
+             else:
+                 scale = 1
+             mode = random.choice(['area', 'bilinear', 'bicubic'])
+             out = F.interpolate(out, scale_factor=scale, mode=mode)
+             # add noise
+             gray_noise_prob = self.opt['gray_noise_prob']
+             if np.random.uniform() < self.opt['gaussian_noise_prob']:
+                 out = random_add_gaussian_noise_pt(
+                     out, sigma_range=self.opt['noise_range'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+             else:
+                 out = random_add_poisson_noise_pt(
+                     out,
+                     scale_range=self.opt['poisson_scale_range'],
+                     gray_prob=gray_noise_prob,
+                     clip=True,
+                     rounds=False)
+             # JPEG compression
+             jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range'])
+             out = torch.clamp(out, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
+             out = self.jpeger(out, quality=jpeg_p)
+
+             # ----------------------- The second degradation process ----------------------- #
+             # blur
+             if np.random.uniform() < self.opt['second_blur_prob']:
+                 out = filter2D(out, self.kernel2)
+             # random resize
+             updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob2'])[0]
+             if updown_type == 'up':
+                 scale = np.random.uniform(1, self.opt['resize_range2'][1])
+             elif updown_type == 'down':
+                 scale = np.random.uniform(self.opt['resize_range2'][0], 1)
+             else:
+                 scale = 1
+             mode = random.choice(['area', 'bilinear', 'bicubic'])
+             out = F.interpolate(
+                 out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
+             # add noise
+             gray_noise_prob = self.opt['gray_noise_prob2']
+             if np.random.uniform() < self.opt['gaussian_noise_prob2']:
+                 out = random_add_gaussian_noise_pt(
+                     out, sigma_range=self.opt['noise_range2'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+             else:
+                 out = random_add_poisson_noise_pt(
+                     out,
+                     scale_range=self.opt['poisson_scale_range2'],
+                     gray_prob=gray_noise_prob,
+                     clip=True,
+                     rounds=False)
+
+             # JPEG compression + the final sinc filter
+             # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
+             # as one operation.
+             # We consider two orders:
+             #   1. [resize back + sinc filter] + JPEG compression
+             #   2. JPEG compression + [resize back + sinc filter]
+             # Empirically, we find that other combinations (sinc + JPEG + resize) introduce twisted lines.
+             if np.random.uniform() < 0.5:
+                 # resize back + the final sinc filter
+                 mode = random.choice(['area', 'bilinear', 'bicubic'])
+                 out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                 out = filter2D(out, self.sinc_kernel)
+                 # JPEG compression
+                 jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                 out = torch.clamp(out, 0, 1)
+                 out = self.jpeger(out, quality=jpeg_p)
+             else:
+                 # JPEG compression
+                 jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                 out = torch.clamp(out, 0, 1)
+                 out = self.jpeger(out, quality=jpeg_p)
+                 # resize back + the final sinc filter
+                 mode = random.choice(['area', 'bilinear', 'bicubic'])
+                 out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                 out = filter2D(out, self.sinc_kernel)
+
+             # clamp and round
+             self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.
+
+             # random crop
+             gt_size = self.opt['gt_size']
+             self.gt, self.lq = paired_random_crop(self.gt, self.lq, gt_size, self.opt['scale'])
+
+             # training pair pool
+             self._dequeue_and_enqueue()
+             self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
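+             # No re-sharpening is needed after the pool here: self.gt was already
+             # (optionally) sharpened before being enqueued, so pooled GT samples
+             # come back in their final form.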
+         else:
+             # for paired training or validation
+             self.lq = data['lq'].to(self.device)
+             if 'gt' in data:
+                 self.gt = data['gt'].to(self.device)
+                 self.gt_usm = self.usm_sharpener(self.gt)
+
+     def nondist_validation(self, dataloader, current_iter, tb_logger, save_img):
+         # do not use the synthetic process during validation
+         self.is_train = False
+         super(RealESRNetModel, self).nondist_validation(dataloader, current_iter, tb_logger, save_img)
+         self.is_train = True