svjack commited on
Commit
92ab350
1 Parent(s): f6bb4b4

Delete folder stable-diffusion.cpp with huggingface_hub

Browse files
stable-diffusion.cpp/.dockerignore DELETED
@@ -1,6 +0,0 @@
1
- build*/
2
- test/
3
-
4
- .cache/
5
- *.swp
6
- models/
 
 
 
 
 
 
 
stable-diffusion.cpp/.github/workflows/build.yml DELETED
@@ -1,201 +0,0 @@
1
- name: CI
2
-
3
- on:
4
- workflow_dispatch: # allows manual triggering
5
- inputs:
6
- create_release:
7
- description: 'Create new release'
8
- required: true
9
- type: boolean
10
- push:
11
- branches:
12
- - master
13
- - ci
14
- paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
15
- pull_request:
16
- types: [opened, synchronize, reopened]
17
- paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
18
-
19
- env:
20
- BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
21
-
22
- jobs:
23
- ubuntu-latest-cmake:
24
- runs-on: ubuntu-latest
25
-
26
- steps:
27
- - name: Clone
28
- id: checkout
29
- uses: actions/checkout@v3
30
- with:
31
- submodules: recursive
32
-
33
-
34
- - name: Dependencies
35
- id: depends
36
- run: |
37
- sudo apt-get update
38
- sudo apt-get install build-essential
39
-
40
- - name: Build
41
- id: cmake_build
42
- run: |
43
- mkdir build
44
- cd build
45
- cmake ..
46
- cmake --build . --config Release
47
-
48
- #- name: Test
49
- #id: cmake_test
50
- #run: |
51
- #cd build
52
- #ctest --verbose --timeout 900
53
-
54
- macOS-latest-cmake:
55
- runs-on: macos-latest
56
-
57
- steps:
58
- - name: Clone
59
- id: checkout
60
- uses: actions/checkout@v3
61
- with:
62
- submodules: recursive
63
-
64
- - name: Dependencies
65
- id: depends
66
- continue-on-error: true
67
- run: |
68
- brew update
69
-
70
- - name: Build
71
- id: cmake_build
72
- run: |
73
- sysctl -a
74
- mkdir build
75
- cd build
76
- cmake ..
77
- cmake --build . --config Release
78
-
79
- #- name: Test
80
- #id: cmake_test
81
- #run: |
82
- #cd build
83
- #ctest --verbose --timeout 900
84
-
85
- windows-latest-cmake:
86
- runs-on: windows-latest
87
-
88
- strategy:
89
- matrix:
90
- include:
91
- - build: 'noavx'
92
- defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
93
- - build: 'avx2'
94
- defines: '-DGGML_AVX2=ON'
95
- - build: 'avx'
96
- defines: '-DGGML_AVX2=OFF'
97
- - build: 'avx512'
98
- defines: '-DGGML_AVX512=ON'
99
-
100
- steps:
101
- - name: Clone
102
- id: checkout
103
- uses: actions/checkout@v3
104
- with:
105
- submodules: recursive
106
-
107
- - name: Build
108
- id: cmake_build
109
- run: |
110
- mkdir build
111
- cd build
112
- cmake .. ${{ matrix.defines }}
113
- cmake --build . --config Release
114
-
115
- - name: Check AVX512F support
116
- id: check_avx512f
117
- if: ${{ matrix.build == 'avx512' }}
118
- continue-on-error: true
119
- run: |
120
- cd build
121
- $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
122
- $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
123
- $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
124
- echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
125
- & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
126
- .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
127
-
128
- #- name: Test
129
- #id: cmake_test
130
- #run: |
131
- #cd build
132
- #ctest -C Release --verbose --timeout 900
133
-
134
- - name: Get commit hash
135
- id: commit
136
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
137
- uses: pr-mpt/actions-commit-hash@v2
138
-
139
- - name: Pack artifacts
140
- id: pack_artifacts
141
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
142
- run: |
143
- Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt
144
- Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt
145
- 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip .\build\bin\Release\*
146
-
147
- - name: Upload artifacts
148
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
149
- uses: actions/upload-artifact@v3
150
- with:
151
- path: |
152
- sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip
153
-
154
- release:
155
- if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
156
-
157
- runs-on: ubuntu-latest
158
-
159
- needs:
160
- - ubuntu-latest-cmake
161
- - macOS-latest-cmake
162
- - windows-latest-cmake
163
-
164
- steps:
165
- - name: Download artifacts
166
- id: download-artifact
167
- uses: actions/download-artifact@v3
168
-
169
- - name: Get commit hash
170
- id: commit
171
- uses: pr-mpt/actions-commit-hash@v2
172
-
173
- - name: Create release
174
- id: create_release
175
- uses: anzz1/action-create-release@v1
176
- env:
177
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
178
- with:
179
- tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}
180
-
181
- - name: Upload release
182
- id: upload_release
183
- uses: actions/github-script@v3
184
- with:
185
- github-token: ${{secrets.GITHUB_TOKEN}}
186
- script: |
187
- const path = require('path');
188
- const fs = require('fs');
189
- const release_id = '${{ steps.create_release.outputs.id }}';
190
- for (let file of await fs.readdirSync('./artifact')) {
191
- if (path.extname(file) === '.zip') {
192
- console.log('uploadReleaseAsset', file);
193
- await github.repos.uploadReleaseAsset({
194
- owner: context.repo.owner,
195
- repo: context.repo.repo,
196
- release_id: release_id,
197
- name: file,
198
- data: await fs.readFileSync(`./artifact/${file}`)
199
- });
200
- }
201
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/.gitignore DELETED
@@ -1,5 +0,0 @@
1
- build*/
2
- test/
3
-
4
- .cache/
5
- *.swp
 
 
 
 
 
 
stable-diffusion.cpp/.gitmodules DELETED
@@ -1,3 +0,0 @@
1
- [submodule "ggml"]
2
- path = ggml
3
- url = https://github.com/leejet/ggml.git
 
 
 
 
stable-diffusion.cpp/CMakeLists.txt DELETED
@@ -1,45 +0,0 @@
1
- cmake_minimum_required(VERSION 3.12)
2
- project("stable-diffusion")
3
-
4
- set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
5
-
6
- if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
7
- set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
8
- set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
9
- endif()
10
-
11
- set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
12
- set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
13
-
14
- if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
15
- set(SD_STANDALONE ON)
16
- else()
17
- set(SD_STANDALONE OFF)
18
- endif()
19
-
20
- #
21
- # Option list
22
- #
23
-
24
- # general
25
- #option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
26
- option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
27
- option(BUILD_SHARED_LIBS "sd: build shared libs" OFF)
28
- #option(SD_BUILD_SERVER "sd: build server example" ON)
29
-
30
-
31
- # deps
32
- add_subdirectory(ggml)
33
-
34
- set(SD_LIB stable-diffusion)
35
-
36
- add_library(${SD_LIB} stable-diffusion.h stable-diffusion.cpp)
37
- target_link_libraries(${SD_LIB} PUBLIC ggml)
38
- target_include_directories(${SD_LIB} PUBLIC .)
39
- target_compile_features(${SD_LIB} PUBLIC cxx_std_11)
40
-
41
-
42
- if (SD_BUILD_EXAMPLES)
43
- add_subdirectory(examples)
44
- endif()
45
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/Dockerfile DELETED
@@ -1,17 +0,0 @@
1
- ARG UBUNTU_VERSION=22.04
2
-
3
- FROM ubuntu:$UBUNTU_VERSION as build
4
-
5
- RUN apt-get update && apt-get install -y build-essential git cmake
6
-
7
- WORKDIR /sd.cpp
8
-
9
- COPY . .
10
-
11
- RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
12
-
13
- FROM ubuntu:$UBUNTU_VERSION as runtime
14
-
15
- COPY --from=build /sd.cpp/build/bin/sd /sd
16
-
17
- ENTRYPOINT [ "/sd" ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2023 leejet
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/README.md DELETED
@@ -1,198 +0,0 @@
1
- <p align="center">
2
- <img src="./assets/a%20lovely%20cat.png" width="256x">
3
- </p>
4
-
5
- # stable-diffusion.cpp
6
-
7
- Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in pure C/C++
8
-
9
- ## Features
10
-
11
- - Plain C/C++ implementation based on [ggml](https://github.com/ggerganov/ggml), working in the same way as [llama.cpp](https://github.com/ggerganov/llama.cpp)
12
- - 16-bit, 32-bit float support
13
- - 4-bit, 5-bit and 8-bit integer quantization support
14
- - Accelerated memory-efficient CPU inference
15
- - Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image
16
- - AVX, AVX2 and AVX512 support for x86 architectures
17
- - SD1.x and SD2.x support
18
- - Original `txt2img` and `img2img` mode
19
- - Negative prompt
20
- - [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) style tokenizer (not all the features, only token weighting for now)
21
- - Sampling method
22
- - `Euler A`
23
- - `Euler`
24
- - `Heun`
25
- - `DPM2`
26
- - `DPM++ 2M`
27
- - [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457)
28
- - `DPM++ 2S a`
29
- - Cross-platform reproducibility (`--rng cuda`, consistent with the `stable-diffusion-webui GPU RNG`)
30
- - Embeds generation parameters into PNG output as a webui-compatible text string
31
- - Supported platforms
32
- - Linux
33
- - Mac OS
34
- - Windows
35
- - Android (via Termux)
36
-
37
- ### TODO
38
-
39
- - [ ] More sampling methods
40
- - [ ] GPU support
41
- - [ ] Make inference faster
42
- - The current implementation of ggml_conv_2d is slow and has high memory usage
43
- - [ ] Continuing to reduce memory usage (quantizing the weights of ggml_conv_2d)
44
- - [ ] LoRA support
45
- - [ ] k-quants support
46
-
47
- ## Usage
48
-
49
- ### Get the Code
50
-
51
- ```
52
- git clone --recursive https://github.com/leejet/stable-diffusion.cpp
53
- cd stable-diffusion.cpp
54
- ```
55
-
56
- - If you have already cloned the repository, you can use the following command to update the repository to the latest code.
57
-
58
- ```
59
- cd stable-diffusion.cpp
60
- git pull origin master
61
- git submodule init
62
- git submodule update
63
- ```
64
-
65
- ### Convert weights
66
-
67
- - download original weights(.ckpt or .safetensors). For example
68
- - Stable Diffusion v1.4 from https://huggingface.co/CompVis/stable-diffusion-v-1-4-original
69
- - Stable Diffusion v1.5 from https://huggingface.co/runwayml/stable-diffusion-v1-5
70
- - Stable Diffusion v2.1 from https://huggingface.co/stabilityai/stable-diffusion-2-1
71
-
72
- ```shell
73
- curl -L -O https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt
74
- # curl -L -O https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
75
- # curl -L -O https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-nonema-pruned.safetensors
76
- ```
77
-
78
- - convert weights to ggml model format
79
-
80
- ```shell
81
- cd models
82
- pip install -r requirements.txt
83
- python convert.py [path to weights] --out_type [output precision]
84
- # For example, python convert.py sd-v1-4.ckpt --out_type f16
85
- ```
86
-
87
- ### Quantization
88
-
89
- You can specify the output model format using the --out_type parameter
90
-
91
- - `f16` for 16-bit floating-point
92
- - `f32` for 32-bit floating-point
93
- - `q8_0` for 8-bit integer quantization
94
- - `q5_0` or `q5_1` for 5-bit integer quantization
95
- - `q4_0` or `q4_1` for 4-bit integer quantization
96
-
97
- ### Build
98
-
99
- #### Build from scratch
100
-
101
- ```shell
102
- mkdir build
103
- cd build
104
- cmake ..
105
- cmake --build . --config Release
106
- ```
107
-
108
- ##### Using OpenBLAS
109
-
110
- ```
111
- cmake .. -DGGML_OPENBLAS=ON
112
- cmake --build . --config Release
113
- ```
114
-
115
- ### Run
116
-
117
- ```
118
- usage: ./bin/sd [arguments]
119
-
120
- arguments:
121
- -h, --help show this help message and exit
122
- -M, --mode [txt2img or img2img] generation mode (default: txt2img)
123
- -t, --threads N number of threads to use during computation (default: -1).
124
- If threads <= 0, then threads will be set to the number of CPU physical cores
125
- -m, --model [MODEL] path to model
126
- -i, --init-img [IMAGE] path to the input image, required by img2img
127
- -o, --output OUTPUT path to write result image to (default: .\output.png)
128
- -p, --prompt [PROMPT] the prompt to render
129
- -n, --negative-prompt PROMPT the negative prompt (default: "")
130
- --cfg-scale SCALE unconditional guidance scale: (default: 7.0)
131
- --strength STRENGTH strength for noising/unnoising (default: 0.75)
132
- 1.0 corresponds to full destruction of information in init image
133
- -H, --height H image height, in pixel space (default: 512)
134
- -W, --width W image width, in pixel space (default: 512)
135
- --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2}
136
- sampling method (default: "euler_a")
137
- --steps STEPS number of sample steps (default: 20)
138
- --rng {std_default, cuda} RNG (default: cuda)
139
- -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
140
- -v, --verbose print extra info
141
- ```
142
-
143
- #### txt2img example
144
-
145
- ```
146
- ./bin/sd -m ../models/sd-v1-4-ggml-model-f16.bin -p "a lovely cat"
147
- ```
148
-
149
- Using formats of different precisions will yield results of varying quality.
150
-
151
- | f32 | f16 |q8_0 |q5_0 |q5_1 |q4_0 |q4_1 |
152
- | ---- |---- |---- |---- |---- |---- |---- |
153
- | ![](./assets/f32.png) |![](./assets/f16.png) |![](./assets/q8_0.png) |![](./assets/q5_0.png) |![](./assets/q5_1.png) |![](./assets/q4_0.png) |![](./assets/q4_1.png) |
154
-
155
- #### img2img example
156
-
157
- - `./output.png` is the image generated from the above txt2img pipeline
158
-
159
-
160
- ```
161
- ./bin/sd --mode img2img -m ../models/sd-v1-4-ggml-model-f16.bin -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
162
- ```
163
-
164
- <p align="center">
165
- <img src="./assets/img2img_output.png" width="256x">
166
- </p>
167
-
168
- ### Docker
169
-
170
- #### Building using Docker
171
-
172
- ```shell
173
- docker build -t sd .
174
- ```
175
-
176
- #### Run
177
-
178
- ```shell
179
- docker run -v /path/to/models:/models -v /path/to/output/:/output sd [args...]
180
- # For example
181
- # docker run -v ./models:/models -v ./build:/output sd -m /models/sd-v1-4-ggml-model-f16.bin -p "a lovely cat" -v -o /output/output.png
182
- ```
183
-
184
- ## Memory/Disk Requirements
185
-
186
- | precision | f32 | f16 |q8_0 |q5_0 |q5_1 |q4_0 |q4_1 |
187
- | ---- | ---- |---- |---- |---- |---- |---- |---- |
188
- | **Disk** | 2.7G | 2.0G | 1.7G | 1.6G | 1.6G | 1.5G | 1.5G |
189
- | **Memory**(txt2img - 512 x 512) | ~2.8G | ~2.3G | ~2.1G | ~2.0G | ~2.0G | ~2.0G | ~2.0G |
190
-
191
-
192
- ## References
193
-
194
- - [ggml](https://github.com/ggerganov/ggml)
195
- - [stable-diffusion](https://github.com/CompVis/stable-diffusion)
196
- - [stable-diffusion-stability-ai](https://github.com/Stability-AI/stablediffusion)
197
- - [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
198
- - [k-diffusion](https://github.com/crowsonkb/k-diffusion)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/assets/a lovely cat.png DELETED
Binary file (695 kB)
 
stable-diffusion.cpp/assets/f16.png DELETED
Binary file (695 kB)
 
stable-diffusion.cpp/assets/f32.png DELETED
Binary file (695 kB)
 
stable-diffusion.cpp/assets/img2img_output.png DELETED
Binary file (601 kB)
 
stable-diffusion.cpp/assets/q4_0.png DELETED
Binary file (665 kB)
 
stable-diffusion.cpp/assets/q4_1.png DELETED
Binary file (681 kB)
 
stable-diffusion.cpp/assets/q5_0.png DELETED
Binary file (682 kB)
 
stable-diffusion.cpp/assets/q5_1.png DELETED
Binary file (674 kB)
 
stable-diffusion.cpp/assets/q8_0.png DELETED
Binary file (674 kB)
 
stable-diffusion.cpp/examples/CMakeLists.txt DELETED
@@ -1,8 +0,0 @@
1
- # TODO: move into its own subdirectory
2
- # TODO: make stb libs a target (maybe common)
3
- set(SD_TARGET sd)
4
-
5
- add_executable(${SD_TARGET} main.cpp stb_image.h stb_image_write.h)
6
- install(TARGETS ${SD_TARGET} RUNTIME)
7
- target_link_libraries(${SD_TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
8
- target_compile_features(${SD_TARGET} PUBLIC cxx_std_11)
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/examples/main.cpp DELETED
@@ -1,473 +0,0 @@
1
- #include <stdio.h>
2
- #include <ctime>
3
- #include <fstream>
4
- #include <iostream>
5
- #include <random>
6
- #include <string>
7
- #include <thread>
8
- #include <unordered_set>
9
-
10
- #include "stable-diffusion.h"
11
-
12
- #define STB_IMAGE_IMPLEMENTATION
13
- #include "stb_image.h"
14
-
15
- #define STB_IMAGE_WRITE_IMPLEMENTATION
16
- #define STB_IMAGE_WRITE_STATIC
17
- #include "stb_image_write.h"
18
-
19
- #if defined(__APPLE__) && defined(__MACH__)
20
- #include <sys/sysctl.h>
21
- #include <sys/types.h>
22
- #endif
23
-
24
- #if !defined(_WIN32)
25
- #include <sys/ioctl.h>
26
- #include <unistd.h>
27
- #endif
28
-
29
- #define TXT2IMG "txt2img"
30
- #define IMG2IMG "img2img"
31
-
32
// get_num_physical_cores is copied from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
//
// Returns an estimate of the number of physical CPU cores.
//
// - Linux: counts the distinct thread-sibling masks published under sysfs.
// - macOS: queries sysctl ("hw.perflevel0.physicalcpu", then "hw.physicalcpu").
// - Otherwise, or when the platform query yields nothing: derived from
//   std::thread::hardware_concurrency() (halved when it reports more than 4
//   threads), or 4 when that call reports 0.
int32_t get_num_physical_cores() {
#ifdef __linux__
    // enumerate the set of thread siblings, num entries is num cores
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
        // Per-CPU directories are named cpu0, cpu1, ... inside
        // /sys/devices/system/cpu/, hence the "/cpu" segment before the index.
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" + std::to_string(cpu) +
                                      "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break;  // no more cpus
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            // Hardware threads of the same core report the same mask, so the
            // set keeps one entry per physical core.
            siblings.insert(line);
        }
    }
    if (siblings.size() > 0) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores;
    size_t len = sizeof(num_physical_cores);
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32)
    // TODO: Implement
#endif
    // Generic fallback when no platform-specific answer is available.
    unsigned int n_threads = std::thread::hardware_concurrency();
    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
69
-
70
- const char* rng_type_to_str[] = {
71
- "std_default",
72
- "cuda",
73
- };
74
-
75
- // Names of the sampler method, same order as enum SampleMethod in stable-diffusion.h
76
- const char* sample_method_str[] = {
77
- "euler_a",
78
- "euler",
79
- "heun",
80
- "dpm2",
81
- "dpm++2s_a",
82
- "dpm++2m",
83
- "dpm++2mv2"};
84
-
85
- // Names of the sigma schedule overrides, same order as Schedule in stable-diffusion.h
86
- const char* schedule_str[] = {
87
- "default",
88
- "discrete",
89
- "karras"};
90
-
91
- struct Option {
92
- int n_threads = -1;
93
- std::string mode = TXT2IMG;
94
- std::string model_path;
95
- std::string output_path = "output.png";
96
- std::string init_img;
97
- std::string prompt;
98
- std::string negative_prompt;
99
- float cfg_scale = 7.0f;
100
- int w = 512;
101
- int h = 512;
102
- SampleMethod sample_method = EULER_A;
103
- Schedule schedule = DEFAULT;
104
- int sample_steps = 20;
105
- float strength = 0.75f;
106
- RNGType rng_type = CUDA_RNG;
107
- int64_t seed = 42;
108
- bool verbose = false;
109
-
110
- void print() {
111
- printf("Option: \n");
112
- printf(" n_threads: %d\n", n_threads);
113
- printf(" mode: %s\n", mode.c_str());
114
- printf(" model_path: %s\n", model_path.c_str());
115
- printf(" output_path: %s\n", output_path.c_str());
116
- printf(" init_img: %s\n", init_img.c_str());
117
- printf(" prompt: %s\n", prompt.c_str());
118
- printf(" negative_prompt: %s\n", negative_prompt.c_str());
119
- printf(" cfg_scale: %.2f\n", cfg_scale);
120
- printf(" width: %d\n", w);
121
- printf(" height: %d\n", h);
122
- printf(" sample_method: %s\n", sample_method_str[sample_method]);
123
- printf(" schedule: %s\n", schedule_str[schedule]);
124
- printf(" sample_steps: %d\n", sample_steps);
125
- printf(" strength: %.2f\n", strength);
126
- printf(" rng: %s\n", rng_type_to_str[rng_type]);
127
- printf(" seed: %ld\n", seed);
128
- }
129
- };
130
-
131
// Writes the command-line help to stdout. Only argv[0] is read (as the
// program name in the first line); argc is accepted for signature symmetry
// with main() and is not used.
void print_usage(int argc, const char* argv[]) {
    // Fixed part of the help text, emitted verbatim after the "usage:" line.
    static const char* const help_lines[] = {
        "\n",
        "arguments:\n",
        " -h, --help show this help message and exit\n",
        " -M, --mode [txt2img or img2img] generation mode (default: txt2img)\n",
        " -t, --threads N number of threads to use during computation (default: -1).\n",
        " If threads <= 0, then threads will be set to the number of CPU physical cores\n",
        " -m, --model [MODEL] path to model\n",
        " -i, --init-img [IMAGE] path to the input image, required by img2img\n",
        " -o, --output OUTPUT path to write result image to (default: .\\output.png)\n",
        " -p, --prompt [PROMPT] the prompt to render\n",
        " -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n",
        " --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n",
        " --strength STRENGTH strength for noising/unnoising (default: 0.75)\n",
        " 1.0 corresponds to full destruction of information in init image\n",
        " -H, --height H image height, in pixel space (default: 512)\n",
        " -W, --width W image width, in pixel space (default: 512)\n",
        " --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2}\n",
        " sampling method (default: \"euler_a\")\n",
        " --steps STEPS number of sample steps (default: 20)\n",
        " --rng {std_default, cuda} RNG (default: cuda)\n",
        " -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n",
        " --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n",
        " -v, --verbose print extra info\n",
    };

    printf("usage: %s [arguments]\n", argv[0]);
    for (const char* text : help_lines) {
        // None of the lines contains a conversion specifier, so writing them
        // verbatim produces the same bytes as passing them to printf.
        fputs(text, stdout);
    }
}
157
-
158
- void parse_args(int argc, const char* argv[], Option* opt) {
159
- bool invalid_arg = false;
160
-
161
- for (int i = 1; i < argc; i++) {
162
- std::string arg = argv[i];
163
-
164
- if (arg == "-t" || arg == "--threads") {
165
- if (++i >= argc) {
166
- invalid_arg = true;
167
- break;
168
- }
169
- opt->n_threads = std::stoi(argv[i]);
170
- } else if (arg == "-M" || arg == "--mode") {
171
- if (++i >= argc) {
172
- invalid_arg = true;
173
- break;
174
- }
175
- opt->mode = argv[i];
176
-
177
- } else if (arg == "-m" || arg == "--model") {
178
- if (++i >= argc) {
179
- invalid_arg = true;
180
- break;
181
- }
182
- opt->model_path = argv[i];
183
- } else if (arg == "-i" || arg == "--init-img") {
184
- if (++i >= argc) {
185
- invalid_arg = true;
186
- break;
187
- }
188
- opt->init_img = argv[i];
189
- } else if (arg == "-o" || arg == "--output") {
190
- if (++i >= argc) {
191
- invalid_arg = true;
192
- break;
193
- }
194
- opt->output_path = argv[i];
195
- } else if (arg == "-p" || arg == "--prompt") {
196
- if (++i >= argc) {
197
- invalid_arg = true;
198
- break;
199
- }
200
- opt->prompt = argv[i];
201
- } else if (arg == "-n" || arg == "--negative-prompt") {
202
- if (++i >= argc) {
203
- invalid_arg = true;
204
- break;
205
- }
206
- opt->negative_prompt = argv[i];
207
- } else if (arg == "--cfg-scale") {
208
- if (++i >= argc) {
209
- invalid_arg = true;
210
- break;
211
- }
212
- opt->cfg_scale = std::stof(argv[i]);
213
- } else if (arg == "--strength") {
214
- if (++i >= argc) {
215
- invalid_arg = true;
216
- break;
217
- }
218
- opt->strength = std::stof(argv[i]);
219
- } else if (arg == "-H" || arg == "--height") {
220
- if (++i >= argc) {
221
- invalid_arg = true;
222
- break;
223
- }
224
- opt->h = std::stoi(argv[i]);
225
- } else if (arg == "-W" || arg == "--width") {
226
- if (++i >= argc) {
227
- invalid_arg = true;
228
- break;
229
- }
230
- opt->w = std::stoi(argv[i]);
231
- } else if (arg == "--steps") {
232
- if (++i >= argc) {
233
- invalid_arg = true;
234
- break;
235
- }
236
- opt->sample_steps = std::stoi(argv[i]);
237
- } else if (arg == "--rng") {
238
- if (++i >= argc) {
239
- invalid_arg = true;
240
- break;
241
- }
242
- std::string rng_type_str = argv[i];
243
- if (rng_type_str == "std_default") {
244
- opt->rng_type = STD_DEFAULT_RNG;
245
- } else if (rng_type_str == "cuda") {
246
- opt->rng_type = CUDA_RNG;
247
- } else {
248
- invalid_arg = true;
249
- break;
250
- }
251
- } else if (arg == "--schedule") {
252
- if (++i >= argc) {
253
- invalid_arg = true;
254
- break;
255
- }
256
- const char* schedule_selected = argv[i];
257
- int schedule_found = -1;
258
- for (int d = 0; d < N_SCHEDULES; d++) {
259
- if (!strcmp(schedule_selected, schedule_str[d])) {
260
- schedule_found = d;
261
- }
262
- }
263
- if (schedule_found == -1) {
264
- invalid_arg = true;
265
- break;
266
- }
267
- opt->schedule = (Schedule)schedule_found;
268
- } else if (arg == "-s" || arg == "--seed") {
269
- if (++i >= argc) {
270
- invalid_arg = true;
271
- break;
272
- }
273
- opt->seed = std::stoll(argv[i]);
274
- } else if (arg == "--sampling-method") {
275
- if (++i >= argc) {
276
- invalid_arg = true;
277
- break;
278
- }
279
- const char* sample_method_selected = argv[i];
280
- int sample_method_found = -1;
281
- for (int m = 0; m < N_SAMPLE_METHODS; m++) {
282
- if (!strcmp(sample_method_selected, sample_method_str[m])) {
283
- sample_method_found = m;
284
- }
285
- }
286
- if (sample_method_found == -1) {
287
- invalid_arg = true;
288
- break;
289
- }
290
- opt->sample_method = (SampleMethod)sample_method_found;
291
- } else if (arg == "-h" || arg == "--help") {
292
- print_usage(argc, argv);
293
- exit(0);
294
- } else if (arg == "-v" || arg == "--verbose") {
295
- opt->verbose = true;
296
- } else {
297
- fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
298
- print_usage(argc, argv);
299
- exit(1);
300
- }
301
- if (invalid_arg) {
302
- fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
303
- print_usage(argc, argv);
304
- exit(1);
305
- }
306
- }
307
-
308
- if (opt->n_threads <= 0) {
309
- opt->n_threads = get_num_physical_cores();
310
- }
311
-
312
- if (opt->mode != TXT2IMG && opt->mode != IMG2IMG) {
313
- fprintf(stderr, "error: invalid mode %s, must be one of ['%s', '%s']\n",
314
- opt->mode.c_str(), TXT2IMG, IMG2IMG);
315
- exit(1);
316
- }
317
-
318
- if (opt->prompt.length() == 0) {
319
- fprintf(stderr, "error: the following arguments are required: prompt\n");
320
- print_usage(argc, argv);
321
- exit(1);
322
- }
323
-
324
- if (opt->model_path.length() == 0) {
325
- fprintf(stderr, "error: the following arguments are required: model_path\n");
326
- print_usage(argc, argv);
327
- exit(1);
328
- }
329
-
330
- if (opt->mode == IMG2IMG && opt->init_img.length() == 0) {
331
- fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n");
332
- print_usage(argc, argv);
333
- exit(1);
334
- }
335
-
336
- if (opt->output_path.length() == 0) {
337
- fprintf(stderr, "error: the following arguments are required: output_path\n");
338
- print_usage(argc, argv);
339
- exit(1);
340
- }
341
-
342
- if (opt->w <= 0 || opt->w % 64 != 0) {
343
- fprintf(stderr, "error: the width must be a multiple of 64\n");
344
- exit(1);
345
- }
346
-
347
- if (opt->h <= 0 || opt->h % 64 != 0) {
348
- fprintf(stderr, "error: the height must be a multiple of 64\n");
349
- exit(1);
350
- }
351
-
352
- if (opt->sample_steps <= 0) {
353
- fprintf(stderr, "error: the sample_steps must be greater than 0\n");
354
- exit(1);
355
- }
356
-
357
- if (opt->strength < 0.f || opt->strength > 1.f) {
358
- fprintf(stderr, "error: can only work with strength in [0.0, 1.0]\n");
359
- exit(1);
360
- }
361
-
362
- if (opt->seed < 0) {
363
- srand((int)time(NULL));
364
- opt->seed = rand();
365
- }
366
- }
367
-
368
// Returns the final component of `path`: everything after the last '/' or
// '\\', whichever occurs later. A path without any separator is returned
// unchanged; a path ending in a separator yields an empty string.
std::string basename(const std::string& path) {
    // Search for both separators at once so that mixed paths such as
    // "dir/sub\\model.bin" are cut at the last separator of either kind.
    const size_t pos = path.find_last_of("/\\");
    if (pos == std::string::npos) {
        return path;
    }
    return path.substr(pos + 1);
}
379
-
380
- int main(int argc, const char* argv[]) {
381
- Option opt;
382
- parse_args(argc, argv, &opt);
383
-
384
- if (opt.verbose) {
385
- opt.print();
386
- printf("%s", sd_get_system_info().c_str());
387
- set_sd_log_level(SDLogLevel::DEBUG);
388
- }
389
-
390
- bool vae_decode_only = true;
391
- std::vector<uint8_t> init_img;
392
- if (opt.mode == IMG2IMG) {
393
- vae_decode_only = false;
394
-
395
- int c = 0;
396
- unsigned char* img_data = stbi_load(opt.init_img.c_str(), &opt.w, &opt.h, &c, 3);
397
- if (img_data == NULL) {
398
- fprintf(stderr, "load image from '%s' failed\n", opt.init_img.c_str());
399
- return 1;
400
- }
401
- if (c != 3) {
402
- fprintf(stderr, "input image must be a 3 channels RGB image, but got %d channels\n", c);
403
- free(img_data);
404
- return 1;
405
- }
406
- if (opt.w <= 0 || opt.w % 64 != 0) {
407
- fprintf(stderr, "error: the width of image must be a multiple of 64\n");
408
- free(img_data);
409
- return 1;
410
- }
411
- if (opt.h <= 0 || opt.h % 64 != 0) {
412
- fprintf(stderr, "error: the height of image must be a multiple of 64\n");
413
- free(img_data);
414
- return 1;
415
- }
416
- init_img.assign(img_data, img_data + (opt.w * opt.h * c));
417
- }
418
-
419
- StableDiffusion sd(opt.n_threads, vae_decode_only, true, opt.rng_type);
420
- if (!sd.load_from_file(opt.model_path, opt.schedule)) {
421
- return 1;
422
- }
423
-
424
- std::vector<uint8_t> img;
425
- if (opt.mode == TXT2IMG) {
426
- img = sd.txt2img(opt.prompt,
427
- opt.negative_prompt,
428
- opt.cfg_scale,
429
- opt.w,
430
- opt.h,
431
- opt.sample_method,
432
- opt.sample_steps,
433
- opt.seed);
434
- } else {
435
- img = sd.img2img(init_img,
436
- opt.prompt,
437
- opt.negative_prompt,
438
- opt.cfg_scale,
439
- opt.w,
440
- opt.h,
441
- opt.sample_method,
442
- opt.sample_steps,
443
- opt.strength,
444
- opt.seed);
445
- }
446
-
447
- if (img.size() == 0) {
448
- fprintf(stderr, "generate failed\n");
449
- return 1;
450
- }
451
-
452
- std::string parameter_string = opt.prompt + "\n";
453
- if (opt.negative_prompt.size() != 0) {
454
- parameter_string += "Negative prompt: " + opt.negative_prompt + "\n";
455
- }
456
- parameter_string += "Steps: " + std::to_string(opt.sample_steps) + ", ";
457
- parameter_string += "CFG scale: " + std::to_string(opt.cfg_scale) + ", ";
458
- parameter_string += "Seed: " + std::to_string(opt.seed) + ", ";
459
- parameter_string += "Size: " + std::to_string(opt.w) + "x" + std::to_string(opt.h) + ", ";
460
- parameter_string += "Model: " + basename(opt.model_path) + ", ";
461
- parameter_string += "RNG: " + std::string(rng_type_to_str[opt.rng_type]) + ", ";
462
- parameter_string += "Sampler: " + std::string(sample_method_str[opt.sample_method]);
463
- if (opt.schedule == KARRAS) {
464
- parameter_string += " karras";
465
- }
466
- parameter_string += ", ";
467
- parameter_string += "Version: stable-diffusion.cpp";
468
-
469
- stbi_write_png(opt.output_path.c_str(), opt.w, opt.h, 3, img.data(), 0, parameter_string.c_str());
470
- printf("save result image to '%s'\n", opt.output_path.c_str());
471
-
472
- return 0;
473
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/examples/stb_image.h DELETED
The diff for this file is too large to render. See raw diff
 
stable-diffusion.cpp/examples/stb_image_write.h DELETED
@@ -1,1741 +0,0 @@
1
- /* stb_image_write - v1.16 - public domain - http://nothings.org/stb
2
- writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
3
- no warranty implied; use at your own risk
4
-
5
- Before #including,
6
-
7
- #define STB_IMAGE_WRITE_IMPLEMENTATION
8
-
9
- in the file that you want to have the implementation.
10
-
11
- Will probably not work correctly with strict-aliasing optimizations.
12
-
13
- ABOUT:
14
-
15
- This header file is a library for writing images to C stdio or a callback.
16
-
17
- The PNG output is not optimal; it is 20-50% larger than the file
18
- written by a decent optimizing implementation; though providing a custom
19
- zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.
20
- This library is designed for source code compactness and simplicity,
21
- not optimal image file size or run-time performance.
22
-
23
- BUILDING:
24
-
25
- You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.
26
- You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace
27
- malloc,realloc,free.
28
- You can #define STBIW_MEMMOVE() to replace memmove()
29
- You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function
30
- for PNG compression (instead of the builtin one), it must have the following signature:
31
- unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality);
32
- The returned data will be freed with STBIW_FREE() (free() by default),
33
- so it must be heap allocated with STBIW_MALLOC() (malloc() by default),
34
-
35
- UNICODE:
36
-
37
- If compiling for Windows and you wish to use Unicode filenames, compile
38
- with
39
- #define STBIW_WINDOWS_UTF8
40
- and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert
41
- Windows wchar_t filenames to utf8.
42
-
43
- USAGE:
44
-
45
- There are five functions, one for each image file format:
46
-
47
- int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);
48
- int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
49
- int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
50
- int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality);
51
- int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
52
-
53
- void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically
54
-
55
- There are also five equivalent functions that use an arbitrary write function. You are
56
- expected to open/close your file-equivalent before and after calling these:
57
-
58
- int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes);
59
- int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
60
- int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
61
- int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
62
- int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
63
-
64
- where the callback is:
65
- void stbi_write_func(void *context, void *data, int size);
66
-
67
- You can configure it with these global variables:
68
- int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE
69
- int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression
70
- int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode
71
-
72
-
73
- You can define STBI_WRITE_NO_STDIO to disable the file variant of these
74
- functions, so the library will not use stdio.h at all. However, this will
75
- also disable HDR writing, because it requires stdio for formatted output.
76
-
77
- Each function returns 0 on failure and non-0 on success.
78
-
79
- The functions create an image file defined by the parameters. The image
80
- is a rectangle of pixels stored from left-to-right, top-to-bottom.
81
- Each pixel contains 'comp' channels of data stored interleaved with 8-bits
82
- per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is
83
- monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.
84
- The *data pointer points to the first byte of the top-left-most pixel.
85
- For PNG, "stride_in_bytes" is the distance in bytes from the first byte of
86
- a row of pixels to the first byte of the next row of pixels.
87
-
88
- PNG creates output files with the same number of components as the input.
89
- The BMP format expands Y to RGB in the file format and does not
90
- output alpha.
91
-
92
- PNG supports writing rectangles of data even when the bytes storing rows of
93
- data are not consecutive in memory (e.g. sub-rectangles of a larger image),
94
- by supplying the stride between the beginning of adjacent rows. The other
95
- formats do not. (Thus you cannot write a native-format BMP through the BMP
96
- writer, both because it is in BGR order and because it may have padding
97
- at the end of the line.)
98
-
99
- PNG allows you to set the deflate compression level by setting the global
100
- variable 'stbi_write_png_compression_level' (it defaults to 8).
101
-
102
- HDR expects linear float data. Since the format is always 32-bit rgb(e)
103
- data, alpha (if provided) is discarded, and for monochrome data it is
104
- replicated across all three channels.
105
-
106
- TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed
107
- data, set the global variable 'stbi_write_tga_with_rle' to 0.
108
-
109
- JPEG does ignore alpha channels in input data; quality is between 1 and 100.
110
- Higher quality looks better but results in a bigger image.
111
- JPEG baseline (no JPEG progressive).
112
-
113
- CREDITS:
114
-
115
-
116
- Sean Barrett - PNG/BMP/TGA
117
- Baldur Karlsson - HDR
118
- Jean-Sebastien Guay - TGA monochrome
119
- Tim Kelsey - misc enhancements
120
- Alan Hickman - TGA RLE
121
- Emmanuel Julien - initial file IO callback implementation
122
- Jon Olick - original jo_jpeg.cpp code
123
- Daniel Gibson - integrate JPEG, allow external zlib
124
- Aarni Koskela - allow choosing PNG filter
125
-
126
- bugfixes:
127
- github:Chribba
128
- Guillaume Chereau
129
- github:jry2
130
- github:romigrou
131
- Sergio Gonzalez
132
- Jonas Karlsson
133
- Filip Wasil
134
- Thatcher Ulrich
135
- github:poppolopoppo
136
- Patrick Boettcher
137
- github:xeekworx
138
- Cap Petschulat
139
- Simon Rodriguez
140
- Ivan Tikhonov
141
- github:ignotion
142
- Adam Schackart
143
- Andrew Kensler
144
-
145
- LICENSE
146
-
147
- See end of file for license information.
148
-
149
- */
150
-
151
- #ifndef INCLUDE_STB_IMAGE_WRITE_H
152
- #define INCLUDE_STB_IMAGE_WRITE_H
153
-
154
- #include <stdlib.h>
155
-
156
- // if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline'
157
- #ifndef STBIWDEF
158
- #ifdef STB_IMAGE_WRITE_STATIC
159
- #define STBIWDEF static
160
- #else
161
- #ifdef __cplusplus
162
- #define STBIWDEF extern "C"
163
- #else
164
- #define STBIWDEF extern
165
- #endif
166
- #endif
167
- #endif
168
-
169
- #ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations
170
- STBIWDEF int stbi_write_tga_with_rle;
171
- STBIWDEF int stbi_write_png_compression_level;
172
- STBIWDEF int stbi_write_force_png_filter;
173
- #endif
174
-
175
- #ifndef STBI_WRITE_NO_STDIO
176
- STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes, const char* parameters = NULL);
177
- STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
178
- STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
179
- STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
180
- STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality);
181
-
182
- #ifdef STBIW_WINDOWS_UTF8
183
- STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
184
- #endif
185
- #endif
186
-
187
- typedef void stbi_write_func(void *context, void *data, int size);
188
-
189
- STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes);
190
- STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
191
- STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
192
- STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
193
- STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
194
-
195
- STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean);
196
-
197
- #endif//INCLUDE_STB_IMAGE_WRITE_H
198
-
199
- #ifdef STB_IMAGE_WRITE_IMPLEMENTATION
200
-
201
- #ifdef _WIN32
202
- #ifndef _CRT_SECURE_NO_WARNINGS
203
- #define _CRT_SECURE_NO_WARNINGS
204
- #endif
205
- #ifndef _CRT_NONSTDC_NO_DEPRECATE
206
- #define _CRT_NONSTDC_NO_DEPRECATE
207
- #endif
208
- #endif
209
-
210
- #ifndef STBI_WRITE_NO_STDIO
211
- #include <stdio.h>
212
- #endif // STBI_WRITE_NO_STDIO
213
-
214
- #include <stdarg.h>
215
- #include <stdlib.h>
216
- #include <string.h>
217
- #include <math.h>
218
-
219
- #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))
220
- // ok
221
- #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)
222
- // ok
223
- #else
224
- #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."
225
- #endif
226
-
227
- #ifndef STBIW_MALLOC
228
- #define STBIW_MALLOC(sz) malloc(sz)
229
- #define STBIW_REALLOC(p,newsz) realloc(p,newsz)
230
- #define STBIW_FREE(p) free(p)
231
- #endif
232
-
233
- #ifndef STBIW_REALLOC_SIZED
234
- #define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz)
235
- #endif
236
-
237
-
238
- #ifndef STBIW_MEMMOVE
239
- #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz)
240
- #endif
241
-
242
-
243
- #ifndef STBIW_ASSERT
244
- #include <assert.h>
245
- #define STBIW_ASSERT(x) assert(x)
246
- #endif
247
-
248
- #define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff)
249
-
250
- #ifdef STB_IMAGE_WRITE_STATIC
251
- static int stbi_write_png_compression_level = 8;
252
- static int stbi_write_tga_with_rle = 1;
253
- static int stbi_write_force_png_filter = -1;
254
- #else
255
- int stbi_write_png_compression_level = 8;
256
- int stbi_write_tga_with_rle = 1;
257
- int stbi_write_force_png_filter = -1;
258
- #endif
259
-
260
- static int stbi__flip_vertically_on_write = 0;
261
-
262
- STBIWDEF void stbi_flip_vertically_on_write(int flag)
263
- {
264
- stbi__flip_vertically_on_write = flag;
265
- }
266
-
267
- typedef struct
268
- {
269
- stbi_write_func *func;
270
- void *context;
271
- unsigned char buffer[64];
272
- int buf_used;
273
- } stbi__write_context;
274
-
275
- // initialize a callback-based context
276
- static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context)
277
- {
278
- s->func = c;
279
- s->context = context;
280
- }
281
-
282
- #ifndef STBI_WRITE_NO_STDIO
283
-
284
- static void stbi__stdio_write(void *context, void *data, int size)
285
- {
286
- fwrite(data,1,size,(FILE*) context);
287
- }
288
-
289
- #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
290
- #ifdef __cplusplus
291
- #define STBIW_EXTERN extern "C"
292
- #else
293
- #define STBIW_EXTERN extern
294
- #endif
295
- STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
296
- STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
297
-
298
- STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
299
- {
300
- return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
301
- }
302
- #endif
303
-
304
- static FILE *stbiw__fopen(char const *filename, char const *mode)
305
- {
306
- FILE *f;
307
- #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
308
- wchar_t wMode[64];
309
- wchar_t wFilename[1024];
310
- if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
311
- return 0;
312
-
313
- if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
314
- return 0;
315
-
316
- #if defined(_MSC_VER) && _MSC_VER >= 1400
317
- if (0 != _wfopen_s(&f, wFilename, wMode))
318
- f = 0;
319
- #else
320
- f = _wfopen(wFilename, wMode);
321
- #endif
322
-
323
- #elif defined(_MSC_VER) && _MSC_VER >= 1400
324
- if (0 != fopen_s(&f, filename, mode))
325
- f=0;
326
- #else
327
- f = fopen(filename, mode);
328
- #endif
329
- return f;
330
- }
331
-
332
- static int stbi__start_write_file(stbi__write_context *s, const char *filename)
333
- {
334
- FILE *f = stbiw__fopen(filename, "wb");
335
- stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f);
336
- return f != NULL;
337
- }
338
-
339
- static void stbi__end_write_file(stbi__write_context *s)
340
- {
341
- fclose((FILE *)s->context);
342
- }
343
-
344
- #endif // !STBI_WRITE_NO_STDIO
345
-
346
- typedef unsigned int stbiw_uint32;
347
- typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1];
348
-
349
- static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v)
350
- {
351
- while (*fmt) {
352
- switch (*fmt++) {
353
- case ' ': break;
354
- case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int));
355
- s->func(s->context,&x,1);
356
- break; }
357
- case '2': { int x = va_arg(v,int);
358
- unsigned char b[2];
359
- b[0] = STBIW_UCHAR(x);
360
- b[1] = STBIW_UCHAR(x>>8);
361
- s->func(s->context,b,2);
362
- break; }
363
- case '4': { stbiw_uint32 x = va_arg(v,int);
364
- unsigned char b[4];
365
- b[0]=STBIW_UCHAR(x);
366
- b[1]=STBIW_UCHAR(x>>8);
367
- b[2]=STBIW_UCHAR(x>>16);
368
- b[3]=STBIW_UCHAR(x>>24);
369
- s->func(s->context,b,4);
370
- break; }
371
- default:
372
- STBIW_ASSERT(0);
373
- return;
374
- }
375
- }
376
- }
377
-
378
- static void stbiw__writef(stbi__write_context *s, const char *fmt, ...)
379
- {
380
- va_list v;
381
- va_start(v, fmt);
382
- stbiw__writefv(s, fmt, v);
383
- va_end(v);
384
- }
385
-
386
- static void stbiw__write_flush(stbi__write_context *s)
387
- {
388
- if (s->buf_used) {
389
- s->func(s->context, &s->buffer, s->buf_used);
390
- s->buf_used = 0;
391
- }
392
- }
393
-
394
- static void stbiw__putc(stbi__write_context *s, unsigned char c)
395
- {
396
- s->func(s->context, &c, 1);
397
- }
398
-
399
- static void stbiw__write1(stbi__write_context *s, unsigned char a)
400
- {
401
- if ((size_t)s->buf_used + 1 > sizeof(s->buffer))
402
- stbiw__write_flush(s);
403
- s->buffer[s->buf_used++] = a;
404
- }
405
-
406
- static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c)
407
- {
408
- int n;
409
- if ((size_t)s->buf_used + 3 > sizeof(s->buffer))
410
- stbiw__write_flush(s);
411
- n = s->buf_used;
412
- s->buf_used = n+3;
413
- s->buffer[n+0] = a;
414
- s->buffer[n+1] = b;
415
- s->buffer[n+2] = c;
416
- }
417
-
418
- static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d)
419
- {
420
- unsigned char bg[3] = { 255, 0, 255}, px[3];
421
- int k;
422
-
423
- if (write_alpha < 0)
424
- stbiw__write1(s, d[comp - 1]);
425
-
426
- switch (comp) {
427
- case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case
428
- case 1:
429
- if (expand_mono)
430
- stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp
431
- else
432
- stbiw__write1(s, d[0]); // monochrome TGA
433
- break;
434
- case 4:
435
- if (!write_alpha) {
436
- // composite against pink background
437
- for (k = 0; k < 3; ++k)
438
- px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;
439
- stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);
440
- break;
441
- }
442
- /* FALLTHROUGH */
443
- case 3:
444
- stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);
445
- break;
446
- }
447
- if (write_alpha > 0)
448
- stbiw__write1(s, d[comp - 1]);
449
- }
450
-
451
- static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
452
- {
453
- stbiw_uint32 zero = 0;
454
- int i,j, j_end;
455
-
456
- if (y <= 0)
457
- return;
458
-
459
- if (stbi__flip_vertically_on_write)
460
- vdir *= -1;
461
-
462
- if (vdir < 0) {
463
- j_end = -1; j = y-1;
464
- } else {
465
- j_end = y; j = 0;
466
- }
467
-
468
- for (; j != j_end; j += vdir) {
469
- for (i=0; i < x; ++i) {
470
- unsigned char *d = (unsigned char *) data + (j*x+i)*comp;
471
- stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);
472
- }
473
- stbiw__write_flush(s);
474
- s->func(s->context, &zero, scanline_pad);
475
- }
476
- }
477
-
478
- static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
479
- {
480
- if (y < 0 || x < 0) {
481
- return 0;
482
- } else {
483
- va_list v;
484
- va_start(v, fmt);
485
- stbiw__writefv(s, fmt, v);
486
- va_end(v);
487
- stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono);
488
- return 1;
489
- }
490
- }
491
-
492
- static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
493
- {
494
- if (comp != 4) {
495
- // write RGB bitmap
496
- int pad = (-x*3) & 3;
497
- return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad,
498
- "11 4 22 4" "4 44 22 444444",
499
- 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header
500
- 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header
501
- } else {
502
- // RGBA bitmaps need a v4 header
503
- // use BI_BITFIELDS mode with 32bpp and alpha mask
504
- // (straight BI_RGB with alpha mask doesn't work in most readers)
505
- return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0,
506
- "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444",
507
- 'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header
508
- 108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header
509
- }
510
- }
511
-
512
- STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
513
- {
514
- stbi__write_context s = { 0 };
515
- stbi__start_write_callbacks(&s, func, context);
516
- return stbi_write_bmp_core(&s, x, y, comp, data);
517
- }
518
-
519
- #ifndef STBI_WRITE_NO_STDIO
520
- STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
521
- {
522
- stbi__write_context s = { 0 };
523
- if (stbi__start_write_file(&s,filename)) {
524
- int r = stbi_write_bmp_core(&s, x, y, comp, data);
525
- stbi__end_write_file(&s);
526
- return r;
527
- } else
528
- return 0;
529
- }
530
- #endif //!STBI_WRITE_NO_STDIO
531
-
532
- static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data)
533
- {
534
- int has_alpha = (comp == 2 || comp == 4);
535
- int colorbytes = has_alpha ? comp-1 : comp;
536
- int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3
537
-
538
- if (y < 0 || x < 0)
539
- return 0;
540
-
541
- if (!stbi_write_tga_with_rle) {
542
- return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0,
543
- "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);
544
- } else {
545
- int i,j,k;
546
- int jend, jdir;
547
-
548
- stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8);
549
-
550
- if (stbi__flip_vertically_on_write) {
551
- j = 0;
552
- jend = y;
553
- jdir = 1;
554
- } else {
555
- j = y-1;
556
- jend = -1;
557
- jdir = -1;
558
- }
559
- for (; j != jend; j += jdir) {
560
- unsigned char *row = (unsigned char *) data + j * x * comp;
561
- int len;
562
-
563
- for (i = 0; i < x; i += len) {
564
- unsigned char *begin = row + i * comp;
565
- int diff = 1;
566
- len = 1;
567
-
568
- if (i < x - 1) {
569
- ++len;
570
- diff = memcmp(begin, row + (i + 1) * comp, comp);
571
- if (diff) {
572
- const unsigned char *prev = begin;
573
- for (k = i + 2; k < x && len < 128; ++k) {
574
- if (memcmp(prev, row + k * comp, comp)) {
575
- prev += comp;
576
- ++len;
577
- } else {
578
- --len;
579
- break;
580
- }
581
- }
582
- } else {
583
- for (k = i + 2; k < x && len < 128; ++k) {
584
- if (!memcmp(begin, row + k * comp, comp)) {
585
- ++len;
586
- } else {
587
- break;
588
- }
589
- }
590
- }
591
- }
592
-
593
- if (diff) {
594
- unsigned char header = STBIW_UCHAR(len - 1);
595
- stbiw__write1(s, header);
596
- for (k = 0; k < len; ++k) {
597
- stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);
598
- }
599
- } else {
600
- unsigned char header = STBIW_UCHAR(len - 129);
601
- stbiw__write1(s, header);
602
- stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);
603
- }
604
- }
605
- }
606
- stbiw__write_flush(s);
607
- }
608
- return 1;
609
- }
610
-
611
- STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
612
- {
613
- stbi__write_context s = { 0 };
614
- stbi__start_write_callbacks(&s, func, context);
615
- return stbi_write_tga_core(&s, x, y, comp, (void *) data);
616
- }
617
-
618
- #ifndef STBI_WRITE_NO_STDIO
619
- STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
620
- {
621
- stbi__write_context s = { 0 };
622
- if (stbi__start_write_file(&s,filename)) {
623
- int r = stbi_write_tga_core(&s, x, y, comp, (void *) data);
624
- stbi__end_write_file(&s);
625
- return r;
626
- } else
627
- return 0;
628
- }
629
- #endif
630
-
631
- // *************************************************************************************************
632
- // Radiance RGBE HDR writer
633
- // by Baldur Karlsson
634
-
635
- #define stbiw__max(a, b) ((a) > (b) ? (a) : (b))
636
-
637
- #ifndef STBI_WRITE_NO_STDIO
638
-
639
- static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
640
- {
641
- int exponent;
642
- float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));
643
-
644
- if (maxcomp < 1e-32f) {
645
- rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
646
- } else {
647
- float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp;
648
-
649
- rgbe[0] = (unsigned char)(linear[0] * normalize);
650
- rgbe[1] = (unsigned char)(linear[1] * normalize);
651
- rgbe[2] = (unsigned char)(linear[2] * normalize);
652
- rgbe[3] = (unsigned char)(exponent + 128);
653
- }
654
- }
655
-
656
- static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte)
657
- {
658
- unsigned char lengthbyte = STBIW_UCHAR(length+128);
659
- STBIW_ASSERT(length+128 <= 255);
660
- s->func(s->context, &lengthbyte, 1);
661
- s->func(s->context, &databyte, 1);
662
- }
663
-
664
- static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data)
665
- {
666
- unsigned char lengthbyte = STBIW_UCHAR(length);
667
- STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
668
- s->func(s->context, &lengthbyte, 1);
669
- s->func(s->context, data, length);
670
- }
671
-
672
- static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline)
673
- {
674
- unsigned char scanlineheader[4] = { 2, 2, 0, 0 };
675
- unsigned char rgbe[4];
676
- float linear[3];
677
- int x;
678
-
679
- scanlineheader[2] = (width&0xff00)>>8;
680
- scanlineheader[3] = (width&0x00ff);
681
-
682
- /* skip RLE for images too small or large */
683
- if (width < 8 || width >= 32768) {
684
- for (x=0; x < width; x++) {
685
- switch (ncomp) {
686
- case 4: /* fallthrough */
687
- case 3: linear[2] = scanline[x*ncomp + 2];
688
- linear[1] = scanline[x*ncomp + 1];
689
- linear[0] = scanline[x*ncomp + 0];
690
- break;
691
- default:
692
- linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
693
- break;
694
- }
695
- stbiw__linear_to_rgbe(rgbe, linear);
696
- s->func(s->context, rgbe, 4);
697
- }
698
- } else {
699
- int c,r;
700
- /* encode into scratch buffer */
701
- for (x=0; x < width; x++) {
702
- switch(ncomp) {
703
- case 4: /* fallthrough */
704
- case 3: linear[2] = scanline[x*ncomp + 2];
705
- linear[1] = scanline[x*ncomp + 1];
706
- linear[0] = scanline[x*ncomp + 0];
707
- break;
708
- default:
709
- linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
710
- break;
711
- }
712
- stbiw__linear_to_rgbe(rgbe, linear);
713
- scratch[x + width*0] = rgbe[0];
714
- scratch[x + width*1] = rgbe[1];
715
- scratch[x + width*2] = rgbe[2];
716
- scratch[x + width*3] = rgbe[3];
717
- }
718
-
719
- s->func(s->context, scanlineheader, 4);
720
-
721
- /* RLE each component separately */
722
- for (c=0; c < 4; c++) {
723
- unsigned char *comp = &scratch[width*c];
724
-
725
- x = 0;
726
- while (x < width) {
727
- // find first run
728
- r = x;
729
- while (r+2 < width) {
730
- if (comp[r] == comp[r+1] && comp[r] == comp[r+2])
731
- break;
732
- ++r;
733
- }
734
- if (r+2 >= width)
735
- r = width;
736
- // dump up to first run
737
- while (x < r) {
738
- int len = r-x;
739
- if (len > 128) len = 128;
740
- stbiw__write_dump_data(s, len, &comp[x]);
741
- x += len;
742
- }
743
- // if there's a run, output it
744
- if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd
745
- // find next byte after run
746
- while (r < width && comp[r] == comp[x])
747
- ++r;
748
- // output run up to r
749
- while (x < r) {
750
- int len = r-x;
751
- if (len > 127) len = 127;
752
- stbiw__write_run_data(s, len, comp[x]);
753
- x += len;
754
- }
755
- }
756
- }
757
- }
758
- }
759
- }
760
-
761
- static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data)
762
- {
763
- if (y <= 0 || x <= 0 || data == NULL)
764
- return 0;
765
- else {
766
- // Each component is stored separately. Allocate scratch space for full output scanline.
767
- unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4);
768
- int i, len;
769
- char buffer[128];
770
- char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
771
- s->func(s->context, header, sizeof(header)-1);
772
-
773
- #ifdef __STDC_LIB_EXT1__
774
- len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
775
- #else
776
- len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
777
- #endif
778
- s->func(s->context, buffer, len);
779
-
780
- for(i=0; i < y; i++)
781
- stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i));
782
- STBIW_FREE(scratch);
783
- return 1;
784
- }
785
- }
786
-
787
- STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data)
788
- {
789
- stbi__write_context s = { 0 };
790
- stbi__start_write_callbacks(&s, func, context);
791
- return stbi_write_hdr_core(&s, x, y, comp, (float *) data);
792
- }
793
-
794
- STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
795
- {
796
- stbi__write_context s = { 0 };
797
- if (stbi__start_write_file(&s,filename)) {
798
- int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data);
799
- stbi__end_write_file(&s);
800
- return r;
801
- } else
802
- return 0;
803
- }
804
- #endif // STBI_WRITE_NO_STDIO
805
-
806
-
807
- //////////////////////////////////////////////////////////////////////////////
808
- //
809
- // PNG writer
810
- //
811
-
812
- #ifndef STBIW_ZLIB_COMPRESS
813
- // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size()
814
- #define stbiw__sbraw(a) ((int *) (void *) (a) - 2)
815
- #define stbiw__sbm(a) stbiw__sbraw(a)[0]
816
- #define stbiw__sbn(a) stbiw__sbraw(a)[1]
817
-
818
- #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a))
819
- #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0)
820
- #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a)))
821
-
822
- #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v))
823
- #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0)
824
- #define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0)
825
-
826
- static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
827
- {
828
- int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1;
829
- void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2);
830
- STBIW_ASSERT(p);
831
- if (p) {
832
- if (!*arr) ((int *) p)[1] = 0;
833
- *arr = (void *) ((int *) p + 2);
834
- stbiw__sbm(*arr) = m;
835
- }
836
- return *arr;
837
- }
838
-
839
- static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
840
- {
841
- while (*bitcount >= 8) {
842
- stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));
843
- *bitbuffer >>= 8;
844
- *bitcount -= 8;
845
- }
846
- return data;
847
- }
848
-
849
- static int stbiw__zlib_bitrev(int code, int codebits)
850
- {
851
- int res=0;
852
- while (codebits--) {
853
- res = (res << 1) | (code & 1);
854
- code >>= 1;
855
- }
856
- return res;
857
- }
858
-
859
- static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
860
- {
861
- int i;
862
- for (i=0; i < limit && i < 258; ++i)
863
- if (a[i] != b[i]) break;
864
- return i;
865
- }
866
-
867
- static unsigned int stbiw__zhash(unsigned char *data)
868
- {
869
- stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);
870
- hash ^= hash << 3;
871
- hash += hash >> 5;
872
- hash ^= hash << 4;
873
- hash += hash >> 17;
874
- hash ^= hash << 25;
875
- hash += hash >> 6;
876
- return hash;
877
- }
878
-
879
- #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))
880
- #define stbiw__zlib_add(code,codebits) \
881
- (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())
882
- #define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c)
883
- // default huffman tables
884
- #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8)
885
- #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9)
886
- #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7)
887
- #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8)
888
- #define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))
889
- #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))
890
-
891
- #define stbiw__ZHASH 16384
892
-
893
- #endif // STBIW_ZLIB_COMPRESS
894
-
895
- STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
896
- {
897
- #ifdef STBIW_ZLIB_COMPRESS
898
- // user provided a zlib compress implementation, use that
899
- return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);
900
- #else // use builtin
901
- static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 };
902
- static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };
903
- static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 };
904
- static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
905
- unsigned int bitbuf=0;
906
- int i,j, bitcount=0;
907
- unsigned char *out = NULL;
908
- unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**));
909
- if (hash_table == NULL)
910
- return NULL;
911
- if (quality < 5) quality = 5;
912
-
913
- stbiw__sbpush(out, 0x78); // DEFLATE 32K window
914
- stbiw__sbpush(out, 0x5e); // FLEVEL = 1
915
- stbiw__zlib_add(1,1); // BFINAL = 1
916
- stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman
917
-
918
- for (i=0; i < stbiw__ZHASH; ++i)
919
- hash_table[i] = NULL;
920
-
921
- i=0;
922
- while (i < data_len-3) {
923
- // hash next 3 bytes of data to be compressed
924
- int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3;
925
- unsigned char *bestloc = 0;
926
- unsigned char **hlist = hash_table[h];
927
- int n = stbiw__sbcount(hlist);
928
- for (j=0; j < n; ++j) {
929
- if (hlist[j]-data > i-32768) { // if entry lies within window
930
- int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i);
931
- if (d >= best) { best=d; bestloc=hlist[j]; }
932
- }
933
- }
934
- // when hash table entry is too long, delete half the entries
935
- if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) {
936
- STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality);
937
- stbiw__sbn(hash_table[h]) = quality;
938
- }
939
- stbiw__sbpush(hash_table[h],data+i);
940
-
941
- if (bestloc) {
942
- // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
943
- h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1);
944
- hlist = hash_table[h];
945
- n = stbiw__sbcount(hlist);
946
- for (j=0; j < n; ++j) {
947
- if (hlist[j]-data > i-32767) {
948
- int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1);
949
- if (e > best) { // if next match is better, bail on current match
950
- bestloc = NULL;
951
- break;
952
- }
953
- }
954
- }
955
- }
956
-
957
- if (bestloc) {
958
- int d = (int) (data+i - bestloc); // distance back
959
- STBIW_ASSERT(d <= 32767 && best <= 258);
960
- for (j=0; best > lengthc[j+1]-1; ++j);
961
- stbiw__zlib_huff(j+257);
962
- if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]);
963
- for (j=0; d > distc[j+1]-1; ++j);
964
- stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5);
965
- if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]);
966
- i += best;
967
- } else {
968
- stbiw__zlib_huffb(data[i]);
969
- ++i;
970
- }
971
- }
972
- // write out final bytes
973
- for (;i < data_len; ++i)
974
- stbiw__zlib_huffb(data[i]);
975
- stbiw__zlib_huff(256); // end of block
976
- // pad with 0 bits to byte boundary
977
- while (bitcount)
978
- stbiw__zlib_add(0,1);
979
-
980
- for (i=0; i < stbiw__ZHASH; ++i)
981
- (void) stbiw__sbfree(hash_table[i]);
982
- STBIW_FREE(hash_table);
983
-
984
- // store uncompressed instead if compression was worse
985
- if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) {
986
- stbiw__sbn(out) = 2; // truncate to DEFLATE 32K window and FLEVEL = 1
987
- for (j = 0; j < data_len;) {
988
- int blocklen = data_len - j;
989
- if (blocklen > 32767) blocklen = 32767;
990
- stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression
991
- stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN
992
- stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8));
993
- stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN
994
- stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8));
995
- memcpy(out+stbiw__sbn(out), data+j, blocklen);
996
- stbiw__sbn(out) += blocklen;
997
- j += blocklen;
998
- }
999
- }
1000
-
1001
- {
1002
- // compute adler32 on input
1003
- unsigned int s1=1, s2=0;
1004
- int blocklen = (int) (data_len % 5552);
1005
- j=0;
1006
- while (j < data_len) {
1007
- for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; }
1008
- s1 %= 65521; s2 %= 65521;
1009
- j += blocklen;
1010
- blocklen = 5552;
1011
- }
1012
- stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));
1013
- stbiw__sbpush(out, STBIW_UCHAR(s2));
1014
- stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));
1015
- stbiw__sbpush(out, STBIW_UCHAR(s1));
1016
- }
1017
- *out_len = stbiw__sbn(out);
1018
- // make returned pointer freeable
1019
- STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
1020
- return (unsigned char *) stbiw__sbraw(out);
1021
- #endif // STBIW_ZLIB_COMPRESS
1022
- }
1023
-
1024
- static unsigned int stbiw__crc32(unsigned char *buffer, int len)
1025
- {
1026
- #ifdef STBIW_CRC32
1027
- return STBIW_CRC32(buffer, len);
1028
- #else
1029
- static unsigned int crc_table[256] =
1030
- {
1031
- 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
1032
- 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
1033
- 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
1034
- 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
1035
- 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
1036
- 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
1037
- 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
1038
- 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
1039
- 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
1040
- 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
1041
- 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
1042
- 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
1043
- 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
1044
- 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
1045
- 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
1046
- 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
1047
- 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
1048
- 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
1049
- 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
1050
- 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
1051
- 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
1052
- 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
1053
- 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
1054
- 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
1055
- 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
1056
- 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
1057
- 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
1058
- 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
1059
- 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
1060
- 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
1061
- 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
1062
- 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
1063
- };
1064
-
1065
- unsigned int crc = ~0u;
1066
- int i;
1067
- for (i=0; i < len; ++i)
1068
- crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)];
1069
- return ~crc;
1070
- #endif
1071
- }
1072
-
1073
- #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4)
1074
- #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v));
1075
- #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3])
1076
-
1077
- static void stbiw__wpcrc(unsigned char **data, int len)
1078
- {
1079
- unsigned int crc = stbiw__crc32(*data - len - 4, len+4);
1080
- stbiw__wp32(*data, crc);
1081
- }
1082
-
1083
- static unsigned char stbiw__paeth(int a, int b, int c)
1084
- {
1085
- int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c);
1086
- if (pa <= pb && pa <= pc) return STBIW_UCHAR(a);
1087
- if (pb <= pc) return STBIW_UCHAR(b);
1088
- return STBIW_UCHAR(c);
1089
- }
1090
-
1091
- // @OPTIMIZE: provide an option that always forces left-predict or paeth predict
1092
- static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer)
1093
- {
1094
- static int mapping[] = { 0,1,2,3,4 };
1095
- static int firstmap[] = { 0,1,0,5,6 };
1096
- int *mymap = (y != 0) ? mapping : firstmap;
1097
- int i;
1098
- int type = mymap[filter_type];
1099
- unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y);
1100
- int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;
1101
-
1102
- if (type==0) {
1103
- memcpy(line_buffer, z, width*n);
1104
- return;
1105
- }
1106
-
1107
- // first loop isn't optimized since it's just one pixel
1108
- for (i = 0; i < n; ++i) {
1109
- switch (type) {
1110
- case 1: line_buffer[i] = z[i]; break;
1111
- case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break;
1112
- case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break;
1113
- case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break;
1114
- case 5: line_buffer[i] = z[i]; break;
1115
- case 6: line_buffer[i] = z[i]; break;
1116
- }
1117
- }
1118
- switch (type) {
1119
- case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break;
1120
- case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break;
1121
- case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break;
1122
- case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break;
1123
- case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break;
1124
- case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break;
1125
- }
1126
- }
1127
-
1128
- STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len, const char* parameters)
1129
- {
1130
- int force_filter = stbi_write_force_png_filter;
1131
- int param_length = 0;
1132
- int ctype[5] = { -1, 0, 4, 2, 6 };
1133
- unsigned char sig[8] = { 137,80,78,71,13,10,26,10 };
1134
- unsigned char *out,*o, *filt, *zlib;
1135
- signed char *line_buffer;
1136
- int j,zlen;
1137
-
1138
- if (stride_bytes == 0)
1139
- stride_bytes = x * n;
1140
-
1141
- if (force_filter >= 5) {
1142
- force_filter = -1;
1143
- }
1144
-
1145
- filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0;
1146
- line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; }
1147
- for (j=0; j < y; ++j) {
1148
- int filter_type;
1149
- if (force_filter > -1) {
1150
- filter_type = force_filter;
1151
- stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);
1152
- } else { // Estimate the best filter by running through all of them:
1153
- int best_filter = 0, best_filter_val = 0x7fffffff, est, i;
1154
- for (filter_type = 0; filter_type < 5; filter_type++) {
1155
- stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);
1156
-
1157
- // Estimate the entropy of the line using this filter; the less, the better.
1158
- est = 0;
1159
- for (i = 0; i < x*n; ++i) {
1160
- est += abs((signed char) line_buffer[i]);
1161
- }
1162
- if (est < best_filter_val) {
1163
- best_filter_val = est;
1164
- best_filter = filter_type;
1165
- }
1166
- }
1167
- if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it
1168
- stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);
1169
- filter_type = best_filter;
1170
- }
1171
- }
1172
- // when we get here, filter_type contains the filter type, and line_buffer contains the data
1173
- filt[j*(x*n+1)] = (unsigned char) filter_type;
1174
- STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n);
1175
- }
1176
- STBIW_FREE(line_buffer);
1177
- zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level);
1178
- STBIW_FREE(filt);
1179
- if (!zlib) return 0;
1180
-
1181
- if(parameters != NULL) {
1182
- param_length = strlen(parameters);
1183
- param_length += strlen("parameters") + 1; // For the name and the null-byte
1184
- }
1185
-
1186
- // each tag requires 12 bytes of overhead
1187
- out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12 + ((parameters)?(param_length+12):0));
1188
- if (!out) return 0;
1189
- *out_len = 8 + 12+13 + 12+zlen + 12 + ((parameters)?(param_length+12):0);
1190
-
1191
- o=out;
1192
- STBIW_MEMMOVE(o,sig,8); o+= 8;
1193
- stbiw__wp32(o, 13); // header length
1194
- stbiw__wptag(o, "IHDR");
1195
- stbiw__wp32(o, x);
1196
- stbiw__wp32(o, y);
1197
- *o++ = 8;
1198
- *o++ = STBIW_UCHAR(ctype[n]);
1199
- *o++ = 0;
1200
- *o++ = 0;
1201
- *o++ = 0;
1202
- stbiw__wpcrc(&o,13);
1203
-
1204
- if(parameters != NULL) {
1205
- stbiw__wp32(o, param_length);
1206
- stbiw__wptag(o, "tEXt");
1207
- STBIW_MEMMOVE(o, "parameters", strlen("parameters"));
1208
- o+=strlen("parameters");
1209
- *o++ = 0; // Null pyte separator
1210
- STBIW_MEMMOVE(o, parameters, strlen(parameters));
1211
- o+=strlen(parameters);
1212
- stbiw__wpcrc(&o, param_length);
1213
- }
1214
-
1215
- stbiw__wp32(o, zlen);
1216
- stbiw__wptag(o, "IDAT");
1217
- STBIW_MEMMOVE(o, zlib, zlen);
1218
- o += zlen;
1219
- STBIW_FREE(zlib);
1220
- stbiw__wpcrc(&o, zlen);
1221
-
1222
- stbiw__wp32(o,0);
1223
- stbiw__wptag(o, "IEND");
1224
- stbiw__wpcrc(&o,0);
1225
-
1226
- STBIW_ASSERT(o == out + *out_len);
1227
-
1228
- return out;
1229
- }
1230
-
1231
- #ifndef STBI_WRITE_NO_STDIO
1232
- STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes, const char* parameters)
1233
- {
1234
- FILE *f;
1235
- int len;
1236
- unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len, parameters);
1237
- if (png == NULL) return 0;
1238
-
1239
- f = stbiw__fopen(filename, "wb");
1240
- if (!f) { STBIW_FREE(png); return 0; }
1241
- fwrite(png, 1, len, f);
1242
- fclose(f);
1243
- STBIW_FREE(png);
1244
- return 1;
1245
- }
1246
- #endif
1247
-
1248
- STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes)
1249
- {
1250
- int len;
1251
- unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len, NULL);
1252
- if (png == NULL) return 0;
1253
- func(context, png, len);
1254
- STBIW_FREE(png);
1255
- return 1;
1256
- }
1257
-
1258
-
1259
- /* ***************************************************************************
1260
- *
1261
- * JPEG writer
1262
- *
1263
- * This is based on Jon Olick's jo_jpeg.cpp:
1264
- * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html
1265
- */
1266
-
1267
- static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18,
1268
- 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 };
1269
-
1270
- static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) {
1271
- int bitBuf = *bitBufP, bitCnt = *bitCntP;
1272
- bitCnt += bs[1];
1273
- bitBuf |= bs[0] << (24 - bitCnt);
1274
- while(bitCnt >= 8) {
1275
- unsigned char c = (bitBuf >> 16) & 255;
1276
- stbiw__putc(s, c);
1277
- if(c == 255) {
1278
- stbiw__putc(s, 0);
1279
- }
1280
- bitBuf <<= 8;
1281
- bitCnt -= 8;
1282
- }
1283
- *bitBufP = bitBuf;
1284
- *bitCntP = bitCnt;
1285
- }
1286
-
1287
- static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) {
1288
- float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p;
1289
- float z1, z2, z3, z4, z5, z11, z13;
1290
-
1291
- float tmp0 = d0 + d7;
1292
- float tmp7 = d0 - d7;
1293
- float tmp1 = d1 + d6;
1294
- float tmp6 = d1 - d6;
1295
- float tmp2 = d2 + d5;
1296
- float tmp5 = d2 - d5;
1297
- float tmp3 = d3 + d4;
1298
- float tmp4 = d3 - d4;
1299
-
1300
- // Even part
1301
- float tmp10 = tmp0 + tmp3; // phase 2
1302
- float tmp13 = tmp0 - tmp3;
1303
- float tmp11 = tmp1 + tmp2;
1304
- float tmp12 = tmp1 - tmp2;
1305
-
1306
- d0 = tmp10 + tmp11; // phase 3
1307
- d4 = tmp10 - tmp11;
1308
-
1309
- z1 = (tmp12 + tmp13) * 0.707106781f; // c4
1310
- d2 = tmp13 + z1; // phase 5
1311
- d6 = tmp13 - z1;
1312
-
1313
- // Odd part
1314
- tmp10 = tmp4 + tmp5; // phase 2
1315
- tmp11 = tmp5 + tmp6;
1316
- tmp12 = tmp6 + tmp7;
1317
-
1318
- // The rotator is modified from fig 4-8 to avoid extra negations.
1319
- z5 = (tmp10 - tmp12) * 0.382683433f; // c6
1320
- z2 = tmp10 * 0.541196100f + z5; // c2-c6
1321
- z4 = tmp12 * 1.306562965f + z5; // c2+c6
1322
- z3 = tmp11 * 0.707106781f; // c4
1323
-
1324
- z11 = tmp7 + z3; // phase 5
1325
- z13 = tmp7 - z3;
1326
-
1327
- *d5p = z13 + z2; // phase 6
1328
- *d3p = z13 - z2;
1329
- *d1p = z11 + z4;
1330
- *d7p = z11 - z4;
1331
-
1332
- *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6;
1333
- }
1334
-
1335
- static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
1336
- int tmp1 = val < 0 ? -val : val;
1337
- val = val < 0 ? val-1 : val;
1338
- bits[1] = 1;
1339
- while(tmp1 >>= 1) {
1340
- ++bits[1];
1341
- }
1342
- bits[0] = val & ((1<<bits[1])-1);
1343
- }
1344
-
1345
- static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, int du_stride, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {
1346
- const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] };
1347
- const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] };
1348
- int dataOff, i, j, n, diff, end0pos, x, y;
1349
- int DU[64];
1350
-
1351
- // DCT rows
1352
- for(dataOff=0, n=du_stride*8; dataOff<n; dataOff+=du_stride) {
1353
- stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]);
1354
- }
1355
- // DCT columns
1356
- for(dataOff=0; dataOff<8; ++dataOff) {
1357
- stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+du_stride], &CDU[dataOff+du_stride*2], &CDU[dataOff+du_stride*3], &CDU[dataOff+du_stride*4],
1358
- &CDU[dataOff+du_stride*5], &CDU[dataOff+du_stride*6], &CDU[dataOff+du_stride*7]);
1359
- }
1360
- // Quantize/descale/zigzag the coefficients
1361
- for(y = 0, j=0; y < 8; ++y) {
1362
- for(x = 0; x < 8; ++x,++j) {
1363
- float v;
1364
- i = y*du_stride+x;
1365
- v = CDU[i]*fdtbl[j];
1366
- // DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f));
1367
- // ceilf() and floorf() are C99, not C89, but I /think/ they're not needed here anyway?
1368
- DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);
1369
- }
1370
- }
1371
-
1372
- // Encode DC
1373
- diff = DU[0] - DC;
1374
- if (diff == 0) {
1375
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);
1376
- } else {
1377
- unsigned short bits[2];
1378
- stbiw__jpg_calcBits(diff, bits);
1379
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]);
1380
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
1381
- }
1382
- // Encode ACs
1383
- end0pos = 63;
1384
- for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) {
1385
- }
1386
- // end0pos = first element in reverse order !=0
1387
- if(end0pos == 0) {
1388
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1389
- return DU[0];
1390
- }
1391
- for(i = 1; i <= end0pos; ++i) {
1392
- int startpos = i;
1393
- int nrzeroes;
1394
- unsigned short bits[2];
1395
- for (; DU[i]==0 && i<=end0pos; ++i) {
1396
- }
1397
- nrzeroes = i-startpos;
1398
- if ( nrzeroes >= 16 ) {
1399
- int lng = nrzeroes>>4;
1400
- int nrmarker;
1401
- for (nrmarker=1; nrmarker <= lng; ++nrmarker)
1402
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);
1403
- nrzeroes &= 15;
1404
- }
1405
- stbiw__jpg_calcBits(DU[i], bits);
1406
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]);
1407
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
1408
- }
1409
- if(end0pos != 63) {
1410
- stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1411
- }
1412
- return DU[0];
1413
- }
1414
-
1415
- static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
1416
- // Constants that don't pollute global namespace
1417
- static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
1418
- static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1419
- static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d};
1420
- static const unsigned char std_ac_luminance_values[] = {
1421
- 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,
1422
- 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,
1423
- 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,
1424
- 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
1425
- 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,
1426
- 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,
1427
- 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1428
- };
1429
- static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0};
1430
- static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1431
- static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77};
1432
- static const unsigned char std_ac_chrominance_values[] = {
1433
- 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,
1434
- 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,
1435
- 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,
1436
- 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
1437
- 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,
1438
- 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
1439
- 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1440
- };
1441
- // Huffman tables
1442
- static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}};
1443
- static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}};
1444
- static const unsigned short YAC_HT[256][2] = {
1445
- {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1446
- {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1447
- {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1448
- {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1449
- {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1450
- {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1451
- {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1452
- {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1453
- {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1454
- {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1455
- {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1456
- {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1457
- {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1458
- {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1459
- {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1460
- {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1461
- };
1462
- static const unsigned short UVAC_HT[256][2] = {
1463
- {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1464
- {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1465
- {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1466
- {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1467
- {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1468
- {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1469
- {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1470
- {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1471
- {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1472
- {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1473
- {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1474
- {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1475
- {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1476
- {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1477
- {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1478
- {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1479
- };
1480
- static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22,
1481
- 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
1482
- static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
1483
- 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
1484
- static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
1485
- 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
1486
-
1487
- int row, col, i, k, subsample;
1488
- float fdtbl_Y[64], fdtbl_UV[64];
1489
- unsigned char YTable[64], UVTable[64];
1490
-
1491
- if(!data || !width || !height || comp > 4 || comp < 1) {
1492
- return 0;
1493
- }
1494
-
1495
- quality = quality ? quality : 90;
1496
- subsample = quality <= 90 ? 1 : 0;
1497
- quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;
1498
- quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
1499
-
1500
- for(i = 0; i < 64; ++i) {
1501
- int uvti, yti = (YQT[i]*quality+50)/100;
1502
- YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti);
1503
- uvti = (UVQT[i]*quality+50)/100;
1504
- UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
1505
- }
1506
-
1507
- for(row = 0, k = 0; row < 8; ++row) {
1508
- for(col = 0; col < 8; ++col, ++k) {
1509
- fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1510
- fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1511
- }
1512
- }
1513
-
1514
- // Write Headers
1515
- {
1516
- static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 };
1517
- static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 };
1518
- const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width),
1519
- 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 };
1520
- s->func(s->context, (void*)head0, sizeof(head0));
1521
- s->func(s->context, (void*)YTable, sizeof(YTable));
1522
- stbiw__putc(s, 1);
1523
- s->func(s->context, UVTable, sizeof(UVTable));
1524
- s->func(s->context, (void*)head1, sizeof(head1));
1525
- s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
1526
- s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
1527
- stbiw__putc(s, 0x10); // HTYACinfo
1528
- s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1);
1529
- s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));
1530
- stbiw__putc(s, 1); // HTUDCinfo
1531
- s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1);
1532
- s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));
1533
- stbiw__putc(s, 0x11); // HTUACinfo
1534
- s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1);
1535
- s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));
1536
- s->func(s->context, (void*)head2, sizeof(head2));
1537
- }
1538
-
1539
- // Encode 8x8 macroblocks
1540
- {
1541
- static const unsigned short fillBits[] = {0x7F, 7};
1542
- int DCY=0, DCU=0, DCV=0;
1543
- int bitBuf=0, bitCnt=0;
1544
- // comp == 2 is grey+alpha (alpha is ignored)
1545
- int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;
1546
- const unsigned char *dataR = (const unsigned char *)data;
1547
- const unsigned char *dataG = dataR + ofsG;
1548
- const unsigned char *dataB = dataR + ofsB;
1549
- int x, y, pos;
1550
- if(subsample) {
1551
- for(y = 0; y < height; y += 16) {
1552
- for(x = 0; x < width; x += 16) {
1553
- float Y[256], U[256], V[256];
1554
- for(row = y, pos = 0; row < y+16; ++row) {
1555
- // row >= height => use last input row
1556
- int clamped_row = (row < height) ? row : height - 1;
1557
- int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
1558
- for(col = x; col < x+16; ++col, ++pos) {
1559
- // if col >= width => use pixel from last input column
1560
- int p = base_p + ((col < width) ? col : (width-1))*comp;
1561
- float r = dataR[p], g = dataG[p], b = dataB[p];
1562
- Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128;
1563
- U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b;
1564
- V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b;
1565
- }
1566
- }
1567
- DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1568
- DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1569
- DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1570
- DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1571
-
1572
- // subsample U,V
1573
- {
1574
- float subU[64], subV[64];
1575
- int yy, xx;
1576
- for(yy = 0, pos = 0; yy < 8; ++yy) {
1577
- for(xx = 0; xx < 8; ++xx, ++pos) {
1578
- int j = yy*32+xx*2;
1579
- subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f;
1580
- subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f;
1581
- }
1582
- }
1583
- DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
1584
- DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
1585
- }
1586
- }
1587
- }
1588
- } else {
1589
- for(y = 0; y < height; y += 8) {
1590
- for(x = 0; x < width; x += 8) {
1591
- float Y[64], U[64], V[64];
1592
- for(row = y, pos = 0; row < y+8; ++row) {
1593
- // row >= height => use last input row
1594
- int clamped_row = (row < height) ? row : height - 1;
1595
- int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
1596
- for(col = x; col < x+8; ++col, ++pos) {
1597
- // if col >= width => use pixel from last input column
1598
- int p = base_p + ((col < width) ? col : (width-1))*comp;
1599
- float r = dataR[p], g = dataG[p], b = dataB[p];
1600
- Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128;
1601
- U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b;
1602
- V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b;
1603
- }
1604
- }
1605
-
1606
- DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1607
- DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
1608
- DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
1609
- }
1610
- }
1611
- }
1612
-
1613
- // Do the bit alignment of the EOI marker
1614
- stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);
1615
- }
1616
-
1617
- // EOI
1618
- stbiw__putc(s, 0xFF);
1619
- stbiw__putc(s, 0xD9);
1620
-
1621
- return 1;
1622
- }
1623
-
1624
- STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality)
1625
- {
1626
- stbi__write_context s = { 0 };
1627
- stbi__start_write_callbacks(&s, func, context);
1628
- return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
1629
- }
1630
-
1631
-
1632
- #ifndef STBI_WRITE_NO_STDIO
1633
- STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
1634
- {
1635
- stbi__write_context s = { 0 };
1636
- if (stbi__start_write_file(&s,filename)) {
1637
- int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
1638
- stbi__end_write_file(&s);
1639
- return r;
1640
- } else
1641
- return 0;
1642
- }
1643
- #endif
1644
-
1645
- #endif // STB_IMAGE_WRITE_IMPLEMENTATION
1646
-
1647
- /* Revision history
1648
- 1.16 (2021-07-11)
1649
- make Deflate code emit uncompressed blocks when it would otherwise expand
1650
- support writing BMPs with alpha channel
1651
- 1.15 (2020-07-13) unknown
1652
- 1.14 (2020-02-02) updated JPEG writer to downsample chroma channels
1653
- 1.13
1654
- 1.12
1655
- 1.11 (2019-08-11)
1656
-
1657
- 1.10 (2019-02-07)
1658
- support utf8 filenames in Windows; fix warnings and platform ifdefs
1659
- 1.09 (2018-02-11)
1660
- fix typo in zlib quality API, improve STB_I_W_STATIC in C++
1661
- 1.08 (2018-01-29)
1662
- add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter
1663
- 1.07 (2017-07-24)
1664
- doc fix
1665
- 1.06 (2017-07-23)
1666
- writing JPEG (using Jon Olick's code)
1667
- 1.05 ???
1668
- 1.04 (2017-03-03)
1669
- monochrome BMP expansion
1670
- 1.03 ???
1671
- 1.02 (2016-04-02)
1672
- avoid allocating large structures on the stack
1673
- 1.01 (2016-01-16)
1674
- STBIW_REALLOC_SIZED: support allocators with no realloc support
1675
- avoid race-condition in crc initialization
1676
- minor compile issues
1677
- 1.00 (2015-09-14)
1678
- installable file IO function
1679
- 0.99 (2015-09-13)
1680
- warning fixes; TGA rle support
1681
- 0.98 (2015-04-08)
1682
- added STBIW_MALLOC, STBIW_ASSERT etc
1683
- 0.97 (2015-01-18)
1684
- fixed HDR asserts, rewrote HDR rle logic
1685
- 0.96 (2015-01-17)
1686
- add HDR output
1687
- fix monochrome BMP
1688
- 0.95 (2014-08-17)
1689
- add monochrome TGA output
1690
- 0.94 (2014-05-31)
1691
- rename private functions to avoid conflicts with stb_image.h
1692
- 0.93 (2014-05-27)
1693
- warning fixes
1694
- 0.92 (2010-08-01)
1695
- casts to unsigned char to fix warnings
1696
- 0.91 (2010-07-17)
1697
- first public release
1698
- 0.90 first internal release
1699
- */
1700
-
1701
- /*
1702
- ------------------------------------------------------------------------------
1703
- This software is available under 2 licenses -- choose whichever you prefer.
1704
- ------------------------------------------------------------------------------
1705
- ALTERNATIVE A - MIT License
1706
- Copyright (c) 2017 Sean Barrett
1707
- Permission is hereby granted, free of charge, to any person obtaining a copy of
1708
- this software and associated documentation files (the "Software"), to deal in
1709
- the Software without restriction, including without limitation the rights to
1710
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
1711
- of the Software, and to permit persons to whom the Software is furnished to do
1712
- so, subject to the following conditions:
1713
- The above copyright notice and this permission notice shall be included in all
1714
- copies or substantial portions of the Software.
1715
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1716
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1717
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1718
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1719
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1720
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1721
- SOFTWARE.
1722
- ------------------------------------------------------------------------------
1723
- ALTERNATIVE B - Public Domain (www.unlicense.org)
1724
- This is free and unencumbered software released into the public domain.
1725
- Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
1726
- software, either in source code form or as a compiled binary, for any purpose,
1727
- commercial or non-commercial, and by any means.
1728
- In jurisdictions that recognize copyright laws, the author or authors of this
1729
- software dedicate any and all copyright interest in the software to the public
1730
- domain. We make this dedication for the benefit of the public at large and to
1731
- the detriment of our heirs and successors. We intend this dedication to be an
1732
- overt act of relinquishment in perpetuity of all present and future rights to
1733
- this software under copyright law.
1734
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1735
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1736
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1737
- AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1738
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
1739
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1740
- ------------------------------------------------------------------------------
1741
- */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/models/.gitignore DELETED
@@ -1,5 +0,0 @@
1
- *.bin
2
- *.ckpt
3
- *.safetensor
4
- *.safetensors
5
- *.log
 
 
 
 
 
 
stable-diffusion.cpp/models/README.md DELETED
@@ -1,26 +0,0 @@
1
- # Model Convert Script
2
-
3
- ## Requirements
4
-
5
- - vocab.json, from https://huggingface.co/openai/clip-vit-large-patch14/raw/main/vocab.json
6
-
7
-
8
- ```shell
9
- pip install -r requirements.txt
10
- ```
11
-
12
- ## Usage
13
- ```
14
- usage: convert.py [-h] [--out_type {f32,f16,q4_0,q4_1,q5_0,q5_1,q8_0}] [--out_file OUT_FILE] model_path
15
-
16
- Convert Stable Diffusion model to GGML compatible file format
17
-
18
- positional arguments:
19
- model_path model file path (*.pth, *.pt, *.ckpt, *.safetensors)
20
-
21
- options:
22
- -h, --help show this help message and exit
23
- --out_type {f32,f16,q4_0,q4_1,q5_0,q5_1,q8_0}
24
- output format (default: based on input)
25
- --out_file OUT_FILE path to write to; default: based on input and current working directory
26
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/models/convert.py DELETED
@@ -1,385 +0,0 @@
1
- import struct
2
- import json
3
- import os
4
-
5
- import numpy as np
6
- import torch
7
- import safetensors.torch
8
-
9
# Directory that holds this script; vocab.json is read from the same place.
this_file_dir = os.path.dirname(__file__)
vocab_dir = this_file_dir

# Model family identifiers.
SD1, SD2 = 0, 1

# Output-format name -> integer code written in the file header.
ggml_ftype_str_to_int = dict(
    f32=0,
    f16=1,
    q4_0=2,
    q4_1=3,
    q5_0=8,
    q5_1=9,
    q8_0=7,
)

# Tensor-type name -> integer code written in each tensor header.
# Note that the q5_*/q8_0 codes differ from the file-type table above.
ggml_ttype_str_to_int = dict(
    f32=0,
    f16=1,
    q4_0=2,
    q4_1=3,
    q5_0=6,
    q5_1=7,
    q8_0=8,
)
34
-
35
QK4_0 = 32
def quantize_q4_0(x):
    """Quantize a float array to 4-bit blocks with one float16 scale per block.

    The input is flattened into blocks of ``QK4_0`` values. For each block the
    scale ``d`` is the largest-magnitude element (sign kept) divided by -8, and
    every value is stored as ``round(x / d + 8)`` clipped to ``[0, 15]``.

    Args:
        x: numpy float array whose last dimension is a multiple of ``QK4_0``
            and strictly larger than ``QK4_0``.

    Returns:
        int8 array of shape ``(n_blocks, 2 + QK4_0 // 2)``: the two raw bytes
        of the float16 scale followed by 16 bytes, each packing element ``i``
        (low nibble) and element ``i + 16`` (high nibble) of the block.
    """
    assert x.shape[-1] % QK4_0 == 0 and x.shape[-1] > QK4_0
    x = x.reshape(-1, QK4_0)
    # Signed value of the largest-magnitude element of each block.
    peak_idx = np.argmax(np.abs(x), axis=-1)[:, np.newaxis]
    peak = np.take_along_axis(x, peak_idx, axis=-1)
    d = peak / -8
    # An all-zero block has d == 0; dividing would produce NaN and an undefined
    # float->int8 cast. Leave such blocks at 0 so they encode as the midpoint 8.
    scaled = np.divide(
        x, d,
        out=np.zeros(x.shape, dtype=np.result_type(x, d)),
        where=(d != 0),
    )
    qs = (scaled + 8).round().clip(min=0, max=15).astype(np.int8)
    half = QK4_0 // 2
    # The shift wraps in int8, which keeps exactly the intended high nibble.
    qs = qs[:, :half] | (qs[:, half:] << 4)
    d = d.astype(np.float16).view(np.int8)
    return np.concatenate((d, qs), axis=-1)
47
-
48
QK4_1 = 32
def quantize_q4_1(x):
    """Quantize a float array to 4-bit blocks with a float16 scale and minimum.

    The input is flattened into blocks of ``QK4_1`` values. For each block,
    ``d = (max - min) / 15`` and values are stored as ``round((x - min) / d)``
    clipped to ``[0, 15]``.

    Args:
        x: numpy float array whose last dimension is a multiple of ``QK4_1``
            and strictly larger than ``QK4_1``.

    Returns:
        int8 array of shape ``(n_blocks, 4 + QK4_1 // 2)``: raw float16 bytes
        of the scale, raw float16 bytes of the block minimum, then 16 bytes
        packing element ``i`` (low nibble) and ``i + 16`` (high nibble).
    """
    assert x.shape[-1] % QK4_1 == 0 and x.shape[-1] > QK4_1
    x = x.reshape(-1, QK4_1)
    lo = np.min(x, axis=-1, keepdims=True)
    hi = np.max(x, axis=-1, keepdims=True)
    d = (hi - lo) / ((1 << 4) - 1)
    offset = x - lo
    # A constant block has d == 0 (0/0 -> NaN otherwise); encode it as all zeros,
    # which reconstructs exactly to the stored minimum.
    scaled = np.divide(
        offset, d,
        out=np.zeros(offset.shape, dtype=np.result_type(offset, d)),
        where=(d != 0),
    )
    qs = scaled.round().clip(min=0, max=15).astype(np.int8)
    half = QK4_1 // 2
    qs = qs[:, :half] | (qs[:, half:] << 4)
    d = d.astype(np.float16).view(np.int8)
    m = lo.astype(np.float16).view(np.int8)
    return np.concatenate((d, m, qs), axis=-1)
62
-
63
QK5_0 = 32
def quantize_q5_0(x):
    """Quantize a float array to 5-bit blocks with one float16 scale per block.

    The input is flattened into blocks of ``QK5_0`` values. For each block the
    scale ``d`` is the largest-magnitude element (sign kept) divided by -16 and
    values are stored as ``round(x / d + 16)`` clipped to ``[0, 31]``.

    Args:
        x: numpy float array whose last dimension is a multiple of ``QK5_0``
            and strictly larger than ``QK5_0``.

    Returns:
        int8 array of shape ``(n_blocks, 2 + 4 + QK5_0 // 2)``: raw float16
        bytes of the scale, a 32-bit word (native byte order) whose bit ``i``
        is the fifth bit of element ``i``, then 16 bytes packing the low four
        bits of element ``i`` (low nibble) and ``i + 16`` (high nibble).
    """
    assert x.shape[-1] % QK5_0 == 0 and x.shape[-1] > QK5_0
    x = x.reshape(-1, QK5_0)
    peak_idx = np.argmax(np.abs(x), axis=-1)[:, np.newaxis]
    peak = np.take_along_axis(x, peak_idx, axis=-1)
    d = peak / -16
    # An all-zero block has d == 0; avoid 0/0 -> NaN and an undefined int cast.
    scaled = np.divide(
        x, d,
        out=np.zeros(x.shape, dtype=np.result_type(x, d)),
        where=(d != 0),
    )
    xi = (scaled + 16).round().clip(min=0, max=31).astype(np.int8)
    half = QK5_0 // 2
    # The int8 shift wraps, dropping the fifth bit and keeping the low nibble.
    qs = (xi[:, :half] & 0x0F) | (xi[:, half:] << 4)
    # Gather the fifth bit of every element into one unsigned 32-bit word.
    fifth_bits = ((xi >> 4) & 1).astype(np.uint32)
    qh = np.bitwise_or.reduce(fifth_bits << np.arange(QK5_0, dtype=np.uint32), axis=-1)
    d = d.astype(np.float16).view(np.int8)
    qh = qh[:, np.newaxis].view(np.int8)
    return np.concatenate((d, qh, qs), axis=-1)
79
-
80
QK5_1 = 32
def quantize_q5_1(x):
    """Quantize a float array to 5-bit blocks with a float16 scale and minimum.

    The input is flattened into blocks of ``QK5_1`` values. For each block,
    ``d = (max - min) / 31`` and values are stored as ``round((x - min) / d)``
    clipped to ``[0, 31]``.

    Args:
        x: numpy float array whose last dimension is a multiple of ``QK5_1``
            and strictly larger than ``QK5_1``.

    Returns:
        int8 array of shape ``(n_blocks, 2 + 2 + 4 + QK5_1 // 2)``: raw float16
        bytes of the scale, raw float16 bytes of the minimum, a 32-bit word
        (native byte order) whose bit ``i`` is the fifth bit of element ``i``,
        then 16 bytes packing the low four bits of element ``i`` (low nibble)
        and ``i + 16`` (high nibble).
    """
    assert x.shape[-1] % QK5_1 == 0 and x.shape[-1] > QK5_1
    x = x.reshape(-1, QK5_1)
    lo = np.min(x, axis=-1, keepdims=True)
    hi = np.max(x, axis=-1, keepdims=True)
    d = (hi - lo) / ((1 << 5) - 1)
    offset = x - lo
    # A constant block has d == 0 (0/0 -> NaN otherwise); encode it as all zeros.
    scaled = np.divide(
        offset, d,
        out=np.zeros(offset.shape, dtype=np.result_type(offset, d)),
        where=(d != 0),
    )
    xi = scaled.round().clip(min=0, max=31).astype(np.int8)
    half = QK5_1 // 2
    qs = (xi[:, :half] & 0x0F) | (xi[:, half:] << 4)
    # Gather the fifth bit of every element into one unsigned 32-bit word.
    fifth_bits = ((xi >> 4) & 1).astype(np.uint32)
    qh = np.bitwise_or.reduce(fifth_bits << np.arange(QK5_1, dtype=np.uint32), axis=-1)
    d = d.astype(np.float16).view(np.int8)
    m = lo.astype(np.float16).view(np.int8)
    qh = qh[:, np.newaxis].view(np.int8)
    return np.concatenate((d, m, qh, qs), axis=-1)
98
-
99
QK8_0 = 32
def quantize_q8_0(x):
    """Quantize a float array to 8-bit blocks with one float16 scale per block.

    The input is flattened into blocks of ``QK8_0`` values. For each block,
    ``d = max(|x|) / 127`` and values are stored as ``round(x / d)`` clipped
    to ``[-128, 127]``.

    Args:
        x: numpy float array whose last dimension is a multiple of ``QK8_0``
            and strictly larger than ``QK8_0``.

    Returns:
        int8 array of shape ``(n_blocks, 2 + QK8_0)``: the two raw bytes of the
        float16 scale followed by the 32 quantized values.
    """
    assert x.shape[-1] % QK8_0 == 0 and x.shape[-1] > QK8_0
    x = x.reshape(-1, QK8_0)
    amax = np.max(np.abs(x), axis=-1, keepdims=True)
    d = amax / ((1 << 7) - 1)
    # An all-zero block has d == 0; avoid 0/0 -> NaN and store zeros instead.
    scaled = np.divide(
        x, d,
        out=np.zeros(x.shape, dtype=np.result_type(x, d)),
        where=(d != 0),
    )
    qs = scaled.round().clip(min=-128, max=127).astype(np.int8)
    d = d.astype(np.float16).view(np.int8)
    return np.concatenate((d, qs), axis=-1)
109
-
110
- # copy from https://github.com/openai/CLIP/blob/main/clip/simple_tokenizer.py#L16
111
def bytes_to_unicode():
    """Build the reversible byte -> unicode-character table used by the BPE vocab.

    Bytes in the ranges "!".."~", "¡".."¬" and "®".."ÿ" map to the character
    with the same code point. Every other byte value (in increasing order) is
    assigned the next unused code point starting at 256, so no byte maps to a
    whitespace or control character.

    Returns:
        dict mapping each of the 256 byte values to a one-character string;
        the self-mapped bytes come first, followed by the remapped ones.
    """
    self_mapped = [
        *range(ord("!"), ord("~") + 1),
        *range(ord("¡"), ord("¬") + 1),
        *range(ord("®"), ord("ÿ") + 1),
    ]
    table = {byte: chr(byte) for byte in self_mapped}
    shift = 0
    for byte in range(2**8):
        if byte in table:
            continue
        # Remaining bytes are pushed above the 8-bit range, one slot each.
        table[byte] = chr(2**8 + shift)
        shift += 1
    return table
131
-
132
def load_model_from_file(model_path):
    """Load a checkpoint onto the CPU and return its state dict.

    Files whose name ends in ".safetensors" (case-insensitive) are read with
    safetensors; everything else goes through ``torch.load``. If the loaded
    object contains a "state_dict" entry, that entry is returned; otherwise
    the loaded object itself is returned.
    """
    print("loading model from {}".format(model_path))
    use_safetensors = model_path.lower().endswith(".safetensors")
    if use_safetensors:
        checkpoint = safetensors.torch.load_file(model_path, device="cpu")
    else:
        # NOTE(review): torch.load unpickles the file; only use it on
        # checkpoints from a trusted source.
        checkpoint = torch.load(model_path, map_location="cpu")
    if "state_dict" in checkpoint:
        state_dict = checkpoint["state_dict"]
    else:
        state_dict = checkpoint
    print("loading model from {} completed".format(model_path))
    return state_dict
141
-
142
def get_alpha_comprod(linear_start=0.00085, linear_end=0.0120, timesteps=1000):
    """Return the cumulative product of ``1 - beta`` over ``timesteps`` steps.

    The betas are the squares of ``timesteps`` evenly spaced float32 values
    between ``sqrt(linear_start)`` and ``sqrt(linear_end)``.

    Returns:
        1-D float32 torch tensor of length ``timesteps``.
    """
    sqrt_betas = torch.linspace(
        linear_start ** 0.5,
        linear_end ** 0.5,
        timesteps,
        dtype=torch.float32,
    )
    alphas = 1. - sqrt_betas ** 2
    # The running product is computed with numpy and wrapped back into a tensor.
    return torch.tensor(np.cumprod(alphas.numpy(), axis=0))
147
-
148
# Name prefixes of tensors that are never written to the output file.
unused_tensors = [
    "betas",
    "alphas_cumprod_prev",
    "sqrt_alphas_cumprod",
    "sqrt_one_minus_alphas_cumprod",
    "log_one_minus_alphas_cumprod",
    "sqrt_recip_alphas_cumprod",
    "sqrt_recipm1_alphas_cumprod",
    "posterior_variance",
    "posterior_log_variance_clipped",
    "posterior_mean_coef1",
    "posterior_mean_coef2",
    "cond_stage_model.transformer.text_model.embeddings.position_ids",
    "cond_stage_model.model.logit_scale",
    "cond_stage_model.model.text_projection",
    "model_ema.decay",
    "model_ema.num_updates",
    "control_model",
    "lora_te_text_model",
    "embedding_manager"
]


def preprocess(state_dict):
    """Normalise a checkpoint's tensor names and shapes before conversion.

    Steps applied, in order, to every entry:
      * non-tensor values and names starting with an ``unused_tensors`` prefix
        are dropped;
      * bfloat16 tensors are cast to float16;
      * open_clip text-encoder names (SD2.x) are renamed to the HF
        CLIPTextModel layout, splitting fused ``in_proj`` weights/biases into
        separate q/k/v tensors;
      * 2-D ``proj_in``/``proj_out`` weights under ``model.diffusion_model.``
        and 2-D ``attn_1`` tensors under ``first_stage_model.`` get two
        trailing unit dimensions (linear -> 1x1 conv layout).

    If ``alphas_cumprod`` is missing it is generated with
    ``get_alpha_comprod()``.

    Args:
        state_dict: mapping of tensor name -> value. It is not modified.

    Returns:
        A new dict of name -> torch.Tensor.

    Raises:
        KeyError: for a text-encoder resblock tensor whose suffix has no
            known HF equivalent.
    """
    # Identity test: `!= None` on a tensor goes through tensor comparison.
    if state_dict.get("alphas_cumprod") is None:
        print("no alphas_cumprod in file, generate new one")
        # Shallow copy so the caller's mapping is left untouched.
        state_dict = dict(state_dict)
        state_dict["alphas_cumprod"] = get_alpha_comprod()

    # Rename tables are loop-invariant, so build them once.
    # convert open_clip to hf CLIPTextModel (for SD2.x)
    open_clip_to_hf_clip_model = {
        "cond_stage_model.model.ln_final.bias": "cond_stage_model.transformer.text_model.final_layer_norm.bias",
        "cond_stage_model.model.ln_final.weight": "cond_stage_model.transformer.text_model.final_layer_norm.weight",
        "cond_stage_model.model.positional_embedding": "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight",
        "cond_stage_model.model.token_embedding.weight": "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight",
        "first_stage_model.decoder.mid.attn_1.to_k.bias": "first_stage_model.decoder.mid.attn_1.k.bias",
        "first_stage_model.decoder.mid.attn_1.to_k.weight": "first_stage_model.decoder.mid.attn_1.k.weight",
        "first_stage_model.decoder.mid.attn_1.to_out.0.bias": "first_stage_model.decoder.mid.attn_1.proj_out.bias",
        "first_stage_model.decoder.mid.attn_1.to_out.0.weight": "first_stage_model.decoder.mid.attn_1.proj_out.weight",
        "first_stage_model.decoder.mid.attn_1.to_q.bias": "first_stage_model.decoder.mid.attn_1.q.bias",
        "first_stage_model.decoder.mid.attn_1.to_q.weight": "first_stage_model.decoder.mid.attn_1.q.weight",
        "first_stage_model.decoder.mid.attn_1.to_v.bias": "first_stage_model.decoder.mid.attn_1.v.bias",
        "first_stage_model.decoder.mid.attn_1.to_v.weight": "first_stage_model.decoder.mid.attn_1.v.weight",
    }
    open_clip_to_hk_clip_resblock = {
        "attn.out_proj.bias": "self_attn.out_proj.bias",
        "attn.out_proj.weight": "self_attn.out_proj.weight",
        "ln_1.bias": "layer_norm1.bias",
        "ln_1.weight": "layer_norm1.weight",
        "ln_2.bias": "layer_norm2.bias",
        "ln_2.weight": "layer_norm2.weight",
        "mlp.c_fc.bias": "mlp.fc1.bias",
        "mlp.c_fc.weight": "mlp.fc1.weight",
        "mlp.c_proj.bias": "mlp.fc2.bias",
        "mlp.c_proj.weight": "mlp.fc2.weight",
    }
    # Fused in_proj tensors are split into three equal chunks, in q/k/v order.
    fused_qkv_suffixes = {
        "attn.in_proj_weight": ["self_attn.q_proj.weight", "self_attn.k_proj.weight", "self_attn.v_proj.weight"],
        "attn.in_proj_bias": ["self_attn.q_proj.bias", "self_attn.k_proj.bias", "self_attn.v_proj.bias"],
    }
    open_clip_resblock_prefix = "cond_stage_model.model.transformer.resblocks."
    hf_clip_resblock_prefix = "cond_stage_model.transformer.text_model.encoder.layers."

    new_state_dict = {}
    for name, w in state_dict.items():
        # ignore unused tensors
        if not isinstance(w, torch.Tensor):
            continue
        if any(name.startswith(prefix) for prefix in unused_tensors):
            continue

        # convert BF16 to FP16
        if w.dtype == torch.bfloat16:
            w = w.to(torch.float16)

        if name in open_clip_to_hf_clip_model:
            new_name = open_clip_to_hf_clip_model[name]
            print(f"preprocess {name} => {new_name}")
            name = new_name

        if name.startswith(open_clip_resblock_prefix):
            remain = name[len(open_clip_resblock_prefix):]
            idx = remain.split(".")[0]
            suffix = remain[len(idx)+1:]
            if suffix in fused_qkv_suffixes:
                for new_suffix, new_w in zip(fused_qkv_suffixes[suffix], w.chunk(3)):
                    new_name = hf_clip_resblock_prefix + idx + "." + new_suffix
                    new_state_dict[new_name] = new_w
                    print(f"preprocess {name}{w.size()} => {new_name}{new_w.size()}")
            else:
                new_suffix = open_clip_to_hk_clip_resblock[suffix]
                new_name = hf_clip_resblock_prefix + idx + "." + new_suffix
                new_state_dict[new_name] = w
                print(f"preprocess {name} => {new_name}")
            continue

        # convert unet transformer linear to conv2d 1x1, and
        # convert vae attn block linear to conv2d 1x1
        is_unet_proj = name.startswith("model.diffusion_model.") and (
            name.endswith("proj_in.weight") or name.endswith("proj_out.weight")
        )
        is_vae_attn = name.startswith("first_stage_model.") and "attn_1" in name
        if (is_unet_proj or is_vae_attn) and len(w.shape) == 2:
            new_w = w.unsqueeze(2).unsqueeze(3)
            new_state_dict[name] = new_w
            print(f"preprocess {name} {w.size()} => {name} {new_w.size()}")
            continue

        new_state_dict[name] = w
    return new_state_dict
272
-
273
def convert(model_path, out_type = None, out_file=None):
    """Convert a Stable Diffusion checkpoint into a GGML-style binary file.

    File layout, in write order (all integers via ``struct.pack("i", ...)``):
      1. magic ``0x67676D6C``;
      2. ``(model_type << 16) | ggml_ftype_str_to_int[out_type]``;
      3. vocabulary: entry count, then for each entry its byte length and bytes;
      4. for each tensor: ``n_dims``, name length, tensor-type code, the
         dimensions in reverse order, the UTF-8 name, then the raw data.

    Tensor encoding: 4-D tensors are always stored as f16; 2-D tensors whose
    name ends in ".weight" use ``out_type``; everything else is stored as f32.

    Args:
        model_path: checkpoint path, passed to ``load_model_from_file``.
        out_type: one of the ``ggml_ftype_str_to_int`` keys. When None it is
            derived from the dtype of
            ``model.diffusion_model.input_blocks.0.0.weight``.
        out_file: output path. When None, a name derived from ``model_path``
            and ``out_type`` in the current working directory is used.

    Raises:
        Exception: unsupported input dtype when inferring ``out_type``, or an
            ``out_type`` with no known encoder for 2-D weights.
        KeyError: ``out_type`` missing from ``ggml_ftype_str_to_int``.
    """
    # load model
    with open(os.path.join(vocab_dir, "vocab.json"), encoding="utf-8") as f:
        clip_vocab = json.load(f)

    state_dict = load_model_from_file(model_path)
    model_type = SD1
    # The open_clip token embedding name is what identifies a 2.x checkpoint.
    if "cond_stage_model.model.token_embedding.weight" in state_dict.keys():
        model_type = SD2
        print("Stable diffusion 2.x")
    else:
        print("Stable diffusion 1.x")
    state_dict = preprocess(state_dict)

    # output option
    if out_type is None:
        weight = state_dict["model.diffusion_model.input_blocks.0.0.weight"].numpy()
        # float64 checkpoints are written as f32.
        inferred = {
            np.dtype(np.float32): "f32",
            np.dtype(np.float16): "f16",
            np.dtype(np.float64): "f32",
        }.get(weight.dtype)
        if inferred is None:
            raise Exception("unsupported weight type %s" % weight.dtype)
        out_type = inferred
    if out_file is None:
        out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-model-{out_type}.bin"
        out_file = os.path.join(os.getcwd(), out_file)
    print(f"Saving GGML compatible file to {out_file}")

    # Encoders for 2-D ".weight" tensors, keyed by out_type.
    weight_encoders = {
        "f32": lambda a: a.astype(np.float32),
        "f16": lambda a: a.astype(np.float16),
        "q4_0": quantize_q4_0,
        "q4_1": quantize_q4_1,
        "q5_0": quantize_q5_0,
        "q5_1": quantize_q5_1,
        "q8_0": quantize_q8_0,
    }

    # convert and save
    with open(out_file, "wb") as file:
        # magic: ggml in hex
        file.write(struct.pack("i", 0x67676D6C))
        # model & file type
        ftype = (model_type << 16) | ggml_ftype_str_to_int[out_type]
        file.write(struct.pack("i", ftype))

        # vocab: map each vocab key back from its unicode stand-ins to raw bytes
        byte_encoder = bytes_to_unicode()
        byte_decoder = {v: k for k, v in byte_encoder.items()}
        file.write(struct.pack("i", len(clip_vocab)))
        for key in clip_vocab:
            text = bytearray([byte_decoder[c] for c in key])
            file.write(struct.pack("i", len(text)))
            file.write(text)

        # weights
        for name, tensor in state_dict.items():
            if not isinstance(tensor, torch.Tensor):
                continue
            # Prefix match also covers exact matches against unused_tensors.
            if any(name.startswith(prefix) for prefix in unused_tensors):
                continue
            data = tensor.numpy()

            n_dims = len(data.shape)
            shape = data.shape  # original shape; quantizing changes data.shape
            old_type = data.dtype

            if n_dims == 4:
                data = data.astype(np.float16)
                ttype = "f16"
            elif n_dims == 2 and name.endswith(".weight"):
                encode = weight_encoders.get(out_type)
                if encode is None:
                    raise Exception("invalid out_type {}".format(out_type))
                data = encode(data)
                ttype = out_type
            else:
                data = data.astype(np.float32)
                ttype = "f32"

            print("Processing tensor: {} with shape {}, {} -> {}".format(name, data.shape, old_type, ttype))

            # header
            name_bytes = name.encode("utf-8")
            file.write(struct.pack("iii", n_dims, len(name_bytes), ggml_ttype_str_to_int[ttype]))
            # Dimensions are written innermost-first.
            for i in range(n_dims):
                file.write(struct.pack("i", shape[n_dims - 1 - i]))
            file.write(name_bytes)
            # data
            data.tofile(file)
        print("Convert done")
        print(f"Saved GGML compatible file to {out_file}")
377
-
378
# Command-line entry point: parse the arguments and run the conversion.
if __name__ == "__main__":
    import argparse
    # Description typo fixed: "Diffuison" -> "Diffusion".
    parser = argparse.ArgumentParser(description="Convert Stable Diffusion model to GGML compatible file format")
    parser.add_argument("--out_type", choices=["f32", "f16", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0"], help="output format (default: based on input)")
    parser.add_argument("--out_file", help="path to write to; default: based on input and current working directory")
    parser.add_argument("model_path", help="model file path (*.pth, *.pt, *.ckpt, *.safetensors)")
    args = parser.parse_args()
    convert(args.model_path, args.out_type, args.out_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/models/requirements.txt DELETED
@@ -1,4 +0,0 @@
1
- numpy
2
- torch
3
- safetensors
4
- pytorch_lightning
 
 
 
 
 
stable-diffusion.cpp/models/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
stable-diffusion.cpp/rng.h DELETED
@@ -1,35 +0,0 @@
1
- #ifndef __RNG_H__
2
- #define __RNG_H__
3
-
4
- #include <random>
5
- #include <vector>
6
-
7
// Abstract interface for the random number generators in this project.
// Concrete generators derive from it and implement both pure virtuals.
class RNG {
public:
    // Virtual destructor: generators are meant to be used through the RNG
    // interface, and destroying a derived object through a base pointer
    // without a virtual destructor is undefined behaviour.
    virtual ~RNG() = default;

    // Re-seeds the generator with `seed`.
    virtual void manual_seed(uint64_t seed) = 0;

    // Returns a vector of `n` random floats.
    virtual std::vector<float> randn(uint32_t n) = 0;
};
12
-
13
- class STDDefaultRNG : public RNG {
14
- private:
15
- std::default_random_engine generator;
16
-
17
- public:
18
- void manual_seed(uint64_t seed) {
19
- generator.seed(seed);
20
- }
21
-
22
- std::vector<float> randn(uint32_t n) {
23
- std::vector<float> result;
24
- float mean = 0.0;
25
- float stddev = 1.0;
26
- std::normal_distribution<float> distribution(mean, stddev);
27
- for (int i = 0; i < n; i++) {
28
- float random_number = distribution(generator);
29
- result.push_back(random_number);
30
- }
31
- return result;
32
- }
33
- };
34
-
35
- #endif // __RNG_H__
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/rng_philox.h DELETED
@@ -1,125 +0,0 @@
1
- #ifndef __RNG_PHILOX_H__
2
- #define __RNG_PHILOX_H__
3
-
4
- #include <cmath>
5
- #include <vector>
6
-
7
- #include "rng.h"
8
-
9
- // RNG imitiating torch cuda randn on CPU.
10
- // Port from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/5ef669de080814067961f28357256e8fe27544f4/modules/rng_philox.py
11
- class PhiloxRNG : public RNG {
12
- private:
13
- uint64_t seed;
14
- uint32_t offset;
15
-
16
- private:
17
- std::vector<uint32_t> philox_m = {0xD2511F53, 0xCD9E8D57};
18
- std::vector<uint32_t> philox_w = {0x9E3779B9, 0xBB67AE85};
19
- float two_pow32_inv = 2.3283064e-10;
20
- float two_pow32_inv_2pi = 2.3283064e-10 * 6.2831855;
21
-
22
- std::vector<uint32_t> uint32(uint64_t x) {
23
- std::vector<uint32_t> result(2);
24
- result[0] = static_cast<uint32_t>(x & 0xFFFFFFFF);
25
- result[1] = static_cast<uint32_t>(x >> 32);
26
- return result;
27
- }
28
-
29
- std::vector<std::vector<uint32_t>> uint32(const std::vector<uint64_t>& x) {
30
- int N = x.size();
31
- std::vector<std::vector<uint32_t>> result(2, std::vector<uint32_t>(N));
32
-
33
- for (int i = 0; i < N; ++i) {
34
- result[0][i] = static_cast<uint32_t>(x[i] & 0xFFFFFFFF);
35
- result[1][i] = static_cast<uint32_t>(x[i] >> 32);
36
- }
37
-
38
- return result;
39
- }
40
-
41
- // A single round of the Philox 4x32 random number generator.
42
- void philox4_round(std::vector<std::vector<uint32_t>>& counter,
43
- const std::vector<std::vector<uint32_t>>& key) {
44
- uint32_t N = counter[0].size();
45
- for (uint32_t i = 0; i < N; i++) {
46
- std::vector<uint32_t> v1 = uint32(static_cast<uint64_t>(counter[0][i]) * static_cast<uint64_t>(philox_m[0]));
47
- std::vector<uint32_t> v2 = uint32(static_cast<uint64_t>(counter[2][i]) * static_cast<uint64_t>(philox_m[1]));
48
-
49
- counter[0][i] = v2[1] ^ counter[1][i] ^ key[0][i];
50
- counter[1][i] = v2[0];
51
- counter[2][i] = v1[1] ^ counter[3][i] ^ key[1][i];
52
- counter[3][i] = v1[0];
53
- }
54
- }
55
-
56
- // Generates 32-bit random numbers using the Philox 4x32 random number generator.
57
- // Parameters:
58
- // counter : A 4xN array of 32-bit integers representing the counter values (offset into generation).
59
- // key : A 2xN array of 32-bit integers representing the key values (seed).
60
- // rounds : The number of rounds to perform.
61
- // Returns:
62
- // std::vector<std::vector<uint32_t>>: A 4xN array of 32-bit integers containing the generated random numbers.
63
- std::vector<std::vector<uint32_t>> philox4_32(std::vector<std::vector<uint32_t>>& counter,
64
- std::vector<std::vector<uint32_t>>& key,
65
- int rounds = 10) {
66
- uint32_t N = counter[0].size();
67
- for (int i = 0; i < rounds - 1; ++i) {
68
- philox4_round(counter, key);
69
-
70
- for (uint32_t j = 0; j < N; ++j) {
71
- key[0][j] += philox_w[0];
72
- key[1][j] += philox_w[1];
73
- }
74
- }
75
-
76
- philox4_round(counter, key);
77
- return counter;
78
- }
79
-
80
- float box_muller(float x, float y) {
81
- float u = x * two_pow32_inv + two_pow32_inv / 2;
82
- float v = y * two_pow32_inv_2pi + two_pow32_inv_2pi / 2;
83
-
84
- float s = sqrt(-2.0 * log(u));
85
-
86
- float r1 = s * sin(v);
87
- return r1;
88
- }
89
-
90
- public:
91
- PhiloxRNG(uint64_t seed = 0) {
92
- this->seed = seed;
93
- this->offset = 0;
94
- }
95
-
96
- void manual_seed(uint64_t seed) {
97
- this->seed = seed;
98
- this->offset = 0;
99
- }
100
-
101
- std::vector<float> randn(uint32_t n) {
102
- std::vector<std::vector<uint32_t>> counter(4, std::vector<uint32_t>(n, 0));
103
- for (uint32_t i = 0; i < n; i++) {
104
- counter[0][i] = this->offset;
105
- }
106
-
107
- for (uint32_t i = 0; i < n; i++) {
108
- counter[2][i] = i;
109
- }
110
- this->offset += 1;
111
-
112
- std::vector<uint64_t> key(n, this->seed);
113
- std::vector<std::vector<uint32_t>> key_uint32 = uint32(key);
114
-
115
- std::vector<std::vector<uint32_t>> g = philox4_32(counter, key_uint32);
116
-
117
- std::vector<float> result;
118
- for (int i = 0; i < n; ++i) {
119
- result.push_back(box_muller(g[0][i], g[1][i]));
120
- }
121
- return result;
122
- }
123
- };
124
-
125
- #endif // __RNG_PHILOX_H__
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
stable-diffusion.cpp/stable-diffusion.cpp DELETED
The diff for this file is too large to render. See raw diff
 
stable-diffusion.cpp/stable-diffusion.h DELETED
@@ -1,74 +0,0 @@
1
- #ifndef __STABLE_DIFFUSION_H__
2
- #define __STABLE_DIFFUSION_H__
3
-
4
- #include <memory>
5
- #include <vector>
6
-
7
// Log levels accepted by set_sd_log_level().
// NOTE(review): these are unscoped enumerators, so names such as DEBUG and
// ERROR can collide with preprocessor macros defined by some platform or
// build headers -- confirm before including this header alongside them.
enum SDLogLevel {
    DEBUG = 0,
    INFO,
    WARN,
    ERROR
};
13
-
14
// Selects the random number generator; passed as `rng_type` to the
// StableDiffusion constructor.
// NOTE(review): presumably STD_DEFAULT_RNG maps to STDDefaultRNG and CUDA_RNG
// to PhiloxRNG -- confirm in stable-diffusion.cpp.
enum RNGType {
    STD_DEFAULT_RNG = 0,
    CUDA_RNG
};
18
-
19
// Sampling methods selectable through the `sample_method` argument of
// StableDiffusion::txt2img() and StableDiffusion::img2img().
enum SampleMethod {
    EULER_A = 0,
    EULER,
    HEUN,
    DPM2,
    DPMPP2S_A,
    DPMPP2M,
    DPMPP2Mv2,
    // Not a method: as the last enumerator its value equals the number of
    // methods listed above. Keep it last.
    N_SAMPLE_METHODS
};
29
-
30
// Schedule choices accepted by StableDiffusion::load_from_file().
enum Schedule {
    DEFAULT = 0,
    DISCRETE,
    KARRAS,
    // Not a schedule: as the last enumerator its value equals the number of
    // schedules listed above. Keep it last.
    N_SCHEDULES
};
36
-
37
- class StableDiffusionGGML;
38
-
39
- class StableDiffusion {
40
- private:
41
- std::shared_ptr<StableDiffusionGGML> sd;
42
-
43
- public:
44
- StableDiffusion(int n_threads = -1,
45
- bool vae_decode_only = false,
46
- bool free_params_immediately = false,
47
- RNGType rng_type = STD_DEFAULT_RNG);
48
- bool load_from_file(const std::string& file_path, Schedule d = DEFAULT);
49
- std::vector<uint8_t> txt2img(
50
- const std::string& prompt,
51
- const std::string& negative_prompt,
52
- float cfg_scale,
53
- int width,
54
- int height,
55
- SampleMethod sample_method,
56
- int sample_steps,
57
- int64_t seed);
58
- std::vector<uint8_t> img2img(
59
- const std::vector<uint8_t>& init_img,
60
- const std::string& prompt,
61
- const std::string& negative_prompt,
62
- float cfg_scale,
63
- int width,
64
- int height,
65
- SampleMethod sample_method,
66
- int sample_steps,
67
- float strength,
68
- int64_t seed);
69
- };
70
-
71
- void set_sd_log_level(SDLogLevel level);
72
- std::string sd_get_system_info();
73
-
74
- #endif // __STABLE_DIFFUSION_H__