update vae, text_encoder, model_index.json, README.md
Browse files- README.md +56 -0
- doc/badges/badge-colab.svg +33 -0
- doc/badges/badge-docker.svg +29 -0
- doc/badges/badge-license.svg +27 -0
- doc/badges/badge-pdf.svg +27 -0
- doc/badges/badge-website.svg +129 -0
- doc/teaser_collage_transparant.png +3 -0
- model_index.json +1 -1
- text_encoder/config.json +2 -2
- text_encoder/model.fp16.safetensors +3 -0
- text_encoder/model.safetensors +2 -2
- text_encoder/pytorch_model.bin +3 -0
- text_encoder/pytorch_model.fp16.bin +3 -0
- vae/config.json +2 -6
- vae/diffusion_pytorch_model.bin +3 -0
- vae/diffusion_pytorch_model.fp16.bin +3 -0
- vae/diffusion_pytorch_model.fp16.safetensors +3 -0
- vae/diffusion_pytorch_model.safetensors +2 -2
README.md
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
pipeline_tag: normals-estimation-lcm
|
6 |
+
tags:
|
7 |
+
- monocular normals estimation
|
8 |
+
- single image normals estimation
|
9 |
+
- normals
|
10 |
+
- in-the-wild
|
11 |
+
- zero-shot
|
12 |
+
- LCM
|
13 |
+
---
|
14 |
+
# Marigold Normals (LCM) Model Card
|
15 |
+
|
16 |
+
This model belongs to the family of diffusion-based Marigold models for solving various computer vision tasks.
|
17 |
+
The Marigold Normals model focuses on the surface normals task.
|
18 |
+
It takes an input image and computes the surface normal at each pixel.
|
19 |
+
LCM stands for Latent Consistency Model, a distillation technique that makes diffusion-model inference fast by requiring only a few denoising steps.
|
20 |
+
The Marigold Normals model is obtained by fine-tuning Stable Diffusion on synthetic data, and the LCM model is further fine-tuned from it.
|
21 |
+
Thanks to the rich visual knowledge stored in Stable Diffusion, Marigold models possess deep scene understanding and excel at solving computer vision tasks.
|
22 |
+
Read more about Marigold in our paper titled "Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation".
|
23 |
+
|
24 |
+
[![Website](doc/badges/badge-website.svg)](https://marigoldmonodepth.github.io)
|
25 |
+
[![GitHub](https://img.shields.io/github/stars/prs-eth/Marigold?style=default&label=GitHub%20★&logo=github)](https://github.com/prs-eth/Marigold)
|
26 |
+
[![Paper](doc/badges/badge-pdf.svg)](https://arxiv.org/abs/2312.02145)
|
27 |
+
[![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/toshas/marigold)
|
28 |
+
|
29 |
+
Developed by:
|
30 |
+
[Bingxin Ke](http://www.kebingxin.com/),
|
31 |
+
[Anton Obukhov](https://www.obukhov.ai/),
|
32 |
+
[Shengyu Huang](https://shengyuh.github.io/),
|
33 |
+
[Nando Metzger](https://nandometzger.github.io/),
|
34 |
+
[Rodrigo Caye Daudt](https://rcdaudt.github.io/),
|
35 |
+
[Konrad Schindler](https://scholar.google.com/citations?user=FZuNgqIAAAAJ&hl=en)
|
36 |
+
|
37 |
+
![teaser](doc/teaser_collage_transparant.png)
|
38 |
+
|
39 |
+
## 🎓 Citation
|
40 |
+
|
41 |
+
```bibtex
|
42 |
+
@InProceedings{ke2023repurposing,
|
43 |
+
title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
|
44 |
+
author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
|
45 |
+
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
46 |
+
year={2024}
|
47 |
+
}
|
48 |
+
```
|
49 |
+
|
50 |
+
## 🎫 License
|
51 |
+
|
52 |
+
This work is licensed under the Apache License, Version 2.0 (as defined in the [LICENSE](LICENSE.txt)).
|
53 |
+
|
54 |
+
By downloading and using the code and model, you agree to the terms in the [LICENSE](LICENSE.txt).
|
55 |
+
|
56 |
+
[![License](https://img.shields.io/badge/License-Apache--2.0-929292)](https://www.apache.org/licenses/LICENSE-2.0)
|
doc/badges/badge-colab.svg
ADDED
doc/badges/badge-docker.svg
ADDED
doc/badges/badge-license.svg
ADDED
doc/badges/badge-pdf.svg
ADDED
doc/badges/badge-website.svg
ADDED
doc/teaser_collage_transparant.png
ADDED
Git LFS Details
|
model_index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"_class_name":"MarigoldPipeline",
|
3 |
-
"_diffusers_version":"0.
|
4 |
"unet":[
|
5 |
"diffusers",
|
6 |
"UNet2DConditionModel"
|
|
|
1 |
{
|
2 |
"_class_name":"MarigoldPipeline",
|
3 |
+
"_diffusers_version":"0.24.0",
|
4 |
"unet":[
|
5 |
"diffusers",
|
6 |
"UNet2DConditionModel"
|
text_encoder/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/
|
3 |
"architectures": [
|
4 |
"CLIPTextModel"
|
5 |
],
|
@@ -20,6 +20,6 @@
|
|
20 |
"pad_token_id": 1,
|
21 |
"projection_dim": 512,
|
22 |
"torch_dtype": "float32",
|
23 |
-
"transformers_version": "4.
|
24 |
"vocab_size": 49408
|
25 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
|
3 |
"architectures": [
|
4 |
"CLIPTextModel"
|
5 |
],
|
|
|
20 |
"pad_token_id": 1,
|
21 |
"projection_dim": 512,
|
22 |
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.25.0.dev0",
|
24 |
"vocab_size": 49408
|
25 |
}
|
text_encoder/model.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc1827c465450322616f06dea41596eac7d493f4e95904dcb51f0fc745c4e13f
|
3 |
+
size 680820392
|
text_encoder/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e4aa519f64dc6386f88221a66c106a09fa027b47a20cc0e126687695f2a6669
|
3 |
+
size 1361597016
|
text_encoder/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9c787e9388134c1a25dc69934a51a32a2683b38b8a9b017e1f3a692b8ed6b98
|
3 |
+
size 1361679905
|
text_encoder/pytorch_model.fp16.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfc767ee894a2d26166aa7c22b7b297a1ff8e246493734490dd048087d4c9c07
|
3 |
+
size 680899947
|
vae/config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"_class_name": "AutoencoderKL",
|
3 |
-
"_diffusers_version": "0.
|
4 |
-
"_name_or_path": "/
|
5 |
"act_fn": "silu",
|
6 |
"block_out_channels": [
|
7 |
128,
|
@@ -15,16 +15,12 @@
|
|
15 |
"DownEncoderBlock2D",
|
16 |
"DownEncoderBlock2D"
|
17 |
],
|
18 |
-
"force_upcast": true,
|
19 |
"in_channels": 3,
|
20 |
"latent_channels": 4,
|
21 |
-
"latents_mean": null,
|
22 |
-
"latents_std": null,
|
23 |
"layers_per_block": 2,
|
24 |
"norm_num_groups": 32,
|
25 |
"out_channels": 3,
|
26 |
"sample_size": 768,
|
27 |
-
"scaling_factor": 0.18215,
|
28 |
"up_block_types": [
|
29 |
"UpDecoderBlock2D",
|
30 |
"UpDecoderBlock2D",
|
|
|
1 |
{
|
2 |
"_class_name": "AutoencoderKL",
|
3 |
+
"_diffusers_version": "0.8.0",
|
4 |
+
"_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
|
5 |
"act_fn": "silu",
|
6 |
"block_out_channels": [
|
7 |
128,
|
|
|
15 |
"DownEncoderBlock2D",
|
16 |
"DownEncoderBlock2D"
|
17 |
],
|
|
|
18 |
"in_channels": 3,
|
19 |
"latent_channels": 4,
|
|
|
|
|
20 |
"layers_per_block": 2,
|
21 |
"norm_num_groups": 32,
|
22 |
"out_channels": 3,
|
23 |
"sample_size": 768,
|
|
|
24 |
"up_block_types": [
|
25 |
"UpDecoderBlock2D",
|
26 |
"UpDecoderBlock2D",
|
vae/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4302e1efa25f3a47ceb7536bc335715ad9d1f203e90c2d25507600d74006e89
|
3 |
+
size 334715313
|
vae/diffusion_pytorch_model.fp16.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44915add42092106e70bffac475aae4283b5e8167a8a0c5f55ccc667ee4ebeb5
|
3 |
+
size 167405651
|
vae/diffusion_pytorch_model.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e4c08995484ee61270175e9e7a072b66a6e4eeb5f0c266667fe1f45b90daf9a
|
3 |
+
size 167335342
|
vae/diffusion_pytorch_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
|
3 |
+
size 334643276
|