toshas committed on
Commit
700480e
1 Parent(s): fe07a4d

update vae, text_encoder, model_index.json, README.md

Browse files
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ pipeline_tag: normals-estimation-lcm
6
+ tags:
7
+ - monocular normals estimation
8
+ - single image normals estimation
9
+ - normals
10
+ - in-the-wild
11
+ - zero-shot
12
+ - LCM
13
+ ---
14
+ # Marigold Normals (LCM) Model Card
15
+
16
+ This model belongs to the family of diffusion-based Marigold models for solving various computer vision tasks.
17
+ The Marigold Normals model focuses on the surface normals estimation task.
18
+ It takes an input image and computes the surface normal at each pixel.
19
+ LCM stands for Latent Consistency Models, a technique for making diffusion models fast.
20
+ The Marigold Normals model is fine-tuned from Stable Diffusion on synthetic data, and the LCM model is further fine-tuned from it.
21
+ Thanks to the rich visual knowledge stored in Stable Diffusion, Marigold models possess deep scene understanding and excel at solving computer vision tasks.
22
+ Read more about Marigold in our paper titled "Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation".
23
+
24
+ [![Website](doc/badges/badge-website.svg)](https://marigoldmonodepth.github.io)
25
+ [![GitHub](https://img.shields.io/github/stars/prs-eth/Marigold?style=default&label=GitHub%20★&logo=github)](https://github.com/prs-eth/Marigold)
26
+ [![Paper](doc/badges/badge-pdf.svg)](https://arxiv.org/abs/2312.02145)
27
+ [![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/toshas/marigold)
28
+
29
+ Developed by:
30
+ [Bingxin Ke](http://www.kebingxin.com/),
31
+ [Anton Obukhov](https://www.obukhov.ai/),
32
+ [Shengyu Huang](https://shengyuh.github.io/),
33
+ [Nando Metzger](https://nandometzger.github.io/),
34
+ [Rodrigo Caye Daudt](https://rcdaudt.github.io/),
35
+ [Konrad Schindler](https://scholar.google.com/citations?user=FZuNgqIAAAAJ&hl=en)
36
+
37
+ ![teaser](doc/teaser_collage_transparant.png)
38
+
39
+ ## 🎓 Citation
40
+
41
+ ```bibtex
42
+ @InProceedings{ke2023repurposing,
43
+ title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
44
+ author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
45
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
46
+ year={2024}
47
+ }
48
+ ```
49
+
50
+ ## 🎫 License
51
+
52
+ This work is licensed under the Apache License, Version 2.0 (as defined in the [LICENSE](LICENSE.txt)).
53
+
54
+ By downloading and using the code and model, you agree to the terms in the [LICENSE](LICENSE.txt).
55
+
56
+ [![License](https://img.shields.io/badge/License-Apache--2.0-929292)](https://www.apache.org/licenses/LICENSE-2.0)
doc/badges/badge-colab.svg ADDED
doc/badges/badge-docker.svg ADDED
doc/badges/badge-license.svg ADDED
doc/badges/badge-pdf.svg ADDED
doc/badges/badge-website.svg ADDED
doc/teaser_collage_transparant.png ADDED

Git LFS Details

  • SHA256: 9ac22708df13690f231aae38a833a49efb38ce0479e3aa14213034fda7aac970
  • Pointer size: 132 Bytes
  • Size of remote file: 5.14 MB
model_index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_class_name":"MarigoldPipeline",
3
- "_diffusers_version":"0.25.0",
4
  "unet":[
5
  "diffusers",
6
  "UNet2DConditionModel"
 
1
  {
2
  "_class_name":"MarigoldPipeline",
3
+ "_diffusers_version":"0.24.0",
4
  "unet":[
5
  "diffusers",
6
  "UNet2DConditionModel"
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/Users/anton/.cache/huggingface/hub/models--prs-eth--marigold-v1-0/snapshots/0f4e0021da7a153804301f8b988e8b7b4daf056b/text_encoder",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
@@ -20,6 +20,6 @@
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.40.0",
24
  "vocab_size": 49408
25
  }
 
1
  {
2
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
 
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.25.0.dev0",
24
  "vocab_size": 49408
25
  }
text_encoder/model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1827c465450322616f06dea41596eac7d493f4e95904dcb51f0fc745c4e13f
3
+ size 680820392
text_encoder/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67e013543d4fac905c882e2993d86a2d454ee69dc9e8f37c0c23d33a48959d15
3
- size 1361596304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4aa519f64dc6386f88221a66c106a09fa027b47a20cc0e126687695f2a6669
3
+ size 1361597016
text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c787e9388134c1a25dc69934a51a32a2683b38b8a9b017e1f3a692b8ed6b98
3
+ size 1361679905
text_encoder/pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfc767ee894a2d26166aa7c22b7b297a1ff8e246493734490dd048087d4c9c07
3
+ size 680899947
vae/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_class_name": "AutoencoderKL",
3
- "_diffusers_version": "0.27.2",
4
- "_name_or_path": "/Users/anton/.cache/huggingface/hub/models--prs-eth--marigold-v1-0/snapshots/0f4e0021da7a153804301f8b988e8b7b4daf056b/vae",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
@@ -15,16 +15,12 @@
15
  "DownEncoderBlock2D",
16
  "DownEncoderBlock2D"
17
  ],
18
- "force_upcast": true,
19
  "in_channels": 3,
20
  "latent_channels": 4,
21
- "latents_mean": null,
22
- "latents_std": null,
23
  "layers_per_block": 2,
24
  "norm_num_groups": 32,
25
  "out_channels": 3,
26
  "sample_size": 768,
27
- "scaling_factor": 0.18215,
28
  "up_block_types": [
29
  "UpDecoderBlock2D",
30
  "UpDecoderBlock2D",
 
1
  {
2
  "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.8.0",
4
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
 
15
  "DownEncoderBlock2D",
16
  "DownEncoderBlock2D"
17
  ],
 
18
  "in_channels": 3,
19
  "latent_channels": 4,
 
 
20
  "layers_per_block": 2,
21
  "norm_num_groups": 32,
22
  "out_channels": 3,
23
  "sample_size": 768,
 
24
  "up_block_types": [
25
  "UpDecoderBlock2D",
26
  "UpDecoderBlock2D",
vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4302e1efa25f3a47ceb7536bc335715ad9d1f203e90c2d25507600d74006e89
3
+ size 334715313
vae/diffusion_pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44915add42092106e70bffac475aae4283b5e8167a8a0c5f55ccc667ee4ebeb5
3
+ size 167405651
vae/diffusion_pytorch_model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4c08995484ee61270175e9e7a072b66a6e4eeb5f0c266667fe1f45b90daf9a
3
+ size 167335342
vae/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2aa1f43011b553a4cba7f37456465cdbd48aab7b54b9348b890e8058ea7683ec
3
- size 334643268
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
3
+ size 334643276