toshas committed on Apr 27

Commit

700480e

•

1 Parent(s): fe07a4d

update vae, text_encoder, model_index.json, README.md

Browse files

Files changed (18) hide show

README.md +56 -0
doc/badges/badge-colab.svg +33 -0
doc/badges/badge-docker.svg +29 -0
doc/badges/badge-license.svg +27 -0
doc/badges/badge-pdf.svg +27 -0
doc/badges/badge-website.svg +129 -0
doc/teaser_collage_transparant.png +3 -0
model_index.json +1 -1
text_encoder/config.json +2 -2
text_encoder/model.fp16.safetensors +3 -0
text_encoder/model.safetensors +2 -2
text_encoder/pytorch_model.bin +3 -0
text_encoder/pytorch_model.fp16.bin +3 -0
vae/config.json +2 -6
vae/diffusion_pytorch_model.bin +3 -0
vae/diffusion_pytorch_model.fp16.bin +3 -0
vae/diffusion_pytorch_model.fp16.safetensors +3 -0
vae/diffusion_pytorch_model.safetensors +2 -2

README.md ADDED Viewed

	@@ -0,0 +1,56 @@

+---
+license: apache-2.0
+language:
+- en
+pipeline_tag: normals-estimation-lcm
+tags:
+- monocular normals estimation
+- single image normals estimation
+- normals
+- in-the-wild
+- zero-shot
+- LCM
+---
+# Marigold Normals (LCM) Model Card
+This model belongs to the family of diffusion-based Marigold models for solving various computer vision tasks.
+The Marigold Normals model focuses on the surface normals task.
+It takes an input image and computes a surface normal at each pixel.
+LCM stands for Latent Consistency Model, a technique for making diffusion model inference fast.
+The Marigold Normals model is trained from Stable Diffusion with synthetic data, and the LCM model is further fine-tuned from it.
+Thanks to the rich visual knowledge stored in Stable Diffusion, Marigold models possess deep scene understanding and excel at solving computer vision tasks.
+Read more about Marigold in our paper titled "Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation".
+[![Website](doc/badges/badge-website.svg)](https://marigoldmonodepth.github.io)
+[![GitHub](https://img.shields.io/github/stars/prs-eth/Marigold?style=default&label=GitHub%20★&logo=github)](https://github.com/prs-eth/Marigold)
+[![Paper](doc/badges/badge-pdf.svg)](https://arxiv.org/abs/2312.02145)
+[![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/toshas/marigold)
+Developed by:
+[Bingxin Ke](http://www.kebingxin.com/),
+[Anton Obukhov](https://www.obukhov.ai/),
+[Shengyu Huang](https://shengyuh.github.io/),
+[Nando Metzger](https://nandometzger.github.io/),
+[Rodrigo Caye Daudt](https://rcdaudt.github.io/),
+[Konrad Schindler](https://scholar.google.com/citations?user=FZuNgqIAAAAJ&hl=en)
+![teaser](doc/teaser_collage_transparant.png)
+## 🎓 Citation
+```bibtex
+@InProceedings{ke2023repurposing,
+      title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
+      author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
+      booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+      year={2024}
+}
+```
+## 🎫 License
+This work is licensed under the Apache License, Version 2.0 (as defined in the [LICENSE](LICENSE.txt)).
+By downloading and using the code and model, you agree to the terms in the [LICENSE](LICENSE.txt).
+[![License](https://img.shields.io/badge/License-Apache--2.0-929292)](https://www.apache.org/licenses/LICENSE-2.0)

doc/badges/badge-colab.svg ADDED Viewed

doc/badges/badge-docker.svg ADDED Viewed

doc/badges/badge-license.svg ADDED Viewed

doc/badges/badge-pdf.svg ADDED Viewed

doc/badges/badge-website.svg ADDED Viewed

doc/teaser_collage_transparant.png ADDED Viewed

Git LFS Details

SHA256: 9ac22708df13690f231aae38a833a49efb38ce0479e3aa14213034fda7aac970
Pointer size: 132 Bytes
Size of remote file: 5.14 MB

model_index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "_class_name":"MarigoldPipeline",
-  "_diffusers_version":"0.25.0",
   "unet":[
      "diffusers",
      "UNet2DConditionModel"

 {
   "_class_name":"MarigoldPipeline",
+  "_diffusers_version":"0.24.0",
   "unet":[
      "diffusers",
      "UNet2DConditionModel"

text_encoder/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/Users/anton/.cache/huggingface/hub/models--prs-eth--marigold-v1-0/snapshots/0f4e0021da7a153804301f8b988e8b7b4daf056b/text_encoder",
   "architectures": [
     "CLIPTextModel"
   ],
@@ -20,6 +20,6 @@
   "pad_token_id": 1,
   "projection_dim": 512,
   "torch_dtype": "float32",
-  "transformers_version": "4.40.0",
   "vocab_size": 49408
 }

 {
+  "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
   "architectures": [
     "CLIPTextModel"
   ],
   "pad_token_id": 1,
   "projection_dim": 512,
   "torch_dtype": "float32",
+  "transformers_version": "4.25.0.dev0",
   "vocab_size": 49408
 }

text_encoder/model.fp16.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc1827c465450322616f06dea41596eac7d493f4e95904dcb51f0fc745c4e13f
+size 680820392

text_encoder/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67e013543d4fac905c882e2993d86a2d454ee69dc9e8f37c0c23d33a48959d15
-size 1361596304

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e4aa519f64dc6386f88221a66c106a09fa027b47a20cc0e126687695f2a6669
+size 1361597016

text_encoder/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9c787e9388134c1a25dc69934a51a32a2683b38b8a9b017e1f3a692b8ed6b98
+size 1361679905

text_encoder/pytorch_model.fp16.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bfc767ee894a2d26166aa7c22b7b297a1ff8e246493734490dd048087d4c9c07
+size 680899947

vae/config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.27.2",
-  "_name_or_path": "/Users/anton/.cache/huggingface/hub/models--prs-eth--marigold-v1-0/snapshots/0f4e0021da7a153804301f8b988e8b7b4daf056b/vae",
   "act_fn": "silu",
   "block_out_channels": [
     128,
@@ -15,16 +15,12 @@
     "DownEncoderBlock2D",
     "DownEncoderBlock2D"
   ],
-  "force_upcast": true,
   "in_channels": 3,
   "latent_channels": 4,
-  "latents_mean": null,
-  "latents_std": null,
   "layers_per_block": 2,
   "norm_num_groups": 32,
   "out_channels": 3,
   "sample_size": 768,
-  "scaling_factor": 0.18215,
   "up_block_types": [
     "UpDecoderBlock2D",
     "UpDecoderBlock2D",

 {
   "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.8.0",
+  "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
   "act_fn": "silu",
   "block_out_channels": [
     128,
     "DownEncoderBlock2D",
     "DownEncoderBlock2D"
   ],
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,
   "norm_num_groups": 32,
   "out_channels": 3,
   "sample_size": 768,
   "up_block_types": [
     "UpDecoderBlock2D",
     "UpDecoderBlock2D",

vae/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4302e1efa25f3a47ceb7536bc335715ad9d1f203e90c2d25507600d74006e89
+size 334715313

vae/diffusion_pytorch_model.fp16.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44915add42092106e70bffac475aae4283b5e8167a8a0c5f55ccc667ee4ebeb5
+size 167405651

vae/diffusion_pytorch_model.fp16.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e4c08995484ee61270175e9e7a072b66a6e4eeb5f0c266667fe1f45b90daf9a
+size 167335342

vae/diffusion_pytorch_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2aa1f43011b553a4cba7f37456465cdbd48aab7b54b9348b890e8058ea7683ec
-size 334643268

 version https://git-lfs.github.com/spec/v1
+oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
+size 334643276