KevinQu7 committed on
Commit a8e6640
1 Parent(s): 40be8c2

update .gitattributes
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,29 @@
 ---
-title: Marigold Iid Private
-emoji: 🏢
-colorFrom: gray
-colorTo: yellow
+title: Marigold Intrinsic Image Decomposition
+emoji: 🏵️
+colorFrom: purple
+colorTo: blue
 sdk: gradio
-sdk_version: 5.9.1
+sdk_version: 4.21.0
 app_file: app.py
-pinned: false
+pinned: true
+license: cc-by-sa-4.0
+hf_oauth: true
+hf_oauth_expiration_minutes: 43200
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+This is a demo of Marigold-IID, the state-of-the-art intrinsic image decomposition model for images in the wild.
+We provide two models:
+- Marigold-IID-Appearance, which predicts albedo, metallic, and roughness
+- Marigold-IID-Lighting, which predicts albedo, diffuse shading, and a non-diffuse residual
+
+Find out more in our CVPR 2024 paper, ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145):
+
+```
+@InProceedings{ke2023repurposing,
+    title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
+    author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
+    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+    year={2024}
+}
+```
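
For reference, a minimal sketch of how the two pipelines shipped in this commit are invoked (class name, checkpoint ID, and call arguments are taken from app.py below; the checkpoints may require an access token, which app.py reads from an environment variable):

```
# Minimal usage sketch, assuming the pipeline classes from this repo and the
# checkpoint names used in app.py; argument values mirror app.py's defaults.
import torch
from PIL import Image

from marigold_iid_appearance import MarigoldIIDAppearancePipeline

pipe = MarigoldIIDAppearancePipeline.from_pretrained(
    "prs-eth/marigold-iid-appearance-v1-1"  # token=... may be required
)
pipe = pipe.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

pipe_out = pipe(
    Image.open("files/image/cat.jpg"),
    denoising_steps=4,    # default_image_denoise_steps
    ensemble_size=1,      # default_image_ensemble_size
    processing_res=768,   # default_image_processing_res
)
pipe_out.albedo_colored.save("albedo.png")      # PIL.Image
pipe_out.material_colored.save("material.png")  # metallic & roughness visualization
```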
app.py CHANGED
@@ -14,7 +14,7 @@
 # --------------------------------------------------------------------------
 # If you find this code useful, we kindly ask you to cite our paper in your work.
 # Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
-# More information about the method can be found at https://marigoldmonodepth.github.io
+# More information about the method can be found at https://marigoldcomputervision.github.io
 # --------------------------------------------------------------------------
 from __future__ import annotations
 
@@ -28,7 +28,6 @@ import gradio as gr
 import numpy as np
 import torch as torch
 from PIL import Image
-from diffusers import UNet2DConditionModel
 
 from gradio_imageslider import ImageSlider
 from huggingface_hub import login
@@ -36,7 +35,7 @@ from huggingface_hub import login
 from gradio_patches.examples import Examples
 from gradio_patches.flagging import HuggingFaceDatasetSaver, FlagMethod
 from marigold_iid_appearance import MarigoldIIDAppearancePipeline
-from marigold_iid_residual import MarigoldIIDResidualPipeline
+from marigold_iid_lighting import MarigoldIIDLightingPipeline
 
 warnings.filterwarnings(
     "ignore", message=".*LoginButton created outside of a Blocks context.*"
@@ -48,36 +47,53 @@ default_image_denoise_steps = 4
 default_image_ensemble_size = 1
 default_image_processing_res = 768
 default_image_reproducuble = True
-default_model_type="appearance"
+default_model_type = "appearance"
 
 default_share_always_show_hf_logout_btn = True
 default_share_always_show_accordion = False
 
 loaded_pipelines = {}  # Cache to store loaded pipelines
-def process_with_loaded_pipeline(image_path, denoise_steps, ensemble_size, processing_res, model_type):
+
+
+def process_with_loaded_pipeline(
+    image_path,
+    model_type=default_model_type,
+    denoise_steps=default_image_denoise_steps,
+    ensemble_size=default_image_ensemble_size,
+    processing_res=default_image_processing_res,
+):
     # Load and cache the pipeline based on the model type.
     if model_type not in loaded_pipelines.keys():
-        auth_token = os.environ.get("KEV_DEV")
+        auth_token = os.environ.get("KEV_TOKEN")
         if model_type == "appearance":
-            loaded_pipelines[model_type] = MarigoldIIDAppearancePipeline.from_pretrained(
-                "prs-eth/marigold-iid-appearance-v1-1", token=auth_token
+            if "lighting" in loaded_pipelines.keys():
+                del loaded_pipelines[
+                    "lighting"
+                ]  # to save GPU memory. Can be removed if enough memory is available for faster switching between models
+                torch.cuda.empty_cache()
+            loaded_pipelines[model_type] = (
+                MarigoldIIDAppearancePipeline.from_pretrained(
+                    "prs-eth/marigold-iid-appearance-v1-1", token=auth_token
+                )
             )
-        elif model_type == "residual":
-            loaded_pipelines[model_type] = MarigoldIIDResidualPipeline.from_pretrained(
-                "prs-eth/marigold-iid-residual-v1-1", token=auth_token
+        elif model_type == "lighting":
+            if "appearance" in loaded_pipelines.keys():
+                del loaded_pipelines[
+                    "appearance"
+                ]  # to save GPU memory. Can be removed if enough memory is available for faster switching between models
+                torch.cuda.empty_cache()
+            loaded_pipelines[model_type] = MarigoldIIDLightingPipeline.from_pretrained(
+                "prs-eth/marigold-iid-lighting-v1-1", token=auth_token
             )
-
+
         # Move the pipeline to GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         loaded_pipelines[model_type] = loaded_pipelines[model_type].to(device)
         try:
-            import xformers
-
             loaded_pipelines[model_type].enable_xformers_memory_efficient_attention()
         except:
             pass  # run without xformers
-
+
     pipe = loaded_pipelines[model_type]
 
     # Process the image using the preloaded pipeline.
@@ -90,12 +106,14 @@ def process_with_loaded_pipeline(image_path, denoise_steps, ensemble_size, proce
         model_type=model_type,
     )
 
+
 def process_image_check(path_input):
     if path_input is None:
         raise gr.Error(
             "Missing image in the first pane: upload a file or use one from the gallery below."
         )
 
+
 def process_image(
     pipe,
     path_input,
@@ -111,73 +129,108 @@ def process_image(
 
     input_image = Image.open(path_input)
 
-
     pipe_out = pipe(
         input_image,
         denoising_steps=denoise_steps,
         ensemble_size=ensemble_size,
         processing_res=processing_res,
-        batch_size=1 if processing_res == 0 else 0,  # TODO: do we abuse "batch size" notation here?
+        batch_size=1
+        if processing_res == 0
+        else 0,  # TODO: do we abuse "batch size" notation here?
         seed=default_seed,
         show_progress_bar=True,
     )
-
+
     path_output_dir = os.path.splitext(path_input)[0] + "_output"
     os.makedirs(path_output_dir, exist_ok=True)
-
-    path_albedo_out = os.path.join(path_output_dir, f"{name_base}_albedo_fp32.npy")
-    path_albedo_out_vis = os.path.join(path_output_dir, f"{name_base}_albedo.png")
-
-    albedo = pipe_out.albedo
-    albedo_colored = pipe_out.albedo_colored
-
-    np.save(path_albedo_out, albedo)
-    albedo_colored.save(path_albedo_out_vis)
-
-
+
     if model_type == "appearance":
-        path_material_out = os.path.join(path_output_dir, f"{name_base}_material_fp32.npy")
-        path_material_out_vis = os.path.join(path_output_dir, f"{name_base}_material.png")
-
+        path_albedo_out = os.path.join(
+            path_output_dir, f"{name_base}_albedo_app_fp32.npy"
+        )
+        path_albedo_out_vis = os.path.join(
+            path_output_dir, f"{name_base}_albedo_app.png"
+        )
+        path_material_out = os.path.join(
+            path_output_dir, f"{name_base}_material_fp32.npy"
+        )
+        path_material_out_vis = os.path.join(
+            path_output_dir, f"{name_base}_material.png"
+        )
+
+        albedo = pipe_out.albedo
+        albedo_colored = pipe_out.albedo_colored
         material = pipe_out.material
         material_colored = pipe_out.material_colored
-
+
+        np.save(path_albedo_out, albedo)
+        albedo_colored.save(path_albedo_out_vis)
         np.save(path_material_out, material)
         material_colored.save(path_material_out_vis)
-
+
         return (
             [path_input, path_albedo_out_vis],
             [path_input, path_material_out_vis],
-            None,
-            [path_albedo_out_vis, path_material_out_vis, path_albedo_out, path_material_out],
+            [path_input, path_material_out_vis],  # placeholder which is not displayed
+            [
+                path_albedo_out_vis,
+                path_material_out_vis,
+                path_albedo_out,
+                path_material_out,
+            ],
         )
-
-    elif model_type == "residual":
-        path_shading_out = os.path.join(path_output_dir, f"{name_base}_shading_fp32.npy")
+
+    elif model_type == "lighting":
+        path_albedo_out = os.path.join(
+            path_output_dir, f"{name_base}_albedo_res_fp32.npy"
+        )
+        path_albedo_out_vis = os.path.join(
+            path_output_dir, f"{name_base}_albedo_res.png"
+        )
+        path_shading_out = os.path.join(
+            path_output_dir, f"{name_base}_shading_fp32.npy"
+        )
         path_shading_out_vis = os.path.join(path_output_dir, f"{name_base}_shading.png")
-        path_residual_out = os.path.join(path_output_dir, f"{name_base}_residual_fp32.npy")
-        path_residual_out_vis = os.path.join(path_output_dir, f"{name_base}_residual.png")
-
+        path_residual_out = os.path.join(
+            path_output_dir, f"{name_base}_residual_fp32.npy"
+        )
+        path_residual_out_vis = os.path.join(
+            path_output_dir, f"{name_base}_residual.png"
+        )
+
+        albedo = pipe_out.albedo
+        albedo_colored = pipe_out.albedo_colored
         shading = pipe_out.shading
         shading_colored = pipe_out.shading_colored
         residual = pipe_out.residual
         residual_colored = pipe_out.residual_colored
-
+
+        np.save(path_albedo_out, albedo)
+        albedo_colored.save(path_albedo_out_vis)
         np.save(path_shading_out, shading)
         shading_colored.save(path_shading_out_vis)
         np.save(path_residual_out, residual)
         residual_colored.save(path_residual_out_vis)
-
+
         return (
             [path_input, path_albedo_out_vis],
             [path_input, path_shading_out_vis],
             [path_input, path_residual_out_vis],
-            [path_albedo_out_vis, path_shading_out_vis, path_residual_out_vis, path_albedo_out, path_shading_out, path_residual_out],
+            [
+                path_albedo_out_vis,
+                path_shading_out_vis,
+                path_residual_out_vis,
+                path_albedo_out,
+                path_shading_out,
+                path_residual_out,
+            ],
         )
 
 
 def run_demo_server(hf_writer=None):
-    process_pipe_image = spaces.GPU(functools.partial(process_with_loaded_pipeline), duration=120)
+    process_pipe_image = spaces.GPU(
+        functools.partial(process_with_loaded_pipeline), duration=120
+    )
     gradio_theme = gr.themes.Default()
 
     with gr.Blocks(
@@ -233,7 +286,7 @@ def run_demo_server(hf_writer=None):
 
         gr.Markdown(
             """
-            # Marigold Normals Estimation
+            # Marigold Intrinsic Image Decomposition (IID)
 
            <p align="center">
                 <a title="Website" href="https://marigoldcomputervision.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
@@ -280,25 +333,25 @@ def run_demo_server(hf_writer=None):
                 )
                 model_type = gr.Radio(
                     [
-                        ("Appearance (Albedo & Material)", "appearance"),
-                        ("Residual (Albedo, Shading & Residual)", "residual"),
+                        ("Appearance (albedo & material)", "appearance"),
+                        ("Lighting (albedo, shading & residual)", "lighting"),
                     ],
-                    label="Model Type",
+                    label="Model type: Marigold-IID-Appearance or Marigold-IID-Lighting",
                     value=default_model_type,
                 )
-
+
                 with gr.Accordion("Advanced options", open=True):
                     image_ensemble_size = gr.Slider(
                         label="Ensemble size",
                         minimum=1,
-                        maximum=10,
+                        maximum=5,
                         step=1,
                         value=default_image_ensemble_size,
                     )
                     image_denoise_steps = gr.Slider(
                         label="Number of denoising steps",
                         minimum=1,
-                        maximum=20,
+                        maximum=10,
                         step=1,
                         value=default_image_denoise_steps,
                     )
@@ -311,7 +364,7 @@ def run_demo_server(hf_writer=None):
                         value=default_image_processing_res,
                     )
                 with gr.Row():
-                    image_submit_btn = gr.Button(value="Compute Normals", variant="primary")
+                    image_submit_btn = gr.Button(value="Compute IID", variant="primary")
                     image_reset_btn = gr.Button(value="Reset")
             with gr.Column():
                 image_output_slider1 = ImageSlider(
@@ -322,7 +375,7 @@ def run_demo_server(hf_writer=None):
                     interactive=False,
                     elem_classes="slider",
                     position=0.25,
-                    visible=True
+                    visible=True,
                 )
                 image_output_slider2 = ImageSlider(
                     label="Predicted Material",
@@ -332,7 +385,7 @@ def run_demo_server(hf_writer=None):
                     interactive=False,
                     elem_classes="slider",
                     position=0.25,
-                    visible=True
+                    visible=True,
                 )
                 image_output_slider3 = ImageSlider(
                     label="Predicted Residual",
@@ -342,7 +395,7 @@ def run_demo_server(hf_writer=None):
                     interactive=False,
                     elem_classes="slider",
                     position=0.25,
-                    visible=False
+                    visible=False,
                 )
                 image_output_files = gr.Files(
                     label="Output files",
@@ -352,9 +405,9 @@ def run_demo_server(hf_writer=None):
 
         if hf_writer is not None:
             with gr.Accordion(
-                    "Feedback",
-                    open=False,
-                    visible=default_share_always_show_accordion,
+                "Feedback",
+                open=False,
+                visible=default_share_always_show_accordion,
             ) as share_box:
                 share_instructions = gr.Markdown(
                     get_share_instructions(is_full=True),
@@ -362,16 +415,16 @@ def run_demo_server(hf_writer=None):
                 )
                 share_transfer_of_rights = gr.Checkbox(
                     label="(Optional) I own or hold necessary rights to the submitted image. By "
-                          "checking this box, I grant an irrevocable, non-exclusive, transferable, "
-                          "royalty-free, worldwide license to use the uploaded image, including for "
-                          "publishing, reproducing, and model training. [transfer_of_rights]",
+                    "checking this box, I grant an irrevocable, non-exclusive, transferable, "
+                    "royalty-free, worldwide license to use the uploaded image, including for "
+                    "publishing, reproducing, and model training. [transfer_of_rights]",
                     scale=1,
                 )
                 share_content_is_legal = gr.Checkbox(
                     label="By checking this box, I acknowledge that my uploaded content is legal and "
-                          "safe, and that I am solely responsible for ensuring it complies with all "
-                          "applicable laws and regulations. Additionally, I am aware that my Hugging Face "
-                          "username is collected. [content_is_legal]",
+                    "safe, and that I am solely responsible for ensuring it complies with all "
+                    "applicable laws and regulations. Additionally, I am aware that my Hugging Face "
+                    "username is collected. [content_is_legal]",
                     scale=1,
                 )
                 share_reason = gr.Textbox(
@@ -384,7 +437,7 @@ def run_demo_server(hf_writer=None):
                 share_share_btn = gr.Button(
                     "Share", variant="stop", scale=1
                 )
-
+
         # Function to toggle visibility and set dynamic labels
         def toggle_sliders_and_labels(model_type):
             if model_type == "appearance":
@@ -393,7 +446,7 @@ def run_demo_server(hf_writer=None):
                     gr.update(visible=True, label="Predicted Material"),
                     gr.update(visible=False),  # Hide third slider
                 )
-            elif model_type == "residual":
+            elif model_type == "lighting":
                 return (
                     gr.update(visible=True, label="Predicted Albedo"),
                     gr.update(visible=True, label="Predicted Shading"),
@@ -407,36 +460,35 @@ def run_demo_server(hf_writer=None):
             outputs=[image_output_slider1, image_output_slider2, image_output_slider3],
             show_progress=False,
         )
-
+
         Examples(
             fn=process_pipe_image,
             examples=[
-                os.path.join("files", "image", name)
+                [os.path.join("files", "image", name), _model_type]
                 for name in [
-                    "berries.jpeg",
+                    "livingroom.jpg",
+                    "books.jpg",
+                    "food_counter.png",
+                    "cat2.png",
                     "costumes.png",
+                    "icecream.jpg",
+                    "juices.jpeg",
                     "cat.jpg",
-                    "einstein.jpg",
                     "food.jpeg",
-                    "food_counter.png",
                     "puzzle.jpeg",
-                    "rocket.png",
-                    "scientists.jpg",
-                    "cat2.png",
                     "screw.png",
-                    "statues.png",
-                    "swings.jpg"
                 ]
+                for _model_type in ["appearance", "lighting"]
             ],
-            inputs=[image_input],
-            outputs= [
+            inputs=[image_input, model_type],
+            outputs=[
                 image_output_slider1,
                 image_output_slider2,
                 image_output_slider3,
-                image_output_files
+                image_output_files,
             ],
-            cache_examples=False,  # TODO: toggle later
-            directory_name="examples_image",
+            cache_examples=True,  # TODO: toggle later
+            directory_name="examples_images",
         )
 
     ### Image tab
@@ -474,17 +526,17 @@ def run_demo_server(hf_writer=None):
             fn=process_pipe_image,
             inputs=[
                 image_input,
+                model_type,
                 image_denoise_steps,
                 image_ensemble_size,
                 image_processing_res,
-                model_type
             ],
-            outputs= [
-                image_output_slider1,
-                image_output_slider2,
-                image_output_slider3,
-                image_output_files
-            ],
+            outputs=[
+                image_output_slider1,
+                image_output_slider2,
+                image_output_slider3,
+                image_output_files,
+            ],
             concurrency_limit=1,
         )
     else:
@@ -498,17 +550,17 @@ def run_demo_server(hf_writer=None):
             fn=process_pipe_image,
             inputs=[
                 image_input,
+                model_type,
                 image_denoise_steps,
                 image_ensemble_size,
                 image_processing_res,
-                model_type
             ],
-            outputs= [
-                image_output_slider1,
-                image_output_slider2,
-                image_output_slider3,
-                image_output_files
-            ],
+            outputs=[
+                image_output_slider1,
+                image_output_slider2,
+                image_output_slider3,
+                image_output_files,
+            ],
             concurrency_limit=1,
         )
 
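
The core of this change is the single-slot pipeline cache above: only one of the two models is kept resident, and the other is evicted before loading. The pattern in isolation (a sketch, not the demo's exact code; `load_fn` is a hypothetical stand-in for the two `from_pretrained` calls):

```
# Sketch of app.py's load-on-demand cache with eviction: at most one pipeline
# stays in memory; switching model types frees the other's GPU memory first.
import torch

loaded_pipelines = {}

def get_pipeline(model_type, load_fn):
    if model_type not in loaded_pipelines:
        for other in [k for k in loaded_pipelines if k != model_type]:
            del loaded_pipelines[other]   # evict the resident model
        torch.cuda.empty_cache()          # return freed blocks to the driver
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        loaded_pipelines[model_type] = load_fn(model_type).to(device)
    return loaded_pipelines[model_type]
```

The trade-off, as the in-code comment notes, is slower switching between models in exchange for a smaller peak GPU footprint.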
 
files/image/berries.jpeg ADDED

Git LFS Details

  • SHA256: dac1411ea48cf83b7a59c6424032f95b2ff496b3a98cdccf168bbed1c8f0aed4
  • Pointer size: 131 Bytes
  • Size of remote file: 940 kB
files/image/books.jpg ADDED

Git LFS Details

  • SHA256: 1d2648160e85956a5fb6e04b78c241be9662cdb7388bdf92af1b2d72af4506d1
  • Pointer size: 131 Bytes
  • Size of remote file: 743 kB
files/image/cat.jpg ADDED

Git LFS Details

  • SHA256: 794796a86e56a4b372287661dc934daa2d15e988d01afe88afc50b32644c007a
  • Pointer size: 131 Bytes
  • Size of remote file: 236 kB
files/image/cat2.png ADDED

Git LFS Details

  • SHA256: 04a24d72cf9599348d2e3e31e08684ea8a18fcec1e05c3e287e8678f8745fc9e
  • Pointer size: 131 Bytes
  • Size of remote file: 758 kB
files/image/costumes.png ADDED

Git LFS Details

  • SHA256: fc3197481cf925cc02a662dff6d7f8395223e43c249ca9c0b823e3dbc97adf55
  • Pointer size: 132 Bytes
  • Size of remote file: 1.07 MB
files/image/einstein.jpg ADDED

Git LFS Details

  • SHA256: d4a4543c0fffb2ca5ea3c17e23e88fcfcf66eae8b487173fbc5c25d0d614bdb6
  • Pointer size: 131 Bytes
  • Size of remote file: 367 kB
files/image/food.jpeg ADDED

Git LFS Details

  • SHA256: a26151050a574b0dc0014e9c4806da3d6f6bc1297ee1035a16b9ace007a179af
  • Pointer size: 132 Bytes
  • Size of remote file: 1.04 MB
files/image/food_counter.png ADDED

Git LFS Details

  • SHA256: 1ba51cd83534e42203c463614b2ea62a0b6ab39202042175714ea45e6e2061e6
  • Pointer size: 132 Bytes
  • Size of remote file: 1.73 MB
files/image/icecream.jpg ADDED

Git LFS Details

  • SHA256: 1d7d0865b532267a62f9a3ecd67afec5246d4839242c1ef5717f53747b104f0b
  • Pointer size: 132 Bytes
  • Size of remote file: 2.74 MB
files/image/juices.jpeg ADDED

Git LFS Details

  • SHA256: 906c561aadaffd78ae2aa3b5d8aaf6986e8d890a5ed1ed4a26329f364ab60c97
  • Pointer size: 132 Bytes
  • Size of remote file: 1.29 MB
files/image/livingroom.jpg ADDED

Git LFS Details

  • SHA256: fd05910b4c9aa60af1e05c0985a3ecf7685662f1145eed972f14782a89a05e1d
  • Pointer size: 131 Bytes
  • Size of remote file: 815 kB
files/image/puzzle.jpeg ADDED

Git LFS Details

  • SHA256: 60b66432124a0936c6143301a9f9b793af4184bc9340c567d11fdd5a22cc98cc
  • Pointer size: 131 Bytes
  • Size of remote file: 374 kB
files/image/rocket.png ADDED

Git LFS Details

  • SHA256: 27faa0f9263fbdf13e57a2e4ee70211dae5afba8f919763f9fe3afb8c82ae627
  • Pointer size: 131 Bytes
  • Size of remote file: 620 kB
files/image/scientists.jpg ADDED

Git LFS Details

  • SHA256: 7b164dfbc4ab6e491ce81972b8c0e076fdc4af622289d0aa3cb43ee3c2be4030
  • Pointer size: 131 Bytes
  • Size of remote file: 444 kB
files/image/screw.png ADDED

Git LFS Details

  • SHA256: 550ac366acdbd07376c8215d7e09e621598639abb78fdcdaf85b1bb87e6786e4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.21 MB
files/image/statues.png ADDED

Git LFS Details

  • SHA256: 143ded9acabd996f91f11c2fcf7bf7c240552551ef4e66308a49c225f1d81fec
  • Pointer size: 132 Bytes
  • Size of remote file: 1.16 MB
files/image/swings.jpg ADDED

Git LFS Details

  • SHA256: cae2ac669c948313eae8aca53017f10b64b42f87c53b9c34639962b218fdf1f1
  • Pointer size: 131 Bytes
  • Size of remote file: 353 kB
files/image/tabletennis.jpg ADDED

Git LFS Details

  • SHA256: cd0d95612636e9ee7e431246480314f873ee1a431c572886100da42bcda72ed2
  • Pointer size: 131 Bytes
  • Size of remote file: 695 kB
files/image/tent.jpg ADDED

Git LFS Details

  • SHA256: 3d0869e11523dfa405afa134078b344195c89c8a6195ad663d393570e8e6d405
  • Pointer size: 132 Bytes
  • Size of remote file: 1.87 MB
gradio_patches/examples.py ADDED
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+import gradio
+from gradio.utils import get_cache_folder
+
+
+class Examples(gradio.helpers.Examples):
+    def __init__(self, *args, directory_name=None, **kwargs):
+        super().__init__(*args, **kwargs, _initiated_directly=False)
+        if directory_name is not None:
+            self.cached_folder = get_cache_folder() / directory_name
+            self.cached_file = Path(self.cached_folder) / "log.csv"
+        self.create()
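
This patch subclasses `gradio.helpers.Examples` only to redirect the cache folder; everything else is stock Gradio. Usage mirrors app.py (the echo demo below is hypothetical):

```
# Hypothetical minimal demo of the patched Examples class: identical to
# gradio.helpers.Examples except for the extra directory_name keyword,
# which picks the subfolder under Gradio's cache root.
import gradio as gr
from gradio_patches.examples import Examples

def echo(text):
    return text

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    Examples(
        fn=echo,
        examples=[["hello"]],
        inputs=[inp],
        outputs=[out],
        cache_examples=True,
        directory_name="examples_text",  # cache lands in <cache>/examples_text
    )
```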
gradio_patches/flagging.py ADDED
@@ -0,0 +1,165 @@
+from __future__ import annotations
+
+import datetime
+import json
+import time
+import uuid
+from collections import OrderedDict
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import gradio
+import gradio as gr
+import huggingface_hub
+from gradio import FlaggingCallback
+from gradio_client import utils as client_utils
+
+
+class HuggingFaceDatasetSaver(gradio.HuggingFaceDatasetSaver):
+    def flag(
+        self,
+        flag_data: list[Any],
+        flag_option: str = "",
+        username: str | None = None,
+    ) -> int:
+        if self.separate_dirs:
+            # JSONL files to support dataset preview on the Hub
+            current_utc_time = datetime.now(timezone.utc)
+            iso_format_without_microseconds = current_utc_time.strftime(
+                "%Y-%m-%dT%H:%M:%S"
+            )
+            milliseconds = int(current_utc_time.microsecond / 1000)
+            unique_id = f"{iso_format_without_microseconds}.{milliseconds:03}Z"
+            if username not in (None, ""):
+                unique_id += f"_U_{username}"
+            else:
+                unique_id += f"_{str(uuid.uuid4())[:8]}"
+            components_dir = self.dataset_dir / unique_id
+            data_file = components_dir / "metadata.jsonl"
+            path_in_repo = unique_id  # upload in sub folder (safer for concurrency)
+        else:
+            # Unique CSV file
+            components_dir = self.dataset_dir
+            data_file = components_dir / "data.csv"
+            path_in_repo = None  # upload at root level
+
+        return self._flag_in_dir(
+            data_file=data_file,
+            components_dir=components_dir,
+            path_in_repo=path_in_repo,
+            flag_data=flag_data,
+            flag_option=flag_option,
+            username=username or "",
+        )
+
+    def _deserialize_components(
+        self,
+        data_dir: Path,
+        flag_data: list[Any],
+        flag_option: str = "",
+        username: str = "",
+    ) -> tuple[dict[Any, Any], list[Any]]:
+        """Deserialize components and return the corresponding row for the flagged sample.
+
+        Images/audio are saved to disk as individual files.
+        """
+        # Components that can have a preview on dataset repos
+        file_preview_types = {gr.Audio: "Audio", gr.Image: "Image"}
+
+        # Generate the row corresponding to the flagged sample
+        features = OrderedDict()
+        row = []
+        for component, sample in zip(self.components, flag_data):
+            # Get deserialized object (will save sample to disk if applicable -file, audio, image,...-)
+            label = component.label or ""
+            save_dir = data_dir / client_utils.strip_invalid_filename_characters(label)
+            save_dir.mkdir(exist_ok=True, parents=True)
+            deserialized = component.flag(sample, save_dir)
+
+            # Base component .flag method returns JSON; extract path from it when it is FileData
+            if component.data_model:
+                data = component.data_model.from_json(json.loads(deserialized))
+                if component.data_model == gr.data_classes.FileData:
+                    deserialized = data.path
+
+            # Add deserialized object to row
+            features[label] = {"dtype": "string", "_type": "Value"}
+            try:
+                deserialized_path = Path(deserialized)
+                if not deserialized_path.exists():
+                    raise FileNotFoundError(f"File {deserialized} not found")
+                row.append(str(deserialized_path.relative_to(self.dataset_dir)))
+            except (FileNotFoundError, TypeError, ValueError):
+                deserialized = "" if deserialized is None else str(deserialized)
+                row.append(deserialized)
+
+            # If component is eligible for a preview, add the URL of the file
+            # Be mindful that images and audio can be None
+            if isinstance(component, tuple(file_preview_types)):  # type: ignore
+                for _component, _type in file_preview_types.items():
+                    if isinstance(component, _component):
+                        features[label + " file"] = {"_type": _type}
+                        break
+                if deserialized:
+                    path_in_repo = str(  # returned filepath is absolute, we want it relative to compute URL
+                        Path(deserialized).relative_to(self.dataset_dir)
+                    ).replace("\\", "/")
+                    row.append(
+                        huggingface_hub.hf_hub_url(
+                            repo_id=self.dataset_id,
+                            filename=path_in_repo,
+                            repo_type="dataset",
+                        )
+                    )
+                else:
+                    row.append("")
+        features["flag"] = {"dtype": "string", "_type": "Value"}
+        features["username"] = {"dtype": "string", "_type": "Value"}
+        row.append(flag_option)
+        row.append(username)
+        return features, row
+
+
+class FlagMethod:
+    """
+    Helper class that contains the flagging options and calls the flagging method. Also
+    provides visual feedback to the user when flag is clicked.
+    """
+
+    def __init__(
+        self,
+        flagging_callback: FlaggingCallback,
+        label: str,
+        value: str,
+        visual_feedback: bool = True,
+    ):
+        self.flagging_callback = flagging_callback
+        self.label = label
+        self.value = value
+        self.__name__ = "Flag"
+        self.visual_feedback = visual_feedback
+
+    def __call__(
+        self,
+        request: gr.Request,
+        profile: gr.OAuthProfile | None,
+        *flag_data,
+    ):
+        username = None
+        if profile is not None:
+            username = profile.username
+        try:
+            self.flagging_callback.flag(
+                list(flag_data), flag_option=self.value, username=username
+            )
+        except Exception as e:
+            print(f"Error while sharing: {e}")
+            if self.visual_feedback:
+                return gr.Button(value="Sharing error", interactive=False)
+        if not self.visual_feedback:
+            return
+        time.sleep(0.8)  # to provide enough time for the user to observe button change
+        return gr.Button(value="Sharing complete", interactive=False)
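
app.py receives this saver as `hf_writer`; the wiring is not part of this commit, but it presumably follows Gradio's standard `HuggingFaceDatasetSaver` constructor, which this class subclasses (a sketch; the token variable and dataset name below are hypothetical):

```
# Hypothetical wiring of the patched saver; constructor arguments follow
# gradio.HuggingFaceDatasetSaver.
import os

from gradio_patches.flagging import HuggingFaceDatasetSaver

hf_writer = HuggingFaceDatasetSaver(
    hf_token=os.environ["HF_TOKEN"],           # hypothetical env var
    dataset_name="user/marigold-iid-flagged",  # hypothetical dataset
    private=True,
    separate_dirs=True,  # one subfolder + metadata.jsonl per flagged sample
)
```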
marigold_iid_appearance.py CHANGED
@@ -278,12 +278,12 @@ class MarigoldIIDAppearancePipeline(DiffusionPipeline):
         )
 
         albedo_colored = (albedo + 1.0) * 0.5
-        albedo_colored = (albedo_colored * 255).to(np.uint8)
+        albedo_colored = (albedo_colored * 255).astype(np.uint8)
         albedo_colored = self.chw2hwc(albedo_colored)
         albedo_colored_img = Image.fromarray(albedo_colored)
 
         material_colored = (material + 1.0) * 0.5
-        material_colored = (material_colored * 255).to(np.uint8)
+        material_colored = (material_colored * 255).astype(np.uint8)
         material_colored = self.chw2hwc(material_colored)
         material_colored_img = Image.fromarray(material_colored)
 
@@ -436,7 +436,7 @@ class MarigoldIIDAppearancePipeline(DiffusionPipeline):
         assert target_latents.shape[1] == 8  # self.n_targets * 4
 
         # scale latent
-        target_latents = target_latents / self.rgb_latent_scale_factor
+        target_latents = target_latents / self.latent_scale_factor
         # decode
         targets = []
         for i in range(self.n_targets):
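
The `.to(np.uint8)` → `.astype(np.uint8)` changes fix a real bug rather than style: by this point the arrays are NumPy arrays, and `.to` is a `torch.Tensor` method that `numpy.ndarray` does not have. A minimal repro of the distinction:

```
import numpy as np
import torch

t = torch.rand(3)
t = t.to(torch.uint8)  # fine: .to() exists on torch.Tensor

a = np.random.rand(3)
# a.to(np.uint8)       # AttributeError: 'numpy.ndarray' object has no attribute 'to'
a = (a * 255).astype(np.uint8)  # correct NumPy spelling
```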
marigold_iid_residual.py → marigold_iid_lighting.py RENAMED
@@ -38,9 +38,9 @@ from transformers import CLIPTextModel, CLIPTokenizer
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
 check_min_version("0.27.0.dev0")
 
-class MarigoldIIDResidualOutput(BaseOutput):
+class MarigoldIIDLightingOutput(BaseOutput):
     """
-    Output class for Marigold IID Residual pipeline.
+    Output class for Marigold-IID-Lighting pipeline.
 
     Args:
         albedo (`np.ndarray`):
@@ -65,7 +65,7 @@ class MarigoldIIDResidualOutput(BaseOutput):
     residual: np.ndarray
     residual_colored: Image.Image
 
-class MarigoldIIDResidualPipeline(DiffusionPipeline):
+class MarigoldIIDLightingPipeline(DiffusionPipeline):
     """
     Pipeline for Intrinsic Image Decomposition (Albedo, diffuse shading and non-diffuse residual) using Marigold: https://marigoldcomputervision.github.io.
 
@@ -124,7 +124,7 @@ class MarigoldIIDResidualPipeline(DiffusionPipeline):
         color_map: str = "Spectral",  # TODO change colorization api based on modality
         show_progress_bar: bool = True,
         **kwargs,
-    ) -> MarigoldIIDResidualOutput:
+    ) -> MarigoldIIDLightingOutput:
         """
         Function invoked when calling the pipeline.
 
@@ -155,7 +155,7 @@ class MarigoldIIDResidualPipeline(DiffusionPipeline):
         show_progress_bar (`bool`, *optional*, defaults to `True`):
             Display a progress bar of diffusion denoising.
         Returns:
-            `MarigoldIIDResidualOutput`: Output class for Marigold monocular intrinsic image decomposition (Residual) prediction pipeline, including:
+            `MarigoldIIDLightingOutput`: Output class for Marigold monocular intrinsic image decomposition (lighting) prediction pipeline, including:
             - **albedo** (`np.ndarray`) Predicted albedo map with the shape of [3, H, W] values in the range of [0, 1]
             - **albedo_colored** (`PIL.Image.Image`) Colorized albedo map with the shape of [3, H, W] values in the range of [0, 1]
             - **material** (`np.ndarray`) Predicted material map with the shape of [3, H, W] and values in [0, 1]
@@ -276,24 +276,25 @@ class MarigoldIIDResidualPipeline(DiffusionPipeline):
         shading = final_pred[3:6, :, :]
         residual = final_pred[6:, :, :]
 
-        albedo_colored = (albedo + 1.0) * 0.5
-        albedo_colored = (albedo_colored * 255).to(np.uint8)
+        albedo_colored = (albedo + 1.0) * 0.5  # [-1,1] -> [0,1]
+        albedo_colored = albedo_colored ** (1 / 2.2)  # from linear to sRGB (to be consistent with IID-Appearance model)
+        albedo_colored = (albedo_colored * 255).astype(np.uint8)
         albedo_colored = self.chw2hwc(albedo_colored)
         albedo_colored_img = Image.fromarray(albedo_colored)
 
         shading_colored = (shading + 1.0) * 0.5
         shading_colored = shading_colored / shading_colored.max()  # rescale for better visualization
-        shading_colored = (shading_colored * 255).to(np.uint8)
+        shading_colored = (shading_colored * 255).astype(np.uint8)
         shading_colored = self.chw2hwc(shading_colored)
         shading_colored_img = Image.fromarray(shading_colored)
 
         residual_colored = (residual + 1.0) * 0.5
         residual_colored = residual_colored / residual_colored.max()  # rescale for better visualization
-        residual_colored = (residual_colored * 255).to(np.uint8)
+        residual_colored = (residual_colored * 255).astype(np.uint8)
         residual_colored = self.chw2hwc(residual_colored)
         residual_colored_img = Image.fromarray(residual_colored)
 
-        out = MarigoldIIDResidualOutput(
+        out = MarigoldIIDLightingOutput(
             albedo=albedo,
             albedo_colored=albedo_colored_img,
             shading=shading,
@@ -444,7 +445,7 @@ class MarigoldIIDResidualPipeline(DiffusionPipeline):
         assert target_latents.shape[1] == 12  # self.n_targets * 4
 
         # scale latent
-        target_latents = target_latents / self.rgb_latent_scale_factor
+        target_latents = target_latents / self.latent_scale_factor
         # decode
         targets = []
         for i in range(self.n_targets):
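
Besides the rename, the lighting pipeline gains a gamma step so its displayed albedo matches the appearance model's sRGB output. A sketch of that visualization path on a dummy array (assuming `chw2hwc` is a channel-last transpose, which is what the surrounding code implies):

```
import numpy as np
from PIL import Image

albedo = np.random.uniform(-1.0, 1.0, (3, 64, 64)).astype(np.float32)

vis = (albedo + 1.0) * 0.5          # [-1, 1] -> [0, 1]
vis = vis ** (1 / 2.2)              # linear -> approximate sRGB
vis = (vis * 255).astype(np.uint8)
vis = np.moveaxis(vis, 0, -1)       # CHW -> HWC, as chw2hwc presumably does
Image.fromarray(vis).save("albedo_vis.png")
```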