ds1david committed
Commit 1665fe1 · 1 Parent(s): eb1615e
Files changed (2):
  1. app.py +110 -59
  2. requirements.txt +4 -7
app.py CHANGED
@@ -1,87 +1,138 @@
  import gradio as gr
  import torch
  import numpy as np
- from diffusers import StableDiffusionXLPipeline
  from transformers import DPTFeatureExtractor, DPTForDepthEstimation
- from PIL import Image, ImageEnhance, ImageOps
-
- device = "cpu"  # or "cuda" if you have a GPU
- torch_dtype = torch.float32
-
- print("Loading SDXL Base model...")
- pipe = StableDiffusionXLPipeline.from_pretrained(
-     "stabilityai/stable-diffusion-xl-base-1.0",
-     torch_dtype=torch_dtype
- ).to(device)
-
- print("Loading bas-relief LoRA weights with PEFT...")
- pipe.load_lora_weights(
-     "KappaNeuro/bas-relief",  # The HF repo with BAS-RELIEF.safetensors
-     weight_name="BAS-RELIEF.safetensors",
-     peft_backend="peft"  # This is crucial
- )
-
- # Convert the modules to float32 to avoid dtype conflicts
- pipe.unet = pipe.unet.float()
- pipe.text_encoder = pipe.text_encoder.float()
-
- print("Loading DPT Depth Model...")
- feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
- depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
-
- def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
-     d_min, d_max = depth_arr.min(), depth_arr.max()
-     depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
-     depth_stretched = (depth_stretched * 255).astype(np.uint8)
-
-     depth_pil = Image.fromarray(depth_stretched)
-     depth_pil = ImageOps.autocontrast(depth_pil)
-
-     enhancer = ImageEnhance.Sharpness(depth_pil)
-     depth_pil = enhancer.enhance(2.0)
-
-     return depth_pil
-
- def generate_bas_relief_and_depth(imagem):
-     # Use the token "BAS-RELIEF" so the LoRA triggers
-     full_prompt = f"BAS-RELIEF"
-     print("Generating image with LoRA style...")
-     result = pipe(
-         prompt=full_prompt,
-         image=imagem,
-         num_inference_steps=15,  # reduce if too slow
-         guidance_scale=7.5,
-         height=512,  # reduce if you still get timeouts
-         width=512
-     )
-     image = result.images[0]
-
-     print("Running DPT Depth Estimation...")
-     inputs = feature_extractor(image, return_tensors="pt").to(device)
      with torch.no_grad():
-         outputs = depth_model(**inputs)
          predicted_depth = outputs.predicted_depth

      prediction = torch.nn.functional.interpolate(
          predicted_depth.unsqueeze(1),
          size=image.size[::-1],
          mode="bicubic",
-         align_corners=False
-     ).squeeze()
-
-     depth_map_pil = enhance_depth_map(prediction.cpu().numpy())
-
-     return image, depth_map_pil
-
- # Gradio interface
- interface = gr.Interface(
-     fn=generate_bas_relief_and_depth,
-     inputs=gr.Image(type="pil"),
-     outputs=[gr.Image(label="Result"), gr.Image(label="Depth")],
-     title="Bas-Relief Converter",
-     description="Turn images into bas-relief with a depth map"
- )
-
  if __name__ == "__main__":
-     interface.launch()
 
  import gradio as gr
  import torch
  import numpy as np
+ from PIL import Image
+ from peft import PeftModel
  from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+ from diffusers import StableDiffusionXLControlNetImg2ImgPipeline, ControlNetModel
+ from torchvision import transforms
+
+ # Initial settings
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
+
+ # --- Model loading ---
+
+ # 1. Thera: super-resolution
+ def load_thera_model():
+     # Hypothetical model - adjust to match the actual Thera implementation
+     model = torch.hub.load('prs-eth/thera', 'thera', trust_repo=True)
+     return model.to(DEVICE)
+
+ # 2. Depth map with PEFT
+ def load_depth_model():
+     base_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+     model = PeftModel.from_pretrained(base_model, "danube2024/dpt-peft-lora")
+     return model.to(DEVICE).eval()
+
+ # 3. Bas-relief with ControlNet
+ def load_controlnet():
+     controlnet = ControlNetModel.from_pretrained(
+         "danube2024/controlnet-bas-relief",
+         torch_dtype=TORCH_DTYPE
+     )
+     # The img2img ControlNet variant is needed here: unlike the plain
+     # text-to-image pipeline, it accepts an init image, a control image,
+     # and a strength parameter.
+     pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
+         "stabilityai/stable-diffusion-xl-base-1.0",
+         controlnet=controlnet,
+         torch_dtype=TORCH_DTYPE
+     )
+     pipe.load_lora_weights("danube2024/bas-relief-lora")
+     return pipe.to(DEVICE)
+
+ # --- Processing ---
+
+ def run_thera(image, model):
+     transform = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize([0.5], [0.5])
+     ])
+
+     input_tensor = transform(image).unsqueeze(0).to(DEVICE)
      with torch.no_grad():
+         output = model(input_tensor)
+
+     # Map the model output from [-1, 1] back to [0, 1] for PIL
+     output_img = transforms.ToPILImage()(output.squeeze().cpu().clamp(-1, 1) * 0.5 + 0.5)
+     return output_img
+
+ def create_depth_map(image, model, feature_extractor):
+     inputs = feature_extractor(images=image, return_tensors="pt").to(DEVICE)
+     with torch.no_grad():
+         outputs = model(**inputs)
          predicted_depth = outputs.predicted_depth

      prediction = torch.nn.functional.interpolate(
          predicted_depth.unsqueeze(1),
          size=image.size[::-1],
          mode="bicubic",
+         align_corners=False,
+     )
+     return prediction.squeeze().cpu().numpy()
+
+ def create_bas_relief(prompt, image, depth_map, pipe):
+     control_image = Image.fromarray((depth_map * 255).astype(np.uint8))
+
+     image = image.resize((1024, 1024))
+     control_image = control_image.resize((1024, 1024))
+
+     result = pipe(
+         prompt=prompt,
+         image=image,
+         control_image=control_image,
+         strength=0.8,
+         num_inference_steps=30
+     ).images[0]
+
+     return result
+
+ # --- Gradio interface ---
+
+ with gr.Blocks() as app:
+     gr.Markdown("# 🖼️ Super-Resolution + Depth Map + Bas-Relief")
+
+     with gr.Row():
+         with gr.Column():
+             input_image = gr.Image(type="pil", label="Input Image")
+             prompt = gr.Textbox("high quality bas-relief sculpture, intricate details")
+             submit_btn = gr.Button("Process")
+
+         with gr.Column():
+             upscaled_output = gr.Image(label="Upscaled Image")
+             depth_output = gr.Image(label="Depth Map")
+             basrelief_output = gr.Image(label="Bas-Relief Result")
+
+     def process(image, prompt):
+         # Load models
+         thera_model = load_thera_model()
+         depth_model = load_depth_model()
+         feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+         basrelief_pipe = load_controlnet()
+
+         # 1. Super-resolution
+         upscaled = run_thera(image, thera_model)
+
+         # 2. Depth map
+         depth = create_depth_map(upscaled, depth_model, feature_extractor)
+         depth_normalized = (depth - depth.min()) / (depth.max() - depth.min())
+
+         # 3. Bas-relief
+         basrelief = create_bas_relief(prompt, upscaled, depth_normalized, basrelief_pipe)
+
+         return upscaled, depth_normalized, basrelief
+
+     submit_btn.click(
+         process,
+         inputs=[input_image, prompt],
+         outputs=[upscaled_output, depth_output, basrelief_output]
+     )

  if __name__ == "__main__":
+     app.launch()
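
One thing to watch in the new flow: `process` reloads Thera, the DPT+PEFT depth model, and the full SDXL ControlNet pipeline on every button click. A minimal sketch of loading once per process instead, reusing the loader functions from this commit (the `get_models` helper is hypothetical, not part of the commit):

    from functools import lru_cache

    @lru_cache(maxsize=1)
    def get_models():
        # First call loads everything; later clicks reuse the cached tuple
        thera_model = load_thera_model()
        depth_model = load_depth_model()
        feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
        basrelief_pipe = load_controlnet()
        return thera_model, depth_model, feature_extractor, basrelief_pipe

`process` would then start with `thera_model, depth_model, feature_extractor, basrelief_pipe = get_models()` in place of its four load calls.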
requirements.txt CHANGED
@@ -1,8 +1,5 @@
- peft
- accelerate
- diffusers>=0.20.0
- transformers>=4.30.0
- torch
  gradio
- Pillow
- safetensors

  gradio
+ torch
+ peft
+ transformers
+ diffusers
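
Note that the new app.py also imports `torchvision`, `numpy`, and `PIL` (Pillow), and loading the `.safetensors` LoRA and ControlNet weights through diffusers typically needs `safetensors` (with `accelerate` commonly recommended for SDXL loading); none of these survive the trim. A fuller list in the same unpinned style, as a sketch:

    gradio
    torch
    torchvision
    peft
    transformers
    diffusers
    safetensors
    accelerate
    numpy
    Pillow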