Update app.py

app.py (CHANGED)
@@ -1,9 +1,18 @@
 import torch
-from transformers import …
+from transformers import (
+    SegformerImageProcessor,
+    SegformerForSemanticSegmentation,
+    DPTImageProcessor,
+    DPTForDepthEstimation
+)
 from PIL import Image, ImageFilter
 import numpy as np
 import gradio as gr
 
+# Suppress specific warnings
+import warnings
+warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
+
 # Load pre-trained models and processors
 seg_processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
 seg_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
@@ -11,57 +20,78 @@ depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
 depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
 
 def process_image(image):
-    # …
+    # Ensure image is in RGB
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+
+    # Resize the image to 512x512
     image = image.resize((512, 512))
 
-    # …
+    # ------------------ Semantic Segmentation ------------------
     seg_inputs = seg_processor(images=image, return_tensors="pt")
     with torch.no_grad():
         seg_outputs = seg_model(**seg_inputs)
     seg_logits = seg_outputs.logits
     segmentation = torch.argmax(seg_logits, dim=1)[0].numpy()
 
-    # Create binary mask for 'person' class
+    # Create binary mask for 'person' class (class index 12)
     person_class_index = 12
     binary_mask = (segmentation == person_class_index).astype(np.uint8) * 255
+    binary_mask_image = Image.fromarray(binary_mask)
 
-    # …
+    # ------------------ Depth Estimation ------------------
     depth_inputs = depth_processor(images=image, return_tensors="pt")
     with torch.no_grad():
         depth_outputs = depth_model(**depth_inputs)
     predicted_depth = depth_outputs.predicted_depth[0].cpu().numpy()
-    …
+
+    # Normalize the depth map for visualization
+    min_depth = predicted_depth.min()
+    max_depth = predicted_depth.max()
+    normalized_depth = (predicted_depth - min_depth) / (max_depth - min_depth)
+    depth_map_image = Image.fromarray((normalized_depth * 255).astype(np.uint8))
+
+    # ------------------ Blurred Background Effect ------------------
+    # Invert the depth map
     inverted_depth = 1 - normalized_depth
-    …
-    depth_weight_resized = depth_weight_resized[:, :, np.newaxis]
+    inverted_depth = (inverted_depth - inverted_depth.min()) / (inverted_depth.max() - inverted_depth.min())
 
-    # …
+    # Resize and expand dimensions to match image channels
+    depth_weight_resized = Image.fromarray((inverted_depth * 255).astype(np.uint8)).resize((512, 512))
+    depth_weight_resized = np.array(depth_weight_resized) / 255.0
+    depth_weight_resized = np.expand_dims(depth_weight_resized, axis=-1)
+
+    # Apply Gaussian blur to the entire image
     blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))
+
+    # Convert images to numpy arrays
     original_np = np.array(image).astype(np.float32)
     blurred_np = np.array(blurred_image).astype(np.float32)
+
+    # Blend images based on the depth weight
     composite_np = (1 - depth_weight_resized) * original_np + depth_weight_resized * blurred_np
     composite_image = Image.fromarray(np.clip(composite_np, 0, 255).astype(np.uint8))
 
-    # Return results
-    binary_mask_image = Image.fromarray(binary_mask)
-    depth_map_image = Image.fromarray((normalized_depth * 255).astype(np.uint8))
     return image, binary_mask_image, depth_map_image, composite_image
 
-# …
+# Define Gradio interface using the updated API
 interface = gr.Interface(
     fn=process_image,
-    inputs=gr.…
+    inputs=gr.Image(type="pil", label="Upload Image"),
     outputs=[
-        gr.…
-        gr.…
-        gr.…
-        gr.…
+        gr.Image(type="pil", label="Original Image"),
+        gr.Image(type="pil", label="Segmentation Mask"),
+        gr.Image(type="pil", label="Depth Map"),
+        gr.Image(type="pil", label="Blurred Background Effect"),
    ],
     title="Semantic Segmentation and Depth Estimation",
-    description="Upload an image to generate a segmentation mask, depth map, and blurred background effect."
+    description="Upload an image to generate a segmentation mask, depth map, and a blurred background effect.",
+    examples=[
+        ["Selfie_1.jpg"],
+        ["Selfie_2.jpg"]
+    ]
 )
 
 # Launch the interface
 if __name__ == "__main__":
     interface.launch()
-…
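A note on the segmentation step: the hard-coded person_class_index = 12 matches the "person" entry in the ADE20K label map used by this SegFormer checkpoint, but the model returns logits at one quarter of the input resolution, so the mask comes out 128x128 for a 512x512 input. Below is a minimal sketch of a more robust variant, assuming the seg_model and seg_inputs objects defined in app.py: the class index is read from the model config, and the logits are upsampled to the input size before the argmax.

import torch
import torch.nn.functional as F

# Look up the 'person' index from the checkpoint's label map instead of hard-coding 12.
person_class_index = next(
    idx for idx, name in seg_model.config.id2label.items() if name == "person"
)

with torch.no_grad():
    seg_logits = seg_model(**seg_inputs).logits  # shape (1, 150, 128, 128) for a 512x512 input

# Upsample the logits to the input resolution so the mask is 512x512.
seg_logits = F.interpolate(seg_logits, size=(512, 512), mode="bilinear", align_corners=False)
segmentation = seg_logits.argmax(dim=1)[0].numpy()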
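Two further observations on the blurring step. First, since normalized_depth already spans [0, 1], re-normalizing inverted_depth right after it is computed is a no-op and could be dropped. Second, the person mask is only returned for display; the blur weight comes entirely from the depth map, so a detected person can still be partially blurred where the depth estimate is noisy. A possible refinement, not part of this commit, is to zero the blur weight wherever the mask fires, after resizing the mask to the 512x512 composite grid:

import numpy as np

# Hypothetical refinement: keep person pixels fully sharp in the composite.
mask_512 = np.array(binary_mask_image.resize((512, 512))) / 255.0  # 1.0 on 'person' pixels
depth_weight_resized = depth_weight_resized * (1.0 - mask_512[:, :, np.newaxis])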
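Finally, the examples list references Selfie_1.jpg and Selfie_2.jpg, so those files are expected at the root of the Space repository. For a quick check outside the Gradio UI, process_image can be called directly; the file names below are illustrative only.

from PIL import Image

original, mask, depth_map, composite = process_image(Image.open("Selfie_1.jpg"))
composite.save("composite_preview.png")  # hypothetical output path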