tpatel53 committed on
Commit
a0bb102
·
verified ·
1 Parent(s): e3a9a0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -20
app.py CHANGED
@@ -1,9 +1,18 @@
1
  import torch
2
- from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation, DPTImageProcessor, DPTForDepthEstimation
 
 
 
 
 
3
  from PIL import Image, ImageFilter
4
  import numpy as np
5
  import gradio as gr
6
 
 
 
 
 
7
  # Load pre-trained models and processors
8
  seg_processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
9
  seg_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
@@ -11,57 +20,78 @@ depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
11
  depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
12
 
13
  def process_image(image):
14
- # Preprocess the input image
 
 
 
 
15
  image = image.resize((512, 512))
16
 
17
- # Perform semantic segmentation
18
  seg_inputs = seg_processor(images=image, return_tensors="pt")
19
  with torch.no_grad():
20
  seg_outputs = seg_model(**seg_inputs)
21
  seg_logits = seg_outputs.logits
22
  segmentation = torch.argmax(seg_logits, dim=1)[0].numpy()
23
 
24
- # Create binary mask for 'person' class
25
  person_class_index = 12
26
  binary_mask = (segmentation == person_class_index).astype(np.uint8) * 255
 
27
 
28
- # Perform depth estimation
29
  depth_inputs = depth_processor(images=image, return_tensors="pt")
30
  with torch.no_grad():
31
  depth_outputs = depth_model(**depth_inputs)
32
  predicted_depth = depth_outputs.predicted_depth[0].cpu().numpy()
33
- normalized_depth = (predicted_depth - predicted_depth.min()) / (predicted_depth.max() - predicted_depth.min())
 
 
 
 
 
 
 
 
34
  inverted_depth = 1 - normalized_depth
35
- depth_weight_resized = np.array(Image.fromarray((inverted_depth * 255).astype(np.uint8)).resize((512, 512))) / 255.0
36
- depth_weight_resized = depth_weight_resized[:, :, np.newaxis]
37
 
38
- # Create blurred background effect
 
 
 
 
 
39
  blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))
 
 
40
  original_np = np.array(image).astype(np.float32)
41
  blurred_np = np.array(blurred_image).astype(np.float32)
 
 
42
  composite_np = (1 - depth_weight_resized) * original_np + depth_weight_resized * blurred_np
43
  composite_image = Image.fromarray(np.clip(composite_np, 0, 255).astype(np.uint8))
44
 
45
- # Return results
46
- binary_mask_image = Image.fromarray(binary_mask)
47
- depth_map_image = Image.fromarray((normalized_depth * 255).astype(np.uint8))
48
  return image, binary_mask_image, depth_map_image, composite_image
49
 
50
- # Create Gradio interface
51
  interface = gr.Interface(
52
  fn=process_image,
53
- inputs=gr.inputs.Image(type="pil"),
54
  outputs=[
55
- gr.outputs.Image(type="pil", label="Original Image"),
56
- gr.outputs.Image(type="pil", label="Segmentation Mask"),
57
- gr.outputs.Image(type="pil", label="Depth Map"),
58
- gr.outputs.Image(type="pil", label="Blurred Background Effect"),
59
  ],
60
  title="Semantic Segmentation and Depth Estimation",
61
- description="Upload an image to generate a segmentation mask, depth map, and blurred background effect."
 
 
 
 
62
  )
63
 
64
  # Launch the interface
65
  if __name__ == "__main__":
66
  interface.launch()
67
-
 
1
  import torch
2
+ from transformers import (
3
+ SegformerImageProcessor,
4
+ SegformerForSemanticSegmentation,
5
+ DPTImageProcessor,
6
+ DPTForDepthEstimation
7
+ )
8
  from PIL import Image, ImageFilter
9
  import numpy as np
10
  import gradio as gr
11
 
12
+ # Suppress specific warnings
13
+ import warnings
14
+ warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
15
+
16
  # Load pre-trained models and processors
17
  seg_processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
18
  seg_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
 
20
  depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
21
 
22
  def process_image(image):
23
+ # Ensure image is in RGB
24
+ if image.mode != "RGB":
25
+ image = image.convert("RGB")
26
+
27
+ # Resize the image to 512x512
28
  image = image.resize((512, 512))
29
 
30
+ # ------------------ Semantic Segmentation ------------------
31
  seg_inputs = seg_processor(images=image, return_tensors="pt")
32
  with torch.no_grad():
33
  seg_outputs = seg_model(**seg_inputs)
34
  seg_logits = seg_outputs.logits
35
  segmentation = torch.argmax(seg_logits, dim=1)[0].numpy()
36
 
37
+ # Create binary mask for 'person' class (class index 12)
38
  person_class_index = 12
39
  binary_mask = (segmentation == person_class_index).astype(np.uint8) * 255
40
+ binary_mask_image = Image.fromarray(binary_mask)
41
 
42
+ # ------------------ Depth Estimation ------------------
43
  depth_inputs = depth_processor(images=image, return_tensors="pt")
44
  with torch.no_grad():
45
  depth_outputs = depth_model(**depth_inputs)
46
  predicted_depth = depth_outputs.predicted_depth[0].cpu().numpy()
47
+
48
+ # Normalize the depth map for visualization
49
+ min_depth = predicted_depth.min()
50
+ max_depth = predicted_depth.max()
51
+ normalized_depth = (predicted_depth - min_depth) / (max_depth - min_depth)
52
+ depth_map_image = Image.fromarray((normalized_depth * 255).astype(np.uint8))
53
+
54
+ # ------------------ Blurred Background Effect ------------------
55
+ # Invert the depth map
56
  inverted_depth = 1 - normalized_depth
57
+ inverted_depth = (inverted_depth - inverted_depth.min()) / (inverted_depth.max() - inverted_depth.min())
 
58
 
59
+ # Resize and expand dimensions to match image channels
60
+ depth_weight_resized = Image.fromarray((inverted_depth * 255).astype(np.uint8)).resize((512, 512))
61
+ depth_weight_resized = np.array(depth_weight_resized) / 255.0
62
+ depth_weight_resized = np.expand_dims(depth_weight_resized, axis=-1)
63
+
64
+ # Apply Gaussian blur to the entire image
65
  blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))
66
+
67
+ # Convert images to numpy arrays
68
  original_np = np.array(image).astype(np.float32)
69
  blurred_np = np.array(blurred_image).astype(np.float32)
70
+
71
+ # Blend images based on the depth weight
72
  composite_np = (1 - depth_weight_resized) * original_np + depth_weight_resized * blurred_np
73
  composite_image = Image.fromarray(np.clip(composite_np, 0, 255).astype(np.uint8))
74
 
 
 
 
75
  return image, binary_mask_image, depth_map_image, composite_image
76
 
77
+ # Define Gradio interface using the updated API
78
  interface = gr.Interface(
79
  fn=process_image,
80
+ inputs=gr.Image(type="pil", label="Upload Image"),
81
  outputs=[
82
+ gr.Image(type="pil", label="Original Image"),
83
+ gr.Image(type="pil", label="Segmentation Mask"),
84
+ gr.Image(type="pil", label="Depth Map"),
85
+ gr.Image(type="pil", label="Blurred Background Effect"),
86
  ],
87
  title="Semantic Segmentation and Depth Estimation",
88
+ description="Upload an image to generate a segmentation mask, depth map, and a blurred background effect.",
89
+ examples=[
90
+ ["Selfie_1.jpg"],
91
+ ["Selfie_2.jpg"]
92
+ ]
93
  )
94
 
95
  # Launch the interface
96
  if __name__ == "__main__":
97
  interface.launch()