"""Gradio demo: semantic segmentation with a SegFormer model fine-tuned on ADE20K.

At import time the script loads the pretrained feature extractor and model,
reads the class names from ``labels.txt`` and then builds and launches a
Gradio interface that overlays the predicted segmentation on the input image.
"""
import gradio as gr
from matplotlib import gridspec
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import tensorflow as tf
from transformers import SegformerFeatureExtractor, TFSegformerForSemanticSegmentation

feature_extractor = SegformerFeatureExtractor.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512"
)
model = TFSegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512"
)


def ade_palette():
    """ADE20K palette that maps each class to RGB values.

    Returns:
        A list of 150 ``[R, G, B]`` triples; the list index is the class id.
        Some colours occur more than once, so two different classes can be
        drawn with the same colour.
    """
    return [
        [204, 87, 92], [112, 185, 212], [45, 189, 106], [234, 123, 67],
        [78, 56, 123], [210, 32, 89], [90, 180, 56], [155, 102, 200],
        [33, 147, 176], [255, 183, 76], [67, 123, 89], [190, 60, 45],
        [134, 112, 200], [56, 45, 189], [200, 56, 123], [230, 127, 34],
        [179, 51, 126], [122, 122, 201], [255, 221, 101], [97, 48, 88],
        [225, 49, 112], [55, 120, 254], [181, 43, 25], [212, 59, 3],
        [51, 0, 0], [0, 51, 0], [0, 0, 51], [153, 153, 153],
        [255, 127, 0], [128, 255, 0], [0, 128, 255], [255, 0, 128],
        [128, 255, 128], [255, 0, 0], [128, 255, 0], [255, 0, 128],
        [0, 128, 0], [0, 0, 128], [0, 128, 255], [128, 0, 255],
        [255, 0, 128], [128, 255, 128], [255, 0, 0], [0, 128, 255],
        [128, 0, 255], [0, 0, 0], [255, 128, 0], [0, 255, 0],
        [0, 0, 128], [0, 0, 0], [255, 0, 0], [128, 0, 255],
        [0, 128, 0], [255, 255, 128], [255, 0, 255], [255, 255, 0],
        [128, 0, 0], [255, 128, 128], [0, 128, 255], [128, 0, 255],
        [0, 0, 255], [0, 255, 255], [255, 255, 0], [255, 0, 255],
        [255, 128, 0], [255, 255, 255], [128, 0, 0], [255, 0, 255],
        [255, 255, 0], [0, 0, 128], [255, 255, 255], [0, 255, 0],
        [0, 0, 0], [255, 128, 0], [0, 255, 128], [255, 0, 0],
        [0, 0, 255], [128, 255, 0], [255, 255, 128], [255, 255, 0],
        [255, 128, 128], [255, 0, 128], [255, 128, 255], [255, 0, 128],
        [255, 255, 0], [255, 128, 0], [204, 87, 92], [128, 255, 0],
        [255, 0, 255], [0, 255, 128], [90, 180, 56], [91, 1, 5],
        [92, 64, 34], [93, 128, 0], [94, 255, 0], [95, 34, 87],
        [96, 86, 145], [97, 123, 98], [98, 0, 255], [99, 255, 128],
        [100, 45, 122], [101, 134, 245], [102, 32, 23], [103, 56, 0],
        [104, 76, 98], [105, 176, 90], [106, 102, 200], [107, 56, 78],
        [108, 23, 89], [109, 45, 200], [110, 87, 5], [111, 200, 67],
        [112, 34, 23], [113, 98, 76], [114, 122, 56], [115, 56, 23],
        [116, 78, 90], [117, 200, 45], [118, 23, 56], [119, 56, 189],
        [120, 0, 45], [121, 0, 0], [122, 89, 34], [123, 200, 1],
        [124, 32, 45], [125, 89, 0], [126, 0, 200], [127, 90, 200],
        [128, 45, 200], [129, 0, 123], [130, 200, 23], [131, 32, 200],
        [132, 56, 23], [133, 87, 98], [134, 0, 32], [135, 90, 0],
        [136, 45, 23], [137, 0, 89], [138, 200, 0], [139, 45, 23],
        [140, 123, 0], [141, 45, 200], [142, 98, 23], [143, 0, 98],
        [144, 200, 45], [145, 0, 23], [146, 23, 87], [147, 45, 0],
        [148, 0, 89], [149, 200, 32],
    ]


# One class name per line; the line position is used as the class id.
# Only the line terminator is stripped, so a final line without a trailing
# newline keeps its last character and blank lines still occupy an index.
with open(r'labels.txt', 'r', encoding="utf-8") as fp:
    labels_list = [line.rstrip("\n") for line in fp]

colormap = np.asarray(ade_palette())


def label_to_color_image(label):
    """Map a 2-D array of class ids to their palette colours.

    Args:
        label: 2-D integer array of class ids.

    Returns:
        Array of shape ``label.shape + (3,)`` holding the palette entry for
        every element of ``label``.

    Raises:
        ValueError: If ``label`` is not 2-D or contains an id that is not
            smaller than the number of palette entries.
    """
    if label.ndim != 2:
        raise ValueError("Expect 2-D input label")

    if np.max(label) >= len(colormap):
        raise ValueError("label value too large.")

    return colormap[label]


def draw_plot(pred_img, seg):
    """Build a figure with the blended image and a legend of detected classes.

    Args:
        pred_img: Image array to show in the left (wide) panel.
        seg: Per-pixel class ids; must provide ``.numpy()`` (the caller in
            this file passes a TensorFlow tensor).

    Returns:
        The matplotlib ``Figure``. It has already been removed from pyplot's
        figure registry, so it does not accumulate across requests, but it
        can still be rendered or saved by the caller.
    """
    fig = plt.figure(figsize=(20, 15))
    grid_spec = gridspec.GridSpec(1, 2, width_ratios=[6, 1])

    # Axes are created and drawn through the figure itself rather than via
    # pyplot's implicit "current figure", so overlapping requests cannot end
    # up drawing into each other's figures.
    image_ax = fig.add_subplot(grid_spec[0])
    image_ax.imshow(pred_img)
    image_ax.axis('off')

    label_names = np.asarray(labels_list)
    full_label_map = np.arange(len(label_names)).reshape(len(label_names), 1)
    full_color_map = label_to_color_image(full_label_map)

    # Only classes that actually occur in the prediction go in the legend.
    unique_labels = np.unique(seg.numpy().astype("uint8"))

    legend_ax = fig.add_subplot(grid_spec[1])
    legend_ax.imshow(
        full_color_map[unique_labels].astype(np.uint8), interpolation="nearest"
    )
    legend_ax.yaxis.tick_right()
    legend_ax.set_yticks(range(len(unique_labels)))
    legend_ax.set_yticklabels(label_names[unique_labels])
    legend_ax.set_xticks([])
    legend_ax.tick_params(width=0.0, labelsize=25)

    # plt.figure() registers the figure globally; without closing it every
    # request would leave one more figure alive for the life of the process.
    plt.close(fig)
    return fig


def sepia(input_img):
    """Segment ``input_img`` and return a figure of the overlay plus legend.

    Despite its name, this function performs semantic segmentation; the name
    is kept because the interface below refers to it.

    Args:
        input_img: Image as a NumPy array, as delivered by the ``gr.Image``
            input component.

    Returns:
        The matplotlib figure produced by ``draw_plot``.
    """
    # Normalise to three channels so the blend below always lines up with the
    # (height, width, 3) colour mask, even for grayscale or RGBA input.
    input_img = Image.fromarray(input_img).convert("RGB")

    inputs = feature_extractor(images=input_img, return_tensors="tf")
    outputs = model(**inputs)
    logits = outputs.logits

    # Move axis 1 to the end so the resize below treats axes 1 and 2 as the
    # spatial dimensions.
    logits = tf.transpose(logits, [0, 2, 3, 1])
    logits = tf.image.resize(
        logits, input_img.size[::-1]
    )  # We reverse the shape of `image` because `image.size` returns width and height.
    seg = tf.math.argmax(logits, axis=-1)[0]

    # A single palette lookup replaces a per-class masking loop; the result
    # has shape (height, width, 3).
    color_seg = colormap[seg.numpy()].astype(np.uint8)

    # Show image + mask
    pred_img = np.array(input_img) * 0.5 + color_seg * 0.5
    pred_img = pred_img.astype(np.uint8)

    fig = draw_plot(pred_img, seg)
    return fig


demo = gr.Interface(fn=sepia,
                    inputs=gr.Image(shape=(400, 600)),
                    outputs=['plot'],
                    examples=["test1.jpg", "test2.jpg", "test3.jpg", "test4.jpg"],
                    allow_flagging='never')


demo.launch()