import gradio as gr
from PIL import Image
import clipGPT
import vitGPT
import ViTCoAtt
import cnnrnn
from build_vocab import Vocabulary  # kept importable so pickled objects that reference Vocabulary can be loaded

# Caption generation functions

def generate_caption_clipgpt(image, max_tokens, temperature):
    caption = clipGPT.generate_caption_clipgpt(image, max_tokens, temperature)
    return caption


def generate_caption_vitgpt(image, max_tokens, temperature):
    caption = vitGPT.generate_caption(image, max_tokens, temperature)
    return caption


def generate_caption_vitCoAtt(image):
    caption = ViTCoAtt.CaptionSampler.main(image)
    return caption

def generate_caption_cnnrnn(image):
    # Feature-based decoding is currently disabled; the original code read
    # precomputed encoder features from a pickle and decoded from those:
    # with open('/content/Image_features_ecoder_decoder.pickle', 'rb') as f:
    #     Xnet_features = pickle.load(f)
    # image = Xnet_features[image]
    # caption = cnnrnn.get_result(image)  # was "cnn-rnn", an invalid identifier
    caption = ""
    return caption
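

# A minimal sketch of how the disabled path above could be re-enabled. It
# assumes, per the commented-out code, that the pickle holds a dict mapping
# image IDs to precomputed encoder features and that cnnrnn exposes
# get_result(); both are taken from those comments, not verified APIs.
def generate_caption_cnnrnn_from_features(image_id):
    import pickle
    with open('/content/Image_features_ecoder_decoder.pickle', 'rb') as f:
        xnet_features = pickle.load(f)  # assumed: dict of image ID -> features
    return cnnrnn.get_result(xnet_features[image_id])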


# Unrendered components handed to gr.Interface below, which manages its own
# layout and submit button; Row/Column wrappers and a separate Button have
# no effect outside a gr.Blocks context.
image = gr.Image(label="Upload Chest X-ray", type="pil")
max_tokens = gr.Dropdown(list(range(50, 101)), label="Max Tokens", value=75)
temperature = gr.Slider(0.5, 0.9, step=0.1, label="Temperature", value=0.7)
model_choice = gr.Radio(["CLIP-GPT2", "ViT-GPT2", "ViT-CoAttention"], label="Select Model")
caption = gr.Textbox(label="Generated Caption")


def predict(img, model_name, max_tokens, temperature):
    if model_name == "CLIP-GPT2":
        return generate_caption_clipgpt(img, max_tokens, temperature)
    elif model_name == "ViT-GPT2":
        return generate_caption_vitgpt(img, max_tokens, temperature)
    elif model_name == "ViT-CoAttention":
        return generate_caption_vitCoAtt(img)
    elif model_name == "Baseline Model CNN-RNN":
        # Unreachable from the UI: the model_choice Radio above does not
        # offer this option, and generate_caption_cnnrnn is still a stub.
        print(img)
        return generate_caption_cnnrnn(img)
    else:
        return "Caption generation for this model is not yet implemented."
examples = [[f"example{i}.jpg"] for i in range(1,4)]
description= "You can generate captions by uploading an X-Ray and selecting a model of your choice below. Please select the number of Max Tokens and Temperature setting, if you are testing CLIP GPT2 and VIT GPT2 Models"
title = "MedViT: A Vision Transformer-Driven Method for Generating Medical Reports 🏥🤖"

interface = gr.Interface(
    fn=predict,
    inputs=[image, model_choice, max_tokens, temperature],
    outputs=caption,
    examples=examples,
    title=title,
    description=description,
    theme="soft",
)

interface.launch(debug=True)