File size: 2,713 Bytes
7ebfeb9
afda258
 
cf05f8b
22ed06b
 
623b4fb
931c795
e82dfb2
7fcb6d2
623b4fb
eba7622
f385ddd
7ebfeb9
afda258
8875dbc
f385ddd
 
7ebfeb9
8875dbc
f385ddd
 
afda258
dee2758
f385ddd
 
fc6f52f
f385ddd
 
 
 
 
 
 
7ebfeb9
97fceae
 
8875dbc
a4593c9
 
8875dbc
97fceae
 
 
 
 
 
 
 
eba7622
8875dbc
97fceae
8875dbc
97fceae
 
 
 
 
 
 
f385ddd
 
 
97fceae
 
 
 
4efeadd
5ee7d76
58ff2f9
ae697d5
 
97fceae
 
 
 
ae697d5
97fceae
a4593c9
 
 
97fceae
 
 
8875dbc
623b4fb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
from PIL import Image
import clipGPT
import vitGPT
import skimage.io as io
import PIL.Image
import difflib
import ViTCoAtt
import cnnrnn
from build_vocab import Vocabulary


    

# Caption generation functions
def generate_caption_clipgpt(image, max_tokens, temperature):
    """Produce a caption for ``image`` through the CLIP-GPT2 backend.

    Thin pass-through to ``clipGPT.generate_caption_clipgpt``: the three
    arguments are forwarded unchanged and whatever that call returns is
    handed back to the caller as-is.
    """
    return clipGPT.generate_caption_clipgpt(image, max_tokens, temperature)

def generate_caption_vitgpt(image, max_tokens, temperature):
    """Produce a caption for ``image`` through the ViT-GPT2 backend.

    Thin pass-through to ``vitGPT.generate_caption``: the three arguments
    are forwarded unchanged and the result of that call is returned as-is.
    """
    return vitGPT.generate_caption(image, max_tokens, temperature)

def generate_caption_vitCoAtt(image):
    """Produce a caption for ``image`` through the ViT-CoAttention backend.

    Thin pass-through to ``ViTCoAtt.CaptionSampler.main``; only the image is
    forwarded (this backend takes no token/temperature settings here) and
    the result of that call is returned as-is.
    """
    return ViTCoAtt.CaptionSampler.main(image)

def generate_caption_cnnrnn(image):
    """Placeholder for the CNN-RNN baseline; always returns an empty string.

    ``image`` is accepted for interface parity with the other caption
    helpers but is currently ignored.
    """
    # TODO: the intended implementation (previously sketched here as
    # commented-out code) would load precomputed features from
    # '/content/Image_features_ecoder_decoder.pickle', look up the entry for
    # ``image`` and pass it to the CNN-RNN model's ``get_result``.
    return ""


# Input/output widgets for the demo. They are created at module level and are
# later passed to gr.Interface as its inputs/outputs.
# NOTE(review): the gr.Row()/gr.Column() contexts below are opened outside any
# gr.Blocks() — confirm whether they influence the layout gr.Interface renders.
with gr.Row():
     
    # Image upload; type="pil" requests that the callback receive a PIL image
    # (per the Gradio Image component documentation).
    image = gr.Image(label="Upload Chest X-ray", type="pil")   
                     
       
with gr.Row():
 
    with gr.Column(): # Column holding the generation settings
        # Selectable token budget: integers 50 through 100 inclusive, default 75.
        max_tokens = gr.Dropdown(list(range(50, 101)), label="Max Tokens", value=75)
        # Sampling temperature: 0.5 to 0.9 in steps of 0.1, default 0.7.
        temperature = gr.Slider(0.5, 0.9, step=0.1, label="Temperature", value=0.7) 

    # Model selector; these labels are the strings predict() compares against.
    # No default value is set here.
    model_choice = gr.Radio(["CLIP-GPT2", "ViT-GPT2", "ViT-CoAttention"], label="Select Model") 
    # NOTE(review): this button is not referenced elsewhere in the visible
    # code — confirm it is still needed.
    generate_button = gr.Button("Generate Caption") 
    
    
# Output widget that displays the string returned by predict().
caption = gr.Textbox(label="Generated Caption") 
    
def predict(img, model_name, max_tokens, temperature):
    """Route a caption request to the backend selected by ``model_name``.

    Args:
        img: Image to caption; forwarded untouched to the chosen backend.
        model_name: Label of the model to use. Recognised values are
            "CLIP-GPT2", "ViT-GPT2", "ViT-CoAttention" and
            "Baseline Model CNN-RNN".
        max_tokens: Forwarded to the CLIP-GPT2 and ViT-GPT2 backends only.
        temperature: Forwarded to the CLIP-GPT2 and ViT-GPT2 backends only.

    Returns:
        Whatever the selected backend helper returns, or a fixed
        "not yet implemented" message when ``model_name`` is not recognised.
    """
    # The original body mixed 6- and 8-space indentation inside the CNN-RNN
    # branch, which made the whole module fail with an IndentationError.
    # Indentation is now a uniform 4 spaces per level.
    if model_name == "CLIP-GPT2":
        return generate_caption_clipgpt(img, max_tokens, temperature)
    elif model_name == "ViT-GPT2":
        return generate_caption_vitgpt(img, max_tokens, temperature)
    elif model_name == "ViT-CoAttention":
        return generate_caption_vitCoAtt(img)
    elif model_name == "Baseline Model CNN-RNN":
        # Debug output of the incoming image, kept from the original code.
        print(img)
        return generate_caption_cnnrnn(img)
    else:
        return "Caption generation for this model is not yet implemented."



# Clickable sample inputs: one row per bundled image, image column only.
examples = [[f"example{index}.jpg"] for index in (1, 2, 3)]

# Page heading and the help text shown beneath it.
title = "MedViT: A Vision Transformer-Driven Method for Generating Medical Reports 🏥🤖"
description = (
    "You can generate captions by uploading an X-Ray and selecting a model of your choice below. "
    "Please select the number of Max Tokens and Temperature setting, "
    "if you are testing CLIP GPT2 and VIT GPT2 Models"
)

# Wire the widgets to predict(); the order of `inputs` matches predict()'s
# positional parameters (img, model_name, max_tokens, temperature).
interface = gr.Interface(
    fn=predict,
    inputs=[image, model_choice, max_tokens, temperature],
    outputs=caption,
    title=title,
    description=description,
    examples=examples,
    theme="soft",
)

# Start the app as soon as the script is executed.
interface.launch(debug=True)