Spaces:

Jyothirmai
/

image-captioning-chest-xrays

Sleeping

File size: 5,001 Bytes

7ebfeb9
afda258
 
cf05f8b
22ed06b
 
623b4fb
931c795
e82dfb2
7fcb6d2
c60fb3f
eba7622
afda258
8875dbc
f385ddd
 
7ebfeb9
8875dbc
f385ddd
 
afda258
dee2758
f385ddd
 
fc6f52f
f385ddd
ec31839
8ba8a00
 
 
4c7d9fb
7ebfeb9
97fceae
 
30e7fb3
a4593c9
8ba8a00
97fceae
 
 
ea41590
e5a0b2d
97fceae
9cda1d3
97fceae
eba7622
8ba8a00
9e98a91
 
 
 
 
 
 
 
8ba8a00
9e98a91
 
 
 
 
ea41590
 
 
9e98a91
 
 
ea41590
97fceae
ea41590
97fceae
ea41590
97fceae
ea41590
f385ddd
e5a0b2d
 
ea41590
97fceae
f103c3f
97fceae
 
9e98a91
 
 
0a2f651
c9254be
ae697d5
 
97fceae
 
 
ea41590
cc88e44
8ba8a00
a4593c9
 
 
97fceae
 
cc88e44
97fceae
8875dbc
623b4fb

import gradio as gr
from PIL import Image
import clipGPT
import vitGPT
import skimage.io as io
import PIL.Image
import difflib
import ViTCoAtt
import cnnrnn
from build_vocab import Vocabulary
import pickle

# Caption generation functions
def generate_caption_clipgpt(image, max_tokens, temperature):
    """Produce a caption with the CLIP-GPT2 model.

    All three arguments are forwarded unchanged to
    ``clipGPT.generate_caption_clipgpt`` and its result is returned as-is.
    """
    return clipGPT.generate_caption_clipgpt(image, max_tokens, temperature)

def generate_caption_vitgpt(image, max_tokens, temperature):
    """Produce a caption with the ViT-GPT2 model.

    All three arguments are forwarded unchanged to ``vitGPT.generate_caption``
    and its result is returned as-is.
    """
    return vitGPT.generate_caption(image, max_tokens, temperature)

def generate_caption_vitCoAtt(image):
    """Produce a caption with the ViT co-attention model.

    The image is handed to ``ViTCoAtt.CaptionSampler.main`` and whatever that
    call returns is passed back to the caller.
    """
    return ViTCoAtt.CaptionSampler.main(image)

def generate_caption_cnnrnn(image):
    """Produce a caption with the baseline CNN-RNN model.

    Args:
        image: Key into the feature mapping stored in
            ``Image_features_ecoder_decoder.pickle`` (callers in this file
            pass the value returned by ``getImageID``), not raw pixel data.

    Returns:
        Whatever ``cnnrnn.get_result`` returns for the looked-up features.

    Raises:
        FileNotFoundError: If the pickle file is not in the working directory.
        KeyError: If ``image`` is not a key of the stored mapping.
    """
    # NOTE(security): pickle.load can execute arbitrary code. This is only
    # acceptable because the file is expected to ship with the application;
    # never point this at user-supplied data.
    with open('Image_features_ecoder_decoder.pickle', 'rb') as f:
        Xnet_features = pickle.load(f)

    # The file handle is released before the (potentially slow) inference.
    features = Xnet_features[image]
    return cnnrnn.get_result(features)


# Widgets used as the inputs and outputs of the gr.Interface created later in
# this file.
# NOTE(review): the Row/Column contexts below are opened outside any
# gr.Blocks(); whether they affect the final gr.Interface layout is not
# evident from this file -- confirm against the Gradio version in use.
with gr.Row():
    # type="pil" asks Gradio to hand the upload to the callback as a PIL image.
    image = gr.Image(label="Upload Chest X-ray", type="pil", height='50',width='50')   
                     
 
with gr.Row():
    with gr.Column(): # Column for dropdowns and model choice
        # Generation settings; predict() forwards them only to the CLIP-GPT2
        # and ViT-GPT2 models.
        max_tokens = gr.Dropdown(list(range(50, 101)), label="Max Tokens", value=75)
        temperature = gr.Slider(0.5, 0.9, step=0.1, label="Temperature", value=0.9)
        # Selects which hard-coded example (keys "1"-"4" in getCaption and
        # getImageID) the request refers to; no default value is set here.
        imgID = gr.Dropdown(["1","2","3","4"], label="Example Image Selected")

    # The choice strings must match the model names compared in predict().
    model_choice = gr.Radio(["CLIP-GPT2", "ViT-GPT2", "ViT-CoAttention", "Baseline Model CNN-RNN"], label="Select Model") 
    generate_button = gr.Button("Generate Caption") 
    

# Output widgets: the model's caption and the stored reference caption.
caption = gr.Textbox(label="Generated Caption")
real_caption = gr.Textbox(label="Actual Caption")

# Reference report text for the four example X-rays, keyed by the value of the
# "Example Image Selected" dropdown. Built once instead of on every call.
_REAL_CAPTIONS = {
    "1": "No acute cardiopulmonary abnormality. Low lung volumes. Heart size and mediastinal contour within normal limits. No focal air space consolidation, pneumothorax, or pleural effusion. Mild thoracic spine degenerative change.",
    "2": "Left basilar atelectasis and/or infiltrate, with no radiographic evidence of tuberculosis. Heart size upper limits of normal. Small amount of left basilar airspace disease. The right lung is clear. There are no cavitary lesions seen. No pneumothorax. No pleural effusions",
    "3": "Cardiomegaly and small bilateral pleural effusions. Abnormal pulmonary opacities most suggestive of pulmonary edema, primary differential diagnosis includes infection and aspiration, clinical correlation recommended Moderate-to-marked enlargement of the cardiac silhouette, mediastinal contours appear similar to prior. Mild bilateral posterior sulcus blunting, interstitial and alveolar opacities greatest in the central lungs and bases with indistinct vascular margination.",
    "4": "Severe cardiomegaly. Limited mediastinal evaluation given body habitus and lordotic projection. Recommend XXXX for further evaluation of mediastinum given T/Spine injury noted on C/Spine imaging. Critical result notification documented through Primordial. Lordotic projection and large body habitus. Limited mediastinal evaluation. Severe cardiomegaly. No visualized pneumothorax. No large effusion or airspace disease. No fracture.",
}


def getCaption(imgID):
    """Return the stored reference caption for an example image.

    Args:
        imgID: Example selector, one of ``"1"`` to ``"4"``. May be ``None``
            (or any other unknown value) when no example was chosen.

    Returns:
        The reference caption for ``imgID``, or the placeholder
        ``"select an image"`` when there is no caption for that value.
        Previously an unknown or missing ID raised ``KeyError``, which
        discarded the generated caption in the caller as well.
    """
    caption_text = _REAL_CAPTIONS.get(imgID, "select an image")
    print(caption_text)
    return caption_text
    
def getImageID(imgID):
    """Translate an example selector into its stored image identifier.

    The returned string is what ``predict`` passes on to
    ``generate_caption_cnnrnn`` as the lookup key for precomputed features.
    The identifier is also echoed to stdout.

    Raises:
        KeyError: If ``imgID`` is not one of ``"1"`` to ``"4"``.
    """
    identifiers = {
        "1": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR192_IM-0598_0",
        "2": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR194_IM-0609_0",
        "3": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR2637_IM-1122_0",
        "4": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR1111_IM-0077_0",
    }
    selected = identifiers[imgID]
    print(selected)
    return selected
    
def predict(img, model_name, max_tokens, temperature, imgID):
    """Run the selected captioning model and fetch the reference caption.

    Args:
        img: Uploaded image, passed to the CLIP-GPT2, ViT-GPT2 and
            ViT-CoAttention models. May be ``None`` when nothing was uploaded.
        model_name: One of ``"CLIP-GPT2"``, ``"ViT-GPT2"``,
            ``"ViT-CoAttention"`` or ``"Baseline Model CNN-RNN"``.
        max_tokens: Forwarded to the CLIP-GPT2 / ViT-GPT2 generators only.
        temperature: Forwarded to the CLIP-GPT2 / ViT-GPT2 generators only.
        imgID: Example selector (``"1"`` to ``"4"``). Required by the baseline
            model, which captions the selected example rather than ``img``.
            May be ``None`` when no example was chosen.

    Returns:
        A ``(generated_caption, actual_caption)`` tuple of strings. When a
        required input is missing, the corresponding slot holds a short
        instruction for the user instead of raising.
    """
    known_models = (
        "CLIP-GPT2",
        "ViT-GPT2",
        "ViT-CoAttention",
        "Baseline Model CNN-RNN",
    )
    if model_name not in known_models:
        return "select a model", "select an image"

    if model_name == "Baseline Model CNN-RNN":
        # The baseline works from precomputed features keyed by the example
        # ID, so it cannot run without a selection.
        if not imgID:
            return (
                "select an example image to use the baseline model",
                "select an image",
            )
        print(imgID)
        generated = generate_caption_cnnrnn(getImageID(imgID))
    elif img is None:
        # Nothing was uploaded; do not hand None to the image models.
        generated = "upload an image"
    elif model_name == "CLIP-GPT2":
        generated = generate_caption_clipgpt(img, max_tokens, temperature)
    elif model_name == "ViT-GPT2":
        generated = generate_caption_vitgpt(img, max_tokens, temperature)
    else:
        generated = generate_caption_vitCoAtt(img)

    # A reference caption exists only for the hard-coded examples; without a
    # selection, keep the generated caption and show a placeholder instead of
    # failing the whole request.
    reference = getCaption(imgID) if imgID else "select an image"
    return generated, reference



# Main call: assemble the Gradio interface and start serving it.

# Text shown at the top of the page.
title = "MedViT: A Vision Transformer-Driven Method for Generating Medical Reports 🏥🤖"
description = "You can generate captions by uploading an X-Ray and selecting a model of your choice below. Please select the number of Max Tokens and Temperature setting, if you are testing CLIP GPT2 and VIT GPT2 Models"

# One single-value row per sample, naming example1.jpg through example6.jpg.
examples = [[f"example{i}.jpg"] for i in range(1, 7)]

interface = gr.Interface(
    fn=predict,
    inputs=[image, model_choice, max_tokens, temperature, imgID],
    outputs=[caption, real_caption],
    examples=examples,
    title=title,
    description=description,
    theme="sudeepshouche/minimalist",
)

interface.launch(debug=True)