import difflib
import pickle

import gradio as gr
import PIL.Image
import skimage.io as io
from PIL import Image

import clipGPT
import vitGPT
import ViTCoAtt
import cnnrnn
# NOTE(review): Vocabulary is not referenced directly in this file; presumably
# it must be importable here so pickled objects can be restored -- confirm
# before removing.
from build_vocab import Vocabulary


# Caption generation functions
def generate_caption_clipgpt(image, max_tokens, temperature):
    """Return the caption produced by ``clipGPT.generate_caption_clipgpt``.

    All three arguments are forwarded unchanged.
    """
    return clipGPT.generate_caption_clipgpt(image, max_tokens, temperature)


def generate_caption_vitgpt(image, max_tokens, temperature):
    """Return the caption produced by ``vitGPT.generate_caption``.

    All three arguments are forwarded unchanged.
    """
    return vitGPT.generate_caption(image, max_tokens, temperature)


def generate_caption_vitCoAtt(image):
    """Return the caption produced by ``ViTCoAtt.CaptionSampler.main``."""
    return ViTCoAtt.CaptionSampler.main(image)


def generate_caption_cnnrnn(image):
    """Return the CNN-RNN baseline caption for a pre-extracted example image.

    Args:
        image: Key into the pickled feature table (one of the values stored
            in ``imgIDs``), not pixel data.

    Raises:
        KeyError: If ``image`` is not present in the feature table.
        OSError: If the feature file cannot be opened.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only ever point this at a feature file you produced yourself.
    with open('/content/Image_features_ecoder_decoder.pickle', 'rb') as f:
        Xnet_features = pickle.load(f)
    features = Xnet_features[image]
    return cnnrnn.get_result(features)


# NOTE(review): these layout contexts are opened outside of a gr.Blocks();
# gr.Interface arranges its inputs/outputs itself, so the rows/column are
# probably cosmetic no-ops -- confirm before relying on them.
with gr.Row():
    image = gr.Image(label="Upload Chest X-ray", type="pil")

with gr.Row():
    with gr.Column():  # Column for dropdowns and model choice
        max_tokens = gr.Dropdown(list(range(50, 101)), label="Max Tokens", value=75)
        temperature = gr.Slider(0.5, 0.9, step=0.1, label="Temperature", value=0.9)
        imgID = gr.Dropdown(list(range(1,6)), label="Example Image Selected", value=1)
        model_choice = gr.Radio(
            ["CLIP-GPT2", "ViT-GPT2", "ViT-CoAttention", "Baseline Model CNN-RNN"],
            label="Select Model",
        )
        # Not wired to any event in this file; kept so the module-level name
        # stays available.
        generate_button = gr.Button("Generate Caption")

# Reference (radiologist-written) captions, keyed by the example number as a
# string.
real_captions = {
    "1": (
        "No acute cardiopulmonary abnormality. Low lung volumes. "
        "Heart size and mediastinal contour within normal limits. "
        "No focal air space consolidation, pneumothorax, or pleural effusion. "
        "Mild thoracic spine degenerative change."
    ),
    "2": (
        "Left basilar atelectasis and/or infiltrate, with no radiographic evidence of tuberculosis. "
        "Heart size upper limits of normal. "
        "Small amount of left basilar airspace disease. "
        "The right lung is clear. "
        "There are no cavitary lesions seen. "
        "No pneumothorax. "
        "No pleural effusions"
    ),
    # The line break after the first sentence was a raw newline inside the
    # literal (a syntax error); it is spelled as an explicit escape here.
    "3": (
        "Cardiomegaly and small bilateral pleural effusions. \n"
        "Abnormal pulmonary opacities most suggestive of pulmonary edema, "
        "primary differential diagnosis includes infection and aspiration, "
        "clinical correlation recommended "
        "Moderate-to-marked enlargement of the cardiac silhouette, "
        "mediastinal contours appear similar to prior. "
        "Mild bilateral posterior sulcus blunting, "
        "interstitial and alveolar opacities greatest in the central lungs and bases "
        "with indistinct vascular margination."
    ),
    "4": (
        "Severe cardiomegaly. "
        "Limited mediastinal evaluation given body habitus and lordotic projection. "
        "Recommend XXXX for further evaluation of mediastinum given T/Spine injury noted on C/Spine imaging. "
        "Critical result notification documented through Primordial. "
        "Lordotic projection and large body habitus. "
        "Limited mediastinal evaluation. "
        "Severe cardiomegaly. "
        "No visualized pneumothorax. "
        "No large effusion or airspace disease. "
        "No fracture."
    ),
}

# Feature-table keys for the example images, keyed like ``real_captions``.
imgIDs = {
    "1": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR192_IM-0598_0",
    "2": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR194_IM-0609_0",
    "3": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR2637_IM-1122_0",
    "4": "/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR1111_IM-0077_0",
}

caption = gr.Textbox(label="Generated Caption")
real_caption = gr.Textbox(label="Actual Caption")


def predict(img, model_name, max_tokens, temperature, imgID):
    """Generate a caption with the selected model and pair it with the reference.

    Args:
        img: Uploaded image; ignored by the CNN-RNN baseline, which works
            from the pre-extracted features of the selected example instead.
        model_name: One of the ``model_choice`` radio labels (``None`` when
            nothing is selected).
        max_tokens: Forwarded to the CLIP-GPT2 / ViT-GPT2 generators only.
        temperature: Forwarded to the CLIP-GPT2 / ViT-GPT2 generators only.
        imgID: Example number chosen in the dropdown.

    Returns:
        A ``(generated_caption, reference_caption)`` tuple, one element per
        output textbox.
    """
    # The dropdown yields ints while the lookup tables use string keys.
    example_key = str(imgID)

    if model_name == "CLIP-GPT2":
        return (
            generate_caption_clipgpt(img, max_tokens, temperature),
            getCaption(example_key),
        )
    if model_name == "ViT-GPT2":
        return (
            generate_caption_vitgpt(img, max_tokens, temperature),
            getCaption(example_key),
        )
    if model_name == "ViT-CoAttention":
        return generate_caption_vitCoAtt(img), getCaption(example_key)
    if model_name == "Baseline Model CNN-RNN":
        feature_key = getImageID(example_key)
        return generate_caption_cnnrnn(feature_key), getCaption(example_key)

    # Always hand back one value per output component; a lone string here
    # would not match the two declared outputs.
    return "Caption generation for this model is not yet implemented.", ""
def getCaption(imgID):
    """Return (and log to stdout) the reference caption for an example image.

    Args:
        imgID: Example number; ints and strings are both accepted because
            ``real_captions`` is keyed by strings while the dropdown supplies
            ints.

    Returns:
        The stored caption, or a short placeholder message when no caption is
        registered for the selection.
    """
    key = str(imgID)
    reference = real_captions.get(key)
    if reference is None:
        # The dropdown offers more entries than there are stored captions.
        reference = f"No reference caption available for example image {key}."
    print(reference)
    return reference


def getImageID(imgID):
    """Return (and log to stdout) the feature-table key for an example image.

    Args:
        imgID: Example number; ints and strings are both accepted because
            ``imgIDs`` is keyed by strings while the dropdown supplies ints.

    Raises:
        KeyError: If no example image is registered for the selection.
    """
    key = str(imgID)
    try:
        image_key = imgIDs[key]
    except KeyError:
        raise KeyError(
            f"No example image is registered for selection {key!r}"
        ) from None
    print(image_key)
    return image_key


# Each example row fills only the first input (the image).
examples = [[f"example{i}.jpg"] for i in range(1,7)]

description = "You can generate captions by uploading an X-Ray and selecting a model of your choice below. Please select the number of Max Tokens and Temperature setting, if you are testing CLIP GPT2 and VIT GPT2 Models"

title = "MedViT: A Vision Transformer-Driven Method for Generating Medical Reports 🏥🤖"

# Input order must match the positional parameters of ``predict``.
interface = gr.Interface(
    fn=predict,
    inputs=[image, model_choice, max_tokens, temperature, imgID],
    theme="sudeepshouche/minimalist",
    outputs=[caption, real_caption],
    examples=examples,
    title=title,
    description=description,
)

interface.launch(debug=True)