hyzhou committed
Commit
cca9b7e
1 Parent(s): 2d71a54

upload everything

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. LICENSE +24 -0
  2. MedVersa/pytorch_model.bin +3 -0
  3. README.md +58 -0
  4. __pycache__/utils.cpython-39.pyc +0 -0
  5. demo.py +544 -0
  6. demo_ex/1de015eb-891f1b02-f90be378-d6af1e86-df3270c2.png +0 -0
  7. demo_ex/79eee504-b1b60ab8-5e8dd843-b6ed87aa-670747b1.png +0 -0
  8. demo_ex/Case_00840_0000.nii.gz +3 -0
  9. demo_ex/Case_01013_0000.nii.gz +3 -0
  10. demo_ex/ISIC_0032258.jpg +0 -0
  11. demo_ex/ISIC_0033730.jpg +0 -0
  12. demo_ex/bc25fa99-0d3766cc-7704edb7-5c7a4a63-dc65480a.png +0 -0
  13. demo_ex/c536f749-2326f755-6a65f28f-469affd2-26392ce9.png +0 -0
  14. demo_ex/f39b05b1-f544e51a-cfe317ca-b66a4aa6-1c1dc22d.png +0 -0
  15. demo_ex/f3fefc29-68544ac8-284b820d-858b5470-f579b982.png +0 -0
  16. environment.yml +479 -0
  17. inference.py +107 -0
  18. medomni/__init__.py +31 -0
  19. medomni/__pycache__/__init__.cpython-311.pyc +0 -0
  20. medomni/__pycache__/__init__.cpython-39.pyc +0 -0
  21. medomni/common/__init__.py +0 -0
  22. medomni/common/__pycache__/__init__.cpython-39.pyc +0 -0
  23. medomni/common/__pycache__/config.cpython-39.pyc +0 -0
  24. medomni/common/__pycache__/dist_utils.cpython-39.pyc +0 -0
  25. medomni/common/__pycache__/logger.cpython-39.pyc +0 -0
  26. medomni/common/__pycache__/optims.cpython-39.pyc +0 -0
  27. medomni/common/__pycache__/registry.cpython-39.pyc +0 -0
  28. medomni/common/__pycache__/utils.cpython-39.pyc +0 -0
  29. medomni/common/config.py +468 -0
  30. medomni/common/dist_utils.py +137 -0
  31. medomni/common/gradcam.py +24 -0
  32. medomni/common/logger.py +200 -0
  33. medomni/common/optims.py +119 -0
  34. medomni/common/registry.py +327 -0
  35. medomni/common/utils.py +424 -0
  36. medomni/configs/datasets/medinterp/align.yaml +5 -0
  37. medomni/configs/default.yaml +5 -0
  38. medomni/configs/models/medomni.yaml +12 -0
  39. medomni/conversation/__init__.py +0 -0
  40. medomni/conversation/__pycache__/__init__.cpython-39.pyc +0 -0
  41. medomni/conversation/__pycache__/conversation.cpython-39.pyc +0 -0
  42. medomni/conversation/conversation.py +222 -0
  43. medomni/datasets/__init__.py +0 -0
  44. medomni/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  45. medomni/datasets/__pycache__/data_utils.cpython-39.pyc +0 -0
  46. medomni/datasets/builders/__init__.py +71 -0
  47. medomni/datasets/builders/__pycache__/__init__.cpython-39.pyc +0 -0
  48. medomni/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc +0 -0
  49. medomni/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc +0 -0
  50. medomni/datasets/builders/base_dataset_builder.py +234 -0
LICENSE ADDED
@@ -0,0 +1,24 @@
+ Copyright – President and Fellows of Harvard College, 2024. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer. Redistributions in binary
+ form must reproduce the above copyright notice, this list of conditions and the
+ following disclaimer in the documentation and/or other materials provided with
+ the distribution. Neither the name "Harvard" nor the names of its contributors
+ may be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ OF THE POSSIBILITY OF SUCH DAMAGE.
MedVersa/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3ce596897168d79649e6d6df128a1b409a0cc878092f00667873be6f4b8c9d3
+ size 13993804625
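The three lines above are a Git LFS pointer; the actual ~14 GB checkpoint lives in LFS storage and is resolved at download time. As a minimal sketch (the repo id comes from the README's `from_pretrained` call; the in-repo file path is an assumption read off this diff), the weights can be fetched directly with `huggingface_hub`:

```python
# Sketch: resolve the LFS pointer above to the real checkpoint file.
# repo_id matches from_pretrained('hyzhou/MedVersa') in the README;
# the filename mirrors the path shown in this commit.
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(repo_id="hyzhou/MedVersa",
                            filename="MedVersa/pytorch_model.bin")
print(ckpt_path)  # local cache path of the ~14 GB binary
```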
README.md ADDED
@@ -0,0 +1,58 @@
+ ---
+ title: hyzhouMedVersa
+ app_file: demo_inter.py
+ sdk: gradio
+ sdk_version: 4.24.0
+ ---
+ # MedVersa: An orchestrated medical AI system
+ MedVersa is a compound medical AI system that can coordinate multimodal inputs, orchestrate models and tools for a variety of tasks, and generate multimodal outputs.
+
+ ## Environment
+ MedVersa is written in [Python](https://www.python.org/). We recommend managing your Python environment with conda; install [miniconda](https://docs.anaconda.com/free/miniconda/index.html) or [anaconda](https://www.anaconda.com/) first.
+
+ After installing conda, create a new environment for MedVersa from the provided `environment.yml`:
+ ``` shell
+ conda env create -f environment.yml
+ conda activate medversa
+ ```
+
+ ## Inference
+ ``` python
+ from utils import *
+
+ # --- Launch Model ---
+ device = 'cuda:0'
+ model_cls = registry.get_model_class('medomni') # medomni is the architecture name :)
+ model = model_cls.from_pretrained('hyzhou/MedVersa').to(device)
+ model.eval()
+
+ # --- Define examples ---
+ examples = [
+     [
+         ["./demo_ex/c536f749-2326f755-6a65f28f-469affd2-26392ce9.png"],
+         "Age:30-40.\nGender:F.\nIndication: ___-year-old female with end-stage renal disease not on dialysis presents with dyspnea. PICC line placement.\nComparison: None.",
+         "How would you characterize the findings from <img0>?",
+         "cxr",
+         "report generation",
+     ],
+ ]
+ # --- Define hyperparams ---
+ num_beams = 1
+ do_sample = True
+ min_length = 1
+ top_p = 0.9
+ repetition_penalty = 1
+ length_penalty = 1
+ temperature = 0.1
+
+ # --- Generate a report for a chest X-ray image ---
+ index = 0
+ demo_ex = examples[index]
+ images, context, prompt, modality, task = demo_ex[0], demo_ex[1], demo_ex[2], demo_ex[3], demo_ex[4]
+ seg_mask_2d, seg_mask_3d, output_text = generate_predictions(model, images, context, prompt, modality, task, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature)
+ print(output_text)
+ ```
+ For more details and examples, please refer to `inference.py`.
+
+ ## Demo
+ `CUDA_VISIBLE_DEVICES=0 python demo.py --cfg-path medversa.yaml`
__pycache__/utils.cpython-39.pyc ADDED
Binary file (11.2 kB).
demo.py ADDED
@@ -0,0 +1,544 @@
+ import gradio as gr
+ import argparse
+ import torch
+ import torch.nn.functional as F
+ import torchvision.transforms.functional as TF
+ from torchvision import transforms
+ from PIL import Image
+ import skimage.morphology, skimage.io
+ import cv2
+ import numpy as np
+ import random
+ from transformers import StoppingCriteria, StoppingCriteriaList
+ from copy import deepcopy
+ from medomni.common.config import Config
+ from medomni.common.dist_utils import get_rank
+ from medomni.common.registry import registry
+ import torchio as tio
+ import nibabel as nib
+ from scipy import ndimage
+ import time
+ import ipdb
+
+ # Function to parse command line arguments
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Demo")
+     parser.add_argument("--cfg-path", required=True, help="path to configuration file.")
+     parser.add_argument(
+         "--options",
+         nargs="+",
+         help="override some settings in the used config; key-value pairs in xxx=yyy format will be merged into the config file (deprecated, use --cfg-options instead).",
+     )
+     args = parser.parse_args()
+     return args
+
+ device = 'cuda:0'
+ # Launch model
+ args = parse_args()
+ cfg = Config(args)
+
+ model_config = cfg.model_cfg
+ model_cls = registry.get_model_class(model_config.arch)
+ model = model_cls.from_pretrained('hyzhou/MedVersa').to(device)
+ model.eval()
+ global_images = None  # tracks the upload list across chat turns
+
+ def seg_2d_process(image_path, pred_mask, img_size=224):
+     image = cv2.imread(image_path[0])
+     if pred_mask.sum() != 0:
+         # Keep only the largest connected component of the predicted mask.
+         labels = skimage.morphology.label(pred_mask)
+         labelCount = np.bincount(labels.ravel())
+         largest_label = np.argmax(labelCount[1:]) + 1
+         pred_mask[labels != largest_label] = 0
+         pred_mask[labels == largest_label] = 255
+         pred_mask = pred_mask.astype(np.uint8)
+         contours, _ = cv2.findContours(pred_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
+         if contours:
+             contours = np.vstack(contours)
+             binary_array = np.zeros((img_size, img_size))
+             binary_array = cv2.drawContours(binary_array, contours, -1, 255, thickness=cv2.FILLED)
+             binary_array = cv2.resize(binary_array, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST) / 255
+             image = [Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))]
+             mask = [binary_array]
+         else:
+             # Build the empty mask before `image` is replaced by the PIL list.
+             mask = [np.zeros((image.shape[1], image.shape[0]))]
+             image = [Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))]
+     else:
+         mask = [np.zeros((image.shape[1], image.shape[0]))]
+         image = [Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))]
+     # output_image = cv2.drawContours(binary_array, contours, -1, (110, 0, 255), 2)
+     # output_image_pil = Image.fromarray(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
+     return image, mask
+
+ def seg_3d_process(image_path, seg_mask):
+     img = nib.load(image_path[0]).get_fdata()
+     image = window_scan(img).transpose(2, 0, 1).astype(np.uint8)
+     image_slices = []
+     contour_slices = []
+     if seg_mask.sum() != 0:
+         seg_mask = resize_back_volume_abd(seg_mask, image.shape).astype(np.uint8)
+         for i in range(seg_mask.shape[0]):
+             slice_img = np.fliplr(np.rot90(image[i]))
+             slice_mask = np.fliplr(np.rot90(seg_mask[i]))
+             contours, _ = cv2.findContours(slice_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
+             image_slices.append(Image.fromarray(slice_img))
+             if contours:
+                 binary_array = np.zeros(seg_mask.shape[1:])
+                 binary_array = cv2.drawContours(binary_array, contours, -1, 255, thickness=cv2.FILLED) / 255
+                 binary_array = cv2.resize(binary_array, slice_img.shape, interpolation=cv2.INTER_NEAREST)
+                 contour_slices.append(binary_array)
+             else:
+                 contour_slices.append(np.zeros_like(slice_img))
+     else:
+         # No mask predicted: return every raw slice with an empty contour.
+         for i in range(image.shape[0]):
+             slice_img = np.fliplr(np.rot90(image[i]))
+             image_slices.append(Image.fromarray(slice_img))
+             contour_slices.append(np.zeros_like(slice_img))
+
+     return image_slices, contour_slices
+
+ def det_2d_process(image_path, box):
+     image_slices = []
+     image = cv2.imread(image_path[0])
+     if box is not None:
+         hi, wd, _ = image.shape
+         color = tuple(np.random.random(size=3) * 256)
+         # Box coordinates are normalized; scale to pixel space.
+         x1, y1, x2, y2 = int(box[0]*wd), int(box[1]*hi), int(box[2]*wd), int(box[3]*hi)
+         image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 10)
+     image_slices.append(Image.fromarray(image))
+     return image_slices
+
+ def window_scan(scan, window_center=50, window_width=400):
+     """
+     Apply windowing to a scan.
+
+     Parameters:
+     scan (numpy.ndarray): 3D numpy array of the CT scan
+     window_center (int): The center of the window
+     window_width (int): The width of the window
+
+     Returns:
+     numpy.ndarray: Windowed CT scan
+     """
+     lower_bound = window_center - (window_width // 2)
+     upper_bound = window_center + (window_width // 2)
+
+     windowed_scan = np.clip(scan, lower_bound, upper_bound)
+     windowed_scan = (windowed_scan - lower_bound) / (upper_bound - lower_bound)
+     windowed_scan = (windowed_scan * 255).astype(np.uint8)
+
+     return windowed_scan
+
+ def task_seg_2d(model, preds, hidden_states, image):
+     token_mask = preds == model.seg_token_idx_2d
+     indices = torch.where(token_mask == True)[0].cpu().numpy()
+     feats = model.model_seg_2d.encoder(image.unsqueeze(0)[:, 0])
+     last_feats = feats[-1]
+     target_states = [hidden_states[ind][-1] for ind in indices]
+     if target_states:
+         target_states = torch.cat(target_states).squeeze()
+         seg_states = model.text2seg_2d(target_states).unsqueeze(0)
+         last_feats = last_feats + seg_states.unsqueeze(-1).unsqueeze(-1)
+         last_feats = model.text2seg_2d_gn(last_feats)
+         feats[-1] = last_feats
+         seg_feats = model.model_seg_2d.decoder(*feats)
+         seg_preds = model.model_seg_2d.segmentation_head(seg_feats)
+         seg_probs = F.sigmoid(seg_preds)
+         seg_mask = seg_probs.cpu().squeeze().numpy() >= 0.5
+         return seg_mask
+     else:
+         return None
+
+ def task_seg_3d(model, preds, hidden_states, img_embeds_list):
+     new_img_embeds_list = deepcopy(img_embeds_list)
+     token_mask = preds == model.seg_token_idx_3d
+     indices = torch.where(token_mask == True)[0].cpu().numpy()
+     target_states = [hidden_states[ind][-1] for ind in indices]
+     if target_states:
+         target_states = torch.cat(target_states).squeeze().unsqueeze(0)
+         seg_states = model.text2seg_3d(target_states)
+         last_feats = new_img_embeds_list[-1]
+         last_feats = last_feats + seg_states.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
+         last_feats = model.text2seg_3d_gn(last_feats)
+         new_img_embeds_list[-1] = last_feats
+         seg_preds = model.visual_encoder_3d(encoder_only=False, x_=new_img_embeds_list)
+         seg_probs = F.sigmoid(seg_preds)
+         seg_mask = seg_probs.cpu().squeeze().numpy() >= 0.5
+         return seg_mask
+
+ def task_det_2d(model, preds, hidden_states):
+     token_mask = preds == model.det_token_idx
+     indices = torch.where(token_mask == True)[0].cpu().numpy()
+     target_states = [hidden_states[ind][-1] for ind in indices]
+     if target_states:
+         target_states = torch.cat(target_states).squeeze()
+         det_states = model.text_det(target_states).detach().cpu()
+         return det_states.numpy()
+     return None  # no detection token produced; det_2d_process handles None
+
+ class StoppingCriteriaSub(StoppingCriteria):
+     def __init__(self, stops=[]):
+         super().__init__()
+         self.stops = stops
+
+     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
+         for stop in self.stops:
+             if torch.all((stop == input_ids[0][-len(stop):])).item():
+                 return True
+         return False
+
+ def resize_back_volume_abd(img, target_size):
+     desired_depth = target_size[0]
+     desired_width = target_size[1]
+     desired_height = target_size[2]
+
+     current_depth = img.shape[0]  # [d, w, h]
+     current_width = img.shape[1]
+     current_height = img.shape[2]
+
+     depth = current_depth / desired_depth
+     width = current_width / desired_width
+     height = current_height / desired_height
+
+     depth_factor = 1 / depth
+     width_factor = 1 / width
+     height_factor = 1 / height
+
+     img = ndimage.zoom(img, (depth_factor, width_factor, height_factor), order=0)
+     return img
+
+ def resize_volume_abd(img):
+     img[img <= -200] = -200
+     img[img >= 300] = 300
+
+     desired_depth = 64
+     desired_width = 192
+     desired_height = 192
+
+     current_width = img.shape[0]  # [w, h, d]
+     current_height = img.shape[1]
+     current_depth = img.shape[2]
+
+     depth = current_depth / desired_depth
+     width = current_width / desired_width
+     height = current_height / desired_height
+
+     depth_factor = 1 / depth
+     width_factor = 1 / width
+     height_factor = 1 / height
+
+     img = ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=0)
+     return img
+
+ def load_and_preprocess_image(image):
+     mean = (0.48145466, 0.4578275, 0.40821073)
+     std = (0.26862954, 0.26130258, 0.27577711)
+     transform = transforms.Compose([
+         transforms.Resize([224, 224]),
+         transforms.ToTensor(),
+         transforms.Normalize(mean, std)
+     ])
+     image = transform(image).type(torch.bfloat16).cuda().unsqueeze(0)
+     return image
+
+ def load_and_preprocess_volume(image):
+     img = nib.load(image).get_fdata()
+     image = torch.from_numpy(resize_volume_abd(img)).permute(2, 0, 1)
+     transform = tio.Compose([
+         tio.ZNormalization(masking_method=tio.ZNormalization.mean),
+     ])
+     image = transform(image.unsqueeze(0)).type(torch.bfloat16).cuda()
+     return image
+
+ def read_image(image_path):
+     if image_path.endswith(('.jpg', '.jpeg', '.png')):
+         return load_and_preprocess_image(Image.open(image_path).convert('RGB'))
+     elif image_path.endswith('.nii.gz'):
+         return load_and_preprocess_volume(image_path)
+     else:
+         raise ValueError("Unsupported file format")
+
+ def generate(image_path, image, context, modal, num_imgs, prompt, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature):
+     if (len(context) != 0 and ('report' in prompt or 'finding' in prompt or 'impression' in prompt)) or (len(context) != 0 and modal == 'derm' and ('diagnosis' in prompt or 'issue' in prompt or 'problem' in prompt)):
+         prompt = '<context>' + context + '</context>' + prompt
+     if modal == 'ct' and 'segment' in prompt.lower():
+         if 'liver' in prompt:
+             prompt = 'Segment the liver.'
+         if 'spleen' in prompt:
+             prompt = 'Segment the spleen.'
+         if 'kidney' in prompt:
+             prompt = 'Segment the kidney.'
+         if 'pancrea' in prompt:
+             prompt = 'Segment the pancreas.'
+     img_embeds, atts_img, img_embeds_list = model.encode_img(image.unsqueeze(0), [modal])
+     placeholder = ['<ImageHere>'] * 9
+     prefix = '###Human:' + ''.join([f'<img{i}>' + ''.join(placeholder) + f'</img{i}>' for i in range(num_imgs)])
+     img_embeds, atts_img = model.prompt_wrap(img_embeds, atts_img, [prefix], [num_imgs])
+     prompt += '###Assistant:'
+     prompt_tokens = model.llama_tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(image.device)
+     new_img_embeds, new_atts_img = model.prompt_concat(img_embeds, atts_img, prompt_tokens)
+
+     outputs = model.llama_model.generate(
+         inputs_embeds=new_img_embeds,
+         max_new_tokens=450,
+         stopping_criteria=StoppingCriteriaList([StoppingCriteriaSub(stops=[
+             torch.tensor([835]).type(torch.bfloat16).to(image.device),
+             torch.tensor([2277, 29937]).type(torch.bfloat16).to(image.device)
+         ])]),
+         num_beams=num_beams,
+         do_sample=do_sample,
+         min_length=min_length,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+         length_penalty=length_penalty,
+         temperature=temperature,
+         output_hidden_states=True,
+         return_dict_in_generate=True,
+     )
+
+     hidden_states = outputs.hidden_states
+     preds = outputs.sequences[0]
+     output_image = None
+     seg_mask_2d = None
+     seg_mask_3d = None
+     if sum(preds == model.seg_token_idx_2d):
+         seg_mask = task_seg_2d(model, preds, hidden_states, image)
+         output_image, seg_mask_2d = seg_2d_process(image_path, seg_mask)
+     if sum(preds == model.seg_token_idx_3d):
+         seg_mask = task_seg_3d(model, preds, hidden_states, img_embeds_list)
+         output_image, seg_mask_3d = seg_3d_process(image_path, seg_mask)
+     if sum(preds == model.det_token_idx):
+         det_box = task_det_2d(model, preds, hidden_states)
+         output_image = det_2d_process(image_path, det_box)
+
+     if preds[0] == 0:  # Remove unknown token <unk> at the beginning
+         preds = preds[1:]
+     if preds[0] == 1:  # Remove start token <s> at the beginning
+         preds = preds[1:]
+
+     output_text = model.llama_tokenizer.decode(preds, add_special_tokens=False)
+     output_text = output_text.split('###')[0].split('Assistant:')[-1].strip()
+
+     if 'mel' in output_text and modal == 'derm':
+         output_text = 'The main diagnosis is melanoma.'
+     return output_image, seg_mask_2d, seg_mask_3d, output_text
+
+ def generate_predictions(images, context, prompt, modality, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature):
+     num_imgs = len(images)
+     modal = modality.lower()
+     image_tensors = [read_image(img) for img in images]
+     if modal == 'ct':
+         time.sleep(2)
+     else:
+         time.sleep(1)
+     image_tensor = torch.cat(image_tensors)
+
+     with torch.autocast("cuda"):
+         with torch.no_grad():
+             generated_image, seg_mask_2d, seg_mask_3d, output_text = generate(images, image_tensor, context, modal, num_imgs, prompt, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature)
+
+     return generated_image, seg_mask_2d, seg_mask_3d, output_text
+
+ my_dict = {}
+ def gradio_interface(chatbot, images, context, prompt, modality, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature):
+     global global_images
+     if not images:
+         image = np.zeros((224, 224, 3), dtype=np.uint8)
+         blank_image = Image.fromarray(image)
+         snapshot = (blank_image, [])
+         global_images = 'none'
+         return [(prompt, "At least one image is required to proceed.")], snapshot, gr.update(maximum=0)
+     if not prompt or not modality:
+         image = np.zeros((224, 224, 3), dtype=np.uint8)
+         blank_image = Image.fromarray(image)
+         snapshot = (blank_image, [])
+         global_images = 'none'
+         return [(prompt, "Please provide a prompt and modality to proceed.")], snapshot, gr.update(maximum=0)
+
+     generated_images, seg_mask_2d, seg_mask_3d, output_text = generate_predictions(images, context, prompt, modality, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature)
+     output_images = []
+     input_images = [np.asarray(Image.open(img.name).convert('RGB')).astype(np.uint8) if img.name.endswith(('.jpg', '.jpeg', '.png')) else f"{img.name} (3D Volume)" for img in images]
+     if generated_images is not None:
+         for generated_image in generated_images:
+             output_images.append(np.asarray(generated_image).astype(np.uint8))
+         snapshot = (output_images[0], [])
+         if seg_mask_2d is not None:
+             snapshot = (output_images[0], [(seg_mask_2d[0], "Mask")])
+         if seg_mask_3d is not None:
+             snapshot = (output_images[0], [(seg_mask_3d[0], "Mask")])
+     else:
+         output_images = input_images.copy()
+         snapshot = (output_images[0], [])
+
+     my_dict['image'] = output_images
+     my_dict['mask'] = None
+     if seg_mask_2d is not None:
+         my_dict['mask'] = seg_mask_2d
+     if seg_mask_3d is not None:
+         my_dict['mask'] = seg_mask_3d
+
+     # Reset the chat history when a new set of images is uploaded.
+     if global_images != images and (global_images is not None):
+         chatbot = []
+         chatbot.append((prompt, output_text))
+     else:
+         chatbot.append((prompt, output_text))
+     global_images = images
+
+     return chatbot, snapshot, gr.update(maximum=len(output_images)-1)
+
+ # my_dict = {}
+ # def gradio_interface(images, task, context, prompt, modality, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature):
+ #     if not images:
+ #         return None, "Error: At least one image is required to proceed."
+ #     if not prompt or not task or not modality:
+ #         return None, "Error: Please provide prompt, select task and modality to proceed."
+ #
+ #     generated_images, seg_mask_2d, seg_mask_3d, output_text = generate_predictions(images, task, context, prompt, modality, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature)
+ #     output_images = []
+ #
+ #     input_images = [np.asarray(Image.open(img.name).convert('RGB')).astype(np.uint8) if img.name.endswith(('.jpg', '.jpeg', '.png')) else f"{img.name} (3D Volume)" for img in images]
+ #     if generated_images is not None:
+ #         for generated_image in generated_images:
+ #             output_images.append(np.asarray(generated_image).astype(np.uint8))
+ #         snapshot = (output_images[0], [])
+ #         if seg_mask_2d is not None:
+ #             snapshot = (output_images[0], [(seg_mask_2d[0], "Mask")])
+ #         if seg_mask_3d is not None:
+ #             snapshot = (output_images[0], [(seg_mask_3d[0], "Mask")])
+ #     else:
+ #         output_images = input_images.copy()
+ #         snapshot = (output_images[0], [])
+ #
+ #     my_dict['image'] = output_images
+ #     my_dict['mask'] = None
+ #     if seg_mask_2d is not None:
+ #         my_dict['mask'] = seg_mask_2d
+ #     if seg_mask_3d is not None:
+ #         my_dict['mask'] = seg_mask_3d
+ #
+ #     return output_text, snapshot, gr.update(maximum=len(output_images)-1)
+
+ def render(x):
+     if x > len(my_dict['image'])-1:
+         x = len(my_dict['image'])-1
+     if x < 0:
+         x = 0
+     image = my_dict['image'][x]
+     if my_dict['mask'] is None:
+         return (image, [])
+     else:
+         mask = my_dict['mask'][x]
+         value = (image, [(mask, "Mask")])
+         return value
+
+ def update_context_visibility(task):
+     if task == "report generation" or task == 'classification':
+         return gr.update(visible=True)
+     else:
+         return gr.update(visible=False)
+
+ def reset_chatbot():
+     return []
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     # with gr.Row():
+     #     gr.Markdown("<link href='https://fonts.googleapis.com/css2?family=Libre+Franklin:wght@400;700&display=swap' rel='stylesheet'>")
+     gr.Markdown("# MedVersa")
+     with gr.Row():
+         with gr.Column():
+             image_input = gr.File(label="Upload Images", file_count="multiple", file_types=["image", "numpy"])
+             # task_input = gr.Dropdown(choices=["report generation", "vqa", "localization", "classification"], label="Task")
+             context_input = gr.Textbox(label="Context", placeholder="Enter context here...", lines=3, visible=True)
+             modality_input = gr.Dropdown(choices=["cxr", "derm", "ct"], label="Modality")
+             prompt_input = gr.Textbox(label="Prompt", placeholder="Enter prompt here... (images should be referred to as <img0>, <img1>, ...)", lines=3)
+             submit_button = gr.Button("Generate Predictions")
+             with gr.Accordion("Advanced Settings", open=False):
+                 num_beams = gr.Slider(label="Number of Beams", minimum=1, maximum=10, step=1, value=1)
+                 do_sample = gr.Checkbox(label="Do Sample", value=True)
+                 min_length = gr.Slider(label="Minimum Length", minimum=1, maximum=100, step=1, value=1)
+                 top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.9)
+                 repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.1, value=1.0)
+                 length_penalty = gr.Slider(label="Length Penalty", minimum=1.0, maximum=2.0, step=0.1, value=1.0)
+                 temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.1)
+
+         with gr.Column():
+             # output_text = gr.Textbox(label="Generated Text", lines=10, elem_classes="output-textbox")
+             chatbot = gr.Chatbot(label="Chatbox")
+             slider = gr.Slider(minimum=0, maximum=64, value=1, step=1)
+             output_image = gr.AnnotatedImage(height=448, label="Images")
+
+     # task_input.change(
+     #     fn=update_context_visibility,
+     #     inputs=task_input,
+     #     outputs=context_input
+     # )
+
+     submit_button.click(
+         fn=gradio_interface,
+         inputs=[chatbot, image_input, context_input, prompt_input, modality_input, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature],
+         outputs=[chatbot, output_image, slider]
+     )
+
+     slider.change(
+         render,
+         inputs=[slider],
+         outputs=[output_image],
+     )
+
+     examples = [
+         [
+             ["./demo_ex/c536f749-2326f755-6a65f28f-469affd2-26392ce9.png"],
+             "Age:30-40.\nGender:F.\nIndication: ___-year-old female with end-stage renal disease not on dialysis presents with dyspnea. PICC line placement.\nComparison: None.",
+             "How would you characterize the findings from <img0>?",
+             "cxr",
+         ],
+         [
+             ["./demo_ex/79eee504-b1b60ab8-5e8dd843-b6ed87aa-670747b1.png"],
+             "Age:70-80.\nGender:F.\nIndication: Respiratory distress.\nComparison: None.",
+             "How would you characterize the findings from <img0>?",
+             "cxr",
+         ],
+         [
+             ["./demo_ex/f39b05b1-f544e51a-cfe317ca-b66a4aa6-1c1dc22d.png", "./demo_ex/f3fefc29-68544ac8-284b820d-858b5470-f579b982.png"],
+             "Age:80-90.\nGender:F.\nIndication: ___-year-old female with history of chest pain.\nComparison: None.",
+             "How would you characterize the findings from <img0><img1>?",
+             "cxr",
+         ],
+         [
+             ["./demo_ex/1de015eb-891f1b02-f90be378-d6af1e86-df3270c2.png"],
+             "Age:40-50.\nGender:M.\nIndication: ___-year-old male with shortness of breath.\nComparison: None.",
+             "How would you characterize the findings from <img0>?",
+             "cxr",
+         ],
+         [
+             ["./demo_ex/bc25fa99-0d3766cc-7704edb7-5c7a4a63-dc65480a.png"],
+             "Age:40-50.\nGender:F.\nIndication: History: ___F with tachyacrdia cough doe // infilatrate\nComparison: None.",
+             "How would you characterize the findings from <img0>?",
+             "cxr",
+         ],
+         [
+             ["./demo_ex/ISIC_0032258.jpg"],
+             "Age:70.\nGender:female.\nLocation:back.",
+             "What is primary diagnosis?",
+             "derm",
+         ],
+         [
+             ["./demo_ex/Case_01013_0000.nii.gz"],
+             "",
+             "Segment the liver.",
+             "ct",
+         ],
+         [
+             ["./demo_ex/Case_00840_0000.nii.gz"],
+             "",
+             "Segment the liver.",
+             "ct",
+         ],
+     ]
+
+     gr.Examples(examples, inputs=[image_input, context_input, prompt_input, modality_input])
+
+ # Run Gradio app
+ demo.launch(share=True)
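To make the windowing arithmetic in `window_scan` above concrete: with the default center 50 and width 400, intensities are clipped to [-150, 250] and linearly rescaled to [0, 255]. A self-contained check on synthetic Hounsfield values (illustrative only, not part of the demo):

```python
import numpy as np

def window_scan(scan, window_center=50, window_width=400):
    # Same logic as the demo's window_scan.
    lower_bound = window_center - (window_width // 2)  # -150
    upper_bound = window_center + (window_width // 2)  # 250
    scan = np.clip(scan, lower_bound, upper_bound)
    return ((scan - lower_bound) / (upper_bound - lower_bound) * 255).astype(np.uint8)

hu = np.array([-1000.0, -150.0, 50.0, 250.0, 3000.0])  # synthetic HU samples
print(window_scan(hu))  # [  0   0 127 255 255]
```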
demo_ex/1de015eb-891f1b02-f90be378-d6af1e86-df3270c2.png ADDED
demo_ex/79eee504-b1b60ab8-5e8dd843-b6ed87aa-670747b1.png ADDED
demo_ex/Case_00840_0000.nii.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:27d91a51f4f792740aab30da1416e2a200f637a53e9aa842cf47f2dd96519216
+ size 30618190
demo_ex/Case_01013_0000.nii.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63f597a81e594aa0b5d5b67551658f4a8be831ac6189f2f3f644b0a1098fbb09
+ size 30845920
demo_ex/ISIC_0032258.jpg ADDED
demo_ex/ISIC_0033730.jpg ADDED
demo_ex/bc25fa99-0d3766cc-7704edb7-5c7a4a63-dc65480a.png ADDED
demo_ex/c536f749-2326f755-6a65f28f-469affd2-26392ce9.png ADDED
demo_ex/f39b05b1-f544e51a-cfe317ca-b66a4aa6-1c1dc22d.png ADDED
demo_ex/f3fefc29-68544ac8-284b820d-858b5470-f579b982.png ADDED
environment.yml ADDED
@@ -0,0 +1,479 @@
+ name: medversa
+ channels:
+   - pytorch
+   - nvidia
+   - conda-forge
+   - anaconda
+   - defaults
+ dependencies:
+   - _libgcc_mutex=0.1=main
+   - _openmp_mutex=5.1=1_gnu
+   - abseil-cpp=20211102.0=h27087fc_1
+   - aiosignal=1.3.1=pyhd8ed1ab_0
+   - arrow-cpp=11.0.0=py39h613000e_0
+   - asttokens=2.2.1=pyhd8ed1ab_0
+   - async-timeout=4.0.2=pyhd8ed1ab_0
+   - atk-1.0=2.36.0=ha1a6a79_0
+   - aws-c-common=0.4.57=he6710b0_1
+   - aws-c-event-stream=0.1.6=h2531618_5
+   - aws-checksums=0.1.9=he6710b0_0
+   - aws-sdk-cpp=1.8.185=hce553d0_0
+   - blas=1.0=mkl
+   - boost-cpp=1.70.0=ha2d47e9_1
+   - bottleneck=1.3.5=py39h7deecbd_0
+   - brotlipy=0.7.0=py39h27cfd23_1003
+   - bzip2=1.0.8=h7b6447c_0
+   - c-ares=1.18.1=h7f8727e_0
+   - ca-certificates=2023.12.12=h06a4308_0
+   - cairo=1.16.0=hb05425b_5
+   - certifi=2023.11.17=py39h06a4308_0
+   - cffi=1.15.1=py39h5eee18b_3
+   - colorama=0.4.6=pyhd8ed1ab_0
+   - cryptography=41.0.2=py39h774aba0_0
+   - cuda-cudart=11.7.99=0
+   - cuda-cupti=11.7.101=0
+   - cuda-libraries=11.7.1=0
+   - cuda-nvrtc=11.7.99=0
+   - cuda-nvtx=11.7.91=0
+   - cuda-runtime=11.7.1=0
+   - cudatoolkit=11.7.0=hd8887f6_10
+   - curl=7.87.0=h5eee18b_0
+   - dataclasses=0.8=pyhc8e2a94_3
+   - datasets=2.14.3=pyhd8ed1ab_0
+   - dill=0.3.7=pyhd8ed1ab_0
+   - executing=1.2.0=pyhd8ed1ab_0
+   - expat=2.4.9=h6a678d5_0
+   - ffmpeg=4.3=hf484d3e_0
+   - filelock=3.9.0=py39h06a4308_0
+   - flit-core=3.6.0=pyhd3eb1b0_0
+   - font-ttf-dejavu-sans-mono=2.37=hd3eb1b0_0
+   - font-ttf-inconsolata=2.001=hcb22688_0
+   - font-ttf-source-code-pro=2.030=hd3eb1b0_0
+   - font-ttf-ubuntu=0.83=h8b1ccd4_0
+   - fontconfig=2.14.1=h52c9d5c_1
+   - fonts-anaconda=1=h8fa9717_0
+   - fonts-conda-ecosystem=1=hd3eb1b0_0
+   - freetype=2.12.1=h4a9f257_0
+   - fribidi=1.0.10=h7b6447c_0
+   - frozenlist=1.3.3=py39h5eee18b_0
+   - gdbm=1.18=hd4cb3f1_4
+   - gdk-pixbuf=2.42.10=h5eee18b_0
+   - gettext=0.21.0=hf68c758_0
+   - gflags=2.2.2=he1b5a44_1004
+   - giflib=5.2.1=h5eee18b_3
+   - git=2.34.1=pl5262hc120c5b_0
+   - glib=2.69.1=he621ea3_2
+   - glog=0.5.0=h48cff8f_0
+   - gmp=6.2.1=h295c915_3
+   - gmpy2=2.1.2=py39heeb90bb_0
+   - gnutls=3.6.15=he1e5248_0
+   - gobject-introspection=1.72.0=py39hbb6d50b_2
+   - graphite2=1.3.14=h295c915_1
+   - graphviz=2.50.0=h1b29801_1
+   - grpc-cpp=1.46.1=h33aed49_1
+   - gtk2=2.24.33=h73c1081_2
+   - gts=0.7.6=hb67d8dd_3
+   - harfbuzz=4.3.0=hf52aaf7_1
+   - huggingface_hub=0.16.4=pyhd8ed1ab_0
+   - icu=58.2=he6710b0_3
+   - idna=3.4=py39h06a4308_0
+   - importlib_metadata=6.8.0=hd8ed1ab_0
+   - intel-openmp=2023.1.0=hdb19cb5_46305
+   - jinja2=3.1.2=py39h06a4308_0
+   - jpeg=9e=h5eee18b_1
+   - krb5=1.19.4=h568e23c_0
+   - lame=3.100=h7b6447c_0
+   - lcms2=2.12=h3be6417_0
+   - ld_impl_linux-64=2.38=h1181459_1
+   - lerc=3.0=h295c915_0
+   - libbrotlicommon=1.0.9=h166bdaf_7
+   - libbrotlidec=1.0.9=h166bdaf_7
+   - libbrotlienc=1.0.9=h166bdaf_7
+   - libcublas=11.10.3.66=0
+   - libcufft=10.7.2.124=h4fbf590_0
+   - libcufile=1.7.1.12=0
+   - libcurand=10.3.3.129=0
+   - libcurl=7.87.0=h91b91d3_0
+   - libcusolver=11.4.0.1=0
+   - libcusparse=11.7.4.91=0
+   - libdeflate=1.17=h5eee18b_0
+   - libedit=3.1.20221030=h5eee18b_0
+   - libev=4.33=h7f8727e_1
+   - libevent=2.1.10=h9b69904_4
+   - libffi=3.4.2=h6a678d5_6
+   - libgcc=7.2.0=h69d50b8_2
+   - libgcc-ng=11.2.0=h1234567_1
+   - libgd=2.3.3=h6a678d5_3
+   - libgomp=11.2.0=h1234567_1
+   - libiconv=1.16=h7f8727e_2
+   - libidn2=2.3.4=h5eee18b_0
+   - libnghttp2=1.46.0=hce63b2e_0
+   - libnpp=11.7.4.75=0
+   - libnvjpeg=11.8.0.2=0
+   - libpng=1.6.39=h5eee18b_0
+   - libprotobuf=3.20.3=he621ea3_0
+   - librsvg=2.54.4=h36cc946_2
+   - libssh2=1.10.0=h8f2d780_0
+   - libstdcxx-ng=11.2.0=h1234567_1
+   - libtasn1=4.19.0=h5eee18b_0
+   - libthrift=0.15.0=he6d91bd_0
+   - libtiff=4.5.0=h6a678d5_2
+   - libtool=2.4.6=h6a678d5_1009
+   - libunistring=0.9.10=h27cfd23_0
+   - libuuid=1.41.5=h5eee18b_0
+   - libwebp=1.2.4=h11a3e52_1
+   - libwebp-base=1.2.4=h5eee18b_1
+   - libxcb=1.15=h7f8727e_0
+   - libxml2=2.9.14=h74e7548_0
+   - lz4-c=1.9.4=h6a678d5_0
+   - mkl=2023.1.0=h6d00ec8_46342
+   - mkl-service=2.4.0=py39h5eee18b_1
+   - mkl_fft=1.3.6=py39h417a72b_1
+   - mkl_random=1.2.2=py39h417a72b_1
+   - mpc=1.1.0=h10f8cd9_1
+   - mpfr=4.0.2=hb69a4c5_1
+   - mpmath=1.3.0=py39h06a4308_0
+   - ncurses=6.4=h6a678d5_0
+   - nettle=3.7.3=hbbd107a_1
+   - networkx=3.1=py39h06a4308_0
+   - ninja-base=1.10.2=hd09550d_5
+   - numexpr=2.8.4=py39hc78ab66_1
+   - numpy-base=1.25.0=py39hb5e798b_0
+   - openh264=2.1.1=h4ff587b_0
+   - openjpeg=2.4.0=h3ad879b_0
+   - openssl=1.1.1w=h7f8727e_0
+   - orc=1.7.4=hb3bc3d3_1
+   - pango=1.50.7=h05da053_0
+   - pcre=8.45=h295c915_0
+   - pcre2=10.37=he7ceb23_1
+   - perl=5.34.0=h5eee18b_2
+   - pip=23.0.1=py39h06a4308_0
+   - pixman=0.40.0=h7f8727e_1
+   - poppler=0.81.0=h01f5e8b_2
+   - poppler-data=0.4.11=h06a4308_1
+   - pycparser=2.21=pyhd3eb1b0_0
+   - pyopenssl=23.2.0=py39h06a4308_0
+   - pysocks=1.7.1=py39h06a4308_0
+   - python=3.9.16=h7a1cb2a_2
+   - python-dateutil=2.8.2=pyhd8ed1ab_0
+   - python-devtools=0.11.0=pyhd8ed1ab_0
+   - python-graphviz=0.20.1=py39h06a4308_0
+   - python-xxhash=2.0.2=py39h5eee18b_1
+   - python_abi=3.9=2_cp39
+   - pytorch=2.0.1=py3.9_cuda11.7_cudnn8.5.0_0
+   - pytorch-cuda=11.7=h778d358_5
+   - pytorch-mutex=1.0=cuda
+   - pytz=2023.3=pyhd8ed1ab_0
+   - pyyaml=6.0=py39hb9d737c_4
+   - re2=2022.04.01=h27087fc_0
+   - readline=8.2=h5eee18b_0
+   - sacremoses=0.0.53=pyhd8ed1ab_0
+   - setuptools=66.0.0=py39h06a4308_0
+   - six=1.16.0=pyh6c4a22f_0
+   - snappy=1.1.9=h295c915_0
+   - sqlite=3.41.2=h5eee18b_0
+   - sympy=1.11.1=py39h06a4308_0
+   - tbb=2021.8.0=hdb19cb5_0
+   - tk=8.6.12=h1ccaba5_0
+   - tmux=3.2a=h385fc29_0
+   - tokenizers=0.13.2=py39he7d60b5_1
+   - torchtriton=2.0.0=py39
+   - transformers=4.28.1=pyhd8ed1ab_0
+   - typing_extensions=4.4.0=py39h06a4308_0
+   - utf8proc=2.6.1=h27cfd23_0
+   - wheel=0.38.4=py39h06a4308_0
+   - xz=5.2.10=h5eee18b_1
+   - yaml=0.2.5=h7f98852_2
+   - zlib=1.2.13=h5eee18b_0
+   - zstd=1.5.5=hc292b87_0
+   - pip:
+     - absl-py==2.0.0
+     - accelerate==0.16.0
+     - aiofiles==23.1.0
+     - aiohttp==3.8.4
+     - albumentations==1.3.1
+     - altair==4.2.2
+     - antlr4-python3-runtime==4.9.3
+     - anyio==3.6.2
+     - appdirs==1.4.4
+     - apptools==5.2.1
+     - argon2-cffi==21.3.0
+     - argon2-cffi-bindings==21.2.0
+     - argparse==1.4.0
+     - arrow==1.2.3
+     - attrs==22.2.0
+     - backcall==0.2.0
+     - batchgenerators==0.25
+     - beautifulsoup4==4.12.2
+     - bitsandbytes==0.37.0
+     - bitsandbytes-cuda117==0.26.0.post2
+     - bleach==6.0.0
+     - blis==0.7.9
+     - braceexpand==0.1.7
+     - brotli==1.1.0
+     - cachetools==5.3.1
+     - catalogue==2.0.8
+     - cchardet==2.1.7
+     - chardet==5.1.0
+     - charset-normalizer==3.1.0
+     - click==8.1.3
+     - cmake==3.26.3
+     - comm==0.1.3
+     - commonmark==0.9.1
+     - conda-pack==0.6.0
+     - confection==0.0.4
+     - configobj==5.0.8
+     - conllu==4.5.3
+     - contourpy==1.0.7
+     - cpufeature==0.2.1
+     - cycler==0.11.0
+     - cymem==2.0.7
+     - debugpy==1.6.7
+     - decorator==5.1.1
+     - decord==0.6.0
+     - defusedxml==0.7.1
+     - deprecated==1.2.14
+     - docker-pycreds==0.4.0
+     - efficientnet-pytorch==0.7.1
+     - einops==0.6.1
+     - einops-exts==0.0.4
+     - entrypoints==0.4
+     - envisage==7.0.3
+     - et-xmlfile==1.1.0
+     - exceptiongroup==1.2.0
+     - fairscale==0.4.13
+     - fastapi==0.95.1
+     - fastjsonschema==2.16.3
+     - ffmpy==0.3.0
+     - fonttools==4.38.0
+     - fqdn==1.5.1
+     - fschat==0.1.10
+     - fsspec==2023.4.0
+     - ftfy==6.1.1
+     - future==0.18.3
+     - gitdb==4.0.10
+     - gitpython==3.1.31
+     - google-auth==2.23.3
+     - google-auth-oauthlib==1.0.0
+     - gradio==3.23.0
+     - gradio-client==0.0.8
+     - grpcio==1.59.0
+     - h11==0.14.0
+     - h5py==3.9.0
+     - hjson==3.1.0
+     - httpcore==0.17.0
+     - httpx==0.24.0
+     - humanize==4.8.0
+     - hyperlink==21.0.0
+     - imageio==2.33.0
+     - importlib-metadata==6.6.0
+     - importlib-resources==5.12.0
+     - inflate64==1.0.0
+     - iniconfig==2.0.0
+     - iopath==0.1.10
+     - ipdb==0.13.13
+     - ipykernel==6.22.0
+     - ipython==8.12.0
+     - ipython-genutils==0.2.0
+     - isoduration==20.11.0
+     - jedi==0.18.2
+     - joblib==1.2.0
+     - jsonpointer==2.3
+     - jsonschema==4.17.3
+     - jupyter-client==8.2.0
+     - jupyter-core==5.3.0
+     - jupyter-events==0.6.3
+     - jupyter-server==2.5.0
+     - jupyter-server-terminals==0.4.4
+     - jupyterlab-pygments==0.2.2
+     - kiwisolver==1.4.4
+     - langcodes==3.3.0
+     - lazy-loader==0.3
+     - linecache2==1.0.0
+     - linkify-it-py==2.0.0
+     - lit==16.0.2
+     - llvmlite==0.39.1
+     - markdown==3.5
+     - markdown-it-py==2.2.0
+     - markdown2==2.4.8
+     - markupsafe==2.1.2
+     - matplotlib==3.7.0
+     - matplotlib-inline==0.1.6
+     - mdit-py-plugins==0.3.3
+     - mdurl==0.1.2
+     - mistune==2.0.5
+     - multidict==6.0.4
+     - multiprocess==0.70.15
+     - multivolumefile==0.2.3
+     - munch==4.0.0
+     - murmurhash==1.0.9
+     - mypy-extensions==1.0.0
+     - nbclassic==0.5.6
+     - nbclient==0.7.4
+     - nbconvert==7.3.1
+     - nbformat==5.8.0
+     - nest-asyncio==1.5.6
+     - nh3==0.2.11
+     - nibabel==5.1.0
+     - ninja==1.11.1
+     - nltk==3.8.1
+     - nmslib==2.1.1
+     - notebook==6.5.4
+     - notebook-shim==0.2.3
+     - numba==0.56.4
+     - numpy==1.23.5
+     - nvidia-cublas-cu11==11.10.3.66
+     - nvidia-cuda-cupti-cu11==11.7.101
+     - nvidia-cuda-nvrtc-cu11==11.7.99
+     - nvidia-cuda-runtime-cu11==11.7.99
+     - nvidia-cudnn-cu11==8.5.0.96
+     - nvidia-cufft-cu11==10.9.0.58
+     - nvidia-curand-cu11==10.2.10.91
+     - nvidia-cusolver-cu11==11.4.0.1
+     - nvidia-cusparse-cu11==11.7.4.91
+     - nvidia-nccl-cu11==2.14.3
+     - nvidia-nvtx-cu11==11.7.91
+     - oauthlib==3.2.2
+     - omegaconf==2.3.0
+     - open-clip-torch==2.20.0
+     - openai==0.27.0
+     - opencv-python==4.7.0.72
+     - opencv-python-headless==4.8.0.74
+     - openpyxl==3.1.2
+     - orjson==3.8.11
+     - packaging==23.0
+     - pandas==2.0.1
+     - pandocfilters==1.5.0
+     - parso==0.8.3
+     - pathtools==0.1.2
+     - pathy==0.10.1
+     - peft==0.2.0
+     - pexpect==4.8.0
+     - pickleshare==0.7.5
+     - pillow==9.5.0
+     - platformdirs==3.5.0
+     - pluggy==1.3.0
+     - portalocker==2.7.0
+     - preshed==3.0.8
+     - pretrainedmodels==0.7.4
+     - prometheus-client==0.16.0
+     - prompt-toolkit==3.0.38
+     - protobuf==3.20.3
+     - psutil==5.9.4
+     - ptyprocess==0.7.0
+     - pure-eval==0.2.2
+     - py-cpuinfo==9.0.0
+     - py-rsync==0.0.1a0.dev0
+     - py7zr==0.20.8
+     - pyarrow==11.0.0
+     - pyasn1==0.5.0
+     - pyasn1-modules==0.3.0
+     - pybcj==1.0.2
+     - pybind11==2.6.1
+     - pycocoevalcap==1.2
+     - pycocotools==2.0.6
+     - pycryptodomex==3.19.1
+     - pydantic==1.10.7
+     - pydub==0.25.1
+     - pyface==8.0.0
+     - pygments==2.15.1
+     - pynndescent==0.5.10
+     - pyparsing==3.0.9
+     - pyppmd==1.1.0
+     - pyqt5==5.15.10
+     - pyqt5-qt5==5.15.2
+     - pyqt5-sip==12.13.0
+     - pyrsistent==0.19.3
+     - pysbd==0.3.4
+     - pytest==7.4.3
+     - python-json-logger==2.0.7
+     - python-multipart==0.0.6
+     - python-polylabel==0.6
+     - python-rsync==0.1.0
+     - pyzmq==25.0.2
+     - pyzstd==0.15.9
+     - qudida==0.0.4
+     - regex==2022.10.31
+     - requests==2.29.0
+     - requests-oauthlib==1.3.1
+     - rfc3339-validator==0.1.4
+     - rfc3986-validator==0.1.1
+     - rich==12.6.0
+     - rsa==4.9
+     - safetensors==0.3.1
+     - scikit-image==0.22.0
+     - scipy==1.10.1
+     - scispacy==0.5.2
+     - segmentation-models-pytorch==0.3.3
+     - semantic-version==2.10.0
+     - send2trash==1.8.2
+     - sentence-transformers==2.2.2
+     - sentencepiece==0.1.98
+     - sentry-sdk==1.21.0
+     - setproctitle==1.3.2
+     - shapely==2.0.2
+     - shellingham==1.5.4
+     - shortuuid==1.0.11
+     - simpleitk==2.2.1
+     - smart-open==6.3.0
+     - smmap==5.0.0
+     - sniffio==1.3.0
+     - soupsieve==2.4.1
+     - spacy==3.4.4
+     - spacy-legacy==3.0.12
+     - spacy-loggers==1.0.4
+     - srsly==2.4.6
+     - stack-data==0.6.2
+     - starlette==0.26.1
+     - surface-distance-based-measures==0.1
+     - svgwrite==1.4.3
+     - swig==4.1.1
+     - tenacity==8.2.2
+     - tensorboard==2.14.1
+     - tensorboard-data-server==0.7.1
+     - terminado==0.17.1
+     - texttable==1.7.0
+     - thinc==8.1.9
+     - threadpoolctl==3.1.0
+     - tifffile==2023.9.26
+     - timm==0.9.2
+     - tinycss2==1.2.1
+     - tomli==2.0.1
+     - toolz==0.12.0
+     - torchio==0.19.2
+     - torchvision==0.15.2
+     - tornado==6.3.1
+     - tqdm==4.64.1
+     - traceback2==1.4.0
+     - traitlets==5.9.0
+     - traits==6.4.3
+     - traitsui==8.0.0
+     - triton==2.0.0
+     - typer==0.7.0
+     - typing-extensions==4.5.0
+     - typing-inspect==0.8.0
+     - tzdata==2023.3
+     - uc-micro-py==1.0.1
+     - umap-learn==0.5.3
+     - unittest2==1.1.0
+     - unzip==1.0.0
+     - uri-template==1.2.0
+     - urllib3==1.26.15
+     - uvicorn==0.22.0
+     - vtk==9.3.0
+     - wandb==0.15.0
+     - wasabi==0.10.1
+     - wavedrom==2.0.3.post3
+     - wcwidth==0.2.6
+     - webcolors==1.13
+     - webdataset==0.2.48
+     - webencodings==0.5.1
+     - websocket-client==1.5.1
+     - websockets==11.0.2
+     - werkzeug==3.0.0
+     - wrapt==1.16.0
+     - xxhash==3.3.0
+     - yarl==1.8.2
+     - zipp==3.14.0
+ prefix: /home/zhouhy/anaconda3/envs/medversa
+
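After `conda env create -f environment.yml`, a quick sanity check (a hypothetical snippet, not shipped with the repo) confirms that the key pins above resolved:

```python
# Hypothetical post-install check against the pins in environment.yml.
import torch, transformers, gradio

print(torch.__version__)          # expect 2.0.1 (conda pin)
print(transformers.__version__)   # expect 4.28.1 (conda pin)
print(gradio.__version__)         # expect 3.23.0 (pip pin)
print(torch.cuda.is_available())  # the demo assumes a CUDA device
```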
inference.py ADDED
@@ -0,0 +1,107 @@
+ from utils import *
+
+ # --- Launch Model ---
+ device = 'cuda:0'
+ model_cls = registry.get_model_class('medomni') # medomni is the architecture name :)
+ model = model_cls.from_pretrained('hyzhou/MedVersa').to(device)
+ model.eval()
+
+ # --- Define examples ---
+ examples = [
+     [
+         ["./demo_ex/c536f749-2326f755-6a65f28f-469affd2-26392ce9.png"],
+         "Age:30-40.\nGender:F.\nIndication: ___-year-old female with end-stage renal disease not on dialysis presents with dyspnea. PICC line placement.\nComparison: None.",
+         "How would you characterize the findings from <img0>?",
+         "cxr",
+         "report generation",
+     ],
+     [
+         ["./demo_ex/79eee504-b1b60ab8-5e8dd843-b6ed87aa-670747b1.png"],
+         "Age:70-80.\nGender:F.\nIndication: Respiratory distress.\nComparison: None.",
+         "How would you characterize the findings from <img0>?",
+         "cxr",
+         "report generation",
+     ],
+     [
+         ["./demo_ex/f39b05b1-f544e51a-cfe317ca-b66a4aa6-1c1dc22d.png", "./demo_ex/f3fefc29-68544ac8-284b820d-858b5470-f579b982.png"],
+         "Age:80-90.\nGender:F.\nIndication: ___-year-old female with history of chest pain.\nComparison: None.",
+         "How would you characterize the findings from <img0><img1>?",
+         "cxr",
+         "report generation",
+     ],
+     [
+         ["./demo_ex/1de015eb-891f1b02-f90be378-d6af1e86-df3270c2.png"],
+         "Age:40-50.\nGender:M.\nIndication: ___-year-old male with shortness of breath.\nComparison: None.",
+         "How would you characterize the findings from <img0>?",
+         "cxr",
+         "report generation",
+     ],
+     [
+         ["./demo_ex/bc25fa99-0d3766cc-7704edb7-5c7a4a63-dc65480a.png"],
+         "Age:40-50.\nGender:F.\nIndication: History: ___F with tachyacrdia cough doe // infilatrate\nComparison: None.",
+         "How would you characterize the findings from <img0>?",
+         "cxr",
+         "report generation",
+     ],
+     [
+         ["./demo_ex/ISIC_0032258.jpg"],
+         "Age:70.\nGender:female.\nLocation:back.",
+         "What is primary diagnosis?",
+         "derm",
+         "classification",
+     ],
+     [
+         ["./demo_ex/ISIC_0032258.jpg"],
+         "Age:70.\nGender:female.\nLocation:back.",
+         "Segment the lesion.",
+         "derm",
+         "segmentation",
+     ],
+     [
+         ["./demo_ex/Case_01013_0000.nii.gz"],
+         "",
+         "Segment the liver.",
+         "ct",
+         "segmentation",
+     ],
+     [
+         ["./demo_ex/Case_00840_0000.nii.gz"],
+         "",
+         "Segment the liver.",
+         "ct",
+         "segmentation",
+     ],
+ ]
+ # --- Define hyperparams ---
+ num_beams = 1
+ do_sample = True
+ min_length = 1
+ top_p = 0.9
+ repetition_penalty = 1
+ length_penalty = 1
+ temperature = 0.1
+
+ # --- Generate a report for a chest X-ray image ---
+ index = 0
+ demo_ex = examples[index]
+ images, context, prompt, modality, task = demo_ex[0], demo_ex[1], demo_ex[2], demo_ex[3], demo_ex[4]
+ seg_mask_2d, seg_mask_3d, output_text = generate_predictions(model, images, context, prompt, modality, task, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature)
+ print(output_text)
+
+ # --- Segment the lesion in the dermatology image ---
+ index = 6
+ demo_ex = examples[index]
+ images, context, prompt, modality, task = demo_ex[0], demo_ex[1], demo_ex[2], demo_ex[3], demo_ex[4]
+ seg_mask_2d, seg_mask_3d, output_text = generate_predictions(model, images, context, prompt, modality, task, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature)
+ print(output_text)
+ print(seg_mask_2d[0].shape) # H, W
+
+ # --- Segment the liver in the abdomen CT scan ---
+ index = -2
+ demo_ex = examples[index]
+ images, context, prompt, modality, task = demo_ex[0], demo_ex[1], demo_ex[2], demo_ex[3], demo_ex[4]
+ seg_mask_2d, seg_mask_3d, output_text = generate_predictions(model, images, context, prompt, modality, task, num_beams, do_sample, min_length, top_p, repetition_penalty, length_penalty, temperature)
+ print(output_text)
+ print(len(seg_mask_3d)) # Number of slices
+ print(seg_mask_3d[0].shape) # H, W
+
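`seg_mask_3d` is returned as a list of per-slice H×W masks. One possible follow-up (an illustrative sketch; the identity affine and output filename are assumptions, not part of the repo) is to stack them into a volume and save it with nibabel:

```python
import numpy as np
import nibabel as nib

# Stack the per-slice masks from generate_predictions into a
# (num_slices, H, W) volume; np.eye(4) is a placeholder affine.
volume = np.stack(seg_mask_3d, axis=0).astype(np.uint8)
nib.save(nib.Nifti1Image(volume, affine=np.eye(4)), "liver_mask.nii.gz")
```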
medomni/__init__.py ADDED
@@ -0,0 +1,31 @@
+ """
+  Copyright (c) 2022, salesforce.com, inc.
+  All rights reserved.
+  SPDX-License-Identifier: BSD-3-Clause
+  For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import os
+ import sys
+
+ from omegaconf import OmegaConf
+
+ from medomni.common.registry import registry
+
+ from medomni.datasets.builders import *
+ from medomni.models import *
+ from medomni.processors import *
+ from medomni.tasks import *
+
+
+ root_dir = os.path.dirname(os.path.abspath(__file__))
+ default_cfg = OmegaConf.load(os.path.join(root_dir, "configs/default.yaml"))
+
+ registry.register_path("library_root", root_dir)
+ repo_root = os.path.join(root_dir, "..")
+ registry.register_path("repo_root", repo_root)
+ cache_root = os.path.join(repo_root, default_cfg.env.cache_root)
+ registry.register_path("cache_root", cache_root)
+
+ registry.register("MAX_INT", sys.maxsize)
+ registry.register("SPLIT_NAMES", ["train", "val", "test"])
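These registrations are what make `registry.get_model_class('medomni')` work in `inference.py` and `demo.py`. A small usage sketch (assuming the LAVIS-style `get_path` accessor that mirrors `register_path` above):

```python
# Usage sketch for the registry populated above; get_path is assumed
# to be the LAVIS-style counterpart of register_path.
import medomni  # importing the package runs the registrations
from medomni.common.registry import registry

print(registry.get_path("cache_root"))
model_cls = registry.get_model_class("medomni")
print(model_cls)
```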
medomni/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.69 kB).
medomni/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.01 kB).
medomni/common/__init__.py ADDED
File without changes
medomni/common/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (147 Bytes).
medomni/common/__pycache__/config.cpython-39.pyc ADDED
Binary file (12.1 kB).
medomni/common/__pycache__/dist_utils.cpython-39.pyc ADDED
Binary file (3.77 kB).
medomni/common/__pycache__/logger.cpython-39.pyc ADDED
Binary file (6.46 kB).
medomni/common/__pycache__/optims.cpython-39.pyc ADDED
Binary file (2.99 kB).
medomni/common/__pycache__/registry.cpython-39.pyc ADDED
Binary file (8.99 kB).
medomni/common/__pycache__/utils.cpython-39.pyc ADDED
Binary file (12.6 kB).
medomni/common/config.py ADDED
@@ -0,0 +1,468 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import logging
+ import json
+ from typing import Dict
+
+ from omegaconf import OmegaConf
+ from medomni.common.registry import registry
+
+
+ class Config:
+     def __init__(self, args):
+         self.config = {}
+
+         self.args = args
+
+         # Register the config and configuration for setup
+         registry.register("configuration", self)
+
+         user_config = self._build_opt_list(self.args.options)
+
+         config = OmegaConf.load(self.args.cfg_path)
+
+         runner_config = self.build_runner_config(config)
+         model_config = self.build_model_config(config, **user_config)
+         dataset_config = self.build_dataset_config(config)
+
+         # Validate the user-provided runner configuration.
+         # Model and dataset configurations are supposed to be validated by the respective classes.
+         # [TODO] validate the model/dataset configuration
+         # self._validate_runner_config(runner_config)
+
+         # Override the default configuration with user options.
+         self.config = OmegaConf.merge(
+             runner_config, model_config, dataset_config, user_config
+         )
+
+     def _validate_runner_config(self, runner_config):
+         """
+         This method validates the configuration, such that
+         1) all the user-specified options are valid;
+         2) there are no type mismatches between the user-specified options and the config.
+         """
+         runner_config_validator = create_runner_config_validator()
+         runner_config_validator.validate(runner_config)
+
+     def _build_opt_list(self, opts):
+         opts_dot_list = self._convert_to_dot_list(opts)
+         return OmegaConf.from_dotlist(opts_dot_list)
+
+     @staticmethod
+     def build_model_config(config, **kwargs):
+         model = config.get("model", None)
+         assert model is not None, "Missing model configuration file."
+
+         model_cls = registry.get_model_class(model.arch)
+         assert model_cls is not None, f"Model '{model.arch}' has not been registered."
+
+         model_type = kwargs.get("model.model_type", None)
+         if not model_type:
+             model_type = model.get("model_type", None)
+         # else use the model type selected by the user.
+
+         assert model_type is not None, "Missing model_type."
+
+         model_config_path = model_cls.default_config_path(model_type=model_type)
+
+         model_config = OmegaConf.create()
+         # hierarchy override: customized config > default config
+         model_config = OmegaConf.merge(
+             model_config,
+             OmegaConf.load(model_config_path),
+             {"model": config["model"]},
+         )
+
+         return model_config
+
+     @staticmethod
+     def build_runner_config(config):
+         return {"run": config.run}
+
+     @staticmethod
+     def build_dataset_config(config):
+         datasets = config.get("datasets", None)
+         if datasets is None:
+             raise KeyError(
+                 "Expecting 'datasets' as the root key for dataset configuration."
+             )
+
+         dataset_config = OmegaConf.create()
+
+         for dataset_name in datasets:
+             builder_cls = registry.get_builder_class(dataset_name)
+
+             dataset_config_type = datasets[dataset_name].get("type", "default")
+             dataset_config_path = builder_cls.default_config_path(
+                 type=dataset_config_type
+             )
+
+             # hierarchy override: customized config > default config
+             dataset_config = OmegaConf.merge(
+                 dataset_config,
+                 OmegaConf.load(dataset_config_path),
+                 {"datasets": {dataset_name: config["datasets"][dataset_name]}},
+             )
+
+         return dataset_config
+
+     def _convert_to_dot_list(self, opts):
+         if opts is None:
+             opts = []
+
+         if len(opts) == 0:
+             return opts
+
+         has_equal = opts[0].find("=") != -1
+
+         if has_equal:
+             return opts
+
+         return [(opt + "=" + value) for opt, value in zip(opts[0::2], opts[1::2])]
+
+     def get_config(self):
+         return self.config
+
+     @property
+     def run_cfg(self):
+         return self.config.run
+
+     @property
+     def datasets_cfg(self):
+         return self.config.datasets
+
+     @property
+     def model_cfg(self):
+         return self.config.model
+
+     def pretty_print(self):
+         logging.info("\n===== Running Parameters =====")
+         logging.info(self._convert_node_to_json(self.config.run))
+
+         logging.info("\n====== Dataset Attributes ======")
+         datasets = self.config.datasets
+
+         for dataset in datasets:
+             if dataset in self.config.datasets:
+                 logging.info(f"\n======== {dataset} =======")
+                 dataset_config = self.config.datasets[dataset]
+                 logging.info(self._convert_node_to_json(dataset_config))
+             else:
+                 logging.warning(f"No dataset named '{dataset}' in config. Skipping")
+
+         logging.info("\n====== Model Attributes ======")
+         logging.info(self._convert_node_to_json(self.config.model))
+
+     def _convert_node_to_json(self, node):
+         container = OmegaConf.to_container(node, resolve=True)
+         return json.dumps(container, indent=4, sort_keys=True)
+
+     def to_dict(self):
+         return OmegaConf.to_container(self.config)
+
+
+ def node_to_dict(node):
+     return OmegaConf.to_container(node)
+
+
+ class ConfigValidator:
+     """
+     This is a preliminary implementation to centralize and validate the configuration.
+     May be altered in the future.
+
+     A helper class to validate configurations from yaml files.
+
+     This serves the following purposes:
+         1. Ensure all the options in the yaml are defined; raise an error if not.
+         2. When type mismatches are found, the validator raises an error.
+         3. A central place to store and display helpful messages for supported configurations.
+     """
+
+     class _Argument:
+         def __init__(self, name, choices=None, type=None, help=None):
+             self.name = name
+             self.val = None
+             self.choices = choices
+             self.type = type
+             self.help = help
+
+         def __str__(self):
+             s = f"{self.name}={self.val}"
+             if self.type is not None:
+                 s += f", ({self.type})"
+             if self.choices is not None:
+                 s += f", choices: {self.choices}"
+             if self.help is not None:
+                 s += f", ({self.help})"
+             return s
+
+     def __init__(self, description):
+         self.description = description
+
+         self.arguments = dict()
+
+         self.parsed_args = None
+
+     def __getitem__(self, key):
+         assert self.parsed_args is not None, "No arguments parsed yet."
+
+         return self.parsed_args[key]
+
+     def __str__(self) -> str:
+         return self.format_help()
+
+     def add_argument(self, *args, **kwargs):
+         """
+         Assume the first argument is the name of the argument.
+         """
+         self.arguments[args[0]] = self._Argument(*args, **kwargs)
+
+     def validate(self, config=None):
+         """
+         Validate the config values against the registered arguments.
+         """
+         for k, v in config.items():
+             assert (
+                 k in self.arguments
+             ), f"""{k} is not a valid argument. Supported arguments are {self.format_arguments()}."""
+
+             if self.arguments[k].type is not None:
+                 try:
+                     self.arguments[k].val = self.arguments[k].type(v)
+                 except ValueError:
+                     raise ValueError(f"{k} is not a valid {self.arguments[k].type}.")
+
+             if self.arguments[k].choices is not None:
+                 assert (
+                     v in self.arguments[k].choices
+                 ), f"""{k} must be one of {self.arguments[k].choices}."""
+
+         return config
+
+     def format_arguments(self):
+         return str([f"{k}" for k in sorted(self.arguments.keys())])
+
+     def format_help(self):
+         # description + key-value pair string for each argument
+         help_msg = str(self.description)
+         return help_msg + ", available arguments: " + self.format_arguments()
+
+     def print_help(self):
+         # display help message
+         print(self.format_help())
+
+
+ def create_runner_config_validator():
+     validator = ConfigValidator(description="Runner configurations")
+
+     validator.add_argument(
+         "runner",
+         type=str,
+         choices=["runner_base", "runner_iter"],
+         help="""Runner to use. "runner_base" uses epoch-based training, while "runner_iter"
+         runs based on iterations. Default: runner_base""",
+     )
+     # add an argument for training dataset ratios
+     validator.add_argument(
+         "train_dataset_ratios",
+         type=Dict[str, float],
+         help="""Ratios of the training datasets. This is used by the iteration-based runner.
+         Not supported for the epoch-based runner, because defining an epoch becomes tricky.
+         Default: None""",
+     )
+     validator.add_argument(
+         "max_iters",
+         type=float,
+         help="Maximum number of iterations to run.",
+     )
+     validator.add_argument(
+         "max_epoch",
+         type=int,
+         help="Maximum number of epochs to run.",
+     )
+     # add an argument for iters_per_inner_epoch
+     validator.add_argument(
+         "iters_per_inner_epoch",
+         type=float,
+         help="Number of iterations per inner epoch. This is required when the runner is runner_iter.",
+     )
+     lr_scheds_choices = registry.list_lr_schedulers()
+     validator.add_argument(
+         "lr_sched",
+         type=str,
+         choices=lr_scheds_choices,
+         help="Learning rate scheduler to use, from {}".format(lr_scheds_choices),
+     )
+     task_choices = registry.list_tasks()
+     validator.add_argument(
+         "task",
+         type=str,
+         choices=task_choices,
+         help="Task to use, from {}".format(task_choices),
+     )
+     # add an argument for init_lr
+     validator.add_argument(
+         "init_lr",
+         type=float,
+         help="Initial learning rate. This will be the learning rate after warmup and before decay.",
+     )
+     # add an argument for min_lr
+     validator.add_argument(
+         "min_lr",
+         type=float,
+         help="Minimum learning rate (after decay).",
+     )
+     # add an argument for warmup_lr
+     validator.add_argument(
+         "warmup_lr",
+         type=float,
+         help="Starting learning rate for warmup.",
+     )
+     # add an argument for the learning rate decay rate
+     validator.add_argument(
+         "lr_decay_rate",
+         type=float,
+         help="Learning rate decay rate. Required if using a decaying learning rate scheduler.",
+     )
+     # add an argument for weight decay
+     validator.add_argument(
+         "weight_decay",
+         type=float,
+         help="Weight decay rate.",
+     )
+     # add an argument for the training batch size
+     validator.add_argument(
+         "batch_size_train",
+         type=int,
+         help="Training batch size.",
+     )
+     # add an argument for the evaluation batch size
+     validator.add_argument(
+         "batch_size_eval",
+         type=int,
+         help="Evaluation batch size, including validation and testing.",
+     )
+     # add an argument for the number of data-loading workers
+     validator.add_argument(
+         "num_workers",
+         help="Number of workers for data loading.",
+     )
+     # add an argument for warmup steps
+     validator.add_argument(
+         "warmup_steps",
+         type=int,
+         help="Number of warmup steps. Required if a warmup schedule is used.",
+     )
+     # add an argument for the random seed
+     validator.add_argument(
+         "seed",
+         type=int,
+         help="Random seed.",
+     )
+     # add an argument for the output directory
+     validator.add_argument(
+         "output_dir",
+         type=str,
+         help="Output directory to save checkpoints and logs.",
+     )
+     # add an argument for evaluation-only mode
+     validator.add_argument(
+         "evaluate",
+         help="Whether to only evaluate the model. If true, training will not be performed.",
+     )
+     # add an argument for the splits used for training, e.g. ["train", "val"]
+     validator.add_argument(
+         "train_splits",
+         type=list,
+         help="Splits to use for training.",
+     )
+     # add an argument for the splits used for validation, e.g. ["val"]
+     validator.add_argument(
+         "valid_splits",
+         type=list,
+         help="Splits to use for validation. If not provided, validation is skipped.",
+     )
+     # add an argument for the splits used for testing, e.g. ["test"]
+     validator.add_argument(
+         "test_splits",
+         type=list,
+         help="Splits to use for testing. If not provided, testing is skipped.",
+     )
+     # add an argument for gradient accumulation iterations
+     validator.add_argument(
+         "accum_grad_iters",
+         type=int,
+         help="Number of iterations to accumulate gradients for.",
+     )
+
+     # ====== distributed training ======
+     validator.add_argument(
+         "device",
+         type=str,
+         choices=["cpu", "cuda"],
+         help="Device to use. Supports 'cuda' or 'cpu' for now.",
+     )
+     validator.add_argument(
+         "world_size",
+         type=int,
+         help="Number of processes participating in the job.",
+     )
+     validator.add_argument("dist_url", type=str)
+     validator.add_argument("distributed", type=bool)
+     # add an argument for whether to use a distributed sampler during evaluation
+     validator.add_argument(
+         "use_dist_eval_sampler",
+         type=bool,
+         help="Whether to use a distributed sampler during evaluation.",
+     )
+
+     # ====== task specific ======
+     # generation task specific arguments
+     # add an argument for the maximum length of the text output
+     validator.add_argument(
+         "max_len",
+         type=int,
+         help="Maximum length of the text output.",
+     )
+     # add an argument for the minimum length of the text output
+     validator.add_argument(
+         "min_len",
+         type=int,
+         help="Minimum length of the text output.",
+     )
+     # add an argument for the number of beams
+     validator.add_argument(
+         "num_beams",
+         type=int,
+         help="Number of beams used for beam search.",
+     )
+
+     # vqa task specific arguments
+     # add an argument for the number of answer candidates
+     validator.add_argument(
+         "num_ans_candidates",
+         type=int,
+         help="""For ALBEF and BLIP, these models first rank answers according to likelihood to select answer candidates.""",
+     )
+     # add an argument for the inference method
+     validator.add_argument(
+         "inference_method",
+         type=str,
+         choices=["generate", "rank"],
+         help="""Inference method to use for question answering. If "rank", an answer list is required.""",
+     )
+
+     # ====== model specific ======
+     validator.add_argument(
+         "k_test",
+         type=int,
+         help="Number of top-k most similar samples from ITC/VTC selection to be tested.",
+     )
+
+     return validator
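
For reference, a minimal sketch of how this Config class is typically driven from an entry-point script; the cfg-path default and the override keys shown here are hypothetical, not values the repo guarantees:

import argparse
from medomni.common.config import Config

parser = argparse.ArgumentParser()
parser.add_argument("--cfg-path", dest="cfg_path", default="configs/train.yaml")  # hypothetical config path
parser.add_argument("--options", nargs="+", default=None)  # dot-list overrides, e.g. run.batch_size_train=4
args = parser.parse_args()

cfg = Config(args)     # merges runner, model, and dataset configs, with CLI options taking precedence
cfg.pretty_print()     # logs the merged configuration
run_cfg = cfg.run_cfg  # merged sections are exposed via the run_cfg/model_cfg/datasets_cfg properties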
medomni/common/dist_utils.py ADDED
@@ -0,0 +1,137 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import datetime
+ import functools
+ import os
+
+ import torch
+ import torch.distributed as dist
+ import timm.models.hub as timm_hub
+
+
+ def setup_for_distributed(is_master):
+     """
+     This function disables printing when not in the master process.
+     """
+     import builtins as __builtin__
+
+     builtin_print = __builtin__.print
+
+     def print(*args, **kwargs):
+         force = kwargs.pop("force", False)
+         if is_master or force:
+             builtin_print(*args, **kwargs)
+
+     __builtin__.print = print
+
+
+ def is_dist_avail_and_initialized():
+     if not dist.is_available():
+         return False
+     if not dist.is_initialized():
+         return False
+     return True
+
+
+ def get_world_size():
+     if not is_dist_avail_and_initialized():
+         return 1
+     return dist.get_world_size()
+
+
+ def get_rank():
+     if not is_dist_avail_and_initialized():
+         return 0
+     return dist.get_rank()
+
+
+ def is_main_process():
+     return get_rank() == 0
+
+
+ def init_distributed_mode(args):
+     if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
+         args.rank = int(os.environ["RANK"])
+         args.world_size = int(os.environ["WORLD_SIZE"])
+         args.gpu = int(os.environ["LOCAL_RANK"])
+     elif "SLURM_PROCID" in os.environ:
+         args.rank = int(os.environ["SLURM_PROCID"])
+         args.gpu = args.rank % torch.cuda.device_count()
+     else:
+         print("Not using distributed mode")
+         args.distributed = False
+         return
+
+     args.distributed = True
+
+     torch.cuda.set_device(args.gpu)
+     args.dist_backend = "nccl"
+     print(
+         "| distributed init (rank {}, world {}): {}".format(
+             args.rank, args.world_size, args.dist_url
+         ),
+         flush=True,
+     )
+     torch.distributed.init_process_group(
+         backend=args.dist_backend,
+         init_method=args.dist_url,
+         world_size=args.world_size,
+         rank=args.rank,
+         timeout=datetime.timedelta(
+             days=365
+         ),  # allow auto-downloading and de-compressing
+     )
+     torch.distributed.barrier()
+     setup_for_distributed(args.rank == 0)
+
+
+ def get_dist_info():
+     if torch.__version__ < "1.0":
+         initialized = dist._initialized
+     else:
+         initialized = dist.is_initialized()
+     if initialized:
+         rank = dist.get_rank()
+         world_size = dist.get_world_size()
+     else:  # non-distributed training
+         rank = 0
+         world_size = 1
+     return rank, world_size
+
+
+ def main_process(func):
+     @functools.wraps(func)
+     def wrapper(*args, **kwargs):
+         rank, _ = get_dist_info()
+         if rank == 0:
+             return func(*args, **kwargs)
+
+     return wrapper
+
+
+ def download_cached_file(url, check_hash=True, progress=False):
+     """
+     Download a file from a URL and cache it locally. If the file already exists, it is not downloaded again.
+     If distributed, only the main process downloads the file, and the other processes wait for the file to be downloaded.
+     """
+
+     def get_cached_file_path():
+         # a hack to sync the file path across processes
+         parts = torch.hub.urlparse(url)
+         filename = os.path.basename(parts.path)
+         cached_file = os.path.join(timm_hub.get_cache_dir(), filename)
+
+         return cached_file
+
+     if is_main_process():
+         timm_hub.download_cached_file(url, check_hash, progress)
+
+     if is_dist_avail_and_initialized():
+         dist.barrier()
+
+     return get_cached_file_path()
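
A minimal sketch of how init_distributed_mode is usually invoked; under torchrun the RANK/WORLD_SIZE/LOCAL_RANK variables are set automatically, and in a plain single-process run the function falls back to non-distributed mode. The env:// URL is the common convention and an assumption here:

import argparse
from medomni.common.dist_utils import init_distributed_mode, get_rank, get_world_size, is_main_process

args = argparse.Namespace(dist_url="env://", distributed=True)
init_distributed_mode(args)  # sets args.rank/args.gpu from the environment, or disables distributed mode
if is_main_process():
    print(f"rank {get_rank()} of {get_world_size()} processes")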
medomni/common/gradcam.py ADDED
@@ -0,0 +1,24 @@
+ import numpy as np
+ from matplotlib import pyplot as plt
+ from scipy.ndimage import filters
+ from skimage import transform as skimage_transform
+
+
+ def getAttMap(img, attMap, blur=True, overlap=True):
+     attMap -= attMap.min()
+     if attMap.max() > 0:
+         attMap /= attMap.max()
+     attMap = skimage_transform.resize(attMap, (img.shape[:2]), order=3, mode="constant")
+     if blur:
+         attMap = filters.gaussian_filter(attMap, 0.02 * max(img.shape[:2]))
+         attMap -= attMap.min()
+         attMap /= attMap.max()
+     cmap = plt.get_cmap("jet")
+     attMapV = cmap(attMap)
+     attMapV = np.delete(attMapV, 3, 2)
+     if overlap:
+         attMap = (
+             1 * (1 - attMap**0.7).reshape(attMap.shape + (1,)) * img
+             + (attMap**0.7).reshape(attMap.shape + (1,)) * attMapV
+         )
+     return attMap
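
A quick sketch of feeding getAttMap a normalized RGB image and a coarse attention map; the shapes and random inputs are placeholders:

import numpy as np
from medomni.common.gradcam import getAttMap

img = np.random.rand(224, 224, 3)  # RGB image scaled to [0, 1]
att = np.random.rand(7, 7)         # coarse attention map, e.g. pooled from transformer attention
overlay = getAttMap(img, att, blur=True, overlap=True)
print(overlay.shape)               # (224, 224, 3): the heatmap blended onto the image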
medomni/common/logger.py ADDED
@@ -0,0 +1,200 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import datetime
+ import logging
+ import time
+ from collections import defaultdict, deque
+
+ import torch
+ import torch.distributed as dist
+
+ from medomni.common import dist_utils
+
+
+ class SmoothedValue(object):
+     """Track a series of values and provide access to smoothed values over a
+     window or the global series average.
+     """
+
+     def __init__(self, window_size=20, fmt=None):
+         if fmt is None:
+             fmt = "{median:.4f} ({global_avg:.4f})"
+         self.deque = deque(maxlen=window_size)
+         self.total = 0.0
+         self.count = 0
+         self.fmt = fmt
+
+     def update(self, value, n=1):
+         self.deque.append(value)
+         self.count += n
+         self.total += value * n
+
+     def synchronize_between_processes(self):
+         """
+         Warning: does not synchronize the deque!
+         """
+         if not dist_utils.is_dist_avail_and_initialized():
+             return
+         t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
+         dist.barrier()
+         dist.all_reduce(t)
+         t = t.tolist()
+         self.count = int(t[0])
+         self.total = t[1]
+
+     @property
+     def median(self):
+         d = torch.tensor(list(self.deque))
+         return d.median().item()
+
+     @property
+     def avg(self):
+         d = torch.tensor(list(self.deque), dtype=torch.float32)
+         return d.mean().item()
+
+     @property
+     def global_avg(self):
+         return self.total / self.count
+
+     @property
+     def max(self):
+         return max(self.deque)
+
+     @property
+     def value(self):
+         return self.deque[-1]
+
+     def __str__(self):
+         return self.fmt.format(
+             median=self.median,
+             avg=self.avg,
+             global_avg=self.global_avg,
+             max=self.max,
+             value=self.value,
+         )
+
+
+ class MetricLogger(object):
+     def __init__(self, delimiter="\t"):
+         self.meters = defaultdict(SmoothedValue)
+         self.delimiter = delimiter
+
+     def update(self, **kwargs):
+         for k, v in kwargs.items():
+             if isinstance(v, torch.Tensor):
+                 v = v.item()
+             # assert isinstance(v, (float, int))
+             if isinstance(v, (float, int)):
+                 self.meters[k].update(v)
+             else:
+                 self.meters[k] = v
+
+     def __getattr__(self, attr):
+         if attr in self.meters:
+             return self.meters[attr]
+         if attr in self.__dict__:
+             return self.__dict__[attr]
+         raise AttributeError(
+             "'{}' object has no attribute '{}'".format(type(self).__name__, attr)
+         )
+
+     def __str__(self):
+         loss_str = []
+         for name, meter in self.meters.items():
+             loss_str.append("{}: {}".format(name, str(meter)))
+         return self.delimiter.join(loss_str)
+
+     def global_avg(self):
+         loss_str = []
+         for name, meter in self.meters.items():
+             if not isinstance(meter, str):
+                 loss_str.append("{}: {:.4f}".format(name, meter.global_avg))
+         return self.delimiter.join(loss_str)
+
+     def synchronize_between_processes(self):
+         for meter in self.meters.values():
+             if not isinstance(meter, str):
+                 meter.synchronize_between_processes()
+
+     def add_meter(self, name, meter):
+         self.meters[name] = meter
+
+     def log_every(self, iterable, print_freq, header=None):
+         i = 0
+         if not header:
+             header = ""
+         start_time = time.time()
+         end = time.time()
+         iter_time = SmoothedValue(fmt="{avg:.4f}")
+         data_time = SmoothedValue(fmt="{avg:.4f}")
+         space_fmt = ":" + str(len(str(len(iterable)))) + "d"
+         log_msg = [
+             header,
+             "[{0" + space_fmt + "}/{1}]",
+             "eta: {eta}",
+             "{meters}",
+             "time: {time}",
+             "data: {data}",
+         ]
+         if torch.cuda.is_available():
+             log_msg.append("max mem: {memory:.0f}")
+         log_msg = self.delimiter.join(log_msg)
+         MB = 1024.0 * 1024.0
+         for obj in iterable:
+             data_time.update(time.time() - end)
+             yield obj
+             iter_time.update(time.time() - end)
+             if i % print_freq == 0 or i == len(iterable) - 1:
+                 eta_seconds = iter_time.global_avg * (len(iterable) - i)
+                 eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
+                 if torch.cuda.is_available():
+                     print(
+                         log_msg.format(
+                             i,
+                             len(iterable),
+                             eta=eta_string,
+                             meters=str(self),
+                             time=str(iter_time),
+                             data=str(data_time),
+                             memory=torch.cuda.max_memory_allocated() / MB,
+                         )
+                     )
+                 else:
+                     print(
+                         log_msg.format(
+                             i,
+                             len(iterable),
+                             eta=eta_string,
+                             meters=str(self),
+                             time=str(iter_time),
+                             data=str(data_time),
+                         )
+                     )
+             i += 1
+             end = time.time()
+         total_time = time.time() - start_time
+         total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+         print(
+             "{} Total time: {} ({:.4f} s / it)".format(
+                 header, total_time_str, total_time / len(iterable)
+             )
+         )
+
+
+ class AttrDict(dict):
+     def __init__(self, *args, **kwargs):
+         super(AttrDict, self).__init__(*args, **kwargs)
+         self.__dict__ = self
+
+
+ def setup_logger():
+     logging.basicConfig(
+         level=logging.INFO if dist_utils.is_main_process() else logging.WARN,
+         format="%(asctime)s [%(levelname)s] %(message)s",
+         handlers=[logging.StreamHandler()],
+     )
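
A minimal sketch of MetricLogger in a training loop; the loop body and loss values are synthetic stand-ins:

import torch
from medomni.common.logger import MetricLogger, SmoothedValue, setup_logger

setup_logger()
metric_logger = MetricLogger(delimiter="  ")
metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}"))

for step in metric_logger.log_every(range(100), print_freq=10, header="Train:"):
    loss = torch.rand(1)  # stand-in for a real forward/backward pass
    metric_logger.update(loss=loss.item(), lr=1e-4)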
medomni/common/optims.py ADDED
@@ -0,0 +1,119 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import math
+
+ from medomni.common.registry import registry
+
+
+ @registry.register_lr_scheduler("linear_warmup_step_lr")
+ class LinearWarmupStepLRScheduler:
+     def __init__(
+         self,
+         optimizer,
+         max_epoch,
+         min_lr,
+         init_lr,
+         decay_rate=1,
+         warmup_start_lr=-1,
+         warmup_steps=0,
+         **kwargs
+     ):
+         self.optimizer = optimizer
+
+         self.max_epoch = max_epoch
+         self.min_lr = min_lr
+
+         self.decay_rate = decay_rate
+
+         self.init_lr = init_lr
+         self.warmup_steps = warmup_steps
+         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr
+
+     def step(self, cur_epoch, cur_step):
+         if cur_epoch == 0:
+             warmup_lr_schedule(
+                 step=cur_step,
+                 optimizer=self.optimizer,
+                 max_step=self.warmup_steps,
+                 init_lr=self.warmup_start_lr,
+                 max_lr=self.init_lr,
+             )
+         else:
+             step_lr_schedule(
+                 epoch=cur_epoch,
+                 optimizer=self.optimizer,
+                 init_lr=self.init_lr,
+                 min_lr=self.min_lr,
+                 decay_rate=self.decay_rate,
+             )
+
+
+ @registry.register_lr_scheduler("linear_warmup_cosine_lr")
+ class LinearWarmupCosineLRScheduler:
+     def __init__(
+         self,
+         optimizer,
+         max_epoch,
+         iters_per_epoch,
+         min_lr,
+         init_lr,
+         warmup_steps=0,
+         warmup_start_lr=-1,
+         **kwargs
+     ):
+         self.optimizer = optimizer
+
+         self.max_epoch = max_epoch
+         self.iters_per_epoch = iters_per_epoch
+         self.min_lr = min_lr
+
+         self.init_lr = init_lr
+         self.warmup_steps = warmup_steps
+         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr
+
+     def step(self, cur_epoch, cur_step):
+         total_cur_step = cur_epoch * self.iters_per_epoch + cur_step
+         if total_cur_step < self.warmup_steps:
+             warmup_lr_schedule(
+                 step=cur_step,
+                 optimizer=self.optimizer,
+                 max_step=self.warmup_steps,
+                 init_lr=self.warmup_start_lr,
+                 max_lr=self.init_lr,
+             )
+         else:
+             cosine_lr_schedule(
+                 epoch=total_cur_step,
+                 optimizer=self.optimizer,
+                 max_epoch=self.max_epoch * self.iters_per_epoch,
+                 init_lr=self.init_lr,
+                 min_lr=self.min_lr,
+             )
+
+
+ def cosine_lr_schedule(optimizer, epoch, max_epoch, init_lr, min_lr):
+     """Decay the learning rate"""
+     lr = (init_lr - min_lr) * 0.5 * (
+         1.0 + math.cos(math.pi * epoch / max_epoch)
+     ) + min_lr
+     for param_group in optimizer.param_groups:
+         param_group["lr"] = lr
+
+
+ def warmup_lr_schedule(optimizer, step, max_step, init_lr, max_lr):
+     """Warmup the learning rate"""
+     lr = min(max_lr, init_lr + (max_lr - init_lr) * step / max(max_step, 1))
+     for param_group in optimizer.param_groups:
+         param_group["lr"] = lr
+
+
+ def step_lr_schedule(optimizer, epoch, init_lr, min_lr, decay_rate):
+     """Decay the learning rate"""
+     lr = max(min_lr, init_lr * (decay_rate**epoch))
+     for param_group in optimizer.param_groups:
+         param_group["lr"] = lr
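
A small sketch wiring the cosine scheduler registered above to an optimizer; the model and the epoch/iteration counts are placeholders:

import torch
from medomni.common.optims import LinearWarmupCosineLRScheduler

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
sched = LinearWarmupCosineLRScheduler(
    optimizer, max_epoch=10, iters_per_epoch=100,
    min_lr=1e-5, init_lr=1e-4, warmup_steps=200, warmup_start_lr=1e-6,
)
for epoch in range(10):
    for it in range(100):
        sched.step(cur_epoch=epoch, cur_step=it)  # linear warmup for 200 steps, cosine decay afterwards
        optimizer.step()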
medomni/common/registry.py ADDED
@@ -0,0 +1,327 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ class Registry:
+     mapping = {
+         "builder_name_mapping": {},
+         "task_name_mapping": {},
+         "processor_name_mapping": {},
+         "model_name_mapping": {},
+         "lr_scheduler_name_mapping": {},
+         "runner_name_mapping": {},
+         "state": {},
+         "paths": {},
+     }
+
+     @classmethod
+     def register_builder(cls, name):
+         r"""Register a dataset builder to the registry with key 'name'
+
+         Args:
+             name: Key with which the builder will be registered.
+
+         Usage:
+
+             from medomni.common.registry import registry
+             from medomni.datasets.builders.base_dataset_builder import BaseDatasetBuilder
+         """
+
+         def wrap(builder_cls):
+             from medomni.datasets.builders.base_dataset_builder import BaseDatasetBuilder
+
+             assert issubclass(
+                 builder_cls, BaseDatasetBuilder
+             ), "All builders must inherit BaseDatasetBuilder class, found {}".format(
+                 builder_cls
+             )
+             if name in cls.mapping["builder_name_mapping"]:
+                 raise KeyError(
+                     "Name '{}' already registered for {}.".format(
+                         name, cls.mapping["builder_name_mapping"][name]
+                     )
+                 )
+             cls.mapping["builder_name_mapping"][name] = builder_cls
+             return builder_cls
+
+         return wrap
+
+     @classmethod
+     def register_task(cls, name):
+         r"""Register a task to the registry with key 'name'
+
+         Args:
+             name: Key with which the task will be registered.
+
+         Usage:
+
+             from medomni.common.registry import registry
+         """
+
+         def wrap(task_cls):
+             from medomni.tasks.base_task import BaseTask
+
+             assert issubclass(
+                 task_cls, BaseTask
+             ), "All tasks must inherit BaseTask class"
+             if name in cls.mapping["task_name_mapping"]:
+                 raise KeyError(
+                     "Name '{}' already registered for {}.".format(
+                         name, cls.mapping["task_name_mapping"][name]
+                     )
+                 )
+             cls.mapping["task_name_mapping"][name] = task_cls
+             return task_cls
+
+         return wrap
+
+     @classmethod
+     def register_model(cls, name):
+         r"""Register a model to the registry with key 'name'
+
+         Args:
+             name: Key with which the model will be registered.
+
+         Usage:
+
+             from medomni.common.registry import registry
+         """
+
+         def wrap(model_cls):
+             from medomni.models import BaseModel
+
+             assert issubclass(
+                 model_cls, BaseModel
+             ), "All models must inherit BaseModel class"
+             if name in cls.mapping["model_name_mapping"]:
+                 raise KeyError(
+                     "Name '{}' already registered for {}.".format(
+                         name, cls.mapping["model_name_mapping"][name]
+                     )
+                 )
+             cls.mapping["model_name_mapping"][name] = model_cls
+             return model_cls
+
+         return wrap
+
+     @classmethod
+     def register_processor(cls, name):
+         r"""Register a processor to the registry with key 'name'
+
+         Args:
+             name: Key with which the processor will be registered.
+
+         Usage:
+
+             from medomni.common.registry import registry
+         """
+
+         def wrap(processor_cls):
+             from medomni.processors import BaseProcessor
+
+             assert issubclass(
+                 processor_cls, BaseProcessor
+             ), "All processors must inherit BaseProcessor class"
+             if name in cls.mapping["processor_name_mapping"]:
+                 raise KeyError(
+                     "Name '{}' already registered for {}.".format(
+                         name, cls.mapping["processor_name_mapping"][name]
+                     )
+                 )
+             cls.mapping["processor_name_mapping"][name] = processor_cls
+             return processor_cls
+
+         return wrap
+
+     @classmethod
+     def register_lr_scheduler(cls, name):
+         r"""Register an lr scheduler to the registry with key 'name'
+
+         Args:
+             name: Key with which the lr scheduler will be registered.
+
+         Usage:
+
+             from medomni.common.registry import registry
+         """
+
+         def wrap(lr_sched_cls):
+             if name in cls.mapping["lr_scheduler_name_mapping"]:
+                 raise KeyError(
+                     "Name '{}' already registered for {}.".format(
+                         name, cls.mapping["lr_scheduler_name_mapping"][name]
+                     )
+                 )
+             cls.mapping["lr_scheduler_name_mapping"][name] = lr_sched_cls
+             return lr_sched_cls
+
+         return wrap
+
+     @classmethod
+     def register_runner(cls, name):
+         r"""Register a runner to the registry with key 'name'
+
+         Args:
+             name: Key with which the runner will be registered.
+
+         Usage:
+
+             from medomni.common.registry import registry
+         """
+
+         def wrap(runner_cls):
+             if name in cls.mapping["runner_name_mapping"]:
+                 raise KeyError(
+                     "Name '{}' already registered for {}.".format(
+                         name, cls.mapping["runner_name_mapping"][name]
+                     )
+                 )
+             cls.mapping["runner_name_mapping"][name] = runner_cls
+             return runner_cls
+
+         return wrap
+
+     @classmethod
+     def register_path(cls, name, path):
+         r"""Register a path to the registry with key 'name'
+
+         Args:
+             name: Key with which the path will be registered.
+
+         Usage:
+
+             from medomni.common.registry import registry
+         """
+         assert isinstance(path, str), "All paths must be str."
+         if name in cls.mapping["paths"]:
+             raise KeyError("Name '{}' already registered.".format(name))
+         cls.mapping["paths"][name] = path
+
+     @classmethod
+     def register(cls, name, obj):
+         r"""Register an item to the registry with key 'name'
+
+         Args:
+             name: Key with which the item will be registered.
+
+         Usage::
+
+             from medomni.common.registry import registry
+
+             registry.register("config", {})
+         """
+         path = name.split(".")
+         current = cls.mapping["state"]
+
+         for part in path[:-1]:
+             if part not in current:
+                 current[part] = {}
+             current = current[part]
+
+         current[path[-1]] = obj
+
+     # @classmethod
+     # def get_trainer_class(cls, name):
+     #     return cls.mapping["trainer_name_mapping"].get(name, None)
+
+     @classmethod
+     def get_builder_class(cls, name):
+         return cls.mapping["builder_name_mapping"].get(name, None)
+
+     @classmethod
+     def get_model_class(cls, name):
+         return cls.mapping["model_name_mapping"].get(name, None)
+
+     @classmethod
+     def get_task_class(cls, name):
+         return cls.mapping["task_name_mapping"].get(name, None)
+
+     @classmethod
+     def get_processor_class(cls, name):
+         return cls.mapping["processor_name_mapping"].get(name, None)
+
+     @classmethod
+     def get_lr_scheduler_class(cls, name):
+         return cls.mapping["lr_scheduler_name_mapping"].get(name, None)
+
+     @classmethod
+     def get_runner_class(cls, name):
+         return cls.mapping["runner_name_mapping"].get(name, None)
+
+     @classmethod
+     def list_runners(cls):
+         return sorted(cls.mapping["runner_name_mapping"].keys())
+
+     @classmethod
+     def list_models(cls):
+         return sorted(cls.mapping["model_name_mapping"].keys())
+
+     @classmethod
+     def list_tasks(cls):
+         return sorted(cls.mapping["task_name_mapping"].keys())
+
+     @classmethod
+     def list_processors(cls):
+         return sorted(cls.mapping["processor_name_mapping"].keys())
+
+     @classmethod
+     def list_lr_schedulers(cls):
+         return sorted(cls.mapping["lr_scheduler_name_mapping"].keys())
+
+     @classmethod
+     def list_datasets(cls):
+         return sorted(cls.mapping["builder_name_mapping"].keys())
+
+     @classmethod
+     def get_path(cls, name):
+         return cls.mapping["paths"].get(name, None)
+
+     @classmethod
+     def get(cls, name, default=None, no_warning=False):
+         r"""Get an item from the registry with key 'name'
+
+         Args:
+             name (string): Key whose value needs to be retrieved.
+             default: If passed and the key is not in the registry, the default value
+                 is returned with a warning. Default: None
+             no_warning (bool): If passed as True, no warning is generated when the
+                 key doesn't exist. Useful for MMF's internal operations. Default: False
+         """
+         original_name = name
+         name = name.split(".")
+         value = cls.mapping["state"]
+         for subname in name:
+             value = value.get(subname, default)
+             if value is default:
+                 break
+
+         if (
+             "writer" in cls.mapping["state"]
+             and value == default
+             and no_warning is False
+         ):
+             cls.mapping["state"]["writer"].warning(
+                 "Key {} is not present in registry, returning default value "
+                 "of {}".format(original_name, default)
+             )
+         return value
+
+     @classmethod
+     def unregister(cls, name):
+         r"""Remove an item from the registry with key 'name'
+
+         Args:
+             name: Key which needs to be removed.
+
+         Usage::
+
+             from medomni.common.registry import registry
+
+             config = registry.unregister("config")
+         """
+         return cls.mapping["state"].pop(name, None)
+
+
+ registry = Registry()
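
A minimal sketch of the decorator pattern the registry enables, mirroring how the schedulers in optims.py are registered; the "constant_lr" scheduler below is hypothetical:

from medomni.common.registry import registry

@registry.register_lr_scheduler("constant_lr")  # hypothetical name; must not collide with existing keys
class ConstantLRScheduler:
    def __init__(self, optimizer, **kwargs):
        self.optimizer = optimizer

    def step(self, cur_epoch, cur_step):
        pass  # leave the optimizer's learning rate unchanged

print(registry.list_lr_schedulers())                  # includes "constant_lr" once this module is imported
cls = registry.get_lr_scheduler_class("constant_lr")  # look the class back up by name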
medomni/common/utils.py ADDED
@@ -0,0 +1,424 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import io
+ import json
+ import logging
+ import os
+ import pickle
+ import re
+ import shutil
+ import urllib
+ import urllib.error
+ import urllib.request
+ from typing import Optional
+ from urllib.parse import urlparse
+
+ import numpy as np
+ import pandas as pd
+ import yaml
+ from iopath.common.download import download
+ from iopath.common.file_io import file_lock, g_pathmgr
+ from medomni.common.registry import registry
+ from torch.utils.model_zoo import tqdm
+ from torchvision.datasets.utils import (
+     check_integrity,
+     download_file_from_google_drive,
+     extract_archive,
+ )
+
+
+ def now():
+     from datetime import datetime
+
+     return datetime.now().strftime("%Y%m%d%H%M")[:-1]
+
+
+ def is_url(url_or_filename):
+     parsed = urlparse(url_or_filename)
+     return parsed.scheme in ("http", "https")
+
+
+ def get_cache_path(rel_path):
+     return os.path.expanduser(os.path.join(registry.get_path("cache_root"), rel_path))
+
+
+ def get_abs_path(rel_path):
+     return os.path.join(registry.get_path("library_root"), rel_path)
+
+
+ def load_json(filename):
+     with open(filename, "r") as f:
+         return json.load(f)
+
+
+ # The following are adapted from torchvision and vissl
+ # torchvision: https://github.com/pytorch/vision
+ # vissl: https://github.com/facebookresearch/vissl/blob/main/vissl/utils/download.py
+
+
+ def makedir(dir_path):
+     """
+     Create the directory if it does not exist.
+     """
+     is_success = False
+     try:
+         if not g_pathmgr.exists(dir_path):
+             g_pathmgr.mkdirs(dir_path)
+         is_success = True
+     except BaseException:
+         print(f"Error creating directory: {dir_path}")
+     return is_success
+
+
+ def get_redirected_url(url: str):
+     """
+     Given a URL, returns the URL it redirects to, or the
+     original URL in case of no redirection.
+     """
+     import requests
+
+     with requests.Session() as session:
+         with session.get(url, stream=True, allow_redirects=True) as response:
+             if response.history:
+                 return response.url
+             else:
+                 return url
+
+
+ def to_google_drive_download_url(view_url: str) -> str:
+     """
+     Utility function to transform a view URL of Google Drive
+     into a download URL for Google Drive.
+     Example input:
+         https://drive.google.com/file/d/137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp/view
+     Example output:
+         https://drive.google.com/uc?export=download&id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp
+     """
+     splits = view_url.split("/")
+     assert splits[-1] == "view"
+     file_id = splits[-2]
+     return f"https://drive.google.com/uc?export=download&id={file_id}"
+
+
+ def download_google_drive_url(url: str, output_path: str, output_file_name: str):
+     """
+     Download a file from Google Drive.
+     Downloading a URL from Google Drive requires confirmation when
+     the size of the file is too big (Google Drive notifies that
+     anti-viral checks cannot be performed on such files).
+     """
+     import requests
+
+     with requests.Session() as session:
+
+         # First get the confirmation token and append it to the URL
+         with session.get(url, stream=True, allow_redirects=True) as response:
+             for k, v in response.cookies.items():
+                 if k.startswith("download_warning"):
+                     url = url + "&confirm=" + v
+
+         # Then download the content of the file
+         with session.get(url, stream=True, verify=True) as response:
+             makedir(output_path)
+             path = os.path.join(output_path, output_file_name)
+             total_size = int(response.headers.get("Content-length", 0))
+             with open(path, "wb") as file:
+                 from tqdm import tqdm
+
+                 with tqdm(total=total_size) as progress_bar:
+                     for block in response.iter_content(
+                         chunk_size=io.DEFAULT_BUFFER_SIZE
+                     ):
+                         file.write(block)
+                         progress_bar.update(len(block))
+
+
+ def _get_google_drive_file_id(url: str) -> Optional[str]:
+     parts = urlparse(url)
+
+     if re.match(r"(drive|docs)[.]google[.]com", parts.netloc) is None:
+         return None
+
+     match = re.match(r"/file/d/(?P<id>[^/]*)", parts.path)
+     if match is None:
+         return None
+
+     return match.group("id")
+
+
+ def _urlretrieve(url: str, filename: str, chunk_size: int = 1024) -> None:
+     with open(filename, "wb") as fh:
+         with urllib.request.urlopen(
+             urllib.request.Request(url, headers={"User-Agent": "vissl"})
+         ) as response:
+             with tqdm(total=response.length) as pbar:
+                 for chunk in iter(lambda: response.read(chunk_size), b""):
+                     if not chunk:
+                         break
+                     pbar.update(chunk_size)
+                     fh.write(chunk)
+
+
+ def download_url(
+     url: str,
+     root: str,
+     filename: Optional[str] = None,
+     md5: Optional[str] = None,
+ ) -> None:
+     """Download a file from a URL and place it in root.
+
+     Args:
+         url (str): URL to download the file from.
+         root (str): Directory to place the downloaded file in.
+         filename (str, optional): Name to save the file under.
+             If None, use the basename of the URL.
+         md5 (str, optional): MD5 checksum of the download. If None, do not check.
+     """
+     root = os.path.expanduser(root)
+     if not filename:
+         filename = os.path.basename(url)
+     fpath = os.path.join(root, filename)
+
+     makedir(root)
+
+     # check if the file is already present locally
+     if check_integrity(fpath, md5):
+         print("Using downloaded and verified file: " + fpath)
+         return
+
+     # expand the redirect chain if needed
+     url = get_redirected_url(url)
+
+     # check if the file is located on Google Drive
+     file_id = _get_google_drive_file_id(url)
+     if file_id is not None:
+         return download_file_from_google_drive(file_id, root, filename, md5)
+
+     # download the file
+     try:
+         print("Downloading " + url + " to " + fpath)
+         _urlretrieve(url, fpath)
+     except (urllib.error.URLError, IOError) as e:  # type: ignore[attr-defined]
+         if url[:5] == "https":
+             url = url.replace("https:", "http:")
+             print(
+                 "Failed download. Trying https -> http instead."
+                 " Downloading " + url + " to " + fpath
+             )
+             _urlretrieve(url, fpath)
+         else:
+             raise e
+
+     # check the integrity of the downloaded file
+     if not check_integrity(fpath, md5):
+         raise RuntimeError("File not found or corrupted.")
+
+
+ def download_and_extract_archive(
+     url: str,
+     download_root: str,
+     extract_root: Optional[str] = None,
+     filename: Optional[str] = None,
+     md5: Optional[str] = None,
+     remove_finished: bool = False,
+ ) -> None:
+     download_root = os.path.expanduser(download_root)
+     if extract_root is None:
+         extract_root = download_root
+     if not filename:
+         filename = os.path.basename(url)
+
+     download_url(url, download_root, filename, md5)
+
+     archive = os.path.join(download_root, filename)
+     print("Extracting {} to {}".format(archive, extract_root))
+     extract_archive(archive, extract_root, remove_finished)
+
+
+ def cache_url(url: str, cache_dir: str) -> str:
+     """
+     This implementation downloads the remote resource and caches it locally.
+     The resource will only be downloaded if not previously requested.
+     """
+     parsed_url = urlparse(url)
+     dirname = os.path.join(cache_dir, os.path.dirname(parsed_url.path.lstrip("/")))
+     makedir(dirname)
+     filename = url.split("/")[-1]
+     cached = os.path.join(dirname, filename)
+     with file_lock(cached):
+         if not os.path.isfile(cached):
+             logging.info(f"Downloading {url} to {cached} ...")
+             cached = download(url, dirname, filename=filename)
+     logging.info(f"URL {url} cached in {cached}")
+     return cached
+
+
+ # TODO (prigoyal): convert this into RAII-style API
+ def create_file_symlink(file1, file2):
+     """
+     Simply create the symlink for a given file1 to file2.
+     Useful during model checkpointing to symlink to the
+     latest successful checkpoint.
+     """
+     try:
+         if g_pathmgr.exists(file2):
+             g_pathmgr.rm(file2)
+         g_pathmgr.symlink(file1, file2)
+     except Exception as e:
+         logging.info(f"Could NOT create symlink. Error: {e}")
+
+
+ def save_file(data, filename, append_to_json=True, verbose=True):
+     """
+     Common I/O utility to handle saving data to various file formats.
+     Supported:
+         .pkl, .pickle, .npy, .json
+     Specifically for .json, users have the option to either append (default)
+     or rewrite by passing in a Boolean value to append_to_json.
+     """
+     if verbose:
+         logging.info(f"Saving data to file: {filename}")
+     file_ext = os.path.splitext(filename)[1]
+     if file_ext in [".pkl", ".pickle"]:
+         with g_pathmgr.open(filename, "wb") as fopen:
+             pickle.dump(data, fopen, pickle.HIGHEST_PROTOCOL)
+     elif file_ext == ".npy":
+         with g_pathmgr.open(filename, "wb") as fopen:
+             np.save(fopen, data)
+     elif file_ext == ".json":
+         if append_to_json:
+             with g_pathmgr.open(filename, "a") as fopen:
+                 fopen.write(json.dumps(data, sort_keys=True) + "\n")
+                 fopen.flush()
+         else:
+             with g_pathmgr.open(filename, "w") as fopen:
+                 fopen.write(json.dumps(data, sort_keys=True) + "\n")
+                 fopen.flush()
+     elif file_ext == ".yaml":
+         with g_pathmgr.open(filename, "w") as fopen:
+             dump = yaml.dump(data)
+             fopen.write(dump)
+             fopen.flush()
+     else:
+         raise Exception(f"Saving {file_ext} is not supported yet")
+
+     if verbose:
+         logging.info(f"Saved data to file: {filename}")
+
+
+ def load_file(filename, mmap_mode=None, verbose=True, allow_pickle=False):
+     """
+     Common I/O utility to handle loading data from various file formats.
+     Supported:
+         .pkl, .pickle, .npy, .json
+     For the npy files, we support reading the files in mmap_mode.
+     If reading in mmap_mode is not successful, we load the data without it.
+     """
+     if verbose:
+         logging.info(f"Loading data from file: {filename}")
+
+     file_ext = os.path.splitext(filename)[1]
+     if file_ext == ".txt":
+         with g_pathmgr.open(filename, "r") as fopen:
+             data = fopen.readlines()
+     elif file_ext in [".pkl", ".pickle"]:
+         with g_pathmgr.open(filename, "rb") as fopen:
+             data = pickle.load(fopen, encoding="latin1")
+     elif file_ext == ".npy":
+         if mmap_mode:
+             try:
+                 with g_pathmgr.open(filename, "rb") as fopen:
+                     data = np.load(
+                         fopen,
+                         allow_pickle=allow_pickle,
+                         encoding="latin1",
+                         mmap_mode=mmap_mode,
+                     )
+             except ValueError as e:
+                 logging.info(
+                     f"Could not mmap {filename}: {e}. Trying without g_pathmgr"
+                 )
+                 data = np.load(
+                     filename,
+                     allow_pickle=allow_pickle,
+                     encoding="latin1",
+                     mmap_mode=mmap_mode,
+                 )
+                 logging.info("Successfully loaded without g_pathmgr")
+             except Exception:
+                 logging.info("Could not mmap without g_pathmgr. Trying without mmap")
+                 with g_pathmgr.open(filename, "rb") as fopen:
+                     data = np.load(fopen, allow_pickle=allow_pickle, encoding="latin1")
+         else:
+             with g_pathmgr.open(filename, "rb") as fopen:
+                 data = np.load(fopen, allow_pickle=allow_pickle, encoding="latin1")
+     elif file_ext == ".json":
+         with g_pathmgr.open(filename, "r") as fopen:
+             data = json.load(fopen)
+     elif file_ext == ".yaml":
+         with g_pathmgr.open(filename, "r") as fopen:
+             data = yaml.load(fopen, Loader=yaml.FullLoader)
+     elif file_ext == ".csv":
+         with g_pathmgr.open(filename, "r") as fopen:
+             data = pd.read_csv(fopen)
+     else:
+         raise Exception(f"Reading from {file_ext} is not supported yet")
+     return data
+
+
+ def abspath(resource_path: str):
+     """
+     Make a path absolute, but take into account prefixes like
+     "http://" or "manifold://".
+     """
+     regex = re.compile(r"^\w+://")
+     if regex.match(resource_path) is None:
+         return os.path.abspath(resource_path)
+     else:
+         return resource_path
+
+
+ def makedir(dir_path):
+     """
+     Create the directory if it does not exist.
+     """
+     is_success = False
+     try:
+         if not g_pathmgr.exists(dir_path):
+             g_pathmgr.mkdirs(dir_path)
+         is_success = True
+     except BaseException:
+         logging.info(f"Error creating directory: {dir_path}")
+     return is_success
+
+
+ def is_url(input_url):
+     """
+     Check if an input string is a url. Looks for http(s):// and ignores case.
+     """
+     is_url = re.match(r"^(?:http)s?://", input_url, re.IGNORECASE) is not None
+     return is_url
+
+
+ def cleanup_dir(dir):
+     """
+     Utility for deleting a directory. Useful for cleaning the storage space
+     that contains various training artifacts like checkpoints, data etc.
+     """
+     if os.path.exists(dir):
+         logging.info(f"Deleting directory: {dir}")
+         shutil.rmtree(dir)
+     logging.info(f"Deleted contents of directory: {dir}")
+
+
+ def get_file_size(filename):
+     """
+     Given a file, get the size of the file in MB.
+     """
+     size_in_mb = os.path.getsize(filename) / float(1024**2)
+     return size_in_mb
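
A short round-trip sketch for the save_file/load_file helpers above; the file name is a placeholder:

from medomni.common.utils import save_file, load_file

record = {"split": "train", "loss": 0.42}
save_file(record, "stats.json", append_to_json=False)  # rewrite mode keeps the file a single JSON document
loaded = load_file("stats.json")                       # parsed back via json.load
print(loaded["loss"])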
medomni/configs/datasets/medinterp/align.yaml ADDED
@@ -0,0 +1,5 @@
+ datasets:
+   med:
+     data_type: images
+     build_info:
+       storage: json_files/medinterp
medomni/configs/default.yaml ADDED
@@ -0,0 +1,5 @@
+ env:
+   # For default users
+   # cache_root: "cache"
+   # For internal use with persistent storage
+   cache_root: "/export/home/.cache/medomni"
medomni/configs/models/medomni.yaml ADDED
@@ -0,0 +1,12 @@
+ model:
+   arch: medomni
+
+   # vision encoder
+   precision: "fp16"
+   freeze_vit: True
+
+   # Llama
+   llama_model: "meta-llama/Llama-2-7b-chat-hf"
+
+   # generation configs
+   prompt: ""
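
Since Config parses --options into an OmegaConf dot-list, overriding fields of this model config programmatically follows the same pattern; the override values below are hypothetical:

from omegaconf import OmegaConf

base = OmegaConf.load("medomni/configs/models/medomni.yaml")
overrides = OmegaConf.from_dotlist(["model.precision=fp32", "model.freeze_vit=False"])
merged = OmegaConf.merge(base, overrides)  # later configs win, as in Config.__init__
print(merged.model.precision)  # fp32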
medomni/conversation/__init__.py ADDED
File without changes
medomni/conversation/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (145 Bytes)
 
medomni/conversation/__pycache__/conversation.cpython-39.pyc ADDED
Binary file (7.3 kB)
 
medomni/conversation/conversation.py ADDED
@@ -0,0 +1,222 @@
+ import argparse
+ import time
+ from PIL import Image
+
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
+ from transformers import StoppingCriteria, StoppingCriteriaList
+
+ import dataclasses
+ from enum import auto, Enum
+ from typing import List, Tuple, Any
+
+ from medomni.common.registry import registry
+ import ipdb
+
+
+ class SeparatorStyle(Enum):
+     """Different separator style."""
+     SINGLE = auto()
+     TWO = auto()
+
+
+ @dataclasses.dataclass
+ class Conversation:
+     """A class that keeps all conversation history."""
+     system: str
+     roles: List[str]
+     messages: List[List[str]]
+     offset: int
+     # system_img: List[Image.Image] = []
+     sep_style: SeparatorStyle = SeparatorStyle.SINGLE
+     sep: str = "###"
+     sep2: str = None
+
+     skip_next: bool = False
+     conv_id: Any = None
+
+     def get_prompt(self):
+         if self.sep_style == SeparatorStyle.SINGLE:
+             ret = self.system + self.sep
+             for role, message in self.messages:
+                 if message:
+                     ret += role + ": " + message + self.sep
+                 else:
+                     ret += role + ":"
+             return ret
+         elif self.sep_style == SeparatorStyle.TWO:
+             seps = [self.sep, self.sep2]
+             ret = self.system + seps[0]
+             for i, (role, message) in enumerate(self.messages):
+                 if message:
+                     ret += role + ": " + message + seps[i % 2]
+                 else:
+                     ret += role + ":"
+             return ret
+         else:
+             raise ValueError(f"Invalid style: {self.sep_style}")
+
+     def append_message(self, role, message):
+         self.messages.append([role, message])
+
+     def to_gradio_chatbot(self):
+         ret = []
+         for i, (role, msg) in enumerate(self.messages[self.offset:]):
+             if i % 2 == 0:
+                 ret.append([msg, None])
+             else:
+                 ret[-1][-1] = msg
+         return ret
+
+     def copy(self):
+         return Conversation(
+             system=self.system,
+             roles=self.roles,
+             messages=[[x, y] for x, y in self.messages],
+             offset=self.offset,
+             sep_style=self.sep_style,
+             sep=self.sep,
+             sep2=self.sep2,
+             conv_id=self.conv_id)
+
+     def dict(self):
+         return {
+             "system": self.system,
+             # "system_img": self.system_img,
+             "roles": self.roles,
+             "messages": self.messages,
+             "offset": self.offset,
+             "sep": self.sep,
+             "sep2": self.sep2,
+             "conv_id": self.conv_id,
+         }
+
+
+ class StoppingCriteriaSub(StoppingCriteria):
+
+     def __init__(self, stops=[], encounters=1):
+         super().__init__()
+         self.stops = stops
+
+     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
+         for stop in self.stops:
+             if torch.all((stop == input_ids[0][-len(stop):])).item():
+                 return True
+
+         return False
+
+
+ CONV_VISION = Conversation(
+     system="Give the following image: <Img>ImageContent</Img>. "
+            "You will be able to see the image once I provide it to you. Act as a clinician and answer my questions.",
+     # "You will be able to see the image once I provide it to you. Please answer my questions.",
+     # system="",
+     roles=("Human", "Assistant"),
+     messages=[],
+     offset=2,
+     sep_style=SeparatorStyle.SINGLE,
+     sep="###",
+ )
+
+ class Chat:
+     def __init__(self, model, vis_processor, device='cuda:0'):
+         self.device = device
+         self.model = model
+         self.vis_processor = vis_processor
+         stop_words_ids = [torch.tensor([835]).to(self.device),
+                           torch.tensor([2277, 29937]).to(self.device)]  # '###' can be encoded in two different ways.
+         self.stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
+
+     def ask(self, text, conv):
+         if len(conv.messages) > 0 and conv.messages[-1][0] == conv.roles[0] \
+                 and conv.messages[-1][1][-6:] == '</Img>':  # last message is image.
+             conv.messages[-1][1] = ' '.join([conv.messages[-1][1], text])
+         else:
+             conv.append_message(conv.roles[0], text)  # commented by hy on 5.9
+
+     def answer(self, conv, img_list, max_new_tokens=300, num_beams=1, min_length=1, top_p=0.9,
+                repetition_penalty=1.0, length_penalty=1, temperature=1.0, max_length=2000):
+         conv.append_message(conv.roles[1], None)
+         embs = self.get_context_emb(conv, img_list)
+
+         current_max_len = embs.shape[1] + max_new_tokens
+         if current_max_len - max_length > 0:
+             print('Warning: The number of tokens in current conversation exceeds the max length. '
+                   'The model will not see the contexts outside the range.')
+         begin_idx = max(0, current_max_len - max_length)
+
+         embs = embs[:, begin_idx:]
+
+         with torch.autocast("cuda"):
+             outputs = self.model.llama_model.generate(
+                 inputs_embeds=embs,
+                 max_new_tokens=max_new_tokens,
+                 stopping_criteria=self.stopping_criteria,
+                 num_beams=num_beams,
+                 do_sample=True,
+                 min_length=min_length,
+                 top_p=top_p,
+                 repetition_penalty=repetition_penalty,
+                 length_penalty=length_penalty,
+                 temperature=temperature,
+             )
+         output_token = outputs[0]
+         if output_token[0] == 0:  # the model might output an unknown token <unk> at the beginning. remove it
+             output_token = output_token[1:]
+         if output_token[0] == 1:  # some users find that there is a start token <s> at the beginning. remove it
+             output_token = output_token[1:]
+         output_text = self.model.llama_tokenizer.decode(output_token, add_special_tokens=False)
+         output_text = output_text.split('###')[0]  # remove the stop sign '###'
+         output_text = output_text.split('Assistant:')[-1].strip()
+         conv.messages[-1][1] = output_text  # commented by hy on 5.9
+         # ---5.9.2023---
+         conv.messages = []
+         conv.append_message(conv.roles[0], "<Img><ImageHere></Img>")
+         return output_text, output_token.cpu().numpy()
+
+     def upload_img(self, image, conv, img_list):
+         if isinstance(image, str):  # is an image path
+             raw_image = Image.open(image).convert('RGB')
+             image = self.vis_processor(raw_image).unsqueeze(0).to(self.device)
+         elif isinstance(image, Image.Image):
+             raw_image = image
+             image = self.vis_processor(raw_image).unsqueeze(0).to(self.device)
+         elif isinstance(image, torch.Tensor):
+             if len(image.shape) == 3:
+                 image = image.unsqueeze(0)
+             image = image.to(self.device)
+
+         image_emb, _ = self.model.encode_img(image)
+         img_list.append(image_emb)
+         conv.append_message(conv.roles[0], "<Img><ImageHere></Img>")
+         msg = "Received."
+         return msg
+
+     def get_context_emb(self, conv, img_list):
+         prompt = conv.get_prompt()
+         prompt_segs = prompt.split('<ImageHere>')
+         assert len(prompt_segs) == len(img_list) + 1, "Unmatched numbers of image placeholders and images."
+         # seg_tokens = []
+         # for i, seg in enumerate(prompt_segs):
+         #     if i == 1:
+         #         prompt_ids = self.model.llama_tokenizer(
+         #             seg,
+         #             return_tensors="pt",
+         #             add_special_tokens=i == 0
+         #         ).to(self.device).input_ids
+         #         seg_tokens.append(prompt_ids)
+         #     else:
+         #         prompt_ids = self.model.llama_tokenizer(seg, return_tensors="pt", add_special_tokens=i == 0).to(self.device).input_ids
+         #         seg_tokens.append(prompt_ids)
+         seg_tokens = [
+             self.model.llama_tokenizer(
+                 seg, return_tensors="pt", add_special_tokens=i == 0).to(self.device).input_ids
+             # only add bos to the first seg
+             for i, seg in enumerate(prompt_segs)
+         ]
+         seg_embs = [self.model.llama_model.model.embed_tokens(seg_t) for seg_t in seg_tokens]
+         # seg_embs = [self.model.llama_model.model.base_model.embed_tokens(seg_t) for seg_t in seg_tokens]  # LoRA
+         mixed_embs = [emb for pair in zip(seg_embs[:-1], img_list) for emb in pair] + [seg_embs[-1]]
+         mixed_embs = torch.cat(mixed_embs, dim=1)
+         return mixed_embs
+
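
Taken together, Conversation keeps the running transcript while Chat turns it into a mixed stream of text and image embeddings, splicing one encoded image into each <ImageHere> placeholder. A minimal single-turn sketch of how these pieces fit together (it assumes a medomni model and its matching vis_processor have already been loaded elsewhere; the image path and question are placeholders):

    from medomni.conversation.conversation import Chat, CONV_VISION

    chat = Chat(model, vis_processor, device='cuda:0')  # model/vis_processor loaded beforehand

    conv = CONV_VISION.copy()  # fresh conversation state
    img_list = []              # one embedding per <ImageHere> placeholder, in order
    chat.upload_img('path/to/image.png', conv, img_list)   # placeholder path
    chat.ask('Is there any focal consolidation?', conv)
    text, tokens = chat.answer(conv, img_list, max_new_tokens=300, temperature=1.0)
    print(text)

Note that answer() clears conv.messages and re-appends the image placeholder before returning (the 5.9.2023 change above), so each call behaves as an independent single-turn exchange over the same image.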
medomni/datasets/__init__.py ADDED
File without changes
medomni/datasets/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (149 Bytes).
 
medomni/datasets/__pycache__/data_utils.cpython-39.pyc ADDED
Binary file (5.95 kB).
 
medomni/datasets/builders/__init__.py ADDED
@@ -0,0 +1,71 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ from medomni.datasets.builders.base_dataset_builder import load_dataset_config
+ from medomni.datasets.builders.image_text_pair_builder import (
+     CCSBUBuilder,
+     LaionBuilder,
+     CCSBUAlignBuilder
+ )
+ from medomni.common.registry import registry
+
+ __all__ = [
+     "CCSBUBuilder",
+     "LaionBuilder",
+     "CCSBUAlignBuilder"
+ ]
+
+ def load_dataset(name, cfg_path=None, vis_path=None, data_type=None):
+     """
+     Example
+
+     >>> dataset = load_dataset("coco_caption", cfg_path=None)
+     >>> splits = dataset.keys()
+     >>> print([len(dataset[split]) for split in splits])
+
+     """
+     if cfg_path is None:
+         cfg = None
+     else:
+         cfg = load_dataset_config(cfg_path)
+
+     try:
+         builder = registry.get_builder_class(name)(cfg)
+     except TypeError:
+         print(
+             f"Dataset {name} not found. Available datasets:\n"
+             + ", ".join([str(k) for k in dataset_zoo.get_names()])
+         )
+         exit(1)
+
+     if vis_path is not None:
+         if data_type is None:
+             # use default data type in the config
+             data_type = builder.config.data_type
+
+         assert (
+             data_type in builder.config.build_info
+         ), f"Invalid data_type {data_type} for {name}."
+
+         builder.config.build_info.get(data_type).storage = vis_path
+
+     dataset = builder.build_datasets()
+     return dataset
+
+
+ class DatasetZoo:
+     def __init__(self) -> None:
+         self.dataset_zoo = {
+             k: list(v.DATASET_CONFIG_DICT.keys())
+             for k, v in sorted(registry.mapping["builder_name_mapping"].items())
+         }
+
+     def get_names(self):
+         return list(self.dataset_zoo.keys())
+
+
+ dataset_zoo = DatasetZoo()
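
As the docstring sketches, load_dataset resolves a registered builder by name, optionally redirects its visual storage, and returns a dict keyed by split. A hedged usage sketch ("my_dataset" and the vis_path are placeholders; real names come from dataset_zoo.get_names()):

    from medomni.datasets.builders import load_dataset, dataset_zoo

    print(dataset_zoo.get_names())  # keys of all registered builders

    datasets = load_dataset("my_dataset", vis_path="/data/my_images")  # placeholders
    for split in datasets:
        print(split, len(datasets[split]))

Because dataset_zoo is instantiated at import time, it only reflects builders that have been registered by the time this module is imported.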
medomni/datasets/builders/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (2.35 kB).
 
medomni/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc ADDED
Binary file (6.06 kB).
 
medomni/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc ADDED
Binary file (3.82 kB).
 
medomni/datasets/builders/base_dataset_builder.py ADDED
@@ -0,0 +1,234 @@
+ """
+ This file is from
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import logging
+ import os
+ import shutil
+ import warnings
+
+ from omegaconf import OmegaConf
+ import torch.distributed as dist
+ from torchvision.datasets.utils import download_url
+
+ import medomni.common.utils as utils
+ from medomni.common.dist_utils import is_dist_avail_and_initialized, is_main_process
+ from medomni.common.registry import registry
+ from medomni.processors.base_processor import BaseProcessor
+
+ class BaseDatasetBuilder:
+     train_dataset_cls, eval_dataset_cls = None, None
+
+     def __init__(self, cfg=None):
+         super().__init__()
+
+         if cfg is None:
+             # help to create datasets from default config.
+             self.config = load_dataset_config(self.default_config_path())
+         elif isinstance(cfg, str):
+             self.config = load_dataset_config(cfg)
+         else:
+             # when called from task.build_dataset()
+             self.config = cfg
+
+         self.data_type = self.config.data_type
+
+         self.vis_processors = {"train": BaseProcessor(), "eval": BaseProcessor()}
+         self.text_processors = {"train": BaseProcessor(), "eval": BaseProcessor()}
+
+     def build_datasets(self):
+         # download, split, etc...
+         # only called on 1 GPU/TPU in distributed
+
+         if is_main_process():
+             self._download_data()
+
+         if is_dist_avail_and_initialized():
+             dist.barrier()
+
+         # at this point, all the annotations and image/videos should be all downloaded to the specified locations.
+         logging.info("Building datasets...")
+         datasets = self.build()  # dataset['train'/'val'/'test']
+
+         return datasets
+
+     def build_processors(self):
+         vis_proc_cfg = self.config.get("vis_processor")
+         txt_proc_cfg = self.config.get("text_processor")
+
+         if vis_proc_cfg is not None:
+             vis_train_cfg = vis_proc_cfg.get("train")
+             vis_eval_cfg = vis_proc_cfg.get("eval")
+
+             self.vis_processors["train"] = self._build_proc_from_cfg(vis_train_cfg)
+             self.vis_processors["eval"] = self._build_proc_from_cfg(vis_eval_cfg)
+
+         if txt_proc_cfg is not None:
+             txt_train_cfg = txt_proc_cfg.get("train")
+             txt_eval_cfg = txt_proc_cfg.get("eval")
+
+             self.text_processors["train"] = self._build_proc_from_cfg(txt_train_cfg)
+             self.text_processors["eval"] = self._build_proc_from_cfg(txt_eval_cfg)
+
+     @staticmethod
+     def _build_proc_from_cfg(cfg):
+         return (
+             registry.get_processor_class(cfg.name).from_config(cfg)
+             if cfg is not None
+             else None
+         )
+
+     @classmethod
+     def default_config_path(cls, type="default"):
+         return utils.get_abs_path(cls.DATASET_CONFIG_DICT[type])
+
+     def _download_data(self):
+         self._download_ann()
+         self._download_vis()
+
+     def _download_ann(self):
+         """
+         Download annotation files if necessary.
+         All the vision-language datasets should have annotations of unified format.
+
+         storage_path can be:
+         (1) relative/absolute: will be prefixed with env.cache_root to make full path if relative.
+         (2) basename/dirname: will be suffixed with base name of URL if dirname is provided.
+
+         Local annotation paths should be relative.
+         """
+         anns = self.config.build_info.annotations
+
+         splits = anns.keys()
+
+         cache_root = registry.get_path("cache_root")
+
+         for split in splits:
+             info = anns[split]
+
+             urls, storage_paths = info.get("url", None), info.storage
+
+             if isinstance(urls, str):
+                 urls = [urls]
+             if isinstance(storage_paths, str):
+                 storage_paths = [storage_paths]
+
+             assert len(urls) == len(storage_paths)
+
+             for url_or_filename, storage_path in zip(urls, storage_paths):
+                 # if storage_path is relative, make it full by prefixing with cache_root.
+                 if not os.path.isabs(storage_path):
+                     storage_path = os.path.join(cache_root, storage_path)
+
+                 dirname = os.path.dirname(storage_path)
+                 if not os.path.exists(dirname):
+                     os.makedirs(dirname)
+
+                 if os.path.isfile(url_or_filename):
+                     src, dst = url_or_filename, storage_path
+                     if not os.path.exists(dst):
+                         shutil.copyfile(src=src, dst=dst)
+                     else:
+                         logging.info("Using existing file {}.".format(dst))
+                 else:
+                     if os.path.isdir(storage_path):
+                         # if only dirname is provided, suffix with basename of URL.
+                         raise ValueError(
+                             "Expecting storage_path to be a file path, got directory {}".format(
+                                 storage_path
+                             )
+                         )
+                     else:
+                         filename = os.path.basename(storage_path)
+
+                         download_url(url=url_or_filename, root=dirname, filename=filename)
+
+     def _download_vis(self):
+
+         storage_path = self.config.build_info.get(self.data_type).storage
+         storage_path = utils.get_cache_path(storage_path)
+
+         if not os.path.exists(storage_path):
+             warnings.warn(
+                 f"""
+                 The specified path {storage_path} for visual inputs does not exist.
+                 Please provide a correct path to the visual inputs or
+                 refer to datasets/download_scripts/README.md for downloading instructions.
+                 """
+             )
+
+     def build(self):
+         """
+         Create datasets by split, each inheriting torch.utils.data.Dataset.
+
+         # build() can be dataset-specific. Overwrite to customize.
+         """
+         self.build_processors()
+
+         build_info = self.config.build_info
+
+         ann_info = build_info.annotations
+         vis_info = build_info.get(self.data_type)
+
+         datasets = dict()
+         for split in ann_info.keys():
+             if split not in ["train", "val", "test"]:
+                 continue
+
+             is_train = split == "train"
+
+             # processors
+             vis_processor = (
+                 self.vis_processors["train"]
+                 if is_train
+                 else self.vis_processors["eval"]
+             )
+             text_processor = (
+                 self.text_processors["train"]
+                 if is_train
+                 else self.text_processors["eval"]
+             )
+
+             # annotation path
+             ann_paths = ann_info.get(split).storage
+             if isinstance(ann_paths, str):
+                 ann_paths = [ann_paths]
+
+             abs_ann_paths = []
+             for ann_path in ann_paths:
+                 if not os.path.isabs(ann_path):
+                     ann_path = utils.get_cache_path(ann_path)
+                 abs_ann_paths.append(ann_path)
+             ann_paths = abs_ann_paths
+
+             # visual data storage path
+             vis_path = os.path.join(vis_info.storage, split)
+
+             if not os.path.isabs(vis_path):
+                 # vis_path = os.path.join(utils.get_cache_path(), vis_path)
+                 vis_path = utils.get_cache_path(vis_path)
+
+             if not os.path.exists(vis_path):
+                 warnings.warn("storage path {} does not exist.".format(vis_path))
+
+             # create datasets
+             dataset_cls = self.train_dataset_cls if is_train else self.eval_dataset_cls
+             datasets[split] = dataset_cls(
+                 vis_processor=vis_processor,
+                 text_processor=text_processor,
+                 ann_paths=ann_paths,
+                 vis_root=vis_path,
+             )
+
+         return datasets
+
+
+ def load_dataset_config(cfg_path):
+     cfg = OmegaConf.load(cfg_path).datasets
+     cfg = cfg[list(cfg.keys())[0]]
+
+     return cfg
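
A concrete builder only has to supply the dataset classes and a default config path; BaseDatasetBuilder handles downloading, processors, and split construction. A sketch of such a subclass (the register_builder decorator is an assumption inferred from the builder_name_mapping consumed above, and MyPairDataset is a hypothetical dataset class with the constructor signature that build() expects):

    from medomni.common.registry import registry
    from medomni.datasets.builders.base_dataset_builder import BaseDatasetBuilder
    # Hypothetical dataset class taking (vis_processor, text_processor, ann_paths, vis_root).
    from medomni.datasets.datasets.my_pair_dataset import MyPairDataset

    @registry.register_builder("my_dataset")
    class MyDatasetBuilder(BaseDatasetBuilder):
        train_dataset_cls = MyPairDataset
        eval_dataset_cls = MyPairDataset

        # Relative path, resolved by default_config_path() via utils.get_abs_path().
        DATASET_CONFIG_DICT = {
            "default": "configs/datasets/my_dataset/default.yaml",
        }

Once registered, registry.get_builder_class("my_dataset")(cfg).build_datasets() returns the split dict, which is what load_dataset("my_dataset") does under the hood.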