wenkai committed on
Commit
f7be0e2
1 Parent(s): 1a7e2de

Upload 15 files

FAPM_inference.py ADDED
@@ -0,0 +1,86 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import pandas as pd
4
+ import torch.nn.functional as F
5
+ from lavis.models.protein_models.protein_function_opt import Blip2ProteinMistral
6
+ # from lavis.models.base_model import FAPMConfig
7
+ # from lavis.models.blip2_models.blip2_opt import Blip2ProteinOPT
8
+ import random
9
+ from lavis.models.base_model import FAPMConfig
10
+ import argparse
11
+
12
+
13
+ if __name__ == '__main__':
14
+ parser = argparse.ArgumentParser(description='FAPM')
15
+ parser.add_argument('--model_path', type=str, help='Dataset path')
16
+ parser.add_argument('--example_path', type=str, help='Example protein path')
17
+ parser.add_argument('--device', type=str, default='cuda', help='Which gpu to use if any (default: cuda)')
18
+ parser.add_argument('--prompt', type=str, default='none', help='Input prompt for protein function prediction')
19
+ parser.add_argument('--ground_truth', type=str, default='none', help='Ground truth function')
20
+ parser.add_argument('--prop', type=bool, default=True, help='Match and propagate the predictions of the language model')
21
+ args = parser.parse_args()
22
+ prop = args.prop
23
+
24
+ # model = Blip2ProteinOPT(config=FAPMConfig(), esm_size='3b')
25
+ # model.load_checkpoint('/cluster/home/wenkai/LAVIS/lavis/output/BLIP2/Pretrain_stage2/20240327081/checkpoint_2.pth')
26
+ model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
27
+ model.load_checkpoint(args.model_path)
28
+ model.to(args.device)
29
+
30
+ # esm_emb = torch.load('/cluster/home/wenkai/LAVIS/data/pretrain/ipr_domain_emb_esm2_3b/Gp49.pt')['representations'][36]
31
+ esm_emb = torch.load(args.example_path)['representations'][36]
32
+ esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t().to(args.device)
33
+ samples = {'name': ['P18281'],
34
+ 'image': torch.unsqueeze(esm_emb, dim=0),
35
+ 'text_input': [args.ground_truth],
36
+ 'prompt': [args.prompt]}
37
+ prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1., repetition_penalty=1.0)
38
+ print(f"Text Prediction: {prediction}")
39
+
40
+
41
+ if prop:
42
+ from data.evaluate_data.utils import Ontology
43
+ import difflib
44
+ import re
45
+
46
+ # godb = Ontology(f'/cluster/home/wenkai/LAVIS/data/go1.4-basic.obo', with_rels=True)
47
+ godb = Ontology(f'data/go1.4-basic.obo', with_rels=True)
48
+
49
+ go_des = pd.read_csv('data/go_descriptions1.4.txt', sep='|', header=None)
50
+ go_des.columns = ['id', 'text']
51
+ go_des = go_des.dropna()
52
+ go_des['id'] = go_des['id'].apply(lambda x: re.sub('_', ':', x))
53
+ go_obo_set = set(go_des['id'].tolist())
54
+ go_des['text'] = go_des['text'].apply(lambda x: x.lower())
55
+ GO_dict = dict(zip(go_des['text'], go_des['id']))
56
+ Func_dict = dict(zip(go_des['id'], go_des['text']))
57
+
58
+ # terms_mf = pd.read_pickle('/cluster/home/wenkai/deepgo2/data/mf/terms.pkl')
59
+ terms_mf = pd.read_pickle('data/terms/mf_terms.pkl')
60
+ choices_mf = [Func_dict[i] for i in list(set(terms_mf['gos']))]
61
+ choices = {x.lower(): x for x in choices_mf}
62
+
63
+ pred_terms_list = []
64
+ pred_go_list = []
65
+ prop_annotations = []
66
+ for x in prediction:
67
+ x = [eval(i) for i in x.split('; ')]
68
+ pred_terms = []
69
+ pred_go = []
70
+ annot_set = set()
71
+ for i in x:
72
+ txt = i[0]
73
+ prob = i[1]
74
+ sim_list = difflib.get_close_matches(txt.lower(), choices, n=1, cutoff=0.9)
75
+ if len(sim_list) > 0:
76
+ pred_terms.append((sim_list[0], prob))
77
+ pred_go.append((GO_dict[sim_list[0]], prob))
78
+ annot_set |= godb.get_anchestors(GO_dict[sim_list[0]])
79
+ pred_terms_list.append(pred_terms)
80
+ pred_go_list.append(pred_go)
81
+ annots = list(annot_set)
82
+ prop_annotations.append(annots)
83
+
84
+ print(f"Predictions of GO terms: \n{pred_terms_list} \nPredictions of GO id: \n{pred_go_list} \nPredictions of GO id propgated: \n{prop_annotations}")
85
+
86
+
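The inference script above right-pads every per-residue ESM2 embedding to a fixed length of 1024 positions before batching it for `model.generate`. The sketch below illustrates only that padding step; the tensor shapes (a 300-residue protein and a 2560-dimensional ESM2-3B layer-36 representation) are illustrative assumptions, not values taken from the repository.

```python
import torch
import torch.nn.functional as F

# Illustrative shapes: 300 residues, 2560-dim ESM2-3B (layer 36) embeddings.
num_residues, emb_dim = 300, 2560
esm_emb = torch.randn(num_residues, emb_dim)

# Right-pad the sequence dimension with zeros up to 1024 positions,
# mirroring F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t() in the script.
padded = F.pad(esm_emb.t(), (0, 1024 - num_residues)).t()
print(padded.shape)   # torch.Size([1024, 2560])

# The model consumes a batched tensor, hence the unsqueeze before generate().
batch = padded.unsqueeze(0)
print(batch.shape)    # torch.Size([1, 1024, 2560])
```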
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 xiangwenkai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,88 @@
1
- ---
2
- title: FAPM Demo
3
- emoji: 🚀
4
- colorFrom: blue
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 4.36.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ ## Introduction
2
+ <p align="center">
3
+ <br>
4
+ <img src="assets/FAPM.png"/>
5
+ <br>
6
+ <p>
7
+
8
+ Huggingface repo: *https://huggingface.co/wenkai/FAPM/*
9
+
10
+ ## Installation
11
+
12
+ 1. (Optional) Creating conda environment
13
+
14
+ ```bash
15
+ conda create -n lavis python=3.8
16
+ conda activate lavis
17
+ ```
18
+
19
+ 2. For development, you may build from source
20
+
21
+ ```bash
22
+ git clone https://github.com/xiangwenkai/FAPM.git
23
+ cd FAPM
24
+ pip install -e .
25
+
26
+ # if needed
27
+ # pip install Biopython
28
+ # pip install fair-esm
29
+ ```
30
+
31
+ ### Datasets
32
+ #### 1.raw dataset
33
+ Raw data are available at *https://ftp.uniprot.org/pub/databases/uniprot/previous_releases/release-2023_04/knowledgebase/*; this file is very large and needs to be processed to extract each protein's name, sequence, GO label, function description and prompt.
34
+ The domain-level protein dataset we used is available at *https://ftp.ebi.ac.uk/pub/databases/interpro/releases/95.0/protein2ipr.dat.gz*
35
+ In this repository, we provide the experimental train/val/test sets of Swiss-Prot, which are available at data/swissprot_exp
36
+ #### 2.ESM2 embeddings
37
+ Source code for ESM2 embeddings generation: *https://github.com/facebookresearch/esm*
38
+ The generation command:
39
+ ```bash
40
+ conda activate FAPM
41
+ python esm_scripts/extract.py esm2_t36_3B_UR50D your_path/protein.fasta your_path_to_save_embedding_files --repr_layers 36 --truncation_seq_length 1024 --include per_tok
42
+ ```
43
+ Example:
44
+ ```
45
+ conda activate FAPM
46
+ python esm_scripts/extract.py esm2_t36_3B_UR50D data/fasta/example.fasta data/emb_esm2_3b --repr_layers 36 --truncation_seq_length 1024 --include per_tok
47
+ ```
48
+ The default path to save embedding files is **data/emb_esm2_3b**
49
+ You can refer to *data/fasta/prepare_custom_fasta.py* to prepare your custom fasta data.
50
+
51
+
52
+ ## Pretraining language models
53
+ Source: *https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B*
54
+
55
+ ## Training
56
+ data config: lavis/configs/datasets/protein/GO_defaults_cap.yaml
57
+ stage1 config: lavis/projects/blip2/train/protein_pretrain_stage1.yaml
58
+ stage1 training command: run_scripts/blip2/train/protein_pretrain_domain_stage1.sh
59
+ stage2 config: lavis/projects/blip2/train/protein_pretrain_stage2.yaml
60
+ stage2 training/finetuning command: run_scripts/blip2/train/protein_pretrain_domain_stage2.sh
61
+
62
+ ## Trained models
63
+ The models are available at **https://huggingface.co/wenkai/FAPM/tree/main/model**
64
+ You can also download our trained models from Google Drive: *https://drive.google.com/drive/folders/1aA0eSYxNw3DvrU5GU1Cu-4q2kIxxAGSE?usp=drive_link*
65
+
66
+ ## Testing
67
+ config: lavis/projects/blip2/eval/caption_protein_eval.yaml
68
+ command: run_scripts/blip2/eval/eval_cap_protein.sh
69
+
70
+ ## Inference example
71
+ ```
72
+ python FAPM_inference.py \
73
+ --model_path model/checkpoint_mf2.pth \
74
+ --example_path data/emb_esm2_3b/P18281.pt \
75
+ --device cuda \
76
+ --prompt Acanthamoeba \
77
+ --prop True
78
+ ```
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
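The README refers to *data/fasta/prepare_custom_fasta.py* for preparing custom input. The snippet below is only a minimal sketch of writing a FASTA file that *esm_scripts/extract.py* can consume; the record name and sequence are placeholders, and the repository's actual script may differ.

```python
# Minimal sketch: write a single-record FASTA file for the ESM extraction step.
# The record name and sequence below are placeholders, not real data.
records = {
    "my_protein": "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ",
}
with open("data/fasta/example.fasta", "w") as f:
    for name, seq in records.items():
        f.write(f">{name}\n{seq}\n")
```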
app.py ADDED
@@ -0,0 +1,53 @@
1
+ import os
2
+ import torch
3
+ import torch.nn as nn
4
+ import pandas as pd
5
+ import torch.nn.functional as F
6
+ from lavis.models.protein_models.protein_function_opt import Blip2ProteinMistral
7
+ from lavis.models.base_model import FAPMConfig
8
+ import spaces
9
+ import gradio as gr
10
+
11
+
12
+ # Load the model
13
+ model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
14
+ model.load_checkpoint("model/checkpoint_mf2.pth")
15
+ model.to('cuda')
16
+
17
+
18
+ @spaces.GPU
19
+ def generate_caption(protein, prompt):
20
+ # Write the protein sequence to a FASTA file and extract its ESM2 embedding
21
+ with open('data/fasta/example.fasta', 'w') as f:
22
+ f.write('>{}\n'.format("protein_name"))
23
+ f.write('{}\n'.format(protein.strip()))
24
+ os.system("python esm_scripts/extract.py esm2_t36_3B_UR50D data/fasta/example.fasta data/emb_esm2_3b --repr_layers 36 --truncation_seq_length 1024 --include per_tok")
25
+ esm_emb = torch.load("data/emb_esm2_3b/protein_name.pt")['representations'][36]
26
+ esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t().to('cuda')
27
+ samples = {'name': ['test_protein'],
28
+ 'image': torch.unsqueeze(esm_emb, dim=0),
29
+ 'text_input': ['none'],
30
+ 'prompt': [prompt]}
31
+ # Generate the output
32
+ prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1., repetition_penalty=1.0)
33
+
34
+ return prediction
35
+
36
+ # Define the FAPM interface
37
+ description = """Quick demonstration of the FAPM model for protein function prediction. Upload a protein sequence to generate a function description. Modify the prompt to provide taxonomy information.
38
+
39
+ The model used in this app is available at [Hugging Face Model Hub](https://huggingface.co/wenkai/FAPM) and the source code can be found on [GitHub](https://github.com/xiangwenkai/FAPM/tree/main)."""
40
+
41
+ iface = gr.Interface(
42
+ fn=generate_caption,
43
+ inputs=[gr.Textbox(label="Upload sequence"), gr.Textbox(label="Prompt", value="taxonomy prompt")],
44
+ outputs=gr.Textbox(label="Generated description"),
45
+ description=description
46
+ )
47
+
48
+ # Launch the interface
49
+ iface.launch()
50
+
51
+
52
+
53
+
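For reference, a hypothetical direct call to the demo's `generate_caption` function (bypassing the Gradio UI) could look like the following. The sequence is a placeholder, the prompt reuses the taxonomy hint from the README inference example, and the call assumes the checkpoint and `esm_scripts/extract.py` are in place as in app.py.

```python
# Hypothetical programmatic use of app.py's generate_caption; inputs are placeholders.
sequence = "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"
prompt = "Acanthamoeba"  # taxonomy hint, as in the README inference example
print(generate_caption(sequence, prompt))
```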
blip2_eval_example.py ADDED
@@ -0,0 +1,27 @@
1
+ import torch
2
+ from PIL import Image
3
+ import requests
4
+ from lavis.models import load_model_and_preprocess
5
+
6
+ img_url = 'https://storage.googleapis.com/sfr-vision-language-research/LAVIS/assets/merlion.png'
7
+ raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
8
+ display(raw_image.resize((596, 437)))  # display() requires an IPython/Jupyter session
9
+
10
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
+
12
+ model, vis_processors, _ = load_model_and_preprocess(
13
+ name="blip2_opt", model_type="caption_coco_opt2.7b", is_eval=True, device=device
14
+ )
15
+
16
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
17
+
18
+ model.generate({"image": image})
19
+
20
+ # due to the non-deterministic nature of nucleus sampling, you may get different captions.
21
+ model.generate({"image": image}, use_nucleus_sampling=True, num_captions=3)
22
+
23
+ model.generate({"image": image, "prompt": "Question: which city is this? Answer:"})
24
+
25
+ model.generate({
26
+ "image": image,
27
+ "prompt": "Question: which city is this? Answer: singapore. Question: why?"})
evaluate.py ADDED
@@ -0,0 +1,92 @@
1
+ """
2
+ Copyright (c) 2022, salesforce.com, inc.
3
+ All rights reserved.
4
+ SPDX-License-Identifier: BSD-3-Clause
5
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6
+ """
7
+
8
+ import argparse
9
+ import random
10
+
11
+ import numpy as np
12
+ import torch
13
+ import torch.backends.cudnn as cudnn
14
+
15
+ import lavis.tasks as tasks
16
+ from lavis.common.config import Config
17
+ from lavis.common.dist_utils import get_rank, init_distributed_mode
18
+ from lavis.common.logger import setup_logger
19
+ from lavis.common.optims import (
20
+ LinearWarmupCosineLRScheduler,
21
+ LinearWarmupStepLRScheduler,
22
+ )
23
+ from lavis.common.utils import now
24
+
25
+ # imports modules for registration
26
+ from lavis.datasets.builders import *
27
+ from lavis.models import *
28
+ from lavis.processors import *
29
+ from lavis.runners.runner_base import RunnerBase
30
+ from lavis.tasks import *
31
+
32
+
33
+ def parse_args():
34
+ parser = argparse.ArgumentParser(description="Training")
35
+
36
+ parser.add_argument("--cfg-path", required=True, help="path to configuration file.")
37
+ parser.add_argument(
38
+ "--options",
39
+ nargs="+",
40
+ help="override some settings in the used config, the key-value pair "
41
+ "in xxx=yyy format will be merged into config file (deprecate), "
42
+ "change to --cfg-options instead.",
43
+ )
44
+
45
+ args = parser.parse_args()
46
+ # if 'LOCAL_RANK' not in os.environ:
47
+ # os.environ['LOCAL_RANK'] = str(args.local_rank)
48
+
49
+ return args
50
+
51
+
52
+ def setup_seeds(config):
53
+ seed = config.run_cfg.seed + get_rank()
54
+
55
+ random.seed(seed)
56
+ np.random.seed(seed)
57
+ torch.manual_seed(seed)
58
+
59
+ cudnn.benchmark = False
60
+ cudnn.deterministic = True
61
+
62
+
63
+ def main():
64
+ # allow auto-dl to complete on the main process without timeout when using the NCCL backend.
65
+ # os.environ["NCCL_BLOCKING_WAIT"] = "1"
66
+
67
+ # set before init_distributed_mode() to ensure the same job_id shared across all ranks.
68
+ job_id = now()
69
+
70
+ cfg = Config(parse_args())
71
+
72
+ init_distributed_mode(cfg.run_cfg)
73
+
74
+ setup_seeds(cfg)
75
+
76
+ # set after init_distributed_mode() to only log on master.
77
+ setup_logger()
78
+
79
+ cfg.pretty_print()
80
+
81
+ task = tasks.setup_task(cfg)
82
+ datasets = task.build_datasets(cfg)
83
+ model = task.build_model(cfg)
84
+
85
+ # model.generate({"image": ['MMSKLGVLLTICLLLFPLTAVPLDGDQPADQPAERKQNEQHPLFDQKRGCCRWPCPSRCGMARCCSS','MMSKQPAERKQNEQHPLFDQKRGCCRWPCPSRCGMARCCSS']})
86
+
87
+ runner = RunnerBase(cfg=cfg, job_id=job_id, task=task, model=model, datasets=datasets)
88
+ runner.evaluate(skip_reload=True)
89
+
90
+
91
+ if __name__ == "__main__":
92
+ main()
requirements.txt ADDED
@@ -0,0 +1,31 @@
1
+ contexttimer
2
+ decord
3
+ diffusers<=0.16.0
4
+ einops>=0.4.1
5
+ fairscale==0.4.4
6
+ ftfy
7
+ iopath
8
+ ipython
9
+ omegaconf
10
+ opencv-python-headless==4.5.5.64
11
+ opendatasets
12
+ packaging
13
+ pandas
14
+ plotly
15
+ pre-commit
16
+ pycocoevalcap
17
+ pycocotools
18
+ python-magic
19
+ scikit-image
20
+ sentencepiece
21
+ spacy
22
+ streamlit
23
+ timm==0.4.12
24
+ torch>=1.10.0
25
+ torchvision
26
+ tqdm
27
+ transformers>=4.28.0
28
+ webdataset
29
+ wheel
30
+ Biopython
31
+ fair-esm
salesforce_lavis.egg-info/PKG-INFO ADDED
@@ -0,0 +1,336 @@
1
+ Metadata-Version: 2.1
2
+ Name: salesforce-lavis
3
+ Version: 1.0.1
4
+ Summary: LAVIS - A One-stop Library for Language-Vision Intelligence
5
+ Author: Dongxu Li, Junnan Li, Hung Le, Guangsen Wang, Silvio Savarese, Steven C.H. Hoi
6
+ License: 3-Clause BSD
7
+ Keywords: Vision-Language,Multimodal,Image Captioning,Generative AI,Deep Learning,Library,PyTorch
8
+ Requires-Python: >=3.7.0
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE.txt
11
+
12
+ <p align="center">
13
+ <br>
14
+ <img src="docs/_static/logo_final.png" width="400"/>
15
+ <br>
16
+ <p>
17
+
18
+ <div align="center">
19
+ <a href="https://github.com/salesforce/LAVIS/releases"><img alt="Latest Release" src="https://img.shields.io/github/release/salesforce/LAVIS.svg" /></a>
20
+ <a href="https://opensource.salesforce.com/LAVIS/index.html">
21
+ <img alt="docs" src="https://github.com/salesforce/LAVIS/actions/workflows/docs.yaml/badge.svg"/>
22
+ <a href="https://opensource.org/licenses/BSD-3-Clause">
23
+ <img alt="license" src="https://img.shields.io/badge/License-BSD_3--Clause-blue.svg"/>
24
+ </a>
25
+ <a href="https://pepy.tech/project/salesforce-lavis">
26
+ <img alt="Downloads" src="https://pepy.tech/badge/salesforce-lavis">
27
+ </a>
28
+ </div>
29
+
30
+ <div align="center">
31
+ <a href="https://opensource.salesforce.com/LAVIS//latest/benchmark.html">Benchmark</a>,
32
+ <a href="https://arxiv.org/abs/2209.09019">Technical Report</a>,
33
+ <a href="https://opensource.salesforce.com/LAVIS//latest/index.html">Documentation</a>,
34
+ <a href="https://github.com/salesforce/LAVIS/tree/main/examples">Jupyter Notebook Examples</a>,
35
+ <a href="https://blog.salesforceairesearch.com/lavis-language-vision-library/">Blog</a>
36
+ </div>
37
+
38
+ # LAVIS - A Library for Language-Vision Intelligence
39
+
40
+ ## What's New: 🎉
41
+ * [Model Release] July 2023, released implementation of **BLIP-Diffusion** <br>
42
+ [Paper](https://arxiv.org/abs/2305.06500), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/blip-diffusion), [Website](https://dxli94.github.io/BLIP-Diffusion-website/)
43
+ > A text-to-image generation model that trains 20x faster than DreamBooth. Also facilitates zero-shot subject-driven generation and editing.
44
+ * [Model Release] May 2023, released implementation of **InstructBLIP** <br>
45
+ [Paper](https://arxiv.org/abs/2305.06500), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/instructblip)
46
+ > A new vision-language instruction-tuning framework using BLIP-2 models, achieving state-of-the-art zero-shot generalization performance on a wide range of vision-language tasks.
47
+ * [Model Release] Jan 2023, released implementation of **BLIP-2** <br>
48
+ [Paper](https://arxiv.org/abs/2301.12597), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/blip2), [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/salesforce/LAVIS/blob/main/examples/blip2_instructed_generation.ipynb)
49
+ > A generic and efficient pre-training strategy that easily harvests development of pretrained vision models and large language models (LLMs) for vision-language pretraining. BLIP-2 beats Flamingo on zero-shot VQAv2 (**65.0** vs **56.3**), establishing new state-of-the-art on zero-shot captioning (on NoCaps **121.6** CIDEr score vs previous best **113.2**). In addition, equipped with powerful LLMs (e.g. OPT, FlanT5), BLIP-2 also unlocks the new **zero-shot instructed vision-to-language generation** capabilities for various interesting applications!
50
+ * Jan 2023, LAVIS is now available on [PyPI](https://pypi.org/project/salesforce-lavis/) for installation!
51
+ * [Model Release] Dec 2022, released implementation of **Img2LLM-VQA** (**CVPR 2023**, _"From Images to Textual Prompts: Zero-shot VQA with Frozen Large Language Models"_, by Jiaxian Guo et al) <br>
52
+ [Paper](https://arxiv.org/pdf/2212.10846.pdf), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/img2llm-vqa), [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/salesforce/LAVIS/blob/main/projects/img2llm-vqa/img2llm_vqa.ipynb)
53
+ > A plug-and-play module that enables off-the-shelf use of Large Language Models (LLMs) for visual question answering (VQA). Img2LLM-VQA surpasses Flamingo on zero-shot VQA on VQAv2 (61.9 vs 56.3), while in contrast requiring no end-to-end training!
54
+ * [Model Release] Oct 2022, released implementation of **PNP-VQA** (**EMNLP Findings 2022**, _"Plug-and-Play VQA: Zero-shot VQA by Conjoining Large Pretrained Models with Zero Training"_, by Anthony T.M.H. et al), <br>
55
+ [Paper](https://arxiv.org/abs/2210.08773), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/pnp-vqa), [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/salesforce/LAVIS/blob/main/projects/pnp-vqa/pnp_vqa.ipynb))
56
+ > A modular zero-shot VQA framework that requires no PLMs training, achieving SoTA zero-shot VQA performance.
57
+
58
+ ## Table of Contents
59
+ - [Introduction](#introduction)
60
+ - [Installation](#installation)
61
+ - [Getting Started](#getting-started)
62
+ - [Model Zoo](#model-zoo)
63
+ - [Image Captioning](#image-captioning)
64
+ - [Visual question answering (VQA)](#visual-question-answering-vqa)
65
+ - [Unified Feature Extraction Interface](#unified-feature-extraction-interface)
66
+ - [Load Datasets](#load-datasets)
67
+ - [Jupyter Notebook Examples](#jupyter-notebook-examples)
68
+ - [Resources and Tools](#resources-and-tools)
69
+ - [Documentations](#documentations)
70
+ - [Ethical and Responsible Use](#ethical-and-responsible-use)
71
+ - [Technical Report and Citing LAVIS](#technical-report-and-citing-lavis)
72
+ - [License](#license)
73
+
74
+ ## Introduction
75
+ LAVIS is a Python deep learning library for LAnguage-and-VISion intelligence research and applications. This library aims to provide engineers and researchers with a one-stop solution to rapidly develop models for their specific multimodal scenarios, and benchmark them across standard and customized datasets.
76
+ It features a unified interface design to access
77
+ - **10+** tasks
78
+ (retrieval, captioning, visual question answering, multimodal classification etc.);
79
+ - **20+** datasets (COCO, Flickr, Nocaps, Conceptual
80
+ Commons, SBU, etc.);
81
+ - **30+** pretrained weights of state-of-the-art foundation language-vision models and their task-specific adaptations, including [ALBEF](https://arxiv.org/pdf/2107.07651.pdf),
82
+ [BLIP](https://arxiv.org/pdf/2201.12086.pdf), [ALPRO](https://arxiv.org/pdf/2112.09583.pdf), [CLIP](https://arxiv.org/pdf/2103.00020.pdf).
83
+ <p align="center">
84
+ <br>
85
+ <img src="assets/demo-6.png"/>
86
+ <br>
87
+ <p>
88
+
89
+ Key features of LAVIS include:
90
+
91
+ - **Unified and Modular Interface**: facilitating to easily leverage and repurpose existing modules (datasets, models, preprocessors), also to add new modules.
92
+
93
+ - **Easy Off-the-shelf Inference and Feature Extraction**: readily available pre-trained models let you take advantage of state-of-the-art multimodal understanding and generation capabilities on your own data.
94
+
95
+ - **Reproducible Model Zoo and Training Recipes**: easily replicate and extend state-of-the-art models on existing and new tasks.
96
+
97
+ - **Dataset Zoo and Automatic Downloading Tools**: it can be a hassle to prepare the many language-vision datasets. LAVIS provides automatic downloading scripts to help prepare a large variety of datasets and their annotations.
98
+
99
+
100
+ The following table shows the supported tasks, datasets and models in our library. This is a continuing effort and we are working on further growing the list.
101
+
102
+ | Tasks | Supported Models | Supported Datasets |
103
+ | :--------------------------------------: | :----------------------: | :----------------------------------------: |
104
+ | Image-text Pre-training | ALBEF, BLIP | COCO, VisualGenome, SBU ConceptualCaptions |
105
+ | Image-text Retrieval | ALBEF, BLIP, CLIP | COCO, Flickr30k |
106
+ | Text-image Retrieval | ALBEF, BLIP, CLIP | COCO, Flickr30k |
107
+ | Visual Question Answering | ALBEF, BLIP | VQAv2, OKVQA, A-OKVQA |
108
+ | Image Captioning | BLIP | COCO, NoCaps |
109
+ | Image Classification | CLIP | ImageNet |
110
+ | Natural Language Visual Reasoning (NLVR) | ALBEF, BLIP | NLVR2 |
111
+ | Visual Entailment (VE) | ALBEF | SNLI-VE |
112
+ | Visual Dialogue | BLIP | VisDial |
113
+ | Video-text Retrieval | BLIP, ALPRO | MSRVTT, DiDeMo |
114
+ | Text-video Retrieval | BLIP, ALPRO | MSRVTT, DiDeMo |
115
+ | Video Question Answering (VideoQA) | BLIP, ALPRO | MSRVTT, MSVD |
116
+ | Video Dialogue | VGD-GPT | AVSD |
117
+ | Multimodal Feature Extraction | ALBEF, CLIP, BLIP, ALPRO | customized |
118
+ | Text-to-image Generation | [COMING SOON] | |
119
+
120
+ ## Installation
121
+
122
+ 1. (Optional) Creating conda environment
123
+
124
+ ```bash
125
+ conda create -n lavis python=3.8
126
+ conda activate lavis
127
+ ```
128
+
129
+ 2. install from [PyPI](https://pypi.org/project/salesforce-lavis/)
130
+ ```bash
131
+ pip install salesforce-lavis
132
+ ```
133
+
134
+ 3. Or, for development, you may build from source
135
+
136
+ ```bash
137
+ git clone https://github.com/salesforce/LAVIS.git
138
+ cd LAVIS
139
+ pip install -e .
140
+ ```
141
+
142
+ ## Getting Started
143
+ ### Model Zoo
144
+ Model zoo summarizes supported models in LAVIS, to view:
145
+ ```python
146
+ from lavis.models import model_zoo
147
+ print(model_zoo)
148
+ # ==================================================
149
+ # Architectures Types
150
+ # ==================================================
151
+ # albef_classification ve
152
+ # albef_feature_extractor base
153
+ # albef_nlvr nlvr
154
+ # albef_pretrain base
155
+ # albef_retrieval coco, flickr
156
+ # albef_vqa vqav2
157
+ # alpro_qa msrvtt, msvd
158
+ # alpro_retrieval msrvtt, didemo
159
+ # blip_caption base_coco, large_coco
160
+ # blip_classification base
161
+ # blip_feature_extractor base
162
+ # blip_nlvr nlvr
163
+ # blip_pretrain base
164
+ # blip_retrieval coco, flickr
165
+ # blip_vqa vqav2, okvqa, aokvqa
166
+ # clip_feature_extractor ViT-B-32, ViT-B-16, ViT-L-14, ViT-L-14-336, RN50
167
+ # clip ViT-B-32, ViT-B-16, ViT-L-14, ViT-L-14-336, RN50
168
+ # gpt_dialogue base
169
+ ```
170
+
171
+ Let’s see how to use models in LAVIS to perform inference on example data. We first load a sample image from local.
172
+
173
+ ```python
174
+ import torch
175
+ from PIL import Image
176
+ # setup device to use
177
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
178
+ # load sample image
179
+ raw_image = Image.open("docs/_static/merlion.png").convert("RGB")
180
+ ```
181
+
182
+ This example image shows [Merlion park](https://en.wikipedia.org/wiki/Merlion) ([source](https://theculturetrip.com/asia/singapore/articles/what-exactly-is-singapores-merlion-anyway/)), a landmark in Singapore.
183
+
184
+
185
+ ### Image Captioning
186
+ In this example, we use the BLIP model to generate a caption for the image. To make inference even easier, we also associate each
187
+ pre-trained model with its preprocessors (transforms), accessed via ``load_model_and_preprocess()``.
188
+
189
+ ```python
190
+ import torch
191
+ from lavis.models import load_model_and_preprocess
192
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
193
+ # loads BLIP caption base model, with finetuned checkpoints on MSCOCO captioning dataset.
194
+ # this also loads the associated image processors
195
+ model, vis_processors, _ = load_model_and_preprocess(name="blip_caption", model_type="base_coco", is_eval=True, device=device)
196
+ # preprocess the image
197
+ # vis_processors stores image transforms for "train" and "eval" (validation / testing / inference)
198
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
199
+ # generate caption
200
+ model.generate({"image": image})
201
+ # ['a large fountain spewing water into the air']
202
+ ```
203
+
204
+ ### Visual question answering (VQA)
205
+ BLIP model is able to answer free-form questions about images in natural language.
206
+ To access the VQA model, simply replace the ``name`` and ``model_type`` arguments
207
+ passed to ``load_model_and_preprocess()``.
208
+
209
+ ```python
210
+ from lavis.models import load_model_and_preprocess
211
+ model, vis_processors, txt_processors = load_model_and_preprocess(name="blip_vqa", model_type="vqav2", is_eval=True, device=device)
212
+ # ask a random question.
213
+ question = "Which city is this photo taken?"
214
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
215
+ question = txt_processors["eval"](question)
216
+ model.predict_answers(samples={"image": image, "text_input": question}, inference_method="generate")
217
+ # ['singapore']
218
+ ```
219
+
220
+ ### Unified Feature Extraction Interface
221
+
222
+ LAVIS provides a unified interface to extract features from each architecture.
223
+ To extract features, we load the feature extractor variants of each model.
224
+ The multimodal feature can be used for multimodal classification.
225
+ The low-dimensional unimodal features can be used to compute cross-modal similarity.
226
+
227
+
228
+ ```python
229
+ from lavis.models import load_model_and_preprocess
230
+ model, vis_processors, txt_processors = load_model_and_preprocess(name="blip_feature_extractor", model_type="base", is_eval=True, device=device)
231
+ caption = "a large fountain spewing water into the air"
232
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
233
+ text_input = txt_processors["eval"](caption)
234
+ sample = {"image": image, "text_input": [text_input]}
235
+
236
+ features_multimodal = model.extract_features(sample)
237
+ print(features_multimodal.multimodal_embeds.shape)
238
+ # torch.Size([1, 12, 768]), use features_multimodal[:,0,:] for multimodal classification tasks
239
+
240
+ features_image = model.extract_features(sample, mode="image")
241
+ features_text = model.extract_features(sample, mode="text")
242
+ print(features_image.image_embeds.shape)
243
+ # torch.Size([1, 197, 768])
244
+ print(features_text.text_embeds.shape)
245
+ # torch.Size([1, 12, 768])
246
+
247
+ # low-dimensional projected features
248
+ print(features_image.image_embeds_proj.shape)
249
+ # torch.Size([1, 197, 256])
250
+ print(features_text.text_embeds_proj.shape)
251
+ # torch.Size([1, 12, 256])
252
+ similarity = features_image.image_embeds_proj[:,0,:] @ features_text.text_embeds_proj[:,0,:].t()
253
+ print(similarity)
254
+ # tensor([[0.2622]])
255
+ ```
256
+
257
+ ### Load Datasets
258
+ LAVIS inherently supports a wide variety of common language-vision datasets by providing [automatic download tools](https://opensource.salesforce.com/LAVIS//latest/benchmark) to help download and organize these datasets. After downloading, to load the datasets, use the following code:
259
+
260
+ ```python
261
+ from lavis.datasets.builders import dataset_zoo
262
+ dataset_names = dataset_zoo.get_names()
263
+ print(dataset_names)
264
+ # ['aok_vqa', 'coco_caption', 'coco_retrieval', 'coco_vqa', 'conceptual_caption_12m',
265
+ # 'conceptual_caption_3m', 'didemo_retrieval', 'flickr30k', 'imagenet', 'laion2B_multi',
266
+ # 'msrvtt_caption', 'msrvtt_qa', 'msrvtt_retrieval', 'msvd_caption', 'msvd_qa', 'nlvr',
267
+ # 'nocaps', 'ok_vqa', 'sbu_caption', 'snli_ve', 'vatex_caption', 'vg_caption', 'vg_vqa']
268
+ ```
269
+ After downloading the images, we can use ``load_dataset()`` to obtain the dataset.
270
+ ```python
271
+ from lavis.datasets.builders import load_dataset
272
+ coco_dataset = load_dataset("coco_caption")
273
+ print(coco_dataset.keys())
274
+ # dict_keys(['train', 'val', 'test'])
275
+ print(len(coco_dataset["train"]))
276
+ # 566747
277
+ print(coco_dataset["train"][0])
278
+ # {'image': <PIL.Image.Image image mode=RGB size=640x480>,
279
+ # 'text_input': 'A woman wearing a net on her head cutting a cake. ',
280
+ # 'image_id': 0}
281
+ ```
282
+
283
+ If you already host a local copy of the dataset, you can pass in the ``vis_path`` argument to change the default location to load images.
284
+
285
+ ```python
286
+ coco_dataset = load_dataset("coco_caption", vis_path=YOUR_LOCAL_PATH)
287
+ ```
288
+
289
+ ## Jupyter Notebook Examples
290
+ See [examples](https://github.com/salesforce/LAVIS/tree/main/examples) for more inference examples, e.g. captioning, feature extraction, VQA, GradCam, zero-shot classification.
291
+
292
+ ## Resources and Tools
293
+ - **Benchmarks**: see [Benchmark](https://opensource.salesforce.com/LAVIS//latest/benchmark) for instructions to evaluate and train supported models.
294
+ - **Dataset Download and Browsing**: see [Dataset Download](https://opensource.salesforce.com/LAVIS//latest/benchmark) for instructions and automatic tools for downloading common language-vision datasets.
295
+ - **GUI Demo**: to run the demo locally, run ```bash run_scripts/run_demo.sh``` and then follow the instruction on the prompts to view in browser. A web demo is coming soon.
296
+
297
+
298
+ ## Documentations
299
+ For more details and advanced usages, please refer to
300
+ [documentation](https://opensource.salesforce.com/LAVIS//latest/index.html#).
301
+
302
+ ## Ethical and Responsible Use
303
+ We note that models in LAVIS provide no guarantees on their multimodal abilities; incorrect or biased predictions may be observed. In particular, the datasets and pretrained models utilized in LAVIS may contain socioeconomic biases which could result in misclassification and other unwanted behaviors such as offensive or inappropriate speech. We strongly recommend that users review the pre-trained models and overall system in LAVIS before practical adoption. We plan to improve the library by investigating and mitigating these potential biases and
304
+ inappropriate behaviors in the future.
305
+
306
+
307
+ ## Technical Report and Citing LAVIS
308
+ You can find more details in our [technical report](https://arxiv.org/abs/2209.09019).
309
+
310
+ If you're using LAVIS in your research or applications, please cite using this BibTeX:
311
+ ```bibtex
312
+ @inproceedings{li-etal-2023-lavis,
313
+ title = "{LAVIS}: A One-stop Library for Language-Vision Intelligence",
314
+ author = "Li, Dongxu and
315
+ Li, Junnan and
316
+ Le, Hung and
317
+ Wang, Guangsen and
318
+ Savarese, Silvio and
319
+ Hoi, Steven C.H.",
320
+ booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)",
321
+ month = jul,
322
+ year = "2023",
323
+ address = "Toronto, Canada",
324
+ publisher = "Association for Computational Linguistics",
325
+ url = "https://aclanthology.org/2023.acl-demo.3",
326
+ pages = "31--41",
327
+ abstract = "We introduce LAVIS, an open-source deep learning library for LAnguage-VISion research and applications. LAVIS aims to serve as a one-stop comprehensive library that brings recent advancements in the language-vision field accessible for researchers and practitioners, as well as fertilizing future research and development. It features a unified interface to easily access state-of-the-art image-language, video-language models and common datasets. LAVIS supports training, evaluation and benchmarking on a rich variety of tasks, including multimodal classification, retrieval, captioning, visual question answering, dialogue and pre-training. In the meantime, the library is also highly extensible and configurable, facilitating future development and customization. In this technical report, we describe design principles, key components and functionalities of the library, and also present benchmarking results across common language-vision tasks.",
328
+ }
329
+
330
+ ```
331
+
332
+ ## Contact us
333
+ If you have any questions, comments or suggestions, please do not hesitate to contact us at lavis@salesforce.com.
334
+
335
+ ## License
336
+ [BSD 3-Clause License](LICENSE.txt)
salesforce_lavis.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,715 @@
1
+ LICENSE.txt
2
+ MANIFEST.in
3
+ README.md
4
+ pyproject.toml
5
+ requirements.txt
6
+ setup.py
7
+ app/__init__.py
8
+ app/calculate_coco_features.py
9
+ app/caption.py
10
+ app/classification.py
11
+ app/dataset_browser.py
12
+ app/image_text_match.py
13
+ app/main.py
14
+ app/multimodal_search.py
15
+ app/multipage.py
16
+ app/text_localization.py
17
+ app/utils.py
18
+ app/vqa.py
19
+ data/prepare_go_cls_protein_function_data.py
20
+ data/prepare_go_cls_protein_function_data_reviewed.py
21
+ docs/conf.py
22
+ lavis/__init__.py
23
+ lavis/common/config.py
24
+ lavis/common/dist_utils.py
25
+ lavis/common/gradcam.py
26
+ lavis/common/logger.py
27
+ lavis/common/optims.py
28
+ lavis/common/registry.py
29
+ lavis/common/utils.py
30
+ lavis/common/annotator/util.py
31
+ lavis/common/annotator/canny/__init__.py
32
+ lavis/common/annotator/hed/__init__.py
33
+ lavis/common/annotator/midas/__init__.py
34
+ lavis/common/annotator/midas/api.py
35
+ lavis/common/annotator/midas/utils.py
36
+ lavis/common/annotator/midas/midas/__init__.py
37
+ lavis/common/annotator/midas/midas/base_model.py
38
+ lavis/common/annotator/midas/midas/blocks.py
39
+ lavis/common/annotator/midas/midas/dpt_depth.py
40
+ lavis/common/annotator/midas/midas/midas_net.py
41
+ lavis/common/annotator/midas/midas/midas_net_custom.py
42
+ lavis/common/annotator/midas/midas/transforms.py
43
+ lavis/common/annotator/midas/midas/vit.py
44
+ lavis/common/annotator/mlsd/__init__.py
45
+ lavis/common/annotator/mlsd/utils.py
46
+ lavis/common/annotator/mlsd/models/mbv2_mlsd_large.py
47
+ lavis/common/annotator/mlsd/models/mbv2_mlsd_tiny.py
48
+ lavis/common/annotator/openpose/__init__.py
49
+ lavis/common/annotator/openpose/body.py
50
+ lavis/common/annotator/openpose/hand.py
51
+ lavis/common/annotator/openpose/model.py
52
+ lavis/common/annotator/openpose/util.py
53
+ lavis/common/annotator/uniformer/__init__.py
54
+ lavis/common/annotator/uniformer/configs/_base_/default_runtime.py
55
+ lavis/common/annotator/uniformer/configs/_base_/datasets/ade20k.py
56
+ lavis/common/annotator/uniformer/configs/_base_/datasets/chase_db1.py
57
+ lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes.py
58
+ lavis/common/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
59
+ lavis/common/annotator/uniformer/configs/_base_/datasets/drive.py
60
+ lavis/common/annotator/uniformer/configs/_base_/datasets/hrf.py
61
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context.py
62
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
63
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
64
+ lavis/common/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
65
+ lavis/common/annotator/uniformer/configs/_base_/datasets/stare.py
66
+ lavis/common/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
67
+ lavis/common/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
68
+ lavis/common/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
69
+ lavis/common/annotator/uniformer/configs/_base_/models/cgnet.py
70
+ lavis/common/annotator/uniformer/configs/_base_/models/danet_r50-d8.py
71
+ lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
72
+ lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
73
+ lavis/common/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py
74
+ lavis/common/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py
75
+ lavis/common/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py
76
+ lavis/common/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py
77
+ lavis/common/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py
78
+ lavis/common/annotator/uniformer/configs/_base_/models/fast_scnn.py
79
+ lavis/common/annotator/uniformer/configs/_base_/models/fcn_hr18.py
80
+ lavis/common/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py
81
+ lavis/common/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py
82
+ lavis/common/annotator/uniformer/configs/_base_/models/fpn_r50.py
83
+ lavis/common/annotator/uniformer/configs/_base_/models/fpn_uniformer.py
84
+ lavis/common/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py
85
+ lavis/common/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py
86
+ lavis/common/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py
87
+ lavis/common/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py
88
+ lavis/common/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py
89
+ lavis/common/annotator/uniformer/configs/_base_/models/pointrend_r50.py
90
+ lavis/common/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py
91
+ lavis/common/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py
92
+ lavis/common/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py
93
+ lavis/common/annotator/uniformer/configs/_base_/models/upernet_r50.py
94
+ lavis/common/annotator/uniformer/configs/_base_/models/upernet_uniformer.py
95
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_160k.py
96
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_20k.py
97
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_40k.py
98
+ lavis/common/annotator/uniformer/configs/_base_/schedules/schedule_80k.py
99
+ lavis/common/annotator/uniformer/exp/upernet_global_small/config.py
100
+ lavis/common/annotator/uniformer/exp/upernet_global_small/test_config_g.py
101
+ lavis/common/annotator/uniformer/exp/upernet_global_small/test_config_h32.py
102
+ lavis/common/annotator/uniformer/exp/upernet_global_small/test_config_w32.py
103
+ lavis/common/annotator/uniformer/mmcv/__init__.py
104
+ lavis/common/annotator/uniformer/mmcv/version.py
105
+ lavis/common/annotator/uniformer/mmcv/arraymisc/__init__.py
106
+ lavis/common/annotator/uniformer/mmcv/arraymisc/quantization.py
107
+ lavis/common/annotator/uniformer/mmcv/cnn/__init__.py
108
+ lavis/common/annotator/uniformer/mmcv/cnn/alexnet.py
109
+ lavis/common/annotator/uniformer/mmcv/cnn/builder.py
110
+ lavis/common/annotator/uniformer/mmcv/cnn/resnet.py
111
+ lavis/common/annotator/uniformer/mmcv/cnn/vgg.py
112
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/__init__.py
113
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/activation.py
114
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/context_block.py
115
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv.py
116
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py
117
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv_module.py
118
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py
119
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py
120
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/drop.py
121
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py
122
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py
123
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/hswish.py
124
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/non_local.py
125
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/norm.py
126
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/padding.py
127
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/plugin.py
128
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/registry.py
129
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/scale.py
130
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/swish.py
131
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/transformer.py
132
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/upsample.py
133
+ lavis/common/annotator/uniformer/mmcv/cnn/bricks/wrappers.py
134
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/__init__.py
135
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/flops_counter.py
136
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py
137
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/sync_bn.py
138
+ lavis/common/annotator/uniformer/mmcv/cnn/utils/weight_init.py
139
+ lavis/common/annotator/uniformer/mmcv/engine/__init__.py
140
+ lavis/common/annotator/uniformer/mmcv/engine/test.py
141
+ lavis/common/annotator/uniformer/mmcv/fileio/__init__.py
142
+ lavis/common/annotator/uniformer/mmcv/fileio/file_client.py
143
+ lavis/common/annotator/uniformer/mmcv/fileio/io.py
144
+ lavis/common/annotator/uniformer/mmcv/fileio/parse.py
145
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/__init__.py
146
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/base.py
147
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/json_handler.py
148
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py
149
+ lavis/common/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py
150
+ lavis/common/annotator/uniformer/mmcv/image/__init__.py
151
+ lavis/common/annotator/uniformer/mmcv/image/colorspace.py
152
+ lavis/common/annotator/uniformer/mmcv/image/geometric.py
153
+ lavis/common/annotator/uniformer/mmcv/image/io.py
154
+ lavis/common/annotator/uniformer/mmcv/image/misc.py
155
+ lavis/common/annotator/uniformer/mmcv/image/photometric.py
156
+ lavis/common/annotator/uniformer/mmcv/ops/__init__.py
157
+ lavis/common/annotator/uniformer/mmcv/ops/assign_score_withk.py
158
+ lavis/common/annotator/uniformer/mmcv/ops/ball_query.py
159
+ lavis/common/annotator/uniformer/mmcv/ops/bbox.py
160
+ lavis/common/annotator/uniformer/mmcv/ops/border_align.py
161
+ lavis/common/annotator/uniformer/mmcv/ops/box_iou_rotated.py
162
+ lavis/common/annotator/uniformer/mmcv/ops/carafe.py
163
+ lavis/common/annotator/uniformer/mmcv/ops/cc_attention.py
164
+ lavis/common/annotator/uniformer/mmcv/ops/contour_expand.py
165
+ lavis/common/annotator/uniformer/mmcv/ops/corner_pool.py
166
+ lavis/common/annotator/uniformer/mmcv/ops/correlation.py
167
+ lavis/common/annotator/uniformer/mmcv/ops/deform_conv.py
168
+ lavis/common/annotator/uniformer/mmcv/ops/deform_roi_pool.py
169
+ lavis/common/annotator/uniformer/mmcv/ops/deprecated_wrappers.py
170
+ lavis/common/annotator/uniformer/mmcv/ops/focal_loss.py
171
+ lavis/common/annotator/uniformer/mmcv/ops/furthest_point_sample.py
172
+ lavis/common/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py
173
+ lavis/common/annotator/uniformer/mmcv/ops/gather_points.py
174
+ lavis/common/annotator/uniformer/mmcv/ops/group_points.py
175
+ lavis/common/annotator/uniformer/mmcv/ops/info.py
176
+ lavis/common/annotator/uniformer/mmcv/ops/iou3d.py
177
+ lavis/common/annotator/uniformer/mmcv/ops/knn.py
178
+ lavis/common/annotator/uniformer/mmcv/ops/masked_conv.py
179
+ lavis/common/annotator/uniformer/mmcv/ops/merge_cells.py
180
+ lavis/common/annotator/uniformer/mmcv/ops/modulated_deform_conv.py
181
+ lavis/common/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py
182
+ lavis/common/annotator/uniformer/mmcv/ops/nms.py
183
+ lavis/common/annotator/uniformer/mmcv/ops/pixel_group.py
184
+ lavis/common/annotator/uniformer/mmcv/ops/point_sample.py
185
+ lavis/common/annotator/uniformer/mmcv/ops/points_in_boxes.py
186
+ lavis/common/annotator/uniformer/mmcv/ops/points_sampler.py
187
+ lavis/common/annotator/uniformer/mmcv/ops/psa_mask.py
188
+ lavis/common/annotator/uniformer/mmcv/ops/roi_align.py
189
+ lavis/common/annotator/uniformer/mmcv/ops/roi_align_rotated.py
190
+ lavis/common/annotator/uniformer/mmcv/ops/roi_pool.py
191
+ lavis/common/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
192
+ lavis/common/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
193
+ lavis/common/annotator/uniformer/mmcv/ops/saconv.py
194
+ lavis/common/annotator/uniformer/mmcv/ops/scatter_points.py
195
+ lavis/common/annotator/uniformer/mmcv/ops/sync_bn.py
196
+ lavis/common/annotator/uniformer/mmcv/ops/three_interpolate.py
197
+ lavis/common/annotator/uniformer/mmcv/ops/three_nn.py
198
+ lavis/common/annotator/uniformer/mmcv/ops/tin_shift.py
199
+ lavis/common/annotator/uniformer/mmcv/ops/upfirdn2d.py
200
+ lavis/common/annotator/uniformer/mmcv/ops/voxelize.py
201
+ lavis/common/annotator/uniformer/mmcv/parallel/__init__.py
202
+ lavis/common/annotator/uniformer/mmcv/parallel/_functions.py
203
+ lavis/common/annotator/uniformer/mmcv/parallel/collate.py
204
+ lavis/common/annotator/uniformer/mmcv/parallel/data_container.py
205
+ lavis/common/annotator/uniformer/mmcv/parallel/data_parallel.py
206
+ lavis/common/annotator/uniformer/mmcv/parallel/distributed.py
207
+ lavis/common/annotator/uniformer/mmcv/parallel/distributed_deprecated.py
208
+ lavis/common/annotator/uniformer/mmcv/parallel/registry.py
209
+ lavis/common/annotator/uniformer/mmcv/parallel/scatter_gather.py
210
+ lavis/common/annotator/uniformer/mmcv/parallel/utils.py
211
+ lavis/common/annotator/uniformer/mmcv/runner/__init__.py
212
+ lavis/common/annotator/uniformer/mmcv/runner/base_module.py
213
+ lavis/common/annotator/uniformer/mmcv/runner/base_runner.py
214
+ lavis/common/annotator/uniformer/mmcv/runner/builder.py
215
+ lavis/common/annotator/uniformer/mmcv/runner/checkpoint.py
216
+ lavis/common/annotator/uniformer/mmcv/runner/default_constructor.py
217
+ lavis/common/annotator/uniformer/mmcv/runner/dist_utils.py
218
+ lavis/common/annotator/uniformer/mmcv/runner/epoch_based_runner.py
219
+ lavis/common/annotator/uniformer/mmcv/runner/fp16_utils.py
220
+ lavis/common/annotator/uniformer/mmcv/runner/iter_based_runner.py
221
+ lavis/common/annotator/uniformer/mmcv/runner/log_buffer.py
222
+ lavis/common/annotator/uniformer/mmcv/runner/priority.py
223
+ lavis/common/annotator/uniformer/mmcv/runner/utils.py
224
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/__init__.py
225
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/checkpoint.py
226
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/closure.py
227
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/ema.py
228
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/evaluation.py
229
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/hook.py
230
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/iter_timer.py
231
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
232
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/memory.py
233
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
234
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/optimizer.py
235
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/profiler.py
236
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
237
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
238
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py
239
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/base.py
240
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
241
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
242
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
243
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
244
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
245
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/text.py
246
+ lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
247
+ lavis/common/annotator/uniformer/mmcv/runner/optimizer/__init__.py
248
+ lavis/common/annotator/uniformer/mmcv/runner/optimizer/builder.py
249
+ lavis/common/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py
250
+ lavis/common/annotator/uniformer/mmcv/utils/__init__.py
251
+ lavis/common/annotator/uniformer/mmcv/utils/config.py
252
+ lavis/common/annotator/uniformer/mmcv/utils/env.py
253
+ lavis/common/annotator/uniformer/mmcv/utils/ext_loader.py
254
+ lavis/common/annotator/uniformer/mmcv/utils/logging.py
255
+ lavis/common/annotator/uniformer/mmcv/utils/misc.py
256
+ lavis/common/annotator/uniformer/mmcv/utils/parrots_jit.py
257
+ lavis/common/annotator/uniformer/mmcv/utils/parrots_wrapper.py
258
+ lavis/common/annotator/uniformer/mmcv/utils/path.py
259
+ lavis/common/annotator/uniformer/mmcv/utils/progressbar.py
260
+ lavis/common/annotator/uniformer/mmcv/utils/registry.py
261
+ lavis/common/annotator/uniformer/mmcv/utils/testing.py
262
+ lavis/common/annotator/uniformer/mmcv/utils/timer.py
263
+ lavis/common/annotator/uniformer/mmcv/utils/trace.py
264
+ lavis/common/annotator/uniformer/mmcv/utils/version_utils.py
265
+ lavis/common/annotator/uniformer/mmcv/video/__init__.py
266
+ lavis/common/annotator/uniformer/mmcv/video/io.py
267
+ lavis/common/annotator/uniformer/mmcv/video/optflow.py
268
+ lavis/common/annotator/uniformer/mmcv/video/processing.py
269
+ lavis/common/annotator/uniformer/mmcv/visualization/__init__.py
270
+ lavis/common/annotator/uniformer/mmcv/visualization/color.py
271
+ lavis/common/annotator/uniformer/mmcv/visualization/image.py
272
+ lavis/common/annotator/uniformer/mmcv/visualization/optflow.py
273
+ lavis/common/annotator/uniformer/mmcv_custom/__init__.py
274
+ lavis/common/annotator/uniformer/mmcv_custom/checkpoint.py
275
+ lavis/common/annotator/uniformer/mmseg/apis/__init__.py
276
+ lavis/common/annotator/uniformer/mmseg/apis/inference.py
277
+ lavis/common/annotator/uniformer/mmseg/apis/test.py
278
+ lavis/common/annotator/uniformer/mmseg/apis/train.py
279
+ lavis/common/annotator/uniformer/mmseg/core/__init__.py
280
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/__init__.py
281
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/class_names.py
282
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/eval_hooks.py
283
+ lavis/common/annotator/uniformer/mmseg/core/evaluation/metrics.py
284
+ lavis/common/annotator/uniformer/mmseg/core/seg/__init__.py
285
+ lavis/common/annotator/uniformer/mmseg/core/seg/builder.py
286
+ lavis/common/annotator/uniformer/mmseg/core/seg/sampler/__init__.py
287
+ lavis/common/annotator/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py
288
+ lavis/common/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py
289
+ lavis/common/annotator/uniformer/mmseg/core/utils/__init__.py
290
+ lavis/common/annotator/uniformer/mmseg/core/utils/misc.py
291
+ lavis/common/annotator/uniformer/mmseg/datasets/__init__.py
292
+ lavis/common/annotator/uniformer/mmseg/datasets/ade.py
293
+ lavis/common/annotator/uniformer/mmseg/datasets/builder.py
294
+ lavis/common/annotator/uniformer/mmseg/datasets/chase_db1.py
295
+ lavis/common/annotator/uniformer/mmseg/datasets/cityscapes.py
296
+ lavis/common/annotator/uniformer/mmseg/datasets/custom.py
297
+ lavis/common/annotator/uniformer/mmseg/datasets/dataset_wrappers.py
298
+ lavis/common/annotator/uniformer/mmseg/datasets/drive.py
299
+ lavis/common/annotator/uniformer/mmseg/datasets/hrf.py
300
+ lavis/common/annotator/uniformer/mmseg/datasets/pascal_context.py
301
+ lavis/common/annotator/uniformer/mmseg/datasets/stare.py
302
+ lavis/common/annotator/uniformer/mmseg/datasets/voc.py
303
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/__init__.py
304
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/compose.py
305
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/formating.py
306
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/loading.py
307
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py
308
+ lavis/common/annotator/uniformer/mmseg/datasets/pipelines/transforms.py
309
+ lavis/common/annotator/uniformer/mmseg/models/__init__.py
310
+ lavis/common/annotator/uniformer/mmseg/models/builder.py
311
+ lavis/common/annotator/uniformer/mmseg/models/backbones/__init__.py
312
+ lavis/common/annotator/uniformer/mmseg/models/backbones/cgnet.py
313
+ lavis/common/annotator/uniformer/mmseg/models/backbones/fast_scnn.py
314
+ lavis/common/annotator/uniformer/mmseg/models/backbones/hrnet.py
315
+ lavis/common/annotator/uniformer/mmseg/models/backbones/mobilenet_v2.py
316
+ lavis/common/annotator/uniformer/mmseg/models/backbones/mobilenet_v3.py
317
+ lavis/common/annotator/uniformer/mmseg/models/backbones/resnest.py
318
+ lavis/common/annotator/uniformer/mmseg/models/backbones/resnet.py
319
+ lavis/common/annotator/uniformer/mmseg/models/backbones/resnext.py
320
+ lavis/common/annotator/uniformer/mmseg/models/backbones/unet.py
321
+ lavis/common/annotator/uniformer/mmseg/models/backbones/uniformer.py
322
+ lavis/common/annotator/uniformer/mmseg/models/backbones/vit.py
323
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/__init__.py
324
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/ann_head.py
325
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/apc_head.py
326
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/aspp_head.py
327
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/cascade_decode_head.py
328
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/cc_head.py
329
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/da_head.py
330
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/decode_head.py
331
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/dm_head.py
332
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/dnl_head.py
333
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/ema_head.py
334
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/enc_head.py
335
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/fcn_head.py
336
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/fpn_head.py
337
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/gc_head.py
338
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/lraspp_head.py
339
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/nl_head.py
340
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/ocr_head.py
341
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/point_head.py
342
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/psa_head.py
343
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/psp_head.py
344
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/sep_aspp_head.py
345
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/sep_fcn_head.py
346
+ lavis/common/annotator/uniformer/mmseg/models/decode_heads/uper_head.py
347
+ lavis/common/annotator/uniformer/mmseg/models/losses/__init__.py
348
+ lavis/common/annotator/uniformer/mmseg/models/losses/accuracy.py
349
+ lavis/common/annotator/uniformer/mmseg/models/losses/cross_entropy_loss.py
350
+ lavis/common/annotator/uniformer/mmseg/models/losses/dice_loss.py
351
+ lavis/common/annotator/uniformer/mmseg/models/losses/lovasz_loss.py
352
+ lavis/common/annotator/uniformer/mmseg/models/losses/utils.py
353
+ lavis/common/annotator/uniformer/mmseg/models/necks/__init__.py
354
+ lavis/common/annotator/uniformer/mmseg/models/necks/fpn.py
355
+ lavis/common/annotator/uniformer/mmseg/models/necks/multilevel_neck.py
356
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/__init__.py
357
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/base.py
358
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py
359
+ lavis/common/annotator/uniformer/mmseg/models/segmentors/encoder_decoder.py
360
+ lavis/common/annotator/uniformer/mmseg/models/utils/__init__.py
361
+ lavis/common/annotator/uniformer/mmseg/models/utils/drop.py
362
+ lavis/common/annotator/uniformer/mmseg/models/utils/inverted_residual.py
363
+ lavis/common/annotator/uniformer/mmseg/models/utils/make_divisible.py
364
+ lavis/common/annotator/uniformer/mmseg/models/utils/res_layer.py
365
+ lavis/common/annotator/uniformer/mmseg/models/utils/se_layer.py
366
+ lavis/common/annotator/uniformer/mmseg/models/utils/self_attention_block.py
367
+ lavis/common/annotator/uniformer/mmseg/models/utils/up_conv_block.py
368
+ lavis/common/annotator/uniformer/mmseg/models/utils/weight_init.py
369
+ lavis/common/annotator/uniformer/mmseg/ops/__init__.py
370
+ lavis/common/annotator/uniformer/mmseg/ops/encoding.py
371
+ lavis/common/annotator/uniformer/mmseg/ops/wrappers.py
372
+ lavis/common/annotator/uniformer/mmseg/utils/__init__.py
373
+ lavis/common/annotator/uniformer/mmseg/utils/collect_env.py
374
+ lavis/common/annotator/uniformer/mmseg/utils/logger.py
375
+ lavis/common/vqa_tools/__init__.py
376
+ lavis/common/vqa_tools/vqa.py
377
+ lavis/common/vqa_tools/vqa_eval.py
378
+ lavis/configs/default.yaml
379
+ lavis/configs/datasets/aokvqa/defaults.yaml
380
+ lavis/configs/datasets/avsd/defaults_dial.yaml
381
+ lavis/configs/datasets/blip_diffusion_datasets/defaults.yaml
382
+ lavis/configs/datasets/coco/defaults_cap.yaml
383
+ lavis/configs/datasets/coco/defaults_ret.yaml
384
+ lavis/configs/datasets/coco/defaults_vqa.yaml
385
+ lavis/configs/datasets/coco/eval_vqa.yaml
386
+ lavis/configs/datasets/conceptual_caption/defaults_12m.yaml
387
+ lavis/configs/datasets/conceptual_caption/defaults_3m.yaml
388
+ lavis/configs/datasets/didemo/defaults_ret.yaml
389
+ lavis/configs/datasets/flickr30k/defaults.yaml
390
+ lavis/configs/datasets/gqa/balanced_testdev.yaml
391
+ lavis/configs/datasets/gqa/balanced_val.yaml
392
+ lavis/configs/datasets/gqa/defaults.yaml
393
+ lavis/configs/datasets/imagenet/defaults.yaml
394
+ lavis/configs/datasets/laion/defaults_2B_multi.yaml
395
+ lavis/configs/datasets/msrvtt/defaults_cap.yaml
396
+ lavis/configs/datasets/msrvtt/defaults_qa.yaml
397
+ lavis/configs/datasets/msrvtt/defaults_ret.yaml
398
+ lavis/configs/datasets/msvd/defaults_cap.yaml
399
+ lavis/configs/datasets/msvd/defaults_qa.yaml
400
+ lavis/configs/datasets/nlvr/defaults.yaml
401
+ lavis/configs/datasets/nocaps/defaults.yaml
402
+ lavis/configs/datasets/okvqa/defaults.yaml
403
+ lavis/configs/datasets/protein/GO_defaults_cap.yaml
404
+ lavis/configs/datasets/protein/defaults_cap.yaml
405
+ lavis/configs/datasets/sbu_caption/defaults.yaml
406
+ lavis/configs/datasets/snli_ve/defaults.yaml
407
+ lavis/configs/datasets/vatex/defaults_cap.yaml
408
+ lavis/configs/datasets/vg/defaults_caption.yaml
409
+ lavis/configs/datasets/vg/defaults_vqa.yaml
410
+ lavis/configs/models/albef_classification_ve.yaml
411
+ lavis/configs/models/albef_feature_extractor.yaml
412
+ lavis/configs/models/albef_nlvr.yaml
413
+ lavis/configs/models/albef_pretrain_base.yaml
414
+ lavis/configs/models/albef_retrieval_coco.yaml
415
+ lavis/configs/models/albef_retrieval_flickr.yaml
416
+ lavis/configs/models/albef_vqav2.yaml
417
+ lavis/configs/models/alpro_qa_msrvtt.yaml
418
+ lavis/configs/models/alpro_qa_msvd.yaml
419
+ lavis/configs/models/alpro_retrieval_didemo.yaml
420
+ lavis/configs/models/alpro_retrieval_msrvtt.yaml
421
+ lavis/configs/models/bert_config.json
422
+ lavis/configs/models/bert_config_alpro.json
423
+ lavis/configs/models/blip_caption_base_coco.yaml
424
+ lavis/configs/models/blip_caption_large_coco.yaml
425
+ lavis/configs/models/blip_classification_base.yaml
426
+ lavis/configs/models/blip_feature_extractor_base.yaml
427
+ lavis/configs/models/blip_itm_base.yaml
428
+ lavis/configs/models/blip_itm_large.yaml
429
+ lavis/configs/models/blip_nlvr.yaml
430
+ lavis/configs/models/blip_pretrain_base.yaml
431
+ lavis/configs/models/blip_pretrain_large.yaml
432
+ lavis/configs/models/blip_retrieval_coco.yaml
433
+ lavis/configs/models/blip_retrieval_flickr.yaml
434
+ lavis/configs/models/blip_vqa_aokvqa.yaml
435
+ lavis/configs/models/blip_vqa_okvqa.yaml
436
+ lavis/configs/models/blip_vqav2.yaml
437
+ lavis/configs/models/clip_resnet50.yaml
438
+ lavis/configs/models/clip_vit_base16.yaml
439
+ lavis/configs/models/clip_vit_base32.yaml
440
+ lavis/configs/models/clip_vit_large14.yaml
441
+ lavis/configs/models/clip_vit_large14_336.yaml
442
+ lavis/configs/models/gpt_dialogue_base.yaml
443
+ lavis/configs/models/med_config.json
444
+ lavis/configs/models/med_config_albef.json
445
+ lavis/configs/models/med_large_config.json
446
+ lavis/configs/models/blip-diffusion/blip_diffusion_base.yaml
447
+ lavis/configs/models/blip-diffusion/blip_diffusion_controlnet_canny.yaml
448
+ lavis/configs/models/blip-diffusion/blip_diffusion_controlnet_depth.yaml
449
+ lavis/configs/models/blip-diffusion/blip_diffusion_controlnet_hed.yaml
450
+ lavis/configs/models/blip2/blip2_caption_flant5xl.yaml
451
+ lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml
452
+ lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml
453
+ lavis/configs/models/blip2/blip2_coco.yaml
454
+ lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml
455
+ lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml
456
+ lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml
457
+ lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml
458
+ lavis/configs/models/blip2/blip2_pretrain.yaml
459
+ lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml
460
+ lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml
461
+ lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml
462
+ lavis/configs/models/blip2/blip2_pretrain_llama7b.yaml
463
+ lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml
464
+ lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml
465
+ lavis/configs/models/blip2/blip2_pretrain_vitL.yaml
466
+ lavis/configs/models/blip2/pretrain_protein_opt2.7b.yaml
467
+ lavis/configs/models/blip2/pretrain_protein_opt350m.yaml
468
+ lavis/configs/models/clip/RN101-quickgelu.json
469
+ lavis/configs/models/clip/RN101.json
470
+ lavis/configs/models/clip/RN50-quickgelu.json
471
+ lavis/configs/models/clip/RN50.json
472
+ lavis/configs/models/clip/RN50x16.json
473
+ lavis/configs/models/clip/RN50x4.json
474
+ lavis/configs/models/clip/ViT-B-16-plus-240.json
475
+ lavis/configs/models/clip/ViT-B-16-plus.json
476
+ lavis/configs/models/clip/ViT-B-16.json
477
+ lavis/configs/models/clip/ViT-B-32-plus-256.json
478
+ lavis/configs/models/clip/ViT-B-32-quickgelu.json
479
+ lavis/configs/models/clip/ViT-B-32.json
480
+ lavis/configs/models/clip/ViT-H-14.json
481
+ lavis/configs/models/clip/ViT-H-16.json
482
+ lavis/configs/models/clip/ViT-L-14-280.json
483
+ lavis/configs/models/clip/ViT-L-14-336.json
484
+ lavis/configs/models/clip/ViT-L-14.json
485
+ lavis/configs/models/clip/ViT-L-16-320.json
486
+ lavis/configs/models/clip/ViT-L-16.json
487
+ lavis/configs/models/clip/ViT-g-14.json
488
+ lavis/configs/models/clip/timm-efficientnetv2_rw_s.json
489
+ lavis/configs/models/clip/timm-resnet50d.json
490
+ lavis/configs/models/clip/timm-resnetaa50d.json
491
+ lavis/configs/models/clip/timm-resnetblur50.json
492
+ lavis/configs/models/clip/timm-swin_base_patch4_window7_224.json
493
+ lavis/configs/models/clip/timm-vit_base_patch16_224.json
494
+ lavis/configs/models/clip/timm-vit_base_patch32_224.json
495
+ lavis/configs/models/clip/timm-vit_small_patch16_224.json
496
+ lavis/configs/models/img2prompt-vqa/img2prompt_vqa_base.yaml
497
+ lavis/configs/models/pnp-vqa/pnp_vqa_3b.yaml
498
+ lavis/configs/models/pnp-vqa/pnp_vqa_base.yaml
499
+ lavis/configs/models/pnp-vqa/pnp_vqa_large.yaml
500
+ lavis/configs/models/pnp-vqa/unifiedqav2_3b_config.json
501
+ lavis/configs/models/pnp-vqa/unifiedqav2_base_config.json
502
+ lavis/configs/models/pnp-vqa/unifiedqav2_large_config.json
503
+ lavis/datasets/data_utils.py
504
+ lavis/datasets/builders/__init__.py
505
+ lavis/datasets/builders/base_dataset_builder.py
506
+ lavis/datasets/builders/caption_builder.py
507
+ lavis/datasets/builders/classification_builder.py
508
+ lavis/datasets/builders/dialogue_builder.py
509
+ lavis/datasets/builders/image_text_pair_builder.py
510
+ lavis/datasets/builders/imagefolder_builder.py
511
+ lavis/datasets/builders/retrieval_builder.py
512
+ lavis/datasets/builders/text_to_image_generation_builder.py
513
+ lavis/datasets/builders/video_qa_builder.py
514
+ lavis/datasets/builders/vqa_builder.py
515
+ lavis/datasets/datasets/aok_vqa_datasets.py
516
+ lavis/datasets/datasets/avsd_dialogue_datasets.py
517
+ lavis/datasets/datasets/base_dataset.py
518
+ lavis/datasets/datasets/caption_datasets.py
519
+ lavis/datasets/datasets/coco_caption_datasets.py
520
+ lavis/datasets/datasets/coco_vqa_datasets.py
521
+ lavis/datasets/datasets/dataloader_utils.py
522
+ lavis/datasets/datasets/dialogue_datasets.py
523
+ lavis/datasets/datasets/gqa_datasets.py
524
+ lavis/datasets/datasets/image_text_pair_datasets.py
525
+ lavis/datasets/datasets/imagefolder_dataset.py
526
+ lavis/datasets/datasets/laion_dataset.py
527
+ lavis/datasets/datasets/multimodal_classification_datasets.py
528
+ lavis/datasets/datasets/nlvr_datasets.py
529
+ lavis/datasets/datasets/retrieval_datasets.py
530
+ lavis/datasets/datasets/snli_ve_datasets.py
531
+ lavis/datasets/datasets/subject_driven_t2i_dataset.py
532
+ lavis/datasets/datasets/vg_vqa_datasets.py
533
+ lavis/datasets/datasets/video_caption_datasets.py
534
+ lavis/datasets/datasets/video_vqa_datasets.py
535
+ lavis/datasets/datasets/vqa_datasets.py
536
+ lavis/models/__init__.py
537
+ lavis/models/base_model.py
538
+ lavis/models/clip_vit.py
539
+ lavis/models/eva_vit.py
540
+ lavis/models/med.py
541
+ lavis/models/vit.py
542
+ lavis/models/albef_models/__init__.py
543
+ lavis/models/albef_models/albef_classification.py
544
+ lavis/models/albef_models/albef_feature_extractor.py
545
+ lavis/models/albef_models/albef_nlvr.py
546
+ lavis/models/albef_models/albef_outputs.py
547
+ lavis/models/albef_models/albef_pretrain.py
548
+ lavis/models/albef_models/albef_retrieval.py
549
+ lavis/models/albef_models/albef_vqa.py
550
+ lavis/models/alpro_models/__init__.py
551
+ lavis/models/alpro_models/alpro_outputs.py
552
+ lavis/models/alpro_models/alpro_qa.py
553
+ lavis/models/alpro_models/alpro_retrieval.py
554
+ lavis/models/blip2_models/Qformer.py
555
+ lavis/models/blip2_models/__init__.py
556
+ lavis/models/blip2_models/blip2.py
557
+ lavis/models/blip2_models/blip2_image_text_matching.py
558
+ lavis/models/blip2_models/blip2_opt.py
559
+ lavis/models/blip2_models/blip2_qformer.py
560
+ lavis/models/blip2_models/blip2_t5.py
561
+ lavis/models/blip2_models/blip2_t5_instruct.py
562
+ lavis/models/blip2_models/blip2_vicuna_instruct.py
563
+ lavis/models/blip2_models/modeling_llama.py
564
+ lavis/models/blip2_models/modeling_opt.py
565
+ lavis/models/blip2_models/modeling_t5.py
566
+ lavis/models/blip_diffusion_models/__init__.py
567
+ lavis/models/blip_diffusion_models/blip_diffusion.py
568
+ lavis/models/blip_diffusion_models/modeling_ctx_clip.py
569
+ lavis/models/blip_diffusion_models/ptp_utils.py
570
+ lavis/models/blip_diffusion_models/utils.py
571
+ lavis/models/blip_models/__init__.py
572
+ lavis/models/blip_models/blip.py
573
+ lavis/models/blip_models/blip_caption.py
574
+ lavis/models/blip_models/blip_classification.py
575
+ lavis/models/blip_models/blip_feature_extractor.py
576
+ lavis/models/blip_models/blip_image_text_matching.py
577
+ lavis/models/blip_models/blip_nlvr.py
578
+ lavis/models/blip_models/blip_outputs.py
579
+ lavis/models/blip_models/blip_pretrain.py
580
+ lavis/models/blip_models/blip_retrieval.py
581
+ lavis/models/blip_models/blip_vqa.py
582
+ lavis/models/blip_models/nlvr_encoder.py
583
+ lavis/models/clip_models/__init__.py
584
+ lavis/models/clip_models/bpe_simple_vocab_16e6.txt.gz
585
+ lavis/models/clip_models/clip_outputs.py
586
+ lavis/models/clip_models/loss.py
587
+ lavis/models/clip_models/model.py
588
+ lavis/models/clip_models/pretrained.py
589
+ lavis/models/clip_models/timm_model.py
590
+ lavis/models/clip_models/tokenizer.py
591
+ lavis/models/clip_models/transform.py
592
+ lavis/models/clip_models/utils.py
593
+ lavis/models/gpt_models/gpt_dialogue.py
594
+ lavis/models/img2prompt_models/__init__.py
595
+ lavis/models/img2prompt_models/img2prompt_vqa.py
596
+ lavis/models/pnp_vqa_models/__init__.py
597
+ lavis/models/pnp_vqa_models/pnp_unifiedqav2_fid.py
598
+ lavis/models/pnp_vqa_models/pnp_vqa.py
599
+ lavis/models/timesformer/__init__.py
600
+ lavis/models/timesformer/conv2d_same.py
601
+ lavis/models/timesformer/features.py
602
+ lavis/models/timesformer/helpers.py
603
+ lavis/models/timesformer/linear.py
604
+ lavis/models/timesformer/vit.py
605
+ lavis/models/timesformer/vit_utils.py
606
+ lavis/processors/__init__.py
607
+ lavis/processors/alpro_processors.py
608
+ lavis/processors/base_processor.py
609
+ lavis/processors/blip_diffusion_processors.py
610
+ lavis/processors/blip_processors.py
611
+ lavis/processors/clip_processors.py
612
+ lavis/processors/functional_video.py
613
+ lavis/processors/gpt_processors.py
614
+ lavis/processors/randaugment.py
615
+ lavis/processors/transforms_video.py
616
+ lavis/projects/albef/eval/nlvr_eval.yaml
617
+ lavis/projects/albef/eval/ret_coco_eval.yaml
618
+ lavis/projects/albef/eval/ret_flickr30k_eval.yaml
619
+ lavis/projects/albef/eval/snli_ve_eval.yaml
620
+ lavis/projects/albef/eval/vqa_test.yaml
621
+ lavis/projects/albef/eval/vqa_val.yaml
622
+ lavis/projects/albef/train/aokvqa_ft.yaml
623
+ lavis/projects/albef/train/nlvr_ft.yaml
624
+ lavis/projects/albef/train/okvqa_ft.yaml
625
+ lavis/projects/albef/train/pretrain.yaml
626
+ lavis/projects/albef/train/ret_coco_ft.yaml
627
+ lavis/projects/albef/train/ret_flickr30k_ft.yaml
628
+ lavis/projects/albef/train/snli_ve_ft.yaml
629
+ lavis/projects/albef/train/vqa_ft.yaml
630
+ lavis/projects/alpro/eval/didemo_ret_eval.yaml
631
+ lavis/projects/alpro/eval/msrvtt_qa_eval.yaml
632
+ lavis/projects/alpro/eval/msrvtt_ret_eval.yaml
633
+ lavis/projects/alpro/eval/msvd_qa_eval.yaml
634
+ lavis/projects/alpro/train/didemo_ret_ft.yaml
635
+ lavis/projects/alpro/train/msrvtt_qa_ft.yaml
636
+ lavis/projects/alpro/train/msrvtt_retrieval_ft.yaml
637
+ lavis/projects/alpro/train/msvd_qa_ft.yaml
638
+ lavis/projects/blip/coco_cap_ft_iter.yaml
639
+ lavis/projects/blip/eval/aokvqa_eval.yaml
640
+ lavis/projects/blip/eval/caption_coco_eval.yaml
641
+ lavis/projects/blip/eval/caption_coco_eval_large.yaml
642
+ lavis/projects/blip/eval/nlvr_eval.yaml
643
+ lavis/projects/blip/eval/nocaps_eval.yaml
644
+ lavis/projects/blip/eval/okvqa_eval.yaml
645
+ lavis/projects/blip/eval/ret_coco_eval.yaml
646
+ lavis/projects/blip/eval/ret_flickr_eval.yaml
647
+ lavis/projects/blip/eval/vqav2_eval.yaml
648
+ lavis/projects/blip/train/aokvqa_ft.yaml
649
+ lavis/projects/blip/train/caption_coco_ft.yaml
650
+ lavis/projects/blip/train/caption_coco_large_ft.yaml
651
+ lavis/projects/blip/train/nlvr_ft.yaml
652
+ lavis/projects/blip/train/okvqa_ft.yaml
653
+ lavis/projects/blip/train/pretrain_14m.yaml
654
+ lavis/projects/blip/train/retrieval_coco_ft.yaml
655
+ lavis/projects/blip/train/retrieval_flickr_ft.yaml
656
+ lavis/projects/blip/train/vqav2_ft.yaml
657
+ lavis/projects/blip2/eval/caption_coco_flant5xl_eval.yaml
658
+ lavis/projects/blip2/eval/caption_coco_opt2.7b_eval.yaml
659
+ lavis/projects/blip2/eval/caption_coco_opt6.7b_eval.yaml
660
+ lavis/projects/blip2/eval/caption_protein_opt2.7b_eval.yaml
661
+ lavis/projects/blip2/eval/gqa_zeroshot_flant5xl_eval.yaml
662
+ lavis/projects/blip2/eval/okvqa_zeroshot_flant5xl_eval.yaml
663
+ lavis/projects/blip2/eval/ret_coco_eval.yaml
664
+ lavis/projects/blip2/eval/ret_flickr_eval.yaml
665
+ lavis/projects/blip2/eval/vqav2_zeroshot_flant5xl_eval.yaml
666
+ lavis/projects/blip2/eval/vqav2_zeroshot_opt_eval.yaml
667
+ lavis/projects/blip2/train/caption_coco_ft.yaml
668
+ lavis/projects/blip2/train/gptProcessed_test_stage1.yaml
669
+ lavis/projects/blip2/train/pretrain_stage1.yaml
670
+ lavis/projects/blip2/train/pretrain_stage2.yaml
671
+ lavis/projects/blip2/train/retrieval_coco_ft.yaml
672
+ lavis/projects/blip2/train/test_stage1.yaml
673
+ lavis/projects/blip2/train/test_stage2.yaml
674
+ lavis/projects/blip_diffusion/finetune-db-dog.yaml
675
+ lavis/projects/blip_diffusion/finetune-db-pink-dress.yaml
676
+ lavis/projects/blip_diffusion/finetune-db-shein-jacket.yaml
677
+ lavis/projects/blip_diffusion/finetune-db-template.yaml
678
+ lavis/projects/clip/exp_coco_ret_eval.yaml
679
+ lavis/projects/clip/exp_flickr_ret_eval.yaml
680
+ lavis/projects/clip/exp_imnet_zs_eval.yaml
681
+ lavis/projects/gpt/eval/dialogue_avsd_eval.yaml
682
+ lavis/projects/gpt/train/dialogue_avsd_ft.yaml
683
+ lavis/projects/pnp-vqa/eval/gqa_eval.yaml
684
+ lavis/projects/pnp-vqa/eval/gqa_eval_3b.yaml
685
+ lavis/projects/pnp-vqa/eval/gqa_eval_large.yaml
686
+ lavis/projects/pnp-vqa/eval/okvqa_eval.yaml
687
+ lavis/projects/pnp-vqa/eval/okvqa_eval_3b.yaml
688
+ lavis/projects/pnp-vqa/eval/okvqa_eval_large.yaml
689
+ lavis/projects/pnp-vqa/eval/vqav2_eval.yaml
690
+ lavis/projects/pnp-vqa/eval/vqav2_eval_3b.yaml
691
+ lavis/projects/pnp-vqa/eval/vqav2_eval_large.yaml
692
+ lavis/projects/pnp-vqa/eval/vqav2_test_eval.yaml
693
+ lavis/projects/pnp-vqa/eval/vqav2_test_eval_3b.yaml
694
+ lavis/projects/pnp-vqa/eval/vqav2_test_eval_large.yaml
695
+ lavis/runners/__init__.py
696
+ lavis/runners/runner_base.py
697
+ lavis/runners/runner_iter.py
698
+ lavis/tasks/__init__.py
699
+ lavis/tasks/base_task.py
700
+ lavis/tasks/captioning.py
701
+ lavis/tasks/dialogue.py
702
+ lavis/tasks/image_text_pretrain.py
703
+ lavis/tasks/multimodal_classification.py
704
+ lavis/tasks/retrieval.py
705
+ lavis/tasks/text_to_image_generation.py
706
+ lavis/tasks/vqa.py
707
+ lavis/tasks/vqa_reading_comprehension.py
708
+ projects/img2llm-vqa/img2llm_vqa.py
709
+ projects/instructblip/run_demo.py
710
+ salesforce_lavis.egg-info/PKG-INFO
711
+ salesforce_lavis.egg-info/SOURCES.txt
712
+ salesforce_lavis.egg-info/dependency_links.txt
713
+ salesforce_lavis.egg-info/not-zip-safe
714
+ salesforce_lavis.egg-info/requires.txt
715
+ salesforce_lavis.egg-info/top_level.txt
salesforce_lavis.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
salesforce_lavis.egg-info/not-zip-safe ADDED
@@ -0,0 +1 @@
+
salesforce_lavis.egg-info/requires.txt ADDED
@@ -0,0 +1,29 @@
+ contexttimer
+ decord
+ diffusers<=0.16.0
+ einops>=0.4.1
+ fairscale==0.4.4
+ ftfy
+ iopath
+ ipython
+ omegaconf
+ opencv-python-headless==4.5.5.64
+ opendatasets
+ packaging
+ pandas
+ plotly
+ pre-commit
+ pycocoevalcap
+ pycocotools
+ python-magic
+ scikit-image
+ sentencepiece
+ spacy
+ streamlit
+ timm==0.4.12
+ torch>=1.10.0
+ torchvision
+ tqdm
+ transformers>=4.28.0
+ webdataset
+ wheel
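
As a quick sanity check before running FAPM, the pins and lower bounds above can be verified against the active environment. A minimal sketch (assumes Python 3.8+ for importlib.metadata; the package names and version constraints are taken directly from the requires.txt entries above):

import importlib.metadata as md
from packaging.version import Version  # "packaging" is itself listed in requires.txt

# Verify a few of the pinned / bounded dependencies from requires.txt.
assert Version(md.version("timm")) == Version("0.4.12")
assert Version(md.version("fairscale")) == Version("0.4.4")
assert Version(md.version("torch")) >= Version("1.10.0")
assert Version(md.version("transformers")) >= Version("4.28.0")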
salesforce_lavis.egg-info/top_level.txt ADDED
@@ -0,0 +1,10 @@
+ app
+ assets
+ data
+ dataset_card
+ docs
+ examples
+ lavis
+ projects
+ protein
+ run_scripts
setup.py ADDED
@@ -0,0 +1,36 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ from setuptools import setup, find_namespace_packages
+ import platform
+
+ DEPENDENCY_LINKS = []
+ if platform.system() == "Windows":
+     DEPENDENCY_LINKS.append("https://download.pytorch.org/whl/torch_stable.html")
+
+
+ def fetch_requirements(filename):
+     with open(filename) as f:
+         return [ln.strip() for ln in f.read().split("\n")]
+
+
+ setup(
+     name="salesforce-lavis",
+     version="1.0.1",
+     author="Dongxu Li, Junnan Li, Hung Le, Guangsen Wang, Silvio Savarese, Steven C.H. Hoi",
+     description="LAVIS - A One-stop Library for Language-Vision Intelligence",
+     long_description=open("README.md", "r", encoding="utf-8").read(),
+     long_description_content_type="text/markdown",
+     keywords="Vision-Language, Multimodal, Image Captioning, Generative AI, Deep Learning, Library, PyTorch",
+     license="3-Clause BSD",
+     packages=find_namespace_packages(include="lavis.*"),
+     install_requires=fetch_requirements("requirements.txt"),
+     python_requires=">=3.7.0",
+     include_package_data=True,
+     dependency_links=DEPENDENCY_LINKS,
+     zip_safe=False,
+ )
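
setup.py declares the lavis namespace packages and pulls its install requirements from requirements.txt via fetch_requirements. A minimal install sketch, assuming the command is run from the repo root and pip is available in the active interpreter (standard pip usage, not something this repo defines):

import subprocess
import sys

# Editable install using the setup.py shown above; pulls in requirements.txt.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "."])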
train.py ADDED
@@ -0,0 +1,103 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import argparse
+ import os
+ import random
+
+ import numpy as np
+ import torch
+ import torch.backends.cudnn as cudnn
+
+ import lavis.tasks as tasks
+ from lavis.common.config import Config
+ from lavis.common.dist_utils import get_rank, init_distributed_mode
+ from lavis.common.logger import setup_logger
+ from lavis.common.optims import (
+     LinearWarmupCosineLRScheduler,
+     LinearWarmupStepLRScheduler,
+ )
+ from lavis.common.registry import registry
+ from lavis.common.utils import now
+
+ # imports modules for registration
+ from lavis.datasets.builders import *
+ from lavis.models import *
+ from lavis.processors import *
+ from lavis.runners import *
+ from lavis.tasks import *
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Training")
+
+     parser.add_argument("--cfg-path", required=True, help="path to configuration file.")
+     parser.add_argument(
+         "--options",
+         nargs="+",
+         help="override some settings in the used config, the key-value pair "
+         "in xxx=yyy format will be merged into config file (deprecate), "
+         "change to --cfg-options instead.",
+     )
+
+     args = parser.parse_args()
+     # if 'LOCAL_RANK' not in os.environ:
+     #     os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+     return args
+
+
+ def setup_seeds(config):
+     seed = config.run_cfg.seed + get_rank()
+
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+
+     cudnn.benchmark = False
+     cudnn.deterministic = True
+
+
+ def get_runner_class(cfg):
+     """
+     Get runner class from config. Default to epoch-based runner.
+     """
+     runner_cls = registry.get_runner_class(cfg.run_cfg.get("runner", "runner_base"))
+
+     return runner_cls
+
+
+ def main():
+     # allow auto-dl completes on main process without timeout when using NCCL backend.
+     # os.environ["NCCL_BLOCKING_WAIT"] = "1"
+
+     # set before init_distributed_mode() to ensure the same job_id shared across all ranks.
+     job_id = now()
+
+     cfg = Config(parse_args())
+
+     init_distributed_mode(cfg.run_cfg)
+
+     setup_seeds(cfg)
+
+     # set after init_distributed_mode() to only log on master.
+     setup_logger()
+
+     cfg.pretty_print()
+
+     task = tasks.setup_task(cfg)
+     datasets = task.build_datasets(cfg)
+     model = task.build_model(cfg)
+
+     runner = get_runner_class(cfg)(
+         cfg=cfg, job_id=job_id, task=task, model=model, datasets=datasets
+     )
+     runner.train()
+
+
+ if __name__ == "__main__":
+     main()
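
train.py only needs a --cfg-path pointing at one of the project configs uploaded above; it handles config parsing, seeding, distributed initialization, and runner selection itself. A minimal single-process launch sketch (the config path is one example from the file list; whether its data and checkpoint paths match your setup is an assumption):

import subprocess
import sys

# Single-process run of the uploaded train.py. For multi-GPU training the same
# script is normally launched via torch.distributed (e.g. torchrun), which sets
# the environment variables that init_distributed_mode() reads.
subprocess.check_call([
    sys.executable, "train.py",
    "--cfg-path", "lavis/projects/blip2/train/pretrain_stage2.yaml",
])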