Ejafa committed on
Commit
9c0c4b2
1 Parent(s): db8be2a

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,20 @@
+ # Mixed-modal and Text-only Prompts for Human Evaluation
+
+ This file ```prompts_for_human_evaluations.jsonl``` contains the 1,048 prompts used to evaluate Chameleon's output: 441 (42.1%) are mixed-modal (i.e., containing both text and images), and the remaining 607 (57.9%) are text-only. The expected responses are mixed-modal, containing both text and images.
+
+ ## Background
+
+ We work with a third-party crowdsourcing vendor to collect a set of diverse and natural prompts from human annotators. Specifically, we ask annotators to think creatively about what they would want a multi-modal model to generate in different real-life scenarios. For example, for the scenario of “imagine you are in a kitchen”, annotators may come up with prompts like “How to cook pasta?” or “How should I design the layout of my island? Show me some examples.” The prompts can be text-only or text with some images, and the expected responses should be mixed-modal, containing both text and images.
+
+ After collecting an initial set of prompts, we ask three random annotators to evaluate whether each prompt is clear and whether they expect the response to contain images. We use a majority vote to filter out unclear prompts and prompts that do not expect mixed-modal responses. In the end, our final evaluation set contains
+ 1,048 prompts: 441 (42.1%) are mixed-modal (i.e., containing both text and images), and the remaining 607 (57.9%) are text-only.
+
+ More details on how these prompts were collected, along with some statistics, can be found in the [paper](https://arxiv.org/pdf/2405.09818).
+
+ ## File format
+
+ Each line of the file ```prompts_for_human_evaluations.jsonl``` defines a prompt, with the following fields (see the loading sketch after this list):
+ - ```id```: The GUID of this prompt.
+ - ```prompt```: The prompt content. If the prompt contains images, their positions are marked by the special ```<img>``` token.
+ - ```task_type```: The task category of this prompt.
+ - ```image_urls```: A list of the URLs of the images used in the prompt. Each URL maps to an ```<img>``` token in the prompt, in order.
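As a quick illustration of this schema, here is a minimal Python sketch that loads the file and splits prompts by modality. Field names follow the list above; treating a missing or empty ```image_urls``` as "text-only" is an assumption about how those records are encoded.

```python
import json

# Minimal sketch: load the evaluation prompts and split them by modality.
# Field names follow the README's schema; assuming "image_urls" is empty
# or absent for text-only prompts.
mixed, text_only = [], []
with open("prompts_for_human_evaluations.jsonl", encoding="utf-8") as f:
    for line in f:
        record = json.loads(line)
        urls = record.get("image_urls") or []
        # Per the README, each URL corresponds to one <img> token, in order.
        (mixed if urls else text_only).append(record)

print(len(mixed), "mixed-modal;", len(text_only), "text-only")  # expect 441 / 607
```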
models/7b/checklist.chk ADDED
@@ -0,0 +1,3 @@
+ 60333d9acd866e4b5e6690ecabee3b65 consolidated.pth
+ b2cbf6940c157b6e969f6263388efcc3 consolidate_params.json
+ 8bc0e859d4afa00f5f40a9de296eef8e params.json
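The ```checklist.chk``` files pair an MD5 digest with each artifact in the same directory. A minimal verification sketch, assuming the files have been downloaded next to the checklist:

```python
import hashlib
from pathlib import Path

# Sketch: verify the MD5 digests listed in models/7b/checklist.chk.
# Each non-empty line is "<md5> <filename>", relative to the checklist.
base = Path("models/7b")
for entry in (base / "checklist.chk").read_text().splitlines():
    if not entry.strip():
        continue
    digest, name = entry.split()
    md5 = hashlib.md5()
    with open(base / name, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            md5.update(chunk)
    print(name, "OK" if md5.hexdigest() == digest else "MISMATCH")
```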
models/7b/consolidate_params.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "dtype": "bf16",
+   "model_parallel_size": 1,
+   "on_gpu": true,
+   "src": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_7b_1366k_sftv1.4_exp1/v2.1_textpp_7b_1366k_sftv1.4_exp1_run000/checkpoints/checkpoint_0001200_noimggen/",
+   "tgt": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_7b_1366k_sftv1.4_exp1/v2.1_textpp_7b_1366k_sftv1.4_exp1_run000/checkpoints/checkpoint_0001200_noimggen_consolidated/",
+   "tokenizer_path": null
+ }
models/7b/consolidated.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:284172dd1aa7d6520277e6565080748031a730a3dc557a07ae6603ea60335db2
+ size 14026453679
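```consolidated.pth``` is stored as a Git LFS pointer, so a clone without LFS yields only the three-line stanza above rather than the ~14 GB checkpoint (14,026,453,679 bytes per the pointer). A hedged sketch for fetching the real file with ```huggingface_hub```; the ```repo_id``` below is a placeholder for whatever repository this commit lives in:

```python
from huggingface_hub import hf_hub_download

# Sketch: resolve the LFS pointer to the actual checkpoint bytes.
# "Ejafa/chameleon-7b" is a hypothetical repo_id; substitute the real one.
path = hf_hub_download(
    repo_id="Ejafa/chameleon-7b",
    filename="models/7b/consolidated.pth",
)
print(path)  # local cache path of the downloaded checkpoint
```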
models/7b/params.json ADDED
@@ -0,0 +1,169 @@
+ {
+   "async_checkpointing": false,
+   "async_eval_ngpus": -1,
+   "batch_size": 2,
+   "data": "",
+   "disable_logging": false,
+   "disable_workers_print": false,
+   "dtype": "bf16",
+   "dump_after_steps": 0,
+   "dump_dir": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_7b_1366k_sftv1.4_exp1/v2.1_textpp_7b_1366k_sftv1.4_exp1_run000",
+   "dump_freq": 400,
+   "dump_profile_traces": false,
+   "enable_loss_tracker": false,
+   "epochs": -1,
+   "eval_freq": 400,
+   "exp_id": "",
+   "exp_name": "",
+   "finetuning_dir": "/fsx-onellm/shared/from_rsc/v2.1_7b_dr_qk_zloss_linear_zero3_sft_optiml_textpp_run000_checkpoint_1366000",
+   "fp32_reduce_scatter": "all",
+   "gpu_check_level": 3,
+   "image_loss_weight": 1.0,
+   "image_text_rotation_prob": 0.0,
+   "instruct": {
+     "no_loss_prompt": true,
+     "no_loss_truncated": false,
+     "use_eot": true
+   },
+   "instruct_data": "/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/long_caption:2.92,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/vqa:4.59,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/text2image:10.44,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_helpful:43.27,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/code_llama:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/interleaved_batch1-17:27.45,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/image_dialogue:7.46,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_harmless:0.97,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/cybersec_safety:0.33,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/onellm_multimodal_safety:0.86,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/autosafety:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/rainbow_safety:0.10,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/genai_safety:0.58",
+   "iter_gopher": {
+     "buffer_size": 16,
+     "max_precompute": 10,
+     "n_chars_by_tok": 15,
+     "n_seqs_to_concat": 10,
+     "num_processes": 1
+   },
+   "iter_jsonl": {
+     "buffer_size": 64,
+     "same_data": false
+   },
+   "iter_multi": {
+     "buffer_size": 512,
+     "ignore_extra_chunks": true,
+     "max_precompute": 20,
+     "multiprocess": true
+   },
+   "iter_type": "multi",
+   "keep_checkpoints_every_steps": 400,
+   "keep_eval_checkpoints": true,
+   "keep_n_last_checkpoints": 2,
+   "log_all_steps": false,
+   "log_freq": 10,
+   "log_updates": true,
+   "log_wandb": false,
+   "loss_rescaling": false,
+   "model": {
+     "add_extra_toks": "0",
+     "alpha_depth": "disabled",
+     "attn_dropout": 0,
+     "attn_to_keep": "all",
+     "custom_bwd": false,
+     "dim": 4096,
+     "dropout": 0.05,
+     "efficient_attn": "flash",
+     "emb_dropout": 0,
+     "ffn_dim_multiplier": 1.0,
+     "ffn_dropout": 0,
+     "full_logging_n_layers": 4,
+     "fuse_sequence_parallel": false,
+     "init": {
+       "coeff_std": null,
+       "depth_last": false,
+       "fixed_std": null,
+       "no_init": false,
+       "pos_init_scalar": null,
+       "use_depth": "current",
+       "use_gaussian": true
+     },
+     "layer_ckpt": "none",
+     "linear_residual_dropout": false,
+     "loss_parallel": false,
+     "max_length": 2048,
+     "multiple_of": 256,
+     "n_heads": 32,
+     "n_kv_heads": null,
+     "n_layers": 32,
+     "non_linearity": "swiglu",
+     "norm_affine": true,
+     "norm_eps": 1e-05,
+     "norm_type": "rmsnorm",
+     "output_dropout": 0,
+     "output_size": -1,
+     "pre_norm": true,
+     "qk_normalization": true,
+     "recompute_attn": true,
+     "recompute_fc1_out": true,
+     "recompute_fc3_out": true,
+     "residual_dropout": 0.0,
+     "rope_theta": 10000.0,
+     "sequence_parallel": false,
+     "swin_norm": false,
+     "turn_eos_token": "<eos>",
+     "use_rope": true,
+     "vocab_size": 65536
+   },
+   "model_parallel_size": 1,
+   "no_final_ckpt": false,
+   "num_retrieved_docs": 0,
+   "old_mp": -1,
+   "old_world_size": -1,
+   "optim": {
+     "beta1": 0.9,
+     "beta2": 0.95,
+     "clip": 1.0,
+     "cosine_theta": 1.0,
+     "cycle_length": 1.0,
+     "epsilon": 1e-08,
+     "exp_factor": 0.5,
+     "lr": 1e-05,
+     "lr_min_ratio": 0.1,
+     "scheduler": "cosine",
+     "use_deprecated_optim": false,
+     "warmup": 100,
+     "weight_decay": 0.1
+   },
+   "periodic_gpu_check": true,
+   "profile_freq": -1,
+   "reshard_after_forward": false,
+   "restore_dataloader_position": false,
+   "retrieval_prob": 0.0,
+   "rlhf": null,
+   "root_dump_dir": "",
+   "save_optimizer_states": true,
+   "seq_len": 4096,
+   "slurm": {
+     "global_rank": 0,
+     "is_slurm_job": true,
+     "world_size": 64
+   },
+   "steps": 1200,
+   "tokenizer": "/fsx-onellm/rpasunuru/models/cm3z/cm3v2_7b_placeholder/gpt2-unified-image-sentinel.json",
+   "tokenizer_dir": "/fsx/guismay/data/large_experiments/fair_llm/datasets/tokenizers",
+   "torch_seed": -1,
+   "unlimited_steps": false,
+   "use_hf_tokenizer": true,
+   "valid": {
+     "batch_size": 32,
+     "debug": false,
+     "majority_voting": 0,
+     "n_batches": 100,
+     "onellm_eval": false,
+     "onellm_eval_media_storage": "",
+     "ppl_files_str": "",
+     "prompt_path": "",
+     "prompt_templates": "{}",
+     "random_fewshots": false,
+     "seq_len": 2048,
+     "tasks_root_dir": "",
+     "tasks_str": "",
+     "temperature": 1.0,
+     "top_k": 0,
+     "top_p": 0.0,
+     "use_sampling": false,
+     "write_eval": false
+   },
+   "wandb_entity": "violet-zct",
+   "wandb_project": "instruct_sft",
+   "water_marking_codes_str": null,
+   "z_loss_weight": 0.0001
+ }
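The training configuration above records the 7B architecture directly, so a few lines of Python suffice to pull out the key hyperparameters; this sketch only assumes the file sits at ```models/7b/params.json``` relative to the working directory:

```python
import json

# Sketch: read the shipped training config and print the model shape.
with open("models/7b/params.json") as f:
    params = json.load(f)

m = params["model"]
print("dim:", m["dim"])                        # 4096
print("n_layers:", m["n_layers"])              # 32
print("n_heads:", m["n_heads"])                # 32
print("vocab_size:", m["vocab_size"])          # 65536
print("norm:", m["norm_type"], m["norm_eps"])  # rmsnorm 1e-05
print("qk_normalization:", m["qk_normalization"])  # True
```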
prompts_for_human_evaluations.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/checklist.chk ADDED
@@ -0,0 +1,3 @@
+ 170a932b687671a4e676f3bf69147295 text_tokenizer.json
+ 1a559fb5dab4d351d19496ae89da1db1 vqgan.ckpt
+ 25724c8110d6adabc9130a123b4b922e vqgan.yaml
tokenizer/text_tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/vqgan.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ede986bf6b171db3081ce171ad88e4ac970793cea14c180b3e5ac5105f4cb43
+ size 281270377
tokenizer/vqgan.yaml ADDED
@@ -0,0 +1,57 @@
+ model:
+   base_learning_rate: 4.5e-06
+   target: taming.models.vqgan.VQModel
+   params:
+     embed_dim: 256
+     n_embed: 8192
+     ddconfig:
+       double_z: false
+       z_channels: 256
+       resolution: 512
+       in_channels: 3
+       out_ch: 3
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       num_res_blocks: 2
+       attn_resolutions: []
+       dropout: 0.0
+     lossconfig:
+       target: taming.modules.losses.vqperceptual_vit_vqgan.VQLPIPSWithDiscriminator
+       params:
+         disc_start: 100001
+         perceptual_weight: 1.0
+         adversarial_weight: 0.5
+         disc_params:
+           size: 512
+     ckpt_path: manifold://fair_onellm_checkpoints/tree/v2/tokenizer/vqgan_wm_0209.ckpt
+ data:
+   target: main.DataModuleFromConfig
+   params:
+     batch_size: 4
+     num_workers: 10
+     image_size: 512
+     filter_image_size: 512
+     dataset: coco
+     aesthetics_th: 0
+     clipsim_th: 0
+ --distributed-world-size: null
+ '32': null
+ --distributed-port: null
+ '17338': null
+ --save-dir: null
+ /checkpoint/shellysheynin/shutterstock/512x512_1024tokens_4node_shutterstock_laion_no_attn_styleGAN:
+   log_every-500:
+     ngpu32: null
+ --tensorboard-logdir: null
+ /checkpoint/shellysheynin/tensorboard_logs/2023-03-30/512x512_1024tokens_4node_shutterstock_laion_no_attn_styleGAN:
+   log_every-500:
+     ngpu32: null
+ '14561': null
+ /checkpoint/shellysheynin/tensorboard_logs/2023-04-02/512x512_1024tokens_4node_shutterstock_laion_no_attn_styleGAN:
+   log_every-500:
+     ngpu32: null
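The ```model:``` block follows the taming-transformers config convention, where ```target``` names a class and ```params``` are its constructor arguments; the trailing ```--distributed-*``` entries and bare checkpoint paths look like command-line residue baked into the YAML and can be ignored. Below is a hedged sketch of that instantiation pattern. It assumes a ```taming``` package exposing the referenced modules is importable (the ```vqperceptual_vit_vqgan``` loss may exist only in Meta's fork, not the public release) and overrides the internal ```manifold://``` checkpoint path with the local ```vqgan.ckpt```:

```python
import importlib
from omegaconf import OmegaConf

def instantiate_from_config(config):
    # taming-transformers convention: "target" is a dotted class path,
    # "params" are the constructor's keyword arguments.
    module, cls = config["target"].rsplit(".", 1)
    return getattr(importlib.import_module(module), cls)(**config.get("params", {}))

cfg = OmegaConf.load("tokenizer/vqgan.yaml")
# The shipped ckpt_path points at an internal manifold:// store;
# point it at the LFS-downloaded checkpoint instead.
cfg.model.params.ckpt_path = "tokenizer/vqgan.ckpt"
vqgan = instantiate_from_config(cfg.model)  # taming.models.vqgan.VQModel
```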