Spaces:

flax-community
/

dalle-mini

Running

App Files Files Community

boris committed on Oct 9, 2021

Commit

ff051c9

1 Parent(s): bf3640d

feat: allow latest version only

Browse files

Files changed (1) hide show

dev/inference/wandb-backend.ipynb +154 -358

dev/inference/wandb-backend.ipynb CHANGED Viewed

@@ -32,9 +32,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "wandb_runs = ['rjf3rycy']\n",
     "VQGAN_REPO, VQGAN_COMMIT_ID = 'dalle-mini/vqgan_imagenet_f16_16384', None\n",
-    "normalize_text = True"
    ]
   },
   {
@@ -104,18 +120,6 @@
     "    samples = [samples[i:i+batch_size] for i in range(0, len(samples), batch_size)]"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3ffb1d09-bd1c-4f57-9ae5-3eda6f7d3a08",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# TODO: iterate on runs\n",
-    "wandb_run = wandb_runs[0]\n",
-    "model_pmapped = False"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -123,12 +127,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def get_artifact_versions(run_id):\n",
     "    try:\n",
-    "        versions = api.artifact_versions(type_name='bart_model', name=f'dalle-mini/dalle-mini/model-{run_id}', per_page=10000)\n",
     "    except:\n",
-    "        versions = []\n",
-    "    return versions"
    ]
   },
   {
@@ -139,7 +145,7 @@
    "outputs": [],
    "source": [
     "def get_training_config(run_id):\n",
-    "    training_run = api.run(f'dalle-mini/dalle-mini/{run_id}')\n",
     "    config = training_run.config\n",
     "    return config"
    ]
@@ -155,7 +161,7 @@
     "def get_last_inference_version(run_id):\n",
     "    try:\n",
     "        inference_run = api.run(f'dalle-mini/dalle-mini/inference-{run_id}')\n",
-    "        return inference_run.summary.get('_step', None)\n",
     "    except:\n",
     "        return None"
    ]
@@ -186,68 +192,142 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bba70f33-af8b-4eb3-9973-7be672301a0b",
    "metadata": {},
    "outputs": [],
    "source": [
-    "def log_run(run_id):\n",
-    "    artifact_versions = get_artifact_versions(run_id)\n",
-    "    last_inference_version = get_last_inference_version(run_id)\n",
-    "    training_config = get_training_config(run_id)\n",
-    "    run = None\n",
-    "    p_generate = None\n",
-    "    model_files = ['config.json', 'flax_model.msgpack', 'merges.txt', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'vocab.json']\n",
-    "    for artifact in artifact_versions:\n",
-    "        print(f'Processing artifact: {artifact.name}')\n",
-    "        version = int(artifact.version[1:])\n",
-    "        if last_version_inference is None:\n",
-    "            # we should start from v0\n",
-    "            assert version == 0\n",
-    "        elif version <= last_version_inference:\n",
-    "            print(f'v{version} has already been logged (versions logged up to v{last_version_inference}')\n",
-    "        else:\n",
-    "            # check we are logging the correct version\n",
-    "            assert version == last_version_inference + 1\n",
-    "        \n",
-    "        # start/resume corresponding run\n",
-    "        if run is None:\n",
-    "            run = wandb.init(job_type='inference', config=config, id=f'inference-{wandb_run}', resume='allow')\n",
-    "        \n",
-    "        # work in temporary directory\n",
-    "        with tempfile.TemporaryDirectory() as tmp:\n",
-    "            \n",
-    "            # download model files\n",
-    "            artifact = run.use_artifact(artifact)\n",
-    "            for f in model_files:\n",
-    "                artifact.get_path(f).download(tmp)\n",
-    "                \n",
-    "            # load tokenizer and model\n",
-    "            tokenizer = BartTokenizer.from_pretrained(tmp)\n",
-    "            model = CustomFlaxBartForConditionalGeneration.from_pretrained(tmp)\n",
-    "            model_params = replicate(model.params)\n",
-    "            \n",
-    "            # pmap model function needs to happen only once per model config\n",
-    "            if p_generate is None:\n",
-    "                p_generate = pmap_model_function(model)\n",
-    "            \n",
-    "            for batch in tqdm(samples):\n",
-    "                prompts = [x['Caption'] for x in batch]\n",
-    "                processed_prompts = [text_normalizer(x) for x in prompts] if normalize_text else prompts\n",
-    "            \n",
-    "\n",
-    "            \n",
-    "        \n",
-    "        "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4d542342-3232-48a5-a0aa-3cb5c157aa8c",
    "metadata": {},
-   "outputs": [],
-   "source": [
-    "log_run(wandb_run)"
    ]
   },
   {
@@ -257,296 +337,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
     "def log_runs(runs):\n",
     "    for run in tqdm(runs):\n",
     "        log_run(run)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7a24b903-777b-4e3d-817c-00ed613a7021",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# TODO: loop over samples\n",
-    "batch = samples[0]\n",
-    "prompts = [x['Caption'] for x in batch]\n",
-    "processed_prompts = [text_normalizer(x) for x in prompts] if normalize_text else prompts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d77aa785-dc05-4070-aba2-aa007524d20b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "processed_prompts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "95db38fb-8948-4814-98ae-c172ca7c6d0a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "repeated_prompts = processed_prompts * jax.device_count()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e948ba9e-3700-4e87-926f-580a10f3e5cd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tokenized_prompt = tokenizer(repeated_prompts, return_tensors='jax', padding='max_length', truncation=True, max_length=128).data\n",
-    "tokenized_prompt = shard(tokenized_prompt)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "30d96812-fc17-4acf-bb64-5fdb8d0cd313",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tokenized_prompt['input_ids'].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "92ea034b-2649-4d18-ab6d-877ed04ae5c4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "images = []\n",
-    "for i in range(num_images // jax.device_count()):\n",
-    "    key, subkey = jax.random.split(key, 2)\n",
-    "    \n",
-    "    encoded_images = p_generate(tokenized_prompt, shard_prng_key(subkey), model_params)\n",
-    "    encoded_images = encoded_images.sequences[..., 1:]\n",
-    "    \n",
-    "    decoded_images = p_decode(encoded_images, vqgan_params)\n",
-    "    decoded_images = decoded_images.clip(0., 1.).reshape((-1, 256, 256, 3))\n",
-    "    \n",
-    "    for img in decoded_images:\n",
-    "        images.append(Image.fromarray(np.asarray(img * 255, dtype=np.uint8)))\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "84d52f30-44c9-4a74-9992-fb2578f19b90",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(images)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "beb594f9-5b91-47fe-98bd-41e68c6b1d73",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "images[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bb135190-64e5-44af-b416-e688b034da44",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "images[1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d78a0d92-72c2-4f82-a6ab-b3f5865dd863",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "clip_inputs = processor(text=prompts, images=images, return_tensors='np', padding='max_length', max_length=77, truncation=True).data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "89ff78a6-bfa4-44d9-ad66-07a4a68b4352",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# each shard will have one prompt\n",
-    "clip_inputs['input_ids'].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2cda8984-049c-4c87-96ad-7b0412750656",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# each shard needs to have the images corresponding to a specific prompt\n",
-    "clip_inputs['pixel_values'].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0a044e8f-be29-404b-b6c7-8f2395c5efc6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "images_per_prompt_indices = np.asarray(range(0, len(images), batch_size))\n",
-    "images_per_prompt_indices"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7a6c61b3-12e0-45d8-b39a-830288324d3d",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7318e67e-4214-46f9-bf60-6d139d4bd00f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# reorder so each shard will have correct images\n",
-    "clip_inputs['pixel_values'] = jnp.concatenate(list(clip_inputs['pixel_values'][images_per_prompt_indices + i] for i in range(batch_size)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "90c949a2-8e2a-4905-b6d4-92038f1704b8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "clip_inputs = shard(clip_inputs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "58fa836e-5ebb-45e7-af77-ab10646dfbfb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "logits = p_clip(clip_inputs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fd7a3f91-3a1f-4a0a-8b3e-3c926cd367fb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "logits.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fa406db7-0a21-4e4b-9890-4c7aece4280c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "logits = logits.reshape(-1, num_images)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9c359a8c-2c27-4e68-8775-371857397723",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "logits.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a56b9f28-dd91-4382-bc47-11e89fda1254",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "logits"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0bed8167-0a6d-46c1-badf-8bdc20b93c31",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "top_idx = logits.argsort()[:, -top_k:][..., ::-1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "188c5333-6b8c-4a17-8cc8-15651c77ef99",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(images)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "babd22b3-e773-467d-8bbb-f0323f57a44b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "results = []\n",
-    "columns = ['Caption', 'Theme'] + [f'Image {i+1}' for i in range(top_k)] + [f'Score {i+1}' for i in range(top_k)]\n",
-    "logits = jax.device_get(logits)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "75976c9f-dea5-48e3-8920-55a1bbfd91c2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for i, (idx, scores, sample) in enumerate(zip(top_idx, logits, batch)):\n",
-    "    if sample['Caption'] == padding_item: continue\n",
-    "    cur_images = [images[x] for x in images_per_prompt_indices + i]\n",
-    "    top_images = [wandb.Image(cur_images[x]) for x in idx]\n",
-    "    top_scores = [scores[x] for x in idx]\n",
-    "    results.append([sample['Caption'], sample['Theme']] + top_images + top_scores)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4bf40461-99d3-4d36-b7cc-e0129a3c9053",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "table = wandb.Table(columns=columns, data=results)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,

    "metadata": {},
    "outputs": [],
    "source": [
+    "run_ids = ['rjf3rycy']\n",
+    "ENTITY, PROJECT = 'dalle-mini', 'dalle-mini'  # used only for training run\n",
     "VQGAN_REPO, VQGAN_COMMIT_ID = 'dalle-mini/vqgan_imagenet_f16_16384', None\n",
+    "normalize_text = True\n",
+    "latest_only = False   # log only latest or all versions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23e00271-941c-4e1b-b6a9-107a1b77324d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run_ids = ['4oh3u7ca']\n",
+    "ENTITY, PROJECT = 'wandb', 'hf-flax-dalle-mini'\n",
+    "VQGAN_REPO, VQGAN_COMMIT_ID = 'dalle-mini/vqgan_imagenet_f16_16384', None\n",
+    "normalize_text = False\n",
+    "latest_only = True   # log only latest or all versions"
    ]
   },
   {
     "    samples = [samples[i:i+batch_size] for i in range(0, len(samples), batch_size)]"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
+    "def get_artifact_versions(run_id, latest_only=False):\n",
     "    try:\n",
+    "        if latest_only:\n",
+    "            return [api.artifact(type='bart_model', name=f'{ENTITY}/{PROJECT}/model-{run_id}:latest')]\n",
+    "        else:\n",
+    "            return api.artifact_versions(type_name='bart_model', name=f'{ENTITY}/{PROJECT}/model-{run_id}', per_page=10000)\n",
     "    except:\n",
+    "        return []"
    ]
   },
   {
    "outputs": [],
    "source": [
     "def get_training_config(run_id):\n",
+    "    training_run = api.run(f'{ENTITY}/{PROJECT}/{run_id}')\n",
     "    config = training_run.config\n",
     "    return config"
    ]
     "def get_last_inference_version(run_id):\n",
     "    try:\n",
     "        inference_run = api.run(f'dalle-mini/dalle-mini/inference-{run_id}')\n",
+    "        return inference_run.summary.get('version', None)\n",
     "    except:\n",
     "        return None"
    ]
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "23b2444c-67a9-44d7-abd1-187ed83a9431",
    "metadata": {},
    "outputs": [],
    "source": [
+    "run_id = run_ids[0]\n",
+    "# TODO: turn everything into a class"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "bba70f33-af8b-4eb3-9973-7be672301a0b",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing artifact: model-4oh3u7ca:v54\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mborisd13\u001b[0m (use `wandb login --relogin` to force relogin)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "                    Syncing run <strong><a href=\"https://wandb.ai/dalle-mini/dalle-mini/runs/inference-4oh3u7ca\" target=\"_blank\">inference-4oh3u7ca</a></strong> to <a href=\"https://wandb.ai/dalle-mini/dalle-mini\" target=\"_blank\">Weights & Biases</a> (<a href=\"https://docs.wandb.com/integrations/jupyter.html\" target=\"_blank\">docs</a>).<br/>\n",
+       "\n",
+       "                "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "artifact_versions = get_artifact_versions(run_id, latest_only)\n",
+    "last_inference_version = get_last_inference_version(run_id)\n",
+    "training_config = get_training_config(run_id)\n",
+    "run = None\n",
+    "p_generate = None\n",
+    "model_files = ['config.json', 'flax_model.msgpack', 'merges.txt', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'vocab.json']\n",
+    "for artifact in artifact_versions:\n",
+    "    print(f'Processing artifact: {artifact.name}')\n",
+    "    version = int(artifact.version[1:])\n",
+    "    results = []\n",
+    "    columns = ['Caption', 'Theme'] + [f'Image {i+1}' for i in range(top_k)] + [f'Score {i+1}' for i in range(top_k)]\n",
+    "    \n",
+    "    if latest_only:\n",
+    "        assert last_inference_version is None or version > last_inference_version\n",
+    "    else:\n",
+    "        if last_inference_version is None:\n",
+    "            # we should start from v0\n",
+    "            assert version == 0\n",
+    "        elif version <= last_inference_version:\n",
+    "            print(f'v{version} has already been logged (versions logged up to v{last_inference_version}')\n",
+    "        else:\n",
+    "            # check we are logging the correct version\n",
+    "            assert version == last_inference_version + 1\n",
+    "\n",
+    "    # start/resume corresponding run\n",
+    "    if run is None:\n",
+    "        run = wandb.init(job_type='inference', entity='dalle-mini', project='dalle-mini', config=training_config, id=f'inference-{run_id}', resume='allow')\n",
+    "\n",
+    "    # work in temporary directory\n",
+    "    with tempfile.TemporaryDirectory() as tmp:\n",
+    "\n",
+    "        # download model files\n",
+    "        artifact = run.use_artifact(artifact)\n",
+    "        for f in model_files:\n",
+    "            artifact.get_path(f).download(tmp)\n",
+    "\n",
+    "        # load tokenizer and model\n",
+    "        tokenizer = BartTokenizer.from_pretrained(tmp)\n",
+    "        model = CustomFlaxBartForConditionalGeneration.from_pretrained(tmp)\n",
+    "        model_params = replicate(model.params)\n",
+    "\n",
+    "        # pmap model function needs to happen only once per model config\n",
+    "        if p_generate is None:\n",
+    "            p_generate = pmap_model_function(model)\n",
+    "\n",
+    "        # process one batch of captions\n",
+    "        for batch in tqdm(samples):\n",
+    "            prompts = [x['Caption'] for x in batch]\n",
+    "            processed_prompts = [text_normalizer(x) for x in prompts] if normalize_text else prompts\n",
+    "\n",
+    "            # repeat the prompts to distribute over each device and tokenize\n",
+    "            processed_prompts = processed_prompts * jax.device_count()\n",
+    "            tokenized_prompt = tokenizer(processed_prompts, return_tensors='jax', padding='max_length', truncation=True, max_length=128).data\n",
+    "            tokenized_prompt = shard(tokenized_prompt)\n",
+    "\n",
+    "            # generate images\n",
+    "            print('Generating images')\n",
+    "            images = []\n",
+    "            for i in tqdm(range(num_images // jax.device_count())):\n",
+    "                key, subkey = jax.random.split(key)\n",
+    "                encoded_images = p_generate(tokenized_prompt, shard_prng_key(subkey), model_params)\n",
+    "                encoded_images = encoded_images.sequences[..., 1:]\n",
+    "                decoded_images = p_decode(encoded_images, vqgan_params)\n",
+    "                decoded_images = decoded_images.clip(0., 1.).reshape((-1, 256, 256, 3))\n",
+    "                for img in decoded_images:\n",
+    "                    images.append(Image.fromarray(np.asarray(img * 255, dtype=np.uint8)))\n",
+    "\n",
+    "            # get clip scores\n",
+    "            print('Calculating CLIP scores')\n",
+    "            clip_inputs = processor(text=prompts, images=images, return_tensors='np', padding='max_length', max_length=77, truncation=True).data\n",
+    "            # each shard will have one prompt, images need to be reorganized to be associated to the correct shard\n",
+    "            images_per_prompt_indices = np.asarray(range(0, len(images), batch_size))\n",
+    "            clip_inputs['pixel_values'] = jnp.concatenate(list(clip_inputs['pixel_values'][images_per_prompt_indices + i] for i in range(batch_size)))\n",
+    "            clip_inputs = shard(clip_inputs)\n",
+    "            logits = p_clip(clip_inputs)\n",
+    "            logits = logits.reshape(-1, num_images)\n",
+    "            top_scores = logits.argsort()[:, -top_k:][..., ::-1]\n",
+    "            logits = jax.device_get(logits)\n",
+    "\n",
+    "            # add to results table\n",
+    "            for i, (idx, scores, sample) in enumerate(zip(top_scores, logits, batch)):\n",
+    "                if sample['Caption'] == padding_item: continue\n",
+    "                cur_images = [images[x] for x in images_per_prompt_indices + i]\n",
+    "                top_images = [wandb.Image(cur_images[x]) for x in idx]\n",
+    "                top_scores = [scores[x] for x in idx]\n",
+    "                results.append([sample['Caption'], sample['Theme']] + top_images + top_scores)\n",
+    "\n",
+    "    # log results\n",
+    "    table = wandb.Table(columns=columns, data=results)\n",
+    "    run.log({'Samples': table, 'version': version})\n",
+    "    wandb.finish()"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "# TODO: not implemented\n",
     "def log_runs(runs):\n",
     "    for run in tqdm(runs):\n",
     "        log_run(run)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,