alexlau committed on
Commit
19677a1
1 Parent(s): faeefa7

first deploy demo

.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ .idea
2
+ __pycache__
3
+ models
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
app.py ADDED
@@ -0,0 +1,40 @@
1
+ import os
2
+ import math
3
+ import streamlit as st
4
+ from google_drive_downloader import GoogleDriveDownloader as gdd
5
+
6
+ from demo.src.models import load_trained_model
7
+ from demo.src.utils import render_predict_from_pose, predict_to_image
8
+ from demo.src.config import MODEL_DIR, MODEL_NAME, FILE_ID
9
+
10
+
11
+ model_path = os.path.join(MODEL_DIR, MODEL_NAME)
12
+ if not os.path.isfile(model_path):  # download the checkpoint only if it is not already present
13
+ gdd.download_file_from_google_drive(file_id=FILE_ID,
14
+ dest_path=model_path,
15
+ unzip=False)
16
+ print(f'model downloaded from google drive: {model_path}')
17
+
18
+
19
+ @st.cache(show_spinner=False, allow_output_mutation=True)
20
+ def fetch_model():
21
+ model, state = load_trained_model(MODEL_DIR, MODEL_NAME)
22
+ return model, state
23
+
24
+
25
+ model, state = fetch_model()
26
+ pi = math.pi
27
+ st.set_page_config(page_title="DietNeRF Demo")
28
+ st.sidebar.header('SELECT YOUR VIEW DIRECTION')
29
+ theta = st.sidebar.slider("Theta", min_value=0., max_value=2.*pi,
30
+ step=0.5, value=0.)
31
+ phi = st.sidebar.slider("Phi", min_value=0., max_value=0.5*pi,
32
+ step=0.1, value=1.)
33
+ radius = st.sidebar.slider("Radius", min_value=2., max_value=6.,
34
+ step=1., value=3.)
35
+
36
+
37
+ pred_color, _ = render_predict_from_pose(state, theta, phi, radius)
38
+ im = predict_to_image(pred_color)
39
+
40
+ st.image(im, use_column_width=False)
demo/__init__.py ADDED
File without changes
demo/src/__init__.py ADDED
File without changes
demo/src/config.py ADDED
@@ -0,0 +1,44 @@
1
+ # for downloading model from google drive
2
+ FILE_ID = "1iytA1n2z4go3uVCwE__vIKouTKyIDjEq"
3
+ MODEL_DIR = './models'
4
+ MODEL_NAME = 'trained_model'
5
+
6
+
7
+ class NerfConfig:
8
+ # MODEL CONFIG
9
+ model = "nerf"
10
+ net_activation = "relu"
11
+ rgb_activation = "sigmoid"
12
+ sigma_activation = "relu"
13
+ min_deg_point = 0
14
+ max_deg_point = 10
15
+ deg_view = 4
16
+ # reduce num_coarse_samples, num_fine_samples for speedup
17
+ num_coarse_samples = 32
18
+ num_fine_samples = 64
19
+ use_viewdirs = True
20
+ near = 2
21
+ far = 6
22
+ noise_std = None
23
+ # TODO @Alex: set white_bkgd as flag if we add LLFF dataset
24
+ white_bkgd = True
25
+ net_depth = 8
26
+ net_width = 256
27
+ net_depth_condition = 1
28
+ net_width_condition = 128
29
+ skip_layer = 4
30
+ num_rgb_channels = 3
31
+ num_sigma_channels = 1
32
+ lindisp = True
33
+ legacy_posenc_order = False
34
+ randomized = True
35
+
36
+ # DATA CONFIG
37
+ W = 800
38
+ H = 800
39
+ IMAGE_SHAPE = (W, H, 3)
40
+ # TODO @Alex: flexible focal if we add LLFF dataset
41
+ FOCAL = 555.5555155968841
42
+ # reduce CHUNK if OOM
43
+ CHUNK = 4096
44
+ DOWNSAMPLE = 2
demo/src/models.py ADDED
@@ -0,0 +1,38 @@
1
+ import os
2
+ import flax
3
+ from jax import random
4
+ from flax.training import checkpoints
5
+
6
+ from jaxnerf.nerf import models
7
+ from jaxnerf.nerf import utils
8
+ from demo.src.config import NerfConfig
9
+
10
+ rng = random.PRNGKey(0)
11
+ # TODO @Alex: make image size flexible if needed
12
+ dummy_rays = random.uniform(rng, shape=NerfConfig.IMAGE_SHAPE)
13
+ dummy_batch = {"rays": utils.Rays(dummy_rays, dummy_rays, dummy_rays)}
14
+ dummy_lr = 1e-2
15
+
16
+
17
+ def load_trained_model(model_dir, model_fn):
18
+ model, init_variables = init_model()
19
+ optimizer = flax.optim.Adam(dummy_lr).create(init_variables)
20
+ state = utils.TrainState(optimizer=optimizer)
21
+ del optimizer, init_variables
22
+ assert os.path.isfile(os.path.join(model_dir, model_fn))
23
+ state = checkpoints.restore_checkpoint(model_dir, state,
24
+ prefix=model_fn)
25
+ return model, state
26
+
27
+
28
+ def init_model():
29
+ _, key = random.split(rng)
30
+ model, init_variables = models.get_model(key, dummy_batch,
31
+ NerfConfig)
32
+ return model, init_variables
33
+
34
+
35
+ if __name__ == '__main__':
36
+ _model_dir = '../ship_fewshot_wsc'
37
+ _model_fn = 'checkpoint_345000'
38
+ _model, _state = load_trained_model(_model_dir, _model_fn)
demo/src/utils.py ADDED
@@ -0,0 +1,62 @@
1
+ from functools import partial
2
+ import jax
3
+ from jax import random
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
+ from jaxnerf.nerf import clip_utils
8
+ from jaxnerf.nerf import utils
9
+ from demo.src.config import NerfConfig
10
+ from demo.src.models import init_model
11
+
12
+ model, _ = init_model()
13
+
14
+
15
+ def render_predict_from_pose(state, theta, phi, radius):
16
+ rng = random.PRNGKey(0)
17
+ partial_render_fn = partial(render_pfn, state.optimizer.target)
18
+ rays = _render_rays_from_pose(theta, phi, radius)
19
+ pred_color, pred_disp, _ = utils.render_image(
20
+ partial_render_fn, rays,
21
+ rng, False, chunk=NerfConfig.CHUNK)
22
+ return pred_color, pred_disp
23
+
24
+
25
+ def predict_to_image(pred_out):
26
+ image_arr = np.array(np.clip(pred_out, 0., 1.) * 255.).astype(np.uint8)
27
+ return Image.fromarray(image_arr)
28
+
29
+
30
+ def _render_rays_from_pose(theta, phi, radius):
31
+ camtoworld = np.array(clip_utils.pose_spherical(theta, phi, radius))
32
+ rays = _camtoworld_matrix_to_rays(camtoworld)
33
+ return rays
34
+
35
+
36
+ def _camtoworld_matrix_to_rays(camtoworld):
37
+ """ render one instance of rays given a camera to world matrix (4, 4) """
38
+ pixel_center = 0.
39
+ w, h = NerfConfig.W, NerfConfig.H
40
+ focal, downsample = NerfConfig.FOCAL, NerfConfig.DOWNSAMPLE
41
+ x, y = np.meshgrid( # pylint: disable=unbalanced-tuple-unpacking
42
+ np.arange(0, w, downsample, dtype=np.float32) + pixel_center, # X-Axis (columns)
43
+ np.arange(0, h, downsample, dtype=np.float32) + pixel_center, # Y-Axis (rows)
44
+ indexing="xy")
45
+ camera_dirs = np.stack([(x - w * 0.5) / focal,
46
+ -(y - h * 0.5) / focal,
47
+ -np.ones_like(x)],
48
+ axis=-1)
49
+ directions = (camera_dirs[..., None, :] * camtoworld[None, None, :3, :3]).sum(axis=-1)
50
+ origins = np.broadcast_to(camtoworld[None, None, :3, -1], directions.shape)
51
+ viewdirs = directions / np.linalg.norm(directions, axis=-1, keepdims=True)
52
+ return utils.Rays(origins=origins, directions=directions, viewdirs=viewdirs)
53
+
54
+
55
+ def _render_fn(variables, key_0, key_1, rays):
56
+ return jax.lax.all_gather(model.apply(
57
+ variables, key_0, key_1, rays, False),
58
+ axis_name="batch")
59
+
60
+
61
+ render_pfn = jax.pmap(_render_fn, in_axes=(None, None, None, 0),
62
+ donate_argnums=3, axis_name="batch")
jaxnerf/README.md ADDED
@@ -0,0 +1,205 @@
1
+ # JaxNeRF
2
+
3
+ This is a [JAX](https://github.com/google/jax) implementation of
4
+ [NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis](http://www.matthewtancik.com/nerf).
5
+ This code is created and maintained by
6
+ [Boyang Deng](https://boyangdeng.com/),
7
+ [Jon Barron](https://jonbarron.info/),
8
+ and [Pratul Srinivasan](https://people.eecs.berkeley.edu/~pratul/).
9
+
10
+ <div align="center">
11
+ <img width="95%" alt="NeRF Teaser" src="https://raw.githubusercontent.com/bmild/nerf/master/imgs/pipeline.jpg">
12
+ </div>
13
+
14
+ Our JAX implementation currently supports:
15
+
16
+ <table class="tg">
17
+ <thead>
18
+ <tr>
19
+ <th class="tg-0lax"><span style="font-weight:bold">Platform</span></th>
20
+ <th class="tg-0lax" colspan="2"><span style="font-weight:bold">Single-Host GPU</span></th>
21
+ <th class="tg-0lax" colspan="2"><span style="font-weight:bold">Multi-Device TPU</span></th>
22
+ </tr>
23
+ </thead>
24
+ <tbody>
25
+ <tr>
26
+ <td class="tg-0lax"><span style="font-weight:bold">Type</span></td>
27
+ <td class="tg-0lax">Single-Device</td>
28
+ <td class="tg-0lax">Multi-Device</td>
29
+ <td class="tg-0lax">Single-Host</td>
30
+ <td class="tg-0lax">Multi-Host</td>
31
+ </tr>
32
+ <tr>
33
+ <td class="tg-0lax"><span style="font-weight:bold">Training</span></td>
34
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
35
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
36
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
37
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
38
+ </tr>
39
+ <tr>
40
+ <td class="tg-0lax"><span style="font-weight:bold">Evaluation</span></td>
41
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
42
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
43
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
44
+ <td class="tg-0lax"><img src="http://storage.googleapis.com/gresearch/jaxnerf/check.png" alt="Supported" width=18px height=18px></td>
45
+ </tr>
46
+ </tbody>
47
+ </table>
48
+
49
+ The training job on 128 TPUv2 cores can be done in **2.5 hours (vs. 3 days for TF
50
+ NeRF)** for 1 million optimization steps. In other words, JaxNeRF matches the best results while training much faster.
51
+
52
+ As for inference speed, here are the statistics of rendering an image with
53
+ 800x800 resolution (numbers are averaged over 50 rendering passes):
54
+
55
+ | Platform | 1 x NVIDIA V100 | 8 x NVIDIA V100 | 128 x TPUv2 |
56
+ |----------|:---------------:|:-----------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------:|
57
+ | TF NeRF | 27.74 secs | <img src="http://storage.googleapis.com/gresearch/jaxnerf/cross.png" alt="Not Supported" width=18px height=18px> | <img src="http://storage.googleapis.com/gresearch/jaxnerf/cross.png" alt="Not Supported" width=18px height=18px> |
58
+ | JaxNeRF | 20.77 secs | 2.65 secs | 0.35 secs |
59
+
60
+
61
+ The code is tested and reviewed carefully to match the
62
+ [original TF NeRF implementation](https://github.com/bmild/nerf).
63
+ If you have any issues using this code, please do not open an issue as the repo
64
+ is shared by all projects under Google Research. Instead, just email
65
+ jaxnerf@google.com.
66
+
67
+ ## Installation
68
+ We recommend using [Anaconda](https://www.anaconda.com/products/individual) to set
69
+ up the environment. Run the following commands:
70
+
71
+ ```
72
+ # Clone the repo
73
+ svn export https://github.com/google-research/google-research/trunk/jaxnerf
74
+ # Create a conda environment. Note that you can use Python 3.6-3.8, as
75
+ # one of the dependencies (TensorFlow) does not support Python 3.9 yet.
76
+ conda create --name jaxnerf python=3.6.12; conda activate jaxnerf
77
+ # Prepare pip
78
+ conda install pip; pip install --upgrade pip
79
+ # Install requirements
80
+ pip install -r jaxnerf/requirements.txt
81
+ # [Optional] Install GPU and TPU support for Jax
82
+ # Remember to change cuda101 to your CUDA version, e.g. cuda110 for CUDA 11.0.
83
+ pip install --upgrade jax jaxlib==0.1.57+cuda101 -f https://storage.googleapis.com/jax-releases/jax_releases.html
84
+ ```
85
+
86
+ Then, you'll need to download the datasets
87
+ from the [NeRF official Google Drive](https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1).
88
+ Please download `nerf_synthetic.zip` and `nerf_llff_data.zip` and unzip them
89
+ wherever you like. Let's assume they are placed under `/tmp/jaxnerf/data/`.
90
+
91
+ That's it for installation. You're good to go. **Notice:** For the following instructions, you don't need to enter the jaxnerf folder. Just stay in the parent folder.
92
+
93
+ ## Two Commands for Everything
94
+
95
+ ```
96
+ bash jaxnerf/train.sh demo /tmp/jaxnerf/data
97
+ bash jaxnerf/eval.sh demo /tmp/jaxnerf/data
98
+ ```
99
+
100
+ Once both jobs are done running (which may take a while if you only have 1 GPU
101
+ or CPU), you'll have a folder, `/tmp/jaxnerf/data/demo`, with:
102
+
103
+ * Trained NeRF models for all scenes in the blender dataset.
104
+ * Rendered images and depth maps for all test views.
105
+ * The collected PSNRs of all scenes in a TXT file.
106
+
107
+ Note that we used the `demo` config here, which is basically the `blender` config
108
+ in the paper except with a smaller batch size and far fewer training steps. Of course, you
109
+ can use other configs to replace `demo` and other data locations to replace
110
+ `/tmp/jaxnerf/data`.
111
+
112
+ We provide 2 configurations in the folder `configs` which match the original
113
+ configurations used in the paper for the blender dataset and the LLFF dataset.
114
+ Be careful when you use them: their batch sizes are large, so you may get an OOM error if you have limited resources (for example, a single GPU with little memory). Also, they use a very large number of training steps, so training all scenes may take days.
115
+
116
+ ## Play with One Scene
117
+
118
+ You can also train NeRF on only one scene. The easiest way is to use the provided configs:
119
+
120
+ ```
121
+ python -m jaxnerf.train \
122
+ --data_dir=/PATH/TO/YOUR/SCENE/DATA \
123
+ --train_dir=/PATH/TO/THE/PLACE/YOU/WANT/TO/SAVE/CHECKPOINTS \
124
+ --config=configs/CONFIG_YOU_LIKE
125
+ ```
126
+
127
+ Evaluating NeRF on one scene is similar:
128
+
129
+ ```
130
+ python -m jaxnerf.eval \
131
+ --data_dir=/PATH/TO/YOUR/SCENE/DATA \
132
+ --train_dir=/PATH/TO/THE/PLACE/YOU/SAVED/CHECKPOINTS \
133
+ --config=configs/CONFIG_YOU_LIKE \
134
+ --chunk=4096
135
+ ```
136
+
137
+ The `chunk` parameter defines how many rays are fed to the model in one go.
138
+ We recommend using the largest value that fits in your device's memory, but
139
+ smaller values are fine too, just a bit slower.
140
+
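(For intuition, here is a minimal sketch of what chunked rendering does; `render_rays` is a hypothetical stand-in for the model call, not the repo's actual `utils.render_image`.)

```
import numpy as np

def render_in_chunks(render_rays, rays, chunk=4096):
    # rays: (N, ...) array of flattened rays; render_rays maps a batch of rays to colors.
    outputs = []
    for i in range(0, rays.shape[0], chunk):
        # Feed at most `chunk` rays to the model at a time to bound peak memory.
        outputs.append(render_rays(rays[i:i + chunk]))
    return np.concatenate(outputs, axis=0)
```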
141
+ You can also define your own configurations by passing command line flags. Please refer to the `define_flags` function in `nerf/utils.py` for all the flags and their meanings.
142
+
143
+ **Note**: For the ficus scene in the blender dataset, we noticed that training is sensitive to the initialization,
144
+ e.g. to using different random seeds, when using the original learning rate schedule from the paper.
145
+ Therefore, we provide a simple tweak (turned off by default) for more stable training: using `lr_delay_steps` and `lr_delay_mult`.
146
+ This allows the training to start from a smaller learning rate (`lr_init` * `lr_delay_mult`) in the first `lr_delay_steps`.
147
+ We didn't use them for our pretrained models
148
+ but we tested `lr_delay_steps=5000` with `lr_delay_mult=0.2` and it works quite smoothly.
149
+
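(For intuition, a minimal sketch of such a delayed schedule; `delayed_lr` is a hypothetical helper, and the actual training code may ramp the rate differently.)

```
def delayed_lr(step, lr_init, lr_delay_steps=5000, lr_delay_mult=0.2):
    # Ramp a multiplier from lr_delay_mult up to 1 over the first lr_delay_steps,
    # so training starts from lr_init * lr_delay_mult and recovers the base rate.
    if lr_delay_steps <= 0:
        return lr_init
    ramp = min(step / lr_delay_steps, 1.0)
    return lr_init * (lr_delay_mult + (1.0 - lr_delay_mult) * ramp)
```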
150
+ ## Pretrained Models
151
+
152
+ We provide a collection of pretrained NeRF models that match the numbers
153
+ reported in the [paper](https://arxiv.org/abs/2003.08934). Actually, ours are
154
+ slightly better overall because we trained for more iterations (while still
155
+ being much faster!). You can find our pretrained models
156
+ [here](http://storage.googleapis.com/gresearch/jaxnerf/jaxnerf_pretrained_models.zip).
157
+ The performances (in PSNR) of our pretrained NeRF models are listed below:
158
+
159
+ ### Blender
160
+
161
+
162
+ | Scene | Chair | Drums | Ficus | Hotdog | Lego | Materials | Mic | Ship | Mean |
163
+ |---------|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|
164
+ | TF NeRF | 33.00 | 25.01 | 30.13 | 36.18 | 32.54 | 29.62 | 32.91 | 28.65 | 31.01 |
165
+ | JaxNeRF | **34.08** | **25.03** | **30.43** | **36.92** | **33.28** | **29.91** | **34.53** | **29.36** | **31.69** |
166
+
167
+ ### LLFF
168
+
169
+ | Scene | Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | Mean |
170
+ |---------|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|
171
+ | TF NeRF | 32.70 | **25.17** | 20.92 | 31.16 | **20.36** | 27.40 | 26.80 | 27.45 | 26.50 |
172
+ | JaxNeRF | **33.04** | 24.83 | **21.23** | **31.76** | 20.27 | **28.07** | **27.42** | **28.10** | **26.84** |
173
+
174
+ ## Citation
175
+ If you use this software package, please cite it as:
176
+
177
+ ```
178
+ @software{jaxnerf2020github,
179
+ author = {Boyang Deng and Jonathan T. Barron and Pratul P. Srinivasan},
180
+ title = {{JaxNeRF}: an efficient {JAX} implementation of {NeRF}},
181
+ url = {https://github.com/google-research/google-research/tree/master/jaxnerf},
182
+ version = {0.0},
183
+ year = {2020},
184
+ }
185
+ ```
186
+
187
+ and also cite the original NeRF paper:
188
+
189
+ ```
190
+ @inproceedings{mildenhall2020nerf,
191
+ title={NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis},
192
+ author={Ben Mildenhall and Pratul P. Srinivasan and Matthew Tancik and Jonathan T. Barron and Ravi Ramamoorthi and Ren Ng},
193
+ year={2020},
194
+ booktitle={ECCV},
195
+ }
196
+ ```
197
+
198
+ ## Acknowledgement
199
+ We'd like to thank
200
+ [Daniel Duckworth](http://www.stronglyconvex.com/),
201
+ [Dan Gnanapragasam](https://research.google/people/DanGnanapragasam/),
202
+ and [James Bradbury](https://twitter.com/jekbradbury)
203
+ for their help on reviewing and optimizing this code.
204
+ We'd like to also thank the amazing [JAX](https://github.com/google/jax) team for
205
+ very insightful and helpful discussions on how to use JAX for NeRF.
jaxnerf/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
jaxnerf/configs/blender.yaml ADDED
@@ -0,0 +1,9 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 4096
9
+ randomized: true
jaxnerf/configs/demo.yaml ADDED
@@ -0,0 +1,10 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 1024
9
+ randomized: true
10
+ max_steps: 50000
jaxnerf/configs/diet_nerf_tpu_vm_few_shot.yaml ADDED
@@ -0,0 +1,20 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 1024
9
+ randomized: true
10
+ max_steps: 500000
11
+ print_every: 100
12
+ render_every: 5000
13
+ save_every: 5000
14
+ use_semantic_loss: true
15
+ clip_model_name: openai/clip-vit-base-patch32
16
+ clip_output_dtype: float32
17
+ sc_loss_factor: 4
18
+ sc_loss_every: 16
19
+ sc_loss_mult: 10
20
+ few_shot: 8
jaxnerf/configs/diet_nerf_tpu_vm_test.yaml ADDED
@@ -0,0 +1,20 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 1024
9
+ randomized: true
10
+ max_steps: 500000
11
+ print_every: 100
12
+ render_every: 5000
13
+ save_every: 5000
14
+ use_semantic_loss: true
15
+ clip_model_name: openai/clip-vit-base-patch32
16
+ clip_output_dtype: float32
17
+ sc_loss_factor: 4
18
+ sc_loss_every: 16
19
+ sc_loss_mult: 10
20
+ few_shot: -1
jaxnerf/configs/eval_diet_nerf_tpu_vm_few_shot.yaml ADDED
@@ -0,0 +1,22 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 1024
9
+ randomized: true
10
+ max_steps: 500000
11
+ print_every: 100
12
+ render_every: 5000
13
+ save_every: 5000
14
+ use_semantic_loss: true
15
+ clip_model_name: openai/clip-vit-base-patch32
16
+ clip_output_dtype: float32
17
+ sc_loss_factor: 4
18
+ sc_loss_every: 16
19
+ sc_loss_mult: 10
20
+ few_shot: 8
21
+ spherify: True
22
+ lindisp: True
jaxnerf/configs/llff.yaml ADDED
@@ -0,0 +1,13 @@
1
+ dataset: llff
2
+ batching: all_images
3
+ num_coarse_samples: 64
4
+ num_fine_samples: 128
5
+ use_viewdirs: true
6
+ white_bkgd: false
7
+ batch_size: 4096
8
+ randomized: true
9
+ near: 0.
10
+ far: 1.
11
+ factor: 4
12
+ llffhold: 8
13
+ noise_std: 1.
jaxnerf/configs/llff_360.yaml ADDED
@@ -0,0 +1,15 @@
1
+ dataset: llff
2
+ batching: all_images
3
+ num_coarse_samples: 64
4
+ num_fine_samples: 128
5
+ use_viewdirs: true
6
+ white_bkgd: false
7
+ batch_size: 4096
8
+ randomized: true
9
+ near: 0.2
10
+ far: 100.
11
+ factor: 8
12
+ llffhold: 8
13
+ noise_std: 1.
14
+ spherify: True
15
+ lindisp: True
jaxnerf/configs/nerf_tpu_vm_few_shot.yaml ADDED
@@ -0,0 +1,20 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 1024
9
+ randomized: true
10
+ max_steps: 500000
11
+ print_every: 100
12
+ render_every: 5000
13
+ save_every: 5000
14
+ use_semantic_loss: false
15
+ clip_model_name: openai/clip-vit-base-patch32
16
+ clip_output_dtype: float32
17
+ sc_loss_factor: 4
18
+ sc_loss_every: 16
19
+ sc_loss_mult: 10
20
+ few_shot: 8
jaxnerf/configs/orig_nerf_tpu_vm_full.yaml ADDED
@@ -0,0 +1,13 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 1024
9
+ randomized: true
10
+ max_steps: 100000
11
+ print_every: 1000
12
+ render_every: 5000
13
+ save_every: 5000
jaxnerf/configs/orig_nerf_tpu_vm_test.yaml ADDED
@@ -0,0 +1,13 @@
1
+ dataset: blender
2
+ batching: single_image
3
+ factor: 0
4
+ num_coarse_samples: 64
5
+ num_fine_samples: 128
6
+ use_viewdirs: true
7
+ white_bkgd: true
8
+ batch_size: 1024
9
+ randomized: true
10
+ max_steps: 5000
11
+ print_every: 100
12
+ render_every: 500
13
+ save_every: 500
jaxnerf/eval.py ADDED
@@ -0,0 +1,192 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Lint as: python3
17
+ """Evaluation script for Nerf."""
18
+ import functools
19
+ from os import path
20
+
21
+ from absl import app
22
+ from absl import flags
23
+ import flax
24
+ from flax.metrics import tensorboard
25
+ from flax.training import checkpoints
26
+ import jax
27
+ from jax import random
28
+ import numpy as np
29
+ import tensorflow as tf
30
+ import tensorflow_hub as tf_hub
31
+ #import wandb
32
+ import glob
33
+ import cv2
34
+ import os
35
+
36
+ from jaxnerf.nerf import datasets
37
+ from jaxnerf.nerf import models
38
+ from jaxnerf.nerf import utils
39
+
40
+ FLAGS = flags.FLAGS
41
+
42
+ utils.define_flags()
43
+
44
+ #LPIPS_TFHUB_PATH = "@neural-rendering/lpips/distance/1"
45
+
46
+
47
+ def compute_lpips(image1, image2, model):
48
+ """Compute the LPIPS metric."""
49
+ # The LPIPS model expects a batch dimension.
50
+ return model(
51
+ tf.convert_to_tensor(image1[None, Ellipsis]),
52
+ tf.convert_to_tensor(image2[None, Ellipsis]))[0]
53
+
54
+
55
+ def main(unused_argv):
56
+ # Hide the GPUs and TPUs from TF so it does not reserve memory on them for
57
+ # LPIPS computation or dataset loading.
58
+ tf.config.experimental.set_visible_devices([], "GPU")
59
+ tf.config.experimental.set_visible_devices([], "TPU")
60
+
61
+ #wandb.init(project="hf-flax-clip-nerf", entity="wandb", sync_tensorboard=True)
62
+
63
+ rng = random.PRNGKey(20200823)
64
+
65
+ if FLAGS.config is not None:
66
+ utils.update_flags(FLAGS)
67
+ if FLAGS.train_dir is None:
68
+ raise ValueError("train_dir must be set. None set now.")
69
+ if FLAGS.data_dir is None:
70
+ raise ValueError("data_dir must be set. None set now.")
71
+
72
+ dataset = datasets.get_dataset("test", FLAGS)
73
+ rng, key = random.split(rng)
74
+ model, init_variables = models.get_model(key, dataset.peek(), FLAGS)
75
+ optimizer = flax.optim.Adam(FLAGS.lr_init).create(init_variables)
76
+ state = utils.TrainState(optimizer=optimizer)
77
+ del optimizer, init_variables
78
+
79
+ #lpips_model = tf_hub.load(LPIPS_TFHUB_PATH)
80
+
81
+ # Rendering is forced to be deterministic even if training was randomized, as
82
+ # this eliminates "speckle" artifacts.
83
+ def render_fn(variables, key_0, key_1, rays):
84
+ return jax.lax.all_gather(
85
+ model.apply(variables, key_0, key_1, rays, False), axis_name="batch")
86
+
87
+ # pmap over only the data input.
88
+ render_pfn = jax.pmap(
89
+ render_fn,
90
+ in_axes=(None, None, None, 0),
91
+ donate_argnums=3,
92
+ axis_name="batch",
93
+ )
94
+
95
+ # Compiling to the CPU because it's faster and more accurate.
96
+ ssim_fn = jax.jit(
97
+ functools.partial(utils.compute_ssim, max_val=1.), backend="cpu")
98
+
99
+ last_step = 0
100
+ out_dir = path.join(FLAGS.train_dir,
101
+ "path_renders" if FLAGS.render_path else "test_preds")
102
+ if not FLAGS.eval_once:
103
+ summary_writer = tensorboard.SummaryWriter(
104
+ path.join(FLAGS.train_dir, "eval"))
105
+ while True:
106
+ state = checkpoints.restore_checkpoint(FLAGS.train_dir, state)
107
+ step = int(state.optimizer.state.step)
108
+ if step <= last_step:
109
+ continue
110
+ if FLAGS.save_output and (not utils.isdir(out_dir)):
111
+ utils.makedirs(out_dir)
112
+ psnr_values = []
113
+ ssim_values = []
114
+ #lpips_values = []
115
+ if not FLAGS.eval_once:
116
+ showcase_index = np.random.randint(0, dataset.size)
117
+ for idx in range(dataset.size):
118
+ print(f"Evaluating {idx + 1}/{dataset.size}")
119
+ batch = next(dataset)
120
+ pred_color, pred_disp, pred_acc = utils.render_image(
121
+ functools.partial(render_pfn, state.optimizer.target),
122
+ batch["rays"],
123
+ rng,
124
+ FLAGS.dataset == "llff",
125
+ chunk=FLAGS.chunk)
126
+ if jax.host_id() != 0: # Only record via host 0.
127
+ continue
128
+ if not FLAGS.eval_once and idx == showcase_index:
129
+ showcase_color = pred_color
130
+ showcase_disp = pred_disp
131
+ showcase_acc = pred_acc
132
+ if not FLAGS.render_path:
133
+ showcase_gt = batch["pixels"]
134
+ if not FLAGS.render_path:
135
+ psnr = utils.compute_psnr(((pred_color - batch["pixels"]) ** 2).mean())
136
+ ssim = ssim_fn(pred_color, batch["pixels"])
137
+ #lpips = compute_lpips(pred_color, batch["pixels"], lpips_model)
138
+ print(f"PSNR = {psnr:.4f}, SSIM = {ssim:.4f}")
139
+ psnr_values.append(float(psnr))
140
+ ssim_values.append(float(ssim))
141
+ #lpips_values.append(float(lpips))
142
+ if FLAGS.save_output:
143
+ utils.save_img(pred_color, path.join(out_dir, "{:03d}.png".format(idx)))
144
+ utils.save_img(pred_disp[Ellipsis, 0],
145
+ path.join(out_dir, "disp_{:03d}.png".format(idx)))
146
+ if (not FLAGS.eval_once) and (jax.host_id() == 0):
147
+ summary_writer.image("pred_color", showcase_color, step)
148
+ summary_writer.image("pred_disp", showcase_disp, step)
149
+ summary_writer.image("pred_acc", showcase_acc, step)
150
+ if not FLAGS.render_path:
151
+ summary_writer.scalar("psnr", np.mean(np.array(psnr_values)), step)
152
+ summary_writer.scalar("ssim", np.mean(np.array(ssim_values)), step)
153
+ #summary_writer.scalar("lpips", np.mean(np.array(lpips_values)), step)
154
+ summary_writer.image("target", showcase_gt, step)
155
+ if FLAGS.save_output and (not FLAGS.render_path) and (jax.host_id() == 0):
156
+ with utils.open_file(path.join(out_dir, f"psnrs_{step}.txt"), "w") as f:
157
+ f.write(" ".join([str(v) for v in psnr_values]))
158
+ with utils.open_file(path.join(out_dir, f"ssims_{step}.txt"), "w") as f:
159
+ f.write(" ".join([str(v) for v in ssim_values]))
160
+ #with utils.open_file(path.join(out_dir, f"lpips_{step}.txt"), "w") as f:
161
+ #f.write(" ".join([str(v) for v in lpips_values]))
162
+ with utils.open_file(path.join(out_dir, "psnr.txt"), "w") as f:
163
+ f.write("{}".format(np.mean(np.array(psnr_values))))
164
+ with utils.open_file(path.join(out_dir, "ssim.txt"), "w") as f:
165
+ f.write("{}".format(np.mean(np.array(ssim_values))))
166
+ #with utils.open_file(path.join(out_dir, "lpips.txt"), "w") as f:
167
+ #f.write("{}".format(np.mean(np.array(lpips_values))))
168
+ imglist = glob.glob(os.path.join(out_dir, "[0-9][0-9][0-9].png"))
169
+ sorted_files = sorted(imglist, key=lambda x: int(x.split('/')[-1].split('.')[0]))
170
+ imglist2 = glob.glob(os.path.join(out_dir, "disp_[0-9][0-9][0-9].png"))
171
+ sorted_files2 = sorted(imglist2, key=lambda x: int(x.split('/')[-1].split('.')[0].split('_')[-1]))
172
+ fourcc = cv2.VideoWriter_fourcc(*'MP4V')
173
+ fps = 10.0
174
+ # read the first frame to get the output video dimensions
+ first_img = cv2.imread(sorted_files[0], cv2.IMREAD_COLOR)
+ out = cv2.VideoWriter(os.path.join(out_dir, "rendering_video.mp4"), fourcc, fps,
175
+ (2 * first_img.shape[1], first_img.shape[0]))
176
+
177
+ for i in range(len(sorted_files)):  # iterate frames in sorted order
178
+ img = cv2.imread(sorted_files[i], cv2.IMREAD_COLOR)
179
+ img2 = cv2.imread(sorted_files2[i], cv2.IMREAD_COLOR)
180
+ catimg = np.concatenate((img, img2), axis=1)
181
+ out.write(catimg)
182
+
183
+ out.release()
184
+ if FLAGS.eval_once:
185
+ break
186
+ if int(step) >= FLAGS.max_steps:
187
+ break
188
+ last_step = step
189
+
190
+
191
+ if __name__ == "__main__":
192
+ app.run(main)
jaxnerf/eval.sh ADDED
@@ -0,0 +1,44 @@
1
+ # Copyright 2021 The Google Research Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ #!/bin/bash
16
+ CONFIG=$1
17
+ DATA_ROOT=$2
18
+ ROOT_DIR=/tmp/jaxnerf/"$CONFIG"
19
+ if [ $CONFIG == "llff" ]
20
+ then
21
+ SCENES="room fern leaves fortress orchids flower trex horns"
22
+ DATA_FOLDER="nerf_llff_data"
23
+ else
24
+ SCENES="lego chair drums ficus hotdog materials mic ship"
25
+ DATA_FOLDER="nerf_synthetic"
26
+ fi
27
+
28
+ # launch evaluation jobs for all scenes.
29
+ for scene in $SCENES; do
30
+ python -m jaxnerf.eval \
31
+ --data_dir="$DATA_ROOT"/"$DATA_FOLDER"/"$scene" \
32
+ --train_dir="$ROOT_DIR"/"$scene" \
33
+ --chunk=4096 \
34
+ --config=configs/"$CONFIG"
35
+ done
36
+
37
+ # collect PSNR of all scenes.
38
+ touch "$ROOT_DIR"/psnr.txt
39
+ for scene in $SCENES; do
40
+ printf "${scene}: " >> "$ROOT_DIR"/psnr.txt
41
+ cat "$ROOT_DIR"/"$scene"/test_preds/psnr.txt >> \
42
+ "$ROOT_DIR"/psnr.txt
43
+ printf $'\n' >> "$ROOT_DIR"/psnr.txt
44
+ done
jaxnerf/example_data/imgs/r_0.png ADDED
jaxnerf/example_data/transforms_test.json ADDED
@@ -0,0 +1 @@
1
+ {"camera_angle_x": 0.6911112070083618, "frames": [{"file_path": "./imgs/r_0", "rotation": 0.012566370614359171, "transform_matrix": [[-0.9999021887779236, 0.004192245192825794, -0.013345719315111637, -0.05379832163453102], [-0.013988681137561798, -0.2996590733528137, 0.95394366979599, 3.845470428466797], [-4.656612873077393e-10, 0.9540371894836426, 0.29968830943107605, 1.2080823183059692], [0.0, 0.0, 0.0, 1.0]]}]}
jaxnerf/example_data/transforms_train.json ADDED
@@ -0,0 +1 @@
1
+ {"camera_angle_x": 0.6911112070083618, "frames": [{"file_path": "./imgs/r_0", "rotation": 0.012566370614359171, "transform_matrix": [[-0.9999021887779236, 0.004192245192825794, -0.013345719315111637, -0.05379832163453102], [-0.013988681137561798, -0.2996590733528137, 0.95394366979599, 3.845470428466797], [-4.656612873077393e-10, 0.9540371894836426, 0.29968830943107605, 1.2080823183059692], [0.0, 0.0, 0.0, 1.0]]}]}
jaxnerf/nerf/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
jaxnerf/nerf/clip_utils.py ADDED
@@ -0,0 +1,134 @@
1
+ import math
2
+ from typing import Optional
3
+ from absl import flags
4
+ from functools import partial
5
+
6
+ import jax
7
+ from jax import random
8
+ import jax.numpy as jnp
9
+ import numpy as np
10
+ from transformers import FlaxCLIPModel
11
+
12
+ FLAGS = flags.FLAGS
13
+ # import jmp
14
+ # my_policy = jmp.Policy(compute_dtype=np.float16,
15
+ # param_dtype=np.float16,
16
+ # output_dtype=np.float16)
17
+
18
+
19
+ @partial(jax.jit, static_argnums=[0, 1])
20
+ def update_semantic_loss(model, clip_model, rng, state, batch, lr):
21
+ # the batch is without shard
22
+ random_rays = batch["random_rays"]
23
+ #rng, key_0, key_1 = rng
24
+ rng, key_0, key_1 = random.split(rng,3)
25
+
26
+ def semantic_loss(variables):
27
+ # TODO @Alex: (alt) sample less along a ray/ sample on a strided grid (make change on model call)
28
+ # TODO @Alex: (alt) apply mixed precision
29
+ src_ret = model.apply(variables, key_0, key_1, random_rays, False)
30
+ src_image, _, _ = src_ret[-1]
31
+ # reshape flat pixel to an image (assume 3 channels & square shape)
32
+ w = int(math.sqrt(src_image.shape[0]))
33
+ src_image = src_image.reshape([-1, w, w, 3]).transpose(0, 3, 1, 2)
34
+ src_image = preprocess_for_CLIP(src_image)
35
+ src_embedding = clip_model.get_image_features(pixel_values=src_image)
36
+ src_embedding /= jnp.linalg.norm(src_embedding, axis=-1, keepdims=True)
37
+ src_embedding = jnp.array(src_embedding)
38
+ target_embedding = batch["embedding"]
39
+ sc_loss = 0.5 * FLAGS.sc_loss_mult * jnp.sum((src_embedding - target_embedding) ** 2) / src_embedding.shape[0]
40
+ return sc_loss * 1e-2
41
+
42
+ sc_loss, grad = jax.value_and_grad(semantic_loss)(jax.device_get(jax.tree_map(lambda x:x[0], state)).optimizer.target)
43
+ return sc_loss, grad
44
+
45
+ def trans_t(t):
46
+ return jnp.array([
47
+ [1, 0, 0, 0],
48
+ [0, 1, 0, 0],
49
+ [0, 0, 1, t],
50
+ [0, 0, 0, 1]], dtype=jnp.float32)
51
+
52
+
53
+ def rot_phi(phi):
54
+ return jnp.array([
55
+ [1, 0, 0, 0],
56
+ [0, jnp.cos(phi), -np.sin(phi), 0],
57
+ [0, jnp.sin(phi), jnp.cos(phi), 0],
58
+ [0, 0, 0, 1]], dtype=jnp.float32)
59
+
60
+
61
+ def rot_theta(th):
62
+ return jnp.array([
63
+ [np.cos(th), 0, -np.sin(th), 0],
64
+ [0, 1, 0, 0],
65
+ [np.sin(th), 0, jnp.cos(th), 0],
66
+ [0, 0, 0, 1]], dtype=jnp.float32)
67
+
68
+
69
+ def pose_spherical(theta, phi, radius):
70
+ c2w = trans_t(radius)
71
+ c2w = rot_phi(phi / 180. * jnp.pi) @ c2w
72
+ c2w = rot_theta(theta / 180. * jnp.pi) @ c2w
73
+ c2w = jnp.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) @ c2w
74
+ return c2w
75
+
76
+
77
+ def random_pose(rng, bds):
78
+ rng, *rng_inputs = jax.random.split(rng, 4)
79
+ radius = random.uniform(rng_inputs[0], minval=bds[0], maxval=bds[1])
80
+ theta = random.uniform(rng_inputs[1], minval=0, maxval=2 * jnp.pi)
81
+ phi = random.uniform(rng_inputs[2], minval=0, maxval=jnp.pi / 2)
82
+ # pose_spherical expects (theta, phi, radius)
+ return pose_spherical(theta, phi, radius)
83
+
84
+
85
+ def preprocess_for_CLIP(image):
86
+ """
87
+ jax-based preprocessing for CLIP
88
+ image [B, 3, H, W]: batch image
89
+ return [B, 3, 224, 224]: pre-processed image for CLIP
90
+ """
91
+ B, D, H, W = image.shape
92
+ image = jax.image.resize(image, (B, D, 224, 224), 'bicubic') # assume that images have rectangle shape.
93
+ mean = jnp.array([0.48145466, 0.4578275, 0.40821073]).reshape(1, 3, 1, 1)
94
+ std = jnp.array([0.26862954, 0.26130258, 0.27577711]).reshape(1, 3, 1, 1)
95
+ image = (image - mean.astype(image.dtype)) / std.astype(image.dtype)
96
+ return image
97
+
98
+
99
+ # TODO @Alex: VisionModel v.s. original CLIP? (differ by a projection matrix)
100
+ def init_CLIP(dtype: str, model_name: Optional[str]) -> FlaxCLIPModel:
101
+ if dtype == 'float16':
102
+ dtype = jnp.float16
103
+ elif dtype == 'float32':
104
+ dtype = jnp.float32
105
+ else:
106
+ raise ValueError
107
+
108
+ if model_name is None:
109
+ model_name = 'openai/clip-vit-base-patch32'
110
+ return FlaxCLIPModel.from_pretrained(model_name, dtype=dtype)
111
+
112
+
113
+ # def SC_loss(rng_inputs, model, params, bds, rays, N_samples, target_emb, CLIP_model, l):
114
+ # """
115
+ # target_emb [1, D]: pre-computed target embedding vector \phi(I)
116
+ # source_img [1, 3, H, W]: source image \hat{I}
117
+ # l: loss weight lambda
118
+ # return: SC_loss
119
+ # """
120
+ # # _,H,W,D = rays.shape
121
+ # rng_inputs, model, params, bds, rays, N_samples, target_emb, CLIP_model, l = my_policy.cast_to_compute(
122
+ # (rng_inputs, model, params, bds, rays, N_samples, target_emb, CLIP_model, l))
123
+ # _, H, W, _ = rays.shape
124
+ # source_img = jnp.clip(render_fn(rng_inputs, model, params, None,
125
+ # np.reshape(rays, (2, -1, 3)),
126
+ # bds[0], bds[1], 1, rand=False),
127
+ # 0, 1)
128
+ # # source_img = np.clip(render_rays(rng_inputs, model, params, None, np.reshape(rays, (2, -1, 3)), bds[0], bds[1], 1, rand=False), 0, 1)
129
+ # source_img = np.reshape(source_img, [1, H, W, 3]).transpose(0, 3, 1, 2)
130
+ # source_img = preprocess_for_CLIP(source_img)
131
+ # source_emb = CLIP_model.get_image_features(pixel_values=source_img)
132
+ # source_emb /= np.linalg.norm(source_emb, axis=-1, keepdims=True)
133
+ # return l/2 * (np.sum((source_emb - target_emb) ** 2) / source_emb.shape[0])
134
+
jaxnerf/nerf/datasets.py ADDED
@@ -0,0 +1,565 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Lint as: python3
17
+ """Different datasets implementation plus a general port for all the datasets."""
18
+ INTERNAL = False # pylint: disable=g-statement-before-imports
19
+ import json
20
+ import os
21
+ from os import path
22
+ import queue
23
+ import threading
24
+
25
+ if not INTERNAL:
26
+ import cv2 # pylint: disable=g-import-not-at-top
27
+ import jax
28
+ import numpy as np
29
+ from PIL import Image
30
+
31
+ from jaxnerf.nerf import utils
32
+ from jaxnerf.nerf import clip_utils
33
+
34
+ def get_dataset(split, args, clip_model = None):
35
+ return dataset_dict[args.dataset](split, args, clip_model)
36
+
37
+
38
+ def convert_to_ndc(origins, directions, focal, w, h, near=1.):
39
+ """Convert a set of rays to NDC coordinates."""
40
+ # Shift ray origins to near plane
41
+ t = -(near + origins[..., 2]) / directions[..., 2]
42
+ origins = origins + t[..., None] * directions
43
+
44
+ dx, dy, dz = tuple(np.moveaxis(directions, -1, 0))
45
+ ox, oy, oz = tuple(np.moveaxis(origins, -1, 0))
46
+
47
+ # Projection
48
+ o0 = -((2 * focal) / w) * (ox / oz)
49
+ o1 = -((2 * focal) / h) * (oy / oz)
50
+ o2 = 1 + 2 * near / oz
51
+
52
+ d0 = -((2 * focal) / w) * (dx / dz - ox / oz)
53
+ d1 = -((2 * focal) / h) * (dy / dz - oy / oz)
54
+ d2 = -2 * near / oz
55
+
56
+ origins = np.stack([o0, o1, o2], -1)
57
+ directions = np.stack([d0, d1, d2], -1)
58
+ return origins, directions
59
+
60
+
61
+ class Dataset(threading.Thread):
62
+ """Dataset Base Class."""
63
+
64
+ def __init__(self, split, flags, clip_model):
65
+ super(Dataset, self).__init__()
66
+ self.queue = queue.Queue(3) # Set prefetch buffer to 3 batches.
67
+ self.daemon = True
68
+ self.use_pixel_centers = flags.use_pixel_centers
69
+ self.split = split
70
+
71
+ if split == "train":
72
+ self._train_init(flags, clip_model)
73
+ elif split == "test":
74
+ self._test_init(flags)
75
+ else:
76
+ raise ValueError(
77
+ "the split argument should be either \"train\" or \"test\", set"
78
+ "to {} here.".format(split))
79
+ self.batch_size = flags.batch_size // jax.process_count()
80
+ self.batching = flags.batching
81
+ self.render_path = flags.render_path
82
+ self.far = flags.far
83
+ self.near = flags.near
84
+ self.max_steps = flags.max_steps
85
+ self.sc_loss_factor = flags.sc_loss_factor
86
+ self.start()
87
+
88
+ def __iter__(self):
89
+ return self
90
+
91
+ def __next__(self):
92
+ """Get the next training batch or test example.
93
+
94
+ Returns:
95
+ batch: dict, has "pixels" and "rays".
96
+ """
97
+ x = self.queue.get()
98
+ if self.split == "train":
99
+ return utils.shard(x)
100
+ else:
101
+ return utils.to_device(x)
102
+
103
+ def peek(self):
104
+ """Peek at the next training batch or test example without dequeuing it.
105
+
106
+ Returns:
107
+ batch: dict, has "pixels" and "rays".
108
+ """
109
+ x = self.queue.queue[0].copy() # Make a copy of the front of the queue.
110
+ if self.split == "train":
111
+ return utils.shard(x)
112
+ else:
113
+ return utils.to_device(x)
114
+
115
+ def run(self):
116
+ if self.split == "train":
117
+ next_func = self._next_train
118
+ else:
119
+ next_func = self._next_test
120
+ while True:
121
+ self.queue.put(next_func())
122
+
123
+ @property
124
+ def size(self):
125
+ return self.n_examples
126
+
127
+ def _train_init(self, flags, clip_model):
128
+ """Initialize training."""
129
+ self._load_renderings(flags, clip_model)
130
+ self._generate_rays()
131
+
132
+ if flags.batching == "all_images":
133
+ # flatten the ray and image dimension together.
134
+ self.images = self.images.reshape([-1, 3])
135
+ self.rays = utils.namedtuple_map(lambda r: r.reshape([-1, r.shape[-1]]),
136
+ self.rays)
137
+ elif flags.batching == "single_image":
138
+ self.images = self.images.reshape([-1, self.resolution, 3])
139
+ self.rays = utils.namedtuple_map(
140
+ lambda r: r.reshape([-1, self.resolution, r.shape[-1]]), self.rays)
141
+ else:
142
+ raise NotImplementedError(
143
+ f"{flags.batching} batching strategy is not implemented.")
144
+
145
+ def _test_init(self, flags):
146
+ self._load_renderings(flags, clip_model = None)
147
+ self._generate_rays()
148
+ self.it = 0
149
+
150
+ def _next_train(self):
151
+ """Sample next training batch."""
152
+
153
+ if self.batching == "all_images":
154
+ ray_indices = np.random.randint(0, self.rays[0].shape[0],
155
+ (self.batch_size,))
156
+ batch_pixels = self.images[ray_indices]
157
+ batch_rays = utils.namedtuple_map(lambda r: r[ray_indices], self.rays)
158
+ raise NotImplementedError("image_index not implemented for batching=all_images")
159
+
160
+ elif self.batching == "single_image":
161
+ image_index = np.random.randint(0, self.n_examples, ())
162
+ ray_indices = np.random.randint(0, self.rays[0][0].shape[0],
163
+ (self.batch_size,))
164
+ batch_pixels = self.images[image_index][ray_indices]
165
+ batch_rays = utils.namedtuple_map(lambda r: r[image_index][ray_indices],
166
+ self.rays)
167
+ else:
168
+ raise NotImplementedError(
169
+ f"{self.batching} batching strategy is not implemented.")
170
+ return {"pixels": batch_pixels, "rays": batch_rays, "image_index": image_index}
171
+
172
+ def _next_test(self):
173
+ """Sample next test example."""
174
+ idx = self.it
175
+ self.it = (self.it + 1) % self.n_examples
176
+
177
+ if self.render_path:
178
+ return {"rays": utils.namedtuple_map(lambda r: r[idx], self.render_rays)}
179
+ else:
180
+ return {"pixels": self.images[idx],
181
+ "rays": utils.namedtuple_map(lambda r: r[idx], self.rays),
182
+ "image_index": idx}
183
+
184
+ # TODO(bydeng): Swap this function with a more flexible camera model.
185
+ def _generate_rays(self):
186
+ """Generating rays for all images."""
187
+ pixel_center = 0.5 if self.use_pixel_centers else 0.0
188
+ x, y = np.meshgrid( # pylint: disable=unbalanced-tuple-unpacking
189
+ np.arange(self.w, dtype=np.float32) + pixel_center, # X-Axis (columns)
190
+ np.arange(self.h, dtype=np.float32) + pixel_center, # Y-Axis (rows)
191
+ indexing="xy")
192
+ camera_dirs = np.stack([(x - self.w * 0.5) / self.focal,
193
+ -(y - self.h * 0.5) / self.focal, -np.ones_like(x)],
194
+ axis=-1)
195
+ directions = ((camera_dirs[None, ..., None, :] *
196
+ self.camtoworlds[:, None, None, :3, :3]).sum(axis=-1))
197
+ origins = np.broadcast_to(self.camtoworlds[:, None, None, :3, -1],
198
+ directions.shape)
199
+ viewdirs = directions / np.linalg.norm(directions, axis=-1, keepdims=True)
200
+ self.rays = utils.Rays(
201
+ origins=origins, directions=directions, viewdirs=viewdirs)
202
+
203
+ def camtoworld_matrix_to_rays(self, camtoworld, downsample = 1):
204
+ """ render one instance of rays given a camera to world matrix (4, 4) """
205
+ pixel_center = 0.5 if self.use_pixel_centers else 0.0
206
+ # TODO @Alex: apply mesh downsampling here
207
+ x, y = np.meshgrid( # pylint: disable=unbalanced-tuple-unpacking
208
+ np.arange(self.w, step = downsample, dtype=np.float32) + pixel_center, # X-Axis (columns)
209
+ np.arange(self.h, step = downsample, dtype=np.float32) + pixel_center, # Y-Axis (rows)
210
+ indexing="xy")
211
+ camera_dirs = np.stack([(x - self.w * 0.5) / self.focal,
212
+ -(y - self.h * 0.5) / self.focal, -np.ones_like(x)],
213
+ axis=-1)
214
+ directions = (camera_dirs[..., None, :] * camtoworld[None, None, :3, :3]).sum(axis=-1)
215
+ origins = np.broadcast_to(camtoworld[None, None, :3, -1], directions.shape)
216
+ viewdirs = directions / np.linalg.norm(directions, axis=-1, keepdims=True)
217
+ return utils.Rays(origins=origins, directions=directions, viewdirs=viewdirs)
218
+
219
+ class Blender(Dataset):
220
+ """Blender Dataset."""
221
+
222
+ def _load_renderings(self, flags, clip_model = None):
223
+ """Load images from disk."""
224
+ if flags.render_path:
225
+ raise ValueError("render_path cannot be used for the blender dataset.")
226
+ cams, images, meta = self.load_files(flags.data_dir, self.split, flags.factor, flags.few_shot)
227
+
228
+ # load in CLIP precomputed image features
229
+ self.images = np.stack(images, axis=0)
230
+ if flags.white_bkgd:
231
+ self.images = (self.images[..., :3] * self.images[..., -1:] +
232
+ (1. - self.images[..., -1:]))
233
+ else:
234
+ self.images = self.images[..., :3]
235
+ self.h, self.w = self.images.shape[1:3]
236
+ self.resolution = self.h * self.w
237
+ self.camtoworlds = np.stack(cams, axis=0)
238
+ camera_angle_x = float(meta["camera_angle_x"])
239
+ self.focal = .5 * self.w / np.tan(.5 * camera_angle_x)
240
+ self.n_examples = self.images.shape[0]
241
+
242
+ if flags.use_semantic_loss and clip_model is not None:
243
+ embs = []
244
+ for img in self.images:
245
+ img = np.expand_dims(np.transpose(img,[2,0,1]), 0)
246
+ embs.append(clip_model.get_image_features(pixel_values = clip_utils.preprocess_for_CLIP(img)))
247
+ self.embeddings = np.concatenate(embs, 0)
248
+
249
+ self.image_idx = np.arange(self.images.shape[0])
250
+ np.random.shuffle(self.image_idx)
251
+ self.image_idx = self.image_idx.tolist()
252
+
253
+ # self.embeddings = utils.read_pickle(flags.precompute_pkl_path)
254
+ # self.precompute_pkl_path = flags.precompute_pkl_path
255
+
256
+
257
+ @staticmethod
258
+ def load_files(data_dir, split, factor, few_shot):
259
+ with utils.open_file(path.join(data_dir, "transforms_{}.json".format(split)), "r") as fp:
260
+ meta = json.load(fp)
261
+ images = []
262
+ cams = []
263
+
264
+ frames = np.arange(len(meta["frames"]))
265
+ if few_shot > 0 and split == 'train':
266
+ np.random.shuffle(frames)
267
+ frames = frames[:few_shot]
268
+
269
+ for i in frames:
270
+ frame = meta["frames"][i]
271
+ fname = os.path.join(data_dir, frame["file_path"] + ".png")
272
+ with utils.open_file(fname, "rb") as imgin:
273
+ image = np.array(Image.open(imgin)).astype(np.float32) / 255.
274
+ if factor == 2:
275
+ [halfres_h, halfres_w] = [hw // 2 for hw in image.shape[:2]]
276
+ image = cv2.resize(image, (halfres_w, halfres_h),
277
+ interpolation=cv2.INTER_AREA)
278
+ elif factor == 4:
279
+ [halfres_h, halfres_w] = [hw // 4 for hw in image.shape[:2]]
280
+ image = cv2.resize(image, (halfres_w, halfres_h),
281
+ interpolation=cv2.INTER_AREA)
282
+ elif factor > 0:
283
+ raise ValueError("Blender dataset only supports factor=0 or 2 or 4, {} "
284
+ "set.".format(factor))
285
+ cams.append(np.array(frame["transform_matrix"], dtype=np.float32))
286
+ images.append(image)
287
+ return cams, images, meta
288
+
289
+ def _next_train(self):
290
+ batch_dict = super(Blender, self)._next_train()
291
+ if self.batching == "single_image":
292
+ image_index = batch_dict.pop("image_index")
293
+ # target image for CLIP
294
+ '''
295
+ batch_dict["embedding"] = self.embeddings[image_index]
296
+
297
+ # source rays for CLIP (for constructing source image later)
298
+ src_seed = int(np.random.randint(0, self.max_steps, ()))
299
+ src_rng = jax.random.PRNGKey(src_seed)
300
+ src_camtoworld = np.array(clip_utils.random_pose(src_rng, (self.near, self.far)))
301
+ random_rays = self.camtoworld_matrix_to_rays(src_camtoworld, downsample = 16)
302
+ random_rays = utils.Rays(origins=np.reshape(random_rays[0], [-1,3]), directions=np.reshape(random_rays[1], [-1,3]), viewdirs=np.reshape(random_rays[2], [-1,3]))
303
+ batch_dict["random_rays"] = random_rays
304
+ '''
305
+ else:
306
+ raise NotImplementedError
307
+ return batch_dict
308
+
309
+ def get_clip_data(self):
310
+ if len(self.image_idx) == 0:
311
+ self.image_idx = np.arange(self.images.shape[0])
312
+ np.random.shuffle(self.image_idx)
313
+ self.image_idx = self.image_idx.tolist()
314
+ image_index = self.image_idx.pop()
315
+
316
+ batch_dict = {}
317
+ batch_dict["embedding"] = self.embeddings[image_index]
318
+
319
+ # source rays for CLIP (for constructing source image later)
320
+ src_seed = int(np.random.randint(0, self.max_steps, ()))
321
+ src_rng = jax.random.PRNGKey(src_seed)
322
+ src_camtoworld = np.array(clip_utils.random_pose(src_rng, (self.near, self.far)))
323
+ random_rays = self.camtoworld_matrix_to_rays(src_camtoworld, downsample = 16)
324
+ random_rays = utils.Rays(origins=np.reshape(random_rays[0], [-1,3]), directions=np.reshape(random_rays[1], [-1,3]), viewdirs=np.reshape(random_rays[2], [-1,3]))
325
+ batch_dict["random_rays"] = random_rays
326
+ return batch_dict
327
+
328
+ class LLFF(Dataset):
329
+ """LLFF Dataset."""
330
+
331
+ def _load_renderings(self, flags, clip_model = None):  # clip_model unused; matches the Dataset call signature
332
+ """Load images from disk."""
333
+ # Load images.
334
+ imgdir_suffix = ""
335
+ if flags.factor > 0:
336
+ imgdir_suffix = "_{}".format(flags.factor)
337
+ factor = flags.factor
338
+ else:
339
+ factor = 1
340
+ imgdir = path.join(flags.data_dir, "images" + imgdir_suffix)
341
+ if not utils.file_exists(imgdir):
342
+ raise ValueError("Image folder {} doesn't exist.".format(imgdir))
343
+ imgfiles = [
344
+ path.join(imgdir, f)
345
+ for f in sorted(utils.listdir(imgdir))
346
+ if f.endswith("JPG") or f.endswith("jpg") or f.endswith("png")
347
+ ]
348
+ images = []
349
+ for imgfile in imgfiles:
350
+ with utils.open_file(imgfile, "rb") as imgin:
351
+ image = np.array(Image.open(imgin), dtype=np.float32) / 255.
352
+ images.append(image)
353
+ images = np.stack(images, axis=-1)
354
+
355
+ # Load poses and bds.
356
+ with utils.open_file(path.join(flags.data_dir, "poses_bounds.npy"),
357
+ "rb") as fp:
358
+ poses_arr = np.load(fp)
359
+ poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1, 2, 0])
360
+ bds = poses_arr[:, -2:].transpose([1, 0])
361
+ if poses.shape[-1] != images.shape[-1]:
362
+ raise RuntimeError("Mismatch between imgs {} and poses {}".format(
363
+ images.shape[-1], poses.shape[-1]))
364
+
365
+ # Update poses according to downsampling.
366
+ poses[:2, 4, :] = np.array(images.shape[:2]).reshape([2, 1])
367
+ poses[2, 4, :] = poses[2, 4, :] * 1. / factor
368
+
369
+ # Correct rotation matrix ordering and move variable dim to axis 0.
370
+ poses = np.concatenate(
371
+ [poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1)
372
+ poses = np.moveaxis(poses, -1, 0).astype(np.float32)
373
+ images = np.moveaxis(images, -1, 0)
374
+ bds = np.moveaxis(bds, -1, 0).astype(np.float32)
375
+
376
+ # Rescale according to a default bd factor.
377
+ scale = 1. / (bds.min() * .75)
378
+ poses[:, :3, 3] *= scale
379
+ bds *= scale
380
+
381
+ # Recenter poses.
382
+ poses = self._recenter_poses(poses)
383
+
384
+ # Generate a spiral/spherical ray path for rendering videos.
385
+ if flags.spherify:
386
+ poses = self._generate_spherical_poses(poses, bds)
387
+ self.spherify = True
388
+ else:
389
+ self.spherify = False
390
+ if not flags.spherify and self.split == "test":
391
+ self._generate_spiral_poses(poses, bds)
392
+
393
+ # Select the split.
394
+ i_test = np.arange(images.shape[0])[::flags.llffhold]
395
+ i_train = np.array(
396
+ [i for i in np.arange(int(images.shape[0])) if i not in i_test])
397
+ if self.split == "train":
398
+ indices = i_train
399
+ else:
400
+ indices = i_test
401
+ images = images[indices]
402
+ poses = poses[indices]
403
+
404
+ self.images = images
405
+ self.camtoworlds = poses[:, :3, :4]
406
+ self.focal = poses[0, -1, -1]
407
+ self.h, self.w = images.shape[1:3]
408
+ self.resolution = self.h * self.w
409
+ if flags.render_path:
410
+ self.n_examples = self.render_poses.shape[0]
411
+ else:
412
+ self.n_examples = images.shape[0]
413
+
414
+ def _generate_rays(self):
415
+ """Generate normalized device coordinate rays for llff."""
416
+ if self.split == "test":
417
+ n_render_poses = self.render_poses.shape[0]
418
+ self.camtoworlds = np.concatenate([self.render_poses, self.camtoworlds],
419
+ axis=0)
420
+
421
+ super()._generate_rays()
422
+
423
+ if not self.spherify:
424
+ ndc_origins, ndc_directions = convert_to_ndc(self.rays.origins,
425
+ self.rays.directions,
426
+ self.focal, self.w, self.h)
427
+ self.rays = utils.Rays(
428
+ origins=ndc_origins,
429
+ directions=ndc_directions,
430
+ viewdirs=self.rays.viewdirs)
431
+
432
+ # Split poses from the dataset and generated poses
433
+ if self.split == "test":
434
+ self.camtoworlds = self.camtoworlds[n_render_poses:]
435
+ split = [np.split(r, [n_render_poses], 0) for r in self.rays]
436
+ split0, split1 = zip(*split)
437
+ self.render_rays = utils.Rays(*split0)
438
+ self.rays = utils.Rays(*split1)
439
+
440
+ def _recenter_poses(self, poses):
441
+ """Recenter poses according to the original NeRF code."""
442
+ poses_ = poses.copy()
443
+ bottom = np.reshape([0, 0, 0, 1.], [1, 4])
444
+ c2w = self._poses_avg(poses)
445
+ c2w = np.concatenate([c2w[:3, :4], bottom], -2)
446
+ bottom = np.tile(np.reshape(bottom, [1, 1, 4]), [poses.shape[0], 1, 1])
447
+ poses = np.concatenate([poses[:, :3, :4], bottom], -2)
448
+ poses = np.linalg.inv(c2w) @ poses
449
+ poses_[:, :3, :4] = poses[:, :3, :4]
450
+ poses = poses_
451
+ return poses
452
+
453
+ def _poses_avg(self, poses):
454
+ """Average poses according to the original NeRF code."""
455
+ hwf = poses[0, :3, -1:]
456
+ center = poses[:, :3, 3].mean(0)
457
+ vec2 = self._normalize(poses[:, :3, 2].sum(0))
458
+ up = poses[:, :3, 1].sum(0)
459
+ c2w = np.concatenate([self._viewmatrix(vec2, up, center), hwf], 1)
460
+ return c2w
461
+
462
+ def _viewmatrix(self, z, up, pos):
463
+ """Construct lookat view matrix."""
464
+ vec2 = self._normalize(z)
465
+ vec1_avg = up
466
+ vec0 = self._normalize(np.cross(vec1_avg, vec2))
467
+ vec1 = self._normalize(np.cross(vec2, vec0))
468
+ m = np.stack([vec0, vec1, vec2, pos], 1)
469
+ return m
470
+
471
+ def _normalize(self, x):
472
+ """Normalization helper function."""
473
+ return x / np.linalg.norm(x)
474
+
475
+ def _generate_spiral_poses(self, poses, bds):
476
+ """Generate a spiral path for rendering."""
477
+ c2w = self._poses_avg(poses)
478
+ # Get average pose.
479
+ up = self._normalize(poses[:, :3, 1].sum(0))
480
+ # Find a reasonable "focus depth" for this dataset.
481
+ close_depth, inf_depth = bds.min() * .9, bds.max() * 5.
482
+ dt = .75
483
+ mean_dz = 1. / (((1. - dt) / close_depth + dt / inf_depth))
484
+ focal = mean_dz
485
+ # Get radii for spiral path.
486
+ tt = poses[:, :3, 3]
487
+ rads = np.percentile(np.abs(tt), 90, 0)
488
+ c2w_path = c2w
489
+ n_views = 120
490
+ n_rots = 2
491
+ # Generate poses for spiral path.
492
+ render_poses = []
493
+ rads = np.array(list(rads) + [1.])
494
+ hwf = c2w_path[:, 4:5]
495
+ zrate = .5
496
+ for theta in np.linspace(0., 2. * np.pi * n_rots, n_views + 1)[:-1]:
497
+ c = np.dot(c2w[:3, :4], (np.array(
498
+ [np.cos(theta), -np.sin(theta), -np.sin(theta * zrate), 1.]) * rads))
499
+ z = self._normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.])))
500
+ render_poses.append(np.concatenate([self._viewmatrix(z, up, c), hwf], 1))
501
+ self.render_poses = np.array(render_poses).astype(np.float32)[:, :3, :4]
502
+
503
+ def _generate_spherical_poses(self, poses, bds):
504
+ """Generate a 360 degree spherical path for rendering."""
505
+ # pylint: disable=g-long-lambda
506
+ p34_to_44 = lambda p: np.concatenate([
507
+ p,
508
+ np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])
509
+ ], 1)
510
+ rays_d = poses[:, :3, 2:3]
511
+ rays_o = poses[:, :3, 3:4]
512
+
513
+ def min_line_dist(rays_o, rays_d):
514
+ a_i = np.eye(3) - rays_d * np.transpose(rays_d, [0, 2, 1])
515
+ b_i = -a_i @ rays_o
516
+ pt_mindist = np.squeeze(-np.linalg.inv(
517
+ (np.transpose(a_i, [0, 2, 1]) @ a_i).mean(0)) @ (b_i).mean(0))
518
+ return pt_mindist
519
+
520
+ pt_mindist = min_line_dist(rays_o, rays_d)
521
+ center = pt_mindist
522
+ up = (poses[:, :3, 3] - center).mean(0)
523
+ vec0 = self._normalize(up)
524
+ vec1 = self._normalize(np.cross([.1, .2, .3], vec0))
525
+ vec2 = self._normalize(np.cross(vec0, vec1))
526
+ pos = center
527
+ c2w = np.stack([vec1, vec2, vec0, pos], 1)
528
+ poses_reset = (
529
+ np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(poses[:, :3, :4]))
530
+ rad = np.sqrt(np.mean(np.sum(np.square(poses_reset[:, :3, 3]), -1)))
531
+ sc = 1. / rad
532
+ poses_reset[:, :3, 3] *= sc
533
+ bds *= sc
534
+ rad *= sc
535
+ centroid = np.mean(poses_reset[:, :3, 3], 0)
536
+ zh = centroid[2]
537
+ radcircle = np.sqrt(rad ** 2 - zh ** 2)
538
+ new_poses = []
539
+
540
+ for th in np.linspace(0., 2. * np.pi, 120):
541
+ camorigin = np.array([radcircle * np.cos(th), radcircle * np.sin(th), zh])
542
+ up = np.array([0, 0, -1.])
543
+ vec2 = self._normalize(camorigin)
544
+ vec0 = self._normalize(np.cross(vec2, up))
545
+ vec1 = self._normalize(np.cross(vec2, vec0))
546
+ pos = camorigin
547
+ p = np.stack([vec0, vec1, vec2, pos], 1)
548
+ new_poses.append(p)
549
+
550
+ new_poses = np.stack(new_poses, 0)
551
+ new_poses = np.concatenate([
552
+ new_poses,
553
+ np.broadcast_to(poses[0, :3, -1:], new_poses[:, :3, -1:].shape)
554
+ ], -1)
555
+ poses_reset = np.concatenate([
556
+ poses_reset[:, :3, :4],
557
+ np.broadcast_to(poses[0, :3, -1:], poses_reset[:, :3, -1:].shape)
558
+ ], -1)
559
+ if self.split == "test":
560
+ self.render_poses = new_poses[:, :3, :4]
561
+ return poses_reset
562
+
563
+
564
+ dataset_dict = {"blender": Blender,
565
+ "llff": LLFF}
jaxnerf/nerf/model_utils.py ADDED
@@ -0,0 +1,321 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Lint as: python3
17
+ """Helper functions/classes for model definition."""
18
+
19
+ import functools
20
+ from typing import Any, Callable
21
+
22
+ from flax import linen as nn
23
+ import jax
24
+ from jax import lax
25
+ from jax import random
26
+ import jax.numpy as jnp
27
+
28
+
29
+ class MLP(nn.Module):
30
+ """A simple MLP."""
31
+ net_depth: int = 8 # The depth of the first part of MLP.
32
+ net_width: int = 256 # The width of the first part of MLP.
33
+ net_depth_condition: int = 1 # The depth of the second part of MLP.
34
+ net_width_condition: int = 128 # The width of the second part of MLP.
35
+ net_activation: Callable[..., Any] = nn.relu # The activation function.
36
+ skip_layer: int = 4 # The layer to add skip layers to.
37
+ num_rgb_channels: int = 3 # The number of RGB channels.
38
+ num_sigma_channels: int = 1 # The number of sigma channels.
39
+
40
+ @nn.compact
41
+ def __call__(self, x, condition=None):
42
+ """
43
+ Evaluate the MLP.
44
+
45
+ Args:
46
+ x: jnp.ndarray(float32), [batch, num_samples, feature], points.
47
+ condition: jnp.ndarray(float32), [batch, feature], if not None, this
48
+ variable will be part of the input to the second part of the MLP
49
+ concatenated with the output vector of the first part of the MLP. If
50
+ None, only the first part of the MLP will be used with input x. In the
51
+ original paper, this variable is the view direction.
52
+
53
+ Returns:
54
+ raw_rgb: jnp.ndarray(float32), with a shape of
55
+ [batch, num_samples, num_rgb_channels].
56
+ raw_sigma: jnp.ndarray(float32), with a shape of
57
+ [batch, num_samples, num_sigma_channels].
58
+ """
59
+ feature_dim = x.shape[-1]
60
+ num_samples = x.shape[1]
61
+ x = x.reshape([-1, feature_dim])
62
+ dense_layer = functools.partial(
63
+ nn.Dense, kernel_init=jax.nn.initializers.glorot_uniform())
64
+ inputs = x
65
+ for i in range(self.net_depth):
66
+ x = dense_layer(self.net_width)(x)
67
+ x = self.net_activation(x)
68
+ if i % self.skip_layer == 0 and i > 0:
69
+ x = jnp.concatenate([x, inputs], axis=-1)
70
+ raw_sigma = dense_layer(self.num_sigma_channels)(x).reshape(
71
+ [-1, num_samples, self.num_sigma_channels])
72
+
73
+ if condition is not None:
74
+ # Output of the first part of MLP.
75
+ bottleneck = dense_layer(self.net_width)(x)
76
+ # Broadcast condition from [batch, feature] to
77
+ # [batch, num_samples, feature] since all the samples along the same ray
78
+ # have the same viewdir.
79
+ condition = jnp.tile(condition[:, None, :], (1, num_samples, 1))
80
+ # Collapse the [batch, num_samples, feature] tensor to
81
+ # [batch * num_samples, feature] so that it can be fed into nn.Dense.
82
+ condition = condition.reshape([-1, condition.shape[-1]])
83
+ x = jnp.concatenate([bottleneck, condition], axis=-1)
84
+ # Here use 1 extra layer to align with the original nerf model.
85
+ for i in range(self.net_depth_condition):
86
+ x = dense_layer(self.net_width_condition)(x)
87
+ x = self.net_activation(x)
88
+ raw_rgb = dense_layer(self.num_rgb_channels)(x).reshape(
89
+ [-1, num_samples, self.num_rgb_channels])
90
+ return raw_rgb, raw_sigma
91
+
92
+
93
+ def cast_rays(z_vals, origins, directions):
94
+ return origins[..., None, :] + z_vals[..., None] * directions[..., None, :]
95
+
96
+
97
+ def sample_along_rays(key, origins, directions, num_samples, near, far,
98
+ randomized, lindisp):
99
+ """
100
+ Stratified sampling along the rays.
101
+
102
+ Args:
103
+ key: jnp.ndarray, random generator key.
104
+ origins: jnp.ndarray(float32), [batch_size, 3], ray origins.
105
+ directions: jnp.ndarray(float32), [batch_size, 3], ray directions.
106
+ num_samples: int.
107
+ near: float, near clip.
108
+ far: float, far clip.
109
+ randomized: bool, use randomized stratified sampling.
110
+ lindisp: bool, sampling linearly in disparity rather than depth.
111
+
112
+ Returns:
113
+ z_vals: jnp.ndarray, [batch_size, num_samples], sampled z values.
114
+ points: jnp.ndarray, [batch_size, num_samples, 3], sampled points.
115
+ """
116
+ batch_size = origins.shape[0]
117
+
118
+ t_vals = jnp.linspace(0., 1., num_samples)
119
+ if lindisp:
120
+ z_vals = 1. / (1. / near * (1. - t_vals) + 1. / far * t_vals)
121
+ else:
122
+ z_vals = near * (1. - t_vals) + far * t_vals
123
+
124
+ if randomized:
125
+ mids = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
126
+ upper = jnp.concatenate([mids, z_vals[..., -1:]], -1)
127
+ lower = jnp.concatenate([z_vals[..., :1], mids], -1)
128
+ t_rand = random.uniform(key, [batch_size, num_samples])
129
+ z_vals = lower + (upper - lower) * t_rand
130
+ else:
131
+ # Broadcast z_vals to make the returned shape consistent.
132
+ z_vals = jnp.broadcast_to(z_vals[None, ...], [batch_size, num_samples])
133
+
134
+ coords = cast_rays(z_vals, origins, directions)
135
+ return z_vals, coords
136
+
137
+
138
+ def posenc(x, min_deg, max_deg, legacy_posenc_order=False):
139
+ """
140
+ Cat x with a positional encoding of x with scales 2^[min_deg, max_deg-1].
141
+
142
+ Instead of computing [sin(x), cos(x)], we use the trig identity
143
+ cos(x) = sin(x + pi/2) and do one vectorized call to sin([x, x+pi/2]).
144
+
145
+ Args:
146
+ x: jnp.ndarray, variables to be encoded. Note that x should be in [-pi, pi].
147
+ min_deg: int, the minimum (inclusive) degree of the encoding.
148
+ max_deg: int, the maximum (exclusive) degree of the encoding.
149
+ legacy_posenc_order: bool, keep the same ordering as the original tf code.
150
+
151
+ Returns:
152
+ encoded: jnp.ndarray, encoded variables.
153
+ """
154
+ if min_deg == max_deg:
155
+ return x
156
+ scales = jnp.array([2 ** i for i in range(min_deg, max_deg)])
157
+ if legacy_posenc_order:
158
+ xb = x[..., None, :] * scales[:, None]
159
+ four_feat = jnp.reshape(
160
+ jnp.sin(jnp.stack([xb, xb + 0.5 * jnp.pi], -2)),
161
+ list(x.shape[:-1]) + [-1])
162
+ else:
163
+ xb = jnp.reshape((x[..., None, :] * scales[:, None]),
164
+ list(x.shape[:-1]) + [-1])
165
+ four_feat = jnp.sin(jnp.concatenate([xb, xb + 0.5 * jnp.pi], axis=-1))
166
+ return jnp.concatenate([x] + [four_feat], axis=-1)
167
+
168
+
169
+ def volumetric_rendering(rgb, sigma, z_vals, dirs, white_bkgd):
170
+ """
171
+ Volumetric Rendering Function.
172
+
173
+ Args:
174
+ rgb: jnp.ndarray(float32), color, [batch_size, num_samples, 3]
175
+ sigma: jnp.ndarray(float32), density, [batch_size, num_samples, 1].
176
+ z_vals: jnp.ndarray(float32), [batch_size, num_samples].
177
+ dirs: jnp.ndarray(float32), [batch_size, 3].
178
+ white_bkgd: bool.
179
+
180
+ Returns:
181
+ comp_rgb: jnp.ndarray(float32), [batch_size, 3].
182
+ disp: jnp.ndarray(float32), [batch_size].
183
+ acc: jnp.ndarray(float32), [batch_size].
184
+ weights: jnp.ndarray(float32), [batch_size, num_samples]
185
+ """
186
+ eps = 1e-10
187
+ dists = jnp.concatenate([
188
+ z_vals[..., 1:] - z_vals[..., :-1],
189
+ jnp.broadcast_to([1e10], z_vals[..., :1].shape)
190
+ ], -1)
191
+ dists = dists * jnp.linalg.norm(dirs[..., None, :], axis=-1)
192
+ # Note that we're quietly turning sigma from [..., 0] to [...].
193
+ alpha = 1.0 - jnp.exp(-sigma[..., 0] * dists)
194
+ accum_prod = jnp.concatenate([
195
+ jnp.ones_like(alpha[..., :1], alpha.dtype),
196
+ jnp.cumprod(1.0 - alpha[..., :-1] + eps, axis=-1)
197
+ ],
198
+ axis=-1)
199
+ weights = alpha * accum_prod
200
+
201
+ comp_rgb = (weights[..., None] * rgb).sum(axis=-2)
202
+ depth = (weights * z_vals).sum(axis=-1)
203
+ acc = weights.sum(axis=-1)
204
+ # Equivalent to (but slightly more efficient and stable than):
205
+ # disp = 1 / max(eps, where(acc > eps, depth / acc, 0))
206
+ inv_eps = 1 / eps
207
+ disp = acc / depth
208
+ disp = jnp.where((disp > 0) & (disp < inv_eps) & (acc > eps), disp, inv_eps)
209
+ if white_bkgd:
210
+ comp_rgb = comp_rgb + (1. - acc[..., None])
211
+ return comp_rgb, disp, acc, weights
212
+
213
+
214
+ def piecewise_constant_pdf(key, bins, weights, num_samples, randomized):
215
+ """
216
+ Piecewise-Constant PDF sampling.
217
+
218
+ Args:
219
+ key: jnp.ndarray(float32), [2,], random number generator.
220
+ bins: jnp.ndarray(float32), [batch_size, num_bins + 1].
221
+ weights: jnp.ndarray(float32), [batch_size, num_bins].
222
+ num_samples: int, the number of samples.
223
+ randomized: bool, use randomized samples.
224
+
225
+ Returns:
226
+ z_samples: jnp.ndarray(float32), [batch_size, num_samples].
227
+ """
228
+ # Pad each weight vector (only if necessary) to bring its sum to `eps`. This
229
+ # avoids NaNs when the input is zeros or small, but has no effect otherwise.
230
+ eps = 1e-5
231
+ weight_sum = jnp.sum(weights, axis=-1, keepdims=True)
232
+ padding = jnp.maximum(0, eps - weight_sum)
233
+ weights += padding / weights.shape[-1]
234
+ weight_sum += padding
235
+
236
+ # Compute the PDF and CDF for each weight vector, while ensuring that the CDF
237
+ # starts with exactly 0 and ends with exactly 1.
238
+ pdf = weights / weight_sum
239
+ cdf = jnp.minimum(1, jnp.cumsum(pdf[..., :-1], axis=-1))
240
+ cdf = jnp.concatenate([
241
+ jnp.zeros(list(cdf.shape[:-1]) + [1]), cdf,
242
+ jnp.ones(list(cdf.shape[:-1]) + [1])
243
+ ],
244
+ axis=-1)
245
+
246
+ # Draw uniform samples.
247
+ if randomized:
248
+ # Note that `u` is in [0, 1) --- it can be zero, but it can never be 1.
249
+ u = random.uniform(key, list(cdf.shape[:-1]) + [num_samples])
250
+ else:
251
+ # Match the behavior of random.uniform() by spanning [0, 1-eps].
252
+ u = jnp.linspace(0., 1. - jnp.finfo('float32').eps, num_samples)
253
+ u = jnp.broadcast_to(u, list(cdf.shape[:-1]) + [num_samples])
254
+
255
+ # Identify the location in `cdf` that corresponds to a random sample.
256
+ # The final `True` index in `mask` will be the start of the sampled interval.
257
+ mask = u[..., None, :] >= cdf[..., :, None]
258
+
259
+ def find_interval(x):
260
+ # Grab the value where `mask` switches from True to False, and vice versa.
261
+ # This approach takes advantage of the fact that `x` is sorted.
262
+ x0 = jnp.max(jnp.where(mask, x[..., None], x[..., :1, None]), -2)
263
+ x1 = jnp.min(jnp.where(~mask, x[..., None], x[..., -1:, None]), -2)
264
+ return x0, x1
265
+
266
+ bins_g0, bins_g1 = find_interval(bins)
267
+ cdf_g0, cdf_g1 = find_interval(cdf)
268
+
269
+ t = jnp.clip(jnp.nan_to_num((u - cdf_g0) / (cdf_g1 - cdf_g0), 0), 0, 1)
270
+ samples = bins_g0 + t * (bins_g1 - bins_g0)
271
+
272
+ # Prevent gradient from backprop-ing through `samples`.
273
+ return lax.stop_gradient(samples)
274
+
275
+
276
+ def sample_pdf(key, bins, weights, origins, directions, z_vals, num_samples,
277
+ randomized):
278
+ """
279
+ Hierarchical sampling.
280
+
281
+ Args:
282
+ key: jnp.ndarray(float32), [2,], random number generator.
283
+ bins: jnp.ndarray(float32), [batch_size, num_bins + 1].
284
+ weights: jnp.ndarray(float32), [batch_size, num_bins].
285
+ origins: jnp.ndarray(float32), [batch_size, 3], ray origins.
286
+ directions: jnp.ndarray(float32), [batch_size, 3], ray directions.
287
+ z_vals: jnp.ndarray(float32), [batch_size, num_coarse_samples].
288
+ num_samples: int, the number of samples.
289
+ randomized: bool, use randomized samples.
290
+
291
+ Returns:
292
+ z_vals: jnp.ndarray(float32),
293
+ [batch_size, num_coarse_samples + num_fine_samples].
294
+ points: jnp.ndarray(float32),
295
+ [batch_size, num_coarse_samples + num_fine_samples, 3].
296
+ """
297
+ z_samples = piecewise_constant_pdf(key, bins, weights, num_samples,
298
+ randomized)
299
+ # Compute united z_vals and sample points
300
+ z_vals = jnp.sort(jnp.concatenate([z_vals, z_samples], axis=-1), axis=-1)
301
+ coords = cast_rays(z_vals, origins, directions)
302
+ return z_vals, coords
303
+
304
+
305
+ def add_gaussian_noise(key, raw, noise_std, randomized):
306
+ """
307
+ Adds Gaussian noise to `raw`, which can be used to regularize it.
308
+
309
+ Args:
310
+ key: jnp.ndarray(float32), [2,], random number generator.
311
+ raw: jnp.ndarray(float32), arbitrary shape.
312
+ noise_std: float, The standard deviation of the noise to be added.
313
+ randomized: bool, add noise if randomized is True.
314
+
315
+ Returns:
316
+ raw + noise: jnp.ndarray(float32), with the same shape as `raw`.
317
+ """
318
+ if (noise_std is not None) and randomized:
319
+ return raw + random.normal(key, raw.shape, dtype=raw.dtype) * noise_std
320
+ else:
321
+ return raw
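
A small self-check sketch for the helpers above (not part of this commit): the feature width produced by posenc and the shapes coming out of hierarchical sampling with the default 64 coarse and 128 fine samples per ray. The uniform stand-in weights are an assumption purely for shape checking.

    import jax
    import jax.numpy as jnp
    from jaxnerf.nerf import model_utils

    x = jnp.zeros((8, 64, 3))                         # [batch, num_samples, xyz]
    enc = model_utils.posenc(x, 0, 10)                # 3 + 3 * 2 * 10 = 63 features
    assert enc.shape == (8, 64, 63)

    key0, key1 = jax.random.split(jax.random.PRNGKey(0))
    origins = jnp.zeros((8, 3))
    directions = jnp.tile(jnp.array([0., 0., -1.]), (8, 1))
    z_vals, _ = model_utils.sample_along_rays(
        key0, origins, directions, 64, 2., 6., randomized=True, lindisp=False)
    weights = jnp.ones_like(z_vals)                   # stand-in for rendered weights
    z_mid = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
    z_all, points = model_utils.sample_pdf(
        key1, z_mid, weights[..., 1:-1], origins, directions, z_vals, 128,
        randomized=True)
    assert z_all.shape == (8, 192) and points.shape == (8, 192, 3)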
jaxnerf/nerf/models.py ADDED
@@ -0,0 +1,256 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Lint as: python3
17
+ """Different model implementation plus a general port for all the models."""
18
+ from typing import Any, Callable
19
+ from flax import linen as nn
20
+ from jax import random
21
+ import jax.numpy as jnp
22
+
23
+ from jaxnerf.nerf import model_utils
24
+ from jaxnerf.nerf import utils
25
+
26
+
27
+ def get_model(key, example_batch, args):
28
+ """A helper function that wraps around a 'model zoo'."""
29
+ model_dict = {"nerf": construct_nerf}
30
+ return model_dict[args.model](key, example_batch, args)
31
+
32
+
33
+ class NerfModel(nn.Module):
34
+ """Nerf NN Model with both coarse and fine MLPs."""
35
+ num_coarse_samples: int # The number of samples for the coarse nerf.
36
+ num_fine_samples: int # The number of samples for the fine nerf.
37
+ use_viewdirs: bool # If True, use viewdirs as an input.
38
+ near: float # The distance to the near plane
39
+ far: float # The distance to the far plane
40
+ noise_std: float # The std dev of noise added to raw sigma.
41
+ net_depth: int # The depth of the first part of MLP.
42
+ net_width: int # The width of the first part of MLP.
43
+ net_depth_condition: int # The depth of the second part of MLP.
44
+ net_width_condition: int # The width of the second part of MLP.
45
+ net_activation: Callable[..., Any] # MLP activation
46
+ skip_layer: int # How often to add skip connections.
47
+ num_rgb_channels: int # The number of RGB channels.
48
+ num_sigma_channels: int # The number of density channels.
49
+ white_bkgd: bool # If True, use a white background.
50
+ min_deg_point: int # The minimum degree of positional encoding for positions.
51
+ max_deg_point: int # The maximum degree of positional encoding for positions.
52
+ deg_view: int # The degree of positional encoding for viewdirs.
53
+ lindisp: bool # If True, sample linearly in disparity rather than in depth.
54
+ rgb_activation: Callable[..., Any] # Output RGB activation.
55
+ sigma_activation: Callable[..., Any] # Output sigma activation.
56
+ legacy_posenc_order: bool # Keep the same ordering as the original tf code.
57
+
58
+ @nn.compact
59
+ def __call__(self, rng_0, rng_1, rays, randomized):
60
+ """Nerf Model.
61
+
62
+ Args:
63
+ rng_0: jnp.ndarray, random number generator for coarse model sampling.
64
+ rng_1: jnp.ndarray, random number generator for fine model sampling.
65
+ rays: util.Rays, a namedtuple of ray origins, directions, and viewdirs.
66
+ randomized: bool, use randomized stratified sampling.
67
+
68
+ Returns:
69
+ ret: list, [(rgb_coarse, disp_coarse, acc_coarse), (rgb, disp, acc)]
70
+ """
71
+ # Stratified sampling along rays
72
+ key, rng_0 = random.split(rng_0)
73
+ z_vals, samples = model_utils.sample_along_rays(
74
+ key,
75
+ rays.origins,
76
+ rays.directions,
77
+ self.num_coarse_samples,
78
+ self.near,
79
+ self.far,
80
+ randomized,
81
+ self.lindisp,
82
+ )
83
+ samples_enc = model_utils.posenc(
84
+ samples,
85
+ self.min_deg_point,
86
+ self.max_deg_point,
87
+ self.legacy_posenc_order,
88
+ )
89
+
90
+ # Construct the "coarse" MLP.
91
+ coarse_mlp = model_utils.MLP(
92
+ net_depth=self.net_depth,
93
+ net_width=self.net_width,
94
+ net_depth_condition=self.net_depth_condition,
95
+ net_width_condition=self.net_width_condition,
96
+ net_activation=self.net_activation,
97
+ skip_layer=self.skip_layer,
98
+ num_rgb_channels=self.num_rgb_channels,
99
+ num_sigma_channels=self.num_sigma_channels)
100
+
101
+ # Point attribute predictions
102
+ if self.use_viewdirs:
103
+ viewdirs_enc = model_utils.posenc(
104
+ rays.viewdirs,
105
+ 0,
106
+ self.deg_view,
107
+ self.legacy_posenc_order,
108
+ )
109
+ raw_rgb, raw_sigma = coarse_mlp(samples_enc, viewdirs_enc)
110
+ else:
111
+ viewdirs_enc = None
112
+ raw_rgb, raw_sigma = coarse_mlp(samples_enc)
113
+ # Add noises to regularize the density predictions if needed
114
+ key, rng_0 = random.split(rng_0)
115
+ raw_sigma = model_utils.add_gaussian_noise(
116
+ key,
117
+ raw_sigma,
118
+ self.noise_std,
119
+ randomized,
120
+ )
121
+ rgb = self.rgb_activation(raw_rgb)
122
+ sigma = self.sigma_activation(raw_sigma)
123
+ # Volumetric rendering.
124
+ comp_rgb, disp, acc, weights = model_utils.volumetric_rendering(
125
+ rgb,
126
+ sigma,
127
+ z_vals,
128
+ rays.directions,
129
+ white_bkgd=self.white_bkgd,
130
+ )
131
+ ret = [
132
+ (comp_rgb, disp, acc),
133
+ ]
134
+ # Hierarchical sampling based on coarse predictions
135
+ if self.num_fine_samples > 0:
136
+ z_vals_mid = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
137
+ key, rng_1 = random.split(rng_1)
138
+ z_vals, samples = model_utils.sample_pdf(
139
+ key,
140
+ z_vals_mid,
141
+ weights[..., 1:-1],
142
+ rays.origins,
143
+ rays.directions,
144
+ z_vals,
145
+ self.num_fine_samples,
146
+ randomized,
147
+ )
148
+ samples_enc = model_utils.posenc(
149
+ samples,
150
+ self.min_deg_point,
151
+ self.max_deg_point,
152
+ self.legacy_posenc_order,
153
+ )
154
+
155
+ # Construct the "fine" MLP.
156
+ fine_mlp = model_utils.MLP(
157
+ net_depth=self.net_depth,
158
+ net_width=self.net_width,
159
+ net_depth_condition=self.net_depth_condition,
160
+ net_width_condition=self.net_width_condition,
161
+ net_activation=self.net_activation,
162
+ skip_layer=self.skip_layer,
163
+ num_rgb_channels=self.num_rgb_channels,
164
+ num_sigma_channels=self.num_sigma_channels)
165
+
166
+ if self.use_viewdirs:
167
+ raw_rgb, raw_sigma = fine_mlp(samples_enc, viewdirs_enc)
168
+ else:
169
+ raw_rgb, raw_sigma = fine_mlp(samples_enc)
170
+ key, rng_1 = random.split(rng_1)
171
+ raw_sigma = model_utils.add_gaussian_noise(
172
+ key,
173
+ raw_sigma,
174
+ self.noise_std,
175
+ randomized,
176
+ )
177
+ rgb = self.rgb_activation(raw_rgb)
178
+ sigma = self.sigma_activation(raw_sigma)
179
+ comp_rgb, disp, acc, unused_weights = model_utils.volumetric_rendering(
180
+ rgb,
181
+ sigma,
182
+ z_vals,
183
+ rays.directions,
184
+ white_bkgd=self.white_bkgd,
185
+ )
186
+ ret.append((comp_rgb, disp, acc))
187
+ return ret
188
+
189
+
190
+ def construct_nerf(key, example_batch, args):
191
+ """Construct a Neural Radiance Field.
192
+
193
+ Args:
194
+ key: jnp.ndarray. Random number generator.
195
+ example_batch: dict, an example of a batch of data.
196
+ args: FLAGS class. Hyperparameters of nerf.
197
+
198
+ Returns:
199
+ model: nn.Model. Nerf model with parameters.
200
+ state: flax.Module.state. Nerf model state for stateful parameters.
201
+ """
202
+ net_activation = getattr(nn, str(args.net_activation))
203
+ rgb_activation = getattr(nn, str(args.rgb_activation))
204
+ sigma_activation = getattr(nn, str(args.sigma_activation))
205
+
206
+ # Assert that rgb_activation always produces outputs in [0, 1], and
207
+ # sigma_activation always produces non-negative outputs.
208
+ x = jnp.exp(jnp.linspace(-90, 90, 1024))
209
+ x = jnp.concatenate([-x[::-1], x], 0)
210
+
211
+ rgb = rgb_activation(x)
212
+ if jnp.any(rgb < 0) or jnp.any(rgb > 1):
213
+ raise NotImplementedError(
214
+ "Choice of rgb_activation `{}` produces colors outside of [0, 1]"
215
+ .format(args.rgb_activation))
216
+
217
+ sigma = sigma_activation(x)
218
+ if jnp.any(sigma < 0):
219
+ raise NotImplementedError(
220
+ "Choice of sigma_activation `{}` produces negative densities".format(
221
+ args.sigma_activation))
222
+
223
+ model = NerfModel(
224
+ min_deg_point=args.min_deg_point,
225
+ max_deg_point=args.max_deg_point,
226
+ deg_view=args.deg_view,
227
+ num_coarse_samples=args.num_coarse_samples,
228
+ num_fine_samples=args.num_fine_samples,
229
+ use_viewdirs=args.use_viewdirs,
230
+ near=args.near,
231
+ far=args.far,
232
+ noise_std=args.noise_std,
233
+ white_bkgd=args.white_bkgd,
234
+ net_depth=args.net_depth,
235
+ net_width=args.net_width,
236
+ net_depth_condition=args.net_depth_condition,
237
+ net_width_condition=args.net_width_condition,
238
+ skip_layer=args.skip_layer,
239
+ num_rgb_channels=args.num_rgb_channels,
240
+ num_sigma_channels=args.num_sigma_channels,
241
+ lindisp=args.lindisp,
242
+ net_activation=net_activation,
243
+ rgb_activation=rgb_activation,
244
+ sigma_activation=sigma_activation,
245
+ legacy_posenc_order=args.legacy_posenc_order)
246
+ rays = example_batch["rays"]
247
+ key1, key2, key3 = random.split(key, num=3)
248
+
249
+ init_variables = model.init(
250
+ key1,
251
+ rng_0=key2,
252
+ rng_1=key3,
253
+ rays=utils.namedtuple_map(lambda x: x[0], rays),
254
+ randomized=args.randomized)
255
+
256
+ return model, init_variables
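
A minimal sketch (not part of this commit) of initializing the model on a dummy ray batch and unpacking its outputs: the returned list holds one (rgb, disp, acc) tuple for the coarse pass and one for the fine pass. The hyperparameters below mirror the flag defaults but are shrunk so the init runs quickly; they are illustrative values, not the committed configuration.

    from types import SimpleNamespace
    import jax.numpy as jnp
    from jax import random
    from jaxnerf.nerf import models, utils

    args = SimpleNamespace(
        model="nerf", net_activation="relu", rgb_activation="sigmoid",
        sigma_activation="relu", min_deg_point=0, max_deg_point=10, deg_view=4,
        num_coarse_samples=16, num_fine_samples=8, use_viewdirs=True,
        near=2., far=6., noise_std=None, white_bkgd=True,
        net_depth=2, net_width=32, net_depth_condition=1, net_width_condition=16,
        skip_layer=4, num_rgb_channels=3, num_sigma_channels=1,
        lindisp=False, legacy_posenc_order=False, randomized=True)

    dirs = jnp.tile(jnp.array([0., 0., -1.]), (1, 4, 1))   # leading axis mimics sharding
    rays = utils.Rays(origins=jnp.zeros((1, 4, 3)), directions=dirs, viewdirs=dirs)
    model, variables = models.get_model(random.PRNGKey(0), {"rays": rays}, args)
    outputs = model.apply(variables, random.PRNGKey(1), random.PRNGKey(2),
                          utils.namedtuple_map(lambda x: x[0], rays), randomized=True)
    (rgb_coarse, disp_coarse, acc_coarse), (rgb_fine, disp_fine, acc_fine) = outputs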
jaxnerf/nerf/precompute.py ADDED
@@ -0,0 +1,59 @@
1
+ """
2
+ command line example:
3
+ $ python -i -m jaxnerf.nerf.precompute --data_dir {path-to-data-dir} --split train \
4
+ --dataset blender --factor 4 --dtype float16
5
+ """
6
+ import os
7
+ import argparse
8
+ from typing import Optional
9
+
10
+ import jax.numpy as np
11
+
12
+ from jaxnerf.nerf import utils
13
+ from jaxnerf.nerf import clip_utils
14
+ from jaxnerf.nerf import datasets
15
+
16
+
17
+ def precompute_image_features(data_dir: str, split: str, dataset: str, factor: int, dtype: str,
18
+ model_name: Optional[str], render_path: Optional[str]):
19
+ if dataset == "blender":
20
+ if render_path:
21
+ raise ValueError("render_path cannot be used for the blender dataset.")
22
+
23
+ # image in numpy.ndarray
24
+ _, images, _ = datasets.Blender.load_files(data_dir, split, factor, few_shot=-1)  # -1: use all frames
25
+ clip_model = clip_utils.init_CLIP(dtype, model_name)
26
+
27
+ # CLIP output in jax.numpy.ndarray
28
+ images = np.stack(images).transpose(0, 3, 1, 2)
29
+ images = images[:, :3, :, :]
30
+ images = clip_utils.preprocess_for_CLIP(images)
31
+ embeddings = clip_model.get_image_features(pixel_values=images)
32
+ embeddings /= np.linalg.norm(embeddings, axis=-1, keepdims=True)
33
+ print(f'completed precomputing CLIP embeddings: ({embeddings.shape[0]} images)')
34
+
35
+ # write as pickle
36
+ write_path = os.path.join(data_dir, f'clip_cache_{split}_factor{factor}_{dtype}.pkl')
37
+ utils.write_pickle(embeddings, write_path)
38
+ print(f'precompute written as pickle: {write_path}')
39
+
40
+ elif dataset == "llff":
41
+ raise NotImplementedError
42
+ else:
43
+ raise ValueError(f"invalid dataset: {dataset}")
44
+
45
+
46
+ if __name__ == '__main__':
47
+ parser = argparse.ArgumentParser()
48
+ parser.add_argument("--data_dir", type=str, required=True)
49
+ parser.add_argument("--split", type=str, required=True, help="train/val/test")
50
+ parser.add_argument("--dataset", type=str, required=True)
51
+ parser.add_argument("--factor", type=int, required=True,
52
+ help="downsampling factor: 0/2/4")
53
+ parser.add_argument("--dtype", type=str, required=True,
54
+ help="float32/float16 (float16 is used to save memory)")
55
+ parser.add_argument("--model_name", type=str, required=False, default=None)
56
+ parser.add_argument("--render_path", type=str, required=False, default=None)
57
+ args = parser.parse_args()
58
+ precompute_image_features(args.data_dir, args.split, args.dataset, args.factor,
59
+ args.dtype, args.model_name, args.render_path)
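
A minimal sketch (not part of this commit) of reading the cache back. The filename pattern comes from precompute_image_features above; loading it with the standard pickle module assumes utils.write_pickle is a plain pickle.dump wrapper, and the data_dir value is a hypothetical path.

    import os
    import pickle

    data_dir = "/path/to/nerf_synthetic/lego"
    cache_path = os.path.join(data_dir, "clip_cache_train_factor4_float16.pkl")
    with open(cache_path, "rb") as f:
        embeddings = pickle.load(f)   # [num_images, clip_dim], rows normalized to unit length
    print(embeddings.shape)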
jaxnerf/nerf/utils.py ADDED
@@ -0,0 +1,457 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Lint as: python3
17
+ """Utility functions."""
18
+ import collections
19
+ import os
20
+ from os import path
21
+ import pickle
22
+ from absl import flags
23
+ import flax
24
+ import jax
25
+ import jax.numpy as jnp
26
+ import jax.scipy as jsp
27
+ import numpy as np
28
+ from PIL import Image
29
+ import yaml
30
+ from jaxnerf.nerf import datasets
31
+
32
+ BASE_DIR = "jaxnerf"
33
+ INTERNAL = False
34
+
35
+
36
+ @flax.struct.dataclass
37
+ class TrainState:
38
+ optimizer: flax.optim.Optimizer
39
+
40
+
41
+ @flax.struct.dataclass
42
+ class Stats:
43
+ loss: float
44
+ psnr: float
45
+ loss_c: float
46
+ psnr_c: float
47
+ weight_l2: float
48
+
49
+
50
+ Rays = collections.namedtuple("Rays", ("origins", "directions", "viewdirs"))
51
+
52
+
53
+ def namedtuple_map(fn, tup):
54
+ """Apply `fn` to each element of `tup` and cast to `tup`'s namedtuple."""
55
+ return type(tup)(*map(fn, tup))
56
+
57
+
58
+ def define_flags():
59
+ """Define flags for both training and evaluation modes."""
60
+ flags.DEFINE_string("train_dir", None, "where to store ckpts and logs")
61
+ flags.DEFINE_string("data_dir", None, "input data directory.")
62
+ flags.DEFINE_string("config", None,
63
+ "using config files to set hyperparameters.")
64
+
65
+ # CLIP part Flags
66
+ flags.DEFINE_bool("use_semantic_loss", True,
67
+ "whether use semantic loss or not")
68
+ flags.DEFINE_string("precompute_pkl_path", None,
69
+ "where to load the pickle file that precompute image features")
70
+ flags.DEFINE_string("clip_model_name", "openai/clip-vit-base-patch32", "model type for CLIP")
71
+ flags.DEFINE_string("clip_output_dtype", "float32",
72
+ "float32/ float16 (float16 for memory saving)")
73
+ flags.DEFINE_integer("sc_loss_factor", 4,
74
+ "factor for downsampling image (0/2/4). "
75
+ "its compounded on top of another flag: factor")
76
+ flags.DEFINE_integer("sc_loss_every", 16,
77
+ "no. of steps to take before performing semantic loss evaluation")
78
+ flags.DEFINE_float("sc_loss_mult", 10.,
79
+ "weighting for semantic loss from CLIP")
80
+
81
+ # Dataset Flags
82
+ # TODO(pratuls): rename to dataset_loader and consider cleaning up
83
+ flags.DEFINE_enum("dataset", "blender",
84
+ list(k for k in datasets.dataset_dict.keys()),
85
+ "The type of dataset feed to nerf.")
86
+ flags.DEFINE_enum(
87
+ "batching", "single_image", ["single_image", "all_images"],
88
+ "source of ray sampling when collecting training batch,"
89
+ "single_image for sampling from only one image in a batch,"
90
+ "all_images for sampling from all the training images.")
91
+ flags.DEFINE_bool(
92
+ "white_bkgd", True, "using white color as default background."
93
+ "(used in the blender dataset only)")
94
+ flags.DEFINE_integer("batch_size", 1024,
95
+ "the number of rays in a mini-batch (for training).")
96
+ flags.DEFINE_integer("factor", 4,
97
+ "the downsample factor of images, 0 for no downsample.")
98
+ flags.DEFINE_bool("spherify", False, "set for spherical 360 scenes.")
99
+ flags.DEFINE_bool(
100
+ "render_path", False, "render generated path if set true."
101
+ "(used in the llff dataset only)")
102
+ flags.DEFINE_integer(
103
+ "llffhold", 8, "will take every 1/N images as LLFF test set."
104
+ "(used in the llff dataset only)")
105
+ flags.DEFINE_bool(
106
+ "use_pixel_centers", False,
107
+ "If True, generate rays through the center of each pixel. Note: While "
108
+ "this is the correct way to handle rays, it is not the way rays are "
109
+ "handled in the original NeRF paper. Setting this TRUE yields ~ +1 PSNR "
110
+ "compared to Vanilla NeRF.")
111
+
112
+ # Model Flags
113
+ flags.DEFINE_string("model", "nerf", "name of model to use.")
114
+ flags.DEFINE_float("near", 2., "near clip of volumetric rendering.")
115
+ flags.DEFINE_float("far", 6., "far clip of volumentric rendering.")
116
+ flags.DEFINE_integer("net_depth", 8, "depth of the first part of MLP.")
117
+ flags.DEFINE_integer("net_width", 256, "width of the first part of MLP.")
118
+ flags.DEFINE_integer("net_depth_condition", 1,
119
+ "depth of the second part of MLP.")
120
+ flags.DEFINE_integer("net_width_condition", 128,
121
+ "width of the second part of MLP.")
122
+ flags.DEFINE_float("weight_decay_mult", 0, "The multiplier on weight decay")
123
+ flags.DEFINE_integer(
124
+ "skip_layer", 4, "add a skip connection to the output vector of every"
125
+ "skip_layer layers.")
126
+ flags.DEFINE_integer("num_rgb_channels", 3, "the number of RGB channels.")
127
+ flags.DEFINE_integer("num_sigma_channels", 1,
128
+ "the number of density channels.")
129
+ flags.DEFINE_bool("randomized", True, "use randomized stratified sampling.")
130
+ flags.DEFINE_integer("min_deg_point", 0,
131
+ "Minimum degree of positional encoding for points.")
132
+ flags.DEFINE_integer("max_deg_point", 10,
133
+ "Maximum degree of positional encoding for points.")
134
+ flags.DEFINE_integer("deg_view", 4,
135
+ "Degree of positional encoding for viewdirs.")
136
+ flags.DEFINE_integer(
137
+ "num_coarse_samples", 64,
138
+ "the number of samples on each ray for the coarse model.")
139
+ flags.DEFINE_integer("num_fine_samples", 128,
140
+ "the number of samples on each ray for the fine model.")
141
+ flags.DEFINE_bool("use_viewdirs", True, "use view directions as a condition.")
142
+ flags.DEFINE_float(
143
+ "noise_std", None, "std dev of noise added to regularize sigma output."
144
+ "(used in the llff dataset only)")
145
+ flags.DEFINE_bool("lindisp", False,
146
+ "sampling linearly in disparity rather than depth.")
147
+ flags.DEFINE_string("net_activation", "relu",
148
+ "activation function used within the MLP.")
149
+ flags.DEFINE_string("rgb_activation", "sigmoid",
150
+ "activation function used to produce RGB.")
151
+ flags.DEFINE_string("sigma_activation", "relu",
152
+ "activation function used to produce density.")
153
+ flags.DEFINE_bool(
154
+ "legacy_posenc_order", False,
155
+ "If True, revert the positional encoding feature order to an older version of this codebase."
156
+ )
157
+
158
+ # Train Flags
159
+ flags.DEFINE_float("lr_init", 5e-4, "The initial learning rate.")
160
+ flags.DEFINE_float("lr_final", 5e-6, "The final learning rate.")
161
+ flags.DEFINE_integer(
162
+ "lr_delay_steps", 0, "The number of steps at the beginning of "
163
+ "training to reduce the learning rate by lr_delay_mult")
164
+ flags.DEFINE_float(
165
+ "lr_delay_mult", 1., "A multiplier on the learning rate when the step "
166
+ "is < lr_delay_steps")
167
+ flags.DEFINE_float("grad_max_norm", 0.,
168
+ "The gradient clipping magnitude (disabled if == 0).")
169
+ flags.DEFINE_float("grad_max_val", 0.,
170
+ "The gradient clipping value (disabled if == 0).")
171
+
172
+ flags.DEFINE_integer("max_steps", 1000000,
173
+ "the number of optimization steps.")
174
+ flags.DEFINE_integer("save_every", 10000,
175
+ "the number of steps to save a checkpoint.")
176
+ flags.DEFINE_integer("print_every", 100,
177
+ "the number of steps between reports to tensorboard.")
178
+ flags.DEFINE_integer(
179
+ "render_every", 5000, "the number of steps to render a test image,"
180
+ "better to be x00 for accurate step time record.")
181
+ flags.DEFINE_integer("gc_every", 10000,
182
+ "the number of steps to run python garbage collection.")
183
+ flags.DEFINE_integer("few_shot", -1,
184
+ "the number of images.")
185
+
186
+ # Eval Flags
187
+ flags.DEFINE_bool(
188
+ "eval_once", True,
189
+ "evaluate the model only once if true, otherwise keeping evaluating new"
190
+ "checkpoints if there's any.")
191
+ flags.DEFINE_bool("save_output", True,
192
+ "save predicted images to disk if True.")
193
+ flags.DEFINE_integer(
194
+ "chunk", 8192,
195
+ "the size of chunks for evaluation inferences, set to the value that"
196
+ "fits your GPU/TPU memory.")
197
+
198
+ def update_flags(args):
199
+ """Update the flags in `args` with the contents of the config YAML file."""
200
+ pth = path.join(BASE_DIR, args.config + ".yaml")
201
+ with open_file(pth, "r") as fin:
202
+ configs = yaml.load(fin, Loader=yaml.FullLoader)
203
+ # Only allow args to be updated if they already exist.
204
+ invalid_args = list(set(configs.keys()) - set(dir(args)))
205
+ if invalid_args:
206
+ raise ValueError(f"Invalid args {invalid_args} in {pth}.")
207
+ args.__dict__.update(configs)
208
+
209
+ def open_file(pth, mode="r"):
210
+ if not INTERNAL:
211
+ return open(pth, mode=mode)
212
+
213
+
214
+ def file_exists(pth):
215
+ if not INTERNAL:
216
+ return path.exists(pth)
217
+
218
+
219
+ def listdir(pth):
220
+ if not INTERNAL:
221
+ return os.listdir(pth)
222
+
223
+
224
+ def isdir(pth):
225
+ if not INTERNAL:
226
+ return path.isdir(pth)
227
+
228
+
229
+ def makedirs(pth):
230
+ if not INTERNAL:
231
+ os.makedirs(pth)
232
+
233
+
234
+ def render_image(render_fn, rays, rng, normalize_disp, chunk=8192):
235
+ """Render all the pixels of an image (in test mode).
236
+
237
+ Args:
238
+ render_fn: function, jit-ed render function.
239
+ rays: a `Rays` namedtuple, the rays to be rendered.
240
+ rng: jnp.ndarray, random number generator (used in training mode only).
241
+ normalize_disp: bool, if true then normalize `disp` to [0, 1].
242
+ chunk: int, the size of chunks to render sequentially.
243
+
244
+ Returns:
245
+ rgb: jnp.ndarray, rendered color image.
246
+ disp: jnp.ndarray, rendered disparity image.
247
+ acc: jnp.ndarray, rendered accumulated weights per pixel.
248
+ """
249
+ height, width = rays[0].shape[:2]
250
+ num_rays = height * width
251
+ rays = namedtuple_map(lambda r: r.reshape((num_rays, -1)), rays)
252
+
253
+ unused_rng, key_0, key_1 = jax.random.split(rng, 3)
254
+ host_id = jax.host_id()
255
+ results = []
256
+ for i in range(0, num_rays, chunk):
257
+ # pylint: disable=cell-var-from-loop
258
+ chunk_rays = namedtuple_map(lambda r: r[i:i + chunk], rays)
259
+ chunk_size = chunk_rays[0].shape[0]
260
+ rays_remaining = chunk_size % jax.device_count()
261
+ if rays_remaining != 0:
262
+ padding = jax.device_count() - rays_remaining
263
+ chunk_rays = namedtuple_map(
264
+ lambda r: jnp.pad(r, ((0, padding), (0, 0)), mode="edge"), chunk_rays)
265
+ else:
266
+ padding = 0
267
+ # After padding the number of chunk_rays is always divisible by
268
+ # host_count.
269
+ rays_per_host = chunk_rays[0].shape[0] // jax.process_count()
270
+ start, stop = host_id * rays_per_host, (host_id + 1) * rays_per_host
271
+ chunk_rays = namedtuple_map(lambda r: shard(r[start:stop]), chunk_rays)
272
+ chunk_results = render_fn(key_0, key_1, chunk_rays)[-1]
273
+ results.append([unshard(x[0], padding) for x in chunk_results])
274
+ # pylint: enable=cell-var-from-loop
275
+ rgb, disp, acc = [jnp.concatenate(r, axis=0) for r in zip(*results)]
276
+ # Normalize disp for visualization for ndc_rays in llff front-facing scenes.
277
+ if normalize_disp:
278
+ disp = (disp - disp.min()) / (disp.max() - disp.min())
279
+ return (rgb.reshape((height, width, -1)), disp.reshape(
280
+ (height, width, -1)), acc.reshape((height, width, -1)))
281
+
282
+
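For reference, the padding step in `render_image` above simply rounds the last chunk of rays up to a multiple of the device count before sharding. A standalone sketch of that arithmetic with a hypothetical chunk size:

import jax

chunk_size = 5000                  # hypothetical size of the final chunk of rays
n_dev = jax.device_count()         # 1 on CPU, 8 on a TPU v3-8, etc.
rays_remaining = chunk_size % n_dev
padding = 0 if rays_remaining == 0 else n_dev - rays_remaining
assert (chunk_size + padding) % n_dev == 0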
283
+ def compute_psnr(mse):
284
+ """Compute psnr value given mse (we assume the maximum pixel value is 1).
285
+
286
+ Args:
287
+ mse: float, mean square error of pixels.
288
+
289
+ Returns:
290
+ psnr: float, the psnr value.
291
+ """
292
+ return -10. * jnp.log(mse) / jnp.log(10.)
293
+
294
+
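As a quick sanity check on `compute_psnr`, which implements PSNR = -10 * log10(MSE) for a peak value of 1, an MSE of 0.01 should give 20 dB:

import jax.numpy as jnp

mse = jnp.array(0.01)
psnr = -10. * jnp.log(mse) / jnp.log(10.)   # same formula as compute_psnr
print(float(psnr))                          # ~20.0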
295
+ def compute_ssim(img0,
296
+ img1,
297
+ max_val,
298
+ filter_size=11,
299
+ filter_sigma=1.5,
300
+ k1=0.01,
301
+ k2=0.03,
302
+ return_map=False):
303
+ """Computes SSIM from two images.
304
+
305
+ This function was modeled after tf.image.ssim, and should produce comparable
306
+ output.
307
+
308
+ Args:
309
+ img0: array. An image of size [..., width, height, num_channels].
310
+ img1: array. An image of size [..., width, height, num_channels].
311
+ max_val: float > 0. The maximum magnitude that `img0` or `img1` can have.
312
+ filter_size: int >= 1. Window size.
313
+ filter_sigma: float > 0. The bandwidth of the Gaussian used for filtering.
314
+ k1: float > 0. One of the SSIM dampening parameters.
315
+ k2: float > 0. One of the SSIM dampening parameters.
316
+ return_map: Bool. If True, the per-pixel SSIM "map" will be returned as well.
317
+
318
+ Returns:
319
+ Each image's mean SSIM, or a tensor of individual values if `return_map`.
320
+ """
321
+ # Construct a 1D Gaussian blur filter.
322
+ hw = filter_size // 2
323
+ shift = (2 * hw - filter_size + 1) / 2
324
+ f_i = ((jnp.arange(filter_size) - hw + shift) / filter_sigma) ** 2
325
+ filt = jnp.exp(-0.5 * f_i)
326
+ filt /= jnp.sum(filt)
327
+
328
+ # Blur in x and y (faster than the 2D convolution).
329
+ filt_fn1 = lambda z: jsp.signal.convolve2d(z, filt[:, None], mode="valid")
330
+ filt_fn2 = lambda z: jsp.signal.convolve2d(z, filt[None, :], mode="valid")
331
+
332
+ # Vmap the blurs to the tensor size, and then compose them.
333
+ num_dims = len(img0.shape)
334
+ map_axes = tuple(list(range(num_dims - 3)) + [num_dims - 1])
335
+ for d in map_axes:
336
+ filt_fn1 = jax.vmap(filt_fn1, in_axes=d, out_axes=d)
337
+ filt_fn2 = jax.vmap(filt_fn2, in_axes=d, out_axes=d)
338
+ filt_fn = lambda z: filt_fn1(filt_fn2(z))
339
+
340
+ mu0 = filt_fn(img0)
341
+ mu1 = filt_fn(img1)
342
+ mu00 = mu0 * mu0
343
+ mu11 = mu1 * mu1
344
+ mu01 = mu0 * mu1
345
+ sigma00 = filt_fn(img0 ** 2) - mu00
346
+ sigma11 = filt_fn(img1 ** 2) - mu11
347
+ sigma01 = filt_fn(img0 * img1) - mu01
348
+
349
+ # Clip the variances and covariances to valid values.
350
+ # Variance must be non-negative:
351
+ sigma00 = jnp.maximum(0., sigma00)
352
+ sigma11 = jnp.maximum(0., sigma11)
353
+ sigma01 = jnp.sign(sigma01) * jnp.minimum(
354
+ jnp.sqrt(sigma00 * sigma11), jnp.abs(sigma01))
355
+
356
+ c1 = (k1 * max_val) ** 2
357
+ c2 = (k2 * max_val) ** 2
358
+ numer = (2 * mu01 + c1) * (2 * sigma01 + c2)
359
+ denom = (mu00 + mu11 + c1) * (sigma00 + sigma11 + c2)
360
+ ssim_map = numer / denom
361
+ ssim = jnp.mean(ssim_map, list(range(num_dims - 3, num_dims)))
362
+ return ssim_map if return_map else ssim
363
+
364
+
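A minimal usage sketch for `compute_ssim`, assuming the `jaxnerf.nerf` package and its dependencies are importable as in the training script below; the SSIM of an image with itself should be ~1:

import numpy as np
from jaxnerf.nerf import utils

img = np.random.uniform(size=(32, 32, 3)).astype(np.float32)
ssim = utils.compute_ssim(img, img, max_val=1.0)
print(float(ssim))   # ~1.0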
365
+ def save_img(img, pth):
366
+ """Save an image to disk.
367
+
368
+ Args:
369
+ img: jnp.ndarray, [height, width, channels], img will be clipped to [0, 1]
370
+ before being saved to pth.
371
+ pth: string, path to save the image to.
372
+ """
373
+ with open_file(pth, "wb") as imgout:
374
+ Image.fromarray(np.array(
375
+ (np.clip(img, 0., 1.) * 255.).astype(jnp.uint8))).save(imgout, "PNG")
376
+
377
+
378
+ def learning_rate_decay(step,
379
+ lr_init,
380
+ lr_final,
381
+ max_steps,
382
+ lr_delay_steps=0,
383
+ lr_delay_mult=1):
384
+ """Continuous learning rate decay function.
385
+
386
+ The returned rate is lr_init when step=0 and lr_final when step=max_steps, and
387
+ is log-linearly interpolated elsewhere (equivalent to exponential decay).
388
+ If lr_delay_steps>0 then the learning rate will be scaled by some smooth
389
+ function of lr_delay_mult, such that the initial learning rate is
390
+ lr_init*lr_delay_mult at the beginning of optimization but will be eased back
391
+ to the normal learning rate when steps>lr_delay_steps.
392
+
393
+ Args:
394
+ step: int, the current optimization step.
395
+ lr_init: float, the initial learning rate.
396
+ lr_final: float, the final learning rate.
397
+ max_steps: int, the number of steps during optimization.
398
+ lr_delay_steps: int, the number of steps to delay the full learning rate.
399
+ lr_delay_mult: float, the multiplier on the rate when delaying it.
400
+
401
+ Returns:
402
+ lr: float, the learning rate for the current step 'step'.
403
+ """
404
+ if lr_delay_steps > 0:
405
+ # A kind of reverse cosine decay.
406
+ delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin(
407
+ 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1))
408
+ else:
409
+ delay_rate = 1.
410
+ t = np.clip(step / max_steps, 0, 1)
411
+ log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t)
412
+ return delay_rate * log_lerp
413
+
414
+
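A quick numerical check of the schedule above with hypothetical settings and no warm-up delay (lr_delay_steps = 0): the rate starts at lr_init, ends at lr_final, and passes through their geometric mean halfway:

import numpy as np

lr_init, lr_final, max_steps = 5e-4, 5e-6, 1_000_000   # hypothetical settings

def lr_at(step):
    t = np.clip(step / max_steps, 0, 1)
    return np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t)

print(lr_at(0))                # 5e-4
print(lr_at(max_steps // 2))   # ~5e-5, the geometric mean of lr_init and lr_final
print(lr_at(max_steps))        # 5e-6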
415
+ def shard(xs):
416
+ """Split data into shards for multiple devices along the first dimension."""
417
+ '''
418
+ if 'embedding' in xs:
419
+ xs['pixels'] = jax.tree_map(lambda x: x.reshape((jax.local_device_count(), -1) + x.shape[1:]), xs['pixels'])
420
+ xs['rays'] = jax.tree_map(lambda x: x.reshape((jax.local_device_count(), -1) + x.shape[1:]), xs['rays'])
421
+ xs['embedding'] = np.stack([xs['embedding']]*jax.local_device_count(),0)
422
+ xs['random_rays'] = jax.tree_map(lambda x: np.stack([x]*jax.local_device_count(),0), xs['random_rays'])
423
+ else:
424
+ xs = jax.tree_map(
425
+ lambda x: x.reshape((jax.local_device_count(), -1) + x.shape[1:]) if len(x.shape) != 0 else x
426
+ , xs)
427
+
428
+ return xs
429
+ '''
430
+ return jax.tree_map(
431
+ lambda x: x.reshape((jax.local_device_count(), -1) + x.shape[1:]) if len(x.shape) != 0 else x
432
+ , xs)
433
+
434
+
435
+ def to_device(xs):
436
+ """Transfer data to devices (GPU/TPU)."""
437
+ return jax.tree_map(jnp.array, xs)
438
+
439
+
440
+ def unshard(x, padding=0):
441
+ """Collect the sharded tensor to the shape before sharding."""
442
+ y = x.reshape([x.shape[0] * x.shape[1]] + list(x.shape[2:]))
443
+ if padding > 0:
444
+ y = y[:-padding]
445
+ return y
446
+
447
+
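A round-trip sketch for `shard` / `unshard`, mirroring how `render_image` pads a chunk of rays, shards it across local devices, and strips the padding afterwards. Shapes are hypothetical and the snippet also runs on a single CPU device, assuming the package is importable:

import jax
import jax.numpy as jnp
from jaxnerf.nerf import utils

rays = jnp.arange(30.).reshape(10, 3)       # 10 rays, 3 components each
n_dev = jax.local_device_count()
padding = (-rays.shape[0]) % n_dev          # rows needed to divide evenly
padded = jnp.pad(rays, ((0, padding), (0, 0)), mode="edge")
sharded = utils.shard(padded)               # (n_dev, rows_per_device, 3)
restored = utils.unshard(sharded, padding)  # back to (10, 3)
assert restored.shape == rays.shape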
448
+ def write_pickle(data, fn):
449
+ with open(fn, 'wb') as f:
450
+ pickle.dump(data, f)
451
+ return None
452
+
453
+
454
+ def read_pickle(fn):
455
+ with open(fn, 'rb') as f:
456
+ data = pickle.load(f)
457
+ return data
jaxnerf/requirements.txt ADDED
@@ -0,0 +1,14 @@
1
+ numpy>=1.16.4
2
+ jax>=0.2.6
3
+ jaxlib>=0.1.57
4
+ flax>=0.2.2
5
+ opencv-python>=4.4.0
6
+ Pillow>=7.2.0
7
+ pyyaml>=5.3.1
8
+ tensorboard>=2.4.0
9
+ tensorflow>=2.3.1
10
+ tensorflow-hub>=0.11.0
11
+ transformers==4.8.2
12
+ wandb==0.10.33
13
+ tqdm==4.61.2
14
+ # pip install git+https://github.com/deepmind/jmp # mixed precision for JAX
jaxnerf/run.sh ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright 2021 The Google Research Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ #!/bin/bash
16
+ set -e
17
+ set -x
18
+
19
+ virtualenv -p python3 .
20
+ source ./bin/activate
21
+
22
+ pip install -r jaxnerf/requirements.txt
23
+ pip uninstall -y jax
24
+ pip install --upgrade pip
25
+ pip install "jax[tpu]>=0.2.16" -f https://storage.googleapis.com/jax-releases/libtpu_releases.html
26
+ python -m jaxnerf.train \
27
+ --data_dir=/mnt/data/NeRF_Data/nerf_synthetic/lego \
28
+ --train_dir=test_output \
29
+ --max_steps=5 \
30
+ --factor=2 \
31
+ --batch_size=512 \
32
+ --config=configs/orig_nerf_tpu_vm_test \
33
+ --precompute_pkl_path /mnt/data/NeRF_Data/nerf_synthetic/lego/clip_cache_train_factor4_float32.pkl
jaxnerf/train.py ADDED
@@ -0,0 +1,326 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Google Research Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Lint as: python3
17
+ """Training script for Nerf."""
18
+ import functools
19
+ import gc
20
+ import time
21
+ from absl import app
22
+ from absl import flags
23
+ import flax
24
+ from flax.metrics import tensorboard
25
+ from flax.training import checkpoints
26
+ import jax
27
+ from jax import config
28
+ from jax import random
29
+ import jax.numpy as jnp
30
+ import numpy as np
31
+ # import wandb
32
+ from tqdm import tqdm
33
+
34
+ from jaxnerf.nerf import datasets
35
+ from jaxnerf.nerf import models
36
+ from jaxnerf.nerf import utils
37
+ from jaxnerf.nerf import clip_utils
38
+
39
+ FLAGS = flags.FLAGS
40
+
41
+ utils.define_flags()
42
+ config.parse_flags_with_absl()
43
+
44
+ # set up TPU for colab
45
+ import os
46
+ if "COLAB_TPU_ADDR" in os.environ:
47
+ import jax.tools.colab_tpu
48
+ jax.tools.colab_tpu.setup_tpu()
49
+ print(f"detected device: {jax.local_devices()}")
50
+
51
+
52
+ def train_step(model, clip_model, rng, state, batch, lr, step, K):#, clip_grad):
53
+ # TODO: enable the clip_grad input.
54
+ """One optimization step.
55
+
56
+ Args:
57
+ model: The linen model.
+ clip_model: the CLIP model, or None when the semantic loss is disabled.
58
+ rng: jnp.ndarray, random number generator.
59
+ state: utils.TrainState, state of the model/optimizer.
60
+ batch: dict, a mini-batch of data for training.
61
+ lr: float, real-time learning rate.
+ step: int, the current training step.
+ K: int, the semantic-loss update period (FLAGS.sc_loss_every).
62
+
63
+ Returns:
64
+ new_state: utils.TrainState, new training state.
65
+ stats: utils.Stats, training statistics (loss, psnr, loss_c, psnr_c, weight_l2).
66
+ rng: jnp.ndarray, updated random number generator.
67
+ """
68
+ rng, key_0, key_1 = random.split(rng, 3)
69
+
70
+ def loss_fn(variables):
71
+ rays = batch["rays"]
72
+ ret = model.apply(variables, key_0, key_1, rays, FLAGS.randomized)
73
+ if len(ret) not in (1, 2):
74
+ raise ValueError(
75
+ "ret should contain either 1 set of output (coarse only), or 2 sets"
76
+ "of output (coarse as ret[0] and fine as ret[1]).")
77
+ # The main prediction is always at the end of the ret list.
78
+ rgb, unused_disp, unused_acc = ret[-1]
79
+ loss = ((rgb - batch["pixels"][Ellipsis, :3]) ** 2).mean()
80
+ psnr = utils.compute_psnr(loss)
81
+ if len(ret) > 1:
82
+ # If there are both coarse and fine predictions, we compute the loss for
83
+ # the coarse prediction (ret[0]) as well.
84
+ rgb_c, unused_disp_c, unused_acc_c = ret[0]
85
+ loss_c = ((rgb_c - batch["pixels"][Ellipsis, :3]) ** 2).mean()
86
+ psnr_c = utils.compute_psnr(loss_c)
87
+ else:
88
+ loss_c = 0.
89
+ psnr_c = 0.
90
+
91
+ def tree_sum_fn(fn):
92
+ return jax.tree_util.tree_reduce(lambda x, y: x + fn(y),
93
+ variables, initializer=0)
94
+
95
+ weight_l2 = (tree_sum_fn(lambda z: jnp.sum(z ** 2)) /
96
+ tree_sum_fn(lambda z: jnp.prod(jnp.array(z.shape))))
97
+
98
+ total_loss = loss + loss_c + FLAGS.weight_decay_mult * weight_l2
99
+ stats = utils.Stats(loss=loss, psnr=psnr, loss_c=loss_c,
100
+ psnr_c=psnr_c, weight_l2=weight_l2)
101
+ return total_loss, stats
102
+
103
+ (_, stats), grad = (
104
+ jax.value_and_grad(loss_fn, has_aux=True)(state.optimizer.target))
105
+ grad = jax.lax.pmean(grad, axis_name="batch")
106
+ stats = jax.lax.pmean(stats, axis_name="batch")
107
+
108
+ # Clip the gradient by value.
109
+ if FLAGS.grad_max_val > 0:
110
+ clip_fn = lambda z: jnp.clip(z, -FLAGS.grad_max_val, FLAGS.grad_max_val)
111
+ grad = jax.tree_util.tree_map(clip_fn, grad)
112
+
113
+ # Clip the (possibly value-clipped) gradient by norm.
114
+ if FLAGS.grad_max_norm > 0:
115
+ grad_norm = jnp.sqrt(
116
+ jax.tree_util.tree_reduce(
117
+ lambda x, y: x + jnp.sum(y ** 2), grad, initializer=0))
118
+ mult = jnp.minimum(1, FLAGS.grad_max_norm / (1e-7 + grad_norm))
119
+ grad = jax.tree_util.tree_map(lambda z: mult * z, grad)
120
+
121
+ #return grad, state, rng
122
+ new_optimizer = state.optimizer.apply_gradient(grad, learning_rate=lr)
123
+ new_state = state.replace(optimizer=new_optimizer)
124
+ return new_state, stats, rng
125
+
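The norm clipping inside `train_step` is a standard clip-by-global-norm over the gradient pytree; restated as a standalone sketch with hypothetical gradient values:

import jax
import jax.numpy as jnp

def clip_by_global_norm(grad, max_norm):
    # L2 norm taken across every leaf of the gradient pytree.
    grad_norm = jnp.sqrt(
        jax.tree_util.tree_reduce(
            lambda x, y: x + jnp.sum(y ** 2), grad, initializer=0.))
    mult = jnp.minimum(1., max_norm / (1e-7 + grad_norm))
    return jax.tree_util.tree_map(lambda z: mult * z, grad)

grads = {"w": jnp.ones((3, 3)), "b": jnp.ones((3,))}
clipped = clip_by_global_norm(grads, max_norm=1.0)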
126
+ def update_step(state, grad, lr):
127
+ new_optimizer = state.optimizer.apply_gradient(grad, learning_rate=lr)
128
+ new_state = state.replace(optimizer=new_optimizer)
129
+ return new_state
130
+
131
+
132
+ def main(unused_argv):
133
+ #wandb.init(project="hf-flax-clip-nerf", entity="wandb", sync_tensorboard=True)
134
+ rng = random.PRNGKey(20200823)
135
+ # Shift the numpy random seed by host_id() to shuffle data loaded by different
136
+ # hosts.
137
+ np.random.seed(20201473 + jax.host_id())
138
+
139
+ if FLAGS.config is not None:
140
+ utils.update_flags(FLAGS)
141
+ if FLAGS.batch_size % jax.device_count() != 0:
142
+ raise ValueError("Batch size must be divisible by the number of devices.")
143
+ if FLAGS.train_dir is None:
144
+ raise ValueError("train_dir must be set. None set now.")
145
+ if FLAGS.data_dir is None:
146
+ raise ValueError("data_dir must be set. None set now.")
147
+
148
+ # setup CLIP model
149
+ if FLAGS.use_semantic_loss:
150
+ clip_model = clip_utils.init_CLIP(FLAGS.clip_output_dtype,
151
+ FLAGS.clip_model_name)
152
+ print('semantic loss ACTIVATED, CLIP is set up')
153
+ else:
154
+ clip_model = None
155
+ print('semantic loss DEACTIVATED, CLIP is set to None')
156
+
157
+ dataset = datasets.get_dataset("train", FLAGS, clip_model)
158
+ test_dataset = datasets.get_dataset("test", FLAGS, clip_model)
159
+
160
+ # setup NeRF model
161
+ rng, key = random.split(rng)
162
+ model, variables = models.get_model(key, dataset.peek(), FLAGS)
163
+ optimizer = flax.optim.Adam(FLAGS.lr_init).create(variables)
164
+ state = utils.TrainState(optimizer=optimizer)
165
+ del optimizer, variables
166
+ learning_rate_fn = functools.partial(
167
+ utils.learning_rate_decay,
168
+ lr_init=FLAGS.lr_init,
169
+ lr_final=FLAGS.lr_final,
170
+ max_steps=FLAGS.max_steps,
171
+ lr_delay_steps=FLAGS.lr_delay_steps,
172
+ lr_delay_mult=FLAGS.lr_delay_mult)
173
+
174
+ train_pstep = jax.pmap(
175
+ functools.partial(train_step, model, clip_model),
176
+ axis_name="batch",
177
+ in_axes=(0, 0, 0, None, None, None),
178
+ donate_argnums=(2,))
179
+
180
+ update_pstep = jax.pmap(
181
+ functools.partial(update_step,),
182
+ axis_name="batch",
183
+ in_axes=(0, None, None),
184
+ donate_argnums=(0,))
185
+
186
+
187
+ def render_fn(variables, key_0, key_1, rays):
188
+ return jax.lax.all_gather(
189
+ model.apply(variables, key_0, key_1, rays, FLAGS.randomized),
190
+ axis_name="batch")
191
+
192
+ render_pfn = jax.pmap(
193
+ render_fn,
194
+ in_axes=(None, None, None, 0), # Only distribute the data input.
195
+ donate_argnums=(3,),
196
+ axis_name="batch")
197
+
198
+ # Compiling to the CPU because it's faster and more accurate.
199
+ ssim_fn = jax.jit(
200
+ functools.partial(utils.compute_ssim, max_val=1.), backend="cpu")
201
+
202
+ if not utils.isdir(FLAGS.train_dir):
203
+ utils.makedirs(FLAGS.train_dir)
204
+ state = checkpoints.restore_checkpoint(FLAGS.train_dir, state)
205
+ # Resume training at the step of the last checkpoint.
206
+ init_step = state.optimizer.state.step + 1
207
+
208
+ # for distributed training
209
+ state = flax.jax_utils.replicate(state)
210
+ if jax.host_id() == 0:
211
+ summary_writer = tensorboard.SummaryWriter(FLAGS.train_dir)
212
+
213
+ # Prefetch_buffer_size = 3 x batch_size
214
+ pdataset = flax.jax_utils.prefetch_to_device(dataset, 3)
215
+ n_local_devices = jax.local_device_count()
216
+ rng = rng + jax.host_id() # Make random seed separate across hosts.
217
+ keys = random.split(rng, n_local_devices) # For pmapping RNG keys.
218
+ gc.disable() # Disable automatic garbage collection for efficiency.
219
+ stats_trace = []
220
+ reset_timer = True
221
+
222
+ # for semantic loss update
223
+ cnter = 1
224
+ trigger = int(FLAGS.sc_loss_every / n_local_devices)
225
+
226
+ for step, batch in tqdm(zip(range(init_step, FLAGS.max_steps + 1), pdataset)):
227
+ if reset_timer:
228
+ t_loop_start = time.time()
229
+ reset_timer = False
230
+ lr = learning_rate_fn(step)
231
+
232
+ if step % FLAGS.sc_loss_every == 0 and FLAGS.use_semantic_loss:
233
+ # remove the device dimension because this runs only on the host core
234
+ sc_batch = dataset.get_clip_data()
235
+ sc_loss, sc_grad = clip_utils.update_semantic_loss(model, clip_model,
236
+ keys[0], state, sc_batch, lr)
237
+ sc_grad = flax.jax_utils.replicate(sc_grad)
238
+ sc_grad = jax.tree_map(lambda x: x[0], sc_grad)
239
+
240
+ else:
241
+ sc_loss = 0.
242
+
243
+ state, stats, keys = train_pstep(keys, state, batch, lr, step, FLAGS.sc_loss_every)#, grad)
244
+
245
+ if step % FLAGS.sc_loss_every == 0 and FLAGS.use_semantic_loss:
246
+ state = update_pstep(state, sc_grad, lr)
247
+
248
+ if jax.host_id() == 0:
249
+ stats_trace.append(stats)
250
+ if step % FLAGS.gc_every == 0:
251
+ gc.collect()
252
+
253
+ # Log training summaries. This is put behind a host_id check because in
254
+ # multi-host evaluation, all hosts need to run inference even though we
255
+ # only use host 0 to record results.
256
+ if jax.host_id() == 0:
257
+ if step % FLAGS.print_every == 0:
258
+ summary_writer.scalar("train_loss", stats.loss[0], step)
259
+ summary_writer.scalar("train_psnr", stats.psnr[0], step)
260
+ summary_writer.scalar("train_loss_coarse", stats.loss_c[0], step)
261
+ summary_writer.scalar("train_psnr_coarse", stats.psnr_c[0], step)
262
+ summary_writer.scalar("weight_l2", stats.weight_l2[0], step)
263
+ avg_loss = np.mean(np.concatenate([s.loss for s in stats_trace]))
264
+ avg_psnr = np.mean(np.concatenate([s.psnr for s in stats_trace]))
265
+ stats_trace = []
266
+ summary_writer.scalar("train_avg_loss", avg_loss, step)
267
+ summary_writer.scalar("train_avg_psnr", avg_psnr, step)
268
+ summary_writer.scalar("learning_rate", lr, step)
269
+ steps_per_sec = FLAGS.print_every / (time.time() - t_loop_start)
270
+ reset_timer = True
271
+ rays_per_sec = FLAGS.batch_size * steps_per_sec
272
+ summary_writer.scalar("train_steps_per_sec", steps_per_sec, step)
273
+ summary_writer.scalar("train_rays_per_sec", rays_per_sec, step)
274
+ precision = int(np.ceil(np.log10(FLAGS.max_steps))) + 1
275
+ print(("{:" + "{:d}".format(precision) + "d}").format(step) +
276
+ f"/{FLAGS.max_steps:d}: " + f"i_loss={stats.loss[0]:0.4f}, " +
277
+ f"avg_loss={avg_loss:0.4f}, " +
278
+ f"weight_l2={stats.weight_l2[0]:0.2e}, " +
279
+ # f"sc_loss={sc_loss:0.4f}, " +
280
+ f"lr={lr:0.2e}, {rays_per_sec:0.0f} rays/sec")
281
+ if step % FLAGS.save_every == 0:
282
+ state_to_save = jax.device_get(jax.tree_map(lambda x: x[0], state))
283
+ checkpoints.save_checkpoint(
284
+ FLAGS.train_dir, state_to_save, int(step), keep=100)
285
+
286
+ # Test-set evaluation.
287
+ if FLAGS.render_every > 0 and step % FLAGS.render_every == 0:
288
+ # We reuse the same random number generator from the optimization step
289
+ # here on purpose so that the visualization matches what happened in
290
+ # training.
291
+ t_eval_start = time.time()
292
+ eval_variables = jax.device_get(jax.tree_map(lambda x: x[0],
293
+ state)).optimizer.target
294
+ test_case = next(test_dataset)
295
+ pred_color, pred_disp, pred_acc = utils.render_image(
296
+ functools.partial(render_pfn, eval_variables),
297
+ test_case["rays"],
298
+ keys[0],
299
+ FLAGS.dataset == "llff",
300
+ chunk=FLAGS.chunk)
301
+
302
+ # Log eval summaries on host 0.
303
+ if jax.host_id() == 0:
304
+ psnr = utils.compute_psnr(
305
+ ((pred_color - test_case["pixels"]) ** 2).mean())
306
+ ssim = ssim_fn(pred_color, test_case["pixels"])
307
+ eval_time = time.time() - t_eval_start
308
+ num_rays = jnp.prod(jnp.array(test_case["rays"].directions.shape[:-1]))
309
+ rays_per_sec = num_rays / eval_time
310
+ summary_writer.scalar("test_rays_per_sec", rays_per_sec, step)
311
+ print(f"Eval {step}: {eval_time:0.3f}s., {rays_per_sec:0.0f} rays/sec")
312
+ summary_writer.scalar("test_psnr", psnr, step)
313
+ summary_writer.scalar("test_ssim", ssim, step)
314
+ summary_writer.image("test_pred_color", pred_color, step)
315
+ summary_writer.image("test_pred_disp", pred_disp, step)
316
+ summary_writer.image("test_pred_acc", pred_acc, step)
317
+ summary_writer.image("test_target", test_case["pixels"], step)
318
+
319
+ if FLAGS.max_steps % FLAGS.save_every != 0:
320
+ state = jax.device_get(jax.tree_map(lambda x: x[0], state))
321
+ checkpoints.save_checkpoint(
322
+ FLAGS.train_dir, state, int(FLAGS.max_steps), keep=100)
323
+
324
+
325
+ if __name__ == "__main__":
326
+ app.run(main)
jaxnerf/train.sh ADDED
@@ -0,0 +1,34 @@
1
+ # Copyright 2021 The Google Research Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ #!/bin/bash
16
+ CONFIG=$1
17
+ DATA_ROOT=$2
18
+ ROOT_DIR=/tmp/jaxnerf/"$CONFIG"
19
+ if [ $CONFIG == "llff" ]
20
+ then
21
+ SCENES="room fern leaves fortress orchids flower trex horns"
22
+ DATA_FOLDER="nerf_llff_data"
23
+ else
24
+ SCENES="lego chair drums ficus hotdog materials mic ship"
25
+ DATA_FOLDER="nerf_synthetic"
26
+ fi
27
+
28
+ # launch training jobs for all scenes.
29
+ for scene in $SCENES; do
30
+ python -m jaxnerf.train \
31
+ --data_dir="$DATA_ROOT"/"$DATA_FOLDER"/"$scene" \
32
+ --train_dir="$ROOT_DIR"/"$scene" \
33
+ --config=configs/"$CONFIG"
34
+ done
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ numpy>=1.16.4
2
+ jax>=0.2.6
3
+ jaxlib>=0.1.57
4
+ flax>=0.2.2
5
+ opencv-python>=4.4.0
6
+ Pillow>=7.2.0
7
+ streamlit==0.84.1
8
+ googledrivedownloader==0.4