JianyuanWang committed on
Commit
1bfa5fd
·
1 Parent(s): 6eae011
app.py CHANGED
@@ -185,6 +185,8 @@ cake_images = glob.glob(f'vggsfm_code/examples/cake/images/*')
185
  british_museum_images = glob.glob(f'vggsfm_code/examples/british_museum/images/*')
186
 
187
 
 
 
188
  with gr.Blocks() as demo:
189
  gr.Markdown("# 🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion")
190
 
@@ -197,7 +199,7 @@ with gr.Blocks() as demo:
197
  <li>upload the images (.jpg, .png, etc.), or </li>
198
  <li>upload a video (.mp4, .mov, etc.) </li>
199
  </ul>
200
- <p>The reconstruction should take <strong> up to 1 minute </strong>. If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
201
  <p>SfM methods are designed for <strong> rigid/static reconstruction </strong>. When dealing with dynamic/moving inputs, these methods may still work by focusing on the rigid parts of the scene. However, to ensure high-quality results, it is better to minimize the presence of moving objects in the input data. </p>
202
  <p>If you meet any problem, feel free to create an issue in our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
203
  <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
@@ -208,7 +210,7 @@ with gr.Blocks() as demo:
208
  with gr.Column(scale=1):
209
  input_video = gr.Video(label="Input video", interactive=True)
210
  input_images = gr.File(file_count="multiple", label="Input Images", interactive=True)
211
- num_query_images = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images",
212
  info="More query images usually lead to better reconstruction at lower speeds. If the viewpoint differences between your images are minimal, you can set this value to 1. ")
213
  num_query_points = gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points",
214
  info="More query points usually lead to denser reconstruction at lower speeds.")
@@ -218,8 +220,10 @@ with gr.Blocks() as demo:
218
  log_output = gr.Textbox(label="Log")
219
 
220
  with gr.Row():
 
 
 
221
  clear_btn = gr.ClearButton([input_video, input_images, num_query_images, num_query_points, reconstruction_output, log_output], scale=1)
222
- submit_btn = gr.Button("Reconstruct", scale=3)
223
 
224
 
225
  examples = [
@@ -232,7 +236,7 @@ with gr.Blocks() as demo:
232
  inputs=[input_video, input_images, num_query_images, num_query_points],
233
  outputs=[reconstruction_output, log_output], # Provide outputs
234
  fn=vggsfm_demo, # Provide the function
235
- cache_examples=True
236
  )
237
 
238
  submit_btn.click(
@@ -243,7 +247,7 @@ with gr.Blocks() as demo:
243
  )
244
 
245
  # demo.launch(debug=True, share=True)
246
- demo.queue(max_size=30).launch(show_error=True)
247
  # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)
248
  ########################################################################################################################
249
 
 
185
  british_museum_images = glob.glob(f'vggsfm_code/examples/british_museum/images/*')
186
 
187
 
188
+
189
+
190
  with gr.Blocks() as demo:
191
  gr.Markdown("# 🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion")
192
 
 
199
  <li>upload the images (.jpg, .png, etc.), or </li>
200
  <li>upload a video (.mp4, .mov, etc.) </li>
201
  </ul>
202
+ <p>The reconstruction should take <strong> up to 1 minute </strong>. If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract <strong> 1 image frame per second from the input video </strong>. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
203
  <p>SfM methods are designed for <strong> rigid/static reconstruction </strong>. When dealing with dynamic/moving inputs, these methods may still work by focusing on the rigid parts of the scene. However, to ensure high-quality results, it is better to minimize the presence of moving objects in the input data. </p>
204
  <p>If you meet any problem, feel free to create an issue in our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
205
  <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
 
210
  with gr.Column(scale=1):
211
  input_video = gr.Video(label="Input video", interactive=True)
212
  input_images = gr.File(file_count="multiple", label="Input Images", interactive=True)
213
+ num_query_images = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images (key frames)",
214
  info="More query images usually lead to better reconstruction at lower speeds. If the viewpoint differences between your images are minimal, you can set this value to 1. ")
215
  num_query_points = gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points",
216
  info="More query points usually lead to denser reconstruction at lower speeds.")
 
220
  log_output = gr.Textbox(label="Log")
221
 
222
  with gr.Row():
223
+ submit_btn = gr.Button("Reconstruct", scale=1)
224
+
225
+ # submit_btn = gr.Button("Reconstruct", scale=1, elem_attributes={"style": "background-color: blue; color: white;"})
226
  clear_btn = gr.ClearButton([input_video, input_images, num_query_images, num_query_points, reconstruction_output, log_output], scale=1)
 
227
 
228
 
229
  examples = [
 
236
  inputs=[input_video, input_images, num_query_images, num_query_points],
237
  outputs=[reconstruction_output, log_output], # Provide outputs
238
  fn=vggsfm_demo, # Provide the function
239
+ cache_examples=True,
240
  )
241
 
242
  submit_btn.click(
 
247
  )
248
 
249
  # demo.launch(debug=True, share=True)
250
+ demo.queue(max_size=20).launch(show_error=True, share=True)
251
  # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)
252
  ########################################################################################################################
253
 
vggsfm_code/examples/videos/bonsai_video.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:627cba512d70ff1ead2ba23e9e8492104934c42c6f2263665d39b72b24ea4d82
3
- size 2107907
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe81a91e79e96b14bfea751f61da63e32f8f4e54879c68b726468a44f7f8818a
3
+ size 2290807
vggsfm_code/examples/videos/british_museum_video.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7672a2df58075afe5a7415190daa11cfdcd740f9890d9f2ad7e5f35ae419ce6f
3
- size 419807
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fbbde1a54deaadb5144a3bcecdd2c404fe950312f3b8f2b9628ba49067053df
3
+ size 407548