chaojiemao committed
Commit 87e4b76 · verified · 1 Parent(s): e667736

Update app.py

Files changed (1): app.py (+94 -87)
app.py CHANGED
@@ -45,6 +45,9 @@ video_sty = '\U0001f3a5' # 🎥
  
  lock = threading.Lock()
  
+ inference_dict = {
+     "ACE": ACEInference
+ }
  
  class ChatBotUI(object):
      def __init__(self,
@@ -82,13 +85,12 @@ class ChatBotUI(object):
82
  self.model_choices[model_name] = model_cfg
83
  print('Models: ', self.model_choices.keys())
84
 
85
- #FS.get_from("ms://AI-ModelScope/FLUX.1-dev@flux1-dev.safetensors")
86
- #FS.get_from("ms://AI-ModelScope/FLUX.1-dev@ae.safetensors")
87
- #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@text_encoder_2/")
88
- #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@tokenizer_2/")
89
- #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@text_encoder/")
90
- #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@tokenizer/")
91
-
92
  assert len(self.model_choices) > 0
93
  if self.default_model_name == "": self.default_model_name = self.model_choices.keys()[0]
94
  self.model_name = self.default_model_name
@@ -133,15 +135,11 @@ class ChatBotUI(object):
          )
  
          sys_prompt = """You are part of a team of bots that creates videos. You work with an assistant bot that will draw anything you say in square brackets.
- 
  For example , outputting " a beautiful morning in the woods with the sun peaking through the trees " will trigger your partner bot to output an video of a forest morning , as described. You will be prompted by people looking to create detailed , amazing videos. The way to accomplish this is to take their short prompts and make them extremely detailed and descriptive.
  There are a few rules to follow:
- 
  You will only ever output a single video description per user request.
- 
  When modifications are requested , you should not simply make the description longer . You should refactor the entire description to integrate the suggestions.
  Other times the user will not want modifications , but instead want a new image . In this case , you should ignore your previous conversation with the user.
- 
  Video descriptions must have the same num of words as examples below. Extra words will be ignored.
  """
          self.enhance_ctx = [
@@ -151,39 +149,39 @@ class ChatBotUI(object):
              },
              {
                  'role':
-                 'user',
+                 'user',
                  'content':
-                 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "a girl is on the beach"',
+                 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "a girl is on the beach"',
              },
              {
                  'role':
-                 'assistant',
+                 'assistant',
                  'content':
-                 "A radiant woman stands on a deserted beach, arms outstretched, wearing a beige trench coat, white blouse, light blue jeans, and chic boots, against a backdrop of soft sky and sea. Moments later, she is seen mid-twirl, arms exuberant, with the lighting suggesting dawn or dusk. Then, she runs along the beach, her attire complemented by an off-white scarf and black ankle boots, the tranquil sea behind her. Finally, she holds a paper airplane, her pose reflecting joy and freedom, with the ocean's gentle waves and the sky's soft pastel hues enhancing the serene ambiance.",
+                 "A radiant woman stands on a deserted beach, arms outstretched, wearing a beige trench coat, white blouse, light blue jeans, and chic boots, against a backdrop of soft sky and sea. Moments later, she is seen mid-twirl, arms exuberant, with the lighting suggesting dawn or dusk. Then, she runs along the beach, her attire complemented by an off-white scarf and black ankle boots, the tranquil sea behind her. Finally, she holds a paper airplane, her pose reflecting joy and freedom, with the ocean's gentle waves and the sky's soft pastel hues enhancing the serene ambiance.",
              },
              {
                  'role':
-                 'user',
+                 'user',
                  'content':
-                 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "A man jogging on a football field"',
+                 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "A man jogging on a football field"',
              },
              {
                  'role':
-                 'assistant',
+                 'assistant',
                  'content':
-                 "A determined man in athletic attire, including a blue long-sleeve shirt, black shorts, and blue socks, jogs around a snow-covered soccer field, showcasing his solitary exercise in a quiet, overcast setting. His long dreadlocks, focused expression, and the serene winter backdrop highlight his dedication to fitness. As he moves, his attire, consisting of a blue sports sweatshirt, black athletic pants, gloves, and sneakers, grips the snowy ground. He is seen running past a chain-link fence enclosing the playground area, with a basketball hoop and children's slide, suggesting a moment of solitary exercise amidst the empty field.",
+                 "A determined man in athletic attire, including a blue long-sleeve shirt, black shorts, and blue socks, jogs around a snow-covered soccer field, showcasing his solitary exercise in a quiet, overcast setting. His long dreadlocks, focused expression, and the serene winter backdrop highlight his dedication to fitness. As he moves, his attire, consisting of a blue sports sweatshirt, black athletic pants, gloves, and sneakers, grips the snowy ground. He is seen running past a chain-link fence enclosing the playground area, with a basketball hoop and children's slide, suggesting a moment of solitary exercise amidst the empty field.",
              },
              {
                  'role':
-                 'user',
+                 'user',
                  'content':
-                 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : " A woman is dancing, HD footage, close-up"',
+                 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : " A woman is dancing, HD footage, close-up"',
              },
              {
                  'role':
-                 'assistant',
+                 'assistant',
                  'content':
-                 'A young woman with her hair in an updo and wearing a teal hoodie stands against a light backdrop, initially looking over her shoulder with a contemplative expression. She then confidently makes a subtle dance move, suggesting rhythm and movement. Next, she appears poised and focused, looking directly at the camera. Her expression shifts to one of introspection as she gazes downward slightly. Finally, she dances with confidence, her left hand over her heart, symbolizing a poignant moment, all while dressed in the same teal hoodie against a plain, light-colored background.',
+                 'A young woman with her hair in an updo and wearing a teal hoodie stands against a light backdrop, initially looking over her shoulder with a contemplative expression. She then confidently makes a subtle dance move, suggesting rhythm and movement. Next, she appears poised and focused, looking directly at the camera. Her expression shifts to one of introspection as she gazes downward slightly. Finally, she dances with confidence, her left hand over her heart, symbolizing a poignant moment, all while dressed in the same teal hoodie against a plain, light-colored background.',
              },
          ]
  
@@ -350,7 +348,6 @@ class ChatBotUI(object):
                      show_download_button=True,
                      elem_id='image_viewer')
  
- 
              with gr.Accordion(label='Setting', open=False):
                  with gr.Row():
                      self.model_name_dd = gr.Dropdown(
@@ -376,7 +373,6 @@ class ChatBotUI(object):
                          label='Refiner Prompt',
                          container=False)
  
- 
                  with gr.Row():
                      with gr.Column(scale=8, min_width=500):
                          with gr.Row():
@@ -397,10 +393,11 @@ class ChatBotUI(object):
                              visible=self.pipe.input.get("guide_rescale", None) is not None,
                              label='Rescale')
                          self.refiner_scale = gr.Slider(minimum=-0.1,
-                             maximum=1.0,
-                             value=self.pipe.input.get("refiner_scale", 0.5),
-                             visible=self.pipe.input.get("refiner_scale", None) is not None,
-                             label='Refiner Scale')
+                             maximum=1.0,
+                             value=self.pipe.input.get("refiner_scale", 0.5),
+                             visible=self.pipe.input.get("refiner_scale",
+                                                         None) is not None,
+                             label='Refiner Scale')
                          self.seed = gr.Slider(minimum=-1,
                              maximum=10000000,
                              value=-1,
@@ -461,7 +458,6 @@ class ChatBotUI(object):
              with gr.Row():
                  self.chatbot_inst = """
  **Instruction**:
- 
  1. Click 'Upload' button to upload one or more images as input images.
  2. Enter '@' in the text box will exhibit all images in the gallery.
  3. Select the image you wish to edit from the gallery, and its Image ID will be displayed in the text box.
@@ -471,19 +467,16 @@ class ChatBotUI(object):
  6. **Important** To render text on an image, please ensure to include a space between each letter. For instance, "add text 'g i r l' on the mask area of @xxxxx".
  7. To implement local editing based on a specified mask, simply click on the image within the chat window to access the image editor. Here, you can draw a mask and then click the 'Submit' button to upload the edited image along with the mask. For inpainting tasks, select the 'Composite' mask type, while for outpainting tasks, choose the 'Outpainting' mask type. For all other local editing tasks, please select the 'Background' mask type.
  8. If you find our work valuable, we invite you to refer to the [ACE Page](https://ali-vilab.github.io/ace-page/) for comprehensive information.
- 
  """
  
                  self.legacy_inst = """
  **Instruction**:
- 
  1. You can edit the image by uploading it; if no image is uploaded, an image will be generated from text..
  2. Enter '@' in the text box will exhibit all images in the gallery.
  3. Select the image you wish to edit from the gallery, and its Image ID will be displayed in the text box.
  4. **Important** To render text on an image, please ensure to include a space between each letter. For instance, "add text 'g i r l' on the mask area of @xxxxx".
  5. To perform multi-step editing, partial editing, inpainting, outpainting, and other operations, please click the Chatbot Checkbox to enable the conversational editing mode and follow the relevant instructions..
  6. If you find our work valuable, we invite you to refer to the [ACE Page](https://ali-vilab.github.io/ace-page/) for comprehensive information.
- 
  """
  
                  self.instruction = gr.Markdown(value=self.legacy_inst)
@@ -493,7 +486,7 @@ class ChatBotUI(object):
                          show_progress=False):
                      with gr.Column(scale=1, min_width=100, visible=False) as self.upload_panel:
                          self.upload_btn = gr.Button(value=upload_sty +
-                                                     ' Upload',
+                                                     ' Upload',
                                                      variant='secondary')
                      with gr.Column(scale=5, min_width=500):
                          self.text = gr.Textbox(
@@ -505,7 +498,7 @@ class ChatBotUI(object):
505
  variant='primary')
506
  with gr.Column(scale=1, min_width=100):
507
  self.retry_btn = gr.Button(value=refresh_sty +
508
- ' Retry',
509
  variant='secondary')
510
  with gr.Column(scale=1, min_width=100):
511
  self.mode_checkbox = gr.Checkbox(
@@ -514,7 +507,7 @@ class ChatBotUI(object):
514
  with gr.Column(scale=(1 if self.enable_i2v else 0),
515
  min_width=0):
516
  self.video_gen_btn = gr.Button(value=video_sty +
517
- ' Gen Video',
518
  variant='secondary',
519
  visible=self.enable_i2v)
520
  with gr.Column(scale=(1 if self.enable_i2v else 0),
@@ -546,21 +539,24 @@ class ChatBotUI(object):
              lock.acquire()
              del self.pipe
              torch.cuda.empty_cache()
-             self.pipe = ACEInference()
-             self.pipe.init_from_cfg(self.model_choices[model_name])
+             torch.cuda.ipc_collect()
+             pipe_cfg = self.model_choices[model_name]
+             infer_name = pipe_cfg.get("INFERENCE_TYPE", "ACE")
+             self.pipe = inference_dict[infer_name]()
+             self.pipe.init_from_cfg(pipe_cfg)
              self.model_name = model_name
              lock.release()
  
              return (model_name, gr.update(), gr.update(),
                      gr.Slider(
-                         value=self.pipe.input.get("sample_steps", 20),
-                         visible=self.pipe.input.get("sample_steps", None) is not None),
+                         value=self.pipe.input.get("sample_steps", 20),
+                         visible=self.pipe.input.get("sample_steps", None) is not None),
                      gr.Slider(
                          value=self.pipe.input.get("guide_scale", 4.5),
                          visible=self.pipe.input.get("guide_scale", None) is not None),
                      gr.Slider(
-                         value=self.pipe.input.get("guide_rescale", 0.5),
-                         visible=self.pipe.input.get("guide_rescale", None) is not None),
+                         value=self.pipe.input.get("guide_rescale", 0.5),
+                         visible=self.pipe.input.get("guide_rescale", None) is not None),
                      gr.Slider(
                          value=self.pipe.input.get("output_height", 1024),
                          visible=self.pipe.input.get("output_height", None) is not None),
@@ -571,9 +567,9 @@ class ChatBotUI(object):
                          value=self.pipe.input.get("refiner_prompt", ""),
                          visible=self.pipe.input.get("refiner_prompt", None) is not None),
                      gr.Slider(
-                         value=self.pipe.input.get("refiner_scale", 0.5),
-                         visible=self.pipe.input.get("refiner_scale", None) is not None
-                     ),
+                         value=self.pipe.input.get("refiner_scale", 0.5),
+                         visible=self.pipe.input.get("refiner_scale", None) is not None
+                     ),
                      gr.Checkbox(
                          value=self.pipe.input.get("use_ace", True),
                          visible=self.pipe.input.get("use_ace", None) is not None
@@ -590,7 +586,6 @@ class ChatBotUI(object):
                  self.output_width, self.refiner_prompt, self.refiner_scale,
                  self.use_ace])
  
- 
          def mode_change(mode_check):
              if mode_check:
                  # ChatBot
@@ -612,12 +607,12 @@ class ChatBotUI(object):
                      gr.Column(visible=False),
                      gr.Markdown(value=self.legacy_inst)
                  )
+ 
          self.mode_checkbox.change(mode_change, inputs=[self.mode_checkbox],
                                    outputs=[self.legacy_group, self.chat_group,
                                             self.chat_btn, self.ui_mode,
                                             self.upload_panel, self.instruction])
  
- 
          ########################################
          def generate_gallery(text, images):
              if text.endswith(' '):
@@ -695,9 +690,9 @@ class ChatBotUI(object):
              messages = copy.deepcopy(self.enhance_ctx)
              messages.append({
                  'role':
-                 'user',
+                 'user',
                  'content':
-                 f'Create an imaginative video descriptive caption or modify an earlier caption in ENGLISH for the user input: "{prompt}"',
+                 f'Create an imaginative video descriptive caption or modify an earlier caption in ENGLISH for the user input: "{prompt}"',
              })
              lock.acquire()
              outputs = self.enhancer(
@@ -748,31 +743,31 @@ class ChatBotUI(object):
          ########################################
          @spaces.GPU(duration=240)
          def run_chat(
-                 message,
-                 legacy_image,
-                 ui_mode,
-                 use_ace,
-                 extend_prompt,
-                 history,
-                 images,
-                 use_history,
-                 history_result,
-                 negative_prompt,
-                 cfg_scale,
-                 rescale,
-                 refiner_prompt,
-                 refiner_scale,
-                 step,
-                 seed,
-                 output_h,
-                 output_w,
-                 video_auto,
-                 video_steps,
-                 video_frames,
-                 video_cfg_scale,
-                 video_fps,
-                 video_seed,
-                 progress=gr.Progress(track_tqdm=True)):
+                 message,
+                 legacy_image,
+                 ui_mode,
+                 use_ace,
+                 extend_prompt,
+                 history,
+                 images,
+                 use_history,
+                 history_result,
+                 negative_prompt,
+                 cfg_scale,
+                 rescale,
+                 refiner_prompt,
+                 refiner_scale,
+                 step,
+                 seed,
+                 output_h,
+                 output_w,
+                 video_auto,
+                 video_steps,
+                 video_frames,
+                 video_cfg_scale,
+                 video_fps,
+                 video_seed,
+                 progress=gr.Progress(track_tqdm=True)):
              legacy_img_ids = []
              if ui_mode == 'legacy':
                  if legacy_image is not None:
@@ -803,8 +798,12 @@ class ChatBotUI(object):
                      )
                      continue
                  placeholder = '{image}' if i == 0 else '{' + f'image{i}' + '}'
-                 new_message = re.sub(f'@{img_id}', placeholder,
-                                      new_message)
+                 if placeholder not in new_message:
+                     new_message = re.sub(f'@{img_id}', placeholder,
+                                          new_message)
+                 else:
+                     new_message = re.sub(f'@{img_id} ', "",
+                                          new_message, 1)
                  img_meta = images[img_id]
                  img_path = img_meta['image']
                  img_mask = img_meta['mask']
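
The hunk above guards the `@img_id` substitution in `run_chat`. A standalone sketch of that branch (hypothetical helper name; the surrounding message handling is simplified):

```python
import re

def replace_image_refs(message, img_ids):
    # The first mention of '@<img_id>' becomes a '{image}'/'{imageN}'
    # placeholder; if the placeholder is already present (e.g., injected
    # by the legacy-uploader path), one duplicate '@<img_id> ' mention is
    # dropped instead of being substituted a second time.
    for i, img_id in enumerate(img_ids):
        placeholder = '{image}' if i == 0 else '{' + f'image{i}' + '}'
        if placeholder not in message:
            message = re.sub(f'@{img_id}', placeholder, message)
        else:
            message = re.sub(f'@{img_id} ', '', message, count=1)
    return message

print(replace_image_refs('make @abc123 brighter', ['abc123']))
# -> 'make {image} brighter'
```
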
@@ -845,7 +844,7 @@ class ChatBotUI(object):
                  history_io=history_io,
                  output_height=output_h,
                  output_width=output_w,
-                 sampler='ddim',
+                 sampler=self.pipe.input.get("sampler", "ddim"),
                  sample_steps=step,
                  guide_scale=cfg_scale,
                  guide_rescale=rescale,
@@ -916,7 +915,7 @@ class ChatBotUI(object):
                      device='cuda').manual_seed(video_seed)
                  pixel_values = load_image(img.convert('RGB'),
                                            max_num=self.llm_max_num).to(
-                     torch.bfloat16).cuda()
+                     torch.bfloat16).cuda()
                  prompt = self.captioner.chat(self.llm_tokenizer, pixel_values,
                                               self.llm_prompt,
                                               self.llm_generation_config)
@@ -927,9 +926,9 @@ class ChatBotUI(object):
                  messages = copy.deepcopy(self.enhance_ctx)
                  messages.append({
                      'role':
-                     'user',
+                     'user',
                      'content':
-                     f'Create an imaginative video descriptive caption or modify an earlier caption in ENGLISH for the user input: "{prompt}"',
+                     f'Create an imaginative video descriptive caption or modify an earlier caption in ENGLISH for the user input: "{prompt}"',
                  })
                  lock.acquire()
                  outputs = self.enhancer(
@@ -964,7 +963,7 @@ class ChatBotUI(object):
              return (history, images, gr.Image(value=save_path),
                      history_result, self.get_history(
                          history), gr.update(), gr.update(
-                         visible=False), retry_msg)
+                         visible=False), retry_msg)
  
          chat_inputs = [
              self.legacy_image_uploader, self.ui_mode, self.use_ace,
@@ -1014,9 +1013,13 @@ class ChatBotUI(object):
                      w = int(w / ratio)
                      img = img.resize((w, h))
                  edit_image.append(img)
+                 if img_mask is not None:
+                     img_mask = img_mask if np.sum(np.array(img_mask)) > 0 else None
                  edit_image_mask.append(
                      img_mask if img_mask is not None else None)
                  edit_task.append(task)
+                 if ref1 is not None:
+                     ref1 = ref1 if np.sum(np.array(ref1)) > 0 else None
                  if ref1 is not None:
                      edit_image.append(ref1)
                      edit_image_mask.append(None)
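
The two guards added above treat an all-black mask or reference image as absent. A minimal sketch of that check:

```python
import numpy as np
from PIL import Image

def drop_blank(img):
    # An all-zero (fully black) mask or reference carries no usable
    # region information, so treat it as if nothing was provided.
    if img is not None and np.sum(np.array(img)) == 0:
        return None
    return img

assert drop_blank(Image.new('L', (64, 64), 0)) is None        # blank -> None
assert drop_blank(Image.new('L', (64, 64), 255)) is not None  # real mask kept
```
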
@@ -1052,8 +1055,12 @@ class ChatBotUI(object):
                  img_str = f'<img src="data:image/png;base64,{img_b64}" style="pointer-events: none;">'
                  history = [(prompt,
                              f'{pre_info} The generated image is:\n {img_str}')]
+ 
+             img_id = get_md5(img_b64)[:12]
+             save_path = os.path.join(self.cache_dir, f'{img_id}.png')
+             img.convert('RGB').save(save_path)
              return self.get_history(history), gr.update(value=''), gr.update(
-                 visible=False), gr.update(value=-1)
+                 visible=False), gr.update(value=save_path), gr.update(value=-1)
  
          with self.eg:
              self.example_task = gr.Text(label='Task Name',
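
The hunk above now persists the example output so the legacy image viewer receives a stable file path. A sketch of the idea, assuming `get_md5` in the diff is an MD5-hex helper (`hashlib` stands in here):

```python
import base64
import hashlib
import io
import os

from PIL import Image

def cache_example_image(img, cache_dir):
    # Encode the image, derive a short content-based id, and save a PNG,
    # mirroring get_md5(img_b64)[:12] and the save_path logic above.
    buf = io.BytesIO()
    img.convert('RGB').save(buf, format='PNG')
    img_b64 = base64.b64encode(buf.getvalue()).decode()
    img_id = hashlib.md5(img_b64.encode()).hexdigest()[:12]
    os.makedirs(cache_dir, exist_ok=True)
    save_path = os.path.join(cache_dir, f'{img_id}.png')
    img.convert('RGB').save(save_path)
    return save_path
```
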
@@ -1079,8 +1086,9 @@ class ChatBotUI(object):
                      self.example_task, self.example_image, self.example_mask,
                      self.example_ref_im1, self.text, self.seed
                  ],
-                 outputs=[self.chatbot, self.text, self.gallery, self.seed],
+                 outputs=[self.chatbot, self.text, self.gallery, self.legacy_image_viewer, self.seed],
                  examples_per_page=4,
+                 cache_examples=False,
                  run_on_click=True)
  
          ########################################
@@ -1173,7 +1181,7 @@ class ChatBotUI(object):
                  image, history, images)
              return gr.update(visible=False), gr.update(
                  visible=True), gr.update(
-                     value=self.get_history(history)), history, images
+                     value=self.get_history(history)), history, images
  
          self.sub_btn_1.click(
              submit_upload_image,
@@ -1189,7 +1197,7 @@ class ChatBotUI(object):
                  imagemask, mask_type, history, images)
              return gr.update(visible=False), gr.update(
                  visible=True), gr.update(
-                     value=self.get_history(history)), history, images
+                     value=self.get_history(history)), history, images
  
          self.sub_btn_2.click(submit_edit_image,
                               inputs=[
@@ -1405,11 +1413,10 @@ class ChatBotUI(object):
          return history, images, img_id
  
  
- 
  if __name__ == '__main__':
      cfg = "config/chatbot_ui.yaml"
      with gr.Blocks() as demo:
          chatbot = ChatBotUI(cfg)
          chatbot.create_ui()
          chatbot.set_callbacks()
-     demo.launch()
+     demo.launch()