MakiAi committed on
Commit
63ee519
1 Parent(s): 0d3229d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -48
app.py CHANGED
@@ -5,19 +5,22 @@ import spaces
5
 
6
  from OmniGen import OmniGenPipeline
7
 
 
8
  pipe = OmniGenPipeline.from_pretrained(
9
  "Shitao/OmniGen-v1"
10
  )
11
 
12
  @spaces.GPU(duration=180)
13
- # 示例处理函数:生成图像
14
  def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer):
 
15
  input_images = [img1, img2, img3]
16
- # 去除 None
17
  input_images = [img for img in input_images if img is not None]
18
  if len(input_images) == 0:
19
  input_images = None
20
 
 
21
  output = pipe(
22
  prompt=text,
23
  input_images=input_images,
@@ -26,24 +29,14 @@ def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_gu
26
  guidance_scale=guidance_scale,
27
  img_guidance_scale=1.6,
28
  num_inference_steps=inference_steps,
29
- separate_cfg_infer=True, # set False can speed up the inference process
30
  use_kv_cache=False,
31
  seed=seed,
32
- # separate_cfg_infer=separate_cfg_infer,
33
  )
34
  img = output[0]
35
  return img
36
- # def generate_image(text, img1, img2, img3, height, width, guidance_scale, inference_steps):
37
- # input_images = []
38
- # if img1:
39
- # input_images.append(Image.open(img1))
40
- # if img2:
41
- # input_images.append(Image.open(img2))
42
- # if img3:
43
- # input_images.append(Image.open(img3))
44
-
45
- # return input_images[0] if input_images else None
46
 
 
47
 
48
  def get_example():
49
  case = [
@@ -231,81 +224,90 @@ def get_example():
231
  ],
232
  ]
233
  return case
234
-
235
  def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer,):
236
  return generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer,)
237
 
 
238
  description = """
239
- OmniGen is a unified image generation model that you can use to perform various tasks, including but not limited to text-to-image generation, subject-driven generation, Identity-Preserving Generation, and image-conditioned generation.
 
 
 
 
240
 
241
- For multi-modal to image generation, you should pass a string as `prompt`, and a list of image paths as `input_images`. The placeholder in the prompt should be in the format of `<img><|image_*|></img>` (for the first image, the placeholder is <img><|image_1|></img>. for the second image, the the placeholder is <img><|image_2|></img>).
242
- For example, use an image of a woman to generate a new image:
243
- prompt = "A woman holds a bouquet of flowers and faces the camera. Thw woman is \<img\>\<|image_1|\>\</img\>."
 
 
244
 
245
- Tips:
246
- - Oversaturated: If the image appears oversaturated, please reduce the `guidance_scale`.
247
- - Low-quality: More detailed prompt will lead to better results.
248
- - Animate Style: If the genereate images is in animate style, you can try to add `photo` to the prompt`.
249
- - Edit generated image. If you generate a image by omnigen and then want to edit it, you cannot use the same seed to edit this image. For example, use seed=0 to generate image, and should use seed=1 to edit this image.
250
- - For image editing tasks, we recommend placing the image before the editing instruction. For example, use `<img><|image_1|></img> remove suit`, rather than `remove suit <img><|image_1|></img>`.
 
251
  """
252
 
253
  separate_cfg_infer_arg = False
254
 
255
- # Gradio 接口
256
  with gr.Blocks() as demo:
257
- gr.Markdown("# OmniGen: Unified Image Generation [paper](https://arxiv.org/abs/2409.11340) [code](https://github.com/VectorSpaceLab/OmniGen)")
258
  gr.Markdown(description)
259
  with gr.Row():
260
  with gr.Column():
261
- # 文本输入框
262
  prompt_input = gr.Textbox(
263
- label="Enter your prompt, use <img><|image_i|></img> to represent i-th input image", placeholder="Type your prompt here..."
 
264
  )
265
 
266
  with gr.Row(equal_height=True):
267
- # 图片上传框
268
- image_input_1 = gr.Image(label="<img><|image_1|></img>", type="filepath")
269
- image_input_2 = gr.Image(label="<img><|image_2|></img>", type="filepath")
270
- image_input_3 = gr.Image(label="<img><|image_3|></img>", type="filepath")
271
 
272
- # 高度和宽度滑块
273
  height_input = gr.Slider(
274
- label="Height", minimum=256, maximum=2048, value=1024, step=16
275
  )
276
  width_input = gr.Slider(
277
- label="Width", minimum=256, maximum=2048, value=1024, step=16
278
  )
279
 
280
- # 引导尺度输入
281
  guidance_scale_input = gr.Slider(
282
- label="Guidance Scale", minimum=1.0, maximum=5.0, value=2.5, step=0.1
283
  )
284
 
285
  img_guidance_scale_input = gr.Slider(
286
- label="img_guidance_scale", minimum=1.0, maximum=2.0, value=1.6, step=0.1
287
  )
288
 
289
  num_inference_steps = gr.Slider(
290
- label="Inference Steps", minimum=1, maximum=100, value=50, step=1
291
  )
292
 
293
  seed_input = gr.Slider(
294
- label="Seed", minimum=0, maximum=2147483647, value=42, step=1
295
  )
296
 
297
  separate_cfg_infer = gr.Checkbox(
298
- label="separate_cfg_infer", info="enable separate cfg infer"
299
  )
300
 
301
- # 生成按钮
302
- generate_button = gr.Button("Generate Image")
303
 
304
  with gr.Column():
305
- # 输出图像框
306
- output_image = gr.Image(label="Output Image")
307
 
308
- # 按钮点击事件
309
  generate_button.click(
310
  generate_image,
311
  inputs=[
@@ -324,6 +326,7 @@ with gr.Blocks() as demo:
324
  outputs=output_image,
325
  )
326
 
 
327
  gr.Examples(
328
  examples=get_example(),
329
  fn=run_for_examples,
@@ -343,5 +346,5 @@ with gr.Blocks() as demo:
343
  outputs=output_image,
344
  )
345
 
346
- # 启动应用
347
  demo.launch()
 
5
 
6
  from OmniGen import OmniGenPipeline
7
 
8
+ # OmniGenモデルの初期化
9
  pipe = OmniGenPipeline.from_pretrained(
10
  "Shitao/OmniGen-v1"
11
  )
12
 
13
  @spaces.GPU(duration=180)
14
+ # 画像生成の主要機能
15
  def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer):
16
+ # 入力画像の処理
17
  input_images = [img1, img2, img3]
18
+ # Noneの画像を除外
19
  input_images = [img for img in input_images if img is not None]
20
  if len(input_images) == 0:
21
  input_images = None
22
 
23
+ # モデルを使用して画像生成
24
  output = pipe(
25
  prompt=text,
26
  input_images=input_images,
 
29
  guidance_scale=guidance_scale,
30
  img_guidance_scale=1.6,
31
  num_inference_steps=inference_steps,
32
+ separate_cfg_infer=True, # Falseにすると推論が高速化
33
  use_kv_cache=False,
34
  seed=seed,
 
35
  )
36
  img = output[0]
37
  return img
 
 
 
 
 
 
 
 
 
 
38
 
39
+ # テストケース用のサンプルデータを取得
40
 
41
  def get_example():
42
  case = [
 
224
  ],
225
  ]
226
  return case
227
+ # サンプル実行用の関数
228
  def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer,):
229
  return generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer,)
230
 
231
+ # アプリケーションの説明文
232
  description = """
233
+ OmniGenは、以下のような様々なタスクを実行できる統合画像生成モデルです:
234
+ - テキストから画像への生成
235
+ - 被写体主導の生成
236
+ - アイデンティティを保持した生成
237
+ - 画像条件付き生成
238
 
239
+ マルチモーダルから画像を生成する場合:
240
+ - プロンプトには文字列を入力
241
+ - 入力画像はリストとして渡す
242
+ - プロンプト内の画像プレースホルダーは `<img><|image_*|></img>` 形式で指定
243
+ (1番目の画像は <img><|image_1|></img>、2番目は <img><|image_2|></img>)
244
 
245
+ 使用上のヒント:
246
+ - 色が過飽和な場合:`guidance_scale` を下げてください
247
+ - 画質が低い場合:より詳細なプロンプトを使用してください
248
+ - アニメ調の場合:プロンプトに `photo` を追加してみてください
249
+ - 生成済み画像の編集:同じseedは使用できません(例:生成時seed=0なら、編集時はseed=1など)
250
+ - 画像編集タスクでは、画像を編集指示の前に配置することを推奨
251
+ (例:`<img><|image_1|></img> remove suit` を使用し、`remove suit <img><|image_1|></img>` は避ける)
252
  """
253
 
254
  separate_cfg_infer_arg = False
255
 
256
+ # Gradio インターフェースの構築
257
  with gr.Blocks() as demo:
258
+ gr.Markdown("# OmniGen: 統合画像生成モデル [論文](https://arxiv.org/abs/2409.11340) [コード](https://github.com/VectorSpaceLab/OmniGen)")
259
  gr.Markdown(description)
260
  with gr.Row():
261
  with gr.Column():
262
+ # プロンプト入力
263
  prompt_input = gr.Textbox(
264
+ label="プロンプトを入力してください(i番目の入力画像は<img><|image_i|></img>で指定)",
265
+ placeholder="ここにプロンプトを入力..."
266
  )
267
 
268
  with gr.Row(equal_height=True):
269
+ # 画像入力
270
+ image_input_1 = gr.Image(label="画像1: <img><|image_1|></img>", type="filepath")
271
+ image_input_2 = gr.Image(label="画像2: <img><|image_2|></img>", type="filepath")
272
+ image_input_3 = gr.Image(label="画像3: <img><|image_3|></img>", type="filepath")
273
 
274
+ # 画像サイズ設定
275
  height_input = gr.Slider(
276
+ label="画像の高さ", minimum=256, maximum=2048, value=1024, step=16
277
  )
278
  width_input = gr.Slider(
279
+ label="画像の幅", minimum=256, maximum=2048, value=1024, step=16
280
  )
281
 
282
+ # 各種パラメータ設定
283
  guidance_scale_input = gr.Slider(
284
+ label="ガイダンススケール", minimum=1.0, maximum=5.0, value=2.5, step=0.1
285
  )
286
 
287
  img_guidance_scale_input = gr.Slider(
288
+ label="画像ガイダンススケール", minimum=1.0, maximum=2.0, value=1.6, step=0.1
289
  )
290
 
291
  num_inference_steps = gr.Slider(
292
+ label="推論ステップ数", minimum=1, maximum=100, value=50, step=1
293
  )
294
 
295
  seed_input = gr.Slider(
296
+ label="シード値", minimum=0, maximum=2147483647, value=42, step=1
297
  )
298
 
299
  separate_cfg_infer = gr.Checkbox(
300
+ label="CFG推論を分離", info="分離CFG推論を有効にする"
301
  )
302
 
303
+ # 生成ボタン
304
+ generate_button = gr.Button("画像を生成")
305
 
306
  with gr.Column():
307
+ # 出力画像表示
308
+ output_image = gr.Image(label="生成された画像")
309
 
310
+ # ボタンクリックイベントの設定
311
  generate_button.click(
312
  generate_image,
313
  inputs=[
 
326
  outputs=output_image,
327
  )
328
 
329
+ # サンプル例の設定
330
  gr.Examples(
331
  examples=get_example(),
332
  fn=run_for_examples,
 
346
  outputs=output_image,
347
  )
348
 
349
+ # アプリケーションの起動
350
  demo.launch()