OmniGen-JP

Running on Zero

App Files Files Community

MakiAi committed on Oct 27, 2024

Commit

63ee519

verified ·

1 Parent(s): 0d3229d

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -48

app.py CHANGED Viewed

@@ -5,19 +5,22 @@ import spaces
 from OmniGen import OmniGenPipeline
 pipe = OmniGenPipeline.from_pretrained(
     "Shitao/OmniGen-v1"
 )
 @spaces.GPU(duration=180)
-# 示例处理函数：生成图像
 def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer):
     input_images = [img1, img2, img3]
-    # 去除 None
     input_images = [img for img in input_images if img is not None]
     if len(input_images) == 0:
         input_images = None
     output = pipe(
         prompt=text,
         input_images=input_images,
@@ -26,24 +29,14 @@ def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_gu
         guidance_scale=guidance_scale,
         img_guidance_scale=1.6,
         num_inference_steps=inference_steps,
-        separate_cfg_infer=True, # set False can speed up the inference process
         use_kv_cache=False,
         seed=seed,
-        # separate_cfg_infer=separate_cfg_infer,
     )
     img = output[0]
     return img
-# def generate_image(text, img1, img2, img3, height, width, guidance_scale, inference_steps):
-#     input_images = []
-#     if img1:
-#         input_images.append(Image.open(img1))
-#     if img2:
-#         input_images.append(Image.open(img2))
-#     if img3:
-#         input_images.append(Image.open(img3))
-#     return input_images[0] if input_images else None
 def get_example():
     case = [
@@ -231,81 +224,90 @@ def get_example():
         ],
     ]
     return case
 def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer,):
     return generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer,)
 description = """
-OmniGen is a unified image generation model that you can use to perform various tasks, including but not limited to text-to-image generation, subject-driven generation, Identity-Preserving Generation, and image-conditioned generation.
-For multi-modal to image generation, you should pass a string as `prompt`, and a list of image paths as `input_images`. The placeholder in the prompt should be in the format of `<img><|image_*|></img>` (for the first image, the placeholder is <img><|image_1|></img>. for the second image, the the placeholder is <img><|image_2|></img>).
-For example, use an image of a woman to generate a new image:
-prompt = "A woman holds a bouquet of flowers and faces the camera. Thw woman is \<img\>\<|image_1|\>\</img\>."
-Tips:
-- Oversaturated: If the image appears oversaturated, please reduce the `guidance_scale`.
-- Low-quality: More detailed prompt will lead to better results.
-- Animate Style: If the genereate images is in animate style, you can try to add `photo` to the prompt`.
-- Edit generated image. If you generate a image by omnigen and then want to edit it, you cannot use the same seed to edit this image. For example, use seed=0 to generate image, and should use seed=1 to edit this image.
-- For image editing tasks, we recommend placing the image before the editing instruction. For example, use `<img><|image_1|></img> remove suit`, rather than `remove suit <img><|image_1|></img>`.
 """
 separate_cfg_infer_arg = False
-# Gradio 接口
 with gr.Blocks() as demo:
-    gr.Markdown("# OmniGen: Unified Image Generation [paper](https://arxiv.org/abs/2409.11340) [code](https://github.com/VectorSpaceLab/OmniGen)")
     gr.Markdown(description)
     with gr.Row():
         with gr.Column():
-            # 文本输入框
             prompt_input = gr.Textbox(
-                label="Enter your prompt, use <img><|image_i|></img> to represent i-th input image", placeholder="Type your prompt here..."
             )
             with gr.Row(equal_height=True):
-                # 图片上传框
-                image_input_1 = gr.Image(label="<img><|image_1|></img>", type="filepath")
-                image_input_2 = gr.Image(label="<img><|image_2|></img>", type="filepath")
-                image_input_3 = gr.Image(label="<img><|image_3|></img>", type="filepath")
-            # 高度和宽度滑块
             height_input = gr.Slider(
-                label="Height", minimum=256, maximum=2048, value=1024, step=16
             )
             width_input = gr.Slider(
-                label="Width", minimum=256, maximum=2048, value=1024, step=16
             )
-            # 引导尺度输入
             guidance_scale_input = gr.Slider(
-                label="Guidance Scale", minimum=1.0, maximum=5.0, value=2.5, step=0.1
             )
             img_guidance_scale_input = gr.Slider(
-                label="img_guidance_scale", minimum=1.0, maximum=2.0, value=1.6, step=0.1
             )
             num_inference_steps = gr.Slider(
-                label="Inference Steps", minimum=1, maximum=100, value=50, step=1
             )
             seed_input = gr.Slider(
-                label="Seed", minimum=0, maximum=2147483647, value=42, step=1
             )
             separate_cfg_infer = gr.Checkbox(
-                label="separate_cfg_infer", info="enable separate cfg infer"
             )
-            # 生成按钮
-            generate_button = gr.Button("Generate Image")
         with gr.Column():
-            # 输出图像框
-            output_image = gr.Image(label="Output Image")
-    # 按钮点击事件
     generate_button.click(
         generate_image,
         inputs=[
@@ -324,6 +326,7 @@ with gr.Blocks() as demo:
         outputs=output_image,
     )
     gr.Examples(
         examples=get_example(),
         fn=run_for_examples,
@@ -343,5 +346,5 @@ with gr.Blocks() as demo:
         outputs=output_image,
     )
-# 启动应用
 demo.launch()

 from OmniGen import OmniGenPipeline
+# OmniGenモデルの初期化
 pipe = OmniGenPipeline.from_pretrained(
     "Shitao/OmniGen-v1"
 )
 @spaces.GPU(duration=180)
+# 画像生成の主要機能
 def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer):
+    # 入力画像の処理
     input_images = [img1, img2, img3]
+    # Noneの画像を除外
     input_images = [img for img in input_images if img is not None]
     if len(input_images) == 0:
         input_images = None
+    # モデルを使用して画像生成
     output = pipe(
         prompt=text,
         input_images=input_images,
         guidance_scale=guidance_scale,
         img_guidance_scale=1.6,
         num_inference_steps=inference_steps,
+        separate_cfg_infer=True, # Falseにすると推論が高速化
         use_kv_cache=False,
         seed=seed,
     )
     img = output[0]
     return img
+# テストケース用のサンプルデータを取得
 def get_example():
     case = [
         ],
     ]
     return case
+# サンプル実行用の関数
 def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer,):
     """Callback handed to ``gr.Examples`` as ``fn``.

     Forwards every argument positionally and unchanged, in the order
     received, to ``generate_image`` and returns whatever that call returns.
     """
     forwarded = (
         text, img1, img2, img3,
         height, width,
         guidance_scale, img_guidance_scale,
         inference_steps, seed, separate_cfg_infer,
     )
     return generate_image(*forwarded)
+# アプリケーションの説明文
 description = """
+OmniGenは、以下のような様々なタスクを実行できる統合画像生成モデルです：
+- テキストから画像への生成
+- 被写体主導の生成
+- アイデンティティを保持した生成
+- 画像条件付き生成
+マルチモーダルから画像を生成する場合：
+- プロンプトには文字列を入力
+- 入力画像はリストとして渡す
+- プロンプト内の画像プレースホルダーは `<img><|image_*|></img>` 形式で指定
+  (1番目の画像は <img><|image_1|></img>、2番目は <img><|image_2|></img>)
+使用上のヒント：
+- 色が過飽和な場合：`guidance_scale` を下げてください
+- 画質が低い場合：より詳細なプロンプトを使用してください
+- アニメ調の場合：プロンプトに `photo` を追加してみてください
+- 生成済み画像の編集：同じseedは使用できません（例：生成時seed=0なら、編集時はseed=1など）
+- 画像編集タスクでは、画像を編集指示の前に配置することを推奨
+  （例：`<img><|image_1|></img> remove suit` を使用し、`remove suit <img><|image_1|></img>` は避ける）
 """
 separate_cfg_infer_arg = False
+# Gradio インターフェースの構築
 with gr.Blocks() as demo:
+    gr.Markdown("# OmniGen: 統合画像生成モデル [論文](https://arxiv.org/abs/2409.11340) [コード](https://github.com/VectorSpaceLab/OmniGen)")
     gr.Markdown(description)
     with gr.Row():
         with gr.Column():
+            # プロンプト入力
             prompt_input = gr.Textbox(
+                label="プロンプトを入力してください（i番目の入力画像は<img><|image_i|></img>で指定）",
+                placeholder="ここにプロンプトを入力..."
             )
             with gr.Row(equal_height=True):
+                # 画像入力
+                image_input_1 = gr.Image(label="画像1: <img><|image_1|></img>", type="filepath")
+                image_input_2 = gr.Image(label="画像2: <img><|image_2|></img>", type="filepath")
+                image_input_3 = gr.Image(label="画像3: <img><|image_3|></img>", type="filepath")
+            # 画像サイズ設定
             height_input = gr.Slider(
+                label="画像の高さ", minimum=256, maximum=2048, value=1024, step=16
             )
             width_input = gr.Slider(
+                label="画像の幅", minimum=256, maximum=2048, value=1024, step=16
             )
+            # 各種パラメータ設定
             guidance_scale_input = gr.Slider(
+                label="ガイダンススケール", minimum=1.0, maximum=5.0, value=2.5, step=0.1
             )
             img_guidance_scale_input = gr.Slider(
+                label="画像ガイダンススケール", minimum=1.0, maximum=2.0, value=1.6, step=0.1
             )
             num_inference_steps = gr.Slider(
+                label="推論ステップ数", minimum=1, maximum=100, value=50, step=1
             )
             seed_input = gr.Slider(
+                label="シード値", minimum=0, maximum=2147483647, value=42, step=1
             )
             separate_cfg_infer = gr.Checkbox(
+                label="CFG推論を分離", info="分離CFG推論を有効にする"
             )
+            # 生成ボタン
+            generate_button = gr.Button("画像を生成")
         with gr.Column():
+            # 出力画像表示
+            output_image = gr.Image(label="生成された画像")
+    # ボタンクリックイベントの設定
     generate_button.click(
         generate_image,
         inputs=[
         outputs=output_image,
     )
+    # サンプル例の設定
     gr.Examples(
         examples=get_example(),
         fn=run_for_examples,
         outputs=output_image,
     )
+# アプリケーションの起動
 demo.launch()