Spaces:
Running
on
Zero
Running
on
Zero
File size: 11,885 Bytes
f8a748e 7f48662 1876385 f8a748e 7f48662 f8a748e 63ee519 200a130 c38e273 200a130 f8a748e ef2764c 63ee519 5ed5dc0 63ee519 7f48662 63ee519 7f48662 f8a748e 63ee519 7f48662 f8a748e 7f48662 200a130 7f48662 200a130 63ee519 200a130 44bc074 7f48662 200a130 63ee519 200a130 c38e273 730f5fd c38e273 4004f94 730f5fd c38e273 4004f94 0d3229d c38e273 200a130 730f5fd 89a8b3c 200a130 4004f94 730f5fd eb7f8ad 730f5fd 0d3229d 4004f94 730f5fd 4004f94 0d3229d 200a130 730f5fd 0d3229d 730f5fd 0d3229d 730f5fd 4004f94 730f5fd 0d3229d 730f5fd 4004f94 0d3229d 4004f94 730f5fd 4004f94 0d3229d 4004f94 730f5fd 4004f94 0d3229d 4004f94 200a130 4004f94 730f5fd eb7f8ad 44bc074 0d3229d 200a130 4004f94 730f5fd 4004f94 0d3229d 4004f94 730f5fd 0d3229d 730f5fd 4004f94 730f5fd 4004f94 0d3229d 4004f94 687aaef 730f5fd 687aaef 730f5fd 687aaef 730f5fd 0d3229d 687aaef 200a130 63ee519 5ed5dc0 200a130 63ee519 4004f94 63ee519 4004f94 63ee519 730f5fd 63ee519 4004f94 7f48662 5ed5dc0 63ee519 7f48662 63ee519 4004f94 7f48662 63ee519 200a130 63ee519 200a130 63ee519 200a130 63ee519 200a130 63ee519 200a130 63ee519 200a130 63ee519 200a130 63ee519 730f5fd 63ee519 200a130 63ee519 200a130 44bc074 63ee519 44bc074 5ed5dc0 63ee519 5ed5dc0 63ee519 200a130 7f48662 63ee519 7f48662 63ee519 7f48662 200a130 730f5fd 200a130 44bc074 5ed5dc0 200a130 63ee519 200a130 730f5fd 200a130 44bc074 5ed5dc0 200a130 f8a748e 63ee519 7f48662 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 |
import gradio as gr
from PIL import Image
import os
import spaces
from OmniGen import OmniGenPipeline
# Load the pretrained OmniGen pipeline once at import time; generate_image
# reuses this module-level instance for every call.
pipe = OmniGenPipeline.from_pretrained("Shitao/OmniGen-v1")
# Main image-generation entry point (runs inside a `spaces.GPU` allocation).
@spaces.GPU(duration=180)
def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer):
    """Generate one image with the module-level OmniGen pipeline.

    Args:
        text: Prompt string. Input images are referenced inside it with
            ``<img><|image_N|></img>`` placeholders.
        img1: First optional input image (the UI passes a file path), or None.
        img2: Second optional input image, or None.
        img3: Third optional input image, or None.
        height: Output image height, forwarded to the pipeline.
        width: Output image width, forwarded to the pipeline.
        guidance_scale: Text guidance scale, forwarded to the pipeline.
        img_guidance_scale: Image guidance scale, forwarded to the pipeline.
        inference_steps: Forwarded as ``num_inference_steps``.
        seed: Random seed, forwarded to the pipeline.
        separate_cfg_infer: Accepted so the signature matches the UI inputs,
            but currently NOT forwarded; see the note in the body.

    Returns:
        The first element of the pipeline output.
    """
    # Drop the empty image slots; the pipeline receives None (not an empty
    # list) when no input image was supplied.
    input_images = [img for img in (img1, img2, img3) if img is not None] or None

    # Run the model.
    output = pipe(
        prompt=text,
        input_images=input_images,
        height=height,
        width=width,
        guidance_scale=guidance_scale,
        # Honor the caller's value instead of a hard-coded 1.6 so the UI
        # slider and the per-example settings actually take effect.
        img_guidance_scale=img_guidance_scale,
        num_inference_steps=inference_steps,
        # Deliberately pinned by the original author, whose note says that
        # switching this to False makes inference faster.
        # NOTE(review): the `separate_cfg_infer` argument is therefore
        # ignored - presumably to limit GPU memory use; confirm before
        # wiring the checkbox value through.
        separate_cfg_infer=True,
        use_kv_cache=False,
        seed=seed,
    )
    return output[0]
# Sample inputs shown in the demo's "Examples" table.
def get_example():
    """Return the example rows for the demo.

    Each row is a list whose positions match the UI inputs:
    ``[prompt, image_1, image_2, image_3, height, width, guidance_scale,
    img_guidance_scale, inference_steps, seed, separate_cfg_infer]``.
    Unused image slots are ``None``. Every row uses a 1024x1024 output,
    50 inference steps and ``separate_cfg_infer=True``.

    Image placeholders in the prompts are written consistently as
    ``<img><|image_N|></img>``: the previous ``<\\/img>`` spelling was an
    invalid escape sequence that left a stray backslash in the prompt, and a
    few prompts used an unclosed ``<img>`` instead of ``</img>``.

    Returns:
        A list of 14 example rows (lists of 11 values each).
    """

    def _row(prompt, images=(), guidance_scale=2.5, img_guidance_scale=1.6, seed=0):
        # Pad the image paths to exactly three slots and append the settings
        # shared by every example.
        slots = list(images) + [None] * (3 - len(images))
        return [prompt, *slots, 1024, 1024, guidance_scale, img_guidance_scale, 50, seed, True]

    # The three control-style examples share the same scene description.
    library_scene = (
        " is sitting on a sofa in the library, holding a book. His hair is neatly combed, "
        "and a faint smile plays on his lips, with a few freckles scattered across his cheeks. "
        "The library is quiet, with rows of shelves filled with books stretching out behind him."
    )

    case = [
        _row("A curly-haired man in a red shirt is drinking tea."),
        _row(
            "The woman in <img><|image_1|></img> waves her hand happily in the crowd",
            ["./imgs/test_cases/zhang.png"],
            img_guidance_scale=1.9,
            seed=128,
        ),
        _row(
            "A man in a black shirt is reading a book. The man is the right man in <img><|image_1|></img>.",
            ["./imgs/test_cases/two_man.jpg"],
        ),
        _row(
            "Two woman are raising fried chicken legs in a bar. A woman is <img><|image_1|></img>. The other woman is <img><|image_2|></img>.",
            ["./imgs/test_cases/mckenna.jpg", "./imgs/test_cases/Amanda.jpg"],
            img_guidance_scale=1.8,
            seed=168,
        ),
        _row(
            "A man and a short-haired woman with a wrinkled face are standing in front of a bookshelf in a library. The man is the man in the middle of <img><|image_1|></img>, and the woman is oldest woman in <img><|image_2|></img>",
            ["./imgs/test_cases/1.jpg", "./imgs/test_cases/2.jpg"],
            seed=60,
        ),
        _row(
            "A man and a woman are sitting at a classroom desk. The man is the man with yellow hair in <img><|image_1|></img>. The woman is the woman on the left of <img><|image_2|></img>",
            ["./imgs/test_cases/3.jpg", "./imgs/test_cases/4.jpg"],
            img_guidance_scale=1.8,
            seed=66,
        ),
        _row(
            "The flower <img><|image_1|></img> is placed in the vase which is in the middle of <img><|image_2|></img> on a wooden table of a living room",
            ["./imgs/test_cases/rose.jpg", "./imgs/test_cases/vase.jpg"],
        ),
        _row(
            "<img><|image_1|></img>\n Remove the woman's earrings. Replace the mug with a clear glass filled with sparkling iced cola.",
            ["./imgs/demo_cases/t2i_woman_with_book.png"],
            seed=222,
        ),
        _row(
            "Detect the skeleton of human in this image: <img><|image_1|></img>.",
            ["./imgs/test_cases/control.jpg"],
            guidance_scale=2.0,
        ),
        _row(
            "Generate a new photo using the following picture and text as conditions: <img><|image_1|></img>\n A young boy" + library_scene,
            ["./imgs/demo_cases/skeletal.png"],
            guidance_scale=2,
            seed=42,
        ),
        _row(
            "Following the pose of this image <img><|image_1|></img>, generate a new photo: A young boy" + library_scene,
            ["./imgs/demo_cases/edit.png"],
            guidance_scale=2.0,
            seed=123,
        ),
        _row(
            "Following the depth mapping of this image <img><|image_1|></img>, generate a new photo: A young girl" + library_scene,
            ["./imgs/demo_cases/edit.png"],
            guidance_scale=2.0,
            seed=1,
        ),
        _row(
            "<img><|image_1|></img> What item can be used to see the current time? Please remove it.",
            ["./imgs/test_cases/watch.jpg"],
        ),
        _row(
            "According to the following examples, generate an output for the input.\nInput: <img><|image_1|></img>\nOutput: <img><|image_2|></img>\n\nInput: <img><|image_3|></img>\nOutput: ",
            ["./imgs/test_cases/icl1.jpg", "./imgs/test_cases/icl2.jpg", "./imgs/test_cases/icl3.jpg"],
            seed=1,
        ),
    ]
    return case
# Callback used when an example row is run.
def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer):
    """Run an example row by delegating to generate_image.

    The parameters are the same as generate_image's and are passed through
    positionally, unchanged and in the same order.

    Returns:
        Whatever generate_image returns for these arguments.
    """
    example_args = (
        text,
        img1,
        img2,
        img3,
        height,
        width,
        guidance_scale,
        img_guidance_scale,
        inference_steps,
        seed,
        separate_cfg_infer,
    )
    return generate_image(*example_args)
# User-facing description of the application (Japanese Markdown). It is passed
# to gr.Markdown when the interface is built; the text is runtime content and
# must not be edited for style.
description = """
OmniGenは、以下のような様々なタスクを実行できる統合画像生成モデルです:
- テキストから画像への生成
- 被写体主導の生成
- アイデンティティを保持した生成
- 画像条件付き生成
マルチモーダルから画像を生成する場合:
- プロンプトには文字列を入力
- 入力画像はリストとして渡す
- プロンプト内の画像プレースホルダーは `<img><|image_*|></img>` 形式で指定
(1番目の画像は <img><|image_1|></img>、2番目は <img><|image_2|></img>)
使用上のヒント:
- 色が過飽和な場合:`guidance_scale` を下げてください
- 画質が低い場合:より詳細なプロンプトを使用してください
- アニメ調の場合:プロンプトに `photo` を追加してみてください
- 生成済み画像の編集:同じseedは使用できません(例:生成時seed=0なら、編集時はseed=1など)
- 画像編集タスクでは、画像を編集指示の前に配置することを推奨
(例:`<img><|image_1|></img> remove suit` を使用し、`remove suit <img><|image_1|></img>` は避ける)
"""
# NOTE(review): this flag is not referenced anywhere else in the visible part
# of this file - presumably a leftover default for the separate-CFG option;
# confirm before relying on it.
separate_cfg_infer_arg = False
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# OmniGen: 統合画像生成モデル [論文](https://arxiv.org/abs/2409.11340) [コード](https://github.com/VectorSpaceLab/OmniGen)")
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            # Prompt input.
            prompt_input = gr.Textbox(
                label="プロンプトを入力してください(i番目の入力画像は<img><|image_i|></img>で指定)",
                placeholder="ここにプロンプトを入力..."
            )
            with gr.Row(equal_height=True):
                # Up to three input images, handed to the callback as file paths.
                image_input_1 = gr.Image(label="画像1: <img><|image_1|></img>", type="filepath")
                image_input_2 = gr.Image(label="画像2: <img><|image_2|></img>", type="filepath")
                image_input_3 = gr.Image(label="画像3: <img><|image_3|></img>", type="filepath")
            # Output image size.
            height_input = gr.Slider(
                label="画像の高さ", minimum=256, maximum=2048, value=1024, step=16
            )
            width_input = gr.Slider(
                label="画像の幅", minimum=256, maximum=2048, value=1024, step=16
            )
            # Generation parameters.
            guidance_scale_input = gr.Slider(
                label="ガイダンススケール", minimum=1.0, maximum=5.0, value=2.5, step=0.1
            )
            img_guidance_scale_input = gr.Slider(
                label="画像ガイダンススケール", minimum=1.0, maximum=2.0, value=1.6, step=0.1
            )
            num_inference_steps = gr.Slider(
                label="推論ステップ数", minimum=1, maximum=100, value=50, step=1
            )
            seed_input = gr.Slider(
                label="シード値", minimum=0, maximum=2147483647, value=42, step=1
            )
            separate_cfg_infer = gr.Checkbox(
                label="CFG推論を分離", info="分離CFG推論を有効にする"
            )
            # Generate button.
            generate_button = gr.Button("画像を生成")
        with gr.Column():
            # Generated image display.
            output_image = gr.Image(label="生成された画像")

    # The button callback and the examples table take the same components in
    # the same order; the order must match generate_image's positional
    # parameters and the layout of each example row.
    input_components = [
        prompt_input,
        image_input_1,
        image_input_2,
        image_input_3,
        height_input,
        width_input,
        guidance_scale_input,
        img_guidance_scale_input,
        num_inference_steps,
        seed_input,
        separate_cfg_infer,
    ]

    # Wire the button click to the generator.
    generate_button.click(
        generate_image,
        inputs=input_components,
        outputs=output_image,
    )

    # Register the sample examples.
    gr.Examples(
        examples=get_example(),
        fn=run_for_examples,
        inputs=input_components,
        outputs=output_image,
    )

# Start the application.
demo.launch()