ginipick committed (verified)
Commit 17d8233 · Parent(s): ef45d8e

Update app.py

Files changed (1)
  1. app.py +62 -194
app.py CHANGED
@@ -1,5 +1,6 @@
- import os
  import spaces
  import time
  import gradio as gr
  import torch
@@ -8,9 +9,11 @@ from torchvision import transforms
  from dataclasses import dataclass
  import math
  from typing import Callable
  from tqdm import tqdm
  import bitsandbytes as bnb
  from bitsandbytes.nn.modules import Params4bit, QuantState
  import torch
  import random
  from einops import rearrange, repeat
@@ -18,8 +21,11 @@ from diffusers import AutoencoderKL
  from torch import Tensor, nn
  from transformers import CLIPTextModel, CLIPTokenizer
  from transformers import T5EncoderModel, T5Tokenizer
- from transformers import MarianMTModel, MarianTokenizer, pipeline
- from huggingface_hub import snapshot_download
  class HFEmbedder(nn.Module):
      def __init__(self, version: str, max_length: int, **hf_kwargs):
@@ -54,6 +60,7 @@ class HFEmbedder(nn.Module):
              output_hidden_states=False,
          )
          return outputs[self.output_key]

  device = "cuda"
  t5 = HFEmbedder("DeepFloyd/t5-v1_1-xxl", max_length=512, torch_dtype=torch.bfloat16).to(device)
@@ -63,6 +70,9 @@ ae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="va
  # freeze(t5)


  def functional_linear_4bits(x, weight, bias):
      out = bnb.matmul_4bit(x, weight.t(), bias=bias, quant_state=weight.quant_state)
      out = out.to(x)
@@ -200,6 +210,9 @@ class Linear(ForgeLoader4Bit):
  nn.Linear = Linear


  def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
      q, k = apply_rope(q, k, pe)

@@ -724,6 +737,8 @@ def get_image(image) -> torch.Tensor | None:
      return img[None, ...]


  from huggingface_hub import hf_hub_download
  from safetensors.torch import load_file
@@ -734,157 +749,43 @@ model = Flux().to(dtype=torch.bfloat16, device="cuda")
  result = model.load_state_dict(sd)
  model_zero_init = False

-
- # Language-to-model mapping dictionary
- TRANSLATORS = {
-     "Korean": "Helsinki-NLP/opus-mt-ko-en",
-     "Japanese": "Helsinki-NLP/opus-mt-ja-en",
-     "Chinese": "Helsinki-NLP/opus-mt-zh-en",
-     "Russian": "Helsinki-NLP/opus-mt-ru-en",
-     "Spanish": "Helsinki-NLP/opus-mt-es-en",
-     "French": "Helsinki-NLP/opus-mt-fr-en",
-     "Arabic": "Helsinki-NLP/opus-mt-ar-en",
-     "Bengali": "Helsinki-NLP/opus-mt-bn-en",
-     "Estonian": "Helsinki-NLP/opus-mt-et-en",
-     "Polish": "Helsinki-NLP/opus-mt-pl-en",
-     "Swedish": "Helsinki-NLP/opus-mt-sv-en",
-     "Thai": "Helsinki-NLP/opus-mt-th-en",
-     "Urdu": "Helsinki-NLP/opus-mt-ur-en",
-     "Bulgarian": "Helsinki-NLP/opus-mt-bg-en",
-     "Catalan": "Helsinki-NLP/opus-mt-ca-en",
-     "Czech": "Helsinki-NLP/opus-mt-cs-en",
-     "Azerbaijani": "Helsinki-NLP/opus-mt-az-en",
-     "Basque": "Helsinki-NLP/opus-mt-bat-en",
-     "Bicolano": "Helsinki-NLP/opus-mt-bcl-en",
-     "Bemba": "Helsinki-NLP/opus-mt-bem-en",
-     "Berber": "Helsinki-NLP/opus-mt-ber-en",
-     "Bislama": "Helsinki-NLP/opus-mt-bi-en",
-     "Bantu": "Helsinki-NLP/opus-mt-bnt-en",
-     "Brazilian Sign Language": "Helsinki-NLP/opus-mt-bzs-en",
-     "Caucasian": "Helsinki-NLP/opus-mt-cau-en",
-     "Cebuano": "Helsinki-NLP/opus-mt-ceb-en",
-     "Celtic": "Helsinki-NLP/opus-mt-cel-en",
-     "Chuukese": "Helsinki-NLP/opus-mt-chk-en",
-     "Creoles and pidgins (French)": "Helsinki-NLP/opus-mt-cpf-en",
-     "Seychelles Creole": "Helsinki-NLP/opus-mt-crs-en",
-     "American Sign Language": "Helsinki-NLP/opus-mt-ase-en",
-     "Artificial Language": "Helsinki-NLP/opus-mt-art-en",
-     "Atlantic-Congo": "Helsinki-NLP/opus-mt-alv-en",
-     "Afroasiatic": "Helsinki-NLP/opus-mt-afa-en",
-     "Afrikaans": "Helsinki-NLP/opus-mt-af-en",
-     "Austroasiatic": "Helsinki-NLP/opus-mt-aav-en"
- }
-
- translators_cache = {}
-
- # Set the model cache directory
- os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
-
- def download_model(model_name):
-     """Pre-download a model."""
-     try:
-         cache_dir = os.path.join('/tmp/transformers_cache', model_name.split('/')[-1])
-         snapshot_download(
-             repo_id=model_name,
-             cache_dir=cache_dir,
-             local_files_only=False
-         )
-         return cache_dir
-     except Exception as e:
-         print(f"Error downloading model {model_name}: {e}")
-         return None
-
- def get_translator(lang):
-     """Initialize and return the translator for a language."""
-     if lang == "English":
-         return None
-
-     if lang not in translators_cache:
-         try:
-             model_name = TRANSLATORS[lang]
-
-             # Load the model directly instead of using pipeline
-             tokenizer = MarianTokenizer.from_pretrained(model_name)
-             model = MarianMTModel.from_pretrained(model_name)
-
-             # Run on CPU
-             model = model.to("cpu").eval()
-
-             translators_cache[lang] = {
-                 "model": model,
-                 "tokenizer": tokenizer
-             }
-             print(f"Successfully loaded translator for {lang}")
-
-         except Exception as e:
-             print(f"Error loading translator for {lang}: {e}")
-             return None
-
-     return translators_cache[lang]
-
- def translate_text(text, translator_info):
-     """Run the translation."""
-     if translator_info is None:
-         return text
-
-     try:
-         tokenizer = translator_info["tokenizer"]
-         model = translator_info["model"]
-
-         # Preprocess the input text
-         inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
-
-         # Run generation
-         with torch.no_grad():
-             outputs = model.generate(**inputs)
-
-         # Decode the translated output
-         translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-         print(f"Original text: {text}")
-         print(f"Translated text: {translated}")
-
-         return translated
-
-     except Exception as e:
-         print(f"Translation error: {e}")
-         return text

  @spaces.GPU
  @torch.no_grad()
  def generate_image(
-     prompt, source_lang, width, height, guidance, inference_steps, seed,
      do_img2img, init_image, image2image_strength, resize_img,
      progress=gr.Progress(track_tqdm=True),
  ):
-     # Handle translation
-     try:
-         if source_lang != "English":
-             translator_info = get_translator(source_lang)
-             if translator_info is not None:
-                 translated_prompt = translate_text(prompt, translator_info)
-                 print(f"Using translated prompt: {translated_prompt}")
-             else:
-                 print(f"No translator available for {source_lang}, using original prompt")
-                 translated_prompt = prompt
-         else:
-             translated_prompt = prompt
-     except Exception as e:
-         print(f"Translation failed: {e}")
-         translated_prompt = prompt
-
      if seed == 0:
          seed = int(random.random() * 1000000)

      device = "cuda" if torch.cuda.is_available() else "cpu"
      torch_device = torch.device(device)

      global model, model_zero_init
      if not model_zero_init:
@@ -901,11 +802,10 @@ def generate_image(
              height = init_image.shape[-2]
              width = init_image.shape[-1]
          init_image = ae.encode(init_image.to(torch_device).to(torch.bfloat16)).latent_dist.sample()
-         init_image = (init_image - ae.config.shift_factor) * ae.config.scaling_factor

      generator = torch.Generator(device=device).manual_seed(seed)
-     x = torch.randn(1, 16, 2 * math.ceil(height / 16), 2 * math.ceil(width / 16),
-                     device=device, dtype=torch.bfloat16, generator=generator)

      num_steps = inference_steps
      timesteps = get_schedule(num_steps, (x.shape[-1] * x.shape[-2]) // 4, shift=True)
@@ -916,18 +816,22 @@ def generate_image(
          timesteps = timesteps[t_idx:]
          x = t * x + (1.0 - t) * init_image.to(x.dtype)

-     inp = prepare(t5=t5, clip=clip, img=x, prompt=translated_prompt)
      x = denoise(model, **inp, timesteps=timesteps, guidance=guidance)

      x = unpack(x.float(), height, width)
      with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
-         x = (x / ae.config.scaling_factor) + ae.config.shift_factor
          x = ae.decode(x).sample

      x = x.clamp(-1, 1)
      x = rearrange(x[0], "c h w -> h w c")
      img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())

      return img, seed, translated_prompt

  css = """
@@ -936,21 +840,14 @@ footer {
  }
  """

  def create_demo():
      with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
          with gr.Row():
              with gr.Column():
-                 source_lang = gr.Dropdown(
-                     choices=["English"] + sorted(list(TRANSLATORS.keys())),
-                     value="English",
-                     label="Source Language"
-                 )
-                 prompt = gr.Textbox(
-                     label="Prompt",
-                     value="A beautiful landscape"
-                 )
-
                  width = gr.Slider(minimum=128, maximum=2048, step=64, label="Width", value=768)
                  height = gr.Slider(minimum=128, maximum=2048, step=64, label="Height", value=768)
                  guidance = gr.Slider(minimum=1.0, maximum=5.0, step=0.1, label="Guidance", value=3.5)
@@ -964,44 +861,13 @@ def create_demo():
                  seed = gr.Number(label="Seed", precision=-1)
                  do_img2img = gr.Checkbox(label="Image to Image", value=False)
                  init_image = gr.Image(label="Input Image", visible=False)
-                 image2image_strength = gr.Slider(
-                     minimum=0.0, maximum=1.0, step=0.01,
-                     label="Noising strength", value=0.8, visible=False
-                 )
                  resize_img = gr.Checkbox(label="Resize image", value=True, visible=False)
                  generate_button = gr.Button("Generate")

              with gr.Column():
                  output_image = gr.Image(label="Generated Image")
                  output_seed = gr.Text(label="Used Seed")
-                 translated_prompt = gr.Text(label="Translated Prompt")
-
-         # Multilingual examples
-         examples = [
-             # English
-             ["A beautiful sunset over mountains", "English", 768, 768, 3.5, 30, 0, False, None, 0.8, True],
-             # Korean
-             ["벚꽃이 흩날리는 서울의 봄 풍경", "Korean", 768, 768, 3.5, 30, 0, False, None, 0.8, True],
-             # Japanese
-             ["富士山と桜の美しい風景", "Japanese", 768, 768, 3.5, 30, 0, False, None, 0.8, True],
-             # Chinese
-             ["长城日落的壮丽景色", "Chinese", 768, 768, 3.5, 30, 0, False, None, 0.8, True],
-             # Spanish
-             ["Un hermoso atardecer en la playa", "Spanish", 768, 768, 3.5, 30, 0, False, None, 0.8, True]
-         ]
-
-         gr.Examples(
-             examples=examples,
-             inputs=[
-                 prompt, source_lang, width, height, guidance, inference_steps,
-                 seed, do_img2img, init_image, image2image_strength, resize_img
-             ],
-             outputs=[output_image, output_seed, translated_prompt],
-             fn=generate_image,
-             cache_examples=True
-         )
-
          do_img2img.change(
              fn=lambda x: [gr.update(visible=x), gr.update(visible=x), gr.update(visible=x)],
@@ -1011,16 +877,18 @@ def create_demo():

          generate_button.click(
              fn=generate_image,
-             inputs=[
-                 prompt, source_lang, width, height, guidance, inference_steps,
-                 seed, do_img2img, init_image, image2image_strength, resize_img
-             ],
-             outputs=[output_image, output_seed, translated_prompt]
          )

      return demo

  if __name__ == "__main__":
-     print("Starting demo...")
      demo = create_demo()
-     demo.launch(share=True)
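For reference, a minimal standalone sketch of the per-language MarianMT path removed above (it assumes the transformers package and the Helsinki-NLP checkpoints named in the old code; the helper name marian_translate is illustrative, not part of app.py):

from transformers import MarianMTModel, MarianTokenizer
import torch

def marian_translate(text: str, model_name: str = "Helsinki-NLP/opus-mt-ko-en") -> str:
    # Load the tokenizer and seq2seq model for one language pair, run on CPU
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name).to("cpu").eval()
    # Tokenize, generate, and decode the English translation
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(marian_translate("벚꽃이 흩날리는 서울의 봄 풍경"))  # expected: an English rendering of the Korean prompt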
 
+ # import os
  import spaces
+
  import time
  import gradio as gr
  import torch
  from dataclasses import dataclass
  import math
  from typing import Callable
+
  from tqdm import tqdm
  import bitsandbytes as bnb
  from bitsandbytes.nn.modules import Params4bit, QuantState
+
  import torch
  import random
  from einops import rearrange, repeat
  from torch import Tensor, nn
  from transformers import CLIPTextModel, CLIPTokenizer
  from transformers import T5EncoderModel, T5Tokenizer
+ # from optimum.quanto import freeze, qfloat8, quantize
+ from transformers import pipeline
+
+ ko_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
+ ja_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en")

  class HFEmbedder(nn.Module):
      def __init__(self, version: str, max_length: int, **hf_kwargs):

              output_hidden_states=False,
          )
          return outputs[self.output_key]
+

  device = "cuda"
  t5 = HFEmbedder("DeepFloyd/t5-v1_1-xxl", max_length=512, torch_dtype=torch.bfloat16).to(device)

  # freeze(t5)

+ # ---------------- NF4 ----------------
+
+
  def functional_linear_4bits(x, weight, bias):
      out = bnb.matmul_4bit(x, weight.t(), bias=bias, quant_state=weight.quant_state)
      out = out.to(x)

  nn.Linear = Linear

+ # ---------------- Model ----------------
+
+
  def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
      q, k = apply_rope(q, k, pe)

      return img[None, ...]

+ # ---------------- Demo ----------------
+

  from huggingface_hub import hf_hub_download
  from safetensors.torch import load_file

  result = model.load_state_dict(sd)
  model_zero_init = False

+ # model = Flux().to(dtype=torch.bfloat16, device="cuda")
+ # result = model.load_state_dict(load_file("/storage/dev/nyanko/flux-dev/flux1-dev.sft"))

  @spaces.GPU
  @torch.no_grad()
  def generate_image(
+     prompt, width, height, guidance, inference_steps, seed,
      do_img2img, init_image, image2image_strength, resize_img,
      progress=gr.Progress(track_tqdm=True),
  ):
+     translated_prompt = prompt
+
+     # Detect Korean or Japanese characters
+     def contains_korean(text):
+         return any('\u3131' <= c <= '\u318E' or '\uAC00' <= c <= '\uD7A3' for c in text)
+
+     def contains_japanese(text):
+         return any('\u3040' <= c <= '\u309F' or '\u30A0' <= c <= '\u30FF' or '\u4E00' <= c <= '\u9FFF' for c in text)
+
+     # Translate if the prompt contains Korean or Japanese
+     if contains_korean(prompt):
+         translated_prompt = ko_translator(prompt, max_length=512)[0]['translation_text']
+         print(f"Translated Korean prompt: {translated_prompt}")
+         prompt = translated_prompt
+     elif contains_japanese(prompt):
+         translated_prompt = ja_translator(prompt, max_length=512)[0]['translation_text']
+         print(f"Translated Japanese prompt: {translated_prompt}")
+         prompt = translated_prompt
+
      if seed == 0:
          seed = int(random.random() * 1000000)

      device = "cuda" if torch.cuda.is_available() else "cpu"
      torch_device = torch.device(device)
+
+
      global model, model_zero_init
      if not model_zero_init:

              height = init_image.shape[-2]
              width = init_image.shape[-1]
          init_image = ae.encode(init_image.to(torch_device).to(torch.bfloat16)).latent_dist.sample()
+         init_image = (init_image - ae.config.shift_factor) * ae.config.scaling_factor

      generator = torch.Generator(device=device).manual_seed(seed)
+     x = torch.randn(1, 16, 2 * math.ceil(height / 16), 2 * math.ceil(width / 16), device=device, dtype=torch.bfloat16, generator=generator)

      num_steps = inference_steps
      timesteps = get_schedule(num_steps, (x.shape[-1] * x.shape[-2]) // 4, shift=True)

          timesteps = timesteps[t_idx:]
          x = t * x + (1.0 - t) * init_image.to(x.dtype)

+     inp = prepare(t5=t5, clip=clip, img=x, prompt=prompt)
      x = denoise(model, **inp, timesteps=timesteps, guidance=guidance)

+     # with profile(activities=[ProfilerActivity.CPU],record_shapes=True,profile_memory=True) as prof:
+     #     print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=20))
+
      x = unpack(x.float(), height, width)
      with torch.autocast(device_type=torch_device.type, dtype=torch.bfloat16):
+         x = (x / ae.config.scaling_factor) + ae.config.shift_factor
          x = ae.decode(x).sample

      x = x.clamp(-1, 1)
      x = rearrange(x[0], "c h w -> h w c")
      img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())

+
      return img, seed, translated_prompt

  css = """
  }
  """

+
  def create_demo():
      with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
+
          with gr.Row():
              with gr.Column():
+                 prompt = gr.Textbox(label="Prompt (Korean supported)", value="A cute and fluffy golden retriever puppy sitting upright, holding a neatly designed white sign with bold, colorful lettering that reads 'Have a Happy Day!' in cheerful fonts. The puppy has expressive, sparkling eyes, a happy smile, and fluffy ears slightly flopped. The background is a vibrant and sunny meadow with soft-focus flowers, glowing sunlight filtering through the trees, and a warm golden glow that enhances the joyful atmosphere. The sign is framed with small decorative flowers, adding a charming and wholesome touch. Ensure the text on the sign is clear and legible.")
                  width = gr.Slider(minimum=128, maximum=2048, step=64, label="Width", value=768)
                  height = gr.Slider(minimum=128, maximum=2048, step=64, label="Height", value=768)
                  guidance = gr.Slider(minimum=1.0, maximum=5.0, step=0.1, label="Guidance", value=3.5)

                  seed = gr.Number(label="Seed", precision=-1)
                  do_img2img = gr.Checkbox(label="Image to Image", value=False)
                  init_image = gr.Image(label="Input Image", visible=False)
+                 image2image_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Noising strength", value=0.8, visible=False)
                  resize_img = gr.Checkbox(label="Resize image", value=True, visible=False)
                  generate_button = gr.Button("Generate")

              with gr.Column():
                  output_image = gr.Image(label="Generated Image")
                  output_seed = gr.Text(label="Used Seed")

          do_img2img.change(
              fn=lambda x: [gr.update(visible=x), gr.update(visible=x), gr.update(visible=x)],

          generate_button.click(
              fn=generate_image,
+             inputs=[prompt, width, height, guidance, inference_steps, seed, do_img2img, init_image, image2image_strength, resize_img],
+             outputs=[output_image, output_seed]
          )
+
+         examples = [
+             "a tiny astronaut hatching from an egg on the moon",
+             "a cat holding a sign that says hello world",
+             "an anime illustration of a wiener schnitzel",
+         ]

      return demo

  if __name__ == "__main__":
      demo = create_demo()
+     demo.launch()
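For comparison, a minimal standalone sketch of the detection-based routing that the updated generate_image now uses (it assumes the transformers package and the two Helsinki-NLP pipelines named above; translate_if_needed is an illustrative helper, not part of app.py):

from transformers import pipeline

ko_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
ja_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en")

def contains_korean(text: str) -> bool:
    # Hangul compatibility jamo and precomposed syllable ranges
    return any('\u3131' <= c <= '\u318E' or '\uAC00' <= c <= '\uD7A3' for c in text)

def contains_japanese(text: str) -> bool:
    # Hiragana, katakana, and CJK unified ideograph ranges
    return any('\u3040' <= c <= '\u309F' or '\u30A0' <= c <= '\u30FF' or '\u4E00' <= c <= '\u9FFF' for c in text)

def translate_if_needed(prompt: str) -> str:
    # Route the prompt to the matching translator; English (or anything else) passes through unchanged
    if contains_korean(prompt):
        return ko_translator(prompt, max_length=512)[0]['translation_text']
    if contains_japanese(prompt):
        return ja_translator(prompt, max_length=512)[0]['translation_text']
    return prompt

print(translate_if_needed("富士山と桜の美しい風景"))  # expected: an English rendering of the Japanese prompt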