aiqtech committed on
Commit 868eab9
• 1 Parent(s): 0ab2174

Update app.py

Files changed (1)
  1. app.py +108 -55
app.py CHANGED
@@ -23,30 +23,45 @@ os.makedirs(TMP_DIR, exist_ok=True)
 def initialize_models():
     global pipeline, translator, flux_pipe
 
-    # Check for the Hugging Face token
-    hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        raise ValueError("HF_TOKEN environment variable is not set. Please set your Hugging Face token.")
 
-    # Initialize the Trellis pipeline
-    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
-    pipeline.cuda()
-
-    # Initialize the translator
-    translator = translation_pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
-
-    # Initialize the Flux pipeline, passing the token
-    flux_pipe = FluxPipeline.from_pretrained(
-        "black-forest-labs/FLUX.1-dev",
-        torch_dtype=torch.bfloat16,
-        use_auth_token=hf_token  # apply the Hugging Face token
-    )
-    flux_pipe.load_lora_weights(
-        "gokaygokay/Flux-Game-Assets-LoRA-v2",
-        use_auth_token=hf_token  # apply the token when loading the LoRA weights as well
-    )
-    flux_pipe.fuse_lora(lora_scale=1.0)
-    flux_pipe.to(device="cuda", dtype=torch.bfloat16)
 
 def translate_if_korean(text):
     if any(ord('가') <= ord(char) <= ord('힣') for char in text):
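Aside: the initialization removed above passed `use_auth_token=` to `from_pretrained`, a keyword that recent diffusers releases deprecate in favor of `token=`. A minimal sketch of the equivalent gated-model load under that assumption (our illustration, not part of this commit):

```python
import os

import torch
from diffusers import FluxPipeline

# Same load as the removed code, but with the newer `token=` keyword
# in place of the deprecated `use_auth_token=`.
hf_token = os.getenv("HF_TOKEN")
flux_pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
    token=hf_token,  # FLUX.1-dev is gated; an authorized token is required
)
```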
@@ -101,32 +116,56 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     return gs, mesh, state['trial_id']
 
 @spaces.GPU
-def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float, ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int) -> Tuple[dict, str]:
-    if randomize_seed:
-        seed = np.random.randint(0, MAX_SEED)
-    outputs = pipeline.run(
-        Image.open(f"{TMP_DIR}/{trial_id}.png"),
-        seed=seed,
-        formats=["gaussian", "mesh"],
-        preprocess_image=False,
-        sparse_structure_sampler_params={
-            "steps": ss_sampling_steps,
-            "cfg_strength": ss_guidance_strength,
-        },
-        slat_sampler_params={
-            "steps": slat_sampling_steps,
-            "cfg_strength": slat_guidance_strength,
-        },
-    )
-    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
-    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
-    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
-    trial_id = uuid.uuid4()
-    video_path = f"{TMP_DIR}/{trial_id}.mp4"
-    os.makedirs(os.path.dirname(video_path), exist_ok=True)
-    imageio.mimsave(video_path, video, fps=15)
-    state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
-    return state, video_path
 
 @spaces.GPU
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
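In the body above, the `axis=1` concatenation stitches the Gaussian color render and the mesh normal render side by side within each preview frame. A tiny sketch of that layout:

```python
import numpy as np

# Two frames of equal height: concatenating along axis=1 (width) gives
# one combined color|normal frame, as in the preview video.
color = np.zeros((256, 256, 3), dtype=np.uint8)
normal = np.full((256, 256, 3), 255, dtype=np.uint8)
frame = np.concatenate([color, normal], axis=1)
print(frame.shape)  # (256, 512, 3)
```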
@@ -165,9 +204,16 @@ def deactivate_button() -> gr.Button:
     return gr.Button(interactive=False)
 
 
-with gr.Blocks() as demo:
     gr.Markdown("""
-    # 3D Asset Creation & Text-to-Image Generation
     """)
 
     with gr.Tabs():
@@ -287,11 +333,18 @@ with gr.Blocks() as demo:
         outputs=[txt2img_output]
     )
 
-# Launch the Gradio app
 if __name__ == "__main__":
-    initialize_models()  # initialize all models
     try:
-        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))  # Preload rembg
     except:
         pass
-    demo.launch()
 def initialize_models():
     global pipeline, translator, flux_pipe
 
+    # Clear CUDA memory
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
 
+    try:
+        # Initialize the Trellis pipeline
+        pipeline = TrellisImageTo3DPipeline.from_pretrained(
+            "JeffreyXiang/TRELLIS-image-large",
+            device_map="auto"  # automatic device mapping for the Zero GPU environment
+        )
+
+        # Initialize the translator
+        translator = translation_pipeline(
+            "translation",
+            model="Helsinki-NLP/opus-mt-ko-en",
+            device_map="auto"
+        )
+
+        # Initialize the Flux pipeline
+        flux_pipe = FluxPipeline.from_pretrained(
+            "black-forest-labs/FLUX.1-dev",
+            torch_dtype=torch.float16,  # use float16 instead of bfloat16
+            device_map="auto"
+        )
+
+        # Load the LoRA weights
+        flux_pipe.load_lora_weights(
+            "gokaygokay/Flux-Game-Assets-LoRA-v2",
+            device_map="auto"
+        )
+        flux_pipe.fuse_lora(lora_scale=1.0)
+
+    except Exception as e:
+        print(f"Error initializing models: {str(e)}")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        raise e
+
+
 
 def translate_if_korean(text):
     if any(ord('가') <= ord(char) <= ord('힣') for char in text):
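For reference, `translate_if_korean` works because precomposed Hangul syllables occupy one contiguous Unicode block, U+AC00 ('가') through U+D7A3 ('힣'), so a single range test per character detects Korean text. A self-contained sketch of the same check (the name `contains_korean` is ours):

```python
# Any character inside the Hangul-syllables block marks the text as Korean.
def contains_korean(text: str) -> bool:
    return any('\uac00' <= ch <= '\ud7a3' for ch in text)

print(contains_korean("hello"))      # False
print(contains_korean("안녕하세요"))   # True
```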
 
     return gs, mesh, state['trial_id']
 
 @spaces.GPU
+def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
+                ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
+    try:
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        if randomize_seed:
+            seed = np.random.randint(0, MAX_SEED)
+
+        input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
+
+        # Use an autocast context manager for memory optimization
+        with torch.cuda.amp.autocast(enabled=True):
+            outputs = pipeline.run(
+                input_image,
+                seed=seed,
+                formats=["gaussian", "mesh"],
+                preprocess_image=False,
+                sparse_structure_sampler_params={
+                    "steps": ss_sampling_steps,
+                    "cfg_strength": ss_guidance_strength,
+                },
+                slat_sampler_params={
+                    "steps": slat_sampling_steps,
+                    "cfg_strength": slat_guidance_strength,
+                }
+            )
+
+        # Render the videos
+        video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+        video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+
+        trial_id = str(uuid.uuid4())
+        video_path = f"{TMP_DIR}/{trial_id}.mp4"
+        os.makedirs(os.path.dirname(video_path), exist_ok=True)
+        imageio.mimsave(video_path, video, fps=15)
+
+        state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        return state, video_path
+
+    except Exception as e:
+        print(f"Error in image_to_3d: {str(e)}")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        raise e
 
 @spaces.GPU
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
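One note on the mixed-precision wrapper used in `image_to_3d` above: `torch.cuda.amp.autocast` is the legacy spelling, and recent PyTorch releases emit a deprecation warning for it. A sketch of the device-agnostic equivalent, assuming PyTorch 1.10 or later:

```python
import torch

# Equivalent to torch.cuda.amp.autocast(enabled=True) on recent PyTorch;
# eligible ops inside the block run in reduced precision.
with torch.amp.autocast("cuda", enabled=True):
    ...  # pipeline.run(...) would go here
```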
 
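Separately, the clear-cache / log / re-raise pattern now repeated in `initialize_models` and `image_to_3d` could be factored into a decorator. A minimal sketch of that refactor; it is our own suggestion, not part of this commit, and `with_cuda_cleanup` is a hypothetical name:

```python
import functools

import torch

def with_cuda_cleanup(fn):
    """Empty the CUDA cache before fn runs and again afterwards,
    including when fn raises."""
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        try:
            return fn(*args, **kwargs)
        finally:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
    return wrapper
```

Stacked under `@spaces.GPU`, such a decorator would replace the duplicated `empty_cache` call sites with a single definition.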
204
  return gr.Button(interactive=False)
205
 
206
 
207
+ css = """
208
+ footer {
209
+ visibility: hidden;
210
+ }
211
+ """
212
+
213
+
214
+ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
215
  gr.Markdown("""
216
+ # Craft3D : 3D Asset Creation & Text-to-Image Generation
217
  """)
218
 
219
  with gr.Tabs():
 
         outputs=[txt2img_output]
     )
 
 if __name__ == "__main__":
+    # Clear the CUDA memory cache
+    torch.cuda.empty_cache()
+
+    # Initialize all models
+    initialize_models()
+
     try:
+        # Preload rembg
+        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
     except:
         pass
+
+    # Launch the Gradio app
+    demo.launch(share=True)  # added share=True
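A caveat on the launch change: `share=True` asks Gradio to open a public gradio.live tunnel, which is redundant on a Hugging Face Space where the app is already served publicly (Gradio typically ignores the flag there and prints a warning). One guarded alternative, assuming the `SPACE_ID` environment variable that Spaces sets:

```python
import os

# Request a share link only when running outside a Hugging Face Space.
demo.launch(share=os.getenv("SPACE_ID") is None)
```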