Spaces: Running on L40S
Update app.py
app.py CHANGED
@@ -32,27 +32,32 @@ def initialize_models():
     try:
         import torch
 
-        #
-        torch.backends.cudnn.benchmark = True
-        torch.backends.cuda.matmul.allow_tf32 = True
+        # L40S GPU optimization settings
+        torch.backends.cudnn.benchmark = True
+        torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
         print("Initializing Trellis pipeline...")
         pipeline = TrellisImageTo3DPipeline.from_pretrained(
-            "JeffreyXiang/TRELLIS-image-large",
-            torch_dtype=torch.float16  # use FP16 on the A100
+            "JeffreyXiang/TRELLIS-image-large"
         )
 
         if torch.cuda.is_available():
             pipeline = pipeline.to("cuda")
+            # convert the model to FP16
+            for param in pipeline.parameters():
+                param.data = param.data.half()
 
         print("Initializing translator...")
         translator = translation_pipeline(
             "translation",
             model="Helsinki-NLP/opus-mt-ko-en",
-            device="cuda"
+            device="cuda"
         )
 
+        # the Flux pipeline is initialized lazily
+        flux_pipe = None
+
         print("Models initialized successfully")
         return True
 
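For reference, a minimal standalone sketch of what the settings added in this hunk do (cuDNN autotuning, TF32 matmuls) and of the in-place FP16 cast applied to the Trellis pipeline. The enable_gpu_optimizations / convert_params_to_fp16 names and the nn.Linear stand-in module are illustrative only, not part of app.py:

import torch
import torch.nn as nn

def enable_gpu_optimizations() -> None:
    # Let cuDNN benchmark convolution algorithms for fixed input shapes.
    torch.backends.cudnn.benchmark = True
    # Allow TF32 matmuls/convolutions on Ampere-and-newer GPUs (the L40S qualifies).
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

def convert_params_to_fp16(module: nn.Module) -> nn.Module:
    # Same idea as the loop over pipeline.parameters() in the hunk:
    # cast each parameter tensor to half precision in place.
    for param in module.parameters():
        param.data = param.data.half()
    return module

if __name__ == "__main__":
    enable_gpu_optimizations()
    model = convert_params_to_fp16(nn.Linear(8, 8))  # stand-in module, not the Trellis pipeline
    print(next(model.parameters()).dtype)  # torch.float16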
@@ -68,17 +73,15 @@ def get_flux_pipe():
         free_memory()
         flux_pipe = FluxPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev",
-            torch_dtype=torch.float16,  # use FP16 on the A100
             use_safetensors=True
         ).to("cuda")
+        # convert to FP16
+        flux_pipe.to(torch.float16)
     except Exception as e:
         print(f"Error loading Flux pipeline: {e}")
         return None
     return flux_pipe
 
-
-
-
 def free_memory():
     """Enhanced memory cleanup function."""
     import gc
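The hunk above removes the torch_dtype argument and instead casts the loaded pipeline to FP16 after moving it to the GPU. A hedged sketch of that lazy-load-and-cache pattern, assuming the diffusers FluxPipeline API and an already-configured access token for the gated FLUX.1-dev weights; the module-level _flux_pipe cache and the early-return check are assumptions about code outside this hunk:

import torch
from diffusers import FluxPipeline

_flux_pipe = None  # module-level cache; the real app keeps a similar global

def get_flux_pipe_sketch():
    """Load FLUX.1-dev once, cache it, and return None if loading fails."""
    global _flux_pipe
    if _flux_pipe is not None:
        return _flux_pipe
    try:
        _flux_pipe = FluxPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            use_safetensors=True,
        ).to("cuda")
        _flux_pipe.to(torch.float16)  # cast the weights after moving to the GPU
    except Exception as e:
        print(f"Error loading Flux pipeline: {e}")
        _flux_pipe = None
        return None
    return _flux_pipe

Note that casting after .to("cuda") briefly holds the FP32 weights on the GPU, so passing torch_dtype=torch.float16 to from_pretrained (as the removed line did) is usually the more memory-friendly route when it works.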
@@ -108,7 +111,7 @@ def free_memory():
         except:
             pass
 
-
+
 def setup_gpu_model(model):
     """Handles models that require GPU setup."""
     if torch.cuda.is_available():
@@ -122,7 +125,7 @@ def translate_if_korean(text):
         return translated
     return text
 
-
+
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
     try:
         if pipeline is None:
@@ -192,7 +195,6 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
 
     return gs, mesh, state['trial_id']
 
-@spaces.GPU
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
                 ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
     try:
@@ -201,8 +203,8 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
 
         input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
 
-        # limit the image size
-        max_size =
+        # adjust the image size limit for the L40S
+        max_size = 768  # the L40S can handle larger images
         if max(input_image.size) > max_size:
             ratio = max_size / max(input_image.size)
             input_image = input_image.resize(
@@ -214,31 +216,31 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
         if torch.cuda.is_available():
             pipeline.to("cuda")
 
-        with torch.
+        with torch.cuda.amp.autocast():  # use automatic mixed precision
             outputs = pipeline.run(
                 input_image,
                 seed=seed,
                 formats=["gaussian", "mesh"],
                 preprocess_image=False,
                 sparse_structure_sampler_params={
-                    "steps": min(ss_sampling_steps,
+                    "steps": min(ss_sampling_steps, 20),  # allow more steps on the L40S
                     "cfg_strength": ss_guidance_strength,
                 },
                 slat_sampler_params={
-                    "steps": min(slat_sampling_steps,
+                    "steps": min(slat_sampling_steps, 20),
                     "cfg_strength": slat_guidance_strength,
                 }
             )
 
-        # video
-        video = render_utils.render_video(outputs['gaussian'][0], num_frames=
-        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=
+        # generate the video
+        video = render_utils.render_video(outputs['gaussian'][0], num_frames=40)['color']
+        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=40)['normal']
         video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
 
         trial_id = str(uuid.uuid4())
         video_path = f"{TMP_DIR}/{trial_id}.mp4"
         os.makedirs(os.path.dirname(video_path), exist_ok=True)
-        imageio.mimsave(video_path, video, fps=
+        imageio.mimsave(video_path, video, fps=20)
 
         state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
 
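Two small helpers that mirror the changes in the two hunks above: capping the longest image edge at 768 px and wrapping the heavy pipeline call in autocast. This is a sketch assuming Pillow; the LANCZOS resample filter and the helper names are assumptions, since the diff truncates the actual resize() call:

import torch
from PIL import Image

MAX_SIZE = 768  # matches the new limit chosen for the L40S

def limit_image_size(image: Image.Image, max_size: int = MAX_SIZE) -> Image.Image:
    """Downscale so the longest edge is at most max_size, keeping the aspect ratio."""
    if max(image.size) > max_size:
        ratio = max_size / max(image.size)
        new_size = (int(image.width * ratio), int(image.height * ratio))
        # LANCZOS is an assumed choice; the diff does not show the resample argument.
        image = image.resize(new_size, Image.LANCZOS)
    return image

def run_with_autocast(fn, *args, **kwargs):
    # torch.cuda.amp.autocast() is what the diff uses; torch.autocast("cuda")
    # is the newer, equivalent spelling.
    with torch.cuda.amp.autocast():
        return fn(*args, **kwargs)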
@@ -253,26 +255,23 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
             pipeline.to("cpu")
         raise e
 
-
+
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
     try:
         free_memory()
 
-        # get the Flux pipeline
         flux_pipe = get_flux_pipe()
         if flux_pipe is None:
             raise Exception("Failed to load Flux pipeline")
 
-        #
-        height = min(height, 1024)
+        # adjust the size limits for the L40S
+        height = min(height, 1024)
         width = min(width, 1024)
 
-        # process the prompt
-        base_prompt = "wbgmsst, 3D, white background"
         translated_prompt = translate_if_korean(prompt)
-        final_prompt = f"{translated_prompt},
+        final_prompt = f"{translated_prompt}, wbgmsst, 3D, white background"
 
-        with torch.cuda.amp.autocast():
+        with torch.cuda.amp.autocast():
             output = flux_pipe(
                 prompt=[final_prompt],
                 height=height,
@@ -292,7 +291,7 @@ def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
         free_memory()
         raise e
 
-
+
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
     gs, mesh, trial_id = unpack_state(state)
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
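The diff relies on translate_if_korean() together with the Helsinki-NLP/opus-mt-ko-en translator initialized earlier, but the helper's body is not shown. A hypothetical reconstruction, assuming Hangul detection by Unicode range (the regex and function body are guesses, not taken from app.py):

import re
from transformers import pipeline as translation_pipeline

# Assumed reconstruction of the helper used by generate_image_from_text().
_translator = translation_pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-ko-en",
    device="cuda",  # as in the diff; drop this argument to run on CPU
)

def translate_if_korean(text: str) -> str:
    # Hangul syllables occupy U+AC00..U+D7A3; if any are present, translate.
    if re.search(r"[\uac00-\ud7a3]", text):
        return _translator(text)[0]["translation_text"]
    return text

# Usage: the translated prompt is then combined with the fixed style tags,
# matching the new final_prompt line in the diff.
user_prompt = "..."  # a Korean or English prompt from the UI
final_prompt = f"{translate_if_korean(user_prompt)}, wbgmsst, 3D, white background"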