import torch
import numpy as np
from diffusers import DiffusionPipeline, DDPMScheduler
from accelerate import Accelerator
from datasets import load_dataset
from tqdm.auto import tqdm
from transformers import TrainingArguments
import gradio as gr

# Configuration
pretrained_model_name_or_path = "black-forest-labs/FLUX.1-dev"
dataset_name = "DucHaiten/anime-SDXL"  # use any dataset you like
learning_rate = 1e-5
num_train_epochs = 2  # adjust as needed
train_batch_size = 1  # use a small batch size on free Spaces
gradient_accumulation_steps = 4  # adjust as needed
output_dir = "flux-anime"
image_resize = 128  # adjust as needed

# NOTE: the loop below follows the standard UNet + DDPM text-to-image recipe and
# assumes the pipeline exposes `unet`, `vae`, `tokenizer`, and `text_encoder`
# components. FLUX pipelines actually ship a `transformer` backbone with a
# flow-matching scheduler, so adapt these attribute names (or start from a
# UNet-based checkpoint) before running this end to end.

# Load the model and scheduler
pipeline = DiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path, torch_dtype=torch.float16
)
pipeline.scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)
pipeline.enable_xformers_memory_efficient_attention()  # requires xformers

# Keep the trainable UNet in fp32 so fp16 mixed precision can scale/unscale its
# gradients; the frozen VAE and text encoder stay in fp16 to save memory.
pipeline.unet.to(dtype=torch.float32)

# Load the dataset
dataset = load_dataset(dataset_name)["train"]

# Data preprocessing (assumes the dataset has `image` and `text` columns)
def preprocess_function(examples):
    images = [
        image.convert("RGB").resize((image_resize, image_resize))
        for image in examples["image"]
    ]
    # Scale pixels to [-1, 1] and move channels first, as the VAE expects.
    examples["pixel_values"] = [
        np.asarray(image, dtype=np.float32).transpose(2, 0, 1) / 127.5 - 1.0
        for image in images
    ]
    examples["prompt"] = list(examples["text"])
    return examples

# Process the dataset
processed_dataset = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=4,
    remove_columns=dataset.column_names,
)
processed_dataset.set_format(
    type="torch", columns=["pixel_values"], output_all_columns=True
)
train_dataloader = torch.utils.data.DataLoader(
    processed_dataset, batch_size=train_batch_size, shuffle=True
)

# Optimizer
optimizer = torch.optim.AdamW(
    pipeline.unet.parameters(),
    lr=learning_rate,
)

# Initialize the accelerator
accelerator = Accelerator(
    gradient_accumulation_steps=gradient_accumulation_steps,
    mixed_precision="fp16",
)
pipeline.unet, optimizer, train_dataloader = accelerator.prepare(
    pipeline.unet, optimizer, train_dataloader
)
# The frozen VAE and text encoder only need to live on the right device.
pipeline.vae.to(accelerator.device)
pipeline.text_encoder.to(accelerator.device)

# Training arguments (defined for reference only; the manual loop below does not
# consume them, so e.g. push_to_hub has no effect here)
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    num_train_epochs=num_train_epochs,
    learning_rate=learning_rate,
    fp16=True,
    logging_dir="./logs",
    report_to="tensorboard",
    push_to_hub=True,  # push the model to the Hugging Face Hub
)

# Console progress bar
progress_bar = tqdm(total=num_train_epochs * len(train_dataloader))

# --- Gradio components ---
with gr.Blocks() as interface:
    gr.Markdown("## Fine-tuning FLUX for Anime")  # change the title to match your dataset
    loss_textbox = gr.Textbox(label="Loss")
    epoch_textbox = gr.Textbox(label="Epoch")
    progress_bar_gradio = gr.Slider(
        minimum=0.0, maximum=1.0, value=0.0, label="Progress", interactive=False
    )
    output_image = gr.Image(label="Generated Image")
    start_button = gr.Button("Start training")

    # Training loop, written as a generator: each yield streams the latest
    # loss / epoch / progress / preview image into the components above.
    def train():
        total_steps = num_train_epochs * len(train_dataloader)
        global_step = 0
        preview = None
        for epoch in range(num_train_epochs):
            pipeline.unet.train()
            for batch in train_dataloader:
                with accelerator.accumulate(pipeline.unet):
                    # Encode the images into VAE latents
                    latents = pipeline.vae.encode(
                        batch["pixel_values"].to(dtype=torch.float16)
                    ).latent_dist.sample()
                    latents = latents * pipeline.vae.config.scaling_factor

                    # Sample noise and random timesteps, then noise the latents
                    noise = torch.randn_like(latents)
                    bsz = latents.shape[0]
                    timesteps = torch.randint(
                        0,
                        pipeline.scheduler.config.num_train_timesteps,
                        (bsz,),
                        device=latents.device,
                    ).long()
                    noisy_latents = pipeline.scheduler.add_noise(latents, noise, timesteps)

                    # Encode the prompts (assumes a CLIP-style tokenizer/text encoder)
                    with torch.no_grad():
                        text_inputs = pipeline.tokenizer(
                            batch["prompt"],
                            padding="max_length",
                            max_length=pipeline.tokenizer.model_max_length,
                            truncation=True,
                            return_tensors="pt",
                        ).to(latents.device)
                        encoder_hidden_states = pipeline.text_encoder(
                            text_inputs.input_ids
                        )[0]

                    # Predict the noise and take an optimizer step
                    model_pred = pipeline.unet(
                        noisy_latents, timesteps, encoder_hidden_states
                    ).sample
                    loss = torch.nn.functional.mse_loss(
                        model_pred.float(), noise.float(), reduction="mean"
                    )
                    accelerator.backward(loss)
                    optimizer.step()
                    optimizer.zero_grad()

                global_step += 1
                progress_bar.update(1)

                # Generate a preview every 100 steps (change the prompt to match your dataset)
                if global_step % 100 == 0:
                    with torch.no_grad(), accelerator.autocast():
                        preview = pipeline(
                            "anime style image of a girl with blue hair"
                        ).images[0]

                # Update the Gradio components
                yield loss.item(), epoch, global_step / total_steps, preview

        # Save the model
        pipeline.save_pretrained(output_dir)

    start_button.click(
        train,
        inputs=None,
        outputs=[loss_textbox, epoch_textbox, progress_bar_gradio, output_image],
    )

interface.launch(server_name="0.0.0.0")
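
# --- Publishing the fine-tuned pipeline (sketch) ---
# The TrainingArguments above set push_to_hub=True but are never handed to a
# Trainer, so nothing is uploaded automatically. The helper below is an
# illustrative sketch for pushing the weights saved by train(); the repo id is a
# placeholder, and a write-scoped Hugging Face token (e.g. an HF_TOKEN Space
# secret) is assumed to be configured.
def push_finetuned_pipeline(repo_id="your-username/flux-anime"):
    """Upload the pipeline saved in `output_dir` to the Hugging Face Hub."""
    finetuned = DiffusionPipeline.from_pretrained(output_dir, torch_dtype=torch.float16)
    finetuned.push_to_hub(repo_id)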