
Commit e204844

Merge branch 'main' into lora-load-adapter

2 parents c89c318 + 3deed72

40 files changed: +692 −472 lines

.github/workflows/nightly_tests.yml

Lines changed: 56 additions & 0 deletions

@@ -180,6 +180,62 @@ jobs:
           pip install slack_sdk tabulate
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
+  run_big_gpu_torch_tests:
+    name: Torch tests on big GPU
+    strategy:
+      fail-fast: false
+      max-parallel: 2
+    runs-on:
+      group: aws-g6e-xlarge-plus
+    container:
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host --gpus 0
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: NVIDIA-SMI
+        run: nvidia-smi
+      - name: Install dependencies
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test]
+          python -m uv pip install peft@git+https://github.com/huggingface/peft.git
+          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install pytest-reportlog
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      - name: Selected Torch CUDA Test on big GPU
+        env:
+          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+          CUBLAS_WORKSPACE_CONFIG: :16:8
+          BIG_GPU_MEMORY: 40
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+            -m "big_gpu_with_torch_cuda" \
+            --make-reports=tests_big_gpu_torch_cuda \
+            --report-log=tests_big_gpu_torch_cuda.log \
+            tests/
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: |
+          cat reports/tests_big_gpu_torch_cuda_stats.txt
+          cat reports/tests_big_gpu_torch_cuda_failures_short.txt
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: torch_cuda_big_gpu_test_reports
+          path: reports
+      - name: Generate Report and Notify Channel
+        if: always()
+        run: |
+          pip install slack_sdk tabulate
+          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+
   run_flax_tpu_tests:
     name: Nightly Flax TPU Tests
     runs-on: docker-tpu
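
Note: the new nightly job selects tests by the big_gpu_with_torch_cuda pytest marker and exports BIG_GPU_MEMORY=40. A minimal sketch of how a test might opt in, assuming the marker is applied with a plain @pytest.mark decorator; the has_big_gpu helper and test name are illustrative, not diffusers API:

    import os

    import pytest
    import torch

    BIG_GPU_MEMORY = int(os.getenv("BIG_GPU_MEMORY", "40"))  # GiB; the workflow exports 40


    def has_big_gpu() -> bool:
        # Illustrative gate: require a CUDA device with at least BIG_GPU_MEMORY GiB.
        if not torch.cuda.is_available():
            return False
        total_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
        return total_gib >= BIG_GPU_MEMORY


    @pytest.mark.big_gpu_with_torch_cuda  # the marker `-m` selects in this job
    @pytest.mark.skipif(not has_big_gpu(), reason=f"needs a >= {BIG_GPU_MEMORY} GiB GPU")
    def test_large_pipeline_smoke():
        # Placeholder body; real tests would load a large pipeline here.
        assert torch.cuda.is_available()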

examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py

Lines changed: 3 additions & 11 deletions

@@ -1778,15 +1778,10 @@ def load_model_hook(models, input_dir):
         if not args.enable_t5_ti:
             # pure textual inversion - only clip
             if pure_textual_inversion:
-                params_to_optimize = [
-                    text_parameters_one_with_lr,
-                ]
+                params_to_optimize = [text_parameters_one_with_lr]
                 te_idx = 0
             else:  # regular te training or regular pivotal for clip
-                params_to_optimize = [
-                    transformer_parameters_with_lr,
-                    text_parameters_one_with_lr,
-                ]
+                params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr]
                 te_idx = 1
         elif args.enable_t5_ti:
             # pivotal tuning of clip & t5
@@ -1809,9 +1804,7 @@ def load_model_hook(models, input_dir):
             ]
             te_idx = 1
         else:
-            params_to_optimize = [
-                transformer_parameters_with_lr,
-            ]
+            params_to_optimize = [transformer_parameters_with_lr]
 
     # Optimizer creation
     if not (args.optimizer.lower() == "prodigy" or args.optimizer.lower() == "adamw"):
@@ -1871,7 +1864,6 @@ def load_model_hook(models, input_dir):
         params_to_optimize[-1]["lr"] = args.learning_rate
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
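
Note: the dropped lr=args.learning_rate kwarg (the same deletion recurs in the SD1.5/SDXL advanced, CogVideoX, and Flux DreamBooth scripts below) matters for the Prodigy branch. Every entry of params_to_optimize is a param-group dict that already carries its own "lr" key, and per-group values always override the optimizer-level default, so the top-level kwarg was redundant; removing it also keeps Prodigy's adaptive default (lr=1.0) for any group that does not set one. A self-contained sketch of the resulting pattern, assuming the prodigyopt package; the Linear modules are stand-ins for the real models:

    import torch
    from prodigyopt import Prodigy  # assumption: pip install prodigyopt

    # Stand-ins for the transformer / text-encoder LoRA parameters.
    transformer = torch.nn.Linear(8, 8)
    text_encoder = torch.nn.Linear(8, 8)

    # Every group carries its own "lr", as in the training scripts, so a
    # top-level lr= kwarg would be redundant: per-group values always win.
    params_to_optimize = [
        {"params": transformer.parameters(), "lr": 1.0},
        {"params": text_encoder.parameters(), "lr": 1.0},
    ]

    optimizer = Prodigy(
        params_to_optimize,  # no global lr: Prodigy's adaptive default (1.0) stands
        betas=(0.9, 0.99),
        weight_decay=1e-2,
    )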

examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py

Lines changed: 39 additions & 42 deletions

@@ -67,6 +67,7 @@
     convert_state_dict_to_kohya,
     is_wandb_available,
 )
+from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
 from diffusers.utils.import_utils import is_xformers_available
 
 
@@ -79,30 +80,27 @@
 def save_model_card(
     repo_id: str,
     use_dora: bool,
-    images=None,
-    base_model=str,
+    images: list = None,
+    base_model: str = None,
     train_text_encoder=False,
     train_text_encoder_ti=False,
     token_abstraction_dict=None,
-    instance_prompt=str,
-    validation_prompt=str,
+    instance_prompt=None,
+    validation_prompt=None,
     repo_folder=None,
     vae_path=None,
 ):
-    img_str = "widget:\n"
     lora = "lora" if not use_dora else "dora"
-    for i, image in enumerate(images):
-        image.save(os.path.join(repo_folder, f"image_{i}.png"))
-        img_str += f"""
-        - text: '{validation_prompt if validation_prompt else ' ' }'
-          output:
-            url:
-                "image_{i}.png"
-        """
-    if not images:
-        img_str += f"""
-        - text: '{instance_prompt}'
-        """
+
+    widget_dict = []
+    if images is not None:
+        for i, image in enumerate(images):
+            image.save(os.path.join(repo_folder, f"image_{i}.png"))
+            widget_dict.append(
+                {"text": validation_prompt if validation_prompt else " ", "output": {"url": f"image_{i}.png"}}
+            )
+    else:
+        widget_dict.append({"text": instance_prompt})
     embeddings_filename = f"{repo_folder}_emb"
     instance_prompt_webui = re.sub(r"<s\d+>", "", re.sub(r"<s\d+>", embeddings_filename, instance_prompt, count=1))
     ti_keys = ", ".join(f'"{match}"' for match in re.findall(r"<s\d+>", instance_prompt))
@@ -137,24 +135,7 @@ def save_model_card(
         trigger_str += f"""
 to trigger concept `{key}` → use `{tokens}` in your prompt \n
 """
-
-    yaml = f"""---
-tags:
-- stable-diffusion
-- stable-diffusion-diffusers
-- diffusers-training
-- text-to-image
-- diffusers
-- {lora}
-- template:sd-lora
-{img_str}
-base_model: {base_model}
-instance_prompt: {instance_prompt}
-license: openrail++
----
-"""
-
-    model_card = f"""
+    model_description = f"""
 # SD1.5 LoRA DreamBooth - {repo_id}
 
 <Gallery />
@@ -202,8 +183,28 @@ def save_model_card(
 Special VAE used for training: {vae_path}.
 
 """
-    with open(os.path.join(repo_folder, "README.md"), "w") as f:
-        f.write(yaml + model_card)
+    model_card = load_or_create_model_card(
+        repo_id_or_path=repo_id,
+        from_training=True,
+        license="openrail++",
+        base_model=base_model,
+        prompt=instance_prompt,
+        model_description=model_description,
+        inference=True,
+        widget=widget_dict,
+    )
+
+    tags = [
+        "text-to-image",
+        "diffusers",
+        "diffusers-training",
+        lora,
+        "template:sd-lora", "stable-diffusion",
"stable-diffusion-diffusers",
204+
]
205+
model_card = populate_model_card(model_card, tags=tags)
206+
207+
model_card.save(os.path.join(repo_folder, "README.md"))
207208

208209

209210
def import_model_class_from_model_name_or_path(
@@ -1358,10 +1359,7 @@ def load_model_hook(models, input_dir):
13581359
else args.adam_weight_decay,
13591360
"lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate,
13601361
}
1361-
params_to_optimize = [
1362-
unet_lora_parameters_with_lr,
1363-
text_lora_parameters_one_with_lr,
1364-
]
1362+
params_to_optimize = [unet_lora_parameters_with_lr, text_lora_parameters_one_with_lr]
13651363
else:
13661364
params_to_optimize = [unet_lora_parameters_with_lr]
13671365

@@ -1423,7 +1421,6 @@ def load_model_hook(models, input_dir):
14231421

14241422
optimizer = optimizer_class(
14251423
params_to_optimize,
1426-
lr=args.learning_rate,
14271424
betas=(args.adam_beta1, args.adam_beta2),
14281425
beta3=args.prodigy_beta3,
14291426
weight_decay=args.adam_weight_decay,
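
Note: the model-card refactor above replaces hand-assembled YAML front matter with load_or_create_model_card and populate_model_card from diffusers.utils.hub_utils, which build and serialize a huggingface_hub ModelCard with structured widget metadata. A minimal sketch of the call pattern; the argument names follow the diff, while the concrete repo id, base model, and prompts are placeholders:

    import os

    from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card

    repo_folder = "./lora-output"  # placeholder output directory
    os.makedirs(repo_folder, exist_ok=True)

    model_card = load_or_create_model_card(
        repo_id_or_path="user/sd15-lora",  # placeholder repo id
        from_training=True,
        license="openrail++",
        base_model="runwayml/stable-diffusion-v1-5",
        prompt="a photo of <s0><s1> dog",
        model_description="# SD1.5 LoRA DreamBooth - user/sd15-lora",
        inference=True,
        widget=[{"text": "a photo of <s0><s1> dog", "output": {"url": "image_0.png"}}],
    )
    # populate_model_card merges the tag list into the card's metadata.
    model_card = populate_model_card(model_card, tags=["text-to-image", "diffusers", "lora"])
    model_card.save(os.path.join(repo_folder, "README.md"))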

examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py

Lines changed: 0 additions & 1 deletion

@@ -1794,7 +1794,6 @@ def load_model_hook(models, input_dir):
 
     optimizer = optimizer_class(
         params_to_optimize,
-        lr=args.learning_rate,
         betas=(args.adam_beta1, args.adam_beta2),
         beta3=args.prodigy_beta3,
         weight_decay=args.adam_weight_decay,

examples/cogvideo/train_cogvideox_image_to_video_lora.py

Lines changed: 0 additions & 1 deletion

@@ -947,7 +947,6 @@ def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False):
 
     optimizer = optimizer_class(
         params_to_optimize,
-        lr=args.learning_rate,
         betas=(args.adam_beta1, args.adam_beta2),
         beta3=args.prodigy_beta3,
         weight_decay=args.adam_weight_decay,

examples/cogvideo/train_cogvideox_lora.py

Lines changed: 0 additions & 1 deletion

@@ -969,7 +969,6 @@ def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False):
 
     optimizer = optimizer_class(
         params_to_optimize,
-        lr=args.learning_rate,
         betas=(args.adam_beta1, args.adam_beta2),
         beta3=args.prodigy_beta3,
         weight_decay=args.adam_weight_decay,

examples/controlnet/train_controlnet_flux.py

Lines changed: 1 addition & 0 deletions

@@ -152,6 +152,7 @@ def log_validation(
             guidance_scale=3.5,
             generator=generator,
         ).images[0]
+        image = image.resize((args.resolution, args.resolution))
         images.append(image)
         image_logs.append(
             {"validation_image": validation_image, "images": images, "validation_prompt": validation_prompt}

examples/dreambooth/train_dreambooth_flux.py

Lines changed: 21 additions & 7 deletions

@@ -57,6 +57,7 @@
     is_wandb_available,
 )
 from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
+from diffusers.utils.import_utils import is_torch_npu_available
 from diffusers.utils.torch_utils import is_compiled_module
 
 
@@ -68,6 +69,12 @@
 
 logger = get_logger(__name__)
 
+if is_torch_npu_available():
+    import torch_npu
+
+    torch.npu.config.allow_internal_format = False
+    torch.npu.set_compile_mode(jit_compile=False)
+
 
 def save_model_card(
     repo_id: str,
@@ -189,6 +196,8 @@ def log_validation(
     del pipeline
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
+    elif is_torch_npu_available():
+        torch_npu.npu.empty_cache()
 
     return images
 
@@ -1035,7 +1044,9 @@ def main(args):
         cur_class_images = len(list(class_images_dir.iterdir()))
 
         if cur_class_images < args.num_class_images:
-            has_supported_fp16_accelerator = torch.cuda.is_available() or torch.backends.mps.is_available()
+            has_supported_fp16_accelerator = (
+                torch.cuda.is_available() or torch.backends.mps.is_available() or is_torch_npu_available()
+            )
             torch_dtype = torch.float16 if has_supported_fp16_accelerator else torch.float32
             if args.prior_generation_precision == "fp32":
                 torch_dtype = torch.float32
@@ -1073,6 +1084,8 @@ def main(args):
             del pipeline
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
+            elif is_torch_npu_available():
+                torch_npu.npu.empty_cache()
 
     # Handle the repository creation
     if accelerator.is_main_process:
@@ -1226,10 +1239,7 @@ def load_model_hook(models, input_dir):
             "weight_decay": args.adam_weight_decay_text_encoder,
             "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate,
         }
-        params_to_optimize = [
-            transformer_parameters_with_lr,
-            text_parameters_one_with_lr,
-        ]
+        params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr]
     else:
        params_to_optimize = [transformer_parameters_with_lr]
 
@@ -1291,7 +1301,6 @@ def load_model_hook(models, input_dir):
 
     optimizer = optimizer_class(
         params_to_optimize,
-        lr=args.learning_rate,
         betas=(args.adam_beta1, args.adam_beta2),
         beta3=args.prodigy_beta3,
         weight_decay=args.adam_weight_decay,
@@ -1358,6 +1367,8 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers):
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
+    elif is_torch_npu_available():
+        torch_npu.npu.empty_cache()
 
     # If custom instance prompts are NOT provided (i.e. the instance prompt is used for all images),
     # pack the statically computed variables appropriately here. This is so that we don't
@@ -1723,7 +1734,10 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
         )
         if not args.train_text_encoder:
             del text_encoder_one, text_encoder_two
-            torch.cuda.empty_cache()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            elif is_torch_npu_available():
+                torch_npu.npu.empty_cache()
         gc.collect()
 
     # Save the lora layers
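
Note: the NPU hunks repeat one pattern: guard torch_npu behind is_torch_npu_available() and mirror every torch.cuda.empty_cache() with torch_npu.npu.empty_cache(). A sketch consolidating that pattern into a single helper; the helper name is illustrative, while the individual calls are the ones the diff uses:

    import gc

    import torch
    from diffusers.utils.import_utils import is_torch_npu_available

    def free_memory():
        # One place for the CUDA/NPU cache-clearing branch the diff repeats.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        elif is_torch_npu_available():
            import torch_npu  # Ascend backend; only importable on NPU machines

            torch_npu.npu.empty_cache()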

examples/dreambooth/train_dreambooth_lora_flux.py

Lines changed: 5 additions & 7 deletions

@@ -177,7 +177,7 @@ def log_validation(
         f"Running validation... \n Generating {args.num_validation_images} images with prompt:"
         f" {args.validation_prompt}."
     )
-    pipeline = pipeline.to(accelerator.device, dtype=torch_dtype)
+    pipeline = pipeline.to(accelerator.device)
     pipeline.set_progress_bar_config(disable=True)
 
     # run inference
@@ -1335,10 +1335,7 @@ def load_model_hook(models, input_dir):
             "weight_decay": args.adam_weight_decay_text_encoder,
             "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate,
         }
-        params_to_optimize = [
-            transformer_parameters_with_lr,
-            text_parameters_one_with_lr,
-        ]
+        params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr]
     else:
         params_to_optimize = [transformer_parameters_with_lr]
 
@@ -1400,7 +1397,6 @@ def load_model_hook(models, input_dir):
 
     optimizer = optimizer_class(
         params_to_optimize,
-        lr=args.learning_rate,
         betas=(args.adam_beta1, args.adam_beta2),
         beta3=args.prodigy_beta3,
         weight_decay=args.adam_weight_decay,
@@ -1710,7 +1706,7 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
     )
 
     # handle guidance
-    if transformer.config.guidance_embeds:
+    if accelerator.unwrap_model(transformer).config.guidance_embeds:
         guidance = torch.tensor([args.guidance_scale], device=accelerator.device)
         guidance = guidance.expand(model_input.shape[0])
     else:
@@ -1823,6 +1819,8 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
     # create pipeline
     if not args.train_text_encoder:
         text_encoder_one, text_encoder_two = load_text_encoders(text_encoder_cls_one, text_encoder_cls_two)
+        text_encoder_one.to(weight_dtype)
+        text_encoder_two.to(weight_dtype)
     pipeline = FluxPipeline.from_pretrained(
         args.pretrained_model_name_or_path,
         vae=vae,
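
Note: accelerator.prepare() may wrap the transformer in DistributedDataParallel, which does not forward custom attributes such as .config, so the guidance check now goes through accelerator.unwrap_model; in single-process runs, where no wrapping happens, the call is a harmless no-op. A self-contained sketch of the pattern; the stand-in model and its config dict are placeholders, not the training script itself:

    import torch
    from accelerate import Accelerator

    accelerator = Accelerator()
    model = torch.nn.Linear(4, 4)
    model.config = {"guidance_embeds": True}  # stand-in for a diffusers model config

    model = accelerator.prepare(model)

    # DDP-wrapped modules do not expose the inner model's attributes, so go
    # through unwrap_model; this also works when prepare() did not wrap.
    if accelerator.unwrap_model(model).config["guidance_embeds"]:
        guidance = torch.tensor([3.5], device=accelerator.device)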
