
Commit 39a7628

Merge branch 'main' into animatediff-img2video
2 parents fdbb68f + 318556b commit 39a7628

File tree: 80 files changed (+8384 −6274 lines)


README.md

Lines changed: 2 additions & 2 deletions

@@ -77,7 +77,7 @@ Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggi
 
 ## Quickstart
 
-Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 16000+ checkpoints):
+Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 19000+ checkpoints):
 
 ```python
 from diffusers import DiffusionPipeline

@@ -219,7 +219,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
 - https://github.com/deep-floyd/IF
 - https://github.com/bentoml/BentoML
 - https://github.com/bmaltais/kohya_ss
-- +7000 other amazing GitHub repositories 💪
+- +8000 other amazing GitHub repositories 💪
 
 Thank you for using us ❤️.
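For reference, the `from_pretrained` quickstart mentioned in the changed README line works roughly like the following minimal sketch; the checkpoint name and GPU usage are illustrative, not part of this diff:

```python
import torch
from diffusers import DiffusionPipeline

# Load any pretrained text-to-image pipeline from the Hub (checkpoint name is illustrative).
pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipeline.to("cuda")

image = pipeline("An image of a squirrel in Picasso style").images[0]
image.save("squirrel.png")
```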

docs/source/en/api/models/unet-motion.md

Lines changed: 1 addition & 1 deletion

@@ -22,4 +22,4 @@ The abstract from the paper is:
 [[autodoc]] UNetMotionModel
 
 ## UNet3DConditionOutput
-[[autodoc]] models.unet_3d_condition.UNet3DConditionOutput
+[[autodoc]] models.unets.unet_3d_condition.UNet3DConditionOutput

docs/source/en/api/models/unet.md

Lines changed: 1 addition & 1 deletion

@@ -22,4 +22,4 @@ The abstract from the paper is:
 [[autodoc]] UNet1DModel
 
 ## UNet1DOutput
-[[autodoc]] models.unet_1d.UNet1DOutput
+[[autodoc]] models.unets.unet_1d.UNet1DOutput

docs/source/en/api/models/unet2d-cond.md

Lines changed: 3 additions & 3 deletions

@@ -22,10 +22,10 @@ The abstract from the paper is:
 [[autodoc]] UNet2DConditionModel
 
 ## UNet2DConditionOutput
-[[autodoc]] models.unet_2d_condition.UNet2DConditionOutput
+[[autodoc]] models.unets.unet_2d_condition.UNet2DConditionOutput
 
 ## FlaxUNet2DConditionModel
-[[autodoc]] models.unet_2d_condition_flax.FlaxUNet2DConditionModel
+[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionModel
 
 ## FlaxUNet2DConditionOutput
-[[autodoc]] models.unet_2d_condition_flax.FlaxUNet2DConditionOutput
+[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput

docs/source/en/api/models/unet2d.md

Lines changed: 1 addition & 1 deletion

@@ -22,4 +22,4 @@ The abstract from the paper is:
 [[autodoc]] UNet2DModel
 
 ## UNet2DOutput
-[[autodoc]] models.unet_2d.UNet2DOutput
+[[autodoc]] models.unets.unet_2d.UNet2DOutput

docs/source/en/api/models/unet3d-cond.md

Lines changed: 1 addition & 1 deletion

@@ -22,4 +22,4 @@ The abstract from the paper is:
 [[autodoc]] UNet3DConditionModel
 
 ## UNet3DConditionOutput
-[[autodoc]] models.unet_3d_condition.UNet3DConditionOutput
+[[autodoc]] models.unets.unet_3d_condition.UNet3DConditionOutput

examples/community/pipeline_animatediff_controlnet.py

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@
 from diffusers.loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from diffusers.models import AutoencoderKL, ControlNetModel, UNet2DConditionModel, UNetMotionModel
 from diffusers.models.lora import adjust_lora_scale_text_encoder
-from diffusers.models.unet_motion_model import MotionAdapter
+from diffusers.models.unets.unet_motion_model import MotionAdapter
 from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers import (

examples/community/stable_diffusion_controlnet_reference.py

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@
 from diffusers import StableDiffusionControlNetPipeline
 from diffusers.models import ControlNetModel
 from diffusers.models.attention import BasicTransformerBlock
-from diffusers.models.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, UpBlock2D
+from diffusers.models.unets.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, UpBlock2D
 from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.utils import logging

examples/community/stable_diffusion_reference.py

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@
 
 from diffusers import StableDiffusionPipeline
 from diffusers.models.attention import BasicTransformerBlock
-from diffusers.models.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, UpBlock2D
+from diffusers.models.unets.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, UpBlock2D
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import rescale_noise_cfg
 from diffusers.utils import PIL_INTERPOLATION, logging

examples/community/stable_diffusion_xl_reference.py

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@
 
 from diffusers import StableDiffusionXLPipeline
 from diffusers.models.attention import BasicTransformerBlock
-from diffusers.models.unet_2d_blocks import (
+from diffusers.models.unets.unet_2d_blocks import (
     CrossAttnDownBlock2D,
     CrossAttnUpBlock2D,
     DownBlock2D,

examples/research_projects/controlnetxs/controlnetxs.py

Lines changed: 2 additions & 2 deletions

@@ -26,7 +26,7 @@
 from diffusers.models.autoencoders import AutoencoderKL
 from diffusers.models.lora import LoRACompatibleConv
 from diffusers.models.modeling_utils import ModelMixin
-from diffusers.models.unet_2d_blocks import (
+from diffusers.models.unets.unet_2d_blocks import (
     CrossAttnDownBlock2D,
     CrossAttnUpBlock2D,
     DownBlock2D,

@@ -36,7 +36,7 @@
     UpBlock2D,
     Upsample2D,
 )
-from diffusers.models.unet_2d_condition import UNet2DConditionModel
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
 from diffusers.utils import BaseOutput, logging
 
 
examples/research_projects/diffusion_dpo/train_diffusion_dpo_sdxl.py

Lines changed: 4 additions & 5 deletions

@@ -740,6 +740,10 @@ def preprocess_train(examples):
         # Resize.
         combined_im = train_resize(combined_im)
 
+        # Flipping.
+        if not args.no_flip and random.random() < 0.5:
+            combined_im = train_flip(combined_im)
+
         # Cropping.
         if not args.random_crop:
             y1 = max(0, int(round((combined_im.shape[1] - args.resolution) / 2.0)))

@@ -749,11 +753,6 @@ def preprocess_train(examples):
             y1, x1, h, w = train_crop.get_params(combined_im, (args.resolution, args.resolution))
             combined_im = crop(combined_im, y1, x1, h, w)
 
-        # Flipping.
-        if random.random() < 0.5:
-            x1 = combined_im.shape[2] - x1
-            combined_im = train_flip(combined_im)
-
         crop_top_left = (y1, x1)
         crop_top_lefts.append(crop_top_left)
         combined_im = normalize(combined_im)
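The intent of this change is to apply the horizontal flip before the crop coordinates are computed, so the recorded `(y1, x1)` describes the image that is actually cropped; the removed code flipped after cropping and mirrored `x1` by hand. A minimal standalone sketch of the corrected ordering, assuming torchvision transforms as in the script (the helper name and standalone structure are illustrative, not the script's exact code):

```python
import random

import torch
from torchvision import transforms
from torchvision.transforms.functional import crop

resolution = 1024
train_resize = transforms.Resize(resolution)
train_crop = transforms.RandomCrop(resolution)
train_flip = transforms.RandomHorizontalFlip(p=1.0)


def preprocess(combined_im: torch.Tensor, no_flip: bool = False, random_crop: bool = True):
    combined_im = train_resize(combined_im)

    # Flip before cropping, so the crop coordinates recorded below refer to the
    # image that is actually fed to the model.
    if not no_flip and random.random() < 0.5:
        combined_im = train_flip(combined_im)

    if random_crop:
        y1, x1, h, w = train_crop.get_params(combined_im, (resolution, resolution))
    else:
        y1 = max(0, int(round((combined_im.shape[1] - resolution) / 2.0)))
        x1 = max(0, int(round((combined_im.shape[2] - resolution) / 2.0)))
        h = w = resolution
    combined_im = crop(combined_im, y1, x1, h, w)
    return combined_im, (y1, x1)
```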

examples/text_to_image/README_sdxl.md

Lines changed: 60 additions & 0 deletions

@@ -183,6 +183,66 @@ The above command will also run inference as fine-tuning progresses and log the
 
 * SDXL's VAE is known to suffer from numerical instability issues. This is why we also expose a CLI argument namely `--pretrained_vae_model_name_or_path` that lets you specify the location of a better VAE (such as [this one](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix)).
 
+
+### Using DeepSpeed
+Using DeepSpeed, you can reduce GPU memory consumption and train on GPUs with less memory. DeepSpeed can offload model parameters to CPU memory, or distribute parameters, gradients, and optimizer states across multiple GPUs. This allows larger models to be trained on the same hardware.
+
+First, use the `accelerate config` command to select DeepSpeed, or set up DeepSpeed manually in an accelerate config file.
+
+Here is an example config file for DeepSpeed. For a more detailed explanation of the options, refer to this [link](https://huggingface.co/docs/accelerate/usage_guides/deepspeed).
+```yaml
+compute_environment: LOCAL_MACHINE
+debug: true
+deepspeed_config:
+  gradient_accumulation_steps: 1
+  gradient_clipping: 1.0
+  offload_optimizer_device: none
+  offload_param_device: none
+  zero3_init_flag: false
+  zero_stage: 2
+distributed_type: DEEPSPEED
+downcast_bf16: 'no'
+machine_rank: 0
+main_training_function: main
+mixed_precision: fp16
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
+```
+Save this configuration as an `accelerate_config.yaml` file and pass its path via the `ACCELERATE_CONFIG_FILE` variable. This lets you train your SDXL LoRA model with DeepSpeed; other Stable Diffusion models can be trained the same way.
+
+```shell
+export MODEL_NAME="stabilityai/stable-diffusion-xl-base-1.0"
+export VAE_NAME="madebyollin/sdxl-vae-fp16-fix"
+export DATASET_NAME="lambdalabs/pokemon-blip-captions"
+export ACCELERATE_CONFIG_FILE="your accelerate_config.yaml"
+
+accelerate launch --config_file $ACCELERATE_CONFIG_FILE train_text_to_image_lora_sdxl.py \
+  --pretrained_model_name_or_path=$MODEL_NAME \
+  --pretrained_vae_model_name_or_path=$VAE_NAME \
+  --dataset_name=$DATASET_NAME --caption_column="text" \
+  --resolution=1024 \
+  --train_batch_size=1 \
+  --num_train_epochs=2 \
+  --checkpointing_steps=2 \
+  --learning_rate=1e-04 \
+  --lr_scheduler="constant" \
+  --lr_warmup_steps=0 \
+  --mixed_precision="fp16" \
+  --max_train_steps=20 \
+  --validation_epochs=20 \
+  --seed=1234 \
+  --output_dir="sd-pokemon-model-lora-sdxl" \
+  --validation_prompt="cute dragon creature"
+
+```
+
+
 ### Finetuning the text encoder and UNet
 
 The script also allows you to finetune the `text_encoder` along with the `unet`.

examples/text_to_image/train_text_to_image_lora_sdxl.py

Lines changed: 5 additions & 4 deletions

@@ -652,21 +652,22 @@ def save_model_hook(models, weights, output_dir):
         text_encoder_two_lora_layers_to_save = None
 
         for model in models:
-            if isinstance(model, type(unwrap_model(unet))):
+            if isinstance(unwrap_model(model), type(unwrap_model(unet))):
                 unet_lora_layers_to_save = convert_state_dict_to_diffusers(get_peft_model_state_dict(model))
-            elif isinstance(model, type(unwrap_model(text_encoder_one))):
+            elif isinstance(unwrap_model(model), type(unwrap_model(text_encoder_one))):
                 text_encoder_one_lora_layers_to_save = convert_state_dict_to_diffusers(
                     get_peft_model_state_dict(model)
                 )
-            elif isinstance(model, type(unwrap_model(text_encoder_two))):
+            elif isinstance(unwrap_model(model), type(unwrap_model(text_encoder_two))):
                 text_encoder_two_lora_layers_to_save = convert_state_dict_to_diffusers(
                     get_peft_model_state_dict(model)
                 )
             else:
                 raise ValueError(f"unexpected save model: {model.__class__}")
 
             # make sure to pop weight so that corresponding model is not saved again
-            weights.pop()
+            if weights:
+                weights.pop()
 
         StableDiffusionXLPipeline.save_lora_weights(
             output_dir,
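For context: when training runs under wrappers such as DeepSpeed or `torch.compile`, the objects handed to this save hook may not be the raw modules, so the type checks need an unwrapped model, and `weights` can be empty (hence the `if weights:` guard). A rough, hypothetical sketch of the kind of unwrapping helper these scripts rely on, not the commit's exact implementation:

```python
from accelerate import Accelerator

accelerator = Accelerator()


def unwrap_model(model):
    # Strip Accelerate wrappers (DDP, DeepSpeed engine, ...) to get the bare nn.Module.
    model = accelerator.unwrap_model(model)
    # Strip the torch.compile wrapper, if any, so isinstance checks see the original class.
    return getattr(model, "_orig_mod", model)
```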

scripts/convert_amused.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@
 
 from diffusers import VQModel
 from diffusers.models.attention_processor import AttnProcessor
-from diffusers.models.uvit_2d import UVit2DModel
+from diffusers.models.unets.uvit_2d import UVit2DModel
 from diffusers.pipelines.amused.pipeline_amused import AmusedPipeline
 from diffusers.schedulers import AmusedScheduler
 

scripts/convert_consistency_decoder.py

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
 from diffusers import AutoencoderKL, ConsistencyDecoderVAE, DiffusionPipeline, StableDiffusionPipeline, UNet2DModel
 from diffusers.models.autoencoders.vae import Encoder
 from diffusers.models.embeddings import TimestepEmbedding
-from diffusers.models.unet_2d_blocks import ResnetDownsampleBlock2D, ResnetUpsampleBlock2D, UNetMidBlock2D
+from diffusers.models.unets.unet_2d_blocks import ResnetDownsampleBlock2D, ResnetUpsampleBlock2D, UNetMidBlock2D
 
 
 args = ArgumentParser()

src/diffusers/__init__.py

Lines changed: 4 additions & 2 deletions

@@ -153,6 +153,7 @@
         "LCMScheduler",
         "PNDMScheduler",
         "RePaintScheduler",
+        "SASolverScheduler",
         "SchedulerMixin",
         "ScoreSdeVeScheduler",
         "UnCLIPScheduler",

@@ -382,7 +383,7 @@
 else:
     _import_structure["models.controlnet_flax"] = ["FlaxControlNetModel"]
     _import_structure["models.modeling_flax_utils"] = ["FlaxModelMixin"]
-    _import_structure["models.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
+    _import_structure["models.unets.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
     _import_structure["models.vae_flax"] = ["FlaxAutoencoderKL"]
     _import_structure["pipelines"].extend(["FlaxDiffusionPipeline"])
     _import_structure["schedulers"].extend(

@@ -531,6 +532,7 @@
         LCMScheduler,
         PNDMScheduler,
         RePaintScheduler,
+        SASolverScheduler,
         SchedulerMixin,
         ScoreSdeVeScheduler,
         UnCLIPScheduler,

@@ -711,7 +713,7 @@
     else:
         from .models.controlnet_flax import FlaxControlNetModel
         from .models.modeling_flax_utils import FlaxModelMixin
-        from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel
+        from .models.unets.unet_2d_condition_flax import FlaxUNet2DConditionModel
         from .models.vae_flax import FlaxAutoencoderKL
         from .pipelines import FlaxDiffusionPipeline
         from .schedulers import (
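With the new `SASolverScheduler` export, the scheduler becomes importable from the top-level `diffusers` namespace. A minimal sketch of swapping it into an existing pipeline, assuming a diffusers build that includes this commit (the checkpoint name is illustrative):

```python
import torch
from diffusers import DiffusionPipeline, SASolverScheduler

# Checkpoint name is illustrative; any pipeline with a compatible scheduler config works.
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe.scheduler = SASolverScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

image = pipe("a photo of an astronaut riding a horse").images[0]
```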

src/diffusers/experimental/rl/value_guided_sampling.py

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@
 import torch
 import tqdm
 
-from ...models.unet_1d import UNet1DModel
+from ...models.unets.unet_1d import UNet1DModel
 from ...pipelines import DiffusionPipeline
 from ...utils.dummy_pt_objects import DDPMScheduler
 from ...utils.torch_utils import randn_tensor

src/diffusers/models/__init__.py

Lines changed: 21 additions & 18 deletions

@@ -39,19 +39,19 @@
     _import_structure["t5_film_transformer"] = ["T5FilmDecoder"]
     _import_structure["transformer_2d"] = ["Transformer2DModel"]
     _import_structure["transformer_temporal"] = ["TransformerTemporalModel"]
-    _import_structure["unet_1d"] = ["UNet1DModel"]
-    _import_structure["unet_2d"] = ["UNet2DModel"]
-    _import_structure["unet_2d_condition"] = ["UNet2DConditionModel"]
-    _import_structure["unet_3d_condition"] = ["UNet3DConditionModel"]
-    _import_structure["unet_kandinsky3"] = ["Kandinsky3UNet"]
-    _import_structure["unet_motion_model"] = ["MotionAdapter", "UNetMotionModel"]
-    _import_structure["unet_spatio_temporal_condition"] = ["UNetSpatioTemporalConditionModel"]
-    _import_structure["uvit_2d"] = ["UVit2DModel"]
+    _import_structure["unets.unet_1d"] = ["UNet1DModel"]
+    _import_structure["unets.unet_2d"] = ["UNet2DModel"]
+    _import_structure["unets.unet_2d_condition"] = ["UNet2DConditionModel"]
+    _import_structure["unets.unet_3d_condition"] = ["UNet3DConditionModel"]
+    _import_structure["unets.unet_kandinsky3"] = ["Kandinsky3UNet"]
+    _import_structure["unets.unet_motion_model"] = ["MotionAdapter", "UNetMotionModel"]
+    _import_structure["unets.unet_spatio_temporal_condition"] = ["UNetSpatioTemporalConditionModel"]
+    _import_structure["unets.uvit_2d"] = ["UVit2DModel"]
     _import_structure["vq_model"] = ["VQModel"]
 
 if is_flax_available():
     _import_structure["controlnet_flax"] = ["FlaxControlNetModel"]
-    _import_structure["unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
+    _import_structure["unets.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
     _import_structure["vae_flax"] = ["FlaxAutoencoderKL"]
 
 

@@ -73,19 +73,22 @@
     from .t5_film_transformer import T5FilmDecoder
     from .transformer_2d import Transformer2DModel
     from .transformer_temporal import TransformerTemporalModel
-    from .unet_1d import UNet1DModel
-    from .unet_2d import UNet2DModel
-    from .unet_2d_condition import UNet2DConditionModel
-    from .unet_3d_condition import UNet3DConditionModel
-    from .unet_kandinsky3 import Kandinsky3UNet
-    from .unet_motion_model import MotionAdapter, UNetMotionModel
-    from .unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel
-    from .uvit_2d import UVit2DModel
+    from .unets import (
+        Kandinsky3UNet,
+        MotionAdapter,
+        UNet1DModel,
+        UNet2DConditionModel,
+        UNet2DModel,
+        UNet3DConditionModel,
+        UNetMotionModel,
+        UNetSpatioTemporalConditionModel,
+        UVit2DModel,
+    )
     from .vq_model import VQModel
 
 if is_flax_available():
     from .controlnet_flax import FlaxControlNetModel
-    from .unet_2d_condition_flax import FlaxUNet2DConditionModel
+    from .unets import FlaxUNet2DConditionModel
     from .vae_flax import FlaxAutoencoderKL
 
 else:
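Taken together with the docs and example updates above, the practical effect of this reorganization is that the UNet implementations now live under the `unets` subpackage while the public exports stay where they were. A small sketch of what keeps working and what moved, assuming a diffusers version containing this commit:

```python
# Top-level and models-level public imports are unchanged by the move.
from diffusers import UNet2DConditionModel
from diffusers.models import UNetMotionModel

# Internal module paths now live under diffusers.models.unets, e.g.:
from diffusers.models.unets.unet_motion_model import MotionAdapter  # new location
# from diffusers.models.unet_motion_model import MotionAdapter      # old location replaced in this diff
```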

src/diffusers/models/autoencoders/autoencoder_kl.py

Lines changed: 5 additions & 5 deletions

@@ -157,7 +157,7 @@ def disable_slicing(self):
         self.use_slicing = False
 
     @property
-    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.attn_processors
+    # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.attn_processors
     def attn_processors(self) -> Dict[str, AttentionProcessor]:
         r"""
         Returns:

@@ -181,7 +181,7 @@ def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors:
 
         return processors
 
-    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
+    # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attn_processor
     def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
         r"""
         Sets the attention processor to use to compute attention.

@@ -216,7 +216,7 @@ def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
         for name, module in self.named_children():
             fn_recursive_attn_processor(name, module, processor)
 
-    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor
+    # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor
     def set_default_attn_processor(self):
         """
         Disables custom attention processors and sets the default attention implementation.

@@ -448,7 +448,7 @@ def forward(
 
         return DecoderOutput(sample=dec)
 
-    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
+    # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
    def fuse_qkv_projections(self):
         """
         Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,

@@ -472,7 +472,7 @@ def fuse_qkv_projections(self):
             if isinstance(module, Attention):
                 module.fuse_projections(fuse=True)
 
-    # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
+    # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
     def unfuse_qkv_projections(self):
         """Disables the fused QKV projection if enabled.
 