@@ -29,7 +29,7 @@ Our work underscores the potential of larger UNet architectures in the first sta
Before you can use IF, you need to accept its usage conditions. To do so:
1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be logged in
- 2. Accept the license on the model card of [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models.
+ 2. Accept the license on the model card of [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models.
3. Make sure to login locally. Install `huggingface_hub`
```sh
pip install huggingface_hub --upgrade
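# then log in locally so your access token is available (an assumed follow-up
# step not shown in this hunk; the standard CLI command is):
huggingface-cli login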
@@ -62,7 +62,7 @@ The following sections give more in-detail examples of how to use IF. Specifical
**Available checkpoints**

- *Stage-1*
- - [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0)
+ - [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0)
- [DeepFloyd/IF-I-L-v1.0](https://huggingface.co/DeepFloyd/IF-I-L-v1.0)
- [DeepFloyd/IF-I-M-v1.0](https://huggingface.co/DeepFloyd/IF-I-M-v1.0)
@@ -90,7 +90,7 @@ from diffusers.utils import pt_to_pil
import torch

# stage 1
- stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
stage_1.enable_model_cpu_offload()

# stage 2
@@ -162,7 +162,7 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
original_image = original_image.resize((768, 512))

# stage 1
- stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
stage_1.enable_model_cpu_offload()

# stage 2
@@ -244,7 +244,7 @@ mask_image = Image.open(BytesIO(response.content))
mask_image = mask_image

# stage 1
- stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
stage_1.enable_model_cpu_offload()

# stage 2
@@ -305,7 +305,7 @@ In addition to being loaded with `from_pretrained`, Pipelines can also be loaded
```python
from diffusers import IFPipeline, IFSuperResolutionPipeline

- pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0")
+ pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0")
pipe_2 = IFSuperResolutionPipeline.from_pretrained("DeepFloyd/IF-II-L-v1.0")
@@ -326,7 +326,7 @@ pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components)
The simplest optimization to run IF faster is to move all model components to the GPU.

```py
- pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.to("cuda")
```
@@ -352,7 +352,7 @@ the input image which also determines how many steps to run in the denoising pro
A smaller number will vary the image less but run faster.

```py
- pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.to("cuda")

image = pipe(image=image, prompt="<prompt>", strength=0.3).images
@@ -364,7 +364,7 @@ with IF and it might not give expected results.
```py
import torch

- pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.to("cuda")

pipe.text_encoder = torch.compile(pipe.text_encoder)
@@ -378,14 +378,14 @@ When optimizing for GPU memory, we can use the standard diffusers cpu offloading
Either the model based CPU offloading,

```py
- pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
```

or the more aggressive layer based CPU offloading.

```py
- pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+ pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.enable_sequential_cpu_offload()
```
@@ -395,13 +395,13 @@ Additionally, T5 can be loaded in 8bit precision
from transformers import T5EncoderModel

text_encoder = T5EncoderModel.from_pretrained(
-     "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+     "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
)

from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
-     "DeepFloyd/IF-I-IF-v1.0",
+     "DeepFloyd/IF-I-XL-v1.0",
    text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
    unet=None,
    device_map="auto",
@@ -422,13 +422,13 @@ from transformers import T5EncoderModel
from diffusers.utils import pt_to_pil

text_encoder = T5EncoderModel.from_pretrained(
-     "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+     "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
)

# text to image

pipe = DiffusionPipeline.from_pretrained(
-     "DeepFloyd/IF-I-IF-v1.0",
+     "DeepFloyd/IF-I-XL-v1.0",
    text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
    unet=None,
    device_map="auto",
@@ -444,7 +444,7 @@ gc.collect()
torch.cuda.empty_cache()

pipe = IFPipeline.from_pretrained(
-     "DeepFloyd/IF-I-IF-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
+     "DeepFloyd/IF-I-XL-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
)

generator = torch.Generator().manual_seed(0)
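# A minimal sketch of how the reloaded fp16 pipeline could consume the prompt
# embeddings produced earlier with the 8-bit text encoder; the names
# prompt_embeds / negative_embeds are assumed from the preceding encoding step.
image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images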