@@ -57,6 +57,50 @@ prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt=prompt).images[0]
```
+ ### Image-to-image
+
+ You can use SDXL as follows for *image-to-image*:
+
+ ```py
+ import torch
+ from diffusers import StableDiffusionXLImg2ImgPipeline
+ from diffusers.utils import load_image
+
+ pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-refiner-0.9", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+ )
+ pipe = pipe.to("cuda")
+ url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
+
+ init_image = load_image(url).convert("RGB")
+ prompt = "a photo of an astronaut riding a horse on mars"
+ image = pipe(prompt, image=init_image).images[0]
+ ```
+
+ ### Inpainting
+
+ You can use SDXL as follows for *inpainting*:
+
+ ```py
+ import torch
+ from diffusers import StableDiffusionXLInpaintPipeline
+ from diffusers.utils import load_image
+
+ pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+ )
+ pipe.to("cuda")
+
+ img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+ mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+
+ init_image = load_image(img_url).convert("RGB")
+ mask_image = load_image(mask_url).convert("RGB")
+
+ prompt = "A majestic tiger sitting on a bench"
+ image = pipe(prompt=prompt, image=init_image, mask_image=mask_image, num_inference_steps=50, strength=0.80).images[0]
+ ```
+
### Refining the image output
In addition to the [base model checkpoint](https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9),
@@ -183,24 +227,65 @@ image = refiner(prompt=prompt, image=image[None, :]).images[0]
|---|---|
| ![](https://huggingface.co/datasets/diffusers/docs-images/resolve/main/sd_xl/init_image.png) | ![](https://huggingface.co/datasets/diffusers/docs-images/resolve/main/sd_xl/refined_image.png) |
- ### Image-to-image
+ <Tip>
- ```py
- import torch
- from diffusers import StableDiffusionXLImg2ImgPipeline
+ The refiner also works very well in an inpainting setting. To do so, just make
+ sure you use the [`StableDiffusionXLInpaintPipeline`] class as shown below.
+
+ </Tip>
+
+ To use the refiner for inpainting in the Ensemble of Expert Denoisers setting, you can do the following:
+
+ ```py
+ import torch
+ from diffusers import StableDiffusionXLInpaintPipeline
from diffusers.utils import load_image
- pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-     "stabilityai/stable-diffusion-xl-refiner-0.9", torch_dtype=torch.float16
+ pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)
- pipe = pipe.to("cuda")
- url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
+ pipe.to("cuda")
- init_image = load_image(url).convert("RGB")
- prompt = "a photo of an astronaut riding a horse on mars"
- image = pipe(prompt, image=init_image).images[0]
+ refiner = StableDiffusionXLInpaintPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-refiner-0.9",
+     text_encoder_2=pipe.text_encoder_2,
+     vae=pipe.vae,
+     torch_dtype=torch.float16,
+     use_safetensors=True,
+     variant="fp16",
+ )
+ refiner.to("cuda")
+
+ img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+ mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+
+ init_image = load_image(img_url).convert("RGB")
+ mask_image = load_image(mask_url).convert("RGB")
+
+ prompt = "A majestic tiger sitting on a bench"
+ num_inference_steps = 75
+ high_noise_frac = 0.7
+
+ image = pipe(
+     prompt=prompt,
+     image=init_image,
+     mask_image=mask_image,
+     num_inference_steps=num_inference_steps,
+     strength=0.80,
+     denoising_end=high_noise_frac,
+     output_type="latent",
+ ).images
+ image = refiner(
+     prompt=prompt,
+     image=image,
+     mask_image=mask_image,
+     num_inference_steps=num_inference_steps,
+     denoising_start=high_noise_frac,
+ ).images[0]
```
+ To use the refiner for inpainting in the standard SDE-style setting, simply remove `denoising_end` and `denoising_start` and choose a smaller
+ number of inference steps for the refiner.
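+
+ For example, here is a minimal sketch of that standard setting, reusing the `pipe` and `refiner` objects from the example above; the step counts and the refiner `strength` value are illustrative assumptions, not tuned recommendations:
+
+ ```py
+ # Run the base pipeline to completion (no denoising_end hand-off,
+ # so it returns a regular decoded image instead of latents).
+ image = pipe(
+     prompt=prompt,
+     image=init_image,
+     mask_image=mask_image,
+     num_inference_steps=50,
+     strength=0.80,
+ ).images[0]
+
+ # Refine the decoded image with an ordinary inpainting pass.
+ image = refiner(
+     prompt=prompt,
+     image=image,
+     mask_image=mask_image,
+     num_inference_steps=30,  # illustrative: fewer steps than the base pass
+     strength=0.30,  # illustrative: only lightly re-noise the base output
+ ).images[0]
+ ```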
+
### Loading single file checkpoints / original file format
By making use of [`~diffusers.loaders.FromSingleFileMixin.from_single_file`] you can also load the
@@ -271,3 +356,9 @@ pip install xformers
[[autodoc]] StableDiffusionXLImg2ImgPipeline
- all
- __call__
+
+ ## StableDiffusionXLInpaintPipeline
+
+ [[autodoc]] StableDiffusionXLInpaintPipeline
+ - all
+ - __call__