updated doc for stable diffusion pipelines (huggingface#1770)

yiyixuxu · yiyixuxu · patrickvonplaten · web-flow · commit 1d53c56ee6d1 · 2023-01-02T11:35:51.000-10:00
* add a doc page for each pipeline under api/pipelines/stable_diffusion * add pipeline examples to docstrings * updated stable_diffusion_2 page * updated default markdown syntax to list methods based on huggingface#1870 * add function decorator Co-authored-by: yiyixuxu <yixu@Yis-MacBook-Pro.lan> Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
diff --git a/models/embeddings_flax.py b/models/embeddings_flax.py
@@ -27,6 +27,7 @@ def get_sinusoidal_embeddings(
     scale: float = 1.0,
 ) -> jnp.ndarray:
     """Returns the positional encoding (same as Tensor2Tensor).
+
     Args:
         timesteps: a 1-D Tensor of N indices, one per batch element.
         These may be fractional.
diff --git a/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/pipelines/alt_diffusion/pipeline_alt_diffusion.py
@@ -31,14 +31,29 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import deprecate, logging
+from ...utils import deprecate, logging, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from . import AltDiffusionPipelineOutput, RobertaSeriesModelWithTransformation
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+EXAMPLE_DOC_STRING = """
+    Examples:
+        ```py
+        >>> import torch
+        >>> from diffusers import AltDiffusionPipeline
+
+        >>> pipe = AltDiffusionPipeline.from_pretrained("BAAI/AltDiffusion-m9", torch_dtype=torch.float16)
+        >>> pipe = pipe.to("cuda")
+
+        >>> # "dark elf princess, highly detailed, d & d, fantasy, highly detailed, digital painting, trending on artstation, concept art, sharp focus, illustration, art by artgerm and greg rutkowski and fuji choko and viktoria gavrilenko and hoang lap"
+        >>> prompt = "黑暗精灵公主，非常详细，幻想，非常详细，数字绘画，概念艺术，敏锐的焦点，插图"
+        >>> image = pipe(prompt).images[0]
+        ```
+"""
+
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline with Stable->Alt, CLIPTextModel->RobertaSeriesModelWithTransformation, CLIPTokenizer->XLMRobertaTokenizer, AltDiffusionSafetyChecker->StableDiffusionSafetyChecker
 class AltDiffusionPipeline(DiffusionPipeline):
@@ -407,6 +422,7 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
         return latents
 
     @torch.no_grad()
+    @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
         prompt: Union[str, List[str]],
@@ -471,6 +487,8 @@ def __call__(
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
 
+        Examples:
+
         Returns:
             [`~pipelines.stable_diffusion.AltDiffusionPipelineOutput`] or `tuple`:
             [`~pipelines.stable_diffusion.AltDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
diff --git a/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -33,14 +33,43 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import PIL_INTERPOLATION, deprecate, logging
+from ...utils import PIL_INTERPOLATION, deprecate, logging, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from . import AltDiffusionPipelineOutput, RobertaSeriesModelWithTransformation
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+EXAMPLE_DOC_STRING = """
+    Examples:
+        ```py
+        >>> import requests
+        >>> import torch
+        >>> from PIL import Image
+        >>> from io import BytesIO
+
+        >>> from diffusers import AltDiffusionImg2ImgPipeline
+
+        >>> device = "cuda"
+        >>> model_id_or_path = "BAAI/AltDiffusion-m9"
+        >>> pipe = AltDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
+        >>> pipe = pipe.to(device)
+
+        >>> url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
+
+        >>> response = requests.get(url)
+        >>> init_image = Image.open(BytesIO(response.content)).convert("RGB")
+        >>> init_image = init_image.resize((768, 512))
+
+        >>> # "A fantasy landscape, trending on artstation"
+        >>> prompt = "幻想风景, artstation"
+
+        >>> images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
+        >>> images[0].save("幻想风景.png")
+        ```
+"""
+
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess
 def preprocess(image):
@@ -450,6 +479,7 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt
         return latents
 
     @torch.no_grad()
+    @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
         prompt: Union[str, List[str]],
@@ -514,6 +544,7 @@ def __call__(
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
+        Examples:
 
         Returns:
             [`~pipelines.stable_diffusion.AltDiffusionPipelineOutput`] or `tuple`:
diff --git a/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -17,7 +17,6 @@
 
 import torch
 
-from diffusers.utils import is_accelerate_available
 from packaging import version
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
@@ -31,14 +30,28 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import deprecate, logging
+from ...utils import deprecate, is_accelerate_available, logging, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+EXAMPLE_DOC_STRING = """
+    Examples:
+        ```py
+        >>> import torch
+        >>> from diffusers import StableDiffusionPipeline
+
+        >>> pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
+        >>> pipe = pipe.to("cuda")
+
+        >>> prompt = "a photo of an astronaut riding a horse on mars"
+        >>> image = pipe(prompt).images[0]
+        ```
+"""
+
 
 class StableDiffusionPipeline(DiffusionPipeline):
     r"""
@@ -406,6 +419,7 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
         return latents
 
     @torch.no_grad()
+    @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
         prompt: Union[str, List[str]],
@@ -470,6 +484,8 @@ def __call__(
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
 
+        Examples:
+
         Returns:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
diff --git a/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
@@ -505,6 +505,29 @@ def __call__(
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
 
+        Examples:
+
+        ```py
+        >>> import torch
+        >>> import requests
+        >>> from PIL import Image
+
+        >>> from diffusers import StableDiffusionDepth2ImgPipeline
+
+        >>> pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
+        ...     "stabilityai/stable-diffusion-2-depth",
+        ...     torch_dtype=torch.float16,
+        ... )
+        >>> pipe.to("cuda")
+
+
+        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        >>> init_image = Image.open(requests.get(url, stream=True).raw)
+        >>> prompt = "two tigers"
+        >>> n_propmt = "bad, deformed, ugly, bad anotomy"
+        >>> image = pipe(prompt=prompt, image=init_image, negative_prompt=n_propmt, strength=0.7).images[0]
+        ```
+
         Returns:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
diff --git a/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -19,7 +19,6 @@
 import torch
 
 import PIL
-from diffusers.utils import is_accelerate_available
 from packaging import version
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
@@ -33,14 +32,42 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import PIL_INTERPOLATION, deprecate, logging
+from ...utils import PIL_INTERPOLATION, deprecate, is_accelerate_available, logging, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+EXAMPLE_DOC_STRING = """
+    Examples:
+        ```py
+        >>> import requests
+        >>> import torch
+        >>> from PIL import Image
+        >>> from io import BytesIO
+
+        >>> from diffusers import StableDiffusionImg2ImgPipeline
+
+        >>> device = "cuda"
+        >>> model_id_or_path = "runwayml/stable-diffusion-v1-5"
+        >>> pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
+        >>> pipe = pipe.to(device)
+
+        >>> url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
+
+        >>> response = requests.get(url)
+        >>> init_image = Image.open(BytesIO(response.content)).convert("RGB")
+        >>> init_image = init_image.resize((768, 512))
+
+        >>> prompt = "A fantasy landscape, trending on artstation"
+
+        >>> images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
+        >>> images[0].save("fantasy_landscape.png")
+        ```
+"""
+
 
 def preprocess(image):
     if isinstance(image, torch.Tensor):
@@ -455,6 +482,7 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt
         return latents
 
     @torch.no_grad()
+    @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
         prompt: Union[str, List[str]],
@@ -519,6 +547,7 @@ def __call__(
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
+        Examples:
 
         Returns:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
diff --git a/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -616,6 +616,37 @@ def __call__(
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
 
+        Examples:
+
+        ```py
+        >>> import PIL
+        >>> import requests
+        >>> import torch
+        >>> from io import BytesIO
+
+        >>> from diffusers import StableDiffusionInpaintPipeline
+
+
+        >>> def download_image(url):
+        ...     response = requests.get(url)
+        ...     return PIL.Image.open(BytesIO(response.content)).convert("RGB")
+
+
+        >>> img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+        >>> mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+
+        >>> init_image = download_image(img_url).resize((512, 512))
+        >>> mask_image = download_image(mask_url).resize((512, 512))
+
+        >>> pipe = StableDiffusionInpaintPipeline.from_pretrained(
+        ...     "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
+        ... )
+        >>> pipe = pipe.to("cuda")
+
+        >>> prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
+        >>> image = pipe(prompt=prompt, image=init_image, mask_image=mask_image).images[0]
+        ```
+
         Returns:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
diff --git a/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -390,6 +390,32 @@ def __call__(
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
 
+        Examples:
+        ```py
+        >>> import requests
+        >>> from PIL import Image
+        >>> from io import BytesIO
+        >>> from diffusers import StableDiffusionUpscalePipeline
+        >>> import torch
+
+        >>> # load model and scheduler
+        >>> model_id = "stabilityai/stable-diffusion-x4-upscaler"
+        >>> pipeline = StableDiffusionUpscalePipeline.from_pretrained(
+        ...     model_id, revision="fp16", torch_dtype=torch.float16
+        ... )
+        >>> pipeline = pipeline.to("cuda")
+
+        >>> # let's download an  image
+        >>> url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png"
+        >>> response = requests.get(url)
+        >>> low_res_img = Image.open(BytesIO(response.content)).convert("RGB")
+        >>> low_res_img = low_res_img.resize((128, 128))
+        >>> prompt = "a white cat"
+
+        >>> upscaled_image = pipeline(prompt=prompt, image=low_res_img).images[0]
+        >>> upscaled_image.save("upsampled_cat.png")
+        ```
+
         Returns:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
diff --git a/training_utils.py b/training_utils.py
@@ -60,6 +60,7 @@ def __init__(
             to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps),
             gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999
             at 215.4k steps).
+
         Args:
             inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1.
             power (float): Exponential factor of EMA warmup. Default: 2/3.
diff --git a/utils/__init__.py b/utils/__init__.py
@@ -32,6 +32,7 @@
     WEIGHTS_NAME,
 )
 from .deprecation_utils import deprecate
+from .doc_utils import replace_example_docstring
 from .dynamic_modules_utils import get_class_from_dynamic_module
 from .hub_utils import HF_HUB_OFFLINE, http_user_agent
 from .import_utils import (
diff --git a/utils/doc_utils.py b/utils/doc_utils.py
@@ -0,0 +1,38 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Doc utilities: Utilities related to documentation
+"""
+import re
+
+
+def replace_example_docstring(example_docstring):
+    def docstring_decorator(fn):
+        func_doc = fn.__doc__
+        lines = func_doc.split("\n")
+        i = 0
+        while i < len(lines) and re.search(r"^\s*Examples?:\s*$", lines[i]) is None:
+            i += 1
+        if i < len(lines):
+            lines[i] = example_docstring
+            func_doc = "\n".join(lines)
+        else:
+            raise ValueError(
+                f"The function {fn} should have an empty 'Examples:' in its docstring as placeholder, "
+                f"current docstring is:\n{func_doc}"
+            )
+        fn.__doc__ = func_doc
+        return fn
+
+    return docstring_decorator

Original file line number	Diff line number	Diff line change
`@@ -32,6 +32,7 @@`
`32`	`32`	`WEIGHTS_NAME,`
`33`	`33`	`)`
`34`	`34`	`from .deprecation_utils import deprecate`
	`35`	`+from .doc_utils import replace_example_docstring`
`35`	`36`	`from .dynamic_modules_utils import get_class_from_dynamic_module`
`36`	`37`	`from .hub_utils import HF_HUB_OFFLINE, http_user_agent`
`37`	`38`	`from .import_utils import (`