
Commit acf0d60

yiyixuxu authored and committed
Update src/diffusers/image_processor.py
Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py

Co-authored-by: Patrick von Platen <[email protected]>

update img2img
1 parent 106c43a commit acf0d60


3 files changed: +47 -19 lines changed


src/diffusers/image_processor.py

Lines changed: 20 additions & 6 deletions
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import warnings
-from typing import Union
+from typing import Union, Optional, List
 
 import numpy as np
 import PIL
@@ -93,6 +93,13 @@ def normalize(images):
         Normalize an image array to [-1,1]
         """
         return 2.0 * images - 1.0
+
+    @staticmethod
+    def denormalize(images):
+        """
+        Denormalize an image array to [0,1]
+        """
+        return (images / 2 + 0.5).clamp(0, 1)
 
     def resize(self, images: PIL.Image.Image) -> PIL.Image.Image:
         """
@@ -165,9 +172,14 @@ def preprocess(
 
     def postprocess(
         self,
-        image,
+        image: torch.FloatTensor,
         output_type: str = "pil",
-    ):
+        do_normalize: Optional[Union[List[bool], bool]] = None,
+    ):
+        if not isinstance(image, torch.Tensor):
+            raise ValueError(
+                f"Input for postprocess is in incorrect format: {type(image)}. we only support pytorch tensor"
+            )
         if output_type not in ["latent", "pt", "np", "pil"]:
             deprecation_message = (
                 f"the output_type {output_type} is outdated and has been set to `np`. Please make sure to set it to one of these instead: "
@@ -179,10 +191,12 @@ def postprocess(
         if output_type == "latent":
             return image
 
-        if self.config.do_normalize:
-            image = (image / 2 + 0.5).clamp(0, 1)
+        if not isinstance(do_normalize, list):
+            do_normalize = image.shape[0] * [do_normalize or self.config.do_normalize]
+
+        image = torch.stack([self.denormalize(image[i]) if do_normalize[i] else image[i] for i in range(image.shape[0])])
 
-        if isinstance(image, torch.Tensor) and output_type == "pt":
+        if output_type == "pt":
             return image
 
         image = self.pt_to_numpy(image)
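
The key behavioural change above is that postprocess can now denormalize per image instead of all-or-nothing. Below is a minimal standalone sketch of that per-image logic using plain torch, with a made-up batch and flag list rather than real pipeline outputs (names and values here are illustrative, not part of the commit):

import torch


def denormalize(images: torch.Tensor) -> torch.Tensor:
    # Mirror of the new static method: map [-1, 1] back to [0, 1].
    return (images / 2 + 0.5).clamp(0, 1)


batch = torch.rand(3, 3, 64, 64) * 2 - 1   # three fake images in [-1, 1]
do_normalize = [True, False, True]         # hypothetical per-image flags

# Broadcast a single bool to the whole batch, as postprocess does when it
# receives a plain bool (or falls back to the config default).
if not isinstance(do_normalize, list):
    do_normalize = batch.shape[0] * [bool(do_normalize)]

# Denormalize only the flagged images, leave the others untouched.
out = torch.stack(
    [denormalize(batch[i]) if do_normalize[i] else batch[i] for i in range(batch.shape[0])]
)
print(out.shape, float(out.min()), float(out.max()))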

src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py

Lines changed: 12 additions & 7 deletions
@@ -423,11 +423,14 @@ def _encode_prompt(
 
         return prompt_embeds
 
-    def run_safety_checker(self, image, device, dtype, output_type="pil"):
-        if self.safety_checker is None or output_type == "latent":
+    def run_safety_checker(self, image, device, dtype):
+        if self.safety_checker is None:
             has_nsfw_concept = False
         else:
-            feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
+            if torch.is_tensor(image):
+                feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
+            else:
+                feature_extractor_input = self.image_processor.numpy_to_pil(image)
             safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
             image, has_nsfw_concept = self.safety_checker(
                 images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
@@ -705,10 +708,12 @@ def __call__(
 
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor).sample
-
-        image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype, output_type=output_type)
-
-        image = self.image_processor.postprocess(image, output_type=output_type)
+            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+        else:
+            has_nsfw_concept = False
+
+        do_normalize = [not has_nsfw for has_nsfw in has_nsfw_concept] if isinstance(has_nsfw_concept, list) else not has_nsfw_concept
+        image = self.image_processor.postprocess(image, output_type=output_type, do_normalize=do_normalize)
 
         # Offload last model to CPU
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
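
From the caller's side nothing changes: the pipeline now maps the safety-checker output to per-image do_normalize flags internally before postprocess runs. A hedged end-to-end sketch of the affected path, assuming a CUDA device and a checkpoint such as runwayml/stable-diffusion-v1-5 (the checkpoint choice is an assumption, not part of this commit):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# The default output_type="pil" exercises the updated path: VAE decode,
# run_safety_checker, then image_processor.postprocess with per-image flags.
result = pipe("a photo of an astronaut riding a horse", num_inference_steps=25)
print(result.nsfw_content_detected)  # per-image safety-checker flags
result.images[0].save("astronaut.png")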

src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py

Lines changed: 15 additions & 6 deletions
@@ -436,10 +436,13 @@ def _encode_prompt(
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
     def run_safety_checker(self, image, device, dtype, output_type="pil"):
-        if self.safety_checker is None or output_type == "latent":
+        if self.safety_checker is None:
             has_nsfw_concept = False
         else:
-            feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
+            if torch.is_tensor(image):
+                feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
+            else:
+                feature_extractor_input = self.image_processor.numpy_to_pil(image)
             safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
             image, has_nsfw_concept = self.safety_checker(
                 images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
@@ -744,10 +747,16 @@ def __call__(
 
         if not output_type == "latent":
            image = self.vae.decode(latents / self.vae.config.scaling_factor).sample
-
-        image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype, output_type=output_type)
-
-        image = self.image_processor.postprocess(image, output_type=output_type)
+            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+        else:
+            has_nsfw_concept = False
+
+        do_normalize = (
+            [not has_nsfw for has_nsfw in has_nsfw_concept]
+            if isinstance(has_nsfw_concept, list)
+            else not has_nsfw_concept
+        )
+        image = self.image_processor.postprocess(image, output_type=output_type, do_normalize=do_normalize)
 
         # Offload last model to CPU
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
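
The img2img pipeline picks up the identical post-processing change, so the same call pattern applies there. A hedged sketch, again assuming a GPU and the same (illustrative) checkpoint, with a placeholder PIL image standing in for a real init image:

import torch
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

init_image = Image.new("RGB", (512, 512), color=(120, 160, 200))  # placeholder input image

# Same post-processing path as the text-to-image pipeline: safety-checker
# results become per-image do_normalize values before postprocess runs.
result = pipe("a fantasy landscape, matte painting", image=init_image, strength=0.75)
print(result.nsfw_content_detected)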
