Describe the bug
When trying to use IP-Adapter with the pipeline_animatediff_controlnet community pipeline, I get a TypeError: encode_image() takes 4 positional arguments but 5 were given (full traceback in the Logs section below).
Reproduction
import torch
from diffusers.utils import load_image
from PIL import Image
from transformers import CLIPVisionModelWithProjection
from diffusers import ControlNetModel, AutoencoderKL, MotionAdapter, LCMScheduler
from diffusers import DiffusionPipeline
model_id = "frankjoshua/toonyou_beta6"
motion_adapter_id = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", torch_dtype=torch.float16)
controlnet_id = ControlNetModel.from_pretrained("lllyasviel/control_v11f1p_sd15_depth", torch_dtype=torch.float16)
vae_id = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
image_encoder_id = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16)
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    vae=vae_id,
    controlnet=controlnet_id,
    motion_adapter=motion_adapter_id,
    image_encoder=image_encoder_id,
    custom_pipeline="pipeline_animatediff_controlnet",
    torch_dtype=torch.float16,
)
pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.safetensors")
pipe.set_ip_adapter_scale(1.0)
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()
# ControlNet depth map sequence (16 frames)
conditioning = []
for x in range(1, 17):
    conditioning.append(Image.open(f"debug/depth.{x:04d}.png").resize((512, 512)))
ip_image = Image.open("debug/IPAdapter.jpg")
result = pipe(
    prompt="a girl with pink hair",
    negative_prompt="bad quality",
    num_frames=16,
    width=512,
    height=512,
    ip_adapter_image=ip_image,
    conditioning_frames=conditioning,
    controlnet_conditioning_scale=0.8,
    num_inference_steps=20,
    guidance_scale=7,
).frames[0]
for x in range(len(result)):
    result[x].save(f"debug/res.{x + 1:04d}.png")
Logs
Traceback (most recent call last):
  File "debug1.py", line 36, in <module>
    result = pipe(
  File "/mnt/shared/foss-20/diffusers1/DIFF/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/mnt/shared/foss-20/diffusers1/cache/modules/diffusers_modules/git/pipeline_animatediff_controlnet.py", line 952, in __call__
    image_embeds = self.prepare_ip_adapter_image_embeds(
  File "/mnt/shared/foss-20/diffusers1/cache/modules/diffusers_modules/git/pipeline_animatediff_controlnet.py", line 407, in prepare_ip_adapter_image_embeds
    single_image_embeds, single_negative_image_embeds = self.encode_image(
TypeError: encode_image() takes 4 positional arguments but 5 were given
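From the traceback this looks like a plain argument-count mismatch: prepare_ip_adapter_image_embeds in the community pipeline passes one more positional argument to self.encode_image than that method accepts. The sketch below is not diffusers code; it only reproduces the same kind of TypeError with made-up names, and the extra argument being output_hidden_states is my assumption, not something I verified in the pipeline source.

# Hypothetical stand-in class, only to illustrate the failure mode.
class OldStylePipeline:
    def encode_image(self, image, device, num_images_per_prompt):
        # old-style signature: self + 3 parameters = 4 positional arguments
        return image

    def prepare_ip_adapter_image_embeds(self, image):
        # a newer caller passes one extra positional argument
        # (assumed to be output_hidden_states in the real pipeline)
        return self.encode_image(image, "cpu", 1, True)

OldStylePipeline().prepare_ip_adapter_image_embeds("ip_image")
# TypeError: encode_image() takes 4 positional arguments but 5 were given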
System Info
- diffusers version: 0.27.0
- Platform: Linux-5.15.0-94-generic-x86_64-with-glibc2.29
- Python version: 3.8.0
- PyTorch version (GPU?): 2.2.1+cu121 (True)
- Huggingface_hub version: 0.21.4
- Transformers version: 4.38.2
- Accelerate version: 0.27.2
- xFormers version: not installed
- Using GPU in script?:
- Using distributed or parallel set-up in script?:
Who can help?
No response