@@ -115,8 +115,11 @@ def test_audio_diffusion(self):
115
115
output = pipe (generator = generator , steps = 4 , return_dict = False )
116
116
image_from_tuple = output [0 ][0 ]
117
117
118
- assert audio .shape == (1 , (self .dummy_unet .sample_size [1 ] - 1 ) * mel .hop_length )
119
- assert image .height == self .dummy_unet .sample_size [0 ] and image .width == self .dummy_unet .sample_size [1 ]
118
+ assert audio .shape == (1 , (self .dummy_unet .config .sample_size [1 ] - 1 ) * mel .hop_length )
119
+ assert (
120
+ image .height == self .dummy_unet .config .sample_size [0 ]
121
+ and image .width == self .dummy_unet .config .sample_size [1 ]
122
+ )
120
123
image_slice = np .frombuffer (image .tobytes (), dtype = "uint8" )[:10 ]
121
124
image_from_tuple_slice = np .frombuffer (image_from_tuple .tobytes (), dtype = "uint8" )[:10 ]
122
125
expected_slice = np .array ([69 , 255 , 255 , 255 , 0 , 0 , 77 , 181 , 12 , 127 ])
@@ -133,14 +136,14 @@ def test_audio_diffusion(self):
133
136
pipe .set_progress_bar_config (disable = None )
134
137
135
138
np .random .seed (0 )
136
- raw_audio = np .random .uniform (- 1 , 1 , ((dummy_vqvae_and_unet [0 ].sample_size [1 ] - 1 ) * mel .hop_length ,))
139
+ raw_audio = np .random .uniform (- 1 , 1 , ((dummy_vqvae_and_unet [0 ].config . sample_size [1 ] - 1 ) * mel .hop_length ,))
137
140
generator = torch .Generator (device = device ).manual_seed (42 )
138
141
output = pipe (raw_audio = raw_audio , generator = generator , start_step = 5 , steps = 10 )
139
142
image = output .images [0 ]
140
143
141
144
assert (
142
- image .height == self .dummy_vqvae_and_unet [0 ].sample_size [0 ]
143
- and image .width == self .dummy_vqvae_and_unet [0 ].sample_size [1 ]
145
+ image .height == self .dummy_vqvae_and_unet [0 ].config . sample_size [0 ]
146
+ and image .width == self .dummy_vqvae_and_unet [0 ].config . sample_size [1 ]
144
147
)
145
148
image_slice = np .frombuffer (image .tobytes (), dtype = "uint8" )[:10 ]
146
149
expected_slice = np .array ([120 , 117 , 110 , 109 , 138 , 167 , 138 , 148 , 132 , 121 ])
@@ -183,8 +186,8 @@ def test_audio_diffusion(self):
183
186
audio = output .audios [0 ]
184
187
image = output .images [0 ]
185
188
186
- assert audio .shape == (1 , (pipe .unet .sample_size [1 ] - 1 ) * pipe .mel .hop_length )
187
- assert image .height == pipe .unet .sample_size [0 ] and image .width == pipe .unet .sample_size [1 ]
189
+ assert audio .shape == (1 , (pipe .unet .config . sample_size [1 ] - 1 ) * pipe .mel .hop_length )
190
+ assert image .height == pipe .unet .config . sample_size [0 ] and image .width == pipe .unet . config .sample_size [1 ]
188
191
image_slice = np .frombuffer (image .tobytes (), dtype = "uint8" )[:10 ]
189
192
expected_slice = np .array ([151 , 167 , 154 , 144 , 122 , 134 , 121 , 105 , 70 , 26 ])
190
193
0 commit comments