@@ -38,6 +38,9 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
38
38
- A link to the `.ckpt` file (for example
39
39
`"https://huggingface.co/<repo_id>/blob/main/<path_to_file>.ckpt"`) on the Hub.
40
40
- A path to a *file* containing all pipeline weights.
41
+ config_file (`str`, *optional*):
42
+ Filepath to the configuration YAML file associated with the model. If not provided, it defaults to:
43
+ https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml
41
44
torch_dtype (`str` or `torch.dtype`, *optional*):
42
45
Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
43
46
dtype is automatically derived from the model's weights.
@@ -65,6 +68,13 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
65
68
image_size (`int`, *optional*, defaults to 512):
66
69
The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
67
70
Diffusion v2 base model. Use 768 for Stable Diffusion v2.
71
+ scaling_factor (`float`, *optional*, defaults to 0.18215):
72
+ The component-wise standard deviation of the trained latent space computed using the first batch of the
73
+ training set. This is used to scale the latent space to have unit variance when training the diffusion
74
+ model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the
75
+ diffusion model. When decoding, the latents are scaled back to the original scale with the formula:
76
+ `z = 1 / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution
77
+ Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
68
78
use_safetensors (`bool`, *optional*, defaults to `True`):
69
79
If set to `None`, the safetensors weights are downloaded if they're available **and** if the
70
80
safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors
@@ -92,6 +102,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
92
102
"""
93
103
94
104
original_config_file = kwargs .pop ("original_config_file" , None )
105
+ config_file = kwargs .pop ("config_file" , None )
95
106
resume_download = kwargs .pop ("resume_download" , False )
96
107
force_download = kwargs .pop ("force_download" , False )
97
108
proxies = kwargs .pop ("proxies" , None )
@@ -103,6 +114,13 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
103
114
use_safetensors = kwargs .pop ("use_safetensors" , True )
104
115
105
116
class_name = cls .__name__
117
+
118
+ if (config_file is not None ) and (original_config_file is not None ):
119
+ raise ValueError (
120
+ "You cannot pass both `config_file` and `original_config_file` to `from_single_file`. Please use only one of these arguments."
121
+ )
122
+
123
+ original_config_file = original_config_file or config_file
106
124
original_config , checkpoint = fetch_ldm_config_and_checkpoint (
107
125
pretrained_model_link_or_path = pretrained_model_link_or_path ,
108
126
class_name = class_name ,
@@ -118,7 +136,10 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
118
136
)
119
137
120
138
image_size = kwargs .pop ("image_size" , None )
121
- component = create_diffusers_vae_model_from_ldm (class_name , original_config , checkpoint , image_size = image_size )
139
+ scaling_factor = kwargs .pop ("scaling_factor" , None )
140
+ component = create_diffusers_vae_model_from_ldm (
141
+ class_name , original_config , checkpoint , image_size = image_size , scaling_factor = scaling_factor
142
+ )
122
143
vae = component ["vae" ]
123
144
if torch_dtype is not None :
124
145
vae = vae .to (torch_dtype )
0 commit comments