Commit 210fa1e

up

1 parent d44ef85 commit 210fa1e

3 files changed: +140 −0 lines changed

src/diffusers/models/modeling_utils.py

Lines changed: 4 additions & 0 deletions

@@ -529,6 +529,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         variant = kwargs.pop("variant", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        quantization_config = kwargs.pop("quantization_config", None)
 
         allow_pickle = False
         if use_safetensors is None:
@@ -624,6 +625,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
             **kwargs,
         )
 
+        # determine quantization config.
+        ##############################
+
         # Determine if we're loading from a directory of sharded checkpoints.
         is_sharded = False
         index_file = None
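At this commit the new `quantization_config` kwarg is only popped, with the placeholder comment marking where it will be resolved, but the intended call shape is already visible. A minimal sketch of how a caller would pass it, assuming the `BitsAndBytesConfig` exported in the next file and an illustrative checkpoint id:

from diffusers import UNet2DConditionModel
from diffusers.quantizers import BitsAndBytesConfig

# Illustrative checkpoint id; any diffusers model repo with a UNet works.
model = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    subfolder="unet",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)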

src/diffusers/quantizers/__init__.py

Lines changed: 3 additions & 0 deletions

@@ -27,12 +27,14 @@
 
 if is_torch_available():
     _import_structure["base"] = ["DiffusersQuantizer"]
+
     if is_bitsandbytes_available() and is_accelerate_available():
         _import_structure["bitsandbytes"] = [
             "set_module_quantized_tensor_to_device",
             "replace_with_bnb_linear",
             "dequantize_bnb_weight",
             "dequantize_and_replace",
+            "BitsAndBytesConfig"
         ]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
@@ -46,6 +48,7 @@
             replace_with_bnb_linear,
             set_module_quantized_tensor_to_device,
         )
+        from .quantization_config import BitsAndBytesConfig
 
 else:
     import sys
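With those two additions, `BitsAndBytesConfig` is importable from the subpackage, lazily through `_import_structure` at runtime and eagerly under `TYPE_CHECKING` or `DIFFUSERS_SLOW_IMPORT`. A minimal sketch, assuming bitsandbytes and accelerate are installed, since the export sits behind both availability checks:

from diffusers.quantizers import BitsAndBytesConfig

# The load_in_* flags are what the auto machinery in the next file uses to
# pick between the "_4bit" and "_8bit" quantizer variants.
config = BitsAndBytesConfig(load_in_4bit=True)
print(config.load_in_4bit)  # True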

src/diffusers/quantizers/auto.py

Lines changed: 133 additions & 0 deletions

@@ -0,0 +1,133 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import warnings
+from typing import Dict, Optional, Union
+
+from .bitsandbytes import BnB4BitDiffusersQuantizer, BnB8BitDiffusersQuantizer
+from .quantization_config import BitsAndBytesConfig, QuantizationConfigMixin, QuantizationMethod
+
+
+AUTO_QUANTIZER_MAPPING = {
+    "bitsandbytes_4bit": BnB4BitDiffusersQuantizer,
+    "bitsandbytes_8bit": BnB8BitDiffusersQuantizer,
+}
+
+AUTO_QUANTIZATION_CONFIG_MAPPING = {
+    "bitsandbytes_4bit": BitsAndBytesConfig,
+    "bitsandbytes_8bit": BitsAndBytesConfig,
+}
+
+
+class DiffusersAutoQuantizationConfig:
+    """
+    The auto diffusers quantization config class that takes care of automatically dispatching to the correct
+    quantization config given a quantization config stored in a dictionary.
+    """
+
+    @classmethod
+    def from_dict(cls, quantization_config_dict: Dict):
+        quant_method = quantization_config_dict.get("quant_method", None)
+        # We need special care for bnb models to make sure everything stays backward-compatible.
+        if quantization_config_dict.get("load_in_8bit", False) or quantization_config_dict.get("load_in_4bit", False):
+            suffix = "_4bit" if quantization_config_dict.get("load_in_4bit", False) else "_8bit"
+            quant_method = QuantizationMethod.BITS_AND_BYTES + suffix
+        elif quant_method is None:
+            raise ValueError(
+                "The model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantized."
+            )
+
+        if quant_method not in AUTO_QUANTIZATION_CONFIG_MAPPING.keys():
+            raise ValueError(
+                f"Unknown quantization type, got {quant_method} - supported types are:"
+                f" {list(AUTO_QUANTIZER_MAPPING.keys())}"
+            )
+
+        target_cls = AUTO_QUANTIZATION_CONFIG_MAPPING[quant_method]
+        return target_cls.from_dict(quantization_config_dict)
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        model_config = cls.load_config(pretrained_model_name_or_path, **kwargs)
+        if getattr(model_config, "quantization_config", None) is None:
+            raise ValueError(
+                f"Did not find a `quantization_config` in {pretrained_model_name_or_path}. Make sure that the model is correctly quantized."
+            )
+        quantization_config_dict = model_config.quantization_config
+        quantization_config = cls.from_dict(quantization_config_dict)
+        # Update with potential kwargs that are passed through from_pretrained.
+        quantization_config.update(kwargs)
+        return quantization_config
+
+
+class DiffusersAutoQuantizer:
+    """
+    The auto diffusers quantizer class that takes care of automatically instantiating the correct
+    `DiffusersQuantizer` given the `QuantizationConfig`.
+    """
+
+    @classmethod
+    def from_config(cls, quantization_config: Union[QuantizationConfigMixin, Dict], **kwargs):
+        # Convert it to a QuantizationConfig if the q_config is a dict
+        if isinstance(quantization_config, dict):
+            quantization_config = DiffusersAutoQuantizationConfig.from_dict(quantization_config)
+
+        quant_method = quantization_config.quant_method
+
+        # Again, we need special care for bnb as we have a single quantization config
+        # class for both 4-bit and 8-bit quantization.
+        if quant_method == QuantizationMethod.BITS_AND_BYTES:
+            if quantization_config.load_in_8bit:
+                quant_method += "_8bit"
+            else:
+                quant_method += "_4bit"
+
+        if quant_method not in AUTO_QUANTIZER_MAPPING.keys():
+            raise ValueError(
+                f"Unknown quantization type, got {quant_method} - supported types are:"
+                f" {list(AUTO_QUANTIZER_MAPPING.keys())}"
+            )
+
+        target_cls = AUTO_QUANTIZER_MAPPING[quant_method]
+        return target_cls(quantization_config, **kwargs)
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        quantization_config = DiffusersAutoQuantizationConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
+        return cls.from_config(quantization_config)
+
+    @classmethod
+    def merge_quantization_configs(
+        cls,
+        quantization_config: Union[dict, QuantizationConfigMixin],
+        quantization_config_from_args: Optional[QuantizationConfigMixin],
+    ):
+        """
+        Handles situations where both a quantization_config from args and a quantization_config from the model
+        config are present.
+        """
+        if quantization_config_from_args is not None:
+            warning_msg = (
+                "You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading"
+                " already has a `quantization_config` attribute. The `quantization_config` from the model will be used."
+            )
+        else:
+            warning_msg = ""
+
+        if isinstance(quantization_config, dict):
+            quantization_config = DiffusersAutoQuantizationConfig.from_dict(quantization_config)
+
+        if warning_msg != "":
+            warnings.warn(warning_msg)
+
+        return quantization_config
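Taken together, the dispatch in this new file can be exercised end to end. A minimal sketch under the same availability assumptions; the dict passed to `from_dict` deliberately omits `quant_method` so it exercises the bnb backward-compatibility branch:

from diffusers.quantizers import BitsAndBytesConfig
from diffusers.quantizers.auto import DiffusersAutoQuantizationConfig, DiffusersAutoQuantizer

# Legacy-style dict without `quant_method`: from_dict infers the
# "bitsandbytes_8bit" key from the load_in_8bit flag.
config = DiffusersAutoQuantizationConfig.from_dict({"load_in_8bit": True})
print(type(config).__name__)  # BitsAndBytesConfig

# from_config appends the "_8bit"/"_4bit" suffix before the mapping lookup.
quantizer = DiffusersAutoQuantizer.from_config(config)
print(type(quantizer).__name__)  # BnB8BitDiffusersQuantizer

# When both the checkpoint and the caller supply a config, the model's own
# config wins and a warning is emitted.
merged = DiffusersAutoQuantizer.merge_quantization_configs(
    {"load_in_8bit": True},                 # from the model's config
    BitsAndBytesConfig(load_in_4bit=True),  # from the caller's kwargs
)

Note that `DiffusersAutoQuantizationConfig.from_pretrained` calls a `cls.load_config` that this class does not itself define, so the load-from-checkpoint path presumably relies on wiring added elsewhere in the PR.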
