Skip to content

Rename Lumina(2)Text2ImgPipeline -> Lumina(2)Pipeline #10827

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 13, 2025
14 changes: 7 additions & 7 deletions docs/source/en/api/pipelines/lumina.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ Use [`torch.compile`](https://huggingface.co/docs/diffusers/main/en/tutorials/fa
First, load the pipeline:

```python
from diffusers import LuminaText2ImgPipeline
from diffusers import LuminaPipeline
import torch

pipeline = LuminaText2ImgPipeline.from_pretrained(
pipeline = LuminaPipeline.from_pretrained(
"Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
).to("cuda")
```
Expand All @@ -86,11 +86,11 @@ image = pipeline(prompt="Upper body of a young woman in a Victorian-era outfit w

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on image quality depending on the model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaText2ImgPipeline`] for inference with bitsandbytes.
Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaPipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaText2ImgPipeline
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaPipeline
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
Expand All @@ -109,7 +109,7 @@ transformer_8bit = Transformer2DModel.from_pretrained(
torch_dtype=torch.float16,
)

pipeline = LuminaText2ImgPipeline.from_pretrained(
pipeline = LuminaPipeline.from_pretrained(
"Alpha-VLLM/Lumina-Next-SFT-diffusers",
text_encoder=text_encoder_8bit,
transformer=transformer_8bit,
Expand All @@ -122,9 +122,9 @@ image = pipeline(prompt).images[0]
image.save("lumina.png")
```

## LuminaText2ImgPipeline
## LuminaPipeline

[[autodoc]] LuminaText2ImgPipeline
[[autodoc]] LuminaPipeline
- all
- __call__

12 changes: 6 additions & 6 deletions docs/source/en/api/pipelines/lumina2.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ Single file loading for Lumina Image 2.0 is available for the `Lumina2Transforme

```python
import torch
from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline
from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline

ckpt_path = "https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0/blob/main/consolidated.00-of-01.pth"
transformer = Lumina2Transformer2DModel.from_single_file(
ckpt_path, torch_dtype=torch.bfloat16
)

pipe = Lumina2Text2ImgPipeline.from_pretrained(
pipe = Lumina2Pipeline.from_pretrained(
"Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
Expand All @@ -60,7 +60,7 @@ image.save("lumina-single-file.png")
GGUF quantized checkpoints for the `Lumina2Transformer2DModel` can be loaded via `from_single_file` with the `GGUFQuantizationConfig`.

```python
from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline, GGUFQuantizationConfig
from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline, GGUFQuantizationConfig

ckpt_path = "https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"
transformer = Lumina2Transformer2DModel.from_single_file(
Expand All @@ -69,7 +69,7 @@ transformer = Lumina2Transformer2DModel.from_single_file(
torch_dtype=torch.bfloat16,
)

pipe = Lumina2Text2ImgPipeline.from_pretrained(
pipe = Lumina2Pipeline.from_pretrained(
"Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
Expand All @@ -80,8 +80,8 @@ image = pipe(
image.save("lumina-gguf.png")
```

## Lumina2Text2ImgPipeline
## Lumina2Pipeline

[[autodoc]] Lumina2Text2ImgPipeline
[[autodoc]] Lumina2Pipeline
- all
- __call__
4 changes: 2 additions & 2 deletions scripts/convert_lumina_to_diffusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from safetensors.torch import load_file
from transformers import AutoModel, AutoTokenizer

from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaText2ImgPipeline
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaPipeline


def main(args):
Expand Down Expand Up @@ -115,7 +115,7 @@ def main(args):
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
text_encoder = AutoModel.from_pretrained("google/gemma-2b")

pipeline = LuminaText2ImgPipeline(
pipeline = LuminaPipeline(
tokenizer=tokenizer, text_encoder=text_encoder, transformer=transformer, vae=vae, scheduler=scheduler
)
pipeline.save_pretrained(args.dump_path)
Expand Down
4 changes: 4 additions & 0 deletions src/diffusers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,9 @@
"LEditsPPPipelineStableDiffusionXL",
"LTXImageToVideoPipeline",
"LTXPipeline",
"Lumina2Pipeline",
"Lumina2Text2ImgPipeline",
"LuminaPipeline",
"LuminaText2ImgPipeline",
"MarigoldDepthPipeline",
"MarigoldIntrinsicsPipeline",
Expand Down Expand Up @@ -859,7 +861,9 @@
LEditsPPPipelineStableDiffusionXL,
LTXImageToVideoPipeline,
LTXPipeline,
Lumina2Pipeline,
Lumina2Text2ImgPipeline,
LuminaPipeline,
LuminaText2ImgPipeline,
MarigoldDepthPipeline,
MarigoldIntrinsicsPipeline,
Expand Down
8 changes: 4 additions & 4 deletions src/diffusers/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,8 @@
)
_import_structure["latte"] = ["LattePipeline"]
_import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline"]
_import_structure["lumina"] = ["LuminaText2ImgPipeline"]
_import_structure["lumina2"] = ["Lumina2Text2ImgPipeline"]
_import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
_import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
_import_structure["marigold"].extend(
[
"MarigoldDepthPipeline",
Expand Down Expand Up @@ -611,8 +611,8 @@
LEditsPPPipelineStableDiffusionXL,
)
from .ltx import LTXImageToVideoPipeline, LTXPipeline
from .lumina import LuminaText2ImgPipeline
from .lumina2 import Lumina2Text2ImgPipeline
from .lumina import LuminaPipeline, LuminaText2ImgPipeline
from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
from .marigold import (
MarigoldDepthPipeline,
MarigoldIntrinsicsPipeline,
Expand Down
8 changes: 4 additions & 4 deletions src/diffusers/pipelines/auto_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@
)
from .kandinsky3 import Kandinsky3Img2ImgPipeline, Kandinsky3Pipeline
from .latent_consistency_models import LatentConsistencyModelImg2ImgPipeline, LatentConsistencyModelPipeline
from .lumina import LuminaText2ImgPipeline
from .lumina2 import Lumina2Text2ImgPipeline
from .lumina import LuminaPipeline
from .lumina2 import Lumina2Pipeline
from .pag import (
HunyuanDiTPAGPipeline,
PixArtSigmaPAGPipeline,
Expand Down Expand Up @@ -141,8 +141,8 @@
("flux", FluxPipeline),
("flux-control", FluxControlPipeline),
("flux-controlnet", FluxControlNetPipeline),
("lumina", LuminaText2ImgPipeline),
("lumina2", Lumina2Text2ImgPipeline),
("lumina", LuminaPipeline),
("lumina2", Lumina2Pipeline),
("cogview3", CogView3PlusPipeline),
("cogview4", CogView4Pipeline),
]
Expand Down
4 changes: 2 additions & 2 deletions src/diffusers/pipelines/lumina/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
_import_structure["pipeline_lumina"] = ["LuminaText2ImgPipeline"]
_import_structure["pipeline_lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
try:
Expand All @@ -32,7 +32,7 @@
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import *
else:
from .pipeline_lumina import LuminaText2ImgPipeline
from .pipeline_lumina import LuminaPipeline, LuminaText2ImgPipeline

else:
import sys
Expand Down
29 changes: 24 additions & 5 deletions src/diffusers/pipelines/lumina/pipeline_lumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (
BACKENDS_MAPPING,
deprecate,
is_bs4_available,
is_ftfy_available,
is_torch_xla_available,
Expand Down Expand Up @@ -60,11 +61,9 @@
Examples:
```py
>>> import torch
>>> from diffusers import LuminaText2ImgPipeline
>>> from diffusers import LuminaPipeline

>>> pipe = LuminaText2ImgPipeline.from_pretrained(
... "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
... )
>>> pipe = LuminaPipeline.from_pretrained("Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16)
>>> # Enable memory optimizations.
>>> pipe.enable_model_cpu_offload()

Expand Down Expand Up @@ -134,7 +133,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps


class LuminaText2ImgPipeline(DiffusionPipeline):
class LuminaPipeline(DiffusionPipeline):
r"""
Pipeline for text-to-image generation using Lumina-T2I.

Expand Down Expand Up @@ -932,3 +931,23 @@ def __call__(
return (image,)

return ImagePipelineOutput(images=image)


class LuminaText2ImgPipeline(LuminaPipeline):
    """Backward-compatible alias kept after the rename to `LuminaPipeline`.

    Instantiating this class first reports the rename through `deprecate`
    and then hands every component, unchanged, to `LuminaPipeline.__init__`.
    No other behavior is added or overridden here.
    """

    def __init__(
        self,
        transformer: LuminaNextDiT2DModel,
        scheduler: FlowMatchEulerDiscreteScheduler,
        vae: AutoencoderKL,
        text_encoder: GemmaPreTrainedModel,
        tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
    ):
        # The deprecation notice is issued before the parent constructor runs.
        deprecate(
            "diffusers.pipelines.lumina.pipeline_lumina.LuminaText2ImgPipeline",
            "0.34",
            "`LuminaText2ImgPipeline` has been renamed to `LuminaPipeline` and will be removed in a future version. Please use `LuminaPipeline` instead.",
        )

        # Forward the components by keyword so the parent's parameter order is irrelevant.
        components = {
            "transformer": transformer,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
        }
        super().__init__(**components)
4 changes: 2 additions & 2 deletions src/diffusers/pipelines/lumina2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
_import_structure["pipeline_lumina2"] = ["Lumina2Text2ImgPipeline"]
_import_structure["pipeline_lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
try:
Expand All @@ -32,7 +32,7 @@
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import *
else:
from .pipeline_lumina2 import Lumina2Text2ImgPipeline
from .pipeline_lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline

else:
import sys
Expand Down
27 changes: 24 additions & 3 deletions src/diffusers/pipelines/lumina2/pipeline_lumina2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from ...models.transformers.transformer_lumina2 import Lumina2Transformer2DModel
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (
deprecate,
is_torch_xla_available,
logging,
replace_example_docstring,
Expand All @@ -47,9 +48,9 @@
Examples:
```py
>>> import torch
>>> from diffusers import Lumina2Text2ImgPipeline
>>> from diffusers import Lumina2Pipeline

>>> pipe = Lumina2Text2ImgPipeline.from_pretrained("Alpha-VLLM/Lumina-Image-2.0", torch_dtype=torch.bfloat16)
>>> pipe = Lumina2Pipeline.from_pretrained("Alpha-VLLM/Lumina-Image-2.0", torch_dtype=torch.bfloat16)
>>> # Enable memory optimizations.
>>> pipe.enable_model_cpu_offload()

Expand Down Expand Up @@ -133,7 +134,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps


class Lumina2Text2ImgPipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
class Lumina2Pipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
r"""
Pipeline for text-to-image generation using Lumina-T2I.

Expand Down Expand Up @@ -767,3 +768,23 @@ def __call__(
return (image,)

return ImagePipelineOutput(images=image)


class Lumina2Text2ImgPipeline(Lumina2Pipeline):
    """Backward-compatible alias kept after the rename to `Lumina2Pipeline`.

    Instantiating this class first reports the rename through `deprecate`
    and then hands every component, unchanged, to `Lumina2Pipeline.__init__`.
    No other behavior is added or overridden here.
    """

    def __init__(
        self,
        transformer: Lumina2Transformer2DModel,
        scheduler: FlowMatchEulerDiscreteScheduler,
        vae: AutoencoderKL,
        text_encoder: Gemma2PreTrainedModel,
        tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
    ):
        # The deprecation notice is issued before the parent constructor runs.
        deprecate(
            "diffusers.pipelines.lumina2.pipeline_lumina2.Lumina2Text2ImgPipeline",
            "0.34",
            "`Lumina2Text2ImgPipeline` has been renamed to `Lumina2Pipeline` and will be removed in a future version. Please use `Lumina2Pipeline` instead.",
        )

        # Forward the components by keyword so the parent's parameter order is irrelevant.
        components = {
            "transformer": transformer,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
        }
        super().__init__(**components)
30 changes: 30 additions & 0 deletions src/diffusers/utils/dummy_torch_and_transformers_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,21 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


class Lumina2Pipeline(metaclass=DummyObject):
    # Stand-in for `Lumina2Pipeline`, declared against the "torch" and
    # "transformers" backends. Each entry point below only calls
    # `requires_backends` with that same backend list.
    # NOTE(review): `DummyObject` and `requires_backends` are defined outside
    # this view; presumably they raise an informative error when a backend is
    # missing -- confirm against their definitions.
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but never used; only the backend check runs.
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        # Same backend check, performed on the class instead of an instance.
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Same backend check, performed on the class instead of an instance.
        requires_backends(cls, ["torch", "transformers"])


class Lumina2Text2ImgPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]

Expand All @@ -1232,6 +1247,21 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


class LuminaPipeline(metaclass=DummyObject):
    # Stand-in for `LuminaPipeline`, declared against the "torch" and
    # "transformers" backends. Each entry point below only calls
    # `requires_backends` with that same backend list.
    # NOTE(review): `DummyObject` and `requires_backends` are defined outside
    # this view; presumably they raise an informative error when a backend is
    # missing -- confirm against their definitions.
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but never used; only the backend check runs.
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        # Same backend check, performed on the class instead of an instance.
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Same backend check, performed on the class instead of an instance.
        requires_backends(cls, ["torch", "transformers"])


class LuminaText2ImgPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]

Expand Down
Loading
Loading