
chore: Re-test BF16 fixes on main, refactor test suite #3490


Open
wants to merge 17 commits into main
9 changes: 8 additions & 1 deletion .github/workflows/build-test-linux.yml
@@ -173,7 +173,13 @@ jobs:
cd tests/py
python -m pip install -r requirements.txt
cd dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_models.xml --ir dynamo models/test_models.py
Collaborator:

Why do these need to be separated?

Collaborator:

Only issue is that people will forget to add their tests to the list here
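One way to address this concern (illustrative only, not part of this PR) is a small guard test that fails whenever a models/test_*.py file is not referenced by the workflow. A minimal sketch in Python; the file name, paths, and test name below are hypothetical and assume pytest runs from the repository root:

# tests/py/dynamo/models/test_workflow_coverage.py  (hypothetical helper, not in this PR)
from pathlib import Path

WORKFLOW = Path(".github/workflows/build-test-linux.yml")
MODELS_DIR = Path("tests/py/dynamo/models")


def test_every_model_test_is_listed_in_workflow():
    """Fail if a models/test_*.py file is missing from the workflow's pytest calls."""
    workflow_text = WORKFLOW.read_text()
    missing = [
        f.name
        for f in sorted(MODELS_DIR.glob("test_*.py"))
        if f.name not in workflow_text
    ]
    assert not missing, f"Add these test files to build-test-linux.yml: {missing}"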

python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_models_dynamic.xml --ir dynamo models/test_dyn_models.py
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/engine_cache.xml --ir dynamo models/test_engine_cache.py
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dtype_support.xml --ir dynamo models/test_dtype_support.py
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/model_refit.xml --ir dynamo models/test_model_refit.py
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/modelopt_models.xml --ir dynamo models/test_modelopt_models.py
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/weight_stripped_engine.xml --ir dynamo models/test_weight_stripped_engine.py
popd

tests-py-dynamo-serde:
@@ -206,6 +212,7 @@ jobs:
cd dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_kwargs_serde_test_results.xml --ir dynamo models/test_export_kwargs_serde.py
popd

tests-py-torch-compile-be:
2 changes: 1 addition & 1 deletion py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -898,7 +898,7 @@ def get_attr(self, target: str, args: Any, kwargs: Any) -> np.ndarray:
else:
constant_tensor = frozen_attr

return to_torch(constant_tensor)
return to_torch(constant_tensor)

def call_method(self, target: str, args: Any, kwargs: Any) -> Any:
assert isinstance(target, str)
67 changes: 33 additions & 34 deletions py/torch_tensorrt/dynamo/conversion/converter_utils.py
@@ -344,10 +344,6 @@ def create_constant(
with unset_fake_temporarily():

torch_value = to_torch(value, dtype)
if torch_value is None:
raise ValueError(
f"Cannot convert tensor '{name}' to a TensorRT constant because its value is None."
)
if torch_value.dtype == torch.float64:
raise ValueError(
"TensorRT does not support float64 (double) precision. To resolve this, please set truncate_double=True in your compilation settings and re-run the model."
@@ -589,42 +585,45 @@ def to_numpy(
Returns:
A Numpy array or None, if the input was None.
"""
output = None

if value is None or isinstance(value, np.ndarray):
output = value
with unset_fake_temporarily():
output = None

elif isinstance(value, torch.Tensor):
if value.is_quantized:
value = value.dequantize()
elif value.dtype == torch.bfloat16:
# TODO: Remove when numpy has a BF16 type
_LOGGER.warning(
"Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
)
value = value.to(torch.float)
if value is None or isinstance(value, np.ndarray):
output = value

output = value.cpu().detach().contiguous().numpy()
elif isinstance(value, torch.Tensor):
if value.is_quantized:
value = value.dequantize()
elif value.dtype == torch.bfloat16:
# TODO: Remove when numpy has a BF16 type
_LOGGER.warning(
"Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
)
value = value.to(torch.float)

elif isinstance(value, int):
output = np.array([value], dtype=np.int32)
output = value.cpu().detach().contiguous().numpy()

elif isinstance(value, float):
output = np.array([value], dtype=np.float32)
elif isinstance(value, int):
output = np.array([value], dtype=np.int32)

elif isinstance(value, bool):
output = np.array([value], dtype=np.bool_)
elif isinstance(value, float):
output = np.array([value], dtype=np.float32)

if isinstance(output, np.ndarray) or output is None:
return (
output
if (dtype is None or output is None)
else output.astype(_enums.dtype._from(dtype).to(np.dtype, use_default=True))
)
else:
raise AssertionError(
f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
)
elif isinstance(value, bool):
output = np.array([value], dtype=np.bool_)

if isinstance(output, np.ndarray) or output is None:
return (
output
if (dtype is None or output is None)
else output.astype(
_enums.dtype._from(dtype).to(np.dtype, use_default=True)
)
)
else:
raise AssertionError(
f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
)


def to_torch(
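For context on the bfloat16 branch in to_numpy above: numpy has no native bfloat16 dtype, so the tensor is upcast to float32 before conversion, whereas to_torch can keep the original dtype. A minimal, self-contained illustration (not part of the diff; assumes a recent PyTorch build where .numpy() rejects bf16 tensors):

import torch

t = torch.randn(4, dtype=torch.bfloat16)

# Direct conversion fails because numpy has no bfloat16 dtype.
try:
    t.numpy()
except TypeError as err:
    print(f"direct .numpy() on bf16 raises: {err}")

# to_numpy() therefore upcasts to float32 first; every bf16 value is exactly
# representable in float32, so the cast itself loses no precision.
print(t.to(torch.float32).cpu().detach().contiguous().numpy().dtype)  # float32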
8 changes: 1 addition & 7 deletions tests/py/dynamo/backend/test_backend_compiler.py
@@ -2,11 +2,10 @@
from copy import deepcopy

import torch
import torch_tensorrt
from torch.testing._internal.common_utils import TestCase, run_tests
from torch_tensorrt.dynamo.partitioning import fast_partition

import torch_tensorrt

from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing


@@ -51,7 +50,6 @@ def forward(self, x, y):
pass_through_build_failures=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
use_python_runtime=False,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -132,7 +130,6 @@ def forward(self, x, y):
pass_through_build_failures=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
use_python_runtime=False,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = model(*inputs).detach().cpu()
@@ -177,7 +174,6 @@ def forward(self, x, y):
optimization_level=4,
version_compatible=True,
max_aux_streams=5,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -225,7 +221,6 @@ def forward(self, x, y):
min_block_size=1,
pass_through_build_failures=True,
truncate_double=True,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -298,7 +293,6 @@ def forward(self, x, y):
min_block_size=1,
pass_through_build_failures=True,
truncate_double=False,
debug=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
2 changes: 0 additions & 2 deletions tests/py/dynamo/conversion/harness.py
@@ -415,7 +415,6 @@ def run_test(
compilation_settings = CompilationSettings(
enabled_precisions={dtype._from(precision)},
truncate_double=True,
debug=True,
immutable_weights=immutable_weights,
)

@@ -507,7 +506,6 @@ def run_test_compare_tensor_attributes_only(
compilation_settings = CompilationSettings(
enabled_precisions={dtype._from(precision)},
truncate_double=True,
debug=True,
immutable_weights=immutable_weights,
)

1 change: 0 additions & 1 deletion tests/py/dynamo/models/test_dtype_support.py
@@ -297,7 +297,6 @@ def forward(self, x):
ir="torch_compile",
inputs=inputs,
enabled_precisions={torch.bfloat16},
debug=True,
min_block_size=1,
device=device,
cache_built_engines=False,
1 change: 0 additions & 1 deletion tests/py/dynamo/models/test_model_refit.py
@@ -815,7 +815,6 @@ def forward(self, x):
exp_program,
tuple(inputs),
enabled_precisions={torch.float},
debug=True,
min_block_size=1,
immutable_weights=False,
)
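The debug=True flag is dropped from the compile settings throughout the tests above; the new test file below instead enables build-time logging through the torchtrt.logging.debug() context manager. A minimal, self-contained sketch of that pattern (TinyModel and the inputs are illustrative, not taken from the PR):

import torch
import torch_tensorrt as torchtrt


class TinyModel(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x)


inputs = [torch.randn(2, 4).cuda()]
exp_program = torch.export.export(TinyModel().eval().cuda(), tuple(inputs))

# Debug output comes from the logging context manager rather than a per-call flag.
with torchtrt.logging.debug():
    trt_model = torchtrt.dynamo.compile(exp_program, inputs=inputs, min_block_size=1)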
117 changes: 117 additions & 0 deletions tests/py/dynamo/models/test_modelopt_models.py
@@ -0,0 +1,117 @@
# type: ignore
import importlib
import platform
import unittest
from importlib import metadata

import pytest
import torch
import torch_tensorrt as torchtrt

from packaging.version import Version

assertions = unittest.TestCase()


@unittest.skipIf(
torch.cuda.get_device_capability() < (8, 9),
"FP8 quantization requires compute capability 8.9 or later",
)
@unittest.skipIf(
not importlib.util.find_spec("modelopt"),
"ModelOpt is required to run this test",
)
@pytest.mark.unit
def test_base_fp8():
import modelopt.torch.quantization as mtq
from modelopt.torch.quantization.utils import export_torch_mode

class SimpleNetwork(torch.nn.Module):
def __init__(self):
super(SimpleNetwork, self).__init__()
self.linear1 = torch.nn.Linear(in_features=10, out_features=5)
self.linear2 = torch.nn.Linear(in_features=5, out_features=1)

def forward(self, x):
x = self.linear1(x)
x = torch.nn.ReLU()(x)
x = self.linear2(x)
return x

def calibrate_loop(model):
"""Simple calibration function for testing."""
model(input_tensor)

input_tensor = torch.randn(1, 10).cuda()
model = SimpleNetwork().eval().cuda()

quant_cfg = mtq.FP8_DEFAULT_CFG
mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop)
# model has FP8 qdq nodes at this point
output_pyt = model(input_tensor)

with torch.no_grad():
with export_torch_mode():
exp_program = torch.export.export(model, (input_tensor,), strict=False)
trt_model = torchtrt.dynamo.compile(
exp_program,
inputs=[input_tensor],
enabled_precisions={torch.float8_e4m3fn},
min_block_size=1,
cache_built_engines=False,
reuse_cached_engines=False,
)
outputs_trt = trt_model(input_tensor)
assert torch.allclose(output_pyt, outputs_trt, rtol=5e-3, atol=1e-2)


@unittest.skipIf(
platform.system() != "Linux"
or not importlib.util.find_spec("modelopt")
or Version(metadata.version("nvidia-modelopt")) < Version("0.27.0"),
"modelopt 0.17.0 or later is required, Int8 quantization is supported in modelopt since 0.17.0 or later for linux",
)
@pytest.mark.unit
def test_base_int8():
import modelopt.torch.quantization as mtq
from modelopt.torch.quantization.utils import export_torch_mode

class SimpleNetwork(torch.nn.Module):
def __init__(self):
super(SimpleNetwork, self).__init__()
self.linear1 = torch.nn.Linear(in_features=10, out_features=5)
self.linear2 = torch.nn.Linear(in_features=5, out_features=1)

def forward(self, x):
x = self.linear1(x)
x = torch.nn.ReLU()(x)
x = self.linear2(x)
return x

def calibrate_loop(model):
"""Simple calibration function for testing."""
model(input_tensor)

input_tensor = torch.randn(1, 10).cuda()
model = SimpleNetwork().eval().cuda()

quant_cfg = mtq.INT8_DEFAULT_CFG
mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop)
# model has INT8 qdq nodes at this point
output_pyt = model(input_tensor)

with torchtrt.logging.debug(), torch.no_grad():
with export_torch_mode():
exp_program = torch.export.export(model, (input_tensor,), strict=False)
trt_model = torchtrt.dynamo.compile(
exp_program,
inputs=[input_tensor],
enabled_precisions={torch.int8},
min_block_size=1,
cache_built_engines=False,
reuse_cached_engines=False,
truncate_double=True,
debug=True,
)
outputs_trt = trt_model(input_tensor)
assert torch.allclose(output_pyt, outputs_trt, rtol=5e-3, atol=1e-2)