Skip to content

Rotated bboxes transforms #9084

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added test/assets/fakedata/draw_rotated_boxes.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 5 additions & 5 deletions test/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,13 +444,13 @@ def sample_position(values, max_value):
r_rad = r * torch.pi / 180.0
cos, sin = torch.cos(r_rad), torch.sin(r_rad)
x1, y1 = x, y
x3 = x1 + w * cos
y3 = y1 - w * sin
x2 = x3 + h * sin
y2 = y3 + h * cos
x2 = x1 + w * cos
y2 = y1 - w * sin
x3 = x2 + h * sin
y3 = y2 + h * cos
x4 = x1 + h * sin
y4 = y1 + h * cos
parts = (x1, y1, x3, y3, x2, y2, x4, y4)
parts = (x1, y1, x2, y2, x3, y3, x4, y4)
else:
raise ValueError(f"Format {format} is not supported")

Expand Down
98 changes: 90 additions & 8 deletions test/test_transforms_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,78 @@ def affine_bounding_boxes(bounding_boxes):
)


def reference_affine_rotated_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True):
format = bounding_boxes.format
canvas_size = new_canvas_size or bounding_boxes.canvas_size

def affine_rotated_bounding_boxes(bounding_boxes):
dtype = bounding_boxes.dtype
device = bounding_boxes.device

# Go to float before converting to prevent precision loss in case of CXCYWHR -> XYXYXYXY and W or H is 1
input_xyxyxyxy = F.convert_bounding_box_format(
bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True),
old_format=format,
new_format=tv_tensors.BoundingBoxFormat.XYXYXYXY,
inplace=True,
)
x1, y1, x2, y2, x3, y3, x4, y4 = input_xyxyxyxy.squeeze(0).tolist()

points = np.array(
[
[x1, y1, 1.0],
[x2, y2, 1.0],
[x3, y3, 1.0],
[x4, y4, 1.0],
]
)
transformed_points = np.matmul(points, affine_matrix.astype(points.dtype).T)
output = torch.tensor(
[
float(transformed_points[1, 0]),
float(transformed_points[1, 1]),
float(transformed_points[0, 0]),
float(transformed_points[0, 1]),
float(transformed_points[3, 0]),
float(transformed_points[3, 1]),
float(transformed_points[2, 0]),
float(transformed_points[2, 1]),
]
)

output = F.convert_bounding_box_format(
output, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format
)

if clamp:
# It is important to clamp before casting, especially for CXCYWHR format, dtype=int64
output = F.clamp_bounding_boxes(
output,
format=format,
canvas_size=canvas_size,
)
else:
# We leave the bounding box as float32 so the caller gets the full precision to perform any additional
# operation
dtype = output.dtype

return output.to(dtype=dtype, device=device)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we cast back to the input dtype unconditionally? In general the transforms should preserve the input dtype, but here it's not clear that we are?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this test, we are creating an intermediate tensor and therefore make sure to cast it to the correct dtype.


return tv_tensors.BoundingBoxes(
torch.cat(
[
affine_rotated_bounding_boxes(b)
for b in bounding_boxes.reshape(
-1, 5 if format != tv_tensors.BoundingBoxFormat.XYXYXYXY else 8
).unbind()
],
dim=0,
).reshape(bounding_boxes.shape),
format=format,
canvas_size=canvas_size,
)


class TestResize:
INPUT_SIZE = (17, 11)
OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)]
Expand Down Expand Up @@ -1012,7 +1084,7 @@ class TestHorizontalFlip:
def test_kernel_image(self, dtype, device):
check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device))

@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
Expand Down Expand Up @@ -1071,17 +1143,22 @@ def test_image_correctness(self, fn):

torch.testing.assert_close(actual, expected)

def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes):
def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
affine_matrix = np.array(
[
[-1, 0, bounding_boxes.canvas_size[1]],
[0, 1, 0],
],
)

return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)
helper = (
reference_affine_rotated_bounding_boxes_helper
if tv_tensors.is_rotated_bounding_format(bounding_boxes.format)
else reference_affine_bounding_boxes_helper
)
return helper(bounding_boxes, affine_matrix=affine_matrix)

@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize(
"fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
)
Expand Down Expand Up @@ -1464,7 +1541,7 @@ class TestVerticalFlip:
def test_kernel_image(self, dtype, device):
check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device))

@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
Expand Down Expand Up @@ -1521,17 +1598,22 @@ def test_image_correctness(self, fn):

torch.testing.assert_close(actual, expected)

def _reference_vertical_flip_bounding_boxes(self, bounding_boxes):
def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
affine_matrix = np.array(
[
[1, 0, 0],
[0, -1, bounding_boxes.canvas_size[0]],
],
)

return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)
helper = (
reference_affine_rotated_bounding_boxes_helper
if tv_tensors.is_rotated_bounding_format(bounding_boxes.format)
else reference_affine_bounding_boxes_helper
)
return helper(bounding_boxes, affine_matrix=affine_matrix)

@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
def test_bounding_boxes_correctness(self, format, fn):
bounding_boxes = make_bounding_boxes(format=format)
Expand Down
28 changes: 28 additions & 0 deletions test/test_tv_tensors.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ def test_bbox_instance(data, format):
assert bboxes.format == format


@pytest.mark.parametrize(
"format, is_rotated_expected",
[
("XYXY", False),
("XYWH", False),
("CXCYWH", False),
("XYXYXYXY", True),
("XYWHR", True),
("CXCYWHR", True),
(tv_tensors.BoundingBoxFormat.XYXY, False),
(tv_tensors.BoundingBoxFormat.XYWH, False),
(tv_tensors.BoundingBoxFormat.CXCYWH, False),
(tv_tensors.BoundingBoxFormat.XYXYXYXY, True),
(tv_tensors.BoundingBoxFormat.XYWHR, True),
(tv_tensors.BoundingBoxFormat.CXCYWHR, True),
],
)
@pytest.mark.parametrize("scripted", (False, True))
def test_bbox_format(format, is_rotated_expected, scripted):
if isinstance(format, str):
format = tv_tensors.BoundingBoxFormat[(format.upper())]

fn = tv_tensors.is_rotated_bounding_format
if scripted:
fn = torch.jit.script(fn)
assert fn(format) == is_rotated_expected


def test_bbox_dim_error():
data_3d = [[[1, 2, 3, 4]]]
with pytest.raises(ValueError, match="Expected a 1D or 2D tensor, got 3D"):
Expand Down
31 changes: 30 additions & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,25 @@
PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split("."))

boxes = torch.tensor([[0, 0, 20, 20], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

rotated_boxes = torch.tensor(
[
[100, 150, 150, 150, 150, 250, 100, 250],
[200, 350, 250, 350, 250, 250, 200, 250],
[300, 200, 200, 200, 200, 250, 300, 250],
# Not really a rectangle, but it doesn't matter
[
100,
100,
200,
50,
290,
350,
200,
400,
],
],
dtype=torch.float,
)
keypoints = torch.tensor([[[10, 10], [5, 5], [2, 2]], [[20, 20], [30, 30], [3, 3]]], dtype=torch.float)


Expand Down Expand Up @@ -148,6 +166,17 @@ def test_draw_boxes_with_coloured_label_backgrounds():
assert_equal(result, expected)


@pytest.mark.skipif(PILLOW_VERSION < (10, 1), reason="The reference image is only valid for PIL >= 10.1")
def test_draw_rotated_boxes():
img = torch.full((3, 500, 500), 255, dtype=torch.uint8)
colors = ["blue", "yellow", (0, 255, 0), "black"]

result = utils.draw_bounding_boxes(img, rotated_boxes, colors=colors)
path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_rotated_boxes.png")
expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1)
assert_equal(result, expected)


@pytest.mark.parametrize("fill", [True, False])
def test_draw_boxes_dtypes(fill):
img_uint8 = torch.full((3, 100, 100), 255, dtype=torch.uint8)
Expand Down
32 changes: 16 additions & 16 deletions torchvision/ops/_box_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,56 +130,56 @@ def _box_xywhr_to_cxcywhr(boxes: Tensor) -> Tensor:

def _box_xywhr_to_xyxyxyxy(boxes: Tensor) -> Tensor:
"""
Converts rotated bounding boxes from (x1, y1, w, h, r) format to (x1, y1, x3, y3, x2, y2, x4, y4) format.
Converts rotated bounding boxes from (x1, y1, w, h, r) format to (x1, y1, x2, y2, x3, y3, x4, y4) format.
(x1, y1) refer to top left of bounding box
(w, h) are width and height of the rotated bounding box
r is rotation angle w.r.t. the box center by :math:`|r|` degrees counterclockwise in the image plane

(x1, y1) refer to top left of rotated bounding box
(x3, y3) refer to top right of rotated bounding box
(x2, y2) refer to bottom right of rotated bounding box
(x2, y2) refer to top right of rotated bounding box
(x3, y3) refer to bottom right of rotated bounding box
(x4, y4) refer to bottom left of rotated bounding box
Args:
boxes (Tensor[N, 5]): rotated boxes in (x1, y1, w, h, r) format which will be converted.

Returns:
boxes (Tensor(N, 8)): rotated boxes in (x1, y1, x3, y3, x2, y2, x4, y4) format.
boxes (Tensor(N, 8)): rotated boxes in (x1, y1, x2, y2, x3, y3, x4, y4) format.
"""
x1, y1, w, h, r = boxes.unbind(-1)
r_rad = r * torch.pi / 180.0
cos, sin = torch.cos(r_rad), torch.sin(r_rad)

x3 = x1 + w * cos
y3 = y1 - w * sin
x2 = x3 + h * sin
y2 = y3 + h * cos
x2 = x1 + w * cos
y2 = y1 - w * sin
x3 = x2 + h * sin
y3 = y2 + h * cos
x4 = x1 + h * sin
y4 = y1 + h * cos

return torch.stack((x1, y1, x3, y3, x2, y2, x4, y4), dim=-1)
return torch.stack((x1, y1, x2, y2, x3, y3, x4, y4), dim=-1)


def _box_xyxyxyxy_to_xywhr(boxes: Tensor) -> Tensor:
"""
Converts rotated bounding boxes from (x1, y1, x3, y3, x2, y2, x4, y4) format to (x1, y1, w, h, r) format.
Converts rotated bounding boxes from (x1, y1, x2, y2, x3, y3, x4, y4) format to (x1, y1, w, h, r) format.
(x1, y1) refer to top left of the rotated bounding box
(x3, y3) refer to bottom left of the rotated bounding box
(x2, y2) refer to bottom right of the rotated bounding box
(x2, y2) refer to top right of the rotated bounding box
(x3, y3) refer to bottom right of the rotated bounding box
(x4, y4) refer to bottom left of the rotated bounding box
(w, h) refers to width and height of rotated bounding box
r is rotation angle w.r.t. the box center by :math:`|r|` degrees counterclockwise in the image plane

Args:
boxes (Tensor(N, 8)): rotated boxes in (x1, y1, x3, y3, x2, y2, x4, y4) format.
boxes (Tensor(N, 8)): rotated boxes in (x1, y1, x2, y2, x3, y3, x4, y4) format.

Returns:
boxes (Tensor[N, 5]): rotated boxes in (x1, y1, w, h, r) format.
"""
x1, y1, x3, y3, x2, y2, x4, y4 = boxes.unbind(-1)
r_rad = torch.atan2(y1 - y3, x3 - x1)
x1, y1, x2, y2, x3, y3, x4, y4 = boxes.unbind(-1)
r_rad = torch.atan2(y1 - y2, x2 - x1)
r = r_rad * 180 / torch.pi

w = ((x3 - x1) ** 2 + (y1 - y3) ** 2).sqrt()
w = ((x2 - x1) ** 2 + (y1 - y2) ** 2).sqrt()
h = ((x3 - x2) ** 2 + (y3 - y2) ** 2).sqrt()

boxes = torch.stack((x1, y1, w, h, r), dim=-1)
Expand Down
4 changes: 2 additions & 2 deletions torchvision/ops/boxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
being width and height.
r is rotation angle w.r.t. the box center by :math:`|r|` degrees counterclockwise in the image plane

``'xyxyxyxy'``: boxes are represented via corners, x1, y1 being top left, x2, y2 bottom right,
x3, y3 bottom left, and x4, y4 top right.
``'xyxyxyxy'``: boxes are represented via corners, x1, y1 being top left, x2, y2 top right,
x3, y3 bottom right, and x4, y4 bottom left.

Args:
boxes (Tensor[N, K]): boxes which will be converted. K is the number of coordinates (4 for unrotated bounding boxes, 5 or 8 for rotated bounding boxes)
Expand Down
Loading
Loading