Skip to content

Commit 6da1fdb

Browse files
PYTHON-5126 Resync bson vector spec tests following additions (#2161)
1 parent 38f97a3 commit 6da1fdb

File tree

5 files changed

+79
-9
lines changed

5 files changed

+79
-9
lines changed

bson/binary.py

+4
Original file line number | Diff line number | Diff line change
@@ -450,6 +450,10 @@ def from_vector(
450450
raise ValueError(f"padding does not apply to {dtype=}")
451451
elif dtype == BinaryVectorDtype.PACKED_BIT: # pack ints in [0, 255] as unsigned uint8
452452
format_str = "B"
453+
if not (0 <= padding <= 7):
454+
raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
455+
if padding and not vector:
456+
raise ValueError("Empty vector with non-zero padding.")
453457
elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32
454458
format_str = "f"
455459
if padding:

test/bson_binary_vector/float32.json

+25-2
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,15 @@
1111
"padding": 0,
1212
"canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
1313
},
14+
{
15+
"description": "Vector with decimals and negative value FLOAT32",
16+
"valid": true,
17+
"vector": [127.7, -7.7],
18+
"dtype_hex": "0x27",
19+
"dtype_alias": "FLOAT32",
20+
"padding": 0,
21+
"canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
22+
},
1423
{
1524
"description": "Empty Vector FLOAT32",
1625
"valid": true,
@@ -35,8 +44,22 @@
3544
"vector": [127.0, 7.0],
3645
"dtype_hex": "0x27",
3746
"dtype_alias": "FLOAT32",
38-
"padding": 3
47+
"padding": 3,
48+
"canonical_bson": "1C00000005766563746F72000A0000000927030000FE420000E04000"
49+
},
50+
{
51+
"description": "Insufficient vector data with 3 bytes FLOAT32",
52+
"valid": false,
53+
"dtype_hex": "0x27",
54+
"dtype_alias": "FLOAT32",
55+
"canonical_bson": "1700000005766563746F7200050000000927002A2A2A00"
56+
},
57+
{
58+
"description": "Insufficient vector data with 5 bytes FLOAT32",
59+
"valid": false,
60+
"dtype_hex": "0x27",
61+
"dtype_alias": "FLOAT32",
62+
"canonical_bson": "1900000005766563746F7200070000000927002A2A2A2A2A00"
3963
}
4064
]
4165
}
42-

test/bson_binary_vector/int8.json

+2-2
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,8 @@
4242
"vector": [127, 7],
4343
"dtype_hex": "0x03",
4444
"dtype_alias": "INT8",
45-
"padding": 3
45+
"padding": 3,
46+
"canonical_bson": "1600000005766563746F7200040000000903037F0700"
4647
},
4748
{
4849
"description": "INT8 with float inputs",
@@ -54,4 +55,3 @@
5455
}
5556
]
5657
}
57-

test/bson_binary_vector/packed_bit.json

+34-1
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,15 @@
22
"description": "Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT",
33
"test_key": "vector",
44
"tests": [
5+
{
6+
"description": "Padding specified with no vector data PACKED_BIT",
7+
"valid": false,
8+
"vector": [],
9+
"dtype_hex": "0x10",
10+
"dtype_alias": "PACKED_BIT",
11+
"padding": 1,
12+
"canonical_bson": "1400000005766563746F72000200000009100100"
13+
},
514
{
615
"description": "Simple Vector PACKED_BIT",
716
"valid": true,
@@ -44,7 +53,31 @@
4453
"dtype_hex": "0x10",
4554
"dtype_alias": "PACKED_BIT",
4655
"padding": 0
56+
},
57+
{
58+
"description": "Vector with float values PACKED_BIT",
59+
"valid": false,
60+
"vector": [127.5],
61+
"dtype_hex": "0x10",
62+
"dtype_alias": "PACKED_BIT",
63+
"padding": 0
64+
},
65+
{
66+
"description": "Exceeding maximum padding PACKED_BIT",
67+
"valid": false,
68+
"vector": [1],
69+
"dtype_hex": "0x10",
70+
"dtype_alias": "PACKED_BIT",
71+
"padding": 8,
72+
"canonical_bson": "1500000005766563746F7200030000000910080100"
73+
},
74+
{
75+
"description": "Negative padding PACKED_BIT",
76+
"valid": false,
77+
"vector": [1],
78+
"dtype_hex": "0x10",
79+
"dtype_alias": "PACKED_BIT",
80+
"padding": -1
4781
}
4882
]
4983
}
50-

test/test_bson_binary_vector.py

+14-4
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def create_test(case_spec):
4949
def run_test(self):
5050
for test_case in case_spec.get("tests", []):
5151
description = test_case["description"]
52-
vector_exp = test_case["vector"]
52+
vector_exp = test_case.get("vector", [])
5353
dtype_hex_exp = test_case["dtype_hex"]
5454
dtype_alias_exp = test_case.get("dtype_alias")
5555
padding_exp = test_case.get("padding", 0)
@@ -76,17 +76,27 @@ def run_test(self):
7676
self.assertEqual(
7777
vector_obs.dtype, BinaryVectorDtype[dtype_alias_exp], description
7878
)
79-
self.assertEqual(vector_obs.data, vector_exp, description)
80-
self.assertEqual(vector_obs.padding, padding_exp, description)
81-
79+
if dtype_exp in [BinaryVectorDtype.FLOAT32]:
80+
[
81+
self.assertAlmostEqual(vector_obs.data[i], vector_exp[i], delta=1e-5)
82+
for i in range(len(vector_exp))
83+
]
84+
else:
85+
self.assertEqual(vector_obs.data, vector_exp, description)
8286
# Test Binary Vector to BSON
8387
vector_exp = Binary.from_vector(vector_exp, dtype_exp, padding_exp)
8488
cB_obs = binascii.hexlify(encode({test_key: vector_exp})).decode().upper()
8589
self.assertEqual(cB_obs, canonical_bson_exp, description)
8690

8791
else:
8892
with self.assertRaises((struct.error, ValueError), msg=description):
93+
# Tests Binary.from_vector
8994
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
95+
# Tests Binary.as_vector
96+
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
97+
decoded_doc = decode(cB_exp)
98+
binary_obs = decoded_doc[test_key]
99+
binary_obs.as_vector()
90100

91101
return run_test
92102

0 commit comments

Comments
 (0)