@@ -240,16 +240,23 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end)
240
240
* element in `[begin,end)` that evaluated to `true`.
241
241
*/
242
242
template <typename ValidityIterator>
243
- std::vector<bitmask_type> make_null_mask_vector (ValidityIterator begin, ValidityIterator end)
243
+ std::pair<std::vector<bitmask_type>, cudf::size_type> make_null_mask_vector (ValidityIterator begin,
244
+ ValidityIterator end)
244
245
{
245
246
auto const size = cudf::distance (begin, end);
246
247
auto const num_words = cudf::bitmask_allocation_size_bytes (size) / sizeof (bitmask_type);
247
248
248
- auto null_mask = std::vector<bitmask_type>(num_words, 0 );
249
- for (auto i = 0 ; i < size; ++i)
250
- if (*(begin + i)) set_bit_unsafe (null_mask.data (), i);
249
+ auto null_mask = std::vector<bitmask_type>(num_words, 0 );
250
+ auto null_count = cudf::size_type{0 };
251
+ for (auto i = 0 ; i < size; ++i) {
252
+ if (*(begin + i)) {
253
+ set_bit_unsafe (null_mask.data (), i);
254
+ } else {
255
+ ++null_count;
256
+ }
257
+ }
251
258
252
- return null_mask;
259
+ return { std::move ( null_mask), null_count} ;
253
260
}
254
261
255
262
/* *
@@ -266,12 +273,14 @@ std::vector<bitmask_type> make_null_mask_vector(ValidityIterator begin, Validity
266
273
* element in `[begin,end)` that evaluated to `true`.
267
274
*/
268
275
template <typename ValidityIterator>
269
- rmm::device_buffer make_null_mask (ValidityIterator begin, ValidityIterator end)
276
+ std::pair<rmm::device_buffer, cudf::size_type> make_null_mask (ValidityIterator begin,
277
+ ValidityIterator end)
270
278
{
271
- auto null_mask = make_null_mask_vector (begin, end);
272
- return rmm::device_buffer{null_mask.data (),
273
- null_mask.size () * sizeof (decltype (null_mask.front ())),
274
- cudf::get_default_stream ()};
279
+ auto [null_mask, null_count] = make_null_mask_vector (begin, end);
280
+ auto d_mask = rmm::device_buffer{null_mask.data (),
281
+ cudf::bitmask_allocation_size_bytes (cudf::distance (begin, end)),
282
+ cudf::get_default_stream ()};
283
+ return {std::move (d_mask), null_count};
275
284
}
276
285
277
286
/* *
@@ -319,10 +328,12 @@ class fixed_width_column_wrapper : public detail::column_wrapper {
319
328
fixed_width_column_wrapper () : column_wrapper{}
320
329
{
321
330
std::vector<ElementTo> empty;
322
- wrapped.reset (new cudf::column{
323
- cudf::data_type{cudf::type_to_id<ElementTo>()},
324
- 0 ,
325
- detail::make_elements<ElementTo, SourceElementT>(empty.begin (), empty.end ())});
331
+ wrapped.reset (
332
+ new cudf::column{cudf::data_type{cudf::type_to_id<ElementTo>()},
333
+ 0 ,
334
+ detail::make_elements<ElementTo, SourceElementT>(empty.begin (), empty.end ()),
335
+ rmm::device_buffer{},
336
+ 0 });
326
337
}
327
338
328
339
/* *
@@ -349,7 +360,9 @@ class fixed_width_column_wrapper : public detail::column_wrapper {
349
360
auto const size = cudf::distance (begin, end);
350
361
wrapped.reset (new cudf::column{cudf::data_type{cudf::type_to_id<ElementTo>()},
351
362
size,
352
- detail::make_elements<ElementTo, SourceElementT>(begin, end)});
363
+ detail::make_elements<ElementTo, SourceElementT>(begin, end),
364
+ rmm::device_buffer{},
365
+ 0 });
353
366
}
354
367
355
368
/* *
@@ -379,12 +392,13 @@ class fixed_width_column_wrapper : public detail::column_wrapper {
379
392
fixed_width_column_wrapper (InputIterator begin, InputIterator end, ValidityIterator v)
380
393
: column_wrapper{}
381
394
{
382
- auto const size = cudf::distance (begin, end);
395
+ auto const size = cudf::distance (begin, end);
396
+ auto [null_mask, null_count] = detail::make_null_mask (v, v + size);
383
397
wrapped.reset (new cudf::column{cudf::data_type{cudf::type_to_id<ElementTo>()},
384
398
size,
385
399
detail::make_elements<ElementTo, SourceElementT>(begin, end),
386
- detail::make_null_mask (v, v + size ),
387
- cudf::UNKNOWN_NULL_COUNT });
400
+ std::move (null_mask ),
401
+ null_count });
388
402
}
389
403
390
404
/* *
@@ -547,7 +561,9 @@ class fixed_point_column_wrapper : public detail::column_wrapper {
547
561
wrapped.reset (new cudf::column{
548
562
data_type,
549
563
size,
550
- rmm::device_buffer{elements.data (), size * sizeof (Rep), cudf::get_default_stream ()}});
564
+ rmm::device_buffer{elements.data (), size * sizeof (Rep), cudf::get_default_stream ()},
565
+ rmm::device_buffer{},
566
+ 0 });
551
567
}
552
568
553
569
/* *
@@ -603,17 +619,17 @@ class fixed_point_column_wrapper : public detail::column_wrapper {
603
619
{
604
620
CUDF_EXPECTS (numeric::is_supported_representation_type<Rep>(), " not valid representation type" );
605
621
606
- auto const size = cudf::distance (begin, end);
607
- auto const elements = thrust::host_vector<Rep>(begin, end);
608
- auto const id = type_to_id<numeric::fixed_point<Rep, numeric::Radix::BASE_10>>();
609
- auto const data_type = cudf::data_type{id, static_cast <int32_t >(scale)};
610
-
622
+ auto const size = cudf::distance (begin, end);
623
+ auto const elements = thrust::host_vector<Rep>(begin, end);
624
+ auto const id = type_to_id<numeric::fixed_point<Rep, numeric::Radix::BASE_10>>();
625
+ auto const data_type = cudf::data_type{id, static_cast <int32_t >(scale)};
626
+ auto [null_mask, null_count] = detail::make_null_mask (v, v + size);
611
627
wrapped.reset (new cudf::column{
612
628
data_type,
613
629
size,
614
630
rmm::device_buffer{elements.data (), size * sizeof (Rep), cudf::get_default_stream ()},
615
- detail::make_null_mask (v, v + size ),
616
- cudf::UNKNOWN_NULL_COUNT });
631
+ std::move (null_mask ),
632
+ null_count });
617
633
}
618
634
619
635
/* *
@@ -736,7 +752,7 @@ class strings_column_wrapper : public detail::column_wrapper {
736
752
chars, cudf::get_default_stream (), rmm::mr::get_current_device_resource ());
737
753
auto d_offsets = cudf::detail::make_device_uvector_sync (
738
754
offsets, cudf::get_default_stream (), rmm::mr::get_current_device_resource ());
739
- wrapped = cudf::make_strings_column (d_chars, d_offsets);
755
+ wrapped = cudf::make_strings_column (d_chars, d_offsets, {}, 0 );
740
756
}
741
757
742
758
/* *
@@ -771,16 +787,16 @@ class strings_column_wrapper : public detail::column_wrapper {
771
787
strings_column_wrapper (StringsIterator begin, StringsIterator end, ValidityIterator v)
772
788
: column_wrapper{}
773
789
{
774
- size_type num_strings = std::distance (begin, end);
775
- auto [chars, offsets] = detail::make_chars_and_offsets (begin, end, v);
776
- auto null_mask = detail::make_null_mask_vector (v, v + num_strings);
777
- auto d_chars = cudf::detail::make_device_uvector_sync (
790
+ size_type num_strings = std::distance (begin, end);
791
+ auto [chars, offsets] = detail::make_chars_and_offsets (begin, end, v);
792
+ auto [ null_mask, null_count] = detail::make_null_mask_vector (v, v + num_strings);
793
+ auto d_chars = cudf::detail::make_device_uvector_sync (
778
794
chars, cudf::get_default_stream (), rmm::mr::get_current_device_resource ());
779
795
auto d_offsets = cudf::detail::make_device_uvector_sync (
780
796
offsets, cudf::get_default_stream (), rmm::mr::get_current_device_resource ());
781
797
auto d_bitmask = cudf::detail::make_device_uvector_sync (
782
798
null_mask, cudf::get_default_stream (), rmm::mr::get_current_device_resource ());
783
- wrapped = cudf::make_strings_column (d_chars, d_offsets, d_bitmask);
799
+ wrapped = cudf::make_strings_column (d_chars, d_offsets, d_bitmask, null_count );
784
800
}
785
801
786
802
/* *
@@ -1579,14 +1595,14 @@ class lists_column_wrapper : public detail::column_wrapper {
1579
1595
// increment depth
1580
1596
depth = expected_depth + 1 ;
1581
1597
1598
+ auto [null_mask, null_count] = [&] {
1599
+ if (v.size () <= 0 ) return std::make_pair (rmm::device_buffer{}, cudf::size_type{0 });
1600
+ return cudf::test::detail::make_null_mask (v.begin (), v.end ());
1601
+ }();
1602
+
1582
1603
// construct the list column
1583
- wrapped =
1584
- make_lists_column (cols.size (),
1585
- std::move (offsets),
1586
- std::move (data),
1587
- v.size () <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT,
1588
- v.size () <= 0 ? rmm::device_buffer{}
1589
- : cudf::test::detail::make_null_mask (v.begin (), v.end ()));
1604
+ wrapped = make_lists_column (
1605
+ cols.size (), std::move (offsets), std::move (data), null_count, std::move (null_mask));
1590
1606
}
1591
1607
1592
1608
/* *
@@ -1668,7 +1684,7 @@ class lists_column_wrapper : public detail::column_wrapper {
1668
1684
std::make_unique<column>(lcv.offsets ()),
1669
1685
normalize_column (lists_column_view (col).child (),
1670
1686
lists_column_view (expected_hierarchy).child ()),
1671
- UNKNOWN_NULL_COUNT ,
1687
+ col. null_count () ,
1672
1688
copy_bitmask (col));
1673
1689
}
1674
1690
@@ -1843,12 +1859,13 @@ class structs_column_wrapper : public detail::column_wrapper {
1843
1859
CUDF_EXPECTS (validity.size () <= 0 || static_cast <size_type>(validity.size ()) == num_rows,
1844
1860
" Validity buffer must have as many elements as rows in the struct column." );
1845
1861
1862
+ auto [null_mask, null_count] = [&] {
1863
+ if (validity.size () <= 0 ) return std::make_pair (rmm::device_buffer{}, cudf::size_type{0 });
1864
+ return cudf::test::detail::make_null_mask (validity.begin (), validity.end ());
1865
+ }();
1866
+
1846
1867
wrapped = cudf::make_structs_column (
1847
- num_rows,
1848
- std::move (child_columns),
1849
- validity.size () <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT,
1850
- validity.size () <= 0 ? rmm::device_buffer{}
1851
- : detail::make_null_mask (validity.begin (), validity.end ()));
1868
+ num_rows, std::move (child_columns), null_count, std::move (null_mask));
1852
1869
}
1853
1870
1854
1871
template <typename V>
0 commit comments