@@ -515,32 +515,32 @@ __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
 __device__ inline unsigned __reduce_add_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_add(__mask, __value);
+  return __nvvm_redux_sync_add(__value, __mask);
 }
 __device__ inline unsigned __reduce_min_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_umin(__mask, __value);
+  return __nvvm_redux_sync_umin(__value, __mask);
 }
 __device__ inline unsigned __reduce_max_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_umax(__mask, __value);
+  return __nvvm_redux_sync_umax(__value, __mask);
 }
 __device__ inline int __reduce_min_sync(unsigned __mask, int __value) {
-  return __nvvm_redux_sync_min(__mask, __value);
+  return __nvvm_redux_sync_min(__value, __mask);
 }
 __device__ inline int __reduce_max_sync(unsigned __mask, int __value) {
-  return __nvvm_redux_sync_max(__mask, __value);
+  return __nvvm_redux_sync_max(__value, __mask);
 }
 __device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) {
-  return __nvvm_redux_sync_or(__mask, __value);
+  return __nvvm_redux_sync_or(__value, __mask);
 }
 __device__ inline unsigned __reduce_and_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_and(__mask, __value);
+  return __nvvm_redux_sync_and(__value, __mask);
 }
 __device__ inline unsigned __reduce_xor_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_xor(__mask, __value);
+  return __nvvm_redux_sync_xor(__value, __mask);
 }
 
 __device__ inline void __nv_memcpy_async_shared_global_4(void *__dst,
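For context, a small hypothetical usage sketch (not part of this change) of the __reduce_*_sync wrappers patched above. The kernel and buffer names are made up for illustration; it assumes a build for sm_80 or newer and a launch where all 32 lanes of each warp are active, so the member mask is all ones.

// Hypothetical example: warp-wide reductions through the __reduce_*_sync
// wrappers. Compile for sm_80+ (e.g. -arch=sm_80); one result per warp.
__global__ void warp_reduce_demo(const unsigned *in, unsigned *out_sum,
                                 int *out_min) {
  unsigned lane = threadIdx.x & 31u;   // lane index within the warp
  unsigned warp = threadIdx.x >> 5;    // warp index (blockDim.x assumed a multiple of 32)
  unsigned v = in[threadIdx.x];
  // Every lane participates, so the member mask is 0xffffffff.
  unsigned sum = __reduce_add_sync(0xffffffffu, v);
  int mn = __reduce_min_sync(0xffffffffu, (int)v);
  if (lane == 0) {
    out_sum[warp] = sum;
    out_min[warp] = mn;
  }
}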