Lines Matching defs:reduce_data

27 void gpu_regular_warp_reduce(void *reduce_data, ShuffleReductFnTy shflFct) {
29 shflFct(reduce_data, /*LaneId - not used= */ 0,
34 void gpu_irregular_warp_reduce(void *reduce_data, ShuffleReductFnTy shflFct,
41 shflFct(reduce_data, /*LaneId = */ tid, /*Offset=*/mask, /*AlgoVersion=*/1);
47 static uint32_t gpu_irregular_simd_reduce(void *reduce_data,
60 shflFct(reduce_data, /*LaneId =*/logical_lane_id,
66 static int32_t nvptx_parallel_reduce_nowait(void *reduce_data,
99 gpu_regular_warp_reduce(reduce_data, shflFct);
102 reduce_data, shflFct,
113 cpyFct(reduce_data, WarpsNeeded);
116 gpu_irregular_warp_reduce(reduce_data, shflFct, WarpsNeeded,
124 gpu_regular_warp_reduce(reduce_data, shflFct);
126 gpu_irregular_warp_reduce(reduce_data, shflFct,
133 return gpu_irregular_simd_reduce(reduce_data, shflFct);
145 cpyFct(reduce_data, WarpsNeeded);
149 gpu_irregular_warp_reduce(reduce_data, shflFct, WarpsNeeded,
173 void *reduce_data,
176 return nvptx_parallel_reduce_nowait(reduce_data, shflFct, cpyFct);
181 uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct,
217 lgcpyFct(GlobalBuffer, ModBockId, reduce_data);
219 lgredFct(GlobalBuffer, ModBockId, reduce_data);
236 // reduce_data is global or shared so before being reduced within the
238 // local_reduce_data = reduce_data[i]
274 glcpyFct(GlobalBuffer, ThreadId, reduce_data);
276 glredFct(GlobalBuffer, i, reduce_data);
280 gpu_regular_warp_reduce(reduce_data, shflFct);
290 cpyFct(reduce_data, WarpsNeeded);
294 gpu_irregular_warp_reduce(reduce_data, shflFct, WarpsNeeded,