diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index 84c5fda..b4d748c 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -946,41 +946,27 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st ERRCHK(tpb_reduce <= num_elems); ERRCHK(nx * ny * nz % 2 == 0); + // clang-format off if (rtype == RTYPE_MAX) { - kernel_filter - <<>>(vtxbuf, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else if (rtype == RTYPE_MIN) { - kernel_filter - <<>>(vtxbuf, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else if (rtype == RTYPE_RMS) { - kernel_filter - <<>>(vtxbuf, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else if (rtype == RTYPE_RMS_EXP) { - kernel_filter - <<>>(vtxbuf, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else { + kernel_filter<<>>(vtxbuf, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_MIN) { + kernel_filter<<>>(vtxbuf, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_RMS) { + kernel_filter<<>>(vtxbuf, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_RMS_EXP) { + kernel_filter<<>>(vtxbuf, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else { ERROR("Unrecognized rtype"); } + // clang-format on AcReal result; cudaMemcpy(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost); return result; @@ -1010,41 +996,27 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta ERRCHK(tpb_reduce <= num_elems); ERRCHK(nx * ny * nz % 2 == 0); + // clang-format off if (rtype == RTYPE_MAX) { - kernel_filter_vec<<>>( - vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else if (rtype == RTYPE_MIN) { - kernel_filter_vec<<>>( - vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else if (rtype == RTYPE_RMS) { - kernel_filter_vec<<>>( - vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else if (rtype == RTYPE_RMS_EXP) { - kernel_filter_vec<<>>( - vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); - kernel_reduce<<>>( - scratchpad, num_elems); - kernel_reduce_block - <<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); - } - else { + kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_MIN) { + kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_RMS) { + kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_RMS_EXP) { + kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else { ERROR("Unrecognized rtype"); } + // clang-format on AcReal result; cudaMemcpy(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost); return result;