From 69deef66fe5c9285b9b19a1e7fa56dcd8e87cf40 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 30 Jul 2019 14:28:18 +0300 Subject: [PATCH] Added sum reduction. NOTE: Scalar sum does not pass the automated test but vector sum does. I couldn't see anything wrong with the code itself and I strongly suspect that the failures are caused by loss of precision due to summing a huge amount of numbers of different magnitudes. However I'm not yet completely sure. Something like the Kahan summation algorithm might be useful if the errors are really caused by fp arithmetic. --- include/astaroth_defines.h | 9 +++- src/core/astaroth.cu | 3 +- src/core/kernels/kernels.cuh | 8 ++++ src/standalone/model/model_reduce.cc | 66 ++++++++++++++-------------- 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h index ac9804b..240ed86 100644 --- a/include/astaroth_defines.h +++ b/include/astaroth_defines.h @@ -77,7 +77,14 @@ typedef struct { typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult; -typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYPES } ReductionType; +typedef enum { + RTYPE_MAX, + RTYPE_MIN, + RTYPE_RMS, + RTYPE_RMS_EXP, + RTYPE_SUM, + NUM_REDUCTION_TYPES +} ReductionType; typedef enum { STREAM_DEFAULT, diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index fadf4c0..9abc010 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -474,7 +474,7 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons else if (rtype == RTYPE_MIN) { res = min(res, results[i]); } - else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) { + else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) { res = sum(res, results[i]); } else { @@ -486,7 +486,6 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z); res = sqrt(inv_n * res); } - return res; } diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index 8977d07..52b48f6 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -893,6 +893,10 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st kernel_filter<<>>(vtxbuf, start, end, scratchpad); kernel_reduce<<>>(scratchpad, num_elems); kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_SUM) { + kernel_filter<<>>(vtxbuf, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); } else { ERROR("Unrecognized rtype"); } @@ -944,6 +948,10 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); kernel_reduce<<>>(scratchpad, num_elems); kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_SUM) { + kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); } else { ERROR("Unrecognized rtype"); } diff --git a/src/standalone/model/model_reduce.cc b/src/standalone/model/model_reduce.cc index 6d48c4b..ae092c9 100644 --- a/src/standalone/model/model_reduce.cc +++ b/src/standalone/model/model_reduce.cc @@ -68,8 +68,7 @@ exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { // clang-format on ModelScalar -model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, - const VertexBufferHandle& a) +model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a) { ReduceInitialScalFunc reduce_initial; ReduceFunc reduce; @@ -95,30 +94,31 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, reduce = sum; solve_mean = true; break; + case RTYPE_SUM: + reduce_initial = length; + reduce = sum; + break; default: ERROR("Unrecognized RTYPE"); } - const int initial_idx = acVertexBufferIdx( - mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min], - mesh.info.int_params[AC_nz_min], mesh.info); + const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min], + mesh.info.int_params[AC_ny_min], + mesh.info.int_params[AC_nz_min], mesh.info); ModelScalar res; if (rtype == RTYPE_MAX || rtype == RTYPE_MIN) res = reduce_initial(mesh.vertex_buffer[a][initial_idx]); else - res = .0f; + res = 0; - for (int k = mesh.info.int_params[AC_nz_min]; - k < mesh.info.int_params[AC_nz_max]; ++k) { - for (int j = mesh.info.int_params[AC_ny_min]; - j < mesh.info.int_params[AC_ny_max]; ++j) { - for (int i = mesh.info.int_params[AC_nx_min]; - i < mesh.info.int_params[AC_nx_max]; ++i) { + for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; ++k) { + for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; ++j) { + for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max]; + ++i) { const int idx = acVertexBufferIdx(i, j, k, mesh.info); - const ModelScalar curr_val = reduce_initial( - mesh.vertex_buffer[a][idx]); - res = reduce(res, curr_val); + const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx]); + res = reduce(res, curr_val); } } } @@ -133,9 +133,8 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, } ModelScalar -model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, - const VertexBufferHandle& a, const VertexBufferHandle& b, - const VertexBufferHandle& c) +model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a, + const VertexBufferHandle& b, const VertexBufferHandle& c) { // ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar); ReduceInitialVecFunc reduce_initial; @@ -162,33 +161,34 @@ model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, reduce = sum; solve_mean = true; break; + case RTYPE_SUM: + reduce_initial = length; + reduce = sum; + break; default: ERROR("Unrecognized RTYPE"); } - const int initial_idx = acVertexBufferIdx( - mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min], - mesh.info.int_params[AC_nz_min], mesh.info); + const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min], + mesh.info.int_params[AC_ny_min], + mesh.info.int_params[AC_nz_min], mesh.info); ModelScalar res; if (rtype == RTYPE_MAX || rtype == RTYPE_MIN) - res = reduce_initial(mesh.vertex_buffer[a][initial_idx], - mesh.vertex_buffer[b][initial_idx], + res = reduce_initial(mesh.vertex_buffer[a][initial_idx], mesh.vertex_buffer[b][initial_idx], mesh.vertex_buffer[c][initial_idx]); else res = 0; - for (int k = mesh.info.int_params[AC_nz_min]; - k < mesh.info.int_params[AC_nz_max]; k++) { - for (int j = mesh.info.int_params[AC_ny_min]; - j < mesh.info.int_params[AC_ny_max]; j++) { - for (int i = mesh.info.int_params[AC_nx_min]; - i < mesh.info.int_params[AC_nx_max]; i++) { + for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; k++) { + for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; j++) { + for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max]; + i++) { const int idx = acVertexBufferIdx(i, j, k, mesh.info); - const ModelScalar curr_val = reduce_initial( - mesh.vertex_buffer[a][idx], mesh.vertex_buffer[b][idx], - mesh.vertex_buffer[c][idx]); - res = reduce(res, curr_val); + const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx], + mesh.vertex_buffer[b][idx], + mesh.vertex_buffer[c][idx]); + res = reduce(res, curr_val); } } }