diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h index eb89daf..73a25d9 100644 --- a/include/astaroth_defines.h +++ b/include/astaroth_defines.h @@ -95,7 +95,14 @@ typedef struct { typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult; -typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYPES } ReductionType; +typedef enum { + RTYPE_MAX, + RTYPE_MIN, + RTYPE_RMS, + RTYPE_RMS_EXP, + RTYPE_SUM, + NUM_REDUCTION_TYPES +} ReductionType; typedef enum { STREAM_DEFAULT, diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index 05af3d3..736956b 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -515,7 +515,7 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const else if (rtype == RTYPE_MIN) { res = min(res, results[i]); } - else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) { + else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) { res = sum(res, results[i]); } else { @@ -527,7 +527,6 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z); res = sqrt(inv_n * res); } - return res; } diff --git a/src/core/device.cu b/src/core/device.cu index f4bef17..c046d18 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -43,7 +43,9 @@ __constant__ AcMeshInfo d_mesh_info; __constant__ int3 d_multigpu_offset; __constant__ Grid globalGrid; #define DCONST_INT(X) (d_mesh_info.int_params[X]) +#define DCONST_INT3(X) (d_mesh_info.int3_params[X]) #define DCONST_REAL(X) (d_mesh_info.real_params[X]) +#define DCONST_REAL3(X) (d_mesh_info.real3_params[X]) #define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy)) #define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy)) #include "kernels/kernels.cuh" diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index 2c7d876..e8e4042 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -893,6 +893,10 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st kernel_filter<<>>(vtxbuf, start, end, scratchpad); kernel_reduce<<>>(scratchpad, num_elems); kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_SUM) { + kernel_filter<<>>(vtxbuf, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); } else { ERROR("Unrecognized rtype"); } @@ -944,6 +948,10 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); kernel_reduce<<>>(scratchpad, num_elems); kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); + } else if (rtype == RTYPE_SUM) { + kernel_filter_vec<<>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); + kernel_reduce<<>>(scratchpad, num_elems); + kernel_reduce_block<<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); } else { ERROR("Unrecognized rtype"); } diff --git a/src/standalone/model/model_reduce.cc b/src/standalone/model/model_reduce.cc index 1de3b32..bbcfb32 100644 --- a/src/standalone/model/model_reduce.cc +++ b/src/standalone/model/model_reduce.cc @@ -68,8 +68,7 @@ exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { // clang-format on ModelScalar -model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, - const VertexBufferHandle& a) +model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a) { ReduceInitialScalFunc reduce_initial; ReduceFunc reduce; @@ -95,30 +94,31 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, reduce = sum; solve_mean = true; break; + case RTYPE_SUM: + reduce_initial = length; + reduce = sum; + break; default: ERROR("Unrecognized RTYPE"); } - const int initial_idx = acVertexBufferIdx( - mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min], - mesh.info.int_params[AC_nz_min], mesh.info); + const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min], + mesh.info.int_params[AC_ny_min], + mesh.info.int_params[AC_nz_min], mesh.info); ModelScalar res; if (rtype == RTYPE_MAX || rtype == RTYPE_MIN) res = reduce_initial(mesh.vertex_buffer[a][initial_idx]); else - res = .0f; + res = 0; - for (int k = mesh.info.int_params[AC_nz_min]; - k < mesh.info.int_params[AC_nz_max]; ++k) { - for (int j = mesh.info.int_params[AC_ny_min]; - j < mesh.info.int_params[AC_ny_max]; ++j) { - for (int i = mesh.info.int_params[AC_nx_min]; - i < mesh.info.int_params[AC_nx_max]; ++i) { + for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; ++k) { + for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; ++j) { + for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max]; + ++i) { const int idx = acVertexBufferIdx(i, j, k, mesh.info); - const ModelScalar curr_val = reduce_initial( - mesh.vertex_buffer[a][idx]); - res = reduce(res, curr_val); + const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx]); + res = reduce(res, curr_val); } } } @@ -133,9 +133,8 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, } ModelScalar -model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, - const VertexBufferHandle& a, const VertexBufferHandle& b, - const VertexBufferHandle& c) +model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a, + const VertexBufferHandle& b, const VertexBufferHandle& c) { // ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar); ReduceInitialVecFunc reduce_initial; @@ -162,33 +161,34 @@ model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, reduce = sum; solve_mean = true; break; + case RTYPE_SUM: + reduce_initial = length; + reduce = sum; + break; default: ERROR("Unrecognized RTYPE"); } - const int initial_idx = acVertexBufferIdx( - mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min], - mesh.info.int_params[AC_nz_min], mesh.info); + const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min], + mesh.info.int_params[AC_ny_min], + mesh.info.int_params[AC_nz_min], mesh.info); ModelScalar res; if (rtype == RTYPE_MAX || rtype == RTYPE_MIN) - res = reduce_initial(mesh.vertex_buffer[a][initial_idx], - mesh.vertex_buffer[b][initial_idx], + res = reduce_initial(mesh.vertex_buffer[a][initial_idx], mesh.vertex_buffer[b][initial_idx], mesh.vertex_buffer[c][initial_idx]); else res = 0; - for (int k = mesh.info.int_params[AC_nz_min]; - k < mesh.info.int_params[AC_nz_max]; k++) { - for (int j = mesh.info.int_params[AC_ny_min]; - j < mesh.info.int_params[AC_ny_max]; j++) { - for (int i = mesh.info.int_params[AC_nx_min]; - i < mesh.info.int_params[AC_nx_max]; i++) { + for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; k++) { + for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; j++) { + for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max]; + i++) { const int idx = acVertexBufferIdx(i, j, k, mesh.info); - const ModelScalar curr_val = reduce_initial( - mesh.vertex_buffer[a][idx], mesh.vertex_buffer[b][idx], - mesh.vertex_buffer[c][idx]); - res = reduce(res, curr_val); + const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx], + mesh.vertex_buffer[b][idx], + mesh.vertex_buffer[c][idx]); + res = reduce(res, curr_val); } } }