Merge branch 'master' into cmakelist_rewrite_and_C_API_conformity_07-26

This commit is contained in:
jpekkila
2019-07-31 20:12:22 +03:00
5 changed files with 52 additions and 36 deletions

View File

@@ -95,7 +95,14 @@ typedef struct {
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult; typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYPES } ReductionType; typedef enum {
RTYPE_MAX,
RTYPE_MIN,
RTYPE_RMS,
RTYPE_RMS_EXP,
RTYPE_SUM,
NUM_REDUCTION_TYPES
} ReductionType;
typedef enum { typedef enum {
STREAM_DEFAULT, STREAM_DEFAULT,

View File

@@ -515,7 +515,7 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
else if (rtype == RTYPE_MIN) { else if (rtype == RTYPE_MIN) {
res = min(res, results[i]); res = min(res, results[i]);
} }
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) { else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) {
res = sum(res, results[i]); res = sum(res, results[i]);
} }
else { else {
@@ -527,7 +527,6 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z); const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z);
res = sqrt(inv_n * res); res = sqrt(inv_n * res);
} }
return res; return res;
} }

View File

@@ -43,7 +43,9 @@ __constant__ AcMeshInfo d_mesh_info;
__constant__ int3 d_multigpu_offset; __constant__ int3 d_multigpu_offset;
__constant__ Grid globalGrid; __constant__ Grid globalGrid;
#define DCONST_INT(X) (d_mesh_info.int_params[X]) #define DCONST_INT(X) (d_mesh_info.int_params[X])
#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
#define DCONST_REAL(X) (d_mesh_info.real_params[X]) #define DCONST_REAL(X) (d_mesh_info.real_params[X])
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy)) #define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy)) #define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
#include "kernels/kernels.cuh" #include "kernels/kernels.cuh"

View File

@@ -893,6 +893,10 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad); kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems); kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else if (rtype == RTYPE_SUM) {
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else { } else {
ERROR("Unrecognized rtype"); ERROR("Unrecognized rtype");
} }
@@ -944,6 +948,10 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad); kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems); kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result); kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else if (rtype == RTYPE_SUM) {
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else { } else {
ERROR("Unrecognized rtype"); ERROR("Unrecognized rtype");
} }

View File

@@ -68,8 +68,7 @@ exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) {
// clang-format on // clang-format on
ModelScalar ModelScalar
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a)
const VertexBufferHandle& a)
{ {
ReduceInitialScalFunc reduce_initial; ReduceInitialScalFunc reduce_initial;
ReduceFunc reduce; ReduceFunc reduce;
@@ -95,30 +94,31 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
reduce = sum; reduce = sum;
solve_mean = true; solve_mean = true;
break; break;
case RTYPE_SUM:
reduce_initial = length;
reduce = sum;
break;
default: default:
ERROR("Unrecognized RTYPE"); ERROR("Unrecognized RTYPE");
} }
const int initial_idx = acVertexBufferIdx( const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min], mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info); mesh.info.int_params[AC_nz_min], mesh.info);
ModelScalar res; ModelScalar res;
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN) if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
res = reduce_initial(mesh.vertex_buffer[a][initial_idx]); res = reduce_initial(mesh.vertex_buffer[a][initial_idx]);
else else
res = .0f; res = 0;
for (int k = mesh.info.int_params[AC_nz_min]; for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; ++k) {
k < mesh.info.int_params[AC_nz_max]; ++k) { for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; ++j) {
for (int j = mesh.info.int_params[AC_ny_min]; for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
j < mesh.info.int_params[AC_ny_max]; ++j) { ++i) {
for (int i = mesh.info.int_params[AC_nx_min];
i < mesh.info.int_params[AC_nx_max]; ++i) {
const int idx = acVertexBufferIdx(i, j, k, mesh.info); const int idx = acVertexBufferIdx(i, j, k, mesh.info);
const ModelScalar curr_val = reduce_initial( const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx]);
mesh.vertex_buffer[a][idx]); res = reduce(res, curr_val);
res = reduce(res, curr_val);
} }
} }
} }
@@ -133,9 +133,8 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
} }
ModelScalar ModelScalar
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a,
const VertexBufferHandle& a, const VertexBufferHandle& b, const VertexBufferHandle& b, const VertexBufferHandle& c)
const VertexBufferHandle& c)
{ {
// ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar); // ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar);
ReduceInitialVecFunc reduce_initial; ReduceInitialVecFunc reduce_initial;
@@ -162,33 +161,34 @@ model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
reduce = sum; reduce = sum;
solve_mean = true; solve_mean = true;
break; break;
case RTYPE_SUM:
reduce_initial = length;
reduce = sum;
break;
default: default:
ERROR("Unrecognized RTYPE"); ERROR("Unrecognized RTYPE");
} }
const int initial_idx = acVertexBufferIdx( const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min], mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info); mesh.info.int_params[AC_nz_min], mesh.info);
ModelScalar res; ModelScalar res;
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN) if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
res = reduce_initial(mesh.vertex_buffer[a][initial_idx], res = reduce_initial(mesh.vertex_buffer[a][initial_idx], mesh.vertex_buffer[b][initial_idx],
mesh.vertex_buffer[b][initial_idx],
mesh.vertex_buffer[c][initial_idx]); mesh.vertex_buffer[c][initial_idx]);
else else
res = 0; res = 0;
for (int k = mesh.info.int_params[AC_nz_min]; for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; k++) {
k < mesh.info.int_params[AC_nz_max]; k++) { for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; j++) {
for (int j = mesh.info.int_params[AC_ny_min]; for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
j < mesh.info.int_params[AC_ny_max]; j++) { i++) {
for (int i = mesh.info.int_params[AC_nx_min];
i < mesh.info.int_params[AC_nx_max]; i++) {
const int idx = acVertexBufferIdx(i, j, k, mesh.info); const int idx = acVertexBufferIdx(i, j, k, mesh.info);
const ModelScalar curr_val = reduce_initial( const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx],
mesh.vertex_buffer[a][idx], mesh.vertex_buffer[b][idx], mesh.vertex_buffer[b][idx],
mesh.vertex_buffer[c][idx]); mesh.vertex_buffer[c][idx]);
res = reduce(res, curr_val); res = reduce(res, curr_val);
} }
} }
} }