Merge branch 'master' into node_device_interface_revision_07-23

This commit is contained in:
jpekkila
2019-07-30 14:31:33 +03:00
5 changed files with 52 additions and 36 deletions

View File

@@ -95,7 +95,14 @@ typedef struct {
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYPES } ReductionType;
typedef enum {
RTYPE_MAX,
RTYPE_MIN,
RTYPE_RMS,
RTYPE_RMS_EXP,
RTYPE_SUM,
NUM_REDUCTION_TYPES
} ReductionType;
typedef enum {
STREAM_DEFAULT,

View File

@@ -515,7 +515,7 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
else if (rtype == RTYPE_MIN) {
res = min(res, results[i]);
}
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) {
res = sum(res, results[i]);
}
else {
@@ -527,7 +527,6 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z);
res = sqrt(inv_n * res);
}
return res;
}

View File

@@ -43,7 +43,9 @@ __constant__ AcMeshInfo d_mesh_info;
__constant__ int3 d_multigpu_offset;
__constant__ Grid globalGrid;
#define DCONST_INT(X) (d_mesh_info.int_params[X])
#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
#include "kernels/kernels.cuh"

View File

@@ -893,6 +893,10 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else if (rtype == RTYPE_SUM) {
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else {
ERROR("Unrecognized rtype");
}
@@ -944,6 +948,10 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else if (rtype == RTYPE_SUM) {
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else {
ERROR("Unrecognized rtype");
}

View File

@@ -68,8 +68,7 @@ exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) {
// clang-format on
ModelScalar
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
const VertexBufferHandle& a)
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a)
{
ReduceInitialScalFunc reduce_initial;
ReduceFunc reduce;
@@ -95,29 +94,30 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
reduce = sum;
solve_mean = true;
break;
case RTYPE_SUM:
reduce_initial = length;
reduce = sum;
break;
default:
ERROR("Unrecognized RTYPE");
}
const int initial_idx = acVertexBufferIdx(
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min],
const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info);
ModelScalar res;
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
res = reduce_initial(mesh.vertex_buffer[a][initial_idx]);
else
res = .0f;
res = 0;
for (int k = mesh.info.int_params[AC_nz_min];
k < mesh.info.int_params[AC_nz_max]; ++k) {
for (int j = mesh.info.int_params[AC_ny_min];
j < mesh.info.int_params[AC_ny_max]; ++j) {
for (int i = mesh.info.int_params[AC_nx_min];
i < mesh.info.int_params[AC_nx_max]; ++i) {
for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; ++k) {
for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; ++j) {
for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
++i) {
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
const ModelScalar curr_val = reduce_initial(
mesh.vertex_buffer[a][idx]);
const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx]);
res = reduce(res, curr_val);
}
}
@@ -133,9 +133,8 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
}
ModelScalar
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
const VertexBufferHandle& a, const VertexBufferHandle& b,
const VertexBufferHandle& c)
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a,
const VertexBufferHandle& b, const VertexBufferHandle& c)
{
// ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar);
ReduceInitialVecFunc reduce_initial;
@@ -162,31 +161,32 @@ model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
reduce = sum;
solve_mean = true;
break;
case RTYPE_SUM:
reduce_initial = length;
reduce = sum;
break;
default:
ERROR("Unrecognized RTYPE");
}
const int initial_idx = acVertexBufferIdx(
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min],
const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info);
ModelScalar res;
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
res = reduce_initial(mesh.vertex_buffer[a][initial_idx],
mesh.vertex_buffer[b][initial_idx],
res = reduce_initial(mesh.vertex_buffer[a][initial_idx], mesh.vertex_buffer[b][initial_idx],
mesh.vertex_buffer[c][initial_idx]);
else
res = 0;
for (int k = mesh.info.int_params[AC_nz_min];
k < mesh.info.int_params[AC_nz_max]; k++) {
for (int j = mesh.info.int_params[AC_ny_min];
j < mesh.info.int_params[AC_ny_max]; j++) {
for (int i = mesh.info.int_params[AC_nx_min];
i < mesh.info.int_params[AC_nx_max]; i++) {
for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; k++) {
for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; j++) {
for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
i++) {
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
const ModelScalar curr_val = reduce_initial(
mesh.vertex_buffer[a][idx], mesh.vertex_buffer[b][idx],
const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx],
mesh.vertex_buffer[b][idx],
mesh.vertex_buffer[c][idx]);
res = reduce(res, curr_val);
}