Merge branch 'master' into node_device_interface_revision_07-23
This commit is contained in:
@@ -95,7 +95,14 @@ typedef struct {
|
|||||||
|
|
||||||
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
|
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
|
||||||
|
|
||||||
typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYPES } ReductionType;
|
typedef enum {
|
||||||
|
RTYPE_MAX,
|
||||||
|
RTYPE_MIN,
|
||||||
|
RTYPE_RMS,
|
||||||
|
RTYPE_RMS_EXP,
|
||||||
|
RTYPE_SUM,
|
||||||
|
NUM_REDUCTION_TYPES
|
||||||
|
} ReductionType;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
STREAM_DEFAULT,
|
STREAM_DEFAULT,
|
||||||
|
@@ -515,7 +515,7 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
|
|||||||
else if (rtype == RTYPE_MIN) {
|
else if (rtype == RTYPE_MIN) {
|
||||||
res = min(res, results[i]);
|
res = min(res, results[i]);
|
||||||
}
|
}
|
||||||
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
|
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) {
|
||||||
res = sum(res, results[i]);
|
res = sum(res, results[i]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -527,7 +527,6 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
|
|||||||
const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z);
|
const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z);
|
||||||
res = sqrt(inv_n * res);
|
res = sqrt(inv_n * res);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -43,7 +43,9 @@ __constant__ AcMeshInfo d_mesh_info;
|
|||||||
__constant__ int3 d_multigpu_offset;
|
__constant__ int3 d_multigpu_offset;
|
||||||
__constant__ Grid globalGrid;
|
__constant__ Grid globalGrid;
|
||||||
#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
||||||
|
#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
|
||||||
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
||||||
|
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
||||||
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||||
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
||||||
#include "kernels/kernels.cuh"
|
#include "kernels/kernels.cuh"
|
||||||
|
@@ -893,6 +893,10 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st
|
|||||||
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_SUM) {
|
||||||
|
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
} else {
|
} else {
|
||||||
ERROR("Unrecognized rtype");
|
ERROR("Unrecognized rtype");
|
||||||
}
|
}
|
||||||
@@ -944,6 +948,10 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta
|
|||||||
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_SUM) {
|
||||||
|
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
} else {
|
} else {
|
||||||
ERROR("Unrecognized rtype");
|
ERROR("Unrecognized rtype");
|
||||||
}
|
}
|
||||||
|
@@ -68,8 +68,7 @@ exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) {
|
|||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
ModelScalar
|
ModelScalar
|
||||||
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
|
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a)
|
||||||
const VertexBufferHandle& a)
|
|
||||||
{
|
{
|
||||||
ReduceInitialScalFunc reduce_initial;
|
ReduceInitialScalFunc reduce_initial;
|
||||||
ReduceFunc reduce;
|
ReduceFunc reduce;
|
||||||
@@ -95,30 +94,31 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
|
|||||||
reduce = sum;
|
reduce = sum;
|
||||||
solve_mean = true;
|
solve_mean = true;
|
||||||
break;
|
break;
|
||||||
|
case RTYPE_SUM:
|
||||||
|
reduce_initial = length;
|
||||||
|
reduce = sum;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ERROR("Unrecognized RTYPE");
|
ERROR("Unrecognized RTYPE");
|
||||||
}
|
}
|
||||||
|
|
||||||
const int initial_idx = acVertexBufferIdx(
|
const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
|
||||||
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min],
|
mesh.info.int_params[AC_ny_min],
|
||||||
mesh.info.int_params[AC_nz_min], mesh.info);
|
mesh.info.int_params[AC_nz_min], mesh.info);
|
||||||
|
|
||||||
ModelScalar res;
|
ModelScalar res;
|
||||||
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
|
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
|
||||||
res = reduce_initial(mesh.vertex_buffer[a][initial_idx]);
|
res = reduce_initial(mesh.vertex_buffer[a][initial_idx]);
|
||||||
else
|
else
|
||||||
res = .0f;
|
res = 0;
|
||||||
|
|
||||||
for (int k = mesh.info.int_params[AC_nz_min];
|
for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; ++k) {
|
||||||
k < mesh.info.int_params[AC_nz_max]; ++k) {
|
for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; ++j) {
|
||||||
for (int j = mesh.info.int_params[AC_ny_min];
|
for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
|
||||||
j < mesh.info.int_params[AC_ny_max]; ++j) {
|
++i) {
|
||||||
for (int i = mesh.info.int_params[AC_nx_min];
|
|
||||||
i < mesh.info.int_params[AC_nx_max]; ++i) {
|
|
||||||
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
|
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
|
||||||
const ModelScalar curr_val = reduce_initial(
|
const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx]);
|
||||||
mesh.vertex_buffer[a][idx]);
|
res = reduce(res, curr_val);
|
||||||
res = reduce(res, curr_val);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -133,9 +133,8 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ModelScalar
|
ModelScalar
|
||||||
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
|
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a,
|
||||||
const VertexBufferHandle& a, const VertexBufferHandle& b,
|
const VertexBufferHandle& b, const VertexBufferHandle& c)
|
||||||
const VertexBufferHandle& c)
|
|
||||||
{
|
{
|
||||||
// ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar);
|
// ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar);
|
||||||
ReduceInitialVecFunc reduce_initial;
|
ReduceInitialVecFunc reduce_initial;
|
||||||
@@ -162,33 +161,34 @@ model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
|
|||||||
reduce = sum;
|
reduce = sum;
|
||||||
solve_mean = true;
|
solve_mean = true;
|
||||||
break;
|
break;
|
||||||
|
case RTYPE_SUM:
|
||||||
|
reduce_initial = length;
|
||||||
|
reduce = sum;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ERROR("Unrecognized RTYPE");
|
ERROR("Unrecognized RTYPE");
|
||||||
}
|
}
|
||||||
|
|
||||||
const int initial_idx = acVertexBufferIdx(
|
const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
|
||||||
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min],
|
mesh.info.int_params[AC_ny_min],
|
||||||
mesh.info.int_params[AC_nz_min], mesh.info);
|
mesh.info.int_params[AC_nz_min], mesh.info);
|
||||||
|
|
||||||
ModelScalar res;
|
ModelScalar res;
|
||||||
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
|
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
|
||||||
res = reduce_initial(mesh.vertex_buffer[a][initial_idx],
|
res = reduce_initial(mesh.vertex_buffer[a][initial_idx], mesh.vertex_buffer[b][initial_idx],
|
||||||
mesh.vertex_buffer[b][initial_idx],
|
|
||||||
mesh.vertex_buffer[c][initial_idx]);
|
mesh.vertex_buffer[c][initial_idx]);
|
||||||
else
|
else
|
||||||
res = 0;
|
res = 0;
|
||||||
|
|
||||||
for (int k = mesh.info.int_params[AC_nz_min];
|
for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; k++) {
|
||||||
k < mesh.info.int_params[AC_nz_max]; k++) {
|
for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; j++) {
|
||||||
for (int j = mesh.info.int_params[AC_ny_min];
|
for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
|
||||||
j < mesh.info.int_params[AC_ny_max]; j++) {
|
i++) {
|
||||||
for (int i = mesh.info.int_params[AC_nx_min];
|
|
||||||
i < mesh.info.int_params[AC_nx_max]; i++) {
|
|
||||||
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
|
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
|
||||||
const ModelScalar curr_val = reduce_initial(
|
const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx],
|
||||||
mesh.vertex_buffer[a][idx], mesh.vertex_buffer[b][idx],
|
mesh.vertex_buffer[b][idx],
|
||||||
mesh.vertex_buffer[c][idx]);
|
mesh.vertex_buffer[c][idx]);
|
||||||
res = reduce(res, curr_val);
|
res = reduce(res, curr_val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user