Added sum reduction. NOTE: Scalar sum does not pass the automated test but vector sum does. I couldn't see anything wrong with the code itself and I strongly suspect that the failures are caused by loss of precision due to summing a huge amount of numbers of different magnitudes. However I'm not yet completely sure. Something like the Kahan summation algorithm might be useful if the errors are really caused by fp arithmetic.

This commit is contained in:
jpekkila
2019-07-30 14:28:18 +03:00
parent 9796d5e981
commit 69deef66fe
4 changed files with 50 additions and 36 deletions

View File

@@ -77,7 +77,14 @@ typedef struct {
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYPES } ReductionType;
typedef enum {
RTYPE_MAX,
RTYPE_MIN,
RTYPE_RMS,
RTYPE_RMS_EXP,
RTYPE_SUM,
NUM_REDUCTION_TYPES
} ReductionType;
typedef enum {
STREAM_DEFAULT,

View File

@@ -474,7 +474,7 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons
else if (rtype == RTYPE_MIN) {
res = min(res, results[i]);
}
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) {
res = sum(res, results[i]);
}
else {
@@ -486,7 +486,6 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons
const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z);
res = sqrt(inv_n * res);
}
return res;
}

View File

@@ -893,6 +893,10 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else if (rtype == RTYPE_SUM) {
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else {
ERROR("Unrecognized rtype");
}
@@ -944,6 +948,10 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else if (rtype == RTYPE_SUM) {
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
} else {
ERROR("Unrecognized rtype");
}

View File

@@ -68,8 +68,7 @@ exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) {
// clang-format on
ModelScalar
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
const VertexBufferHandle& a)
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a)
{
ReduceInitialScalFunc reduce_initial;
ReduceFunc reduce;
@@ -95,30 +94,31 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
reduce = sum;
solve_mean = true;
break;
case RTYPE_SUM:
reduce_initial = length;
reduce = sum;
break;
default:
ERROR("Unrecognized RTYPE");
}
const int initial_idx = acVertexBufferIdx(
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info);
const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info);
ModelScalar res;
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
res = reduce_initial(mesh.vertex_buffer[a][initial_idx]);
else
res = .0f;
res = 0;
for (int k = mesh.info.int_params[AC_nz_min];
k < mesh.info.int_params[AC_nz_max]; ++k) {
for (int j = mesh.info.int_params[AC_ny_min];
j < mesh.info.int_params[AC_ny_max]; ++j) {
for (int i = mesh.info.int_params[AC_nx_min];
i < mesh.info.int_params[AC_nx_max]; ++i) {
for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; ++k) {
for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; ++j) {
for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
++i) {
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
const ModelScalar curr_val = reduce_initial(
mesh.vertex_buffer[a][idx]);
res = reduce(res, curr_val);
const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx]);
res = reduce(res, curr_val);
}
}
}
@@ -133,9 +133,8 @@ model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
}
ModelScalar
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
const VertexBufferHandle& a, const VertexBufferHandle& b,
const VertexBufferHandle& c)
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a,
const VertexBufferHandle& b, const VertexBufferHandle& c)
{
// ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar);
ReduceInitialVecFunc reduce_initial;
@@ -162,33 +161,34 @@ model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
reduce = sum;
solve_mean = true;
break;
case RTYPE_SUM:
reduce_initial = length;
reduce = sum;
break;
default:
ERROR("Unrecognized RTYPE");
}
const int initial_idx = acVertexBufferIdx(
mesh.info.int_params[AC_nx_min], mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info);
const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
mesh.info.int_params[AC_ny_min],
mesh.info.int_params[AC_nz_min], mesh.info);
ModelScalar res;
if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
res = reduce_initial(mesh.vertex_buffer[a][initial_idx],
mesh.vertex_buffer[b][initial_idx],
res = reduce_initial(mesh.vertex_buffer[a][initial_idx], mesh.vertex_buffer[b][initial_idx],
mesh.vertex_buffer[c][initial_idx]);
else
res = 0;
for (int k = mesh.info.int_params[AC_nz_min];
k < mesh.info.int_params[AC_nz_max]; k++) {
for (int j = mesh.info.int_params[AC_ny_min];
j < mesh.info.int_params[AC_ny_max]; j++) {
for (int i = mesh.info.int_params[AC_nx_min];
i < mesh.info.int_params[AC_nx_max]; i++) {
for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; k++) {
for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; j++) {
for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
i++) {
const int idx = acVertexBufferIdx(i, j, k, mesh.info);
const ModelScalar curr_val = reduce_initial(
mesh.vertex_buffer[a][idx], mesh.vertex_buffer[b][idx],
mesh.vertex_buffer[c][idx]);
res = reduce(res, curr_val);
const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx],
mesh.vertex_buffer[b][idx],
mesh.vertex_buffer[c][idx]);
res = reduce(res, curr_val);
}
}
}