Merge branch 'master' into cmakelist_rewrite_and_C_API_conformity_07-26
This commit is contained in:
@@ -515,7 +515,7 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
|
||||
else if (rtype == RTYPE_MIN) {
|
||||
res = min(res, results[i]);
|
||||
}
|
||||
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
|
||||
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) {
|
||||
res = sum(res, results[i]);
|
||||
}
|
||||
else {
|
||||
@@ -527,7 +527,6 @@ simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const
|
||||
const AcReal inv_n = AcReal(1.) / (grid.n.x * grid.n.y * grid.n.z);
|
||||
res = sqrt(inv_n * res);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,9 @@ __constant__ AcMeshInfo d_mesh_info;
|
||||
__constant__ int3 d_multigpu_offset;
|
||||
__constant__ Grid globalGrid;
|
||||
#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
||||
#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
|
||||
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
||||
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
||||
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
||||
#include "kernels/kernels.cuh"
|
||||
|
||||
@@ -893,6 +893,10 @@ reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& st
|
||||
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||
} else if (rtype == RTYPE_SUM) {
|
||||
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||
} else {
|
||||
ERROR("Unrecognized rtype");
|
||||
}
|
||||
@@ -944,6 +948,10 @@ reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& sta
|
||||
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||
} else if (rtype == RTYPE_SUM) {
|
||||
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||
} else {
|
||||
ERROR("Unrecognized rtype");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user