ReduceVecScal calls added. For Alfven speeds.
This commit is contained in:
@@ -126,6 +126,15 @@ acReduceVec(const ReductionType rtype, const VertexBufferHandle a, const VertexB
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AcReal
|
||||||
|
acReduceVecScal(const ReductionType rtype, const VertexBufferHandle a, const VertexBufferHandle b,
|
||||||
|
const VertexBufferHandle c, const VertexBufferHandle d)
|
||||||
|
{
|
||||||
|
AcReal result;
|
||||||
|
acNodeReduceVecScal(nodes[0], STREAM_DEFAULT, rtype, a, b, c, d, &result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acStoreWithOffset(const int3 dst, const size_t num_vertices, AcMesh* host_mesh)
|
acStoreWithOffset(const int3 dst, const size_t num_vertices, AcMesh* host_mesh)
|
||||||
{
|
{
|
||||||
|
@@ -474,6 +474,28 @@ acDeviceReduceVec(const Device device, const Stream stream, const ReductionType
|
|||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AcResult
|
||||||
|
acDeviceReduceVecScal(const Device device, const Stream stream, const ReductionType rtype,
|
||||||
|
const VertexBufferHandle vtxbuf0, const VertexBufferHandle vtxbuf1,
|
||||||
|
const VertexBufferHandle vtxbuf2, const VertexBufferHandle vtxbuf3, AcReal* result)
|
||||||
|
{
|
||||||
|
cudaSetDevice(device->id);
|
||||||
|
|
||||||
|
const int3 start = (int3){device->local_config.int_params[AC_nx_min],
|
||||||
|
device->local_config.int_params[AC_ny_min],
|
||||||
|
device->local_config.int_params[AC_nz_min]};
|
||||||
|
|
||||||
|
const int3 end = (int3){device->local_config.int_params[AC_nx_max],
|
||||||
|
device->local_config.int_params[AC_ny_max],
|
||||||
|
device->local_config.int_params[AC_nz_max]};
|
||||||
|
|
||||||
|
*result = acKernelReduceVecScal(device->streams[stream], rtype, start, end, device->vba.in[vtxbuf0],
|
||||||
|
device->vba.in[vtxbuf1], device->vba.in[vtxbuf2], device->vba.in[vtxbuf3],
|
||||||
|
device->reduce_scratchpad, device->reduce_result);
|
||||||
|
return AC_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#if AC_MPI_ENABLED
|
#if AC_MPI_ENABLED
|
||||||
/**
|
/**
|
||||||
Quick overview of the MPI implementation:
|
Quick overview of the MPI implementation:
|
||||||
@@ -2047,4 +2069,7 @@ acGridReduceVec(const Stream stream, const ReductionType rtype, const VertexBuff
|
|||||||
|
|
||||||
return acMPIReduceScal(local_result, rtype, result);
|
return acMPIReduceScal(local_result, rtype, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//MV: for MPI we will need acGridReduceVecScal() to get Alfven speeds etc. TODO
|
||||||
|
|
||||||
#endif // AC_MPI_ENABLED
|
#endif // AC_MPI_ENABLED
|
||||||
|
@@ -797,13 +797,13 @@ simple_final_reduce_scal(const Node node, const ReductionType& rtype, const AcRe
|
|||||||
{
|
{
|
||||||
AcReal res = results[0];
|
AcReal res = results[0];
|
||||||
for (int i = 1; i < n; ++i) {
|
for (int i = 1; i < n; ++i) {
|
||||||
if (rtype == RTYPE_MAX) {
|
if (rtype == RTYPE_MAX || rtype == RTYPE_ALFVEN_MAX) {
|
||||||
res = max(res, results[i]);
|
res = max(res, results[i]);
|
||||||
}
|
}
|
||||||
else if (rtype == RTYPE_MIN) {
|
else if (rtype == RTYPE_MIN || rtype == RTYPE_ALFVEN_MIN) {
|
||||||
res = min(res, results[i]);
|
res = min(res, results[i]);
|
||||||
}
|
}
|
||||||
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) {
|
else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM || rtype == RTYPE_ALFVEN_RMS) {
|
||||||
res = sum(res, results[i]);
|
res = sum(res, results[i]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -811,7 +811,7 @@ simple_final_reduce_scal(const Node node, const ReductionType& rtype, const AcRe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
|
if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_ALFVEN_RMS) {
|
||||||
const AcReal inv_n = AcReal(1.) / (node->grid.n.x * node->grid.n.y * node->grid.n.z);
|
const AcReal inv_n = AcReal(1.) / (node->grid.n.x * node->grid.n.y * node->grid.n.z);
|
||||||
res = sqrt(inv_n * res);
|
res = sqrt(inv_n * res);
|
||||||
}
|
}
|
||||||
@@ -850,3 +850,21 @@ acNodeReduceVec(const Node node, const Stream stream, const ReductionType rtype,
|
|||||||
*result = simple_final_reduce_scal(node, rtype, results, node->num_devices);
|
*result = simple_final_reduce_scal(node, rtype, results, node->num_devices);
|
||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AcResult
|
||||||
|
acNodeReduceVecScal(const Node node, const Stream stream, const ReductionType rtype,
|
||||||
|
const VertexBufferHandle a, const VertexBufferHandle b, const VertexBufferHandle c,
|
||||||
|
const VertexBufferHandle d, AcReal* result)
|
||||||
|
{
|
||||||
|
acNodeSynchronizeStream(node, STREAM_ALL);
|
||||||
|
|
||||||
|
AcReal results[node->num_devices];
|
||||||
|
// #pragma omp parallel for
|
||||||
|
for (int i = 0; i < node->num_devices; ++i) {
|
||||||
|
acDeviceReduceVecScal(node->devices[i], stream, rtype, a, b, c, d, &results[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
*result = simple_final_reduce_scal(node, rtype, results, node->num_devices);
|
||||||
|
return AC_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user