MPI communication of corners via CPU OK
This commit is contained in:
@@ -89,7 +89,7 @@ isWithin(const int3 idx, const int3 min, const int3 max)
|
||||
static PackedData
|
||||
acCreatePackedData(const int3 dims)
|
||||
{
|
||||
PackedData data = {0};
|
||||
PackedData data = {};
|
||||
|
||||
data.dims = dims;
|
||||
|
||||
@@ -109,6 +109,46 @@ acDestroyPackedData(PackedData* data)
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
static PackedData
|
||||
acCreatePackedDataHost(const int3 dims)
|
||||
{
|
||||
PackedData data = {};
|
||||
|
||||
data.dims = dims;
|
||||
|
||||
const size_t bytes = dims.x * dims.y * dims.z * sizeof(data.data[0]) * NUM_VTXBUF_HANDLES;
|
||||
data.data = (AcReal*)malloc(bytes);
|
||||
ERRCHK_ALWAYS(data.data);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static void
|
||||
acTransferPackedDataToHost(const PackedData ddata, PackedData* hdata)
|
||||
{
|
||||
const size_t bytes = ddata.dims.x * ddata.dims.y * ddata.dims.z * sizeof(ddata.data[0]) *
|
||||
NUM_VTXBUF_HANDLES;
|
||||
ERRCHK_CUDA_ALWAYS(cudaMemcpy(hdata->data, ddata.data, bytes, cudaMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
static void
|
||||
acTransferPackedDataToDevice(const PackedData hdata, PackedData* ddata)
|
||||
{
|
||||
const size_t bytes = hdata.dims.x * hdata.dims.y * hdata.dims.z * sizeof(hdata.data[0]) *
|
||||
NUM_VTXBUF_HANDLES;
|
||||
ERRCHK_CUDA_ALWAYS(cudaMemcpy(ddata->data, hdata.data, bytes, cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
static AcResult
|
||||
acDestroyPackedDataHost(PackedData* data)
|
||||
{
|
||||
data->dims = (int3){-1, -1, -1};
|
||||
free(data->data);
|
||||
data->data = NULL;
|
||||
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_handle)
|
||||
{
|
||||
@@ -1220,7 +1260,7 @@ acDeviceCommunicateHalosMPI(const Device device)
|
||||
|
||||
const int3 neighbor = (int3){i, j, k};
|
||||
|
||||
for (int a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
for (size_t a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
const int3 a0 = a0s[a_idx];
|
||||
const int3 a1 = a0 + device->corners_send[a_idx].dims;
|
||||
|
||||
@@ -1241,7 +1281,7 @@ acDeviceCommunicateHalosMPI(const Device device)
|
||||
print_int3(b0);
|
||||
printf("\n");
|
||||
|
||||
for (int b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
for (size_t b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
if (b0s[b_idx].x == b0.x && b0s[b_idx].y == b0.y &&
|
||||
b0s[b_idx].z == b0.z) {
|
||||
|
||||
@@ -1340,15 +1380,15 @@ acDeviceCommunicateCornersMPI(const Device device)
|
||||
if (i == 0 && j == 0 && k == 0)
|
||||
continue;
|
||||
|
||||
for (int a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
for (int b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
for (size_t a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
for (size_t b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
const int3 neighbor = (int3){i, j, k};
|
||||
|
||||
const int3 a0 = a0s[a_idx];
|
||||
const int3 a1 = a0 + dims;
|
||||
// const int3 a1 = a0 + dims;
|
||||
|
||||
const int3 b0 = a0 - neighbor * nn;
|
||||
const int3 b1 = a1 - neighbor * nn;
|
||||
// const int3 b1 = a1 - neighbor * nn;
|
||||
|
||||
if (b0s[b_idx].x == b0.x && b0s[b_idx].y == b0.y && b0s[b_idx].z == b0.z) {
|
||||
|
||||
@@ -1361,16 +1401,30 @@ acDeviceCommunicateCornersMPI(const Device device)
|
||||
acKernelPackData(stream, device->vba, a0, src);
|
||||
acDeviceSynchronizeStream(device, STREAM_DEFAULT);
|
||||
|
||||
// Host ////////////////////////////////////////////////
|
||||
PackedData src_host = acCreatePackedDataHost(dims);
|
||||
PackedData dst_host = acCreatePackedDataHost(dims);
|
||||
acTransferPackedDataToHost(src, &src_host);
|
||||
////////////////////////////////////////////////////////
|
||||
|
||||
const int3 pid3d = getPid3D(pid, decomp);
|
||||
MPI_Request send_req, recv_req;
|
||||
MPI_Isend(src.data, count, datatype, getPid(pid3d + neighbor, decomp),
|
||||
b_idx, MPI_COMM_WORLD, &send_req);
|
||||
MPI_Irecv(dst.data, count, datatype, getPid(pid3d - neighbor, decomp),
|
||||
b_idx, MPI_COMM_WORLD, &recv_req);
|
||||
MPI_Isend(src_host.data, count, datatype,
|
||||
getPid(pid3d + neighbor, decomp), b_idx, MPI_COMM_WORLD,
|
||||
&send_req);
|
||||
MPI_Irecv(dst_host.data, count, datatype,
|
||||
getPid(pid3d - neighbor, decomp), b_idx, MPI_COMM_WORLD,
|
||||
&recv_req);
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Wait(&recv_req, &status);
|
||||
|
||||
// Host ////////////////////////////////////////////////
|
||||
acTransferPackedDataToDevice(dst_host, &dst);
|
||||
acDestroyPackedDataHost(&src_host);
|
||||
acDestroyPackedDataHost(&dst_host);
|
||||
////////////////////////////////////////////////////////
|
||||
|
||||
acKernelUnpackData(stream, dst, b0, device->vba);
|
||||
acDeviceSynchronizeStream(device, STREAM_DEFAULT);
|
||||
|
||||
@@ -1433,15 +1487,15 @@ acDeviceCommunicateEdgesMPI(const Device device)
|
||||
if (i == 0 && j == 0 && k == 0)
|
||||
continue;
|
||||
|
||||
for (int a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
for (int b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
for (size_t a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
for (size_t b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
const int3 neighbor = (int3){i, j, k};
|
||||
|
||||
const int3 a0 = a0s[a_idx];
|
||||
const int3 a1 = a0 + dims;
|
||||
// const int3 a1 = a0 + dims;
|
||||
|
||||
const int3 b0 = a0 - neighbor * nn;
|
||||
const int3 b1 = a1 - neighbor * nn;
|
||||
// const int3 b1 = a1 - neighbor * nn;
|
||||
|
||||
if (b0s[b_idx].x == b0.x && b0s[b_idx].y == b0.y && b0s[b_idx].z == b0.z) {
|
||||
printf("Transfer: ");
|
||||
@@ -1487,7 +1541,7 @@ static AcResult
|
||||
acDeviceCommunicateHalosMPI(const Device device)
|
||||
{
|
||||
acDeviceCommunicateCornersMPI(device);
|
||||
acDeviceCommunicateEdgesMPI(device);
|
||||
// acDeviceCommunicateEdgesMPI(device);
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
/*
|
||||
@@ -1543,7 +1597,7 @@ acDeviceCommunicateHalosMPI(const Device device)
|
||||
|
||||
const int3 neighbor = (int3){i, j, k};
|
||||
|
||||
for (int a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
for (size_t a_idx = 0; a_idx < ARRAY_SIZE(a0s); ++a_idx) {
|
||||
const int3 a0 = a0s[a_idx];
|
||||
const int3 a1 = a0 + device->corners_send[a_idx].dims;
|
||||
|
||||
@@ -1564,7 +1618,7 @@ acDeviceCommunicateHalosMPI(const Device device)
|
||||
print_int3(b0);
|
||||
printf("\n");
|
||||
|
||||
for (int b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
for (size_t b_idx = 0; b_idx < ARRAY_SIZE(b0s); ++b_idx) {
|
||||
if (b0s[b_idx].x == b0.x && b0s[b_idx].y == b0.y &&
|
||||
b0s[b_idx].z == b0.z) {
|
||||
|
||||
|
Reference in New Issue
Block a user