Current 3D decomp method still too complicated. Starting again from scratch.
This commit is contained in:
@@ -35,12 +35,6 @@
|
|||||||
|
|
||||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int3 dims;
|
|
||||||
AcReal* in;
|
|
||||||
AcReal* out;
|
|
||||||
} PackedData;
|
|
||||||
|
|
||||||
struct device_s {
|
struct device_s {
|
||||||
int id;
|
int id;
|
||||||
AcMeshInfo local_config;
|
AcMeshInfo local_config;
|
||||||
@@ -54,13 +48,7 @@ struct device_s {
|
|||||||
AcReal* reduce_result;
|
AcReal* reduce_result;
|
||||||
|
|
||||||
#if AC_MPI_ENABLED
|
#if AC_MPI_ENABLED
|
||||||
|
// TODO
|
||||||
#define NUM_SIDES (6)
|
|
||||||
#define NUM_EDGES (12)
|
|
||||||
#define NUM_CORNERS (8)
|
|
||||||
PackedData sides[NUM_VTXBUF_HANDLES][NUM_SIDES];
|
|
||||||
PackedData edges[NUM_VTXBUF_HANDLES][NUM_EDGES];
|
|
||||||
PackedData corners[NUM_VTXBUF_HANDLES][NUM_CORNERS];
|
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -78,29 +66,6 @@ print_int3(const int3 val)
|
|||||||
printf("(%d, %d, %d)", val.x, val.y, val.z);
|
printf("(%d, %d, %d)", val.x, val.y, val.z);
|
||||||
}
|
}
|
||||||
|
|
||||||
PackedData
|
|
||||||
acDeviceMallocPackedData(const int3 dims)
|
|
||||||
{
|
|
||||||
PackedData data = {0};
|
|
||||||
data.dims = dims;
|
|
||||||
|
|
||||||
const size_t bytes = dims.x * dims.y * dims.z * sizeof(data.in[0]);
|
|
||||||
ERRCHK_CUDA_ALWAYS(cudaMalloc((void**)&data.in, bytes));
|
|
||||||
ERRCHK_CUDA_ALWAYS(cudaMalloc((void**)&data.out, bytes));
|
|
||||||
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
AcResult
|
|
||||||
acDeviceFreePackedData(PackedData* data)
|
|
||||||
{
|
|
||||||
data->dims = (int3){-1, -1, -1};
|
|
||||||
|
|
||||||
cudaFree(data->in);
|
|
||||||
cudaFree(data->out);
|
|
||||||
return AC_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
isWithin(const int3 idx, const int3 min, const int3 max)
|
isWithin(const int3 idx, const int3 min, const int3 max)
|
||||||
{
|
{
|
||||||
@@ -115,120 +80,6 @@ isWithin(const int3 idx, const int3 min, const int3 max)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
|
||||||
acDeviceMallocPackedSidesEdgesCorners(Device device)
|
|
||||||
{
|
|
||||||
const int3 nn = (int3){
|
|
||||||
device->local_config.int_params[AC_nx],
|
|
||||||
device->local_config.int_params[AC_ny],
|
|
||||||
device->local_config.int_params[AC_nz],
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 mm = (int3){
|
|
||||||
device->local_config.int_params[AC_mx],
|
|
||||||
device->local_config.int_params[AC_my],
|
|
||||||
device->local_config.int_params[AC_mz],
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
(int3){nn.x, nn.y, NGHOST}, //
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){nn.x, NGHOST, nn.z}, //
|
|
||||||
(int3){NGHOST, nn.y, nn.z}, //
|
|
||||||
(int3){nn.x, nn.y, nn.z},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 sides[] = {
|
|
||||||
(int3){nn.x, nn.y, NGHOST},
|
|
||||||
(int3){nn.x, NGHOST, nn.z},
|
|
||||||
(int3){NGHOST, nn.y, nn.z},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 edges[] = {
|
|
||||||
(int3){nn.x, NGHOST, NGHOST},
|
|
||||||
(int3){NGHOST, nn.y, NGHOST},
|
|
||||||
(int3){NGHOST, NGHOST, nn.z},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 corners[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST},
|
|
||||||
};
|
|
||||||
|
|
||||||
for (size_t i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
|
||||||
for (size_t pcounter = 0; pcounter < ARRAY_SIZE(a0s); ++pcounter) { // for start pos
|
|
||||||
const int3 a0 = a0s[pcounter];
|
|
||||||
|
|
||||||
// for sides
|
|
||||||
size_t j = 0;
|
|
||||||
for (size_t scounter = 0; scounter < ARRAY_SIZE(sides); ++scounter) {
|
|
||||||
const int3 a1 = a0 + sides[scounter];
|
|
||||||
if (!isWithin(a1, (int3){0, 0, 0}, mm + (int3){1, 1, 1}))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ERRCHK_ALWAYS(j < NUM_SIDES)
|
|
||||||
device->sides[i][j] = acDeviceMallocPackedData(sides[scounter]);
|
|
||||||
if (!i) {
|
|
||||||
printf("Allocated side ");
|
|
||||||
print_int3(device->sides[i][j].dims);
|
|
||||||
printf(" for packed data\n");
|
|
||||||
}
|
|
||||||
++j;
|
|
||||||
}
|
|
||||||
// for edges
|
|
||||||
j = 0;
|
|
||||||
for (size_t scounter = 0; scounter < ARRAY_SIZE(edges); ++scounter) {
|
|
||||||
const int3 a1 = a0 + edges[scounter];
|
|
||||||
if (!isWithin(a1, (int3){0, 0, 0}, mm + (int3){1, 1, 1}))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ERRCHK_ALWAYS(j < NUM_EDGES)
|
|
||||||
device->edges[i][j] = acDeviceMallocPackedData(edges[scounter]);
|
|
||||||
if (!i) {
|
|
||||||
printf("Allocated edge ");
|
|
||||||
print_int3(device->edges[i][j].dims);
|
|
||||||
printf(" for packed data\n");
|
|
||||||
}
|
|
||||||
++j;
|
|
||||||
}
|
|
||||||
// for corners
|
|
||||||
j = 0;
|
|
||||||
for (size_t scounter = 0; scounter < ARRAY_SIZE(corners); ++scounter) {
|
|
||||||
const int3 a1 = a0 + corners[scounter];
|
|
||||||
if (!isWithin(a1, (int3){0, 0, 0}, mm + (int3){1, 1, 1}))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ERRCHK_ALWAYS(j < NUM_CORNERS)
|
|
||||||
device->corners[i][j] = acDeviceMallocPackedData(corners[scounter]);
|
|
||||||
if (!i) {
|
|
||||||
printf("Allocated corner ");
|
|
||||||
print_int3(device->corners[i][j].dims);
|
|
||||||
printf(" for packed data\n");
|
|
||||||
}
|
|
||||||
++j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return AC_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
AcResult
|
|
||||||
acDeviceFreePackedSidesEdgesCorners(Device device)
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
|
||||||
for (size_t j = 0; j < NUM_SIDES; ++j)
|
|
||||||
acDeviceFreePackedData(&device->sides[i][j]);
|
|
||||||
|
|
||||||
for (size_t j = 0; j < NUM_EDGES; ++j)
|
|
||||||
acDeviceFreePackedData(&device->edges[i][j]);
|
|
||||||
|
|
||||||
for (size_t j = 0; j < NUM_CORNERS; ++j)
|
|
||||||
acDeviceFreePackedData(&device->corners[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_handle)
|
acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_handle)
|
||||||
{
|
{
|
||||||
@@ -278,7 +129,7 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
|||||||
ERRCHK_CUDA_ALWAYS(cudaMalloc((void**)&device->reduce_result, sizeof(AcReal)));
|
ERRCHK_CUDA_ALWAYS(cudaMalloc((void**)&device->reduce_result, sizeof(AcReal)));
|
||||||
|
|
||||||
#if AC_MPI_ENABLED
|
#if AC_MPI_ENABLED
|
||||||
acDeviceMallocPackedSidesEdgesCorners(device);
|
// TODO
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Device constants
|
// Device constants
|
||||||
@@ -315,7 +166,7 @@ acDeviceDestroy(Device device)
|
|||||||
cudaFree(device->reduce_result);
|
cudaFree(device->reduce_result);
|
||||||
|
|
||||||
#if AC_MPI_ENABLED
|
#if AC_MPI_ENABLED
|
||||||
acDeviceFreePackedSidesEdgesCorners(device);
|
// TODO
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Concurrency
|
// Concurrency
|
||||||
@@ -1282,86 +1133,6 @@ acDeviceCommunicateHalosMPI(const Device device)
|
|||||||
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
|
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
|
||||||
const int3 decomp = decompose(nprocs);
|
const int3 decomp = decompose(nprocs);
|
||||||
|
|
||||||
const int3 nn = (int3){
|
|
||||||
device->local_config.int_params[AC_nx],
|
|
||||||
device->local_config.int_params[AC_ny],
|
|
||||||
device->local_config.int_params[AC_nz],
|
|
||||||
};
|
|
||||||
const int3 mm = (int3){
|
|
||||||
device->local_config.int_params[AC_mx],
|
|
||||||
device->local_config.int_params[AC_my],
|
|
||||||
device->local_config.int_params[AC_mz],
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 a0s[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST}, //
|
|
||||||
(int3){nn.x, NGHOST, NGHOST}, //
|
|
||||||
(int3){NGHOST, nn.y, NGHOST}, //
|
|
||||||
(int3){nn.x, nn.y, NGHOST}, //
|
|
||||||
(int3){NGHOST, NGHOST, nn.z}, //
|
|
||||||
(int3){nn.x, NGHOST, nn.z}, //
|
|
||||||
(int3){NGHOST, nn.y, nn.z}, //
|
|
||||||
(int3){nn.x, nn.y, nn.z},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 sides[] = {
|
|
||||||
(int3){nn.x, nn.y, NGHOST},
|
|
||||||
(int3){nn.x, NGHOST, nn.z},
|
|
||||||
(int3){NGHOST, nn.y, nn.z},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 edges[] = {
|
|
||||||
(int3){nn.x, NGHOST, NGHOST},
|
|
||||||
(int3){NGHOST, nn.y, NGHOST},
|
|
||||||
(int3){NGHOST, NGHOST, nn.z},
|
|
||||||
};
|
|
||||||
|
|
||||||
const int3 corners[] = {
|
|
||||||
(int3){NGHOST, NGHOST, NGHOST},
|
|
||||||
};
|
|
||||||
|
|
||||||
for (int k = -1; k <= 1; ++k) {
|
|
||||||
for (int j = -1; j <= 1; ++j) {
|
|
||||||
for (int i = -1; i <= 1; ++i) {
|
|
||||||
if (i == 0 && j == 0 && k == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const int3 neighbor = (int3){i, j, k};
|
|
||||||
const int neighbor_idx = getPid(neighbor, decomp);
|
|
||||||
|
|
||||||
// Sides
|
|
||||||
for (size_t pcounter = 0; pcounter < ARRAY_SIZE(a0s); ++pcounter) { // for start pos
|
|
||||||
const int3 a0 = a0s[pcounter];
|
|
||||||
|
|
||||||
// for side
|
|
||||||
for (size_t scounter = 0; scounter < ARRAY_SIZE(sides); ++scounter) {
|
|
||||||
const int3 a1 = a0 + sides[scounter];
|
|
||||||
if (!isWithin(a1, (int3){0, 0, 0}, mm + (int3){1, 1, 1}))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const int3 b0_neighbor = a0 - neighbor * nn;
|
|
||||||
const int3 b1_neighbor = a1 - neighbor * nn;
|
|
||||||
|
|
||||||
// communicateBlockWithNeighbor(COMM_SEND, a0, a1, b0, b1, neighbor_idx);
|
|
||||||
|
|
||||||
if (isWithin(b0_neighbor, (int3){0, 0, 0}, mm) &&
|
|
||||||
isWithin(b1_neighbor, (int3){0, 0, 0}, mm + (int3){1, 1, 1})) {
|
|
||||||
printf("\t\t%d (side): ", neighbor_idx);
|
|
||||||
print_int3(neighbor);
|
|
||||||
printf("\n");
|
|
||||||
printf("\t\t\t:");
|
|
||||||
print_int3(b0_neighbor);
|
|
||||||
printf("\n");
|
|
||||||
printf("\t\t\t:");
|
|
||||||
print_int3(b1_neighbor);
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1431,9 +1202,9 @@ acDeviceRunMPITest(void)
|
|||||||
|
|
||||||
printf("Decomposition: %d, %d, %d\n", decomposition.x, decomposition.y, decomposition.z);
|
printf("Decomposition: %d, %d, %d\n", decomposition.x, decomposition.y, decomposition.z);
|
||||||
printf("Process %d: (%d, %d, %d)\n", pid, pid3d.x, pid3d.y, pid3d.z);
|
printf("Process %d: (%d, %d, %d)\n", pid, pid3d.x, pid3d.y, pid3d.z);
|
||||||
ERRCHK(info.int_params[AC_nx] % decomposition.x == 0);
|
ERRCHK_ALWAYS(info.int_params[AC_nx] % decomposition.x == 0);
|
||||||
ERRCHK(info.int_params[AC_ny] % decomposition.y == 0);
|
ERRCHK_ALWAYS(info.int_params[AC_ny] % decomposition.y == 0);
|
||||||
ERRCHK(info.int_params[AC_nz] % decomposition.z == 0);
|
ERRCHK_ALWAYS(info.int_params[AC_nz] % decomposition.z == 0);
|
||||||
|
|
||||||
submesh_info.int_params[AC_nx] = info.int_params[AC_nx] / decomposition.x;
|
submesh_info.int_params[AC_nx] = info.int_params[AC_nx] / decomposition.x;
|
||||||
submesh_info.int_params[AC_ny] = info.int_params[AC_ny] / decomposition.y;
|
submesh_info.int_params[AC_ny] = info.int_params[AC_ny] / decomposition.y;
|
||||||
|
Reference in New Issue
Block a user