Proper decomposition when using Morton order to partition the computational domain

This commit is contained in:
jpekkila
2020-04-19 22:50:26 +03:00
parent ffb274e16f
commit 4dd825f574

View File

@@ -470,6 +470,38 @@ mod(const int a, const int b)
}
#include <stdint.h>
typedef struct {
uint64_t x, y, z;
} uint3_64;
static uint3_64
morton3D(const uint64_t pid)
{
uint64_t i, j, k;
i = j = k = 0;
for (int bit = 0; bit <= 21; ++bit) {
const uint64_t mask = 0x1l << 3 * bit;
i |= ((pid & (mask << 0)) >> 2 * bit) >> 0;
j |= ((pid & (mask << 1)) >> 2 * bit) >> 1;
k |= ((pid & (mask << 2)) >> 2 * bit) >> 2;
}
return (uint3_64){i, j, k};
}
static uint64_t
morton1D(const uint3_64 pid)
{
uint64_t i = 0;
for (int bit = 0; bit <= 21; ++bit) {
const uint64_t mask = 0x1l << bit;
i |= ((pid.x & mask) << 0) << 2 * bit;
i |= ((pid.y & mask) << 1) << 2 * bit;
i |= ((pid.z & mask) << 2) << 2 * bit;
}
return i;
}
int
getPid(int3 pid, const int3 decomposition)
{
@@ -538,6 +570,20 @@ onTheSameNode(const int pid_a, const int pid_b)
static int3
decompose(const int target)
{
// This is just so beautifully elegant. Complex and efficient decomposition
// in just one line of code.
uint3_64 p = morton3D(target - 1);
p = (uint3_64){p.x + 1, p.y + 1, p.z + 1};
if (p.x * p.y * p.z != target) {
fprintf(stderr, "Invalid number of processes! Cannot decompose the problem domain!\n");
fprintf(stderr, "Target nprocs: %d. Found: %d\n", target, p.x * p.y * p.z);
ERROR("Invalid nprocs");
return (int3){-1, -1, -1};
}
return (int3){p.x, p.y, p.z};
/*
if (target == 16)
return (int3){4, 2, 2};
if (target == 32)
@@ -565,6 +611,7 @@ decompose(const int target)
else {
return (int3){decomposition[0], decomposition[1], decomposition[2]};
}
*/
}
static PackedData