Added a very experimental implementation of mixed precision: communication is done in f32 and computation in f64.
This commit is contained in:
@@ -14,7 +14,7 @@
|
|||||||
#define MPI_DECOMPOSITION_AXES (3)
|
#define MPI_DECOMPOSITION_AXES (3)
|
||||||
#define MPI_COMPUTE_ENABLED (1)
|
#define MPI_COMPUTE_ENABLED (1)
|
||||||
#define MPI_COMM_ENABLED (1)
|
#define MPI_COMM_ENABLED (1)
|
||||||
#define MPI_INCL_CORNERS (1)
|
#define MPI_INCL_CORNERS (0)
|
||||||
#define MPI_USE_PINNED (0) // Do inter-node comm with pinned memory
|
#define MPI_USE_PINNED (0) // Do inter-node comm with pinned memory
|
||||||
#define MPI_USE_CUDA_DRIVER_PINNING (0) // Pin with cuPointerSetAttribute, otherwise cudaMallocHost
|
#define MPI_USE_CUDA_DRIVER_PINNING (0) // Pin with cuPointerSetAttribute, otherwise cudaMallocHost
|
||||||
|
|
||||||
@@ -721,7 +721,7 @@ acCreatePackedDataHost(const int3 dims)
|
|||||||
data.dims = dims;
|
data.dims = dims;
|
||||||
|
|
||||||
const size_t bytes = dims.x * dims.y * dims.z * sizeof(data.data[0]) * NUM_VTXBUF_HANDLES;
|
const size_t bytes = dims.x * dims.y * dims.z * sizeof(data.data[0]) * NUM_VTXBUF_HANDLES;
|
||||||
data.data = (AcReal*)malloc(bytes);
|
data.data = (AcRealPacked*)malloc(bytes);
|
||||||
ERRCHK_ALWAYS(data.data);
|
ERRCHK_ALWAYS(data.data);
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
@@ -1132,8 +1132,13 @@ acTransferCommData(const Device device, //
|
|||||||
cudaSetDevice(device->id);
|
cudaSetDevice(device->id);
|
||||||
|
|
||||||
MPI_Datatype datatype = MPI_FLOAT;
|
MPI_Datatype datatype = MPI_FLOAT;
|
||||||
if (sizeof(AcReal) == 8)
|
if (sizeof(data->srcs[0].data[0]) == 2) {
|
||||||
|
datatype = MPI_SHORT; // TODO CONFIRM THAT IS CORRECTLY CAST TO HALF
|
||||||
|
} else if (sizeof(data->srcs[0].data[0]) == 4) {
|
||||||
|
datatype = MPI_FLOAT;
|
||||||
|
} else {
|
||||||
datatype = MPI_DOUBLE;
|
datatype = MPI_DOUBLE;
|
||||||
|
}
|
||||||
|
|
||||||
int nprocs, pid;
|
int nprocs, pid;
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||||
|
@@ -8,11 +8,13 @@
|
|||||||
#define MPI_GPUDIRECT_DISABLED (0)
|
#define MPI_GPUDIRECT_DISABLED (0)
|
||||||
#endif // AC_MPI_ENABLED
|
#endif // AC_MPI_ENABLED
|
||||||
|
|
||||||
|
typedef float AcRealPacked;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int3 dims;
|
int3 dims;
|
||||||
AcReal* data;
|
AcRealPacked* data;
|
||||||
|
|
||||||
AcReal* data_pinned;
|
AcRealPacked* data_pinned;
|
||||||
bool pinned = false; // Set if data was received to pinned memory
|
bool pinned = false; // Set if data was received to pinned memory
|
||||||
} PackedData;
|
} PackedData;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user