Code cleanup
This commit is contained in:
@@ -784,7 +784,7 @@ acDeviceReduceVec(const Device device, const Stream stream, const ReductionType
|
|||||||
|
|
||||||
// Needs mx * my * NGHOST * NUM_VTXBUF_HANDLES threads
|
// Needs mx * my * NGHOST * NUM_VTXBUF_HANDLES threads
|
||||||
static __global__ void
|
static __global__ void
|
||||||
pack_data(VertexBufferArray vba, const int3 start, AcReal* buffer)
|
pack_data(const VertexBufferArray vba, const int3 src, AcReal* __restrict__ buffer)
|
||||||
{
|
{
|
||||||
const int3 m = (int3){
|
const int3 m = (int3){
|
||||||
DCONST(AC_mx),
|
DCONST(AC_mx),
|
||||||
@@ -797,7 +797,7 @@ pack_data(VertexBufferArray vba, const int3 start, AcReal* buffer)
|
|||||||
if (vertexIdx >= m.x * m.y * NGHOST * NUM_VTXBUF_HANDLES)
|
if (vertexIdx >= m.x * m.y * NGHOST * NUM_VTXBUF_HANDLES)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const int vba_idx = IDX(start) + (vertexIdx % block_size);
|
const int vba_idx = IDX(src) + (vertexIdx % block_size);
|
||||||
const int vba_handle = vertexIdx / block_size;
|
const int vba_handle = vertexIdx / block_size;
|
||||||
|
|
||||||
const int buf_idx = vertexIdx;
|
const int buf_idx = vertexIdx;
|
||||||
@@ -806,7 +806,7 @@ pack_data(VertexBufferArray vba, const int3 start, AcReal* buffer)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __global__ void
|
static __global__ void
|
||||||
unpack_data(VertexBufferArray vba, const int3 start, AcReal* buffer)
|
unpack_data(const AcReal* __restrict__ buffer, VertexBufferArray vba, const int3 dst)
|
||||||
{
|
{
|
||||||
const int3 m = (int3){
|
const int3 m = (int3){
|
||||||
DCONST(AC_mx),
|
DCONST(AC_mx),
|
||||||
@@ -819,7 +819,7 @@ unpack_data(VertexBufferArray vba, const int3 start, AcReal* buffer)
|
|||||||
if (vertexIdx >= m.x * m.y * NGHOST * NUM_VTXBUF_HANDLES)
|
if (vertexIdx >= m.x * m.y * NGHOST * NUM_VTXBUF_HANDLES)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const int vba_idx = IDX(start) + (vertexIdx % block_size);
|
const int vba_idx = IDX(dst) + (vertexIdx % block_size);
|
||||||
const int vba_handle = vertexIdx / block_size;
|
const int vba_handle = vertexIdx / block_size;
|
||||||
|
|
||||||
const int buf_idx = vertexIdx;
|
const int buf_idx = vertexIdx;
|
||||||
@@ -1661,36 +1661,21 @@ acDeviceRunMPITest(void)
|
|||||||
MPI_Get_processor_name(processor_name, &name_len);
|
MPI_Get_processor_name(processor_name, &name_len);
|
||||||
printf("Processor %s. Process %d of %d.\n", processor_name, pid, num_processes);
|
printf("Processor %s. Process %d of %d.\n", processor_name, pid, num_processes);
|
||||||
|
|
||||||
// Check MPI support
|
#ifdef MPIX_CUDA_AWARE_SUPPORT
|
||||||
//// Borrowing start (from OpenMPI examples)
|
if (MPIX_Query_cuda_support())
|
||||||
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
|
printf("CUDA-aware MPI supported (MPIX)\n");
|
||||||
printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT);
|
else
|
||||||
#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT
|
WARNING("CUDA-aware MPI not supported with this MPI library (MPIX)\n");
|
||||||
printf("This MPI library does not have CUDA-aware support.\n");
|
|
||||||
#else
|
#else
|
||||||
printf("This MPI library cannot determine if there is CUDA-aware support.\n");
|
printf("MPIX_CUDA_AWARE_SUPPORT was not defined. Do not know wheter CUDA-aware MPI is "
|
||||||
#endif /* MPIX_CUDA_AWARE_SUPPORT */
|
"supported\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
printf("Run time check:\n");
|
if (getenv("MPICH_RDMA_ENABLED_CUDA") && atoi(getenv("MPICH_RDMA_ENABLED_CUDA")))
|
||||||
#if defined(MPIX_CUDA_AWARE_SUPPORT)
|
printf("CUDA-aware MPI supported (MPICH)\n");
|
||||||
if (1 == MPIX_Query_cuda_support()) {
|
else
|
||||||
printf("This MPI library has CUDA-aware support.\n");
|
WARNING("MPICH not used or this MPI library does not support CUDA-aware MPI\n");
|
||||||
}
|
|
||||||
else {
|
|
||||||
printf("This MPI library does not have CUDA-aware support.\n");
|
|
||||||
}
|
|
||||||
#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */
|
|
||||||
printf("This MPI library cannot determine if there is CUDA-aware support.\n");
|
|
||||||
#endif /* MPIX_CUDA_AWARE_SUPPORT */
|
|
||||||
//////// Borrowing end
|
|
||||||
|
|
||||||
int direct = getenv("MPICH_RDMA_ENABLED_CUDA") == NULL
|
|
||||||
? 0
|
|
||||||
: atoi(getenv("MPICH_RDMA_ENABLED_CUDA"));
|
|
||||||
if (direct != 1) {
|
|
||||||
printf("MPICH_RDMA_ENABLED_CUDA not enabled!\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
// Create model and candidate meshes
|
// Create model and candidate meshes
|
||||||
AcMeshInfo info;
|
AcMeshInfo info;
|
||||||
acLoadConfig(AC_DEFAULT_CONFIG, &info);
|
acLoadConfig(AC_DEFAULT_CONFIG, &info);
|
||||||
@@ -1756,6 +1741,14 @@ acDeviceRunMPITest(void)
|
|||||||
printf("Time per step: %f ms\n", ms_elapsed / num_iters);
|
printf("Time per step: %f ms\n", ms_elapsed / num_iters);
|
||||||
}
|
}
|
||||||
////////////////////////////// Timer end
|
////////////////////////////// Timer end
|
||||||
|
const cudaStream_t default_stream = device->streams[STREAM_DEFAULT];
|
||||||
|
const int block_size = submesh_info.int_params[AC_mx] * submesh_info.int_params[AC_my] * NGHOST;
|
||||||
|
const dim3 tpb(256, 1, 1);
|
||||||
|
const dim3 bpg((uint)ceil((block_size * NUM_VTXBUF_HANDLES) / (float)tpb.x), 1, 1);
|
||||||
|
const int3 src = (int3){0, 0, 0}; // TODO
|
||||||
|
const int3 dst = (int3){0, 0, 0}; // TODO
|
||||||
|
pack_data<<<bpg, tpb, 0, default_stream>>>(device->vba, src, device->inner[0]);
|
||||||
|
unpack_data<<<bpg, tpb, 0, default_stream>>>(device->inner[0], device->vba, dst);
|
||||||
|
|
||||||
acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh);
|
acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh);
|
||||||
acDeviceDestroy(device);
|
acDeviceDestroy(device);
|
||||||
|
|||||||
Reference in New Issue
Block a user