Autoformatted
This commit is contained in:
@@ -6,15 +6,21 @@
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda.h> // CUDA driver API
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include "timer_hires.h" // From src/common
|
||||
|
||||
//#define BLOCK_SIZE (100 * 1024 * 1024) // Bytes
|
||||
#define BLOCK_SIZE (256 * 256 * 3 * 8 * 8)
|
||||
|
||||
#define errchk(x) { if (!(x)) { fprintf(stderr, "errchk(%s) failed", #x); assert(x); }}
|
||||
#define errchk(x) \
|
||||
{ \
|
||||
if (!(x)) { \
|
||||
fprintf(stderr, "errchk(%s) failed", #x); \
|
||||
assert(x); \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
|
||||
Findings:
|
||||
@@ -57,17 +63,18 @@ allocDevice(const size_t bytes)
|
||||
static uint8_t*
|
||||
allocDevicePinned(const size_t bytes)
|
||||
{
|
||||
#define USE_CUDA_DRIVER_PINNING (1)
|
||||
#if USE_CUDA_DRIVER_PINNING
|
||||
#define USE_CUDA_DRIVER_PINNING (1)
|
||||
#if USE_CUDA_DRIVER_PINNING
|
||||
uint8_t* arr = allocDevice(bytes);
|
||||
|
||||
unsigned int flag = 1;
|
||||
CUresult retval = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, (CUdeviceptr)arr);
|
||||
CUresult retval = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
|
||||
(CUdeviceptr)arr);
|
||||
|
||||
errchk(retval == CUDA_SUCCESS);
|
||||
return arr;
|
||||
|
||||
#else
|
||||
#else
|
||||
uint8_t* arr;
|
||||
// Standard (20 GiB/s internode, 85 GiB/s intranode)
|
||||
// const cudaError_t retval = cudaMalloc((void**)&arr, bytes);
|
||||
@@ -77,7 +84,7 @@ allocDevicePinned(const size_t bytes)
|
||||
const cudaError_t retval = cudaMallocHost((void**)&arr, bytes);
|
||||
errchk(retval == cudaSuccess);
|
||||
return arr;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -267,7 +274,6 @@ send_h2d(uint8_t* src, uint8_t* dst)
|
||||
cudaMemcpy(dst, src, BLOCK_SIZE, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
sendrecv_d2h2d(uint8_t* dsrc, uint8_t* hdst, uint8_t* hsrc, uint8_t* ddst)
|
||||
{
|
||||
@@ -327,10 +333,10 @@ measurebw(const char* msg, const size_t bytes, void (*sendrecv)(uint8_t*, uint8_
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
measurebw2(const char* msg, const size_t bytes, void (*sendrecv)(uint8_t*, uint8_t*, uint8_t*, uint8_t*), uint8_t* dsrc, uint8_t* hdst,
|
||||
uint8_t* hsrc, uint8_t* ddst)
|
||||
measurebw2(const char* msg, const size_t bytes,
|
||||
void (*sendrecv)(uint8_t*, uint8_t*, uint8_t*, uint8_t*), uint8_t* dsrc, uint8_t* hdst,
|
||||
uint8_t* hsrc, uint8_t* ddst)
|
||||
{
|
||||
const size_t num_samples = 100;
|
||||
|
||||
@@ -414,8 +420,8 @@ main(void)
|
||||
measurebw("Bidirectional bandwidth, twoway (Host)", //
|
||||
2 * BLOCK_SIZE, sendrecv_twoway, src, dst);
|
||||
measurebw("Bidirectional bandwidth, async multiple (Host)", //
|
||||
2 * (nprocs-1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
|
||||
//measurebw("Bidirectional bandwidth, async multiple parallel (Host)", //
|
||||
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
|
||||
// measurebw("Bidirectional bandwidth, async multiple parallel (Host)", //
|
||||
// 2 * (nprocs-1) * BLOCK_SIZE, sendrecv_nonblocking_multiple_parallel, src, dst);
|
||||
|
||||
freeHost(src);
|
||||
@@ -434,11 +440,12 @@ main(void)
|
||||
measurebw("Bidirectional bandwidth, twoway (Device)", //
|
||||
2 * BLOCK_SIZE, sendrecv_twoway, src, dst);
|
||||
measurebw("Bidirectional bandwidth, async multiple (Device)", //
|
||||
2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
|
||||
//measurebw("Bidirectional bandwidth, async multiple parallel (Device)", //
|
||||
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
|
||||
// measurebw("Bidirectional bandwidth, async multiple parallel (Device)", //
|
||||
// 2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple_parallel, src, dst);
|
||||
measurebw("Bidirectional bandwidth, async multiple (Device, rt pinning)", //
|
||||
2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple_rt_pinning, src, dst);
|
||||
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple_rt_pinning, src,
|
||||
dst);
|
||||
|
||||
freeDevice(src);
|
||||
freeDevice(dst);
|
||||
@@ -456,7 +463,7 @@ main(void)
|
||||
measurebw("Bidirectional bandwidth, twoway (Device, pinned)", //
|
||||
2 * BLOCK_SIZE, sendrecv_twoway, src, dst);
|
||||
measurebw("Bidirectional bandwidth, async multiple (Device, pinned)", //
|
||||
2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
|
||||
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
|
||||
|
||||
freeDevice(src);
|
||||
freeDevice(dst);
|
||||
@@ -472,7 +479,8 @@ main(void)
|
||||
measurebw("Unidirectional D2H", BLOCK_SIZE, send_d2h, dsrc, hdst);
|
||||
measurebw("Unidirectional H2D", BLOCK_SIZE, send_h2d, hsrc, ddst);
|
||||
|
||||
measurebw2("Bidirectional D2H & H2D", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst, hsrc, ddst);
|
||||
measurebw2("Bidirectional D2H & H2D", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst, hsrc,
|
||||
ddst);
|
||||
|
||||
freeDevice(dsrc);
|
||||
freeDevice(ddst);
|
||||
@@ -490,7 +498,8 @@ main(void)
|
||||
measurebw("Unidirectional D2H (pinned)", BLOCK_SIZE, send_d2h, dsrc, hdst);
|
||||
measurebw("Unidirectional H2D (pinned)", BLOCK_SIZE, send_h2d, hsrc, ddst);
|
||||
|
||||
measurebw2("Bidirectional D2H & H2D (pinned)", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst, hsrc, ddst);
|
||||
measurebw2("Bidirectional D2H & H2D (pinned)", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst,
|
||||
hsrc, ddst);
|
||||
|
||||
freeDevice(dsrc);
|
||||
freeDevice(ddst);
|
||||
|
Reference in New Issue
Block a user