Autoformatted

This commit is contained in:
jpekkila
2020-06-24 15:56:30 +03:00
parent 3c3b2a1885
commit 0d1c5b3911
5 changed files with 108 additions and 68 deletions

View File

@@ -6,15 +6,21 @@
#include <mpi.h>
#include <cuda_runtime_api.h>
#include <cuda.h> // CUDA driver API
#include <cuda_runtime_api.h>
#include "timer_hires.h" // From src/common
//#define BLOCK_SIZE (100 * 1024 * 1024) // Bytes
#define BLOCK_SIZE (256 * 256 * 3 * 8 * 8)
#define errchk(x) { if (!(x)) { fprintf(stderr, "errchk(%s) failed", #x); assert(x); }}
/*
 * errchk(x): runtime sanity check, used in this file on API return codes
 * (e.g. errchk(retval == CUDA_SUCCESS)).
 *
 * When x evaluates to false, writes "errchk(<text of x>) failed" to stderr
 * (no trailing newline) and then calls assert(x), which evaluates x a second
 * time. When x is true the macro does nothing.
 *
 * NOTE(review): if the build defines NDEBUG, assert() expands to nothing, so a
 * failed check would only print the message and execution would continue --
 * confirm the build flags.
 * NOTE(review): the expansion is a bare { ... } block rather than
 * do { ... } while (0), so `errchk(...);` placed directly before an `else`
 * would not parse.
 */
#define errchk(x) \
{ \
if (!(x)) { \
fprintf(stderr, "errchk(%s) failed", #x); \
assert(x); \
} \
}
/*
Findings:
@@ -57,17 +63,18 @@ allocDevice(const size_t bytes)
static uint8_t*
allocDevicePinned(const size_t bytes)
{
#define USE_CUDA_DRIVER_PINNING (1)
#if USE_CUDA_DRIVER_PINNING
#define USE_CUDA_DRIVER_PINNING (1)
#if USE_CUDA_DRIVER_PINNING
uint8_t* arr = allocDevice(bytes);
unsigned int flag = 1;
CUresult retval = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, (CUdeviceptr)arr);
CUresult retval = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
(CUdeviceptr)arr);
errchk(retval == CUDA_SUCCESS);
return arr;
#else
#else
uint8_t* arr;
// Standard (20 GiB/s internode, 85 GiB/s intranode)
// const cudaError_t retval = cudaMalloc((void**)&arr, bytes);
@@ -77,7 +84,7 @@ allocDevicePinned(const size_t bytes)
const cudaError_t retval = cudaMallocHost((void**)&arr, bytes);
errchk(retval == cudaSuccess);
return arr;
#endif
#endif
}
/*
@@ -267,7 +274,6 @@ send_h2d(uint8_t* src, uint8_t* dst)
cudaMemcpy(dst, src, BLOCK_SIZE, cudaMemcpyHostToDevice);
}
static void
sendrecv_d2h2d(uint8_t* dsrc, uint8_t* hdst, uint8_t* hsrc, uint8_t* ddst)
{
@@ -327,10 +333,10 @@ measurebw(const char* msg, const size_t bytes, void (*sendrecv)(uint8_t*, uint8_
MPI_Barrier(MPI_COMM_WORLD);
}
static void
measurebw2(const char* msg, const size_t bytes, void (*sendrecv)(uint8_t*, uint8_t*, uint8_t*, uint8_t*), uint8_t* dsrc, uint8_t* hdst,
uint8_t* hsrc, uint8_t* ddst)
measurebw2(const char* msg, const size_t bytes,
void (*sendrecv)(uint8_t*, uint8_t*, uint8_t*, uint8_t*), uint8_t* dsrc, uint8_t* hdst,
uint8_t* hsrc, uint8_t* ddst)
{
const size_t num_samples = 100;
@@ -414,8 +420,8 @@ main(void)
measurebw("Bidirectional bandwidth, twoway (Host)", //
2 * BLOCK_SIZE, sendrecv_twoway, src, dst);
measurebw("Bidirectional bandwidth, async multiple (Host)", //
2 * (nprocs-1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
//measurebw("Bidirectional bandwidth, async multiple parallel (Host)", //
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
// measurebw("Bidirectional bandwidth, async multiple parallel (Host)", //
// 2 * (nprocs-1) * BLOCK_SIZE, sendrecv_nonblocking_multiple_parallel, src, dst);
freeHost(src);
@@ -434,11 +440,12 @@ main(void)
measurebw("Bidirectional bandwidth, twoway (Device)", //
2 * BLOCK_SIZE, sendrecv_twoway, src, dst);
measurebw("Bidirectional bandwidth, async multiple (Device)", //
2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
//measurebw("Bidirectional bandwidth, async multiple parallel (Device)", //
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
// measurebw("Bidirectional bandwidth, async multiple parallel (Device)", //
// 2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple_parallel, src, dst);
measurebw("Bidirectional bandwidth, async multiple (Device, rt pinning)", //
2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple_rt_pinning, src, dst);
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple_rt_pinning, src,
dst);
freeDevice(src);
freeDevice(dst);
@@ -456,7 +463,7 @@ main(void)
measurebw("Bidirectional bandwidth, twoway (Device, pinned)", //
2 * BLOCK_SIZE, sendrecv_twoway, src, dst);
measurebw("Bidirectional bandwidth, async multiple (Device, pinned)", //
2 * (nprocs-1) *BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
2 * (nprocs - 1) * BLOCK_SIZE, sendrecv_nonblocking_multiple, src, dst);
freeDevice(src);
freeDevice(dst);
@@ -472,7 +479,8 @@ main(void)
measurebw("Unidirectional D2H", BLOCK_SIZE, send_d2h, dsrc, hdst);
measurebw("Unidirectional H2D", BLOCK_SIZE, send_h2d, hsrc, ddst);
measurebw2("Bidirectional D2H & H2D", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst, hsrc, ddst);
measurebw2("Bidirectional D2H & H2D", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst, hsrc,
ddst);
freeDevice(dsrc);
freeDevice(ddst);
@@ -490,7 +498,8 @@ main(void)
measurebw("Unidirectional D2H (pinned)", BLOCK_SIZE, send_d2h, dsrc, hdst);
measurebw("Unidirectional H2D (pinned)", BLOCK_SIZE, send_h2d, hsrc, ddst);
measurebw2("Bidirectional D2H & H2D (pinned)", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst, hsrc, ddst);
measurebw2("Bidirectional D2H & H2D (pinned)", 2 * BLOCK_SIZE, sendrecv_d2h2d, dsrc, hdst,
hsrc, ddst);
freeDevice(dsrc);
freeDevice(ddst);