Improvements to samples

This commit is contained in:
jpekkila
2020-04-07 17:58:47 +03:00
parent 427a3ac5d8
commit fb41741d74
5 changed files with 79 additions and 23 deletions

View File

@@ -89,33 +89,45 @@ main(void)
}
}*/
/*
// Basic
const size_t num_iters = 100;
// Warmup
for (size_t i = 0; i < 10; ++i)
for (size_t i = 0; i < num_iters / 10; ++i)
acGridIntegrate(STREAM_DEFAULT, FLT_EPSILON);
// Benchmark
Timer t;
const AcReal dt = FLT_EPSILON;
const AcReal dt = FLT_EPSILON;
acGridSynchronizeStream(STREAM_ALL);
timer_reset(&t);
acGridSynchronizeStream(STREAM_ALL);
acGridSynchronizeStream(STREAM_ALL);
timer_reset(&t);
acGridSynchronizeStream(STREAM_ALL);
const size_t num_iters = 50;
for (size_t i = 0; i < num_iters; ++i)
acGridIntegrate(STREAM_DEFAULT, dt);
for (size_t i = 0; i < num_iters; ++i)
acGridIntegrate(STREAM_DEFAULT, dt);
acGridSynchronizeStream(STREAM_ALL);
if (!pid)
timer_diff_print(t);
acGridSynchronizeStream(STREAM_ALL);
/*
acGridSynchronizeStream(STREAM_ALL);
if (!pid)
timer_diff_print(t);
acGridSynchronizeStream(STREAM_ALL);
*/
// Percentiles
const size_t num_iters = 100;
const double nth_percentile = 0.90;
std::vector<double> results; // ms
results.reserve(num_iters);
// Warmup
for (size_t i = 0; i < num_iters / 10; ++i)
acGridIntegrate(STREAM_DEFAULT, FLT_EPSILON);
// Benchmark
Timer t;
const AcReal dt = FLT_EPSILON;
for (size_t i = 0; i < num_iters; ++i) {
acGridSynchronizeStream(STREAM_ALL);
timer_reset(&t);
@@ -123,9 +135,9 @@ main(void)
acGridIntegrate(STREAM_DEFAULT, dt);
acGridSynchronizeStream(STREAM_ALL);
results.push_back(timer_diff_nsec(t) / 1e6);
acGridSynchronizeStream(STREAM_ALL);
}
// Write benchmark to file
if (!pid) {
std::sort(results.begin(), results.end(),
[](const double& a, const double& b) { return a < b; });
@@ -149,7 +161,49 @@ main(void)
fprintf(fp, "%d, %g\n", nprocs, results[nth_percentile * num_iters]);
fclose(fp);
}*/
}
/*
const size_t num_iters = 100;
const double nth_percentile = 0.90;
std::vector<double> results; // ms
results.reserve(num_iters);
for (size_t i = 0; i < num_iters; ++i) {
acGridSynchronizeStream(STREAM_ALL);
timer_reset(&t);
acGridSynchronizeStream(STREAM_ALL);
acGridIntegrate(STREAM_DEFAULT, dt);
acGridSynchronizeStream(STREAM_ALL);
results.push_back(timer_diff_nsec(t) / 1e6);
}
// Write benchmark to file
if (!pid) {
std::sort(results.begin(), results.end(),
[](const double& a, const double& b) { return a < b; });
fprintf(stdout,
"Integration step time %g ms (%gth "
"percentile)--------------------------------------\n",
results[nth_percentile * num_iters], 100 * nth_percentile);
char path[4096] = "";
if (test == TEST_STRONG_SCALING)
strncpy(path, "strong_scaling.csv", sizeof(path));
else if (test == TEST_WEAK_SCALING)
strncpy(path, "weak_scaling.csv", sizeof(path));
else
ERROR("Invalid test type");
FILE* fp = fopen(path, "a");
ERRCHK_ALWAYS(fp);
// Format
// nprocs, measured (ms)
fprintf(fp, "%d, %g\n", nprocs, results[nth_percentile * num_iters]);
fclose(fp);
}*/
acGridQuit();
MPI_Finalize();

View File

@@ -5,5 +5,5 @@ find_package(OpenMP)
find_package(CUDAToolkit)
add_executable(bwtest main.c)
add_compile_options(-O3)
target_link_libraries(bwtest MPI::MPI_C OpenMP::OpenMP_C CUDA::cudart_static)
target_compile_options(bwtest PRIVATE -O3)

View File

@@ -303,6 +303,7 @@ main(void)
PRINT("Block size: %u MiB\n", BLOCK_SIZE / (1024 * 1024));
#if 0
{
uint8_t* src = allocHost(BLOCK_SIZE);
uint8_t* dst = allocHost(BLOCK_SIZE);
@@ -362,7 +363,7 @@ main(void)
freeDevice(dst);
}
PRINT("\n------------------------\n");
/*
#else
{ // Final run for easy identification with the profiler
uint8_t* src = allocDevice(BLOCK_SIZE);
uint8_t* dst = allocDevice(BLOCK_SIZE);
@@ -373,7 +374,7 @@ main(void)
freeDevice(src);
freeDevice(dst);
}
*/
#endif
MPI_Finalize();
return EXIT_SUCCESS;

View File

@@ -650,7 +650,7 @@ acPinPackedData(const Device device, const cudaStream_t stream, PackedData* ddat
static void
acUnpinPackedData(const Device device, const cudaStream_t stream, PackedData* ddata)
{
if (!ddata->pinned)
if (!ddata->pinned) // Unpin iff the data was pinned previously
return;
cudaSetDevice(device->id);
@@ -1362,7 +1362,8 @@ acTransferCommData(const Device device, //
const int npid = getPid(pid3d + neighbor, decomp);
cudaStreamSynchronize(data->streams[a_idx]);
MPI_Isend(src->data, count, datatype, npid, b_idx, MPI_COMM_WORLD,
MPI_Isend(src->data, count, datatype, npid, b_idx, MPI_COMM_WORLD,
&data->send_reqs[b_idx]);
}
}
}

View File

@@ -15,8 +15,8 @@ typedef struct {
#if (AC_MPI_ENABLED && AC_MPI_RT_PINNING)
AcReal* data_pinned;
bool pinned; // Set if data was received to pinned memory
#endif // (AC_MPI_ENABLED && AC_MPI_RT_PINNING)
bool pinned = false; // Set if data was received to pinned memory
#endif // (AC_MPI_ENABLED && AC_MPI_RT_PINNING)
#if (AC_MPI_ENABLED && AC_MPI_UNIDIRECTIONAL_COMM)
MPI_Win win; // MPI window for RMA