From b6d120bf290bf197b61969f00ec248514a2ee378 Mon Sep 17 00:00:00 2001
From: Carl William Pearson
Date: Wed, 2 Jun 2021 10:57:47 -0600
Subject: [PATCH] add persistent ping-pong

---
 .gitignore      |   1 +
 Makefile        |   5 +-
 README.md       |   5 ++
 ascicgpu030.sh  |   9 +++
 main-wrapper.sh |   5 ++
 persistent.cpp  | 175 ++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 199 insertions(+), 1 deletion(-)
 create mode 100755 ascicgpu030.sh
 create mode 100755 main-wrapper.sh
 create mode 100644 persistent.cpp

diff --git a/.gitignore b/.gitignore
index b6b0fcc..526ac83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 one-sided
 main
+persistent
diff --git a/Makefile b/Makefile
index bc86c61..11f9cdb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-TARGETS = main one-sided
+TARGETS = main one-sided persistent
 
 all: ${TARGETS}
 
@@ -13,4 +13,7 @@ main: main.cpp Makefile
 	$(MPICXX) $(CXXFLAGS) $< -o $@
 
 one-sided: one_sided.cpp Makefile
+	$(MPICXX) $(CXXFLAGS) $< -o $@
+
+persistent: persistent.cpp Makefile
 	$(MPICXX) $(CXXFLAGS) $< -o $@
\ No newline at end of file
diff --git a/README.md b/README.md
index d843380..fa3a85c 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,11 @@ If any tests fails, you can re-run them individually.
 
 Execute any binary you want using `mpirun`, or whatever is appropriate for your platform.
 
+## Run Microbenchmarks
+
+- `persistent` (`persistent.cpp`) ping-pong time for persistent communication.
+
+
 ## Notes on specific platforms
 
 Some Open MPIs use `long long` for their datatypes, which means we can't support ANSI C++ (`-ansi`).
\ No newline at end of file
diff --git a/ascicgpu030.sh b/ascicgpu030.sh
new file mode 100755
index 0000000..5209267
--- /dev/null
+++ b/ascicgpu030.sh
@@ -0,0 +1,9 @@
+#! /bin/bash
+
+yes | module clear -s
+. $HOME/repos/Trilinos/cmake/std/atdm/load-env.sh Volta70-cuda-static-opt-rdc
+module unload sems-openmpi
+module load sems-openmpi/4.0.5 # needed for the --host syntax in mpirun
+
+mpirun -np 8 -host ascicgpu030:4,ascicgpu032:4 \
+./main-wrapper.sh
\ No newline at end of file
diff --git a/main-wrapper.sh b/main-wrapper.sh
new file mode 100755
index 0000000..0ac4b40
--- /dev/null
+++ b/main-wrapper.sh
@@ -0,0 +1,5 @@
+#! /bin/bash
+
+which mpirun
+
+./main
\ No newline at end of file
diff --git a/persistent.cpp b/persistent.cpp
new file mode 100644
index 0000000..6f149d6
--- /dev/null
+++ b/persistent.cpp
@@ -0,0 +1,175 @@
+// Ping-pong microbenchmark for MPI persistent point-to-point communication.
+//
+// Ranks 0 and 1 bounce a message back and forth using requests created once
+// with MPI_Send_init / MPI_Recv_init and restarted with MPI_Start. Any other
+// ranks only take part in the collectives. Rank 0 prints one CSV row per
+// message size: bytes,min,max,avg,med (seconds per ping-pong).
+
+#include <algorithm>
+#include <cstdio>
+#include <vector>
+
+#include <mpi.h>
+#include <sys/mman.h>
+
+// Wall-clock time one timed sample should take, in seconds (200us).
+const double sample_target = 200e-6;
+
+// One timed batch of ping-pongs.
+struct Sample {
+  double raw;  // elapsed time of the whole batch, in seconds
+  double norm; // raw divided by the number of ping-pongs in the batch
+};
+
+// Time `perSample` back-to-back ping-pongs between ranks 0 and 1.
+//
+// Rank 0 sends then receives; rank 1 receives then sends. Other ranks do no
+// point-to-point work and only take part in the barrier. `sreq` and `rreq`
+// are the persistent send / receive requests to restart on every iteration.
+static Sample get_sample(int perSample, MPI_Request *sreq, MPI_Request *rreq, int rank, MPI_Comm comm) {
+  Sample sample;
+  MPI_Barrier(comm); // line every rank up before starting the clock
+  const double start = MPI_Wtime();
+  for (int i = 0; i < perSample; ++i) {
+    if (0 == rank) {
+      MPI_Start(sreq);
+      MPI_Wait(sreq, MPI_STATUS_IGNORE);
+      MPI_Start(rreq);
+      MPI_Wait(rreq, MPI_STATUS_IGNORE);
+    } else if (1 == rank) {
+      MPI_Start(rreq);
+      MPI_Wait(rreq, MPI_STATUS_IGNORE);
+      MPI_Start(sreq);
+      MPI_Wait(sreq, MPI_STATUS_IGNORE);
+    }
+  }
+  const double stop = MPI_Wtime();
+  sample.raw = stop - start;
+  sample.norm = sample.raw / perSample;
+  return sample;
+}
+
+int main(int argc, char **argv) {
+  // Initialize the MPI environment
+  MPI_Init(&argc, &argv);
+
+  // Get the number of processes and this process's rank
+  int size, rank;
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  if (size < 2) {
+    // every rank sees the same size, so all of them shut MPI down and
+    // return instead of calling exit() with MPI still initialized
+    fprintf(stderr, "need at least 2 ranks!\n");
+    MPI_Finalize();
+    return 1;
+  }
+
+  // ranks 0 and 1 are each other's partner
+  const int src = (rank + 1) % 2;
+  const int dst = (rank + 1) % 2;
+  const int tag = 0;
+  const int numIters = 100; // timed samples per message size
+
+  // message sizes in bytes
+  const std::vector<size_t> sweep{
+      1,
+      64,
+      128,
+      256,
+      512,
+      1 * 1024,
+      2 * 1024,
+      4 * 1024,
+      8 * 1024,
+      16 * 1024,
+      32 * 1024,
+      64 * 1024,
+      128 * 1024,
+      256 * 1024,
+      512 * 1024,
+      1 * 1024 * 1024,
+      2 * 1024 * 1024,
+      4 * 1024 * 1024,
+      8 * 1024 * 1024,
+      16 * 1024 * 1024,
+      32 * 1024 * 1024,
+      64 * 1024 * 1024,
+      128 * 1024 * 1024,
+      256 * 1024 * 1024,
+  };
+
+  if (0 == rank) {
+    printf("bytes,min,max,avg,med\n");
+  }
+
+  for (size_t bytes : sweep) {
+    std::vector<double> samples(numIters);
+    std::vector<char> buf(bytes);
+    // lock the buffer in RAM; carry on unlocked if that is not permitted
+    if (mlock(buf.data(), bytes)) {
+      perror("error locking memory");
+    }
+
+    // MPI counts are int; every size in the sweep fits
+    const int count = static_cast<int>(bytes);
+    MPI_Request sreq, rreq;
+    MPI_Send_init(buf.data(), count, MPI_BYTE, dst, tag, MPI_COMM_WORLD, &sreq);
+    MPI_Recv_init(buf.data(), count, MPI_BYTE, src, tag, MPI_COMM_WORLD, &rreq);
+
+    // try to reach 200us / sample
+    int perSample = 1;
+    for (int i = 0; i < 10; ++i) {
+      // use the time of one ping-pong: dividing the target by the time of
+      // the whole batch makes the estimate shrink as perSample grows
+      const double perIter = get_sample(perSample, &sreq, &rreq, rank, MPI_COMM_WORLD).norm;
+      if (perIter > 0) {
+        // estimate number of measurements per sample, capped to fit an int
+        const int guess = std::min(sample_target / perIter + /*rounding*/ 0.5, 1e9);
+        // close half the distance to this estimate
+        perSample += (guess - perSample) * 0.5;
+      }
+      if (perSample < 1) perSample = 1;
+      // rank 0's value wins so that every rank runs the same batch size
+      MPI_Bcast(&perSample, 1, MPI_INT, 0, MPI_COMM_WORLD);
+    }
+
+    if (0 == rank) {
+      fprintf(stderr, "sample averaged over %d iterations\n", perSample);
+    }
+
+    for (int i = 0; i < numIters; ++i) {
+      samples[i] = get_sample(perSample, &sreq, &rreq, rank, MPI_COMM_WORLD).norm;
+    }
+
+    // each sample is the max time observed
+    MPI_Allreduce(MPI_IN_PLACE, samples.data(), numIters, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+    // ascending order, so min / median / max can be read by index
+    std::sort(samples.begin(), samples.end());
+
+    // average
+    double avg = 0;
+    for (double s : samples) {
+      avg += s;
+    }
+    avg /= numIters;
+
+    if (0 == rank) {
+      printf("%zu,%e,%e,%e,%e\n", bytes, samples[0], samples[numIters - 1], avg, samples[numIters / 2]);
+    }
+
+    // a persistent request stays allocated until it is freed, so release
+    // both before the buffer they refer to is destroyed
+    MPI_Request_free(&sreq);
+    MPI_Request_free(&rreq);
+
+    if (munlock(buf.data(), bytes)) {
+      perror("error unlocking memory");
+    }
+  }
+
+  MPI_Finalize();
+  return 0;
+}