This commit is contained in:
Johannes Pekkila
2019-11-27 08:55:23 +01:00
10 changed files with 186 additions and 1153 deletions

View File

@@ -799,7 +799,7 @@ mod(const int a, const int b)
return r < 0 ? r + b : r;
}
static int
static inline int
get_neighbor(const int3 offset)
{
// The number of nodes is n^3 = m = num_processes
@@ -1246,7 +1246,9 @@ acDeviceRunMPITest(void)
////////////////////////////////////////////////////////////////////////////////////////////////
Device device;
acDeviceCreate(0, submesh_info, &device);
int devices_per_node = -1;
cudaGetDeviceCount(&devices_per_node);
acDeviceCreate(pid % devices_per_node, submesh_info, &device);
acDeviceLoadMesh(device, STREAM_DEFAULT, submesh);
// Warmup

View File

@@ -40,6 +40,12 @@
#include <math.h>
#include <vector>
// Comparison predicate: yields true only when `a` is strictly less than `b`.
// Handed to std::sort to order the collected per-step timings ascending.
static bool
smaller_than(const double& a, const double& b)
{
    const bool a_precedes_b = (b > a);
    return a_precedes_b;
}
int
run_benchmark(const char* config_path)
{
@@ -59,21 +65,37 @@ run_benchmark(const char* config_path)
acInit(mesh_info);
acLoad(*mesh);
std::vector<double> results;
results.reserve(num_iters);
// Warmup
for (int i = 0; i < 10; ++i) {
acIntegrate(0);
}
acSynchronize();
const AcReal dt = FLT_EPSILON;
Timer total_time;
timer_reset(&total_time);
Timer step_time;
for (int i = 0; i < num_iters; ++i) {
const AcReal dt = FLT_EPSILON;
timer_reset(&step_time);
acIntegrate(dt);
acSynchronize();
results.push_back(timer_diff_nsec(step_time) / 1e6);
}
acSynchronizeStream(STREAM_ALL);
const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
acSynchronize();
const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
const double nth_percentile = 0.95;
std::sort(results.begin(), results.end(), smaller_than);
printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
printf("Total time: %f ms\n", ms_elapsed);
printf("%dth percentile per step: %f ms\n", int(100 * nth_percentile),
results[int(nth_percentile * num_iters)]);
acQuit();
acmesh_destroy(mesh);