diff --git a/include/astaroth.h b/include/astaroth.h index f3c0fb0..7e8a3a8 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -55,7 +55,6 @@ extern "C" { #define REGISTERS_PER_THREAD (255) #define MAX_REGISTERS_PER_BLOCK (65536) #define MAX_THREADS_PER_BLOCK (1024) -#define NUM_ITERATIONS (10) #define WARP_SIZE (32) /* * ============================================================================= diff --git a/src/core/device.cu b/src/core/device.cu index ae0a3f4..11a890d 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -466,7 +466,8 @@ autoOptimize(const Device device) cudaEventRecord(tstart); // ---------------------------------------- Timing start - for (int i = 0; i < NUM_ITERATIONS; ++i) + const int num_iterations = 10; + for (int i = 0; i < num_iterations; ++i) solve<2><<>>(start, end, device->vba, FLT_EPSILON); cudaEventRecord(tstop); // ----------------------------------------- Timing end @@ -483,8 +484,9 @@ autoOptimize(const Device device) } } #if VERBOSE_PRINTING - printf("Auto-optimization done. The best threadblock dimensions for rkStep: (%d, %d, %d) %f ms\n", best_dims.x, best_dims.y, best_dims.z, - double(best_time) / NUM_ITERATIONS); + printf( + "Auto-optimization done. The best threadblock dimensions for rkStep: (%d, %d, %d) %f ms\n", + best_dims.x, best_dims.y, best_dims.z, double(best_time) / num_iterations); #endif /* FILE* fp = fopen("../config/rk3_tbdims.cuh", "w");