Merge branch 'master' into multigpu_optimization_2019-07-05

This commit is contained in:
jpekkila
2019-07-08 16:11:24 +03:00
3 changed files with 7 additions and 7 deletions

View File

@@ -152,11 +152,8 @@ globalFunction(void)
Modules used when compiling the code on the TIARA cluster.
* intel/2016
* hdf5/1.8.16_openmpi_1.10.2_ic16.0
* cmake/3.9.5
* openmpi/1.10.2_ic16.0
* gcc/5.3.0
* gcc/8.3.0
* cuda/10.1

View File

@@ -96,6 +96,7 @@ printDeviceInfo(const Device device)
printf(" Peak Memory Bandwidth (GiB/s): %f\n",
2 * (props.memoryClockRate * 1e3) * props.memoryBusWidth / (8. * 1024. * 1024. * 1024.));
printf(" ECC enabled: %d\n", props.ECCEnabled);
// Memory usage
size_t free_bytes, total_bytes;
cudaMemGetInfo(&free_bytes, &total_bytes);
@@ -108,8 +109,10 @@ printDeviceInfo(const Device device)
printf(" Local L1 cache supported: %d\n", props.localL1CacheSupported);
printf(" Global L1 cache supported: %d\n", props.globalL1CacheSupported);
printf(" L2 size: %d KiB\n", props.l2CacheSize / (1024));
printf(" Total const mem: %ld KiB\n", props.totalConstMem / (1024));
printf(" Shared mem per block: %ld KiB\n", props.sharedMemPerBlock / (1024));
//MV: props.totalConstMem and props.sharedMemPerBlock cause assembler error
//MV: while compiling on the TIARA gp cluster. Therefore commented out.
//!! printf(" Total const mem: %ld KiB\n", props.totalConstMem / (1024));
//!! printf(" Shared mem per block: %ld KiB\n", props.sharedMemPerBlock / (1024));
printf(" Other\n");
printf(" Warp size: %d\n", props.warpSize);
// printf(" Single to double perf. ratio: %dx\n",

View File

@@ -246,7 +246,7 @@ run_simulation(void)
// Generate e for k. Needed for the sake of isotropy.
AcReal3 e_force;
if ((k_force.y == 0.0) && (k_force.z == 0.0)) {
if ((k_force.y == AcReal(0.0)) && (k_force.z == AcReal(0.0))) {
e_force = (AcReal3){0.0, 1.0, 0.0};
}
else {