Merge branch 'master' into multigpu_optimization_2019-07-05
This commit is contained in:
@@ -152,11 +152,8 @@ globalFunction(void)
|
|||||||
|
|
||||||
Modules used when compiling the code on TIARA cluster.
|
Modules used when compiling the code on TIARA cluster.
|
||||||
|
|
||||||
* intel/2016
|
|
||||||
* hdf5/1.8.16_openmpi_1.10.2_ic16.0
|
|
||||||
* cmake/3.9.5
|
* cmake/3.9.5
|
||||||
* openmpi/1.10.2_ic16.0
|
* gcc/8.3.0
|
||||||
* gcc/5.3.0
|
|
||||||
* cuda/10.1
|
* cuda/10.1
|
||||||
|
|
||||||
|
|
||||||
|
@@ -96,6 +96,7 @@ printDeviceInfo(const Device device)
|
|||||||
printf(" Peak Memory Bandwidth (GiB/s): %f\n",
|
printf(" Peak Memory Bandwidth (GiB/s): %f\n",
|
||||||
2 * (props.memoryClockRate * 1e3) * props.memoryBusWidth / (8. * 1024. * 1024. * 1024.));
|
2 * (props.memoryClockRate * 1e3) * props.memoryBusWidth / (8. * 1024. * 1024. * 1024.));
|
||||||
printf(" ECC enabled: %d\n", props.ECCEnabled);
|
printf(" ECC enabled: %d\n", props.ECCEnabled);
|
||||||
|
|
||||||
// Memory usage
|
// Memory usage
|
||||||
size_t free_bytes, total_bytes;
|
size_t free_bytes, total_bytes;
|
||||||
cudaMemGetInfo(&free_bytes, &total_bytes);
|
cudaMemGetInfo(&free_bytes, &total_bytes);
|
||||||
@@ -108,8 +109,10 @@ printDeviceInfo(const Device device)
|
|||||||
printf(" Local L1 cache supported: %d\n", props.localL1CacheSupported);
|
printf(" Local L1 cache supported: %d\n", props.localL1CacheSupported);
|
||||||
printf(" Global L1 cache supported: %d\n", props.globalL1CacheSupported);
|
printf(" Global L1 cache supported: %d\n", props.globalL1CacheSupported);
|
||||||
printf(" L2 size: %d KiB\n", props.l2CacheSize / (1024));
|
printf(" L2 size: %d KiB\n", props.l2CacheSize / (1024));
|
||||||
printf(" Total const mem: %ld KiB\n", props.totalConstMem / (1024));
|
//MV: props.totalConstMem and props.sharedMemPerBlock cause assembler error
|
||||||
printf(" Shared mem per block: %ld KiB\n", props.sharedMemPerBlock / (1024));
|
//MV: while compiling in TIARA gp cluster. Therefore commented out.
|
||||||
|
//!! printf(" Total const mem: %ld KiB\n", props.totalConstMem / (1024));
|
||||||
|
//!! printf(" Shared mem per block: %ld KiB\n", props.sharedMemPerBlock / (1024));
|
||||||
printf(" Other\n");
|
printf(" Other\n");
|
||||||
printf(" Warp size: %d\n", props.warpSize);
|
printf(" Warp size: %d\n", props.warpSize);
|
||||||
// printf(" Single to double perf. ratio: %dx\n",
|
// printf(" Single to double perf. ratio: %dx\n",
|
||||||
|
@@ -246,7 +246,7 @@ run_simulation(void)
|
|||||||
|
|
||||||
// Generate e for k. Needed for the sake of isotropy.
|
// Generate e for k. Needed for the sake of isotropy.
|
||||||
AcReal3 e_force;
|
AcReal3 e_force;
|
||||||
if ((k_force.y == 0.0) && (k_force.z == 0.0)) {
|
if ((k_force.y == AcReal(0.0)) && (k_force.z == AcReal(0.0))) {
|
||||||
e_force = (AcReal3){0.0, 1.0, 0.0};
|
e_force = (AcReal3){0.0, 1.0, 0.0};
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
Reference in New Issue
Block a user