Merge branch 'master' into alt_bcond_2020_09
This commit is contained in:
@@ -107,7 +107,7 @@ main(int argc, char** argv)
|
||||
}
|
||||
}
|
||||
|
||||
const TestType test = TEST_STRONG_SCALING;
|
||||
const TestType test = TEST_WEAK_SCALING;
|
||||
if (test == TEST_WEAK_SCALING) {
|
||||
uint3_64 decomp = decompose(nprocs);
|
||||
info.int_params[AC_nx] *= decomp.x;
|
||||
@@ -126,10 +126,15 @@ main(int argc, char** argv)
|
||||
|
||||
// GPU alloc & compute
|
||||
acGridInit(info);
|
||||
acGridRandomize();
|
||||
|
||||
/*
|
||||
AcMesh model;
|
||||
acMeshCreate(info, &model);
|
||||
acMeshRandomize(&model);
|
||||
acGridLoadMesh(STREAM_DEFAULT, model);
|
||||
*/
|
||||
|
||||
/*
|
||||
acGridLoadMesh(STREAM_DEFAULT, model);
|
||||
|
||||
@@ -154,7 +159,7 @@ main(int argc, char** argv)
|
||||
}*/
|
||||
|
||||
// Percentiles
|
||||
const size_t num_iters = 1000;
|
||||
const size_t num_iters = 100;
|
||||
const double nth_percentile = 0.90;
|
||||
std::vector<double> results; // ms
|
||||
results.reserve(num_iters);
|
||||
|
@@ -17,42 +17,48 @@ main(void)
|
||||
|
||||
// Boilerplate
|
||||
fprintf(fp, "#!/bin/bash\n");
|
||||
fprintf(fp, "#BATCH --job-name=astaroth\n");
|
||||
fprintf(fp, "#SBATCH --account=project_2000403\n");
|
||||
fprintf(fp, "#SBATCH --time=03:00:00\n");
|
||||
fprintf(fp, "#SBATCH --mem=32000\n");
|
||||
fprintf(fp, "#SBATCH --partition=gpu\n");
|
||||
fprintf(fp, "#BATCH --job-name=astaroth\n"); // OK
|
||||
fprintf(fp, "#SBATCH --account=project_2000403\n"); // OK
|
||||
fprintf(fp, "#SBATCH --time=04:00:00\n"); // OK
|
||||
fprintf(fp, "#SBATCH --mem=0\n"); // OK
|
||||
fprintf(fp, "#SBATCH --partition=gpu\n"); // OK
|
||||
fprintf(fp, "#SBATCH --exclusive\n"); // OK
|
||||
fprintf(fp, "#SBATCH --cpus-per-task=10\n"); // OK
|
||||
fprintf(fp, "#SBATCH --output=benchmark-%d-%%j.out\n", nprocs);
|
||||
// HACK: exclude misconfigured nodes on Puhti
|
||||
fprintf(fp, "#SBATCH -x "
|
||||
"r04g[05-06],r02g02,r14g04,r04g07,r16g07,r18g[02-03],r15g08,r17g06,r13g04\n");
|
||||
// fprintf(fp, "#SBATCH --cpus-per-task=10\n");
|
||||
|
||||
// nprocs, nodes, gpus
|
||||
const int max_gpus_per_node = 4;
|
||||
const int gpus_per_node = nprocs < max_gpus_per_node ? nprocs : max_gpus_per_node;
|
||||
const int nodes = (int)ceil((double)nprocs / max_gpus_per_node);
|
||||
fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node);
|
||||
fprintf(fp, "#SBATCH -n %d\n", nprocs);
|
||||
fprintf(fp, "#SBATCH -N %d\n", nodes);
|
||||
fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node); // OK
|
||||
fprintf(fp, "#SBATCH -n %d\n", nprocs); // OK
|
||||
fprintf(fp, "#SBATCH -N %d\n", nodes); // OK
|
||||
// fprintf(fp, "#SBATCH --exclusive\n");
|
||||
if (nprocs >= 4)
|
||||
fprintf(fp, "#SBATCH --ntasks-per-socket=2\n");
|
||||
// if (nprocs >= 4)
|
||||
// fprintf(fp, "#SBATCH --ntasks-per-socket=2\n");
|
||||
|
||||
// Modules
|
||||
// OpenMPI
|
||||
fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake openmpi/4.0.3-cuda nccl\n");
|
||||
//fprintf(fp, "export UCX_TLS=rc,sm,cuda_copy,gdr_copy,cuda_ipc\n"); // https://www.open-mpi.org/fa
|
||||
//fprintf(fp, "export PSM2_CUDA=1\nexport PSM2_GPUDIRECT=1\n");
|
||||
//if (nprocs >= 32)
|
||||
// fprintf(fp, "export UCX_TLS=ud_x,cuda_copy,gdr_copy,cuda_ipc\n"); // https://www.open-mpi.org/fa
|
||||
// fprintf(fp, "export UCX_TLS=rc,sm,cuda_copy,gdr_copy,cuda_ipc\n"); //
|
||||
// https://www.open-mpi.org/fa
// fprintf(fp, "export PSM2_CUDA=1\nexport PSM2_GPUDIRECT=1\n");
|
||||
// if (nprocs >= 32)
|
||||
// fprintf(fp, "export UCX_TLS=ud_x,cuda_copy,gdr_copy,cuda_ipc\n"); //
|
||||
// https://www.open-mpi.org/fa
|
||||
|
||||
// HPCX
|
||||
//fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
|
||||
//fprintf(fp, "export UCX_MEMTYPE_CACHE=n\n"); // Workaround for bug in hpcx-mpi/2.5.0
|
||||
// fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
|
||||
// fprintf(fp, "export UCX_MEMTYPE_CACHE=n\n"); // Workaround for bug in hpcx-mpi/2.5.0
|
||||
|
||||
// Profile and run
|
||||
// fprintf(fp, "mkdir -p profile_%d\n", nprocs);
|
||||
|
||||
/*
|
||||
const int nx = 256; // max size 1792;
|
||||
const int nx = 256; // max size 2048;
|
||||
const int ny = nx;
|
||||
const int nz = nx;
|
||||
|
||||
@@ -67,11 +73,11 @@ main(void)
|
||||
"benchmark_decomp_1D", "benchmark_decomp_2D", "benchmark_decomp_3D",
|
||||
"benchmark_decomp_1D_comm", "benchmark_decomp_2D_comm", "benchmark_decomp_3D_comm",
|
||||
"benchmark_meshsize_256", "benchmark_meshsize_512", "benchmark_meshsize_1024",
|
||||
"benchmark_meshsize_1792", "benchmark_stencilord_2", "benchmark_stencilord_4",
|
||||
"benchmark_meshsize_2048", "benchmark_stencilord_2", "benchmark_stencilord_4",
|
||||
"benchmark_stencilord_6", "benchmark_stencilord_8", "benchmark_timings_control",
|
||||
"benchmark_timings_comp", "benchmark_timings_comm", "benchmark_timings_default",
|
||||
"benchmark_timings_corners", "benchmark_weak_128", "benchmark_weak_256",
|
||||
"benchmark_weak_448",
|
||||
"benchmark_weak_512",
|
||||
};
|
||||
for (size_t i = 0; i < sizeof(files) / sizeof(files[0]); ++i) {
|
||||
int nn = 256;
|
||||
@@ -79,14 +85,32 @@ main(void)
|
||||
nn = 512;
|
||||
else if (strcmp(files[i], "benchmark_meshsize_1024") == 0)
|
||||
nn = 1024;
|
||||
else if (strcmp(files[i], "benchmark_meshsize_1792") == 0)
|
||||
nn = 1792;
|
||||
else if (strcmp(files[i], "benchmark_meshsize_2048") == 0)
|
||||
nn = 2048;
|
||||
else if (strcmp(files[i], "benchmark_weak_128") == 0)
|
||||
nn = 128;
|
||||
else if (strcmp(files[i], "benchmark_weak_448") == 0)
|
||||
nn = 448;
|
||||
else if (strcmp(files[i], "benchmark_weak_512") == 0)
|
||||
nn = 512;
|
||||
|
||||
fprintf(fp, "$(cd %s && srun ./benchmark %d %d %d && cd ..)\n", files[i], nn, nn, nn);
|
||||
// With Fredrik's tunings
|
||||
// (may cause Assertion `status == UCS_OK' failed errors)
|
||||
// fprintf(fp,
|
||||
// "$(cd %s && UCX_RNDV_THRESH=16384 UCX_RNDV_SCHEME=get_zcopy "
|
||||
// "UCX_MAX_RNDV_RAILS=1 srun ./benchmark %d %d %d && cd ..)\n",
|
||||
// files[i], nn, nn, nn);
|
||||
if (nodes >= 2) {
|
||||
fprintf(fp,
|
||||
"$(cd %s && UCX_RNDV_THRESH=16384 UCX_RNDV_SCHEME=get_zcopy "
|
||||
"UCX_MAX_RNDV_RAILS=1 srun --kill-on-bad-exit=0 ./benchmark %d %d %d && rm "
|
||||
"-f core.* && cd ..)\n",
|
||||
files[i], nn, nn, nn);
|
||||
}
|
||||
else {
|
||||
fprintf(fp,
|
||||
"$(cd %s && srun --kill-on-bad-exit=0 ./benchmark %d %d %d && rm -f core.* "
|
||||
"&& cd ..)\n",
|
||||
files[i], nn, nn, nn);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
@@ -43,7 +43,13 @@
|
||||
|
||||
// These need to be defined here: they are not noticed by the compile_acc call.
|
||||
#define LFORCING (0)
|
||||
|
||||
#ifdef VTXBUF_ACCRETION
|
||||
#define LSINK (1)
|
||||
#else
|
||||
#define LSINK (0)
|
||||
#endif
|
||||
|
||||
#ifdef BFIELDX
|
||||
#define LBFIELD (1)
|
||||
#else
|
||||
@@ -322,6 +328,7 @@ run_simulation(const char* config_path)
|
||||
// acmesh_init_to(INIT_TYPE_SIMPLE_CORE, mesh); //Initial condition for a collapse test
|
||||
|
||||
#if LSINK
|
||||
printf("WARNING! Sink particle is under development. USE AT YOUR OWN RISK!")
|
||||
vertex_buffer_set(VTXBUF_ACCRETION, 0.0, mesh);
|
||||
#endif
|
||||
|
||||
@@ -388,18 +395,10 @@ run_simulation(const char* config_path)
|
||||
/* Step the simulation */
|
||||
AcReal accreted_mass = 0.0;
|
||||
AcReal sink_mass = 0.0;
|
||||
AcReal uu_freefall = 0.0;
|
||||
AcReal dt_typical = 0.0;
|
||||
int dtcounter = 0;
|
||||
for (int i = start_step + 1; i < max_steps; ++i) {
|
||||
const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
|
||||
#if LBFIELD
|
||||
const AcReal vAmax = acReduceVecScal(RTYPE_ALFVEN_MAX, BFIELDX, BFIELDY, BFIELDZ, VTXBUF_LNRHO);
|
||||
const AcReal uref = max(umax, vAmax);
|
||||
const AcReal dt = host_timestep(uref, vAmax, mesh_info);
|
||||
#else
|
||||
const AcReal dt = host_timestep(umax, 0.0l, mesh_info);
|
||||
#endif
|
||||
|
||||
#if LSINK
|
||||
|
||||
const AcReal sum_mass = acReduceScal(RTYPE_SUM, VTXBUF_ACCRETION);
|
||||
@@ -407,7 +406,7 @@ run_simulation(const char* config_path)
|
||||
sink_mass = 0.0;
|
||||
sink_mass = mesh_info.real_params[AC_M_sink_init] + accreted_mass;
|
||||
acLoadDeviceConstant(AC_M_sink, sink_mass);
|
||||
vertex_buffer_set(VTXBUF_ACCRETION, 0.0, mesh);
|
||||
vertex_buffer_set(VTXBUF_ACCRETION, 0.0, mesh); //TODO THIS IS A BUG! WILL ONLY SET HOST BUFFER 0!
|
||||
|
||||
int on_off_switch;
|
||||
if (i < 1) {
|
||||
@@ -417,11 +416,26 @@ run_simulation(const char* config_path)
|
||||
on_off_switch = 1;
|
||||
}
|
||||
acLoadDeviceConstant(AC_switch_accretion, on_off_switch);
|
||||
|
||||
// Adjust the Courant condition for the free-fall velocity
|
||||
const AcReal RR = mesh_info.real_params[AC_soft]*mesh_info.real_params[AC_soft];
|
||||
const AcReal SQ2GM = sqrt(AcReal(2.0)*mesh_info.real_params[AC_G_const]*sink_mass);
|
||||
uu_freefall = fabs(SQ2GM / sqrt(RR));
|
||||
#else
|
||||
accreted_mass = -1.0;
|
||||
sink_mass = -1.0;
|
||||
#endif
|
||||
|
||||
const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
|
||||
#if LBFIELD
|
||||
const AcReal vAmax = acReduceVecScal(RTYPE_ALFVEN_MAX, BFIELDX, BFIELDY, BFIELDZ, VTXBUF_LNRHO);
|
||||
const AcReal uref = max(max(umax,uu_freefall), vAmax);
|
||||
const AcReal dt = host_timestep(uref, vAmax, mesh_info);
|
||||
#else
|
||||
const AcReal uref = max(umax,uu_freefall);
|
||||
const AcReal dt = host_timestep(uref, 0.0l, mesh_info);
|
||||
#endif
|
||||
|
||||
#if LFORCING
|
||||
const ForcingParams forcing_params = generateForcingParams(mesh_info);
|
||||
loadForcingParamsToDevice(forcing_params);
|
||||
|
Reference in New Issue
Block a user