diff --git a/acc/mhd_solver/stencil_assembly.sas b/acc/mhd_solver/stencil_assembly.sas
index 3451722..53b5a45 100644
--- a/acc/mhd_solver/stencil_assembly.sas
+++ b/acc/mhd_solver/stencil_assembly.sas
@@ -21,9 +21,12 @@ der6x_upwd(in Scalar vertex)
     return (Scalar){
     Scalar(1.0/60.0)*inv_ds* (
     -   Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
-    +   Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z])
-    -   Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z])
-    +   vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])};
+    +   Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z]
+                    + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z])
+    -   Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z]
+                    + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z])
+    +   vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z]
+    +   vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])};
 }
 
 Preprocessed Scalar
@@ -33,9 +36,12 @@ der6y_upwd(in Scalar vertex)
     return (Scalar){
     Scalar(1.0/60.0)*inv_ds* (
     -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
-    +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z])
-    -Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z])
-    + vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])};
+    +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z]
+                  + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z])
+    -Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z]
+                  + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z])
+    + vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z]
+    + vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])};
 }
 
 Preprocessed Scalar
@@ -45,9 +51,12 @@ der6z_upwd(in Scalar vertex)
     return (Scalar){
     Scalar(1.0/60.0)*inv_ds* (
     -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
-    +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1])
-    -Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2])
-    + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])};
+    +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1]
+                  + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1])
+    -Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2]
+                  + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2])
+    + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3]
+    + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])};
 }
 
 #endif
diff --git a/acc/mhd_solver/stencil_defines.h b/acc/mhd_solver/stencil_defines.h
index 5124820..b4bc622 100644
--- a/acc/mhd_solver/stencil_defines.h
+++ b/acc/mhd_solver/stencil_defines.h
@@ -31,7 +31,7 @@
 #define LENTROPY (1)
 #define LTEMPERATURE (0)
 #define LFORCING (1)
-#define LUPWD (0)
+#define LUPWD (1)
 
 #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
 
diff --git a/acc/mhd_solver/stencil_process.sps b/acc/mhd_solver/stencil_process.sps
index 46efb7b..1f6f190 100644
--- a/acc/mhd_solver/stencil_process.sps
+++ b/acc/mhd_solver/stencil_process.sps
@@ -222,23 +222,26 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto
     // JP: This looks wrong:
     //      1) Should it be dsx * nx instead of dsx * ny?
     //      2) Should you also use globalGrid.n instead of the local n?
+    // MV: You are right. Made a quickfix. I did not see the error because multi-GPU is split
+    //     in the z direction, not the y direction.
     //      3) Also final point: can we do this with vectors/quaternions instead?
     //          Tringonometric functions are much more expensive and inaccurate/
+    // MV: Good idea. Not an immediate priority.
     //  Fun related article:
     //  https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
-    xx.x = xx.x*(2.0*M_PI/(dsx*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
-    xx.y = xx.y*(2.0*M_PI/(dsy*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
-    xx.z = xx.z*(2.0*M_PI/(dsz*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
+    xx.x = xx.x*(2.0*M_PI/(dsx*globalGrid.n.x));
+    xx.y = xx.y*(2.0*M_PI/(dsy*globalGrid.n.y));
+    xx.z = xx.z*(2.0*M_PI/(dsz*globalGrid.n.z));
 
     Scalar cos_phi = cos(phi);
     Scalar sin_phi = sin(phi);
-    Scalar cos_k_dox_x = cos(dot(k_force, xx));
-    Scalar sin_k_dox_x = sin(dot(k_force, xx));
+    Scalar cos_k_dot_x = cos(dot(k_force, xx));
+    Scalar sin_k_dot_x = sin(dot(k_force, xx));
     // Phase affect only the x-component
-    //Scalar real_comp = cos_k_dox_x;
-    //Scalar imag_comp = sin_k_dox_x;
-    Scalar real_comp_phase = cos_k_dox_x*cos_phi - sin_k_dox_x*sin_phi;
-    Scalar imag_comp_phase = cos_k_dox_x*sin_phi + sin_k_dox_x*cos_phi;
+    //Scalar real_comp = cos_k_dot_x;
+    //Scalar imag_comp = sin_k_dot_x;
+    Scalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi;
+    Scalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi;
 
     Vector force = (Vector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase,
diff --git a/doc/manual/.gitignore b/doc/manual/.gitignore
new file mode 100644
index 0000000..23f832b
--- /dev/null
+++ b/doc/manual/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.pdf
diff --git a/scripts/auto_optimize.sh b/scripts/auto_optimize.sh
deleted file mode 100755
index c86fa63..0000000
--- a/scripts/auto_optimize.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-# Run this in your build directory (cd build && ../scripts/auto_optimize.sh)
-# Generates a ${BENCHMARK_FILE} which contains the threadblock dims and other
-# constants used in the integration in addition to the time used.
-
-MAX_THREADS=1024 # Max size of the thread block, depends on hardware
-
-BENCHMARK_FILE="benchmark.out"
-TBCONFCREATOR_SRC_PATH="../scripts/gen_rk3_threadblockconf.c"
-TBCONFFILE_DST_PATH="../src/core/kernels"
-
-C_COMPILER_NAME="gcc"
-
-rm ${BENCHMARK_FILE}
-
-for (( tz=2; tz<=8; tz*=2))
-do
-for (( ty=1; ty<=1; ty+=1))
-do
-for (( tx=16; tx<=64; tx*=2))
-do
-
-if ( (${tx}*${ty}*${tz}) > ${MAX_THREADS})
-then break
-fi
-
-for (( launch_bound=1; launch_bound<=8; launch_bound*=2))
-do
-for (( elems_per_thread=1; elems_per_thread<=128; elems_per_thread*=2))
-do
-    # Generate the threadblock configuration
-    ${C_COMPILER_NAME} ${TBCONFCREATOR_SRC_PATH} -o gen_rk3_threadblockconf
-    ./gen_rk3_threadblockconf ${tx} ${ty} ${tz} ${elems_per_thread} ${launch_bound}
-    rm gen_rk3_threadblockconf
-    mv rk3_threadblock.conf ${TBCONFFILE_DST_PATH}
-
-    # Compile and run the test build
-    cmake -DBUILD_DEBUG=OFF -DDOUBLE_PRECISION=OFF -DAUTO_OPTIMIZE=ON .. && make -j
-    #if ./ac_run -t; then
-    #    echo Success
-    ./ac_run -b
-    #else
-    #    echo fail!
-    #fi
-done
-done
-done
-done
-done
-
diff --git a/scripts/autotest.sh b/scripts/autotest.sh
index 49f0b57..f8e20a5 100755
--- a/scripts/autotest.sh
+++ b/scripts/autotest.sh
@@ -5,10 +5,13 @@
 #   source ./sourceme.sh
 #   autotest.sh
 #
-# If you need slurm or to pass something before ./ac_run, set the variable
+# If you need slurm or to pass something before ./ac_run, export the variable
 # SRUN_COMMAND before calling this script.
 #
 # F.ex. on Taito SRUN_COMMAND="srun --gres=gpu:k80:4 --mem=24000 -t 00:14:59 -p gputest --cpus-per-task 1 -n 1"
+# export SRUN_COMMAND
+# autotest.sh
+echo "SRUN_COMMAND: " ${SRUN_COMMAND}
 
 results=()
 
diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc
index bb6dbc2..3900f0c 100644
--- a/src/standalone/model/model_rk3.cc
+++ b/src/standalone/model/model_rk3.cc
@@ -43,6 +43,9 @@ typedef struct {
     ModelScalar value;
     ModelVector gradient;
     ModelMatrix hessian;
+#if LUPWD
+    ModelVector upwind;
+#endif
 } ModelScalarData;
 
 typedef struct {
@@ -273,6 +276,53 @@ derzz(const int i, const int j, const int k, const ModelScalar* arr)
     return second_derivative(pencil, get(AC_inv_dsz));
 }
 
+#if LUPWD
+static inline ModelScalar
+der6x_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsx);
+
+    return ModelScalar(1.0/60.0)*inv_ds* (
+    -ModelScalar( 20.0)* arr[IDX(i, j, k)]
+    +ModelScalar( 15.0)*(arr[IDX(i+1, j, k)]
+                       + arr[IDX(i-1, j, k)])
+    -ModelScalar(  6.0)*(arr[IDX(i+2, j, k)]
+                       + arr[IDX(i-2, j, k)])
+    +                    arr[IDX(i+3, j, k)]
+    +                    arr[IDX(i-3, j, k)]);
+}
+
+static inline ModelScalar
+der6y_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsy);
+
+    return ModelScalar(1.0/60.0)*inv_ds* (
+    -ModelScalar( 20.0)* arr[IDX(i, j, k)]
+    +ModelScalar( 15.0)*(arr[IDX(i, j+1, k)]
+                       + arr[IDX(i, j-1, k)])
+    -ModelScalar(  6.0)*(arr[IDX(i, j+2, k)]
+                       + arr[IDX(i, j-2, k)])
+    +                    arr[IDX(i, j+3, k)]
+    +                    arr[IDX(i, j-3, k)]);
+}
+
+static inline ModelScalar
+der6z_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsz);
+
+    return ModelScalar(1.0/60.0)*inv_ds* (
+    -ModelScalar( 20.0)* arr[IDX(i, j, k  )]
+    +ModelScalar( 15.0)*(arr[IDX(i, j, k+1)]
+                       + arr[IDX(i, j, k-1)])
+    -ModelScalar(  6.0)*(arr[IDX(i, j, k+2)]
+                       + arr[IDX(i, j, k-2)])
+    +                    arr[IDX(i, j, k+3)]
+    +                    arr[IDX(i, j, k-3)]);
+}
+#endif
+
 static inline ModelScalar
 compute_value(const int i, const int j, const int k, const ModelScalar* arr)
 {
@@ -285,6 +335,14 @@ compute_gradient(const int i, const int j, const int k, const ModelScalar* arr)
     return (ModelVector){derx(i, j, k, arr), dery(i, j, k, arr), derz(i, j, k, arr)};
 }
 
+#if LUPWD
+static inline ModelVector
+compute_upwind(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    return (ModelVector){der6x_upwd(i, j, k, arr), der6y_upwd(i, j, k, arr), der6z_upwd(i, j, k, arr)};
+}
+#endif
+
 static inline ModelMatrix
 compute_hessian(const int i, const int j, const int k, const ModelScalar* arr)
 {
@@ -309,6 +367,10 @@ read_data(const int i, const int j, const int k, ModelScalar* buf[], const int h
     // diagonals with all arrays
     data.hessian = compute_hessian(i, j, k, buf[handle]);
 
+#if LUPWD
+    data.upwind = compute_upwind(i, j, k, buf[handle]);
+#endif
+
     return data;
 }
 
@@ -354,6 +416,8 @@ gradients(const ModelVectorData& data)
     return (ModelMatrix){gradient(data.x), gradient(data.y), gradient(data.z)};
 }
 
+
+
 /*
  * =============================================================================
  * Level 0.3 (Built-in functions available during the Stencil Processing Stage)
@@ -502,10 +566,27 @@ contract(const ModelMatrix& mat)
  * Stencil Processing Stage (equations)
  * =============================================================================
  */
+
+#if LUPWD
+ModelScalar
+upwd_der6(const ModelVectorData& uu, const ModelScalarData& lnrho)
+{
+    ModelScalar uux = fabs(value(uu).x);
+    ModelScalar uuy = fabs(value(uu).y);
+    ModelScalar uuz = fabs(value(uu).z);
+    return uux*lnrho.upwind.x + uuy*lnrho.upwind.y + uuz*lnrho.upwind.z;
+}
+#endif
+
 static inline ModelScalar
 continuity(const ModelVectorData& uu, const ModelScalarData& lnrho)
 {
-    return -dot(value(uu), gradient(lnrho)) - divergence(uu);
+    return -dot(value(uu), gradient(lnrho))
+#if LUPWD
+           // This is a corrective hyperdiffusion term for upwinding.
+           + upwd_der6(uu, lnrho)
+#endif
+           - divergence(uu);
 }
 
 static inline ModelScalar
@@ -679,23 +760,24 @@ helical_forcing(ModelScalar magnitude, ModelVector k_force, ModelVector xx, Mode
                 ModelVector ff_im, ModelScalar phi)
 {
     (void)magnitude; // WARNING: unused
-    xx.x = xx.x * (2.0l * M_PI / (get(AC_dsx) * (get(AC_ny_max) - get(AC_ny_min))));
-    xx.y = xx.y * (2.0l * M_PI / (get(AC_dsy) * (get(AC_ny_max) - get(AC_ny_min))));
-    xx.z = xx.z * (2.0l * M_PI / (get(AC_dsz) * (get(AC_ny_max) - get(AC_ny_min))));
+    xx.x = xx.x*(2.0*M_PI/(get(AC_dsx)*get(AC_nx)));
+    xx.y = xx.y*(2.0*M_PI/(get(AC_dsy)*get(AC_ny)));
+    xx.z = xx.z*(2.0*M_PI/(get(AC_dsz)*get(AC_nz)));
 
-    ModelScalar cosl_phi = cosl(phi);
-    ModelScalar sinl_phi = sinl(phi);
-    ModelScalar cosl_k_dox_x = cosl(dot(k_force, xx));
-    ModelScalar sinl_k_dox_x = sinl(dot(k_force, xx));
+    ModelScalar cos_phi = cosl(phi);
+    ModelScalar sin_phi = sinl(phi);
+    ModelScalar cos_k_dot_x = cosl(dot(k_force, xx));
+    ModelScalar sin_k_dot_x = sinl(dot(k_force, xx));
     // Phase affect only the x-component
-    // ModelScalar real_comp = cosl_k_dox_x;
-    // ModelScalar imag_comp = sinl_k_dox_x;
-    ModelScalar real_comp_phase = cosl_k_dox_x * cosl_phi - sinl_k_dox_x * sinl_phi;
-    ModelScalar imag_comp_phase = cosl_k_dox_x * sinl_phi + sinl_k_dox_x * cosl_phi;
+    //Scalar real_comp = cos_k_dot_x;
+    //Scalar imag_comp = sin_k_dot_x;
+    ModelScalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi;
+    ModelScalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi;
 
-    ModelVector force = (ModelVector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase,
-                                      ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase,
-                                      ff_re.z * real_comp_phase - ff_im.z * imag_comp_phase};
+
+    ModelVector force = (ModelVector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase,
+                                       ff_re.y*real_comp_phase - ff_im.y*imag_comp_phase,
+                                       ff_re.z*real_comp_phase - ff_im.z*imag_comp_phase};
 
     return force;
 }
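
Reviewer note (not part of the patch): the der6*_upwd functions added above all apply the same one-dimensional sixth-order stencil along one axis, and continuity() adds |u| times that stencil of lnrho as a corrective hyperdiffusion term. Below is a minimal standalone C++ sketch of the 1-D operator and the correction term for sanity-checking on the host; the helper name der6_upwd_1d, the grid size, the sin() test profile, and the velocity value are illustrative assumptions, not names from the code base.

#include <cmath>
#include <cstdio>
#include <vector>

// 1-D version of the der6*_upwd stencil: a sixth-order "hyperdiffusion"
// operator divided once by the grid spacing, mirroring the coefficients
// -20, +15, -6, +1 used in the patch.
static double der6_upwd_1d(const std::vector<double>& f, int i, double inv_ds)
{
    return (1.0 / 60.0) * inv_ds *
           (-20.0 *  f[i]
            + 15.0 * (f[i + 1] + f[i - 1])
            -  6.0 * (f[i + 2] + f[i - 2])
            +         f[i + 3] + f[i - 3]);
}

int main()
{
    const int n       = 64;               // interior points (assumed)
    const int halo    = 3;                // stencil radius
    const double ds   = 2.0 * M_PI / n;   // grid spacing of an assumed periodic box
    const double inv_ds = 1.0 / ds;

    std::vector<double> lnrho(n + 2 * halo);
    for (int i = 0; i < n + 2 * halo; ++i)
        lnrho[i] = std::sin((i - halo) * ds); // smooth test profile

    // Upwinding correction as in continuity(): |u| * der6_upwd(lnrho).
    const double ux = -1.5;               // arbitrary advection velocity
    const int i     = halo + n / 2;       // some interior point
    std::printf("upwind correction at i=%d: %g\n",
                i, std::fabs(ux) * der6_upwd_1d(lnrho, i, inv_ds));
    return 0;
}

For a smooth profile the printed correction should be small, which is the expected behaviour: the term only becomes significant where lnrho develops sharp, poorly resolved gradients.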
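Reviewer note (not part of the patch), regarding the JP/MV exchange about avoiding repeated trigonometric evaluations in helical_forcing(): the real/imag phase combination used in the patch is just a rotation of (cos(k.x), sin(k.x)) by phi, i.e. a complex multiplication, so cos(phi) and sin(phi) only need to be evaluated once per forcing call and reused for every vertex. A small sketch with assumed input values:

#include <complex>
#include <cstdio>

int main()
{
    const double k_dot_x = 0.7; // k_force . xx at some vertex (assumed value)
    const double phi     = 0.3; // forcing phase (assumed value)

    // Reference: the formulation used in the patch.
    const double real_ref = std::cos(k_dot_x) * std::cos(phi) - std::sin(k_dot_x) * std::sin(phi);
    const double imag_ref = std::cos(k_dot_x) * std::sin(phi) + std::sin(k_dot_x) * std::cos(phi);

    // Same result as a single complex product exp(i*k.x) * exp(i*phi).
    const std::complex<double> rotated = std::polar(1.0, k_dot_x) * std::polar(1.0, phi);

    std::printf("reference: (%g, %g)\n", real_ref, imag_ref);
    std::printf("complex:   (%g, %g)\n", rotated.real(), rotated.imag());
    return 0;
}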