From 9af5ba21563ad765238314f2353caea115b5b1aa Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 7 Aug 2019 11:11:27 +0800 Subject: [PATCH 01/11] Copied elements in the DSL form. Needs to be adapted at the next stage. --- acc/mhd_solver/stencil_assembly.sas | 27 ++++++++---- src/standalone/model/model_rk3.cc | 68 ++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/acc/mhd_solver/stencil_assembly.sas b/acc/mhd_solver/stencil_assembly.sas index 3451722..53b5a45 100644 --- a/acc/mhd_solver/stencil_assembly.sas +++ b/acc/mhd_solver/stencil_assembly.sas @@ -21,9 +21,12 @@ der6x_upwd(in Scalar vertex) return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( - Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] - + Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z]) - - Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z]) - + vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])}; + + Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z] + + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z]) + - Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z] + + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z]) + + vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z] + + vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])}; } Preprocessed Scalar @@ -33,9 +36,12 @@ der6y_upwd(in Scalar vertex) return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] - +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z]) - -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z]) - + vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])}; + +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z] + + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z]) + -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z] + + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z]) + + vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z] + + vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])}; } Preprocessed Scalar @@ -45,9 +51,12 @@ der6z_upwd(in Scalar vertex) return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] - +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1]) - -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2]) - + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])}; + +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1] + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1]) + -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2] + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2]) + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3] + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])}; } #endif diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc index bb6dbc2..356f6ad 100644 --- a/src/standalone/model/model_rk3.cc +++ b/src/standalone/model/model_rk3.cc @@ -354,6 +354,56 @@ gradients(const ModelVectorData& data) return (ModelMatrix){gradient(data.x), gradient(data.y), gradient(data.z)}; } +#if LUPWD + +Preprocessed Scalar +der6x_upwd(in Scalar vertex) +{ + Scalar inv_ds = DCONST_REAL(AC_inv_dsx); + + return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( + - Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] + + Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z] + + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z]) + - Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z] + + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z]) + + vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z] + + vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])}; +} + +Preprocessed Scalar +der6y_upwd(in Scalar vertex) +{ + Scalar inv_ds = DCONST_REAL(AC_inv_dsy); + + return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( + -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] + +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z] + + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z]) + -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z] + + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z]) + + vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z] + + vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])}; +} + +Preprocessed Scalar +der6z_upwd(in Scalar vertex) +{ + Scalar inv_ds = DCONST_REAL(AC_inv_dsz); + + return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( + -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] + +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1] + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1]) + -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2] + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2]) + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3] + + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])}; +} + +#endif + + /* * ============================================================================= * Level 0.3 (Built-in functions available during the Stencil Processing Stage) @@ -450,6 +500,17 @@ curl(const ModelVectorData& vec) gradient(vec.y).x - gradient(vec.x).y}; } +#if LUPWD +Scalar +upwd_der6(in Vector uu, in Scalar lnrho) +{ + Scalar uux = fabs(value(uu).x); + Scalar uuy = fabs(value(uu).y); + Scalar uuz = fabs(value(uu).z); + return (Scalar){uux*der6x_upwd(lnrho) + uuy*der6y_upwd(lnrho) + uuz*der6z_upwd(lnrho)}; +} +#endif + static inline ModelVector gradient_of_divergence(const ModelVectorData& vec) { @@ -505,7 +566,12 @@ contract(const ModelMatrix& mat) static inline ModelScalar continuity(const ModelVectorData& uu, const ModelScalarData& lnrho) { - return -dot(value(uu), gradient(lnrho)) - divergence(uu); + return -dot(value(uu), gradient(lnrho)) +#if LUPWD + //This is a corrective hyperdiffusion term for upwinding. + + upwd_der6(uu, lnrho) +#endif + - divergence(uu); } static inline ModelScalar From 15f0fa6aa552b0f7c18248ba2a9eabcec23707a8 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 7 Aug 2019 14:07:45 +0800 Subject: [PATCH 02/11] Useful .gitignore added --- doc/manual/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 doc/manual/.gitignore diff --git a/doc/manual/.gitignore b/doc/manual/.gitignore new file mode 100644 index 0000000..23f832b --- /dev/null +++ b/doc/manual/.gitignore @@ -0,0 +1,2 @@ +*.html +*.pdf From 7cc524b78b2807f520dc4ad2c4f92329a1a83209 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 7 Aug 2019 14:57:51 +0800 Subject: [PATCH 03/11] Adapting for autotest but i, j, k indexing is confusing. --- acc/mhd_solver/stencil_defines.h | 4 +- src/standalone/model/model_rk3.cc | 118 +++++++++++++++--------------- 2 files changed, 61 insertions(+), 61 deletions(-) diff --git a/acc/mhd_solver/stencil_defines.h b/acc/mhd_solver/stencil_defines.h index 5124820..ea4c102 100644 --- a/acc/mhd_solver/stencil_defines.h +++ b/acc/mhd_solver/stencil_defines.h @@ -30,8 +30,8 @@ #define LMAGNETIC (1) #define LENTROPY (1) #define LTEMPERATURE (0) -#define LFORCING (1) -#define LUPWD (0) +#define LFORCING (0) +#define LUPWD (1) #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc index 356f6ad..3471cd1 100644 --- a/src/standalone/model/model_rk3.cc +++ b/src/standalone/model/model_rk3.cc @@ -273,6 +273,53 @@ derzz(const int i, const int j, const int k, const ModelScalar* arr) return second_derivative(pencil, get(AC_inv_dsz)); } +#if LUPWD +static inline ModelScalar +der6x_upwd(const int i, const int j, const int k, const ModelScalar* arr) +{ + ModelScalar inv_ds = get(AC_inv_dsx); + + return ModelScalar(1.0/60.0)*inv_ds* ( + -ModelScalar( 20.0)* arr[IDX(i, j, k)] + +ModelScalar( 15.0)*(arr[IDX(i+1, j, k)] + + arr[IDX(i-1, j, k)]) + -ModelScalar( 6.0)*(arr[IDX(i+2, j, k)] + + arr[IDX(i-2, j, k)]) + + arr[IDX(i+3, j, k)] + + arr[IDX(i-3, j, k)]); +} + +static inline ModelScalar +der6y_upwd(const int i, const int j, const int k, const ModelScalar* arr) +{ + ModelScalar inv_ds = get(AC_inv_dsy); + + return ModelScalar(1.0/60.0)*inv_ds* ( + -ModelScalar( 20.0)* arr[IDX(i, j, k)] + +ModelScalar( 15.0)*(arr[IDX(i, j+1, k)] + + arr[IDX(i, j-1, k)]) + -ModelScalar( 6.0)*(arr[IDX(i, j+2, k)] + + arr[IDX(i, j-2, k)]) + + arr[IDX(i, j+3, k)] + + arr[IDX(i, j-3, k)]); +} + +static inline ModelScalar +der6z_upwd(const int i, const int j, const int k, const ModelScalar* arr) +{ + ModelScalar inv_ds = get(AC_inv_dsz); + + return ModelScalar(1.0/60.0)*inv_ds* ( + -ModelScalar( 20.0)* arr[IDX(i, j, k )] + +ModelScalar( 15.0)*(arr[IDX(i, j, k+1)] + + arr[IDX(i, j, k-1)]) + -ModelScalar( 6.0)*(arr[IDX(i, j, k+2)] + + arr[IDX(i, j, k-2)]) + + arr[IDX(i, j, k+3)] + + arr[IDX(i, j, k-3)]); +} +#endif + static inline ModelScalar compute_value(const int i, const int j, const int k, const ModelScalar* arr) { @@ -354,54 +401,6 @@ gradients(const ModelVectorData& data) return (ModelMatrix){gradient(data.x), gradient(data.y), gradient(data.z)}; } -#if LUPWD - -Preprocessed Scalar -der6x_upwd(in Scalar vertex) -{ - Scalar inv_ds = DCONST_REAL(AC_inv_dsx); - - return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( - - Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] - + Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z] - + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z]) - - Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z] - + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z]) - + vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z] - + vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])}; -} - -Preprocessed Scalar -der6y_upwd(in Scalar vertex) -{ - Scalar inv_ds = DCONST_REAL(AC_inv_dsy); - - return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( - -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] - +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z] - + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z]) - -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z] - + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z]) - + vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z] - + vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])}; -} - -Preprocessed Scalar -der6z_upwd(in Scalar vertex) -{ - Scalar inv_ds = DCONST_REAL(AC_inv_dsz); - - return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( - -Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] - +Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1] - + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1]) - -Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2] - + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2]) - + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3] - + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])}; -} - -#endif /* @@ -500,17 +499,6 @@ curl(const ModelVectorData& vec) gradient(vec.y).x - gradient(vec.x).y}; } -#if LUPWD -Scalar -upwd_der6(in Vector uu, in Scalar lnrho) -{ - Scalar uux = fabs(value(uu).x); - Scalar uuy = fabs(value(uu).y); - Scalar uuz = fabs(value(uu).z); - return (Scalar){uux*der6x_upwd(lnrho) + uuy*der6y_upwd(lnrho) + uuz*der6z_upwd(lnrho)}; -} -#endif - static inline ModelVector gradient_of_divergence(const ModelVectorData& vec) { @@ -563,6 +551,18 @@ contract(const ModelMatrix& mat) * Stencil Processing Stage (equations) * ============================================================================= */ + +#if LUPWD +ModelScalar +upwd_der6(const ModelVectorData& uu, const ModelScalarData& lnrho) +{ + ModelScalar uux = fabs(value(uu).x); + ModelScalar uuy = fabs(value(uu).y); + ModelScalar uuz = fabs(value(uu).z); + return uux*der6x_upwd(lnrho) + uuy*der6y_upwd(lnrho) + uuz*der6z_upwd(lnrho); +} +#endif + static inline ModelScalar continuity(const ModelVectorData& uu, const ModelScalarData& lnrho) { From 7e6361a92abde5a9fcdf8413309baa33f04fcb7d Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 7 Aug 2019 16:04:48 +0800 Subject: [PATCH 04/11] Forcing hotfix. Will need more investigation before scientific runs. Now just something to correct the obvious bug. --- acc/mhd_solver/stencil_process.sps | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/acc/mhd_solver/stencil_process.sps b/acc/mhd_solver/stencil_process.sps index c3ade38..2adb5ba 100644 --- a/acc/mhd_solver/stencil_process.sps +++ b/acc/mhd_solver/stencil_process.sps @@ -222,13 +222,16 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto // JP: This looks wrong: // 1) Should it be dsx * nx instead of dsx * ny? // 2) Should you also use globalGrid.n instead of the local n? + // MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split + // in z direction not y direction. // 3) Also final point: can we do this with vectors/quaternions instead? // Tringonometric functions are much more expensive and inaccurate/ + // MV: Good idea. No an immediate priority. // Fun related article: // https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/ - xx.x = xx.x*(2.0*M_PI/(dsx*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min)))); - xx.y = xx.y*(2.0*M_PI/(dsy*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min)))); - xx.z = xx.z*(2.0*M_PI/(dsz*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min)))); + xx.x = xx.x*(2.0*M_PI/(dsx*globalGrid.n.x)); + xx.y = xx.y*(2.0*M_PI/(dsy*globalGrid.n.y)); + xx.z = xx.z*(2.0*M_PI/(dsz*globalGrid.n.z)); Scalar cos_phi = cos(phi); Scalar sin_phi = sin(phi); From 738b2abaf3d9ea0b394cbf89ab3101e71e6cb3e7 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 7 Aug 2019 18:57:53 +0800 Subject: [PATCH 05/11] Supposedly working autotest for upwinding. --- src/standalone/model/model_rk3.cc | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc index 3471cd1..b51f285 100644 --- a/src/standalone/model/model_rk3.cc +++ b/src/standalone/model/model_rk3.cc @@ -43,6 +43,9 @@ typedef struct { ModelScalar value; ModelVector gradient; ModelMatrix hessian; +#if LUPWD + ModelVector upwind; +#endif } ModelScalarData; typedef struct { @@ -332,6 +335,12 @@ compute_gradient(const int i, const int j, const int k, const ModelScalar* arr) return (ModelVector){derx(i, j, k, arr), dery(i, j, k, arr), derz(i, j, k, arr)}; } +static inline ModelVector +compute_upwind(const int i, const int j, const int k, const ModelScalar* arr) +{ + return (ModelVector){der6x_upwd(i, j, k, arr), der6y_upwd(i, j, k, arr), der6z_upwd(i, j, k, arr)}; +} + static inline ModelMatrix compute_hessian(const int i, const int j, const int k, const ModelScalar* arr) { @@ -356,6 +365,10 @@ read_data(const int i, const int j, const int k, ModelScalar* buf[], const int h // diagonals with all arrays data.hessian = compute_hessian(i, j, k, buf[handle]); +#if LUPWD + data.upwind = compute_upwind(i, j, k, buf[handle]); +#endif + return data; } @@ -559,7 +572,7 @@ upwd_der6(const ModelVectorData& uu, const ModelScalarData& lnrho) ModelScalar uux = fabs(value(uu).x); ModelScalar uuy = fabs(value(uu).y); ModelScalar uuz = fabs(value(uu).z); - return uux*der6x_upwd(lnrho) + uuy*der6y_upwd(lnrho) + uuz*der6z_upwd(lnrho); + return uux*lnrho.upwind.x + uuy*lnrho.upwind.y + uuz*lnrho.upwind.z; } #endif From 7fdbd76aa21118949ddea7d34a0f0e0f76d59da8 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 7 Aug 2019 19:05:14 +0800 Subject: [PATCH 06/11] The default stencil_defines.h setting for merge. --- acc/mhd_solver/stencil_defines.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/acc/mhd_solver/stencil_defines.h b/acc/mhd_solver/stencil_defines.h index ea4c102..6db725d 100644 --- a/acc/mhd_solver/stencil_defines.h +++ b/acc/mhd_solver/stencil_defines.h @@ -30,8 +30,9 @@ #define LMAGNETIC (1) #define LENTROPY (1) #define LTEMPERATURE (0) -#define LFORCING (0) -#define LUPWD (1) +#define LFORCING (1) +#define LUPWD (0) + #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter From 0bb568642fd8997cf5a2b130fb4986581f562ab0 Mon Sep 17 00:00:00 2001 From: Miikka Vaisala Date: Wed, 7 Aug 2019 19:10:39 +0800 Subject: [PATCH 07/11] Still one bug --- src/standalone/model/model_rk3.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc index b51f285..7eb962f 100644 --- a/src/standalone/model/model_rk3.cc +++ b/src/standalone/model/model_rk3.cc @@ -335,11 +335,13 @@ compute_gradient(const int i, const int j, const int k, const ModelScalar* arr) return (ModelVector){derx(i, j, k, arr), dery(i, j, k, arr), derz(i, j, k, arr)}; } +#if LUPWD static inline ModelVector compute_upwind(const int i, const int j, const int k, const ModelScalar* arr) { return (ModelVector){der6x_upwd(i, j, k, arr), der6y_upwd(i, j, k, arr), der6z_upwd(i, j, k, arr)}; } +#endif static inline ModelMatrix compute_hessian(const int i, const int j, const int k, const ModelScalar* arr) From b61617ee0ff3e2cb80d45f11ef24dd727404207e Mon Sep 17 00:00:00 2001 From: jpekkila Date: Wed, 7 Aug 2019 15:53:32 +0300 Subject: [PATCH 08/11] Enabled upwinding by default and updated the model helical forcing with the hotfixed changes from earlier commits. Autotests kinda pass (we get 1 failure but this is likely due to inaccuracies of the trigonometric functions used in helical forcing. The error is very close to the acceptable error bound). --- acc/mhd_solver/stencil_defines.h | 3 +- src/standalone/model/model_rk3.cc | 51 ++++++++++++++++--------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/acc/mhd_solver/stencil_defines.h b/acc/mhd_solver/stencil_defines.h index 6db725d..b4bc622 100644 --- a/acc/mhd_solver/stencil_defines.h +++ b/acc/mhd_solver/stencil_defines.h @@ -31,8 +31,7 @@ #define LENTROPY (1) #define LTEMPERATURE (0) #define LFORCING (1) -#define LUPWD (0) - +#define LUPWD (1) #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc index 7eb962f..1084794 100644 --- a/src/standalone/model/model_rk3.cc +++ b/src/standalone/model/model_rk3.cc @@ -44,7 +44,7 @@ typedef struct { ModelVector gradient; ModelMatrix hessian; #if LUPWD - ModelVector upwind; + ModelVector upwind; #endif } ModelScalarData; @@ -284,11 +284,11 @@ der6x_upwd(const int i, const int j, const int k, const ModelScalar* arr) return ModelScalar(1.0/60.0)*inv_ds* ( -ModelScalar( 20.0)* arr[IDX(i, j, k)] - +ModelScalar( 15.0)*(arr[IDX(i+1, j, k)] + +ModelScalar( 15.0)*(arr[IDX(i+1, j, k)] + arr[IDX(i-1, j, k)]) - -ModelScalar( 6.0)*(arr[IDX(i+2, j, k)] + -ModelScalar( 6.0)*(arr[IDX(i+2, j, k)] + arr[IDX(i-2, j, k)]) - + arr[IDX(i+3, j, k)] + + arr[IDX(i+3, j, k)] + arr[IDX(i-3, j, k)]); } @@ -299,11 +299,11 @@ der6y_upwd(const int i, const int j, const int k, const ModelScalar* arr) return ModelScalar(1.0/60.0)*inv_ds* ( -ModelScalar( 20.0)* arr[IDX(i, j, k)] - +ModelScalar( 15.0)*(arr[IDX(i, j+1, k)] + +ModelScalar( 15.0)*(arr[IDX(i, j+1, k)] + arr[IDX(i, j-1, k)]) - -ModelScalar( 6.0)*(arr[IDX(i, j+2, k)] + -ModelScalar( 6.0)*(arr[IDX(i, j+2, k)] + arr[IDX(i, j-2, k)]) - + arr[IDX(i, j+3, k)] + + arr[IDX(i, j+3, k)] + arr[IDX(i, j-3, k)]); } @@ -314,11 +314,11 @@ der6z_upwd(const int i, const int j, const int k, const ModelScalar* arr) return ModelScalar(1.0/60.0)*inv_ds* ( -ModelScalar( 20.0)* arr[IDX(i, j, k )] - +ModelScalar( 15.0)*(arr[IDX(i, j, k+1)] + +ModelScalar( 15.0)*(arr[IDX(i, j, k+1)] + arr[IDX(i, j, k-1)]) - -ModelScalar( 6.0)*(arr[IDX(i, j, k+2)] + -ModelScalar( 6.0)*(arr[IDX(i, j, k+2)] + arr[IDX(i, j, k-2)]) - + arr[IDX(i, j, k+3)] + + arr[IDX(i, j, k+3)] + arr[IDX(i, j, k-3)]); } #endif @@ -581,7 +581,7 @@ upwd_der6(const ModelVectorData& uu, const ModelScalarData& lnrho) static inline ModelScalar continuity(const ModelVectorData& uu, const ModelScalarData& lnrho) { - return -dot(value(uu), gradient(lnrho)) + return -dot(value(uu), gradient(lnrho)) #if LUPWD //This is a corrective hyperdiffusion term for upwinding. + upwd_der6(uu, lnrho) @@ -760,23 +760,24 @@ helical_forcing(ModelScalar magnitude, ModelVector k_force, ModelVector xx, Mode ModelVector ff_im, ModelScalar phi) { (void)magnitude; // WARNING: unused - xx.x = xx.x * (2.0l * M_PI / (get(AC_dsx) * (get(AC_ny_max) - get(AC_ny_min)))); - xx.y = xx.y * (2.0l * M_PI / (get(AC_dsy) * (get(AC_ny_max) - get(AC_ny_min)))); - xx.z = xx.z * (2.0l * M_PI / (get(AC_dsz) * (get(AC_ny_max) - get(AC_ny_min)))); + xx.x = xx.x*(2.0*M_PI/(get(AC_dsx)*get(AC_nx))); + xx.y = xx.y*(2.0*M_PI/(get(AC_dsy)*get(AC_ny))); + xx.z = xx.z*(2.0*M_PI/(get(AC_dsz)*get(AC_nz))); - ModelScalar cosl_phi = cosl(phi); - ModelScalar sinl_phi = sinl(phi); - ModelScalar cosl_k_dox_x = cosl(dot(k_force, xx)); - ModelScalar sinl_k_dox_x = sinl(dot(k_force, xx)); + ModelScalar cos_phi = cosl(phi); + ModelScalar sin_phi = sinl(phi); + ModelScalar cos_k_dox_x = cosl(dot(k_force, xx)); + ModelScalar sin_k_dox_x = sinl(dot(k_force, xx)); // Phase affect only the x-component - // ModelScalar real_comp = cosl_k_dox_x; - // ModelScalar imag_comp = sinl_k_dox_x; - ModelScalar real_comp_phase = cosl_k_dox_x * cosl_phi - sinl_k_dox_x * sinl_phi; - ModelScalar imag_comp_phase = cosl_k_dox_x * sinl_phi + sinl_k_dox_x * cosl_phi; + //Scalar real_comp = cos_k_dox_x; + //Scalar imag_comp = sin_k_dox_x; + ModelScalar real_comp_phase = cos_k_dox_x*cos_phi - sin_k_dox_x*sin_phi; + ModelScalar imag_comp_phase = cos_k_dox_x*sin_phi + sin_k_dox_x*cos_phi; - ModelVector force = (ModelVector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase, - ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase, - ff_re.z * real_comp_phase - ff_im.z * imag_comp_phase}; + + ModelVector force = (ModelVector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase, + ff_re.y*real_comp_phase - ff_im.y*imag_comp_phase, + ff_re.z*real_comp_phase - ff_im.z*imag_comp_phase}; return force; } From e2f5cced1e52b33c598b4ebb3646490136247227 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Wed, 7 Aug 2019 16:08:03 +0300 Subject: [PATCH 09/11] Renamed dox -> dot --- acc/mhd_solver/stencil_process.sps | 18 +++++++++--------- src/standalone/model/model_rk3.cc | 12 ++++++------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/acc/mhd_solver/stencil_process.sps b/acc/mhd_solver/stencil_process.sps index 2adb5ba..cc692fe 100644 --- a/acc/mhd_solver/stencil_process.sps +++ b/acc/mhd_solver/stencil_process.sps @@ -222,11 +222,11 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto // JP: This looks wrong: // 1) Should it be dsx * nx instead of dsx * ny? // 2) Should you also use globalGrid.n instead of the local n? - // MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split - // in z direction not y direction. + // MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split + // in z direction not y direction. // 3) Also final point: can we do this with vectors/quaternions instead? // Tringonometric functions are much more expensive and inaccurate/ - // MV: Good idea. No an immediate priority. + // MV: Good idea. No an immediate priority. // Fun related article: // https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/ xx.x = xx.x*(2.0*M_PI/(dsx*globalGrid.n.x)); @@ -235,13 +235,13 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto Scalar cos_phi = cos(phi); Scalar sin_phi = sin(phi); - Scalar cos_k_dox_x = cos(dot(k_force, xx)); - Scalar sin_k_dox_x = sin(dot(k_force, xx)); + Scalar cos_k_dot_x = cos(dot(k_force, xx)); + Scalar sin_k_dot_x = sin(dot(k_force, xx)); // Phase affect only the x-component - //Scalar real_comp = cos_k_dox_x; - //Scalar imag_comp = sin_k_dox_x; - Scalar real_comp_phase = cos_k_dox_x*cos_phi - sin_k_dox_x*sin_phi; - Scalar imag_comp_phase = cos_k_dox_x*sin_phi + sin_k_dox_x*cos_phi; + //Scalar real_comp = cos_k_dot_x; + //Scalar imag_comp = sin_k_dot_x; + Scalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi; + Scalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi; Vector force = (Vector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase, diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc index 1084794..3900f0c 100644 --- a/src/standalone/model/model_rk3.cc +++ b/src/standalone/model/model_rk3.cc @@ -766,13 +766,13 @@ helical_forcing(ModelScalar magnitude, ModelVector k_force, ModelVector xx, Mode ModelScalar cos_phi = cosl(phi); ModelScalar sin_phi = sinl(phi); - ModelScalar cos_k_dox_x = cosl(dot(k_force, xx)); - ModelScalar sin_k_dox_x = sinl(dot(k_force, xx)); + ModelScalar cos_k_dot_x = cosl(dot(k_force, xx)); + ModelScalar sin_k_dot_x = sinl(dot(k_force, xx)); // Phase affect only the x-component - //Scalar real_comp = cos_k_dox_x; - //Scalar imag_comp = sin_k_dox_x; - ModelScalar real_comp_phase = cos_k_dox_x*cos_phi - sin_k_dox_x*sin_phi; - ModelScalar imag_comp_phase = cos_k_dox_x*sin_phi + sin_k_dox_x*cos_phi; + //Scalar real_comp = cos_k_dot_x; + //Scalar imag_comp = sin_k_dot_x; + ModelScalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi; + ModelScalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi; ModelVector force = (ModelVector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase, From c38218da3baf2a93127106cf78c0df3bc1862db8 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Wed, 7 Aug 2019 16:22:55 +0300 Subject: [PATCH 10/11] SRUN_COMMAND has to be exported for it to be available in autotest.sh --- scripts/autotest.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/autotest.sh b/scripts/autotest.sh index 49f0b57..f8e20a5 100755 --- a/scripts/autotest.sh +++ b/scripts/autotest.sh @@ -5,10 +5,13 @@ # source ./sourceme.sh # autotest.sh # -# If you need slurm or to pass something before ./ac_run, set the variable +# If you need slurm or to pass something before ./ac_run, export the variable # SRUN_COMMAND before calling this script. # # F.ex. on Taito SRUN_COMMAND="srun --gres=gpu:k80:4 --mem=24000 -t 00:14:59 -p gputest --cpus-per-task 1 -n 1" +# export SRUN_COMMAND +# autotest.sh +echo "SRUN_COMMAND: " ${SRUN_COMMAND} results=() From 5432d20b1b15060c6a82a81b3cb2036c0b467bfe Mon Sep 17 00:00:00 2001 From: jpekkila Date: Wed, 7 Aug 2019 16:25:03 +0300 Subject: [PATCH 11/11] Removed a depreceated auto-optimization script. The optimization is done during acInit() at runtime instead --- scripts/auto_optimize.sh | 51 ---------------------------------------- 1 file changed, 51 deletions(-) delete mode 100755 scripts/auto_optimize.sh diff --git a/scripts/auto_optimize.sh b/scripts/auto_optimize.sh deleted file mode 100755 index c86fa63..0000000 --- a/scripts/auto_optimize.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash - -# Run this in your build directory (cd build && ../scripts/auto_optimize.sh) -# Generates a ${BENCHMARK_FILE} which contains the threadblock dims and other -# constants used in the integration in addition to the time used. - -MAX_THREADS=1024 # Max size of the thread block, depends on hardware - -BENCHMARK_FILE="benchmark.out" -TBCONFCREATOR_SRC_PATH="../scripts/gen_rk3_threadblockconf.c" -TBCONFFILE_DST_PATH="../src/core/kernels" - -C_COMPILER_NAME="gcc" - -rm ${BENCHMARK_FILE} - -for (( tz=2; tz<=8; tz*=2)) -do -for (( ty=1; ty<=1; ty+=1)) -do -for (( tx=16; tx<=64; tx*=2)) -do - -if ( (${tx}*${ty}*${tz}) > ${MAX_THREADS}) -then break -fi - -for (( launch_bound=1; launch_bound<=8; launch_bound*=2)) -do -for (( elems_per_thread=1; elems_per_thread<=128; elems_per_thread*=2)) -do - # Generate the threadblock configuration - ${C_COMPILER_NAME} ${TBCONFCREATOR_SRC_PATH} -o gen_rk3_threadblockconf - ./gen_rk3_threadblockconf ${tx} ${ty} ${tz} ${elems_per_thread} ${launch_bound} - rm gen_rk3_threadblockconf - mv rk3_threadblock.conf ${TBCONFFILE_DST_PATH} - - # Compile and run the test build - cmake -DBUILD_DEBUG=OFF -DDOUBLE_PRECISION=OFF -DAUTO_OPTIMIZE=ON .. && make -j - #if ./ac_run -t; then - # echo Success - ./ac_run -b - #else - # echo fail! - #fi -done -done -done -done -done -