diff --git a/README.md b/README.md index 0f75b17..1632b47 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ # Astaroth - A Multi-GPU library for generic stencil computations -Astaroth is a single-node multi-GPU library for multiphysics and other problems, which involve stencil computations in a discrete mesh. It's licenced under the terms of the GNU General Public Licence, version 3, or later (see [LICENCE.txt](https://bitbucket.org/miikkavaisala/astaroth-code/src/master/astaroth_2.0/LICENCE.txt)). Astaroth ships with a domain-specific language, that can be used to translate high-level representation of the stencil computations into a heavily inlined GPU pipeline. +Astaroth is a single-node multi-GPU library for multiphysics and other problems, which involve stencil computations in a discrete mesh. It's licenced under the terms of the GNU General Public Licence, version 3, or later (see [LICENCE.txt](https://bitbucket.org/miikkavaisala/astaroth-code/src/master/astaroth_2.0/LICENCE.txt)). Astaroth ships with a domain-specific language that can be used to translate high-level representations of various stencil operations into efficient CUDA kernels. ## System requirements NVIDIA GPU with >= 3.0 compute capability. See https://en.wikipedia.org/wiki/CUDA#GPUs_supported. -## Building (3rd party libraries) +## Building (3rd party libraries for real-time visualization) 1. `cd 3rdparty` 1. `./setup_dependencies.sh` Note: this may take some time. @@ -60,6 +60,43 @@ Run `doxygen doxyfile` in astaroth_2.0 directory. The generated files can be fou If you have clang-format, you may run `scripts/fix_style.sh`. This script will recursively fix style of all the source files down from the current working directory. The script will ask for a confirmation before making any changes. ## Directory structure +TODO + +## Contributing + +0. **Do not break existing functionality.** Do not modify the interface functions declared in astaroth.h and device.cuh in any way. 
Bug fixes are exceptions. If you need new functionality, create a new function. + +0. **Do not rename or redefine variables or constants declared in astaroth.h** without consulting everyone involved with the project. + +0. **Ensure that the code compiles and the automated tests pass** by running `./ac_run -t` before pushing changes to master. If you want to implement a feature that consists of multiple commits, see Managing feature branches below. + +### Managing feature branches + +0. Ensure that you're on the latest version of master. `git checkout master && git pull` + +0. Create a feature branch with `git checkout -b <feature_branch>`, e.g. `git checkout -b forcingtests_2019-01-01` + +0. Do your commits in that branch until your new feature works + +0. Merge master with your feature branch `git merge master` + +0. Resolve the conflicts and test that the code compiles and still works by running `./ac_run -t` + +0. If everything is OK, commit your final changes to the feature branch and merge it to master `git commit && git checkout master && git merge <feature_branch> && git push` + +0. Unless you really have to keep your feature branch around for historical/other reasons, remove it from remote by calling `git push origin --delete <feature_branch>` + +A flowchart is available at [doc/commitflowchart.png](https://bitbucket.org/jpekkila/astaroth/src/2d91df19dcb3/doc/commitflowchart.png?at=master). + +### About branches in general + +* Unused branches should not be kept around after merging them into master in order to avoid cluttering the repository. + +* `git branch -a --merged` shows a list of branches that have been merged to master and are likely not needed any more. + +* `git push origin --delete <branch>` deletes a remote branch while `git branch -d <branch>` deletes a local branch + +* If you think that you have messed up and lost work, run `git reflog` which lists the latest commits. All work that has been committed should be accessible with the hashes listed by this command with `git checkout <hash>`. ## Coding style.
diff --git a/acc/mhd_solver/stencil_process.sps b/acc/mhd_solver/stencil_process.sps index 39527e0..3547d5b 100644 --- a/acc/mhd_solver/stencil_process.sps +++ b/acc/mhd_solver/stencil_process.sps @@ -1,4 +1,4 @@ -#define LINDUCTION (1) +#define LMAGNETIC (1) #define LENTROPY (1) #define LTEMPERATURE (0) #define LGRAVITY (0) @@ -295,7 +295,7 @@ in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ}; out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ}; -#if LINDUCTION +#if LMAGNETIC in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; #endif @@ -314,7 +314,7 @@ Kernel void solve(Scalar dt) { out_lnrho = rk3(out_lnrho, lnrho, continuity(uu, lnrho), dt); - #if LINDUCTION + #if LMAGNETIC out_aa = rk3(out_aa, aa, induction(uu, aa), dt); #endif diff --git a/acc/pseudodisk/stencil_process_gravx.sps b/acc/pseudodisk/stencil_process_gravx.sps index 32d980a..0ccb4f9 100644 --- a/acc/pseudodisk/stencil_process_gravx.sps +++ b/acc/pseudodisk/stencil_process_gravx.sps @@ -1,4 +1,4 @@ -#define LINDUCTION (1) +#define LMAGNETIC (1) #define LENTROPY (1) @@ -200,7 +200,7 @@ in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ}; out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ}; -#if LINDUCTION +#if LMAGNETIC in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; #endif @@ -214,7 +214,7 @@ Kernel void solve(Scalar dt) { WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt)); - #if LINDUCTION + #if LMAGNETIC WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt)); #endif diff --git a/acc/pseudodisk/stencil_process_isotherm_gravx.sps b/acc/pseudodisk/stencil_process_isotherm_gravx.sps index f79b7ff..9584774 100644 --- a/acc/pseudodisk/stencil_process_isotherm_gravx.sps +++ b/acc/pseudodisk/stencil_process_isotherm_gravx.sps @@ -117,7 +117,7 @@ out Scalar out_lnrho = VTXBUF_LNRHO; in Vector uu = (int3) {VTXBUF_UUX, 
VTXBUF_UUY, VTXBUF_UUZ}; out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ}; -#if LINDUCTION +#if LMAGNETIC in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; #endif @@ -126,7 +126,7 @@ Kernel void solve(Scalar dt) { WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt)); - #if LINDUCTION + #if LMAGNETIC WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt)); #endif diff --git a/acc/pseudodisk/stencil_process_isotherm_linegrav.sps b/acc/pseudodisk/stencil_process_isotherm_linegrav.sps index a2b83f1..9f90e7c 100644 --- a/acc/pseudodisk/stencil_process_isotherm_linegrav.sps +++ b/acc/pseudodisk/stencil_process_isotherm_linegrav.sps @@ -122,7 +122,7 @@ out Scalar out_lnrho = VTXBUF_LNRHO; in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ}; out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ}; -#if LINDUCTION +#if LMAGNETIC in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; #endif @@ -131,7 +131,7 @@ Kernel void solve(Scalar dt) { WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt)); - #if LINDUCTION + #if LMAGNETIC WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt)); #endif diff --git a/acc/pseudodisk/stencil_process_linegrav.sps b/acc/pseudodisk/stencil_process_linegrav.sps index ecc6c99..e42e680 100644 --- a/acc/pseudodisk/stencil_process_linegrav.sps +++ b/acc/pseudodisk/stencil_process_linegrav.sps @@ -1,4 +1,4 @@ -#define LINDUCTION (1) +#define LMAGNETIC (1) #define LENTROPY (1) @@ -205,7 +205,7 @@ in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ}; out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ}; -#if LINDUCTION +#if LMAGNETIC in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ}; #endif @@ -219,7 +219,7 @@ Kernel void solve(Scalar dt) { WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), 
dt)); - #if LINDUCTION + #if LMAGNETIC WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt)); #endif diff --git a/acc/samples/common_header.h b/acc/samples/common_header.h index 14eed0c..7168873 100644 --- a/acc/samples/common_header.h +++ b/acc/samples/common_header.h @@ -76,7 +76,7 @@ extern "C" { // L-prefix inherited from the old Astaroth, no idea what it means // MV: L means a Logical switch variale, something having true of false value. #define LFORCING (0) // Note: forcing is disabled currently in the files generated by acc (compiler of our DSL) -#define LINDUCTION (1) +#define LMAGNETIC (1) #define LENTROPY (1) #define LTEMPERATURE (0) @@ -185,13 +185,13 @@ extern "C" { FUNC(VTXBUF_UUZ), \ // FUNC(VTXBUF_DYE), -#if LINDUCTION -#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\ +#if LMAGNETIC +#define AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC)\ FUNC(VTXBUF_AX), \ FUNC(VTXBUF_AY), \ FUNC(VTXBUF_AZ), #else -#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) +#define AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC) #endif #if LENTROPY @@ -210,7 +210,7 @@ extern "C" { #define AC_FOR_VTXBUF_HANDLES(FUNC)\ AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\ - AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\ + AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC)\ AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\ AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC) // clang-format on diff --git a/doc/commitflowchart.drawio b/doc/commitflowchart.drawio new file mode 100644 index 0000000..e62eedb --- /dev/null +++ b/doc/commitflowchart.drawio @@ -0,0 +1 @@ 
+7VxbU+M2FP4tfcj0Ccb3xI8LgbYzXWZn6MyWfemIWMQqtpWRFZLsr69kS77JCSY4lht4AOJjy5bP9TuXMLGv4+1vBKzCrziA0cQygu3Enk8sy7R9h/3hlF1O8a1pTlgSFIiLSsI9+gkF0RDUNQpgWruQYhxRtKoTFzhJ4ILWaIAQvKlf9oSj+lNXYAkVwv0CRCr1OwpomFNnrlHSf4doGconm4Y4EwN5sSCkIQjwpkKybyb2NcGY5p/i7TWMOPMkX/J1t3vOFhsjMKFdFiTfbx9WN8atl/74K/bv7iL488eFEEZKd/KFYcDeXxwmOGF/rgheJwHktzHYESY0xEucgOhPjFeMaDLiv5DSnZAeWFPMSCGNI3EWbhH9Wyznnx8q9Pm2cmK+kwcJJbtsyeXUlccP1ZPluuxILszfh7/EXjYJUorXZAEP8MYS6gbIEtID13mFMJkVQBxDth+2jsAIUPRS3wcQ6rgsrislxj4Iob1BgNJOBpKgeYwErbFL0NcpQbHJFxCtxZOuCATPaeZDUgrJxL5VZExCHD+u2VauNiGi8H4FMkZsmBOuC07cHBIKt4f5qb6/WGBLDyZceOGbNxWHKEhhxRdKWu8c8/yDOl+q901J7dGJXbrHGMGbbAAmwRcevNjhIgJpihY58RZFdcH2YiZeV0fn6rQTT7GTr+AZcrVk75chgDhGlJtNgAiDAtEuwwqlGSk2VGiEOYgdmbO6HRV2VbEj02oxJO9UhjRTWHqHFTaxF6Z1XqSU4Gd4jSNMyhDzxFSzQQIRWiZchRmDuBu74uxDDFt9ESdiFASZcbYxv26wPfDfchp+bKry32lhv3Uq9puqSg8SzHt0He+NnGLpN4zYkwtJOQ1LcZyGCHJXJVY1pFBs43jB+IpcrllIptzZAPaTwA37/UhAsgi1exXHGptXMU2FfQ8shTtbv+KMzq9MFQEsETevRQgXz3jNP148TvjzPRBznkRcEiKOSr0Wp5a04FKvwovgE9UuOnfq1o3HUUVXwN1BZDdQCBgSzPYYbkyBWF6FqvvkPlBSrgKr75g8MwpOSsB6MIywp6BVus8QKpIE6Sqvez2hLdeIU3g0p8UsBs34rNlBqziDjK9HI3m37ncDZd50WFBmqbBinnlDvdWRpq24nmZbsT8a/PIbetnirAaFX7bz6aw6Oyu7a43WtnVGdLnNavUJsh1PyuoS0zembkV0H2l66LUkJ8Omh7beWF4xjtJUXjOPmnGUtjKK6m1uGB0sKEel78UF7M3ArnLBisf7dD9ssBvueWY02pSvXC9hR6ly+Q56xRaOe+4ue6Q66ej16nYLpORYycjKJSlF2T02PHcbGcycaoeZapH0nJsHtj+9rNeKprrLfI4K9PMyX9xAJmdau2vmo34Lshm0dieHgj6jSD9RxOkcRbQOcDhqFJG5QSMXGFU7upkZzLRnBo6jMHJieVlvIkAv7GPWixCtjKzbL08/EnlWUtgGKmv236bsiEixvGm9dLVv6KS8c8erdRp2v8mZuv5G26ZVdb0W1XVPprptrXxFglkdbiBBjQQ3NatzM93VOVdvde6Y8oM55vKD43cM0q7Wlpx7eNL5E5udTOxax6MdNUtt8ctzdo8MrBXxVSC3o2N1gfg2IUxECQEly4noALNIzusJnULBoKO+jZzKbwmsltHirk37ZP5azXJVS9bZLG+OdfoSnLyGo83TxThL4dkDXv9KOFINGEz4ZWQcbGAEf9bCwVatM07GQVfh4FlXt8w6oDaNlhn/QWGap7eWcgxMa8TrArWNIl67XYf8NcM0NY+awwjStmLKE8FxtrEY8/MjK6eYpvZ6iquOoX2oSRDTtNRAMmyyqcLPahXHwAQtEQeEFxeBVPM+h3J7YGoxz1QwtSU6t87IngwSerbW2HB2uZxndYwNls7QIHdZsaU/uBaCbHogq6xvKkO25Rc1niCga6JGCK2Y1zasRj+xpaFrSlhWA72S2D+Hz35wbWC7cjvalae
1RuKpuc5YpyCK2sNYBtO9sx9MH9hk/h9piqemKd+qdcPKl5HH1PJtlqo8Q3eK4qlfVfvsm2Wezq2LytXdN5uq8OuS3Qgs/iHrLIOhJxDGKLrNll8Hak5LYj9tAWqnE4U6cPJBRNFs/E9bZr56EgU7LP83Uz5nXP6HK/vmPw== \ No newline at end of file diff --git a/doc/commitflowchart.png b/doc/commitflowchart.png new file mode 100644 index 0000000..8b6ccc2 Binary files /dev/null and b/doc/commitflowchart.png differ diff --git a/include/astaroth.h b/include/astaroth.h index efc2ad1..45159f5 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -67,17 +67,18 @@ extern "C" { // logical switches #include "user.h" +// clang-format off #ifndef USER_PROVIDED_DEFINES - #define STENCIL_ORDER (6) - #define NGHOST (STENCIL_ORDER/2) - #define LHYDRO (1) - #define LDENSITY (1) - #define LFORCING (1) - #define LINDUCTION (1) - #define LENTROPY (1) - #define LTEMPERATURE (0) - #define LMAGNETIC LINDUCTION + #define STENCIL_ORDER (6) + #define NGHOST (STENCIL_ORDER / 2) + #define LDENSITY (1) + #define LHYDRO (1) + #define LMAGNETIC (1) + #define LENTROPY (1) + #define LTEMPERATURE (0) + #define LFORCING (1) #endif +// clang-format on #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter @@ -193,54 +194,51 @@ extern "C" { * ============================================================================= */ // clang-format off -#ifdef LHYDRO -#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \ - FUNC(VTXBUF_UUX), \ - FUNC(VTXBUF_UUY), \ - FUNC(VTXBUF_UUZ), -#else -#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) -#endif - -#ifdef LDENSITY +#ifdef LDENSITY #define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \ - FUNC(VTXBUF_LNRHO), + FUNC(VTXBUF_LNRHO), #else #define AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) #endif -#ifdef LENTROPY +#ifdef LHYDRO +#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \ + FUNC(VTXBUF_UUX), \ + FUNC(VTXBUF_UUY), \ + FUNC(VTXBUF_UUZ), +#else +#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) +#endif + +#ifdef LMAGNETIC +#define AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC) \ + FUNC(VTXBUF_AX), \ + FUNC(VTXBUF_AY), \ + 
FUNC(VTXBUF_AZ), +#else +#define AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC) +#endif + +#ifdef LENTROPY #define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \ FUNC(VTXBUF_ENTROPY), #else #define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) #endif -#ifdef LMAGNETIC -#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \ - FUNC(VTXBUF_AX), \ - FUNC(VTXBUF_AY), \ - FUNC(VTXBUF_AZ), +//MR: Temperature must not have an additional variable slot, but should sit on the +// same as entropy. +#if LTEMPERATURE + #define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\ + FUNC(VTXBUF_TEMPERATURE), #else -#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) + #define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC) #endif #define AC_FOR_VTXBUF_HANDLES(FUNC) AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC) \ AC_FOR_DENSITY_VTXBUF_HANDLES(FUNC) \ AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC) \ - AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC) \ - -//MR: Temperature must not have an additional variable slot, but should sit on the -// same as entropy. -#ifndef USER_PROVIDED - #if LTEMPERATURE - #define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\ - FUNC(VTXBUF_TEMPERATURE), - #else - #define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC) - #endif -#endif - + AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC) \ // clang-format on /* @@ -248,19 +246,21 @@ extern "C" { * Single/double precision switch * ============================================================================= */ +// clang-format off #if AC_DOUBLE_PRECISION == 1 -typedef double AcReal; -typedef double3 AcReal3; -#define AC_REAL_MAX (DBL_MAX) -#define AC_REAL_MIN (DBL_MIN) -#define AC_REAL_EPSILON (DBL_EPSILON) + typedef double AcReal; + typedef double3 AcReal3; + #define AC_REAL_MAX (DBL_MAX) + #define AC_REAL_MIN (DBL_MIN) + #define AC_REAL_EPSILON (DBL_EPSILON) #else -typedef float AcReal; -typedef float3 AcReal3; -#define AC_REAL_MAX (FLT_MAX) -#define AC_REAL_MIN (FLT_MIN) -#define AC_REAL_EPSILON (FLT_EPSILON) + typedef float AcReal; + typedef float3 AcReal3; + #define AC_REAL_MAX (FLT_MAX) + #define AC_REAL_MIN (FLT_MIN) + 
#define AC_REAL_EPSILON (FLT_EPSILON) #endif +// clang-format on typedef struct { AcReal3 row[3]; @@ -296,7 +296,7 @@ typedef enum { RTYPE_MAX, RTYPE_MIN, RTYPE_RMS, RTYPE_RMS_EXP, NUM_REDUCTION_TYP typedef enum { AC_FOR_INT_PARAM_TYPES(AC_GEN_ID), NUM_INT_PARAM_TYPES } AcIntParam; typedef enum { AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID), NUM_REAL_PARAM_TYPES } AcRealParam; -//typedef enum { AC_FOR_VEC_PARAM_TYPES(AC_GEN_ID), NUM_VEC_PARAM_TYPES } AcVecParam; +// typedef enum { AC_FOR_VEC_PARAM_TYPES(AC_GEN_ID), NUM_VEC_PARAM_TYPES } AcVecParam; extern const char* intparam_names[]; // Defined in astaroth.cu extern const char* realparam_names[]; // Defined in astaroth.cu @@ -304,7 +304,7 @@ extern const char* realparam_names[]; // Defined in astaroth.cu typedef struct { int int_params[NUM_INT_PARAM_TYPES]; AcReal real_params[NUM_REAL_PARAM_TYPES]; - //AcReal* vec_params[NUM_VEC_PARAM_TYPES]; + // AcReal* vec_params[NUM_VEC_PARAM_TYPES]; } AcMeshInfo; /* @@ -418,35 +418,3 @@ AcResult acForcingVec(const AcReal forcing_magnitude, const AcReal3 k_force, con #ifdef __cplusplus } #endif - -/* - * ============================================================================= - * Notes - * ============================================================================= - */ -/* -typedef enum { - VTX_BUF_LNRHO, - VTX_BUF_UUX, - VTX_BUF_UUY, - VTX_BUF_UUZ, - NUM_VERTEX_BUFFER_HANDLES -} VertexBufferHandle - -// LNRHO etc -typedef struct { - AcReal* data; -} VertexBuffer; - -// Host -typedef struct { - VertexBuffer vertex_buffers[NUM_VERTEX_BUFFER_HANDLES]; - MeshInfo info; -} Mesh; - -// Device -typedef struct { - VertexBuffer in[NUM_VERTEX_BUFFER_HANDLES]; - VertexBuffer out[NUM_VERTEX_BUFFER_HANDLES]; -} VertexBufferArray; -*/ diff --git a/include/user.h b/include/user.h index c2075b4..2d4e33a 100644 --- a/include/user.h +++ b/include/user.h @@ -1,3 +1,4 @@ +// clang-format off #ifdef PENCIL_ASTAROTH #include "../cparam.inc_c.h" @@ -6,11 +7,16 @@ #include "PC_moduleflags.h" #define 
CONFIG_PATH #define AC_MULTIGPU_ENABLED (false) - #ifdef DOUBLE_PRECISION + #ifdef DOUBLE_PRECISION #define AC_DOUBLE_PRECISION 1 #else #define AC_DOUBLE_PRECISION 0 #endif + + #define LENTROPY (1) // TODO above + #define LFORCING (1) // TODO above + #define STENCIL_ORDER (6) // nghost is not 1, 2 or 3 (as it is not fetched from fortran yet). This causes the compilation to fail. TODO remove this line + #define USER_PROVIDED_DEFINES #endif - +// clang-format on diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index 007554e..a0cf7ae 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -77,7 +77,7 @@ acCheckDeviceAvailability(void) { int device_count; // Separate from num_devices to avoid side effects ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&device_count)); - if (device_count > 0) + if (device_count > 0) return AC_SUCCESS; else return AC_FAILURE; @@ -120,12 +120,14 @@ acInit(const AcMeshInfo& config) ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER); ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER); +#if VERBOSE_PRINTING // clang-format off printf("Grid m "); printInt3(grid.m); printf("\n"); printf("Grid n "); printInt3(grid.n); printf("\n"); printf("Subrid m "); printInt3(subgrid.m); printf("\n"); printf("Subrid n "); printInt3(subgrid.n); printf("\n"); // clang-format on +#endif // Initialize the devices for (int i = 0; i < num_devices; ++i) { @@ -271,8 +273,7 @@ AcResult acIntegrateStep(const int& isubstep, const AcReal& dt) { const int3 start = (int3){NGHOST, NGHOST, NGHOST}; - const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y, - NGHOST + subgrid.n.z}; + const int3 end = (int3){NGHOST + subgrid.n.x, NGHOST + subgrid.n.y, NGHOST + subgrid.n.z}; for (int i = 0; i < num_devices; ++i) { rkStep(devices[i], STREAM_PRIMARY, isubstep, start, end, dt); } diff --git a/src/core/device.cu b/src/core/device.cu index 6df10f7..dda8494 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -42,6 +42,10 @@ __constant__ Grid globalGrid; #define 
DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy)) #include "kernels/kernels.cuh" +#if PACKED_DATA_TRANSFERS // Defined in device.cuh +// #include "kernels/pack_unpack.cuh" +#endif + struct device_s { int id; AcMeshInfo local_config; @@ -53,6 +57,11 @@ struct device_s { VertexBufferArray vba; AcReal* reduce_scratchpad; AcReal* reduce_result; + +#if PACKED_DATA_TRANSFERS +// Declare memory for buffers needed for packed data transfers here +// AcReal* data_packing_buffer; +#endif }; AcResult @@ -154,6 +163,10 @@ createDevice(const int id, const AcMeshInfo device_config, Device* device_handle cudaMalloc(&device->reduce_scratchpad, AC_VTXBUF_COMPDOMAIN_SIZE_BYTES(device_config))); ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal))); +#if PACKED_DATA_TRANSFERS +// Allocate data required for packed transfers here (cudaMalloc) +#endif + // Device constants ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0, cudaMemcpyHostToDevice)); @@ -184,6 +197,10 @@ destroyDevice(Device device) cudaFree(device->reduce_scratchpad); cudaFree(device->reduce_result); +#if PACKED_DATA_TRANSFERS +// Free data required for packed tranfers here (cudaFree) +#endif + // Concurrency for (int i = 0; i < NUM_STREAM_TYPES; ++i) cudaStreamDestroy(device->streams[i]); @@ -373,3 +390,7 @@ loadGlobalGrid(const Device device, const Grid grid) cudaMemcpyToSymbol(globalGrid, &grid, sizeof(grid), 0, cudaMemcpyHostToDevice)); return AC_SUCCESS; } + +#if PACKED_DATA_TRANSFERS +// Functions for calling packed data transfers +#endif diff --git a/src/core/device.cuh b/src/core/device.cuh index 28dbd50..1e9becc 100644 --- a/src/core/device.cuh +++ b/src/core/device.cuh @@ -98,3 +98,8 @@ AcResult loadDeviceConstant(const Device device, const AcRealParam param, const /** */ AcResult loadGlobalGrid(const Device device, const Grid grid); + +// #define PACKED_DATA_TRANSFERS (1) %JP: placeholder for optimized ghost zone 
packing and transfers +#if PACKED_DATA_TRANSFERS +// Declarations used for packed data transfers +#endif diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index 97d0803..1ff9051 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -426,7 +426,7 @@ cross(const AcReal3& a, const AcReal3& b) } static __host__ __device__ __forceinline__ bool -is_valid(const AcReal a) +is_valid(const AcReal& a) { return !isnan(a) && !isinf(a); } @@ -546,7 +546,7 @@ normalized(const AcReal3& vec) // Sinusoidal forcing // https://arxiv.org/pdf/1704.04676.pdf // NOTE: This method of forcing is depracated. However, it will remain in here -// until a corresponding scheme exists in the new code. +// until a corresponding scheme exists in the new code. __constant__ AcReal3 forcing_vec; __constant__ AcReal forcing_phi; static __device__ __forceinline__ AcReal3 diff --git a/src/standalone/config_loader.cc b/src/standalone/config_loader.cc index 36e33e3..b89adb5 100644 --- a/src/standalone/config_loader.cc +++ b/src/standalone/config_loader.cc @@ -152,7 +152,7 @@ update_config(AcMeshInfo* config) config->real_params[AC_G_CONST]; config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star])); -#if VERBOSE_PRINTING +#if VERBOSE_PRINTING // Defined in astaroth.h printf("###############################################################\n"); printf("Config dimensions recalculated:\n"); print(*config); diff --git a/src/standalone/model/model_rk3.cc b/src/standalone/model/model_rk3.cc index 547cc63..dd04bcf 100644 --- a/src/standalone/model/model_rk3.cc +++ b/src/standalone/model/model_rk3.cc @@ -745,7 +745,7 @@ solve_alpha_step(const int step_number, const ModelScalar dt, const int i, const ModelScalar rate_of_change[NUM_VTXBUF_HANDLES] = {0}; rate_of_change[VTXBUF_LNRHO] = continuity(uu, lnrho); -#if LINDUCTION +#if LMAGNETIC const ModelVectorData aa = read_data(i, j, k, in.vertex_buffer, (int3){VTXBUF_AX, VTXBUF_AY, 
VTXBUF_AZ}); const ModelVector aa_res = induction(uu, aa);