Merge branch 'master' into sink_20190723

Hopefully the merge isssues were resolved.
This commit is contained in:
Miikka Vaisala
2019-09-02 11:58:48 +08:00
29 changed files with 1364 additions and 592 deletions

View File

@@ -8,17 +8,21 @@ FILENAME="${FULL_NAME%.*}"
EXTENSION="${FULL_NAME##*.}" EXTENSION="${FULL_NAME##*.}"
if [ "${EXTENSION}" = "sas" ]; then if [ "${EXTENSION}" = "sas" ]; then
echo "Generating stencil assembly stage ${FILENAME}.sas -> stencil_assembly.cuh"
COMPILE_FLAGS="-sas" # Generate stencil assembly stage COMPILE_FLAGS="-sas" # Generate stencil assembly stage
CUH_FILENAME="stencil_assembly.cuh" CUH_FILENAME="stencil_assembly.cuh"
echo "Generating stencil assembly stage ${FILENAME}.sas -> ${CUH_FILENAME}"
elif [ "${EXTENSION}" = "sps" ]; then elif [ "${EXTENSION}" = "sps" ]; then
echo "Generating stencil processing stage: ${FILENAME}.sps -> stencil_process.cuh"
COMPILE_FLAGS="-sps" # Generate stencil processing stage COMPILE_FLAGS="-sps" # Generate stencil processing stage
CUH_FILENAME="stencil_process.cuh" CUH_FILENAME="stencil_process.cuh"
echo "Generating stencil processing stage: ${FILENAME}.sps -> ${CUH_FILENAME}"
elif [ "${EXTENSION}" = "sdh" ]; then
COMPILE_FLAGS="-sdh" # Generate stencil definition header
CUH_FILENAME="stencil_defines.h"
echo "Generating stencil definition header: ${FILENAME}.sdh -> ${CUH_FILENAME}"
else else
echo "Error: unknown extension" ${EXTENSION} "of file" ${FULL_NAME} echo "Error: unknown extension" ${EXTENSION} "of file" ${FULL_NAME}
echo "Extension should be either .sas or .sps" echo "Extension should be either .sas, .sps or .sdh"
exit exit
fi fi
${ACC_DIR}/preprocess.sh $2 $1 | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME} ${ACC_DIR}/preprocess.sh $1 | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME}

5
acc/mhd_solver/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
build
testbin
# Except this file
!.gitignore

View File

@@ -1,3 +1,5 @@
#include "stencil_definition.sdh"
Preprocessed Scalar Preprocessed Scalar
value(in ScalarField vertex) value(in ScalarField vertex)
{ {
@@ -7,9 +9,7 @@ value(in ScalarField vertex)
Preprocessed Vector Preprocessed Vector
gradient(in ScalarField vertex) gradient(in ScalarField vertex)
{ {
return (Vector){derx(vertexIdx, vertex), return (Vector){derx(vertexIdx, vertex), dery(vertexIdx, vertex), derz(vertexIdx, vertex)};
dery(vertexIdx, vertex),
derz(vertexIdx, vertex)};
} }
#if LUPWD #if LUPWD
@@ -17,46 +17,46 @@ gradient(in ScalarField vertex)
Preprocessed Scalar Preprocessed Scalar
der6x_upwd(in ScalarField vertex) der6x_upwd(in ScalarField vertex)
{ {
Scalar inv_ds = DCONST_REAL(AC_inv_dsx); Scalar inv_ds = AC_inv_dsx;
return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
- Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] (-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
+ Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z] Scalar(15.0) * (vertex[vertexIdx.x + 1, vertexIdx.y, vertexIdx.z] +
+ vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z]) vertex[vertexIdx.x - 1, vertexIdx.y, vertexIdx.z]) -
- Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z] Scalar(6.0) * (vertex[vertexIdx.x + 2, vertexIdx.y, vertexIdx.z] +
+ vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z]) vertex[vertexIdx.x - 2, vertexIdx.y, vertexIdx.z]) +
+ vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z] vertex[vertexIdx.x + 3, vertexIdx.y, vertexIdx.z] +
+ vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])}; vertex[vertexIdx.x - 3, vertexIdx.y, vertexIdx.z])};
} }
Preprocessed Scalar Preprocessed Scalar
der6y_upwd(in ScalarField vertex) der6y_upwd(in ScalarField vertex)
{ {
Scalar inv_ds = DCONST_REAL(AC_inv_dsy); Scalar inv_ds = AC_inv_dsy;
return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
-Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] (-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
+Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z] Scalar(15.0) * (vertex[vertexIdx.x, vertexIdx.y + 1, vertexIdx.z] +
+ vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z]) vertex[vertexIdx.x, vertexIdx.y - 1, vertexIdx.z]) -
-Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z] Scalar(6.0) * (vertex[vertexIdx.x, vertexIdx.y + 2, vertexIdx.z] +
+ vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z]) vertex[vertexIdx.x, vertexIdx.y - 2, vertexIdx.z]) +
+ vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z] vertex[vertexIdx.x, vertexIdx.y + 3, vertexIdx.z] +
+ vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])}; vertex[vertexIdx.x, vertexIdx.y - 3, vertexIdx.z])};
} }
Preprocessed Scalar Preprocessed Scalar
der6z_upwd(in ScalarField vertex) der6z_upwd(in ScalarField vertex)
{ {
Scalar inv_ds = DCONST_REAL(AC_inv_dsz); Scalar inv_ds = AC_inv_dsz;
return (Scalar){ Scalar(1.0/60.0)*inv_ds* ( return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
-Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] (-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
+Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1] Scalar(15.0) * (vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 1] +
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1]) vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 1]) -
-Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2] Scalar(6.0) * (vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 2] +
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2]) vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 2]) +
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3] vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 3] +
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])}; vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 3])};
} }
#endif #endif
@@ -66,7 +66,8 @@ hessian(in ScalarField vertex)
{ {
Matrix hessian; Matrix hessian;
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex), derxz(vertexIdx, vertex)}; hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex),
derxz(vertexIdx, vertex)};
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)}; hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)}; hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};

View File

@@ -1,181 +0,0 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
/*
* =============================================================================
* Logical switches
* =============================================================================
*/
#define STENCIL_ORDER (6)
#define NGHOST (STENCIL_ORDER / 2)
#define LDENSITY (1)
#define LHYDRO (1)
#define LMAGNETIC (1)
#define LENTROPY (0)
#define LTEMPERATURE (0)
#define LFORCING (0)
#define LUPWD (1)
#define LSINK (1)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
/*
* =============================================================================
* User-defined parameters
* =============================================================================
*/
// clang-format off
#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)\
/* Other */\
FUNC(AC_max_steps), \
FUNC(AC_save_steps), \
FUNC(AC_bin_steps), \
FUNC(AC_bc_type),
#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)
#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_dsx), \
FUNC(AC_dsy), \
FUNC(AC_dsz), \
FUNC(AC_dsmin), \
/* physical grid*/\
FUNC(AC_xlen), \
FUNC(AC_ylen), \
FUNC(AC_zlen), \
FUNC(AC_xorig), \
FUNC(AC_yorig), \
FUNC(AC_zorig), \
/*Physical units*/\
FUNC(AC_unit_density),\
FUNC(AC_unit_velocity),\
FUNC(AC_unit_length),\
/* properties of gravitating star*/\
FUNC(AC_star_pos_x),\
FUNC(AC_star_pos_y),\
FUNC(AC_star_pos_z),\
FUNC(AC_M_star),\
/* properties of sink particle*/\
FUNC(AC_sink_pos_x),\
FUNC(AC_sink_pos_y),\
FUNC(AC_sink_pos_z),\
FUNC(AC_M_sink),\
FUNC(AC_M_sink_init),\
FUNC(AC_M_sink_Msun),\
FUNC(AC_soft),\
FUNC(AC_accretion_range),\
FUNC(AC_switch_accretion),\
/* Run params */\
FUNC(AC_cdt), \
FUNC(AC_cdtv), \
FUNC(AC_cdts), \
FUNC(AC_nu_visc), \
FUNC(AC_cs_sound), \
FUNC(AC_eta), \
FUNC(AC_mu0), \
FUNC(AC_cp_sound), \
FUNC(AC_gamma), \
FUNC(AC_cv_sound), \
FUNC(AC_lnT0), \
FUNC(AC_lnrho0), \
FUNC(AC_zeta), \
FUNC(AC_trans),\
/* Other */\
FUNC(AC_bin_save_t), \
/* Initial condition params */\
FUNC(AC_ampl_lnrho), \
FUNC(AC_ampl_uu), \
FUNC(AC_angl_uu), \
FUNC(AC_lnrho_edge),\
FUNC(AC_lnrho_out),\
/* Forcing parameters. User configured. */\
FUNC(AC_forcing_magnitude),\
FUNC(AC_relhel), \
FUNC(AC_kmin), \
FUNC(AC_kmax), \
/* Forcing parameters. Set by the generator. */\
FUNC(AC_forcing_phase),\
FUNC(AC_k_forcex),\
FUNC(AC_k_forcey),\
FUNC(AC_k_forcez),\
FUNC(AC_kaver),\
FUNC(AC_ff_hel_rex),\
FUNC(AC_ff_hel_rey),\
FUNC(AC_ff_hel_rez),\
FUNC(AC_ff_hel_imx),\
FUNC(AC_ff_hel_imy),\
FUNC(AC_ff_hel_imz),\
/* Additional helper params */\
/* (deduced from other params do not set these directly!) */\
FUNC(AC_G_const),\
FUNC(AC_unit_mass),\
FUNC(AC_GM_star),\
FUNC(AC_sq2GM_star),\
FUNC(AC_cs2_sound), \
FUNC(AC_inv_dsx), \
FUNC(AC_inv_dsy), \
FUNC(AC_inv_dsz),
#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)
// clang-format on
/*
* =============================================================================
* User-defined vertex buffers
* =============================================================================
*/
// clang-format off
#if LENTROPY
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO), \
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
FUNC(VTXBUF_ACCRETION), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ), \
FUNC(VTXBUF_ENTROPY),
#elif LMAGNETIC
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO), \
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
FUNC(VTXBUF_ACCRETION), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ),
#elif LHYDRO
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO), \
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
FUNC(VTXBUF_ACCRETION), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ),
#else
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO),
#endif
// clang-format on

View File

@@ -0,0 +1,130 @@
#define LDENSITY (1)
#define LHYDRO (1)
#define LMAGNETIC (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define LFORCING (1)
#define LUPWD (1)
#define LSINK (1)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
// Int params
uniform int AC_max_steps;
uniform int AC_save_steps;
uniform int AC_bin_steps;
uniform int AC_bc_type;
// Real params
uniform Scalar AC_dt;
// Spacing
uniform Scalar AC_dsx;
uniform Scalar AC_dsy;
uniform Scalar AC_dsz;
uniform Scalar AC_dsmin;
// physical grid
uniform Scalar AC_xlen;
uniform Scalar AC_ylen;
uniform Scalar AC_zlen;
uniform Scalar AC_xorig;
uniform Scalar AC_yorig;
uniform Scalar AC_zorig;
// Physical units
uniform Scalar AC_unit_density;
uniform Scalar AC_unit_velocity;
uniform Scalar AC_unit_length;
// properties of gravitating star
uniform Scalar AC_star_pos_x;
uniform Scalar AC_star_pos_y;
uniform Scalar AC_star_pos_z;
uniform Scalar AC_M_star;
// properties of sink particle
uniform Scalar AC_sink_pos_x;
uniform Scalar AC_sink_pos_y;
uniform Scalar AC_sink_pos_z;
uniform Scalar AC_M_sink;
uniform Scalar AC_M_sink_init;
uniform Scalar AC_M_sink_Msun;
uniform Scalar AC_soft;
uniform Scalar AC_accretion_range;
uniform Scalar AC_switch_accretion;
// Run params
uniform Scalar AC_cdt;
uniform Scalar AC_cdtv;
uniform Scalar AC_cdts;
uniform Scalar AC_nu_visc;
uniform Scalar AC_cs_sound;
uniform Scalar AC_eta;
uniform Scalar AC_mu0;
uniform Scalar AC_cp_sound;
uniform Scalar AC_gamma;
uniform Scalar AC_cv_sound;
uniform Scalar AC_lnT0;
uniform Scalar AC_lnrho0;
uniform Scalar AC_zeta;
uniform Scalar AC_trans;
// Other
uniform Scalar AC_bin_save_t;
// Initial condition params
uniform Scalar AC_ampl_lnrho;
uniform Scalar AC_ampl_uu;
uniform Scalar AC_angl_uu;
uniform Scalar AC_lnrho_edge;
uniform Scalar AC_lnrho_out;
// Forcing parameters. User configured.
uniform Scalar AC_forcing_magnitude;
uniform Scalar AC_relhel;
uniform Scalar AC_kmin;
uniform Scalar AC_kmax;
// Forcing parameters. Set by the generator.
uniform Scalar AC_forcing_phase;
uniform Scalar AC_k_forcex;
uniform Scalar AC_k_forcey;
uniform Scalar AC_k_forcez;
uniform Scalar AC_kaver;
uniform Scalar AC_ff_hel_rex;
uniform Scalar AC_ff_hel_rey;
uniform Scalar AC_ff_hel_rez;
uniform Scalar AC_ff_hel_imx;
uniform Scalar AC_ff_hel_imy;
uniform Scalar AC_ff_hel_imz;
// Additional helper params // (deduced from other params do not set these directly!)
uniform Scalar AC_G_CONST;
uniform Scalar AC_GM_star;
uniform Scalar AC_unit_mass;
uniform Scalar AC_sq2GM_star;
uniform Scalar AC_cs2_sound;
uniform Scalar AC_inv_dsx;
uniform Scalar AC_inv_dsy;
uniform Scalar AC_inv_dsz;
/*
* =============================================================================
* User-defined vertex buffers
* =============================================================================
*/
#if LENTROPY
uniform ScalarField VTXBUF_LNRHO;
uniform ScalarField VTXBUF_UUX;
uniform ScalarField VTXBUF_UUY;
uniform ScalarField VTXBUF_UUZ;
uniform ScalarField VTXBUF_AX;
uniform ScalarField VTXBUF_AY;
uniform ScalarField VTXBUF_AZ;
uniform ScalarField VTXBUF_ENTROPY;
#elif LMAGNETIC
uniform ScalarField VTXBUF_LNRHO;
uniform ScalarField VTXBUF_UUX;
uniform ScalarField VTXBUF_UUY;
uniform ScalarField VTXBUF_UUZ;
uniform ScalarField VTXBUF_AX;
uniform ScalarField VTXBUF_AY;
uniform ScalarField VTXBUF_AZ;
#elif LHYDRO
uniform ScalarField VTXBUF_LNRHO;
uniform ScalarField VTXBUF_UUX;
uniform ScalarField VTXBUF_UUY;
uniform ScalarField VTXBUF_UUZ;
#else
uniform ScalarField VTXBUF_LNRHO;
#endif

View File

@@ -1,26 +1,4 @@
// Declare uniforms (i.e. device constants) #include "stencil_definition.sdh"
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar cv_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar zeta;
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar lnT0;
uniform Scalar lnrho0;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
Vector Vector
value(in VectorField uu) value(in VectorField uu)
@@ -167,9 +145,9 @@ sink_accretion_velocity(int3 globalVertexIdx, in VectorField uu, Scalar dt) {
#endif #endif
Scalar Scalar
continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) { continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
{
return -dot(value(uu), gradient(lnrho)) return -dot(value(uu), gradient(lnrho))
#if LUPWD #if LUPWD
// This is a corrective hyperdiffusion term for upwinding. // This is a corrective hyperdiffusion term for upwinding.
@@ -185,25 +163,26 @@ continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar
#if LENTROPY #if LENTROPY
Vector Vector
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt) { momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt)
{
const Matrix S = stress_tensor(uu); const Matrix S = stress_tensor(uu);
const Scalar cs2 = cs2_sound * exp(gamma * value(ss) / cp_sound + (gamma - 1) * (value(lnrho) - lnrho0)); const Scalar cs2 = AC_cs2_sound * exp(AC_gamma * value(ss) / AC_cp_sound +
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density (AC_gamma - 1) * (value(lnrho) - AC_lnrho0));
const Vector j = (Scalar(1.) / AC_mu0) *
(gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Vector B = curl(aa); const Vector B = curl(aa);
// TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD? // TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD?
const Scalar inv_rho = Scalar(1.) / exp(value(lnrho)); const Scalar inv_rho = Scalar(1.) / exp(value(lnrho));
// Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\) // Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
// \1 // \1
const Vector mom = - mul(gradients(uu), value(uu)) const Vector mom = -mul(gradients(uu), value(uu)) -
- cs2 * ((Scalar(1.) / cp_sound) * gradient(ss) + gradient(lnrho)) cs2 * ((Scalar(1.) / AC_cp_sound) * gradient(ss) + gradient(lnrho)) +
+ inv_rho * cross(j, B) inv_rho * cross(j, B) +
+ nu_visc * ( AC_nu_visc *
laplace_vec(uu) (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
+ Scalar(1. / 3.) * gradient_of_divergence(uu) Scalar(2.) * mul(S, gradient(lnrho))) +
+ Scalar(2.) * mul(S, gradient(lnrho)) AC_zeta * gradient_of_divergence(uu)
)
+ zeta * gradient_of_divergence(uu)
#if LSINK #if LSINK
//Gravity term //Gravity term
+ sink_gravity(globalVertexIdx) + sink_gravity(globalVertexIdx)
@@ -218,40 +197,46 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in Scala
} }
#elif LTEMPERATURE #elif LTEMPERATURE
Vector Vector
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt) { momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt)
{
Vector mom; Vector mom;
const Matrix S = stress_tensor(uu);
const Vector pressure_term = (cp_sound - cv_sound) * (gradient(tt) + value(tt) * gradient(lnrho));
mom = -mul(gradients(uu), value(uu)) -
pressure_term +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
const Matrix S = stress_tensor(uu);
const Vector pressure_term = (AC_cp_sound - AC_cv_sound) *
(gradient(tt) + value(tt) * gradient(lnrho));
mom = -mul(gradients(uu), value(uu)) - pressure_term +
AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) +
AC_zeta * gradient_of_divergence(uu)
#if LSINK #if LSINK
+ sink_gravity(globalVertexIdx); + sink_gravity(globalVertexIdx);
#else #else
; ;
#endif #endif
#if LGRAVITY
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom; return mom;
} }
#else #else
Vector Vector
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) { momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
{
Vector mom; Vector mom;
const Matrix S = stress_tensor(uu); const Matrix S = stress_tensor(uu);
// Isothermal: we have constant speed of sound // Isothermal: we have constant speed of sound
mom = -mul(gradients(uu), value(uu)) - mom = -mul(gradients(uu), value(uu)) - AC_cs2_sound * gradient(lnrho) +
cs2_sound * gradient(lnrho) + AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
nu_visc * Scalar(2.) * mul(S, gradient(lnrho))) +
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) + AC_zeta * gradient_of_divergence(uu)
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
#if LSINK #if LSINK
+ sink_gravity(globalVertexIdx); + sink_gravity(globalVertexIdx)
//Corresponding loss of momentum //Corresponding loss of momentum
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass - //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014) sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
@@ -260,73 +245,77 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar d
; ;
#endif #endif
#if LGRAVITY
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom; return mom;
} }
#endif #endif
Vector Vector
induction(in VectorField uu, in VectorField aa) { induction(in VectorField uu, in VectorField aa)
{
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla // Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math, // x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497) // yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ]) // u cross B - AC_eta * AC_mu0 * (AC_mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa); const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa); const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa); const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out // Note, AC_mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap); const Vector ind = cross(value(uu), B) - AC_eta * (grad_div - lap);
return ind; return ind;
} }
#if LENTROPY #if LENTROPY
Scalar Scalar
lnT( in ScalarField ss, in ScalarField lnrho) { lnT(in ScalarField ss, in ScalarField lnrho)
const Scalar lnT = lnT0 + gamma * value(ss) / cp_sound + {
(gamma - Scalar(1.)) * (value(lnrho) - lnrho0); const Scalar lnT = AC_lnT0 + AC_gamma * value(ss) / AC_cp_sound +
(AC_gamma - Scalar(1.)) * (value(lnrho) - AC_lnrho0);
return lnT; return lnT;
} }
// Nabla dot (K nabla T) / (rho T) // Nabla dot (K nabla T) / (rho T)
Scalar Scalar
heat_conduction( in ScalarField ss, in ScalarField lnrho) { heat_conduction(in ScalarField ss, in ScalarField lnrho)
const Scalar inv_cp_sound = AcReal(1.) / cp_sound; {
const Scalar inv_AC_cp_sound = AcReal(1.) / AC_cp_sound;
const Vector grad_ln_chi = -gradient(lnrho); const Vector grad_ln_chi = -gradient(lnrho);
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) + const Scalar first_term = AC_gamma * inv_AC_cp_sound * laplace(ss) +
(gamma - AcReal(1.)) * laplace(lnrho); (AC_gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = gamma * inv_cp_sound * gradient(ss) + const Vector second_term = AC_gamma * inv_AC_cp_sound * gradient(ss) +
(gamma - AcReal(1.)) * gradient(lnrho); (AC_gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) + const Vector third_term = AC_gamma * (inv_AC_cp_sound * gradient(ss) + gradient(lnrho)) +
gradient(lnrho)) + grad_ln_chi; grad_ln_chi;
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * AC_cp_sound);
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * cp_sound); return AC_cp_sound * chi * (first_term + dot(second_term, third_term));
return cp_sound * chi * (first_term + dot(second_term, third_term));
} }
Scalar Scalar
heating(const int i, const int j, const int k) { heating(const int i, const int j, const int k)
{
return 1; return 1;
} }
Scalar Scalar
entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa) { entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa)
{
const Matrix S = stress_tensor(uu); const Matrix S = stress_tensor(uu);
const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho))); const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density const Vector j = (Scalar(1.) / AC_mu0) *
const Scalar RHS = H_CONST - C_CONST (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
+ eta * (mu0) * dot(j, j) const Scalar RHS = H_CONST - C_CONST + AC_eta * (AC_mu0)*dot(j, j) +
+ Scalar(2.) * exp(value(lnrho)) * nu_visc * contract(S) Scalar(2.) * exp(value(lnrho)) * AC_nu_visc * contract(S) +
+ zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu); AC_zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
return - dot(value(uu), gradient(ss)) return -dot(value(uu), gradient(ss)) + inv_pT * RHS + heat_conduction(ss, lnrho);
+ inv_pT * RHS
+ heat_conduction(ss, lnrho);
} }
#endif #endif
@@ -336,12 +325,13 @@ heat_transfer(in VectorField uu, in ScalarField lnrho, in ScalarField tt)
{ {
const Matrix S = stress_tensor(uu); const Matrix S = stress_tensor(uu);
const Scalar heat_diffusivity_k = 0.0008; // 8e-4; const Scalar heat_diffusivity_k = 0.0008; // 8e-4;
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) + heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) + nu_visc * contract(S) * (Scalar(1.) / cv_sound) - (gamma - 1) * value(tt) * divergence(uu); return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) +
heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) +
AC_nu_visc * contract(S) * (Scalar(1.) / AC_cv_sound) -
(AC_gamma - 1) * value(tt) * divergence(uu);
} }
#endif #endif
#if LFORCING #if LFORCING
Vector Vector
simple_vortex_forcing(Vector a, Vector b, Scalar magnitude){ simple_vortex_forcing(Vector a, Vector b, Scalar magnitude){
@@ -363,18 +353,13 @@ Vector
} }
} }
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable
// helicity
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable helicity
Vector Vector
helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vector ff_im, Scalar phi) helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vector ff_im, Scalar phi)
{ {
int accretion_switch = DCONST_INT(AC_switch_accretion);
if (accretion_switch == 0){
// JP: This looks wrong: // JP: This looks wrong:
// 1) Should it be dsx * nx instead of dsx * ny? // 1) Should it be AC_dsx * AC_nx instead of AC_dsx * AC_ny?
// 2) Should you also use globalGrid.n instead of the local n? // 2) Should you also use globalGrid.n instead of the local n?
// MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split // MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split
// in z direction not y direction. // in z direction not y direction.
@@ -383,9 +368,9 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto
// MV: Good idea. No an immediate priority. // MV: Good idea. No an immediate priority.
// Fun related article: // Fun related article:
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/ // https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
xx.x = xx.x*(2.0*M_PI/(dsx*globalGridN.x)); xx.x = xx.x * (2.0 * M_PI / (AC_dsx * globalGridN.x));
xx.y = xx.y*(2.0*M_PI/(dsy*globalGridN.y)); xx.y = xx.y * (2.0 * M_PI / (AC_dsy * globalGridN.y));
xx.z = xx.z*(2.0*M_PI/(dsz*globalGridN.z)); xx.z = xx.z * (2.0 * M_PI / (AC_dsz * globalGridN.z));
Scalar cos_phi = cos(phi); Scalar cos_phi = cos(phi);
Scalar sin_phi = sin(phi); Scalar sin_phi = sin(phi);
@@ -397,16 +382,11 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto
Scalar real_comp_phase = cos_k_dot_x * cos_phi - sin_k_dot_x * sin_phi; Scalar real_comp_phase = cos_k_dot_x * cos_phi - sin_k_dot_x * sin_phi;
Scalar imag_comp_phase = cos_k_dot_x * sin_phi + sin_k_dot_x * cos_phi; Scalar imag_comp_phase = cos_k_dot_x * sin_phi + sin_k_dot_x * cos_phi;
Vector force = (Vector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase, Vector force = (Vector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase,
ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase, ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase,
ff_re.z * real_comp_phase - ff_im.z * imag_comp_phase}; ff_re.z * real_comp_phase - ff_im.z * imag_comp_phase};
return force; return force;
} else {
return (Vector){0,0,0};
}
} }
Vector Vector
@@ -460,7 +440,6 @@ out ScalarField out_lnrho(VTXBUF_LNRHO);
in VectorField uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ); in VectorField uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
out VectorField out_uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ); out VectorField out_uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
#if LMAGNETIC #if LMAGNETIC
in VectorField aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ); in VectorField aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ);
out VectorField out_aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ); out VectorField out_aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ);
@@ -482,7 +461,9 @@ out Scalar out_accretion = VTXBUF_ACCRETION;
#endif #endif
Kernel void Kernel void
solve(Scalar dt) { solve()
{
Scalar dt = AC_dt;
out_lnrho = rk3(out_lnrho, lnrho, continuity(globalVertexIdx, uu, lnrho, dt), dt); out_lnrho = rk3(out_lnrho, lnrho, continuity(globalVertexIdx, uu, lnrho, dt), dt);
#if LMAGNETIC #if LMAGNETIC
@@ -490,13 +471,13 @@ solve(Scalar dt) {
#endif #endif
#if LENTROPY #if LENTROPY
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, ss, aa, dt), dt); out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt);
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt); out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
#elif LTEMPERATURE #elif LTEMPERATURE
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, tt), dt); out_uu = rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt); out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
#else #else
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, dt), dt); out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
#endif #endif
#if LFORCING #if LFORCING
@@ -506,14 +487,10 @@ solve(Scalar dt) {
#endif #endif
#if LSINK #if LSINK
// out_lnrho = log(exp(out_lnrho) - sink_accretion(globalVertexIdx, lnrho));
// out_accretion = value(accretion) + (sink_accretion(globalVertexIdx,lnrho) * dsx * dsy * dsz);
out_accretion = rk3(out_accretion, accretion, sink_accretion(globalVertexIdx, lnrho, dt), dt);// unit now is rho! out_accretion = rk3(out_accretion, accretion, sink_accretion(globalVertexIdx, lnrho, dt), dt);// unit now is rho!
// out_lnrho = log(exp(out_lnrho) - out_accretion);
if (step_number == 2) { if (step_number == 2) {
out_accretion = out_accretion * dsx * dsy * dsz;// unit is now mass! out_accretion = out_accretion * dsx * dsy * dsz;// unit is now mass!
} }
//TODO: implement accretion correction to contiunity equation and momentum equation.
#endif #endif
} }

View File

@@ -15,7 +15,7 @@ L [a-zA-Z_]
"void" { return VOID; } /* Rest of the types inherited from C */ "void" { return VOID; } /* Rest of the types inherited from C */
"int" { return INT; } "int" { return INT; }
"int3" { return INT3; } "int3" { return INT3; }
"ScalarField" { return SCALAR; } "ScalarField" { return SCALARFIELD; }
"VectorField" { return VECTOR; } "VectorField" { return VECTOR; }
"Kernel" { return KERNEL; } /* Function specifiers */ "Kernel" { return KERNEL; } /* Function specifiers */

View File

@@ -16,7 +16,7 @@ int yyget_lineno();
%token CONSTANT IN OUT UNIFORM %token CONSTANT IN OUT UNIFORM
%token IDENTIFIER NUMBER %token IDENTIFIER NUMBER
%token RETURN %token RETURN
%token SCALAR VECTOR MATRIX %token SCALAR VECTOR MATRIX SCALARFIELD
%token VOID INT INT3 %token VOID INT INT3
%token IF ELSE FOR WHILE ELIF %token IF ELSE FOR WHILE ELIF
%token LEQU LAND LOR LLEQU %token LEQU LAND LOR LLEQU
@@ -209,6 +209,7 @@ type_specifier: VOID
| SCALAR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALAR; } | SCALAR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALAR; }
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; } | VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; } | MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
| SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; }
; ;
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); } identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }

View File

@@ -36,7 +36,8 @@
ASTNode* root = NULL; ASTNode* root = NULL;
static const char inout_name_prefix[] = "handle_"; static const char inout_name_prefix[] = "handle_";
static bool doing_stencil_assembly = true; typedef enum { STENCIL_ASSEMBLY, STENCIL_PROCESS, STENCIL_HEADER } CompilationType;
static CompilationType compilation_type;
/* /*
* ============================================================================= * =============================================================================
@@ -59,9 +60,10 @@ static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
[SCALAR] = "AcReal", [SCALAR] = "AcReal",
[VECTOR] = "AcReal3", [VECTOR] = "AcReal3",
[MATRIX] = "AcMatrix", [MATRIX] = "AcMatrix",
[SCALARFIELD] = "AcReal",
// Type qualifiers // Type qualifiers
[KERNEL] = "template <int step_number> static " [KERNEL] = "template <int step_number> static __global__",
"__global__", //__launch_bounds__(RK_THREADBLOCK_SIZE, //__launch_bounds__(RK_THREADBLOCK_SIZE,
// RK_LAUNCH_BOUND_MIN_BLOCKS), // RK_LAUNCH_BOUND_MIN_BLOCKS),
[PREPROCESSED] = "static __device__ " [PREPROCESSED] = "static __device__ "
"__forceinline__", "__forceinline__",
@@ -212,11 +214,14 @@ translate_latest_symbol(void)
// FUNCTION PARAMETER // FUNCTION PARAMETER
else if (symbol->type == SYMBOLTYPE_FUNCTION_PARAMETER) { else if (symbol->type == SYMBOLTYPE_FUNCTION_PARAMETER) {
if (symbol->type_qualifier == IN || symbol->type_qualifier == OUT) { if (symbol->type_qualifier == IN || symbol->type_qualifier == OUT) {
if (doing_stencil_assembly) if (compilation_type == STENCIL_ASSEMBLY)
printf("const __restrict__ %s* %s", translate(symbol->type_specifier), printf("const __restrict__ %s* %s", translate(symbol->type_specifier),
symbol->identifier); symbol->identifier);
else else if (compilation_type == STENCIL_PROCESS)
printf("const %sData& %s", translate(symbol->type_specifier), symbol->identifier); printf("const %sData& %s", translate(symbol->type_specifier), symbol->identifier);
else
printf("Invalid compilation type %d, IN and OUT qualifiers not supported\n",
compilation_type);
} }
else { else {
print_symbol(handle); print_symbol(handle);
@@ -224,14 +229,18 @@ translate_latest_symbol(void)
} }
// UNIFORM // UNIFORM
else if (symbol->type_qualifier == UNIFORM) { else if (symbol->type_qualifier == UNIFORM) {
// if (compilation_type != STENCIL_HEADER) {
// printf("ERROR: %s can only be used in stencil headers\n", translation_table[UNIFORM]);
//}
/* Do nothing */ /* Do nothing */
} }
// IN / OUT // IN / OUT
else if (symbol->type != SYMBOLTYPE_FUNCTION_PARAMETER && else if (symbol->type != SYMBOLTYPE_FUNCTION_PARAMETER &&
(symbol->type_qualifier == IN || symbol->type_qualifier == OUT)) { (symbol->type_qualifier == IN || symbol->type_qualifier == OUT)) {
printf("static __device__ const %s %s%s", symbol->type_specifier == SCALAR ? "int" : "int3", printf("static __device__ const %s %s%s",
inout_name_prefix, symbol_table[handle].identifier); symbol->type_specifier == SCALARFIELD ? "int" : "int3", inout_name_prefix,
symbol_table[handle].identifier);
if (symbol->type_specifier == VECTOR) if (symbol->type_specifier == VECTOR)
printf(" = make_int3"); printf(" = make_int3");
} }
@@ -309,10 +318,16 @@ traverse(const ASTNode* node)
inside_kernel = true; inside_kernel = true;
// Kernel parameter boilerplate // Kernel parameter boilerplate
const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE, "; const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE";
if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION) if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION) {
printf("%s", kernel_parameter_boilerplate); printf("%s", kernel_parameter_boilerplate);
if (node->lhs != NULL) {
printf("Compilation error: function parameters for Kernel functions not allowed!\n");
exit(EXIT_FAILURE);
}
}
// Kernel builtin variables boilerplate (read input/output arrays and setup // Kernel builtin variables boilerplate (read input/output arrays and setup
// indices) // indices)
const char* kernel_builtin_variables_boilerplate = "GEN_KERNEL_BUILTIN_VARIABLES_" const char* kernel_builtin_variables_boilerplate = "GEN_KERNEL_BUILTIN_VARIABLES_"
@@ -369,6 +384,8 @@ traverse(const ASTNode* node)
// printf("%s%s", inout_name_prefix, symbol->identifier); // printf("%s%s", inout_name_prefix, symbol->identifier);
//} //}
if (symbol->type_qualifier == UNIFORM) { if (symbol->type_qualifier == UNIFORM) {
printf("DCONST(%s) ", symbol->identifier);
/*
if (symbol->type_specifier == SCALAR) if (symbol->type_specifier == SCALAR)
printf("DCONST_REAL(AC_%s) ", symbol->identifier); printf("DCONST_REAL(AC_%s) ", symbol->identifier);
else if (symbol->type_specifier == INT) else if (symbol->type_specifier == INT)
@@ -376,6 +393,7 @@ traverse(const ASTNode* node)
else else
printf("INVALID UNIFORM type specifier %s with %s\n", printf("INVALID UNIFORM type specifier %s with %s\n",
translate(symbol->type_specifier), symbol->identifier); translate(symbol->type_specifier), symbol->identifier);
*/
} }
else { else {
// Do a regular translation // Do a regular translation
@@ -545,14 +563,89 @@ generate_preprocessed_structures(void)
"); ");
} }
static void
generate_header(void)
{
printf("\n#pragma once\n");
// Int params
printf("#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == INT) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Int3 params
printf("#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == INT3) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Scalar params
printf("#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == SCALAR) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Vector params
printf("#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == VECTOR) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Scalar fields
printf("#define AC_FOR_VTXBUF_HANDLES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == SCALARFIELD) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
/*
printf("\n");
printf("typedef struct {\n");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == PREPROCESSED)
printf("%s %s;\n", translate(symbol_table[i].type_specifier),
symbol_table[i].identifier);
}
printf("} %sData;\n", translate(SCALAR));
*/
}
static void
generate_library_hooks(void)
{
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == KERNEL) {
printf("GEN_DEVICE_FUNC_HOOK(%s)\n", symbol_table[i].identifier);
// printf("GEN_NODE_FUNC_HOOK(%s)\n", symbol_table[i].identifier);
}
}
}
int int
main(int argc, char** argv) main(int argc, char** argv)
{ {
if (argc == 2) { if (argc == 2) {
if (!strcmp(argv[1], "-sas")) if (!strcmp(argv[1], "-sas"))
doing_stencil_assembly = true; compilation_type = STENCIL_ASSEMBLY;
else if (!strcmp(argv[1], "-sps")) else if (!strcmp(argv[1], "-sps"))
doing_stencil_assembly = false; compilation_type = STENCIL_PROCESS;
else if (!strcmp(argv[1], "-sdh"))
compilation_type = STENCIL_HEADER;
else else
printf("Unknown flag %s. Generating stencil assembly.\n", argv[1]); printf("Unknown flag %s. Generating stencil assembly.\n", argv[1]);
} }
@@ -560,8 +653,8 @@ main(int argc, char** argv)
printf("Usage: ./acc [flags]\n" printf("Usage: ./acc [flags]\n"
"Flags:\n" "Flags:\n"
"\t-sas - Generates code for the stencil assembly stage\n" "\t-sas - Generates code for the stencil assembly stage\n"
"\t-sps - Generates code for the stencil processing " "\t-sps - Generates code for the stencil processing stage\n"
"stage\n"); "\t-hh - Generates stencil definitions from a header file\n");
printf("\n"); printf("\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
@@ -576,8 +669,12 @@ main(int argc, char** argv)
// Traverse // Traverse
traverse(root); traverse(root);
if (doing_stencil_assembly) if (compilation_type == STENCIL_ASSEMBLY)
generate_preprocessed_structures(); generate_preprocessed_structures();
else if (compilation_type == STENCIL_HEADER)
generate_header();
else if (compilation_type == STENCIL_PROCESS)
generate_library_hooks();
// print_symbol_table(); // print_symbol_table();

View File

@@ -0,0 +1,67 @@
# Astaroth specification and user manual
Copyright (C) 2014-2019, Johannes Pekkila, Miikka Vaisala.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
# Introduction and background
Astaroth is a collection of tools for utilizing multiple graphics processing units (GPUs) efficiently in three-dimensional stencil computations. This document specifies the Astaroth application-programming interface (API) and domain-specific language (DSL).
Astaroth has been designed for the demands in computational sciences, where large stencils are often used to attain sufficient accuracy. The majority of previous work focuses on stencil computations with low-order stencils for which several efficient algorithms have been proposed, whereas work on high-order stencils is more limited. In addition, in computational physics multiple fields interact with each other, such as the velocity and magnetic fields of electrically conducting fluids. Such computations are especially challenging to solve efficiently because of the problem's relatively low operational intensity and the small caches provided by GPUs. Efficient methods for computations with several coupled fields and large stencils have not been addressed sufficiently in prior work.
With Astaroth, we have taken inspiration of image processing and graphics pipelines which rely on holding intermediate data in caches for the duration of computations, and extended the idea to work efficiently also with large three-dimensional stencils and an arbitrary number of coupled fields. As programming GPUs efficiently is relatively verbose and requires deep knowledge of the underlying hardware and execution model, we have created a high-level domain-specific language for expressing a wide range of tasks in computational sciences and provide a source-to-source compiler for translating stencil problems expressed in our language into efficient CUDA kernels.
The kernels generated from the Astaroth DSL are embedded in the Astaroth Core library, which is usable via the Astaroth API. While the Astaroth library is written in C++/CUDA, the API conforms to the C99 standard.
# Publications
The foundational work was done in (Väisälä, Pekkilä, 2017) and the library, API and DSL described in this document were introduced in (Pekkilä, 2019). We kindly wish the users of Astaroth to cite to these publications in their work.
> J. Pekkilä, Astaroth: A Library for Stencil Computations on Graphics Processing Units. Master's thesis, Aalto University School of Science, Espoo, Finland, 2019.
> M. S. Väisälä, Magnetic Phenomena of the Interstellar Medium in Theory and Observation. PhD thesis, University of Helsinki, Finland, 2017.
> J. Pekkilä, M. S. Väisälä, M. Käpylä, P. J. Käpylä, and O. Anjum, “Methods for compressible fluid simulation on GPUs using high-order finite differences, ”Computer Physics Communications, vol. 217, pp. 1122, Aug. 2017.
# Astaroth API
## Devices and nodes
## Meshes
## Streams and synchronization
## Interface
# Astaroth DSL
## Uniforms
## Vertex buffers
### Input and output buffers
## Built-in variables and functions

View File

@@ -0,0 +1,391 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="76.050636mm"
height="85.851082mm"
viewBox="0 0 76.050636 85.851082"
version="1.1"
id="svg8"
inkscape:version="0.92.3 (2405546, 2018-03-11)"
sodipodi:docname="domain_decomposition.svg"
inkscape:export-filename="/home/mvaisala/astaroth/doc/domain_decomposition.svg.png"
inkscape:export-xdpi="233.7916"
inkscape:export-ydpi="233.7916">
<defs
id="defs2">
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="-34.999999 : 849.45444 : 1"
inkscape:vp_y="0 : 3779.5274 : 0"
inkscape:vp_z="758.70081 : 849.45444 : 1"
inkscape:persp3d-origin="361.85039 : 662.36784 : 1"
id="perspective3883" />
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="-28.760416 : 819.37885 : 1"
inkscape:vp_y="0 : 3779.5274 : 0"
inkscape:vp_z="764.94038 : 819.37885 : 1"
inkscape:persp3d-origin="368.08997 : 632.29225 : 1"
id="perspective3817" />
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="139.37224 : 246.52176 : 1"
inkscape:vp_y="90.281989 : 563.44101 : 1"
inkscape:vp_z="193.82452 : 233.87492 : 1"
inkscape:persp3d-origin="90.281989 : 216.71184 : 1"
id="perspective3715" />
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="1.4"
inkscape:cx="65.802138"
inkscape:cy="225.49817"
inkscape:document-units="mm"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:snap-grids="true"
fit-margin-top="0"
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:window-width="1920"
inkscape:window-height="1025"
inkscape:window-x="0"
inkscape:window-y="27"
inkscape:window-maximized="1">
<inkscape:grid
type="xygrid"
id="grid3923"
originx="-51.915482"
originy="-200.5164" />
</sodipodi:namedview>
<metadata
id="metadata5">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-51.915483,-10.632538)">
<g
inkscape:corner7="-0.091655035 : 0.066875827 : 0.25 : 1"
inkscape:corner0="0.2311558 : 0.078514233 : 0 : 1"
inkscape:perspectiveID="#perspective3817"
id="g3815"
sodipodi:type="inkscape:box3d">
<path
points="345.49477,-34.871325 425.05445,-25.867869 425.05445,-55.566078 345.49477,-70.600089 "
d="m 345.49477,-70.600089 v 35.728764 l 79.55968,9.003456 v -29.698209 z"
inkscape:box3dsidetype="6"
style="fill:#353564;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3803"
sodipodi:type="inkscape:box3dside" />
<path
points="537.05691,-76.198873 537.05691,-38.224283 425.05445,-25.867869 425.05445,-55.566078 "
d="m 425.05445,-55.566078 112.00246,-20.632795 v 37.97459 l -112.00246,12.356414 z"
inkscape:box3dsidetype="11"
style="fill:#e9e9ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3805"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-102.2544 537.05691,-76.198873 425.05445,-55.566078 345.49477,-70.600089 "
d="M 345.49477,-70.600089 460.049,-102.2544 l 77.00791,26.055527 -112.00246,20.632795 z"
inkscape:box3dsidetype="5"
style="fill:#4d4d9f;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3807"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-53.828225 537.05691,-38.224283 425.05445,-25.867869 345.49477,-34.871325 "
d="m 345.49477,-34.871325 114.55423,-18.9569 77.00791,15.603942 -112.00246,12.356414 z"
inkscape:box3dsidetype="13"
style="fill:#afafde;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3809"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-53.828225 537.05691,-38.224283 537.05691,-76.198873 460.049,-102.2544 "
d="m 460.049,-102.2544 v 48.426175 l 77.00791,15.603942 v -37.97459 z"
inkscape:box3dsidetype="14"
style="fill:#d7d7ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3811"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-102.2544 460.049,-53.828225 345.49477,-34.871325 345.49477,-70.600089 "
d="M 345.49477,-70.600089 460.049,-102.2544 v 48.426175 l -114.55423,18.9569 z"
inkscape:box3dsidetype="3"
style="fill:#8686bf;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3813"
sodipodi:type="inkscape:box3dside" />
</g>
<g
inkscape:corner7="-0.067801071 : 0.080724505 : 0.25 : 1"
inkscape:corner0="0.29810879 : 0.093471507 : 0 : 1"
inkscape:perspectiveID="#perspective3883"
id="g3881"
sodipodi:type="inkscape:box3d">
<path
points="322.62977,-102.51699 401.43343,-87.835826 401.43343,-118.95615 322.62977,-139.63071 "
d="m 322.62977,-139.63071 v 37.11372 l 78.80366,14.681164 v -31.120324 z"
inkscape:box3dsidetype="6"
style="fill:#353564;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3869"
sodipodi:type="inkscape:box3dside" />
<path
points="520.44801,-152.18313 520.44801,-111.43056 401.43343,-87.835826 401.43343,-118.95615 "
d="m 401.43343,-118.95615 119.01458,-33.22698 v 40.75257 l -119.01458,23.594734 z"
inkscape:box3dsidetype="11"
style="fill:#e9e9ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3871"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-189.88386 520.44801,-152.18313 401.43343,-118.95615 322.62977,-139.63071 "
d="m 322.62977,-139.63071 120,-50.25315 77.81824,37.70073 -119.01458,33.22698 z"
inkscape:box3dsidetype="5"
style="fill:#4d4d9f;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3873"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-138.20214 520.44801,-111.43056 401.43343,-87.835826 322.62977,-102.51699 "
d="m 322.62977,-102.51699 120,-35.68515 77.81824,26.77158 -119.01458,23.594734 z"
inkscape:box3dsidetype="13"
style="fill:#afafde;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3875"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-138.20214 520.44801,-111.43056 520.44801,-152.18313 442.62977,-189.88386 "
d="m 442.62977,-189.88386 v 51.68172 l 77.81824,26.77158 v -40.75257 z"
inkscape:box3dsidetype="14"
style="fill:#d7d7ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3877"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-189.88386 442.62977,-138.20214 322.62977,-102.51699 322.62977,-139.63071 "
d="m 322.62977,-139.63071 120,-50.25315 v 51.68172 l -120,35.68515 z"
inkscape:box3dsidetype="3"
style="fill:#8686bf;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3879"
sodipodi:type="inkscape:box3dside" />
</g>
<g
id="g4791"
transform="translate(13.229163,-23.812503)"
style="fill:#ffffff;fill-opacity:1">
<rect
y="87.979164"
x="63.5"
height="10.583336"
width="42.333332"
id="rect3925"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="77.395836"
x="63.500004"
height="10.583337"
width="42.333332"
id="rect3925-2"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="66.8125"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-0"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="56.229168"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-23"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
</g>
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 60.854159,48.291665 15.875,-15.875"
id="path4862"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,48.291666 15.875,-15.875001"
id="path4862-1"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,90.624998 15.875,-15.875"
id="path4862-2"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,80.041668 15.875,-15.875"
id="path4862-9"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,69.458328 15.875,-15.875"
id="path4862-3"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,58.874998 15.875,-15.874999"
id="path4862-19"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 60.854158,58.874998 15.875001,-15.875"
id="path4862-4"
inkscape:connector-curvature="0" />
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="90.045952"
y="15.407738"
id="text4926"><tspan
sodipodi:role="line"
x="90.926521"
y="15.407738"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:10.58333302px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';text-align:center;text-anchor:middle;stroke-width:0.26458332"
id="tspan4928">Astaroth multi-GPU node </tspan><tspan
sodipodi:role="line"
x="90.045952"
y="28.636904"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:10.58333302px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';text-align:center;text-anchor:middle;stroke-width:0.26458332"
id="tspan4930">domain decomposition</tspan></text>
</g>
<g
inkscape:groupmode="layer"
id="layer2"
inkscape:label="Layer 2"
transform="translate(-51.915483,-10.632538)">
<g
id="g4791-8"
transform="translate(-2.6458431,-7.9375045)"
style="fill:#ffffff;fill-opacity:1">
<rect
y="87.979164"
x="63.5"
height="10.583336"
width="42.333332"
id="rect3925-9"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="77.395836"
x="63.500004"
height="10.583337"
width="42.333332"
id="rect3925-2-7"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="66.8125"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-0-3"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="56.229168"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-23-6"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="76.351189"
y="55.28421"
id="text4926-2"><tspan
sodipodi:role="line"
x="76.351189"
y="55.28421"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0">GPU 0</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="75.973213"
y="66.245522"
id="text4926-2-9"><tspan
sodipodi:role="line"
x="75.973213"
y="66.245522"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-9">GPU 1</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="75.784225"
y="76.072899"
id="text4926-2-4"><tspan
sodipodi:role="line"
x="75.784225"
y="76.072899"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-5">GPU 2</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="75.973213"
y="86.656235"
id="text4926-2-1"><tspan
sodipodi:role="line"
x="75.973213"
y="86.656235"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-0">GPU 3</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="51.59375"
y="70.970222"
id="text4926-2-3"><tspan
sodipodi:role="line"
x="51.59375"
y="70.970222"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-7">Nz</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="79.563988"
y="96.48362"
id="text4926-2-3-8"><tspan
sodipodi:role="line"
x="79.563988"
y="96.48362"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-7-8">Nx</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="114.71577"
y="87.223206"
id="text4926-2-3-6"><tspan
sodipodi:role="line"
x="114.71577"
y="87.223206"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-7-0">Ny</tspan></text>
</g>
</svg>

After

Width:  |  Height:  |  Size: 19 KiB

View File

@@ -45,6 +45,8 @@ typedef struct {
#endif // __CUDACC__ #endif // __CUDACC__
// Library flags // Library flags
#define STENCIL_ORDER (6)
#define NGHOST (STENCIL_ORDER / 2)
#define VERBOSE_PRINTING (1) #define VERBOSE_PRINTING (1)
// Built-in types and parameters // Built-in types and parameters
@@ -215,6 +217,9 @@ acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo info)
k * info.int_params[AC_mx] * info.int_params[AC_my]; k * info.int_params[AC_mx] * info.int_params[AC_my];
} }
/** Prints all parameters inside AcMeshInfo */
void acPrintMeshInfo(const AcMeshInfo config);
/* /*
static inline int static inline int
acGetParam(const AcMeshInfo info, const AcIntParam param) acGetParam(const AcMeshInfo info, const AcIntParam param)

View File

@@ -46,8 +46,24 @@ AcResult acDeviceSynchronizeStream(const Device device, const Stream stream);
AcResult acDeviceSwapBuffers(const Device device); AcResult acDeviceSwapBuffers(const Device device);
/** */ /** */
AcResult acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam param, AcResult acDeviceLoadScalarConstant(const Device device, const Stream stream,
const AcReal value); const AcRealParam param, const AcReal value);
/** */
AcResult acDeviceLoadVectorConstant(const Device device, const Stream stream,
const AcReal3Param param, const AcReal3 value);
/** */
AcResult acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
const int value);
/** */
AcResult acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
const int3 value);
/** */
AcResult acDeviceLoadMeshInfo(const Device device, const Stream stream,
const AcMeshInfo device_config);
/** */ /** */
AcResult acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream, AcResult acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream,

View File

@@ -8,17 +8,18 @@ fi
KERNEL_DIR=${AC_HOME}"/src/core/kernels" KERNEL_DIR=${AC_HOME}"/src/core/kernels"
ACC_DIR=${AC_HOME}"/acc" ACC_DIR=${AC_HOME}"/acc"
ACC_DEFAULT_HEADER="mhd_solver/stencil_defines.h"
ACC_DEFAULT_SAS="mhd_solver/stencil_assembly.sas" ACC_DEFAULT_SAS="mhd_solver/stencil_assembly.sas"
ACC_DEFAULT_SPS="mhd_solver/stencil_process.sps" ACC_DEFAULT_SPS="mhd_solver/stencil_process.sps"
ACC_DEFAULT_HEADER="mhd_solver/stencil_definition.sdh"
ACC_DEFAULT_INCLUDE_DIR="mhd_solver"
${ACC_DIR}/clean.sh ${ACC_DIR}/clean.sh
${ACC_DIR}/build_acc.sh ${ACC_DIR}/build_acc.sh
ACC_HEADER=${ACC_DEFAULT_HEADER}
ACC_SAS=${ACC_DEFAULT_SAS} ACC_SAS=${ACC_DEFAULT_SAS}
ACC_SPS=${ACC_DEFAULT_SPS} ACC_SPS=${ACC_DEFAULT_SPS}
ACC_HEADER=${ACC_DEFAULT_HEADER}
ACC_INCLUDE_DIR=${ACC_DEFAULT_INCLUDE_DIR}
while [ "$#" -gt 0 ] while [ "$#" -gt 0 ]
do do
@@ -56,9 +57,17 @@ echo "Header file:" ${ACC_DIR}/${ACC_HEADER}
echo "Assembly file: ${ACC_DIR}/${ACC_SAS}" echo "Assembly file: ${ACC_DIR}/${ACC_SAS}"
echo "Process file: ${ACC_DIR}/${ACC_SPS}" echo "Process file: ${ACC_DIR}/${ACC_SPS}"
cd ${KERNEL_DIR} cd ${ACC_DIR}/${ACC_INCLUDE_DIR}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SAS} ${ACC_DIR}/${ACC_HEADER} echo ${PWD}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SPS} ${ACC_DIR}/${ACC_HEADER} ${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SAS}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SPS}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_HEADER}
echo "Linking: " ${ACC_DIR}/${ACC_HEADER} " -> " ${AC_HOME}/include/stencil_defines.h echo "Moving stencil_assembly.cuh -> ${AC_HOME}/src/core/kernels"
ln -sf ${ACC_DIR}/${ACC_HEADER} ${AC_HOME}/include/stencil_defines.h mv stencil_assembly.cuh ${AC_HOME}/src/core/kernels
echo "Moving stencil_process.cuh -> ${AC_HOME}/src/core/kernels"
mv stencil_process.cuh ${AC_HOME}/src/core/kernels
echo "Moving stencil_defines.cuh -> ${AC_HOME}/include"
mv stencil_defines.h ${AC_HOME}/include

View File

@@ -0,0 +1 @@
nvcc -E ../src/core/device.cu -I ../include -I ../ > preprocessed_device_files.pp

View File

@@ -36,6 +36,21 @@ const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
static const int num_nodes = 1; static const int num_nodes = 1;
static Node nodes[num_nodes]; static Node nodes[num_nodes];
void
acPrintMeshInfo(const AcMeshInfo config)
{
for (int i = 0; i < NUM_INT_PARAMS; ++i)
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
for (int i = 0; i < NUM_INT3_PARAMS; ++i)
printf("[%s]: (%d, %d, %d)\n", int3param_names[i], config.int3_params[i].x,
config.int3_params[i].y, config.int3_params[i].z);
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
for (int i = 0; i < NUM_REAL3_PARAMS; ++i)
printf("[%s]: (%g, %g, %g)\n", real3param_names[i], double(config.real3_params[i].x),
double(config.real3_params[i].y), double(config.real3_params[i].z));
}
AcResult AcResult
acInit(const AcMeshInfo mesh_info) acInit(const AcMeshInfo mesh_info)
{ {

View File

@@ -39,35 +39,54 @@ typedef struct {
AcReal* out[NUM_VTXBUF_HANDLES]; AcReal* out[NUM_VTXBUF_HANDLES];
} VertexBufferArray; } VertexBufferArray;
struct device_s {
int id;
AcMeshInfo local_config;
// Concurrency
cudaStream_t streams[NUM_STREAM_TYPES];
// Memory
VertexBufferArray vba;
AcReal* reduce_scratchpad;
AcReal* reduce_result;
#if PACKED_DATA_TRANSFERS
// Declare memory for buffers needed for packed data transfers here
// AcReal* data_packing_buffer;
#endif
};
__constant__ AcMeshInfo d_mesh_info; __constant__ AcMeshInfo d_mesh_info;
static inline int __device__ static int __device__ __forceinline__
DCONST(const AcIntParam param) DCONST(const AcIntParam param)
{ {
return d_mesh_info.int_params[param]; return d_mesh_info.int_params[param];
} }
static inline int3 __device__ static int3 __device__ __forceinline__
DCONST(const AcInt3Param param) DCONST(const AcInt3Param param)
{ {
return d_mesh_info.int3_params[param]; return d_mesh_info.int3_params[param];
} }
static inline AcReal __device__ static AcReal __device__ __forceinline__
DCONST(const AcRealParam param) DCONST(const AcRealParam param)
{ {
return d_mesh_info.real_params[param]; return d_mesh_info.real_params[param];
} }
static inline AcReal3 __device__ static AcReal3 __device__ __forceinline__
DCONST(const AcReal3Param param) DCONST(const AcReal3Param param)
{ {
return d_mesh_info.real3_params[param]; return d_mesh_info.real3_params[param];
} }
constexpr VertexBufferHandle
DCONST(const VertexBufferHandle handle)
{
return handle;
}
#define DCONST_INT(x) DCONST(x) #define DCONST_INT(x) DCONST(x)
#define DCONST_INT3(x) DCONST(x) #define DCONST_INT3(x) DCONST(x)
#define DCONST_REAL(x) DCONST(x) #define DCONST_REAL(x) DCONST(x)
#define DCONST_REAL3(x) DCONST(x) #define DCONST_REAL3(x) DCONST(x)
//#define DCONST_INT(X) (d_mesh_info.int_params[X])
//#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
//#define DCONST_REAL(X) (d_mesh_info.real_params[X])
//#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy)) #define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy)) #define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
#define globalGridN (d_mesh_info.int3_params[AC_global_grid_n]) #define globalGridN (d_mesh_info.int3_params[AC_global_grid_n])
@@ -88,26 +107,8 @@ static dim3 rk3_tpb(32, 1, 4);
// #include "kernels/pack_unpack.cuh" // #include "kernels/pack_unpack.cuh"
#endif #endif
struct device_s {
int id;
AcMeshInfo local_config;
// Concurrency
cudaStream_t streams[NUM_STREAM_TYPES];
// Memory
VertexBufferArray vba;
AcReal* reduce_scratchpad;
AcReal* reduce_result;
#if PACKED_DATA_TRANSFERS
// Declare memory for buffers needed for packed data transfers here
// AcReal* data_packing_buffer;
#endif
};
// clang-format off // clang-format off
static __global__ void dummy_kernel(void) {} static __global__ void dummy_kernel(void) { DCONST((AcIntParam)0); DCONST((AcInt3Param)0); DCONST((AcRealParam)0); DCONST((AcReal3Param)0); }
// clang-format on // clang-format on
AcResult AcResult
@@ -153,8 +154,7 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
#endif #endif
// Device constants // Device constants
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0, acDeviceLoadMeshInfo(device, STREAM_DEFAULT, device_config);
cudaMemcpyHostToDevice));
printf("Created device %d (%p)\n", device->id, device); printf("Created device %d (%p)\n", device->id, device);
*device_handle = device; *device_handle = device;
@@ -303,8 +303,9 @@ acDeviceAutoOptimize(const Device device)
cudaEventRecord(tstart); // ---------------------------------------- Timing start cudaEventRecord(tstart); // ---------------------------------------- Timing start
acDeviceLoadScalarConstant(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON);
for (int i = 0; i < num_iterations; ++i) for (int i = 0; i < num_iterations; ++i)
solve<2><<<bpg, tpb>>>(start, end, device->vba, FLT_EPSILON); solve<2><<<bpg, tpb>>>(start, end, device->vba);
cudaEventRecord(tstop); // ----------------------------------------- Timing end cudaEventRecord(tstop); // ----------------------------------------- Timing end
cudaEventSynchronize(tstop); cudaEventSynchronize(tstop);
@@ -361,7 +362,7 @@ acDeviceSwapBuffers(const Device device)
} }
AcResult AcResult
acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam param, acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRealParam param,
const AcReal value) const AcReal value)
{ {
cudaSetDevice(device->id); cudaSetDevice(device->id);
@@ -371,6 +372,55 @@ acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam
return AC_SUCCESS; return AC_SUCCESS;
} }
AcResult
acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcReal3Param param,
const AcReal3 value)
{
cudaSetDevice(device->id);
const size_t offset = (size_t)&d_mesh_info.real3_params[param] - (size_t)&d_mesh_info;
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult
acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
const int value)
{
cudaSetDevice(device->id);
const size_t offset = (size_t)&d_mesh_info.int_params[param] - (size_t)&d_mesh_info;
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult
acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
const int3 value)
{
cudaSetDevice(device->id);
const size_t offset = (size_t)&d_mesh_info.int3_params[param] - (size_t)&d_mesh_info;
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult
acDeviceLoadMeshInfo(const Device device, const Stream stream, const AcMeshInfo device_config)
{
cudaSetDevice(device->id);
ERRCHK_ALWAYS(device_config.int_params[AC_nx] == device->local_config.int_params[AC_nx]);
ERRCHK_ALWAYS(device_config.int_params[AC_ny] == device->local_config.int_params[AC_ny]);
ERRCHK_ALWAYS(device_config.int_params[AC_nz] == device->local_config.int_params[AC_nz]);
ERRCHK_ALWAYS(device_config.int_params[AC_multigpu_offset] ==
device->local_config.int_params[AC_multigpu_offset]);
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &device_config, sizeof(device_config),
0, cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult AcResult
acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream, const AcMesh host_mesh, acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream, const AcMesh host_mesh,
const VertexBufferHandle vtxbuf_handle, const int3 src, const VertexBufferHandle vtxbuf_handle, const int3 src,
@@ -551,12 +601,13 @@ acDeviceIntegrateSubstep(const Device device, const Stream stream, const int ste
(unsigned int)ceil(n.y / AcReal(tpb.y)), // (unsigned int)ceil(n.y / AcReal(tpb.y)), //
(unsigned int)ceil(n.z / AcReal(tpb.z))); (unsigned int)ceil(n.z / AcReal(tpb.z)));
acDeviceLoadScalarConstant(device, stream, AC_dt, dt);
if (step_number == 0) if (step_number == 0)
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt); solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
else if (step_number == 1) else if (step_number == 1)
solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt); solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
else else
solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt); solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
ERRCHK_CUDA_KERNEL(); ERRCHK_CUDA_KERNEL();

View File

@@ -26,6 +26,8 @@
*/ */
#pragma once #pragma once
#include "src/core/math_utils.h"
#include <assert.h> #include <assert.h>
static __device__ __forceinline__ int static __device__ __forceinline__ int
@@ -321,65 +323,6 @@ read_data(const int i, const int j, const int k,
* ============================================================================= * =============================================================================
*/ */
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static __host__ __device__ __forceinline__ AcReal3
operator+(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a)
{
return (AcReal3){-a.x, -a.y, -a.z};
}
static __host__ __device__ __forceinline__ AcReal3 operator*(const AcReal a, const AcReal3& b)
{
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
static __host__ __device__ __forceinline__ AcReal
dot(const AcReal3& a, const AcReal3& b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
static __host__ __device__ __forceinline__ AcReal3
mul(const AcMatrix& aa, const AcReal3& x)
{
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
}
static __host__ __device__ __forceinline__ AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
AcReal3 c;
c.x = a.y * b.z - a.z * b.y;
c.y = a.z * b.x - a.x * b.z;
c.z = a.x * b.y - a.y * b.x;
return c;
}
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal& a)
{
return !isnan(a) && !isinf(a);
}
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal3& a)
{
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
}
/* /*
* ============================================================================= * =============================================================================
* Level 1 (Stencil Processing Stage) * Level 1 (Stencil Processing Stage)
@@ -642,4 +585,54 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
\ \
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z); const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
// clang-format off
#define GEN_DEVICE_FUNC_HOOK(identifier) \
template <int step_number> \
AcResult acDeviceKernel_##identifier(const Device device, const Stream stream, \
const int3 start, const int3 end) \
{ \
cudaSetDevice(device->id); \
\
const dim3 tpb(32, 1, 4); \
\
const int3 n = end - start; \
const dim3 bpg((unsigned int)ceil(n.x / AcReal(tpb.x)), \
(unsigned int)ceil(n.y / AcReal(tpb.y)), \
(unsigned int)ceil(n.z / AcReal(tpb.z))); \
\
identifier<step_number> \
<<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba); \
ERRCHK_CUDA_KERNEL(); \
\
return AC_SUCCESS; \
}
/*
#define GEN_NODE_FUNC_HOOK(identifier) \
template <int step_number> \
AcResult acNodeKernel_##identifier(const Node node, const Stream stream, const int3 start, \
const int3 end) \
{ \
acNodeSynchronizeStream(node, stream); \
\
for (int i = 0; i < node->num_devices; ++i) { \
\
const int3 d0 = (int3){NGHOST, NGHOST, NGHOST + i * node->subgrid.n.z}; \
const int3 d1 = d0 + (int3){node->subgrid.n.x, node->subgrid.n.y, node->subgrid.n.z}; \
\
const int3 da = max(start, d0); \
const int3 db = min(end, d1); \
\
if (db.z >= da.z) { \
const int3 da_local = da - (int3){0, 0, i * node->subgrid.n.z}; \
const int3 db_local = db - (int3){0, 0, i * node->subgrid.n.z}; \
acDeviceKernel_ #identifier(node->devices[i], stream, isubstep, da_local, \
db_local, dt); \
} \
} \
return AC_SUCCESS; \
}
*/
// clang-format on
#include "stencil_process.cuh" #include "stencil_process.cuh"

View File

@@ -25,9 +25,9 @@
* *
*/ */
#pragma once #pragma once
#include <cmath> //#include <cmath>
using namespace std; // Potentially bad practice to declare namespace std here // using namespace std; // Potentially bad practice to declare namespace std here
// #include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0 #include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0
//#include <tgmath.h> // Even this does not work //#include <tgmath.h> // Even this does not work
#include <stdlib.h> // rand #include <stdlib.h> // rand
@@ -64,16 +64,6 @@ sum(const T& a, const T& b)
return a + b; return a + b;
} }
template <class T>
static inline const T
is_valid(const T& val)
{
if (isnan(val) || isinf(val))
return false;
else
return true;
}
template <class T> template <class T>
static inline const T static inline const T
clamp(const T& val, const T& min, const T& max) clamp(const T& val, const T& min, const T& max)
@@ -87,20 +77,85 @@ randr()
return AcReal(rand()) / AcReal(RAND_MAX); return AcReal(rand()) / AcReal(RAND_MAX);
} }
static inline int3
operator+(const int3& a, const int3& b)
{
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static inline int3
operator-(const int3& a, const int3& b)
{
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static inline bool static inline bool
is_power_of_two(const unsigned val) is_power_of_two(const unsigned val)
{ {
return val && !(val & (val - 1)); return val && !(val & (val - 1));
} }
#ifdef __CUDACC__
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
#else
#define HOST_DEVICE_INLINE inline
#endif // __CUDACC__
static HOST_DEVICE_INLINE AcReal3
operator+(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static HOST_DEVICE_INLINE int3
operator+(const int3& a, const int3& b)
{
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static HOST_DEVICE_INLINE AcReal3
operator-(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static HOST_DEVICE_INLINE int3
operator-(const int3& a, const int3& b)
{
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static HOST_DEVICE_INLINE AcReal3
operator-(const AcReal3& a)
{
return (AcReal3){-a.x, -a.y, -a.z};
}
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
{
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
static HOST_DEVICE_INLINE AcReal
dot(const AcReal3& a, const AcReal3& b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
static HOST_DEVICE_INLINE AcReal3
mul(const AcMatrix& aa, const AcReal3& x)
{
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
}
static HOST_DEVICE_INLINE AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
AcReal3 c;
c.x = a.y * b.z - a.z * b.y;
c.y = a.z * b.x - a.x * b.z;
c.z = a.x * b.y - a.y * b.x;
return c;
}
static HOST_DEVICE_INLINE bool
is_valid(const AcReal a)
{
return !isnan(a) && !isinf(a);
}
static HOST_DEVICE_INLINE bool
is_valid(const AcReal3& a)
{
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
}

View File

@@ -429,7 +429,7 @@ acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param
acNodeSynchronizeStream(node, stream); acNodeSynchronizeStream(node, stream);
// #pragma omp parallel for // #pragma omp parallel for
for (int i = 0; i < node->num_devices; ++i) { for (int i = 0; i < node->num_devices; ++i) {
acDeviceLoadConstant(node->devices[i], stream, param, value); acDeviceLoadScalarConstant(node->devices[i], stream, param, value);
} }
return AC_SUCCESS; return AC_SUCCESS;
} }

View File

@@ -8,5 +8,5 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
find_package(MPI REQUIRED) find_package(MPI REQUIRED)
add_executable(mpitest main.c) add_executable(mpitest main.c)
target_include_directories(mpitest PRIVATE ${MPI_C_INCLUDE_PATH}) target_include_directories(mpitest PRIVATE ${CMAKE_SOURCE_DIR}/src/standalone ${MPI_C_INCLUDE_PATH})
target_link_libraries(mpitest PRIVATE ${MPI_C_LIBRARIES} astaroth_core) target_link_libraries(mpitest astaroth_core astaroth_standalone ${MPI_C_LIBRARIES})

View File

@@ -16,13 +16,120 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>. along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/ */
/**
Running: mpirun -np <num processes> <executable>
*/
#undef NDEBUG // Assert always
#include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "astaroth.h" #include "astaroth.h"
#include "autotest.h"
#include <mpi.h> #include <mpi.h>
// From Astaroth Standalone
#include "config_loader.h"
#include "model/host_memory.h"
static void
distribute_mesh(const AcMesh* src, AcMesh* dst)
{
MPI_Datatype datatype = MPI_FLOAT;
if (sizeof(AcReal) == 8)
datatype = MPI_DOUBLE;
int process_id, num_processes;
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
const size_t count = acVertexBufferSize(dst->info);
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
// Communicate to self
if (process_id == 0) {
assert(src);
assert(dst);
memcpy(&dst->vertex_buffer[i][0], //
&src->vertex_buffer[i][0], //
count * sizeof(src->vertex_buffer[i][0]));
}
// Communicate to others
for (int j = 1; j < num_processes; ++j) {
if (process_id == 0) {
assert(src);
// Send
// TODO RECHECK THESE j INDICES
const size_t src_idx = j * dst->info.int_params[AC_mx] *
dst->info.int_params[AC_my] * src->info.int_params[AC_nz] /
num_processes;
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, j, 0, MPI_COMM_WORLD);
}
else {
assert(dst);
// Recv
const size_t dst_idx = 0;
MPI_Status status;
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, 0, 0, MPI_COMM_WORLD,
&status);
}
}
}
}
static void
gather_mesh(const AcMesh* src, AcMesh* dst)
{
MPI_Datatype datatype = MPI_FLOAT;
if (sizeof(AcReal) == 8)
datatype = MPI_DOUBLE;
int process_id, num_processes;
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
size_t count = acVertexBufferSize(src->info);
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
// Communicate to self
if (process_id == 0) {
assert(src);
assert(dst);
memcpy(&dst->vertex_buffer[i][0], //
&src->vertex_buffer[i][0], //
count * sizeof(AcReal));
}
// Communicate to others
for (int j = 1; j < num_processes; ++j) {
if (process_id == 0) {
// Recv
// const size_t dst_idx = j * acVertexBufferCompdomainSize(dst->info);
const size_t dst_idx = j * dst->info.int_params[AC_mx] *
dst->info.int_params[AC_my] * dst->info.int_params[AC_nz] /
num_processes;
assert(dst_idx + count <= acVertexBufferSize(dst->info));
MPI_Status status;
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, j, 0, MPI_COMM_WORLD,
&status);
}
else {
// Send
const size_t src_idx = 0;
assert(src_idx + count <= acVertexBufferSize(src->info));
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, 0, 0, MPI_COMM_WORLD);
}
}
}
}
int int
main(void) main(void)
{ {
@@ -37,14 +144,39 @@ main(void)
MPI_Get_processor_name(processor_name, &name_len); MPI_Get_processor_name(processor_name, &name_len);
printf("Processor %s. Process %d of %d.\n", processor_name, process_id, num_processes); printf("Processor %s. Process %d of %d.\n", processor_name, process_id, num_processes);
AcMeshInfo info = { AcMeshInfo mesh_info;
.int_params[AC_nx] = 128, load_config(&mesh_info);
.int_params[AC_ny] = 64, update_config(&mesh_info);
.int_params[AC_nz] = 32,
}; AcMesh* main_mesh = NULL;
acInit(info); ModelMesh* model_mesh = NULL;
acIntegrate(0.1f); if (process_id == 0) {
acQuit(); main_mesh = acmesh_create(mesh_info);
acmesh_init_to(INIT_TYPE_RANDOM, main_mesh);
model_mesh = modelmesh_create(mesh_info);
acmesh_to_modelmesh(*main_mesh, model_mesh);
}
AcMeshInfo submesh_info = mesh_info;
submesh_info.int_params[AC_nz] /= num_processes;
update_config(&submesh_info);
AcMesh* submesh = acmesh_create(submesh_info);
/////////////////////
distribute_mesh(main_mesh, submesh);
gather_mesh(submesh, main_mesh);
/////////////////////////
// Autotest
bool is_acceptable = verify_meshes(*model_mesh, *main_mesh);
/////
acmesh_destroy(submesh);
if (process_id == 0) {
modelmesh_destroy(model_mesh);
acmesh_destroy(main_mesh);
}
MPI_Finalize(); MPI_Finalize();
return EXIT_SUCCESS; return EXIT_SUCCESS;

View File

@@ -25,10 +25,11 @@ add_compile_options(-pipe ${OpenMP_CXX_FLAGS})
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow) add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
## Compile and link ## Compile and link
add_library(astaroth_standalone ${SOURCES}) add_library(astaroth_standalone STATIC ${SOURCES})
target_link_libraries(astaroth_standalone PRIVATE astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
add_executable(ac_run main.cc) add_executable(ac_run main.cc)
target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY}) target_link_libraries(ac_run PRIVATE astaroth_standalone)
# Define the config directory # Define the config directory
if (ALTER_CONF) if (ALTER_CONF)

View File

@@ -75,6 +75,12 @@ static const InitType test_cases[] = {INIT_TYPE_RANDOM, INIT_TYPE_XWAVE,
INIT_TYPE_GAUSSIAN_RADIAL_EXPL, INIT_TYPE_ABC_FLOW}; INIT_TYPE_GAUSSIAN_RADIAL_EXPL, INIT_TYPE_ABC_FLOW};
// #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) // #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
static inline bool
is_valid(const ModelScalar a)
{
return !isnan(a) && !isinf(a);
}
#if TEST_TYPE == \ #if TEST_TYPE == \
QUICK_TEST // REGULAR TEST START HERE QUICK_TEST // REGULAR TEST START HERE
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------

View File

@@ -34,15 +34,6 @@
#include "src/core/errchk.h" #include "src/core/errchk.h"
#include "src/core/math_utils.h" #include "src/core/math_utils.h"
static inline void
print(const AcMeshInfo& config)
{
for (int i = 0; i < NUM_INT_PARAMS; ++i)
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
}
/** /**
\brief Find the index of the keyword in names \brief Find the index of the keyword in names
\return Index in range 0...n if the keyword is in names. -1 if the keyword was \return Index in range 0...n if the keyword is in names. -1 if the keyword was
@@ -163,7 +154,7 @@ update_config(AcMeshInfo* config)
#if VERBOSE_PRINTING // Defined in astaroth.h #if VERBOSE_PRINTING // Defined in astaroth.h
printf("###############################################################\n"); printf("###############################################################\n");
printf("Config dimensions recalculated:\n"); printf("Config dimensions recalculated:\n");
print(*config); acPrintMeshInfo(*config);
printf("###############################################################\n"); printf("###############################################################\n");
#endif #endif
} }

View File

@@ -26,7 +26,9 @@
*/ */
#include "host_forcing.h" #include "host_forcing.h"
#include "src/core/math_utils.h" // #include "src/core/math_utils.h"
#include <cmath>
using namespace std;
// The is a wrapper for genering random numbers with a chosen system. // The is a wrapper for genering random numbers with a chosen system.
AcReal AcReal
@@ -36,7 +38,7 @@ get_random_number_01()
return AcReal(rand()) / AcReal(RAND_MAX); return AcReal(rand()) / AcReal(RAND_MAX);
} }
AcReal3 static AcReal3
cross(const AcReal3& a, const AcReal3& b) cross(const AcReal3& a, const AcReal3& b)
{ {
AcReal3 c; AcReal3 c;
@@ -48,13 +50,13 @@ cross(const AcReal3& a, const AcReal3& b)
return c; return c;
} }
AcReal static AcReal
dot(const AcReal3& a, const AcReal3& b) dot(const AcReal3& a, const AcReal3& b)
{ {
return a.x * b.x + a.y * b.y + a.z * b.z; return a.x * b.x + a.y * b.y + a.z * b.z;
} }
AcReal3 static AcReal3
vec_norm(const AcReal3& a) vec_norm(const AcReal3& a)
{ {
AcReal3 c; AcReal3 c;
@@ -67,7 +69,7 @@ vec_norm(const AcReal3& a)
return c; return c;
} }
AcReal3 static AcReal3
vec_multi_scal(const AcReal scal, const AcReal3& a) vec_multi_scal(const AcReal scal, const AcReal3& a)
{ {
AcReal3 c; AcReal3 c;

View File

@@ -32,14 +32,6 @@
AcReal get_random_number_01(); AcReal get_random_number_01();
AcReal3 cross(const AcReal3& a, const AcReal3& b);
AcReal dot(const AcReal3& a, const AcReal3& b);
AcReal3 vec_norm(const AcReal3& a);
AcReal3 vec_multi_scal(const AcReal scal, const AcReal3& a);
AcReal3 helical_forcing_k_generator(const AcReal kmax, const AcReal kmin); AcReal3 helical_forcing_k_generator(const AcReal kmax, const AcReal kmin);
void helical_forcing_e_generator(AcReal3* e_force, const AcReal3 k_force); void helical_forcing_e_generator(AcReal3* e_force, const AcReal3 k_force);

View File

@@ -31,6 +31,16 @@
#include "host_memory.h" #include "host_memory.h"
#include "model_boundconds.h" #include "model_boundconds.h"
// Standalone flags
#define LDENSITY (1)
#define LHYDRO (1)
#define LMAGNETIC (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define LFORCING (1)
#define LUPWD (1)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
typedef struct { typedef struct {
ModelScalar x, y, z; ModelScalar x, y, z;
} ModelVector; } ModelVector;

View File

@@ -27,6 +27,7 @@
#pragma once #pragma once
#include "astaroth.h" #include "astaroth.h"
#include "math.h"
typedef long double ModelScalar; typedef long double ModelScalar;