Merge branch 'master' into sink_20190723

Hopefully the merge isssues were resolved.
This commit is contained in:
Miikka Vaisala
2019-09-02 11:58:48 +08:00
29 changed files with 1364 additions and 592 deletions

View File

@@ -8,17 +8,21 @@ FILENAME="${FULL_NAME%.*}"
EXTENSION="${FULL_NAME##*.}"
if [ "${EXTENSION}" = "sas" ]; then
echo "Generating stencil assembly stage ${FILENAME}.sas -> stencil_assembly.cuh"
COMPILE_FLAGS="-sas" # Generate stencil assembly stage
CUH_FILENAME="stencil_assembly.cuh"
echo "Generating stencil assembly stage ${FILENAME}.sas -> ${CUH_FILENAME}"
elif [ "${EXTENSION}" = "sps" ]; then
echo "Generating stencil processing stage: ${FILENAME}.sps -> stencil_process.cuh"
COMPILE_FLAGS="-sps" # Generate stencil processing stage
CUH_FILENAME="stencil_process.cuh"
echo "Generating stencil processing stage: ${FILENAME}.sps -> ${CUH_FILENAME}"
elif [ "${EXTENSION}" = "sdh" ]; then
COMPILE_FLAGS="-sdh" # Generate stencil definition header
CUH_FILENAME="stencil_defines.h"
echo "Generating stencil definition header: ${FILENAME}.sdh -> ${CUH_FILENAME}"
else
echo "Error: unknown extension" ${EXTENSION} "of file" ${FULL_NAME}
echo "Extension should be either .sas or .sps"
echo "Extension should be either .sas, .sps or .sdh"
exit
fi
${ACC_DIR}/preprocess.sh $2 $1 | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME}
${ACC_DIR}/preprocess.sh $1 | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME}

5
acc/mhd_solver/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
build
testbin
# Except this file
!.gitignore

View File

@@ -1,3 +1,5 @@
#include "stencil_definition.sdh"
Preprocessed Scalar
value(in ScalarField vertex)
{
@@ -7,9 +9,7 @@ value(in ScalarField vertex)
Preprocessed Vector
gradient(in ScalarField vertex)
{
return (Vector){derx(vertexIdx, vertex),
dery(vertexIdx, vertex),
derz(vertexIdx, vertex)};
return (Vector){derx(vertexIdx, vertex), dery(vertexIdx, vertex), derz(vertexIdx, vertex)};
}
#if LUPWD
@@ -17,46 +17,46 @@ gradient(in ScalarField vertex)
Preprocessed Scalar
der6x_upwd(in ScalarField vertex)
{
Scalar inv_ds = DCONST_REAL(AC_inv_dsx);
Scalar inv_ds = AC_inv_dsx;
return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
- Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
+ Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z]
+ vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z])
- Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z]
+ vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z])
+ vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z]
+ vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])};
return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
(-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
Scalar(15.0) * (vertex[vertexIdx.x + 1, vertexIdx.y, vertexIdx.z] +
vertex[vertexIdx.x - 1, vertexIdx.y, vertexIdx.z]) -
Scalar(6.0) * (vertex[vertexIdx.x + 2, vertexIdx.y, vertexIdx.z] +
vertex[vertexIdx.x - 2, vertexIdx.y, vertexIdx.z]) +
vertex[vertexIdx.x + 3, vertexIdx.y, vertexIdx.z] +
vertex[vertexIdx.x - 3, vertexIdx.y, vertexIdx.z])};
}
Preprocessed Scalar
der6y_upwd(in ScalarField vertex)
{
Scalar inv_ds = DCONST_REAL(AC_inv_dsy);
Scalar inv_ds = AC_inv_dsy;
return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
-Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
+Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z]
+ vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z])
-Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z]
+ vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z])
+ vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z]
+ vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])};
return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
(-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
Scalar(15.0) * (vertex[vertexIdx.x, vertexIdx.y + 1, vertexIdx.z] +
vertex[vertexIdx.x, vertexIdx.y - 1, vertexIdx.z]) -
Scalar(6.0) * (vertex[vertexIdx.x, vertexIdx.y + 2, vertexIdx.z] +
vertex[vertexIdx.x, vertexIdx.y - 2, vertexIdx.z]) +
vertex[vertexIdx.x, vertexIdx.y + 3, vertexIdx.z] +
vertex[vertexIdx.x, vertexIdx.y - 3, vertexIdx.z])};
}
Preprocessed Scalar
der6z_upwd(in ScalarField vertex)
{
Scalar inv_ds = DCONST_REAL(AC_inv_dsz);
Scalar inv_ds = AC_inv_dsz;
return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
-Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
+Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1]
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1])
-Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2]
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2])
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3]
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])};
return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
(-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
Scalar(15.0) * (vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 1] +
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 1]) -
Scalar(6.0) * (vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 2] +
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 2]) +
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 3] +
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 3])};
}
#endif
@@ -66,9 +66,10 @@ hessian(in ScalarField vertex)
{
Matrix hessian;
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex), derxz(vertexIdx, vertex)};
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex),
derxz(vertexIdx, vertex)};
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
return hessian;
}

View File

@@ -1,181 +0,0 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
/*
* =============================================================================
* Logical switches
* =============================================================================
*/
#define STENCIL_ORDER (6)
#define NGHOST (STENCIL_ORDER / 2)
#define LDENSITY (1)
#define LHYDRO (1)
#define LMAGNETIC (1)
#define LENTROPY (0)
#define LTEMPERATURE (0)
#define LFORCING (0)
#define LUPWD (1)
#define LSINK (1)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
/*
* =============================================================================
* User-defined parameters
* =============================================================================
*/
// clang-format off
#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)\
/* Other */\
FUNC(AC_max_steps), \
FUNC(AC_save_steps), \
FUNC(AC_bin_steps), \
FUNC(AC_bc_type),
#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)
#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_dsx), \
FUNC(AC_dsy), \
FUNC(AC_dsz), \
FUNC(AC_dsmin), \
/* physical grid*/\
FUNC(AC_xlen), \
FUNC(AC_ylen), \
FUNC(AC_zlen), \
FUNC(AC_xorig), \
FUNC(AC_yorig), \
FUNC(AC_zorig), \
/*Physical units*/\
FUNC(AC_unit_density),\
FUNC(AC_unit_velocity),\
FUNC(AC_unit_length),\
/* properties of gravitating star*/\
FUNC(AC_star_pos_x),\
FUNC(AC_star_pos_y),\
FUNC(AC_star_pos_z),\
FUNC(AC_M_star),\
/* properties of sink particle*/\
FUNC(AC_sink_pos_x),\
FUNC(AC_sink_pos_y),\
FUNC(AC_sink_pos_z),\
FUNC(AC_M_sink),\
FUNC(AC_M_sink_init),\
FUNC(AC_M_sink_Msun),\
FUNC(AC_soft),\
FUNC(AC_accretion_range),\
FUNC(AC_switch_accretion),\
/* Run params */\
FUNC(AC_cdt), \
FUNC(AC_cdtv), \
FUNC(AC_cdts), \
FUNC(AC_nu_visc), \
FUNC(AC_cs_sound), \
FUNC(AC_eta), \
FUNC(AC_mu0), \
FUNC(AC_cp_sound), \
FUNC(AC_gamma), \
FUNC(AC_cv_sound), \
FUNC(AC_lnT0), \
FUNC(AC_lnrho0), \
FUNC(AC_zeta), \
FUNC(AC_trans),\
/* Other */\
FUNC(AC_bin_save_t), \
/* Initial condition params */\
FUNC(AC_ampl_lnrho), \
FUNC(AC_ampl_uu), \
FUNC(AC_angl_uu), \
FUNC(AC_lnrho_edge),\
FUNC(AC_lnrho_out),\
/* Forcing parameters. User configured. */\
FUNC(AC_forcing_magnitude),\
FUNC(AC_relhel), \
FUNC(AC_kmin), \
FUNC(AC_kmax), \
/* Forcing parameters. Set by the generator. */\
FUNC(AC_forcing_phase),\
FUNC(AC_k_forcex),\
FUNC(AC_k_forcey),\
FUNC(AC_k_forcez),\
FUNC(AC_kaver),\
FUNC(AC_ff_hel_rex),\
FUNC(AC_ff_hel_rey),\
FUNC(AC_ff_hel_rez),\
FUNC(AC_ff_hel_imx),\
FUNC(AC_ff_hel_imy),\
FUNC(AC_ff_hel_imz),\
/* Additional helper params */\
/* (deduced from other params do not set these directly!) */\
FUNC(AC_G_const),\
FUNC(AC_unit_mass),\
FUNC(AC_GM_star),\
FUNC(AC_sq2GM_star),\
FUNC(AC_cs2_sound), \
FUNC(AC_inv_dsx), \
FUNC(AC_inv_dsy), \
FUNC(AC_inv_dsz),
#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)
// clang-format on
/*
* =============================================================================
* User-defined vertex buffers
* =============================================================================
*/
// clang-format off
#if LENTROPY
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO), \
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
FUNC(VTXBUF_ACCRETION), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ), \
FUNC(VTXBUF_ENTROPY),
#elif LMAGNETIC
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO), \
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
FUNC(VTXBUF_ACCRETION), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ),
#elif LHYDRO
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO), \
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
FUNC(VTXBUF_ACCRETION), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ),
#else
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
FUNC(VTXBUF_LNRHO),
#endif
// clang-format on

View File

@@ -0,0 +1,130 @@
#define LDENSITY (1)
#define LHYDRO (1)
#define LMAGNETIC (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define LFORCING (1)
#define LUPWD (1)
#define LSINK (1)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
// Int params
uniform int AC_max_steps;
uniform int AC_save_steps;
uniform int AC_bin_steps;
uniform int AC_bc_type;
// Real params
uniform Scalar AC_dt;
// Spacing
uniform Scalar AC_dsx;
uniform Scalar AC_dsy;
uniform Scalar AC_dsz;
uniform Scalar AC_dsmin;
// physical grid
uniform Scalar AC_xlen;
uniform Scalar AC_ylen;
uniform Scalar AC_zlen;
uniform Scalar AC_xorig;
uniform Scalar AC_yorig;
uniform Scalar AC_zorig;
// Physical units
uniform Scalar AC_unit_density;
uniform Scalar AC_unit_velocity;
uniform Scalar AC_unit_length;
// properties of gravitating star
uniform Scalar AC_star_pos_x;
uniform Scalar AC_star_pos_y;
uniform Scalar AC_star_pos_z;
uniform Scalar AC_M_star;
// properties of sink particle
uniform Scalar AC_sink_pos_x;
uniform Scalar AC_sink_pos_y;
uniform Scalar AC_sink_pos_z;
uniform Scalar AC_M_sink;
uniform Scalar AC_M_sink_init;
uniform Scalar AC_M_sink_Msun;
uniform Scalar AC_soft;
uniform Scalar AC_accretion_range;
uniform Scalar AC_switch_accretion;
// Run params
uniform Scalar AC_cdt;
uniform Scalar AC_cdtv;
uniform Scalar AC_cdts;
uniform Scalar AC_nu_visc;
uniform Scalar AC_cs_sound;
uniform Scalar AC_eta;
uniform Scalar AC_mu0;
uniform Scalar AC_cp_sound;
uniform Scalar AC_gamma;
uniform Scalar AC_cv_sound;
uniform Scalar AC_lnT0;
uniform Scalar AC_lnrho0;
uniform Scalar AC_zeta;
uniform Scalar AC_trans;
// Other
uniform Scalar AC_bin_save_t;
// Initial condition params
uniform Scalar AC_ampl_lnrho;
uniform Scalar AC_ampl_uu;
uniform Scalar AC_angl_uu;
uniform Scalar AC_lnrho_edge;
uniform Scalar AC_lnrho_out;
// Forcing parameters. User configured.
uniform Scalar AC_forcing_magnitude;
uniform Scalar AC_relhel;
uniform Scalar AC_kmin;
uniform Scalar AC_kmax;
// Forcing parameters. Set by the generator.
uniform Scalar AC_forcing_phase;
uniform Scalar AC_k_forcex;
uniform Scalar AC_k_forcey;
uniform Scalar AC_k_forcez;
uniform Scalar AC_kaver;
uniform Scalar AC_ff_hel_rex;
uniform Scalar AC_ff_hel_rey;
uniform Scalar AC_ff_hel_rez;
uniform Scalar AC_ff_hel_imx;
uniform Scalar AC_ff_hel_imy;
uniform Scalar AC_ff_hel_imz;
// Additional helper params // (deduced from other params do not set these directly!)
uniform Scalar AC_G_CONST;
uniform Scalar AC_GM_star;
uniform Scalar AC_unit_mass;
uniform Scalar AC_sq2GM_star;
uniform Scalar AC_cs2_sound;
uniform Scalar AC_inv_dsx;
uniform Scalar AC_inv_dsy;
uniform Scalar AC_inv_dsz;
/*
* =============================================================================
* User-defined vertex buffers
* =============================================================================
*/
#if LENTROPY
uniform ScalarField VTXBUF_LNRHO;
uniform ScalarField VTXBUF_UUX;
uniform ScalarField VTXBUF_UUY;
uniform ScalarField VTXBUF_UUZ;
uniform ScalarField VTXBUF_AX;
uniform ScalarField VTXBUF_AY;
uniform ScalarField VTXBUF_AZ;
uniform ScalarField VTXBUF_ENTROPY;
#elif LMAGNETIC
uniform ScalarField VTXBUF_LNRHO;
uniform ScalarField VTXBUF_UUX;
uniform ScalarField VTXBUF_UUY;
uniform ScalarField VTXBUF_UUZ;
uniform ScalarField VTXBUF_AX;
uniform ScalarField VTXBUF_AY;
uniform ScalarField VTXBUF_AZ;
#elif LHYDRO
uniform ScalarField VTXBUF_LNRHO;
uniform ScalarField VTXBUF_UUX;
uniform ScalarField VTXBUF_UUY;
uniform ScalarField VTXBUF_UUZ;
#else
uniform ScalarField VTXBUF_LNRHO;
#endif

View File

@@ -1,26 +1,4 @@
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar cv_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar zeta;
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar lnT0;
uniform Scalar lnrho0;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
#include "stencil_definition.sdh"
Vector
value(in VectorField uu)
@@ -35,7 +13,7 @@ upwd_der6(in VectorField uu, in ScalarField lnrho)
Scalar uux = fabs(value(uu).x);
Scalar uuy = fabs(value(uu).y);
Scalar uuz = fabs(value(uu).z);
return (Scalar){uux*der6x_upwd(lnrho) + uuy*der6y_upwd(lnrho) + uuz*der6z_upwd(lnrho)};
return (Scalar){uux * der6x_upwd(lnrho) + uuy * der6y_upwd(lnrho) + uuz * der6z_upwd(lnrho)};
}
#endif
@@ -167,12 +145,12 @@ sink_accretion_velocity(int3 globalVertexIdx, in VectorField uu, Scalar dt) {
#endif
Scalar
continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) {
continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
{
return -dot(value(uu), gradient(lnrho))
#if LUPWD
//This is a corrective hyperdiffusion term for upwinding.
// This is a corrective hyperdiffusion term for upwinding.
+ upwd_der6(uu, lnrho)
#endif
#if LSINK
@@ -185,148 +163,159 @@ continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar
#if LENTROPY
Vector
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt) {
const Matrix S = stress_tensor(uu);
const Scalar cs2 = cs2_sound * exp(gamma * value(ss) / cp_sound + (gamma - 1) * (value(lnrho) - lnrho0));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt)
{
const Matrix S = stress_tensor(uu);
const Scalar cs2 = AC_cs2_sound * exp(AC_gamma * value(ss) / AC_cp_sound +
(AC_gamma - 1) * (value(lnrho) - AC_lnrho0));
const Vector j = (Scalar(1.) / AC_mu0) *
(gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Vector B = curl(aa);
//TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD?
// TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD?
const Scalar inv_rho = Scalar(1.) / exp(value(lnrho));
// Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
// \1
const Vector mom = - mul(gradients(uu), value(uu))
- cs2 * ((Scalar(1.) / cp_sound) * gradient(ss) + gradient(lnrho))
+ inv_rho * cross(j, B)
+ nu_visc * (
laplace_vec(uu)
+ Scalar(1. / 3.) * gradient_of_divergence(uu)
+ Scalar(2.) * mul(S, gradient(lnrho))
)
+ zeta * gradient_of_divergence(uu)
const Vector mom = -mul(gradients(uu), value(uu)) -
cs2 * ((Scalar(1.) / AC_cp_sound) * gradient(ss) + gradient(lnrho)) +
inv_rho * cross(j, B) +
AC_nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) +
AC_zeta * gradient_of_divergence(uu)
#if LSINK
//Gravity term
+ sink_gravity(globalVertexIdx)
//Corresponding loss of momentum
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
;
//Gravity term
+ sink_gravity(globalVertexIdx)
//Corresponding loss of momentum
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
;
#else
;
;
#endif
return mom;
}
#elif LTEMPERATURE
Vector
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt) {
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt)
{
Vector mom;
const Matrix S = stress_tensor(uu);
const Vector pressure_term = (cp_sound - cv_sound) * (gradient(tt) + value(tt) * gradient(lnrho));
mom = -mul(gradients(uu), value(uu)) -
pressure_term +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
const Matrix S = stress_tensor(uu);
const Vector pressure_term = (AC_cp_sound - AC_cv_sound) *
(gradient(tt) + value(tt) * gradient(lnrho));
mom = -mul(gradients(uu), value(uu)) - pressure_term +
AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) +
AC_zeta * gradient_of_divergence(uu)
#if LSINK
+ sink_gravity(globalVertexIdx);
+ sink_gravity(globalVertexIdx);
#else
;
;
#endif
return mom;
#if LGRAVITY
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom;
}
#else
Vector
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) {
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
{
Vector mom;
const Matrix S = stress_tensor(uu);
// Isothermal: we have constant speed of sound
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
mom = -mul(gradients(uu), value(uu)) - AC_cs2_sound * gradient(lnrho) +
AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) +
AC_zeta * gradient_of_divergence(uu)
#if LSINK
+ sink_gravity(globalVertexIdx);
//Corresponding loss of momentum
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
;
+ sink_gravity(globalVertexIdx)
//Corresponding loss of momentum
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
;
#else
;
;
#endif
return mom;
#if LGRAVITY
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom;
}
#endif
Vector
induction(in VectorField uu, in VectorField aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
induction(in VectorField uu, in VectorField aa)
{
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - AC_eta * AC_mu0 * (AC_mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
// Note, AC_mu0 is cancelled out
const Vector ind = cross(value(uu), B) - AC_eta * (grad_div - lap);
return ind;
return ind;
}
#if LENTROPY
Scalar
lnT( in ScalarField ss, in ScalarField lnrho) {
const Scalar lnT = lnT0 + gamma * value(ss) / cp_sound +
(gamma - Scalar(1.)) * (value(lnrho) - lnrho0);
return lnT;
lnT(in ScalarField ss, in ScalarField lnrho)
{
const Scalar lnT = AC_lnT0 + AC_gamma * value(ss) / AC_cp_sound +
(AC_gamma - Scalar(1.)) * (value(lnrho) - AC_lnrho0);
return lnT;
}
// Nabla dot (K nabla T) / (rho T)
Scalar
heat_conduction( in ScalarField ss, in ScalarField lnrho) {
const Scalar inv_cp_sound = AcReal(1.) / cp_sound;
heat_conduction(in ScalarField ss, in ScalarField lnrho)
{
const Scalar inv_AC_cp_sound = AcReal(1.) / AC_cp_sound;
const Vector grad_ln_chi = - gradient(lnrho);
const Vector grad_ln_chi = -gradient(lnrho);
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) +
(gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = gamma * inv_cp_sound * gradient(ss) +
(gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) +
gradient(lnrho)) + grad_ln_chi;
const Scalar first_term = AC_gamma * inv_AC_cp_sound * laplace(ss) +
(AC_gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = AC_gamma * inv_AC_cp_sound * gradient(ss) +
(AC_gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = AC_gamma * (inv_AC_cp_sound * gradient(ss) + gradient(lnrho)) +
grad_ln_chi;
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * cp_sound);
return cp_sound * chi * (first_term + dot(second_term, third_term));
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * AC_cp_sound);
return AC_cp_sound * chi * (first_term + dot(second_term, third_term));
}
Scalar
heating(const int i, const int j, const int k) {
return 1;
heating(const int i, const int j, const int k)
{
return 1;
}
Scalar
entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa) {
const Matrix S = stress_tensor(uu);
entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa)
{
const Matrix S = stress_tensor(uu);
const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Scalar RHS = H_CONST - C_CONST
+ eta * (mu0) * dot(j, j)
+ Scalar(2.) * exp(value(lnrho)) * nu_visc * contract(S)
+ zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
const Vector j = (Scalar(1.) / AC_mu0) *
(gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Scalar RHS = H_CONST - C_CONST + AC_eta * (AC_mu0)*dot(j, j) +
Scalar(2.) * exp(value(lnrho)) * AC_nu_visc * contract(S) +
AC_zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
return - dot(value(uu), gradient(ss))
+ inv_pT * RHS
+ heat_conduction(ss, lnrho);
return -dot(value(uu), gradient(ss)) + inv_pT * RHS + heat_conduction(ss, lnrho);
}
#endif
@@ -334,14 +323,15 @@ entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorFie
Scalar
heat_transfer(in VectorField uu, in ScalarField lnrho, in ScalarField tt)
{
const Matrix S = stress_tensor(uu);
const Scalar heat_diffusivity_k = 0.0008; //8e-4;
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) + heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) + nu_visc * contract(S) * (Scalar(1.) / cv_sound) - (gamma - 1) * value(tt) * divergence(uu);
const Matrix S = stress_tensor(uu);
const Scalar heat_diffusivity_k = 0.0008; // 8e-4;
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) +
heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) +
AC_nu_visc * contract(S) * (Scalar(1.) / AC_cv_sound) -
(AC_gamma - 1) * value(tt) * divergence(uu);
}
#endif
#if LFORCING
Vector
simple_vortex_forcing(Vector a, Vector b, Scalar magnitude){
@@ -363,50 +353,40 @@ Vector
}
}
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable helicity
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable
// helicity
Vector
helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vector ff_im, Scalar phi)
{
int accretion_switch = DCONST_INT(AC_switch_accretion);
if (accretion_switch == 0){
// JP: This looks wrong:
// 1) Should it be AC_dsx * AC_nx instead of AC_dsx * AC_ny?
// 2) Should you also use globalGrid.n instead of the local n?
// MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split
// in z direction not y direction.
// 3) Also final point: can we do this with vectors/quaternions instead?
// Tringonometric functions are much more expensive and inaccurate/
// MV: Good idea. No an immediate priority.
// Fun related article:
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
xx.x = xx.x * (2.0 * M_PI / (AC_dsx * globalGridN.x));
xx.y = xx.y * (2.0 * M_PI / (AC_dsy * globalGridN.y));
xx.z = xx.z * (2.0 * M_PI / (AC_dsz * globalGridN.z));
// JP: This looks wrong:
// 1) Should it be dsx * nx instead of dsx * ny?
// 2) Should you also use globalGrid.n instead of the local n?
// MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split
// in z direction not y direction.
// 3) Also final point: can we do this with vectors/quaternions instead?
// Tringonometric functions are much more expensive and inaccurate/
// MV: Good idea. No an immediate priority.
// Fun related article:
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
xx.x = xx.x*(2.0*M_PI/(dsx*globalGridN.x));
xx.y = xx.y*(2.0*M_PI/(dsy*globalGridN.y));
xx.z = xx.z*(2.0*M_PI/(dsz*globalGridN.z));
Scalar cos_phi = cos(phi);
Scalar sin_phi = sin(phi);
Scalar cos_k_dot_x = cos(dot(k_force, xx));
Scalar sin_k_dot_x = sin(dot(k_force, xx));
// Phase affect only the x-component
//Scalar real_comp = cos_k_dot_x;
//Scalar imag_comp = sin_k_dot_x;
Scalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi;
Scalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi;
Vector force = (Vector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase,
ff_re.y*real_comp_phase - ff_im.y*imag_comp_phase,
ff_re.z*real_comp_phase - ff_im.z*imag_comp_phase};
return force;
} else {
return (Vector){0,0,0};
}
Scalar cos_phi = cos(phi);
Scalar sin_phi = sin(phi);
Scalar cos_k_dot_x = cos(dot(k_force, xx));
Scalar sin_k_dot_x = sin(dot(k_force, xx));
// Phase affect only the x-component
// Scalar real_comp = cos_k_dot_x;
// Scalar imag_comp = sin_k_dot_x;
Scalar real_comp_phase = cos_k_dot_x * cos_phi - sin_k_dot_x * sin_phi;
Scalar imag_comp_phase = cos_k_dot_x * sin_phi + sin_k_dot_x * cos_phi;
Vector force = (Vector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase,
ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase,
ff_re.z * real_comp_phase - ff_im.z * imag_comp_phase};
return force;
}
Vector
@@ -458,12 +438,11 @@ in ScalarField lnrho(VTXBUF_LNRHO);
out ScalarField out_lnrho(VTXBUF_LNRHO);
in VectorField uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
out VectorField out_uu(VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ);
out VectorField out_uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
#if LMAGNETIC
in VectorField aa(VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ);
out VectorField out_aa(VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ);
in VectorField aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ);
out VectorField out_aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ);
#endif
#if LENTROPY
@@ -482,38 +461,36 @@ out Scalar out_accretion = VTXBUF_ACCRETION;
#endif
Kernel void
solve(Scalar dt) {
solve()
{
Scalar dt = AC_dt;
out_lnrho = rk3(out_lnrho, lnrho, continuity(globalVertexIdx, uu, lnrho, dt), dt);
#if LMAGNETIC
#if LMAGNETIC
out_aa = rk3(out_aa, aa, induction(uu, aa), dt);
#endif
#endif
#if LENTROPY
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, ss, aa, dt), dt);
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
#elif LTEMPERATURE
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, tt), dt);
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
#else
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, dt), dt);
#endif
#if LENTROPY
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt);
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
#elif LTEMPERATURE
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
#else
out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
#endif
#if LFORCING
#if LFORCING
if (step_number == 2) {
out_uu = out_uu + forcing(globalVertexIdx, dt);
}
#endif
#if LSINK
// out_lnrho = log(exp(out_lnrho) - sink_accretion(globalVertexIdx, lnrho));
// out_accretion = value(accretion) + (sink_accretion(globalVertexIdx,lnrho) * dsx * dsy * dsz);
#endif
#if LSINK
out_accretion = rk3(out_accretion, accretion, sink_accretion(globalVertexIdx, lnrho, dt), dt);// unit now is rho!
// out_lnrho = log(exp(out_lnrho) - out_accretion);
if (step_number == 2) {
out_accretion = out_accretion * dsx * dsy * dsz;// unit is now mass!
}
//TODO: implement accretion correction to contiunity equation and momentum equation.
#endif
#endif
}

View File

@@ -15,7 +15,7 @@ L [a-zA-Z_]
"void" { return VOID; } /* Rest of the types inherited from C */
"int" { return INT; }
"int3" { return INT3; }
"ScalarField" { return SCALAR; }
"ScalarField" { return SCALARFIELD; }
"VectorField" { return VECTOR; }
"Kernel" { return KERNEL; } /* Function specifiers */

View File

@@ -16,7 +16,7 @@ int yyget_lineno();
%token CONSTANT IN OUT UNIFORM
%token IDENTIFIER NUMBER
%token RETURN
%token SCALAR VECTOR MATRIX
%token SCALAR VECTOR MATRIX SCALARFIELD
%token VOID INT INT3
%token IF ELSE FOR WHILE ELIF
%token LEQU LAND LOR LLEQU
@@ -209,6 +209,7 @@ type_specifier: VOID
| SCALAR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALAR; }
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
| SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; }
;
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }

View File

@@ -36,7 +36,8 @@
ASTNode* root = NULL;
static const char inout_name_prefix[] = "handle_";
static bool doing_stencil_assembly = true;
typedef enum { STENCIL_ASSEMBLY, STENCIL_PROCESS, STENCIL_HEADER } CompilationType;
static CompilationType compilation_type;
/*
* =============================================================================
@@ -53,16 +54,17 @@ static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
[WHILE] = "while",
[FOR] = "for",
// Type specifiers
[VOID] = "void",
[INT] = "int",
[INT3] = "int3",
[SCALAR] = "AcReal",
[VECTOR] = "AcReal3",
[MATRIX] = "AcMatrix",
[VOID] = "void",
[INT] = "int",
[INT3] = "int3",
[SCALAR] = "AcReal",
[VECTOR] = "AcReal3",
[MATRIX] = "AcMatrix",
[SCALARFIELD] = "AcReal",
// Type qualifiers
[KERNEL] = "template <int step_number> static "
"__global__", //__launch_bounds__(RK_THREADBLOCK_SIZE,
// RK_LAUNCH_BOUND_MIN_BLOCKS),
[KERNEL] = "template <int step_number> static __global__",
//__launch_bounds__(RK_THREADBLOCK_SIZE,
// RK_LAUNCH_BOUND_MIN_BLOCKS),
[PREPROCESSED] = "static __device__ "
"__forceinline__",
[CONSTANT] = "const",
@@ -212,11 +214,14 @@ translate_latest_symbol(void)
// FUNCTION PARAMETER
else if (symbol->type == SYMBOLTYPE_FUNCTION_PARAMETER) {
if (symbol->type_qualifier == IN || symbol->type_qualifier == OUT) {
if (doing_stencil_assembly)
if (compilation_type == STENCIL_ASSEMBLY)
printf("const __restrict__ %s* %s", translate(symbol->type_specifier),
symbol->identifier);
else
else if (compilation_type == STENCIL_PROCESS)
printf("const %sData& %s", translate(symbol->type_specifier), symbol->identifier);
else
printf("Invalid compilation type %d, IN and OUT qualifiers not supported\n",
compilation_type);
}
else {
print_symbol(handle);
@@ -224,14 +229,18 @@ translate_latest_symbol(void)
}
// UNIFORM
else if (symbol->type_qualifier == UNIFORM) {
// if (compilation_type != STENCIL_HEADER) {
// printf("ERROR: %s can only be used in stencil headers\n", translation_table[UNIFORM]);
//}
/* Do nothing */
}
// IN / OUT
else if (symbol->type != SYMBOLTYPE_FUNCTION_PARAMETER &&
(symbol->type_qualifier == IN || symbol->type_qualifier == OUT)) {
printf("static __device__ const %s %s%s", symbol->type_specifier == SCALAR ? "int" : "int3",
inout_name_prefix, symbol_table[handle].identifier);
printf("static __device__ const %s %s%s",
symbol->type_specifier == SCALARFIELD ? "int" : "int3", inout_name_prefix,
symbol_table[handle].identifier);
if (symbol->type_specifier == VECTOR)
printf(" = make_int3");
}
@@ -309,9 +318,15 @@ traverse(const ASTNode* node)
inside_kernel = true;
// Kernel parameter boilerplate
const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE, ";
if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
printf("%s ", kernel_parameter_boilerplate);
const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE";
if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION) {
printf("%s", kernel_parameter_boilerplate);
if (node->lhs != NULL) {
printf("Compilation error: function parameters for Kernel functions not allowed!\n");
exit(EXIT_FAILURE);
}
}
// Kernel builtin variables boilerplate (read input/output arrays and setup
// indices)
@@ -369,6 +384,8 @@ traverse(const ASTNode* node)
// printf("%s%s", inout_name_prefix, symbol->identifier);
//}
if (symbol->type_qualifier == UNIFORM) {
printf("DCONST(%s) ", symbol->identifier);
/*
if (symbol->type_specifier == SCALAR)
printf("DCONST_REAL(AC_%s) ", symbol->identifier);
else if (symbol->type_specifier == INT)
@@ -376,6 +393,7 @@ traverse(const ASTNode* node)
else
printf("INVALID UNIFORM type specifier %s with %s\n",
translate(symbol->type_specifier), symbol->identifier);
*/
}
else {
// Do a regular translation
@@ -545,14 +563,89 @@ generate_preprocessed_structures(void)
");
}
static void
generate_header(void)
{
printf("\n#pragma once\n");
// Int params
printf("#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == INT) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Int3 params
printf("#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == INT3) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Scalar params
printf("#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == SCALAR) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Vector params
printf("#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == VECTOR) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
// Scalar fields
printf("#define AC_FOR_VTXBUF_HANDLES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == SCALARFIELD) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
/*
printf("\n");
printf("typedef struct {\n");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == PREPROCESSED)
printf("%s %s;\n", translate(symbol_table[i].type_specifier),
symbol_table[i].identifier);
}
printf("} %sData;\n", translate(SCALAR));
*/
}
static void
generate_library_hooks(void)
{
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == KERNEL) {
printf("GEN_DEVICE_FUNC_HOOK(%s)\n", symbol_table[i].identifier);
// printf("GEN_NODE_FUNC_HOOK(%s)\n", symbol_table[i].identifier);
}
}
}
int
main(int argc, char** argv)
{
if (argc == 2) {
if (!strcmp(argv[1], "-sas"))
doing_stencil_assembly = true;
compilation_type = STENCIL_ASSEMBLY;
else if (!strcmp(argv[1], "-sps"))
doing_stencil_assembly = false;
compilation_type = STENCIL_PROCESS;
else if (!strcmp(argv[1], "-sdh"))
compilation_type = STENCIL_HEADER;
else
printf("Unknown flag %s. Generating stencil assembly.\n", argv[1]);
}
@@ -560,8 +653,8 @@ main(int argc, char** argv)
printf("Usage: ./acc [flags]\n"
"Flags:\n"
"\t-sas - Generates code for the stencil assembly stage\n"
"\t-sps - Generates code for the stencil processing "
"stage\n");
"\t-sps - Generates code for the stencil processing stage\n"
"\t-hh - Generates stencil definitions from a header file\n");
printf("\n");
return EXIT_FAILURE;
}
@@ -576,8 +669,12 @@ main(int argc, char** argv)
// Traverse
traverse(root);
if (doing_stencil_assembly)
if (compilation_type == STENCIL_ASSEMBLY)
generate_preprocessed_structures();
else if (compilation_type == STENCIL_HEADER)
generate_header();
else if (compilation_type == STENCIL_PROCESS)
generate_library_hooks();
// print_symbol_table();

View File

@@ -0,0 +1,67 @@
# Astaroth specification and user manual
Copyright (C) 2014-2019, Johannes Pekkila, Miikka Vaisala.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
# Introduction and background
Astaroth is a collection of tools for utilizing multiple graphics processing units (GPUs) efficiently in three-dimensional stencil computations. This document specifies the Astaroth application-programming interface (API) and domain-specific language (DSL).
Astaroth has been designed for the demands in computational sciences, where large stencils are often used to attain sufficient accuracy. The majority of previous work focuses on stencil computations with low-order stencils for which several efficient algorithms have been proposed, whereas work on high-order stencils is more limited. In addition, in computational physics multiple fields interact with each other, such as the velocity and magnetic fields of electrically conducting fluids. Such computations are especially challenging to solve efficiently because of the problem's relatively low operational intensity and the small caches provided by GPUs. Efficient methods for computations with several coupled fields and large stencils have not been addressed sufficiently in prior work.
With Astaroth, we have taken inspiration of image processing and graphics pipelines which rely on holding intermediate data in caches for the duration of computations, and extended the idea to work efficiently also with large three-dimensional stencils and an arbitrary number of coupled fields. As programming GPUs efficiently is relatively verbose and requires deep knowledge of the underlying hardware and execution model, we have created a high-level domain-specific language for expressing a wide range of tasks in computational sciences and provide a source-to-source compiler for translating stencil problems expressed in our language into efficient CUDA kernels.
The kernels generated from the Astaroth DSL are embedded in the Astaroth Core library, which is usable via the Astaroth API. While the Astaroth library is written in C++/CUDA, the API conforms to the C99 standard.
# Publications
The foundational work was done in (Väisälä, Pekkilä, 2017) and the library, API and DSL described in this document were introduced in (Pekkilä, 2019). We kindly wish the users of Astaroth to cite to these publications in their work.
> J. Pekkilä, Astaroth: A Library for Stencil Computations on Graphics Processing Units. Master's thesis, Aalto University School of Science, Espoo, Finland, 2019.
> M. S. Väisälä, Magnetic Phenomena of the Interstellar Medium in Theory and Observation. PhD thesis, University of Helsinki, Finland, 2017.
> J. Pekkilä, M. S. Väisälä, M. Käpylä, P. J. Käpylä, and O. Anjum, “Methods for compressible fluid simulation on GPUs using high-order finite differences, ”Computer Physics Communications, vol. 217, pp. 1122, Aug. 2017.
# Astaroth API
## Devices and nodes
## Meshes
## Streams and synchronization
## Interface
# Astaroth DSL
## Uniforms
## Vertex buffers
### Input and output buffers
## Built-in variables and functions

View File

@@ -0,0 +1,391 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="76.050636mm"
height="85.851082mm"
viewBox="0 0 76.050636 85.851082"
version="1.1"
id="svg8"
inkscape:version="0.92.3 (2405546, 2018-03-11)"
sodipodi:docname="domain_decomposition.svg"
inkscape:export-filename="/home/mvaisala/astaroth/doc/domain_decomposition.svg.png"
inkscape:export-xdpi="233.7916"
inkscape:export-ydpi="233.7916">
<defs
id="defs2">
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="-34.999999 : 849.45444 : 1"
inkscape:vp_y="0 : 3779.5274 : 0"
inkscape:vp_z="758.70081 : 849.45444 : 1"
inkscape:persp3d-origin="361.85039 : 662.36784 : 1"
id="perspective3883" />
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="-28.760416 : 819.37885 : 1"
inkscape:vp_y="0 : 3779.5274 : 0"
inkscape:vp_z="764.94038 : 819.37885 : 1"
inkscape:persp3d-origin="368.08997 : 632.29225 : 1"
id="perspective3817" />
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="139.37224 : 246.52176 : 1"
inkscape:vp_y="90.281989 : 563.44101 : 1"
inkscape:vp_z="193.82452 : 233.87492 : 1"
inkscape:persp3d-origin="90.281989 : 216.71184 : 1"
id="perspective3715" />
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="1.4"
inkscape:cx="65.802138"
inkscape:cy="225.49817"
inkscape:document-units="mm"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:snap-grids="true"
fit-margin-top="0"
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:window-width="1920"
inkscape:window-height="1025"
inkscape:window-x="0"
inkscape:window-y="27"
inkscape:window-maximized="1">
<inkscape:grid
type="xygrid"
id="grid3923"
originx="-51.915482"
originy="-200.5164" />
</sodipodi:namedview>
<metadata
id="metadata5">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-51.915483,-10.632538)">
<g
inkscape:corner7="-0.091655035 : 0.066875827 : 0.25 : 1"
inkscape:corner0="0.2311558 : 0.078514233 : 0 : 1"
inkscape:perspectiveID="#perspective3817"
id="g3815"
sodipodi:type="inkscape:box3d">
<path
points="345.49477,-34.871325 425.05445,-25.867869 425.05445,-55.566078 345.49477,-70.600089 "
d="m 345.49477,-70.600089 v 35.728764 l 79.55968,9.003456 v -29.698209 z"
inkscape:box3dsidetype="6"
style="fill:#353564;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3803"
sodipodi:type="inkscape:box3dside" />
<path
points="537.05691,-76.198873 537.05691,-38.224283 425.05445,-25.867869 425.05445,-55.566078 "
d="m 425.05445,-55.566078 112.00246,-20.632795 v 37.97459 l -112.00246,12.356414 z"
inkscape:box3dsidetype="11"
style="fill:#e9e9ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3805"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-102.2544 537.05691,-76.198873 425.05445,-55.566078 345.49477,-70.600089 "
d="M 345.49477,-70.600089 460.049,-102.2544 l 77.00791,26.055527 -112.00246,20.632795 z"
inkscape:box3dsidetype="5"
style="fill:#4d4d9f;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3807"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-53.828225 537.05691,-38.224283 425.05445,-25.867869 345.49477,-34.871325 "
d="m 345.49477,-34.871325 114.55423,-18.9569 77.00791,15.603942 -112.00246,12.356414 z"
inkscape:box3dsidetype="13"
style="fill:#afafde;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3809"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-53.828225 537.05691,-38.224283 537.05691,-76.198873 460.049,-102.2544 "
d="m 460.049,-102.2544 v 48.426175 l 77.00791,15.603942 v -37.97459 z"
inkscape:box3dsidetype="14"
style="fill:#d7d7ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3811"
sodipodi:type="inkscape:box3dside" />
<path
points="460.049,-102.2544 460.049,-53.828225 345.49477,-34.871325 345.49477,-70.600089 "
d="M 345.49477,-70.600089 460.049,-102.2544 v 48.426175 l -114.55423,18.9569 z"
inkscape:box3dsidetype="3"
style="fill:#8686bf;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3813"
sodipodi:type="inkscape:box3dside" />
</g>
<g
inkscape:corner7="-0.067801071 : 0.080724505 : 0.25 : 1"
inkscape:corner0="0.29810879 : 0.093471507 : 0 : 1"
inkscape:perspectiveID="#perspective3883"
id="g3881"
sodipodi:type="inkscape:box3d">
<path
points="322.62977,-102.51699 401.43343,-87.835826 401.43343,-118.95615 322.62977,-139.63071 "
d="m 322.62977,-139.63071 v 37.11372 l 78.80366,14.681164 v -31.120324 z"
inkscape:box3dsidetype="6"
style="fill:#353564;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3869"
sodipodi:type="inkscape:box3dside" />
<path
points="520.44801,-152.18313 520.44801,-111.43056 401.43343,-87.835826 401.43343,-118.95615 "
d="m 401.43343,-118.95615 119.01458,-33.22698 v 40.75257 l -119.01458,23.594734 z"
inkscape:box3dsidetype="11"
style="fill:#e9e9ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3871"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-189.88386 520.44801,-152.18313 401.43343,-118.95615 322.62977,-139.63071 "
d="m 322.62977,-139.63071 120,-50.25315 77.81824,37.70073 -119.01458,33.22698 z"
inkscape:box3dsidetype="5"
style="fill:#4d4d9f;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3873"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-138.20214 520.44801,-111.43056 401.43343,-87.835826 322.62977,-102.51699 "
d="m 322.62977,-102.51699 120,-35.68515 77.81824,26.77158 -119.01458,23.594734 z"
inkscape:box3dsidetype="13"
style="fill:#afafde;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3875"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-138.20214 520.44801,-111.43056 520.44801,-152.18313 442.62977,-189.88386 "
d="m 442.62977,-189.88386 v 51.68172 l 77.81824,26.77158 v -40.75257 z"
inkscape:box3dsidetype="14"
style="fill:#d7d7ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3877"
sodipodi:type="inkscape:box3dside" />
<path
points="442.62977,-189.88386 442.62977,-138.20214 322.62977,-102.51699 322.62977,-139.63071 "
d="m 322.62977,-139.63071 120,-50.25315 v 51.68172 l -120,35.68515 z"
inkscape:box3dsidetype="3"
style="fill:#8686bf;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
id="path3879"
sodipodi:type="inkscape:box3dside" />
</g>
<g
id="g4791"
transform="translate(13.229163,-23.812503)"
style="fill:#ffffff;fill-opacity:1">
<rect
y="87.979164"
x="63.5"
height="10.583336"
width="42.333332"
id="rect3925"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="77.395836"
x="63.500004"
height="10.583337"
width="42.333332"
id="rect3925-2"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="66.8125"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-0"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="56.229168"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-23"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
</g>
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 60.854159,48.291665 15.875,-15.875"
id="path4862"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,48.291666 15.875,-15.875001"
id="path4862-1"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,90.624998 15.875,-15.875"
id="path4862-2"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,80.041668 15.875,-15.875"
id="path4862-9"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,69.458328 15.875,-15.875"
id="path4862-3"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 103.18749,58.874998 15.875,-15.874999"
id="path4862-19"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 60.854158,58.874998 15.875001,-15.875"
id="path4862-4"
inkscape:connector-curvature="0" />
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="90.045952"
y="15.407738"
id="text4926"><tspan
sodipodi:role="line"
x="90.926521"
y="15.407738"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:10.58333302px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';text-align:center;text-anchor:middle;stroke-width:0.26458332"
id="tspan4928">Astaroth multi-GPU node </tspan><tspan
sodipodi:role="line"
x="90.045952"
y="28.636904"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:10.58333302px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';text-align:center;text-anchor:middle;stroke-width:0.26458332"
id="tspan4930">domain decomposition</tspan></text>
</g>
<g
inkscape:groupmode="layer"
id="layer2"
inkscape:label="Layer 2"
transform="translate(-51.915483,-10.632538)">
<g
id="g4791-8"
transform="translate(-2.6458431,-7.9375045)"
style="fill:#ffffff;fill-opacity:1">
<rect
y="87.979164"
x="63.5"
height="10.583336"
width="42.333332"
id="rect3925-9"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="77.395836"
x="63.500004"
height="10.583337"
width="42.333332"
id="rect3925-2-7"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="66.8125"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-0-3"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
<rect
y="56.229168"
x="63.499996"
height="10.583337"
width="42.333332"
id="rect3925-23-6"
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="76.351189"
y="55.28421"
id="text4926-2"><tspan
sodipodi:role="line"
x="76.351189"
y="55.28421"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0">GPU 0</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="75.973213"
y="66.245522"
id="text4926-2-9"><tspan
sodipodi:role="line"
x="75.973213"
y="66.245522"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-9">GPU 1</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="75.784225"
y="76.072899"
id="text4926-2-4"><tspan
sodipodi:role="line"
x="75.784225"
y="76.072899"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-5">GPU 2</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="75.973213"
y="86.656235"
id="text4926-2-1"><tspan
sodipodi:role="line"
x="75.973213"
y="86.656235"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-0">GPU 3</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="51.59375"
y="70.970222"
id="text4926-2-3"><tspan
sodipodi:role="line"
x="51.59375"
y="70.970222"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-7">Nz</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="79.563988"
y="96.48362"
id="text4926-2-3-8"><tspan
sodipodi:role="line"
x="79.563988"
y="96.48362"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-7-8">Nx</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
x="114.71577"
y="87.223206"
id="text4926-2-3-6"><tspan
sodipodi:role="line"
x="114.71577"
y="87.223206"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
id="tspan4930-0-7-0">Ny</tspan></text>
</g>
</svg>

After

Width:  |  Height:  |  Size: 19 KiB

View File

@@ -45,6 +45,8 @@ typedef struct {
#endif // __CUDACC__
// Library flags
#define STENCIL_ORDER (6)
#define NGHOST (STENCIL_ORDER / 2)
#define VERBOSE_PRINTING (1)
// Built-in types and parameters
@@ -215,6 +217,9 @@ acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo info)
k * info.int_params[AC_mx] * info.int_params[AC_my];
}
/** Prints all parameters inside AcMeshInfo */
void acPrintMeshInfo(const AcMeshInfo config);
/*
static inline int
acGetParam(const AcMeshInfo info, const AcIntParam param)

View File

@@ -46,8 +46,24 @@ AcResult acDeviceSynchronizeStream(const Device device, const Stream stream);
AcResult acDeviceSwapBuffers(const Device device);
/** */
AcResult acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam param,
const AcReal value);
AcResult acDeviceLoadScalarConstant(const Device device, const Stream stream,
const AcRealParam param, const AcReal value);
/** */
AcResult acDeviceLoadVectorConstant(const Device device, const Stream stream,
const AcReal3Param param, const AcReal3 value);
/** */
AcResult acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
const int value);
/** */
AcResult acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
const int3 value);
/** */
AcResult acDeviceLoadMeshInfo(const Device device, const Stream stream,
const AcMeshInfo device_config);
/** */
AcResult acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream,

View File

@@ -8,17 +8,18 @@ fi
KERNEL_DIR=${AC_HOME}"/src/core/kernels"
ACC_DIR=${AC_HOME}"/acc"
ACC_DEFAULT_HEADER="mhd_solver/stencil_defines.h"
ACC_DEFAULT_SAS="mhd_solver/stencil_assembly.sas"
ACC_DEFAULT_SPS="mhd_solver/stencil_process.sps"
ACC_DEFAULT_HEADER="mhd_solver/stencil_definition.sdh"
ACC_DEFAULT_INCLUDE_DIR="mhd_solver"
${ACC_DIR}/clean.sh
${ACC_DIR}/build_acc.sh
ACC_HEADER=${ACC_DEFAULT_HEADER}
ACC_SAS=${ACC_DEFAULT_SAS}
ACC_SPS=${ACC_DEFAULT_SPS}
ACC_HEADER=${ACC_DEFAULT_HEADER}
ACC_INCLUDE_DIR=${ACC_DEFAULT_INCLUDE_DIR}
while [ "$#" -gt 0 ]
do
@@ -56,9 +57,17 @@ echo "Header file:" ${ACC_DIR}/${ACC_HEADER}
echo "Assembly file: ${ACC_DIR}/${ACC_SAS}"
echo "Process file: ${ACC_DIR}/${ACC_SPS}"
cd ${KERNEL_DIR}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SAS} ${ACC_DIR}/${ACC_HEADER}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SPS} ${ACC_DIR}/${ACC_HEADER}
cd ${ACC_DIR}/${ACC_INCLUDE_DIR}
echo ${PWD}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SAS}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SPS}
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_HEADER}
echo "Linking: " ${ACC_DIR}/${ACC_HEADER} " -> " ${AC_HOME}/include/stencil_defines.h
ln -sf ${ACC_DIR}/${ACC_HEADER} ${AC_HOME}/include/stencil_defines.h
echo "Moving stencil_assembly.cuh -> ${AC_HOME}/src/core/kernels"
mv stencil_assembly.cuh ${AC_HOME}/src/core/kernels
echo "Moving stencil_process.cuh -> ${AC_HOME}/src/core/kernels"
mv stencil_process.cuh ${AC_HOME}/src/core/kernels
echo "Moving stencil_defines.cuh -> ${AC_HOME}/include"
mv stencil_defines.h ${AC_HOME}/include

View File

@@ -0,0 +1 @@
nvcc -E ../src/core/device.cu -I ../include -I ../ > preprocessed_device_files.pp

View File

@@ -36,6 +36,21 @@ const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
static const int num_nodes = 1;
static Node nodes[num_nodes];
void
acPrintMeshInfo(const AcMeshInfo config)
{
for (int i = 0; i < NUM_INT_PARAMS; ++i)
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
for (int i = 0; i < NUM_INT3_PARAMS; ++i)
printf("[%s]: (%d, %d, %d)\n", int3param_names[i], config.int3_params[i].x,
config.int3_params[i].y, config.int3_params[i].z);
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
for (int i = 0; i < NUM_REAL3_PARAMS; ++i)
printf("[%s]: (%g, %g, %g)\n", real3param_names[i], double(config.real3_params[i].x),
double(config.real3_params[i].y), double(config.real3_params[i].z));
}
AcResult
acInit(const AcMeshInfo mesh_info)
{

View File

@@ -39,35 +39,54 @@ typedef struct {
AcReal* out[NUM_VTXBUF_HANDLES];
} VertexBufferArray;
struct device_s {
int id;
AcMeshInfo local_config;
// Concurrency
cudaStream_t streams[NUM_STREAM_TYPES];
// Memory
VertexBufferArray vba;
AcReal* reduce_scratchpad;
AcReal* reduce_result;
#if PACKED_DATA_TRANSFERS
// Declare memory for buffers needed for packed data transfers here
// AcReal* data_packing_buffer;
#endif
};
__constant__ AcMeshInfo d_mesh_info;
static inline int __device__
static int __device__ __forceinline__
DCONST(const AcIntParam param)
{
return d_mesh_info.int_params[param];
}
static inline int3 __device__
static int3 __device__ __forceinline__
DCONST(const AcInt3Param param)
{
return d_mesh_info.int3_params[param];
}
static inline AcReal __device__
static AcReal __device__ __forceinline__
DCONST(const AcRealParam param)
{
return d_mesh_info.real_params[param];
}
static inline AcReal3 __device__
static AcReal3 __device__ __forceinline__
DCONST(const AcReal3Param param)
{
return d_mesh_info.real3_params[param];
}
constexpr VertexBufferHandle
DCONST(const VertexBufferHandle handle)
{
return handle;
}
#define DCONST_INT(x) DCONST(x)
#define DCONST_INT3(x) DCONST(x)
#define DCONST_REAL(x) DCONST(x)
#define DCONST_REAL3(x) DCONST(x)
//#define DCONST_INT(X) (d_mesh_info.int_params[X])
//#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
//#define DCONST_REAL(X) (d_mesh_info.real_params[X])
//#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
#define globalGridN (d_mesh_info.int3_params[AC_global_grid_n])
@@ -88,26 +107,8 @@ static dim3 rk3_tpb(32, 1, 4);
// #include "kernels/pack_unpack.cuh"
#endif
struct device_s {
int id;
AcMeshInfo local_config;
// Concurrency
cudaStream_t streams[NUM_STREAM_TYPES];
// Memory
VertexBufferArray vba;
AcReal* reduce_scratchpad;
AcReal* reduce_result;
#if PACKED_DATA_TRANSFERS
// Declare memory for buffers needed for packed data transfers here
// AcReal* data_packing_buffer;
#endif
};
// clang-format off
static __global__ void dummy_kernel(void) {}
static __global__ void dummy_kernel(void) { DCONST((AcIntParam)0); DCONST((AcInt3Param)0); DCONST((AcRealParam)0); DCONST((AcReal3Param)0); }
// clang-format on
AcResult
@@ -153,8 +154,7 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
#endif
// Device constants
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0,
cudaMemcpyHostToDevice));
acDeviceLoadMeshInfo(device, STREAM_DEFAULT, device_config);
printf("Created device %d (%p)\n", device->id, device);
*device_handle = device;
@@ -303,8 +303,9 @@ acDeviceAutoOptimize(const Device device)
cudaEventRecord(tstart); // ---------------------------------------- Timing start
acDeviceLoadScalarConstant(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON);
for (int i = 0; i < num_iterations; ++i)
solve<2><<<bpg, tpb>>>(start, end, device->vba, FLT_EPSILON);
solve<2><<<bpg, tpb>>>(start, end, device->vba);
cudaEventRecord(tstop); // ----------------------------------------- Timing end
cudaEventSynchronize(tstop);
@@ -361,8 +362,8 @@ acDeviceSwapBuffers(const Device device)
}
AcResult
acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam param,
const AcReal value)
acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRealParam param,
const AcReal value)
{
cudaSetDevice(device->id);
const size_t offset = (size_t)&d_mesh_info.real_params[param] - (size_t)&d_mesh_info;
@@ -371,6 +372,55 @@ acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam
return AC_SUCCESS;
}
AcResult
acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcReal3Param param,
const AcReal3 value)
{
cudaSetDevice(device->id);
const size_t offset = (size_t)&d_mesh_info.real3_params[param] - (size_t)&d_mesh_info;
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult
acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
const int value)
{
cudaSetDevice(device->id);
const size_t offset = (size_t)&d_mesh_info.int_params[param] - (size_t)&d_mesh_info;
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult
acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
const int3 value)
{
cudaSetDevice(device->id);
const size_t offset = (size_t)&d_mesh_info.int3_params[param] - (size_t)&d_mesh_info;
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult
acDeviceLoadMeshInfo(const Device device, const Stream stream, const AcMeshInfo device_config)
{
cudaSetDevice(device->id);
ERRCHK_ALWAYS(device_config.int_params[AC_nx] == device->local_config.int_params[AC_nx]);
ERRCHK_ALWAYS(device_config.int_params[AC_ny] == device->local_config.int_params[AC_ny]);
ERRCHK_ALWAYS(device_config.int_params[AC_nz] == device->local_config.int_params[AC_nz]);
ERRCHK_ALWAYS(device_config.int_params[AC_multigpu_offset] ==
device->local_config.int_params[AC_multigpu_offset]);
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &device_config, sizeof(device_config),
0, cudaMemcpyHostToDevice, device->streams[stream]));
return AC_SUCCESS;
}
AcResult
acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream, const AcMesh host_mesh,
const VertexBufferHandle vtxbuf_handle, const int3 src,
@@ -551,12 +601,13 @@ acDeviceIntegrateSubstep(const Device device, const Stream stream, const int ste
(unsigned int)ceil(n.y / AcReal(tpb.y)), //
(unsigned int)ceil(n.z / AcReal(tpb.z)));
acDeviceLoadScalarConstant(device, stream, AC_dt, dt);
if (step_number == 0)
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
else if (step_number == 1)
solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
else
solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
ERRCHK_CUDA_KERNEL();

View File

@@ -26,6 +26,8 @@
*/
#pragma once
#include "src/core/math_utils.h"
#include <assert.h>
static __device__ __forceinline__ int
@@ -321,65 +323,6 @@ read_data(const int i, const int j, const int k,
* =============================================================================
*/
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static __host__ __device__ __forceinline__ AcReal3
operator+(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a)
{
return (AcReal3){-a.x, -a.y, -a.z};
}
static __host__ __device__ __forceinline__ AcReal3 operator*(const AcReal a, const AcReal3& b)
{
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
static __host__ __device__ __forceinline__ AcReal
dot(const AcReal3& a, const AcReal3& b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
static __host__ __device__ __forceinline__ AcReal3
mul(const AcMatrix& aa, const AcReal3& x)
{
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
}
static __host__ __device__ __forceinline__ AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
AcReal3 c;
c.x = a.y * b.z - a.z * b.y;
c.y = a.z * b.x - a.x * b.z;
c.z = a.x * b.y - a.y * b.x;
return c;
}
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal& a)
{
return !isnan(a) && !isinf(a);
}
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal3& a)
{
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
}
/*
* =============================================================================
* Level 1 (Stencil Processing Stage)
@@ -642,4 +585,54 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
\
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
// clang-format off
#define GEN_DEVICE_FUNC_HOOK(identifier) \
template <int step_number> \
AcResult acDeviceKernel_##identifier(const Device device, const Stream stream, \
const int3 start, const int3 end) \
{ \
cudaSetDevice(device->id); \
\
const dim3 tpb(32, 1, 4); \
\
const int3 n = end - start; \
const dim3 bpg((unsigned int)ceil(n.x / AcReal(tpb.x)), \
(unsigned int)ceil(n.y / AcReal(tpb.y)), \
(unsigned int)ceil(n.z / AcReal(tpb.z))); \
\
identifier<step_number> \
<<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba); \
ERRCHK_CUDA_KERNEL(); \
\
return AC_SUCCESS; \
}
/*
#define GEN_NODE_FUNC_HOOK(identifier) \
template <int step_number> \
AcResult acNodeKernel_##identifier(const Node node, const Stream stream, const int3 start, \
const int3 end) \
{ \
acNodeSynchronizeStream(node, stream); \
\
for (int i = 0; i < node->num_devices; ++i) { \
\
const int3 d0 = (int3){NGHOST, NGHOST, NGHOST + i * node->subgrid.n.z}; \
const int3 d1 = d0 + (int3){node->subgrid.n.x, node->subgrid.n.y, node->subgrid.n.z}; \
\
const int3 da = max(start, d0); \
const int3 db = min(end, d1); \
\
if (db.z >= da.z) { \
const int3 da_local = da - (int3){0, 0, i * node->subgrid.n.z}; \
const int3 db_local = db - (int3){0, 0, i * node->subgrid.n.z}; \
acDeviceKernel_ #identifier(node->devices[i], stream, isubstep, da_local, \
db_local, dt); \
} \
} \
return AC_SUCCESS; \
}
*/
// clang-format on
#include "stencil_process.cuh"

View File

@@ -25,10 +25,10 @@
*
*/
#pragma once
#include <cmath>
using namespace std; // Potentially bad practice to declare namespace std here
// #include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0
// #include <tgmath.h> // Even this does not work
//#include <cmath>
// using namespace std; // Potentially bad practice to declare namespace std here
#include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0
//#include <tgmath.h> // Even this does not work
#include <stdlib.h> // rand
template <class T>
@@ -64,16 +64,6 @@ sum(const T& a, const T& b)
return a + b;
}
template <class T>
static inline const T
is_valid(const T& val)
{
if (isnan(val) || isinf(val))
return false;
else
return true;
}
template <class T>
static inline const T
clamp(const T& val, const T& min, const T& max)
@@ -87,20 +77,85 @@ randr()
return AcReal(rand()) / AcReal(RAND_MAX);
}
static inline int3
operator+(const int3& a, const int3& b)
{
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static inline int3
operator-(const int3& a, const int3& b)
{
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static inline bool
is_power_of_two(const unsigned val)
{
return val && !(val & (val - 1));
}
#ifdef __CUDACC__
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
#else
#define HOST_DEVICE_INLINE inline
#endif // __CUDACC__
static HOST_DEVICE_INLINE AcReal3
operator+(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static HOST_DEVICE_INLINE int3
operator+(const int3& a, const int3& b)
{
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
}
static HOST_DEVICE_INLINE AcReal3
operator-(const AcReal3& a, const AcReal3& b)
{
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static HOST_DEVICE_INLINE int3
operator-(const int3& a, const int3& b)
{
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
}
static HOST_DEVICE_INLINE AcReal3
operator-(const AcReal3& a)
{
return (AcReal3){-a.x, -a.y, -a.z};
}
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
{
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
static HOST_DEVICE_INLINE AcReal
dot(const AcReal3& a, const AcReal3& b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
static HOST_DEVICE_INLINE AcReal3
mul(const AcMatrix& aa, const AcReal3& x)
{
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
}
static HOST_DEVICE_INLINE AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
AcReal3 c;
c.x = a.y * b.z - a.z * b.y;
c.y = a.z * b.x - a.x * b.z;
c.z = a.x * b.y - a.y * b.x;
return c;
}
static HOST_DEVICE_INLINE bool
is_valid(const AcReal a)
{
return !isnan(a) && !isinf(a);
}
static HOST_DEVICE_INLINE bool
is_valid(const AcReal3& a)
{
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
}

View File

@@ -429,7 +429,7 @@ acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param
acNodeSynchronizeStream(node, stream);
// #pragma omp parallel for
for (int i = 0; i < node->num_devices; ++i) {
acDeviceLoadConstant(node->devices[i], stream, param, value);
acDeviceLoadScalarConstant(node->devices[i], stream, param, value);
}
return AC_SUCCESS;
}

View File

@@ -8,5 +8,5 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
find_package(MPI REQUIRED)
add_executable(mpitest main.c)
target_include_directories(mpitest PRIVATE ${MPI_C_INCLUDE_PATH})
target_link_libraries(mpitest PRIVATE ${MPI_C_LIBRARIES} astaroth_core)
target_include_directories(mpitest PRIVATE ${CMAKE_SOURCE_DIR}/src/standalone ${MPI_C_INCLUDE_PATH})
target_link_libraries(mpitest astaroth_core astaroth_standalone ${MPI_C_LIBRARIES})

View File

@@ -16,13 +16,120 @@
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
Running: mpirun -np <num processes> <executable>
*/
#undef NDEBUG // Assert always
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "astaroth.h"
#include "autotest.h"
#include <mpi.h>
// From Astaroth Standalone
#include "config_loader.h"
#include "model/host_memory.h"
static void
distribute_mesh(const AcMesh* src, AcMesh* dst)
{
MPI_Datatype datatype = MPI_FLOAT;
if (sizeof(AcReal) == 8)
datatype = MPI_DOUBLE;
int process_id, num_processes;
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
const size_t count = acVertexBufferSize(dst->info);
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
// Communicate to self
if (process_id == 0) {
assert(src);
assert(dst);
memcpy(&dst->vertex_buffer[i][0], //
&src->vertex_buffer[i][0], //
count * sizeof(src->vertex_buffer[i][0]));
}
// Communicate to others
for (int j = 1; j < num_processes; ++j) {
if (process_id == 0) {
assert(src);
// Send
// TODO RECHECK THESE j INDICES
const size_t src_idx = j * dst->info.int_params[AC_mx] *
dst->info.int_params[AC_my] * src->info.int_params[AC_nz] /
num_processes;
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, j, 0, MPI_COMM_WORLD);
}
else {
assert(dst);
// Recv
const size_t dst_idx = 0;
MPI_Status status;
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, 0, 0, MPI_COMM_WORLD,
&status);
}
}
}
}
static void
gather_mesh(const AcMesh* src, AcMesh* dst)
{
MPI_Datatype datatype = MPI_FLOAT;
if (sizeof(AcReal) == 8)
datatype = MPI_DOUBLE;
int process_id, num_processes;
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
size_t count = acVertexBufferSize(src->info);
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
// Communicate to self
if (process_id == 0) {
assert(src);
assert(dst);
memcpy(&dst->vertex_buffer[i][0], //
&src->vertex_buffer[i][0], //
count * sizeof(AcReal));
}
// Communicate to others
for (int j = 1; j < num_processes; ++j) {
if (process_id == 0) {
// Recv
// const size_t dst_idx = j * acVertexBufferCompdomainSize(dst->info);
const size_t dst_idx = j * dst->info.int_params[AC_mx] *
dst->info.int_params[AC_my] * dst->info.int_params[AC_nz] /
num_processes;
assert(dst_idx + count <= acVertexBufferSize(dst->info));
MPI_Status status;
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, j, 0, MPI_COMM_WORLD,
&status);
}
else {
// Send
const size_t src_idx = 0;
assert(src_idx + count <= acVertexBufferSize(src->info));
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, 0, 0, MPI_COMM_WORLD);
}
}
}
}
int
main(void)
{
@@ -37,14 +144,39 @@ main(void)
MPI_Get_processor_name(processor_name, &name_len);
printf("Processor %s. Process %d of %d.\n", processor_name, process_id, num_processes);
AcMeshInfo info = {
.int_params[AC_nx] = 128,
.int_params[AC_ny] = 64,
.int_params[AC_nz] = 32,
};
acInit(info);
acIntegrate(0.1f);
acQuit();
AcMeshInfo mesh_info;
load_config(&mesh_info);
update_config(&mesh_info);
AcMesh* main_mesh = NULL;
ModelMesh* model_mesh = NULL;
if (process_id == 0) {
main_mesh = acmesh_create(mesh_info);
acmesh_init_to(INIT_TYPE_RANDOM, main_mesh);
model_mesh = modelmesh_create(mesh_info);
acmesh_to_modelmesh(*main_mesh, model_mesh);
}
AcMeshInfo submesh_info = mesh_info;
submesh_info.int_params[AC_nz] /= num_processes;
update_config(&submesh_info);
AcMesh* submesh = acmesh_create(submesh_info);
/////////////////////
distribute_mesh(main_mesh, submesh);
gather_mesh(submesh, main_mesh);
/////////////////////////
// Autotest
bool is_acceptable = verify_meshes(*model_mesh, *main_mesh);
/////
acmesh_destroy(submesh);
if (process_id == 0) {
modelmesh_destroy(model_mesh);
acmesh_destroy(main_mesh);
}
MPI_Finalize();
return EXIT_SUCCESS;

View File

@@ -25,10 +25,11 @@ add_compile_options(-pipe ${OpenMP_CXX_FLAGS})
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
## Compile and link
add_library(astaroth_standalone ${SOURCES})
add_library(astaroth_standalone STATIC ${SOURCES})
target_link_libraries(astaroth_standalone PRIVATE astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
add_executable(ac_run main.cc)
target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
target_link_libraries(ac_run PRIVATE astaroth_standalone)
# Define the config directory
if (ALTER_CONF)

View File

@@ -75,6 +75,12 @@ static const InitType test_cases[] = {INIT_TYPE_RANDOM, INIT_TYPE_XWAVE,
INIT_TYPE_GAUSSIAN_RADIAL_EXPL, INIT_TYPE_ABC_FLOW};
// #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
static inline bool
is_valid(const ModelScalar a)
{
return !isnan(a) && !isinf(a);
}
#if TEST_TYPE == \
QUICK_TEST // REGULAR TEST START HERE
// --------------------------------------------------------------------------------------------------------------

View File

@@ -34,15 +34,6 @@
#include "src/core/errchk.h"
#include "src/core/math_utils.h"
static inline void
print(const AcMeshInfo& config)
{
for (int i = 0; i < NUM_INT_PARAMS; ++i)
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
}
/**
\brief Find the index of the keyword in names
\return Index in range 0...n if the keyword is in names. -1 if the keyword was
@@ -163,7 +154,7 @@ update_config(AcMeshInfo* config)
#if VERBOSE_PRINTING // Defined in astaroth.h
printf("###############################################################\n");
printf("Config dimensions recalculated:\n");
print(*config);
acPrintMeshInfo(*config);
printf("###############################################################\n");
#endif
}

View File

@@ -26,7 +26,9 @@
*/
#include "host_forcing.h"
#include "src/core/math_utils.h"
// #include "src/core/math_utils.h"
#include <cmath>
using namespace std;
// The is a wrapper for genering random numbers with a chosen system.
AcReal
@@ -36,7 +38,7 @@ get_random_number_01()
return AcReal(rand()) / AcReal(RAND_MAX);
}
AcReal3
static AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
AcReal3 c;
@@ -48,13 +50,13 @@ cross(const AcReal3& a, const AcReal3& b)
return c;
}
AcReal
static AcReal
dot(const AcReal3& a, const AcReal3& b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
AcReal3
static AcReal3
vec_norm(const AcReal3& a)
{
AcReal3 c;
@@ -67,7 +69,7 @@ vec_norm(const AcReal3& a)
return c;
}
AcReal3
static AcReal3
vec_multi_scal(const AcReal scal, const AcReal3& a)
{
AcReal3 c;

View File

@@ -32,14 +32,6 @@
AcReal get_random_number_01();
AcReal3 cross(const AcReal3& a, const AcReal3& b);
AcReal dot(const AcReal3& a, const AcReal3& b);
AcReal3 vec_norm(const AcReal3& a);
AcReal3 vec_multi_scal(const AcReal scal, const AcReal3& a);
AcReal3 helical_forcing_k_generator(const AcReal kmax, const AcReal kmin);
void helical_forcing_e_generator(AcReal3* e_force, const AcReal3 k_force);

View File

@@ -31,6 +31,16 @@
#include "host_memory.h"
#include "model_boundconds.h"
// Standalone flags
#define LDENSITY (1)
#define LHYDRO (1)
#define LMAGNETIC (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define LFORCING (1)
#define LUPWD (1)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
typedef struct {
ModelScalar x, y, z;
} ModelVector;

View File

@@ -27,6 +27,7 @@
#pragma once
#include "astaroth.h"
#include "math.h"
typedef long double ModelScalar;