Merge branch 'master' into sink_20190723
Hopefully the merge isssues were resolved.
This commit is contained in:
@@ -8,17 +8,21 @@ FILENAME="${FULL_NAME%.*}"
|
||||
EXTENSION="${FULL_NAME##*.}"
|
||||
|
||||
if [ "${EXTENSION}" = "sas" ]; then
|
||||
echo "Generating stencil assembly stage ${FILENAME}.sas -> stencil_assembly.cuh"
|
||||
COMPILE_FLAGS="-sas" # Generate stencil assembly stage
|
||||
CUH_FILENAME="stencil_assembly.cuh"
|
||||
echo "Generating stencil assembly stage ${FILENAME}.sas -> ${CUH_FILENAME}"
|
||||
elif [ "${EXTENSION}" = "sps" ]; then
|
||||
echo "Generating stencil processing stage: ${FILENAME}.sps -> stencil_process.cuh"
|
||||
COMPILE_FLAGS="-sps" # Generate stencil processing stage
|
||||
CUH_FILENAME="stencil_process.cuh"
|
||||
echo "Generating stencil processing stage: ${FILENAME}.sps -> ${CUH_FILENAME}"
|
||||
elif [ "${EXTENSION}" = "sdh" ]; then
|
||||
COMPILE_FLAGS="-sdh" # Generate stencil definition header
|
||||
CUH_FILENAME="stencil_defines.h"
|
||||
echo "Generating stencil definition header: ${FILENAME}.sdh -> ${CUH_FILENAME}"
|
||||
else
|
||||
echo "Error: unknown extension" ${EXTENSION} "of file" ${FULL_NAME}
|
||||
echo "Extension should be either .sas or .sps"
|
||||
echo "Extension should be either .sas, .sps or .sdh"
|
||||
exit
|
||||
fi
|
||||
|
||||
${ACC_DIR}/preprocess.sh $2 $1 | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME}
|
||||
${ACC_DIR}/preprocess.sh $1 | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME}
|
||||
|
5
acc/mhd_solver/.gitignore
vendored
Normal file
5
acc/mhd_solver/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
build
|
||||
testbin
|
||||
|
||||
# Except this file
|
||||
!.gitignore
|
@@ -1,3 +1,5 @@
|
||||
#include "stencil_definition.sdh"
|
||||
|
||||
Preprocessed Scalar
|
||||
value(in ScalarField vertex)
|
||||
{
|
||||
@@ -7,9 +9,7 @@ value(in ScalarField vertex)
|
||||
Preprocessed Vector
|
||||
gradient(in ScalarField vertex)
|
||||
{
|
||||
return (Vector){derx(vertexIdx, vertex),
|
||||
dery(vertexIdx, vertex),
|
||||
derz(vertexIdx, vertex)};
|
||||
return (Vector){derx(vertexIdx, vertex), dery(vertexIdx, vertex), derz(vertexIdx, vertex)};
|
||||
}
|
||||
|
||||
#if LUPWD
|
||||
@@ -17,46 +17,46 @@ gradient(in ScalarField vertex)
|
||||
Preprocessed Scalar
|
||||
der6x_upwd(in ScalarField vertex)
|
||||
{
|
||||
Scalar inv_ds = DCONST_REAL(AC_inv_dsx);
|
||||
Scalar inv_ds = AC_inv_dsx;
|
||||
|
||||
return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
|
||||
- Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
|
||||
+ Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z]
|
||||
+ vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z])
|
||||
- Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z]
|
||||
+ vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z])
|
||||
+ vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z]
|
||||
+ vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])};
|
||||
return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
|
||||
(-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
|
||||
Scalar(15.0) * (vertex[vertexIdx.x + 1, vertexIdx.y, vertexIdx.z] +
|
||||
vertex[vertexIdx.x - 1, vertexIdx.y, vertexIdx.z]) -
|
||||
Scalar(6.0) * (vertex[vertexIdx.x + 2, vertexIdx.y, vertexIdx.z] +
|
||||
vertex[vertexIdx.x - 2, vertexIdx.y, vertexIdx.z]) +
|
||||
vertex[vertexIdx.x + 3, vertexIdx.y, vertexIdx.z] +
|
||||
vertex[vertexIdx.x - 3, vertexIdx.y, vertexIdx.z])};
|
||||
}
|
||||
|
||||
Preprocessed Scalar
|
||||
der6y_upwd(in ScalarField vertex)
|
||||
{
|
||||
Scalar inv_ds = DCONST_REAL(AC_inv_dsy);
|
||||
Scalar inv_ds = AC_inv_dsy;
|
||||
|
||||
return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
|
||||
-Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
|
||||
+Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z]
|
||||
+ vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z])
|
||||
-Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z]
|
||||
+ vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z])
|
||||
+ vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z]
|
||||
+ vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])};
|
||||
return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
|
||||
(-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
|
||||
Scalar(15.0) * (vertex[vertexIdx.x, vertexIdx.y + 1, vertexIdx.z] +
|
||||
vertex[vertexIdx.x, vertexIdx.y - 1, vertexIdx.z]) -
|
||||
Scalar(6.0) * (vertex[vertexIdx.x, vertexIdx.y + 2, vertexIdx.z] +
|
||||
vertex[vertexIdx.x, vertexIdx.y - 2, vertexIdx.z]) +
|
||||
vertex[vertexIdx.x, vertexIdx.y + 3, vertexIdx.z] +
|
||||
vertex[vertexIdx.x, vertexIdx.y - 3, vertexIdx.z])};
|
||||
}
|
||||
|
||||
Preprocessed Scalar
|
||||
der6z_upwd(in ScalarField vertex)
|
||||
{
|
||||
Scalar inv_ds = DCONST_REAL(AC_inv_dsz);
|
||||
Scalar inv_ds = AC_inv_dsz;
|
||||
|
||||
return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
|
||||
-Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
|
||||
+Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1]
|
||||
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1])
|
||||
-Scalar( 6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2]
|
||||
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2])
|
||||
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3]
|
||||
+ vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])};
|
||||
return (Scalar){Scalar(1.0 / 60.0) * inv_ds *
|
||||
(-Scalar(20.0) * vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z] +
|
||||
Scalar(15.0) * (vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 1] +
|
||||
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 1]) -
|
||||
Scalar(6.0) * (vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 2] +
|
||||
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 2]) +
|
||||
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z + 3] +
|
||||
vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z - 3])};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -66,9 +66,10 @@ hessian(in ScalarField vertex)
|
||||
{
|
||||
Matrix hessian;
|
||||
|
||||
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex), derxz(vertexIdx, vertex)};
|
||||
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
|
||||
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
|
||||
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex),
|
||||
derxz(vertexIdx, vertex)};
|
||||
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
|
||||
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
|
||||
|
||||
return hessian;
|
||||
}
|
||||
|
@@ -1,181 +0,0 @@
|
||||
/*
|
||||
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||
|
||||
This file is part of Astaroth.
|
||||
|
||||
Astaroth is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Astaroth is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Logical switches
|
||||
* =============================================================================
|
||||
*/
|
||||
#define STENCIL_ORDER (6)
|
||||
#define NGHOST (STENCIL_ORDER / 2)
|
||||
#define LDENSITY (1)
|
||||
#define LHYDRO (1)
|
||||
#define LMAGNETIC (1)
|
||||
#define LENTROPY (0)
|
||||
#define LTEMPERATURE (0)
|
||||
#define LFORCING (0)
|
||||
#define LUPWD (1)
|
||||
#define LSINK (1)
|
||||
|
||||
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* User-defined parameters
|
||||
* =============================================================================
|
||||
*/
|
||||
// clang-format off
|
||||
#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)\
|
||||
/* Other */\
|
||||
FUNC(AC_max_steps), \
|
||||
FUNC(AC_save_steps), \
|
||||
FUNC(AC_bin_steps), \
|
||||
FUNC(AC_bc_type),
|
||||
|
||||
#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)
|
||||
|
||||
#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)\
|
||||
/* cparams */\
|
||||
FUNC(AC_dsx), \
|
||||
FUNC(AC_dsy), \
|
||||
FUNC(AC_dsz), \
|
||||
FUNC(AC_dsmin), \
|
||||
/* physical grid*/\
|
||||
FUNC(AC_xlen), \
|
||||
FUNC(AC_ylen), \
|
||||
FUNC(AC_zlen), \
|
||||
FUNC(AC_xorig), \
|
||||
FUNC(AC_yorig), \
|
||||
FUNC(AC_zorig), \
|
||||
/*Physical units*/\
|
||||
FUNC(AC_unit_density),\
|
||||
FUNC(AC_unit_velocity),\
|
||||
FUNC(AC_unit_length),\
|
||||
/* properties of gravitating star*/\
|
||||
FUNC(AC_star_pos_x),\
|
||||
FUNC(AC_star_pos_y),\
|
||||
FUNC(AC_star_pos_z),\
|
||||
FUNC(AC_M_star),\
|
||||
/* properties of sink particle*/\
|
||||
FUNC(AC_sink_pos_x),\
|
||||
FUNC(AC_sink_pos_y),\
|
||||
FUNC(AC_sink_pos_z),\
|
||||
FUNC(AC_M_sink),\
|
||||
FUNC(AC_M_sink_init),\
|
||||
FUNC(AC_M_sink_Msun),\
|
||||
FUNC(AC_soft),\
|
||||
FUNC(AC_accretion_range),\
|
||||
FUNC(AC_switch_accretion),\
|
||||
/* Run params */\
|
||||
FUNC(AC_cdt), \
|
||||
FUNC(AC_cdtv), \
|
||||
FUNC(AC_cdts), \
|
||||
FUNC(AC_nu_visc), \
|
||||
FUNC(AC_cs_sound), \
|
||||
FUNC(AC_eta), \
|
||||
FUNC(AC_mu0), \
|
||||
FUNC(AC_cp_sound), \
|
||||
FUNC(AC_gamma), \
|
||||
FUNC(AC_cv_sound), \
|
||||
FUNC(AC_lnT0), \
|
||||
FUNC(AC_lnrho0), \
|
||||
FUNC(AC_zeta), \
|
||||
FUNC(AC_trans),\
|
||||
/* Other */\
|
||||
FUNC(AC_bin_save_t), \
|
||||
/* Initial condition params */\
|
||||
FUNC(AC_ampl_lnrho), \
|
||||
FUNC(AC_ampl_uu), \
|
||||
FUNC(AC_angl_uu), \
|
||||
FUNC(AC_lnrho_edge),\
|
||||
FUNC(AC_lnrho_out),\
|
||||
/* Forcing parameters. User configured. */\
|
||||
FUNC(AC_forcing_magnitude),\
|
||||
FUNC(AC_relhel), \
|
||||
FUNC(AC_kmin), \
|
||||
FUNC(AC_kmax), \
|
||||
/* Forcing parameters. Set by the generator. */\
|
||||
FUNC(AC_forcing_phase),\
|
||||
FUNC(AC_k_forcex),\
|
||||
FUNC(AC_k_forcey),\
|
||||
FUNC(AC_k_forcez),\
|
||||
FUNC(AC_kaver),\
|
||||
FUNC(AC_ff_hel_rex),\
|
||||
FUNC(AC_ff_hel_rey),\
|
||||
FUNC(AC_ff_hel_rez),\
|
||||
FUNC(AC_ff_hel_imx),\
|
||||
FUNC(AC_ff_hel_imy),\
|
||||
FUNC(AC_ff_hel_imz),\
|
||||
/* Additional helper params */\
|
||||
/* (deduced from other params do not set these directly!) */\
|
||||
FUNC(AC_G_const),\
|
||||
FUNC(AC_unit_mass),\
|
||||
FUNC(AC_GM_star),\
|
||||
FUNC(AC_sq2GM_star),\
|
||||
FUNC(AC_cs2_sound), \
|
||||
FUNC(AC_inv_dsx), \
|
||||
FUNC(AC_inv_dsy), \
|
||||
FUNC(AC_inv_dsz),
|
||||
|
||||
#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)
|
||||
// clang-format on
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* User-defined vertex buffers
|
||||
* =============================================================================
|
||||
*/
|
||||
// clang-format off
|
||||
#if LENTROPY
|
||||
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||
FUNC(VTXBUF_LNRHO), \
|
||||
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
|
||||
FUNC(VTXBUF_ACCRETION), \
|
||||
FUNC(VTXBUF_UUX), \
|
||||
FUNC(VTXBUF_UUY), \
|
||||
FUNC(VTXBUF_UUZ), \
|
||||
FUNC(VTXBUF_AX), \
|
||||
FUNC(VTXBUF_AY), \
|
||||
FUNC(VTXBUF_AZ), \
|
||||
FUNC(VTXBUF_ENTROPY),
|
||||
#elif LMAGNETIC
|
||||
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||
FUNC(VTXBUF_LNRHO), \
|
||||
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
|
||||
FUNC(VTXBUF_ACCRETION), \
|
||||
FUNC(VTXBUF_UUX), \
|
||||
FUNC(VTXBUF_UUY), \
|
||||
FUNC(VTXBUF_UUZ), \
|
||||
FUNC(VTXBUF_AX), \
|
||||
FUNC(VTXBUF_AY), \
|
||||
FUNC(VTXBUF_AZ),
|
||||
#elif LHYDRO
|
||||
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||
FUNC(VTXBUF_LNRHO), \
|
||||
/*Added vertex buffer for sink. TODO: Invoke smarter.*/\
|
||||
FUNC(VTXBUF_ACCRETION), \
|
||||
FUNC(VTXBUF_UUX), \
|
||||
FUNC(VTXBUF_UUY), \
|
||||
FUNC(VTXBUF_UUZ),
|
||||
#else
|
||||
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||
FUNC(VTXBUF_LNRHO),
|
||||
#endif
|
||||
// clang-format on
|
130
acc/mhd_solver/stencil_definition.sdh
Normal file
130
acc/mhd_solver/stencil_definition.sdh
Normal file
@@ -0,0 +1,130 @@
|
||||
#define LDENSITY (1)
|
||||
#define LHYDRO (1)
|
||||
#define LMAGNETIC (1)
|
||||
#define LENTROPY (1)
|
||||
#define LTEMPERATURE (0)
|
||||
#define LFORCING (1)
|
||||
#define LUPWD (1)
|
||||
#define LSINK (1)
|
||||
|
||||
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||
|
||||
// Int params
|
||||
uniform int AC_max_steps;
|
||||
uniform int AC_save_steps;
|
||||
uniform int AC_bin_steps;
|
||||
uniform int AC_bc_type;
|
||||
|
||||
// Real params
|
||||
uniform Scalar AC_dt;
|
||||
// Spacing
|
||||
uniform Scalar AC_dsx;
|
||||
uniform Scalar AC_dsy;
|
||||
uniform Scalar AC_dsz;
|
||||
uniform Scalar AC_dsmin;
|
||||
// physical grid
|
||||
uniform Scalar AC_xlen;
|
||||
uniform Scalar AC_ylen;
|
||||
uniform Scalar AC_zlen;
|
||||
uniform Scalar AC_xorig;
|
||||
uniform Scalar AC_yorig;
|
||||
uniform Scalar AC_zorig;
|
||||
// Physical units
|
||||
uniform Scalar AC_unit_density;
|
||||
uniform Scalar AC_unit_velocity;
|
||||
uniform Scalar AC_unit_length;
|
||||
// properties of gravitating star
|
||||
uniform Scalar AC_star_pos_x;
|
||||
uniform Scalar AC_star_pos_y;
|
||||
uniform Scalar AC_star_pos_z;
|
||||
uniform Scalar AC_M_star;
|
||||
// properties of sink particle
|
||||
uniform Scalar AC_sink_pos_x;
|
||||
uniform Scalar AC_sink_pos_y;
|
||||
uniform Scalar AC_sink_pos_z;
|
||||
uniform Scalar AC_M_sink;
|
||||
uniform Scalar AC_M_sink_init;
|
||||
uniform Scalar AC_M_sink_Msun;
|
||||
uniform Scalar AC_soft;
|
||||
uniform Scalar AC_accretion_range;
|
||||
uniform Scalar AC_switch_accretion;
|
||||
// Run params
|
||||
uniform Scalar AC_cdt;
|
||||
uniform Scalar AC_cdtv;
|
||||
uniform Scalar AC_cdts;
|
||||
uniform Scalar AC_nu_visc;
|
||||
uniform Scalar AC_cs_sound;
|
||||
uniform Scalar AC_eta;
|
||||
uniform Scalar AC_mu0;
|
||||
uniform Scalar AC_cp_sound;
|
||||
uniform Scalar AC_gamma;
|
||||
uniform Scalar AC_cv_sound;
|
||||
uniform Scalar AC_lnT0;
|
||||
uniform Scalar AC_lnrho0;
|
||||
uniform Scalar AC_zeta;
|
||||
uniform Scalar AC_trans;
|
||||
// Other
|
||||
uniform Scalar AC_bin_save_t;
|
||||
// Initial condition params
|
||||
uniform Scalar AC_ampl_lnrho;
|
||||
uniform Scalar AC_ampl_uu;
|
||||
uniform Scalar AC_angl_uu;
|
||||
uniform Scalar AC_lnrho_edge;
|
||||
uniform Scalar AC_lnrho_out;
|
||||
// Forcing parameters. User configured.
|
||||
uniform Scalar AC_forcing_magnitude;
|
||||
uniform Scalar AC_relhel;
|
||||
uniform Scalar AC_kmin;
|
||||
uniform Scalar AC_kmax;
|
||||
// Forcing parameters. Set by the generator.
|
||||
uniform Scalar AC_forcing_phase;
|
||||
uniform Scalar AC_k_forcex;
|
||||
uniform Scalar AC_k_forcey;
|
||||
uniform Scalar AC_k_forcez;
|
||||
uniform Scalar AC_kaver;
|
||||
uniform Scalar AC_ff_hel_rex;
|
||||
uniform Scalar AC_ff_hel_rey;
|
||||
uniform Scalar AC_ff_hel_rez;
|
||||
uniform Scalar AC_ff_hel_imx;
|
||||
uniform Scalar AC_ff_hel_imy;
|
||||
uniform Scalar AC_ff_hel_imz;
|
||||
// Additional helper params // (deduced from other params do not set these directly!)
|
||||
uniform Scalar AC_G_CONST;
|
||||
uniform Scalar AC_GM_star;
|
||||
uniform Scalar AC_unit_mass;
|
||||
uniform Scalar AC_sq2GM_star;
|
||||
uniform Scalar AC_cs2_sound;
|
||||
uniform Scalar AC_inv_dsx;
|
||||
uniform Scalar AC_inv_dsy;
|
||||
uniform Scalar AC_inv_dsz;
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* User-defined vertex buffers
|
||||
* =============================================================================
|
||||
*/
|
||||
#if LENTROPY
|
||||
uniform ScalarField VTXBUF_LNRHO;
|
||||
uniform ScalarField VTXBUF_UUX;
|
||||
uniform ScalarField VTXBUF_UUY;
|
||||
uniform ScalarField VTXBUF_UUZ;
|
||||
uniform ScalarField VTXBUF_AX;
|
||||
uniform ScalarField VTXBUF_AY;
|
||||
uniform ScalarField VTXBUF_AZ;
|
||||
uniform ScalarField VTXBUF_ENTROPY;
|
||||
#elif LMAGNETIC
|
||||
uniform ScalarField VTXBUF_LNRHO;
|
||||
uniform ScalarField VTXBUF_UUX;
|
||||
uniform ScalarField VTXBUF_UUY;
|
||||
uniform ScalarField VTXBUF_UUZ;
|
||||
uniform ScalarField VTXBUF_AX;
|
||||
uniform ScalarField VTXBUF_AY;
|
||||
uniform ScalarField VTXBUF_AZ;
|
||||
#elif LHYDRO
|
||||
uniform ScalarField VTXBUF_LNRHO;
|
||||
uniform ScalarField VTXBUF_UUX;
|
||||
uniform ScalarField VTXBUF_UUY;
|
||||
uniform ScalarField VTXBUF_UUZ;
|
||||
#else
|
||||
uniform ScalarField VTXBUF_LNRHO;
|
||||
#endif
|
@@ -1,26 +1,4 @@
|
||||
// Declare uniforms (i.e. device constants)
|
||||
uniform Scalar cs2_sound;
|
||||
uniform Scalar nu_visc;
|
||||
uniform Scalar cp_sound;
|
||||
uniform Scalar cv_sound;
|
||||
uniform Scalar mu0;
|
||||
uniform Scalar eta;
|
||||
uniform Scalar gamma;
|
||||
uniform Scalar zeta;
|
||||
|
||||
uniform Scalar dsx;
|
||||
uniform Scalar dsy;
|
||||
uniform Scalar dsz;
|
||||
|
||||
uniform Scalar lnT0;
|
||||
uniform Scalar lnrho0;
|
||||
|
||||
uniform int nx_min;
|
||||
uniform int ny_min;
|
||||
uniform int nz_min;
|
||||
uniform int nx;
|
||||
uniform int ny;
|
||||
uniform int nz;
|
||||
#include "stencil_definition.sdh"
|
||||
|
||||
Vector
|
||||
value(in VectorField uu)
|
||||
@@ -35,7 +13,7 @@ upwd_der6(in VectorField uu, in ScalarField lnrho)
|
||||
Scalar uux = fabs(value(uu).x);
|
||||
Scalar uuy = fabs(value(uu).y);
|
||||
Scalar uuz = fabs(value(uu).z);
|
||||
return (Scalar){uux*der6x_upwd(lnrho) + uuy*der6y_upwd(lnrho) + uuz*der6z_upwd(lnrho)};
|
||||
return (Scalar){uux * der6x_upwd(lnrho) + uuy * der6y_upwd(lnrho) + uuz * der6z_upwd(lnrho)};
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -167,12 +145,12 @@ sink_accretion_velocity(int3 globalVertexIdx, in VectorField uu, Scalar dt) {
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
Scalar
|
||||
continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) {
|
||||
continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
|
||||
{
|
||||
return -dot(value(uu), gradient(lnrho))
|
||||
#if LUPWD
|
||||
//This is a corrective hyperdiffusion term for upwinding.
|
||||
// This is a corrective hyperdiffusion term for upwinding.
|
||||
+ upwd_der6(uu, lnrho)
|
||||
#endif
|
||||
#if LSINK
|
||||
@@ -185,148 +163,159 @@ continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar
|
||||
|
||||
#if LENTROPY
|
||||
Vector
|
||||
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt) {
|
||||
const Matrix S = stress_tensor(uu);
|
||||
const Scalar cs2 = cs2_sound * exp(gamma * value(ss) / cp_sound + (gamma - 1) * (value(lnrho) - lnrho0));
|
||||
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
|
||||
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt)
|
||||
{
|
||||
const Matrix S = stress_tensor(uu);
|
||||
const Scalar cs2 = AC_cs2_sound * exp(AC_gamma * value(ss) / AC_cp_sound +
|
||||
(AC_gamma - 1) * (value(lnrho) - AC_lnrho0));
|
||||
const Vector j = (Scalar(1.) / AC_mu0) *
|
||||
(gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
|
||||
const Vector B = curl(aa);
|
||||
//TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD?
|
||||
// TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD?
|
||||
const Scalar inv_rho = Scalar(1.) / exp(value(lnrho));
|
||||
|
||||
// Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
|
||||
// \1
|
||||
const Vector mom = - mul(gradients(uu), value(uu))
|
||||
- cs2 * ((Scalar(1.) / cp_sound) * gradient(ss) + gradient(lnrho))
|
||||
+ inv_rho * cross(j, B)
|
||||
+ nu_visc * (
|
||||
laplace_vec(uu)
|
||||
+ Scalar(1. / 3.) * gradient_of_divergence(uu)
|
||||
+ Scalar(2.) * mul(S, gradient(lnrho))
|
||||
)
|
||||
+ zeta * gradient_of_divergence(uu)
|
||||
const Vector mom = -mul(gradients(uu), value(uu)) -
|
||||
cs2 * ((Scalar(1.) / AC_cp_sound) * gradient(ss) + gradient(lnrho)) +
|
||||
inv_rho * cross(j, B) +
|
||||
AC_nu_visc *
|
||||
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
|
||||
Scalar(2.) * mul(S, gradient(lnrho))) +
|
||||
AC_zeta * gradient_of_divergence(uu)
|
||||
#if LSINK
|
||||
//Gravity term
|
||||
+ sink_gravity(globalVertexIdx)
|
||||
//Corresponding loss of momentum
|
||||
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
|
||||
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
|
||||
;
|
||||
//Gravity term
|
||||
+ sink_gravity(globalVertexIdx)
|
||||
//Corresponding loss of momentum
|
||||
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
|
||||
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
|
||||
;
|
||||
#else
|
||||
;
|
||||
;
|
||||
#endif
|
||||
return mom;
|
||||
}
|
||||
#elif LTEMPERATURE
|
||||
Vector
|
||||
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt) {
|
||||
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt)
|
||||
{
|
||||
Vector mom;
|
||||
const Matrix S = stress_tensor(uu);
|
||||
const Vector pressure_term = (cp_sound - cv_sound) * (gradient(tt) + value(tt) * gradient(lnrho));
|
||||
mom = -mul(gradients(uu), value(uu)) -
|
||||
pressure_term +
|
||||
nu_visc *
|
||||
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
|
||||
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
|
||||
|
||||
const Matrix S = stress_tensor(uu);
|
||||
|
||||
const Vector pressure_term = (AC_cp_sound - AC_cv_sound) *
|
||||
(gradient(tt) + value(tt) * gradient(lnrho));
|
||||
|
||||
mom = -mul(gradients(uu), value(uu)) - pressure_term +
|
||||
AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
|
||||
Scalar(2.) * mul(S, gradient(lnrho))) +
|
||||
AC_zeta * gradient_of_divergence(uu)
|
||||
#if LSINK
|
||||
+ sink_gravity(globalVertexIdx);
|
||||
+ sink_gravity(globalVertexIdx);
|
||||
#else
|
||||
;
|
||||
;
|
||||
#endif
|
||||
|
||||
return mom;
|
||||
#if LGRAVITY
|
||||
mom = mom - (Vector){0, 0, -10.0};
|
||||
#endif
|
||||
return mom;
|
||||
}
|
||||
#else
|
||||
Vector
|
||||
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) {
|
||||
momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
|
||||
{
|
||||
Vector mom;
|
||||
|
||||
const Matrix S = stress_tensor(uu);
|
||||
|
||||
// Isothermal: we have constant speed of sound
|
||||
|
||||
mom = -mul(gradients(uu), value(uu)) -
|
||||
cs2_sound * gradient(lnrho) +
|
||||
nu_visc *
|
||||
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
|
||||
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
|
||||
|
||||
mom = -mul(gradients(uu), value(uu)) - AC_cs2_sound * gradient(lnrho) +
|
||||
AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
|
||||
Scalar(2.) * mul(S, gradient(lnrho))) +
|
||||
AC_zeta * gradient_of_divergence(uu)
|
||||
#if LSINK
|
||||
+ sink_gravity(globalVertexIdx);
|
||||
//Corresponding loss of momentum
|
||||
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
|
||||
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
|
||||
;
|
||||
+ sink_gravity(globalVertexIdx)
|
||||
//Corresponding loss of momentum
|
||||
- //(Scalar(1.0) / Scalar( (dsx*dsy*dsz) * exp(value(lnrho)))) * // Correction factor by unit mass
|
||||
sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
|
||||
;
|
||||
#else
|
||||
;
|
||||
;
|
||||
#endif
|
||||
|
||||
return mom;
|
||||
#if LGRAVITY
|
||||
mom = mom - (Vector){0, 0, -10.0};
|
||||
#endif
|
||||
|
||||
return mom;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
Vector
|
||||
induction(in VectorField uu, in VectorField aa) {
|
||||
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
|
||||
// x A)) in order to avoid taking the first derivative twice (did the math,
|
||||
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
|
||||
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
|
||||
const Vector B = curl(aa);
|
||||
const Vector grad_div = gradient_of_divergence(aa);
|
||||
const Vector lap = laplace_vec(aa);
|
||||
induction(in VectorField uu, in VectorField aa)
|
||||
{
|
||||
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
|
||||
// x A)) in order to avoid taking the first derivative twice (did the math,
|
||||
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
|
||||
// u cross B - AC_eta * AC_mu0 * (AC_mu0^-1 * [- laplace A + grad div A ])
|
||||
const Vector B = curl(aa);
|
||||
const Vector grad_div = gradient_of_divergence(aa);
|
||||
const Vector lap = laplace_vec(aa);
|
||||
|
||||
// Note, mu0 is cancelled out
|
||||
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
|
||||
// Note, AC_mu0 is cancelled out
|
||||
const Vector ind = cross(value(uu), B) - AC_eta * (grad_div - lap);
|
||||
|
||||
return ind;
|
||||
return ind;
|
||||
}
|
||||
|
||||
|
||||
#if LENTROPY
|
||||
Scalar
|
||||
lnT( in ScalarField ss, in ScalarField lnrho) {
|
||||
const Scalar lnT = lnT0 + gamma * value(ss) / cp_sound +
|
||||
(gamma - Scalar(1.)) * (value(lnrho) - lnrho0);
|
||||
return lnT;
|
||||
lnT(in ScalarField ss, in ScalarField lnrho)
|
||||
{
|
||||
const Scalar lnT = AC_lnT0 + AC_gamma * value(ss) / AC_cp_sound +
|
||||
(AC_gamma - Scalar(1.)) * (value(lnrho) - AC_lnrho0);
|
||||
return lnT;
|
||||
}
|
||||
|
||||
// Nabla dot (K nabla T) / (rho T)
|
||||
Scalar
|
||||
heat_conduction( in ScalarField ss, in ScalarField lnrho) {
|
||||
const Scalar inv_cp_sound = AcReal(1.) / cp_sound;
|
||||
heat_conduction(in ScalarField ss, in ScalarField lnrho)
|
||||
{
|
||||
const Scalar inv_AC_cp_sound = AcReal(1.) / AC_cp_sound;
|
||||
|
||||
const Vector grad_ln_chi = - gradient(lnrho);
|
||||
const Vector grad_ln_chi = -gradient(lnrho);
|
||||
|
||||
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) +
|
||||
(gamma - AcReal(1.)) * laplace(lnrho);
|
||||
const Vector second_term = gamma * inv_cp_sound * gradient(ss) +
|
||||
(gamma - AcReal(1.)) * gradient(lnrho);
|
||||
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) +
|
||||
gradient(lnrho)) + grad_ln_chi;
|
||||
const Scalar first_term = AC_gamma * inv_AC_cp_sound * laplace(ss) +
|
||||
(AC_gamma - AcReal(1.)) * laplace(lnrho);
|
||||
const Vector second_term = AC_gamma * inv_AC_cp_sound * gradient(ss) +
|
||||
(AC_gamma - AcReal(1.)) * gradient(lnrho);
|
||||
const Vector third_term = AC_gamma * (inv_AC_cp_sound * gradient(ss) + gradient(lnrho)) +
|
||||
grad_ln_chi;
|
||||
|
||||
|
||||
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * cp_sound);
|
||||
return cp_sound * chi * (first_term + dot(second_term, third_term));
|
||||
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * AC_cp_sound);
|
||||
return AC_cp_sound * chi * (first_term + dot(second_term, third_term));
|
||||
}
|
||||
|
||||
Scalar
|
||||
heating(const int i, const int j, const int k) {
|
||||
return 1;
|
||||
heating(const int i, const int j, const int k)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
Scalar
|
||||
entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa) {
|
||||
const Matrix S = stress_tensor(uu);
|
||||
entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa)
|
||||
{
|
||||
const Matrix S = stress_tensor(uu);
|
||||
const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
|
||||
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
|
||||
const Scalar RHS = H_CONST - C_CONST
|
||||
+ eta * (mu0) * dot(j, j)
|
||||
+ Scalar(2.) * exp(value(lnrho)) * nu_visc * contract(S)
|
||||
+ zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
|
||||
const Vector j = (Scalar(1.) / AC_mu0) *
|
||||
(gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
|
||||
const Scalar RHS = H_CONST - C_CONST + AC_eta * (AC_mu0)*dot(j, j) +
|
||||
Scalar(2.) * exp(value(lnrho)) * AC_nu_visc * contract(S) +
|
||||
AC_zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
|
||||
|
||||
return - dot(value(uu), gradient(ss))
|
||||
+ inv_pT * RHS
|
||||
+ heat_conduction(ss, lnrho);
|
||||
return -dot(value(uu), gradient(ss)) + inv_pT * RHS + heat_conduction(ss, lnrho);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -334,14 +323,15 @@ entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorFie
|
||||
Scalar
|
||||
heat_transfer(in VectorField uu, in ScalarField lnrho, in ScalarField tt)
|
||||
{
|
||||
const Matrix S = stress_tensor(uu);
|
||||
const Scalar heat_diffusivity_k = 0.0008; //8e-4;
|
||||
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) + heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) + nu_visc * contract(S) * (Scalar(1.) / cv_sound) - (gamma - 1) * value(tt) * divergence(uu);
|
||||
const Matrix S = stress_tensor(uu);
|
||||
const Scalar heat_diffusivity_k = 0.0008; // 8e-4;
|
||||
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) +
|
||||
heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) +
|
||||
AC_nu_visc * contract(S) * (Scalar(1.) / AC_cv_sound) -
|
||||
(AC_gamma - 1) * value(tt) * divergence(uu);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if LFORCING
|
||||
Vector
|
||||
simple_vortex_forcing(Vector a, Vector b, Scalar magnitude){
|
||||
@@ -363,50 +353,40 @@ Vector
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable helicity
|
||||
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable
|
||||
// helicity
|
||||
Vector
|
||||
helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vector ff_im, Scalar phi)
|
||||
{
|
||||
int accretion_switch = DCONST_INT(AC_switch_accretion);
|
||||
if (accretion_switch == 0){
|
||||
// JP: This looks wrong:
|
||||
// 1) Should it be AC_dsx * AC_nx instead of AC_dsx * AC_ny?
|
||||
// 2) Should you also use globalGrid.n instead of the local n?
|
||||
// MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split
|
||||
// in z direction not y direction.
|
||||
// 3) Also final point: can we do this with vectors/quaternions instead?
|
||||
// Tringonometric functions are much more expensive and inaccurate/
|
||||
// MV: Good idea. No an immediate priority.
|
||||
// Fun related article:
|
||||
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
|
||||
xx.x = xx.x * (2.0 * M_PI / (AC_dsx * globalGridN.x));
|
||||
xx.y = xx.y * (2.0 * M_PI / (AC_dsy * globalGridN.y));
|
||||
xx.z = xx.z * (2.0 * M_PI / (AC_dsz * globalGridN.z));
|
||||
|
||||
|
||||
// JP: This looks wrong:
|
||||
// 1) Should it be dsx * nx instead of dsx * ny?
|
||||
// 2) Should you also use globalGrid.n instead of the local n?
|
||||
// MV: You are rigth. Made a quickfix. I did not see the error because multigpu is split
|
||||
// in z direction not y direction.
|
||||
// 3) Also final point: can we do this with vectors/quaternions instead?
|
||||
// Tringonometric functions are much more expensive and inaccurate/
|
||||
// MV: Good idea. No an immediate priority.
|
||||
// Fun related article:
|
||||
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
|
||||
xx.x = xx.x*(2.0*M_PI/(dsx*globalGridN.x));
|
||||
xx.y = xx.y*(2.0*M_PI/(dsy*globalGridN.y));
|
||||
xx.z = xx.z*(2.0*M_PI/(dsz*globalGridN.z));
|
||||
|
||||
Scalar cos_phi = cos(phi);
|
||||
Scalar sin_phi = sin(phi);
|
||||
Scalar cos_k_dot_x = cos(dot(k_force, xx));
|
||||
Scalar sin_k_dot_x = sin(dot(k_force, xx));
|
||||
// Phase affect only the x-component
|
||||
//Scalar real_comp = cos_k_dot_x;
|
||||
//Scalar imag_comp = sin_k_dot_x;
|
||||
Scalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi;
|
||||
Scalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi;
|
||||
|
||||
|
||||
Vector force = (Vector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase,
|
||||
ff_re.y*real_comp_phase - ff_im.y*imag_comp_phase,
|
||||
ff_re.z*real_comp_phase - ff_im.z*imag_comp_phase};
|
||||
|
||||
|
||||
return force;
|
||||
} else {
|
||||
return (Vector){0,0,0};
|
||||
}
|
||||
Scalar cos_phi = cos(phi);
|
||||
Scalar sin_phi = sin(phi);
|
||||
Scalar cos_k_dot_x = cos(dot(k_force, xx));
|
||||
Scalar sin_k_dot_x = sin(dot(k_force, xx));
|
||||
// Phase affect only the x-component
|
||||
// Scalar real_comp = cos_k_dot_x;
|
||||
// Scalar imag_comp = sin_k_dot_x;
|
||||
Scalar real_comp_phase = cos_k_dot_x * cos_phi - sin_k_dot_x * sin_phi;
|
||||
Scalar imag_comp_phase = cos_k_dot_x * sin_phi + sin_k_dot_x * cos_phi;
|
||||
|
||||
Vector force = (Vector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase,
|
||||
ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase,
|
||||
ff_re.z * real_comp_phase - ff_im.z * imag_comp_phase};
|
||||
|
||||
return force;
|
||||
}
|
||||
|
||||
Vector
|
||||
@@ -458,12 +438,11 @@ in ScalarField lnrho(VTXBUF_LNRHO);
|
||||
out ScalarField out_lnrho(VTXBUF_LNRHO);
|
||||
|
||||
in VectorField uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
|
||||
out VectorField out_uu(VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ);
|
||||
|
||||
out VectorField out_uu(VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
|
||||
|
||||
#if LMAGNETIC
|
||||
in VectorField aa(VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ);
|
||||
out VectorField out_aa(VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ);
|
||||
in VectorField aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ);
|
||||
out VectorField out_aa(VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ);
|
||||
#endif
|
||||
|
||||
#if LENTROPY
|
||||
@@ -482,38 +461,36 @@ out Scalar out_accretion = VTXBUF_ACCRETION;
|
||||
#endif
|
||||
|
||||
Kernel void
|
||||
solve(Scalar dt) {
|
||||
solve()
|
||||
{
|
||||
Scalar dt = AC_dt;
|
||||
out_lnrho = rk3(out_lnrho, lnrho, continuity(globalVertexIdx, uu, lnrho, dt), dt);
|
||||
|
||||
#if LMAGNETIC
|
||||
#if LMAGNETIC
|
||||
out_aa = rk3(out_aa, aa, induction(uu, aa), dt);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if LENTROPY
|
||||
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, ss, aa, dt), dt);
|
||||
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
|
||||
#elif LTEMPERATURE
|
||||
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, tt), dt);
|
||||
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
|
||||
#else
|
||||
out_uu = rk3(out_uu, uu, momentum(globalVertexIdx, uu, lnrho, dt), dt);
|
||||
#endif
|
||||
#if LENTROPY
|
||||
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt);
|
||||
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
|
||||
#elif LTEMPERATURE
|
||||
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
|
||||
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
|
||||
#else
|
||||
out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
|
||||
#endif
|
||||
|
||||
#if LFORCING
|
||||
#if LFORCING
|
||||
if (step_number == 2) {
|
||||
out_uu = out_uu + forcing(globalVertexIdx, dt);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if LSINK
|
||||
// out_lnrho = log(exp(out_lnrho) - sink_accretion(globalVertexIdx, lnrho));
|
||||
// out_accretion = value(accretion) + (sink_accretion(globalVertexIdx,lnrho) * dsx * dsy * dsz);
|
||||
#endif
|
||||
|
||||
#if LSINK
|
||||
out_accretion = rk3(out_accretion, accretion, sink_accretion(globalVertexIdx, lnrho, dt), dt);// unit now is rho!
|
||||
// out_lnrho = log(exp(out_lnrho) - out_accretion);
|
||||
|
||||
if (step_number == 2) {
|
||||
out_accretion = out_accretion * dsx * dsy * dsz;// unit is now mass!
|
||||
}
|
||||
//TODO: implement accretion correction to contiunity equation and momentum equation.
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@@ -15,7 +15,7 @@ L [a-zA-Z_]
|
||||
"void" { return VOID; } /* Rest of the types inherited from C */
|
||||
"int" { return INT; }
|
||||
"int3" { return INT3; }
|
||||
"ScalarField" { return SCALAR; }
|
||||
"ScalarField" { return SCALARFIELD; }
|
||||
"VectorField" { return VECTOR; }
|
||||
|
||||
"Kernel" { return KERNEL; } /* Function specifiers */
|
||||
|
@@ -16,7 +16,7 @@ int yyget_lineno();
|
||||
%token CONSTANT IN OUT UNIFORM
|
||||
%token IDENTIFIER NUMBER
|
||||
%token RETURN
|
||||
%token SCALAR VECTOR MATRIX
|
||||
%token SCALAR VECTOR MATRIX SCALARFIELD
|
||||
%token VOID INT INT3
|
||||
%token IF ELSE FOR WHILE ELIF
|
||||
%token LEQU LAND LOR LLEQU
|
||||
@@ -209,6 +209,7 @@ type_specifier: VOID
|
||||
| SCALAR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALAR; }
|
||||
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
|
||||
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
|
||||
| SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; }
|
||||
;
|
||||
|
||||
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }
|
||||
|
@@ -36,7 +36,8 @@
|
||||
ASTNode* root = NULL;
|
||||
|
||||
static const char inout_name_prefix[] = "handle_";
|
||||
static bool doing_stencil_assembly = true;
|
||||
typedef enum { STENCIL_ASSEMBLY, STENCIL_PROCESS, STENCIL_HEADER } CompilationType;
|
||||
static CompilationType compilation_type;
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
@@ -53,16 +54,17 @@ static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
|
||||
[WHILE] = "while",
|
||||
[FOR] = "for",
|
||||
// Type specifiers
|
||||
[VOID] = "void",
|
||||
[INT] = "int",
|
||||
[INT3] = "int3",
|
||||
[SCALAR] = "AcReal",
|
||||
[VECTOR] = "AcReal3",
|
||||
[MATRIX] = "AcMatrix",
|
||||
[VOID] = "void",
|
||||
[INT] = "int",
|
||||
[INT3] = "int3",
|
||||
[SCALAR] = "AcReal",
|
||||
[VECTOR] = "AcReal3",
|
||||
[MATRIX] = "AcMatrix",
|
||||
[SCALARFIELD] = "AcReal",
|
||||
// Type qualifiers
|
||||
[KERNEL] = "template <int step_number> static "
|
||||
"__global__", //__launch_bounds__(RK_THREADBLOCK_SIZE,
|
||||
// RK_LAUNCH_BOUND_MIN_BLOCKS),
|
||||
[KERNEL] = "template <int step_number> static __global__",
|
||||
//__launch_bounds__(RK_THREADBLOCK_SIZE,
|
||||
// RK_LAUNCH_BOUND_MIN_BLOCKS),
|
||||
[PREPROCESSED] = "static __device__ "
|
||||
"__forceinline__",
|
||||
[CONSTANT] = "const",
|
||||
@@ -212,11 +214,14 @@ translate_latest_symbol(void)
|
||||
// FUNCTION PARAMETER
|
||||
else if (symbol->type == SYMBOLTYPE_FUNCTION_PARAMETER) {
|
||||
if (symbol->type_qualifier == IN || symbol->type_qualifier == OUT) {
|
||||
if (doing_stencil_assembly)
|
||||
if (compilation_type == STENCIL_ASSEMBLY)
|
||||
printf("const __restrict__ %s* %s", translate(symbol->type_specifier),
|
||||
symbol->identifier);
|
||||
else
|
||||
else if (compilation_type == STENCIL_PROCESS)
|
||||
printf("const %sData& %s", translate(symbol->type_specifier), symbol->identifier);
|
||||
else
|
||||
printf("Invalid compilation type %d, IN and OUT qualifiers not supported\n",
|
||||
compilation_type);
|
||||
}
|
||||
else {
|
||||
print_symbol(handle);
|
||||
@@ -224,14 +229,18 @@ translate_latest_symbol(void)
|
||||
}
|
||||
// UNIFORM
|
||||
else if (symbol->type_qualifier == UNIFORM) {
|
||||
// if (compilation_type != STENCIL_HEADER) {
|
||||
// printf("ERROR: %s can only be used in stencil headers\n", translation_table[UNIFORM]);
|
||||
//}
|
||||
/* Do nothing */
|
||||
}
|
||||
// IN / OUT
|
||||
else if (symbol->type != SYMBOLTYPE_FUNCTION_PARAMETER &&
|
||||
(symbol->type_qualifier == IN || symbol->type_qualifier == OUT)) {
|
||||
|
||||
printf("static __device__ const %s %s%s", symbol->type_specifier == SCALAR ? "int" : "int3",
|
||||
inout_name_prefix, symbol_table[handle].identifier);
|
||||
printf("static __device__ const %s %s%s",
|
||||
symbol->type_specifier == SCALARFIELD ? "int" : "int3", inout_name_prefix,
|
||||
symbol_table[handle].identifier);
|
||||
if (symbol->type_specifier == VECTOR)
|
||||
printf(" = make_int3");
|
||||
}
|
||||
@@ -309,9 +318,15 @@ traverse(const ASTNode* node)
|
||||
inside_kernel = true;
|
||||
|
||||
// Kernel parameter boilerplate
|
||||
const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE, ";
|
||||
if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
|
||||
printf("%s ", kernel_parameter_boilerplate);
|
||||
const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE";
|
||||
if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION) {
|
||||
printf("%s", kernel_parameter_boilerplate);
|
||||
|
||||
if (node->lhs != NULL) {
|
||||
printf("Compilation error: function parameters for Kernel functions not allowed!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel builtin variables boilerplate (read input/output arrays and setup
|
||||
// indices)
|
||||
@@ -369,6 +384,8 @@ traverse(const ASTNode* node)
|
||||
// printf("%s%s", inout_name_prefix, symbol->identifier);
|
||||
//}
|
||||
if (symbol->type_qualifier == UNIFORM) {
|
||||
printf("DCONST(%s) ", symbol->identifier);
|
||||
/*
|
||||
if (symbol->type_specifier == SCALAR)
|
||||
printf("DCONST_REAL(AC_%s) ", symbol->identifier);
|
||||
else if (symbol->type_specifier == INT)
|
||||
@@ -376,6 +393,7 @@ traverse(const ASTNode* node)
|
||||
else
|
||||
printf("INVALID UNIFORM type specifier %s with %s\n",
|
||||
translate(symbol->type_specifier), symbol->identifier);
|
||||
*/
|
||||
}
|
||||
else {
|
||||
// Do a regular translation
|
||||
@@ -545,14 +563,89 @@ generate_preprocessed_structures(void)
|
||||
");
|
||||
}
|
||||
|
||||
static void
|
||||
generate_header(void)
|
||||
{
|
||||
printf("\n#pragma once\n");
|
||||
|
||||
// Int params
|
||||
printf("#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)");
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_specifier == INT) {
|
||||
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
// Int3 params
|
||||
printf("#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)");
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_specifier == INT3) {
|
||||
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
// Scalar params
|
||||
printf("#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)");
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_specifier == SCALAR) {
|
||||
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
// Vector params
|
||||
printf("#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)");
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_specifier == VECTOR) {
|
||||
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
// Scalar fields
|
||||
printf("#define AC_FOR_VTXBUF_HANDLES(FUNC)");
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_specifier == SCALARFIELD) {
|
||||
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
/*
|
||||
printf("\n");
|
||||
printf("typedef struct {\n");
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_qualifier == PREPROCESSED)
|
||||
printf("%s %s;\n", translate(symbol_table[i].type_specifier),
|
||||
symbol_table[i].identifier);
|
||||
}
|
||||
printf("} %sData;\n", translate(SCALAR));
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
generate_library_hooks(void)
|
||||
{
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_qualifier == KERNEL) {
|
||||
printf("GEN_DEVICE_FUNC_HOOK(%s)\n", symbol_table[i].identifier);
|
||||
// printf("GEN_NODE_FUNC_HOOK(%s)\n", symbol_table[i].identifier);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
if (argc == 2) {
|
||||
if (!strcmp(argv[1], "-sas"))
|
||||
doing_stencil_assembly = true;
|
||||
compilation_type = STENCIL_ASSEMBLY;
|
||||
else if (!strcmp(argv[1], "-sps"))
|
||||
doing_stencil_assembly = false;
|
||||
compilation_type = STENCIL_PROCESS;
|
||||
else if (!strcmp(argv[1], "-sdh"))
|
||||
compilation_type = STENCIL_HEADER;
|
||||
else
|
||||
printf("Unknown flag %s. Generating stencil assembly.\n", argv[1]);
|
||||
}
|
||||
@@ -560,8 +653,8 @@ main(int argc, char** argv)
|
||||
printf("Usage: ./acc [flags]\n"
|
||||
"Flags:\n"
|
||||
"\t-sas - Generates code for the stencil assembly stage\n"
|
||||
"\t-sps - Generates code for the stencil processing "
|
||||
"stage\n");
|
||||
"\t-sps - Generates code for the stencil processing stage\n"
|
||||
"\t-hh - Generates stencil definitions from a header file\n");
|
||||
printf("\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
@@ -576,8 +669,12 @@ main(int argc, char** argv)
|
||||
|
||||
// Traverse
|
||||
traverse(root);
|
||||
if (doing_stencil_assembly)
|
||||
if (compilation_type == STENCIL_ASSEMBLY)
|
||||
generate_preprocessed_structures();
|
||||
else if (compilation_type == STENCIL_HEADER)
|
||||
generate_header();
|
||||
else if (compilation_type == STENCIL_PROCESS)
|
||||
generate_library_hooks();
|
||||
|
||||
// print_symbol_table();
|
||||
|
||||
|
@@ -0,0 +1,67 @@
|
||||
# Astaroth specification and user manual
|
||||
|
||||
Copyright (C) 2014-2019, Johannes Pekkila, Miikka Vaisala.
|
||||
|
||||
Astaroth is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Astaroth is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Astaroth is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
# Introduction and background
|
||||
|
||||
Astaroth is a collection of tools for utilizing multiple graphics processing units (GPUs) efficiently in three-dimensional stencil computations. This document specifies the Astaroth application-programming interface (API) and domain-specific language (DSL).
|
||||
|
||||
Astaroth has been designed for the demands in computational sciences, where large stencils are often used to attain sufficient accuracy. The majority of previous work focuses on stencil computations with low-order stencils for which several efficient algorithms have been proposed, whereas work on high-order stencils is more limited. In addition, in computational physics multiple fields interact with each other, such as the velocity and magnetic fields of electrically conducting fluids. Such computations are especially challenging to solve efficiently because of the problem's relatively low operational intensity and the small caches provided by GPUs. Efficient methods for computations with several coupled fields and large stencils have not been addressed sufficiently in prior work.
|
||||
|
||||
With Astaroth, we have taken inspiration of image processing and graphics pipelines which rely on holding intermediate data in caches for the duration of computations, and extended the idea to work efficiently also with large three-dimensional stencils and an arbitrary number of coupled fields. As programming GPUs efficiently is relatively verbose and requires deep knowledge of the underlying hardware and execution model, we have created a high-level domain-specific language for expressing a wide range of tasks in computational sciences and provide a source-to-source compiler for translating stencil problems expressed in our language into efficient CUDA kernels.
|
||||
|
||||
The kernels generated from the Astaroth DSL are embedded in the Astaroth Core library, which is usable via the Astaroth API. While the Astaroth library is written in C++/CUDA, the API conforms to the C99 standard.
|
||||
|
||||
|
||||
# Publications
|
||||
|
||||
The foundational work was done in (Väisälä, Pekkilä, 2017) and the library, API and DSL described in this document were introduced in (Pekkilä, 2019). We kindly wish the users of Astaroth to cite to these publications in their work.
|
||||
|
||||
> J. Pekkilä, Astaroth: A Library for Stencil Computations on Graphics Processing Units. Master's thesis, Aalto University School of Science, Espoo, Finland, 2019.
|
||||
|
||||
> M. S. Väisälä, Magnetic Phenomena of the Interstellar Medium in Theory and Observation. PhD thesis, University of Helsinki, Finland, 2017.
|
||||
|
||||
> J. Pekkilä, M. S. Väisälä, M. Käpylä, P. J. Käpylä, and O. Anjum, “Methods for compressible fluid simulation on GPUs using high-order finite differences, ”Computer Physics Communications, vol. 217, pp. 11–22, Aug. 2017.
|
||||
|
||||
|
||||
|
||||
# Astaroth API
|
||||
|
||||
## Devices and nodes
|
||||
|
||||
## Meshes
|
||||
|
||||
## Streams and synchronization
|
||||
|
||||
## Interface
|
||||
|
||||
|
||||
# Astaroth DSL
|
||||
|
||||
## Uniforms
|
||||
|
||||
## Vertex buffers
|
||||
|
||||
### Input and output buffers
|
||||
|
||||
## Built-in variables and functions
|
||||
|
||||
|
391
doc/domain_decomposition.svg
Normal file
391
doc/domain_decomposition.svg
Normal file
@@ -0,0 +1,391 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="76.050636mm"
|
||||
height="85.851082mm"
|
||||
viewBox="0 0 76.050636 85.851082"
|
||||
version="1.1"
|
||||
id="svg8"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="domain_decomposition.svg"
|
||||
inkscape:export-filename="/home/mvaisala/astaroth/doc/domain_decomposition.svg.png"
|
||||
inkscape:export-xdpi="233.7916"
|
||||
inkscape:export-ydpi="233.7916">
|
||||
<defs
|
||||
id="defs2">
|
||||
<inkscape:perspective
|
||||
sodipodi:type="inkscape:persp3d"
|
||||
inkscape:vp_x="-34.999999 : 849.45444 : 1"
|
||||
inkscape:vp_y="0 : 3779.5274 : 0"
|
||||
inkscape:vp_z="758.70081 : 849.45444 : 1"
|
||||
inkscape:persp3d-origin="361.85039 : 662.36784 : 1"
|
||||
id="perspective3883" />
|
||||
<inkscape:perspective
|
||||
sodipodi:type="inkscape:persp3d"
|
||||
inkscape:vp_x="-28.760416 : 819.37885 : 1"
|
||||
inkscape:vp_y="0 : 3779.5274 : 0"
|
||||
inkscape:vp_z="764.94038 : 819.37885 : 1"
|
||||
inkscape:persp3d-origin="368.08997 : 632.29225 : 1"
|
||||
id="perspective3817" />
|
||||
<inkscape:perspective
|
||||
sodipodi:type="inkscape:persp3d"
|
||||
inkscape:vp_x="139.37224 : 246.52176 : 1"
|
||||
inkscape:vp_y="90.281989 : 563.44101 : 1"
|
||||
inkscape:vp_z="193.82452 : 233.87492 : 1"
|
||||
inkscape:persp3d-origin="90.281989 : 216.71184 : 1"
|
||||
id="perspective3715" />
|
||||
</defs>
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="1.4"
|
||||
inkscape:cx="65.802138"
|
||||
inkscape:cy="225.49817"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="true"
|
||||
inkscape:snap-grids="true"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1025"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="27"
|
||||
inkscape:window-maximized="1">
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid3923"
|
||||
originx="-51.915482"
|
||||
originy="-200.5164" />
|
||||
</sodipodi:namedview>
|
||||
<metadata
|
||||
id="metadata5">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(-51.915483,-10.632538)">
|
||||
<g
|
||||
inkscape:corner7="-0.091655035 : 0.066875827 : 0.25 : 1"
|
||||
inkscape:corner0="0.2311558 : 0.078514233 : 0 : 1"
|
||||
inkscape:perspectiveID="#perspective3817"
|
||||
id="g3815"
|
||||
sodipodi:type="inkscape:box3d">
|
||||
<path
|
||||
points="345.49477,-34.871325 425.05445,-25.867869 425.05445,-55.566078 345.49477,-70.600089 "
|
||||
d="m 345.49477,-70.600089 v 35.728764 l 79.55968,9.003456 v -29.698209 z"
|
||||
inkscape:box3dsidetype="6"
|
||||
style="fill:#353564;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3803"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="537.05691,-76.198873 537.05691,-38.224283 425.05445,-25.867869 425.05445,-55.566078 "
|
||||
d="m 425.05445,-55.566078 112.00246,-20.632795 v 37.97459 l -112.00246,12.356414 z"
|
||||
inkscape:box3dsidetype="11"
|
||||
style="fill:#e9e9ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3805"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="460.049,-102.2544 537.05691,-76.198873 425.05445,-55.566078 345.49477,-70.600089 "
|
||||
d="M 345.49477,-70.600089 460.049,-102.2544 l 77.00791,26.055527 -112.00246,20.632795 z"
|
||||
inkscape:box3dsidetype="5"
|
||||
style="fill:#4d4d9f;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3807"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="460.049,-53.828225 537.05691,-38.224283 425.05445,-25.867869 345.49477,-34.871325 "
|
||||
d="m 345.49477,-34.871325 114.55423,-18.9569 77.00791,15.603942 -112.00246,12.356414 z"
|
||||
inkscape:box3dsidetype="13"
|
||||
style="fill:#afafde;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3809"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="460.049,-53.828225 537.05691,-38.224283 537.05691,-76.198873 460.049,-102.2544 "
|
||||
d="m 460.049,-102.2544 v 48.426175 l 77.00791,15.603942 v -37.97459 z"
|
||||
inkscape:box3dsidetype="14"
|
||||
style="fill:#d7d7ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3811"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="460.049,-102.2544 460.049,-53.828225 345.49477,-34.871325 345.49477,-70.600089 "
|
||||
d="M 345.49477,-70.600089 460.049,-102.2544 v 48.426175 l -114.55423,18.9569 z"
|
||||
inkscape:box3dsidetype="3"
|
||||
style="fill:#8686bf;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3813"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
</g>
|
||||
<g
|
||||
inkscape:corner7="-0.067801071 : 0.080724505 : 0.25 : 1"
|
||||
inkscape:corner0="0.29810879 : 0.093471507 : 0 : 1"
|
||||
inkscape:perspectiveID="#perspective3883"
|
||||
id="g3881"
|
||||
sodipodi:type="inkscape:box3d">
|
||||
<path
|
||||
points="322.62977,-102.51699 401.43343,-87.835826 401.43343,-118.95615 322.62977,-139.63071 "
|
||||
d="m 322.62977,-139.63071 v 37.11372 l 78.80366,14.681164 v -31.120324 z"
|
||||
inkscape:box3dsidetype="6"
|
||||
style="fill:#353564;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3869"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="520.44801,-152.18313 520.44801,-111.43056 401.43343,-87.835826 401.43343,-118.95615 "
|
||||
d="m 401.43343,-118.95615 119.01458,-33.22698 v 40.75257 l -119.01458,23.594734 z"
|
||||
inkscape:box3dsidetype="11"
|
||||
style="fill:#e9e9ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3871"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="442.62977,-189.88386 520.44801,-152.18313 401.43343,-118.95615 322.62977,-139.63071 "
|
||||
d="m 322.62977,-139.63071 120,-50.25315 77.81824,37.70073 -119.01458,33.22698 z"
|
||||
inkscape:box3dsidetype="5"
|
||||
style="fill:#4d4d9f;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3873"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="442.62977,-138.20214 520.44801,-111.43056 401.43343,-87.835826 322.62977,-102.51699 "
|
||||
d="m 322.62977,-102.51699 120,-35.68515 77.81824,26.77158 -119.01458,23.594734 z"
|
||||
inkscape:box3dsidetype="13"
|
||||
style="fill:#afafde;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3875"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="442.62977,-138.20214 520.44801,-111.43056 520.44801,-152.18313 442.62977,-189.88386 "
|
||||
d="m 442.62977,-189.88386 v 51.68172 l 77.81824,26.77158 v -40.75257 z"
|
||||
inkscape:box3dsidetype="14"
|
||||
style="fill:#d7d7ff;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3877"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
<path
|
||||
points="442.62977,-189.88386 442.62977,-138.20214 322.62977,-102.51699 322.62977,-139.63071 "
|
||||
d="m 322.62977,-139.63071 120,-50.25315 v 51.68172 l -120,35.68515 z"
|
||||
inkscape:box3dsidetype="3"
|
||||
style="fill:#8686bf;fill-rule:evenodd;stroke:none;stroke-linejoin:round"
|
||||
id="path3879"
|
||||
sodipodi:type="inkscape:box3dside" />
|
||||
</g>
|
||||
<g
|
||||
id="g4791"
|
||||
transform="translate(13.229163,-23.812503)"
|
||||
style="fill:#ffffff;fill-opacity:1">
|
||||
<rect
|
||||
y="87.979164"
|
||||
x="63.5"
|
||||
height="10.583336"
|
||||
width="42.333332"
|
||||
id="rect3925"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<rect
|
||||
y="77.395836"
|
||||
x="63.500004"
|
||||
height="10.583337"
|
||||
width="42.333332"
|
||||
id="rect3925-2"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<rect
|
||||
y="66.8125"
|
||||
x="63.499996"
|
||||
height="10.583337"
|
||||
width="42.333332"
|
||||
id="rect3925-0"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<rect
|
||||
y="56.229168"
|
||||
x="63.499996"
|
||||
height="10.583337"
|
||||
width="42.333332"
|
||||
id="rect3925-23"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 60.854159,48.291665 15.875,-15.875"
|
||||
id="path4862"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 103.18749,48.291666 15.875,-15.875001"
|
||||
id="path4862-1"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 103.18749,90.624998 15.875,-15.875"
|
||||
id="path4862-2"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 103.18749,80.041668 15.875,-15.875"
|
||||
id="path4862-9"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 103.18749,69.458328 15.875,-15.875"
|
||||
id="path4862-3"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 103.18749,58.874998 15.875,-15.874999"
|
||||
id="path4862-19"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 60.854158,58.874998 15.875001,-15.875"
|
||||
id="path4862-4"
|
||||
inkscape:connector-curvature="0" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="90.045952"
|
||||
y="15.407738"
|
||||
id="text4926"><tspan
|
||||
sodipodi:role="line"
|
||||
x="90.926521"
|
||||
y="15.407738"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:10.58333302px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';text-align:center;text-anchor:middle;stroke-width:0.26458332"
|
||||
id="tspan4928">Astaroth multi-GPU node </tspan><tspan
|
||||
sodipodi:role="line"
|
||||
x="90.045952"
|
||||
y="28.636904"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:10.58333302px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';text-align:center;text-anchor:middle;stroke-width:0.26458332"
|
||||
id="tspan4930">domain decomposition</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer2"
|
||||
inkscape:label="Layer 2"
|
||||
transform="translate(-51.915483,-10.632538)">
|
||||
<g
|
||||
id="g4791-8"
|
||||
transform="translate(-2.6458431,-7.9375045)"
|
||||
style="fill:#ffffff;fill-opacity:1">
|
||||
<rect
|
||||
y="87.979164"
|
||||
x="63.5"
|
||||
height="10.583336"
|
||||
width="42.333332"
|
||||
id="rect3925-9"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<rect
|
||||
y="77.395836"
|
||||
x="63.500004"
|
||||
height="10.583337"
|
||||
width="42.333332"
|
||||
id="rect3925-2-7"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<rect
|
||||
y="66.8125"
|
||||
x="63.499996"
|
||||
height="10.583337"
|
||||
width="42.333332"
|
||||
id="rect3925-0-3"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<rect
|
||||
y="56.229168"
|
||||
x="63.499996"
|
||||
height="10.583337"
|
||||
width="42.333332"
|
||||
id="rect3925-23-6"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="76.351189"
|
||||
y="55.28421"
|
||||
id="text4926-2"><tspan
|
||||
sodipodi:role="line"
|
||||
x="76.351189"
|
||||
y="55.28421"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
|
||||
id="tspan4930-0">GPU 0</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="75.973213"
|
||||
y="66.245522"
|
||||
id="text4926-2-9"><tspan
|
||||
sodipodi:role="line"
|
||||
x="75.973213"
|
||||
y="66.245522"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
|
||||
id="tspan4930-0-9">GPU 1</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="75.784225"
|
||||
y="76.072899"
|
||||
id="text4926-2-4"><tspan
|
||||
sodipodi:role="line"
|
||||
x="75.784225"
|
||||
y="76.072899"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
|
||||
id="tspan4930-0-5">GPU 2</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="75.973213"
|
||||
y="86.656235"
|
||||
id="text4926-2-1"><tspan
|
||||
sodipodi:role="line"
|
||||
x="75.973213"
|
||||
y="86.656235"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
|
||||
id="tspan4930-0-0">GPU 3</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="51.59375"
|
||||
y="70.970222"
|
||||
id="text4926-2-3"><tspan
|
||||
sodipodi:role="line"
|
||||
x="51.59375"
|
||||
y="70.970222"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
|
||||
id="tspan4930-0-7">Nz</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="79.563988"
|
||||
y="96.48362"
|
||||
id="text4926-2-3-8"><tspan
|
||||
sodipodi:role="line"
|
||||
x="79.563988"
|
||||
y="96.48362"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
|
||||
id="tspan4930-0-7-8">Nx</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.58333302px;line-height:1.25;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
|
||||
x="114.71577"
|
||||
y="87.223206"
|
||||
id="text4926-2-3-6"><tspan
|
||||
sodipodi:role="line"
|
||||
x="114.71577"
|
||||
y="87.223206"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.23333311px;font-family:'URW Gothic L';-inkscape-font-specification:'URW Gothic L';stroke-width:0.26458332"
|
||||
id="tspan4930-0-7-0">Ny</tspan></text>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 19 KiB |
@@ -45,6 +45,8 @@ typedef struct {
|
||||
#endif // __CUDACC__
|
||||
|
||||
// Library flags
|
||||
#define STENCIL_ORDER (6)
|
||||
#define NGHOST (STENCIL_ORDER / 2)
|
||||
#define VERBOSE_PRINTING (1)
|
||||
|
||||
// Built-in types and parameters
|
||||
@@ -215,6 +217,9 @@ acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo info)
|
||||
k * info.int_params[AC_mx] * info.int_params[AC_my];
|
||||
}
|
||||
|
||||
/** Prints all parameters inside AcMeshInfo */
|
||||
void acPrintMeshInfo(const AcMeshInfo config);
|
||||
|
||||
/*
|
||||
static inline int
|
||||
acGetParam(const AcMeshInfo info, const AcIntParam param)
|
||||
|
@@ -46,8 +46,24 @@ AcResult acDeviceSynchronizeStream(const Device device, const Stream stream);
|
||||
AcResult acDeviceSwapBuffers(const Device device);
|
||||
|
||||
/** */
|
||||
AcResult acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam param,
|
||||
const AcReal value);
|
||||
AcResult acDeviceLoadScalarConstant(const Device device, const Stream stream,
|
||||
const AcRealParam param, const AcReal value);
|
||||
|
||||
/** */
|
||||
AcResult acDeviceLoadVectorConstant(const Device device, const Stream stream,
|
||||
const AcReal3Param param, const AcReal3 value);
|
||||
|
||||
/** */
|
||||
AcResult acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
|
||||
const int value);
|
||||
|
||||
/** */
|
||||
AcResult acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
|
||||
const int3 value);
|
||||
|
||||
/** */
|
||||
AcResult acDeviceLoadMeshInfo(const Device device, const Stream stream,
|
||||
const AcMeshInfo device_config);
|
||||
|
||||
/** */
|
||||
AcResult acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream,
|
||||
|
@@ -8,17 +8,18 @@ fi
|
||||
|
||||
KERNEL_DIR=${AC_HOME}"/src/core/kernels"
|
||||
ACC_DIR=${AC_HOME}"/acc"
|
||||
ACC_DEFAULT_HEADER="mhd_solver/stencil_defines.h"
|
||||
ACC_DEFAULT_SAS="mhd_solver/stencil_assembly.sas"
|
||||
ACC_DEFAULT_SPS="mhd_solver/stencil_process.sps"
|
||||
ACC_DEFAULT_HEADER="mhd_solver/stencil_definition.sdh"
|
||||
ACC_DEFAULT_INCLUDE_DIR="mhd_solver"
|
||||
|
||||
${ACC_DIR}/clean.sh
|
||||
${ACC_DIR}/build_acc.sh
|
||||
|
||||
|
||||
ACC_HEADER=${ACC_DEFAULT_HEADER}
|
||||
ACC_SAS=${ACC_DEFAULT_SAS}
|
||||
ACC_SPS=${ACC_DEFAULT_SPS}
|
||||
ACC_HEADER=${ACC_DEFAULT_HEADER}
|
||||
ACC_INCLUDE_DIR=${ACC_DEFAULT_INCLUDE_DIR}
|
||||
|
||||
while [ "$#" -gt 0 ]
|
||||
do
|
||||
@@ -56,9 +57,17 @@ echo "Header file:" ${ACC_DIR}/${ACC_HEADER}
|
||||
echo "Assembly file: ${ACC_DIR}/${ACC_SAS}"
|
||||
echo "Process file: ${ACC_DIR}/${ACC_SPS}"
|
||||
|
||||
cd ${KERNEL_DIR}
|
||||
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SAS} ${ACC_DIR}/${ACC_HEADER}
|
||||
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SPS} ${ACC_DIR}/${ACC_HEADER}
|
||||
cd ${ACC_DIR}/${ACC_INCLUDE_DIR}
|
||||
echo ${PWD}
|
||||
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SAS}
|
||||
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_SPS}
|
||||
${ACC_DIR}/compile.sh ${ACC_DIR}/${ACC_HEADER}
|
||||
|
||||
echo "Linking: " ${ACC_DIR}/${ACC_HEADER} " -> " ${AC_HOME}/include/stencil_defines.h
|
||||
ln -sf ${ACC_DIR}/${ACC_HEADER} ${AC_HOME}/include/stencil_defines.h
|
||||
echo "Moving stencil_assembly.cuh -> ${AC_HOME}/src/core/kernels"
|
||||
mv stencil_assembly.cuh ${AC_HOME}/src/core/kernels
|
||||
|
||||
echo "Moving stencil_process.cuh -> ${AC_HOME}/src/core/kernels"
|
||||
mv stencil_process.cuh ${AC_HOME}/src/core/kernels
|
||||
|
||||
echo "Moving stencil_defines.cuh -> ${AC_HOME}/include"
|
||||
mv stencil_defines.h ${AC_HOME}/include
|
||||
|
1
scripts/preprocess_device_files.sh
Executable file
1
scripts/preprocess_device_files.sh
Executable file
@@ -0,0 +1 @@
|
||||
nvcc -E ../src/core/device.cu -I ../include -I ../ > preprocessed_device_files.pp
|
@@ -36,6 +36,21 @@ const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
static const int num_nodes = 1;
|
||||
static Node nodes[num_nodes];
|
||||
|
||||
void
|
||||
acPrintMeshInfo(const AcMeshInfo config)
|
||||
{
|
||||
for (int i = 0; i < NUM_INT_PARAMS; ++i)
|
||||
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
|
||||
for (int i = 0; i < NUM_INT3_PARAMS; ++i)
|
||||
printf("[%s]: (%d, %d, %d)\n", int3param_names[i], config.int3_params[i].x,
|
||||
config.int3_params[i].y, config.int3_params[i].z);
|
||||
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
|
||||
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
|
||||
for (int i = 0; i < NUM_REAL3_PARAMS; ++i)
|
||||
printf("[%s]: (%g, %g, %g)\n", real3param_names[i], double(config.real3_params[i].x),
|
||||
double(config.real3_params[i].y), double(config.real3_params[i].z));
|
||||
}
|
||||
|
||||
AcResult
|
||||
acInit(const AcMeshInfo mesh_info)
|
||||
{
|
||||
|
@@ -39,35 +39,54 @@ typedef struct {
|
||||
AcReal* out[NUM_VTXBUF_HANDLES];
|
||||
} VertexBufferArray;
|
||||
|
||||
struct device_s {
|
||||
int id;
|
||||
AcMeshInfo local_config;
|
||||
|
||||
// Concurrency
|
||||
cudaStream_t streams[NUM_STREAM_TYPES];
|
||||
|
||||
// Memory
|
||||
VertexBufferArray vba;
|
||||
AcReal* reduce_scratchpad;
|
||||
AcReal* reduce_result;
|
||||
|
||||
#if PACKED_DATA_TRANSFERS
|
||||
// Declare memory for buffers needed for packed data transfers here
|
||||
// AcReal* data_packing_buffer;
|
||||
#endif
|
||||
};
|
||||
|
||||
__constant__ AcMeshInfo d_mesh_info;
|
||||
static inline int __device__
|
||||
static int __device__ __forceinline__
|
||||
DCONST(const AcIntParam param)
|
||||
{
|
||||
return d_mesh_info.int_params[param];
|
||||
}
|
||||
static inline int3 __device__
|
||||
static int3 __device__ __forceinline__
|
||||
DCONST(const AcInt3Param param)
|
||||
{
|
||||
return d_mesh_info.int3_params[param];
|
||||
}
|
||||
static inline AcReal __device__
|
||||
static AcReal __device__ __forceinline__
|
||||
DCONST(const AcRealParam param)
|
||||
{
|
||||
return d_mesh_info.real_params[param];
|
||||
}
|
||||
static inline AcReal3 __device__
|
||||
static AcReal3 __device__ __forceinline__
|
||||
DCONST(const AcReal3Param param)
|
||||
{
|
||||
return d_mesh_info.real3_params[param];
|
||||
}
|
||||
constexpr VertexBufferHandle
|
||||
DCONST(const VertexBufferHandle handle)
|
||||
{
|
||||
return handle;
|
||||
}
|
||||
#define DCONST_INT(x) DCONST(x)
|
||||
#define DCONST_INT3(x) DCONST(x)
|
||||
#define DCONST_REAL(x) DCONST(x)
|
||||
#define DCONST_REAL3(x) DCONST(x)
|
||||
//#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
||||
//#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
|
||||
//#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
||||
//#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
||||
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
||||
#define globalGridN (d_mesh_info.int3_params[AC_global_grid_n])
|
||||
@@ -88,26 +107,8 @@ static dim3 rk3_tpb(32, 1, 4);
|
||||
// #include "kernels/pack_unpack.cuh"
|
||||
#endif
|
||||
|
||||
struct device_s {
|
||||
int id;
|
||||
AcMeshInfo local_config;
|
||||
|
||||
// Concurrency
|
||||
cudaStream_t streams[NUM_STREAM_TYPES];
|
||||
|
||||
// Memory
|
||||
VertexBufferArray vba;
|
||||
AcReal* reduce_scratchpad;
|
||||
AcReal* reduce_result;
|
||||
|
||||
#if PACKED_DATA_TRANSFERS
|
||||
// Declare memory for buffers needed for packed data transfers here
|
||||
// AcReal* data_packing_buffer;
|
||||
#endif
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
static __global__ void dummy_kernel(void) {}
|
||||
static __global__ void dummy_kernel(void) { DCONST((AcIntParam)0); DCONST((AcInt3Param)0); DCONST((AcRealParam)0); DCONST((AcReal3Param)0); }
|
||||
// clang-format on
|
||||
|
||||
AcResult
|
||||
@@ -153,8 +154,7 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
||||
#endif
|
||||
|
||||
// Device constants
|
||||
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0,
|
||||
cudaMemcpyHostToDevice));
|
||||
acDeviceLoadMeshInfo(device, STREAM_DEFAULT, device_config);
|
||||
|
||||
printf("Created device %d (%p)\n", device->id, device);
|
||||
*device_handle = device;
|
||||
@@ -303,8 +303,9 @@ acDeviceAutoOptimize(const Device device)
|
||||
|
||||
cudaEventRecord(tstart); // ---------------------------------------- Timing start
|
||||
|
||||
acDeviceLoadScalarConstant(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON);
|
||||
for (int i = 0; i < num_iterations; ++i)
|
||||
solve<2><<<bpg, tpb>>>(start, end, device->vba, FLT_EPSILON);
|
||||
solve<2><<<bpg, tpb>>>(start, end, device->vba);
|
||||
|
||||
cudaEventRecord(tstop); // ----------------------------------------- Timing end
|
||||
cudaEventSynchronize(tstop);
|
||||
@@ -361,8 +362,8 @@ acDeviceSwapBuffers(const Device device)
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam param,
|
||||
const AcReal value)
|
||||
acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRealParam param,
|
||||
const AcReal value)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
const size_t offset = (size_t)&d_mesh_info.real_params[param] - (size_t)&d_mesh_info;
|
||||
@@ -371,6 +372,55 @@ acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcReal3Param param,
|
||||
const AcReal3 value)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
const size_t offset = (size_t)&d_mesh_info.real3_params[param] - (size_t)&d_mesh_info;
|
||||
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
|
||||
cudaMemcpyHostToDevice, device->streams[stream]));
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
|
||||
const int value)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
const size_t offset = (size_t)&d_mesh_info.int_params[param] - (size_t)&d_mesh_info;
|
||||
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
|
||||
cudaMemcpyHostToDevice, device->streams[stream]));
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
|
||||
const int3 value)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
const size_t offset = (size_t)&d_mesh_info.int3_params[param] - (size_t)&d_mesh_info;
|
||||
ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
|
||||
cudaMemcpyHostToDevice, device->streams[stream]));
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadMeshInfo(const Device device, const Stream stream, const AcMeshInfo device_config)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_nx] == device->local_config.int_params[AC_nx]);
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_ny] == device->local_config.int_params[AC_ny]);
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_nz] == device->local_config.int_params[AC_nz]);
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_multigpu_offset] ==
|
||||
device->local_config.int_params[AC_multigpu_offset]);
|
||||
|
||||
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &device_config, sizeof(device_config),
|
||||
0, cudaMemcpyHostToDevice, device->streams[stream]));
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream, const AcMesh host_mesh,
|
||||
const VertexBufferHandle vtxbuf_handle, const int3 src,
|
||||
@@ -551,12 +601,13 @@ acDeviceIntegrateSubstep(const Device device, const Stream stream, const int ste
|
||||
(unsigned int)ceil(n.y / AcReal(tpb.y)), //
|
||||
(unsigned int)ceil(n.z / AcReal(tpb.z)));
|
||||
|
||||
acDeviceLoadScalarConstant(device, stream, AC_dt, dt);
|
||||
if (step_number == 0)
|
||||
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
|
||||
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
|
||||
else if (step_number == 1)
|
||||
solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
|
||||
solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
|
||||
else
|
||||
solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
|
||||
solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
|
||||
|
||||
ERRCHK_CUDA_KERNEL();
|
||||
|
||||
|
@@ -26,6 +26,8 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "src/core/math_utils.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
static __device__ __forceinline__ int
|
||||
@@ -321,65 +323,6 @@ read_data(const int i, const int j, const int k,
|
||||
* =============================================================================
|
||||
*/
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
operator-(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
operator+(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
operator-(const AcReal3& a)
|
||||
{
|
||||
return (AcReal3){-a.x, -a.y, -a.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3 operator*(const AcReal a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
mul(const AcMatrix& aa, const AcReal3& x)
|
||||
{
|
||||
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
cross(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
AcReal3 c;
|
||||
|
||||
c.x = a.y * b.z - a.z * b.y;
|
||||
c.y = a.z * b.x - a.x * b.z;
|
||||
c.z = a.x * b.y - a.y * b.x;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ bool
|
||||
is_valid(const AcReal& a)
|
||||
{
|
||||
return !isnan(a) && !isinf(a);
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ bool
|
||||
is_valid(const AcReal3& a)
|
||||
{
|
||||
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Level 1 (Stencil Processing Stage)
|
||||
@@ -642,4 +585,54 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
||||
\
|
||||
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
|
||||
|
||||
// clang-format off
|
||||
#define GEN_DEVICE_FUNC_HOOK(identifier) \
|
||||
template <int step_number> \
|
||||
AcResult acDeviceKernel_##identifier(const Device device, const Stream stream, \
|
||||
const int3 start, const int3 end) \
|
||||
{ \
|
||||
cudaSetDevice(device->id); \
|
||||
\
|
||||
const dim3 tpb(32, 1, 4); \
|
||||
\
|
||||
const int3 n = end - start; \
|
||||
const dim3 bpg((unsigned int)ceil(n.x / AcReal(tpb.x)), \
|
||||
(unsigned int)ceil(n.y / AcReal(tpb.y)), \
|
||||
(unsigned int)ceil(n.z / AcReal(tpb.z))); \
|
||||
\
|
||||
identifier<step_number> \
|
||||
<<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba); \
|
||||
ERRCHK_CUDA_KERNEL(); \
|
||||
\
|
||||
return AC_SUCCESS; \
|
||||
}
|
||||
|
||||
/*
|
||||
#define GEN_NODE_FUNC_HOOK(identifier) \
|
||||
template <int step_number> \
|
||||
AcResult acNodeKernel_##identifier(const Node node, const Stream stream, const int3 start, \
|
||||
const int3 end) \
|
||||
{ \
|
||||
acNodeSynchronizeStream(node, stream); \
|
||||
\
|
||||
for (int i = 0; i < node->num_devices; ++i) { \
|
||||
\
|
||||
const int3 d0 = (int3){NGHOST, NGHOST, NGHOST + i * node->subgrid.n.z}; \
|
||||
const int3 d1 = d0 + (int3){node->subgrid.n.x, node->subgrid.n.y, node->subgrid.n.z}; \
|
||||
\
|
||||
const int3 da = max(start, d0); \
|
||||
const int3 db = min(end, d1); \
|
||||
\
|
||||
if (db.z >= da.z) { \
|
||||
const int3 da_local = da - (int3){0, 0, i * node->subgrid.n.z}; \
|
||||
const int3 db_local = db - (int3){0, 0, i * node->subgrid.n.z}; \
|
||||
acDeviceKernel_ #identifier(node->devices[i], stream, isubstep, da_local, \
|
||||
db_local, dt); \
|
||||
} \
|
||||
} \
|
||||
return AC_SUCCESS; \
|
||||
}
|
||||
*/
|
||||
// clang-format on
|
||||
|
||||
#include "stencil_process.cuh"
|
||||
|
@@ -25,10 +25,10 @@
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
#include <cmath>
|
||||
using namespace std; // Potentially bad practice to declare namespace std here
|
||||
// #include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0
|
||||
// #include <tgmath.h> // Even this does not work
|
||||
//#include <cmath>
|
||||
// using namespace std; // Potentially bad practice to declare namespace std here
|
||||
#include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0
|
||||
//#include <tgmath.h> // Even this does not work
|
||||
#include <stdlib.h> // rand
|
||||
|
||||
template <class T>
|
||||
@@ -64,16 +64,6 @@ sum(const T& a, const T& b)
|
||||
return a + b;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static inline const T
|
||||
is_valid(const T& val)
|
||||
{
|
||||
if (isnan(val) || isinf(val))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static inline const T
|
||||
clamp(const T& val, const T& min, const T& max)
|
||||
@@ -87,20 +77,85 @@ randr()
|
||||
return AcReal(rand()) / AcReal(RAND_MAX);
|
||||
}
|
||||
|
||||
static inline int3
|
||||
operator+(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static inline int3
|
||||
operator-(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_power_of_two(const unsigned val)
|
||||
{
|
||||
return val && !(val & (val - 1));
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
|
||||
#else
|
||||
#define HOST_DEVICE_INLINE inline
|
||||
#endif // __CUDACC__
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
operator+(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE int3
|
||||
operator+(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
operator-(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE int3
|
||||
operator-(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
operator-(const AcReal3& a)
|
||||
{
|
||||
return (AcReal3){-a.x, -a.y, -a.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
mul(const AcMatrix& aa, const AcReal3& x)
|
||||
{
|
||||
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
cross(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
AcReal3 c;
|
||||
|
||||
c.x = a.y * b.z - a.z * b.y;
|
||||
c.y = a.z * b.x - a.x * b.z;
|
||||
c.z = a.x * b.y - a.y * b.x;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool
|
||||
is_valid(const AcReal a)
|
||||
{
|
||||
return !isnan(a) && !isinf(a);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool
|
||||
is_valid(const AcReal3& a)
|
||||
{
|
||||
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
|
||||
}
|
||||
|
@@ -429,7 +429,7 @@ acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param
|
||||
acNodeSynchronizeStream(node, stream);
|
||||
// #pragma omp parallel for
|
||||
for (int i = 0; i < node->num_devices; ++i) {
|
||||
acDeviceLoadConstant(node->devices[i], stream, param, value);
|
||||
acDeviceLoadScalarConstant(node->devices[i], stream, param, value);
|
||||
}
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
@@ -8,5 +8,5 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||
find_package(MPI REQUIRED)
|
||||
|
||||
add_executable(mpitest main.c)
|
||||
target_include_directories(mpitest PRIVATE ${MPI_C_INCLUDE_PATH})
|
||||
target_link_libraries(mpitest PRIVATE ${MPI_C_LIBRARIES} astaroth_core)
|
||||
target_include_directories(mpitest PRIVATE ${CMAKE_SOURCE_DIR}/src/standalone ${MPI_C_INCLUDE_PATH})
|
||||
target_link_libraries(mpitest astaroth_core astaroth_standalone ${MPI_C_LIBRARIES})
|
||||
|
@@ -16,13 +16,120 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**
|
||||
Running: mpirun -np <num processes> <executable>
|
||||
*/
|
||||
#undef NDEBUG // Assert always
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "astaroth.h"
|
||||
#include "autotest.h"
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
// From Astaroth Standalone
|
||||
#include "config_loader.h"
|
||||
#include "model/host_memory.h"
|
||||
|
||||
static void
|
||||
distribute_mesh(const AcMesh* src, AcMesh* dst)
|
||||
{
|
||||
MPI_Datatype datatype = MPI_FLOAT;
|
||||
if (sizeof(AcReal) == 8)
|
||||
datatype = MPI_DOUBLE;
|
||||
|
||||
int process_id, num_processes;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||
|
||||
const size_t count = acVertexBufferSize(dst->info);
|
||||
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||
|
||||
// Communicate to self
|
||||
if (process_id == 0) {
|
||||
assert(src);
|
||||
assert(dst);
|
||||
memcpy(&dst->vertex_buffer[i][0], //
|
||||
&src->vertex_buffer[i][0], //
|
||||
count * sizeof(src->vertex_buffer[i][0]));
|
||||
}
|
||||
// Communicate to others
|
||||
for (int j = 1; j < num_processes; ++j) {
|
||||
if (process_id == 0) {
|
||||
assert(src);
|
||||
|
||||
// Send
|
||||
// TODO RECHECK THESE j INDICES
|
||||
const size_t src_idx = j * dst->info.int_params[AC_mx] *
|
||||
dst->info.int_params[AC_my] * src->info.int_params[AC_nz] /
|
||||
num_processes;
|
||||
|
||||
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, j, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
else {
|
||||
assert(dst);
|
||||
|
||||
// Recv
|
||||
const size_t dst_idx = 0;
|
||||
MPI_Status status;
|
||||
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, 0, 0, MPI_COMM_WORLD,
|
||||
&status);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gather_mesh(const AcMesh* src, AcMesh* dst)
|
||||
{
|
||||
MPI_Datatype datatype = MPI_FLOAT;
|
||||
if (sizeof(AcReal) == 8)
|
||||
datatype = MPI_DOUBLE;
|
||||
|
||||
int process_id, num_processes;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||
|
||||
size_t count = acVertexBufferSize(src->info);
|
||||
|
||||
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||
// Communicate to self
|
||||
if (process_id == 0) {
|
||||
assert(src);
|
||||
assert(dst);
|
||||
memcpy(&dst->vertex_buffer[i][0], //
|
||||
&src->vertex_buffer[i][0], //
|
||||
count * sizeof(AcReal));
|
||||
}
|
||||
|
||||
// Communicate to others
|
||||
for (int j = 1; j < num_processes; ++j) {
|
||||
if (process_id == 0) {
|
||||
// Recv
|
||||
// const size_t dst_idx = j * acVertexBufferCompdomainSize(dst->info);
|
||||
const size_t dst_idx = j * dst->info.int_params[AC_mx] *
|
||||
dst->info.int_params[AC_my] * dst->info.int_params[AC_nz] /
|
||||
num_processes;
|
||||
|
||||
assert(dst_idx + count <= acVertexBufferSize(dst->info));
|
||||
MPI_Status status;
|
||||
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, j, 0, MPI_COMM_WORLD,
|
||||
&status);
|
||||
}
|
||||
else {
|
||||
// Send
|
||||
const size_t src_idx = 0;
|
||||
|
||||
assert(src_idx + count <= acVertexBufferSize(src->info));
|
||||
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, 0, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
@@ -37,14 +144,39 @@ main(void)
|
||||
MPI_Get_processor_name(processor_name, &name_len);
|
||||
printf("Processor %s. Process %d of %d.\n", processor_name, process_id, num_processes);
|
||||
|
||||
AcMeshInfo info = {
|
||||
.int_params[AC_nx] = 128,
|
||||
.int_params[AC_ny] = 64,
|
||||
.int_params[AC_nz] = 32,
|
||||
};
|
||||
acInit(info);
|
||||
acIntegrate(0.1f);
|
||||
acQuit();
|
||||
AcMeshInfo mesh_info;
|
||||
load_config(&mesh_info);
|
||||
update_config(&mesh_info);
|
||||
|
||||
AcMesh* main_mesh = NULL;
|
||||
ModelMesh* model_mesh = NULL;
|
||||
if (process_id == 0) {
|
||||
main_mesh = acmesh_create(mesh_info);
|
||||
acmesh_init_to(INIT_TYPE_RANDOM, main_mesh);
|
||||
model_mesh = modelmesh_create(mesh_info);
|
||||
acmesh_to_modelmesh(*main_mesh, model_mesh);
|
||||
}
|
||||
|
||||
AcMeshInfo submesh_info = mesh_info;
|
||||
submesh_info.int_params[AC_nz] /= num_processes;
|
||||
update_config(&submesh_info);
|
||||
|
||||
AcMesh* submesh = acmesh_create(submesh_info);
|
||||
|
||||
/////////////////////
|
||||
distribute_mesh(main_mesh, submesh);
|
||||
gather_mesh(submesh, main_mesh);
|
||||
/////////////////////////
|
||||
// Autotest
|
||||
bool is_acceptable = verify_meshes(*model_mesh, *main_mesh);
|
||||
/////
|
||||
|
||||
acmesh_destroy(submesh);
|
||||
|
||||
if (process_id == 0) {
|
||||
modelmesh_destroy(model_mesh);
|
||||
acmesh_destroy(main_mesh);
|
||||
}
|
||||
|
||||
MPI_Finalize();
|
||||
return EXIT_SUCCESS;
|
||||
|
@@ -25,10 +25,11 @@ add_compile_options(-pipe ${OpenMP_CXX_FLAGS})
|
||||
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
|
||||
|
||||
## Compile and link
|
||||
add_library(astaroth_standalone ${SOURCES})
|
||||
add_library(astaroth_standalone STATIC ${SOURCES})
|
||||
target_link_libraries(astaroth_standalone PRIVATE astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
|
||||
|
||||
add_executable(ac_run main.cc)
|
||||
target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
|
||||
target_link_libraries(ac_run PRIVATE astaroth_standalone)
|
||||
|
||||
# Define the config directory
|
||||
if (ALTER_CONF)
|
||||
|
@@ -75,6 +75,12 @@ static const InitType test_cases[] = {INIT_TYPE_RANDOM, INIT_TYPE_XWAVE,
|
||||
INIT_TYPE_GAUSSIAN_RADIAL_EXPL, INIT_TYPE_ABC_FLOW};
|
||||
// #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
|
||||
static inline bool
|
||||
is_valid(const ModelScalar a)
|
||||
{
|
||||
return !isnan(a) && !isinf(a);
|
||||
}
|
||||
|
||||
#if TEST_TYPE == \
|
||||
QUICK_TEST // REGULAR TEST START HERE
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
@@ -34,15 +34,6 @@
|
||||
#include "src/core/errchk.h"
|
||||
#include "src/core/math_utils.h"
|
||||
|
||||
static inline void
|
||||
print(const AcMeshInfo& config)
|
||||
{
|
||||
for (int i = 0; i < NUM_INT_PARAMS; ++i)
|
||||
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
|
||||
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
|
||||
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Find the index of the keyword in names
|
||||
\return Index in range 0...n if the keyword is in names. -1 if the keyword was
|
||||
@@ -163,7 +154,7 @@ update_config(AcMeshInfo* config)
|
||||
#if VERBOSE_PRINTING // Defined in astaroth.h
|
||||
printf("###############################################################\n");
|
||||
printf("Config dimensions recalculated:\n");
|
||||
print(*config);
|
||||
acPrintMeshInfo(*config);
|
||||
printf("###############################################################\n");
|
||||
#endif
|
||||
}
|
||||
|
@@ -26,7 +26,9 @@
|
||||
*/
|
||||
#include "host_forcing.h"
|
||||
|
||||
#include "src/core/math_utils.h"
|
||||
// #include "src/core/math_utils.h"
|
||||
#include <cmath>
|
||||
using namespace std;
|
||||
|
||||
// The is a wrapper for genering random numbers with a chosen system.
|
||||
AcReal
|
||||
@@ -36,7 +38,7 @@ get_random_number_01()
|
||||
return AcReal(rand()) / AcReal(RAND_MAX);
|
||||
}
|
||||
|
||||
AcReal3
|
||||
static AcReal3
|
||||
cross(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
AcReal3 c;
|
||||
@@ -48,13 +50,13 @@ cross(const AcReal3& a, const AcReal3& b)
|
||||
return c;
|
||||
}
|
||||
|
||||
AcReal
|
||||
static AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
AcReal3
|
||||
static AcReal3
|
||||
vec_norm(const AcReal3& a)
|
||||
{
|
||||
AcReal3 c;
|
||||
@@ -67,7 +69,7 @@ vec_norm(const AcReal3& a)
|
||||
return c;
|
||||
}
|
||||
|
||||
AcReal3
|
||||
static AcReal3
|
||||
vec_multi_scal(const AcReal scal, const AcReal3& a)
|
||||
{
|
||||
AcReal3 c;
|
||||
|
@@ -32,14 +32,6 @@
|
||||
|
||||
AcReal get_random_number_01();
|
||||
|
||||
AcReal3 cross(const AcReal3& a, const AcReal3& b);
|
||||
|
||||
AcReal dot(const AcReal3& a, const AcReal3& b);
|
||||
|
||||
AcReal3 vec_norm(const AcReal3& a);
|
||||
|
||||
AcReal3 vec_multi_scal(const AcReal scal, const AcReal3& a);
|
||||
|
||||
AcReal3 helical_forcing_k_generator(const AcReal kmax, const AcReal kmin);
|
||||
|
||||
void helical_forcing_e_generator(AcReal3* e_force, const AcReal3 k_force);
|
||||
|
@@ -31,6 +31,16 @@
|
||||
#include "host_memory.h"
|
||||
#include "model_boundconds.h"
|
||||
|
||||
// Standalone flags
|
||||
#define LDENSITY (1)
|
||||
#define LHYDRO (1)
|
||||
#define LMAGNETIC (1)
|
||||
#define LENTROPY (1)
|
||||
#define LTEMPERATURE (0)
|
||||
#define LFORCING (1)
|
||||
#define LUPWD (1)
|
||||
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||
|
||||
typedef struct {
|
||||
ModelScalar x, y, z;
|
||||
} ModelVector;
|
||||
|
@@ -27,6 +27,7 @@
|
||||
#pragma once
|
||||
#include "astaroth.h"
|
||||
|
||||
#include "math.h"
|
||||
|
||||
typedef long double ModelScalar;
|
||||
|
||||
|
Reference in New Issue
Block a user