Merge branch 'master' into sink_20190723
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -24,3 +24,4 @@ makefile~
|
|||||||
build/
|
build/
|
||||||
src/core/kernels/stencil_assembly.cuh
|
src/core/kernels/stencil_assembly.cuh
|
||||||
src/core/kernels/stencil_process.cuh
|
src/core/kernels/stencil_process.cuh
|
||||||
|
tests/
|
||||||
|
6
3rdparty/.gitignore
vendored
Normal file
6
3rdparty/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# Ignore everything
|
||||||
|
*
|
||||||
|
|
||||||
|
# Except:
|
||||||
|
!.gitignore
|
||||||
|
!setup_dependencies.sh
|
163
acc/hydro_solver/stencil_defines.h
Normal file
163
acc/hydro_solver/stencil_defines.h
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
/*
|
||||||
|
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||||
|
|
||||||
|
This file is part of Astaroth.
|
||||||
|
|
||||||
|
Astaroth is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Astaroth is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* Logical switches
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
#define STENCIL_ORDER (6)
|
||||||
|
#define NGHOST (STENCIL_ORDER / 2)
|
||||||
|
#define LDENSITY (1)
|
||||||
|
#define LHYDRO (1)
|
||||||
|
#define LMAGNETIC (0)
|
||||||
|
#define LENTROPY (0)
|
||||||
|
#define LTEMPERATURE (0)
|
||||||
|
#define LFORCING (0)
|
||||||
|
#define LUPWD (0)
|
||||||
|
|
||||||
|
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* User-defined parameters
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
// clang-format off
|
||||||
|
#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)\
|
||||||
|
/* Other */\
|
||||||
|
FUNC(AC_max_steps), \
|
||||||
|
FUNC(AC_save_steps), \
|
||||||
|
FUNC(AC_bin_steps), \
|
||||||
|
FUNC(AC_bc_type),
|
||||||
|
|
||||||
|
#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)
|
||||||
|
|
||||||
|
#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)\
|
||||||
|
/* cparams */\
|
||||||
|
FUNC(AC_dsx), \
|
||||||
|
FUNC(AC_dsy), \
|
||||||
|
FUNC(AC_dsz), \
|
||||||
|
FUNC(AC_dsmin), \
|
||||||
|
/* physical grid*/\
|
||||||
|
FUNC(AC_xlen), \
|
||||||
|
FUNC(AC_ylen), \
|
||||||
|
FUNC(AC_zlen), \
|
||||||
|
FUNC(AC_xorig), \
|
||||||
|
FUNC(AC_yorig), \
|
||||||
|
FUNC(AC_zorig), \
|
||||||
|
/*Physical units*/\
|
||||||
|
FUNC(AC_unit_density),\
|
||||||
|
FUNC(AC_unit_velocity),\
|
||||||
|
FUNC(AC_unit_length),\
|
||||||
|
/* properties of gravitating star*/\
|
||||||
|
FUNC(AC_star_pos_x),\
|
||||||
|
FUNC(AC_star_pos_y),\
|
||||||
|
FUNC(AC_star_pos_z),\
|
||||||
|
FUNC(AC_M_star),\
|
||||||
|
/* Run params */\
|
||||||
|
FUNC(AC_cdt), \
|
||||||
|
FUNC(AC_cdtv), \
|
||||||
|
FUNC(AC_cdts), \
|
||||||
|
FUNC(AC_nu_visc), \
|
||||||
|
FUNC(AC_cs_sound), \
|
||||||
|
FUNC(AC_eta), \
|
||||||
|
FUNC(AC_mu0), \
|
||||||
|
FUNC(AC_cp_sound), \
|
||||||
|
FUNC(AC_gamma), \
|
||||||
|
FUNC(AC_cv_sound), \
|
||||||
|
FUNC(AC_lnT0), \
|
||||||
|
FUNC(AC_lnrho0), \
|
||||||
|
FUNC(AC_zeta), \
|
||||||
|
FUNC(AC_trans),\
|
||||||
|
/* Other */\
|
||||||
|
FUNC(AC_bin_save_t), \
|
||||||
|
/* Initial condition params */\
|
||||||
|
FUNC(AC_ampl_lnrho), \
|
||||||
|
FUNC(AC_ampl_uu), \
|
||||||
|
FUNC(AC_angl_uu), \
|
||||||
|
FUNC(AC_lnrho_edge),\
|
||||||
|
FUNC(AC_lnrho_out),\
|
||||||
|
/* Forcing parameters. User configured. */\
|
||||||
|
FUNC(AC_forcing_magnitude),\
|
||||||
|
FUNC(AC_relhel), \
|
||||||
|
FUNC(AC_kmin), \
|
||||||
|
FUNC(AC_kmax), \
|
||||||
|
/* Forcing parameters. Set by the generator. */\
|
||||||
|
FUNC(AC_forcing_phase),\
|
||||||
|
FUNC(AC_k_forcex),\
|
||||||
|
FUNC(AC_k_forcey),\
|
||||||
|
FUNC(AC_k_forcez),\
|
||||||
|
FUNC(AC_kaver),\
|
||||||
|
FUNC(AC_ff_hel_rex),\
|
||||||
|
FUNC(AC_ff_hel_rey),\
|
||||||
|
FUNC(AC_ff_hel_rez),\
|
||||||
|
FUNC(AC_ff_hel_imx),\
|
||||||
|
FUNC(AC_ff_hel_imy),\
|
||||||
|
FUNC(AC_ff_hel_imz),\
|
||||||
|
/* Additional helper params */\
|
||||||
|
/* (deduced from other params do not set these directly!) */\
|
||||||
|
FUNC(AC_G_CONST),\
|
||||||
|
FUNC(AC_GM_star),\
|
||||||
|
FUNC(AC_sq2GM_star),\
|
||||||
|
FUNC(AC_cs2_sound), \
|
||||||
|
FUNC(AC_inv_dsx), \
|
||||||
|
FUNC(AC_inv_dsy), \
|
||||||
|
FUNC(AC_inv_dsz),
|
||||||
|
|
||||||
|
#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* User-defined vertex buffers
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
// clang-format off
|
||||||
|
#if LENTROPY
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO), \
|
||||||
|
FUNC(VTXBUF_UUX), \
|
||||||
|
FUNC(VTXBUF_UUY), \
|
||||||
|
FUNC(VTXBUF_UUZ), \
|
||||||
|
FUNC(VTXBUF_AX), \
|
||||||
|
FUNC(VTXBUF_AY), \
|
||||||
|
FUNC(VTXBUF_AZ), \
|
||||||
|
FUNC(VTXBUF_ENTROPY),
|
||||||
|
#elif LMAGNETIC
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO), \
|
||||||
|
FUNC(VTXBUF_UUX), \
|
||||||
|
FUNC(VTXBUF_UUY), \
|
||||||
|
FUNC(VTXBUF_UUZ), \
|
||||||
|
FUNC(VTXBUF_AX), \
|
||||||
|
FUNC(VTXBUF_AY), \
|
||||||
|
FUNC(VTXBUF_AZ),
|
||||||
|
#elif LHYDRO
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO), \
|
||||||
|
FUNC(VTXBUF_UUX), \
|
||||||
|
FUNC(VTXBUF_UUY), \
|
||||||
|
FUNC(VTXBUF_UUZ),
|
||||||
|
#else
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO),
|
||||||
|
#endif
|
||||||
|
// clang-format on
|
163
acc/magnetic_solver/stencil_defines.h
Normal file
163
acc/magnetic_solver/stencil_defines.h
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
/*
|
||||||
|
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||||
|
|
||||||
|
This file is part of Astaroth.
|
||||||
|
|
||||||
|
Astaroth is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Astaroth is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* Logical switches
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
#define STENCIL_ORDER (6)
|
||||||
|
#define NGHOST (STENCIL_ORDER / 2)
|
||||||
|
#define LDENSITY (1)
|
||||||
|
#define LHYDRO (1)
|
||||||
|
#define LMAGNETIC (1)
|
||||||
|
#define LENTROPY (0)
|
||||||
|
#define LTEMPERATURE (0)
|
||||||
|
#define LFORCING (0)
|
||||||
|
#define LUPWD (0)
|
||||||
|
|
||||||
|
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* User-defined parameters
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
// clang-format off
|
||||||
|
#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)\
|
||||||
|
/* Other */\
|
||||||
|
FUNC(AC_max_steps), \
|
||||||
|
FUNC(AC_save_steps), \
|
||||||
|
FUNC(AC_bin_steps), \
|
||||||
|
FUNC(AC_bc_type),
|
||||||
|
|
||||||
|
#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)
|
||||||
|
|
||||||
|
#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)\
|
||||||
|
/* cparams */\
|
||||||
|
FUNC(AC_dsx), \
|
||||||
|
FUNC(AC_dsy), \
|
||||||
|
FUNC(AC_dsz), \
|
||||||
|
FUNC(AC_dsmin), \
|
||||||
|
/* physical grid*/\
|
||||||
|
FUNC(AC_xlen), \
|
||||||
|
FUNC(AC_ylen), \
|
||||||
|
FUNC(AC_zlen), \
|
||||||
|
FUNC(AC_xorig), \
|
||||||
|
FUNC(AC_yorig), \
|
||||||
|
FUNC(AC_zorig), \
|
||||||
|
/*Physical units*/\
|
||||||
|
FUNC(AC_unit_density),\
|
||||||
|
FUNC(AC_unit_velocity),\
|
||||||
|
FUNC(AC_unit_length),\
|
||||||
|
/* properties of gravitating star*/\
|
||||||
|
FUNC(AC_star_pos_x),\
|
||||||
|
FUNC(AC_star_pos_y),\
|
||||||
|
FUNC(AC_star_pos_z),\
|
||||||
|
FUNC(AC_M_star),\
|
||||||
|
/* Run params */\
|
||||||
|
FUNC(AC_cdt), \
|
||||||
|
FUNC(AC_cdtv), \
|
||||||
|
FUNC(AC_cdts), \
|
||||||
|
FUNC(AC_nu_visc), \
|
||||||
|
FUNC(AC_cs_sound), \
|
||||||
|
FUNC(AC_eta), \
|
||||||
|
FUNC(AC_mu0), \
|
||||||
|
FUNC(AC_cp_sound), \
|
||||||
|
FUNC(AC_gamma), \
|
||||||
|
FUNC(AC_cv_sound), \
|
||||||
|
FUNC(AC_lnT0), \
|
||||||
|
FUNC(AC_lnrho0), \
|
||||||
|
FUNC(AC_zeta), \
|
||||||
|
FUNC(AC_trans),\
|
||||||
|
/* Other */\
|
||||||
|
FUNC(AC_bin_save_t), \
|
||||||
|
/* Initial condition params */\
|
||||||
|
FUNC(AC_ampl_lnrho), \
|
||||||
|
FUNC(AC_ampl_uu), \
|
||||||
|
FUNC(AC_angl_uu), \
|
||||||
|
FUNC(AC_lnrho_edge),\
|
||||||
|
FUNC(AC_lnrho_out),\
|
||||||
|
/* Forcing parameters. User configured. */\
|
||||||
|
FUNC(AC_forcing_magnitude),\
|
||||||
|
FUNC(AC_relhel), \
|
||||||
|
FUNC(AC_kmin), \
|
||||||
|
FUNC(AC_kmax), \
|
||||||
|
/* Forcing parameters. Set by the generator. */\
|
||||||
|
FUNC(AC_forcing_phase),\
|
||||||
|
FUNC(AC_k_forcex),\
|
||||||
|
FUNC(AC_k_forcey),\
|
||||||
|
FUNC(AC_k_forcez),\
|
||||||
|
FUNC(AC_kaver),\
|
||||||
|
FUNC(AC_ff_hel_rex),\
|
||||||
|
FUNC(AC_ff_hel_rey),\
|
||||||
|
FUNC(AC_ff_hel_rez),\
|
||||||
|
FUNC(AC_ff_hel_imx),\
|
||||||
|
FUNC(AC_ff_hel_imy),\
|
||||||
|
FUNC(AC_ff_hel_imz),\
|
||||||
|
/* Additional helper params */\
|
||||||
|
/* (deduced from other params do not set these directly!) */\
|
||||||
|
FUNC(AC_G_CONST),\
|
||||||
|
FUNC(AC_GM_star),\
|
||||||
|
FUNC(AC_sq2GM_star),\
|
||||||
|
FUNC(AC_cs2_sound), \
|
||||||
|
FUNC(AC_inv_dsx), \
|
||||||
|
FUNC(AC_inv_dsy), \
|
||||||
|
FUNC(AC_inv_dsz),
|
||||||
|
|
||||||
|
#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* User-defined vertex buffers
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
// clang-format off
|
||||||
|
#if LENTROPY
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO), \
|
||||||
|
FUNC(VTXBUF_UUX), \
|
||||||
|
FUNC(VTXBUF_UUY), \
|
||||||
|
FUNC(VTXBUF_UUZ), \
|
||||||
|
FUNC(VTXBUF_AX), \
|
||||||
|
FUNC(VTXBUF_AY), \
|
||||||
|
FUNC(VTXBUF_AZ), \
|
||||||
|
FUNC(VTXBUF_ENTROPY),
|
||||||
|
#elif LMAGNETIC
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO), \
|
||||||
|
FUNC(VTXBUF_UUX), \
|
||||||
|
FUNC(VTXBUF_UUY), \
|
||||||
|
FUNC(VTXBUF_UUZ), \
|
||||||
|
FUNC(VTXBUF_AX), \
|
||||||
|
FUNC(VTXBUF_AY), \
|
||||||
|
FUNC(VTXBUF_AZ),
|
||||||
|
#elif LHYDRO
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO), \
|
||||||
|
FUNC(VTXBUF_UUX), \
|
||||||
|
FUNC(VTXBUF_UUY), \
|
||||||
|
FUNC(VTXBUF_UUZ),
|
||||||
|
#else
|
||||||
|
#define AC_FOR_VTXBUF_HANDLES(FUNC) \
|
||||||
|
FUNC(VTXBUF_LNRHO),
|
||||||
|
#endif
|
||||||
|
// clang-format on
|
@@ -30,7 +30,7 @@
|
|||||||
#define LMAGNETIC (1)
|
#define LMAGNETIC (1)
|
||||||
#define LENTROPY (1)
|
#define LENTROPY (1)
|
||||||
#define LTEMPERATURE (0)
|
#define LTEMPERATURE (0)
|
||||||
#define LFORCING (0)
|
#define LFORCING (1)
|
||||||
#define LUPWD (0)
|
#define LUPWD (0)
|
||||||
#define LSINK (1)
|
#define LSINK (1)
|
||||||
|
|
||||||
|
@@ -255,9 +255,6 @@ heat_transfer(in Vector uu, in Scalar lnrho, in Scalar tt)
|
|||||||
|
|
||||||
|
|
||||||
#if LFORCING
|
#if LFORCING
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Vector
|
Vector
|
||||||
simple_vortex_forcing(Vector a, Vector b, Scalar magnitude)
|
simple_vortex_forcing(Vector a, Vector b, Scalar magnitude)
|
||||||
{
|
{
|
||||||
@@ -275,7 +272,13 @@ simple_outward_flow_forcing(Vector a, Vector b, Scalar magnitude)
|
|||||||
Vector
|
Vector
|
||||||
helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vector ff_im, Scalar phi)
|
helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vector ff_im, Scalar phi)
|
||||||
{
|
{
|
||||||
|
// JP: This looks wrong:
|
||||||
|
// 1) Should it be dsx * nx instead of dsx * ny?
|
||||||
|
// 2) Should you also use globalGrid.n instead of the local n?
|
||||||
|
// 3) Also final point: can we do this with vectors/quaternions instead?
|
||||||
|
// Tringonometric functions are much more expensive and inaccurate/
|
||||||
|
// Fun related article:
|
||||||
|
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
|
||||||
xx.x = xx.x*(2.0*M_PI/(dsx*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
xx.x = xx.x*(2.0*M_PI/(dsx*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
||||||
xx.y = xx.y*(2.0*M_PI/(dsy*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
xx.y = xx.y*(2.0*M_PI/(dsy*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
||||||
xx.z = xx.z*(2.0*M_PI/(dsz*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
xx.z = xx.z*(2.0*M_PI/(dsz*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
||||||
|
@@ -40,7 +40,7 @@ AC_chi = 0.0001
|
|||||||
AC_relhel = 0.0
|
AC_relhel = 0.0
|
||||||
AC_forcing_magnitude = 1e-5
|
AC_forcing_magnitude = 1e-5
|
||||||
AC_kmin = 0.8
|
AC_kmin = 0.8
|
||||||
AC_kmax = 1.2
|
AC_kmax = 1.2
|
||||||
|
|
||||||
|
|
||||||
// Entropy
|
// Entropy
|
||||||
|
49
scripts/autotest.sh
Executable file
49
scripts/autotest.sh
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Usage:
|
||||||
|
# cd astaroth
|
||||||
|
# source ./sourceme.sh
|
||||||
|
# autotest.sh
|
||||||
|
#
|
||||||
|
# If you need slurm or to pass something before ./ac_run, set the variable
|
||||||
|
# SRUN_COMMAND before calling this script.
|
||||||
|
#
|
||||||
|
# F.ex. on Taito SRUN_COMMAND="srun --gres=gpu:k80:4 --mem=24000 -t 00:14:59 -p gputest --cpus-per-task 1 -n 1"
|
||||||
|
|
||||||
|
results=()
|
||||||
|
|
||||||
|
# Parameters
|
||||||
|
# $1: String testname
|
||||||
|
# $2: String stencil_define_file
|
||||||
|
# $3: String stencil_assembly_file
|
||||||
|
# $4: String stencil_process_file
|
||||||
|
test_solver() {
|
||||||
|
TEST_DIR="tests/"$1
|
||||||
|
ac_mkbuilddir.sh -b ${TEST_DIR}
|
||||||
|
cd ${TEST_DIR}
|
||||||
|
compile_acc.sh --header $2 --assembly $3 --process $4
|
||||||
|
make -j
|
||||||
|
${SRUN_COMMAND} ./ac_run -t
|
||||||
|
results+=(${TEST_DIR}" fail? "$?)
|
||||||
|
}
|
||||||
|
|
||||||
|
NAME="hydro"
|
||||||
|
HEADER="hydro_solver/stencil_defines.h"
|
||||||
|
ASSEMBLY="mhd_solver/stencil_assembly.sas"
|
||||||
|
PROCESS="mhd_solver/stencil_process.sps"
|
||||||
|
test_solver ${NAME} ${HEADER} ${ASSEMBLY} ${PROCESS}
|
||||||
|
|
||||||
|
NAME="magnetic"
|
||||||
|
HEADER="magnetic_solver/stencil_defines.h"
|
||||||
|
ASSEMBLY="mhd_solver/stencil_assembly.sas"
|
||||||
|
PROCESS="mhd_solver/stencil_process.sps"
|
||||||
|
test_solver ${NAME} ${HEADER} ${ASSEMBLY} ${PROCESS}
|
||||||
|
|
||||||
|
NAME="mhd"
|
||||||
|
HEADER="mhd_solver/stencil_defines.h"
|
||||||
|
ASSEMBLY="mhd_solver/stencil_assembly.sas"
|
||||||
|
PROCESS="mhd_solver/stencil_process.sps"
|
||||||
|
test_solver ${NAME} ${HEADER} ${ASSEMBLY} ${PROCESS}
|
||||||
|
|
||||||
|
# Print results
|
||||||
|
for i in "${results[@]}"; do echo "$i" ; done
|
@@ -1,3 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
cmake -DCUDA_BUILD_LEGACY=OFF -DDOUBLE_PRECISION=ON .. && make -j && valgrind --leak-check=full --show-leak-kinds=all ./ac_run -t && make clean &&\
|
|
||||||
cmake -DCUDA_BUILD_LEGACY=OFF -DDOUBLE_PRECISION=OFF .. && make -j && valgrind --leak-check=full --show-leak-kinds=all ./ac_run -t
|
|
@@ -1,9 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
if [[ $1 == "DO" && $2 == "IT!" ]]; then
|
|
||||||
find -name \*.h -o -name \*.cc -o -name \*.cu -o -name \*.cuh | xargs clang-format-6.0 -i -style=file
|
|
||||||
echo "It is done."
|
|
||||||
else
|
|
||||||
find -name \*.h -o -name \*.cc -o -name \*.cu -o -name \*.cuh
|
|
||||||
echo "I'm going to try to fix the style of these files."
|
|
||||||
echo "If you're absolutely sure, give \"DO IT!\" (without quotes) as a parameter."
|
|
||||||
fi
|
|
@@ -1,60 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
|
||||||
|
|
||||||
This file is part of Astaroth.
|
|
||||||
|
|
||||||
Astaroth is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
Astaroth is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* \brief Generates a threadblock config file for RK3 using the given parameters.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
const char* rk3_threadblockconf_path = "rk3_threadblock.conf";
|
|
||||||
|
|
||||||
int
|
|
||||||
write_to_file(int threads_x, int threads_y, int threads_z, int elems_per_thread, int launch_bound)
|
|
||||||
{
|
|
||||||
FILE* fp;
|
|
||||||
fp = fopen(rk3_threadblockconf_path, "w");
|
|
||||||
|
|
||||||
if (fp != NULL) {
|
|
||||||
fprintf(fp, "#define RK_THREADS_X (%d)\n", threads_x);
|
|
||||||
fprintf(fp, "#define RK_THREADS_Y (%d)\n", threads_y);
|
|
||||||
fprintf(fp, "#define RK_THREADS_Z (%d)\n", threads_z);
|
|
||||||
fprintf(fp, "#define RK_ELEMS_PER_THREAD (%d)\n", elems_per_thread);
|
|
||||||
fprintf(fp, "#define RK_LAUNCH_BOUND_MIN_BLOCKS (%d)\n", launch_bound);
|
|
||||||
fclose(fp);
|
|
||||||
return EXIT_SUCCESS;
|
|
||||||
}
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Takes arguments and writes them into a file
|
|
||||||
// RK_THREADS_X, RK_THREADS_Y, RK_THREADS_Z, RK_ELEMS_PER_THREAD, RK_LAUNCH_BOUND_MIN_BLOCKS
|
|
||||||
int
|
|
||||||
main(int argc, char* argv[])
|
|
||||||
{
|
|
||||||
assert(argc == 6);
|
|
||||||
|
|
||||||
return write_to_file(atoi(argv[1]), atoi(argv[2]),atoi(argv[3]), atoi(argv[4]), atoi(argv[5]));
|
|
||||||
}
|
|
@@ -1,2 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
doxygen doxyfile
|
|
@@ -48,7 +48,9 @@ __constant__ Grid globalGrid;
|
|||||||
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
||||||
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||||
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
||||||
#include "kernels/kernels.cuh"
|
#include "kernels/boundconds.cuh"
|
||||||
|
#include "kernels/integration.cuh"
|
||||||
|
#include "kernels/reductions.cuh"
|
||||||
|
|
||||||
static dim3 rk3_tpb = (dim3){32, 1, 4};
|
static dim3 rk3_tpb = (dim3){32, 1, 4};
|
||||||
|
|
||||||
|
87
src/core/kernels/boundconds.cuh
Normal file
87
src/core/kernels/boundconds.cuh
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||||
|
|
||||||
|
This file is part of Astaroth.
|
||||||
|
|
||||||
|
Astaroth is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Astaroth is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
* \brief Brief info.
|
||||||
|
*
|
||||||
|
* Detailed info.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
__global__ void
|
||||||
|
kernel_periodic_boundconds(const int3 start, const int3 end, AcReal* vtxbuf)
|
||||||
|
{
|
||||||
|
const int i_dst = start.x + threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
const int j_dst = start.y + threadIdx.y + blockIdx.y * blockDim.y;
|
||||||
|
const int k_dst = start.z + threadIdx.z + blockIdx.z * blockDim.z;
|
||||||
|
|
||||||
|
// If within the start-end range (this allows threadblock dims that are not
|
||||||
|
// divisible by end - start)
|
||||||
|
if (i_dst >= end.x || j_dst >= end.y || k_dst >= end.z)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// if (i_dst >= DCONST_INT(AC_mx) || j_dst >= DCONST_INT(AC_my) || k_dst >= DCONST_INT(AC_mz))
|
||||||
|
// return;
|
||||||
|
|
||||||
|
// If destination index is inside the computational domain, return since
|
||||||
|
// the boundary conditions are only applied to the ghost zones
|
||||||
|
if (i_dst >= DCONST_INT(AC_nx_min) && i_dst < DCONST_INT(AC_nx_max) &&
|
||||||
|
j_dst >= DCONST_INT(AC_ny_min) && j_dst < DCONST_INT(AC_ny_max) &&
|
||||||
|
k_dst >= DCONST_INT(AC_nz_min) && k_dst < DCONST_INT(AC_nz_max))
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Find the source index
|
||||||
|
// Map to nx, ny, nz coordinates
|
||||||
|
int i_src = i_dst - DCONST_INT(AC_nx_min);
|
||||||
|
int j_src = j_dst - DCONST_INT(AC_ny_min);
|
||||||
|
int k_src = k_dst - DCONST_INT(AC_nz_min);
|
||||||
|
|
||||||
|
// Translate (s.t. the index is always positive)
|
||||||
|
i_src += DCONST_INT(AC_nx);
|
||||||
|
j_src += DCONST_INT(AC_ny);
|
||||||
|
k_src += DCONST_INT(AC_nz);
|
||||||
|
|
||||||
|
// Wrap
|
||||||
|
i_src %= DCONST_INT(AC_nx);
|
||||||
|
j_src %= DCONST_INT(AC_ny);
|
||||||
|
k_src %= DCONST_INT(AC_nz);
|
||||||
|
|
||||||
|
// Map to mx, my, mz coordinates
|
||||||
|
i_src += DCONST_INT(AC_nx_min);
|
||||||
|
j_src += DCONST_INT(AC_ny_min);
|
||||||
|
k_src += DCONST_INT(AC_nz_min);
|
||||||
|
|
||||||
|
const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src);
|
||||||
|
const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst);
|
||||||
|
vtxbuf[dst_idx] = vtxbuf[src_idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
periodic_boundconds(const cudaStream_t stream, const int3& start, const int3& end, AcReal* vtxbuf)
|
||||||
|
{
|
||||||
|
const dim3 tpb(8, 2, 8);
|
||||||
|
const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x),
|
||||||
|
(unsigned int)ceil((end.y - start.y) / (float)tpb.y),
|
||||||
|
(unsigned int)ceil((end.z - start.z) / (float)tpb.z));
|
||||||
|
|
||||||
|
kernel_periodic_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf);
|
||||||
|
ERRCHK_CUDA_KERNEL();
|
||||||
|
}
|
@@ -26,67 +26,6 @@
|
|||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
__global__ void
|
|
||||||
kernel_periodic_boundconds(const int3 start, const int3 end, AcReal* vtxbuf)
|
|
||||||
{
|
|
||||||
const int i_dst = start.x + threadIdx.x + blockIdx.x * blockDim.x;
|
|
||||||
const int j_dst = start.y + threadIdx.y + blockIdx.y * blockDim.y;
|
|
||||||
const int k_dst = start.z + threadIdx.z + blockIdx.z * blockDim.z;
|
|
||||||
|
|
||||||
// If within the start-end range (this allows threadblock dims that are not
|
|
||||||
// divisible by end - start)
|
|
||||||
if (i_dst >= end.x || j_dst >= end.y || k_dst >= end.z)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// if (i_dst >= DCONST_INT(AC_mx) || j_dst >= DCONST_INT(AC_my) || k_dst >= DCONST_INT(AC_mz))
|
|
||||||
// return;
|
|
||||||
|
|
||||||
// If destination index is inside the computational domain, return since
|
|
||||||
// the boundary conditions are only applied to the ghost zones
|
|
||||||
if (i_dst >= DCONST_INT(AC_nx_min) && i_dst < DCONST_INT(AC_nx_max) &&
|
|
||||||
j_dst >= DCONST_INT(AC_ny_min) && j_dst < DCONST_INT(AC_ny_max) &&
|
|
||||||
k_dst >= DCONST_INT(AC_nz_min) && k_dst < DCONST_INT(AC_nz_max))
|
|
||||||
return;
|
|
||||||
|
|
||||||
// Find the source index
|
|
||||||
// Map to nx, ny, nz coordinates
|
|
||||||
int i_src = i_dst - DCONST_INT(AC_nx_min);
|
|
||||||
int j_src = j_dst - DCONST_INT(AC_ny_min);
|
|
||||||
int k_src = k_dst - DCONST_INT(AC_nz_min);
|
|
||||||
|
|
||||||
// Translate (s.t. the index is always positive)
|
|
||||||
i_src += DCONST_INT(AC_nx);
|
|
||||||
j_src += DCONST_INT(AC_ny);
|
|
||||||
k_src += DCONST_INT(AC_nz);
|
|
||||||
|
|
||||||
// Wrap
|
|
||||||
i_src %= DCONST_INT(AC_nx);
|
|
||||||
j_src %= DCONST_INT(AC_ny);
|
|
||||||
k_src %= DCONST_INT(AC_nz);
|
|
||||||
|
|
||||||
// Map to mx, my, mz coordinates
|
|
||||||
i_src += DCONST_INT(AC_nx_min);
|
|
||||||
j_src += DCONST_INT(AC_ny_min);
|
|
||||||
k_src += DCONST_INT(AC_nz_min);
|
|
||||||
|
|
||||||
const int src_idx = DEVICE_VTXBUF_IDX(i_src, j_src, k_src);
|
|
||||||
const int dst_idx = DEVICE_VTXBUF_IDX(i_dst, j_dst, k_dst);
|
|
||||||
vtxbuf[dst_idx] = vtxbuf[src_idx];
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
periodic_boundconds(const cudaStream_t stream, const int3& start, const int3& end, AcReal* vtxbuf)
|
|
||||||
{
|
|
||||||
const dim3 tpb(8, 2, 8);
|
|
||||||
const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x),
|
|
||||||
(unsigned int)ceil((end.y - start.y) / (float)tpb.y),
|
|
||||||
(unsigned int)ceil((end.z - start.z) / (float)tpb.z));
|
|
||||||
|
|
||||||
kernel_periodic_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf);
|
|
||||||
ERRCHK_CUDA_KERNEL();
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
static __device__ __forceinline__ int
|
static __device__ __forceinline__ int
|
||||||
@@ -120,17 +59,18 @@ create_rotz(const AcReal radians)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if AC_DOUBLE_PRECISION == 0
|
#if AC_DOUBLE_PRECISION == 0
|
||||||
|
/*
|
||||||
|
// Fast but inaccurate
|
||||||
#define sin __sinf
|
#define sin __sinf
|
||||||
#define cos __cosf
|
#define cos __cosf
|
||||||
#define exp __expf
|
#define exp __expf
|
||||||
|
*/
|
||||||
|
#define sin sinf
|
||||||
|
#define cos cosf
|
||||||
|
#define exp expf
|
||||||
#define rsqrt rsqrtf // hardware reciprocal sqrt
|
#define rsqrt rsqrtf // hardware reciprocal sqrt
|
||||||
#endif // AC_DOUBLE_PRECISION == 0
|
#endif // AC_DOUBLE_PRECISION == 0
|
||||||
|
|
||||||
/*
|
|
||||||
typedef struct {
|
|
||||||
int i, j, k;
|
|
||||||
} int3;*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* =============================================================================
|
* =============================================================================
|
||||||
* Level 0 (Input Assembly Stage)
|
* Level 0 (Input Assembly Stage)
|
||||||
@@ -700,265 +640,3 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
|||||||
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
|
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
|
||||||
|
|
||||||
#include "stencil_process.cuh"
|
#include "stencil_process.cuh"
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 2 (Host calls)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
////////////////REDUCE///////////////////////////
|
|
||||||
#include "src/core/math_utils.h" // is_power_of_two
|
|
||||||
|
|
||||||
/*
|
|
||||||
Reduction steps:
|
|
||||||
1 of 3: Compute the initial value (a, a*a or exp(a)*exp(a)) and put the result in scratchpad
|
|
||||||
2 of 3: Compute most of the reductions into a single block of data
|
|
||||||
3 of 3: After all results have been stored into the final block, reduce the data in the final block
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Function pointer definitions
|
|
||||||
typedef AcReal (*FilterFunc)(const AcReal&);
|
|
||||||
typedef AcReal (*FilterFuncVec)(const AcReal&, const AcReal&, const AcReal&);
|
|
||||||
typedef AcReal (*ReduceFunc)(const AcReal&, const AcReal&);
|
|
||||||
|
|
||||||
// clang-format off
|
|
||||||
/* Comparison funcs */
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dmax(const AcReal& a, const AcReal& b) { return a > b ? a : b; }
|
|
||||||
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dmin(const AcReal& a, const AcReal& b) { return a < b ? a : b; }
|
|
||||||
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dsum(const AcReal& a, const AcReal& b) { return a + b; }
|
|
||||||
|
|
||||||
/* Function used to determine the values used during reduction */
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dvalue(const AcReal& a) { return AcReal(a); }
|
|
||||||
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dsquared(const AcReal& a) { return (AcReal)(a*a); }
|
|
||||||
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dexp_squared(const AcReal& a) { return exp(a)*exp(a); }
|
|
||||||
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dlength_vec(const AcReal& a, const AcReal& b, const AcReal& c) { return sqrt(a*a + b*b + c*c); }
|
|
||||||
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dsquared_vec(const AcReal& a, const AcReal& b, const AcReal& c) { return dsquared(a) + dsquared(b) + dsquared(c); }
|
|
||||||
|
|
||||||
static __device__ inline AcReal
|
|
||||||
dexp_squared_vec(const AcReal& a, const AcReal& b, const AcReal& c) { return dexp_squared(a) + dexp_squared(b) + dexp_squared(c); }
|
|
||||||
// clang-format on
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
template <FilterFunc filter>
|
|
||||||
__global__ void
|
|
||||||
kernel_filter(const __restrict__ AcReal* src, const int3 start, const int3 end, AcReal* dst)
|
|
||||||
{
|
|
||||||
const int3 src_idx = (int3){start.x + threadIdx.x + blockIdx.x * blockDim.x,
|
|
||||||
start.y + threadIdx.y + blockIdx.y * blockDim.y,
|
|
||||||
start.z + threadIdx.z + blockIdx.z * blockDim.z};
|
|
||||||
|
|
||||||
const int nx = end.x - start.x;
|
|
||||||
const int ny = end.y - start.y;
|
|
||||||
const int nz = end.z - start.z;
|
|
||||||
(void)nz; // Suppressed unused variable warning when not compiling with debug flags
|
|
||||||
|
|
||||||
const int3 dst_idx = (int3){threadIdx.x + blockIdx.x * blockDim.x,
|
|
||||||
threadIdx.y + blockIdx.y * blockDim.y,
|
|
||||||
threadIdx.z + blockIdx.z * blockDim.z};
|
|
||||||
|
|
||||||
assert(src_idx.x < DCONST_INT(AC_nx_max) && src_idx.y < DCONST_INT(AC_ny_max) &&
|
|
||||||
src_idx.z < DCONST_INT(AC_nz_max));
|
|
||||||
assert(dst_idx.x < nx && dst_idx.y < ny && dst_idx.z < nz);
|
|
||||||
assert(dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny < nx * ny * nz);
|
|
||||||
|
|
||||||
dst[dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny] = filter(src[IDX(src_idx)]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <FilterFuncVec filter>
|
|
||||||
__global__ void
|
|
||||||
kernel_filter_vec(const __restrict__ AcReal* src0, const __restrict__ AcReal* src1,
|
|
||||||
const __restrict__ AcReal* src2, const int3 start, const int3 end, AcReal* dst)
|
|
||||||
{
|
|
||||||
const int3 src_idx = (int3){start.x + threadIdx.x + blockIdx.x * blockDim.x,
|
|
||||||
start.y + threadIdx.y + blockIdx.y * blockDim.y,
|
|
||||||
start.z + threadIdx.z + blockIdx.z * blockDim.z};
|
|
||||||
|
|
||||||
const int nx = end.x - start.x;
|
|
||||||
const int ny = end.y - start.y;
|
|
||||||
const int nz = end.z - start.z;
|
|
||||||
(void)nz; // Suppressed unused variable warning when not compiling with debug flags
|
|
||||||
|
|
||||||
const int3 dst_idx = (int3){threadIdx.x + blockIdx.x * blockDim.x,
|
|
||||||
threadIdx.y + blockIdx.y * blockDim.y,
|
|
||||||
threadIdx.z + blockIdx.z * blockDim.z};
|
|
||||||
|
|
||||||
assert(src_idx.x < DCONST_INT(AC_nx_max) && src_idx.y < DCONST_INT(AC_ny_max) &&
|
|
||||||
src_idx.z < DCONST_INT(AC_nz_max));
|
|
||||||
assert(dst_idx.x < nx && dst_idx.y < ny && dst_idx.z < nz);
|
|
||||||
assert(dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny < nx * ny * nz);
|
|
||||||
|
|
||||||
dst[dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny] = filter(
|
|
||||||
src0[IDX(src_idx)], src1[IDX(src_idx)], src2[IDX(src_idx)]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <ReduceFunc reduce>
|
|
||||||
__global__ void
|
|
||||||
kernel_reduce(AcReal* scratchpad, const int num_elems)
|
|
||||||
{
|
|
||||||
const int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
|
||||||
|
|
||||||
extern __shared__ AcReal smem[];
|
|
||||||
if (idx < num_elems) {
|
|
||||||
smem[threadIdx.x] = scratchpad[idx];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
smem[threadIdx.x] = NAN;
|
|
||||||
}
|
|
||||||
__syncthreads();
|
|
||||||
|
|
||||||
int offset = blockDim.x / 2;
|
|
||||||
assert(offset % 2 == 0);
|
|
||||||
while (offset > 0) {
|
|
||||||
if (threadIdx.x < offset) {
|
|
||||||
smem[threadIdx.x] = reduce(smem[threadIdx.x], smem[threadIdx.x + offset]);
|
|
||||||
}
|
|
||||||
offset /= 2;
|
|
||||||
__syncthreads();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (threadIdx.x == 0) {
|
|
||||||
scratchpad[idx] = smem[threadIdx.x];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <ReduceFunc reduce>
|
|
||||||
__global__ void
|
|
||||||
kernel_reduce_block(const __restrict__ AcReal* scratchpad, const int num_blocks,
|
|
||||||
const int block_size, AcReal* result)
|
|
||||||
{
|
|
||||||
const int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
|
||||||
if (idx != 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
AcReal res = scratchpad[0];
|
|
||||||
for (int i = 1; i < num_blocks; ++i) {
|
|
||||||
res = reduce(res, scratchpad[i * block_size]);
|
|
||||||
}
|
|
||||||
*result = res;
|
|
||||||
}
|
|
||||||
|
|
||||||
AcReal
|
|
||||||
reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& start,
|
|
||||||
const int3& end, const AcReal* vtxbuf, AcReal* scratchpad, AcReal* reduce_result)
|
|
||||||
{
|
|
||||||
const unsigned nx = end.x - start.x;
|
|
||||||
const unsigned ny = end.y - start.y;
|
|
||||||
const unsigned nz = end.z - start.z;
|
|
||||||
const unsigned num_elems = nx * ny * nz;
|
|
||||||
|
|
||||||
const dim3 tpb_filter(32, 4, 1);
|
|
||||||
const dim3 bpg_filter((unsigned int)ceil(nx / AcReal(tpb_filter.x)),
|
|
||||||
(unsigned int)ceil(ny / AcReal(tpb_filter.y)),
|
|
||||||
(unsigned int)ceil(nz / AcReal(tpb_filter.z)));
|
|
||||||
|
|
||||||
const int tpb_reduce = 128;
|
|
||||||
const int bpg_reduce = num_elems / tpb_reduce;
|
|
||||||
|
|
||||||
ERRCHK(nx >= tpb_filter.x);
|
|
||||||
ERRCHK(ny >= tpb_filter.y);
|
|
||||||
ERRCHK(nz >= tpb_filter.z);
|
|
||||||
ERRCHK(tpb_reduce <= num_elems);
|
|
||||||
ERRCHK(nx * ny * nz % 2 == 0);
|
|
||||||
|
|
||||||
// clang-format off
|
|
||||||
if (rtype == RTYPE_MAX) {
|
|
||||||
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
|
||||||
kernel_reduce<dmax><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dmax><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_MIN) {
|
|
||||||
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
|
||||||
kernel_reduce<dmin><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dmin><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_RMS) {
|
|
||||||
kernel_filter<dsquared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_RMS_EXP) {
|
|
||||||
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_SUM) {
|
|
||||||
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else {
|
|
||||||
ERROR("Unrecognized rtype");
|
|
||||||
}
|
|
||||||
// clang-format on
|
|
||||||
cudaStreamSynchronize(stream);
|
|
||||||
AcReal result;
|
|
||||||
cudaMemcpy(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
AcReal
|
|
||||||
reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& start, const int3& end,
|
|
||||||
const AcReal* vtxbuf0, const AcReal* vtxbuf1, const AcReal* vtxbuf2, AcReal* scratchpad,
|
|
||||||
AcReal* reduce_result)
|
|
||||||
{
|
|
||||||
const unsigned nx = end.x - start.x;
|
|
||||||
const unsigned ny = end.y - start.y;
|
|
||||||
const unsigned nz = end.z - start.z;
|
|
||||||
const unsigned num_elems = nx * ny * nz;
|
|
||||||
|
|
||||||
const dim3 tpb_filter(32, 4, 1);
|
|
||||||
const dim3 bpg_filter((unsigned int)ceil(nx / AcReal(tpb_filter.x)),
|
|
||||||
(unsigned int)ceil(ny / AcReal(tpb_filter.y)),
|
|
||||||
(unsigned int)ceil(nz / AcReal(tpb_filter.z)));
|
|
||||||
|
|
||||||
const int tpb_reduce = 128;
|
|
||||||
const int bpg_reduce = num_elems / tpb_reduce;
|
|
||||||
|
|
||||||
ERRCHK(nx >= tpb_filter.x);
|
|
||||||
ERRCHK(ny >= tpb_filter.y);
|
|
||||||
ERRCHK(nz >= tpb_filter.z);
|
|
||||||
ERRCHK(tpb_reduce <= num_elems);
|
|
||||||
ERRCHK(nx * ny * nz % 2 == 0);
|
|
||||||
|
|
||||||
// clang-format off
|
|
||||||
if (rtype == RTYPE_MAX) {
|
|
||||||
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
|
||||||
kernel_reduce<dmax><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dmax><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_MIN) {
|
|
||||||
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
|
||||||
kernel_reduce<dmin><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dmin><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_RMS) {
|
|
||||||
kernel_filter_vec<dsquared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_RMS_EXP) {
|
|
||||||
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else if (rtype == RTYPE_SUM) {
|
|
||||||
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
|
||||||
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
|
||||||
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
|
||||||
} else {
|
|
||||||
ERROR("Unrecognized rtype");
|
|
||||||
}
|
|
||||||
// clang-format on
|
|
||||||
|
|
||||||
cudaStreamSynchronize(stream);
|
|
||||||
AcReal result;
|
|
||||||
cudaMemcpy(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost);
|
|
||||||
return result;
|
|
||||||
}
|
|
282
src/core/kernels/reductions.cuh
Normal file
282
src/core/kernels/reductions.cuh
Normal file
@@ -0,0 +1,282 @@
|
|||||||
|
/*
|
||||||
|
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||||
|
|
||||||
|
This file is part of Astaroth.
|
||||||
|
|
||||||
|
Astaroth is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Astaroth is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
* \brief Brief info.
|
||||||
|
*
|
||||||
|
* Detailed info.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "src/core/math_utils.h" // is_power_of_two
|
||||||
|
|
||||||
|
/*
|
||||||
|
Reduction steps:
|
||||||
|
1 of 3: Compute the initial value (a, a*a or exp(a)*exp(a)) and put the result in scratchpad
|
||||||
|
2 of 3: Compute most of the reductions into a single block of data
|
||||||
|
3 of 3: After all results have been stored into the final block, reduce the data in the final block
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Function pointer definitions
|
||||||
|
typedef AcReal (*FilterFunc)(const AcReal&);
|
||||||
|
typedef AcReal (*FilterFuncVec)(const AcReal&, const AcReal&, const AcReal&);
|
||||||
|
typedef AcReal (*ReduceFunc)(const AcReal&, const AcReal&);
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
/* Comparison funcs */
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dmax(const AcReal& a, const AcReal& b) { return a > b ? a : b; }
|
||||||
|
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dmin(const AcReal& a, const AcReal& b) { return a < b ? a : b; }
|
||||||
|
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dsum(const AcReal& a, const AcReal& b) { return a + b; }
|
||||||
|
|
||||||
|
/* Function used to determine the values used during reduction */
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dvalue(const AcReal& a) { return AcReal(a); }
|
||||||
|
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dsquared(const AcReal& a) { return (AcReal)(a*a); }
|
||||||
|
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dexp_squared(const AcReal& a) { return exp(a)*exp(a); }
|
||||||
|
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dlength_vec(const AcReal& a, const AcReal& b, const AcReal& c) { return sqrt(a*a + b*b + c*c); }
|
||||||
|
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dsquared_vec(const AcReal& a, const AcReal& b, const AcReal& c) { return dsquared(a) + dsquared(b) + dsquared(c); }
|
||||||
|
|
||||||
|
static __device__ inline AcReal
|
||||||
|
dexp_squared_vec(const AcReal& a, const AcReal& b, const AcReal& c) { return dexp_squared(a) + dexp_squared(b) + dexp_squared(c); }
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
template <FilterFunc filter>
|
||||||
|
__global__ void
|
||||||
|
kernel_filter(const __restrict__ AcReal* src, const int3 start, const int3 end, AcReal* dst)
|
||||||
|
{
|
||||||
|
const int3 src_idx = (int3){start.x + threadIdx.x + blockIdx.x * blockDim.x,
|
||||||
|
start.y + threadIdx.y + blockIdx.y * blockDim.y,
|
||||||
|
start.z + threadIdx.z + blockIdx.z * blockDim.z};
|
||||||
|
|
||||||
|
const int nx = end.x - start.x;
|
||||||
|
const int ny = end.y - start.y;
|
||||||
|
const int nz = end.z - start.z;
|
||||||
|
(void)nz; // Suppressed unused variable warning when not compiling with debug flags
|
||||||
|
|
||||||
|
const int3 dst_idx = (int3){threadIdx.x + blockIdx.x * blockDim.x,
|
||||||
|
threadIdx.y + blockIdx.y * blockDim.y,
|
||||||
|
threadIdx.z + blockIdx.z * blockDim.z};
|
||||||
|
|
||||||
|
assert(src_idx.x < DCONST_INT(AC_nx_max) && src_idx.y < DCONST_INT(AC_ny_max) &&
|
||||||
|
src_idx.z < DCONST_INT(AC_nz_max));
|
||||||
|
assert(dst_idx.x < nx && dst_idx.y < ny && dst_idx.z < nz);
|
||||||
|
assert(dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny < nx * ny * nz);
|
||||||
|
|
||||||
|
dst[dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny] = filter(src[IDX(src_idx)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <FilterFuncVec filter>
|
||||||
|
__global__ void
|
||||||
|
kernel_filter_vec(const __restrict__ AcReal* src0, const __restrict__ AcReal* src1,
|
||||||
|
const __restrict__ AcReal* src2, const int3 start, const int3 end, AcReal* dst)
|
||||||
|
{
|
||||||
|
const int3 src_idx = (int3){start.x + threadIdx.x + blockIdx.x * blockDim.x,
|
||||||
|
start.y + threadIdx.y + blockIdx.y * blockDim.y,
|
||||||
|
start.z + threadIdx.z + blockIdx.z * blockDim.z};
|
||||||
|
|
||||||
|
const int nx = end.x - start.x;
|
||||||
|
const int ny = end.y - start.y;
|
||||||
|
const int nz = end.z - start.z;
|
||||||
|
(void)nz; // Suppressed unused variable warning when not compiling with debug flags
|
||||||
|
|
||||||
|
const int3 dst_idx = (int3){threadIdx.x + blockIdx.x * blockDim.x,
|
||||||
|
threadIdx.y + blockIdx.y * blockDim.y,
|
||||||
|
threadIdx.z + blockIdx.z * blockDim.z};
|
||||||
|
|
||||||
|
assert(src_idx.x < DCONST_INT(AC_nx_max) && src_idx.y < DCONST_INT(AC_ny_max) &&
|
||||||
|
src_idx.z < DCONST_INT(AC_nz_max));
|
||||||
|
assert(dst_idx.x < nx && dst_idx.y < ny && dst_idx.z < nz);
|
||||||
|
assert(dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny < nx * ny * nz);
|
||||||
|
|
||||||
|
dst[dst_idx.x + dst_idx.y * nx + dst_idx.z * nx * ny] = filter(
|
||||||
|
src0[IDX(src_idx)], src1[IDX(src_idx)], src2[IDX(src_idx)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <ReduceFunc reduce>
|
||||||
|
__global__ void
|
||||||
|
kernel_reduce(AcReal* scratchpad, const int num_elems)
|
||||||
|
{
|
||||||
|
const int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
|
||||||
|
extern __shared__ AcReal smem[];
|
||||||
|
if (idx < num_elems) {
|
||||||
|
smem[threadIdx.x] = scratchpad[idx];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
smem[threadIdx.x] = NAN;
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
int offset = blockDim.x / 2;
|
||||||
|
assert(offset % 2 == 0);
|
||||||
|
while (offset > 0) {
|
||||||
|
if (threadIdx.x < offset) {
|
||||||
|
smem[threadIdx.x] = reduce(smem[threadIdx.x], smem[threadIdx.x + offset]);
|
||||||
|
}
|
||||||
|
offset /= 2;
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (threadIdx.x == 0) {
|
||||||
|
scratchpad[idx] = smem[threadIdx.x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <ReduceFunc reduce>
|
||||||
|
__global__ void
|
||||||
|
kernel_reduce_block(const __restrict__ AcReal* scratchpad, const int num_blocks,
|
||||||
|
const int block_size, AcReal* result)
|
||||||
|
{
|
||||||
|
const int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
if (idx != 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
AcReal res = scratchpad[0];
|
||||||
|
for (int i = 1; i < num_blocks; ++i) {
|
||||||
|
res = reduce(res, scratchpad[i * block_size]);
|
||||||
|
}
|
||||||
|
*result = res;
|
||||||
|
}
|
||||||
|
|
||||||
|
AcReal
|
||||||
|
reduce_scal(const cudaStream_t stream, const ReductionType rtype, const int3& start,
|
||||||
|
const int3& end, const AcReal* vtxbuf, AcReal* scratchpad, AcReal* reduce_result)
|
||||||
|
{
|
||||||
|
const unsigned nx = end.x - start.x;
|
||||||
|
const unsigned ny = end.y - start.y;
|
||||||
|
const unsigned nz = end.z - start.z;
|
||||||
|
const unsigned num_elems = nx * ny * nz;
|
||||||
|
|
||||||
|
const dim3 tpb_filter(32, 4, 1);
|
||||||
|
const dim3 bpg_filter((unsigned int)ceil(nx / AcReal(tpb_filter.x)),
|
||||||
|
(unsigned int)ceil(ny / AcReal(tpb_filter.y)),
|
||||||
|
(unsigned int)ceil(nz / AcReal(tpb_filter.z)));
|
||||||
|
|
||||||
|
const int tpb_reduce = 128;
|
||||||
|
const int bpg_reduce = num_elems / tpb_reduce;
|
||||||
|
|
||||||
|
ERRCHK(nx >= tpb_filter.x);
|
||||||
|
ERRCHK(ny >= tpb_filter.y);
|
||||||
|
ERRCHK(nz >= tpb_filter.z);
|
||||||
|
ERRCHK(tpb_reduce <= num_elems);
|
||||||
|
ERRCHK(nx * ny * nz % 2 == 0);
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
if (rtype == RTYPE_MAX) {
|
||||||
|
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||||
|
kernel_reduce<dmax><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dmax><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_MIN) {
|
||||||
|
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||||
|
kernel_reduce<dmin><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dmin><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_RMS) {
|
||||||
|
kernel_filter<dsquared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_RMS_EXP) {
|
||||||
|
kernel_filter<dexp_squared><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_SUM) {
|
||||||
|
kernel_filter<dvalue><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else {
|
||||||
|
ERROR("Unrecognized rtype");
|
||||||
|
}
|
||||||
|
// clang-format on
|
||||||
|
cudaStreamSynchronize(stream);
|
||||||
|
AcReal result;
|
||||||
|
cudaMemcpy(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
AcReal
|
||||||
|
reduce_vec(const cudaStream_t stream, const ReductionType rtype, const int3& start, const int3& end,
|
||||||
|
const AcReal* vtxbuf0, const AcReal* vtxbuf1, const AcReal* vtxbuf2, AcReal* scratchpad,
|
||||||
|
AcReal* reduce_result)
|
||||||
|
{
|
||||||
|
const unsigned nx = end.x - start.x;
|
||||||
|
const unsigned ny = end.y - start.y;
|
||||||
|
const unsigned nz = end.z - start.z;
|
||||||
|
const unsigned num_elems = nx * ny * nz;
|
||||||
|
|
||||||
|
const dim3 tpb_filter(32, 4, 1);
|
||||||
|
const dim3 bpg_filter((unsigned int)ceil(nx / AcReal(tpb_filter.x)),
|
||||||
|
(unsigned int)ceil(ny / AcReal(tpb_filter.y)),
|
||||||
|
(unsigned int)ceil(nz / AcReal(tpb_filter.z)));
|
||||||
|
|
||||||
|
const int tpb_reduce = 128;
|
||||||
|
const int bpg_reduce = num_elems / tpb_reduce;
|
||||||
|
|
||||||
|
ERRCHK(nx >= tpb_filter.x);
|
||||||
|
ERRCHK(ny >= tpb_filter.y);
|
||||||
|
ERRCHK(nz >= tpb_filter.z);
|
||||||
|
ERRCHK(tpb_reduce <= num_elems);
|
||||||
|
ERRCHK(nx * ny * nz % 2 == 0);
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
if (rtype == RTYPE_MAX) {
|
||||||
|
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||||
|
kernel_reduce<dmax><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dmax><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_MIN) {
|
||||||
|
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||||
|
kernel_reduce<dmin><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dmin><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_RMS) {
|
||||||
|
kernel_filter_vec<dsquared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_RMS_EXP) {
|
||||||
|
kernel_filter_vec<dexp_squared_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else if (rtype == RTYPE_SUM) {
|
||||||
|
kernel_filter_vec<dlength_vec><<<bpg_filter, tpb_filter, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, start, end, scratchpad);
|
||||||
|
kernel_reduce<dsum><<<bpg_reduce, tpb_reduce, sizeof(AcReal) * tpb_reduce, stream>>>(scratchpad, num_elems);
|
||||||
|
kernel_reduce_block<dsum><<<1, 1, 0, stream>>>(scratchpad, bpg_reduce, tpb_reduce, reduce_result);
|
||||||
|
} else {
|
||||||
|
ERROR("Unrecognized rtype");
|
||||||
|
}
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
cudaStreamSynchronize(stream);
|
||||||
|
AcReal result;
|
||||||
|
cudaMemcpy(&result, reduce_result, sizeof(AcReal), cudaMemcpyDeviceToHost);
|
||||||
|
return result;
|
||||||
|
}
|
@@ -236,6 +236,14 @@ check_reductions(const AcMeshInfo& config)
|
|||||||
acLoad(*mesh);
|
acLoad(*mesh);
|
||||||
|
|
||||||
for (int rtype = 0; rtype < NUM_REDUCTION_TYPES; ++rtype) {
|
for (int rtype = 0; rtype < NUM_REDUCTION_TYPES; ++rtype) {
|
||||||
|
|
||||||
|
if (rtype == RTYPE_SUM) {
|
||||||
|
// Skip SUM test for now. The failure is either caused by floating-point
|
||||||
|
// cancellation or an actual issue
|
||||||
|
WARNING("Skipping RTYPE_SUM test\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const VertexBufferHandle ftype = VTXBUF_UUX;
|
const VertexBufferHandle ftype = VTXBUF_UUX;
|
||||||
|
|
||||||
// Scal
|
// Scal
|
||||||
@@ -337,6 +345,8 @@ verify_meshes(const ModelMesh& model, const AcMesh& candidate)
|
|||||||
printf("Index (%d, %d, %d)\n", i0, j0, k0);
|
printf("Index (%d, %d, %d)\n", i0, j0, k0);
|
||||||
print_debug_info(model_val, cand_val, range);
|
print_debug_info(model_val, cand_val, range);
|
||||||
retval = false;
|
retval = false;
|
||||||
|
printf("Breaking\n");
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ModelScalar abs_error = get_absolute_error(model_val, cand_val);
|
const ModelScalar abs_error = get_absolute_error(model_val, cand_val);
|
||||||
@@ -412,6 +422,12 @@ check_rk3(const AcMeshInfo& mesh_info)
|
|||||||
// const AcReal dt = host_timestep(umax, mesh_info);
|
// const AcReal dt = host_timestep(umax, mesh_info);
|
||||||
const AcReal dt = AcReal(1e-2); // Use a small constant timestep to avoid instabilities
|
const AcReal dt = AcReal(1e-2); // Use a small constant timestep to avoid instabilities
|
||||||
|
|
||||||
|
#if LFORCING
|
||||||
|
const ForcingParams forcing_params = generateForcingParams(model_mesh->info);
|
||||||
|
loadForcingParamsToHost(forcing_params, model_mesh);
|
||||||
|
loadForcingParamsToDevice(forcing_params);
|
||||||
|
#endif
|
||||||
|
|
||||||
acIntegrate(dt);
|
acIntegrate(dt);
|
||||||
|
|
||||||
model_rk3(dt, model_mesh);
|
model_rk3(dt, model_mesh);
|
||||||
|
@@ -255,6 +255,30 @@ loadForcingParamsToHost(const ForcingParams& forcing_params, AcMesh* mesh)
|
|||||||
mesh->info.real_params[AC_kaver] = forcing_params.kaver;
|
mesh->info.real_params[AC_kaver] = forcing_params.kaver;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
loadForcingParamsToHost(const ForcingParams& forcing_params, ModelMesh* mesh)
|
||||||
|
{
|
||||||
|
// %JP: Left some regex magic here in case we need to modify the ForcingParams struct
|
||||||
|
// acLoadDeviceConstant\(([A-Za-z_]*), ([a-z_.]*)\);
|
||||||
|
// mesh->info.real_params[$1] = $2;
|
||||||
|
mesh->info.real_params[AC_forcing_magnitude] = forcing_params.magnitude;
|
||||||
|
mesh->info.real_params[AC_forcing_phase] = forcing_params.phase;
|
||||||
|
|
||||||
|
mesh->info.real_params[AC_k_forcex] = forcing_params.k_force.x;
|
||||||
|
mesh->info.real_params[AC_k_forcey] = forcing_params.k_force.y;
|
||||||
|
mesh->info.real_params[AC_k_forcez] = forcing_params.k_force.z;
|
||||||
|
|
||||||
|
mesh->info.real_params[AC_ff_hel_rex] = forcing_params.ff_hel_re.x;
|
||||||
|
mesh->info.real_params[AC_ff_hel_rey] = forcing_params.ff_hel_re.y;
|
||||||
|
mesh->info.real_params[AC_ff_hel_rez] = forcing_params.ff_hel_re.z;
|
||||||
|
|
||||||
|
mesh->info.real_params[AC_ff_hel_imx] = forcing_params.ff_hel_im.x;
|
||||||
|
mesh->info.real_params[AC_ff_hel_imy] = forcing_params.ff_hel_im.y;
|
||||||
|
mesh->info.real_params[AC_ff_hel_imz] = forcing_params.ff_hel_im.z;
|
||||||
|
|
||||||
|
mesh->info.real_params[AC_kaver] = forcing_params.kaver;
|
||||||
|
}
|
||||||
|
|
||||||
ForcingParams
|
ForcingParams
|
||||||
generateForcingParams(const AcMeshInfo& mesh_info)
|
generateForcingParams(const AcMeshInfo& mesh_info)
|
||||||
{
|
{
|
||||||
|
@@ -28,6 +28,8 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include "astaroth.h"
|
#include "astaroth.h"
|
||||||
|
|
||||||
|
#include "modelmesh.h"
|
||||||
|
|
||||||
AcReal get_random_number_01();
|
AcReal get_random_number_01();
|
||||||
|
|
||||||
AcReal3 cross(const AcReal3& a, const AcReal3& b);
|
AcReal3 cross(const AcReal3& a, const AcReal3& b);
|
||||||
@@ -64,5 +66,6 @@ typedef struct {
|
|||||||
void loadForcingParamsToDevice(const ForcingParams& forcing_params);
|
void loadForcingParamsToDevice(const ForcingParams& forcing_params);
|
||||||
|
|
||||||
void loadForcingParamsToHost(const ForcingParams& forcing_params, AcMesh* mesh);
|
void loadForcingParamsToHost(const ForcingParams& forcing_params, AcMesh* mesh);
|
||||||
|
void loadForcingParamsToHost(const ForcingParams& forcing_params, ModelMesh* mesh);
|
||||||
|
|
||||||
ForcingParams generateForcingParams(const AcMeshInfo& mesh_info);
|
ForcingParams generateForcingParams(const AcMeshInfo& mesh_info);
|
||||||
|
@@ -560,11 +560,9 @@ momentum(const ModelVectorData& uu, const ModelScalarData& lnrho
|
|||||||
#else
|
#else
|
||||||
// !!!!!!!!!!!!!!!!%JP: NOTE TODO IMPORTANT!!!!!!!!!!!!!!!!!!!!!!!!
|
// !!!!!!!!!!!!!!!!%JP: NOTE TODO IMPORTANT!!!!!!!!!!!!!!!!!!!!!!!!
|
||||||
// NOT CHECKED FOR CORRECTNESS: USE AT YOUR OWN RISK
|
// NOT CHECKED FOR CORRECTNESS: USE AT YOUR OWN RISK
|
||||||
const ModelMatrix S = stress_tensor(uu);
|
const ModelMatrix S = stress_tensor(uu);
|
||||||
const ModelScalar cs2 = get(AC_cs2_sound) *
|
|
||||||
expl((get(AC_gamma) - 1) * (value(lnrho) - get(AC_lnrho0)));
|
|
||||||
|
|
||||||
const ModelVector mom = -mul(gradients(uu), value(uu)) - cs2 * gradient(lnrho) +
|
const ModelVector mom = -mul(gradients(uu), value(uu)) - get(AC_cs2_sound) * gradient(lnrho) +
|
||||||
get(AC_nu_visc) * (laplace_vec(uu) +
|
get(AC_nu_visc) * (laplace_vec(uu) +
|
||||||
ModelScalar(1. / 3.) * gradient_of_divergence(uu) +
|
ModelScalar(1. / 3.) * gradient_of_divergence(uu) +
|
||||||
ModelScalar(2.) * mul(S, gradient(lnrho))) +
|
ModelScalar(2.) * mul(S, gradient(lnrho))) +
|
||||||
@@ -662,15 +660,13 @@ is_valid(const ModelVector& a)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if LFORCING
|
#if LFORCING
|
||||||
// FORCING NOT SUPPORTED FOR AUTOTEST
|
ModelVector
|
||||||
|
|
||||||
static inline ModelVector
|
|
||||||
simple_vortex_forcing(ModelVector a, ModelVector b, ModelScalar magnitude)
|
simple_vortex_forcing(ModelVector a, ModelVector b, ModelScalar magnitude)
|
||||||
{
|
{
|
||||||
return magnitude * cross(normalized(b - a), (ModelVector){0, 0, 1}); // Vortex
|
return magnitude * cross(normalized(b - a), (ModelVector){0, 0, 1}); // Vortex
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline ModelVector
|
ModelVector
|
||||||
simple_outward_flow_forcing(ModelVector a, ModelVector b, ModelScalar magnitude)
|
simple_outward_flow_forcing(ModelVector a, ModelVector b, ModelScalar magnitude)
|
||||||
{
|
{
|
||||||
return magnitude * (1 / length(b - a)) * normalized(b - a); // Outward flow
|
return magnitude * (1 / length(b - a)) * normalized(b - a); // Outward flow
|
||||||
@@ -678,22 +674,24 @@ simple_outward_flow_forcing(ModelVector a, ModelVector b, ModelScalar magnitude)
|
|||||||
|
|
||||||
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable
|
// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable
|
||||||
// helicity
|
// helicity
|
||||||
static inline ModelVector
|
ModelVector
|
||||||
helical_forcing(ModelScalar /* magnitude */, ModelVector k_force, ModelVector xx, ModelVector ff_re,
|
helical_forcing(ModelScalar magnitude, ModelVector k_force, ModelVector xx, ModelVector ff_re,
|
||||||
ModelVector ff_im, ModelScalar phi)
|
ModelVector ff_im, ModelScalar phi)
|
||||||
{
|
{
|
||||||
|
(void)magnitude; // WARNING: unused
|
||||||
|
xx.x = xx.x * (2.0l * M_PI / (get(AC_dsx) * (get(AC_ny_max) - get(AC_ny_min))));
|
||||||
|
xx.y = xx.y * (2.0l * M_PI / (get(AC_dsy) * (get(AC_ny_max) - get(AC_ny_min))));
|
||||||
|
xx.z = xx.z * (2.0l * M_PI / (get(AC_dsz) * (get(AC_ny_max) - get(AC_ny_min))));
|
||||||
|
|
||||||
xx.x = xx.x * (2.0 * M_PI / (get(AC_dsx) * (get(AC_ny_max) - get(AC_ny_min))));
|
ModelScalar cosl_phi = cosl(phi);
|
||||||
xx.y = xx.y * (2.0 * M_PI / (get(AC_dsy) * (get(AC_ny_max) - get(AC_ny_min))));
|
ModelScalar sinl_phi = sinl(phi);
|
||||||
xx.z = xx.z * (2.0 * M_PI / (get(AC_dsz) * (get(AC_ny_max) - get(AC_ny_min))));
|
ModelScalar cosl_k_dox_x = cosl(dot(k_force, xx));
|
||||||
|
ModelScalar sinl_k_dox_x = sinl(dot(k_force, xx));
|
||||||
ModelScalar cos_phi = cosl(phi);
|
|
||||||
ModelScalar sin_phi = sinl(phi);
|
|
||||||
ModelScalar cos_k_dox_x = cosl(dot(k_force, xx));
|
|
||||||
ModelScalar sin_k_dox_x = sinl(dot(k_force, xx));
|
|
||||||
// Phase affect only the x-component
|
// Phase affect only the x-component
|
||||||
ModelScalar real_comp_phase = cos_k_dox_x * cos_phi - sin_k_dox_x * sin_phi;
|
// ModelScalar real_comp = cosl_k_dox_x;
|
||||||
ModelScalar imag_comp_phase = cos_k_dox_x * sin_phi + sin_k_dox_x * cos_phi;
|
// ModelScalar imag_comp = sinl_k_dox_x;
|
||||||
|
ModelScalar real_comp_phase = cosl_k_dox_x * cosl_phi - sinl_k_dox_x * sinl_phi;
|
||||||
|
ModelScalar imag_comp_phase = cosl_k_dox_x * sinl_phi + sinl_k_dox_x * cosl_phi;
|
||||||
|
|
||||||
ModelVector force = (ModelVector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase,
|
ModelVector force = (ModelVector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase,
|
||||||
ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase,
|
ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase,
|
||||||
@@ -702,18 +700,17 @@ helical_forcing(ModelScalar /* magnitude */, ModelVector k_force, ModelVector xx
|
|||||||
return force;
|
return force;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline ModelVector
|
ModelVector
|
||||||
forcing(int3 globalVertexIdx, ModelScalar dt)
|
forcing(int3 globalVertexIdx, ModelScalar dt)
|
||||||
{
|
{
|
||||||
/*
|
ModelVector a = ModelScalar(.5) * (ModelVector){get(AC_nx) * get(AC_dsx),
|
||||||
ModelVector a = ModelScalar(.5) * (ModelVector){get(AC_nx) * get(AC_dsx),
|
|
||||||
get(AC_ny) * get(AC_dsy),
|
get(AC_ny) * get(AC_dsy),
|
||||||
get(AC_nz) * get(AC_dsz)}; // source (origin)
|
get(AC_nz) * get(AC_dsz)}; // source (origin)
|
||||||
*/
|
(void)a; // WARNING: not used
|
||||||
ModelVector xx = (ModelVector){
|
ModelVector xx = (ModelVector){(globalVertexIdx.x - get(AC_nx_min)) * get(AC_dsx),
|
||||||
(globalVertexIdx.x - get(AC_nx_min)) * get(AC_dsx),
|
(globalVertexIdx.y - get(AC_ny_min)) * get(AC_dsy),
|
||||||
(globalVertexIdx.y - get(AC_ny_min) * get(AC_dsy)),
|
(globalVertexIdx.z - get(AC_nz_min)) *
|
||||||
(globalVertexIdx.z - get(AC_nz_min) * get(AC_dsz))}; // sink (current index)
|
get(AC_dsz)}; // sink (current index)
|
||||||
const ModelScalar cs2 = get(AC_cs2_sound);
|
const ModelScalar cs2 = get(AC_cs2_sound);
|
||||||
const ModelScalar cs = sqrtl(cs2);
|
const ModelScalar cs = sqrtl(cs2);
|
||||||
|
|
||||||
@@ -724,6 +721,11 @@ forcing(int3 globalVertexIdx, ModelScalar dt)
|
|||||||
ModelVector ff_re = (ModelVector){get(AC_ff_hel_rex), get(AC_ff_hel_rey), get(AC_ff_hel_rez)};
|
ModelVector ff_re = (ModelVector){get(AC_ff_hel_rex), get(AC_ff_hel_rey), get(AC_ff_hel_rez)};
|
||||||
ModelVector ff_im = (ModelVector){get(AC_ff_hel_imx), get(AC_ff_hel_imy), get(AC_ff_hel_imz)};
|
ModelVector ff_im = (ModelVector){get(AC_ff_hel_imx), get(AC_ff_hel_imy), get(AC_ff_hel_imz)};
|
||||||
|
|
||||||
|
(void)phase; // WARNING: unused with simple forcing. Should be defined in helical_forcing
|
||||||
|
(void)k_force; // WARNING: unused with simple forcing. Should be defined in helical_forcing
|
||||||
|
(void)ff_re; // WARNING: unused with simple forcing. Should be defined in helical_forcing
|
||||||
|
(void)ff_im; // WARNING: unused with simple forcing. Should be defined in helical_forcing
|
||||||
|
|
||||||
// Determine that forcing funtion type at this point.
|
// Determine that forcing funtion type at this point.
|
||||||
// ModelVector force = simple_vortex_forcing(a, xx, magnitude);
|
// ModelVector force = simple_vortex_forcing(a, xx, magnitude);
|
||||||
// ModelVector force = simple_outward_flow_forcing(a, xx, magnitude);
|
// ModelVector force = simple_outward_flow_forcing(a, xx, magnitude);
|
||||||
|
@@ -34,6 +34,7 @@
|
|||||||
#include "config_loader.h"
|
#include "config_loader.h"
|
||||||
#include "core/errchk.h"
|
#include "core/errchk.h"
|
||||||
#include "core/math_utils.h"
|
#include "core/math_utils.h"
|
||||||
|
#include "model/host_forcing.h"
|
||||||
#include "model/host_memory.h"
|
#include "model/host_memory.h"
|
||||||
#include "model/host_timestep.h"
|
#include "model/host_timestep.h"
|
||||||
#include "model/model_reduce.h"
|
#include "model/model_reduce.h"
|
||||||
@@ -383,6 +384,12 @@ run_renderer(void)
|
|||||||
#if 1
|
#if 1
|
||||||
const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
|
const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
|
||||||
const AcReal dt = host_timestep(umax, mesh_info);
|
const AcReal dt = host_timestep(umax, mesh_info);
|
||||||
|
|
||||||
|
#if LFORCING
|
||||||
|
const ForcingParams forcing_params = generateForcingParams(mesh->info);
|
||||||
|
loadForcingParamsToDevice(forcing_params);
|
||||||
|
#endif
|
||||||
|
|
||||||
acIntegrate(dt);
|
acIntegrate(dt);
|
||||||
#else
|
#else
|
||||||
ModelMesh* model_mesh = modelmesh_create(mesh->info);
|
ModelMesh* model_mesh = modelmesh_create(mesh->info);
|
||||||
|
Reference in New Issue
Block a user