Files
astaroth/acc/samples/common_header.h

423 lines
13 KiB
C

/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Provides an interface to Astaroth. Contains all the necessary configuration
* structs and functions for running the code on multiple GPUs.
*
* All interface functions declared here (such as acInit()) operate all GPUs
* available in the node under the hood, and the user does not need any
* information about the decomposition, synchronization or such to use these
* functions.
*
*/
#pragma once
/* Prevent name mangling */
#ifdef __cplusplus
extern "C" {
#endif
#include <float.h> // FLT_EPSILON, etc
#include <stdlib.h> // size_t
#include <vector_types.h> // CUDA vector types (float4, etc)
/*
* =============================================================================
* Flags for auto-optimization
* =============================================================================
*/
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
#define BOUNDCONDS_OPTIMIZE (0)
#define GENERATE_BENCHMARK_DATA (0)
// Device info
#define REGISTERS_PER_THREAD (255)
#define MAX_REGISTERS_PER_BLOCK (65536)
#define MAX_THREADS_PER_BLOCK (1024)
#define MAX_TB_DIM (MAX_THREADS_PER_BLOCK)
#define NUM_ITERATIONS (10)
#define WARP_SIZE (32)
/*
* =============================================================================
* Compile-time constants used during simulation (user definable)
* =============================================================================
*/
#define STENCIL_ORDER (6)
///////////// PAD TEST
// NOTE: works only with nx is divisible by 32
//#define PAD_LEAD (32 - STENCIL_ORDER/2)
//#define PAD_SIZE (32 - STENCIL_ORDER)
///////////// PAD TEST
// L-prefix inherited from the old Astaroth, no idea what it means
// MV: L means a Logical switch variale, something having true of false value.
#define LFORCING (0) // Note: forcing is disabled currently in the files generated by acc (compiler of our DSL)
#define LMAGNETIC (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
/*
* =============================================================================
* Identifiers used to construct the parameter lists for AcMeshInfo
* (IntParamType and RealParamType)
* (user definable)
* =============================================================================
*/
// clang-format off
#define AC_FOR_INT_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_nx), \
FUNC(AC_ny), \
FUNC(AC_nz), \
FUNC(AC_mx), \
FUNC(AC_my), \
FUNC(AC_mz), \
FUNC(AC_nx_min), \
FUNC(AC_ny_min), \
FUNC(AC_nz_min), \
FUNC(AC_nx_max), \
FUNC(AC_ny_max), \
FUNC(AC_nz_max), \
/* Other */\
FUNC(AC_max_steps), \
FUNC(AC_save_steps), \
FUNC(AC_bin_steps), \
FUNC(AC_bc_type), \
/* Additional */\
FUNC(AC_mxy),\
FUNC(AC_nxy),\
FUNC(AC_nxyz)
#define AC_FOR_REAL_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_dsx), \
FUNC(AC_dsy), \
FUNC(AC_dsz), \
FUNC(AC_dsmin), \
/* physical grid*/\
FUNC(AC_xlen), \
FUNC(AC_ylen), \
FUNC(AC_zlen), \
FUNC(AC_xorig), \
FUNC(AC_yorig), \
FUNC(AC_zorig), \
/*Physical units*/\
FUNC(AC_unit_density),\
FUNC(AC_unit_velocity),\
FUNC(AC_unit_length),\
/* properties of gravitating star*/\
FUNC(AC_star_pos_x),\
FUNC(AC_star_pos_y),\
FUNC(AC_star_pos_z),\
FUNC(AC_M_star),\
/* Run params */\
FUNC(AC_cdt), \
FUNC(AC_cdtv), \
FUNC(AC_cdts), \
FUNC(AC_nu_visc), \
FUNC(AC_cs_sound), \
FUNC(AC_eta), \
FUNC(AC_mu0), \
FUNC(AC_relhel), \
FUNC(AC_cp_sound), \
FUNC(AC_gamma), \
FUNC(AC_cv_sound), \
FUNC(AC_lnT0), \
FUNC(AC_lnrho0), \
FUNC(AC_zeta), \
FUNC(AC_trans),\
/* Other */\
FUNC(AC_bin_save_t), \
/* Initial condition params */\
FUNC(AC_ampl_lnrho), \
FUNC(AC_ampl_uu), \
FUNC(AC_angl_uu), \
FUNC(AC_lnrho_edge),\
FUNC(AC_lnrho_out),\
/* Additional helper params */\
/* (deduced from other params do not set these directly!) */\
FUNC(AC_G_CONST),\
FUNC(AC_GM_star),\
FUNC(AC_sq2GM_star),\
FUNC(AC_cs2_sound), \
FUNC(AC_inv_dsx), \
FUNC(AC_inv_dsy), \
FUNC(AC_inv_dsz)
// clang-format on
/*
* =============================================================================
* Identifiers for VertexBufferHandle
* (i.e. the arrays used to construct AcMesh)
* (user definable)
* =============================================================================
*/
// clang-format off
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_LNRHO), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
// FUNC(VTXBUF_DYE),
#if LMAGNETIC
#define AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ),
#else
#define AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC)
#endif
#if LENTROPY
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_ENTROPY),
#else
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
#endif
#if LTEMPERATURE
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_TEMPERATURE),
#else
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
#endif
#define AC_FOR_VTXBUF_HANDLES(FUNC)\
AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
AC_FOR_MAGNETIC_VTXBUF_HANDLES(FUNC)\
AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
// clang-format on
/*
* =============================================================================
* Single/double precision switch
* =============================================================================
*/
#if AC_DOUBLE_PRECISION == 1
typedef double AcReal;
typedef double3 AcReal3;
#define AC_REAL_MAX (DBL_MAX)
#define AC_REAL_MIN (DBL_MIN)
#define AC_REAL_EPSILON (DBL_EPSILON)
#else
typedef float AcReal;
typedef float3 AcReal3;
#define AC_REAL_MAX (FLT_MAX)
#define AC_REAL_MIN (FLT_MIN)
#define AC_REAL_EPSILON (FLT_EPSILON)
#endif
typedef struct {
AcReal3 row[3];
} AcMatrix;
/*
* =============================================================================
* Helper macros
* =============================================================================
*/
#define AC_GEN_ID(X) X
#define AC_GEN_STR(X) #X
/*
* =============================================================================
* Error codes
* =============================================================================
*/
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
/*
* =============================================================================
* Reduction types
* =============================================================================
*/
typedef enum {
RTYPE_MAX,
RTYPE_MIN,
RTYPE_RMS,
RTYPE_RMS_EXP,
NUM_REDUCTION_TYPES
} ReductionType;
/*
* =============================================================================
* Definitions for the enums and structs for AcMeshInfo (DO NOT TOUCH)
* =============================================================================
*/
typedef enum {
AC_FOR_INT_PARAM_TYPES(AC_GEN_ID),
NUM_INT_PARAMS
} AcIntParam;
typedef enum {
AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID),
NUM_REAL_PARAMS
} AcRealParam;
extern const char* intparam_names[]; // Defined in astaroth.cu
extern const char* realparam_names[]; // Defined in astaroth.cu
typedef struct {
int int_params[NUM_INT_PARAMS];
AcReal real_params[NUM_REAL_PARAMS];
} AcMeshInfo;
/*
* =============================================================================
* Definitions for the enums and structs for AcMesh (DO NOT TOUCH)
* =============================================================================
*/
typedef enum {
AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) NUM_VTXBUF_HANDLES
} VertexBufferHandle;
extern const char* vtxbuf_names[]; // Defined in astaroth.cu
/*
typedef struct {
AcReal* data;
} VertexBuffer;
*/
// NOTE: there's no particular benefit declaring AcMesh a class, since
// a library user may already have allocated memory for the vertex_buffers.
// But then we would allocate memory again when the user wants to start
// filling the class with data. => Its better to consider AcMesh as a
// payload-only struct
typedef struct {
AcReal* vertex_buffer[NUM_VTXBUF_HANDLES];
AcMeshInfo info;
} AcMesh;
#define acVertexBufferSize(mesh_info) \
((size_t)(mesh_info.int_params[AC_mx] * mesh_info.int_params[AC_my] * \
mesh_info.int_params[AC_mz]))
#define acVertexBufferSizeBytes(mesh_info) \
(sizeof(AcReal) * acVertexBufferSize(mesh_info))
#define acVertexBufferCompdomainSize(mesh_info) \
(mesh_info.int_params[AC_nx] * mesh_info.int_params[AC_ny] * \
mesh_info.int_params[AC_nz])
#define acVertexBufferCompdomainSizeBytes(mesh_info) \
(sizeof(AcReal) * acVertexBufferCompdomainSize(mesh_info))
#define acVertexBufferIdx(i, j, k, mesh_info) \
((i) + (j)*mesh_info.int_params[AC_mx] + \
(k)*mesh_info.int_params[AC_mx] * mesh_info.int_params[AC_my])
/*
* =============================================================================
* Astaroth interface
* =============================================================================
*/
/** Starting point of all GPU computation. Handles the allocation and
initialization of *all memory needed on all GPUs in the node*. In other words,
setups everything GPU-side so that calling any other GPU interface function
afterwards does not result in illegal memory accesses. */
AcResult acInit(const AcMeshInfo& mesh_info);
/** Splits the host_mesh and distributes it among the GPUs in the node */
AcResult acLoad(const AcMesh& host_mesh);
AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices);
/** Does all three steps of the RK3 integration and computes the boundary
conditions when necessary. Note that the boundary conditions are not applied
after the final integration step.
The result can be fetched to CPU memory with acStore(). */
AcResult acIntegrate(const AcReal& dt);
/** Performs a single RK3 step without computing boundary conditions. */
AcResult acIntegrateStep(const int& isubstep, const AcReal& dt);
/** Applies boundary conditions on the GPU meshs and communicates the
ghost zones among GPUs if necessary */
AcResult acBoundcondStep(void);
/** Performs a scalar reduction on all GPUs in the node and returns the result.
*/
AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a);
/** Performs a vector reduction on all GPUs in the node and returns the result.
*/
AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a,
const VertexBufferHandle& b, const VertexBufferHandle& c);
/** Stores the mesh distributed among GPUs of the node back to a single host
* mesh */
AcResult acStore(AcMesh* host_mesh);
AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh);
/** Frees all GPU allocations and resets all devices in the node. Should be
* called at exit. */
AcResult acQuit(void);
/** Synchronizes all devices. All calls to Astaroth are asynchronous by default
unless otherwise stated. */
AcResult acSynchronize(void);
/* End extern "C" */
#ifdef __cplusplus
}
#endif
/*
* =============================================================================
* Notes
* =============================================================================
*/
/*
typedef enum {
VTX_BUF_LNRHO,
VTX_BUF_UUX,
VTX_BUF_UUY,
VTX_BUF_UUZ,
NUM_VERTEX_BUFFER_HANDLES
} VertexBufferHandle
// LNRHO etc
typedef struct {
AcReal* data;
} VertexBuffer;
// Host
typedef struct {
VertexBuffer vertex_buffers[NUM_VERTEX_BUFFER_HANDLES];
MeshInfo info;
} Mesh;
// Device
typedef struct {
VertexBuffer in[NUM_VERTEX_BUFFER_HANDLES];
VertexBuffer out[NUM_VERTEX_BUFFER_HANDLES];
} VertexBufferArray;
*/