Added the machinery for implementing forcing with the DSL on multiple GPUs and a simple model solution

This commit is contained in:
jpekkila
2019-06-18 16:13:32 +03:00
parent 57e2e48fb0
commit 4ca4dbefdf
5 changed files with 55 additions and 53 deletions

View File

@@ -2,6 +2,7 @@
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define LGRAVITY (0)
#define LFORCING (1)
// Declare uniforms (i.e. device constants)
@@ -14,6 +15,10 @@ uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar zeta;
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
@@ -21,6 +26,7 @@ uniform int nx;
uniform int ny;
uniform int nz;
Vector
value(in Vector uu)
{
@@ -49,12 +55,12 @@ momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa) {
// Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
// \1
const Vector mom = - mul(gradients(uu), value(uu))
const Vector mom = - mul(gradients(uu), value(uu))
- cs2 * ((Scalar(1.) / cp_sound) * gradient(ss) + gradient(lnrho))
+ inv_rho * cross(j, B)
+ nu_visc * (
laplace_vec(uu)
+ Scalar(1. / 3.) * gradient_of_divergence(uu)
laplace_vec(uu)
+ Scalar(1. / 3.) * gradient_of_divergence(uu)
+ Scalar(2.) * mul(S, gradient(lnrho))
)
+ zeta * gradient_of_divergence(uu);
@@ -159,7 +165,7 @@ entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa) {
const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Scalar RHS = H_CONST - C_CONST
+ eta * (mu0) * dot(j, j)
+ eta * (mu0) * dot(j, j)
+ Scalar(2.) * exp(value(lnrho)) * nu_visc * contract(S)
+ zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
@@ -179,6 +185,27 @@ heat_transfer(in Vector uu, in Scalar lnrho, in Scalar tt)
}
#endif
Vector
forcing(int3 globalVertexIdx)
{
#if LFORCING
Vector a = Scalar(.5) * (Vector){globalGrid.n.x * dsx,
globalGrid.n.y * dsy,
globalGrid.n.z * dsz}; // source (origin)
Vector b = (Vector){(globalVertexIdx.x - nx_min) * dsx,
(globalVertexIdx.y - ny_min) * dsy,
(globalVertexIdx.z - nz_min) * dsz}; // sink (current index)
Vector dir = normalized(b - a);
if (is_valid(dir)) {
return Scalar(1.) * dir;
} else {
return (Vector){0, 0, 0};
}
#else
return (Vector){0, 0, 0};
#endif // LFORCING
}
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
in Scalar lnrho = VTXBUF_LNRHO;
@@ -212,54 +239,12 @@ solve(Scalar dt) {
#endif
#if LENTROPY
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt);
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa) + forcing(globalVertexIdx), dt);
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
#elif LTEMPERATURE
out_uu =rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
#else
out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
#endif
#endif
}

View File

@@ -40,11 +40,6 @@ static const int MAX_NUM_DEVICES = 32;
static int num_devices = 1;
static Device devices[MAX_NUM_DEVICES] = {};
typedef struct {
int3 m;
int3 n;
} Grid;
static Grid
createGrid(const AcMeshInfo& config)
{
@@ -132,6 +127,7 @@ acInit(const AcMeshInfo& config)
// Initialize the devices
for (int i = 0; i < num_devices; ++i) {
createDevice(i, subgrid_config, &devices[i]);
loadGlobalGrid(devices[i], grid);
printDeviceInfo(devices[i]);
}
return AC_SUCCESS;

View File

@@ -35,6 +35,7 @@ typedef struct {
__constant__ AcMeshInfo d_mesh_info;
__constant__ int3 d_multigpu_offset;
__constant__ Grid globalGrid;
#define DCONST_INT(X) (d_mesh_info.int_params[X])
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
@@ -377,3 +378,12 @@ loadDeviceConstant(const Device device, const AcRealParam param, const AcReal va
offset, cudaMemcpyHostToDevice));
return AC_SUCCESS;
}
AcResult
loadGlobalGrid(const Device device, const Grid grid)
{
cudaSetDevice(device->id);
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(globalGrid, &grid, sizeof(grid),
0, cudaMemcpyHostToDevice));
return AC_SUCCESS;
}

View File

@@ -34,6 +34,11 @@ typedef enum {
STREAM_ALL
} StreamType;
typedef struct {
int3 m;
int3 n;
} Grid;
typedef struct device_s* Device; // Opaque pointer to device_s. Analogous to dispatchable handles
// in Vulkan, f.ex. VkDevice
@@ -92,3 +97,6 @@ AcResult loadDeviceConstant(const Device device, const AcIntParam param, const i
/** */
AcResult loadDeviceConstant(const Device device, const AcRealParam param, const AcReal value);
/** */
AcResult loadGlobalGrid(const Device device, const Grid grid);

View File

@@ -727,6 +727,9 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x,\
threadIdx.y + blockIdx.y * blockDim.y + start.y,\
threadIdx.z + blockIdx.z * blockDim.z + start.z};\
const int3 globalVertexIdx = (int3){d_multigpu_offset.x + vertexIdx.x, \
d_multigpu_offset.y + vertexIdx.y, \
d_multigpu_offset.z + vertexIdx.z}; \
if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z)\
return;\
\