Added the machinery for implementing forcing with the DSL on multiple GPUs and a simple model solution
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
#define LENTROPY (1)
|
#define LENTROPY (1)
|
||||||
#define LTEMPERATURE (0)
|
#define LTEMPERATURE (0)
|
||||||
#define LGRAVITY (0)
|
#define LGRAVITY (0)
|
||||||
|
#define LFORCING (1)
|
||||||
|
|
||||||
|
|
||||||
// Declare uniforms (i.e. device constants)
|
// Declare uniforms (i.e. device constants)
|
||||||
@@ -14,6 +15,10 @@ uniform Scalar eta;
|
|||||||
uniform Scalar gamma;
|
uniform Scalar gamma;
|
||||||
uniform Scalar zeta;
|
uniform Scalar zeta;
|
||||||
|
|
||||||
|
uniform Scalar dsx;
|
||||||
|
uniform Scalar dsy;
|
||||||
|
uniform Scalar dsz;
|
||||||
|
|
||||||
uniform int nx_min;
|
uniform int nx_min;
|
||||||
uniform int ny_min;
|
uniform int ny_min;
|
||||||
uniform int nz_min;
|
uniform int nz_min;
|
||||||
@@ -21,6 +26,7 @@ uniform int nx;
|
|||||||
uniform int ny;
|
uniform int ny;
|
||||||
uniform int nz;
|
uniform int nz;
|
||||||
|
|
||||||
|
|
||||||
Vector
|
Vector
|
||||||
value(in Vector uu)
|
value(in Vector uu)
|
||||||
{
|
{
|
||||||
@@ -179,6 +185,27 @@ heat_transfer(in Vector uu, in Scalar lnrho, in Scalar tt)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Vector
|
||||||
|
forcing(int3 globalVertexIdx)
|
||||||
|
{
|
||||||
|
#if LFORCING
|
||||||
|
Vector a = Scalar(.5) * (Vector){globalGrid.n.x * dsx,
|
||||||
|
globalGrid.n.y * dsy,
|
||||||
|
globalGrid.n.z * dsz}; // source (origin)
|
||||||
|
Vector b = (Vector){(globalVertexIdx.x - nx_min) * dsx,
|
||||||
|
(globalVertexIdx.y - ny_min) * dsy,
|
||||||
|
(globalVertexIdx.z - nz_min) * dsz}; // sink (current index)
|
||||||
|
Vector dir = normalized(b - a);
|
||||||
|
if (is_valid(dir)) {
|
||||||
|
return Scalar(1.) * dir;
|
||||||
|
} else {
|
||||||
|
return (Vector){0, 0, 0};
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
return (Vector){0, 0, 0};
|
||||||
|
#endif // LFORCING
|
||||||
|
}
|
||||||
|
|
||||||
// Declare input and output arrays using locations specified in the
|
// Declare input and output arrays using locations specified in the
|
||||||
// array enum in astaroth.h
|
// array enum in astaroth.h
|
||||||
in Scalar lnrho = VTXBUF_LNRHO;
|
in Scalar lnrho = VTXBUF_LNRHO;
|
||||||
@@ -212,7 +239,7 @@ solve(Scalar dt) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if LENTROPY
|
#if LENTROPY
|
||||||
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt);
|
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa) + forcing(globalVertexIdx), dt);
|
||||||
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
|
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
|
||||||
#elif LTEMPERATURE
|
#elif LTEMPERATURE
|
||||||
out_uu =rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
|
out_uu =rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
|
||||||
@@ -221,45 +248,3 @@ solve(Scalar dt) {
|
|||||||
out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
|
out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -40,11 +40,6 @@ static const int MAX_NUM_DEVICES = 32;
|
|||||||
static int num_devices = 1;
|
static int num_devices = 1;
|
||||||
static Device devices[MAX_NUM_DEVICES] = {};
|
static Device devices[MAX_NUM_DEVICES] = {};
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int3 m;
|
|
||||||
int3 n;
|
|
||||||
} Grid;
|
|
||||||
|
|
||||||
static Grid
|
static Grid
|
||||||
createGrid(const AcMeshInfo& config)
|
createGrid(const AcMeshInfo& config)
|
||||||
{
|
{
|
||||||
@@ -132,6 +127,7 @@ acInit(const AcMeshInfo& config)
|
|||||||
// Initialize the devices
|
// Initialize the devices
|
||||||
for (int i = 0; i < num_devices; ++i) {
|
for (int i = 0; i < num_devices; ++i) {
|
||||||
createDevice(i, subgrid_config, &devices[i]);
|
createDevice(i, subgrid_config, &devices[i]);
|
||||||
|
loadGlobalGrid(devices[i], grid);
|
||||||
printDeviceInfo(devices[i]);
|
printDeviceInfo(devices[i]);
|
||||||
}
|
}
|
||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
|
@@ -35,6 +35,7 @@ typedef struct {
|
|||||||
|
|
||||||
__constant__ AcMeshInfo d_mesh_info;
|
__constant__ AcMeshInfo d_mesh_info;
|
||||||
__constant__ int3 d_multigpu_offset;
|
__constant__ int3 d_multigpu_offset;
|
||||||
|
__constant__ Grid globalGrid;
|
||||||
#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
||||||
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
||||||
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||||
@@ -377,3 +378,12 @@ loadDeviceConstant(const Device device, const AcRealParam param, const AcReal va
|
|||||||
offset, cudaMemcpyHostToDevice));
|
offset, cudaMemcpyHostToDevice));
|
||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AcResult
|
||||||
|
loadGlobalGrid(const Device device, const Grid grid)
|
||||||
|
{
|
||||||
|
cudaSetDevice(device->id);
|
||||||
|
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(globalGrid, &grid, sizeof(grid),
|
||||||
|
0, cudaMemcpyHostToDevice));
|
||||||
|
return AC_SUCCESS;
|
||||||
|
}
|
||||||
|
@@ -34,6 +34,11 @@ typedef enum {
|
|||||||
STREAM_ALL
|
STREAM_ALL
|
||||||
} StreamType;
|
} StreamType;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int3 m;
|
||||||
|
int3 n;
|
||||||
|
} Grid;
|
||||||
|
|
||||||
typedef struct device_s* Device; // Opaque pointer to device_s. Analogous to dispatchable handles
|
typedef struct device_s* Device; // Opaque pointer to device_s. Analogous to dispatchable handles
|
||||||
// in Vulkan, f.ex. VkDevice
|
// in Vulkan, f.ex. VkDevice
|
||||||
|
|
||||||
@@ -92,3 +97,6 @@ AcResult loadDeviceConstant(const Device device, const AcIntParam param, const i
|
|||||||
|
|
||||||
/** */
|
/** */
|
||||||
AcResult loadDeviceConstant(const Device device, const AcRealParam param, const AcReal value);
|
AcResult loadDeviceConstant(const Device device, const AcRealParam param, const AcReal value);
|
||||||
|
|
||||||
|
/** */
|
||||||
|
AcResult loadGlobalGrid(const Device device, const Grid grid);
|
||||||
|
@@ -727,6 +727,9 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
|||||||
const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x,\
|
const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x,\
|
||||||
threadIdx.y + blockIdx.y * blockDim.y + start.y,\
|
threadIdx.y + blockIdx.y * blockDim.y + start.y,\
|
||||||
threadIdx.z + blockIdx.z * blockDim.z + start.z};\
|
threadIdx.z + blockIdx.z * blockDim.z + start.z};\
|
||||||
|
const int3 globalVertexIdx = (int3){d_multigpu_offset.x + vertexIdx.x, \
|
||||||
|
d_multigpu_offset.y + vertexIdx.y, \
|
||||||
|
d_multigpu_offset.z + vertexIdx.z}; \
|
||||||
if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z)\
|
if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z)\
|
||||||
return;\
|
return;\
|
||||||
\
|
\
|
||||||
|
Reference in New Issue
Block a user