Added the machinery for implementing forcing with the DSL on multiple GPUs and a simple model solution
This commit is contained in:
@@ -40,11 +40,6 @@ static const int MAX_NUM_DEVICES = 32;
|
||||
// Number of devices in use; starts at one. Bounds the device initialization loop.
static int num_devices = 1;
// Device handles, one slot per possible device (MAX_NUM_DEVICES slots), all
// zero-initialized until createDevice() fills them in.
static Device devices[MAX_NUM_DEVICES] = {};
|
||||
|
||||
// Pair of three-component integer extents describing a grid.
// NOTE(review): presumably `m` is the full mesh size and `n` the size of the
// computational domain, per axis -- confirm against createGrid().
typedef struct {
    int3 m;
    int3 n;
} Grid;
|
||||
|
||||
static Grid
|
||||
createGrid(const AcMeshInfo& config)
|
||||
{
|
||||
@@ -132,6 +127,7 @@ acInit(const AcMeshInfo& config)
|
||||
// Initialize the devices
|
||||
for (int i = 0; i < num_devices; ++i) {
|
||||
createDevice(i, subgrid_config, &devices[i]);
|
||||
loadGlobalGrid(devices[i], grid);
|
||||
printDeviceInfo(devices[i]);
|
||||
}
|
||||
return AC_SUCCESS;
|
||||
|
||||
@@ -35,6 +35,7 @@ typedef struct {
|
||||
|
||||
// Mesh configuration held in device constant memory.
__constant__ AcMeshInfo d_mesh_info;
// Offset added to a thread's local vertex index to obtain its global vertex index.
// NOTE(review): presumably the position of this device's subgrid within the
// whole multi-GPU grid -- confirm.
__constant__ int3 d_multigpu_offset;
// Grid descriptor in constant memory; written from the host by loadGlobalGrid().
__constant__ Grid globalGrid;

// Look up an integer / real parameter in the constant-memory mesh info.
#define DCONST_INT(X) (d_mesh_info.int_params[X])
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
// Flattens a 3D vertex index (i, j, k) into a linear one: i + j*AC_mx + k*AC_mxy.
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||
@@ -377,3 +378,12 @@ loadDeviceConstant(const Device device, const AcRealParam param, const AcReal va
|
||||
offset, cudaMemcpyHostToDevice));
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Uploads `grid` into the `globalGrid` constant-memory symbol of `device`.
 *
 * The device is made current first so that the copy targets that device's
 * instance of the symbol.
 *
 * @param device Handle of the target device; `device->id` selects the CUDA device.
 * @param grid   Grid descriptor to upload; the whole struct is copied.
 * @return AC_SUCCESS when both CUDA calls pass their checks. CUDA failures are
 *         handled by ERRCHK_CUDA_ALWAYS.
 */
AcResult
loadGlobalGrid(const Device device, const Grid grid)
{
    // Check the device switch as well: if it failed unnoticed, the copy below
    // would be applied to whichever device happened to be current.
    ERRCHK_CUDA_ALWAYS(cudaSetDevice(device->id));
    ERRCHK_CUDA_ALWAYS(
        cudaMemcpyToSymbol(globalGrid, &grid, sizeof(grid), 0, cudaMemcpyHostToDevice));
    return AC_SUCCESS;
}
|
||||
|
||||
@@ -34,6 +34,11 @@ typedef enum {
|
||||
STREAM_ALL
|
||||
} StreamType;
|
||||
|
||||
// Pair of three-component integer extents describing a grid.
// NOTE(review): presumably `m` is the full mesh size and `n` the size of the
// computational domain, per axis -- confirm against the code that fills it in.
typedef struct {
    int3 m;
    int3 n;
} Grid;
|
||||
|
||||
// Opaque pointer to device_s. Analogous to dispatchable handles in Vulkan,
// e.g. VkDevice.
typedef struct device_s* Device;
|
||||
|
||||
@@ -92,3 +97,6 @@ AcResult loadDeviceConstant(const Device device, const AcIntParam param, const i
|
||||
|
||||
/**
 * Loads the real-valued parameter `param` with `value` on `device`.
 * NOTE(review): the implementation is only partly visible in this view;
 * presumably it writes into the device's constant-memory mesh info -- confirm.
 */
AcResult loadDeviceConstant(const Device device, const AcRealParam param, const AcReal value);

/**
 * Copies `grid` into the `globalGrid` constant-memory symbol on `device`.
 * Returns AC_SUCCESS.
 */
AcResult loadGlobalGrid(const Device device, const Grid grid);
|
||||
|
||||
@@ -727,6 +727,9 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
||||
const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x,\
|
||||
threadIdx.y + blockIdx.y * blockDim.y + start.y,\
|
||||
threadIdx.z + blockIdx.z * blockDim.z + start.z};\
|
||||
const int3 globalVertexIdx = (int3){d_multigpu_offset.x + vertexIdx.x, \
|
||||
d_multigpu_offset.y + vertexIdx.y, \
|
||||
d_multigpu_offset.z + vertexIdx.z}; \
|
||||
if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z)\
|
||||
return;\
|
||||
\
|
||||
|
||||
Reference in New Issue
Block a user