diff --git a/acc/mhd_solver/stencil_process.sps b/acc/mhd_solver/stencil_process.sps index 520ceb5..d85843d 100644 --- a/acc/mhd_solver/stencil_process.sps +++ b/acc/mhd_solver/stencil_process.sps @@ -2,6 +2,7 @@ #define LENTROPY (1) #define LTEMPERATURE (0) #define LGRAVITY (0) +#define LFORCING (1) // Declare uniforms (i.e. device constants) @@ -14,6 +15,10 @@ uniform Scalar eta; uniform Scalar gamma; uniform Scalar zeta; +uniform Scalar dsx; +uniform Scalar dsy; +uniform Scalar dsz; + uniform int nx_min; uniform int ny_min; uniform int nz_min; @@ -21,6 +26,7 @@ uniform int nx; uniform int ny; uniform int nz; + Vector value(in Vector uu) { @@ -49,12 +55,12 @@ momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa) { // Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\) // \1 - const Vector mom = - mul(gradients(uu), value(uu)) + const Vector mom = - mul(gradients(uu), value(uu)) - cs2 * ((Scalar(1.) / cp_sound) * gradient(ss) + gradient(lnrho)) + inv_rho * cross(j, B) + nu_visc * ( - laplace_vec(uu) - + Scalar(1. / 3.) * gradient_of_divergence(uu) + laplace_vec(uu) + + Scalar(1. / 3.) * gradient_of_divergence(uu) + Scalar(2.) * mul(S, gradient(lnrho)) ) + zeta * gradient_of_divergence(uu); @@ -159,7 +165,7 @@ entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa) { const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho))); const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density const Scalar RHS = H_CONST - C_CONST - + eta * (mu0) * dot(j, j) + + eta * (mu0) * dot(j, j) + Scalar(2.) * exp(value(lnrho)) * nu_visc * contract(S) + zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu); @@ -179,6 +185,27 @@ heat_transfer(in Vector uu, in Scalar lnrho, in Scalar tt) } #endif +Vector +forcing(int3 globalVertexIdx) +{ + #if LFORCING + Vector a = Scalar(.5) * (Vector){globalGrid.n.x * dsx, + globalGrid.n.y * dsy, + globalGrid.n.z * dsz}; // source (origin) + Vector b = (Vector){(globalVertexIdx.x - nx_min) * dsx, + (globalVertexIdx.y - ny_min) * dsy, + (globalVertexIdx.z - nz_min) * dsz}; // sink (current index) + Vector dir = normalized(b - a); + if (is_valid(dir)) { + return Scalar(1.) * dir; + } else { + return (Vector){0, 0, 0}; + } + #else + return (Vector){0, 0, 0}; + #endif // LFORCING +} + // Declare input and output arrays using locations specified in the // array enum in astaroth.h in Scalar lnrho = VTXBUF_LNRHO; @@ -212,54 +239,12 @@ solve(Scalar dt) { #endif #if LENTROPY - out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt); + out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa) + forcing(globalVertexIdx), dt); out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt); #elif LTEMPERATURE out_uu =rk3(out_uu, uu, momentum(uu, lnrho, tt), dt); out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt); #else out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt); - #endif + #endif } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index 5ebf83f..58b551b 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -40,11 +40,6 @@ static const int MAX_NUM_DEVICES = 32; static int num_devices = 1; static Device devices[MAX_NUM_DEVICES] = {}; -typedef struct { - int3 m; - int3 n; -} Grid; - static Grid createGrid(const AcMeshInfo& config) { @@ -132,6 +127,7 @@ acInit(const AcMeshInfo& config) // Initialize the devices for (int i = 0; i < num_devices; ++i) { createDevice(i, subgrid_config, &devices[i]); + loadGlobalGrid(devices[i], grid); printDeviceInfo(devices[i]); } return AC_SUCCESS; diff --git a/src/core/device.cu b/src/core/device.cu index a18ac54..1ef2648 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -35,6 +35,7 @@ typedef struct { __constant__ AcMeshInfo d_mesh_info; __constant__ int3 d_multigpu_offset; +__constant__ Grid globalGrid; #define DCONST_INT(X) (d_mesh_info.int_params[X]) #define DCONST_REAL(X) (d_mesh_info.real_params[X]) #define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy)) @@ -377,3 +378,12 @@ loadDeviceConstant(const Device device, const AcRealParam param, const AcReal va offset, cudaMemcpyHostToDevice)); return AC_SUCCESS; } + +AcResult +loadGlobalGrid(const Device device, const Grid grid) +{ + cudaSetDevice(device->id); + ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(globalGrid, &grid, sizeof(grid), + 0, cudaMemcpyHostToDevice)); + return AC_SUCCESS; +} diff --git a/src/core/device.cuh b/src/core/device.cuh index 4286549..35d5209 100644 --- a/src/core/device.cuh +++ b/src/core/device.cuh @@ -34,6 +34,11 @@ typedef enum { STREAM_ALL } StreamType; +typedef struct { + int3 m; + int3 n; +} Grid; + typedef struct device_s* Device; // Opaque pointer to device_s. Analogous to dispatchable handles // in Vulkan, f.ex. VkDevice @@ -92,3 +97,6 @@ AcResult loadDeviceConstant(const Device device, const AcIntParam param, const i /** */ AcResult loadDeviceConstant(const Device device, const AcRealParam param, const AcReal value); + +/** */ +AcResult loadGlobalGrid(const Device device, const Grid grid); diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index 886d513..4028d8f 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -727,6 +727,9 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle) const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x,\ threadIdx.y + blockIdx.y * blockDim.y + start.y,\ threadIdx.z + blockIdx.z * blockDim.z + start.z};\ + const int3 globalVertexIdx = (int3){d_multigpu_offset.x + vertexIdx.x, \ + d_multigpu_offset.y + vertexIdx.y, \ + d_multigpu_offset.z + vertexIdx.z}; \ if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z)\ return;\ \