Removed everything unnecessary from integration.cuh. Now all derivatives etc are available in a standard library header (acc/stdlib/stdderiv.h)
This commit is contained in:
@@ -25,7 +25,6 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "src/core/math_utils.h"
|
#include "src/core/math_utils.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
@@ -51,387 +50,6 @@ IDX(const int3 idx)
|
|||||||
#define make_int3(a, b, c) \
|
#define make_int3(a, b, c) \
|
||||||
(int3) { (int)a, (int)b, (int)c }
|
(int3) { (int)a, (int)b, (int)c }
|
||||||
|
|
||||||
static __forceinline__ AcMatrix
|
|
||||||
create_rotz(const AcReal radians)
|
|
||||||
{
|
|
||||||
AcMatrix mat;
|
|
||||||
|
|
||||||
mat.row[0] = (AcReal3){cos(radians), -sin(radians), 0};
|
|
||||||
mat.row[1] = (AcReal3){sin(radians), cos(radians), 0};
|
|
||||||
mat.row[2] = (AcReal3){0, 0, 0};
|
|
||||||
|
|
||||||
return mat;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if AC_DOUBLE_PRECISION == 0
|
|
||||||
/*
|
|
||||||
// Fast but inaccurate
|
|
||||||
#define sin __sinf
|
|
||||||
#define cos __cosf
|
|
||||||
#define exp __expf
|
|
||||||
*/
|
|
||||||
//#define sin sinf
|
|
||||||
//#define cos cosf
|
|
||||||
//#define exp expf
|
|
||||||
//#define rsqrt rsqrtf // hardware reciprocal sqrt
|
|
||||||
#endif // AC_DOUBLE_PRECISION == 0
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 0 (Input Assembly Stage)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 0.1 (Read stencil elements and solve derivatives)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
first_derivative(const AcReal* __restrict__ pencil, const AcReal inv_ds)
|
|
||||||
{
|
|
||||||
#if STENCIL_ORDER == 2
|
|
||||||
const AcReal coefficients[] = {0, 1.0 / 2.0};
|
|
||||||
#elif STENCIL_ORDER == 4
|
|
||||||
const AcReal coefficients[] = {0, 2.0 / 3.0, -1.0 / 12.0};
|
|
||||||
#elif STENCIL_ORDER == 6
|
|
||||||
const AcReal coefficients[] = {0, 3.0 / 4.0, -3.0 / 20.0, 1.0 / 60.0};
|
|
||||||
#elif STENCIL_ORDER == 8
|
|
||||||
const AcReal coefficients[] = {0, 4.0 / 5.0, -1.0 / 5.0, 4.0 / 105.0, -1.0 / 280.0};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MID (STENCIL_ORDER / 2)
|
|
||||||
AcReal res = 0;
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i = 1; i <= MID; ++i)
|
|
||||||
res += coefficients[i] * (pencil[MID + i] - pencil[MID - i]);
|
|
||||||
|
|
||||||
return res * inv_ds;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
second_derivative(const AcReal* __restrict__ pencil, const AcReal inv_ds)
|
|
||||||
{
|
|
||||||
#if STENCIL_ORDER == 2
|
|
||||||
const AcReal coefficients[] = {-2., 1.};
|
|
||||||
#elif STENCIL_ORDER == 4
|
|
||||||
const AcReal coefficients[] = {-5.0 / 2.0, 4.0 / 3.0, -1.0 / 12.0};
|
|
||||||
#elif STENCIL_ORDER == 6
|
|
||||||
const AcReal coefficients[] = {-49.0 / 18.0, 3.0 / 2.0, -3.0 / 20.0, 1.0 / 90.0};
|
|
||||||
#elif STENCIL_ORDER == 8
|
|
||||||
const AcReal coefficients[] = {-205.0 / 72.0, 8.0 / 5.0, -1.0 / 5.0, 8.0 / 315.0, -1.0 / 560.0};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MID (STENCIL_ORDER / 2)
|
|
||||||
AcReal res = coefficients[0] * pencil[MID];
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i = 1; i <= MID; ++i)
|
|
||||||
res += coefficients[i] * (pencil[MID + i] + pencil[MID - i]);
|
|
||||||
|
|
||||||
return res * inv_ds * inv_ds;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** inv_ds: inverted mesh spacing f.ex. 1. / mesh.int_params[AC_dsx] */
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
cross_derivative(const AcReal* __restrict__ pencil_a, const AcReal* __restrict__ pencil_b,
|
|
||||||
const AcReal inv_ds_a, const AcReal inv_ds_b)
|
|
||||||
{
|
|
||||||
#if STENCIL_ORDER == 2
|
|
||||||
const AcReal coefficients[] = {0, 1.0 / 4.0};
|
|
||||||
#elif STENCIL_ORDER == 4
|
|
||||||
const AcReal coefficients[] = {
|
|
||||||
0, 1.0 / 32.0, 1.0 / 64.0}; // TODO correct coefficients, these are just placeholders
|
|
||||||
#elif STENCIL_ORDER == 6
|
|
||||||
const AcReal fac = (1. / 720.);
|
|
||||||
const AcReal coefficients[] = {0.0 * fac, 270.0 * fac, -27.0 * fac, 2.0 * fac};
|
|
||||||
#elif STENCIL_ORDER == 8
|
|
||||||
const AcReal fac = (1. / 20160.);
|
|
||||||
const AcReal coefficients[] = {0.0 * fac, 8064. * fac, -1008. * fac, 128. * fac, -9. * fac};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MID (STENCIL_ORDER / 2)
|
|
||||||
AcReal res = AcReal(0.);
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i = 1; i <= MID; ++i) {
|
|
||||||
res += coefficients[i] *
|
|
||||||
(pencil_a[MID + i] + pencil_a[MID - i] - pencil_b[MID + i] - pencil_b[MID - i]);
|
|
||||||
}
|
|
||||||
return res * inv_ds_a * inv_ds_b;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
derx(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil[offset] = arr[IDX(vertexIdx.x + offset - STENCIL_ORDER / 2, vertexIdx.y,
|
|
||||||
vertexIdx.z)];
|
|
||||||
|
|
||||||
return first_derivative(pencil, DCONST_REAL(AC_inv_dsx));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
derxx(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil[offset] = arr[IDX(vertexIdx.x + offset - STENCIL_ORDER / 2, vertexIdx.y,
|
|
||||||
vertexIdx.z)];
|
|
||||||
|
|
||||||
return second_derivative(pencil, DCONST_REAL(AC_inv_dsx));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
derxy(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil_a[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil_a[offset] = arr[IDX(vertexIdx.x + offset - STENCIL_ORDER / 2,
|
|
||||||
vertexIdx.y + offset - STENCIL_ORDER / 2, vertexIdx.z)];
|
|
||||||
|
|
||||||
AcReal pencil_b[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil_b[offset] = arr[IDX(vertexIdx.x + offset - STENCIL_ORDER / 2,
|
|
||||||
vertexIdx.y + STENCIL_ORDER / 2 - offset, vertexIdx.z)];
|
|
||||||
|
|
||||||
return cross_derivative(pencil_a, pencil_b, DCONST_REAL(AC_inv_dsx), DCONST_REAL(AC_inv_dsy));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
derxz(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil_a[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil_a[offset] = arr[IDX(vertexIdx.x + offset - STENCIL_ORDER / 2, vertexIdx.y,
|
|
||||||
vertexIdx.z + offset - STENCIL_ORDER / 2)];
|
|
||||||
|
|
||||||
AcReal pencil_b[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil_b[offset] = arr[IDX(vertexIdx.x + offset - STENCIL_ORDER / 2, vertexIdx.y,
|
|
||||||
vertexIdx.z + STENCIL_ORDER / 2 - offset)];
|
|
||||||
|
|
||||||
return cross_derivative(pencil_a, pencil_b, DCONST_REAL(AC_inv_dsx), DCONST_REAL(AC_inv_dsz));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
dery(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil[offset] = arr[IDX(vertexIdx.x, vertexIdx.y + offset - STENCIL_ORDER / 2,
|
|
||||||
vertexIdx.z)];
|
|
||||||
|
|
||||||
return first_derivative(pencil, DCONST_REAL(AC_inv_dsy));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
deryy(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil[offset] = arr[IDX(vertexIdx.x, vertexIdx.y + offset - STENCIL_ORDER / 2,
|
|
||||||
vertexIdx.z)];
|
|
||||||
|
|
||||||
return second_derivative(pencil, DCONST_REAL(AC_inv_dsy));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
deryz(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil_a[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil_a[offset] = arr[IDX(vertexIdx.x, vertexIdx.y + offset - STENCIL_ORDER / 2,
|
|
||||||
vertexIdx.z + offset - STENCIL_ORDER / 2)];
|
|
||||||
|
|
||||||
AcReal pencil_b[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil_b[offset] = arr[IDX(vertexIdx.x, vertexIdx.y + offset - STENCIL_ORDER / 2,
|
|
||||||
vertexIdx.z + STENCIL_ORDER / 2 - offset)];
|
|
||||||
|
|
||||||
return cross_derivative(pencil_a, pencil_b, DCONST_REAL(AC_inv_dsy), DCONST_REAL(AC_inv_dsz));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
derz(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil[offset] = arr[IDX(vertexIdx.x, vertexIdx.y,
|
|
||||||
vertexIdx.z + offset - STENCIL_ORDER / 2)];
|
|
||||||
|
|
||||||
return first_derivative(pencil, DCONST_REAL(AC_inv_dsz));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
derzz(const int3 vertexIdx, const AcReal* __restrict__ arr)
|
|
||||||
{
|
|
||||||
AcReal pencil[STENCIL_ORDER + 1];
|
|
||||||
#pragma unroll
|
|
||||||
for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
|
|
||||||
pencil[offset] = arr[IDX(vertexIdx.x, vertexIdx.y,
|
|
||||||
vertexIdx.z + offset - STENCIL_ORDER / 2)];
|
|
||||||
|
|
||||||
return second_derivative(pencil, DCONST_REAL(AC_inv_dsz));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 0.2 (Caching functions)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "stencil_assembly.cuh"
|
|
||||||
|
|
||||||
/*
|
|
||||||
typedef struct {
|
|
||||||
AcRealData x;
|
|
||||||
AcRealData y;
|
|
||||||
AcRealData z;
|
|
||||||
} AcReal3Data;
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal3Data
|
|
||||||
read_data(const int i, const int j, const int k,
|
|
||||||
AcReal* __restrict__ buf[], const int3& handle)
|
|
||||||
{
|
|
||||||
AcReal3Data data;
|
|
||||||
|
|
||||||
data.x = read_data(i, j, k, buf, handle.x);
|
|
||||||
data.y = read_data(i, j, k, buf, handle.y);
|
|
||||||
data.z = read_data(i, j, k, buf, handle.z);
|
|
||||||
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 0.3 (Built-in functions available during the Stencil Processing Stage)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 1 (Stencil Processing Stage)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 1.1 (Terms)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
laplace(const AcRealData& data)
|
|
||||||
{
|
|
||||||
return hessian(data).row[0].x + hessian(data).row[1].y + hessian(data).row[2].z;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
divergence(const AcReal3Data& vec)
|
|
||||||
{
|
|
||||||
return gradient(vec.x).x + gradient(vec.y).y + gradient(vec.z).z;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal3
|
|
||||||
laplace_vec(const AcReal3Data& vec)
|
|
||||||
{
|
|
||||||
return (AcReal3){laplace(vec.x), laplace(vec.y), laplace(vec.z)};
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal3
|
|
||||||
curl(const AcReal3Data& vec)
|
|
||||||
{
|
|
||||||
return (AcReal3){gradient(vec.z).y - gradient(vec.y).z, gradient(vec.x).z - gradient(vec.z).x,
|
|
||||||
gradient(vec.y).x - gradient(vec.x).y};
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal3
|
|
||||||
gradient_of_divergence(const AcReal3Data& vec)
|
|
||||||
{
|
|
||||||
return (AcReal3){hessian(vec.x).row[0].x + hessian(vec.y).row[0].y + hessian(vec.z).row[0].z,
|
|
||||||
hessian(vec.x).row[1].x + hessian(vec.y).row[1].y + hessian(vec.z).row[1].z,
|
|
||||||
hessian(vec.x).row[2].x + hessian(vec.y).row[2].y + hessian(vec.z).row[2].z};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Takes uu gradients and returns S
|
|
||||||
static __device__ __forceinline__ AcMatrix
|
|
||||||
stress_tensor(const AcReal3Data& vec)
|
|
||||||
{
|
|
||||||
AcMatrix S;
|
|
||||||
|
|
||||||
S.row[0].x = AcReal(2. / 3.) * gradient(vec.x).x -
|
|
||||||
AcReal(1. / 3.) * (gradient(vec.y).y + gradient(vec.z).z);
|
|
||||||
S.row[0].y = AcReal(1. / 2.) * (gradient(vec.x).y + gradient(vec.y).x);
|
|
||||||
S.row[0].z = AcReal(1. / 2.) * (gradient(vec.x).z + gradient(vec.z).x);
|
|
||||||
|
|
||||||
S.row[1].y = AcReal(2. / 3.) * gradient(vec.y).y -
|
|
||||||
AcReal(1. / 3.) * (gradient(vec.x).x + gradient(vec.z).z);
|
|
||||||
|
|
||||||
S.row[1].z = AcReal(1. / 2.) * (gradient(vec.y).z + gradient(vec.z).y);
|
|
||||||
|
|
||||||
S.row[2].z = AcReal(2. / 3.) * gradient(vec.z).z -
|
|
||||||
AcReal(1. / 3.) * (gradient(vec.x).x + gradient(vec.y).y);
|
|
||||||
|
|
||||||
S.row[1].x = S.row[0].y;
|
|
||||||
S.row[2].x = S.row[0].z;
|
|
||||||
S.row[2].y = S.row[1].z;
|
|
||||||
|
|
||||||
return S;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
contract(const AcMatrix& mat)
|
|
||||||
{
|
|
||||||
AcReal res = 0;
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i = 0; i < 3; ++i)
|
|
||||||
res += dot(mat.row[i], mat.row[i]);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 1.2 (Equations)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
length(const AcReal3& vec)
|
|
||||||
{
|
|
||||||
return sqrt(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
reciprocal_len(const AcReal3& vec)
|
|
||||||
{
|
|
||||||
return rsqrt(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __device__ __forceinline__ AcReal3
|
|
||||||
normalized(const AcReal3& vec)
|
|
||||||
{
|
|
||||||
const AcReal inv_len = reciprocal_len(vec);
|
|
||||||
return inv_len * vec;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define H_CONST (AcReal(0.0))
|
|
||||||
#define C_CONST (AcReal(0.0))
|
|
||||||
|
|
||||||
template <int step_number>
|
template <int step_number>
|
||||||
static __device__ __forceinline__ AcReal
|
static __device__ __forceinline__ AcReal
|
||||||
rk3_integrate(const AcReal state_previous, const AcReal state_current, const AcReal rate_of_change,
|
rk3_integrate(const AcReal state_previous, const AcReal state_current, const AcReal rate_of_change,
|
||||||
@@ -456,33 +74,6 @@ rk3_integrate(const AcReal state_previous, const AcReal state_current, const AcR
|
|||||||
return NAN;
|
return NAN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
template <int step_number>
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
rk3_integrate_scal(const AcReal state_previous, const AcReal state_current,
|
|
||||||
const AcReal rate_of_change, const AcReal dt)
|
|
||||||
{
|
|
||||||
// Williamson (1980)
|
|
||||||
const AcReal alpha[] = {AcReal(.0), AcReal(-5. / 9.), AcReal(-153. / 128.)};
|
|
||||||
const AcReal beta[] = {AcReal(1. / 3.), AcReal(15. / 16.),
|
|
||||||
AcReal(8. / 15.)};
|
|
||||||
|
|
||||||
|
|
||||||
switch (step_number) {
|
|
||||||
case 0:
|
|
||||||
return state_current + beta[step_number] * rate_of_change * dt;
|
|
||||||
case 1: // Fallthrough
|
|
||||||
case 2:
|
|
||||||
return state_current +
|
|
||||||
beta[step_number] *
|
|
||||||
(alpha[step_number] * (AcReal(1.) / beta[step_number - 1]) *
|
|
||||||
(state_current - state_previous) +
|
|
||||||
rate_of_change * dt);
|
|
||||||
default:
|
|
||||||
return NAN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
template <int step_number>
|
template <int step_number>
|
||||||
static __device__ __forceinline__ AcReal3
|
static __device__ __forceinline__ AcReal3
|
||||||
@@ -498,37 +89,6 @@ rk3_integrate(const AcReal3 state_previous, const AcReal3 state_current,
|
|||||||
#define rk3(state_previous, state_current, rate_of_change, dt) \
|
#define rk3(state_previous, state_current, rate_of_change, dt) \
|
||||||
rk3_integrate<step_number>(state_previous, value(state_current), rate_of_change, dt)
|
rk3_integrate<step_number>(state_previous, value(state_current), rate_of_change, dt)
|
||||||
|
|
||||||
/*
|
|
||||||
template <int step_number>
|
|
||||||
static __device__ __forceinline__ AcReal
|
|
||||||
rk3_integrate(const int idx, const AcReal out, const int handle,
|
|
||||||
const AcRealData& in_cached, const AcReal rate_of_change, const AcReal dt)
|
|
||||||
{
|
|
||||||
return rk3_integrate_scal<step_number>(out, value(in_cached), rate_of_change, dt);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <int step_number>
|
|
||||||
static __device__ __forceinline__ AcReal3
|
|
||||||
rk3_integrate(const int idx, const AcReal3 out, const int3& handle,
|
|
||||||
const AcReal3Data& in_cached, const AcReal3& rate_of_change, const AcReal dt)
|
|
||||||
{
|
|
||||||
return (AcReal3) {
|
|
||||||
rk3_integrate<step_number>(idx, out, handle.x, in_cached.x, rate_of_change.x, dt),
|
|
||||||
rk3_integrate<step_number>(idx, out, handle.y, in_cached.y, rate_of_change.y, dt),
|
|
||||||
rk3_integrate<step_number>(idx, out, handle.z, in_cached.z, rate_of_change.z, dt)
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#define RK3(handle, in_cached, rate_of_change, dt) \
|
|
||||||
rk3_integrate<step_number>(idx, buffer.out, handle, in_cached, rate_of_change, dt)
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* =============================================================================
|
|
||||||
* Level 1.3 (Kernels)
|
|
||||||
* =============================================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
static __device__ void
|
static __device__ void
|
||||||
write(AcReal* __restrict__ out[], const int handle, const int idx, const AcReal value)
|
write(AcReal* __restrict__ out[], const int handle, const int idx, const AcReal value)
|
||||||
{
|
{
|
||||||
@@ -560,10 +120,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
|||||||
#define READ(handle) (read_data(vertexIdx, globalVertexIdx, buffer.in, handle))
|
#define READ(handle) (read_data(vertexIdx, globalVertexIdx, buffer.in, handle))
|
||||||
#define READ_OUT(handle) (read_out(idx, buffer.out, handle))
|
#define READ_OUT(handle) (read_out(idx, buffer.out, handle))
|
||||||
|
|
||||||
// also write for clarity here also, not for the DSL
|
#define GEN_PREPROCESSED_PARAM_BOILERPLATE const int3 &vertexIdx, const int3 &globalVertexIdx
|
||||||
//#define WRITE(HANDLE) (write(idx, ...)) s.t. we don't have to insert boilerplat in the mid of the
|
|
||||||
// function
|
|
||||||
|
|
||||||
#define GEN_KERNEL_PARAM_BOILERPLATE const int3 start, const int3 end, VertexBufferArray buffer
|
#define GEN_KERNEL_PARAM_BOILERPLATE const int3 start, const int3 end, VertexBufferArray buffer
|
||||||
|
|
||||||
#define GEN_KERNEL_BUILTIN_VARIABLES_BOILERPLATE() \
|
#define GEN_KERNEL_BUILTIN_VARIABLES_BOILERPLATE() \
|
||||||
@@ -607,32 +164,5 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
|||||||
return AC_SUCCESS; \
|
return AC_SUCCESS; \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
#define GEN_NODE_FUNC_HOOK(identifier) \
|
|
||||||
template <int step_number> \
|
|
||||||
AcResult acNodeKernel_##identifier(const Node node, const Stream stream, const int3 start, \
|
|
||||||
const int3 end) \
|
|
||||||
{ \
|
|
||||||
acNodeSynchronizeStream(node, stream); \
|
|
||||||
\
|
|
||||||
for (int i = 0; i < node->num_devices; ++i) { \
|
|
||||||
\
|
|
||||||
const int3 d0 = (int3){NGHOST, NGHOST, NGHOST + i * node->subgrid.n.z}; \
|
|
||||||
const int3 d1 = d0 + (int3){node->subgrid.n.x, node->subgrid.n.y, node->subgrid.n.z}; \
|
|
||||||
\
|
|
||||||
const int3 da = max(start, d0); \
|
|
||||||
const int3 db = min(end, d1); \
|
|
||||||
\
|
|
||||||
if (db.z >= da.z) { \
|
|
||||||
const int3 da_local = da - (int3){0, 0, i * node->subgrid.n.z}; \
|
|
||||||
const int3 db_local = db - (int3){0, 0, i * node->subgrid.n.z}; \
|
|
||||||
acDeviceKernel_ #identifier(node->devices[i], stream, isubstep, da_local, \
|
|
||||||
db_local, dt); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
return AC_SUCCESS; \
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
// clang-format on
|
|
||||||
|
|
||||||
#include "stencil_process.cuh"
|
#include "user_kernels.h"
|
||||||
|
Reference in New Issue
Block a user