New feature: ScalarArray. ScalarArrays are read-only 1D arrays containing max(mx, max(my, mz)) elements. ScalarArray is a new type of uniform and can be used for storing, e.g., forcing profiles. The DSL now also supports complex numbers and some basic complex arithmetic (exp, multiplication).

This commit is contained in:
jpekkila
2019-09-02 21:26:57 +03:00
parent 18df9e5579
commit 9e57aba9b7
8 changed files with 87 additions and 25 deletions

View File

@@ -22,15 +22,16 @@
#include "math_utils.h" // int3 + int3
#define AC_GEN_STR(X) #X
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)};
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)};
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)};
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
const char* scalararray_names[] = {AC_FOR_SCALARARRAY_HANDLES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
#undef AC_GEN_STR
static const int num_nodes = 1;

View File

@@ -37,6 +37,8 @@
// Bundle of the buffer pointers owned by a device. acDeviceCreate fills every
// entry with cudaMalloc and acDeviceDestroy releases them with cudaFree.
typedef struct {
// One buffer per vertex buffer handle, each acVertexBufferSizeBytes() bytes.
AcReal* in[NUM_VTXBUF_HANDLES];
// Second set of per-handle buffers, allocated with the same size as `in`.
AcReal* out[NUM_VTXBUF_HANDLES];
// One 1D array per scalar array handle, each holding max(mx, my, mz) AcReal
// elements.
AcReal* profiles[NUM_SCALARARRAY_HANDLES];
} VertexBufferArray;
struct device_s {
@@ -97,6 +99,32 @@ DCONST(const VertexBufferHandle handle)
//#define globalMeshN_min // Placeholder
#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset])
//#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder
//#include <thrust/complex.h>
// using namespace thrust;
#include <cuComplex.h>
// Complex number type selected by AC_DOUBLE_PRECISION: cuDoubleComplex when it
// equals 1, cuFloatComplex otherwise.
//
// `acComplex` is both the typedef name and a function-like constructor macro.
// A function-like macro is only expanded when its name is directly followed
// by '(', so `acComplex z;` or `(acComplex){re, im}` still name the type,
// while `acComplex(re, im)` expands to the matching make_cu*Complex() call.
#if AC_DOUBLE_PRECISION == 1
typedef cuDoubleComplex acComplex;
#define acComplex(x, y) make_cuDoubleComplex(x, y)
#else
typedef cuFloatComplex acComplex;
#define acComplex(x, y) make_cuFloatComplex(x, y)
#endif
// Complex exponential via Euler's formula:
//   e^(x + iy) = e^x * (cos(y) + i * sin(y))
// where x = val.x is the real part and y = val.y the imaginary part.
static __device__ inline acComplex
exp(const acComplex& val)
{
    // Common factor e^x shared by the real and imaginary components.
    const AcReal magnitude = exp(val.x);

    return acComplex(magnitude * cos(val.y), magnitude * sin(val.y));
}
// Real-scalar times complex: scales the real and imaginary parts of `b` by `a`.
static __device__ inline acComplex operator*(const AcReal& a, const acComplex& b)
{
    return acComplex(a * b.x, a * b.y);
}
// Complex times complex:
//   (a.x + i a.y) * (b.x + i b.y) = (a.x b.x - a.y b.y) + i (a.x b.y + a.y b.x)
static __device__ inline acComplex operator*(const acComplex& a, const acComplex& b)
{
    const AcReal real_part = a.x * b.x - a.y * b.y;
    const AcReal imag_part = a.x * b.y + a.y * b.x;

    return acComplex(real_part, imag_part);
}
//#include <complex>
#include "kernels/boundconds.cuh"
#include "kernels/integration.cuh"
#include "kernels/reductions.cuh"
@@ -140,11 +168,21 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
}
// Memory
// VBA in/out
const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config);
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes));
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes));
}
// VBA Profiles
const size_t profile_size_bytes = sizeof(AcReal) * max(device_config.int_params[AC_mx],
max(device_config.int_params[AC_my],
device_config.int_params[AC_mz]));
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.profiles[i], profile_size_bytes));
}
// Reductions
ERRCHK_CUDA_ALWAYS(
cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config)));
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));
@@ -178,6 +216,10 @@ acDeviceDestroy(Device device)
cudaFree(device->vba.in[i]);
cudaFree(device->vba.out[i]);
}
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
cudaFree(device->vba.profiles[i]);
}
cudaFree(device->reduce_scratchpad);
cudaFree(device->reduce_result);

View File

@@ -70,11 +70,11 @@ create_rotz(const AcReal radians)
#define cos __cosf
#define exp __expf
*/
#define sin sinf
#define cos cosf
#define exp expf
#define rsqrt rsqrtf // hardware reciprocal sqrt
#endif // AC_DOUBLE_PRECISION == 0
//#define sin sinf
//#define cos cosf
//#define exp expf
//#define rsqrt rsqrtf // hardware reciprocal sqrt
#endif // AC_DOUBLE_PRECISION == 0
/*
* =============================================================================

View File

@@ -124,6 +124,11 @@ static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
// Vector-times-scalar overload: multiplies every component of the vector `b`
// by the scalar `a`. Note that the vector is the first operand here.
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal3& b, const AcReal& a)
{
    AcReal3 scaled;
    scaled.x = a * b.x;
    scaled.y = a * b.y;
    scaled.z = a * b.z;
    return scaled;
}
static HOST_DEVICE_INLINE AcReal
dot(const AcReal3& a, const AcReal3& b)
{