New feature: ScalarArray. ScalarArrays are read-only 1D arrays containing max(mx, max(my, mz)) elements. ScalarArray is a new type of uniform and can be used for storing f.ex. forcing profiles. The DSL now also supports complex numbers and some basic arithmetic (exp, multiplication)
This commit is contained in:
@@ -22,15 +22,16 @@
|
||||
#include "math_utils.h" // int3 + int3
|
||||
|
||||
#define AC_GEN_STR(X) #X
|
||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
const char* scalararray_names[] = {AC_FOR_SCALARARRAY_HANDLES(AC_GEN_STR)};
|
||||
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
#undef AC_GEN_STR
|
||||
|
||||
static const int num_nodes = 1;
|
||||
|
||||
@@ -37,6 +37,8 @@
|
||||
typedef struct {
|
||||
AcReal* in[NUM_VTXBUF_HANDLES];
|
||||
AcReal* out[NUM_VTXBUF_HANDLES];
|
||||
|
||||
AcReal* profiles[NUM_SCALARARRAY_HANDLES];
|
||||
} VertexBufferArray;
|
||||
|
||||
struct device_s {
|
||||
@@ -97,6 +99,32 @@ DCONST(const VertexBufferHandle handle)
|
||||
//#define globalMeshN_min // Placeholder
|
||||
#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset])
|
||||
//#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder
|
||||
//#include <thrust/complex.h>
|
||||
// using namespace thrust;
|
||||
#include <cuComplex.h>
|
||||
#if AC_DOUBLE_PRECISION == 1
|
||||
typedef cuDoubleComplex acComplex;
|
||||
#define acComplex(x, y) make_cuDoubleComplex(x, y)
|
||||
#else
|
||||
typedef cuFloatComplex acComplex;
|
||||
#define acComplex(x, y) make_cuFloatComplex(x, y)
|
||||
#endif
|
||||
static __device__ inline acComplex
|
||||
exp(const acComplex& val)
|
||||
{
|
||||
return acComplex(exp(val.x) * cos(val.y), exp(val.x) * sin(val.y));
|
||||
}
|
||||
static __device__ inline acComplex operator*(const AcReal& a, const acComplex& b)
|
||||
{
|
||||
return (acComplex){a * b.x, a * b.y};
|
||||
}
|
||||
|
||||
static __device__ inline acComplex operator*(const acComplex& a, const acComplex& b)
|
||||
{
|
||||
return (acComplex){a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x};
|
||||
}
|
||||
//#include <complex>
|
||||
|
||||
#include "kernels/boundconds.cuh"
|
||||
#include "kernels/integration.cuh"
|
||||
#include "kernels/reductions.cuh"
|
||||
@@ -140,11 +168,21 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
||||
}
|
||||
|
||||
// Memory
|
||||
// VBA in/out
|
||||
const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config);
|
||||
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes));
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes));
|
||||
}
|
||||
// VBA Profiles
|
||||
const size_t profile_size_bytes = sizeof(AcReal) * max(device_config.int_params[AC_mx],
|
||||
max(device_config.int_params[AC_my],
|
||||
device_config.int_params[AC_mz]));
|
||||
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.profiles[i], profile_size_bytes));
|
||||
}
|
||||
|
||||
// Reductions
|
||||
ERRCHK_CUDA_ALWAYS(
|
||||
cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config)));
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));
|
||||
@@ -178,6 +216,10 @@ acDeviceDestroy(Device device)
|
||||
cudaFree(device->vba.in[i]);
|
||||
cudaFree(device->vba.out[i]);
|
||||
}
|
||||
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
|
||||
cudaFree(device->vba.profiles[i]);
|
||||
}
|
||||
|
||||
cudaFree(device->reduce_scratchpad);
|
||||
cudaFree(device->reduce_result);
|
||||
|
||||
|
||||
@@ -70,11 +70,11 @@ create_rotz(const AcReal radians)
|
||||
#define cos __cosf
|
||||
#define exp __expf
|
||||
*/
|
||||
#define sin sinf
|
||||
#define cos cosf
|
||||
#define exp expf
|
||||
#define rsqrt rsqrtf // hardware reciprocal sqrt
|
||||
#endif // AC_DOUBLE_PRECISION == 0
|
||||
//#define sin sinf
|
||||
//#define cos cosf
|
||||
//#define exp expf
|
||||
//#define rsqrt rsqrtf // hardware reciprocal sqrt
|
||||
#endif // AC_DOUBLE_PRECISION == 0
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
|
||||
@@ -124,6 +124,11 @@ static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal3& b, const AcReal& a)
|
||||
{
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user