Merge branch 'master' into sink_20190723
This commit is contained in:
@@ -22,15 +22,16 @@
|
||||
#include "math_utils.h" // int3 + int3
|
||||
|
||||
#define AC_GEN_STR(X) #X
|
||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
const char* scalararray_names[] = {AC_FOR_SCALARARRAY_HANDLES(AC_GEN_STR)};
|
||||
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
#undef AC_GEN_STR
|
||||
|
||||
static const int num_nodes = 1;
|
||||
|
||||
@@ -37,6 +37,8 @@
|
||||
typedef struct {
|
||||
AcReal* in[NUM_VTXBUF_HANDLES];
|
||||
AcReal* out[NUM_VTXBUF_HANDLES];
|
||||
|
||||
AcReal* profiles[NUM_SCALARARRAY_HANDLES];
|
||||
} VertexBufferArray;
|
||||
|
||||
struct device_s {
|
||||
@@ -44,7 +46,7 @@ struct device_s {
|
||||
AcMeshInfo local_config;
|
||||
|
||||
// Concurrency
|
||||
cudaStream_t streams[NUM_STREAM_TYPES];
|
||||
cudaStream_t streams[NUM_STREAMS];
|
||||
|
||||
// Memory
|
||||
VertexBufferArray vba;
|
||||
@@ -97,6 +99,32 @@ DCONST(const VertexBufferHandle handle)
|
||||
//#define globalMeshN_min // Placeholder
|
||||
#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset])
|
||||
//#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder
|
||||
//#include <thrust/complex.h>
|
||||
// using namespace thrust;
|
||||
#include <cuComplex.h>
|
||||
#if AC_DOUBLE_PRECISION == 1
|
||||
typedef cuDoubleComplex acComplex;
|
||||
#define acComplex(x, y) make_cuDoubleComplex(x, y)
|
||||
#else
|
||||
typedef cuFloatComplex acComplex;
|
||||
#define acComplex(x, y) make_cuFloatComplex(x, y)
|
||||
#endif
|
||||
static __device__ inline acComplex
|
||||
exp(const acComplex& val)
|
||||
{
|
||||
return acComplex(exp(val.x) * cos(val.y), exp(val.x) * sin(val.y));
|
||||
}
|
||||
static __device__ inline acComplex operator*(const AcReal& a, const acComplex& b)
|
||||
{
|
||||
return (acComplex){a * b.x, a * b.y};
|
||||
}
|
||||
|
||||
static __device__ inline acComplex operator*(const acComplex& a, const acComplex& b)
|
||||
{
|
||||
return (acComplex){a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x};
|
||||
}
|
||||
//#include <complex>
|
||||
|
||||
#include "kernels/boundconds.cuh"
|
||||
#include "kernels/integration.cuh"
|
||||
#include "kernels/reductions.cuh"
|
||||
@@ -135,16 +163,26 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
||||
printf("Success!\n");
|
||||
|
||||
// Concurrency
|
||||
for (int i = 0; i < NUM_STREAM_TYPES; ++i) {
|
||||
for (int i = 0; i < NUM_STREAMS; ++i) {
|
||||
cudaStreamCreateWithPriority(&device->streams[i], cudaStreamNonBlocking, 0);
|
||||
}
|
||||
|
||||
// Memory
|
||||
// VBA in/out
|
||||
const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config);
|
||||
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes));
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes));
|
||||
}
|
||||
// VBA Profiles
|
||||
const size_t profile_size_bytes = sizeof(AcReal) * max(device_config.int_params[AC_mx],
|
||||
max(device_config.int_params[AC_my],
|
||||
device_config.int_params[AC_mz]));
|
||||
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.profiles[i], profile_size_bytes));
|
||||
}
|
||||
|
||||
// Reductions
|
||||
ERRCHK_CUDA_ALWAYS(
|
||||
cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config)));
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));
|
||||
@@ -178,6 +216,10 @@ acDeviceDestroy(Device device)
|
||||
cudaFree(device->vba.in[i]);
|
||||
cudaFree(device->vba.out[i]);
|
||||
}
|
||||
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
|
||||
cudaFree(device->vba.profiles[i]);
|
||||
}
|
||||
|
||||
cudaFree(device->reduce_scratchpad);
|
||||
cudaFree(device->reduce_result);
|
||||
|
||||
@@ -186,7 +228,7 @@ acDeviceDestroy(Device device)
|
||||
#endif
|
||||
|
||||
// Concurrency
|
||||
for (int i = 0; i < NUM_STREAM_TYPES; ++i) {
|
||||
for (int i = 0; i < NUM_STREAMS; ++i) {
|
||||
cudaStreamDestroy(device->streams[i]);
|
||||
}
|
||||
|
||||
@@ -405,6 +447,21 @@ acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3P
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadScalarArray(const Device device, const Stream stream, const ScalarArrayHandle handle,
|
||||
const size_t start, const AcReal* data, const size_t num)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
|
||||
ERRCHK(start + num <= max(device->local_config.int_params[AC_mx],
|
||||
max(device->local_config.int_params[AC_my],
|
||||
device->local_config.int_params[AC_mz])));
|
||||
|
||||
ERRCHK_CUDA(cudaMemcpyAsync(&device->vba.profiles[handle][start], data, sizeof(data[0]) * num,
|
||||
cudaMemcpyHostToDevice, device->streams[stream]));
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadMeshInfo(const Device device, const Stream stream, const AcMeshInfo device_config)
|
||||
{
|
||||
|
||||
@@ -70,11 +70,11 @@ create_rotz(const AcReal radians)
|
||||
#define cos __cosf
|
||||
#define exp __expf
|
||||
*/
|
||||
#define sin sinf
|
||||
#define cos cosf
|
||||
#define exp expf
|
||||
#define rsqrt rsqrtf // hardware reciprocal sqrt
|
||||
#endif // AC_DOUBLE_PRECISION == 0
|
||||
//#define sin sinf
|
||||
//#define cos cosf
|
||||
//#define exp expf
|
||||
//#define rsqrt rsqrtf // hardware reciprocal sqrt
|
||||
#endif // AC_DOUBLE_PRECISION == 0
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
|
||||
@@ -124,6 +124,11 @@ static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal3& b, const AcReal& a)
|
||||
{
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user