New feature: ScalarArray. ScalarArrays are read-only 1D arrays containing max(mx, max(my, mz)) elements. ScalarArray is a new type of uniform and can be used for storing f.ex. forcing profiles. The DSL now also supports complex numbers and some basic arithmetic (exp, multiplication)

This commit is contained in:
jpekkila
2019-09-02 21:26:57 +03:00
parent 18df9e5579
commit 9e57aba9b7
8 changed files with 87 additions and 25 deletions

View File

@@ -15,8 +15,10 @@ L [a-zA-Z_]
"void" { return VOID; } /* Rest of the types inherited from C */
"int" { return INT; }
"int3" { return INT3; }
"Complex" { return COMPLEX; }
"ScalarField" { return SCALARFIELD; }
"VectorField" { return VECTOR; }
"ScalarArray" { return SCALARARRAY; }
"Kernel" { return KERNEL; } /* Function specifiers */
"Preprocessed" { return PREPROCESSED; }

View File

@@ -16,8 +16,8 @@ int yyget_lineno();
%token CONSTANT IN OUT UNIFORM
%token IDENTIFIER NUMBER
%token RETURN
%token SCALAR VECTOR MATRIX SCALARFIELD
%token VOID INT INT3
%token SCALAR VECTOR MATRIX SCALARFIELD SCALARARRAY
%token VOID INT INT3 COMPLEX
%token IF ELSE FOR WHILE ELIF
%token LEQU LAND LOR LLEQU
%token KERNEL PREPROCESSED
@@ -210,6 +210,8 @@ type_specifier: VOID
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
| SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; }
| SCALARARRAY { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARARRAY; }
| COMPLEX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = COMPLEX; }
;
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }

View File

@@ -61,6 +61,8 @@ static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
[VECTOR] = "AcReal3",
[MATRIX] = "AcMatrix",
[SCALARFIELD] = "AcReal",
[SCALARARRAY] = "const AcReal* __restrict__",
[COMPLEX] = "acComplex",
// Type qualifiers
[KERNEL] = "template <int step_number> static __global__",
//__launch_bounds__(RK_THREADBLOCK_SIZE,
@@ -380,20 +382,13 @@ traverse(const ASTNode* node)
if (handle >= 0) { // The variable exists in the symbol table
const Symbol* symbol = &symbol_table[handle];
// if (symbol->type_qualifier == OUT) {
// printf("%s%s", inout_name_prefix, symbol->identifier);
//}
if (symbol->type_qualifier == UNIFORM) {
printf("DCONST(%s) ", symbol->identifier);
/*
if (symbol->type_specifier == SCALAR)
printf("DCONST_REAL(AC_%s) ", symbol->identifier);
else if (symbol->type_specifier == INT)
printf("DCONST_INT(AC_%s) ", symbol->identifier);
else
printf("INVALID UNIFORM type specifier %s with %s\n",
translate(symbol->type_specifier), symbol->identifier);
*/
if (inside_kernel && symbol->type_specifier == SCALARARRAY) {
printf("buffer.profiles[%s] ", symbol->identifier);
}
else {
printf("DCONST(%s) ", symbol->identifier);
}
}
else {
// Do a regular translation
@@ -613,6 +608,15 @@ generate_header(void)
}
printf("\n\n");
// Scalar arrays
printf("#define AC_FOR_SCALARARRAY_HANDLES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == SCALARARRAY) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
/*
printf("\n");
printf("typedef struct {\n");

View File

@@ -156,6 +156,11 @@ typedef enum {
NUM_REAL3_PARAMS
} AcReal3Param;
typedef enum {
AC_FOR_SCALARARRAY_HANDLES(AC_GEN_ID) //
NUM_SCALARARRAY_HANDLES
} ScalarArrayHandle;
typedef enum {
AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) //
NUM_VTXBUF_HANDLES
@@ -166,6 +171,7 @@ extern const char* intparam_names[];
extern const char* int3param_names[];
extern const char* realparam_names[];
extern const char* real3param_names[];
extern const char* scalararray_names[];
extern const char* vtxbuf_names[];
typedef struct {

View File

@@ -22,15 +22,16 @@
#include "math_utils.h" // int3 + int3
#define AC_GEN_STR(X) #X
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)};
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)};
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)};
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
const char* scalararray_names[] = {AC_FOR_SCALARARRAY_HANDLES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
#undef AC_GEN_STR
static const int num_nodes = 1;

View File

@@ -37,6 +37,8 @@
typedef struct {
AcReal* in[NUM_VTXBUF_HANDLES];
AcReal* out[NUM_VTXBUF_HANDLES];
AcReal* profiles[NUM_SCALARARRAY_HANDLES];
} VertexBufferArray;
struct device_s {
@@ -97,6 +99,32 @@ DCONST(const VertexBufferHandle handle)
//#define globalMeshN_min // Placeholder
#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset])
//#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder
//#include <thrust/complex.h>
// using namespace thrust;
#include <cuComplex.h>
#if AC_DOUBLE_PRECISION == 1
typedef cuDoubleComplex acComplex;
#define acComplex(x, y) make_cuDoubleComplex(x, y)
#else
typedef cuFloatComplex acComplex;
#define acComplex(x, y) make_cuFloatComplex(x, y)
#endif
static __device__ inline acComplex
exp(const acComplex& val)
{
return acComplex(exp(val.x) * cos(val.y), exp(val.x) * sin(val.y));
}
static __device__ inline acComplex operator*(const AcReal& a, const acComplex& b)
{
return (acComplex){a * b.x, a * b.y};
}
static __device__ inline acComplex operator*(const acComplex& a, const acComplex& b)
{
return (acComplex){a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x};
}
//#include <complex>
#include "kernels/boundconds.cuh"
#include "kernels/integration.cuh"
#include "kernels/reductions.cuh"
@@ -140,11 +168,21 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
}
// Memory
// VBA in/out
const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config);
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes));
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes));
}
// VBA Profiles
const size_t profile_size_bytes = sizeof(AcReal) * max(device_config.int_params[AC_mx],
max(device_config.int_params[AC_my],
device_config.int_params[AC_mz]));
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.profiles[i], profile_size_bytes));
}
// Reductions
ERRCHK_CUDA_ALWAYS(
cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config)));
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));
@@ -178,6 +216,10 @@ acDeviceDestroy(Device device)
cudaFree(device->vba.in[i]);
cudaFree(device->vba.out[i]);
}
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
cudaFree(device->vba.profiles[i]);
}
cudaFree(device->reduce_scratchpad);
cudaFree(device->reduce_result);

View File

@@ -70,11 +70,11 @@ create_rotz(const AcReal radians)
#define cos __cosf
#define exp __expf
*/
#define sin sinf
#define cos cosf
#define exp expf
#define rsqrt rsqrtf // hardware reciprocal sqrt
#endif // AC_DOUBLE_PRECISION == 0
//#define sin sinf
//#define cos cosf
//#define exp expf
//#define rsqrt rsqrtf // hardware reciprocal sqrt
#endif // AC_DOUBLE_PRECISION == 0
/*
* =============================================================================

View File

@@ -124,6 +124,11 @@ static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal3& b, const AcReal& a)
{
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
static HOST_DEVICE_INLINE AcReal
dot(const AcReal3& a, const AcReal3& b)
{