From 9e57aba9b711bda2c4f4324214c1cdd9043eda97 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 2 Sep 2019 21:26:57 +0300 Subject: [PATCH] New feature: ScalarArray. ScalarArrays are read-only 1D arrays containing max(mx, max(my, mz)) elements. ScalarArray is a new type of uniform and can be used for storing f.ex. forcing profiles. The DSL now also supports complex numbers and some basic arithmetic (exp, multiplication) --- acc/src/acc.l | 2 ++ acc/src/acc.y | 6 +++-- acc/src/code_generator.c | 30 +++++++++++++---------- include/astaroth_defines.h | 6 +++++ src/core/astaroth.cu | 11 +++++---- src/core/device.cu | 42 ++++++++++++++++++++++++++++++++ src/core/kernels/integration.cuh | 10 ++++---- src/core/math_utils.h | 5 ++++ 8 files changed, 87 insertions(+), 25 deletions(-) diff --git a/acc/src/acc.l b/acc/src/acc.l index 76104d8..c0eaab9 100644 --- a/acc/src/acc.l +++ b/acc/src/acc.l @@ -15,8 +15,10 @@ L [a-zA-Z_] "void" { return VOID; } /* Rest of the types inherited from C */ "int" { return INT; } "int3" { return INT3; } +"Complex" { return COMPLEX; } "ScalarField" { return SCALARFIELD; } "VectorField" { return VECTOR; } +"ScalarArray" { return SCALARARRAY; } "Kernel" { return KERNEL; } /* Function specifiers */ "Preprocessed" { return PREPROCESSED; } diff --git a/acc/src/acc.y b/acc/src/acc.y index 0bd1d19..103b802 100644 --- a/acc/src/acc.y +++ b/acc/src/acc.y @@ -16,8 +16,8 @@ int yyget_lineno(); %token CONSTANT IN OUT UNIFORM %token IDENTIFIER NUMBER %token RETURN -%token SCALAR VECTOR MATRIX SCALARFIELD -%token VOID INT INT3 +%token SCALAR VECTOR MATRIX SCALARFIELD SCALARARRAY +%token VOID INT INT3 COMPLEX %token IF ELSE FOR WHILE ELIF %token LEQU LAND LOR LLEQU %token KERNEL PREPROCESSED @@ -210,6 +210,8 @@ type_specifier: VOID | VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; } | MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; } | SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; } + | SCALARARRAY { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARARRAY; } + | COMPLEX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = COMPLEX; } ; identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); } diff --git a/acc/src/code_generator.c b/acc/src/code_generator.c index 864d2ec..b961f77 100644 --- a/acc/src/code_generator.c +++ b/acc/src/code_generator.c @@ -61,6 +61,8 @@ static const char* translation_table[TRANSLATION_TABLE_SIZE] = { [VECTOR] = "AcReal3", [MATRIX] = "AcMatrix", [SCALARFIELD] = "AcReal", + [SCALARARRAY] = "const AcReal* __restrict__", + [COMPLEX] = "acComplex", // Type qualifiers [KERNEL] = "template static __global__", //__launch_bounds__(RK_THREADBLOCK_SIZE, @@ -380,20 +382,13 @@ traverse(const ASTNode* node) if (handle >= 0) { // The variable exists in the symbol table const Symbol* symbol = &symbol_table[handle]; - // if (symbol->type_qualifier == OUT) { - // printf("%s%s", inout_name_prefix, symbol->identifier); - //} if (symbol->type_qualifier == UNIFORM) { - printf("DCONST(%s) ", symbol->identifier); - /* - if (symbol->type_specifier == SCALAR) - printf("DCONST_REAL(AC_%s) ", symbol->identifier); - else if (symbol->type_specifier == INT) - printf("DCONST_INT(AC_%s) ", symbol->identifier); - else - printf("INVALID UNIFORM type specifier %s with %s\n", - translate(symbol->type_specifier), symbol->identifier); - */ + if (inside_kernel && symbol->type_specifier == SCALARARRAY) { + printf("buffer.profiles[%s] ", symbol->identifier); + } + else { + printf("DCONST(%s) ", symbol->identifier); + } } else { // Do a regular translation @@ -613,6 +608,15 @@ generate_header(void) } printf("\n\n"); + // Scalar arrays + printf("#define AC_FOR_SCALARARRAY_HANDLES(FUNC)"); + for (int i = 0; i < num_symbols; ++i) { + if (symbol_table[i].type_specifier == SCALARARRAY) { + printf("\\\nFUNC(%s),", symbol_table[i].identifier); + } + } + printf("\n\n"); + /* printf("\n"); printf("typedef struct {\n"); diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h index 58525ba..1c7ba1e 100644 --- a/include/astaroth_defines.h +++ b/include/astaroth_defines.h @@ -156,6 +156,11 @@ typedef enum { NUM_REAL3_PARAMS } AcReal3Param; +typedef enum { + AC_FOR_SCALARARRAY_HANDLES(AC_GEN_ID) // + NUM_SCALARARRAY_HANDLES +} ScalarArrayHandle; + typedef enum { AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) // NUM_VTXBUF_HANDLES @@ -166,6 +171,7 @@ extern const char* intparam_names[]; extern const char* int3param_names[]; extern const char* realparam_names[]; extern const char* real3param_names[]; +extern const char* scalararray_names[]; extern const char* vtxbuf_names[]; typedef struct { diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index 11956f7..4ab34a9 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -22,15 +22,16 @@ #include "math_utils.h" // int3 + int3 #define AC_GEN_STR(X) #X -const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) // +const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) // AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)}; -const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) // +const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) // AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)}; -const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) // +const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) // AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)}; -const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) // +const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) // AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)}; -const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)}; +const char* scalararray_names[] = {AC_FOR_SCALARARRAY_HANDLES(AC_GEN_STR)}; +const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)}; #undef AC_GEN_STR static const int num_nodes = 1; diff --git a/src/core/device.cu b/src/core/device.cu index 78e82d7..1faaa5e 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -37,6 +37,8 @@ typedef struct { AcReal* in[NUM_VTXBUF_HANDLES]; AcReal* out[NUM_VTXBUF_HANDLES]; + + AcReal* profiles[NUM_SCALARARRAY_HANDLES]; } VertexBufferArray; struct device_s { @@ -97,6 +99,32 @@ DCONST(const VertexBufferHandle handle) //#define globalMeshN_min // Placeholder #define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset]) //#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder +//#include +// using namespace thrust; +#include +#if AC_DOUBLE_PRECISION == 1 +typedef cuDoubleComplex acComplex; +#define acComplex(x, y) make_cuDoubleComplex(x, y) +#else +typedef cuFloatComplex acComplex; +#define acComplex(x, y) make_cuFloatComplex(x, y) +#endif +static __device__ inline acComplex +exp(const acComplex& val) +{ + return acComplex(exp(val.x) * cos(val.y), exp(val.x) * sin(val.y)); +} +static __device__ inline acComplex operator*(const AcReal& a, const acComplex& b) +{ + return (acComplex){a * b.x, a * b.y}; +} + +static __device__ inline acComplex operator*(const acComplex& a, const acComplex& b) +{ + return (acComplex){a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x}; +} +//#include + #include "kernels/boundconds.cuh" #include "kernels/integration.cuh" #include "kernels/reductions.cuh" @@ -140,11 +168,21 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand } // Memory + // VBA in/out const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config); for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes)); ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes)); } + // VBA Profiles + const size_t profile_size_bytes = sizeof(AcReal) * max(device_config.int_params[AC_mx], + max(device_config.int_params[AC_my], + device_config.int_params[AC_mz])); + for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) { + ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.profiles[i], profile_size_bytes)); + } + + // Reductions ERRCHK_CUDA_ALWAYS( cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config))); ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal))); @@ -178,6 +216,10 @@ acDeviceDestroy(Device device) cudaFree(device->vba.in[i]); cudaFree(device->vba.out[i]); } + for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) { + cudaFree(device->vba.profiles[i]); + } + cudaFree(device->reduce_scratchpad); cudaFree(device->reduce_result); diff --git a/src/core/kernels/integration.cuh b/src/core/kernels/integration.cuh index 0a06c2f..e1c723a 100644 --- a/src/core/kernels/integration.cuh +++ b/src/core/kernels/integration.cuh @@ -70,11 +70,11 @@ create_rotz(const AcReal radians) #define cos __cosf #define exp __expf */ -#define sin sinf -#define cos cosf -#define exp expf -#define rsqrt rsqrtf // hardware reciprocal sqrt -#endif // AC_DOUBLE_PRECISION == 0 +//#define sin sinf +//#define cos cosf +//#define exp expf +//#define rsqrt rsqrtf // hardware reciprocal sqrt +#endif // AC_DOUBLE_PRECISION == 0 /* * ============================================================================= diff --git a/src/core/math_utils.h b/src/core/math_utils.h index 4d41e4e..a7ea2e2 100644 --- a/src/core/math_utils.h +++ b/src/core/math_utils.h @@ -124,6 +124,11 @@ static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b) return (AcReal3){a * b.x, a * b.y, a * b.z}; } +static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal3& b, const AcReal& a) +{ + return (AcReal3){a * b.x, a * b.y, a * b.z}; +} + static HOST_DEVICE_INLINE AcReal dot(const AcReal3& a, const AcReal3& b) {