New feature: ScalarArray. ScalarArrays are read-only 1D arrays containing max(mx, max(my, mz)) elements. ScalarArray is a new type of uniform and can be used for storing f.ex. forcing profiles. The DSL now also supports complex numbers and some basic arithmetic (exp, multiplication)

This commit is contained in:
jpekkila
2019-09-02 21:26:57 +03:00
parent 18df9e5579
commit 9e57aba9b7
8 changed files with 87 additions and 25 deletions

View File

@@ -15,8 +15,10 @@ L [a-zA-Z_]
"void" { return VOID; } /* Rest of the types inherited from C */ "void" { return VOID; } /* Rest of the types inherited from C */
"int" { return INT; } "int" { return INT; }
"int3" { return INT3; } "int3" { return INT3; }
"Complex" { return COMPLEX; }
"ScalarField" { return SCALARFIELD; } "ScalarField" { return SCALARFIELD; }
"VectorField" { return VECTOR; } "VectorField" { return VECTOR; }
"ScalarArray" { return SCALARARRAY; }
"Kernel" { return KERNEL; } /* Function specifiers */ "Kernel" { return KERNEL; } /* Function specifiers */
"Preprocessed" { return PREPROCESSED; } "Preprocessed" { return PREPROCESSED; }

View File

@@ -16,8 +16,8 @@ int yyget_lineno();
%token CONSTANT IN OUT UNIFORM %token CONSTANT IN OUT UNIFORM
%token IDENTIFIER NUMBER %token IDENTIFIER NUMBER
%token RETURN %token RETURN
%token SCALAR VECTOR MATRIX SCALARFIELD %token SCALAR VECTOR MATRIX SCALARFIELD SCALARARRAY
%token VOID INT INT3 %token VOID INT INT3 COMPLEX
%token IF ELSE FOR WHILE ELIF %token IF ELSE FOR WHILE ELIF
%token LEQU LAND LOR LLEQU %token LEQU LAND LOR LLEQU
%token KERNEL PREPROCESSED %token KERNEL PREPROCESSED
@@ -210,6 +210,8 @@ type_specifier: VOID
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; } | VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; } | MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
| SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; } | SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; }
| SCALARARRAY { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARARRAY; }
| COMPLEX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = COMPLEX; }
; ;
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); } identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }

View File

@@ -61,6 +61,8 @@ static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
[VECTOR] = "AcReal3", [VECTOR] = "AcReal3",
[MATRIX] = "AcMatrix", [MATRIX] = "AcMatrix",
[SCALARFIELD] = "AcReal", [SCALARFIELD] = "AcReal",
[SCALARARRAY] = "const AcReal* __restrict__",
[COMPLEX] = "acComplex",
// Type qualifiers // Type qualifiers
[KERNEL] = "template <int step_number> static __global__", [KERNEL] = "template <int step_number> static __global__",
//__launch_bounds__(RK_THREADBLOCK_SIZE, //__launch_bounds__(RK_THREADBLOCK_SIZE,
@@ -380,20 +382,13 @@ traverse(const ASTNode* node)
if (handle >= 0) { // The variable exists in the symbol table if (handle >= 0) { // The variable exists in the symbol table
const Symbol* symbol = &symbol_table[handle]; const Symbol* symbol = &symbol_table[handle];
// if (symbol->type_qualifier == OUT) {
// printf("%s%s", inout_name_prefix, symbol->identifier);
//}
if (symbol->type_qualifier == UNIFORM) { if (symbol->type_qualifier == UNIFORM) {
printf("DCONST(%s) ", symbol->identifier); if (inside_kernel && symbol->type_specifier == SCALARARRAY) {
/* printf("buffer.profiles[%s] ", symbol->identifier);
if (symbol->type_specifier == SCALAR) }
printf("DCONST_REAL(AC_%s) ", symbol->identifier); else {
else if (symbol->type_specifier == INT) printf("DCONST(%s) ", symbol->identifier);
printf("DCONST_INT(AC_%s) ", symbol->identifier); }
else
printf("INVALID UNIFORM type specifier %s with %s\n",
translate(symbol->type_specifier), symbol->identifier);
*/
} }
else { else {
// Do a regular translation // Do a regular translation
@@ -613,6 +608,15 @@ generate_header(void)
} }
printf("\n\n"); printf("\n\n");
// Scalar arrays
printf("#define AC_FOR_SCALARARRAY_HANDLES(FUNC)");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_specifier == SCALARARRAY) {
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
}
}
printf("\n\n");
/* /*
printf("\n"); printf("\n");
printf("typedef struct {\n"); printf("typedef struct {\n");

View File

@@ -156,6 +156,11 @@ typedef enum {
NUM_REAL3_PARAMS NUM_REAL3_PARAMS
} AcReal3Param; } AcReal3Param;
typedef enum {
AC_FOR_SCALARARRAY_HANDLES(AC_GEN_ID) //
NUM_SCALARARRAY_HANDLES
} ScalarArrayHandle;
typedef enum { typedef enum {
AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) // AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) //
NUM_VTXBUF_HANDLES NUM_VTXBUF_HANDLES
@@ -166,6 +171,7 @@ extern const char* intparam_names[];
extern const char* int3param_names[]; extern const char* int3param_names[];
extern const char* realparam_names[]; extern const char* realparam_names[];
extern const char* real3param_names[]; extern const char* real3param_names[];
extern const char* scalararray_names[];
extern const char* vtxbuf_names[]; extern const char* vtxbuf_names[];
typedef struct { typedef struct {

View File

@@ -22,15 +22,16 @@
#include "math_utils.h" // int3 + int3 #include "math_utils.h" // int3 + int3
#define AC_GEN_STR(X) #X #define AC_GEN_STR(X) #X
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) // const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)}; AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)};
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) // const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)}; AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)};
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) // const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)}; AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)};
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) // const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)}; AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)}; const char* scalararray_names[] = {AC_FOR_SCALARARRAY_HANDLES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
#undef AC_GEN_STR #undef AC_GEN_STR
static const int num_nodes = 1; static const int num_nodes = 1;

View File

@@ -37,6 +37,8 @@
typedef struct { typedef struct {
AcReal* in[NUM_VTXBUF_HANDLES]; AcReal* in[NUM_VTXBUF_HANDLES];
AcReal* out[NUM_VTXBUF_HANDLES]; AcReal* out[NUM_VTXBUF_HANDLES];
AcReal* profiles[NUM_SCALARARRAY_HANDLES];
} VertexBufferArray; } VertexBufferArray;
struct device_s { struct device_s {
@@ -97,6 +99,32 @@ DCONST(const VertexBufferHandle handle)
//#define globalMeshN_min // Placeholder //#define globalMeshN_min // Placeholder
#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset]) #define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset])
//#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder //#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder
//#include <thrust/complex.h>
// using namespace thrust;
#include <cuComplex.h>
#if AC_DOUBLE_PRECISION == 1
typedef cuDoubleComplex acComplex;
#define acComplex(x, y) make_cuDoubleComplex(x, y)
#else
typedef cuFloatComplex acComplex;
#define acComplex(x, y) make_cuFloatComplex(x, y)
#endif
static __device__ inline acComplex
exp(const acComplex& val)
{
return acComplex(exp(val.x) * cos(val.y), exp(val.x) * sin(val.y));
}
static __device__ inline acComplex operator*(const AcReal& a, const acComplex& b)
{
return (acComplex){a * b.x, a * b.y};
}
static __device__ inline acComplex operator*(const acComplex& a, const acComplex& b)
{
return (acComplex){a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x};
}
//#include <complex>
#include "kernels/boundconds.cuh" #include "kernels/boundconds.cuh"
#include "kernels/integration.cuh" #include "kernels/integration.cuh"
#include "kernels/reductions.cuh" #include "kernels/reductions.cuh"
@@ -140,11 +168,21 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
} }
// Memory // Memory
// VBA in/out
const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config); const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config);
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes)); ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes));
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes)); ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes));
} }
// VBA Profiles
const size_t profile_size_bytes = sizeof(AcReal) * max(device_config.int_params[AC_mx],
max(device_config.int_params[AC_my],
device_config.int_params[AC_mz]));
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.profiles[i], profile_size_bytes));
}
// Reductions
ERRCHK_CUDA_ALWAYS( ERRCHK_CUDA_ALWAYS(
cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config))); cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config)));
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal))); ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));
@@ -178,6 +216,10 @@ acDeviceDestroy(Device device)
cudaFree(device->vba.in[i]); cudaFree(device->vba.in[i]);
cudaFree(device->vba.out[i]); cudaFree(device->vba.out[i]);
} }
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
cudaFree(device->vba.profiles[i]);
}
cudaFree(device->reduce_scratchpad); cudaFree(device->reduce_scratchpad);
cudaFree(device->reduce_result); cudaFree(device->reduce_result);

View File

@@ -70,11 +70,11 @@ create_rotz(const AcReal radians)
#define cos __cosf #define cos __cosf
#define exp __expf #define exp __expf
*/ */
#define sin sinf //#define sin sinf
#define cos cosf //#define cos cosf
#define exp expf //#define exp expf
#define rsqrt rsqrtf // hardware reciprocal sqrt //#define rsqrt rsqrtf // hardware reciprocal sqrt
#endif // AC_DOUBLE_PRECISION == 0 #endif // AC_DOUBLE_PRECISION == 0
/* /*
* ============================================================================= * =============================================================================

View File

@@ -124,6 +124,11 @@ static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
return (AcReal3){a * b.x, a * b.y, a * b.z}; return (AcReal3){a * b.x, a * b.y, a * b.z};
} }
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal3& b, const AcReal& a)
{
return (AcReal3){a * b.x, a * b.y, a * b.z};
}
static HOST_DEVICE_INLINE AcReal static HOST_DEVICE_INLINE AcReal
dot(const AcReal3& a, const AcReal3& b) dot(const AcReal3& a, const AcReal3& b)
{ {