New feature: ScalarArray. ScalarArrays are read-only 1D arrays containing max(mx, max(my, mz)) elements. ScalarArray is a new type of uniform and can be used for storing, e.g., forcing profiles. The DSL now also supports complex numbers and some basic arithmetic (exp, multiplication).
This commit is contained in:
@@ -15,8 +15,10 @@ L [a-zA-Z_]
|
||||
"void" { return VOID; } /* Rest of the types inherited from C */
|
||||
"int" { return INT; }
|
||||
"int3" { return INT3; }
|
||||
"Complex" { return COMPLEX; }
|
||||
"ScalarField" { return SCALARFIELD; }
|
||||
"VectorField" { return VECTOR; }
|
||||
"ScalarArray" { return SCALARARRAY; }
|
||||
|
||||
"Kernel" { return KERNEL; } /* Function specifiers */
|
||||
"Preprocessed" { return PREPROCESSED; }
|
||||
|
@@ -16,8 +16,8 @@ int yyget_lineno();
|
||||
%token CONSTANT IN OUT UNIFORM
|
||||
%token IDENTIFIER NUMBER
|
||||
%token RETURN
|
||||
%token SCALAR VECTOR MATRIX SCALARFIELD
|
||||
%token VOID INT INT3
|
||||
%token SCALAR VECTOR MATRIX SCALARFIELD SCALARARRAY
|
||||
%token VOID INT INT3 COMPLEX
|
||||
%token IF ELSE FOR WHILE ELIF
|
||||
%token LEQU LAND LOR LLEQU
|
||||
%token KERNEL PREPROCESSED
|
||||
@@ -210,6 +210,8 @@ type_specifier: VOID
|
||||
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
|
||||
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
|
||||
| SCALARFIELD { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARFIELD; }
|
||||
| SCALARARRAY { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALARARRAY; }
|
||||
| COMPLEX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = COMPLEX; }
|
||||
;
|
||||
|
||||
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }
|
||||
|
@@ -61,6 +61,8 @@ static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
|
||||
[VECTOR] = "AcReal3",
|
||||
[MATRIX] = "AcMatrix",
|
||||
[SCALARFIELD] = "AcReal",
|
||||
[SCALARARRAY] = "const AcReal* __restrict__",
|
||||
[COMPLEX] = "acComplex",
|
||||
// Type qualifiers
|
||||
[KERNEL] = "template <int step_number> static __global__",
|
||||
//__launch_bounds__(RK_THREADBLOCK_SIZE,
|
||||
@@ -380,20 +382,13 @@ traverse(const ASTNode* node)
|
||||
if (handle >= 0) { // The variable exists in the symbol table
|
||||
const Symbol* symbol = &symbol_table[handle];
|
||||
|
||||
// if (symbol->type_qualifier == OUT) {
|
||||
// printf("%s%s", inout_name_prefix, symbol->identifier);
|
||||
//}
|
||||
if (symbol->type_qualifier == UNIFORM) {
|
||||
printf("DCONST(%s) ", symbol->identifier);
|
||||
/*
|
||||
if (symbol->type_specifier == SCALAR)
|
||||
printf("DCONST_REAL(AC_%s) ", symbol->identifier);
|
||||
else if (symbol->type_specifier == INT)
|
||||
printf("DCONST_INT(AC_%s) ", symbol->identifier);
|
||||
else
|
||||
printf("INVALID UNIFORM type specifier %s with %s\n",
|
||||
translate(symbol->type_specifier), symbol->identifier);
|
||||
*/
|
||||
if (inside_kernel && symbol->type_specifier == SCALARARRAY) {
|
||||
printf("buffer.profiles[%s] ", symbol->identifier);
|
||||
}
|
||||
else {
|
||||
printf("DCONST(%s) ", symbol->identifier);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Do a regular translation
|
||||
@@ -613,6 +608,15 @@ generate_header(void)
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
// Scalar arrays
|
||||
printf("#define AC_FOR_SCALARARRAY_HANDLES(FUNC)");
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_specifier == SCALARARRAY) {
|
||||
printf("\\\nFUNC(%s),", symbol_table[i].identifier);
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
/*
|
||||
printf("\n");
|
||||
printf("typedef struct {\n");
|
||||
|
@@ -156,6 +156,11 @@ typedef enum {
|
||||
NUM_REAL3_PARAMS
|
||||
} AcReal3Param;
|
||||
|
||||
// Handles for ScalarArray uniforms. The enumerators are produced by expanding
// AC_FOR_SCALARARRAY_HANDLES with AC_GEN_ID (presumably one enumerator per
// declared ScalarArray -- confirm against the AC_GEN_ID definition).
// NUM_SCALARARRAY_HANDLES comes last and is used as the array length for
// per-handle storage.
typedef enum {
|
||||
AC_FOR_SCALARARRAY_HANDLES(AC_GEN_ID) //
|
||||
NUM_SCALARARRAY_HANDLES
|
||||
} ScalarArrayHandle;
|
||||
|
||||
typedef enum {
|
||||
AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) //
|
||||
NUM_VTXBUF_HANDLES
|
||||
@@ -166,6 +171,7 @@ extern const char* intparam_names[];
|
||||
extern const char* int3param_names[];
|
||||
extern const char* realparam_names[];
|
||||
extern const char* real3param_names[];
|
||||
extern const char* scalararray_names[];
|
||||
extern const char* vtxbuf_names[];
|
||||
|
||||
typedef struct {
|
||||
|
@@ -22,15 +22,16 @@
|
||||
#include "math_utils.h" // int3 + int3
|
||||
|
||||
#define AC_GEN_STR(X) #X
|
||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* int3param_names[] = {AC_FOR_BUILTIN_INT3_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* realparam_names[] = {AC_FOR_BUILTIN_REAL_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
|
||||
const char* real3param_names[] = {AC_FOR_BUILTIN_REAL3_PARAM_TYPES(AC_GEN_STR) //
|
||||
AC_FOR_USER_REAL3_PARAM_TYPES(AC_GEN_STR)};
|
||||
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
const char* scalararray_names[] = {AC_FOR_SCALARARRAY_HANDLES(AC_GEN_STR)};
|
||||
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
#undef AC_GEN_STR
|
||||
|
||||
static const int num_nodes = 1;
|
||||
|
@@ -37,6 +37,8 @@
|
||||
// Per-device collection of buffer pointers: one `in` and one `out` pointer per
// vertex buffer handle, plus one `profiles` pointer per ScalarArray handle.
typedef struct {
|
||||
AcReal* in[NUM_VTXBUF_HANDLES];
|
||||
AcReal* out[NUM_VTXBUF_HANDLES];
|
||||
|
||||
// One 1D array per ScalarArrayHandle.
AcReal* profiles[NUM_SCALARARRAY_HANDLES];
|
||||
} VertexBufferArray;
|
||||
|
||||
struct device_s {
|
||||
@@ -97,6 +99,32 @@ DCONST(const VertexBufferHandle handle)
|
||||
//#define globalMeshN_min // Placeholder
|
||||
#define d_multigpu_offset (d_mesh_info.int3_params[AC_multigpu_offset])
|
||||
//#define d_multinode_offset (d_mesh_info.int3_params[AC_multinode_offset]) // Placeholder
|
||||
//#include <thrust/complex.h>
|
||||
// using namespace thrust;
|
||||
#include <cuComplex.h>
|
||||
#if AC_DOUBLE_PRECISION == 1
|
||||
typedef cuDoubleComplex acComplex;
|
||||
#define acComplex(x, y) make_cuDoubleComplex(x, y)
|
||||
#else
|
||||
typedef cuFloatComplex acComplex;
|
||||
#define acComplex(x, y) make_cuFloatComplex(x, y)
|
||||
#endif
|
||||
// Complex exponential: exp(x + iy) = exp(x) * (cos(y) + i * sin(y)),
// where x and y are the real and imaginary components of `val`.
static __device__ inline acComplex
exp(const acComplex& val)
{
    acComplex result;
    result.x = exp(val.x) * cos(val.y); // real part
    result.y = exp(val.x) * sin(val.y); // imaginary part
    return result;
}
|
||||
// Scales a complex number by a real factor: both components are multiplied by `a`.
static __device__ inline acComplex
operator*(const AcReal& a, const acComplex& b)
{
    acComplex scaled;
    scaled.x = a * b.x;
    scaled.y = a * b.y;
    return scaled;
}
|
||||
|
||||
// Complex product: (a.x + i a.y) * (b.x + i b.y).
static __device__ inline acComplex
operator*(const acComplex& a, const acComplex& b)
{
    acComplex product;
    product.x = a.x * b.x - a.y * b.y; // real part
    product.y = a.x * b.y + a.y * b.x; // imaginary part
    return product;
}
|
||||
//#include <complex>
|
||||
|
||||
#include "kernels/boundconds.cuh"
|
||||
#include "kernels/integration.cuh"
|
||||
#include "kernels/reductions.cuh"
|
||||
@@ -140,11 +168,21 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
||||
}
|
||||
|
||||
// Memory
|
||||
// VBA in/out
|
||||
const size_t vba_size_bytes = acVertexBufferSizeBytes(device_config);
|
||||
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes));
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes));
|
||||
}
|
||||
// VBA Profiles
|
||||
const size_t profile_size_bytes = sizeof(AcReal) * max(device_config.int_params[AC_mx],
|
||||
max(device_config.int_params[AC_my],
|
||||
device_config.int_params[AC_mz]));
|
||||
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.profiles[i], profile_size_bytes));
|
||||
}
|
||||
|
||||
// Reductions
|
||||
ERRCHK_CUDA_ALWAYS(
|
||||
cudaMalloc(&device->reduce_scratchpad, acVertexBufferCompdomainSizeBytes(device_config)));
|
||||
ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));
|
||||
@@ -178,6 +216,10 @@ acDeviceDestroy(Device device)
|
||||
cudaFree(device->vba.in[i]);
|
||||
cudaFree(device->vba.out[i]);
|
||||
}
|
||||
for (int i = 0; i < NUM_SCALARARRAY_HANDLES; ++i) {
|
||||
cudaFree(device->vba.profiles[i]);
|
||||
}
|
||||
|
||||
cudaFree(device->reduce_scratchpad);
|
||||
cudaFree(device->reduce_result);
|
||||
|
||||
|
@@ -70,11 +70,11 @@ create_rotz(const AcReal radians)
|
||||
#define cos __cosf
|
||||
#define exp __expf
|
||||
*/
|
||||
#define sin sinf
|
||||
#define cos cosf
|
||||
#define exp expf
|
||||
#define rsqrt rsqrtf // hardware reciprocal sqrt
|
||||
#endif // AC_DOUBLE_PRECISION == 0
|
||||
//#define sin sinf
|
||||
//#define cos cosf
|
||||
//#define exp expf
|
||||
//#define rsqrt rsqrtf // hardware reciprocal sqrt
|
||||
#endif // AC_DOUBLE_PRECISION == 0
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
|
@@ -124,6 +124,11 @@ static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
// Vector-times-scalar product; mirrors the scalar-times-vector overload defined
// just above so that both operand orders are accepted.
static HOST_DEVICE_INLINE AcReal3
operator*(const AcReal3& b, const AcReal& a)
{
    // Forward to operator*(const AcReal&, const AcReal3&), which evaluates the
    // same component-wise products a * b.x, a * b.y, a * b.z.
    return a * b;
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
|
Reference in New Issue
Block a user