From 0ccd4e3dbce47bc2d3d02d5c8d6efe2311c05070 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 28 Jan 2020 18:17:31 +0200 Subject: [PATCH] Major improvement: uniforms can now be set to default values. The syntax is the same as for setting any other values, f.ex. 'uniform Scalar a = 1; uniform Scalar b = 0.5 * a;'. Undefined uniforms are still allowed, but in this case the user should load a proper value into it during runtime. Default uniform values can be overwritten by calling any of the uniform loader funcions (like acDeviceLoadScalarUniform). Improved also error checking. Now there are explicit warnings if the user tries to load an invalid value into a device constant. --- acc/mhd_solver/stencil_kernel.ac | 4 +- acc/src/code_generator.c | 81 +++++++++----- acc/stdlib/stdderiv.h | 12 +-- include/astaroth.h | 10 +- samples/ctest/main.c | 5 - src/common/errchk.h | 5 + src/core/device.cc | 3 +- src/core/kernels/kernels.cu | 177 +++++++++++++++++++++---------- src/core/kernels/kernels.h | 15 --- 9 files changed, 197 insertions(+), 115 deletions(-) diff --git a/acc/mhd_solver/stencil_kernel.ac b/acc/mhd_solver/stencil_kernel.ac index f1c5cf4..905eb65 100644 --- a/acc/mhd_solver/stencil_kernel.ac +++ b/acc/mhd_solver/stencil_kernel.ac @@ -56,7 +56,7 @@ uniform Scalar AC_cdt; uniform Scalar AC_cdtv; uniform Scalar AC_cdts; uniform Scalar AC_nu_visc; -uniform Scalar AC_cs_sound; +uniform Scalar AC_cs_sound = 1.0; uniform Scalar AC_eta; uniform Scalar AC_mu0; uniform Scalar AC_cp_sound; @@ -96,7 +96,7 @@ uniform Scalar AC_G_const; uniform Scalar AC_GM_star; uniform Scalar AC_unit_mass; uniform Scalar AC_sq2GM_star; -uniform Scalar AC_cs2_sound; +uniform Scalar AC_cs2_sound = AC_cs_sound * AC_cs_sound; /* * ============================================================================= diff --git a/acc/src/code_generator.c b/acc/src/code_generator.c index 05c8fc1..f69d186 100644 --- a/acc/src/code_generator.c +++ b/acc/src/code_generator.c @@ -224,6 +224,7 @@ print_symbol_table(void) */ static bool inside_declaration = false; static bool inside_kernel = false; +static bool inside_function = false; /* * ============================================================================= @@ -246,6 +247,8 @@ traverse(const ASTNode* node) } if (node->type == NODE_DECLARATION) inside_declaration = true; + if (node->type == NODE_FUNCTION_DEFINITION) + inside_function = true; if (node->token == KERNEL) inside_kernel = true; @@ -335,7 +338,12 @@ traverse(const ASTNode* node) // Translate the new symbol if (tqualifier == UNIFORM) { - // Do nothing + if (tspecifier == SCALAR || tspecifier == VECTOR || tspecifier == INT || + tspecifier == INT3) { + fprintf(CUDAHEADER, "static %s %s_DEFAULT_VALUE", translate(tspecifier), + identifier); + } + // else do nothing } else if (tqualifier == KERNEL) { fprintf(CUDAHEADER, "%s %s\n%s", // @@ -404,7 +412,13 @@ traverse(const ASTNode* node) if (symbol->type_qualifier == UNIFORM) { if (inside_kernel && symbol->type_specifier == SCALARARRAY) fprintf(CUDAHEADER, "buffer.profiles[%s] ", symbol->identifier); - else + else if (!inside_function && + ((symbol->type_specifier == SCALAR) || + (symbol->type_specifier == VECTOR) || (symbol->type_specifier == INT) || + (symbol->type_specifier == INT3))) // Global scope and an uniform which + // can be set to a default value + fprintf(CUDAHEADER, "%s_DEFAULT_VALUE ", symbol->identifier); + else // Use device constants inside device functions fprintf(CUDAHEADER, "DCONST(%s) ", symbol->identifier); } else if (node->parent->type != NODE_DECLARATION) { @@ -462,12 +476,27 @@ traverse(const ASTNode* node) } if (node->type == NODE_DECLARATION) inside_declaration = false; + if (node->type == NODE_FUNCTION_DEFINITION) + inside_function = false; // Postfix translation if (!inside_declaration && translate(node->postfix)) fprintf(CUDAHEADER, "%s", translate(node->postfix)); } +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +static const char* builtin_int_params[] = { + "AC_nx", "AC_ny", "AC_nz", "AC_mx", "AC_my", + "AC_mz", "AC_nx_min", "AC_ny_min", "AC_nz_min", "AC_nx_max", + "AC_ny_max", "AC_nz_max", "AC_mxy", "AC_nxy", "AC_nxyz", +}; + +static const char* builtin_int3_params[] = { + "AC_global_grid_n", + "AC_multigpu_offset", +}; + static void generate_preprocessed_structures(void) { @@ -528,6 +557,16 @@ generate_preprocessed_structures(void) CUDAHEADER = fopen(cudaheader_filename, "w+"); fprintf(CUDAHEADER, "#pragma once\n"); + + // Add built-in params (the best way would be to inject these to user src with AC syntax) + for (size_t i = 0; i < ARRAY_SIZE(builtin_int_params); ++i) { + fprintf(CUDAHEADER, "static const int %s_DEFAULT_VALUE = 0;\n", builtin_int_params[i]); + } + for (size_t i = 0; i < ARRAY_SIZE(builtin_int3_params); ++i) { + fprintf(CUDAHEADER, "static const int3 %s_DEFAULT_VALUE = make_int3(0, 0, 0);\n", + builtin_int3_params[i]); + } + fprintf(CUDAHEADER, "typedef struct {\n"); for (size_t i = 0; i < num_symbols[current_nest]; ++i) { if (symbol_table[i].type_qualifier == PREPROCESSED) @@ -566,7 +605,7 @@ generate_header(void) fprintf(DSLHEADER, "#define AC_FOR_USER_INT_PARAM_TYPES(FUNC)"); for (size_t i = 0; i < num_symbols[current_nest]; ++i) { if (symbol_table[i].type_specifier == INT && symbol_table[i].type_qualifier == UNIFORM) { - fprintf(DSLHEADER, "\\\nFUNC(%s),", symbol_table[i].identifier); + fprintf(DSLHEADER, "\\\nFUNC(%s)", symbol_table[i].identifier); } } fprintf(DSLHEADER, "\n\n"); @@ -575,7 +614,7 @@ generate_header(void) fprintf(DSLHEADER, "#define AC_FOR_USER_INT3_PARAM_TYPES(FUNC)"); for (size_t i = 0; i < num_symbols[current_nest]; ++i) { if (symbol_table[i].type_specifier == INT3 && symbol_table[i].type_qualifier == UNIFORM) { - fprintf(DSLHEADER, "\\\nFUNC(%s),", symbol_table[i].identifier); + fprintf(DSLHEADER, "\\\nFUNC(%s)", symbol_table[i].identifier); } } fprintf(DSLHEADER, "\n\n"); @@ -584,7 +623,7 @@ generate_header(void) fprintf(DSLHEADER, "#define AC_FOR_USER_REAL_PARAM_TYPES(FUNC)"); for (size_t i = 0; i < num_symbols[current_nest]; ++i) { if (symbol_table[i].type_specifier == SCALAR && symbol_table[i].type_qualifier == UNIFORM) { - fprintf(DSLHEADER, "\\\nFUNC(%s),", symbol_table[i].identifier); + fprintf(DSLHEADER, "\\\nFUNC(%s)", symbol_table[i].identifier); } } fprintf(DSLHEADER, "\n\n"); @@ -593,7 +632,7 @@ generate_header(void) fprintf(DSLHEADER, "#define AC_FOR_USER_REAL3_PARAM_TYPES(FUNC)"); for (size_t i = 0; i < num_symbols[current_nest]; ++i) { if (symbol_table[i].type_specifier == VECTOR && symbol_table[i].type_qualifier == UNIFORM) { - fprintf(DSLHEADER, "\\\nFUNC(%s),", symbol_table[i].identifier); + fprintf(DSLHEADER, "\\\nFUNC(%s)", symbol_table[i].identifier); } } fprintf(DSLHEADER, "\n\n"); @@ -603,7 +642,7 @@ generate_header(void) for (size_t i = 0; i < num_symbols[current_nest]; ++i) { if (symbol_table[i].type_specifier == SCALARFIELD && symbol_table[i].type_qualifier == UNIFORM) { - fprintf(DSLHEADER, "\\\nFUNC(%s),", symbol_table[i].identifier); + fprintf(DSLHEADER, "\\\nFUNC(%s)", symbol_table[i].identifier); } } fprintf(DSLHEADER, "\n\n"); @@ -613,7 +652,7 @@ generate_header(void) for (size_t i = 0; i < num_symbols[current_nest]; ++i) { if (symbol_table[i].type_specifier == SCALARARRAY && symbol_table[i].type_qualifier == UNIFORM) { - fprintf(DSLHEADER, "\\\nFUNC(%s),", symbol_table[i].identifier); + fprintf(DSLHEADER, "\\\nFUNC(%s)", symbol_table[i].identifier); } } fprintf(DSLHEADER, "\n\n"); @@ -645,25 +684,13 @@ main(void) assert(DSLHEADER); assert(CUDAHEADER); - // Add built-in params - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nx"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_ny"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nz"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_mx"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_my"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_mz"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nx_min"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_ny_min"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nz_min"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nx_max"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_ny_max"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nz_max"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_mxy"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nxy"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, "AC_nxyz"); - - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT3, "AC_global_grid_n"); - add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT3, "AC_multigpu_offset"); + // Add built-in param symbols + for (size_t i = 0; i < ARRAY_SIZE(builtin_int_params); ++i) { + add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT, builtin_int_params[i]); + } + for (size_t i = 0; i < ARRAY_SIZE(builtin_int3_params); ++i) { + add_symbol(SYMBOLTYPE_OTHER, UNIFORM, INT3, builtin_int3_params[i]); + } // Generate traverse(root); diff --git a/acc/stdlib/stdderiv.h b/acc/stdlib/stdderiv.h index f7e34fd..bb1d274 100644 --- a/acc/stdlib/stdderiv.h +++ b/acc/stdlib/stdderiv.h @@ -3,12 +3,12 @@ #define STENCIL_ORDER (6) #endif -uniform Scalar AC_dsx; -uniform Scalar AC_dsy; -uniform Scalar AC_dsz; -uniform Scalar AC_inv_dsx; -uniform Scalar AC_inv_dsy; -uniform Scalar AC_inv_dsz; +uniform Scalar AC_dsx = 0.04908738521; +uniform Scalar AC_dsy = 0.04908738521; +uniform Scalar AC_dsz = 0.04908738521; +uniform Scalar AC_inv_dsx = 1.0 / AC_dsx; +uniform Scalar AC_inv_dsy = 1.0 / AC_dsy; +uniform Scalar AC_inv_dsz = 1.0 / AC_dsz; Scalar first_derivative(Scalar pencil[], Scalar inv_ds) diff --git a/include/astaroth.h b/include/astaroth.h index 5443f4d..f7109ae 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -82,7 +82,7 @@ typedef enum { } Stream; #define STREAM_ALL (NUM_STREAMS) -#define AC_GEN_ID(X) X +#define AC_GEN_ID(X) X, typedef enum { AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_ID) // NUM_INT_PARAMS @@ -115,7 +115,7 @@ typedef enum { #undef AC_GEN_ID #define _UNUSED __attribute__((unused)) // Does not give a warning if unused -#define AC_GEN_STR(X) #X +#define AC_GEN_STR(X) #X, static const char* intparam_names[] _UNUSED = {AC_FOR_USER_INT_PARAM_TYPES(AC_GEN_STR)}; static const char* int3param_names[] _UNUSED = {AC_FOR_USER_INT3_PARAM_TYPES(AC_GEN_STR)}; static const char* realparam_names[] _UNUSED = {AC_FOR_USER_REAL_PARAM_TYPES(AC_GEN_STR)}; @@ -451,8 +451,10 @@ AcResult acDeviceLoadScalarArray(const Device device, const Stream stream, const AcReal* data, const size_t num); /** */ -AcResult acDeviceLoadMeshInfo(const Device device, const Stream stream, - const AcMeshInfo device_config); +AcResult acDeviceLoadMeshInfo(const Device device, const AcMeshInfo device_config); + +/** */ +AcResult acDeviceLoadDefaultUniforms(const Device device); /** */ AcResult acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream, diff --git a/samples/ctest/main.c b/samples/ctest/main.c index 2b4f9c0..8bf4e2d 100644 --- a/samples/ctest/main.c +++ b/samples/ctest/main.c @@ -27,11 +27,6 @@ main(void) { AcMeshInfo info; acLoadConfig(AC_DEFAULT_CONFIG, &info); - // Some real params must be calculated (for the MHD case) // TODO DANGEROUS - info.real_params[AC_inv_dsx] = (AcReal)(1.0) / info.real_params[AC_dsx]; - info.real_params[AC_inv_dsy] = (AcReal)(1.0) / info.real_params[AC_dsy]; - info.real_params[AC_inv_dsz] = (AcReal)(1.0) / info.real_params[AC_dsz]; - info.real_params[AC_cs2_sound] = info.real_params[AC_cs_sound] * info.real_params[AC_cs_sound]; // Alloc AcMesh model, candidate; diff --git a/src/common/errchk.h b/src/common/errchk.h index 391e620..2c57b88 100644 --- a/src/common/errchk.h +++ b/src/common/errchk.h @@ -66,6 +66,11 @@ if (!(retval)) \ WARNING(#retval " was false"); \ } +#define WARNCHK_ALWAYS(retval) \ + { \ + if (!(retval)) \ + WARNING(#retval " was false"); \ + } #define ERRCHK_ALWAYS(retval) \ { \ if (!(retval)) \ diff --git a/src/core/device.cc b/src/core/device.cc index 11f9d17..4331d96 100644 --- a/src/core/device.cc +++ b/src/core/device.cc @@ -150,7 +150,8 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand ERRCHK_CUDA_ALWAYS(cudaMalloc((void**)&device->reduce_result, sizeof(AcReal))); // Device constants - acDeviceLoadMeshInfo(device, STREAM_DEFAULT, device_config); + acDeviceLoadDefaultUniforms(device); + acDeviceLoadMeshInfo(device, device_config); printf("Created device %d (%p)\n", device->id, device); *device_handle = device; diff --git a/src/core/kernels/kernels.cu b/src/core/kernels/kernels.cu index 64fe8b3..2a65640 100644 --- a/src/core/kernels/kernels.cu +++ b/src/core/kernels/kernels.cu @@ -97,7 +97,90 @@ static __device__ inline acComplex operator*(const acComplex& a, const acComplex #include "reductions.cuh" AcResult -acDeviceLoadMeshInfo(const Device device, const Stream stream, const AcMeshInfo device_config) +acDeviceLoadScalarUniform(const Device device, const Stream stream, const AcRealParam param, + const AcReal value) +{ + cudaSetDevice(device->id); + if (!is_valid(value)) { + fprintf(stderr, "WARNING: Passed an invalid value %g to device constant %s. Skipping.\n", + (double)value, realparam_names[param]); + return AC_FAILURE; + } + + if (param >= NUM_REAL_PARAMS) + return AC_FAILURE; + + const size_t offset = (size_t)&d_mesh_info.real_params[param] - (size_t)&d_mesh_info; + ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, + cudaMemcpyHostToDevice, device->streams[stream])); + return AC_SUCCESS; +} + +AcResult +acDeviceLoadVectorUniform(const Device device, const Stream stream, const AcReal3Param param, + const AcReal3 value) +{ + cudaSetDevice(device->id); + if (!is_valid(value)) { + fprintf(stderr, + "WARNING: Passed an invalid value (%g, %g, %g) to device constant %s. Skipping.\n", + (double)value.x, (double)value.y, (double)value.z, real3param_names[param]); + return AC_FAILURE; + } + + if (param >= NUM_REAL3_PARAMS || !NUM_REAL3_PARAMS) + return AC_FAILURE; + + const size_t offset = (size_t)&d_mesh_info.real3_params[param] - (size_t)&d_mesh_info; + ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, + cudaMemcpyHostToDevice, device->streams[stream])); + return AC_SUCCESS; +} + +AcResult +acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param, + const int value) +{ + cudaSetDevice(device->id); + if (!is_valid(value)) { + fprintf(stderr, "WARNING: Passed an invalid value %d to device constant %s. Skipping.\n", + value, intparam_names[param]); + return AC_FAILURE; + } + + if (param >= NUM_INT_PARAMS) + return AC_FAILURE; + + const size_t offset = (size_t)&d_mesh_info.int_params[param] - (size_t)&d_mesh_info; + ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, + cudaMemcpyHostToDevice, device->streams[stream])); + return AC_SUCCESS; +} + +AcResult +acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param, + const int3 value) +{ + cudaSetDevice(device->id); + if (!is_valid(value.x) || !is_valid(value.y) || !is_valid(value.z)) { + fprintf( + stderr, + "WARNING: Passed an invalid value (%d, %d, %def) to device constant %s. Skipping.\n", + value.x, value.y, value.z, int3param_names[param]); + return AC_FAILURE; + } + + if (param >= NUM_INT3_PARAMS) + return AC_FAILURE; + + const size_t offset = (size_t)&d_mesh_info.int3_params[param] - (size_t)&d_mesh_info; + ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, + cudaMemcpyHostToDevice, device->streams[stream])); + return AC_SUCCESS; +} + +AcResult +acDeviceLoadMeshInfo(const Device device, const AcMeshInfo device_config) { cudaSetDevice(device->id); @@ -107,67 +190,51 @@ acDeviceLoadMeshInfo(const Device device, const Stream stream, const AcMeshInfo ERRCHK_ALWAYS(device_config.int_params[AC_multigpu_offset] == device->local_config.int_params[AC_multigpu_offset]); - ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &device_config, sizeof(device_config), - 0, cudaMemcpyHostToDevice, device->streams[stream])); + for (int i = 0; i < NUM_INT_PARAMS; ++i) + acDeviceLoadIntUniform(device, STREAM_DEFAULT, (AcIntParam)i, device_config.int_params[i]); + + for (int i = 0; i < NUM_INT3_PARAMS; ++i) + acDeviceLoadInt3Uniform(device, STREAM_DEFAULT, (AcInt3Param)i, + device_config.int3_params[i]); + + for (int i = 0; i < NUM_REAL_PARAMS; ++i) + acDeviceLoadScalarUniform(device, STREAM_DEFAULT, (AcRealParam)i, + device_config.real_params[i]); + + for (int i = 0; i < NUM_REAL3_PARAMS; ++i) + acDeviceLoadVectorUniform(device, STREAM_DEFAULT, (AcReal3Param)i, + device_config.real3_params[i]); + return AC_SUCCESS; } AcResult -acDeviceLoadScalarUniform(const Device device, const Stream stream, const AcRealParam param, - const AcReal value) +acDeviceLoadDefaultUniforms(const Device device) { cudaSetDevice(device->id); - if (param >= NUM_REAL_PARAMS) - return AC_FAILURE; + // clang-format off + // Scalar + #define LOAD_DEFAULT_UNIFORM(X) acDeviceLoadScalarUniform(device, STREAM_DEFAULT, X, X##_DEFAULT_VALUE); + AC_FOR_USER_REAL_PARAM_TYPES(LOAD_DEFAULT_UNIFORM) + #undef LOAD_DEFAULT_UNIFORM - const size_t offset = (size_t)&d_mesh_info.real_params[param] - (size_t)&d_mesh_info; - ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, - cudaMemcpyHostToDevice, device->streams[stream])); - return AC_SUCCESS; -} - -AcResult -acDeviceLoadVectorUniform(const Device device, const Stream stream, const AcReal3Param param, - const AcReal3 value) -{ - cudaSetDevice(device->id); - - if (param >= NUM_REAL3_PARAMS || !NUM_REAL3_PARAMS) - return AC_FAILURE; - - const size_t offset = (size_t)&d_mesh_info.real3_params[param] - (size_t)&d_mesh_info; - ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, - cudaMemcpyHostToDevice, device->streams[stream])); - return AC_SUCCESS; -} - -AcResult -acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param, - const int value) -{ - cudaSetDevice(device->id); - - if (param >= NUM_INT_PARAMS) - return AC_FAILURE; - - const size_t offset = (size_t)&d_mesh_info.int_params[param] - (size_t)&d_mesh_info; - ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, - cudaMemcpyHostToDevice, device->streams[stream])); - return AC_SUCCESS; -} - -AcResult -acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param, - const int3 value) -{ - cudaSetDevice(device->id); - - if (param >= NUM_INT3_PARAMS) - return AC_FAILURE; - - const size_t offset = (size_t)&d_mesh_info.int3_params[param] - (size_t)&d_mesh_info; - ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset, - cudaMemcpyHostToDevice, device->streams[stream])); + // Vector + #define LOAD_DEFAULT_UNIFORM(X) acDeviceLoadVectorUniform(device, STREAM_DEFAULT, X, X##_DEFAULT_VALUE); + AC_FOR_USER_REAL3_PARAM_TYPES(LOAD_DEFAULT_UNIFORM) + #undef LOAD_DEFAULT_UNIFORM + + // Int + #define LOAD_DEFAULT_UNIFORM(X) acDeviceLoadIntUniform(device, STREAM_DEFAULT, X, X##_DEFAULT_VALUE); + AC_FOR_USER_INT_PARAM_TYPES(LOAD_DEFAULT_UNIFORM) + #undef LOAD_DEFAULT_UNIFORM + + // Int3 + #define LOAD_DEFAULT_UNIFORM(X) acDeviceLoadInt3Uniform(device, STREAM_DEFAULT, X, X##_DEFAULT_VALUE); + AC_FOR_USER_INT3_PARAM_TYPES(LOAD_DEFAULT_UNIFORM) + #undef LOAD_DEFAULT_UNIFORM + // clang-format on + + ERRCHK_CUDA_KERNEL_ALWAYS(); return AC_SUCCESS; } diff --git a/src/core/kernels/kernels.h b/src/core/kernels/kernels.h index a0ad336..476abae 100644 --- a/src/core/kernels/kernels.h +++ b/src/core/kernels/kernels.h @@ -62,21 +62,6 @@ AcReal acKernelReduceVec(const cudaStream_t stream, const ReductionType rtype, c const int3 end, const AcReal* vtxbuf0, const AcReal* vtxbuf1, const AcReal* vtxbuf2, AcReal* scratchpad, AcReal* reduce_result); -AcResult acDeviceLoadMeshInfo(const Device device, const Stream stream, - const AcMeshInfo device_config); - -AcResult acDeviceLoadScalarUniform(const Device device, const Stream stream, - const AcRealParam param, const AcReal value); - -AcResult acDeviceLoadVectorUniform(const Device device, const Stream stream, - const AcReal3Param param, const AcReal3 value); - -AcResult acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param, - const int value); - -AcResult acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param, - const int3 value); - #ifdef __cplusplus } // extern "C" #endif