The code generator now generates more efficient CUDA code: built-in parameters are passed as const references instead of being copied
This commit is contained in:
@@ -338,8 +338,8 @@ traverse(const ASTNode* node)
|
|||||||
// Preprocessed parameter boilerplate
|
// Preprocessed parameter boilerplate
|
||||||
if (node->type == NODE_TYPE_QUALIFIER && node->token == PREPROCESSED)
|
if (node->type == NODE_TYPE_QUALIFIER && node->token == PREPROCESSED)
|
||||||
inside_preprocessed = true;
|
inside_preprocessed = true;
|
||||||
static const char
|
static const char preprocessed_parameter_boilerplate
|
||||||
preprocessed_parameter_boilerplate[] = "const int3 vertexIdx, const int3 globalVertexIdx, ";
|
[] = "const int3& vertexIdx, const int3& globalVertexIdx, ";
|
||||||
if (inside_preprocessed && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
|
if (inside_preprocessed && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
|
||||||
printf("%s ", preprocessed_parameter_boilerplate);
|
printf("%s ", preprocessed_parameter_boilerplate);
|
||||||
// BOILERPLATE END////////////////////////////////////////////////////////
|
// BOILERPLATE END////////////////////////////////////////////////////////
|
||||||
@@ -494,8 +494,8 @@ generate_preprocessed_structures(void)
|
|||||||
|
|
||||||
// FILLING THE DATA STRUCT
|
// FILLING THE DATA STRUCT
|
||||||
printf("static __device__ __forceinline__ AcRealData\
|
printf("static __device__ __forceinline__ AcRealData\
|
||||||
read_data(const int3 vertexIdx,\
|
read_data(const int3& vertexIdx,\
|
||||||
const int3 globalVertexIdx,\
|
const int3& globalVertexIdx,\
|
||||||
AcReal* __restrict__ buf[], const int handle)\
|
AcReal* __restrict__ buf[], const int handle)\
|
||||||
{\n\
|
{\n\
|
||||||
%sData data;\n",
|
%sData data;\n",
|
||||||
@@ -530,8 +530,8 @@ generate_preprocessed_structures(void)
|
|||||||
} AcReal3Data;\
|
} AcReal3Data;\
|
||||||
\
|
\
|
||||||
static __device__ __forceinline__ AcReal3Data\
|
static __device__ __forceinline__ AcReal3Data\
|
||||||
read_data(const int3 vertexIdx,\
|
read_data(const int3& vertexIdx,\
|
||||||
const int3 globalVertexIdx,\
|
const int3& globalVertexIdx,\
|
||||||
AcReal* __restrict__ buf[], const int3& handle)\
|
AcReal* __restrict__ buf[], const int3& handle)\
|
||||||
{\
|
{\
|
||||||
AcReal3Data data;\
|
AcReal3Data data;\
|
||||||
|
Reference in New Issue
Block a user