Merge branch 'cmakelist_rewrite_and_C_API_conformity_07-26' into node_device_interface_revision_07-23
This commit is contained in:
@@ -335,7 +335,8 @@ traverse(const ASTNode* node)
|
||||
// Preprocessed parameter boilerplate
|
||||
if (node->type == NODE_TYPE_QUALIFIER && node->token == PREPROCESSED)
|
||||
inside_preprocessed = true;
|
||||
static const char preprocessed_parameter_boilerplate[] = "const int3 vertexIdx, ";
|
||||
static const char
|
||||
preprocessed_parameter_boilerplate[] = "const int3 vertexIdx, const int3 globalVertexIdx, ";
|
||||
if (inside_preprocessed && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
|
||||
printf("%s ", preprocessed_parameter_boilerplate);
|
||||
// BOILERPLATE END////////////////////////////////////////////////////////
|
||||
@@ -491,6 +492,7 @@ generate_preprocessed_structures(void)
|
||||
// FILLING THE DATA STRUCT
|
||||
printf("static __device__ __forceinline__ AcRealData\
|
||||
read_data(const int3 vertexIdx,\
|
||||
const int3 globalVertexIdx,\
|
||||
AcReal* __restrict__ buf[], const int handle)\
|
||||
{\n\
|
||||
%sData data;\n",
|
||||
@@ -498,7 +500,7 @@ generate_preprocessed_structures(void)
|
||||
|
||||
for (int i = 0; i < num_symbols; ++i) {
|
||||
if (symbol_table[i].type_qualifier == PREPROCESSED)
|
||||
printf("data.%s = preprocessed_%s(vertexIdx, buf[handle]);\n",
|
||||
printf("data.%s = preprocessed_%s(vertexIdx, globalVertexIdx, buf[handle]);\n",
|
||||
symbol_table[i].identifier, symbol_table[i].identifier);
|
||||
}
|
||||
printf("return data;\n");
|
||||
@@ -526,13 +528,14 @@ generate_preprocessed_structures(void)
|
||||
\
|
||||
static __device__ __forceinline__ AcReal3Data\
|
||||
read_data(const int3 vertexIdx,\
|
||||
const int3 globalVertexIdx,\
|
||||
AcReal* __restrict__ buf[], const int3& handle)\
|
||||
{\
|
||||
AcReal3Data data;\
|
||||
\
|
||||
data.x = read_data(vertexIdx, buf, handle.x);\
|
||||
data.y = read_data(vertexIdx, buf, handle.y);\
|
||||
data.z = read_data(vertexIdx, buf, handle.z);\
|
||||
data.x = read_data(vertexIdx, globalVertexIdx, buf, handle.x);\
|
||||
data.y = read_data(vertexIdx, globalVertexIdx, buf, handle.y);\
|
||||
data.z = read_data(vertexIdx, globalVertexIdx, buf, handle.z);\
|
||||
\
|
||||
return data;\
|
||||
}\
|
||||
|
@@ -12,8 +12,7 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
|
||||
-gencode arch=compute_61,code=sm_61
|
||||
-lineinfo
|
||||
-ftz=true # Flush denormalized floats to zero
|
||||
-std=c++11
|
||||
--compiler-options -march=native) # Native host machine code
|
||||
-std=c++11)
|
||||
#--maxrregcount=255
|
||||
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
|
||||
# =cg to opt out
|
||||
|
@@ -659,7 +659,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
||||
}
|
||||
|
||||
#define WRITE_OUT(handle, value) (write(buffer.out, handle, idx, value))
|
||||
#define READ(handle) (read_data(vertexIdx, buffer.in, handle))
|
||||
#define READ(handle) (read_data(vertexIdx, globalVertexIdx, buffer.in, handle))
|
||||
#define READ_OUT(handle) (read_out(idx, buffer.out, handle))
|
||||
|
||||
// also write for clarity here, not for the DSL
|
||||
|
@@ -21,7 +21,7 @@ if (BUILD_RT_VISUALIZATION)
|
||||
endif ()
|
||||
|
||||
## Compilation flags
|
||||
add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS})
|
||||
add_compile_options(-pipe ${OpenMP_CXX_FLAGS})
|
||||
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
|
||||
|
||||
## Compile and link
|
||||
|
Reference in New Issue
Block a user