Enabled the generation of API hooks for calling DSL functions (this was previously disabled because it broke compilation)

This commit is contained in:
jpekkila
2019-12-03 15:17:27 +02:00
parent 5a6a3110df
commit 7e4212ddd9

View File

@@ -136,12 +136,10 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
\ \
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z); const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
// clang-format off
/*
#define GEN_DEVICE_FUNC_HOOK(identifier) \ #define GEN_DEVICE_FUNC_HOOK(identifier) \
template <int step_number> \ template <int step_number> \
AcResult acDeviceKernel_##identifier(const cudaStream_t stream, \ AcResult acDeviceKernel_##identifier(const cudaStream_t stream, const int3 start, \
const int3 start, const int3 end, VertexBufferArray vba) \ const int3 end, VertexBufferArray vba) \
{ \ { \
\ \
const dim3 tpb(32, 1, 4); \ const dim3 tpb(32, 1, 4); \
@@ -151,14 +149,11 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
(unsigned int)ceil(n.y / AcReal(tpb.y)), \ (unsigned int)ceil(n.y / AcReal(tpb.y)), \
(unsigned int)ceil(n.z / AcReal(tpb.z))); \ (unsigned int)ceil(n.z / AcReal(tpb.z))); \
\ \
identifier<step_number> \ identifier<step_number><<<bpg, tpb, 0, stream>>>(start, end, vba); \
<<<bpg, tpb, 0, stream>>>(start, end, vba); \
ERRCHK_CUDA_KERNEL(); \ ERRCHK_CUDA_KERNEL(); \
\ \
return AC_SUCCESS; \ return AC_SUCCESS; \
} }
*/
#define GEN_DEVICE_FUNC_HOOK(identifier)
#include "user_kernels.h" #include "user_kernels.h"
@@ -204,7 +199,8 @@ acKernelAutoOptimizeIntegration(const int3 start, const int3 end, VertexBufferAr
cudaEventCreate(&tstop); cudaEventCreate(&tstop);
// #ifdef AC_dt // #ifdef AC_dt
//acDeviceLoadScalarUniform(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON); // TODO note, temporarily disabled // acDeviceLoadScalarUniform(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON); // TODO
// note, temporarily disabled
/*#else /*#else
ERROR("FATAL ERROR: acDeviceAutoOptimize() or ERROR("FATAL ERROR: acDeviceAutoOptimize() or
acDeviceIntegrateSubstep() was " "called, but AC_dt was not defined. Either define acDeviceIntegrateSubstep() was " "called, but AC_dt was not defined. Either define
@@ -245,7 +241,8 @@ acKernelAutoOptimizeIntegration(const int3 start, const int3 end, VertexBufferAr
} }
AcResult AcResult
acKernelIntegrateSubstep(const cudaStream_t stream, const int step_number, const int3 start, const int3 end, VertexBufferArray vba) acKernelIntegrateSubstep(const cudaStream_t stream, const int step_number, const int3 start,
const int3 end, VertexBufferArray vba)
{ {
const dim3 tpb = rk3_tpb; const dim3 tpb = rk3_tpb;
@@ -255,7 +252,7 @@ acKernelIntegrateSubstep(const cudaStream_t stream, const int step_number, const
(unsigned int)ceil(n.z / AcReal(tpb.z))); (unsigned int)ceil(n.z / AcReal(tpb.z)));
//#ifdef AC_dt //#ifdef AC_dt
//acDeviceLoadScalarUniform(device, stream, AC_dt, dt); // acDeviceLoadScalarUniform(device, stream, AC_dt, dt);
/*#else /*#else
(void)dt; (void)dt;
ERROR("FATAL ERROR: acDeviceAutoOptimize() or acDeviceIntegrateSubstep() was " ERROR("FATAL ERROR: acDeviceAutoOptimize() or acDeviceIntegrateSubstep() was "