Attempting to make the kernels go where they should.

This commit is contained in:
Miikka Vaisala
2020-09-18 16:55:36 +08:00
parent 67aa87731b
commit f736aa1cd1
3 changed files with 26 additions and 5 deletions

View File

@@ -436,19 +436,19 @@ acDevicePeriodicBoundconds(const Device device, const Stream stream, const int3
AcResult AcResult
acDeviceGeneralBoundcondStep(const Device device, const Stream stream, acDeviceGeneralBoundcondStep(const Device device, const Stream stream,
const VertexBufferHandle vtxbuf_handle, const int3 start, const VertexBufferHandle vtxbuf_handle, const int3 start,
const int3 end) const int3 end, const int bound_direction)
{ {
cudaSetDevice(device->id); cudaSetDevice(device->id);
return acKernelGeneralBoundconds(device->streams[stream], start, end, return acKernelGeneralBoundconds(device->streams[stream], start, end,
device->vba.in[vtxbuf_handle]); device->vba.in[vtxbuf_handle], bound_direction);
} }
AcResult AcResult
acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start, acDeviceGeneralBoundconds(const Device device, const Stream stream, const int3 start,
const int3 end) const int3 end, const int bound_direction)
{ {
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) { for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end); acDeviceGeneralBoundcondStep(device, stream, (VertexBufferHandle)i, start, end, bound_direction);
} }
return AC_SUCCESS; return AC_SUCCESS;
} }
@@ -1844,7 +1844,8 @@ acGridIntegrate(const Stream stream, const AcReal dt)
(pid3d.y == 0) || (pid3d.y == decomposition.y - 1) || (pid3d.y == 0) || (pid3d.y == decomposition.y - 1) ||
(pid3d.z == 0) || (pid3d.z == decomposition.z - 1) ||) (pid3d.z == 0) || (pid3d.z == decomposition.z - 1) ||)
{ {
acDeviceGeneralBoundconds(device, stream, m1, m2); //TODO get bound_direction
acDeviceGeneralBoundconds(device, stream, m1, m2, bound_direction);
} }
acGridSynchronizeStream(stream); acGridSynchronizeStream(stream);

View File

@@ -60,3 +60,20 @@ acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const in
ERRCHK_CUDA_KERNEL(); ERRCHK_CUDA_KERNEL();
return AC_SUCCESS; return AC_SUCCESS;
} }
/**
 * Host-side launcher for the general (non-periodic) boundary condition kernels.
 *
 * Builds a 3D launch configuration covering the index range [start, end) with
 * 8x2x8 thread blocks and, when the configured boundary type is BOUNDCOND_SYM,
 * enqueues kernel_symmetric_boundconds on `stream`. For any other boundary type
 * no kernel is launched.
 *
 * @param stream          CUDA stream the kernel is enqueued on.
 * @param start           First index of the region to process (per axis).
 * @param end             End index of the region; the grid is sized from end - start.
 * @param vtxbuf          Vertex buffer passed through to the kernel.
 * @param bound_direction Forwarded unchanged to the kernel
 *                        (presumably selects which boundary face is handled -- TODO confirm).
 * @return AC_SUCCESS in all cases; launch errors are handled by ERRCHK_CUDA_KERNEL().
 */
AcResult
acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
                          AcReal* vtxbuf, const int bound_direction)
{
    // Threads per block, and enough blocks per axis to cover the whole range
    // (rounded up, so the kernel is expected to bounds-check the tail itself).
    const dim3 tpb(8, 2, 8);
    const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x),
                   (unsigned int)ceil((end.y - start.y) / (float)tpb.y),
                   (unsigned int)ceil((end.z - start.z) / (float)tpb.z));

    // NOTE(review): `AC_bype` is not declared anywhere in the visible source and
    // looks like a typo for the boundary-condition-type parameter -- confirm the name.
    // NOTE(review): DCONST is evaluated here in a host function; verify that it is
    // usable from host code, otherwise read the value from the host-side config.
    if (DCONST(AC_bype) == BOUNDCOND_SYM) {
        kernel_symmetric_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vtxbuf, bound_direction);
        ERRCHK_CUDA_KERNEL();
    }

    return AC_SUCCESS;
}

View File

@@ -43,6 +43,9 @@ extern "C" {
/** */ /** */
AcResult acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const int3 end, AcResult acKernelPeriodicBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
AcReal* vtxbuf); AcReal* vtxbuf);
/**
 * Launcher for the general (non-periodic) boundary condition kernels.
 * Takes the CUDA stream to launch on, the start/end indices of the region to
 * process, the vertex buffer to operate on, and a bound_direction value
 * (presumably selects which boundary face is handled -- TODO confirm).
 * Returns an AcResult status code.
 */
AcResult acKernelGeneralBoundconds(const cudaStream_t stream, const int3 start, const int3 end,
AcReal* vtxbuf, const int bound_direction);
/** */ /** */
AcResult acKernelDummy(void); AcResult acKernelDummy(void);