Stashed some testing files used to make sure that the library can also be used from pure C projects (better compatibility). These changes will never go to master as-is.
This commit is contained in:
@@ -12,7 +12,7 @@
|
||||
|
||||
#-------------------General---------------------------------------------------#
|
||||
|
||||
project(ASTAROTH_2.0 CXX)
|
||||
project(ASTAROTH_2.0 C CXX)
|
||||
set (CMAKE_CXX_STANDARD 11)
|
||||
cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support
|
||||
cmake_policy (SET CMP0023 NEW)
|
||||
@@ -181,3 +181,5 @@ if (BUILD_STANDALONE)
|
||||
cuda_add_executable(ac_run src/standalone/main.cc)
|
||||
target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY})
|
||||
endif()
|
||||
|
||||
add_subdirectory(ctest)
|
||||
|
3
ctest/CMakeLists.txt
Normal file
3
ctest/CMakeLists.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
include_directories(${CMAKE_SOURCE_DIR}/include)
|
||||
add_executable(ctest main.c)
|
||||
target_link_libraries(ctest astaroth_core)
|
17
ctest/main.c
Normal file
17
ctest/main.c
Normal file
@@ -0,0 +1,17 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "astaroth.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
AcMeshInfo info = {
|
||||
.int_params[AC_mx] = 128,
|
||||
.int_params[AC_my] = 64,
|
||||
.int_params[AC_mz] = 32,
|
||||
};
|
||||
acInit(info);
|
||||
acQuit();
|
||||
return EXIT_SUCCESS;
|
||||
}
|
@@ -40,7 +40,7 @@ AcResult acSynchronizeMesh(void);
|
||||
initialization of *all memory needed on all GPUs in the node*. In other words,
|
||||
setups everything GPU-side so that calling any other GPU interface function
|
||||
afterwards does not result in illegal memory accesses. */
|
||||
AcResult acInit(const AcMeshInfo& mesh_info);
|
||||
AcResult acInit(const AcMeshInfo mesh_info);
|
||||
|
||||
/** Frees all GPU allocations and resets all devices in the node. Should be
|
||||
* called at exit. */
|
||||
@@ -49,23 +49,23 @@ AcResult acQuit(void);
|
||||
/** Does all three substeps of the RK3 integration and computes the boundary
|
||||
conditions when necessary. The result is synchronized and the boundary conditions are applied
|
||||
after the final substep, after which the result can be fetched to CPU memory with acStore. */
|
||||
AcResult acIntegrate(const AcReal& dt);
|
||||
AcResult acIntegrate(const AcReal dt);
|
||||
|
||||
/** Performs a scalar reduction on all GPUs in the node and returns the result. Operates on the
|
||||
* whole computational domain, which must be up to date and synchronized before calling
|
||||
* acReduceScal.
|
||||
*/
|
||||
AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a);
|
||||
AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle a);
|
||||
|
||||
/** Performs a vector reduction on all GPUs in the node and returns the result. Operates on the
|
||||
* whole computational domain, which must be up to date and synchronized before calling
|
||||
* acReduceVec.
|
||||
*/
|
||||
AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a,
|
||||
const VertexBufferHandle& b, const VertexBufferHandle& c);
|
||||
AcReal acReduceVec(const ReductionType rtype, const VertexBufferHandle a,
|
||||
const VertexBufferHandle b, const VertexBufferHandle c);
|
||||
|
||||
/** Distributes the host mesh among the GPUs in the node. Synchronous. */
|
||||
AcResult acLoad(const AcMesh& host_mesh);
|
||||
AcResult acLoad(const AcMesh host_mesh);
|
||||
|
||||
/** Gathers the mesh stored across GPUs in the node and stores it back to host memory. Synchronous.
|
||||
*/
|
||||
@@ -82,32 +82,38 @@ AcResult acLoadDeviceConstantAsync(const AcRealParam param, const AcReal value,
|
||||
const StreamType stream);
|
||||
|
||||
/** Splits a subset of the host_mesh and distributes it among the GPUs in the node. Asynchronous. */
|
||||
AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices);
|
||||
AcResult acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& start, const int num_vertices,
|
||||
AcResult acLoadWithOffset(const AcMesh host_mesh, const int3 start, const int num_vertices);
|
||||
AcResult acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 start, const int num_vertices,
|
||||
const StreamType stream);
|
||||
|
||||
/** Gathers a subset of the data distributed among the GPUs in the node and stores the mesh back to
|
||||
* CPU memory. Asynchronous.
|
||||
*/
|
||||
AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh);
|
||||
AcResult acStoreWithOffsetAsync(const int3& start, const int num_vertices, AcMesh* host_mesh,
|
||||
AcResult acStoreWithOffset(const int3 start, const int num_vertices, AcMesh* host_mesh);
|
||||
AcResult acStoreWithOffsetAsync(const int3 start, const int num_vertices, AcMesh* host_mesh,
|
||||
const StreamType stream);
|
||||
|
||||
/** Performs a single RK3 step without computing boundary conditions. Asynchronous.*/
|
||||
AcResult acIntegrateStep(const int& isubstep, const AcReal& dt);
|
||||
AcResult acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream);
|
||||
AcResult acIntegrateStep(const int isubstep, const AcReal dt);
|
||||
AcResult acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream);
|
||||
|
||||
/** Performs a single RK3 step on a subset of the mesh without computing the boundary conditions.
|
||||
* Asynchronous.*/
|
||||
AcResult acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start,
|
||||
const int3& end);
|
||||
AcResult acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start,
|
||||
const int3& end, const StreamType stream);
|
||||
AcResult acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start,
|
||||
const int3 end);
|
||||
AcResult acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start,
|
||||
const int3 end, const StreamType stream);
|
||||
|
||||
/** Performs the boundary condition step on the GPUs in the node. Asynchronous. */
|
||||
AcResult acBoundcondStep(void);
|
||||
AcResult acBoundcondStepAsync(const StreamType stream);
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Revised interface
|
||||
* =============================================================================
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@@ -140,31 +140,31 @@ typedef struct {
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline size_t
|
||||
acVertexBufferSize(const AcMeshInfo& info)
|
||||
acVertexBufferSize(const AcMeshInfo info)
|
||||
{
|
||||
return info.int_params[AC_mx] * info.int_params[AC_my] * info.int_params[AC_mz];
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
acVertexBufferSizeBytes(const AcMeshInfo& info)
|
||||
acVertexBufferSizeBytes(const AcMeshInfo info)
|
||||
{
|
||||
return sizeof(AcReal) * acVertexBufferSize(info);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
acVertexBufferCompdomainSize(const AcMeshInfo& info)
|
||||
acVertexBufferCompdomainSize(const AcMeshInfo info)
|
||||
{
|
||||
return info.int_params[AC_nx] * info.int_params[AC_ny] * info.int_params[AC_nz];
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
acVertexBufferCompdomainSizeBytes(const AcMeshInfo& info)
|
||||
acVertexBufferCompdomainSizeBytes(const AcMeshInfo info)
|
||||
{
|
||||
return sizeof(AcReal) * acVertexBufferCompdomainSize(info);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info)
|
||||
acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo info)
|
||||
{
|
||||
return i + //
|
||||
j * info.int_params[AC_mx] + //
|
||||
@@ -173,25 +173,25 @@ acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info)
|
||||
|
||||
/*
|
||||
static inline int
|
||||
acGetParam(const AcMeshInfo& info, const AcIntParam param)
|
||||
acGetParam(const AcMeshInfo info, const AcIntParam param)
|
||||
{
|
||||
return info.int_params[param];
|
||||
}
|
||||
|
||||
static inline int3
|
||||
acGetParam(const AcMeshInfo& info, const AcInt3Param param)
|
||||
acGetParam(const AcMeshInfo info, const AcInt3Param param)
|
||||
{
|
||||
return info.int3_params[param];
|
||||
}
|
||||
|
||||
static inline AcReal
|
||||
acGetParam(const AcMeshInfo& info, const AcRealParam param)
|
||||
acGetParam(const AcMeshInfo info, const AcRealParam param)
|
||||
{
|
||||
return info.real_params[param];
|
||||
}
|
||||
|
||||
static inline AcReal3
|
||||
acGetParam(const AcMeshInfo& info, const AcReal3Param param)
|
||||
acGetParam(const AcMeshInfo info, const AcReal3Param param)
|
||||
{
|
||||
return info.real3_params[param];
|
||||
}
|
||||
|
@@ -57,3 +57,4 @@ file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu")
|
||||
# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed
|
||||
# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed
|
||||
CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")
|
||||
target_link_libraries(astaroth_core m)
|
||||
|
@@ -127,8 +127,8 @@
|
||||
#include "errchk.h"
|
||||
|
||||
#include "device.cuh"
|
||||
#include "math_utils.h" // sum for reductions
|
||||
#include "standalone/config_loader.h" // update_config
|
||||
#include "math_utils.h" // sum for reductions
|
||||
// #include "standalone/config_loader.h" // update_config
|
||||
|
||||
#define AC_GEN_STR(X) #X
|
||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||
@@ -156,7 +156,7 @@ gridIdx(const Grid grid, const int3 idx)
|
||||
}
|
||||
|
||||
static int3
|
||||
gridIdx3d(const Grid& grid, const int idx)
|
||||
gridIdx3d(const Grid grid, const int idx)
|
||||
{
|
||||
return (int3){idx % grid.m.x, (idx % (grid.m.x * grid.m.y)) / grid.m.x,
|
||||
idx / (grid.m.x * grid.m.y)};
|
||||
@@ -168,8 +168,49 @@ printInt3(const int3 vec)
|
||||
printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
|
||||
}
|
||||
|
||||
static inline void
|
||||
print(const AcMeshInfo config)
|
||||
{
|
||||
for (int i = 0; i < NUM_INT_PARAMS; ++i)
|
||||
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
|
||||
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
|
||||
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
|
||||
}
|
||||
|
||||
static void
|
||||
update_builtin_params(AcMeshInfo* config)
|
||||
{
|
||||
config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER;
|
||||
///////////// PAD TEST
|
||||
// config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER + PAD_SIZE;
|
||||
///////////// PAD TEST
|
||||
config->int_params[AC_my] = config->int_params[AC_ny] + STENCIL_ORDER;
|
||||
config->int_params[AC_mz] = config->int_params[AC_nz] + STENCIL_ORDER;
|
||||
|
||||
// Bounds for the computational domain, i.e. nx_min <= i < nx_max
|
||||
config->int_params[AC_nx_min] = NGHOST;
|
||||
config->int_params[AC_nx_max] = config->int_params[AC_nx_min] + config->int_params[AC_nx];
|
||||
config->int_params[AC_ny_min] = NGHOST;
|
||||
config->int_params[AC_ny_max] = config->int_params[AC_ny] + NGHOST;
|
||||
config->int_params[AC_nz_min] = NGHOST;
|
||||
config->int_params[AC_nz_max] = config->int_params[AC_nz] + NGHOST;
|
||||
|
||||
/* Additional helper params */
|
||||
// Int helpers
|
||||
config->int_params[AC_mxy] = config->int_params[AC_mx] * config->int_params[AC_my];
|
||||
config->int_params[AC_nxy] = config->int_params[AC_nx] * config->int_params[AC_ny];
|
||||
config->int_params[AC_nxyz] = config->int_params[AC_nxy] * config->int_params[AC_nz];
|
||||
|
||||
#if VERBOSE_PRINTING // Defined in astaroth.h
|
||||
printf("###############################################################\n");
|
||||
printf("Config dimensions recalculated:\n");
|
||||
print(*config);
|
||||
printf("###############################################################\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static Grid
|
||||
createGrid(const AcMeshInfo& config)
|
||||
createGrid(const AcMeshInfo config)
|
||||
{
|
||||
Grid grid;
|
||||
|
||||
@@ -246,7 +287,7 @@ acSynchronizeMesh(void)
|
||||
}
|
||||
|
||||
AcResult
|
||||
acInit(const AcMeshInfo& config)
|
||||
acInit(const AcMeshInfo config)
|
||||
{
|
||||
// Get num_devices
|
||||
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
|
||||
@@ -274,7 +315,7 @@ acInit(const AcMeshInfo& config)
|
||||
// Subgrids
|
||||
AcMeshInfo subgrid_config = config;
|
||||
subgrid_config.int_params[AC_nz] /= num_devices;
|
||||
update_config(&subgrid_config);
|
||||
update_builtin_params(&subgrid_config);
|
||||
subgrid = createGrid(subgrid_config);
|
||||
|
||||
// Periodic boundary conditions become weird if the system can "fold unto itself".
|
||||
@@ -337,8 +378,8 @@ acQuit(void)
|
||||
}
|
||||
|
||||
AcResult
|
||||
acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start,
|
||||
const int3& end, const StreamType stream)
|
||||
acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start,
|
||||
const int3 end, const StreamType stream)
|
||||
{
|
||||
// See the beginning of the file for an explanation of the index mapping
|
||||
// #pragma omp parallel for
|
||||
@@ -360,13 +401,13 @@ acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3
|
||||
}
|
||||
|
||||
AcResult
|
||||
acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start, const int3& end)
|
||||
acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start, const int3 end)
|
||||
{
|
||||
return acIntegrateStepWithOffsetAsync(isubstep, dt, start, end, STREAM_DEFAULT);
|
||||
}
|
||||
|
||||
AcResult
|
||||
acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream)
|
||||
acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream)
|
||||
{
|
||||
const int3 start = (int3){NGHOST, NGHOST, NGHOST};
|
||||
const int3 end = start + grid.n;
|
||||
@@ -374,7 +415,7 @@ acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType str
|
||||
}
|
||||
|
||||
AcResult
|
||||
acIntegrateStep(const int& isubstep, const AcReal& dt)
|
||||
acIntegrateStep(const int isubstep, const AcReal dt)
|
||||
{
|
||||
return acIntegrateStepAsync(isubstep, dt, STREAM_DEFAULT);
|
||||
}
|
||||
@@ -452,7 +493,7 @@ swap_buffers(void)
|
||||
}
|
||||
|
||||
AcResult
|
||||
acIntegrate(const AcReal& dt)
|
||||
acIntegrate(const AcReal dt)
|
||||
{
|
||||
acSynchronizeStream(STREAM_ALL);
|
||||
for (int isubstep = 0; isubstep < 3; ++isubstep) {
|
||||
@@ -464,7 +505,7 @@ acIntegrate(const AcReal& dt)
|
||||
}
|
||||
|
||||
static AcReal
|
||||
simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, const int& n)
|
||||
simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const int n)
|
||||
{
|
||||
AcReal res = results[0];
|
||||
for (int i = 1; i < n; ++i) {
|
||||
@@ -491,7 +532,7 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons
|
||||
}
|
||||
|
||||
AcReal
|
||||
acReduceScal(const ReductionType& rtype, const VertexBufferHandle& vtxbuffer_handle)
|
||||
acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuffer_handle)
|
||||
{
|
||||
acSynchronizeStream(STREAM_ALL);
|
||||
|
||||
@@ -505,8 +546,8 @@ acReduceScal(const ReductionType& rtype, const VertexBufferHandle& vtxbuffer_han
|
||||
}
|
||||
|
||||
AcReal
|
||||
acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const VertexBufferHandle& b,
|
||||
const VertexBufferHandle& c)
|
||||
acReduceVec(const ReductionType rtype, const VertexBufferHandle a, const VertexBufferHandle b,
|
||||
const VertexBufferHandle c)
|
||||
{
|
||||
acSynchronizeStream(STREAM_ALL);
|
||||
|
||||
@@ -520,7 +561,7 @@ acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const Verte
|
||||
}
|
||||
|
||||
AcResult
|
||||
acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_vertices,
|
||||
acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 src, const int num_vertices,
|
||||
const StreamType stream)
|
||||
{
|
||||
// See the beginning of the file for an explanation of the index mapping
|
||||
@@ -558,13 +599,13 @@ acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_ve
|
||||
}
|
||||
|
||||
AcResult
|
||||
acLoadWithOffset(const AcMesh& host_mesh, const int3& src, const int num_vertices)
|
||||
acLoadWithOffset(const AcMesh host_mesh, const int3 src, const int num_vertices)
|
||||
{
|
||||
return acLoadWithOffsetAsync(host_mesh, src, num_vertices, STREAM_DEFAULT);
|
||||
}
|
||||
|
||||
AcResult
|
||||
acLoad(const AcMesh& host_mesh)
|
||||
acLoad(const AcMesh host_mesh)
|
||||
{
|
||||
acLoadWithOffset(host_mesh, (int3){0, 0, 0}, acVertexBufferSize(host_mesh.info));
|
||||
acSynchronizeStream(STREAM_ALL);
|
||||
@@ -572,7 +613,7 @@ acLoad(const AcMesh& host_mesh)
|
||||
}
|
||||
|
||||
AcResult
|
||||
acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mesh,
|
||||
acStoreWithOffsetAsync(const int3 src, const int num_vertices, AcMesh* host_mesh,
|
||||
const StreamType stream)
|
||||
{
|
||||
// See the beginning of the file for an explanation of the index mapping
|
||||
@@ -597,7 +638,7 @@ acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mes
|
||||
}
|
||||
|
||||
AcResult
|
||||
acStoreWithOffset(const int3& src, const int num_vertices, AcMesh* host_mesh)
|
||||
acStoreWithOffset(const int3 src, const int num_vertices, AcMesh* host_mesh)
|
||||
{
|
||||
return acStoreWithOffsetAsync(src, num_vertices, host_mesh, STREAM_DEFAULT);
|
||||
}
|
||||
@@ -625,3 +666,9 @@ acLoadDeviceConstant(const AcRealParam param, const AcReal value)
|
||||
{
|
||||
return acLoadDeviceConstantAsync(param, value, STREAM_DEFAULT);
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Revised interface
|
||||
* =============================================================================
|
||||
*/
|
||||
|
@@ -512,3 +512,9 @@ autoOptimize(const Device device)
|
||||
#if PACKED_DATA_TRANSFERS
|
||||
// Functions for calling packed data transfers
|
||||
#endif
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Revised interface
|
||||
* =============================================================================
|
||||
*/
|
||||
|
@@ -99,3 +99,9 @@ AcResult autoOptimize(const Device device);
|
||||
#if PACKED_DATA_TRANSFERS
|
||||
// Declarations used for packed data transfers
|
||||
#endif
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Revised interface
|
||||
* =============================================================================
|
||||
*/
|
||||
|
Reference in New Issue
Block a user