From b65454d523cc17bf383353f083a7e919608ea93d Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 23 Jul 2019 18:24:47 +0300 Subject: [PATCH 01/18] Stashed some testing files used to make sure that the library can also be used from pure C projects (better compatibility). These changes will never go to master as-is. --- CMakeLists.txt | 4 +- ctest/CMakeLists.txt | 3 ++ ctest/main.c | 17 ++++++++ include/astaroth.h | 38 +++++++++------- include/astaroth_defines.h | 18 ++++---- src/core/CMakeLists.txt | 1 + src/core/astaroth.cu | 89 +++++++++++++++++++++++++++++--------- src/core/device.cu | 6 +++ src/core/device.cuh | 6 +++ 9 files changed, 135 insertions(+), 47 deletions(-) create mode 100644 ctest/CMakeLists.txt create mode 100644 ctest/main.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b47e13..a3da8a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ #-------------------General---------------------------------------------------# -project(ASTAROTH_2.0 CXX) +project(ASTAROTH_2.0 C CXX) set (CMAKE_CXX_STANDARD 11) cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support cmake_policy (SET CMP0023 NEW) @@ -181,3 +181,5 @@ if (BUILD_STANDALONE) cuda_add_executable(ac_run src/standalone/main.cc) target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY}) endif() + +add_subdirectory(ctest) diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt new file mode 100644 index 0000000..b2c84bf --- /dev/null +++ b/ctest/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(${CMAKE_SOURCE_DIR}/include) +add_executable(ctest main.c) +target_link_libraries(ctest astaroth_core) diff --git a/ctest/main.c b/ctest/main.c new file mode 100644 index 0000000..5ed5ffc --- /dev/null +++ b/ctest/main.c @@ -0,0 +1,17 @@ +#include +#include + +#include "astaroth.h" + +int +main(void) +{ + AcMeshInfo info = { + .int_params[AC_mx] = 128, + .int_params[AC_my] = 64, + .int_params[AC_mz] = 32, + }; + acInit(info); + acQuit(); + 
return EXIT_SUCCESS; +} diff --git a/include/astaroth.h b/include/astaroth.h index be0081e..52ba3d0 100644 --- a/include/astaroth.h +++ b/include/astaroth.h @@ -40,7 +40,7 @@ AcResult acSynchronizeMesh(void); initialization of *all memory needed on all GPUs in the node*. In other words, setups everything GPU-side so that calling any other GPU interface function afterwards does not result in illegal memory accesses. */ -AcResult acInit(const AcMeshInfo& mesh_info); +AcResult acInit(const AcMeshInfo mesh_info); /** Frees all GPU allocations and resets all devices in the node. Should be * called at exit. */ @@ -49,23 +49,23 @@ AcResult acQuit(void); /** Does all three substeps of the RK3 integration and computes the boundary conditions when necessary. The result is synchronized and the boundary conditions are applied after the final substep, after which the result can be fetched to CPU memory with acStore. */ -AcResult acIntegrate(const AcReal& dt); +AcResult acIntegrate(const AcReal dt); /** Performs a scalar reduction on all GPUs in the node and returns the result. Operates on the * whole computational domain, which must be up to date and synchronized before calling * acReduceScal. */ -AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a); +AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle a); /** Performs a vector reduction on all GPUs in the node and returns the result. Operates on the * whole computational domain, which must be up to date and synchronized before calling * acReduceVec. */ -AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, - const VertexBufferHandle& b, const VertexBufferHandle& c); +AcReal acReduceVec(const ReductionType rtype, const VertexBufferHandle a, + const VertexBufferHandle b, const VertexBufferHandle c); /** Distributes the host mesh among the GPUs in the node. Synchronous. 
*/ -AcResult acLoad(const AcMesh& host_mesh); +AcResult acLoad(const AcMesh host_mesh); /** Gathers the mesh stored across GPUs in the node and stores it back to host memory. Synchronous. */ @@ -82,32 +82,38 @@ AcResult acLoadDeviceConstantAsync(const AcRealParam param, const AcReal value, const StreamType stream); /** Splits a subset of the host_mesh and distributes it among the GPUs in the node. Asynchronous. */ -AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices); -AcResult acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& start, const int num_vertices, +AcResult acLoadWithOffset(const AcMesh host_mesh, const int3 start, const int num_vertices); +AcResult acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 start, const int num_vertices, const StreamType stream); /** Gathers a subset of the data distributed among the GPUs in the node and stores the mesh back to * CPU memory. Asynchronous. */ -AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh); -AcResult acStoreWithOffsetAsync(const int3& start, const int num_vertices, AcMesh* host_mesh, +AcResult acStoreWithOffset(const int3 start, const int num_vertices, AcMesh* host_mesh); +AcResult acStoreWithOffsetAsync(const int3 start, const int num_vertices, AcMesh* host_mesh, const StreamType stream); /** Performs a single RK3 step without computing boundary conditions. Asynchronous.*/ -AcResult acIntegrateStep(const int& isubstep, const AcReal& dt); -AcResult acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream); +AcResult acIntegrateStep(const int isubstep, const AcReal dt); +AcResult acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream); /** Performs a single RK3 step on a subset of the mesh without computing the boundary conditions. 
* Asynchronous.*/ -AcResult acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start, - const int3& end); -AcResult acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start, - const int3& end, const StreamType stream); +AcResult acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start, + const int3 end); +AcResult acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start, + const int3 end, const StreamType stream); /** Performs the boundary condition step on the GPUs in the node. Asynchronous. */ AcResult acBoundcondStep(void); AcResult acBoundcondStepAsync(const StreamType stream); +/* + * ============================================================================= + * Revised interface + * ============================================================================= + */ + #ifdef __cplusplus } // extern "C" #endif diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h index ac9804b..0915d79 100644 --- a/include/astaroth_defines.h +++ b/include/astaroth_defines.h @@ -140,31 +140,31 @@ typedef struct { * ============================================================================= */ static inline size_t -acVertexBufferSize(const AcMeshInfo& info) +acVertexBufferSize(const AcMeshInfo info) { return info.int_params[AC_mx] * info.int_params[AC_my] * info.int_params[AC_mz]; } static inline size_t -acVertexBufferSizeBytes(const AcMeshInfo& info) +acVertexBufferSizeBytes(const AcMeshInfo info) { return sizeof(AcReal) * acVertexBufferSize(info); } static inline size_t -acVertexBufferCompdomainSize(const AcMeshInfo& info) +acVertexBufferCompdomainSize(const AcMeshInfo info) { return info.int_params[AC_nx] * info.int_params[AC_ny] * info.int_params[AC_nz]; } static inline size_t -acVertexBufferCompdomainSizeBytes(const AcMeshInfo& info) +acVertexBufferCompdomainSizeBytes(const AcMeshInfo info) { return sizeof(AcReal) * 
acVertexBufferCompdomainSize(info); } static inline size_t -acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info) +acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo info) { return i + // j * info.int_params[AC_mx] + // @@ -173,25 +173,25 @@ acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info) /* static inline int -acGetParam(const AcMeshInfo& info, const AcIntParam param) +acGetParam(const AcMeshInfo info, const AcIntParam param) { return info.int_params[param]; } static inline int3 -acGetParam(const AcMeshInfo& info, const AcInt3Param param) +acGetParam(const AcMeshInfo info, const AcInt3Param param) { return info.int3_params[param]; } static inline AcReal -acGetParam(const AcMeshInfo& info, const AcRealParam param) +acGetParam(const AcMeshInfo info, const AcRealParam param) { return info.real_params[param]; } static inline AcReal3 -acGetParam(const AcMeshInfo& info, const AcReal3Param param) +acGetParam(const AcMeshInfo info, const AcReal3Param param) { return info.real3_params[param]; } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 68cdb25..79560c9 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -57,3 +57,4 @@ file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu") # With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed # With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic") +target_link_libraries(astaroth_core m) diff --git a/src/core/astaroth.cu b/src/core/astaroth.cu index fadf4c0..05af3d3 100644 --- a/src/core/astaroth.cu +++ b/src/core/astaroth.cu @@ -127,8 +127,8 @@ #include "errchk.h" #include "device.cuh" -#include "math_utils.h" // sum for reductions -#include "standalone/config_loader.h" // update_config +#include "math_utils.h" // sum for reductions +// #include "standalone/config_loader.h" // update_config #define AC_GEN_STR(X) #X const char* 
intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) // @@ -156,7 +156,7 @@ gridIdx(const Grid grid, const int3 idx) } static int3 -gridIdx3d(const Grid& grid, const int idx) +gridIdx3d(const Grid grid, const int idx) { return (int3){idx % grid.m.x, (idx % (grid.m.x * grid.m.y)) / grid.m.x, idx / (grid.m.x * grid.m.y)}; @@ -168,8 +168,49 @@ printInt3(const int3 vec) printf("(%d, %d, %d)", vec.x, vec.y, vec.z); } +static inline void +print(const AcMeshInfo config) +{ + for (int i = 0; i < NUM_INT_PARAMS; ++i) + printf("[%s]: %d\n", intparam_names[i], config.int_params[i]); + for (int i = 0; i < NUM_REAL_PARAMS; ++i) + printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i])); +} + +static void +update_builtin_params(AcMeshInfo* config) +{ + config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER; + ///////////// PAD TEST + // config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER + PAD_SIZE; + ///////////// PAD TEST + config->int_params[AC_my] = config->int_params[AC_ny] + STENCIL_ORDER; + config->int_params[AC_mz] = config->int_params[AC_nz] + STENCIL_ORDER; + + // Bounds for the computational domain, i.e. 
nx_min <= i < nx_max + config->int_params[AC_nx_min] = NGHOST; + config->int_params[AC_nx_max] = config->int_params[AC_nx_min] + config->int_params[AC_nx]; + config->int_params[AC_ny_min] = NGHOST; + config->int_params[AC_ny_max] = config->int_params[AC_ny] + NGHOST; + config->int_params[AC_nz_min] = NGHOST; + config->int_params[AC_nz_max] = config->int_params[AC_nz] + NGHOST; + + /* Additional helper params */ + // Int helpers + config->int_params[AC_mxy] = config->int_params[AC_mx] * config->int_params[AC_my]; + config->int_params[AC_nxy] = config->int_params[AC_nx] * config->int_params[AC_ny]; + config->int_params[AC_nxyz] = config->int_params[AC_nxy] * config->int_params[AC_nz]; + +#if VERBOSE_PRINTING // Defined in astaroth.h + printf("###############################################################\n"); + printf("Config dimensions recalculated:\n"); + print(*config); + printf("###############################################################\n"); +#endif +} + static Grid -createGrid(const AcMeshInfo& config) +createGrid(const AcMeshInfo config) { Grid grid; @@ -246,7 +287,7 @@ acSynchronizeMesh(void) } AcResult -acInit(const AcMeshInfo& config) +acInit(const AcMeshInfo config) { // Get num_devices ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices)); @@ -274,7 +315,7 @@ acInit(const AcMeshInfo& config) // Subgrids AcMeshInfo subgrid_config = config; subgrid_config.int_params[AC_nz] /= num_devices; - update_config(&subgrid_config); + update_builtin_params(&subgrid_config); subgrid = createGrid(subgrid_config); // Periodic boundary conditions become weird if the system can "fold unto itself". 
@@ -337,8 +378,8 @@ acQuit(void) } AcResult -acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start, - const int3& end, const StreamType stream) +acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start, + const int3 end, const StreamType stream) { // See the beginning of the file for an explanation of the index mapping // #pragma omp parallel for @@ -360,13 +401,13 @@ acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3 } AcResult -acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start, const int3& end) +acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start, const int3 end) { return acIntegrateStepWithOffsetAsync(isubstep, dt, start, end, STREAM_DEFAULT); } AcResult -acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream) +acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream) { const int3 start = (int3){NGHOST, NGHOST, NGHOST}; const int3 end = start + grid.n; @@ -374,7 +415,7 @@ acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType str } AcResult -acIntegrateStep(const int& isubstep, const AcReal& dt) +acIntegrateStep(const int isubstep, const AcReal dt) { return acIntegrateStepAsync(isubstep, dt, STREAM_DEFAULT); } @@ -452,7 +493,7 @@ swap_buffers(void) } AcResult -acIntegrate(const AcReal& dt) +acIntegrate(const AcReal dt) { acSynchronizeStream(STREAM_ALL); for (int isubstep = 0; isubstep < 3; ++isubstep) { @@ -464,7 +505,7 @@ acIntegrate(const AcReal& dt) } static AcReal -simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, const int& n) +simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const int n) { AcReal res = results[0]; for (int i = 1; i < n; ++i) { @@ -491,7 +532,7 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons } AcReal -acReduceScal(const 
ReductionType& rtype, const VertexBufferHandle& vtxbuffer_handle) +acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuffer_handle) { acSynchronizeStream(STREAM_ALL); @@ -505,8 +546,8 @@ acReduceScal(const ReductionType& rtype, const VertexBufferHandle& vtxbuffer_han } AcReal -acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const VertexBufferHandle& b, - const VertexBufferHandle& c) +acReduceVec(const ReductionType rtype, const VertexBufferHandle a, const VertexBufferHandle b, + const VertexBufferHandle c) { acSynchronizeStream(STREAM_ALL); @@ -520,7 +561,7 @@ acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const Verte } AcResult -acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_vertices, +acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 src, const int num_vertices, const StreamType stream) { // See the beginning of the file for an explanation of the index mapping @@ -558,13 +599,13 @@ acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_ve } AcResult -acLoadWithOffset(const AcMesh& host_mesh, const int3& src, const int num_vertices) +acLoadWithOffset(const AcMesh host_mesh, const int3 src, const int num_vertices) { return acLoadWithOffsetAsync(host_mesh, src, num_vertices, STREAM_DEFAULT); } AcResult -acLoad(const AcMesh& host_mesh) +acLoad(const AcMesh host_mesh) { acLoadWithOffset(host_mesh, (int3){0, 0, 0}, acVertexBufferSize(host_mesh.info)); acSynchronizeStream(STREAM_ALL); @@ -572,7 +613,7 @@ acLoad(const AcMesh& host_mesh) } AcResult -acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mesh, +acStoreWithOffsetAsync(const int3 src, const int num_vertices, AcMesh* host_mesh, const StreamType stream) { // See the beginning of the file for an explanation of the index mapping @@ -597,7 +638,7 @@ acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mes } AcResult -acStoreWithOffset(const int3& 
src, const int num_vertices, AcMesh* host_mesh) +acStoreWithOffset(const int3 src, const int num_vertices, AcMesh* host_mesh) { return acStoreWithOffsetAsync(src, num_vertices, host_mesh, STREAM_DEFAULT); } @@ -625,3 +666,9 @@ acLoadDeviceConstant(const AcRealParam param, const AcReal value) { return acLoadDeviceConstantAsync(param, value, STREAM_DEFAULT); } + +/* + * ============================================================================= + * Revised interface + * ============================================================================= + */ diff --git a/src/core/device.cu b/src/core/device.cu index 7b624e3..f4bef17 100644 --- a/src/core/device.cu +++ b/src/core/device.cu @@ -512,3 +512,9 @@ autoOptimize(const Device device) #if PACKED_DATA_TRANSFERS // Functions for calling packed data transfers #endif + +/* + * ============================================================================= + * Revised interface + * ============================================================================= + */ diff --git a/src/core/device.cuh b/src/core/device.cuh index 9d20620..7f1fad4 100644 --- a/src/core/device.cuh +++ b/src/core/device.cuh @@ -99,3 +99,9 @@ AcResult autoOptimize(const Device device); #if PACKED_DATA_TRANSFERS // Declarations used for packed data transfers #endif + +/* + * ============================================================================= + * Revised interface + * ============================================================================= + */ From f322bc8b37c81c990a19ccf67c850a7e6d755d98 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 23 Jul 2019 20:50:37 +0300 Subject: [PATCH 02/18] Rewrote all CMakeLists. Now much cleaner and there's a clear separation during compilation between the core and standalone modules. 
--- CMakeLists.txt | 195 +++++------------------ config/astaroth.conf | 2 +- include/astaroth_defines.h | 24 ++- src/core/CMakeLists.txt | 59 ++----- src/core/kernels/kernels.cuh | 2 +- src/standalone/CMakeLists.txt | 38 ++++- src/standalone/model/model_boundconds.cc | 87 +++++----- 7 files changed, 148 insertions(+), 259 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3da8a2..1541682 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,39 +1,39 @@ +################################### +## CMakeLists.txt for Astaroth ## +################################### # -# CMakeLists.txt for generating the makefile for Astaroth. -# Usage: mkdir build && cd build && cmake .. +# Usage: mkdir build && cd build && cmake .. && make # -# For example: cmake -DDOUBLE_PRECISION=ON .. +# If you want to see the exact flags used during compilation, compile with +# "make VERBOSE=1" # -# If you want to see the exact flags used during compilation, run -# "make -j VERBOSE=1" +# Print all options: cmake -LAH .. 
# -# Make sure your machine satisfies the system requirements: -# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements -#-------------------General---------------------------------------------------# -project(ASTAROTH_2.0 C CXX) -set (CMAKE_CXX_STANDARD 11) +## CMake settings cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support -cmake_policy (SET CMP0023 NEW) + +## Project settings +project(astaroth CXX) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) + +## Options +option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) +option(BUILD_STANDALONE "Builds the standalone Astaroth" ON) +option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF) +option(DOUBLE_PRECISION "Generates double precision code" OFF) +option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON) +option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF) -#-------------------Set user options with default values---------------------# -#Usage f.ex. cmake -DBUILD_DEBUG=ON .. 
-option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) -option(BUILD_STANDALONE "Builds standalone Astaroth" ON) -option(DOUBLE_PRECISION "Generates double precision code" OFF) -option(TIARA_CLUSTER "Special settings for compilation TIARA GPU cluster" OFF) -option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON) -option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF) -option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF) - -#-------------------Determine build type--------------------------------------# - -#Available types (case-sensitive): -#RELEASE (best performance) -#DEBUG (w/ debug information, non-concurrent kernels) +## Build types +# Available types (case-sensitive): +# RELEASE (best performance) +# DEBUG (w/ debug information, non-concurrent kernels) if (BUILD_DEBUG) set(CMAKE_BUILD_TYPE DEBUG) else () @@ -42,144 +42,25 @@ endif() message(STATUS "Build type: " ${CMAKE_BUILD_TYPE}) -#----------------------Find packages------------------------------------------# - -# C++ compiler info -message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER}) -message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID}) - -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.1) - # GCC >= 6.0 is required because of bug 48891. However, the fix seems to - # be backported so some older compilers which is why the code may also - # compile on gcc >= 4.9.1. 
- message(FATAL_ERROR "GCC version 4.9.1 or higher required") - endif() -endif() - -if (BUILD_RT_VISUALIZATION) - add_definitions(-DAC_BUILD_RT_VISUALIZATION=1) - # SDL 2 - set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/) - set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/) - set(SDL2_LIBRARY "SDL2") - include_directories(${SDL2_INCLUDE_DIR}) - link_directories(${SDL2_LIBRARY_DIR}) -endif() - -# CUDA -find_package(CUDA) -if (NOT CUDA_FOUND) - # find_package(CUDA REQUIRED) gives a confusing error message if it fails, - # therefore we print the reason here explicitly - message(FATAL_ERROR "CUDA not found") -endif() -include_directories(${CUDA_INCLUDE_DIRS}) - -# OpenMP -find_package(OpenMP) -if (NOT OPENMP_FOUND) - message(WARNING "OpenMP not found. All host-side concurrency disabled \ - (lower performance).") -else () - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() - -#----------------------Compilation settings-----------------------------------# - -#Debug and verification -#set(CMAKE_VERBOSE_MAKEFILE OFF) -#set(CXX_VERBOSE_BUILD OFF) -#set(CUDA_VERBOSE_BUILD OFF) -#include(CTest) -#add_test(ac_test ac_run) -#find_program(MEMORYCHECK_COMMAND valgrind) -#set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full" ) - - -#----------------------Setup defines------------------------------------------# - +## Defines if (DOUBLE_PRECISION) - add_definitions(-DAC_DOUBLE_PRECISION=1) -else() + add_definitions(-DAC_DOUBLE_PRECISION=1) +else () add_definitions(-DAC_DOUBLE_PRECISION=0) -endif() - -# A full integration step is benchmarked by default, use this flag to override and -# benchmark RK3 only -if (GEN_BENCHMARK_RK3) - add_definitions(-DGEN_BENCHMARK_RK3=1) -else() - add_definitions(-DGEN_BENCHMARK_RK3=0) -endif() - +endif () if (MULTIGPU_ENABLED) add_definitions(-DAC_MULTIGPU_ENABLED=1) -else() +else () add_definitions(-DAC_MULTIGPU_ENABLED=0) -endif() +endif () -#-----------------------TIARA specific 
options--------------------------------# -#OLD#set (CXX_FLAGS_TIARA "-I/software/opt/cuda/9.0/include/") -# %JP: NOTE! This should not be needed anymore because the command -# find_package(CUDA) above should find and include this directory automatically -#USE THIS: -if (TIARA_CLUSTER) - set (CXX_FLAGS_TIARA "-mno-bmi2") -endif() - -#----------------------Setup CXX compilation flags----------------------------# -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}\ - -O2 -march=native -pipe") - -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}\ - -O0 -g") - -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion") # TODO: -Wshadow -Wconversion -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") - #MV: -Werror-all disabled because produces cryptical messages preventing compilation. - #TODO: Would be good to find an optimal set of warning flags. - #set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror-all -Wsign-conversion") - set (CXX_FLAGS_WARNING "-Wall -Wextra -Wsign-conversion") -else() - message(WARNING "Using an unknown compiler. Compilation warning flags were not set.") -endif() - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}\ - ${CXX_FLAGS_WARNING}\ - ${CXX_FLAGS_ETC}\ - ${CXX_FLAGS_TIARA}") # %JP: CXX_FLAGS_TIARA should not be needed, - # see comments in "TIARA specific options" - -message("CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) - - -#----------------------Setup core subdirectories------------------------------# - -#Include root directory (.) so that the following modules can include their -#parent dir (f.ex. #include "common/stuff.h" instead of "../common/stuff") -include_directories(.) +## Include directories include_directories(include) -include_directories(src) +include_directories(.) 
# TODO remove +include_directories(src) # TODO remove -# CUDA sources +## Subdirectories add_subdirectory(src/core) - -#----------------------Link---------------------------------------------------# - if (BUILD_STANDALONE) - #Define the config directory - if (ALTER_CONF) - set(ASTAROTH_CONF_PATH "${CMAKE_BINARY_DIR}/") - else() - set(ASTAROTH_CONF_PATH "${CMAKE_SOURCE_DIR}/config/") - endif() - - #Add additional subdirectories - add_subdirectory (src/standalone) - cuda_add_executable(ac_run src/standalone/main.cc) - target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY}) -endif() - -add_subdirectory(ctest) + add_subdirectory(src/standalone) +endif () diff --git a/config/astaroth.conf b/config/astaroth.conf index 41b7e51..32f50a3 100644 --- a/config/astaroth.conf +++ b/config/astaroth.conf @@ -40,7 +40,7 @@ AC_chi = 0.0001 AC_relhel = 0.0 AC_forcing_magnitude = 1e-5 AC_kmin = 0.8 -AC_kmax = 1.2 +AC_kmax = 1.2 // Entropy diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h index 0915d79..eb89daf 100644 --- a/include/astaroth_defines.h +++ b/include/astaroth_defines.h @@ -22,9 +22,27 @@ extern "C" { #endif -#include // FLT_EPSILON, etc -#include // size_t -#include // CUDA vector types (float4, etc) +#include // FLT_EPSILON, etc +#include // size_t +//#include // CUDA vector types (float4, etc) + +#ifndef __CUDACC__ +typedef struct { + int x, y, z; +} int3; + +typedef struct { + float x, y; +} float2; + +typedef struct { + float x, y, z; +} float3; + +typedef struct { + double x, y, z; +} double3; +#endif // __CUDACC__ #include "stencil_defines.h" diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 79560c9..1177995 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -2,59 +2,30 @@ ## CMakeLists.txt for Astaroth Core ## ######################################## -#----------------------Find CUDA-----------------------------------------------# - +## Find packages find_package(CUDA 9 REQUIRED) 
-#----------------------CUDA settings-------------------------------------------# - -set(CUDA_SEPARABLE_COMPILATION OFF) -set(CUDA_PROPAGATE_HOST_FLAGS ON) - -#----------------------Setup CUDA compilation flags----------------------------# - -# Generate code for the default architecture (Pascal) +## Architecture and optimization flags set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -lineinfo - -ftz=true - -std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero -# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache -# =cg to opt out - -# Additional CUDA optimization flags -if (CMAKE_BUILD_TYPE MATCHES RELEASE) - # Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how - # to add more - set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE}) -endif() - -# Additional CUDA debug flags -if (CMAKE_BUILD_TYPE MATCHES DEBUG) - # The debug flags must be set inside this if clause, since either CMake 3.5 - # or nvcc 7.5 is bugged: - # CMake converts these into empty strings when doing RELEASE build, but nvcc - # 7.5 fails to parse empty flags. 
- set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG}; - --device-debug; - --generate-line-info; - --ptxas-options=-v) -endif() - -set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}") + -ftz=true # Flush denormalized floats to zero + -std=c++11, + --compiler-options -march=native) # Native host machine code + #--maxrregcount=255 + # -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache + # =cg to opt out -message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS}) +set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow +set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS}) +set(CUDA_NVCC_FLAGS_RELEASE) +set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v) -#------------------Compile and create a static library-------------------------# -file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu") - -# Use -fPIC if -fpic not supported. Some quick non-scientific tests: -# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed -# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed -# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed -CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic") +## Create and link the library +include_directories(.) 
+cuda_add_library(astaroth_core STATIC astaroth.cu device.cu) target_link_libraries(astaroth_core m) diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index 8977d07..2c7d876 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -708,7 +708,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle) */ ////////////////REDUCE/////////////////////////// -#include "src/core/math_utils.h" // is_power_of_two +#include "math_utils.h" // is_power_of_two /* Reduction steps: diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt index c6b535b..bb530e5 100644 --- a/src/standalone/CMakeLists.txt +++ b/src/standalone/CMakeLists.txt @@ -1,10 +1,34 @@ -################################ -## CMakeLists.txt for utils ## -################################ +############################################## +## CMakeLists.txt for Astaroth Standalone ## +############################################## +## Files file (GLOB SOURCES "*.cc" "model/*.cc") -add_library(astaroth_standalone STATIC ${SOURCES}) -target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\") -target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\") +## Find packages +find_package(OpenMP REQUIRED) +if (BUILD_RT_VISUALIZATION) + add_definitions(-DAC_BUILD_RT_VISUALIZATION=1) + # SDL 2 + set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/) + set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/) + set(SDL2_LIBRARY "SDL2") + include_directories(${SDL2_INCLUDE_DIR}) + link_directories(${SDL2_LIBRARY_DIR}) +endif () + + +## Compilation flags +add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS}) +add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow) + +## Compile and link +add_executable(ac_run ${SOURCES}) 
+target_link_libraries(ac_run PRIVATE "${OpenMP_CXX_FLAGS}" astaroth_core ${SDL2_LIBRARY}) + +# Define the config directory +if (ALTER_CONF) + target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/") +else() + target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/") +endif() diff --git a/src/standalone/model/model_boundconds.cc b/src/standalone/model/model_boundconds.cc index 188b97e..f7a6aca 100644 --- a/src/standalone/model/model_boundconds.cc +++ b/src/standalone/model/model_boundconds.cc @@ -28,71 +28,66 @@ #include "core/errchk.h" - void boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh) { - #pragma omp parallel for +#pragma omp parallel for for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) { const int3 start = (int3){0, 0, 0}; - const int3 end = (int3){ - mesh_info.int_params[AC_mx], - mesh_info.int_params[AC_my], - mesh_info.int_params[AC_mz] - }; + const int3 end = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my], + mesh_info.int_params[AC_mz]}; const int nx = mesh_info.int_params[AC_nx]; const int ny = mesh_info.int_params[AC_ny]; const int nz = mesh_info.int_params[AC_nz]; - const int nx_min = mesh_info.int_params[AC_nx_min]; - const int ny_min = mesh_info.int_params[AC_ny_min]; - const int nz_min = mesh_info.int_params[AC_nz_min]; + const int nx_min = mesh_info.int_params[AC_nx_min]; + const int ny_min = mesh_info.int_params[AC_ny_min]; + const int nz_min = mesh_info.int_params[AC_nz_min]; - // The old kxt was inclusive, but our mx_max is exclusive - const int nx_max = mesh_info.int_params[AC_nx_max]; - const int ny_max = mesh_info.int_params[AC_ny_max]; - const int nz_max = mesh_info.int_params[AC_nz_max]; + // The old kxt was inclusive, but our mx_max is exclusive + const int nx_max = mesh_info.int_params[AC_nx_max]; + const int ny_max = mesh_info.int_params[AC_ny_max]; + const int nz_max = mesh_info.int_params[AC_nz_max]; for (int k_dst = start.z; k_dst < end.z; ++k_dst) { - for (int 
j_dst = start.y; j_dst < end.y; ++j_dst) { - for (int i_dst = start.x; i_dst < end.x; ++i_dst) { + for (int j_dst = start.y; j_dst < end.y; ++j_dst) { + for (int i_dst = start.x; i_dst < end.x; ++i_dst) { - // If destination index is inside the computational domain, return since - // the boundary conditions are only applied to the ghost zones - if (i_dst >= nx_min && i_dst < nx_max && - j_dst >= ny_min && j_dst < ny_max && - k_dst >= nz_min && k_dst < nz_max) - continue; + // If destination index is inside the computational domain, return since + // the boundary conditions are only applied to the ghost zones + if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max && + k_dst >= nz_min && k_dst < nz_max) + continue; - // Find the source index - // Map to nx, ny, nz coordinates - int i_src = i_dst - nx_min; - int j_src = j_dst - ny_min; - int k_src = k_dst - nz_min; + // Find the source index + // Map to nx, ny, nz coordinates + int i_src = i_dst - nx_min; + int j_src = j_dst - ny_min; + int k_src = k_dst - nz_min; - // Translate (s.t. the index is always positive) - i_src += nx; - j_src += ny; - k_src += nz; + // Translate (s.t. 
the index is always positive) + i_src += nx; + j_src += ny; + k_src += nz; - // Wrap - i_src %= nx; - j_src %= ny; - k_src %= nz; + // Wrap + i_src %= nx; + j_src %= ny; + k_src %= nz; - // Map to mx, my, mz coordinates - i_src += nx_min; - j_src += ny_min; - k_src += nz_min; + // Map to mx, my, mz coordinates + i_src += nx_min; + j_src += ny_min; + k_src += nz_min; - const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info); - const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info); - ERRCHK(src_idx < acVertexBufferSize(mesh_info)); - ERRCHK(dst_idx < acVertexBufferSize(mesh_info)); - mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx]; - } - } + const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info); + const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info); + ERRCHK(src_idx < acVertexBufferSize(mesh_info)); + ERRCHK(dst_idx < acVertexBufferSize(mesh_info)); + mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx]; + } + } } } } From f0d1fba55c1d74ce2eb15e9b9df8c630c3b0e650 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 23 Jul 2019 21:00:00 +0300 Subject: [PATCH 03/18] The pure C test works again. 
--- CMakeLists.txt | 4 +++- ctest/CMakeLists.txt | 3 --- src/ctest/CMakeLists.txt | 2 ++ {ctest => src/ctest}/main.c | 7 ++++--- 4 files changed, 9 insertions(+), 7 deletions(-) delete mode 100644 ctest/CMakeLists.txt create mode 100644 src/ctest/CMakeLists.txt rename {ctest => src/ctest}/main.c (58%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1541682..ba3f718 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support ## Project settings -project(astaroth CXX) +project(astaroth C CXX) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) @@ -64,3 +64,5 @@ add_subdirectory(src/core) if (BUILD_STANDALONE) add_subdirectory(src/standalone) endif () + +add_subdirectory(src/ctest) diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt deleted file mode 100644 index b2c84bf..0000000 --- a/ctest/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -include_directories(${CMAKE_SOURCE_DIR}/include) -add_executable(ctest main.c) -target_link_libraries(ctest astaroth_core) diff --git a/src/ctest/CMakeLists.txt b/src/ctest/CMakeLists.txt new file mode 100644 index 0000000..030d744 --- /dev/null +++ b/src/ctest/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(ctest main.c) +target_link_libraries(ctest PRIVATE astaroth_core m) diff --git a/ctest/main.c b/src/ctest/main.c similarity index 58% rename from ctest/main.c rename to src/ctest/main.c index 5ed5ffc..a718b7d 100644 --- a/ctest/main.c +++ b/src/ctest/main.c @@ -7,11 +7,12 @@ int main(void) { AcMeshInfo info = { - .int_params[AC_mx] = 128, - .int_params[AC_my] = 64, - .int_params[AC_mz] = 32, + .int_params[AC_nx] = 128, + .int_params[AC_ny] = 64, + .int_params[AC_nz] = 32, }; acInit(info); + acIntegrate(0.1f); acQuit(); return EXIT_SUCCESS; } From be44354b3390e82ab8f06c771a4033f230759d3b Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 23 Jul 2019 21:03:42 +0300 
Subject: [PATCH 04/18] Astaroth does not require any additional libraries to be included, which is good. Previously required CUDA and C/C++ math libraries. --- src/ctest/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctest/CMakeLists.txt b/src/ctest/CMakeLists.txt index 030d744..393d7b6 100644 --- a/src/ctest/CMakeLists.txt +++ b/src/ctest/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(ctest main.c) -target_link_libraries(ctest PRIVATE astaroth_core m) +target_link_libraries(ctest PRIVATE astaroth_core) From 26316a4d15da4f6fef306880ed8aef9f504d51bb Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 23 Jul 2019 21:26:58 +0300 Subject: [PATCH 05/18] The standalone library is now compiled in parallel with the core library. Slightly faster. --- src/standalone/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt index bb530e5..9ca17e5 100644 --- a/src/standalone/CMakeLists.txt +++ b/src/standalone/CMakeLists.txt @@ -23,12 +23,14 @@ add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS}) add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow) ## Compile and link -add_executable(ac_run ${SOURCES}) -target_link_libraries(ac_run PRIVATE "${OpenMP_CXX_FLAGS}" astaroth_core ${SDL2_LIBRARY}) +add_library(astaroth_standalone ${SOURCES}) + +add_executable(ac_run main.cc) +target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY}) # Define the config directory if (ALTER_CONF) - target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/") + target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/") else() - target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/") + target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/") endif() From 
818893a0eaf5cb6af6ef7b0cdc50786ed03b9ae6 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Fri, 26 Jul 2019 14:10:17 +0300 Subject: [PATCH 06/18] Fixed stray comma in CUDA_ARCH_FLAGS --- src/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1177995..e70de80 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -12,7 +12,7 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37 -gencode arch=compute_61,code=sm_61 -lineinfo -ftz=true # Flush denormalized floats to zero - -std=c++11, + -std=c++11 --compiler-options -march=native) # Native host machine code #--maxrregcount=255 # -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache From b90d261e897aa3a3801cf3d1171f4639b5cce89e Mon Sep 17 00:00:00 2001 From: jpekkila Date: Fri, 26 Jul 2019 14:18:11 +0300 Subject: [PATCH 07/18] Removed an unnecessary include from the root CMakeLists.txt --- CMakeLists.txt | 3 +-- src/standalone/autotest.cc | 4 ++-- src/standalone/benchmark.cc | 2 +- src/standalone/config_loader.cc | 4 ++-- src/standalone/main.cc | 4 ++-- src/standalone/model/host_forcing.cc | 2 +- src/standalone/model/host_memory.cc | 2 +- src/standalone/model/host_timestep.cc | 4 ++-- src/standalone/model/model_boundconds.cc | 2 +- src/standalone/model/model_diff.h | 2 +- src/standalone/model/model_reduce.cc | 2 +- src/standalone/renderer.cc | 6 +++--- src/standalone/simulation.cc | 4 ++-- 13 files changed, 20 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ba3f718..a59d256 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,9 +55,8 @@ else () endif () ## Include directories +include_directories(.) include_directories(include) -include_directories(.) 
# TODO remove -include_directories(src) # TODO remove ## Subdirectories add_subdirectory(src/core) diff --git a/src/standalone/autotest.cc b/src/standalone/autotest.cc index c7aec10..c408633 100644 --- a/src/standalone/autotest.cc +++ b/src/standalone/autotest.cc @@ -29,7 +29,7 @@ #include #include "config_loader.h" -#include "core/math_utils.h" +#include "src/core/math_utils.h" #include "model/host_forcing.h" #include "model/host_memory.h" #include "model/host_timestep.h" @@ -37,7 +37,7 @@ #include "model/model_reduce.h" #include "model/model_rk3.h" -#include "core/errchk.h" +#include "src/core/errchk.h" #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/src/standalone/benchmark.cc b/src/standalone/benchmark.cc index 6e01346..24f7f15 100644 --- a/src/standalone/benchmark.cc +++ b/src/standalone/benchmark.cc @@ -211,7 +211,7 @@ run_benchmark(void) #if AUTO_OPTIMIZE const char* benchmark_path = "benchmark.out"; -#include "core/kernels/rk3_threadblock.conf" +#include "src/core/kernels/rk3_threadblock.conf" static int write_result_to_file(const float& ms_per_step) { diff --git a/src/standalone/config_loader.cc b/src/standalone/config_loader.cc index 7a0a509..eeb223e 100644 --- a/src/standalone/config_loader.cc +++ b/src/standalone/config_loader.cc @@ -31,8 +31,8 @@ #include // print #include // memset -#include "core/errchk.h" -#include "core/math_utils.h" +#include "src/core/errchk.h" +#include "src/core/math_utils.h" static inline void print(const AcMeshInfo& config) diff --git a/src/standalone/main.cc b/src/standalone/main.cc index c393ace..0e14842 100644 --- a/src/standalone/main.cc +++ b/src/standalone/main.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae. + Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae. This file is part of Astaroth. 
@@ -28,7 +28,7 @@ #include #include -#include "core/errchk.h" +#include "src/core/errchk.h" #include "run.h" // Write all errors from stderr to an in the current working diff --git a/src/standalone/model/host_forcing.cc b/src/standalone/model/host_forcing.cc index 423bf19..c0ee7f0 100644 --- a/src/standalone/model/host_forcing.cc +++ b/src/standalone/model/host_forcing.cc @@ -26,7 +26,7 @@ */ #include "host_forcing.h" -#include "core/math_utils.h" +#include "src/core/math_utils.h" // The is a wrapper for genering random numbers with a chosen system. AcReal diff --git a/src/standalone/model/host_memory.cc b/src/standalone/model/host_memory.cc index 5cda923..5a68d9b 100644 --- a/src/standalone/model/host_memory.cc +++ b/src/standalone/model/host_memory.cc @@ -28,7 +28,7 @@ #include -#include "core/errchk.h" +#include "src/core/errchk.h" #define AC_GEN_STR(X) #X const char* init_type_names[] = {AC_FOR_INIT_TYPES(AC_GEN_STR)}; diff --git a/src/standalone/model/host_timestep.cc b/src/standalone/model/host_timestep.cc index 48f4134..9245cef 100644 --- a/src/standalone/model/host_timestep.cc +++ b/src/standalone/model/host_timestep.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae. + Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae. This file is part of Astaroth. 
@@ -26,7 +26,7 @@ */ #include "host_timestep.h" -#include "core/math_utils.h" +#include "src/core/math_utils.h" static AcReal timescale = AcReal(1.0); diff --git a/src/standalone/model/model_boundconds.cc b/src/standalone/model/model_boundconds.cc index f7a6aca..9490be1 100644 --- a/src/standalone/model/model_boundconds.cc +++ b/src/standalone/model/model_boundconds.cc @@ -26,7 +26,7 @@ */ #include "model_boundconds.h" -#include "core/errchk.h" +#include "src/core/errchk.h" void boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh) diff --git a/src/standalone/model/model_diff.h b/src/standalone/model/model_diff.h index 20678bf..303181d 100644 --- a/src/standalone/model/model_diff.h +++ b/src/standalone/model/model_diff.h @@ -25,7 +25,7 @@ * */ #pragma once -#include "core/errchk.h" +#include "src/core/errchk.h" typedef long double MODEL_REAL; diff --git a/src/standalone/model/model_reduce.cc b/src/standalone/model/model_reduce.cc index 6d48c4b..1de3b32 100644 --- a/src/standalone/model/model_reduce.cc +++ b/src/standalone/model/model_reduce.cc @@ -28,7 +28,7 @@ #include -#include "core/errchk.h" +#include "src/core/errchk.h" // Function pointer definitions typedef ModelScalar (*ReduceFunc)(const ModelScalar&, const ModelScalar&); diff --git a/src/standalone/renderer.cc b/src/standalone/renderer.cc index 1522cc5..6fd1913 100644 --- a/src/standalone/renderer.cc +++ b/src/standalone/renderer.cc @@ -32,8 +32,8 @@ #include // memcpy #include "config_loader.h" -#include "core/errchk.h" -#include "core/math_utils.h" +#include "src/core/errchk.h" +#include "src/core/math_utils.h" #include "model/host_memory.h" #include "model/host_timestep.h" #include "model/model_reduce.h" @@ -423,7 +423,7 @@ run_renderer(void) return 0; } #else // BUILD_RT_VISUALIZATION == 0 -#include "core/errchk.h" +#include "src/core/errchk.h" int run_renderer(void) { diff --git a/src/standalone/simulation.cc b/src/standalone/simulation.cc index f6b7e46..ebd527e 100644 --- 
a/src/standalone/simulation.cc +++ b/src/standalone/simulation.cc @@ -27,8 +27,8 @@ #include "run.h" #include "config_loader.h" -#include "core/errchk.h" -#include "core/math_utils.h" +#include "src/core/errchk.h" +#include "src/core/math_utils.h" #include "model/host_forcing.h" #include "model/host_memory.h" #include "model/host_timestep.h" From 5044228967abc786857edbf7c4dd73e497f9afe4 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Fri, 26 Jul 2019 14:22:22 +0300 Subject: [PATCH 08/18] The text editor I use to edit stuff remotely is a complete piece of &^$%$, does not synchronize the files correctly. This commit fixes the issues introduced in the last commit --- src/standalone/main.cc | 4 ++-- src/standalone/model/host_timestep.cc | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/standalone/main.cc b/src/standalone/main.cc index 0e14842..48d5fc6 100644 --- a/src/standalone/main.cc +++ b/src/standalone/main.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae. + Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae. This file is part of Astaroth. @@ -28,8 +28,8 @@ #include #include -#include "src/core/errchk.h" #include "run.h" +#include "src/core/errchk.h" // Write all errors from stderr to an in the current working // directory diff --git a/src/standalone/model/host_timestep.cc b/src/standalone/model/host_timestep.cc index 9245cef..fd8d0ce 100644 --- a/src/standalone/model/host_timestep.cc +++ b/src/standalone/model/host_timestep.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae. + Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae. This file is part of Astaroth. 
@@ -33,25 +33,28 @@ static AcReal timescale = AcReal(1.0); AcReal host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info) { - const long double cdt = mesh_info.real_params[AC_cdt]; - const long double cdtv = mesh_info.real_params[AC_cdtv]; + const long double cdt = mesh_info.real_params[AC_cdt]; + const long double cdtv = mesh_info.real_params[AC_cdtv]; // const long double cdts = mesh_info.real_params[AC_cdts]; const long double cs2_sound = mesh_info.real_params[AC_cs2_sound]; - const long double nu_visc = mesh_info.real_params[AC_nu_visc]; - const long double eta = mesh_info.real_params[AC_eta]; - const long double chi = 0; // mesh_info.real_params[AC_chi]; // TODO not calculated - const long double gamma = mesh_info.real_params[AC_gamma]; - const long double dsmin = mesh_info.real_params[AC_dsmin]; + const long double nu_visc = mesh_info.real_params[AC_nu_visc]; + const long double eta = mesh_info.real_params[AC_eta]; + const long double chi = 0; // mesh_info.real_params[AC_chi]; // TODO not calculated + const long double gamma = mesh_info.real_params[AC_gamma]; + const long double dsmin = mesh_info.real_params[AC_dsmin]; // Old ones from legacy Astaroth - //const long double uu_dt = cdt * (dsmin / (umax + cs_sound)); - //const long double visc_dt = cdtv * dsmin * dsmin / nu_visc; + // const long double uu_dt = cdt * (dsmin / (umax + cs_sound)); + // const long double visc_dt = cdtv * dsmin * dsmin / nu_visc; // New, closer to the actual Courant timestep // See Pencil Code user manual p. 38 (timestep section) const long double uu_dt = cdt * dsmin / (fabsl(umax) + sqrtl(cs2_sound + 0.0l)); - const long double visc_dt = cdtv * dsmin * dsmin / max(max(nu_visc, eta), max(gamma, chi));// + 1; // TODO NOTE: comment the +1 out to get scientifically accurate results - //MV: White the +1? It was messing up my computations! 
+ const long double visc_dt = cdtv * dsmin * dsmin / + max(max(nu_visc, eta), + max(gamma, chi)); // + 1; // TODO NOTE: comment the +1 out to + // get scientifically accurate results + // MV: White the +1? It was messing up my computations! const long double dt = min(uu_dt, visc_dt); return AcReal(timescale) * AcReal(dt); From c9fafe41e530daa54d15a1ac8b094cb0c78df945 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Fri, 26 Jul 2019 15:12:55 +0300 Subject: [PATCH 09/18] Tidied the CMakeLists, moved stuff to more logical places and added comments. Also tested that ALTER_CONF=ON still works --- CMakeLists.txt | 19 ++++++------------- src/core/CMakeLists.txt | 7 +++++++ src/ctest/CMakeLists.txt | 7 +++++++ src/ctest/README.txt | 1 + src/standalone/CMakeLists.txt | 4 +++- 5 files changed, 24 insertions(+), 14 deletions(-) create mode 100644 src/ctest/README.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index a59d256..a82620a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,26 +10,22 @@ # Print all options: cmake -LAH .. 
# - ## CMake settings cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support ## Project settings project(astaroth C CXX) -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) ## Options option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) option(BUILD_STANDALONE "Builds the standalone Astaroth" ON) option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF) +option(BUILD_C_API_TEST "Builds a C program to test whether the API is conformant" ON) option(DOUBLE_PRECISION "Generates double precision code" OFF) option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON) option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF) - - ## Build types # Available types (case-sensitive): # RELEASE (best performance) @@ -41,27 +37,24 @@ else () endif() message(STATUS "Build type: " ${CMAKE_BUILD_TYPE}) - ## Defines if (DOUBLE_PRECISION) add_definitions(-DAC_DOUBLE_PRECISION=1) else () add_definitions(-DAC_DOUBLE_PRECISION=0) endif () -if (MULTIGPU_ENABLED) - add_definitions(-DAC_MULTIGPU_ENABLED=1) -else () - add_definitions(-DAC_MULTIGPU_ENABLED=0) -endif () ## Include directories include_directories(.) 
include_directories(include) ## Subdirectories -add_subdirectory(src/core) +add_subdirectory(src/core) # The core library + if (BUILD_STANDALONE) add_subdirectory(src/standalone) endif () -add_subdirectory(src/ctest) +if (BUILD_C_API_TEST) + add_subdirectory(src/ctest) +endif () diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e70de80..5cbc271 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -25,6 +25,13 @@ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS}) set(CUDA_NVCC_FLAGS_RELEASE) set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v) +## Definitions +if (MULTIGPU_ENABLED) + add_definitions(-DAC_MULTIGPU_ENABLED=1) +else () + add_definitions(-DAC_MULTIGPU_ENABLED=0) +endif () + ## Create and link the library include_directories(.) cuda_add_library(astaroth_core STATIC astaroth.cu device.cu) diff --git a/src/ctest/CMakeLists.txt b/src/ctest/CMakeLists.txt index 393d7b6..f3439f3 100644 --- a/src/ctest/CMakeLists.txt +++ b/src/ctest/CMakeLists.txt @@ -1,2 +1,9 @@ +############################################## +## CMakeLists.txt for the C API test ## +############################################## + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) + add_executable(ctest main.c) target_link_libraries(ctest PRIVATE astaroth_core) diff --git a/src/ctest/README.txt b/src/ctest/README.txt new file mode 100644 index 0000000..68750d2 --- /dev/null +++ b/src/ctest/README.txt @@ -0,0 +1 @@ +This directory is used to test whether the Astaroth API is compatible with C. 
diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt index 9ca17e5..b444800 100644 --- a/src/standalone/CMakeLists.txt +++ b/src/standalone/CMakeLists.txt @@ -2,6 +2,9 @@ ## CMakeLists.txt for Astaroth Standalone ## ############################################## +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + ## Files file (GLOB SOURCES "*.cc" "model/*.cc") @@ -17,7 +20,6 @@ if (BUILD_RT_VISUALIZATION) link_directories(${SDL2_LIBRARY_DIR}) endif () - ## Compilation flags add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS}) add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow) From 7c256d3bb3f366a1af852530cfbc811b30b0f59f Mon Sep 17 00:00:00 2001 From: jpekkila Date: Fri, 26 Jul 2019 15:13:19 +0300 Subject: [PATCH 10/18] Added a comment about a redundant variable --- scripts/ac_mkbuilddir.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/ac_mkbuilddir.sh b/scripts/ac_mkbuilddir.sh index eac417e..6afb3db 100755 --- a/scripts/ac_mkbuilddir.sh +++ b/scripts/ac_mkbuilddir.sh @@ -20,7 +20,7 @@ ALTER_CONF=${ALTER_CONF_DEFAULT} while [ "$#" -gt 0 ] do - case $1 in + case $1 in -h|--help) echo "You can set up a build directory separe of the ASTAROTH_HOME" echo "Available flags:" @@ -66,12 +66,14 @@ mkdir ${BUILD_DIR} cd ${BUILD_DIR} #Set up the astaroth.conf to be define and customized in the build directory to -#not always alter the default use i.e. for unit test etc. +#not always alter the default use i.e. for unit test etc. #Assumed by default if you do this thing anyway. echo "cp ${AC_HOME}/config/astaroth.conf ${PWD}" cp ${AC_HOME}/config/astaroth.conf . -CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}" +CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}" # JP: Warning, not used in src/standalone/CMakeLists.txt. + # The config path supplied by cmake and this define + # is redundant. 
#cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ${AC_HOME} From 5801144336ee68fd6a452b22132f4284ff470d00 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 29 Jul 2019 15:21:15 +0300 Subject: [PATCH 11/18] ac_mkbuilddir.sh did not stop if the directory specified by the user did not exist. This led to messing up the base astaroth directory with temporary cmake files. Added the -p flag to mkdir to create parent directories if necessary to avoid this --- scripts/ac_mkbuilddir.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ac_mkbuilddir.sh b/scripts/ac_mkbuilddir.sh index 6afb3db..14d04f9 100755 --- a/scripts/ac_mkbuilddir.sh +++ b/scripts/ac_mkbuilddir.sh @@ -61,7 +61,7 @@ done echo "Creating build directory: ${BUILD_DIR}" -mkdir ${BUILD_DIR} +mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} From 7b5a02bf0f0006172552f94bd730b6867d29c070 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 29 Jul 2019 15:35:17 +0300 Subject: [PATCH 12/18] The previous commit to ac_mkbuilddir.sh was not enough. Added a line that makes the script stop if any of the commands fail to avoid cluttering the base astaroth directory. In my case the issue was permission denied when trying to create a project directory in /MYSCRATCH (system root directory) instead of MYSCRATCH (astaroth/MYSCRATCH) --- scripts/ac_mkbuilddir.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/ac_mkbuilddir.sh b/scripts/ac_mkbuilddir.sh index 14d04f9..c52bb8c 100755 --- a/scripts/ac_mkbuilddir.sh +++ b/scripts/ac_mkbuilddir.sh @@ -5,6 +5,8 @@ then exit 1 fi +# Exit if any of the following commands fail +set -e TIARA_SETUP_DEFAULT="" DOUBLE_DEFAULT="OFF" From a3359b0d048894d2ee605c33625e84a3295ac8bf Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 29 Jul 2019 15:55:27 +0300 Subject: [PATCH 13/18] CONFIG_PATH is now supplied by ac_mkbuilddir.
While using CMAKE_BINARY_DIR would be a bit more idiomatic, ASTAROTH_CONF_PATH is probably safer since ac_mkbuilddir.sh does the copying and knows for sure what the correct path is. --- scripts/ac_mkbuilddir.sh | 5 +---- src/standalone/CMakeLists.txt | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/ac_mkbuilddir.sh b/scripts/ac_mkbuilddir.sh index c52bb8c..888f811 100755 --- a/scripts/ac_mkbuilddir.sh +++ b/scripts/ac_mkbuilddir.sh @@ -73,10 +73,7 @@ cd ${BUILD_DIR} echo "cp ${AC_HOME}/config/astaroth.conf ${PWD}" cp ${AC_HOME}/config/astaroth.conf . -CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}" # JP: Warning, not used in src/standalone/CMakeLists.txt. - # The config path supplied by cmake and this define - # is redundant. - +CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}" #cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ${AC_HOME} diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt index b444800..6a03e32 100644 --- a/src/standalone/CMakeLists.txt +++ b/src/standalone/CMakeLists.txt @@ -32,7 +32,8 @@ target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP # Define the config directory if (ALTER_CONF) - target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/") + # ASTAROTH_CONF_PATH supplied by ac_mkbuilddir.sh + target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${ASTAROTH_CONF_PATH}/") else() target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/") endif() From d614a036531159753d6af02a320639efc3b2662a Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 29 Jul 2019 16:07:31 +0300 Subject: [PATCH 14/18] Made CMake use the default compiler on the system PATH. If the generic compiler names (cc, c++) point to some old version, then cmake would otherwise use those instead of the latest available compiler.
--- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a82620a..04a73d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,8 @@ ## CMake settings cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support +find_program(CMAKE_C_COMPILER NAMES $ENV{CC} gcc PATHS ENV PATH NO_DEFAULT_PATH) +find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++ PATHS ENV PATH NO_DEFAULT_PATH) ## Project settings project(astaroth C CXX) From 66fa34611861e15371823a0fa34bcd45c9779fd0 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 5 Aug 2019 18:24:55 +0300 Subject: [PATCH 15/18] Added a copyright text to ctest --- src/ctest/main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/ctest/main.c b/src/ctest/main.c index a718b7d..5f31f17 100644 --- a/src/ctest/main.c +++ b/src/ctest/main.c @@ -1,3 +1,21 @@ +/* + Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae. + + This file is part of Astaroth. + + Astaroth is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Astaroth is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Astaroth. If not, see . +*/ #include #include From da76fca0dc3fad476dfef2bef3a382d919534df2 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Mon, 5 Aug 2019 18:26:12 +0300 Subject: [PATCH 16/18] Added a test for building an MPI project. Building for the MPI and C API tests is now also disabled by default. 
--- CMakeLists.txt | 7 +++++- src/mpitest/CMakeLists.txt | 12 +++++++++ src/mpitest/README.txt | 1 + src/mpitest/main.c | 51 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 src/mpitest/CMakeLists.txt create mode 100644 src/mpitest/README.txt create mode 100644 src/mpitest/main.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 04a73d8..4cf61b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) option(BUILD_STANDALONE "Builds the standalone Astaroth" ON) option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF) -option(BUILD_C_API_TEST "Builds a C program to test whether the API is conformant" ON) +option(BUILD_C_API_TEST "Builds a C program to test whether the API is conformant" OFF) +option(BUILD_MPI_TEST "Builds a C program to test whether MPI works" OFF) option(DOUBLE_PRECISION "Generates double precision code" OFF) option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON) option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF) @@ -60,3 +61,7 @@ endif () if (BUILD_C_API_TEST) add_subdirectory(src/ctest) endif () + +if (BUILD_MPI_TEST) + add_subdirectory(src/mpitest) +endif () diff --git a/src/mpitest/CMakeLists.txt b/src/mpitest/CMakeLists.txt new file mode 100644 index 0000000..c64105d --- /dev/null +++ b/src/mpitest/CMakeLists.txt @@ -0,0 +1,12 @@ +############################################## +## CMakeLists.txt for the MPI test ## +############################################## + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) + +find_package(MPI REQUIRED) + +add_executable(mpitest main.c) +target_include_directories(mpitest PRIVATE ${MPI_C_INCLUDE_PATH}) +target_link_libraries(mpitest PRIVATE ${MPI_C_LIBRARIES} astaroth_core) diff --git a/src/mpitest/README.txt 
b/src/mpitest/README.txt new file mode 100644 index 0000000..1547b08 --- /dev/null +++ b/src/mpitest/README.txt @@ -0,0 +1 @@ +This directory is used to test MPI with Astaroth. diff --git a/src/mpitest/main.c b/src/mpitest/main.c new file mode 100644 index 0000000..a07522e --- /dev/null +++ b/src/mpitest/main.c @@ -0,0 +1,51 @@ +/* + Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae. + + This file is part of Astaroth. + + Astaroth is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Astaroth is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Astaroth. If not, see <http://www.gnu.org/licenses/>. +*/ +#include <stdio.h> +#include <stdlib.h> + +#include "astaroth.h" + +#include <mpi.h> + +int +main(void) +{ + MPI_Init(NULL, NULL); + + int num_processes, process_id; + MPI_Comm_size(MPI_COMM_WORLD, &num_processes); + MPI_Comm_rank(MPI_COMM_WORLD, &process_id); + + char processor_name[MPI_MAX_PROCESSOR_NAME]; + int name_len; + MPI_Get_processor_name(processor_name, &name_len); + printf("Processor %s. Process %d of %d.\n", processor_name, process_id, num_processes); + + AcMeshInfo info = { + .int_params[AC_nx] = 128, + .int_params[AC_ny] = 64, + .int_params[AC_nz] = 32, + }; + acInit(info); + acIntegrate(0.1f); + acQuit(); + + MPI_Finalize(); + return EXIT_SUCCESS; +} From 5f4246fb42802afa3b50175f4cb89c051e724fc4 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 6 Aug 2019 14:46:13 +0300 Subject: [PATCH 17/18] Standalone now uses O2 optimization level instead of O3.
Also removed -march=native since this causes issues if the program is compiled on a different architecture than it is run on. Since we do not do heavy arithmetic on the host side and the host code is not a performance-critical part of the code, -march=native is not very useful anyway. --- src/core/CMakeLists.txt | 3 +-- src/standalone/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 5cbc271..b56c770 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -12,8 +12,7 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37 -gencode arch=compute_61,code=sm_61 -lineinfo -ftz=true # Flush denormalized floats to zero - -std=c++11 - --compiler-options -march=native) # Native host machine code + -std=c++11) #--maxrregcount=255 # -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache # =cg to opt out diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt index 6a03e32..ed9bda9 100644 --- a/src/standalone/CMakeLists.txt +++ b/src/standalone/CMakeLists.txt @@ -21,7 +21,7 @@ if (BUILD_RT_VISUALIZATION) endif () ## Compilation flags -add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS}) +add_compile_options(-O2 -pipe ${OpenMP_CXX_FLAGS}) add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow) ## Compile and link From e4b981fc6204d330439de92df1e27e764d67caf7 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 6 Aug 2019 14:59:41 +0300 Subject: [PATCH 18/18] Removed the O2 flag since cmake still defines the O3 flag in CMAKE_CXX_FLAGS_RELEASE and it's confusing which one gcc chooses if both O3 and O2 are passed during compilation.
If the issue was the march=native flag then this should also work on Tiara --- src/standalone/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt index ed9bda9..ea1d04c 100644 --- a/src/standalone/CMakeLists.txt +++ b/src/standalone/CMakeLists.txt @@ -21,7 +21,7 @@ if (BUILD_RT_VISUALIZATION) endif () ## Compilation flags -add_compile_options(-O2 -pipe ${OpenMP_CXX_FLAGS}) +add_compile_options(-pipe ${OpenMP_CXX_FLAGS}) add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow) ## Compile and link