From f322bc8b37c81c990a19ccf67c850a7e6d755d98 Mon Sep 17 00:00:00 2001 From: jpekkila Date: Tue, 23 Jul 2019 20:50:37 +0300 Subject: [PATCH] Rewrote all CMakeLists. Now much cleaner and there's a clear separation during compilation between the core and standalone modules. --- CMakeLists.txt | 195 +++++------------------ config/astaroth.conf | 2 +- include/astaroth_defines.h | 24 ++- src/core/CMakeLists.txt | 59 ++----- src/core/kernels/kernels.cuh | 2 +- src/standalone/CMakeLists.txt | 38 ++++- src/standalone/model/model_boundconds.cc | 87 +++++----- 7 files changed, 148 insertions(+), 259 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3da8a2..1541682 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,39 +1,39 @@ +################################### +## CMakeLists.txt for Astaroth ## +################################### # -# CMakeLists.txt for generating the makefile for Astaroth. -# Usage: mkdir build && cd build && cmake .. +# Usage: mkdir build && cd build && cmake .. && make # -# For example: cmake -DDOUBLE_PRECISION=ON .. +# If you want to see the exact flags used during compilation, compile with +# "make VERBOSE=1" # -# If you want to see the exact flags used during compilation, run -# "make -j VERBOSE=1" +# Print all options: cmake -LAH .. # -# Make sure your machine satisfies the system requirements: -# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements -#-------------------General---------------------------------------------------# -project(ASTAROTH_2.0 C CXX) -set (CMAKE_CXX_STANDARD 11) +## CMake settings cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support -cmake_policy (SET CMP0023 NEW) + +## Project settings +project(astaroth CXX) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) + +## Options +option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) +option(BUILD_STANDALONE "Builds the standalone Astaroth" ON) +option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF) +option(DOUBLE_PRECISION "Generates double precision code" OFF) +option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON) +option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF) -#-------------------Set user options with default values---------------------# -#Usage f.ex. cmake -DBUILD_DEBUG=ON .. -option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) -option(BUILD_STANDALONE "Builds standalone Astaroth" ON) -option(DOUBLE_PRECISION "Generates double precision code" OFF) -option(TIARA_CLUSTER "Special settings for compilation TIARA GPU cluster" OFF) -option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON) -option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF) -option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF) - -#-------------------Determine build type--------------------------------------# - -#Available types (case-sensitive): -#RELEASE (best performance) -#DEBUG (w/ debug information, non-concurrent kernels) +## Build types +# Available types (case-sensitive): +# RELEASE (best performance) +# DEBUG (w/ debug information, non-concurrent kernels) if (BUILD_DEBUG) set(CMAKE_BUILD_TYPE DEBUG) else () @@ -42,144 +42,25 @@ endif() message(STATUS "Build type: " ${CMAKE_BUILD_TYPE}) -#----------------------Find packages------------------------------------------# - -# C++ compiler info -message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER}) -message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID}) - -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.1) - # GCC >= 6.0 is required because of bug 48891. However, the fix seems to - # be backported so some older compilers which is why the code may also - # compile on gcc >= 4.9.1. - message(FATAL_ERROR "GCC version 4.9.1 or higher required") - endif() -endif() - -if (BUILD_RT_VISUALIZATION) - add_definitions(-DAC_BUILD_RT_VISUALIZATION=1) - # SDL 2 - set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/) - set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/) - set(SDL2_LIBRARY "SDL2") - include_directories(${SDL2_INCLUDE_DIR}) - link_directories(${SDL2_LIBRARY_DIR}) -endif() - -# CUDA -find_package(CUDA) -if (NOT CUDA_FOUND) - # find_package(CUDA REQUIRED) gives a confusing error message if it fails, - # therefore we print the reason here explicitly - message(FATAL_ERROR "CUDA not found") -endif() -include_directories(${CUDA_INCLUDE_DIRS}) - -# OpenMP -find_package(OpenMP) -if (NOT OPENMP_FOUND) - message(WARNING "OpenMP not found. All host-side concurrency disabled \ - (lower performance).") -else () - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() - -#----------------------Compilation settings-----------------------------------# - -#Debug and verification -#set(CMAKE_VERBOSE_MAKEFILE OFF) -#set(CXX_VERBOSE_BUILD OFF) -#set(CUDA_VERBOSE_BUILD OFF) -#include(CTest) -#add_test(ac_test ac_run) -#find_program(MEMORYCHECK_COMMAND valgrind) -#set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full" ) - - -#----------------------Setup defines------------------------------------------# - +## Defines if (DOUBLE_PRECISION) - add_definitions(-DAC_DOUBLE_PRECISION=1) -else() + add_definitions(-DAC_DOUBLE_PRECISION=1) +else () add_definitions(-DAC_DOUBLE_PRECISION=0) -endif() - -# A full integration step is benchmarked by default, use this flag to override and -# benchmark RK3 only -if (GEN_BENCHMARK_RK3) - add_definitions(-DGEN_BENCHMARK_RK3=1) -else() - add_definitions(-DGEN_BENCHMARK_RK3=0) -endif() - +endif () if (MULTIGPU_ENABLED) add_definitions(-DAC_MULTIGPU_ENABLED=1) -else() +else () add_definitions(-DAC_MULTIGPU_ENABLED=0) -endif() +endif () -#-----------------------TIARA specific options--------------------------------# -#OLD#set (CXX_FLAGS_TIARA "-I/software/opt/cuda/9.0/include/") -# %JP: NOTE! This should not be needed anymore because the command -# find_package(CUDA) above should find and include this directory automatically -#USE THIS: -if (TIARA_CLUSTER) - set (CXX_FLAGS_TIARA "-mno-bmi2") -endif() - -#----------------------Setup CXX compilation flags----------------------------# -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}\ - -O2 -march=native -pipe") - -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}\ - -O0 -g") - -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion") # TODO: -Wshadow -Wconversion -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") - #MV: -Werror-all disabled because produces cryptical messages preventing compilation. - #TODO: Would be good to find an optimal set of warning flags. - #set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror-all -Wsign-conversion") - set (CXX_FLAGS_WARNING "-Wall -Wextra -Wsign-conversion") -else() - message(WARNING "Using an unknown compiler. Compilation warning flags were not set.") -endif() - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}\ - ${CXX_FLAGS_WARNING}\ - ${CXX_FLAGS_ETC}\ - ${CXX_FLAGS_TIARA}") # %JP: CXX_FLAGS_TIARA should not be needed, - # see comments in "TIARA specific options" - -message("CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) - - -#----------------------Setup core subdirectories------------------------------# - -#Include root directory (.) so that the following modules can include their -#parent dir (f.ex. #include "common/stuff.h" instead of "../common/stuff") -include_directories(.) +## Include directories include_directories(include) -include_directories(src) +include_directories(.) # TODO remove +include_directories(src) # TODO remove -# CUDA sources +## Subdirectories add_subdirectory(src/core) - -#----------------------Link---------------------------------------------------# - if (BUILD_STANDALONE) - #Define the config directory - if (ALTER_CONF) - set(ASTAROTH_CONF_PATH "${CMAKE_BINARY_DIR}/") - else() - set(ASTAROTH_CONF_PATH "${CMAKE_SOURCE_DIR}/config/") - endif() - - #Add additional subdirectories - add_subdirectory (src/standalone) - cuda_add_executable(ac_run src/standalone/main.cc) - target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY}) -endif() - -add_subdirectory(ctest) + add_subdirectory(src/standalone) +endif () diff --git a/config/astaroth.conf b/config/astaroth.conf index 41b7e51..32f50a3 100644 --- a/config/astaroth.conf +++ b/config/astaroth.conf @@ -40,7 +40,7 @@ AC_chi = 0.0001 AC_relhel = 0.0 AC_forcing_magnitude = 1e-5 AC_kmin = 0.8 -AC_kmax = 1.2 +AC_kmax = 1.2 // Entropy diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h index 0915d79..eb89daf 100644 --- a/include/astaroth_defines.h +++ b/include/astaroth_defines.h @@ -22,9 +22,27 @@ extern "C" { #endif -#include // FLT_EPSILON, etc -#include // size_t -#include // CUDA vector types (float4, etc) +#include // FLT_EPSILON, etc +#include // size_t +//#include // CUDA vector types (float4, etc) + +#ifndef __CUDACC__ +typedef struct { + int x, y, z; +} int3; + +typedef struct { + float x, y; +} float2; + +typedef struct { + float x, y, z; +} float3; + +typedef struct { + double x, y, z; +} double3; +#endif // __CUDACC__ #include "stencil_defines.h" diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 79560c9..1177995 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -2,59 +2,30 @@ ## CMakeLists.txt for Astaroth Core ## ######################################## -#----------------------Find CUDA-----------------------------------------------# - +## Find packages find_package(CUDA 9 REQUIRED) -#----------------------CUDA settings-------------------------------------------# - -set(CUDA_SEPARABLE_COMPILATION OFF) -set(CUDA_PROPAGATE_HOST_FLAGS ON) - -#----------------------Setup CUDA compilation flags----------------------------# - -# Generate code for the default architecture (Pascal) +## Architecture and optimization flags set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -lineinfo - -ftz=true - -std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero -# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache -# =cg to opt out - -# Additional CUDA optimization flags -if (CMAKE_BUILD_TYPE MATCHES RELEASE) - # Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how - # to add more - set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE}) -endif() - -# Additional CUDA debug flags -if (CMAKE_BUILD_TYPE MATCHES DEBUG) - # The debug flags must be set inside this if clause, since either CMake 3.5 - # or nvcc 7.5 is bugged: - # CMake converts these into empty strings when doing RELEASE build, but nvcc - # 7.5 fails to parse empty flags. - set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG}; - --device-debug; - --generate-line-info; - --ptxas-options=-v) -endif() - -set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}") + -ftz=true # Flush denormalized floats to zero + -std=c++11, + --compiler-options -march=native) # Native host machine code + #--maxrregcount=255 + # -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache + # =cg to opt out -message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS}) +set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow +set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS}) +set(CUDA_NVCC_FLAGS_RELEASE) +set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v) -#------------------Compile and create a static library-------------------------# -file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu") - -# Use -fPIC if -fpic not supported. Some quick non-scientific tests: -# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed -# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed -# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed -CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic") +## Create and link the library +include_directories(.) +cuda_add_library(astaroth_core STATIC astaroth.cu device.cu) target_link_libraries(astaroth_core m) diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh index 8977d07..2c7d876 100644 --- a/src/core/kernels/kernels.cuh +++ b/src/core/kernels/kernels.cuh @@ -708,7 +708,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle) */ ////////////////REDUCE/////////////////////////// -#include "src/core/math_utils.h" // is_power_of_two +#include "math_utils.h" // is_power_of_two /* Reduction steps: diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt index c6b535b..bb530e5 100644 --- a/src/standalone/CMakeLists.txt +++ b/src/standalone/CMakeLists.txt @@ -1,10 +1,34 @@ -################################ -## CMakeLists.txt for utils ## -################################ +############################################## +## CMakeLists.txt for Astaroth Standalone ## +############################################## +## Files file (GLOB SOURCES "*.cc" "model/*.cc") -add_library(astaroth_standalone STATIC ${SOURCES}) -target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\") -target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\") +## Find packages +find_package(OpenMP REQUIRED) +if (BUILD_RT_VISUALIZATION) + add_definitions(-DAC_BUILD_RT_VISUALIZATION=1) + # SDL 2 + set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/) + set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/) + set(SDL2_LIBRARY "SDL2") + include_directories(${SDL2_INCLUDE_DIR}) + link_directories(${SDL2_LIBRARY_DIR}) +endif () + + +## Compilation flags +add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS}) +add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow) + +## Compile and link +add_executable(ac_run ${SOURCES}) +target_link_libraries(ac_run PRIVATE "${OpenMP_CXX_FLAGS}" astaroth_core ${SDL2_LIBRARY}) + +# Define the config directory +if (ALTER_CONF) + target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/") +else() + target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/") +endif() diff --git a/src/standalone/model/model_boundconds.cc b/src/standalone/model/model_boundconds.cc index 188b97e..f7a6aca 100644 --- a/src/standalone/model/model_boundconds.cc +++ b/src/standalone/model/model_boundconds.cc @@ -28,71 +28,66 @@ #include "core/errchk.h" - void boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh) { - #pragma omp parallel for +#pragma omp parallel for for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) { const int3 start = (int3){0, 0, 0}; - const int3 end = (int3){ - mesh_info.int_params[AC_mx], - mesh_info.int_params[AC_my], - mesh_info.int_params[AC_mz] - }; + const int3 end = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my], + mesh_info.int_params[AC_mz]}; const int nx = mesh_info.int_params[AC_nx]; const int ny = mesh_info.int_params[AC_ny]; const int nz = mesh_info.int_params[AC_nz]; - const int nx_min = mesh_info.int_params[AC_nx_min]; - const int ny_min = mesh_info.int_params[AC_ny_min]; - const int nz_min = mesh_info.int_params[AC_nz_min]; + const int nx_min = mesh_info.int_params[AC_nx_min]; + const int ny_min = mesh_info.int_params[AC_ny_min]; + const int nz_min = mesh_info.int_params[AC_nz_min]; - // The old kxt was inclusive, but our mx_max is exclusive - const int nx_max = mesh_info.int_params[AC_nx_max]; - const int ny_max = mesh_info.int_params[AC_ny_max]; - const int nz_max = mesh_info.int_params[AC_nz_max]; + // The old kxt was inclusive, but our mx_max is exclusive + const int nx_max = mesh_info.int_params[AC_nx_max]; + const int ny_max = mesh_info.int_params[AC_ny_max]; + const int nz_max = mesh_info.int_params[AC_nz_max]; for (int k_dst = start.z; k_dst < end.z; ++k_dst) { - for (int j_dst = start.y; j_dst < end.y; ++j_dst) { - for (int i_dst = start.x; i_dst < end.x; ++i_dst) { + for (int j_dst = start.y; j_dst < end.y; ++j_dst) { + for (int i_dst = start.x; i_dst < end.x; ++i_dst) { - // If destination index is inside the computational domain, return since - // the boundary conditions are only applied to the ghost zones - if (i_dst >= nx_min && i_dst < nx_max && - j_dst >= ny_min && j_dst < ny_max && - k_dst >= nz_min && k_dst < nz_max) - continue; + // If destination index is inside the computational domain, return since + // the boundary conditions are only applied to the ghost zones + if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max && + k_dst >= nz_min && k_dst < nz_max) + continue; - // Find the source index - // Map to nx, ny, nz coordinates - int i_src = i_dst - nx_min; - int j_src = j_dst - ny_min; - int k_src = k_dst - nz_min; + // Find the source index + // Map to nx, ny, nz coordinates + int i_src = i_dst - nx_min; + int j_src = j_dst - ny_min; + int k_src = k_dst - nz_min; - // Translate (s.t. the index is always positive) - i_src += nx; - j_src += ny; - k_src += nz; + // Translate (s.t. the index is always positive) + i_src += nx; + j_src += ny; + k_src += nz; - // Wrap - i_src %= nx; - j_src %= ny; - k_src %= nz; + // Wrap + i_src %= nx; + j_src %= ny; + k_src %= nz; - // Map to mx, my, mz coordinates - i_src += nx_min; - j_src += ny_min; - k_src += nz_min; + // Map to mx, my, mz coordinates + i_src += nx_min; + j_src += ny_min; + k_src += nz_min; - const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info); - const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info); - ERRCHK(src_idx < acVertexBufferSize(mesh_info)); - ERRCHK(dst_idx < acVertexBufferSize(mesh_info)); - mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx]; - } - } + const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info); + const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info); + ERRCHK(src_idx < acVertexBufferSize(mesh_info)); + ERRCHK(dst_idx < acVertexBufferSize(mesh_info)); + mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx]; + } + } } } }