Rewrote all CMakeLists.txt files. They are now much cleaner, and there is a clear separation during compilation between the core and standalone modules.

This commit is contained in:
jpekkila
2019-07-23 20:50:37 +03:00
parent b65454d523
commit f322bc8b37
7 changed files with 148 additions and 259 deletions

View File

@@ -1,36 +1,36 @@
###################################
## CMakeLists.txt for Astaroth ##
###################################
#
# CMakeLists.txt for generating the makefile for Astaroth.
# Usage: mkdir build && cd build && cmake <optional flags> ..
# Usage: mkdir build && cd build && cmake <options> .. && make
#
# For example: cmake -DDOUBLE_PRECISION=ON ..
# If you want to see the exact flags used during compilation, compile with
# "make VERBOSE=1"
#
# If you want to see the exact flags used during compilation, run
# "make -j VERBOSE=1"
# Print all options: cmake -LAH ..
#
# Make sure your machine satisfies the system requirements:
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements
#-------------------General---------------------------------------------------#
# Project declaration, language standard and user-configurable options for the
# top-level build. Options are overridden on the command line with -D<NAME>=ON|OFF.
project(ASTAROTH_2.0 C CXX)
set (CMAKE_CXX_STANDARD 11)
## CMake settings
# The minimum is 3.5.1, which is below the 3.8 needed for first-class CUDA
# language support (see the trailing note).
# NOTE(review): CUDA sources therefore appear to be built through the FindCUDA
# module (find_package(CUDA) / cuda_add_library) - confirm.
cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support
# CMP0023 NEW: plain and keyword (PUBLIC/PRIVATE/INTERFACE) signatures of
# target_link_libraries must not be mixed on the same target.
cmake_policy (SET CMP0023 NEW)
## Project settings
project(astaroth CXX)
# Request C++11 and make it a hard requirement instead of letting CMake fall
# back to an older standard when the compiler lacks support.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Place built executables directly in the project's binary (build) directory.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
#-------------------Set user options with default values---------------------#
# Usage example: cmake -DBUILD_DEBUG=ON ..
## Options
# Each option() declares a boolean cache entry: name, help text, default value.
option(BUILD_DEBUG "Builds the program with extensive error checking" OFF)
option(BUILD_STANDALONE "Builds standalone Astaroth" ON)
option(BUILD_STANDALONE "Builds the standalone Astaroth" ON)
option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
option(DOUBLE_PRECISION "Generates double precision code" OFF)
option(TIARA_CLUSTER "Special settings for compilation TIARA GPU cluster" OFF)
option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON)
option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF)
option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
#-------------------Determine build type--------------------------------------#
## Build types
# Available types (case-sensitive):
# RELEASE (best performance)
# DEBUG (w/ debug information, non-concurrent kernels)
@@ -42,144 +42,25 @@ endif()
# Report the selected build type at configure time.
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
#----------------------Find packages------------------------------------------#
# C++ compiler info
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER})
# NOTE(review): the label below says CMAKE_CXX_COMPILER, but the value printed
# is CMAKE_CXX_COMPILER_ID - confirm whether the label should be updated.
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID})
# Abort configuration when the GNU compiler is older than 4.9.1.
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.1)
# GCC >= 6.0 is required because of bug 48891. However, the fix seems to
# have been backported to some older compilers, which is why the code may
# also compile on gcc >= 4.9.1.
message(FATAL_ERROR "GCC version 4.9.1 or higher required")
endif()
endif()
# Optional real-time visualization: define AC_BUILD_RT_VISUALIZATION=1 and point
# the include/link search paths at the SDL2 copy under 3rdparty/SDL2 in the
# source tree (fixed paths; SDL2 is not located with find_package here).
if (BUILD_RT_VISUALIZATION)
add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
# SDL 2
set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
set(SDL2_LIBRARY "SDL2")
include_directories(${SDL2_INCLUDE_DIR})
link_directories(${SDL2_LIBRARY_DIR})
endif()
# CUDA
# CUDA is mandatory: configuration stops with an explicit message if it is
# missing. Its include directories are added for every target in this directory.
find_package(CUDA)
if (NOT CUDA_FOUND)
# find_package(CUDA REQUIRED) gives a confusing error message if it fails,
# therefore we print the reason here explicitly
message(FATAL_ERROR "CUDA not found")
endif()
include_directories(${CUDA_INCLUDE_DIRS})
# OpenMP
# OpenMP is optional: when found, its compiler flags are appended to
# CMAKE_CXX_FLAGS; otherwise only a warning is printed.
find_package(OpenMP)
if (NOT OPENMP_FOUND)
message(WARNING "OpenMP not found. All host-side concurrency disabled \
(lower performance).")
else ()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()
#----------------------Compilation settings-----------------------------------#
#Debug and verification
#set(CMAKE_VERBOSE_MAKEFILE OFF)
#set(CXX_VERBOSE_BUILD OFF)
#set(CUDA_VERBOSE_BUILD OFF)
#include(CTest)
#add_test(ac_test ac_run)
#find_program(MEMORYCHECK_COMMAND valgrind)
#set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full" )
#----------------------Setup defines------------------------------------------#
## Defines
# Each feature switch below is passed to the compiler as an explicit 0/1 macro,
# so the macro is always defined regardless of the option's value.
if (DOUBLE_PRECISION)
add_definitions(-DAC_DOUBLE_PRECISION=1)
else ()
add_definitions(-DAC_DOUBLE_PRECISION=0)
endif ()
# A full integration step is benchmarked by default, use this flag to override and
# benchmark RK3 only
# NOTE(review): no option() declaring GEN_BENCHMARK_RK3 is visible among the
# option() calls of this file; it presumably has to be passed with -D - confirm.
if (GEN_BENCHMARK_RK3)
add_definitions(-DGEN_BENCHMARK_RK3=1)
else()
add_definitions(-DGEN_BENCHMARK_RK3=0)
endif()
if (MULTIGPU_ENABLED)
add_definitions(-DAC_MULTIGPU_ENABLED=1)
else ()
add_definitions(-DAC_MULTIGPU_ENABLED=0)
endif ()
#-----------------------TIARA specific options--------------------------------#
#OLD#set (CXX_FLAGS_TIARA "-I/software/opt/cuda/9.0/include/")
# %JP: NOTE! This should not be needed anymore because the command
# find_package(CUDA) above should find and include this directory automatically
#USE THIS:
# With TIARA_CLUSTER enabled, -mno-bmi2 is added to the C++ flags further below.
if (TIARA_CLUSTER)
set (CXX_FLAGS_TIARA "-mno-bmi2")
endif()
#----------------------Setup CXX compilation flags----------------------------#
# Per-configuration flags: optimized native code for RELEASE, unoptimized code
# with debug information for DEBUG.
# NOTE(review): the trailing backslash joins the two lines of each quoted
# string; whether a space separates the existing flags from the appended ones
# depends on the leading whitespace of the continuation line - confirm.
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}\
-O2 -march=native -pipe")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}\
-O0 -g")
# Warning flags are chosen per compiler; warnings are promoted to errors
# (-Werror) only for GNU.
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion") # TODO: -Wshadow -Wconversion
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
#MV: -Werror-all is disabled because it produces cryptic messages that prevent compilation.
#TODO: It would be good to find an optimal set of warning flags.
#set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror-all -Wsign-conversion")
set (CXX_FLAGS_WARNING "-Wall -Wextra -Wsign-conversion")
else()
message(WARNING "Using an unknown compiler. Compilation warning flags were not set.")
endif()
# Append the warning, extra and cluster-specific flags to the global C++ flags.
# NOTE(review): CXX_FLAGS_ETC is not set anywhere in the visible part of this
# file; it presumably expands to an empty string unless provided by the user.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}\
${CXX_FLAGS_WARNING}\
${CXX_FLAGS_ETC}\
${CXX_FLAGS_TIARA}") # %JP: CXX_FLAGS_TIARA should not be needed,
# see comments in "TIARA specific options"
message("CXX_FLAGS: " ${CMAKE_CXX_FLAGS})
#----------------------Setup core subdirectories------------------------------#
# Include the root directory (.) so that the following modules can include
# headers relative to it (e.g. "common/stuff.h" instead of "../common/stuff").
include_directories(.)
## Include directories
# Directory-scoped include paths; they apply to the targets of this directory
# and of the subdirectories added below.
include_directories(include)
include_directories(src)
include_directories(.) # TODO remove
include_directories(src) # TODO remove
# CUDA sources
## Subdirectories
# The core library is always processed.
add_subdirectory(src/core)
#----------------------Link---------------------------------------------------#
# The standalone program is only configured when BUILD_STANDALONE is ON.
if (BUILD_STANDALONE)
# Define the config directory: the build directory when ALTER_CONF is ON,
# otherwise the config/ directory of the source tree.
if (ALTER_CONF)
set(ASTAROTH_CONF_PATH "${CMAKE_BINARY_DIR}/")
else()
set(ASTAROTH_CONF_PATH "${CMAKE_SOURCE_DIR}/config/")
endif()
# Add additional subdirectories
add_subdirectory(src/standalone)
# Build the ac_run executable from main.cc and link it against the standalone
# and core libraries plus SDL2 (SDL2_LIBRARY is only set when
# BUILD_RT_VISUALIZATION is ON).
cuda_add_executable(ac_run src/standalone/main.cc)
target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY})
endif ()
add_subdirectory(ctest)

View File

@@ -24,7 +24,25 @@ extern "C" {
#include <float.h> // FLT_EPSILON, etc
#include <stdlib.h> // size_t
#include <vector_types.h> // CUDA vector types (float4, etc)
//#include <vector_types.h> // CUDA vector types (float4, etc)
#ifndef __CUDACC__
// Fallback definitions of the small vector types used by this header. They are
// only declared when __CUDACC__ is not defined, i.e. presumably when the
// translation unit is not compiled by nvcc and the CUDA-provided types of the
// same names are unavailable.
// NOTE(review): CUDA's own float2 is documented as 8-byte aligned, whereas the
// plain struct below has the alignment of float - confirm that no structure
// containing float2 is shared between host-only and nvcc-compiled code.

// Three-component integer vector.
typedef struct {
    int x;
    int y;
    int z;
} int3;

// Two-component single-precision vector.
typedef struct {
    float x;
    float y;
} float2;

// Three-component single-precision vector.
typedef struct {
    float x;
    float y;
    float z;
} float3;

// Three-component double-precision vector.
typedef struct {
    double x;
    double y;
    double z;
} double3;
#endif // __CUDACC__
#include "stencil_defines.h"

View File

@@ -2,59 +2,30 @@
## CMakeLists.txt for Astaroth Core ##
########################################
#----------------------Find CUDA-----------------------------------------------#
## Find packages
# The core library requires CUDA (version 9 requested); configuration fails if
# it is not found.
find_package(CUDA 9 REQUIRED)
#----------------------CUDA settings-------------------------------------------#
# FindCUDA settings: no separable compilation, and forward the host compiler
# flags to nvcc.
set(CUDA_SEPARABLE_COMPILATION OFF)
set(CUDA_PROPAGATE_HOST_FLAGS ON)
#----------------------Setup CUDA compilation flags----------------------------#
# Generate code for compute capabilities 3.7, 5.0, 6.0 and 6.1
## Architecture and optimization flags
set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
-gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60
-gencode arch=compute_61,code=sm_61
-lineinfo
-ftz=true
-std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero
-ftz=true # Flush denormalized floats to zero
# NOTE(review): the trailing comma after -std=c++11 below looks unintended -
# confirm that nvcc accepts it, or remove it.
-std=c++11,
--compiler-options -march=native) # Native host machine code
#--maxrregcount=255
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
# =cg to opt out
# Additional CUDA optimization flags
if (CMAKE_BUILD_TYPE MATCHES RELEASE)
# Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how
# to add more
set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE})
endif()
# Additional CUDA debug flags
if (CMAKE_BUILD_TYPE MATCHES DEBUG)
# The debug flags must be set inside this if clause, since either CMake 3.5
# or nvcc 7.5 is bugged:
# CMake converts these into empty strings when doing RELEASE build, but nvcc
# 7.5 fails to parse empty flags.
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};
--device-debug;
--generate-line-info;
--ptxas-options=-v)
endif()
# Host-compiler warning flags, forwarded through nvcc as a single
# comma-separated --compiler-options argument.
set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}")
# Flags used for every configuration: architecture/optimization plus warnings.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS})
# No extra flags for RELEASE (set() without a value clears the variable);
# DEBUG adds device debug information, line info and verbose ptxas output.
set(CUDA_NVCC_FLAGS_RELEASE)
set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v)
message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
#------------------Compile and create a static library-------------------------#
file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu")
# Use -fPIC if -fpic not supported. Some quick non-scientific tests:
# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed
# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed
# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed
CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")
## Create and link the library
# Make headers in this directory includable without a path prefix, build the
# static library astaroth_core from the two listed CUDA sources and link it
# against the math library (m).
include_directories(.)
cuda_add_library(astaroth_core STATIC astaroth.cu device.cu)
target_link_libraries(astaroth_core m)

View File

@@ -708,7 +708,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
*/
////////////////REDUCE///////////////////////////
#include "src/core/math_utils.h" // is_power_of_two
#include "math_utils.h" // is_power_of_two
/*
Reduction steps:

View File

@@ -1,10 +1,34 @@
################################
## CMakeLists.txt for utils ##
################################
##############################################
## CMakeLists.txt for Astaroth Standalone ##
##############################################
## Files
# Collect the standalone sources by globbing.
# NOTE(review): files added later are not picked up until CMake is re-run;
# consider an explicit source list.
file (GLOB SOURCES "*.cc" "model/*.cc")
add_library(astaroth_standalone STATIC ${SOURCES})
target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\")
target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\")
## Find packages
# OpenMP is mandatory for the standalone module.
find_package(OpenMP REQUIRED)
# Optional real-time visualization: define AC_BUILD_RT_VISUALIZATION=1 and use
# the SDL2 copy under 3rdparty/SDL2 in the source tree (fixed paths).
if (BUILD_RT_VISUALIZATION)
add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
# SDL 2
set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
set(SDL2_LIBRARY "SDL2")
include_directories(${SDL2_INCLUDE_DIR})
link_directories(${SDL2_LIBRARY_DIR})
endif ()
## Compilation flags
# Directory-scoped options: native code generation, pipes between compiler
# stages, the OpenMP flags, and warnings treated as errors.
add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS})
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
## Compile and link
# Build the ac_run executable from the globbed sources. The OpenMP flags are
# also passed at link time, together with the core library and SDL2
# (SDL2_LIBRARY is only set when BUILD_RT_VISUALIZATION is ON).
add_executable(ac_run ${SOURCES})
target_link_libraries(ac_run PRIVATE "${OpenMP_CXX_FLAGS}" astaroth_core ${SDL2_LIBRARY})
# Define the config directory: CONFIG_PATH is compiled into ac_run as a string
# pointing at the build directory when ALTER_CONF is ON, otherwise at the
# config/ directory of the source tree.
if (ALTER_CONF)
target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/")
else()
target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/")
endif()

View File

@@ -28,18 +28,14 @@
#include "core/errchk.h"
void
boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
{
#pragma omp parallel for
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
const int3 start = (int3){0, 0, 0};
const int3 end = (int3){
mesh_info.int_params[AC_mx],
mesh_info.int_params[AC_my],
mesh_info.int_params[AC_mz]
};
const int3 end = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my],
mesh_info.int_params[AC_mz]};
const int nx = mesh_info.int_params[AC_nx];
const int ny = mesh_info.int_params[AC_ny];
@@ -60,8 +56,7 @@ boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
// If destination index is inside the computational domain, return since
// the boundary conditions are only applied to the ghost zones
if (i_dst >= nx_min && i_dst < nx_max &&
j_dst >= ny_min && j_dst < ny_max &&
if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max &&
k_dst >= nz_min && k_dst < nz_max)
continue;