Rewrote all CMakeLists.txt files. The build setup is now much cleaner, and there is a clear separation during compilation between the core and standalone modules.

This commit is contained in:
jpekkila
2019-07-23 20:50:37 +03:00
parent b65454d523
commit f322bc8b37
7 changed files with 148 additions and 259 deletions

View File

@@ -2,59 +2,30 @@
## CMakeLists.txt for Astaroth Core ##
########################################
#----------------------Find CUDA-----------------------------------------------#
## Find packages
find_package(CUDA 9 REQUIRED)
#----------------------CUDA settings-------------------------------------------#
set(CUDA_SEPARABLE_COMPILATION OFF)
set(CUDA_PROPAGATE_HOST_FLAGS ON)
#----------------------Setup CUDA compilation flags----------------------------#
# Generate code for the default architecture (Pascal)
## Architecture and optimization flags
set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
-gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60
-gencode arch=compute_61,code=sm_61
-lineinfo
-ftz=true
-std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
# =cg to opt out
# Additional CUDA optimization flags
if (CMAKE_BUILD_TYPE MATCHES RELEASE)
# Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how
# to add more
set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE})
endif()
# Additional CUDA debug flags
if (CMAKE_BUILD_TYPE MATCHES DEBUG)
# The debug flags must be set inside this if clause, since either CMake 3.5
# or nvcc 7.5 is bugged:
# CMake converts these into empty strings when doing RELEASE build, but nvcc
# 7.5 fails to parse empty flags.
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};
--device-debug;
--generate-line-info;
--ptxas-options=-v)
endif()
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}")
-ftz=true # Flush denormalized floats to zero
-std=c++11,
--compiler-options -march=native) # Native host machine code
#--maxrregcount=255
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
# =cg to opt out
message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
## Warning flags
# Host-compiler warnings are handed to nvcc as one comma-separated
# --compiler-options argument; -Werror turns each of them into a build error.
set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow
# Append the architecture/optimization flags and the warning flags to whatever
# CUDA_NVCC_FLAGS already contains.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS})
# Configuration-specific nvcc flags. set() without a value unsets the variable,
# so release builds get no extra flags; debug builds add device debug
# information, line info and verbose ptxas output.
set(CUDA_NVCC_FLAGS_RELEASE)
set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v)
#------------------Compile and create a static library-------------------------#
file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu")
# Use -fPIC if -fpic not supported. Some quick non-scientific tests:
# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed
# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed
# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed
CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")
## Create and link the library
# Directory-scoped include path: headers in this directory can be included
# without a path prefix (e.g. "math_utils.h").
include_directories(.)
# Static library built through the FindCUDA module from the two listed sources.
cuda_add_library(astaroth_core STATIC astaroth.cu device.cu)
# Link against the C math library.
# NOTE(review): the keyword-less form looks deliberate. cuda_add_library
# presumably links the CUDA runtime with the plain signature unless
# CUDA_LINK_LIBRARIES_KEYWORD is set, and CMake rejects mixing plain and
# keyword signatures on one target. Confirm before adding PRIVATE/PUBLIC.
target_link_libraries(astaroth_core m)

View File

@@ -708,7 +708,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
*/
////////////////REDUCE///////////////////////////
#include "src/core/math_utils.h" // is_power_of_two
#include "math_utils.h" // is_power_of_two
/*
Reduction steps:

View File

@@ -1,10 +1,34 @@
################################
## CMakeLists.txt for utils ##
################################
##############################################
## CMakeLists.txt for Astaroth Standalone ##
##############################################
## Files
file (GLOB SOURCES "*.cc" "model/*.cc")
add_library(astaroth_standalone STATIC ${SOURCES})
target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\")
target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\")
## Find packages
# OpenMP is mandatory; the OpenMP_CXX_FLAGS it provides are used by the compile
# and link commands later in this file.
find_package(OpenMP REQUIRED)
# Optional real-time visualization: defines AC_BUILD_RT_VISUALIZATION=1 for the
# targets in this directory and points the build at the SDL2 copy under 3rdparty/.
if (BUILD_RT_VISUALIZATION)
add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
# SDL 2
# NOTE(review): assumes SDL2 has already been built into 3rdparty/SDL2/build/ (confirm).
set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
# Bare library name; it is resolved through the link directory added below.
set(SDL2_LIBRARY "SDL2")
include_directories(${SDL2_INCLUDE_DIR})
# link_directories only affects targets created after it, so it has to stay
# above the add_executable call.
link_directories(${SDL2_LIBRARY_DIR})
endif ()
## Compilation flags
# Directory-scoped options: they apply to targets added after these calls, so
# they must stay above add_executable. -march=native ties the binary to the
# build machine's CPU; -Werror makes every enabled warning fatal.
add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS})
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
## Compile and link
# The standalone executable, built from the source list held in SOURCES.
add_executable(ac_run ${SOURCES})
# The OpenMP flags are repeated here so they also reach the link step.
# SDL2_LIBRARY is only set in the BUILD_RT_VISUALIZATION branch of this file;
# otherwise it presumably expands to nothing (verify it is not set elsewhere).
target_link_libraries(ac_run PRIVATE "${OpenMP_CXX_FLAGS}" astaroth_core ${SDL2_LIBRARY})
# Define the config directory
# CONFIG_PATH is the build directory when ALTER_CONF is enabled, otherwise
# the config/ directory of the source tree.
# NOTE(review): the embedded double quotes are presumably meant to make
# CONFIG_PATH expand to a C string literal; confirm against the generated
# compile command.
if (ALTER_CONF)
target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/")
else()
target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/")
endif()

View File

@@ -28,71 +28,66 @@
#include "core/errchk.h"
/*
 * Applies periodic boundary conditions to every vertex buffer of `mesh`.
 *
 * For each of the NUM_VTXBUF_HANDLES buffers, all cells of the full
 * (AC_mx, AC_my, AC_mz) grid are visited. Cells inside the computational domain
 * [n*_min, n*_max) are skipped. Every other cell (ghost zone) is overwritten with
 * the value of the interior cell found by wrapping its index modulo nx/ny/nz.
 *
 * mesh_info: supplies the grid dimensions and domain bounds through int_params.
 * mesh:      mesh whose vertex_buffer arrays are updated in place.
 *
 * Both computed indices are checked against acVertexBufferSize() with ERRCHK
 * before the copy.
 *
 * NOTE(review): this listing comes from a diff view without +/- markers, so the
 * old and the reformatted version of most statements appear back to back.
 */
void
boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
{
#pragma omp parallel for
#pragma omp parallel for
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
const int3 start = (int3){0, 0, 0};
const int3 end = (int3){
mesh_info.int_params[AC_mx],
mesh_info.int_params[AC_my],
mesh_info.int_params[AC_mz]
};
const int3 end = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my],
mesh_info.int_params[AC_mz]};
const int nx = mesh_info.int_params[AC_nx];
const int ny = mesh_info.int_params[AC_ny];
const int nz = mesh_info.int_params[AC_nz];
const int nx_min = mesh_info.int_params[AC_nx_min];
const int ny_min = mesh_info.int_params[AC_ny_min];
const int nz_min = mesh_info.int_params[AC_nz_min];
const int nx_min = mesh_info.int_params[AC_nx_min];
const int ny_min = mesh_info.int_params[AC_ny_min];
const int nz_min = mesh_info.int_params[AC_nz_min];
// The old kxt was inclusive, but our mx_max is exclusive
const int nx_max = mesh_info.int_params[AC_nx_max];
const int ny_max = mesh_info.int_params[AC_ny_max];
const int nz_max = mesh_info.int_params[AC_nz_max];
// The old kxt was inclusive, but our mx_max is exclusive
const int nx_max = mesh_info.int_params[AC_nx_max];
const int ny_max = mesh_info.int_params[AC_ny_max];
const int nz_max = mesh_info.int_params[AC_nz_max];
for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
// If destination index is inside the computational domain, return since
// the boundary conditions are only applied to the ghost zones
if (i_dst >= nx_min && i_dst < nx_max &&
j_dst >= ny_min && j_dst < ny_max &&
k_dst >= nz_min && k_dst < nz_max)
continue;
// If destination index is inside the computational domain, return since
// the boundary conditions are only applied to the ghost zones
if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max &&
k_dst >= nz_min && k_dst < nz_max)
continue;
// Find the source index
// Map to nx, ny, nz coordinates
int i_src = i_dst - nx_min;
int j_src = j_dst - ny_min;
int k_src = k_dst - nz_min;
// Find the source index
// Map to nx, ny, nz coordinates
int i_src = i_dst - nx_min;
int j_src = j_dst - ny_min;
int k_src = k_dst - nz_min;
// Translate (s.t. the index is always positive)
i_src += nx;
j_src += ny;
k_src += nz;
// Translate (s.t. the index is always positive)
i_src += nx;
j_src += ny;
k_src += nz;
// Wrap
i_src %= nx;
j_src %= ny;
k_src %= nz;
// Wrap
i_src %= nx;
j_src %= ny;
k_src %= nz;
// Map to mx, my, mz coordinates
i_src += nx_min;
j_src += ny_min;
k_src += nz_min;
// Map to mx, my, mz coordinates
i_src += nx_min;
j_src += ny_min;
k_src += nz_min;
const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
ERRCHK(src_idx < acVertexBufferSize(mesh_info));
ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
}
}
const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
ERRCHK(src_idx < acVertexBufferSize(mesh_info));
ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
}
}
}
}
}