Rewrote all CMakeLists. Now much cleaner and there's a clear separation during compilation between the core and standalone modules.

2019-07-23 20:50:37 +03:00
parent b65454d523
commit f322bc8b37
7 changed files with 148 additions and 259 deletions
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -2,59 +2,30 @@
 ##  CMakeLists.txt for Astaroth Core  ##
 ########################################

-#----------------------Find CUDA-----------------------------------------------#
-
+## Find packages
 find_package(CUDA 9 REQUIRED)

-#----------------------CUDA settings-------------------------------------------#
-
-set(CUDA_SEPARABLE_COMPILATION OFF)
-set(CUDA_PROPAGATE_HOST_FLAGS ON)
-
-#----------------------Setup CUDA compilation flags----------------------------#
-
-# Generate code for the default architecture (Pascal)
+## Architecture and optimization flags
 set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
                    -gencode arch=compute_50,code=sm_50
                    -gencode arch=compute_60,code=sm_60
                    -gencode arch=compute_61,code=sm_61
                    -lineinfo
-                    -ftz=true
-                    -std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero
-# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
-# =cg to opt out
-
-# Additional CUDA optimization flags
-if (CMAKE_BUILD_TYPE MATCHES RELEASE)
-    # Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how
-    # to add more
-    set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE})
-endif()
-
-# Additional CUDA debug flags
-if (CMAKE_BUILD_TYPE MATCHES DEBUG)
-    # The debug flags must be set inside this if clause, since either CMake 3.5
-    # or nvcc 7.5 is bugged:
-    # CMake converts these into empty strings when doing RELEASE build, but nvcc
-    # 7.5 fails to parse empty flags.
-    set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};
-                               --device-debug;
-                               --generate-line-info;
-                               --ptxas-options=-v)
-endif()
-
-set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}")
+                    -ftz=true # Flush denormalized floats to zero
+                    -std=c++11 # No trailing comma: nvcc must receive exactly "-std=c++11"
+                    --compiler-options -march=native) # Native host machine code
+                    # Optional extras, currently disabled:
+                    #   --maxrregcount=255
+                    #   -Xptxas -dlcm=ca opts in to caching all global loads in L1/texture cache (=cg opts out)


-message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
+set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow not enabled
+
+unset(CUDA_NVCC_FLAGS_RELEASE) # Drop any release-specific nvcc flags held in a normal variable
+set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v)
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS}) # Common to all configurations

-#------------------Compile and create a static library-------------------------#
-file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu")
-
-# Use -fPIC if -fpic not supported. Some quick non-scientific tests:
-# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed
-# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed
-# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed
-CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")
+## Build the static core library and link it
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+cuda_add_library(astaroth_core STATIC astaroth.cu device.cu)
 target_link_libraries(astaroth_core m)
--- a/src/core/kernels/kernels.cuh
+++ b/src/core/kernels/kernels.cuh
@@ -708,7 +708,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
 */

 ////////////////REDUCE///////////////////////////
-#include "src/core/math_utils.h" // is_power_of_two
+#include "math_utils.h" // is_power_of_two

 /*
 Reduction steps:
--- a/src/standalone/CMakeLists.txt
+++ b/src/standalone/CMakeLists.txt
@@ -1,10 +1,34 @@
-################################
-##  CMakeLists.txt for utils  ##
-################################
+##############################################
+##  CMakeLists.txt for Astaroth Standalone  ##
+##############################################

+## Files
 file (GLOB SOURCES "*.cc" "model/*.cc")

-add_library(astaroth_standalone STATIC ${SOURCES})
-target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\")
-target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\")
+## Find packages (OpenMP is mandatory; SDL2 is only wired in when BUILD_RT_VISUALIZATION is set)
+find_package(OpenMP REQUIRED)
+if (BUILD_RT_VISUALIZATION)
+    add_definitions(-DAC_BUILD_RT_VISUALIZATION=1) # Directory-scoped preprocessor definition
+    # SDL 2: use the copy under 3rdparty/SDL2 in the top-level source tree
+    set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
+    set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
+    set(SDL2_LIBRARY "SDL2") # Library name passed to target_link_libraries below
+    include_directories(${SDL2_INCLUDE_DIR})
+    link_directories(${SDL2_LIBRARY_DIR})
+endif ()
+
+
+## Compilation flags (directory-scoped: they apply to the targets created below)
+add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS})
+add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion) # -Wshadow is currently disabled
+
+## Compile and link the standalone executable
+add_executable(ac_run ${SOURCES})
+target_link_libraries(ac_run PRIVATE "${OpenMP_CXX_FLAGS}" astaroth_core ${SDL2_LIBRARY})
+
+# CONFIG_PATH: the build directory when ALTER_CONF is set, otherwise the source tree's config/ directory
+if (NOT ALTER_CONF)
+    target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/")
+else()
+    target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/")
+endif()
--- a/src/standalone/model/model_boundconds.cc
+++ b/src/standalone/model/model_boundconds.cc
@@ -28,71 +28,66 @@

 #include "core/errchk.h"

-
 void
 boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
 {
-    #pragma omp parallel for
+#pragma omp parallel for
    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
        const int3 start = (int3){0, 0, 0};
-        const int3 end = (int3){
-            mesh_info.int_params[AC_mx],
-            mesh_info.int_params[AC_my],
-            mesh_info.int_params[AC_mz]
-        };
+        const int3 end   = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my],
+                                mesh_info.int_params[AC_mz]};

        const int nx = mesh_info.int_params[AC_nx];
        const int ny = mesh_info.int_params[AC_ny];
        const int nz = mesh_info.int_params[AC_nz];

-         const int nx_min = mesh_info.int_params[AC_nx_min];
-         const int ny_min = mesh_info.int_params[AC_ny_min];
-         const int nz_min = mesh_info.int_params[AC_nz_min];
+        const int nx_min = mesh_info.int_params[AC_nx_min];
+        const int ny_min = mesh_info.int_params[AC_ny_min];
+        const int nz_min = mesh_info.int_params[AC_nz_min];

-         // The old kxt was inclusive, but our mx_max is exclusive
-         const int nx_max = mesh_info.int_params[AC_nx_max];
-         const int ny_max = mesh_info.int_params[AC_ny_max];
-         const int nz_max = mesh_info.int_params[AC_nz_max];
+        // The old kxt was inclusive, but our n{x,y,z}_max bounds are exclusive
+        const int nx_max = mesh_info.int_params[AC_nx_max];
+        const int ny_max = mesh_info.int_params[AC_ny_max];
+        const int nz_max = mesh_info.int_params[AC_nz_max];

        for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
-        for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
-        for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
+            for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
+                for (int i_dst = start.x; i_dst < end.x; ++i_dst) {

-            // If destination index is inside the computational domain, return since
-            // the boundary conditions are only applied to the ghost zones
-            if (i_dst >= nx_min && i_dst < nx_max &&
-                j_dst >= ny_min && j_dst < ny_max &&
-                k_dst >= nz_min && k_dst < nz_max)
-                continue;
+                    // If the destination index is inside the computational domain, skip it:
+                    // the boundary conditions are only applied to the ghost zones
+                    if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max &&
+                        k_dst >= nz_min && k_dst < nz_max)
+                        continue;

-            // Find the source index
-            // Map to nx, ny, nz coordinates
-            int i_src = i_dst - nx_min;
-            int j_src = j_dst - ny_min;
-            int k_src = k_dst - nz_min;
+                    // Find the source index.
+                    // Map to nx, ny, nz coordinates by subtracting the lower bounds
+                    int i_src = i_dst - nx_min;
+                    int j_src = j_dst - ny_min;
+                    int k_src = k_dst - nz_min;

-            // Translate (s.t. the index is always positive)
-            i_src += nx;
-            j_src += ny;
-            k_src += nz;
+                    // Translate (s.t. the index is always positive)
+                    i_src += nx;
+                    j_src += ny;
+                    k_src += nz;

-            // Wrap
-            i_src %= nx;
-            j_src %= ny;
-            k_src %= nz;
+                    // Wrap modulo nx, ny, nz
+                    i_src %= nx;
+                    j_src %= ny;
+                    k_src %= nz;

-            // Map to mx, my, mz coordinates
-            i_src += nx_min;
-            j_src += ny_min;
-            k_src += nz_min;
+                    // Map back to mx, my, mz coordinates
+                    i_src += nx_min;
+                    j_src += ny_min;
+                    k_src += nz_min;

-            const size_t src_idx      = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
-            const size_t dst_idx      = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
-            ERRCHK(src_idx < acVertexBufferSize(mesh_info));
-            ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
-            mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
-        }
-        }
+                    const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
+                    const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
+                    ERRCHK(src_idx < acVertexBufferSize(mesh_info));
+                    ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
+                    mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
+                }
+            }
        }
    }
 }