From f322bc8b37c81c990a19ccf67c850a7e6d755d98 Mon Sep 17 00:00:00 2001
From: jpekkila <johannes.pekkila@aalto.fi>
Date: Tue, 23 Jul 2019 20:50:37 +0300
Subject: [PATCH] Rewrote all CMakeLists. Now much cleaner and there's a clear
 separation during compilation between the core and standalone modules.

---
 CMakeLists.txt                           | 195 +++++------------------
 config/astaroth.conf                     |   2 +-
 include/astaroth_defines.h               |  24 ++-
 src/core/CMakeLists.txt                  |  59 ++-----
 src/core/kernels/kernels.cuh             |   2 +-
 src/standalone/CMakeLists.txt            |  38 ++++-
 src/standalone/model/model_boundconds.cc |  87 +++++-----
 7 files changed, 148 insertions(+), 259 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a3da8a2..1541682 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,39 +1,39 @@
+###################################
+##  CMakeLists.txt for Astaroth  ##
+###################################
 #
-# CMakeLists.txt for generating the makefile for Astaroth.
-#   Usage: mkdir build && cd build && cmake <optional flags> ..
+#   Usage: mkdir build && cd build && cmake <options> .. && make
 #
-#   For example: cmake -DDOUBLE_PRECISION=ON ..
+#   If you want to see the exact flags used during compilation, compile with
+#   "make VERBOSE=1"
 #
-#   If you want to see the exact flags used during compilation, run
-#   "make -j VERBOSE=1"
+#   Print all options: cmake -LAH ..
 #
-# Make sure your machine satisfies the system requirements:
-# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements
 
-#-------------------General---------------------------------------------------#
 
-project(ASTAROTH_2.0 C CXX)
-set (CMAKE_CXX_STANDARD 11)
+## CMake settings
 cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support
-cmake_policy (SET CMP0023 NEW)
+
+## Project settings
+project(astaroth CXX)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
+
+## Options
+option(BUILD_DEBUG              "Builds the program with extensive error checking"          OFF)
+option(BUILD_STANDALONE         "Builds the standalone Astaroth"                            ON)
+option(BUILD_RT_VISUALIZATION   "Builds the module for real-time visualization using SDL2"  OFF)
+option(DOUBLE_PRECISION         "Generates double precision code"                           OFF)
+option(MULTIGPU_ENABLED         "If enabled, uses all the available GPUs"                   ON)
+option(ALTER_CONF               "If enabled, loads astaroth.conf from the build directory"  OFF)
 
 
-#-------------------Set user options with default values---------------------#
 
-#Usage f.ex. cmake -DBUILD_DEBUG=ON ..
-option(BUILD_DEBUG "Builds the program with extensive error checking" OFF)
-option(BUILD_STANDALONE "Builds standalone Astaroth" ON)
-option(DOUBLE_PRECISION "Generates double precision code" OFF)
-option(TIARA_CLUSTER "Special settings for compilation TIARA GPU cluster" OFF)
-option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON)
-option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF)
-option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
-
-#-------------------Determine build type--------------------------------------#
-
-#Available types (case-sensitive):
-#RELEASE         (best performance)
-#DEBUG           (w/ debug information, non-concurrent kernels)
+## Build types
+# Available types (case-sensitive):
+# RELEASE         (best performance)
+# DEBUG           (w/ debug information, non-concurrent kernels)
 if (BUILD_DEBUG)
     set(CMAKE_BUILD_TYPE DEBUG)
 else ()
@@ -42,144 +42,25 @@ endif()
 message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
 
 
-#----------------------Find packages------------------------------------------#
-
-# C++ compiler info
-message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER})
-message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID})
-
-if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.1)
-        # GCC >= 6.0 is required because of bug 48891. However, the fix seems to
-        # be backported so some older compilers which is why the code may also
-        # compile on gcc >= 4.9.1.
-        message(FATAL_ERROR "GCC version 4.9.1 or higher required")
-    endif()
-endif()
-
-if (BUILD_RT_VISUALIZATION)
-    add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
-    # SDL 2
-    set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
-    set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
-    set(SDL2_LIBRARY "SDL2")
-    include_directories(${SDL2_INCLUDE_DIR})
-    link_directories(${SDL2_LIBRARY_DIR})
-endif()
-
-# CUDA
-find_package(CUDA)
-if (NOT CUDA_FOUND)
-    # find_package(CUDA REQUIRED) gives a confusing error message if it fails,
-    # therefore we print the reason here explicitly
-    message(FATAL_ERROR "CUDA not found")
-endif()
-include_directories(${CUDA_INCLUDE_DIRS})
-
-# OpenMP
-find_package(OpenMP)
-if (NOT OPENMP_FOUND)
-    message(WARNING "OpenMP not found. All host-side concurrency disabled \
-                    (lower performance).")
-else ()
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-endif()
-
-#----------------------Compilation settings-----------------------------------#
-
-#Debug and verification
-#set(CMAKE_VERBOSE_MAKEFILE OFF)
-#set(CXX_VERBOSE_BUILD OFF)
-#set(CUDA_VERBOSE_BUILD OFF)
-#include(CTest)
-#add_test(ac_test ac_run)
-#find_program(MEMORYCHECK_COMMAND valgrind)
-#set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full" )
-
-
-#----------------------Setup defines------------------------------------------#
-
+## Defines
 if (DOUBLE_PRECISION)
-	add_definitions(-DAC_DOUBLE_PRECISION=1)
-else()
+    add_definitions(-DAC_DOUBLE_PRECISION=1)
+else ()
     add_definitions(-DAC_DOUBLE_PRECISION=0)
-endif()
-
-# A full integration step is benchmarked by default, use this flag to override and
-# benchmark RK3 only
-if (GEN_BENCHMARK_RK3)
-    add_definitions(-DGEN_BENCHMARK_RK3=1)
-else()
-    add_definitions(-DGEN_BENCHMARK_RK3=0)
-endif()
-
+endif ()
 if (MULTIGPU_ENABLED)
     add_definitions(-DAC_MULTIGPU_ENABLED=1)
-else()
+else ()
     add_definitions(-DAC_MULTIGPU_ENABLED=0)
-endif()
+endif ()
 
-#-----------------------TIARA specific options--------------------------------#
-#OLD#set (CXX_FLAGS_TIARA "-I/software/opt/cuda/9.0/include/")
-# %JP: NOTE! This should not be needed anymore because the command
-#      find_package(CUDA) above should find and include this directory automatically
-#USE THIS:
-if (TIARA_CLUSTER)
-	set (CXX_FLAGS_TIARA "-mno-bmi2")
-endif()
-
-#----------------------Setup CXX compilation flags----------------------------#
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}\
-                             -O2 -march=native -pipe")
-
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}\
-                             -O0 -g")
-
-if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-    set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion") # TODO: -Wshadow -Wconversion
-elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
-    #MV: -Werror-all disabled because produces cryptical messages preventing compilation.
-    #TODO: Would be good to find an optimal set of warning flags.
-    #set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror-all -Wsign-conversion")
-    set (CXX_FLAGS_WARNING "-Wall -Wextra -Wsign-conversion")
-else()
-    message(WARNING "Using an unknown compiler. Compilation warning flags were not set.")
-endif()
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}\
-                    ${CXX_FLAGS_WARNING}\
-                    ${CXX_FLAGS_ETC}\
-                    ${CXX_FLAGS_TIARA}") # %JP: CXX_FLAGS_TIARA should not be needed,
-					 #      see comments in "TIARA specific options"
-
-message("CXX_FLAGS: " ${CMAKE_CXX_FLAGS})
-
-
-#----------------------Setup core subdirectories------------------------------#
-
-#Include root directory (.) so that the following modules can include their
-#parent dir (f.ex. #include "common/stuff.h" instead of "../common/stuff")
-include_directories(.)
+## Include directories
 include_directories(include)
-include_directories(src)
+include_directories(.) # TODO remove
+include_directories(src) # TODO remove
 
-# CUDA sources
+## Subdirectories
 add_subdirectory(src/core)
-
-#----------------------Link---------------------------------------------------#
-
 if (BUILD_STANDALONE)
-    #Define the config directory
-    if (ALTER_CONF)
-        set(ASTAROTH_CONF_PATH "${CMAKE_BINARY_DIR}/")
-    else()
-        set(ASTAROTH_CONF_PATH "${CMAKE_SOURCE_DIR}/config/")
-    endif()
-
-    #Add additional subdirectories
-    add_subdirectory (src/standalone)
-    cuda_add_executable(ac_run src/standalone/main.cc)
-    target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY})
-endif()
-
-add_subdirectory(ctest)
+    add_subdirectory(src/standalone)
+endif ()
diff --git a/config/astaroth.conf b/config/astaroth.conf
index 41b7e51..32f50a3 100644
--- a/config/astaroth.conf
+++ b/config/astaroth.conf
@@ -40,7 +40,7 @@ AC_chi = 0.0001
 AC_relhel = 0.0
 AC_forcing_magnitude = 1e-5
 AC_kmin              = 0.8
-AC_kmax              = 1.2 
+AC_kmax              = 1.2
 
 
 // Entropy
diff --git a/include/astaroth_defines.h b/include/astaroth_defines.h
index 0915d79..eb89daf 100644
--- a/include/astaroth_defines.h
+++ b/include/astaroth_defines.h
@@ -22,9 +22,27 @@
 extern "C" {
 #endif
 
-#include <float.h>        // FLT_EPSILON, etc
-#include <stdlib.h>       // size_t
-#include <vector_types.h> // CUDA vector types (float4, etc)
+#include <float.h>  // FLT_EPSILON, etc
+#include <stdlib.h> // size_t
+// #include <vector_types.h> // Disabled: non-CUDA compilers use the fallback vector types below
+
+#ifndef __CUDACC__
+typedef struct {
+    int x, y, z;
+} int3;
+
+typedef struct {
+    float x, y;
+} float2;
+
+typedef struct {
+    float x, y, z;
+} float3;
+
+typedef struct {
+    double x, y, z;
+} double3;
+#endif // __CUDACC__
 
 #include "stencil_defines.h"
 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 79560c9..1177995 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -2,59 +2,30 @@
 ##  CMakeLists.txt for Astaroth Core  ##
 ########################################
 
-#----------------------Find CUDA-----------------------------------------------#
-
+## Find packages
 find_package(CUDA 9 REQUIRED)
 
-#----------------------CUDA settings-------------------------------------------#
-
-set(CUDA_SEPARABLE_COMPILATION OFF)
-set(CUDA_PROPAGATE_HOST_FLAGS ON)
-
-#----------------------Setup CUDA compilation flags----------------------------#
-
-# Generate code for the default architecture (Pascal)
+## Architecture and optimization flags
 set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
                     -gencode arch=compute_50,code=sm_50
                     -gencode arch=compute_60,code=sm_60
                     -gencode arch=compute_61,code=sm_61
                     -lineinfo
-                    -ftz=true
-                    -std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero
-# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
-# =cg to opt out
-
-# Additional CUDA optimization flags
-if (CMAKE_BUILD_TYPE MATCHES RELEASE)
-    # Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how
-    # to add more
-    set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE})
-endif()
-
-# Additional CUDA debug flags
-if (CMAKE_BUILD_TYPE MATCHES DEBUG)
-    # The debug flags must be set inside this if clause, since either CMake 3.5
-    # or nvcc 7.5 is bugged:
-    # CMake converts these into empty strings when doing RELEASE build, but nvcc
-    # 7.5 fails to parse empty flags.
-    set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};
-                               --device-debug;
-                               --generate-line-info;
-                               --ptxas-options=-v)
-endif()
-
-set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}")
+                    -ftz=true # Flush denormalized floats to zero
+                    -std=c++11 # Compile CUDA sources as C++11
+                    --compiler-options -march=native) # Native host machine code
+                    #--maxrregcount=255
+                    # -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
+                    # =cg to opt out
 
 
-message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
+set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow
 
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS})
+set(CUDA_NVCC_FLAGS_RELEASE)
+set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v)
 
-#------------------Compile and create a static library-------------------------#
-file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu")
-
-# Use -fPIC if -fpic not supported. Some quick non-scientific tests:
-# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed
-# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed
-# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed
-CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")
+## Create and link the library
+include_directories(.)
+cuda_add_library(astaroth_core STATIC astaroth.cu device.cu)
 target_link_libraries(astaroth_core m)
diff --git a/src/core/kernels/kernels.cuh b/src/core/kernels/kernels.cuh
index 8977d07..2c7d876 100644
--- a/src/core/kernels/kernels.cuh
+++ b/src/core/kernels/kernels.cuh
@@ -708,7 +708,7 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
  */
 
 ////////////////REDUCE///////////////////////////
-#include "src/core/math_utils.h" // is_power_of_two
+#include "math_utils.h" // is_power_of_two
 
 /*
 Reduction steps:
diff --git a/src/standalone/CMakeLists.txt b/src/standalone/CMakeLists.txt
index c6b535b..bb530e5 100644
--- a/src/standalone/CMakeLists.txt
+++ b/src/standalone/CMakeLists.txt
@@ -1,10 +1,34 @@
-################################
-##  CMakeLists.txt for utils  ##
-################################
+##############################################
+##  CMakeLists.txt for Astaroth Standalone  ##
+##############################################
 
+## Files
 file (GLOB SOURCES "*.cc" "model/*.cc")
 
-add_library(astaroth_standalone STATIC ${SOURCES})
-target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\")
-target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\")
+## Find packages
+find_package(OpenMP REQUIRED)
+if (BUILD_RT_VISUALIZATION)
+    add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
+    # SDL 2
+    set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
+    set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
+    set(SDL2_LIBRARY "SDL2")
+    include_directories(${SDL2_INCLUDE_DIR})
+    link_directories(${SDL2_LIBRARY_DIR})
+endif ()
+
+
+## Compilation flags
+add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS})
+add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
+
+## Compile and link
+add_executable(ac_run ${SOURCES})
+target_link_libraries(ac_run PRIVATE "${OpenMP_CXX_FLAGS}" astaroth_core ${SDL2_LIBRARY})
+
+# Define the config directory
+if (ALTER_CONF)
+    target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_BINARY_DIR}/")
+else()
+    target_compile_definitions(ac_run PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/")
+endif()
diff --git a/src/standalone/model/model_boundconds.cc b/src/standalone/model/model_boundconds.cc
index 188b97e..f7a6aca 100644
--- a/src/standalone/model/model_boundconds.cc
+++ b/src/standalone/model/model_boundconds.cc
@@ -28,71 +28,66 @@
 
 #include "core/errchk.h"
 
-
 void
 boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
 {
-    #pragma omp parallel for
+#pragma omp parallel for
     for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
         const int3 start = (int3){0, 0, 0};
-        const int3 end = (int3){
-            mesh_info.int_params[AC_mx],
-            mesh_info.int_params[AC_my],
-            mesh_info.int_params[AC_mz]
-        };
+        const int3 end   = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my],
+                                mesh_info.int_params[AC_mz]};
 
         const int nx = mesh_info.int_params[AC_nx];
         const int ny = mesh_info.int_params[AC_ny];
         const int nz = mesh_info.int_params[AC_nz];
 
-         const int nx_min = mesh_info.int_params[AC_nx_min];
-         const int ny_min = mesh_info.int_params[AC_ny_min];
-         const int nz_min = mesh_info.int_params[AC_nz_min];
+        const int nx_min = mesh_info.int_params[AC_nx_min];
+        const int ny_min = mesh_info.int_params[AC_ny_min];
+        const int nz_min = mesh_info.int_params[AC_nz_min];
 
-         // The old kxt was inclusive, but our mx_max is exclusive
-         const int nx_max = mesh_info.int_params[AC_nx_max];
-         const int ny_max = mesh_info.int_params[AC_ny_max];
-         const int nz_max = mesh_info.int_params[AC_nz_max];
+        // The old kxt was inclusive, but our nx_max is exclusive
+        const int nx_max = mesh_info.int_params[AC_nx_max];
+        const int ny_max = mesh_info.int_params[AC_ny_max];
+        const int nz_max = mesh_info.int_params[AC_nz_max];
 
         for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
-        for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
-        for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
+            for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
+                for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
 
-            // If destination index is inside the computational domain, return since
-            // the boundary conditions are only applied to the ghost zones
-            if (i_dst >= nx_min && i_dst < nx_max &&
-                j_dst >= ny_min && j_dst < ny_max &&
-                k_dst >= nz_min && k_dst < nz_max)
-                continue;
+                    // If destination index is inside the computational domain, skip it since
+                    // the boundary conditions are only applied to the ghost zones
+                    if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max &&
+                        k_dst >= nz_min && k_dst < nz_max)
+                        continue;
 
-            // Find the source index
-            // Map to nx, ny, nz coordinates
-            int i_src = i_dst - nx_min;
-            int j_src = j_dst - ny_min;
-            int k_src = k_dst - nz_min;
+                    // Find the source index
+                    // Map to nx, ny, nz coordinates
+                    int i_src = i_dst - nx_min;
+                    int j_src = j_dst - ny_min;
+                    int k_src = k_dst - nz_min;
 
-            // Translate (s.t. the index is always positive)
-            i_src += nx;
-            j_src += ny;
-            k_src += nz;
+                    // Translate (s.t. the index is always positive)
+                    i_src += nx;
+                    j_src += ny;
+                    k_src += nz;
 
-            // Wrap
-            i_src %= nx;
-            j_src %= ny;
-            k_src %= nz;
+                    // Wrap
+                    i_src %= nx;
+                    j_src %= ny;
+                    k_src %= nz;
 
-            // Map to mx, my, mz coordinates
-            i_src += nx_min;
-            j_src += ny_min;
-            k_src += nz_min;
+                    // Map to mx, my, mz coordinates
+                    i_src += nx_min;
+                    j_src += ny_min;
+                    k_src += nz_min;
 
-            const size_t src_idx      = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
-            const size_t dst_idx      = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
-            ERRCHK(src_idx < acVertexBufferSize(mesh_info));
-            ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
-            mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
-        }
-        }
+                    const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
+                    const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
+                    ERRCHK(src_idx < acVertexBufferSize(mesh_info));
+                    ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
+                    mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
+                }
+            }
         }
     }
 }