Merged in cmakelist_rewrite_and_C_API_conformity_07-26 (pull request #1)

This commit is contained in:
jpekkila
2019-08-07 06:53:17 +03:00
27 changed files with 415 additions and 334 deletions

View File

@@ -1,39 +1,38 @@
###################################
## CMakeLists.txt for Astaroth ##
###################################
# #
# CMakeLists.txt for generating the makefile for Astaroth. # Usage: mkdir build && cd build && cmake <options> .. && make
# Usage: mkdir build && cd build && cmake <optional flags> ..
# #
# For example: cmake -DDOUBLE_PRECISION=ON .. # If you want to see the exact flags used during compilation, compile with
# "make VERBOSE=1"
# #
# If you want to see the exact flags used during compilation, run # Print all options: cmake -LAH ..
# "make -j VERBOSE=1"
# #
# Make sure your machine satisfies the system requirements:
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements
#-------------------General---------------------------------------------------# ## CMake settings
project(ASTAROTH_2.0 CXX)
set (CMAKE_CXX_STANDARD 11)
cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support
cmake_policy (SET CMP0023 NEW) find_program(CMAKE_C_COMPILER NAMES $ENV{CC} gcc PATHS ENV PATH NO_DEFAULT_PATH)
find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++ PATHS ENV PATH NO_DEFAULT_PATH)
## Project settings
project(astaroth C CXX)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
#-------------------Set user options with default values---------------------# ## Options
# User-configurable build switches. Each one can be overridden at configure
# time, e.g. `cmake -DDOUBLE_PRECISION=ON ..`; the last argument of every
# option() call is the default used when the switch is not given.
option(BUILD_DEBUG "Builds the program with extensive error checking" OFF)
option(BUILD_STANDALONE "Builds the standalone Astaroth" ON)
option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
option(BUILD_C_API_TEST "Builds a C program to test whether the API is conformant" OFF)
option(BUILD_MPI_TEST "Builds a C program to test whether MPI works" OFF)
option(DOUBLE_PRECISION "Generates double precision code" OFF)
option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON)
option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF)
#Usage f.ex. cmake -DBUILD_DEBUG=ON .. ## Build types
option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) # Available types (case-sensitive):
option(BUILD_STANDALONE "Builds standalone Astaroth" ON) # RELEASE (best performance)
option(DOUBLE_PRECISION "Generates double precision code" OFF) # DEBUG (w/ debug information, non-concurrent kernels)
option(TIARA_CLUSTER "Special settings for compilation TIARA GPU cluster" OFF)
option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON)
option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF)
option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
#-------------------Determine build type--------------------------------------#
#Available types (case-sensitive):
#RELEASE (best performance)
#DEBUG (w/ debug information, non-concurrent kernels)
if (BUILD_DEBUG) if (BUILD_DEBUG)
set(CMAKE_BUILD_TYPE DEBUG) set(CMAKE_BUILD_TYPE DEBUG)
else () else ()
@@ -41,143 +40,28 @@ else ()
endif() endif()
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE}) message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
## Defines
#----------------------Find packages------------------------------------------#
# C++ compiler info
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER})
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID})
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.1)
# GCC >= 6.0 is required because of bug 48891. However, the fix seems to
# have been backported to some older compilers, which is why the code may
# also compile on gcc >= 4.9.1.
message(FATAL_ERROR "GCC version 4.9.1 or higher required")
endif()
endif()
if (BUILD_RT_VISUALIZATION)
add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
# SDL 2
set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
set(SDL2_LIBRARY "SDL2")
include_directories(${SDL2_INCLUDE_DIR})
link_directories(${SDL2_LIBRARY_DIR})
endif()
# CUDA
find_package(CUDA)
if (NOT CUDA_FOUND)
# find_package(CUDA REQUIRED) gives a confusing error message if it fails,
# therefore we print the reason here explicitly
message(FATAL_ERROR "CUDA not found")
endif()
include_directories(${CUDA_INCLUDE_DIRS})
# OpenMP
find_package(OpenMP)
if (NOT OPENMP_FOUND)
message(WARNING "OpenMP not found. All host-side concurrency disabled \
(lower performance).")
else ()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()
#----------------------Compilation settings-----------------------------------#
#Debug and verification
#set(CMAKE_VERBOSE_MAKEFILE OFF)
#set(CXX_VERBOSE_BUILD OFF)
#set(CUDA_VERBOSE_BUILD OFF)
#include(CTest)
#add_test(ac_test ac_run)
#find_program(MEMORYCHECK_COMMAND valgrind)
#set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full" )
#----------------------Setup defines------------------------------------------#
if (DOUBLE_PRECISION) if (DOUBLE_PRECISION)
add_definitions(-DAC_DOUBLE_PRECISION=1) add_definitions(-DAC_DOUBLE_PRECISION=1)
else() else ()
add_definitions(-DAC_DOUBLE_PRECISION=0) add_definitions(-DAC_DOUBLE_PRECISION=0)
endif() endif ()
# A full integration step is benchmarked by default, use this flag to override and ## Include directories
# benchmark RK3 only
if (GEN_BENCHMARK_RK3)
add_definitions(-DGEN_BENCHMARK_RK3=1)
else()
add_definitions(-DGEN_BENCHMARK_RK3=0)
endif()
if (MULTIGPU_ENABLED)
add_definitions(-DAC_MULTIGPU_ENABLED=1)
else()
add_definitions(-DAC_MULTIGPU_ENABLED=0)
endif()
#-----------------------TIARA specific options--------------------------------#
#OLD#set (CXX_FLAGS_TIARA "-I/software/opt/cuda/9.0/include/")
# %JP: NOTE! This should not be needed anymore because the command
# find_package(CUDA) above should find and include this directory automatically
#USE THIS:
if (TIARA_CLUSTER)
set (CXX_FLAGS_TIARA "-mno-bmi2")
endif()
#----------------------Setup CXX compilation flags----------------------------#
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}\
-O2 -march=native -pipe")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}\
-O0 -g")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion") # TODO: -Wshadow -Wconversion
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
#MV: -Werror-all disabled because produces cryptical messages preventing compilation.
#TODO: Would be good to find an optimal set of warning flags.
#set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror-all -Wsign-conversion")
set (CXX_FLAGS_WARNING "-Wall -Wextra -Wsign-conversion")
else()
message(WARNING "Using an unknown compiler. Compilation warning flags were not set.")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}\
${CXX_FLAGS_WARNING}\
${CXX_FLAGS_ETC}\
${CXX_FLAGS_TIARA}") # %JP: CXX_FLAGS_TIARA should not be needed,
# see comments in "TIARA specific options"
message("CXX_FLAGS: " ${CMAKE_CXX_FLAGS})
#----------------------Setup core subdirectories------------------------------#
#Include root directory (.) so that the following modules can include their
#parent dir (f.ex. #include "common/stuff.h" instead of "../common/stuff")
include_directories(.) include_directories(.)
include_directories(include) include_directories(include)
include_directories(src)
# CUDA sources ## Subdirectories
add_subdirectory(src/core) add_subdirectory(src/core) # The core library
#----------------------Link---------------------------------------------------#
if (BUILD_STANDALONE) if (BUILD_STANDALONE)
#Define the config directory add_subdirectory(src/standalone)
if (ALTER_CONF) endif ()
set(ASTAROTH_CONF_PATH "${CMAKE_BINARY_DIR}/")
else()
set(ASTAROTH_CONF_PATH "${CMAKE_SOURCE_DIR}/config/")
endif()
#Add additional subdirectories if (BUILD_C_API_TEST)
add_subdirectory (src/standalone) add_subdirectory(src/ctest)
cuda_add_executable(ac_run src/standalone/main.cc) endif ()
target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY})
endif() if (BUILD_MPI_TEST)
add_subdirectory(src/mpitest)
endif ()

View File

@@ -40,7 +40,7 @@ AcResult acSynchronizeMesh(void);
initialization of *all memory needed on all GPUs in the node*. In other words, initialization of *all memory needed on all GPUs in the node*. In other words,
sets up everything GPU-side so that calling any other GPU interface function sets up everything GPU-side so that calling any other GPU interface function
afterwards does not result in illegal memory accesses. */ afterwards does not result in illegal memory accesses. */
AcResult acInit(const AcMeshInfo& mesh_info); AcResult acInit(const AcMeshInfo mesh_info);
/** Frees all GPU allocations and resets all devices in the node. Should be /** Frees all GPU allocations and resets all devices in the node. Should be
* called at exit. */ * called at exit. */
@@ -49,23 +49,23 @@ AcResult acQuit(void);
/** Does all three substeps of the RK3 integration and computes the boundary /** Does all three substeps of the RK3 integration and computes the boundary
conditions when necessary. The result is synchronized and the boundary conditions are applied conditions when necessary. The result is synchronized and the boundary conditions are applied
after the final substep, after which the result can be fetched to CPU memory with acStore. */ after the final substep, after which the result can be fetched to CPU memory with acStore. */
AcResult acIntegrate(const AcReal& dt); AcResult acIntegrate(const AcReal dt);
/** Performs a scalar reduction on all GPUs in the node and returns the result. Operates on the /** Performs a scalar reduction on all GPUs in the node and returns the result. Operates on the
* whole computational domain, which must be up to date and synchronized before calling * whole computational domain, which must be up to date and synchronized before calling
* acReduceScal. * acReduceScal.
*/ */
AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a); AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle a);
/** Performs a vector reduction on all GPUs in the node and returns the result. Operates on the /** Performs a vector reduction on all GPUs in the node and returns the result. Operates on the
* whole computational domain, which must be up to date and synchronized before calling * whole computational domain, which must be up to date and synchronized before calling
* acReduceVec. * acReduceVec.
*/ */
AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, AcReal acReduceVec(const ReductionType rtype, const VertexBufferHandle a,
const VertexBufferHandle& b, const VertexBufferHandle& c); const VertexBufferHandle b, const VertexBufferHandle c);
/** Distributes the host mesh among the GPUs in the node. Synchronous. */ /** Distributes the host mesh among the GPUs in the node. Synchronous. */
AcResult acLoad(const AcMesh& host_mesh); AcResult acLoad(const AcMesh host_mesh);
/** Gathers the mesh stored across GPUs in the node and stores it back to host memory. Synchronous. /** Gathers the mesh stored across GPUs in the node and stores it back to host memory. Synchronous.
*/ */
@@ -82,32 +82,38 @@ AcResult acLoadDeviceConstantAsync(const AcRealParam param, const AcReal value,
const StreamType stream); const StreamType stream);
/** Splits a subset of the host_mesh and distributes it among the GPUs in the node. Asynchronous. */ /** Splits a subset of the host_mesh and distributes it among the GPUs in the node. Asynchronous. */
AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices); AcResult acLoadWithOffset(const AcMesh host_mesh, const int3 start, const int num_vertices);
AcResult acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& start, const int num_vertices, AcResult acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 start, const int num_vertices,
const StreamType stream); const StreamType stream);
/** Gathers a subset of the data distributed among the GPUs in the node and stores the mesh back to /** Gathers a subset of the data distributed among the GPUs in the node and stores the mesh back to
* CPU memory. Asynchronous. * CPU memory. Asynchronous.
*/ */
AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh); AcResult acStoreWithOffset(const int3 start, const int num_vertices, AcMesh* host_mesh);
AcResult acStoreWithOffsetAsync(const int3& start, const int num_vertices, AcMesh* host_mesh, AcResult acStoreWithOffsetAsync(const int3 start, const int num_vertices, AcMesh* host_mesh,
const StreamType stream); const StreamType stream);
/** Performs a single RK3 step without computing boundary conditions. Asynchronous.*/ /** Performs a single RK3 step without computing boundary conditions. Asynchronous.*/
AcResult acIntegrateStep(const int& isubstep, const AcReal& dt); AcResult acIntegrateStep(const int isubstep, const AcReal dt);
AcResult acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream); AcResult acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream);
/** Performs a single RK3 step on a subset of the mesh without computing the boundary conditions. /** Performs a single RK3 step on a subset of the mesh without computing the boundary conditions.
* Asynchronous.*/ * Asynchronous.*/
AcResult acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start, AcResult acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start,
const int3& end); const int3 end);
AcResult acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start, AcResult acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start,
const int3& end, const StreamType stream); const int3 end, const StreamType stream);
/** Performs the boundary condition step on the GPUs in the node. Asynchronous. */ /** Performs the boundary condition step on the GPUs in the node. Asynchronous. */
AcResult acBoundcondStep(void); AcResult acBoundcondStep(void);
AcResult acBoundcondStepAsync(const StreamType stream); AcResult acBoundcondStepAsync(const StreamType stream);
/*
* =============================================================================
* Revised interface
* =============================================================================
*/
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
#endif #endif

View File

@@ -22,9 +22,27 @@
extern "C" { extern "C" {
#endif #endif
#include <float.h> // FLT_EPSILON, etc #include <float.h> // FLT_EPSILON, etc
#include <stdlib.h> // size_t #include <stdlib.h> // size_t
#include <vector_types.h> // CUDA vector types (float4, etc) //#include <vector_types.h> // CUDA vector types (float4, etc)
/*
 * Fallback definitions of the small vector types used by the interface.
 * They are only declared when __CUDACC__ is not defined, so translation
 * units that are not compiled by nvcc can use the API without including
 * <vector_types.h>.
 *
 * NOTE(review): CUDA's own float2 is presumably declared with 8-byte
 * alignment; confirm that the layouts agree wherever float2 values are
 * exchanged between host-compiled and nvcc-compiled code.
 */
#ifndef __CUDACC__
/* Three-component integer vector. */
typedef struct {
    int x;
    int y;
    int z;
} int3;

/* Two-component single-precision vector. */
typedef struct {
    float x;
    float y;
} float2;

/* Three-component single-precision vector. */
typedef struct {
    float x;
    float y;
    float z;
} float3;

/* Three-component double-precision vector. */
typedef struct {
    double x;
    double y;
    double z;
} double3;
#endif // __CUDACC__
#include "stencil_defines.h" #include "stencil_defines.h"
@@ -147,31 +165,31 @@ typedef struct {
* ============================================================================= * =============================================================================
*/ */
static inline size_t static inline size_t
acVertexBufferSize(const AcMeshInfo& info) acVertexBufferSize(const AcMeshInfo info)
{ {
return info.int_params[AC_mx] * info.int_params[AC_my] * info.int_params[AC_mz]; return info.int_params[AC_mx] * info.int_params[AC_my] * info.int_params[AC_mz];
} }
static inline size_t static inline size_t
acVertexBufferSizeBytes(const AcMeshInfo& info) acVertexBufferSizeBytes(const AcMeshInfo info)
{ {
return sizeof(AcReal) * acVertexBufferSize(info); return sizeof(AcReal) * acVertexBufferSize(info);
} }
static inline size_t static inline size_t
acVertexBufferCompdomainSize(const AcMeshInfo& info) acVertexBufferCompdomainSize(const AcMeshInfo info)
{ {
return info.int_params[AC_nx] * info.int_params[AC_ny] * info.int_params[AC_nz]; return info.int_params[AC_nx] * info.int_params[AC_ny] * info.int_params[AC_nz];
} }
static inline size_t static inline size_t
acVertexBufferCompdomainSizeBytes(const AcMeshInfo& info) acVertexBufferCompdomainSizeBytes(const AcMeshInfo info)
{ {
return sizeof(AcReal) * acVertexBufferCompdomainSize(info); return sizeof(AcReal) * acVertexBufferCompdomainSize(info);
} }
static inline size_t static inline size_t
acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info) acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo info)
{ {
return i + // return i + //
j * info.int_params[AC_mx] + // j * info.int_params[AC_mx] + //
@@ -180,25 +198,25 @@ acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info)
/* /*
static inline int static inline int
acGetParam(const AcMeshInfo& info, const AcIntParam param) acGetParam(const AcMeshInfo info, const AcIntParam param)
{ {
return info.int_params[param]; return info.int_params[param];
} }
static inline int3 static inline int3
acGetParam(const AcMeshInfo& info, const AcInt3Param param) acGetParam(const AcMeshInfo info, const AcInt3Param param)
{ {
return info.int3_params[param]; return info.int3_params[param];
} }
static inline AcReal static inline AcReal
acGetParam(const AcMeshInfo& info, const AcRealParam param) acGetParam(const AcMeshInfo info, const AcRealParam param)
{ {
return info.real_params[param]; return info.real_params[param];
} }
static inline AcReal3 static inline AcReal3
acGetParam(const AcMeshInfo& info, const AcReal3Param param) acGetParam(const AcMeshInfo info, const AcReal3Param param)
{ {
return info.real3_params[param]; return info.real3_params[param];
} }

View File

@@ -22,7 +22,7 @@ ALTER_CONF=${ALTER_CONF_DEFAULT}
while [ "$#" -gt 0 ] while [ "$#" -gt 0 ]
do do
case $1 in case $1 in
-h|--help) -h|--help)
echo "You can set up a build directory separe of the ASTAROTH_HOME" echo "You can set up a build directory separe of the ASTAROTH_HOME"
echo "Available flags:" echo "Available flags:"
@@ -68,14 +68,13 @@ mkdir -p ${BUILD_DIR}
cd ${BUILD_DIR} cd ${BUILD_DIR}
#Set up the astaroth.conf to be defined and customized in the build directory to #Set up the astaroth.conf to be defined and customized in the build directory to
#not always alter the default use i.e. for unit test etc. #not always alter the default use i.e. for unit test etc.
#Assumed by default if you do this thing anyway. #Assumed by default if you do this thing anyway.
echo "cp ${AC_HOME}/config/astaroth.conf ${PWD}" echo "cp ${AC_HOME}/config/astaroth.conf ${PWD}"
cp ${AC_HOME}/config/astaroth.conf . cp ${AC_HOME}/config/astaroth.conf .
CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}" CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}"
#cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ${AC_HOME} #cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ${AC_HOME}
echo "cmake ${TIARA_SETUP} ${CONF_DIR} -DDOUBLE_PRECISION=${DOUBLE} -DBUILD_DEBUG=${DEBUG_MODE} -DALTER_CONF=${ALTER_CONF} ${AC_HOME}" echo "cmake ${TIARA_SETUP} ${CONF_DIR} -DDOUBLE_PRECISION=${DOUBLE} -DBUILD_DEBUG=${DEBUG_MODE} -DALTER_CONF=${ALTER_CONF} ${AC_HOME}"

View File

@@ -2,58 +2,36 @@
## CMakeLists.txt for Astaroth Core ## ## CMakeLists.txt for Astaroth Core ##
######################################## ########################################
#----------------------Find CUDA-----------------------------------------------# ## Find packages
find_package(CUDA 9 REQUIRED) find_package(CUDA 9 REQUIRED)
#----------------------CUDA settings-------------------------------------------# ## Architecture and optimization flags
set(CUDA_SEPARABLE_COMPILATION OFF)
set(CUDA_PROPAGATE_HOST_FLAGS ON)
#----------------------Setup CUDA compilation flags----------------------------#
# Generate code for the default architecture (Pascal)
set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37 set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
-gencode arch=compute_50,code=sm_50 -gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60 -gencode arch=compute_60,code=sm_60
-gencode arch=compute_61,code=sm_61 -gencode arch=compute_61,code=sm_61
-lineinfo -lineinfo
-ftz=true -ftz=true # Flush denormalized floats to zero
-std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero -std=c++11)
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache #--maxrregcount=255
# =cg to opt out # -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
# =cg to opt out
# Additional CUDA optimization flags
if (CMAKE_BUILD_TYPE MATCHES RELEASE)
# Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how
# to add more
set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE})
endif()
# Additional CUDA debug flags
if (CMAKE_BUILD_TYPE MATCHES DEBUG)
# The debug flags must be set inside this if clause, since either CMake 3.5
# or nvcc 7.5 is bugged:
# CMake converts these into empty strings when doing RELEASE build, but nvcc
# 7.5 fails to parse empty flags.
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};
--device-debug;
--generate-line-info;
--ptxas-options=-v)
endif()
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}")
message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS}) set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS})
set(CUDA_NVCC_FLAGS_RELEASE)
set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v)
#------------------Compile and create a static library-------------------------# ## Definitions
file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu") if (MULTIGPU_ENABLED)
add_definitions(-DAC_MULTIGPU_ENABLED=1)
else ()
add_definitions(-DAC_MULTIGPU_ENABLED=0)
endif ()
# Use -fPIC if -fpic not supported. Some quick non-scientific tests: ## Create and link the library
# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed include_directories(.)
# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed cuda_add_library(astaroth_core STATIC astaroth.cu device.cu)
# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed target_link_libraries(astaroth_core m)
CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")

View File

@@ -127,8 +127,8 @@
#include "errchk.h" #include "errchk.h"
#include "device.cuh" #include "device.cuh"
#include "math_utils.h" // sum for reductions #include "math_utils.h" // sum for reductions
#include "standalone/config_loader.h" // update_config // #include "standalone/config_loader.h" // update_config
#define AC_GEN_STR(X) #X #define AC_GEN_STR(X) #X
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) // const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
@@ -156,7 +156,7 @@ gridIdx(const Grid grid, const int3 idx)
} }
static int3 static int3
gridIdx3d(const Grid& grid, const int idx) gridIdx3d(const Grid grid, const int idx)
{ {
return (int3){idx % grid.m.x, (idx % (grid.m.x * grid.m.y)) / grid.m.x, return (int3){idx % grid.m.x, (idx % (grid.m.x * grid.m.y)) / grid.m.x,
idx / (grid.m.x * grid.m.y)}; idx / (grid.m.x * grid.m.y)};
@@ -168,8 +168,49 @@ printInt3(const int3 vec)
printf("(%d, %d, %d)", vec.x, vec.y, vec.z); printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
} }
/**
 * Prints the contents of a mesh configuration to stdout.
 *
 * Every integer parameter is written first, followed by every real
 * parameter, each on its own line in the form "[name]: value". The names are
 * taken from intparam_names and realparam_names respectively.
 */
static inline void
print(const AcMeshInfo config)
{
    for (int int_idx = 0; int_idx < NUM_INT_PARAMS; ++int_idx) {
        printf("[%s]: %d\n", intparam_names[int_idx], config.int_params[int_idx]);
    }

    for (int real_idx = 0; real_idx < NUM_REAL_PARAMS; ++real_idx) {
        // %g expects a double, so the real value is converted explicitly
        printf("[%s]: %g\n", realparam_names[real_idx],
               static_cast<double>(config.real_params[real_idx]));
    }
}
/**
 * Recomputes the derived integer parameters of `config` in place.
 *
 * Reads AC_nx, AC_ny and AC_nz and writes:
 *  - the padded dimensions AC_mx, AC_my, AC_mz (n + STENCIL_ORDER),
 *  - the computational-domain bounds AC_n{x,y,z}_min and AC_n{x,y,z}_max,
 *  - the helper products AC_mxy, AC_nxy and AC_nxyz.
 *
 * When VERBOSE_PRINTING is enabled, the updated configuration is printed.
 */
static void
update_builtin_params(AcMeshInfo* config)
{
    auto& ip = config->int_params;

    // Padded mesh dimensions
    // PAD TEST (disabled): AC_mx = AC_nx + STENCIL_ORDER + PAD_SIZE
    ip[AC_mx] = ip[AC_nx] + STENCIL_ORDER;
    ip[AC_my] = ip[AC_ny] + STENCIL_ORDER;
    ip[AC_mz] = ip[AC_nz] + STENCIL_ORDER;

    // Lower bounds of the computational domain
    ip[AC_nx_min] = NGHOST;
    ip[AC_ny_min] = NGHOST;
    ip[AC_nz_min] = NGHOST;

    // Upper bounds of the computational domain, i.e. n_min <= i < n_max
    ip[AC_nx_max] = ip[AC_nx_min] + ip[AC_nx];
    ip[AC_ny_max] = ip[AC_ny] + NGHOST;
    ip[AC_nz_max] = ip[AC_nz] + NGHOST;

    // Helper products; AC_nxyz depends on AC_nxy, so the order matters
    ip[AC_mxy]  = ip[AC_mx] * ip[AC_my];
    ip[AC_nxy]  = ip[AC_nx] * ip[AC_ny];
    ip[AC_nxyz] = ip[AC_nxy] * ip[AC_nz];

#if VERBOSE_PRINTING // Defined in astaroth.h
    printf("###############################################################\n");
    printf("Config dimensions recalculated:\n");
    print(*config);
    printf("###############################################################\n");
#endif
}
static Grid static Grid
createGrid(const AcMeshInfo& config) createGrid(const AcMeshInfo config)
{ {
Grid grid; Grid grid;
@@ -246,7 +287,7 @@ acSynchronizeMesh(void)
} }
AcResult AcResult
acInit(const AcMeshInfo& config) acInit(const AcMeshInfo config)
{ {
// Get num_devices // Get num_devices
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices)); ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
@@ -274,7 +315,7 @@ acInit(const AcMeshInfo& config)
// Subgrids // Subgrids
AcMeshInfo subgrid_config = config; AcMeshInfo subgrid_config = config;
subgrid_config.int_params[AC_nz] /= num_devices; subgrid_config.int_params[AC_nz] /= num_devices;
update_config(&subgrid_config); update_builtin_params(&subgrid_config);
subgrid = createGrid(subgrid_config); subgrid = createGrid(subgrid_config);
// Periodic boundary conditions become weird if the system can "fold unto itself". // Periodic boundary conditions become weird if the system can "fold unto itself".
@@ -337,8 +378,8 @@ acQuit(void)
} }
AcResult AcResult
acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start, acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start,
const int3& end, const StreamType stream) const int3 end, const StreamType stream)
{ {
// See the beginning of the file for an explanation of the index mapping // See the beginning of the file for an explanation of the index mapping
// #pragma omp parallel for // #pragma omp parallel for
@@ -360,13 +401,13 @@ acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3
} }
AcResult AcResult
acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start, const int3& end) acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start, const int3 end)
{ {
return acIntegrateStepWithOffsetAsync(isubstep, dt, start, end, STREAM_DEFAULT); return acIntegrateStepWithOffsetAsync(isubstep, dt, start, end, STREAM_DEFAULT);
} }
AcResult AcResult
acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream) acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream)
{ {
const int3 start = (int3){NGHOST, NGHOST, NGHOST}; const int3 start = (int3){NGHOST, NGHOST, NGHOST};
const int3 end = start + grid.n; const int3 end = start + grid.n;
@@ -374,7 +415,7 @@ acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType str
} }
AcResult AcResult
acIntegrateStep(const int& isubstep, const AcReal& dt) acIntegrateStep(const int isubstep, const AcReal dt)
{ {
return acIntegrateStepAsync(isubstep, dt, STREAM_DEFAULT); return acIntegrateStepAsync(isubstep, dt, STREAM_DEFAULT);
} }
@@ -452,7 +493,7 @@ swap_buffers(void)
} }
AcResult AcResult
acIntegrate(const AcReal& dt) acIntegrate(const AcReal dt)
{ {
acSynchronizeStream(STREAM_ALL); acSynchronizeStream(STREAM_ALL);
for (int isubstep = 0; isubstep < 3; ++isubstep) { for (int isubstep = 0; isubstep < 3; ++isubstep) {
@@ -464,7 +505,7 @@ acIntegrate(const AcReal& dt)
} }
static AcReal static AcReal
simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, const int& n) simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const int n)
{ {
AcReal res = results[0]; AcReal res = results[0];
for (int i = 1; i < n; ++i) { for (int i = 1; i < n; ++i) {
@@ -490,7 +531,7 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons
} }
AcReal AcReal
acReduceScal(const ReductionType& rtype, const VertexBufferHandle& vtxbuffer_handle) acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuffer_handle)
{ {
acSynchronizeStream(STREAM_ALL); acSynchronizeStream(STREAM_ALL);
@@ -504,8 +545,8 @@ acReduceScal(const ReductionType& rtype, const VertexBufferHandle& vtxbuffer_han
} }
AcReal AcReal
acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const VertexBufferHandle& b, acReduceVec(const ReductionType rtype, const VertexBufferHandle a, const VertexBufferHandle b,
const VertexBufferHandle& c) const VertexBufferHandle c)
{ {
acSynchronizeStream(STREAM_ALL); acSynchronizeStream(STREAM_ALL);
@@ -519,7 +560,7 @@ acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const Verte
} }
AcResult AcResult
acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_vertices, acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 src, const int num_vertices,
const StreamType stream) const StreamType stream)
{ {
// See the beginning of the file for an explanation of the index mapping // See the beginning of the file for an explanation of the index mapping
@@ -557,13 +598,13 @@ acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_ve
} }
AcResult AcResult
acLoadWithOffset(const AcMesh& host_mesh, const int3& src, const int num_vertices) acLoadWithOffset(const AcMesh host_mesh, const int3 src, const int num_vertices)
{ {
return acLoadWithOffsetAsync(host_mesh, src, num_vertices, STREAM_DEFAULT); return acLoadWithOffsetAsync(host_mesh, src, num_vertices, STREAM_DEFAULT);
} }
AcResult AcResult
acLoad(const AcMesh& host_mesh) acLoad(const AcMesh host_mesh)
{ {
acLoadWithOffset(host_mesh, (int3){0, 0, 0}, acVertexBufferSize(host_mesh.info)); acLoadWithOffset(host_mesh, (int3){0, 0, 0}, acVertexBufferSize(host_mesh.info));
acSynchronizeStream(STREAM_ALL); acSynchronizeStream(STREAM_ALL);
@@ -571,7 +612,7 @@ acLoad(const AcMesh& host_mesh)
} }
AcResult AcResult
acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mesh, acStoreWithOffsetAsync(const int3 src, const int num_vertices, AcMesh* host_mesh,
const StreamType stream) const StreamType stream)
{ {
// See the beginning of the file for an explanation of the index mapping // See the beginning of the file for an explanation of the index mapping
@@ -596,7 +637,7 @@ acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mes
} }
AcResult AcResult
acStoreWithOffset(const int3& src, const int num_vertices, AcMesh* host_mesh) acStoreWithOffset(const int3 src, const int num_vertices, AcMesh* host_mesh)
{ {
return acStoreWithOffsetAsync(src, num_vertices, host_mesh, STREAM_DEFAULT); return acStoreWithOffsetAsync(src, num_vertices, host_mesh, STREAM_DEFAULT);
} }
@@ -624,3 +665,9 @@ acLoadDeviceConstant(const AcRealParam param, const AcReal value)
{ {
return acLoadDeviceConstantAsync(param, value, STREAM_DEFAULT); return acLoadDeviceConstantAsync(param, value, STREAM_DEFAULT);
} }
/*
* =============================================================================
* Revised interface
* =============================================================================
*/

View File

@@ -516,3 +516,9 @@ autoOptimize(const Device device)
#if PACKED_DATA_TRANSFERS #if PACKED_DATA_TRANSFERS
// Functions for calling packed data transfers // Functions for calling packed data transfers
#endif #endif
/*
* =============================================================================
* Revised interface
* =============================================================================
*/

View File

@@ -99,3 +99,9 @@ AcResult autoOptimize(const Device device);
#if PACKED_DATA_TRANSFERS #if PACKED_DATA_TRANSFERS
// Declarations used for packed data transfers // Declarations used for packed data transfers
#endif #endif
/*
* =============================================================================
* Revised interface
* =============================================================================
*/

9
src/ctest/CMakeLists.txt Normal file
View File

@@ -0,0 +1,9 @@
##############################################
## CMakeLists.txt for the C API test ##
##############################################
# C API conformance test: main.c is compiled as strict C11 and linked against
# astaroth_core, so anything in the API that is not valid C fails the build.
add_executable(ctest main.c)
set_target_properties(ctest PROPERTIES
    C_STANDARD 11
    C_STANDARD_REQUIRED ON
)
target_link_libraries(ctest PRIVATE astaroth_core)

1
src/ctest/README.txt Normal file
View File

@@ -0,0 +1 @@
This directory is used to test whether the Astaroth API is compatible with C.

36
src/ctest/main.c Normal file
View File

@@ -0,0 +1,36 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include "astaroth.h"
// Entry point of the C API test. Sets up a small mesh description, then calls
// acInit, acIntegrate and acQuit in that order. The return values of the API
// calls are not inspected; the program always exits with EXIT_SUCCESS.
int
main(void)
{
    // Only the three mesh dimensions are given explicitly. Members without a
    // designator are zero-initialized by the C initializer rules.
    AcMeshInfo mesh_info = {
        .int_params[AC_nx] = 128,
        .int_params[AC_ny] = 64,
        .int_params[AC_nz] = 32,
    };

    acInit(mesh_info);
    acIntegrate(0.1f); // NOTE(review): 0.1f is presumably the timestep - confirm
    acQuit();

    return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,12 @@
##############################################
## CMakeLists.txt for the MPI test ##
##############################################
# MPI is mandatory for this test; configuration stops if it cannot be found.
find_package(MPI REQUIRED)

# MPI smoke test: a C11 program that initializes MPI and then drives Astaroth.
add_executable(mpitest main.c)
set_target_properties(mpitest PROPERTIES
    C_STANDARD 11
    C_STANDARD_REQUIRED ON
)

if (TARGET MPI::MPI_C)
    # Newer FindMPI versions (CMake >= 3.9) provide an imported target that
    # carries the include directories, compile flags and link flags reported by
    # the MPI compiler wrapper. The plain variables below only cover the include
    # paths and libraries, which is not enough for MPI installations that need
    # extra flags.
    target_link_libraries(mpitest PRIVATE MPI::MPI_C astaroth_core)
else ()
    # Fallback for older CMake (the project's declared minimum is 3.5.1).
    target_include_directories(mpitest PRIVATE ${MPI_C_INCLUDE_PATH})
    target_link_libraries(mpitest PRIVATE ${MPI_C_LIBRARIES} astaroth_core)
endif ()

1
src/mpitest/README.txt Normal file
View File

@@ -0,0 +1 @@
This directory is used to test MPI with Astaroth.

51
src/mpitest/main.c Normal file
View File

@@ -0,0 +1,51 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include "astaroth.h"
#include <mpi.h>
// Entry point of the MPI test. Every process initializes MPI, prints which
// processor it runs on together with its rank and the total process count, and
// then calls acInit, acIntegrate and acQuit before finalizing MPI. Return values
// of the MPI and Astaroth calls are not inspected.
int
main(void)
{
    MPI_Init(NULL, NULL);

    // Size of the communicator and this process's index within it
    int nprocs, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Name of the processor this process is running on
    char proc_name[MPI_MAX_PROCESSOR_NAME];
    int proc_name_len;
    MPI_Get_processor_name(proc_name, &proc_name_len);

    printf("Processor %s. Process %d of %d.\n", proc_name, rank, nprocs);

    // Only the three mesh dimensions are given explicitly. Members without a
    // designator are zero-initialized by the C initializer rules.
    AcMeshInfo mesh_info = {
        .int_params[AC_nx] = 128,
        .int_params[AC_ny] = 64,
        .int_params[AC_nz] = 32,
    };

    acInit(mesh_info);
    acIntegrate(0.1f); // NOTE(review): 0.1f is presumably the timestep - confirm
    acQuit();

    MPI_Finalize();
    return EXIT_SUCCESS;
}

View File

@@ -1,10 +1,39 @@
################################ ##############################################
## CMakeLists.txt for utils ## ## CMakeLists.txt for Astaroth Standalone ##
################################ ##############################################
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
## Files
file (GLOB SOURCES "*.cc" "model/*.cc") file (GLOB SOURCES "*.cc" "model/*.cc")
add_library(astaroth_standalone STATIC ${SOURCES}) ## Find packages
target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) find_package(OpenMP REQUIRED)
#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\") if (BUILD_RT_VISUALIZATION)
target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\") add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
# SDL 2
set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
set(SDL2_LIBRARY "SDL2")
include_directories(${SDL2_INCLUDE_DIR})
link_directories(${SDL2_LIBRARY_DIR})
endif ()
## Compile and link
add_library(astaroth_standalone ${SOURCES})
add_executable(ac_run main.cc)

## Compilation flags
# Both targets of this directory are built with the same options: the OpenMP
# flags found by find_package(OpenMP) plus a strict warning set in which every
# warning is an error.
foreach (standalone_target astaroth_standalone ac_run)
    target_compile_options(${standalone_target} PRIVATE
        -pipe ${OpenMP_CXX_FLAGS}
        -Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion) # -Wshadow
endforeach ()

# The OpenMP flags are passed to the linker as well. SDL2_LIBRARY is only set
# when BUILD_RT_VISUALIZATION is enabled and expands to nothing otherwise.
target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})

# Define the config directory
if (ALTER_CONF)
    # ASTAROTH_CONF_PATH supplied by ac_mkbuilddir.sh
    set(standalone_config_dir "${ASTAROTH_CONF_PATH}/")
else ()
    set(standalone_config_dir "${CMAKE_SOURCE_DIR}/config/")
endif ()
# CONFIG_PATH is passed as a quoted string literal to the library sources.
target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${standalone_config_dir}")

View File

@@ -29,7 +29,7 @@
#include <stdio.h> #include <stdio.h>
#include "config_loader.h" #include "config_loader.h"
#include "core/math_utils.h" #include "src/core/math_utils.h"
#include "model/host_forcing.h" #include "model/host_forcing.h"
#include "model/host_memory.h" #include "model/host_memory.h"
#include "model/host_timestep.h" #include "model/host_timestep.h"
@@ -37,7 +37,7 @@
#include "model/model_reduce.h" #include "model/model_reduce.h"
#include "model/model_rk3.h" #include "model/model_rk3.h"
#include "core/errchk.h" #include "src/core/errchk.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

View File

@@ -211,7 +211,7 @@ run_benchmark(void)
#if AUTO_OPTIMIZE #if AUTO_OPTIMIZE
const char* benchmark_path = "benchmark.out"; const char* benchmark_path = "benchmark.out";
#include "core/kernels/rk3_threadblock.conf" #include "src/core/kernels/rk3_threadblock.conf"
static int static int
write_result_to_file(const float& ms_per_step) write_result_to_file(const float& ms_per_step)
{ {

View File

@@ -31,8 +31,8 @@
#include <stdio.h> // print #include <stdio.h> // print
#include <string.h> // memset #include <string.h> // memset
#include "core/errchk.h" #include "src/core/errchk.h"
#include "core/math_utils.h" #include "src/core/math_utils.h"
static inline void static inline void
print(const AcMeshInfo& config) print(const AcMeshInfo& config)

View File

@@ -28,8 +28,8 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "core/errchk.h"
#include "run.h" #include "run.h"
#include "src/core/errchk.h"
// Write all errors from stderr to an <errorlog_name> in the current working // Write all errors from stderr to an <errorlog_name> in the current working
// directory // directory

View File

@@ -26,7 +26,7 @@
*/ */
#include "host_forcing.h" #include "host_forcing.h"
#include "core/math_utils.h" #include "src/core/math_utils.h"
// This is a wrapper for generating random numbers with a chosen system. // This is a wrapper for generating random numbers with a chosen system.
AcReal AcReal

View File

@@ -28,7 +28,7 @@
#include <math.h> #include <math.h>
#include "core/errchk.h" #include "src/core/errchk.h"
#define AC_GEN_STR(X) #X #define AC_GEN_STR(X) #X
const char* init_type_names[] = {AC_FOR_INIT_TYPES(AC_GEN_STR)}; const char* init_type_names[] = {AC_FOR_INIT_TYPES(AC_GEN_STR)};

View File

@@ -26,32 +26,35 @@
*/ */
#include "host_timestep.h" #include "host_timestep.h"
#include "core/math_utils.h" #include "src/core/math_utils.h"
static AcReal timescale = AcReal(1.0); static AcReal timescale = AcReal(1.0);
AcReal AcReal
host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info) host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info)
{ {
const long double cdt = mesh_info.real_params[AC_cdt]; const long double cdt = mesh_info.real_params[AC_cdt];
const long double cdtv = mesh_info.real_params[AC_cdtv]; const long double cdtv = mesh_info.real_params[AC_cdtv];
// const long double cdts = mesh_info.real_params[AC_cdts]; // const long double cdts = mesh_info.real_params[AC_cdts];
const long double cs2_sound = mesh_info.real_params[AC_cs2_sound]; const long double cs2_sound = mesh_info.real_params[AC_cs2_sound];
const long double nu_visc = mesh_info.real_params[AC_nu_visc]; const long double nu_visc = mesh_info.real_params[AC_nu_visc];
const long double eta = mesh_info.real_params[AC_eta]; const long double eta = mesh_info.real_params[AC_eta];
const long double chi = 0; // mesh_info.real_params[AC_chi]; // TODO not calculated const long double chi = 0; // mesh_info.real_params[AC_chi]; // TODO not calculated
const long double gamma = mesh_info.real_params[AC_gamma]; const long double gamma = mesh_info.real_params[AC_gamma];
const long double dsmin = mesh_info.real_params[AC_dsmin]; const long double dsmin = mesh_info.real_params[AC_dsmin];
// Old ones from legacy Astaroth // Old ones from legacy Astaroth
//const long double uu_dt = cdt * (dsmin / (umax + cs_sound)); // const long double uu_dt = cdt * (dsmin / (umax + cs_sound));
//const long double visc_dt = cdtv * dsmin * dsmin / nu_visc; // const long double visc_dt = cdtv * dsmin * dsmin / nu_visc;
// New, closer to the actual Courant timestep // New, closer to the actual Courant timestep
// See Pencil Code user manual p. 38 (timestep section) // See Pencil Code user manual p. 38 (timestep section)
const long double uu_dt = cdt * dsmin / (fabsl(umax) + sqrtl(cs2_sound + 0.0l)); const long double uu_dt = cdt * dsmin / (fabsl(umax) + sqrtl(cs2_sound + 0.0l));
const long double visc_dt = cdtv * dsmin * dsmin / max(max(nu_visc, eta), max(gamma, chi));// + 1; // TODO NOTE: comment the +1 out to get scientifically accurate results const long double visc_dt = cdtv * dsmin * dsmin /
//MV: Why the +1? It was messing up my computations! max(max(nu_visc, eta),
max(gamma, chi)); // + 1; // TODO NOTE: comment the +1 out to
// get scientifically accurate results
// MV: Why the +1? It was messing up my computations!
const long double dt = min(uu_dt, visc_dt); const long double dt = min(uu_dt, visc_dt);
return AcReal(timescale) * AcReal(dt); return AcReal(timescale) * AcReal(dt);

View File

@@ -26,73 +26,68 @@
*/ */
#include "model_boundconds.h" #include "model_boundconds.h"
#include "core/errchk.h" #include "src/core/errchk.h"
void void
boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh) boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
{ {
#pragma omp parallel for #pragma omp parallel for
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) { for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
const int3 start = (int3){0, 0, 0}; const int3 start = (int3){0, 0, 0};
const int3 end = (int3){ const int3 end = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my],
mesh_info.int_params[AC_mx], mesh_info.int_params[AC_mz]};
mesh_info.int_params[AC_my],
mesh_info.int_params[AC_mz]
};
const int nx = mesh_info.int_params[AC_nx]; const int nx = mesh_info.int_params[AC_nx];
const int ny = mesh_info.int_params[AC_ny]; const int ny = mesh_info.int_params[AC_ny];
const int nz = mesh_info.int_params[AC_nz]; const int nz = mesh_info.int_params[AC_nz];
const int nx_min = mesh_info.int_params[AC_nx_min]; const int nx_min = mesh_info.int_params[AC_nx_min];
const int ny_min = mesh_info.int_params[AC_ny_min]; const int ny_min = mesh_info.int_params[AC_ny_min];
const int nz_min = mesh_info.int_params[AC_nz_min]; const int nz_min = mesh_info.int_params[AC_nz_min];
// The old kxt was inclusive, but our mx_max is exclusive // The old kxt was inclusive, but our mx_max is exclusive
const int nx_max = mesh_info.int_params[AC_nx_max]; const int nx_max = mesh_info.int_params[AC_nx_max];
const int ny_max = mesh_info.int_params[AC_ny_max]; const int ny_max = mesh_info.int_params[AC_ny_max];
const int nz_max = mesh_info.int_params[AC_nz_max]; const int nz_max = mesh_info.int_params[AC_nz_max];
for (int k_dst = start.z; k_dst < end.z; ++k_dst) { for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
for (int j_dst = start.y; j_dst < end.y; ++j_dst) { for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
for (int i_dst = start.x; i_dst < end.x; ++i_dst) { for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
// If destination index is inside the computational domain, return since // If destination index is inside the computational domain, return since
// the boundary conditions are only applied to the ghost zones // the boundary conditions are only applied to the ghost zones
if (i_dst >= nx_min && i_dst < nx_max && if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max &&
j_dst >= ny_min && j_dst < ny_max && k_dst >= nz_min && k_dst < nz_max)
k_dst >= nz_min && k_dst < nz_max) continue;
continue;
// Find the source index // Find the source index
// Map to nx, ny, nz coordinates // Map to nx, ny, nz coordinates
int i_src = i_dst - nx_min; int i_src = i_dst - nx_min;
int j_src = j_dst - ny_min; int j_src = j_dst - ny_min;
int k_src = k_dst - nz_min; int k_src = k_dst - nz_min;
// Translate (s.t. the index is always positive) // Translate (s.t. the index is always positive)
i_src += nx; i_src += nx;
j_src += ny; j_src += ny;
k_src += nz; k_src += nz;
// Wrap // Wrap
i_src %= nx; i_src %= nx;
j_src %= ny; j_src %= ny;
k_src %= nz; k_src %= nz;
// Map to mx, my, mz coordinates // Map to mx, my, mz coordinates
i_src += nx_min; i_src += nx_min;
j_src += ny_min; j_src += ny_min;
k_src += nz_min; k_src += nz_min;
const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info); const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info); const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
ERRCHK(src_idx < acVertexBufferSize(mesh_info)); ERRCHK(src_idx < acVertexBufferSize(mesh_info));
ERRCHK(dst_idx < acVertexBufferSize(mesh_info)); ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx]; mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
} }
} }
} }
} }
} }

View File

@@ -25,7 +25,7 @@
* *
*/ */
#pragma once #pragma once
#include "core/errchk.h" #include "src/core/errchk.h"
typedef long double MODEL_REAL; typedef long double MODEL_REAL;

View File

@@ -28,7 +28,7 @@
#include <math.h> #include <math.h>
#include "core/errchk.h" #include "src/core/errchk.h"
// Function pointer definitions // Function pointer definitions
typedef ModelScalar (*ReduceFunc)(const ModelScalar&, const ModelScalar&); typedef ModelScalar (*ReduceFunc)(const ModelScalar&, const ModelScalar&);

View File

@@ -32,8 +32,8 @@
#include <string.h> // memcpy #include <string.h> // memcpy
#include "config_loader.h" #include "config_loader.h"
#include "core/errchk.h" #include "src/core/errchk.h"
#include "core/math_utils.h" #include "src/core/math_utils.h"
#include "model/host_forcing.h" #include "model/host_forcing.h"
#include "model/host_memory.h" #include "model/host_memory.h"
#include "model/host_timestep.h" #include "model/host_timestep.h"
@@ -430,7 +430,7 @@ run_renderer(void)
return 0; return 0;
} }
#else // BUILD_RT_VISUALIZATION == 0 #else // BUILD_RT_VISUALIZATION == 0
#include "core/errchk.h" #include "src/core/errchk.h"
int int
run_renderer(void) run_renderer(void)
{ {

View File

@@ -27,8 +27,8 @@
#include "run.h" #include "run.h"
#include "config_loader.h" #include "config_loader.h"
#include "core/errchk.h" #include "src/core/errchk.h"
#include "core/math_utils.h" #include "src/core/math_utils.h"
#include "model/host_forcing.h" #include "model/host_forcing.h"
#include "model/host_memory.h" #include "model/host_memory.h"
#include "model/host_timestep.h" #include "model/host_timestep.h"