Merge branch 'master' into bugfix/upwind_autotest_20190807
This commit is contained in:
200
CMakeLists.txt
200
CMakeLists.txt
@@ -1,39 +1,38 @@
|
|||||||
|
###################################
|
||||||
|
## CMakeLists.txt for Astaroth ##
|
||||||
|
###################################
|
||||||
#
|
#
|
||||||
# CMakeLists.txt for generating the makefile for Astaroth.
|
# Usage: mkdir build && cd build && cmake <options> .. && make
|
||||||
# Usage: mkdir build && cd build && cmake <optional flags> ..
|
|
||||||
#
|
#
|
||||||
# For example: cmake -DDOUBLE_PRECISION=ON ..
|
# If you want to see the exact flags used during compilation, compile with
|
||||||
|
# "make VERBOSE=1"
|
||||||
#
|
#
|
||||||
# If you want to see the exact flags used during compilation, run
|
# Print all options: cmake -LAH ..
|
||||||
# "make -j VERBOSE=1"
|
|
||||||
#
|
#
|
||||||
# Make sure your machine satisfies the system requirements:
|
|
||||||
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements
|
|
||||||
|
|
||||||
#-------------------General---------------------------------------------------#
|
## CMake settings
|
||||||
|
|
||||||
project(ASTAROTH_2.0 CXX)
|
|
||||||
set (CMAKE_CXX_STANDARD 11)
|
|
||||||
cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support
|
cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support
|
||||||
cmake_policy (SET CMP0023 NEW)
|
find_program(CMAKE_C_COMPILER NAMES $ENV{CC} gcc PATHS ENV PATH NO_DEFAULT_PATH)
|
||||||
|
find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++ PATHS ENV PATH NO_DEFAULT_PATH)
|
||||||
|
|
||||||
|
## Project settings
|
||||||
|
project(astaroth C CXX)
|
||||||
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||||
|
|
||||||
#-------------------Set user options with default values---------------------#
|
## Options
|
||||||
|
option(BUILD_DEBUG "Builds the program with extensive error checking" OFF)
|
||||||
|
option(BUILD_STANDALONE "Builds the standalone Astaroth" ON)
|
||||||
|
option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
|
||||||
|
option(BUILD_C_API_TEST "Builds a C program to test whether the API is conformant" OFF)
|
||||||
|
option(BUILD_MPI_TEST "Builds a C program to test whether MPI works" OFF)
|
||||||
|
option(DOUBLE_PRECISION "Generates double precision code" OFF)
|
||||||
|
option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON)
|
||||||
|
option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF)
|
||||||
|
|
||||||
#Usage f.ex. cmake -DBUILD_DEBUG=ON ..
|
## Build types
|
||||||
option(BUILD_DEBUG "Builds the program with extensive error checking" OFF)
|
# Available types (case-sensitive):
|
||||||
option(BUILD_STANDALONE "Builds standalone Astaroth" ON)
|
# RELEASE (best performance)
|
||||||
option(DOUBLE_PRECISION "Generates double precision code" OFF)
|
# DEBUG (w/ debug information, non-concurrent kernels)
|
||||||
option(TIARA_CLUSTER "Special settings for compilation TIARA GPU cluster" OFF)
|
|
||||||
option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON)
|
|
||||||
option(ALTER_CONF "If enabled, loads astaroth.conf from the build directory" OFF)
|
|
||||||
option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
|
|
||||||
|
|
||||||
#-------------------Determine build type--------------------------------------#
|
|
||||||
|
|
||||||
#Available types (case-sensitive):
|
|
||||||
#RELEASE (best performance)
|
|
||||||
#DEBUG (w/ debug information, non-concurrent kernels)
|
|
||||||
if (BUILD_DEBUG)
|
if (BUILD_DEBUG)
|
||||||
set(CMAKE_BUILD_TYPE DEBUG)
|
set(CMAKE_BUILD_TYPE DEBUG)
|
||||||
else ()
|
else ()
|
||||||
@@ -41,143 +40,28 @@ else ()
|
|||||||
endif()
|
endif()
|
||||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||||
|
|
||||||
|
## Defines
|
||||||
#----------------------Find packages------------------------------------------#
|
|
||||||
|
|
||||||
# C++ compiler info
|
|
||||||
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER})
|
|
||||||
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER_ID})
|
|
||||||
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.1)
|
|
||||||
# GCC >= 6.0 is required because of bug 48891. However, the fix seems to
|
|
||||||
# be backported to some older compilers, which is why the code may also
|
|
||||||
# compile on gcc >= 4.9.1.
|
|
||||||
message(FATAL_ERROR "GCC version 4.9.1 or higher required")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (BUILD_RT_VISUALIZATION)
|
|
||||||
add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
|
|
||||||
# SDL 2
|
|
||||||
set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
|
|
||||||
set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
|
|
||||||
set(SDL2_LIBRARY "SDL2")
|
|
||||||
include_directories(${SDL2_INCLUDE_DIR})
|
|
||||||
link_directories(${SDL2_LIBRARY_DIR})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# CUDA
|
|
||||||
find_package(CUDA)
|
|
||||||
if (NOT CUDA_FOUND)
|
|
||||||
# find_package(CUDA REQUIRED) gives a confusing error message if it fails,
|
|
||||||
# therefore we print the reason here explicitly
|
|
||||||
message(FATAL_ERROR "CUDA not found")
|
|
||||||
endif()
|
|
||||||
include_directories(${CUDA_INCLUDE_DIRS})
|
|
||||||
|
|
||||||
# OpenMP
|
|
||||||
find_package(OpenMP)
|
|
||||||
if (NOT OPENMP_FOUND)
|
|
||||||
message(WARNING "OpenMP not found. All host-side concurrency disabled \
|
|
||||||
(lower performance).")
|
|
||||||
else ()
|
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#----------------------Compilation settings-----------------------------------#
|
|
||||||
|
|
||||||
#Debug and verification
|
|
||||||
#set(CMAKE_VERBOSE_MAKEFILE OFF)
|
|
||||||
#set(CXX_VERBOSE_BUILD OFF)
|
|
||||||
#set(CUDA_VERBOSE_BUILD OFF)
|
|
||||||
#include(CTest)
|
|
||||||
#add_test(ac_test ac_run)
|
|
||||||
#find_program(MEMORYCHECK_COMMAND valgrind)
|
|
||||||
#set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full" )
|
|
||||||
|
|
||||||
|
|
||||||
#----------------------Setup defines------------------------------------------#
|
|
||||||
|
|
||||||
if (DOUBLE_PRECISION)
|
if (DOUBLE_PRECISION)
|
||||||
add_definitions(-DAC_DOUBLE_PRECISION=1)
|
add_definitions(-DAC_DOUBLE_PRECISION=1)
|
||||||
else()
|
else ()
|
||||||
add_definitions(-DAC_DOUBLE_PRECISION=0)
|
add_definitions(-DAC_DOUBLE_PRECISION=0)
|
||||||
endif()
|
endif ()
|
||||||
|
|
||||||
# A full integration step is benchmarked by default, use this flag to override and
|
## Include directories
|
||||||
# benchmark RK3 only
|
|
||||||
if (GEN_BENCHMARK_RK3)
|
|
||||||
add_definitions(-DGEN_BENCHMARK_RK3=1)
|
|
||||||
else()
|
|
||||||
add_definitions(-DGEN_BENCHMARK_RK3=0)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (MULTIGPU_ENABLED)
|
|
||||||
add_definitions(-DAC_MULTIGPU_ENABLED=1)
|
|
||||||
else()
|
|
||||||
add_definitions(-DAC_MULTIGPU_ENABLED=0)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#-----------------------TIARA specific options--------------------------------#
|
|
||||||
#OLD#set (CXX_FLAGS_TIARA "-I/software/opt/cuda/9.0/include/")
|
|
||||||
# %JP: NOTE! This should not be needed anymore because the command
|
|
||||||
# find_package(CUDA) above should find and include this directory automatically
|
|
||||||
#USE THIS:
|
|
||||||
if (TIARA_CLUSTER)
|
|
||||||
set (CXX_FLAGS_TIARA "-mno-bmi2")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#----------------------Setup CXX compilation flags----------------------------#
|
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}\
|
|
||||||
-O2 -march=native -pipe")
|
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}\
|
|
||||||
-O0 -g")
|
|
||||||
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
|
||||||
set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion") # TODO: -Wshadow -Wconversion
|
|
||||||
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
|
|
||||||
#MV: -Werror-all disabled because produces cryptical messages preventing compilation.
|
|
||||||
#TODO: Would be good to find an optimal set of warning flags.
|
|
||||||
#set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror-all -Wsign-conversion")
|
|
||||||
set (CXX_FLAGS_WARNING "-Wall -Wextra -Wsign-conversion")
|
|
||||||
else()
|
|
||||||
message(WARNING "Using an unknown compiler. Compilation warning flags were not set.")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}\
|
|
||||||
${CXX_FLAGS_WARNING}\
|
|
||||||
${CXX_FLAGS_ETC}\
|
|
||||||
${CXX_FLAGS_TIARA}") # %JP: CXX_FLAGS_TIARA should not be needed,
|
|
||||||
# see comments in "TIARA specific options"
|
|
||||||
|
|
||||||
message("CXX_FLAGS: " ${CMAKE_CXX_FLAGS})
|
|
||||||
|
|
||||||
|
|
||||||
#----------------------Setup core subdirectories------------------------------#
|
|
||||||
|
|
||||||
#Include root directory (.) so that the following modules can include their
|
|
||||||
#parent dir (f.ex. #include "common/stuff.h" instead of "../common/stuff")
|
|
||||||
include_directories(.)
|
include_directories(.)
|
||||||
include_directories(include)
|
include_directories(include)
|
||||||
include_directories(src)
|
|
||||||
|
|
||||||
# CUDA sources
|
## Subdirectories
|
||||||
add_subdirectory(src/core)
|
add_subdirectory(src/core) # The core library
|
||||||
|
|
||||||
#----------------------Link---------------------------------------------------#
|
|
||||||
|
|
||||||
if (BUILD_STANDALONE)
|
if (BUILD_STANDALONE)
|
||||||
#Define the config directory
|
add_subdirectory(src/standalone)
|
||||||
if (ALTER_CONF)
|
endif ()
|
||||||
set(ASTAROTH_CONF_PATH "${CMAKE_BINARY_DIR}/")
|
|
||||||
else()
|
|
||||||
set(ASTAROTH_CONF_PATH "${CMAKE_SOURCE_DIR}/config/")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#Add additional subdirectories
|
if (BUILD_C_API_TEST)
|
||||||
add_subdirectory (src/standalone)
|
add_subdirectory(src/ctest)
|
||||||
cuda_add_executable(ac_run src/standalone/main.cc)
|
endif ()
|
||||||
target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY})
|
|
||||||
endif()
|
if (BUILD_MPI_TEST)
|
||||||
|
add_subdirectory(src/mpitest)
|
||||||
|
endif ()
|
||||||
|
@@ -222,13 +222,16 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto
|
|||||||
// JP: This looks wrong:
|
// JP: This looks wrong:
|
||||||
// 1) Should it be dsx * nx instead of dsx * ny?
|
// 1) Should it be dsx * nx instead of dsx * ny?
|
||||||
// 2) Should you also use globalGrid.n instead of the local n?
|
// 2) Should you also use globalGrid.n instead of the local n?
|
||||||
|
// MV: You are right. Made a quickfix. I did not see the error because multigpu is split
|
||||||
|
// in z direction not y direction.
|
||||||
// 3) Also final point: can we do this with vectors/quaternions instead?
|
// 3) Also final point: can we do this with vectors/quaternions instead?
|
||||||
// Trigonometric functions are much more expensive and inaccurate/
|
// Trigonometric functions are much more expensive and inaccurate/
|
||||||
|
// MV: Good idea. Not an immediate priority.
|
||||||
// Fun related article:
|
// Fun related article:
|
||||||
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
|
// https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
|
||||||
xx.x = xx.x*(2.0*M_PI/(dsx*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
xx.x = xx.x*(2.0*M_PI/(dsx*globalGrid.n.x));
|
||||||
xx.y = xx.y*(2.0*M_PI/(dsy*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
xx.y = xx.y*(2.0*M_PI/(dsy*globalGrid.n.y));
|
||||||
xx.z = xx.z*(2.0*M_PI/(dsz*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
|
xx.z = xx.z*(2.0*M_PI/(dsz*globalGrid.n.z));
|
||||||
|
|
||||||
Scalar cos_phi = cos(phi);
|
Scalar cos_phi = cos(phi);
|
||||||
Scalar sin_phi = sin(phi);
|
Scalar sin_phi = sin(phi);
|
||||||
|
@@ -40,7 +40,7 @@ AcResult acSynchronizeMesh(void);
|
|||||||
initialization of *all memory needed on all GPUs in the node*. In other words,
|
initialization of *all memory needed on all GPUs in the node*. In other words,
|
||||||
sets up everything GPU-side so that calling any other GPU interface function
|
sets up everything GPU-side so that calling any other GPU interface function
|
||||||
afterwards does not result in illegal memory accesses. */
|
afterwards does not result in illegal memory accesses. */
|
||||||
AcResult acInit(const AcMeshInfo& mesh_info);
|
AcResult acInit(const AcMeshInfo mesh_info);
|
||||||
|
|
||||||
/** Frees all GPU allocations and resets all devices in the node. Should be
|
/** Frees all GPU allocations and resets all devices in the node. Should be
|
||||||
* called at exit. */
|
* called at exit. */
|
||||||
@@ -49,23 +49,23 @@ AcResult acQuit(void);
|
|||||||
/** Does all three substeps of the RK3 integration and computes the boundary
|
/** Does all three substeps of the RK3 integration and computes the boundary
|
||||||
conditions when necessary. The result is synchronized and the boundary conditions are applied
|
conditions when necessary. The result is synchronized and the boundary conditions are applied
|
||||||
after the final substep, after which the result can be fetched to CPU memory with acStore. */
|
after the final substep, after which the result can be fetched to CPU memory with acStore. */
|
||||||
AcResult acIntegrate(const AcReal& dt);
|
AcResult acIntegrate(const AcReal dt);
|
||||||
|
|
||||||
/** Performs a scalar reduction on all GPUs in the node and returns the result. Operates on the
|
/** Performs a scalar reduction on all GPUs in the node and returns the result. Operates on the
|
||||||
* whole computational domain, which must be up to date and synchronized before calling
|
* whole computational domain, which must be up to date and synchronized before calling
|
||||||
* acReduceScal.
|
* acReduceScal.
|
||||||
*/
|
*/
|
||||||
AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a);
|
AcReal acReduceScal(const ReductionType rtype, const VertexBufferHandle a);
|
||||||
|
|
||||||
/** Performs a vector reduction on all GPUs in the node and returns the result. Operates on the
|
/** Performs a vector reduction on all GPUs in the node and returns the result. Operates on the
|
||||||
* whole computational domain, which must be up to date and synchronized before calling
|
* whole computational domain, which must be up to date and synchronized before calling
|
||||||
* acReduceVec.
|
* acReduceVec.
|
||||||
*/
|
*/
|
||||||
AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a,
|
AcReal acReduceVec(const ReductionType rtype, const VertexBufferHandle a,
|
||||||
const VertexBufferHandle& b, const VertexBufferHandle& c);
|
const VertexBufferHandle b, const VertexBufferHandle c);
|
||||||
|
|
||||||
/** Distributes the host mesh among the GPUs in the node. Synchronous. */
|
/** Distributes the host mesh among the GPUs in the node. Synchronous. */
|
||||||
AcResult acLoad(const AcMesh& host_mesh);
|
AcResult acLoad(const AcMesh host_mesh);
|
||||||
|
|
||||||
/** Gathers the mesh stored across GPUs in the node and stores it back to host memory. Synchronous.
|
/** Gathers the mesh stored across GPUs in the node and stores it back to host memory. Synchronous.
|
||||||
*/
|
*/
|
||||||
@@ -82,32 +82,38 @@ AcResult acLoadDeviceConstantAsync(const AcRealParam param, const AcReal value,
|
|||||||
const StreamType stream);
|
const StreamType stream);
|
||||||
|
|
||||||
/** Splits a subset of the host_mesh and distributes it among the GPUs in the node. Asynchronous. */
|
/** Splits a subset of the host_mesh and distributes it among the GPUs in the node. Asynchronous. */
|
||||||
AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices);
|
AcResult acLoadWithOffset(const AcMesh host_mesh, const int3 start, const int num_vertices);
|
||||||
AcResult acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& start, const int num_vertices,
|
AcResult acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 start, const int num_vertices,
|
||||||
const StreamType stream);
|
const StreamType stream);
|
||||||
|
|
||||||
/** Gathers a subset of the data distributed among the GPUs in the node and stores the mesh back to
|
/** Gathers a subset of the data distributed among the GPUs in the node and stores the mesh back to
|
||||||
* CPU memory. Asynchronous.
|
* CPU memory. Asynchronous.
|
||||||
*/
|
*/
|
||||||
AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh);
|
AcResult acStoreWithOffset(const int3 start, const int num_vertices, AcMesh* host_mesh);
|
||||||
AcResult acStoreWithOffsetAsync(const int3& start, const int num_vertices, AcMesh* host_mesh,
|
AcResult acStoreWithOffsetAsync(const int3 start, const int num_vertices, AcMesh* host_mesh,
|
||||||
const StreamType stream);
|
const StreamType stream);
|
||||||
|
|
||||||
/** Performs a single RK3 step without computing boundary conditions. Asynchronous.*/
|
/** Performs a single RK3 step without computing boundary conditions. Asynchronous.*/
|
||||||
AcResult acIntegrateStep(const int& isubstep, const AcReal& dt);
|
AcResult acIntegrateStep(const int isubstep, const AcReal dt);
|
||||||
AcResult acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream);
|
AcResult acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream);
|
||||||
|
|
||||||
/** Performs a single RK3 step on a subset of the mesh without computing the boundary conditions.
|
/** Performs a single RK3 step on a subset of the mesh without computing the boundary conditions.
|
||||||
* Asynchronous.*/
|
* Asynchronous.*/
|
||||||
AcResult acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start,
|
AcResult acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start,
|
||||||
const int3& end);
|
const int3 end);
|
||||||
AcResult acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start,
|
AcResult acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start,
|
||||||
const int3& end, const StreamType stream);
|
const int3 end, const StreamType stream);
|
||||||
|
|
||||||
/** Performs the boundary condition step on the GPUs in the node. Asynchronous. */
|
/** Performs the boundary condition step on the GPUs in the node. Asynchronous. */
|
||||||
AcResult acBoundcondStep(void);
|
AcResult acBoundcondStep(void);
|
||||||
AcResult acBoundcondStepAsync(const StreamType stream);
|
AcResult acBoundcondStepAsync(const StreamType stream);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* Revised interface
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
#endif
|
#endif
|
||||||
|
@@ -22,9 +22,27 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <float.h> // FLT_EPSILON, etc
|
#include <float.h> // FLT_EPSILON, etc
|
||||||
#include <stdlib.h> // size_t
|
#include <stdlib.h> // size_t
|
||||||
#include <vector_types.h> // CUDA vector types (float4, etc)
|
//#include <vector_types.h> // CUDA vector types (float4, etc)
|
||||||
|
|
||||||
|
#ifndef __CUDACC__
|
||||||
|
typedef struct {
|
||||||
|
int x, y, z;
|
||||||
|
} int3;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
float x, y;
|
||||||
|
} float2;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
float x, y, z;
|
||||||
|
} float3;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
double x, y, z;
|
||||||
|
} double3;
|
||||||
|
#endif // __CUDACC__
|
||||||
|
|
||||||
#include "stencil_defines.h"
|
#include "stencil_defines.h"
|
||||||
|
|
||||||
@@ -147,31 +165,31 @@ typedef struct {
|
|||||||
* =============================================================================
|
* =============================================================================
|
||||||
*/
|
*/
|
||||||
static inline size_t
|
static inline size_t
|
||||||
acVertexBufferSize(const AcMeshInfo& info)
|
acVertexBufferSize(const AcMeshInfo info)
|
||||||
{
|
{
|
||||||
return info.int_params[AC_mx] * info.int_params[AC_my] * info.int_params[AC_mz];
|
return info.int_params[AC_mx] * info.int_params[AC_my] * info.int_params[AC_mz];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline size_t
|
static inline size_t
|
||||||
acVertexBufferSizeBytes(const AcMeshInfo& info)
|
acVertexBufferSizeBytes(const AcMeshInfo info)
|
||||||
{
|
{
|
||||||
return sizeof(AcReal) * acVertexBufferSize(info);
|
return sizeof(AcReal) * acVertexBufferSize(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline size_t
|
static inline size_t
|
||||||
acVertexBufferCompdomainSize(const AcMeshInfo& info)
|
acVertexBufferCompdomainSize(const AcMeshInfo info)
|
||||||
{
|
{
|
||||||
return info.int_params[AC_nx] * info.int_params[AC_ny] * info.int_params[AC_nz];
|
return info.int_params[AC_nx] * info.int_params[AC_ny] * info.int_params[AC_nz];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline size_t
|
static inline size_t
|
||||||
acVertexBufferCompdomainSizeBytes(const AcMeshInfo& info)
|
acVertexBufferCompdomainSizeBytes(const AcMeshInfo info)
|
||||||
{
|
{
|
||||||
return sizeof(AcReal) * acVertexBufferCompdomainSize(info);
|
return sizeof(AcReal) * acVertexBufferCompdomainSize(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline size_t
|
static inline size_t
|
||||||
acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info)
|
acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo info)
|
||||||
{
|
{
|
||||||
return i + //
|
return i + //
|
||||||
j * info.int_params[AC_mx] + //
|
j * info.int_params[AC_mx] + //
|
||||||
@@ -180,25 +198,25 @@ acVertexBufferIdx(const int i, const int j, const int k, const AcMeshInfo& info)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
static inline int
|
static inline int
|
||||||
acGetParam(const AcMeshInfo& info, const AcIntParam param)
|
acGetParam(const AcMeshInfo info, const AcIntParam param)
|
||||||
{
|
{
|
||||||
return info.int_params[param];
|
return info.int_params[param];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int3
|
static inline int3
|
||||||
acGetParam(const AcMeshInfo& info, const AcInt3Param param)
|
acGetParam(const AcMeshInfo info, const AcInt3Param param)
|
||||||
{
|
{
|
||||||
return info.int3_params[param];
|
return info.int3_params[param];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline AcReal
|
static inline AcReal
|
||||||
acGetParam(const AcMeshInfo& info, const AcRealParam param)
|
acGetParam(const AcMeshInfo info, const AcRealParam param)
|
||||||
{
|
{
|
||||||
return info.real_params[param];
|
return info.real_params[param];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline AcReal3
|
static inline AcReal3
|
||||||
acGetParam(const AcMeshInfo& info, const AcReal3Param param)
|
acGetParam(const AcMeshInfo info, const AcReal3Param param)
|
||||||
{
|
{
|
||||||
return info.real3_params[param];
|
return info.real3_params[param];
|
||||||
}
|
}
|
||||||
|
@@ -75,7 +75,6 @@ cp ${AC_HOME}/config/astaroth.conf .
|
|||||||
|
|
||||||
CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}"
|
CONF_DIR="-D ASTAROTH_CONF_PATH=${PWD}"
|
||||||
|
|
||||||
|
|
||||||
#cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ${AC_HOME}
|
#cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ${AC_HOME}
|
||||||
|
|
||||||
echo "cmake ${TIARA_SETUP} ${CONF_DIR} -DDOUBLE_PRECISION=${DOUBLE} -DBUILD_DEBUG=${DEBUG_MODE} -DALTER_CONF=${ALTER_CONF} ${AC_HOME}"
|
echo "cmake ${TIARA_SETUP} ${CONF_DIR} -DDOUBLE_PRECISION=${DOUBLE} -DBUILD_DEBUG=${DEBUG_MODE} -DALTER_CONF=${ALTER_CONF} ${AC_HOME}"
|
||||||
|
@@ -2,58 +2,36 @@
|
|||||||
## CMakeLists.txt for Astaroth Core ##
|
## CMakeLists.txt for Astaroth Core ##
|
||||||
########################################
|
########################################
|
||||||
|
|
||||||
#----------------------Find CUDA-----------------------------------------------#
|
## Find packages
|
||||||
|
|
||||||
find_package(CUDA 9 REQUIRED)
|
find_package(CUDA 9 REQUIRED)
|
||||||
|
|
||||||
#----------------------CUDA settings-------------------------------------------#
|
## Architecture and optimization flags
|
||||||
|
|
||||||
set(CUDA_SEPARABLE_COMPILATION OFF)
|
|
||||||
set(CUDA_PROPAGATE_HOST_FLAGS ON)
|
|
||||||
|
|
||||||
#----------------------Setup CUDA compilation flags----------------------------#
|
|
||||||
|
|
||||||
# Generate code for the default architecture (Pascal)
|
|
||||||
set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
|
set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
|
||||||
-gencode arch=compute_50,code=sm_50
|
-gencode arch=compute_50,code=sm_50
|
||||||
-gencode arch=compute_60,code=sm_60
|
-gencode arch=compute_60,code=sm_60
|
||||||
-gencode arch=compute_61,code=sm_61
|
-gencode arch=compute_61,code=sm_61
|
||||||
-lineinfo
|
-lineinfo
|
||||||
-ftz=true
|
-ftz=true # Flush denormalized floats to zero
|
||||||
-std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero
|
-std=c++11)
|
||||||
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
|
#--maxrregcount=255
|
||||||
# =cg to opt out
|
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
|
||||||
|
# =cg to opt out
|
||||||
# Additional CUDA optimization flags
|
|
||||||
if (CMAKE_BUILD_TYPE MATCHES RELEASE)
|
|
||||||
# Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how
|
|
||||||
# to add more
|
|
||||||
set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Additional CUDA debug flags
|
|
||||||
if (CMAKE_BUILD_TYPE MATCHES DEBUG)
|
|
||||||
# The debug flags must be set inside this if clause, since either CMake 3.5
|
|
||||||
# or nvcc 7.5 is bugged:
|
|
||||||
# CMake converts these into empty strings when doing RELEASE build, but nvcc
|
|
||||||
# 7.5 fails to parse empty flags.
|
|
||||||
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};
|
|
||||||
--device-debug;
|
|
||||||
--generate-line-info;
|
|
||||||
--ptxas-options=-v)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${CUDA_ARCH_FLAGS}")
|
|
||||||
|
|
||||||
|
|
||||||
message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
|
set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow
|
||||||
|
|
||||||
|
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS})
|
||||||
|
set(CUDA_NVCC_FLAGS_RELEASE)
|
||||||
|
set(CUDA_NVCC_FLAGS_DEBUG --device-debug --generate-line-info --ptxas-options=-v)
|
||||||
|
|
||||||
#------------------Compile and create a static library-------------------------#
|
## Definitions
|
||||||
file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu")
|
if (MULTIGPU_ENABLED)
|
||||||
|
add_definitions(-DAC_MULTIGPU_ENABLED=1)
|
||||||
|
else ()
|
||||||
|
add_definitions(-DAC_MULTIGPU_ENABLED=0)
|
||||||
|
endif ()
|
||||||
|
|
||||||
# Use -fPIC if -fpic not supported. Some quick non-scientific tests:
|
## Create and link the library
|
||||||
# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed
|
include_directories(.)
|
||||||
# With fpic: 4.96 user, 4.02 system, 0:09.90 elapsed
|
cuda_add_library(astaroth_core STATIC astaroth.cu device.cu)
|
||||||
# With fPIC: 4.94 user, 4.05 system, 0:10.23 elapsed
|
target_link_libraries(astaroth_core m)
|
||||||
CUDA_ADD_LIBRARY(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")
|
|
||||||
|
@@ -127,8 +127,8 @@
|
|||||||
#include "errchk.h"
|
#include "errchk.h"
|
||||||
|
|
||||||
#include "device.cuh"
|
#include "device.cuh"
|
||||||
#include "math_utils.h" // sum for reductions
|
#include "math_utils.h" // sum for reductions
|
||||||
#include "standalone/config_loader.h" // update_config
|
// #include "standalone/config_loader.h" // update_config
|
||||||
|
|
||||||
#define AC_GEN_STR(X) #X
|
#define AC_GEN_STR(X) #X
|
||||||
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
const char* intparam_names[] = {AC_FOR_BUILTIN_INT_PARAM_TYPES(AC_GEN_STR) //
|
||||||
@@ -156,7 +156,7 @@ gridIdx(const Grid grid, const int3 idx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int3
|
static int3
|
||||||
gridIdx3d(const Grid& grid, const int idx)
|
gridIdx3d(const Grid grid, const int idx)
|
||||||
{
|
{
|
||||||
return (int3){idx % grid.m.x, (idx % (grid.m.x * grid.m.y)) / grid.m.x,
|
return (int3){idx % grid.m.x, (idx % (grid.m.x * grid.m.y)) / grid.m.x,
|
||||||
idx / (grid.m.x * grid.m.y)};
|
idx / (grid.m.x * grid.m.y)};
|
||||||
@@ -168,8 +168,49 @@ printInt3(const int3 vec)
|
|||||||
printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
|
printf("(%d, %d, %d)", vec.x, vec.y, vec.z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
print(const AcMeshInfo config)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < NUM_INT_PARAMS; ++i)
|
||||||
|
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
|
||||||
|
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
|
||||||
|
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
update_builtin_params(AcMeshInfo* config)
|
||||||
|
{
|
||||||
|
config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER;
|
||||||
|
///////////// PAD TEST
|
||||||
|
// config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER + PAD_SIZE;
|
||||||
|
///////////// PAD TEST
|
||||||
|
config->int_params[AC_my] = config->int_params[AC_ny] + STENCIL_ORDER;
|
||||||
|
config->int_params[AC_mz] = config->int_params[AC_nz] + STENCIL_ORDER;
|
||||||
|
|
||||||
|
// Bounds for the computational domain, i.e. nx_min <= i < nx_max
|
||||||
|
config->int_params[AC_nx_min] = NGHOST;
|
||||||
|
config->int_params[AC_nx_max] = config->int_params[AC_nx_min] + config->int_params[AC_nx];
|
||||||
|
config->int_params[AC_ny_min] = NGHOST;
|
||||||
|
config->int_params[AC_ny_max] = config->int_params[AC_ny] + NGHOST;
|
||||||
|
config->int_params[AC_nz_min] = NGHOST;
|
||||||
|
config->int_params[AC_nz_max] = config->int_params[AC_nz] + NGHOST;
|
||||||
|
|
||||||
|
/* Additional helper params */
|
||||||
|
// Int helpers
|
||||||
|
config->int_params[AC_mxy] = config->int_params[AC_mx] * config->int_params[AC_my];
|
||||||
|
config->int_params[AC_nxy] = config->int_params[AC_nx] * config->int_params[AC_ny];
|
||||||
|
config->int_params[AC_nxyz] = config->int_params[AC_nxy] * config->int_params[AC_nz];
|
||||||
|
|
||||||
|
#if VERBOSE_PRINTING // Defined in astaroth.h
|
||||||
|
printf("###############################################################\n");
|
||||||
|
printf("Config dimensions recalculated:\n");
|
||||||
|
print(*config);
|
||||||
|
printf("###############################################################\n");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static Grid
|
static Grid
|
||||||
createGrid(const AcMeshInfo& config)
|
createGrid(const AcMeshInfo config)
|
||||||
{
|
{
|
||||||
Grid grid;
|
Grid grid;
|
||||||
|
|
||||||
@@ -246,7 +287,7 @@ acSynchronizeMesh(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acInit(const AcMeshInfo& config)
|
acInit(const AcMeshInfo config)
|
||||||
{
|
{
|
||||||
// Get num_devices
|
// Get num_devices
|
||||||
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
|
ERRCHK_CUDA_ALWAYS(cudaGetDeviceCount(&num_devices));
|
||||||
@@ -274,7 +315,7 @@ acInit(const AcMeshInfo& config)
|
|||||||
// Subgrids
|
// Subgrids
|
||||||
AcMeshInfo subgrid_config = config;
|
AcMeshInfo subgrid_config = config;
|
||||||
subgrid_config.int_params[AC_nz] /= num_devices;
|
subgrid_config.int_params[AC_nz] /= num_devices;
|
||||||
update_config(&subgrid_config);
|
update_builtin_params(&subgrid_config);
|
||||||
subgrid = createGrid(subgrid_config);
|
subgrid = createGrid(subgrid_config);
|
||||||
|
|
||||||
// Periodic boundary conditions become weird if the system can "fold unto itself".
|
// Periodic boundary conditions become weird if the system can "fold unto itself".
|
||||||
@@ -337,8 +378,8 @@ acQuit(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3& start,
|
acIntegrateStepWithOffsetAsync(const int isubstep, const AcReal dt, const int3 start,
|
||||||
const int3& end, const StreamType stream)
|
const int3 end, const StreamType stream)
|
||||||
{
|
{
|
||||||
// See the beginning of the file for an explanation of the index mapping
|
// See the beginning of the file for an explanation of the index mapping
|
||||||
// #pragma omp parallel for
|
// #pragma omp parallel for
|
||||||
@@ -360,13 +401,13 @@ acIntegrateStepWithOffsetAsync(const int& isubstep, const AcReal& dt, const int3
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acIntegrateStepWithOffset(const int& isubstep, const AcReal& dt, const int3& start, const int3& end)
|
acIntegrateStepWithOffset(const int isubstep, const AcReal dt, const int3 start, const int3 end)
|
||||||
{
|
{
|
||||||
return acIntegrateStepWithOffsetAsync(isubstep, dt, start, end, STREAM_DEFAULT);
|
return acIntegrateStepWithOffsetAsync(isubstep, dt, start, end, STREAM_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType stream)
|
acIntegrateStepAsync(const int isubstep, const AcReal dt, const StreamType stream)
|
||||||
{
|
{
|
||||||
const int3 start = (int3){NGHOST, NGHOST, NGHOST};
|
const int3 start = (int3){NGHOST, NGHOST, NGHOST};
|
||||||
const int3 end = start + grid.n;
|
const int3 end = start + grid.n;
|
||||||
@@ -374,7 +415,7 @@ acIntegrateStepAsync(const int& isubstep, const AcReal& dt, const StreamType str
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acIntegrateStep(const int& isubstep, const AcReal& dt)
|
acIntegrateStep(const int isubstep, const AcReal dt)
|
||||||
{
|
{
|
||||||
return acIntegrateStepAsync(isubstep, dt, STREAM_DEFAULT);
|
return acIntegrateStepAsync(isubstep, dt, STREAM_DEFAULT);
|
||||||
}
|
}
|
||||||
@@ -452,7 +493,7 @@ swap_buffers(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acIntegrate(const AcReal& dt)
|
acIntegrate(const AcReal dt)
|
||||||
{
|
{
|
||||||
acSynchronizeStream(STREAM_ALL);
|
acSynchronizeStream(STREAM_ALL);
|
||||||
for (int isubstep = 0; isubstep < 3; ++isubstep) {
|
for (int isubstep = 0; isubstep < 3; ++isubstep) {
|
||||||
@@ -464,7 +505,7 @@ acIntegrate(const AcReal& dt)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static AcReal
|
static AcReal
|
||||||
simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, const int& n)
|
simple_final_reduce_scal(const ReductionType rtype, const AcReal* results, const int n)
|
||||||
{
|
{
|
||||||
AcReal res = results[0];
|
AcReal res = results[0];
|
||||||
for (int i = 1; i < n; ++i) {
|
for (int i = 1; i < n; ++i) {
|
||||||
@@ -490,7 +531,7 @@ simple_final_reduce_scal(const ReductionType& rtype, const AcReal* results, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcReal
|
AcReal
|
||||||
acReduceScal(const ReductionType& rtype, const VertexBufferHandle& vtxbuffer_handle)
|
acReduceScal(const ReductionType rtype, const VertexBufferHandle vtxbuffer_handle)
|
||||||
{
|
{
|
||||||
acSynchronizeStream(STREAM_ALL);
|
acSynchronizeStream(STREAM_ALL);
|
||||||
|
|
||||||
@@ -504,8 +545,8 @@ acReduceScal(const ReductionType& rtype, const VertexBufferHandle& vtxbuffer_han
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcReal
|
AcReal
|
||||||
acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const VertexBufferHandle& b,
|
acReduceVec(const ReductionType rtype, const VertexBufferHandle a, const VertexBufferHandle b,
|
||||||
const VertexBufferHandle& c)
|
const VertexBufferHandle c)
|
||||||
{
|
{
|
||||||
acSynchronizeStream(STREAM_ALL);
|
acSynchronizeStream(STREAM_ALL);
|
||||||
|
|
||||||
@@ -519,7 +560,7 @@ acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a, const Verte
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_vertices,
|
acLoadWithOffsetAsync(const AcMesh host_mesh, const int3 src, const int num_vertices,
|
||||||
const StreamType stream)
|
const StreamType stream)
|
||||||
{
|
{
|
||||||
// See the beginning of the file for an explanation of the index mapping
|
// See the beginning of the file for an explanation of the index mapping
|
||||||
@@ -557,13 +598,13 @@ acLoadWithOffsetAsync(const AcMesh& host_mesh, const int3& src, const int num_ve
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acLoadWithOffset(const AcMesh& host_mesh, const int3& src, const int num_vertices)
|
acLoadWithOffset(const AcMesh host_mesh, const int3 src, const int num_vertices)
|
||||||
{
|
{
|
||||||
return acLoadWithOffsetAsync(host_mesh, src, num_vertices, STREAM_DEFAULT);
|
return acLoadWithOffsetAsync(host_mesh, src, num_vertices, STREAM_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acLoad(const AcMesh& host_mesh)
|
acLoad(const AcMesh host_mesh)
|
||||||
{
|
{
|
||||||
acLoadWithOffset(host_mesh, (int3){0, 0, 0}, acVertexBufferSize(host_mesh.info));
|
acLoadWithOffset(host_mesh, (int3){0, 0, 0}, acVertexBufferSize(host_mesh.info));
|
||||||
acSynchronizeStream(STREAM_ALL);
|
acSynchronizeStream(STREAM_ALL);
|
||||||
@@ -571,7 +612,7 @@ acLoad(const AcMesh& host_mesh)
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mesh,
|
acStoreWithOffsetAsync(const int3 src, const int num_vertices, AcMesh* host_mesh,
|
||||||
const StreamType stream)
|
const StreamType stream)
|
||||||
{
|
{
|
||||||
// See the beginning of the file for an explanation of the index mapping
|
// See the beginning of the file for an explanation of the index mapping
|
||||||
@@ -596,7 +637,7 @@ acStoreWithOffsetAsync(const int3& src, const int num_vertices, AcMesh* host_mes
|
|||||||
}
|
}
|
||||||
|
|
||||||
AcResult
|
AcResult
|
||||||
acStoreWithOffset(const int3& src, const int num_vertices, AcMesh* host_mesh)
|
acStoreWithOffset(const int3 src, const int num_vertices, AcMesh* host_mesh)
|
||||||
{
|
{
|
||||||
return acStoreWithOffsetAsync(src, num_vertices, host_mesh, STREAM_DEFAULT);
|
return acStoreWithOffsetAsync(src, num_vertices, host_mesh, STREAM_DEFAULT);
|
||||||
}
|
}
|
||||||
@@ -624,3 +665,9 @@ acLoadDeviceConstant(const AcRealParam param, const AcReal value)
|
|||||||
{
|
{
|
||||||
return acLoadDeviceConstantAsync(param, value, STREAM_DEFAULT);
|
return acLoadDeviceConstantAsync(param, value, STREAM_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* Revised interface
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
@@ -516,3 +516,9 @@ autoOptimize(const Device device)
|
|||||||
#if PACKED_DATA_TRANSFERS
|
#if PACKED_DATA_TRANSFERS
|
||||||
// Functions for calling packed data transfers
|
// Functions for calling packed data transfers
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* Revised interface
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
@@ -99,3 +99,9 @@ AcResult autoOptimize(const Device device);
|
|||||||
#if PACKED_DATA_TRANSFERS
|
#if PACKED_DATA_TRANSFERS
|
||||||
// Declarations used for packed data transfers
|
// Declarations used for packed data transfers
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =============================================================================
|
||||||
|
* Revised interface
|
||||||
|
* =============================================================================
|
||||||
|
*/
|
||||||
|
9
src/ctest/CMakeLists.txt
Normal file
9
src/ctest/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
##############################################
|
||||||
|
## CMakeLists.txt for the C API test ##
|
||||||
|
##############################################
|
||||||
|
|
||||||
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
add_executable(ctest main.c)
|
||||||
|
target_link_libraries(ctest PRIVATE astaroth_core)
|
1
src/ctest/README.txt
Normal file
1
src/ctest/README.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
This directory is used to test whether the Astaroth API is compatible with C.
|
36
src/ctest/main.c
Normal file
36
src/ctest/main.c
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||||
|
|
||||||
|
This file is part of Astaroth.
|
||||||
|
|
||||||
|
Astaroth is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Astaroth is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "astaroth.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
main(void)
|
||||||
|
{
|
||||||
|
AcMeshInfo info = {
|
||||||
|
.int_params[AC_nx] = 128,
|
||||||
|
.int_params[AC_ny] = 64,
|
||||||
|
.int_params[AC_nz] = 32,
|
||||||
|
};
|
||||||
|
acInit(info);
|
||||||
|
acIntegrate(0.1f);
|
||||||
|
acQuit();
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
12
src/mpitest/CMakeLists.txt
Normal file
12
src/mpitest/CMakeLists.txt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
##############################################
|
||||||
|
## CMakeLists.txt for the MPI test ##
|
||||||
|
##############################################
|
||||||
|
|
||||||
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
find_package(MPI REQUIRED)
|
||||||
|
|
||||||
|
add_executable(mpitest main.c)
|
||||||
|
target_include_directories(mpitest PRIVATE ${MPI_C_INCLUDE_PATH})
|
||||||
|
target_link_libraries(mpitest PRIVATE ${MPI_C_LIBRARIES} astaroth_core)
|
1
src/mpitest/README.txt
Normal file
1
src/mpitest/README.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
This directory is used to test MPI with Astaroth.
|
51
src/mpitest/main.c
Normal file
51
src/mpitest/main.c
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
/*
|
||||||
|
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||||
|
|
||||||
|
This file is part of Astaroth.
|
||||||
|
|
||||||
|
Astaroth is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Astaroth is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "astaroth.h"
|
||||||
|
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
int
|
||||||
|
main(void)
|
||||||
|
{
|
||||||
|
MPI_Init(NULL, NULL);
|
||||||
|
|
||||||
|
int num_processes, process_id;
|
||||||
|
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
|
||||||
|
|
||||||
|
char processor_name[MPI_MAX_PROCESSOR_NAME];
|
||||||
|
int name_len;
|
||||||
|
MPI_Get_processor_name(processor_name, &name_len);
|
||||||
|
printf("Processor %s. Process %d of %d.\n", processor_name, process_id, num_processes);
|
||||||
|
|
||||||
|
AcMeshInfo info = {
|
||||||
|
.int_params[AC_nx] = 128,
|
||||||
|
.int_params[AC_ny] = 64,
|
||||||
|
.int_params[AC_nz] = 32,
|
||||||
|
};
|
||||||
|
acInit(info);
|
||||||
|
acIntegrate(0.1f);
|
||||||
|
acQuit();
|
||||||
|
|
||||||
|
MPI_Finalize();
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
@@ -1,10 +1,39 @@
|
|||||||
################################
|
##############################################
|
||||||
## CMakeLists.txt for utils ##
|
## CMakeLists.txt for Astaroth Standalone ##
|
||||||
################################
|
##############################################
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
## Files
|
||||||
file (GLOB SOURCES "*.cc" "model/*.cc")
|
file (GLOB SOURCES "*.cc" "model/*.cc")
|
||||||
|
|
||||||
add_library(astaroth_standalone STATIC ${SOURCES})
|
## Find packages
|
||||||
target_include_directories(astaroth_standalone PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
find_package(OpenMP REQUIRED)
|
||||||
#target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\")
|
if (BUILD_RT_VISUALIZATION)
|
||||||
target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\")
|
add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
|
||||||
|
# SDL 2
|
||||||
|
set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
|
||||||
|
set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
|
||||||
|
set(SDL2_LIBRARY "SDL2")
|
||||||
|
include_directories(${SDL2_INCLUDE_DIR})
|
||||||
|
link_directories(${SDL2_LIBRARY_DIR})
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
## Compilation flags
|
||||||
|
add_compile_options(-pipe ${OpenMP_CXX_FLAGS})
|
||||||
|
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
|
||||||
|
|
||||||
|
## Compile and link
|
||||||
|
add_library(astaroth_standalone ${SOURCES})
|
||||||
|
|
||||||
|
add_executable(ac_run main.cc)
|
||||||
|
target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
|
||||||
|
|
||||||
|
# Define the config directory
|
||||||
|
if (ALTER_CONF)
|
||||||
|
# ASTAROTH_CONF_PATH supplied by ac_mkbuilddir.sh
|
||||||
|
target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${ASTAROTH_CONF_PATH}/")
|
||||||
|
else()
|
||||||
|
target_compile_definitions(astaroth_standalone PRIVATE CONFIG_PATH="${CMAKE_SOURCE_DIR}/config/")
|
||||||
|
endif()
|
||||||
|
@@ -29,7 +29,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "config_loader.h"
|
#include "config_loader.h"
|
||||||
#include "core/math_utils.h"
|
#include "src/core/math_utils.h"
|
||||||
#include "model/host_forcing.h"
|
#include "model/host_forcing.h"
|
||||||
#include "model/host_memory.h"
|
#include "model/host_memory.h"
|
||||||
#include "model/host_timestep.h"
|
#include "model/host_timestep.h"
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
#include "model/model_reduce.h"
|
#include "model/model_reduce.h"
|
||||||
#include "model/model_rk3.h"
|
#include "model/model_rk3.h"
|
||||||
|
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
|
|
||||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||||
|
|
||||||
|
@@ -211,7 +211,7 @@ run_benchmark(void)
|
|||||||
#if AUTO_OPTIMIZE
|
#if AUTO_OPTIMIZE
|
||||||
const char* benchmark_path = "benchmark.out";
|
const char* benchmark_path = "benchmark.out";
|
||||||
|
|
||||||
#include "core/kernels/rk3_threadblock.conf"
|
#include "src/core/kernels/rk3_threadblock.conf"
|
||||||
static int
|
static int
|
||||||
write_result_to_file(const float& ms_per_step)
|
write_result_to_file(const float& ms_per_step)
|
||||||
{
|
{
|
||||||
|
@@ -31,8 +31,8 @@
|
|||||||
#include <stdio.h> // print
|
#include <stdio.h> // print
|
||||||
#include <string.h> // memset
|
#include <string.h> // memset
|
||||||
|
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
#include "core/math_utils.h"
|
#include "src/core/math_utils.h"
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
print(const AcMeshInfo& config)
|
print(const AcMeshInfo& config)
|
||||||
|
@@ -28,8 +28,8 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "core/errchk.h"
|
|
||||||
#include "run.h"
|
#include "run.h"
|
||||||
|
#include "src/core/errchk.h"
|
||||||
|
|
||||||
// Write all errors from stderr to an <errorlog_name> in the current working
|
// Write all errors from stderr to an <errorlog_name> in the current working
|
||||||
// directory
|
// directory
|
||||||
|
@@ -26,7 +26,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "host_forcing.h"
|
#include "host_forcing.h"
|
||||||
|
|
||||||
#include "core/math_utils.h"
|
#include "src/core/math_utils.h"
|
||||||
|
|
||||||
// This is a wrapper for generating random numbers with a chosen system.
|
// This is a wrapper for generating random numbers with a chosen system.
|
||||||
AcReal
|
AcReal
|
||||||
|
@@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
|
|
||||||
#define AC_GEN_STR(X) #X
|
#define AC_GEN_STR(X) #X
|
||||||
const char* init_type_names[] = {AC_FOR_INIT_TYPES(AC_GEN_STR)};
|
const char* init_type_names[] = {AC_FOR_INIT_TYPES(AC_GEN_STR)};
|
||||||
|
@@ -26,32 +26,35 @@
|
|||||||
*/
|
*/
|
||||||
#include "host_timestep.h"
|
#include "host_timestep.h"
|
||||||
|
|
||||||
#include "core/math_utils.h"
|
#include "src/core/math_utils.h"
|
||||||
|
|
||||||
static AcReal timescale = AcReal(1.0);
|
static AcReal timescale = AcReal(1.0);
|
||||||
|
|
||||||
AcReal
|
AcReal
|
||||||
host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info)
|
host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info)
|
||||||
{
|
{
|
||||||
const long double cdt = mesh_info.real_params[AC_cdt];
|
const long double cdt = mesh_info.real_params[AC_cdt];
|
||||||
const long double cdtv = mesh_info.real_params[AC_cdtv];
|
const long double cdtv = mesh_info.real_params[AC_cdtv];
|
||||||
// const long double cdts = mesh_info.real_params[AC_cdts];
|
// const long double cdts = mesh_info.real_params[AC_cdts];
|
||||||
const long double cs2_sound = mesh_info.real_params[AC_cs2_sound];
|
const long double cs2_sound = mesh_info.real_params[AC_cs2_sound];
|
||||||
const long double nu_visc = mesh_info.real_params[AC_nu_visc];
|
const long double nu_visc = mesh_info.real_params[AC_nu_visc];
|
||||||
const long double eta = mesh_info.real_params[AC_eta];
|
const long double eta = mesh_info.real_params[AC_eta];
|
||||||
const long double chi = 0; // mesh_info.real_params[AC_chi]; // TODO not calculated
|
const long double chi = 0; // mesh_info.real_params[AC_chi]; // TODO not calculated
|
||||||
const long double gamma = mesh_info.real_params[AC_gamma];
|
const long double gamma = mesh_info.real_params[AC_gamma];
|
||||||
const long double dsmin = mesh_info.real_params[AC_dsmin];
|
const long double dsmin = mesh_info.real_params[AC_dsmin];
|
||||||
|
|
||||||
// Old ones from legacy Astaroth
|
// Old ones from legacy Astaroth
|
||||||
//const long double uu_dt = cdt * (dsmin / (umax + cs_sound));
|
// const long double uu_dt = cdt * (dsmin / (umax + cs_sound));
|
||||||
//const long double visc_dt = cdtv * dsmin * dsmin / nu_visc;
|
// const long double visc_dt = cdtv * dsmin * dsmin / nu_visc;
|
||||||
|
|
||||||
// New, closer to the actual Courant timestep
|
// New, closer to the actual Courant timestep
|
||||||
// See Pencil Code user manual p. 38 (timestep section)
|
// See Pencil Code user manual p. 38 (timestep section)
|
||||||
const long double uu_dt = cdt * dsmin / (fabsl(umax) + sqrtl(cs2_sound + 0.0l));
|
const long double uu_dt = cdt * dsmin / (fabsl(umax) + sqrtl(cs2_sound + 0.0l));
|
||||||
const long double visc_dt = cdtv * dsmin * dsmin / max(max(nu_visc, eta), max(gamma, chi));// + 1; // TODO NOTE: comment the +1 out to get scientifically accurate results
|
const long double visc_dt = cdtv * dsmin * dsmin /
|
||||||
//MV: Why the +1? It was messing up my computations!
|
max(max(nu_visc, eta),
|
||||||
|
max(gamma, chi)); // + 1; // TODO NOTE: comment the +1 out to
|
||||||
|
// get scientifically accurate results
|
||||||
|
// MV: Why the +1? It was messing up my computations!
|
||||||
|
|
||||||
const long double dt = min(uu_dt, visc_dt);
|
const long double dt = min(uu_dt, visc_dt);
|
||||||
return AcReal(timescale) * AcReal(dt);
|
return AcReal(timescale) * AcReal(dt);
|
||||||
|
@@ -26,73 +26,68 @@
|
|||||||
*/
|
*/
|
||||||
#include "model_boundconds.h"
|
#include "model_boundconds.h"
|
||||||
|
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
|
boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
|
||||||
{
|
{
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
|
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
|
||||||
const int3 start = (int3){0, 0, 0};
|
const int3 start = (int3){0, 0, 0};
|
||||||
const int3 end = (int3){
|
const int3 end = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my],
|
||||||
mesh_info.int_params[AC_mx],
|
mesh_info.int_params[AC_mz]};
|
||||||
mesh_info.int_params[AC_my],
|
|
||||||
mesh_info.int_params[AC_mz]
|
|
||||||
};
|
|
||||||
|
|
||||||
const int nx = mesh_info.int_params[AC_nx];
|
const int nx = mesh_info.int_params[AC_nx];
|
||||||
const int ny = mesh_info.int_params[AC_ny];
|
const int ny = mesh_info.int_params[AC_ny];
|
||||||
const int nz = mesh_info.int_params[AC_nz];
|
const int nz = mesh_info.int_params[AC_nz];
|
||||||
|
|
||||||
const int nx_min = mesh_info.int_params[AC_nx_min];
|
const int nx_min = mesh_info.int_params[AC_nx_min];
|
||||||
const int ny_min = mesh_info.int_params[AC_ny_min];
|
const int ny_min = mesh_info.int_params[AC_ny_min];
|
||||||
const int nz_min = mesh_info.int_params[AC_nz_min];
|
const int nz_min = mesh_info.int_params[AC_nz_min];
|
||||||
|
|
||||||
// The old kxt was inclusive, but our mx_max is exclusive
|
// The old kxt was inclusive, but our mx_max is exclusive
|
||||||
const int nx_max = mesh_info.int_params[AC_nx_max];
|
const int nx_max = mesh_info.int_params[AC_nx_max];
|
||||||
const int ny_max = mesh_info.int_params[AC_ny_max];
|
const int ny_max = mesh_info.int_params[AC_ny_max];
|
||||||
const int nz_max = mesh_info.int_params[AC_nz_max];
|
const int nz_max = mesh_info.int_params[AC_nz_max];
|
||||||
|
|
||||||
for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
|
for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
|
||||||
for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
|
for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
|
||||||
for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
|
for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
|
||||||
|
|
||||||
// If destination index is inside the computational domain, skip it since
|
// If destination index is inside the computational domain, skip it since
|
||||||
// the boundary conditions are only applied to the ghost zones
|
// the boundary conditions are only applied to the ghost zones
|
||||||
if (i_dst >= nx_min && i_dst < nx_max &&
|
if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max &&
|
||||||
j_dst >= ny_min && j_dst < ny_max &&
|
k_dst >= nz_min && k_dst < nz_max)
|
||||||
k_dst >= nz_min && k_dst < nz_max)
|
continue;
|
||||||
continue;
|
|
||||||
|
|
||||||
// Find the source index
|
// Find the source index
|
||||||
// Map to nx, ny, nz coordinates
|
// Map to nx, ny, nz coordinates
|
||||||
int i_src = i_dst - nx_min;
|
int i_src = i_dst - nx_min;
|
||||||
int j_src = j_dst - ny_min;
|
int j_src = j_dst - ny_min;
|
||||||
int k_src = k_dst - nz_min;
|
int k_src = k_dst - nz_min;
|
||||||
|
|
||||||
// Translate (s.t. the index is always positive)
|
// Translate (s.t. the index is always positive)
|
||||||
i_src += nx;
|
i_src += nx;
|
||||||
j_src += ny;
|
j_src += ny;
|
||||||
k_src += nz;
|
k_src += nz;
|
||||||
|
|
||||||
// Wrap
|
// Wrap
|
||||||
i_src %= nx;
|
i_src %= nx;
|
||||||
j_src %= ny;
|
j_src %= ny;
|
||||||
k_src %= nz;
|
k_src %= nz;
|
||||||
|
|
||||||
// Map to mx, my, mz coordinates
|
// Map to mx, my, mz coordinates
|
||||||
i_src += nx_min;
|
i_src += nx_min;
|
||||||
j_src += ny_min;
|
j_src += ny_min;
|
||||||
k_src += nz_min;
|
k_src += nz_min;
|
||||||
|
|
||||||
const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
|
const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
|
||||||
const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
|
const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
|
||||||
ERRCHK(src_idx < acVertexBufferSize(mesh_info));
|
ERRCHK(src_idx < acVertexBufferSize(mesh_info));
|
||||||
ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
|
ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
|
||||||
mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
|
mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -25,7 +25,7 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
|
|
||||||
typedef long double MODEL_REAL;
|
typedef long double MODEL_REAL;
|
||||||
|
|
||||||
|
@@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
|
|
||||||
// Function pointer definitions
|
// Function pointer definitions
|
||||||
typedef ModelScalar (*ReduceFunc)(const ModelScalar&, const ModelScalar&);
|
typedef ModelScalar (*ReduceFunc)(const ModelScalar&, const ModelScalar&);
|
||||||
|
@@ -32,8 +32,8 @@
|
|||||||
#include <string.h> // memcpy
|
#include <string.h> // memcpy
|
||||||
|
|
||||||
#include "config_loader.h"
|
#include "config_loader.h"
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
#include "core/math_utils.h"
|
#include "src/core/math_utils.h"
|
||||||
#include "model/host_forcing.h"
|
#include "model/host_forcing.h"
|
||||||
#include "model/host_memory.h"
|
#include "model/host_memory.h"
|
||||||
#include "model/host_timestep.h"
|
#include "model/host_timestep.h"
|
||||||
@@ -430,7 +430,7 @@ run_renderer(void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#else // BUILD_RT_VISUALIZATION == 0
|
#else // BUILD_RT_VISUALIZATION == 0
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
int
|
int
|
||||||
run_renderer(void)
|
run_renderer(void)
|
||||||
{
|
{
|
||||||
|
@@ -27,8 +27,8 @@
|
|||||||
#include "run.h"
|
#include "run.h"
|
||||||
|
|
||||||
#include "config_loader.h"
|
#include "config_loader.h"
|
||||||
#include "core/errchk.h"
|
#include "src/core/errchk.h"
|
||||||
#include "core/math_utils.h"
|
#include "src/core/math_utils.h"
|
||||||
#include "model/host_forcing.h"
|
#include "model/host_forcing.h"
|
||||||
#include "model/host_memory.h"
|
#include "model/host_memory.h"
|
||||||
#include "model/host_timestep.h"
|
#include "model/host_timestep.h"
|
||||||
|
Reference in New Issue
Block a user