Merge branch 'master' into sink_20190723
Hopefully the merge issues were resolved.
This commit is contained in:
@@ -36,6 +36,21 @@ const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
|
||||
static const int num_nodes = 1;
|
||||
static Node nodes[num_nodes];
|
||||
|
||||
/**
 * Prints every parameter stored in a mesh configuration to stdout.
 *
 * One line is written per parameter in the form "[name]: value". The integer,
 * int3, real and real3 parameter arrays are printed in that order; labels are
 * taken from the matching *_names lookup tables. Real values are converted to
 * double before being handed to printf.
 *
 * @param config Mesh configuration whose parameters are printed.
 */
void
acPrintMeshInfo(const AcMeshInfo config)
{
    int idx = 0;

    // Scalar integer parameters
    for (idx = 0; idx < NUM_INT_PARAMS; ++idx) {
        printf("[%s]: %d\n", intparam_names[idx], config.int_params[idx]);
    }

    // Three-component integer parameters
    for (idx = 0; idx < NUM_INT3_PARAMS; ++idx) {
        const int3 triple = config.int3_params[idx];
        printf("[%s]: (%d, %d, %d)\n", int3param_names[idx], triple.x, triple.y, triple.z);
    }

    // Scalar real parameters
    for (idx = 0; idx < NUM_REAL_PARAMS; ++idx) {
        const double scalar = double(config.real_params[idx]);
        printf("[%s]: %g\n", realparam_names[idx], scalar);
    }

    // Three-component real parameters
    for (idx = 0; idx < NUM_REAL3_PARAMS; ++idx) {
        const AcReal3 triple = config.real3_params[idx];
        printf("[%s]: (%g, %g, %g)\n", real3param_names[idx], double(triple.x), double(triple.y),
               double(triple.z));
    }
}
|
||||
|
||||
AcResult
|
||||
acInit(const AcMeshInfo mesh_info)
|
||||
{
|
||||
|
||||
@@ -39,35 +39,54 @@ typedef struct {
|
||||
AcReal* out[NUM_VTXBUF_HANDLES];
|
||||
} VertexBufferArray;
|
||||
|
||||
struct device_s {
|
||||
int id;
|
||||
AcMeshInfo local_config;
|
||||
|
||||
// Concurrency
|
||||
cudaStream_t streams[NUM_STREAM_TYPES];
|
||||
|
||||
// Memory
|
||||
VertexBufferArray vba;
|
||||
AcReal* reduce_scratchpad;
|
||||
AcReal* reduce_result;
|
||||
|
||||
#if PACKED_DATA_TRANSFERS
|
||||
// Declare memory for buffers needed for packed data transfers here
|
||||
// AcReal* data_packing_buffer;
|
||||
#endif
|
||||
};
|
||||
|
||||
__constant__ AcMeshInfo d_mesh_info;
|
||||
static inline int __device__
|
||||
static int __device__ __forceinline__
|
||||
DCONST(const AcIntParam param)
|
||||
{
|
||||
return d_mesh_info.int_params[param];
|
||||
}
|
||||
static inline int3 __device__
|
||||
static int3 __device__ __forceinline__
|
||||
DCONST(const AcInt3Param param)
|
||||
{
|
||||
return d_mesh_info.int3_params[param];
|
||||
}
|
||||
static inline AcReal __device__
|
||||
static AcReal __device__ __forceinline__
|
||||
DCONST(const AcRealParam param)
|
||||
{
|
||||
return d_mesh_info.real_params[param];
|
||||
}
|
||||
static inline AcReal3 __device__
|
||||
static AcReal3 __device__ __forceinline__
|
||||
DCONST(const AcReal3Param param)
|
||||
{
|
||||
return d_mesh_info.real3_params[param];
|
||||
}
|
||||
constexpr VertexBufferHandle
|
||||
DCONST(const VertexBufferHandle handle)
|
||||
{
|
||||
return handle;
|
||||
}
|
||||
#define DCONST_INT(x) DCONST(x)
|
||||
#define DCONST_INT3(x) DCONST(x)
|
||||
#define DCONST_REAL(x) DCONST(x)
|
||||
#define DCONST_REAL3(x) DCONST(x)
|
||||
//#define DCONST_INT(X) (d_mesh_info.int_params[X])
|
||||
//#define DCONST_INT3(X) (d_mesh_info.int3_params[X])
|
||||
//#define DCONST_REAL(X) (d_mesh_info.real_params[X])
|
||||
//#define DCONST_REAL3(X) (d_mesh_info.real3_params[X])
|
||||
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
|
||||
#define DEVICE_1D_COMPDOMAIN_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_nx) + (k)*DCONST_INT(AC_nxy))
|
||||
#define globalGridN (d_mesh_info.int3_params[AC_global_grid_n])
|
||||
@@ -88,26 +107,8 @@ static dim3 rk3_tpb(32, 1, 4);
|
||||
// #include "kernels/pack_unpack.cuh"
|
||||
#endif
|
||||
|
||||
struct device_s {
|
||||
int id;
|
||||
AcMeshInfo local_config;
|
||||
|
||||
// Concurrency
|
||||
cudaStream_t streams[NUM_STREAM_TYPES];
|
||||
|
||||
// Memory
|
||||
VertexBufferArray vba;
|
||||
AcReal* reduce_scratchpad;
|
||||
AcReal* reduce_result;
|
||||
|
||||
#if PACKED_DATA_TRANSFERS
|
||||
// Declare memory for buffers needed for packed data transfers here
|
||||
// AcReal* data_packing_buffer;
|
||||
#endif
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
static __global__ void dummy_kernel(void) {}
|
||||
static __global__ void dummy_kernel(void) { DCONST((AcIntParam)0); DCONST((AcInt3Param)0); DCONST((AcRealParam)0); DCONST((AcReal3Param)0); }
|
||||
// clang-format on
|
||||
|
||||
AcResult
|
||||
@@ -153,8 +154,7 @@ acDeviceCreate(const int id, const AcMeshInfo device_config, Device* device_hand
|
||||
#endif
|
||||
|
||||
// Device constants
|
||||
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0,
|
||||
cudaMemcpyHostToDevice));
|
||||
acDeviceLoadMeshInfo(device, STREAM_DEFAULT, device_config);
|
||||
|
||||
printf("Created device %d (%p)\n", device->id, device);
|
||||
*device_handle = device;
|
||||
@@ -303,8 +303,9 @@ acDeviceAutoOptimize(const Device device)
|
||||
|
||||
cudaEventRecord(tstart); // ---------------------------------------- Timing start
|
||||
|
||||
acDeviceLoadScalarConstant(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON);
|
||||
for (int i = 0; i < num_iterations; ++i)
|
||||
solve<2><<<bpg, tpb>>>(start, end, device->vba, FLT_EPSILON);
|
||||
solve<2><<<bpg, tpb>>>(start, end, device->vba);
|
||||
|
||||
cudaEventRecord(tstop); // ----------------------------------------- Timing end
|
||||
cudaEventSynchronize(tstop);
|
||||
@@ -361,8 +362,8 @@ acDeviceSwapBuffers(const Device device)
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam param,
|
||||
const AcReal value)
|
||||
acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRealParam param,
|
||||
const AcReal value)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
const size_t offset = (size_t)&d_mesh_info.real_params[param] - (size_t)&d_mesh_info;
|
||||
@@ -371,6 +372,55 @@ acDeviceLoadConstant(const Device device, const Stream stream, const AcRealParam
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Uploads one AcReal3 parameter into the constant-memory mesh info of a device.
 *
 * The value is copied with cudaMemcpyToSymbolAsync into d_mesh_info at the
 * byte offset of real3_params[param], on the CUDA stream
 * device->streams[stream]. The copy is asynchronous; this function does not
 * synchronize the stream itself.
 *
 * @param device Device whose id is made current before the copy.
 * @param stream Index into device->streams selecting the stream to use.
 * @param param  Which AcReal3 parameter slot to overwrite.
 * @param value  New value, read from host memory.
 * @return AC_SUCCESS once the copy has been issued (failure handling is
 *         whatever ERRCHK_CUDA does).
 */
AcResult
acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcReal3Param param,
                           const AcReal3 value)
{
    // NOTE: the return value of cudaSetDevice is not checked.
    cudaSetDevice(device->id);

    // Byte distance from the start of d_mesh_info to the requested slot.
    const size_t symbol_base = (size_t)&d_mesh_info;
    const size_t offset      = (size_t)&d_mesh_info.real3_params[param] - symbol_base;

    ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
                                        cudaMemcpyHostToDevice, device->streams[stream]));

    return AC_SUCCESS;
}
|
||||
|
||||
/**
 * Uploads one integer parameter into the constant-memory mesh info of a device.
 *
 * The value is copied with cudaMemcpyToSymbolAsync into d_mesh_info at the
 * byte offset of int_params[param], on the CUDA stream
 * device->streams[stream]. The copy is asynchronous; this function does not
 * synchronize the stream itself.
 *
 * @param device Device whose id is made current before the copy.
 * @param stream Index into device->streams selecting the stream to use.
 * @param param  Which integer parameter slot to overwrite.
 * @param value  New value, read from host memory.
 * @return AC_SUCCESS once the copy has been issued (failure handling is
 *         whatever ERRCHK_CUDA does).
 */
AcResult
acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
                        const int value)
{
    // NOTE: the return value of cudaSetDevice is not checked.
    cudaSetDevice(device->id);

    // Byte distance from the start of d_mesh_info to the requested slot.
    const size_t symbol_base = (size_t)&d_mesh_info;
    const size_t offset      = (size_t)&d_mesh_info.int_params[param] - symbol_base;

    ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
                                        cudaMemcpyHostToDevice, device->streams[stream]));

    return AC_SUCCESS;
}
|
||||
|
||||
/**
 * Uploads one int3 parameter into the constant-memory mesh info of a device.
 *
 * The value is copied with cudaMemcpyToSymbolAsync into d_mesh_info at the
 * byte offset of int3_params[param], on the CUDA stream
 * device->streams[stream]. The copy is asynchronous; this function does not
 * synchronize the stream itself.
 *
 * @param device Device whose id is made current before the copy.
 * @param stream Index into device->streams selecting the stream to use.
 * @param param  Which int3 parameter slot to overwrite.
 * @param value  New value, read from host memory.
 * @return AC_SUCCESS once the copy has been issued (failure handling is
 *         whatever ERRCHK_CUDA does).
 */
AcResult
acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
                         const int3 value)
{
    // NOTE: the return value of cudaSetDevice is not checked.
    cudaSetDevice(device->id);

    // Byte distance from the start of d_mesh_info to the requested slot.
    const size_t symbol_base = (size_t)&d_mesh_info;
    const size_t offset      = (size_t)&d_mesh_info.int3_params[param] - symbol_base;

    ERRCHK_CUDA(cudaMemcpyToSymbolAsync(d_mesh_info, &value, sizeof(value), offset,
                                        cudaMemcpyHostToDevice, device->streams[stream]));

    return AC_SUCCESS;
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadMeshInfo(const Device device, const Stream stream, const AcMeshInfo device_config)
|
||||
{
|
||||
cudaSetDevice(device->id);
|
||||
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_nx] == device->local_config.int_params[AC_nx]);
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_ny] == device->local_config.int_params[AC_ny]);
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_nz] == device->local_config.int_params[AC_nz]);
|
||||
ERRCHK_ALWAYS(device_config.int_params[AC_multigpu_offset] ==
|
||||
device->local_config.int_params[AC_multigpu_offset]);
|
||||
|
||||
ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbolAsync(d_mesh_info, &device_config, sizeof(device_config),
|
||||
0, cudaMemcpyHostToDevice, device->streams[stream]));
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
AcResult
|
||||
acDeviceLoadVertexBufferWithOffset(const Device device, const Stream stream, const AcMesh host_mesh,
|
||||
const VertexBufferHandle vtxbuf_handle, const int3 src,
|
||||
@@ -551,12 +601,13 @@ acDeviceIntegrateSubstep(const Device device, const Stream stream, const int ste
|
||||
(unsigned int)ceil(n.y / AcReal(tpb.y)), //
|
||||
(unsigned int)ceil(n.z / AcReal(tpb.z)));
|
||||
|
||||
acDeviceLoadScalarConstant(device, stream, AC_dt, dt);
|
||||
if (step_number == 0)
|
||||
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
|
||||
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
|
||||
else if (step_number == 1)
|
||||
solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
|
||||
solve<1><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
|
||||
else
|
||||
solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba, dt);
|
||||
solve<2><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
|
||||
|
||||
ERRCHK_CUDA_KERNEL();
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "src/core/math_utils.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
static __device__ __forceinline__ int
|
||||
@@ -321,65 +323,6 @@ read_data(const int i, const int j, const int k,
|
||||
* =============================================================================
|
||||
*/
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
operator-(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
operator+(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
operator-(const AcReal3& a)
|
||||
{
|
||||
return (AcReal3){-a.x, -a.y, -a.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3 operator*(const AcReal a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
mul(const AcMatrix& aa, const AcReal3& x)
|
||||
{
|
||||
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ AcReal3
|
||||
cross(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
AcReal3 c;
|
||||
|
||||
c.x = a.y * b.z - a.z * b.y;
|
||||
c.y = a.z * b.x - a.x * b.z;
|
||||
c.z = a.x * b.y - a.y * b.x;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ bool
|
||||
is_valid(const AcReal& a)
|
||||
{
|
||||
return !isnan(a) && !isinf(a);
|
||||
}
|
||||
|
||||
static __host__ __device__ __forceinline__ bool
|
||||
is_valid(const AcReal3& a)
|
||||
{
|
||||
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Level 1 (Stencil Processing Stage)
|
||||
@@ -642,4 +585,54 @@ read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
|
||||
\
|
||||
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
|
||||
|
||||
// clang-format off
|
||||
#define GEN_DEVICE_FUNC_HOOK(identifier) \
|
||||
template <int step_number> \
|
||||
AcResult acDeviceKernel_##identifier(const Device device, const Stream stream, \
|
||||
const int3 start, const int3 end) \
|
||||
{ \
|
||||
cudaSetDevice(device->id); \
|
||||
\
|
||||
const dim3 tpb(32, 1, 4); \
|
||||
\
|
||||
const int3 n = end - start; \
|
||||
const dim3 bpg((unsigned int)ceil(n.x / AcReal(tpb.x)), \
|
||||
(unsigned int)ceil(n.y / AcReal(tpb.y)), \
|
||||
(unsigned int)ceil(n.z / AcReal(tpb.z))); \
|
||||
\
|
||||
identifier<step_number> \
|
||||
<<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba); \
|
||||
ERRCHK_CUDA_KERNEL(); \
|
||||
\
|
||||
return AC_SUCCESS; \
|
||||
}
|
||||
|
||||
/*
|
||||
#define GEN_NODE_FUNC_HOOK(identifier) \
|
||||
template <int step_number> \
|
||||
AcResult acNodeKernel_##identifier(const Node node, const Stream stream, const int3 start, \
|
||||
const int3 end) \
|
||||
{ \
|
||||
acNodeSynchronizeStream(node, stream); \
|
||||
\
|
||||
for (int i = 0; i < node->num_devices; ++i) { \
|
||||
\
|
||||
const int3 d0 = (int3){NGHOST, NGHOST, NGHOST + i * node->subgrid.n.z}; \
|
||||
const int3 d1 = d0 + (int3){node->subgrid.n.x, node->subgrid.n.y, node->subgrid.n.z}; \
|
||||
\
|
||||
const int3 da = max(start, d0); \
|
||||
const int3 db = min(end, d1); \
|
||||
\
|
||||
if (db.z >= da.z) { \
|
||||
const int3 da_local = da - (int3){0, 0, i * node->subgrid.n.z}; \
|
||||
const int3 db_local = db - (int3){0, 0, i * node->subgrid.n.z}; \
|
||||
acDeviceKernel_ #identifier(node->devices[i], stream, isubstep, da_local, \
|
||||
db_local, dt); \
|
||||
} \
|
||||
} \
|
||||
return AC_SUCCESS; \
|
||||
}
|
||||
*/
|
||||
// clang-format on
|
||||
|
||||
#include "stencil_process.cuh"
|
||||
|
||||
@@ -25,10 +25,10 @@
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
#include <cmath>
|
||||
using namespace std; // Potentially bad practice to declare namespace std here
|
||||
// #include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0
|
||||
// #include <tgmath.h> // Even this does not work
|
||||
//#include <cmath>
|
||||
// using namespace std; // Potentially bad practice to declare namespace std here
|
||||
#include <math.h> // isnan, isinf // Overloads incorrect/bugged with GCC <= 6.0
|
||||
//#include <tgmath.h> // Even this does not work
|
||||
#include <stdlib.h> // rand
|
||||
|
||||
template <class T>
|
||||
@@ -64,16 +64,6 @@ sum(const T& a, const T& b)
|
||||
return a + b;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static inline const T
|
||||
is_valid(const T& val)
|
||||
{
|
||||
if (isnan(val) || isinf(val))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static inline const T
|
||||
clamp(const T& val, const T& min, const T& max)
|
||||
@@ -87,20 +77,85 @@ randr()
|
||||
return AcReal(rand()) / AcReal(RAND_MAX);
|
||||
}
|
||||
|
||||
static inline int3
|
||||
operator+(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static inline int3
|
||||
operator-(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
/**
 * Tells whether an unsigned integer is an exact power of two.
 *
 * @param val Value to test.
 * @return true when exactly one bit of val is set; false otherwise
 *         (including val == 0).
 */
static inline bool
is_power_of_two(const unsigned val)
{
    // Zero has no bits set and is not a power of two.
    if (val == 0)
        return false;

    // Clearing the lowest set bit leaves zero only if it was the sole bit.
    const unsigned without_lowest_bit = val & (val - 1);
    return without_lowest_bit == 0;
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
|
||||
#else
|
||||
#define HOST_DEVICE_INLINE inline
|
||||
#endif // __CUDACC__
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
operator+(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE int3
|
||||
operator+(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
operator-(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE int3
|
||||
operator-(const int3& a, const int3& b)
|
||||
{
|
||||
return (int3){a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
operator-(const AcReal3& a)
|
||||
{
|
||||
return (AcReal3){-a.x, -a.y, -a.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3 operator*(const AcReal& a, const AcReal3& b)
|
||||
{
|
||||
return (AcReal3){a * b.x, a * b.y, a * b.z};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE AcReal3
|
||||
mul(const AcMatrix& aa, const AcReal3& x)
|
||||
{
|
||||
return (AcReal3){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
|
||||
}
|
||||
|
||||
/**
 * Computes the cross product a x b of two three-component vectors.
 *
 * @param a Left operand.
 * @param b Right operand.
 * @return Vector whose components are
 *         (a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x).
 */
static HOST_DEVICE_INLINE AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
    return (AcReal3){a.y * b.z - a.z * b.y, //
                     a.z * b.x - a.x * b.z, //
                     a.x * b.y - a.y * b.x};
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool
|
||||
is_valid(const AcReal a)
|
||||
{
|
||||
return !isnan(a) && !isinf(a);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool
|
||||
is_valid(const AcReal3& a)
|
||||
{
|
||||
return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
|
||||
}
|
||||
|
||||
@@ -429,7 +429,7 @@ acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param
|
||||
acNodeSynchronizeStream(node, stream);
|
||||
// #pragma omp parallel for
|
||||
for (int i = 0; i < node->num_devices; ++i) {
|
||||
acDeviceLoadConstant(node->devices[i], stream, param, value);
|
||||
acDeviceLoadScalarConstant(node->devices[i], stream, param, value);
|
||||
}
|
||||
return AC_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -8,5 +8,5 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||
find_package(MPI REQUIRED)
|
||||
|
||||
add_executable(mpitest main.c)
|
||||
target_include_directories(mpitest PRIVATE ${MPI_C_INCLUDE_PATH})
|
||||
target_link_libraries(mpitest PRIVATE ${MPI_C_LIBRARIES} astaroth_core)
|
||||
target_include_directories(mpitest PRIVATE ${CMAKE_SOURCE_DIR}/src/standalone ${MPI_C_INCLUDE_PATH})
|
||||
target_link_libraries(mpitest astaroth_core astaroth_standalone ${MPI_C_LIBRARIES})
|
||||
|
||||
@@ -16,13 +16,120 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**
|
||||
Running: mpirun -np <num processes> <executable>
|
||||
*/
|
||||
#undef NDEBUG // Assert always
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "astaroth.h"
|
||||
#include "autotest.h"
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
// From Astaroth Standalone
|
||||
#include "config_loader.h"
|
||||
#include "model/host_memory.h"
|
||||
|
||||
static void
|
||||
distribute_mesh(const AcMesh* src, AcMesh* dst)
|
||||
{
|
||||
MPI_Datatype datatype = MPI_FLOAT;
|
||||
if (sizeof(AcReal) == 8)
|
||||
datatype = MPI_DOUBLE;
|
||||
|
||||
int process_id, num_processes;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||
|
||||
const size_t count = acVertexBufferSize(dst->info);
|
||||
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||
|
||||
// Communicate to self
|
||||
if (process_id == 0) {
|
||||
assert(src);
|
||||
assert(dst);
|
||||
memcpy(&dst->vertex_buffer[i][0], //
|
||||
&src->vertex_buffer[i][0], //
|
||||
count * sizeof(src->vertex_buffer[i][0]));
|
||||
}
|
||||
// Communicate to others
|
||||
for (int j = 1; j < num_processes; ++j) {
|
||||
if (process_id == 0) {
|
||||
assert(src);
|
||||
|
||||
// Send
|
||||
// TODO RECHECK THESE j INDICES
|
||||
const size_t src_idx = j * dst->info.int_params[AC_mx] *
|
||||
dst->info.int_params[AC_my] * src->info.int_params[AC_nz] /
|
||||
num_processes;
|
||||
|
||||
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, j, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
else {
|
||||
assert(dst);
|
||||
|
||||
// Recv
|
||||
const size_t dst_idx = 0;
|
||||
MPI_Status status;
|
||||
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, 0, 0, MPI_COMM_WORLD,
|
||||
&status);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gather_mesh(const AcMesh* src, AcMesh* dst)
|
||||
{
|
||||
MPI_Datatype datatype = MPI_FLOAT;
|
||||
if (sizeof(AcReal) == 8)
|
||||
datatype = MPI_DOUBLE;
|
||||
|
||||
int process_id, num_processes;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||
|
||||
size_t count = acVertexBufferSize(src->info);
|
||||
|
||||
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||
// Communicate to self
|
||||
if (process_id == 0) {
|
||||
assert(src);
|
||||
assert(dst);
|
||||
memcpy(&dst->vertex_buffer[i][0], //
|
||||
&src->vertex_buffer[i][0], //
|
||||
count * sizeof(AcReal));
|
||||
}
|
||||
|
||||
// Communicate to others
|
||||
for (int j = 1; j < num_processes; ++j) {
|
||||
if (process_id == 0) {
|
||||
// Recv
|
||||
// const size_t dst_idx = j * acVertexBufferCompdomainSize(dst->info);
|
||||
const size_t dst_idx = j * dst->info.int_params[AC_mx] *
|
||||
dst->info.int_params[AC_my] * dst->info.int_params[AC_nz] /
|
||||
num_processes;
|
||||
|
||||
assert(dst_idx + count <= acVertexBufferSize(dst->info));
|
||||
MPI_Status status;
|
||||
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, j, 0, MPI_COMM_WORLD,
|
||||
&status);
|
||||
}
|
||||
else {
|
||||
// Send
|
||||
const size_t src_idx = 0;
|
||||
|
||||
assert(src_idx + count <= acVertexBufferSize(src->info));
|
||||
MPI_Send(&src->vertex_buffer[i][src_idx], count, datatype, 0, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
@@ -37,14 +144,39 @@ main(void)
|
||||
MPI_Get_processor_name(processor_name, &name_len);
|
||||
printf("Processor %s. Process %d of %d.\n", processor_name, process_id, num_processes);
|
||||
|
||||
AcMeshInfo info = {
|
||||
.int_params[AC_nx] = 128,
|
||||
.int_params[AC_ny] = 64,
|
||||
.int_params[AC_nz] = 32,
|
||||
};
|
||||
acInit(info);
|
||||
acIntegrate(0.1f);
|
||||
acQuit();
|
||||
AcMeshInfo mesh_info;
|
||||
load_config(&mesh_info);
|
||||
update_config(&mesh_info);
|
||||
|
||||
AcMesh* main_mesh = NULL;
|
||||
ModelMesh* model_mesh = NULL;
|
||||
if (process_id == 0) {
|
||||
main_mesh = acmesh_create(mesh_info);
|
||||
acmesh_init_to(INIT_TYPE_RANDOM, main_mesh);
|
||||
model_mesh = modelmesh_create(mesh_info);
|
||||
acmesh_to_modelmesh(*main_mesh, model_mesh);
|
||||
}
|
||||
|
||||
AcMeshInfo submesh_info = mesh_info;
|
||||
submesh_info.int_params[AC_nz] /= num_processes;
|
||||
update_config(&submesh_info);
|
||||
|
||||
AcMesh* submesh = acmesh_create(submesh_info);
|
||||
|
||||
/////////////////////
|
||||
distribute_mesh(main_mesh, submesh);
|
||||
gather_mesh(submesh, main_mesh);
|
||||
/////////////////////////
|
||||
// Autotest
|
||||
bool is_acceptable = verify_meshes(*model_mesh, *main_mesh);
|
||||
/////
|
||||
|
||||
acmesh_destroy(submesh);
|
||||
|
||||
if (process_id == 0) {
|
||||
modelmesh_destroy(model_mesh);
|
||||
acmesh_destroy(main_mesh);
|
||||
}
|
||||
|
||||
MPI_Finalize();
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
@@ -25,10 +25,11 @@ add_compile_options(-pipe ${OpenMP_CXX_FLAGS})
|
||||
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
|
||||
|
||||
## Compile and link
|
||||
add_library(astaroth_standalone ${SOURCES})
|
||||
add_library(astaroth_standalone STATIC ${SOURCES})
|
||||
target_link_libraries(astaroth_standalone PRIVATE astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
|
||||
|
||||
add_executable(ac_run main.cc)
|
||||
target_link_libraries(ac_run PRIVATE astaroth_standalone astaroth_core "${OpenMP_CXX_FLAGS}" ${SDL2_LIBRARY})
|
||||
target_link_libraries(ac_run PRIVATE astaroth_standalone)
|
||||
|
||||
# Define the config directory
|
||||
if (ALTER_CONF)
|
||||
|
||||
@@ -75,6 +75,12 @@ static const InitType test_cases[] = {INIT_TYPE_RANDOM, INIT_TYPE_XWAVE,
|
||||
INIT_TYPE_GAUSSIAN_RADIAL_EXPL, INIT_TYPE_ABC_FLOW};
|
||||
// #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
|
||||
static inline bool
|
||||
is_valid(const ModelScalar a)
|
||||
{
|
||||
return !isnan(a) && !isinf(a);
|
||||
}
|
||||
|
||||
#if TEST_TYPE == \
|
||||
QUICK_TEST // REGULAR TEST START HERE
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@@ -34,15 +34,6 @@
|
||||
#include "src/core/errchk.h"
|
||||
#include "src/core/math_utils.h"
|
||||
|
||||
static inline void
|
||||
print(const AcMeshInfo& config)
|
||||
{
|
||||
for (int i = 0; i < NUM_INT_PARAMS; ++i)
|
||||
printf("[%s]: %d\n", intparam_names[i], config.int_params[i]);
|
||||
for (int i = 0; i < NUM_REAL_PARAMS; ++i)
|
||||
printf("[%s]: %g\n", realparam_names[i], double(config.real_params[i]));
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Find the index of the keyword in names
|
||||
\return Index in range 0...n if the keyword is in names. -1 if the keyword was
|
||||
@@ -163,7 +154,7 @@ update_config(AcMeshInfo* config)
|
||||
#if VERBOSE_PRINTING // Defined in astaroth.h
|
||||
printf("###############################################################\n");
|
||||
printf("Config dimensions recalculated:\n");
|
||||
print(*config);
|
||||
acPrintMeshInfo(*config);
|
||||
printf("###############################################################\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -26,7 +26,9 @@
|
||||
*/
|
||||
#include "host_forcing.h"
|
||||
|
||||
#include "src/core/math_utils.h"
|
||||
// #include "src/core/math_utils.h"
|
||||
#include <cmath>
|
||||
using namespace std;
|
||||
|
||||
// This is a wrapper for generating random numbers with a chosen system.
|
||||
AcReal
|
||||
@@ -36,7 +38,7 @@ get_random_number_01()
|
||||
return AcReal(rand()) / AcReal(RAND_MAX);
|
||||
}
|
||||
|
||||
AcReal3
|
||||
static AcReal3
|
||||
cross(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
AcReal3 c;
|
||||
@@ -48,13 +50,13 @@ cross(const AcReal3& a, const AcReal3& b)
|
||||
return c;
|
||||
}
|
||||
|
||||
AcReal
|
||||
static AcReal
|
||||
dot(const AcReal3& a, const AcReal3& b)
|
||||
{
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
AcReal3
|
||||
static AcReal3
|
||||
vec_norm(const AcReal3& a)
|
||||
{
|
||||
AcReal3 c;
|
||||
@@ -67,7 +69,7 @@ vec_norm(const AcReal3& a)
|
||||
return c;
|
||||
}
|
||||
|
||||
AcReal3
|
||||
static AcReal3
|
||||
vec_multi_scal(const AcReal scal, const AcReal3& a)
|
||||
{
|
||||
AcReal3 c;
|
||||
|
||||
@@ -32,14 +32,6 @@
|
||||
|
||||
AcReal get_random_number_01();
|
||||
|
||||
AcReal3 cross(const AcReal3& a, const AcReal3& b);
|
||||
|
||||
AcReal dot(const AcReal3& a, const AcReal3& b);
|
||||
|
||||
AcReal3 vec_norm(const AcReal3& a);
|
||||
|
||||
AcReal3 vec_multi_scal(const AcReal scal, const AcReal3& a);
|
||||
|
||||
AcReal3 helical_forcing_k_generator(const AcReal kmax, const AcReal kmin);
|
||||
|
||||
void helical_forcing_e_generator(AcReal3* e_force, const AcReal3 k_force);
|
||||
|
||||
@@ -31,6 +31,16 @@
|
||||
#include "host_memory.h"
|
||||
#include "model_boundconds.h"
|
||||
|
||||
// Standalone flags
|
||||
#define LDENSITY (1)
|
||||
#define LHYDRO (1)
|
||||
#define LMAGNETIC (1)
|
||||
#define LENTROPY (1)
|
||||
#define LTEMPERATURE (0)
|
||||
#define LFORCING (1)
|
||||
#define LUPWD (1)
|
||||
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
|
||||
|
||||
typedef struct {
|
||||
ModelScalar x, y, z;
|
||||
} ModelVector;
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#pragma once
|
||||
#include "astaroth.h"
|
||||
|
||||
#include "math.h"
|
||||
|
||||
typedef long double ModelScalar;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user