Added stuff for the device layer for testing GPU-GPU MPI. This is a quick and dirty solution which is primarily meant for benchmarking/verification. Figuring out what the MPI interface should look like is more challenging and is not the priority right now
This commit is contained in:
@@ -144,7 +144,7 @@ AcResult acDeviceReduceVec(const Device device, const Stream stream_type, const
|
|||||||
const VertexBufferHandle vtxbuf2, AcReal* result);
|
const VertexBufferHandle vtxbuf2, AcReal* result);
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
AcResult acDeviceCommunicateHalosMPI(const Device device);
|
AcResult acDeviceRunMPITest(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
@@ -20,7 +20,6 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
|
|||||||
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
|
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
|
||||||
# =cg to opt out
|
# =cg to opt out
|
||||||
|
|
||||||
|
|
||||||
set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow
|
set(CUDA_WARNING_FLAGS --compiler-options -Wall,-Wextra,-Werror,-Wdouble-promotion,-Wfloat-conversion) # -Wshadow
|
||||||
|
|
||||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS})
|
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_ARCH_FLAGS} ${CUDA_WARNING_FLAGS})
|
||||||
@@ -32,9 +31,12 @@ if (MPI_ENABLED)
|
|||||||
|
|
||||||
add_definitions(-DAC_MPI_ENABLED=1)
|
add_definitions(-DAC_MPI_ENABLED=1)
|
||||||
cuda_include_directories(${MPI_C_INCLUDE_PATH})
|
cuda_include_directories(${MPI_C_INCLUDE_PATH})
|
||||||
|
|
||||||
|
add_definitions(-DAC_DEFAULT_CONFIG="${CMAKE_SOURCE_DIR}/config/astaroth.conf") # Hack, cmake doesnt propagate properly from utils. Probably because utils is compiled after core
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
## Create and link the library
|
## Create and link the library
|
||||||
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON) # fpic for shared libraries
|
||||||
cuda_add_library(astaroth_core STATIC astaroth.cu device.cu node.cu)
|
cuda_add_library(astaroth_core STATIC astaroth.cu device.cu node.cu)
|
||||||
target_include_directories(astaroth_core PRIVATE .)
|
target_include_directories(astaroth_core PRIVATE .)
|
||||||
target_link_libraries(astaroth_core m)
|
target_link_libraries(astaroth_core m)
|
||||||
@@ -46,5 +48,5 @@ if (MULTIGPU_ENABLED)
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (MPI_ENABLED)
|
if (MPI_ENABLED)
|
||||||
target_link_libraries(astaroth_core ${MPI_C_LIBRARIES})
|
target_link_libraries(astaroth_core ${MPI_C_LIBRARIES} astaroth_utils)
|
||||||
endif ()
|
endif ()
|
||||||
|
@@ -760,10 +760,105 @@ acDeviceReduceVec(const Device device, const Stream stream, const ReductionType
|
|||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// MPI tests
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
#if AC_MPI_ENABLED == 1
|
#if AC_MPI_ENABLED == 1
|
||||||
|
/**
|
||||||
|
Running: mpirun -np <num processes> <executable>
|
||||||
|
*/
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
|
|
||||||
|
static void
|
||||||
|
acDeviceDistributeMeshMPI(const AcMesh src, AcMesh* dst)
|
||||||
|
{
|
||||||
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
|
printf("Distributing mesh...\n");
|
||||||
|
|
||||||
|
MPI_Datatype datatype = MPI_FLOAT;
|
||||||
|
if (sizeof(AcReal) == 8)
|
||||||
|
datatype = MPI_DOUBLE;
|
||||||
|
|
||||||
|
int pid, num_processes;
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
|
||||||
|
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||||
|
|
||||||
|
const size_t count = acVertexBufferSize(dst->info);
|
||||||
|
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
// Communicate to self
|
||||||
|
assert(dst);
|
||||||
|
memcpy(&dst->vertex_buffer[i][0], //
|
||||||
|
&src.vertex_buffer[i][0], //
|
||||||
|
count * sizeof(src.vertex_buffer[i][0]));
|
||||||
|
|
||||||
|
// Communicate to others
|
||||||
|
for (int j = 1; j < num_processes; ++j) {
|
||||||
|
const size_t src_idx = acVertexBufferIdx(
|
||||||
|
0, 0, j * src.info.int_params[AC_nz] / num_processes, src.info);
|
||||||
|
|
||||||
|
MPI_Send(&src.vertex_buffer[i][src_idx], count, datatype, j, 0, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(dst);
|
||||||
|
|
||||||
|
// Recv
|
||||||
|
const size_t dst_idx = 0;
|
||||||
|
MPI_Status status;
|
||||||
|
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, 0, 0, MPI_COMM_WORLD,
|
||||||
|
&status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
acDeviceGatherMeshMPI(const AcMesh src, AcMesh* dst)
|
||||||
|
{
|
||||||
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
|
printf("Gathering mesh...\n");
|
||||||
|
MPI_Datatype datatype = MPI_FLOAT;
|
||||||
|
if (sizeof(AcReal) == 8)
|
||||||
|
datatype = MPI_DOUBLE;
|
||||||
|
|
||||||
|
int pid, num_processes;
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
|
||||||
|
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||||
|
|
||||||
|
size_t count = acVertexBufferSize(src.info);
|
||||||
|
|
||||||
|
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
|
||||||
|
// Communicate to self
|
||||||
|
if (pid == 0) {
|
||||||
|
assert(dst);
|
||||||
|
memcpy(&dst->vertex_buffer[i][0], //
|
||||||
|
&src.vertex_buffer[i][0], //
|
||||||
|
count * sizeof(src.vertex_buffer[i][0]));
|
||||||
|
|
||||||
|
for (int j = 1; j < num_processes; ++j) {
|
||||||
|
// Recv
|
||||||
|
const size_t dst_idx = acVertexBufferIdx(
|
||||||
|
0, 0, j * dst->info.int_params[AC_nz] / num_processes, dst->info);
|
||||||
|
|
||||||
|
assert(dst_idx + count <= acVertexBufferSize(dst->info));
|
||||||
|
MPI_Status status;
|
||||||
|
MPI_Recv(&dst->vertex_buffer[i][dst_idx], count, datatype, j, 0, MPI_COMM_WORLD,
|
||||||
|
&status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Send
|
||||||
|
const size_t src_idx = 0;
|
||||||
|
|
||||||
|
assert(src_idx + count <= acVertexBufferSize(src.info));
|
||||||
|
MPI_Send(&src.vertex_buffer[i][src_idx], count, datatype, 0, 0, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** NOTE: Assumes 1 process per GPU */
|
/** NOTE: Assumes 1 process per GPU */
|
||||||
AcResult
|
static AcResult
|
||||||
acDeviceCommunicateHalosMPI(const Device device)
|
acDeviceCommunicateHalosMPI(const Device device)
|
||||||
{
|
{
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
@@ -819,12 +914,66 @@ acDeviceCommunicateHalosMPI(const Device device)
|
|||||||
}
|
}
|
||||||
return AC_SUCCESS;
|
return AC_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From Astaroth Utils
|
||||||
|
#include "src/utils/config_loader.h"
|
||||||
|
#include "src/utils/memory.h"
|
||||||
|
#include "src/utils/verification.h"
|
||||||
|
|
||||||
|
AcResult
|
||||||
|
acDeviceRunMPITest(void)
|
||||||
|
{
|
||||||
|
int num_processes, pid;
|
||||||
|
MPI_Init(NULL, NULL);
|
||||||
|
MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
|
||||||
|
|
||||||
|
char processor_name[MPI_MAX_PROCESSOR_NAME];
|
||||||
|
int name_len;
|
||||||
|
MPI_Get_processor_name(processor_name, &name_len);
|
||||||
|
printf("Processor %s. Process %d of %d.\n", processor_name, pid, num_processes);
|
||||||
|
|
||||||
|
AcMeshInfo info;
|
||||||
|
acLoadConfig(AC_DEFAULT_CONFIG, &info);
|
||||||
|
|
||||||
|
AcMesh model, candidate, submesh;
|
||||||
|
|
||||||
|
// Master CPU
|
||||||
|
if (pid == 0) {
|
||||||
|
acMeshCreate(info, &model);
|
||||||
|
acMeshCreate(info, &candidate);
|
||||||
|
|
||||||
|
acMeshRandomize(&model);
|
||||||
|
acMeshApplyPeriodicBounds(&model);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(info.int_params[AC_nz] % num_processes == 0);
|
||||||
|
|
||||||
|
AcMeshInfo submesh_info = info;
|
||||||
|
submesh_info.int_params[AC_nz] /= num_processes;
|
||||||
|
acUpdateConfig(&submesh_info);
|
||||||
|
acMeshCreate(submesh_info, &submesh);
|
||||||
|
|
||||||
|
acDeviceDistributeMeshMPI(model, &submesh);
|
||||||
|
acDeviceGatherMeshMPI(submesh, &candidate);
|
||||||
|
|
||||||
|
acMeshDestroy(&submesh);
|
||||||
|
|
||||||
|
// Master CPU
|
||||||
|
if (pid == 0) {
|
||||||
|
acVerifyMesh(model, candidate);
|
||||||
|
acMeshDestroy(&model);
|
||||||
|
acMeshDestroy(&candidate);
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Finalize();
|
||||||
|
return AC_FAILURE;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
AcResult
|
AcResult
|
||||||
acDeviceCommunicateHalosMPI(const Device device)
|
acDeviceRunMPITest(void)
|
||||||
{
|
{
|
||||||
(void)device;
|
WARNING("MPI was not enabled but acDeviceRunMPITest() was called");
|
||||||
WARNING("MPI was not enabled but acDeviceCommunicateHalosMPI() was called");
|
|
||||||
return AC_FAILURE;
|
return AC_FAILURE;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -2,12 +2,8 @@
|
|||||||
## CMakeLists.txt for the MPI test ##
|
## CMakeLists.txt for the MPI test ##
|
||||||
##############################################
|
##############################################
|
||||||
|
|
||||||
set(CMAKE_C_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
set(CMAKE_C_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
find_package(MPI REQUIRED)
|
add_executable(mpitest main.cc)
|
||||||
|
target_link_libraries(mpitest astaroth_core)
|
||||||
add_executable(mpitest main.c)
|
|
||||||
target_include_directories(mpitest PRIVATE ${CMAKE_SOURCE_DIR}/src/standalone ${MPI_C_INCLUDE_PATH})
|
|
||||||
target_link_libraries(mpitest astaroth_core astaroth_utils ${MPI_C_LIBRARIES})
|
|
||||||
target_compile_definitions(mpitest PRIVATE -DAC_DEFAULT_CONFIG="${CMAKE_SOURCE_DIR}/config/astaroth.conf")
|
|
||||||
|
29
src/mpitest/main.cc
Normal file
29
src/mpitest/main.cc
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
|
||||||
|
|
||||||
|
This file is part of Astaroth.
|
||||||
|
|
||||||
|
Astaroth is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Astaroth is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
Running: mpirun -np <num processes> <executable>
|
||||||
|
*/
|
||||||
|
#include "astaroth.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
main(void)
|
||||||
|
{
|
||||||
|
acDeviceRunMPITest();
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
Reference in New Issue
Block a user