Squashed commit of the following:
commit 25e7cb77683736a588acb6b30a8ac89e2bd7f56a Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 13:25:49 2019 -0500 automatically define PERFECT_HAS_CUDA with nvcc commit fcc699c165ba515619781aefb378d3c0c4d1093d Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 13:18:42 2019 -0500 optional CUDA support
This commit is contained in:
23
.github/workflows/ccpp.yml
vendored
23
.github/workflows/ccpp.yml
vendored
@@ -38,3 +38,26 @@ jobs:
|
||||
g++ --version
|
||||
nvcc --version
|
||||
make VERBOSE=1
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: install cmake
|
||||
run: |
|
||||
export PREFIX=$HOME/software/cmake
|
||||
wget https://github.com/Kitware/CMake/releases/download/v3.15.3/cmake-3.15.3-Linux-x86_64.tar.gz -O cmake.tar.gz
|
||||
mkdir -p $PREFIX
|
||||
tar -xvf cmake.tar.gz --strip-components=1 -C $PREFIX
|
||||
- name: configure
|
||||
run: |
|
||||
export PATH=$HOME/software/cmake/bin:$PATH
|
||||
mkdir build
|
||||
cd build
|
||||
cmake --version
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Debug
|
||||
- name: build
|
||||
run: |
|
||||
export PATH=$HOME/software/cmake/bin:$PATH
|
||||
cd build
|
||||
g++ --version
|
||||
make VERBOSE=1
|
||||
|
@@ -5,9 +5,22 @@
|
||||
# 3.13+ for target_link_directories
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(perfect LANGUAGES CUDA CXX VERSION 0.1.0)
|
||||
project(perfect LANGUAGES CXX VERSION 0.1.0)
|
||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||
|
||||
include(CheckLanguage)
|
||||
|
||||
option(USE_CUDA "try to use CUDA" ON)
|
||||
|
||||
if(USE_CUDA)
|
||||
check_language(CUDA)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
enable_language(CUDA)
|
||||
else()
|
||||
message(STATUS "No CUDA support")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#https://blog.kitware.com/cmake-and-the-default-build-type/
|
||||
# Set a default build type if none was specified
|
||||
set(default_build_type "Release")
|
||||
@@ -33,19 +46,20 @@ elseif (CMAKE_BUILD_TYPE MATCHES Release)
|
||||
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} -lineinfo)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
|
||||
add_subdirectory(include/perfect)
|
||||
|
||||
add_library(perfect INTERFACE)
|
||||
target_compile_features(perfect INTERFACE cxx_std_11)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
target_compile_definitions(perfect INTERFACE -DPERFECT_HAS_CUDA)
|
||||
# tell the host compiler where to find the CUDA includes and libraries
|
||||
# CMakeFiles/<version>/CMakeCUDACompiler.cmake
|
||||
target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||
target_link_directories(perfect INTERFACE ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
||||
target_link_libraries(perfect INTERFACE nvidia-ml)
|
||||
endif()
|
||||
target_include_directories(perfect INTERFACE include/)
|
||||
|
||||
# tell the host compiler where to find the CUDA includes and libraries
|
||||
target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||
# CMakeFiles/<version>/CMakeCUDACompiler.cmake
|
||||
target_link_directories(perfect INTERFACE ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
||||
target_link_libraries(perfect INTERFACE nvidia-ml)
|
||||
|
||||
|
||||
add_subdirectory(examples)
|
||||
add_subdirectory(tools)
|
29
README.md
29
README.md
@@ -16,6 +16,7 @@ CPU/GPU performance control library for benchmarking
|
||||
- [x] Set GPU clocks (nvidia)
|
||||
- [x] Disable GPU turbo (nvidia)
|
||||
- [x] Flush addresses from cache (amd64, POWER)
|
||||
- [x] CUDA not required (GPU functions will not be compiled)
|
||||
|
||||
## Installing
|
||||
|
||||
@@ -38,11 +39,21 @@ target_link_libraries(your-target perfect)
|
||||
```
|
||||
|
||||
### Without CMake
|
||||
Download the source and add the include directory to your includes and `nvidia-ml` to your link flags
|
||||
Download the source **AND**
|
||||
* to compile with a non-CUDA compiler (e.g. `g++`) and still use the GPU functions:
|
||||
* add the include directory to your includes
|
||||
* add `nvidia-ml` to your link flags
|
||||
* add `-DPERFECT_HAS_CUDA` to your compile definitions
|
||||
* with a CUDA compiler, just compile normally (`PERFECT_HAS_CUDA` is defined for you)
|
||||
|
||||
```
|
||||
g++ code_using_perfect.cpp -I perfect/include -l nvidia-ml
|
||||
nvcc code_using_perfect.cu -I perfect/include
|
||||
g++ code_using_perfect.cpp -DPERFECT_HAS_CUDA -Iperfect/include -lnvidia-ml
|
||||
nvcc code_using_perfect.cu -Iperfect/include -lnvidia-ml
|
||||
```
|
||||
|
||||
If you don't have CUDA, then you could just do
|
||||
```
|
||||
g++ code_using_perfect.cpp -I perfect/include
|
||||
```
|
||||
|
||||
## Usage
|
||||
@@ -67,11 +78,11 @@ See [examples/cpu_turbo.cpp].
|
||||
#include "perfect/cpu_turbo.hpp"
|
||||
```
|
||||
|
||||
* `Result get_cpu_turbo_state(CpuTurboState *state)`
|
||||
* `Result set_cpu_turbo_state(CpuTurboState *state)`
|
||||
* `Result disable_cpu_turbo()`
|
||||
* `Result enable_cpu_turbo()`
|
||||
* `bool is_turbo_enabled(CpuTurboState state)`
|
||||
* `Result get_cpu_turbo_state(CpuTurboState *state)`: save the current CPU turbo state
|
||||
* `Result set_cpu_turbo_state(CpuTurboState *state)`: restore a saved CPU turbo state
|
||||
* `Result disable_cpu_turbo()`: disable CPU turbo
|
||||
* `Result enable_cpu_turbo()`: enable CPU turbo
|
||||
* `bool is_turbo_enabled(CpuTurboState state)`: check if turbo is enabled
|
||||
|
||||
### OS Performance
|
||||
|
||||
@@ -130,6 +141,6 @@ See [examples/cpu_cache.cpp].
|
||||
|
||||
## Wish List
|
||||
|
||||
- [ ] Make CUDA Optional
|
||||
|
||||
- [ ] Nvidia GPU power monitoring
|
||||
- [ ] Nvidia GPU utilization monitoring
|
@@ -43,8 +43,12 @@ target_link_libraries(cpu-turbo perfect)
|
||||
add_executable(os-perf os_perf.cpp)
|
||||
target_link_libraries(os-perf perfect)
|
||||
|
||||
add_executable(gpu-clocks gpu_clocks.cu)
|
||||
target_link_libraries(gpu-clocks perfect)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
add_executable(gpu-clocks gpu_clocks.cu)
|
||||
target_link_libraries(gpu-clocks perfect)
|
||||
endif()
|
||||
|
||||
add_executable(gpu-turbo gpu_turbo.cu)
|
||||
target_link_libraries(gpu-turbo perfect)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
add_executable(gpu-turbo gpu_turbo.cu)
|
||||
target_link_libraries(gpu-turbo perfect)
|
||||
endif()
|
@@ -3,24 +3,25 @@
|
||||
#include <map>
|
||||
|
||||
int main(void) {
|
||||
perfect::init();
|
||||
perfect::init();
|
||||
|
||||
std::map<int, perfect::OsPerfState> states;
|
||||
std::map<int, perfect::OsPerfState> states;
|
||||
|
||||
for (auto cpu : perfect::cpus()) {
|
||||
perfect::OsPerfState state;
|
||||
perfect::get_os_perf_state(&state, cpu);
|
||||
states[cpu] = state;
|
||||
perfect::os_perf_state_maximum(cpu);
|
||||
for (auto cpu : perfect::cpus()) {
|
||||
perfect::OsPerfState state;
|
||||
perfect::Result result;
|
||||
result = perfect::get_os_perf_state(&state, cpu);
|
||||
if (perfect::Result::SUCCESS == result) {
|
||||
states[cpu] = state;
|
||||
}
|
||||
perfect::os_perf_state_maximum(cpu);
|
||||
}
|
||||
|
||||
// do things with all CPUs set to the maximum performance mode by the OS
|
||||
|
||||
for (auto kv : states) {
|
||||
int cpu = kv.first;
|
||||
perfect::OsPerfState state = kv.second;
|
||||
perfect::set_os_perf_state(cpu, state);
|
||||
}
|
||||
|
||||
// do things with all CPUs set to the maximum performance mode by the OS
|
||||
|
||||
for (auto kv : states) {
|
||||
int cpu = kv.first;
|
||||
perfect::OsPerfState state = kv.second;
|
||||
perfect::set_os_perf_state(cpu, state);
|
||||
}
|
||||
}
|
@@ -1,10 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
#include <sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
@@ -46,7 +50,14 @@ Result set_governor(const int cpu, const std::string &governor) {
|
||||
ofs << governor;
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
return Result::NO_PERMISSION;
|
||||
switch (errno) {
|
||||
case EACCES:
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
return Result::NOT_SUPPORTED;
|
||||
default:
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
@@ -1,6 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __NVCC__
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
namespace perfect {
|
||||
|
||||
@@ -11,11 +17,13 @@ Result init() {
|
||||
if (init_)
|
||||
return Result::SUCCESS;
|
||||
|
||||
// init nvml
|
||||
// init nvml
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
nvmlReturn_t ret = nvmlInit();
|
||||
if (ret != NVML_SUCCESS) {
|
||||
return from_nvml(ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
// don't init again if init() called twice
|
||||
init_ = true;
|
||||
|
@@ -2,22 +2,27 @@
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#ifdef __NVCC__
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
namespace perfect {
|
||||
|
||||
enum class Result {
|
||||
SUCCESS,
|
||||
NVML_NOT_SUPPORTED,
|
||||
NVML_NO_PERMISSION,
|
||||
NVML_UNINITIALIZED,
|
||||
NO_PERMISSION,
|
||||
NOT_SUPPORTED,
|
||||
NVML_NO_PERMISSION,
|
||||
NVML_NOT_SUPPORTED,
|
||||
NVML_UNINITIALIZED,
|
||||
SUCCESS,
|
||||
UNKNOWN
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
Result from_nvml(nvmlReturn_t nvml) {
|
||||
switch (nvml) {
|
||||
case NVML_SUCCESS:
|
||||
@@ -26,6 +31,8 @@ Result from_nvml(nvmlReturn_t nvml) {
|
||||
return Result::NVML_UNINITIALIZED;
|
||||
case NVML_ERROR_NOT_SUPPORTED:
|
||||
return Result::NVML_NOT_SUPPORTED;
|
||||
case NVML_ERROR_NO_PERMISSION:
|
||||
return Result::NVML_NO_PERMISSION;
|
||||
case NVML_ERROR_INVALID_ARGUMENT:
|
||||
case NVML_ERROR_GPU_IS_LOST:
|
||||
case NVML_ERROR_UNKNOWN:
|
||||
@@ -34,6 +41,7 @@ Result from_nvml(nvmlReturn_t nvml) {
|
||||
}
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
#endif
|
||||
|
||||
const char *get_string(const Result &result) {
|
||||
switch (result) {
|
||||
@@ -47,6 +55,8 @@ const char *get_string(const Result &result) {
|
||||
return "nvidia-ml returned not supported";
|
||||
case Result::NVML_NO_PERMISSION:
|
||||
return "nvidia-ml returned no permission";
|
||||
case Result::NOT_SUPPORTED:
|
||||
return "unsupported operation";
|
||||
default:
|
||||
assert(0 && "unexpected perfect::Result");
|
||||
}
|
||||
@@ -58,7 +68,7 @@ const char *get_string(const Result &result) {
|
||||
inline void check(Result result, const char *file, const int line) {
|
||||
if (result != Result::SUCCESS) {
|
||||
fprintf(stderr, "%s@%d: perfect Error: %s\n", file, line,
|
||||
get_string(result));
|
||||
get_string(result));
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user