Squashed commit of the following:
commit 25e7cb77683736a588acb6b30a8ac89e2bd7f56a Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 13:25:49 2019 -0500 automatically define PERFECT_HAS_CUDA with nvcc commit fcc699c165ba515619781aefb378d3c0c4d1093d Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 13:18:42 2019 -0500 optional CUDA support
This commit is contained in:
23
.github/workflows/ccpp.yml
vendored
23
.github/workflows/ccpp.yml
vendored
@@ -38,3 +38,26 @@ jobs:
|
|||||||
g++ --version
|
g++ --version
|
||||||
nvcc --version
|
nvcc --version
|
||||||
make VERBOSE=1
|
make VERBOSE=1
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v1
|
||||||
|
- name: install cmake
|
||||||
|
run: |
|
||||||
|
export PREFIX=$HOME/software/cmake
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v3.15.3/cmake-3.15.3-Linux-x86_64.tar.gz -O cmake.tar.gz
|
||||||
|
mkdir -p $PREFIX
|
||||||
|
tar -xvf cmake.tar.gz --strip-components=1 -C $PREFIX
|
||||||
|
- name: configure
|
||||||
|
run: |
|
||||||
|
export PATH=$HOME/software/cmake/bin:$PATH
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake --version
|
||||||
|
cmake .. -DCMAKE_BUILD_TYPE=Debug
|
||||||
|
- name: build
|
||||||
|
run: |
|
||||||
|
export PATH=$HOME/software/cmake/bin:$PATH
|
||||||
|
cd build
|
||||||
|
g++ --version
|
||||||
|
make VERBOSE=1
|
||||||
|
@@ -5,9 +5,22 @@
|
|||||||
# 3.13+ for target_link_directories
|
# 3.13+ for target_link_directories
|
||||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||||
|
|
||||||
project(perfect LANGUAGES CUDA CXX VERSION 0.1.0)
|
project(perfect LANGUAGES CXX VERSION 0.1.0)
|
||||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||||
|
|
||||||
|
include(CheckLanguage)
|
||||||
|
|
||||||
|
option(USE_CUDA "try to use CUDA" ON)
|
||||||
|
|
||||||
|
if(USE_CUDA)
|
||||||
|
check_language(CUDA)
|
||||||
|
if(CMAKE_CUDA_COMPILER)
|
||||||
|
enable_language(CUDA)
|
||||||
|
else()
|
||||||
|
message(STATUS "No CUDA support")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
#https://blog.kitware.com/cmake-and-the-default-build-type/
|
#https://blog.kitware.com/cmake-and-the-default-build-type/
|
||||||
# Set a default build type if none was specified
|
# Set a default build type if none was specified
|
||||||
set(default_build_type "Release")
|
set(default_build_type "Release")
|
||||||
@@ -33,18 +46,19 @@ elseif (CMAKE_BUILD_TYPE MATCHES Release)
|
|||||||
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} -lineinfo)
|
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} -lineinfo)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(CMAKE_CUDA_STANDARD 11)
|
|
||||||
|
|
||||||
add_subdirectory(include/perfect)
|
add_subdirectory(include/perfect)
|
||||||
|
|
||||||
add_library(perfect INTERFACE)
|
add_library(perfect INTERFACE)
|
||||||
target_include_directories(perfect INTERFACE include/)
|
target_compile_features(perfect INTERFACE cxx_std_11)
|
||||||
|
if(CMAKE_CUDA_COMPILER)
|
||||||
|
target_compile_definitions(perfect INTERFACE -DPERFECT_HAS_CUDA)
|
||||||
# tell the host compiler where to find the CUDA includes and libraries
|
# tell the host compiler where to find the CUDA includes and libraries
|
||||||
target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
|
||||||
# CMakeFiles/<version>/CMakeCUDACompiler.cmake
|
# CMakeFiles/<version>/CMakeCUDACompiler.cmake
|
||||||
|
target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||||
target_link_directories(perfect INTERFACE ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
target_link_directories(perfect INTERFACE ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
||||||
target_link_libraries(perfect INTERFACE nvidia-ml)
|
target_link_libraries(perfect INTERFACE nvidia-ml)
|
||||||
|
endif()
|
||||||
|
target_include_directories(perfect INTERFACE include/)
|
||||||
|
|
||||||
|
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
|
29
README.md
29
README.md
@@ -16,6 +16,7 @@ CPU/GPU performance control library for benchmarking
|
|||||||
- [x] Set GPU clocks (nvidia)
|
- [x] Set GPU clocks (nvidia)
|
||||||
- [x] Disable GPU turbo (nvidia)
|
- [x] Disable GPU turbo (nvidia)
|
||||||
- [x] Flush addresses from cache (amd64, POWER)
|
- [x] Flush addresses from cache (amd64, POWER)
|
||||||
|
- [x] CUDA not required (GPU functions will not be compiled)
|
||||||
|
|
||||||
## Installing
|
## Installing
|
||||||
|
|
||||||
@@ -38,11 +39,21 @@ target_link_libraries(your-target perfect)
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Without CMake
|
### Without CMake
|
||||||
Download the source and add the include directory to your includes and `nvidia-ml` to your link flags
|
Download the source **AND**
|
||||||
|
* for compiling with a non-cuda compiler:
|
||||||
|
* add the include directory to your includes
|
||||||
|
* add `nvidia-ml` to your link flags
|
||||||
|
* add `-DPERFECT_HAS_CUDA` to your compile definitions
|
||||||
|
* with a CUDA compiler, just compile normally (`PERFECT_HAS_CUDA` is defined for you)
|
||||||
|
|
||||||
```
|
```
|
||||||
g++ code_using_perfect.cpp -I perfect/include -l nvidia-ml
|
g++ code_using_perfect.cpp -DPERFECT_HAS_CUDA -Iperfect/include -lnvidia-ml
|
||||||
nvcc code_using_perfect.cu -I perfect/include
|
nvcc code_using_perfect.cu -Iperfect/include -lnvidia-ml
|
||||||
|
```
|
||||||
|
|
||||||
|
If you don't have CUDA, then you could just do
|
||||||
|
```
|
||||||
|
g++ code_using_perfect.cpp -I perfect/include
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
@@ -67,11 +78,11 @@ See [examples/cpu_turbo.cpp].
|
|||||||
#include "perfect/cpu_turbo.hpp"
|
#include "perfect/cpu_turbo.hpp"
|
||||||
```
|
```
|
||||||
|
|
||||||
* `Result get_cpu_turbo_state(CpuTurboState *state)`
|
* `Result get_cpu_turbo_state(CpuTurboState *state)`: save the current CPU turbo state
|
||||||
* `Result set_cpu_turbo_state(CpuTurboState *state)`
|
* `Result set_cpu_turbo_state(CpuTurboState *state)`: restore a saved CPU turbo state
|
||||||
* `Result disable_cpu_turbo()`
|
* `Result disable_cpu_turbo()`: disable CPU turbo
|
||||||
* `Result enable_cpu_turbo()`
|
* `Result enable_cpu_turbo()`: enable CPU turbo
|
||||||
* `bool is_turbo_enabled(CpuTurboState state)`
|
* `bool is_turbo_enabled(CpuTurboState state)`: check if turbo is enabled
|
||||||
|
|
||||||
### OS Performance
|
### OS Performance
|
||||||
|
|
||||||
@@ -130,6 +141,6 @@ See [examples/cpu_cache.cpp].
|
|||||||
|
|
||||||
## Wish List
|
## Wish List
|
||||||
|
|
||||||
- [ ] Make CUDA Optional
|
|
||||||
- [ ] Nvidia GPU power monitoring
|
- [ ] Nvidia GPU power monitoring
|
||||||
- [ ] Nvidia GPU utilization monitoring
|
- [ ] Nvidia GPU utilization monitoring
|
@@ -43,8 +43,12 @@ target_link_libraries(cpu-turbo perfect)
|
|||||||
add_executable(os-perf os_perf.cpp)
|
add_executable(os-perf os_perf.cpp)
|
||||||
target_link_libraries(os-perf perfect)
|
target_link_libraries(os-perf perfect)
|
||||||
|
|
||||||
|
if(CMAKE_CUDA_COMPILER)
|
||||||
add_executable(gpu-clocks gpu_clocks.cu)
|
add_executable(gpu-clocks gpu_clocks.cu)
|
||||||
target_link_libraries(gpu-clocks perfect)
|
target_link_libraries(gpu-clocks perfect)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(CMAKE_CUDA_COMPILER)
|
||||||
add_executable(gpu-turbo gpu_turbo.cu)
|
add_executable(gpu-turbo gpu_turbo.cu)
|
||||||
target_link_libraries(gpu-turbo perfect)
|
target_link_libraries(gpu-turbo perfect)
|
||||||
|
endif()
|
@@ -9,8 +9,11 @@ int main(void) {
|
|||||||
|
|
||||||
for (auto cpu : perfect::cpus()) {
|
for (auto cpu : perfect::cpus()) {
|
||||||
perfect::OsPerfState state;
|
perfect::OsPerfState state;
|
||||||
perfect::get_os_perf_state(&state, cpu);
|
perfect::Result result;
|
||||||
|
result = perfect::get_os_perf_state(&state, cpu);
|
||||||
|
if (perfect::Result::SUCCESS == result) {
|
||||||
states[cpu] = state;
|
states[cpu] = state;
|
||||||
|
}
|
||||||
perfect::os_perf_state_maximum(cpu);
|
perfect::os_perf_state_maximum(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -21,6 +24,4 @@ int main(void) {
|
|||||||
perfect::OsPerfState state = kv.second;
|
perfect::OsPerfState state = kv.second;
|
||||||
perfect::set_os_perf_state(cpu, state);
|
perfect::set_os_perf_state(cpu, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
@@ -1,10 +1,14 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <cerrno>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
@@ -46,7 +50,14 @@ Result set_governor(const int cpu, const std::string &governor) {
|
|||||||
ofs << governor;
|
ofs << governor;
|
||||||
ofs.close();
|
ofs.close();
|
||||||
if (ofs.fail()) {
|
if (ofs.fail()) {
|
||||||
|
switch (errno) {
|
||||||
|
case EACCES:
|
||||||
return Result::NO_PERMISSION;
|
return Result::NO_PERMISSION;
|
||||||
|
case ENOENT:
|
||||||
|
return Result::NOT_SUPPORTED;
|
||||||
|
default:
|
||||||
|
return Result::UNKNOWN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return Result::SUCCESS;
|
return Result::SUCCESS;
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,12 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef __NVCC__
|
||||||
|
#define PERFECT_HAS_CUDA
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef PERFECT_HAS_CUDA
|
||||||
#include <nvml.h>
|
#include <nvml.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace perfect {
|
namespace perfect {
|
||||||
|
|
||||||
@@ -12,10 +18,12 @@ Result init() {
|
|||||||
return Result::SUCCESS;
|
return Result::SUCCESS;
|
||||||
|
|
||||||
// init nvml
|
// init nvml
|
||||||
|
#ifdef PERFECT_HAS_CUDA
|
||||||
nvmlReturn_t ret = nvmlInit();
|
nvmlReturn_t ret = nvmlInit();
|
||||||
if (ret != NVML_SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
return from_nvml(ret);
|
return from_nvml(ret);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// don't init again if init() called twice
|
// don't init again if init() called twice
|
||||||
init_ = true;
|
init_ = true;
|
||||||
|
@@ -2,22 +2,27 @@
|
|||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
|
#ifdef __NVCC__
|
||||||
|
#define PERFECT_HAS_CUDA
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef PERFECT_HAS_CUDA
|
||||||
#include <nvml.h>
|
#include <nvml.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace perfect {
|
namespace perfect {
|
||||||
|
|
||||||
enum class Result {
|
enum class Result {
|
||||||
SUCCESS,
|
|
||||||
NVML_NOT_SUPPORTED,
|
|
||||||
NVML_NO_PERMISSION,
|
|
||||||
NVML_UNINITIALIZED,
|
|
||||||
NO_PERMISSION,
|
NO_PERMISSION,
|
||||||
|
NOT_SUPPORTED,
|
||||||
|
NVML_NO_PERMISSION,
|
||||||
|
NVML_NOT_SUPPORTED,
|
||||||
|
NVML_UNINITIALIZED,
|
||||||
|
SUCCESS,
|
||||||
UNKNOWN
|
UNKNOWN
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef PERFECT_HAS_CUDA
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Result from_nvml(nvmlReturn_t nvml) {
|
Result from_nvml(nvmlReturn_t nvml) {
|
||||||
switch (nvml) {
|
switch (nvml) {
|
||||||
case NVML_SUCCESS:
|
case NVML_SUCCESS:
|
||||||
@@ -26,6 +31,8 @@ Result from_nvml(nvmlReturn_t nvml) {
|
|||||||
return Result::NVML_UNINITIALIZED;
|
return Result::NVML_UNINITIALIZED;
|
||||||
case NVML_ERROR_NOT_SUPPORTED:
|
case NVML_ERROR_NOT_SUPPORTED:
|
||||||
return Result::NVML_NOT_SUPPORTED;
|
return Result::NVML_NOT_SUPPORTED;
|
||||||
|
case NVML_ERROR_NO_PERMISSION:
|
||||||
|
return Result::NVML_NO_PERMISSION;
|
||||||
case NVML_ERROR_INVALID_ARGUMENT:
|
case NVML_ERROR_INVALID_ARGUMENT:
|
||||||
case NVML_ERROR_GPU_IS_LOST:
|
case NVML_ERROR_GPU_IS_LOST:
|
||||||
case NVML_ERROR_UNKNOWN:
|
case NVML_ERROR_UNKNOWN:
|
||||||
@@ -34,6 +41,7 @@ Result from_nvml(nvmlReturn_t nvml) {
|
|||||||
}
|
}
|
||||||
return Result::UNKNOWN;
|
return Result::UNKNOWN;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
const char *get_string(const Result &result) {
|
const char *get_string(const Result &result) {
|
||||||
switch (result) {
|
switch (result) {
|
||||||
@@ -47,6 +55,8 @@ const char *get_string(const Result &result) {
|
|||||||
return "nvidia-ml returned not supported";
|
return "nvidia-ml returned not supported";
|
||||||
case Result::NVML_NO_PERMISSION:
|
case Result::NVML_NO_PERMISSION:
|
||||||
return "nvidia-ml returned no permission";
|
return "nvidia-ml returned no permission";
|
||||||
|
case Result::NOT_SUPPORTED:
|
||||||
|
return "unsupported operation";
|
||||||
default:
|
default:
|
||||||
assert(0 && "unexpected perfect::Result");
|
assert(0 && "unexpected perfect::Result");
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user