diff --git a/CMakeLists.txt b/CMakeLists.txt index 107637b..27b94f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# 3.8+ for project(LANGUAGES CUDA) +# 3.8+ for project(LANGUAGES CUDA) and CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES # 3.9+ for OpenMP::OpenMP_CXX # 3.10+ findopenmp gained support for language-specific components # 3.11+ for CMake not to add -fopenmp to the nvcc flags @@ -40,6 +40,9 @@ add_subdirectory(include/perfect) add_library(perfect INTERFACE) target_include_directories(perfect INTERFACE include/) +target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) +target_link_libraries(perfect INTERFACE nvidia-ml) -add_subdirectory(examples) \ No newline at end of file +add_subdirectory(examples) +add_subdirectory(tools) \ No newline at end of file diff --git a/examples/cpu_turbo.cpp b/examples/cpu_turbo.cpp index e4d658b..a269c23 100644 --- a/examples/cpu_turbo.cpp +++ b/examples/cpu_turbo.cpp @@ -1,14 +1,27 @@ +#include + #include "perfect/cpu_turbo.hpp" int main(void) { - perfect::CpuTurboState state; - perfect::get_cpu_turbo_state(&state); + perfect::Result ret; + perfect::CpuTurboState state; - perfect::disable_cpu_turbo(); + // get the current turbo state + ret = perfect::get_cpu_turbo_state(&state); + if (ret != perfect::Result::SUCCESS) { + std::cerr << perfect::get_string(ret) << "\n"; + exit(EXIT_FAILURE); + } - // do things with CPU turbo disabled + // disable turbo + if ((ret = perfect::disable_cpu_turbo()) != perfect::Result::SUCCESS) { + std::cerr << perfect::get_string(ret) << " when disabling turbo\n"; + exit(EXIT_FAILURE); + } - perfect::set_cpu_turbo_state(state); + // do things with CPU turbo disabled + // restore the original state + ret = perfect::set_cpu_turbo_state(state); } \ No newline at end of file diff --git a/include/perfect/cpu_turbo.hpp b/include/perfect/cpu_turbo.hpp index 96cc535..577cc06 100644 --- a/include/perfect/cpu_turbo.hpp +++ b/include/perfect/cpu_turbo.hpp @@ -15,27 +15,32 @@ #error "unsupported OS" #endif - - #include "result.hpp" namespace perfect { struct CpuTurboState { - bool enabled; + bool enabled; + + CpuTurboState() : enabled(false) {} }; - Result get_cpu_turbo_state(CpuTurboState *state) { - state->enabled = is_turbo_enabled(); + state->enabled = detail::is_turbo_enabled(); + return Result::SUCCESS; } +inline bool is_turbo_enabled(CpuTurboState state) { return state.enabled; } + Result set_cpu_turbo_state(CpuTurboState state) { - if (state.enabled) { - enable_cpu_turbo(); - } else { - disable_cpu_turbo(); - } + if (state.enabled) { + return detail::enable_cpu_turbo(); + } else { + return detail::disable_cpu_turbo(); + } } -}; \ No newline at end of file +inline Result disable_cpu_turbo() { return detail::disable_cpu_turbo(); } +inline Result enable_cpu_turbo() { return detail::enable_cpu_turbo(); } + +}; // namespace perfect \ No newline at end of file diff --git a/include/perfect/detail/nvidia/nvidia-ml.hpp b/include/perfect/detail/nvidia/nvidia-ml.hpp new file mode 100644 index 0000000..1ac1dfb --- /dev/null +++ b/include/perfect/detail/nvidia/nvidia-ml.hpp @@ -0,0 +1,91 @@ +#pragma once + +#include +#include + +#include + +#include "perfect/result.hpp" + +inline void checkNvml(nvmlReturn_t result, const char *file, const int line) { + if (result != NVML_SUCCESS) { + printf("%s@%d: NVML Error: %s\n", file, line, nvmlErrorString(result)); + exit(-1); + } +} + +#define NVML(stmt) checkNvml(stmt, __FILE__, __LINE__); + +namespace perfect { +namespace detail { +std::vector get_device_memory_clocks(unsigned int index) { + std::vector result; + nvmlDevice_t device; + nvmlDeviceGetHandleByIndex(index, &device); + unsigned int resultCount = 0; + + auto ret = nvmlDeviceGetSupportedMemoryClocks(device, &resultCount, nullptr); + if (ret != NVML_ERROR_INSUFFICIENT_SIZE) { + NVML(ret); + } + result.resize(resultCount); + NVML(nvmlDeviceGetSupportedMemoryClocks(device, &resultCount, result.data())); + return result; +} + +Result get_device_graphics_clocks(std::vector &graphicsClocksMhz, + unsigned int index, + unsigned int memoryClockMhz) { + nvmlDevice_t device; + nvmlDeviceGetHandleByIndex(index, &device); + unsigned int resultCount = 0; + auto ret = nvmlDeviceGetSupportedGraphicsClocks(device, memoryClockMhz, + &resultCount, nullptr); + if (ret != NVML_ERROR_INSUFFICIENT_SIZE) { + return from_nvml(ret); + } + graphicsClocksMhz.resize(resultCount); + return from_nvml(nvmlDeviceGetSupportedGraphicsClocks( + device, memoryClockMhz, &resultCount, graphicsClocksMhz.data())); +} + +Result disable_gpu_turbo(unsigned int idx) { + nvmlDevice_t device; + nvmlReturn_t ret; + ret = nvmlDeviceGetHandleByIndex(idx, &device); + if (ret != NVML_SUCCESS) { + return from_nvml(ret); + } + return from_nvml( + nvmlDeviceSetAutoBoostedClocksEnabled(device, NVML_FEATURE_DISABLED)); +} + +Result enable_gpu_turbo(unsigned int idx) { + nvmlDevice_t device; + nvmlReturn_t ret; + ret = nvmlDeviceGetHandleByIndex(idx, &device); + if (ret != NVML_SUCCESS) { + return from_nvml(ret); + } + return from_nvml( + nvmlDeviceSetAutoBoostedClocksEnabled(device, NVML_FEATURE_ENABLED)); +} + +Result is_gpu_turbo_enabled(bool *enabled, unsigned int idx) { + nvmlDevice_t device; + nvmlReturn_t ret; + nvmlEnableState_t isEnabled; + nvmlEnableState_t defaultIsEnabled; + ret = nvmlDeviceGetHandleByIndex(idx, &device); + if (ret != NVML_SUCCESS) { + return from_nvml(ret); + } + ret = nvmlDeviceGetAutoBoostedClocksEnabled(device, &isEnabled, + &defaultIsEnabled); + + *enabled = (isEnabled == NVML_FEATURE_ENABLED); + return from_nvml(ret); +} + +} // namespace detail +} // namespace perfect \ No newline at end of file diff --git a/include/perfect/detail/os/linux.hpp b/include/perfect/detail/os/linux.hpp index 12f547a..6568c70 100644 --- a/include/perfect/detail/os/linux.hpp +++ b/include/perfect/detail/os/linux.hpp @@ -1,8 +1,8 @@ #pragma once -#include -#include #include +#include +#include #include #include @@ -13,17 +13,20 @@ namespace perfect { +/*! return a set of CPUs the current thread can run on + */ std::vector cpus() { - std::vector result; - cpu_set_t mask; - if (sched_getaffinity(0 /*caller*/, sizeof(cpu_set_t), &mask)) { - assert(0 && "failed sched_getaffinity"); - } - for (int i = 0; i < CPU_SETSIZE; ++i) { - if (CPU_ISSET(i, &mask)) { - result.push_back(i); - } + std::vector result; + cpu_set_t mask; + if (sched_getaffinity(0 /*caller*/, sizeof(cpu_set_t), &mask)) { + assert(0 && "failed sched_getaffinity"); + } + for (int i = 0; i < CPU_SETSIZE; ++i) { + if (CPU_ISSET(i, &mask)) { + result.push_back(i); } + } + return result; } Result get_governor(std::string &result, const int cpu) { @@ -48,4 +51,4 @@ Result set_governor(const int cpu, const std::string &governor) { return Result::SUCCESS; } -} \ No newline at end of file +} // namespace perfect \ No newline at end of file diff --git a/include/perfect/detail/turbo/linux_amd64.hpp b/include/perfect/detail/turbo/linux_amd64.hpp index de6b615..11371a4 100644 --- a/include/perfect/detail/turbo/linux_amd64.hpp +++ b/include/perfect/detail/turbo/linux_amd64.hpp @@ -6,45 +6,38 @@ #include "perfect/result.hpp" namespace perfect { +namespace detail { bool has_intel_pstate_no_turbo() { return bool(std::ifstream("/sys/devices/system/cpu/intel_pstate/no_turbo")); } -int write_intel_pstate_no_turbo(const std::string &s) { +Result write_intel_pstate_no_turbo(const std::string &s) { assert(has_intel_pstate_no_turbo()); std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo"); -// SPDLOG_LOGGER_DEBUG(logger::console(), "writing {} to {}", s, path); std::ofstream ofs(path, std::ofstream::out); ofs << s; ofs.close(); if (ofs.fail()) { - // SPDLOG_LOGGER_DEBUG(logger::console(), "error writing {} to {}", s, path); - return 1; + return Result::NO_PERMISSION; } - return 0; + return Result::SUCCESS; } std::string read_intel_pstate_no_turbo() { assert(has_intel_pstate_no_turbo()); std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo"); -// SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path); + // SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path); std::ifstream ifs(path, std::ifstream::in); std::string result; std::getline(ifs, result); return result; } - bool is_turbo_enabled() { - return "0" == read_intel_pstate_no_turbo(); - } +bool is_turbo_enabled() { return "0" == read_intel_pstate_no_turbo(); } - Result disable_cpu_turbo() { - write_intel_pstate_no_turbo("1"); - } - Result enable_cpu_turbo() { - write_intel_pstate_no_turbo("1"); - } +Result disable_cpu_turbo() { return write_intel_pstate_no_turbo("1"); } +Result enable_cpu_turbo() { return write_intel_pstate_no_turbo("0"); } - -} \ No newline at end of file +} // namespace detail +} // namespace perfect \ No newline at end of file diff --git a/include/perfect/gpu_clocks.hpp b/include/perfect/gpu_clocks.hpp index 7b9637e..e719b8d 100644 --- a/include/perfect/gpu_clocks.hpp +++ b/include/perfect/gpu_clocks.hpp @@ -1 +1,48 @@ -#pragma once \ No newline at end of file +#pragma once + +#include "detail/nvidia/nvidia-ml.hpp" + +namespace perfect { + +/*! + */ +Result set_max_gpu_clocks(unsigned int idx) { + + Result rt; + std::vector clksMhz; + + ret = get_device_memory_clocks(clksMhz, idx); + + auto maxMemMhz = *std::max_element(memClksMhz.begin(), memClksMhz.end()); + ret = get_device_graphics_clocks(clksMhz, idx); + auto maxCoreMhz = *std::max_element(clksMhz.begin(), clksMhz.end()); + + auto ret = nvmlDeviceSetApplicationsClocks(device, maxMemMhz, maxCoreMhz); + if (ret == NVML_ERROR_NOT_SUPPORTED) { + return Result::NVML_NOT_SUPPORTED; + } else if (ret == NVML_ERROR_NO_PERMISSION) { + return Result::NVML_NO_PERMISSION; + } + return Result::SUCCESS; +} + +/*! Reset GPU clocks to default behavior + */ +Result reset_gpu_clocks(unsigned int idx) { + + nvmlDevice_t device; + nvmlReturn_t ret; + ret = nvmlDeviceGetHandleByIndex(idx, &device); + if (ret != NVML_SUCCESS) { + assert(false); + } + ret = nvmlDeviceResetApplicationsClocks(device); + if (ret == NVML_ERROR_NOT_SUPPORTED) { + return Result::NVML_NOT_SUPPORTED; + } else if (ret == NVML_ERROR_NO_PERMISSION) { + return Result::NVML_NO_PERMISSION; + } + return Result::SUCCESS; +} + +}; // namespace perfect \ No newline at end of file diff --git a/include/perfect/gpu_turbo.hpp b/include/perfect/gpu_turbo.hpp index 7b9637e..6203a77 100644 --- a/include/perfect/gpu_turbo.hpp +++ b/include/perfect/gpu_turbo.hpp @@ -1 +1,36 @@ -#pragma once \ No newline at end of file +#pragma once + +#include "detail/nvidia/nvidia-ml.hpp" + +#include "result.hpp" + +namespace perfect { + +struct GpuTurboState { + bool enabled; + + GpuTurboState() : enabled(false) {} +}; + +Result get_gpu_turbo_state(GpuTurboState *state, unsigned int idx) { + return detail::is_gpu_turbo_enabled(&(state->enabled), idx); +} + +inline bool is_turbo_enabled(GpuTurboState state) { return state.enabled; } + +Result set_gpu_turbo_state(GpuTurboState state, unsigned int idx) { + if (state.enabled) { + return detail::enable_gpu_turbo(idx); + } else { + return detail::disable_gpu_turbo(idx); + } +} + +inline Result disable_gpu_turbo(unsigned int idx) { + return detail::disable_gpu_turbo(idx); +} +inline Result enable_gpu_turbo(unsigned int idx) { + return detail::enable_gpu_turbo(idx); +} + +}; // namespace perfect \ No newline at end of file diff --git a/include/perfect/init.hpp b/include/perfect/init.hpp new file mode 100644 index 0000000..9f95b78 --- /dev/null +++ b/include/perfect/init.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace perfect { + +/*! initialize the benchmark + */ +Result init() { + static bool init_ = false; + if (init_) + return Result::SUCCESS; + + // init nvml + nvmlReturn_t ret = nvmlInit(); + if (ret != NVML_SUCCESS) { + return from_nvml(ret); + } + + // don't init again if init() called twice + init_ = true; + return Result::SUCCESS; +} + +}; // namespace perfect \ No newline at end of file diff --git a/include/perfect/result.hpp b/include/perfect/result.hpp index b2d93b0..5cf3ba4 100644 --- a/include/perfect/result.hpp +++ b/include/perfect/result.hpp @@ -2,22 +2,53 @@ #include +#include + namespace perfect { - enum class Result { - SUCCESS, - NVIDIA_ML, - NO_PERMISSION, - UNKNOWN - }; +enum class Result { + SUCCESS, + NVML_NOT_SUPPORTED, + NVML_NO_PERMISSION, + NVML_UNINITIALIZED, + NO_PERMISSION, + UNKNOWN +}; -const char * get_string(const Result &result) { - switch (result) { - case Result::SUCCESS: return "success"; - case Result::NO_PERMISSION: return "no permission"; - case Result::UNKNOWN: return "unknown error"; - case Result::NVIDIA_ML: return "nvidia-ml error"; - default: assert(0 && "unexpected perfect::Result"); - } +Result from_nvml(nvmlReturn_t nvml) { + switch (nvml) { + case NVML_SUCCESS: + return Result::SUCCESS; + case NVML_ERROR_UNINITIALIZED: + return Result::NVML_UNINITIALIZED; + case NVML_ERROR_NOT_SUPPORTED: + return Result::NVML_NOT_SUPPORTED; + case NVML_ERROR_INVALID_ARGUMENT: + case NVML_ERROR_GPU_IS_LOST: + case NVML_ERROR_UNKNOWN: + default: + assert(0 && "unhandled nvmlReturn_t"); + } + return Result::UNKNOWN; } +const char *get_string(const Result &result) { + switch (result) { + case Result::SUCCESS: + return "success"; + case Result::NO_PERMISSION: + return "no permission"; + case Result::UNKNOWN: + return "unknown error"; + case Result::NVML_NOT_SUPPORTED: + return "nvidia-ml returned not supported"; + case Result::NVML_NO_PERMISSION: + return "nvidia-ml returned no permission"; + default: + assert(0 && "unexpected perfect::Result"); + } + + assert(0 && "unreachable"); + return ""; } + +} // namespace perfect diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000..2e145ea --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,39 @@ +# removed -Wredundant-decls for cuda 10.1 +# removed -Wundef for cuda 10.0 +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \ + -Xcompiler=-Wall\ + -Xcompiler=-Wextra\ + -Xcompiler=-Wcast-qual \ + -Xcompiler=-Wcast-align \ + -Xcompiler=-Wstrict-aliasing \ + -Xcompiler=-Wpointer-arith \ + -Xcompiler=-Winit-self \ + -Xcompiler=-Wshadow \ + -Xcompiler=-Wswitch-enum \ + -Xcompiler=-Wfloat-equal \ + -Xcompiler=-Wvla\ + -Xcompiler=-fmax-errors=1 \ + -Xcompiler=-Wfatal-errors\ + ") + + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ + -Wredundant-decls \ + -Wundef \ + -Wall\ + -Wextra\ + -Wcast-qual \ + -Wcast-align \ + -Wstrict-aliasing \ + -Wpointer-arith \ + -Winit-self \ + -Wshadow \ + -Wswitch-enum \ + -Wfloat-equal \ + -Wvla\ + -fmax-errors=1 \ + -Wfatal-errors\ + ") + +add_executable(enable-turbo enable_turbo.cpp) +target_link_libraries(enable-turbo perfect) + diff --git a/tools/enable_turbo.cpp b/tools/enable_turbo.cpp new file mode 100644 index 0000000..208a8e4 --- /dev/null +++ b/tools/enable_turbo.cpp @@ -0,0 +1,35 @@ +#include + +#include "perfect/cpu_turbo.hpp" +#include "perfect/init.hpp" + +using namespace perfect; + +int main(void) { + + Result ret; + CpuTurboState state; + + perfect::init(); + + ret = get_cpu_turbo_state(&state); + + if (ret != Result::SUCCESS) { + std::cerr << "ERROR: " << get_string(ret) << "\n"; + exit(EXIT_FAILURE); + } + + if (is_turbo_enabled(state)) { + std::cerr << "turbo already enabled\n"; + exit(EXIT_SUCCESS); + } else { + ret = enable_cpu_turbo(); + if (ret != Result::SUCCESS) { + std::cerr << "ERROR: " << get_string(ret) << "\n"; + exit(EXIT_FAILURE); + } else { + std::cerr << "enabled turbo\n"; + exit(EXIT_SUCCESS); + } + } +} \ No newline at end of file