work on gpu turbo, gpu clocks, cpu turbo
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# 3.8+ for project(LANGUAGES CUDA)
|
||||
# 3.8+ for project(LANGUAGES CUDA) and CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
|
||||
# 3.9+ for OpenMP::OpenMP_CXX
|
||||
# 3.10+ findopenmp gained support for language-specific components
|
||||
# 3.11+ for CMake not to add -fopenmp to the nvcc flags
|
||||
@@ -40,6 +40,9 @@ add_subdirectory(include/perfect)
|
||||
|
||||
add_library(perfect INTERFACE)
|
||||
target_include_directories(perfect INTERFACE include/)
|
||||
target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
||||
target_link_libraries(perfect INTERFACE nvidia-ml)
|
||||
|
||||
|
||||
add_subdirectory(examples)
|
||||
add_subdirectory(tools)
|
@@ -1,14 +1,27 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/cpu_turbo.hpp"
|
||||
|
||||
/*! Example: run a region of work with CPU turbo disabled.

    Records the current turbo state, disables turbo, and finally restores the
    recorded state. Every step's perfect::Result is checked; on failure the
    result string is printed to stderr and the program returns EXIT_FAILURE.
*/
int main(void) {

  perfect::Result ret;
  perfect::CpuTurboState state;

  // get the current turbo state
  ret = perfect::get_cpu_turbo_state(&state);
  if (ret != perfect::Result::SUCCESS) {
    std::cerr << perfect::get_string(ret) << "\n";
    return EXIT_FAILURE;
  }

  // disable turbo
  if ((ret = perfect::disable_cpu_turbo()) != perfect::Result::SUCCESS) {
    std::cerr << perfect::get_string(ret) << " when disabling turbo\n";
    return EXIT_FAILURE;
  }

  // do things with CPU turbo disabled

  // restore the original state; report a failure instead of dropping it,
  // otherwise the machine could silently be left with turbo disabled
  ret = perfect::set_cpu_turbo_state(state);
  if (ret != perfect::Result::SUCCESS) {
    std::cerr << perfect::get_string(ret) << " when restoring turbo\n";
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}
|
@@ -15,27 +15,32 @@
|
||||
#error "unsupported OS"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#include "result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! Snapshot of the CPU turbo setting.

    A default-constructed state has turbo marked as disabled.
*/
struct CpuTurboState {
  bool enabled{false}; //!< true when turbo is enabled

  CpuTurboState() = default;
};
|
||||
|
||||
|
||||
/*! Record whether CPU turbo is currently enabled.

    \param state receives the value of detail::is_turbo_enabled(); it is
                 dereferenced unconditionally, so it must not be null
    \return Result::SUCCESS (there is no failure path in this function)

    Declared inline because it is defined in a header.
*/
inline Result get_cpu_turbo_state(CpuTurboState *state) {
  state->enabled = detail::is_turbo_enabled();
  return Result::SUCCESS;
}
|
||||
|
||||
/*! \return true when the recorded CPU turbo state has turbo enabled */
inline bool is_turbo_enabled(CpuTurboState state) {
  const bool turboOn = state.enabled;
  return turboOn;
}
|
||||
|
||||
/*! Apply a previously recorded CPU turbo state.

    \param state turbo is enabled when state.enabled is true, disabled otherwise
    \return the Result of detail::enable_cpu_turbo() or
            detail::disable_cpu_turbo(), whichever was invoked

    The action is performed exactly once and its result is returned.
    Declared inline because it is defined in a header.
*/
inline Result set_cpu_turbo_state(CpuTurboState state) {
  return state.enabled ? detail::enable_cpu_turbo()
                       : detail::disable_cpu_turbo();
}
|
||||
|
||||
};
|
||||
/*! Disable CPU turbo; forwards to detail::disable_cpu_turbo() and returns its Result */
inline Result disable_cpu_turbo() {
  const Result outcome = detail::disable_cpu_turbo();
  return outcome;
}
|
||||
/*! Enable CPU turbo; forwards to detail::enable_cpu_turbo() and returns its Result */
inline Result enable_cpu_turbo() {
  const Result outcome = detail::enable_cpu_turbo();
  return outcome;
}
|
||||
|
||||
}; // namespace perfect
|
91
include/perfect/detail/nvidia/nvidia-ml.hpp
Normal file
91
include/perfect/detail/nvidia/nvidia-ml.hpp
Normal file
@@ -0,0 +1,91 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
#include <cstdlib>
#include <vector>

#include <nvml.h>

#include "perfect/result.hpp"
|
||||
|
||||
/*! Terminate the process if an NVML call did not succeed.

    \param result return code of the NVML call
    \param file   source file of the call site (for the message)
    \param line   source line of the call site (for the message)

    On failure prints "<file>@<line>: NVML Error: <nvmlErrorString>" to stderr
    and exits with status -1. Returns normally when result is NVML_SUCCESS.
*/
inline void checkNvml(nvmlReturn_t result, const char *file, const int line) {
  if (result == NVML_SUCCESS) {
    return;
  }
  // diagnostics belong on stderr so they are not mixed into program output
  std::fprintf(stderr, "%s@%d: NVML Error: %s\n", file, line,
               nvmlErrorString(result));
  std::exit(-1);
}

/*! Check an NVML return code at the call site.

    Expands to a single expression with no trailing semicolon, so that
    `NVML(call);` is exactly one statement and is safe inside if/else.
*/
#define NVML(stmt) checkNvml((stmt), __FILE__, __LINE__)
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
std::vector<unsigned int> get_device_memory_clocks(unsigned int index) {
|
||||
std::vector<unsigned int> result;
|
||||
nvmlDevice_t device;
|
||||
nvmlDeviceGetHandleByIndex(index, &device);
|
||||
unsigned int resultCount = 0;
|
||||
|
||||
auto ret = nvmlDeviceGetSupportedMemoryClocks(device, &resultCount, nullptr);
|
||||
if (ret != NVML_ERROR_INSUFFICIENT_SIZE) {
|
||||
NVML(ret);
|
||||
}
|
||||
result.resize(resultCount);
|
||||
NVML(nvmlDeviceGetSupportedMemoryClocks(device, &resultCount, result.data()));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*! Query the graphics clocks a GPU supports at a given memory clock.

    \param graphicsClocksMhz [out] filled with the values reported by
                             nvmlDeviceGetSupportedGraphicsClocks; left empty
                             on failure or when there is nothing to fetch
    \param index             NVML device index
    \param memoryClockMhz    memory clock the graphics clocks are paired with
    \return Result::SUCCESS on success, otherwise from_nvml() of the failing
            NVML call

    Declared inline because it is defined in a header.
*/
inline Result
get_device_graphics_clocks(std::vector<unsigned int> &graphicsClocksMhz,
                           unsigned int index, unsigned int memoryClockMhz) {
  graphicsClocksMhz.clear();

  nvmlDevice_t device;
  nvmlReturn_t ret = nvmlDeviceGetHandleByIndex(index, &device);
  if (ret != NVML_SUCCESS) {
    // never query with an uninitialized handle
    return from_nvml(ret);
  }

  // first call with a null buffer: NVML reports the required element count
  // through NVML_ERROR_INSUFFICIENT_SIZE
  unsigned int count = 0;
  ret = nvmlDeviceGetSupportedGraphicsClocks(device, memoryClockMhz, &count,
                                             nullptr);
  if (ret != NVML_ERROR_INSUFFICIENT_SIZE) {
    return from_nvml(ret);
  }

  graphicsClocksMhz.resize(count);
  ret = nvmlDeviceGetSupportedGraphicsClocks(device, memoryClockMhz, &count,
                                             graphicsClocksMhz.data());
  if (ret != NVML_SUCCESS) {
    graphicsClocksMhz.clear();
    return from_nvml(ret);
  }

  // keep only the entries NVML actually wrote
  graphicsClocksMhz.resize(count);
  return Result::SUCCESS;
}
|
||||
|
||||
/*! Disable auto-boosted clocks on a GPU.

    \param idx NVML device index
    \return from_nvml() of nvmlDeviceGetHandleByIndex if the handle lookup
            fails, otherwise from_nvml() of
            nvmlDeviceSetAutoBoostedClocksEnabled(device, NVML_FEATURE_DISABLED)

    Declared inline because it is defined in a header.
*/
inline Result disable_gpu_turbo(unsigned int idx) {
  nvmlDevice_t device;
  const nvmlReturn_t lookup = nvmlDeviceGetHandleByIndex(idx, &device);
  if (lookup != NVML_SUCCESS) {
    return from_nvml(lookup);
  }
  return from_nvml(
      nvmlDeviceSetAutoBoostedClocksEnabled(device, NVML_FEATURE_DISABLED));
}
|
||||
|
||||
/*! Enable auto-boosted clocks on a GPU.

    \param idx NVML device index
    \return from_nvml() of nvmlDeviceGetHandleByIndex if the handle lookup
            fails, otherwise from_nvml() of
            nvmlDeviceSetAutoBoostedClocksEnabled(device, NVML_FEATURE_ENABLED)

    Declared inline because it is defined in a header.
*/
inline Result enable_gpu_turbo(unsigned int idx) {
  nvmlDevice_t device;
  const nvmlReturn_t lookup = nvmlDeviceGetHandleByIndex(idx, &device);
  if (lookup != NVML_SUCCESS) {
    return from_nvml(lookup);
  }
  return from_nvml(
      nvmlDeviceSetAutoBoostedClocksEnabled(device, NVML_FEATURE_ENABLED));
}
|
||||
|
||||
/*! Query whether auto-boosted clocks are currently enabled on a GPU.

    \param enabled [out] set to true/false only when the query succeeds; left
                   untouched on failure. Must not be null.
    \param idx     NVML device index
    \return Result::SUCCESS on success, otherwise from_nvml() of the failing
            NVML call

    Declared inline because it is defined in a header.
*/
inline Result is_gpu_turbo_enabled(bool *enabled, unsigned int idx) {
  nvmlDevice_t device;
  nvmlReturn_t ret = nvmlDeviceGetHandleByIndex(idx, &device);
  if (ret != NVML_SUCCESS) {
    return from_nvml(ret);
  }

  nvmlEnableState_t isEnabled = NVML_FEATURE_DISABLED;
  nvmlEnableState_t defaultIsEnabled = NVML_FEATURE_DISABLED;
  ret = nvmlDeviceGetAutoBoostedClocksEnabled(device, &isEnabled,
                                              &defaultIsEnabled);
  if (ret != NVML_SUCCESS) {
    // isEnabled was not written by NVML; do not report a value derived from it
    return from_nvml(ret);
  }

  *enabled = (isEnabled == NVML_FEATURE_ENABLED);
  return Result::SUCCESS;
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace perfect
|
@@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <sched.h>
|
||||
@@ -13,6 +13,8 @@
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! return a set of CPUs the current thread can run on
|
||||
*/
|
||||
std::vector<int> cpus() {
|
||||
std::vector<int> result;
|
||||
cpu_set_t mask;
|
||||
@@ -24,6 +26,7 @@ std::vector<int> cpus() {
|
||||
result.push_back(i);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Result get_governor(std::string &result, const int cpu) {
|
||||
@@ -48,4 +51,4 @@ Result set_governor(const int cpu, const std::string &governor) {
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace perfect
|
@@ -6,45 +6,38 @@
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
|
||||
/*! \return true if /sys/devices/system/cpu/intel_pstate/no_turbo can be
            opened for reading

    Declared inline because it is defined in a header.
*/
inline bool has_intel_pstate_no_turbo() {
  std::ifstream probe("/sys/devices/system/cpu/intel_pstate/no_turbo");
  return bool(probe);
}
|
||||
|
||||
int write_intel_pstate_no_turbo(const std::string &s) {
|
||||
Result write_intel_pstate_no_turbo(const std::string &s) {
|
||||
assert(has_intel_pstate_no_turbo());
|
||||
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
|
||||
// SPDLOG_LOGGER_DEBUG(logger::console(), "writing {} to {}", s, path);
|
||||
std::ofstream ofs(path, std::ofstream::out);
|
||||
ofs << s;
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
// SPDLOG_LOGGER_DEBUG(logger::console(), "error writing {} to {}", s, path);
|
||||
return 1;
|
||||
return Result::NO_PERMISSION;
|
||||
}
|
||||
return 0;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
/*! Read the first line of /sys/devices/system/cpu/intel_pstate/no_turbo.

    \return the first line of the file (without the newline); empty if
            nothing could be read

    Asserts (debug builds only) that the file exists.
    Declared inline because it is defined in a header.
*/
inline std::string read_intel_pstate_no_turbo() {
  assert(has_intel_pstate_no_turbo());
  const std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
  // SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
  std::ifstream ifs(path, std::ifstream::in);
  std::string result;
  std::getline(ifs, result);
  return result;
}
|
||||
|
||||
bool is_turbo_enabled() {
|
||||
return "0" == read_intel_pstate_no_turbo();
|
||||
}
|
||||
bool is_turbo_enabled() { return "0" == read_intel_pstate_no_turbo(); }
|
||||
|
||||
/*! Disable CPU turbo by writing "1" to intel_pstate/no_turbo.

    \return the Result of write_intel_pstate_no_turbo
*/
inline Result disable_cpu_turbo() { return write_intel_pstate_no_turbo("1"); }

/*! Enable CPU turbo by writing "0" to intel_pstate/no_turbo.

    \return the Result of write_intel_pstate_no_turbo
*/
inline Result enable_cpu_turbo() { return write_intel_pstate_no_turbo("0"); }
|
||||
|
||||
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace perfect
|
@@ -1 +1,48 @@
|
||||
#pragma once
|
||||
|
||||
#include "detail/nvidia/nvidia-ml.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*!
|
||||
*/
|
||||
Result set_max_gpu_clocks(unsigned int idx) {
|
||||
|
||||
Result rt;
|
||||
std::vector<unsigned int> clksMhz;
|
||||
|
||||
ret = get_device_memory_clocks(clksMhz, idx);
|
||||
|
||||
auto maxMemMhz = *std::max_element(memClksMhz.begin(), memClksMhz.end());
|
||||
ret = get_device_graphics_clocks(clksMhz, idx);
|
||||
auto maxCoreMhz = *std::max_element(clksMhz.begin(), clksMhz.end());
|
||||
|
||||
auto ret = nvmlDeviceSetApplicationsClocks(device, maxMemMhz, maxCoreMhz);
|
||||
if (ret == NVML_ERROR_NOT_SUPPORTED) {
|
||||
return Result::NVML_NOT_SUPPORTED;
|
||||
} else if (ret == NVML_ERROR_NO_PERMISSION) {
|
||||
return Result::NVML_NO_PERMISSION;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
/*! Reset GPU clocks to default behavior
|
||||
*/
|
||||
Result reset_gpu_clocks(unsigned int idx) {
|
||||
|
||||
nvmlDevice_t device;
|
||||
nvmlReturn_t ret;
|
||||
ret = nvmlDeviceGetHandleByIndex(idx, &device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
assert(false);
|
||||
}
|
||||
ret = nvmlDeviceResetApplicationsClocks(device);
|
||||
if (ret == NVML_ERROR_NOT_SUPPORTED) {
|
||||
return Result::NVML_NOT_SUPPORTED;
|
||||
} else if (ret == NVML_ERROR_NO_PERMISSION) {
|
||||
return Result::NVML_NO_PERMISSION;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
}; // namespace perfect
|
@@ -1 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include "detail/nvidia/nvidia-ml.hpp"
|
||||
|
||||
#include "result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! Snapshot of a GPU's turbo (auto-boost) setting.

    A default-constructed state has turbo marked as disabled.
*/
struct GpuTurboState {
  bool enabled{false}; //!< true when turbo is enabled

  GpuTurboState() = default;
};
|
||||
|
||||
/*! Record whether turbo is currently enabled on a GPU.

    \param state [out] state->enabled is passed to
                 detail::is_gpu_turbo_enabled to be filled in; state is
                 dereferenced unconditionally, so it must not be null
    \param idx   NVML device index
    \return the Result of detail::is_gpu_turbo_enabled

    Declared inline because it is defined in a header.
*/
inline Result get_gpu_turbo_state(GpuTurboState *state, unsigned int idx) {
  return detail::is_gpu_turbo_enabled(&(state->enabled), idx);
}
|
||||
|
||||
/*! \return true when the recorded GPU turbo state has turbo enabled */
inline bool is_turbo_enabled(GpuTurboState state) {
  const bool turboOn = state.enabled;
  return turboOn;
}
|
||||
|
||||
/*! Apply a previously recorded turbo state to a GPU.

    \param state turbo is enabled when state.enabled is true, disabled otherwise
    \param idx   NVML device index
    \return the Result of detail::enable_gpu_turbo(idx) or
            detail::disable_gpu_turbo(idx), whichever was invoked

    Declared inline because it is defined in a header.
*/
inline Result set_gpu_turbo_state(GpuTurboState state, unsigned int idx) {
  return state.enabled ? detail::enable_gpu_turbo(idx)
                       : detail::disable_gpu_turbo(idx);
}
|
||||
|
||||
/*! Disable turbo on GPU `idx`; forwards to detail::disable_gpu_turbo and returns its Result */
inline Result disable_gpu_turbo(unsigned int idx) {
  const Result outcome = detail::disable_gpu_turbo(idx);
  return outcome;
}
|
||||
/*! Enable turbo on GPU `idx`; forwards to detail::enable_gpu_turbo and returns its Result */
inline Result enable_gpu_turbo(unsigned int idx) {
  const Result outcome = detail::enable_gpu_turbo(idx);
  return outcome;
}
|
||||
|
||||
}; // namespace perfect
|
25
include/perfect/init.hpp
Normal file
25
include/perfect/init.hpp
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <nvml.h>
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! initialize the benchmark
|
||||
*/
|
||||
Result init() {
|
||||
static bool init_ = false;
|
||||
if (init_)
|
||||
return Result::SUCCESS;
|
||||
|
||||
// init nvml
|
||||
nvmlReturn_t ret = nvmlInit();
|
||||
if (ret != NVML_SUCCESS) {
|
||||
return from_nvml(ret);
|
||||
}
|
||||
|
||||
// don't init again if init() called twice
|
||||
init_ = true;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
}; // namespace perfect
|
@@ -2,22 +2,53 @@
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <nvml.h>
|
||||
|
||||
namespace perfect {
|
||||
/*! Status codes returned by the perfect:: API */
enum class Result {
  SUCCESS,            //!< the operation succeeded
  NVIDIA_ML,          //!< unspecified nvidia-ml error
  NVML_NOT_SUPPORTED, //!< nvidia-ml returned "not supported"
  NVML_NO_PERMISSION, //!< nvidia-ml returned "no permission"
  NVML_UNINITIALIZED, //!< nvidia-ml returned "uninitialized"
  NO_PERMISSION,      //!< no permission (non-NVML operation)
  UNKNOWN             //!< unknown error
};
|
||||
|
||||
/*! Convert an NVML return code to a perfect::Result.

    \param nvml return code of an NVML call
    \return the matching Result for success, uninitialized, not-supported and
            no-permission; Result::UNKNOWN for every other code

    Codes without a dedicated Result trip an assert in debug builds so that
    missing mappings are noticed; with NDEBUG they yield Result::UNKNOWN.
    Declared inline because it is defined in a header.
*/
inline Result from_nvml(nvmlReturn_t nvml) {
  switch (nvml) {
  case NVML_SUCCESS:
    return Result::SUCCESS;
  case NVML_ERROR_UNINITIALIZED:
    return Result::NVML_UNINITIALIZED;
  case NVML_ERROR_NOT_SUPPORTED:
    return Result::NVML_NOT_SUPPORTED;
  case NVML_ERROR_NO_PERMISSION:
    return Result::NVML_NO_PERMISSION;
  case NVML_ERROR_INVALID_ARGUMENT:
  case NVML_ERROR_GPU_IS_LOST:
  case NVML_ERROR_UNKNOWN:
  default:
    assert(0 && "unhandled nvmlReturn_t");
  }
  return Result::UNKNOWN;
}
|
||||
|
||||
const char *get_string(const Result &result) {
|
||||
switch (result) {
|
||||
case Result::SUCCESS:
|
||||
return "success";
|
||||
case Result::NO_PERMISSION:
|
||||
return "no permission";
|
||||
case Result::UNKNOWN:
|
||||
return "unknown error";
|
||||
case Result::NVML_NOT_SUPPORTED:
|
||||
return "nvidia-ml returned not supported";
|
||||
case Result::NVML_NO_PERMISSION:
|
||||
return "nvidia-ml returned no permission";
|
||||
default:
|
||||
assert(0 && "unexpected perfect::Result");
|
||||
}
|
||||
|
||||
assert(0 && "unreachable");
|
||||
return "";
|
||||
}
|
||||
|
||||
} // namespace perfect
|
||||
|
39
tools/CMakeLists.txt
Normal file
39
tools/CMakeLists.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
# removed -Wredundant-decls for cuda 10.1
# removed -Wundef for cuda 10.0

# Warning/diagnostic flags used for the tools. They are given to the host C++
# compiler directly and to nvcc's host compiler through -Xcompiler.
set(PERFECT_TOOLS_WARNING_FLAGS
  -Wall
  -Wextra
  -Wcast-qual
  -Wcast-align
  -Wstrict-aliasing
  -Wpointer-arith
  -Winit-self
  -Wshadow
  -Wswitch-enum
  -Wfloat-equal
  -Wvla
  -fmax-errors=1
  -Wfatal-errors
)

# Append each flag with its own leading space so adjacent flags can never be
# fused together (backslash line continuations depend on how the next line
# happens to be indented).
foreach(flag IN LISTS PERFECT_TOOLS_WARNING_FLAGS)
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=${flag}")
endforeach()

# -Wredundant-decls and -Wundef are only passed to the C++ compiler (see the
# notes at the top of this file).
string(APPEND CMAKE_CXX_FLAGS " -Wredundant-decls -Wundef")
foreach(flag IN LISTS PERFECT_TOOLS_WARNING_FLAGS)
  string(APPEND CMAKE_CXX_FLAGS " ${flag}")
endforeach()

add_executable(enable-turbo enable_turbo.cpp)
target_link_libraries(enable-turbo perfect)
|
||||
|
35
tools/enable_turbo.cpp
Normal file
35
tools/enable_turbo.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/cpu_turbo.hpp"
|
||||
#include "perfect/init.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
Result ret;
|
||||
CpuTurboState state;
|
||||
|
||||
perfect::init();
|
||||
|
||||
ret = get_cpu_turbo_state(&state);
|
||||
|
||||
if (ret != Result::SUCCESS) {
|
||||
std::cerr << "ERROR: " << get_string(ret) << "\n";
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (is_turbo_enabled(state)) {
|
||||
std::cerr << "turbo already enabled\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
} else {
|
||||
ret = enable_cpu_turbo();
|
||||
if (ret != Result::SUCCESS) {
|
||||
std::cerr << "ERROR: " << get_string(ret) << "\n";
|
||||
exit(EXIT_FAILURE);
|
||||
} else {
|
||||
std::cerr << "enabled turbo\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user