work on gpu turbo, gpu clocks, cpu turbo

This commit is contained in:
Carl Pearson
2019-09-19 15:53:43 -05:00
parent f51ef904fb
commit 81cc7feafd
12 changed files with 383 additions and 63 deletions

View File

@@ -15,27 +15,32 @@
#error "unsupported OS"
#endif
#include "result.hpp"
namespace perfect {
/*! Snapshot of the CPU turbo setting.
 */
struct CpuTurboState {
  bool enabled; //!< true when turbo is enabled

  /*! Default state: turbo disabled. */
  CpuTurboState() : enabled(false) {}
};
/*! Query the current CPU turbo setting.
 *
 * \param state receives the value of detail::is_turbo_enabled(); must not be
 *        null (it is dereferenced unconditionally)
 * \return Result::SUCCESS
 */
inline Result get_cpu_turbo_state(CpuTurboState *state) {
  state->enabled = detail::is_turbo_enabled();
  return Result::SUCCESS;
}
/*! \return the `enabled` flag stored in `state` */
inline bool is_turbo_enabled(CpuTurboState state) {
  const bool turboOn = state.enabled;
  return turboOn;
}
/*! Apply a previously captured CPU turbo setting.
 *
 * \param state turbo is enabled when `state.enabled` is true, disabled
 *        otherwise
 * \return the Result of detail::enable_cpu_turbo() or
 *         detail::disable_cpu_turbo()
 */
inline Result set_cpu_turbo_state(CpuTurboState state) {
  return state.enabled ? detail::enable_cpu_turbo()
                       : detail::disable_cpu_turbo();
}
};
/*! Disable CPU turbo; forwards to detail::disable_cpu_turbo(). */
inline Result disable_cpu_turbo() {
  const Result status = detail::disable_cpu_turbo();
  return status;
}
/*! Enable CPU turbo; forwards to detail::enable_cpu_turbo(). */
inline Result enable_cpu_turbo() {
  const Result status = detail::enable_cpu_turbo();
  return status;
}
}; // namespace perfect

View File

@@ -0,0 +1,91 @@
#pragma once
#include <cstdio>
#include <vector>
#include <nvml.h>
#include "perfect/result.hpp"
/*! Terminate the process if an NVML call did not succeed.
 *
 * \param result status returned by the NVML call
 * \param file   source file of the call site (printed in the message)
 * \param line   source line of the call site (printed in the message)
 *
 * On any status other than NVML_SUCCESS, prints the location and
 * nvmlErrorString(result) with printf and calls exit(-1).
 */
inline void checkNvml(nvmlReturn_t result, const char *file, const int line) {
  if (NVML_SUCCESS == result) {
    return;
  }
  printf("%s@%d: NVML Error: %s\n", file, line, nvmlErrorString(result));
  exit(-1);
}
/*! Pass the status of an NVML call to checkNvml() together with the call
 * site's file and line.
 *
 * Expands to a single expression with no trailing semicolon, so `NVML(x);`
 * is exactly one statement and stays valid in an unbraced if/else.
 */
#define NVML(stmt) checkNvml((stmt), __FILE__, __LINE__)
namespace perfect {
namespace detail {
std::vector<unsigned int> get_device_memory_clocks(unsigned int index) {
std::vector<unsigned int> result;
nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(index, &device);
unsigned int resultCount = 0;
auto ret = nvmlDeviceGetSupportedMemoryClocks(device, &resultCount, nullptr);
if (ret != NVML_ERROR_INSUFFICIENT_SIZE) {
NVML(ret);
}
result.resize(resultCount);
NVML(nvmlDeviceGetSupportedMemoryClocks(device, &resultCount, result.data()));
return result;
}
/*! List the graphics clocks NVML reports for a device at a given memory
 * clock.
 *
 * \param graphicsClocksMhz output; resized and filled by
 *        nvmlDeviceGetSupportedGraphicsClocks
 * \param index             NVML device index
 * \param memoryClockMhz    memory clock passed through to NVML
 * \return from_nvml() of the first NVML call that did not behave as
 *         expected, otherwise from_nvml() of the filling call
 */
inline Result
get_device_graphics_clocks(std::vector<unsigned int> &graphicsClocksMhz,
                           unsigned int index, unsigned int memoryClockMhz) {
  nvmlDevice_t device;
  // A failed lookup would otherwise leave `device` uninitialized.
  const nvmlReturn_t lookup = nvmlDeviceGetHandleByIndex(index, &device);
  if (lookup != NVML_SUCCESS) {
    return from_nvml(lookup);
  }

  // Probe with a null buffer to learn the element count; anything other than
  // NVML_ERROR_INSUFFICIENT_SIZE is reported to the caller as-is.
  unsigned int count = 0;
  const nvmlReturn_t probe = nvmlDeviceGetSupportedGraphicsClocks(
      device, memoryClockMhz, &count, nullptr);
  if (probe != NVML_ERROR_INSUFFICIENT_SIZE) {
    return from_nvml(probe);
  }

  graphicsClocksMhz.resize(count);
  return from_nvml(nvmlDeviceGetSupportedGraphicsClocks(
      device, memoryClockMhz, &count, graphicsClocksMhz.data()));
}
/*! Turn auto-boosted clocks off for a device.
 *
 * \param idx NVML device index
 * \return from_nvml() of the handle lookup if it failed, otherwise
 *         from_nvml() of nvmlDeviceSetAutoBoostedClocksEnabled
 */
Result disable_gpu_turbo(unsigned int idx) {
  nvmlDevice_t handle;
  const nvmlReturn_t lookup = nvmlDeviceGetHandleByIndex(idx, &handle);
  if (NVML_SUCCESS == lookup) {
    const nvmlReturn_t status =
        nvmlDeviceSetAutoBoostedClocksEnabled(handle, NVML_FEATURE_DISABLED);
    return from_nvml(status);
  }
  return from_nvml(lookup);
}
/*! Turn auto-boosted clocks on for a device.
 *
 * \param idx NVML device index
 * \return from_nvml() of the handle lookup if it failed, otherwise
 *         from_nvml() of nvmlDeviceSetAutoBoostedClocksEnabled
 */
Result enable_gpu_turbo(unsigned int idx) {
  nvmlDevice_t handle;
  const nvmlReturn_t lookup = nvmlDeviceGetHandleByIndex(idx, &handle);
  if (NVML_SUCCESS == lookup) {
    const nvmlReturn_t status =
        nvmlDeviceSetAutoBoostedClocksEnabled(handle, NVML_FEATURE_ENABLED);
    return from_nvml(status);
  }
  return from_nvml(lookup);
}
/*! Query whether auto-boosted clocks are enabled for a device.
 *
 * \param enabled output; written only when the NVML query succeeds. Must not
 *        be null.
 * \param idx     NVML device index
 * \return from_nvml() of the first NVML call that failed, otherwise
 *         from_nvml(NVML_SUCCESS)
 */
inline Result is_gpu_turbo_enabled(bool *enabled, unsigned int idx) {
  nvmlDevice_t device;
  nvmlReturn_t ret = nvmlDeviceGetHandleByIndex(idx, &device);
  if (ret != NVML_SUCCESS) {
    return from_nvml(ret);
  }

  nvmlEnableState_t isEnabled;
  nvmlEnableState_t defaultIsEnabled; // required by the API, unused here
  ret = nvmlDeviceGetAutoBoostedClocksEnabled(device, &isEnabled,
                                              &defaultIsEnabled);
  // `isEnabled` is only meaningful when the query succeeded; do not read it
  // (it is uninitialized) on failure.
  if (ret == NVML_SUCCESS) {
    *enabled = (isEnabled == NVML_FEATURE_ENABLED);
  }
  return from_nvml(ret);
}
} // namespace detail
} // namespace perfect

View File

@@ -1,8 +1,8 @@
#pragma once
#include <string>
#include <fstream>
#include <cassert>
#include <fstream>
#include <string>
#include <vector>
#include <sched.h>
@@ -13,17 +13,20 @@
namespace perfect {
/*! return a set of CPUs the current thread can run on
 *
 * \return CPU indices in ascending order, taken from the affinity mask of
 *         the calling thread. If sched_getaffinity fails, the assert fires;
 *         with asserts compiled out the result is empty.
 */
inline std::vector<int> cpus() {
  cpu_set_t mask;
  // Start from an empty mask so a failed query can never expose
  // uninitialized bits when asserts are disabled.
  CPU_ZERO(&mask);
  if (sched_getaffinity(0 /*caller*/, sizeof(cpu_set_t), &mask)) {
    assert(0 && "failed sched_getaffinity");
  }

  std::vector<int> result;
  for (int i = 0; i < CPU_SETSIZE; ++i) {
    if (CPU_ISSET(i, &mask)) {
      result.push_back(i);
    }
  }
  return result;
}
Result get_governor(std::string &result, const int cpu) {
@@ -48,4 +51,4 @@ Result set_governor(const int cpu, const std::string &governor) {
return Result::SUCCESS;
}
}
} // namespace perfect

View File

@@ -6,45 +6,38 @@
#include "perfect/result.hpp"
namespace perfect {
namespace detail {
/*! \return true if /sys/devices/system/cpu/intel_pstate/no_turbo could be
 *          opened for reading
 */
bool has_intel_pstate_no_turbo() {
  std::ifstream probe("/sys/devices/system/cpu/intel_pstate/no_turbo");
  return !probe.fail();
}
int write_intel_pstate_no_turbo(const std::string &s) {
Result write_intel_pstate_no_turbo(const std::string &s) {
assert(has_intel_pstate_no_turbo());
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
// SPDLOG_LOGGER_DEBUG(logger::console(), "writing {} to {}", s, path);
std::ofstream ofs(path, std::ofstream::out);
ofs << s;
ofs.close();
if (ofs.fail()) {
// SPDLOG_LOGGER_DEBUG(logger::console(), "error writing {} to {}", s, path);
return 1;
return Result::NO_PERMISSION;
}
return 0;
return Result::SUCCESS;
}
/*! Read the first line of /sys/devices/system/cpu/intel_pstate/no_turbo.
 *
 * \return the first line of the file (empty if nothing could be read)
 *
 * Asserts that the no_turbo file is present.
 */
std::string read_intel_pstate_no_turbo() {
  assert(has_intel_pstate_no_turbo());
  const std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
  // SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
  std::string firstLine;
  std::ifstream ifs(path, std::ifstream::in);
  std::getline(ifs, firstLine);
  return firstLine;
}
/*! \return true when the intel_pstate no_turbo file reads "0" */
inline bool is_turbo_enabled() { return "0" == read_intel_pstate_no_turbo(); }
/*! Disable CPU turbo by writing "1" to intel_pstate/no_turbo.
 *
 * \return the Result of write_intel_pstate_no_turbo()
 */
inline Result disable_cpu_turbo() { return write_intel_pstate_no_turbo("1"); }

/*! Enable CPU turbo by writing "0" to intel_pstate/no_turbo.
 *
 * \return the Result of write_intel_pstate_no_turbo()
 */
inline Result enable_cpu_turbo() { return write_intel_pstate_no_turbo("0"); }
}
} // namespace detail
} // namespace perfect

View File

@@ -1 +1,48 @@
#pragma once
#pragma once
#include <algorithm>
#include <vector>

#include "detail/nvidia/nvidia-ml.hpp"
namespace perfect {
/*! Set the application clocks of a GPU to the highest supported memory clock
 * and the highest graphics clock supported at that memory clock.
 *
 * \param idx NVML device index
 * \return Result::SUCCESS on success;
 *         Result::NVML_NOT_SUPPORTED / Result::NVML_NO_PERMISSION when NVML
 *         reports those errors (NVML_NOT_SUPPORTED is also returned when no
 *         clocks are reported); the error from the handle lookup or from
 *         detail::get_device_graphics_clocks; Result::UNKNOWN for any other
 *         NVML failure when setting the clocks.
 *
 * Note: detail::get_device_memory_clocks terminates the process on NVML
 * errors instead of returning.
 */
inline Result set_max_gpu_clocks(unsigned int idx) {
  nvmlDevice_t device;
  nvmlReturn_t ret = nvmlDeviceGetHandleByIndex(idx, &device);
  if (ret != NVML_SUCCESS) {
    return from_nvml(ret);
  }

  const std::vector<unsigned int> memClksMhz =
      detail::get_device_memory_clocks(idx);
  if (memClksMhz.empty()) {
    // Nothing to choose from; dereferencing max_element would be undefined.
    return Result::NVML_NOT_SUPPORTED;
  }
  const unsigned int maxMemMhz =
      *std::max_element(memClksMhz.begin(), memClksMhz.end());

  // Graphics clocks are queried per memory clock, so use the one just chosen.
  std::vector<unsigned int> clksMhz;
  const Result rt = detail::get_device_graphics_clocks(clksMhz, idx, maxMemMhz);
  if (rt != Result::SUCCESS) {
    return rt;
  }
  if (clksMhz.empty()) {
    return Result::NVML_NOT_SUPPORTED;
  }
  const unsigned int maxCoreMhz =
      *std::max_element(clksMhz.begin(), clksMhz.end());

  ret = nvmlDeviceSetApplicationsClocks(device, maxMemMhz, maxCoreMhz);
  if (ret == NVML_ERROR_NOT_SUPPORTED) {
    return Result::NVML_NOT_SUPPORTED;
  } else if (ret == NVML_ERROR_NO_PERMISSION) {
    return Result::NVML_NO_PERMISSION;
  }
  // Do not report other NVML failures as success.
  return ret == NVML_SUCCESS ? Result::SUCCESS : Result::UNKNOWN;
}
/*! Reset GPU clocks to default behavior
 *
 * \param idx NVML device index
 * \return Result::SUCCESS on success;
 *         Result::NVML_NOT_SUPPORTED / Result::NVML_NO_PERMISSION when
 *         nvmlDeviceResetApplicationsClocks reports those errors;
 *         from_nvml() of the handle lookup if that failed;
 *         Result::UNKNOWN for any other NVML failure.
 */
inline Result reset_gpu_clocks(unsigned int idx) {
  nvmlDevice_t device;
  nvmlReturn_t ret = nvmlDeviceGetHandleByIndex(idx, &device);
  if (ret != NVML_SUCCESS) {
    // Report the failure instead of asserting: with asserts compiled out the
    // reset below would run on an uninitialized handle.
    return from_nvml(ret);
  }

  ret = nvmlDeviceResetApplicationsClocks(device);
  if (ret == NVML_ERROR_NOT_SUPPORTED) {
    return Result::NVML_NOT_SUPPORTED;
  } else if (ret == NVML_ERROR_NO_PERMISSION) {
    return Result::NVML_NO_PERMISSION;
  }
  // Do not report other NVML failures as success.
  return ret == NVML_SUCCESS ? Result::SUCCESS : Result::UNKNOWN;
}
}; // namespace perfect

View File

@@ -1 +1,36 @@
#pragma once
#pragma once
#include "detail/nvidia/nvidia-ml.hpp"
#include "result.hpp"
namespace perfect {
/*! Snapshot of the GPU turbo (auto-boost) setting.
 */
struct GpuTurboState {
  bool enabled{false}; //!< true when turbo is enabled; off by default

  GpuTurboState() {}
};
/*! Query the GPU turbo setting of a device.
 *
 * \param state receives the flag via detail::is_gpu_turbo_enabled(); must not
 *        be null (it is dereferenced unconditionally)
 * \param idx   device index forwarded to detail::is_gpu_turbo_enabled()
 * \return the Result of detail::is_gpu_turbo_enabled()
 */
inline Result get_gpu_turbo_state(GpuTurboState *state, unsigned int idx) {
  return detail::is_gpu_turbo_enabled(&(state->enabled), idx);
}
/*! \return the `enabled` flag stored in `state` */
inline bool is_turbo_enabled(GpuTurboState state) {
  const bool turboOn = state.enabled;
  return turboOn;
}
/*! Apply a previously captured GPU turbo setting to a device.
 *
 * \param state turbo is enabled when `state.enabled` is true, disabled
 *        otherwise
 * \param idx   device index forwarded to the detail:: call
 * \return the Result of detail::enable_gpu_turbo() or
 *         detail::disable_gpu_turbo()
 */
inline Result set_gpu_turbo_state(GpuTurboState state, unsigned int idx) {
  return state.enabled ? detail::enable_gpu_turbo(idx)
                       : detail::disable_gpu_turbo(idx);
}
/*! Disable GPU turbo on device `idx`; forwards to
 * detail::disable_gpu_turbo().
 */
inline Result disable_gpu_turbo(unsigned int idx) {
  const Result status = detail::disable_gpu_turbo(idx);
  return status;
}
/*! Enable GPU turbo on device `idx`; forwards to detail::enable_gpu_turbo().
 */
inline Result enable_gpu_turbo(unsigned int idx) {
  const Result status = detail::enable_gpu_turbo(idx);
  return status;
}
}; // namespace perfect

25
include/perfect/init.hpp Normal file
View File

@@ -0,0 +1,25 @@
#pragma once
#include <nvml.h>
namespace perfect {
/*! initialize the benchmark
 *
 * Calls nvmlInit() the first time it succeeds; later calls return
 * Result::SUCCESS without touching NVML again. A failed attempt is not
 * remembered, so the next call retries.
 *
 * \return Result::SUCCESS, or from_nvml() of the nvmlInit() failure
 */
inline Result init() {
  // Remembers a successful nvmlInit() so init() can be called repeatedly.
  static bool initialized = false;
  if (!initialized) {
    const nvmlReturn_t ret = nvmlInit();
    if (ret != NVML_SUCCESS) {
      return from_nvml(ret);
    }
    initialized = true;
  }
  return Result::SUCCESS;
}
}; // namespace perfect

View File

@@ -2,22 +2,53 @@
#include <cassert>
#include <nvml.h>
namespace perfect {
/*! Status codes returned by the perfect:: API.
 */
enum class Result {
  SUCCESS,            //!< the operation succeeded
  NVML_NOT_SUPPORTED, //!< nvidia-ml returned not supported
  NVML_NO_PERMISSION, //!< nvidia-ml returned no permission
  NVML_UNINITIALIZED, //!< nvidia-ml reported it was not initialized
  NO_PERMISSION,      //!< no permission
  UNKNOWN             //!< unknown error
};
/*! Translate an NVML status code into a perfect::Result.
 *
 * \param nvml status returned by an NVML call
 * \return the matching Result for NVML_SUCCESS, NVML_ERROR_UNINITIALIZED,
 *         NVML_ERROR_NOT_SUPPORTED and NVML_ERROR_NO_PERMISSION. Any other
 *         code trips the "unhandled nvmlReturn_t" assert and, with asserts
 *         compiled out, yields Result::UNKNOWN.
 */
inline Result from_nvml(nvmlReturn_t nvml) {
  switch (nvml) {
  case NVML_SUCCESS:
    return Result::SUCCESS;
  case NVML_ERROR_UNINITIALIZED:
    return Result::NVML_UNINITIALIZED;
  case NVML_ERROR_NOT_SUPPORTED:
    return Result::NVML_NOT_SUPPORTED;
  case NVML_ERROR_NO_PERMISSION:
    return Result::NVML_NO_PERMISSION;
  // Listed explicitly as known-but-unmapped codes; they share the default
  // handling below.
  case NVML_ERROR_INVALID_ARGUMENT:
  case NVML_ERROR_GPU_IS_LOST:
  case NVML_ERROR_UNKNOWN:
  default:
    assert(0 && "unhandled nvmlReturn_t");
  }
  return Result::UNKNOWN;
}
/*! Human-readable description of a Result.
 *
 * \param result status code to describe
 * \return a string literal for every Result enumerator. A value outside the
 *         enum trips the "unexpected perfect::Result" assert and, with
 *         asserts compiled out, yields an empty string.
 */
inline const char *get_string(const Result &result) {
  switch (result) {
  case Result::SUCCESS:
    return "success";
  case Result::NO_PERMISSION:
    return "no permission";
  case Result::UNKNOWN:
    return "unknown error";
  case Result::NVML_NOT_SUPPORTED:
    return "nvidia-ml returned not supported";
  case Result::NVML_NO_PERMISSION:
    return "nvidia-ml returned no permission";
  case Result::NVML_UNINITIALIZED:
    return "nvidia-ml returned uninitialized";
  default:
    assert(0 && "unexpected perfect::Result");
  }
  assert(0 && "unreachable");
  return "";
}
} // namespace perfect