initial commit
This commit is contained in:
4
include/perfect/CMakeLists.txt
Normal file
4
include/perfect/CMakeLists.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
# Publish this directory's public headers to the including (parent) scope.
#
# set(... PARENT_SCOPE) only writes the parent's variable; the copy visible in
# this directory is left untouched. Issuing one set() per header therefore
# re-expands the same unchanged ${PERFECT_HEADERS} every time and the parent
# ends up with only the last header. Append all headers in a single call.
set(PERFECT_HEADERS
    ${PERFECT_HEADERS}
    cpu_cache.hpp
    cpu_turbo.hpp
    os_perf.hpp
    result.hpp
    PARENT_SCOPE)
|
126
include/perfect/cpu_cache.hpp
Normal file
126
include/perfect/cpu_cache.hpp
Normal file
@@ -0,0 +1,126 @@
|
||||
#pragma once
|
||||
|
||||
/*!
|
||||
Routines for controlling CPU caching
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html#Simple-Constraints
|
||||
|
||||
/*! Flush the cache line that contains the byte at \p p.

  \param p address inside the cache line to flush; it does not need to be
           aligned to the start of the line
*/
inline void flush_line(void *p) {
#if defined(__powerpc__)

  /*
  PowerISA_V2.07B p. 773
  dcbf RA,RB,L

  effective address is RA|0 + RB
  this mnemonic has L=0, which is through all cache levels
  write block to storage and mark as invalid in all processors
  */

  /*!
  linux/arch/powerpc/include/asm/cache.h
  */
  asm volatile("dcbf 0, %0"
               : // no outputs
               : "r"(p)
               : "memory");

#elif defined(__amd64__)

  /*!
  arch/x86/include/asm/special_insns.h

  p139
  https://www.amd.com/system/files/TechDocs/24594.pdf

  clflush mem8

  The memory operand has to be the byte that p points at. Using the pointer
  variable itself as the "m" operand would hand clflush the address of the
  local variable holding the pointer, flushing the wrong line.
  */
  asm volatile("clflush %0"
               : "+m"(*static_cast<volatile char *>(p))
               : // no inputs
               : // no clobbers
  );

#else
#error "unsupported platform"
  (void)p;
#endif
}
|
||||
|
||||
/*! Execute the platform's fence instruction.

  Emits `sync` on POWER and `mfence` on x86-64. Both statements carry a
  "memory" clobber, so the compiler does not move memory accesses across the
  call either.
*/
inline void barrier_all() {
#if defined(__powerpc__)
  // plain `sync` assembles as `sync 0`, the heavyweight variant
  __asm__ __volatile__("sync" : /* no outputs */ : /* no inputs */ : "memory");
#elif defined(__amd64__)
  __asm__ __volatile__("mfence" : /* no outputs */ : /* no inputs */ : "memory");
#else
#error "unsupported platform"
#endif
}
|
||||
|
||||
/*! Return the smallest cache line size detected on the platform.

  Asks sysconf() for the line size of the L1 data cache and of the L2, L3 and
  L4 caches. A level for which sysconf() yields 0 or a negative value (error)
  is ignored, so it can neither hide a valid size reported by another level
  nor turn into a huge value when converted to size_t.

  \return the smallest positive line size reported, in bytes, or 16 if no
          level reported one
*/
inline size_t cache_linesize() {
#ifdef __linux__
  const int levels[] = {_SC_LEVEL1_DCACHE_LINESIZE, _SC_LEVEL2_CACHE_LINESIZE,
                        _SC_LEVEL3_CACHE_LINESIZE, _SC_LEVEL4_CACHE_LINESIZE};

  long smallest = 0; // 0 == nothing detected yet
  for (const int level : levels) {
    const long reported = sysconf(level);
    if (reported <= 0) {
      continue; // unknown or error for this level
    }
    smallest = (smallest > 0) ? std::min(smallest, reported) : reported;
  }

  return (smallest > 0) ? static_cast<size_t>(smallest) : 16;
#else
#error "unsupported platform"
#endif
}
|
||||
|
||||
inline void flush_all(void *p, const size_t n) {
|
||||
|
||||
size_t lineSize = cache_linesize();
|
||||
|
||||
// cache flush may not be ordered wrt other kinds of accesses
|
||||
barrier_all();
|
||||
|
||||
for (size_t i = 0; i < n; i += lineSize) {
|
||||
char *c = static_cast<char *>(p);
|
||||
flush_line(&c[i]);
|
||||
}
|
||||
|
||||
// make flushing visible to other accesses
|
||||
barrier_all();
|
||||
}
|
||||
|
41
include/perfect/cpu_turbo.hpp
Normal file
41
include/perfect/cpu_turbo.hpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __linux__ // linux
|
||||
#include "detail/os/linux.hpp"
|
||||
|
||||
#ifdef __amd64__
|
||||
#include "detail/turbo/linux_amd64.hpp"
|
||||
#elif __powerpc64__
|
||||
#include "detail/turbo/linux_power.hpp"
|
||||
#else
|
||||
#error "unsupported CPU arch"
|
||||
#endif
|
||||
|
||||
#else // not linux
|
||||
#error "unsupported OS"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#include "result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! Saved CPU turbo setting, as read by get_cpu_turbo_state() and applied by
    set_cpu_turbo_state().
*/
struct CpuTurboState {
  bool enabled; //!< true when turbo is (or should be) enabled
};
|
||||
|
||||
|
||||
/*! Record the current CPU turbo setting in \p state.

  \param state destination for the current setting
  \return Result::SUCCESS once \p state has been filled in, Result::UNKNOWN if
          \p state is null
*/
inline Result get_cpu_turbo_state(CpuTurboState *state) {
  if (!state) {
    return Result::UNKNOWN;
  }
  state->enabled = is_turbo_enabled();
  // the function is declared to return Result; flowing off the end without a
  // return statement is undefined behaviour
  return Result::SUCCESS;
}
|
||||
|
||||
/*! Apply a previously captured (or hand-built) CPU turbo setting.

  \param state desired setting; `enabled == true` turns turbo on, otherwise
               turbo is turned off
  \return whatever enable_cpu_turbo() / disable_cpu_turbo() reports
*/
inline Result set_cpu_turbo_state(CpuTurboState state) {
  // forward the callee's status instead of dropping it and falling off the
  // end of a function that must return a Result
  return state.enabled ? enable_cpu_turbo() : disable_cpu_turbo();
}
|
||||
|
||||
};
|
51
include/perfect/detail/os/linux.hpp
Normal file
51
include/perfect/detail/os/linux.hpp
Normal file
@@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
#include <sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! List the CPUs in the calling thread's affinity mask.

  \return CPU ids in ascending order; empty if sched_getaffinity() fails in a
          build where assertions are disabled
*/
inline std::vector<int> cpus() {
  std::vector<int> result;

  cpu_set_t mask;
  CPU_ZERO(&mask); // never inspect an uninitialised mask
  if (sched_getaffinity(0 /*caller*/, sizeof(cpu_set_t), &mask)) {
    assert(0 && "failed sched_getaffinity");
    return result;
  }

  for (int i = 0; i < CPU_SETSIZE; ++i) {
    if (CPU_ISSET(i, &mask)) {
      result.push_back(i);
    }
  }
  return result;
}
|
||||
|
||||
/*! Read the cpufreq scaling governor of one CPU.

  Reads the first line of
  /sys/devices/system/cpu/cpu<cpu>/cpufreq/scaling_governor.

  \param result receives the governor name; its content should not be relied
                upon when the call does not return Result::SUCCESS
  \param cpu    index of the CPU to query
  \return Result::SUCCESS if a line was read, Result::UNKNOWN if the file
          could not be opened or read
*/
inline Result get_governor(std::string &result, const int cpu) {
  const std::string path = "/sys/devices/system/cpu/cpu" +
                           std::to_string(cpu) + "/cpufreq/scaling_governor";

  std::ifstream ifs(path, std::ifstream::in);
  if (!ifs) {
    return Result::UNKNOWN; // missing file, no cpufreq support, ...
  }
  if (!std::getline(ifs, result)) {
    return Result::UNKNOWN;
  }
  return Result::SUCCESS;
}
|
||||
|
||||
/*! Select the cpufreq scaling governor of one CPU.

  Writes \p governor to
  /sys/devices/system/cpu/cpu<cpu>/cpufreq/scaling_governor.

  \param cpu      index of the CPU to configure
  \param governor governor name to write
  \return Result::SUCCESS if the write went through, Result::NO_PERMISSION if
          the file could not be opened or the write failed
*/
inline Result set_governor(const int cpu, const std::string &governor) {
  const std::string path = "/sys/devices/system/cpu/cpu" +
                           std::to_string(cpu) + "/cpufreq/scaling_governor";

  std::ofstream ofs(path, std::ofstream::out);
  if (!ofs) {
    return Result::NO_PERMISSION;
  }

  ofs << governor;
  // close() flushes the buffered data, so a failed write shows up in the
  // stream state checked below
  ofs.close();
  if (ofs.fail()) {
    return Result::NO_PERMISSION;
  }
  return Result::SUCCESS;
}
|
||||
|
||||
}
|
50
include/perfect/detail/turbo/linux_amd64.hpp
Normal file
50
include/perfect/detail/turbo/linux_amd64.hpp
Normal file
@@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! Report whether /sys/devices/system/cpu/intel_pstate/no_turbo can be opened
    for reading.

  Declared inline so that this header can be included from more than one
  translation unit.
*/
inline bool has_intel_pstate_no_turbo() {
  const std::ifstream probe("/sys/devices/system/cpu/intel_pstate/no_turbo");
  return static_cast<bool>(probe);
}
|
||||
|
||||
/*! Write \p s to /sys/devices/system/cpu/intel_pstate/no_turbo.

  Asserts that the file is available (see has_intel_pstate_no_turbo()).
  Declared inline so that this header can be included from more than one
  translation unit.

  \param s text to write
  \return 0 on success, 1 if the stream reports a failure
*/
inline int write_intel_pstate_no_turbo(const std::string &s) {
  assert(has_intel_pstate_no_turbo());
  const std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
  // SPDLOG_LOGGER_DEBUG(logger::console(), "writing {} to {}", s, path);
  std::ofstream ofs(path, std::ofstream::out);
  ofs << s;
  // close() flushes, so a failed write is visible in fail() afterwards
  ofs.close();
  if (ofs.fail()) {
    // SPDLOG_LOGGER_DEBUG(logger::console(), "error writing {} to {}", s, path);
    return 1;
  }
  return 0;
}
|
||||
|
||||
/*! Read the first line of /sys/devices/system/cpu/intel_pstate/no_turbo.

  Asserts that the file is available (see has_intel_pstate_no_turbo()).
  Declared inline so that this header can be included from more than one
  translation unit.

  \return the line read, without the trailing newline; empty if nothing could
          be read
*/
inline std::string read_intel_pstate_no_turbo() {
  assert(has_intel_pstate_no_turbo());
  const std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
  // SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
  std::ifstream ifs(path, std::ifstream::in);
  std::string line;
  std::getline(ifs, line);
  return line;
}
|
||||
|
||||
bool is_turbo_enabled() {
|
||||
return "0" == read_intel_pstate_no_turbo();
|
||||
}
|
||||
|
||||
/*! Turn CPU turbo off by writing "1" to intel_pstate's no_turbo file.

  \return Result::SUCCESS if the write succeeded, Result::NO_PERMISSION
          otherwise
*/
inline Result disable_cpu_turbo() {
  // translate the 0/1 status of the writer instead of discarding it and
  // leaving a function that must return a Result without a return value
  return write_intel_pstate_no_turbo("1") == 0 ? Result::SUCCESS
                                               : Result::NO_PERMISSION;
}
|
||||
/*! Turn CPU turbo on by writing "0" to intel_pstate's no_turbo file.

  is_turbo_enabled() treats "0" as "turbo enabled", so enabling must clear the
  flag; writing "1" is what disable_cpu_turbo() does.

  \return Result::SUCCESS if the write succeeded, Result::NO_PERMISSION
          otherwise
*/
inline Result enable_cpu_turbo() {
  return write_intel_pstate_no_turbo("0") == 0 ? Result::SUCCESS
                                               : Result::NO_PERMISSION;
}
|
||||
|
||||
|
||||
}
|
47
include/perfect/detail/turbo/linux_power.hpp
Normal file
47
include/perfect/detail/turbo/linux_power.hpp
Normal file
@@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
#include <fstream>
#include <string>

#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! Report whether /sys/devices/system/cpu/cpufreq/boost can be opened for
    reading.

  Declared inline so that this header can be included from more than one
  translation unit.
*/
inline bool has_acpi_cpufreq_boost() {
  const std::ifstream probe("/sys/devices/system/cpu/cpufreq/boost");
  return static_cast<bool>(probe);
}
|
||||
|
||||
/*! Write \p s to /sys/devices/system/cpu/cpufreq/boost.

  Asserts that the file is available (see has_acpi_cpufreq_boost()).
  Declared inline so that this header can be included from more than one
  translation unit.

  \param s text to write
  \return 0 on success, 1 if the stream reports a failure
*/
inline int write_acpi_cpufreq_boost(const std::string &s) {
  assert(has_acpi_cpufreq_boost());
  const std::string path("/sys/devices/system/cpu/cpufreq/boost");
  // Logging is disabled, as in the intel_pstate implementation: nothing this
  // header includes declares SPDLOG_LOGGER_TRACE or logger::console().
  // SPDLOG_LOGGER_TRACE(logger::console(), "writing to {}", path);
  std::ofstream ofs(path, std::ofstream::out);
  ofs << s;
  // close() flushes, so a failed write is visible in fail() afterwards
  ofs.close();
  if (ofs.fail()) {
    // SPDLOG_LOGGER_TRACE(logger::console(), "error writing to {}", path);
    return 1;
  }
  return 0;
}
|
||||
|
||||
/*! Read the first line of /sys/devices/system/cpu/cpufreq/boost.

  Asserts that the file is available (see has_acpi_cpufreq_boost()).
  Declared inline so that this header can be included from more than one
  translation unit.

  NOTE: the name is spelled "cpufeq" (sic); it is kept because existing
  callers use that spelling.

  \return the line read, without the trailing newline; empty if nothing could
          be read
*/
inline std::string read_acpi_cpufeq_boost() {
  assert(has_acpi_cpufreq_boost());
  const std::string path("/sys/devices/system/cpu/cpufreq/boost");
  // Logging is disabled, as in the intel_pstate implementation: nothing this
  // header includes declares SPDLOG_LOGGER_TRACE or logger::console().
  // SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
  std::ifstream ifs(path, std::ifstream::in);
  std::string line;
  std::getline(ifs, line);
  return line;
}
|
||||
|
||||
bool is_turbo_enabled() {
|
||||
return "1" == read_acpi_cpufeq_boost();
|
||||
}
|
||||
|
||||
/*! Turn CPU turbo off by writing "0" to the cpufreq boost file.

  \return Result::SUCCESS if the write succeeded, Result::NO_PERMISSION
          otherwise
*/
inline Result disable_cpu_turbo() {
  // the writer is named write_acpi_cpufreq_boost ("cpufreq", not "cpufeq");
  // its 0/1 status is translated instead of being dropped
  return write_acpi_cpufreq_boost("0") == 0 ? Result::SUCCESS
                                            : Result::NO_PERMISSION;
}
|
||||
|
||||
/*! Turn CPU turbo on by writing "1" to the cpufreq boost file.

  \return Result::SUCCESS if the write succeeded, Result::NO_PERMISSION
          otherwise
*/
inline Result enable_cpu_turbo() {
  // the writer is named write_acpi_cpufreq_boost ("cpufreq", not "cpufeq");
  // its 0/1 status is translated instead of being dropped
  return write_acpi_cpufreq_boost("1") == 0 ? Result::SUCCESS
                                            : Result::NO_PERMISSION;
}
|
||||
|
||||
}
|
1
include/perfect/gpu_clocks.hpp
Normal file
1
include/perfect/gpu_clocks.hpp
Normal file
@@ -0,0 +1 @@
|
||||
#pragma once
|
1
include/perfect/gpu_turbo.hpp
Normal file
1
include/perfect/gpu_turbo.hpp
Normal file
@@ -0,0 +1 @@
|
||||
#pragma once
|
55
include/perfect/os_perf.hpp
Normal file
55
include/perfect/os_perf.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
|
||||
#ifdef __linux__
|
||||
#include "detail/os/linux.hpp"
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
|
||||
#include "result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! Operating-system performance setting of a CPU, as read by
    get_os_perf_state() and applied by set_os_perf_state().
*/
struct OsPerfState {
#if defined(__linux__)
  std::string governor; //!< cpufreq scaling governor name
#else
#error "unsupported platform"
#endif
};
|
||||
|
||||
/*! Capture the OS performance setting of one CPU.

  On Linux this reads the CPU's scaling governor into \p state.
  Declared inline so that this header can be included from more than one
  translation unit.

  \param state destination for the setting; must not be null
  \param cpu   index of the CPU to query
  \return the result of get_governor(), or Result::UNKNOWN if \p state is null
          in a build where assertions are disabled
*/
inline Result get_os_perf_state(OsPerfState *state, const int cpu) {
  assert(state);
  if (!state) {
    return Result::UNKNOWN; // do not dereference null when NDEBUG is set
  }
#ifdef __linux__
  return get_governor(state->governor, cpu);
#else
#error "unsupported platform"
#endif
}
|
||||
|
||||
/*! Switch one CPU to its highest-performance OS setting.

  On Linux this selects the "performance" scaling governor.
  Declared inline so that this header can be included from more than one
  translation unit.

  \param cpu index of the CPU to configure
  \return the result of set_governor()
*/
inline Result os_perf_state_maximum(const int cpu) {
#ifdef __linux__
  return set_governor(cpu, "performance");
#else
#error "unsupported platform"
#endif
}
|
||||
|
||||
/*! Apply a previously captured OS performance setting to one CPU.

  On Linux this writes the governor stored in \p state.
  Declared inline so that this header can be included from more than one
  translation unit.

  \param cpu   index of the CPU to configure
  \param state setting to apply
  \return the result of set_governor()
*/
inline Result set_os_perf_state(const int cpu, OsPerfState state) {
#ifdef __linux__
  return set_governor(cpu, state.governor);
#else
#error "unsupported platform"
#endif
}
|
||||
|
||||
};
|
23
include/perfect/result.hpp
Normal file
23
include/perfect/result.hpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
|
||||
namespace perfect {

/*! Status code returned by the library's functions. */
enum class Result {
  SUCCESS,       //!< the operation completed
  NVIDIA_ML,     //!< nvidia-ml error
  NO_PERMISSION, //!< no permission
  UNKNOWN        //!< unknown error
};

/*! Return a human-readable description of \p result.

  Declared inline so that this header can be included from more than one
  translation unit.

  \param result status code to describe
  \return a string literal; for a value that is not a Result enumerator the
          function asserts and, when assertions are disabled, returns
          "unexpected perfect::Result"
*/
inline const char *get_string(const Result &result) {
  // no `default:` label, so the compiler can warn about unhandled enumerators
  switch (result) {
  case Result::SUCCESS:
    return "success";
  case Result::NO_PERMISSION:
    return "no permission";
  case Result::UNKNOWN:
    return "unknown error";
  case Result::NVIDIA_ML:
    return "nvidia-ml error";
  }
  assert(0 && "unexpected perfect::Result");
  // with NDEBUG the assert vanishes; never fall off the end of a function
  // that returns a pointer
  return "unexpected perfect::Result";
}

} // namespace perfect
|
Reference in New Issue
Block a user