initial commit

This commit is contained in:
Carl Pearson
2019-09-19 10:59:28 -05:00
parent a8a014e706
commit f51ef904fb
18 changed files with 572 additions and 0 deletions

45
CMakeLists.txt Normal file
View File

@@ -0,0 +1,45 @@
# Minimum CMake version rationale:
#   3.8+  for project(LANGUAGES CUDA)
#   3.9+  for OpenMP::OpenMP_CXX
#   3.10+ FindOpenMP gained support for language-specific components
#   3.11+ for CMake not to add -fopenmp to the nvcc flags
cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
project(perfect LANGUAGES CUDA CXX VERSION 0.1.0)

# https://blog.kitware.com/cmake-and-the-default-build-type/
# Choose a default build type if none was specified: Debug when configuring
# from a git checkout, Release otherwise.
set(default_build_type "Release")
if(EXISTS "${CMAKE_SOURCE_DIR}/.git")
  set(default_build_type "Debug")
endif()

# Only single-config generators use CMAKE_BUILD_TYPE; multi-config generators
# define CMAKE_CONFIGURATION_TYPES instead and are left alone.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
  set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
      STRING "Choose the type of build." FORCE)
  # Set the possible values of build type for cmake-gui
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
               "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

# Report the build type only after the default has been applied; reporting it
# earlier prints an empty value whenever the user did not pass one.
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
# Per-build-type settings.
# CMAKE_CUDA_FLAGS is a single space-separated string, so flags are appended
# with string(APPEND). Using set(VAR ${VAR} flag) would create a ';'-separated
# list and corrupt the command line whenever the variable is non-empty.
if(CMAKE_BUILD_TYPE MATCHES Debug)
  message(STATUS "Setting verbose build during Debug")
  set(CMAKE_VERBOSE_MAKEFILE ON)
  # nvcc -G: device-side debug information
  string(APPEND CMAKE_CUDA_FLAGS " -G")
elseif(CMAKE_BUILD_TYPE MATCHES Release)
  add_definitions(-DNDEBUG)
  # nvcc -lineinfo: line-number information for device code
  string(APPEND CMAKE_CUDA_FLAGS " -lineinfo")
endif()
# The headers and examples use C++11 features (enum class, auto, range-for),
# so request C++11 for host C++ sources as well as for CUDA sources instead of
# relying on the host compiler's default dialect.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Header directory; its CMakeLists is expected to export PERFECT_HEADERS.
add_subdirectory(include/perfect)

# perfect is header-only: consumers only need the include path.
add_library(perfect INTERFACE)
target_include_directories(perfect INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include")

add_subdirectory(examples)

View File

@@ -1,2 +1,19 @@
# perfect
CPU/GPU performance control library for benchmarking
* x86
* POWER
* Nvidia
## Features
- [x] Disable CPU turbo (linux)
- [x] Set OS CPU performance mode to maximum (linux)
- [x] Set GPU clocks (nvidia)
- [x] Disable GPU turbo (nvidia)
- [x] Flush addresses from cache (amd64, POWER)
## Wish List
- [ ] Nvidia GPU power monitoring
- [ ] Nvidia GPU utilization monitoring

50
examples/CMakeLists.txt Normal file
View File

@@ -0,0 +1,50 @@
# Warning flags for the example programs.
#
# The host-compiler flags shared by C++ and CUDA are listed once. Each one is
# appended to CMAKE_CXX_FLAGS as-is and to CMAKE_CUDA_FLAGS behind -Xcompiler=
# so nvcc forwards it to the host compiler. Every flag is appended with an
# explicit leading space so neighbouring flags can never run together.
set(perfect_example_host_flags
  -Wall
  -Wextra
  -Wcast-qual
  -Wcast-align
  -Wstrict-aliasing
  -Wpointer-arith
  -Winit-self
  -Wshadow
  -Wswitch-enum
  -Wfloat-equal
  -Wvla
  -fmax-errors=1
  -Wfatal-errors
)
foreach(flag IN LISTS perfect_example_host_flags)
  string(APPEND CMAKE_CXX_FLAGS " ${flag}")
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=${flag}")
endforeach()

# C++-only warnings:
#   removed -Wredundant-decls from the CUDA flags for cuda 10.1
#   removed -Wundef from the CUDA flags for cuda 10.0
string(APPEND CMAKE_CXX_FLAGS " -Wredundant-decls -Wundef")
# One executable per example; each only needs the header-only perfect target.
# PRIVATE is given explicitly: the keyword-less signature has legacy semantics
# and cannot be mixed with keyword calls on the same target later.
add_executable(cpu-cache cpu_cache.cpp)
target_link_libraries(cpu-cache PRIVATE perfect)

add_executable(cpu-turbo cpu_turbo.cpp)
target_link_libraries(cpu-turbo PRIVATE perfect)

add_executable(os-perf os_perf.cpp)
target_link_libraries(os-perf PRIVATE perfect)

add_executable(gpu-clocks gpu_clocks.cu)
target_link_libraries(gpu-clocks PRIVATE perfect)

add_executable(gpu-turbo gpu_turbo.cu)
target_link_libraries(gpu-turbo PRIVATE perfect)

11
examples/cpu_cache.cpp Normal file
View File

@@ -0,0 +1,11 @@
#include "perfect/cpu_cache.hpp"
// Example: flush a heap buffer out of the CPU caches.
int main(void) {
  // Number of ints in the demo buffer.
  const int count = 1024;
  int *buffer = new int[count];

  // Flush every byte of the buffer from the cache hierarchy.
  flush_all(buffer, count * sizeof(int));

  // do things with `buffer` flushed from cache into main memory
  // furthermore, all loads and stores before this function call are
  // guaranteed to be complete

  delete[] buffer;
  return 0;
}

14
examples/cpu_turbo.cpp Normal file
View File

@@ -0,0 +1,14 @@
#include "perfect/cpu_turbo.hpp"
int main(void) {
perfect::CpuTurboState state;
perfect::get_cpu_turbo_state(&state);
perfect::disable_cpu_turbo();
// do things with CPU turbo disabled
perfect::set_cpu_turbo_state(state);
}

5
examples/gpu_clocks.cu Normal file
View File

@@ -0,0 +1,5 @@
#include "perfect/gpu_clocks.hpp"
// Placeholder example: only checks that perfect/gpu_clocks.hpp compiles.
int main(void) {
  return 0;
}

5
examples/gpu_turbo.cu Normal file
View File

@@ -0,0 +1,5 @@
#include "perfect/gpu_turbo.hpp"
// Placeholder example: only checks that perfect/gpu_turbo.hpp compiles.
int main(void) {
  return 0;
}

26
examples/os_perf.cpp Normal file
View File

@@ -0,0 +1,26 @@
#include "perfect/os_perf.hpp"
#include <map>
int main(void) {
std::map<int, perfect::OsPerfState> states;
for (auto cpu : perfect::cpus()) {
perfect::OsPerfState state;
perfect::get_os_perf_state(&state, cpu);
states[cpu] = state;
perfect::os_perf_state_maximum(cpu);
}
// do things with all CPUs set to the maximum performancem mode by the OS
for (auto kv : states) {
int cpu = kv.first;
perfect::OsPerfState state = kv.second;
perfect::set_os_perf_state(cpu, state);
}
}

View File

@@ -0,0 +1,4 @@
# Export the list of public headers to the parent directory scope.
#
# set(... PARENT_SCOPE) does not change the variable in the current scope, so
# repeating it once per header would leave only the last header in the parent.
# Accumulate the list locally first, then publish it with a single
# PARENT_SCOPE assignment.
list(APPEND PERFECT_HEADERS
  cpu_cache.hpp
  cpu_turbo.hpp
  os_perf.hpp
  result.hpp
)
set(PERFECT_HEADERS "${PERFECT_HEADERS}" PARENT_SCOPE)

View File

@@ -0,0 +1,126 @@
#pragma once
/*!
Routines for controlling CPU caching
*/
#pragma once
#include <algorithm>
#include <cstdio>
#include <iostream>
#ifdef __linux__
#include <unistd.h>
#endif
// https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html#Simple-Constraints
/*! Flush the cache line containing the byte at address `p`.

    Supported on POWER (dcbf) and amd64 (clflush); any other target is a
    compile-time error.
*/
inline void flush_line(void *p) {
#if defined(__powerpc__)
  /*
  PowerISA_V2.07B p. 773
  dcbf RA,RB,L
  effective address is RA|0 + RB
  this mnemonic has L=0, which is through all cache levels
  write block to storage and mark as invalid in all processors

  see also linux/arch/powerpc/include/asm/cache.h
  */
  asm volatile("dcbf 0, %0"
               : // no outputs
               : "r"(p)
               : "memory");
#elif defined(__amd64__)
  /*
  arch/x86/include/asm/special_insns.h
  p139 https://www.amd.com/system/files/TechDocs/24594.pdf
  clflush mem8

  The memory operand must be the byte that `p` points to. Passing `p` itself
  as the "m" operand would flush the line holding the pointer variable, not
  the line the caller asked for.
  */
  asm volatile("clflush %0"
               : "+m"(*static_cast<volatile char *>(p))
               : // no inputs
               : // no clobbers
  );
#else
#error "unsupported platform"
#endif
}
/*! Issue a full memory barrier instruction: `sync` on POWER, `mfence` on
    amd64. Any other target is a compile-time error.
*/
inline void barrier_all() {
#if defined(__powerpc__)
  // sync is a mnemonic for sync 0, heavyweight sync
  asm volatile("sync" : : : "memory");
#elif defined(__amd64__)
  // no outputs, no inputs; the "memory" clobber keeps the compiler from
  // moving memory accesses across the fence
  asm volatile("mfence" : : : "memory");
#else
#error "unsupported platform"
#endif
}
/*! Return the smallest cache line size, in bytes, reported for any cache
    level on the platform.

    Levels for which sysconf reports a non-positive value (0 or -1, e.g. the
    level is absent or the query is unsupported) are ignored. Return 16 if no
    level reports a positive line size.

    Declared inline so the header can be included from several translation
    units without multiple-definition errors.
*/
inline size_t cache_linesize() {
#ifdef __linux__
  const int queries[] = {_SC_LEVEL1_DCACHE_LINESIZE, _SC_LEVEL2_CACHE_LINESIZE,
                         _SC_LEVEL3_CACHE_LINESIZE, _SC_LEVEL4_CACHE_LINESIZE};
  const size_t numQueries = sizeof(queries) / sizeof(queries[0]);

  long smallest = 0; // 0 means "nothing detected yet"
  for (size_t i = 0; i < numQueries; ++i) {
    const long reported = sysconf(queries[i]);
    if (reported <= 0) {
      continue; // level missing or query failed: must not affect the minimum
    }
    smallest = (smallest > 0) ? std::min(smallest, reported) : reported;
  }
  return (smallest > 0) ? static_cast<size_t>(smallest) : 16;
#else
#error "unsupported platform"
#endif
}
/*! Flush every cache line that overlaps the `n` bytes starting at `p`.

    A barrier is issued before and after the flushes, also when `n` is 0.
*/
inline void flush_all(void *p, const size_t n) {
  const size_t lineSize = cache_linesize();
  char *const bytes = static_cast<char *>(p);

  // cache flush may not be ordered wrt other kinds of accesses
  barrier_all();

  for (size_t i = 0; i < n; i += lineSize) {
    flush_line(bytes + i);
  }
  // `p` need not be line-aligned, so stepping by lineSize from `p` can stop
  // short of the line that holds the final byte. Flush that byte's line
  // explicitly; flushing a line twice is harmless.
  if (n > 0) {
    flush_line(bytes + (n - 1));
  }

  // make flushing visible to other accesses
  barrier_all();
}

View File

@@ -0,0 +1,41 @@
#pragma once
#ifdef __linux__ // linux
#include "detail/os/linux.hpp"
#ifdef __amd64__
#include "detail/turbo/linux_amd64.hpp"
#elif __powerpc64__
#include "detail/turbo/linux_power.hpp"
#else
#error "unsupported CPU arch"
#endif
#else // not linux
#error "unsupported OS"
#endif
#include "result.hpp"
namespace perfect {

/*! Snapshot of the CPU turbo setting, used to restore it later. */
struct CpuTurboState {
  bool enabled; // true if turbo was enabled when the snapshot was taken
};

/*! Store the current CPU turbo setting in `*state`.

    Return Result::UNKNOWN if `state` is null, Result::SUCCESS otherwise.
    Declared inline because this header may be included from several
    translation units.
*/
inline Result get_cpu_turbo_state(CpuTurboState *state) {
  if (!state) {
    return Result::UNKNOWN;
  }
  state->enabled = is_turbo_enabled();
  return Result::SUCCESS;
}

/*! Enable or disable CPU turbo according to `state.enabled`.

    Return the Result of the underlying enable/disable call.
*/
inline Result set_cpu_turbo_state(CpuTurboState state) {
  if (state.enabled) {
    return enable_cpu_turbo();
  }
  return disable_cpu_turbo();
}

} // namespace perfect

View File

@@ -0,0 +1,51 @@
#pragma once
#include <string>
#include <fstream>
#include <cassert>
#include <vector>
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
#include "perfect/result.hpp"
namespace perfect {

namespace detail {
/*! Path of the cpufreq scaling_governor file for CPU `cpu`. */
inline std::string scaling_governor_path(const int cpu) {
  std::string path("/sys/devices/system/cpu/cpu");
  path += std::to_string(cpu);
  path += "/cpufreq/scaling_governor";
  return path;
}
} // namespace detail

/*! Return the ids of the CPUs in the calling thread's affinity mask.

    Return an empty vector if sched_getaffinity fails (after asserting in
    debug builds).
*/
inline std::vector<int> cpus() {
  std::vector<int> result;
  cpu_set_t mask;
  if (sched_getaffinity(0 /*caller*/, sizeof(cpu_set_t), &mask)) {
    assert(0 && "failed sched_getaffinity");
    return result; // with NDEBUG: do not read the unset mask
  }
  for (int i = 0; i < CPU_SETSIZE; ++i) {
    if (CPU_ISSET(i, &mask)) {
      result.push_back(i);
    }
  }
  return result;
}

/*! Read the scaling governor of CPU `cpu` into `result`.

    Return Result::UNKNOWN if the governor file could not be read,
    Result::SUCCESS otherwise.
*/
inline Result get_governor(std::string &result, const int cpu) {
  std::ifstream ifs(detail::scaling_governor_path(cpu), std::ifstream::in);
  if (!std::getline(ifs, result)) {
    return Result::UNKNOWN;
  }
  return Result::SUCCESS;
}

/*! Write `governor` as the scaling governor of CPU `cpu`.

    Return Result::NO_PERMISSION if the write failed, Result::SUCCESS
    otherwise.
*/
inline Result set_governor(const int cpu, const std::string &governor) {
  std::ofstream ofs(detail::scaling_governor_path(cpu), std::ofstream::out);
  ofs << governor;
  ofs.close();
  if (ofs.fail()) {
    return Result::NO_PERMISSION;
  }
  return Result::SUCCESS;
}

} // namespace perfect

View File

@@ -0,0 +1,50 @@
#pragma once
#include <cassert>
#include <fstream>
#include "perfect/result.hpp"
namespace perfect {

/*! Return true if the intel_pstate no_turbo file can be opened for reading. */
inline bool has_intel_pstate_no_turbo() {
  return bool(std::ifstream("/sys/devices/system/cpu/intel_pstate/no_turbo"));
}

/*! Write `s` to the intel_pstate no_turbo file.

    Return 0 on success and 1 if the write failed.
*/
inline int write_intel_pstate_no_turbo(const std::string &s) {
  assert(has_intel_pstate_no_turbo());
  std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
  std::ofstream ofs(path, std::ofstream::out);
  ofs << s;
  ofs.close();
  if (ofs.fail()) {
    return 1;
  }
  return 0;
}

/*! Return the first line of the intel_pstate no_turbo file. */
inline std::string read_intel_pstate_no_turbo() {
  assert(has_intel_pstate_no_turbo());
  std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
  std::ifstream ifs(path, std::ifstream::in);
  std::string result;
  std::getline(ifs, result);
  return result;
}

/*! Return true if turbo is enabled, i.e. no_turbo reads "0". */
inline bool is_turbo_enabled() {
  return "0" == read_intel_pstate_no_turbo();
}

/*! Disable turbo by writing "1" to no_turbo.

    Return Result::NO_PERMISSION if the write failed, Result::SUCCESS
    otherwise.
*/
inline Result disable_cpu_turbo() {
  if (write_intel_pstate_no_turbo("1") != 0) {
    return Result::NO_PERMISSION;
  }
  return Result::SUCCESS;
}

/*! Enable turbo by writing "0" to no_turbo (the file is a "no turbo" switch,
    so "0" is the enabled state that is_turbo_enabled() checks for).

    Return Result::NO_PERMISSION if the write failed, Result::SUCCESS
    otherwise.
*/
inline Result enable_cpu_turbo() {
  if (write_intel_pstate_no_turbo("0") != 0) {
    return Result::NO_PERMISSION;
  }
  return Result::SUCCESS;
}

} // namespace perfect

View File

@@ -0,0 +1,47 @@
#pragma once
#include <cassert>
#include <fstream>
#include <string>

#include "perfect/result.hpp"
namespace perfect {

/*! Return true if the acpi-cpufreq boost file can be opened for reading. */
inline bool has_acpi_cpufreq_boost() {
  return bool(std::ifstream("/sys/devices/system/cpu/cpufreq/boost"));
}

/*! Write `s` to the acpi-cpufreq boost file.

    Return 0 on success and 1 if the write failed.
*/
inline int write_acpi_cpufreq_boost(const std::string &s) {
  assert(has_acpi_cpufreq_boost());
  std::string path("/sys/devices/system/cpu/cpufreq/boost");
  std::ofstream ofs(path, std::ofstream::out);
  ofs << s;
  ofs.close();
  if (ofs.fail()) {
    return 1;
  }
  return 0;
}

/*! Return the first line of the acpi-cpufreq boost file. */
inline std::string read_acpi_cpufreq_boost() {
  assert(has_acpi_cpufreq_boost());
  std::string path("/sys/devices/system/cpu/cpufreq/boost");
  std::ifstream ifs(path, std::ifstream::in);
  std::string result;
  std::getline(ifs, result);
  return result;
}

/*! Misspelled historical name of read_acpi_cpufreq_boost(); kept so that
    existing callers continue to compile.
*/
inline std::string read_acpi_cpufeq_boost() {
  return read_acpi_cpufreq_boost();
}

/*! Return true if turbo is enabled, i.e. boost reads "1". */
inline bool is_turbo_enabled() {
  return "1" == read_acpi_cpufreq_boost();
}

/*! Disable turbo by writing "0" to boost.

    Return Result::NO_PERMISSION if the write failed, Result::SUCCESS
    otherwise.
*/
inline Result disable_cpu_turbo() {
  if (write_acpi_cpufreq_boost("0") != 0) {
    return Result::NO_PERMISSION;
  }
  return Result::SUCCESS;
}

/*! Enable turbo by writing "1" to boost.

    Return Result::NO_PERMISSION if the write failed, Result::SUCCESS
    otherwise.
*/
inline Result enable_cpu_turbo() {
  if (write_acpi_cpufreq_boost("1") != 0) {
    return Result::NO_PERMISSION;
  }
  return Result::SUCCESS;
}

} // namespace perfect

View File

@@ -0,0 +1 @@
#pragma once

View File

@@ -0,0 +1 @@
#pragma once

View File

@@ -0,0 +1,55 @@
#pragma once
#include <vector>
#include <string>
#include <cassert>
#ifdef __linux__
#include "detail/os/linux.hpp"
#else
#error "unsupported platform"
#endif
#include "result.hpp"
namespace perfect {

/*! Per-CPU OS performance state, used to save and restore the setting.
    On Linux this is the cpufreq governor name.
*/
struct OsPerfState {
#ifdef __linux__
  std::string governor;
#else
#error "unsupported platform"
#endif
};

/*! Store the current OS performance state of CPU `cpu` in `*state`.

    `state` must not be null. Functions here are declared inline so the
    header can be included from several translation units.
*/
inline Result get_os_perf_state(OsPerfState *state, const int cpu) {
  assert(state);
#ifdef __linux__
  return get_governor(state->governor, cpu);
#else
#error "unsupported platform"
#endif
}

/*! Put CPU `cpu` into the OS maximum-performance state (on Linux: the
    "performance" governor).
*/
inline Result os_perf_state_maximum(const int cpu) {
#ifdef __linux__
  return set_governor(cpu, "performance");
#else
#error "unsupported platform"
#endif
}

/*! Apply a previously captured OS performance state to CPU `cpu`. */
inline Result set_os_perf_state(const int cpu, OsPerfState state) {
#ifdef __linux__
  return set_governor(cpu, state.governor);
#else
#error "unsupported platform"
#endif
}

} // namespace perfect

View File

@@ -0,0 +1,23 @@
#pragma once
#include <cassert>
namespace perfect {

/*! Status code returned by the perfect API. */
enum class Result {
  SUCCESS,
  NVIDIA_ML,
  NO_PERMISSION,
  UNKNOWN
};

/*! Return a human-readable string for `result`.

    A value outside the enumerators asserts in debug builds; with NDEBUG a
    fallback string is returned so the function never falls off its end.
    Declared inline so the header can be included from several translation
    units.
*/
inline const char *get_string(const Result &result) {
  switch (result) {
  case Result::SUCCESS:
    return "success";
  case Result::NO_PERMISSION:
    return "no permission";
  case Result::UNKNOWN:
    return "unknown error";
  case Result::NVIDIA_ML:
    return "nvidia-ml error";
  }
  // Only reachable if `result` holds a value that is not an enumerator.
  assert(0 && "unexpected perfect::Result");
  return "unexpected perfect::Result";
}

} // namespace perfect