Compare commits
16 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
cd9a95365f | ||
![]() |
57bf39bb97 | ||
![]() |
c358f18c22 | ||
![]() |
37c61fe2fb | ||
![]() |
f2961b3075 | ||
![]() |
3a86aef546 | ||
![]() |
cd14d68c47 | ||
![]() |
2e32089786 | ||
![]() |
7503a29a5c | ||
![]() |
02e0c7c464 | ||
![]() |
1682a05d08 | ||
![]() |
14791badb1 | ||
![]() |
7890d17b57 | ||
![]() |
3bf2fd1df2 | ||
![]() |
227c4ebb11 | ||
![]() |
3ff80c86f2 |
@@ -5,7 +5,7 @@
|
||||
# 3.13+ for target_link_directories
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(perfect LANGUAGES CXX VERSION 0.3.0)
|
||||
project(perfect LANGUAGES CXX VERSION 0.5.0)
|
||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||
|
||||
include(CheckLanguage)
|
||||
|
44
README.md
44
README.md
@@ -2,12 +2,9 @@
|
||||
|
||||
| Branch | Status |
|
||||
|-|-|
|
||||
| master |[](https://actions-badge.atrox.dev/cwpearson/perfect/goto?ref=master)|
|
||||
| master |[](https://actions-badge.atrox.dev/cwpearson/perfect/goto?ref=master) |
|
||||
|
||||
CPU/GPU performance control library for benchmarking
|
||||
* x86
|
||||
* POWER
|
||||
* Nvidia
|
||||
CPU/GPU performance control library for benchmarking on Linux, x86, POWER, and Nvidia.
|
||||
|
||||
## Features
|
||||
|
||||
@@ -19,6 +16,10 @@ CPU/GPU performance control library for benchmarking
|
||||
- [x] Flush addresses from cache (amd64, POWER)
|
||||
- [x] CUDA not required (GPU functions will not be compiled)
|
||||
- [x] Flush file system caches (linux)
|
||||
- [x] Disable ASLR (linux)
|
||||
|
||||
## Contributors
|
||||
* [Carl Pearson](https://cwpearson.github.io)
|
||||
|
||||
## Installing
|
||||
|
||||
@@ -85,6 +86,20 @@ See [examples/gpu_monitor.cu](examples/gpu_monitor.cu)
|
||||
* `void Monitor::pause()`: pause the monitor thread
|
||||
* `void Monitor::resume()`: resume the monitor thread
|
||||
|
||||
### Disable ASLR
|
||||
|
||||
`perfect` can disable ASLR
|
||||
|
||||
See [tools/no_aslr.cpp](tools/no_aslr.cpp)
|
||||
|
||||
```c++
|
||||
#include "perfect/aslr.hpp"
|
||||
```
|
||||
|
||||
* `Result disable_aslr()`: disable ASLR
|
||||
* `Result get_aslr(AslrState &state)`: save the current ASLR state
|
||||
* `Result set_aslr(const AslrState &state)`: set a previously-saved ASLR state
|
||||
|
||||
### Flush file system caches
|
||||
|
||||
`perfect` can drop various filesystem caches
|
||||
@@ -175,6 +190,16 @@ See [examples/cpu_cache.cpp](examples/cpu_cache.cpp).
|
||||
|
||||
## Changelog
|
||||
|
||||
* v0.5.0
|
||||
* add tools/stress
|
||||
* add tools/max-os-perf
|
||||
* add tools/min-os-perf
|
||||
* add tools/enable-cpu-turbo
|
||||
* add tools/disable-cpu-turbo
|
||||
* v0.4.0
|
||||
* Add ASLR interface
|
||||
* Disambiguate some filesystem errors
|
||||
* Fix some powerpc namespace issues
|
||||
* v0.3.0
|
||||
* Add filesystem cache interface
|
||||
* v0.2.0
|
||||
@@ -191,8 +216,17 @@ See [examples/cpu_cache.cpp](examples/cpu_cache.cpp).
|
||||
## Wish List
|
||||
|
||||
- [ ] only monitor certain GPUs
|
||||
- [ ] hyperthreading interface
|
||||
- [ ] process priority interface
|
||||
- [ ] A wrapper utility
|
||||
- [ ] disable hyperthreading
|
||||
- [ ] reserve cores
|
||||
- [ ] set process priority
|
||||
- [ ] disable ASLR
|
||||
|
||||
## Related
|
||||
|
||||
* [LLVM benchmarking instructions](https://llvm.org/docs/Benchmarking.html#linux) covering ASLR, Linux governor, cpuset shielding, SMT, and Intel turbo.
|
||||
* [easyperf.net](https://easyperf.net/blog/2019/08/02/Perf-measurement-environment-on-Linux#2-disable-hyper-threading) blog post discussing ACPI/Intel turbo, SMT, Linux governor, CPU affinity, process priority, file system caches, and ASLR.
|
||||
* [temci](https://github.com/parttimenerd/temci) benchmarking tool for CPU shielding and disabling hyperthreading, among other things.
|
||||
* [perflock](https://github.com/aclements/perflock) tool for locking CPU frequency scaling domains
|
||||
|
40
include/perfect/aslr.hpp
Normal file
40
include/perfect/aslr.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
|
||||
#include <cerrno>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef __linux__
|
||||
#include "detail/os/linux.hpp"
|
||||
#endif
|
||||
#include "init.hpp"
|
||||
#include "result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
struct AslrState {
|
||||
#ifdef __linux__
|
||||
unsigned long persona;
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
};
|
||||
|
||||
Result get_aslr(AslrState &state) {
|
||||
int persona;
|
||||
PERFECT_SUCCESS_OR_RETURN(detail::get_personality(persona));
|
||||
state.persona = persona;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result set_aslr(const AslrState &state) {
|
||||
return detail::set_personality(state.persona);
|
||||
}
|
||||
|
||||
Result disable_aslr() {
|
||||
int persona;
|
||||
PERFECT_SUCCESS_OR_RETURN(detail::get_personality(persona));
|
||||
persona |= ADDR_NO_RANDOMIZE;
|
||||
return detail::set_personality(persona);
|
||||
}
|
||||
|
||||
} // namespace perfect
|
6
include/perfect/detail/cache/power.hpp
vendored
6
include/perfect/detail/cache/power.hpp
vendored
@@ -1,5 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
|
||||
inline void flush_line(void *p) {
|
||||
|
||||
/*
|
||||
@@ -33,3 +36,6 @@ inline void barrier_all() {
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -3,14 +3,28 @@
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
#include "../result.hpp"
|
||||
|
||||
#ifdef __linux__
|
||||
#include "fs/linux.hpp"
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
|
||||
Result write_str(const std::string &path, const std::string &val) {
|
||||
|
||||
if (!path_exists(path)) {
|
||||
std::cerr << "write_str(): does not exist: " << path << "\n";
|
||||
return Result::NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
std::ofstream ofs(path);
|
||||
if (ofs.fail()) {
|
||||
return Result::NOT_SUPPORTED;
|
||||
std::cerr << "failed to open " << path << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
}
|
||||
|
||||
ofs << val;
|
||||
@@ -24,6 +38,7 @@ Result write_str(const std::string &path, const std::string &val) {
|
||||
std::cerr << "EPERM when writing to " << path << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
std::cerr << "ENOENT when writing to " << path << "\n";
|
||||
return Result::NOT_SUPPORTED;
|
||||
default:
|
||||
return Result::UNKNOWN;
|
||||
|
31
include/perfect/detail/fs/linux.hpp
Normal file
31
include/perfect/detail/fs/linux.hpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cerrno>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace perfect {
namespace detail {

// Report whether `path` can be stat()ed.
//
// Returns true when stat() succeeds. Returns false when stat() fails with
// ENOENT or ENOTDIR. Any other stat() failure is logged to stderr and trips
// assert(0); when assertions are compiled out (NDEBUG) the function reports
// the path as existing, which is what the previous fall-through did.
//
// Declared `inline` because it is defined in a header; a non-inline
// definition yields multiple-definition link errors as soon as two
// translation units include it.
inline bool path_exists(const std::string &path) {
  struct stat sb;
  if (0 == stat(path.c_str(), &sb)) {
    return true;
  }

  // errno is read here, before any stream output could modify it.
  switch (errno) {
  case ENOENT:
  case ENOTDIR:
    return false;
  default:
    std::cerr << "unhandled error in stat() for " << path << "\n";
    assert(0);
    return true; // only reached with NDEBUG
  }
}

} // namespace detail
} // namespace perfect
|
@@ -12,6 +12,7 @@
|
||||
#include <sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/personality.h>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
@@ -88,4 +89,24 @@ size_t cache_linesize() {
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
Result get_personality(int &persona) {
|
||||
int ret = personality(0xffffffff);
|
||||
if (-1 == ret) {
|
||||
return Result::UNKNOWN;
|
||||
} else {
|
||||
persona = ret;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result set_personality(const int persona) {
|
||||
int ret = personality(persona);
|
||||
if (-1 == ret) {
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace perfect
|
@@ -4,6 +4,7 @@
|
||||
#include <fstream>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
#include "perfect/detail/fs.hpp"
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
@@ -15,19 +16,12 @@ bool has_intel_pstate_no_turbo() {
|
||||
Result write_intel_pstate_no_turbo(const std::string &s) {
|
||||
assert(has_intel_pstate_no_turbo());
|
||||
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
|
||||
std::ofstream ofs(path, std::ofstream::out);
|
||||
ofs << s;
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
return Result::NO_PERMISSION;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
return write_str(path, s);
|
||||
}
|
||||
|
||||
std::string read_intel_pstate_no_turbo() {
|
||||
assert(has_intel_pstate_no_turbo());
|
||||
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
|
||||
// SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
|
||||
std::ifstream ifs(path, std::ifstream::in);
|
||||
std::string result;
|
||||
std::getline(ifs, result);
|
||||
|
@@ -1,31 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
#include "perfect/detail/fs.hpp"
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
|
||||
// True when the acpi-cpufreq boost control file can be opened for reading.
bool has_acpi_cpufreq_boost() {
  std::ifstream probe("/sys/devices/system/cpu/cpufreq/boost");
  return static_cast<bool>(probe);
}
|
||||
|
||||
int write_acpi_cpufreq_boost(const std::string &s) {
|
||||
Result write_acpi_cpufreq_boost(const std::string &s) {
|
||||
assert(has_acpi_cpufreq_boost());
|
||||
std::string path("/sys/devices/system/cpu/cpufreq/boost");
|
||||
SPDLOG_LOGGER_TRACE(logger::console(), "writing to {}", path);
|
||||
std::ofstream ofs(path, std::ofstream::out);
|
||||
ofs << s;
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
SPDLOG_LOGGER_TRACE(logger::console(), "error writing to {}", path);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
return write_str(path, s);
|
||||
}
|
||||
|
||||
std::string read_acpi_cpufeq_boost() {
|
||||
assert(has_acpi_cpufreq_boost());
|
||||
std::string path("/sys/devices/system/cpu/cpufreq/boost");
|
||||
SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
|
||||
std::ifstream ifs(path, std::ifstream::in);
|
||||
std::string result;
|
||||
std::getline(ifs, result);
|
||||
@@ -37,11 +30,12 @@ std::string read_acpi_cpufeq_boost() {
|
||||
}
|
||||
|
||||
Result disable_cpu_turbo() {
|
||||
write_acpi_cpufeq_boost("0");
|
||||
return write_acpi_cpufreq_boost("0");
|
||||
}
|
||||
|
||||
Result enable_cpu_turbo() {
|
||||
write_acpi_cpufeq_boost("1");
|
||||
return write_acpi_cpufreq_boost("1");
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace perfect
|
||||
|
@@ -40,6 +40,14 @@ Result os_perf_state_maximum(const int cpu) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Request the "powersave" governor for `cpu` (Linux only; other platforms
// fail to compile). Returns the Result of set_governor().
Result os_perf_state_minimum(const int cpu) {
#if !defined(__linux__)
#error "unsupported platform"
#else
  const Result outcome = set_governor(cpu, "powersave");
  return outcome;
#endif
}
|
||||
|
||||
Result set_os_perf_state(const int cpu, OsPerfState state) {
|
||||
#ifdef __linux__
|
||||
return set_governor(cpu, state.governor);
|
||||
|
@@ -57,6 +57,8 @@ const char *get_string(const Result &result) {
|
||||
return "nvidia-ml returned not supported";
|
||||
case Result::NVML_NO_PERMISSION:
|
||||
return "nvidia-ml returned no permission";
|
||||
case Result::NVML_UNINITIALIZED:
|
||||
return "nvidia-ml returned uninitialized";
|
||||
case Result::NOT_SUPPORTED:
|
||||
return "unsupported operation";
|
||||
default:
|
||||
|
@@ -34,8 +34,31 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
|
||||
-Wfatal-errors\
|
||||
")
|
||||
|
||||
add_executable(enable-turbo enable_turbo.cpp)
|
||||
target_link_libraries(enable-turbo perfect)
|
||||
add_executable(enable-cpu-turbo enable_cpu_turbo.cpp)
|
||||
target_link_libraries(enable-cpu-turbo perfect)
|
||||
|
||||
add_executable(disable-cpu-turbo disable_cpu_turbo.cpp)
|
||||
target_link_libraries(disable-cpu-turbo perfect)
|
||||
|
||||
add_executable(sync-drop-caches sync_drop_caches.cpp)
|
||||
target_link_libraries(sync-drop-caches perfect)
|
||||
|
||||
add_executable(no-aslr no_aslr.cpp)
|
||||
target_link_libraries(no-aslr perfect)
|
||||
|
||||
add_executable(max-os-perf max_os_perf.cpp)
|
||||
target_link_libraries(max-os-perf perfect)
|
||||
|
||||
add_executable(min-os-perf min_os_perf.cpp)
|
||||
target_link_libraries(min-os-perf perfect)
|
||||
|
||||
## OpenMP
|
||||
find_package(OpenMP)
|
||||
if (OpenMP_FOUND)
|
||||
add_executable(stress stress.cpp)
|
||||
target_link_libraries(stress perfect)
|
||||
target_link_libraries(stress OpenMP::OpenMP_CXX)
|
||||
else(OpenMP_FOUND)
|
||||
message(WARNING "didn't find OpenMP, some benchmarks will be unavailable.")
|
||||
endif(OpenMP_FOUND)
|
||||
|
||||
|
23
tools/disable_cpu_turbo.cpp
Normal file
23
tools/disable_cpu_turbo.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/cpu_turbo.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
CpuTurboState state;
|
||||
|
||||
perfect::init();
|
||||
|
||||
PERFECT(get_cpu_turbo_state(&state));
|
||||
|
||||
if (!is_turbo_enabled(state)) {
|
||||
std::cerr << "cpu turbo already disabled\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
} else {
|
||||
PERFECT(disable_cpu_turbo());
|
||||
std::cerr << "disabled cpu turbo\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
@@ -6,7 +6,6 @@ using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
Result ret;
|
||||
CpuTurboState state;
|
||||
|
||||
perfect::init();
|
9
tools/max_os_perf.cpp
Normal file
9
tools/max_os_perf.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include "perfect/os_perf.hpp"
|
||||
|
||||
// Put every CPU reported by perfect::cpus() into its maximum OS performance
// state.
int main(void) {
  PERFECT(perfect::init());

  const auto all_cpus = perfect::cpus();
  for (const auto id : all_cpus) {
    PERFECT(perfect::os_perf_state_maximum(id));
  }

  return 0;
}
|
9
tools/min_os_perf.cpp
Normal file
9
tools/min_os_perf.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include "perfect/os_perf.hpp"
|
||||
|
||||
// Put every CPU reported by perfect::cpus() into its minimum OS performance
// state.
int main(void) {
  PERFECT(perfect::init());

  const auto all_cpus = perfect::cpus();
  for (const auto id : all_cpus) {
    PERFECT(perfect::os_perf_state_minimum(id));
  }

  return 0;
}
|
77
tools/no_aslr.cpp
Normal file
77
tools/no_aslr.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "perfect/aslr.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
PERFECT(init());
|
||||
|
||||
pid_t pid;
|
||||
int status;
|
||||
pid = fork();
|
||||
if (pid == -1) {
|
||||
// pid == -1 means error occured
|
||||
std::cerr << "can't fork, error occured\n";
|
||||
exit(EXIT_FAILURE);
|
||||
} else if (pid == 0) {
|
||||
// in the child process
|
||||
|
||||
// skip the first argument, which is this program
|
||||
std::vector<char*> args;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
args.push_back(argv[i]);
|
||||
}
|
||||
assert(args.size() > 0);
|
||||
args.push_back(nullptr);
|
||||
|
||||
PERFECT(disable_aslr());
|
||||
|
||||
// the execv() only return if error occured.
|
||||
// The return value is -1
|
||||
return execvp(args[0], args.data());
|
||||
} else {
|
||||
// parent process
|
||||
|
||||
if (waitpid(pid, &status, 0) > 0) {
|
||||
|
||||
if (WIFEXITED(status) && !WEXITSTATUS(status)) {
|
||||
// success
|
||||
exit(status);
|
||||
}
|
||||
|
||||
else if (WIFEXITED(status) && WEXITSTATUS(status)) {
|
||||
if (WEXITSTATUS(status) == 127) {
|
||||
|
||||
// execv failed
|
||||
std::cerr << "execv failed\n";
|
||||
exit(status);
|
||||
} else {
|
||||
std::cerr << "program terminated normally, but returned a non-zero status\n";
|
||||
exit(status);
|
||||
}
|
||||
} else {
|
||||
printf("program didn't terminate normally\n");
|
||||
exit(status);
|
||||
}
|
||||
} else {
|
||||
// waitpid() failed
|
||||
printf("waitpid() failed\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
return 0;
|
||||
}
|
49
tools/stress.cpp
Normal file
49
tools/stress.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <omp.h>
|
||||
#include <cstring>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
size_t numThreads = std::stoi(argv[1]);
|
||||
std::vector<size_t> totals(numThreads, 0);
|
||||
omp_set_num_threads(numThreads);
|
||||
|
||||
auto start = std::chrono::system_clock::now();
|
||||
double time = std::stod(argv[2]);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
size_t tid = omp_get_thread_num();
|
||||
double a = rand();
|
||||
while (true) {
|
||||
for (size_t i = 0; i < 500; ++i) {
|
||||
double x;
|
||||
asm volatile(""::"r"(a));
|
||||
x = sqrt(a);
|
||||
asm volatile(""::"r"(x));
|
||||
|
||||
asm volatile(""::"r"(a));
|
||||
x = sqrt(a);
|
||||
asm volatile(""::"r"(x));
|
||||
}
|
||||
totals[tid] += 1000;
|
||||
auto elapsed = (std::chrono::system_clock::now() - start).count() / 1e9;
|
||||
if (elapsed > time) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t sum = 0;
|
||||
for (auto t : totals) {
|
||||
sum += t;
|
||||
}
|
||||
std::cout << (double)sum / time << "\n";
|
||||
|
||||
};
|
Reference in New Issue
Block a user