41 Commits

Author SHA1 Message Date
Carl Pearson
fabfecd306 Update README.md
Some checks failed
CI / build_cuda10-1 (push) Failing after 10s
CI / build (push) Failing after 2s
2019-10-02 07:34:58 -05:00
Carl Pearson
4c0eabed89 add a tool to fix broken cpusets 2019-10-01 14:48:00 -05:00
Carl Pearson
46ca4d00ef perfect-cli cleans up on SIGINT, fixed a problem where cpu_set would silently fail 2019-10-01 14:31:36 -05:00
Carl Pearson
bbda6e1262 add interface for scheduling priority 2019-10-01 06:55:50 -05:00
Carl Pearson
343b2b35ca remove test from actions on CUDA job 2019-09-30 15:08:08 -05:00
Carl Pearson
c28e7b0945 add -h --help flag 2019-09-30 13:23:25 -05:00
Carl Pearson
46aa8c85ac run build/tools/perfect-cli -h in test step 2019-09-30 13:07:19 -05:00
Carl Pearson
7b6332c90e add test -h to binary 2019-09-30 12:07:29 -05:00
Carl Pearson
cc92923509 drop fs caches before each iteration 2019-09-30 12:04:52 -05:00
Carl Pearson
09e8757f72 . 2019-09-30 11:56:08 -05:00
Carl Pearson
1695ebb8ea Add -n flag, change --no-aslr to --aslr, add --stdout and --stderr, chown outputs when run with sudo 2019-09-30 11:51:04 -05:00
Carl Pearson
158bffa61f always change CPU turbo state 2019-09-26 12:30:10 -05:00
Carl Pearson
057fec7411 --no-cpu-turbo -> --cpu-turbo 2019-09-26 12:24:37 -05:00
Carl Pearson
a8d83417e8 add drop fs caches to tools/perfect-cli 2019-09-26 11:02:53 -05:00
Carl Pearson
1b3cf604a8 OsPerfState saves for all CPUs 2019-09-26 10:58:01 -05:00
Carl Pearson
d576ac099d add tools/perfect-cli 2019-09-26 10:37:26 -05:00
Carl Pearson
aff90d408e add NO_TASK result 2019-09-26 10:37:14 -05:00
Carl Pearson
6ace6932a7 simplify addrs 2019-09-26 08:56:46 -05:00
Carl Pearson
33243fe3bb add some discussion of ASLR tools 2019-09-25 15:49:20 -05:00
Carl Pearson
64eb67cc2d add tools/addrs 2019-09-25 15:45:18 -05:00
Carl Pearson
cd9a95365f changelog, bump version
Some checks failed
CI / build_cuda10-1 (push) Failing after 2s
CI / build (push) Failing after 3s
2019-09-25 12:31:56 -05:00
Carl Pearson
57bf39bb97 add stress, os_perf_state_minimum, min-os-perf, max-os-perf 2019-09-25 12:27:06 -05:00
Carl Pearson
c358f18c22 add enable-cpu-turbo, disable-cpu-turbo 2019-09-25 10:47:42 -05:00
Carl Pearson
37c61fe2fb bump version
Some checks failed
CI / build_cuda10-1 (push) Failing after 2s
CI / build (push) Failing after 2s
2019-09-25 10:05:55 -05:00
Carl Pearson
f2961b3075 v0.4.0 changelog 2019-09-25 10:05:42 -05:00
Carl Pearson
3a86aef546 Squashed commit of the following:
commit 3007df0d153ade6d328321dad8f88d63869159c8
Merge: 7557851 cd14d68
Author: Carl Pearson <pearson@illinois.edu>
Date:   Wed Sep 25 09:59:54 2019 -0500

    Merge branch 'master' into feature/aslr

commit 7557851508f0e1c2d75244267c5f78fb3d1ca303
Author: Carl Pearson <pearson@illinois.edu>
Date:   Tue Sep 24 07:50:15 2019 -0500

    wish list

commit 0bbbac31f354c16ae1942dbfe4683a66ec260498
Author: Carl Pearson <pearson@illinois.edu>
Date:   Tue Sep 24 07:49:36 2019 -0500

    ASLR documentation

commit f1ae37e057792696a739e30ecdbd09e071b8a7d4
Author: Carl Pearson <pearson@illinois.edu>
Date:   Tue Sep 24 07:45:54 2019 -0500

    add ASLR interface
2019-09-25 10:03:32 -05:00
Carl Pearson
cd14d68c47 check for existance using stat 2019-09-25 09:56:57 -05:00
Carl Pearson
2e32089786 improve reporting in detail/fs 2019-09-25 09:36:19 -05:00
Carl Pearson
7503a29a5c improve reporting in detail/fs 2019-09-25 09:34:45 -05:00
Carl Pearson
02e0c7c464 use detail::fs in detail/turbo 2019-09-25 08:54:46 -05:00
Carl Pearson
1682a05d08 finish get_string 2019-09-25 08:49:56 -05:00
Carl Pearson
14791badb1 fix powerpc namespace issues 2019-09-25 09:42:37 -04:00
Carl Pearson
7890d17b57 Update README.md 2019-09-25 08:34:07 -04:00
Carl Pearson
3bf2fd1df2 Update README.md 2019-09-24 12:00:12 -05:00
Carl Pearson
227c4ebb11 wish list 2019-09-24 06:59:41 -05:00
Carl Pearson
3ff80c86f2 related work 2019-09-24 06:54:47 -05:00
Carl Pearson
3da56a3a4a bump version
Some checks failed
CI / build_cuda10-1 (push) Failing after 3s
CI / build (push) Failing after 2s
2019-09-24 06:50:45 -05:00
Carl Pearson
196c8a20b1 changelog 2019-09-24 06:50:29 -05:00
Carl Pearson
d6c861719f Squashed commit of the following:
commit b96ddedf4ffbba57faaaf8bf18781a7abfb9d4c1
Author: Carl Pearson <pearson@illinois.edu>
Date:   Mon Sep 23 16:53:54 2019 -0500

    add newline to result.hpp

commit c7e9f6ff4775bf86f9af216cbe311f65bf985f63
Author: Carl Pearson <pearson@illinois.edu>
Date:   Mon Sep 23 16:53:09 2019 -0500

    add EPERM to fs operations

commit bac918fd022006cad0da899c06ac31e9db59a2fb
Author: Carl Pearson <pearson@illinois.edu>
Date:   Mon Sep 23 16:49:25 2019 -0500

    add filesystem cache interface
2019-09-23 16:54:33 -05:00
Carl Pearson
7f1b1289bb wish list 2019-09-20 16:26:47 -05:00
Carl Pearson
1c917c3154 minor fixes 2019-09-20 15:23:47 -05:00
31 changed files with 10195 additions and 65 deletions

View File

@@ -38,6 +38,7 @@ jobs:
g++ --version
nvcc --version
make VERBOSE=1
build:
runs-on: ubuntu-latest
steps:
@@ -61,3 +62,6 @@ jobs:
cd build
g++ --version
make VERBOSE=1
- name: test
run: |
build/tools/perfect-cli -h

View File

@@ -5,7 +5,7 @@
# 3.13+ for target_link_directories
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(perfect LANGUAGES CXX VERSION 0.2.0)
project(perfect LANGUAGES CXX VERSION 0.5.0)
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
include(CheckLanguage)

186
README.md
View File

@@ -2,12 +2,9 @@
| Branch | Status |
|-|-|
| master |[![Build Status](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2Fcwpearson%2Fperfect%2Fbadge%3Fref%3Dmaster&style=flat)](https://actions-badge.atrox.dev/cwpearson/perfect/goto?ref=master)|
| master |[![Build Status](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2Fcwpearson%2Fperfect%2Fbadge%3Fref%3Dmaster&style=flat)](https://actions-badge.atrox.dev/cwpearson/perfect/goto?ref=master) |
CPU/GPU performance control library for benchmarking
* x86
* POWER
* Nvidia
CPU/GPU Performance control library for benchmarking on Linux, x86, POWER, and Nvidia.
## Features
@@ -18,6 +15,12 @@ CPU/GPU performance control library for benchmarking
- [x] Disable GPU turbo (nvidia)
- [x] Flush addresses from cache (amd64, POWER)
- [x] CUDA not required (GPU functions will not be compiled)
- [x] Flush file system caches (linux)
- [x] Disable ASLR (linux)
- [x] process priority interface (linux)
## Contributors
* [Carl Pearson](https://cwpearson.github.io)
## Installing
@@ -57,7 +60,87 @@ If you don't have CUDA, then you could just do
g++ code_using_perfect.cpp -I perfect/include
```
## Usage
## Tools Usage
### tools/perfect-cli
`perfect` provides some useful tools on Linux:
```
$ tools/perfect-cli -h
SYNOPSIS
./tools/perfect-cli --no-mod [-n <INT>] -- <cmd>...
./tools/perfect-cli ([-u <INT>] | [-s <INT>]) [--no-drop-cache] [--no-max-perf] [--aslr]
[--cpu-turbo] [--stdout <PATH>] [--stderr <PATH>] [-n <INT>] -- <cmd>...
OPTIONS
--no-mod don't control performance
-u number of unshielded CPUs
-s number of shielded CPUs
--no-drop-cache do not drop filesystem caches
--no-max-perf do not max os perf
--aslr enable ASLR
--cpu-turbo enable CPU turbo
--stdout redirect child stdout
--stderr redirect child stderr
-n run multiple times
```
The basic usage is `tools/perfect-cli -- my-exe`, which will attempt to configure the system for repeatable performance before executing `my-exe`, and then restore the system to the original performance state before exiting.
Most modifications require elevated privileges.
The default behavior is to:
* disable ASLR
* set CPU performance to maximum
* disable CPU turbo
* drop filesystem caches before each iteration
Some options (all should provided before the `--` option):
* `--no-mod` flag will cause `perfect-cli` to not modify the system performance state
* `-n INT` will run the requested program `INT` times.
* `--stderr`/`--stdout` will redirect the program-under-test's stderr and stdout to the provided paths.
* `-s`/`-u`: set the number of shielded /unshielded CPUs. The program-under-test will run on the shielded CPUs. All other tasks will run on the unshielded CPUs.
A common invocation might look like:
```
sudo tools/perfect-cli -n 5 --stderr=run.err --stdout=run.out -- ./my-benchmark
```
This will disable ASLR, set CPU performance to maximum, disable CPU turbo, and then run `./my-benchmark` 5 times after dropping the filesystem cache before each run, redirecting stdout/stderr of ./my-benchmark to `run.out`/`run.err`.
The owner of `run.out` and `run.err` will be set to whichever user called `sudo`.
### tools/addr
Print the address of `main`, a stack variable, and a heap variable.
Useful for demoing ASLR.
### tools/no-aslr
Disable ASLR on the provided execution.
With ASLR, addresses are different with each invocation
```
$ tools/addr
main: 94685074364704
stack: 140734279743492
heap: 94685084978800
$ tools/addr
main: 93891046344992
stack: 140722671706708
heap: 93891068624496
```
Without ASLR, addresses are the same in each invocation
```
$ tools/no-aslr tools/addrs
main: 93824992233760
stack: 140737488347460
heap: 93824994414192
$ tools/no-aslr tools/addrs
main: 93824992233760
stack: 140737488347460
heap: 93824994414192
```
## API Usage
The `perfect` functions all return a `perfect::Result`, which is defined in [include/perfect/result.hpp].
When things are working, it will be `perfect::Result::SUCCESS`.
@@ -68,14 +151,26 @@ perfect::CpuTurboState state;
PERFECT(perfect::get_cpu_turbo_state(&state));
```
## Monitoring
### High Priority
`perfect` can set high scheduling priority for a process
See [examples/high_priority.cpp](examples/high_priority.cpp)
```c++
#include "perfect/priority.hpp"
```
* `Result set_high_priority()`: set the highest possible scheduling priority for the calling process
### Monitoring
`perfect` can monitor and record GPU activity.
See [examples/gpu_monitor.cu](examples/gpu_monitor.cu)
```c++
#include "perfect/cpu_monitor.hpp"
#include "perfect/gpu_monitor.hpp"
```
* `Monitor(std::ostream *stream)`: create a monitor that will write to `stream`.
@@ -84,11 +179,42 @@ See [examples/gpu_monitor.cu](examples/gpu_monitor.cu)
* `void Monitor::pause()`: pause the monitor thread
* `void Monitor::resume()`: resume the monitor thread
### Disable ASLR
`perfect` can disable ASLR
See [tools/no_aslr.cpp](tools/no_aslr.cpp)
```c++
#include "perfect/aslr.hpp"
```
* `Result disable_aslr()`: disable ASLR
* `Result get_aslr(AslrState &state)`: save the current ASLR state
* `Result set_aslr(const AslrState &state)`: set a previously-saved ASLR state
### Flush file system caches
`perfect` can drop various filesystem caches
See [tools/sync_drop_caches.cpp](tools/sync_drop_caches.cpp)
```c++
#include "perfect/drop_caches.hpp"
```
* `Result sync()`: flush filesystem caches to disk
* `Result drop_caches(DropCaches_t mode = DropCaches_t(PAGECACHE | ENTRIES))`: remove file system caches
* `mode = PAGECACHE`: drop page caches
* `mode = ENTRIES`: drop dentries and inodes
* `mode = PAGECACHE | ENTRIES`: both
### CPU Turbo
`perfect` can enable and disable CPU boost through the Intel p-state mechanism or the ACPI cpufreq mechanism.
See [examples/cpu_turbo.cpp].
See [examples/cpu_turbo.cpp](examples/cpu_turbo.cpp).
```c++
@@ -105,21 +231,21 @@ See [examples/cpu_turbo.cpp].
`perfect` can control the OS governor on linux.
See [examples/os_perf.cpp].
See [examples/os_perf.cpp](examples/os_perf.cpp).
```c++
#include "perfect/os_perf.hpp"
```
* `Result get_os_perf_state(OsPerfState *state, const int cpu)`: Save the current OS governor mode for CPU `cpu`.
* `Result get_os_perf_state(OsPerfState &state)`: Save the current OS governor mode for all CPUs.
* `Result os_perf_state_maximum(const int cpu)`: Set the OS governor to it's maximum performance mode.
* `Result set_os_perf_state(const int cpu, OsPerfState state)`: Restore a previously-saved OS governor mode.
* `Result set_os_perf_state(OsPerfState state)`: Restore a previously-saved OS governor mode.
### GPU Turbo
`perfect` can enable/disable GPU turbo boost.
See [examples/gpu_turbo.cu]
See [examples/gpu_turbo.cu](examples/gpu_turbo.cu).
```c++
#include "perfect/gpu_turbo.hpp"
@@ -135,7 +261,7 @@ See [examples/gpu_turbo.cu]
`perfect` can lock GPU clocks to their maximum values.
See [examples/gpu_clocks.cu]
See [examples/gpu_clocks.cu](examples/gpu_clocks.cu).
```c++
#include "perfect/gpu_clocks.hpp"
@@ -148,7 +274,7 @@ See [examples/gpu_clocks.cu]
`perfect` can flush data from CPU caches. Unlike the other APIs, these do not return a `Result` because they do not fail.
See [examples/cpu_cache.cpp].
See [examples/cpu_cache.cpp](examples/cpu_cache.cpp).
```c++
#include "perfect/cpu_cache.hpp"
@@ -156,8 +282,21 @@ See [examples/cpu_cache.cpp].
* `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes.
## Changelog
* v0.5.0
* add tools/stress
* add tools/max-os-perf
* add tools/min-os-perf
* add tools/enable-cpu-turbo
* add tools/disable-cpu-turbo
* v0.4.0
* Add ASLR interface
* Disambiguate some filesystem errors
* Fix some powerpc namespace issues
* v0.3.0
* Add filesystem cache interface
* v0.2.0
* add GPU monitoring
* Make CUDA optional
@@ -171,4 +310,19 @@ See [examples/cpu_cache.cpp].
## Wish List
- [ ] nothing right now
- [ ] only monitor certain GPUs
- [ ] hyperthreading interface
## Related
* [LLVM benchmarking instructions](https://llvm.org/docs/Benchmarking.html#linux) covering ASLR, Linux governor, cpuset shielding, SMT, and Intel turbo.
* [easyperf.net blog post](https://easyperf.net/blog/2019/08/02/Perf-measurement-environment-on-Linux#2-disable-hyper-threading) discussing ACPI/Intel turbo, SMT, Linux governor, CPU affinity, process priority, file system caches, and ASLR.
* [parttimenerd/temci](https://github.com/parttimenerd/temci) benchmarking tool for cpu sheilding and disabling hyperthreading, among other things.
* [aclements/perflock](https://github.com/aclements/perflock) tool for locking CPU frequency scaling domains
* [lpechacek/cpuset](https://github.com/lpechacek/cpuset) python package/tool for managing CPU shielding
## Acks
* Uses [muellan/clipp](https://github.com/muellan/clipp) for cli option parsing.
* Uses [martinmoene/optional-lite](https://github.com/martinmoene/optional-lite).

View File

@@ -43,6 +43,9 @@ target_link_libraries(cpu-turbo perfect)
add_executable(os-perf os_perf.cpp)
target_link_libraries(os-perf perfect)
add_executable(high-priority high_priority.cpp)
target_link_libraries(high-priority perfect)
if(CMAKE_CUDA_COMPILER)
add_executable(gpu-clocks gpu_clocks.cu)
target_link_libraries(gpu-clocks perfect)

View File

@@ -0,0 +1,12 @@
#include <iostream>
#include "perfect/priority.hpp"
int main(void) {
perfect::init();
PERFECT(perfect::set_high_priority());
// do things with high process scheduling priority
}

View File

@@ -5,23 +5,20 @@
int main(void) {
perfect::init();
std::map<int, perfect::OsPerfState> states;
// os performance state for each cpu
perfect::OsPerfState state;
// store the current state
PERFECT(perfect::get_os_perf_state(state));
// max state for each cpu
for (auto cpu : perfect::cpus()) {
perfect::OsPerfState state;
perfect::Result result;
result = perfect::get_os_perf_state(&state, cpu);
if (perfect::Result::SUCCESS == result) {
states[cpu] = state;
}
perfect::os_perf_state_maximum(cpu);
PERFECT(perfect::os_perf_state_maximum(cpu));
}
// do things with all CPUs set to the maximum performancem mode by the OS
for (auto kv : states) {
int cpu = kv.first;
perfect::OsPerfState state = kv.second;
perfect::set_os_perf_state(cpu, state);
}
// restore original state
PERFECT(perfect::set_os_perf_state(state));
}

40
include/perfect/aslr.hpp Normal file
View File

@@ -0,0 +1,40 @@
#pragma once
#include <cerrno>
#include <iostream>
#ifdef __linux__
#include "detail/os/linux.hpp"
#endif
#include "init.hpp"
#include "result.hpp"
namespace perfect {
struct AslrState {
#ifdef __linux__
unsigned long persona;
#else
#error "unsupported platform"
#endif
};
Result get_aslr(AslrState &state) {
int persona;
PERFECT_SUCCESS_OR_RETURN(detail::get_personality(persona));
state.persona = persona;
return Result::SUCCESS;
}
Result set_aslr(const AslrState &state) {
return detail::set_personality(state.persona);
}
Result disable_aslr() {
int persona;
PERFECT_SUCCESS_OR_RETURN(detail::get_personality(persona));
persona |= ADDR_NO_RANDOMIZE;
return detail::set_personality(persona);
}
} // namespace perfect

385
include/perfect/cpu_set.hpp Normal file
View File

@@ -0,0 +1,385 @@
#pragma once
#include <sys/mount.h>
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <cstring>
#include <fstream>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "detail/fs.hpp"
#include "init.hpp"
#include "result.hpp"
std::set<int> operator-(const std::set<int> &lhs, const std::set<int> &rhs) {
std::set<int> result;
for (auto e : lhs) {
if (0 == rhs.count(e)) {
result.insert(e);
}
}
return result;
}
// intersection
std::set<int> operator&(const std::set<int> &lhs, const std::set<int> &rhs) {
std::set<int> result;
for (auto e : lhs) {
if (1 == rhs.count(e)) {
result.insert(e);
}
}
return result;
}
std::string remove_space(const std::string &s) {
std::string result;
for (auto c : s) {
if (!isspace(c)) {
result += c;
}
}
return result;
}
// like "8" or "8-10"
std::set<int> parse_token(const std::string &token) {
// std::cerr << "parse_token: parsing '" << s << "'\n";
std::set<int> result;
std::string s = token;
// ignore empty string
if (s.empty()) {
return result;
}
// remove newline
s = remove_space(s);
size_t pos = 0;
int first = std::stoi(s, &pos);
// std::cerr << "parse_token: found '" << first << "'\n";
// single int
if (pos == s.length()) {
result.insert(first);
return result;
}
// next char should be a "-"
assert(s[pos] == '-');
std::string rest = s.substr(pos + 1);
int second = std::stoi(rest, &pos);
// std::cerr << "parse_token: found '" << second << "'\n";
// insert first-second
// std::cerr << "parse_token: range " << first << " to " << second << "\n";
for (int i = first; i <= second; ++i) {
result.insert(i);
}
return result;
}
std::set<int> parse_cpuset(const std::string &s) {
std::set<int> result;
std::string token;
std::stringstream ss(s);
while (std::getline(ss, token, ',')) {
if ("\n" != token) {
auto newCpus = parse_token(token);
for (auto cpu : newCpus) {
result.insert(cpu);
}
}
}
return result;
}
// http://man7.org/linux/man-pages/man7/cpuset.7.html
namespace perfect {
class CpuSet {
public:
std::string path_;
CpuSet *parent_;
CpuSet() : path_(""), parent_(nullptr) {}
CpuSet(const CpuSet &other) : path_(other.path_), parent_(other.parent_) {}
// make sure cpuset system is initialized
static Result init() {
// check for "nodev cpuset" in /proc/filesystems
// mkdir /dev/cpuset
if (mkdir("/dev/cpuset", S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) {
switch (errno) {
case EEXIST: {
// okay
break;
}
case EACCES:
// std::cerr << "access error in mkdir: " << strerror(errno) << "\n";
return Result::NO_PERMISSION;
case ENOENT:
case EROFS:
default:
std::cerr << "unhandled error in mkdir: " << strerror(errno) << "\n";
return Result::UNKNOWN;
}
// mount -t cpuset none /dev/cpuset
if (mount("none", "/dev/cpuset", "cpuset", 0, nullptr)) {
switch (errno) {
case EEXIST: {
// okay
break;
}
case EBUSY: {
// FIXME: something is mounted here, assume it is what we want
// std::cerr << "EBUSY in mount: " << strerror(errno) << "\n";
return Result::SUCCESS;
}
case EPERM: {
// std::cerr << "EPERM in mount: " << strerror(errno) << "\n";
return Result::NO_PERMISSION;
}
case ENOENT:
case EROFS:
default:
std::cerr << "unhandled error in mount: " << strerror(errno) << "\n";
return Result::UNKNOWN;
}
}
}
return Result::SUCCESS;
}
std::string get_raw_cpus() const {
std::string path = path_ + "/cpuset.cpus";
std::ifstream is(path);
std::stringstream ss;
ss << is.rdbuf();
return remove_space(ss.str());
}
std::string get_raw_mems() const {
std::ifstream is(path_ + "/cpuset.mems");
std::stringstream ss;
ss << is.rdbuf();
return remove_space(ss.str());
}
std::set<int> get_cpus() const { return parse_cpuset(get_raw_cpus()); }
std::set<int> get_mems() const { return parse_cpuset(get_raw_mems()); }
// migrate the caller task from this cpu set to another
Result migrate_self_to(CpuSet &other) {
// enable memory migration in other
other.enable_memory_migration();
// get my pid
pid_t self = this_task();
// read this tasks and write each line to other.tasks
std::ifstream is(path_ + "/tasks");
std::string line;
while (std::getline(is, line)) {
line = remove_space(line);
if (std::to_string(self) == line) {
// std::cerr << "migrating self task " << line << " to " << other.path_
// << "\n";
pid_t pid = std::stoi(line);
return other.write_task(pid);
}
}
return Result::NO_TASK;
}
// migrate tasks in this cpu set to another
Result migrate_tasks_to(CpuSet &other) {
// other must have cpus and mems
auto s = other.get_cpus();
assert(!other.get_cpus().empty());
assert(!other.get_mems().empty());
// enable memory migration in other
PERFECT_SUCCESS_OR_RETURN(other.enable_memory_migration());
// read this tasks and write each line to other.tasks
std::ifstream is(path_ + "/tasks");
std::string line;
while (std::getline(is, line)) {
pid_t pid = std::stoi(line);
// std::cerr << "migrating task " << pid << " to " << other.path_ << "\n";
Result result = other.write_task(pid);
if (Result::ERRNO_INVALID == result) {
// std::cerr << "task " << pid << " is unmovable\n";
} else {
PERFECT_SUCCESS_OR_RETURN(result);
}
}
return Result::SUCCESS;
}
Result enable_memory_migration() {
return detail::write_str(path_ + "/cpuset.memory_migrate", "1");
}
Result write_task(pid_t pid) {
return detail::write_str(path_ + "/tasks", std::to_string(pid) + "\n");
}
static Result get_affinity(std::set<int> &cpus, pid_t pid) {
cpu_set_t mask;
CPU_ZERO(&mask);
if (sched_getaffinity(pid, sizeof(mask), &mask)) {
return from_errno(errno);
}
cpus.clear();
for (int i = 0; i < CPU_SETSIZE; ++i) {
if
CPU_ISSET(i, &mask) { cpus.insert(i); }
}
return Result::SUCCESS;
}
// object representing the root CPU set
static Result get_root(CpuSet &root) {
PERFECT_SUCCESS_OR_RETURN(CpuSet::init());
root.path_ = "/dev/cpuset";
root.parent_ = nullptr;
return Result::SUCCESS;
}
// the ID of this task
static pid_t this_task() { return getpid(); }
Result make_child(CpuSet &child, const std::string &name) {
if (mkdir((path_ + "/" + name).c_str(),
S_IRUSR | S_IWUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
switch (errno) {
case EEXIST: {
// okay
break;
}
case EACCES:
return Result::NO_PERMISSION;
default:
return Result::UNKNOWN;
}
}
child.path_ = path_ + "/" + name;
child.parent_ = this;
return Result::SUCCESS;
}
std::vector<CpuSet> get_children() { assert(false && "unimplemented"); }
Result enable_cpu(const int cpu) {
std::set<int> cpus = get_cpus();
cpus.insert(cpu);
return write_cpus(cpus);
}
Result enable_cpus(const std::set<int> &cpus) {
std::set<int> finalCpus = get_cpus();
for (auto cpu : cpus) {
finalCpus.insert(cpu);
}
return write_cpus(finalCpus);
}
Result write_cpus(std::set<int> cpus) {
std::string str;
bool comma = false;
for (auto cpu : cpus) {
if (comma)
str += ",";
str += std::to_string(cpu) + "-" + std::to_string(cpu);
comma = true;
}
return detail::write_str(path_ + "/cpuset.cpus", str);
}
Result write_mems(std::set<int> mems) {
std::string str;
bool comma = false;
for (auto mem : mems) {
if (comma)
str += ",";
str += std::to_string(mem) + "-" + std::to_string(mem);
comma = true;
}
return detail::write_str(path_ + "/cpuset.mems", str);
}
Result enable_mem(const int mem) {
std::set<int> mems = get_mems();
mems.insert(mem);
return write_mems(mems);
}
Result enable_mems(const std::set<int> &mems) {
std::set<int> finalMems = get_mems();
for (auto mem : mems) {
finalMems.insert(mem);
}
return write_mems(finalMems);
}
Result destroy() {
// already destroyed
if (!detail::path_exists(path_)) {
return Result::SUCCESS;
}
// remove all child cpu sets
// move all attached processes back to parent
assert(parent_ && "should not call destroy on root cpuset");
PERFECT_SUCCESS_OR_RETURN(migrate_tasks_to(*parent_));
// remove with rmdir
Result result = Result::UNKNOWN;
if (rmdir(path_.c_str())) {
switch (errno) {
case ENOENT:
// already gone
result = Result::SUCCESS;
break;
default:
std::cerr << "unhandled error in rmdir: " << strerror(errno) << "\n";
result = Result::UNKNOWN;
}
}
path_ = "";
return result;
}
};
std::ostream &operator<<(std::ostream &s, const CpuSet &c) {
s << c.path_;
return s;
}
} // namespace perfect

View File

@@ -1,5 +1,8 @@
#pragma once
namespace perfect {
namespace detail {
inline void flush_line(void *p) {
/*
@@ -32,4 +35,7 @@ inline void barrier_all() {
: "memory");
}
}
}
}

View File

@@ -0,0 +1,57 @@
#pragma once
#include <cstring>
#include <fstream>
#include <string>
#include "../result.hpp"
#ifdef __linux__
#include "fs/linux.hpp"
#else
#error "unsupported platform"
#endif
namespace perfect {
namespace detail {
Result write_str(const std::string &path, const std::string &val) {
if (!path_exists(path)) {
std::cerr << "write_str(): does not exist: " << path << "\n";
return Result::NOT_SUPPORTED;
}
std::ofstream ofs(path);
if (ofs.fail()) {
std::cerr << "failed to open " << path << "\n";
return Result::NO_PERMISSION;
}
ofs << val;
ofs.close();
if (ofs.fail()) {
switch (errno) {
case EACCES:
// std::cerr << "EACCES when writing to " << path << "\n";
return Result::NO_PERMISSION;
case EPERM:
// std::cerr << "EPERM when writing to " << path << "\n";
return Result::NO_PERMISSION;
case ENOENT:
// std::cerr << "ENOENT when writing to " << path << "\n";
return Result::NOT_SUPPORTED;
case EINVAL:
// std::cerr << "EINVAL when writing to " << path << "\n";
return Result::ERRNO_INVALID;
default:
std::cerr << strerror(errno) << " when writing " << val << " to " << path
<< "\n";
return Result::UNKNOWN;
}
}
return Result::SUCCESS;
}
} // namespace detail
} // namespace perfect

View File

@@ -0,0 +1,31 @@
#pragma once
#include <string>
#include <cerrno>
#include <iostream>
#include <cassert>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
namespace perfect {
namespace detail {
bool path_exists(const std::string &path) {
struct stat sb;
if (stat(path.c_str(), &sb)) {
switch (errno) {
case ENOENT: return false;
case ENOTDIR: return false;
default: {
std::cerr << "unhandled error in stat() for " << path << "\n";
assert(0);
}
}
}
return true;
}
}
}

View File

@@ -12,6 +12,9 @@
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/personality.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "perfect/result.hpp"
@@ -88,4 +91,44 @@ size_t cache_linesize() {
#endif
}
namespace detail {
Result get_personality(int &persona) {
int ret = personality(0xffffffff);
if (-1 == ret) {
return Result::UNKNOWN;
} else {
persona = ret;
}
return Result::SUCCESS;
}
Result set_personality(const int persona) {
int ret = personality(persona);
if (-1 == ret) {
return Result::UNKNOWN;
}
return Result::SUCCESS;
}
// give the calling process the highest priority
Result set_high_priority() {
if (setpriority(PRIO_PROCESS, 0, -20)) {
return from_errno(errno);
}
return Result::SUCCESS;
}
// disable all but one SMT thread for all CPUs the calling process can run on
Result disable_smt() {
return Result::NOT_SUPPORTED;
}
// enable SMT for all CPUs the calling process can run on
Result enable_smt() {
return Result::NOT_SUPPORTED;
}
} // namespace detail
} // namespace perfect

View File

@@ -4,6 +4,7 @@
#include <fstream>
#include "perfect/result.hpp"
#include "perfect/detail/fs.hpp"
namespace perfect {
namespace detail {
@@ -15,19 +16,12 @@ bool has_intel_pstate_no_turbo() {
Result write_intel_pstate_no_turbo(const std::string &s) {
assert(has_intel_pstate_no_turbo());
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
std::ofstream ofs(path, std::ofstream::out);
ofs << s;
ofs.close();
if (ofs.fail()) {
return Result::NO_PERMISSION;
}
return Result::SUCCESS;
return write_str(path, s);
}
std::string read_intel_pstate_no_turbo() {
assert(has_intel_pstate_no_turbo());
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
// SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
std::ifstream ifs(path, std::ifstream::in);
std::string result;
std::getline(ifs, result);

View File

@@ -1,31 +1,24 @@
#pragma once
#include "perfect/result.hpp"
#include "perfect/detail/fs.hpp"
namespace perfect {
namespace detail {
bool has_acpi_cpufreq_boost() {
return bool(std::ifstream("/sys/devices/system/cpu/cpufreq/boost"));
}
int write_acpi_cpufreq_boost(const std::string &s) {
Result write_acpi_cpufreq_boost(const std::string &s) {
assert(has_acpi_cpufreq_boost());
std::string path("/sys/devices/system/cpu/cpufreq/boost");
SPDLOG_LOGGER_TRACE(logger::console(), "writing to {}", path);
std::ofstream ofs(path, std::ofstream::out);
ofs << s;
ofs.close();
if (ofs.fail()) {
SPDLOG_LOGGER_TRACE(logger::console(), "error writing to {}", path);
return 1;
}
return 0;
return write_str(path, s);
}
std::string read_acpi_cpufeq_boost() {
assert(has_acpi_cpufreq_boost());
std::string path("/sys/devices/system/cpu/cpufreq/boost");
SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
std::ifstream ifs(path, std::ifstream::in);
std::string result;
std::getline(ifs, result);
@@ -37,11 +30,12 @@ std::string read_acpi_cpufeq_boost() {
}
Result disable_cpu_turbo() {
write_acpi_cpufeq_boost("0");
return write_acpi_cpufreq_boost("0");
}
Result enable_cpu_turbo() {
write_acpi_cpufeq_boost("1");
return write_acpi_cpufreq_boost("1");
}
}
} // namespace detail
} // namespace perfect

View File

@@ -0,0 +1,43 @@
#pragma once
#include <unistd.h>
#include <fstream>
#include <iostream>
#include "result.hpp"
#include "init.hpp"
#include "detail/fs.hpp"
namespace perfect {
enum DropCaches_t {
PAGECACHE = 0x1,
ENTRIES = 0x2
};
// commit filesystem caches to disk
Result sync() {
// http://man7.org/linux/man-pages/man2/sync.2.html
::sync(); // always successful
return Result::SUCCESS;
}
Result drop_caches(const DropCaches_t mode = DropCaches_t(PAGECACHE | ENTRIES)) {
using detail::write_str;
const std::string path = "/proc/sys/vm/drop_caches";
if (mode & PAGECACHE & ENTRIES) {
PERFECT_SUCCESS_OR_RETURN(write_str(path, "3"));
} else if (mode & PAGECACHE) {
PERFECT_SUCCESS_OR_RETURN(write_str(path, "1"));
} else if (mode & ENTRIES) {
PERFECT_SUCCESS_OR_RETURN(write_str(path, "2"));
} else {
std::cerr << "unexpected mode: " << mode << "\n";
return Result::UNKNOWN;
}
return Result::SUCCESS;
}
}

View File

@@ -3,6 +3,7 @@
#include <vector>
#include <string>
#include <cassert>
#include <map>
#ifdef __linux__
#include "detail/os/linux.hpp"
@@ -17,19 +18,23 @@ namespace perfect {
struct OsPerfState {
#ifdef __linux__
std::string governor;
std::map<int, std::string> governors;
#else
#error "unsupported platform"
#endif
};
Result get_os_perf_state(OsPerfState *state, const int cpu) {
assert(state);
Result get_os_perf_state(OsPerfState &state) {
#ifdef __linux__
return get_governor(state->governor, cpu);
for (auto cpu : cpus()) {
std::string gov;
PERFECT_SUCCESS_OR_RETURN(get_governor(gov, cpu));
state.governors[cpu] = gov;
}
#else
#error "unsupported platform"
#endif
return Result::SUCCESS;
}
Result os_perf_state_maximum(const int cpu) {
@@ -40,13 +45,23 @@ Result os_perf_state_maximum(const int cpu) {
#endif
}
Result set_os_perf_state(const int cpu, OsPerfState state) {
#ifdef __linux__
return set_governor(cpu, state.governor);
Result os_perf_state_minimum(const int cpu) {
#ifdef __linux__
return set_governor(cpu, "powersave");
#else
#error "unsupported platform"
#endif
}
Result set_os_perf_state(OsPerfState state) {
#ifdef __linux__
for (auto kv : state.governors) {
PERFECT_SUCCESS_OR_RETURN(set_governor(kv.first, kv.second));
}
#else
#error "unsupported platform"
#endif
return Result::SUCCESS;
}
};

View File

@@ -0,0 +1,15 @@
#pragma once
#ifdef __linux__
#include "detail/os/linux.hpp"
#else
#error "unsupported platform"
#endif
#include "init.hpp"
namespace perfect {
Result set_high_priority() {
return detail::set_high_priority();
}
}

View File

@@ -12,11 +12,17 @@
#include <nvml.h>
#endif
#ifdef __linux__
#include <cerrno>
#endif
namespace perfect {
enum class Result {
NO_PERMISSION,
NOT_SUPPORTED,
NO_TASK,
ERRNO_INVALID,
NVML_NO_PERMISSION,
NVML_NOT_SUPPORTED,
NVML_UNINITIALIZED,
@@ -38,6 +44,23 @@ Result from_nvml(nvmlReturn_t nvml) {
case NVML_ERROR_INVALID_ARGUMENT:
case NVML_ERROR_GPU_IS_LOST:
case NVML_ERROR_UNKNOWN:
case NVML_ERROR_ALREADY_INITIALIZED:
case NVML_ERROR_NOT_FOUND:
case NVML_ERROR_INSUFFICIENT_SIZE:
case NVML_ERROR_INSUFFICIENT_POWER:
case NVML_ERROR_DRIVER_NOT_LOADED:
case NVML_ERROR_TIMEOUT:
case NVML_ERROR_IRQ_ISSUE:
case NVML_ERROR_LIBRARY_NOT_FOUND:
case NVML_ERROR_FUNCTION_NOT_FOUND:
case NVML_ERROR_CORRUPTED_INFOROM:
case NVML_ERROR_RESET_REQUIRED:
case NVML_ERROR_OPERATING_SYSTEM:
case NVML_ERROR_LIB_RM_VERSION_MISMATCH:
case NVML_ERROR_IN_USE:
case NVML_ERROR_MEMORY:
case NVML_ERROR_NO_DATA:
case NVML_ERROR_VGPU_ECC_NOT_SUPPORTED:
default:
assert(0 && "unhandled nvmlReturn_t");
}
@@ -45,20 +68,37 @@ Result from_nvml(nvmlReturn_t nvml) {
}
#endif
#ifdef __linux__
Result from_errno(int err) {
switch (err) {
default:
assert(0 && "unhandled errno");
}
return Result::UNKNOWN;
}
#endif
const char *get_string(const Result &result) {
switch (result) {
case Result::SUCCESS:
return "success";
case Result::NO_PERMISSION:
return "no permission";
case Result::NOT_SUPPORTED:
return "unsupported operation";
case Result::NO_TASK:
return "no such task";
case Result::ERRNO_INVALID:
return "errno EINVAL";
case Result::UNKNOWN:
return "unknown error";
case Result::NVML_NOT_SUPPORTED:
return "nvidia-ml returned not supported";
case Result::NVML_NO_PERMISSION:
return "nvidia-ml returned no permission";
case Result::NOT_SUPPORTED:
return "unsupported operation";
case Result::NVML_UNINITIALIZED:
return "nvidia-ml returned uninitialized";
default:
assert(0 && "unexpected perfect::Result");
}
@@ -78,3 +118,12 @@ inline void check(Result result, const char *file, const int line) {
} // namespace perfect
#define PERFECT(stmt) check(stmt, __FILE__, __LINE__);
#define PERFECT_SUCCESS_OR_RETURN(stmt) \
{ \
Result _ret; \
_ret = (stmt); \
if (_ret != Result::SUCCESS) { \
return _ret; \
} \
}

View File

@@ -34,6 +34,37 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
-Wfatal-errors\
")
add_executable(enable-turbo enable_turbo.cpp)
target_link_libraries(enable-turbo perfect)
add_executable(enable-cpu-turbo enable_cpu_turbo.cpp)
target_link_libraries(enable-cpu-turbo perfect)
add_executable(disable-cpu-turbo disable_cpu_turbo.cpp)
target_link_libraries(disable-cpu-turbo perfect)
add_executable(sync-drop-caches sync_drop_caches.cpp)
target_link_libraries(sync-drop-caches perfect)
add_executable(no-aslr no_aslr.cpp)
target_link_libraries(no-aslr perfect)
add_executable(max-os-perf max_os_perf.cpp)
target_link_libraries(max-os-perf perfect)
add_executable(min-os-perf min_os_perf.cpp)
target_link_libraries(min-os-perf perfect)
add_executable(addrs addrs.cpp)
add_executable(perfect-cli perfect.cpp)
target_link_libraries(perfect-cli perfect)
target_include_directories(perfect-cli PUBLIC thirdparty)
## OpenMP
find_package(OpenMP)
if (OpenMP_FOUND)
add_executable(stress stress.cpp)
target_link_libraries(stress perfect)
target_link_libraries(stress OpenMP::OpenMP_CXX)
else(OpenMP_FOUND)
message(WARNING "didn't find OpenMP, some benchmarks will be unavailable.")
endif(OpenMP_FOUND)

9
tools/addrs.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include <iostream>
int main(void) {
int *a = new int;
std::cout << "main: " << uintptr_t(main) << "\n";
std::cout << "stack: " << uintptr_t(&a) << "\n";
std::cout << "heap: " << uintptr_t(a) << "\n";
delete a;
}

View File

@@ -0,0 +1,23 @@
#include <iostream>
#include "perfect/cpu_turbo.hpp"
using namespace perfect;
int main(void) {
CpuTurboState state;
perfect::init();
PERFECT(get_cpu_turbo_state(&state));
if (!is_turbo_enabled(state)) {
std::cerr << "cpu turbo already disabled\n";
exit(EXIT_SUCCESS);
} else {
PERFECT(disable_cpu_turbo());
std::cerr << "disabled cpu turbo\n";
exit(EXIT_SUCCESS);
}
}

View File

@@ -6,7 +6,6 @@ using namespace perfect;
int main(void) {
Result ret;
CpuTurboState state;
perfect::init();

9
tools/max_os_perf.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include "perfect/os_perf.hpp"
int main(void) {
PERFECT(perfect::init());
for (auto cpu : perfect::cpus()) {
PERFECT(perfect::os_perf_state_maximum(cpu));
}
}

14
tools/migrate-to-cpuset.sh Executable file
View File

@@ -0,0 +1,14 @@
#! /bin/bash
while read i; do
echo $i;
echo $i > /dev/cpuset/tasks;
done < /dev/cpuset/unshielded/tasks
while read i; do
echo $i;
echo $i > /dev/cpuset/tasks;
done < /dev/cpuset/shielded/tasks
rmdir /dev/cpuset/shielded
rmdir /dev/cpuset/unshielded

9
tools/min_os_perf.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include "perfect/os_perf.hpp"
int main(void) {
PERFECT(perfect::init());
for (auto cpu : perfect::cpus()) {
PERFECT(perfect::os_perf_state_minimum(cpu));
}
}

77
tools/no_aslr.cpp Normal file
View File

@@ -0,0 +1,77 @@
#include <iostream>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstring>
#include <vector>
#include "perfect/aslr.hpp"
using namespace perfect;
int main(int argc, char **argv) {
using namespace perfect;
PERFECT(init());
pid_t pid;
int status;
pid = fork();
if (pid == -1) {
// pid == -1 means error occured
std::cerr << "can't fork, error occured\n";
exit(EXIT_FAILURE);
} else if (pid == 0) {
// in the child process
// skip the first argument, which is this program
std::vector<char*> args;
for (int i = 1; i < argc; ++i) {
args.push_back(argv[i]);
}
assert(args.size() > 0);
args.push_back(nullptr);
PERFECT(disable_aslr());
// the execv() only return if error occured.
// The return value is -1
return execvp(args[0], args.data());
} else {
// parent process
if (waitpid(pid, &status, 0) > 0) {
if (WIFEXITED(status) && !WEXITSTATUS(status)) {
// success
exit(status);
}
else if (WIFEXITED(status) && WEXITSTATUS(status)) {
if (WEXITSTATUS(status) == 127) {
// execv failed
std::cerr << "execv failed\n";
exit(status);
} else {
std::cerr << "program terminated normally, but returned a non-zero status\n";
exit(status);
}
} else {
printf("program didn't terminate normally\n");
exit(status);
}
} else {
// waitpid() failed
printf("waitpid() failed\n");
exit(EXIT_FAILURE);
}
exit(0);
}
return 0;
}

446
tools/perfect.cpp Normal file
View File

@@ -0,0 +1,446 @@
#include <cassert>
#include <cerrno>
#include <chrono>
#include <functional>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
#ifdef __linux__
#include <fcntl.h>
#include <pwd.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#else
#error "unsupported platform"
#endif
#include "clipp/clipp.h"
#include "nonstd/optional.hpp"
#include "perfect/aslr.hpp"
#include "perfect/cpu_set.hpp"
#include "perfect/cpu_turbo.hpp"
#include "perfect/detail/os/linux.hpp"
#include "perfect/drop_caches.hpp"
#include "perfect/os_perf.hpp"
#include "perfect/priority.hpp"
typedef std::function<perfect::Result()> CleanupFn;
std::vector<CleanupFn> cleanups;
// restore the system state to how we found it
void cleanup(int dummy) {
(void)dummy;
std::cerr << "caught ctrl-c\n";
// unregister our handler
signal(SIGINT, SIG_DFL);
std::cerr << "cleaning up\n";
std::cerr << "ctrl-c again to quit\n";
for (auto f : cleanups) {
perfect::Result result = f();
}
exit(EXIT_FAILURE);
}
// argv should be null-terminated
// outf and errf are file descriptors to where stdout and stderr should be
// redirected write stdout to out and stderr to err, if not null
int fork_child(char *const *argv, int outf, int errf) {
pid_t pid;
int status;
pid = fork();
if (pid == -1) {
// pid == -1 means error occured
std::cerr << "can't fork, error occured\n";
return EXIT_FAILURE;
} else if (pid == 0) {
// in the child process
if (outf > 0) {
std::cerr << "redirecting child stdout to file\n";
if (dup2(outf, 1)) {
std::cerr << "dup2 error: " << strerror(errno) << "\n";
/*
EBADF
oldfd isn't an open file descriptor, or newfd is out of the allowed
range for file descriptors. EBUSY (Linux only) This may be returned by
dup2() or dup3() during a race condition with open(2) and dup(). EINTR The
dup2() or dup3() call was interrupted by a signal; see signal(7). EINVAL
(dup3()) flags contain an invalid value. Or, oldfd was equal to newfd.
EMFILE
The process already has the maximum number of file descriptors open and
tried to open a new one.
*/
}
if (close(outf)) {
/*
EBADF
The fildes argument is not a valid file descriptor.
EINTR
The close() function was interrupted by a signal.
The close() function may fail if:
EIO
An I/O error occurred while reading from or writing to the file
system.
*/
}
}
if (errf > 0) {
std::cerr << "redirecting child stderr to file\n";
if (dup2(errf, 2)) {
std::cerr << "dup2 error: " << strerror(errno) << "\n";
/*
EBADF
oldfd isn't an open file descriptor, or newfd is out of the allowed
range for file descriptors. EBUSY (Linux only) This may be returned by
dup2() or dup3() during a race condition with open(2) and dup(). EINTR The
dup2() or dup3() call was interrupted by a signal; see signal(7). EINVAL
(dup3()) flags contain an invalid value. Or, oldfd was equal to newfd.
EMFILE
The process already has the maximum number of file descriptors open and
tried to open a new one.
*/
}
if (close(errf)) {
/*
EBADF
The fildes argument is not a valid file descriptor.
EINTR
The close() function was interrupted by a signal.
The close() function may fail if:
EIO
An I/O error occurred while reading from or writing to the file system.
*/
}
}
// the execv() only return if error occured.
// The return value is -1
return execvp(argv[0], argv);
} else {
// parent process
if (waitpid(pid, &status, 0) > 0) {
if (WIFEXITED(status) && !WEXITSTATUS(status)) {
// success
return status;
}
else if (WIFEXITED(status) && WEXITSTATUS(status)) {
if (WEXITSTATUS(status) == 127) {
std::cerr << "execv failed\n";
return status;
} else {
std::cerr << "program terminated normally, but returned a non-zero "
"status\n";
return status;
}
} else {
printf("program didn't terminate normally\n");
return status;
}
} else {
printf("waitpid() failed\n");
return EXIT_FAILURE;
}
return 0;
}
}
int main(int argc, char **argv) {
signal(SIGINT, cleanup);
using namespace clipp;
size_t numUnshielded = 0;
size_t numShielded = 0;
bool aslr = false;
nonstd::optional<bool> cpuTurbo = false;
nonstd::optional<bool> maxOsPerf = true;
bool dropCaches = true;
bool highPriority = true;
std::vector<std::string> program;
std::string stdoutPath;
std::string stderrPath;
int iters = 1;
int sleepMs = 1000;
bool help = false;
auto helpMode = option("-h", "--help").set(help).doc("show help");
auto shieldGroup = ((option("-u").doc("number of unshielded CPUs") &
value("INT", numUnshielded)) |
(option("-s").doc("number of shielded CPUs") &
value("INT", numShielded)));
auto noModMode = (option("--no-mod")
.doc("don't control performance")
.set(aslr, true)
.call([&]() { cpuTurbo = nonstd::nullopt; })
.call([&]() { maxOsPerf = nonstd::nullopt; })
.set(dropCaches, false)
.set(highPriority, false));
auto modMode = (shieldGroup,
option("--no-drop-cache")
.set(dropCaches, false)
.doc("do not drop filesystem caches"),
option("--no-max-perf").doc("do not max os perf").call([&]() {
maxOsPerf = false;
}),
option("--aslr").set(aslr, true).doc("enable ASLR"),
option("--no-priority")
.set(highPriority, false)
.doc("don't set high priority"),
option("--cpu-turbo").doc("enable CPU turbo").call([&]() {
cpuTurbo = true;
}),
(option("--stdout").doc("redirect child stdout") &
value("PATH", stdoutPath)),
(option("--stderr").doc("redirect child stderr") &
value("PATH", stderrPath)));
auto cli =
helpMode |
((noModMode | modMode),
(option("--sleep-ms").doc("sleep before run") & value("INT", sleepMs)),
(option("-n").doc("run multiple times") & value("INT", iters)), helpMode,
// run everything after "--"
required("--") & greedy(values("cmd", program))
);
if (!parse(argc, argv, cli)) {
auto fmt = doc_formatting{}.doc_column(31);
std::cout << make_man_page(cli, argv[0], fmt);
return -1;
}
if (help) {
auto fmt = doc_formatting{}.doc_column(31);
std::cout << make_man_page(cli, argv[0], fmt);
return 0;
}
// open the redirect files, if needed
int errf = 0;
int outf = 0;
if (!stderrPath.empty()) {
std::cerr << "open " << stderrPath << "\n";
errf = open(stderrPath.c_str(), O_WRONLY | O_CREAT,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (-1 == errf) {
std::cerr << "error while opening " << stderrPath << ": "
<< strerror(errno) << "\n";
}
}
if (!stdoutPath.empty()) {
outf = open(stdoutPath.c_str(), O_WRONLY | O_CREAT,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (-1 == outf) {
std::cerr << "error while opening " << stdoutPath << ": "
<< strerror(errno) << "\n";
}
}
// if called with sudo, chown the files to whoever called sudo
const char *sudoUser = std::getenv("SUDO_USER");
if (sudoUser) {
std::cerr << "called with sudo by " << sudoUser << "\n";
uid_t uid;
gid_t gid;
struct passwd *pwd;
pwd = getpwnam(sudoUser);
if (pwd == NULL) {
// die("Failed to get uid");
}
uid = pwd->pw_uid;
gid = pwd->pw_gid;
if (!stdoutPath.empty()) {
if (chown(stdoutPath.c_str(), uid, gid) == -1) {
// die("chown fail");
}
}
if (!stderrPath.empty()) {
if (chown(stderrPath.c_str(), uid, gid) == -1) {
// die("chown fail");
}
}
}
// build the program arguments
std::vector<char *> args;
for (auto &c : program) {
args.push_back((char *)c.c_str());
}
args.push_back(nullptr);
// init the perfect library
PERFECT(perfect::init());
auto cpus = perfect::cpus();
if (0 < numShielded) {
numUnshielded = cpus.size() - numShielded;
} else if (0 < numUnshielded) {
numShielded = cpus.size() - numUnshielded;
}
// handle CPU shielding
perfect::CpuSet root, shielded, unshielded;
if (numShielded) {
std::cerr << "shielding " << numShielded << " cpus\n";
PERFECT(perfect::CpuSet::get_root(root));
PERFECT(root.make_child(shielded, "shielded"));
PERFECT(root.make_child(unshielded, "unshielded"));
std::cerr << "enable memory\n";
PERFECT(shielded.enable_mem(0));
PERFECT(unshielded.enable_mem(0));
std::cerr << "enable cpus\n";
size_t i = 0;
for (; i < cpus.size() - numShielded; ++i) {
std::cerr << "unshield cpu " << cpus[i] << "\n";
unshielded.enable_cpu(cpus[i]);
}
for (; i < cpus.size(); ++i) {
std::cerr << "shield cpu " << cpus[i] << "\n";
shielded.enable_cpu(cpus[i]);
}
std::cerr << "migrate self\n";
PERFECT(root.migrate_self_to(shielded));
std::cerr << "migrate other (1/2)\n";
PERFECT(root.migrate_tasks_to(unshielded));
// some tasks may have been spawned by unmigrated tasks while we migrated
std::cerr << "migrate other (2/2)\n";
PERFECT(root.migrate_tasks_to(unshielded));
cleanups.push_back(CleanupFn([&] {
std::cerr << "cleanup: shielded cpu set\n";
shielded.destroy();
std::cerr << "cleanup: unshielded cpu set\n";
unshielded.destroy();
return perfect::Result::SUCCESS;
}));
}
// handle aslr
if (!aslr) {
std::cerr << "disable ASLR for this process\n";
PERFECT(perfect::disable_aslr());
}
// handle CPU turbo
perfect::CpuTurboState cpuTurboState;
if (cpuTurbo.has_value()) {
PERFECT(perfect::get_cpu_turbo_state(&cpuTurboState));
if (false == cpuTurbo) {
std::cerr << "disabling cpu turbo\n";
PERFECT(perfect::disable_cpu_turbo());
} else {
std::cerr << "enabling cpu turbo\n";
PERFECT(perfect::enable_cpu_turbo());
}
cleanups.push_back(CleanupFn([&] {
std::cerr << "cleanup: restore CPU turbo state\n";
return perfect::set_cpu_turbo_state(cpuTurboState);
}));
}
// handle governor
perfect::OsPerfState osPerfState;
if (maxOsPerf.has_value()) {
PERFECT(perfect::get_os_perf_state(osPerfState));
if (true == maxOsPerf) {
std::cerr << "set max performance state\n";
for (auto cpu : perfect::cpus()) {
PERFECT(perfect::os_perf_state_maximum(cpu));
}
}
cleanups.push_back(CleanupFn([&] {
std::cerr << "cleanup: os governor\n";
return perfect::set_os_perf_state(osPerfState);
}));
}
if (highPriority) {
std::cerr << "set high priority\n";
PERFECT(perfect::set_high_priority());
}
// parent should return
for (int runIter = 0; runIter < iters; ++runIter) {
// drop filesystem caches before each run
if (dropCaches) {
std::cerr << "clearing file system cache\n";
PERFECT(perfect::drop_caches());
}
// sleep before each run
if (sleepMs) {
std::cerr << "sleep " << sleepMs << " ms before run\n";
std::this_thread::sleep_for(std::chrono::milliseconds(sleepMs));
}
std::cerr << "exec ";
for (size_t i = 0; i < args.size() - 1; ++i) {
std::cerr << args[i] << " ";
}
std::cerr << "\n";
int status = fork_child(args.data(), outf, errf);
if (0 != status) {
std::cerr << "did not terminate successfully\n";
}
std::cerr << "finished execution\n";
}
// clean up CpuSets (if needed)
if (numShielded) {
std::cerr << "clean up cpu sets\n";
shielded.destroy();
unshielded.destroy();
}
// restore original turbo state
if (cpuTurbo.has_value()) {
std::cerr << "restore CPU turbo\n";
PERFECT(perfect::set_cpu_turbo_state(cpuTurboState));
}
if (maxOsPerf.has_value()) {
std::cerr << "restore os performance state\n";
PERFECT(perfect::set_os_perf_state(osPerfState));
}
return 0;
}

49
tools/stress.cpp Normal file
View File

@@ -0,0 +1,49 @@
#include <iostream>
#include <cmath>
#include <omp.h>
#include <cstring>
#include <chrono>
#include <vector>
#include <iostream>
int main(int argc, char **argv) {
size_t numThreads = std::stoi(argv[1]);
std::vector<size_t> totals(numThreads, 0);
omp_set_num_threads(numThreads);
auto start = std::chrono::system_clock::now();
double time = std::stod(argv[2]);
#pragma omp parallel
{
size_t tid = omp_get_thread_num();
double a = rand();
while (true) {
for (size_t i = 0; i < 500; ++i) {
double x;
asm volatile(""::"r"(a));
x = sqrt(a);
asm volatile(""::"r"(x));
asm volatile(""::"r"(a));
x = sqrt(a);
asm volatile(""::"r"(x));
}
totals[tid] += 1000;
auto elapsed = (std::chrono::system_clock::now() - start).count() / 1e9;
if (elapsed > time) {
break;
}
}
}
size_t sum = 0;
for (auto t : totals) {
sum += t;
}
std::cout << (double)sum / time << "\n";
};

View File

@@ -0,0 +1,14 @@
#include <iostream>
#include "perfect/drop_caches.hpp"
using namespace perfect;
int main(void) {
using namespace perfect;
PERFECT(init());
PERFECT(perfect::sync());
PERFECT(drop_caches(DropCaches_t(PAGECACHE | ENTRIES)));
}

7023
tools/thirdparty/clipp/clipp.h vendored Normal file

File diff suppressed because it is too large Load Diff

1585
tools/thirdparty/nonstd/optional.hpp vendored Normal file

File diff suppressed because it is too large Load Diff