Compare commits
13 Commits
v0.4.0
...
1695ebb8ea
Author | SHA1 | Date | |
---|---|---|---|
![]() |
1695ebb8ea | ||
![]() |
158bffa61f | ||
![]() |
057fec7411 | ||
![]() |
a8d83417e8 | ||
![]() |
1b3cf604a8 | ||
![]() |
d576ac099d | ||
![]() |
aff90d408e | ||
![]() |
6ace6932a7 | ||
![]() |
33243fe3bb | ||
![]() |
64eb67cc2d | ||
![]() |
cd9a95365f | ||
![]() |
57bf39bb97 | ||
![]() |
c358f18c22 |
@@ -5,7 +5,7 @@
|
||||
# 3.13+ for target_link_directories
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(perfect LANGUAGES CXX VERSION 0.4.0)
|
||||
project(perfect LANGUAGES CXX VERSION 0.5.0)
|
||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||
|
||||
include(CheckLanguage)
|
||||
|
98
README.md
98
README.md
@@ -59,7 +59,46 @@ If you don't have CUDA, then you could just do
|
||||
g++ code_using_perfect.cpp -I perfect/include
|
||||
```
|
||||
|
||||
## Usage
|
||||
## Tools Usage
|
||||
|
||||
`perfect` provides some useful tools on Linux:
|
||||
|
||||
```
|
||||
$ tools/perfect-cli -h
|
||||
SYNOPSIS
|
||||
./tools/perfect-cli --no-mod [-n <INT>] -- <cmd>...
|
||||
./tools/perfect-cli ([-u <INT>] | [-s <INT>]) [--no-drop-cache] [--no-max-perf] [--aslr]
|
||||
[--cpu-turbo] [--stdout <PATH>] [--stderr <PATH>] [-n <INT>] -- <cmd>...
|
||||
|
||||
OPTIONS
|
||||
--no-mod don't control performance
|
||||
-u number of unshielded CPUs
|
||||
-s number of shielded CPUs
|
||||
--no-drop-cache do not drop filesystem caches
|
||||
--no-max-perf do not max os perf
|
||||
--aslr enable ASLR
|
||||
--cpu-turbo enable CPU turbo
|
||||
--stdout redirect child stdout
|
||||
--stderr redirect child stderr
|
||||
-n run multiple times
|
||||
```
|
||||
|
||||
The basic usage is `tools/perfect-cli -- my-exe`, which will attempt to configure the system for repeatable performance before executing `my-exe`, and then restore the system to the original performance state before exiting.
|
||||
Most modifications require elevated privileges.
|
||||
The default behavior is to:
|
||||
* disable ASLR
|
||||
* drop filesystem caches
|
||||
* set CPU performance to maximum
|
||||
* disable CPU turbo
|
||||
|
||||
Some options (all should be provided before the `--` option):
|
||||
* `--no-mod` flag will cause `perfect-cli` to not modify the system performance state
|
||||
* `-n INT` will run the requested program `INT` times.
|
||||
* `--stderr`/`--stdout` will redirect the program-under-test's stderr and stdout to the provided paths.
|
||||
* `-s`/`-u`: set the number of shielded/unshielded CPUs. The program-under-test will run on the shielded CPUs. All other tasks will run on the unshielded CPUs.
|
||||
|
||||
|
||||
## API Usage
|
||||
|
||||
The `perfect` functions all return a `perfect::Result`, which is defined in [include/perfect/result.hpp](include/perfect/result.hpp).
|
||||
When things are working, it will be `perfect::Result::SUCCESS`.
|
||||
@@ -100,6 +139,11 @@ See [tools/no_aslr.cpp](tools/no_aslr.cpp)
|
||||
* `Result get_aslr(AslrState &state)`: save the current ASLR state
|
||||
* `Result set_aslr(const AslrState &state)`: set a previously-saved ASLR state
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Flush file system caches
|
||||
|
||||
`perfect` can drop various filesystem caches
|
||||
@@ -111,7 +155,7 @@ See [tools/sync_drop_caches.cpp](tools/sync_drop_caches.cpp)
|
||||
```
|
||||
|
||||
* `Result sync()`: flush filesystem caches to disk
|
||||
* `Result drop_caches(DropCaches_t mode)`: remove file system caches
|
||||
* `Result drop_caches(DropCaches_t mode = DropCaches_t(PAGECACHE | ENTRIES))`: remove file system caches
|
||||
* `mode = PAGECACHE`: drop page caches
|
||||
* `mode = ENTRIES`: drop dentries and inodes
|
||||
* `mode = PAGECACHE | ENTRIES`: both
|
||||
@@ -143,9 +187,9 @@ See [examples/os_perf.cpp](examples/os_perf.cpp).
|
||||
#include "perfect/os_perf.hpp"
|
||||
```
|
||||
|
||||
* `Result get_os_perf_state(OsPerfState *state, const int cpu)`: Save the current OS governor mode for CPU `cpu`.
|
||||
* `Result get_os_perf_state(OsPerfState &state)`: Save the current OS governor mode for all CPUs.
|
||||
* `Result os_perf_state_maximum(const int cpu)`: Set the OS governor to its maximum performance mode.
|
||||
* `Result set_os_perf_state(const int cpu, OsPerfState state)`: Restore a previously-saved OS governor mode.
|
||||
* `Result set_os_perf_state(OsPerfState state)`: Restore a previously-saved OS governor mode.
|
||||
|
||||
### GPU Turbo
|
||||
|
||||
@@ -188,8 +232,49 @@ See [examples/cpu_cache.cpp](examples/cpu_cache.cpp).
|
||||
|
||||
* `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes.
|
||||
|
||||
## Tools
|
||||
|
||||
### tools/addrs
|
||||
|
||||
Print the address of `main`, a stack variable, and a heap variable.
|
||||
Useful for demoing ASLR.
|
||||
|
||||
### tools/no-aslr
|
||||
|
||||
Disable ASLR on the provided execution.
|
||||
|
||||
With ASLR, addresses are different with each invocation
|
||||
```
|
||||
$ tools/addrs
|
||||
main: 94685074364704
|
||||
stack: 140734279743492
|
||||
heap: 94685084978800
|
||||
$ tools/addrs
|
||||
main: 93891046344992
|
||||
stack: 140722671706708
|
||||
heap: 93891068624496
|
||||
```
|
||||
|
||||
Without ASLR, addresses are the same in each invocation
|
||||
```
|
||||
$ tools/no-aslr tools/addrs
|
||||
main: 93824992233760
|
||||
stack: 140737488347460
|
||||
heap: 93824994414192
|
||||
$ tools/no-aslr tools/addrs
|
||||
main: 93824992233760
|
||||
stack: 140737488347460
|
||||
heap: 93824994414192
|
||||
```
|
||||
|
||||
## Changelog
|
||||
|
||||
* v0.5.0
|
||||
* add tools/stress
|
||||
* add tools/max-os-perf
|
||||
* add tools/min-os-perf
|
||||
* add tools/enable-cpu-turbo
|
||||
* add tools/disable-cpu-turbo
|
||||
* v0.4.0
|
||||
* Add ASLR interface
|
||||
* Disambiguate some filesystem errors
|
||||
@@ -224,3 +309,8 @@ See [examples/cpu_cache.cpp](examples/cpu_cache.cpp).
|
||||
* [easyperf.net](https://easyperf.net/blog/2019/08/02/Perf-measurement-environment-on-Linux#2-disable-hyper-threading) blog post discussing ACPI/Intel turbo, SMT, Linux governor, CPU affinity, process priority, file system caches, and ASLR.
|
||||
* [temci](https://github.com/parttimenerd/temci) benchmarking tool for cpu shielding and disabling hyperthreading, among other things.
|
||||
* [perflock](https://github.com/aclements/perflock) tool for locking CPU frequency scaling domains
|
||||
|
||||
## Acks
|
||||
|
||||
Uses [muellan/clipp](https://github.com/muellan/clipp) for cli option parsing.
|
||||
Uses [martinmoene/optional-lite](https://github.com/martinmoene/optional-lite).
|
||||
|
@@ -5,23 +5,20 @@
|
||||
int main(void) {
|
||||
perfect::init();
|
||||
|
||||
std::map<int, perfect::OsPerfState> states;
|
||||
// os performance state for each cpu
|
||||
perfect::OsPerfState state;
|
||||
|
||||
// store the current state
|
||||
PERFECT(perfect::get_os_perf_state(state));
|
||||
|
||||
// max state for each cpu
|
||||
for (auto cpu : perfect::cpus()) {
|
||||
perfect::OsPerfState state;
|
||||
perfect::Result result;
|
||||
result = perfect::get_os_perf_state(&state, cpu);
|
||||
if (perfect::Result::SUCCESS == result) {
|
||||
states[cpu] = state;
|
||||
}
|
||||
perfect::os_perf_state_maximum(cpu);
|
||||
PERFECT(perfect::os_perf_state_maximum(cpu));
|
||||
}
|
||||
|
||||
// do things with all CPUs set to the maximum performancem mode by the OS
|
||||
|
||||
for (auto kv : states) {
|
||||
int cpu = kv.first;
|
||||
perfect::OsPerfState state = kv.second;
|
||||
perfect::set_os_perf_state(cpu, state);
|
||||
}
|
||||
// restore original state
|
||||
PERFECT(perfect::set_os_perf_state(state));
|
||||
|
||||
}
|
@@ -24,7 +24,7 @@ Result sync() {
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result drop_caches(const DropCaches_t mode) {
|
||||
Result drop_caches(const DropCaches_t mode = DropCaches_t(PAGECACHE | ENTRIES)) {
|
||||
using detail::write_str;
|
||||
const std::string path = "/proc/sys/vm/drop_caches";
|
||||
if (mode & PAGECACHE & ENTRIES) {
|
||||
|
@@ -3,6 +3,7 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
|
||||
#ifdef __linux__
|
||||
#include "detail/os/linux.hpp"
|
||||
@@ -17,19 +18,23 @@ namespace perfect {
|
||||
|
||||
struct OsPerfState {
|
||||
#ifdef __linux__
|
||||
std::string governor;
|
||||
std::map<int, std::string> governors;
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
};
|
||||
|
||||
Result get_os_perf_state(OsPerfState *state, const int cpu) {
|
||||
assert(state);
|
||||
Result get_os_perf_state(OsPerfState &state) {
|
||||
#ifdef __linux__
|
||||
return get_governor(state->governor, cpu);
|
||||
for (auto cpu : cpus()) {
|
||||
std::string gov;
|
||||
PERFECT_SUCCESS_OR_RETURN(get_governor(gov, cpu));
|
||||
state.governors[cpu] = gov;
|
||||
}
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result os_perf_state_maximum(const int cpu) {
|
||||
@@ -40,13 +45,23 @@ Result os_perf_state_maximum(const int cpu) {
|
||||
#endif
|
||||
}
|
||||
|
||||
Result set_os_perf_state(const int cpu, OsPerfState state) {
|
||||
#ifdef __linux__
|
||||
return set_governor(cpu, state.governor);
|
||||
Result os_perf_state_minimum(const int cpu) {
|
||||
#ifdef __linux__
|
||||
return set_governor(cpu, "powersave");
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
}
|
||||
|
||||
Result set_os_perf_state(OsPerfState state) {
|
||||
#ifdef __linux__
|
||||
for (auto kv : state.governors) {
|
||||
PERFECT_SUCCESS_OR_RETURN(set_governor(kv.first, kv.second));
|
||||
}
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
};
|
@@ -12,11 +12,17 @@
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
#include <cerrno>
|
||||
#endif
|
||||
|
||||
namespace perfect {
|
||||
|
||||
enum class Result {
|
||||
NO_PERMISSION,
|
||||
NOT_SUPPORTED,
|
||||
NO_TASK,
|
||||
|
||||
NVML_NO_PERMISSION,
|
||||
NVML_NOT_SUPPORTED,
|
||||
NVML_UNINITIALIZED,
|
||||
@@ -38,6 +44,23 @@ Result from_nvml(nvmlReturn_t nvml) {
|
||||
case NVML_ERROR_INVALID_ARGUMENT:
|
||||
case NVML_ERROR_GPU_IS_LOST:
|
||||
case NVML_ERROR_UNKNOWN:
|
||||
case NVML_ERROR_ALREADY_INITIALIZED:
|
||||
case NVML_ERROR_NOT_FOUND:
|
||||
case NVML_ERROR_INSUFFICIENT_SIZE:
|
||||
case NVML_ERROR_INSUFFICIENT_POWER:
|
||||
case NVML_ERROR_DRIVER_NOT_LOADED:
|
||||
case NVML_ERROR_TIMEOUT:
|
||||
case NVML_ERROR_IRQ_ISSUE:
|
||||
case NVML_ERROR_LIBRARY_NOT_FOUND:
|
||||
case NVML_ERROR_FUNCTION_NOT_FOUND:
|
||||
case NVML_ERROR_CORRUPTED_INFOROM:
|
||||
case NVML_ERROR_RESET_REQUIRED:
|
||||
case NVML_ERROR_OPERATING_SYSTEM:
|
||||
case NVML_ERROR_LIB_RM_VERSION_MISMATCH:
|
||||
case NVML_ERROR_IN_USE:
|
||||
case NVML_ERROR_MEMORY:
|
||||
case NVML_ERROR_NO_DATA:
|
||||
case NVML_ERROR_VGPU_ECC_NOT_SUPPORTED:
|
||||
default:
|
||||
assert(0 && "unhandled nvmlReturn_t");
|
||||
}
|
||||
@@ -45,12 +68,26 @@ Result from_nvml(nvmlReturn_t nvml) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
// Translate an errno value into a perfect::Result.
//
// No errno value has a dedicated mapping yet: every input trips the assertion
// when asserts are enabled, and yields Result::UNKNOWN when they are compiled
// out. Add explicit mappings ahead of the assertion as they become needed.
Result from_errno(int err) {
  static_cast<void>(err); // not inspected until real mappings exist
  assert(0 && "unhandled errno");
  return Result::UNKNOWN;
}
|
||||
#endif
|
||||
|
||||
const char *get_string(const Result &result) {
|
||||
switch (result) {
|
||||
case Result::SUCCESS:
|
||||
return "success";
|
||||
case Result::NO_PERMISSION:
|
||||
return "no permission";
|
||||
case Result::NOT_SUPPORTED:
|
||||
return "unsupported operation";
|
||||
case Result::NO_TASK:
|
||||
return "no such task";
|
||||
case Result::UNKNOWN:
|
||||
return "unknown error";
|
||||
case Result::NVML_NOT_SUPPORTED:
|
||||
@@ -59,8 +96,7 @@ const char *get_string(const Result &result) {
|
||||
return "nvidia-ml returned no permission";
|
||||
case Result::NVML_UNINITIALIZED:
|
||||
return "nvidia-ml returned uninitialized";
|
||||
case Result::NOT_SUPPORTED:
|
||||
return "unsupported operation";
|
||||
|
||||
default:
|
||||
assert(0 && "unexpected perfect::Result");
|
||||
}
|
||||
|
@@ -34,11 +34,37 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
|
||||
-Wfatal-errors\
|
||||
")
|
||||
|
||||
add_executable(enable-turbo enable_turbo.cpp)
|
||||
target_link_libraries(enable-turbo perfect)
|
||||
add_executable(enable-cpu-turbo enable_cpu_turbo.cpp)
|
||||
target_link_libraries(enable-cpu-turbo perfect)
|
||||
|
||||
add_executable(disable-cpu-turbo disable_cpu_turbo.cpp)
|
||||
target_link_libraries(disable-cpu-turbo perfect)
|
||||
|
||||
add_executable(sync-drop-caches sync_drop_caches.cpp)
|
||||
target_link_libraries(sync-drop-caches perfect)
|
||||
|
||||
add_executable(no-aslr no_aslr.cpp)
|
||||
target_link_libraries(no-aslr perfect)
|
||||
target_link_libraries(no-aslr perfect)
|
||||
|
||||
add_executable(max-os-perf max_os_perf.cpp)
|
||||
target_link_libraries(max-os-perf perfect)
|
||||
|
||||
add_executable(min-os-perf min_os_perf.cpp)
|
||||
target_link_libraries(min-os-perf perfect)
|
||||
|
||||
add_executable(addrs addrs.cpp)
|
||||
|
||||
add_executable(perfect-cli perfect.cpp)
|
||||
target_link_libraries(perfect-cli perfect)
|
||||
target_include_directories(perfect-cli PUBLIC thirdparty)
|
||||
|
||||
## OpenMP
|
||||
find_package(OpenMP)
|
||||
if (OpenMP_FOUND)
|
||||
add_executable(stress stress.cpp)
|
||||
target_link_libraries(stress perfect)
|
||||
target_link_libraries(stress OpenMP::OpenMP_CXX)
|
||||
else(OpenMP_FOUND)
|
||||
message(WARNING "didn't find OpenMP, some benchmarks will be unavailable.")
|
||||
endif(OpenMP_FOUND)
|
||||
|
||||
|
9
tools/addrs.cpp
Normal file
9
tools/addrs.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include <iostream>
|
||||
|
||||
// Print three addresses as decimal integers, one per line: the `main`
// function, a local (stack) variable, and a heap allocation.
int main(void) {
  int *heap_cell = new int;

  // The stack address is that of the local pointer variable itself, not of
  // the int it points to.
  const uintptr_t main_addr = uintptr_t(main);
  const uintptr_t stack_addr = uintptr_t(&heap_cell);
  const uintptr_t heap_addr = uintptr_t(heap_cell);

  std::cout << "main: " << main_addr << "\n"
            << "stack: " << stack_addr << "\n"
            << "heap: " << heap_addr << "\n";

  delete heap_cell;
}
|
23
tools/disable_cpu_turbo.cpp
Normal file
23
tools/disable_cpu_turbo.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/cpu_turbo.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
CpuTurboState state;
|
||||
|
||||
perfect::init();
|
||||
|
||||
PERFECT(get_cpu_turbo_state(&state));
|
||||
|
||||
if (!is_turbo_enabled(state)) {
|
||||
std::cerr << "cpu turbo already disabled\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
} else {
|
||||
PERFECT(disable_cpu_turbo());
|
||||
std::cerr << "disabled cpu turbo\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
9
tools/max_os_perf.cpp
Normal file
9
tools/max_os_perf.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include "perfect/os_perf.hpp"
|
||||
|
||||
// Ask the OS to put every CPU reported by perfect::cpus() into its maximum
// performance state. Each library call is wrapped in PERFECT(...).
int main(void) {
  PERFECT(perfect::init());

  const auto all_cpus = perfect::cpus();
  for (auto it = all_cpus.begin(); it != all_cpus.end(); ++it) {
    PERFECT(perfect::os_perf_state_maximum(*it));
  }

  return 0;
}
|
9
tools/min_os_perf.cpp
Normal file
9
tools/min_os_perf.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include "perfect/os_perf.hpp"
|
||||
|
||||
// Ask the OS to put every CPU reported by perfect::cpus() into its minimum
// performance state. Each library call is wrapped in PERFECT(...).
int main(void) {
  PERFECT(perfect::init());

  const auto all_cpus = perfect::cpus();
  for (auto it = all_cpus.begin(); it != all_cpus.end(); ++it) {
    PERFECT(perfect::os_perf_state_minimum(*it));
  }

  return 0;
}
|
365
tools/perfect.cpp
Normal file
365
tools/perfect.cpp
Normal file
@@ -0,0 +1,365 @@
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <fcntl.h>
|
||||
#include <pwd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
|
||||
#include "clipp/clipp.h"
|
||||
#include "nonstd/optional.hpp"
|
||||
|
||||
#include "perfect/aslr.hpp"
|
||||
#include "perfect/cpu_set.hpp"
|
||||
#include "perfect/cpu_turbo.hpp"
|
||||
#include "perfect/detail/os/linux.hpp"
|
||||
#include "perfect/drop_caches.hpp"
|
||||
#include "perfect/os_perf.hpp"
|
||||
|
||||
// Point the standard stream `target` (1 = stdout, 2 = stderr) of the calling
// process at `fd`, then close the now-redundant `fd`. `what` names the stream
// in diagnostics. Failures are reported on stderr and otherwise ignored, so
// the caller still gets to run its program (just without the redirect).
static void redirect_fd(int fd, int target, const char *what) {
  std::cerr << "redirecting child " << what << " to file\n";

  // dup2() returns the new descriptor on success (so 1 or 2 here) and -1 on
  // failure; only -1 is an error.
  if (dup2(fd, target) == -1) {
    std::cerr << "dup2 error: " << strerror(errno) << "\n";
  }

  // When fd == target dup2() does nothing, and closing fd would close the
  // stream we just set up. A close() failure leaves nothing useful to do.
  if (fd != target) {
    (void)close(fd);
  }
}

// Run a program in a child process and wait for it to finish.
//
// argv: NULL-terminated argument vector; argv[0] is looked up via execvp().
// outf: file descriptor to use as the child's stdout; values <= 0 mean
//       "leave stdout alone".
// errf: file descriptor to use as the child's stderr; values <= 0 mean
//       "leave stderr alone".
//
// Returns EXIT_FAILURE if fork() or waitpid() fails; otherwise the raw wait
// status from waitpid(), which is 0 exactly when the child exited with code 0.
// A child that could not exec the program exits with code 127.
int fork_child(char *const *argv, int outf, int errf) {
  const pid_t pid = fork();

  if (pid == -1) {
    std::cerr << "can't fork, error occured\n";
    return EXIT_FAILURE;
  }

  if (pid == 0) {
    // in the child process
    if (outf > 0) {
      redirect_fd(outf, 1, "stdout");
    }
    if (errf > 0) {
      redirect_fd(errf, 2, "stderr");
    }

    // execvp() only returns if it failed
    execvp(argv[0], argv);
    std::cerr << "execvp error: " << strerror(errno) << "\n";

    // Terminate the child right here. Returning would let the child continue
    // executing the caller's code as if it were the parent. 127 is the code
    // the parent branch below recognises as "exec failed".
    _exit(127);
  }

  // parent process
  int status = 0;
  if (waitpid(pid, &status, 0) <= 0) {
    std::cerr << "waitpid() failed\n";
    return EXIT_FAILURE;
  }

  if (!WIFEXITED(status)) {
    std::cerr << "program didn't terminate normally\n";
    return status;
  }

  const int exit_code = WEXITSTATUS(status);
  if (exit_code == 127) {
    std::cerr << "execv failed\n";
  } else if (exit_code != 0) {
    std::cerr << "program terminated normally, but returned a non-zero "
                 "status\n";
  }
  return status;
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
using namespace clipp;
|
||||
|
||||
size_t numUnshielded = 0;
|
||||
size_t numShielded = 0;
|
||||
bool aslr = false;
|
||||
nonstd::optional<bool> cpuTurbo = false;
|
||||
nonstd::optional<bool> maxOsPerf = true;
|
||||
bool dropCaches = true;
|
||||
|
||||
std::vector<std::string> program;
|
||||
std::string stdoutPath;
|
||||
std::string stderrPath;
|
||||
int iters = 1;
|
||||
|
||||
auto shieldGroup = ((option("-u").doc("number of unshielded CPUs") &
|
||||
value("INT", numUnshielded)) |
|
||||
(option("-s").doc("number of shielded CPUs") &
|
||||
value("INT", numShielded)));
|
||||
|
||||
auto noModMode = (option("--no-mod")
|
||||
.doc("don't control performance")
|
||||
.set(aslr, true)
|
||||
.call([&]() { cpuTurbo = nonstd::nullopt; })
|
||||
.call([&]() { maxOsPerf = nonstd::nullopt; })
|
||||
.set(dropCaches, false));
|
||||
|
||||
auto modMode = (shieldGroup,
|
||||
option("--no-drop-cache")
|
||||
.set(dropCaches, false)
|
||||
.doc("do not drop filesystem caches"),
|
||||
option("--no-max-perf").doc("do not max os perf").call([&]() {
|
||||
maxOsPerf = false;
|
||||
}),
|
||||
option("--aslr").set(aslr, true).doc("enable ASLR"),
|
||||
option("--cpu-turbo").doc("enable CPU turbo").call([&]() {
|
||||
cpuTurbo = true;
|
||||
}),
|
||||
(option("--stdout").doc("redirect child stdout") &
|
||||
value("PATH", stdoutPath)),
|
||||
(option("--stderr").doc("redirect child stderr") &
|
||||
value("PATH", stderrPath)));
|
||||
|
||||
auto cli = ((noModMode | modMode),
|
||||
(option("-n").doc("run multiple times") & value("INT", iters)),
|
||||
// run everything after "--"
|
||||
required("--") & greedy(values("cmd", program))
|
||||
|
||||
);
|
||||
|
||||
if (!parse(argc, argv, cli)) {
|
||||
auto fmt = doc_formatting{}.doc_column(31);
|
||||
std::cout << make_man_page(cli, argv[0], fmt);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// open the redirect files, if needed
|
||||
int errf = 0;
|
||||
int outf = 0;
|
||||
if (!stderrPath.empty()) {
|
||||
std::cerr << "open " << stderrPath << "\n";
|
||||
errf = open(stderrPath.c_str(), O_WRONLY | O_CREAT,
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
||||
if (-1 == errf) {
|
||||
std::cerr << "error while opening " << stderrPath << ": "
|
||||
<< strerror(errno) << "\n";
|
||||
}
|
||||
}
|
||||
if (!stdoutPath.empty()) {
|
||||
outf = open(stdoutPath.c_str(), O_WRONLY | O_CREAT,
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
||||
if (-1 == outf) {
|
||||
std::cerr << "error while opening " << stdoutPath << ": "
|
||||
<< strerror(errno) << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
// if called with sudo, chown the files to whoever called sudo
|
||||
const char *sudoUser = std::getenv("SUDO_USER");
|
||||
if (sudoUser) {
|
||||
std::cerr << "called with sudo by " << sudoUser << "\n";
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
struct passwd *pwd;
|
||||
|
||||
pwd = getpwnam(sudoUser);
|
||||
if (pwd == NULL) {
|
||||
// die("Failed to get uid");
|
||||
}
|
||||
uid = pwd->pw_uid;
|
||||
gid = pwd->pw_gid;
|
||||
|
||||
if (!stdoutPath.empty()) {
|
||||
if (chown(stdoutPath.c_str(), uid, gid) == -1) {
|
||||
// die("chown fail");
|
||||
}
|
||||
}
|
||||
if (!stderrPath.empty()) {
|
||||
if (chown(stderrPath.c_str(), uid, gid) == -1) {
|
||||
// die("chown fail");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// exec the rest of the options
|
||||
std::vector<char *> args;
|
||||
for (auto &c : program) {
|
||||
args.push_back((char *)c.c_str());
|
||||
}
|
||||
args.push_back(nullptr);
|
||||
|
||||
PERFECT(perfect::init());
|
||||
|
||||
auto cpus = perfect::cpus();
|
||||
if (0 < numShielded) {
|
||||
numUnshielded = cpus.size() - numShielded;
|
||||
} else if (0 < numUnshielded) {
|
||||
numShielded = cpus.size() - numUnshielded;
|
||||
}
|
||||
|
||||
// handle CPU shielding
|
||||
perfect::CpuSet shielded, unshielded;
|
||||
if (numShielded) {
|
||||
std::cerr << "shielding " << numShielded << " cpus\n";
|
||||
|
||||
perfect::CpuSet root;
|
||||
PERFECT(perfect::CpuSet::get_root(root));
|
||||
PERFECT(root.make_child(shielded, "shielded"));
|
||||
PERFECT(root.make_child(unshielded, "unshielded"));
|
||||
|
||||
std::cerr << "enable memory\n";
|
||||
PERFECT(shielded.enable_mem(0));
|
||||
PERFECT(shielded.enable_mem(0));
|
||||
|
||||
std::cerr << "enable cpus\n";
|
||||
size_t i = 0;
|
||||
for (; i < numShielded; ++i) {
|
||||
std::cerr << "shield cpu " << cpus[i] << "\n";
|
||||
shielded.enable_cpu(cpus[i]);
|
||||
}
|
||||
for (; i < cpus.size(); ++i) {
|
||||
std::cerr << "unshield cpu " << cpus[i] << "\n";
|
||||
unshielded.enable_cpu(cpus[i]);
|
||||
}
|
||||
|
||||
std::cerr << "migrate self\n";
|
||||
PERFECT(root.migrate_self_to(shielded));
|
||||
std::cerr << "migrate other\n";
|
||||
PERFECT(root.migrate_tasks_to(unshielded));
|
||||
}
|
||||
|
||||
// handle aslr
|
||||
if (!aslr) {
|
||||
std::cerr << "disable ASLR for this process\n";
|
||||
PERFECT(perfect::disable_aslr());
|
||||
}
|
||||
|
||||
// handle CPU turbo
|
||||
perfect::CpuTurboState cpuTurboState;
|
||||
if (cpuTurbo.has_value()) {
|
||||
PERFECT(perfect::get_cpu_turbo_state(&cpuTurboState));
|
||||
if (false == cpuTurbo) {
|
||||
std::cerr << "disabling cpu turbo\n";
|
||||
PERFECT(perfect::disable_cpu_turbo());
|
||||
} else {
|
||||
std::cerr << "enabling cpu turbo\n";
|
||||
PERFECT(perfect::enable_cpu_turbo());
|
||||
}
|
||||
}
|
||||
|
||||
// handle governor
|
||||
perfect::OsPerfState osPerfState;
|
||||
if (maxOsPerf.has_value()) {
|
||||
PERFECT(perfect::get_os_perf_state(osPerfState));
|
||||
if (true == maxOsPerf) {
|
||||
std::cerr << "set max performance state\n";
|
||||
for (auto cpu : perfect::cpus()) {
|
||||
PERFECT(perfect::os_perf_state_maximum(cpu));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handle file system caches
|
||||
if (dropCaches) {
|
||||
std::cerr << "clearing file system cache\n";
|
||||
PERFECT(perfect::drop_caches());
|
||||
}
|
||||
|
||||
// parent should return
|
||||
for (int runIter = 0; runIter < iters; ++runIter) {
|
||||
std::cerr << "exec ";
|
||||
for (size_t i = 0; i < args.size() - 1; ++i) {
|
||||
std::cerr << args[i] << " ";
|
||||
}
|
||||
std::cerr << "\n";
|
||||
int status = fork_child(args.data(), outf, errf);
|
||||
if (0 != status) {
|
||||
std::cerr << "did not terminate successfully\n";
|
||||
}
|
||||
std::cerr << "finished execution\n";
|
||||
}
|
||||
|
||||
// clean up CpuSets (if needed)
|
||||
if (numShielded) {
|
||||
std::cerr << "clean up cpu sets\n";
|
||||
shielded.destroy();
|
||||
unshielded.destroy();
|
||||
}
|
||||
|
||||
// restore original turbo state
|
||||
if (cpuTurbo.has_value()) {
|
||||
std::cerr << "restore CPU turbo\n";
|
||||
PERFECT(perfect::set_cpu_turbo_state(cpuTurboState));
|
||||
}
|
||||
|
||||
if (maxOsPerf.has_value()) {
|
||||
std::cerr << "restore os performance state\n";
|
||||
PERFECT(perfect::set_os_perf_state(osPerfState));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
49
tools/stress.cpp
Normal file
49
tools/stress.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <omp.h>
|
||||
#include <cstring>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
size_t numThreads = std::stoi(argv[1]);
|
||||
std::vector<size_t> totals(numThreads, 0);
|
||||
omp_set_num_threads(numThreads);
|
||||
|
||||
auto start = std::chrono::system_clock::now();
|
||||
double time = std::stod(argv[2]);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
size_t tid = omp_get_thread_num();
|
||||
double a = rand();
|
||||
while (true) {
|
||||
for (size_t i = 0; i < 500; ++i) {
|
||||
double x;
|
||||
asm volatile(""::"r"(a));
|
||||
x = sqrt(a);
|
||||
asm volatile(""::"r"(x));
|
||||
|
||||
asm volatile(""::"r"(a));
|
||||
x = sqrt(a);
|
||||
asm volatile(""::"r"(x));
|
||||
}
|
||||
totals[tid] += 1000;
|
||||
auto elapsed = (std::chrono::system_clock::now() - start).count() / 1e9;
|
||||
if (elapsed > time) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t sum = 0;
|
||||
for (auto t : totals) {
|
||||
sum += t;
|
||||
}
|
||||
std::cout << (double)sum / time << "\n";
|
||||
|
||||
};
|
7023
tools/thirdparty/clipp/clipp.h
vendored
Normal file
7023
tools/thirdparty/clipp/clipp.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1585
tools/thirdparty/nonstd/optional.hpp
vendored
Normal file
1585
tools/thirdparty/nonstd/optional.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user