Compare commits
25 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
cd9a95365f | ||
![]() |
57bf39bb97 | ||
![]() |
c358f18c22 | ||
![]() |
37c61fe2fb | ||
![]() |
f2961b3075 | ||
![]() |
3a86aef546 | ||
![]() |
cd14d68c47 | ||
![]() |
2e32089786 | ||
![]() |
7503a29a5c | ||
![]() |
02e0c7c464 | ||
![]() |
1682a05d08 | ||
![]() |
14791badb1 | ||
![]() |
7890d17b57 | ||
![]() |
3bf2fd1df2 | ||
![]() |
227c4ebb11 | ||
![]() |
3ff80c86f2 | ||
![]() |
3da56a3a4a | ||
![]() |
196c8a20b1 | ||
![]() |
d6c861719f | ||
![]() |
7f1b1289bb | ||
![]() |
1c917c3154 | ||
![]() |
b8aca1be1c | ||
![]() |
77b23ac3af | ||
![]() |
8da5aeb754 | ||
![]() |
714f7c3dcd |
@@ -5,7 +5,7 @@
|
||||
# 3.13+ for target_link_directories
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(perfect LANGUAGES CXX VERSION 0.1.0)
|
||||
project(perfect LANGUAGES CXX VERSION 0.5.0)
|
||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||
|
||||
include(CheckLanguage)
|
||||
|
114
README.md
114
README.md
@@ -2,21 +2,24 @@
|
||||
|
||||
| Branch | Status |
|
||||
|-|-|
|
||||
| master |[](https://actions-badge.atrox.dev/cwpearson/perfect/goto?ref=master)|
|
||||
| master |[](https://actions-badge.atrox.dev/cwpearson/perfect/goto?ref=master) |
|
||||
|
||||
CPU/GPU performance control library for benchmarking
|
||||
* x86
|
||||
* POWER
|
||||
* Nvidia
|
||||
CPU/GPU Performance control library for benchmarking on Linux, x86, POWER, and Nvidia.
|
||||
|
||||
## Features
|
||||
|
||||
- [x] GPU power/utilization/temperature monitoring (nvidia)
|
||||
- [x] Disable CPU turbo (linux)
|
||||
- [x] Set OS CPU performance mode to maximum (linux)
|
||||
- [x] Set GPU clocks (nvidia)
|
||||
- [x] Disable GPU turbo (nvidia)
|
||||
- [x] Flush addresses from cache (amd64, POWER)
|
||||
- [x] CUDA not required (GPU functions will not be compiled)
|
||||
- [x] Flush file system caches (linux)
|
||||
- [x] Disable ASLR (linux)
|
||||
|
||||
## Contributors
|
||||
* [Carl Pearson](https://cwpearson.github.io)
|
||||
|
||||
## Installing
|
||||
|
||||
@@ -67,11 +70,57 @@ perfect::CpuTurboState state;
|
||||
PERFECT(perfect::get_cpu_turbo_state(&state));
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
`perfect` can monitor and record GPU activity.
|
||||
|
||||
See [examples/gpu_monitor.cu](examples/gpu_monitor.cu)
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_monitor.hpp"
|
||||
```
|
||||
|
||||
* `Monitor(std::ostream *stream)`: create a monitor that will write to `stream`.
|
||||
* `void Monitor::start()`: start the monitor
|
||||
* `void Monitor::stop()`: terminate the monitor
|
||||
* `void Monitor::pause()`: pause the monitor thread
|
||||
* `void Monitor::resume()`: resume the monitor thread
|
||||
|
||||
### Disable ASLR
|
||||
|
||||
`perfect` can disable ASLR
|
||||
|
||||
See [tools/no_aslr.cpp](tools/no_aslr.cpp)
|
||||
|
||||
```c++
|
||||
#include "perfect/aslr.hpp"
|
||||
```
|
||||
|
||||
* `Result disable_aslr()`: disable ASLR
|
||||
* `Result get_aslr(AslrState &state)`: save the current ASLR state
|
||||
* `Result set_aslr(const AslrState &state)`: set a previously-saved ASLR state
|
||||
|
||||
### Flush file system caches
|
||||
|
||||
`perfect` can drop various filesystem caches
|
||||
|
||||
See [tools/sync_drop_caches.cpp](tools/sync_drop_caches.cpp)
|
||||
|
||||
```c++
|
||||
#include "perfect/drop_caches.hpp"
|
||||
```
|
||||
|
||||
* `Result sync()`: flush filesystem caches to disk
|
||||
* `Result drop_caches(DropCaches_t mode)`: remove file system caches
|
||||
* `mode = PAGECACHE`: drop page caches
|
||||
* `mode = ENTRIES`: drop dentries and inodes
|
||||
* `mode = PAGECACHE | ENTRIES`: both
|
||||
|
||||
### CPU Turbo
|
||||
|
||||
`perfect` can enable and disable CPU boost through the Intel p-state mechanism or the ACPI cpufreq mechanism.
|
||||
|
||||
See [examples/cpu_turbo.cpp].
|
||||
See [examples/cpu_turbo.cpp](examples/cpu_turbo.cpp).
|
||||
|
||||
|
||||
```c++
|
||||
@@ -88,7 +137,7 @@ See [examples/cpu_turbo.cpp].
|
||||
|
||||
`perfect` can control the OS governor on linux.
|
||||
|
||||
See [examples/os_perf.cpp].
|
||||
See [examples/os_perf.cpp](examples/os_perf.cpp).
|
||||
|
||||
```c++
|
||||
#include "perfect/os_perf.hpp"
|
||||
@@ -102,7 +151,7 @@ See [examples/os_perf.cpp].
|
||||
|
||||
`perfect` can enable/disable GPU turbo boost.
|
||||
|
||||
See [examples/gpu_turbo.cu]
|
||||
See [examples/gpu_turbo.cu](examples/gpu_turbo.cu).
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_turbo.hpp"
|
||||
@@ -118,10 +167,10 @@ See [examples/gpu_turbo.cu]
|
||||
|
||||
`perfect` can lock GPU clocks to their maximum values.
|
||||
|
||||
See [examples/gpu_clocks.cu]
|
||||
See [examples/gpu_clocks.cu](examples/gpu_clocks.cu).
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_clocks.hpp`
|
||||
#include "perfect/gpu_clocks.hpp"
|
||||
```
|
||||
|
||||
* `Result set_max_gpu_clocks(unsigned int idx)`: Set GPU `idx` clocks to their maximum reported values.
|
||||
@@ -131,16 +180,53 @@ See [examples/gpu_clocks.cu]
|
||||
|
||||
`perfect` can flush data from CPU caches. Unlike the other APIs, these do not return a `Result` because they do not fail.
|
||||
|
||||
See [examples/cpu_cache.cpp].
|
||||
See [examples/cpu_cache.cpp](examples/cpu_cache.cpp).
|
||||
|
||||
```c++
|
||||
#include "perfect/cpu_cache.hpp`
|
||||
#include "perfect/cpu_cache.hpp"
|
||||
```
|
||||
|
||||
* `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes.
|
||||
|
||||
## Changelog
|
||||
|
||||
* v0.5.0
|
||||
* add tools/stress
|
||||
* add tools/max-os-perf
|
||||
* add tools/min-os-perf
|
||||
* add tools/enable-cpu-turbo
|
||||
* add tools/disable-cpu-turbo
|
||||
* v0.4.0
|
||||
* Add ASLR interface
|
||||
* Disambiguate some filesystem errors
|
||||
* Fix some powerpc namespace issues
|
||||
* v0.3.0
|
||||
* Add filesystem cache interface
|
||||
* v0.2.0
|
||||
* add GPU monitoring
|
||||
* Make CUDA optional
|
||||
* v0.1.0
|
||||
* cache control
|
||||
* Intel P-State control
|
||||
* linux governor control
|
||||
* POWER cpufreq control
|
||||
* Nvidia GPU boost control
|
||||
* Nvidia GPU clock control
|
||||
|
||||
## Wish List
|
||||
|
||||
- [ ] only monitor certain GPUs
|
||||
- [ ] hyperthreading interface
|
||||
- [ ] process priority interface
|
||||
- [ ] A wrapper utility
|
||||
- [ ] disable hyperthreading
|
||||
- [ ] reserve cores
|
||||
- [ ] set process priority
|
||||
- [ ] disable ASLR
|
||||
|
||||
- [ ] Nvidia GPU power monitoring
|
||||
- [ ] Nvidia GPU utilization monitoring
|
||||
## Related
|
||||
|
||||
* [LLVM benchmarking instructions](https://llvm.org/docs/Benchmarking.html#linux) covering ASLR, Linux governor, cpuset shielding, SMT, and Intel turbo.
|
||||
* [easyperf.net](https://easyperf.net/blog/2019/08/02/Perf-measurement-environment-on-Linux#2-disable-hyper-threading) blog post discussing ACPI/Intel turbo, SMT, Linux governor, CPU affinity, process priority, file system caches, and ASLR.
|
||||
* [temci](https://github.com/parttimenerd/temci) benchmarking tool for CPU shielding and disabling hyperthreading, among other things.
|
||||
* [perflock](https://github.com/aclements/perflock) tool for locking CPU frequency scaling domains
|
||||
|
@@ -51,4 +51,9 @@ endif()
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
add_executable(gpu-turbo gpu_turbo.cu)
|
||||
target_link_libraries(gpu-turbo perfect)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
add_executable(gpu-monitor gpu_monitor.cu)
|
||||
target_link_libraries(gpu-monitor perfect)
|
||||
endif()
|
25
examples/gpu_monitor.cu
Normal file
25
examples/gpu_monitor.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "perfect/gpu_monitor.hpp"
|
||||
|
||||
int main(void) {
|
||||
using namespace perfect;
|
||||
init();
|
||||
|
||||
// write to stderr
|
||||
Monitor m(&std::cerr);
|
||||
|
||||
// don't record GPU utilization
|
||||
m.config.utilization = false;
|
||||
|
||||
m.start();
|
||||
|
||||
// ctrl-c to exit
|
||||
while (true) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(5000));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
40
include/perfect/aslr.hpp
Normal file
40
include/perfect/aslr.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
|
||||
#include <cerrno>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef __linux__
|
||||
#include "detail/os/linux.hpp"
|
||||
#endif
|
||||
#include "init.hpp"
|
||||
#include "result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
struct AslrState {
|
||||
#ifdef __linux__
|
||||
unsigned long persona;
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
};
|
||||
|
||||
Result get_aslr(AslrState &state) {
|
||||
int persona;
|
||||
PERFECT_SUCCESS_OR_RETURN(detail::get_personality(persona));
|
||||
state.persona = persona;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result set_aslr(const AslrState &state) {
|
||||
return detail::set_personality(state.persona);
|
||||
}
|
||||
|
||||
Result disable_aslr() {
|
||||
int persona;
|
||||
PERFECT_SUCCESS_OR_RETURN(detail::get_personality(persona));
|
||||
persona |= ADDR_NO_RANDOMIZE;
|
||||
return detail::set_personality(persona);
|
||||
}
|
||||
|
||||
} // namespace perfect
|
361
include/perfect/cpu_set.hpp
Normal file
361
include/perfect/cpu_set.hpp
Normal file
@@ -0,0 +1,361 @@
|
||||
#pragma once
|
||||
|
||||
#include <sys/mount.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "init.hpp"
|
||||
#include "result.hpp"
|
||||
|
||||
// Evaluate `stmt` (an expression yielding a Result) exactly once and return
// its value from the enclosing function unless it is Result::SUCCESS.
//
// The body is wrapped in do { ... } while (0) so that the expansion together
// with the caller's trailing semicolon forms a single statement. A bare
// { ... } block followed by ';' does not compile when the macro is the body
// of an `if` that has an `else` branch.
#define SUCCESS_OR_RETURN(stmt)                                                \
  do {                                                                         \
    const Result _ret = (stmt);                                                \
    if (_ret != Result::SUCCESS) {                                             \
      return _ret;                                                             \
    }                                                                          \
  } while (0)
|
||||
|
||||
// Set difference: returns every element of `lhs` that is not in `rhs`.
//
// `inline` because this header defines the function and may be included from
// more than one translation unit.
inline std::set<int> operator-(const std::set<int> &lhs,
                               const std::set<int> &rhs) {
  std::set<int> result;
  for (const int e : lhs) {
    if (0 == rhs.count(e)) {
      // lhs is visited in ascending order, so each kept element belongs at
      // the end of the result
      result.insert(result.end(), e);
    }
  }
  return result;
}
|
||||
|
||||
std::string remove_space(const std::string &s) {
|
||||
std::string result;
|
||||
|
||||
for (auto c : s) {
|
||||
if (!isspace(c)) {
|
||||
result += c;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// like "8" or "8-10"
|
||||
std::set<int> parse_token(const std::string &token) {
|
||||
// std::cerr << "parse_token: parsing '" << s << "'\n";
|
||||
std::set<int> result;
|
||||
|
||||
std::string s = token;
|
||||
// ignore empty string
|
||||
if (s.empty()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// remove newline
|
||||
s = remove_space(s);
|
||||
|
||||
size_t pos = 0;
|
||||
|
||||
int first = std::stoi(s, &pos);
|
||||
// std::cerr << "parse_token: found '" << first << "'\n";
|
||||
|
||||
// single int
|
||||
if (pos == s.length()) {
|
||||
result.insert(first);
|
||||
return result;
|
||||
}
|
||||
|
||||
// next char should be a "-"
|
||||
assert(s[pos] == '-');
|
||||
|
||||
std::string rest = s.substr(pos + 1);
|
||||
int second = std::stoi(rest, &pos);
|
||||
// std::cerr << "parse_token: found '" << second << "'\n";
|
||||
|
||||
// insert first-second
|
||||
// std::cerr << "parse_token: range " << first << " to " << second << "\n";
|
||||
for (int i = first; i <= second; ++i) {
|
||||
result.insert(i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::set<int> parse_cpuset(const std::string &s) {
|
||||
// std::cerr << "parse_cpuset: parsing '" << s << "'\n";
|
||||
std::set<int> result;
|
||||
|
||||
std::string token;
|
||||
std::stringstream ss(s);
|
||||
while (std::getline(ss, token, ',')) {
|
||||
|
||||
if ("\n" != token) {
|
||||
auto newCpus = parse_token(token);
|
||||
for (auto cpu : newCpus) {
|
||||
result.insert(cpu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// http://man7.org/linux/man-pages/man7/cpuset.7.html
|
||||
namespace perfect {
|
||||
class CpuSet {
|
||||
public:
|
||||
std::string path_;
|
||||
std::set<int> cpus_;
|
||||
std::set<int> mems_;
|
||||
CpuSet *parent_;
|
||||
|
||||
// make sure cpuset is initialized
|
||||
static Result init() {
|
||||
|
||||
// check for "nodev cpuset" in /proc/filesystems
|
||||
|
||||
// mkdir /dev/cpuset
|
||||
if (mkdir("/dev/cpuset", S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) {
|
||||
switch (errno) {
|
||||
case EEXIST: {
|
||||
// okay
|
||||
break;
|
||||
}
|
||||
case EACCES:
|
||||
// std::cerr << "access error in mkdir: " << strerror(errno) << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
case EROFS:
|
||||
default:
|
||||
std::cerr << "unhandled error in mkdir: " << strerror(errno) << "\n";
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
|
||||
// mount -t cpuset none /dev/cpuset
|
||||
if (mount("none", "/dev/cpuset", "cpuset", 0, nullptr)) {
|
||||
switch (errno) {
|
||||
case EEXIST: {
|
||||
// okay
|
||||
break;
|
||||
}
|
||||
case EBUSY: {
|
||||
// FIXME: something is mounted here, assume it is what we want
|
||||
// std::cerr << "EBUSY in mount: " << strerror(errno) << "\n";
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
case EPERM: {
|
||||
// std::cerr << "EPERM in mount: " << strerror(errno) << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
}
|
||||
case ENOENT:
|
||||
case EROFS:
|
||||
default:
|
||||
std::cerr << "unhandled error in mount: " << strerror(errno) << "\n";
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
std::string get_raw_cpus() {
|
||||
std::ifstream is(path_ + "/cpuset.cpus");
|
||||
std::stringstream ss;
|
||||
ss << is.rdbuf();
|
||||
return remove_space(ss.str());
|
||||
}
|
||||
|
||||
std::string get_raw_mems() {
|
||||
std::ifstream is(path_ + "/cpuset.mems");
|
||||
std::stringstream ss;
|
||||
ss << is.rdbuf();
|
||||
return remove_space(ss.str());
|
||||
}
|
||||
|
||||
std::set<int> get_cpus() { return parse_cpuset(get_raw_cpus()); }
|
||||
|
||||
std::set<int> get_mems() { return parse_cpuset(get_raw_mems()); }
|
||||
|
||||
// migrate the caller task from this cpu set to another
|
||||
Result migrate_self_to(CpuSet &other) {
|
||||
// enable memory migration in other
|
||||
other.enable_memory_migration();
|
||||
|
||||
// get my pid
|
||||
pid_t self = this_task();
|
||||
|
||||
// read this tasks and write each line to other.tasks
|
||||
std::ifstream is(path_ + "/tasks");
|
||||
std::string line;
|
||||
while (std::getline(is, line)) {
|
||||
line = remove_space(line);
|
||||
if (std::to_string(self) == line) {
|
||||
// std::cerr << "migrating self task " << line << " to " << other.path
|
||||
// << "\n";
|
||||
other.write_task(line);
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
}
|
||||
return Result::NO_TASK;
|
||||
}
|
||||
|
||||
// migrate tasks in this cpu set to another
|
||||
Result migrate_tasks_to(CpuSet &other) {
|
||||
// enable memory migration in other
|
||||
SUCCESS_OR_RETURN(other.enable_memory_migration());
|
||||
|
||||
// read this tasks and write each line to other.tasks
|
||||
std::ifstream is(path_ + "/tasks");
|
||||
std::string line;
|
||||
while (std::getline(is, line)) {
|
||||
// std::cerr << "migrating task " << line << " to " << other.path << "\n";
|
||||
other.write_task(line);
|
||||
}
|
||||
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result enable_memory_migration() {
|
||||
std::ofstream ofs(path_ + "/" + "cpuset.memory_migrate");
|
||||
ofs << "1";
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
switch (errno) {
|
||||
case EACCES:
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
return Result::NOT_SUPPORTED;
|
||||
default:
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
void write_task(const std::string &task) {
|
||||
// write `task` to path/tasks
|
||||
std::ofstream os(path_ + "/tasks");
|
||||
os << task << "\n";
|
||||
}
|
||||
|
||||
// object representing the root CPU set
|
||||
static Result get_root(CpuSet &root) {
|
||||
SUCCESS_OR_RETURN(CpuSet::init());
|
||||
root.path_ = "/dev/cpuset";
|
||||
root.parent_ = nullptr;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
// the ID of this task
|
||||
static pid_t this_task() { return getpid(); }
|
||||
|
||||
Result make_child(CpuSet &child, const std::string &name) {
|
||||
|
||||
if (mkdir((path_ + "/" + name).c_str(),
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) {
|
||||
switch (errno) {
|
||||
case EEXIST: {
|
||||
// okay
|
||||
break;
|
||||
}
|
||||
case EACCES:
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
case EROFS:
|
||||
default:
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
child.path_ = path_ + "/" + name;
|
||||
child.parent_ = this;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result enable_cpu(const int cpu) {
|
||||
std::set<int> cpus = get_cpus();
|
||||
cpus.insert(cpu);
|
||||
return write_cpus(cpus);
|
||||
}
|
||||
|
||||
Result enable_cpus(const std::set<int> &cpus) {
|
||||
std::set<int> finalCpus = get_cpus();
|
||||
for (auto cpu : cpus) {
|
||||
finalCpus.insert(cpu);
|
||||
}
|
||||
return write_cpus(finalCpus);
|
||||
}
|
||||
|
||||
// FIXME: check error
|
||||
Result write_cpus(std::set<int> cpus) {
|
||||
std::ofstream os(path_ + "/cpuset.cpus");
|
||||
bool comma = false;
|
||||
for (auto cpu : cpus) {
|
||||
if (comma)
|
||||
os << ",";
|
||||
os << cpu << "-" << cpu;
|
||||
comma = true;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
// FIXME: check write
|
||||
Result write_mems(std::set<int> mems) {
|
||||
std::ofstream os(path_ + "/cpuset.mems");
|
||||
bool comma = false;
|
||||
for (auto mem : mems) {
|
||||
if (comma)
|
||||
os << ",";
|
||||
os << mem << "-" << mem;
|
||||
comma = true;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result enable_mem(const int mem) {
|
||||
std::set<int> mems = get_mems();
|
||||
mems.insert(mem);
|
||||
return write_mems(mems);
|
||||
}
|
||||
|
||||
Result enable_mems(const std::set<int> &mems) {
|
||||
std::set<int> finalMems = get_mems();
|
||||
for (auto mem : mems) {
|
||||
finalMems.insert(mem);
|
||||
}
|
||||
return write_mems(finalMems);
|
||||
}
|
||||
|
||||
Result destroy() {
|
||||
// remove all child cpu sets
|
||||
|
||||
// move all attached processes back to parent
|
||||
assert(parent_);
|
||||
migrate_tasks_to(*parent_);
|
||||
|
||||
// remove with rmdir
|
||||
if (rmdir(path_.c_str())) {
|
||||
switch (errno) {
|
||||
default:
|
||||
std::cerr << "unhandled error in rmdir: " << strerror(errno) << "\n";
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
path_ = "";
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const CpuSet &c) {
|
||||
s << c.path_;
|
||||
return s;
|
||||
}
|
||||
|
||||
} // namespace perfect
|
8
include/perfect/detail/cache/power.hpp
vendored
8
include/perfect/detail/cache/power.hpp
vendored
@@ -1,5 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
|
||||
inline void flush_line(void *p) {
|
||||
|
||||
/*
|
||||
@@ -32,4 +35,7 @@ inline void barrier_all() {
|
||||
: "memory");
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
51
include/perfect/detail/fs.hpp
Normal file
51
include/perfect/detail/fs.hpp
Normal file
@@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
#include "../result.hpp"
|
||||
|
||||
#ifdef __linux__
|
||||
#include "fs/linux.hpp"
|
||||
#else
|
||||
#error "unsupported platform"
|
||||
#endif
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
|
||||
Result write_str(const std::string &path, const std::string &val) {
|
||||
|
||||
if (!path_exists(path)) {
|
||||
std::cerr << "write_str(): does not exist: " << path << "\n";
|
||||
return Result::NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
std::ofstream ofs(path);
|
||||
if (ofs.fail()) {
|
||||
std::cerr << "failed to open " << path << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
}
|
||||
|
||||
ofs << val;
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
switch (errno) {
|
||||
case EACCES:
|
||||
std::cerr << "EACCES when writing to " << path << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
case EPERM:
|
||||
std::cerr << "EPERM when writing to " << path << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
std::cerr << "ENOENT when writing to " << path << "\n";
|
||||
return Result::NOT_SUPPORTED;
|
||||
default:
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace perfect
|
31
include/perfect/detail/fs/linux.hpp
Normal file
31
include/perfect/detail/fs/linux.hpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cerrno>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace perfect {
namespace detail {

// Report whether `path` names an existing filesystem entry, using stat().
//
// Returns false when stat() fails with ENOENT or ENOTDIR. Any other stat()
// failure prints a diagnostic to stderr and trips an assertion; when
// assertions are compiled out (NDEBUG) the function then returns true.
//
// `inline` because this header defines the function and may be included from
// more than one translation unit.
inline bool path_exists(const std::string &path) {
  struct stat sb;
  if (0 == stat(path.c_str(), &sb)) {
    return true;
  }

  if (ENOENT == errno || ENOTDIR == errno) {
    return false;
  }

  std::cerr << "unhandled error in stat() for " << path << "\n";
  assert(0);
  return true;
}

} // namespace detail
} // namespace perfect
|
@@ -12,6 +12,7 @@
|
||||
#include <sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/personality.h>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
@@ -88,4 +89,24 @@ size_t cache_linesize() {
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
Result get_personality(int &persona) {
|
||||
int ret = personality(0xffffffff);
|
||||
if (-1 == ret) {
|
||||
return Result::UNKNOWN;
|
||||
} else {
|
||||
persona = ret;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result set_personality(const int persona) {
|
||||
int ret = personality(persona);
|
||||
if (-1 == ret) {
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace perfect
|
@@ -4,6 +4,7 @@
|
||||
#include <fstream>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
#include "perfect/detail/fs.hpp"
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
@@ -15,19 +16,12 @@ bool has_intel_pstate_no_turbo() {
|
||||
Result write_intel_pstate_no_turbo(const std::string &s) {
|
||||
assert(has_intel_pstate_no_turbo());
|
||||
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
|
||||
std::ofstream ofs(path, std::ofstream::out);
|
||||
ofs << s;
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
return Result::NO_PERMISSION;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
return write_str(path, s);
|
||||
}
|
||||
|
||||
std::string read_intel_pstate_no_turbo() {
|
||||
assert(has_intel_pstate_no_turbo());
|
||||
std::string path("/sys/devices/system/cpu/intel_pstate/no_turbo");
|
||||
// SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
|
||||
std::ifstream ifs(path, std::ifstream::in);
|
||||
std::string result;
|
||||
std::getline(ifs, result);
|
||||
|
@@ -1,31 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
#include "perfect/detail/fs.hpp"
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
|
||||
bool has_acpi_cpufreq_boost() {
|
||||
return bool(std::ifstream("/sys/devices/system/cpu/cpufreq/boost"));
|
||||
}
|
||||
|
||||
int write_acpi_cpufreq_boost(const std::string &s) {
|
||||
Result write_acpi_cpufreq_boost(const std::string &s) {
|
||||
assert(has_acpi_cpufreq_boost());
|
||||
std::string path("/sys/devices/system/cpu/cpufreq/boost");
|
||||
SPDLOG_LOGGER_TRACE(logger::console(), "writing to {}", path);
|
||||
std::ofstream ofs(path, std::ofstream::out);
|
||||
ofs << s;
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
SPDLOG_LOGGER_TRACE(logger::console(), "error writing to {}", path);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
return write_str(path, s);
|
||||
}
|
||||
|
||||
std::string read_acpi_cpufeq_boost() {
|
||||
assert(has_acpi_cpufreq_boost());
|
||||
std::string path("/sys/devices/system/cpu/cpufreq/boost");
|
||||
SPDLOG_LOGGER_TRACE(logger::console(), "reading {}", path);
|
||||
std::ifstream ifs(path, std::ifstream::in);
|
||||
std::string result;
|
||||
std::getline(ifs, result);
|
||||
@@ -37,11 +30,12 @@ std::string read_acpi_cpufeq_boost() {
|
||||
}
|
||||
|
||||
Result disable_cpu_turbo() {
|
||||
write_acpi_cpufeq_boost("0");
|
||||
return write_acpi_cpufreq_boost("0");
|
||||
}
|
||||
|
||||
Result enable_cpu_turbo() {
|
||||
write_acpi_cpufeq_boost("1");
|
||||
return write_acpi_cpufreq_boost("1");
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace perfect
|
||||
|
43
include/perfect/drop_caches.hpp
Normal file
43
include/perfect/drop_caches.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "result.hpp"
|
||||
#include "init.hpp"
|
||||
#include "detail/fs.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
enum DropCaches_t {
|
||||
PAGECACHE = 0x1,
|
||||
ENTRIES = 0x2
|
||||
};
|
||||
|
||||
|
||||
// commit filesystem caches to disk
|
||||
Result sync() {
|
||||
// http://man7.org/linux/man-pages/man2/sync.2.html
|
||||
::sync(); // always successful
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result drop_caches(const DropCaches_t mode) {
|
||||
using detail::write_str;
|
||||
const std::string path = "/proc/sys/vm/drop_caches";
|
||||
if (mode & PAGECACHE & ENTRIES) {
|
||||
PERFECT_SUCCESS_OR_RETURN(write_str(path, "3"));
|
||||
} else if (mode & PAGECACHE) {
|
||||
PERFECT_SUCCESS_OR_RETURN(write_str(path, "1"));
|
||||
} else if (mode & ENTRIES) {
|
||||
PERFECT_SUCCESS_OR_RETURN(write_str(path, "2"));
|
||||
} else {
|
||||
std::cerr << "unexpected mode: " << mode << "\n";
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
}
|
147
include/perfect/gpu_monitor.hpp
Normal file
147
include/perfect/gpu_monitor.hpp
Normal file
@@ -0,0 +1,147 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/init.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
class Monitor {
|
||||
public:
|
||||
struct Config {
|
||||
std::atomic<bool> stop;
|
||||
std::atomic<bool> pause;
|
||||
double samplePeriodMs;
|
||||
|
||||
std::atomic<bool> power;
|
||||
std::atomic<bool> utilization;
|
||||
std::atomic<bool> temperature;
|
||||
std::atomic<bool> pstate;
|
||||
std::ostream *stream_;
|
||||
|
||||
Config(std::ostream *stream)
|
||||
: stop(true), pause(false), power(true), samplePeriodMs(100),
|
||||
utilization(true), temperature(true), pstate(true), stream_(stream) {}
|
||||
};
|
||||
|
||||
std::thread worker;
|
||||
Config config;
|
||||
|
||||
Monitor(std::ostream *stream) : config(stream) {}
|
||||
|
||||
static void worker_func(const Config &cfg) {
|
||||
|
||||
nvmlReturn_t ret;
|
||||
nvmlDevice_t device;
|
||||
unsigned int deviceCount;
|
||||
|
||||
nvmlUtilization_t utilization;
|
||||
unsigned int milliwatts;
|
||||
unsigned int temperature;
|
||||
nvmlPstates_t pState;
|
||||
|
||||
ret = nvmlDeviceGetCount(&deviceCount);
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> start;
|
||||
|
||||
while (!cfg.stop.load()) {
|
||||
if (std::chrono::time_point<std::chrono::system_clock>() == start) {
|
||||
start = std::chrono::system_clock::now();
|
||||
}
|
||||
if (!cfg.pause.load()) {
|
||||
|
||||
const double elapsed =
|
||||
(std::chrono::system_clock::now() - start).count() / 1e9 * 1e3;
|
||||
|
||||
for (unsigned int i = 0; i < deviceCount; ++i) {
|
||||
|
||||
(*cfg.stream_) << elapsed << "," << i;
|
||||
|
||||
ret = nvmlDeviceGetHandleByIndex(i, &device);
|
||||
|
||||
if (cfg.power.load()) {
|
||||
ret = nvmlDeviceGetPowerUsage(device, &milliwatts);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << milliwatts;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.utilization.load()) {
|
||||
// period is between 1 second and 1/6 second depending on product
|
||||
ret = nvmlDeviceGetUtilizationRates(device, &utilization);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_)
|
||||
<< "," << utilization.gpu << "," << utilization.memory;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1 << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x"
|
||||
<< ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.temperature.load()) {
|
||||
ret = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,
|
||||
&temperature);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << temperature;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.pstate.load()) {
|
||||
ret = nvmlDeviceGetPerformanceState(device, &pState);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << pState;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
|
||||
(*cfg.stream_) << "\n";
|
||||
}
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
}
|
||||
|
||||
void start() {
|
||||
config.stop = false;
|
||||
worker = std::thread(worker_func, std::ref(config));
|
||||
}
|
||||
void stop() {
|
||||
config.stop = true;
|
||||
worker.join();
|
||||
}
|
||||
|
||||
void resume() { config.pause.store(false); }
|
||||
void pause() { config.pause.store(true); }
|
||||
};
|
||||
|
||||
} // namespace perfect
|
@@ -1,13 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! initialize the benchmark
|
||||
|
@@ -40,6 +40,14 @@ Result os_perf_state_maximum(const int cpu) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Put `cpu` into the OS's lowest-performance mode. On Linux this selects the
// "powersave" governor via set_governor() and returns its result.
//
// `inline` because this header defines the function and may be included from
// more than one translation unit.
inline Result os_perf_state_minimum(const int cpu) {
#ifdef __linux__
  return set_governor(cpu, "powersave");
#else
#error "unsupported platform"
#endif
}
|
||||
|
||||
Result set_os_perf_state(const int cpu, OsPerfState state) {
|
||||
#ifdef __linux__
|
||||
return set_governor(cpu, state.governor);
|
||||
|
@@ -3,8 +3,10 @@
|
||||
#include <cassert>
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
@@ -55,6 +57,8 @@ const char *get_string(const Result &result) {
|
||||
return "nvidia-ml returned not supported";
|
||||
case Result::NVML_NO_PERMISSION:
|
||||
return "nvidia-ml returned no permission";
|
||||
case Result::NVML_UNINITIALIZED:
|
||||
return "nvidia-ml returned uninitialized";
|
||||
case Result::NOT_SUPPORTED:
|
||||
return "unsupported operation";
|
||||
default:
|
||||
@@ -76,3 +80,12 @@ inline void check(Result result, const char *file, const int line) {
|
||||
} // namespace perfect
|
||||
|
||||
// Evaluate `stmt` and pass its result, together with the call site's file and
// line, to check(). The expansion already ends with a semicolon.
#define PERFECT(stmt) check(stmt, __FILE__, __LINE__);
|
||||
|
||||
// Evaluate `stmt` exactly once; if it did not yield Result::SUCCESS, return
// that result from the enclosing function, otherwise fall through.
//
// The names are fully qualified because this macro is defined outside
// namespace perfect and may be expanded where `Result` is not in scope. The
// temporary uses a library-specific name so it cannot shadow an identifier
// that `stmt` itself refers to.
#define PERFECT_SUCCESS_OR_RETURN(stmt)                                        \
  {                                                                            \
    const ::perfect::Result perfect_ret_ = (stmt);                             \
    if (perfect_ret_ != ::perfect::Result::SUCCESS) {                          \
      return perfect_ret_;                                                     \
    }                                                                          \
  }
|
||||
|
@@ -34,6 +34,31 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
|
||||
-Wfatal-errors\
|
||||
")
|
||||
|
||||
add_executable(enable-turbo enable_turbo.cpp)
|
||||
target_link_libraries(enable-turbo perfect)
|
||||
add_executable(enable-cpu-turbo enable_cpu_turbo.cpp)
|
||||
target_link_libraries(enable-cpu-turbo perfect)
|
||||
|
||||
add_executable(disable-cpu-turbo disable_cpu_turbo.cpp)
|
||||
target_link_libraries(disable-cpu-turbo perfect)
|
||||
|
||||
add_executable(sync-drop-caches sync_drop_caches.cpp)
|
||||
target_link_libraries(sync-drop-caches perfect)
|
||||
|
||||
add_executable(no-aslr no_aslr.cpp)
|
||||
target_link_libraries(no-aslr perfect)
|
||||
|
||||
add_executable(max-os-perf max_os_perf.cpp)
|
||||
target_link_libraries(max-os-perf perfect)
|
||||
|
||||
add_executable(min-os-perf min_os_perf.cpp)
|
||||
target_link_libraries(min-os-perf perfect)
|
||||
|
||||
## OpenMP
|
||||
# The stress tool is built only when an OpenMP toolchain is available.
find_package(OpenMP)
if(OpenMP_FOUND)
  add_executable(stress stress.cpp)
  target_link_libraries(stress perfect OpenMP::OpenMP_CXX)
else()
  message(WARNING "didn't find OpenMP, some benchmarks will be unavailable.")
endif()
|
||||
|
||||
|
23
tools/disable_cpu_turbo.cpp
Normal file
23
tools/disable_cpu_turbo.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/cpu_turbo.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
CpuTurboState state;
|
||||
|
||||
perfect::init();
|
||||
|
||||
PERFECT(get_cpu_turbo_state(&state));
|
||||
|
||||
if (!is_turbo_enabled(state)) {
|
||||
std::cerr << "cpu turbo already disabled\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
} else {
|
||||
PERFECT(disable_cpu_turbo());
|
||||
std::cerr << "disabled cpu turbo\n";
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
@@ -6,7 +6,6 @@ using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
Result ret;
|
||||
CpuTurboState state;
|
||||
|
||||
perfect::init();
|
9
tools/max_os_perf.cpp
Normal file
9
tools/max_os_perf.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include "perfect/os_perf.hpp"
|
||||
|
||||
/*! Request the maximum OS performance state on every CPU reported by
    perfect::cpus(). Each library call is checked through PERFECT.
 */
int main(void) {
  PERFECT(perfect::init());

  // same lifetime and binding the range-for would give the returned list
  auto &&cpuList = perfect::cpus();
  for (auto id : cpuList) {
    PERFECT(perfect::os_perf_state_maximum(id));
  }

  return 0;
}
|
9
tools/min_os_perf.cpp
Normal file
9
tools/min_os_perf.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include "perfect/os_perf.hpp"
|
||||
|
||||
/*! Request the minimum OS performance state on every CPU reported by
    perfect::cpus(). Each library call is checked through PERFECT.
 */
int main(void) {
  PERFECT(perfect::init());

  // same lifetime and binding the range-for would give the returned list
  auto &&cpuList = perfect::cpus();
  for (auto id : cpuList) {
    PERFECT(perfect::os_perf_state_minimum(id));
  }

  return 0;
}
|
77
tools/no_aslr.cpp
Normal file
77
tools/no_aslr.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "perfect/aslr.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
PERFECT(init());
|
||||
|
||||
pid_t pid;
|
||||
int status;
|
||||
pid = fork();
|
||||
if (pid == -1) {
|
||||
// pid == -1 means error occured
|
||||
std::cerr << "can't fork, error occured\n";
|
||||
exit(EXIT_FAILURE);
|
||||
} else if (pid == 0) {
|
||||
// in the child process
|
||||
|
||||
// skip the first argument, which is this program
|
||||
std::vector<char*> args;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
args.push_back(argv[i]);
|
||||
}
|
||||
assert(args.size() > 0);
|
||||
args.push_back(nullptr);
|
||||
|
||||
PERFECT(disable_aslr());
|
||||
|
||||
// the execv() only return if error occured.
|
||||
// The return value is -1
|
||||
return execvp(args[0], args.data());
|
||||
} else {
|
||||
// parent process
|
||||
|
||||
if (waitpid(pid, &status, 0) > 0) {
|
||||
|
||||
if (WIFEXITED(status) && !WEXITSTATUS(status)) {
|
||||
// success
|
||||
exit(status);
|
||||
}
|
||||
|
||||
else if (WIFEXITED(status) && WEXITSTATUS(status)) {
|
||||
if (WEXITSTATUS(status) == 127) {
|
||||
|
||||
// execv failed
|
||||
std::cerr << "execv failed\n";
|
||||
exit(status);
|
||||
} else {
|
||||
std::cerr << "program terminated normally, but returned a non-zero status\n";
|
||||
exit(status);
|
||||
}
|
||||
} else {
|
||||
printf("program didn't terminate normally\n");
|
||||
exit(status);
|
||||
}
|
||||
} else {
|
||||
// waitpid() failed
|
||||
printf("waitpid() failed\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
return 0;
|
||||
}
|
49
tools/stress.cpp
Normal file
49
tools/stress.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <omp.h>
|
||||
#include <cstring>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
size_t numThreads = std::stoi(argv[1]);
|
||||
std::vector<size_t> totals(numThreads, 0);
|
||||
omp_set_num_threads(numThreads);
|
||||
|
||||
auto start = std::chrono::system_clock::now();
|
||||
double time = std::stod(argv[2]);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
size_t tid = omp_get_thread_num();
|
||||
double a = rand();
|
||||
while (true) {
|
||||
for (size_t i = 0; i < 500; ++i) {
|
||||
double x;
|
||||
asm volatile(""::"r"(a));
|
||||
x = sqrt(a);
|
||||
asm volatile(""::"r"(x));
|
||||
|
||||
asm volatile(""::"r"(a));
|
||||
x = sqrt(a);
|
||||
asm volatile(""::"r"(x));
|
||||
}
|
||||
totals[tid] += 1000;
|
||||
auto elapsed = (std::chrono::system_clock::now() - start).count() / 1e9;
|
||||
if (elapsed > time) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t sum = 0;
|
||||
for (auto t : totals) {
|
||||
sum += t;
|
||||
}
|
||||
std::cout << (double)sum / time << "\n";
|
||||
|
||||
};
|
14
tools/sync_drop_caches.cpp
Normal file
14
tools/sync_drop_caches.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/drop_caches.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
PERFECT(init());
|
||||
PERFECT(perfect::sync());
|
||||
PERFECT(drop_caches(DropCaches_t(PAGECACHE | ENTRIES)));
|
||||
}
|
Reference in New Issue
Block a user