Compare commits
9 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3da56a3a4a | ||
![]() |
196c8a20b1 | ||
![]() |
d6c861719f | ||
![]() |
7f1b1289bb | ||
![]() |
1c917c3154 | ||
![]() |
b8aca1be1c | ||
![]() |
77b23ac3af | ||
![]() |
8da5aeb754 | ||
![]() |
714f7c3dcd |
@@ -5,7 +5,7 @@
|
||||
# 3.13+ for target_link_directories
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(perfect LANGUAGES CXX VERSION 0.1.0)
|
||||
project(perfect LANGUAGES CXX VERSION 0.3.0)
|
||||
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
|
||||
|
||||
include(CheckLanguage)
|
||||
|
72
README.md
72
README.md
@@ -11,12 +11,14 @@ CPU/GPU performance control library for benchmarking
|
||||
|
||||
## Features
|
||||
|
||||
- [x] GPU power/utilization/temperature monitoring (nvidia)
|
||||
- [x] Disable CPU turbo (linux)
|
||||
- [x] Set OS CPU performance mode to maximum (linux)
|
||||
- [x] Set GPU clocks (nvidia)
|
||||
- [x] Disable GPU turbo (nvidia)
|
||||
- [x] Flush addresses from cache (amd64, POWER)
|
||||
- [x] CUDA not required (GPU functions will not be compiled)
|
||||
- [x] Flush file system caches (linux)
|
||||
|
||||
## Installing
|
||||
|
||||
@@ -67,11 +69,43 @@ perfect::CpuTurboState state;
|
||||
PERFECT(perfect::get_cpu_turbo_state(&state));
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
`perfect` can monitor and record GPU activity.
|
||||
|
||||
See [examples/gpu_monitor.cu](examples/gpu_monitor.cu)
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_monitor.hpp"
|
||||
```
|
||||
|
||||
* `Monitor(std::ostream *stream)`: create a monitor that will write to `stream`.
|
||||
* `void Monitor::start()`: start the monitor
|
||||
* `void Monitor::stop()`: terminate the monitor
|
||||
* `void Monitor::pause()`: pause the monitor thread
|
||||
* `void Monitor::resume()`: resume the monitor thread
|
||||
|
||||
### Flush file system caches
|
||||
|
||||
`perfect` can drop the Linux filesystem caches: the page cache, and the dentry and inode caches.
|
||||
|
||||
See [tools/sync_drop_caches.cpp](tools/sync_drop_caches.cpp)
|
||||
|
||||
```c++
|
||||
#include "perfect/drop_caches.hpp"
|
||||
```
|
||||
|
||||
* `Result sync()`: flush filesystem caches to disk
|
||||
* `Result drop_caches(DropCaches_t mode)`: remove file system caches
|
||||
* `mode = PAGECACHE`: drop page caches
|
||||
* `mode = ENTRIES`: drop dentries and inodes
|
||||
* `mode = PAGECACHE | ENTRIES`: both
|
||||
|
||||
### CPU Turbo
|
||||
|
||||
`perfect` can enable and disable CPU boost through the Intel p-state mechanism or the ACPI cpufreq mechanism.
|
||||
|
||||
See [examples/cpu_turbo.cpp].
|
||||
See [examples/cpu_turbo.cpp](examples/cpu_turbo.cpp).
|
||||
|
||||
|
||||
```c++
|
||||
@@ -88,7 +122,7 @@ See [examples/cpu_turbo.cpp].
|
||||
|
||||
`perfect` can control the OS governor on linux.
|
||||
|
||||
See [examples/os_perf.cpp].
|
||||
See [examples/os_perf.cpp](examples/os_perf.cpp).
|
||||
|
||||
```c++
|
||||
#include "perfect/os_perf.hpp"
|
||||
@@ -102,7 +136,7 @@ See [examples/os_perf.cpp].
|
||||
|
||||
`perfect` can enable/disable GPU turbo boost.
|
||||
|
||||
See [examples/gpu_turbo.cu]
|
||||
See [examples/gpu_turbo.cu](examples/gpu_turbo.cu).
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_turbo.hpp"
|
||||
@@ -118,10 +152,10 @@ See [examples/gpu_turbo.cu]
|
||||
|
||||
`perfect` can lock GPU clocks to their maximum values.
|
||||
|
||||
See [examples/gpu_clocks.cu]
|
||||
See [examples/gpu_clocks.cu](examples/gpu_clocks.cu).
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_clocks.hpp`
|
||||
#include "perfect/gpu_clocks.hpp"
|
||||
```
|
||||
|
||||
* `Result set_max_gpu_clocks(unsigned int idx)`: Set GPU `idx` clocks to their maximum reported values.
|
||||
@@ -131,16 +165,34 @@ See [examples/gpu_clocks.cu]
|
||||
|
||||
`perfect` can flush data from CPU caches. Unlike the other APIs, these do not return a `Result` because they do not fail.
|
||||
|
||||
See [examples/cpu_cache.cpp].
|
||||
See [examples/cpu_cache.cpp](examples/cpu_cache.cpp).
|
||||
|
||||
```c++
|
||||
#include "perfect/cpu_cache.hpp`
|
||||
#include "perfect/cpu_cache.hpp"
|
||||
```
|
||||
|
||||
* `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes.
|
||||
|
||||
## Changelog
|
||||
|
||||
* v0.3.0
|
||||
* Add filesystem cache interface
|
||||
* v0.2.0
|
||||
* add GPU monitoring
|
||||
* Make CUDA optional
|
||||
* v0.1.0
|
||||
* cache control
|
||||
* Intel P-State control
|
||||
* linux governor control
|
||||
* POWER cpufreq control
|
||||
* Nvidia GPU boost control
|
||||
* Nvidia GPU clock control
|
||||
|
||||
## Wish List
|
||||
|
||||
|
||||
- [ ] Nvidia GPU power monitoring
|
||||
- [ ] Nvidia GPU utilization monitoring
|
||||
- [ ] only monitor certain GPUs
|
||||
- [ ] A wrapper utility
|
||||
- [ ] disable hyperthreading
|
||||
- [ ] reserve cores
|
||||
- [ ] set process priority
|
||||
- [ ] disable ASLR
|
@@ -51,4 +51,9 @@ endif()
|
||||
# The GPU examples are .cu sources, so they are only added when a CUDA
# compiler is available (CMAKE_CUDA_COMPILER is set).
if(CMAKE_CUDA_COMPILER)
  add_executable(gpu-turbo gpu_turbo.cu)
  target_link_libraries(gpu-turbo perfect)

  add_executable(gpu-monitor gpu_monitor.cu)
  target_link_libraries(gpu-monitor perfect)
endif()
|
25
examples/gpu_monitor.cu
Normal file
25
examples/gpu_monitor.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "perfect/gpu_monitor.hpp"
|
||||
|
||||
int main(void) {
|
||||
using namespace perfect;
|
||||
init();
|
||||
|
||||
// write to stderr
|
||||
Monitor m(&std::cerr);
|
||||
|
||||
// don't record GPU utilization
|
||||
m.config.utilization = false;
|
||||
|
||||
m.start();
|
||||
|
||||
// ctrl-c to exit
|
||||
while (true) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(5000));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
361
include/perfect/cpu_set.hpp
Normal file
361
include/perfect/cpu_set.hpp
Normal file
@@ -0,0 +1,361 @@
|
||||
#pragma once
|
||||
|
||||
#include <sys/mount.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "init.hpp"
|
||||
#include "result.hpp"
|
||||
|
||||
// Evaluate `stmt` exactly once; if it does not yield Result::SUCCESS, return
// that Result from the enclosing function.
//
// The body is wrapped in do { } while (0) so that the macro plus its trailing
// semicolon forms a single statement and can be used as the unbraced branch of
// an if/else.
#define SUCCESS_OR_RETURN(stmt)                                                \
  do {                                                                         \
    const Result _ret = (stmt);                                                \
    if (_ret != Result::SUCCESS) {                                             \
      return _ret;                                                             \
    }                                                                          \
  } while (0)
|
||||
|
||||
// Set difference: every element of `lhs` that is not contained in `rhs`.
//
// Declared inline because this definition lives in a header; without it,
// including the header from two translation units is a multiple-definition
// error at link time.
inline std::set<int> operator-(const std::set<int> &lhs,
                               const std::set<int> &rhs) {
  std::set<int> result;
  for (const int e : lhs) {
    if (0 == rhs.count(e)) {
      result.insert(e);
    }
  }
  return result;
}
|
||||
|
||||
// Return a copy of `s` with every whitespace character (as classified by
// isspace) removed.
//
// Declared inline because it is defined in a header.
inline std::string remove_space(const std::string &s) {
  std::string result;
  result.reserve(s.size());

  for (const char c : s) {
    // isspace() has undefined behaviour for negative values other than EOF,
    // so plain char must be converted to unsigned char first.
    if (!isspace(static_cast<unsigned char>(c))) {
      result += c;
    }
  }
  return result;
}
|
||||
|
||||
// like "8" or "8-10"
|
||||
// Parse one element of a cpuset list: either a single id ("8") or an
// inclusive range ("8-10"). Whitespace anywhere in the token is ignored.
//
// Returns the set of ids the token denotes; an empty or whitespace-only token
// yields an empty set. std::stoi throws std::invalid_argument /
// std::out_of_range if the token does not start with a representable integer.
//
// Declared inline because it is defined in a header.
inline std::set<int> parse_token(const std::string &token) {
  std::set<int> result;

  // Strip whitespace BEFORE the emptiness check, so a token such as "\n" is
  // treated as empty instead of being handed to std::stoi (which would throw).
  const std::string s = remove_space(token);
  if (s.empty()) {
    return result;
  }

  size_t pos = 0;
  const int first = std::stoi(s, &pos);

  // single int
  if (pos == s.length()) {
    result.insert(first);
    return result;
  }

  // next char should be a "-"
  assert(s[pos] == '-');

  const int second = std::stoi(s.substr(pos + 1));

  // insert first..second inclusive (nothing is inserted if second < first)
  for (int i = first; i <= second; ++i) {
    result.insert(i);
  }
  return result;
}
|
||||
|
||||
std::set<int> parse_cpuset(const std::string &s) {
|
||||
// std::cerr << "parse_cpuset: parsing '" << s << "'\n";
|
||||
std::set<int> result;
|
||||
|
||||
std::string token;
|
||||
std::stringstream ss(s);
|
||||
while (std::getline(ss, token, ',')) {
|
||||
|
||||
if ("\n" != token) {
|
||||
auto newCpus = parse_token(token);
|
||||
for (auto cpu : newCpus) {
|
||||
result.insert(cpu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// http://man7.org/linux/man-pages/man7/cpuset.7.html
|
||||
namespace perfect {
|
||||
class CpuSet {
|
||||
public:
|
||||
std::string path_;
|
||||
std::set<int> cpus_;
|
||||
std::set<int> mems_;
|
||||
CpuSet *parent_;
|
||||
|
||||
// make sure cpuset is initialized
|
||||
static Result init() {
|
||||
|
||||
// check for "nodev cpuset" in /proc/filesystems
|
||||
|
||||
// mkdir /dev/cpuset
|
||||
if (mkdir("/dev/cpuset", S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) {
|
||||
switch (errno) {
|
||||
case EEXIST: {
|
||||
// okay
|
||||
break;
|
||||
}
|
||||
case EACCES:
|
||||
// std::cerr << "access error in mkdir: " << strerror(errno) << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
case EROFS:
|
||||
default:
|
||||
std::cerr << "unhandled error in mkdir: " << strerror(errno) << "\n";
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
|
||||
// mount -t cpuset none /dev/cpuset
|
||||
if (mount("none", "/dev/cpuset", "cpuset", 0, nullptr)) {
|
||||
switch (errno) {
|
||||
case EEXIST: {
|
||||
// okay
|
||||
break;
|
||||
}
|
||||
case EBUSY: {
|
||||
// FIXME: something is mounted here, assume it is what we want
|
||||
// std::cerr << "EBUSY in mount: " << strerror(errno) << "\n";
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
case EPERM: {
|
||||
// std::cerr << "EPERM in mount: " << strerror(errno) << "\n";
|
||||
return Result::NO_PERMISSION;
|
||||
}
|
||||
case ENOENT:
|
||||
case EROFS:
|
||||
default:
|
||||
std::cerr << "unhandled error in mount: " << strerror(errno) << "\n";
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
std::string get_raw_cpus() {
|
||||
std::ifstream is(path_ + "/cpuset.cpus");
|
||||
std::stringstream ss;
|
||||
ss << is.rdbuf();
|
||||
return remove_space(ss.str());
|
||||
}
|
||||
|
||||
std::string get_raw_mems() {
|
||||
std::ifstream is(path_ + "/cpuset.mems");
|
||||
std::stringstream ss;
|
||||
ss << is.rdbuf();
|
||||
return remove_space(ss.str());
|
||||
}
|
||||
|
||||
std::set<int> get_cpus() { return parse_cpuset(get_raw_cpus()); }
|
||||
|
||||
std::set<int> get_mems() { return parse_cpuset(get_raw_mems()); }
|
||||
|
||||
// migrate the caller task from this cpu set to another
|
||||
Result migrate_self_to(CpuSet &other) {
|
||||
// enable memory migration in other
|
||||
other.enable_memory_migration();
|
||||
|
||||
// get my pid
|
||||
pid_t self = this_task();
|
||||
|
||||
// read this tasks and write each line to other.tasks
|
||||
std::ifstream is(path_ + "/tasks");
|
||||
std::string line;
|
||||
while (std::getline(is, line)) {
|
||||
line = remove_space(line);
|
||||
if (std::to_string(self) == line) {
|
||||
// std::cerr << "migrating self task " << line << " to " << other.path
|
||||
// << "\n";
|
||||
other.write_task(line);
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
}
|
||||
return Result::NO_TASK;
|
||||
}
|
||||
|
||||
// migrate tasks in this cpu set to another
|
||||
Result migrate_tasks_to(CpuSet &other) {
|
||||
// enable memory migration in other
|
||||
SUCCESS_OR_RETURN(other.enable_memory_migration());
|
||||
|
||||
// read this tasks and write each line to other.tasks
|
||||
std::ifstream is(path_ + "/tasks");
|
||||
std::string line;
|
||||
while (std::getline(is, line)) {
|
||||
// std::cerr << "migrating task " << line << " to " << other.path << "\n";
|
||||
other.write_task(line);
|
||||
}
|
||||
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result enable_memory_migration() {
|
||||
std::ofstream ofs(path_ + "/" + "cpuset.memory_migrate");
|
||||
ofs << "1";
|
||||
ofs.close();
|
||||
if (ofs.fail()) {
|
||||
switch (errno) {
|
||||
case EACCES:
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
return Result::NOT_SUPPORTED;
|
||||
default:
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
void write_task(const std::string &task) {
|
||||
// write `task` to path/tasks
|
||||
std::ofstream os(path_ + "/tasks");
|
||||
os << task << "\n";
|
||||
}
|
||||
|
||||
// object representing the root CPU set
|
||||
static Result get_root(CpuSet &root) {
|
||||
SUCCESS_OR_RETURN(CpuSet::init());
|
||||
root.path_ = "/dev/cpuset";
|
||||
root.parent_ = nullptr;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
// the ID of this task
|
||||
static pid_t this_task() { return getpid(); }
|
||||
|
||||
Result make_child(CpuSet &child, const std::string &name) {
|
||||
|
||||
if (mkdir((path_ + "/" + name).c_str(),
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) {
|
||||
switch (errno) {
|
||||
case EEXIST: {
|
||||
// okay
|
||||
break;
|
||||
}
|
||||
case EACCES:
|
||||
return Result::NO_PERMISSION;
|
||||
case ENOENT:
|
||||
case EROFS:
|
||||
default:
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
child.path_ = path_ + "/" + name;
|
||||
child.parent_ = this;
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result enable_cpu(const int cpu) {
|
||||
std::set<int> cpus = get_cpus();
|
||||
cpus.insert(cpu);
|
||||
return write_cpus(cpus);
|
||||
}
|
||||
|
||||
Result enable_cpus(const std::set<int> &cpus) {
|
||||
std::set<int> finalCpus = get_cpus();
|
||||
for (auto cpu : cpus) {
|
||||
finalCpus.insert(cpu);
|
||||
}
|
||||
return write_cpus(finalCpus);
|
||||
}
|
||||
|
||||
// FIXME: check error
|
||||
Result write_cpus(std::set<int> cpus) {
|
||||
std::ofstream os(path_ + "/cpuset.cpus");
|
||||
bool comma = false;
|
||||
for (auto cpu : cpus) {
|
||||
if (comma)
|
||||
os << ",";
|
||||
os << cpu << "-" << cpu;
|
||||
comma = true;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
// FIXME: check write
|
||||
Result write_mems(std::set<int> mems) {
|
||||
std::ofstream os(path_ + "/cpuset.mems");
|
||||
bool comma = false;
|
||||
for (auto mem : mems) {
|
||||
if (comma)
|
||||
os << ",";
|
||||
os << mem << "-" << mem;
|
||||
comma = true;
|
||||
}
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
Result enable_mem(const int mem) {
|
||||
std::set<int> mems = get_mems();
|
||||
mems.insert(mem);
|
||||
return write_mems(mems);
|
||||
}
|
||||
|
||||
Result enable_mems(const std::set<int> &mems) {
|
||||
std::set<int> finalMems = get_mems();
|
||||
for (auto mem : mems) {
|
||||
finalMems.insert(mem);
|
||||
}
|
||||
return write_mems(finalMems);
|
||||
}
|
||||
|
||||
Result destroy() {
|
||||
// remove all child cpu sets
|
||||
|
||||
// move all attached processes back to parent
|
||||
assert(parent_);
|
||||
migrate_tasks_to(*parent_);
|
||||
|
||||
// remove with rmdir
|
||||
if (rmdir(path_.c_str())) {
|
||||
switch (errno) {
|
||||
default:
|
||||
std::cerr << "unhandled error in rmdir: " << strerror(errno) << "\n";
|
||||
return Result::UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
path_ = "";
|
||||
return Result::SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
// Stream a CpuSet as its filesystem path.
//
// Declared inline because it is defined in a header.
inline std::ostream &operator<<(std::ostream &s, const CpuSet &c) {
  return s << c.path_;
}
|
||||
|
||||
} // namespace perfect
|
36
include/perfect/detail/fs.hpp
Normal file
36
include/perfect/detail/fs.hpp
Normal file
@@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
namespace detail {
|
||||
// Write `val` to the file at `path`.
//
// Returns:
//   SUCCESS        the value was written and the stream closed cleanly
//   NO_PERMISSION  opening or writing failed with EACCES or EPERM
//   NOT_SUPPORTED  the file could not be opened for another reason, or the
//                  write failed with ENOENT
//   UNKNOWN        the write failed with any other errno
//
// Declared inline because it is defined in a header.
inline Result write_str(const std::string &path, const std::string &val) {
  // Clear errno so a stale value from an earlier call is not misread below.
  errno = 0;

  std::ofstream ofs(path);
  if (ofs.fail()) {
    // Opening a root-only file as an unprivileged user fails here, not at
    // write time, so permission errors have to be recognised at this point.
    switch (errno) {
    case EACCES:
      std::cerr << "EACCES when writing to " << path << "\n";
      return Result::NO_PERMISSION;
    case EPERM:
      std::cerr << "EPERM when writing to " << path << "\n";
      return Result::NO_PERMISSION;
    default:
      return Result::NOT_SUPPORTED;
    }
  }

  ofs << val;
  ofs.close();
  if (ofs.fail()) {
    switch (errno) {
    case EACCES:
      std::cerr << "EACCES when writing to " << path << "\n";
      return Result::NO_PERMISSION;
    case EPERM:
      std::cerr << "EPERM when writing to " << path << "\n";
      return Result::NO_PERMISSION;
    case ENOENT:
      return Result::NOT_SUPPORTED;
    default:
      return Result::UNKNOWN;
    }
  }
  return Result::SUCCESS;
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace perfect
|
43
include/perfect/drop_caches.hpp
Normal file
43
include/perfect/drop_caches.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "result.hpp"
|
||||
#include "init.hpp"
|
||||
#include "detail/fs.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
// Bit flags selecting which caches drop_caches() discards; the two flags may
// be combined with bitwise OR.
enum DropCaches_t {
  PAGECACHE = 1 << 0, // page cache
  ENTRIES = 1 << 1    // dentries and inodes
};
|
||||
|
||||
|
||||
// commit filesystem caches to disk
|
||||
// Calls the C library's ::sync() and always returns SUCCESS.
//
// Declared inline because it is defined in a header.
inline Result sync() {
  // http://man7.org/linux/man-pages/man2/sync.2.html
  ::sync(); // always successful
  return Result::SUCCESS;
}
|
||||
|
||||
// Drop the caches selected by `mode` by writing to /proc/sys/vm/drop_caches:
//   PAGECACHE            -> "1"
//   ENTRIES              -> "2"
//   PAGECACHE | ENTRIES  -> "3"
//
// Returns the result of the write, or UNKNOWN if `mode` contains neither flag.
//
// Declared inline because it is defined in a header.
inline Result drop_caches(const DropCaches_t mode) {
  using detail::write_str;
  const std::string path = "/proc/sys/vm/drop_caches";

  // Test each flag separately. `mode & PAGECACHE & ENTRIES` is always zero
  // (the flags share no bits), which made the combined mode write "1".
  const bool wantPageCache = (mode & PAGECACHE) != 0;
  const bool wantEntries = (mode & ENTRIES) != 0;

  if (wantPageCache && wantEntries) {
    return write_str(path, "3");
  }
  if (wantPageCache) {
    return write_str(path, "1");
  }
  if (wantEntries) {
    return write_str(path, "2");
  }

  std::cerr << "unexpected mode: " << mode << "\n";
  return Result::UNKNOWN;
}
|
||||
|
||||
}
|
147
include/perfect/gpu_monitor.hpp
Normal file
147
include/perfect/gpu_monitor.hpp
Normal file
@@ -0,0 +1,147 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/init.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
class Monitor {
|
||||
public:
|
||||
struct Config {
|
||||
std::atomic<bool> stop;
|
||||
std::atomic<bool> pause;
|
||||
double samplePeriodMs;
|
||||
|
||||
std::atomic<bool> power;
|
||||
std::atomic<bool> utilization;
|
||||
std::atomic<bool> temperature;
|
||||
std::atomic<bool> pstate;
|
||||
std::ostream *stream_;
|
||||
|
||||
Config(std::ostream *stream)
|
||||
: stop(true), pause(false), power(true), samplePeriodMs(100),
|
||||
utilization(true), temperature(true), pstate(true), stream_(stream) {}
|
||||
};
|
||||
|
||||
std::thread worker;
|
||||
Config config;
|
||||
|
||||
Monitor(std::ostream *stream) : config(stream) {}
|
||||
|
||||
static void worker_func(const Config &cfg) {
|
||||
|
||||
nvmlReturn_t ret;
|
||||
nvmlDevice_t device;
|
||||
unsigned int deviceCount;
|
||||
|
||||
nvmlUtilization_t utilization;
|
||||
unsigned int milliwatts;
|
||||
unsigned int temperature;
|
||||
nvmlPstates_t pState;
|
||||
|
||||
ret = nvmlDeviceGetCount(&deviceCount);
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> start;
|
||||
|
||||
while (!cfg.stop.load()) {
|
||||
if (std::chrono::time_point<std::chrono::system_clock>() == start) {
|
||||
start = std::chrono::system_clock::now();
|
||||
}
|
||||
if (!cfg.pause.load()) {
|
||||
|
||||
const double elapsed =
|
||||
(std::chrono::system_clock::now() - start).count() / 1e9 * 1e3;
|
||||
|
||||
for (unsigned int i = 0; i < deviceCount; ++i) {
|
||||
|
||||
(*cfg.stream_) << elapsed << "," << i;
|
||||
|
||||
ret = nvmlDeviceGetHandleByIndex(i, &device);
|
||||
|
||||
if (cfg.power.load()) {
|
||||
ret = nvmlDeviceGetPowerUsage(device, &milliwatts);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << milliwatts;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.utilization.load()) {
|
||||
// period is between 1 second and 1/6 second depending on product
|
||||
ret = nvmlDeviceGetUtilizationRates(device, &utilization);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_)
|
||||
<< "," << utilization.gpu << "," << utilization.memory;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1 << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x"
|
||||
<< ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.temperature.load()) {
|
||||
ret = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,
|
||||
&temperature);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << temperature;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.pstate.load()) {
|
||||
ret = nvmlDeviceGetPerformanceState(device, &pState);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << pState;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
|
||||
(*cfg.stream_) << "\n";
|
||||
}
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
}
|
||||
|
||||
void start() {
|
||||
config.stop = false;
|
||||
worker = std::thread(worker_func, std::ref(config));
|
||||
}
|
||||
void stop() {
|
||||
config.stop = true;
|
||||
worker.join();
|
||||
}
|
||||
|
||||
void resume() { config.pause.store(false); }
|
||||
void pause() { config.pause.store(true); }
|
||||
};
|
||||
|
||||
} // namespace perfect
|
@@ -1,13 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! initialize the benchmark
|
||||
|
@@ -3,8 +3,10 @@
|
||||
#include <cassert>
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
@@ -76,3 +78,12 @@ inline void check(Result result, const char *file, const int line) {
|
||||
} // namespace perfect
|
||||
|
||||
// PERFECT(stmt): pass the value of `stmt` to check() together with the file
// and line of the call site.
// NOTE: the expansion already ends in ';', so `PERFECT(x);` yields an extra
// empty statement; avoid using it as the unbraced body of an if that has an
// else.
#define PERFECT(stmt) check(stmt, __FILE__, __LINE__);
|
||||
|
||||
// Evaluate `stmt` exactly once; if it does not yield Result::SUCCESS, return
// that Result from the enclosing function.
//
// Wrapped in do { } while (0) so that the macro plus its trailing semicolon is
// a single statement and can be used as the unbraced branch of an if/else.
// `Result` is looked up unqualified at the point of use.
#define PERFECT_SUCCESS_OR_RETURN(stmt)                                        \
  do {                                                                         \
    const Result _ret = (stmt);                                                \
    if (_ret != Result::SUCCESS) {                                             \
      return _ret;                                                             \
    }                                                                          \
  } while (0)
|
||||
|
@@ -37,3 +37,5 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
|
||||
# Each tool is built from a single source file and linked against perfect.
# The target name is the source file's stem with '_' replaced by '-'.
foreach(tool_stem enable_turbo sync_drop_caches)
  string(REPLACE "_" "-" tool_target "${tool_stem}")
  add_executable(${tool_target} ${tool_stem}.cpp)
  target_link_libraries(${tool_target} perfect)
endforeach()
|
14
tools/sync_drop_caches.cpp
Normal file
14
tools/sync_drop_caches.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/drop_caches.hpp"
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
int main(void) {
|
||||
|
||||
using namespace perfect;
|
||||
|
||||
PERFECT(init());
|
||||
PERFECT(perfect::sync());
|
||||
PERFECT(drop_caches(DropCaches_t(PAGECACHE | ENTRIES)));
|
||||
}
|
Reference in New Issue
Block a user