Squashed commit of the following:
commit 2ac5b61d544fb46a7f74e62729882ba15100bf1f Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 14:53:10 2019 -0500 changelog in readme commit 1072efa135c840753892d350985129d6ebc88e7e Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 14:50:09 2019 -0500 clean up gpu_monitor example commit a8b111a84e61d466d1eb3f4ed9133b428f775714 Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 14:47:52 2019 -0500 add examples/gpu_monitor commit b0a46680c666c06119d74d462acb2e9a69bf7d85 Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 14:46:12 2019 -0500 update readme commit b8d29e4d89b5ca0e7fffe71dc969e44cbbd53ab9 Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 14:41:27 2019 -0500 add GPU monitor
This commit is contained in:
37
README.md
37
README.md
@@ -11,6 +11,7 @@ CPU/GPU performance control library for benchmarking
|
||||
|
||||
## Features
|
||||
|
||||
- [x] GPU power/utilization/temperature monitoring (nvidia)
|
||||
- [x] Disable CPU turbo (linux)
|
||||
- [x] Set OS CPU performance mode to maximum (linux)
|
||||
- [x] Set GPU clocks (nvidia)
|
||||
@@ -67,6 +68,22 @@ perfect::CpuTurboState state;
|
||||
PERFECT(perfect::get_cpu_turbo_state(&state));
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
`perfect` can monitor and record GPU activity.
|
||||
|
||||
See [examples/gpu_monitor.cu](examples/gpu_monitor.cu)
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_monitor.hpp"
|
||||
```
|
||||
|
||||
* `Monitor(std::ostream *stream)`: create a monitor that will write to `stream`.
|
||||
* `void Monitor::start()`: start the monitor
|
||||
* `void Monitor::stop()`: terminate the monitor
|
||||
* `void Monitor::pause()`: pause the monitor thread
|
||||
* `void Monitor::resume()`: resume the monitor thread
|
||||
|
||||
### CPU Turbo
|
||||
|
||||
`perfect` can enable and disable CPU boost through the Intel p-state mechanism or the ACPI cpufreq mechanism.
|
||||
@@ -121,7 +138,7 @@ See [examples/gpu_turbo.cu]
|
||||
See [examples/gpu_clocks.cu]
|
||||
|
||||
```c++
|
||||
#include "perfect/gpu_clocks.hpp`
|
||||
#include "perfect/gpu_clocks.hpp"
|
||||
```
|
||||
|
||||
* `Result set_max_gpu_clocks(unsigned int idx)`: Set GPU `idx` clocks to their maximum reported values.
|
||||
@@ -134,13 +151,23 @@ See [examples/gpu_clocks.cu]
|
||||
See [examples/cpu_cache.cpp].
|
||||
|
||||
```c++
|
||||
#include "perfect/cpu_cache.hpp`
|
||||
#include "perfect/cpu_cache.hpp"
|
||||
```
|
||||
|
||||
* `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes.
|
||||
|
||||
## Changelog
|
||||
|
||||
* v0.2.0
|
||||
* add GPU monitoring
|
||||
* v0.1.0
|
||||
* cache control
|
||||
* Intel P-State control
|
||||
* linux governor control
|
||||
* POWER cpufreq control
|
||||
* Nvidia GPU boost control
|
||||
* Nvidia GPU clock control
|
||||
|
||||
## Wish List
|
||||
|
||||
|
||||
- [ ] Nvidia GPU power monitoring
|
||||
- [ ] Nivida GPU utilization monitoring
|
||||
- [ ] nothing right now
|
@@ -51,4 +51,9 @@ endif()
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
add_executable(gpu-turbo gpu_turbo.cu)
|
||||
target_link_libraries(gpu-turbo perfect)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
add_executable(gpu-power gpu_power.cu)
|
||||
target_link_libraries(gpu-power perfect)
|
||||
endif()
|
25
examples/gpu_monitor.cu
Normal file
25
examples/gpu_monitor.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "perfect/gpu_monitor.hpp"
|
||||
|
||||
int main(void) {
|
||||
using namespace perfect;
|
||||
init();
|
||||
|
||||
// write to stderr
|
||||
Monitor m(&std::cerr);
|
||||
|
||||
// don't record GPU utilization
|
||||
m.config.utilization = false;
|
||||
|
||||
m.start();
|
||||
|
||||
// ctrl-c to exit
|
||||
while (true) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(5000));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
147
include/perfect/gpu_monitor.hpp
Normal file
147
include/perfect/gpu_monitor.hpp
Normal file
@@ -0,0 +1,147 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "perfect/init.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
class Monitor {
|
||||
public:
|
||||
struct Config {
|
||||
std::atomic<bool> stop;
|
||||
std::atomic<bool> pause;
|
||||
double samplePeriodMs;
|
||||
|
||||
std::atomic<bool> power;
|
||||
std::atomic<bool> utilization;
|
||||
std::atomic<bool> temperature;
|
||||
std::atomic<bool> pstate;
|
||||
std::ostream *stream_;
|
||||
|
||||
Config(std::ostream *stream)
|
||||
: stop(true), pause(false), power(true), samplePeriodMs(100),
|
||||
utilization(true), temperature(true), pstate(true), stream_(stream) {}
|
||||
};
|
||||
|
||||
std::thread worker;
|
||||
Config config;
|
||||
|
||||
Monitor(std::ostream *stream) : config(stream) {}
|
||||
|
||||
static void worker_func(const Config &cfg) {
|
||||
|
||||
nvmlReturn_t ret;
|
||||
nvmlDevice_t device;
|
||||
unsigned int deviceCount;
|
||||
|
||||
nvmlUtilization_t utilization;
|
||||
unsigned int milliwatts;
|
||||
unsigned int temperature;
|
||||
nvmlPstates_t pState;
|
||||
|
||||
ret = nvmlDeviceGetCount(&deviceCount);
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> start;
|
||||
|
||||
while (!cfg.stop.load()) {
|
||||
if (std::chrono::time_point<std::chrono::system_clock>() == start) {
|
||||
start = std::chrono::system_clock::now();
|
||||
}
|
||||
if (!cfg.pause.load()) {
|
||||
|
||||
const double elapsed =
|
||||
(std::chrono::system_clock::now() - start).count() / 1e9 * 1e3;
|
||||
|
||||
for (unsigned int i = 0; i < deviceCount; ++i) {
|
||||
|
||||
(*cfg.stream_) << elapsed << "," << i;
|
||||
|
||||
ret = nvmlDeviceGetHandleByIndex(i, &device);
|
||||
|
||||
if (cfg.power.load()) {
|
||||
ret = nvmlDeviceGetPowerUsage(device, &milliwatts);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << milliwatts;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.utilization.load()) {
|
||||
// period is between 1 second and 1/6 second depending on product
|
||||
ret = nvmlDeviceGetUtilizationRates(device, &utilization);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_)
|
||||
<< "," << utilization.gpu << "," << utilization.memory;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1 << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x"
|
||||
<< ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.temperature.load()) {
|
||||
ret = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,
|
||||
&temperature);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << temperature;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
if (cfg.pstate.load()) {
|
||||
ret = nvmlDeviceGetPerformanceState(device, &pState);
|
||||
if (ret == NVML_SUCCESS) {
|
||||
(*cfg.stream_) << "," << pState;
|
||||
} else {
|
||||
(*cfg.stream_) << "," << -1;
|
||||
}
|
||||
} else {
|
||||
(*cfg.stream_) << ","
|
||||
<< "x";
|
||||
}
|
||||
|
||||
(*cfg.stream_) << "\n";
|
||||
}
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
}
|
||||
|
||||
void start() {
|
||||
config.stop = false;
|
||||
worker = std::thread(worker_func, std::ref(config));
|
||||
}
|
||||
void stop() {
|
||||
config.stop = true;
|
||||
worker.join();
|
||||
}
|
||||
|
||||
void resume() { config.pause.store(false); }
|
||||
void pause() { config.pause.store(true); }
|
||||
};
|
||||
|
||||
} // namespace perfect
|
@@ -1,13 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
#endif
|
||||
|
||||
#include "perfect/result.hpp"
|
||||
|
||||
namespace perfect {
|
||||
|
||||
/*! initialize the benchmark
|
||||
|
@@ -3,8 +3,10 @@
|
||||
#include <cassert>
|
||||
|
||||
#ifdef __NVCC__
|
||||
#ifndef PERFECT_HAS_CUDA
|
||||
#define PERFECT_HAS_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PERFECT_HAS_CUDA
|
||||
#include <nvml.h>
|
||||
|
Reference in New Issue
Block a user