Squashed commit of the following:

commit 2ac5b61d544fb46a7f74e62729882ba15100bf1f
Author: Carl Pearson <pearson@illinois.edu>
Date:   Fri Sep 20 14:53:10 2019 -0500

    changelog in readme

commit 1072efa135c840753892d350985129d6ebc88e7e
Author: Carl Pearson <pearson@illinois.edu>
Date:   Fri Sep 20 14:50:09 2019 -0500

    clean up gpu_monitor example

commit a8b111a84e61d466d1eb3f4ed9133b428f775714
Author: Carl Pearson <pearson@illinois.edu>
Date:   Fri Sep 20 14:47:52 2019 -0500

    add examples/gpu_monitor

commit b0a46680c666c06119d74d462acb2e9a69bf7d85
Author: Carl Pearson <pearson@illinois.edu>
Date:   Fri Sep 20 14:46:12 2019 -0500

    update readme

commit b8d29e4d89b5ca0e7fffe71dc969e44cbbd53ab9
Author: Carl Pearson <pearson@illinois.edu>
Date:   Fri Sep 20 14:41:27 2019 -0500

    add GPU monitor
This commit is contained in:
Carl Pearson
2019-09-20 14:53:27 -05:00
parent 91e15ee7ea
commit 714f7c3dcd
6 changed files with 215 additions and 5 deletions

View File

@@ -11,6 +11,7 @@ CPU/GPU performance control library for benchmarking
## Features
- [x] GPU power/utilization/temperature monitoring (nvidia)
- [x] Disable CPU turbo (linux)
- [x] Set OS CPU performance mode to maximum (linux)
- [x] Set GPU clocks (nvidia)
@@ -67,6 +68,22 @@ perfect::CpuTurboState state;
PERFECT(perfect::get_cpu_turbo_state(&state));
```
## Monitoring
`perfect` can monitor and record GPU activity.
See [examples/gpu_monitor.cu](examples/gpu_monitor.cu)
```c++
#include "perfect/gpu_monitor.hpp"
```
* `Monitor(std::ostream *stream)`: create a monitor that will write to `stream`.
* `void Monitor::start()`: start the monitor
* `void Monitor::stop()`: terminate the monitor
* `void Monitor::pause()`: pause the monitor thread
* `void Monitor::resume()`: resume the monitor thread
### CPU Turbo
`perfect` can enable and disable CPU boost through the Intel p-state mechanism or the ACPI cpufreq mechanism.
@@ -121,7 +138,7 @@ See [examples/gpu_turbo.cu]
See [examples/gpu_clocks.cu]
```c++
#include "perfect/gpu_clocks.hpp`
#include "perfect/gpu_clocks.hpp"
```
* `Result set_max_gpu_clocks(unsigned int idx)`: Set GPU `idx` clocks to their maximum reported values.
@@ -134,13 +151,23 @@ See [examples/gpu_clocks.cu]
See [examples/cpu_cache.cpp].
```c++
#include "perfect/cpu_cache.hpp`
#include "perfect/cpu_cache.hpp"
```
* `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes.
## Changelog
* v0.2.0
* add GPU monitoring
* v0.1.0
* cache control
* Intel P-State control
* linux governor control
* POWER cpufreq control
* Nvidia GPU boost control
* Nvidia GPU clock control
## Wish List
- [ ] Nvidia GPU power monitoring
- [ ] Nvidia GPU utilization monitoring
- [ ] nothing right now

View File

@@ -51,4 +51,9 @@ endif()
# Build the gpu-turbo example from gpu_turbo.cu, linked against the perfect
# target, only when CMAKE_CUDA_COMPILER is set.
if(CMAKE_CUDA_COMPILER)
add_executable(gpu-turbo gpu_turbo.cu)
target_link_libraries(gpu-turbo perfect)
endif()
# Build the gpu-power example from gpu_power.cu, linked against the perfect
# target, only when CMAKE_CUDA_COMPILER is set.
# NOTE(review): the GPU monitoring example shown alongside this fragment is
# named gpu_monitor.cu -- confirm that gpu_power.cu exists, or whether this
# target was meant to build gpu_monitor.cu instead.
if(CMAKE_CUDA_COMPILER)
add_executable(gpu-power gpu_power.cu)
target_link_libraries(gpu-power perfect)
endif()

25
examples/gpu_monitor.cu Normal file
View File

@@ -0,0 +1,25 @@
#include <chrono>
#include <iostream>
#include <thread>
#include "perfect/gpu_monitor.hpp"
int main(void) {
using namespace perfect;
init();
// write to stderr
Monitor m(&std::cerr);
// don't record GPU utilization
m.config.utilization = false;
m.start();
// ctrl-c to exit
while (true) {
std::this_thread::sleep_for(std::chrono::milliseconds(5000));
}
return 0;
}

View File

@@ -0,0 +1,147 @@
#pragma once
// If __NVCC__ is defined, make sure PERFECT_HAS_CUDA is defined as well
// (unless the build already defined it).
#ifdef __NVCC__
#ifndef PERFECT_HAS_CUDA
#define PERFECT_HAS_CUDA
#endif
#endif
// The NVML API is only pulled in for builds that define PERFECT_HAS_CUDA.
#ifdef PERFECT_HAS_CUDA
#include <nvml.h>
#endif
#include <atomic>
#include <chrono>
#include <string>
#include <thread>
#include <iostream>
#include "perfect/init.hpp"
namespace perfect {
/*! \brief Samples NVML readings for every GPU on a background thread and
writes them to a stream as comma-separated rows.

Each sample produces one line per device:

    elapsed_ms,device_index,power,util_gpu,util_mem,temperature,pstate

A field that is disabled in Config is written as "x"; a field whose NVML query
fails (including failure to obtain the device handle) is written as -1.

The NVML types used here come from <nvml.h>, which this header only includes
when PERFECT_HAS_CUDA is defined.
NOTE(review): presumably NVML must already be initialised (e.g. through
perfect::init()) before start() is called -- confirm against init.hpp.
*/
class Monitor {
public:
  /*! \brief Settings shared between the owning thread and the sampling thread.
   */
  struct Config {
    std::atomic<bool> stop;        //!< request the worker loop to exit
    std::atomic<bool> pause;       //!< while set, the worker skips sampling
    double samplePeriodMs;         //!< delay between samples in milliseconds;
                                   //!< read once when the worker starts
    std::atomic<bool> power;       //!< record power usage
    std::atomic<bool> utilization; //!< record GPU and memory utilization
    std::atomic<bool> temperature; //!< record GPU temperature
    std::atomic<bool> pstate;      //!< record performance state
    std::ostream *stream_;         //!< destination for samples (not owned)

    // Initializers are listed in declaration order, which is the order in
    // which the members are actually initialized.
    Config(std::ostream *stream)
        : stop(true), pause(false), samplePeriodMs(100), power(true),
          utilization(true), temperature(true), pstate(true), stream_(stream) {}
  };

  std::thread worker;
  Config config;

  /*! \brief Create a monitor that will write to `stream`.
      \param stream output stream; it must outlive the running monitor
  */
  Monitor(std::ostream *stream) : config(stream) {}

  /*! \brief Stop and join the worker so a still-running thread is never
      destroyed (which would call std::terminate). */
  ~Monitor() { stop(); }

  // The worker thread refers to this object's Config, so the monitor must not
  // be copied.
  Monitor(const Monitor &) = delete;
  Monitor &operator=(const Monitor &) = delete;

  /*! \brief Sampling loop executed on the worker thread.

      Runs until `cfg.stop` becomes true. Returns immediately if there is no
      output stream or if the device count cannot be queried.
      \param cfg configuration shared with the owning Monitor
  */
  static void worker_func(const Config &cfg) {
    if (nullptr == cfg.stream_) {
      return; // nowhere to write samples
    }
    std::ostream &out = *cfg.stream_;

    unsigned int deviceCount = 0;
    if (nvmlDeviceGetCount(&deviceCount) != NVML_SUCCESS) {
      return; // cannot enumerate devices, so there is nothing to sample
    }

    // samplePeriodMs is a plain double, so read it once here rather than
    // racing with the owning thread on every iteration.
    const std::chrono::duration<double, std::milli> period(cfg.samplePeriodMs);

    // A monotonic clock keeps elapsed times unaffected by wall-clock changes.
    const auto start = std::chrono::steady_clock::now();

    while (!cfg.stop.load()) {
      if (!cfg.pause.load()) {
        // Convert through a millisecond duration instead of assuming the
        // clock's tick period.
        const double elapsed =
            std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - start)
                .count();

        for (unsigned int i = 0; i < deviceCount; ++i) {
          out << elapsed << "," << i;

          // Only query a device whose handle was actually obtained.
          nvmlDevice_t device;
          const bool haveDevice =
              nvmlDeviceGetHandleByIndex(i, &device) == NVML_SUCCESS;

          if (cfg.power.load()) {
            unsigned int milliwatts = 0;
            if (haveDevice &&
                nvmlDeviceGetPowerUsage(device, &milliwatts) == NVML_SUCCESS) {
              out << "," << milliwatts;
            } else {
              out << "," << -1;
            }
          } else {
            out << ","
                << "x";
          }

          if (cfg.utilization.load()) {
            // period is between 1 second and 1/6 second depending on product
            nvmlUtilization_t utilization;
            if (haveDevice && nvmlDeviceGetUtilizationRates(
                                  device, &utilization) == NVML_SUCCESS) {
              out << "," << utilization.gpu << "," << utilization.memory;
            } else {
              out << "," << -1 << "," << -1;
            }
          } else {
            out << ","
                << "x"
                << ","
                << "x";
          }

          if (cfg.temperature.load()) {
            unsigned int temperature = 0;
            if (haveDevice &&
                nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,
                                         &temperature) == NVML_SUCCESS) {
              out << "," << temperature;
            } else {
              out << "," << -1;
            }
          } else {
            out << ","
                << "x";
          }

          if (cfg.pstate.load()) {
            nvmlPstates_t pState;
            if (haveDevice && nvmlDeviceGetPerformanceState(
                                  device, &pState) == NVML_SUCCESS) {
              out << "," << pState;
            } else {
              out << "," << -1;
            }
          } else {
            out << ","
                << "x";
          }

          out << "\n";
        }
      }
      std::this_thread::sleep_for(period);
    }
  }

  /*! \brief Start the sampling thread. Does nothing if a worker thread is
      already attached (call stop() first to restart). */
  void start() {
    if (worker.joinable()) {
      return;
    }
    config.stop = false;
    worker = std::thread([this] { worker_func(config); });
  }

  /*! \brief Ask the sampling thread to exit and wait for it. Safe to call
      when the monitor was never started or has already been stopped. */
  void stop() {
    config.stop = true;
    if (worker.joinable()) {
      worker.join();
    }
  }

  /*! \brief Resume sampling after pause(). */
  void resume() { config.pause.store(false); }

  /*! \brief Suspend sampling; the worker thread keeps running but emits
      nothing. */
  void pause() { config.pause.store(true); }
};
} // namespace perfect

View File

@@ -1,13 +1,17 @@
#pragma once
// If __NVCC__ is defined, make sure PERFECT_HAS_CUDA is defined as well
// (unless the build already defined it).
#ifdef __NVCC__
#ifndef PERFECT_HAS_CUDA
#define PERFECT_HAS_CUDA
#endif
#endif
// The NVML API is only pulled in for builds that define PERFECT_HAS_CUDA.
#ifdef PERFECT_HAS_CUDA
#include <nvml.h>
#endif
#include "perfect/result.hpp"
namespace perfect {
/*! initialize the benchmark

View File

@@ -3,8 +3,10 @@
#include <cassert>
// If __NVCC__ is defined, make sure PERFECT_HAS_CUDA is defined as well
// (unless the build already defined it).
#ifdef __NVCC__
#ifndef PERFECT_HAS_CUDA
#define PERFECT_HAS_CUDA
#endif
#endif
#ifdef PERFECT_HAS_CUDA
#include <nvml.h>