diff --git a/README.md b/README.md index cfcf41c..21f04fe 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ CPU/GPU performance control library for benchmarking ## Features +- [x] GPU power/utilization/temperature monitoring (nvidia) - [x] Disable CPU turbo (linux) - [x] Set OS CPU performance mode to maximum (linux) - [x] Set GPU clocks (nvidia) @@ -67,6 +68,22 @@ perfect::CpuTurboState state; PERFECT(perfect::get_cpu_turbo_state(&state)); ``` +## Monitoring + +`perfect` can monitor and record GPU activity. + +See [examples/gpu_monitor.cu](examples/gpu_monitor.cu) + +```c++ +#include "perfect/gpu_monitor.hpp" +``` + +* `Monitor(std::ostream *stream)`: create a monitor that will write to `stream`. +* `void Monitor::start()`: start the monitor +* `void Monitor::stop()`: terminate the monitor +* `void Monitor::pause()`: pause the monitor thread +* `void Monitor::resume()`: resume the monitor thread + ### CPU Turbo `perfect` can enable and disable CPU boost through the Intel p-state mechanism or the ACPI cpufreq mechanism. @@ -121,7 +138,7 @@ See [examples/gpu_turbo.cu] See [examples/gpu_clocks.cu] ```c++ -#include "perfect/gpu_clocks.hpp` +#include "perfect/gpu_clocks.hpp" ``` * `Result set_max_gpu_clocks(unsigned int idx)`: Set GPU `idx` clocks to their maximum reported values. @@ -134,13 +151,23 @@ See [examples/gpu_clocks.cu] See [examples/cpu_cache.cpp]. ```c++ -#include "perfect/cpu_cache.hpp` +#include "perfect/cpu_cache.hpp" ``` * `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes. 
+## Changelog
+
+* v0.2.0
+  * add GPU monitoring
+* v0.1.0
+  * cache control
+  * Intel P-State control
+  * linux governor control
+  * POWER cpufreq control
+  * Nvidia GPU boost control
+  * Nvidia GPU clock control
+
 ## Wish List
-
-- [ ] Nvidia GPU power monitoring
-- [ ] Nivida GPU utilization monitoring
\ No newline at end of file
+- [ ] nothing right now
\ No newline at end of file
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 4f761cd..1730116 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -51,4 +51,9 @@ endif()
 if(CMAKE_CUDA_COMPILER)
   add_executable(gpu-turbo gpu_turbo.cu)
   target_link_libraries(gpu-turbo perfect)
+endif()
+
+if(CMAKE_CUDA_COMPILER)
+  add_executable(gpu-monitor gpu_monitor.cu)
+  target_link_libraries(gpu-monitor perfect)
 endif()
\ No newline at end of file
diff --git a/examples/gpu_monitor.cu b/examples/gpu_monitor.cu
new file mode 100644
index 0000000..2e67331
--- /dev/null
+++ b/examples/gpu_monitor.cu
@@ -0,0 +1,25 @@
+#include <chrono>
+#include <iostream>
+#include <thread>
+
+#include "perfect/gpu_monitor.hpp"
+
+int main(void) {
+  using namespace perfect;
+  init();
+
+  // write to stderr
+  Monitor m(&std::cerr);
+
+  // don't record GPU utilization
+  m.config.utilization = false;
+
+  m.start();
+
+  // ctrl-c to exit
+  while (true) {
+    std::this_thread::sleep_for(std::chrono::milliseconds(5000));
+  }
+
+  return 0;
+}
\ No newline at end of file
diff --git a/include/perfect/gpu_monitor.hpp b/include/perfect/gpu_monitor.hpp
new file mode 100644
index 0000000..0857193
--- /dev/null
+++ b/include/perfect/gpu_monitor.hpp
@@ -0,0 +1,179 @@
+#pragma once
+
+#ifdef __NVCC__
+#ifndef PERFECT_HAS_CUDA
+#define PERFECT_HAS_CUDA
+#endif
+#endif
+
+#ifdef PERFECT_HAS_CUDA
+#include <nvml.h>
+#endif
+
+#include <atomic>
+#include <chrono>
+#include <functional>
+#include <ostream>
+#include <thread>
+
+#include <nvml.h>
+
+#include "perfect/init.hpp"
+
+namespace perfect {
+
+/*! \brief Samples NVML metrics for every GPU on a background thread.
+ *
+ * Each sample writes one CSV record per GPU to the configured stream:
+ * `elapsed_ms,device,power,gpu_util,mem_util,temperature,pstate`.
+ * A field is `x` when that metric is disabled in `config` and `-1` when the
+ * NVML query for it fails.
+ *
+ * NOTE: presumably NVML must be initialized first (examples/gpu_monitor.cu
+ * calls `perfect::init()` before `start()`).
+ */
+class Monitor {
+public:
+  /*! \brief Settings shared between the owner and the sampling thread. */
+  struct Config {
+    std::atomic<bool> stop;  //!< ask the sampling thread to exit
+    std::atomic<bool> pause; //!< skip sampling while true
+    //! time between samples in milliseconds; read once when the thread starts
+    double samplePeriodMs;
+
+    std::atomic<bool> power;       //!< record power usage
+    std::atomic<bool> utilization; //!< record GPU and memory utilization
+    std::atomic<bool> temperature; //!< record GPU temperature
+    std::atomic<bool> pstate;      //!< record performance state
+    std::ostream *stream_;         //!< destination for records, not owned
+
+    // initializers are listed in declaration order, the order they run in
+    Config(std::ostream *stream)
+        : stop(true), pause(false), samplePeriodMs(100), power(true),
+          utilization(true), temperature(true), pstate(true), stream_(stream) {}
+  };
+
+  std::thread worker;
+  Config config;
+
+  /*! \brief Create a stopped monitor that will write to `stream`. */
+  Monitor(std::ostream *stream) : config(stream) {}
+
+  /*! \brief Stop and join the sampling thread; destroying a joinable
+   *  std::thread would otherwise call std::terminate. */
+  ~Monitor() { stop(); }
+
+  /*! \brief Sampling loop; returns once `cfg.stop` is set. */
+  static void worker_func(const Config &cfg) {
+    if (nullptr == cfg.stream_) {
+      return;
+    }
+    std::ostream &out = *cfg.stream_;
+
+    // without a valid device count there is nothing to sample
+    unsigned int deviceCount = 0;
+    if (NVML_SUCCESS != nvmlDeviceGetCount(&deviceCount)) {
+      return;
+    }
+
+    const std::chrono::duration<double, std::milli> period(cfg.samplePeriodMs);
+    const auto start = std::chrono::steady_clock::now();
+
+    while (!cfg.stop.load()) {
+      if (!cfg.pause.load()) {
+
+        // milliseconds since the loop began, whatever the clock's tick size
+        const std::chrono::duration<double, std::milli> elapsed =
+            std::chrono::steady_clock::now() - start;
+
+        for (unsigned int i = 0; i < deviceCount; ++i) {
+
+          out << elapsed.count() << "," << i;
+
+          // if the handle lookup fails, every enabled metric reports -1
+          nvmlDevice_t device;
+          const bool haveDevice =
+              (NVML_SUCCESS == nvmlDeviceGetHandleByIndex(i, &device));
+
+          if (cfg.power.load()) {
+            unsigned int milliwatts = 0;
+            if (haveDevice &&
+                NVML_SUCCESS == nvmlDeviceGetPowerUsage(device, &milliwatts)) {
+              out << "," << milliwatts;
+            } else {
+              out << "," << -1;
+            }
+          } else {
+            out << ",x";
+          }
+          if (cfg.utilization.load()) {
+            // period is between 1 second and 1/6 second depending on product
+            nvmlUtilization_t utilization;
+            if (haveDevice &&
+                NVML_SUCCESS ==
+                    nvmlDeviceGetUtilizationRates(device, &utilization)) {
+              out << "," << utilization.gpu << "," << utilization.memory;
+            } else {
+              out << "," << -1 << "," << -1;
+            }
+          } else {
+            out << ",x,x";
+          }
+          if (cfg.temperature.load()) {
+            unsigned int temperature = 0;
+            if (haveDevice &&
+                NVML_SUCCESS ==
+                    nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,
+                                             &temperature)) {
+              out << "," << temperature;
+            } else {
+              out << "," << -1;
+            }
+          } else {
+            out << ",x";
+          }
+          if (cfg.pstate.load()) {
+            nvmlPstates_t pState;
+            if (haveDevice &&
+                NVML_SUCCESS ==
+                    nvmlDeviceGetPerformanceState(device, &pState)) {
+              out << "," << pState;
+            } else {
+              out << "," << -1;
+            }
+          } else {
+            out << ",x";
+          }
+
+          out << "\n";
+        }
+      }
+      std::this_thread::sleep_for(period);
+    }
+  }
+
+  /*! \brief Launch the sampling thread; no-op while a thread is attached. */
+  void start() {
+    if (worker.joinable()) {
+      return;
+    }
+    config.stop = false;
+    worker = std::thread(worker_func, std::cref(config));
+  }
+
+  /*! \brief Ask the sampling thread to exit and wait for it; safe to call
+   *  when the monitor was never started or is already stopped. */
+  void stop() {
+    config.stop = true;
+    if (worker.joinable()) {
+      worker.join();
+    }
+  }
+
+  /*! \brief Continue sampling after `pause()`. */
+  void resume() { config.pause.store(false); }
+  /*! \brief Suspend sampling; the thread keeps running but writes nothing. */
+  void pause() { config.pause.store(true); }
+};
+
+} // namespace perfect
\ No newline at end of file
diff --git a/include/perfect/init.hpp b/include/perfect/init.hpp
index 6a87dc5..1e5acc8 100644
--- a/include/perfect/init.hpp
+++ b/include/perfect/init.hpp
@@ -1,13 +1,17 @@
 #pragma once
 
 #ifdef __NVCC__
+#ifndef PERFECT_HAS_CUDA
 #define PERFECT_HAS_CUDA
 #endif
+#endif
 
 #ifdef PERFECT_HAS_CUDA
 #include
 #endif
 
+#include "perfect/result.hpp"
+
 namespace perfect {
 
 /*! initialize the benchmark
diff --git a/include/perfect/result.hpp b/include/perfect/result.hpp
index 93ad46e..2c2036b 100644
--- a/include/perfect/result.hpp
+++ b/include/perfect/result.hpp
@@ -3,8 +3,10 @@
 #include
 
 #ifdef __NVCC__
+#ifndef PERFECT_HAS_CUDA
 #define PERFECT_HAS_CUDA
 #endif
+#endif
 
 #ifdef PERFECT_HAS_CUDA
 #include