diff --git a/README.md b/README.md index 6fbff96..a57492e 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,119 @@ CPU/GPU performance control library for benchmarking - [x] Disable GPU turbo (nvidia) - [x] Flush addresses from cache (amd64, POWER) -## API +## Installing + +### CMake + +Ensure you have CMake 3.13+. + +Add the source tree to your project and then use add_subdirectory + +``` +git submodule add git@github.com:cwpearson/perfect.git thirdparty/perfect +``` + +`CMakeLists.txt` +``` +... +add_subdirectory(thirdparty/perfect) +... +target_link_libraries(your-target perfect) +``` + +### Without CMake +Download the source and add the include directory to your includes and `nvidia-ml` to your link flags + +``` +g++ code_using_perfect.cpp -I perfect/include -l nvidia-ml +nvcc code_using_perfect.cu -I perfect/include +``` + +## Usage + +The `perfect` functions all return a `perfect::Result`, which is defined in [include/perfect/result.hpp]. +When things are working, it will be `perfect::Result::SUCCESS`. +A `PERFECT` macro is also defined, which will terminate with an error message unless the `perfect::Result` is `perfect::Result::SUCCESS`. + +```c++ +perfect::CpuTurboState state; +PERFECT(perfect::get_cpu_turbo_state(&state)); +``` + +### CPU Turbo + +`perfect` can enable and disable CPU boost through the Intel p-state mechanism or the ACPI cpufreq mechanism. + +See [examples/cpu_turbo.cpp]. +```c++ +#include "perfect/cpu_turbo.hpp" +``` + +* `Result get_cpu_turbo_state(CpuTurboState *state)` +* `Result set_cpu_turbo_state(CpuTurboState *state)` +* `Result disable_cpu_turbo()` +* `Result enable_cpu_turbo()` +* `bool is_turbo_enabled(CpuTurboState state)` + +### OS Performance + +`perfect` can control the OS governor on linux. + +See [examples/os_perf.cpp]. + +```c++ +#include "perfect/os_perf.hpp" +``` + +* `Result get_os_perf_state(OsPerfState *state, const int cpu)`: Save the current OS governor mode for CPU `cpu`. 
+* `Result os_perf_state_maximum(const int cpu)`: Set the OS governor to its maximum performance mode. +* `Result set_os_perf_state(const int cpu, OsPerfState state)`: Restore a previously-saved OS governor mode. + +### GPU Turbo + +`perfect` can enable/disable GPU turbo boost. + +See [examples/gpu_turbo.cu]. + +```c++ +#include "perfect/gpu_turbo.hpp" +``` + +* `Result get_gpu_turbo_state(GpuTurboState *state, unsigned int idx)`: Get the current turbo state for GPU `idx`, useful to restore later. +* `bool is_turbo_enabled(GpuTurboState state)`: Check if turbo is enabled. +* `Result set_gpu_turbo_state(GpuTurboState state, unsigned int idx)`: Set a previously saved turbo state. +* `Result disable_gpu_turbo(unsigned int idx)`: Disable GPU `idx` turbo. +* `Result enable_gpu_turbo(unsigned int idx)`: Enable GPU `idx` turbo. + +### GPU Clocks + +`perfect` can lock GPU clocks to their maximum values. + +See [examples/gpu_clocks.cu]. + +```c++ +#include "perfect/gpu_clocks.hpp" +``` + +* `Result set_max_gpu_clocks(unsigned int idx)`: Set GPU `idx` clocks to their maximum reported values. +* `Result reset_gpu_clocks(unsigned int idx)`: Unset GPU `idx` clocks. + +### CPU Cache + +`perfect` can flush data from CPU caches. Unlike the other APIs, these do not return a `Result` because they do not fail. + +See [examples/cpu_cache.cpp]. + +```c++ +#include "perfect/cpu_cache.hpp" +``` + +* `void flush_all(void *p, const size_t n)`: Flush all cache lines starting at `p` for `n` bytes. 
## Wish List +- [ ] Make CUDA Optional - [ ] Nvidia GPU power monitoring - [ ] Nivida GPU utilization monitoring \ No newline at end of file diff --git a/examples/cpu_turbo.cpp b/examples/cpu_turbo.cpp index a269c23..a3410ca 100644 --- a/examples/cpu_turbo.cpp +++ b/examples/cpu_turbo.cpp @@ -3,6 +3,7 @@ #include "perfect/cpu_turbo.hpp" int main(void) { + perfect::init(); perfect::Result ret; perfect::CpuTurboState state; diff --git a/examples/gpu_clocks.cu b/examples/gpu_clocks.cu index 3d2d2ef..79ce449 100644 --- a/examples/gpu_clocks.cu +++ b/examples/gpu_clocks.cu @@ -4,9 +4,7 @@ #include "perfect/init.hpp" int main(void) { - using namespace perfect; - init(); for (unsigned int gpu = 0; gpu < 1; ++gpu) { diff --git a/examples/gpu_turbo.cu b/examples/gpu_turbo.cu index dfd7fc0..4672cf0 100644 --- a/examples/gpu_turbo.cu +++ b/examples/gpu_turbo.cu @@ -4,12 +4,11 @@ #define OR_DIE(expr) int main(void) { - using namespace perfect; - GpuTurboState state; - init(); + GpuTurboState state; + for (unsigned int gpu = 0; gpu < 1; ++gpu) { PERFECT(perfect::get_gpu_turbo_state(&state, gpu)); PERFECT(perfect::disable_gpu_turbo(gpu)); diff --git a/examples/os_perf.cpp b/examples/os_perf.cpp index b8b59a6..a0342fc 100644 --- a/examples/os_perf.cpp +++ b/examples/os_perf.cpp @@ -3,7 +3,7 @@ #include int main(void) { - + perfect::init(); std::map states; diff --git a/include/perfect/cpu_cache.hpp b/include/perfect/cpu_cache.hpp index b28e85c..09eb410 100644 --- a/include/perfect/cpu_cache.hpp +++ b/include/perfect/cpu_cache.hpp @@ -24,6 +24,8 @@ Routines for controlling CPU caching #error "unsupported CPU arch" #endif +#include "init.hpp" + namespace perfect { inline void flush_all(void *p, const size_t n) { diff --git a/include/perfect/cpu_turbo.hpp b/include/perfect/cpu_turbo.hpp index 577cc06..d5be0f1 100644 --- a/include/perfect/cpu_turbo.hpp +++ b/include/perfect/cpu_turbo.hpp @@ -15,6 +15,7 @@ #error "unsupported OS" #endif +#include "init.hpp" #include "result.hpp" 
namespace perfect { diff --git a/include/perfect/gpu_clocks.hpp b/include/perfect/gpu_clocks.hpp index 6cf4226..8c97887 100644 --- a/include/perfect/gpu_clocks.hpp +++ b/include/perfect/gpu_clocks.hpp @@ -4,6 +4,9 @@ #include "detail/nvidia/nvidia-ml.hpp" +#include "result.hpp" +#include "init.hpp" + namespace perfect { /*! diff --git a/include/perfect/gpu_turbo.hpp b/include/perfect/gpu_turbo.hpp index 6203a77..58c956f 100644 --- a/include/perfect/gpu_turbo.hpp +++ b/include/perfect/gpu_turbo.hpp @@ -3,6 +3,7 @@ #include "detail/nvidia/nvidia-ml.hpp" #include "result.hpp" +#include "init.hpp" namespace perfect { diff --git a/include/perfect/os_perf.hpp b/include/perfect/os_perf.hpp index d3ba39d..e35128f 100644 --- a/include/perfect/os_perf.hpp +++ b/include/perfect/os_perf.hpp @@ -1,13 +1,9 @@ #pragma once - - #include #include #include - - #ifdef __linux__ #include "detail/os/linux.hpp" #else @@ -15,6 +11,7 @@ #endif #include "result.hpp" +#include "init.hpp" namespace perfect { diff --git a/tools/enable_turbo.cpp b/tools/enable_turbo.cpp index ca97206..9465260 100644 --- a/tools/enable_turbo.cpp +++ b/tools/enable_turbo.cpp @@ -1,7 +1,6 @@ #include #include "perfect/cpu_turbo.hpp" -#include "perfect/init.hpp" using namespace perfect;