Squashed commit of the following:

commit 25e7cb77683736a588acb6b30a8ac89e2bd7f56a Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 13:25:49 2019 -0500 automatically define PERFECT_HAS_CUDA with nvcc commit fcc699c165ba515619781aefb378d3c0c4d1093d Author: Carl Pearson <pearson@illinois.edu> Date: Fri Sep 20 13:18:42 2019 -0500 optional CUDA support
2019-09-20 13:26:08 -05:00
parent 72c39c1dd5
commit 91e15ee7ea
8 changed files with 130 additions and 48 deletions
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -38,3 +38,26 @@ jobs:
        g++ --version
        nvcc --version
        make VERBOSE=1
+  build: # job that configures and builds with g++ only; nvcc is never invoked here (presumably the no-CUDA path — confirm)
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: install cmake # unpacks the CMake 3.15.3 release tarball into $HOME/software/cmake
+      run: |
+        export PREFIX=$HOME/software/cmake
+        wget https://github.com/Kitware/CMake/releases/download/v3.15.3/cmake-3.15.3-Linux-x86_64.tar.gz -O cmake.tar.gz
+        mkdir -p $PREFIX
+        tar -xvf cmake.tar.gz --strip-components=1 -C $PREFIX
+    - name: configure # PATH is exported again in each step that needs the unpacked cmake
+      run: |
+        export PATH=$HOME/software/cmake/bin:$PATH
+        mkdir build
+        cd build
+        cmake --version
+        cmake .. -DCMAKE_BUILD_TYPE=Debug
+    - name: build # runs make inside the build directory created by the configure step
+      run: |
+        export PATH=$HOME/software/cmake/bin:$PATH
+        cd build
+        g++ --version
+        make VERBOSE=1
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,9 +5,22 @@
 # 3.13+ for target_link_directories
 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

-project(perfect LANGUAGES CUDA CXX VERSION 0.1.0)
+project(perfect LANGUAGES CXX VERSION 0.1.0)
 message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})

+include(CheckLanguage)
+# USE_CUDA=OFF skips CUDA detection; with ON, CUDA is enabled only when check_language() finds a compiler
+option(USE_CUDA "try to use CUDA" ON)
+
+if(USE_CUDA)
+  check_language(CUDA) # result is read back from CMAKE_CUDA_COMPILER below
+  if(CMAKE_CUDA_COMPILER)
+    enable_language(CUDA)
+  else()
+    message(STATUS "No CUDA support")
+  endif()
+endif()
+
 #https://blog.kitware.com/cmake-and-the-default-build-type/
 # Set a default build type if none was specified
 set(default_build_type "Release")
@@ -33,19 +46,20 @@ elseif (CMAKE_BUILD_TYPE MATCHES Release)
  set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} -lineinfo)
 endif()

-set(CMAKE_CUDA_STANDARD 11)
-
 add_subdirectory(include/perfect)

 add_library(perfect INTERFACE)
+target_compile_features(perfect INTERFACE cxx_std_11)
+if(USE_CUDA AND CMAKE_CUDA_COMPILER) # CMAKE_CUDA_COMPILER may be left in the cache by an earlier configure, so also honor USE_CUDA=OFF
+  target_compile_definitions(perfect INTERFACE PERFECT_HAS_CUDA)
+  # tell the host compiler where to find the CUDA includes and libraries
+  # CMakeFiles/<version>/CMakeCUDACompiler.cmake
+  target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  target_link_directories(perfect INTERFACE ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
+  target_link_libraries(perfect INTERFACE nvidia-ml)
+endif()
 target_include_directories(perfect INTERFACE include/)

-# tell the host compiler where to find the CUDA includes and libraries
-target_include_directories(perfect INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-# CMakeFiles/<version>/CMakeCUDACompiler.cmake
-target_link_directories(perfect INTERFACE ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
-target_link_libraries(perfect INTERFACE nvidia-ml)
-

 add_subdirectory(examples)
 add_subdirectory(tools)
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@ CPU/GPU performance control library for benchmarking
 - [x] Set GPU clocks (nvidia)
 - [x] Disable GPU turbo (nvidia)
 - [x] Flush addresses from cache (amd64, POWER)
+- [x] CUDA is optional (GPU functions are not compiled when CUDA is unavailable)

 ## Installing

@@ -38,11 +39,21 @@ target_link_libraries(your-target perfect)
 ```

 ### Without CMake 
-Download the source and add the include directory to your includes and `nvidia-ml` to your link flags
+Download the source, then:
+* to use the GPU functions with a non-CUDA compiler (e.g. `g++`):
+    * add the include directory to your includes
+    * add `nvidia-ml` to your link flags
+    * add `-DPERFECT_HAS_CUDA` to your compile definitions
+* with a CUDA compiler (`nvcc`), `PERFECT_HAS_CUDA` is defined for you; you still need the include directory and `nvidia-ml`

 ```
-g++ code_using_perfect.cpp -I perfect/include -l nvidia-ml
-nvcc code_using_perfect.cu -I perfect/include
+g++ code_using_perfect.cpp -DPERFECT_HAS_CUDA -Iperfect/include -lnvidia-ml 
+nvcc code_using_perfect.cu -Iperfect/include -lnvidia-ml
+```
+
+If you don't have CUDA, omit `-DPERFECT_HAS_CUDA` and `-lnvidia-ml`; the GPU functions are then not compiled:
+```
+g++ code_using_perfect.cpp -I perfect/include
 ```

 ## Usage
@@ -67,11 +78,11 @@ See [examples/cpu_turbo.cpp].
 #include "perfect/cpu_turbo.hpp"
 ```

-* `Result get_cpu_turbo_state(CpuTurboState *state)`
-* `Result set_cpu_turbo_state(CpuTurboState *state)`
-* `Result disable_cpu_turbo()`
-* `Result enable_cpu_turbo()`
-* `bool is_turbo_enabled(CpuTurboState state)`
+* `Result get_cpu_turbo_state(CpuTurboState *state)`: save the current CPU turbo state
+* `Result set_cpu_turbo_state(CpuTurboState *state)`: restore a saved CPU turbo state
+* `Result disable_cpu_turbo()`: disable CPU turbo
+* `Result enable_cpu_turbo()`: enable CPU turbo
+* `bool is_turbo_enabled(CpuTurboState state)`: check if turbo is enabled

 ### OS Performance

@@ -130,6 +141,6 @@ See [examples/cpu_cache.cpp].

 ## Wish List

- [ ] Make CUDA Optional
+
 - [ ] Nvidia GPU power monitoring
 - [ ] Nivida GPU utilization monitoring
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -43,8 +43,12 @@ target_link_libraries(cpu-turbo perfect)
 add_executable(os-perf os_perf.cpp)
 target_link_libraries(os-perf perfect)

-add_executable(gpu-clocks gpu_clocks.cu)
-target_link_libraries(gpu-clocks perfect)
+if(USE_CUDA AND CMAKE_CUDA_COMPILER) # CMAKE_CUDA_COMPILER may be left in the cache by an earlier configure, so also honor USE_CUDA=OFF
+    add_executable(gpu-clocks gpu_clocks.cu)
+    target_link_libraries(gpu-clocks perfect)
+endif()

-add_executable(gpu-turbo gpu_turbo.cu)
-target_link_libraries(gpu-turbo perfect)
+if(USE_CUDA AND CMAKE_CUDA_COMPILER) # CMAKE_CUDA_COMPILER may be left in the cache by an earlier configure, so also honor USE_CUDA=OFF
+    add_executable(gpu-turbo gpu_turbo.cu)
+    target_link_libraries(gpu-turbo perfect)
+endif()
--- a/examples/os_perf.cpp
+++ b/examples/os_perf.cpp
@@ -3,24 +3,25 @@
 #include <map>

 int main(void) {
-    perfect::init();
+  perfect::init();

-    std::map<int, perfect::OsPerfState> states;
+  std::map<int, perfect::OsPerfState> states;

-    for (auto cpu : perfect::cpus()) {
-        perfect::OsPerfState state;
-        perfect::get_os_perf_state(&state, cpu);
-        states[cpu] = state;
-        perfect::os_perf_state_maximum(cpu);
+  for (auto cpu : perfect::cpus()) {
+    perfect::OsPerfState state;
+    perfect::Result result;
+    result = perfect::get_os_perf_state(&state, cpu);
+    if (perfect::Result::SUCCESS == result) {
+      states[cpu] = state;
    }
+    perfect::os_perf_state_maximum(cpu);
+  }

-    // do things with all CPUs set to the maximum performancem mode by the OS
-
-    for (auto kv : states) {
-        int cpu = kv.first;
-        perfect::OsPerfState state = kv.second;
-        perfect::set_os_perf_state(cpu, state);
-    }
-    
+  // do things with all CPUs set to the maximum performance mode by the OS

+  for (auto kv : states) {
+    int cpu = kv.first;
+    perfect::OsPerfState state = kv.second;
+    perfect::set_os_perf_state(cpu, state);
+  }
 }
--- a/include/perfect/detail/os/linux.hpp
+++ b/include/perfect/detail/os/linux.hpp
@@ -1,10 +1,14 @@
 #pragma once

 #include <cassert>
+#include <cerrno>
 #include <fstream>
 #include <string>
 #include <vector>

+#include <cstring>
+#include <iostream>
+
 #include <sched.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -46,7 +50,14 @@ Result set_governor(const int cpu, const std::string &governor) {
  ofs << governor;
  ofs.close();
  if (ofs.fail()) {
-    return Result::NO_PERMISSION;
+    switch (errno) {
+    case EACCES:
+      return Result::NO_PERMISSION;
+    case ENOENT:
+      return Result::NOT_SUPPORTED;
+    default:
+      return Result::UNKNOWN;
+    }
  }
  return Result::SUCCESS;
 }
--- a/include/perfect/init.hpp
+++ b/include/perfect/init.hpp
@@ -1,6 +1,12 @@
 #pragma once

+#ifdef __NVCC__
+#define PERFECT_HAS_CUDA
+#endif
+
+#ifdef PERFECT_HAS_CUDA
 #include <nvml.h>
+#endif

 namespace perfect {

@@ -11,11 +17,13 @@ Result init() {
  if (init_)
    return Result::SUCCESS;

-  // init nvml
+// init nvml
+#ifdef PERFECT_HAS_CUDA
  nvmlReturn_t ret = nvmlInit();
  if (ret != NVML_SUCCESS) {
    return from_nvml(ret);
  }
+#endif

  // don't init again if init() called twice
  init_ = true;
--- a/include/perfect/result.hpp
+++ b/include/perfect/result.hpp
@@ -2,22 +2,27 @@

 #include <cassert>

+#ifdef __NVCC__
+#define PERFECT_HAS_CUDA
+#endif
+
+#ifdef PERFECT_HAS_CUDA
 #include <nvml.h>
+#endif

 namespace perfect {
+
 enum class Result {
-  SUCCESS,
-  NVML_NOT_SUPPORTED,
-  NVML_NO_PERMISSION,
-  NVML_UNINITIALIZED,
  NO_PERMISSION,
+  NOT_SUPPORTED,
+  NVML_NO_PERMISSION,
+  NVML_NOT_SUPPORTED,
+  NVML_UNINITIALIZED,
+  SUCCESS,
  UNKNOWN
 };

-
-
-
-
+#ifdef PERFECT_HAS_CUDA
 Result from_nvml(nvmlReturn_t nvml) {
  switch (nvml) {
  case NVML_SUCCESS:
@@ -26,6 +31,8 @@ Result from_nvml(nvmlReturn_t nvml) {
    return Result::NVML_UNINITIALIZED;
  case NVML_ERROR_NOT_SUPPORTED:
    return Result::NVML_NOT_SUPPORTED;
+  case NVML_ERROR_NO_PERMISSION:
+    return Result::NVML_NO_PERMISSION;
  case NVML_ERROR_INVALID_ARGUMENT:
  case NVML_ERROR_GPU_IS_LOST:
  case NVML_ERROR_UNKNOWN:
@@ -34,6 +41,7 @@ Result from_nvml(nvmlReturn_t nvml) {
  }
  return Result::UNKNOWN;
 }
+#endif

 const char *get_string(const Result &result) {
  switch (result) {
@@ -47,6 +55,8 @@ const char *get_string(const Result &result) {
    return "nvidia-ml returned not supported";
  case Result::NVML_NO_PERMISSION:
    return "nvidia-ml returned no permission";
+  case Result::NOT_SUPPORTED:
+    return "unsupported operation";
  default:
    assert(0 && "unexpected perfect::Result");
  }
@@ -58,7 +68,7 @@ const char *get_string(const Result &result) {
 inline void check(Result result, const char *file, const int line) {
  if (result != Result::SUCCESS) {
    fprintf(stderr, "%s@%d: perfect Error: %s\n", file, line,
-           get_string(result));
+            get_string(result));
    exit(-1);
  }
 }