Standalone now uses the O2 optimization level instead of O3. Also removed -march=native, since it causes issues if the program is compiled on a different architecture than the one it is run on. Since we do not do heavy arithmetic on the host side and the host code is not a performance-critical part of the code, -march=native is not very useful anyway.

2019-08-06 14:46:13 +03:00
parent da76fca0dc
commit 5f4246fb42
2 changed files with 2 additions and 3 deletions
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -12,8 +12,7 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
                    -gencode arch=compute_61,code=sm_61
                    -lineinfo
                    -ftz=true # Flush denormalized floats to zero
-                    -std=c++11
+                    -std=c++11)
-                    --compiler-options -march=native) # Native host machine code
                    #--maxrregcount=255
                    # -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
                    # =cg to opt out
--- a/src/standalone/CMakeLists.txt
+++ b/src/standalone/CMakeLists.txt
@@ -21,7 +21,7 @@ if (BUILD_RT_VISUALIZATION)
 endif ()
 ## Compilation flags
-add_compile_options(-march=native -pipe ${OpenMP_CXX_FLAGS})
+add_compile_options(-O2 -pipe ${OpenMP_CXX_FLAGS})
 add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion)# -Wshadow)
 ## Compile and link