Disabled the project-wide `--maxrregcount=255` flag by default, since capping the register count is only beneficial for resource-heavy kernels. If a limit is needed, the maximum register count should instead be set per kernel (e.g., with `__launch_bounds__`).
This commit is contained in:
@@ -19,7 +19,6 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
|
|||||||
-gencode arch=compute_60,code=sm_60
|
-gencode arch=compute_60,code=sm_60
|
||||||
-gencode arch=compute_61,code=sm_61
|
-gencode arch=compute_61,code=sm_61
|
||||||
-lineinfo
|
-lineinfo
|
||||||
--maxrregcount=255
|
|
||||||
-ftz=true
|
-ftz=true
|
||||||
-std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero
|
-std=c++11) #--maxrregcount=255 -ftz=true #ftz = flush denormalized floats to zero
|
||||||
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
|
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
|
||||||
|
|||||||
Reference in New Issue
Block a user