Disabled the power-of-two error checks in reduce_vec, as they may be too strict. See test_dims in autotest.cc for the dimensions that are guaranteed to work with reductions. Other dimensions may also work but have not yet been extensively tested.

2019-06-17 18:32:23 +03:00
parent 5e6cc9b8cc
commit f970dca68c
3 changed files with 8 additions and 8 deletions
--- a/config/astaroth.conf
+++ b/config/astaroth.conf
@@ -6,8 +6,8 @@
 * =============================================================================
 */
 AC_nx = 192
-AC_ny = 120
+AC_ny = 128
-AC_nz = 7
+AC_nz = 8
 AC_dsx = 0.04908738521
 AC_dsy = 0.04908738521
--- a/src/core/kernels/kernels.cuh
+++ b/src/core/kernels/kernels.cuh
@@ -1025,9 +1025,9 @@ reduce_vec(const cudaStream_t stream,
    // NOTE! Also does not work properly with non-power of two mesh dimension
    // Issue is with "smem[BLOCK_SIZE];". If you init smem to NANs, you can
    // see that uninitialized smem values are used in the comparison
-    ERRCHK_ALWAYS(is_power_of_two(nx));
+    //ERRCHK_ALWAYS(is_power_of_two(nx));
-    ERRCHK_ALWAYS(is_power_of_two(ny));
+    //ERRCHK_ALWAYS(is_power_of_two(ny));
-    ERRCHK_ALWAYS(is_power_of_two(nz));
+    //ERRCHK_ALWAYS(is_power_of_two(nz));
    if (rtype == RTYPE_MAX || rtype == RTYPE_MIN) {
        kernel_reduce_1of3_vec<dlength_vec><<<bpg, tpb, 0, stream>>>(vtxbuf0, vtxbuf1, vtxbuf2, scratchpad);
--- a/src/standalone/renderer.cc
+++ b/src/standalone/renderer.cc
@@ -153,8 +153,8 @@ draw_vertex_buffer(const AcMesh& mesh, const VertexBufferHandle& vertex_buffer,
    const float max = float(model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer));
    const float min = float(model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer));
    */
-    const float max = 1.f;//float(acReduceScal(RTYPE_MAX, vertex_buffer));
+    const float max = float(acReduceScal(RTYPE_MAX, vertex_buffer));
-    const float min = 0.f;//float(acReduceScal(RTYPE_MIN, vertex_buffer));
+    const float min = float(acReduceScal(RTYPE_MIN, vertex_buffer));
    const float range = fabsf(max - min);
    const float mid   = max - .5f * range;