Moved standalone from src to samples

2020-08-19 13:35:49 +03:00
parent 38e3fad023
commit e051d72091
24 changed files with 0 additions and 0 deletions
--- a/samples/standalone/CMakeLists.txt
+++ b/samples/standalone/CMakeLists.txt
@@ -0,0 +1,29 @@
+##############################################
+##  CMakeLists.txt for Astaroth Standalone  ##
+##############################################
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+## Files
+file (GLOB SOURCES "*.cc" "model/*.cc")
+
+## Find packages
+find_package(OpenMP REQUIRED)
+if (BUILD_RT_VISUALIZATION)
+    add_definitions(-DAC_BUILD_RT_VISUALIZATION=1)
+    # SDL 2
+    set(SDL2_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/include/)
+    set(SDL2_LIBRARY_DIR ${CMAKE_SOURCE_DIR}/3rdparty/SDL2/build/)
+    set(SDL2_LIBRARY "SDL2")
+    include_directories(${SDL2_INCLUDE_DIR})
+    link_directories(${SDL2_LIBRARY_DIR})
+endif ()
+
+# Compile and link
+add_library(astaroth_standalone STATIC ${SOURCES})
+target_link_libraries(astaroth_standalone PRIVATE OpenMP::OpenMP_CXX astaroth_core ${SDL2_LIBRARY})
+target_compile_options(astaroth_standalone PRIVATE -pipe -Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion -Wshadow)
+
+add_executable(ac_run main.cc)
+target_link_libraries(ac_run PRIVATE astaroth_standalone)
--- a/samples/standalone/autotest.cc
+++ b/samples/standalone/autotest.cc
@@ -0,0 +1,772 @@
+/*
+   Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+   This file is part of Astaroth.
+
+   Astaroth is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   Astaroth is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "run.h"
+
+#include <stdio.h>
+
+#include "config_loader.h"
+#include "math_utils.h"
+#include "model/host_forcing.h"
+#include "model/host_memory.h"
+#include "model/host_timestep.h"
+#include "model/model_boundconds.h"
+#include "model/model_reduce.h"
+#include "model/model_rk3.h"
+
+#include "errchk.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+// Defines for colored output
+#define RED "\x1B[31m"
+#define GRN "\x1B[32m"
+#define YEL "\x1B[33m"
+#define BLU "\x1B[34m"
+#define MAG "\x1B[35m"
+#define CYN "\x1B[36m"
+#define WHT "\x1B[37m"
+#define RESET "\x1B[0m"
+
+#define GEN_TEST_RESULT (1) // Generate a test file always during testing
+
+typedef struct {
+    int x, y, z;
+} vec3i;
+
+typedef struct {
+    AcReal x, y, z;
+} vec3r;
+
+typedef struct {
+    ModelScalar model;
+    AcReal candidate;
+    ModelScalar error;
+} ErrorInfo;
+
+#define QUICK_TEST (0)
+#define THOROUGH_TEST (1)
+#define TEST_TYPE QUICK_TEST
+
+static const InitType test_cases[] = {INIT_TYPE_RANDOM, INIT_TYPE_XWAVE,
+                                      INIT_TYPE_GAUSSIAN_RADIAL_EXPL, INIT_TYPE_ABC_FLOW};
+// #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+#ifndef AC_DOUBLE_PRECISION
+#define AC_DOUBLE_PRECISION (0)
+#endif
+
+static inline bool
+is_valid(const ModelScalar a)
+{
+    return !isnan(a) && !isinf(a);
+}
+
+#if TEST_TYPE ==                                                                                   \
+    QUICK_TEST // REGULAR TEST START HERE
+               // --------------------------------------------------------------------------------------------------------------
+static inline ModelScalar
+get_absolute_error(const ModelScalar& model, const AcReal& candidate)
+{
+    return fabsl(candidate - model);
+}
+
+static inline ModelScalar
+get_acceptable_absolute_error(const ModelScalar& range)
+{
+    // This is the upper limit, which assumes that both the min and max values
+    // are used in a calculation (which inherently leads to cancellation).
+    //
+    // AFAIK if this breaks, there is definitely something wrong with the code.
+    // Otherwise the error is so small it's indistiguishable from inherent
+    // inaccuracies in floating-point arithmetic.
+    return range * AC_REAL_EPSILON;
+}
+
+static inline ModelScalar
+get_acceptable_relative_error(void)
+{
+    return 30; // machine epsilons
+}
+
+static inline ModelScalar
+get_relative_error(const ModelScalar& model, const AcReal& candidate)
+{
+    ModelScalar error = NAN;
+
+#if 0
+	const ModelScalar abs_epsilon = get_acceptable_absolute_error(range);
+	if (fabsl(model) < abs_epsilon) { // Model is close to zero
+		/*
+		   if (fabsl(candidate - model) <= AC_REAL_EPSILON * fabsl(candidate))
+		   error = 0;
+		// Knuth section 4.2.2 pages 217-218 TODO
+		 */
+		if (fabsl(candidate) < abs_epsilon) // If candidate is close to zero
+			error = fabsl(candidate);       // return candidate itself
+		else
+			error = INFINITY;
+	}
+	else {
+		error = fabsl(1.0l - candidate / model);
+	}
+#endif
+    error = fabsl(1.0l - candidate / model);
+
+    // Return the relative error as multiples of the machine epsilon
+    // See Sect. Relative Error and Ulps in
+    // What Every Computer Scientist Should Know About Floating-Point Arithmetic
+    // By David Goldberg (1991)
+    return error / AC_REAL_EPSILON;
+}
+
+static bool
+verify(const ModelScalar& model, const AcReal& cand, const ModelScalar& range)
+{
+    if (!is_valid(model) || !is_valid(cand))
+        return false;
+
+    const ModelScalar relative_error = get_relative_error(model, cand);
+    if (relative_error < get_acceptable_relative_error())
+        return true;
+
+    const ModelScalar absolute_error = get_absolute_error(model, cand);
+    if (absolute_error < get_acceptable_absolute_error(range))
+        return true;
+
+    return false;
+}
+
+static ModelScalar
+get_reduction_range(const ModelMesh& mesh)
+{
+    ERRCHK(NUM_VTXBUF_HANDLES >= 3);
+
+    const ModelScalar max0     = model_reduce_scal(mesh, RTYPE_MAX, VertexBufferHandle(0));
+    const ModelScalar max1     = model_reduce_scal(mesh, RTYPE_MAX, VertexBufferHandle(1));
+    const ModelScalar max2     = model_reduce_scal(mesh, RTYPE_MAX, VertexBufferHandle(2));
+    const ModelScalar max_scal = max(max0, max(max1, max2));
+
+    const ModelScalar min0     = model_reduce_scal(mesh, RTYPE_MIN, VertexBufferHandle(0));
+    const ModelScalar min1     = model_reduce_scal(mesh, RTYPE_MIN, VertexBufferHandle(1));
+    const ModelScalar min2     = model_reduce_scal(mesh, RTYPE_MIN, VertexBufferHandle(2));
+    const ModelScalar min_scal = min(min0, min(min1, min2));
+
+    return max_scal - min_scal;
+}
+
+static void
+print_debug_info(const ModelScalar& model, const AcReal& candidate, const ModelScalar& range)
+{
+    printf("MeshPointInfo\n");
+    printf("\tModel: %e\n", double(model));
+    printf("\tCandidate: %e\n", double(candidate));
+    printf("\tRange: %e\n", double(range));
+
+    printf("\tAbsolute error: %Le (max acceptable: %Le)\n", get_absolute_error(model, candidate),
+           get_acceptable_absolute_error(range));
+    printf("\tRelative error: %Le (max acceptable: %Le)\n", get_relative_error(model, candidate),
+           get_acceptable_relative_error());
+    printf("\tIs acceptable: %d\n", verify(model, candidate, range));
+}
+
+static void
+print_result(const ModelScalar& model, const AcReal& candidate, const ModelScalar& range,
+             const char* name = "???")
+{
+    const ModelScalar rel_err = get_relative_error(model, candidate);
+    const ModelScalar abs_err = get_absolute_error(model, candidate);
+    if (!verify(model, candidate, range)) {
+        printf("\t%-12s... ", name);
+        printf(RED "FAIL! " RESET);
+    }
+    else {
+        printf("\t%-12s... ", name);
+        printf(GRN "OK! " RESET);
+    }
+
+    printf("(relative error: %.3Lg \u03B5, absolute error: %Lg)\n", rel_err, abs_err);
+    /*
+    // DEPRECATED: TODO remove
+    if (rel_err < get_acceptable_relative_error())
+    printf("(relative error: %Lg \u03B5, max accepted %Lg)\n", rel_err,
+    get_acceptable_relative_error());
+    else
+    printf("(absolute error: %Lg, max accepted %Lg)\n", abs_err,
+    get_acceptable_absolute_error(range));
+     */
+}
+
+static int
+check_reductions(const AcMeshInfo& config)
+{
+    printf("Testing reductions\n");
+    int num_failures = 0;
+
+    // Init CPU meshes
+    AcMesh* mesh         = acmesh_create(config);
+    ModelMesh* modelmesh = modelmesh_create(config);
+
+    // Init GPU meshes
+    acInit(config);
+
+    for (unsigned int i = 0; i < ARRAY_SIZE(test_cases); ++i) {
+        const InitType itype = test_cases[i];
+        printf("Checking %s...\n", init_type_names[InitType(itype)]);
+
+        // Init the mesh and figure out the acceptable range for error
+        acmesh_init_to(InitType(itype), mesh);
+
+        acmesh_to_modelmesh(*mesh, modelmesh);
+        const ModelScalar range = get_reduction_range(*modelmesh);
+
+        acLoad(*mesh);
+
+        for (int rtype = 0; rtype < NUM_REDUCTION_TYPES; ++rtype) {
+
+            if (rtype == RTYPE_SUM) {
+                // Skip SUM test for now. The failure is either caused by floating-point
+                // cancellation or an actual issue
+                WARNING("Skipping RTYPE_SUM test\n");
+                continue;
+            }
+
+            const VertexBufferHandle ftype = VTXBUF_UUX;
+
+            // Scal
+            ModelScalar model = model_reduce_scal(*modelmesh, ReductionType(rtype),
+                                                  VertexBufferHandle(ftype));
+            AcReal candidate  = acReduceScal(ReductionType(rtype), VertexBufferHandle(ftype));
+            print_result(model, candidate, range, "UUX scal");
+
+            bool is_acceptable = verify(model, candidate, range);
+            if (!is_acceptable) {
+                ++num_failures;
+
+                // Print debug info
+                printf("Scalar reduction type %d FAIL\n", rtype);
+                print_debug_info(model, candidate, range);
+            }
+
+            // Vec
+            model     = model_reduce_vec(*modelmesh, ReductionType(rtype), VTXBUF_UUX, VTXBUF_UUY,
+                                     VTXBUF_UUZ);
+            candidate = acReduceVec(ReductionType(rtype), VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+            print_result(model, candidate, range, "UUXYZ vec");
+
+            is_acceptable = verify(model, candidate, range);
+            if (!is_acceptable) {
+                ++num_failures;
+
+                // Print debug info
+                printf("Vector reduction type %d FAIL\n", rtype);
+                print_debug_info(model, candidate, range);
+            }
+        }
+
+        printf("Acceptable relative error: < %Lg \u03B5, absolute error < %Lg\n",
+               get_acceptable_relative_error(), get_acceptable_absolute_error(range));
+    }
+    acQuit();
+    modelmesh_destroy(modelmesh);
+    acmesh_destroy(mesh);
+
+    return num_failures;
+}
+
+/** Finds the maximum and minimum in all meshes and computes the range.
+ * Note! Potentially dangerous if all meshes do not interact with each other.
+ * Otherwise the range may be too high.
+ */
+static ModelScalar
+get_data_range(const ModelMesh& model)
+{
+    ModelScalar vertex_buffer_max_all = -INFINITY;
+    ModelScalar vertex_buffer_min_all = INFINITY;
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        const ModelScalar vertex_buffer_max = model_reduce_scal(model, RTYPE_MAX,
+                                                                VertexBufferHandle(w));
+        const ModelScalar vertex_buffer_min = model_reduce_scal(model, RTYPE_MIN,
+                                                                VertexBufferHandle(w));
+
+        if (vertex_buffer_max > vertex_buffer_max_all)
+            vertex_buffer_max_all = vertex_buffer_max;
+        if (vertex_buffer_min < vertex_buffer_min_all)
+            vertex_buffer_min_all = vertex_buffer_min;
+    }
+    return fabsl(vertex_buffer_max_all - vertex_buffer_min_all);
+}
+
+// #define GEN_TEST_RESULT
+#if GEN_TEST_RESULT == 1
+static FILE* test_result = NULL;
+#endif
+
+static bool
+verify_meshes(const ModelMesh& model, const AcMesh& candidate)
+{
+    bool retval = true;
+
+#if GEN_TEST_RESULT == 1
+    ErrorInfo err = ErrorInfo();
+#endif
+
+    const ModelScalar range = get_data_range(model);
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        const size_t n = acVertexBufferSize(model.info);
+
+        // Maximum errors
+        ErrorInfo max_abs_error = ErrorInfo();
+        ErrorInfo max_rel_error = ErrorInfo();
+
+        for (size_t i = 0; i < n; ++i) {
+            const ModelScalar model_val = model.vertex_buffer[VertexBufferHandle(w)][i];
+            const AcReal cand_val       = candidate.vertex_buffer[VertexBufferHandle(w)][i];
+
+            if (!verify(model_val, cand_val, range)) {
+                const int i0 = i % model.info.int_params[AC_mx];
+                const int j0 = ((i %
+                                 (model.info.int_params[AC_mx] * model.info.int_params[AC_my])) /
+                                model.info.int_params[AC_mx]);
+                const int k0 = i / (model.info.int_params[AC_mx] * model.info.int_params[AC_my]);
+                printf("Index (%d, %d, %d)\n", i0, j0, k0);
+                print_debug_info(model_val, cand_val, range);
+                retval = false;
+                printf("Breaking\n");
+                break;
+            }
+
+            const ModelScalar abs_error = get_absolute_error(model_val, cand_val);
+            if (abs_error > max_abs_error.error) {
+                max_abs_error.error     = abs_error;
+                max_abs_error.model     = model_val;
+                max_abs_error.candidate = cand_val;
+            }
+
+            const ModelScalar rel_error = get_relative_error(model_val, cand_val);
+            if (rel_error > max_rel_error.error) {
+                max_rel_error.error     = rel_error;
+                max_rel_error.model     = model_val;
+                max_rel_error.candidate = cand_val;
+            }
+
+#if GEN_TEST_RESULT == 1
+            if (abs_error > err.error) {
+                err.error     = abs_error;
+                err.model     = model_val;
+                err.candidate = cand_val;
+            }
+#endif
+        }
+        // print_result(max_rel_error.model, max_rel_error.candidate, range,
+        // vtxbuf_names[VertexBufferHandle(w)]);
+        print_result(max_abs_error.model, max_abs_error.candidate, range,
+                     vtxbuf_names[VertexBufferHandle(w)]);
+    }
+
+#if GEN_TEST_RESULT == 1
+    const ModelScalar rel_err = get_relative_error(err.model, err.candidate);
+    const ModelScalar abs_err = get_absolute_error(err.model, err.candidate);
+    fprintf(test_result, "%.3Lg & %.3Lg\n", abs_err, rel_err);
+#endif
+
+    printf("Acceptable relative error: < %Lg \u03B5, absolute error < %Lg\n",
+           get_acceptable_relative_error(), get_acceptable_absolute_error(range));
+
+    return retval;
+}
+
+int
+check_rk3(const AcMeshInfo& mesh_info)
+{
+    const int num_iterations = 1; // Note: should work up to at least 15 steps
+    printf("Testing RK3 (running %d steps before checking the result)\n", num_iterations);
+    int num_failures = 0;
+
+    // Init CPU meshes
+    AcMesh* gpu_mesh      = acmesh_create(mesh_info);
+    ModelMesh* model_mesh = modelmesh_create(mesh_info);
+
+    // Init GPU meshes
+    acInit(mesh_info);
+
+    for (unsigned int i = 0; i < ARRAY_SIZE(test_cases); ++i) {
+        const InitType itype = test_cases[i];
+        printf("Checking %s...\n", init_type_names[InitType(itype)]);
+
+        // Init the mesh and figure out the acceptable range for error
+        acmesh_init_to(InitType(itype), gpu_mesh);
+
+        acLoad(*gpu_mesh);
+        acmesh_to_modelmesh(*gpu_mesh, model_mesh);
+
+        acBoundcondStep();
+        boundconds(model_mesh->info, model_mesh);
+
+        for (int j = 0; j < num_iterations; ++j) {
+            // const AcReal umax = AcReal(acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY,
+            // VTXBUF_UUZ));
+            // const AcReal dt   = host_timestep(umax, mesh_info);
+            const AcReal dt = AcReal(1e-2); // Use a small constant timestep to avoid instabilities
+
+#if LFORCING
+            const ForcingParams forcing_params = generateForcingParams(model_mesh->info);
+            loadForcingParamsToHost(forcing_params, model_mesh);
+            loadForcingParamsToDevice(forcing_params);
+#endif
+
+            acIntegrate(dt);
+
+            model_rk3(dt, model_mesh);
+        }
+        boundconds(model_mesh->info, model_mesh);
+        acBoundcondStep();
+        acStore(gpu_mesh);
+
+        bool is_acceptable = verify_meshes(*model_mesh, *gpu_mesh);
+        if (!is_acceptable) {
+            ++num_failures;
+        }
+    }
+
+    acQuit();
+    acmesh_destroy(gpu_mesh);
+    modelmesh_destroy(model_mesh);
+
+    return num_failures;
+}
+
+int
+run_autotest(const char* config_path)
+{
+#if GEN_TEST_RESULT == 1
+    char testresult_path[256];
+    sprintf(testresult_path, "%s_fullstep_testresult.out",
+            AC_DOUBLE_PRECISION ? "double" : "float");
+
+    test_result = fopen(testresult_path, "w");
+    ERRCHK(test_result);
+
+    fprintf(test_result, "n, max abs error, corresponding rel error\n");
+#endif
+
+    /* Parse configs */
+    AcMeshInfo config;
+    load_config(config_path, &config);
+
+    if (STENCIL_ORDER > 6)
+        printf("WARNING!!! If the stencil order is larger than the computational domain some "
+               "vertices may be done twice (f.ex. doing inner and outer domains separately and "
+               "some of the front/back/left/right/etc slabs collide). The mesh must be large "
+               "enough s.t. this doesn't happen.");
+
+    const vec3i test_dims[] = {{32, 32, 32}, {64, 32, 32}, {32, 64, 32}, {32, 32, 64},
+                               {64, 64, 32}, {64, 32, 64}, {32, 64, 64}};
+
+    int num_failures = 0;
+    for (size_t i = 0; i < ARRAY_SIZE(test_dims); ++i) {
+        config.int_params[AC_nx] = test_dims[i].x;
+        config.int_params[AC_ny] = test_dims[i].y;
+        config.int_params[AC_nz] = test_dims[i].z;
+        update_config(&config);
+
+        printf("Testing mesh (%d, %d, %d):\n", //
+               test_dims[i].x, test_dims[i].y, test_dims[i].z);
+
+        num_failures += check_reductions(config);
+        fflush(stdout);
+    }
+
+    for (size_t i = 0; i < ARRAY_SIZE(test_dims); ++i) {
+        config.int_params[AC_nx] = test_dims[i].x;
+        config.int_params[AC_ny] = test_dims[i].y;
+        config.int_params[AC_nz] = test_dims[i].z;
+        update_config(&config);
+
+        printf("Testing mesh (%d, %d, %d):\n", //
+               test_dims[i].x, test_dims[i].y, test_dims[i].z);
+
+        num_failures += check_rk3(config);
+        fflush(stdout);
+    }
+
+    printf("\n--------Testing done---------\n");
+    printf("Failures found: %d\n", num_failures);
+
+#if GEN_TEST_RESULT == 1
+    fflush(test_result);
+    fclose(test_result);
+#endif
+
+    if (num_failures > 0)
+        return EXIT_FAILURE;
+    else
+        return EXIT_SUCCESS;
+}
+
+#elif TEST_TYPE ==                                                                                 \
+    THOROUGH_TEST // GEN TEST FILE START HERE
+                  // --------------------------------------------------------------------------------------------------------------
+typedef struct {
+    ModelScalar model;
+    AcReal candidate;
+    ModelScalar abs_error;
+    ModelScalar ulp_error;
+    ModelScalar rel_error;
+    ModelScalar maximum_magnitude;
+    ModelScalar minimum_magnitude;
+} Error;
+
+Error
+get_error(ModelScalar model, AcReal candidate)
+{
+    Error error;
+    error.abs_error = 0;
+
+    error.model     = model;
+    error.candidate = candidate;
+
+    if (error.model == error.candidate || fabsl(model - candidate) == 0) { // If exact
+        error.abs_error = 0;
+        error.rel_error = 0;
+        error.ulp_error = 0;
+    }
+    else if (!is_valid(error.model) || !is_valid(error.candidate)) {
+        error.abs_error = INFINITY;
+        error.rel_error = INFINITY;
+        error.ulp_error = INFINITY;
+    }
+    else {
+        const int base = 2;
+        const int p    = sizeof(AcReal) == 4 ? 24 : 53; // Bits in the significant
+
+        const ModelScalar e = floorl(logl(fabsl(error.model)) / logl(2));
+
+        const ModelScalar ulp             = powl(base, e - (p - 1));
+        const ModelScalar machine_epsilon = 0.5 * powl(base, -(p - 1));
+        error.abs_error                   = fabsl(model - candidate);
+        error.ulp_error                   = error.abs_error / ulp;
+        error.rel_error                   = fabsl(1.0l - candidate / model) / machine_epsilon;
+    }
+
+    return error;
+}
+
+Error
+get_max_abs_error_mesh(const ModelMesh& model_mesh, const AcMesh& candidate_mesh)
+{
+    Error error;
+    error.abs_error = -1;
+
+    for (size_t j = 0; j < NUM_VTXBUF_HANDLES; ++j) {
+        for (size_t i = 0; i < acVertexBufferSize(model_mesh.info); ++i) {
+            Error curr_error = get_error(model_mesh.vertex_buffer[j][i],
+                                         candidate_mesh.vertex_buffer[j][i]);
+            if (curr_error.abs_error > error.abs_error)
+                error = curr_error;
+        }
+    }
+
+    error.maximum_magnitude = -1; // Not calculated.
+    error.minimum_magnitude = -1; // Not calculated.
+
+    return error;
+}
+
+static ModelScalar
+get_maximum_magnitude(const ModelScalar* field, const AcMeshInfo info)
+{
+    ModelScalar maximum = -INFINITY;
+
+    for (size_t i = 0; i < acVertexBufferSize(info); ++i)
+        maximum = max(maximum, fabsl(field[i]));
+
+    return maximum;
+}
+
+static ModelScalar
+get_minimum_magnitude(const ModelScalar* field, const AcMeshInfo info)
+{
+    ModelScalar minimum = INFINITY;
+
+    for (size_t i = 0; i < acVertexBufferSize(info); ++i)
+        minimum = min(minimum, fabsl(field[i]));
+
+    return minimum;
+}
+
+Error
+get_max_abs_error_vtxbuf(const VertexBufferHandle vtxbuf_handle, const ModelMesh& model_mesh,
+                         const AcMesh& candidate_mesh)
+{
+    ModelScalar* model_vtxbuf = model_mesh.vertex_buffer[vtxbuf_handle];
+    AcReal* candidate_vtxbuf  = candidate_mesh.vertex_buffer[vtxbuf_handle];
+
+    Error error;
+    error.abs_error = -1;
+
+    for (size_t i = 0; i < acVertexBufferSize(model_mesh.info); ++i) {
+
+        Error curr_error = get_error(model_vtxbuf[i], candidate_vtxbuf[i]);
+
+        if (curr_error.abs_error > error.abs_error)
+            error = curr_error;
+    }
+
+    error.maximum_magnitude = get_maximum_magnitude(model_vtxbuf, model_mesh.info);
+    error.minimum_magnitude = get_minimum_magnitude(model_vtxbuf, model_mesh.info);
+
+    return error;
+}
+
+void
+print_error_to_file(const char* path, const int n, const Error error)
+{
+    FILE* file = fopen(path, "a");
+    fprintf(file, "%d, %Lg, %Lg, %Lg, %Lg, %Lg\n", n, error.ulp_error, error.abs_error,
+            error.rel_error, error.maximum_magnitude, error.minimum_magnitude);
+    // fprintf(file, "%d, %Lg, %Lg, %Lg, %Lg, %Lg\n", n, error.maximum_magnitude,
+    // error.minimum_magnitude, error.abs_error, error.ulp_error, error.rel_error);
+    fclose(file);
+}
+
+#define MAX_PATH_LEN (256)
+
+int
+run_autotest(void)
+{
+
+#define N_MIN (32)
+#define N_MAX (512)
+    for (int n = N_MIN; n <= N_MAX; n += N_MIN) {
+        AcMeshInfo config;
+        load_config(&config);
+        config.int_params[AC_nx] = config.int_params[AC_ny] = config.int_params[AC_nz] = n;
+        update_config(&config);
+
+        // Init host
+        AcMesh* candidate_mesh = acmesh_create(config);
+        ModelMesh* model_mesh  = modelmesh_create(config);
+
+        // Init device
+        acInit(config);
+
+        // Check all initial conditions
+        for (int i = 0; i < ARRAY_SIZE(test_cases); ++i) {
+            const InitType init_type = test_cases[i];
+            acmesh_init_to((InitType)init_type, candidate_mesh);
+            acmesh_to_modelmesh(*candidate_mesh, model_mesh); // Load to Host
+            acLoad(*candidate_mesh);                          // Load to Device
+
+            boundconds(model_mesh->info, model_mesh);
+            acBoundcondStep();
+
+            { // Check boundconds
+                acStore(candidate_mesh);
+                Error boundcond_error = get_max_abs_error_mesh(*model_mesh, *candidate_mesh);
+                char boundcond_path[MAX_PATH_LEN];
+                sprintf(boundcond_path, "%s_boundcond_%s.testresult",
+                        AC_DOUBLE_PRECISION ? "double" : "float",
+                        init_type_names[(InitType)init_type]);
+                print_error_to_file(boundcond_path, n, boundcond_error);
+            }
+
+            { // Check scalar max reduction
+                ModelScalar model         = model_reduce_scal(*model_mesh, (ReductionType)RTYPE_MAX,
+                                                      VTXBUF_UUX);
+                AcReal candidate          = acReduceScal((ReductionType)RTYPE_MAX, VTXBUF_UUX);
+                Error scalar_reduce_error = get_error(model, candidate);
+                char scalar_reduce_path[MAX_PATH_LEN];
+                sprintf(scalar_reduce_path, "%s_scalar_reduce_%s.testresult",
+                        AC_DOUBLE_PRECISION ? "double" : "float",
+                        init_type_names[(InitType)init_type]);
+                print_error_to_file(scalar_reduce_path, n, scalar_reduce_error);
+            }
+
+            { // Check vector max reduction
+                ModelScalar model = model_reduce_vec(*model_mesh, (ReductionType)RTYPE_MAX,
+                                                     VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+                AcReal candidate  = acReduceVec((ReductionType)RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY,
+                                               VTXBUF_UUZ);
+                Error vector_reduce_error = get_error(model, candidate);
+                char vector_reduce_path[MAX_PATH_LEN];
+                sprintf(vector_reduce_path, "%s_vector_reduce_%s.testresult",
+                        AC_DOUBLE_PRECISION ? "double" : "float",
+                        init_type_names[(InitType)init_type]);
+                print_error_to_file(vector_reduce_path, n, vector_reduce_error);
+            }
+
+            // Time advance
+            {
+                const AcReal umax = (AcReal)model_reduce_vec(*model_mesh, (ReductionType)RTYPE_MAX,
+                                                             VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+                const AcReal dt   = host_timestep(umax, config);
+
+#if LFORCING
+
+                // CURRENTLY AUTOTEST NOT SUPPORTED WITH FORCING!!!
+
+#endif
+
+                // Host integration step
+                model_rk3(dt, model_mesh);
+                boundconds(config, model_mesh);
+
+                // Device integration step
+                acIntegrate(dt);
+                acBoundcondStep();
+                acStore(candidate_mesh);
+
+                // Check fields
+                for (int vtxbuf_handle = 0; vtxbuf_handle < NUM_VTXBUF_HANDLES; ++vtxbuf_handle) {
+                    Error field_error = get_max_abs_error_vtxbuf((VertexBufferHandle)vtxbuf_handle,
+                                                                 *model_mesh, *candidate_mesh);
+
+                    printf("model %Lg, cand %Lg, abs %Lg, rel %Lg\n",
+                           (ModelScalar)field_error.model, (ModelScalar)field_error.candidate,
+                           (ModelScalar)field_error.abs_error, (ModelScalar)field_error.rel_error);
+
+                    char field_path[MAX_PATH_LEN];
+                    sprintf(field_path, "%s_integrationstep_%s_%s.testresult",
+                            AC_DOUBLE_PRECISION ? "double" : "float",
+                            init_type_names[(InitType)init_type],
+                            vtxbuf_names[(VertexBufferHandle)vtxbuf_handle]);
+                    print_error_to_file(field_path, n, field_error);
+                }
+            }
+        }
+
+        // Deallocate host
+        acmesh_destroy(candidate_mesh);
+        modelmesh_destroy(model_mesh);
+
+        // Deallocate device
+        acQuit();
+    }
+
+    return 0;
+}
+#endif
--- a/samples/standalone/benchmark.cc
+++ b/samples/standalone/benchmark.cc
@@ -0,0 +1,386 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "run.h"
+
+#include <stdlib.h> // EXIT_SUCCESS
+
+#include "config_loader.h"
+#include "model/host_memory.h"
+#include "model/host_timestep.h"
+#include "model/model_reduce.h"
+#include "model/model_rk3.h"
+#include "timer_hires.h"
+
+#include "errchk.h"
+#include <algorithm>
+#include <math.h>
+#include <vector>
+
+static bool
+smaller_than(const double& a, const double& b)
+{
+    return a < b;
+}
+
+int
+run_benchmark(const char* config_path)
+{
+    const int nn        = 512;
+    const int num_iters = 100;
+#define BENCH_STRONG_SCALING (1)
+    const int num_processes = acGetNumDevicesPerNode();
+
+    AcMeshInfo mesh_info;
+    load_config(config_path, &mesh_info);
+    mesh_info.int_params[AC_nx] = mesh_info.int_params[AC_ny] = nn;
+    mesh_info.int_params[AC_nz] = BENCH_STRONG_SCALING ? nn : nn * num_processes;
+    update_config(&mesh_info);
+
+    AcMesh* mesh = acmesh_create(mesh_info);
+    acmesh_init_to(INIT_TYPE_ABC_FLOW, mesh);
+
+    acInit(mesh_info);
+    acLoad(*mesh);
+
+    std::vector<double> results;
+    results.reserve(num_iters);
+
+    // Warmup
+    for (int i = 0; i < 10; ++i) {
+        acIntegrate(0);
+    }
+    acSynchronize();
+    const AcReal dt = FLT_EPSILON;
+    printf("Using dt = %g\n", (double)dt);
+
+    Timer total_time;
+    timer_reset(&total_time);
+
+    Timer step_time;
+    for (int i = 0; i < num_iters; ++i) {
+        timer_reset(&step_time);
+
+        acIntegrate(dt);
+        acSynchronize();
+
+        results.push_back(timer_diff_nsec(step_time) / 1e6);
+    }
+    acSynchronize();
+    const double ms_elapsed     = timer_diff_nsec(total_time) / 1e6;
+    const double nth_percentile = 0.90;
+    std::sort(results.begin(), results.end(), smaller_than);
+
+    printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
+    printf("Total time: %f ms\n", ms_elapsed);
+    printf("Time per step: %f ms\n", ms_elapsed / num_iters);
+
+    const size_t nth_index = int(nth_percentile * num_iters);
+    printf("%dth percentile per step: %f ms\n", int(100 * nth_percentile), results[nth_index]);
+
+    // Write out
+    char buf[256];
+    sprintf(buf, "nprocs_%d_result_%s.bench", num_processes,
+            BENCH_STRONG_SCALING ? "strong" : "weak");
+    FILE* fp = fopen(buf, "w");
+    ERRCHK_ALWAYS(fp);
+    fprintf(fp, "num_processes, percentile (%dth)\n", int(100 * nth_percentile));
+    fprintf(fp, "%d, %g\n", num_processes, results[nth_index]);
+    fclose(fp);
+
+    acQuit();
+    acmesh_destroy(mesh);
+
+    return AC_SUCCESS;
+}
+
+#if 0 // Old single-GPU benchmark
+static bool
+smaller_than(const double& a, const double& b)
+{
+    return a < b;
+}
+
+static int
+write_runningtimes(const char* path, const int n, const double min, const double max,
+                   const double median, const double perc)
+{
+    FILE* fp;
+    fp = fopen(path, "a");
+
+    if (fp != NULL) {
+        fprintf(fp, "%d, %f, %f, %f, %f\n", n, min, max, median, perc);
+        fclose(fp);
+        return EXIT_SUCCESS;
+    }
+    return EXIT_FAILURE;
+}
+
+static int
+write_percentiles(const char* path, const int num_iters, const std::vector<double>& results)
+{
+    FILE* fp;
+    fp = fopen(path, "w");
+
+    if (fp != NULL) {
+        for (int i = 0; i < 100; ++i) {
+            fprintf(fp, "%f\n", results[(long unsigned)((i / 100.) * num_iters)]);
+        }
+        fclose(fp);
+        return EXIT_SUCCESS;
+    }
+    return EXIT_FAILURE;
+}
+
+int
+run_benchmark(void)
+{
+    char runningtime_path[256];
+    sprintf(runningtime_path, "%s_%s_runningtimes.out", AC_DOUBLE_PRECISION ? "double" : "float",
+            GEN_BENCHMARK_RK3 ? "rk3substep" : "fullstep");
+
+    FILE* fp;
+    fp = fopen(runningtime_path, "w");
+
+    if (fp != NULL) {
+        fprintf(fp, "n, min, max, median, perc\n");
+        fclose(fp);
+    }
+    else {
+        return EXIT_FAILURE;
+    }
+
+#define N_STEP_SIZE (128)
+#define MAX_MESH_DIM (128)
+#define NUM_ITERS (100)
+    for (int n = N_STEP_SIZE; n <= MAX_MESH_DIM; n += N_STEP_SIZE) {
+        /* Parse configs */
+        AcMeshInfo mesh_info;
+        load_config(&mesh_info);
+        mesh_info.int_params[AC_nx] = n;
+        mesh_info.int_params[AC_ny] = mesh_info.int_params[AC_nx];
+        mesh_info.int_params[AC_nz] = mesh_info.int_params[AC_nx];
+        update_config(&mesh_info);
+
+        AcMesh* mesh = acmesh_create(mesh_info);
+        acmesh_init_to(INIT_TYPE_ABC_FLOW, mesh);
+
+        acInit(mesh_info);
+        acLoad(*mesh);
+
+        std::vector<double> results;
+        results.reserve(NUM_ITERS);
+
+        // Optimize
+        // acAutoOptimize();
+
+        // Warmup
+        for (int i = 0; i < 10; ++i) {
+            acIntegrate(0);
+        }
+
+        Timer t;
+        for (int i = 0; i < NUM_ITERS; ++i) {
+
+            timer_reset(&t);
+            const AcReal dt = FLT_EPSILON; // TODO NOTE: time to timestep not measured
+#if GEN_BENCHMARK_RK3 == 1
+            acIntegrateStep(2, dt);
+            acSynchronizeStream(STREAM_ALL);
+#else // GEN_BENCHMARK_FULL
+            acIntegrate(dt);
+#endif
+            const double ms_elapsed = timer_diff_nsec(t) / 1e6;
+            results.push_back(ms_elapsed);
+        }
+
+#define NTH_PERCENTILE (0.95)
+        std::sort(results.begin(), results.end(), smaller_than);
+        write_runningtimes(runningtime_path, n, results[0], results[results.size() - 1],
+                           results[int(0.5 * NUM_ITERS)], results[int(NTH_PERCENTILE * NUM_ITERS)]);
+
+        char percentile_path[256];
+        sprintf(percentile_path, "%d_%s_%s_percentiles.out", n,
+                AC_DOUBLE_PRECISION ? "double" : "float",
+                GEN_BENCHMARK_RK3 ? "rk3substep" : "fullstep");
+        write_percentiles(percentile_path, NUM_ITERS, results);
+
+        printf("%s running time %g ms, (%dth percentile, nx = %d) \n",
+               GEN_BENCHMARK_RK3 ? "RK3 step" : "Fullstep",
+               double(results[int(NTH_PERCENTILE * NUM_ITERS)]), int(NTH_PERCENTILE * 100),
+               mesh_info.int_params[AC_nx]);
+
+        acQuit();
+        acmesh_destroy(mesh);
+    }
+
+    return 0;
+}
+#endif // single-GPU benchmark
+
+/*
+
+#if AUTO_OPTIMIZE
+const char* benchmark_path = "benchmark.out";
+
+#include "kernels/rk3_threadblock.conf"
+static int
+write_result_to_file(const float& ms_per_step)
+{
+    FILE* fp;
+    fp = fopen(benchmark_path, "a");
+
+    if (fp != NULL) {
+        fprintf(fp,
+                "(%d, %d, %d), %d elems per thread, launch bound %d, %f ms\n",
+                RK_THREADS_X, RK_THREADS_Y, RK_THREADS_Z, RK_ELEMS_PER_THREAD,
+                RK_LAUNCH_BOUND_MIN_BLOCKS, double(ms_per_step));
+        fclose(fp);
+        return EXIT_SUCCESS;
+    }
+    return EXIT_FAILURE;
+}
+#endif
+
+#if GENERATE_BENCHMARK_DATA != 1
+int
+run_benchmark(void)
+{
+    // Parse configs
+    AcMeshInfo mesh_info;
+    load_config(&mesh_info);
+    mesh_info.int_params[AC_nx] = 128;
+    mesh_info.int_params[AC_ny] = mesh_info.int_params[AC_nx];
+    mesh_info.int_params[AC_nz] = mesh_info.int_params[AC_nx];
+    update_config(&mesh_info);
+
+    AcMesh* mesh = acmesh_create(mesh_info);
+    acmesh_init_to(INIT_TYPE_ABC_FLOW, mesh);
+
+    acInit(mesh_info);
+    acLoad(*mesh);
+
+    Timer t;
+    timer_reset(&t);
+
+    int steps           = 0;
+    const int num_steps = 100;
+    while (steps < num_steps) {
+        // Advance the simulation
+        const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY,
+                                        VTXBUF_UUZ);
+        const AcReal dt   = host_timestep(umax, mesh_info);
+        acIntegrate(dt);
+        ++steps;
+    }
+    acSynchronize();
+    const float wallclock = timer_diff_nsec(t) / 1e9f;
+    printf("%d steps. Wallclock time %f s per step\n", steps,
+           double(wallclock) / num_steps);
+    #if AUTO_OPTIMIZE
+    write_result_to_file(wallclock * 1e3f / steps);
+    #endif
+
+    acQuit();
+    acmesh_destroy(mesh);
+
+    return 0;
+}
+
+#else
+//////////////////////////////////////////////////////////////////////////GENERATE_BENCHMARK_DATA
+
+
+
+
+int
+run_benchmark(void)
+{
+    const char path[] = "result.out";
+    FILE* fp;
+    fp = fopen(path, "w");
+
+    if (fp != NULL) {
+        fprintf(fp, "n, min, max, median, perc\n");
+        fclose(fp);
+    } else {
+        return EXIT_FAILURE;
+    }
+
+    #define N_STEP_SIZE (256)
+    #define MAX_MESH_DIM (256)
+    #define NUM_ITERS (1000)
+    for (int n = N_STEP_SIZE; n <= MAX_MESH_DIM; n += N_STEP_SIZE) {
+        // Parse configs
+        AcMeshInfo mesh_info;
+        load_config(&mesh_info);
+        mesh_info.int_params[AC_nx] = n;
+        mesh_info.int_params[AC_ny] = mesh_info.int_params[AC_nx];
+        mesh_info.int_params[AC_nz] = mesh_info.int_params[AC_nx];
+        update_config(&mesh_info);
+
+        AcMesh* mesh = acmesh_create(mesh_info);
+        acmesh_init_to(INIT_TYPE_ABC_FLOW, mesh);
+
+        acInit(mesh_info);
+        acLoad(*mesh);
+
+        std::vector<double> results;
+        results.reserve(NUM_ITERS);
+
+
+        // Warmup
+        for (int i = 0; i < 10; ++i) {
+            acIntegrate(0);
+            acSynchronize();
+        }
+
+        Timer t;
+
+        const AcReal dt = AcReal(1e-5);
+        for (int i = 0; i < NUM_ITERS; ++i) {
+
+            timer_reset(&t);
+            //acIntegrate(dt);
+            acIntegrateStep(2, dt);
+            acSynchronize();
+
+            const double ms_elapsed = timer_diff_nsec(t) / 1e6;
+            results.push_back(ms_elapsed);
+        }
+
+
+
+        #define NTH_PERCENTILE (0.95)
+        std::sort(results.begin(), results.end(), smaller_than);
+        write_result(n, results[0], results[results.size()-1], results[int(0.5 * NUM_ITERS)],
+results[int(NTH_PERCENTILE * NUM_ITERS)]); write_percentiles(n, NUM_ITERS, results);
+    }
+
+    return 0;
+}
+#endif
+*/
--- a/samples/standalone/config_loader.cc
+++ b/samples/standalone/config_loader.cc
@@ -0,0 +1,184 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "config_loader.h"
+
+#include <limits.h> // UINT_MAX
+#include <stdint.h> // uint8_t, uint32_t
+#include <stdio.h>  // print
+#include <string.h> // memset
+
+#include "errchk.h"
+#include "math_utils.h"
+
+/**
+ \brief Find the index of the keyword in names
+ \return Index in range 0...n if the keyword is in names. -1 if the keyword was
+ not found.
+ */
+static int
+find_str(const char keyword[], const char* names[], const int& n)
+{
+    for (int i = 0; i < n; ++i)
+        if (!strcmp(keyword, names[i]))
+            return i;
+
+    return -1;
+}
+
+static void
+parse_config(const char* path, AcMeshInfo* config)
+{
+    FILE* fp;
+    fp = fopen(path, "r");
+    // For knowing which .conf file will be used
+    printf("Config file path: \n %s \n ", path);
+    ERRCHK(fp != NULL);
+
+    const size_t BUF_SIZE = 128;
+    char keyword[BUF_SIZE];
+    char value[BUF_SIZE];
+    int items_matched;
+    while ((items_matched = fscanf(fp, "%s = %s", keyword, value)) != EOF) {
+
+        if (items_matched < 2)
+            continue;
+
+        int idx = -1;
+        if ((idx = find_str(keyword, intparam_names, NUM_INT_PARAMS)) >= 0)
+            config->int_params[idx] = atoi(value);
+        else if ((idx = find_str(keyword, realparam_names, NUM_REAL_PARAMS)) >= 0)
+            config->real_params[idx] = AcReal(atof(value));
+    }
+
+    fclose(fp);
+}
+
+void
+update_config(AcMeshInfo* config)
+{
+    config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER;
+    ///////////// PAD TEST
+    // config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER + PAD_SIZE;
+    ///////////// PAD TEST
+    config->int_params[AC_my] = config->int_params[AC_ny] + STENCIL_ORDER;
+    config->int_params[AC_mz] = config->int_params[AC_nz] + STENCIL_ORDER;
+
+    // Bounds for the computational domain, i.e. nx_min <= i < nx_max
+    config->int_params[AC_nx_min] = STENCIL_ORDER / 2;
+    config->int_params[AC_nx_max] = config->int_params[AC_nx_min] + config->int_params[AC_nx];
+    config->int_params[AC_ny_min] = STENCIL_ORDER / 2;
+    config->int_params[AC_ny_max] = config->int_params[AC_ny] + STENCIL_ORDER / 2;
+    config->int_params[AC_nz_min] = STENCIL_ORDER / 2;
+    config->int_params[AC_nz_max] = config->int_params[AC_nz] + STENCIL_ORDER / 2;
+
+    // Spacing
+    config->real_params[AC_inv_dsx] = AcReal(1.) / config->real_params[AC_dsx];
+    config->real_params[AC_inv_dsy] = AcReal(1.) / config->real_params[AC_dsy];
+    config->real_params[AC_inv_dsz] = AcReal(1.) / config->real_params[AC_dsz];
+    config->real_params[AC_dsmin]   = min(
+        config->real_params[AC_dsx], min(config->real_params[AC_dsy], config->real_params[AC_dsz]));
+
+    // Real grid coordanates (DEFINE FOR GRID WITH THE GHOST ZONES)
+    config->real_params[AC_xlen] = config->real_params[AC_dsx] * config->int_params[AC_mx];
+    config->real_params[AC_ylen] = config->real_params[AC_dsy] * config->int_params[AC_my];
+    config->real_params[AC_zlen] = config->real_params[AC_dsz] * config->int_params[AC_mz];
+
+    config->real_params[AC_xorig] = AcReal(.5) * config->real_params[AC_xlen];
+    config->real_params[AC_yorig] = AcReal(.5) * config->real_params[AC_ylen];
+    config->real_params[AC_zorig] = AcReal(.5) * config->real_params[AC_zlen];
+
+    /* Additional helper params */
+    // Int helpers
+    config->int_params[AC_mxy]  = config->int_params[AC_mx] * config->int_params[AC_my];
+    config->int_params[AC_nxy]  = config->int_params[AC_nx] * config->int_params[AC_ny];
+    config->int_params[AC_nxyz] = config->int_params[AC_nxy] * config->int_params[AC_nz];
+
+    // Real helpers
+    config->real_params[AC_cs2_sound] = config->real_params[AC_cs_sound] *
+                                        config->real_params[AC_cs_sound];
+
+    config->real_params[AC_cv_sound] = config->real_params[AC_cp_sound] /
+                                       config->real_params[AC_gamma];
+
+    AcReal G_CONST_CGS = AcReal(
+        6.674e-8); // cm^3/(g*s^2) GGS definition //TODO define in a separate module
+    AcReal M_sun = AcReal(1.989e33); // g solar mass
+
+    config->real_params[AC_unit_mass] = (config->real_params[AC_unit_length] *
+                                         config->real_params[AC_unit_length] *
+                                         config->real_params[AC_unit_length]) *
+                                        config->real_params[AC_unit_density];
+
+    config->real_params[AC_M_sink] = config->real_params[AC_M_sink_Msun] * M_sun /
+                                     config->real_params[AC_unit_mass];
+    config->real_params[AC_M_sink_init] = config->real_params[AC_M_sink_Msun] * M_sun /
+                                          config->real_params[AC_unit_mass];
+
+    config->real_params[AC_G_const] = G_CONST_CGS / ((config->real_params[AC_unit_velocity] *
+                                                      config->real_params[AC_unit_velocity]) /
+                                                     (config->real_params[AC_unit_density] *
+                                                      config->real_params[AC_unit_length] *
+                                                      config->real_params[AC_unit_length]));
+
+    config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2) * config->real_params[AC_GM_star]));
+
+#if VERBOSE_PRINTING // Defined in astaroth.h
+    printf("###############################################################\n");
+    printf("Config dimensions recalculated:\n");
+    acPrintMeshInfo(*config);
+    printf("###############################################################\n");
+#endif
+}
+
+/**
+\brief Loads data from astaroth.conf into a config struct.
+\return 0 on success, -1 if there are potentially uninitialized values.
+*/
+int
+load_config(const char* config_path, AcMeshInfo* config)
+{
+    int retval = 0;
+    ERRCHK(config_path);
+
+    // memset reads the second parameter as a byte even though it says int in
+    // the function declaration
+    memset(config, (uint8_t)0xFF, sizeof(*config));
+
+    parse_config(config_path, config);
+    update_config(config);
+
+    // sizeof(config) must be a multiple of 4 bytes for this to work
+    ERRCHK(sizeof(*config) % sizeof(uint32_t) == 0);
+    for (size_t i = 0; i < sizeof(*config) / sizeof(uint32_t); ++i) {
+        if (((uint32_t*)config)[i] == (uint32_t)0xFFFFFFFF) {
+            WARNING("Some config values may be uninitialized. "
+                    "See that all are defined in astaroth.conf\n");
+            retval = -1;
+        }
+    }
+    return retval;
+}
--- a/samples/standalone/config_loader.h
+++ b/samples/standalone/config_loader.h
@@ -0,0 +1,34 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Functions for loading and updating AcMeshInfo.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+
+/** Loads data from the config file */
+int load_config(const char* config_path, AcMeshInfo* config);
+
+/** Recalculates the portion of int parameters which get their values from nx,
+ * ny and nz. Must be called after modifying the config struct or otherwise
+ * contents of the struct will be incorrect */
+void update_config(AcMeshInfo* config);
--- a/samples/standalone/main.cc
+++ b/samples/standalone/main.cc
@@ -0,0 +1,217 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "run.h"
+#include "errchk.h"
+
+// Write all errors from stderr to an <errorlog_name> in the current working
+// directory
+static const bool write_log_to_a_file = false;
+static const char* errorlog_name      = "error.log";
+
+static void
+errorlog_init(void)
+{
+    FILE* fp = freopen(errorlog_name, "w", stderr); // Log errors to a file
+    if (!fp)
+        perror("Error redirecting stderr to a file");
+}
+
+static void
+errorlog_quit(void)
+{
+    fclose(stderr);
+
+    // Print contents of the latest errorlog to screen
+    FILE* fp = fopen(errorlog_name, "r");
+    if (fp) {
+        for (int c = getc(fp); c != EOF; c = getc(fp))
+            putchar(c);
+        fclose(fp);
+    }
+    else {
+        perror("Error opening error log");
+    }
+}
+
+typedef struct {
+    char* key[2];
+    char* description;
+} Option;
+
+static Option
+createOption(const char* key, const char* key_short, const char* description)
+{
+    Option option;
+
+    option.key[0]      = strdup(key);
+    option.key[1]      = strdup(key_short);
+    option.description = strdup(description);
+
+    return option;
+}
+
+static void
+destroyOption(Option* option)
+{
+    free(option->key[0]);
+    free(option->key[1]);
+    free(option->description);
+}
+
+typedef enum {
+    HELP,
+    TEST,
+    BENCHMARK,
+    SIMULATE,
+    RENDER,
+    CONFIG,
+    NUM_OPTIONS,
+} OptionType;
+
+static int
+findOption(const char* str, const Option options[NUM_OPTIONS])
+{
+    for (int i = 0; i < NUM_OPTIONS; ++i)
+        if (!strcmp(options[i].key[0], str) || !strcmp(options[i].key[1], str))
+            return i;
+
+    return -1;
+}
+
+static void
+print_options(const Option options[NUM_OPTIONS])
+{
+    // Formatting
+    int keylen[2] = {0};
+    for (int i = 0; i < NUM_OPTIONS; ++i) {
+        int len0 = strlen(options[i].key[0]);
+        int len1 = strlen(options[i].key[1]);
+        if (keylen[0] < len0)
+            keylen[0] = len0;
+        if (keylen[1] < len1)
+            keylen[1] = len1;
+    }
+
+    for (int i = 0; i < NUM_OPTIONS; ++i)
+        printf("\t%*s | %*s: %s\n", keylen[0], options[i].key[0], keylen[1], options[i].key[1],
+               options[i].description);
+}
+
+static void
+print_help(const Option options[NUM_OPTIONS])
+{
+    puts("Usage: ./ac_run [options]");
+    print_options(options);
+    printf("\n");
+    puts("For bug reporting, see README.md");
+}
+
+int
+main(int argc, char* argv[])
+{
+    if (write_log_to_a_file) {
+        errorlog_init();
+        atexit(errorlog_quit);
+    }
+
+    // Create options
+    // clang-format off
+    Option options[NUM_OPTIONS];
+    options[HELP]               = createOption("--help", "-h", "Prints this help.");
+    options[TEST]               = createOption("--test", "-t", "Runs autotests.");
+    options[BENCHMARK]          = createOption("--benchmark", "-b", "Runs benchmarks.");
+    options[SIMULATE]           = createOption("--simulate", "-s", "Runs the simulation.");
+    options[RENDER]             = createOption("--render", "-r", "Runs the real-time renderer.");
+    options[CONFIG]             = createOption("--config", "-c", "Uses the config file given after this flag instead of the default.");
+    // clang-format on
+
+    if (argc == 1) {
+        print_help(options);
+    }
+    else {
+        char* config_path = NULL;
+        for (int i = 1; i < argc; ++i) {
+            const int option = findOption(argv[i], options);
+            switch (option) {
+            case CONFIG:
+                if (i + 1 < argc) {
+                    config_path = strdup(argv[i + 1]);
+                }
+                else {
+                    printf("Syntax error. Usage: --config <config path>.\n");
+                    return EXIT_FAILURE;
+                }
+                break;
+            default:
+                break; // Do nothing
+            }
+        }
+        if (!config_path)
+            config_path = strdup(AC_DEFAULT_CONFIG);
+
+        printf("Config path: %s\n", config_path);
+        ERRCHK_ALWAYS(config_path);
+
+        for (int i = 1; i < argc; ++i) {
+            const int option = findOption(argv[i], options);
+            switch (option) {
+            case HELP:
+                print_help(options);
+                break;
+            case TEST:
+                run_autotest(config_path);
+                break;
+            case BENCHMARK:
+                run_benchmark(config_path);
+                break;
+            case SIMULATE:
+                run_simulation(config_path);
+                break;
+            case RENDER:
+                run_renderer(config_path);
+                break;
+            case CONFIG:
+                ++i;
+                break;
+            default:
+                printf("Invalid option %s\n", argv[i]);
+                break; // Do nothing
+            }
+        }
+
+        free(config_path);
+    }
+
+    for (int i = 0; i < NUM_OPTIONS; ++i)
+        destroyOption(&options[i]);
+
+    return EXIT_SUCCESS;
+}
--- a/samples/standalone/model/host_forcing.cc
+++ b/samples/standalone/model/host_forcing.cc
@@ -0,0 +1,320 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "host_forcing.h"
+
+// #include "math_utils.h"
+#include <cmath>
+using namespace std;
+
+// The is a wrapper for genering random numbers with a chosen system.
+AcReal
+get_random_number_01()
+{
+    // TODO: Implement better randon number generator http://www.cplusplus.com/reference/random/
+    return AcReal(rand()) / AcReal(RAND_MAX);
+}
+
+static AcReal3
+cross(const AcReal3& a, const AcReal3& b)
+{
+    AcReal3 c;
+
+    c.x = a.y * b.z - a.z * b.y;
+    c.y = a.z * b.x - a.x * b.z;
+    c.z = a.x * b.y - a.y * b.x;
+
+    return c;
+}
+
+static AcReal
+dot(const AcReal3& a, const AcReal3& b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+
+static AcReal3
+vec_norm(const AcReal3& a)
+{
+    AcReal3 c;
+    AcReal norm = dot(a, a);
+
+    c.x = a.x / sqrt(norm);
+    c.y = a.y / sqrt(norm);
+    c.z = a.z / sqrt(norm);
+
+    return c;
+}
+
+static AcReal3
+vec_multi_scal(const AcReal scal, const AcReal3& a)
+{
+    AcReal3 c;
+
+    c.x = a.x * scal;
+    c.y = a.y * scal;
+    c.z = a.z * scal;
+
+    return c;
+}
+
+// Generate forcing wave vector k_force
+AcReal3
+helical_forcing_k_generator(const AcReal kmax, const AcReal kmin)
+{
+    AcReal phi, theta, kk; // Spherical direction coordinates
+    AcReal3 k_force;       // forcing wave vector
+
+    AcReal delta_k = kmax - kmin;
+
+    // Generate vector in spherical coordinates
+    phi   = AcReal(2.0) * AcReal(M_PI) * get_random_number_01();
+    theta = AcReal(M_PI) * get_random_number_01();
+    kk    = delta_k * get_random_number_01() + kmin;
+
+    // Cast into Cartesian form
+    k_force = (AcReal3){AcReal(kk * sin(theta) * cos(phi)), //
+                        AcReal(kk * sin(theta) * sin(phi)), //
+                        AcReal(kk * cos(theta))};
+
+    // printf("k_force.x %f, k_force.y %f, k_force.z %f \n", k_force.x, k_force.y, k_force.z);
+
+    // Round the numbers. In that way k(x/y/z) will get complete waves.
+    k_force.x = round(k_force.x);
+    k_force.y = round(k_force.y);
+    k_force.z = round(k_force.z);
+
+    // printf("After rounding --> k_force.x %f, k_force.y %f, k_force.z %f \n", k_force.x,
+    // k_force.y, k_force.z);
+
+    return k_force;
+}
+
+// Generate the unit perpendicular unit vector e required for helical forcing
+// Addapted from Pencil code forcing.f90 hel_vec() subroutine.
+void
+helical_forcing_e_generator(AcReal3* e_force, const AcReal3 k_force)
+{
+
+    AcReal3 k_cross_e         = cross(k_force, *e_force);
+    k_cross_e                 = vec_norm(k_cross_e);
+    AcReal3 k_cross_k_cross_e = cross(k_force, k_cross_e);
+    k_cross_k_cross_e         = vec_norm(k_cross_k_cross_e);
+    AcReal phi                = AcReal(2.0) * AcReal(M_PI) * get_random_number_01();
+    AcReal3 ee_tmp1           = vec_multi_scal(cos(phi), k_cross_e);
+    AcReal3 ee_tmp2           = vec_multi_scal(sin(phi), k_cross_k_cross_e);
+
+    *e_force = (AcReal3){ee_tmp1.x + ee_tmp2.x, ee_tmp1.y + ee_tmp2.y, ee_tmp1.z + ee_tmp2.z};
+}
+
+// PC Manual Eq. 223
+void
+helical_forcing_special_vector(AcReal3* ff_hel_re, AcReal3* ff_hel_im, const AcReal3 k_force,
+                               const AcReal3 e_force, const AcReal relhel)
+{
+
+    // k dot e
+    AcReal3 kdote;
+    kdote.x = k_force.x * e_force.x;
+    kdote.y = k_force.y * e_force.y;
+    kdote.z = k_force.z * e_force.z;
+
+    // k cross e
+    AcReal3 k_cross_e;
+    k_cross_e.x = k_force.y * e_force.z - k_force.z * e_force.y;
+    k_cross_e.y = k_force.z * e_force.x - k_force.x * e_force.z;
+    k_cross_e.z = k_force.x * e_force.y - k_force.y * e_force.x;
+
+    // k cross k cross e
+    AcReal3 k_cross_k_cross_e;
+    k_cross_k_cross_e.x = k_force.y * k_cross_e.z - k_force.z * k_cross_e.y;
+    k_cross_k_cross_e.y = k_force.z * k_cross_e.x - k_force.x * k_cross_e.z;
+    k_cross_k_cross_e.z = k_force.x * k_cross_e.y - k_force.y * k_cross_e.x;
+
+    // abs(k)
+    AcReal kabs = sqrt(k_force.x * k_force.x + k_force.y * k_force.y + k_force.z * k_force.z);
+
+    AcReal denominator = sqrt(AcReal(1.0) + relhel * relhel) * kabs *
+                         sqrt(kabs * kabs -
+                              (kdote.x * kdote.x + kdote.y * kdote.y + kdote.z * kdote.z));
+
+    // MV: I suspect there is a typo in the Pencil Code manual!
+    //*ff_hel_re = (AcReal3){-relhel*kabs*k_cross_e.x/denominator,
+    //                       -relhel*kabs*k_cross_e.y/denominator,
+    //                       -relhel*kabs*k_cross_e.z/denominator};
+
+    //*ff_hel_im = (AcReal3){k_cross_k_cross_e.x/denominator,
+    //                       k_cross_k_cross_e.y/denominator,
+    //                       k_cross_k_cross_e.z/denominator};
+
+    // See PC forcing.f90 forcing_hel_both()
+    *ff_hel_im = (AcReal3){kabs * k_cross_e.x / denominator, 
+                           kabs * k_cross_e.y / denominator,
+                           kabs * k_cross_e.z / denominator};
+
+    *ff_hel_re = (AcReal3){relhel * k_cross_k_cross_e.x / denominator, 
+                           relhel * k_cross_k_cross_e.y / denominator,
+                           relhel * k_cross_k_cross_e.z / denominator};
+
+}
+
+// Tool for loading forcing vector information into the device memory
+// %JP: Added a generic function for loading device constants (acLoadDeviceConstant).
+// This acForcingVec should go outside the core library since it references user-defined
+// parameters such as AC_forcing_magnitude which may not be defined in all projects.
+// host_forcing.cc is probably a good place for this.
+// %JP update: moved this here out of astaroth.cu. Should be renamed such that it cannot be
+// confused with actual interface functions.
+// %JP update 2: deprecated acForcingVec: use loadForcingParams instead
+void
+DEPRECATED_acForcingVec(const AcReal forcing_magnitude, const AcReal3 k_force,
+                        const AcReal3 ff_hel_re, const AcReal3 ff_hel_im,
+                        const AcReal forcing_phase, const AcReal kaver)
+{
+    acLoadDeviceConstant(AC_forcing_magnitude, forcing_magnitude);
+    acLoadDeviceConstant(AC_forcing_phase, forcing_phase);
+
+    acLoadDeviceConstant(AC_k_forcex, k_force.x);
+    acLoadDeviceConstant(AC_k_forcey, k_force.y);
+    acLoadDeviceConstant(AC_k_forcez, k_force.z);
+
+    acLoadDeviceConstant(AC_ff_hel_rex, ff_hel_re.x);
+    acLoadDeviceConstant(AC_ff_hel_rey, ff_hel_re.y);
+    acLoadDeviceConstant(AC_ff_hel_rez, ff_hel_re.z);
+
+    acLoadDeviceConstant(AC_ff_hel_imx, ff_hel_im.x);
+    acLoadDeviceConstant(AC_ff_hel_imy, ff_hel_im.y);
+    acLoadDeviceConstant(AC_ff_hel_imz, ff_hel_im.z);
+
+    acLoadDeviceConstant(AC_kaver, kaver);
+}
+
+void
+loadForcingParamsToDevice(const ForcingParams& forcing_params)
+{
+    acLoadDeviceConstant(AC_forcing_magnitude, forcing_params.magnitude);
+    acLoadDeviceConstant(AC_forcing_phase, forcing_params.phase);
+
+    acLoadDeviceConstant(AC_k_forcex, forcing_params.k_force.x);
+    acLoadDeviceConstant(AC_k_forcey, forcing_params.k_force.y);
+    acLoadDeviceConstant(AC_k_forcez, forcing_params.k_force.z);
+
+    acLoadDeviceConstant(AC_ff_hel_rex, forcing_params.ff_hel_re.x);
+    acLoadDeviceConstant(AC_ff_hel_rey, forcing_params.ff_hel_re.y);
+    acLoadDeviceConstant(AC_ff_hel_rez, forcing_params.ff_hel_re.z);
+
+    acLoadDeviceConstant(AC_ff_hel_imx, forcing_params.ff_hel_im.x);
+    acLoadDeviceConstant(AC_ff_hel_imy, forcing_params.ff_hel_im.y);
+    acLoadDeviceConstant(AC_ff_hel_imz, forcing_params.ff_hel_im.z);
+
+    acLoadDeviceConstant(AC_kaver, forcing_params.kaver);
+    acSynchronizeStream(STREAM_ALL);
+}
+
+/** This function would be used in autotesting to update the forcing params of the host
+    configuration. */
+void
+loadForcingParamsToHost(const ForcingParams& forcing_params, AcMesh* mesh)
+{
+    // %JP: Left some regex magic here in case we need to modify the ForcingParams struct
+    // acLoadDeviceConstant\(([A-Za-z_]*), ([a-z_.]*)\);
+    // mesh->info.real_params[$1] = $2;
+    mesh->info.real_params[AC_forcing_magnitude] = forcing_params.magnitude;
+    mesh->info.real_params[AC_forcing_phase]     = forcing_params.phase;
+
+    mesh->info.real_params[AC_k_forcex] = forcing_params.k_force.x;
+    mesh->info.real_params[AC_k_forcey] = forcing_params.k_force.y;
+    mesh->info.real_params[AC_k_forcez] = forcing_params.k_force.z;
+
+    mesh->info.real_params[AC_ff_hel_rex] = forcing_params.ff_hel_re.x;
+    mesh->info.real_params[AC_ff_hel_rey] = forcing_params.ff_hel_re.y;
+    mesh->info.real_params[AC_ff_hel_rez] = forcing_params.ff_hel_re.z;
+
+    mesh->info.real_params[AC_ff_hel_imx] = forcing_params.ff_hel_im.x;
+    mesh->info.real_params[AC_ff_hel_imy] = forcing_params.ff_hel_im.y;
+    mesh->info.real_params[AC_ff_hel_imz] = forcing_params.ff_hel_im.z;
+
+    mesh->info.real_params[AC_kaver] = forcing_params.kaver;
+}
+
+void
+loadForcingParamsToHost(const ForcingParams& forcing_params, ModelMesh* mesh)
+{
+    // %JP: Left some regex magic here in case we need to modify the ForcingParams struct
+    // acLoadDeviceConstant\(([A-Za-z_]*), ([a-z_.]*)\);
+    // mesh->info.real_params[$1] = $2;
+    mesh->info.real_params[AC_forcing_magnitude] = forcing_params.magnitude;
+    mesh->info.real_params[AC_forcing_phase]     = forcing_params.phase;
+
+    mesh->info.real_params[AC_k_forcex] = forcing_params.k_force.x;
+    mesh->info.real_params[AC_k_forcey] = forcing_params.k_force.y;
+    mesh->info.real_params[AC_k_forcez] = forcing_params.k_force.z;
+
+    mesh->info.real_params[AC_ff_hel_rex] = forcing_params.ff_hel_re.x;
+    mesh->info.real_params[AC_ff_hel_rey] = forcing_params.ff_hel_re.y;
+    mesh->info.real_params[AC_ff_hel_rez] = forcing_params.ff_hel_re.z;
+
+    mesh->info.real_params[AC_ff_hel_imx] = forcing_params.ff_hel_im.x;
+    mesh->info.real_params[AC_ff_hel_imy] = forcing_params.ff_hel_im.y;
+    mesh->info.real_params[AC_ff_hel_imz] = forcing_params.ff_hel_im.z;
+
+    mesh->info.real_params[AC_kaver] = forcing_params.kaver;
+}
+
+ForcingParams
+generateForcingParams(const AcMeshInfo& mesh_info)
+{
+    ForcingParams params = {};
+
+    // Forcing properties
+    AcReal relhel    = mesh_info.real_params[AC_relhel];
+    params.magnitude = mesh_info.real_params[AC_forcing_magnitude];
+    AcReal kmin      = mesh_info.real_params[AC_kmin];
+    AcReal kmax      = mesh_info.real_params[AC_kmax];
+
+    params.kaver = (kmax - kmin) / AcReal(2.0);
+
+    // Generate forcing wave vector k_force
+    params.k_force = helical_forcing_k_generator(kmax, kmin);
+
+    // Randomize the phase
+    params.phase = AcReal(2.0) * AcReal(M_PI) * get_random_number_01();
+
+    // Generate e for k. Needed for the sake of isotrophy.
+    AcReal3 e_force;
+    if ((params.k_force.y == AcReal(0.0)) && (params.k_force.z == AcReal(0.0))) {
+        e_force = (AcReal3){0.0, 1.0, 0.0};
+    }
+    else {
+        e_force = (AcReal3){1.0, 0.0, 0.0};
+    }
+    helical_forcing_e_generator(&e_force, params.k_force);
+
+    helical_forcing_special_vector(&params.ff_hel_re, &params.ff_hel_im, params.k_force, e_force,
+                                   relhel);
+
+    return params;
+}
--- a/samples/standalone/model/host_forcing.h
+++ b/samples/standalone/model/host_forcing.h
@@ -0,0 +1,63 @@
+
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+
+#include "modelmesh.h"
+
+AcReal get_random_number_01();
+
+AcReal3 helical_forcing_k_generator(const AcReal kmax, const AcReal kmin);
+
+void helical_forcing_e_generator(AcReal3* e_force, const AcReal3 k_force);
+
+void helical_forcing_special_vector(AcReal3* ff_hel_re, AcReal3* ff_hel_im, const AcReal3 k_force,
+                                    const AcReal3 e_force, const AcReal relhel);
+
+/** Tool for loading forcing vector information into the device memory
+    // DEPRECATED in favour of loadForcingParams
+ */
+void DEPRECATED_acForcingVec(const AcReal forcing_magnitude, const AcReal3 k_force,
+                             const AcReal3 ff_hel_re, const AcReal3 ff_hel_im,
+                             const AcReal forcing_phase, const AcReal kaver);
+
+typedef struct {
+    AcReal magnitude;
+    AcReal3 k_force;
+    AcReal3 ff_hel_re;
+    AcReal3 ff_hel_im;
+    AcReal phase;
+    AcReal kaver;
+} ForcingParams;
+
+void loadForcingParamsToDevice(const ForcingParams& forcing_params);
+
+void loadForcingParamsToHost(const ForcingParams& forcing_params, AcMesh* mesh);
+void loadForcingParamsToHost(const ForcingParams& forcing_params, ModelMesh* mesh);
+
+ForcingParams generateForcingParams(const AcMeshInfo& mesh_info);
--- a/samples/standalone/model/host_memory.cc
+++ b/samples/standalone/model/host_memory.cc
@@ -0,0 +1,829 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "host_memory.h"
+
+#include <math.h>
+
+#include "errchk.h"
+
+#define AC_GEN_STR(X) #X
+const char* init_type_names[] = {AC_FOR_INIT_TYPES(AC_GEN_STR)};
+#undef AC_GEN_STR
+
+#define XORIG (AcReal(.5) * mesh->info.int_params[AC_nx] * mesh->info.real_params[AC_dsx])
+#define YORIG (AcReal(.5) * mesh->info.int_params[AC_ny] * mesh->info.real_params[AC_dsy])
+#define ZORIG (AcReal(.5) * mesh->info.int_params[AC_nz] * mesh->info.real_params[AC_dsz])
+
+/*
+#include <stdint.h>
+static uint64_t ac_rand_next = 1;
+
+static int32_t
+ac_rand(void)
+{
+        ac_rand_next = ac_rand_next * 1103515245 + 12345;
+        return (uint32_t)(ac_rand_next/65536) % 32768;
+}
+
+static void
+ac_srand(const uint32_t seed)
+{
+        ac_rand_next = seed;
+}
+*/
+
+AcMesh*
+acmesh_create(const AcMeshInfo& mesh_info)
+{
+    AcMesh* mesh = (AcMesh*)malloc(sizeof(*mesh));
+    mesh->info   = mesh_info;
+
+    const size_t bytes = acVertexBufferSizeBytes(mesh->info);
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+        mesh->vertex_buffer[VertexBufferHandle(i)] = (AcReal*)malloc(bytes);
+        ERRCHK(mesh->vertex_buffer[VertexBufferHandle(i)] != NULL);
+    }
+
+    return mesh;
+}
+
+void
+vertex_buffer_set(const VertexBufferHandle& key, const AcReal& val, AcMesh* mesh)
+{
+    const int n = acVertexBufferSize(mesh->info);
+    for (int i = 0; i < n; ++i)
+        mesh->vertex_buffer[key][i] = val;
+}
+
+/** Inits all fields to 1. Setting the mesh to zero is problematic because some fields are supposed
+    to be > 0 and the results would vary widely, which leads to loss of precision in the
+    computations */
+void
+acmesh_clear(AcMesh* mesh)
+{
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+        vertex_buffer_set(VertexBufferHandle(w), 1, mesh); // Init all fields to 1 by default.
+}
+
+static AcReal
+randr(void)
+{
+    return AcReal(rand()) / AcReal(RAND_MAX);
+}
+
+void
+lnrho_step(AcMesh* mesh)
+{
+    const int mx = mesh->info.int_params[AC_mx];
+    const int my = mesh->info.int_params[AC_my];
+    const int mz = mesh->info.int_params[AC_mz];
+
+    // const int    nx_min = mesh->info.int_params[AC_nx_min];
+    // const int    nx_max = mesh->info.int_params[AC_nx_max];
+    // const int    ny_min = mesh->info.int_params[AC_ny_min];
+    // const int    ny_max = mesh->info.int_params[AC_ny_max];
+    // const int    nz_min = mesh->info.int_params[AC_nz_min];
+    // const int    nz_max = mesh->info.int_params[AC_nz_max];
+
+    // const AcReal DX     = mesh->info.real_params[AC_dsx];
+    // const AcReal DY     = mesh->info.real_params[AC_dsy];
+    // const AcReal DZ     = mesh->info.real_params[AC_dsz];
+    // const AcReal xmax   = DX * (nx_max - nx_min) ;
+    // const AcReal zmax   = DZ * (nz_max - nz_min) ;
+
+    // const AcReal lnrho1 = (AcReal) -1.0; // TODO mesh->info.real_params[AC_lnrho1];
+    const AcReal lnrho2 = (AcReal)0.0; // TODO mesh->info.real_params[AC_lnrho2];
+    // const AcReal rho1   = (AcReal) exp(lnrho1);
+    // const AcReal rho2   = (AcReal) exp(lnrho2);
+
+    // const AcReal k_pert    = (AcReal) 1.0; //mesh->info.real_params[AC_k_pert]; //Wamenumber of
+    // the perturbation const AcReal k_pert    = 4.0; //mesh->info.real_params[AC_k_pert];
+    // //Wamenumber of the perturbation
+    // const AcReal ampl_pert = xmax/10.0; // xmax/mesh->info.real_params[AC_pert]; //Amplitude of
+    // the perturbation
+    // const AcReal ampl_pert = (AcReal) 0.0;//xmax/20.0; // xmax/mesh->info.real_params[AC_pert];
+    // //Amplitude of the perturbation const AcReal two_pi       = (AcReal) 6.28318531;
+
+    // const AcReal xorig  = mesh->info.real_params[AC_xorig];
+    // const AcReal zorig  = mesh->info.real_params[AC_zorig];
+    // const AcReal trans  = mesh->info.real_params[AC_trans];
+
+    // AcReal       xx, zz, tanhprof, cosz_wave;
+
+    for (int k = 0; k < mz; k++) {
+        for (int j = 0; j < my; j++) {
+            for (int i = 0; i < mx; i++) {
+                int idx = i + j * mx + k * mx * my;
+                // zz = DZ * AcReal(k) - zorig; // Not used
+                // cosz_wave = ampl_pert*AcReal(cos(k_pert*((zz/zmax)*two_pi))); // Not used
+                // xx = DX * AcReal(i) - xorig + cosz_wave; //ADD WAVE TODO // Not used
+                // tanhprof = AcReal(0.5)*((rho2+rho1) + (rho2-rho1)*AcReal(tanh(xx/trans))); // Not
+                // used Commented out the step function initial codition.
+                // mesh->vertex_buffer[VTXBUF_LNRHO][idx] = log(tanhprof);
+                mesh->vertex_buffer[VTXBUF_LNRHO][idx] = lnrho2;
+            }
+        }
+    }
+}
+
+// This is the initial condition type for the infalling vedge in the pseudodisk
+// model.
+void
+inflow_vedge(AcMesh* mesh)
+{
+    const int mx = mesh->info.int_params[AC_mx];
+    const int my = mesh->info.int_params[AC_my];
+    const int mz = mesh->info.int_params[AC_mz];
+
+    // const int nx_min = mesh->info.int_params[AC_nx_min];
+    // const int nx_max = mesh->info.int_params[AC_nx_max];
+    // const int ny_min = mesh->info.int_params[AC_ny_min];
+    // const int ny_max = mesh->info.int_params[AC_ny_max];
+    // const int nz_min = mesh->info.int_params[AC_nz_min];
+    // const int nz_max = mesh->info.int_params[AC_nz_max];
+
+    // const double DX    = mesh->info.real_params[AC_dsx];
+    // const double DY    = mesh->info.real_params[AC_dsy];
+    const double DZ = mesh->info.real_params[AC_dsz];
+
+    const double AMPL_UU = mesh->info.real_params[AC_ampl_uu];
+    const double ANGL_UU = mesh->info.real_params[AC_angl_uu];
+
+    const double zorig = mesh->info.real_params[AC_zorig];
+    double zz;
+    double trans = mesh->info.real_params[AC_trans];
+
+    // const AcReal range = AcReal(.5);
+
+    // const AcReal zmax  = AcReal(DZ * (nz_max - nz_min));
+    // const AcReal gaussr  = zmax / AcReal(4.0);
+
+    // for (int k = nz_min; k < nz_max; k++) {
+    //    for (int j = ny_min; j < ny_max; j++) {
+    //        for (int i = nx_min; i < nx_max; i++) {
+    for (int k = 0; k < mz; k++) {
+        for (int j = 0; j < my; j++) {
+            for (int i = 0; i < mx; i++) {
+                int idx = i + j * mx + k * mx * my;
+                zz      = DZ * double(k) - zorig;
+                // mesh->vertex_buffer[VTXBUF_UUX][idx] = -AMPL_UU*cos(ANGL_UU);
+                mesh->vertex_buffer[VTXBUF_UUX][idx] = AcReal(-AMPL_UU * cos(ANGL_UU) *
+                                                              fabs(tanh(zz / trans)));
+                mesh->vertex_buffer[VTXBUF_UUY][idx] = AcReal(0.0);
+                mesh->vertex_buffer[VTXBUF_UUZ][idx] = AcReal(-AMPL_UU * sin(ANGL_UU) *
+                                                              tanh(zz / trans));
+
+                // Variarion to density
+                // AcReal rho = exp(mesh->vertex_buffer[VTXBUF_LNRHO][idx]);
+                // NO GAUSSIAN//rho = rho*exp(-(zz/gaussr)*(zz/gaussr));
+                // mesh->vertex_buffer[VTXBUF_LNRHO][idx] = log(rho + (range*rho) * (randr() -
+                // AcReal(-0.5)));
+            }
+        }
+    }
+}
+
+// This is the initial condition type for the infalling vedge in the pseudodisk
+// model.
+void
+simple_uniform_core(AcMesh* mesh)
+{
+    const int mx = mesh->info.int_params[AC_mx];
+    const int my = mesh->info.int_params[AC_my];
+    const int mz = mesh->info.int_params[AC_mz];
+
+    const double DX = mesh->info.real_params[AC_dsx];
+    const double DY = mesh->info.real_params[AC_dsy];
+    const double DZ = mesh->info.real_params[AC_dsz];
+
+    const double ampl_lnrho = mesh->info.real_params[AC_ampl_lnrho];
+
+    const double xorig   = mesh->info.real_params[AC_xorig];
+    const double yorig   = mesh->info.real_params[AC_yorig];
+    const double zorig   = mesh->info.real_params[AC_zorig];
+
+    const double G_const = mesh->info.real_params[AC_G_const]; 
+    const double M_sink_init = mesh->info.real_params[AC_M_sink_init];
+    const double cs2_sound = mesh->info.real_params[AC_cs2_sound];
+
+    const double RR_inner_bound = mesh->info.real_params[AC_soft]/AcReal(2.0);
+    const double core_coeff   = (exp(ampl_lnrho)  * cs2_sound) / (double(4.0)*M_PI * G_const);
+
+    double xx, yy, zz, RR;
+    double core_profile;
+
+    //TEMPORARY TEST INPUT PARAMETERS
+    const double core_radius = DX*32.0;
+    const double trans = DX*12.0;
+    //const double epsilon = DX*2.0;
+    const double vel_scale = mesh->info.real_params[AC_ampl_uu];
+    double abso_vel;
+
+    RR = 1.0;
+    printf("%e %e %e \n", RR, trans, core_radius);
+     
+
+    for (int k = 0; k < mz; k++) {
+        for (int j = 0; j < my; j++) {
+            for (int i = 0; i < mx; i++) {
+                int idx = i + j*mx + k*mx*my;
+                xx     = DX * double(i) - xorig;
+                yy     = DY * double(j) - yorig;
+                zz     = DZ * double(k) - zorig;
+
+                RR     = sqrt(xx*xx + yy*yy + zz*zz);
+                     
+                if (RR >= RR_inner_bound) {
+                    abso_vel = vel_scale * sqrt(2.0 * G_const 
+                                                    * M_sink_init / RR);
+                    core_profile = pow(RR, -2.0);   //double(1.0);
+                } else {
+                    abso_vel = vel_scale * sqrt(2.0 * AC_G_const 
+                                                    * AC_M_sink_init / RR_inner_bound);
+                    core_profile = pow(RR_inner_bound, -2.0);   //double(1.0);
+                }
+
+               if (RR <= sqrt(DX*DX + DY*DY + DZ*DZ)) {
+                    abso_vel = 0.0;
+                    RR = 1.0;
+               }
+
+
+                mesh->vertex_buffer[VTXBUF_LNRHO][idx] = AcReal(log(core_coeff*core_profile));
+                mesh->vertex_buffer[VTXBUF_UUX][idx]   = AcReal(-abso_vel * (yy / RR));
+                mesh->vertex_buffer[VTXBUF_UUY][idx]   = AcReal( abso_vel * (xx / RR));
+                mesh->vertex_buffer[VTXBUF_UUZ][idx]   = AcReal(0.0);
+
+            }
+        }
+    }
+}
+
+
+// This is the initial condition type for the infalling vedge in the pseudodisk
+// model.
+void
+inflow_vedge_freefall(AcMesh* mesh)
+{
+    const int mx = mesh->info.int_params[AC_mx];
+    const int my = mesh->info.int_params[AC_my];
+    const int mz = mesh->info.int_params[AC_mz];
+
+    // const int nx_min = mesh->info.int_params[AC_nx_min];
+    // const int nx_max = mesh->info.int_params[AC_nx_max];
+    // const int ny_min = mesh->info.int_params[AC_ny_min];
+    // const int ny_max = mesh->info.int_params[AC_ny_max];
+    // const int nz_min = mesh->info.int_params[AC_nz_min];
+    // const int nz_max = mesh->info.int_params[AC_nz_max];
+
+    const double DX = mesh->info.real_params[AC_dsx];
+    // const double DY    = mesh->info.real_params[AC_dsy];
+    const double DZ = mesh->info.real_params[AC_dsz];
+
+    // const double AMPL_UU = mesh->info.real_params[AC_ampl_uu];
+    const double ANGL_UU = mesh->info.real_params[AC_angl_uu];
+    const double SQ2GM   = mesh->info.real_params[AC_sq2GM_star];
+    // const double GM = mesh->info.real_params[AC_GM_star];
+    // const double M_star  = mesh->info.real_params[AC_M_star];
+    // const double G_CONST = mesh->info.real_params[AC_G_CONST];
+
+    // const double unit_length   = mesh->info.real_params[AC_unit_length];
+    // const double unit_density  = mesh->info.real_params[AC_unit_density];
+    // const double unit_velocity = mesh->info.real_params[AC_unit_velocity];
+
+    const double xorig = mesh->info.real_params[AC_xorig];
+    // const double yorig = mesh->info.real_params[AC_yorig];
+    const double zorig = mesh->info.real_params[AC_zorig];
+    // const double trans = mesh->info.real_params[AC_trans];
+    //  double xx, yy, zz, RR;
+    double xx, zz, RR;
+    // double delx, dely, delz;
+    double delx, delz;
+    // double u_x, u_y, u_z, veltot, tanhz;
+    double u_x, u_z, veltot, tanhz;
+
+    const double star_pos_x = mesh->info.real_params[AC_star_pos_x];
+    const double star_pos_z = mesh->info.real_params[AC_star_pos_z];
+
+    for (int k = 0; k < mz; k++) {
+        for (int j = 0; j < my; j++) {
+            for (int i = 0; i < mx; i++) {
+                int idx = i + j * mx + k * mx * my;
+                xx      = DX * double(i) - xorig;
+                zz      = DZ * double(k) - zorig;
+
+                delx = xx - star_pos_x;
+                delz = zz - star_pos_z;
+                // TODO: Figure out isthis needed. Now a placeholder.
+                // tanhz = fabs(tanh(zz/trans));
+                tanhz = 1.0;
+
+                RR     = sqrt(delx * delx + delz * delz);
+                veltot = SQ2GM / sqrt(RR); // Free fall velocity
+
+                // Normal velocity components
+                u_x = -veltot * (delx / RR);
+                u_z = -veltot * (delz / RR);
+
+                // printf("star_pos_z %e, zz %e, delz %e, RR %e\n", star_pos_z, zz, delz, RR);
+
+                // printf("unit_length = %e, unit_density = %e, unit_velocity = %e,\n M_star = %e,
+                // G_CONST = %e, GM = %e, SQ2GM = %e, \n RR = %e, u_x = %e, u_z %e\n",
+                //        unit_length, unit_density,
+                //        unit_velocity, M_star, G_CONST, GM, SQ2GM, RR, u_x, u_z);
+                // printf("%e\n", unit_length*unit_length*unit_length);
+
+                // Here including an angel tilt due to pseudodisk
+                if (delz >= 0.0) {
+                    mesh->vertex_buffer[VTXBUF_UUX][idx] = AcReal(
+                        (u_x * cos(ANGL_UU) - u_z * sin(ANGL_UU)) * tanhz);
+                    mesh->vertex_buffer[VTXBUF_UUY][idx] = AcReal(0.0);
+                    mesh->vertex_buffer[VTXBUF_UUZ][idx] = AcReal(
+                        (u_x * sin(ANGL_UU) + u_z * cos(ANGL_UU)) * tanhz);
+                }
+                else {
+                    mesh->vertex_buffer[VTXBUF_UUX][idx] = AcReal(
+                        (u_x * cos(ANGL_UU) + u_z * sin(ANGL_UU)) * tanhz);
+                    mesh->vertex_buffer[VTXBUF_UUY][idx] = AcReal(0.0);
+                    mesh->vertex_buffer[VTXBUF_UUZ][idx] = AcReal(
+                        (-u_x * sin(ANGL_UU) + u_z * cos(ANGL_UU)) * tanhz);
+                }
+            }
+        }
+    }
+}
+
+// Only x-direction free fall
+void
+inflow_freefall_x(AcMesh* mesh)
+{
+    const int mx = mesh->info.int_params[AC_mx];
+    const int my = mesh->info.int_params[AC_my];
+    const int mz = mesh->info.int_params[AC_mz];
+
+    const double DX = mesh->info.real_params[AC_dsx];
+
+    const double SQ2GM = mesh->info.real_params[AC_sq2GM_star];
+    // const double G_CONST = mesh->info.real_params[AC_G_CONST];
+
+    const double xorig = mesh->info.real_params[AC_xorig];
+    double xx, RR;
+    double delx;
+    double /*u_x,*/ veltot;
+
+    const double star_pos_x = mesh->info.real_params[AC_star_pos_x];
+
+    const double ampl_lnrho = mesh->info.real_params[AC_ampl_lnrho];
+
+    for (int k = 0; k < mz; k++) {
+        for (int j = 0; j < my; j++) {
+            for (int i = 0; i < mx; i++) {
+                int idx = i + j * mx + k * mx * my;
+                xx      = DX * double(i) - xorig;
+
+                delx = xx - star_pos_x;
+
+                RR = fabs(delx);
+
+                veltot = SQ2GM / sqrt(RR); // Free fall velocity
+
+                if (isinf(veltot) == 1)
+                    printf("xx %e star_pos_x %e delz %e RR %e veltot %e\n", xx, star_pos_x, delx,
+                           RR, veltot);
+
+                // Normal velocity components
+                // u_x = - veltot; // Not used
+
+                // Freefall condition
+                // mesh->vertex_buffer[VTXBUF_UUX][idx] = u_x;
+                // mesh->vertex_buffer[VTXBUF_UUY][idx] = 0.0;
+                // mesh->vertex_buffer[VTXBUF_UUZ][idx] = 0.0;
+
+                // Starting with steady state
+                mesh->vertex_buffer[VTXBUF_UUX][idx] = 0.0;
+                mesh->vertex_buffer[VTXBUF_UUY][idx] = 0.0;
+                mesh->vertex_buffer[VTXBUF_UUZ][idx] = 0.0;
+
+                mesh->vertex_buffer[VTXBUF_LNRHO][idx] = AcReal(ampl_lnrho);
+            }
+        }
+    }
+}
+
+void
+gaussian_radial_explosion(AcMesh* mesh)
+{
+    AcReal* uu_x = mesh->vertex_buffer[VTXBUF_UUX];
+    AcReal* uu_y = mesh->vertex_buffer[VTXBUF_UUY];
+    AcReal* uu_z = mesh->vertex_buffer[VTXBUF_UUZ];
+
+    const int mx = mesh->info.int_params[AC_mx];
+    const int my = mesh->info.int_params[AC_my];
+
+    const int nx_min = mesh->info.int_params[AC_nx_min];
+    const int nx_max = mesh->info.int_params[AC_nx_max];
+    const int ny_min = mesh->info.int_params[AC_ny_min];
+    const int ny_max = mesh->info.int_params[AC_ny_max];
+    const int nz_min = mesh->info.int_params[AC_nz_min];
+    const int nz_max = mesh->info.int_params[AC_nz_max];
+
+    const double DX = mesh->info.real_params[AC_dsx];
+    const double DY = mesh->info.real_params[AC_dsy];
+    const double DZ = mesh->info.real_params[AC_dsz];
+
+    const double xorig = double(XORIG) - 0.000001;
+    const double yorig = double(YORIG) - 0.000001;
+    const double zorig = double(ZORIG) - 0.000001;
+
+    const double INIT_LOC_UU_X = 0.0;
+    const double INIT_LOC_UU_Y = 0.0;
+    const double INIT_LOC_UU_Z = 0.0;
+
+    const double AMPL_UU    = mesh->info.real_params[AC_ampl_uu];
+    const double UU_SHELL_R = 0.8;
+    const double WIDTH_UU   = 0.2;
+
+    // Outward explosion with gaussian initial velocity profile.
+    int idx;
+    double xx, yy, zz, rr2, rr, theta = 0.0, phi = 0.0;
+    double uu_radial;
+
+    // double theta_old = 0.0;
+
+    for (int k = nz_min; k < nz_max; k++) {
+        for (int j = ny_min; j < ny_max; j++) {
+            for (int i = nx_min; i < nx_max; i++) {
+                // Calculate the value of velocity in a particular radius.
+                idx = i + j * mx + k * mx * my;
+                // Determine the coordinates
+                xx = DX * (i - nx_min) - xorig;
+                xx = xx - INIT_LOC_UU_X;
+
+                yy = DY * (j - ny_min) - yorig;
+                yy = yy - INIT_LOC_UU_Y;
+
+                zz = DZ * (k - nz_min) - zorig;
+                zz = zz - INIT_LOC_UU_Z;
+
+                rr2 = pow(xx, 2.0) + pow(yy, 2.0) + pow(zz, 2.0);
+                rr  = sqrt(rr2);
+
+                // Origin is different!
+                double xx_abs, yy_abs, zz_abs;
+                if (rr > 0.0) {
+                    // theta range [0, PI]
+                    if (zz >= 0.0) {
+                        theta = acos(zz / rr);
+                        if (theta > M_PI / 2.0 || theta < 0.0) {
+                            printf("Explosion THETA WRONG: zz = %.3f, rr = "
+                                   "%.3f, theta = %.3e/PI, M_PI = %.3e\n",
+                                   zz, rr, theta / M_PI, M_PI);
+                        }
+                    }
+                    else {
+                        zz_abs = -zz; // Needs a posite value for acos
+                        theta  = M_PI - acos(zz_abs / rr);
+                        if (theta < M_PI / 2.0 || theta > 2 * M_PI) {
+                            printf("Explosion THETA WRONG: zz = %.3f, rr = "
+                                   "%.3f, theta = %.3e/PI, M_PI = %.3e\n",
+                                   zz, rr, theta / M_PI, M_PI);
+                        }
+                    }
+
+                    // phi range [0, 2*PI]i
+                    if (xx != 0.0) {
+                        if (xx < 0.0 && yy >= 0.0) {
+                            //-+
+                            xx_abs = -xx; // Needs a posite value for atan
+                            phi    = M_PI - atan(yy / xx_abs);
+                            if (phi < (M_PI / 2.0) || phi > M_PI) {
+                                printf("Explosion PHI WRONG -+: xx = %.3f, yy "
+                                       "= %.3f, phi = %.3e/PI, M_PI = %.3e\n",
+                                       xx, yy, phi / M_PI, M_PI);
+                            }
+                        }
+                        else if (xx > 0.0 && yy < 0.0) {
+                            //+-
+                            yy_abs = -yy;
+                            phi    = 2.0 * M_PI - atan(yy_abs / xx);
+                            if (phi < (3.0 * M_PI) / 2.0 || phi > (2.0 * M_PI + 1e-6)) {
+                                printf("Explosion PHI WRONG +-: xx = %.3f, yy "
+                                       "= %.3f, phi = %.3e/PI, M_PI = %.3e\n",
+                                       xx, yy, phi / M_PI, M_PI);
+                            }
+                        }
+                        else if (xx < 0.0 && yy < 0.0) {
+                            //--
+                            yy_abs = -yy;
+                            xx_abs = -xx;
+                            phi    = M_PI + atan(yy_abs / xx_abs);
+                            if (phi < M_PI || phi > ((3.0 * M_PI) / 2.0 + 1e-6)) {
+                                printf("Explosion PHI WRONG --: xx = %.3f, yy "
+                                       "= %.3f, xx_abs = %.3f, yy_abs = %.3f, "
+                                       "phi = %.3e, (3.0*M_PI)/2.0 = %.3e\n",
+                                       xx, yy, xx_abs, yy_abs, phi, (3.0 * M_PI) / 2.0);
+                            }
+                        }
+                        else {
+                            //++
+                            phi = atan(yy / xx);
+                            if (phi < 0 || phi > M_PI / 2.0) {
+                                printf("Explosion PHI WRONG --: xx = %.3f, yy = "
+                                       "%.3f, phi = %.3e, (3.0*M_PI)/2.0 = %.3e\n",
+                                       xx, yy, phi, (3.0 * M_PI) / 2.0);
+                            }
+                        }
+                    }
+                    else { // To avoid div by zero with atan
+                        if (yy > 0.0) {
+                            phi = M_PI / 2.0;
+                        }
+                        else if (yy < 0.0) {
+                            phi = (3.0 * M_PI) / 2.0;
+                        }
+                        else {
+                            phi = 0.0;
+                        }
+                    }
+
+                    // Set zero for explicit safekeeping
+                    if (xx == 0.0 && yy == 0.0) {
+                        phi = 0.0;
+                    }
+
+                    // Gaussian velocity
+                    // uu_radial = AMPL_UU*exp( -rr2 / (2.0*pow(WIDTH_UU, 2.0))
+                    // ); New distribution, where that gaussion wave is not in
+                    // the exact centre coordinates uu_radial = AMPL_UU*exp(
+                    // -pow((rr - 4.0*WIDTH_UU),2.0) / (2.0*pow(WIDTH_UU, 2.0))
+                    // ); //TODO: Parametrize the peak location.
+                    uu_radial = AMPL_UU *
+                                exp(-pow((rr - UU_SHELL_R), 2.0) / (2.0 * pow(WIDTH_UU, 2.0)));
+                }
+                else {
+                    uu_radial = 0.0; // TODO: There will be a discontinuity in
+                                     // the origin... Should the shape of the
+                                     // distribution be different?
+                }
+
+                // Determine the carthesian velocity components and lnrho
+                uu_x[idx] = AcReal(uu_radial * sin(theta) * cos(phi));
+                uu_y[idx] = AcReal(uu_radial * sin(theta) * sin(phi));
+                uu_z[idx] = AcReal(uu_radial * cos(theta));
+
+                // Temporary diagnosticv output (TODO: Remove after not needed)
+                // if (theta > theta_old) {
+                // if (theta > M_PI || theta < 0.0 || phi < 0.0 || phi > 2*M_PI)
+                // {
+                /*	printf("Explosion: xx = %.3f, yy = %.3f, zz = %.3f, rr =
+                   %.3f, phi = %.3e/PI, theta = %.3e/PI\n, M_PI = %.3e", xx, yy,
+                   zz, rr, phi/M_PI, theta/M_PI, M_PI); printf(" uu_radial =
+                   %.3e, uu_x[%i] = %.3e, uu_y[%i] = %.3e, uu_z[%i] = %.3e \n",
+                                uu_radial, idx, uu_x[idx], idx, uu_y[idx], idx,
+                   uu_z[idx]); theta_old = theta;
+                */
+            }
+        }
+    }
+}
+
+void
+acmesh_init_to(const InitType& init_type, AcMesh* mesh)
+{
+    srand(123456789);
+
+    const int n = acVertexBufferSize(mesh->info);
+
+    const int mx = mesh->info.int_params[AC_mx];
+    const int my = mesh->info.int_params[AC_my];
+    const int mz = mesh->info.int_params[AC_mz];
+
+    const int nx_min = mesh->info.int_params[AC_nx_min];
+    const int nx_max = mesh->info.int_params[AC_nx_max];
+    const int ny_min = mesh->info.int_params[AC_ny_min];
+    const int ny_max = mesh->info.int_params[AC_ny_max];
+    const int nz_min = mesh->info.int_params[AC_nz_min];
+    const int nz_max = mesh->info.int_params[AC_nz_max];
+
+    switch (init_type) {
+    case INIT_TYPE_RANDOM: {
+        acmesh_clear(mesh);
+        const AcReal range = AcReal(0.01);
+        for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+            for (int i = 0; i < n; ++i)
+                mesh->vertex_buffer[w][i] = 2 * range * randr() - range;
+
+        break;
+    }
+    case INIT_TYPE_GAUSSIAN_RADIAL_EXPL:
+        acmesh_clear(mesh);
+        vertex_buffer_set(VTXBUF_LNRHO, mesh->info.real_params[AC_ampl_lnrho], mesh);
+        // acmesh_init_to(INIT_TYPE_RANDOM, mesh);
+        gaussian_radial_explosion(mesh);
+
+        break;
+    case INIT_TYPE_XWAVE:
+        acmesh_clear(mesh);
+        acmesh_init_to(INIT_TYPE_RANDOM, mesh);
+        for (int k = 0; k < mz; k++) {
+            for (int j = 0; j < my; j++) {
+                for (int i = 0; i < mx; i++) {
+                    int idx                              = i + j * mx + k * mx * my;
+                    mesh->vertex_buffer[VTXBUF_UUX][idx] = 2 * AcReal(sin(j * AcReal(M_PI) / mx)) -
+                                                           1;
+                }
+            }
+        }
+        break;
+    case INIT_TYPE_SIMPLE_CORE:
+        acmesh_clear(mesh);
+        simple_uniform_core(mesh);
+        break;
+    case INIT_TYPE_VEDGE:
+        acmesh_clear(mesh);
+        inflow_vedge_freefall(mesh);
+        break;
+    case INIT_TYPE_VEDGEX:
+        acmesh_clear(mesh);
+        inflow_freefall_x(mesh);
+        break;
+    case INIT_TYPE_RAYLEIGH_TAYLOR:
+        acmesh_clear(mesh);
+        inflow_freefall_x(mesh);
+        lnrho_step(mesh);
+        break;
+    case INIT_TYPE_ABC_FLOW: {
+        acmesh_clear(mesh);
+        acmesh_init_to(INIT_TYPE_RANDOM, mesh);
+        for (int k = nz_min; k < nz_max; k++) {
+            for (int j = ny_min; j < ny_max; j++) {
+                for (int i = nx_min; i < nx_max; i++) {
+                    const int idx = i + j * mx + k * mx * my;
+
+                    /*
+                    const double xx = double(
+                        mesh->info.real_params[AC_dsx] *
+                            (i - mesh->info.int_params[AC_nx_min]) -
+                        XORIG + AcReal(.5) * mesh->info.real_params[AC_dsx]);
+                    const double yy = double(
+                        mesh->info.real_params[AC_dsy] *
+                            (j - mesh->info.int_params[AC_ny_min]) -
+                        YORIG + AcReal(.5) * mesh->info.real_params[AC_dsy]);
+                    const double zz = double(
+                        mesh->info.real_params[AC_dsz] *
+                            (k - mesh->info.int_params[AC_nz_min]) -
+                        ZORIG + AcReal(.5) * mesh->info.real_params[AC_dsz]);
+                    */
+
+                    const AcReal xx = (i - nx_min) * mesh->info.real_params[AC_dsx] - XORIG;
+                    const AcReal yy = (j - ny_min) * mesh->info.real_params[AC_dsy] - YORIG;
+                    const AcReal zz = (k - nz_min) * mesh->info.real_params[AC_dsz] - ZORIG;
+
+                    const AcReal ampl_uu = 0.5;
+                    const AcReal ABC_A   = 1.;
+                    const AcReal ABC_B   = 1.;
+                    const AcReal ABC_C   = 1.;
+                    const AcReal kx_uu   = 8.;
+                    const AcReal ky_uu   = 8.;
+                    const AcReal kz_uu   = 8.;
+
+                    mesh->vertex_buffer[VTXBUF_UUX][idx] = ampl_uu *
+                                                           (ABC_A * (AcReal)sin(kz_uu * zz) +
+                                                            ABC_C * (AcReal)cos(ky_uu * yy));
+                    mesh->vertex_buffer[VTXBUF_UUY][idx] = ampl_uu *
+                                                           (ABC_B * (AcReal)sin(kx_uu * xx) +
+                                                            ABC_A * (AcReal)cos(kz_uu * zz));
+                    mesh->vertex_buffer[VTXBUF_UUZ][idx] = ampl_uu *
+                                                           (ABC_C * (AcReal)sin(ky_uu * yy) +
+                                                            ABC_B * (AcReal)cos(kx_uu * xx));
+                }
+            }
+        }
+        break;
+    }
+    case INIT_TYPE_RAYLEIGH_BENARD: {
+        acmesh_init_to(INIT_TYPE_RANDOM, mesh);
+#if LTEMPERATURE
+        vertex_buffer_set(VTXBUF_LNRHO, 1, mesh);
+        const AcReal range = AcReal(0.9);
+        for (int k = nz_min; k < nz_max; k++) {
+            for (int j = ny_min; j < ny_max; j++) {
+                for (int i = nx_min; i < nx_max; i++) {
+                    const int idx                                = i + j * mx + k * mx * my;
+                    mesh->vertex_buffer[VTXBUF_TEMPERATURE][idx] = (range * (k - nz_min)) /
+                                                                       mesh->info
+                                                                           .int_params[AC_nz] +
+                                                                   0.1;
+                }
+            }
+        }
+#else
+        WARNING("INIT_TYPE_RAYLEIGH_BERNARD called even though VTXBUF_TEMPERATURE is not used");
+#endif
+        break;
+    }
+    default:
+        ERROR("Unknown init_type");
+    }
+
+    AcReal max_val = AcReal(-1e-32);
+    AcReal min_val = AcReal(1e32);
+    // Normalize the grid
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        for (int i = 0; i < n; ++i) {
+            if (mesh->vertex_buffer[w][i] < min_val)
+                min_val = mesh->vertex_buffer[w][i];
+            if (mesh->vertex_buffer[w][i] > max_val)
+                max_val = mesh->vertex_buffer[w][i];
+        }
+    }
+    printf("MAX: %f MIN %f\n", double(max_val), double(min_val));
+    /*
+    const AcReal inv_range = AcReal(1.) / fabs(max_val - min_val);
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        for (int i = 0; i < n; ++i) {
+            mesh->vertex_buffer[w][i] = 2*inv_range*(mesh->vertex_buffer[w][i] - min_val) - 1;
+        }
+    }
+    */
+}
+
+void
+acmesh_destroy(AcMesh* mesh)
+{
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
+        free(mesh->vertex_buffer[VertexBufferHandle(i)]);
+
+    free(mesh);
+}
+
+ModelMesh*
+modelmesh_create(const AcMeshInfo& mesh_info)
+{
+    ModelMesh* mesh = (ModelMesh*)malloc(sizeof(*mesh));
+    mesh->info      = mesh_info;
+
+    const size_t bytes = acVertexBufferSize(mesh->info) * sizeof(mesh->vertex_buffer[0][0]);
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+        mesh->vertex_buffer[VertexBufferHandle(i)] = (ModelScalar*)malloc(bytes);
+        ERRCHK(mesh->vertex_buffer[VertexBufferHandle(i)] != NULL);
+    }
+
+    return mesh;
+}
+
+void
+modelmesh_destroy(ModelMesh* mesh)
+{
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
+        free(mesh->vertex_buffer[VertexBufferHandle(i)]);
+
+    free(mesh);
+}
+
+#include <string.h> // memcpy
+void
+acmesh_to_modelmesh(const AcMesh& acmesh, ModelMesh* modelmesh)
+{
+    ERRCHK(sizeof(acmesh.info) == sizeof(modelmesh->info));
+    memcpy(&modelmesh->info, &acmesh.info, sizeof(acmesh.info));
+
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
+        for (size_t j = 0; j < acVertexBufferSize(acmesh.info); ++j)
+            modelmesh->vertex_buffer[i][j] = (ModelScalar)acmesh.vertex_buffer[i][j];
+}
+
+void
+modelmesh_to_acmesh(const ModelMesh& modelmesh, AcMesh* acmesh)
+{
+    ERRCHK(sizeof(acmesh->info) == sizeof(modelmesh.info));
+    memcpy(&acmesh->info, &modelmesh.info, sizeof(modelmesh.info));
+
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
+        for (size_t j = 0; j < acVertexBufferSize(modelmesh.info); ++j)
+            acmesh->vertex_buffer[i][j] = (AcReal)modelmesh.vertex_buffer[i][j];
+}
--- a/samples/standalone/model/host_memory.h
+++ b/samples/standalone/model/host_memory.h
@@ -0,0 +1,63 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+#include "modelmesh.h"
+
+// clang-format off
+#define AC_FOR_INIT_TYPES(FUNC)\
+        FUNC(INIT_TYPE_RANDOM), \
+        FUNC(INIT_TYPE_XWAVE), \
+        FUNC(INIT_TYPE_GAUSSIAN_RADIAL_EXPL), \
+        FUNC(INIT_TYPE_ABC_FLOW) , \
+        FUNC(INIT_TYPE_SIMPLE_CORE), \
+        FUNC(INIT_TYPE_VEDGE), \
+        FUNC(INIT_TYPE_VEDGEX), \
+        FUNC(INIT_TYPE_RAYLEIGH_TAYLOR), \
+        FUNC(INIT_TYPE_RAYLEIGH_BENARD)
+// clang-format on
+
+#define AC_GEN_ID(X) X
+typedef enum { AC_FOR_INIT_TYPES(AC_GEN_ID), NUM_INIT_TYPES } InitType;
+#undef AC_GEN_ID
+
+extern const char* init_type_names[]; // Defined in host_memory.cc
+
+AcMesh* acmesh_create(const AcMeshInfo& mesh_info);
+
+void vertex_buffer_set(const VertexBufferHandle& key, const AcReal& val, AcMesh* mesh);
+
+void acmesh_clear(AcMesh* mesh);
+
+void acmesh_init_to(const InitType& type, AcMesh* mesh);
+
+void acmesh_destroy(AcMesh* mesh);
+
+ModelMesh* modelmesh_create(const AcMeshInfo& mesh_info);
+void modelmesh_destroy(ModelMesh* mesh);
+void acmesh_to_modelmesh(const AcMesh& acmesh, ModelMesh* modelmesh);
+void modelmesh_to_acmesh(const ModelMesh& model, AcMesh* acmesh);
--- a/samples/standalone/model/host_timestep.cc
+++ b/samples/standalone/model/host_timestep.cc
@@ -0,0 +1,67 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "host_timestep.h"
+
+#include "math_utils.h"
+
+static AcReal timescale = AcReal(1.0);
+
+AcReal
+host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info)
+{
+    const long double cdt  = mesh_info.real_params[AC_cdt];
+    const long double cdtv = mesh_info.real_params[AC_cdtv];
+    // const long double cdts     = mesh_info.real_params[AC_cdts];
+    const long double cs2_sound = mesh_info.real_params[AC_cs2_sound];
+    const long double nu_visc   = mesh_info.real_params[AC_nu_visc];
+    const long double eta       = mesh_info.real_params[AC_eta];
+    const long double chi       = 0; // mesh_info.real_params[AC_chi]; // TODO not calculated
+    const long double gamma     = mesh_info.real_params[AC_gamma];
+    const long double dsmin     = mesh_info.real_params[AC_dsmin];
+
+    // Old ones from legacy Astaroth
+    // const long double uu_dt   = cdt * (dsmin / (umax + cs_sound));
+    // const long double visc_dt = cdtv * dsmin * dsmin / nu_visc;
+
+    // New, closer to the actual Courant timestep
+    // See Pencil Code user manual p. 38 (timestep section)
+    const long double uu_dt   = cdt * dsmin / (fabsl(umax) + sqrtl(cs2_sound + 0.0l));
+    const long double visc_dt = cdtv * dsmin * dsmin /
+                                max(max(nu_visc, eta),
+                                    max(gamma, chi)); // + 1; // TODO NOTE: comment the +1 out to
+                                                      // get scientifically accurate results
+    // MV: White the +1? It was messing up my computations!
+
+    const long double dt = min(uu_dt, visc_dt);
+    return AcReal(timescale) * AcReal(dt);
+}
+
+void
+set_timescale(const AcReal scale)
+{
+    timescale = scale;
+}
--- a/samples/standalone/model/host_timestep.h
+++ b/samples/standalone/model/host_timestep.h
@@ -0,0 +1,32 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+
+AcReal host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info);
+
+void set_timescale(const AcReal scale);
--- a/samples/standalone/model/model_boundconds.cc
+++ b/samples/standalone/model/model_boundconds.cc
@@ -0,0 +1,482 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) amy later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT Amy WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "model_boundconds.h"
+
+#include "errchk.h"
+
+void
+boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
+{
+#pragma omp parallel for
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        const int3 start = (int3){0, 0, 0};
+        const int3 end   = (int3){mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my],
+                                mesh_info.int_params[AC_mz]};
+
+        const int nx = mesh_info.int_params[AC_nx];
+        const int ny = mesh_info.int_params[AC_ny];
+        const int nz = mesh_info.int_params[AC_nz];
+
+        const int nx_min = mesh_info.int_params[AC_nx_min];
+        const int ny_min = mesh_info.int_params[AC_ny_min];
+        const int nz_min = mesh_info.int_params[AC_nz_min];
+
+        // The old kxt was inclusive, but our mx_max is exclusive
+        const int nx_max = mesh_info.int_params[AC_nx_max];
+        const int ny_max = mesh_info.int_params[AC_ny_max];
+        const int nz_max = mesh_info.int_params[AC_nz_max];
+
+        for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
+            for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
+                for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
+
+                    // If destination index is inside the computational domain, return since
+                    // the boundary conditions are only applied to the ghost zones
+                    if (i_dst >= nx_min && i_dst < nx_max && j_dst >= ny_min && j_dst < ny_max &&
+                        k_dst >= nz_min && k_dst < nz_max)
+                        continue;
+
+                    // Find the source index
+                    // Map to nx, ny, nz coordinates
+                    int i_src = i_dst - nx_min;
+                    int j_src = j_dst - ny_min;
+                    int k_src = k_dst - nz_min;
+
+                    // Translate (s.t. the index is always positive)
+                    i_src += nx;
+                    j_src += ny;
+                    k_src += nz;
+
+                    // Wrap
+                    i_src %= nx;
+                    j_src %= ny;
+                    k_src %= nz;
+
+                    // Map to mx, my, mz coordinates
+                    i_src += nx_min;
+                    j_src += ny_min;
+                    k_src += nz_min;
+
+                    const size_t src_idx = acVertexBufferIdx(i_src, j_src, k_src, mesh_info);
+                    const size_t dst_idx = acVertexBufferIdx(i_dst, j_dst, k_dst, mesh_info);
+                    ERRCHK(src_idx < acVertexBufferSize(mesh_info));
+                    ERRCHK(dst_idx < acVertexBufferSize(mesh_info));
+                    mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
+                }
+            }
+        }
+    }
+}
+
+#if 0
+void
+boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
+{
+    const int mx = mesh_info.int_params[AC_mx];
+    const int my = mesh_info.int_params[AC_my];
+    const int mz = mesh_info.int_params[AC_mz];
+
+    // Volatile here suppresses the warning about strict-overflow (i.e. compiler
+    // wanted to optimize these loops by assuming that kxb etc never overflow)
+    // However we do not need the performance improvement (~1-3%) and it's
+    // not either good to
+    //	a) get useless warnings originating from here
+    //	b) disable the warnings completely
+    volatile const int kxb = mesh_info.int_params[AC_nx_min];
+    volatile const int kyb = mesh_info.int_params[AC_ny_min];
+    volatile const int kzb = mesh_info.int_params[AC_nz_min];
+
+    // The old kxt was inclusive, but our mx_max is exclusive
+    volatile const int kxt = mesh_info.int_params[AC_nx_max] - 1;
+    volatile const int kyt = mesh_info.int_params[AC_ny_max] - 1;
+    volatile const int kzt = mesh_info.int_params[AC_nz_max] - 1;
+    const int bound[3]     = {0, 0, 0};
+
+    // Periodic boundary conditions
+    if (bound[0] == 0) {
+        for (int k = kzb; k <= kzt; k++) {
+            for (int j = kyb; j <= kyt; j++) {
+                for (int i = kxb; i <= kxb + 2; i++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (kxt + i - 2) + j * mx + k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+                for (int i = kxt - 2; i <= kxt; i++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - kxt + 2) + j * mx + k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+    }
+    if (bound[1] == 0) {
+        for (int k = kzb; k <= kzt; k++) {
+            for (int i = kxb; i <= kxt; i++) {
+                for (int j = kyb; j <= kyb + 2; j++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + (kyt + j - 2) * mx + k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+                for (int j = kyt - 2; j <= kyt; j++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + (j - kyt + 2) * mx + k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+    }
+
+    if (bound[2] == 0) {
+        for (int i = kxb; i <= kxt; i++) {
+            for (int j = kyb; j <= kyt; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + j * mx + (kzt + k - 2) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + j * mx + (k - kzt + 2) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+    }
+
+    // Copy the corners in the fully periodic case
+    if (bound[0] == 0 && bound[1] == 0 && bound[2] == 0) {
+        // Source corner: x=0, y=0, z=0
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source corner: x=1, y=0, z=0
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source corner: x=0, y=1, z=0
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source corner: x=0, y=0, z=1
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source corner: x=1, y=1, z=0
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source corner: x=1, y=0, z=1
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source corner: x=0, y=1, z=1
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source corner: x=1, y=1, z=1
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+    }
+    else {
+        ERROR("ONLY FULLY PERIODIC WORKS WITH CORNERS SO FAR! \n");
+    }
+
+    // Copy the edges in the fully periodic case
+    if (bound[0] == 0 && bound[1] == 0 && bound[2] == 0) {
+        // Source edge: x = 0, y = 0
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzb; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
+                                     k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: x = 1, y = 0
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzb; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
+                                     k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: x = 0, y = 1
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzb; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
+                                     k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: x = 1, y = 1
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzb; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
+                                     k * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: x = 0, z = 0
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyb; j <= kyt; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + j * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: x = 1, z = 0
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyb; j <= kyt; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + j * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: x = 0, z = 1
+        for (int i = kxb; i <= kxb + 2; i++) {
+            for (int j = kyb; j <= kyt; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i + mx - STENCIL_ORDER) + j * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: x = 1, z = 1
+        for (int i = kxt - 2; i <= kxt; i++) {
+            for (int j = kyb; j <= kyt; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = (i - mx + STENCIL_ORDER) + j * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: y = 0, z = 0
+        for (int i = kxb; i <= kxt; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + (j + my - STENCIL_ORDER) * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: y = 1, z = 0
+        for (int i = kxb; i <= kxt; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzb; k <= kzb + 2; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + (j - my + STENCIL_ORDER) * mx +
+                                     (k + mz - STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: y = 0, z = 1
+        for (int i = kxb; i <= kxt; i++) {
+            for (int j = kyb; j <= kyb + 2; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + (j + my - STENCIL_ORDER) * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+        // Source edge: y = 1, z = 1
+        for (int i = kxb; i <= kxt; i++) {
+            for (int j = kyt - 2; j <= kyt; j++) {
+                for (int k = kzt - 2; k <= kzt; k++) {
+                    const int inds = i + j * mx + k * mx * my;
+                    const int indr = i + (j - my + STENCIL_ORDER) * mx +
+                                     (k - mz + STENCIL_ORDER) * mx * my;
+                    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+                        mesh->vertex_buffer[w]
+                                           [indr] = mesh->vertex_buffer[w]
+                                                                       [inds];
+                }
+            }
+        }
+    }
+    else {
+        ERROR("ONLY FULLY PERIODIC WORKS WITH EDGES SO FAR! \n");
+    }
+}
+#endif
--- a/samples/standalone/model/model_boundconds.h
+++ b/samples/standalone/model/model_boundconds.h
@@ -0,0 +1,31 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+#include "modelmesh.h"
+
+void boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh);
--- a/samples/standalone/model/model_diff.h
+++ b/samples/standalone/model/model_diff.h
@@ -0,0 +1,353 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "errchk.h"
+
+typedef long double MODEL_REAL;
+
+typedef enum { AXIS_X, AXIS_Y, AXIS_Z, NUM_AXIS_TYPES } AxisType;
+
+template <AxisType axis>
+static inline MODEL_REAL
+der_scal(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
+         const MODEL_REAL* scal)
+{
+    MODEL_REAL f0, f1, f2, f4, f5, f6;
+    MODEL_REAL ds;
+
+    switch (axis) {
+    case AXIS_X:
+        f0 = scal[acVertexBufferIdx(i - 3, j, k, mesh_info)];
+        f1 = scal[acVertexBufferIdx(i - 2, j, k, mesh_info)];
+        f2 = scal[acVertexBufferIdx(i - 1, j, k, mesh_info)];
+        f4 = scal[acVertexBufferIdx(i + 1, j, k, mesh_info)];
+        f5 = scal[acVertexBufferIdx(i + 2, j, k, mesh_info)];
+        f6 = scal[acVertexBufferIdx(i + 3, j, k, mesh_info)];
+        ds = mesh_info.real_params[AC_dsx];
+        break;
+    case AXIS_Y:
+        f0 = scal[acVertexBufferIdx(i, j - 3, k, mesh_info)];
+        f1 = scal[acVertexBufferIdx(i, j - 2, k, mesh_info)];
+        f2 = scal[acVertexBufferIdx(i, j - 1, k, mesh_info)];
+        f4 = scal[acVertexBufferIdx(i, j + 1, k, mesh_info)];
+        f5 = scal[acVertexBufferIdx(i, j + 2, k, mesh_info)];
+        f6 = scal[acVertexBufferIdx(i, j + 3, k, mesh_info)];
+        ds = mesh_info.real_params[AC_dsy];
+        break;
+    case AXIS_Z:
+        f0 = scal[acVertexBufferIdx(i, j, k - 3, mesh_info)];
+        f1 = scal[acVertexBufferIdx(i, j, k - 2, mesh_info)];
+        f2 = scal[acVertexBufferIdx(i, j, k - 1, mesh_info)];
+        f4 = scal[acVertexBufferIdx(i, j, k + 1, mesh_info)];
+        f5 = scal[acVertexBufferIdx(i, j, k + 2, mesh_info)];
+        f6 = scal[acVertexBufferIdx(i, j, k + 3, mesh_info)];
+        ds = mesh_info.real_params[AC_dsz];
+        break;
+    default:
+        ERROR("Unknown axis type");
+    }
+    return ((f6 - f0) + MODEL_REAL(-9.) * (f5 - f1) + MODEL_REAL(45.) * (f4 - f2)) /
+           (MODEL_REAL(60.) * ds);
+}
+
+template <AxisType axis>
+static inline MODEL_REAL
+der2_scal(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
+          const MODEL_REAL* scal)
+{
+    MODEL_REAL f0, f1, f2, f3, f4, f5, f6;
+    MODEL_REAL ds;
+
+    f3 = scal[acVertexBufferIdx(i, j, k, mesh_info)];
+
+    switch (axis) {
+    case AXIS_X:
+        f0 = scal[acVertexBufferIdx(i - 3, j, k, mesh_info)];
+        f1 = scal[acVertexBufferIdx(i - 2, j, k, mesh_info)];
+        f2 = scal[acVertexBufferIdx(i - 1, j, k, mesh_info)];
+        f4 = scal[acVertexBufferIdx(i + 1, j, k, mesh_info)];
+        f5 = scal[acVertexBufferIdx(i + 2, j, k, mesh_info)];
+        f6 = scal[acVertexBufferIdx(i + 3, j, k, mesh_info)];
+        ds = mesh_info.real_params[AC_dsx];
+        break;
+    case AXIS_Y:
+        f0 = scal[acVertexBufferIdx(i, j - 3, k, mesh_info)];
+        f1 = scal[acVertexBufferIdx(i, j - 2, k, mesh_info)];
+        f2 = scal[acVertexBufferIdx(i, j - 1, k, mesh_info)];
+        f4 = scal[acVertexBufferIdx(i, j + 1, k, mesh_info)];
+        f5 = scal[acVertexBufferIdx(i, j + 2, k, mesh_info)];
+        f6 = scal[acVertexBufferIdx(i, j + 3, k, mesh_info)];
+        ds = mesh_info.real_params[AC_dsy];
+        break;
+    case AXIS_Z:
+        f0 = scal[acVertexBufferIdx(i, j, k - 3, mesh_info)];
+        f1 = scal[acVertexBufferIdx(i, j, k - 2, mesh_info)];
+        f2 = scal[acVertexBufferIdx(i, j, k - 1, mesh_info)];
+        f4 = scal[acVertexBufferIdx(i, j, k + 1, mesh_info)];
+        f5 = scal[acVertexBufferIdx(i, j, k + 2, mesh_info)];
+        f6 = scal[acVertexBufferIdx(i, j, k + 3, mesh_info)];
+        ds = mesh_info.real_params[AC_dsz];
+        break;
+    default:
+        ERROR("Unknown axis type");
+    }
+    return (MODEL_REAL(2.) * (f0 + f6) + MODEL_REAL(-27.) * (f1 + f5) +
+            MODEL_REAL(270.) * (f2 + f4) + MODEL_REAL(-490.) * f3) /
+           (MODEL_REAL(180.) * ds * ds);
+}
+
+static MODEL_REAL
+laplace_scal(const int& i, const int& j, const int& k,
+             const AcMeshInfo& mesh_info, const MODEL_REAL* scal)
+{
+    return der2_scal<AXIS_X>(i, j, k, mesh_info, scal) +
+           der2_scal<AXIS_Y>(i, j, k, mesh_info, scal) +
+           der2_scal<AXIS_Z>(i, j, k, mesh_info, scal);
+}
+
+static void
+laplace_vec(const int& i, const int& j, const int& k,
+            const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
+            const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, MODEL_REAL* laplace_x,
+            MODEL_REAL* laplace_y, MODEL_REAL* laplace_z)
+{
+    *laplace_x = laplace_scal(i, j, k, mesh_info, vec_x);
+    *laplace_y = laplace_scal(i, j, k, mesh_info, vec_y);
+    *laplace_z = laplace_scal(i, j, k, mesh_info, vec_z);
+}
+
+static MODEL_REAL
+div_vec(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
+        const MODEL_REAL* vec_x, const MODEL_REAL* vec_y, const MODEL_REAL* vec_z)
+{
+    return der_scal<AXIS_X>(i, j, k, mesh_info, vec_x) +
+           der_scal<AXIS_Y>(i, j, k, mesh_info, vec_y) +
+           der_scal<AXIS_Z>(i, j, k, mesh_info, vec_z);
+}
+
+static void
+grad(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
+     const MODEL_REAL* scal, MODEL_REAL* res_x, MODEL_REAL* res_y, MODEL_REAL* res_z)
+{
+    *res_x = der_scal<AXIS_X>(i, j, k, mesh_info, scal);
+    *res_y = der_scal<AXIS_Y>(i, j, k, mesh_info, scal);
+    *res_z = der_scal<AXIS_Z>(i, j, k, mesh_info, scal);
+}
+
+static MODEL_REAL
+vec_dot_nabla_scal(const int& i, const int& j, const int& k,
+                   const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
+                   const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, const MODEL_REAL* scal)
+{
+    const int idx = acVertexBufferIdx(i, j, k, mesh_info);
+    MODEL_REAL ddx_scal, ddy_scal, ddz_scal;
+    grad(i, j, k, mesh_info, scal, &ddx_scal, &ddy_scal, &ddz_scal);
+    return vec_x[idx] * ddx_scal + vec_y[idx] * ddy_scal +
+           vec_z[idx] * ddz_scal;
+}
+
+/*
+ * =============================================================================
+ * Viscosity
+ * =============================================================================
+ */
+typedef enum { DERNM_XY, DERNM_YZ, DERNM_XZ } DernmType;
+
+template <DernmType dernm>
+static MODEL_REAL
+dernm_scal(const int& i, const int& j, const int& k,
+           const AcMeshInfo& mesh_info, const MODEL_REAL* scal)
+{
+
+    MODEL_REAL fac;
+
+    const MODEL_REAL dsx = mesh_info.real_params[AC_dsx];
+    const MODEL_REAL dsy = mesh_info.real_params[AC_dsy];
+    const MODEL_REAL dsz = mesh_info.real_params[AC_dsz];
+
+    MODEL_REAL f_p1_p1, f_m1_p1, f_m1_m1, f_p1_m1;
+    MODEL_REAL f_p2_p2, f_m2_p2, f_m2_m2, f_p2_m2;
+    MODEL_REAL f_p3_p3, f_m3_p3, f_m3_m3, f_p3_m3;
+
+    switch (dernm) {
+    case DERNM_XY:
+        fac     = MODEL_REAL(1. / 720.) * (MODEL_REAL(1.) / dsx) * (MODEL_REAL(1.) / dsy);
+        f_p1_p1 = scal[acVertexBufferIdx(i + 1, j + 1, k, mesh_info)];
+        f_m1_p1 = scal[acVertexBufferIdx(i - 1, j + 1, k, mesh_info)];
+        f_m1_m1 = scal[acVertexBufferIdx(i - 1, j - 1, k, mesh_info)];
+        f_p1_m1 = scal[acVertexBufferIdx(i + 1, j - 1, k, mesh_info)];
+
+        f_p2_p2 = scal[acVertexBufferIdx(i + 2, j + 2, k, mesh_info)];
+        f_m2_p2 = scal[acVertexBufferIdx(i - 2, j + 2, k, mesh_info)];
+        f_m2_m2 = scal[acVertexBufferIdx(i - 2, j - 2, k, mesh_info)];
+        f_p2_m2 = scal[acVertexBufferIdx(i + 2, j - 2, k, mesh_info)];
+
+        f_p3_p3 = scal[acVertexBufferIdx(i + 3, j + 3, k, mesh_info)];
+        f_m3_p3 = scal[acVertexBufferIdx(i - 3, j + 3, k, mesh_info)];
+        f_m3_m3 = scal[acVertexBufferIdx(i - 3, j - 3, k, mesh_info)];
+        f_p3_m3 = scal[acVertexBufferIdx(i + 3, j - 3, k, mesh_info)];
+        break;
+    case DERNM_YZ:
+        // NOTE this is a bit different from the old one, second is j+1k-1
+        // instead of j-1,k+1
+        fac     = MODEL_REAL(1. / 720.) * (MODEL_REAL(1.) / dsy) * (MODEL_REAL(1.) / dsz);
+        f_p1_p1 = scal[acVertexBufferIdx(i, j + 1, k + 1, mesh_info)];
+        f_m1_p1 = scal[acVertexBufferIdx(i, j - 1, k + 1, mesh_info)];
+        f_m1_m1 = scal[acVertexBufferIdx(i, j - 1, k - 1, mesh_info)];
+        f_p1_m1 = scal[acVertexBufferIdx(i, j + 1, k - 1, mesh_info)];
+
+        f_p2_p2 = scal[acVertexBufferIdx(i, j + 2, k + 2, mesh_info)];
+        f_m2_p2 = scal[acVertexBufferIdx(i, j - 2, k + 2, mesh_info)];
+        f_m2_m2 = scal[acVertexBufferIdx(i, j - 2, k - 2, mesh_info)];
+        f_p2_m2 = scal[acVertexBufferIdx(i, j + 2, k - 2, mesh_info)];
+
+        f_p3_p3 = scal[acVertexBufferIdx(i, j + 3, k + 3, mesh_info)];
+        f_m3_p3 = scal[acVertexBufferIdx(i, j - 3, k + 3, mesh_info)];
+        f_m3_m3 = scal[acVertexBufferIdx(i, j - 3, k - 3, mesh_info)];
+        f_p3_m3 = scal[acVertexBufferIdx(i, j + 3, k - 3, mesh_info)];
+        break;
+    case DERNM_XZ:
+        fac     = MODEL_REAL(1. / 720.) * (MODEL_REAL(1.) / dsx) * (MODEL_REAL(1.) / dsz);
+        f_p1_p1 = scal[acVertexBufferIdx(i + 1, j, k + 1, mesh_info)];
+        f_m1_p1 = scal[acVertexBufferIdx(i - 1, j, k + 1, mesh_info)];
+        f_m1_m1 = scal[acVertexBufferIdx(i - 1, j, k - 1, mesh_info)];
+        f_p1_m1 = scal[acVertexBufferIdx(i + 1, j, k - 1, mesh_info)];
+
+        f_p2_p2 = scal[acVertexBufferIdx(i + 2, j, k + 2, mesh_info)];
+        f_m2_p2 = scal[acVertexBufferIdx(i - 2, j, k + 2, mesh_info)];
+        f_m2_m2 = scal[acVertexBufferIdx(i - 2, j, k - 2, mesh_info)];
+        f_p2_m2 = scal[acVertexBufferIdx(i + 2, j, k - 2, mesh_info)];
+
+        f_p3_p3 = scal[acVertexBufferIdx(i + 3, j, k + 3, mesh_info)];
+        f_m3_p3 = scal[acVertexBufferIdx(i - 3, j, k + 3, mesh_info)];
+        f_m3_m3 = scal[acVertexBufferIdx(i - 3, j, k - 3, mesh_info)];
+        f_p3_m3 = scal[acVertexBufferIdx(i + 3, j, k - 3, mesh_info)];
+        break;
+    default:
+        ERROR("Invalid dernm type");
+    }
+    return fac * (MODEL_REAL(270.) * (f_p1_p1 - f_m1_p1 + f_m1_m1 - f_p1_m1) -
+                  MODEL_REAL(27.) * (f_p2_p2 - f_m2_p2 + f_m2_m2 - f_p2_m2) +
+                  MODEL_REAL(2.) * (f_p3_p3 - f_m3_p3 + f_m3_m3 - f_p3_m3));
+}
+
+static void
+grad_div_vec(const int& i, const int& j, const int& k,
+             const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
+             const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, MODEL_REAL* gdvx,
+             MODEL_REAL* gdvy, MODEL_REAL* gdvz)
+{
+    *gdvx = der2_scal<AXIS_X>(i, j, k, mesh_info, vec_x) +
+            dernm_scal<DERNM_XY>(i, j, k, mesh_info, vec_y) +
+            dernm_scal<DERNM_XZ>(i, j, k, mesh_info, vec_z);
+
+    *gdvy = dernm_scal<DERNM_XY>(i, j, k, mesh_info, vec_x) +
+            der2_scal<AXIS_Y>(i, j, k, mesh_info, vec_y) +
+            dernm_scal<DERNM_YZ>(i, j, k, mesh_info, vec_z);
+
+    *gdvz = dernm_scal<DERNM_XZ>(i, j, k, mesh_info, vec_x) +
+            dernm_scal<DERNM_YZ>(i, j, k, mesh_info, vec_y) +
+            der2_scal<AXIS_Z>(i, j, k, mesh_info, vec_z);
+}
+
+static void
+S_grad_lnrho(const int& i, const int& j, const int& k,
+             const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
+             const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, const MODEL_REAL* lnrho,
+             MODEL_REAL* sgrhox, MODEL_REAL* sgrhoy, MODEL_REAL* sgrhoz)
+{
+    const MODEL_REAL c23 = MODEL_REAL(2. / 3.);
+    const MODEL_REAL c13 = MODEL_REAL(1. / 3.);
+
+    const MODEL_REAL Sxx = c23 * der_scal<AXIS_X>(i, j, k, mesh_info, vec_x) -
+                       c13 * (der_scal<AXIS_Y>(i, j, k, mesh_info, vec_y) +
+                              der_scal<AXIS_Z>(i, j, k, mesh_info, vec_z));
+    const MODEL_REAL Sxy = MODEL_REAL(.5) *
+                       (der_scal<AXIS_Y>(i, j, k, mesh_info, vec_x) +
+                        der_scal<AXIS_X>(i, j, k, mesh_info, vec_y));
+    const MODEL_REAL Sxz = MODEL_REAL(.5) *
+                       (der_scal<AXIS_Z>(i, j, k, mesh_info, vec_x) +
+                        der_scal<AXIS_X>(i, j, k, mesh_info, vec_z));
+
+    const MODEL_REAL Syx = Sxy;
+    const MODEL_REAL Syy = c23 * der_scal<AXIS_Y>(i, j, k, mesh_info, vec_y) -
+                       c13 * (der_scal<AXIS_X>(i, j, k, mesh_info, vec_x) +
+                              der_scal<AXIS_Z>(i, j, k, mesh_info, vec_z));
+    const MODEL_REAL Syz = MODEL_REAL(.5) *
+                       (der_scal<AXIS_Z>(i, j, k, mesh_info, vec_y) +
+                        der_scal<AXIS_Y>(i, j, k, mesh_info, vec_z));
+
+    const MODEL_REAL Szx = Sxz;
+    const MODEL_REAL Szy = Syz;
+    const MODEL_REAL Szz = c23 *
+                           der_scal<AXIS_Z>(
+                               i, j, k, mesh_info,
+                               vec_z) // replaced from "c23*der_scal<AXIS_Z>(i,
+                                      // j, k, mesh_info, vec_x)"! TODO recheck
+                                      // that ddz_uu_z is the correct one
+                       - c13 * (der_scal<AXIS_X>(i, j, k, mesh_info, vec_x) +
+                                der_scal<AXIS_Y>(i, j, k, mesh_info, vec_y));
+
+    // Grad lnrho
+
+    MODEL_REAL glnx, glny, glnz;
+
+    grad(i, j, k, mesh_info, lnrho, &glnx, &glny, &glnz);
+
+    *sgrhox = Sxx * glnx + Sxy * glny + Sxz * glnz;
+    *sgrhoy = Syx * glnx + Syy * glny + Syz * glnz;
+    *sgrhoz = Szx * glnx + Szy * glny + Szz * glnz;
+}
+
+static void
+nu_const(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
+         const MODEL_REAL* vec_x, const MODEL_REAL* vec_y, const MODEL_REAL* vec_z,
+         const MODEL_REAL* scal, MODEL_REAL* visc_x, MODEL_REAL* visc_y, MODEL_REAL* visc_z)
+{
+    MODEL_REAL lx, ly, lz;
+    laplace_vec(i, j, k, mesh_info, vec_x, vec_y, vec_z, &lx, &ly, &lz);
+    // lx = ly = lz = .0f;
+
+    MODEL_REAL gx, gy, gz;
+    grad_div_vec(i, j, k, mesh_info, vec_x, vec_y, vec_z, &gx, &gy, &gz);
+    // gx = gy =gz = .0f;
+
+    MODEL_REAL sgrhox, sgrhoy, sgrhoz;
+    S_grad_lnrho(i, j, k, mesh_info, vec_x, vec_y, vec_z, scal, &sgrhox,
+                 &sgrhoy, &sgrhoz);
+    // sgrhox = sgrhoy = sgrhoz = .0f;
+
+    *visc_x = mesh_info.real_params[AC_nu_visc] *
+              (lx + MODEL_REAL(1. / 3.) * gx + MODEL_REAL(2.) * sgrhox)
+              + mesh_info.real_params[AC_zeta] * gx;
+    *visc_y = mesh_info.real_params[AC_nu_visc] *
+              (ly + MODEL_REAL(1. / 3.) * gy + MODEL_REAL(2.) * sgrhoy)
+              + mesh_info.real_params[AC_zeta] * gy;
+    *visc_z = mesh_info.real_params[AC_nu_visc] *
+              (lz + MODEL_REAL(1. / 3.) * gz + MODEL_REAL(2.) * sgrhoz)
+              + mesh_info.real_params[AC_zeta] * gz;
+}
--- a/samples/standalone/model/model_reduce.cc
+++ b/samples/standalone/model/model_reduce.cc
@@ -0,0 +1,203 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "model_reduce.h"
+
+#include <math.h>
+
+#include "errchk.h"
+
+// Function pointer definitions
+typedef ModelScalar (*ReduceFunc)(const ModelScalar&, const ModelScalar&);
+typedef ModelScalar (*ReduceInitialScalFunc)(const ModelScalar&);
+typedef ModelScalar (*ReduceInitialVecFunc)(const ModelScalar&, const ModelScalar&,
+                                            const ModelScalar&);
+
+// clang-format off
+/* Comparison funcs */
+static inline ModelScalar
+max(const ModelScalar& a, const ModelScalar& b) { return a > b ? a : b; }
+
+static inline ModelScalar
+min(const ModelScalar& a, const ModelScalar& b) { return a < b ? a : b; }
+
+static inline ModelScalar
+sum(const ModelScalar& a, const ModelScalar& b) { return a + b; }
+
+/* Function used to determine the values used during reduction */
+static inline ModelScalar
+length(const ModelScalar& a) { return (ModelScalar)(a); }
+
+static inline ModelScalar
+length(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { return sqrtl(a*a + b*b + c*c); }
+
+static inline ModelScalar
+squared(const ModelScalar& a) { return (ModelScalar)(a*a); }
+
+static inline ModelScalar
+squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { return squared(a) + squared(b) + squared(c); }
+
+static inline ModelScalar
+exp_squared(const ModelScalar& a) { return expl(a)*expl(a); }
+
+static inline ModelScalar
+exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { return exp_squared(a) + exp_squared(b) + exp_squared(c); }
+// clang-format on
+
+ModelScalar
+model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a)
+{
+    ReduceInitialScalFunc reduce_initial;
+    ReduceFunc reduce;
+
+    bool solve_mean = false;
+
+    switch (rtype) {
+    case RTYPE_MAX:
+        reduce_initial = length;
+        reduce         = max;
+        break;
+    case RTYPE_MIN:
+        reduce_initial = length;
+        reduce         = min;
+        break;
+    case RTYPE_RMS:
+        reduce_initial = squared;
+        reduce         = sum;
+        solve_mean     = true;
+        break;
+    case RTYPE_RMS_EXP:
+        reduce_initial = exp_squared;
+        reduce         = sum;
+        solve_mean     = true;
+        break;
+    case RTYPE_SUM:
+        reduce_initial = length;
+        reduce         = sum;
+        break;
+    default:
+        ERROR("Unrecognized RTYPE");
+    }
+
+    const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
+                                              mesh.info.int_params[AC_ny_min],
+                                              mesh.info.int_params[AC_nz_min], mesh.info);
+
+    ModelScalar res;
+    if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
+        res = reduce_initial(mesh.vertex_buffer[a][initial_idx]);
+    else
+        res = 0;
+
+    for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; ++k) {
+        for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; ++j) {
+            for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
+                 ++i) {
+                const int idx              = acVertexBufferIdx(i, j, k, mesh.info);
+                const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx]);
+                res                        = reduce(res, curr_val);
+            }
+        }
+    }
+
+    if (solve_mean) {
+        const ModelScalar inv_n = 1.0l / mesh.info.int_params[AC_nxyz];
+        return sqrtl(inv_n * res);
+    }
+    else {
+        return res;
+    }
+}
+
+ModelScalar
+model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype, const VertexBufferHandle& a,
+                 const VertexBufferHandle& b, const VertexBufferHandle& c)
+{
+    // ModelScalar (*reduce_initial)(ModelScalar, ModelScalar, ModelScalar);
+    ReduceInitialVecFunc reduce_initial;
+    ReduceFunc reduce;
+
+    bool solve_mean = false;
+
+    switch (rtype) {
+    case RTYPE_MAX:
+        reduce_initial = length;
+        reduce         = max;
+        break;
+    case RTYPE_MIN:
+        reduce_initial = length;
+        reduce         = min;
+        break;
+    case RTYPE_RMS:
+        reduce_initial = squared;
+        reduce         = sum;
+        solve_mean     = true;
+        break;
+    case RTYPE_RMS_EXP:
+        reduce_initial = exp_squared;
+        reduce         = sum;
+        solve_mean     = true;
+        break;
+    case RTYPE_SUM:
+        reduce_initial = length;
+        reduce         = sum;
+        break;
+    default:
+        ERROR("Unrecognized RTYPE");
+    }
+
+    const int initial_idx = acVertexBufferIdx(mesh.info.int_params[AC_nx_min],
+                                              mesh.info.int_params[AC_ny_min],
+                                              mesh.info.int_params[AC_nz_min], mesh.info);
+
+    ModelScalar res;
+    if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
+        res = reduce_initial(mesh.vertex_buffer[a][initial_idx], mesh.vertex_buffer[b][initial_idx],
+                             mesh.vertex_buffer[c][initial_idx]);
+    else
+        res = 0;
+
+    for (int k = mesh.info.int_params[AC_nz_min]; k < mesh.info.int_params[AC_nz_max]; k++) {
+        for (int j = mesh.info.int_params[AC_ny_min]; j < mesh.info.int_params[AC_ny_max]; j++) {
+            for (int i = mesh.info.int_params[AC_nx_min]; i < mesh.info.int_params[AC_nx_max];
+                 i++) {
+                const int idx              = acVertexBufferIdx(i, j, k, mesh.info);
+                const ModelScalar curr_val = reduce_initial(mesh.vertex_buffer[a][idx],
+                                                            mesh.vertex_buffer[b][idx],
+                                                            mesh.vertex_buffer[c][idx]);
+                res                        = reduce(res, curr_val);
+            }
+        }
+    }
+
+    if (solve_mean) {
+        const ModelScalar inv_n = 1.0l / mesh.info.int_params[AC_nxyz];
+        return sqrtl(inv_n * res);
+    }
+    else {
+        return res;
+    }
+}
--- a/samples/standalone/model/model_reduce.h
+++ b/samples/standalone/model/model_reduce.h
@@ -0,0 +1,37 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+#include "modelmesh.h"
+
+ModelScalar model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
+                              const VertexBufferHandle& a);
+
+ModelScalar model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
+                             const VertexBufferHandle& a,
+                             const VertexBufferHandle& b,
+                             const VertexBufferHandle& c);
--- a/samples/standalone/model/model_rk3.cc
+++ b/samples/standalone/model/model_rk3.cc
@@ -0,0 +1,961 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "model_rk3.h"
+
+#include <math.h>
+
+#include "host_memory.h"
+#include "model_boundconds.h"
+
+// Standalone flags
+#define LDENSITY (1)
+#define LHYDRO (1)
+#define LMAGNETIC (1)
+#define LENTROPY (1)
+#define LTEMPERATURE (0)
+#define LFORCING (1)
+#define LUPWD (1)
+#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
+
+typedef struct {
+    ModelScalar x, y, z;
+} ModelVector;
+
+typedef struct {
+    ModelVector row[3];
+} ModelMatrix;
+
+typedef struct {
+    ModelScalar value;
+    ModelVector gradient;
+    ModelMatrix hessian;
+#if LUPWD
+    ModelVector upwind;
+#endif
+} ModelScalarData;
+
+typedef struct {
+    ModelScalarData x;
+    ModelScalarData y;
+    ModelScalarData z;
+} ModelVectorData;
+
+static AcMeshInfo* mesh_info = NULL;
+
+static inline int
+get(const AcIntParam param)
+{
+    return mesh_info->int_params[param];
+}
+
+static inline ModelScalar
+get(const AcRealParam param)
+{
+    return mesh_info->real_params[param];
+}
+
+static inline int
+IDX(const int i, const int j, const int k)
+{
+    return acVertexBufferIdx(i, j, k, (*mesh_info));
+}
+
+/*
+ * =============================================================================
+ * Stencil Assembly Stage
+ * =============================================================================
+ */
+static inline ModelScalar
+first_derivative(const ModelScalar* pencil, const ModelScalar inv_ds)
+{
+#if STENCIL_ORDER == 2
+    const ModelScalar coefficients[] = {0, 1. / 2.};
+#elif STENCIL_ORDER == 4
+    const ModelScalar coefficients[] = {0, 2.0 / 3.0, -1.0 / 12.0};
+#elif STENCIL_ORDER == 6
+    const ModelScalar coefficients[] = {0, 3.0 / 4.0, -3.0 / 20.0, 1.0 / 60.0};
+#elif STENCIL_ORDER == 8
+    const ModelScalar coefficients[] = {0, 4.0 / 5.0, -1.0 / 5.0, 4.0 / 105.0, -1.0 / 280.0};
+#endif
+
+#define MID (STENCIL_ORDER / 2)
+    ModelScalar res = 0;
+
+    //#pragma unroll
+    for (int i = 1; i <= MID; ++i)
+        res += coefficients[i] * (pencil[MID + i] - pencil[MID - i]);
+
+    return res * inv_ds;
+}
+
+static inline ModelScalar
+second_derivative(const ModelScalar* pencil, const ModelScalar inv_ds)
+{
+#if STENCIL_ORDER == 2
+    const ModelScalar coefficients[] = {-2., 1.};
+#elif STENCIL_ORDER == 4
+    const ModelScalar coefficients[] = {-5.0 / 2.0, 4.0 / 3.0, -1.0 / 12.0};
+#elif STENCIL_ORDER == 6
+    const ModelScalar coefficients[] = {-49.0 / 18.0, 3.0 / 2.0, -3.0 / 20.0, 1.0 / 90.0};
+#elif STENCIL_ORDER == 8
+    const ModelScalar coefficients[] = {-205.0 / 72.0, 8.0 / 5.0, -1.0 / 5.0, 8.0 / 315.0,
+                                        -1.0 / 560.0};
+#endif
+
+#define MID (STENCIL_ORDER / 2)
+    ModelScalar res = coefficients[0] * pencil[MID];
+
+    //#pragma unroll
+    for (int i = 1; i <= MID; ++i)
+        res += coefficients[i] * (pencil[MID + i] + pencil[MID - i]);
+
+    return res * inv_ds * inv_ds;
+}
+
+/** inv_ds: inverted mesh spacing f.ex. 1. / mesh.int_params[AC_dsx] */
+static inline ModelScalar
+cross_derivative(const ModelScalar* pencil_a, const ModelScalar* pencil_b,
+                 const ModelScalar inv_ds_a, const ModelScalar inv_ds_b)
+{
+#if STENCIL_ORDER == 2
+    const ModelScalar coefficients[] = {0, 1.0 / 4.0};
+#elif STENCIL_ORDER == 4
+    const ModelScalar coefficients[] = {
+        0, 1.0 / 32.0, 1.0 / 64.0}; // TODO correct coefficients, these are just placeholders
+#elif STENCIL_ORDER == 6
+    const ModelScalar fac            = (1. / 720.);
+    const ModelScalar coefficients[] = {0.0 * fac, 270.0 * fac, -27.0 * fac, 2.0 * fac};
+#elif STENCIL_ORDER == 8
+    const ModelScalar fac            = (1. / 20160.);
+    const ModelScalar coefficients[] = {0.0 * fac, 8064. * fac, -1008. * fac, 128. * fac,
+                                        -9. * fac};
+#endif
+
+#define MID (STENCIL_ORDER / 2)
+    ModelScalar res = ModelScalar(0.);
+
+    //#pragma unroll
+    for (int i = 1; i <= MID; ++i) {
+        res += coefficients[i] *
+               (pencil_a[MID + i] + pencil_a[MID - i] - pencil_b[MID + i] - pencil_b[MID - i]);
+    }
+    return res * inv_ds_a * inv_ds_b;
+}
+
+static inline ModelScalar
+derx(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil[offset] = arr[IDX(i + offset - STENCIL_ORDER / 2, j, k)];
+
+    return first_derivative(pencil, get(AC_inv_dsx));
+}
+
+static inline ModelScalar
+derxx(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil[offset] = arr[IDX(i + offset - STENCIL_ORDER / 2, j, k)];
+
+    return second_derivative(pencil, get(AC_inv_dsx));
+}
+
+static inline ModelScalar
+derxy(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil_a[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil_a[offset] = arr[IDX(i + offset - STENCIL_ORDER / 2, j + offset - STENCIL_ORDER / 2,
+                                   k)];
+
+    ModelScalar pencil_b[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil_b[offset] = arr[IDX(i + offset - STENCIL_ORDER / 2, j + STENCIL_ORDER / 2 - offset,
+                                   k)];
+
+    return cross_derivative(pencil_a, pencil_b, get(AC_inv_dsx), get(AC_inv_dsy));
+}
+
+static inline ModelScalar
+derxz(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil_a[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil_a[offset] = arr[IDX(i + offset - STENCIL_ORDER / 2, j,
+                                   k + offset - STENCIL_ORDER / 2)];
+
+    ModelScalar pencil_b[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil_b[offset] = arr[IDX(i + offset - STENCIL_ORDER / 2, j,
+                                   k + STENCIL_ORDER / 2 - offset)];
+
+    return cross_derivative(pencil_a, pencil_b, get(AC_inv_dsx), get(AC_inv_dsz));
+}
+
+static inline ModelScalar
+dery(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil[offset] = arr[IDX(i, j + offset - STENCIL_ORDER / 2, k)];
+
+    return first_derivative(pencil, get(AC_inv_dsy));
+}
+
+static inline ModelScalar
+deryy(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil[offset] = arr[IDX(i, j + offset - STENCIL_ORDER / 2, k)];
+
+    return second_derivative(pencil, get(AC_inv_dsy));
+}
+
+static inline ModelScalar
+deryz(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil_a[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil_a[offset] = arr[IDX(i, j + offset - STENCIL_ORDER / 2,
+                                   k + offset - STENCIL_ORDER / 2)];
+
+    ModelScalar pencil_b[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil_b[offset] = arr[IDX(i, j + offset - STENCIL_ORDER / 2,
+                                   k + STENCIL_ORDER / 2 - offset)];
+
+    return cross_derivative(pencil_a, pencil_b, get(AC_inv_dsy), get(AC_inv_dsz));
+}
+
+static inline ModelScalar
+derz(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil[offset] = arr[IDX(i, j, k + offset - STENCIL_ORDER / 2)];
+
+    return first_derivative(pencil, get(AC_inv_dsz));
+}
+
+static inline ModelScalar
+derzz(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar pencil[STENCIL_ORDER + 1];
+    //#pragma unroll
+    for (int offset = 0; offset < STENCIL_ORDER + 1; ++offset)
+        pencil[offset] = arr[IDX(i, j, k + offset - STENCIL_ORDER / 2)];
+
+    return second_derivative(pencil, get(AC_inv_dsz));
+}
+
+#if LUPWD
+static inline ModelScalar
+der6x_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsx);
+
+    return ModelScalar(1.0 / 60.0) * inv_ds *
+           (-ModelScalar(20.0) * arr[IDX(i, j, k)] +
+            ModelScalar(15.0) * (arr[IDX(i + 1, j, k)] + arr[IDX(i - 1, j, k)]) -
+            ModelScalar(6.0) * (arr[IDX(i + 2, j, k)] + arr[IDX(i - 2, j, k)]) +
+            arr[IDX(i + 3, j, k)] + arr[IDX(i - 3, j, k)]);
+}
+
+static inline ModelScalar
+der6y_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsy);
+
+    return ModelScalar(1.0 / 60.0) * inv_ds *
+           (-ModelScalar(20.0) * arr[IDX(i, j, k)] +
+            ModelScalar(15.0) * (arr[IDX(i, j + 1, k)] + arr[IDX(i, j - 1, k)]) -
+            ModelScalar(6.0) * (arr[IDX(i, j + 2, k)] + arr[IDX(i, j - 2, k)]) +
+            arr[IDX(i, j + 3, k)] + arr[IDX(i, j - 3, k)]);
+}
+
+static inline ModelScalar
+der6z_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsz);
+
+    return ModelScalar(1.0 / 60.0) * inv_ds *
+           (-ModelScalar(20.0) * arr[IDX(i, j, k)] +
+            ModelScalar(15.0) * (arr[IDX(i, j, k + 1)] + arr[IDX(i, j, k - 1)]) -
+            ModelScalar(6.0) * (arr[IDX(i, j, k + 2)] + arr[IDX(i, j, k - 2)]) +
+            arr[IDX(i, j, k + 3)] + arr[IDX(i, j, k - 3)]);
+}
+#endif
+
+static inline ModelScalar
+compute_value(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    return arr[IDX(i, j, k)];
+}
+
+static inline ModelVector
+compute_gradient(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    return (ModelVector){derx(i, j, k, arr), dery(i, j, k, arr), derz(i, j, k, arr)};
+}
+
+#if LUPWD
+static inline ModelVector
+compute_upwind(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    return (ModelVector){der6x_upwd(i, j, k, arr), der6y_upwd(i, j, k, arr),
+                         der6z_upwd(i, j, k, arr)};
+}
+#endif
+
+static inline ModelMatrix
+compute_hessian(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelMatrix hessian;
+
+    hessian.row[0] = (ModelVector){derxx(i, j, k, arr), derxy(i, j, k, arr), derxz(i, j, k, arr)};
+    hessian.row[1] = (ModelVector){hessian.row[0].y, deryy(i, j, k, arr), deryz(i, j, k, arr)};
+    hessian.row[2] = (ModelVector){hessian.row[0].z, hessian.row[1].z, derzz(i, j, k, arr)};
+
+    return hessian;
+}
+
+static inline ModelScalarData
+read_data(const int i, const int j, const int k, ModelScalar* buf[], const int handle)
+{
+    ModelScalarData data;
+
+    data.value    = compute_value(i, j, k, buf[handle]);
+    data.gradient = compute_gradient(i, j, k, buf[handle]);
+
+    // No significant effect on performance even though we do not need the
+    // diagonals with all arrays
+    data.hessian = compute_hessian(i, j, k, buf[handle]);
+
+#if LUPWD
+    data.upwind = compute_upwind(i, j, k, buf[handle]);
+#endif
+
+    return data;
+}
+
+static inline ModelVectorData
+read_data(const int i, const int j, const int k, ModelScalar* buf[], const int3& handle)
+{
+    ModelVectorData data;
+
+    data.x = read_data(i, j, k, buf, handle.x);
+    data.y = read_data(i, j, k, buf, handle.y);
+    data.z = read_data(i, j, k, buf, handle.z);
+
+    return data;
+}
+
+static inline ModelScalar
+value(const ModelScalarData& data)
+{
+    return data.value;
+}
+
+static inline ModelVector
+gradient(const ModelScalarData& data)
+{
+    return data.gradient;
+}
+
+static inline ModelMatrix
+hessian(const ModelScalarData& data)
+{
+    return data.hessian;
+}
+
+static inline ModelVector
+value(const ModelVectorData& data)
+{
+    return (ModelVector){value(data.x), value(data.y), value(data.z)};
+}
+
+static inline ModelMatrix
+gradients(const ModelVectorData& data)
+{
+    return (ModelMatrix){gradient(data.x), gradient(data.y), gradient(data.z)};
+}
+
+/*
+ * =============================================================================
+ * Level 0.3 (Built-in functions available during the Stencil Processing Stage)
+ * =============================================================================
+ */
+
+static inline ModelVector
+operator-(const ModelVector& a, const ModelVector& b)
+{
+    return (ModelVector){a.x - b.x, a.y - b.y, a.z - b.z};
+}
+
+static inline ModelVector
+operator+(const ModelVector& a, const ModelVector& b)
+{
+    return (ModelVector){a.x + b.x, a.y + b.y, a.z + b.z};
+}
+
+static inline ModelVector
+operator-(const ModelVector& a)
+{
+    return (ModelVector){-a.x, -a.y, -a.z};
+}
+
+static inline ModelVector operator*(const ModelScalar a, const ModelVector& b)
+{
+    return (ModelVector){a * b.x, a * b.y, a * b.z};
+}
+
+static inline ModelScalar
+dot(const ModelVector& a, const ModelVector& b)
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+
+static inline ModelVector
+mul(const ModelMatrix& aa, const ModelVector& x)
+{
+    return (ModelVector){dot(aa.row[0], x), dot(aa.row[1], x), dot(aa.row[2], x)};
+}
+
+static inline ModelVector
+cross(const ModelVector& a, const ModelVector& b)
+{
+    ModelVector c;
+
+    c.x = a.y * b.z - a.z * b.y;
+    c.y = a.z * b.x - a.x * b.z;
+    c.z = a.x * b.y - a.y * b.x;
+
+    return c;
+}
+/*
+static inline bool
+is_valid(const ModelScalar a)
+{
+    return !isnan(a) && !isinf(a);
+}
+
+static inline bool
+is_valid(const ModelVector& a)
+{
+    return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
+}
+*/
+/*
+ * =============================================================================
+ * Stencil Processing Stage (helper functions)
+ * =============================================================================
+ */
+static inline ModelScalar
+laplace(const ModelScalarData& data)
+{
+    return hessian(data).row[0].x + hessian(data).row[1].y + hessian(data).row[2].z;
+}
+
+static inline ModelScalar
+divergence(const ModelVectorData& vec)
+{
+    return gradient(vec.x).x + gradient(vec.y).y + gradient(vec.z).z;
+}
+
+static inline ModelVector
+laplace_vec(const ModelVectorData& vec)
+{
+    return (ModelVector){laplace(vec.x), laplace(vec.y), laplace(vec.z)};
+}
+
+static inline ModelVector
+curl(const ModelVectorData& vec)
+{
+    return (ModelVector){gradient(vec.z).y - gradient(vec.y).z,
+                         gradient(vec.x).z - gradient(vec.z).x,
+                         gradient(vec.y).x - gradient(vec.x).y};
+}
+
+static inline ModelVector
+gradient_of_divergence(const ModelVectorData& vec)
+{
+    return (ModelVector){
+        hessian(vec.x).row[0].x + hessian(vec.y).row[0].y + hessian(vec.z).row[0].z,
+        hessian(vec.x).row[1].x + hessian(vec.y).row[1].y + hessian(vec.z).row[1].z,
+        hessian(vec.x).row[2].x + hessian(vec.y).row[2].y + hessian(vec.z).row[2].z};
+}
+
+// Takes uu gradients and returns S
+static inline ModelMatrix
+stress_tensor(const ModelVectorData& vec)
+{
+    ModelMatrix S;
+
+    S.row[0].x = ModelScalar(2. / 3.) * gradient(vec.x).x -
+                 ModelScalar(1. / 3.) * (gradient(vec.y).y + gradient(vec.z).z);
+    S.row[0].y = ModelScalar(1. / 2.) * (gradient(vec.x).y + gradient(vec.y).x);
+    S.row[0].z = ModelScalar(1. / 2.) * (gradient(vec.x).z + gradient(vec.z).x);
+
+    S.row[1].y = ModelScalar(2. / 3.) * gradient(vec.y).y -
+                 ModelScalar(1. / 3.) * (gradient(vec.x).x + gradient(vec.z).z);
+
+    S.row[1].z = ModelScalar(1. / 2.) * (gradient(vec.y).z + gradient(vec.z).y);
+
+    S.row[2].z = ModelScalar(2. / 3.) * gradient(vec.z).z -
+                 ModelScalar(1. / 3.) * (gradient(vec.x).x + gradient(vec.y).y);
+
+    S.row[1].x = S.row[0].y;
+    S.row[2].x = S.row[0].z;
+    S.row[2].y = S.row[1].z;
+
+    return S;
+}
+
+static inline ModelScalar
+contract(const ModelMatrix& mat)
+{
+    ModelScalar res = 0;
+
+    //#pragma unroll
+    for (int i = 0; i < 3; ++i)
+        res += dot(mat.row[i], mat.row[i]);
+
+    return res;
+}
+
+/*
+ * =============================================================================
+ * Stencil Processing Stage (equations)
+ * =============================================================================
+ */
+
+#if LUPWD
+ModelScalar
+upwd_der6(const ModelVectorData& uu, const ModelScalarData& lnrho)
+{
+    ModelScalar uux = fabsl(value(uu).x);
+    ModelScalar uuy = fabsl(value(uu).y);
+    ModelScalar uuz = fabsl(value(uu).z);
+    return uux * lnrho.upwind.x + uuy * lnrho.upwind.y + uuz * lnrho.upwind.z;
+}
+#endif
+
+static inline ModelScalar
+continuity(const ModelVectorData& uu, const ModelScalarData& lnrho)
+{
+    return -dot(value(uu), gradient(lnrho))
+#if LUPWD
+           // This is a corrective hyperdiffusion term for upwinding.
+           + upwd_der6(uu, lnrho)
+#endif
+           - divergence(uu);
+}
+
+static inline ModelScalar
+length(const ModelVector& vec)
+{
+    return sqrtl(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
+}
+
+static inline ModelScalar
+reciprocal_len(const ModelVector& vec)
+{
+    return 1.l / sqrtl(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
+}
+
+static inline ModelVector
+normalized(const ModelVector& vec)
+{
+    const ModelScalar inv_len = reciprocal_len(vec);
+    return inv_len * vec;
+}
+
+#define H_CONST (ModelScalar(0.0))
+#define C_CONST (ModelScalar(0.0))
+
+static inline ModelVector
+momentum(const ModelVectorData& uu, const ModelScalarData& lnrho
+#if LENTROPY
+         ,
+         const ModelScalarData& ss, const ModelVectorData& aa
+#endif
+)
+{
+#if LENTROPY
+    const ModelMatrix S   = stress_tensor(uu);
+    const ModelScalar cs2 = get(AC_cs2_sound) *
+                            expl(get(AC_gamma) * value(ss) / get(AC_cp_sound) +
+                                 (get(AC_gamma) - 1) * (value(lnrho) - get(AC_lnrho0)));
+    const ModelVector j = (ModelScalar(1.) / get(AC_mu0)) *
+                          (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
+    const ModelVector B       = curl(aa);
+    const ModelScalar inv_rho = ModelScalar(1.) / expl(value(lnrho));
+
+    const ModelVector mom = -mul(gradients(uu), value(uu)) -
+                            cs2 * ((ModelScalar(1.) / get(AC_cp_sound)) * gradient(ss) +
+                                   gradient(lnrho)) +
+                            inv_rho * cross(j, B) +
+                            get(AC_nu_visc) * (laplace_vec(uu) +
+                                               ModelScalar(1. / 3.) * gradient_of_divergence(uu) +
+                                               ModelScalar(2.) * mul(S, gradient(lnrho))) +
+                            get(AC_zeta) * gradient_of_divergence(uu);
+    return mom;
+#else
+    // !!!!!!!!!!!!!!!!%JP: NOTE TODO IMPORTANT!!!!!!!!!!!!!!!!!!!!!!!!
+    // NOT CHECKED FOR CORRECTNESS: USE AT YOUR OWN RISK
+    const ModelMatrix S = stress_tensor(uu);
+
+    const ModelVector mom = -mul(gradients(uu), value(uu)) - get(AC_cs2_sound) * gradient(lnrho) +
+                            get(AC_nu_visc) * (laplace_vec(uu) +
+                                               ModelScalar(1. / 3.) * gradient_of_divergence(uu) +
+                                               ModelScalar(2.) * mul(S, gradient(lnrho))) +
+                            get(AC_zeta) * gradient_of_divergence(uu);
+    return mom;
+#endif
+}
+
+static inline ModelVector
+induction(const ModelVectorData& uu, const ModelVectorData& aa)
+{
+    ModelVector ind;
+    // Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
+    // x A)) in order to avoid taking the first derivative twice (did the math,
+    // yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
+    // u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
+    const ModelVector B        = curl(aa);
+    const ModelVector grad_div = gradient_of_divergence(aa);
+    const ModelVector lap      = laplace_vec(aa);
+
+    // Note, mu0 is cancelled out
+    ind = cross(value(uu), B) - get(AC_eta) * (grad_div - lap);
+
+    return ind;
+}
+
+static inline ModelScalar
+lnT(const ModelScalarData& ss, const ModelScalarData& lnrho)
+{
+    const ModelScalar lnT = get(AC_lnT0) + get(AC_gamma) * value(ss) / get(AC_cp_sound) +
+                            (get(AC_gamma) - ModelScalar(1.)) * (value(lnrho) - get(AC_lnrho0));
+    return lnT;
+}
+
+// Nabla dot (K nabla T) / (rho T)
+static inline ModelScalar
+heat_conduction(const ModelScalarData& ss, const ModelScalarData& lnrho)
+{
+    const ModelScalar inv_cp_sound = ModelScalar(1.) / get(AC_cp_sound);
+
+    const ModelVector grad_ln_chi = -gradient(lnrho);
+
+    const ModelScalar first_term = get(AC_gamma) * inv_cp_sound * laplace(ss) +
+                                   (get(AC_gamma) - ModelScalar(1.)) * laplace(lnrho);
+    const ModelVector second_term = get(AC_gamma) * inv_cp_sound * gradient(ss) +
+                                    (get(AC_gamma) - ModelScalar(1.)) * gradient(lnrho);
+    const ModelVector third_term = get(AC_gamma) * (inv_cp_sound * gradient(ss) + gradient(lnrho)) +
+                                   grad_ln_chi;
+
+    const ModelScalar chi = AC_THERMAL_CONDUCTIVITY / (expl(value(lnrho)) * get(AC_cp_sound));
+    return get(AC_cp_sound) * chi * (first_term + dot(second_term, third_term));
+}
+
+static inline ModelScalar
+entropy(const ModelScalarData& ss, const ModelVectorData& uu, const ModelScalarData& lnrho,
+        const ModelVectorData& aa)
+{
+    const ModelMatrix S      = stress_tensor(uu);
+    const ModelScalar inv_pT = ModelScalar(1.) / (expl(value(lnrho)) * expl(lnT(ss, lnrho)));
+    const ModelVector j      = (ModelScalar(1.) / get(AC_mu0)) *
+                          (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
+    const ModelScalar RHS = H_CONST - C_CONST + get(AC_eta) * get(AC_mu0) * dot(j, j) +
+                            ModelScalar(2.) * expl(value(lnrho)) * get(AC_nu_visc) * contract(S) +
+                            get(AC_zeta) * expl(value(lnrho)) * divergence(uu) * divergence(uu);
+
+    return -dot(value(uu), gradient(ss)) + inv_pT * RHS + heat_conduction(ss, lnrho);
+    /*
+    const ModelMatrix S = stress_tensor(uu);
+
+    // nabla x nabla x A / mu0 = nabla(nabla dot A) - nabla^2(A)
+    const ModelVector j = gradient_of_divergence(aa) - laplace_vec(aa);
+
+    const ModelScalar inv_pT = ModelScalar(1.) / (expl(value(lnrho)) + expl(lnT(ss, lnrho)));
+
+    return - dot(value(uu), gradient(ss))
+           + inv_pT * ( H_CONST - C_CONST
+                + get(AC_eta) * get(AC_mu0) * dot(j, j)
+                + ModelScalar(2.) * expl(value(lnrho)) * get(AC_nu_visc) * contract(S)
+                + get(AC_zeta) * expl(value(lnrho)) * divergence(uu) * divergence(uu)
+            )
+            + heat_conduction(ss, lnrho);
+    */
+}
+
+static inline bool
+is_valid(const ModelScalar a)
+{
+    return !isnan(a) && !isinf(a);
+}
+
+static inline bool
+is_valid(const ModelVector& a)
+{
+    return is_valid(a.x) && is_valid(a.y) && is_valid(a.z);
+}
+
+#if LFORCING
+ModelVector
+simple_vortex_forcing(ModelVector a, ModelVector b, ModelScalar magnitude)
+{
+    return magnitude * cross(normalized(b - a), (ModelVector){0, 0, 1}); // Vortex
+}
+
+ModelVector
+simple_outward_flow_forcing(ModelVector a, ModelVector b, ModelScalar magnitude)
+{
+    return magnitude * (1 / length(b - a)) * normalized(b - a); // Outward flow
+}
+
+// The Pencil Code forcing_hel_noshear(), manual Eq. 222, inspired forcing function with adjustable
+// helicity
+ModelVector
+helical_forcing(ModelScalar magnitude, ModelVector k_force, ModelVector xx, ModelVector ff_re,
+                ModelVector ff_im, ModelScalar phi)
+{
+    (void)magnitude; // WARNING: unused
+    xx.x = xx.x * (2.0 * M_PI / (get(AC_dsx) * get(AC_nx)));
+    xx.y = xx.y * (2.0 * M_PI / (get(AC_dsy) * get(AC_ny)));
+    xx.z = xx.z * (2.0 * M_PI / (get(AC_dsz) * get(AC_nz)));
+
+    ModelScalar cos_phi     = cosl(phi);
+    ModelScalar sin_phi     = sinl(phi);
+    ModelScalar cos_k_dot_x = cosl(dot(k_force, xx));
+    ModelScalar sin_k_dot_x = sinl(dot(k_force, xx));
+    // Phase affect only the x-component
+    // Scalar real_comp       = cos_k_dot_x;
+    // Scalar imag_comp       = sin_k_dot_x;
+    ModelScalar real_comp_phase = cos_k_dot_x * cos_phi - sin_k_dot_x * sin_phi;
+    ModelScalar imag_comp_phase = cos_k_dot_x * sin_phi + sin_k_dot_x * cos_phi;
+
+    ModelVector force = (ModelVector){ff_re.x * real_comp_phase - ff_im.x * imag_comp_phase,
+                                      ff_re.y * real_comp_phase - ff_im.y * imag_comp_phase,
+                                      ff_re.z * real_comp_phase - ff_im.z * imag_comp_phase};
+
+    return force;
+}
+
+ModelVector
+forcing(int3 globalVertexIdx, ModelScalar dt)
+{
+    ModelVector a = ModelScalar(.5) * (ModelVector){get(AC_nx) * get(AC_dsx),
+                                                    get(AC_ny) * get(AC_dsy),
+                                                    get(AC_nz) * get(AC_dsz)}; // source (origin)
+    (void)a;                                                                   // WARNING: not used
+    ModelVector xx        = (ModelVector){(globalVertexIdx.x - get(AC_nx_min)) * get(AC_dsx),
+                                   (globalVertexIdx.y - get(AC_ny_min)) * get(AC_dsy),
+                                   (globalVertexIdx.z - get(AC_nz_min)) *
+                                       get(AC_dsz)}; // sink (current index)
+    const ModelScalar cs2 = get(AC_cs2_sound);
+    const ModelScalar cs  = sqrtl(cs2);
+
+    // Placeholders until determined properly
+    ModelScalar magnitude = get(AC_forcing_magnitude);
+    ModelScalar phase     = get(AC_forcing_phase);
+    ModelVector k_force   = (ModelVector){get(AC_k_forcex), get(AC_k_forcey), get(AC_k_forcez)};
+    ModelVector ff_re = (ModelVector){get(AC_ff_hel_rex), get(AC_ff_hel_rey), get(AC_ff_hel_rez)};
+    ModelVector ff_im = (ModelVector){get(AC_ff_hel_imx), get(AC_ff_hel_imy), get(AC_ff_hel_imz)};
+
+    (void)phase;   // WARNING: unused with simple forcing. Should be defined in helical_forcing
+    (void)k_force; // WARNING: unused with simple forcing. Should be defined in helical_forcing
+    (void)ff_re;   // WARNING: unused with simple forcing. Should be defined in helical_forcing
+    (void)ff_im;   // WARNING: unused with simple forcing. Should be defined in helical_forcing
+
+    // Determine that forcing funtion type at this point.
+    // ModelVector force = simple_vortex_forcing(a, xx, magnitude);
+    // ModelVector force = simple_outward_flow_forcing(a, xx, magnitude);
+    ModelVector force = helical_forcing(magnitude, k_force, xx, ff_re, ff_im, phase);
+
+    // Scaling N = magnitude*cs*sqrtl(k*cs/dt)  * dt
+    const ModelScalar NN = cs * sqrtl(get(AC_kaver) * cs);
+    // MV: Like in the Pencil Code. I don't understandf the logic here.
+    force.x = sqrtl(dt) * NN * force.x;
+    force.y = sqrtl(dt) * NN * force.y;
+    force.z = sqrtl(dt) * NN * force.z;
+
+    if (is_valid(force)) {
+        return force;
+    }
+    else {
+        return (ModelVector){0, 0, 0};
+    }
+}
+#endif
+
+static void
+solve_alpha_step(const int step_number, const ModelScalar dt, const int i, const int j, const int k,
+                 ModelMesh& in, ModelMesh* out)
+{
+    const int idx = acVertexBufferIdx(i, j, k, in.info);
+
+    const ModelScalarData lnrho = read_data(i, j, k, in.vertex_buffer, VTXBUF_LNRHO);
+    const ModelVectorData uu    = read_data(i, j, k, in.vertex_buffer,
+                                         (int3){VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ});
+
+    ModelScalar rate_of_change[NUM_VTXBUF_HANDLES] = {0};
+    rate_of_change[VTXBUF_LNRHO]                   = continuity(uu, lnrho);
+
+#if LMAGNETIC
+    const ModelVectorData aa  = read_data(i, j, k, in.vertex_buffer,
+                                         (int3){VTXBUF_AX, VTXBUF_AY, VTXBUF_AZ});
+    const ModelVector aa_res  = induction(uu, aa);
+    rate_of_change[VTXBUF_AX] = aa_res.x;
+    rate_of_change[VTXBUF_AY] = aa_res.y;
+    rate_of_change[VTXBUF_AZ] = aa_res.z;
+#endif
+#if LENTROPY
+    const ModelScalarData ss       = read_data(i, j, k, in.vertex_buffer, VTXBUF_ENTROPY);
+    const ModelVector uu_res       = momentum(uu, lnrho, ss, aa);
+    rate_of_change[VTXBUF_UUX]     = uu_res.x;
+    rate_of_change[VTXBUF_UUY]     = uu_res.y;
+    rate_of_change[VTXBUF_UUZ]     = uu_res.z;
+    rate_of_change[VTXBUF_ENTROPY] = entropy(ss, uu, lnrho, aa);
+#else
+    const ModelVector uu_res   = momentum(uu, lnrho);
+    rate_of_change[VTXBUF_UUX] = uu_res.x;
+    rate_of_change[VTXBUF_UUY] = uu_res.y;
+    rate_of_change[VTXBUF_UUZ] = uu_res.z;
+#endif
+
+    // Williamson (1980) NOTE: older version of astaroth used inhomogenous
+    const ModelScalar alpha[] = {ModelScalar(.0), ModelScalar(-5. / 9.), ModelScalar(-153. / 128.)};
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        if (step_number == 0) {
+            out->vertex_buffer[w][idx] = rate_of_change[w] * dt;
+        }
+        else {
+            out->vertex_buffer[w][idx] = alpha[step_number] * out->vertex_buffer[w][idx] +
+                                         rate_of_change[w] * dt;
+        }
+    }
+}
+
+static void
+solve_beta_step(const int step_number, const ModelScalar dt, const int i, const int j, const int k,
+                const ModelMesh& in, ModelMesh* out)
+{
+    const int idx = acVertexBufferIdx(i, j, k, in.info);
+
+    // Williamson (1980) NOTE: older version of astaroth used inhomogenous
+    const ModelScalar beta[] = {ModelScalar(1. / 3.), ModelScalar(15. / 16.),
+                                ModelScalar(8. / 15.)};
+
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+        out->vertex_buffer[w][idx] += beta[step_number] * in.vertex_buffer[w][idx];
+
+    (void)dt; // Suppress unused variable warning if forcing not used
+#if LFORCING
+    if (step_number == 2) {
+        ModelVector force = forcing((int3){i, j, k}, dt);
+        out->vertex_buffer[VTXBUF_UUX][idx] += force.x;
+        out->vertex_buffer[VTXBUF_UUY][idx] += force.y;
+        out->vertex_buffer[VTXBUF_UUZ][idx] += force.z;
+    }
+#endif
+}
+
+void
+model_rk3_step(const int step_number, const ModelScalar dt, ModelMesh* mesh)
+{
+    mesh_info = &(mesh->info);
+
+    ModelMesh* tmp = modelmesh_create(mesh->info);
+
+    boundconds(mesh->info, mesh);
+#pragma omp parallel for
+    for (int k = get(AC_nz_min); k < get(AC_nz_max); ++k) {
+        for (int j = get(AC_ny_min); j < get(AC_ny_max); ++j) {
+            for (int i = get(AC_nx_min); i < get(AC_nx_max); ++i) {
+                solve_alpha_step(step_number, dt, i, j, k, *mesh, tmp);
+            }
+        }
+    }
+#pragma omp parallel for
+    for (int k = get(AC_nz_min); k < get(AC_nz_max); ++k) {
+        for (int j = get(AC_ny_min); j < get(AC_ny_max); ++j) {
+            for (int i = get(AC_nx_min); i < get(AC_nx_max); ++i) {
+                solve_beta_step(step_number, dt, i, j, k, *tmp, mesh);
+            }
+        }
+    }
+
+    modelmesh_destroy(tmp);
+    mesh_info = NULL;
+}
+
+void
+model_rk3(const ModelScalar dt, ModelMesh* mesh)
+{
+    mesh_info = &(mesh->info);
+
+    ModelMesh* tmp = modelmesh_create(mesh->info);
+
+    for (int step_number = 0; step_number < 3; ++step_number) {
+        boundconds(mesh->info, mesh);
+#pragma omp parallel for
+        for (int k = get(AC_nz_min); k < get(AC_nz_max); ++k) {
+            for (int j = get(AC_ny_min); j < get(AC_ny_max); ++j) {
+                for (int i = get(AC_nx_min); i < get(AC_nx_max); ++i) {
+                    solve_alpha_step(step_number, dt, i, j, k, *mesh, tmp);
+                }
+            }
+        }
+#pragma omp parallel for
+        for (int k = get(AC_nz_min); k < get(AC_nz_max); ++k) {
+            for (int j = get(AC_ny_min); j < get(AC_ny_max); ++j) {
+                for (int i = get(AC_nx_min); i < get(AC_nx_max); ++i) {
+                    solve_beta_step(step_number, dt, i, j, k, *tmp, mesh);
+                }
+            }
+        }
+    }
+
+    modelmesh_destroy(tmp);
+    mesh_info = NULL;
+}
--- a/samples/standalone/model/model_rk3.h
+++ b/samples/standalone/model/model_rk3.h
@@ -0,0 +1,33 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+#include "modelmesh.h"
+
+void model_rk3(const ModelScalar dt, ModelMesh* mesh);
+
+void model_rk3_step(const int step_number, const ModelScalar dt, ModelMesh* mesh);
--- a/samples/standalone/model/modelmesh.h
+++ b/samples/standalone/model/modelmesh.h
@@ -0,0 +1,37 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+
+#include "math.h"
+
+typedef long double ModelScalar;
+
+typedef struct {
+    ModelScalar* vertex_buffer[NUM_VTXBUF_HANDLES];
+    AcMeshInfo info;
+} ModelMesh;
--- a/samples/standalone/renderer.cc
+++ b/samples/standalone/renderer.cc
@@ -0,0 +1,463 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "run.h"
+
+#if AC_BUILD_RT_VISUALIZATION
+#include <SDL.h>    // Note: using local version in src/3rdparty dir
+#include <math.h>   // ceil
+#include <string.h> // memcpy
+
+#include "config_loader.h"
+#include "errchk.h"
+#include "math_utils.h"
+#include "model/host_forcing.h"
+#include "model/host_memory.h"
+#include "model/host_timestep.h"
+#include "model/model_reduce.h"
+#include "model/model_rk3.h"
+#include "timer_hires.h"
+
+// NEED TO BE DEFINED HERE. IS NOT NOTICED BY compile_acc call.
+#define LFORCING (1)
+#define LSINK (0)
+
+// Window
+SDL_Renderer* renderer      = NULL;
+static SDL_Window* window   = NULL;
+static int window_width     = 800;
+static int window_height    = 600;
+static const int window_bpp = 32; // Bits per pixel
+
+// Surfaces
+SDL_Surface* surfaces[NUM_VTXBUF_HANDLES];
+static int datasurface_width  = -1;
+static int datasurface_height = -1;
+static int k_slice            = 0;
+static int k_slice_max        = 0;
+
+// Colors
+static SDL_Color color_bg      = (SDL_Color){30, 30, 35, 255};
+static const int num_tiles     = NUM_VTXBUF_HANDLES + 1;
+static const int tiles_per_row = 3;
+
+/*
+ * =============================================================================
+ * Camera
+ * =============================================================================
+ */
+/*
+typedef struct {
+   float x, y;
+} float2;
+*/
+typedef struct {
+    float x, y, w, h;
+} vec4;
+
+typedef struct {
+    float2 pos;
+    float scale;
+} Camera;
+
+static Camera camera = (Camera){(float2){.0f, .0f}, 1.f};
+
+static inline vec4
+project_ortho(const float2& pos, const float2& bbox, const float2& wdims)
+{
+    const vec4 rect = (vec4){camera.scale * (pos.x - camera.pos.x) + 0.5f * wdims.x,
+                             camera.scale * (pos.y - camera.pos.y) + 0.5f * wdims.y,
+                             camera.scale * bbox.x, camera.scale * bbox.y};
+
+    return rect;
+}
+
+/*
+ * =============================================================================
+ * Renderer
+ * =============================================================================
+ */
+
+static int
+renderer_init(const int& mx, const int& my)
+{
+    // Init video
+    SDL_InitSubSystem(SDL_INIT_VIDEO | SDL_INIT_EVENTS);
+
+    // Setup window
+    window = SDL_CreateWindow("Astaroth", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
+                              window_width, window_height, SDL_WINDOW_SHOWN);
+    ERRCHK_ALWAYS(window);
+
+    // Setup SDL renderer
+    renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
+    // SDL_SetWindowFullscreen(window, SDL_WINDOW_FULLSCREEN_DESKTOP);
+    SDL_GetWindowSize(window, &window_width, &window_height);
+
+    SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "1"); // Linear filtering
+
+    datasurface_width  = mx;
+    datasurface_height = my;
+    // vec drawing uses the surface of the first component, no memory issues here
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
+        surfaces[i] = SDL_CreateRGBSurfaceWithFormat(0, datasurface_width, datasurface_height,
+                                                     window_bpp, SDL_PIXELFORMAT_RGBA8888);
+
+    camera.pos   = (float2){.5f * tiles_per_row * datasurface_width - .5f * datasurface_width,
+                          -.5f * (num_tiles / tiles_per_row) * datasurface_height +
+                              .5f * datasurface_height};
+    camera.scale = min(window_width / float(datasurface_width * tiles_per_row),
+                       window_height / float(datasurface_height * (num_tiles / tiles_per_row)));
+
+    SDL_RendererInfo renderer_info;
+    SDL_GetRendererInfo(renderer, &renderer_info);
+    printf("SDL renderer max texture dims: (%d, %d)\n", renderer_info.max_texture_width,
+           renderer_info.max_texture_height);
+    return 0;
+}
+
+static int
+set_pixel(const int& i, const int& j, const uint32_t& color, SDL_Surface* surface)
+{
+    uint32_t* pixels           = (uint32_t*)surface->pixels;
+    pixels[i + j * surface->w] = color;
+    return 0;
+}
+
+static int
+draw_vertex_buffer(const AcMesh& mesh, const VertexBufferHandle& vertex_buffer, const int& tile)
+{
+    const float xoffset = (tile % tiles_per_row) * datasurface_width;
+    const float yoffset = -(tile / tiles_per_row) * datasurface_height;
+
+    /*
+    const float max = float(model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer));
+    const float min = float(model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer));
+    */
+    const float max   = float(acReduceScal(RTYPE_MAX, vertex_buffer));
+    const float min   = float(acReduceScal(RTYPE_MIN, vertex_buffer));
+    const float range = fabsf(max - min);
+    const float mid   = max - .5f * range;
+
+    const int k = k_slice; // mesh.info.int_params[AC_mz] / 2;
+
+    for (int j = 0; j < mesh.info.int_params[AC_my]; ++j) {
+        for (int i = 0; i < mesh.info.int_params[AC_mx]; ++i) {
+            ERRCHK(i < datasurface_width && j < datasurface_height);
+
+            const int idx       = acVertexBufferIdx(i, j, k, mesh.info);
+            const uint8_t shade = (uint8_t)(
+                255.f * (fabsf(float(mesh.vertex_buffer[vertex_buffer][idx]) - mid)) / range);
+            uint8_t color[4]            = {0, 0, 0, 255};
+            color[tile % 3]             = shade;
+            const uint32_t mapped_color = SDL_MapRGBA(surfaces[vertex_buffer]->format, color[0],
+                                                      color[1], color[2], color[3]);
+            set_pixel(i, j, mapped_color, surfaces[vertex_buffer]);
+        }
+    }
+
+    const float2 pos   = (float2){xoffset, yoffset};
+    const float2 bbox  = (float2){.5f * datasurface_width, .5f * datasurface_height};
+    const float2 wsize = (float2){float(window_width), float(window_height)};
+    const vec4 rectf   = project_ortho(pos, bbox, wsize);
+    SDL_Rect rect      = (SDL_Rect){int(rectf.x - rectf.w), int(wsize.y - rectf.y - rectf.h),
+                               int(ceil(2.f * rectf.w)), int(ceil(2.f * rectf.h))};
+
+    SDL_Texture* tex = SDL_CreateTextureFromSurface(renderer, surfaces[vertex_buffer]);
+    SDL_RenderCopy(renderer, tex, NULL, &rect);
+    SDL_DestroyTexture(tex);
+
+    return 0;
+}
+
+static int
+draw_vertex_buffer_vec(const AcMesh& mesh, const VertexBufferHandle& vertex_buffer_a,
+                       const VertexBufferHandle& vertex_buffer_b,
+                       const VertexBufferHandle& vertex_buffer_c, const int& tile)
+{
+    const float xoffset = (tile % tiles_per_row) * datasurface_width;
+    const float yoffset = -(tile / tiles_per_row) * datasurface_height;
+
+    /*
+    const float maxx = float(
+        max(model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer_a),
+            max(model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer_b),
+                model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer_c))));
+    const float minn = float(
+        min(model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer_a),
+            min(model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer_b),
+                model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer_c))));
+    */
+    const float maxx  = float(max(
+        acReduceScal(RTYPE_MAX, vertex_buffer_a),
+        max(acReduceScal(RTYPE_MAX, vertex_buffer_b), acReduceScal(RTYPE_MAX, vertex_buffer_c))));
+    const float minn  = float(min(
+        acReduceScal(RTYPE_MIN, vertex_buffer_a),
+        min(acReduceScal(RTYPE_MIN, vertex_buffer_b), acReduceScal(RTYPE_MIN, vertex_buffer_c))));
+    const float range = fabsf(maxx - minn);
+    const float mid   = maxx - .5f * range;
+
+    const int k = k_slice; // mesh.info.int_params[AC_mz] / 2;
+    for (int j = 0; j < mesh.info.int_params[AC_my]; ++j) {
+        for (int i = 0; i < mesh.info.int_params[AC_mx]; ++i) {
+            ERRCHK(i < datasurface_width && j < datasurface_height);
+
+            const int idx   = acVertexBufferIdx(i, j, k, mesh.info);
+            const uint8_t r = (uint8_t)(
+                255.f * (fabsf(float(mesh.vertex_buffer[vertex_buffer_a][idx]) - mid)) / range);
+            const uint8_t g = (uint8_t)(
+                255.f * (fabsf(float(mesh.vertex_buffer[vertex_buffer_b][idx]) - mid)) / range);
+            const uint8_t b = (uint8_t)(
+                255.f * (fabsf(float(mesh.vertex_buffer[vertex_buffer_c][idx]) - mid)) / range);
+            const uint32_t mapped_color = SDL_MapRGBA(surfaces[vertex_buffer_a]->format, r, g, b,
+                                                      255);
+            set_pixel(i, j, mapped_color, surfaces[vertex_buffer_a]);
+        }
+    }
+
+    const float2 pos   = (float2){xoffset, yoffset};
+    const float2 bbox  = (float2){.5f * datasurface_width, .5f * datasurface_height};
+    const float2 wsize = (float2){float(window_width), float(window_height)};
+    const vec4 rectf   = project_ortho(pos, bbox, wsize);
+    SDL_Rect rect      = (SDL_Rect){int(rectf.x - rectf.w), int(wsize.y - rectf.y - rectf.h),
+                               int(ceil(2.f * rectf.w)), int(ceil(2.f * rectf.h))};
+
+    SDL_Texture* tex = SDL_CreateTextureFromSurface(renderer, surfaces[vertex_buffer_a]);
+    SDL_RenderCopy(renderer, tex, NULL, &rect);
+    SDL_DestroyTexture(tex);
+
+    return 0;
+}
+
+static int
+renderer_draw(const AcMesh& mesh)
+{
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
+        draw_vertex_buffer(mesh, VertexBufferHandle(i), i);
+    draw_vertex_buffer_vec(mesh, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ, NUM_VTXBUF_HANDLES);
+
+    // Drawing done, present
+    SDL_RenderPresent(renderer);
+    SDL_SetRenderDrawColor(renderer, color_bg.r, color_bg.g, color_bg.b, color_bg.a);
+    SDL_RenderClear(renderer);
+
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+        const VertexBufferHandle vertex_buffer = VertexBufferHandle(i);
+        /*
+        printf("\t%s umax %e, min %e\n", vtxbuf_names[vertex_buffer],
+               (double)model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer),
+               (double)model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer));
+        */
+        printf("\t%s umax %e, min %e\n", vtxbuf_names[vertex_buffer],
+               (double)acReduceScal(RTYPE_MAX, vertex_buffer),
+               (double)acReduceScal(RTYPE_MIN, vertex_buffer));
+    }
+    printf("\n");
+
+    return 0;
+}
+
+static int
+renderer_quit(void)
+{
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
+        SDL_FreeSurface(surfaces[i]);
+
+    SDL_DestroyRenderer(renderer);
+    SDL_DestroyWindow(window);
+
+    renderer = NULL;
+    window   = NULL;
+
+    SDL_Quit();
+    return 0;
+}
+
+static int init_type = INIT_TYPE_GAUSSIAN_RADIAL_EXPL;
+// static int init_type = INIT_TYPE_SIMPLE_CORE;
+
+static bool
+running(AcMesh* mesh)
+{
+    SDL_Event e;
+    while (SDL_PollEvent(&e)) {
+        if (e.type == SDL_QUIT) {
+            return false;
+        }
+        else if (e.type == SDL_KEYDOWN) {
+            if (e.key.keysym.sym == SDLK_ESCAPE)
+                return false;
+            if (e.key.keysym.sym == SDLK_SPACE) {
+                init_type = (init_type + 1) % NUM_INIT_TYPES;
+                acmesh_init_to(InitType(init_type), mesh);
+                acLoad(*mesh);
+            }
+            if (e.key.keysym.sym == SDLK_i) {
+                k_slice = (k_slice + 1) % k_slice_max;
+                printf("k_slice %d\n", k_slice);
+            }
+            if (e.key.keysym.sym == SDLK_k) {
+                k_slice = (k_slice - 1 + k_slice_max) % k_slice_max;
+                printf("k_slice %d\n", k_slice);
+            }
+        }
+    }
+    return true;
+}
+
+static void
+check_input(const float& dt)
+{
+    /* Camera movement */
+    const float camera_translate_rate = 1000.f / camera.scale;
+    const float camera_scale_rate     = 1.0001f;
+    const uint8_t* keystates          = (uint8_t*)SDL_GetKeyboardState(NULL);
+    if (keystates[SDL_SCANCODE_UP])
+        camera.pos.y += camera_translate_rate * dt;
+    if (keystates[SDL_SCANCODE_DOWN])
+        camera.pos.y -= camera_translate_rate * dt;
+    if (keystates[SDL_SCANCODE_LEFT])
+        camera.pos.x -= camera_translate_rate * dt;
+    if (keystates[SDL_SCANCODE_RIGHT])
+        camera.pos.x += camera_translate_rate * dt;
+    if (keystates[SDL_SCANCODE_PAGEUP])
+        camera.scale += camera.scale * camera_scale_rate * dt;
+    if (keystates[SDL_SCANCODE_PAGEDOWN])
+        camera.scale -= camera.scale * camera_scale_rate * dt;
+    if (keystates[SDL_SCANCODE_COMMA])
+        set_timescale(AcReal(.1));
+    if (keystates[SDL_SCANCODE_PERIOD])
+        set_timescale(AcReal(1.));
+}
+
+int
+run_renderer(const char* config_path)
+{
+    /* Parse configs */
+    AcMeshInfo mesh_info;
+    load_config(config_path, &mesh_info);
+    renderer_init(mesh_info.int_params[AC_mx], mesh_info.int_params[AC_my]);
+
+    AcMesh* mesh = acmesh_create(mesh_info);
+    acmesh_init_to(InitType(init_type), mesh);
+
+#if LSINK
+    vertex_buffer_set(VTXBUF_ACCRETION, 0.0, mesh);
+#endif
+
+    acInit(mesh_info);
+    acLoad(*mesh);
+
+    Timer frame_timer;
+    timer_reset(&frame_timer);
+
+    Timer wallclock;
+    timer_reset(&wallclock);
+
+    Timer io_timer;
+    timer_reset(&io_timer);
+
+    const float desired_frame_time = 1.f / 60.f;
+    int steps                      = 0;
+    k_slice                        = mesh->info.int_params[AC_mz] / 2;
+    k_slice_max                    = mesh->info.int_params[AC_mz];
+#if LSINK
+    AcReal accreted_mass = 0.0;
+#endif
+    while (running(mesh)) {
+
+        /* Input */
+        check_input(timer_diff_nsec(io_timer) / 1e9f);
+        timer_reset(&io_timer);
+
+/* Step the simulation */
+#if LSINK
+        const AcReal sum_mass = acReduceScal(RTYPE_SUM, VTXBUF_ACCRETION);
+        accreted_mass         = accreted_mass + sum_mass;
+        AcReal sink_mass      = mesh_info.real_params[AC_M_sink_init] + accreted_mass;
+        printf("sink mass is: %e \n", sink_mass);
+        printf("accreted mass is: %e \n", accreted_mass);
+        acLoadDeviceConstant(AC_M_sink, sink_mass);
+        vertex_buffer_set(VTXBUF_ACCRETION, 0.0, mesh);
+#endif
+
+#if LFORCING
+        const ForcingParams forcing_params = generateForcingParams(mesh->info);
+        loadForcingParamsToDevice(forcing_params);
+#endif
+
+#if 1
+        const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+        const AcReal dt   = host_timestep(umax, mesh_info);
+
+        acIntegrate(dt);
+#else
+        ModelMesh* model_mesh = modelmesh_create(mesh->info);
+        const AcReal umax     = AcReal(
+            model_reduce_vec(*model_mesh, RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ));
+        const AcReal dt = host_timestep(umax, mesh_info);
+        acmesh_to_modelmesh(*mesh, model_mesh);
+        model_rk3(dt, model_mesh);
+        modelmesh_to_acmesh(*model_mesh, mesh);
+        modelmesh_destroy(model_mesh);
+        acLoad(*mesh); // Just a quick hack s.t. we do not have to add an
+                       // additional if to the render part
+#endif
+
+        ++steps;
+
+        /* Render */
+        const float timer_diff_sec = timer_diff_nsec(frame_timer) / 1e9f;
+        if (timer_diff_sec >= desired_frame_time) {
+            const int num_vertices = mesh->info.int_params[AC_mxy];
+            const int3 dst         = (int3){0, 0, k_slice};
+            acBoundcondStep();
+            // acStore(mesh);
+            acStoreWithOffset(dst, num_vertices, mesh);
+            acSynchronizeStream(STREAM_ALL);
+            renderer_draw(*mesh); // Bottleneck is here
+            printf("Step #%d, dt: %f\n", steps, double(dt));
+            timer_reset(&frame_timer);
+        }
+    }
+    printf("Wallclock time %f s\n", double(timer_diff_nsec(wallclock) / 1e9f));
+
+    acQuit();
+    acmesh_destroy(mesh);
+
+    renderer_quit();
+
+    return 0;
+}
+#else // BUILD_RT_VISUALIZATION == 0
+#include "errchk.h"
+int
+run_renderer(const char* /*config_path*/)
+{
+    WARNING("Real-time visualization module not built. Set BUILD_RT_VISUALIZATION=ON with cmake.");
+    return 1;
+}
+#endif // BUILD_RT_VISUALIZATION
--- a/samples/standalone/run.h
+++ b/samples/standalone/run.h
@@ -0,0 +1,35 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+
+int run_autotest(const char* config_path);
+
+int run_simulation(const char* config_path);
+
+int run_benchmark(const char* config_path);
+
+int run_renderer(const char* config_path);
--- a/samples/standalone/simulation.cc
+++ b/samples/standalone/simulation.cc
@@ -0,0 +1,404 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "run.h"
+
+#include "config_loader.h"
+#include "model/host_forcing.h"
+#include "model/host_memory.h"
+#include "model/host_timestep.h"
+#include "model/model_reduce.h"
+#include "model/model_rk3.h"
+#include "errchk.h"
+#include "math_utils.h"
+#include "timer_hires.h"
+
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// NEED TO BE DEFINED HERE. IS NOT NOTICED BY compile_acc call.
+#define LFORCING (1)
+#define LSINK (0)
+
+// Write all setting info into a separate ascii file. This is done to guarantee
+// that we have the data specifi information in the thing, even though in
+// principle these things are in the astaroth.conf.
+static inline void
+write_mesh_info(const AcMeshInfo* config)
+{
+
+    FILE* infotxt;
+
+    infotxt = fopen("purge.sh", "w");
+    fprintf(infotxt, "#!/bin/bash\n");
+    fprintf(infotxt, "rm *.list *.mesh *.ts purge.sh\n");
+    fclose(infotxt);
+
+    infotxt = fopen("mesh_info.list", "w");
+
+    // Determine endianness
+    unsigned int EE = 1;
+    char *CC = (char*) &EE;
+    const int endianness = (int) *CC; 
+    // endianness = 0 -> big endian 
+    // endianness = 1 -> little endian
+
+    fprintf(infotxt, "size_t %s %lu \n", "AcRealSize", sizeof(AcReal));
+
+    fprintf(infotxt, "int %s %i \n", "endian", endianness);
+
+    // JP: this could be done shorter and with smaller chance for errors with the following
+    // (modified from acPrintMeshInfo() in astaroth.cu)
+    // MV: Now adapted into working condition. E.g. removed useless / harmful formatting. 
+
+    for (int i = 0; i < NUM_INT_PARAMS; ++i)
+        fprintf(infotxt, "int %s %d\n", intparam_names[i], config->int_params[i]);
+
+    for (int i = 0; i < NUM_INT3_PARAMS; ++i)
+        fprintf(infotxt, "int3 %s  %d %d %d\n", int3param_names[i], config->int3_params[i].x,
+                                                                        config->int3_params[i].y,
+                                                                        config->int3_params[i].z);
+
+    for (int i = 0; i < NUM_REAL_PARAMS; ++i)
+        fprintf(infotxt, "real %s %g\n", realparam_names[i], double(config->real_params[i]));
+
+    for (int i = 0; i < NUM_REAL3_PARAMS; ++i)
+        fprintf(infotxt, "real3 %s  %g %g %g\n", real3param_names[i],
+                                                    double(config->real3_params[i].x),
+                                                    double(config->real3_params[i].y),
+                                                    double(config->real3_params[i].z));
+
+    fclose(infotxt);
+}
+
+// This funtion writes a run state into a set of C binaries. 
+static inline void
+save_mesh(const AcMesh& save_mesh, const int step, const AcReal t_step)
+{
+    FILE* save_ptr;
+
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        const size_t n = acVertexBufferSize(save_mesh.info);
+
+        const char* buffername = vtxbuf_names[w];
+        char cstep[11];
+        char bin_filename[80] = "\0";
+
+        // sprintf(bin_filename, "");
+
+        sprintf(cstep, "%d", step);
+
+        strcat(bin_filename, buffername);
+        strcat(bin_filename, "_");
+        strcat(bin_filename, cstep);
+        strcat(bin_filename, ".mesh");
+
+        printf("Savefile %s \n", bin_filename);
+
+        save_ptr = fopen(bin_filename, "wb");
+
+        // Start file with time stamp
+        AcReal write_long_buf = (AcReal)t_step;
+        fwrite(&write_long_buf, sizeof(AcReal), 1, save_ptr);
+        // Grid data
+        for (size_t i = 0; i < n; ++i) {
+            const AcReal point_val     = save_mesh.vertex_buffer[VertexBufferHandle(w)][i];
+            AcReal write_long_buf2 = (AcReal)point_val;
+            fwrite(&write_long_buf2, sizeof(AcReal), 1, save_ptr);
+        }
+        fclose(save_ptr);
+    }
+}
+
+// This funtion reads a run state from a set of C binaries.
+static inline void 
+read_mesh(AcMesh& read_mesh, const int step, AcReal* t_step)
+{
+    FILE* read_ptr;
+
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        const size_t n = acVertexBufferSize(read_mesh.info);
+
+        const char* buffername = vtxbuf_names[w];
+        char cstep[11];
+        char bin_filename[80] = "\0";
+
+        // sprintf(bin_filename, "");
+
+        sprintf(cstep, "%d", step);
+
+        strcat(bin_filename, buffername);
+        strcat(bin_filename, "_");
+        strcat(bin_filename, cstep);
+        strcat(bin_filename, ".mesh");
+
+        printf("Reading savefile %s \n", bin_filename);
+
+        read_ptr = fopen(bin_filename, "rb");
+
+        // Start file with time stamp
+        size_t result;
+        result = fread(t_step, sizeof(AcReal), 1, read_ptr);
+        // Read grid data
+        AcReal read_buf;
+        for (size_t i = 0; i < n; ++i) {
+            result = fread(&read_buf, sizeof(AcReal), 1, read_ptr);
+            read_mesh.vertex_buffer[VertexBufferHandle(w)][i] = read_buf;
+            if (int(result) != 1) {
+                fprintf(stderr, "Reading error in %s, element %i\n", vtxbuf_names[w], int(i));
+                fprintf(stderr, "Result = %i,  \n", int(result));
+            }
+        }
+        fclose(read_ptr);
+    }
+}
+
+// This function prints out the diagnostic values to std.out and also saves and
+// appends an ascii file to contain all the result.
+static inline void
+print_diagnostics(const int step, const AcReal dt, const AcReal t_step, FILE* diag_file,
+                  const AcReal sink_mass, const AcReal accreted_mass)
+{
+
+    AcReal buf_rms, buf_max, buf_min;
+    const int max_name_width = 16;
+
+    // Calculate rms, min and max from the velocity vector field
+    buf_max = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+    buf_min = acReduceVec(RTYPE_MIN, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+    buf_rms = acReduceVec(RTYPE_RMS, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+
+    // MV: The ordering in the earlier version was wrong in terms of variable
+    // MV: name and its diagnostics.
+    printf("Step %d, t_step %.3e, dt %e s\n", step, double(t_step), double(dt));
+    printf("  %*s: min %.3e,\trms %.3e,\tmax %.3e\n", max_name_width, "uu total", double(buf_min),
+           double(buf_rms), double(buf_max));
+    fprintf(diag_file, "%d %e %e %e %e %e ", step, double(t_step), double(dt), double(buf_min),
+            double(buf_rms), double(buf_max));
+
+    // Calculate rms, min and max from the variables as scalars
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+        buf_max = acReduceScal(RTYPE_MAX, VertexBufferHandle(i));
+        buf_min = acReduceScal(RTYPE_MIN, VertexBufferHandle(i));
+        buf_rms = acReduceScal(RTYPE_RMS, VertexBufferHandle(i));
+
+        printf("  %*s: min %.3e,\trms %.3e,\tmax %.3e\n", max_name_width, vtxbuf_names[i],
+               double(buf_min), double(buf_rms), double(buf_max));
+        fprintf(diag_file, "%e %e %e ", double(buf_min), double(buf_rms), double(buf_max));
+    }
+
+    if ((sink_mass >= AcReal(0.0)) || (accreted_mass >= AcReal(0.0))) {
+        fprintf(diag_file, "%e %e ", double(sink_mass), double(accreted_mass));
+    }
+
+    fprintf(diag_file, "\n");
+}
+
+/*
+    MV NOTE: At the moment I have no clear idea how to calculate magnetic
+    diagnostic variables from grid. Vector potential measures have a limited
+    value. TODO: Smart way to get brms, bmin and bmax.
+*/
+
+int
+run_simulation(const char* config_path)
+{
+    /* Parse configs */
+    AcMeshInfo mesh_info;
+    load_config(config_path, &mesh_info);
+
+    AcMesh* mesh = acmesh_create(mesh_info);
+    // TODO: This need to be possible to define in astaroth.conf
+    acmesh_init_to(INIT_TYPE_GAUSSIAN_RADIAL_EXPL, mesh);
+    // acmesh_init_to(INIT_TYPE_SIMPLE_CORE, mesh); //Initial condition for a collapse test
+
+#if LSINK
+    vertex_buffer_set(VTXBUF_ACCRETION, 0.0, mesh);
+#endif
+
+    // Read old binary if we want to continue from an existing snapshot 
+    // WARNING: Explicit specification of step needed!
+    const int start_step = mesh_info.int_params[AC_start_step];
+    AcReal t_step = 0.0;
+    if (start_step > 0) { 
+        read_mesh(*mesh, start_step, &t_step);
+    }
+ 
+    acInit(mesh_info);
+    acLoad(*mesh);
+
+    FILE* diag_file;
+    diag_file = fopen("timeseries.ts", "a");
+
+    // Generate the title row.
+    if (start_step == 0) {
+        fprintf(diag_file, "step  t_step  dt  uu_total_min  uu_total_rms  uu_total_max  ");
+        for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+            fprintf(diag_file, "%s_min  %s_rms  %s_max  ", vtxbuf_names[i], vtxbuf_names[i],
+                    vtxbuf_names[i]);
+        }
+    }
+#if LSINK
+    fprintf(diag_file, "sink_mass  accreted_mass  ");
+#endif
+    fprintf(diag_file, "\n");
+
+    write_mesh_info(&mesh_info);
+
+    if (start_step == 0) {
+#if LSINK
+        print_diagnostics(0, AcReal(.0), t_step, diag_file, mesh_info.real_params[AC_M_sink_init], 0.0);
+#else
+        print_diagnostics(0, AcReal(.0), t_step, diag_file, -1.0, -1.0);
+#endif
+    }
+
+    acBoundcondStep();
+    acStore(mesh);
+    if (start_step == 0) {
+        save_mesh(*mesh, 0, t_step);
+    }
+
+    const int max_steps      = mesh_info.int_params[AC_max_steps];
+    const int save_steps     = mesh_info.int_params[AC_save_steps];
+    const int bin_save_steps = mesh_info.int_params[AC_bin_steps]; 
+
+    const AcReal max_time   = mesh_info.real_params[AC_max_time]; 
+    const AcReal bin_save_t = mesh_info.real_params[AC_bin_save_t];
+    AcReal bin_crit_t = bin_save_t;
+
+    /* initialize random seed: */
+    srand(312256655);
+
+    /* Step the simulation */
+    AcReal accreted_mass = 0.0;
+    AcReal sink_mass     = 0.0;
+    for (int i = start_step + 1; i < max_steps; ++i) {
+        const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
+        const AcReal dt   = host_timestep(umax, mesh_info);
+
+#if LSINK
+
+        const AcReal sum_mass = acReduceScal(RTYPE_SUM, VTXBUF_ACCRETION);
+        accreted_mass         = accreted_mass + sum_mass;
+        sink_mass             = 0.0;
+        sink_mass             = mesh_info.real_params[AC_M_sink_init] + accreted_mass;
+        acLoadDeviceConstant(AC_M_sink, sink_mass);
+        vertex_buffer_set(VTXBUF_ACCRETION, 0.0, mesh);
+
+        int on_off_switch;
+        if (i < 1) {
+            on_off_switch = 0; // accretion is off till certain amount of steps.
+        }
+        else {
+            on_off_switch = 1;
+        }
+        acLoadDeviceConstant(AC_switch_accretion, on_off_switch);
+#else
+        accreted_mass = -1.0;
+        sink_mass     = -1.0;
+#endif
+
+#if LFORCING
+        const ForcingParams forcing_params = generateForcingParams(mesh_info);
+        loadForcingParamsToDevice(forcing_params);
+#endif
+
+        acIntegrate(dt);
+
+        t_step += dt;
+
+        
+
+        /* Save the simulation state and print diagnostics */
+        if ((i % save_steps) == 0) {
+
+            /*
+                print_diagnostics() writes out both std.out printout from the
+                results and saves the diagnostics into a table for ascii file
+                timeseries.ts.
+            */
+
+            print_diagnostics(i, dt, t_step, diag_file, sink_mass, accreted_mass);
+#if LSINK
+            printf("sink mass is: %.15e \n", double(sink_mass));
+            printf("accreted mass is: %.15e \n", double(accreted_mass));
+#endif
+            /*
+                We would also might want an XY-average calculating funtion,
+                which can be very useful when observing behaviour of turbulent
+                simulations. (TODO)
+            */
+        }
+
+        /* Save the simulation state and print diagnostics */
+        if ((i % bin_save_steps) == 0 || t_step >= bin_crit_t) {
+
+            /*
+                This loop saves the data into simple C binaries which can be
+                used for analysing the data snapshots closely.
+
+                The updated mesh will be located on the GPU. Also all calls
+                to the astaroth interface (functions beginning with ac*) are
+                assumed to be asynchronous, so the meshes must be also synchronized
+                before transferring the data to the CPU. Like so:
+
+                acBoundcondStep();
+                acStore(mesh);
+            */
+            acBoundcondStep();
+            acStore(mesh);
+
+            save_mesh(*mesh, i, t_step);
+
+            bin_crit_t += bin_save_t;
+        }
+
+        // End loop if max time reached. 
+        if (max_time > AcReal(0.0)) {
+            if (t_step >= max_time) {
+                printf("Time limit reached! at t = %e \n", double(t_step));
+                break;
+                
+            }
+        }
+
+    }
+
+    //////Save the final snapshot
+    ////acSynchronize();
+    ////acStore(mesh);
+
+    ////save_mesh(*mesh, , t_step);
+
+    acQuit();
+    acmesh_destroy(mesh);
+
+    fclose(diag_file);
+
+    return 0;
+}
--- a/samples/standalone/timer_hires.h
+++ b/samples/standalone/timer_hires.h
@@ -0,0 +1,63 @@
+/*
+    Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+  @file
+ \brief High-resolution timer.
+
+    Usage:
+        Timer t;
+        timer_reset(&t);
+        timer_diff_nsec(t);
+
+    If there are issues, try compiling with -std=gnu11 -lrt
+ */
+#pragma once
+#include <stdio.h> // perror
+#include <time.h>
+
+typedef struct timespec Timer;
+// Contains at least the following members:
+// time_t tv_sec;
+// long tv_nsec;
+
+static inline int
+timer_reset(Timer* t)
+{
+    const int retval = clock_gettime(CLOCK_REALTIME, t);
+    if (retval == -1)
+        perror("clock_gettime failure");
+
+    return retval;
+}
+
+static inline long
+timer_diff_nsec(const Timer start)
+{
+    Timer end;
+    timer_reset(&end);
+    const long diff = (end.tv_sec - start.tv_sec) * 1000000000l + (end.tv_nsec - start.tv_nsec);
+    return diff;
+}
+
+static inline void
+timer_diff_print(const Timer t)
+{
+    printf("Time elapsed: %g ms\n", timer_diff_nsec(t) / 1e6);
+}