Merge branch 'master' into io_improvement_20190924

2019-10-02 10:47:51 +08:00
parent d7bd92ed1b 7d76250f70
commit cbbfd941f8
32 changed files with 848 additions and 167 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,4 @@ makefile~
 *~

 build/
-src/core/kernels/stencil_assembly.cuh
-src/core/kernels/stencil_process.cuh
 tests/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,23 +18,21 @@ find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++ PATHS ENV PATH NO_DEFAULT_PA
 ## Project settings
 project(astaroth C CXX)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)

 ## Options
 option(BUILD_DEBUG              "Builds the program with extensive error checking"          OFF)
 option(BUILD_STANDALONE         "Builds the standalone Astaroth"                            ON)
+option(BUILD_UTILS              "Builds the utility library"                                ON)
 option(BUILD_RT_VISUALIZATION   "Builds the module for real-time visualization using SDL2"  OFF)
 option(BUILD_C_API_TEST         "Builds a C program to test whether the API is conformant"  OFF)
 option(BUILD_MPI_TEST           "Builds a C program to test whether MPI works"              OFF)
 option(DOUBLE_PRECISION         "Generates double precision code"                           OFF)
 option(MULTIGPU_ENABLED         "If enabled, uses all the available GPUs"                   ON)

-## Compile the Astaroth Code compiler
-find_package(FLEX)
-find_package(BISON)
-execute_process (
-    COMMAND ./build_acc.sh
-    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/acc
-)
+## Compile the Astaroth Code Compiler
+add_subdirectory(acc)
+
 # NOTE: Manually defined DSL_MODULE_DIR must be set relative to the project root, not the actual
 # build directory!
 # NO! ../acc/mhd_solver
@@ -50,12 +48,13 @@ set(DSL_HEADERS "${PROJECT_BINARY_DIR}/stencil_assembly.cuh"
                "${PROJECT_BINARY_DIR}/stencil_process.cuh"
                "${PROJECT_BINARY_DIR}/stencil_defines.h")
 add_custom_command (
-    COMMENT "Building AC objects ${DSL_MODULE_DIR}"
+    COMMENT "Building ACC objects ${DSL_MODULE_DIR}"
    COMMAND ${CMAKE_SOURCE_DIR}/scripts/compile_acc_module.sh ${DSL_MODULE_DIR}
    DEPENDS ${DSL_SOURCES}
    OUTPUT ${DSL_HEADERS}
 )
 add_custom_target(dsl_headers ALL DEPENDS ${DSL_HEADERS})
+add_dependencies(dsl_headers acc)

 ## Build types
 # Available types (case-sensitive):
@@ -103,3 +102,8 @@ endif ()
 if (BUILD_MPI_TEST)
    add_subdirectory(src/mpitest)
 endif ()
+
+if (BUILD_UTILS)
+    add_subdirectory(src/utils)
+    add_subdirectory(src/exampleproject) # TODO this should not be here
+endif ()
--- a/acc/.gitignore
+++ b/acc/.gitignore
@@ -1,5 +0,0 @@
-build
-testbin
-
-# Except this file
-!.gitignore
--- a/acc/CMakeLists.txt
+++ b/acc/CMakeLists.txt
@@ -0,0 +1,12 @@
+## CMake settings
+cmake_minimum_required (VERSION 3.5.1)
+find_program(CMAKE_C_COMPILER NAMES $ENV{CC} gcc PATHS ENV PATH NO_DEFAULT_PATH)
+
+## Project settings
+project(acc C)
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_STANDARD_REQUIRED ON)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
+
+include_directories(src)
+add_subdirectory(src)
--- a/acc/compile.sh
+++ b/acc/compile.sh
@@ -1,16 +1,16 @@
 #!/bin/bash
-# Usage ./compile <source file> <gcc preprocessor flags, f.ex. -I some/path>
-
+# Usage ./compile <acc binary> <source file> <gcc preprocessor flags, f.ex. -I some/path>
 ACC_DIR=`dirname $0`
+ACC_BINARY=$1

-FULL_NAME=$(basename -- $1)
+FULL_NAME=$(basename -- $2)
 FILENAME="${FULL_NAME%.*}"
 EXTENSION="${FULL_NAME##*.}"

 if [ "${EXTENSION}" = "sas" ]; then
    COMPILE_FLAGS="-sas" # Generate stencil assembly stage
    CUH_FILENAME="stencil_assembly.cuh"
-    echo "-- Generating stencil assembly stage: ${FILENAME}.sas -> ${CUH_FILENAME}"
+    echo "-- Generating stencil assembly stage:    ${FILENAME}.sas -> ${CUH_FILENAME}"
 elif [ "${EXTENSION}" = "sps" ]; then
    COMPILE_FLAGS="-sps" # Generate stencil processing stage
    CUH_FILENAME="stencil_process.cuh"
@@ -18,11 +18,11 @@ elif [ "${EXTENSION}" = "sps" ]; then
 elif [ "${EXTENSION}" = "sdh" ]; then
    COMPILE_FLAGS="-sdh" # Generate stencil definition header
    CUH_FILENAME="stencil_defines.h"
-    echo "-- Generating stencil definition header:  ${FILENAME}.sdh -> ${CUH_FILENAME}"
+    echo "-- Generating stencil definition header: ${FILENAME}.sdh -> ${CUH_FILENAME}"
 else
    echo "-- Error: unknown extension" ${EXTENSION} "of file" ${FULL_NAME}
    echo "-- Extension should be either .sas, .sps or .sdh"
    exit
 fi

-${ACC_DIR}/preprocess.sh $@ | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME}
+${ACC_DIR}/preprocess.sh ${@:2} | ${ACC_BINARY} ${COMPILE_FLAGS} > ${CUH_FILENAME}
--- a/acc/mhd_solver/.gitignore
+++ b/acc/mhd_solver/.gitignore
@@ -1,5 +0,0 @@
-build
-testbin
-
-# Except this file
-!.gitignore
--- a/acc/mhd_solver/stencil_process.sps
+++ b/acc/mhd_solver/stencil_process.sps
@@ -23,7 +23,7 @@ gradients(in VectorField uu)
    return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
 }

-#if LSINK 
+#if LSINK
 Vector
 sink_gravity(int3 globalVertexIdx){
    int accretion_switch = int(AC_switch_accretion);
@@ -35,11 +35,11 @@ sink_gravity(int3 globalVertexIdx){
        const Scalar sink_mass = AC_M_sink;
        const Vector sink_pos = (Vector){AC_sink_pos_x,
                                         AC_sink_pos_y,
-                                         AC_sink_pos_z}; 
+                                         AC_sink_pos_z};
        const Scalar distance = length(grid_pos - sink_pos);
        const Scalar soft = AC_soft;
        //MV: The commit 083ff59 had AC_G_const defined wrong here in DSL making it exxessively strong.
-        //MV: Scalar gravity_magnitude = ... below is correct! 
+        //MV: Scalar gravity_magnitude = ... below is correct!
        const Scalar gravity_magnitude = (AC_G_const * sink_mass) / pow(((distance * distance) +  soft*soft), 1.5);
        const Vector direction = (Vector){(sink_pos.x - grid_pos.x) / distance,
                                          (sink_pos.y - grid_pos.y) / distance,
@@ -60,14 +60,14 @@ truelove_density(in ScalarField lnrho){
    const Scalar rho = exp(value(lnrho));
    const Scalar Jeans_length_squared = (M_PI * AC_cs2_sound) / (AC_G_const * rho);
    const Scalar TJ_rho = ((M_PI) * ((AC_dsx * AC_dsx) / Jeans_length_squared) * AC_cs2_sound) / (AC_G_const * AC_dsx * AC_dsx);
-    //TODO: AC_dsx will cancel out, deal with it later for optimization.      
- 
+    //TODO: AC_dsx will cancel out, deal with it later for optimization.
+
    Scalar accretion_rho = TJ_rho;
-       
+
    return accretion_rho;
 }

-// This controls accretion of density/mass to the sink particle. 
+// This controls accretion of density/mass to the sink particle.
 Scalar
 sink_accretion(int3 globalVertexIdx, in ScalarField lnrho, Scalar dt){
    const Vector grid_pos = (Vector){(globalVertexIdx.x - DCONST(AC_nx_min)) * AC_dsx,
@@ -78,11 +78,11 @@ sink_accretion(int3 globalVertexIdx, in ScalarField lnrho, Scalar dt){
                                     AC_sink_pos_z};
    const Scalar profile_range = AC_accretion_range;
    const Scalar accretion_distance = length(grid_pos - sink_pos);
-    int accretion_switch = AC_switch_accretion; 
-    Scalar accretion_density; 
+    int accretion_switch = AC_switch_accretion;
+    Scalar accretion_density;
    Scalar weight;

-    if (accretion_switch == 1){   
+    if (accretion_switch == 1){
        if ((accretion_distance) <= profile_range){
            //weight = Scalar(1.0);
            //Hann window function
@@ -91,23 +91,23 @@ sink_accretion(int3 globalVertexIdx, in ScalarField lnrho, Scalar dt){
        } else {
            weight = Scalar(0.0);
        }
-        
-        //Truelove criterion is used as a kind of arbitrary density floor. 
+
+        //Truelove criterion is used as a kind of arbitrary density floor.
        const Scalar lnrho_min = log(truelove_density(lnrho));
-        Scalar rate;     
+        Scalar rate;
        if (value(lnrho) > lnrho_min) {
            rate = (exp(value(lnrho)) - exp(lnrho_min)) / dt;
-        } else { 
+        } else {
            rate = Scalar(0.0);
        }
        accretion_density = weight * rate ;
    } else {
-        accretion_density = Scalar(0.0); 
+        accretion_density = Scalar(0.0);
    }
    return accretion_density;
-} 
+}

-// This controls accretion of velocity to the sink particle. 
+// This controls accretion of velocity to the sink particle.
 Vector
 sink_accretion_velocity(int3 globalVertexIdx, in VectorField uu, Scalar dt) {
    const Vector grid_pos = (Vector){(globalVertexIdx.x - DCONST(AC_nx_min)) * AC_dsx,
@@ -117,15 +117,15 @@ sink_accretion_velocity(int3 globalVertexIdx, in VectorField uu, Scalar dt) {
                                     AC_sink_pos_y,
                                     AC_sink_pos_z};
    const Scalar profile_range = AC_accretion_range;
-    const Scalar accretion_distance = length(grid_pos - sink_pos);   
-    int accretion_switch = AC_switch_accretion; 
-    Vector accretion_velocity; 
+    const Scalar accretion_distance = length(grid_pos - sink_pos);
+    int accretion_switch = AC_switch_accretion;
+    Vector accretion_velocity;

    if (accretion_switch == 1){
        Scalar weight;
        // Step function weighting
-        // Arch of a cosine function? 
-        // Cubic spline x^3 - x in range [-0.5 , 0.5] 
+        // Arch of a cosine function?
+        // Cubic spline x^3 - x in range [-0.5 , 0.5]
        if ((accretion_distance) <= profile_range){
            //weight = Scalar(1.0);
            //Hann window function
@@ -136,12 +136,12 @@ sink_accretion_velocity(int3 globalVertexIdx, in VectorField uu, Scalar dt) {
        }


-        Vector rate;     
-        // MV: Could we use divergence here ephasize velocitie which are compressive and 
-        // MV: not absorbins stuff that would not be accreted anyway? 
+        Vector rate;
+        // MV: Could we use divergence here ephasize velocitie which are compressive and
+        // MV: not absorbins stuff that would not be accreted anyway?
        if (length(value(uu)) > Scalar(0.0)) {
            rate = (Scalar(1.0)/dt) * value(uu);
-        } else { 
+        } else {
            rate = (Vector){0.0, 0.0, 0.0};
        }
        accretion_velocity = weight * rate ;
@@ -154,7 +154,7 @@ sink_accretion_velocity(int3 globalVertexIdx, in VectorField uu, Scalar dt) {


 Scalar
-continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) 
+continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
 {
    return -dot(value(uu), gradient(lnrho))
 #if LUPWD
@@ -162,7 +162,7 @@ continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar
           + upwd_der6(uu, lnrho)
 #endif
 #if LSINK
-	   - sink_accretion(globalVertexIdx, lnrho, dt) / exp(value(lnrho)) 
+	   - sink_accretion(globalVertexIdx, lnrho, dt) / exp(value(lnrho))
 #endif
           - divergence(uu);
 }
@@ -171,33 +171,33 @@ continuity(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar

 #if LENTROPY
 Vector
-momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt) 
+momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField ss, in VectorField aa, Scalar dt)
 {
    const Matrix S   = stress_tensor(uu);
    const Scalar cs2 = AC_cs2_sound * exp(AC_gamma * value(ss) / AC_cp_sound +
                                          (AC_gamma - 1) * (value(lnrho) - AC_lnrho0));
-    const Vector j   = (Scalar(1.) / AC_mu0) *
+    const Vector j   = (Scalar(1.0) / AC_mu0) *
                     (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
    const Vector B = curl(aa);
    // TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD?
-    const Scalar inv_rho = Scalar(1.) / exp(value(lnrho));
+    const Scalar inv_rho = Scalar(1.0) / exp(value(lnrho));

    // Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
    // \1
    const Vector mom = -mul(gradients(uu), value(uu)) -
-                       cs2 * ((Scalar(1.) / AC_cp_sound) * gradient(ss) + gradient(lnrho)) +
+                       cs2 * ((Scalar(1.0) / AC_cp_sound) * gradient(ss) + gradient(lnrho)) +
                       inv_rho * cross(j, B) +
                       AC_nu_visc *
-                           (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
-                            Scalar(2.) * mul(S, gradient(lnrho))) +
+                           (laplace_vec(uu) + Scalar(1.0 / 3.0) * gradient_of_divergence(uu) +
+                            Scalar(2.0) * mul(S, gradient(lnrho))) +
                       AC_zeta * gradient_of_divergence(uu)
    #if LSINK
                       //Gravity term
-                       + sink_gravity(globalVertexIdx) 
+                       + sink_gravity(globalVertexIdx)
                       //Corresponding loss of momentum
                       -     //(Scalar(1.0) / Scalar( (AC_dsx*AC_dsy*AC_dsz) * exp(value(lnrho)))) *  // Correction factor by unit mass
    	                 sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
-                       ; 
+                       ;
    #else
                       ;
    #endif
@@ -205,7 +205,7 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in Scala
 }
 #elif LTEMPERATURE
 Vector
-momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt) 
+momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in ScalarField tt)
 {
    Vector mom;

@@ -215,8 +215,8 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in Scala
                                 (gradient(tt) + value(tt) * gradient(lnrho));

    mom = -mul(gradients(uu), value(uu)) - pressure_term +
-          AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
-                        Scalar(2.) * mul(S, gradient(lnrho))) +
+          AC_nu_visc * (laplace_vec(uu) + Scalar(1.0 / 3.0) * gradient_of_divergence(uu) +
+                        Scalar(2.0) * mul(S, gradient(lnrho))) +
          AC_zeta * gradient_of_divergence(uu)
    #if LSINK
          + sink_gravity(globalVertexIdx);
@@ -231,7 +231,7 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in Scala
 }
 #else
 Vector
-momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt) 
+momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar dt)
 {
    Vector mom;

@@ -240,15 +240,15 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar d
    // Isothermal: we have constant speed of sound

    mom = -mul(gradients(uu), value(uu)) - AC_cs2_sound * gradient(lnrho) +
-          AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
-                        Scalar(2.) * mul(S, gradient(lnrho))) +
+          AC_nu_visc * (laplace_vec(uu) + Scalar(1.0 / 3.0) * gradient_of_divergence(uu) +
+                        Scalar(2.0) * mul(S, gradient(lnrho))) +
          AC_zeta * gradient_of_divergence(uu)
    #if LSINK
          + sink_gravity(globalVertexIdx)
          //Corresponding loss of momentum
          -     //(Scalar(1.0) / Scalar( (AC_dsx*AC_dsy*AC_dsz) * exp(value(lnrho)))) *  // Correction factor by unit mass
    	    sink_accretion_velocity(globalVertexIdx, uu, dt) // As in Lee et al.(2014)
-                                              ; 
+                                              ;
    #else
                                              ;
    #endif
@@ -283,7 +283,7 @@ Scalar
 lnT(in ScalarField ss, in ScalarField lnrho)
 {
    const Scalar lnT = AC_lnT0 + AC_gamma * value(ss) / AC_cp_sound +
-                       (AC_gamma - Scalar(1.)) * (value(lnrho) - AC_lnrho0);
+                       (AC_gamma - Scalar(1.0)) * (value(lnrho) - AC_lnrho0);
    return lnT;
 }

@@ -291,14 +291,14 @@ lnT(in ScalarField ss, in ScalarField lnrho)
 Scalar
 heat_conduction(in ScalarField ss, in ScalarField lnrho)
 {
-    const Scalar inv_AC_cp_sound = AcReal(1.) / AC_cp_sound;
+    const Scalar inv_AC_cp_sound = AcReal(1.0) / AC_cp_sound;

    const Vector grad_ln_chi = -gradient(lnrho);

    const Scalar first_term = AC_gamma * inv_AC_cp_sound * laplace(ss) +
-                              (AC_gamma - AcReal(1.)) * laplace(lnrho);
+                              (AC_gamma - AcReal(1.0)) * laplace(lnrho);
    const Vector second_term = AC_gamma * inv_AC_cp_sound * gradient(ss) +
-                               (AC_gamma - AcReal(1.)) * gradient(lnrho);
+                               (AC_gamma - AcReal(1.0)) * gradient(lnrho);
    const Vector third_term = AC_gamma * (inv_AC_cp_sound * gradient(ss) + gradient(lnrho)) +
                              grad_ln_chi;

@@ -316,11 +316,11 @@ Scalar
 entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa)
 {
    const Matrix S      = stress_tensor(uu);
-    const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
-    const Vector j      = (Scalar(1.) / AC_mu0) *
+    const Scalar inv_pT = Scalar(1.0) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
+    const Vector j      = (Scalar(1.0) / AC_mu0) *
                     (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
    const Scalar RHS = H_CONST - C_CONST + AC_eta * (AC_mu0)*dot(j, j) +
-                       Scalar(2.) * exp(value(lnrho)) * AC_nu_visc * contract(S) +
+                       Scalar(2.0) * exp(value(lnrho)) * AC_nu_visc * contract(S) +
                       AC_zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);

    return -dot(value(uu), gradient(ss)) + inv_pT * RHS + heat_conduction(ss, lnrho);
@@ -335,7 +335,7 @@ heat_transfer(in VectorField uu, in ScalarField lnrho, in ScalarField tt)
    const Scalar heat_diffusivity_k = 0.0008; // 8e-4;
    return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) +
           heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) +
-           AC_nu_visc * contract(S) * (Scalar(1.) / AC_cv_sound) -
+           AC_nu_visc * contract(S) * (Scalar(1.0) / AC_cv_sound) -
           (AC_gamma - 1) * value(tt) * divergence(uu);
 }
 #endif
@@ -343,18 +343,18 @@ heat_transfer(in VectorField uu, in ScalarField lnrho, in ScalarField tt)
 #if LFORCING
 Vector
    simple_vortex_forcing(Vector a, Vector b, Scalar magnitude){
-    int accretion_switch = AC_switch_accretion; 
-    
+    int accretion_switch = AC_switch_accretion;
+
    if (accretion_switch == 0){
        return magnitude * cross(normalized(b - a), (Vector){ 0, 0, 1}); // Vortex
-    } else { 
-        return (Vector){0,0,0};  
+    } else {
+        return (Vector){0,0,0};
    }
-}        
+}
 Vector
    simple_outward_flow_forcing(Vector a, Vector b, Scalar magnitude){
    int accretion_switch = AC_switch_accretion;
-    if (accretion_switch == 0){            
+    if (accretion_switch == 0){
        return magnitude * (1 / length(b - a)) * normalized(b - a); // Outward flow
    } else {
        return (Vector){0,0,0};
@@ -403,7 +403,7 @@ forcing(int3 globalVertexIdx, Scalar dt)
    int accretion_switch = AC_switch_accretion;
    if (accretion_switch == 0){

-        Vector a = Scalar(.5) * (Vector){globalGridN.x * AC_dsx,
+        Vector a = Scalar(0.5) * (Vector){globalGridN.x * AC_dsx,
                                         globalGridN.y * AC_dsy,
                                         globalGridN.z * AC_dsz}; // source (origin)
        Vector xx = (Vector){(globalVertexIdx.x - DCONST(AC_nx_min)) * AC_dsx,
@@ -411,27 +411,27 @@ forcing(int3 globalVertexIdx, Scalar dt)
                             (globalVertexIdx.z - DCONST(AC_nz_min)) * AC_dsz}; // sink (current index)
        const Scalar cs2 = AC_cs2_sound;
        const Scalar cs = sqrt(cs2);
-    
+
        //Placeholders until determined properly
        Scalar magnitude = AC_forcing_magnitude;
        Scalar phase     = AC_forcing_phase;
        Vector k_force   = (Vector){AC_k_forcex,   AC_k_forcey,   AC_k_forcez};
        Vector ff_re     = (Vector){AC_ff_hel_rex, AC_ff_hel_rey, AC_ff_hel_rez};
        Vector ff_im     = (Vector){AC_ff_hel_imx, AC_ff_hel_imy, AC_ff_hel_imz};
-    
-    
+
+
        //Determine that forcing funtion type at this point.
        //Vector force = simple_vortex_forcing(a, xx, magnitude);
        //Vector force = simple_outward_flow_forcing(a, xx, magnitude);
        Vector force   = helical_forcing(magnitude, k_force, xx, ff_re,ff_im, phase);
-    
+
        //Scaling N = magnitude*cs*sqrt(k*cs/dt)  * dt
        const Scalar NN = cs*sqrt(AC_kaver*cs);
        //MV: Like in the Pencil Code. I don't understandf the logic here.
        force.x = sqrt(dt)*NN*force.x;
        force.y = sqrt(dt)*NN*force.y;
        force.z = sqrt(dt)*NN*force.z;
-    
+
        if (is_valid(force)) { return force; }
        else                 { return (Vector){0, 0, 0}; }
    } else {
@@ -496,7 +496,7 @@ solve()

 #if LSINK
    out_accretion = rk3(out_accretion, accretion, sink_accretion(globalVertexIdx, lnrho, dt), dt);// unit now is rho!
-          
+
    if (step_number == 2) {
        out_accretion = out_accretion * AC_dsx * AC_dsy * AC_dsz;// unit is now mass!
    }
--- a/acc/pc_mhd_solver/.gitignore
+++ b/acc/pc_mhd_solver/.gitignore
@@ -1,5 +0,0 @@
-build
-testbin
-
-# Except this file
-!.gitignore
--- a/acc/preprocess.sh
+++ b/acc/preprocess.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 # Preprocesses the give file using GCC. This script is usually automatically called in
 # ./compile.sh, but may be called also individually for debugging purposes.
-cat $1 | gcc ${@:2} -x c -E - | sed "s/#.*//g"
+cat $1 | gcc ${@:2} -x c -E - | sed "s/#.*//g" | tee ${PWD}/$(basename -- "$1").preprocessed
--- a/acc/src/CMakeLists.txt
+++ b/acc/src/CMakeLists.txt
@@ -0,0 +1,10 @@
+## Compile the Astaroth Code compiler
+find_package(BISON)
+find_package(FLEX 2.5.5 REQUIRED)
+
+bison_target(acc_parser acc.y ${CMAKE_CURRENT_BINARY_DIR}/acc.tab.c)
+flex_target(acc_lexer acc.l ${CMAKE_CURRENT_BINARY_DIR}/acc.yy.c)
+add_flex_bison_dependency(acc_lexer acc_parser)
+
+add_executable(acc code_generator.c ${BISON_acc_parser_OUTPUTS} ${FLEX_acc_lexer_OUTPUTS})
+target_include_directories(acc PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
--- a/acc/src/acc.l
+++ b/acc/src/acc.l
@@ -1,4 +1,5 @@
 %option yylineno
+%option noyywrap

 D [0-9]
 L [a-zA-Z_]
@@ -36,8 +37,9 @@ L [a-zA-Z_]

 "return"    { return RETURN; }

-{D}+"."?{D}*[flud]? { return NUMBER; } /* Literals */
-"."{D}+[flud]?      { return NUMBER; }
+{D}+"."{D}+         { return REAL_NUMBER; } /* Literals */
+{D}+"."{D}+[fd]+    { return NUMBER; }
+{D}+[lu]*           { return NUMBER; }
 {L}({L}|{D})*       { return IDENTIFIER; }
 \"(.)*\"            { return IDENTIFIER; } /* String */

--- a/acc/src/acc.y
+++ b/acc/src/acc.y
@@ -14,7 +14,7 @@ int yyget_lineno();
 %}

 %token CONSTANT IN OUT UNIFORM
-%token IDENTIFIER NUMBER
+%token IDENTIFIER NUMBER REAL_NUMBER
 %token RETURN
 %token SCALAR VECTOR MATRIX SCALARFIELD SCALARARRAY
 %token VOID INT INT3 COMPLEX
@@ -217,7 +217,8 @@ type_specifier: VOID
 identifier: IDENTIFIER                                                                  { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }
          ;

-number: NUMBER                                                                          { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
+number: REAL_NUMBER                                                                     { $$ = astnode_create(NODE_REAL_NUMBER, NULL, NULL); astnode_set_buffer(yytext, $$); }
+      | NUMBER                                                                          { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
      ;

 return: RETURN                                                                          { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
@@ -235,4 +236,5 @@ int
 yyerror(const char* str)
 {
    fprintf(stderr, "%s on line %d when processing char %d: [%s]\n", str, yyget_lineno(), *yytext, yytext);
+    return EXIT_FAILURE;
 }
--- a/acc/src/ast.h
+++ b/acc/src/ast.h
@@ -7,14 +7,15 @@
    Statements: return value
                block
 */
-#include <stdlib.h>
 #include <assert.h>
+#include <stdlib.h>

 #define BUFFER_SIZE (4096)

 #define GEN_ID(X) X
 #define GEN_STR(X) #X

+// clang-format off
 #define FOR_NODE_TYPES(FUNC) \
    FUNC(NODE_UNKNOWN), \
    FUNC(NODE_DEFINITION), \
@@ -27,16 +28,18 @@
    FUNC(NODE_FUNCTION_DECLARATION), \
    FUNC(NODE_COMPOUND_STATEMENT), \
    FUNC(NODE_FUNCTION_PARAMETER_DECLARATION), \
-    FUNC(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION)
+    FUNC(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION), \
+    FUNC(NODE_REAL_NUMBER)
+// clang-format on

-/* 
+/*
 // Recreating strdup is not needed when using the GNU compiler.
 // Let's also just say that anything but the GNU
 // compiler is NOT supported, since there are also
-// some gcc-specific calls in the files generated 
+// some gcc-specific calls in the files generated
 // by flex and being completely compiler-independent is
 // not a priority right now
-#ifndef strdup 
+#ifndef strdup
 static inline char*
 strdup(const char* in)
 {
@@ -53,36 +56,32 @@ strdup(const char* in)
 #endif
 */

-typedef enum {
-    FOR_NODE_TYPES(GEN_ID),
-    NUM_NODE_TYPES
-} NodeType;
+typedef enum { FOR_NODE_TYPES(GEN_ID), NUM_NODE_TYPES } NodeType;

 typedef struct astnode_s {
    int id;
    struct astnode_s* lhs;
    struct astnode_s* rhs;
-    NodeType type;          // Type of the AST node
-    char* buffer;           // Indentifiers and other strings (empty by default)
+    NodeType type; // Type of the AST node
+    char* buffer;  // Indentifiers and other strings (empty by default)

-    int token;              // Type of a terminal (that is not a simple char)
-    int prefix;            // Tokens. Also makes the grammar since we don't have
-    int infix;             // to divide it into max two-child rules
-    int postfix;           // (which makes it much harder to read)
+    int token;   // Type of a terminal (that is not a simple char)
+    int prefix;  // Tokens. Also makes the grammar since we don't have
+    int infix;   // to divide it into max two-child rules
+    int postfix; // (which makes it much harder to read)
 } ASTNode;

-
 static inline ASTNode*
 astnode_create(const NodeType type, ASTNode* lhs, ASTNode* rhs)
 {
    ASTNode* node = malloc(sizeof(node[0]));

    static int id_counter = 0;
-    node->id     = id_counter++;
-    node->type   = type;
-    node->lhs    = lhs;
-    node->rhs    = rhs;
-    node->buffer = NULL;
+    node->id              = id_counter++;
+    node->type            = type;
+    node->lhs             = lhs;
+    node->rhs             = rhs;
+    node->buffer          = NULL;

    node->prefix = node->infix = node->postfix = 0;

@@ -107,7 +106,6 @@ astnode_destroy(ASTNode* node)
    free(node);
 }

-
 extern ASTNode* root;

 /*
--- a/acc/src/code_generator.c
+++ b/acc/src/code_generator.c
@@ -163,6 +163,7 @@ static void
 rm_symbol(const int handle)
 {
    assert(handle >= 0 && handle < num_symbols);
+    assert(num_symbols > 0);

    if (&symbol_table[handle] != &symbol_table[num_symbols - 1])
        memcpy(&symbol_table[handle], &symbol_table[num_symbols - 1], sizeof(Symbol));
@@ -179,7 +180,7 @@ print_symbol(const int handle)
                            symbol_table[handle].identifier};
    const size_t num_fields = sizeof(fields) / sizeof(fields[0]);

-    for (int i = 0; i < num_fields; ++i)
+    for (size_t i = 0; i < num_fields; ++i)
        if (fields[i])
            printf("%s ", fields[i]);
 }
@@ -252,19 +253,19 @@ translate_latest_symbol(void)
    }
 }

-static void
+static inline void
 print_symbol_table(void)
 {
    for (int i = 0; i < num_symbols; ++i) {
        printf("%d: ", i);
-        const char* fields[]    = {translate(symbol_table[i].type_qualifier),
+        const char* fields[] = {translate(symbol_table[i].type_qualifier),
                                translate(symbol_table[i].type_specifier),
                                symbol_table[i].identifier};
-        const size_t num_fields = sizeof(fields) / sizeof(fields[0]);

-        for (int i = 0; i < num_fields; ++i)
-            if (fields[i])
-                printf("%s ", fields[i]);
+        const size_t num_fields = sizeof(fields) / sizeof(fields[0]);
+        for (size_t j = 0; j < num_fields; ++j)
+            if (fields[j])
+                printf("%s ", fields[j]);

        if (symbol_table[i].type == SYMBOLTYPE_FUNCTION)
            printf("(function)");
@@ -394,16 +395,29 @@ traverse(const ASTNode* node)
                // Do a regular translation
                if (translate(node->token))
                    printf("%s ", translate(node->token));
-                if (node->buffer)
-                    printf("%s ", node->buffer);
+                if (node->buffer) {
+                    if (node->type == NODE_REAL_NUMBER) {
+                        printf("%s(%s) ", translate(SCALAR),
+                               node->buffer); // Cast to correct precision
+                    }
+                    else {
+                        printf("%s ", node->buffer);
+                    }
+                }
            }
        }
        else {
            // Do a regular translation
            if (translate(node->token))
                printf("%s ", translate(node->token));
-            if (node->buffer)
-                printf("%s ", node->buffer);
+            if (node->buffer) {
+                if (node->type == NODE_REAL_NUMBER) {
+                    printf("%s(%s) ", translate(SCALAR), node->buffer); // Cast to correct precision
+                }
+                else {
+                    printf("%s ", node->buffer);
+                }
+            }
        }
    }

--- a/acc/test_solver/stencil_assembly.sas
+++ b/acc/test_solver/stencil_assembly.sas
@@ -0,0 +1,36 @@
+#include "stencil_definition.sdh"
+
+
+//JP NOTE IMPORTANT/////////////////////////////////////////////////////////////////////////////////
+// These functions are defined here temporarily.
+//
+// Currently the built-in functions (derx, derxx etc) are defined in CUDA in integrate.cuh.
+// This is bad. Instead the built-in functions should be defined in the DSL, and be "includable"
+// as a standard DSL library, analogous to f.ex. stdlib.h in C.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+Preprocessed Scalar
+value(in ScalarField vertex)
+{
+    return vertex[vertexIdx];
+}
+
+Preprocessed Vector
+gradient(in ScalarField vertex)
+{
+    return (Vector){derx(vertexIdx, vertex), dery(vertexIdx, vertex), derz(vertexIdx, vertex)};
+}
+
+Preprocessed Matrix
+hessian(in ScalarField vertex)
+{
+    Matrix hessian;
+
+    hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex),
+                              derxz(vertexIdx, vertex)};
+    hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
+    hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
+
+    return hessian;
+}
--- a/acc/test_solver/stencil_definition.sdh
+++ b/acc/test_solver/stencil_definition.sdh
@@ -0,0 +1,18 @@
+//JP NOTE IMPORTANT/////////////////////////////////////////////////////////////////////////////////
+// AC_dsx etc are defined here temporarily (otherwise does not compile).
+//
+// These should ultimately be defined in the standard DSL libraries.
+// See test_solver/stencil_assembly.sas for more info.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+uniform Scalar AC_dsx;// TODO include these from the std lib
+uniform Scalar AC_dsy;
+uniform Scalar AC_dsz;
+uniform Scalar AC_inv_dsx;
+uniform Scalar AC_inv_dsy;
+uniform Scalar AC_inv_dsz;
+uniform Scalar AC_dt;
+
+uniform ScalarField VTXBUF_A;
+uniform ScalarField VTXBUF_B;
+uniform ScalarField VTXBUF_C;
--- a/acc/test_solver/stencil_process.sps
+++ b/acc/test_solver/stencil_process.sps
@@ -0,0 +1,18 @@
+#include "stencil_definition.sdh"
+
+Vector
+value(in VectorField uu)
+{
+    return (Vector){value(uu.x), value(uu.y), value(uu.z)};
+}
+
+in VectorField uu(VTXBUF_A, VTXBUF_B, VTXBUF_C);
+out VectorField out_uu(VTXBUF_A, VTXBUF_B, VTXBUF_C);
+
+Kernel void
+solve()
+{
+    Scalar dt = AC_dt;
+    Vector rate_of_change = (Vector){1, 2, 3};
+    out_uu = rk3(out_uu, uu, rate_of_change, dt);
+}
--- a/doc/Astaroth_API_specification_and_user_manual/API_specification_and_user_manual.md
+++ b/doc/Astaroth_API_specification_and_user_manual/API_specification_and_user_manual.md
@@ -200,13 +200,13 @@ AcResult acDeviceTransferVertexBufferWithOffset(const Device src_device, const S

 Loading uniforms (device constants)
 ```C
-AcResult acDeviceLoadScalarConstant(const Device device, const Stream stream,
+AcResult acDeviceLoadScalarUniform(const Device device, const Stream stream,
                                    const AcRealParam param, const AcReal value);
-AcResult acDeviceLoadVectorConstant(const Device device, const Stream stream,
+AcResult acDeviceLoadVectorUniform(const Device device, const Stream stream,
                                    const AcReal3Param param, const AcReal3 value);
-AcResult acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
+AcResult acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param,
                                 const int value);
-AcResult acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
+AcResult acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param,
                                  const int3 value);
 AcResult acDeviceLoadScalarArray(const Device device, const Stream stream,
                                 const ScalarArrayHandle handle, const AcReal* data,
@@ -462,7 +462,13 @@ function `Kernel solve()` can be called with `acDeviceKernel_solve()` via the AP

 # Astaroth Domain-Specific Language

-We designed the Astaroth Domain-specific Language (DSL) for expressing stencil computations in a high-level language that can be translated into efficient GPU kernels. The benefits of creating a DSL are two-fold. First, scientists using the language can focus on developing solvers and mathematical models using an easy-to-use language, while still achieving performance close to handwritten code. Second, procedures written in the DSL are decoupled from implementation, which allows us to extend the DSL compiler, say, to generate optimized code for several hardware generations without the users having to modify existing DSL sources.
+We designed the Astaroth Domain-specific Language (DSL) for expressing stencil computations in a
+high-level language that can be translated into efficient GPU kernels. The benefits of creating a
+DSL are two-fold. First, scientists using the language can focus on developing solvers and
+mathematical models using an easy-to-use language, while still achieving performance close to
+handwritten code. Second, procedures written in the DSL are decoupled from implementation, which
+allows us to extend the DSL compiler, say, to generate optimized code for several hardware
+generations without the users having to modify existing DSL sources.

 ## Overview

@@ -506,24 +512,217 @@ In addition to basic datatypes in C/C++/CUDA, such as int and int3, we provide t
 | ScalarField | An abstraction of a three-dimensional scalar field stored in device memory. Is implemented as a handle to a one-dimensional Scalar array consisting of input and output segments. The data is stored linearly in order i + j * mx + k * mx * my, given some vertex index (i, j, k) and mesh constisting of (mx, my, mz) vertices. | float[2][] or double[2][]                                                                            |
 | VectorField | An abstraction of a three-dimensional vector field stored in device memory. Is implemented as a tuple of three ScalarField handles.                                                                                                                                                                                               | Three distinct float[2][] or double[2][] arrays for each component. Stored as a structure of arrays. |

-## Built-in variables and functions
-
 ## Control flow
-// Runtime constants are as fast as compile-time constants as long as
-// 1) They are not placed in tight loops, especially those that inlcude global memory accesses, that could be unrolled
-// 2) They are not multiplied with each other
-// 3) At least 32 neighboring threads in the x-axis access the same constant

-// Safe and efficient to use as switches
+Conditional statements are expressed with the `if-else` construct. Unlike in C and C++, we require
+that the scope of the `if-else` statement is explicitly declared using braces `{` and `}` in order
+to avoid the ambiguity in the case
+```C
+if (a)
+    b;
+if (c)
+    d;
+else
+    e;
+```

-## Uniforms
-// Device constants
-// Loaded at runtime
+
+The syntax for conditional statements, even if there is only a single `if`, is
+```C
+if (a) {
+    b;
+}
+```

 ## Kernels

-// in and out
+Kernels are small programs executed on the device. Each kernel comprises of all the pipeline stages
+discussed in previous sections. Functions qualified with the type qualifier `Kernel` are analogous
+to `main` functions of host code. 
+
+Kernels must be declared in stencil processing files. DSL kernels can be called from host code
+using the API function
+```C
+AcResult acDeviceKernel_##identifier(const Device device, const Stream stream,
+                                     const int3 start, const int3 end);
+```
+, where ##identifier is the name of the kernel function.
+
+The following built-in variables are available in `Kernel`s.
+| Built-in variable | Description                                                                                                                                                                               |
+|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| vertexIdx         | Holds the local index of the currently processed vertex.                                                                                                                                  |
+| globalVertexIdx   | Holds the global index of the currently processed vertex. If there is only single device, then vertexIdx is the same as globalVertexIdx. Otherwise globalVertexIdx is offset accordingly. |
+| globalGridN       | Holds the dimensions of the computational domain.                                                                                                                                         |

 ## Preprocessed functions

-// Reading input fields
+The type qualifier `Preprocessed` indicates which functions should be evaluated immediately when
+entering a `Kernel` function. The return values of `Preprocessed` functions are cached and calling
+these functions during the stencil processing stage is essentially free. As main memory bandwidth is
+significantly slower than on-chip memories and registers, declaring reading-heavy functions as
+`Preprocessed` is critical for obtaining good performance in stencil codes. 
+
+`Preprocessed` functions may only be defined in stencil assembly files.
+
+The built-in variables `vertexIdx`, `globalVertexidx` and `globalGridN` are available in all
+`Preprocessed` functions.
+
+## Uniforms
+
+`Uniform`s are global device variables which stay constant for the duration of a kernel launch.
+`Uniform`s can be updated between kernel launches using the `acLoadScalarUniform` and related functions
+discussed in Section 'Loading and storing'. 
+
+`Uniform`s are declared in stencil definition headers. The header must be included in all files
+which use those uniforms. 
+
+`Uniform`s can be of type `Scalar`, `Vector`, `int`, `int3`, `ScalarField` and `ScalarArray`.
+
+> Note: As of 2019-10-01, the types `ScalarField` (DSL) and `VertexBuffer` (CUDA) are aliases of the
+same type. This naming may be changed in the future. 
+
+> Note: As of 2019-10-01, ScalarFields cannot be declared as uniforms. Instead, one should declare
+each component as a `ScalarField` and use them to construct a `VectorField` during the stencil
+processing stage. For example, `in VectorField(A, B, C);`, where `A`, `B` and `C` are
+`uniform ScalarField`s.
+
+> Note: As of 2019-10-01, `uniform`s cannot be assigned values in the stencil definition headers. 
+Instead, one should load the appropriate values during runtime using the `acLoadScalarUniform` and
+related functions.
+
+
+## Standard libraries
+
+> Not implemented
+
+## Performance considerations
+
+Uniforms are as fast as compile-time constants as long as
+
+0. The halting condition of a tight loop does not depend on an uniform or a variable, as this would prevent unrolling of the loop during compile-time. 
+0. Uniforms are not multiplied with each other. The result should be stored in an auxiliary uniform instead. For example, the result of `nx * ny` should be stored in a new `uniform nxy`
+0. At least 32 neighboring streams in the x-axis access the same `uniform`. That is, the vertices at vertexIdx.x = i... i + 32 should access the same `uniform` where i is a multiple of 32.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/include/.gitignore
+++ b/include/.gitignore
@@ -1,2 +0,0 @@
-# Ignore the DSL headers
-stencil_defines.h
--- a/include/astaroth_device.h
+++ b/include/astaroth_device.h
@@ -46,19 +46,19 @@ AcResult acDeviceSynchronizeStream(const Device device, const Stream stream);
 AcResult acDeviceSwapBuffers(const Device device);

 /** */
-AcResult acDeviceLoadScalarConstant(const Device device, const Stream stream,
+AcResult acDeviceLoadScalarUniform(const Device device, const Stream stream,
                                    const AcRealParam param, const AcReal value);

 /** */
-AcResult acDeviceLoadVectorConstant(const Device device, const Stream stream,
+AcResult acDeviceLoadVectorUniform(const Device device, const Stream stream,
                                    const AcReal3Param param, const AcReal3 value);

 /** */
-AcResult acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
+AcResult acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param,
                                 const int value);

 /** */
-AcResult acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
+AcResult acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param,
                                  const int3 value);

 /** */
--- a/scripts/compile_acc_module.sh
+++ b/scripts/compile_acc_module.sh
@@ -1,9 +1,11 @@
 #!/bin/bash
 ACC_DIR=$(readlink -f $(dirname $0)/../acc)
+ACC_BINARY_DIR=$(pwd)/acc
 MODULE_DIR=$(readlink -f $1)

-echo "-- Compiling project in "${MODULE_DIR}
+echo "-- ACC binary dir:  ${ACC_BINARY_DIR}"
+echo "-- ACC project dir: ${MODULE_DIR}"
 for source in ${MODULE_DIR}/*.sas ${MODULE_DIR}/*.sps ${MODULE_DIR}/*.sdh
 do
-    ${ACC_DIR}/compile.sh $source -I ${MODULE_DIR}
+    ${ACC_DIR}/compile.sh ${ACC_BINARY_DIR}/acc $source -I ${MODULE_DIR}
 done
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -11,6 +11,7 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
                    -gencode arch=compute_50,code=sm_50
                    -gencode arch=compute_60,code=sm_60
                    -gencode arch=compute_61,code=sm_61
+                    -gencode arch=compute_70,code=sm_70
                    -lineinfo
                    -ftz=true # Flush denormalized floats to zero
                    -std=c++11
--- a/src/core/device.cu
+++ b/src/core/device.cu
@@ -357,7 +357,7 @@ acDeviceAutoOptimize(const Device device)

                cudaEventRecord(tstart); // ---------------------------------------- Timing start

-                acDeviceLoadScalarConstant(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON);
+                acDeviceLoadScalarUniform(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON);
                for (int i = 0; i < num_iterations; ++i)
                    solve<2><<<bpg, tpb>>>(start, end, device->vba);

@@ -416,7 +416,7 @@ acDeviceSwapBuffers(const Device device)
 }

 AcResult
-acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRealParam param,
+acDeviceLoadScalarUniform(const Device device, const Stream stream, const AcRealParam param,
                           const AcReal value)
 {
    cudaSetDevice(device->id);
@@ -427,7 +427,7 @@ acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRea
 }

 AcResult
-acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcReal3Param param,
+acDeviceLoadVectorUniform(const Device device, const Stream stream, const AcReal3Param param,
                           const AcReal3 value)
 {
    cudaSetDevice(device->id);
@@ -438,7 +438,7 @@ acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcRea
 }

 AcResult
-acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param,
+acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param,
                        const int value)
 {
    cudaSetDevice(device->id);
@@ -449,7 +449,7 @@ acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntPar
 }

 AcResult
-acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param,
+acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param,
                         const int3 value)
 {
    cudaSetDevice(device->id);
@@ -670,7 +670,7 @@ acDeviceIntegrateSubstep(const Device device, const Stream stream, const int ste
                   (unsigned int)ceil(n.y / AcReal(tpb.y)), //
                   (unsigned int)ceil(n.z / AcReal(tpb.z)));

-    acDeviceLoadScalarConstant(device, stream, AC_dt, dt);
+    acDeviceLoadScalarUniform(device, stream, AC_dt, dt);
    if (step_number == 0)
        solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
    else if (step_number == 1)
--- a/src/core/kernels/.gitignore
+++ b/src/core/kernels/.gitignore
@@ -1,2 +0,0 @@
-# Ignore the generated headers
-stencil_process.cuh stencil_assembly.cuh
--- a/src/core/node.cu
+++ b/src/core/node.cu
@@ -429,7 +429,7 @@ acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param
    acNodeSynchronizeStream(node, stream);
    // #pragma omp parallel for
    for (int i = 0; i < node->num_devices; ++i) {
-        acDeviceLoadScalarConstant(node->devices[i], stream, param, value);
+        acDeviceLoadScalarUniform(node->devices[i], stream, param, value);
    }
    return AC_SUCCESS;
 }
--- a/src/exampleproject/CMakeLists.txt
+++ b/src/exampleproject/CMakeLists.txt
@@ -0,0 +1,11 @@
+## C++ standard
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+## Compilation flags
+add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion -Wshadow)
+
+## Compile and link
+add_executable(ac_simulate simulation.cc)
+target_link_libraries(ac_simulate PRIVATE astaroth_core astaroth_utils)
+add_definitions(-DAC_DEFAULT_CONFIG="${CMAKE_SOURCE_DIR}/config/astaroth.conf")
--- a/src/exampleproject/simulation.cc
+++ b/src/exampleproject/simulation.cc
@@ -0,0 +1,77 @@
+/*
+    Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "src/utils/config_loader.h"
+#include "src/utils/memory.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+int
+run_simulation(const char* config_path)
+{
+    AcMeshInfo info;
+    acLoadConfig(config_path, &info);
+
+    AcMesh mesh;
+    acMeshCreate(info, &mesh);
+    acMeshClear(&mesh);
+
+    acInit(info);
+    acLoad(mesh);
+
+    const size_t num_steps = 10;
+    for (size_t i = 0; i < num_steps; ++i) {
+        const AcReal dt = 1; // JP: TODO! Set timestep here!
+
+        // JP: TODO! Make sure that AcMeshInfo parameters are properly initialized before calling
+        // acIntegrate()
+        // NANs indicate that either dt is too large or something was uninitalized
+        acIntegrate(dt);
+    }
+    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
+        printf("%s:\n", vtxbuf_names[i]);
+        printf("\tmax: %g\n", (double)acReduceScal(RTYPE_MAX, VertexBufferHandle(i)));
+        printf("\tmin: %g\n", (double)acReduceScal(RTYPE_MIN, VertexBufferHandle(i)));
+        printf("\trms: %g\n", (double)acReduceScal(RTYPE_RMS, VertexBufferHandle(i)));
+        printf("\texp rms: %g\n", (double)acReduceScal(RTYPE_RMS_EXP, VertexBufferHandle(i)));
+    }
+    acStore(&mesh);
+    acQuit();
+
+    acMeshDestroy(&mesh);
+    return EXIT_SUCCESS;
+}
+
+int
+main(int argc, char* argv[])
+{
+    printf("Args: \n");
+    for (int i = 0; i < argc; ++i)
+        printf("%d: %s\n", i, argv[i]);
+
+    char* config_path;
+    (argc == 3) ? config_path = strdup(argv[2]) : config_path = strdup(AC_DEFAULT_CONFIG);
+
+    printf("Config path: %s\n", config_path);
+    assert(config_path);
+    run_simulation(config_path);
+    free(config_path);
+    return EXIT_SUCCESS;
+}
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -0,0 +1,10 @@
+## C++ standard
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_STANDARD_REQUIRED ON)
+
+## Compilation flags
+add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion -Wshadow)
+
+## Compile
+add_library(astaroth_utils STATIC config_loader.c memory.c)
+target_link_libraries(astaroth_utils PRIVATE astaroth_core)
--- a/src/utils/config_loader.c
+++ b/src/utils/config_loader.c
@@ -0,0 +1,143 @@
+/*
+    Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#include "config_loader.h"
+
+#include <assert.h>
+#include <limits.h> // UINT_MAX
+#include <math.h>
+#include <stdint.h> // uint8_t, uint32_t
+#include <stdio.h>  // print
+#include <string.h> // memset
+//#include "src/core/math_utils.h"
+
+/**
+ \brief Find the index of the keyword in names
+ \return Index in range 0...n if the keyword is in names. -1 if the keyword was
+ not found.
+ */
+static int
+find_str(const char keyword[], const char* names[], const int n)
+{
+    for (int i = 0; i < n; ++i)
+        if (!strcmp(keyword, names[i]))
+            return i;
+
+    return -1;
+}
+
+static void
+parse_config(const char* path, AcMeshInfo* config)
+{
+    FILE* fp;
+    fp = fopen(path, "r");
+    // For knowing which .conf file will be used
+    printf("Config file path: \n %s \n ", path);
+    assert(fp != NULL);
+
+    const size_t BUF_SIZE = 128;
+    char keyword[BUF_SIZE];
+    char value[BUF_SIZE];
+    int items_matched;
+    while ((items_matched = fscanf(fp, "%s = %s", keyword, value)) != EOF) {
+
+        if (items_matched < 2)
+            continue;
+
+        int idx = -1;
+        if ((idx = find_str(keyword, intparam_names, NUM_INT_PARAMS)) >= 0)
+            config->int_params[idx] = atoi(value);
+        else if ((idx = find_str(keyword, realparam_names, NUM_REAL_PARAMS)) >= 0)
+            config->real_params[idx] = (AcReal)(atof(value));
+    }
+
+    fclose(fp);
+}
+
+void
+update_config(AcMeshInfo* config)
+{
+    config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER;
+    ///////////// PAD TEST
+    // config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER + PAD_SIZE;
+    ///////////// PAD TEST
+    config->int_params[AC_my] = config->int_params[AC_ny] + STENCIL_ORDER;
+    config->int_params[AC_mz] = config->int_params[AC_nz] + STENCIL_ORDER;
+
+    // Bounds for the computational domain, i.e. nx_min <= i < nx_max
+    config->int_params[AC_nx_min] = STENCIL_ORDER / 2;
+    config->int_params[AC_nx_max] = config->int_params[AC_nx_min] + config->int_params[AC_nx];
+    config->int_params[AC_ny_min] = STENCIL_ORDER / 2;
+    config->int_params[AC_ny_max] = config->int_params[AC_ny] + STENCIL_ORDER / 2;
+    config->int_params[AC_nz_min] = STENCIL_ORDER / 2;
+    config->int_params[AC_nz_max] = config->int_params[AC_nz] + STENCIL_ORDER / 2;
+
+    // Spacing
+    config->real_params[AC_inv_dsx] = (AcReal)(1.) / config->real_params[AC_dsx];
+    config->real_params[AC_inv_dsy] = (AcReal)(1.) / config->real_params[AC_dsy];
+    config->real_params[AC_inv_dsz] = (AcReal)(1.) / config->real_params[AC_dsz];
+
+    /* Additional helper params */
+    // Int helpers
+    config->int_params[AC_mxy]  = config->int_params[AC_mx] * config->int_params[AC_my];
+    config->int_params[AC_nxy]  = config->int_params[AC_nx] * config->int_params[AC_ny];
+    config->int_params[AC_nxyz] = config->int_params[AC_nxy] * config->int_params[AC_nz];
+}
+
+/**
+\brief Loads data from astaroth.conf into a config struct.
+\return AC_SUCCESS on success, AC_FAILURE if there are potentially uninitialized values.
+*/
+AcResult
+acLoadConfig(const char* config_path, AcMeshInfo* config)
+{
+    int retval = AC_SUCCESS;
+    assert(config_path);
+
+    // memset reads the second parameter as a byte even though it says int in
+    // the function declaration
+    memset(config, (uint8_t)0xFF, sizeof(*config));
+
+    parse_config(config_path, config);
+    update_config(config);
+#if VERBOSE_PRINTING // Defined in astaroth.h
+    printf("###############################################################\n");
+    printf("Config dimensions recalculated:\n");
+    acPrintMeshInfo(*config);
+    printf("###############################################################\n");
+#endif
+
+    // sizeof(config) must be a multiple of 4 bytes for this to work
+    assert(sizeof(*config) % sizeof(uint32_t) == 0);
+    for (size_t i = 0; i < sizeof(*config) / sizeof(uint32_t); ++i) {
+        if (((uint32_t*)config)[i] == (uint32_t)0xFFFFFFFF) {
+            fprintf(stderr, "Some config values may be uninitialized. "
+                            "See that all are defined in astaroth.conf\n");
+            retval = AC_FAILURE;
+        }
+    }
+    return retval;
+}
--- a/src/utils/config_loader.h
+++ b/src/utils/config_loader.h
@@ -0,0 +1,37 @@
+/*
+    Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Functions for loading and updating AcMeshInfo.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Loads data from the config file */
+AcResult acLoadConfig(const char* config_path, AcMeshInfo* config);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- a/src/utils/memory.c
+++ b/src/utils/memory.c
@@ -0,0 +1,64 @@
+/*
+    Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "memory.h"
+
+#include <math.h>
+#include <string.h>
+
+#include "src/core/errchk.h"
+
+AcResult
+acMeshCreate(const AcMeshInfo info, AcMesh* mesh)
+{
+    mesh->info = info;
+
+    const size_t bytes = acVertexBufferSizeBytes(mesh->info);
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
+        mesh->vertex_buffer[w] = malloc(bytes);
+        ERRCHK_ALWAYS(mesh->vertex_buffer[w]);
+    }
+
+    return AC_SUCCESS;
+}
+
+AcResult
+acMeshDestroy(AcMesh* mesh)
+{
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+        free(mesh->vertex_buffer[w]);
+
+    return AC_SUCCESS;
+}
+
+AcResult
+acMeshSet(const AcReal value, AcMesh* mesh)
+{
+    const int n = acVertexBufferSize(mesh->info);
+    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
+        for (int i = 0; i < n; ++i)
+            mesh->vertex_buffer[w][i] = value;
+
+    return AC_SUCCESS;
+}
+
+AcResult
+acMeshClear(AcMesh* mesh)
+{
+    return acMeshSet(0, mesh);
+}
--- a/src/utils/memory.h
+++ b/src/utils/memory.h
@@ -0,0 +1,44 @@
+/*
+    Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
+
+    This file is part of Astaroth.
+
+    Astaroth is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Astaroth is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Astaroth.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * @file
+ * \brief Brief info.
+ *
+ * Detailed info.
+ *
+ */
+#pragma once
+#include "astaroth.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+AcResult acMeshCreate(const AcMeshInfo mesh_info, AcMesh* mesh);
+
+AcResult acMeshDestroy(AcMesh* mesh);
+
+AcResult acMeshSet(const AcReal value, AcMesh* mesh);
+
+AcResult acMeshClear(AcMesh* mesh);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif