diff --git a/src/core/astaroth.cc b/src/core/astaroth.cc
index 3a38240..1c8d0f7 100644
--- a/src/core/astaroth.cc
+++ b/src/core/astaroth.cc
@@ -126,6 +126,15 @@ acReduceVec(const ReductionType rtype, const VertexBufferHandle a, const VertexB
     return result;
 }
 
+AcReal
+acReduceVecScal(const ReductionType rtype, const VertexBufferHandle a, const VertexBufferHandle b,
+                const VertexBufferHandle c, const VertexBufferHandle d)
+{
+    AcReal result;
+    acNodeReduceVecScal(nodes[0], STREAM_DEFAULT, rtype, a, b, c, d, &result);
+    return result;
+}
+
 AcResult
 acStoreWithOffset(const int3 dst, const size_t num_vertices, AcMesh* host_mesh)
 {
diff --git a/src/core/device.cc b/src/core/device.cc
index 3e5515a..a6ec793 100644
--- a/src/core/device.cc
+++ b/src/core/device.cc
@@ -474,6 +474,28 @@ acDeviceReduceVec(const Device device, const Stream stream, const ReductionType
     return AC_SUCCESS;
 }
 
+AcResult
+acDeviceReduceVecScal(const Device device, const Stream stream, const ReductionType rtype,
+                      const VertexBufferHandle vtxbuf0, const VertexBufferHandle vtxbuf1,
+                      const VertexBufferHandle vtxbuf2, const VertexBufferHandle vtxbuf3, AcReal* result)
+{
+    cudaSetDevice(device->id);
+
+    const int3 start = (int3){device->local_config.int_params[AC_nx_min],
+                              device->local_config.int_params[AC_ny_min],
+                              device->local_config.int_params[AC_nz_min]};
+
+    const int3 end = (int3){device->local_config.int_params[AC_nx_max],
+                            device->local_config.int_params[AC_ny_max],
+                            device->local_config.int_params[AC_nz_max]};
+
+    *result = acKernelReduceVecScal(device->streams[stream], rtype, start, end, device->vba.in[vtxbuf0],
+                                    device->vba.in[vtxbuf1], device->vba.in[vtxbuf2], device->vba.in[vtxbuf3],
+                                    device->reduce_scratchpad, device->reduce_result);
+    return AC_SUCCESS;
+}
+
+
 #if AC_MPI_ENABLED
 /**
  Quick overview of the MPI implementation:
@@ -2047,4 +2069,7 @@ acGridReduceVec(const Stream stream, const ReductionType rtype, const VertexBuff
     return acMPIReduceScal(local_result, rtype, result);
 }
 
+
+// TODO(MV): implement acGridReduceVecScal() for the MPI path (needed e.g. for Alfven speeds).
+
 #endif // AC_MPI_ENABLED
diff --git a/src/core/node.cc b/src/core/node.cc
index 7a0ca03..70c3115 100644
--- a/src/core/node.cc
+++ b/src/core/node.cc
@@ -797,13 +797,13 @@ simple_final_reduce_scal(const Node node, const ReductionType& rtype, const AcRe
 {
     AcReal res = results[0];
     for (int i = 1; i < n; ++i) {
-        if (rtype == RTYPE_MAX) {
+        if (rtype == RTYPE_MAX || rtype == RTYPE_ALFVEN_MAX) {
             res = max(res, results[i]);
         }
-        else if (rtype == RTYPE_MIN) {
+        else if (rtype == RTYPE_MIN || rtype == RTYPE_ALFVEN_MIN) {
             res = min(res, results[i]);
         }
-        else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM) {
+        else if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_SUM || rtype == RTYPE_ALFVEN_RMS) {
             res = sum(res, results[i]);
         }
         else {
@@ -811,7 +811,7 @@
         }
     }
 
-    if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
+    if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP || rtype == RTYPE_ALFVEN_RMS) {
         const AcReal inv_n = AcReal(1.) / (node->grid.n.x * node->grid.n.y * node->grid.n.z);
         res = sqrt(inv_n * res);
     }
@@ -850,3 +850,21 @@ acNodeReduceVec(const Node node, const Stream stream, const ReductionType rtype,
     *result = simple_final_reduce_scal(node, rtype, results, node->num_devices);
     return AC_SUCCESS;
 }
+
+AcResult
+acNodeReduceVecScal(const Node node, const Stream stream, const ReductionType rtype,
+                    const VertexBufferHandle a, const VertexBufferHandle b, const VertexBufferHandle c,
+                    const VertexBufferHandle d, AcReal* result)
+{
+    acNodeSynchronizeStream(node, STREAM_ALL);
+
+    AcReal results[node->num_devices];
+    // #pragma omp parallel for
+    for (int i = 0; i < node->num_devices; ++i) {
+        acDeviceReduceVecScal(node->devices[i], stream, rtype, a, b, c, d, &results[i]);
+    }
+
+    *result = simple_final_reduce_scal(node, rtype, results, node->num_devices);
+    return AC_SUCCESS;
+}
+