Merge branch 'master' into node_device_interface_revision_07-23
@@ -21,9 +21,12 @@ der6x_upwd(in Scalar vertex)
 
     return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
                      - Scalar(20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
-                     + Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z])
-                     - Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z])
-                     +               vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z] + vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])};
+                     + Scalar(15.0)*(vertex[vertexIdx.x+1, vertexIdx.y, vertexIdx.z]
+                                   + vertex[vertexIdx.x-1, vertexIdx.y, vertexIdx.z])
+                     - Scalar( 6.0)*(vertex[vertexIdx.x+2, vertexIdx.y, vertexIdx.z]
+                                   + vertex[vertexIdx.x-2, vertexIdx.y, vertexIdx.z])
+                     +               vertex[vertexIdx.x+3, vertexIdx.y, vertexIdx.z]
+                     +               vertex[vertexIdx.x-3, vertexIdx.y, vertexIdx.z])};
 }
 
 Preprocessed Scalar
@@ -33,9 +36,12 @@ der6y_upwd(in Scalar vertex)
 
     return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
                      - Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
-                     + Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z])
-                     - Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z])
-                     +                vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z] + vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])};
+                     + Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y+1, vertexIdx.z]
+                                    + vertex[vertexIdx.x, vertexIdx.y-1, vertexIdx.z])
+                     - Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y+2, vertexIdx.z]
+                                    + vertex[vertexIdx.x, vertexIdx.y-2, vertexIdx.z])
+                     +                vertex[vertexIdx.x, vertexIdx.y+3, vertexIdx.z]
+                     +                vertex[vertexIdx.x, vertexIdx.y-3, vertexIdx.z])};
 }
 
 Preprocessed Scalar
@@ -45,9 +51,12 @@ der6z_upwd(in Scalar vertex)
 
     return (Scalar){ Scalar(1.0/60.0)*inv_ds* (
                      - Scalar( 20.0)* vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z]
-                     + Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1])
-                     - Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2])
-                     +                vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3] + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])};
+                     + Scalar( 15.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+1]
+                                    + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-1])
+                     - Scalar(  6.0)*(vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+2]
+                                    + vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-2])
+                     +                vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z+3]
+                     +                vertex[vertexIdx.x, vertexIdx.y, vertexIdx.z-3])};
 }
 
 #endif
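In stencil form (assuming inv_ds is the inverse grid spacing 1/\Delta x, as the AC_inv_ds* names in the reference model below suggest), the x-variant above evaluates

    \mathrm{der6x\_upwd}(f)_i = \frac{1}{60\,\Delta x}\bigl(-20 f_i + 15\,(f_{i+1}+f_{i-1}) - 6\,(f_{i+2}+f_{i-2}) + f_{i+3} + f_{i-3}\bigr),

and the y- and z-variants are identical up to which index is offset. The coefficients (1, -6, 15, -20, 15, -6, 1) are those of the centered sixth difference, so the bracket scales as \Delta x^6\,\partial^6 f/\partial x^6; normalizing by 60\,\Delta x rather than \Delta x^6 keeps the units of a first derivative, which is how the upwinding correction later in this commit (|u_i| times these terms) uses it.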
@@ -31,7 +31,7 @@
 #define LENTROPY (1)
 #define LTEMPERATURE (0)
 #define LFORCING (1)
-#define LUPWD (0)
+#define LUPWD (1)
 
 #define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
 
@@ -222,23 +222,26 @@ helical_forcing(Scalar magnitude, Vector k_force, Vector xx, Vector ff_re, Vecto
     // JP: This looks wrong:
     //     1) Should it be dsx * nx instead of dsx * ny?
     //     2) Should you also use globalGrid.n instead of the local n?
+    // MV: You are right. Made a quickfix. I did not see the error because multigpu is split
+    //     in the z direction, not the y direction.
     //     3) Also final point: can we do this with vectors/quaternions instead?
     //        Trigonometric functions are much more expensive and inaccurate.
+    // MV: Good idea. Not an immediate priority.
     //        Fun related article:
     //        https://randomascii.wordpress.com/2014/10/09/intel-underestimates-error-bounds-by-1-3-quintillion/
-    xx.x = xx.x*(2.0*M_PI/(dsx*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
-    xx.y = xx.y*(2.0*M_PI/(dsy*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
-    xx.z = xx.z*(2.0*M_PI/(dsz*(DCONST_INT(AC_ny_max) - DCONST_INT(AC_ny_min))));
+    xx.x = xx.x*(2.0*M_PI/(dsx*globalGrid.n.x));
+    xx.y = xx.y*(2.0*M_PI/(dsy*globalGrid.n.y));
+    xx.z = xx.z*(2.0*M_PI/(dsz*globalGrid.n.z));
 
     Scalar cos_phi = cos(phi);
     Scalar sin_phi = sin(phi);
-    Scalar cos_k_dox_x = cos(dot(k_force, xx));
-    Scalar sin_k_dox_x = sin(dot(k_force, xx));
+    Scalar cos_k_dot_x = cos(dot(k_force, xx));
+    Scalar sin_k_dot_x = sin(dot(k_force, xx));
     // Phase affects only the x-component
-    //Scalar real_comp = cos_k_dox_x;
-    //Scalar imag_comp = sin_k_dox_x;
-    Scalar real_comp_phase = cos_k_dox_x*cos_phi - sin_k_dox_x*sin_phi;
-    Scalar imag_comp_phase = cos_k_dox_x*sin_phi + sin_k_dox_x*cos_phi;
+    //Scalar real_comp = cos_k_dot_x;
+    //Scalar imag_comp = sin_k_dot_x;
+    Scalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi;
+    Scalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi;
 
 
     Vector force = (Vector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase,
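Two points about the quickfix above. First, each component of xx is now rescaled by 2\pi/(\mathrm{ds}_i \cdot \mathrm{globalGrid.n}_i), so the phase k\_force \cdot xx spans the global domain extent in every direction, instead of the local y-extent that was used for all three directions before. Second, the angle-addition expressions for real_comp_phase and imag_comp_phase are one complex rotation written out (a step toward the vector formulation suggested in the comment):

    \mathrm{real\_comp\_phase} + i\,\mathrm{imag\_comp\_phase}
        = \bigl(\cos(k\cdot x) + i\sin(k\cdot x)\bigr)\bigl(\cos\varphi + i\sin\varphi\bigr)
        = e^{i(k\cdot x + \varphi)}.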
doc/manual/.gitignore (vendored, new file, +2)
@@ -0,0 +1,2 @@
+*.html
+*.pdf
scripts/auto_optimize.sh (deleted, -51)
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-# Run this in your build directory (cd build && ../scripts/auto_optimize.sh)
-# Generates a ${BENCHMARK_FILE} which contains the threadblock dims and other
-# constants used in the integration in addition to the time used.
-
-MAX_THREADS=1024 # Max size of the thread block, depends on hardware
-
-BENCHMARK_FILE="benchmark.out"
-TBCONFCREATOR_SRC_PATH="../scripts/gen_rk3_threadblockconf.c"
-TBCONFFILE_DST_PATH="../src/core/kernels"
-
-C_COMPILER_NAME="gcc"
-
-rm ${BENCHMARK_FILE}
-
-for (( tz=2; tz<=8; tz*=2))
-do
-    for (( ty=1; ty<=1; ty+=1))
-    do
-        for (( tx=16; tx<=64; tx*=2))
-        do
-
-            if ( (${tx}*${ty}*${tz}) > ${MAX_THREADS})
-            then break
-            fi
-
-            for (( launch_bound=1; launch_bound<=8; launch_bound*=2))
-            do
-                for (( elems_per_thread=1; elems_per_thread<=128; elems_per_thread*=2))
-                do
-                    # Generate the threadblock configuration
-                    ${C_COMPILER_NAME} ${TBCONFCREATOR_SRC_PATH} -o gen_rk3_threadblockconf
-                    ./gen_rk3_threadblockconf ${tx} ${ty} ${tz} ${elems_per_thread} ${launch_bound}
-                    rm gen_rk3_threadblockconf
-                    mv rk3_threadblock.conf ${TBCONFFILE_DST_PATH}
-
-                    # Compile and run the test build
-                    cmake -DBUILD_DEBUG=OFF -DDOUBLE_PRECISION=OFF -DAUTO_OPTIMIZE=ON .. && make -j
-                    #if ./ac_run -t; then
-                    #    echo Success
-                    ./ac_run -b
-                    #else
-                    #    echo fail!
-                    #fi
-                done
-            done
-        done
-    done
-done
@@ -5,10 +5,13 @@
 # source ./sourceme.sh
 # autotest.sh
 #
-# If you need slurm or to pass something before ./ac_run, set the variable
+# If you need slurm or to pass something before ./ac_run, export the variable
 # SRUN_COMMAND before calling this script.
 #
+# F.ex. on Taito: SRUN_COMMAND="srun --gres=gpu:k80:4 --mem=24000 -t 00:14:59 -p gputest --cpus-per-task 1 -n 1"
+#                 export SRUN_COMMAND
+#                 autotest.sh
 echo "SRUN_COMMAND: " ${SRUN_COMMAND}
 
 results=()
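Taken together, the header comments describe the following invocation. This is only a sketch of the documented usage; the working directory and the location of autotest.sh on PATH are not spelled out in this hunk and are assumed here:

    # Taito example taken from the comments above; adapt SRUN_COMMAND to your system.
    export SRUN_COMMAND="srun --gres=gpu:k80:4 --mem=24000 -t 00:14:59 -p gputest --cpus-per-task 1 -n 1"
    source ./sourceme.sh
    autotest.sh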
@@ -43,6 +43,9 @@ typedef struct {
     ModelScalar value;
     ModelVector gradient;
     ModelMatrix hessian;
+#if LUPWD
+    ModelVector upwind;
+#endif
 } ModelScalarData;
 
 typedef struct {
@@ -273,6 +276,53 @@ derzz(const int i, const int j, const int k, const ModelScalar* arr)
     return second_derivative(pencil, get(AC_inv_dsz));
 }
 
+#if LUPWD
+static inline ModelScalar
+der6x_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsx);
+
+    return ModelScalar(1.0/60.0)*inv_ds* (
+              -ModelScalar( 20.0)* arr[IDX(i, j, k)]
+              +ModelScalar( 15.0)*(arr[IDX(i+1, j, k)]
+                                 + arr[IDX(i-1, j, k)])
+              -ModelScalar(  6.0)*(arr[IDX(i+2, j, k)]
+                                 + arr[IDX(i-2, j, k)])
+              +                    arr[IDX(i+3, j, k)]
+              +                    arr[IDX(i-3, j, k)]);
+}
+
+static inline ModelScalar
+der6y_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsy);
+
+    return ModelScalar(1.0/60.0)*inv_ds* (
+              -ModelScalar( 20.0)* arr[IDX(i, j, k)]
+              +ModelScalar( 15.0)*(arr[IDX(i, j+1, k)]
+                                 + arr[IDX(i, j-1, k)])
+              -ModelScalar(  6.0)*(arr[IDX(i, j+2, k)]
+                                 + arr[IDX(i, j-2, k)])
+              +                    arr[IDX(i, j+3, k)]
+              +                    arr[IDX(i, j-3, k)]);
+}
+
+static inline ModelScalar
+der6z_upwd(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    ModelScalar inv_ds = get(AC_inv_dsz);
+
+    return ModelScalar(1.0/60.0)*inv_ds* (
+              -ModelScalar( 20.0)* arr[IDX(i, j, k)]
+              +ModelScalar( 15.0)*(arr[IDX(i, j, k+1)]
+                                 + arr[IDX(i, j, k-1)])
+              -ModelScalar(  6.0)*(arr[IDX(i, j, k+2)]
+                                 + arr[IDX(i, j, k-2)])
+              +                    arr[IDX(i, j, k+3)]
+              +                    arr[IDX(i, j, k-3)]);
+}
+#endif
+
 static inline ModelScalar
 compute_value(const int i, const int j, const int k, const ModelScalar* arr)
 {
@@ -285,6 +335,14 @@ compute_gradient(const int i, const int j, const int k, const ModelScalar* arr)
     return (ModelVector){derx(i, j, k, arr), dery(i, j, k, arr), derz(i, j, k, arr)};
 }
 
+#if LUPWD
+static inline ModelVector
+compute_upwind(const int i, const int j, const int k, const ModelScalar* arr)
+{
+    return (ModelVector){der6x_upwd(i, j, k, arr), der6y_upwd(i, j, k, arr), der6z_upwd(i, j, k, arr)};
+}
+#endif
+
 static inline ModelMatrix
 compute_hessian(const int i, const int j, const int k, const ModelScalar* arr)
 {
@@ -309,6 +367,10 @@ read_data(const int i, const int j, const int k, ModelScalar* buf[], const int h
     // diagonals with all arrays
     data.hessian = compute_hessian(i, j, k, buf[handle]);
 
+#if LUPWD
+    data.upwind = compute_upwind(i, j, k, buf[handle]);
+#endif
+
     return data;
 }
 
@@ -354,6 +416,8 @@ gradients(const ModelVectorData& data)
     return (ModelMatrix){gradient(data.x), gradient(data.y), gradient(data.z)};
 }
 
+
+
 /*
  * =============================================================================
  * Level 0.3 (Built-in functions available during the Stencil Processing Stage)
@@ -502,10 +566,27 @@ contract(const ModelMatrix& mat)
  * Stencil Processing Stage (equations)
  * =============================================================================
  */
 
+#if LUPWD
+ModelScalar
+upwd_der6(const ModelVectorData& uu, const ModelScalarData& lnrho)
+{
+    ModelScalar uux = fabs(value(uu).x);
+    ModelScalar uuy = fabs(value(uu).y);
+    ModelScalar uuz = fabs(value(uu).z);
+    return uux*lnrho.upwind.x + uuy*lnrho.upwind.y + uuz*lnrho.upwind.z;
+}
+#endif
+
 static inline ModelScalar
 continuity(const ModelVectorData& uu, const ModelScalarData& lnrho)
 {
-    return -dot(value(uu), gradient(lnrho)) - divergence(uu);
+    return -dot(value(uu), gradient(lnrho))
+#if LUPWD
+           // This is a corrective hyperdiffusion term for upwinding.
+           + upwd_der6(uu, lnrho)
+#endif
+           - divergence(uu);
 }
 
 static inline ModelScalar
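Assuming continuity() supplies the right-hand side of the \ln\rho evolution equation (as its name and arguments suggest), the LUPWD branch above amounts to

    \frac{\partial \ln\rho}{\partial t} = -\,\mathbf{u}\cdot\nabla\ln\rho \;-\; \nabla\cdot\mathbf{u} \;+\; \sum_{i\in\{x,y,z\}} |u_i|\, D6_i(\ln\rho),

where D6_i denotes der6x_upwd, der6y_upwd, der6z_upwd. The last sum is exactly what upwd_der6(uu, lnrho) computes from the precomputed lnrho.upwind vector: a directionally weighted, mesh-scale hyperdiffusion acting as the corrective upwinding term mentioned in the comment.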
@@ -679,19 +760,20 @@ helical_forcing(ModelScalar magnitude, ModelVector k_force, ModelVector xx, Mode
                 ModelVector ff_im, ModelScalar phi)
 {
     (void)magnitude; // WARNING: unused
-    xx.x = xx.x * (2.0l * M_PI / (get(AC_dsx) * (get(AC_ny_max) - get(AC_ny_min))));
-    xx.y = xx.y * (2.0l * M_PI / (get(AC_dsy) * (get(AC_ny_max) - get(AC_ny_min))));
-    xx.z = xx.z * (2.0l * M_PI / (get(AC_dsz) * (get(AC_ny_max) - get(AC_ny_min))));
+    xx.x = xx.x*(2.0*M_PI/(get(AC_dsx)*get(AC_nx)));
+    xx.y = xx.y*(2.0*M_PI/(get(AC_dsy)*get(AC_ny)));
+    xx.z = xx.z*(2.0*M_PI/(get(AC_dsz)*get(AC_nz)));
 
-    ModelScalar cosl_phi     = cosl(phi);
-    ModelScalar sinl_phi     = sinl(phi);
-    ModelScalar cosl_k_dox_x = cosl(dot(k_force, xx));
-    ModelScalar sinl_k_dox_x = sinl(dot(k_force, xx));
+    ModelScalar cos_phi     = cosl(phi);
+    ModelScalar sin_phi     = sinl(phi);
+    ModelScalar cos_k_dot_x = cosl(dot(k_force, xx));
+    ModelScalar sin_k_dot_x = sinl(dot(k_force, xx));
     // Phase affects only the x-component
-    // ModelScalar real_comp = cosl_k_dox_x;
-    // ModelScalar imag_comp = sinl_k_dox_x;
-    ModelScalar real_comp_phase = cosl_k_dox_x * cosl_phi - sinl_k_dox_x * sinl_phi;
-    ModelScalar imag_comp_phase = cosl_k_dox_x * sinl_phi + sinl_k_dox_x * cosl_phi;
+    //Scalar real_comp = cos_k_dot_x;
+    //Scalar imag_comp = sin_k_dot_x;
+    ModelScalar real_comp_phase = cos_k_dot_x*cos_phi - sin_k_dot_x*sin_phi;
+    ModelScalar imag_comp_phase = cos_k_dot_x*sin_phi + sin_k_dot_x*cos_phi;
 
 
     ModelVector force = (ModelVector){ ff_re.x*real_comp_phase - ff_im.x*imag_comp_phase,
                                        ff_re.y*real_comp_phase - ff_im.y*imag_comp_phase,