Added Astaroth 2.0

This commit is contained in:
jpekkila
2019-06-14 14:18:35 +03:00
parent 4e4f84c8ff
commit 0e48766a68
87 changed files with 18058 additions and 1 deletions

5
acc/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
build
testbin
# Except this file
!.gitignore

42
acc/README.md Normal file
View File

@@ -0,0 +1,42 @@
# Dependencies
## Debian/Ubuntu
`apt install flex bison build-essential`
# Usage
* `./build_acc.sh # Builds the ASPL compiler (acc)`
* `./compile.sh <.sps or .sas source> # Compiles the given stage into CUDA`
* `./test.sh # Tries to compile the sample stages`
* `./clean.sh # Removes directories generated by build_acc.sh and test.sh`
## Example
- `./compile.sh src/stencil_assembly.sas # Generates stencil_assembly.cuh`
- `./compile.sh src/stencil_process.sps # Generates stencil_process.cuh`
# What happens under the hood
The compiler is made of a scanner (flex), parser (bison), implementation of the abstract syntax tree (AST) and a code generator.
The language is defined by tokens and grammars found in acc.l and acc.y. These files are given as input to flex and bison, which generate the scanning and parsing stages for the compiler. The resulting AST is defined in ast.h. Finally, we traverse the generated AST with our code generator, generating CUDA code.
## ACC compilation stages
### In short:
* Preprocess the DSL source (.sas or .sps)
* Compile the preprocessed source to .cuh
* Compile .cuh
### More detailed:
0. A Parser is generated: bison --verbose -d acc.y
0. A Scanner is generated: flex acc.l
0. The compiler is built: gcc -std=gnu11 code_generator.c acc.tab.c lex.yy.c -lfl
0. Source files (.sps and .sas) are preprocessed using the GCC preprocessor and cleaned of any residual directives, which would only be useful when compiling the code further with GCC. These directives are not needed when compiling with ACC and are not recognized by our grammar.
0. Either the stencil processing stage (.sps) or the stencil assembly stage (.sas) is generated by passing the preprocessed file to acc. This emits the final CUDA code.
0. Compilation is continued with the NVIDIA CUDA compiler
### Even more detailed:
The NVIDIA CUDA compiler compiles .cuh to .fatbin, which is embedded into a C++ binary containing host code of the program. A fatbin contains .cubin files, which contain the configuration of the GPU and the kernels in a streaming assembly code (.sass). We could also compile for a virtual architecture (.ptx) instead of the actual hardware-specific machine code (.cubin) by passing -code=compute_XX flag to nvcc, which would compile cuda sources at runtime (just-in-time compilation, JIT) when creating the CUDA context. However, we always know which architecture we want to run the code on and JIT compilation would just increase the time it takes to launch the program.
nvcc -DAC_DOUBLE_PRECISION=1 -ptx --relocatable-device-code true -O3 -std=c++11 --maxrregcount=255 -ftz=true -gencode arch=compute_60,code=sm_60 device.cu -I ../../include -I ../../
nvcc -DAC_DOUBLE_PRECISION=1 -cubin --relocatable-device-code true -O3 -std=c++11 --maxrregcount=255 -ftz=true -gencode arch=compute_60,code=sm_60 device.cu -I ../../include -I ../../
cuobjdump --dump-sass device.cubin > device.sass

25
acc/build_acc.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Builds the ASPL compiler (acc): runs bison and flex on src/acc.y and
# src/acc.l inside build/ and compiles the generated sources together with
# src/code_generator.c into build/acc.

# Stop at the first failing step; compiling stale or missing generated
# sources after a bison/flex error would only hide the real problem.
set -e

cd "$(dirname "$0")" # Only operate in the same directory with this script

COMPILER_NAME="acc"
SRC_DIR="${PWD}/src"
BUILD_DIR="${PWD}/build"

mkdir -p "${BUILD_DIR}"
echo "Created ${BUILD_DIR}"
cd "${BUILD_DIR}"

echo "${SRC_DIR}"
echo "${BUILD_DIR}"

# Generate the Bison parser source and header (acc.tab.c, acc.tab.h)
bison --verbose -d "${SRC_DIR}/${COMPILER_NAME}.y"

# Generate the Flex scanner source (lex.yy.c)
flex "${SRC_DIR}/${COMPILER_NAME}.l"

# Compile the ASPL compiler
gcc -std=gnu11 "${SRC_DIR}/code_generator.c" "${COMPILER_NAME}.tab.c" lex.yy.c -lfl -I "${BUILD_DIR}" -I "${SRC_DIR}" -o "${COMPILER_NAME}"

5
acc/clean.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Removes the directories generated by build_acc.sh and test.sh.
# Only operate in the same directory with this script. Bail out if the cd
# fails so that rm -rf can never run in the directory of the caller.
cd "$(dirname "$0")" || exit 1
rm -rf build testbin

24
acc/compile.sh Executable file
View File

@@ -0,0 +1,24 @@
#!/bin/bash
# Usage ./compile <source file>
#
# Preprocesses the given .sas or .sps file and pipes it through build/acc.
# The generated CUDA is written to stencil_assembly.cuh or stencil_process.cuh
# in the current working directory. Exits with a non-zero status on a missing
# argument, on an unknown extension, or when a stage of the pipeline fails.

# Report a failing preprocess step instead of only the status of acc
set -o pipefail

if [ -z "$1" ]; then
    echo "Usage: $0 <.sps or .sas source>" >&2
    exit 1
fi

ACC_DIR="$(dirname "$0")"

FULL_NAME="$(basename -- "$1")"
FILENAME="${FULL_NAME%.*}"
EXTENSION="${FULL_NAME##*.}"

if [ "${EXTENSION}" = "sas" ]; then
    echo "Generating stencil assembly stage ${FILENAME}.sas -> stencil_assembly.cuh"
    COMPILE_FLAGS="-sas" # Generate stencil assembly stage
    CUH_FILENAME="stencil_assembly.cuh"
elif [ "${EXTENSION}" = "sps" ]; then
    echo "Generating stencil processing stage: ${FILENAME}.sps -> stencil_process.cuh"
    COMPILE_FLAGS="-sps" # Generate stencil processing stage
    CUH_FILENAME="stencil_process.cuh"
else
    echo "Error: unknown extension ${EXTENSION} of file ${FULL_NAME}" >&2
    echo "Extension should be either .sas or .sps" >&2
    exit 1 # A bare exit would return the status of the echo above, i.e. success
fi

"${ACC_DIR}/preprocess.sh" "$1" | "${ACC_DIR}/build/acc" ${COMPILE_FLAGS} > "${CUH_FILENAME}"

View File

@@ -0,0 +1,26 @@
// Returns the input field sampled at the current vertex, i.e.
// vertex[vertexIdx].
// NOTE(review): the Preprocessed qualifier is interpreted by acc; its exact
// effect is not visible in this file.
Preprocessed Scalar
value(in Scalar vertex)
{
return vertex[vertexIdx];
}
// Returns the Vector (derx, dery, derz) of the input field, each derivative
// evaluated at vertexIdx. derx/dery/derz are defined outside this file.
Preprocessed Vector
gradient(in Scalar vertex)
{
return (Vector){derx(vertexIdx, vertex),
dery(vertexIdx, vertex),
derz(vertexIdx, vertex)};
}
// Returns the 3x3 matrix of second derivatives of the input field at
// vertexIdx. Only six derivatives are evaluated (xx, xy, xz, yy, yz, zz); the
// entries below the diagonal are copied from the rows already filled in, so
// the result is symmetric by construction.
Preprocessed Matrix
hessian(in Scalar vertex)
{
Matrix hessian;
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex), derxz(vertexIdx, vertex)};
// Reuse the xy entry of row 0 as the yx entry
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
// Reuse the xz entry of row 0 and the yz entry of row 1
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
return hessian;
}

View File

@@ -0,0 +1,265 @@
#define LINDUCTION (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define LGRAVITY (0)
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar cv_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar zeta;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
// Vector overload of value(): applies the scalar value() (defined outside this
// file) to uu.x, uu.y and uu.z and packs the results into a Vector.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a Matrix built from gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used to advance lnrho in solve():
// -(u dot grad lnrho) - div u.
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
#if LENTROPY
// Right-hand side used to advance uu in solve() when LENTROPY is enabled.
// Sums, in order: the advection term -(grad u) u, the pressure term scaled by
// the local cs2, the inv_rho * (j x B) term, the nu_visc-scaled viscous terms
// and the zeta * grad(div u) term.
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa) {
const Matrix S = stress_tensor(uu);
// Local squared sound speed: cs2_sound scaled by an exponential of the entropy
// and of the offset of lnrho from LNRHO0
const Scalar cs2 = cs2_sound * exp(gamma * value(ss) / cp_sound + (gamma - 1) * (value(lnrho) - LNRHO0));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Vector B = curl(aa);
// 1 / rho, with rho recovered from the logarithmic density
const Scalar inv_rho = Scalar(1.) / exp(value(lnrho));
// Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
// \1
const Vector mom = - mul(gradients(uu), value(uu))
- cs2 * ((Scalar(1.) / cp_sound) * gradient(ss) + gradient(lnrho))
+ inv_rho * cross(j, B)
+ nu_visc * (
laplace_vec(uu)
+ Scalar(1. / 3.) * gradient_of_divergence(uu)
+ Scalar(2.) * mul(S, gradient(lnrho))
)
+ zeta * gradient_of_divergence(uu);
return mom;
}
#elif LTEMPERATURE
// Right-hand side used to advance uu in solve() when LTEMPERATURE is enabled
// (and LENTROPY is not). The pressure term is built from the temperature
// field tt instead of the entropy.
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar tt) {
Vector mom;
const Matrix S = stress_tensor(uu);
// (cp - cv) * (grad T + T * grad lnrho)
const Vector pressure_term = (cp_sound - cv_sound) * (gradient(tt) + value(tt) * gradient(lnrho));
mom = -mul(gradients(uu), value(uu)) -
pressure_term +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
// Subtracting (0, 0, -10.0) adds 10.0 to the z-component.
// NOTE(review): confirm this is the intended direction of gravity.
#if LGRAVITY
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom;
}
#else
// Right-hand side used to advance uu in solve() when neither LENTROPY nor
// LTEMPERATURE is enabled. The pressure term uses the uniform cs2_sound.
Vector
momentum(in Vector uu, in Scalar lnrho) {
Vector mom;
const Matrix S = stress_tensor(uu);
// Isothermal: we have constant speed of sound
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
// Subtracting (0, 0, -10.0) adds 10.0 to the z-component.
// NOTE(review): confirm this is the intended direction of gravity.
#if LGRAVITY
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom;
}
#endif
// Right-hand side used to advance aa in solve():
// (u x B) - eta * (grad(div A) - laplace A), with B = curl(aa).
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
#if LENTROPY
// Logarithmic temperature at the current vertex:
// LNT0 + gamma * ss / cp_sound + (gamma - 1) * (lnrho - LNRHO0).
// LNT0 and LNRHO0 are defined outside this file.
Scalar
lnT( in Scalar ss, in Scalar lnrho) {
const Scalar lnT = LNT0 + gamma * value(ss) / cp_sound +
(gamma - Scalar(1.)) * (value(lnrho) - LNRHO0);
return lnT;
}
// Nabla dot (K nabla T) / (rho T)
// Heat conduction contribution to the entropy right-hand side. Returns
// cp_sound * chi * (first_term + second_term dot third_term), where chi is
// derived here from AC_THERMAL_CONDUCTIVITY, the density and cp_sound.
Scalar
heat_conduction( in Scalar ss, in Scalar lnrho) {
const Scalar inv_cp_sound = AcReal(1.) / cp_sound;
// chi is proportional to 1 / rho below, hence grad ln chi = -grad lnrho
const Vector grad_ln_chi = - gradient(lnrho);
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) +
(gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = gamma * inv_cp_sound * gradient(ss) +
(gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) +
gradient(lnrho)) + grad_ln_chi;
// chi = K / (rho * cp), with rho = exp(lnrho)
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * cp_sound);
return cp_sound * chi * (first_term + dot(second_term, third_term));
}
// Placeholder: ignores i, j and k and always returns 1. Not called anywhere in
// the visible part of this file.
Scalar
heating(const int i, const int j, const int k) {
return 1;
}
// Right-hand side used to advance ss in solve():
// -(u dot grad ss) + inv_pT * RHS + heat_conduction(ss, lnrho),
// where inv_pT = 1 / (rho * T) and RHS collects H_CONST - C_CONST plus the
// eta, nu_visc and zeta terms below. H_CONST and C_CONST are defined outside
// this file.
Scalar
entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa) {
const Matrix S = stress_tensor(uu);
// rho = exp(lnrho), T = exp(lnT)
const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Scalar RHS = H_CONST - C_CONST
+ eta * (mu0) * dot(j, j)
+ Scalar(2.) * exp(value(lnrho)) * nu_visc * contract(S)
+ zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
return - dot(value(uu), gradient(ss))
+ inv_pT * RHS
+ heat_conduction(ss, lnrho);
}
#endif
#if LTEMPERATURE
// Right-hand side used to advance tt in solve() when LTEMPERATURE is enabled.
// Sums advection of tt, two diffusion terms scaled by the hard-coded
// heat_diffusivity_k, a nu_visc * contract(S) / cv_sound term and a
// -(gamma - 1) * T * div u term.
Scalar
heat_transfer(in Vector uu, in Scalar lnrho, in Scalar tt)
{
const Matrix S = stress_tensor(uu);
const Scalar heat_diffusivity_k = 0.0008; //8e-4;
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) + heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) + nu_visc * contract(S) * (Scalar(1.) / cv_sound) - (gamma - 1) * value(tt) * divergence(uu);
}
#endif
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
#if LENTROPY
in Scalar ss = VTXBUF_ENTROPY;
out Scalar out_ss = VTXBUF_ENTROPY;
#endif
#if LTEMPERATURE
in Scalar tt = VTXBUF_TEMPERATURE;
out Scalar out_tt = VTXBUF_TEMPERATURE;
#endif
// Kernel entry point. For every enabled field, passes the output handle, the
// input handle, the matching right-hand side and dt to rk3() and assigns the
// result to the output handle. rk3() is defined outside this file.
// Which updates are compiled in is selected by LINDUCTION, LENTROPY and
// LTEMPERATURE; the momentum overload called depends on the same switches.
Kernel void
solve(Scalar dt) {
out_lnrho = rk3(out_lnrho, lnrho, continuity(uu, lnrho), dt);
#if LINDUCTION
out_aa = rk3(out_aa, aa, induction(uu, aa), dt);
#endif
#if LENTROPY
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt);
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
#elif LTEMPERATURE
out_uu =rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
#else
out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
#endif
}

4
acc/preprocess.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Preprocesses the given file using GCC. This script is usually automatically called in
# ./compile.sh, but may be called also individually for debugging purposes.
# All arguments are forwarded to gcc unchanged; "$@" keeps paths that contain
# spaces intact. The sed step strips the directives gcc leaves behind, since
# they are not recognized by the acc grammar.
gcc -E -x c "$@" | sed "s/#.*//g"

View File

@@ -0,0 +1,228 @@
#define LINDUCTION (1)
#define LENTROPY (1)
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
//Needed for gravity
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Returns sqrt((a - b) dot (a - b)). Despite the _x suffix this is the full
// Euclidean distance between a and b, not only its x-component.
// Not called anywhere in the visible part of this file.
Scalar
distance_x(Vector a, Vector b)
{
return sqrt(dot(a-b, a-b));
}
// Vector overload of value(): applies the scalar value() (defined outside this
// file) to uu.x, uu.y and uu.z and packs the results into a Vector.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a Matrix built from gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used to advance lnrho in solve():
// -(u dot grad lnrho) - div u.
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
// Gravitation in the negative x-direction.
// Returns the force per unit mass (-GM_star / RR^2, 0, 0), where RR is the
// x-offset of the vertex from star_pos_x. The x-component is negative on both
// sides of the star, and RR == 0 is not guarded against.
Vector
grav_force_line(const int3 vertexIdx)
{
// Physical position of the vertex: grid spacing times index, minus the origin
Vector vertex_pos = (Vector){dsx * vertexIdx.x - xorig, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
// Only star_pos.x is read below; the y and z entries simply mirror the vertex
Vector star_pos = (Vector){star_pos_x, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
const Scalar RR = vertex_pos.x - star_pos.x;
const Scalar G_force_abs = GM_star / (RR*RR); // Force per unit mass;
Vector G_force = (Vector){ - G_force_abs,
AcReal(0.0),
AcReal(0.0)};
return G_force;
}
#if LENTROPY
// Right-hand side used to advance uu in solve() when LENTROPY is enabled.
// Starts from the advection, pressure and viscous terms, then adds the entropy
// gradient term, the (j x B) / rho term and grav_force_line(vertexIdx).
// Both pressure-gradient terms use the uniform cs2_sound.
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
// Entropy part of the pressure gradient
mom = mom - cs2_sound * (Scalar(1.) / cp_sound) * gradient(ss);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Current density
const Vector j = (Scalar(1.) / mu0) * (grad_div - lap);
const Vector B = curl(aa);
// (j x B) / rho, with rho = exp(lnrho)
mom = mom + (Scalar(1.) / exp(value(lnrho))) * cross(j, B);
mom = mom + grav_force_line(vertexIdx);
return mom;
}
#else
// Right-hand side used to advance uu in solve() when LENTROPY is disabled:
// advection, cs2_sound-scaled pressure gradient and viscous terms, plus
// grav_force_line(vertexIdx).
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
mom = mom + grav_force_line(vertexIdx);
return mom;
}
#endif
// Right-hand side used to advance aa in solve():
// (u x B) - eta * (grad(div A) - laplace A), with B = curl(aa).
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
#if LENTROPY
// Logarithmic temperature at the current vertex:
// LNT0 + gamma * ss / cp_sound + (gamma - 1) * (lnrho - LNRHO0).
// LNT0 and LNRHO0 are defined outside this file.
Scalar
lnT( in Scalar ss, in Scalar lnrho) {
    // The entropy enters as ss / cv, and 1 / cv = gamma / cp. Without the gamma
    // factor the temperature is inconsistent with the cs2 expression
    // exp(gamma * ss / cp + (gamma - 1) * (lnrho - LNRHO0)).
    const Scalar lnT = LNT0 + gamma * value(ss) / cp_sound +
                       (gamma - AcReal(1.)) * (value(lnrho) - LNRHO0);
    return lnT;
}
// Nabla dot (K nabla T) / (rho T)
// Heat conduction contribution to the entropy right-hand side. Returns
// cp_sound * chi * (first_term + second_term dot third_term), with chi taken
// from the uniform of the same name.
Scalar
heat_conduction( in Scalar ss, in Scalar lnrho) {
const Scalar inv_cp_sound = AcReal(1.) / cp_sound;
// Zero vector: the gradient of ln chi contributes nothing to third_term
const Vector grad_ln_chi = (Vector) {
0,
0,
0
}; // TODO not used
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) +
(gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = gamma * inv_cp_sound * gradient(ss) +
(gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) +
gradient(lnrho)) + grad_ln_chi;
return cp_sound * chi * (first_term + dot(second_term, third_term));
}
// Placeholder: ignores i, j and k and always returns 1. Not called anywhere in
// the visible part of this file.
Scalar
heating(const int i, const int j, const int k) {
return 1;
}
// Right-hand side used to advance ss in solve():
// -(u dot grad ss) + inv_pT * (heating terms) + heat_conduction(ss, lnrho),
// where inv_pT = 1 / (rho * T) with rho = exp(lnrho) and T = exp(lnT).
// The heating terms are H_CONST - C_CONST plus the eta, nu_visc and zeta terms
// below. H_CONST and C_CONST are defined outside this file.
Scalar
entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa) {
    const Matrix S = stress_tensor(uu);
    // Current density:
    // j = nabla x nabla x A / mu0 = (nabla(nabla dot A) - nabla^2(A)) / mu0.
    // The 1 / mu0 factor is required for eta * mu0 * dot(j, j) below.
    const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa));
    // Density and temperature multiply in the denominator; summing them does
    // not give 1 / (rho * T)
    const Scalar inv_pT = AcReal(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
    return -dot(value(uu), gradient(ss)) +
           inv_pT * (H_CONST - C_CONST +
                     eta * mu0 * dot(j, j) +
                     AcReal(2.) * exp(value(lnrho)) * nu_visc * contract(S) +
                     zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu)
                    ) + heat_conduction(ss, lnrho);
}
#endif
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
#if LENTROPY
in Scalar ss = VTXBUF_ENTROPY;
out Scalar out_ss = VTXBUF_ENTROPY;
#endif
// Kernel entry point. For every enabled field, passes the output handle, the
// input handle, the matching right-hand side and dt to RK3() and hands the
// result to WRITE() together with the output handle. RK3 and WRITE are defined
// outside this file.
// LINDUCTION and LENTROPY select which updates are compiled in.
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
#if LENTROPY
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, ss, aa, vertexIdx), dt));
WRITE(out_ss, RK3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt));
#else
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
#endif
}

View File

@@ -0,0 +1,169 @@
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
//Needed for gravity
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Returns sqrt((a - b) dot (a - b)). Despite the _x suffix this is the full
// Euclidean distance between a and b, not only its x-component.
// Not called anywhere in the visible part of this file.
Scalar
distance_x(Vector a, Vector b)
{
return sqrt(dot(a-b, a-b));
}
// Vector overload of value(): applies the scalar value() (defined outside this
// file) to uu.x, uu.y and uu.z and packs the results into a Vector.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a Matrix built from gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used to advance lnrho in solve():
// -(u dot grad lnrho) - div u.
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
// "Line-like" gravity with no y-component
// Returns the force per unit mass (-GM_star / RR^2, 0, 0), where RR is the
// x-offset of the vertex from star_pos_x. The y- and z-components are both
// zero, the x-component is negative on both sides of the star, and RR == 0 is
// not guarded against.
Vector
grav_force_line(const int3 vertexIdx)
{
// Physical position of the vertex: grid spacing times index, minus the origin
Vector vertex_pos = (Vector){dsx * vertexIdx.x - xorig, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
// Only star_pos.x is read below; the y and z entries simply mirror the vertex
Vector star_pos = (Vector){star_pos_x, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
const Scalar RR = vertex_pos.x - star_pos.x;
const Scalar G_force_abs = GM_star / (RR*RR); // Force per unit mass;
Vector G_force = (Vector){ - G_force_abs,
AcReal(0.0),
AcReal(0.0)};
return G_force;
}
// Right-hand side used to advance uu in solve(): advection, cs2_sound-scaled
// pressure gradient, viscous terms and grav_force_line(vertexIdx), all summed
// in a single expression.
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
+ grav_force_line(vertexIdx);
return mom;
}
// Right-hand side used to advance aa in solve():
// (u x B) - eta * (grad(div A) - laplace A), with B = curl(aa).
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
// Kernel entry point. Passes the output handle, the input handle, the matching
// right-hand side and dt to RK3() and hands the result to WRITE() together
// with the output handle. RK3 and WRITE are defined outside this file.
// NOTE(review): LINDUCTION is not defined in the visible part of this file.
// Unless it is supplied externally, the preprocessor evaluates it as 0 and the
// induction update below is compiled out.
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
}

View File

@@ -0,0 +1,174 @@
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
//Needed for gravity
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Returns the Euclidean distance between a and b: sqrt((a - b) dot (a - b)).
Scalar
distance(Vector a, Vector b)
{
return sqrt(dot(a-b, a-b));
}
// Vector overload of value(): applies the scalar value() (defined outside this
// file) to uu.x, uu.y and uu.z and packs the results into a Vector.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a Matrix built from gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used to advance lnrho in solve():
// -(u dot grad lnrho) - div u.
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
// "Line-like" gravity with no y-component
// Returns the force per unit mass pointing from the vertex towards the star
// in the xz-plane, with magnitude GM_star / RR^2. The y-component is always
// zero. RR == 0 (vertex exactly at the star position) is not guarded against.
Vector
grav_force_line(const int3 vertexIdx)
{
// Physical position of the vertex: grid spacing times index, minus the origin
Vector vertex_pos = (Vector){dsx * vertexIdx.x - xorig, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
//Vector star_pos = (Vector){star_pos_x - xorig, dsy * vertexIdx.y - yorig, star_pos_z - zorig};
// The y entry copies the y of the vertex, so RR only measures the xz-offset
Vector star_pos = (Vector){star_pos_x, dsy * vertexIdx.y - yorig, star_pos_z};
//LIKE THIS: Vector star_pos = (Vector){star_pos_x, 0.0, star_pos_z};
const Scalar RR = distance(star_pos, vertex_pos);
const Scalar G_force_abs = GM_star / (RR*RR); // Force per unit mass;
//const Scalar G_force_abs = 1.0; // Simple temp. test;
// Magnitude times the unit vector from the star to the vertex, negated
Vector G_force = (Vector){ - G_force_abs*((vertex_pos.x-star_pos.x)/RR),
AcReal(0.0),
- G_force_abs*((vertex_pos.z-star_pos.z)/RR)};
//printf("G_force %e %e %e", G_force_abs.x, G_force_abs.y, G_force_abs.z)
return G_force;
}
// Right-hand side used to advance uu in solve(): advection, cs2_sound-scaled
// pressure gradient, viscous terms and grav_force_line(vertexIdx), all summed
// in a single expression.
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
+ grav_force_line(vertexIdx);
return mom;
}
// Right-hand side used to advance aa in solve():
// (u x B) - eta * (grad(div A) - laplace A), with B = curl(aa).
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
// Kernel entry point. Passes the output handle, the input handle, the matching
// right-hand side and dt to RK3() and hands the result to WRITE() together
// with the output handle. RK3 and WRITE are defined outside this file.
// NOTE(review): LINDUCTION is not defined in the visible part of this file.
// Unless it is supplied externally, the preprocessor evaluates it as 0 and the
// induction update below is compiled out.
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
}

View File

@@ -0,0 +1,233 @@
#define LINDUCTION (1)
#define LENTROPY (1)
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
//Needed for gravity
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Returns sqrt((a - b) dot (a - b)). Despite the _x suffix this is the full
// Euclidean distance between a and b, not only its x-component.
Scalar
distance_x(Vector a, Vector b)
{
return sqrt(dot(a-b, a-b));
}
// Vector overload of value(): applies the scalar value() (defined outside this
// file) to uu.x, uu.y and uu.z and packs the results into a Vector.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a Matrix built from gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used to advance lnrho in solve():
// -(u dot grad lnrho) - div u.
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
// "Line-like" gravity with no y-component
// Returns the force per unit mass pointing from the vertex towards the star
// in the xz-plane, with magnitude GM_star / RR^2. The y-component is always
// zero. RR == 0 (vertex exactly at the star position) is not guarded against.
Vector
grav_force_line(const int3 vertexIdx)
{
    // Physical position of the vertex: grid spacing times index, minus the origin
    Vector vertex_pos = (Vector){dsx * vertexIdx.x - xorig, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
    //Vector star_pos = (Vector){star_pos_x - xorig, dsy * vertexIdx.y - yorig, star_pos_z - zorig};
    // The y entry copies the y of the vertex, so RR only measures the xz-offset
    Vector star_pos = (Vector){star_pos_x, dsy * vertexIdx.y - yorig, star_pos_z};
    //LIKE THIS: Vector star_pos = (Vector){star_pos_x, 0.0, star_pos_z};

    // The distance helper defined in this file is named distance_x
    const Scalar RR = distance_x(star_pos, vertex_pos);
    const Scalar G_force_abs = GM_star / (RR*RR); // Force per unit mass;
    //const Scalar G_force_abs = 1.0; // Simple temp. test;

    // Magnitude times the unit vector from the star to the vertex, negated
    Vector G_force = (Vector){ - G_force_abs*((vertex_pos.x-star_pos.x)/RR),
                               AcReal(0.0),
                               - G_force_abs*((vertex_pos.z-star_pos.z)/RR)};
    return G_force;
}
#if LENTROPY
// Right-hand side used to advance uu in solve() when LENTROPY is enabled.
// Starts from the advection, pressure and viscous terms, then adds the entropy
// gradient term, the (j x B) / rho term and grav_force_line(vertexIdx).
// Both pressure-gradient terms use the uniform cs2_sound.
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
// Entropy part of the pressure gradient
mom = mom - cs2_sound * (Scalar(1.) / cp_sound) * gradient(ss);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Current density
const Vector j = (Scalar(1.) / mu0) * (grad_div - lap);
const Vector B = curl(aa);
// (j x B) / rho, with rho = exp(lnrho)
mom = mom + (Scalar(1.) / exp(value(lnrho))) * cross(j, B);
mom = mom + grav_force_line(vertexIdx);
return mom;
}
#else
// Right-hand side used to advance uu in solve() when LENTROPY is disabled:
// advection, cs2_sound-scaled pressure gradient and viscous terms, plus
// grav_force_line(vertexIdx).
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
mom = mom + grav_force_line(vertexIdx);
return mom;
}
#endif
// Right-hand side used to advance aa in solve():
// (u x B) - eta * (grad(div A) - laplace A), with B = curl(aa).
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
#if LENTROPY
// Logarithmic temperature at the current vertex:
// LNT0 + gamma * ss / cp_sound + (gamma - 1) * (lnrho - LNRHO0).
// LNT0 and LNRHO0 are defined outside this file.
Scalar
lnT( in Scalar ss, in Scalar lnrho) {
    // The entropy enters as ss / cv, and 1 / cv = gamma / cp. Without the gamma
    // factor the temperature is inconsistent with the cs2 expression
    // exp(gamma * ss / cp + (gamma - 1) * (lnrho - LNRHO0)).
    const Scalar lnT = LNT0 + gamma * value(ss) / cp_sound +
                       (gamma - AcReal(1.)) * (value(lnrho) - LNRHO0);
    return lnT;
}
// Nabla dot (K nabla T) / (rho T)
// Heat conduction contribution to the entropy right-hand side. Returns
// cp_sound * chi * (first_term + second_term dot third_term), with chi taken
// from the uniform of the same name.
Scalar
heat_conduction( in Scalar ss, in Scalar lnrho) {
const Scalar inv_cp_sound = AcReal(1.) / cp_sound;
// Zero vector: the gradient of ln chi contributes nothing to third_term
const Vector grad_ln_chi = (Vector) {
0,
0,
0
}; // TODO not used
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) +
(gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = gamma * inv_cp_sound * gradient(ss) +
(gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) +
gradient(lnrho)) + grad_ln_chi;
return cp_sound * chi * (first_term + dot(second_term, third_term));
}
// Placeholder: ignores i, j and k and always returns 1. Not called anywhere in
// the visible part of this file.
Scalar
heating(const int i, const int j, const int k) {
return 1;
}
// Right-hand side used to advance ss in solve():
// -(u dot grad ss) + inv_pT * (heating terms) + heat_conduction(ss, lnrho),
// where inv_pT = 1 / (rho * T) with rho = exp(lnrho) and T = exp(lnT).
// The heating terms are H_CONST - C_CONST plus the eta, nu_visc and zeta terms
// below. H_CONST and C_CONST are defined outside this file.
Scalar
entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa) {
    const Matrix S = stress_tensor(uu);
    // Current density:
    // j = nabla x nabla x A / mu0 = (nabla(nabla dot A) - nabla^2(A)) / mu0.
    // The 1 / mu0 factor is required for eta * mu0 * dot(j, j) below.
    const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa));
    // Density and temperature multiply in the denominator; summing them does
    // not give 1 / (rho * T)
    const Scalar inv_pT = AcReal(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
    return -dot(value(uu), gradient(ss)) +
           inv_pT * (H_CONST - C_CONST +
                     eta * mu0 * dot(j, j) +
                     AcReal(2.) * exp(value(lnrho)) * nu_visc * contract(S) +
                     zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu)
                    ) + heat_conduction(ss, lnrho);
}
#endif
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
#if LENTROPY
in Scalar ss = VTXBUF_ENTROPY;
out Scalar out_ss = VTXBUF_ENTROPY;
#endif
// Kernel entry point. For every enabled field, passes the output handle, the
// input handle, the matching right-hand side and dt to RK3() and hands the
// result to WRITE() together with the output handle. RK3 and WRITE are defined
// outside this file.
// LINDUCTION and LENTROPY select which updates are compiled in.
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
#if LENTROPY
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, ss, aa, vertexIdx), dt));
WRITE(out_ss, RK3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt));
#else
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
#endif
}

422
acc/samples/common_header.h Normal file
View File

@@ -0,0 +1,422 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Provides an interface to Astaroth. Contains all the necessary configuration
* structs and functions for running the code on multiple GPUs.
*
* All interface functions declared here (such as acInit()) operate all GPUs
* available in the node under the hood, and the user does not need any
* information about the decomposition, synchronization or such to use these
* functions.
*
*/
#pragma once
/* Prevent name mangling */
#ifdef __cplusplus
extern "C" {
#endif
#include <float.h> // FLT_EPSILON, etc
#include <stdlib.h> // size_t
#include <vector_types.h> // CUDA vector types (float4, etc)
/*
* =============================================================================
* Flags for auto-optimization
* =============================================================================
*/
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
#define BOUNDCONDS_OPTIMIZE (0)
#define GENERATE_BENCHMARK_DATA (0)
// Device info
#define REGISTERS_PER_THREAD (255)
#define MAX_REGISTERS_PER_BLOCK (65536)
#define MAX_THREADS_PER_BLOCK (1024)
#define MAX_TB_DIM (MAX_THREADS_PER_BLOCK)
#define NUM_ITERATIONS (10)
#define WARP_SIZE (32)
/*
* =============================================================================
* Compile-time constants used during simulation (user definable)
* =============================================================================
*/
#define STENCIL_ORDER (6)
///////////// PAD TEST
// NOTE: works only when nx is divisible by 32
//#define PAD_LEAD (32 - STENCIL_ORDER/2)
//#define PAD_SIZE (32 - STENCIL_ORDER)
///////////// PAD TEST
// L-prefix inherited from the old Astaroth, no idea what it means
// MV: L means a Logical switch variable, something having a true or false value.
#define LFORCING (0) // Note: forcing is disabled currently in the files generated by acc (compiler of our DSL)
#define LINDUCTION (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
/*
* =============================================================================
* Identifiers used to construct the parameter lists for AcMeshInfo
* (IntParamType and RealParamType)
* (user definable)
* =============================================================================
*/
// clang-format off
#define AC_FOR_INT_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_nx), \
FUNC(AC_ny), \
FUNC(AC_nz), \
FUNC(AC_mx), \
FUNC(AC_my), \
FUNC(AC_mz), \
FUNC(AC_nx_min), \
FUNC(AC_ny_min), \
FUNC(AC_nz_min), \
FUNC(AC_nx_max), \
FUNC(AC_ny_max), \
FUNC(AC_nz_max), \
/* Other */\
FUNC(AC_max_steps), \
FUNC(AC_save_steps), \
FUNC(AC_bin_steps), \
FUNC(AC_bc_type), \
/* Additional */\
FUNC(AC_mxy),\
FUNC(AC_nxy),\
FUNC(AC_nxyz)
#define AC_FOR_REAL_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_dsx), \
FUNC(AC_dsy), \
FUNC(AC_dsz), \
FUNC(AC_dsmin), \
/* physical grid*/\
FUNC(AC_xlen), \
FUNC(AC_ylen), \
FUNC(AC_zlen), \
FUNC(AC_xorig), \
FUNC(AC_yorig), \
FUNC(AC_zorig), \
/*Physical units*/\
FUNC(AC_unit_density),\
FUNC(AC_unit_velocity),\
FUNC(AC_unit_length),\
/* properties of gravitating star*/\
FUNC(AC_star_pos_x),\
FUNC(AC_star_pos_y),\
FUNC(AC_star_pos_z),\
FUNC(AC_M_star),\
/* Run params */\
FUNC(AC_cdt), \
FUNC(AC_cdtv), \
FUNC(AC_cdts), \
FUNC(AC_nu_visc), \
FUNC(AC_cs_sound), \
FUNC(AC_eta), \
FUNC(AC_mu0), \
FUNC(AC_relhel), \
FUNC(AC_cp_sound), \
FUNC(AC_gamma), \
FUNC(AC_cv_sound), \
FUNC(AC_lnT0), \
FUNC(AC_lnrho0), \
FUNC(AC_zeta), \
FUNC(AC_trans),\
/* Other */\
FUNC(AC_bin_save_t), \
/* Initial condition params */\
FUNC(AC_ampl_lnrho), \
FUNC(AC_ampl_uu), \
FUNC(AC_angl_uu), \
FUNC(AC_lnrho_edge),\
FUNC(AC_lnrho_out),\
/* Additional helper params */\
/* (deduced from other params do not set these directly!) */\
FUNC(AC_G_CONST),\
FUNC(AC_GM_star),\
FUNC(AC_sq2GM_star),\
FUNC(AC_cs2_sound), \
FUNC(AC_inv_dsx), \
FUNC(AC_inv_dsy), \
FUNC(AC_inv_dsz)
// clang-format on
/*
* =============================================================================
* Identifiers for VertexBufferHandle
* (i.e. the arrays used to construct AcMesh)
* (user definable)
* =============================================================================
*/
// clang-format off
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_LNRHO), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
// FUNC(VTXBUF_DYE),
#if LINDUCTION
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ),
#else
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)
#endif
#if LENTROPY
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_ENTROPY),
#else
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
#endif
#if LTEMPERATURE
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_TEMPERATURE),
#else
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
#endif
#define AC_FOR_VTXBUF_HANDLES(FUNC)\
AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
// clang-format on
/*
* =============================================================================
* Single/double precision switch
* =============================================================================
*/
#if AC_DOUBLE_PRECISION == 1
typedef double AcReal;
typedef double3 AcReal3;
#define AC_REAL_MAX (DBL_MAX)
#define AC_REAL_MIN (DBL_MIN)
#define AC_REAL_EPSILON (DBL_EPSILON)
#else
typedef float AcReal;
typedef float3 AcReal3;
#define AC_REAL_MAX (FLT_MAX)
#define AC_REAL_MIN (FLT_MIN)
#define AC_REAL_EPSILON (FLT_EPSILON)
#endif
typedef struct {
AcReal3 row[3];
} AcMatrix;
/*
* =============================================================================
* Helper macros
* =============================================================================
*/
#define AC_GEN_ID(X) X
#define AC_GEN_STR(X) #X
/*
* =============================================================================
* Error codes
* =============================================================================
*/
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
/*
* =============================================================================
* Reduction types
* =============================================================================
*/
typedef enum {
RTYPE_MAX,
RTYPE_MIN,
RTYPE_RMS,
RTYPE_RMS_EXP,
NUM_REDUCTION_TYPES
} ReductionType;
/*
* =============================================================================
* Definitions for the enums and structs for AcMeshInfo (DO NOT TOUCH)
* =============================================================================
*/
typedef enum {
AC_FOR_INT_PARAM_TYPES(AC_GEN_ID),
NUM_INT_PARAM_TYPES
} AcIntParam;
typedef enum {
AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID),
NUM_REAL_PARAM_TYPES
} AcRealParam;
extern const char* intparam_names[]; // Defined in astaroth.cu
extern const char* realparam_names[]; // Defined in astaroth.cu
typedef struct {
int int_params[NUM_INT_PARAM_TYPES];
AcReal real_params[NUM_REAL_PARAM_TYPES];
} AcMeshInfo;
/*
* =============================================================================
* Definitions for the enums and structs for AcMesh (DO NOT TOUCH)
* =============================================================================
*/
typedef enum {
AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) NUM_VTXBUF_HANDLES
} VertexBufferHandle;
extern const char* vtxbuf_names[]; // Defined in astaroth.cu
/*
typedef struct {
AcReal* data;
} VertexBuffer;
*/
// NOTE: there's no particular benefit in declaring AcMesh a class, since
// a library user may already have allocated memory for the vertex_buffers.
// But then we would allocate memory again when the user wants to start
// filling the class with data. => It's better to consider AcMesh as a
// payload-only struct
typedef struct {
AcReal* vertex_buffer[NUM_VTXBUF_HANDLES];
AcMeshInfo info;
} AcMesh;
/*
 * Size and indexing helpers for the vertex buffers of a mesh.
 *
 * Every macro takes an AcMeshInfo lvalue/expression. The argument is
 * parenthesized so that expressions such as `*info_ptr` expand correctly.
 *
 * AC_VTXBUF_SIZE             Number of vertices in one buffer (mx * my * mz),
 *                            as size_t. Each factor is converted to size_t
 *                            BEFORE multiplying so that the product cannot
 *                            overflow int on large meshes.
 * AC_VTXBUF_SIZE_BYTES       AC_VTXBUF_SIZE scaled by sizeof(AcReal).
 * AC_VTXBUF_COMPDOMAIN_SIZE  nx * ny * nz. Kept in int arithmetic so that the
 *                            type seen by existing callers does not change.
 * AC_VTXBUF_COMPDOMAIN_SIZE_BYTES
 *                            AC_VTXBUF_COMPDOMAIN_SIZE scaled by sizeof(AcReal).
 * AC_VTXBUF_IDX              Linear index of vertex (i, j, k): i varies
 *                            fastest, then j (stride mx), then k (stride
 *                            mx * my). Kept in int arithmetic (see above).
 */
#define AC_VTXBUF_SIZE(mesh_info)                                              \
    ((size_t)(mesh_info).int_params[AC_mx] *                                   \
     (size_t)(mesh_info).int_params[AC_my] *                                   \
     (size_t)(mesh_info).int_params[AC_mz])
#define AC_VTXBUF_SIZE_BYTES(mesh_info)                                        \
    (sizeof(AcReal) * AC_VTXBUF_SIZE(mesh_info))
#define AC_VTXBUF_COMPDOMAIN_SIZE(mesh_info)                                   \
    ((mesh_info).int_params[AC_nx] * (mesh_info).int_params[AC_ny] *           \
     (mesh_info).int_params[AC_nz])
#define AC_VTXBUF_COMPDOMAIN_SIZE_BYTES(mesh_info)                             \
    (sizeof(AcReal) * AC_VTXBUF_COMPDOMAIN_SIZE(mesh_info))
#define AC_VTXBUF_IDX(i, j, k, mesh_info)                                      \
    ((i) + (j) * (mesh_info).int_params[AC_mx] +                               \
     (k) * (mesh_info).int_params[AC_mx] * (mesh_info).int_params[AC_my])
/*
* =============================================================================
* Astaroth interface
* =============================================================================
*/
/** Starting point of all GPU computation. Handles the allocation and
initialization of *all memory needed on all GPUs in the node*. In other words,
sets up everything GPU-side so that calling any other GPU interface function
afterwards does not result in illegal memory accesses. */
AcResult acInit(const AcMeshInfo& mesh_info);
/** Splits the host_mesh and distributes it among the GPUs in the node */
AcResult acLoad(const AcMesh& host_mesh);
/** NOTE(review): undocumented in the original header. Presumably a partial
variant of acLoad() that transfers num_vertices vertices of host_mesh beginning
at grid position start; confirm against the implementation. */
AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices);
/** Does all three steps of the RK3 integration and computes the boundary
conditions when necessary. Note that the boundary conditions are not applied
after the final integration step.
The result can be fetched to CPU memory with acStore(). */
AcResult acIntegrate(const AcReal& dt);
/** Performs a single RK3 step without computing boundary conditions. */
AcResult acIntegrateStep(const int& isubstep, const AcReal& dt);
/** Applies boundary conditions on the GPU meshes and communicates the
ghost zones among GPUs if necessary */
AcResult acBoundcondStep(void);
/** Performs a scalar reduction of type rtype over vertex buffer a on all GPUs
in the node and returns the result.
*/
AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a);
/** Performs a vector reduction of type rtype over the three vertex buffers
a, b and c on all GPUs in the node and returns the result.
*/
AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a,
const VertexBufferHandle& b, const VertexBufferHandle& c);
/** Stores the mesh distributed among GPUs of the node back to a single host
* mesh */
AcResult acStore(AcMesh* host_mesh);
/** NOTE(review): undocumented in the original header. Presumably a partial
variant of acStore() that copies num_vertices vertices beginning at grid
position start back into host_mesh; confirm against the implementation. */
AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh);
/** Frees all GPU allocations and resets all devices in the node. Should be
* called at exit. */
AcResult acQuit(void);
/** Synchronizes all devices. All calls to Astaroth are asynchronous by default
unless otherwise stated. */
AcResult acSynchronize(void);
/* End extern "C" */
#ifdef __cplusplus
}
#endif
/*
* =============================================================================
* Notes
* =============================================================================
*/
/*
typedef enum {
VTX_BUF_LNRHO,
VTX_BUF_UUX,
VTX_BUF_UUY,
VTX_BUF_UUZ,
NUM_VERTEX_BUFFER_HANDLES
} VertexBufferHandle
// LNRHO etc
typedef struct {
AcReal* data;
} VertexBuffer;
// Host
typedef struct {
VertexBuffer vertex_buffers[NUM_VERTEX_BUFFER_HANDLES];
MeshInfo info;
} Mesh;
// Device
typedef struct {
VertexBuffer in[NUM_VERTEX_BUFFER_HANDLES];
VertexBuffer out[NUM_VERTEX_BUFFER_HANDLES];
} VertexBufferArray;
*/

View File

@@ -0,0 +1,49 @@
// TODO comments and reformatting
//Scalar
//dostuff(in Scalar uux)
//{
// return uux[vertexIdx.x, vertexIdx.y, vertexIdx.z];
//}
// stencil_assembly.in
// Sample stencil-assembly function.
// Reads the input field uux at the current vertex (vertexIdx) and at its two
// neighbours along the first index, and returns their weighted sum using the
// fixed coefficients {1, 2, 3} (i-1, i, i+1 respectively).
// The commented-out block below sketches how the coefficients could be
// selected by STENCIL_ORDER; it is currently inactive.
Preprocessed Scalar
some_exotic_stencil_computation(in Scalar uux)
{
//#if STENCIL_ORDER == 2
// const Scalar coefficients[] = {1, 1, 1};
//#else if STENCIL_ORDER == 4
// const Scalar coefficients[] = {....};
//#endif
// Index of the vertex this invocation operates on
int i = vertexIdx.x;
int j = vertexIdx.y;
int k = vertexIdx.z;
const Scalar coefficients[] = {1, 2, 3};
// Multi-dimensional subscript: [first, second, third] index of the field
return coefficients[0] * uux[i-1, j, k] +
coefficients[1] * uux[i, j, k] +
coefficients[2] * uux[i+1, j, k];
}
// stencil_process.in
//in Scalar uux_in = VTXBUF_UUX;
//out Scalar uux_out = VTXBUF_UUX;
//Kernel
//solve(Scalar dt)
//{
// uux_out = some_exotic_stencil(uux_in);
//}

View File

@@ -0,0 +1,149 @@
// TODO comments and reformatting
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar GM_star;
// Other uniforms types than Scalar or int not yet supported
// BUILTIN
//Scalar dot(...){}
// BUILTIN
//Scalar distance(Vector a, Vector b) { return sqrt(dot(a, b)); }
// BUILTIN
// Scalar first_derivative(Scalar pencil[], Scalar inv_ds) { return pencil[3] * inv_ds; }
// Sample "derivative": sums the STENCIL_ORDER+1 entries of pencil and scales
// the sum by inv_ds.
// NOTE(review): no finite-difference coefficients are applied, so this looks
// like a placeholder rather than an actual first derivative - confirm intent.
// pencil must hold at least STENCIL_ORDER+1 elements.
Scalar first_derivative(Scalar pencil[], Scalar inv_ds)
{
Scalar res = 0;
for (int i = 0; i < STENCIL_ORDER+1; ++i) {
res = res + pencil[i];
}
return inv_ds * res;
}
// Euclidean distance between points a and b: |a - b|.
// The previous implementation returned sqrt(a . b), which is not a distance
// (it is 0 whenever either point is the origin, making callers that divide by
// the result produce inf/NaN).
Scalar distance(Vector a, Vector b)
{
    Scalar dx = a.x - b.x;
    Scalar dy = a.y - b.y;
    Scalar dz = a.z - b.z;
    return sqrt(dx * dx + dy * dy + dz * dz);
}
// Returns GM_star divided by the distance() between a star fixed at the
// origin and the position of vertex (i, j, k), where the vertex position is
// the index scaled by the grid spacings (dsx, dsy, dsz).
// NOTE(review): no sign is applied and nothing guards against a zero
// distance - confirm both against the intended physics.
Scalar
gravity_potential(int i, int j, int k)
{
// The star position is hard-coded to the origin
Vector star_pos = (Vector){0, 0, 0};
Vector vertex_pos = (Vector){dsx * i, dsy * j, dsz * k};
return GM_star / distance(star_pos, vertex_pos);
}
// Derivative of gravity_potential() along the first index at vertex (i, j, k).
// Samples the potential on a pencil of STENCIL_ORDER+1 points centred on the
// vertex and passes it to first_derivative() together with 1/dsx.
//
// The pencil holds STENCIL_ORDER+1 entries, so the offsets must span
// [-STENCIL_ORDER/2, STENCIL_ORDER/2]. The earlier loop ran over
// [-STENCIL_ORDER, STENCIL_ORDER] and wrote 2*STENCIL_ORDER+1 entries, i.e.
// past the end of the array.
Scalar
gradx_gravity_potential(int i, int j, int k)
{
    Scalar pencil[STENCIL_ORDER + 1];
    for (int offset = -STENCIL_ORDER / 2; offset <= STENCIL_ORDER / 2; ++offset) {
        pencil[offset + STENCIL_ORDER / 2] = gravity_potential(i + offset, j, k);
    }
    Scalar inv_ds = Scalar(1.) / dsx;
    return first_derivative(pencil, inv_ds);
}
// Derivative of gravity_potential() along the second index at vertex (i, j, k).
// Samples the potential on a pencil of STENCIL_ORDER+1 points centred on the
// vertex and passes it to first_derivative() together with 1/dsy.
//
// The pencil holds STENCIL_ORDER+1 entries, so the offsets must span
// [-STENCIL_ORDER/2, STENCIL_ORDER/2]. The earlier loop ran over
// [-STENCIL_ORDER, STENCIL_ORDER] and wrote 2*STENCIL_ORDER+1 entries, i.e.
// past the end of the array.
Scalar
grady_gravity_potential(int i, int j, int k)
{
    Scalar pencil[STENCIL_ORDER + 1];
    for (int offset = -STENCIL_ORDER / 2; offset <= STENCIL_ORDER / 2; ++offset) {
        pencil[offset + STENCIL_ORDER / 2] = gravity_potential(i, j + offset, k);
    }
    Scalar inv_ds = Scalar(1.) / dsy;
    return first_derivative(pencil, inv_ds);
}
// Derivative of gravity_potential() along the third index at vertex (i, j, k).
// Samples the potential on a pencil of STENCIL_ORDER+1 points centred on the
// vertex and passes it to first_derivative() together with 1/dsz.
//
// The pencil holds STENCIL_ORDER+1 entries, so the offsets must span
// [-STENCIL_ORDER/2, STENCIL_ORDER/2]. The earlier loop ran over
// [-STENCIL_ORDER, STENCIL_ORDER] and wrote 2*STENCIL_ORDER+1 entries, i.e.
// past the end of the array.
Scalar
gradz_gravity_potential(int i, int j, int k)
{
    Scalar pencil[STENCIL_ORDER + 1];
    for (int offset = -STENCIL_ORDER / 2; offset <= STENCIL_ORDER / 2; ++offset) {
        pencil[offset + STENCIL_ORDER / 2] = gravity_potential(i, j, k + offset);
    }
    Scalar inv_ds = Scalar(1.) / dsz;
    return first_derivative(pencil, inv_ds);
}
// Sample momentum term: returns the vector of the three directional
// derivatives of the gravity potential at vertex (i, j, k).
// The input field uu is accepted but not used by this sample.
//
// The local was previously named gravity_potential, which shadowed the
// function of the same name (the code generator's symbol table is looked up
// by identifier only) and was misleading, since the value is the gradient of
// the potential rather than the potential itself.
Vector
momentum(int i, int j, int k, in Vector uu)
{
    Vector grad_potential = (Vector){gradx_gravity_potential(i, j, k),
                                     grady_gravity_potential(i, j, k),
                                     gradz_gravity_potential(i, j, k)};
    return grad_potential;
}

56
acc/src/acc.l Normal file
View File

@@ -0,0 +1,56 @@
%option yylineno

D [0-9]
L [a-zA-Z_]

%{
/*
 * Scanner for the Astaroth DSL. Turns the preprocessed source into the
 * tokens declared in acc.y (acc.tab.h). Keywords are matched before the
 * generic identifier rule; single-character operators and punctuation are
 * returned as their own character code.
 *
 * Diagnostics go to stderr: the code generator writes the generated CUDA
 * code with printf(), so stdout must not be polluted with messages.
 */
#include <stdio.h>

#include "acc.tab.h"
%}

%%

"Scalar"                { return SCALAR; } /* Builtin types */
"Vector"                { return VECTOR; }
"Matrix"                { return MATRIX; }

"void"                  { return VOID; } /* Rest of the types inherited from C */
"int"                   { return INT; }
"int3"                  { return INT3; }

"Kernel"                { return KERNEL; } /* Function specifiers */
"Preprocessed"          { return PREPROCESSED; }

"const"                 { return CONSTANT; }
"in"                    { return IN; } /* Device func storage specifiers */
"out"                   { return OUT; }
"uniform"               { return UNIFORM; }

"else if"               { return ELIF; }
"if"                    { return IF; }
"else"                  { return ELSE; }
"for"                   { return FOR; }
"while"                 { return WHILE; }

"return"                { return RETURN; }

{D}+"."?{D}*[flud]?     { return NUMBER; } /* Literals */
"."{D}+[flud]?          { return NUMBER; }
{L}({L}|{D})*           { return IDENTIFIER; }
\"[^"\n]*\"             { return IDENTIFIER; } /* String: ends at the FIRST closing quote, so two strings on one line stay separate */

"=="                    { return LEQU; } /* Logic operations */
"&&"                    { return LAND; }
"||"                    { return LOR; }
"<="                    { return LLEQU; }

"++"                    { return INPLACE_INC; }
"--"                    { return INPLACE_DEC; }

[-+*/;=\[\]{}(),\.<>!]  { return yytext[0]; } /* Characters ('!' is the unary operator accepted by the grammar) */

"//".*                  { /* Skip regular comments */ }
[ \t\n\v\r]+            { /* Ignore whitespace, tabs and newlines */ }
.                       { fprintf(stderr, "unrecognized char %d: [%c]\n", *yytext, *yytext); }
%%

234
acc/src/acc.y Normal file
View File

@@ -0,0 +1,234 @@
%{
#include <stdio.h>
#include <string.h>
#include "ast.h"
extern char* yytext;
int yylex();
int yyerror(const char* str);
int yyget_lineno();
#define YYSTYPE ASTNode* // Sets the default type
%}
%token CONSTANT IN OUT UNIFORM
%token IDENTIFIER NUMBER
%token RETURN
%token SCALAR VECTOR MATRIX
%token VOID INT INT3
%token IF ELSE FOR WHILE ELIF
%token LEQU LAND LOR LLEQU
%token KERNEL PREPROCESSED
%token INPLACE_INC INPLACE_DEC
%%
/*
 * Start symbol. Attaches the parsed program to the global `root` node
 * (declared in ast.h).
 * NOTE(review): root is dereferenced here, so it presumably must point to an
 * allocated node before yyparse() is called - confirm in the caller.
 */
root: program { root->lhs = $1; }
;
/*
 * A program is a (possibly empty) left-recursive sequence of function
 * definitions, global assignments and global declarations. Each step wraps
 * the program parsed so far (lhs) and the new item (rhs) in a fresh node;
 * assignments and declarations get ';' as postfix token.
 */
program: /* Empty*/ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); }
| program function_definition { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
| program assignment ';' /* Global definition */ { $$ = astnode_create(NODE_UNKNOWN, $1, $2); $$->postfix = ';'; }
| program declaration ';' /* Global declaration */ { $$ = astnode_create(NODE_UNKNOWN, $1, $2); $$->postfix = ';'; }
;
/*
* =============================================================================
* Functions
* =============================================================================
*/
function_definition: function_declaration compound_statement { $$ = astnode_create(NODE_FUNCTION_DEFINITION, $1, $2); }
;
function_declaration: declaration function_parameter_declaration { $$ = astnode_create(NODE_FUNCTION_DECLARATION, $1, $2); }
;
function_parameter_declaration: '(' ')' { $$ = astnode_create(NODE_FUNCTION_PARAMETER_DECLARATION, NULL, NULL); $$->prefix = '('; $$->postfix = ')'; }
| '(' declaration_list ')' { $$ = astnode_create(NODE_FUNCTION_PARAMETER_DECLARATION, $2, NULL); $$->prefix = '('; $$->postfix = ')'; }
;
/*
* =============================================================================
* Statement
* =============================================================================
*/
statement_list: statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| statement_list statement { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
compound_statement: '{' '}' { $$ = astnode_create(NODE_COMPOUND_STATEMENT, NULL, NULL); $$->prefix = '{'; $$->postfix = '}'; }
| '{' statement_list '}' { $$ = astnode_create(NODE_COMPOUND_STATEMENT, $2, NULL); $$->prefix = '{'; $$->postfix = '}'; }
;
statement: selection_statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| iteration_statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| exec_statement ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
;
selection_statement: IF expression else_selection_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = IF; }
;
else_selection_statement: compound_statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| compound_statement elif_selection_statement { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
| compound_statement ELSE compound_statement { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ELSE; }
;
elif_selection_statement: ELIF expression else_selection_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = ELIF; }
;
iteration_statement: WHILE expression compound_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = WHILE; }
| FOR for_expression compound_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = FOR; }
;
for_expression: '(' for_init_param for_other_params ')' { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = '('; $$->postfix = ')'; }
;
for_init_param: expression ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
| assignment ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
;
for_other_params: expression ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
| expression ';' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ';'; }
;
exec_statement: declaration { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| assignment { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| return return_statement { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
assignment: declaration '=' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '='; }
| expression '=' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '='; }
;
return_statement: /* Empty */ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); }
| expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
;
/*
* =============================================================================
* Declaration
* =============================================================================
*/
declaration_list: declaration { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| declaration_list ',' declaration { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ','; }
;
declaration: type_declaration identifier { $$ = astnode_create(NODE_DECLARATION, $1, $2); } // Note: accepts only one type qualifier. Good or not?
| type_declaration array_declaration { $$ = astnode_create(NODE_DECLARATION, $1, $2); }
;
array_declaration: identifier '[' ']' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->infix = '['; $$->postfix = ']'; }
| identifier '[' expression ']' { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '['; $$->postfix = ']'; }
;
type_declaration: type_specifier { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| type_qualifier type_specifier { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
/*
* =============================================================================
* Expressions
* =============================================================================
*/
expression_list: expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| expression_list ',' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ','; }
;
/*
 * An expression is a unary expression followed by any number of
 * (binary_operator unary_expression) pairs, built left-recursively.
 * All binary operators share the single binary_operator nonterminal, so the
 * grammar itself encodes no operator precedence or associativity; the chain
 * is kept in source order in the tree.
 */
expression: unary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| expression binary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
/* One link of the chain above: operator in lhs, right operand in rhs. */
binary_expression: binary_operator unary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
unary_expression: postfix_expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| unary_operator postfix_expression { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
postfix_expression: primary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| postfix_expression '[' expression_list ']' /* Subscript */ { $$ = astnode_create(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION, $1, $3); $$->infix = '['; $$->postfix = ']'; }
| cast_expression '{' expression_list '}' /* Array */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '{'; $$->postfix = '}'; }
| postfix_expression '(' ')' /* Function call */ { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->infix = '('; $$->postfix = ')'; }
| postfix_expression '(' expression_list ')' /* Function call */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '('; $$->postfix = ')'; }
| type_specifier '(' expression_list ')' /* Cast */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '('; $$->postfix = ')'; }
| postfix_expression '.' identifier /* Member access */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '.'; }
;
cast_expression: /* Empty: implicit cast */ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); }
| '(' type_specifier ')' { $$ = astnode_create(NODE_UNKNOWN, $2, NULL); $$->prefix = '('; $$->postfix = ')'; }
;
primary_expression: identifier { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| number { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| '(' expression ')' { $$ = astnode_create(NODE_UNKNOWN, $2, NULL); $$->prefix = '('; $$->postfix = ')'; }
;
/*
* =============================================================================
* Terminals
* =============================================================================
*/
binary_operator: '+' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '-' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '/' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '*' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '<' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '>' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| LEQU { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
| LAND { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
| LOR { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
| LLEQU { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
unary_operator: '-' /* C-style casts are disallowed, would otherwise be defined here */ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '!' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| INPLACE_INC { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->token = INPLACE_INC; }
| INPLACE_DEC { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->token = INPLACE_DEC; }
;
type_qualifier: KERNEL { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = KERNEL; }
| PREPROCESSED { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = PREPROCESSED; }
| CONSTANT { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = CONSTANT; }
| IN { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = IN; }
| OUT { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = OUT; }
| UNIFORM { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = UNIFORM; }
;
type_specifier: VOID { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VOID; }
| INT { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = INT; }
| INT3 { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = INT3; }
| SCALAR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALAR; }
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
;
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
number: NUMBER { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
return: RETURN { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
%%
/* Debug helper: writes the text of the most recently scanned token, followed
 * by a newline, to stdout. */
void
print(void)
{
    puts(yytext);
}
/*
 * Error callback invoked by the generated parser.
 * Writes the parser's message, the current line number and the offending
 * token text to stderr.
 *
 * Returns 0. The function is declared to return int, so it must return a
 * value on every path; the original fell off the end of the function.
 */
int
yyerror(const char* str)
{
    fprintf(stderr, "%s on line %d when processing char %d: [%s]\n", str, yyget_lineno(), *yytext, yytext);
    return 0;
}

126
acc/src/ast.h Normal file
View File

@@ -0,0 +1,126 @@
/*
Nodes for the Abstract Syntax Tree
Statement: syntactic unit that expresses some action.
May have internal components, expressions, which are evaluated
Statements: return value
block
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h> // strdup
#define BUFFER_SIZE (4096)
#define GEN_ID(X) X
#define GEN_STR(X) #X
#define FOR_NODE_TYPES(FUNC) \
FUNC(NODE_UNKNOWN), \
FUNC(NODE_DEFINITION), \
FUNC(NODE_GLOBAL_DEFINITION), \
FUNC(NODE_DECLARATION), \
FUNC(NODE_TYPE_QUALIFIER), \
FUNC(NODE_TYPE_SPECIFIER), \
FUNC(NODE_IDENTIFIER), \
FUNC(NODE_FUNCTION_DEFINITION), \
FUNC(NODE_FUNCTION_DECLARATION), \
FUNC(NODE_COMPOUND_STATEMENT), \
FUNC(NODE_FUNCTION_PARAMETER_DECLARATION), \
FUNC(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION)
/*
// Recreating strdup is not needed when using the GNU compiler.
// Let's also just say that anything but the GNU
// compiler is NOT supported, since there are also
// some gcc-specific calls in the files generated
// by flex and being completely compiler-independent is
// not a priority right now
#ifndef strdup
static inline char*
strdup(const char* in)
{
const size_t len = strlen(in) + 1;
char* out = malloc(len);
if (out) {
memcpy(out, in, len);
return out;
} else {
return NULL;
}
}
#endif
*/
typedef enum {
FOR_NODE_TYPES(GEN_ID),
NUM_NODE_TYPES
} NodeType;
/*
 * Node of the abstract syntax tree built by the parser (acc.y).
 * Every node has at most two children; tokens that surround or separate the
 * children are stored in prefix/infix/postfix instead of in extra nodes.
 */
typedef struct astnode_s {
int id; // Running number assigned by astnode_create()
struct astnode_s* lhs; // Left child (may be NULL)
struct astnode_s* rhs; // Right child (may be NULL)
NodeType type; // Type of the AST node
char* buffer; // Identifiers and other strings (NULL by default)
int token; // Type of a terminal (that is not a simple char)
int prefix; // Token placed before the children (e.g. '(' or IF), 0 if none
int infix; // Token placed between the children (e.g. '=' or ','), 0 if none
int postfix; // Token placed after the children (e.g. ')' or ';'), 0 if none
} ASTNode;
/*
 * Allocates and initializes a new AST node.
 *
 * type  Node type.
 * lhs   Left child, or NULL.
 * rhs   Right child, or NULL.
 *
 * Returns the new node; ownership passes to the caller (release the tree
 * with astnode_destroy()). Aborts if the allocation fails.
 *
 * Every field is initialized: `token` was previously left indeterminate even
 * though only some grammar rules assign it, so reading it on other nodes was
 * undefined behaviour. It now defaults to 0 like prefix/infix/postfix.
 */
static inline ASTNode*
astnode_create(const NodeType type, ASTNode* lhs, ASTNode* rhs)
{
    static int id_counter = 0; // Each node gets a unique, increasing id

    ASTNode* node = malloc(sizeof(node[0]));
    if (!node)
        abort(); // Out of memory: nothing sensible can be generated

    node->id      = id_counter++;
    node->type    = type;
    node->lhs     = lhs;
    node->rhs     = rhs;
    node->buffer  = NULL;
    node->token   = 0;
    node->prefix  = 0;
    node->infix   = 0;
    node->postfix = 0;

    return node;
}
/*
 * Stores a private copy of `buffer` in node->buffer.
 *
 * Any string previously held by the node is released (it used to be leaked).
 * Passing NULL clears the buffer instead of handing NULL to strdup().
 * The copy is made before the old string is freed, so passing the node's own
 * buffer is safe.
 */
static inline void
astnode_set_buffer(const char* buffer, ASTNode* node)
{
    char* copy = buffer ? strdup(buffer) : NULL;
    free(node->buffer);
    node->buffer = copy;
}
/*
 * Recursively frees `node`, both of its subtrees and its string buffer.
 * A NULL node is accepted and ignored, so the function can be called on an
 * empty tree or an absent child without a prior check.
 */
static inline void
astnode_destroy(ASTNode* node)
{
    if (!node)
        return;

    astnode_destroy(node->lhs);
    astnode_destroy(node->rhs);
    free(node->buffer); // free(NULL) is a no-op
    free(node);
}
extern ASTNode* root;
/*
typedef enum {
SCOPE_BLOCK
} ScopeType;
typedef struct symbol_s {
int type_specifier;
char* identifier;
int scope;
struct symbol_s* next;
} Symbol;
extern ASTNode* symbol_table;
*/

569
acc/src/code_generator.c Normal file
View File

@@ -0,0 +1,569 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "acc.tab.h"
#include "ast.h"
ASTNode* root = NULL;
static const char inout_name_prefix[] = "handle_";
static bool doing_stencil_assembly = true;
/*
* =============================================================================
* Translation
* =============================================================================
*/
#define TRANSLATION_TABLE_SIZE (1024)
// Maps a token value to the text emitted for it in the generated code.
// The table is indexed both by the parser's token constants (IF, SCALAR, ...)
// and by plain character codes (',', '(', ...). Entries that are not listed
// stay NULL; index 0 means "no token".
static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
[0] = NULL,
// Control flow
[IF] = "if",
[ELSE] = "else",
[ELIF] = "else if",
[WHILE] = "while",
[FOR] = "for",
// Type specifiers
[VOID] = "void",
[INT] = "int",
[INT3] = "int3",
[SCALAR] = "AcReal",
[VECTOR] = "AcReal3",
[MATRIX] = "AcMatrix",
// Type qualifiers
[KERNEL] = "template <int step_number> static "
"__global__", //__launch_bounds__(RK_THREADBLOCK_SIZE,
// RK_LAUNCH_BOUND_MIN_BLOCKS),
[PREPROCESSED] = "static __device__ "
"__forceinline__",
[CONSTANT] = "const",
[IN] = "in",
[OUT] = "out",
[UNIFORM] = "uniform",
// ETC
[INPLACE_INC] = "++",
[INPLACE_DEC] = "--",
// Single-character tokens (indexed by their character code)
[','] = ",",
[';'] = ";\n",
['('] = "(",
[')'] = ")",
['['] = "[",
[']'] = "]",
['{'] = "{\n",
['}'] = "}\n",
['='] = "=",
['+'] = "+",
['-'] = "-",
['/'] = "/",
['*'] = "*",
['<'] = "<",
['>'] = ">",
['!'] = "!",
['.'] = "."};
static const char*
translate(const int token)
{
assert(token >= 0);
assert(token < TRANSLATION_TABLE_SIZE);
if (token > 0) {
if (!translation_table[token])
printf("ERROR: unidentified token %d\n", token);
assert(translation_table[token]);
}
return translation_table[token];
}
/*
* =============================================================================
* Symbols
* =============================================================================
*/
// Kind of a symbol table entry. translate_latest_symbol() emits different
// code for functions, function parameters and everything else.
typedef enum {
SYMBOLTYPE_FUNCTION,
SYMBOLTYPE_FUNCTION_PARAMETER,
SYMBOLTYPE_OTHER,
NUM_SYMBOLTYPES
} SymbolType;
// Size of Symbol::identifier in bytes, including the terminating '\0'
#define MAX_ID_LEN (128)
// One entry of the symbol table.
typedef struct {
SymbolType type; // Kind of the symbol (see SymbolType)
int type_qualifier; // Token value passed to translate(), e.g. KERNEL or IN; 0 if none
int type_specifier; // Token value passed to translate(), e.g. SCALAR or INT
char identifier[MAX_ID_LEN]; // Name of the symbol, '\0'-terminated
} Symbol;
#define SYMBOL_TABLE_SIZE (4096)
static Symbol symbol_table[SYMBOL_TABLE_SIZE] = {};
static int num_symbols = 0;
/*
 * Searches the symbol table for `identifier`.
 *
 * Returns the handle (index) of the first entry whose name equals
 * `identifier`, or -1 if there is no such entry or `identifier` is NULL.
 */
static int
symboltable_lookup(const char* identifier)
{
    int handle = -1;

    if (identifier) {
        int idx = 0;
        // Stop at the first match so that the lowest handle wins
        while (idx < num_symbols && handle < 0) {
            if (!strcmp(identifier, symbol_table[idx].identifier))
                handle = idx;
            ++idx;
        }
    }
    return handle;
}
static void
add_symbol(const SymbolType type, const int tqualifier, const int tspecifier, const char* id)
{
assert(num_symbols < SYMBOL_TABLE_SIZE);
symbol_table[num_symbols].type = type;
symbol_table[num_symbols].type_qualifier = tqualifier;
symbol_table[num_symbols].type_specifier = tspecifier;
strcpy(symbol_table[num_symbols].identifier, id);
++num_symbols;
}
/*
 * Removes the entry `handle` from the symbol table.
 * The table is kept dense by moving the last entry into the freed slot, so
 * the order of the remaining entries is not preserved.
 */
static void
rm_symbol(const int handle)
{
    assert(handle >= 0 && handle < num_symbols);

    const int last = num_symbols - 1;
    if (handle != last)
        symbol_table[handle] = symbol_table[last];

    num_symbols = last;
}
static void
print_symbol(const int handle)
{
assert(handle < SYMBOL_TABLE_SIZE);
const char* fields[] = {translate(symbol_table[handle].type_qualifier),
translate(symbol_table[handle].type_specifier),
symbol_table[handle].identifier};
const size_t num_fields = sizeof(fields) / sizeof(fields[0]);
for (int i = 0; i < num_fields; ++i)
if (fields[i])
printf("%s ", fields[i]);
}
/*
 * Emits (to stdout) the generated code for the most recently added symbol.
 *
 * What is printed depends on the kind and the type qualifier of the symbol:
 *  - function:            decorated return type and name (Kernel and
 *                         Preprocessed functions get their own decoration,
 *                         Preprocessed names are prefixed "preprocessed_")
 *  - function parameter:  in/out parameters become a const pointer (stencil
 *                         assembly stage) or a const "...Data&" reference
 *                         (otherwise); other parameters are printed as-is
 *  - uniform:             nothing
 *  - in/out (global):     "static __device__ const auto handle_<name>"
 *  - anything else:       printed as-is with print_symbol()
 *
 * Requires a non-empty symbol table; this is now asserted, since an empty
 * table previously led to an access at index -1.
 */
static void
translate_latest_symbol(void)
{
    const int handle = num_symbols - 1;
    assert(handle >= 0);
    assert(handle < SYMBOL_TABLE_SIZE);

    const Symbol* symbol = &symbol_table[handle];
    const bool is_inout  = symbol->type_qualifier == IN || symbol->type_qualifier == OUT;

    // FUNCTION
    if (symbol->type == SYMBOLTYPE_FUNCTION) {
        // KERNEL FUNCTION
        if (symbol->type_qualifier == KERNEL) {
            printf("%s %s\n%s", translate(symbol->type_qualifier),
                   translate(symbol->type_specifier), symbol->identifier);
        }
        // PREPROCESSED FUNCTION
        else if (symbol->type_qualifier == PREPROCESSED) {
            printf("%s %s\npreprocessed_%s", translate(symbol->type_qualifier),
                   translate(symbol->type_specifier), symbol->identifier);
        }
        // OTHER FUNCTION
        else {
            const char* regular_function_decorator = "static __device__ __forceinline__";
            // Translate once; a missing qualifier (NULL) is printed as empty
            const char* qualifier = translate(symbol->type_qualifier);
            printf("%s %s %s\n%s", regular_function_decorator, qualifier ? qualifier : "",
                   translate(symbol->type_specifier), symbol->identifier);
        }
    }
    // FUNCTION PARAMETER
    else if (symbol->type == SYMBOLTYPE_FUNCTION_PARAMETER) {
        if (is_inout) {
            if (doing_stencil_assembly)
                printf("const __restrict__ %s* %s", translate(symbol->type_specifier),
                       symbol->identifier);
            else
                printf("const %sData& %s", translate(symbol->type_specifier), symbol->identifier);
        }
        else {
            print_symbol(handle);
        }
    }
    // UNIFORM
    else if (symbol->type_qualifier == UNIFORM) {
        /* Do nothing */
    }
    // IN / OUT (function parameters were already handled above)
    else if (is_inout) {
        const char* inout_type_qualifier = "static __device__ const auto";
        printf("%s %s%s", inout_type_qualifier, inout_name_prefix, symbol->identifier);
    }
    // OTHER
    else {
        print_symbol(handle);
    }
}
/**
 * Debug helper: dumps every live symbol table entry to stdout, one per line, as
 * "<index>: <qualifier> <specifier> <identifier> (<category>)", where the
 * category is "function", "function parameter" or "other".
 */
static void
print_symbol_table(void)
{
    for (int i = 0; i < num_symbols; ++i) {
        printf("%d: ", i);

        // Same field formatting as for a single symbol; reusing the helper
        // also avoids the inner loop counter that shadowed `i`.
        print_symbol(i);

        const char* category = "(other)";
        if (symbol_table[i].type == SYMBOLTYPE_FUNCTION)
            category = "(function)";
        else if (symbol_table[i].type == SYMBOLTYPE_FUNCTION_PARAMETER)
            category = "(function parameter)";

        printf("%s", category);
        printf("\n");
    }
}
/*
* =============================================================================
* State
* =============================================================================
*/
// True while traverse() is inside a NODE_DECLARATION subtree. While set, the
// regular prefix/infix/postfix/token output is suppressed; the declaration is
// printed once via translate_latest_symbol() when the node is left.
static bool inside_declaration = false;
// Set on entering a NODE_FUNCTION_DECLARATION and cleared after its LHS has
// been traversed; declarations seen in between are recorded as functions.
static bool inside_function_declaration = false;
// Set on entering a NODE_FUNCTION_PARAMETER_DECLARATION and cleared when that
// node is left; declarations seen in between are recorded as parameters.
static bool inside_function_parameter_declaration = false;
// Set when a KERNEL type qualifier node is visited and cleared at the end of
// the enclosing NODE_FUNCTION_DEFINITION; enables the kernel boilerplate.
static bool inside_kernel = false;
// Set when a PREPROCESSED type qualifier node is visited and cleared at the
// end of the enclosing NODE_FUNCTION_DEFINITION.
static bool inside_preprocessed = false;
// Symbol count recorded at a function declaration (after its LHS); symbols
// added beyond this index are removed when the function definition ends.
static int scope_start = 0;
/*
* =============================================================================
* AST traversal
* =============================================================================
*/
static void
traverse(const ASTNode* node)
{
// Prefix logic %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (node->type == NODE_FUNCTION_DECLARATION)
inside_function_declaration = true;
if (node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
inside_function_parameter_declaration = true;
if (node->type == NODE_DECLARATION)
inside_declaration = true;
if (!inside_declaration && translate(node->prefix))
printf("%s", translate(node->prefix));
// BOILERPLATE START////////////////////////////////////////////////////////
if (node->type == NODE_TYPE_QUALIFIER && node->token == KERNEL)
inside_kernel = true;
// Kernel parameter boilerplate
const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE, ";
if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
printf("%s ", kernel_parameter_boilerplate);
// Kernel builtin variables boilerplate (read input/output arrays and setup
// indices)
const char* kernel_builtin_variables_boilerplate = "GEN_KERNEL_BUILTIN_VARIABLES_"
"BOILERPLATE();";
if (inside_kernel && node->type == NODE_COMPOUND_STATEMENT) {
printf("%s ", kernel_builtin_variables_boilerplate);
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == IN) {
printf("const %sData %s = READ(%s%s);\n", translate(symbol_table[i].type_specifier),
symbol_table[i].identifier, inout_name_prefix, symbol_table[i].identifier);
} else if (symbol_table[i].type_qualifier == OUT) {
printf("%s %s = READ_OUT(%s%s);", translate(symbol_table[i].type_specifier), symbol_table[i].identifier, inout_name_prefix, symbol_table[i].identifier);
//printf("%s %s = buffer.out[%s%s][IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z)];\n", translate(symbol_table[i].type_specifier), symbol_table[i].identifier, inout_name_prefix, symbol_table[i].identifier);
}
}
}
// Preprocessed parameter boilerplate
if (node->type == NODE_TYPE_QUALIFIER && node->token == PREPROCESSED)
inside_preprocessed = true;
static const char
preprocessed_parameter_boilerplate[] = "const int3 vertexIdx, ";
if (inside_preprocessed && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
printf("%s ", preprocessed_parameter_boilerplate);
// BOILERPLATE END////////////////////////////////////////////////////////
// Enter LHS
if (node->lhs)
traverse(node->lhs);
// Infix logic %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (!inside_declaration && translate(node->infix))
printf("%s ", translate(node->infix));
if (node->type == NODE_FUNCTION_DECLARATION)
inside_function_declaration = false;
// If the node is a subscript expression and the expression list inside it is not empty
if (node->type == NODE_MULTIDIM_SUBSCRIPT_EXPRESSION && node->rhs)
printf("IDX(");
// Do a regular translation
if (!inside_declaration) {
const int handle = symboltable_lookup(node->buffer);
if (handle >= 0) { // The variable exists in the symbol table
const Symbol* symbol = &symbol_table[handle];
//if (symbol->type_qualifier == OUT) {
// printf("%s%s", inout_name_prefix, symbol->identifier);
//}
if (symbol->type_qualifier == UNIFORM) {
if (symbol->type_specifier == SCALAR)
printf("DCONST_REAL(AC_%s) ", symbol->identifier);
else if (symbol->type_specifier == INT)
printf("DCONST_INT(AC_%s) ", symbol->identifier);
else
printf("INVALID UNIFORM type specifier %s with %s\n",
translate(symbol->type_specifier), symbol->identifier);
}
else {
// Do a regular translation
if (translate(node->token))
printf("%s ", translate(node->token));
if (node->buffer)
printf("%s ", node->buffer);
}
}
else {
// Do a regular translation
if (translate(node->token))
printf("%s ", translate(node->token));
if (node->buffer)
printf("%s ", node->buffer);
}
}
if (node->type == NODE_FUNCTION_DECLARATION) {
scope_start = num_symbols;
}
// Enter RHS
if (node->rhs)
traverse(node->rhs);
// Postfix logic %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// If the node is a subscript expression and the expression list inside it is not empty
if (node->type == NODE_MULTIDIM_SUBSCRIPT_EXPRESSION && node->rhs)
printf(")"); // Closing bracket of IDX()
// Generate writeback boilerplate for OUT fields
if (inside_kernel && node->type == NODE_COMPOUND_STATEMENT) {
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == OUT) {
printf("WRITE_OUT(%s%s, %s);\n", inout_name_prefix, symbol_table[i].identifier, symbol_table[i].identifier);
//printf("buffer.out[%s%s][IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z)] = %s;\n", inout_name_prefix, symbol_table[i].identifier, symbol_table[i].identifier);
}
}
}
if (!inside_declaration && translate(node->postfix))
printf("%s", translate(node->postfix));
if (node->type == NODE_DECLARATION) {
inside_declaration = false;
int tqual = 0;
int tspec = 0;
if (node->lhs && node->lhs->lhs) {
if (node->lhs->lhs->type == NODE_TYPE_QUALIFIER)
tqual = node->lhs->lhs->token;
else if (node->lhs->lhs->type == NODE_TYPE_SPECIFIER)
tspec = node->lhs->lhs->token;
}
if (node->lhs && node->lhs->rhs) {
if (node->lhs->rhs->type == NODE_TYPE_SPECIFIER)
tspec = node->lhs->rhs->token;
}
// Determine symbol type
SymbolType symboltype = SYMBOLTYPE_OTHER;
if (inside_function_declaration)
symboltype = SYMBOLTYPE_FUNCTION;
else if (inside_function_parameter_declaration)
symboltype = SYMBOLTYPE_FUNCTION_PARAMETER;
// Determine identifier
if (node->rhs->type == NODE_IDENTIFIER) {
add_symbol(symboltype, tqual, tspec, node->rhs->buffer); // Ordinary
translate_latest_symbol();
}
else {
add_symbol(symboltype, tqual, tspec,
node->rhs->lhs->buffer); // Array
translate_latest_symbol();
// Traverse the expression once again, this time with
// "inside_declaration" flag off
printf("%s ", translate(node->rhs->infix));
if (node->rhs->rhs)
traverse(node->rhs->rhs);
printf("%s ", translate(node->rhs->postfix));
}
}
if (node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
inside_function_parameter_declaration = false;
if (node->type == NODE_FUNCTION_DEFINITION) {
while (num_symbols > scope_start)
rm_symbol(num_symbols - 1);
inside_kernel = false;
inside_preprocessed = false;
}
}
// TODO: these should use the generic type names SCALAR and VECTOR
/**
 * Prints the CUDA helper code built from the PREPROCESSED symbols in the table:
 *  1. a struct with one member per PREPROCESSED symbol, named
 *     "<translate(SCALAR)>Data",
 *  2. read_data(vertexIdx, buf, handle), which fills that struct by calling
 *     preprocessed_<identifier>(vertexIdx, buf[handle]) for every member,
 *  3. one accessor function per member that returns the member from the struct,
 *  4. an AcReal3Data struct holding three scalar structs (x, y, z) and a
 *     read_data overload taking an int3 handle.
 *
 * The emitted text is assembled from adjacent string literals with explicit
 * newlines. Backslash line continuations inside a literal splice the next
 * source line in verbatim, so the separation between tokens would depend on
 * how the source happens to be indented.
 */
static void
generate_preprocessed_structures(void)
{
    // PREPROCESSED DATA STRUCT
    printf("\n");
    printf("typedef struct {\n");
    for (int i = 0; i < num_symbols; ++i) {
        if (symbol_table[i].type_qualifier == PREPROCESSED)
            printf("%s %s;\n", translate(symbol_table[i].type_specifier),
                   symbol_table[i].identifier);
    }
    printf("} %sData;\n", translate(SCALAR));

    // FILLING THE DATA STRUCT
    printf("static __device__ __forceinline__ AcRealData\n"
           "read_data(const int3 vertexIdx,\n"
           "          AcReal* __restrict__ buf[], const int handle)\n"
           "{\n"
           "%sData data;\n",
           translate(SCALAR));
    for (int i = 0; i < num_symbols; ++i) {
        if (symbol_table[i].type_qualifier == PREPROCESSED)
            printf("data.%s = preprocessed_%s(vertexIdx, buf[handle]);\n",
                   symbol_table[i].identifier, symbol_table[i].identifier);
    }
    printf("return data;\n");
    printf("}\n");

    // FUNCTIONS FOR ACCESSING MEMBERS OF THE PREPROCESSED STRUCT
    for (int i = 0; i < num_symbols; ++i) {
        if (symbol_table[i].type_qualifier == PREPROCESSED)
            printf("static __device__ __forceinline__ %s\n"
                   "%s(const AcRealData& data)\n"
                   "{\n"
                   "return data.%s;\n"
                   "}\n",
                   translate(symbol_table[i].type_specifier), symbol_table[i].identifier,
                   symbol_table[i].identifier);
    }

    // Syntactic sugar: generate also a Vector data struct
    printf("typedef struct {\n"
           "AcRealData x;\n"
           "AcRealData y;\n"
           "AcRealData z;\n"
           "} AcReal3Data;\n"
           "\n"
           "static __device__ __forceinline__ AcReal3Data\n"
           "read_data(const int3 vertexIdx,\n"
           "          AcReal* __restrict__ buf[], const int3& handle)\n"
           "{\n"
           "AcReal3Data data;\n"
           "\n"
           "data.x = read_data(vertexIdx, buf, handle.x);\n"
           "data.y = read_data(vertexIdx, buf, handle.y);\n"
           "data.z = read_data(vertexIdx, buf, handle.z);\n"
           "\n"
           "return data;\n"
           "}\n");
}
/**
 * Entry point of the compiler.
 *
 * Expects exactly one flag:
 *   -sas  generate code for the stencil assembly stage
 *   -sps  generate code for the stencil processing stage
 * An unrecognized flag is reported and the stage selection is left unchanged.
 *
 * Runs yyparse(), then walks the resulting AST with traverse(), which prints
 * the generated CUDA code to stdout. For the stencil assembly stage the
 * preprocessed data structures are appended.
 *
 * All diagnostics go to stderr so that they never end up inside the generated
 * code on stdout.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on a usage error or when
 * parsing fails.
 */
int
main(int argc, char** argv)
{
    if (argc != 2) {
        fprintf(stderr, "Usage: ./acc [flags]\n"
                        "Flags:\n"
                        "\t-sas - Generates code for the stencil assembly stage\n"
                        "\t-sps - Generates code for the stencil processing "
                        "stage\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    if (!strcmp(argv[1], "-sas"))
        doing_stencil_assembly = true;
    else if (!strcmp(argv[1], "-sps"))
        doing_stencil_assembly = false;
    else
        // NOTE(review): the message presumes doing_stencil_assembly defaults
        // to true - confirm at its definition.
        fprintf(stderr, "Unknown flag %s. Generating stencil assembly.\n", argv[1]);

    root = astnode_create(NODE_UNKNOWN, NULL, NULL);

    const int retval = yyparse();
    if (retval) {
        fprintf(stderr, "COMPILATION FAILED\n");
        return EXIT_FAILURE;
    }

    // Traverse
    traverse(root);
    if (doing_stencil_assembly)
        generate_preprocessed_structures();

    // print_symbol_table();

    // Cleanup
    astnode_destroy(root);
    // fprintf(stderr, "COMPILATION SUCCESS\n");
    return EXIT_SUCCESS;
}

48
acc/test_grammar.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Grammar smoke test: builds acc, compiles the two sample stages and checks
# that the generated headers compile with nvcc inside a minimal test program.

# Only operate in the same directory with this script. Quoted so that paths
# containing spaces work; abort if the directory cannot be entered, since
# every later step uses relative paths.
cd "$(dirname "$0")" || exit 1
./build_acc.sh
mkdir -p testbin
./compile.sh samples/sample_stencil_process.sps
./compile.sh samples/sample_stencil_assembly.sas
mv stencil_process.cuh testbin/
mv stencil_assembly.cuh testbin/
# Generate the test program: the three %s placeholders are filled with the
# header names given after the format string.
printf "
#include <stdio.h>
#include <stdlib.h>
#include \"%s\" // i.e. astaroth.h
__constant__ AcMeshInfo d_mesh_info;
#define DCONST_INT(X) (d_mesh_info.int_params[X])
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
static __device__ __forceinline__ int
IDX(const int i)
{
return i;
}
static __device__ __forceinline__ int
IDX(const int i, const int j, const int k)
{
return DEVICE_VTXBUF_IDX(i, j, k);
}
static __device__ __forceinline__ int
IDX(const int3 idx)
{
return DEVICE_VTXBUF_IDX(idx.x, idx.y, idx.z);
}
#include \"%s\"
#include \"%s\"
int main(void) { printf(\"Grammar check complete.\\\nAll tests passed.\\\n\"); return EXIT_SUCCESS; }
" common_header.h stencil_assembly.cuh stencil_process.cuh >testbin/test.cu
# Build and run the test inside testbin; never run nvcc from the wrong place
cd testbin || exit 1
nvcc -std=c++11 test.cu -I ../samples -o test && ./test