Added Astaroth 2.0

This commit is contained in:
jpekkila
2019-06-14 14:18:35 +03:00
parent 4e4f84c8ff
commit 0e48766a68
87 changed files with 18058 additions and 1 deletions

17
3rdparty/setup_dependencies.sh vendored Executable file
View File

@@ -0,0 +1,17 @@
#!/bin/bash
# Fetches and builds the third-party dependencies (currently only SDL2) inside
# the directory the script is started from. Safe to re-run: an existing
# checkout is updated instead of cloned again.
INITIAL_DIR=$(pwd)
# Fetch SDL2
if [ ! -d SDL2 ]; then
    git clone https://github.com/davidsiaw/SDL2.git || exit 1
fi
# Stop here if the checkout is missing; otherwise the build would run in the
# wrong directory.
cd SDL2 || exit 1
git pull
# -p: do not fail when the build directory is left over from an earlier run
mkdir -p build
cd build && cmake .. && make -j
# See https://github.com/davidsiaw/SDL2/blob/master/docs/README-linux.md
# if there are issues with building
# Done
cd "$INITIAL_DIR"

172
CMakeLists.txt Normal file
View File

@@ -0,0 +1,172 @@
#
# CMakeLists.txt for generating the makefile for Astaroth.
# Usage: mkdir build && cd build && cmake <optional flags> ..
#
# For example: cmake -DDOUBLE_PRECISION=ON ..
#
# If you want to see the exact flags used during compilation, run
# "make -j VERBOSE=1"
#
# Make sure your machine satisfies the system requirements:
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements
#-------------------General---------------------------------------------------#
# cmake_minimum_required() has to come before project(): it establishes the
# policy defaults that project() and everything after it rely on.
cmake_minimum_required (VERSION 3.5.1) # Need >= 3.8 for first-class CUDA support
cmake_policy (SET CMP0023 NEW)
project(ASTAROTH_2.0 CXX)
set (CMAKE_CXX_STANDARD 98)
#-------------------Set user options with default values---------------------#
# Every option is a boolean cache entry and can be overridden on the command
# line, for example: cmake -DBUILD_DEBUG=ON ..
option(BUILD_DEBUG
       "Builds the program with extensive error checking"
       OFF)
option(BUILD_STANDALONE
       "Builds standalone Astaroth"
       ON)
option(DOUBLE_PRECISION
       "Generates double precision code"
       OFF)
option(TIARA_CLUSTER
       "Special settings for compilation TIARA GPU cluster"
       OFF)
option(MULTIGPU_ENABLED
       "If enabled, uses all the available GPUs"
       ON)
option(ALTER_CONF
       "If enabled, loads astaroth.conf from the build directory"
       OFF)
#-------------------Determine build type--------------------------------------#
# Supported values (case-sensitive):
#   RELEASE - best performance (the default)
#   DEBUG   - debug information, non-concurrent kernels (-DBUILD_DEBUG=ON)
set(CMAKE_BUILD_TYPE RELEASE)
if (BUILD_DEBUG)
    set(CMAKE_BUILD_TYPE DEBUG)
endif()
message(STATUS "Build type: " ${CMAKE_BUILD_TYPE})
#----------------------Find packages------------------------------------------#
# C++ compiler info: print both the compiler path and its vendor ID, each
# under its own variable name.
message(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER})
message(STATUS "CMAKE_CXX_COMPILER_ID: " ${CMAKE_CXX_COMPILER_ID})
# SDL 2
# The library is expected in 3rdparty/SDL2 (see 3rdparty/setup_dependencies.sh).
# PROJECT_SOURCE_DIR is used instead of CMAKE_SOURCE_DIR so that the paths stay
# correct when this project is added to a larger build with add_subdirectory();
# for a top-level build both variables are identical.
set(SDL2_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/3rdparty/SDL2/include/)
set(SDL2_LIBRARY_DIR ${PROJECT_SOURCE_DIR}/3rdparty/SDL2/build/)
set(SDL2_LIBRARY "SDL2")
include_directories(${SDL2_INCLUDE_DIR})
link_directories(${SDL2_LIBRARY_DIR})
# CUDA
# REQUIRED is intentionally not passed to find_package(): its failure message
# is confusing, so the reason is reported explicitly instead.
find_package(CUDA)
if (CUDA_FOUND)
    include_directories(${CUDA_INCLUDE_DIRS})
else ()
    message(FATAL_ERROR "CUDA not found")
endif()
# OpenMP
# Optional dependency: when available its flags are appended to the C++ flags,
# otherwise the build continues with a warning.
find_package(OpenMP)
if (OPENMP_FOUND)
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
else ()
    message(WARNING "OpenMP not found. All host-side concurrency disabled \
(lower performance).")
endif()
#----------------------Compilation settings-----------------------------------#
#Debug and verification
#set(CMAKE_VERBOSE_MAKEFILE OFF)
#set(CXX_VERBOSE_BUILD OFF)
#set(CUDA_VERBOSE_BUILD OFF)
#include(CTest)
#add_test(ac_test ac_run)
#find_program(MEMORYCHECK_COMMAND valgrind)
#set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full" )
#----------------------Setup defines------------------------------------------#
# Adds -D<macro_name>=1 when the variable named by flag_var is true and
# -D<macro_name>=0 otherwise, so the macro is always defined for the sources.
function(astaroth_define_switch flag_var macro_name)
    if (${flag_var})
        add_definitions(-D${macro_name}=1)
    else()
        add_definitions(-D${macro_name}=0)
    endif()
endfunction()

astaroth_define_switch(DOUBLE_PRECISION AC_DOUBLE_PRECISION)
# A full integration step is benchmarked by default, use this flag to override and
# benchmark RK3 only
astaroth_define_switch(GEN_BENCHMARK_RK3 GEN_BENCHMARK_RK3)
astaroth_define_switch(MULTIGPU_ENABLED AC_MULTIGPU_ENABLED)
#-----------------------TIARA specific options--------------------------------#
# Historical note: the CUDA include path used to be added by hand here:
#OLD#set (CXX_FLAGS_TIARA "-I/software/opt/cuda/9.0/include/")
# %JP: NOTE! This should not be needed anymore because the command
# find_package(CUDA) above should find and include this directory automatically
# What remains is a single extra compiler flag that is appended to
# CMAKE_CXX_FLAGS further below when TIARA_CLUSTER is enabled:
if (TIARA_CLUSTER)
set (CXX_FLAGS_TIARA "-mno-bmi2")
endif()
#----------------------Setup CXX compilation flags----------------------------#
# Every flag string is assembled with explicit single spaces. A backslash line
# continuation inside a quoted argument joins the two lines verbatim, so the
# separation between flags would otherwise depend on the indentation of the
# continued line.
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -march=native -pipe")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g")
set (CXX_FLAGS_WARNING "-Wall -Wextra -Werror -Wno-error=unused-parameter")
set (CXX_FLAGS_WARNING "${CXX_FLAGS_WARNING} -Wno-error=unused-function -Wno-error=unknown-pragmas")
# Also warn about implicit conversions if the compiler supports it.
# Both operands are quoted so that the compiler ID is compared as a string and
# is never re-interpreted as a variable name.
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    set (CXX_FLAGS_WARNING "${CXX_FLAGS_WARNING} -Wdouble-promotion -Wfloat-conversion")
endif()
# Other flags. -D_FORCE_INLINES is a workaround to some CUDA/C++ "feature"
# which botches the compilation ("memcpy was not declared in this scope")
# (Not required with cc >= 3.0)
#set(CXX_FLAGS_ETC "-D_FORCE_INLINES")
# %JP: CXX_FLAGS_TIARA should not be needed,
# see comments in "TIARA specific options"
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_WARNING} ${CXX_FLAGS_ETC} ${CXX_FLAGS_TIARA}")
message("CXX_FLAGS: " ${CMAKE_CXX_FLAGS})
#----------------------Setup core subdirectories------------------------------#
# The project root (.) is on the include path so that modules can refer to
# headers relative to it (f.ex. #include "common/stuff.h" instead of
# "../common/stuff"); include/ and src/ are added the same way.
include_directories(. include src)
# CUDA sources
add_subdirectory(src/core)
#----------------------Link---------------------------------------------------#
if (BUILD_STANDALONE)
    # Define the config directory: the build tree when ALTER_CONF is set, the
    # config/ directory of the sources otherwise. The PROJECT_* variables are
    # used so that the paths stay correct when this project is part of a larger
    # build; for a top-level build they equal the CMAKE_* ones.
    if (ALTER_CONF)
        set(ASTAROTH_CONF_PATH "${PROJECT_BINARY_DIR}/")
    else()
        set(ASTAROTH_CONF_PATH "${PROJECT_SOURCE_DIR}/config/")
    endif()
    # Add additional subdirectories
    add_subdirectory (src/standalone)
    cuda_add_executable(ac_run src/standalone/main.cc)
    # The keyword-less signature is kept on purpose: cuda_add_executable() links
    # with the plain signature, and CMP0023 (NEW) forbids mixing both forms on
    # one target.
    target_link_libraries(ac_run astaroth_standalone astaroth_core ${SDL2_LIBRARY})
endif()

18
LICENCE.txt Normal file
View File

@@ -0,0 +1,18 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/

118
README.md
View File

@@ -1,2 +1,118 @@
# Astaroth
# Astaroth - A Multi-GPU library for generic stencil computations
Astaroth is a single-node multi-GPU library for multiphysics and other problems, which involve stencil computations in a discrete mesh. It's licenced under the terms of the GNU General Public Licence, version 3, or later (see [LICENCE.txt](https://bitbucket.org/miikkavaisala/astaroth-code/src/master/astaroth_2.0/LICENCE.txt)). Astaroth ships with a domain-specific language, that can be used to translate high-level representation of the stencil computations into a heavily inlined GPU pipeline.
## System requirements
NVIDIA GPU with >= 3.0 compute capability. See https://en.wikipedia.org/wiki/CUDA#GPUs_supported.
## Building (3rd party libraries)
1. `cd 3rdparty`
1. `./setup_dependencies.sh` Note: this may take some time.
## Building (Astaroth 2.0)
1. `cd astaroth_2.0/build`
1. `cmake -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ..` (Use `cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ..` if compiling on TIARA)
1. `../scripts/compile_acc.sh && make -j`
1. `./ac_run <options>`
If you encounter issues, recheck that the 3rd party libraries were successfully built during the previous step.
### Available options
- `-s` simulation
- `-b` benchmark
- `-t` automated test (NOTE! This is expected to fail with the default configuration as there's no CPU model solution for forcing/entropy)
By default, the program does a real-time visualization of the simulation domain. The camera and the initial conditions can be controlled with the `arrow keys`, `pgup`, `pgdown` and `spacebar`.
## Generating documentation
Run `doxygen doxyfile` in the astaroth_2.0 directory. The generated files can be found in `doc/doxygen`. The main page of the documentation will be at `doc/doxygen/astaroth_doc_html/index.html`.
## Formatting
If you have clang-format, you may run `scripts/fix_style.sh`. This script will recursively fix style of all the source files down from the current working directory. The script will ask for a confirmation before making any changes.
## Directory structure
## Coding style.
### In a nutshell
- Use [K&R indentation style](https://en.wikipedia.org/wiki/Indentation_style#K&R_style) and 4 space tabs.
- Line width is 100 characters
- Start function names after a linebreak in source files.
- [Be generous with `const` type qualifiers](https://isocpp.org/wiki/faq/const-correctness).
- When in doubt, see [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
### Header example:
```cpp
// Licence notice and doxygen description here
#pragma once
#include "avoid_including_headers_here.h"
/** Doxygen comments */
void global_function(void);
```
### Source example:
```cpp
#include "parent_header.h"
#include <standard_library_headers.h>
#include "other_headers.h"
#include "more_headers.h"
typedef struct {
int data;
} SomeStruct;
static inline int small_function(const SomeStruct& stuff) { return stuff.data; }
// Pass constant structs always by reference (&) and use const type qualifier.
// Modified structs are always passed as pointers (*), never as references.
// Constant parameters should be on the left-hand side, while non-consts go to the right.
static void
local_function(const SomeStruct& constant_struct, SomeStruct* modified_struct)
{
modified_struct->data = constant_struct.data;
}
void
global_function(void)
{
return;
}
```
## Miikka's compilation notes
Modules used when compiling
* intel/2016
* hdf5/1.8.16_openmpi_1.10.2_ic16.0
* cmake/3.9.5
* openmpi/1.10.2_ic16.0
* gcc/5.3.0
* cuda/9.0
Requires this gcc flag to compile: `-mno-bmi2` Otherwise you get assembler error!
For stencil pre-processing `flex` and particularly `libfl` is required for `acc/code_generator.c` to compile.
Requires CUDA version 9.2 or above.
Comment out cudaGetDeviceCount(&num_devices) in astaroth.cu
OLD: `astaroth_2.0/acc/build.sh` only works when each line is written individually. (**solution needed**)
(**These are here because I don't dare to delete them yet** OLD: Intel compiler does not get correct flags with cmake on default settings.
This worked with 1.0: `cmake -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc -DDOUBLE_PRECISION=OFF -DBUILD_DEBUG=OFF ..`
but not this time. There is an issue with enabling C++11 and defining the compiler flags correctly in nvcc.
OLD: I need to put `-I/software/opt/cuda/9.0/include` into the ../CMakeLists.txt so that it compiles. )

5
acc/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
build
testbin
# Except this file
!.gitignore

42
acc/README.md Normal file
View File

@@ -0,0 +1,42 @@
# Dependencies
## Debian/Ubuntu
`apt install flex bison build-essential`
# Usage
* `./build_acc.sh # Builds the ASPL compiler (acc)`
* `./compile.sh <.sps or .sas source> # Compiles the given stage into CUDA`
* `./test.sh # Tries to compile the sample stages`
* `./clean.sh # Removes directories generated by build_acc.sh and test.sh`
## Example
- `./compile.sh src/stencil_assembly.sas # Generates stencil_assembly.cuh`
- `./compile.sh src/stencil_process.sps # Generates stencil_process.cuh`
# What happens under the hood
The compiler is made of a scanner (flex), parser (bison), implementation of the abstract syntax tree (AST) and a code generator.
The language is defined by tokens and grammars found in acc.l and acc.y. These files are given as input to flex and bison, which generate the scanning and parsing stages for the compiler. The resulting AST is defined in ast.h. Finally, we traverse the generated AST with our code generator, generating CUDA code.
## ACC compilation stages
### In short:
* Preprocess .ac
* Compile preprocessed .ac to .cuh
* Compile .cuh
### More detailed:
0. A Parser is generated: bison --verbose -d acc.y
0. A Scanner is generated: flex acc.l
0. The compiler is built: gcc -std=gnu11 code_generator.c acc.tab.c lex.yy.c -lfl
0. Source files (.sps and .sas) are preprocessed using the GCC preprocessor and cleaned of any residual directives. These directives would be useful when compiling the code further with GCC, but ACC does not need them and they are not recognized by our grammar.
0. Either the stencil processing stage (.sps) or the stencil assembly stage (.sas) are generated by passing the preprocessed file to acc. This emits the final CUDA code.
0. Compilation is continued with the NVIDIA CUDA compiler
### Even more detailed:
The NVIDIA CUDA compiler compiles .cuh to .fatbin, which is embedded into a C++ binary containing the host code of the program. A fatbin contains .cubin files, which contain the configuration of the GPU and the kernels in a streaming assembly code (.sass). We could also compile for a virtual architecture (.ptx) instead of the actual hardware-specific machine code (.cubin) by passing the -code=compute_XX flag to nvcc, which would compile the CUDA sources at runtime (just-in-time compilation, JIT) when creating the CUDA context. However, we always know which architecture we want to run the code on, and JIT compilation would just increase the time it takes to launch the program.
nvcc -DAC_DOUBLE_PRECISION=1 -ptx --relocatable-device-code true -O3 -std=c++11 --maxrregcount=255 -ftz=true -gencode arch=compute_60,code=sm_60 device.cu -I ../../include -I ../../
nvcc -DAC_DOUBLE_PRECISION=1 -cubin --relocatable-device-code true -O3 -std=c++11 --maxrregcount=255 -ftz=true -gencode arch=compute_60,code=sm_60 device.cu -I ../../include -I ../../
cuobjdump --dump-sass device.cubin > device.sass

25
acc/build_acc.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Builds the ASPL compiler (acc): generates the parser with bison and the
# scanner with flex, then compiles them together with the code generator.
# All products are written to ./build next to this script.
cd "$(dirname "$0")" || exit 1 # Only operate in the same directory with this script

COMPILER_NAME="acc"

# BASE_DIR is echoed below, so it has to be defined before it is printed
BASE_DIR=${PWD}
SRC_DIR=${BASE_DIR}/src
BUILD_DIR=${BASE_DIR}/build

# Report the directory only after it really exists
mkdir -p "${BUILD_DIR}" || exit 1
echo "Created" "${BUILD_DIR}"
cd "${BUILD_DIR}" || exit 1

echo "${BASE_DIR}"
echo "${SRC_DIR}"
echo "${BUILD_DIR}"

# Generate Bison headers
bison --verbose -d "${SRC_DIR}/${COMPILER_NAME}.y"

## Generate Flex sources and headers
flex "${SRC_DIR}/${COMPILER_NAME}.l"

## Compile the ASPL compiler
gcc -std=gnu11 "${SRC_DIR}/code_generator.c" "${COMPILER_NAME}.tab.c" lex.yy.c -lfl -I "${BUILD_DIR}" -I "${SRC_DIR}" -o "${COMPILER_NAME}"

5
acc/clean.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Removes the directories generated by build_acc.sh and test.sh.
# Abort if the script directory cannot be entered: rm -rf must never run in
# whatever directory the caller happened to be in.
cd "$(dirname "$0")" || exit 1 # Only operate in the same directory with this script
rm -rf build testbin

24
acc/compile.sh Executable file
View File

@@ -0,0 +1,24 @@
#!/bin/bash
# Usage ./compile <source file>
#
# Translates one DSL source file into CUDA: a .sas file produces
# stencil_assembly.cuh, a .sps file produces stencil_process.cuh. The output
# is written to the current working directory.
# Exit status: 1 when no source file is given or its extension is unknown.

ACC_DIR=$(dirname "$0")

if [ $# -lt 1 ]; then
    echo "Usage: $0 <.sps or .sas source>"
    exit 1
fi

FULL_NAME=$(basename -- "$1")
FILENAME="${FULL_NAME%.*}"
EXTENSION="${FULL_NAME##*.}"

if [ "${EXTENSION}" = "sas" ]; then
    echo "Generating stencil assembly stage ${FILENAME}.sas -> stencil_assembly.cuh"
    COMPILE_FLAGS="-sas" # Generate stencil assembly stage
    CUH_FILENAME="stencil_assembly.cuh"
elif [ "${EXTENSION}" = "sps" ]; then
    echo "Generating stencil processing stage: ${FILENAME}.sps -> stencil_process.cuh"
    COMPILE_FLAGS="-sps" # Generate stencil processing stage
    CUH_FILENAME="stencil_process.cuh"
else
    echo "Error: unknown extension" "${EXTENSION}" "of file" "${FULL_NAME}"
    echo "Extension should be either .sas or .sps"
    # Report the failure to the caller instead of exiting with status 0
    exit 1
fi

"${ACC_DIR}/preprocess.sh" "$1" | "${ACC_DIR}/build/acc" ${COMPILE_FLAGS} > "${CUH_FILENAME}"

View File

@@ -0,0 +1,26 @@
// Returns the element of the scalar field `vertex` stored at vertexIdx.
// NOTE(review): vertexIdx is not declared in this file; presumably it is
// provided by the generated code - confirm.
Preprocessed Scalar
value(in Scalar vertex)
{
return vertex[vertexIdx];
}
// Returns the vector (derx, dery, derz) of the scalar field `vertex`,
// evaluated at vertexIdx.
// NOTE(review): derx/dery/derz are defined outside this file; presumably the
// first derivatives along x, y and z - confirm.
Preprocessed Vector
gradient(in Scalar vertex)
{
return (Vector){derx(vertexIdx, vertex),
dery(vertexIdx, vertex),
derz(vertexIdx, vertex)};
}
// Builds a 3x3 matrix from the second-derivative helpers of `vertex` at
// vertexIdx. Only the upper triangle is evaluated; the entries below the
// diagonal are copied from it, so the result is symmetric by construction.
Preprocessed Matrix
hessian(in Scalar vertex)
{
Matrix hessian;
// Row 0: derxx, derxy, derxz
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex), derxz(vertexIdx, vertex)};
// Row 1: first entry reuses derxy from row 0
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
// Row 2: reuses derxz (row 0) and deryz (row 1)
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
return hessian;
}

View File

@@ -0,0 +1,265 @@
// Feature switches tested by the #if blocks further down in this file.
#define LINDUCTION (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define LGRAVITY (0)
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar cv_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar zeta;
// NOTE(review): the six integer uniforms below are not referenced by any
// function in this file.
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
// Vector overload of value(): applies the scalar value() to each of the three
// components of uu.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a matrix whose three entries are gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used for lnrho in solve():
// -dot(uu, gradient(lnrho)) - divergence(uu)
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
#if LENTROPY
// Right-hand side used for uu in solve() when LENTROPY is enabled.
// Sums, in order: the mul(gradients(uu), uu) term, the cs2-weighted entropy
// and density gradients, the cross(j, B) term divided by the density,
// the nu_visc term and the zeta term.
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa) {
const Matrix S = stress_tensor(uu);
// cs2_sound scaled by an exponential of the entropy and of lnrho - LNRHO0
const Scalar cs2 = cs2_sound * exp(gamma * value(ss) / cp_sound + (gamma - 1) * (value(lnrho) - LNRHO0));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Vector B = curl(aa);
// lnrho stores the logarithm of the density, hence 1 / exp(lnrho)
const Scalar inv_rho = Scalar(1.) / exp(value(lnrho));
// Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
// \1
const Vector mom = - mul(gradients(uu), value(uu))
- cs2 * ((Scalar(1.) / cp_sound) * gradient(ss) + gradient(lnrho))
+ inv_rho * cross(j, B)
+ nu_visc * (
laplace_vec(uu)
+ Scalar(1. / 3.) * gradient_of_divergence(uu)
+ Scalar(2.) * mul(S, gradient(lnrho))
)
+ zeta * gradient_of_divergence(uu);
return mom;
}
#elif LTEMPERATURE
// Right-hand side used for uu in solve() when LTEMPERATURE is enabled (and
// LENTROPY is not). The pressure term is built from the temperature field tt
// and the difference cp_sound - cv_sound.
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar tt) {
Vector mom;
const Matrix S = stress_tensor(uu);
const Vector pressure_term = (cp_sound - cv_sound) * (gradient(tt) + value(tt) * gradient(lnrho));
mom = -mul(gradients(uu), value(uu)) -
pressure_term +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
#if LGRAVITY
// Subtracting (0, 0, -10) increases the z component by 10.
// NOTE(review): confirm that this sign is the intended direction of gravity.
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom;
}
#else
// Right-hand side used for uu in solve() when neither LENTROPY nor
// LTEMPERATURE is enabled.
Vector
momentum(in Vector uu, in Scalar lnrho) {
Vector mom;
const Matrix S = stress_tensor(uu);
// Isothermal: we have constant speed of sound
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
#if LGRAVITY
// Subtracting (0, 0, -10) increases the z component by 10.
// NOTE(review): confirm that this sign is the intended direction of gravity.
mom = mom - (Vector){0, 0, -10.0};
#endif
return mom;
}
#endif
// Right-hand side used for aa in solve():
// cross(uu, curl(aa)) - eta * (gradient_of_divergence(aa) - laplace_vec(aa))
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
#if LENTROPY
// Returns LNT0 + gamma * ss / cp_sound + (gamma - 1) * (lnrho - LNRHO0),
// evaluated from the local values of ss and lnrho.
// NOTE(review): LNT0 and LNRHO0 are not defined in this file; presumably
// constants provided elsewhere - confirm.
Scalar
lnT( in Scalar ss, in Scalar lnrho) {
const Scalar lnT = LNT0 + gamma * value(ss) / cp_sound +
(gamma - Scalar(1.)) * (value(lnrho) - LNRHO0);
return lnT;
}
// Nabla dot (K nabla T) / (rho T)
// Evaluated as cp_sound * chi * (first_term + dot(second_term, third_term)),
// where chi is derived from AC_THERMAL_CONDUCTIVITY, the density and cp_sound.
Scalar
heat_conduction( in Scalar ss, in Scalar lnrho) {
const Scalar inv_cp_sound = AcReal(1.) / cp_sound;
// Taken to be minus the gradient of lnrho here
const Vector grad_ln_chi = - gradient(lnrho);
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) +
(gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = gamma * inv_cp_sound * gradient(ss) +
(gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) +
gradient(lnrho)) + grad_ln_chi;
const Scalar chi = AC_THERMAL_CONDUCTIVITY / (exp(value(lnrho)) * cp_sound);
return cp_sound * chi * (first_term + dot(second_term, third_term));
}
// Placeholder: ignores its three indices and always returns 1.
// NOTE(review): no call to heating() appears in this file.
Scalar
heating(const int i, const int j, const int k) {
return 1;
}
// Right-hand side used for ss in solve():
// -dot(uu, gradient(ss)) + RHS / (rho * T) + heat_conduction(ss, lnrho)
// NOTE(review): H_CONST and C_CONST are not defined in this file.
Scalar
entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa) {
const Matrix S = stress_tensor(uu);
// 1 / (rho * T), with rho = exp(lnrho) and T = exp(lnT)
const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Scalar RHS = H_CONST - C_CONST
+ eta * (mu0) * dot(j, j)
+ Scalar(2.) * exp(value(lnrho)) * nu_visc * contract(S)
+ zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
return - dot(value(uu), gradient(ss))
+ inv_pT * RHS
+ heat_conduction(ss, lnrho);
}
#endif
#if LTEMPERATURE
// Right-hand side used for tt in solve() when LTEMPERATURE is enabled.
// The diffusivity is a constant hard-coded inside the function.
Scalar
heat_transfer(in Vector uu, in Scalar lnrho, in Scalar tt)
{
const Matrix S = stress_tensor(uu);
const Scalar heat_diffusivity_k = 0.0008; //8e-4;
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) + heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) + nu_visc * contract(S) * (Scalar(1.) / cv_sound) - (gamma - 1) * value(tt) * divergence(uu);
}
#endif
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
// Each field is declared twice with the same buffer handle(s): once as `in`
// and once as `out` with an out_ prefix.
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
#if LENTROPY
in Scalar ss = VTXBUF_ENTROPY;
out Scalar out_ss = VTXBUF_ENTROPY;
#endif
#if LTEMPERATURE
in Scalar tt = VTXBUF_TEMPERATURE;
out Scalar out_tt = VTXBUF_TEMPERATURE;
#endif
// Kernel entry point: assigns to every enabled output field the result of
// rk3() applied to that field and its right-hand side, with time step dt.
// NOTE(review): the LENTROPY branch passes aa to momentum() and entropy(), so
// it only compiles when LINDUCTION is enabled as well.
Kernel void
solve(Scalar dt) {
out_lnrho = rk3(out_lnrho, lnrho, continuity(uu, lnrho), dt);
#if LINDUCTION
out_aa = rk3(out_aa, aa, induction(uu, aa), dt);
#endif
#if LENTROPY
out_uu = rk3(out_uu, uu, momentum(uu, lnrho, ss, aa), dt);
out_ss = rk3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt);
#elif LTEMPERATURE
out_uu =rk3(out_uu, uu, momentum(uu, lnrho, tt), dt);
out_tt = rk3(out_tt, tt, heat_transfer(uu, lnrho, tt), dt);
#else
out_uu = rk3(out_uu, uu, momentum(uu, lnrho), dt);
#endif
}

4
acc/preprocess.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Preprocesses the given file using GCC. This script is usually automatically called in
# ./compile.sh, but may be called also individually for debugging purposes.
# All arguments are forwarded to gcc unchanged ("$@" keeps arguments with
# spaces intact); everything from a '#' to the end of a line is then removed
# from the preprocessed output.
gcc -E -x c "$@" | sed "s/#.*//g"

View File

@@ -0,0 +1,228 @@
// Feature switches tested by the #if blocks further down in this file.
#define LINDUCTION (1)
#define LENTROPY (1)
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
// NOTE(review): the six integer uniforms below are not referenced by any
// function in this file.
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
// NOTE(review): star_pos_z is declared but not referenced in this file.
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
//Needed for gravity
// NOTE(review): only dsx, dsy and dsz are referenced in this file; the
// inv_ds* uniforms are not.
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Returns sqrt(dot(a - b, a - b)), i.e. the length of the full difference
// vector - not only of its x component, despite the name.
// NOTE(review): no call to distance_x() appears in this file.
Scalar
distance_x(Vector a, Vector b)
{
return sqrt(dot(a-b, a-b));
}
// Vector overload of value(): applies the scalar value() to each of the three
// components of uu.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a matrix whose three entries are gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used for lnrho in solve():
// -dot(uu, gradient(lnrho)) - divergence(uu)
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
// Gravitation in the negative x-direction.
// Returns (-GM_star / RR^2, 0, 0), where RR is the x-distance between the
// vertex and star_pos_x. The x component is negative on both sides of the
// star because RR enters only squared.
// NOTE(review): RR is zero for a vertex located exactly at star_pos_x, which
// makes the division below divide by zero - confirm this cannot occur.
Vector
grav_force_line(const int3 vertexIdx)
{
Vector vertex_pos = (Vector){dsx * vertexIdx.x - xorig, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
// Only star_pos.x is used below; y and z are copies of the vertex position
Vector star_pos = (Vector){star_pos_x, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
const Scalar RR = vertex_pos.x - star_pos.x;
const Scalar G_force_abs = GM_star / (RR*RR); // Force per unit mass;
Vector G_force = (Vector){ - G_force_abs,
AcReal(0.0),
AcReal(0.0)};
return G_force;
}
#if LENTROPY
// Right-hand side used for uu in solve() when LENTROPY is enabled.
// Built up step by step: the cs2_sound/viscosity terms, then the entropy
// gradient term, then the cross(j, B) term divided by the density, and
// finally the gravity term of grav_force_line().
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
mom = mom - cs2_sound * (Scalar(1.) / cp_sound) * gradient(ss);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Current density, including the 1 / mu0 factor
const Vector j = (Scalar(1.) / mu0) * (grad_div - lap);
const Vector B = curl(aa);
mom = mom + (Scalar(1.) / exp(value(lnrho))) * cross(j, B);
mom = mom + grav_force_line(vertexIdx);
return mom;
}
#else
// Right-hand side used for uu in solve() when LENTROPY is disabled: the
// cs2_sound/viscosity terms plus the gravity term of grav_force_line().
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu);
mom = mom + grav_force_line(vertexIdx);
return mom;
}
#endif
// Right-hand side used for aa in solve():
// cross(uu, curl(aa)) - eta * (gradient_of_divergence(aa) - laplace_vec(aa))
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
#if LENTROPY
// Logarithmic temperature from the local entropy and logarithmic density:
//   lnT = LNT0 + gamma * ss / cp_sound + (gamma - 1) * (lnrho - LNRHO0)
// The entropy term carries the factor gamma so that it agrees with the
// temperature gradient used in heat_conduction(), which is
// gamma / cp_sound * gradient(ss) + (gamma - 1) * gradient(lnrho).
Scalar
lnT( in Scalar ss, in Scalar lnrho) {
    const Scalar lnT = LNT0 + gamma * value(ss) / cp_sound +
                       (gamma - AcReal(1.)) * (value(lnrho) - LNRHO0);
    return lnT;
}
// Nabla dot (K nabla T) / (rho T)
// Evaluated as cp_sound * chi * (first_term + dot(second_term, third_term)),
// with chi taken from the uniform of the same name.
Scalar
heat_conduction( in Scalar ss, in Scalar lnrho) {
const Scalar inv_cp_sound = AcReal(1.) / cp_sound;
// Zero vector: it is added to third_term below but does not change it
const Vector grad_ln_chi = (Vector) {
0,
0,
0
}; // TODO not used
const Scalar first_term = gamma * inv_cp_sound * laplace(ss) +
(gamma - AcReal(1.)) * laplace(lnrho);
const Vector second_term = gamma * inv_cp_sound * gradient(ss) +
(gamma - AcReal(1.)) * gradient(lnrho);
const Vector third_term = gamma * (inv_cp_sound * gradient(ss) +
gradient(lnrho)) + grad_ln_chi;
return cp_sound * chi * (first_term + dot(second_term, third_term));
}
// Placeholder: ignores its three indices and always returns 1.
// NOTE(review): no call to heating() appears in this file.
Scalar
heating(const int i, const int j, const int k) {
return 1;
}
// Right-hand side used for ss in solve():
//   -dot(uu, gradient(ss)) + RHS / (rho * T) + heat_conduction(ss, lnrho)
// where RHS collects H_CONST - C_CONST, the eta * mu0 * dot(j, j) term and
// the two viscosity terms.
// NOTE(review): H_CONST and C_CONST are not defined in this file.
Scalar
entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa) {
    const Matrix S = stress_tensor(uu);
    // Current density: nabla x nabla x A / mu0 = (nabla(nabla dot A) - nabla^2(A)) / mu0.
    // The 1 / mu0 factor is applied here exactly as in momentum().
    const Vector j = (AcReal(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa));
    // 1 / (rho * T): density and temperature are multiplied, not added
    const Scalar inv_pT = AcReal(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
    return -dot(value(uu), gradient(ss)) +
           inv_pT * (H_CONST - C_CONST +
                     eta * mu0 * dot(j, j) +
                     AcReal(2.) * exp(value(lnrho)) * nu_visc * contract(S) +
                     zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu)
                    ) + heat_conduction(ss, lnrho);
}
#endif
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
// Each field is declared twice with the same buffer handle(s): once as `in`
// and once as `out` with an out_ prefix.
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
#if LENTROPY
in Scalar ss = VTXBUF_ENTROPY;
out Scalar out_ss = VTXBUF_ENTROPY;
#endif
// Kernel entry point: for every enabled field, passes the result of RK3() for
// that field and its right-hand side to WRITE(), with time step dt.
// NOTE(review): the LENTROPY branch passes aa to momentum() and entropy(), so
// it only compiles when LINDUCTION is enabled as well.
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
#if LENTROPY
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, ss, aa, vertexIdx), dt));
WRITE(out_ss, RK3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt));
#else
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
#endif
}

View File

@@ -0,0 +1,169 @@
// Declare uniforms (i.e. device constants)
// NOTE(review): of the uniforms below, the functions in this file reference
// only cs2_sound, nu_visc, eta, zeta, xorig/yorig/zorig, star_pos_x, GM_star
// and dsx/dsy/dsz.
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
//Needed for gravity
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Returns sqrt(dot(a - b, a - b)), i.e. the length of the full difference
// vector - not only of its x component, despite the name.
// NOTE(review): no call to distance_x() appears in this file.
Scalar
distance_x(Vector a, Vector b)
{
return sqrt(dot(a-b, a-b));
}
// Vector overload of value(): applies the scalar value() to each of the three
// components of uu.
Vector
value(in Vector uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
// Returns a matrix whose three entries are gradient(uu.x), gradient(uu.y) and
// gradient(uu.z), in that order.
Matrix
gradients(in Vector uu)
{
return (Matrix){gradient(uu.x), gradient(uu.y), gradient(uu.z)};
}
// Right-hand side used for lnrho in solve():
// -dot(uu, gradient(lnrho)) - divergence(uu)
Scalar
continuity(in Vector uu, in Scalar lnrho) {
return -dot(value(uu), gradient(lnrho)) - divergence(uu);
}
// Gravity acting along x only: both the y and the z component of the returned
// force are zero.
// Returns (-GM_star / RR^2, 0, 0), where RR is the x-distance between the
// vertex and star_pos_x. The x component is negative on both sides of the
// star because RR enters only squared.
// NOTE(review): RR is zero for a vertex located exactly at star_pos_x, which
// makes the division below divide by zero - confirm this cannot occur.
Vector
grav_force_line(const int3 vertexIdx)
{
Vector vertex_pos = (Vector){dsx * vertexIdx.x - xorig, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
// Only star_pos.x is used below; y and z are copies of the vertex position
Vector star_pos = (Vector){star_pos_x, dsy * vertexIdx.y - yorig, dsz * vertexIdx.z - zorig};
const Scalar RR = vertex_pos.x - star_pos.x;
const Scalar G_force_abs = GM_star / (RR*RR); // Force per unit mass;
Vector G_force = (Vector){ - G_force_abs,
AcReal(0.0),
AcReal(0.0)};
return G_force;
}
// Right-hand side used for uu in solve(): the cs2_sound/viscosity terms plus
// the gravity term of grav_force_line(), summed in a single expression.
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx) {
Vector mom;
const Matrix S = stress_tensor(uu);
mom = -mul(gradients(uu), value(uu)) -
cs2_sound * gradient(lnrho) +
nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + zeta * gradient_of_divergence(uu)
+ grav_force_line(vertexIdx);
return mom;
}
// Right-hand side used for aa in solve():
// cross(uu, curl(aa)) - eta * (gradient_of_divergence(aa) - laplace_vec(aa))
Vector
induction(in Vector uu, in Vector aa) {
// Note: We do (-nabla^2 A + nabla(nabla dot A)) instead of (nabla x (nabla
// x A)) in order to avoid taking the first derivative twice (did the math,
// yes this actually works. See pg.28 in arXiv:astro-ph/0109497)
// u cross B - ETA * mu0 * (mu0^-1 * [- laplace A + grad div A ])
const Vector B = curl(aa);
const Vector grad_div = gradient_of_divergence(aa);
const Vector lap = laplace_vec(aa);
// Note, mu0 is cancelled out
const Vector ind = cross(value(uu), B) - eta * (grad_div - lap);
return ind;
}
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
// Each field is declared twice with the same buffer handle(s): once as `in`
// and once as `out` with an out_ prefix.
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
// NOTE(review): LINDUCTION is not #defined anywhere in this file. Unless it is
// supplied from outside (e.g. as a -D argument to the preprocessing step), the
// condition evaluates to 0 and aa/out_aa are not declared - confirm intent.
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
// Kernel entry point: passes the result of RK3() for lnrho, (optionally) aa
// and uu, together with their right-hand sides, to WRITE(), with time step dt.
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
// NOTE(review): LINDUCTION is not #defined in this file, so this update is
// compiled only when the macro is supplied from outside.
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
}

View File

@@ -0,0 +1,174 @@
// Declare uniforms (i.e. device constants)
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
// Subtracted from the grid coordinates in grav_force_line()
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
//Needed for gravity
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Euclidean distance between the points a and b.
Scalar
distance(Vector a, Vector b)
{
    const Vector separation = a - b;
    return sqrt(dot(separation, separation));
}
// Value of the vector field uu at the current vertex, built component by
// component from the scalar value() of each component.
Vector
value(in Vector uu)
{
    const Scalar vx = value(uu.x);
    const Scalar vy = value(uu.y);
    const Scalar vz = value(uu.z);
    return (Vector){vx, vy, vz};
}
// Matrix whose rows are the gradients of the x, y and z components of uu.
Matrix
gradients(in Vector uu)
{
    const Vector grad_x = gradient(uu.x);
    const Vector grad_y = gradient(uu.y);
    const Vector grad_z = gradient(uu.z);
    return (Matrix){grad_x, grad_y, grad_z};
}
// Rate of change of lnrho: -(u . grad lnrho) - div u.
Scalar
continuity(in Vector uu, in Scalar lnrho)
{
    const Scalar advection = dot(value(uu), gradient(lnrho));
    return -advection - divergence(uu);
}
// "Line-like" gravity with no y-component
Vector
grav_force_line(const int3 vertexIdx)
{
    // Coordinates of this vertex: grid spacing times index, minus the origin offset
    const Scalar x = dsx * vertexIdx.x - xorig;
    const Scalar y = dsy * vertexIdx.y - yorig;
    const Scalar z = dsz * vertexIdx.z - zorig;
    const Vector vertex_pos = (Vector){x, y, z};

    // The star shares the y coordinate of the vertex, so the separation never
    // has a y component. star_pos_x and star_pos_z are used without an origin offset.
    const Vector star_pos = (Vector){star_pos_x, y, star_pos_z};

    const Scalar RR          = distance(star_pos, vertex_pos);
    const Scalar G_force_abs = GM_star / (RR * RR); // Force per unit mass

    // Magnitude times the unit vector pointing from the vertex towards the star
    const Scalar force_x = -G_force_abs * ((vertex_pos.x - star_pos.x) / RR);
    const Scalar force_z = -G_force_abs * ((vertex_pos.z - star_pos.z) / RR);

    return (Vector){force_x, AcReal(0.0), force_z};
}
// Rate of change of the velocity field uu at the current vertex.
// Sums, in this order: the mul(gradients(uu), value(uu)) term (negated),
// the cs2_sound * gradient(lnrho) term (subtracted), the nu_visc term,
// the zeta term and the gravity returned by grav_force_line().
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx)
{
    const Matrix stress = stress_tensor(uu);

    const Vector advection     = -mul(gradients(uu), value(uu));
    const Vector grad_pressure = cs2_sound * gradient(lnrho);
    const Vector shear         = nu_visc *
                                 (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
                                  Scalar(2.) * mul(stress, gradient(lnrho)));
    const Vector bulk          = zeta * gradient_of_divergence(uu);
    const Vector gravity       = grav_force_line(vertexIdx);

    // Same left-to-right summation order as a single chained expression
    return advection - grad_pressure + shear + bulk + gravity;
}
// Rate of change of the vector potential aa: u x B - eta * (grad div A - laplace A).
// The double curl of A is expressed as grad(div A) - laplace(A) so that the
// first derivative is not taken twice (see pg. 28 in arXiv:astro-ph/0109497).
// mu0 cancels out of the diffusion term and therefore does not appear.
Vector
induction(in Vector uu, in Vector aa)
{
    const Vector curl_curl_a = gradient_of_divergence(aa) - laplace_vec(aa);
    return cross(value(uu), curl(aa)) - eta * curl_curl_a;
}
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
// Each field is declared twice: an "in" handle that is read and an "out"
// handle that is written by solve(). Both refer to the same VTXBUF_* slots.
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
// The vector potential handles exist only when LINDUCTION is enabled
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
// Kernel entry point. For every field, passes the right-hand side computed by
// the matching function (continuity, induction, momentum) through RK3() and
// stores the result with WRITE() into the field's out handle.
// dt is forwarded unchanged to RK3() (presumably the time step - confirm).
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
// Vector potential is only integrated when LINDUCTION is enabled
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
}

View File

@@ -0,0 +1,233 @@
// Feature switches: select the magnetic (LINDUCTION) and entropy (LENTROPY)
// code paths guarded by #if further down in this file
#define LINDUCTION (1)
#define LENTROPY (1)
// Declare uniforms (i.e. device constants)
// Coefficients read by the right-hand-side functions below
uniform Scalar cs2_sound;
uniform Scalar nu_visc;
uniform Scalar cp_sound;
uniform Scalar mu0;
uniform Scalar eta;
uniform Scalar gamma;
uniform Scalar chi;
uniform Scalar zeta;
// Grid extents; not referenced by the functions visible in this file
uniform int nx_min;
uniform int ny_min;
uniform int nz_min;
uniform int nx;
uniform int ny;
uniform int nz;
// Origin offsets subtracted from the vertex coordinates in grav_force_line()
uniform Scalar xorig;
uniform Scalar yorig;
uniform Scalar zorig;
//Star position
// Only x and z are declared: grav_force_line() reuses the y coordinate of the vertex
uniform Scalar star_pos_x;
uniform Scalar star_pos_z;
uniform Scalar GM_star;
//Needed for gravity
// dsx/dsy/dsz are multiplied by the vertex index to get vertex coordinates
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
uniform Scalar inv_dsx;
uniform Scalar inv_dsy;
uniform Scalar inv_dsz;
// Euclidean distance between the points a and b.
Scalar
distance_x(Vector a, Vector b)
{
    const Vector separation = a - b;
    return sqrt(dot(separation, separation));
}
// Value of the vector field uu at the current vertex, built component by
// component from the scalar value() of each component.
Vector
value(in Vector uu)
{
    const Scalar vx = value(uu.x);
    const Scalar vy = value(uu.y);
    const Scalar vz = value(uu.z);
    return (Vector){vx, vy, vz};
}
// Matrix whose rows are the gradients of the x, y and z components of uu.
Matrix
gradients(in Vector uu)
{
    const Vector grad_x = gradient(uu.x);
    const Vector grad_y = gradient(uu.y);
    const Vector grad_z = gradient(uu.z);
    return (Matrix){grad_x, grad_y, grad_z};
}
// Rate of change of lnrho: -(u . grad lnrho) - div u.
Scalar
continuity(in Vector uu, in Scalar lnrho)
{
    const Scalar advection = dot(value(uu), gradient(lnrho));
    return -advection - divergence(uu);
}
// "Line-like" gravity with no y-component
Vector
grav_force_line(const int3 vertexIdx)
{
    // Coordinates of this vertex: grid spacing times index, minus the origin offset
    const Scalar x = dsx * vertexIdx.x - xorig;
    const Scalar y = dsy * vertexIdx.y - yorig;
    const Scalar z = dsz * vertexIdx.z - zorig;
    const Vector vertex_pos = (Vector){x, y, z};

    // The star shares the y coordinate of the vertex, so the separation never
    // has a y component. star_pos_x and star_pos_z are used without an origin offset.
    const Vector star_pos = (Vector){star_pos_x, y, star_pos_z};

    // NOTE(review): this file defines distance_x(), not distance(); confirm
    // that distance() is provided elsewhere.
    const Scalar RR          = distance(star_pos, vertex_pos);
    const Scalar G_force_abs = GM_star / (RR * RR); // Force per unit mass

    // Magnitude times the unit vector pointing from the vertex towards the star
    const Scalar force_x = -G_force_abs * ((vertex_pos.x - star_pos.x) / RR);
    const Scalar force_z = -G_force_abs * ((vertex_pos.z - star_pos.z) / RR);

    return (Vector){force_x, AcReal(0.0), force_z};
}
#if LENTROPY
// Rate of change of the velocity field uu with entropy and magnetic terms.
// On top of the hydrodynamic terms it subtracts the entropy gradient term,
// adds (1/rho) * (j x B) and finally the gravity from grav_force_line().
Vector
momentum(in Vector uu, in Scalar lnrho, in Scalar ss, in Vector aa, const int3 vertexIdx)
{
    const Matrix stress = stress_tensor(uu);

    // Hydrodynamic part
    const Vector advection     = -mul(gradients(uu), value(uu));
    const Vector grad_pressure = cs2_sound * gradient(lnrho);
    const Vector shear         = nu_visc *
                                 (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
                                  Scalar(2.) * mul(stress, gradient(lnrho)));
    const Vector bulk          = zeta * gradient_of_divergence(uu);
    const Vector hydro         = advection - grad_pressure + shear + bulk;

    // Entropy gradient contribution
    const Vector entropy_term = cs2_sound * (Scalar(1.) / cp_sound) * gradient(ss);

    // j = (grad div A - laplace A) / mu0 and B = curl A
    const Vector j       = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa));
    const Vector B       = curl(aa);
    const Vector lorentz = (Scalar(1.) / exp(value(lnrho))) * cross(j, B);

    return hydro - entropy_term + lorentz + grav_force_line(vertexIdx);
}
#else
// Rate of change of the velocity field uu without entropy or magnetic terms:
// hydrodynamic terms plus the gravity from grav_force_line().
Vector
momentum(in Vector uu, in Scalar lnrho, const int3 vertexIdx)
{
    const Matrix stress = stress_tensor(uu);

    const Vector advection     = -mul(gradients(uu), value(uu));
    const Vector grad_pressure = cs2_sound * gradient(lnrho);
    const Vector shear         = nu_visc *
                                 (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) +
                                  Scalar(2.) * mul(stress, gradient(lnrho)));
    const Vector bulk          = zeta * gradient_of_divergence(uu);
    const Vector hydro         = advection - grad_pressure + shear + bulk;

    return hydro + grav_force_line(vertexIdx);
}
#endif
// Rate of change of the vector potential aa: u x B - eta * (grad div A - laplace A).
// The double curl of A is expressed as grad(div A) - laplace(A) so that the
// first derivative is not taken twice (see pg. 28 in arXiv:astro-ph/0109497).
// mu0 cancels out of the diffusion term and therefore does not appear.
Vector
induction(in Vector uu, in Vector aa)
{
    const Vector curl_curl_a = gradient_of_divergence(aa) - laplace_vec(aa);
    return cross(value(uu), curl(aa)) - eta * curl_curl_a;
}
#if LENTROPY
// Logarithmic temperature at the current vertex:
// LNT0 + ss / cp_sound + (gamma - 1) * (lnrho - LNRHO0).
Scalar
lnT(in Scalar ss, in Scalar lnrho)
{
    return LNT0 + value(ss) / cp_sound +
           (gamma - AcReal(1.)) * (value(lnrho) - LNRHO0);
}
// Nabla dot (K nabla T) / (rho T)
// Returned as cp_sound * chi * (first_term + second_term . third_term).
Scalar
heat_conduction(in Scalar ss, in Scalar lnrho)
{
    const Scalar inv_cp_sound = AcReal(1.) / cp_sound;

    // Gradient of ln(chi); currently a zero vector (TODO not used)
    const Vector grad_ln_chi = (Vector){0, 0, 0};

    const Vector grad_ss    = gradient(ss);
    const Vector grad_lnrho = gradient(lnrho);

    const Scalar first_term  = gamma * inv_cp_sound * laplace(ss) +
                               (gamma - AcReal(1.)) * laplace(lnrho);
    const Vector second_term = gamma * inv_cp_sound * grad_ss +
                               (gamma - AcReal(1.)) * grad_lnrho;
    const Vector third_term  = gamma * (inv_cp_sound * grad_ss + grad_lnrho) + grad_ln_chi;

    const Scalar bracket = first_term + dot(second_term, third_term);
    return cp_sound * chi * bracket;
}
// Placeholder heating term: returns the same constant for every vertex.
// The indices i, j and k are accepted but ignored.
Scalar
heating(const int i, const int j, const int k)
{
    return 1;
}
// Rate of change of the entropy field ss at the current vertex:
//   -(u . grad ss) + (1 / (rho T)) * RHS + heat_conduction(ss, lnrho)
// where RHS collects H_CONST - C_CONST, the eta * mu0 * j^2 term and the
// nu_visc and zeta terms built from the velocity field.
Scalar
entropy(in Scalar ss, in Vector uu, in Scalar lnrho, in Vector aa)
{
    const Matrix S   = stress_tensor(uu);
    const Scalar rho = exp(value(lnrho));

    // 1 / (rho * T). rho and T must be multiplied; adding them gives a
    // prefactor with inconsistent units.
    const Scalar inv_pT = AcReal(1.) / (rho * exp(lnT(ss, lnrho)));

    // Current density j = (nabla x nabla x A) / mu0
    //                   = (nabla(nabla dot A) - nabla^2(A)) / mu0,
    // the same definition momentum() uses for the j x B term.
    const Vector j = (Scalar(1.) / mu0) * (gradient_of_divergence(aa) - laplace_vec(aa));

    const Scalar RHS = H_CONST - C_CONST +
                       eta * mu0 * dot(j, j) +
                       AcReal(2.) * rho * nu_visc * contract(S) +
                       zeta * rho * divergence(uu) * divergence(uu);

    return -dot(value(uu), gradient(ss)) +
           inv_pT * RHS +
           heat_conduction(ss, lnrho);
}
#endif
// Declare input and output arrays using locations specified in the
// array enum in astaroth.h
// Each field is declared twice: an "in" handle that is read and an "out"
// handle that is written by solve(). Both refer to the same VTXBUF_* slots.
in Scalar lnrho = VTXBUF_LNRHO;
out Scalar out_lnrho = VTXBUF_LNRHO;
in Vector uu = (int3) {VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ};
out Vector out_uu = (int3) {VTXBUF_UUX,VTXBUF_UUY,VTXBUF_UUZ};
// The vector potential handles exist only when LINDUCTION is enabled
#if LINDUCTION
in Vector aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
out Vector out_aa = (int3) {VTXBUF_AX,VTXBUF_AY,VTXBUF_AZ};
#endif
// The entropy handles exist only when LENTROPY is enabled
#if LENTROPY
in Scalar ss = VTXBUF_ENTROPY;
out Scalar out_ss = VTXBUF_ENTROPY;
#endif
// Kernel entry point. For every field, passes the right-hand side computed by
// the matching function through RK3() and stores the result with WRITE() into
// the field's out handle.
// dt is forwarded unchanged to RK3() (presumably the time step - confirm).
Kernel void
solve(Scalar dt) {
WRITE(out_lnrho, RK3(out_lnrho, lnrho, continuity(uu, lnrho), dt));
#if LINDUCTION
WRITE(out_aa, RK3(out_aa, aa, induction(uu, aa), dt));
#endif
// With LENTROPY the five-argument momentum() is used, which reads ss and aa.
// NOTE(review): that branch references aa, which is only declared when
// LINDUCTION is enabled.
#if LENTROPY
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, ss, aa, vertexIdx), dt));
WRITE(out_ss, RK3(out_ss, ss, entropy(ss, uu, lnrho, aa), dt));
#else
WRITE(out_uu, RK3(out_uu, uu, momentum(uu, lnrho, vertexIdx), dt));
#endif
}

422
acc/samples/common_header.h Normal file
View File

@@ -0,0 +1,422 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Provides an interface to Astaroth. Contains all the necessary configuration
* structs and functions for running the code on multiple GPUs.
*
* All interface functions declared here (such as acInit()) operate all GPUs
* available in the node under the hood, and the user does not need any
* information about the decomposition, synchronization or such to use these
* functions.
*
*/
#pragma once
/* Prevent name mangling */
#ifdef __cplusplus
extern "C" {
#endif
#include <float.h> // FLT_EPSILON, etc
#include <stdlib.h> // size_t
#include <vector_types.h> // CUDA vector types (float4, etc)
/*
* =============================================================================
* Flags for auto-optimization
* =============================================================================
*/
// Switches for the auto-optimization code paths (0 = disabled)
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
#define BOUNDCONDS_OPTIMIZE (0)
#define GENERATE_BENCHMARK_DATA (0)
// Device info
// Hard-coded limits; presumably those of the target GPU - verify when
// building for different hardware
#define REGISTERS_PER_THREAD (255)
#define MAX_REGISTERS_PER_BLOCK (65536)
#define MAX_THREADS_PER_BLOCK (1024)
#define MAX_TB_DIM (MAX_THREADS_PER_BLOCK)
#define NUM_ITERATIONS (10)
#define WARP_SIZE (32)
/*
* =============================================================================
* Compile-time constants used during simulation (user definable)
* =============================================================================
*/
#define STENCIL_ORDER (6)
///////////// PAD TEST
// NOTE: works only when nx is divisible by 32
//#define PAD_LEAD (32 - STENCIL_ORDER/2)
//#define PAD_SIZE (32 - STENCIL_ORDER)
///////////// PAD TEST
// L-prefix inherited from the old Astaroth, no idea what it means
// MV: L means a Logical switch variable, something having a true or false value.
#define LFORCING (0) // Note: forcing is disabled currently in the files generated by acc (compiler of our DSL)
// LINDUCTION, LENTROPY and LTEMPERATURE decide below which vertex buffer
// handles are generated
#define LINDUCTION (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
// Expands to a use of AcReal, which is typedef'd further down; the macro
// can therefore only be used after that typedef
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
/*
* =============================================================================
* Identifiers used to construct the parameter lists for AcMeshInfo
* (IntParamType and RealParamType)
* (user definable)
* =============================================================================
*/
// clang-format off
// X-macro lists: each applies FUNC to every parameter identifier and yields a
// comma-separated list. Expanded with AC_GEN_ID below to generate the
// AcIntParam and AcRealParam enums, so the order here fixes the enum values.
// The last entry of each list has no trailing comma.
#define AC_FOR_INT_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_nx), \
FUNC(AC_ny), \
FUNC(AC_nz), \
FUNC(AC_mx), \
FUNC(AC_my), \
FUNC(AC_mz), \
FUNC(AC_nx_min), \
FUNC(AC_ny_min), \
FUNC(AC_nz_min), \
FUNC(AC_nx_max), \
FUNC(AC_ny_max), \
FUNC(AC_nz_max), \
/* Other */\
FUNC(AC_max_steps), \
FUNC(AC_save_steps), \
FUNC(AC_bin_steps), \
FUNC(AC_bc_type), \
/* Additional */\
FUNC(AC_mxy),\
FUNC(AC_nxy),\
FUNC(AC_nxyz)
#define AC_FOR_REAL_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_dsx), \
FUNC(AC_dsy), \
FUNC(AC_dsz), \
FUNC(AC_dsmin), \
/* physical grid*/\
FUNC(AC_xlen), \
FUNC(AC_ylen), \
FUNC(AC_zlen), \
FUNC(AC_xorig), \
FUNC(AC_yorig), \
FUNC(AC_zorig), \
/*Physical units*/\
FUNC(AC_unit_density),\
FUNC(AC_unit_velocity),\
FUNC(AC_unit_length),\
/* properties of gravitating star*/\
FUNC(AC_star_pos_x),\
FUNC(AC_star_pos_y),\
FUNC(AC_star_pos_z),\
FUNC(AC_M_star),\
/* Run params */\
FUNC(AC_cdt), \
FUNC(AC_cdtv), \
FUNC(AC_cdts), \
FUNC(AC_nu_visc), \
FUNC(AC_cs_sound), \
FUNC(AC_eta), \
FUNC(AC_mu0), \
FUNC(AC_relhel), \
FUNC(AC_cp_sound), \
FUNC(AC_gamma), \
FUNC(AC_cv_sound), \
FUNC(AC_lnT0), \
FUNC(AC_lnrho0), \
FUNC(AC_zeta), \
FUNC(AC_trans),\
/* Other */\
FUNC(AC_bin_save_t), \
/* Initial condition params */\
FUNC(AC_ampl_lnrho), \
FUNC(AC_ampl_uu), \
FUNC(AC_angl_uu), \
FUNC(AC_lnrho_edge),\
FUNC(AC_lnrho_out),\
/* Additional helper params */\
/* (deduced from other params do not set these directly!) */\
FUNC(AC_G_CONST),\
FUNC(AC_GM_star),\
FUNC(AC_sq2GM_star),\
FUNC(AC_cs2_sound), \
FUNC(AC_inv_dsx), \
FUNC(AC_inv_dsy), \
FUNC(AC_inv_dsz)
// clang-format on
/*
* =============================================================================
* Identifiers for VertexBufferHandle
* (i.e. the arrays used to construct AcMesh)
* (user definable)
* =============================================================================
*/
// clang-format off
// X-macro lists of vertex buffer handles. Unlike the parameter lists, every
// entry ends with a comma, so the sub-lists can be concatenated by
// AC_FOR_VTXBUF_HANDLES and be followed directly by NUM_VTXBUF_HANDLES in the
// enum definition.
// NOTE: the trailing backslash after VTXBUF_UUZ splices the commented-out
// VTXBUF_DYE line into the macro; it is harmless only because that line is a
// comment.
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_LNRHO), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
// FUNC(VTXBUF_DYE),
// Vector potential buffers, present only when LINDUCTION is enabled
#if LINDUCTION
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ),
#else
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)
#endif
// Entropy buffer, present only when LENTROPY is enabled
#if LENTROPY
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_ENTROPY),
#else
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
#endif
// Temperature buffer, present only when LTEMPERATURE is enabled
#if LTEMPERATURE
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_TEMPERATURE),
#else
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
#endif
// Concatenation of all enabled handle lists
#define AC_FOR_VTXBUF_HANDLES(FUNC)\
AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
// clang-format on
/*
* =============================================================================
* Single/double precision switch
* =============================================================================
*/
// AcReal/AcReal3 are double/double3 when AC_DOUBLE_PRECISION == 1 and
// float/float3 otherwise (including when the macro is undefined, since an
// undefined identifier evaluates to 0 in #if). The AC_REAL_* limits follow
// the selected type.
#if AC_DOUBLE_PRECISION == 1
typedef double AcReal;
typedef double3 AcReal3;
#define AC_REAL_MAX (DBL_MAX)
#define AC_REAL_MIN (DBL_MIN)
#define AC_REAL_EPSILON (DBL_EPSILON)
#else
typedef float AcReal;
typedef float3 AcReal3;
#define AC_REAL_MAX (FLT_MAX)
#define AC_REAL_MIN (FLT_MIN)
#define AC_REAL_EPSILON (FLT_EPSILON)
#endif
// 3x3 matrix stored as three AcReal3 rows
typedef struct {
AcReal3 row[3];
} AcMatrix;
/*
* =============================================================================
* Helper macros
* =============================================================================
*/
// Generators passed to the AC_FOR_* lists: AC_GEN_ID yields the identifier
// itself (for enum definitions), AC_GEN_STR yields it as a string literal
#define AC_GEN_ID(X) X
#define AC_GEN_STR(X) #X
/*
* =============================================================================
* Error codes
* =============================================================================
*/
// Return status of the interface functions declared below
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
/*
* =============================================================================
* Reduction types
* =============================================================================
*/
// Selects the reduction performed by acReduceScal() and acReduceVec().
// NUM_REDUCTION_TYPES is the number of real entries, not a reduction itself.
typedef enum {
RTYPE_MAX,
RTYPE_MIN,
RTYPE_RMS,
RTYPE_RMS_EXP,
NUM_REDUCTION_TYPES
} ReductionType;
/*
* =============================================================================
* Definitions for the enums and structs for AcMeshInfo (DO NOT TOUCH)
* =============================================================================
*/
// Indices into AcMeshInfo::int_params, generated from AC_FOR_INT_PARAM_TYPES.
// NUM_INT_PARAM_TYPES is the number of entries.
typedef enum {
AC_FOR_INT_PARAM_TYPES(AC_GEN_ID),
NUM_INT_PARAM_TYPES
} AcIntParam;
// Indices into AcMeshInfo::real_params, generated from AC_FOR_REAL_PARAM_TYPES.
// NUM_REAL_PARAM_TYPES is the number of entries.
typedef enum {
AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID),
NUM_REAL_PARAM_TYPES
} AcRealParam;
extern const char* intparam_names[]; // Defined in astaroth.cu
extern const char* realparam_names[]; // Defined in astaroth.cu
// Mesh configuration: one int slot per AcIntParam and one AcReal slot per
// AcRealParam, indexed by the enum values above
typedef struct {
int int_params[NUM_INT_PARAM_TYPES];
AcReal real_params[NUM_REAL_PARAM_TYPES];
} AcMeshInfo;
/*
* =============================================================================
* Definitions for the enums and structs for AcMesh (DO NOT TOUCH)
* =============================================================================
*/
// Handles of the vertex buffers enabled at compile time. Every entry of
// AC_FOR_VTXBUF_HANDLES carries its own trailing comma, which is why no comma
// separates the list from NUM_VTXBUF_HANDLES (the number of buffers).
typedef enum {
AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) NUM_VTXBUF_HANDLES
} VertexBufferHandle;
extern const char* vtxbuf_names[]; // Defined in astaroth.cu
/*
typedef struct {
AcReal* data;
} VertexBuffer;
*/
// NOTE: there's no particular benefit declaring AcMesh a class, since
// a library user may already have allocated memory for the vertex_buffers.
// But then we would allocate memory again when the user wants to start
// filling the class with data. => Its better to consider AcMesh as a
// payload-only struct
// One AcReal pointer per VertexBufferHandle plus the mesh configuration
typedef struct {
AcReal* vertex_buffer[NUM_VTXBUF_HANDLES];
AcMeshInfo info;
} AcMesh;
// Size and indexing helpers for vertex buffers. mesh_info is an AcMeshInfo
// expression; it is parenthesized in every expansion so that arguments such
// as *ptr or a conditional expression work as expected.

// Number of vertices in one buffer (mx * my * mz). Each factor is widened to
// size_t before multiplying so that the product cannot overflow int on large
// meshes.
#define AC_VTXBUF_SIZE(mesh_info)                                              \
    ((size_t)(mesh_info).int_params[AC_mx] *                                   \
     (size_t)(mesh_info).int_params[AC_my] *                                   \
     (size_t)(mesh_info).int_params[AC_mz])

// Size of one buffer in bytes
#define AC_VTXBUF_SIZE_BYTES(mesh_info)                                        \
    (sizeof(AcReal) * AC_VTXBUF_SIZE(mesh_info))

// Number of vertices in the computational domain (nx * ny * nz).
// Kept as an int expression for compatibility with existing callers; it can
// overflow for very large meshes.
#define AC_VTXBUF_COMPDOMAIN_SIZE(mesh_info)                                   \
    ((mesh_info).int_params[AC_nx] * (mesh_info).int_params[AC_ny] *           \
     (mesh_info).int_params[AC_nz])

// Size of the computational domain in bytes, computed in size_t so that the
// product cannot overflow int
#define AC_VTXBUF_COMPDOMAIN_SIZE_BYTES(mesh_info)                             \
    (sizeof(AcReal) * (size_t)(mesh_info).int_params[AC_nx] *                  \
     (size_t)(mesh_info).int_params[AC_ny] *                                   \
     (size_t)(mesh_info).int_params[AC_nz])

// Linear index of vertex (i, j, k): i + j * mx + k * mx * my.
// Evaluated with the (integer) types of the arguments, as before.
#define AC_VTXBUF_IDX(i, j, k, mesh_info)                                      \
    ((i) + (j) * (mesh_info).int_params[AC_mx] +                               \
     (k) * (mesh_info).int_params[AC_mx] * (mesh_info).int_params[AC_my])
/*
* =============================================================================
* Astaroth interface
* =============================================================================
*/
// NOTE(review): these declarations sit inside extern "C" but take C++
// reference parameters, so the header can only be consumed by a C++/CUDA
// compiler.
/** Starting point of all GPU computation. Handles the allocation and
initialization of *all memory needed on all GPUs in the node*. In other words,
setups everything GPU-side so that calling any other GPU interface function
afterwards does not result in illegal memory accesses. */
AcResult acInit(const AcMeshInfo& mesh_info);
/** Splits the host_mesh and distributes it among the GPUs in the node */
AcResult acLoad(const AcMesh& host_mesh);
/** Variant of acLoad() taking a start index and a vertex count; presumably
loads only num_vertices vertices beginning at start (confirm against the
implementation). */
AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices);
/** Does all three steps of the RK3 integration and computes the boundary
conditions when necessary. Note that the boundary conditions are not applied
after the final integration step.
The result can be fetched to CPU memory with acStore(). */
AcResult acIntegrate(const AcReal& dt);
/** Performs a single RK3 step without computing boundary conditions. */
AcResult acIntegrateStep(const int& isubstep, const AcReal& dt);
/** Applies boundary conditions on the GPU meshes and communicates the
ghost zones among GPUs if necessary */
AcResult acBoundcondStep(void);
/** Performs a scalar reduction on all GPUs in the node and returns the result.
*/
AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a);
/** Performs a vector reduction on all GPUs in the node and returns the result.
*/
AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a,
const VertexBufferHandle& b, const VertexBufferHandle& c);
/** Stores the mesh distributed among GPUs of the node back to a single host
* mesh */
AcResult acStore(AcMesh* host_mesh);
/** Variant of acStore() taking a start index and a vertex count; presumably
stores only num_vertices vertices beginning at start (confirm against the
implementation). */
AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh);
/** Frees all GPU allocations and resets all devices in the node. Should be
* called at exit. */
AcResult acQuit(void);
/** Synchronizes all devices. All calls to Astaroth are asynchronous by default
unless otherwise stated. */
AcResult acSynchronize(void);
/* End extern "C" */
#ifdef __cplusplus
}
#endif
/*
* =============================================================================
* Notes
* =============================================================================
*/
/*
typedef enum {
VTX_BUF_LNRHO,
VTX_BUF_UUX,
VTX_BUF_UUY,
VTX_BUF_UUZ,
NUM_VERTEX_BUFFER_HANDLES
} VertexBufferHandle
// LNRHO etc
typedef struct {
AcReal* data;
} VertexBuffer;
// Host
typedef struct {
VertexBuffer vertex_buffers[NUM_VERTEX_BUFFER_HANDLES];
MeshInfo info;
} Mesh;
// Device
typedef struct {
VertexBuffer in[NUM_VERTEX_BUFFER_HANDLES];
VertexBuffer out[NUM_VERTEX_BUFFER_HANDLES];
} VertexBufferArray;
*/

View File

@@ -0,0 +1,49 @@
// TODO comments and reformatting
//Scalar
//dostuff(in Scalar uux)
//{
// return uux[vertexIdx.x, vertexIdx.y, vertexIdx.z];
//}
// stencil_assembly.in
// Example of a user-defined stencil: a weighted sum of the field uux at the
// current vertex and its two neighbours along x, with weights 1, 2 and 3.
Preprocessed Scalar
some_exotic_stencil_computation(in Scalar uux)
{
//#if STENCIL_ORDER == 2
// const Scalar coefficients[] = {1, 1, 1};
//#else if STENCIL_ORDER == 4
// const Scalar coefficients[] = {....};
//#endif
// Indices of the vertex being processed
int i = vertexIdx.x;
int j = vertexIdx.y;
int k = vertexIdx.z;
const Scalar coefficients[] = {1, 2, 3};
// Multi-index subscripts address the field at (x, y, z) vertex offsets
return coefficients[0] * uux[i-1, j, k] +
coefficients[1] * uux[i, j, k] +
coefficients[2] * uux[i+1, j, k];
}
// stencil_process.in
//in Scalar uux_in = VTXBUF_UUX;
//out Scalar uux_out = VTXBUF_UUX;
//Kernel
//solve(Scalar dt)
//{
// uux_out = some_exotic_stencil(uux_in);
//}

View File

@@ -0,0 +1,149 @@
// TODO comments and reformatting
// Grid spacings, used to turn vertex indices into coordinates and to scale
// the derivatives of the gravity potential
uniform Scalar dsx;
uniform Scalar dsy;
uniform Scalar dsz;
// Numerator of the gravity potential in gravity_potential()
uniform Scalar GM_star;
// Other uniforms types than Scalar or int not yet supported
// BUILTIN
//Scalar dot(...){}
// BUILTIN
//Scalar distance(Vector a, Vector b) { return sqrt(dot(a, b)); }
// BUILTIN
// Scalar first_derivative(Scalar pencil[], Scalar inv_ds) { return pencil[3] * inv_ds; }
// Sums the STENCIL_ORDER + 1 entries of pencil and scales the sum by inv_ds.
// All entries are weighted equally; no stencil coefficients are applied.
Scalar
first_derivative(Scalar pencil[], Scalar inv_ds)
{
    Scalar sum = 0;
    for (int idx = 0; idx <= STENCIL_ORDER; ++idx) {
        sum = sum + pencil[idx];
    }
    return inv_ds * sum;
}
// Euclidean distance between the points a and b.
// The separation a - b must be squared; sqrt(a . b) is not a distance and is
// always zero when one of the points is the origin.
Scalar
distance(Vector a, Vector b)
{
    const Scalar dx = a.x - b.x;
    const Scalar dy = a.y - b.y;
    const Scalar dz = a.z - b.z;
    return sqrt(dx * dx + dy * dy + dz * dz);
}
// GM_star divided by the distance between the star, fixed at the origin, and
// the vertex (i, j, k) placed at (dsx * i, dsy * j, dsz * k).
Scalar
gravity_potential(int i, int j, int k)
{
    const Vector origin = (Vector){0, 0, 0};
    const Vector here   = (Vector){dsx * i, dsy * j, dsz * k};
    const Scalar radius = distance(origin, here);
    return GM_star / radius;
}
// x-derivative of the gravity potential at vertex (i, j, k): samples the
// potential along x into a pencil and hands it to first_derivative().
Scalar
gradx_gravity_potential(int i, int j, int k)
{
    // The pencil has STENCIL_ORDER + 1 entries centred on i, so the offsets
    // span [-STENCIL_ORDER/2, STENCIL_ORDER/2]. Looping over
    // [-STENCIL_ORDER, STENCIL_ORDER] would write past the end of the array.
    Scalar pencil[STENCIL_ORDER + 1];
    for (int offset = -STENCIL_ORDER / 2; offset <= STENCIL_ORDER / 2; ++offset) {
        pencil[offset + STENCIL_ORDER / 2] = gravity_potential(i + offset, j, k);
    }
    Scalar inv_ds = Scalar(1.) / dsx;
    return first_derivative(pencil, inv_ds);
}
// y-derivative of the gravity potential at vertex (i, j, k): samples the
// potential along y into a pencil and hands it to first_derivative().
Scalar
grady_gravity_potential(int i, int j, int k)
{
    // The pencil has STENCIL_ORDER + 1 entries centred on j, so the offsets
    // span [-STENCIL_ORDER/2, STENCIL_ORDER/2]. Looping over
    // [-STENCIL_ORDER, STENCIL_ORDER] would write past the end of the array.
    Scalar pencil[STENCIL_ORDER + 1];
    for (int offset = -STENCIL_ORDER / 2; offset <= STENCIL_ORDER / 2; ++offset) {
        pencil[offset + STENCIL_ORDER / 2] = gravity_potential(i, j + offset, k);
    }
    Scalar inv_ds = Scalar(1.) / dsy;
    return first_derivative(pencil, inv_ds);
}
// z-derivative of the gravity potential at vertex (i, j, k): samples the
// potential along z into a pencil and hands it to first_derivative().
Scalar
gradz_gravity_potential(int i, int j, int k)
{
    // The pencil has STENCIL_ORDER + 1 entries centred on k, so the offsets
    // span [-STENCIL_ORDER/2, STENCIL_ORDER/2]. Looping over
    // [-STENCIL_ORDER, STENCIL_ORDER] would write past the end of the array.
    Scalar pencil[STENCIL_ORDER + 1];
    for (int offset = -STENCIL_ORDER / 2; offset <= STENCIL_ORDER / 2; ++offset) {
        pencil[offset + STENCIL_ORDER / 2] = gravity_potential(i, j, k + offset);
    }
    Scalar inv_ds = Scalar(1.) / dsz;
    return first_derivative(pencil, inv_ds);
}
// Returns the gradient of the gravity potential at vertex (i, j, k).
// The velocity field uu is accepted but not read.
Vector
momentum(int i, int j, int k, in Vector uu)
{
    const Scalar grad_x = gradx_gravity_potential(i, j, k);
    const Scalar grad_y = grady_gravity_potential(i, j, k);
    const Scalar grad_z = gradz_gravity_potential(i, j, k);
    return (Vector){grad_x, grad_y, grad_z};
}

56
acc/src/acc.l Normal file
View File

@@ -0,0 +1,56 @@
/*
 * Scanner for the Astaroth DSL. Returns the token codes declared in
 * acc.tab.h; single-character operators and punctuation are returned as
 * their own character code. No semantic value is set: the parser reads the
 * matched text from yytext.
 */
%option yylineno

D [0-9]
L [a-zA-Z_]

%{
#include "acc.tab.h"
%}

%%

"Scalar"                { return SCALAR; } /* Builtin types */
"Vector"                { return VECTOR; }
"Matrix"                { return MATRIX; }
"void"                  { return VOID; } /* Rest of the types inherited from C */
"int"                   { return INT; }
"int3"                  { return INT3; }

"Kernel"                { return KERNEL; } /* Function specifiers */
"Preprocessed"          { return PREPROCESSED; }

"const"                 { return CONSTANT; }
"in"                    { return IN; } /* Device func storage specifiers */
"out"                   { return OUT; }
"uniform"               { return UNIFORM; }

"else if"               { return ELIF; } /* Longest match wins over "else"; exactly one space is required */
"if"                    { return IF; }
"else"                  { return ELSE; }
"for"                   { return FOR; }
"while"                 { return WHILE; }

"return"                { return RETURN; }

{D}+"."?{D}*[flud]?     { return NUMBER; } /* Literals */
"."{D}+[flud]?          { return NUMBER; }
{L}({L}|{D})*           { return IDENTIFIER; }
\"[^\"\n]*\"            { return IDENTIFIER; } /* String: stops at the first closing quote so two strings on one line stay separate */

"=="                    { return LEQU; }/* Logic operations */
"&&"                    { return LAND; }
"||"                    { return LOR; }
"<="                    { return LLEQU; }

"++"                    { return INPLACE_INC; }
"--"                    { return INPLACE_DEC; }

[-+*/;=\[\]{}(),\.<>!]  { return yytext[0]; } /* Characters; '!' is needed by the unary_operator rule of the grammar */

"//".*                  { /* Skip regular comments */ }
[ \t\n\v\r]+            { /* Ignore whitespace, tabs and newlines */ }
.                       { printf("unrecognized char %d: [%c]\n", *yytext, *yytext); }

%%

234
acc/src/acc.y Normal file
View File

@@ -0,0 +1,234 @@
%{
#include <stdio.h>
#include <string.h>
#include "ast.h"
extern char* yytext;
int yylex();
int yyerror(const char* str);
int yyget_lineno();
#define YYSTYPE ASTNode* // Sets the default type
%}
/* Token codes returned by the scanner (acc.l). Single-character operators
   and punctuation are not listed: they are returned as character literals. */
/* Type qualifiers */
%token CONSTANT IN OUT UNIFORM
%token IDENTIFIER NUMBER
%token RETURN
/* Type specifiers */
%token SCALAR VECTOR MATRIX
%token VOID INT INT3
/* Control flow; ELIF is the two-word "else if" */
%token IF ELSE FOR WHILE ELIF
/* Multi-character operators: == && || <= */
%token LEQU LAND LOR LLEQU
/* Function qualifiers */
%token KERNEL PREPROCESSED
/* ++ and -- */
%token INPLACE_INC INPLACE_DEC
%%
/* Start symbol: attaches the parsed program to the lhs of the C variable
   `root`, which is not declared in this file (expected to be provided by
   the including code). */
root: program { root->lhs = $1; }
;
/* A program is a left-recursive sequence of function definitions and global
   assignments/declarations; each new item becomes the rhs of a fresh node
   whose lhs is the program parsed so far. */
program: /* Empty*/ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); }
| program function_definition { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
| program assignment ';' /* Global definition */ { $$ = astnode_create(NODE_UNKNOWN, $1, $2); $$->postfix = ';'; }
| program declaration ';' /* Global declaration */ { $$ = astnode_create(NODE_UNKNOWN, $1, $2); $$->postfix = ';'; }
;
/*
* =============================================================================
* Functions
* =============================================================================
*/
/* A function definition is a declaration (return type and name) followed by
   a parenthesized parameter list and a compound statement. The parentheses
   are stored as the prefix/postfix of the parameter node. */
function_definition: function_declaration compound_statement { $$ = astnode_create(NODE_FUNCTION_DEFINITION, $1, $2); }
;
function_declaration: declaration function_parameter_declaration { $$ = astnode_create(NODE_FUNCTION_DECLARATION, $1, $2); }
;
function_parameter_declaration: '(' ')' { $$ = astnode_create(NODE_FUNCTION_PARAMETER_DECLARATION, NULL, NULL); $$->prefix = '('; $$->postfix = ')'; }
| '(' declaration_list ')' { $$ = astnode_create(NODE_FUNCTION_PARAMETER_DECLARATION, $2, NULL); $$->prefix = '('; $$->postfix = ')'; }
;
/*
* =============================================================================
* Statement
* =============================================================================
*/
/* Statements. Punctuation and keywords that belong to a rule are recorded in
   the prefix/infix/postfix fields of the created node instead of getting
   nodes of their own. */
statement_list: statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| statement_list statement { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
compound_statement: '{' '}' { $$ = astnode_create(NODE_COMPOUND_STATEMENT, NULL, NULL); $$->prefix = '{'; $$->postfix = '}'; }
| '{' statement_list '}' { $$ = astnode_create(NODE_COMPOUND_STATEMENT, $2, NULL); $$->prefix = '{'; $$->postfix = '}'; }
;
/* Only exec_statement is terminated by ';' */
statement: selection_statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| iteration_statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| exec_statement ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
;
/* if / else if / else chains; every branch body must be a compound statement */
selection_statement: IF expression else_selection_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = IF; }
;
else_selection_statement: compound_statement { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| compound_statement elif_selection_statement { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
| compound_statement ELSE compound_statement { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ELSE; }
;
elif_selection_statement: ELIF expression else_selection_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = ELIF; }
;
/* while and for loops; the loop body must be a compound statement */
iteration_statement: WHILE expression compound_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = WHILE; }
| FOR for_expression compound_statement { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = FOR; }
;
/* for header: an initializer followed by the condition and an optional
   update expression. The initializer and the condition are both required. */
for_expression: '(' for_init_param for_other_params ')' { $$ = astnode_create(NODE_UNKNOWN, $2, $3); $$->prefix = '('; $$->postfix = ')'; }
;
for_init_param: expression ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
| assignment ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
;
for_other_params: expression ';' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->postfix = ';'; }
| expression ';' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ';'; }
;
exec_statement: declaration { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| assignment { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| return return_statement { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
/* Assignment either to a freshly declared variable or to an expression */
assignment: declaration '=' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '='; }
| expression '=' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '='; }
;
/* The value after `return` is optional */
return_statement: /* Empty */ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); }
| expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
;
/*
* =============================================================================
* Declaration
* =============================================================================
*/
/* Declarations: an optional single type qualifier, a type specifier and
   either a plain identifier or an array declarator with an optional size. */
declaration_list: declaration { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| declaration_list ',' declaration { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ','; }
;
declaration: type_declaration identifier { $$ = astnode_create(NODE_DECLARATION, $1, $2); } // Note: accepts only one type qualifier. Good or not?
| type_declaration array_declaration { $$ = astnode_create(NODE_DECLARATION, $1, $2); }
;
array_declaration: identifier '[' ']' { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->infix = '['; $$->postfix = ']'; }
| identifier '[' expression ']' { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '['; $$->postfix = ']'; }
;
type_declaration: type_specifier { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| type_qualifier type_specifier { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
/*
* =============================================================================
* Expressions
* =============================================================================
*/
/* Expressions. Binary expressions are parsed as a flat left-to-right chain
   of (operator, unary_expression) pairs; the grammar assigns no operator
   precedence. */
expression_list: expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| expression_list ',' expression { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = ','; }
;
expression: unary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| expression binary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
binary_expression: binary_operator unary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
unary_expression: postfix_expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| unary_operator postfix_expression { $$ = astnode_create(NODE_UNKNOWN, $1, $2); }
;
/* Subscripts accept an expression list (e.g. a[i, j, k]) and get their own
   node type; the other alternatives are generic nodes. */
postfix_expression: primary_expression { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| postfix_expression '[' expression_list ']' /* Subscript */ { $$ = astnode_create(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION, $1, $3); $$->infix = '['; $$->postfix = ']'; }
| cast_expression '{' expression_list '}' /* Array */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '{'; $$->postfix = '}'; }
| postfix_expression '(' ')' /* Function call */ { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); $$->infix = '('; $$->postfix = ')'; }
| postfix_expression '(' expression_list ')' /* Function call */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '('; $$->postfix = ')'; }
| type_specifier '(' expression_list ')' /* Cast */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '('; $$->postfix = ')'; }
| postfix_expression '.' identifier /* Member access */ { $$ = astnode_create(NODE_UNKNOWN, $1, $3); $$->infix = '.'; }
;
/* Optional "(Type)" in front of a brace-enclosed initializer list */
cast_expression: /* Empty: implicit cast */ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); }
| '(' type_specifier ')' { $$ = astnode_create(NODE_UNKNOWN, $2, NULL); $$->prefix = '('; $$->postfix = ')'; }
;
primary_expression: identifier { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| number { $$ = astnode_create(NODE_UNKNOWN, $1, NULL); }
| '(' expression ')' { $$ = astnode_create(NODE_UNKNOWN, $2, NULL); $$->prefix = '('; $$->postfix = ')'; }
;
/*
* =============================================================================
* Terminals
* =============================================================================
*/
/* Terminals. Single-character operators are stored in the infix field,
   keyword-like terminals in the token field, and multi-character operators,
   identifiers, numbers and `return` as text in the node buffer.
   NOTE(review): the actions read yytext directly, which is only correct if
   the parser reduces these rules before the scanner reads the next token -
   confirm with the generated parser. */
binary_operator: '+' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '-' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '/' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '*' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '<' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '>' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| LEQU { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
| LAND { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
| LOR { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
| LLEQU { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
/* NOTE(review): the scanner rules shown in acc.l do not return '!', so that
   alternative looks unreachable - confirm. */
unary_operator: '-' /* C-style casts are disallowed, would otherwise be defined here */ { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| '!' { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->infix = yytext[0]; }
| INPLACE_INC { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->token = INPLACE_INC; }
| INPLACE_DEC { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); $$->token = INPLACE_DEC; }
;
type_qualifier: KERNEL { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = KERNEL; }
| PREPROCESSED { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = PREPROCESSED; }
| CONSTANT { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = CONSTANT; }
| IN { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = IN; }
| OUT { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = OUT; }
| UNIFORM { $$ = astnode_create(NODE_TYPE_QUALIFIER, NULL, NULL); $$->token = UNIFORM; }
;
type_specifier: VOID { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VOID; }
| INT { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = INT; }
| INT3 { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = INT3; }
| SCALAR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = SCALAR; }
| VECTOR { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = VECTOR; }
| MATRIX { $$ = astnode_create(NODE_TYPE_SPECIFIER, NULL, NULL); $$->token = MATRIX; }
;
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
number: NUMBER { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
return: RETURN { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
;
%%
/* Debug helper: writes the text of the current token followed by a newline
   to stdout. */
void
print(void)
{
    puts(yytext);
}
/*
 * Error callback invoked by the generated parser. Reports the message
 * together with the current line number and the text (and first character
 * code) of the token being processed on stderr.
 *
 * Returns 0. The function is declared int, so it must return a value;
 * falling off the end is undefined behaviour if a caller uses the result.
 */
int
yyerror(const char* str)
{
    fprintf(stderr, "%s on line %d when processing char %d: [%s]\n", str, yyget_lineno(), *yytext, yytext);
    return 0;
}

126
acc/src/ast.h Normal file
View File

@@ -0,0 +1,126 @@
/*
Nodes for the Abstract Syntax Tree
Statement: syntactic unit that expresses some action.
May have internal components, expressions, which are evaluated
Statements: return value
block
*/
#include <stdlib.h>
#include <assert.h>
// Generic buffer size. Not referenced in the part of this header shown here;
// presumably used by the scanner/parser sources - TODO confirm.
#define BUFFER_SIZE (4096)
// X-macro helpers: GEN_ID(X) expands to the identifier X itself and
// GEN_STR(X) to the string literal "X".
#define GEN_ID(X) X
#define GEN_STR(X) #X
// Applies FUNC to every AST node type and yields a comma-separated list
// (no trailing comma). Expanded with GEN_ID to build the NodeType enum.
#define FOR_NODE_TYPES(FUNC) \
FUNC(NODE_UNKNOWN), \
FUNC(NODE_DEFINITION), \
FUNC(NODE_GLOBAL_DEFINITION), \
FUNC(NODE_DECLARATION), \
FUNC(NODE_TYPE_QUALIFIER), \
FUNC(NODE_TYPE_SPECIFIER), \
FUNC(NODE_IDENTIFIER), \
FUNC(NODE_FUNCTION_DEFINITION), \
FUNC(NODE_FUNCTION_DECLARATION), \
FUNC(NODE_COMPOUND_STATEMENT), \
FUNC(NODE_FUNCTION_PARAMETER_DECLARATION), \
FUNC(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION)
/*
// Recreating strdup is not needed when using the GNU compiler.
// Let's also just say that anything but the GNU
// compiler is NOT supported, since there are also
// some gcc-specific calls in the files generated
// by flex and being completely compiler-independent is
// not a priority right now
#ifndef strdup
static inline char*
strdup(const char* in)
{
const size_t len = strlen(in) + 1;
char* out = malloc(len);
if (out) {
memcpy(out, in, len);
return out;
} else {
return NULL;
}
}
#endif
*/
// Kinds of AST nodes, generated from FOR_NODE_TYPES. NUM_NODE_TYPES is not a
// node kind: it follows the last enumerator and therefore equals the number
// of node types.
typedef enum {
FOR_NODE_TYPES(GEN_ID),
NUM_NODE_TYPES
} NodeType;
// A node of the binary abstract syntax tree. Besides its two children, a node
// can carry up to three tokens (prefix/infix/postfix) plus a terminal token
// and a text buffer.
typedef struct astnode_s {
int id; // Sequence number assigned by astnode_create()
struct astnode_s* lhs; // Left child (may be NULL)
struct astnode_s* rhs; // Right child (may be NULL)
NodeType type; // Type of the AST node
char* buffer; // Identifiers and other strings (NULL by default)
int token; // Type of a terminal (that is not a simple char)
int prefix; // Tokens attached to the node; 0 means "none". Storing them here
int infix; // presumably keeps the grammar readable, since rules need not be
int postfix; // split into at-most-two-child rules - TODO confirm intent
} ASTNode;
/*
 * Allocates and initialises a new AST node.
 *
 * type - kind of the node
 * lhs  - left child, or NULL
 * rhs  - right child, or NULL
 *
 * Every field is initialised: `id` gets the next value of a function-local
 * counter, `buffer` is NULL and `token`, `prefix`, `infix` and `postfix` are
 * 0 (meaning "no token"). `token` used to be left uninitialised although the
 * code generator reads it for every node it visits.
 *
 * Returns the new node. Aborts the program if the allocation fails, because
 * callers dereference the result unconditionally.
 */
static inline ASTNode*
astnode_create(const NodeType type, ASTNode* lhs, ASTNode* rhs)
{
    static int id_counter = 0;

    ASTNode* node = malloc(sizeof(node[0]));
    if (!node)
        abort();

    node->id     = id_counter++;
    node->type   = type;
    node->lhs    = lhs;
    node->rhs    = rhs;
    node->buffer = NULL;
    node->token  = 0;
    node->prefix = node->infix = node->postfix = 0;
    return node;
}
/*
 * Stores a private copy of `buffer` in `node->buffer`.
 *
 * buffer - NUL-terminated string to copy; NULL clears the node's buffer
 * node   - node to update (must have been created with astnode_create())
 *
 * A previously stored string is released, so calling this repeatedly on the
 * same node does not leak. The copy is made before the old string is freed,
 * which keeps the call safe when `buffer` aliases `node->buffer`.
 * Aborts the program if the copy cannot be allocated.
 */
static inline void
astnode_set_buffer(const char* buffer, ASTNode* node)
{
    char* copy = buffer ? strdup(buffer) : NULL;
    if (buffer && !copy)
        abort();

    free(node->buffer);
    node->buffer = copy;
}
/*
 * Recursively frees `node`, both of its subtrees and its text buffer.
 *
 * Passing NULL is allowed and does nothing, so a tree that was never built
 * can be destroyed without a check at the call site.
 */
static inline void
astnode_destroy(ASTNode* node)
{
    if (!node)
        return;

    astnode_destroy(node->lhs);
    astnode_destroy(node->rhs);
    free(node->buffer); // free(NULL) is a no-op
    free(node);
}
// Root of the abstract syntax tree; the definition lives in code_generator.c.
extern ASTNode* root;
/*
typedef enum {
SCOPE_BLOCK
} ScopeType;
typedef struct symbol_s {
int type_specifier;
char* identifier;
int scope;
struct symbol_s* next;
} Symbol;
extern ASTNode* symbol_table;
*/

569
acc/src/code_generator.c Normal file
View File

@@ -0,0 +1,569 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "acc.tab.h"
#include "ast.h"
// Root of the AST (declared extern in ast.h); created in main().
ASTNode* root = NULL;
// Prefix put in front of the identifier of global in/out symbols in the
// generated code.
static const char inout_name_prefix[] = "handle_";
// true: generate stencil assembly code (-sas); false: generate stencil
// processing code (-sps). Set from the command line in main().
static bool doing_stencil_assembly = true;
/*
* =============================================================================
* Translation
* =============================================================================
*/
// Maps a token code to the text emitted for it in the generated code. The
// index is either a named token (IF, VOID, ...) or the character code of a
// single-character token. Entries that are not listed are NULL.
#define TRANSLATION_TABLE_SIZE (1024)
static const char* translation_table[TRANSLATION_TABLE_SIZE] = {
[0] = NULL,
// Control flow
[IF] = "if",
[ELSE] = "else",
[ELIF] = "else if",
[WHILE] = "while",
[FOR] = "for",
// Type specifiers
[VOID] = "void",
[INT] = "int",
[INT3] = "int3",
[SCALAR] = "AcReal",
[VECTOR] = "AcReal3",
[MATRIX] = "AcMatrix",
// Type qualifiers
[KERNEL] = "template <int step_number> static "
"__global__", //__launch_bounds__(RK_THREADBLOCK_SIZE,
// RK_LAUNCH_BOUND_MIN_BLOCKS),
[PREPROCESSED] = "static __device__ "
"__forceinline__",
[CONSTANT] = "const",
[IN] = "in",
[OUT] = "out",
[UNIFORM] = "uniform",
// ETC
[INPLACE_INC] = "++",
[INPLACE_DEC] = "--",
// Single-character tokens, indexed by their character code
[','] = ",",
[';'] = ";\n",
['('] = "(",
[')'] = ")",
['['] = "[",
[']'] = "]",
['{'] = "{\n",
['}'] = "}\n",
['='] = "=",
['+'] = "+",
['-'] = "-",
['/'] = "/",
['*'] = "*",
['<'] = "<",
['>'] = ">",
['!'] = "!",
['.'] = "."};
/*
 * Looks up the output text of `token` in translation_table.
 *
 * Token 0 means "no token" and yields NULL. Any other token without a table
 * entry is reported on stdout and trips an assertion. The token must lie in
 * [0, TRANSLATION_TABLE_SIZE).
 */
static const char*
translate(const int token)
{
    assert(token >= 0);
    assert(token < TRANSLATION_TABLE_SIZE);

    const bool is_real_token = token > 0;
    if (is_real_token && translation_table[token] == NULL)
        printf("ERROR: unidentified token %d\n", token);
    if (is_real_token)
        assert(translation_table[token]);

    return translation_table[token];
}
/*
* =============================================================================
* Symbols
* =============================================================================
*/
// Category of a symbol table entry. NUM_SYMBOLTYPES is a count, not a
// category.
typedef enum {
SYMBOLTYPE_FUNCTION,
SYMBOLTYPE_FUNCTION_PARAMETER,
SYMBOLTYPE_OTHER,
NUM_SYMBOLTYPES
} SymbolType;
// Size of Symbol::identifier, including the terminating NUL.
#define MAX_ID_LEN (128)
// One declared name together with its qualifier/specifier token codes
// (0 when absent).
typedef struct {
SymbolType type;
int type_qualifier;
int type_specifier;
char identifier[MAX_ID_LEN];
} Symbol;
// The symbol table is a fixed-size array; the first num_symbols entries are
// in use.
#define SYMBOL_TABLE_SIZE (4096)
static Symbol symbol_table[SYMBOL_TABLE_SIZE] = {};
static int num_symbols = 0;
/*
 * Searches the symbol table for `identifier`.
 *
 * Returns the index of the first entry whose name matches exactly, or -1 if
 * there is no match or `identifier` is NULL.
 */
static int
symboltable_lookup(const char* identifier)
{
    if (identifier != NULL) {
        int idx = 0;
        while (idx < num_symbols) {
            if (!strcmp(identifier, symbol_table[idx].identifier))
                return idx;
            ++idx;
        }
    }
    return -1;
}
static void
add_symbol(const SymbolType type, const int tqualifier, const int tspecifier, const char* id)
{
assert(num_symbols < SYMBOL_TABLE_SIZE);
symbol_table[num_symbols].type = type;
symbol_table[num_symbols].type_qualifier = tqualifier;
symbol_table[num_symbols].type_specifier = tspecifier;
strcpy(symbol_table[num_symbols].identifier, id);
++num_symbols;
}
/*
 * Removes the symbol at index `handle`.
 *
 * The last entry is moved into the freed slot (unless the removed entry is
 * the last one), so the order of the remaining symbols is not preserved.
 */
static void
rm_symbol(const int handle)
{
    assert(handle >= 0 && handle < num_symbols);

    const int last = num_symbols - 1;
    if (handle != last)
        symbol_table[handle] = symbol_table[last];
    num_symbols = last;
}
static void
print_symbol(const int handle)
{
assert(handle < SYMBOL_TABLE_SIZE);
const char* fields[] = {translate(symbol_table[handle].type_qualifier),
translate(symbol_table[handle].type_specifier),
symbol_table[handle].identifier};
const size_t num_fields = sizeof(fields) / sizeof(fields[0]);
for (int i = 0; i < num_fields; ++i)
if (fields[i])
printf("%s ", fields[i]);
}
/*
 * Emits the generated-code form of the most recently added symbol.
 *
 * - Functions: kernels and preprocessed functions get the decorator from the
 *   translation table (preprocessed ones also a "preprocessed_" name prefix);
 *   all other functions are emitted as static __device__ __forceinline__.
 * - Function parameters: in/out parameters become a pointer (stencil
 *   assembly) or a "<type>Data" reference (stencil processing); other
 *   parameters are printed as declared.
 * - Uniforms emit nothing; global in/out symbols become handle declarations;
 *   everything else is printed as declared.
 */
static void
translate_latest_symbol(void)
{
    const int handle = num_symbols - 1;
    assert(handle < SYMBOL_TABLE_SIZE);

    const Symbol* latest = &symbol_table[handle];
    const int qualifier  = latest->type_qualifier;
    const bool is_inout  = (qualifier == IN) || (qualifier == OUT);

    switch (latest->type) {
    case SYMBOLTYPE_FUNCTION:
        if (qualifier == KERNEL) {
            printf("%s %s\n%s", translate(qualifier), translate(latest->type_specifier),
                   latest->identifier);
        }
        else if (qualifier == PREPROCESSED) {
            printf("%s %s\npreprocessed_%s", translate(qualifier),
                   translate(latest->type_specifier), latest->identifier);
        }
        else {
            // Ordinary device function; the qualifier is optional
            const char* qualifier_text = translate(qualifier);
            printf("%s %s %s\n%s", "static __device__ __forceinline__",
                   qualifier_text ? qualifier_text : "", translate(latest->type_specifier),
                   latest->identifier);
        }
        return;
    case SYMBOLTYPE_FUNCTION_PARAMETER:
        if (!is_inout)
            print_symbol(handle);
        else if (doing_stencil_assembly)
            printf("const __restrict__ %s* %s", translate(latest->type_specifier),
                   latest->identifier);
        else
            printf("const %sData& %s", translate(latest->type_specifier), latest->identifier);
        return;
    default:
        break;
    }

    // Neither a function nor a function parameter
    if (qualifier == UNIFORM)
        return; /* Do nothing */

    if (is_inout)
        printf("%s %s%s", "static __device__ const auto", inout_name_prefix, latest->identifier);
    else
        print_symbol(handle);
}
static void
print_symbol_table(void)
{
for (int i = 0; i < num_symbols; ++i) {
printf("%d: ", i);
const char* fields[] = {translate(symbol_table[i].type_qualifier),
translate(symbol_table[i].type_specifier),
symbol_table[i].identifier};
const size_t num_fields = sizeof(fields) / sizeof(fields[0]);
for (int i = 0; i < num_fields; ++i)
if (fields[i])
printf("%s ", fields[i]);
if (symbol_table[i].type == SYMBOLTYPE_FUNCTION)
printf("(function)");
else if (symbol_table[i].type == SYMBOLTYPE_FUNCTION_PARAMETER)
printf("(function parameter)");
else
printf("(other)");
printf("\n");
}
}
/*
* =============================================================================
* State
* =============================================================================
*/
// Traversal state shared across the recursive calls of traverse().
// Set while the subtree of a NODE_DECLARATION is visited; suppresses output.
static bool inside_declaration = false;
// Set from entering a NODE_FUNCTION_DECLARATION until its lhs has been visited.
static bool inside_function_declaration = false;
// Set while a NODE_FUNCTION_PARAMETER_DECLARATION is visited.
static bool inside_function_parameter_declaration = false;
// Set when a KERNEL qualifier is seen; cleared at the end of the function definition.
static bool inside_kernel = false;
// Set when a PREPROCESSED qualifier is seen; cleared at the end of the function definition.
static bool inside_preprocessed = false;
// Symbol count recorded at a function declaration; symbols added after it
// are removed when the function definition ends.
static int scope_start = 0;
/*
* =============================================================================
* AST traversal
* =============================================================================
*/
/*
 * Recursively walks the AST below `node` and prints the generated code to
 * stdout. `node` must not be NULL.
 *
 * Per node the order is: prefix token, boilerplate, lhs subtree, infix token
 * and the node's own token/buffer, rhs subtree, postfix token. Regular output
 * is suppressed while inside_declaration is set; a NODE_DECLARATION is
 * instead added to the symbol table after its subtrees have been visited and
 * is emitted through translate_latest_symbol(). Symbols added after a
 * function declaration are removed again once the enclosing
 * NODE_FUNCTION_DEFINITION has been processed.
 *
 * The function relies on the file-level state flags and mutates them, so the
 * statement order below matters.
 */
static void
traverse(const ASTNode* node)
{
// Prefix logic %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (node->type == NODE_FUNCTION_DECLARATION)
inside_function_declaration = true;
if (node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
inside_function_parameter_declaration = true;
if (node->type == NODE_DECLARATION)
inside_declaration = true;
if (!inside_declaration && translate(node->prefix))
printf("%s", translate(node->prefix));
// BOILERPLATE START////////////////////////////////////////////////////////
if (node->type == NODE_TYPE_QUALIFIER && node->token == KERNEL)
inside_kernel = true;
// Kernel parameter boilerplate
const char* kernel_parameter_boilerplate = "GEN_KERNEL_PARAM_BOILERPLATE, ";
if (inside_kernel && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
printf("%s ", kernel_parameter_boilerplate);
// Kernel builtin variables boilerplate (read input/output arrays and setup
// indices)
const char* kernel_builtin_variables_boilerplate = "GEN_KERNEL_BUILTIN_VARIABLES_"
"BOILERPLATE();";
if (inside_kernel && node->type == NODE_COMPOUND_STATEMENT) {
printf("%s ", kernel_builtin_variables_boilerplate);
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == IN) {
printf("const %sData %s = READ(%s%s);\n", translate(symbol_table[i].type_specifier),
symbol_table[i].identifier, inout_name_prefix, symbol_table[i].identifier);
} else if (symbol_table[i].type_qualifier == OUT) {
printf("%s %s = READ_OUT(%s%s);", translate(symbol_table[i].type_specifier), symbol_table[i].identifier, inout_name_prefix, symbol_table[i].identifier);
//printf("%s %s = buffer.out[%s%s][IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z)];\n", translate(symbol_table[i].type_specifier), symbol_table[i].identifier, inout_name_prefix, symbol_table[i].identifier);
}
}
}
// Preprocessed parameter boilerplate
if (node->type == NODE_TYPE_QUALIFIER && node->token == PREPROCESSED)
inside_preprocessed = true;
static const char
preprocessed_parameter_boilerplate[] = "const int3 vertexIdx, ";
if (inside_preprocessed && node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
printf("%s ", preprocessed_parameter_boilerplate);
// BOILERPLATE END////////////////////////////////////////////////////////
// Enter LHS
if (node->lhs)
traverse(node->lhs);
// Infix logic %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (!inside_declaration && translate(node->infix))
printf("%s ", translate(node->infix));
if (node->type == NODE_FUNCTION_DECLARATION)
inside_function_declaration = false;
// If the node is a subscript expression and the expression list inside it is not empty
if (node->type == NODE_MULTIDIM_SUBSCRIPT_EXPRESSION && node->rhs)
printf("IDX(");
// Do a regular translation
if (!inside_declaration) {
const int handle = symboltable_lookup(node->buffer);
if (handle >= 0) { // The variable exists in the symbol table
const Symbol* symbol = &symbol_table[handle];
//if (symbol->type_qualifier == OUT) {
// printf("%s%s", inout_name_prefix, symbol->identifier);
//}
if (symbol->type_qualifier == UNIFORM) {
if (symbol->type_specifier == SCALAR)
printf("DCONST_REAL(AC_%s) ", symbol->identifier);
else if (symbol->type_specifier == INT)
printf("DCONST_INT(AC_%s) ", symbol->identifier);
else
printf("INVALID UNIFORM type specifier %s with %s\n",
translate(symbol->type_specifier), symbol->identifier);
}
else {
// Do a regular translation
if (translate(node->token))
printf("%s ", translate(node->token));
if (node->buffer)
printf("%s ", node->buffer);
}
}
else {
// Do a regular translation
if (translate(node->token))
printf("%s ", translate(node->token));
if (node->buffer)
printf("%s ", node->buffer);
}
}
// Remember where the function's own symbols start in the symbol table
if (node->type == NODE_FUNCTION_DECLARATION) {
scope_start = num_symbols;
}
// Enter RHS
if (node->rhs)
traverse(node->rhs);
// Postfix logic %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// If the node is a subscript expression and the expression list inside it is not empty
if (node->type == NODE_MULTIDIM_SUBSCRIPT_EXPRESSION && node->rhs)
printf(")"); // Closing bracket of IDX()
// Generate writeback boilerplate for OUT fields
if (inside_kernel && node->type == NODE_COMPOUND_STATEMENT) {
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == OUT) {
printf("WRITE_OUT(%s%s, %s);\n", inout_name_prefix, symbol_table[i].identifier, symbol_table[i].identifier);
//printf("buffer.out[%s%s][IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z)] = %s;\n", inout_name_prefix, symbol_table[i].identifier, symbol_table[i].identifier);
}
}
}
if (!inside_declaration && translate(node->postfix))
printf("%s", translate(node->postfix));
if (node->type == NODE_DECLARATION) {
inside_declaration = false;
// Pick the qualifier/specifier tokens from the children of node->lhs
int tqual = 0;
int tspec = 0;
if (node->lhs && node->lhs->lhs) {
if (node->lhs->lhs->type == NODE_TYPE_QUALIFIER)
tqual = node->lhs->lhs->token;
else if (node->lhs->lhs->type == NODE_TYPE_SPECIFIER)
tspec = node->lhs->lhs->token;
}
if (node->lhs && node->lhs->rhs) {
if (node->lhs->rhs->type == NODE_TYPE_SPECIFIER)
tspec = node->lhs->rhs->token;
}
// Determine symbol type
SymbolType symboltype = SYMBOLTYPE_OTHER;
if (inside_function_declaration)
symboltype = SYMBOLTYPE_FUNCTION;
else if (inside_function_parameter_declaration)
symboltype = SYMBOLTYPE_FUNCTION_PARAMETER;
// Determine identifier
// NOTE(review): node->rhs (and node->rhs->lhs in the array case) is
// dereferenced without a NULL check - presumably guaranteed by the grammar
if (node->rhs->type == NODE_IDENTIFIER) {
add_symbol(symboltype, tqual, tspec, node->rhs->buffer); // Ordinary
translate_latest_symbol();
}
else {
add_symbol(symboltype, tqual, tspec,
node->rhs->lhs->buffer); // Array
translate_latest_symbol();
// Traverse the expression once again, this time with
// "inside_declaration" flag off
printf("%s ", translate(node->rhs->infix));
if (node->rhs->rhs)
traverse(node->rhs->rhs);
printf("%s ", translate(node->rhs->postfix));
}
}
if (node->type == NODE_FUNCTION_PARAMETER_DECLARATION)
inside_function_parameter_declaration = false;
if (node->type == NODE_FUNCTION_DEFINITION) {
// Leave the function scope: drop the symbols added since scope_start
while (num_symbols > scope_start)
rm_symbol(num_symbols - 1);
inside_kernel = false;
inside_preprocessed = false;
}
}
// TODO: these should use the generic type names SCALAR and VECTOR
/*
 * Prints the data structures built from the PREPROCESSED symbols that are
 * still in the symbol table:
 *   1. a struct typedef named "<translate(SCALAR)>Data" with one member per
 *      preprocessed symbol,
 *   2. a read_data() function that fills the struct by calling
 *      preprocessed_<name>(vertexIdx, buf[handle]) for every member,
 *   3. one accessor function per member, and
 *   4. an AcReal3Data struct plus a read_data() overload taking an int3 handle.
 *
 * Several names in the emitted text are hard-coded as AcRealData/AcReal
 * instead of being derived from translate(SCALAR) (see the TODO above).
 * A backslash at the end of a line inside a string literal continues the
 * literal on the next line without emitting a newline.
 */
static void
generate_preprocessed_structures(void)
{
// PREPROCESSED DATA STRUCT
printf("\n");
printf("typedef struct {\n");
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == PREPROCESSED)
printf("%s %s;\n", translate(symbol_table[i].type_specifier),
symbol_table[i].identifier);
}
printf("} %sData;\n", translate(SCALAR));
// FILLING THE DATA STRUCT
printf("static __device__ __forceinline__ AcRealData\
read_data(const int3 vertexIdx,\
AcReal* __restrict__ buf[], const int handle)\
{\n\
%sData data;\n",
translate(SCALAR));
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == PREPROCESSED)
printf("data.%s = preprocessed_%s(vertexIdx, buf[handle]);\n", symbol_table[i].identifier,
symbol_table[i].identifier);
}
printf("return data;\n");
printf("}\n");
// FUNCTIONS FOR ACCESSING MEMBERS OF THE PREPROCESSED STRUCT
for (int i = 0; i < num_symbols; ++i) {
if (symbol_table[i].type_qualifier == PREPROCESSED)
printf("static __device__ __forceinline__ %s\
%s(const AcRealData& data)\
{\n\
return data.%s;\
}\n",
translate(symbol_table[i].type_specifier), symbol_table[i].identifier,
symbol_table[i].identifier);
}
// Syntactic sugar: generate also a Vector data struct
printf("\
typedef struct {\
AcRealData x;\
AcRealData y;\
AcRealData z;\
} AcReal3Data;\
\
static __device__ __forceinline__ AcReal3Data\
read_data(const int3 vertexIdx,\
AcReal* __restrict__ buf[], const int3& handle)\
{\
AcReal3Data data;\
\
data.x = read_data(vertexIdx, buf, handle.x);\
data.y = read_data(vertexIdx, buf, handle.y);\
data.z = read_data(vertexIdx, buf, handle.z);\
\
return data;\
}\
");
}
/*
 * Entry point of the code generator.
 *
 * Expects exactly one flag: "-sas" (stencil assembly) or "-sps" (stencil
 * processing). An unknown flag leaves the default mode, stencil assembly,
 * selected. The parser runs via yyparse() and the generated code is written
 * to stdout.
 *
 * All diagnostics (usage, unknown flag, failure notice) go to stderr so they
 * cannot end up inside the generated code when stdout is redirected.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on wrong usage or when
 * parsing fails.
 */
int
main(int argc, char** argv)
{
    if (argc != 2) {
        fprintf(stderr, "Usage: ./acc [flags]\n"
                        "Flags:\n"
                        "\t-sas - Generates code for the stencil assembly stage\n"
                        "\t-sps - Generates code for the stencil processing "
                        "stage\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    if (!strcmp(argv[1], "-sas"))
        doing_stencil_assembly = true;
    else if (!strcmp(argv[1], "-sps"))
        doing_stencil_assembly = false;
    else
        fprintf(stderr, "Unknown flag %s. Generating stencil assembly.\n", argv[1]);

    root = astnode_create(NODE_UNKNOWN, NULL, NULL);

    const int retval = yyparse();
    if (retval) {
        fprintf(stderr, "COMPILATION FAILED\n");
        return EXIT_FAILURE;
    }

    // Traverse
    traverse(root);
    if (doing_stencil_assembly)
        generate_preprocessed_structures();
    // print_symbol_table();

    // Cleanup
    astnode_destroy(root);
    // printf("COMPILATION SUCCESS\n");
    return EXIT_SUCCESS;
}

48
acc/test_grammar.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Grammar smoke test: builds acc, compiles the two sample sources, then
# generates testbin/test.cu around the resulting headers and builds/runs it
# with nvcc.
#
# The directory changes are quoted and checked so that the file operations
# below never run in an unexpected directory (e.g. when the script path
# contains spaces).
cd "$(dirname "$0")" || exit 1 # Only operate in the same directory with this script
./build_acc.sh
mkdir -p testbin || exit 1
./compile.sh samples/sample_stencil_process.sps
./compile.sh samples/sample_stencil_assembly.sas
mv stencil_process.cuh testbin/
mv stencil_assembly.cuh testbin/
# The three %s placeholders receive, in order: the common header, the stencil
# assembly header and the stencil process header.
printf "
#include <stdio.h>
#include <stdlib.h>
#include \"%s\" // i.e. astaroth.h
__constant__ AcMeshInfo d_mesh_info;
#define DCONST_INT(X) (d_mesh_info.int_params[X])
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
static __device__ __forceinline__ int
IDX(const int i)
{
return i;
}
static __device__ __forceinline__ int
IDX(const int i, const int j, const int k)
{
return DEVICE_VTXBUF_IDX(i, j, k);
}
static __device__ __forceinline__ int
IDX(const int3 idx)
{
return DEVICE_VTXBUF_IDX(idx.x, idx.y, idx.z);
}
#include \"%s\"
#include \"%s\"
int main(void) { printf(\"Grammar check complete.\\\nAll tests passed.\\\n\"); return EXIT_SUCCESS; }
" common_header.h stencil_assembly.cuh stencil_process.cuh >testbin/test.cu
cd testbin || exit 1
nvcc -std=c++11 test.cu -I ../samples -o test && ./test

1
analysis/python/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*.png

View File

@@ -0,0 +1,7 @@
# Python directory
This directory is for Python scripts related to data visualization and analysis.
The content of this directory should be structured so that it is always importable
with `import astar`. More task-specific scripts should be written elsewhere, at
the user's convenience.

View File

@@ -0,0 +1,3 @@
# Appends the current working directory to PYTHONPATH.
# Presumably meant to be sourced from the directory that contains the `astar`
# package so that `import astar` works - TODO confirm.
export PYTHONPATH=${PYTHONPATH}:$PWD/

View File

@@ -0,0 +1,24 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
# Developers note. We require Python 3 approach to have
# compatibility towards the future.
import numpy as np
import pylab as plt

View File

@@ -0,0 +1,21 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
from . import read

View File

@@ -0,0 +1,142 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
# This module is for reading data.
import numpy as np
def read_bin(fname, fdir, fnum, minfo, numtype=np.longdouble):
    '''Read in a floating point array.

    The file ``fdir + fname + '_' + fnum + '.mesh'`` is read as a flat array
    of ``numtype`` values. The first value is taken as the timestamp and the
    remaining values are reshaped, in Fortran order, to
    ``(AC_mx, AC_my, AC_mz)`` as given by ``minfo.contents``.

    Parameters:
        fname: name of the vertex buffer, e.g. 'VTXBUF_LNRHO'.
        fdir: directory prefix; concatenated as-is, so it has to end with a
            path separator unless it is empty.
        fnum: snapshot number as a string.
        minfo: object whose ``contents`` mapping provides the mesh dimensions.
        numtype: numpy dtype of the stored values.

    Returns:
        ``(array, timestamp, read_ok)``. If the file does not exist, the
        result is ``(None, None, False)``.
    '''
    # Local import: only this function needs it. A plain filesystem check is
    # enough because np.fromfile can only read local files, and it does not
    # depend on np.DataSource, which newer NumPy releases no longer expose in
    # the main namespace.
    import os

    filename = fdir + fname + '_' + fnum + '.mesh'

    read_ok = os.path.exists(filename)
    if not read_ok:
        return None, None, read_ok

    print(filename)
    raw = np.fromfile(filename, dtype=numtype)
    timestamp = raw[0]
    shape = (minfo.contents['AC_mx'],
             minfo.contents['AC_my'],
             minfo.contents['AC_mz'])
    array = np.reshape(raw[1:], shape, order='F')

    return array, timestamp, read_ok
def read_meshtxt(fdir, fname):
    '''Parse a mesh info text file into a dictionary.

    Every non-empty line is expected to have the form ``<type> <name> <value>``
    where ``<type>`` is ``int`` or ``real``. Lines with another type are
    reported with a message and skipped; blank lines are ignored.

    Parameters:
        fdir: directory prefix, concatenated as-is with ``fname``.
        fname: name of the file.

    Returns:
        dict mapping each parameter name to an ``int`` or ``float`` value.
    '''
    with open(fdir+fname) as f:
        filetext = f.read().splitlines()

    contents = {}
    for raw_line in filetext:
        fields = raw_line.split()
        if not fields:
            # A blank line would otherwise fail on fields[0]
            continue
        # The builtin types replace the np.int / np.float aliases, which have
        # been removed from NumPy.
        if fields[0] == 'int':
            contents[fields[1]] = int(fields[2])
        elif fields[0] == 'real':
            contents[fields[1]] = float(fields[2])
        else:
            print('ERROR: ' + fields[0] +' no recognized!')

    return contents
class MeshInfo:
    '''Holds the mesh parameters read from ``mesh_info.list``.'''

    def __init__(self, fdir):
        # Parameter name -> value, as parsed by read_meshtxt
        info_file = 'mesh_info.list'
        self.contents = read_meshtxt(fdir, info_file)
class Mesh:
    '''Class that contains all 3d mesh data of one snapshot.

    Attributes set in ``__init__``: ``framenum`` (snapshot number zero-padded
    to 10 characters), ``minfo``, ``lnrho``, ``timestamp`` and ``ok``. When
    the LNRHO file could be read, ``ss``, ``uu``, ``aa``, the coordinate axes
    ``xx``/``yy``/``zz`` and the mid-plane indices ``xmid``/``ymid``/``zmid``
    are set as well.
    '''
    # NOTE(review): indentation was lost in the reviewed copy of this file.
    # Everything after `if self.ok:` is assumed to belong to that branch -
    # confirm against the original source.
    def __init__(self, fnum, fdir=""):
        fnum = str(fnum)
        self.framenum = fnum.zfill(10)
        self.minfo = MeshInfo(fdir)
        # Note: the files are looked up with the unpadded fnum, not framenum
        self.lnrho, self.timestamp, self.ok = read_bin('VTXBUF_LNRHO', fdir, fnum, self.minfo)
        if self.ok:
            self.ss, timestamp, ok = read_bin('VTXBUF_ENTROPY', fdir, fnum, self.minfo)
            #TODO Generalize is a dict. Do not hardcode!
            uux, timestamp, ok = read_bin('VTXBUF_UUX', fdir, fnum, self.minfo)
            uuy, timestamp, ok = read_bin('VTXBUF_UUY', fdir, fnum, self.minfo)
            uuz, timestamp, ok = read_bin('VTXBUF_UUZ', fdir, fnum, self.minfo)
            self.uu = (uux, uuy, uuz)
            # Rebinds the local names only; the arrays stay referenced by self.uu
            uux = []
            uuy = []
            uuz = []
            aax, timestamp, ok = read_bin('VTXBUF_AX', fdir, fnum, self.minfo)
            aay, timestamp, ok = read_bin('VTXBUF_AY', fdir, fnum, self.minfo)
            aaz, timestamp, ok = read_bin('VTXBUF_AZ', fdir, fnum, self.minfo)
            self.aa = (aax, aay, aaz)
            # Rebinds the local names only; the arrays stay referenced by self.aa
            aax = []
            aay = []
            aaz = []
            # NOTE(review): the axes are index * AC_inv_ds*; scaling by the
            # inverse spacing looks unusual for a coordinate axis - confirm.
            self.xx = self.minfo.contents['AC_inv_dsx']*np.arange(self.minfo.contents['AC_mx'])
            self.yy = self.minfo.contents['AC_inv_dsy']*np.arange(self.minfo.contents['AC_my'])
            self.zz = self.minfo.contents['AC_inv_dsz']*np.arange(self.minfo.contents['AC_mz'])
            # Indices of the middle planes along each axis
            self.xmid = int(self.minfo.contents['AC_mx']/2)
            self.ymid = int(self.minfo.contents['AC_my']/2)
            self.zmid = int(self.minfo.contents['AC_mz']/2)
def parse_ts(fdir, fname):
    '''Read a time series file into a dictionary of columns.

    The first line of ``fdir + fname`` holds the column names, which are
    shortened with a fixed sequence of substring replacements. The remaining
    lines are loaded as a numeric table. The ``step`` column is converted to
    ``np.int64``.

    Returns:
        dict mapping each shortened column name to its column of values.
    '''
    path = fdir + fname
    with open(path) as handle:
        header = handle.read().splitlines()[0]

    # Applied in this order to every column name
    renames = (
        ('VTXBUF_', ""),
        ('UU', "uu"),
        ('_total', "tot"),
        ('A', "aa"),
        ('LNRHO', "lnrho"),
        ('X', "x"),
        ('Y', "y"),
        ('Z', "z"),
    )
    names = []
    for name in header.split():
        for old, new in renames:
            name = name.replace(old, new)
        names.append(name)

    tsdata = np.loadtxt(path, skiprows=1)
    var = {name: tsdata[:, col] for col, name in enumerate(names)}
    var['step'] = np.int64(var['step'])

    print("HERE ARE ALL KEYS FOR TS DATA:")
    print(var.keys())
    return var
class TimeSeries:
    '''Time series data of a run, loaded from a ``.ts`` file.'''

    def __init__(self, fdir="", fname="timeseries.ts"):
        # Column name -> column of values, as returned by parse_ts
        columns = parse_ts(fdir, fname)
        self.var = columns

View File

@@ -0,0 +1,21 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
from . import slices

View File

@@ -0,0 +1,92 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
import pylab as plt
import numpy as np
import matplotlib.gridspec as gridspec
import matplotlib.colors as colors
CM_INFERNO = plt.get_cmap('inferno')
def plot_3(mesh, input_grid, title = '', fname = 'default', bitmap=False, slicetype = 'middle', colrange=None, colormap=CM_INFERNO , contourplot=False):
    '''Plot three orthogonal 2D views of a 3D grid with a shared colorbar.

    Parameters:
        mesh: object providing ``xx``, ``yy``, ``zz``, ``xmid``, ``ymid``,
            ``zmid``, ``timestamp``, ``framenum`` and ``minfo``.
        input_grid: 3D array to visualise.
        title: text placed in front of the time stamp in the first panel.
        fname: file name prefix used when ``bitmap`` is set.
        bitmap: if true, save the figure as ``<fname>_<framenum>.png`` and
            close it.
        slicetype: 'middle' plots the planes through the mid indices, 'sum'
            plots the sums along each axis.
        colrange: optional ``(vmin, vmax)`` for the colour scale. By default
            the range of the grid ('middle') or of the three sums ('sum') is
            used.
        colormap: matplotlib colormap.
        contourplot: if true, draw filled contours with 10 levels instead of
            a pseudocolour mesh.

    Raises:
        ValueError: if ``slicetype`` is neither 'middle' nor 'sum'.
    '''
    if slicetype not in ('middle', 'sum'):
        # Fail early; otherwise the slices below would be undefined
        raise ValueError("slicetype must be 'middle' or 'sum', got %r" % (slicetype,))

    fig = plt.figure(figsize=(8, 8))
    grid = gridspec.GridSpec(2, 3, wspace=0.4, hspace=0.4, width_ratios=[1,1, 0.15])
    ax00 = fig.add_subplot( grid[0,0] )
    ax10 = fig.add_subplot( grid[0,1] )
    ax11 = fig.add_subplot( grid[1,1] )
    axcbar = fig.add_subplot( grid[:,2] )

    print(mesh.minfo.contents.keys())

    if slicetype == 'middle':
        yz_slice = input_grid[mesh.xmid, :, :]
        xz_slice = input_grid[:, mesh.ymid, :]
        xy_slice = input_grid[:, :, mesh.zmid]
    else:
        yz_slice = np.sum(input_grid, axis=0)
        xz_slice = np.sum(input_grid, axis=1)
        xy_slice = np.sum(input_grid, axis=2)

    # `is None` also works when colrange is a numpy array
    if colrange is not None:
        vmin, vmax = colrange[0], colrange[1]
    elif slicetype == 'middle':
        vmin, vmax = input_grid.min(), input_grid.max()
    else:
        vmin = np.amin([yz_slice.min(), xz_slice.min(), xy_slice.min()])
        vmax = np.amax([yz_slice.max(), xz_slice.max(), xy_slice.max()])
    plotnorm = colors.Normalize(vmin=vmin, vmax=vmax)

    def draw_panel(ax, coord_a, coord_b, data, xlabel, ylabel):
        # Draws one 2D view and returns the mappable for the colorbar
        aa, bb = np.meshgrid(coord_a, coord_b, indexing='ij')
        if contourplot:
            # `levels` is the contourf keyword for the number of levels
            mappable = ax.contourf(aa, bb, data, levels=10, norm=plotnorm, cmap=colormap)
        else:
            mappable = ax.pcolormesh(aa, bb, data, norm=plotnorm, cmap=colormap)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_aspect('equal')
        return mappable

    map1 = draw_panel(ax00, mesh.yy, mesh.zz, yz_slice, 'y', 'z')
    ax00.set_title('%s t = %.4e' % (title, mesh.timestamp) )
    draw_panel(ax10, mesh.xx, mesh.zz, xz_slice, 'x', 'z')
    draw_panel(ax11, mesh.xx, mesh.yy, xy_slice, 'x', 'y')

    plt.colorbar(map1, cax=axcbar)

    # NOTE(review): indentation was lost in the reviewed copy; closing the
    # figure is assumed to belong to the bitmap branch - confirm.
    if bitmap:
        plt.savefig('%s_%s.png' % (fname, mesh.framenum))
        print('Saved %s_%s.png' % (fname, mesh.framenum))
        plt.close(fig)

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Combines all frames named <prefix>_*.png into <prefix>_<timestamp>.gif using
# GraphicsMagick. The prefix is the first argument.
# Example of a fixed-name variant:
#gm convert -delay 40 colden_*.png colden.gif
DATE=$(date '+%Y_%m_%d_%H_%M')
echo "$DATE"
# The prefix is quoted so that spaces or glob characters in it are taken
# literally; only the frame wildcard is left unquoted.
gm convert -delay 15 "$1"_*.png "${1}_${DATE}.gif"

View File

@@ -0,0 +1,835 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
import numpy as np
import pylab as plt
import scipy as scp
import matplotlib.colors as colors
# Physical constants and unit conversions (CGS units)
G_newton = 6.674e-8 #cm**3 g**-1 s**-2
# Time to convert to physical quantities
yr = 3.154e+7 #s
kyr = 1000.0*yr
km = 1e5 #cm
AU = 1.496e+13 #cm
Msun = 1.98847e33 #g
# Model parameters
#cs0 = 20000.0 #cs cm/s "a" in Shu notation
cs0 = 35000.0 #cs cm/s "a" in Shu notation
B0 = 30e-6 #G
ksii = 11.3 #
#GS Eq. 10
ttm = 9.03e12*(cs0/35000.0)/(B0/30e-6)
# plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases
# no longer provide.
CM_INFERNO = plt.get_cmap('inferno')
def P_harmonics(theta, J=666):
    '''Vector spherical harmonics in e_r direction.

    Returns an array of ones shaped like ``theta`` for ``J == 0``,
    ``0.5*(3*cos(theta)**2 - 1)`` for ``J == 2`` and the scalar 0.0 for any
    other ``J``.
    '''
    if J == 0:
        return np.ones_like(theta) # 1.0
    if J == 2:
        return 0.5*(3.0*(np.cos(theta)**2.0) - 1.0)
    return 0.0
def B_harmonics(theta, J=666):
    '''Vector spherical harmonics in e_theta direction.

    Returns ``-3*cos(theta)*|sin(theta)|`` for ``J == 2`` (GS93 Appendix B)
    and zeros shaped like ``theta`` for any other ``J``.
    '''
    if J != 2:
        return 0.0*theta
    abs_sin = np.abs(np.sin(theta))
    cosine = np.cos(theta)
    # Alternative normalisation from Morse & Feshbach 1953:
    # -(3.0/np.sqrt(6.0))*cos_theta*sin_theta
    return -3.0*cosine*abs_sin
def get_tau(tt):
    '''Return the time ``tt`` divided by the module-level time scale ``ttm``.'''
    tau = tt/ttm
    return tau
def get_SHU77_potential(xx_point):
    '''Return the potential ``psi`` at ``xx_point`` from tabulated values.

    ``psi = -m0/xx_point + integral of mm/xx**2`` where the integral is a
    trapezoidal sum over the table entries with ``xx <= xx_point``.

    The table selection uses the local ``xx_SHU_table``; the original code
    referred to an undefined name ``xx_SHU`` here.
    '''
    #Copied here again for convenience
    m0 = 0.975 #Shu 77 core reduced mass
    xx_SHU_table = np.array([ 0.05, 0.10, 0.15, 0.20, 0.25,
                              0.30, 0.35, 0.40, 0.45, 0.50,
                              0.55, 0.60, 0.65, 0.70, 0.75,
                              0.80, 0.85, 0.90, 0.95, 1.00])
    mm_SHU77_table = np.array([0.981, 0.993, 1.01, 1.03, 1.05,
                               1.08, 1.12, 1.16, 1.20, 1.25,
                               1.30, 1.36, 1.42, 1.49, 1.56,
                               1.64, 1.72, 1.81, 1.90, 2.00])

    # Table entries up to and including xx_point
    selected = xx_SHU_table <= xx_point
    xx = xx_SHU_table[selected]
    mm = mm_SHU77_table[selected]

    # np.trapezoid is the current name of np.trapz; use whichever exists
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    psi = - m0/xx_point + integrate(mm/(xx**2.0), xx)
    return psi
def psi2(xx_SHU, mm_term, pp_term, J=666):
    '''GS93 Eq. 113.

    Returns ``-mm_term/xx_SHU + pp_term`` for ``J == 0``,
    ``-mm_term/xx_SHU**3 + xx_SHU**2*pp_term`` for ``J == 2`` and 0.0 for any
    other ``J``.
    '''
    if J == 0:
        return - mm_term/xx_SHU + pp_term
    if J == 2:
        return - mm_term/(xx_SHU**3.0) + (xx_SHU**2.0)*pp_term
    return 0.0
# Calculate the directional parameter
def dv_dx(xx,vv, alpha):
    """Right-hand side dv/dx of the similarity ODE system.

    Evaluates (x - v) * [alpha*(x - v) - 2/x] / [(x - v)**2 - 1].
    The denominator vanishes where x - v == 1.
    """
    rel = xx-vv
    numerator = alpha*rel - 2.0/xx
    denominator = rel**2.0 - 1.0
    return (numerator/denominator)*rel
def dalpha_dx(xx,vv, alpha):
    """Right-hand side dalpha/dx of the similarity ODE system.

    Evaluates (x - v) * alpha*[alpha - (2/x)*(x - v)] / [(x - v)**2 - 1].
    The denominator vanishes where x - v == 1.
    """
    rel = xx-vv
    numerator = alpha*(alpha - (2.0/xx)*rel)
    denominator = rel**2.0 - 1.0
    return (numerator/denominator)*rel
def dpsi_dx(xx, mm):
    """Return mm/xx**2, the x-derivative of the potential for reduced mass mm."""
    x_squared = xx**2.0
    return mm/x_squared
def dmm_dx(xx, alpha):
    """Return xx**2 * alpha, the x-derivative of the reduced mass."""
    x_squared = xx**2.0
    return x_squared*alpha
def dphi_dx(xx, alpha, mm, theta):
    """x-derivative of the zeroth-order flux function 0.25*mm**2*sin(theta)**2.

    xx, alpha, mm -- similarity coordinate, density and reduced mass.
    theta         -- polar angle in radians.
    """
    # d/dx of 0.25*m**2 is 0.5*m*dm/dx.
    ff_zero_der = 0.5*mm*dmm_dx(xx, alpha)
    sin_theta = np.sin(theta)
    # The angular factor is sin**2, matching phi_vecpot_second_order; the
    # previous "sin_theta*2.0" was a typo for "sin_theta**2.0".
    return ff_zero_der*(sin_theta**2.0)
def deltaspace(theta, tau):
    """Return 1 + tau**2 * deltaJ2(theta), built from the J = 0 and J = 2 harmonics only.

    v0 and v2 are hard-coded constants; they equal the last entries of the
    monopole and quadrupole velocity tables defined later in this file.
    """
    #Assuming J= 0, 2 only
    v0 = -2.222e-1
    v2 = 2.177e-1
    mono_part = (v0+2.0/3.0)*P_harmonics(theta, J=0)
    quad_part = (v2 - 2.0/3.0)*P_harmonics(theta, J=2)
    deltaJ2 = -(1.0/3.0)*(mono_part + quad_part)
    return 1 + (tau**2.0)*deltaJ2
def delta2(theta, tau):
    """Return deltaspace(theta, tau) squared."""
    #Assuming J= 0, 2 only
    delta = deltaspace(theta, tau)
    return delta**2.0
def yy_transform(xx_SHU, alpha_SHU77, alpha_mono_GS93, alpha_quad_GS93):
    """Placeholder transform: returns the two second-order inputs unchanged.

    xx_SHU and alpha_SHU77 are accepted but not used.
    """
    untouched = (alpha_mono_GS93, alpha_quad_GS93)
    return untouched
# Calculating the perturbation stage
def alpha_perturb(tau, xx_SHU, vv_SHU77, alpha_SHU77, alpha_mono_GS93, alpha_quad_GS93, theta):
    """Second-order density perturbation: monopole*P_0 + quadrupole*P_2.

    The "directional" term is evaluated and then overwritten with 0.0, so it
    never contributes to the result.
    """
    #Assuming J= 0, 2 only
    directional = xx_SHU*dalpha_dx(xx_SHU, vv_SHU77, alpha_SHU77)*delta2(theta, tau)
    directional = 0.0 #
    mono_part = alpha_mono_GS93*P_harmonics(theta, J=0)
    quad_part = alpha_quad_GS93*P_harmonics(theta, J=2)
    return mono_part + quad_part + directional
def vv_perturb(tau, xx_SHU, vv_SHU77, alpha_SHU77, vv_ww_mono_GS93, vv_ww_quad_GS93, theta):
    """Second-order velocity perturbation as np.array([v_r, v_theta]).

    vv_ww_*_GS93 -- index 0 holds the radial (v) coefficient, index 1 the
                    polar (w) coefficient.
    The "directional" term is evaluated and then overwritten with 0.0.
    """
    #Assuming J= 0, 2 only
    directional = xx_SHU*dv_dx(xx_SHU, vv_SHU77, alpha_SHU77)*delta2(theta, tau)
    directional = 0.0 #
    vv_mono, ww_mono = vv_ww_mono_GS93[0], vv_ww_mono_GS93[1]
    vv_quad, ww_quad = vv_ww_quad_GS93[0], vv_ww_quad_GS93[1]
    # Radial part uses the P harmonics, polar part the B harmonics.
    radial = vv_mono*P_harmonics(theta, J=0) + vv_quad*P_harmonics(theta, J=2) + directional ## vv
    polar = ww_mono*B_harmonics(theta, J=0) + ww_quad*B_harmonics(theta, J=2) + directional ## ww
    return np.array([radial, polar])
def psi_perturb(tau, xx_SHU, mm_SHU77, mm_pp_mono_GS93, mm_pp_quad_GS93, theta):
    """Second-order gravitational-potential perturbation.

    mm_pp_*_GS93 -- index 0 holds the m coefficient, index 1 the p coefficient.

    Returns psi2(J=0)*P_0(theta) + psi2(J=2)*P_2(theta). The "directional"
    term is evaluated and then overwritten with 0.0.
    """
    #Assuming J= 0, 2 only
    directional = xx_SHU*dpsi_dx(xx_SHU, mm_SHU77)*delta2(theta, tau)
    directional = 0.0 #
    mm_mono = mm_pp_mono_GS93[0]
    mm_quad = mm_pp_quad_GS93[0]
    pp_mono = mm_pp_mono_GS93[1]
    pp_quad = mm_pp_quad_GS93[1]
    # The quadrupole coefficients must go through the J=2 form of psi2; the
    # previous code passed J=0 for both terms, leaving the J=2 branch unused.
    psi = psi2(xx_SHU, mm_mono, pp_mono, J=0)*P_harmonics(theta, J=0) \
        + psi2(xx_SHU, mm_quad, pp_quad, J=2)*P_harmonics(theta, J=2) \
        + directional
    return psi
def phi_vecpot_second_order(tau, xx_SHU, mm_SHU77, alpha_SHU77, FF_DD_mono_GS93, FF_DD_quad_GS93, theta):
    """Second-order flux-function perturbation.

    FF_DD_*_GS93 -- index 0 holds the F coefficient, index 1 the D coefficient.

    Returns sin(theta)**2 * [(F0 + D0/ksii)*P_0 + (F2 + D2/ksii)*P_2], with
    ksii taken from module scope. The "directional" term is evaluated and
    then overwritten with 0.0.
    """
    directional = xx_SHU*dphi_dx(xx_SHU, alpha_SHU77, mm_SHU77, theta)*delta2(theta, tau)
    directional = 0.0 #
    sin_theta = np.sin(theta)
    inv_ksii = 1.0/ksii
    mono_term = (FF_DD_mono_GS93[0] + inv_ksii*FF_DD_mono_GS93[1])
    quad_term = (FF_DD_quad_GS93[0] + inv_ksii*FF_DD_quad_GS93[1])
    angular_sum = mono_term*P_harmonics(theta, J=0) + quad_term*P_harmonics(theta, J=2)
    return (sin_theta**2.0)*( angular_sum ) + directional
def phi_vecpot_zero_order(xx_SHU, mm_SHU77, theta):
    """Zeroth-order flux function 0.25*mm_SHU77**2 * sin(theta)**2.

    xx_SHU   -- accepted for signature symmetry; not used.
    mm_SHU77 -- reduced mass at the point.
    theta    -- polar angle in radians.
    """
    ff_zero = 0.25*(mm_SHU77**2.0)
    sin_theta = np.sin(theta)
    # sin**2, consistent with phi_vecpot_second_order; the previous
    # "sin_theta*2.0" was a typo. With m = 2x this gives x**2*sin(theta)**2,
    # i.e. the flux function of a uniform field.
    phi_vecpot_zero = ff_zero*(sin_theta**2.0)
    return phi_vecpot_zero
# Combining the perturbation stage.
def alpha_xvec_tau(tau, xx_SHU, vv_SHU77, alpha_SHU77, alpha_mono_GS93, alpha_quad_GS93, theta):
    """Total dimensionless density: alpha_SHU77 + tau**2 * alpha_perturb(...)."""
    second_order = alpha_perturb(tau, xx_SHU, vv_SHU77, alpha_SHU77, alpha_mono_GS93, alpha_quad_GS93, theta)
    return alpha_SHU77 + (tau**2.0)*second_order
def vv_xvec_tau(tau, xx_SHU, vv_SHU77, alpha_SHU77, vv_ww_mono_GS93, vv_ww_quad_GS93, theta):
    """Total dimensionless velocity [v_r, v_theta].

    The radial component is vv_SHU77 plus tau**2 times the perturbation; the
    polar component is the perturbation alone.
    """
    velocity = (tau**2.0)*vv_perturb(tau, xx_SHU, vv_SHU77, alpha_SHU77, vv_ww_mono_GS93, vv_ww_quad_GS93, theta)
    velocity[0] = vv_SHU77 + velocity[0]
    velocity[1] = 0.0 + velocity[1] #No poloidal velocity in Shu77
    return velocity
def psi_xvec_tau(tau, xx_SHU, mm_SHU77, mm_pp_mono, mm_pp_quad, theta):
    """Total dimensionless potential: Shu 77 potential + tau**2 * psi_perturb(...)."""
    second_order = (tau**2.0)*psi_perturb(tau, xx_SHU, mm_SHU77, mm_pp_mono, mm_pp_quad, theta)
    zeroth_order = get_SHU77_potential(xx_SHU)
    return zeroth_order + second_order
def phi_vecpot_xvec_tau(tau, xx_SHU, mm_SHU77, alpha_SHU77, FF_DD_mono_GS93, FF_DD_quad_GS93, theta):
    """Total dimensionless flux function: zeroth order + tau**2 * second order."""
    second = (tau**2.0)*phi_vecpot_second_order(tau, xx_SHU, mm_SHU77, alpha_SHU77, FF_DD_mono_GS93, FF_DD_quad_GS93, theta)
    zeroth = phi_vecpot_zero_order(xx_SHU, mm_SHU77, theta)
    return zeroth + second
#Physical unit conversion stage
def rho_rt(tt, xx_SHU, vv_SHU77, alpha_SHU77, alpha_mono_GS93, alpha_quad_GS93, theta):
    """Physical density at time tt.

    Returns (rho, alpha) where alpha is the dimensionless density and
    rho = alpha / (4*pi*G*tt**2).
    """
    alpha_xvec = alpha_xvec_tau(get_tau(tt), xx_SHU, vv_SHU77, alpha_SHU77, alpha_mono_GS93, alpha_quad_GS93, theta)
    to_physical = 1.0/(4.0*np.pi*G_newton*(tt**2.0))
    return to_physical * alpha_xvec, alpha_xvec
def uu_rt(tt, xx_SHU, vv_SHU77, alpha_SHU77, vv_ww_mono_GS93, vv_ww_quad_GS93, theta):
    """Physical velocity at time tt.

    Returns (uu, vv) where vv is the dimensionless [v_r, v_theta] pair and
    uu = cs0 * vv.
    """
    vv_xvec = vv_xvec_tau(get_tau(tt), xx_SHU, vv_SHU77, alpha_SHU77, vv_ww_mono_GS93, vv_ww_quad_GS93, theta)
    return cs0*vv_xvec, vv_xvec
def grav_psi_rt(tt, xx_SHU, mm_SHU77, mm_pp_mono, mm_pp_quad, theta):
    """Physical gravitational potential at time tt.

    Returns (Vpot, psi) where psi is the dimensionless potential and
    Vpot = cs0**2 * psi.
    """
    psi_xvec = psi_xvec_tau(get_tau(tt), xx_SHU, mm_SHU77, mm_pp_mono, mm_pp_quad, theta)
    return (cs0**2.0)*psi_xvec, psi_xvec
def vectorpot_rt(tt, xx_SHU, mm_SHU77, alpha_SHU77, FF_DD_mono_GS93, FF_DD_quad_GS93, theta):
    """Physical magnetic flux function at time tt.

    Returns (Phi_flux, phi) where phi is the dimensionless flux function and
    Phi_flux = pi * B0 * (cs0*tt)**2 * phi.
    """
    phi_vecpot_xvec = phi_vecpot_xvec_tau(get_tau(tt), xx_SHU, mm_SHU77, alpha_SHU77, FF_DD_mono_GS93, FF_DD_quad_GS93, theta)
    length_scale = cs0*tt
    return np.pi*B0*(length_scale**2.0)*phi_vecpot_xvec, phi_vecpot_xvec
###def match_xx(xx_rad, xx_SHU):
### xx_buffer = np.empty_like(xx_rad)
### stride = np.abs(xx_SHU[1] - xx_SHU[0])
### for xx in xx_SHU:
### #where xx - stride < xx_rad < xx + stride -> xx_rad[i] = xx
### #loc = np.where((xx_rad <= (xx + stride) and xx_rad > (xx - stride) ))
### loc = np.where(xx_rad <= (xx + stride) )
### print(loc)
def get_shu_index(xx, xx_SHU):
    """Return the index of the table point in xx_SHU that represents xx.

    xx     -- scalar similarity coordinate.
    xx_SHU -- 1-D increasing array of table coordinates (at least two entries).

    Values beyond either end of the table are clamped to the last/first
    index. Inside the table, the first point within half a grid spacing of
    xx (window (xx - stride, xx + stride]) is chosen; if rounding or a
    non-uniform grid leaves that window empty, the nearest point is used.
    """
    # Half of the spacing between the first two table points.
    stride = np.abs(xx_SHU[1] - xx_SHU[0])/2.0
    #TODO Now a purkka version. Do better.
    # Can be improved by treating the actual low and high x cases properly.
    if (xx > xx_SHU[xx_SHU.size-1]):
        return xx_SHU.size-1
    if (xx < xx_SHU[0]):
        return 0
    matches = np.where((xx_SHU <= (xx + stride)) & (xx_SHU > (xx - stride)))[0]
    if matches.size == 0:
        # Previously this case raised IndexError on matches[0].
        return int(np.argmin(np.abs(xx_SHU - xx)))
    return matches[0]
def plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, xxvar, physvar,
                vv_hor=np.array(None), vv_ver=np.array(None), uu_hor=np.array(None), uu_ver=np.array(None),
                title1=r"\alpha", title2=r"\rho", filetitle='density',
                var_min=[None, None], var_max=[None, None], colmap=CM_INFERNO, normtype='log',
                streamlines = 0, contourplot = 0):
    """Draw a two-panel frame (dimensionless | physical) and save it as a PNG.

    tt                     -- time of the frame; used for the length scale
                              cs0*tt, the titles and get_tau(tt).
    xx_*_corners           -- mesh coordinates passed to pcolormesh.
    xx_horizontal/vertical -- mesh coordinates passed to contourf,
                              streamplot and quiver.
    xxvar, physvar         -- fields for the left and right panel.
    vv_*, uu_*             -- optional vector field components for the left
                              and right panel; the np.array(None) default
                              means "no vectors".
    title1, title2         -- LaTeX fragments for the panel titles.
    filetitle              -- file name prefix.
    var_min, var_max       -- [left, right] colour limits; var_min[0] None
                              selects automatic normalisation.
    normtype               -- 'log' for LogNorm on both panels; 'cdensity' and
                              'cflux' select fixed contour levels when
                              contourplot is set; anything else is linear.
    streamlines            -- draw streamlines instead of quiver arrows.
    contourplot            -- use filled contours instead of pcolormesh.

    Reads the module globals cs0, kyr and numslice (frame counter used in the
    file name). The right panel is plotted in units of 1e17 cm and limited to
    [0, 3] on both axes.
    """
    if var_min[0] is not None:
        if normtype == 'log':
            mynorm1 = colors.LogNorm( vmin=var_min[0], vmax=var_max[0] )
            mynorm2 = colors.LogNorm( vmin=var_min[1], vmax=var_max[1] )
        else:
            mynorm1 = colors.Normalize( vmin=var_min[0], vmax=var_max[0] )
            mynorm2 = colors.Normalize( vmin=var_min[1], vmax=var_max[1] )
    else:
        mynorm1 = colors.Normalize( )
        mynorm2 = colors.Normalize( )

    if contourplot:
        # Contour levels (and matching norm) for the physical panel.
        if normtype =='cdensity':
            numbers = np.arange(0, 20, dtype=np.float64)
            contourlevs = 1e-20*(np.sqrt(2.0)**numbers)
            contournorm = colors.LogNorm( vmin=contourlevs.min(), vmax=contourlevs.max() )
        elif normtype =='cflux':
            contourlevs = np.linspace(1.0, 1e31, num=20)
            contournorm = colors.Normalize( vmin=contourlevs.min(), vmax=contourlevs.max() )
        else:
            contourlevs = np.linspace(physvar.min(), physvar.max(), num=10)
            contournorm = colors.Normalize( vmin=contourlevs.min(), vmax=contourlevs.max() )

    # Physical coordinates in units of 1e17 cm (use /AU instead for AU).
    length_scale = (cs0*tt)/1e17
    rr_horizontal_corners = xx_horizontal_corners*length_scale
    rr_vertical_corners = xx_vertical_corners*length_scale
    rr_horizontal = xx_horizontal*length_scale
    rr_vertical = xx_vertical*length_scale

    figa, axa = plt.subplots(nrows=1, ncols=2, figsize=(16,6))

    if contourplot:
        mapa = axa[0].contourf(xx_horizontal, xx_vertical, xxvar, cmap=colmap, norm=mynorm1)
        maprho = axa[1].contourf(rr_horizontal, rr_vertical, physvar, contourlevs, cmap=colmap, norm=contournorm)
    else:
        mapa = axa[0].pcolormesh(xx_horizontal_corners, xx_vertical_corners, xxvar, cmap=colmap, norm=mynorm1 )
        maprho = axa[1].pcolormesh(rr_horizontal_corners, rr_vertical_corners, physvar, cmap=colmap, norm=mynorm2)

    # The "no vectors" default is a 0-d array, real data is at least 1-D.
    # The old test (vv_hor.any() != None) relied on any() returning None for
    # object arrays, which newer NumPy releases no longer do.
    if np.ndim(vv_hor) > 0:
        if streamlines:
            axa[0].streamplot(xx_horizontal, xx_vertical, vv_hor, vv_ver, color = 'k')
            axa[1].streamplot(rr_horizontal, rr_vertical, uu_hor, uu_ver, color = 'k' )
        else:
            axa[0].quiver(xx_horizontal, xx_vertical, vv_hor, vv_ver, pivot = 'middle')
            axa[1].quiver(rr_horizontal, rr_vertical, uu_hor, uu_ver, pivot = 'middle')

    # Attach the colorbars to this figure; the old code used the unrelated
    # module-level "fig".
    figa.colorbar(mapa, ax=axa[0])
    figa.colorbar(maprho, ax=axa[1])

    tau = get_tau(tt)
    tt_kyr = tt/kyr

    axa[0].set_title(r'$%s(x, \tau = %.3f)$ ' % (title1, tau))
    axa[1].set_title(r'$%s(r, t = %.3f \mathrm{kyr})$ ' % (title2, tt_kyr))
    axa[0].set_xlabel('x')
    axa[0].set_ylabel('x')
    axa[1].set_xlabel(r'r ($10^{17}$ cm)')
    axa[1].set_ylabel(r'r ($10^{17}$ cm)' )
    axa[1].set_xlim(0.0, 3.0)
    axa[1].set_ylim(0.0, 3.0)
    axa[0].set_aspect('equal', 'datalim')

    # numslice is the module-level frame counter advanced by the main loop.
    figfile = '%s_%s.png' % (filetitle, str(numslice).zfill(6))
    print(figfile)
    figa.savefig(figfile)
    plt.close(figa)
# Zeroth-order tables on a uniform grid x = 0.05 ... 1.00 (20 points each).
# The same values appear in the sibling integrator script under the comment
# "From Shu 1977 TABLE II".
# xx_SHU: similarity coordinate x.
xx_SHU = np.array([ 0.05, 0.10, 0.15, 0.20, 0.25,
0.30, 0.35, 0.40, 0.45, 0.50,
0.55, 0.60, 0.65, 0.70, 0.75,
0.80, 0.85, 0.90, 0.95, 1.00])
# alpha_SHU77: dimensionless density at each x.
alpha_SHU77 = np.array([ 71.5, 27.8, 16.4, 11.5, 8.76,
7.09, 5.95, 5.14, 4.52, 4.04,
3.66, 3.35, 3.08, 2.86, 2.67,
2.50, 2.35, 2.22, 2.10, 2.00])
# vv_SHU77: dimensionless velocity; the tabulated magnitudes are negated here.
vv_SHU77 = -np.array([ 5.44, 3.47, 2.58, 2.05, 1.68,
1.40, 1.18, 1.01, 0.861, 0.735,
0.625, 0.528, 0.442, 0.363, 0.291,
0.225, 0.163, 0.106, 0.051, 0.00])
# mm_SHU77: reduced mass at each x (same values as in get_SHU77_potential).
mm_SHU77 = np.array([0.981, 0.993, 1.01, 1.03, 1.05,
1.08, 1.12, 1.16, 1.20, 1.25,
1.30, 1.36, 1.42, 1.49, 1.56,
1.64, 1.72, 1.81, 1.90, 2.00])
#GS Table 1
# Second-order monopole (J = 0) coefficients on the same 20-point x grid as xx_SHU.
# alpha_mono_GS93: density coefficient.
alpha_mono_GS93 = np.array([ 6.304, 2.600, 1.652, 1.156, 9.005e-1,
7.314e-1, 6.084e-1, 5.084e-1, 4.256e-1, 3.517e-1,
2.829e-1, 2.172e-1, 1.488e-1, 8.091e-2, 8.360e-3,
-6.826e-2, -1.512e-1, -2.406e-1, -3.382e-1, -4.444e-1])
# vv_ww_mono_GS93: row 0 = radial velocity coefficient v, row 1 = polar
# velocity coefficient w (all zero for the monopole).
vv_ww_mono_GS93 = np.array([[4.372e-1, 3.335e-1, 2.390e-1, 1.918e-1, 1.522e-1,
1.226e-1, 9.579e-2, 7.103e-2, 4.828e-2, 2.640e-2,
5.058e-3, -1.588e-2, -3.791e-2, -5.975e-2, -8.293e-2,
-1.071e-1, -1.330e-1, -1.605e-1, -1.902e-1, -2.222e-1],
[ 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0]])
# mm_pp_mono_GS93: row 0 = m coefficient, row 1 = p coefficient (all zero);
# both are consumed by psi2() via psi_perturb().
mm_pp_mono_GS93 = np.array([[8.634e-4, 1.959e-3, 3.560e-3, 5.661e-3, 8.235e-3,
1.130e-2, 1.482e-2, 1.873e-2, 2.293e-2, 2.730e-2,
3.166e-2, 3.579e-2, 3.935e-2, 4.196e-2, 4.312e-2,
4.221e-2, 3.847e-2, 3.097e-2, 1.859e-2, 0.0],
[ 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0]])
# FF_DD_mono_GS93: row 0 = F, row 1 = D; combined as F + D/ksii in
# phi_vecpot_second_order().
FF_DD_mono_GS93 = np.array([[ -1.130, -3.275e-1, -1.355e-1, -6.415e-2, -2.889e-2, #F
-8.387e-3, 5.358e-3, 1.534e-2, 2.303e-2, 2.931e-2,
3.454e-2, 3.888e-2, 4.225e-2, 4.442e-2, 4.504e-2,
4.358e-2, 3.935e-2, 3.146e-2, 1.881e-2, 0.0],
[ -1.246e1, -3.168, -1.141, -5.740e-1, -3.178e-1, #D
-1.878e-1, -1.049e-1, -4.547e-2, 3.393e-4, 3.924e-2,
7.431e-2, 1.070e-1, 1.376e-1, 1.650e-1, 1.867e-1,
1.992e-1, 1.966e-1, 1.708e-1, 1.103e-1, 0.0]])
#GS Table 2
# Second-order quadrupole (J = 2) coefficients on the same 20-point x grid as xx_SHU.
# NOTE(review): three entries below break the otherwise smooth trend of their
# rows and look like possible transcription slips -- verify against the
# published table before trusting the plots:
#   * vv_ww_quad_GS93 row w, 1st value -2.085 (next is -4.890; -2.085e1 would fit)
#   * mm_pp_quad_GS93 row p, 6th value -6.591e-1 (neighbours -1.999e-1 and -1.062e-2)
#   * FF_DD_quad_GS93 row D, 10th-12th values -1.112e-2, -2.139e-3, -1.615e-2
# alpha_quad_GS93: density coefficient.
alpha_quad_GS93 = np.array([ -1.096e3, -1.191e2, -3.148e1, -1.158e1, -5.105,
-2.456, -1.217, -5.889e-1, -2.569e-1, -7.024e-2,
3.790e-2, 1.042e-1, 1.505e-1, 1.845e-1, 2.163e-1,
2.492e-1, 2.865e-1, 3.302e-1, 3.823e-1, 4.437e-1])
# vv_ww_quad_GS93: row 0 = radial velocity coefficient v, row 1 = polar coefficient w.
vv_ww_quad_GS93 = np.array([[ -2.581, -1.533, -8.072e-1, -5.666e-1, -3.905e-1, #v
-2.790e-1, -1.928e-1, -1.254e-1, -7.156e-2, -2.614e-2,
1.267e-2, 4.650e-2, 7.724e-2, 1.042e-1, 1.288e-1,
1.510e-1, 1.711e-1, 1.889e-1, 2.045e-1, 2.177e-1],
[ -2.085, -4.890, -1.811, -8.842e-1, -4.816e-1, #w
-2.807e-1, -1.628e-1, -8.779e-2, -3.852e-2, -4.481e-3,
1.928e-2, 3.578e-2, 4.683e-2, 5.306e-2, 5.512e-2,
5.312e-2, 4.704e-2, 3.670e-2, 2.179e-2, 1.898e-3]])
# mm_pp_quad_GS93: row 0 = m coefficient, row 1 = p coefficient.
mm_pp_quad_GS93 = np.array([[-3.860e-5, -1.541e-4, -3.044e-4, -4.847e-4, -6.831e-4, #m
-8.874e-4, -1.083e-3, -1.253e-3, -1.385e-3, -1.462e-3,
-1.470e-3, -1.389e-3, -1.191e-3, -8.405e-4, -2.841e-4,
5.579e-4, 1.800e-3, 3.609e-3, 6.218e-3, 9.951e-3],
[ -7.539e1, -7.275, -1.730, -5.586e-1, -1.999e-1, #p
-6.591e-1, -1.062e-2, 1.294e-2, 2.267e-2, 2.600e-2,
2.625e-2, 2.500e-2, 2.294e-2, 2.046e-2, 1.769e-2,
1.469e-2, 1.146e-2, 7.941e-3, 4.102e-3, -1.214e-4]])
# FF_DD_quad_GS93: row 0 = F, row 1 = D; combined as F + D/ksii in
# phi_vecpot_second_order().
FF_DD_quad_GS93 = np.array([[ -2.253, -6.517e-1, -2.722e-1, -1.345e-1, -6.993e-2, #F
-3.593e-2, -1.660e-2, -5.864e-3, -6.809e-4, 8.213e-4,
-3.086e-4, -3.338e-3, -7.681e-3, -1.272e-2, -1.778e-2,
-2.191e-2, -2.392e-2, -2.219e-2, -1.457e-2, 1.729e-3],
[ -2.484e1, -6.258, -2.221, -1.102, -6.127e-1, #D
-3.645e-1, -2.213e-1, -1.297e-1, -7.020e-2, -1.112e-2,
-2.139e-3, -1.615e-2, 2.744e-2, 3.252e-2, 3.269e-2,
2.839e-2, 2.104e-2, 1.199e-2, 3.732e-3, 0.0]])
# Quick sanity check on the equatorial plane (theta = pi/2) at tt = 0.3*ttm.
tt = 0.3*ttm
theta = 0.5*np.pi

# Drop the last grid point (x = 1.0) from the profiles used below.
xx_SHU = xx_SHU[:-1]
vv_SHU77 = vv_SHU77[:-1]
alpha_SHU77 = alpha_SHU77[:-1]
alpha_mono_GS93 = alpha_mono_GS93[:-1]
alpha_quad_GS93 = alpha_quad_GS93[:-1]
vv_ww_mono_GS93 = vv_ww_mono_GS93[:, :-1].copy()
vv_ww_quad_GS93 = vv_ww_quad_GS93[:, :-1].copy()

rho, alpha_xvec = rho_rt(tt, xx_SHU, vv_SHU77, alpha_SHU77, alpha_mono_GS93, alpha_quad_GS93, theta)
rr = xx_SHU*cs0*tt

# Dump shapes and values for inspection.
np.set_printoptions(linewidth=200)
for dumped in (rho.shape, xx_SHU.shape, rho, xx_SHU,
               vv_ww_mono_GS93, vv_ww_quad_GS93,
               vv_ww_quad_GS93[0], vv_ww_quad_GS93[1]):
    print(dumped)

# Compare the zeroth-order density with the two second-order coefficients.
plt.figure()
for profile, legend_label in ((alpha_SHU77, r"$\alpha^{(0)}$"),
                              (alpha_mono_GS93, r"$\alpha^{(2)}_0$"),
                              (alpha_quad_GS93, r"$\alpha^{(2)}_2$")):
    plt.plot(xx_SHU, profile, label=legend_label)
plt.ylim([-5.0,5.0])
plt.legend()
plt.show()
'''
ii = 0
theta_axis = np.linspace(0.0, np.pi)
xx_theta = np.array([])
print("PIIP")
plt.figure()
for ii in range(0,xx_SHU.size):
alpha_theta = np.array([])
alpha_shuref = np.array([])
for theta in theta_axis:
rho, alpha_xvec = rho_rt(tt, xx_SHU[ii], vv_SHU77[ii], alpha_SHU77[ii], alpha_mono_GS93[ii], alpha_quad_GS93[ii])
alpha_theta = np.append(alpha_theta, alpha_xvec)
alpha_shuref = np.append(alpha_shuref, alpha_SHU77[ii])
plt.plot(alpha_theta, theta_axis, label = "GS93")
#plt.plot(alpha_shuref, theta_axis, label = "GS93")
'''
# Build the 2-D mesh: the table grid with an extra point at x = 0 on both axes.
xx_SHU_GRID = np.insert(xx_SHU, 0, 0.0)
print(xx_SHU_GRID)
xx_horizontal, xx_vertical = np.meshgrid(xx_SHU_GRID, xx_SHU_GRID, indexing='xy')
# Angle of each mesh point, measured from the vertical axis.
theta = np.arctan2(xx_horizontal, xx_vertical)

# pcolormesh marks cell corners rather than centres, so shift by half a cell.
dxx = np.abs(xx_horizontal[0,1] - xx_horizontal[0,0])
print(dxx)
half_cell = dxx/2.0
xx_horizontal_corners = xx_horizontal - half_cell
xx_vertical_corners = xx_vertical - half_cell
xx_rad = np.sqrt(xx_horizontal**2.0 + xx_vertical**2.0)

# Diagnostic figure 1: angle and radius of the mesh.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16,4))
for panel, field, panel_title in zip(ax, (theta, xx_rad),
                                     (r"$\theta$", r"$x_\mathrm{rad}$")):
    drawn = panel.pcolormesh(xx_horizontal_corners, xx_vertical_corners, field)
    panel.set_title(panel_title)
    fig.colorbar(drawn, ax=panel)
    panel.set_aspect('equal', 'datalim')

# Diagnostic figure 2: radial harmonics and the Delta factor at tau = 0.5.
Pfig, Pax = plt.subplots(nrows=1, ncols=3, figsize=(16,4))
print("P_harmonics(theta, J=0)", P_harmonics(theta, J=0))
P_fields = (P_harmonics(theta, J=0), P_harmonics(theta, J=2), deltaspace(theta, 0.5))
P_titles = (r"$P_0(\theta)$", r"$P_2(\theta)$", r"$\Delta(\theta, \tau = 0.5)$")
for panel, field, panel_title in zip(Pax, P_fields, P_titles):
    drawn = panel.pcolormesh(xx_horizontal_corners, xx_vertical_corners, field)
    panel.set_title(panel_title)
    Pfig.colorbar(drawn, ax=panel)
    panel.set_aspect('equal', 'datalim')

# Diagnostic figure 3: polar harmonics.
Bfig, Bax = plt.subplots(nrows=1, ncols=2, figsize=(16,4))
print("B_harmonics(theta, J=0)", B_harmonics(theta, J=0))
B_fields = (B_harmonics(theta, J=0), B_harmonics(theta, J=2))
B_titles = (r"$B_0(\theta)$", r"$B_2(\theta)$")
for panel, field, panel_title in zip(Bax, B_fields, B_titles):
    drawn = panel.pcolormesh(xx_horizontal_corners, xx_vertical_corners, field)
    panel.set_title(panel_title)
    Bfig.colorbar(drawn, ax=panel)
    panel.set_aspect('equal', 'datalim')

plt.show()

# Dump the corner coordinates for inspection.
print(xx_horizontal_corners[-1,:])
print(xx_horizontal_corners)
print(xx_vertical_corners[:, -1])
print(xx_vertical_corners)
print(xx_vertical_corners)
# Main loop: evaluate every field on the mesh for each time and write one
# PNG per field per frame. numslice is the frame counter that plot_figure()
# reads for its file names.
numslice = 0
frametot = 201
#frametot = 101
#frametot = 11
for tt in np.linspace(0.1, ttm, num=frametot):
    alpha = np.empty_like(xx_rad)
    alpha77 = np.empty_like(xx_rad)
    rho = np.empty_like(xx_rad)
    vv_rad = np.empty_like(xx_rad)
    vv_pol = np.empty_like(xx_rad)
    uu_rad = np.empty_like(xx_rad)
    uu_pol = np.empty_like(xx_rad)
    psi = np.empty_like(xx_rad)
    Vpot = np.empty_like(xx_rad)
    Delta = np.empty_like(xx_rad)
    Phi_flux = np.empty_like(xx_rad)
    phi_vecpot = np.empty_like(xx_rad)
    alpha_2_J = np.empty_like(xx_rad)

    for ii in range(xx_SHU_GRID.size):
        for kk in range(xx_SHU_GRID.size):
            xx = xx_rad[ii,kk]
            th = theta[ii,kk]
            # Table row representing this radius.
            ishu = get_shu_index(xx, xx_SHU)

            rho[ii, kk], alpha[ii, kk] = rho_rt(tt, xx_SHU[ishu],
                                                vv_SHU77[ishu],
                                                alpha_SHU77[ishu],
                                                alpha_mono_GS93[ishu],
                                                alpha_quad_GS93[ishu], th)
            alpha77[ii, kk] = alpha_SHU77[ishu]

            vv_ww_mono_point = vv_ww_mono_GS93[:, ishu]
            vv_ww_quad_point = vv_ww_quad_GS93[:, ishu]
            uu_dump, vv_dump = uu_rt(tt, xx_SHU[ishu], vv_SHU77[ishu], alpha_SHU77[ishu], vv_ww_mono_point, vv_ww_quad_point, th)
            vv_rad[ii, kk] = vv_dump[0]
            vv_pol[ii, kk] = vv_dump[1]
            uu_rad[ii, kk] = uu_dump[0]
            uu_pol[ii, kk] = uu_dump[1]

            mm_pp_mono_point = mm_pp_mono_GS93[:, ishu]
            mm_pp_quad_point = mm_pp_quad_GS93[:, ishu]
            Vpot[ii, kk], psi[ii, kk] = grav_psi_rt(tt, xx_SHU[ishu], mm_SHU77[ishu], mm_pp_mono_point, mm_pp_quad_point, th)

            Phi_flux[ii, kk], phi_vecpot[ii, kk] = vectorpot_rt(tt, xx_SHU[ishu], mm_SHU77[ishu], alpha_SHU77[ishu],
                                                                 FF_DD_mono_GS93[:, ishu],
                                                                 FF_DD_quad_GS93[:, ishu], th)

            Delta[ii, kk] = deltaspace(th, get_tau(tt))
            alpha_2_J[ii, kk] = alpha_mono_GS93[ishu]*P_harmonics(th, J=0) + alpha_quad_GS93[ishu]*P_harmonics(th, J=2)

    # Rotate (radial, polar) components into (horizontal, vertical) ones.
    vv_hor = vv_pol*np.cos(theta) + vv_rad*np.sin(theta)
    vv_ver = - vv_pol*np.sin(theta) + vv_rad*np.cos(theta)
    uu_hor = uu_pol*np.cos(theta) + uu_rad*np.sin(theta)
    uu_ver = - uu_pol*np.sin(theta) + uu_rad*np.cos(theta)

    # Same conversion as rho_rt(): rho = alpha / (4*pi*G*t**2). The previous
    # expression multiplied by tt instead of dividing by tt**2.
    rho77 = alpha77/(4.0*np.pi*G_newton*(tt**2.0))

    #Apply mask
    rad_mask = 0.2
    inner = xx_rad < rad_mask
    alpha = np.ma.masked_where(inner, alpha)
    rho = np.ma.masked_where(inner, rho)
    vv_rad = np.ma.masked_where(inner, vv_rad)
    uu_rad = np.ma.masked_where(inner, uu_rad)
    vv_pol = np.ma.masked_where(inner, vv_pol)
    uu_pol = np.ma.masked_where(inner, uu_pol)
    vv_hor = np.ma.masked_where(inner, vv_hor)
    vv_ver = np.ma.masked_where(inner, vv_ver)
    uu_hor = np.ma.masked_where(inner, uu_hor)
    uu_ver = np.ma.masked_where(inner, uu_ver)
    psi = np.ma.masked_where(inner, psi )
    Vpot = np.ma.masked_where(inner, Vpot)
    phi_vecpot = np.ma.masked_where(inner, phi_vecpot)
    Phi_flux = np.ma.masked_where(inner, Phi_flux )
    alpha_2_J = np.ma.masked_where(inner, alpha_2_J)
    Delta = np.ma.masked_where(inner, Delta )

    plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, alpha, rho,
                vv_hor=vv_hor, vv_ver=vv_ver, uu_hor=uu_hor, uu_ver=uu_ver,
                title1=r"\alpha", title2=r"\rho", filetitle='GS93density',
                streamlines = 1, contourplot=1,
                var_min=[0.00, 1e15], var_max=[16, 1e21],
                normtype = 'cdensity')

    plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, alpha77, rho77,
                #var_min=[0.00, 0], var_max=[16, 1e20],
                title1=r"\alpha", title2=r"\rho", filetitle='S77density')

    plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, vv_rad, uu_rad,
                vv_hor=vv_hor, vv_ver=vv_ver, uu_hor=uu_hor, uu_ver=uu_ver,
                title1=r"v_r", title2=r"u_r", filetitle='GS93velocity_rad',
                var_min=[-2.5, -2.5*cs0], var_max=[0.0, 0.0*cs0],
                normtype = 'lin')

    plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, vv_pol, uu_pol,
                vv_hor=vv_hor, vv_ver=vv_ver, uu_hor=uu_hor, uu_ver=uu_ver,
                title1=r"v_\theta", title2=r"u_\theta", filetitle='GS93velocity_pol',
                var_min=[0.0, 0.0*cs0], var_max=[0.5, 0.5*cs0],
                normtype = 'lin')

    plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, psi, Vpot,
                vv_hor=vv_hor, vv_ver=vv_ver, uu_hor=uu_hor, uu_ver=uu_ver,
                title1=r"\psi", title2=r"V_\mathrm{pot}", filetitle='GS93gravpot',
                var_min=[12.0, 12.0*(cs0**2.0)], var_max=[21.0, 21.0*(cs0**2.0)],
                normtype = 'lin')

    plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, phi_vecpot, Phi_flux,
                title1=r"\phi", title2=r"\Phi_\mathrm{flux}", filetitle='GS93vecpot',
                vv_hor=vv_hor, vv_ver=vv_ver, uu_hor=uu_hor, uu_ver=uu_ver,
                streamlines = 1, contourplot=1,
                normtype = 'cflux')

    plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, np.sqrt(vv_hor**2.0 + vv_ver**2.0), np.sqrt(uu_hor**2.0 + uu_ver**2.0),
                title1=r"|v|", title2=r"|u| (cm/s)", filetitle='GS93vel2',
                var_min=[0.0, 0.0*cs0], var_max=[2.5, 2.5*cs0],
                vv_hor=vv_hor, vv_ver=vv_ver, uu_hor=uu_hor, uu_ver=uu_ver,
                streamlines = 1,
                normtype = 'lin')

    # Optional diagnostics (disabled):
    ##plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, Delta, Delta,
    ##            title1=r"\Delta", title2=r"\Delta", filetitle='Delta',
    ##            normtype = 'lin')
    ##plot_figure(tt, xx_horizontal_corners, xx_vertical_corners, xx_horizontal, xx_vertical, alpha_2_J, alpha_2_J,
    ##            title1=r"\sum \alpha^{(2)}_J", title2=r"\sum \alpha^{(2)}_J", filetitle='alpha_2_J',
    ##            normtype = 'lin')

    numslice += 1

1
analysis/python/calc/purge.sh Executable file
View File

@@ -0,0 +1 @@
# Remove all PNG files in the current directory. -f keeps the script quiet
# when none exist; -- protects against file names that start with a dash.
rm -f -- *.png

View File

@@ -0,0 +1,279 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
import numpy as np
import pylab as plt
G_newton = 6.674e-8 #cm**3 g**-1 s**-2
def dv_dx(xx,vv, alpha):
    """Right-hand side dv/dx of the similarity ODE system.

    Evaluates (x - v) * [alpha*(x - v) - 2/x] / [(x - v)**2 - 1].
    The denominator vanishes where x - v == 1.
    """
    rel = xx-vv
    numerator = alpha*rel - 2.0/xx
    denominator = rel**2.0 - 1.0
    return (numerator/denominator)*rel
def dalpha_dx(xx,vv, alpha):
    """Right-hand side dalpha/dx of the similarity ODE system.

    Evaluates (x - v) * alpha*[alpha - (2/x)*(x - v)] / [(x - v)**2 - 1].
    The denominator vanishes where x - v == 1.
    """
    rel = xx-vv
    numerator = alpha*(alpha - (2.0/xx)*rel)
    denominator = rel**2.0 - 1.0
    return (numerator/denominator)*rel
###def dv_dx(xx,vv, alpha):
### return 2.0*(xx-vv)
###
###def dalpha_dx(xx,vv, alpha):
### return -1.0*(xx-vv)
def get_m(xx, vv, alpha):
    """Reduced mass m = x**2 * alpha * (x - v).

    Note the argument order: coordinate, velocity, density.
    """
    return xx**2.0 * alpha * (xx-vv)
def alpha_to_rho(alpha, tt):
    """Convert dimensionless density to physical density: alpha / (4*pi*G*tt**2)."""
    denominator = 4.0*np.pi*G_newton*(tt**2.0)
    return alpha/denominator
def vv_to_uu(vv, cs0):
    """Convert dimensionless velocity to physical velocity: cs0 * vv."""
    return cs0*vv
def mm_to_MM(mm, tt, cs0):
    """Convert reduced mass to physical mass: (cs0**3 * tt / G) * mm."""
    mass_scale = ((cs0**3.0)*tt)/G_newton
    return mass_scale*mm
def euler(xx_step, xx, vv, alpha, mm, target):
    """Integrate v(x) and alpha(x) with explicit Euler steps until target is passed.

    xx_step            -- signed step in x; its sign must point from xx[-1]
                          towards target.
    xx, vv, alpha, mm  -- 1-D arrays whose last elements are the current state.
    target             -- x value at which to stop (the first sample beyond
                          target is still included, as before).

    Returns the extended (xx, vv, alpha, mm) arrays; the inputs are not modified.
    Raises ValueError if xx_step points away from target, which previously
    resulted in an endless loop.
    """
    ascending = (target - xx[-1]) >= 0
    if ascending:
        if xx_step <= 0:
            raise ValueError("xx_step must be positive to reach a target above xx[-1]")
        pending = lambda x: x <= target
    else:
        if xx_step >= 0:
            raise ValueError("xx_step must be negative to reach a target below xx[-1]")
        # The old descending branch tested "<= target" and therefore never
        # took a single step.
        pending = lambda x: x >= target

    while pending(xx[-1]):
        vv_step = vv[-1] + xx_step*dv_dx(xx[-1], vv[-1], alpha[-1])
        alpha_step = alpha[-1] + xx_step*dalpha_dx(xx[-1], vv[-1], alpha[-1])
        xx = np.append(xx, xx[-1]+xx_step)
        alpha = np.append(alpha, alpha_step)
        vv = np.append(vv, vv_step)
        mm_step = get_m(xx[-1], vv[-1], alpha[-1])
        mm = np.append(mm, mm_step)
    return xx, vv, alpha, mm
def RK4_step(vv, xx, alpha, xx_step):
    """Advance (v, alpha) by one classical fourth-order Runge-Kutta step.

    vv, xx, alpha -- arrays whose last elements are the current state.
    xx_step       -- signed step in x.

    Returns (vv_next, alpha_next); the x update is left to the caller.
    """
    x_now, v_now, a_now = xx[-1], vv[-1], alpha[-1]

    # Stage 1: slope at the start of the interval.
    dv1 = xx_step*dv_dx(x_now, v_now, a_now)
    da1 = xx_step*dalpha_dx(x_now, v_now, a_now)
    # Stages 2 and 3: slopes at the midpoint.
    dv2 = xx_step*dv_dx(x_now+xx_step/2.0, v_now+dv1/2.0, a_now+da1/2.0)
    da2 = xx_step*dalpha_dx(x_now+xx_step/2.0, v_now+dv1/2.0, a_now+da1/2.0)
    dv3 = xx_step*dv_dx(x_now+xx_step/2.0, v_now+dv2/2.0, a_now+da2/2.0)
    da3 = xx_step*dalpha_dx(x_now+xx_step/2.0, v_now+dv2/2.0, a_now+da2/2.0)
    # Stage 4: slope at the end of the interval.
    dv4 = xx_step*dv_dx(x_now+xx_step, v_now+dv3, a_now+da3)
    da4 = xx_step*dalpha_dx(x_now+xx_step, v_now+dv3, a_now+da3)

    vv_next = v_now + (1.0/6.0)*(dv1 + 2.0*dv2 + 2.0*dv3 + dv4)
    alpha_next = a_now + (1.0/6.0)*(da1 + 2.0*da2 + 2.0*da3 + da4)
    return vv_next, alpha_next
def RK4(xx_step, xx, vv, alpha, mm, target, epsilon):
    """Integrate v(x) and alpha(x) with RK4 steps until target is passed.

    xx_step            -- signed step in x.
    xx, vv, alpha, mm  -- 1-D arrays whose last elements are the current state.
    target             -- x value at which to stop.
    epsilon            -- if |x - v - 1| <= epsilon the state is carried over
                          unchanged for that step (and "PIIP" is printed)
                          instead of evaluating the ODE right-hand side.

    Prints the new (v, alpha) pair on every regular step. Returns the
    extended (xx, vv, alpha, mm) arrays.
    """
    #Runge-Kutta RK4
    # Pick the loop condition from the direction of travel.
    if target - xx[-1] >= 0:
        pending = lambda x: x <= target
    else:
        pending = lambda x: x >= target

    while pending(xx[-1]):
        if (np.abs(xx[-1] - vv[-1] - 1.0) > epsilon):
            vv_step, alpha_step = RK4_step(vv, xx, alpha, xx_step)
            print( vv_step, alpha_step)
        else:
            vv_step = vv[-1]
            alpha_step = alpha[-1]
            print("PIIP")
        xx = np.append(xx, xx[-1]+xx_step)
        alpha = np.append(alpha, alpha_step)
        vv = np.append(vv, vv_step)
        mm = np.append(mm, get_m(xx[-1], vv[-1], alpha[-1]))
    return xx, vv, alpha, mm
# From Shu 1977 TABLE II
# Columns of the table on a uniform grid x = 0.05 ... 1.00 (20 points each):
# xx_SHU = coordinate x, alpha_SHU = density, vv_SHU = velocity (tabulated
# magnitudes negated), mm_SHU = reduced mass.
xx_SHU = np.array([0.05 , 0.10 , 0.15 , 0.20 , 0.25 , 0.30 , 0.35 , 0.40 , 0.45 ,
0.50 , 0.55 , 0.60 , 0.65 , 0.70 , 0.75 , 0.80 , 0.85 ,
0.90 , 0.95 , 1.00])
alpha_SHU = np.array([71.5 , 27.8 , 16.4 , 11.5 , 8.76 , 7.09 , 5.95 , 5.14 , 4.52 ,
4.04 , 3.66 , 3.35 , 3.08 , 2.86 , 2.67 , 2.50 , 2.35 ,
2.22 , 2.10 , 2.00])
vv_SHU = -np.array([5.44 , 3.47 , 2.58 , 2.05 , 1.68 , 1.40 , 1.18 , 1.01 , 0.861,
0.735, 0.625, 0.528, 0.442, 0.363, 0.291, 0.225, 0.163,
0.106, 0.051, 0.00])
mm_SHU = np.array([0.981, 0.993, 1.01 , 1.03 , 1.05 , 1.08 , 1.12 , 1.16 , 1.20 ,
1.25 , 1.30 , 1.36 , 1.42 , 1.49 , 1.56 , 1.64 , 1.72 ,
1.81 , 1.90 , 2.00])
##From Shu (1977)
#AA = [ 2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8, 4.0]
#m0 = [0.975, 1.45, 1.88, 2.31, 2.74, 3.18, 3.63, 4.10, 4.58, 5.08, 5.58]
#AA = np.array(AA)
#m0 = np.array(m0)
#xx0 = xx_SHU[1]
#alpha0 = alpha_SHU[1]
#vv0 = vv_SHU[1]
#xx_step = 0.005
#target = 1.0
# Start from the third-to-last table point (x = 0.90) and integrate inwards
# to x = 0.05.
xx0 = xx_SHU[-3]
alpha0 = alpha_SHU[-3]
vv0 = vv_SHU[-3]
target = 0.05
xx_step = -0.001

# get_m takes (xx, vv, alpha); the earlier calls passed alpha and vv swapped.
print(get_m(xx0, vv0, alpha0))

# One-element arrays holding the initial state.
xx = np.array([xx0])
alpha = np.array([alpha0])
vv = np.array([vv0])
mm = np.array([get_m(xx0, vv0, alpha0)])
print(xx, alpha, vv, mm)

xx_EUL, vv_EUL, alpha_EUL, mm_EUL = euler(xx_step, xx, vv, alpha, mm, target)
xx_RK , vv_RK , alpha_RK , mm_RK = RK4(xx_step, xx, vv, alpha, mm, target, epsilon = 0.000001)

# Recompute the reduced mass consistently for all three data sets.
mm_EUL = get_m(xx_EUL, vv_EUL, alpha_EUL)
mm_RK = get_m(xx_RK , vv_RK , alpha_RK )
mm_SHU = get_m(xx_SHU, vv_SHU, alpha_SHU)
# Plotting time
# 2x2 comparison of Euler, RK4 and the tabulated values: density, speed,
# reduced mass and x - v.
figQ, axQ = plt.subplots(nrows=2, ncols=2, sharex=True)
comparison_panels = (
    (axQ[0,0], r'$\alpha$',
     (alpha_EUL, r'$\alpha$ (Euler)'), (alpha_RK, r'$\alpha$ (RK4)'), (alpha_SHU, r'$\alpha$ (Shu)')),
    (axQ[0,1], r'-v',
     (np.abs(vv_EUL), 'v (Euler)'), (np.abs(vv_RK ), 'v (RK4)'), (np.abs(vv_SHU), 'v (Shu)')),
    (axQ[1,0], r'm',
     (mm_EUL, 'm (Euler)'), (mm_RK, 'm (RK4)'), (mm_SHU, 'm (Shu)')),
    (axQ[1,1], r'x-v',
     (xx_EUL-vv_EUL, 'x-v (Euler)'), (xx_RK -vv_RK, 'x-v (RK4)'), (xx_SHU-vv_SHU, 'x-v (Shu)')),
)
for panel, y_label, euler_curve, rk_curve, shu_points in comparison_panels:
    panel.plot(xx_EUL, euler_curve[0], label=euler_curve[1], linewidth = 3.0)
    panel.plot(xx_RK , rk_curve[0], label=rk_curve[1], linewidth = 3.0)
    # Table values are drawn as diamond markers.
    panel.plot(xx_SHU, shu_points[0], 'd', label=shu_points[1], linewidth = 3.0)
    panel.set_xlabel(r'x')
    panel.set_ylabel(y_label)
    panel.legend()
# Time to convert to physical quantities
yr = 3.154e+7 #s
kyr = 1000.0*yr
km = 1e5 #cm
AU = 1.496e+13 #cm
Msun = 1.98847e33 #g
cs0 = 20000 #cs cm/s "a" in Shu notation

# Four snapshots between 10 and 20 kyr of the RK4 solution.
tt_list = np.linspace(10*kyr, 20.0*kyr, num=4)
mm = get_m(xx_RK, vv_RK, alpha_RK)
# The physical velocity does not depend on time, so convert it once.
uu = vv_to_uu(vv_RK, cs0)

fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True)
for tt in tt_list:
    rho = alpha_to_rho(alpha_RK, tt)
    RR = xx_RK*(cs0*tt)
    time = r'%.2f $\mathrm{kyr}$' % (tt/kyr)
    ax[0].plot(RR/AU, rho, label= r'$\rho$, t = ' + time, linewidth = 3.0)
    ax[1].plot(RR/AU, -uu/km, label= r'$u$, t = ' + time, linewidth = 3.0)
    MM = mm_to_MM(mm, tt, cs0)
    ax[2].plot(RR/AU, MM/Msun, label= r'$M$, t = ' + time, linewidth = 3.0)

ax[0].set_xlabel(r'R (AU)')
ax[0].set_ylabel(r'$\rho$ (g/cm$^3$)')
ax[0].set_xscale('log')
ax[0].set_yscale('log')
ax[0].legend()

ax[1].set_xlabel(r'R (AU)')
ax[1].set_ylabel(r'-$u$ (km/s)')
ax[1].set_yscale('log')
ax[1].legend()

ax[2].set_xlabel(r'R (AU)')
# The stray "}" after \odot made this label invalid mathtext.
ax[2].set_ylabel(r'$M$ ($M_\odot$)')
ax[2].legend()

plt.show()

1
analysis/python/purgepng.sh Executable file
View File

@@ -0,0 +1 @@
# Remove all PNG files in the current directory. -f keeps the script quiet
# when none exist; -- protects against file names that start with a dash.
rm -f -- *.png

View File

@@ -0,0 +1,3 @@
# Analysis script samples
This directory is for sample scripts useable for data analysis and visualization.

View File

@@ -0,0 +1,41 @@
import pylab as plt
import numpy as np
# do_bound(coeff): toy model of a boundary fill on a 7-element float32 buffer
# that starts out as all zeros. Returns (xx, vertex_buffer), where xx holds the
# indices 0..6.
# For each destination index 0, 1, 2 the value is built from the cell at index 3:
# exp(buffer[3]) is multiplied by coeff once, then by coeff again once per cell
# of distance from index 3, and the logarithm of the result is stored.
# NOTE(review): the indentation of this listing was lost, so whether the
# "looped"/"final" prints and the log step sit inside or after the inner loop
# cannot be determined from here; confirm against the original file.
def do_bound(coeff):
vertex_buffer = np.zeros(7, dtype=np.float32)
xx = np.arange(vertex_buffer.size)
# Index of the cell the boundary values are derived from.
edge_idx = 3
for dst_idx in range(3):
# Distance in cells between the destination and the source cell.
i_diff = abs(edge_idx - dst_idx)
vertex_buffer[dst_idx] = coeff*np.exp(vertex_buffer[edge_idx])
print("initial",vertex_buffer)
for i in range(i_diff):
vertex_buffer[dst_idx] = coeff*vertex_buffer[dst_idx]
print("looped", vertex_buffer[dst_idx])
vertex_buffer[dst_idx] = np.log(vertex_buffer[dst_idx]);
print("final",vertex_buffer)
return xx, vertex_buffer
# Grid spacing; the same value is used for AC_dsx in the Astaroth config files.
AC_dsx = 0.04908738521
# Two coefficients to compare: 1 - 1/25 and 1 - 1/100.
coeff1 = 1.0 - AC_dsx/(25.0*AC_dsx)
coeff2 = 1.0 - AC_dsx/(100.0*AC_dsx)
# One figure per coefficient, then display them all.
for coeff in (coeff1, coeff2):
    plt.figure()
    xx, yy = do_bound(coeff)
    plt.plot(xx, yy)
plt.show()

View File

@@ -0,0 +1,260 @@
'''
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
'''
import astar.data as ad
import astar.visual as vis
import pylab as plt
import numpy as np
import sys
##mesh = ad.read.Mesh(500, fdir="/tiara/home/mvaisala/astaroth-code/astaroth_2.0/build/")
##
##print(np.shape(mesh.uu))
##print(np.shape(mesh.lnrho))
##
##uu_tot = np.sqrt(mesh.uu[0]**2.0 + mesh.uu[1]**2.0 + mesh.uu[2]**2.0)
##vis.slices.plot_3(mesh, uu_tot, title = r'$|u|$', bitmap = True, fname = 'uutot')
##
##vis.slices.plot_3(mesh, mesh.lnrho, title = r'$\ln \rho$', bitmap = True, fname = 'lnrho')
##
##print(mesh.minfo.contents)
# Unit constants. The values match AC_unit_density / AC_unit_length in the
# accompanying config file, whose comments give them as g/cm^3 and cm (1 AU).
# They are not referenced in the visible part of this script.
AC_unit_density = 1e-17
AC_unit_velocity = 1e5
AC_unit_length = 1.496e+13
# Echo the command line: the mode keywords ("xtopbound", "single", "xline",
# "check", "diff", "1d", "sl", "ts", ...) are looked up in sys.argv below.
print("sys.argv", sys.argv)
# Directory the snapshots and time series are read from.
# NOTE(review): hard-coded, user-specific absolute path; must be edited per machine.
#meshdir = "/tiara/home/mvaisala/astaroth-code/astaroth_2.0/build/"
meshdir = "/tiara/ara/data/mvaisala/tmp/astaroth-code/astaroth_2.0/build/"
#meshdir = "/tiara/ara/data/mvaisala/asth_testbed_double/"
if "xtopbound" in sys.argv:
    # For snapshots 0..170, print the last seven entries along the first
    # array axis (at fixed indices 20 and 100 on the other two axes) of
    # every field.
    for i in range(0, 171):
        mesh = ad.read.Mesh(i, fdir=meshdir)
        if not mesh.ok:
            continue
        np.set_printoptions(precision=4, linewidth=150)
        uu_tot = np.sqrt(mesh.uu[0]**2.0 + mesh.uu[1]**2.0 + mesh.uu[2]**2.0)
        n_first = mesh.lnrho.shape[0]
        print(mesh.lnrho.shape)
        print(range((n_first-7), n_first))
        top = slice(n_first-7, n_first)
        fields = (
            ('lnrho', mesh.lnrho),
            ('uux', mesh.uu[0]),
            ('uuy', mesh.uu[1]),
            ('uuz', mesh.uu[2]),
            ('uu_tot', uu_tot),
        )
        for label, field in fields:
            print(label, i, field[top, 20, 100])
# "single" mode: load snapshot 1 and print selected cell values of lnrho and of
# the three velocity components. The second argument of each print is
# presumably the value the cell is expected to hold (or the tag "periodic").
# NOTE(review): the last index in every second "periodic" line is written `00`
# (i.e. 0), while the line before it uses 100; this looks like a typo for 100.
# Confirm before relying on the output.
if "single" in sys.argv:
mesh = ad.read.Mesh(1, fdir=meshdir)
print(mesh.lnrho.shape)
print( mesh.lnrho[1, 50, 100], 0.0)
print( mesh.lnrho[197, 50, 100], 0.0)
print( mesh.lnrho[100, 50, 1], 0.0)
print( mesh.lnrho[100, 50, 197], 0.0)
print( mesh.lnrho[100, 1, 100], "periodic")
print( mesh.lnrho[100, 101, 00], "periodic")
# Reference velocity values: amplitude 0.25, angle 0.78 (radians, since it is
# passed straight to np.cos / np.sin).
angle = 0.78
UUXX = -0.25 * np.cos(angle)
zorig = 4.85965
# Coordinates at grid indices 1, 100 and 197 with spacing 0.0490874.
zz = [0.0490874*1.0 - zorig, 0.0490874*100.0 - zorig, 0.0490874*197.0 - zorig]
print (zz)
zz = np.array(zz)
# tanh profile evaluated at the three coordinates above.
UUZZ = - 0.25*np.sin(angle)*np.tanh(zz/0.2)
#plt.plot(np.linspace(-5.0, 5.0, num=100),- (0.25*np.sin(angle))*np.tanh(np.linspace(-5.0, 5.0, num=100)/0.2))
#plt.show()
print("---- UUX")
print( mesh.uu[0][1, 50, 100], 0.0)
print( mesh.uu[0][197, 50, 100], UUXX)
print( mesh.uu[0][100, 50, 1], UUXX)
print( mesh.uu[0][100, 50, 197], UUXX)
print( mesh.uu[0][100, 1, 100], "periodic")
print( mesh.uu[0][100, 101, 00], "periodic")
print("---- UUY")
print( mesh.uu[1][1, 50, 100], 0.0)
print( mesh.uu[1][197, 50, 100], 0.0)
print( mesh.uu[1][100, 50, 1], 0.0)
print( mesh.uu[1][100, 50, 197], 0.0)
print( mesh.uu[1][100, 1, 100], "periodic")
print( mesh.uu[1][100, 101, 00], "periodic")
print("---- UUZ")
# NOTE(review): UUZZ[0], UUZZ[1], UUZZ[2] correspond to grid indices 1, 100, 197.
# Verify that each UUZZ entry is paired with the cell whose index along the
# profile axis matches.
print( mesh.uu[2][1, 50, 100], 0.0)
print( mesh.uu[2][197, 50, 100], UUZZ[1])
print( mesh.uu[2][100, 50, 1], UUZZ[0])
print( mesh.uu[2][100, 50, 197], UUZZ[2])
print( mesh.uu[2][100, 1, 100], "periodic")
print( mesh.uu[2][100, 101, 00], "periodic")
if 'xline' in sys.argv:
    # Line cuts through snapshot 0: for each velocity component, one figure
    # with a cut along each array axis through the cell (100, 50, 100).
    # NOTE(review): the "x"/"y"/"z" labels are kept exactly as in the original;
    # which array axis is which spatial direction is not visible here.
    mesh = ad.read.Mesh(0, fdir=meshdir)
    for comp in range(3):
        field = mesh.uu[comp]
        plt.figure()
        plt.plot(field[100, 50, :] , label="z")
        plt.plot(field[100, :, 100], label="x")
        plt.plot(field[:, 50, 100] , label="y")
        plt.legend()
        if comp == 0:
            # Extra figure for the first component: cut at first-axis index 197.
            plt.figure()
            plt.plot(field[197, 50, :] , label="z edge")
            # The original passed a label here but never drew the legend.
            plt.legend()
    plt.show()
if 'check' in sys.argv:
    # Contour plot of ln(rho) for snapshot 0, followed by plt.show().
    mesh = ad.read.Mesh(0, fdir=meshdir)
    vis.slices.plot_3(
        mesh,
        mesh.lnrho,
        title=r'$\ln \rho$',
        bitmap=False,
        fname='lnrho',
        contourplot=True,
    )
    plt.show()
if 'diff' in sys.argv:
    # Plot the change of every field between snapshot 1 and snapshot 2.
    mesh0 = ad.read.Mesh(1, fdir=meshdir)
    mesh1 = ad.read.Mesh(2, fdir=meshdir)
    field_pairs = (
        (mesh0.lnrho, mesh1.lnrho, r'$\ln \rho$', 'lnrho'),
        (mesh0.uu[0], mesh1.uu[0], r'$u_x$', 'uux'),
        (mesh0.uu[1], mesh1.uu[1], r'$u_y$', 'uuy'),
        (mesh0.uu[2], mesh1.uu[2], r'$u_z$', 'uuz'),
    )
    for before, after, title, fname in field_pairs:
        vis.slices.plot_3(mesh1, after - before, title = title, bitmap = True, fname = fname)
if '1d' in sys.argv:
    # Overlay a 1D cut along the first array axis for every 1000th snapshot.
    # The quantity is chosen by a second keyword; if several are given, the
    # first entry of this table that appears in sys.argv wins.
    line_getters = (
        ('lnrho', lambda m: m.lnrho[:, 20, 100]),
        ('uux', lambda m: m.uu[0][:, 20, 100]),
        ('uuy', lambda m: m.uu[1][:, 20, 100]),
        ('uuz', lambda m: m.uu[2][:, 20, 100]),
        ('uutot', lambda m: np.sqrt(m.uu[0]**2.0 + m.uu[1]**2.0 + m.uu[2]**2.0)[:, 20, 100]),
    )
    get_line = next((getter for key, getter in line_getters if key in sys.argv), None)
    plt.figure()
    for i in range(0, 100001, 1000):
        mesh = ad.read.Mesh(i, fdir=meshdir)
        if mesh.ok and get_line is not None:
            plt.plot(get_line(mesh), label=i)
    plt.legend()
    plt.show()
if 'sl' in sys.argv:
    # Produce slice plots of every field for each snapshot index that can be
    # read. With the extra keyword 'lim' each plot gets a fixed colour range.
    maxfiles = 200002
    stride = 10000
    use_limits = 'lim' in sys.argv
    for i in range(0, maxfiles, stride):
        mesh = ad.read.Mesh(i, fdir=meshdir)
        print(" %i / %i" % (i, maxfiles))
        if not mesh.ok:
            continue
        uu_tot = np.sqrt(mesh.uu[0]**2.0 + mesh.uu[1]**2.0 + mesh.uu[2]**2.0)
        # (data, title, file name, colour range for 'lim', extra keyword args)
        # An entropy slice (mesh.ss, title r'$s$', fname 'ss') is disabled.
        panels = (
            (mesh.lnrho, r'$\ln \rho$', 'lnrho', [-0.02, 0.0], {}),
            (np.exp(mesh.lnrho), r'$\rho$', 'rho', [0.97, 1.0], {}),
            (mesh.uu[0], r'$u_x$', 'uux', [-0.002, 0.002], {}),
            (mesh.uu[1], r'$u_y$', 'uuy', [-1.0e-20, 1.0e-20], {}),
            (mesh.uu[2], r'$u_z$', 'uuz', [-0.002, 0.002], {}),
            (np.exp(mesh.lnrho), r'$N_\mathrm{col}$', 'colden', [0.0, 100.0], {'slicetype': 'sum'}),
            (uu_tot, r'$|u|$', 'uutot', [0.00, 0.004], {}),
        )
        for data, title, fname, colrange, extra in panels:
            kwargs = dict(extra)
            if use_limits:
                kwargs['colrange'] = colrange
            vis.slices.plot_3(mesh, data, title = title, bitmap = True, fname = fname, **kwargs)
if 'ts' in sys.argv:
    # Plot rms/min/max of every diagnostic against the time step, one figure
    # per quantity. The slice [:end_rm] drops the last sample of each series.
    ts = ad.read.TimeSeries(fdir=meshdir)
    end_rm = -1 #-35#-40
    xaxis = 't_step'
    for quantity in ('lnrho', 'uutot', 'uux', 'uuy', 'uuz'):
        plt.figure()
        for suffix in ('rms', 'min', 'max'):
            yaxis = '%s_%s' % (quantity, suffix)
            plt.plot(ts.var[xaxis][:end_rm], ts.var[yaxis][:end_rm], label=yaxis)
        plt.xlabel(xaxis)
        plt.legend()
    plt.show()

54
config/astaroth.conf Normal file
View File

@@ -0,0 +1,54 @@
/*
* =============================================================================
* "Compile-time" params
* =============================================================================
*/
AC_nx = 192
AC_ny = 120
AC_nz = 7
AC_dsx = 0.04908738521
AC_dsy = 0.04908738521
AC_dsz = 0.04908738521
/*
* =============================================================================
* Run-time params
* =============================================================================
*/
AC_max_steps = 1001
AC_save_steps = 10
AC_bin_steps = 1000
AC_bin_save_t = 1e666
// Hydro
AC_cdt = 0.4
AC_cdtv = 0.3
AC_cdts = 1.0
AC_nu_visc = 5e-3
AC_cs_sound = 1.0
AC_zeta = 0.01
// Magnetic
AC_eta = 5e-3
AC_mu0 = 1.4
AC_chi = 0.0001
// Forcing
AC_relhel = 0.0
// Entropy
AC_cp_sound = 1.0
AC_gamma = 0.5
AC_lnT0 = 1.2
AC_lnrho0 = 1.3
/*
* =============================================================================
* Initial conditions
* =============================================================================
*/
AC_ampl_lnrho = 0.0
AC_ampl_uu = 1.0

View File

@@ -0,0 +1,121 @@
/*
* =============================================================================
* "Compile-time" params
* =============================================================================
*/
AC_nx = 192
AC_ny = 48
AC_nz = 192
AC_dsx = 0.04908738521
AC_dsy = 0.04908738521
AC_dsz = 0.04908738521
/*
* =============================================================================
* Run-time params
* =============================================================================
*/
//AC_max_steps = 16001
//AC_save_steps = 50
//AC_bin_steps = 16000
//AC_max_steps = 1001
//AC_save_steps = 10
//AC_bin_steps = 1000
//AC_max_steps = 11
//AC_save_steps = 1
//AC_bin_steps = 1
//AC_max_steps = 4
//AC_save_steps = 1
//AC_bin_steps = 1
//AC_max_steps = 1201
//AC_save_steps = 10
//AC_bin_steps = 1200
//AC_bin_save_t = 5.0
//AC_max_steps = 50001
//AC_save_steps = 100
//AC_bin_steps = 10000
AC_max_steps = 100001
AC_save_steps = 500
AC_bin_steps = 20000
AC_bin_save_t = 2300000.0
// Hydro
AC_cdt = 0.4
AC_cdtv = 0.3
AC_cdts = 1.0
//GOOD VISC Re_mesh = 3
//AC_nu_visc = 3.0e-3
AC_nu_visc = 1.0e-3
AC_cs_sound = 0.2
AC_zeta = 1.0e-3
// Magnetic
AC_eta = 5e-3
AC_mu0 = 1.4
AC_chi = 0.0001
// Forcing
AC_relhel = 0.0
// Entropy
// cp arbitrary
AC_cp_sound = 1.0
// 5/3 adiabatic process
AC_gamma = 1.66
AC_lnT0 = 1.0
AC_lnrho0 = 0.0
// Boundary condition. Defined by arbitrary int.
AC_bc_type = 666
//AC_bc_type = 121
AC_trans = 0.6
//Physical units (cgs)
// Based on Shu 1977 model calculations with t = 20 kyr, R = 500 AU
// g/cm^3
AC_unit_density = 1e-17
// cm/s
// Now 1 km/s
//AC_unit_velocity = 1e5
AC_unit_velocity = 1.0
// cm
// Now 1 AU
AC_unit_length = 1.496e+13
//Properties of gravitating star*
AC_star_pos_x = -500.0
//AC_star_pos_x = -10.0
AC_star_pos_y = 0.0
AC_star_pos_z = 0.0
//In M_sun
//AC_M_star = 0.05
AC_M_star = 0.5
//AC_M_star = 0.0
/*
* =============================================================================
* Initial conditions
* =============================================================================
*/
AC_ampl_lnrho = 0.0
AC_lnrho_edge = -1.0
AC_lnrho_out = 0.0
//original
//AC_ampl_uu = 0.25
//For gravity test
AC_ampl_uu = 0.0
AC_angl_uu = 0.0
//AC_angl_uu = 0.35

4
doc/doxygen/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

131
doc/manual/manual.md Normal file
View File

@@ -0,0 +1,131 @@
*Miikka Vaisala: This is just something I have started to write up to make sense of Astaroth 2.0. It starts from personally important notes for understanding the code and will be refined as my understanding improves.*
# Astaroth manual
## Compilation
See the `README.md`. At the moment, let us keep certain things in one place.
## Simulation instructions
At the moment it is only possible to build and run in the `astaroth_2.0/build/` directory. Possibility to add separate run directories will be included later.
### Choosing physics
Runtime settings can be adjusted from `astaroth_2.0/include/astaroth.h` and `astaroth_2.0/config/astaroth.conf`.
However, the physics switches `LENTROPY`, `LFORCING` etc. do not work at the moment. There has been an issue getting the preprocessor to be compatible with the Astaroth domain-specific language in Astaroth 2.0. Therefore, all features are enabled by default.
To get the switcher working now, rename `astaroth_2.0/src/core/kernels/rk3handtuned.cuh` -> `rk3.cuh`. (**MV:** Not yet tested.)
How to use?
What kind of runtime settings?
### Setting initial conditions
Where can we effectively choose the initial condition?
### Launching a run
`./ac_run -s` assuming you are doing a normal simulation. Basic code for this invocation can be found in the source file `astaroth_2.0/src/standalone/simulation.cc`.
Please note that launching `./ac_run -t` will *fail if entropy and forcing are in use*. The test is mainly for finding parallelization bugs. (In principle, if the hydro parts and induction work, so will forcing and entropy.)
### Diagnostic variables
What is calculated?
Where it is saved?
### Simulation data
Saving output binaries is not enabled yet.
**MV:** I am planning to implement HDF5 format for the data. **TOP PRIORITY**.
#### Notes about data structures
- Configuration parameters have prefix `AC_`, such as `AC_dsx`.
- All configurations are stored in the struct `AcMeshInfo`, containing tables `int_params` and `real_params`. **NOTE:** `int_params` and `real_params` require diligence. If you call e.g. `int_params[AC_dsx]`, the result will be something unexpected. So far it has not been possible to automate error checking for this.
- All mesh data is stored to the struct `AcMesh`, containing both configuration values and vertex data (`lnrho`, `uux`, etc.)
- All essential structs, macros and enumerators are found in `astaroth.h` for better reference.
- In case the data layout changes, it is better to use the macro `AC_VTXBUF_IDX(i, j, k, mesh_info)`, which transforms indices from 3D to 1D. There is then no need to write `i + j * mesh_info.int_params[AC_mx] + ...`, which would hurt code readability.
- `AcReal` is the generic floating-point real number type used everywhere in the code. Currently it can be either `float` or `double`. Possibly `half` or `long double` could also become available in the future.
Sample code:
```cpp
AcMeshInfo mesh_info;
// Loads data from astaroth.conf into the AcMeshInfo struct
load_config(&mesh_info);
// Allocates data on the host for the AcMesh struct using information found in mesh_info.
AcMesh* mesh = acmesh_create(mesh_info);
// Initializes mesh to InitType (specified in standalone/model/host_memory.h)
acmesh_init_to(INIT_TYPE_GAUSSIAN_RADIAL_EXPL, mesh);
// Allocates data on the device for the AcMesh struct
acInit(mesh_info);
acLoad(*mesh); // Loads the mesh to the device
const AcReal dt = 1.f;
// Synchronizes previous device commands
acSynchronize();
// Does a full rk3 integration step on the device
acIntegrate(dt);
acSynchronize();
// Store data from device to host mesh
acStore(mesh);
printf("nx: %d, dsx %f\n",
mesh->info.int_params[AC_nx],
double(mesh->info.real_params[AC_dsx]));
printf("First vertex of the computational domain: %f\n",
double(mesh->vertex_buffer[VTXBUF_LNRHO][AC_VTXBUF_IDX(3, 3, 3, mesh_info)]));
```
### Reading data
Depends on the output format. With HDF5 should be simple enough.
[Jupyter notebook](http://jupyter.org/) visualization?
Do we want to use [YT?](https://yt-project.org/)
### Live rendering
MV: Cool, but does not work for remote cluster so far. A GPU workstation is required.
## Multi-GPU
At the moment multi-GPU is not included in Astaroth 2.0. However, it has been implemented in Astaroth 1.0 (`astaroth_1.0/src/gpu/cuda/cuda_generic.cu`) and could essentially be ported by copy-pasting it to `astaroth_2.0/src/core/astaroth.cu` once we have a clear idea of how to run things on a single GPU. This could be done overnight in principle.
## Profiling
The built-in benchmark is currently unreliable for an unknown reason. Please use [nvprof and nvvp](https://docs.nvidia.com/cuda/profiler-users-guide/index.html) for precise profiling. Also, NVIDIA suggests their [Nsight Systems](https://developer.nvidia.com/nsight-systems).
## ETC
**Note** `auto_optimize.sh` does not currently work, but it aims to tune thread block dimensions automatically.

2427
doxyfile Normal file

File diff suppressed because it is too large Load Diff

422
include/astaroth.h Normal file
View File

@@ -0,0 +1,422 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Provides an interface to Astaroth. Contains all the necessary configuration
* structs and functions for running the code on multiple GPUs.
*
* All interface functions declared here (such as acInit()) operate all GPUs
* available in the node under the hood, and the user does not need any
* information about the decomposition, synchronization or such to use these
* functions.
*
*/
#pragma once
/* Prevent name mangling */
#ifdef __cplusplus
extern "C" {
#endif
#include <float.h> // FLT_EPSILON, etc
#include <stdlib.h> // size_t
#include <vector_types.h> // CUDA vector types (float4, etc)
/*
* =============================================================================
* Flags for auto-optimization
* =============================================================================
*/
#define AUTO_OPTIMIZE (0) // DEPRECATED TODO remove
#define BOUNDCONDS_OPTIMIZE (0)
#define GENERATE_BENCHMARK_DATA (0)
// Device info
#define REGISTERS_PER_THREAD (255)
#define MAX_REGISTERS_PER_BLOCK (65536)
#define MAX_THREADS_PER_BLOCK (1024)
#define MAX_TB_DIM (MAX_THREADS_PER_BLOCK)
#define NUM_ITERATIONS (10)
#define WARP_SIZE (32)
/*
* =============================================================================
* Compile-time constants used during simulation (user definable)
* =============================================================================
*/
#define STENCIL_ORDER (6)
///////////// PAD TEST
// NOTE: works only with nx is divisible by 32
//#define PAD_LEAD (32 - STENCIL_ORDER/2)
//#define PAD_SIZE (32 - STENCIL_ORDER)
///////////// PAD TEST
// L-prefix inherited from the old Astaroth, no idea what it means
// MV: L means a Logical switch variable, something having a true or false value.
#define LFORCING (0) // Note: forcing is disabled currently in the files generated by acc (compiler of our DSL)
#define LINDUCTION (1)
#define LENTROPY (1)
#define LTEMPERATURE (0)
#define AC_THERMAL_CONDUCTIVITY (AcReal(0.001)) // TODO: make an actual config parameter
/*
* =============================================================================
* Identifiers used to construct the parameter lists for AcMeshInfo
* (IntParamType and RealParamType)
* (user definable)
* =============================================================================
*/
// clang-format off
#define AC_FOR_INT_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_nx), \
FUNC(AC_ny), \
FUNC(AC_nz), \
FUNC(AC_mx), \
FUNC(AC_my), \
FUNC(AC_mz), \
FUNC(AC_nx_min), \
FUNC(AC_ny_min), \
FUNC(AC_nz_min), \
FUNC(AC_nx_max), \
FUNC(AC_ny_max), \
FUNC(AC_nz_max), \
/* Other */\
FUNC(AC_max_steps), \
FUNC(AC_save_steps), \
FUNC(AC_bin_steps), \
FUNC(AC_bc_type), \
/* Additional */\
FUNC(AC_mxy),\
FUNC(AC_nxy),\
FUNC(AC_nxyz)
#define AC_FOR_REAL_PARAM_TYPES(FUNC)\
/* cparams */\
FUNC(AC_dsx), \
FUNC(AC_dsy), \
FUNC(AC_dsz), \
FUNC(AC_dsmin), \
/* physical grid*/\
FUNC(AC_xlen), \
FUNC(AC_ylen), \
FUNC(AC_zlen), \
FUNC(AC_xorig), \
FUNC(AC_yorig), \
FUNC(AC_zorig), \
/*Physical units*/\
FUNC(AC_unit_density),\
FUNC(AC_unit_velocity),\
FUNC(AC_unit_length),\
/* properties of gravitating star*/\
FUNC(AC_star_pos_x),\
FUNC(AC_star_pos_y),\
FUNC(AC_star_pos_z),\
FUNC(AC_M_star),\
/* Run params */\
FUNC(AC_cdt), \
FUNC(AC_cdtv), \
FUNC(AC_cdts), \
FUNC(AC_nu_visc), \
FUNC(AC_cs_sound), \
FUNC(AC_eta), \
FUNC(AC_mu0), \
FUNC(AC_relhel), \
FUNC(AC_cp_sound), \
FUNC(AC_gamma), \
FUNC(AC_cv_sound), \
FUNC(AC_lnT0), \
FUNC(AC_lnrho0), \
FUNC(AC_zeta), \
FUNC(AC_trans),\
/* Other */\
FUNC(AC_bin_save_t), \
/* Initial condition params */\
FUNC(AC_ampl_lnrho), \
FUNC(AC_ampl_uu), \
FUNC(AC_angl_uu), \
FUNC(AC_lnrho_edge),\
FUNC(AC_lnrho_out),\
/* Additional helper params */\
/* (deduced from other params do not set these directly!) */\
FUNC(AC_G_CONST),\
FUNC(AC_GM_star),\
FUNC(AC_sq2GM_star),\
FUNC(AC_cs2_sound), \
FUNC(AC_inv_dsx), \
FUNC(AC_inv_dsy), \
FUNC(AC_inv_dsz)
// clang-format on
/*
* =============================================================================
* Identifiers for VertexBufferHandle
* (i.e. the arrays used to construct AcMesh)
* (user definable)
* =============================================================================
*/
// clang-format off
#define AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_LNRHO), \
FUNC(VTXBUF_UUX), \
FUNC(VTXBUF_UUY), \
FUNC(VTXBUF_UUZ), \
// FUNC(VTXBUF_DYE),
#if LINDUCTION
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_AX), \
FUNC(VTXBUF_AY), \
FUNC(VTXBUF_AZ),
#else
#define AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)
#endif
#if LENTROPY
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_ENTROPY),
#else
#define AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)
#endif
#if LTEMPERATURE
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)\
FUNC(VTXBUF_TEMPERATURE),
#else
#define AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
#endif
#define AC_FOR_VTXBUF_HANDLES(FUNC)\
AC_FOR_HYDRO_VTXBUF_HANDLES(FUNC)\
AC_FOR_INDUCTION_VTXBUF_HANDLES(FUNC)\
AC_FOR_ENTROPY_VTXBUF_HANDLES(FUNC)\
AC_FOR_TEMPERATURE_VTXBUF_HANDLES(FUNC)
// clang-format on
/*
* =============================================================================
* Single/double precision switch
* =============================================================================
*/
#if AC_DOUBLE_PRECISION == 1
typedef double AcReal;
typedef double3 AcReal3;
#define AC_REAL_MAX (DBL_MAX)
#define AC_REAL_MIN (DBL_MIN)
#define AC_REAL_EPSILON (DBL_EPSILON)
#else
typedef float AcReal;
typedef float3 AcReal3;
#define AC_REAL_MAX (FLT_MAX)
#define AC_REAL_MIN (FLT_MIN)
#define AC_REAL_EPSILON (FLT_EPSILON)
#endif
typedef struct {
AcReal3 row[3];
} AcMatrix;
/*
* =============================================================================
* Helper macros
* =============================================================================
*/
#define AC_GEN_ID(X) X
#define AC_GEN_STR(X) #X
/*
* =============================================================================
* Error codes
* =============================================================================
*/
typedef enum { AC_SUCCESS = 0, AC_FAILURE = 1 } AcResult;
/*
* =============================================================================
* Reduction types
* =============================================================================
*/
typedef enum {
RTYPE_MAX,
RTYPE_MIN,
RTYPE_RMS,
RTYPE_RMS_EXP,
NUM_REDUCTION_TYPES
} ReductionType;
/*
* =============================================================================
* Definitions for the enums and structs for AcMeshInfo (DO NOT TOUCH)
* =============================================================================
*/
typedef enum {
AC_FOR_INT_PARAM_TYPES(AC_GEN_ID),
NUM_INT_PARAM_TYPES
} AcIntParam;
typedef enum {
AC_FOR_REAL_PARAM_TYPES(AC_GEN_ID),
NUM_REAL_PARAM_TYPES
} AcRealParam;
extern const char* intparam_names[]; // Defined in astaroth.cu
extern const char* realparam_names[]; // Defined in astaroth.cu
typedef struct {
int int_params[NUM_INT_PARAM_TYPES];
AcReal real_params[NUM_REAL_PARAM_TYPES];
} AcMeshInfo;
/*
* =============================================================================
* Definitions for the enums and structs for AcMesh (DO NOT TOUCH)
* =============================================================================
*/
typedef enum {
AC_FOR_VTXBUF_HANDLES(AC_GEN_ID) NUM_VTXBUF_HANDLES
} VertexBufferHandle;
extern const char* vtxbuf_names[]; // Defined in astaroth.cu
/*
typedef struct {
AcReal* data;
} VertexBuffer;
*/
// NOTE: there's no particular benefit declaring AcMesh a class, since
// a library user may already have allocated memory for the vertex_buffers.
// But then we would allocate memory again when the user wants to start
// filling the class with data. => Its better to consider AcMesh as a
// payload-only struct
typedef struct {
AcReal* vertex_buffer[NUM_VTXBUF_HANDLES];
AcMeshInfo info;
} AcMesh;
/*
 * Size and indexing helpers. `mesh_info` is any expression that has an
 * `int_params` array indexed by the AC_* integer parameter ids. The argument
 * is parenthesised in every expansion so that expressions such as `*ptr` or
 * `mesh->info` expand correctly.
 */

/* Number of elements in one vertex buffer (mx * my * mz). Each factor is
 * widened to size_t before multiplying so the product cannot overflow int. */
#define AC_VTXBUF_SIZE(mesh_info)                                              \
    ((size_t)(mesh_info).int_params[AC_mx] *                                   \
     (size_t)(mesh_info).int_params[AC_my] *                                   \
     (size_t)(mesh_info).int_params[AC_mz])
/* Size of one vertex buffer in bytes. */
#define AC_VTXBUF_SIZE_BYTES(mesh_info)                                        \
    (sizeof(AcReal) * AC_VTXBUF_SIZE(mesh_info))
/* nx * ny * nz. Kept as an int expression for backward compatibility with
 * existing users of this macro. */
#define AC_VTXBUF_COMPDOMAIN_SIZE(mesh_info)                                   \
    ((mesh_info).int_params[AC_nx] * (mesh_info).int_params[AC_ny] *           \
     (mesh_info).int_params[AC_nz])
/* sizeof(AcReal) * nx * ny * nz, computed entirely in size_t. */
#define AC_VTXBUF_COMPDOMAIN_SIZE_BYTES(mesh_info)                             \
    (sizeof(AcReal) * (size_t)(mesh_info).int_params[AC_nx] *                  \
     (size_t)(mesh_info).int_params[AC_ny] *                                   \
     (size_t)(mesh_info).int_params[AC_nz])
/* Maps a 3D index (i, j, k) to the 1D offset i + j*mx + k*mx*my. */
#define AC_VTXBUF_IDX(i, j, k, mesh_info)                                      \
    ((i) + (j) * (mesh_info).int_params[AC_mx] +                               \
     (k) * (mesh_info).int_params[AC_mx] * (mesh_info).int_params[AC_my])
/*
* =============================================================================
* Astaroth interface
* =============================================================================
*/
/** Starting point of all GPU computation. Handles the allocation and
initialization of *all memory needed on all GPUs in the node*. In other words,
setups everything GPU-side so that calling any other GPU interface function
afterwards does not result in illegal memory accesses. */
// NOTE(review): these declarations sit inside the extern "C" block opened
// above but take C++ reference parameters, so this header can only be
// included from C++ (or CUDA C++) translation units.
AcResult acInit(const AcMeshInfo& mesh_info);
/** Splits the host_mesh and distributes it among the GPUs in the node */
AcResult acLoad(const AcMesh& host_mesh);
/** Variant of acLoad() that takes a start index and a vertex count.
    NOTE(review): presumably loads only `num_vertices` vertices beginning at
    `start`; the exact semantics are not documented here, so confirm against the
    implementation. */
AcResult acLoadWithOffset(const AcMesh& host_mesh, const int3& start, const int num_vertices);
/** Does all three steps of the RK3 integration and computes the boundary
conditions when necessary. Note that the boundary conditions are not applied
after the final integration step.
The result can be fetched to CPU memory with acStore(). */
AcResult acIntegrate(const AcReal& dt);
/** Performs a single RK3 step without computing boundary conditions. */
AcResult acIntegrateStep(const int& isubstep, const AcReal& dt);
/** Applies boundary conditions on the GPU meshs and communicates the
ghost zones among GPUs if necessary */
AcResult acBoundcondStep(void);
/** Performs a scalar reduction on all GPUs in the node and returns the result.
*/
AcReal acReduceScal(const ReductionType& rtype, const VertexBufferHandle& a);
/** Performs a vector reduction on all GPUs in the node and returns the result.
*/
AcReal acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a,
const VertexBufferHandle& b, const VertexBufferHandle& c);
/** Stores the mesh distributed among GPUs of the node back to a single host
* mesh */
AcResult acStore(AcMesh* host_mesh);
/** Variant of acStore() that takes a start index and a vertex count.
    NOTE(review): presumably stores only `num_vertices` vertices beginning at
    `start`; confirm against the implementation. */
AcResult acStoreWithOffset(const int3& start, const int num_vertices, AcMesh* host_mesh);
/** Frees all GPU allocations and resets all devices in the node. Should be
* called at exit. */
AcResult acQuit(void);
/** Synchronizes all devices. All calls to Astaroth are asynchronous by default
unless otherwise stated. */
AcResult acSynchronize(void);
/* End extern "C" */
#ifdef __cplusplus
}
#endif
/*
* =============================================================================
* Notes
* =============================================================================
*/
/*
typedef enum {
VTX_BUF_LNRHO,
VTX_BUF_UUX,
VTX_BUF_UUY,
VTX_BUF_UUZ,
NUM_VERTEX_BUFFER_HANDLES
} VertexBufferHandle
// LNRHO etc
typedef struct {
AcReal* data;
} VertexBuffer;
// Host
typedef struct {
VertexBuffer vertex_buffers[NUM_VERTEX_BUFFER_HANDLES];
MeshInfo info;
} Mesh;
// Device
typedef struct {
VertexBuffer in[NUM_VERTEX_BUFFER_HANDLES];
VertexBuffer out[NUM_VERTEX_BUFFER_HANDLES];
} VertexBufferArray;
*/

81
scripts/ac_mkbuilddir.sh Executable file
View File

@@ -0,0 +1,81 @@
#!/bin/bash
# Creates a build directory, copies the default astaroth.conf into it and runs
# cmake there. Requires the AC_HOME environment variable to point to the
# Astaroth home directory. Run with -h for the list of flags.

# Quoted so that an unset or space-containing value is tested correctly.
if [ -z "${AC_HOME}" ]
then
    echo "AC_HOME environment variable not set, run \"source ./sourceme.sh\" in Astaroth home directory"
    exit 1
fi

DOUBLE_DEFAULT="OFF"
DEBUG_MODE_DEFAULT="OFF"
BUILD_DIR_DEFAULT=${AC_HOME}/build/
ALTER_CONF_DEFAULT="OFF"

BUILD_DIR=${BUILD_DIR_DEFAULT}
# Extra cmake arguments for the TIARA cluster; kept as an array so that each
# argument is passed to cmake as a separate word without relying on word
# splitting of an unquoted string.
TIARA_SETUP=()
DOUBLE=${DOUBLE_DEFAULT}
DEBUG_MODE=${DEBUG_MODE_DEFAULT}
ALTER_CONF=${ALTER_CONF_DEFAULT}

while [ "$#" -gt 0 ]
do
    case $1 in
        -h|--help)
            echo "You can set up a build directory separe of the ASTAROTH_HOME"
            echo "Available flags:"
            echo "-b, --buildir [PATH] : Set build directory"
            echo "-t,--tiara : Use TIARA cluster setting for cmake"
            echo "-d, --double : Compile with double precision"
            echo "-e, --debug: : Compile in debug mode"
            echo "Example:"
            echo "ac_mkbuilddir.sh -b my_build_dir/"
            exit 0
            ;;
        -b|--buildir)
            # The flag needs a value; without this check BUILD_DIR would be
            # silently set to an empty string.
            if [ "$#" -lt 2 ]
            then
                echo "Option $1 requires a path argument" >&2
                exit 1
            fi
            shift
            BUILD_DIR=${1}
            shift
            echo "Setting up build directory..."
            ALTER_CONF="ON"
            ;;
        -t|--tiara)
            shift
            TIARA_SETUP=(-D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc)
            echo "Using TIARA cluster compiler settings"
            ;;
        -d|--double)
            shift
            DOUBLE="ON"
            echo "Double precision"
            ;;
        -e|--debug)
            shift
            DEBUG_MODE="ON"
            echo "Debug mode compilation"
            ;;
        *)
            # Parsing stops at the first unrecognized argument (as before);
            # tell the user so that ignored flags do not go unnoticed.
            echo "Warning: ignoring unrecognized argument(s): $*" >&2
            break
    esac
done

echo "Creating build directory: ${BUILD_DIR}"
# -p: do not fail when the directory already exists.
mkdir -p "${BUILD_DIR}"
# Abort if the directory cannot be entered; otherwise the copy and the cmake
# run below would happen in whatever directory the script was started from.
cd "${BUILD_DIR}" || exit 1

#Set up the astaroth.conf to be define and customized in the build directory to
#not always alter the default use i.e. for unit test etc.
#Assumed by default if you do this thing anyway.
echo "cp ${AC_HOME}/config/astaroth.conf ${PWD}"
cp "${AC_HOME}/config/astaroth.conf" .

CMAKE_ARGS=(
    "${TIARA_SETUP[@]}"
    -D "ASTAROTH_CONF_PATH=${PWD}"
    "-DDOUBLE_PRECISION=${DOUBLE}"
    "-DBUILD_DEBUG=${DEBUG_MODE}"
    "-DALTER_CONF=${ALTER_CONF}"
)

echo "cmake ${CMAKE_ARGS[*]} ${AC_HOME}"
cmake "${CMAKE_ARGS[@]}" "${AC_HOME}"

51
scripts/auto_optimize.sh Executable file
View File

@@ -0,0 +1,51 @@
#!/bin/bash
# Run this in your build directory (cd build && ../scripts/auto_optimize.sh)
# Generates a ${BENCHMARK_FILE} which contains the threadblock dims and other
# constants used in the integration in addition to the time used.
#
# For every candidate configuration the script writes rk3_threadblock.conf into
# the kernel source directory, rebuilds Astaroth and runs "./ac_run -b".

MAX_THREADS=1024 # Max size of the thread block, depends on hardware
BENCHMARK_FILE="benchmark.out"
TBCONFCREATOR_SRC_PATH="../scripts/gen_rk3_threadblockconf.c"
TBCONFFILE_DST_PATH="../src/core/kernels"
C_COMPILER_NAME="gcc"

# -f: a missing benchmark file from a previous run is not an error
rm -f "${BENCHMARK_FILE}"

# The generator does not depend on the loop variables, so it is built only once
"${C_COMPILER_NAME}" "${TBCONFCREATOR_SRC_PATH}" -o gen_rk3_threadblockconf || exit 1

for (( tz=2; tz<=8; tz*=2 ))
do
    for (( ty=1; ty<=1; ty+=1 ))
    do
        for (( tx=16; tx<=64; tx*=2 ))
        do
            # Arithmetic comparison. The previous form "( (a*b*c) > N )" ran a
            # subshell with its output redirected to a file named N and
            # therefore never triggered.
            if (( tx * ty * tz > MAX_THREADS ))
            then
                break
            fi

            for (( launch_bound=1; launch_bound<=8; launch_bound*=2 ))
            do
                for (( elems_per_thread=1; elems_per_thread<=128; elems_per_thread*=2 ))
                do
                    # Generate the threadblock configuration
                    ./gen_rk3_threadblockconf "${tx}" "${ty}" "${tz}" "${elems_per_thread}" "${launch_bound}"
                    mv rk3_threadblock.conf "${TBCONFFILE_DST_PATH}"

                    # Compile and run the test build
                    cmake -DBUILD_DEBUG=OFF -DDOUBLE_PRECISION=OFF -DAUTO_OPTIMIZE=ON .. && make -j

                    # NOTE: a correctness check ("./ac_run -t") before the
                    # benchmark run is currently disabled
                    ./ac_run -b
                done
            done
        done
    done
done

rm -f gen_rk3_threadblockconf

3
scripts/buildtest.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Configures and builds in the current directory (expects the sources in ..)
# and runs "./ac_run -t" under valgrind, first with double and then with single
# precision. The chain stops at the first failing step.

# $1: value for -DDOUBLE_PRECISION (ON or OFF)
build_and_check() {
    cmake -DCUDA_BUILD_LEGACY=OFF -DDOUBLE_PRECISION="$1" .. &&
        make -j &&
        valgrind --leak-check=full --show-leak-kinds=all ./ac_run -t
}

build_and_check ON && make clean && build_and_check OFF

52
scripts/compile_acc.sh Executable file
View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Rebuilds the tools under ${AC_HOME}/acc (clean.sh, build_acc.sh) and runs
# acc/compile.sh on the stencil assembly (.sas) and stencil process (.sps) files
# from within ${AC_HOME}/src/core/kernels.
#
# Usage: compile_acc.sh [-a ASSEMBLY_FILE] [-p PROCESS_FILE]
# The file paths are given relative to ${AC_HOME}/acc/.

if [ -z "${AC_HOME}" ]
then
    echo "AC_HOME environment variable not set, run \"source ./sourceme.sh\" in Astaroth home directory"
    exit 1
fi

KERNEL_DIR="${AC_HOME}/src/core/kernels"
ACC_DIR="${AC_HOME}/acc"
ACC_DEFAULT_SAS="mhd_solver/stencil_assembly.sas"
ACC_DEFAULT_SPS="mhd_solver/stencil_process.sps"

ACC_SAS="${ACC_DEFAULT_SAS}"
ACC_SPS="${ACC_DEFAULT_SPS}"

# Parse the arguments before doing any work, so that --help or a malformed
# command line does not trigger a clean and a rebuild.
while [ "$#" -gt 0 ]
do
    case "$1" in
        -h|--help)
            echo "You can set custom files for DSL under the path ${ACC_DIR}/"
            echo "Example:"
            echo "compile_acc.sh -a custom_setup/custom_assembly.sas -p custom_setup/custom_process.sps"
            exit 0
            ;;
        -a|--assembly)
            if [ "$#" -lt 2 ]
            then
                echo "Error: $1 requires a file argument" >&2
                exit 1
            fi
            ACC_SAS="$2"
            shift 2
            echo "CUSTOM Assembly file!"
            ;;
        -p|--process)
            if [ "$#" -lt 2 ]
            then
                echo "Error: $1 requires a file argument" >&2
                exit 1
            fi
            ACC_SPS="$2"
            shift 2
            echo "CUSTOM Process file!"
            ;;
        *)
            break
            ;;
    esac
done

"${ACC_DIR}/clean.sh"
"${ACC_DIR}/build_acc.sh"

echo "Assembly file: ${ACC_DIR}/${ACC_SAS}"
echo "Process file: ${ACC_DIR}/${ACC_SPS}"

# compile.sh is run from the kernel directory; do not run it somewhere else if
# the directory is missing.
cd "${KERNEL_DIR}" || exit 1
"${ACC_DIR}/compile.sh" "${ACC_DIR}/${ACC_SAS}"
"${ACC_DIR}/compile.sh" "${ACC_DIR}/${ACC_SPS}"

9
scripts/fix_style.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Reformats all *.h, *.cc, *.cu and *.cuh files below the current directory
# in place with clang-format-6.0 (using the style file found by -style=file).
# Without the parameters "DO IT!" only lists the files that would be touched.

# Finds the source files below the current directory. Additional find
# actions (f.ex. -print0) are passed through as arguments.
find_sources() {
    find . \( -name '*.h' -o -name '*.cc' -o -name '*.cu' -o -name '*.cuh' \) "$@"
}

if [[ $1 == "DO" && $2 == "IT!" ]]; then
    # NUL-separated names keep paths containing whitespace intact
    find_sources -print0 | xargs -0 clang-format-6.0 -i -style=file
    echo "It is done."
else
    find_sources -print
    echo "I'm going to try to fix the style of these files."
    echo "If you're absolutely sure, give \"DO IT!\" (without quotes) as a parameter."
fi

View File

@@ -0,0 +1,60 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Generates a threadblock config file for RK3 using the given parameters.
*
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/** Name of the generated file. It is created in the current working directory. */
const char* rk3_threadblockconf_path = "rk3_threadblock.conf";

/**
 * Writes the given RK3 thread block parameters into rk3_threadblockconf_path
 * as preprocessor definitions, one "#define NAME (value)" per line.
 * An existing file is overwritten.
 *
 * @return EXIT_SUCCESS if the file was opened, completely written and closed
 *         without errors, EXIT_FAILURE otherwise.
 */
int
write_to_file(int threads_x, int threads_y, int threads_z, int elems_per_thread, int launch_bound)
{
    FILE* fp = fopen(rk3_threadblockconf_path, "w");
    if (fp == NULL)
        return EXIT_FAILURE;

    // fprintf returns a negative value on failure. The chain stops at the first
    // failed write, the file is nevertheless closed below.
    const int write_ok = fprintf(fp, "#define RK_THREADS_X (%d)\n", threads_x) > 0 &&
                         fprintf(fp, "#define RK_THREADS_Y (%d)\n", threads_y) > 0 &&
                         fprintf(fp, "#define RK_THREADS_Z (%d)\n", threads_z) > 0 &&
                         fprintf(fp, "#define RK_ELEMS_PER_THREAD (%d)\n", elems_per_thread) > 0 &&
                         fprintf(fp, "#define RK_LAUNCH_BOUND_MIN_BLOCKS (%d)\n", launch_bound) > 0;

    // Buffered data is flushed by fclose, therefore a failing fclose also means
    // that the file may be incomplete.
    const int close_ok = fclose(fp) == 0;

    return (write_ok && close_ok) ? EXIT_SUCCESS : EXIT_FAILURE;
}
// Takes arguments and writes them into a file
// RK_THREADS_X, RK_THREADS_Y, RK_THREADS_Z, RK_ELEMS_PER_THREAD, RK_LAUNCH_BOUND_MIN_BLOCKS
//
// Returns EXIT_FAILURE if the number of arguments is wrong, otherwise the
// result of write_to_file().
int
main(int argc, char* argv[])
{
    // Checked explicitly instead of with assert(): an assert is compiled out
    // when NDEBUG is defined and argv[1..5] would then be read unchecked.
    if (argc != 6) {
        fprintf(stderr,
                "Usage: %s RK_THREADS_X RK_THREADS_Y RK_THREADS_Z RK_ELEMS_PER_THREAD "
                "RK_LAUNCH_BOUND_MIN_BLOCKS\n",
                argc > 0 ? argv[0] : "gen_rk3_threadblockconf");
        return EXIT_FAILURE;
    }

    return write_to_file(atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), atoi(argv[4]),
                         atoi(argv[5]));
}

2
scripts/generate_doc.sh Executable file
View File

@@ -0,0 +1,2 @@
#!/bin/bash
# Runs doxygen with the configuration file "doxyfile". The path is relative,
# so the script has to be started in the directory that contains the doxyfile
# (presumably the Astaroth home directory - TODO confirm).
doxygen doxyfile

7
sourceme.sh Normal file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Sets up the environment for the Astaroth scripts. Has to be sourced in the
# Astaroth home directory ("source ./sourceme.sh"), since AC_HOME is set to the
# current working directory.
export AC_HOME="${PWD}"

# Append the script directory only if it is not in PATH yet, so that sourcing
# this file repeatedly does not make PATH grow.
case ":${PATH}:" in
    *":${AC_HOME}/scripts/:"*) ;;
    *) export PATH="${PATH}:${AC_HOME}/scripts/" ;;
esac

echo "${AC_HOME}"
echo "${PATH}"

70
src/core/CMakeLists.txt Normal file
View File

@@ -0,0 +1,70 @@
########################################
## CMakeLists.txt for Astaroth Core   ##
########################################

#----------------------Find CUDA-----------------------------------------------#
find_package(CUDA)
if (NOT CUDA_FOUND)
    # find_package(CUDA REQUIRED) gives a confusing error message if it fails,
    # therefore we print the reason here explicitly
    message(FATAL_ERROR "CUDA not found")
endif()

#----------------------CUDA settings-------------------------------------------#
set(CUDA_SEPARABLE_COMPILATION ON)
set(CUDA_PROPAGATE_HOST_FLAGS ON)
# CUDA_BUILD_CUBIN requires that we're compiling for only one architecture
# set(CUDA_BUILD_CUBIN ON)

#----------------------Setup CUDA compilation flags----------------------------#
# Generate code for compute capabilities 3.7 (Kepler), 5.0 (Maxwell) and
# 6.0/6.1 (Pascal).
# -ftz=true: flush denormalized floats to zero
set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
                    -gencode arch=compute_50,code=sm_50
                    -gencode arch=compute_60,code=sm_60
                    -gencode arch=compute_61,code=sm_61
                    -lineinfo
                    --maxrregcount=255
                    -ftz=true
                    -std=c++11)
# -Xptxas -dlcm=ca opt-in to cache all global loads to L1/texture cache
#             =cg to opt out

# Additional CUDA optimization flags
if (CMAKE_BUILD_TYPE MATCHES RELEASE)
    # Doesn't set any additional flags, see CUDA_NVCC_FLAGS_DEBUG below on how
    # to add more
    set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE})
endif()

# Additional CUDA debug flags
if (CMAKE_BUILD_TYPE MATCHES DEBUG)
    # The debug flags must be set inside this if clause, since either CMake 3.5
    # or nvcc 7.5 is bugged:
    # CMake converts these into empty strings when doing RELEASE build, but nvcc
    # 7.5 fails to parse empty flags.
    list(APPEND CUDA_NVCC_FLAGS_DEBUG
         --device-debug
         --generate-line-info
         --ptxas-options=-v)
endif()

# list(APPEND) does not create an empty list element if CUDA_NVCC_FLAGS was
# empty, unlike concatenating the lists in a "a;b" string.
list(APPEND CUDA_NVCC_FLAGS ${CUDA_ARCH_FLAGS})
# The expansion is quoted: an unquoted list would be printed with all of its
# elements glued together.
message("CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")

#------------------Compile and create a static library-------------------------#
# NOTE: the glob is evaluated at configure time only. Re-run cmake after adding
# or removing .cu files.
file(GLOB CUDA_SOURCES "*.cu" "kernels/*.cu")

# Use -fPIC if -fpic not supported. Some quick non-scientific tests:
# Without fpic: 4.94 user, 4.04 system, 0:09.88 elapsed
# With fpic:    4.96 user, 4.02 system, 0:09.90 elapsed
# With fPIC:    4.94 user, 4.05 system, 0:10.23 elapsed
cuda_add_library(astaroth_core STATIC ${CUDA_SOURCES} OPTIONS --compiler-options "-fpic")

451
src/core/astaroth.cu Normal file
View File

@@ -0,0 +1,451 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Multi-GPU implementation.
*
* Detailed info.
*
*/
#include "astaroth.h"
#include "errchk.h"
#include "device.cuh"
#include "math_utils.h" // sum for reductions
#include "standalone/config_loader.h" // update_config
// Name tables for the integer parameters, real parameters and vertex buffer handles.
// The initializers are produced by the AC_FOR_* / AC_GEN_STR macros, which are defined
// outside this file (presumably one string per enumerator - TODO confirm).
const char* intparam_names[] = {AC_FOR_INT_PARAM_TYPES(AC_GEN_STR)};
const char* realparam_names[] = {AC_FOR_REAL_PARAM_TYPES(AC_GEN_STR)};
const char* vtxbuf_names[] = {AC_FOR_VTXBUF_HANDLES(AC_GEN_STR)};
// Upper limit for the number of devices used, see acInit()
static const int MAX_NUM_DEVICES = 32;
// Number of devices in use. Set by acInit()
static int num_devices = 1;
// Device handles. The first num_devices entries are filled by acInit()
static Device devices[MAX_NUM_DEVICES] = {};
// Extents of a (sub)grid, see createGrid()
typedef struct {
int3 m; // Filled from AC_mx, AC_my, AC_mz
int3 n; // Filled from AC_nx, AC_ny, AC_nz
} Grid;
/** Creates a Grid whose m and n dimensions are read from the integer parameters
    (AC_mx, AC_my, AC_mz) and (AC_nx, AC_ny, AC_nz) of the given configuration. */
static Grid
createGrid(const AcMeshInfo& config)
{
    Grid result;

    result.m.x = config.int_params[AC_mx];
    result.m.y = config.int_params[AC_my];
    result.m.z = config.int_params[AC_mz];

    result.n.x = config.int_params[AC_nx];
    result.n.y = config.int_params[AC_ny];
    result.n.z = config.int_params[AC_nz];

    return result;
}
// Both are set in acInit(). "grid" is created from the configuration passed to acInit()
// and "subgrid" from a copy of it where AC_nz has been divided by num_devices.
static Grid grid; // A grid consists of num_devices subgrids
static Grid subgrid;
/** Maps the coordinates (i, j, k) to a linear index within a block of data whose
    extents are given by grid.m. The x coordinate varies the fastest. */
static int
gridIdx(const Grid& grid, const int i, const int j, const int k)
{
    const int row_stride   = grid.m.x;
    const int plane_stride = grid.m.x * grid.m.y;

    return i + j * row_stride + k * plane_stride;
}
/** Inverse of gridIdx(): maps a linear index to the coordinates within a block of
    data whose extents are given by grid.m. */
static int3
gridIdx3d(const Grid& grid, const int idx)
{
    const int plane_size = grid.m.x * grid.m.y;

    int3 coords;
    coords.x = idx % grid.m.x;
    coords.y = (idx % plane_size) / grid.m.x;
    coords.z = idx / plane_size;
    return coords;
}
/** Writes the vector to stdout as "(x, y, z)". No newline is appended. */
void
printInt3(const int3 vec)
{
    fprintf(stdout, "(%d, %d, %d)", vec.x, vec.y, vec.z);
}
/**
 * Initializes the multi-GPU layer: selects the number of devices, decomposes the
 * domain given by config along the z axis and creates one device for each subgrid.
 *
 * @param config Configuration of the whole simulation domain. AC_nz must be
 *               divisible by the number of devices in use.
 * @return AC_FAILURE if no CUDA device could be found, otherwise AC_SUCCESS.
 */
AcResult
acInit(const AcMeshInfo& config)
{
    // Check devices. A failed query is handled like "no devices". num_devices is reset to 0
    // in that case such that it does not keep a stale value, and a subsequent acQuit() does
    // not touch device handles that were never created.
    if (cudaGetDeviceCount(&num_devices) != cudaSuccess || num_devices < 1) {
        num_devices = 0;
        ERROR("No CUDA devices found!");
        return AC_FAILURE;
    }
    if (num_devices > MAX_NUM_DEVICES) {
        WARNING("More devices found than MAX_NUM_DEVICES. Using only MAX_NUM_DEVICES");
        num_devices = MAX_NUM_DEVICES;
    }
    if (!AC_MULTIGPU_ENABLED) {
        WARNING("MULTIGPU_ENABLED was false. Using only one device");
        num_devices = 1; // Use only one device if multi-GPU is not enabled
    }

    // Check that AC_nz is divisible by num_devices. This makes decomposing the
    // problem domain to multiple GPUs much easier since we do not have to worry
    // about remainders
    ERRCHK_ALWAYS(config.int_params[AC_nz] % num_devices == 0);

    // Decompose the problem domain
    // The main grid
    grid = createGrid(config);

    // Subgrids
    AcMeshInfo subgrid_config = config;
    subgrid_config.int_params[AC_nz] /= num_devices;
    update_config(&subgrid_config);
    subgrid = createGrid(subgrid_config);

    // Periodic boundary conditions become weird if the system can "fold unto itself".
    ERRCHK_ALWAYS(subgrid.n.x >= STENCIL_ORDER);
    ERRCHK_ALWAYS(subgrid.n.y >= STENCIL_ORDER);
    ERRCHK_ALWAYS(subgrid.n.z >= STENCIL_ORDER);

    printf("Grid m "); printInt3(grid.m); printf("\n");
    printf("Grid n "); printInt3(grid.n); printf("\n");
    printf("Subgrid m "); printInt3(subgrid.m); printf("\n");
    printf("Subgrid n "); printInt3(subgrid.n); printf("\n");

    // Initialize the devices
    for (int i = 0; i < num_devices; ++i) {
        createDevice(i, subgrid_config, &devices[i]);
        printDeviceInfo(devices[i]);
    }

    return AC_SUCCESS;
}
/** Destroys the first num_devices devices stored in "devices". Always returns AC_SUCCESS. */
AcResult
acQuit(void)
{
    int device_idx = 0;
    while (device_idx < num_devices) {
        destroyDevice(devices[device_idx]);
        ++device_idx;
    }
    return AC_SUCCESS;
}
/** Same mapping as gridIdx(), but takes the coordinates as an int3. */
int
gridIdxx(const Grid grid, const int3 idx)
{
    const int row_offset   = idx.y * grid.m.x;
    const int plane_offset = idx.z * grid.m.x * grid.m.y;

    return idx.x + row_offset + plane_offset;
}
/**
 * Distributes a part of the host mesh among the devices.
 *
 * The transferred range starts at the grid coordinates `src` and ends `num_vertices`
 * linear grid indices later (s0...s1 below). For each device, the range is clipped against
 * the range d0...d1 of that device, and if the clipped range da...db satisfies
 * db.z >= da.z, the data is passed to copyMeshToDevice() using STREAM_PRIMARY.
 *
 * Always returns AC_SUCCESS.
 *
 * NOTE(review): a newline is printed for every device even though the debug output
 * below is commented out; looks like a leftover - confirm.
 */
AcResult
acLoadWithOffset(const AcMesh& host_mesh, const int3& src, const int num_vertices)
{
/*
Here we decompose the host mesh and distribute it among the GPUs in
the node.
The host mesh is a huge contiguous block of data. Its dimensions are given by
the global variable named "grid". A "grid" is decomposed into "subgrids",
one for each GPU. Here we check which parts of the range s0...s1 maps
to the memory space stored by some GPU, ranging d0...d1, and transfer
the data if needed.
The index mapping is inherently quite involved, but here's a picture which
hopefully helps make sense out of all this.
Grid
|----num_vertices---|
xxx|....................................................|xxx
^ ^ ^ ^
d0 d1 s0 (src) s1
Subgrid
xxx|.............|xxx
^ ^
d0 d1
^ ^
db da
*/
for (int i = 0; i < num_devices; ++i) {
// d0...d1: the range of device i in grid coordinates
const int3 d0 = (int3){0, 0, i * subgrid.n.z}; // DECOMPOSITION OFFSET HERE
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.m.z};
// s0...s1: the range to be transferred in grid coordinates
const int3 s0 = src;
const int3 s1 = gridIdx3d(grid, gridIdx(grid, s0.x, s0.y, s0.z) + num_vertices);
// da...db: component-wise maximum of the start points and minimum of the end points
const int3 da = (int3){max(s0.x, d0.x), max(s0.y, d0.y), max(s0.z, d0.z)};
const int3 db = (int3){min(s1.x, d1.x), min(s1.y, d1.y), min(s1.z, d1.z)};
/*
printf("Device %d\n", i);
printf("\ts0: "); printInt3(s0); printf("\n");
printf("\td0: "); printInt3(d0); printf("\n");
printf("\tda: "); printInt3(da); printf("\n");
printf("\tdb: "); printInt3(db); printf("\n");
printf("\td1: "); printInt3(d1); printf("\n");
printf("\ts1: "); printInt3(s1); printf("\n");
printf("\t-> %s to device %d\n", db.z >= da.z ? "Copy" : "Do not copy", i);
*/
if (db.z >= da.z) {
// The number of cells is computed as a difference of linear subgrid indices
const int copy_cells = gridIdxx(subgrid, db) - gridIdxx(subgrid, da);
// da translated to the local coordinates of device i (only z differs)
const int3 da_local = (int3){da.x, da.y, da.z - i * grid.n.z / num_devices}; // DECOMPOSITION OFFSET HERE
// printf("\t\tcopy %d cells to local index ", copy_cells); printInt3(da_local); printf("\n");
copyMeshToDevice(devices[i], STREAM_PRIMARY, host_mesh, da, da_local, copy_cells);
}
printf("\n");
}
return AC_SUCCESS;
}
/**
 * Counterpart of acLoadWithOffset(): gathers a part of the mesh from the devices
 * into host_mesh.
 *
 * The range starting at the grid coordinates `src` and spanning `num_vertices` linear
 * grid indices is clipped against the range of each device with the same index
 * computations as in acLoadWithOffset(). If the clipped range da...db satisfies
 * db.z >= da.z, the data is passed to copyMeshToHost() using STREAM_PRIMARY.
 *
 * Always returns AC_SUCCESS.
 *
 * NOTE(review): a newline is printed for every device even though the debug output
 * below is commented out; looks like a leftover - confirm.
 */
AcResult
acStoreWithOffset(const int3& src, const int num_vertices, AcMesh* host_mesh)
{
// See acLoadWithOffset() for an explanation of the index mapping
for (int i = 0; i < num_devices; ++i) {
// d0...d1: the range of device i in grid coordinates
const int3 d0 = (int3){0, 0, i * subgrid.n.z}; // DECOMPOSITION OFFSET HERE
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.m.z};
// s0...s1: the range to be transferred in grid coordinates
const int3 s0 = src;
const int3 s1 = gridIdx3d(grid, gridIdx(grid, s0.x, s0.y, s0.z) + num_vertices);
// da...db: component-wise maximum of the start points and minimum of the end points
const int3 da = (int3){max(s0.x, d0.x), max(s0.y, d0.y), max(s0.z, d0.z)};
const int3 db = (int3){min(s1.x, d1.x), min(s1.y, d1.y), min(s1.z, d1.z)};
/*
printf("Device %d\n", i);
printf("\ts0: "); printInt3(s0); printf("\n");
printf("\td0: "); printInt3(d0); printf("\n");
printf("\tda: "); printInt3(da); printf("\n");
printf("\tdb: "); printInt3(db); printf("\n");
printf("\td1: "); printInt3(d1); printf("\n");
printf("\ts1: "); printInt3(s1); printf("\n");
printf("\t-> %s to device %d\n", db.z >= da.z ? "Copy" : "Do not copy", i);
*/
if (db.z >= da.z) {
// The number of cells is computed as a difference of linear subgrid indices
const int copy_cells = gridIdxx(subgrid, db) - gridIdxx(subgrid, da);
// da translated to the local coordinates of device i (only z differs)
const int3 da_local = (int3){da.x, da.y, da.z - i * grid.n.z / num_devices}; // DECOMPOSITION OFFSET HERE
// printf("\t\tcopy %d cells from local index ", copy_cells); printInt3(da_local); printf("\n");
copyMeshToHost(devices[i], STREAM_PRIMARY, da_local, da, copy_cells, host_mesh);
}
printf("\n");
}
return AC_SUCCESS;
}
// acCopyMeshToDevice
/** Loads the whole host mesh to the devices: calls acLoadWithOffset() with the
    origin as the offset and AC_VTXBUF_SIZE(host_mesh.info) as the number of vertices. */
AcResult
acLoad(const AcMesh& host_mesh)
{
    const int3 origin      = (int3){0, 0, 0};
    const int num_vertices = AC_VTXBUF_SIZE(host_mesh.info);

    return acLoadWithOffset(host_mesh, origin, num_vertices);
}
// acCopyMeshToHost
/** Stores the whole mesh to the host: calls acStoreWithOffset() with the origin
    as the offset and AC_VTXBUF_SIZE(host_mesh->info) as the number of vertices. */
AcResult
acStore(AcMesh* host_mesh)
{
    const int3 origin      = (int3){0, 0, 0};
    const int num_vertices = AC_VTXBUF_SIZE(host_mesh->info);

    return acStoreWithOffset(origin, num_vertices, host_mesh);
}
/** Calls rkStep() for substep `isubstep` on every device using STREAM_PRIMARY. The
    range passed to each device starts STENCIL_ORDER/2 cells from the origin in every
    direction and spans subgrid.n cells. Always returns AC_SUCCESS. */
AcResult
acIntegrateStep(const int& isubstep, const AcReal& dt)
{
    const int offset = STENCIL_ORDER/2;

    const int3 start = (int3){offset, offset, offset};
    const int3 end   = (int3){offset + subgrid.n.x,
                              offset + subgrid.n.y,
                              offset + subgrid.n.z};

    for (int device_idx = 0; device_idx < num_devices; ++device_idx)
        rkStep(devices[device_idx], STREAM_PRIMARY, isubstep, start, end, dt);

    return AC_SUCCESS;
}
/**
 * Applies the boundary conditions on all devices.
 *
 * With one device, boundcondStep() is called for the whole subgrid (0...subgrid.m).
 * With several devices, boundcondStep() is first called on each device for the z range
 * STENCIL_ORDER/2 ... STENCIL_ORDER/2 + subgrid.n.z, after which slabs of
 * subgrid.m.x * subgrid.m.y * STENCIL_ORDER/2 vertices are copied between the devices
 * i and (i+1) % num_devices in both directions.
 *
 * acSynchronize() is called both at the start and at the end of the function.
 * Always returns AC_SUCCESS.
 */
AcResult
acBoundcondStep(void)
{
acSynchronize();
if (num_devices == 1) {
boundcondStep(devices[0], STREAM_PRIMARY,
(int3){0, 0, 0}, (int3){subgrid.m.x, subgrid.m.y, subgrid.m.z});
} else {
// Local boundary conditions
for (int i = 0; i < num_devices; ++i) {
const int3 d0 = (int3){0, 0, STENCIL_ORDER/2}; // DECOMPOSITION OFFSET HERE
const int3 d1 = (int3){subgrid.m.x, subgrid.m.y, d0.z + subgrid.n.z};
boundcondStep(devices[i], STREAM_PRIMARY, d0, d1);
}
/*
// ===MIIKKANOTE START==========================================
%JP: The old way for computing boundary conditions conflicts with the
way we have to do things with multiple GPUs.
The older approach relied on unified memory, which represented the whole
memory area as one huge mesh instead of several smaller ones. However, unified memory
in its current state is more meant for quick prototyping when performance is not an issue.
Getting the CUDA driver to migrate data intelligently across GPUs is much more difficult than
when managing the memory explicitly.
In this new approach, I have simplified the multi- and single-GPU layers significantly.
Quick rundown:
New struct: Grid. There are two global variables, "grid" and "subgrid", which
contain the extents of the whole simulation domain and the decomposed grids, respectively.
To simplify things, we require that each GPU is assigned the same amount of work,
therefore each GPU in the node is assigned a "subgrid.m" -sized block of data
to work with.
The whole simulation domain is decomposed with respect to the z dimension.
For example, if the grid contains (nx, ny, nz) vertices, then the subgrids
contain (nx, ny, nz / num_devices) vertices.
A local index (i, j, k) in some subgrid can be mapped to the global grid with
global idx = (i, j, k + device_id * subgrid.n.z)
Terminology:
- Single-GPU function: a function defined on the single-GPU layer (device.cu)
Changes required to this commented code block:
- The thread block dimensions (tpb) are no longer passed to the kernel here but in device.cu
instead. Same holds for any complex index calculations. Instead, the local coordinates
should be passed as an int3 type without having to consider how the data is actually
laid out in device memory
- The unified memory buffer no longer exists (d_buffer). Instead, we have an opaque handle
of type "Device" which should be passed to single-GPU functions. In this file, all devices
are stored in a global array "devices[num_devices]".
- Every single-GPU function is executed asynchronously by default such that we
can optimize Astaroth by executing memory transactions concurrently with computation.
Therefore a StreamType should be passed as a parameter to single-GPU functions.
Refresher: CUDA function calls are non-blocking when a stream is explicitly passed
as a parameter and commands executing in different streams can be processed
in parallel/concurrently.
Note on periodic boundaries (might be helpful when implementing other boundary conditions):
With multiple GPUs, periodic boundary conditions applied on indices ranging from
(0, 0, STENCIL_ORDER/2) to (subgrid.m.x, subgrid.m.y, subgrid.m.z - STENCIL_ORDER/2)
on a single device are "local", in the sense that they can be computed without having
to exchange data with neighboring GPUs. Special care is needed only for transferring
the data to the front and back plates outside this range. In the solution we use here,
we solve the local boundaries first, and then just exchange the front and back plates
in a "ring", like so
device_id
(n) <-> 0 <-> 1 <-> ... <-> n <-> (0)
// ======MIIKKANOTE END==========================================
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< MIIKKANOTE: This code block was essentially
moved into device.cu, function boundCondStep()
In astaroth.cu, we use acBoundcondStep()
just to distribute the work and manage
communication between GPUs.
printf("Boundconds best dims (%d, %d, %d) %f ms\n", best_dims.x, best_dims.y, best_dims.z, double(best_time) / NUM_ITERATIONS);
exit(0);
#else
const int depth = (int)ceil(mesh_info.int_params[AC_mz]/(float)num_devices);
const int3 start = (int3){0, 0, device_id * depth};
const int3 end = (int3){mesh_info.int_params[AC_mx],
mesh_info.int_params[AC_my],
min((device_id+1) * depth, mesh_info.int_params[AC_mz])};
const dim3 tpb(8,2,8);
// TODO uses the default stream currently
if (mesh_info.int_params[AC_bc_type] == 666) { // TODO MAKE A BETTER SWITCH
wedge_boundconds(0, tpb, start, end, d_buffer);
} else {
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
periodic_boundconds(0, tpb, start, end, d_buffer.in[i]);
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
*/
// Exchange halos
for (int i = 0; i < num_devices; ++i) {
// Size of one exchanged slab: a full xy plane times STENCIL_ORDER/2 layers
const int num_vertices = subgrid.m.x * subgrid.m.y * STENCIL_ORDER/2;
// From device i (starting at z = subgrid.n.z) to the start (z = 0) of device i+1
// ...|ooooxxx|... -> xxx|ooooooo|...
{
const int3 src = (int3) {0, 0, subgrid.n.z};
const int3 dst = (int3) {0, 0, 0};
copyMeshDeviceToDevice(devices[i], STREAM_PRIMARY, src, devices[(i+1) % num_devices], dst, num_vertices);
}
// From device i+1 (starting at z = STENCIL_ORDER/2) to device i
// (starting at z = STENCIL_ORDER/2 + subgrid.n.z)
// ...|ooooooo|xxx <- ...|xxxoooo|...
{
const int3 src = (int3) {0, 0, STENCIL_ORDER/2};
const int3 dst = (int3) {0, 0, STENCIL_ORDER/2 + subgrid.n.z};
copyMeshDeviceToDevice(devices[(i+1) % num_devices], STREAM_PRIMARY, src, devices[i], dst, num_vertices);
}
}
}
acSynchronize();
return AC_SUCCESS;
}
/** Calls swapBuffers() on every device in use. Always returns AC_SUCCESS. */
static AcResult
acSwapBuffers(void)
{
    int device_idx = 0;
    while (device_idx < num_devices) {
        swapBuffers(devices[device_idx]);
        ++device_idx;
    }
    return AC_SUCCESS;
}
/** Runs the three substeps of one integration step. In each substep, the boundary
    conditions are applied first, followed by the integration substep and a swap
    of the buffers. Always returns AC_SUCCESS. */
AcResult
acIntegrate(const AcReal& dt)
{
    const int num_substeps = 3;

    int substep = 0;
    while (substep < num_substeps) {
        acBoundcondStep();
        acIntegrateStep(substep, dt);
        acSwapBuffers();
        ++substep;
    }
    return AC_SUCCESS;
}
/** Not implemented yet: the arguments are ignored and the result is always 0. */
AcReal
acReduceScal(const ReductionType& rtype,
             const VertexBufferHandle& vtxbuffer_handle)
{
    // TODO
    (void)rtype;
    (void)vtxbuffer_handle;

    const AcReal result = 0;
    return result;
}
/** Not implemented yet: the arguments are ignored and the result is always 0. */
AcReal
acReduceVec(const ReductionType& rtype, const VertexBufferHandle& a,
            const VertexBufferHandle& b, const VertexBufferHandle& c)
{
    // TODO
    (void)rtype;
    (void)a;
    (void)b;
    (void)c;

    const AcReal result = 0;
    return result;
}
/** Calls synchronize() with STREAM_ALL on every device in use. Always returns AC_SUCCESS. */
AcResult
acSynchronize(void)
{
    int device_idx = 0;
    while (device_idx < num_devices) {
        synchronize(devices[device_idx], STREAM_ALL);
        ++device_idx;
    }
    return AC_SUCCESS;
}

309
src/core/device.cu Normal file
View File

@@ -0,0 +1,309 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Single-GPU implementation.
*
* Defines the functions that operate on a single device through a Device handle.
*
*/
#include "device.cuh"
#include "errchk.h"
// Two sets of vertex buffers, one pointer per vertex buffer handle in both. The sets are
// allocated in createDevice() and their roles are exchanged by swapBuffers().
typedef struct {
AcReal* in[NUM_VTXBUF_HANDLES];
AcReal* out[NUM_VTXBUF_HANDLES];
} VertexBufferArray;
// Mesh info in constant memory. Written in createDevice() with cudaMemcpyToSymbol
__constant__ AcMeshInfo d_mesh_info;
// Accessors for the integer and real parameters stored in d_mesh_info
#define DCONST_INT(X) (d_mesh_info.int_params[X])
#define DCONST_REAL(X) (d_mesh_info.real_params[X])
// Linear index of the vertex (i, j, k), computed with the AC_mx and AC_mxy values of d_mesh_info
#define DEVICE_VTXBUF_IDX(i, j, k) ((i) + (j)*DCONST_INT(AC_mx) + (k)*DCONST_INT(AC_mxy))
// NOTE(review): included after the definitions above, presumably because the kernels
// use them - confirm before moving this include
#include "kernels/kernels.cuh"
// State of one device. Allocated in createDevice() and freed in destroyDevice()
struct device_s {
int id; // Passed to cudaSetDevice() by the functions in this file
AcMeshInfo local_config; // Copy of the configuration given to createDevice()
// Concurrency
cudaStream_t streams[NUM_STREAM_TYPES]; // Indexed with StreamType
// Memory
VertexBufferArray vba;
AcReal* reduce_scratchpad; // Device buffer of AC_VTXBUF_COMPDOMAIN_SIZE_BYTES bytes
AcReal* reduce_result; // Device buffer holding one AcReal
};
/**
 * Prints the properties and the current memory usage of the device to stdout.
 *
 * The device is made the current device of the calling thread, since the memory
 * usage is queried from the current device. Always returns AC_SUCCESS.
 */
AcResult
printDeviceInfo(const Device device)
{
    const int device_id = device->id;

    // cudaMemGetInfo below reports the values of the current device, therefore select the
    // device explicitly as the other functions in this file do
    cudaSetDevice(device_id);

    cudaDeviceProp props;
    cudaGetDeviceProperties(&props, device_id);
    printf("--------------------------------------------------\n");
    printf("Device Number: %d\n", device_id);
    const size_t bus_id_max_len = 128;
    char bus_id[bus_id_max_len];
    cudaDeviceGetPCIBusId(bus_id, bus_id_max_len, device_id);
    printf(" PCI bus ID: %s\n", bus_id);
    printf(" Device name: %s\n", props.name);
    printf(" Compute capability: %d.%d\n", props.major, props.minor);

    // Compute
    printf(" Compute\n");
    printf(" Clock rate (GHz): %g\n", props.clockRate / 1e6); // KHz -> GHz
    printf(" Stream processors: %d\n", props.multiProcessorCount);
    // NOTE: singleToDoublePrecisionPerfRatio is not supported with older CUDA versions
    printf(" SP to DP flops performance ratio: %d:1\n", props.singleToDoublePrecisionPerfRatio);
    // For the meaning of the values, see
    // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g7eb25f5413a962faad0956d92bae10d0
    printf(" Compute mode: %d\n", (int)props.computeMode);

    // Memory
    printf(" Global memory\n");
    printf(" Memory Clock Rate (MHz): %d\n", props.memoryClockRate / (1000));
    printf(" Memory Bus Width (bits): %d\n", props.memoryBusWidth);
    printf(" Peak Memory Bandwidth (GiB/s): %f\n",
           2 * (props.memoryClockRate * 1e3) * props.memoryBusWidth /
               (8. * 1024. * 1024. * 1024.));
    printf(" ECC enabled: %d\n", props.ECCEnabled);

    // Memory usage
    size_t free_bytes, total_bytes;
    cudaMemGetInfo(&free_bytes, &total_bytes);
    const size_t used_bytes = total_bytes - free_bytes;
    printf(" Total global mem: %.2f GiB\n",
           props.totalGlobalMem / (1024.0 * 1024 * 1024));
    printf(" Gmem used (GiB): %.2f\n", used_bytes / (1024.0 * 1024 * 1024));
    printf(" Gmem memory free (GiB): %.2f\n",
           free_bytes / (1024.0 * 1024 * 1024));
    printf(" Gmem memory total (GiB): %.2f\n",
           total_bytes / (1024.0 * 1024 * 1024));

    printf(" Caches\n");
    printf(" Local L1 cache supported: %d\n", props.localL1CacheSupported);
    printf(" Global L1 cache supported: %d\n", props.globalL1CacheSupported);
    printf(" L2 size: %d KiB\n", props.l2CacheSize / (1024));
    // totalConstMem and sharedMemPerBlock are of type size_t, hence %zu
    printf(" Total const mem: %zu KiB\n", props.totalConstMem / (1024));
    printf(" Shared mem per block: %zu KiB\n",
           props.sharedMemPerBlock / (1024));

    printf(" Other\n");
    printf(" Warp size: %d\n", props.warpSize);
    printf(" Stream priorities supported: %d\n",
           props.streamPrioritiesSupported);
    printf("--------------------------------------------------\n");
    return AC_SUCCESS;
}
// Empty kernel. Launched in createDevice() to check that kernels can be run on the device
static __global__ void dummy_kernel(void) {}
/**
 * Creates a device: resets the CUDA device `id`, checks that a kernel can be launched
 * on it, creates the streams, allocates the device memory and uploads device_config
 * to constant memory.
 *
 * @param id            Index of the CUDA device
 * @param device_config Configuration of the subgrid assigned to the device
 * @param device_handle Receives the handle of the created device
 * @return AC_SUCCESS. Failures are caught by the ERRCHK macros.
 */
AcResult
createDevice(const int id, const AcMeshInfo device_config, Device* device_handle)
{
    // Selecting and resetting the device are checked like the allocations below. If either
    // fails, all the following calls would operate on the wrong device or a broken context.
    ERRCHK_CUDA_ALWAYS(cudaSetDevice(id));
    ERRCHK_CUDA_ALWAYS(cudaDeviceReset());

    // Create Device
    struct device_s* device = (struct device_s*) malloc(sizeof(*device));
    ERRCHK_ALWAYS(device);

    device->id = id;
    device->local_config = device_config;

    // Check that the code was compiled for the proper GPU architecture
    printf("Trying to run a dummy kernel. If this fails, make sure that your\n"
           "device supports the CUDA architecture you are compiling for.\n"
           "Running dummy kernel... ");
    fflush(stdout);
    dummy_kernel<<<1, 1>>>();
    ERRCHK_CUDA_KERNEL_ALWAYS();
    printf("Success!\n");

    // Concurrency
    for (int i = 0; i < NUM_STREAM_TYPES; ++i) {
        ERRCHK_CUDA_ALWAYS(cudaStreamCreate(&device->streams[i]));
    }

    // Memory
    const size_t vba_size_bytes = AC_VTXBUF_SIZE_BYTES(device_config);
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.in[i], vba_size_bytes));
        ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->vba.out[i], vba_size_bytes));
    }
    ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_scratchpad,
                                  AC_VTXBUF_COMPDOMAIN_SIZE_BYTES(device_config)));
    ERRCHK_CUDA_ALWAYS(cudaMalloc(&device->reduce_result, sizeof(AcReal)));

    // Device constants
    ERRCHK_CUDA_ALWAYS(cudaMemcpyToSymbol(d_mesh_info, &device_config, sizeof(device_config), 0,
                                          cudaMemcpyHostToDevice));

    printf("Created device %d (%p)\n", device->id, device);
    *device_handle = device;
    return AC_SUCCESS;
}
/** Releases the resources of the device: the device memory, the streams and the
    device struct itself. The handle must not be used afterwards.
    Always returns AC_SUCCESS. */
AcResult
destroyDevice(Device device)
{
    cudaSetDevice(device->id);
    printf("Destroying device %d (%p)\n", device->id, device);

    // Memory
    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle) {
        cudaFree(device->vba.in[handle]);
        cudaFree(device->vba.out[handle]);
    }
    cudaFree(device->reduce_scratchpad);
    cudaFree(device->reduce_result);

    // Concurrency
    for (int stream_idx = 0; stream_idx < NUM_STREAM_TYPES; ++stream_idx) {
        cudaStreamDestroy(device->streams[stream_idx]);
    }

    // Destroy Device
    free(device);

    return AC_SUCCESS;
}
/** Calls periodic_boundconds() for the range start...end on every input vertex
    buffer of the device, using the stream selected by stream_type.
    Always returns AC_SUCCESS. */
AcResult
boundcondStep(const Device device, const StreamType stream_type, const int3& start, const int3& end)
{
    cudaSetDevice(device->id);

    const cudaStream_t stream = device->streams[stream_type];
    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle)
        periodic_boundconds(stream, start, end, device->vba.in[handle]);

    return AC_SUCCESS;
}
/** Stub: only selects the device. No reduction is performed. Always returns AC_SUCCESS. */
AcResult
reduceScal(const Device device)
{
cudaSetDevice(device->id);
return AC_SUCCESS;
}
/** Stub: only selects the device. No reduction is performed. Always returns AC_SUCCESS. */
AcResult
reduceVec(const Device device)
{
cudaSetDevice(device->id);
return AC_SUCCESS;
}
/** Selects the device and calls rk3_step_async() with the stream selected by stream_type,
    passing on step_number, the range start...end, dt and the vertex buffers of the device.
    Always returns AC_SUCCESS.
    NOTE(review): rk3_step_async is not defined in this file (presumably it comes from
    kernels/kernels.cuh); judging by the name the call is asynchronous - confirm. */
AcResult
rkStep(const Device device, const StreamType stream_type, const int step_number,
const int3& start, const int3& end, const AcReal dt)
{
cudaSetDevice(device->id);
rk3_step_async(device->streams[stream_type], step_number, start, end, dt, &device->vba);
return AC_SUCCESS;
}
/** Blocks until the work in the given stream of the device has completed. With
    STREAM_ALL, the whole device is synchronized instead. Always returns AC_SUCCESS. */
AcResult
synchronize(const Device device, const StreamType stream_type)
{
    cudaSetDevice(device->id);

    if (stream_type != STREAM_ALL) {
        cudaStreamSynchronize(device->streams[stream_type]);
        return AC_SUCCESS;
    }

    cudaDeviceSynchronize();
    return AC_SUCCESS;
}
/** Starts an asynchronous copy of `bytes` bytes from the host pointer src to the
    device pointer dst in the stream selected by stream_type.
    Always returns AC_SUCCESS. */
static AcResult
loadWithOffset(const Device device, const StreamType stream_type,
               const AcReal* src, const size_t bytes, AcReal* dst)
{
    cudaSetDevice(device->id);

    const cudaStream_t stream = device->streams[stream_type];
    ERRCHK_CUDA(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyHostToDevice, stream));

    return AC_SUCCESS;
}
/** Starts an asynchronous copy of `bytes` bytes from the device pointer src to the
    host pointer dst in the stream selected by stream_type.
    Always returns AC_SUCCESS. */
static AcResult
storeWithOffset(const Device device, const StreamType stream_type,
                const AcReal* src, const size_t bytes, AcReal* dst)
{
    cudaSetDevice(device->id);

    const cudaStream_t stream = device->streams[stream_type];
    ERRCHK_CUDA(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToHost, stream));

    return AC_SUCCESS;
}
/** Copies num_vertices vertices of every vertex buffer from the host mesh to the
    input buffers of the device. src is the first vertex in the host mesh (indexed
    with host_mesh.info) and dst the first vertex on the device (indexed with the
    local configuration of the device). Always returns AC_SUCCESS. */
AcResult
copyMeshToDevice(const Device device, const StreamType stream_type,
                 const AcMesh& host_mesh, const int3& src, const int3& dst,
                 const int num_vertices)
{
    const size_t host_offset   = AC_VTXBUF_IDX(src.x, src.y, src.z, host_mesh.info);
    const size_t device_offset = AC_VTXBUF_IDX(dst.x, dst.y, dst.z, device->local_config);
    const size_t num_bytes     = num_vertices * sizeof(AcReal);

    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle) {
        const AcReal* host_ptr = &host_mesh.vertex_buffer[handle][host_offset];
        AcReal* device_ptr     = &device->vba.in[handle][device_offset];

        loadWithOffset(device, stream_type, host_ptr, num_bytes, device_ptr);
    }
    return AC_SUCCESS;
}
/** Copies num_vertices vertices of every vertex buffer from the input buffers of the
    device to the host mesh. src is the first vertex on the device (indexed with the
    local configuration of the device) and dst the first vertex in the host mesh
    (indexed with host_mesh->info). Always returns AC_SUCCESS. */
AcResult
copyMeshToHost(const Device device, const StreamType stream_type,
               const int3& src, const int3& dst, const int num_vertices,
               AcMesh* host_mesh)
{
    const size_t device_offset = AC_VTXBUF_IDX(src.x, src.y, src.z, device->local_config);
    const size_t host_offset   = AC_VTXBUF_IDX(dst.x, dst.y, dst.z, host_mesh->info);
    const size_t num_bytes     = num_vertices * sizeof(AcReal);

    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle) {
        const AcReal* device_ptr = &device->vba.in[handle][device_offset];
        AcReal* host_ptr         = &host_mesh->vertex_buffer[handle][host_offset];

        storeWithOffset(device, stream_type, device_ptr, num_bytes, host_ptr);
    }
    return AC_SUCCESS;
}
/** Starts asynchronous peer copies of num_vertices vertices of every input vertex
    buffer from src_device (starting at the vertex src) to dst_device (starting at
    the vertex dst). The copies are issued to the stream of the source device
    selected by stream_type. Always returns AC_SUCCESS. */
AcResult
copyMeshDeviceToDevice(const Device src_device, const StreamType stream_type,
                       const int3& src, Device dst_device, const int3& dst,
                       const int num_vertices)
{
    cudaSetDevice(src_device->id);

    const size_t from_offset = AC_VTXBUF_IDX(src.x, src.y, src.z, src_device->local_config);
    const size_t to_offset   = AC_VTXBUF_IDX(dst.x, dst.y, dst.z, dst_device->local_config);

    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle) {
        AcReal* from = &src_device->vba.in[handle][from_offset];
        AcReal* to   = &dst_device->vba.in[handle][to_offset];

        ERRCHK_CUDA(cudaMemcpyPeerAsync(to, dst_device->id, from, src_device->id,
                                        sizeof(*from) * num_vertices,
                                        src_device->streams[stream_type]));
    }
    return AC_SUCCESS;
}
/** Exchanges the input and output buffer pointers of every vertex buffer of the
    device. Only the pointers are swapped, no data is copied.
    Always returns AC_SUCCESS. */
AcResult
swapBuffers(const Device device)
{
    cudaSetDevice(device->id);

    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle) {
        AcReal* const previous_in = device->vba.in[handle];

        device->vba.in[handle]  = device->vba.out[handle];
        device->vba.out[handle] = previous_in;
    }
    return AC_SUCCESS;
}

82
src/core/device.cuh Normal file
View File

@@ -0,0 +1,82 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Interface of the single-GPU layer.
*
* Declares the opaque Device handle and the functions operating on it.
*
*/
#pragma once
#include "astaroth.h"
// Selects which of a device's streams an operation is issued on or synchronized with.
typedef enum {
STREAM_PRIMARY,
STREAM_SECONDARY,
// Equals the number of stream entries listed above it (2).
NUM_STREAM_TYPES,
// Listed after NUM_STREAM_TYPES, so it is not counted as a stream of its own.
// synchronize() documents it as "all streams on the device".
STREAM_ALL
} StreamType;
typedef struct device_s* Device; // Opaque pointer to device_s. Analogous to dispatchable handles
// in Vulkan, f.ex. VkDevice
/** Prints information about `device`. (Implementation not visible from this header.) */
AcResult printDeviceInfo(const Device device);
/** Creates a device handle and returns it through `device`.
NOTE(review): presumably `id` is the CUDA device index and `device_config` the mesh
configuration local to that device - confirm against the implementation. */
AcResult createDevice(const int id, const AcMeshInfo device_config, Device* device);
/** Destroys a handle obtained from createDevice().
NOTE(review): presumably releases all resources owned by the device - confirm. */
AcResult destroyDevice(Device device);
/** Applies the boundary conditions on the stream `stream_type`.
NOTE(review): presumably restricted to the vertex index range [start, end) - confirm. */
AcResult boundcondStep(const Device device, const StreamType stream_type,
const int3& start, const int3& end);
/** Scalar reduction on `device`. NOTE(review): takes no field or reduction-type argument and
returns only an AcResult - confirm how the result is meant to be retrieved. */
AcResult reduceScal(const Device device);
/** Vector reduction on `device`. NOTE(review): same open questions as reduceScal(). */
AcResult reduceVec(const Device device);
/** Runs one integration substep `step_number` with time step `dt` on the stream `stream_type`.
NOTE(review): presumably restricted to the vertex index range [start, end) - confirm. */
AcResult rkStep(const Device device, const StreamType stream_type, const int step_number,
const int3& start, const int3& end, const AcReal dt);
/** Synchronizes the device with respect to stream_type. If STREAM_ALL is given as
a StreamType, the function synchronizes all streams on the device. */
AcResult synchronize(const Device device, const StreamType stream_type);
/** Copies `num_vertices` elements per vertex buffer from `host_mesh` to `device`.
NOTE(review): presumably reads from vertex index `src` on the host and writes to vertex
index `dst` on the device, mirroring copyMeshToHost() - confirm. */
AcResult copyMeshToDevice(const Device device, const StreamType stream_type,
const AcMesh& host_mesh, const int3& src, const int3& dst,
const int num_vertices);
/** For every vertex buffer, copies `num_vertices` elements from the device, starting at vertex
index `src` (linearised with the device's local configuration), into `host_mesh`, starting at
vertex index `dst` (linearised with host_mesh->info). */
AcResult copyMeshToHost(const Device device, const StreamType stream_type,
const int3& src, const int3& dst, const int num_vertices,
AcMesh* host_mesh);
/** For every vertex buffer, enqueues an asynchronous peer copy of `num_vertices` elements from
device `src` (starting at vertex index `src_idx`) to device `dst` (starting at vertex index
`dst_idx`) on stream `stream_type` of the source device. */
AcResult copyMeshDeviceToDevice(const Device src, const StreamType stream_type, const int3& src_idx,
Device dst, const int3& dst_idx, const int num_vertices);
/** Swaps the input/output buffers used in computations */
AcResult swapBuffers(const Device device);

112
src/core/errchk.h Normal file
View File

@@ -0,0 +1,112 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// clang-format off
/*
* =============================================================================
* General error checking
* =============================================================================
*/
// ERROR(str): prints a timestamp, the file and line of the call site and the C string `str`
// to stderr, flushes stderr and terminates the program with exit(EXIT_FAILURE).
// NOTE(review): exit() does not return, so the trailing abort() is never reached.
#define ERROR(str) \
{ \
time_t t; time(&t); \
fprintf(stderr, "%s", ctime(&t)); \
fprintf(stderr, "\tError in file %s line %d: %s\n", \
__FILE__, __LINE__, str); \
fflush(stderr); \
exit(EXIT_FAILURE); \
abort(); \
}
// WARNING(str): same report format as ERROR, but execution continues afterwards.
#define WARNING(str) \
{ \
time_t t; time(&t); \
fprintf(stderr, "%s", ctime(&t)); \
fprintf(stderr, "\tWarning in file %s line %d: %s\n", \
__FILE__, __LINE__, str); \
fflush(stderr); \
}
// ERRCHK / WARNCHK / ERRCHK_ALWAYS: evaluate `retval` and raise ERROR (or WARNING) with the
// stringified expression when it is false.
// DO NOT REMOVE BRACKETS AROUND RETVAL. F.ex. if (!a < b) vs if (!(a < b)).
#define ERRCHK(retval) { if (!(retval)) ERROR(#retval " was false"); }
#define WARNCHK(retval) { if (!(retval)) WARNING(#retval " was false"); }
#define ERRCHK_ALWAYS(retval) { if (!(retval)) ERROR(#retval " was false"); }
/*
* =============================================================================
* CUDA-specific error checking
* =============================================================================
*/
#ifdef __CUDACC__
/**
 * Reports a failed CUDA call. Does nothing when `code` is cudaSuccess.
 * Otherwise prints a timestamp, the given file/line and the CUDA error string
 * to stderr and, when `abort` is true, terminates the program with exit(code).
 */
static inline void
cuda_assert(cudaError_t code, const char* file, int line, bool abort = true)
{
    if (code == cudaSuccess)
        return;

    time_t now;
    time(&now);
    fprintf(stderr, "%s", ctime(&now));
    fprintf(stderr, "\tCUDA error in file %s line %d: %s\n", file, line,
            cudaGetErrorString(code));
    fflush(stderr);

    if (abort)
        exit(code);
}
// Release builds (NDEBUG): ERRCHK/WARNCHK expand to nothing, so their argument is not even
// evaluated. ERRCHK_CUDA/WARNCHK_CUDA still execute the wrapped call but ignore its return code.
// ERRCHK_CUDA_KERNEL() becomes an empty block.
#ifdef NDEBUG
#undef ERRCHK
#undef WARNCHK
#define ERRCHK(params)
#define WARNCHK(params)
#define ERRCHK_CUDA(params) params;
#define WARNCHK_CUDA(params) params;
#define ERRCHK_CUDA_KERNEL() {}
#else
// Debug builds: every wrapped CUDA call is checked with cuda_assert. ERRCHK_CUDA terminates on
// failure, WARNCHK_CUDA only reports it.
#define ERRCHK_CUDA(params) { cuda_assert((params), __FILE__, __LINE__); }
#define WARNCHK_CUDA(params) { cuda_assert((params), __FILE__, __LINE__, false); }
// Checks the last error and then synchronizes the whole device, so that errors raised by a
// preceding kernel launch are reported at the call site.
#define ERRCHK_CUDA_KERNEL() \
{ \
ERRCHK_CUDA(cudaPeekAtLastError()); \
ERRCHK_CUDA(cudaDeviceSynchronize()); \
}
#endif
#endif
// Variants that are checked regardless of NDEBUG.
// NOTE(review): these are defined outside the __CUDACC__ guard but expand to cuda_assert, which
// is only declared inside it; they can therefore only be used in code compiled as CUDA.
#define ERRCHK_CUDA_ALWAYS(params) { cuda_assert((params), __FILE__, __LINE__); }
#define ERRCHK_CUDA_KERNEL_ALWAYS() \
{ \
ERRCHK_CUDA_ALWAYS(cudaPeekAtLastError()); \
ERRCHK_CUDA_ALWAYS(cudaDeviceSynchronize()); \
}
// clang-format on

2
src/core/kernels/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# Ignore the generated headers (gitignore expects one pattern per line)
stencil_process.cuh
stencil_assembly.cuh

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,794 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
/**
 * Applies periodic boundary conditions to one vertex buffer.
 *
 * Each thread handles one destination vertex in [start, end). Vertices inside
 * the computational domain are left untouched; every other vertex is
 * overwritten with the value of the vertex it maps to when its coordinates
 * are wrapped periodically into the computational domain.
 */
__global__ void
kernel_periodic_boundconds(const int3 start, const int3 end, AcReal* vertex_buffer)
{
    const int dst_x = start.x + threadIdx.x + blockIdx.x * blockDim.x;
    const int dst_y = start.y + threadIdx.y + blockIdx.y * blockDim.y;
    const int dst_z = start.z + threadIdx.z + blockIdx.z * blockDim.z;

    // The grid may overshoot the requested range when the range is not a
    // multiple of the thread block dimensions
    const bool past_range = dst_x >= end.x || dst_y >= end.y || dst_z >= end.z;
    if (past_range)
        return;

    const int x_min = DCONST_INT(AC_nx_min);
    const int y_min = DCONST_INT(AC_ny_min);
    const int z_min = DCONST_INT(AC_nz_min);

    // Boundary conditions only touch the ghost zones: skip interior vertices
    const bool interior = dst_x >= x_min && dst_x < DCONST_INT(AC_nx_max) &&
                          dst_y >= y_min && dst_y < DCONST_INT(AC_ny_max) &&
                          dst_z >= z_min && dst_z < DCONST_INT(AC_nz_max);
    if (interior)
        return;

    // Source vertex: shift to domain-local coordinates, add one period so the
    // value is non-negative, wrap, and shift back to buffer coordinates
    const int nx = DCONST_INT(AC_nx);
    const int ny = DCONST_INT(AC_ny);
    const int nz = DCONST_INT(AC_nz);
    const int src_x = (dst_x - x_min + nx) % nx + x_min;
    const int src_y = (dst_y - y_min + ny) % ny + y_min;
    const int src_z = (dst_z - z_min + nz) % nz + z_min;

    const int from = DEVICE_VTXBUF_IDX(src_x, src_y, src_z);
    const int to   = DEVICE_VTXBUF_IDX(dst_x, dst_y, dst_z);
    vertex_buffer[to] = vertex_buffer[from];
}
void
periodic_boundconds(const cudaStream_t stream, const int3& start, const int3& end, AcReal* vertex_buffer)
{
const dim3 tpb(8,2,8);
const dim3 bpg((unsigned int)ceil((end.x - start.x) / (float)tpb.x),
(unsigned int)ceil((end.y - start.y) / (float)tpb.y),
(unsigned int)ceil((end.z - start.z) / (float)tpb.z));
kernel_periodic_boundconds<<<bpg, tpb, 0, stream>>>(start, end, vertex_buffer);
ERRCHK_CUDA_KERNEL();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
#include <assert.h>
/** Identity overload: `i` already is a linear index. */
static __device__ __forceinline__ int
IDX(const int i)
{
    const int linear = i;
    return linear;
}

/** Linear vertex buffer index of the vertex (i, j, k). */
static __device__ __forceinline__ int
IDX(const int i, const int j, const int k)
{
    const int linear = DEVICE_VTXBUF_IDX(i, j, k);
    return linear;
}

/** Linear vertex buffer index of the vertex (idx.x, idx.y, idx.z). */
static __device__ __forceinline__ int
IDX(const int3 idx)
{
    return IDX(idx.x, idx.y, idx.z);
}
/**
 * Returns the 3x3 matrix that rotates a vector by `radians` about the z axis.
 *
 * The third row is (0, 0, 1) so that the z component of the rotated vector is
 * preserved; a row of zeros would project the vector onto the xy plane.
 */
static __forceinline__ AcMatrix
create_rotz(const AcReal radians)
{
    const AcReal c = cos(radians);
    const AcReal s = sin(radians);

    AcMatrix mat;
    mat.row[0] = (AcReal3){c, -s, 0};
    mat.row[1] = (AcReal3){s, c, 0};
    mat.row[2] = (AcReal3){0, 0, 1};
    return mat;
}
// Single precision only: from this point on, sin/cos/exp are textually replaced by the CUDA
// intrinsics __sinf/__cosf/__expf and rsqrt by rsqrtf. Being macros, they affect every later
// use of these names in the translation unit.
// NOTE(review): the __*f intrinsics are device-only and trade accuracy for speed according to
// the CUDA math API documentation - confirm that the reduced accuracy is acceptable.
#if AC_DOUBLE_PRECISION == 0
#define sin __sinf
#define cos __cosf
#define exp __expf
#define rsqrt rsqrtf // hardware reciprocal sqrt
#endif // AC_DOUBLE_PRECISION == 0
/*
typedef struct {
int i, j, k;
} int3;*/
/*
* =============================================================================
* Level 0 (Input Assembly Stage)
* =============================================================================
*/
/*
* =============================================================================
* Level 0.1 (Read stencil elements and solve derivatives)
* =============================================================================
*/
/**
 * Central finite-difference approximation of the first derivative.
 *
 * `pencil` holds STENCIL_ORDER + 1 samples with the evaluation point in the
 * middle; `inv_ds` is the inverse of the sample spacing.
 */
static __device__ __forceinline__ AcReal
first_derivative(const AcReal* __restrict__ pencil, const AcReal inv_ds)
{
    // weights[d] multiplies the difference of the two samples at distance d
#if STENCIL_ORDER == 2
    const AcReal weights[] = {0, 1.0 / 2.0};
#elif STENCIL_ORDER == 4
    const AcReal weights[] = {0, 2.0 / 3.0, -1.0 / 12.0};
#elif STENCIL_ORDER == 6
    const AcReal weights[] = {0, 3.0 / 4.0, -3.0 / 20.0, 1.0 / 60.0};
#elif STENCIL_ORDER == 8
    const AcReal weights[] = {0, 4.0 / 5.0, -1.0 / 5.0, 4.0 / 105.0,
                              -1.0 / 280.0};
#endif

#define MID (STENCIL_ORDER / 2)
    AcReal sum = 0;
#pragma unroll
    for (int dist = 1; dist <= MID; ++dist) {
        const AcReal ahead  = pencil[MID + dist];
        const AcReal behind = pencil[MID - dist];
        sum += weights[dist] * (ahead - behind);
    }
    return sum * inv_ds;
}
/**
 * Central finite-difference approximation of the second derivative.
 *
 * `pencil` holds STENCIL_ORDER + 1 samples with the evaluation point in the
 * middle; `inv_ds` is the inverse of the sample spacing.
 */
static __device__ __forceinline__ AcReal
second_derivative(const AcReal* __restrict__ pencil, const AcReal inv_ds)
{
    // weights[0] multiplies the centre sample, weights[d] the sum of the two
    // samples at distance d
#if STENCIL_ORDER == 2
    const AcReal weights[] = {-2., 1.};
#elif STENCIL_ORDER == 4
    const AcReal weights[] = {-5.0/2.0, 4.0/3.0, -1.0/12.0};
#elif STENCIL_ORDER == 6
    const AcReal weights[] = {-49.0 / 18.0, 3.0 / 2.0, -3.0 / 20.0,
                              1.0 / 90.0};
#elif STENCIL_ORDER == 8
    const AcReal weights[] = {-205.0 / 72.0, 8.0 / 5.0, -1.0 / 5.0,
                              8.0 / 315.0, -1.0 / 560.0};
#endif

#define MID (STENCIL_ORDER / 2)
    AcReal sum = weights[0] * pencil[MID];
#pragma unroll
    for (int dist = 1; dist <= MID; ++dist) {
        const AcReal ahead  = pencil[MID + dist];
        const AcReal behind = pencil[MID - dist];
        sum += weights[dist] * (ahead + behind);
    }
    const AcReal scaled_once = sum * inv_ds;
    return scaled_once * inv_ds;
}
/**
 * Finite-difference approximation of a mixed second derivative, built from
 * samples along the two diagonals of the plane spanned by the two axes.
 *
 * pencil_a: STENCIL_ORDER + 1 samples along the diagonal on which both
 *           coordinates increase together.
 * pencil_b: STENCIL_ORDER + 1 samples along the diagonal on which the second
 *           coordinate decreases while the first increases.
 * inv_ds_a, inv_ds_b: inverse mesh spacings of the two axes.
 *
 * With d_i the first-derivative weights of the same order, the weights used
 * here are c_i = d_i / (2 i). They satisfy sum_i 4 i^2 c_i = 1, which is the
 * consistency condition for this diagonal scheme.
 */
static __device__ __forceinline__ AcReal
cross_derivative(const AcReal* __restrict__ pencil_a,
                 const AcReal* __restrict__ pencil_b, const AcReal inv_ds_a,
                 const AcReal inv_ds_b)
{
#if STENCIL_ORDER == 2
    const AcReal coefficients[] = {0, 1.0 / 4.0};
#elif STENCIL_ORDER == 4
    // (2/3) / 2 and (-1/12) / 4; replaces the former placeholder values
    const AcReal coefficients[] = {0, 1.0 / 3.0, -1.0 / 48.0};
#elif STENCIL_ORDER == 6
    const AcReal fac = (1. / 720.);
    const AcReal coefficients[] = {0.0 * fac, 270.0 * fac, -27.0 * fac,
                                   2.0 * fac};
#elif STENCIL_ORDER == 8
    const AcReal fac = (1. / 20160.);
    const AcReal coefficients[] = {0.0 * fac, 8064. * fac, -1008. * fac,
                                   128. * fac, -9. * fac};
#endif

#define MID (STENCIL_ORDER / 2)
    AcReal res = AcReal(0.);
#pragma unroll
    for (int i = 1; i <= MID; ++i) {
        res += coefficients[i] * (pencil_a[MID + i] + pencil_a[MID - i] -
                                  pencil_b[MID + i] - pencil_b[MID - i]);
    }
    return res * inv_ds_a * inv_ds_b;
}
/** First derivative of `arr` along x at `vertexIdx`. */
static __device__ __forceinline__ AcReal
derx(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    // Gather the samples along x, centred on vertexIdx
    AcReal samples[STENCIL_ORDER + 1];
#pragma unroll
    for (int n = -radius; n <= radius; ++n)
        samples[n + radius] = arr[IDX(vertexIdx.x + n, vertexIdx.y, vertexIdx.z)];

    return first_derivative(samples, DCONST_REAL(AC_inv_dsx));
}
/** Second derivative of `arr` along x at `vertexIdx`. */
static __device__ __forceinline__ AcReal
derxx(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    // Gather the samples along x, centred on vertexIdx
    AcReal samples[STENCIL_ORDER + 1];
#pragma unroll
    for (int n = -radius; n <= radius; ++n)
        samples[n + radius] = arr[IDX(vertexIdx.x + n, vertexIdx.y, vertexIdx.z)];

    return second_derivative(samples, DCONST_REAL(AC_inv_dsx));
}
/** Mixed xy derivative of `arr` at `vertexIdx`, sampled along both diagonals of the xy plane. */
static __device__ __forceinline__ AcReal
derxy(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    AcReal rising[STENCIL_ORDER + 1];  // x and y increase together
    AcReal falling[STENCIL_ORDER + 1]; // y decreases while x increases
#pragma unroll
    for (int n = -radius; n <= radius; ++n) {
        rising[n + radius]  = arr[IDX(vertexIdx.x + n, vertexIdx.y + n, vertexIdx.z)];
        falling[n + radius] = arr[IDX(vertexIdx.x + n, vertexIdx.y - n, vertexIdx.z)];
    }

    return cross_derivative(rising, falling, DCONST_REAL(AC_inv_dsx),
                            DCONST_REAL(AC_inv_dsy));
}
/** Mixed xz derivative of `arr` at `vertexIdx`, sampled along both diagonals of the xz plane. */
static __device__ __forceinline__ AcReal
derxz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    AcReal rising[STENCIL_ORDER + 1];  // x and z increase together
    AcReal falling[STENCIL_ORDER + 1]; // z decreases while x increases
#pragma unroll
    for (int n = -radius; n <= radius; ++n) {
        rising[n + radius]  = arr[IDX(vertexIdx.x + n, vertexIdx.y, vertexIdx.z + n)];
        falling[n + radius] = arr[IDX(vertexIdx.x + n, vertexIdx.y, vertexIdx.z - n)];
    }

    return cross_derivative(rising, falling, DCONST_REAL(AC_inv_dsx),
                            DCONST_REAL(AC_inv_dsz));
}
/** First derivative of `arr` along y at `vertexIdx`. */
static __device__ __forceinline__ AcReal
dery(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    // Gather the samples along y, centred on vertexIdx
    AcReal samples[STENCIL_ORDER + 1];
#pragma unroll
    for (int n = -radius; n <= radius; ++n)
        samples[n + radius] = arr[IDX(vertexIdx.x, vertexIdx.y + n, vertexIdx.z)];

    return first_derivative(samples, DCONST_REAL(AC_inv_dsy));
}
/** Second derivative of `arr` along y at `vertexIdx`. */
static __device__ __forceinline__ AcReal
deryy(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    // Gather the samples along y, centred on vertexIdx
    AcReal samples[STENCIL_ORDER + 1];
#pragma unroll
    for (int n = -radius; n <= radius; ++n)
        samples[n + radius] = arr[IDX(vertexIdx.x, vertexIdx.y + n, vertexIdx.z)];

    return second_derivative(samples, DCONST_REAL(AC_inv_dsy));
}
/** Mixed yz derivative of `arr` at `vertexIdx`, sampled along both diagonals of the yz plane. */
static __device__ __forceinline__ AcReal
deryz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    AcReal rising[STENCIL_ORDER + 1];  // y and z increase together
    AcReal falling[STENCIL_ORDER + 1]; // z decreases while y increases
#pragma unroll
    for (int n = -radius; n <= radius; ++n) {
        rising[n + radius]  = arr[IDX(vertexIdx.x, vertexIdx.y + n, vertexIdx.z + n)];
        falling[n + radius] = arr[IDX(vertexIdx.x, vertexIdx.y + n, vertexIdx.z - n)];
    }

    return cross_derivative(rising, falling, DCONST_REAL(AC_inv_dsy),
                            DCONST_REAL(AC_inv_dsz));
}
/** First derivative of `arr` along z at `vertexIdx`. */
static __device__ __forceinline__ AcReal
derz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    // Gather the samples along z, centred on vertexIdx
    AcReal samples[STENCIL_ORDER + 1];
#pragma unroll
    for (int n = -radius; n <= radius; ++n)
        samples[n + radius] = arr[IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z + n)];

    return first_derivative(samples, DCONST_REAL(AC_inv_dsz));
}
/** Second derivative of `arr` along z at `vertexIdx`. */
static __device__ __forceinline__ AcReal
derzz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int radius = STENCIL_ORDER / 2;

    // Gather the samples along z, centred on vertexIdx
    AcReal samples[STENCIL_ORDER + 1];
#pragma unroll
    for (int n = -radius; n <= radius; ++n)
        samples[n + radius] = arr[IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z + n)];

    return second_derivative(samples, DCONST_REAL(AC_inv_dsz));
}
/*
* =============================================================================
* Level 0.2 (Caching functions)
* =============================================================================
*/
#include "stencil_assembly.cuh"
/*
typedef struct {
AcRealData x;
AcRealData y;
AcRealData z;
} AcReal3Data;
static __device__ __forceinline__ AcReal3Data
read_data(const int i, const int j, const int k,
AcReal* __restrict__ buf[], const int3& handle)
{
AcReal3Data data;
data.x = read_data(i, j, k, buf, handle.x);
data.y = read_data(i, j, k, buf, handle.y);
data.z = read_data(i, j, k, buf, handle.z);
return data;
}
*/
/*
* =============================================================================
* Level 0.3 (Built-in functions available during the Stencil Processing Stage)
* =============================================================================
*/
/** Component-wise difference a - b. */
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a, const AcReal3& b)
{
    AcReal3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}

/** Component-wise sum a + b. */
static __host__ __device__ __forceinline__ AcReal3
operator+(const AcReal3& a, const AcReal3& b)
{
    AcReal3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}

/** Component-wise negation. */
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a)
{
    AcReal3 negated;
    negated.x = -a.x;
    negated.y = -a.y;
    negated.z = -a.z;
    return negated;
}

/** Scales every component of `b` by the scalar `a`. */
static __host__ __device__ __forceinline__ AcReal3
operator*(const AcReal a, const AcReal3& b)
{
    AcReal3 scaled;
    scaled.x = a * b.x;
    scaled.y = a * b.y;
    scaled.z = a * b.z;
    return scaled;
}
/** Dot product of `a` and `b`. */
static __host__ __device__ __forceinline__ AcReal
dot(const AcReal3& a, const AcReal3& b)
{
    const AcReal xx = a.x * b.x;
    const AcReal yy = a.y * b.y;
    const AcReal zz = a.z * b.z;
    return xx + yy + zz;
}

/** Matrix-vector product: each component is the dot product of one row of `aa` with `x`. */
static __host__ __device__ __forceinline__ AcReal3
mul(const AcMatrix& aa, const AcReal3& x)
{
    AcReal3 product;
    product.x = dot(aa.row[0], x);
    product.y = dot(aa.row[1], x);
    product.z = dot(aa.row[2], x);
    return product;
}

/** Cross product a x b. */
static __host__ __device__ __forceinline__ AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
    return (AcReal3){a.y * b.z - a.z * b.y,
                     a.z * b.x - a.x * b.z,
                     a.x * b.y - a.y * b.x};
}
/** True when `a` is neither NaN nor infinite. */
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal a)
{
    const bool broken = isnan(a) || isinf(a);
    return !broken;
}

/** True when every component of `a` is neither NaN nor infinite. */
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal3& a)
{
    if (!is_valid(a.x))
        return false;
    if (!is_valid(a.y))
        return false;
    return is_valid(a.z);
}
/*
* =============================================================================
* Level 1 (Stencil Processing Stage)
* =============================================================================
*/
/*
* =============================================================================
* Level 1.1 (Terms)
* =============================================================================
*/
/** Laplacian of a scalar field: the sum of the diagonal entries of hessian(data). */
static __device__ __forceinline__ AcReal
laplace(const AcRealData& data)
{
    const AcReal h00 = hessian(data).row[0].x;
    const AcReal h11 = hessian(data).row[1].y;
    const AcReal h22 = hessian(data).row[2].z;
    return h00 + h11 + h22;
}
/** Divergence of a vector field: x part of gradient(vec.x) + y part of gradient(vec.y) + z part of gradient(vec.z). */
static __device__ __forceinline__ AcReal
divergence(const AcReal3Data& vec)
{
    const AcReal gx_x = gradient(vec.x).x;
    const AcReal gy_y = gradient(vec.y).y;
    const AcReal gz_z = gradient(vec.z).z;
    return gx_x + gy_y + gz_z;
}
/** Component-wise Laplacian of a vector field. */
static __device__ __forceinline__ AcReal3
laplace_vec(const AcReal3Data& vec)
{
    AcReal3 lap;
    lap.x = laplace(vec.x);
    lap.y = laplace(vec.y);
    lap.z = laplace(vec.z);
    return lap;
}
/** Curl of a vector field, assembled from the components of the three gradients. */
static __device__ __forceinline__ AcReal3
curl(const AcReal3Data& vec)
{
    AcReal3 rot;
    rot.x = gradient(vec.z).y - gradient(vec.y).z;
    rot.y = gradient(vec.x).z - gradient(vec.z).x;
    rot.z = gradient(vec.y).x - gradient(vec.x).y;
    return rot;
}
/**
 * Gradient of the divergence of a vector field. Component i is the sum over
 * the three fields of entry (i, j) of the Hessian of field j.
 */
static __device__ __forceinline__ AcReal3
gradient_of_divergence(const AcReal3Data& vec)
{
    AcReal3 grad_div;
    grad_div.x = hessian(vec.x).row[0].x + hessian(vec.y).row[0].y + hessian(vec.z).row[0].z;
    grad_div.y = hessian(vec.x).row[1].x + hessian(vec.y).row[1].y + hessian(vec.z).row[1].z;
    grad_div.z = hessian(vec.x).row[2].x + hessian(vec.y).row[2].y + hessian(vec.z).row[2].z;
    return grad_div;
}
/**
 * Builds the symmetric matrix S from the gradients of the three components
 * of `vec`: the diagonal holds 2/3 of the matching gradient entry minus 1/3
 * of the other two diagonal gradient entries, the off-diagonals hold the
 * average of the two mirrored gradient entries.
 */
static __device__ __forceinline__ AcMatrix
stress_tensor(const AcReal3Data& vec)
{
    // g<a>_<b> is component <b> of gradient(vec.<a>)
    const AcReal gx_x = gradient(vec.x).x;
    const AcReal gx_y = gradient(vec.x).y;
    const AcReal gx_z = gradient(vec.x).z;
    const AcReal gy_x = gradient(vec.y).x;
    const AcReal gy_y = gradient(vec.y).y;
    const AcReal gy_z = gradient(vec.y).z;
    const AcReal gz_x = gradient(vec.z).x;
    const AcReal gz_y = gradient(vec.z).y;
    const AcReal gz_z = gradient(vec.z).z;

    AcMatrix S;

    // Diagonal
    S.row[0].x = AcReal(2. / 3.) * gx_x - AcReal(1. / 3.) * (gy_y + gz_z);
    S.row[1].y = AcReal(2. / 3.) * gy_y - AcReal(1. / 3.) * (gx_x + gz_z);
    S.row[2].z = AcReal(2. / 3.) * gz_z - AcReal(1. / 3.) * (gx_x + gy_y);

    // Upper triangle
    S.row[0].y = AcReal(1. / 2.) * (gx_y + gy_x);
    S.row[0].z = AcReal(1. / 2.) * (gx_z + gz_x);
    S.row[1].z = AcReal(1. / 2.) * (gy_z + gz_y);

    // Lower triangle mirrors the upper one
    S.row[1].x = S.row[0].y;
    S.row[2].x = S.row[0].z;
    S.row[2].y = S.row[1].z;

    return S;
}
/** Sum of the squares of all entries of `mat` (each row dotted with itself). */
static __device__ __forceinline__ AcReal
contract(const AcMatrix& mat)
{
    AcReal total = 0;
    total += dot(mat.row[0], mat.row[0]);
    total += dot(mat.row[1], mat.row[1]);
    total += dot(mat.row[2], mat.row[2]);
    return total;
}
/*
* =============================================================================
* Level 1.2 (Equations)
* =============================================================================
*/
/** Euclidean length of `vec`. */
static __device__ __forceinline__ AcReal
length(const AcReal3& vec)
{
    const AcReal squared_norm = vec.x * vec.x + vec.y * vec.y + vec.z * vec.z;
    return sqrt(squared_norm);
}

/** Reciprocal of the Euclidean length of `vec`, computed with rsqrt. */
static __device__ __forceinline__ AcReal
reciprocal_len(const AcReal3& vec)
{
    const AcReal squared_norm = vec.x * vec.x + vec.y * vec.y + vec.z * vec.z;
    return rsqrt(squared_norm);
}

/** `vec` scaled by the reciprocal of its length. */
static __device__ __forceinline__ AcReal3
normalized(const AcReal3& vec)
{
    return reciprocal_len(vec) * vec;
}
// Sinusoidal forcing
// https://arxiv.org/pdf/1704.04676.pdf
__constant__ AcReal3 forcing_vec;
__constant__ AcReal forcing_phi;
/**
 * Forcing term at the vertex (i, j, k).
 *
 * The vertex position is measured from the centre of the computational
 * domain. The result is `forcing_vec` scaled by a wave factor that depends on
 * dot(position, forcing_vec) and `forcing_phi`, and by the squared reciprocal
 * distance from the centre, where the reciprocal distance is clamped to
 * [0, 2] and set to 0 when it is not finite.
 */
static __device__ __forceinline__ AcReal3
forcing(const int i, const int j, const int k)
{
#define DOMAIN_SIZE_X (DCONST_INT(AC_nx) * DCONST_REAL(AC_dsx))
#define DOMAIN_SIZE_Y (DCONST_INT(AC_ny) * DCONST_REAL(AC_dsy))
#define DOMAIN_SIZE_Z (DCONST_INT(AC_nz) * DCONST_REAL(AC_dsz))
    // Position relative to the domain centre
    AcReal3 position;
    position.x = (i - DCONST_INT(AC_nx_min)) * DCONST_REAL(AC_dsx) - AcReal(.5) * DOMAIN_SIZE_X;
    position.y = (j - DCONST_INT(AC_ny_min)) * DCONST_REAL(AC_dsy) - AcReal(.5) * DOMAIN_SIZE_Y;
    position.z = (k - DCONST_INT(AC_nz_min)) * DCONST_REAL(AC_dsz) - AcReal(.5) * DOMAIN_SIZE_Z;

    AcReal inv_dist = reciprocal_len(position);
    const bool not_finite = isnan(inv_dist) || isinf(inv_dist);
    if (not_finite)
        inv_dist = 0;
    else if (inv_dist > 2) // hack to make it cool
        inv_dist = 2;

    const AcReal phase = dot(position, forcing_vec);
    const AcReal waves = cos(phase) * cos(forcing_phi) - sin(phase) * sin(forcing_phi);

    const AcReal amplitude = inv_dist * inv_dist * waves;
    return amplitude * forcing_vec;
}
// Note: LNT0 and LNRHO0 must be set very carefully: if their magnitude differs from that of the other values in the mesh, we will inherently lose precision
#define LNT0 (AcReal(0.0))
#define LNRHO0 (AcReal(0.0))
#define H_CONST (AcReal(0.0))
#define C_CONST (AcReal(0.0))
/**
 * One substep of the three-stage low-storage Runge-Kutta scheme
 * (Williamson 1980) for a scalar.
 *
 * step_number:    substep index, 0, 1 or 2 (compile-time).
 * state_previous: value before the previous substep.
 * state_current:  value after the previous substep.
 * rate_of_change: right-hand side evaluated at state_current.
 * dt:             time step.
 *
 * Returns the value after this substep, or NAN for any other step_number.
 */
template <int step_number>
static __device__ __forceinline__ AcReal
rk3_integrate(const AcReal state_previous, const AcReal state_current,
              const AcReal rate_of_change, const AcReal dt)
{
    // Williamson (1980)
    // Both tables carry a leading dummy entry, so substep s uses entry s + 1.
    // This keeps beta[step_number] (the previous substep's beta) a valid
    // access for every step_number >= 0.
    const AcReal alpha[] = {0, AcReal(.0), AcReal(-5. / 9.), AcReal(-153. / 128.)};
    const AcReal beta[]  = {0, AcReal(1. / 3.), AcReal(15. / 16.),
                            AcReal(8. / 15.)};

    if (step_number == 0)
        return state_current + beta[step_number + 1] * rate_of_change * dt;

    if (step_number == 1 || step_number == 2) {
        // Contribution carried over from the previous substep
        const AcReal carried = alpha[step_number + 1] * (AcReal(1.) / beta[step_number]) *
                               (state_current - state_previous);
        return state_current + beta[step_number + 1] * (carried + rate_of_change * dt);
    }

    return NAN;
}
/*
template <int step_number>
static __device__ __forceinline__ AcReal
rk3_integrate_scal(const AcReal state_previous, const AcReal state_current,
const AcReal rate_of_change, const AcReal dt)
{
// Williamson (1980)
const AcReal alpha[] = {AcReal(.0), AcReal(-5. / 9.), AcReal(-153. / 128.)};
const AcReal beta[] = {AcReal(1. / 3.), AcReal(15. / 16.),
AcReal(8. / 15.)};
switch (step_number) {
case 0:
return state_current + beta[step_number] * rate_of_change * dt;
case 1: // Fallthrough
case 2:
return state_current +
beta[step_number] *
(alpha[step_number] * (AcReal(1.) / beta[step_number - 1]) *
(state_current - state_previous) +
rate_of_change * dt);
default:
return NAN;
}
}
*/
/** Vector overload: applies the scalar rk3_integrate to each component independently. */
template <int step_number>
static __device__ __forceinline__ AcReal3
rk3_integrate(const AcReal3 state_previous, const AcReal3 state_current,
              const AcReal3 rate_of_change, const AcReal dt)
{
    AcReal3 next;
    next.x = rk3_integrate<step_number>(state_previous.x, state_current.x, rate_of_change.x, dt);
    next.y = rk3_integrate<step_number>(state_previous.y, state_current.y, rate_of_change.y, dt);
    next.z = rk3_integrate<step_number>(state_previous.z, state_current.z, rate_of_change.z, dt);
    return next;
}
// Shorthand for rk3_integrate: requires a compile-time `step_number` to be in scope at the
// point of use and passes value(state_current) as the current state.
#define rk3(state_previous, state_current, rate_of_change, dt)\
rk3_integrate<step_number>(state_previous, value(state_current), rate_of_change, dt)
/*
template <int step_number>
static __device__ __forceinline__ AcReal
rk3_integrate(const int idx, const AcReal out, const int handle,
const AcRealData& in_cached, const AcReal rate_of_change, const AcReal dt)
{
return rk3_integrate_scal<step_number>(out, value(in_cached), rate_of_change, dt);
}
template <int step_number>
static __device__ __forceinline__ AcReal3
rk3_integrate(const int idx, const AcReal3 out, const int3& handle,
const AcReal3Data& in_cached, const AcReal3& rate_of_change, const AcReal dt)
{
return (AcReal3) {
rk3_integrate<step_number>(idx, out, handle.x, in_cached.x, rate_of_change.x, dt),
rk3_integrate<step_number>(idx, out, handle.y, in_cached.y, rate_of_change.y, dt),
rk3_integrate<step_number>(idx, out, handle.z, in_cached.z, rate_of_change.z, dt)
};
}
#define RK3(handle, in_cached, rate_of_change, dt) \
rk3_integrate<step_number>(idx, buffer.out, handle, in_cached, rate_of_change, dt)
*/
/*
* =============================================================================
* Level 1.3 (Kernels)
* =============================================================================
*/
/** Stores `value` at element `idx` of the output buffer selected by `handle`. */
static __device__ void
write(AcReal* __restrict__ out[], const int handle, const int idx, const AcReal value)
{
    AcReal* const target = out[handle];
    target[idx] = value;
}

/** Stores the three components of `value` at element `idx` of the buffers selected by vec.x, vec.y and vec.z. */
static __device__ void
write(AcReal* __restrict__ out[], const int3 vec, const int idx, const AcReal3 value)
{
    out[vec.x][idx] = value.x;
    out[vec.y][idx] = value.y;
    out[vec.z][idx] = value.z;
}
/** Returns element `idx` of the buffer selected by `handle`. */
static __device__ AcReal
read_out(const int idx, AcReal* __restrict__ field[], const int handle)
{
    const AcReal* const source = field[handle];
    return source[idx];
}

/** Returns element `idx` of the three buffers selected by handle.x, handle.y and handle.z. */
static __device__ AcReal3
read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
{
    AcReal3 values;
    values.x = field[handle.x][idx];
    values.y = field[handle.y][idx];
    values.z = field[handle.z][idx];
    return values;
}
// Accessors for use inside kernels that declare the boilerplate below: they rely on `buffer`,
// `idx` and `vertexIdx` being in scope at the point of use.
#define WRITE_OUT(handle, value) (write(buffer.out, handle, idx, value))
#define READ(handle) (read_data(vertexIdx, buffer.in, handle))
#define READ_OUT(handle) (read_out(idx, buffer.out, handle))
// also write for clarity here also, not for the DSL
//#define WRITE(HANDLE) (write(idx, ...)) s.t. we don't have to insert boilerplat in the mid of the function
// Parameter list shared by the generated kernels: the vertex index range [start, end) and the
// vertex buffers.
#define GEN_KERNEL_PARAM_BOILERPLATE \
const int3 start, const int3 end, VertexBufferArray buffer
// Declares `vertexIdx` (this thread's vertex, offset by `start`) and `idx` (its linear buffer
// index). Threads at or beyond `end` return immediately. The asserts require vertexIdx to lie
// inside [AC_n*_min, AC_n*_max) in every dimension.
#define GEN_KERNEL_BUILTIN_VARIABLES_BOILERPLATE() \
const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x,\
threadIdx.y + blockIdx.y * blockDim.y + start.y,\
threadIdx.z + blockIdx.z * blockDim.z + start.z};\
if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z)\
return;\
\
\
assert(vertexIdx.x < DCONST_INT(AC_nx_max) && vertexIdx.y < DCONST_INT(AC_ny_max) &&\
vertexIdx.z < DCONST_INT(AC_nz_max));\
\
assert(vertexIdx.x >= DCONST_INT(AC_nx_min) && vertexIdx.y >= DCONST_INT(AC_ny_min) &&\
vertexIdx.z >= DCONST_INT(AC_nz_min));\
\
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
#include "stencil_process.cuh"
/*
* =============================================================================
* Level 2 (Host calls)
* =============================================================================
*/
/** Pseudo-random number in [0, 1], drawn with rand(). */
static AcReal
randf(void)
{
    const AcReal sample = AcReal(rand());
    const AcReal upper  = AcReal(RAND_MAX);
    return sample / upper;
}
/**
 * Launches one integration substep on `stream` for the vertex index range
 * [start, end).
 *
 * stream:      CUDA stream the kernel (and the forcing uploads) are issued on.
 * step_number: substep index; 0 and 1 select solve<0> and solve<1>, any other
 *              value selects solve<2>.
 * start, end:  vertex index range; expects end > start in every dimension.
 * dt:          time step.
 * buffer:      vertex buffers, passed to the kernel by value.
 *
 * With LFORCING enabled, the forcing vector is rotated by a small random
 * angle about the z axis on every call and uploaded to constant memory
 * together with the forcing phase. The uploads are checked with ERRCHK_CUDA.
 *
 * Always returns AC_SUCCESS; CUDA errors are handled by the ERRCHK macros.
 */
AcResult
rk3_step_async(const cudaStream_t stream, const int& step_number, const int3& start, const int3& end,
               const AcReal dt, VertexBufferArray* buffer)
{
    const dim3 tpb(32, 1, 4);

    /////////////////// Forcing
#if LFORCING
    const AcReal ff_scale = AcReal(.2);
    // Static: the forcing direction persists and keeps rotating between calls
    static AcReal3 ff = ff_scale * (AcReal3){1, 0, 0};
    const AcReal radians = randf() * AcReal(2*M_PI) / 360 / 8;
    const AcMatrix rotz = create_rotz(radians);
    ff = mul(rotz, ff);
    ERRCHK_CUDA(cudaMemcpyToSymbolAsync(forcing_vec, &ff, sizeof(ff), 0,
                                        cudaMemcpyHostToDevice, stream));

    const AcReal ff_phi = AcReal(M_PI);//AcReal(2 * M_PI) * randf();
    ERRCHK_CUDA(cudaMemcpyToSymbolAsync(forcing_phi, &ff_phi, sizeof(ff_phi), 0,
                                        cudaMemcpyHostToDevice, stream));
#endif // LFORCING
    //////////////////////////

    const int nx = end.x - start.x;
    const int ny = end.y - start.y;
    const int nz = end.z - start.z;

    // ceil(n / tpb) in integer arithmetic: no float rounding, no float->unsigned cast
    const dim3 bpg((unsigned int)((nx + (int)tpb.x - 1) / (int)tpb.x),
                   (unsigned int)((ny + (int)tpb.y - 1) / (int)tpb.y),
                   (unsigned int)((nz + (int)tpb.z - 1) / (int)tpb.z));

    if (step_number == 0)
        solve<0><<<bpg, tpb, 0, stream>>>(start, end, *buffer, dt);
    else if (step_number == 1)
        solve<1><<<bpg, tpb, 0, stream>>>(start, end, *buffer, dt);
    else
        solve<2><<<bpg, tpb, 0, stream>>>(start, end, *buffer, dt);

    ERRCHK_CUDA_KERNEL();
    return AC_SUCCESS;
}

338
src/core/kernels/reduce.cuh Normal file
View File

@@ -0,0 +1,338 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include "device_globals.cuh"
#include "src/core/errchk.h"
#include "src/core/math_utils.h"
// Function pointer definitions
// ReduceFunc: combines two values into one (see _device_max, _device_min, _device_sum).
typedef AcReal (*ReduceFunc)(const AcReal&, const AcReal&);
// ReduceInitialScalFunc: maps one scalar field value to the value that enters the reduction.
typedef AcReal (*ReduceInitialScalFunc)(const AcReal&);
// ReduceInitialVecFunc: maps the three values of a vector field to the value that enters the
// reduction.
typedef AcReal (*ReduceInitialVecFunc)(const AcReal&, const AcReal&,
const AcReal&);
// clang-format off
/* Comparison funcs */
/** Returns `a` when a > b, otherwise `b`. */
__device__ inline AcReal
_device_max(const AcReal& a, const AcReal& b)
{
    if (a > b)
        return a;
    return b;
}

/** Returns `a` when a < b, otherwise `b`. */
__device__ inline AcReal
_device_min(const AcReal& a, const AcReal& b)
{
    if (a < b)
        return a;
    return b;
}

/** Returns a + b. */
__device__ inline AcReal
_device_sum(const AcReal& a, const AcReal& b)
{
    const AcReal total = a + b;
    return total;
}
/* Function used to determine the values used during reduction */
/** Returns `a` unchanged (no absolute value is taken). */
__device__ inline AcReal
_device_length_scal(const AcReal& a)
{
    const AcReal value = AcReal(a);
    return value;
}

/** Returns a squared. */
__device__ inline AcReal
_device_squared_scal(const AcReal& a)
{
    const AcReal squared = (AcReal)(a * a);
    return squared;
}

/** Returns exp(a) squared. */
__device__ inline AcReal
_device_exp_squared_scal(const AcReal& a)
{
    const AcReal ea = exp(a);
    return ea * ea;
}

/** Returns the Euclidean length of (a, b, c). */
__device__ inline AcReal
_device_length_vec(const AcReal& a, const AcReal& b, const AcReal& c)
{
    const AcReal squared_norm = a * a + b * b + c * c;
    return sqrt(squared_norm);
}

/** Returns a^2 + b^2 + c^2. */
__device__ inline AcReal
_device_squared_vec(const AcReal& a, const AcReal& b, const AcReal& c)
{
    const AcReal aa = _device_squared_scal(a);
    const AcReal bb = _device_squared_scal(b);
    const AcReal cc = _device_squared_scal(c);
    return aa + bb + cc;
}

/** Returns exp(a)^2 + exp(b)^2 + exp(c)^2. */
__device__ inline AcReal
_device_exp_squared_vec(const AcReal& a, const AcReal& b, const AcReal& c)
{
    const AcReal ea2 = _device_exp_squared_scal(a);
    const AcReal eb2 = _device_exp_squared_scal(b);
    const AcReal ec2 = _device_exp_squared_scal(c);
    return ea2 + eb2 + ec2;
}
// clang-format on
/**
 * True when (i, j, k) reaches or exceeds the computational domain size
 * (AC_nx, AC_ny, AC_nz) in at least one dimension. Only the upper bounds are
 * checked.
 */
__device__ inline bool
oob(const int& i, const int& j, const int& k)
{
    const bool x_out = i >= d_mesh_info.int_params[AC_nx];
    const bool y_out = j >= d_mesh_info.int_params[AC_ny];
    const bool z_out = k >= d_mesh_info.int_params[AC_nz];
    return x_out || y_out || z_out;
}
/**
 * First reduction pass for a scalar field: every thread handles one vertex of
 * the computational domain, applies `reduce_initial` to its value in `src`
 * and stores the result in `dst`, which is indexed over the computational
 * domain only.
 */
template <ReduceInitialScalFunc reduce_initial>
__global__ void
_kernel_reduce_scal(const __restrict__ AcReal* src, AcReal* dst)
{
    // Coordinates relative to the start of the computational domain
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int z = threadIdx.z + blockIdx.z * blockDim.z;
    if (oob(x, y, z))
        return;

    // `src` includes the ghost zones, hence the *_min offsets
    const int from = DEVICE_VTXBUF_IDX(x + d_mesh_info.int_params[AC_nx_min],
                                       y + d_mesh_info.int_params[AC_ny_min],
                                       z + d_mesh_info.int_params[AC_nz_min]);
    const int to = DEVICE_1D_COMPDOMAIN_IDX(x, y, z);

    dst[to] = reduce_initial(src[from]);
}
/**
 * First reduction pass for a vector field: every thread handles one vertex of
 * the computational domain, applies `reduce_initial` to the three component
 * values at that vertex and stores the result in `dst`, which is indexed over
 * the computational domain only.
 */
template <ReduceInitialVecFunc reduce_initial>
__global__ void
_kernel_reduce_vec(const __restrict__ AcReal* src_a,
                   const __restrict__ AcReal* src_b,
                   const __restrict__ AcReal* src_c, AcReal* dst)
{
    // Coordinates relative to the start of the computational domain
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int z = threadIdx.z + blockIdx.z * blockDim.z;
    if (oob(x, y, z))
        return;

    // The source buffers include the ghost zones, hence the *_min offsets
    const int from = DEVICE_VTXBUF_IDX(x + d_mesh_info.int_params[AC_nx_min],
                                       y + d_mesh_info.int_params[AC_ny_min],
                                       z + d_mesh_info.int_params[AC_nz_min]);
    const int to = DEVICE_1D_COMPDOMAIN_IDX(x, y, z);

    dst[to] = reduce_initial(src_a[from], src_b[from], src_c[from]);
}
///////////////////////////////////////////////////////////////////////////////
// Number of threads per block used by _kernel_reduce; also the number of
// shared-memory slots, so the kernel must not be launched with more threads.
#define BLOCK_SIZE (1024)
// Number of scratchpad elements each thread folds together before the
// block-wide tree reduction. One block therefore covers
// BLOCK_SIZE * ELEMS_PER_THREAD consecutive scratchpad elements.
#define ELEMS_PER_THREAD (32)

/**
 * Second reduction pass: each block reduces its chunk of
 * BLOCK_SIZE * ELEMS_PER_THREAD scratchpad elements in place and writes the
 * partial result to the first element of the chunk.
 *
 * \param src    Scratchpad holding DCONST_INT(AC_nxyz) elements; read and
 *               partially overwritten.
 * \param result Unused by this kernel; kept so that all reduction kernels
 *               share the same parameter list.
 *
 * Every thread of the block reaches every __syncthreads(), including threads
 * that have no element to process. Only slots written by threads that own a
 * valid element are combined, so the element count does not need to be a
 * multiple of the block size, and src is never read at or beyond AC_nxyz.
 */
template <ReduceFunc reduce>
__global__ void
_kernel_reduce(AcReal* src, AcReal* result)
{
    (void)result; // Intentionally unused, see the function comment

    const int tid             = threadIdx.x;
    const int block_start     = blockIdx.x * BLOCK_SIZE * ELEMS_PER_THREAD;
    const int idx             = tid + block_start;
    const int scratchpad_size = DCONST_INT(AC_nxyz);

    // Number of threads in this block that own at least one valid element
    int num_valid = scratchpad_size - block_start;
    if (num_valid > BLOCK_SIZE)
        num_valid = BLOCK_SIZE;
    if (num_valid < 0)
        num_valid = 0;

    __shared__ AcReal smem[BLOCK_SIZE];

    // Placeholder for threads without data; never combined into the result
    AcReal tmp = AcReal(0);
    if (tid < num_valid) {
        tmp = src[idx];
        for (int i = 1; i < ELEMS_PER_THREAD; ++i) {
            const int src_idx = idx + i * BLOCK_SIZE;
            if (src_idx >= scratchpad_size)
                break; // Indices only grow, nothing valid is left to read
            tmp = reduce(tmp, src[src_idx]);
        }
        smem[tid] = tmp;
    }
    __syncthreads();

    // Tree reduction in shared memory. The second condition skips partners
    // whose slot was never written.
    for (int offset = BLOCK_SIZE / 2; offset > 0; offset /= 2) {
        if (tid < offset && tid + offset < num_valid) {
            tmp       = reduce(tmp, smem[tid + offset]);
            smem[tid] = tmp;
        }
        __syncthreads();
    }

    if (tid == 0 && num_valid > 0)
        src[idx] = tmp;
}
/**
 * Final reduction pass: starting from this thread's first element, folds
 * together every (BLOCK_SIZE * ELEMS_PER_THREAD)-th scratchpad element below
 * DCONST_INT(AC_nxyz) and stores the outcome in *result.
 *
 * Every thread that runs this kernel writes *result, so it is meant to be
 * launched with a single thread.
 */
template <ReduceFunc reduce>
__global__ void
_kernel_reduce_block(const __restrict__ AcReal* src, AcReal* result)
{
    const int num_elems = DCONST_INT(AC_nxyz);
    const int stride    = BLOCK_SIZE * ELEMS_PER_THREAD;
    const int first     = threadIdx.x + blockIdx.x * BLOCK_SIZE * ELEMS_PER_THREAD;

    AcReal acc = src[first];
    int pos    = first + stride;
    while (pos < num_elems) {
        acc = reduce(acc, src[pos]);
        pos += stride;
    }

    *result = acc;
}
//////////////////////////////////////////////////////////////////////////////
/**
 * Reduces one scalar vertex buffer over the nx * ny * nz computational domain.
 *
 * Three kernels are queued on `stream`: an element-wise pass that fills
 * reduce_scratchpad, a per-block reduction, and a final single-thread pass
 * that writes *reduce_result. The function then blocks until the result is
 * available on the host.
 *
 * \param stream            Stream on which all device work is issued.
 * \param rtype             RTYPE_MAX, RTYPE_MIN, RTYPE_RMS or RTYPE_RMS_EXP.
 * \param nx, ny, nz        Extent of the computational domain.
 * \param vertex_buffer     Device buffer to reduce.
 * \param reduce_scratchpad Device scratch space with at least nx * ny * nz
 *                          elements.
 * \param reduce_result     Device memory for the single result value.
 * \return For MAX/MIN the reduced value. For RMS/RMS_EXP the sum of the
 *         squared values divided by nx * ny * nz; no square root is taken
 *         here.
 */
AcReal
_reduce_scal(const cudaStream_t stream,
             const ReductionType& rtype, const int& nx, const int& ny,
             const int& nz, const AcReal* vertex_buffer,
             AcReal* reduce_scratchpad, AcReal* reduce_result)
{
    bool solve_mean = false;

    // Launch sizes use integer ceiling division. A floating-point ceil() can
    // round a large element count down in single precision and launch too
    // few blocks.
    const dim3 tpb(32, 4, 1);
    const dim3 bpg((nx + tpb.x - 1) / tpb.x,
                   (ny + tpb.y - 1) / tpb.y,
                   (nz + tpb.z - 1) / tpb.z);

    const int scratchpad_size = nx * ny * nz;
    const int elems_per_block = ELEMS_PER_THREAD * BLOCK_SIZE;
    const int bpg2            = (scratchpad_size + elems_per_block - 1) / elems_per_block;

    switch (rtype) {
    case RTYPE_MAX:
        _kernel_reduce_scal<_device_length_scal>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer, reduce_scratchpad);
        _kernel_reduce<_device_max>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_max>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        break;
    case RTYPE_MIN:
        _kernel_reduce_scal<_device_length_scal>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer, reduce_scratchpad);
        _kernel_reduce<_device_min>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_min>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        break;
    case RTYPE_RMS:
        _kernel_reduce_scal<_device_squared_scal>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer, reduce_scratchpad);
        _kernel_reduce<_device_sum>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_sum>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        solve_mean = true;
        break;
    case RTYPE_RMS_EXP:
        _kernel_reduce_scal<_device_exp_squared_scal>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer, reduce_scratchpad);
        _kernel_reduce<_device_sum>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_sum>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        solve_mean = true;
        break;
    default:
        ERROR("Unrecognized RTYPE");
    }

    // Copy on the same stream as the kernels and wait for it. A plain
    // cudaMemcpy runs on the default stream, which is not ordered after a
    // stream created with cudaStreamNonBlocking.
    AcReal result;
    cudaMemcpyAsync(&result, reduce_result, sizeof(AcReal),
                    cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    if (solve_mean) {
        const AcReal inv_n = AcReal(1.0) / (nx * ny * nz);
        return inv_n * result;
    }
    else {
        return result;
    }
}
/**
 * Reduces a three-component field over the nx * ny * nz computational domain.
 *
 * Three kernels are queued on `stream`: an element-wise pass that combines
 * the three buffers into reduce_scratchpad, a per-block reduction, and a
 * final single-thread pass that writes *reduce_result. The function then
 * blocks until the result is available on the host.
 *
 * \param stream                Stream on which all device work is issued.
 * \param rtype                 RTYPE_MAX, RTYPE_MIN, RTYPE_RMS or
 *                              RTYPE_RMS_EXP.
 * \param nx, ny, nz            Extent of the computational domain; each must
 *                              be a power of two (checked below).
 * \param vertex_buffer_a/b/c   Device buffers holding the three components.
 * \param reduce_scratchpad     Device scratch space with at least
 *                              nx * ny * nz elements.
 * \param reduce_result         Device memory for the single result value.
 * \return For MAX/MIN the reduced vector length. For RMS/RMS_EXP the sum of
 *         the squared values divided by nx * ny * nz; no square root is taken
 *         here.
 */
AcReal
_reduce_vec(const cudaStream_t stream,
            const ReductionType& rtype, const int& nx, const int& ny,
            const int& nz, const AcReal* vertex_buffer_a,
            const AcReal* vertex_buffer_b, const AcReal* vertex_buffer_c,
            AcReal* reduce_scratchpad, AcReal* reduce_result)
{
    bool solve_mean = false;

    // Launch sizes use integer ceiling division. A floating-point ceil() can
    // round a large element count down in single precision and launch too
    // few blocks.
    const dim3 tpb(32, 4, 1);
    const dim3 bpg((nx + tpb.x - 1) / tpb.x,
                   (ny + tpb.y - 1) / tpb.y,
                   (nz + tpb.z - 1) / tpb.z);

    const int scratchpad_size = nx * ny * nz;
    const int elems_per_block = ELEMS_PER_THREAD * BLOCK_SIZE;
    const int bpg2            = (scratchpad_size + elems_per_block - 1) / elems_per_block;

    // "Features" of this quick & efficient reduction:
    // The block size must not exceed the computational domain size
    // (otherwise we would have to do some additional bounds checking in the
    // second half of _kernel_reduce, which gets quite confusing).
    // BLOCK_SIZE must also be a multiple of two so that we can easily split
    // the work without worrying too much about the array bounds.
    ERRCHK(BLOCK_SIZE <= scratchpad_size);
    ERRCHK(!(BLOCK_SIZE % 2));

    // NOTE! Also does not work properly with non-power-of-two mesh
    // dimensions. The issue is with "smem[BLOCK_SIZE];". If you initialize
    // smem to NANs, you can see that uninitialized smem values are used in
    // the comparison.
    ERRCHK(is_power_of_two(nx));
    ERRCHK(is_power_of_two(ny));
    ERRCHK(is_power_of_two(nz));

    switch (rtype) {
    case RTYPE_MAX:
        _kernel_reduce_vec<_device_length_vec>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer_a, vertex_buffer_b, vertex_buffer_c,
                                      reduce_scratchpad);
        _kernel_reduce<_device_max>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_max>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        break;
    case RTYPE_MIN:
        _kernel_reduce_vec<_device_length_vec>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer_a, vertex_buffer_b, vertex_buffer_c,
                                      reduce_scratchpad);
        _kernel_reduce<_device_min>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_min>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        break;
    case RTYPE_RMS:
        _kernel_reduce_vec<_device_squared_vec>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer_a, vertex_buffer_b, vertex_buffer_c,
                                      reduce_scratchpad);
        _kernel_reduce<_device_sum>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_sum>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        solve_mean = true;
        break;
    case RTYPE_RMS_EXP:
        _kernel_reduce_vec<_device_exp_squared_vec>
            <<<bpg, tpb, 0, stream>>>(vertex_buffer_a, vertex_buffer_b, vertex_buffer_c,
                                      reduce_scratchpad);
        _kernel_reduce<_device_sum>
            <<<bpg2, BLOCK_SIZE, 0, stream>>>(reduce_scratchpad, reduce_result);
        _kernel_reduce_block<_device_sum>
            <<<1, 1, 0, stream>>>(reduce_scratchpad, reduce_result);
        solve_mean = true;
        break;
    default:
        ERROR("Unrecognized RTYPE");
    }

    // Copy on the same stream as the kernels and wait for it. A plain
    // cudaMemcpy runs on the default stream, which is not ordered after a
    // stream created with cudaStreamNonBlocking.
    AcReal result;
    cudaMemcpyAsync(&result, reduce_result, sizeof(AcReal),
                    cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    if (solve_mean) {
        const AcReal inv_n = AcReal(1.0) / (nx * ny * nz);
        return inv_n * result;
    }
    else {
        return result;
    }
}

742
src/core/kernels/rk3.cuh Normal file
View File

@@ -0,0 +1,742 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Implementation of the integration pipeline
*
*
*
*/
#pragma once
#include "device_globals.cuh"
#include <assert.h>
/*
#define RK_THREADS_X (32)
#define RK_THREADS_Y (1)
#define RK_THREADS_Z (4)
#define RK_LAUNCH_BOUND_MIN_BLOCKS (4)
#define RK_THREADBLOCK_SIZE (RK_THREADS_X * RK_THREADS_Y * RK_THREADS_Z)
*/
/** Identity overload: a linear index is already in its final form. */
static __device__ __forceinline__ int
IDX(const int i)
{
    const int linear_idx = i;
    return linear_idx;
}

/** Maps 3D vertex coordinates to a linear index via DEVICE_VTXBUF_IDX. */
static __device__ __forceinline__ int
IDX(const int i, const int j, const int k)
{
    const int linear_idx = DEVICE_VTXBUF_IDX(i, j, k);
    return linear_idx;
}

/** Same as the three-argument overload, with the coordinates packed in an int3. */
static __device__ __forceinline__ int
IDX(const int3 idx)
{
    const int linear_idx = DEVICE_VTXBUF_IDX(idx.x, idx.y, idx.z);
    return linear_idx;
}
/**
 * Builds the 3x3 matrix that rotates a vector by `radians` about the z axis.
 *
 * \param radians Rotation angle in radians.
 * \return Rotation matrix whose rows are stored in AcMatrix::row.
 */
static __forceinline__ AcMatrix
create_rotz(const AcReal radians)
{
    AcMatrix mat;

    mat.row[0] = (AcReal3){cos(radians), -sin(radians), 0};
    mat.row[1] = (AcReal3){sin(radians), cos(radians), 0};
    // The z component must pass through unchanged. An all-zero row here
    // would silently zero the z component of every rotated vector.
    mat.row[2] = (AcReal3){0, 0, 1};

    return mat;
}
// In single precision, redirect sin/cos/exp to the CUDA fast-math intrinsics
// (__sinf, __cosf, __expf) and rsqrt to rsqrtf for all code below this point.
// The intrinsics trade accuracy for speed.
// NOTE(review): being macros, these also rewrite any host-side use of
// sin/cos/exp that appears later in the same translation unit, while the
// intrinsics are device functions. Confirm no host code after this point
// calls them.
#if AC_DOUBLE_PRECISION == 0
#define sin __sinf
#define cos __cosf
#define exp __expf
#define rsqrt rsqrtf // hardware reciprocal sqrt
#endif // AC_DOUBLE_PRECISION == 0
/*
typedef struct {
int i, j, k;
} int3;*/
/*
* =============================================================================
* Level 0 (Input Assembly Stage)
* =============================================================================
*/
/*
* =============================================================================
* Level 0.1 (Read stencil elements and solve derivatives)
* =============================================================================
*/
/**
 * Central finite-difference approximation of the first derivative.
 *
 * \param pencil STENCIL_ORDER + 1 samples along one line, with the point of
 *               interest at index STENCIL_ORDER / 2.
 * \param inv_ds Inverse of the grid spacing along that line.
 * \return Antisymmetric weighted sum of the samples, scaled by inv_ds.
 */
static __device__ __forceinline__ AcReal
first_derivative(const AcReal* __restrict__ pencil, const AcReal inv_ds)
{
    // weights[n] multiplies the difference of the two samples n points away
    // from the centre; weights[0] is unused padding.
#if STENCIL_ORDER == 2
    const AcReal weights[] = {0, 1.0 / 2.0};
#elif STENCIL_ORDER == 4
    const AcReal weights[] = {0, 2.0 / 3.0, -1.0 / 12.0};
#elif STENCIL_ORDER == 6
    const AcReal weights[] = {0, 3.0 / 4.0, -3.0 / 20.0, 1.0 / 60.0};
#elif STENCIL_ORDER == 8
    const AcReal weights[] = {0, 4.0 / 5.0, -1.0 / 5.0, 4.0 / 105.0,
                              -1.0 / 280.0};
#endif

#define MID (STENCIL_ORDER / 2)
    AcReal acc = 0;

#pragma unroll
    for (int dist = 1; dist <= MID; ++dist)
        acc += weights[dist] * (pencil[MID + dist] - pencil[MID - dist]);

    return acc * inv_ds;
}
/**
 * Central finite-difference approximation of the second derivative.
 *
 * \param pencil STENCIL_ORDER + 1 samples along one line, with the point of
 *               interest at index STENCIL_ORDER / 2.
 * \param inv_ds Inverse of the grid spacing along that line.
 * \return Symmetric weighted sum of the samples, scaled by inv_ds squared.
 */
static __device__ __forceinline__ AcReal
second_derivative(const AcReal* __restrict__ pencil, const AcReal inv_ds)
{
    // weights[0] multiplies the centre sample, weights[n] the sum of the two
    // samples n points away from it.
#if STENCIL_ORDER == 2
    const AcReal weights[] = {-2., 1.};
#elif STENCIL_ORDER == 4
    const AcReal weights[] = {-5.0/2.0, 4.0/3.0, -1.0/12.0};
#elif STENCIL_ORDER == 6
    const AcReal weights[] = {-49.0 / 18.0, 3.0 / 2.0, -3.0 / 20.0,
                              1.0 / 90.0};
#elif STENCIL_ORDER == 8
    const AcReal weights[] = {-205.0 / 72.0, 8.0 / 5.0, -1.0 / 5.0,
                              8.0 / 315.0, -1.0 / 560.0};
#endif

#define MID (STENCIL_ORDER / 2)
    AcReal acc = weights[0] * pencil[MID];

#pragma unroll
    for (int dist = 1; dist <= MID; ++dist)
        acc += weights[dist] * (pencil[MID + dist] + pencil[MID - dist]);

    return acc * inv_ds * inv_ds;
}
/**
 * Finite-difference approximation of a mixed second derivative, built from
 * samples along the two diagonals of the plane spanned by the two axes.
 *
 * \param pencil_a STENCIL_ORDER + 1 samples where both coordinates increase
 *                 together; the point of interest is at STENCIL_ORDER / 2.
 * \param pencil_b STENCIL_ORDER + 1 samples where one coordinate increases
 *                 while the other decreases; same centre index.
 * \param inv_ds_a Inverse grid spacing along the first axis,
 *                 f.ex. DCONST_REAL(AC_inv_dsx).
 * \param inv_ds_b Inverse grid spacing along the second axis.
 *
 * With c_n the weight at distance n, the scheme is
 *   sum_n c_n * (a[+n] + a[-n] - b[+n] - b[-n]) * inv_ds_a * inv_ds_b.
 * A Taylor expansion requires sum_n 4 n^2 c_n = 1 and, for fourth order and
 * above, sum_n 4 n^4 c_n = 0. Every weight table below satisfies
 * c_n = (first-derivative weight at n) / (2 n).
 */
static __device__ __forceinline__ AcReal
cross_derivative(const AcReal* __restrict__ pencil_a,
                 const AcReal* __restrict__ pencil_b, const AcReal inv_ds_a,
                 const AcReal inv_ds_b)
{
#if STENCIL_ORDER == 2
    const AcReal coefficients[] = {0, 1.0 / 4.0};
#elif STENCIL_ORDER == 4
    // (2/3) / 2 and (-1/12) / 4, see the function comment
    const AcReal coefficients[] = {0, 1.0 / 3.0, -1.0 / 48.0};
#elif STENCIL_ORDER == 6
    const AcReal fac            = (1. / 720.);
    const AcReal coefficients[] = {0.0 * fac, 270.0 * fac, -27.0 * fac,
                                   2.0 * fac};
#elif STENCIL_ORDER == 8
    const AcReal fac            = (1. / 20160.);
    const AcReal coefficients[] = {0.0 * fac, 8064. * fac, -1008. * fac,
                                   128. * fac, -9. * fac};
#endif

#define MID (STENCIL_ORDER / 2)
    AcReal res = AcReal(0.);

#pragma unroll
    for (int i = 1; i <= MID; ++i) {
        res += coefficients[i] * (pencil_a[MID + i] + pencil_a[MID - i] -
                                  pencil_b[MID + i] - pencil_b[MID - i]);
    }
    return res * inv_ds_a * inv_ds_b;
}
/** First derivative of arr along x at vertexIdx. */
static __device__ __forceinline__ AcReal
derx(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal samples[STENCIL_ORDER + 1];

    // Gather the line of samples centred on vertexIdx along x
#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int x = vertexIdx.x + n - half_width;
        samples[n]  = arr[IDX(x, vertexIdx.y, vertexIdx.z)];
    }

    return first_derivative(samples, DCONST_REAL(AC_inv_dsx));
}
/** Second derivative of arr along x at vertexIdx. */
static __device__ __forceinline__ AcReal
derxx(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal samples[STENCIL_ORDER + 1];

    // Gather the line of samples centred on vertexIdx along x
#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int x = vertexIdx.x + n - half_width;
        samples[n]  = arr[IDX(x, vertexIdx.y, vertexIdx.z)];
    }

    return second_derivative(samples, DCONST_REAL(AC_inv_dsx));
}
/** Mixed derivative of arr with respect to x and y at vertexIdx. */
static __device__ __forceinline__ AcReal
derxy(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal diag[STENCIL_ORDER + 1];      // x and y increase together
    AcReal anti_diag[STENCIL_ORDER + 1]; // x increases while y decreases

#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int x  = vertexIdx.x + n - half_width;
        diag[n]      = arr[IDX(x, vertexIdx.y + n - half_width, vertexIdx.z)];
        anti_diag[n] = arr[IDX(x, vertexIdx.y + half_width - n, vertexIdx.z)];
    }

    return cross_derivative(diag, anti_diag, DCONST_REAL(AC_inv_dsx),
                            DCONST_REAL(AC_inv_dsy));
}
/** Mixed derivative of arr with respect to x and z at vertexIdx. */
static __device__ __forceinline__ AcReal
derxz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal diag[STENCIL_ORDER + 1];      // x and z increase together
    AcReal anti_diag[STENCIL_ORDER + 1]; // x increases while z decreases

#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int x  = vertexIdx.x + n - half_width;
        diag[n]      = arr[IDX(x, vertexIdx.y, vertexIdx.z + n - half_width)];
        anti_diag[n] = arr[IDX(x, vertexIdx.y, vertexIdx.z + half_width - n)];
    }

    return cross_derivative(diag, anti_diag, DCONST_REAL(AC_inv_dsx),
                            DCONST_REAL(AC_inv_dsz));
}
/** First derivative of arr along y at vertexIdx. */
static __device__ __forceinline__ AcReal
dery(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal samples[STENCIL_ORDER + 1];

    // Gather the line of samples centred on vertexIdx along y
#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int y = vertexIdx.y + n - half_width;
        samples[n]  = arr[IDX(vertexIdx.x, y, vertexIdx.z)];
    }

    return first_derivative(samples, DCONST_REAL(AC_inv_dsy));
}
/** Second derivative of arr along y at vertexIdx. */
static __device__ __forceinline__ AcReal
deryy(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal samples[STENCIL_ORDER + 1];

    // Gather the line of samples centred on vertexIdx along y
#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int y = vertexIdx.y + n - half_width;
        samples[n]  = arr[IDX(vertexIdx.x, y, vertexIdx.z)];
    }

    return second_derivative(samples, DCONST_REAL(AC_inv_dsy));
}
/** Mixed derivative of arr with respect to y and z at vertexIdx. */
static __device__ __forceinline__ AcReal
deryz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal diag[STENCIL_ORDER + 1];      // y and z increase together
    AcReal anti_diag[STENCIL_ORDER + 1]; // y increases while z decreases

#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int y  = vertexIdx.y + n - half_width;
        diag[n]      = arr[IDX(vertexIdx.x, y, vertexIdx.z + n - half_width)];
        anti_diag[n] = arr[IDX(vertexIdx.x, y, vertexIdx.z + half_width - n)];
    }

    return cross_derivative(diag, anti_diag, DCONST_REAL(AC_inv_dsy),
                            DCONST_REAL(AC_inv_dsz));
}
/** First derivative of arr along z at vertexIdx. */
static __device__ __forceinline__ AcReal
derz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal samples[STENCIL_ORDER + 1];

    // Gather the line of samples centred on vertexIdx along z
#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int z = vertexIdx.z + n - half_width;
        samples[n]  = arr[IDX(vertexIdx.x, vertexIdx.y, z)];
    }

    return first_derivative(samples, DCONST_REAL(AC_inv_dsz));
}
/** Second derivative of arr along z at vertexIdx. */
static __device__ __forceinline__ AcReal
derzz(const int3 vertexIdx, const AcReal* __restrict__ arr)
{
    const int half_width = STENCIL_ORDER / 2;
    AcReal samples[STENCIL_ORDER + 1];

    // Gather the line of samples centred on vertexIdx along z
#pragma unroll
    for (int n = 0; n < STENCIL_ORDER + 1; ++n) {
        const int z = vertexIdx.z + n - half_width;
        samples[n]  = arr[IDX(vertexIdx.x, vertexIdx.y, z)];
    }

    return second_derivative(samples, DCONST_REAL(AC_inv_dsz));
}
/*
* =============================================================================
* Level 0.2 (Caching functions)
* =============================================================================
*/
#include "stencil_assembly.cuh"
/*
typedef struct {
AcRealData x;
AcRealData y;
AcRealData z;
} AcReal3Data;
static __device__ __forceinline__ AcReal3Data
read_data(const int i, const int j, const int k,
AcReal* __restrict__ buf[], const int3& handle)
{
AcReal3Data data;
data.x = read_data(i, j, k, buf, handle.x);
data.y = read_data(i, j, k, buf, handle.y);
data.z = read_data(i, j, k, buf, handle.z);
return data;
}
*/
/*
* =============================================================================
* Level 0.3 (Built-in functions available during the Stencil Processing Stage)
* =============================================================================
*/
/** Component-wise difference a - b. */
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a, const AcReal3& b)
{
    AcReal3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
/** Component-wise sum a + b. */
static __host__ __device__ __forceinline__ AcReal3
operator+(const AcReal3& a, const AcReal3& b)
{
    AcReal3 total;
    total.x = a.x + b.x;
    total.y = a.y + b.y;
    total.z = a.z + b.z;
    return total;
}
/** Component-wise negation. */
static __host__ __device__ __forceinline__ AcReal3
operator-(const AcReal3& a)
{
    AcReal3 negated;
    negated.x = -a.x;
    negated.y = -a.y;
    negated.z = -a.z;
    return negated;
}
/** Scales every component of b by the scalar a. */
static __host__ __device__ __forceinline__ AcReal3
operator*(const AcReal a, const AcReal3& b)
{
    AcReal3 scaled;
    scaled.x = a * b.x;
    scaled.y = a * b.y;
    scaled.z = a * b.z;
    return scaled;
}
/** Dot product of a and b. */
static __host__ __device__ __forceinline__ AcReal
dot(const AcReal3& a, const AcReal3& b)
{
    const AcReal product = a.x * b.x + a.y * b.y + a.z * b.z;
    return product;
}
/** Matrix-vector product aa * x: each output component is a row of aa dotted with x. */
static __host__ __device__ __forceinline__ AcReal3
mul(const AcMatrix& aa, const AcReal3& x)
{
    AcReal3 out;
    out.x = dot(aa.row[0], x);
    out.y = dot(aa.row[1], x);
    out.z = dot(aa.row[2], x);
    return out;
}
/** Cross product a x b. */
static __host__ __device__ __forceinline__ AcReal3
cross(const AcReal3& a, const AcReal3& b)
{
    return (AcReal3){a.y * b.z - a.z * b.y,
                     a.z * b.x - a.x * b.z,
                     a.x * b.y - a.y * b.x};
}
/** True when a is neither NaN nor infinite. */
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal a)
{
    return !(isnan(a) || isinf(a));
}

/** True when all three components of a are neither NaN nor infinite. */
static __host__ __device__ __forceinline__ bool
is_valid(const AcReal3& a)
{
    if (!is_valid(a.x))
        return false;
    if (!is_valid(a.y))
        return false;
    return is_valid(a.z);
}
/*
* =============================================================================
* Level 1 (Stencil Processing Stage)
* =============================================================================
*/
/*
* =============================================================================
* Level 1.1 (Terms)
* =============================================================================
*/
/** Laplacian of a scalar field: the sum of the diagonal entries of hessian(data). */
static __device__ __forceinline__ AcReal
laplace(const AcRealData& data)
{
    const AcReal d2_dx2 = hessian(data).row[0].x;
    const AcReal d2_dy2 = hessian(data).row[1].y;
    const AcReal d2_dz2 = hessian(data).row[2].z;
    return d2_dx2 + d2_dy2 + d2_dz2;
}
/** Divergence of a vector field: d(vec.x)/dx + d(vec.y)/dy + d(vec.z)/dz. */
static __device__ __forceinline__ AcReal
divergence(const AcReal3Data& vec)
{
    const AcReal dvx_dx = gradient(vec.x).x;
    const AcReal dvy_dy = gradient(vec.y).y;
    const AcReal dvz_dz = gradient(vec.z).z;
    return dvx_dx + dvy_dy + dvz_dz;
}
/** Component-wise Laplacian of a vector field. */
static __device__ __forceinline__ AcReal3
laplace_vec(const AcReal3Data& vec)
{
    AcReal3 lap;
    lap.x = laplace(vec.x);
    lap.y = laplace(vec.y);
    lap.z = laplace(vec.z);
    return lap;
}
/** Curl of a vector field, assembled from the component gradients. */
static __device__ __forceinline__ AcReal3
curl(const AcReal3Data& vec)
{
    AcReal3 rot;
    rot.x = gradient(vec.z).y - gradient(vec.y).z;
    rot.y = gradient(vec.x).z - gradient(vec.z).x;
    rot.z = gradient(vec.y).x - gradient(vec.x).y;
    return rot;
}
/**
 * Gradient of the divergence of a vector field. Component i is the sum over
 * j of the (i, j) entry of the Hessian of the j-th field component.
 */
static __device__ __forceinline__ AcReal3
gradient_of_divergence(const AcReal3Data& vec)
{
    AcReal3 grad_div;
    grad_div.x = hessian(vec.x).row[0].x + hessian(vec.y).row[0].y + hessian(vec.z).row[0].z;
    grad_div.y = hessian(vec.x).row[1].x + hessian(vec.y).row[1].y + hessian(vec.z).row[1].z;
    grad_div.z = hessian(vec.x).row[2].x + hessian(vec.y).row[2].y + hessian(vec.z).row[2].z;
    return grad_div;
}
/**
 * Builds the symmetric, traceless tensor S from the gradients of vec:
 * S_ij = 1/2 (d_i v_j + d_j v_i) - 1/3 delta_ij div(v).
 * The upper triangle is computed and then mirrored into the lower triangle.
 */
static __device__ __forceinline__ AcMatrix
stress_tensor(const AcReal3Data& vec)
{
    AcMatrix S;

    // Diagonal entries
    S.row[0].x = AcReal(2. / 3.) * gradient(vec.x).x -
                 AcReal(1. / 3.) * (gradient(vec.y).y + gradient(vec.z).z);
    S.row[1].y = AcReal(2. / 3.) * gradient(vec.y).y -
                 AcReal(1. / 3.) * (gradient(vec.x).x + gradient(vec.z).z);
    S.row[2].z = AcReal(2. / 3.) * gradient(vec.z).z -
                 AcReal(1. / 3.) * (gradient(vec.x).x + gradient(vec.y).y);

    // Off-diagonal entries of the upper triangle
    S.row[0].y = AcReal(1. / 2.) * (gradient(vec.x).y + gradient(vec.y).x);
    S.row[0].z = AcReal(1. / 2.) * (gradient(vec.x).z + gradient(vec.z).x);
    S.row[1].z = AcReal(1. / 2.) * (gradient(vec.y).z + gradient(vec.z).y);

    // Mirror into the lower triangle
    S.row[1].x = S.row[0].y;
    S.row[2].x = S.row[0].z;
    S.row[2].y = S.row[1].z;

    return S;
}
/** Sum of the squares of all nine entries of mat, accumulated row by row. */
static __device__ __forceinline__ AcReal
contract(const AcMatrix& mat)
{
    AcReal total = 0;

#pragma unroll
    for (int r = 0; r < 3; ++r) {
        const AcReal row_sq = dot(mat.row[r], mat.row[r]);
        total += row_sq;
    }

    return total;
}
/*
* =============================================================================
* Level 1.2 (Equations)
* =============================================================================
*/
/** Euclidean length of vec. */
static __device__ __forceinline__ AcReal
length(const AcReal3& vec)
{
    const AcReal len = sqrt(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
    return len;
}
/** One over the Euclidean length of vec, computed with rsqrt. */
static __device__ __forceinline__ AcReal
reciprocal_len(const AcReal3& vec)
{
    const AcReal inv_len = rsqrt(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z);
    return inv_len;
}
/**
 * Returns vec scaled by the reciprocal of its length. No special handling is
 * done for a zero-length input.
 */
static __device__ __forceinline__ AcReal3
normalized(const AcReal3& vec)
{
    return reciprocal_len(vec) * vec;
}
// Sinusoidal forcing
// https://arxiv.org/pdf/1704.04676.pdf
//
// forcing_vec and forcing_phi are device constants uploaded from the host by
// rk3_step_async (via cudaMemcpyToSymbolAsync) when LFORCING is enabled.
__constant__ AcReal3 forcing_vec;
__constant__ AcReal forcing_phi;
/**
 * Forcing term for the vertex (i, j, k).
 *
 * Returns inv_len^2 * cos(k_vec . forcing_vec + forcing_phi) * forcing_vec,
 * where inv_len is the clamped reciprocal length of k_vec (see below).
 */
static __device__ __forceinline__ AcReal3
forcing(const int i, const int j, const int k)
{
// Physical extent of the computational domain along each axis
#define DOMAIN_SIZE_X (DCONST_INT(AC_nx) * DCONST_REAL(AC_dsx))
#define DOMAIN_SIZE_Y (DCONST_INT(AC_ny) * DCONST_REAL(AC_dsy))
#define DOMAIN_SIZE_Z (DCONST_INT(AC_nz) * DCONST_REAL(AC_dsz))
// Despite its name, k_vec is the position of the vertex measured from the
// centre of the computational domain.
const AcReal3 k_vec = (AcReal3){(i - DCONST_INT(AC_nx_min)) * DCONST_REAL(AC_dsx) - AcReal(.5) * DOMAIN_SIZE_X,
(j - DCONST_INT(AC_ny_min)) * DCONST_REAL(AC_dsy) - AcReal(.5) * DOMAIN_SIZE_Y,
(k - DCONST_INT(AC_nz_min)) * DCONST_REAL(AC_dsz) - AcReal(.5) * DOMAIN_SIZE_Z};
AcReal inv_len = reciprocal_len(k_vec);
// A zero-length k_vec gives a non-finite reciprocal; use zero instead
if (isnan(inv_len) || isinf(inv_len))
inv_len = 0;
// Cap the amplitude close to the centre of the domain
if (inv_len > 2) // hack to make it cool
inv_len = 2;
const AcReal k_dot_x = dot(k_vec, forcing_vec);
// cos(a) cos(b) - sin(a) sin(b) == cos(a + b)
const AcReal waves = cos(k_dot_x)*cos(forcing_phi) - sin(k_dot_x) * sin(forcing_phi);
return inv_len * inv_len * waves * forcing_vec;
}
// Note: LNT0 and LNRHO0 must be set very carefully: if their magnitude differs from that of the other values in the mesh, we will inherently lose precision
#define LNT0 (AcReal(0.0))
#define LNRHO0 (AcReal(0.0))
#define H_CONST (AcReal(0.0))
#define C_CONST (AcReal(0.0))
/**
 * Advances one scalar value by one substep of the third-order Runge-Kutta
 * scheme of Williamson (1980).
 *
 * \tparam step_number   Substep index: 0, 1 or 2.
 * \param state_previous Value before the previous substep (unused in
 *                       substep 0).
 * \param state_current  Value at the start of this substep.
 * \param rate_of_change Right-hand side evaluated at the current state.
 * \param dt             Time step.
 * \return The updated value, or NAN for any other step_number.
 */
template <int step_number>
static __device__ __forceinline__ AcReal
rk3_integrate(const AcReal state_previous, const AcReal state_current,
              const AcReal rate_of_change, const AcReal dt)
{
    // Williamson (1980)
    // Both tables carry a padding entry at index 0, so the coefficients of
    // substep s live at index s + 1 and those of the previous substep at
    // index s. This avoids a spurious "out-of-bounds" warning for the
    // previous-substep lookup when step_number is 0.
    const AcReal alpha[] = {0, AcReal(.0), AcReal(-5. / 9.), AcReal(-153. / 128.)};
    const AcReal beta[]  = {0, AcReal(1. / 3.), AcReal(15. / 16.),
                            AcReal(8. / 15.)};

    if (step_number == 0)
        return state_current + beta[step_number + 1] * rate_of_change * dt;

    if (step_number == 1 || step_number == 2)
        return state_current +
               beta[step_number + 1] *
                   (alpha[step_number + 1] * (AcReal(1.) / beta[step_number]) *
                        (state_current - state_previous) +
                    rate_of_change * dt);

    return NAN;
}
/*
template <int step_number>
static __device__ __forceinline__ AcReal
rk3_integrate_scal(const AcReal state_previous, const AcReal state_current,
const AcReal rate_of_change, const AcReal dt)
{
// Williamson (1980)
const AcReal alpha[] = {AcReal(.0), AcReal(-5. / 9.), AcReal(-153. / 128.)};
const AcReal beta[] = {AcReal(1. / 3.), AcReal(15. / 16.),
AcReal(8. / 15.)};
switch (step_number) {
case 0:
return state_current + beta[step_number] * rate_of_change * dt;
case 1: // Fallthrough
case 2:
return state_current +
beta[step_number] *
(alpha[step_number] * (AcReal(1.) / beta[step_number - 1]) *
(state_current - state_previous) +
rate_of_change * dt);
default:
return NAN;
}
}
*/
/**
 * Vector overload: applies the scalar rk3_integrate to each of the three
 * components independently.
 */
template <int step_number>
static __device__ __forceinline__ AcReal3
rk3_integrate(const AcReal3 state_previous, const AcReal3 state_current,
              const AcReal3 rate_of_change, const AcReal dt)
{
    AcReal3 next;
    next.x = rk3_integrate<step_number>(state_previous.x, state_current.x, rate_of_change.x, dt);
    next.y = rk3_integrate<step_number>(state_previous.y, state_current.y, rate_of_change.y, dt);
    next.z = rk3_integrate<step_number>(state_previous.z, state_current.z, rate_of_change.z, dt);
    return next;
}
// Shorthand that requires a `step_number` in scope at the point of use and
// passes value(state_current) as the current state.
#define rk3(state_previous, state_current, rate_of_change, dt)\
rk3_integrate<step_number>(state_previous, value(state_current), rate_of_change, dt)
/*
template <int step_number>
static __device__ __forceinline__ AcReal
rk3_integrate(const int idx, const AcReal out, const int handle,
const AcRealData& in_cached, const AcReal rate_of_change, const AcReal dt)
{
return rk3_integrate_scal<step_number>(out, value(in_cached), rate_of_change, dt);
}
template <int step_number>
static __device__ __forceinline__ AcReal3
rk3_integrate(const int idx, const AcReal3 out, const int3& handle,
const AcReal3Data& in_cached, const AcReal3& rate_of_change, const AcReal dt)
{
return (AcReal3) {
rk3_integrate<step_number>(idx, out, handle.x, in_cached.x, rate_of_change.x, dt),
rk3_integrate<step_number>(idx, out, handle.y, in_cached.y, rate_of_change.y, dt),
rk3_integrate<step_number>(idx, out, handle.z, in_cached.z, rate_of_change.z, dt)
};
}
#define RK3(handle, in_cached, rate_of_change, dt) \
rk3_integrate<step_number>(idx, buffer.out, handle, in_cached, rate_of_change, dt)
*/
/*
* =============================================================================
* Level 1.3 (Kernels)
* =============================================================================
*/
/** Stores value at position idx of the buffer selected by handle. */
static __device__ void
write(AcReal* __restrict__ out[], const int handle, const int idx, const AcReal value)
{
    AcReal* const target = out[handle];
    target[idx]          = value;
}

/**
 * Vector overload: stores the x, y and z components of value at position idx
 * of the three buffers selected by vec.x, vec.y and vec.z.
 */
static __device__ void
write(AcReal* __restrict__ out[], const int3 vec, const int idx, const AcReal3 value)
{
    out[vec.x][idx] = value.x;
    out[vec.y][idx] = value.y;
    out[vec.z][idx] = value.z;
}
/** Loads the element at position idx of the buffer selected by handle. */
static __device__ AcReal
read_out(const int idx, AcReal* __restrict__ field[], const int handle)
{
    const AcReal stored = field[handle][idx];
    return stored;
}

/**
 * Vector overload: loads position idx from the three buffers selected by
 * handle.x, handle.y and handle.z.
 */
static __device__ AcReal3
read_out(const int idx, AcReal* __restrict__ field[], const int3 handle)
{
    AcReal3 stored;
    stored.x = read_out(idx, field, handle.x);
    stored.y = read_out(idx, field, handle.y);
    stored.z = read_out(idx, field, handle.z);
    return stored;
}
// Convenience wrappers for kernel bodies. They rely on `buffer`, `idx` and
// `vertexIdx` being in scope, which the two GEN_KERNEL_* macros below provide.
// WRITE_OUT stores into buffer.out, READ_OUT loads from buffer.out, and READ
// fetches from buffer.in through read_data (not defined in this part of the
// file; presumably provided by stencil_assembly.cuh - confirm).
#define WRITE_OUT(handle, value) (write(buffer.out, handle, idx, value))
#define READ(handle) (read_data(vertexIdx, buffer.in, handle))
#define READ_OUT(handle) (read_out(idx, buffer.out, handle))
// NOTE(review): the two comment lines below are kept from the original. They
// appear to describe a WRITE macro that was considered but not added.
// also write for clarity here also, not for the DSL
//#define WRITE(HANDLE) (write(idx, ...)) s.t. we don't have to insert boilerplat in the mid of the function
// Parameter list shared by the generated kernels: the half-open index range
// [start, end) to process and the buffers to read from and write to.
#define GEN_KERNEL_PARAM_BOILERPLATE \
const int3 start, const int3 end, VertexBufferArray buffer
// Prologue of a generated kernel. It computes this thread's vertexIdx from
// the thread/block indices offset by `start`, returns early when the vertex
// lies at or beyond `end`, asserts that the vertex is inside
// [AC_n*_min, AC_n*_max) on every axis, and finally defines the linear
// index `idx`.
// No comments are placed inside the macro body because a // comment would
// swallow the line continuation.
#define GEN_KERNEL_BUILTIN_VARIABLES_BOILERPLATE() \
const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x,\
threadIdx.y + blockIdx.y * blockDim.y + start.y,\
threadIdx.z + blockIdx.z * blockDim.z + start.z};\
if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z)\
return;\
\
\
assert(vertexIdx.x < DCONST_INT(AC_nx_max) && vertexIdx.y < DCONST_INT(AC_ny_max) &&\
vertexIdx.z < DCONST_INT(AC_nz_max));\
\
assert(vertexIdx.x >= DCONST_INT(AC_nx_min) && vertexIdx.y >= DCONST_INT(AC_ny_min) &&\
vertexIdx.z >= DCONST_INT(AC_nz_min));\
\
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);
#include "stencil_process.cuh"
/*
* =============================================================================
* Level 2 (Host calls)
* =============================================================================
*/
/** Pseudo-random value in [0, 1], drawn from rand(). */
static AcReal
randf(void)
{
    const AcReal sample = AcReal(rand());
    const AcReal scale  = AcReal(RAND_MAX);
    return sample / scale;
}
/**
 * Queues one RK3 substep on `stream` for the vertices in [start, end).
 *
 * \param stream      Stream on which the kernel (and, with LFORCING, the
 *                    constant uploads) are issued.
 * \param tpb         Threads per block for the solve kernel.
 * \param start, end  Half-open range of vertex indices to integrate.
 * \param step_number Substep index. 0 and 1 select solve<0> and solve<1>;
 *                    every other value selects solve<2>.
 * \param dt          Time step forwarded to the kernel.
 * \param buffer      Vertex buffers passed to the kernel by value.
 * \return AC_SUCCESS.
 *
 * With LFORCING enabled, each call first rotates the static forcing vector
 * by a small random angle about the z axis and uploads it together with the
 * forcing phase.
 */
AcResult
rk3_step_async(const cudaStream_t stream, const dim3& tpb,
               const int3& start, const int3& end, const int& step_number,
               const AcReal dt, const AcMeshInfo& /*mesh_info*/,
               VertexBufferArray* buffer)
{
    /////////////////// Forcing
#if LFORCING
    const AcReal ff_scale = AcReal(.2);
    static AcReal3 ff     = ff_scale * (AcReal3){1, 0, 0};

    const AcReal radians = randf() * AcReal(2*M_PI) / 360 / 8;
    ff                   = mul(create_rotz(radians), ff);
    cudaMemcpyToSymbolAsync(forcing_vec, &ff, sizeof(ff), 0, cudaMemcpyHostToDevice, stream);

    const AcReal ff_phi = AcReal(M_PI);//AcReal(2 * M_PI) * randf();
    cudaMemcpyToSymbolAsync(forcing_phi, &ff_phi, sizeof(ff_phi), 0, cudaMemcpyHostToDevice, stream);
#endif // LFORCING
    //////////////////////////

    // Enough blocks to cover the requested range along every axis
    const int count_x = end.x - start.x;
    const int count_y = end.y - start.y;
    const int count_z = end.z - start.z;

    const dim3 bpg(
        (unsigned int)ceil(count_x / AcReal(tpb.x)),
        (unsigned int)ceil(count_y / AcReal(tpb.y)),
        (unsigned int)ceil(count_z / AcReal(tpb.z)));

    switch (step_number) {
    case 0:
        solve<0><<<bpg, tpb, 0, stream>>>(start, end, *buffer, dt);
        break;
    case 1:
        solve<1><<<bpg, tpb, 0, stream>>>(start, end, *buffer, dt);
        break;
    default:
        solve<2><<<bpg, tpb, 0, stream>>>(start, end, *buffer, dt);
        break;
    }
    ERRCHK_CUDA_KERNEL();

    return AC_SUCCESS;
}

91
src/core/math_utils.h Normal file
View File

@@ -0,0 +1,91 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include <math.h> // isnan, isinf
#include <stdlib.h> // rand
/** Returns a when a > b, otherwise b. */
template <class T>
static inline const T
max(const T& a, const T& b)
{
    if (a > b)
        return a;
    return b;
}
/** Returns a when a < b, otherwise b. */
template <class T>
static inline const T
min(const T& a, const T& b)
{
    if (a < b)
        return a;
    return b;
}
/** Returns a + b. */
template <class T>
static inline const T
sum(const T& a, const T& b)
{
    const T total = a + b;
    return total;
}
/**
 * Returns true when val is neither NaN nor infinite.
 *
 * The result type is bool rather than T: the function is a predicate, and
 * returning T forced the truth value through a conversion to the argument
 * type. A bool result still converts implicitly wherever the previous
 * T-typed result was used as a number or in a condition.
 */
template <class T>
static inline bool
is_valid(const T& val)
{
    return !(isnan(val) || isinf(val));
}
/**
 * Limits val to the range [min, max]. The lower bound is tested first, so
 * min is returned whenever val < min.
 */
template <class T>
static inline const T
clamp(const T& val, const T& min, const T& max)
{
    if (val < min)
        return min;
    if (val > max)
        return max;
    return val;
}
/** Pseudo-random value in [0, 1], drawn from rand(). */
static inline AcReal
randr()
{
    const AcReal sample = AcReal(rand());
    const AcReal scale  = AcReal(RAND_MAX);
    return sample / scale;
}
/** Component-wise sum of two int3 values. */
static inline int3
operator+(const int3& a, const int3& b)
{
    int3 total;
    total.x = a.x + b.x;
    total.y = a.y + b.y;
    total.z = a.z + b.z;
    return total;
}
/** Component-wise difference of two int3 values. */
static inline int3
operator-(const int3& a, const int3& b)
{
    int3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
/**
 * True when val is a power of two. Zero is not a power of two.
 * A power of two has exactly one bit set, so clearing the lowest set bit
 * with val & (val - 1) leaves zero.
 */
static inline bool
is_power_of_two(const unsigned val)
{
    if (val == 0)
        return false;
    return (val & (val - 1)) == 0;
}

View File

@@ -0,0 +1,10 @@
########################################################
## CMakeLists.txt for the astaroth_standalone library ##
########################################################

# Collect every .cc file in this directory and in model/.
file(GLOB SOURCES
     "*.cc"
     "model/*.cc")

add_library(astaroth_standalone STATIC ${SOURCES})

# Consumers of the library get this directory on their include path.
target_include_directories(astaroth_standalone
                           PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

# CONFIG_PATH is passed to the sources as a quoted string literal.
# Alternative kept for reference: CONFIG_PATH=\"${CMAKE_SOURCE_DIR}/config/\"
target_compile_definitions(astaroth_standalone
                           PRIVATE CONFIG_PATH=\"${ASTAROTH_CONF_PATH}\")

732
src/standalone/autotest.cc Normal file
View File

@@ -0,0 +1,732 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "run.h"
#include <stdio.h>
#include "config_loader.h"
#include "core/math_utils.h"
#include "model/host_memory.h"
#include "model/host_timestep.h"
#include "model/model_boundconds.h"
#include "model/model_reduce.h"
#include "model/model_rk3.h"
#include "core/errchk.h"
// Number of elements in a statically sized array. Must not be used on
// pointers: sizeof would then measure the pointer, not the array.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// ANSI escape sequences for colored terminal output. RESET restores the
// default attributes and must follow every colored string.
#define RED "\x1B[31m"
#define GRN "\x1B[32m"
#define YEL "\x1B[33m"
#define BLU "\x1B[34m"
#define MAG "\x1B[35m"
#define CYN "\x1B[36m"
#define WHT "\x1B[37m"
#define RESET "\x1B[0m"
#define GEN_TEST_RESULT (1) // Generate a test file always during testing
// Integer triple; used below for the mesh dimensions to test.
typedef struct {
int x, y, z;
} vec3i;
// Real-valued triple in device precision (AcReal).
typedef struct {
AcReal x, y, z;
} vec3r;
// One model/candidate value pair together with the error measured between
// them. Which error metric `error` holds is decided by the code filling it.
typedef struct {
ModelScalar model;
AcReal candidate;
ModelScalar error;
} ErrorInfo;
// Compile-time selection of the test driver: QUICK_TEST builds the pass/fail
// autotest, THOROUGH_TEST builds the variant that writes error statistics to
// *.testresult files.
#define QUICK_TEST (0)
#define THOROUGH_TEST (1)
#define TEST_TYPE QUICK_TEST
// Initial conditions exercised by both test drivers.
static const InitType test_cases[] = {INIT_TYPE_RANDOM, INIT_TYPE_XWAVE, INIT_TYPE_GAUSSIAN_RADIAL_EXPL, INIT_TYPE_ABC_FLOW};
// #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#if TEST_TYPE == QUICK_TEST // REGULAR TEST START HERE --------------------------------------------------------------------------------------------------------------
/**
 * Absolute error |candidate - model|, evaluated with fabsl.
 */
static inline ModelScalar
get_absolute_error(const ModelScalar& model, const AcReal& candidate)
{
    const ModelScalar difference = candidate - model;
    return fabsl(difference);
}
/**
 * Largest absolute error that is still accepted for data spanning `range`
 * (maximum minus minimum of the values involved).
 *
 * This is an upper limit which assumes that both the minimum and the maximum
 * value take part in one calculation, which inherently leads to cancellation.
 * An error above it indicates a real problem in the code; anything below is
 * indistinguishable from the inherent inaccuracy of floating-point
 * arithmetic.
 */
static inline ModelScalar
get_acceptable_absolute_error(const ModelScalar& range)
{
    return AC_REAL_EPSILON * range;
}
/**
 * Largest accepted relative error, expressed in multiples of the machine
 * epsilon (the unit get_relative_error() reports in).
 */
static inline ModelScalar
get_acceptable_relative_error(void)
{
    const ModelScalar max_machine_epsilons = 30;
    return max_machine_epsilons;
}
/**
 * Relative error of `candidate` with respect to `model`, expressed in
 * multiples of the machine epsilon of AcReal (AC_REAL_EPSILON).
 *
 * See Sect. "Relative Error and Ulps" in "What Every Computer Scientist
 * Should Know About Floating-Point Arithmetic" by David Goldberg (1991).
 *
 * @param model     Reference value.
 * @param candidate Value under test.
 * @return |1 - candidate / model| / AC_REAL_EPSILON, or 0 when the two values
 *         compare equal. If `model` is zero and `candidate` is not, the
 *         result is infinite.
 */
static inline ModelScalar
get_relative_error(const ModelScalar& model, const AcReal& candidate)
{
    // An exact match has no relative error. Handling it up front avoids
    // evaluating 0 / 0 when both values are zero, which used to make this
    // function return NaN for a perfect result.
    if (model == candidate)
        return 0;

    // TODO: handle models that are merely close to zero with a tolerance-based
    // comparison (Knuth, TAOCP section 4.2.2, pages 217-218). The disabled
    // draft that lived here needed a data range that this function does not
    // receive.
    const ModelScalar error = fabsl(1.0l - candidate / model);

    // Report the error as multiples of the machine epsilon.
    return error / AC_REAL_EPSILON;
}
static bool
verify(const ModelScalar& model, const AcReal& cand, const ModelScalar& range)
{
if (!is_valid(model) || !is_valid(cand))
return false;
const ModelScalar relative_error = get_relative_error(model, cand);
if (relative_error < get_acceptable_relative_error())
return true;
const ModelScalar absolute_error = get_absolute_error(model, cand);
if (absolute_error < get_acceptable_absolute_error(range))
return true;
return false;
}
/**
 * Range (largest maximum minus smallest minimum) of the first three vertex
 * buffers of `mesh`, used as the scale for the acceptable absolute error of
 * the reduction tests.
 *
 * Requires at least three vertex buffer handles (checked with ERRCHK).
 */
static ModelScalar
get_reduction_range(const ModelMesh& mesh)
{
    ERRCHK(NUM_VTXBUF_HANDLES >= 3);

    // All maxima are reduced first, then all minima.
    ModelScalar maxima[3];
    for (int w = 0; w < 3; ++w)
        maxima[w] = model_reduce_scal(mesh, RTYPE_MAX, VertexBufferHandle(w));
    const ModelScalar largest = max(maxima[0], max(maxima[1], maxima[2]));

    ModelScalar minima[3];
    for (int w = 0; w < 3; ++w)
        minima[w] = model_reduce_scal(mesh, RTYPE_MIN, VertexBufferHandle(w));
    const ModelScalar smallest = min(minima[0], min(minima[1], minima[2]));

    return largest - smallest;
}
static void
print_debug_info(const ModelScalar& model, const AcReal& candidate,
const ModelScalar& range)
{
printf("MeshPointInfo\n");
printf("\tModel: %e\n", double(model));
printf("\tCandidate: %e\n", double(candidate));
printf("\tRange: %e\n", double(range));
printf("\tAbsolute error: %Le (max acceptable: %Le)\n",
get_absolute_error(model, candidate),
get_acceptable_absolute_error(range));
printf("\tRelative error: %Le (max acceptable: %Le)\n",
get_relative_error(model, candidate),
get_acceptable_relative_error());
printf("\tIs acceptable: %d\n", verify(model, candidate, range));
}
/**
 * Prints a one-line, colored OK/FAIL summary for a model/candidate pair,
 * labelled with `name`, followed by the relative error (in machine epsilons)
 * and the absolute error.
 */
static void
print_result(const ModelScalar& model, const AcReal& candidate,
             const ModelScalar& range, const char* name = "???")
{
    const ModelScalar rel_err = get_relative_error(model, candidate);
    const ModelScalar abs_err = get_absolute_error(model, candidate);
    const bool passed         = verify(model, candidate, range);

    // The label is identical for both outcomes; only the verdict differs.
    printf("\t%-12s... ", name);
    if (passed)
        printf(GRN "OK! " RESET);
    else
        printf(RED "FAIL! " RESET);

    printf("(relative error: %.3Lg \u03B5, absolute error: %Lg)\n", rel_err, abs_err);
}
static int
check_reductions(const AcMeshInfo& config)
{
printf("Testing reductions\n");
int num_failures = 0;
// Init CPU meshes
AcMesh* mesh = acmesh_create(config);
ModelMesh* modelmesh = modelmesh_create(config);
// Init GPU meshes
acInit(config);
for (unsigned int i = 0; i < ARRAY_SIZE(test_cases); ++i) {
const InitType itype = test_cases[i];
printf("Checking %s...\n", init_type_names[InitType(itype)]);
// Init the mesh and figure out the acceptable range for error
acmesh_init_to(InitType(itype), mesh);
acmesh_to_modelmesh(*mesh, modelmesh);
const ModelScalar range = get_reduction_range(*modelmesh);
acLoad(*mesh);
for (int rtype = 0; rtype < NUM_REDUCTION_TYPES; ++rtype) {
const VertexBufferHandle ftype = VTXBUF_UUX;
// Scal
ModelScalar model = model_reduce_scal(*modelmesh, ReductionType(rtype),
VertexBufferHandle(ftype));
AcReal candidate = acReduceScal(ReductionType(rtype),
VertexBufferHandle(ftype));
print_result(model, candidate, range, "UUX scal");
bool is_acceptable = verify(model, candidate, range);
if (!is_acceptable) {
++num_failures;
// Print debug info
printf("Scalar reduction type %d FAIL\n", rtype);
print_debug_info(model, candidate, range);
}
// Vec
model = model_reduce_vec(*modelmesh, ReductionType(rtype), VTXBUF_UUX,
VTXBUF_UUY, VTXBUF_UUZ);
candidate = acReduceVec(ReductionType(rtype), VTXBUF_UUX,
VTXBUF_UUY, VTXBUF_UUZ);
print_result(model, candidate, range, "UUXYZ vec");
is_acceptable = verify(model, candidate, range);
if (!is_acceptable) {
++num_failures;
// Print debug info
printf("Vector reduction type %d FAIL\n", rtype);
print_debug_info(model, candidate, range);
}
}
printf("Acceptable relative error: < %Lg \u03B5, absolute error < %Lg\n", get_acceptable_relative_error(), get_acceptable_absolute_error(range));
}
acQuit();
modelmesh_destroy(modelmesh);
acmesh_destroy(mesh);
return num_failures;
}
/**
 * Computes the range of the data over all vertex buffers of `model`: the
 * absolute difference between the largest maximum and the smallest minimum.
 *
 * Note! Potentially dangerous if the buffers do not all interact with each
 * other, because the range (and with it the accepted absolute error) may
 * then be too large.
 */
static ModelScalar
get_data_range(const ModelMesh& model)
{
    ModelScalar global_max = -INFINITY;
    ModelScalar global_min = INFINITY;

    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
        const VertexBufferHandle handle = VertexBufferHandle(w);
        const ModelScalar buffer_max    = model_reduce_scal(model, RTYPE_MAX, handle);
        const ModelScalar buffer_min    = model_reduce_scal(model, RTYPE_MIN, handle);

        global_max = (buffer_max > global_max) ? buffer_max : global_max;
        global_min = (buffer_min < global_min) ? buffer_min : global_min;
    }

    return fabsl(global_max - global_min);
}
// #define GEN_TEST_RESULT
// Output file for the per-mesh error summary. It is opened and closed by
// run_autotest() and written to by verify_meshes(); it only exists when
// GEN_TEST_RESULT is enabled.
#if GEN_TEST_RESULT == 1
static FILE* test_result = NULL;
#endif
/**
 * Compares every element of every vertex buffer of `candidate` against
 * `model`.
 *
 * Each failing element is reported with its (i, j, k) index and a detailed
 * error dump. For every vertex buffer one summary line is printed for the
 * element with the largest absolute error. With GEN_TEST_RESULT enabled, the
 * largest absolute error over all buffers (and the relative error of that
 * same element) is additionally written to `test_result`.
 *
 * @return true if all elements pass verify(), false otherwise.
 */
static bool
verify_meshes(const ModelMesh& model, const AcMesh& candidate)
{
bool retval = true;
#if GEN_TEST_RESULT == 1
// Element with the largest absolute error across all vertex buffers
ErrorInfo err = ErrorInfo();
#endif
// The acceptable absolute error scales with the range of the model data
const ModelScalar range = get_data_range(model);
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
const size_t n = AC_VTXBUF_SIZE(model.info);
// Maximum errors within this vertex buffer (value-initialized, so the
// stored error starts at zero)
ErrorInfo max_abs_error = ErrorInfo();
ErrorInfo max_rel_error = ErrorInfo();
for (size_t i = 0; i < n; ++i) {
const ModelScalar model_val = model.vertex_buffer[VertexBufferHandle(w)][i];
const AcReal cand_val = candidate.vertex_buffer[VertexBufferHandle(w)][i];
if (!verify(model_val, cand_val, range)) {
// Recover the 3D index from the linear index i = i0 + j0 * mx + k0 * mx * my
const int i0 = i % model.info.int_params[AC_mx];
const int j0 = ((i % (model.info.int_params[AC_mx] *
model.info.int_params[AC_my])) /
model.info.int_params[AC_mx]);
const int k0 = i / (model.info.int_params[AC_mx] *
model.info.int_params[AC_my]);
printf("Index (%d, %d, %d)\n", i0, j0, k0);
print_debug_info(model_val, cand_val, range);
retval = false;
}
const ModelScalar abs_error = get_absolute_error(model_val,
cand_val);
if (abs_error > max_abs_error.error) {
max_abs_error.error = abs_error;
max_abs_error.model = model_val;
max_abs_error.candidate = cand_val;
}
// Tracked as well, but currently only referenced by the disabled
// print_result() call below
const ModelScalar rel_error = get_relative_error(model_val, cand_val);
if (rel_error > max_rel_error.error) {
max_rel_error.error = rel_error;
max_rel_error.model = model_val;
max_rel_error.candidate = cand_val;
}
#if GEN_TEST_RESULT == 1
if (abs_error > err.error) {
err.error = abs_error;
err.model = model_val;
err.candidate = cand_val;
}
#endif
}
//print_result(max_rel_error.model, max_rel_error.candidate, range, vtxbuf_names[VertexBufferHandle(w)]);
// Summary line for this buffer: the element with the largest absolute error
print_result(max_abs_error.model, max_abs_error.candidate, range, vtxbuf_names[VertexBufferHandle(w)]);
}
#if GEN_TEST_RESULT == 1
// One line per verified mesh: "<abs error> & <rel error>" of the worst element
const ModelScalar rel_err = get_relative_error(err.model, err.candidate);
const ModelScalar abs_err = get_absolute_error(err.model, err.candidate);
fprintf(test_result, "%.3Lg & %.3Lg\n", abs_err, rel_err);
#endif
printf("Acceptable relative error: < %Lg \u03B5, absolute error < %Lg\n", get_acceptable_relative_error(), get_acceptable_absolute_error(range));
return retval;
}
/**
 * Runs the RK3 integrator on the device and on the host model for every
 * initial condition in test_cases and compares the resulting meshes.
 *
 * @param mesh_info Mesh configuration used to create the host meshes and to
 *                  initialise the device.
 * @return Number of initial conditions for which verify_meshes() failed.
 */
int
check_rk3(const AcMeshInfo& mesh_info)
{
    const int num_iterations = 1; // Note: should work up to at least 15 steps
    printf("Testing RK3 (running %d steps before checking the result)\n",
           num_iterations);

    int num_failures = 0;

    // Host-side meshes: the device result is stored into gpu_mesh
    AcMesh* gpu_mesh      = acmesh_create(mesh_info);
    ModelMesh* model_mesh = modelmesh_create(mesh_info);

    // Device-side state
    acInit(mesh_info);

    for (unsigned int case_idx = 0; case_idx < ARRAY_SIZE(test_cases); ++case_idx) {
        const InitType itype = test_cases[case_idx];
        printf("Checking %s...\n", init_type_names[InitType(itype)]);

        // Start both solvers from the same initial condition
        acmesh_init_to(InitType(itype), gpu_mesh);
        acLoad(*gpu_mesh);
        acmesh_to_modelmesh(*gpu_mesh, model_mesh);

        acBoundcondStep();
        boundconds(model_mesh->info, model_mesh);

        // Use a small constant timestep to avoid instabilities. The adaptive
        // alternative is kept for reference:
        //   const AcReal umax = AcReal(acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ));
        //   const AcReal dt   = host_timestep(umax, mesh_info);
        const AcReal dt = AcReal(1e-2);

        for (int step = 0; step < num_iterations; ++step) {
            acIntegrate(dt);
            acBoundcondStep();
            acSynchronize();

            model_rk3(dt, model_mesh);
            boundconds(model_mesh->info, model_mesh);
        }

        acStore(gpu_mesh);
        if (!verify_meshes(*model_mesh, *gpu_mesh))
            ++num_failures;
    }

    acQuit();
    acmesh_destroy(gpu_mesh);
    modelmesh_destroy(model_mesh);
    return num_failures;
}
/**
 * Entry point of the quick autotest: runs check_rk3() on a fixed set of mesh
 * dimensions and reports the number of failures.
 *
 * With GEN_TEST_RESULT enabled, a summary file named
 * "<double|float>_fullstep_testresult.out" is created in the working
 * directory and filled by verify_meshes().
 *
 * @return EXIT_SUCCESS if no failures were found, EXIT_FAILURE otherwise.
 */
int
run_autotest(void)
{
#if GEN_TEST_RESULT == 1
    char testresult_path[256];
    sprintf(testresult_path, "%s_fullstep_testresult.out", AC_DOUBLE_PRECISION ? "double" : "float");

    test_result = fopen(testresult_path, "w");
    ERRCHK(test_result);

    fprintf(test_result, "n, max abs error, corresponding rel error\n");
#endif

    /* Parse configs */
    AcMeshInfo config;
    load_config(&config);

    // The message ends with a newline so that subsequent output does not get
    // appended to the warning line.
    if (STENCIL_ORDER > 6)
        printf("WARNING!!! If the stencil order is larger than the computational domain some vertices may be done twice (f.ex. doing inner and outer domains separately and some of the front/back/left/right/etc slabs collide). The mesh must be large enough s.t. this doesn't happen.\n");

    /*
    Alternative dimension sets used during development:
    const vec3i test_dims[] = { //
        {15, 11, 13},   //
        {17, 61, 127},  //
        {511, 17, 16},  //
        {64, 64, 8},    //
        {32, 32, 64},   //
        {64, 32, 32},   //
        {128, 64, 32}};
    const vec3i test_dims[] = {{256,256,256}};
    const vec3i test_dims[] = {{32, 32, 32}};
    */
    const vec3i test_dims[] = {{512, 16, 32}, //
                               {64, 64, 32},  //
                               {32, 32, 64},  //
                               {64, 32, 32},  //
                               {128, 64, 32}};

    int num_failures = 0;

    /*for (size_t i = 0; i < ARRAY_SIZE(test_dims); ++i) {
        config.int_params[AC_nx] = test_dims[i].x;
        config.int_params[AC_ny] = test_dims[i].y;
        config.int_params[AC_nz] = test_dims[i].z;
        update_config(&config);
        printf("Testing mesh (%d, %d, %d):\n", //
               test_dims[i].x, test_dims[i].y, test_dims[i].z);
        num_failures += check_reductions(config);
        fflush(stdout);
    }*/ // TODO uncomment

    for (size_t i = 0; i < ARRAY_SIZE(test_dims); ++i) {
        config.int_params[AC_nx] = test_dims[i].x;
        config.int_params[AC_ny] = test_dims[i].y;
        config.int_params[AC_nz] = test_dims[i].z;
        // Recompute the parameters derived from nx/ny/nz
        update_config(&config);

        printf("Testing mesh (%d, %d, %d):\n", //
               test_dims[i].x, test_dims[i].y, test_dims[i].z);
        num_failures += check_rk3(config);
        fflush(stdout);
    }

    printf("\n--------Testing done---------\n");
    printf("Failures found: %d\n", num_failures);

#if GEN_TEST_RESULT == 1
    fflush(test_result);
    fclose(test_result);
#endif

    if (num_failures > 0)
        return EXIT_FAILURE;
    else
        return EXIT_SUCCESS;
}
#elif TEST_TYPE == THOROUGH_TEST // GEN TEST FILE START HERE --------------------------------------------------------------------------------------------------------------
// Error statistics for one model/candidate value pair, as filled in by
// get_error() and the get_max_abs_error_*() helpers.
typedef struct {
ModelScalar model; // Reference (model) value
AcReal candidate; // Value under test
ModelScalar abs_error; // |model - candidate|
ModelScalar ulp_error; // abs_error divided by one unit in the last place of model
ModelScalar rel_error; // |1 - candidate / model| in multiples of the machine epsilon
ModelScalar maximum_magnitude; // Largest |value| of the model field, -1 if not calculated
ModelScalar minimum_magnitude; // Smallest |value| of the model field, -1 if not calculated
} Error;
/**
 * Computes the absolute, ULP and relative error of `candidate` with respect
 * to `model`.
 *
 * - Exactly equal values give zero for all three errors.
 * - If either value is NaN or infinite, all three errors are infinite.
 * - Otherwise abs_error = |model - candidate|, ulp_error is abs_error in units
 *   of the last place of `model` at AcReal precision, and rel_error is
 *   |1 - candidate / model| in multiples of the machine epsilon.
 *
 * maximum_magnitude and minimum_magnitude are set to -1 ("not calculated") so
 * that callers which print the struct without filling them in do not read
 * uninitialized memory.
 */
Error get_error(ModelScalar model, AcReal candidate)
{
    Error error;
    error.abs_error         = 0;
    error.model             = model;
    error.candidate         = candidate;
    error.maximum_magnitude = -1; // Not calculated.
    error.minimum_magnitude = -1; // Not calculated.

    if (error.model == error.candidate || fabsl(model - candidate) == 0) { // If exact
        error.abs_error = 0;
        error.rel_error = 0;
        error.ulp_error = 0;
    } else if (!is_valid(error.model) || !is_valid(error.candidate)) {
        error.abs_error = INFINITY;
        error.rel_error = INFINITY;
        error.ulp_error = INFINITY;
    } else {
        const int base = 2;
        const int p    = sizeof(AcReal) == 4 ? 24 : 53; // Bits in the significand

        // Binary exponent of the model value. logbl extracts it exactly,
        // whereas floorl(logl(x) / logl(2)) can land one below the true
        // exponent when x is an exact power of two and the quotient rounds
        // down.
        const ModelScalar e = logbl(fabsl(error.model));

        const ModelScalar ulp             = powl(base, e - (p - 1));
        const ModelScalar machine_epsilon = 0.5 * powl(base, -(p - 1));
        error.abs_error                   = fabsl(model - candidate);
        error.ulp_error                   = error.abs_error / ulp;
        error.rel_error                   = fabsl(1.0l - candidate / model) / machine_epsilon;
    }

    return error;
}
/**
 * Scans every element of every vertex buffer and returns the error record of
 * the element with the largest absolute error.
 *
 * The magnitude fields of the result are set to -1 (not calculated).
 */
Error get_max_abs_error_mesh(const ModelMesh& model_mesh, const AcMesh& candidate_mesh)
{
    // Start below any real error so that the first element always replaces it
    Error worst;
    worst.abs_error = -1;

    for (size_t j = 0; j < NUM_VTXBUF_HANDLES; ++j) {
        for (size_t i = 0; i < AC_VTXBUF_SIZE(model_mesh.info); ++i) {
            const Error current = get_error(model_mesh.vertex_buffer[j][i], candidate_mesh.vertex_buffer[j][i]);
            if (current.abs_error > worst.abs_error)
                worst = current;
        }
    }

    worst.maximum_magnitude = -1; // Not calculated.
    worst.minimum_magnitude = -1; // Not calculated.
    return worst;
}
/**
 * Returns the largest absolute value found in `field`, which holds
 * AC_VTXBUF_SIZE(info) elements. Returns -INFINITY for an empty field.
 */
static ModelScalar
get_maximum_magnitude(const ModelScalar* field, const AcMeshInfo info)
{
    ModelScalar maximum = -INFINITY;

    for (size_t i = 0; i < AC_VTXBUF_SIZE(info); ++i) {
        const ModelScalar magnitude = fabsl(field[i]);
        maximum                     = max(maximum, magnitude);
    }

    return maximum;
}
/**
 * Returns the smallest absolute value found in `field`, which holds
 * AC_VTXBUF_SIZE(info) elements. Returns INFINITY for an empty field.
 */
static ModelScalar
get_minimum_magnitude(const ModelScalar* field, const AcMeshInfo info)
{
    ModelScalar minimum = INFINITY;

    for (size_t i = 0; i < AC_VTXBUF_SIZE(info); ++i) {
        const ModelScalar magnitude = fabsl(field[i]);
        minimum                     = min(minimum, magnitude);
    }

    return minimum;
}
/**
 * Returns the error record of the element with the largest absolute error in
 * one vertex buffer, together with the largest and smallest magnitude of the
 * model values in that buffer.
 */
Error get_max_abs_error_vtxbuf(const VertexBufferHandle vtxbuf_handle, const ModelMesh& model_mesh, const AcMesh& candidate_mesh)
{
    ModelScalar* model_vtxbuf = model_mesh.vertex_buffer[vtxbuf_handle];
    AcReal* candidate_vtxbuf  = candidate_mesh.vertex_buffer[vtxbuf_handle];

    // Start below any real error so that the first element always replaces it
    Error worst;
    worst.abs_error = -1;

    for (size_t i = 0; i < AC_VTXBUF_SIZE(model_mesh.info); ++i) {
        const Error current = get_error(model_vtxbuf[i], candidate_vtxbuf[i]);
        if (current.abs_error > worst.abs_error)
            worst = current;
    }

    worst.maximum_magnitude = get_maximum_magnitude(model_vtxbuf, model_mesh.info);
    worst.minimum_magnitude = get_minimum_magnitude(model_vtxbuf, model_mesh.info);
    return worst;
}
/**
 * Appends one comma-separated line with the error statistics to `path`:
 * n, ulp error, absolute error, relative error, maximum magnitude,
 * minimum magnitude.
 *
 * If the file cannot be opened, a message is written to stderr and nothing
 * else happens. The previous version passed the NULL handle on to fprintf()
 * and fclose().
 *
 * @param path  File to append to (created if it does not exist).
 * @param n     Mesh dimension the error belongs to.
 * @param error Error statistics to record.
 */
void
print_error_to_file(const char* path, const int n, const Error error)
{
    FILE* file = fopen(path, "a");
    if (file == NULL) {
        fprintf(stderr, "Error: could not open '%s' for appending\n", path);
        return;
    }

    fprintf(file, "%d, %Lg, %Lg, %Lg, %Lg, %Lg\n", n, error.ulp_error, error.abs_error, error.rel_error, error.maximum_magnitude, error.minimum_magnitude);
    fclose(file);
}
// Size of the buffers that hold the generated result file names
#define MAX_PATH_LEN (256)
/**
 * Entry point of the thorough test. For cubic meshes with side lengths
 * N_MIN, 2 * N_MIN, ..., N_MAX and for every initial condition in test_cases
 * it compares the device against the host model for the boundary conditions,
 * a scalar and a vector max reduction, and one integration step.
 *
 * Every comparison appends one line to a "*.testresult" file in the working
 * directory; nothing is judged pass/fail here.
 *
 * @return Always 0.
 */
int run_autotest(void)
{
#define N_MIN (32)
#define N_MAX (512)
for (int n = N_MIN; n <= N_MAX; n += N_MIN) {
AcMeshInfo config;
load_config(&config);
// Override the mesh size from the config file and recompute derived values
config.int_params[AC_nx] = config.int_params[AC_ny] = config.int_params[AC_nz] = n;
update_config(&config);
// Init host
AcMesh* candidate_mesh = acmesh_create(config);
ModelMesh* model_mesh = modelmesh_create(config);
// Init device
acInit(config);
// Check all initial conditions
for (int i = 0; i < ARRAY_SIZE(test_cases); ++i) {
const InitType init_type = test_cases[i];
acmesh_init_to((InitType)init_type, candidate_mesh);
acmesh_to_modelmesh(*candidate_mesh, model_mesh); // Load to Host
acLoad(*candidate_mesh); // Load to Device
// Apply the boundary conditions on both sides before comparing
boundconds(model_mesh->info, model_mesh);
acBoundcondStep();
{ // Check boundconds
acStore(candidate_mesh);
Error boundcond_error = get_max_abs_error_mesh(*model_mesh, *candidate_mesh);
char boundcond_path[MAX_PATH_LEN];
sprintf(boundcond_path, "%s_boundcond_%s.testresult", AC_DOUBLE_PRECISION ? "double" : "float", init_type_names[(InitType)init_type]);
print_error_to_file(boundcond_path, n, boundcond_error);
}
{ // Check scalar max reduction
ModelScalar model = model_reduce_scal(*model_mesh, (ReductionType)RTYPE_MAX, VTXBUF_UUX);
AcReal candidate = acReduceScal((ReductionType)RTYPE_MAX, VTXBUF_UUX);
Error scalar_reduce_error = get_error(model, candidate);
char scalar_reduce_path[MAX_PATH_LEN];
sprintf(scalar_reduce_path, "%s_scalar_reduce_%s.testresult", AC_DOUBLE_PRECISION ? "double" : "float", init_type_names[(InitType)init_type]);
print_error_to_file(scalar_reduce_path, n, scalar_reduce_error);
}
{ // Check vector max reduction
ModelScalar model = model_reduce_vec(*model_mesh, (ReductionType)RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
AcReal candidate = acReduceVec((ReductionType)RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
Error vector_reduce_error = get_error(model, candidate);
char vector_reduce_path[MAX_PATH_LEN];
sprintf(vector_reduce_path, "%s_vector_reduce_%s.testresult", AC_DOUBLE_PRECISION ? "double" : "float", init_type_names[(InitType)init_type]);
print_error_to_file(vector_reduce_path, n, vector_reduce_error);
}
// Time advance
{
// The timestep is derived from the host model, so both sides integrate
// with exactly the same dt
const AcReal umax = (AcReal)model_reduce_vec(*model_mesh, (ReductionType)RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
const AcReal dt = host_timestep(umax, config);
// Host integration step
model_rk3(dt, model_mesh);
boundconds(config, model_mesh);
// Device integration step
acIntegrate(dt);
acBoundcondStep();
acSynchronize();
acStore(candidate_mesh);
// Check fields
for (int vtxbuf_handle = 0; vtxbuf_handle < NUM_VTXBUF_HANDLES; ++vtxbuf_handle) {
Error field_error = get_max_abs_error_vtxbuf((VertexBufferHandle)vtxbuf_handle, *model_mesh, *candidate_mesh);
printf("model %Lg, cand %Lg, abs %Lg, rel %Lg\n", (ModelScalar)field_error.model, (ModelScalar)field_error.candidate, (ModelScalar)field_error.abs_error, (ModelScalar)field_error.rel_error);
char field_path[MAX_PATH_LEN];
sprintf(field_path, "%s_integrationstep_%s_%s.testresult", AC_DOUBLE_PRECISION ? "double" : "float", init_type_names[(InitType)init_type], vtxbuf_names[(VertexBufferHandle)vtxbuf_handle]);
print_error_to_file(field_path, n, field_error);
}
}
}
// Deallocate host
acmesh_destroy(candidate_mesh);
modelmesh_destroy(model_mesh);
// Deallocate device
acQuit();
}
return 0;
}
#endif

300
src/standalone/benchmark.cc Normal file
View File

@@ -0,0 +1,300 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "run.h"
#include <stdlib.h> // EXIT_SUCCESS
#include "config_loader.h"
#include "model/host_memory.h"
#include "model/host_timestep.h"
#include "model/model_reduce.h"
#include "model/model_rk3.h"
#include "timer_hires.h"
#include <vector>
#include <algorithm>
#include <math.h>
#include "src/core/errchk.h"
/**
 * Strict "less than" ordering predicate for doubles, used as the comparator
 * when sorting the measured running times in ascending order.
 */
static bool
smaller_than(const double& a, const double& b)
{
    return b > a;
}
/**
 * Appends one line "n, min, max, median, perc" to the file at `path`.
 *
 * @return EXIT_SUCCESS if the line was written, EXIT_FAILURE if the file
 *         could not be opened.
 */
static int
write_runningtimes(const char* path, const int n, const double min, const double max, const double median, const double perc)
{
    FILE* out = fopen(path, "a");
    if (out == NULL)
        return EXIT_FAILURE;

    fprintf(out, "%d, %f, %f, %f, %f\n", n, min, max, median, perc);
    fclose(out);
    return EXIT_SUCCESS;
}
/**
 * Writes the 0th to 99th percentile of `results` to `path`, one value per
 * line, replacing any previous content of the file.
 *
 * `results` must be sorted in ascending order and is expected to hold
 * `num_iters` samples. Line i holds results[(i * num_iters) / 100].
 *
 * The index is computed with integer arithmetic: the earlier floating-point
 * form (i / 100.) * num_iters truncated some indices one too low (for
 * example 0.29 * 100 evaluates to 28.999...). The index is additionally
 * clamped to the last element so that a `results` vector shorter than
 * `num_iters` cannot be read out of bounds.
 *
 * @param path      Output file.
 * @param num_iters Number of samples the percentiles refer to.
 * @param results   Sorted samples.
 * @return EXIT_SUCCESS on success; EXIT_FAILURE if `results` is empty,
 *         `num_iters` is negative, or the file could not be opened.
 */
static int
write_percentiles(const char* path, const int num_iters, const std::vector<double>& results)
{
    if (results.empty() || num_iters < 0)
        return EXIT_FAILURE;

    FILE* fp = fopen(path, "w");
    if (fp == NULL)
        return EXIT_FAILURE;

    const size_t num_samples = size_t(num_iters);
    const size_t last        = results.size() - 1;
    for (size_t i = 0; i < 100; ++i) {
        size_t idx = (i * num_samples) / 100;
        if (idx > last)
            idx = last;
        fprintf(fp, "%f\n", results[idx]);
    }

    fclose(fp);
    return EXIT_SUCCESS;
}
/**
 * Benchmarks the integration on the device and writes the timings to disk.
 *
 * For every mesh size, NUM_ITERS timed iterations are run after a warm-up.
 * Depending on GEN_BENCHMARK_RK3, one iteration is either a single call to
 * acIntegrateStep() or a full acIntegrate() step. Results go to
 * "<double|float>_<rk3substep|fullstep>_runningtimes.out" (summary) and
 * "<n>_<double|float>_<rk3substep|fullstep>_percentiles.out" (percentiles)
 * in the working directory.
 *
 * @return 0 on completion, EXIT_FAILURE if the summary file could not be
 *         created.
 */
int
run_benchmark(void)
{
char runningtime_path[256];
sprintf(runningtime_path, "%s_%s_runningtimes.out", AC_DOUBLE_PRECISION ? "double" : "float", GEN_BENCHMARK_RK3 ? "rk3substep" : "fullstep");
// Start a fresh summary file containing only the header line;
// write_runningtimes() appends to it later
FILE* fp;
fp = fopen(runningtime_path, "w");
if (fp != NULL) {
fprintf(fp, "n, min, max, median, perc\n");
fclose(fp);
} else {
return EXIT_FAILURE;
}
// With N_STEP_SIZE equal to MAX_MESH_DIM the loop below runs exactly once,
// for n = 128
#define N_STEP_SIZE (128)
#define MAX_MESH_DIM (128)
#define NUM_ITERS (100)
for (int n = N_STEP_SIZE; n <= MAX_MESH_DIM; n += N_STEP_SIZE) {
/* Parse configs */
AcMeshInfo mesh_info;
load_config(&mesh_info);
// Use a cubic n^3 mesh regardless of the size given in the config file
mesh_info.int_params[AC_nx] = n;
mesh_info.int_params[AC_ny] = mesh_info.int_params[AC_nx];
mesh_info.int_params[AC_nz] = mesh_info.int_params[AC_nx];
update_config(&mesh_info);
AcMesh* mesh = acmesh_create(mesh_info);
acmesh_init_to(INIT_TYPE_ABC_FLOW, mesh);
acInit(mesh_info);
acLoad(*mesh);
std::vector<double> results;
results.reserve(NUM_ITERS);
// Warmup: ten untimed integrations with dt = 0
for (int i = 0; i < 10; ++i) {
acIntegrate(0);
acSynchronize();
}
Timer t;
for (int i = 0; i < NUM_ITERS; ++i) {
timer_reset(&t);
#if GEN_BENCHMARK_RK3 == 1
acIntegrateStep(2, FLT_EPSILON);
#else // GEN_BENCHMARK_FULL
//const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
const AcReal dt = AcReal(1e-2); // TODO adaptive timestep //host_timestep(umax, mesh_info);
acIntegrate(dt);
#endif
// Synchronize before reading the timer so that the measured interval
// includes the synchronization
acSynchronize();
const double ms_elapsed = timer_diff_nsec(t) / 1e6;
results.push_back(ms_elapsed);
}
#define NTH_PERCENTILE (0.95)
// Sort ascending so that min, max, median and percentiles can be indexed
std::sort(results.begin(), results.end(), smaller_than);
// NOTE(review): the return values of write_runningtimes() and
// write_percentiles() are ignored, so a failed write goes unnoticed
write_runningtimes(runningtime_path, n, results[0], results[results.size()-1], results[int(0.5 * NUM_ITERS)], results[int(NTH_PERCENTILE * NUM_ITERS)]);
char percentile_path[256];
sprintf(percentile_path, "%d_%s_%s_percentiles.out", n, AC_DOUBLE_PRECISION ? "double" : "float", GEN_BENCHMARK_RK3 ? "rk3substep" : "fullstep");
write_percentiles(percentile_path, NUM_ITERS, results);
printf("%s running time %g ms, (%dth percentile, nx = %d) \n", GEN_BENCHMARK_RK3 ? "RK3 step" : "Fullstep", double(results[int(NTH_PERCENTILE * NUM_ITERS)]), int(NTH_PERCENTILE * 100), mesh_info.int_params[AC_nx]);
acStore(mesh);
acQuit();
acmesh_destroy(mesh);
}
return 0;
}
/*
#if AUTO_OPTIMIZE
const char* benchmark_path = "benchmark.out";
#include "core/kernels/rk3_threadblock.conf"
static int
write_result_to_file(const float& ms_per_step)
{
FILE* fp;
fp = fopen(benchmark_path, "a");
if (fp != NULL) {
fprintf(fp,
"(%d, %d, %d), %d elems per thread, launch bound %d, %f ms\n",
RK_THREADS_X, RK_THREADS_Y, RK_THREADS_Z, RK_ELEMS_PER_THREAD,
RK_LAUNCH_BOUND_MIN_BLOCKS, double(ms_per_step));
fclose(fp);
return EXIT_SUCCESS;
}
return EXIT_FAILURE;
}
#endif
#if GENERATE_BENCHMARK_DATA != 1
int
run_benchmark(void)
{
// Parse configs
AcMeshInfo mesh_info;
load_config(&mesh_info);
mesh_info.int_params[AC_nx] = 128;
mesh_info.int_params[AC_ny] = mesh_info.int_params[AC_nx];
mesh_info.int_params[AC_nz] = mesh_info.int_params[AC_nx];
update_config(&mesh_info);
AcMesh* mesh = acmesh_create(mesh_info);
acmesh_init_to(INIT_TYPE_ABC_FLOW, mesh);
acInit(mesh_info);
acLoad(*mesh);
Timer t;
timer_reset(&t);
int steps = 0;
const int num_steps = 100;
while (steps < num_steps) {
// Advance the simulation
const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY,
VTXBUF_UUZ);
const AcReal dt = host_timestep(umax, mesh_info);
acIntegrate(dt);
++steps;
}
acSynchronize();
const float wallclock = timer_diff_nsec(t) / 1e9f;
printf("%d steps. Wallclock time %f s per step\n", steps,
double(wallclock) / num_steps);
#if AUTO_OPTIMIZE
write_result_to_file(wallclock * 1e3f / steps);
#endif
acStore(mesh);
acQuit();
acmesh_destroy(mesh);
return 0;
}
#else //////////////////////////////////////////////////////////////////////////GENERATE_BENCHMARK_DATA
int
run_benchmark(void)
{
const char path[] = "result.out";
FILE* fp;
fp = fopen(path, "w");
if (fp != NULL) {
fprintf(fp, "n, min, max, median, perc\n");
fclose(fp);
} else {
return EXIT_FAILURE;
}
#define N_STEP_SIZE (256)
#define MAX_MESH_DIM (256)
#define NUM_ITERS (1000)
for (int n = N_STEP_SIZE; n <= MAX_MESH_DIM; n += N_STEP_SIZE) {
// Parse configs
AcMeshInfo mesh_info;
load_config(&mesh_info);
mesh_info.int_params[AC_nx] = n;
mesh_info.int_params[AC_ny] = mesh_info.int_params[AC_nx];
mesh_info.int_params[AC_nz] = mesh_info.int_params[AC_nx];
update_config(&mesh_info);
AcMesh* mesh = acmesh_create(mesh_info);
acmesh_init_to(INIT_TYPE_ABC_FLOW, mesh);
acInit(mesh_info);
acLoad(*mesh);
std::vector<double> results;
results.reserve(NUM_ITERS);
// Warmup
for (int i = 0; i < 10; ++i) {
acIntegrate(0);
acSynchronize();
}
Timer t;
const AcReal dt = AcReal(1e-5);
for (int i = 0; i < NUM_ITERS; ++i) {
timer_reset(&t);
//acIntegrate(dt);
acIntegrateStep(2, dt);
acSynchronize();
const double ms_elapsed = timer_diff_nsec(t) / 1e6;
results.push_back(ms_elapsed);
}
#define NTH_PERCENTILE (0.95)
std::sort(results.begin(), results.end(), smaller_than);
write_result(n, results[0], results[results.size()-1], results[int(0.5 * NUM_ITERS)], results[int(NTH_PERCENTILE * NUM_ITERS)]);
write_percentiles(n, NUM_ITERS, results);
}
return 0;
}
#endif
*/

View File

@@ -0,0 +1,194 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "config_loader.h"
#include <limits.h> // UINT_MAX
#include <stdint.h> // uint8_t, uint32_t
#include <stdio.h> // print
#include <string.h> // memset
#include "core/errchk.h"
#include "core/math_utils.h"
/**
 * Prints every integer and real parameter of `config` to stdout, one
 * "[name]: value" line each; integers first, then reals.
 */
static inline void
print(const AcMeshInfo& config)
{
    for (int int_idx = 0; int_idx < NUM_INT_PARAM_TYPES; ++int_idx) {
        printf("[%s]: %d\n", intparam_names[int_idx], config.int_params[int_idx]);
    }

    for (int real_idx = 0; real_idx < NUM_REAL_PARAM_TYPES; ++real_idx) {
        const double value = double(config.real_params[real_idx]);
        printf("[%s]: %g\n", realparam_names[real_idx], value);
    }
}
/**
    \brief Looks up a keyword in an array of names (exact, case-sensitive
           match).
    \param keyword String to search for.
    \param names   Array of at least n strings.
    \param n       Number of entries of names to search.
    \return Index in range 0...n-1 of the first matching entry, or -1 if the
            keyword was not found.
*/
static int
find_str(const char keyword[], const char* names[], const int& n)
{
    int idx = 0;
    while (idx < n) {
        if (strcmp(keyword, names[idx]) == 0)
            return idx;
        ++idx;
    }
    return -1;
}
/**
 * Reads "keyword = value" pairs from the file at `path` and stores every
 * value whose keyword is a known integer or real parameter name in `config`.
 * Unknown keywords and items that do not match the pattern are skipped.
 *
 * Aborts through ERRCHK if the file cannot be opened.
 *
 * @param path   Path of the configuration file.
 * @param config Destination; only the parameters present in the file are
 *               written.
 */
static void
parse_config(const char* path, AcMeshInfo* config)
{
    FILE* fp;
    fp = fopen(path, "r");
    // For knowing which .conf file will be used
    printf("Config file path: \n %s \n ", path);
    ERRCHK(fp != NULL);

    const size_t BUF_SIZE = 128;
    char keyword[BUF_SIZE];
    char value[BUF_SIZE];
    int items_matched;
    // The field widths (127) must stay at BUF_SIZE - 1: an unbounded "%s"
    // would write past the end of the buffers when the file contains a token
    // of 128 characters or more.
    while ((items_matched = fscanf(fp, "%127s = %127s", keyword, value)) != EOF) {
        if (items_matched < 2)
            continue;

        int idx = -1;
        if ((idx = find_str(keyword, intparam_names, NUM_INT_PARAM_TYPES)) >= 0)
            config->int_params[idx] = atoi(value);
        else if ((idx = find_str(keyword, realparam_names,
                                 NUM_REAL_PARAM_TYPES)) >= 0)
            config->real_params[idx] = AcReal(atof(value));
    }

    fclose(fp);
}
/**
 * Recomputes all parameters of `config` that are derived from the primary
 * ones (mesh dimensions, grid spacing, sound speed, unit system, star mass)
 * and prints the resulting configuration to stdout.
 *
 * Must be called after any of the primary parameters has been changed.
 *
 * NOTE(review): AC_M_star is overwritten in place with its rescaled value, so
 * every additional call on the same struct rescales it again. load_config()
 * already calls this function once, and the autotest and benchmark drivers
 * call it again after changing the mesh size -- confirm whether that is
 * intended.
 */
void
update_config(AcMeshInfo* config)
{
// Full mesh size = computational domain + STENCIL_ORDER extra points in
// total per axis (the domain starts at STENCIL_ORDER / 2, see below)
config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER;
///////////// PAD TEST
//config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER + PAD_SIZE;
///////////// PAD TEST
config->int_params[AC_my] = config->int_params[AC_ny] + STENCIL_ORDER;
config->int_params[AC_mz] = config->int_params[AC_nz] + STENCIL_ORDER;
// Bounds for the computational domain, i.e. nx_min <= i < nx_max
config->int_params[AC_nx_min] = STENCIL_ORDER / 2;
config->int_params[AC_nx_max] = config->int_params[AC_nx_min] +
config->int_params[AC_nx];
config->int_params[AC_ny_min] = STENCIL_ORDER / 2;
config->int_params[AC_ny_max] = config->int_params[AC_ny] +
STENCIL_ORDER / 2;
config->int_params[AC_nz_min] = STENCIL_ORDER / 2;
config->int_params[AC_nz_max] = config->int_params[AC_nz] +
STENCIL_ORDER / 2;
// Spacing: inverse grid spacings and the smallest spacing over all axes
config->real_params[AC_inv_dsx] = AcReal(1.) / config->real_params[AC_dsx];
config->real_params[AC_inv_dsy] = AcReal(1.) / config->real_params[AC_dsy];
config->real_params[AC_inv_dsz] = AcReal(1.) / config->real_params[AC_dsz];
config->real_params[AC_dsmin] = min(config->real_params[AC_dsx], min(config->real_params[AC_dsy], config->real_params[AC_dsz]));
// Real grid coordinates (defined for the grid including the ghost zones)
config->real_params[AC_xlen] = config->real_params[AC_dsx]*config->int_params[AC_mx];
config->real_params[AC_ylen] = config->real_params[AC_dsy]*config->int_params[AC_my];
config->real_params[AC_zlen] = config->real_params[AC_dsz]*config->int_params[AC_mz];
// The origin is placed at the centre of the full grid
config->real_params[AC_xorig] = AcReal(.5) * config->real_params[AC_xlen];
config->real_params[AC_yorig] = AcReal(.5) * config->real_params[AC_ylen];
config->real_params[AC_zorig] = AcReal(.5) * config->real_params[AC_zlen];
/* Additional helper params */
// Int helpers: element counts of an xy-plane and of the computational domain
config->int_params[AC_mxy] = config->int_params[AC_mx] *
config->int_params[AC_my];
config->int_params[AC_nxy] = config->int_params[AC_nx] *
config->int_params[AC_ny];
config->int_params[AC_nxyz] = config->int_params[AC_nxy] *
config->int_params[AC_nz];
// Real helpers
config->real_params[AC_cs2_sound] = config->real_params[AC_cs_sound] *
config->real_params[AC_cs_sound];
config->real_params[AC_cv_sound] = config->real_params[AC_cp_sound] / config->real_params[AC_gamma];
AcReal G_CONST_CGS = AcReal(6.674e-8); // cm^3 g^-1 s^-2, CGS definition //TODO define in a separate module
AcReal M_sun = AcReal(1.989e33); // g solar mass
// Convert the star mass from solar masses to code units
// (unit_length^3 * unit_density); this overwrites AC_M_star
config->real_params[AC_M_star] = config->real_params[AC_M_star]*M_sun /
( (config->real_params[AC_unit_length]*
config->real_params[AC_unit_length]*
config->real_params[AC_unit_length]) *
config->real_params[AC_unit_density] ) ;
// Gravitational constant expressed in code units
config->real_params[AC_G_CONST] = G_CONST_CGS /
( (config->real_params[AC_unit_velocity]*config->real_params[AC_unit_velocity]) /
(config->real_params[AC_unit_density] *config->real_params[AC_unit_length]) ) ;
config->real_params[AC_GM_star] = config->real_params[AC_M_star]*config->real_params[AC_G_CONST];
config->real_params[AC_sq2GM_star] = AcReal(sqrt(AcReal(2)*config->real_params[AC_GM_star]));
// Always on for now; kept as a switch so the dump can be silenced
const bool print_config = true;
if (print_config) {
printf("###############################################################"
"\n");
printf("Config dimensions recalculated:\n");
print(*config);
printf("###############################################################"
"\n");
}
}
/**
\brief Loads data from astaroth.conf into a config struct.

The struct is first filled with 0xFF bytes, then astaroth.conf is parsed into
it and the derived values are recalculated with update_config(). Any 32-bit
word that still holds the all-ones pattern afterwards is taken as a sign that
a value was never assigned.

\param config Struct to fill; its previous contents are overwritten.
\return 0 on success, -1 if there are potentially uninitialized values.
*/
int
load_config(AcMeshInfo* config)
{
    int retval = 0;

    // memset reads the second parameter as a byte even though it says int in
    // the function declaration
    memset(config, (uint8_t)0xFF, sizeof(*config));

    parse_config(CONFIG_PATH "astaroth.conf", config);
    update_config(config);

    // sizeof(config) must be a multiple of 4 bytes for this to work
    ERRCHK(sizeof(*config) % sizeof(uint32_t) == 0);

    const size_t num_words = sizeof(*config) / sizeof(uint32_t);
    for (size_t i = 0; i < num_words; ++i) {
        if (((uint32_t*)config)[i] == (uint32_t)0xFFFFFFFF) {
            WARNING("Some config values may be uninitialized. "
                    "See that all are defined in astaroth.conf\n");
            retval = -1;
            // One warning is enough: the message is identical for every
            // poisoned word, so continuing would only flood the output.
            break;
        }
    }
    return retval;
}

View File

@@ -0,0 +1,34 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Functions for loading and updating AcMeshInfo.
*
*/
#pragma once
#include "astaroth.h"
/** Loads data from the config file (astaroth.conf) into `config` and
 * recalculates the derived values.
 * Returns 0 on success, -1 if some values are potentially uninitialized. */
int load_config(AcMeshInfo* config);
/** Recalculates the derived parameters: the int parameters which get their
 * values from nx, ny and nz, and the real parameters computed from the grid
 * spacings, sound speed, unit system and stellar mass.
 * Must be called after modifying the config struct or otherwise
 * contents of the struct will be incorrect */
void update_config(AcMeshInfo* config);

94
src/standalone/main.cc Normal file
View File

@@ -0,0 +1,94 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "core/errchk.h"
#include "run.h"
// When true, main() redirects stderr to the file <errorlog_name> in the
// current working directory and echoes that file to stdout at exit.
// Disabled by default.
static const bool write_log_to_a_file = false;
// File name (relative to the working directory) used for the error log
static const char* errorlog_name = "error.log";
/** Redirects stderr into the file errorlog_name (truncating it), so that
 *  subsequent error output is logged to the file. Reports a failure of the
 *  redirection with perror(). */
static void
errorlog_init(void)
{
    // freopen returns NULL when the stream could not be reopened
    if (freopen(errorlog_name, "w", stderr) == NULL)
        perror("Error redirecting stderr to a file");
}
/** Exit handler: prints the contents of the latest error log to stdout.
 *
 *  stderr is flushed rather than closed. Closing it would make the perror()
 *  call below (and any later write to stderr during shutdown) operate on a
 *  closed stream, which is undefined behaviour. Flushing is sufficient for
 *  the log contents to be visible when the file is reopened for reading. */
static void
errorlog_quit(void)
{
    fflush(stderr);

    // Print contents of the latest errorlog to screen
    FILE* fp = fopen(errorlog_name, "r");
    if (fp) {
        for (int c = getc(fp); c != EOF; c = getc(fp))
            putchar(c);
        fclose(fp);
    }
    else {
        perror("Error opening error log");
    }
}
/**
 * Program entry point. Echoes the command-line arguments and dispatches:
 *   (no option)  renderer
 *   -t           autotest
 *   -b           benchmark
 *   -s           simulation
 * Any other invocation emits a warning and returns EXIT_FAILURE.
 */
int
main(int argc, char* argv[])
{
    // Optional redirection of stderr into a log that is echoed at exit
    if (write_log_to_a_file) {
        errorlog_init();
        atexit(errorlog_quit);
    }

    printf("Args: \n");
    for (int arg = 0; arg < argc; ++arg)
        printf("%d: %s\n", arg, argv[arg]);

    // Without options the interactive renderer is started
    if (argc == 1)
        return run_renderer();

    // Exactly one option is supported
    if (argc != 2) {
        WARNING("Too many options given");
        return EXIT_FAILURE;
    }

    const char* option = argv[1];
    if (strcmp(option, "-t") == 0)
        return run_autotest();
    if (strcmp(option, "-b") == 0)
        return run_benchmark();
    if (strcmp(option, "-s") == 0)
        return run_simulation();

    WARNING("Unrecognized option");
    return EXIT_FAILURE;
}

View File

@@ -0,0 +1,737 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "host_memory.h"
#include <math.h>
#include "core/errchk.h"
// Table of initial-condition names, generated by applying AC_GEN_STR to each
// entry of the AC_FOR_INIT_TYPES list.
const char* init_type_names[] = {AC_FOR_INIT_TYPES(AC_GEN_STR)};
// Half of (number of computational-domain cells * grid spacing) along each
// axis. These macros expand to an expression that reads `mesh->info`, so they
// can only be used where a variable named `mesh` is in scope.
#define XORIG (AcReal(.5) * mesh->info.int_params[AC_nx] * mesh->info.real_params[AC_dsx])
#define YORIG (AcReal(.5) * mesh->info.int_params[AC_ny] * mesh->info.real_params[AC_dsy])
#define ZORIG (AcReal(.5) * mesh->info.int_params[AC_nz] * mesh->info.real_params[AC_dsz])
/*
#include <stdint.h>
static uint64_t ac_rand_next = 1;
static int32_t
ac_rand(void)
{
ac_rand_next = ac_rand_next * 1103515245 + 12345;
return (uint32_t)(ac_rand_next/65536) % 32768;
}
static void
ac_srand(const uint32_t seed)
{
ac_rand_next = seed;
}
*/
/**
 * Allocates a host mesh together with one vertex buffer per handle.
 *
 * \param mesh_info Configuration copied into the new mesh; it also determines
 *                  the size of each vertex buffer.
 * \return The newly allocated mesh. Release it with acmesh_destroy().
 *
 * Allocation failures are caught with ERRCHK. The buffer contents are left
 * uninitialized.
 */
AcMesh*
acmesh_create(const AcMeshInfo& mesh_info)
{
    AcMesh* mesh = (AcMesh*)malloc(sizeof(*mesh));
    // Check before the first dereference below
    ERRCHK(mesh != NULL);

    mesh->info = mesh_info;

    const size_t bytes = AC_VTXBUF_SIZE_BYTES(mesh->info);
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        mesh->vertex_buffer[VertexBufferHandle(i)] = (AcReal*)malloc(bytes);
        ERRCHK(mesh->vertex_buffer[VertexBufferHandle(i)] != NULL);
    }

    return mesh;
}
static void
vertex_buffer_set(const VertexBufferHandle& key, const AcReal& val,
AcMesh* mesh)
{
const int n = AC_VTXBUF_SIZE(mesh->info);
for (int i = 0; i < n; ++i)
mesh->vertex_buffer[key][i] = val;
}
/** Resets every vertex buffer of the mesh to 1.
    One is used instead of zero because some fields are supposed to be > 0;
    with a zero mesh the results would vary widely, which leads to loss of
    precision in the computations. */
void
acmesh_clear(AcMesh* mesh)
{
    const AcReal default_value = 1; // All fields start from 1 by default.

    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle)
        vertex_buffer_set(VertexBufferHandle(handle), default_value, mesh);
}
/** Returns the next rand() sample scaled by RAND_MAX, i.e. a value in the
 *  closed range [0, 1]. */
static AcReal
randr(void)
{
    const AcReal sample = AcReal(rand());
    const AcReal scale  = AcReal(RAND_MAX);
    return sample / scale;
}
/**
 * Sets the logarithmic density to a constant over the whole mesh, ghost zones
 * included.
 *
 * The tanh step profile with a cosine perturbation that this function was
 * written for is currently disabled; every cell of VTXBUF_LNRHO receives
 * lnrho2 instead.
 */
void
lnrho_step(AcMesh* mesh)
{
    const int mx = mesh->info.int_params[AC_mx];
    const int my = mesh->info.int_params[AC_my];
    const int mz = mesh->info.int_params[AC_mz];

    const AcReal lnrho2 = (AcReal) 0.0; // TODO mesh->info.real_params[AC_lnrho2];

    AcReal* const lnrho = mesh->vertex_buffer[VTXBUF_LNRHO];

    for (int k = 0; k < mz; k++) {
        const int plane = k * mx * my; // offset of the k:th xy-plane
        for (int j = 0; j < my; j++) {
            const int row = plane + j * mx; // offset of the j:th row in the plane
            for (int i = 0; i < mx; i++) {
                // Step-function initial condition is disabled:
                // lnrho[row + i] = log(tanhprof);
                lnrho[row + i] = lnrho2;
            }
        }
    }
}
// Initial condition type for the infalling wedge in the pseudodisk model.
//
// Over the whole mesh (ghost zones included) the velocity is set to
//   ux = -AMPL_UU * cos(ANGL_UU) * |tanh(z / trans)|
//   uy = 0
//   uz = -AMPL_UU * sin(ANGL_UU) *  tanh(z / trans)
// where z = dsz * k - zorig. The profile depends on the z index only.
void
inflow_vedge(AcMesh* mesh)
{
    const int mx = mesh->info.int_params[AC_mx];
    const int my = mesh->info.int_params[AC_my];
    const int mz = mesh->info.int_params[AC_mz];

    const double DZ      = mesh->info.real_params[AC_dsz];
    const double AMPL_UU = mesh->info.real_params[AC_ampl_uu];
    const double ANGL_UU = mesh->info.real_params[AC_angl_uu];
    const double zorig   = mesh->info.real_params[AC_zorig];
    const double trans   = mesh->info.real_params[AC_trans];

    AcReal* const uux = mesh->vertex_buffer[VTXBUF_UUX];
    AcReal* const uuy = mesh->vertex_buffer[VTXBUF_UUY];
    AcReal* const uuz = mesh->vertex_buffer[VTXBUF_UUZ];

    // Direction-dependent amplitudes do not vary over the mesh
    const double ampl_x = -AMPL_UU * cos(ANGL_UU);
    const double ampl_z = -AMPL_UU * sin(ANGL_UU);

    for (int k = 0; k < mz; k++) {
        // Everything below is a function of the z coordinate alone
        const double zz      = DZ * double(k) - zorig;
        const double profile = tanh(zz / trans);
        const AcReal ux      = AcReal(ampl_x * fabs(profile));
        const AcReal uz      = AcReal(ampl_z * profile);

        for (int j = 0; j < my; j++) {
            for (int i = 0; i < mx; i++) {
                const int idx = i + j * mx + k * mx * my;

                uux[idx] = ux;
                uuy[idx] = AcReal(0.0);
                uuz[idx] = uz;
            }
        }
    }
}
// Initial condition type for the infalling wedge in the pseudodisk model,
// using a free-fall velocity field towards a star in the xz-plane.
//
// For every cell of the mesh (ghost zones included) the distance RR to the
// star position (AC_star_pos_x, AC_star_pos_z) is computed and the speed is
// set to AC_sq2GM_star / sqrt(RR), directed towards the star. The resulting
// (u_x, u_z) is then rotated by AC_angl_uu, with the opposite sense of
// rotation below the star (delz < 0). UUY is set to zero.
//
// A cell that coincides exactly with the star position (RR == 0) has no
// defined direction and an infinite free-fall speed; its velocity is set to
// zero instead of letting inf * (0/0) write NaNs into the mesh.
void
inflow_vedge_freefall(AcMesh* mesh)
{
    const int mx = mesh->info.int_params[AC_mx];
    const int my = mesh->info.int_params[AC_my];
    const int mz = mesh->info.int_params[AC_mz];

    const double DX      = mesh->info.real_params[AC_dsx];
    const double DZ      = mesh->info.real_params[AC_dsz];
    const double ANGL_UU = mesh->info.real_params[AC_angl_uu];
    const double SQ2GM   = mesh->info.real_params[AC_sq2GM_star];
    const double xorig   = mesh->info.real_params[AC_xorig];
    const double zorig   = mesh->info.real_params[AC_zorig];

    const double star_pos_x = mesh->info.real_params[AC_star_pos_x];
    const double star_pos_z = mesh->info.real_params[AC_star_pos_z];

    // TODO: Figure out whether a fabs(tanh(zz/trans)) profile is needed here.
    // For now this is a placeholder that leaves the velocities unscaled.
    const double tanhz = 1.0;

    for (int k = 0; k < mz; k++) {
        const double zz   = DZ * double(k) - zorig;
        const double delz = zz - star_pos_z;

        for (int j = 0; j < my; j++) {
            for (int i = 0; i < mx; i++) {
                const int idx = i + j * mx + k * mx * my;

                const double xx   = DX * double(i) - xorig;
                const double delx = xx - star_pos_x;
                const double RR   = sqrt(delx * delx + delz * delz);

                // Velocity components pointing towards the star
                double u_x = 0.0;
                double u_z = 0.0;
                if (RR > 0.0) {
                    const double veltot = SQ2GM / sqrt(RR); // Free fall velocity
                    u_x = -veltot * (delx / RR);
                    u_z = -veltot * (delz / RR);
                }

                // Here including an angle tilt due to pseudodisk
                if (delz >= 0.0) {
                    mesh->vertex_buffer[VTXBUF_UUX][idx] = AcReal((u_x*cos(ANGL_UU) - u_z*sin(ANGL_UU))*tanhz);
                    mesh->vertex_buffer[VTXBUF_UUY][idx] = AcReal(0.0);
                    mesh->vertex_buffer[VTXBUF_UUZ][idx] = AcReal((u_x*sin(ANGL_UU) + u_z*cos(ANGL_UU))*tanhz);
                } else {
                    mesh->vertex_buffer[VTXBUF_UUX][idx] = AcReal((u_x*cos(ANGL_UU) + u_z*sin(ANGL_UU))*tanhz);
                    mesh->vertex_buffer[VTXBUF_UUY][idx] = AcReal(0.0);
                    mesh->vertex_buffer[VTXBUF_UUZ][idx] = AcReal((-u_x*sin(ANGL_UU) + u_z*cos(ANGL_UU))*tanhz);
                }
            }
        }
    }
}
// Only x-direction free fall.
//
// The free-fall condition itself is currently disabled: the mesh (ghost zones
// included) starts from a steady state with zero velocity and a constant
// logarithmic density AC_ampl_lnrho. The free-fall speed towards
// AC_star_pos_x is still evaluated, but only to report cells where it
// becomes infinite, i.e. cells that coincide with the star position.
void
inflow_freefall_x(AcMesh* mesh)
{
    const int mx = mesh->info.int_params[AC_mx];
    const int my = mesh->info.int_params[AC_my];
    const int mz = mesh->info.int_params[AC_mz];

    const double DX    = mesh->info.real_params[AC_dsx];
    const double SQ2GM = mesh->info.real_params[AC_sq2GM_star];
    const double xorig = mesh->info.real_params[AC_xorig];

    const double star_pos_x = mesh->info.real_params[AC_star_pos_x];
    const double ampl_lnrho = mesh->info.real_params[AC_ampl_lnrho];

    for (int k = 0; k < mz; k++) {
        for (int j = 0; j < my; j++) {
            for (int i = 0; i < mx; i++) {
                const int idx = i + j * mx + k * mx * my;

                const double xx     = DX * double(i) - xorig;
                const double delx   = xx - star_pos_x;
                const double RR     = fabs(delx);
                const double veltot = SQ2GM / sqrt(RR); // Free fall velocity

                // isinf() is only guaranteed to return nonzero for an
                // infinity, so the result is not compared against 1.
                if (isinf(veltot))
                    printf("xx %e star_pos_x %e delx %e RR %e veltot %e\n", xx, star_pos_x, delx, RR, veltot);

                // Freefall condition (disabled):
                // mesh->vertex_buffer[VTXBUF_UUX][idx] = -veltot;

                // Starting with steady state
                mesh->vertex_buffer[VTXBUF_UUX][idx] = 0.0;
                mesh->vertex_buffer[VTXBUF_UUY][idx] = 0.0;
                mesh->vertex_buffer[VTXBUF_UUZ][idx] = 0.0;

                mesh->vertex_buffer[VTXBUF_LNRHO][idx] = AcReal(ampl_lnrho);
            }
        }
    }
}
/**
 * Initializes the velocity field of the computational domain to a radially
 * outward flow whose magnitude is a Gaussian shell:
 *   uu_radial = AMPL_UU * exp(-(rr - UU_SHELL_R)^2 / (2 * WIDTH_UU^2))
 * The radial velocity is converted to Cartesian components with the
 * spherical angles theta (measured from the +z axis) and phi (measured in the
 * xy-plane), which are derived quadrant by quadrant below.
 *
 * Only the cells in [n*_min, n*_max) are written; the ghost zones and the
 * other vertex buffers are left untouched.
 */
void
gaussian_radial_explosion(AcMesh* mesh)
{
AcReal* uu_x = mesh->vertex_buffer[VTXBUF_UUX];
AcReal* uu_y = mesh->vertex_buffer[VTXBUF_UUY];
AcReal* uu_z = mesh->vertex_buffer[VTXBUF_UUZ];
const int mx = mesh->info.int_params[AC_mx];
const int my = mesh->info.int_params[AC_my];
const int nx_min = mesh->info.int_params[AC_nx_min];
const int nx_max = mesh->info.int_params[AC_nx_max];
const int ny_min = mesh->info.int_params[AC_ny_min];
const int ny_max = mesh->info.int_params[AC_ny_max];
const int nz_min = mesh->info.int_params[AC_nz_min];
const int nz_max = mesh->info.int_params[AC_nz_max];
const double DX = mesh->info.real_params[AC_dsx];
const double DY = mesh->info.real_params[AC_dsy];
const double DZ = mesh->info.real_params[AC_dsz];
// Origin of the computational domain, shifted by a small offset.
// NOTE(review): the reason for the 1e-6 shift is not stated; presumably it
// keeps grid points from landing exactly on the origin/axes - confirm.
const double xorig = double(XORIG) - 0.000001;
const double yorig = double(YORIG) - 0.000001;
const double zorig = double(ZORIG) - 0.000001;
// Centre of the explosion relative to the origin
const double INIT_LOC_UU_X = 0.0;
const double INIT_LOC_UU_Y = 0.0;
const double INIT_LOC_UU_Z = 0.0;
const double AMPL_UU = mesh->info.real_params[AC_ampl_uu];
// Radius of the velocity peak and the width of the Gaussian shell
const double UU_SHELL_R = 0.8;
const double WIDTH_UU = 0.2;
// Outward explosion with gaussian initial velocity profile.
int idx;
double xx, yy, zz, rr2, rr, theta = 0.0, phi = 0.0;
double uu_radial;
// double theta_old = 0.0;
for (int k = nz_min; k < nz_max; k++) {
for (int j = ny_min; j < ny_max; j++) {
for (int i = nx_min; i < nx_max; i++) {
// Calculate the value of velocity in a particular radius.
idx = i + j * mx + k * mx * my;
// Determine the coordinates
xx = DX * (i - nx_min) - xorig;
xx = xx - INIT_LOC_UU_X;
yy = DY * (j - ny_min) - yorig;
yy = yy - INIT_LOC_UU_Y;
zz = DZ * (k - nz_min) - zorig;
zz = zz - INIT_LOC_UU_Z;
rr2 = pow(xx, 2.0) + pow(yy, 2.0) + pow(zz, 2.0);
rr = sqrt(rr2);
// Origin is different!
double xx_abs, yy_abs, zz_abs;
if (rr > 0.0) {
// theta range [0, PI]
if (zz >= 0.0) {
theta = acos(zz / rr);
// Sanity check: the upper half space must give theta in [0, PI/2]
if (theta > M_PI / 2.0 || theta < 0.0) {
printf("Explosion THETA WRONG: zz = %.3f, rr = "
"%.3f, theta = %.3e/PI, M_PI = %.3e\n",
zz, rr, theta / M_PI, M_PI);
}
}
else {
zz_abs = -zz; // Needs a positive value for acos
theta = M_PI - acos(zz_abs / rr);
// Sanity check for the lower half space
if (theta < M_PI / 2.0 || theta > 2 * M_PI) {
printf("Explosion THETA WRONG: zz = %.3f, rr = "
"%.3f, theta = %.3e/PI, M_PI = %.3e\n",
zz, rr, theta / M_PI, M_PI);
}
}
// phi range [0, 2*PI], resolved separately for each quadrant of the
// xy-plane. Each branch also verifies that phi landed in its quadrant.
if (xx != 0.0) {
if (xx < 0.0 && yy >= 0.0) {
//-+
xx_abs = -xx; // Needs a positive value for atan
phi = M_PI - atan(yy / xx_abs);
if (phi < (M_PI / 2.0) || phi > M_PI) {
printf("Explosion PHI WRONG -+: xx = %.3f, yy "
"= %.3f, phi = %.3e/PI, M_PI = %.3e\n",
xx, yy, phi / M_PI, M_PI);
}
}
else if (xx > 0.0 && yy < 0.0) {
//+-
yy_abs = -yy;
phi = 2.0 * M_PI - atan(yy_abs / xx);
if (phi < (3.0 * M_PI) / 2.0 ||
phi > (2.0 * M_PI + 1e-6)) {
printf("Explosion PHI WRONG +-: xx = %.3f, yy "
"= %.3f, phi = %.3e/PI, M_PI = %.3e\n",
xx, yy, phi / M_PI, M_PI);
}
}
else if (xx < 0.0 && yy < 0.0) {
//--
yy_abs = -yy;
xx_abs = -xx;
phi = M_PI + atan(yy_abs / xx_abs);
if (phi < M_PI ||
phi > ((3.0 * M_PI) / 2.0 + 1e-6)) {
printf("Explosion PHI WRONG --: xx = %.3f, yy "
"= %.3f, xx_abs = %.3f, yy_abs = %.3f, "
"phi = %.3e, (3.0*M_PI)/2.0 = %.3e\n",
xx, yy, xx_abs, yy_abs, phi,
(3.0 * M_PI) / 2.0);
}
}
else {
//++
phi = atan(yy / xx);
// NOTE(review): the message below is labelled "--" and prints 3*PI/2
// although this is the "++" quadrant; looks like a copy-paste leftover.
if (phi < 0 || phi > M_PI / 2.0) {
printf(
"Explosion PHI WRONG --: xx = %.3f, yy = "
"%.3f, phi = %.3e, (3.0*M_PI)/2.0 = %.3e\n",
xx, yy, phi, (3.0 * M_PI) / 2.0);
}
}
}
else { // To avoid div by zero with atan
if (yy > 0.0) {
phi = M_PI / 2.0;
}
else if (yy < 0.0) {
phi = (3.0 * M_PI) / 2.0;
}
else {
phi = 0.0;
}
}
// Set zero for explicit safekeeping
if (xx == 0.0 && yy == 0.0) {
phi = 0.0;
}
// Gaussian velocity
// uu_radial = AMPL_UU*exp( -rr2 / (2.0*pow(WIDTH_UU, 2.0))
// ); New distribution, where that gaussian wave is not in
// the exact centre coordinates uu_radial = AMPL_UU*exp(
// -pow((rr - 4.0*WIDTH_UU),2.0) / (2.0*pow(WIDTH_UU, 2.0))
// ); //TODO: Parametrize the peak location.
uu_radial = AMPL_UU * exp(-pow((rr - UU_SHELL_R), 2.0) /
(2.0 * pow(WIDTH_UU, 2.0)));
}
else {
// theta and phi keep their values from the previous cell here, but
// they only scale uu_radial, which is zero.
uu_radial = 0.0; // TODO: There will be a discontinuity in
// the origin... Should the shape of the
// distribution be different?
}
// Determine the Cartesian velocity components
uu_x[idx] = AcReal(uu_radial * sin(theta) * cos(phi));
uu_y[idx] = AcReal(uu_radial * sin(theta) * sin(phi));
uu_z[idx] = AcReal(uu_radial * cos(theta));
// Temporary diagnostic output (TODO: Remove after not needed)
// if (theta > theta_old) {
// if (theta > M_PI || theta < 0.0 || phi < 0.0 || phi > 2*M_PI)
// {
/* printf("Explosion: xx = %.3f, yy = %.3f, zz = %.3f, rr =
%.3f, phi = %.3e/PI, theta = %.3e/PI\n, M_PI = %.3e", xx, yy,
zz, rr, phi/M_PI, theta/M_PI, M_PI); printf(" uu_radial =
%.3e, uu_x[%i] = %.3e, uu_y[%i] = %.3e, uu_z[%i] = %.3e \n",
uu_radial, idx, uu_x[idx], idx, uu_y[idx], idx,
uu_z[idx]); theta_old = theta;
*/
}
}
}
}
void
acmesh_init_to(const InitType& init_type, AcMesh* mesh)
{
srand(123456789);
const int n = AC_VTXBUF_SIZE(mesh->info);
const int mx = mesh->info.int_params[AC_mx];
const int my = mesh->info.int_params[AC_my];
const int mz = mesh->info.int_params[AC_mz];
const int nx_min = mesh->info.int_params[AC_nx_min];
const int nx_max = mesh->info.int_params[AC_nx_max];
const int ny_min = mesh->info.int_params[AC_ny_min];
const int ny_max = mesh->info.int_params[AC_ny_max];
const int nz_min = mesh->info.int_params[AC_nz_min];
const int nz_max = mesh->info.int_params[AC_nz_max];
switch (init_type) {
case INIT_TYPE_RANDOM: {
acmesh_clear(mesh);
const AcReal range = AcReal(0.01);
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
for (int i = 0; i < n; ++i)
mesh->vertex_buffer[w][i] = 2 * range * randr() - range;
break;
}
case INIT_TYPE_GAUSSIAN_RADIAL_EXPL:
acmesh_clear(mesh);
//acmesh_init_to(INIT_TYPE_RANDOM, mesh);
gaussian_radial_explosion(mesh);
break;
case INIT_TYPE_XWAVE:
acmesh_clear(mesh);
acmesh_init_to(INIT_TYPE_RANDOM, mesh);
for (int k = 0; k < mz; k++) {
for (int j = 0; j < my; j++) {
for (int i = 0; i < mx; i++) {
int idx = i + j * mx + k * mx * my;
mesh->vertex_buffer[VTXBUF_UUX][idx] = 2*AcReal(sin(j * AcReal(M_PI) / mx)) - 1;
}
}
}
break;
case INIT_TYPE_VEDGE:
acmesh_clear(mesh);
inflow_vedge_freefall(mesh);
break;
case INIT_TYPE_VEDGEX:
acmesh_clear(mesh);
inflow_freefall_x(mesh);
break;
case INIT_TYPE_RAYLEIGH_TAYLOR:
acmesh_clear(mesh);
inflow_freefall_x(mesh);
lnrho_step(mesh);
break;
case INIT_TYPE_ABC_FLOW: {
acmesh_clear(mesh);
acmesh_init_to(INIT_TYPE_RANDOM, mesh);
for (int k = nz_min; k < nz_max; k++) {
for (int j = ny_min; j < ny_max; j++) {
for (int i = nx_min; i < nx_max; i++) {
const int idx = i + j * mx + k * mx * my;
/*
const double xx = double(
mesh->info.real_params[AC_dsx] *
(i - mesh->info.int_params[AC_nx_min]) -
XORIG + AcReal(.5) * mesh->info.real_params[AC_dsx]);
const double yy = double(
mesh->info.real_params[AC_dsy] *
(j - mesh->info.int_params[AC_ny_min]) -
YORIG + AcReal(.5) * mesh->info.real_params[AC_dsy]);
const double zz = double(
mesh->info.real_params[AC_dsz] *
(k - mesh->info.int_params[AC_nz_min]) -
ZORIG + AcReal(.5) * mesh->info.real_params[AC_dsz]);
*/
const AcReal xx = (i - nx_min) * mesh->info.real_params[AC_dsx] - XORIG;
const AcReal yy = (j - ny_min) * mesh->info.real_params[AC_dsy] - YORIG;
const AcReal zz = (k - nz_min) * mesh->info.real_params[AC_dsz] - ZORIG;
const AcReal ampl_uu = 0.5;
const AcReal ABC_A = 1.;
const AcReal ABC_B = 1.;
const AcReal ABC_C = 1.;
const AcReal kx_uu = 8.;
const AcReal ky_uu = 8.;
const AcReal kz_uu = 8.;
mesh->vertex_buffer[VTXBUF_UUX][idx] = ampl_uu * (ABC_A * (AcReal)sin(kz_uu * zz) + ABC_C * (AcReal)cos(ky_uu * yy));
mesh->vertex_buffer[VTXBUF_UUY][idx] = ampl_uu * (ABC_B * (AcReal)sin(kx_uu * xx) + ABC_A * (AcReal)cos(kz_uu * zz));
mesh->vertex_buffer[VTXBUF_UUZ][idx] = ampl_uu * (ABC_C * (AcReal)sin(ky_uu * yy) + ABC_B * (AcReal)cos(kx_uu * xx));
}
}
}
break;
}
case INIT_TYPE_RAYLEIGH_BENARD: {
acmesh_init_to(INIT_TYPE_RANDOM, mesh);
#if LTEMPERATURE
vertex_buffer_set(VTXBUF_LNRHO, 1, mesh);
const AcReal range = AcReal(0.9);
for (int k = nz_min; k < nz_max; k++) {
for (int j = ny_min; j < ny_max; j++) {
for (int i = nx_min; i < nx_max; i++) {
const int idx = i + j * mx + k * mx * my;
mesh->vertex_buffer[VTXBUF_TEMPERATURE][idx] = (range * (k - nz_min)) / mesh->info.int_params[AC_nz] + 0.1;
}
}
}
#else
WARNING("INIT_TYPE_RAYLEIGH_BERNARD called even though VTXBUF_TEMPERATURE is not used");
#endif
break;
}
default:
ERROR("Unknown init_type");
}
AcReal max_val = AcReal(-1e-32);
AcReal min_val = AcReal(1e32);
// Normalize the grid
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
for (int i = 0; i < n; ++i) {
if (mesh->vertex_buffer[w][i] < min_val)
min_val = mesh->vertex_buffer[w][i];
if (mesh->vertex_buffer[w][i] > max_val)
max_val = mesh->vertex_buffer[w][i];
}
}
printf("MAX: %f MIN %f\n", double(max_val), double(min_val));
/*
const AcReal inv_range = AcReal(1.) / fabs(max_val - min_val);
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
for (int i = 0; i < n; ++i) {
mesh->vertex_buffer[w][i] = 2*inv_range*(mesh->vertex_buffer[w][i] - min_val) - 1;
}
}
*/
}
/** Releases all vertex buffers of the mesh and the mesh itself.
 *  Passing NULL is a no-op, mirroring free(NULL). */
void
acmesh_destroy(AcMesh* mesh)
{
    if (mesh == NULL)
        return;

    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
        free(mesh->vertex_buffer[VertexBufferHandle(i)]);

    free(mesh);
}
/**
 * Allocates a model mesh together with one vertex buffer per handle.
 *
 * \param mesh_info Configuration copied into the new mesh; it also determines
 *                  the number of elements in each vertex buffer.
 * \return The newly allocated mesh. Release it with modelmesh_destroy().
 *
 * Allocation failures are caught with ERRCHK. The buffer contents are left
 * uninitialized.
 */
ModelMesh*
modelmesh_create(const AcMeshInfo& mesh_info)
{
    ModelMesh* mesh = (ModelMesh*)malloc(sizeof(*mesh));
    // Check before the first dereference below
    ERRCHK(mesh != NULL);

    mesh->info = mesh_info;

    // sizeof does not evaluate its operand, so the not-yet-allocated buffer
    // is not accessed here
    const size_t bytes = AC_VTXBUF_SIZE(mesh->info) * sizeof(mesh->vertex_buffer[0][0]);
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        mesh->vertex_buffer[VertexBufferHandle(i)] = (ModelScalar*)malloc(bytes);
        ERRCHK(mesh->vertex_buffer[VertexBufferHandle(i)] != NULL);
    }

    return mesh;
}
/** Releases all vertex buffers of the model mesh and the mesh itself.
 *  Passing NULL is a no-op, mirroring free(NULL). */
void
modelmesh_destroy(ModelMesh* mesh)
{
    if (mesh == NULL)
        return;

    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
        free(mesh->vertex_buffer[VertexBufferHandle(i)]);

    free(mesh);
}
#include <string.h> // memcpy
void
acmesh_to_modelmesh(const AcMesh& acmesh, ModelMesh* modelmesh)
{
ERRCHK(sizeof(acmesh.info) == sizeof(modelmesh->info));
memcpy(&modelmesh->info, &acmesh.info, sizeof(acmesh.info));
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
for (size_t j = 0; j < AC_VTXBUF_SIZE(acmesh.info); ++j)
modelmesh->vertex_buffer[i][j] = (ModelScalar)acmesh.vertex_buffer[i][j];
}
void
modelmesh_to_acmesh(const ModelMesh& modelmesh, AcMesh* acmesh)
{
ERRCHK(sizeof(acmesh->info) == sizeof(modelmesh.info));
memcpy(&acmesh->info, &modelmesh.info, sizeof(modelmesh.info));
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
for (size_t j = 0; j < AC_VTXBUF_SIZE(modelmesh.info); ++j)
acmesh->vertex_buffer[i][j] = (AcReal)modelmesh.vertex_buffer[i][j];
}

View File

@@ -0,0 +1,58 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include "astaroth.h"
#include "modelmesh.h"
// List of the supported initial-condition types. FUNC is applied to every
// entry, which lets the same list generate both the InitType enum
// (with AC_GEN_ID) and the init_type_names string table (with AC_GEN_STR).
// clang-format off
#define AC_FOR_INIT_TYPES(FUNC)\
FUNC(INIT_TYPE_RANDOM), \
FUNC(INIT_TYPE_XWAVE), \
FUNC(INIT_TYPE_GAUSSIAN_RADIAL_EXPL), \
FUNC(INIT_TYPE_ABC_FLOW) , \
FUNC(INIT_TYPE_VEDGE), \
FUNC(INIT_TYPE_VEDGEX), \
FUNC(INIT_TYPE_RAYLEIGH_TAYLOR), \
FUNC(INIT_TYPE_RAYLEIGH_BENARD)
// clang-format on
// NUM_INIT_TYPES follows the last entry of the list
typedef enum { AC_FOR_INIT_TYPES(AC_GEN_ID), NUM_INIT_TYPES } InitType;
extern const char* init_type_names[]; // Defined in host_memory.cc
// Allocates a mesh and its vertex buffers; release with acmesh_destroy
AcMesh* acmesh_create(const AcMeshInfo& mesh_info);
// Sets every field of the mesh to 1
void acmesh_clear(AcMesh* mesh);
// Fills the mesh with the initial condition selected by `type`
void acmesh_init_to(const InitType& type, AcMesh* mesh);
// Frees the vertex buffers and the mesh itself
void acmesh_destroy(AcMesh* mesh);
// Allocates a model mesh and its vertex buffers; release with modelmesh_destroy
ModelMesh* modelmesh_create(const AcMeshInfo& mesh_info);
// Frees the vertex buffers and the model mesh itself
void modelmesh_destroy(ModelMesh* mesh);
// Copy the info and convert all vertex buffers between the two mesh types
void acmesh_to_modelmesh(const AcMesh& acmesh, ModelMesh* modelmesh);
void modelmesh_to_acmesh(const ModelMesh& model, AcMesh* acmesh);

View File

@@ -0,0 +1,63 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "host_timestep.h"
#include "core/math_utils.h"
// Factor applied to every timestep returned by host_timestep(); changed with
// set_timescale(). Defaults to 1, i.e. no scaling.
static AcReal timescale = AcReal(1.0);
/**
 * Computes the timestep from the maximum velocity and the mesh parameters.
 *
 * The result is the smaller of
 *  - the advective step  cdt * dsmin / (|umax| + sqrt(cs2_sound)), and
 *  - the diffusive step  cdtv * dsmin^2 / max(nu_visc, eta, gamma, chi) + 1,
 * multiplied by the global timescale factor. The arithmetic is carried out in
 * long double.
 *
 * See Pencil Code user manual p. 38 (timestep section). The legacy Astaroth
 * formulas used dsmin / (umax + cs_sound) and dsmin^2 / nu_visc instead.
 *
 * TODO NOTE: the "+ 1" on the diffusive step must be removed to get
 * scientifically accurate results.
 */
AcReal
host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info)
{
    const AcReal* params = mesh_info.real_params;

    const long double cdt       = params[AC_cdt];
    const long double cdtv      = params[AC_cdtv];
    const long double cs2_sound = params[AC_cs2_sound];
    const long double nu_visc   = params[AC_nu_visc];
    const long double eta       = params[AC_eta];
    const long double chi       = 0; // params[AC_chi]; // TODO not calculated
    const long double gamma     = params[AC_gamma];
    const long double dsmin     = params[AC_dsmin];

    // Advective (Courant) limit
    const long double signal_speed = fabsl(umax) + sqrtl(cs2_sound + 0.0l);
    const long double uu_dt        = cdt * dsmin / signal_speed;

    // Diffusive limit, governed by the largest of the coefficients
    const long double max_coeff = max(max(nu_visc, eta), max(gamma, chi));
    const long double visc_dt   = cdtv * dsmin * dsmin / max_coeff + 1;

    const long double dt = min(uu_dt, visc_dt);
    return AcReal(timescale) * AcReal(dt);
}
/** Sets the factor by which host_timestep() multiplies the timestep it
 *  returns. The value is stored in a file-level static and stays in effect
 *  until the next call. */
void
set_timescale(const AcReal scale)
{
timescale = scale;
}

View File

@@ -0,0 +1,32 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include "astaroth.h"
/**
 * Computes the timestep on the host from `umax` and the real-valued
 * parameters stored in `mesh_info`, multiplied by the current timescale.
 */
AcReal host_timestep(const AcReal& umax, const AcMeshInfo& mesh_info);
/** Sets the scale factor that host_timestep() applies to its result. */
void set_timescale(const AcReal scale);

View File

@@ -0,0 +1,487 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
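* \brief Periodic boundary conditions for the host-side model mesh.
*
* Fills the ghost zones of every vertex buffer by copying values from the
* computational domain with periodic wrapping.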
*
*/
#include "model_boundconds.h"
#include "core/errchk.h"
void
boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
{
#pragma omp parallel for
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
const int3 start = (int3){0, 0, 0};
const int3 end = (int3){
mesh_info.int_params[AC_mx],
mesh_info.int_params[AC_my],
mesh_info.int_params[AC_mz]
};
const int nx = mesh_info.int_params[AC_nx];
const int ny = mesh_info.int_params[AC_ny];
const int nz = mesh_info.int_params[AC_nz];
const int nx_min = mesh_info.int_params[AC_nx_min];
const int ny_min = mesh_info.int_params[AC_ny_min];
const int nz_min = mesh_info.int_params[AC_nz_min];
// The old kxt was inclusive, but our mx_max is exclusive
const int nx_max = mesh_info.int_params[AC_nx_max];
const int ny_max = mesh_info.int_params[AC_ny_max];
const int nz_max = mesh_info.int_params[AC_nz_max];
for (int k_dst = start.z; k_dst < end.z; ++k_dst) {
for (int j_dst = start.y; j_dst < end.y; ++j_dst) {
for (int i_dst = start.x; i_dst < end.x; ++i_dst) {
// If destination index is inside the computational domain, return since
// the boundary conditions are only applied to the ghost zones
if (i_dst >= nx_min && i_dst < nx_max &&
j_dst >= ny_min && j_dst < ny_max &&
k_dst >= nz_min && k_dst < nz_max)
continue;
// Find the source index
// Map to nx, ny, nz coordinates
int i_src = i_dst - nx_min;
int j_src = j_dst - ny_min;
int k_src = k_dst - nz_min;
// Translate (s.t. the index is always positive)
i_src += nx;
j_src += ny;
k_src += nz;
// Wrap
i_src %= nx;
j_src %= ny;
k_src %= nz;
// Map to mx, my, mz coordinates
i_src += nx_min;
j_src += ny_min;
k_src += nz_min;
const size_t src_idx = AC_VTXBUF_IDX(i_src, j_src, k_src, mesh_info);
const size_t dst_idx = AC_VTXBUF_IDX(i_dst, j_dst, k_dst, mesh_info);
ERRCHK(src_idx < AC_VTXBUF_SIZE(mesh_info));
ERRCHK(dst_idx < AC_VTXBUF_SIZE(mesh_info));
mesh->vertex_buffer[w][dst_idx] = mesh->vertex_buffer[w][src_idx];
}
}
}
}
}
#if 0
void
boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh)
{
const int mx = mesh_info.int_params[AC_mx];
const int my = mesh_info.int_params[AC_my];
const int mz = mesh_info.int_params[AC_mz];
// Volatile here suppresses the strict-overflow warning (i.e. the compiler
// wanted to optimize these loops by assuming that kxb etc. never overflow).
// We do not need the performance improvement (~1-3%), and it is not
// desirable to either
// a) get useless warnings originating from here, or
// b) disable the warnings completely
volatile const int kxb = mesh_info.int_params[AC_nx_min];
volatile const int kyb = mesh_info.int_params[AC_ny_min];
volatile const int kzb = mesh_info.int_params[AC_nz_min];
// The old kxt was inclusive, but our mx_max is exclusive
volatile const int kxt = mesh_info.int_params[AC_nx_max] - 1;
volatile const int kyt = mesh_info.int_params[AC_ny_max] - 1;
volatile const int kzt = mesh_info.int_params[AC_nz_max] - 1;
const int bound[3] = {0, 0, 0};
// Periodic boundary conditions
if (bound[0] == 0) {
for (int k = kzb; k <= kzt; k++) {
for (int j = kyb; j <= kyt; j++) {
for (int i = kxb; i <= kxb + 2; i++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (kxt + i - 2) + j * mx + k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
for (int i = kxt - 2; i <= kxt; i++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - kxt + 2) + j * mx + k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
}
if (bound[1] == 0) {
for (int k = kzb; k <= kzt; k++) {
for (int i = kxb; i <= kxt; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + (kyt + j - 2) * mx + k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
for (int j = kyt - 2; j <= kyt; j++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + (j - kyt + 2) * mx + k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
}
if (bound[2] == 0) {
for (int i = kxb; i <= kxt; i++) {
for (int j = kyb; j <= kyt; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + j * mx + (kzt + k - 2) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + j * mx + (k - kzt + 2) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
}
// Copy the corners in the fully periodic case
if (bound[0] == 0 && bound[1] == 0 && bound[2] == 0) {
// Source corner: x=0, y=0, z=0
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source corner: x=1, y=0, z=0
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source corner: x=0, y=1, z=0
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source corner: x=0, y=0, z=1
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source corner: x=1, y=1, z=0
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source corner: x=1, y=0, z=1
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source corner: x=0, y=1, z=1
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source corner: x=1, y=1, z=1
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
}
else {
ERROR("ONLY FULLY PERIODIC WORKS WITH CORNERS SO FAR! \n");
}
// Copy the edges in the fully periodic case
if (bound[0] == 0 && bound[1] == 0 && bound[2] == 0) {
// Source edge: x = 0, y = 0
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzb; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: x = 1, y = 0
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzb; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + (j + my - STENCIL_ORDER) * mx +
k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: x = 0, y = 1
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzb; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: x = 1, y = 1
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzb; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + (j - my + STENCIL_ORDER) * mx +
k * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: x = 0, z = 0
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyb; j <= kyt; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + j * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: x = 1, z = 0
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyb; j <= kyt; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + j * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: x = 0, z = 1
for (int i = kxb; i <= kxb + 2; i++) {
for (int j = kyb; j <= kyt; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i + mx - STENCIL_ORDER) + j * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: x = 1, z = 1
for (int i = kxt - 2; i <= kxt; i++) {
for (int j = kyb; j <= kyt; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = (i - mx + STENCIL_ORDER) + j * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: y = 0, z = 0
for (int i = kxb; i <= kxt; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + (j + my - STENCIL_ORDER) * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: y = 1, z = 0
for (int i = kxb; i <= kxt; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzb; k <= kzb + 2; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + (j - my + STENCIL_ORDER) * mx +
(k + mz - STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: y = 0, z = 1
for (int i = kxb; i <= kxt; i++) {
for (int j = kyb; j <= kyb + 2; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + (j + my - STENCIL_ORDER) * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
// Source edge: y = 1, z = 1
for (int i = kxb; i <= kxt; i++) {
for (int j = kyt - 2; j <= kyt; j++) {
for (int k = kzt - 2; k <= kzt; k++) {
const int inds = i + j * mx + k * mx * my;
const int indr = i + (j - my + STENCIL_ORDER) * mx +
(k - mz + STENCIL_ORDER) * mx * my;
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
mesh->vertex_buffer[w]
[indr] = mesh->vertex_buffer[w]
[inds];
}
}
}
}
else {
ERROR("ONLY FULLY PERIODIC WORKS WITH EDGES SO FAR! \n");
}
}
#endif

View File

@@ -0,0 +1,31 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Interface for the model boundary conditions.
*
* Declares the function that applies boundary conditions to a ModelMesh.
*
*/
#pragma once
#include "astaroth.h"
#include "modelmesh.h"
/**
 * Applies boundary conditions to all vertex buffers of `mesh`, using the mesh
 * dimensions stored in `mesh_info`. The mesh is modified in place.
 */
void boundconds(const AcMeshInfo& mesh_info, ModelMesh* mesh);

View File

@@ -0,0 +1,353 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
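* \brief Finite-difference operators for the model solution.
*
* Provides first, second and mixed derivatives built from samples up to three
* grid points away along each axis, and the gradient, divergence, Laplacian
* and viscosity terms composed from them.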
*
*/
#pragma once
#include "core/errchk.h"
// Floating-point type used by the derivative functions in this file
typedef long double MODEL_REAL;
// Selects the coordinate axis a derivative template operates along;
// NUM_AXIS_TYPES is the number of axes
typedef enum { AXIS_X, AXIS_Y, AXIS_Z, NUM_AXIS_TYPES } AxisType;
/**
 * First derivative of a scalar field along `axis` at mesh point (i, j, k).
 *
 * Uses a central-difference stencil with samples at offsets -3..-1 and +1..+3
 * along the axis, with weights 1, -9 and 45 over 60 * ds.
 *
 * @param i, j, k   Mesh indices of the evaluation point.
 * @param mesh_info Mesh dimensions and grid spacings.
 * @param scal      Scalar field indexed with AC_VTXBUF_IDX.
 * @return Approximation of d(scal)/d(axis) at (i, j, k).
 */
template <AxisType axis>
static inline MODEL_REAL
der_scal(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
         const MODEL_REAL* scal)
{
    // Unit index step along the differentiation axis and matching spacing
    int di = 0, dj = 0, dk = 0;
    MODEL_REAL ds;
    switch (axis) {
    case AXIS_X:
        di = 1;
        ds = mesh_info.real_params[AC_dsx];
        break;
    case AXIS_Y:
        dj = 1;
        ds = mesh_info.real_params[AC_dsy];
        break;
    case AXIS_Z:
        dk = 1;
        ds = mesh_info.real_params[AC_dsz];
        break;
    default:
        ERROR("Unknown axis type");
    }

    // sample[s + 3] holds the value at offset s; the center (s == 0) is not
    // part of the first-derivative stencil and is never read
    MODEL_REAL sample[7];
    for (int s = -3; s <= 3; ++s) {
        if (s == 0)
            continue;
        sample[s + 3] = scal[AC_VTXBUF_IDX(i + s * di, j + s * dj, k + s * dk, mesh_info)];
    }

    const MODEL_REAL numerator = (sample[6] - sample[0]) +
                                 MODEL_REAL(-9.) * (sample[5] - sample[1]) +
                                 MODEL_REAL(45.) * (sample[4] - sample[2]);
    return numerator / (MODEL_REAL(60.) * ds);
}
/**
 * Second derivative of a scalar field along `axis` at mesh point (i, j, k).
 *
 * Uses a central-difference stencil with samples at offsets -3..+3 along the
 * axis, with weights 2, -27, 270 and -490 (center) over 180 * ds^2.
 *
 * @param i, j, k   Mesh indices of the evaluation point.
 * @param mesh_info Mesh dimensions and grid spacings.
 * @param scal      Scalar field indexed with AC_VTXBUF_IDX.
 * @return Approximation of d^2(scal)/d(axis)^2 at (i, j, k).
 */
template <AxisType axis>
static inline MODEL_REAL
der2_scal(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
          const MODEL_REAL* scal)
{
    // Unit index step along the differentiation axis and matching spacing
    int di = 0, dj = 0, dk = 0;
    MODEL_REAL ds;
    switch (axis) {
    case AXIS_X:
        di = 1;
        ds = mesh_info.real_params[AC_dsx];
        break;
    case AXIS_Y:
        dj = 1;
        ds = mesh_info.real_params[AC_dsy];
        break;
    case AXIS_Z:
        dk = 1;
        ds = mesh_info.real_params[AC_dsz];
        break;
    default:
        ERROR("Unknown axis type");
    }

    // sample[s + 3] holds the value at offset s along the axis
    MODEL_REAL sample[7];
    sample[3] = scal[AC_VTXBUF_IDX(i, j, k, mesh_info)];
    for (int s = 1; s <= 3; ++s) {
        sample[3 - s] = scal[AC_VTXBUF_IDX(i - s * di, j - s * dj, k - s * dk, mesh_info)];
        sample[3 + s] = scal[AC_VTXBUF_IDX(i + s * di, j + s * dj, k + s * dk, mesh_info)];
    }

    const MODEL_REAL numerator = MODEL_REAL(2.) * (sample[0] + sample[6]) +
                                 MODEL_REAL(-27.) * (sample[1] + sample[5]) +
                                 MODEL_REAL(270.) * (sample[2] + sample[4]) +
                                 MODEL_REAL(-490.) * sample[3];
    return numerator / (MODEL_REAL(180.) * ds * ds);
}
/**
 * Laplacian of a scalar field at mesh point (i, j, k): the sum of the second
 * derivatives along x, y and z.
 */
static MODEL_REAL
laplace_scal(const int& i, const int& j, const int& k,
             const AcMeshInfo& mesh_info, const MODEL_REAL* scal)
{
    const MODEL_REAL d2_dx2 = der2_scal<AXIS_X>(i, j, k, mesh_info, scal);
    const MODEL_REAL d2_dy2 = der2_scal<AXIS_Y>(i, j, k, mesh_info, scal);
    const MODEL_REAL d2_dz2 = der2_scal<AXIS_Z>(i, j, k, mesh_info, scal);
    return d2_dx2 + d2_dy2 + d2_dz2;
}
/**
 * Component-wise Laplacian of a vector field at mesh point (i, j, k).
 *
 * Applies laplace_scal() to each of vec_x, vec_y and vec_z and writes the
 * results to *laplace_x, *laplace_y and *laplace_z respectively.
 */
static void
laplace_vec(const int& i, const int& j, const int& k,
const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, MODEL_REAL* laplace_x,
MODEL_REAL* laplace_y, MODEL_REAL* laplace_z)
{
*laplace_x = laplace_scal(i, j, k, mesh_info, vec_x);
*laplace_y = laplace_scal(i, j, k, mesh_info, vec_y);
*laplace_z = laplace_scal(i, j, k, mesh_info, vec_z);
}
/**
 * Divergence of a vector field at mesh point (i, j, k):
 * d(vec_x)/dx + d(vec_y)/dy + d(vec_z)/dz.
 */
static MODEL_REAL
div_vec(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
        const MODEL_REAL* vec_x, const MODEL_REAL* vec_y, const MODEL_REAL* vec_z)
{
    const MODEL_REAL dvx_dx = der_scal<AXIS_X>(i, j, k, mesh_info, vec_x);
    const MODEL_REAL dvy_dy = der_scal<AXIS_Y>(i, j, k, mesh_info, vec_y);
    const MODEL_REAL dvz_dz = der_scal<AXIS_Z>(i, j, k, mesh_info, vec_z);
    return dvx_dx + dvy_dy + dvz_dz;
}
/**
 * Gradient of a scalar field at mesh point (i, j, k).
 *
 * Writes the first derivatives of `scal` along x, y and z to *res_x, *res_y
 * and *res_z respectively.
 */
static void
grad(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
const MODEL_REAL* scal, MODEL_REAL* res_x, MODEL_REAL* res_y, MODEL_REAL* res_z)
{
*res_x = der_scal<AXIS_X>(i, j, k, mesh_info, scal);
*res_y = der_scal<AXIS_Y>(i, j, k, mesh_info, scal);
*res_z = der_scal<AXIS_Z>(i, j, k, mesh_info, scal);
}
/**
 * Evaluates (vec . nabla) scal at mesh point (i, j, k): the dot product of
 * the vector field at that point with the gradient of the scalar field.
 */
static MODEL_REAL
vec_dot_nabla_scal(const int& i, const int& j, const int& k,
                   const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
                   const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, const MODEL_REAL* scal)
{
    MODEL_REAL grad_x, grad_y, grad_z;
    grad(i, j, k, mesh_info, scal, &grad_x, &grad_y, &grad_z);

    // The vector components are sampled at the evaluation point itself
    const int center = AC_VTXBUF_IDX(i, j, k, mesh_info);
    return vec_x[center] * grad_x + vec_y[center] * grad_y +
           vec_z[center] * grad_z;
}
/*
* =============================================================================
* Viscosity
* =============================================================================
*/
// Selects the pair of axes a mixed second derivative is taken along
typedef enum { DERNM_XY, DERNM_YZ, DERNM_XZ } DernmType;

/**
 * Mixed second derivative of a scalar field at mesh point (i, j, k).
 *
 * For the axis pair (a, b) selected by `dernm`, the stencil combines the four
 * diagonal samples at distance n = 1, 2, 3,
 *     f(+n,+n) - f(-n,+n) + f(-n,-n) - f(+n,-n),
 * with weights 270, -27 and 2 and the prefactor 1 / (720 * ds_a * ds_b).
 *
 * @param i, j, k   Mesh indices of the evaluation point.
 * @param mesh_info Mesh dimensions and grid spacings.
 * @param scal      Scalar field indexed with AC_VTXBUF_IDX.
 * @return Approximation of d^2(scal)/(da db) at (i, j, k).
 */
template <DernmType dernm>
static MODEL_REAL
dernm_scal(const int& i, const int& j, const int& k,
           const AcMeshInfo& mesh_info, const MODEL_REAL* scal)
{
    // (ai, aj, ak) and (bi, bj, bk) are the unit index steps along the first
    // and second axis of the pair; ds_a and ds_b are the matching spacings
    int ai = 0, aj = 0, ak = 0;
    int bi = 0, bj = 0, bk = 0;
    MODEL_REAL ds_a, ds_b;
    switch (dernm) {
    case DERNM_XY:
        ai = 1;
        bj = 1;
        ds_a = mesh_info.real_params[AC_dsx];
        ds_b = mesh_info.real_params[AC_dsy];
        break;
    case DERNM_YZ:
        aj = 1;
        bk = 1;
        ds_a = mesh_info.real_params[AC_dsy];
        ds_b = mesh_info.real_params[AC_dsz];
        break;
    case DERNM_XZ:
        ai = 1;
        bk = 1;
        ds_a = mesh_info.real_params[AC_dsx];
        ds_b = mesh_info.real_params[AC_dsz];
        break;
    default:
        ERROR("Invalid dernm type");
    }

    // Divide in MODEL_REAL: the literal expression 1. / 720. would be
    // evaluated in double and only then widened, losing precision
    const MODEL_REAL fac = (MODEL_REAL(1.) / MODEL_REAL(720.)) *
                           (MODEL_REAL(1.) / ds_a) * (MODEL_REAL(1.) / ds_b);

    // cross[n] is the signed sum of the four diagonal samples at distance n;
    // slot 0 is unused
    MODEL_REAL cross[4];
    for (int n = 1; n <= 3; ++n) {
        const MODEL_REAL f_p_p = scal[AC_VTXBUF_IDX(i + n * ai + n * bi, j + n * aj + n * bj,
                                                    k + n * ak + n * bk, mesh_info)];
        const MODEL_REAL f_m_p = scal[AC_VTXBUF_IDX(i - n * ai + n * bi, j - n * aj + n * bj,
                                                    k - n * ak + n * bk, mesh_info)];
        const MODEL_REAL f_m_m = scal[AC_VTXBUF_IDX(i - n * ai - n * bi, j - n * aj - n * bj,
                                                    k - n * ak - n * bk, mesh_info)];
        const MODEL_REAL f_p_m = scal[AC_VTXBUF_IDX(i + n * ai - n * bi, j + n * aj - n * bj,
                                                    k + n * ak - n * bk, mesh_info)];
        cross[n] = f_p_p - f_m_p + f_m_m - f_p_m;
    }

    return fac * (MODEL_REAL(270.) * cross[1] - MODEL_REAL(27.) * cross[2] +
                  MODEL_REAL(2.) * cross[3]);
}
/**
 * Gradient of the divergence of a vector field at mesh point (i, j, k).
 *
 * Each output component is the sum of one second derivative (der2_scal) and
 * two mixed derivatives (dernm_scal):
 *   *gdvx = d2(vec_x)/dx2  + d2(vec_y)/dxdy + d2(vec_z)/dxdz
 *   *gdvy = d2(vec_x)/dxdy + d2(vec_y)/dy2  + d2(vec_z)/dydz
 *   *gdvz = d2(vec_x)/dxdz + d2(vec_y)/dydz + d2(vec_z)/dz2
 */
static void
grad_div_vec(const int& i, const int& j, const int& k,
const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, MODEL_REAL* gdvx,
MODEL_REAL* gdvy, MODEL_REAL* gdvz)
{
*gdvx = der2_scal<AXIS_X>(i, j, k, mesh_info, vec_x) +
dernm_scal<DERNM_XY>(i, j, k, mesh_info, vec_y) +
dernm_scal<DERNM_XZ>(i, j, k, mesh_info, vec_z);
*gdvy = dernm_scal<DERNM_XY>(i, j, k, mesh_info, vec_x) +
der2_scal<AXIS_Y>(i, j, k, mesh_info, vec_y) +
dernm_scal<DERNM_YZ>(i, j, k, mesh_info, vec_z);
*gdvz = dernm_scal<DERNM_XZ>(i, j, k, mesh_info, vec_x) +
dernm_scal<DERNM_YZ>(i, j, k, mesh_info, vec_y) +
der2_scal<AXIS_Z>(i, j, k, mesh_info, vec_z);
}
/**
 * Product of the tensor S with the gradient of `lnrho` at mesh point
 * (i, j, k), where
 *   S_ab = 1/2 * (d(vec_a)/db + d(vec_b)/da) - 1/3 * delta_ab * div(vec).
 *
 * @param i, j, k               Mesh indices of the evaluation point.
 * @param mesh_info             Mesh dimensions and grid spacings.
 * @param vec_x, vec_y, vec_z   Components of the vector field.
 * @param lnrho                 Scalar field whose gradient is contracted with S.
 * @param sgrhox, sgrhoy, sgrhoz Receive the three components of S * grad(lnrho).
 */
static void
S_grad_lnrho(const int& i, const int& j, const int& k,
             const AcMeshInfo& mesh_info, const MODEL_REAL* vec_x,
             const MODEL_REAL* vec_y, const MODEL_REAL* vec_z, const MODEL_REAL* lnrho,
             MODEL_REAL* sgrhox, MODEL_REAL* sgrhoy, MODEL_REAL* sgrhoz)
{
    // Divide in MODEL_REAL: the literal expressions 2. / 3. and 1. / 3. would
    // be evaluated in double and only then widened, losing precision
    const MODEL_REAL c23 = MODEL_REAL(2.) / MODEL_REAL(3.);
    const MODEL_REAL c13 = MODEL_REAL(1.) / MODEL_REAL(3.);

    // First derivatives of the vector field, each computed once:
    // dA_vB = d(vec_B)/dA
    const MODEL_REAL dx_vx = der_scal<AXIS_X>(i, j, k, mesh_info, vec_x);
    const MODEL_REAL dy_vx = der_scal<AXIS_Y>(i, j, k, mesh_info, vec_x);
    const MODEL_REAL dz_vx = der_scal<AXIS_Z>(i, j, k, mesh_info, vec_x);
    const MODEL_REAL dx_vy = der_scal<AXIS_X>(i, j, k, mesh_info, vec_y);
    const MODEL_REAL dy_vy = der_scal<AXIS_Y>(i, j, k, mesh_info, vec_y);
    const MODEL_REAL dz_vy = der_scal<AXIS_Z>(i, j, k, mesh_info, vec_y);
    const MODEL_REAL dx_vz = der_scal<AXIS_X>(i, j, k, mesh_info, vec_z);
    const MODEL_REAL dy_vz = der_scal<AXIS_Y>(i, j, k, mesh_info, vec_z);
    const MODEL_REAL dz_vz = der_scal<AXIS_Z>(i, j, k, mesh_info, vec_z);

    // Diagonal components
    const MODEL_REAL Sxx = c23 * dx_vx - c13 * (dy_vy + dz_vz);
    const MODEL_REAL Syy = c23 * dy_vy - c13 * (dx_vx + dz_vz);
    const MODEL_REAL Szz = c23 * dz_vz - c13 * (dx_vx + dy_vy);

    // Off-diagonal components; S is symmetric
    const MODEL_REAL Sxy = MODEL_REAL(.5) * (dy_vx + dx_vy);
    const MODEL_REAL Sxz = MODEL_REAL(.5) * (dz_vx + dx_vz);
    const MODEL_REAL Syz = MODEL_REAL(.5) * (dz_vy + dy_vz);
    const MODEL_REAL Syx = Sxy;
    const MODEL_REAL Szx = Sxz;
    const MODEL_REAL Szy = Syz;

    // Gradient of lnrho
    MODEL_REAL glnx, glny, glnz;
    grad(i, j, k, mesh_info, lnrho, &glnx, &glny, &glnz);

    *sgrhox = Sxx * glnx + Sxy * glny + Sxz * glnz;
    *sgrhoy = Syx * glnx + Syy * glny + Syz * glnz;
    *sgrhoz = Szx * glnx + Szy * glny + Szz * glnz;
}
static void
nu_const(const int& i, const int& j, const int& k, const AcMeshInfo& mesh_info,
const MODEL_REAL* vec_x, const MODEL_REAL* vec_y, const MODEL_REAL* vec_z,
const MODEL_REAL* scal, MODEL_REAL* visc_x, MODEL_REAL* visc_y, MODEL_REAL* visc_z)
{
MODEL_REAL lx, ly, lz;
laplace_vec(i, j, k, mesh_info, vec_x, vec_y, vec_z, &lx, &ly, &lz);
// lx = ly = lz = .0f;
MODEL_REAL gx, gy, gz;
grad_div_vec(i, j, k, mesh_info, vec_x, vec_y, vec_z, &gx, &gy, &gz);
// gx = gy =gz = .0f;
MODEL_REAL sgrhox, sgrhoy, sgrhoz;
S_grad_lnrho(i, j, k, mesh_info, vec_x, vec_y, vec_z, scal, &sgrhox,
&sgrhoy, &sgrhoz);
// sgrhox = sgrhoy = sgrhoz = .0f;
*visc_x = mesh_info.real_params[AC_nu_visc] *
(lx + MODEL_REAL(1. / 3.) * gx + MODEL_REAL(2.) * sgrhox)
+ mesh_info.real_params[AC_zeta] * gx;
*visc_y = mesh_info.real_params[AC_nu_visc] *
(ly + MODEL_REAL(1. / 3.) * gy + MODEL_REAL(2.) * sgrhoy)
+ mesh_info.real_params[AC_zeta] * gy;
*visc_z = mesh_info.real_params[AC_nu_visc] *
(lz + MODEL_REAL(1. / 3.) * gz + MODEL_REAL(2.) * sgrhoz)
+ mesh_info.real_params[AC_zeta] * gz;
}

View File

@@ -0,0 +1,203 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
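* \brief Host-side reductions over the model mesh.
*
* Implements max, min and root-mean-square reductions of scalar and vector
* vertex buffers over the computational domain.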
*
*/
#include "model_reduce.h"
#include <math.h>
#include "core/errchk.h"
// Function pointer definitions
// ReduceFunc: combines the running result with the next value
typedef ModelScalar (*ReduceFunc)(const ModelScalar&, const ModelScalar&);
// ReduceInitialScalFunc: maps one scalar sample to the value that is reduced
typedef ModelScalar (*ReduceInitialScalFunc)(const ModelScalar&);
// ReduceInitialVecFunc: maps one three-component sample to the value that is reduced
typedef ModelScalar (*ReduceInitialVecFunc)(const ModelScalar&, const ModelScalar&,
const ModelScalar&);
// clang-format off
/* Combining functions: each takes the running result and the next value */
// Larger of a and b
static inline ModelScalar
max(const ModelScalar& a, const ModelScalar& b) { return a > b ? a : b; }
// Smaller of a and b
static inline ModelScalar
min(const ModelScalar& a, const ModelScalar& b) { return a < b ? a : b; }
// Sum of a and b
static inline ModelScalar
sum(const ModelScalar& a, const ModelScalar& b) { return a + b; }
/* Functions used to determine the values used during reduction */
// Scalar overload: returns a unchanged (no absolute value is taken)
static inline ModelScalar
length(const ModelScalar& a) { return (ModelScalar)(a); }
// Vector overload: Euclidean norm of (a, b, c)
static inline ModelScalar
length(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { return sqrtl(a*a + b*b + c*c); }
// Scalar overload: a squared
static inline ModelScalar
squared(const ModelScalar& a) { return (ModelScalar)(a*a); }
// Vector overload: sum of the squares of a, b and c
static inline ModelScalar
squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { return squared(a) + squared(b) + squared(c); }
// Scalar overload: exp(a) squared
static inline ModelScalar
exp_squared(const ModelScalar& a) { return expl(a)*expl(a); }
// Vector overload: sum of exp(a)^2, exp(b)^2 and exp(c)^2
static inline ModelScalar
exp_squared(const ModelScalar& a, const ModelScalar& b, const ModelScalar& c) { return exp_squared(a) + exp_squared(b) + exp_squared(c); }
// clang-format on
/**
 * Reduces one vertex buffer of the mesh to a single value over the
 * computational domain.
 *
 * RTYPE_MAX and RTYPE_MIN return the largest and smallest sample. RTYPE_RMS
 * returns sqrt(sum(x^2) / nxyz) and RTYPE_RMS_EXP returns
 * sqrt(sum(exp(x)^2) / nxyz), where nxyz is the AC_nxyz mesh parameter.
 *
 * @param mesh  Mesh holding the vertex buffers and their dimensions.
 * @param rtype Reduction to perform.
 * @param a     Handle of the vertex buffer to reduce.
 * @return Result of the reduction.
 */
ModelScalar
model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
                  const VertexBufferHandle& a)
{
    ReduceInitialScalFunc reduce_initial; // maps a sample to the reduced value
    ReduceFunc reduce;                    // combines two reduced values
    bool solve_mean = false;              // true for the RMS-type reductions

    switch (rtype) {
    case RTYPE_MAX:
        reduce_initial = length;
        reduce         = max;
        break;
    case RTYPE_MIN:
        reduce_initial = length;
        reduce         = min;
        break;
    case RTYPE_RMS:
        reduce_initial = squared;
        reduce         = sum;
        solve_mean     = true;
        break;
    case RTYPE_RMS_EXP:
        reduce_initial = exp_squared;
        reduce         = sum;
        solve_mean     = true;
        break;
    default:
        ERROR("Unrecognized RTYPE");
    }

    // Bounds of the computational domain (upper bounds are exclusive)
    const int x_begin = mesh.info.int_params[AC_nx_min];
    const int y_begin = mesh.info.int_params[AC_ny_min];
    const int z_begin = mesh.info.int_params[AC_nz_min];
    const int x_end   = mesh.info.int_params[AC_nx_max];
    const int y_end   = mesh.info.int_params[AC_ny_max];
    const int z_end   = mesh.info.int_params[AC_nz_max];

    // Max/min start from the first sample of the domain; sums start from zero
    const int first_idx = AC_VTXBUF_IDX(x_begin, y_begin, z_begin, mesh.info);
    ModelScalar acc;
    if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
        acc = reduce_initial(mesh.vertex_buffer[a][first_idx]);
    else
        acc = .0f;

    for (int k = z_begin; k < z_end; ++k) {
        for (int j = y_begin; j < y_end; ++j) {
            for (int i = x_begin; i < x_end; ++i) {
                const int idx           = AC_VTXBUF_IDX(i, j, k, mesh.info);
                const ModelScalar value = reduce_initial(mesh.vertex_buffer[a][idx]);
                acc                     = reduce(acc, value);
            }
        }
    }

    if (!solve_mean)
        return acc;

    // Root of the mean for the RMS-type reductions
    const ModelScalar inv_n = 1.0l / mesh.info.int_params[AC_nxyz];
    return sqrtl(inv_n * acc);
}
/**
 * Reduces a three-component vector field, stored in three vertex buffers, to
 * a single value over the computational domain.
 *
 * RTYPE_MAX and RTYPE_MIN return the largest and smallest Euclidean norm.
 * RTYPE_RMS returns sqrt(sum(a^2 + b^2 + c^2) / nxyz) and RTYPE_RMS_EXP
 * returns sqrt(sum(exp(a)^2 + exp(b)^2 + exp(c)^2) / nxyz), where nxyz is
 * the AC_nxyz mesh parameter.
 *
 * @param mesh    Mesh holding the vertex buffers and their dimensions.
 * @param rtype   Reduction to perform.
 * @param a, b, c Handles of the vertex buffers holding the three components.
 * @return Result of the reduction.
 */
ModelScalar
model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
                 const VertexBufferHandle& a, const VertexBufferHandle& b,
                 const VertexBufferHandle& c)
{
    ReduceInitialVecFunc reduce_initial; // maps a sample to the reduced value
    ReduceFunc reduce;                   // combines two reduced values
    bool solve_mean = false;             // true for the RMS-type reductions

    switch (rtype) {
    case RTYPE_MAX:
        reduce_initial = length;
        reduce         = max;
        break;
    case RTYPE_MIN:
        reduce_initial = length;
        reduce         = min;
        break;
    case RTYPE_RMS:
        reduce_initial = squared;
        reduce         = sum;
        solve_mean     = true;
        break;
    case RTYPE_RMS_EXP:
        reduce_initial = exp_squared;
        reduce         = sum;
        solve_mean     = true;
        break;
    default:
        ERROR("Unrecognized RTYPE");
    }

    // Bounds of the computational domain (upper bounds are exclusive)
    const int x_begin = mesh.info.int_params[AC_nx_min];
    const int y_begin = mesh.info.int_params[AC_ny_min];
    const int z_begin = mesh.info.int_params[AC_nz_min];
    const int x_end   = mesh.info.int_params[AC_nx_max];
    const int y_end   = mesh.info.int_params[AC_ny_max];
    const int z_end   = mesh.info.int_params[AC_nz_max];

    // Max/min start from the first sample of the domain; sums start from zero
    const int first_idx = AC_VTXBUF_IDX(x_begin, y_begin, z_begin, mesh.info);
    ModelScalar acc;
    if (rtype == RTYPE_MAX || rtype == RTYPE_MIN)
        acc = reduce_initial(mesh.vertex_buffer[a][first_idx],
                             mesh.vertex_buffer[b][first_idx],
                             mesh.vertex_buffer[c][first_idx]);
    else
        acc = 0;

    for (int k = z_begin; k < z_end; k++) {
        for (int j = y_begin; j < y_end; j++) {
            for (int i = x_begin; i < x_end; i++) {
                const int idx           = AC_VTXBUF_IDX(i, j, k, mesh.info);
                const ModelScalar value = reduce_initial(mesh.vertex_buffer[a][idx],
                                                         mesh.vertex_buffer[b][idx],
                                                         mesh.vertex_buffer[c][idx]);
                acc                     = reduce(acc, value);
            }
        }
    }

    if (!solve_mean)
        return acc;

    // Root of the mean for the RMS-type reductions
    const ModelScalar inv_n = 1.0l / mesh.info.int_params[AC_nxyz];
    return sqrtl(inv_n * acc);
}

View File

@@ -0,0 +1,37 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include "astaroth.h"
#include "modelmesh.h"
/**
 * Reduces one vertex buffer of the model mesh to a single value.
 *
 * @param mesh   Mesh holding the vertex buffers and the domain bounds.
 * @param rtype  Reduction type (RTYPE_MAX, RTYPE_MIN, RTYPE_RMS or
 *               RTYPE_RMS_EXP).
 * @param a      Handle of the vertex buffer to reduce.
 * @return       The reduced value.
 */
ModelScalar model_reduce_scal(const ModelMesh& mesh, const ReductionType& rtype,
const VertexBufferHandle& a);
/**
 * Reduces a three-component vector field of the model mesh to a single value.
 *
 * @param mesh     Mesh holding the vertex buffers and the domain bounds.
 * @param rtype    Reduction type (RTYPE_MAX, RTYPE_MIN, RTYPE_RMS or
 *                 RTYPE_RMS_EXP).
 * @param a, b, c  Handles of the three component buffers.
 * @return         The reduced value.
 */
ModelScalar model_reduce_vec(const ModelMesh& mesh, const ReductionType& rtype,
const VertexBufferHandle& a,
const VertexBufferHandle& b,
const VertexBufferHandle& c);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,33 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include "astaroth.h"
#include "modelmesh.h"
// NOTE(review): the implementation is not visible from this header. Judging by
// the names, model_rk3 advances `mesh` in place by one timestep of length dt
// using a 3-stage Runge-Kutta scheme -- confirm against the definition.
void model_rk3(const ModelScalar dt, ModelMesh* mesh);
// NOTE(review): presumably performs only substep `step_number` of the scheme
// above; the valid range of step_number is not visible here -- confirm.
void model_rk3_step(const int step_number, const ModelScalar dt, ModelMesh* mesh);

View File

@@ -0,0 +1,36 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include "astaroth.h"
// Scalar type used by the model code: long double.
typedef long double ModelScalar;
// Mesh whose vertex data is stored as ModelScalar.
typedef struct {
// One array of values per vertex buffer handle.
ModelScalar* vertex_buffer[NUM_VTXBUF_HANDLES];
// Mesh parameters (e.g. the int_params dimensions and bounds).
AcMeshInfo info;
} ModelMesh;

447
src/standalone/renderer.cc Normal file
View File

@@ -0,0 +1,447 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "run.h"
#include <SDL.h> // Note: using local version in src/3rdparty dir
#include <math.h> // ceil
#include <string.h> // memcpy
#include "config_loader.h"
#include "core/errchk.h"
#include "core/math_utils.h"
#include "model/host_memory.h"
#include "model/host_timestep.h"
#include "model/model_reduce.h"
#include "model/model_rk3.h"
#include "timer_hires.h"
// Window and renderer handles, created in renderer_init() and released in
// renderer_quit(). Note that `renderer` is not static, unlike `window`.
SDL_Renderer* renderer = NULL;
static SDL_Window* window = NULL;
// Requested window size; overwritten with the size reported by
// SDL_GetWindowSize() in renderer_init().
static int window_width = 800;
static int window_height = 600;
static const int window_bpp = 32; // Bits per pixel
// One surface per vertex buffer, created in renderer_init(). Not static.
SDL_Surface* surfaces[NUM_VTXBUF_HANDLES];
// Surface dimensions in pixels; -1 until renderer_init() sets them.
static int datasurface_width = -1;
static int datasurface_height = -1;
// Index of the z-slice that is drawn, and the bound it wraps around at.
// Both are set in run_renderer() and changed with the i/k keys in running().
static int k_slice = 0;
static int k_slice_max = 0;
// Clear colour used by renderer_draw()
static SDL_Color color_bg = (SDL_Color){30, 30, 35, 255};
// One tile per vertex buffer plus one extra tile for the vector view drawn
// by renderer_draw().
static const int num_tiles = NUM_VTXBUF_HANDLES + 1;
static const int tiles_per_row = 3;
/*
* =============================================================================
* Camera
* =============================================================================
*/
/*
typedef struct {
float x, y;
} float2;
*/
// Rectangle returned by project_ortho(): position (x, y) and extent (w, h).
typedef struct {
float x, y, w, h;
} vec4;
// 2D camera: position and uniform scale factor.
typedef struct {
float2 pos;
float scale;
} Camera;
// The single camera read by project_ortho(); starts at the origin with
// scale 1.
static Camera camera = (Camera){(float2){.0f, .0f}, 1.f};
/**
 * Projects a position and a box into window coordinates using the global
 * camera.
 *
 * The position is translated by the camera position, scaled by the camera
 * scale and shifted by half the window dimensions; the box is only scaled.
 *
 * @param pos    Position to project.
 * @param bbox   Box extents to scale.
 * @param wdims  Window dimensions.
 * @return       Projected position in (x, y) and scaled box in (w, h).
 */
static inline vec4
project_ortho(const float2& pos, const float2& bbox, const float2& wdims)
{
    vec4 projected;
    projected.x = camera.scale * (pos.x - camera.pos.x) + 0.5f * wdims.x;
    projected.y = camera.scale * (pos.y - camera.pos.y) + 0.5f * wdims.y;
    projected.w = camera.scale * bbox.x;
    projected.h = camera.scale * bbox.y;
    return projected;
}
/*
* =============================================================================
* Renderer
* =============================================================================
*/
static int
renderer_init(const int& mx, const int& my)
{
// Init video
SDL_InitSubSystem(SDL_INIT_VIDEO | SDL_INIT_EVENTS);
// Setup window
window = SDL_CreateWindow("Astaroth", SDL_WINDOWPOS_UNDEFINED,
SDL_WINDOWPOS_UNDEFINED, window_width,
window_height, SDL_WINDOW_SHOWN);
// Setup SDL renderer
renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
//SDL_SetWindowFullscreen(window, SDL_WINDOW_FULLSCREEN_DESKTOP);
SDL_GetWindowSize(window, &window_width, &window_height);
SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "1"); // Linear filtering
datasurface_width = mx;
datasurface_height = my;
// vec drawing uses the surface of the first component, no memory issues here
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i)
surfaces[i] = SDL_CreateRGBSurfaceWithFormat(
0, datasurface_width, datasurface_height, window_bpp,
SDL_PIXELFORMAT_RGBA8888);
camera.pos = (float2){.5f * tiles_per_row * datasurface_width - .5f * datasurface_width,
-.5f * (num_tiles / tiles_per_row) * datasurface_height + .5f * datasurface_height};
camera.scale = min(window_width / float(datasurface_width * tiles_per_row),
window_height / float(datasurface_height * (num_tiles/tiles_per_row)));
SDL_RendererInfo renderer_info;
SDL_GetRendererInfo(renderer, &renderer_info);
printf("SDL renderer max texture dims: (%d, %d)\n", renderer_info.max_texture_width, renderer_info.max_texture_height);
return 0;
}
static int
set_pixel(const int& i, const int& j, const uint32_t& color,
SDL_Surface* surface)
{
uint32_t* pixels = (uint32_t*)surface->pixels;
pixels[i + j * surface->w] = color;
return 0;
}
static int
draw_vertex_buffer(const AcMesh& mesh, const VertexBufferHandle& vertex_buffer,
const int& tile)
{
const float xoffset = (tile % tiles_per_row) * datasurface_width;
const float yoffset = - (tile / tiles_per_row) * datasurface_height;
/*
const float max = float(model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer));
const float min = float(model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer));
*/
const float max = 1.f;//float(acReduceScal(RTYPE_MAX, vertex_buffer));
const float min = 0.f;//float(acReduceScal(RTYPE_MIN, vertex_buffer));
const float range = fabsf(max - min);
const float mid = max - .5f * range;
const int k = k_slice; //mesh.info.int_params[AC_mz] / 2;
for (int j = 0; j < mesh.info.int_params[AC_my]; ++j) {
for (int i = 0; i < mesh.info.int_params[AC_mx]; ++i) {
ERRCHK(i < datasurface_width && j < datasurface_height);
const int idx = AC_VTXBUF_IDX(i, j, k, mesh.info);
const uint8_t shade = (uint8_t)(
255.f *
(fabsf(float(mesh.vertex_buffer[vertex_buffer][idx]) - mid)) /
range);
uint8_t color[4] = {0, 0, 0, 255};
color[tile % 3] = shade;
const uint32_t mapped_color = SDL_MapRGBA(
surfaces[vertex_buffer]->format, color[0], color[1], color[2],
color[3]);
set_pixel(i, j, mapped_color, surfaces[vertex_buffer]);
}
}
const float2 pos = (float2){xoffset, yoffset};
const float2 bbox = (float2){.5f * datasurface_width,
.5f * datasurface_height};
const float2 wsize = (float2){float(window_width), float(window_height)};
const vec4 rectf = project_ortho(pos, bbox, wsize);
SDL_Rect rect = (SDL_Rect){
int(rectf.x - rectf.w), int(wsize.y - rectf.y - rectf.h),
int(ceil(2.f * rectf.w)), int(ceil(2.f * rectf.h))};
SDL_Texture* tex = SDL_CreateTextureFromSurface(renderer,
surfaces[vertex_buffer]);
SDL_RenderCopy(renderer, tex, NULL, &rect);
SDL_DestroyTexture(tex);
return 0;
}
static int
draw_vertex_buffer_vec(const AcMesh& mesh,
const VertexBufferHandle& vertex_buffer_a,
const VertexBufferHandle& vertex_buffer_b,
const VertexBufferHandle& vertex_buffer_c,
const int& tile)
{
const float xoffset = (tile % tiles_per_row) * datasurface_width;
const float yoffset = - (tile / tiles_per_row) * datasurface_height;
/*
const float maxx = float(
max(model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer_a),
max(model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer_b),
model_reduce_scal(mesh, RTYPE_MAX, vertex_buffer_c))));
const float minn = float(
min(model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer_a),
min(model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer_b),
model_reduce_scal(mesh, RTYPE_MIN, vertex_buffer_c))));
*/
const float maxx = float(
max(acReduceScal(RTYPE_MAX, vertex_buffer_a),
max(acReduceScal(RTYPE_MAX, vertex_buffer_b),
acReduceScal(RTYPE_MAX, vertex_buffer_c))));
const float minn = float(
min(acReduceScal(RTYPE_MIN, vertex_buffer_a),
min(acReduceScal(RTYPE_MIN, vertex_buffer_b),
acReduceScal(RTYPE_MIN, vertex_buffer_c))));
const float range = fabsf(maxx - minn);
const float mid = maxx - .5f * range;
const int k = k_slice; //mesh.info.int_params[AC_mz] / 2;
for (int j = 0; j < mesh.info.int_params[AC_my]; ++j) {
for (int i = 0; i < mesh.info.int_params[AC_mx]; ++i) {
ERRCHK(i < datasurface_width && j < datasurface_height);
const int idx = AC_VTXBUF_IDX(i, j, k, mesh.info);
const uint8_t r = (uint8_t)(
255.f *
(fabsf(float(mesh.vertex_buffer[vertex_buffer_a][idx]) - mid)) /
range);
const uint8_t g = (uint8_t)(
255.f *
(fabsf(float(mesh.vertex_buffer[vertex_buffer_b][idx]) - mid)) /
range);
const uint8_t b = (uint8_t)(
255.f *
(fabsf(float(mesh.vertex_buffer[vertex_buffer_c][idx]) - mid)) /
range);
const uint32_t mapped_color = SDL_MapRGBA(
surfaces[vertex_buffer_a]->format, r, g, b, 255);
set_pixel(i, j, mapped_color, surfaces[vertex_buffer_a]);
}
}
const float2 pos = (float2){xoffset, yoffset};
const float2 bbox = (float2){.5f * datasurface_width,
.5f * datasurface_height};
const float2 wsize = (float2){float(window_width), float(window_height)};
const vec4 rectf = project_ortho(pos, bbox, wsize);
SDL_Rect rect = (SDL_Rect){
int(rectf.x - rectf.w), int(wsize.y - rectf.y - rectf.h),
int(ceil(2.f * rectf.w)), int(ceil(2.f * rectf.h))};
SDL_Texture* tex = SDL_CreateTextureFromSurface(renderer,
surfaces[vertex_buffer_a]);
SDL_RenderCopy(renderer, tex, NULL, &rect);
SDL_DestroyTexture(tex);
return 0;
}
/**
 * Draws one frame: a tile per vertex buffer plus one RGB tile for the
 * (UUX, UUY, UUZ) buffers, presents it, clears the render target with the
 * background colour and prints the max/min reduction of every vertex buffer.
 *
 * @param mesh  Host mesh holding the data to draw.
 * @return      Always 0.
 */
static int
renderer_draw(const AcMesh& mesh)
{
    int tile = 0;
    while (tile < NUM_VTXBUF_HANDLES) {
        draw_vertex_buffer(mesh, VertexBufferHandle(tile), tile);
        ++tile;
    }
    // The vector view takes the tile after the last scalar tile.
    draw_vertex_buffer_vec(mesh, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ,
                           NUM_VTXBUF_HANDLES);

    // Drawing done, present
    SDL_RenderPresent(renderer);

    // Clear the target after presenting so the next frame starts blank.
    SDL_SetRenderDrawColor(renderer, color_bg.r, color_bg.g, color_bg.b,
                           color_bg.a);
    SDL_RenderClear(renderer);

    for (int handle = 0; handle < NUM_VTXBUF_HANDLES; ++handle) {
        const VertexBufferHandle vertex_buffer = VertexBufferHandle(handle);
        const double buf_max = (double)acReduceScal(RTYPE_MAX, vertex_buffer);
        const double buf_min = (double)acReduceScal(RTYPE_MIN, vertex_buffer);
        printf("\t%s umax %e, min %e\n", vtxbuf_names[vertex_buffer], buf_max,
               buf_min);
    }
    printf("\n");

    return 0;
}
/**
 * Releases everything renderer_init() created: the per-buffer surfaces, the
 * renderer and the window, then shuts SDL down.
 *
 * @return Always 0.
 */
static int
renderer_quit(void)
{
    int handle = 0;
    while (handle < NUM_VTXBUF_HANDLES) {
        SDL_FreeSurface(surfaces[handle]);
        ++handle;
    }

    SDL_DestroyRenderer(renderer);
    renderer = NULL;

    SDL_DestroyWindow(window);
    window = NULL;

    SDL_Quit();
    return 0;
}
// Currently selected initial condition; cycled with the space key in
// running() and used for the first initialization in run_renderer().
static int init_type = INIT_TYPE_GAUSSIAN_RADIAL_EXPL;
/**
 * Drains the SDL event queue and handles discrete key presses.
 *
 *   - window close or Escape: stop
 *   - Space: switch to the next initial condition and upload the mesh
 *   - i / k: move the displayed z-slice up / down (wraps around)
 *
 * @param mesh  Host mesh that is re-initialized when Space is pressed.
 * @return      false when the application should stop, true otherwise.
 */
static bool
running(AcMesh* mesh)
{
    SDL_Event event;
    while (SDL_PollEvent(&event)) {
        if (event.type == SDL_QUIT)
            return false;
        if (event.type != SDL_KEYDOWN)
            continue;

        switch (event.key.keysym.sym) {
        case SDLK_ESCAPE:
            return false;
        case SDLK_SPACE:
            init_type = (init_type + 1) % NUM_INIT_TYPES;
            acmesh_init_to(InitType(init_type), mesh);
            acLoad(*mesh);
            break;
        case SDLK_i:
            k_slice = (k_slice + 1) % k_slice_max;
            printf("k_slice %d\n", k_slice);
            break;
        case SDLK_k:
            // Add k_slice_max before the modulo so the result stays
            // non-negative when stepping down from slice 0.
            k_slice = (k_slice - 1 + k_slice_max) % k_slice_max;
            printf("k_slice %d\n", k_slice);
            break;
        default:
            break;
        }
    }
    return true;
}
/**
 * Applies the keys that are currently held down.
 *
 *   - arrow keys: move the camera
 *   - PageUp / PageDown: increase / decrease the camera scale
 *   - comma / period: set the timescale to 0.1 / 1.0
 *
 * @param dt  Time since the previous call; scales camera motion.
 */
static void
check_input(const float& dt)
{
    /* Camera movement */
    // Computed from the scale before any zooming is applied below.
    const float pan_rate  = 1000.f / camera.scale;
    const float zoom_rate = 1.0001f;

    const uint8_t* held = SDL_GetKeyboardState(NULL);

    if (held[SDL_SCANCODE_UP])
        camera.pos.y += pan_rate * dt;
    if (held[SDL_SCANCODE_DOWN])
        camera.pos.y -= pan_rate * dt;
    if (held[SDL_SCANCODE_LEFT])
        camera.pos.x -= pan_rate * dt;
    if (held[SDL_SCANCODE_RIGHT])
        camera.pos.x += pan_rate * dt;

    if (held[SDL_SCANCODE_PAGEUP])
        camera.scale += camera.scale * zoom_rate * dt;
    if (held[SDL_SCANCODE_PAGEDOWN])
        camera.scale -= camera.scale * zoom_rate * dt;

    if (held[SDL_SCANCODE_COMMA])
        set_timescale(AcReal(.1));
    if (held[SDL_SCANCODE_PERIOD])
        set_timescale(AcReal(1.));
}
/**
 * Runs the interactive renderer: loads the configuration, opens the window,
 * uploads an initial mesh and then alternates between stepping the
 * simulation and drawing the current z-slice until the user quits.
 *
 * @return 0 on normal exit, -1 if the renderer could not be initialized.
 */
int
run_renderer(void)
{
    /* Parse configs */
    AcMeshInfo mesh_info;
    load_config(&mesh_info);

    if (renderer_init(mesh_info.int_params[AC_mx],
                      mesh_info.int_params[AC_my]) != 0) {
        renderer_quit();
        return -1;
    }

    AcMesh* mesh = acmesh_create(mesh_info);
    acmesh_init_to(InitType(init_type), mesh);

    acInit(mesh_info);
    acLoad(*mesh);

    Timer frame_timer;
    timer_reset(&frame_timer);

    Timer wallclock;
    timer_reset(&wallclock);

    Timer io_timer;
    timer_reset(&io_timer);

    const float desired_frame_time = 1.f / 60.f;
    int steps                      = 0;
    k_slice                        = mesh->info.int_params[AC_mz] / 2;
    k_slice_max                    = mesh->info.int_params[AC_mz];
    while (running(mesh)) {

        /* Input */
        check_input(timer_diff_nsec(io_timer) / 1e9f);
        timer_reset(&io_timer);

        /* Step the simulation */
#if 1
        const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY,
                                        VTXBUF_UUZ);
        const AcReal dt   = host_timestep(umax, mesh_info);
        acIntegrate(dt);
#else
        ModelMesh* model_mesh = modelmesh_create(mesh->info);
        // Copy the data over before reducing; a freshly created model mesh
        // does not yet hold the current state.
        acmesh_to_modelmesh(*mesh, model_mesh);
        const AcReal umax = AcReal(model_reduce_vec(*model_mesh, RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ));
        const AcReal dt   = host_timestep(umax, mesh_info);
        model_rk3(dt, model_mesh);
        modelmesh_to_acmesh(*model_mesh, mesh);
        modelmesh_destroy(model_mesh);
        acLoad(*mesh); // Just a quick hack s.t. we do not have to add an
                       // additional if to the render part
#endif
        ++steps;

        /* Render */
        const float timer_diff_sec = timer_diff_nsec(frame_timer) / 1e9f;
        if (timer_diff_sec >= desired_frame_time) {
            // Fetch only the slice that is drawn (AC_mxy vertices starting
            // at z = k_slice) instead of the whole mesh.
            //acStore(mesh);
            const int num_vertices = mesh->info.int_params[AC_mxy];
            const int3 dst         = (int3){0, 0, k_slice};
            acStoreWithOffset(dst, num_vertices, mesh);
            acSynchronize();
            renderer_draw(*mesh); // Bottleneck is here
            printf("Step #%d, dt: %f\n", steps, double(dt));
            timer_reset(&frame_timer);
        }
    }
    printf("Wallclock time %f s\n", double(timer_diff_nsec(wallclock) / 1e9f));

    acStore(mesh);
    acQuit();
    acmesh_destroy(mesh);

    renderer_quit();

    return 0;
}

35
src/standalone/run.h Normal file
View File

@@ -0,0 +1,35 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
// Entry points of the standalone program.
// NOTE(review): run_autotest and run_benchmark are not visible from this
// header; their behaviour and return values should be confirmed against
// their definitions.
int run_autotest(void);
// Runs the simulation loop, writing diagnostics and mesh snapshots to files.
int run_simulation(void);
int run_benchmark(void);
// Runs the simulation with the interactive SDL renderer.
int run_renderer(void);

View File

@@ -0,0 +1,339 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "run.h"
#include "config_loader.h"
#include "core/errchk.h"
#include "core/math_utils.h"
#include "model/host_memory.h"
#include "model/host_timestep.h"
#include "model/model_reduce.h"
#include "model/model_rk3.h"
#include "timer_hires.h"
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
/*
// DEPRECATED: TODO remove
static inline void
print_diagnostics(const AcMesh& mesh, const int& step, const AcReal& dt)
{
const int max_name_width = 16;
printf("Step %d, dt %e s\n", step, double(dt));
printf(" %*s: min %.3e,\trms %.3e,\tmax %.3e\n", max_name_width, "uu total",
double(model_reduce_vec(mesh, RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ)),
double(model_reduce_vec(mesh, RTYPE_MIN, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ)),
double(model_reduce_vec(mesh, RTYPE_RMS, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ)));
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
printf(" %*s: min %.3e,\trms %.3e,\tmax %.3e\n", max_name_width, vtxbuf_names[i],
double(model_reduce_scal(mesh, RTYPE_MAX, VertexBufferHandle(i))),
double(model_reduce_scal(mesh, RTYPE_MIN, VertexBufferHandle(i))),
double(model_reduce_scal(mesh, RTYPE_RMS, VertexBufferHandle(i))));
}
}
*/
// Writes all setting info into a separate ASCII file (mesh_info.list). This
// guarantees that the data-specific information is stored next to the output,
// even though in principle the same values are in astaroth.conf.
// Also writes purge.sh, a helper script that deletes the generated files.
// If a file cannot be opened, an error is printed and that file is skipped.
static inline
void write_mesh_info(const AcMeshInfo* config)
{
    FILE* infotxt;

    infotxt = fopen("purge.sh","w");
    if (infotxt) {
        fprintf(infotxt, "#!/bin/bash\n");
        fprintf(infotxt, "rm *.list *.mesh *.ts purge.sh\n");
        fclose(infotxt);
    }
    else {
        perror("write_mesh_info: could not open purge.sh");
    }

    infotxt = fopen("mesh_info.list","w");
    if (!infotxt) {
        perror("write_mesh_info: could not open mesh_info.list");
        return;
    }

    //Total grid dimensions
    fprintf(infotxt, "int AC_mx %i \n", config->int_params[AC_mx]);
    fprintf(infotxt, "int AC_my %i \n", config->int_params[AC_my]);
    fprintf(infotxt, "int AC_mz %i \n", config->int_params[AC_mz]);

    // Bounds for the computational domain, i.e. nx_min <= i < nx_max
    fprintf(infotxt, "int AC_nx_min %i \n", config->int_params[AC_nx_min]);
    fprintf(infotxt, "int AC_nx_max %i \n", config->int_params[AC_nx_max]);
    fprintf(infotxt, "int AC_ny_min %i \n", config->int_params[AC_ny_min]);
    fprintf(infotxt, "int AC_ny_max %i \n", config->int_params[AC_ny_max]);
    fprintf(infotxt, "int AC_nz_min %i \n", config->int_params[AC_nz_min]);
    fprintf(infotxt, "int AC_nz_max %i \n", config->int_params[AC_nz_max]);

    // Spacing
    fprintf(infotxt, "real AC_inv_dsx %e \n", (double)config->real_params[AC_inv_dsx]);
    fprintf(infotxt, "real AC_inv_dsy %e \n", (double)config->real_params[AC_inv_dsy]);
    fprintf(infotxt, "real AC_inv_dsz %e \n", (double)config->real_params[AC_inv_dsz]);
    fprintf(infotxt, "real AC_dsmin %e \n", (double)config->real_params[AC_dsmin ]);

    /* Additional helper params */
    // Int helpers
    fprintf(infotxt, "int AC_mxy %i \n", config->int_params[AC_mxy ]);
    fprintf(infotxt, "int AC_nxy %i \n", config->int_params[AC_nxy ]);
    fprintf(infotxt, "int AC_nxyz %i \n", config->int_params[AC_nxyz]);

    // Real helpers
    fprintf(infotxt, "real AC_cs2_sound %e \n", (double)config->real_params[AC_cs2_sound]);
    fprintf(infotxt, "real AC_cv_sound %e \n", (double)config->real_params[AC_cv_sound ]);

    fclose(infotxt);
}
// Writes a run state into a set of C binaries, one file per vertex buffer,
// named "<buffer name>_<step>.mesh". For the sake of accuracy, all floating
// point numbers are saved as long double regardless of the choice of
// precision during runtime.
// Each file starts with the time stamp t_step, followed by the grid data.
// A buffer whose file name does not fit or whose file cannot be opened is
// reported and skipped.
static inline void
save_mesh(const AcMesh &save_mesh, const int step,
          const AcReal t_step)
{
    const size_t n = AC_VTXBUF_SIZE(save_mesh.info);

    for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
        const char* buffername = vtxbuf_names[w];

        // Build the name in one bounded call; snprintf never writes past the
        // buffer and reports the length it needed.
        char bin_filename[80];
        const int name_len = snprintf(bin_filename, sizeof(bin_filename),
                                      "%s_%d.mesh", buffername, step);
        if (name_len < 0 || (size_t)name_len >= sizeof(bin_filename)) {
            fprintf(stderr, "save_mesh: file name for buffer %s is too long\n",
                    buffername);
            continue;
        }

        printf("Savefile %s \n", bin_filename);

        FILE* save_ptr = fopen(bin_filename,"wb");
        if (!save_ptr) {
            perror(bin_filename);
            continue;
        }

        //Start file with time stamp
        const long double time_stamp = (long double) t_step;
        fwrite(&time_stamp, sizeof(long double), 1, save_ptr);

        //Grid data
        for (size_t i = 0; i < n; ++i) {
            const AcReal point_val = save_mesh.vertex_buffer[VertexBufferHandle(w)][i];
            const long double write_long_buf = (long double) point_val;
            fwrite(&write_long_buf, sizeof(long double), 1, save_ptr);
        }
        fclose(save_ptr);
    }
}
// This function prints out the diagnostic values to std.out and also saves and
// appends an ascii file to contain all the result.
static inline void
print_diagnostics(const int step, const AcReal dt, const AcReal t_step, FILE *diag_file)
{
AcReal buf_rms, buf_max, buf_min;
const int max_name_width = 16;
// Calculate rms, min and max from the velocity vector field
buf_max = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
buf_min = acReduceVec(RTYPE_MIN, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
buf_rms = acReduceVec(RTYPE_RMS, VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ);
// MV: The ordering in the earlier version was wrong in terms of variable
// MV: name and its diagnostics.
printf("Step %d, t_step %.3e, dt %e s\n", step, double(t_step), double(dt));
printf(" %*s: min %.3e,\trms %.3e,\tmax %.3e\n", max_name_width, "uu total",
double(buf_min), double(buf_rms), double(buf_max));
fprintf(diag_file, "%d %e %e %e %e %e ", step, double(t_step), double(dt),
double(buf_min), double(buf_rms), double(buf_max));
// Calculate rms, min and max from the variables as scalars
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
buf_max = acReduceScal(RTYPE_MAX, VertexBufferHandle(i));
buf_min = acReduceScal(RTYPE_MIN, VertexBufferHandle(i));
buf_rms = acReduceScal(RTYPE_RMS, VertexBufferHandle(i));
printf(" %*s: min %.3e,\trms %.3e,\tmax %.3e\n", max_name_width, vtxbuf_names[i],
double(buf_min), double(buf_rms), double(buf_max));
fprintf(diag_file, "%e %e %e ", double(buf_min), double(buf_rms), double(buf_max));
}
fprintf(diag_file, "\n");
}
/*
MV NOTE: At the moment I have no clear idea how to calculate magnetic
diagnostic variables from grid. Vector potential measures have a limited
value. TODO: Smart way to get brms, bmin and bmax.
*/
/**
 * Runs the simulation without rendering.
 *
 * Loads the configuration, initializes and uploads the mesh, then integrates
 * for AC_max_steps steps. Diagnostics are appended to timeseries.ts every
 * AC_save_steps steps, and binary snapshots are written every AC_bin_steps
 * steps or whenever the simulation time passes the next multiple of
 * AC_bin_save_t. A step interval of 0 disables the corresponding output.
 *
 * @return 0 on success, 1 if timeseries.ts could not be opened.
 */
int
run_simulation(void)
{
    /* Parse configs */
    AcMeshInfo mesh_info;
    load_config(&mesh_info);

    AcMesh* mesh = acmesh_create(mesh_info);
    acmesh_init_to(INIT_TYPE_GAUSSIAN_RADIAL_EXPL, mesh);

    acInit(mesh_info);
    acLoad(*mesh);

    FILE *diag_file;
    diag_file = fopen("timeseries.ts", "a");
    if (!diag_file) {
        perror("run_simulation: could not open timeseries.ts");
        acQuit();
        acmesh_destroy(mesh);
        return 1;
    }
    // TODO Get time from earlier state.
    AcReal t_step = 0.0;

    // Generate the title row.
    fprintf(diag_file, "step t_step dt uu_total_min uu_total_rms uu_total_max ");
    for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
        fprintf(diag_file, "%s_min %s_rms %s_max ", vtxbuf_names[i], vtxbuf_names[i], vtxbuf_names[i]);
    }

    fprintf(diag_file, "\n");

    write_mesh_info(&mesh_info);
    print_diagnostics(0, AcReal(.0), t_step, diag_file);

    acSynchronize();
    acStore(mesh);
    save_mesh(*mesh, 0, t_step);

    const int max_steps      = mesh_info.int_params[AC_max_steps];
    const int save_steps     = mesh_info.int_params[AC_save_steps];
    const int bin_save_steps = mesh_info.int_params[AC_bin_steps];

    AcReal bin_save_t = mesh_info.real_params[AC_bin_save_t];
    AcReal bin_crit_t = bin_save_t;

    /* Step the simulation */
    for (int i = 1; i < max_steps; ++i) {
        const AcReal umax = acReduceVec(RTYPE_MAX, VTXBUF_UUX, VTXBUF_UUY,
                                        VTXBUF_UUZ);
        const AcReal dt   = host_timestep(umax, mesh_info);
        acIntegrate(dt);

        t_step += dt;

        /* Print diagnostics */
        // An interval of 0 would be a modulo by zero, so it is treated as
        // "never".
        if (save_steps != 0 && (i % save_steps) == 0) {
            /*
                print_diagnostics() prints the results to stdout and also
                appends them as a row to the ASCII file timeseries.ts.
            */
            print_diagnostics(i, dt, t_step, diag_file);

            /*
                We might also want a function for calculating XY-averages,
                which can be very useful when observing the behaviour of
                turbulent simulations. (TODO)
            */
        }

        /* Save the simulation state */
        if ((bin_save_steps != 0 && (i % bin_save_steps) == 0) ||
            t_step >= bin_crit_t) {
            /*
                This saves the data into simple C binaries which can be
                used for analysing the data snapshots closely.

                Saving the simulation state should happen in a separate
                stage. We do not want to save it as often as diagnostics.
                The file format should IDEALLY be HDF5, which has become a
                well supported, portable and reliable data format when it
                comes to HPC applications. However, implementing it will
                have to wait; for now a simpler approach has to do. (TODO?)
            */

            /*
                The updated mesh will be located on the GPU. Also all calls
                to the astaroth interface (functions beginning with ac*) are
                assumed to be asynchronous, so the meshes must also be
                synchronized before transferring the data to the CPU.
                Like so:

                acSynchronize();
                acStore(mesh);
            */
            acSynchronize();
            acStore(mesh);

            save_mesh(*mesh, i, t_step);

            bin_crit_t += bin_save_t;
        }
    }

    //////Save the final snapshot
    ////acSynchronize();
    ////acStore(mesh);

    ////save_mesh(*mesh, , t_step);

    acQuit();
    acmesh_destroy(mesh);

    fclose(diag_file);

    return 0;
}

View File

@@ -0,0 +1,64 @@
/*
Copyright (C) 2014-2018, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
@file
\brief High-resolution timer.
Usage:
Timer t;
timer_reset(&t);
timer_diff_nsec(t);
If there are issues, try compiling with -std=gnu11 -lrt
*/
#pragma once
#include <stdio.h> // perror
#include <time.h>
typedef struct timespec Timer;
// Contains at least the following members:
// time_t tv_sec;
// long tv_nsec;

/**
 * Stores the current time in the timer.
 *
 * The monotonic clock is used because timers are only ever compared against
 * each other to measure intervals; unlike the realtime clock it is not
 * affected when the system time is adjusted.
 *
 * @param t  Timer to set.
 * @return   0 on success, -1 on failure (a message is printed with perror).
 */
static inline int
timer_reset(Timer* t)
{
    const int retval = clock_gettime(CLOCK_MONOTONIC, t);
    if (retval == -1)
        perror("clock_gettime failure");

    return retval;
}
static inline long
timer_diff_nsec(const Timer start)
{
Timer end;
timer_reset(&end);
const long diff = (end.tv_sec - start.tv_sec) * 1000000000l +
(end.tv_nsec - start.tv_nsec);
return diff;
}
/**
 * Prints the time elapsed since the timer was last reset, in milliseconds.
 *
 * @param t  Timer previously set with timer_reset().
 */
static inline void
timer_diff_print(const Timer t)
{
    const double elapsed_ms = timer_diff_nsec(t) / 1e6;
    printf("Time elapsed: %g ms\n", elapsed_ms);
}