Merge branch 'master' into io_improvement_20190924

This commit is contained in:
Miikka Vaisala
2019-10-02 10:47:51 +08:00
32 changed files with 848 additions and 167 deletions

2
.gitignore vendored
View File

@@ -22,6 +22,4 @@ makefile~
*~ *~
build/ build/
src/core/kernels/stencil_assembly.cuh
src/core/kernels/stencil_process.cuh
tests/ tests/

View File

@@ -18,23 +18,21 @@ find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++ PATHS ENV PATH NO_DEFAULT_PA
## Project settings ## Project settings
project(astaroth C CXX) project(astaroth C CXX)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
## Options ## Options
option(BUILD_DEBUG "Builds the program with extensive error checking" OFF) option(BUILD_DEBUG "Builds the program with extensive error checking" OFF)
option(BUILD_STANDALONE "Builds the standalone Astaroth" ON) option(BUILD_STANDALONE "Builds the standalone Astaroth" ON)
option(BUILD_UTILS "Builds the utility library" ON)
option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF) option(BUILD_RT_VISUALIZATION "Builds the module for real-time visualization using SDL2" OFF)
option(BUILD_C_API_TEST "Builds a C program to test whether the API is conformant" OFF) option(BUILD_C_API_TEST "Builds a C program to test whether the API is conformant" OFF)
option(BUILD_MPI_TEST "Builds a C program to test whether MPI works" OFF) option(BUILD_MPI_TEST "Builds a C program to test whether MPI works" OFF)
option(DOUBLE_PRECISION "Generates double precision code" OFF) option(DOUBLE_PRECISION "Generates double precision code" OFF)
option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON) option(MULTIGPU_ENABLED "If enabled, uses all the available GPUs" ON)
## Compile the Astaroth Code compiler ## Compile the Astaroth Code Compiler
find_package(FLEX) add_subdirectory(acc)
find_package(BISON)
execute_process (
COMMAND ./build_acc.sh
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/acc
)
# NOTE: Manually defined DSL_MODULE_DIR must be set relative to the project root, not the actual # NOTE: Manually defined DSL_MODULE_DIR must be set relative to the project root, not the actual
# build directory! # build directory!
# NO! ../acc/mhd_solver # NO! ../acc/mhd_solver
@@ -50,12 +48,13 @@ set(DSL_HEADERS "${PROJECT_BINARY_DIR}/stencil_assembly.cuh"
"${PROJECT_BINARY_DIR}/stencil_process.cuh" "${PROJECT_BINARY_DIR}/stencil_process.cuh"
"${PROJECT_BINARY_DIR}/stencil_defines.h") "${PROJECT_BINARY_DIR}/stencil_defines.h")
add_custom_command ( add_custom_command (
COMMENT "Building AC objects ${DSL_MODULE_DIR}" COMMENT "Building ACC objects ${DSL_MODULE_DIR}"
COMMAND ${CMAKE_SOURCE_DIR}/scripts/compile_acc_module.sh ${DSL_MODULE_DIR} COMMAND ${CMAKE_SOURCE_DIR}/scripts/compile_acc_module.sh ${DSL_MODULE_DIR}
DEPENDS ${DSL_SOURCES} DEPENDS ${DSL_SOURCES}
OUTPUT ${DSL_HEADERS} OUTPUT ${DSL_HEADERS}
) )
add_custom_target(dsl_headers ALL DEPENDS ${DSL_HEADERS}) add_custom_target(dsl_headers ALL DEPENDS ${DSL_HEADERS})
add_dependencies(dsl_headers acc)
## Build types ## Build types
# Available types (case-sensitive): # Available types (case-sensitive):
@@ -103,3 +102,8 @@ endif ()
if (BUILD_MPI_TEST) if (BUILD_MPI_TEST)
add_subdirectory(src/mpitest) add_subdirectory(src/mpitest)
endif () endif ()
if (BUILD_UTILS)
add_subdirectory(src/utils)
add_subdirectory(src/exampleproject) # TODO this should not be here
endif ()

5
acc/.gitignore vendored
View File

@@ -1,5 +0,0 @@
build
testbin
# Except this file
!.gitignore

12
acc/CMakeLists.txt Normal file
View File

@@ -0,0 +1,12 @@
## CMake settings
cmake_minimum_required (VERSION 3.5.1)
find_program(CMAKE_C_COMPILER NAMES $ENV{CC} gcc PATHS ENV PATH NO_DEFAULT_PATH)
## Project settings
project(acc C)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
include_directories(src)
add_subdirectory(src)

View File

@@ -1,9 +1,9 @@
#!/bin/bash #!/bin/bash
# Usage ./compile <source file> <gcc preprocessor flags, f.ex. -I some/path> # Usage ./compile <acc binary> <source file> <gcc preprocessor flags, f.ex. -I some/path>
ACC_DIR=`dirname $0` ACC_DIR=`dirname $0`
ACC_BINARY=$1
FULL_NAME=$(basename -- $1) FULL_NAME=$(basename -- $2)
FILENAME="${FULL_NAME%.*}" FILENAME="${FULL_NAME%.*}"
EXTENSION="${FULL_NAME##*.}" EXTENSION="${FULL_NAME##*.}"
@@ -25,4 +25,4 @@ else
exit exit
fi fi
${ACC_DIR}/preprocess.sh $@ | ${ACC_DIR}/build/acc ${COMPILE_FLAGS} > ${CUH_FILENAME} ${ACC_DIR}/preprocess.sh ${@:2} | ${ACC_BINARY} ${COMPILE_FLAGS} > ${CUH_FILENAME}

View File

@@ -1,5 +0,0 @@
build
testbin
# Except this file
!.gitignore

View File

@@ -176,20 +176,20 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in Scala
const Matrix S = stress_tensor(uu); const Matrix S = stress_tensor(uu);
const Scalar cs2 = AC_cs2_sound * exp(AC_gamma * value(ss) / AC_cp_sound + const Scalar cs2 = AC_cs2_sound * exp(AC_gamma * value(ss) / AC_cp_sound +
(AC_gamma - 1) * (value(lnrho) - AC_lnrho0)); (AC_gamma - 1) * (value(lnrho) - AC_lnrho0));
const Vector j = (Scalar(1.) / AC_mu0) * const Vector j = (Scalar(1.0) / AC_mu0) *
(gradient_of_divergence(aa) - laplace_vec(aa)); // Current density (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Vector B = curl(aa); const Vector B = curl(aa);
// TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD? // TODO: DOES INTHERMAL VERSTION INCLUDE THE MAGNETIC FIELD?
const Scalar inv_rho = Scalar(1.) / exp(value(lnrho)); const Scalar inv_rho = Scalar(1.0) / exp(value(lnrho));
// Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\) // Regex replace CPU constants with get\(AC_([a-zA-Z_0-9]*)\)
// \1 // \1
const Vector mom = -mul(gradients(uu), value(uu)) - const Vector mom = -mul(gradients(uu), value(uu)) -
cs2 * ((Scalar(1.) / AC_cp_sound) * gradient(ss) + gradient(lnrho)) + cs2 * ((Scalar(1.0) / AC_cp_sound) * gradient(ss) + gradient(lnrho)) +
inv_rho * cross(j, B) + inv_rho * cross(j, B) +
AC_nu_visc * AC_nu_visc *
(laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) + (laplace_vec(uu) + Scalar(1.0 / 3.0) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + Scalar(2.0) * mul(S, gradient(lnrho))) +
AC_zeta * gradient_of_divergence(uu) AC_zeta * gradient_of_divergence(uu)
#if LSINK #if LSINK
//Gravity term //Gravity term
@@ -215,8 +215,8 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, in Scala
(gradient(tt) + value(tt) * gradient(lnrho)); (gradient(tt) + value(tt) * gradient(lnrho));
mom = -mul(gradients(uu), value(uu)) - pressure_term + mom = -mul(gradients(uu), value(uu)) - pressure_term +
AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) + AC_nu_visc * (laplace_vec(uu) + Scalar(1.0 / 3.0) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + Scalar(2.0) * mul(S, gradient(lnrho))) +
AC_zeta * gradient_of_divergence(uu) AC_zeta * gradient_of_divergence(uu)
#if LSINK #if LSINK
+ sink_gravity(globalVertexIdx); + sink_gravity(globalVertexIdx);
@@ -240,8 +240,8 @@ momentum(int3 globalVertexIdx, in VectorField uu, in ScalarField lnrho, Scalar d
// Isothermal: we have constant speed of sound // Isothermal: we have constant speed of sound
mom = -mul(gradients(uu), value(uu)) - AC_cs2_sound * gradient(lnrho) + mom = -mul(gradients(uu), value(uu)) - AC_cs2_sound * gradient(lnrho) +
AC_nu_visc * (laplace_vec(uu) + Scalar(1. / 3.) * gradient_of_divergence(uu) + AC_nu_visc * (laplace_vec(uu) + Scalar(1.0 / 3.0) * gradient_of_divergence(uu) +
Scalar(2.) * mul(S, gradient(lnrho))) + Scalar(2.0) * mul(S, gradient(lnrho))) +
AC_zeta * gradient_of_divergence(uu) AC_zeta * gradient_of_divergence(uu)
#if LSINK #if LSINK
+ sink_gravity(globalVertexIdx) + sink_gravity(globalVertexIdx)
@@ -283,7 +283,7 @@ Scalar
lnT(in ScalarField ss, in ScalarField lnrho) lnT(in ScalarField ss, in ScalarField lnrho)
{ {
const Scalar lnT = AC_lnT0 + AC_gamma * value(ss) / AC_cp_sound + const Scalar lnT = AC_lnT0 + AC_gamma * value(ss) / AC_cp_sound +
(AC_gamma - Scalar(1.)) * (value(lnrho) - AC_lnrho0); (AC_gamma - Scalar(1.0)) * (value(lnrho) - AC_lnrho0);
return lnT; return lnT;
} }
@@ -291,14 +291,14 @@ lnT(in ScalarField ss, in ScalarField lnrho)
Scalar Scalar
heat_conduction(in ScalarField ss, in ScalarField lnrho) heat_conduction(in ScalarField ss, in ScalarField lnrho)
{ {
const Scalar inv_AC_cp_sound = AcReal(1.) / AC_cp_sound; const Scalar inv_AC_cp_sound = AcReal(1.0) / AC_cp_sound;
const Vector grad_ln_chi = -gradient(lnrho); const Vector grad_ln_chi = -gradient(lnrho);
const Scalar first_term = AC_gamma * inv_AC_cp_sound * laplace(ss) + const Scalar first_term = AC_gamma * inv_AC_cp_sound * laplace(ss) +
(AC_gamma - AcReal(1.)) * laplace(lnrho); (AC_gamma - AcReal(1.0)) * laplace(lnrho);
const Vector second_term = AC_gamma * inv_AC_cp_sound * gradient(ss) + const Vector second_term = AC_gamma * inv_AC_cp_sound * gradient(ss) +
(AC_gamma - AcReal(1.)) * gradient(lnrho); (AC_gamma - AcReal(1.0)) * gradient(lnrho);
const Vector third_term = AC_gamma * (inv_AC_cp_sound * gradient(ss) + gradient(lnrho)) + const Vector third_term = AC_gamma * (inv_AC_cp_sound * gradient(ss) + gradient(lnrho)) +
grad_ln_chi; grad_ln_chi;
@@ -316,11 +316,11 @@ Scalar
entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa) entropy(in ScalarField ss, in VectorField uu, in ScalarField lnrho, in VectorField aa)
{ {
const Matrix S = stress_tensor(uu); const Matrix S = stress_tensor(uu);
const Scalar inv_pT = Scalar(1.) / (exp(value(lnrho)) * exp(lnT(ss, lnrho))); const Scalar inv_pT = Scalar(1.0) / (exp(value(lnrho)) * exp(lnT(ss, lnrho)));
const Vector j = (Scalar(1.) / AC_mu0) * const Vector j = (Scalar(1.0) / AC_mu0) *
(gradient_of_divergence(aa) - laplace_vec(aa)); // Current density (gradient_of_divergence(aa) - laplace_vec(aa)); // Current density
const Scalar RHS = H_CONST - C_CONST + AC_eta * (AC_mu0)*dot(j, j) + const Scalar RHS = H_CONST - C_CONST + AC_eta * (AC_mu0)*dot(j, j) +
Scalar(2.) * exp(value(lnrho)) * AC_nu_visc * contract(S) + Scalar(2.0) * exp(value(lnrho)) * AC_nu_visc * contract(S) +
AC_zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu); AC_zeta * exp(value(lnrho)) * divergence(uu) * divergence(uu);
return -dot(value(uu), gradient(ss)) + inv_pT * RHS + heat_conduction(ss, lnrho); return -dot(value(uu), gradient(ss)) + inv_pT * RHS + heat_conduction(ss, lnrho);
@@ -335,7 +335,7 @@ heat_transfer(in VectorField uu, in ScalarField lnrho, in ScalarField tt)
const Scalar heat_diffusivity_k = 0.0008; // 8e-4; const Scalar heat_diffusivity_k = 0.0008; // 8e-4;
return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) + return -dot(value(uu), gradient(tt)) + heat_diffusivity_k * laplace(tt) +
heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) + heat_diffusivity_k * dot(gradient(lnrho), gradient(tt)) +
AC_nu_visc * contract(S) * (Scalar(1.) / AC_cv_sound) - AC_nu_visc * contract(S) * (Scalar(1.0) / AC_cv_sound) -
(AC_gamma - 1) * value(tt) * divergence(uu); (AC_gamma - 1) * value(tt) * divergence(uu);
} }
#endif #endif
@@ -403,7 +403,7 @@ forcing(int3 globalVertexIdx, Scalar dt)
int accretion_switch = AC_switch_accretion; int accretion_switch = AC_switch_accretion;
if (accretion_switch == 0){ if (accretion_switch == 0){
Vector a = Scalar(.5) * (Vector){globalGridN.x * AC_dsx, Vector a = Scalar(0.5) * (Vector){globalGridN.x * AC_dsx,
globalGridN.y * AC_dsy, globalGridN.y * AC_dsy,
globalGridN.z * AC_dsz}; // source (origin) globalGridN.z * AC_dsz}; // source (origin)
Vector xx = (Vector){(globalVertexIdx.x - DCONST(AC_nx_min)) * AC_dsx, Vector xx = (Vector){(globalVertexIdx.x - DCONST(AC_nx_min)) * AC_dsx,

View File

@@ -1,5 +0,0 @@
build
testbin
# Except this file
!.gitignore

View File

@@ -1,4 +1,4 @@
#!/bin/bash #!/bin/bash
# Preprocesses the give file using GCC. This script is usually automatically called in # Preprocesses the give file using GCC. This script is usually automatically called in
# ./compile.sh, but may be called also individually for debugging purposes. # ./compile.sh, but may be called also individually for debugging purposes.
cat $1 | gcc ${@:2} -x c -E - | sed "s/#.*//g" cat $1 | gcc ${@:2} -x c -E - | sed "s/#.*//g" | tee ${PWD}/$(basename -- "$1").preprocessed

10
acc/src/CMakeLists.txt Normal file
View File

@@ -0,0 +1,10 @@
## Compile the Astaroth Code compiler
find_package(BISON)
find_package(FLEX 2.5.5 REQUIRED)
bison_target(acc_parser acc.y ${CMAKE_CURRENT_BINARY_DIR}/acc.tab.c)
flex_target(acc_lexer acc.l ${CMAKE_CURRENT_BINARY_DIR}/acc.yy.c)
add_flex_bison_dependency(acc_lexer acc_parser)
add_executable(acc code_generator.c ${BISON_acc_parser_OUTPUTS} ${FLEX_acc_lexer_OUTPUTS})
target_include_directories(acc PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

View File

@@ -1,4 +1,5 @@
%option yylineno %option yylineno
%option noyywrap
D [0-9] D [0-9]
L [a-zA-Z_] L [a-zA-Z_]
@@ -36,8 +37,9 @@ L [a-zA-Z_]
"return" { return RETURN; } "return" { return RETURN; }
{D}+"."?{D}*[flud]? { return NUMBER; } /* Literals */ {D}+"."{D}+ { return REAL_NUMBER; } /* Literals */
"."{D}+[flud]? { return NUMBER; } {D}+"."{D}+[fd]+ { return NUMBER; }
{D}+[lu]* { return NUMBER; }
{L}({L}|{D})* { return IDENTIFIER; } {L}({L}|{D})* { return IDENTIFIER; }
\"(.)*\" { return IDENTIFIER; } /* String */ \"(.)*\" { return IDENTIFIER; } /* String */

View File

@@ -14,7 +14,7 @@ int yyget_lineno();
%} %}
%token CONSTANT IN OUT UNIFORM %token CONSTANT IN OUT UNIFORM
%token IDENTIFIER NUMBER %token IDENTIFIER NUMBER REAL_NUMBER
%token RETURN %token RETURN
%token SCALAR VECTOR MATRIX SCALARFIELD SCALARARRAY %token SCALAR VECTOR MATRIX SCALARFIELD SCALARARRAY
%token VOID INT INT3 COMPLEX %token VOID INT INT3 COMPLEX
@@ -217,7 +217,8 @@ type_specifier: VOID
identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); } identifier: IDENTIFIER { $$ = astnode_create(NODE_IDENTIFIER, NULL, NULL); astnode_set_buffer(yytext, $$); }
; ;
number: NUMBER { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); } number: REAL_NUMBER { $$ = astnode_create(NODE_REAL_NUMBER, NULL, NULL); astnode_set_buffer(yytext, $$); }
| NUMBER { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
; ;
return: RETURN { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); } return: RETURN { $$ = astnode_create(NODE_UNKNOWN, NULL, NULL); astnode_set_buffer(yytext, $$); }
@@ -235,4 +236,5 @@ int
yyerror(const char* str) yyerror(const char* str)
{ {
fprintf(stderr, "%s on line %d when processing char %d: [%s]\n", str, yyget_lineno(), *yytext, yytext); fprintf(stderr, "%s on line %d when processing char %d: [%s]\n", str, yyget_lineno(), *yytext, yytext);
return EXIT_FAILURE;
} }

View File

@@ -7,14 +7,15 @@
Statements: return value Statements: return value
block block
*/ */
#include <stdlib.h>
#include <assert.h> #include <assert.h>
#include <stdlib.h>
#define BUFFER_SIZE (4096) #define BUFFER_SIZE (4096)
#define GEN_ID(X) X #define GEN_ID(X) X
#define GEN_STR(X) #X #define GEN_STR(X) #X
// clang-format off
#define FOR_NODE_TYPES(FUNC) \ #define FOR_NODE_TYPES(FUNC) \
FUNC(NODE_UNKNOWN), \ FUNC(NODE_UNKNOWN), \
FUNC(NODE_DEFINITION), \ FUNC(NODE_DEFINITION), \
@@ -27,7 +28,9 @@
FUNC(NODE_FUNCTION_DECLARATION), \ FUNC(NODE_FUNCTION_DECLARATION), \
FUNC(NODE_COMPOUND_STATEMENT), \ FUNC(NODE_COMPOUND_STATEMENT), \
FUNC(NODE_FUNCTION_PARAMETER_DECLARATION), \ FUNC(NODE_FUNCTION_PARAMETER_DECLARATION), \
FUNC(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION) FUNC(NODE_MULTIDIM_SUBSCRIPT_EXPRESSION), \
FUNC(NODE_REAL_NUMBER)
// clang-format on
/* /*
// Recreating strdup is not needed when using the GNU compiler. // Recreating strdup is not needed when using the GNU compiler.
@@ -53,10 +56,7 @@ strdup(const char* in)
#endif #endif
*/ */
typedef enum { typedef enum { FOR_NODE_TYPES(GEN_ID), NUM_NODE_TYPES } NodeType;
FOR_NODE_TYPES(GEN_ID),
NUM_NODE_TYPES
} NodeType;
typedef struct astnode_s { typedef struct astnode_s {
int id; int id;
@@ -71,7 +71,6 @@ typedef struct astnode_s {
int postfix; // (which makes it much harder to read) int postfix; // (which makes it much harder to read)
} ASTNode; } ASTNode;
static inline ASTNode* static inline ASTNode*
astnode_create(const NodeType type, ASTNode* lhs, ASTNode* rhs) astnode_create(const NodeType type, ASTNode* lhs, ASTNode* rhs)
{ {
@@ -107,7 +106,6 @@ astnode_destroy(ASTNode* node)
free(node); free(node);
} }
extern ASTNode* root; extern ASTNode* root;
/* /*

View File

@@ -163,6 +163,7 @@ static void
rm_symbol(const int handle) rm_symbol(const int handle)
{ {
assert(handle >= 0 && handle < num_symbols); assert(handle >= 0 && handle < num_symbols);
assert(num_symbols > 0);
if (&symbol_table[handle] != &symbol_table[num_symbols - 1]) if (&symbol_table[handle] != &symbol_table[num_symbols - 1])
memcpy(&symbol_table[handle], &symbol_table[num_symbols - 1], sizeof(Symbol)); memcpy(&symbol_table[handle], &symbol_table[num_symbols - 1], sizeof(Symbol));
@@ -179,7 +180,7 @@ print_symbol(const int handle)
symbol_table[handle].identifier}; symbol_table[handle].identifier};
const size_t num_fields = sizeof(fields) / sizeof(fields[0]); const size_t num_fields = sizeof(fields) / sizeof(fields[0]);
for (int i = 0; i < num_fields; ++i) for (size_t i = 0; i < num_fields; ++i)
if (fields[i]) if (fields[i])
printf("%s ", fields[i]); printf("%s ", fields[i]);
} }
@@ -252,7 +253,7 @@ translate_latest_symbol(void)
} }
} }
static void static inline void
print_symbol_table(void) print_symbol_table(void)
{ {
for (int i = 0; i < num_symbols; ++i) { for (int i = 0; i < num_symbols; ++i) {
@@ -260,11 +261,11 @@ print_symbol_table(void)
const char* fields[] = {translate(symbol_table[i].type_qualifier), const char* fields[] = {translate(symbol_table[i].type_qualifier),
translate(symbol_table[i].type_specifier), translate(symbol_table[i].type_specifier),
symbol_table[i].identifier}; symbol_table[i].identifier};
const size_t num_fields = sizeof(fields) / sizeof(fields[0]);
for (int i = 0; i < num_fields; ++i) const size_t num_fields = sizeof(fields) / sizeof(fields[0]);
if (fields[i]) for (size_t j = 0; j < num_fields; ++j)
printf("%s ", fields[i]); if (fields[j])
printf("%s ", fields[j]);
if (symbol_table[i].type == SYMBOLTYPE_FUNCTION) if (symbol_table[i].type == SYMBOLTYPE_FUNCTION)
printf("(function)"); printf("(function)");
@@ -394,18 +395,31 @@ traverse(const ASTNode* node)
// Do a regular translation // Do a regular translation
if (translate(node->token)) if (translate(node->token))
printf("%s ", translate(node->token)); printf("%s ", translate(node->token));
if (node->buffer) if (node->buffer) {
if (node->type == NODE_REAL_NUMBER) {
printf("%s(%s) ", translate(SCALAR),
node->buffer); // Cast to correct precision
}
else {
printf("%s ", node->buffer); printf("%s ", node->buffer);
} }
} }
}
}
else { else {
// Do a regular translation // Do a regular translation
if (translate(node->token)) if (translate(node->token))
printf("%s ", translate(node->token)); printf("%s ", translate(node->token));
if (node->buffer) if (node->buffer) {
if (node->type == NODE_REAL_NUMBER) {
printf("%s(%s) ", translate(SCALAR), node->buffer); // Cast to correct precision
}
else {
printf("%s ", node->buffer); printf("%s ", node->buffer);
} }
} }
}
}
if (node->type == NODE_FUNCTION_DECLARATION) { if (node->type == NODE_FUNCTION_DECLARATION) {
scope_start = num_symbols; scope_start = num_symbols;

View File

@@ -0,0 +1,36 @@
#include "stencil_definition.sdh"
//JP NOTE IMPORTANT/////////////////////////////////////////////////////////////////////////////////
// These functions are defined here temporarily.
//
// Currently the built-in functions (derx, derxx etc) are defined in CUDA in integrate.cuh.
// This is bad. Instead the built-in functions should be defined in the DSL, and be "includable"
// as a standard DSL library, analogous to f.ex. stdlib.h in C.
////////////////////////////////////////////////////////////////////////////////////////////////////
Preprocessed Scalar
value(in ScalarField vertex)
{
return vertex[vertexIdx];
}
Preprocessed Vector
gradient(in ScalarField vertex)
{
return (Vector){derx(vertexIdx, vertex), dery(vertexIdx, vertex), derz(vertexIdx, vertex)};
}
Preprocessed Matrix
hessian(in ScalarField vertex)
{
Matrix hessian;
hessian.row[0] = (Vector){derxx(vertexIdx, vertex), derxy(vertexIdx, vertex),
derxz(vertexIdx, vertex)};
hessian.row[1] = (Vector){hessian.row[0].y, deryy(vertexIdx, vertex), deryz(vertexIdx, vertex)};
hessian.row[2] = (Vector){hessian.row[0].z, hessian.row[1].z, derzz(vertexIdx, vertex)};
return hessian;
}

View File

@@ -0,0 +1,18 @@
//JP NOTE IMPORTANT/////////////////////////////////////////////////////////////////////////////////
// AC_dsx etc are defined here temporarily (otherwise does not compile).
//
// These should ultimately be defined in the standard DSL libraries.
// See test_solver/stencil_assembly.sas for more info.
////////////////////////////////////////////////////////////////////////////////////////////////////
uniform Scalar AC_dsx;// TODO include these from the std lib
uniform Scalar AC_dsy;
uniform Scalar AC_dsz;
uniform Scalar AC_inv_dsx;
uniform Scalar AC_inv_dsy;
uniform Scalar AC_inv_dsz;
uniform Scalar AC_dt;
uniform ScalarField VTXBUF_A;
uniform ScalarField VTXBUF_B;
uniform ScalarField VTXBUF_C;

View File

@@ -0,0 +1,18 @@
#include "stencil_definition.sdh"
Vector
value(in VectorField uu)
{
return (Vector){value(uu.x), value(uu.y), value(uu.z)};
}
in VectorField uu(VTXBUF_A, VTXBUF_B, VTXBUF_C);
out VectorField out_uu(VTXBUF_A, VTXBUF_B, VTXBUF_C);
Kernel void
solve()
{
Scalar dt = AC_dt;
Vector rate_of_change = (Vector){1, 2, 3};
out_uu = rk3(out_uu, uu, rate_of_change, dt);
}

View File

@@ -200,13 +200,13 @@ AcResult acDeviceTransferVertexBufferWithOffset(const Device src_device, const S
Loading uniforms (device constants) Loading uniforms (device constants)
```C ```C
AcResult acDeviceLoadScalarConstant(const Device device, const Stream stream, AcResult acDeviceLoadScalarUniform(const Device device, const Stream stream,
const AcRealParam param, const AcReal value); const AcRealParam param, const AcReal value);
AcResult acDeviceLoadVectorConstant(const Device device, const Stream stream, AcResult acDeviceLoadVectorUniform(const Device device, const Stream stream,
const AcReal3Param param, const AcReal3 value); const AcReal3Param param, const AcReal3 value);
AcResult acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param, AcResult acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param,
const int value); const int value);
AcResult acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param, AcResult acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param,
const int3 value); const int3 value);
AcResult acDeviceLoadScalarArray(const Device device, const Stream stream, AcResult acDeviceLoadScalarArray(const Device device, const Stream stream,
const ScalarArrayHandle handle, const AcReal* data, const ScalarArrayHandle handle, const AcReal* data,
@@ -462,7 +462,13 @@ function `Kernel solve()` can be called with `acDeviceKernel_solve()` via the AP
# Astaroth Domain-Specific Language # Astaroth Domain-Specific Language
We designed the Astaroth Domain-specific Language (DSL) for expressing stencil computations in a high-level language that can be translated into efficient GPU kernels. The benefits of creating a DSL are two-fold. First, scientists using the language can focus on developing solvers and mathematical models using an easy-to-use language, while still achieving performance close to handwritten code. Second, procedures written in the DSL are decoupled from implementation, which allows us to extend the DSL compiler, say, to generate optimized code for several hardware generations without the users having to modify existing DSL sources. We designed the Astaroth Domain-specific Language (DSL) for expressing stencil computations in a
high-level language that can be translated into efficient GPU kernels. The benefits of creating a
DSL are two-fold. First, scientists using the language can focus on developing solvers and
mathematical models using an easy-to-use language, while still achieving performance close to
handwritten code. Second, procedures written in the DSL are decoupled from implementation, which
allows us to extend the DSL compiler, say, to generate optimized code for several hardware
generations without the users having to modify existing DSL sources.
## Overview ## Overview
@@ -506,24 +512,217 @@ In addition to basic datatypes in C/C++/CUDA, such as int and int3, we provide t
| ScalarField | An abstraction of a three-dimensional scalar field stored in device memory. Is implemented as a handle to a one-dimensional Scalar array consisting of input and output segments. The data is stored linearly in order i + j * mx + k * mx * my, given some vertex index (i, j, k) and mesh constisting of (mx, my, mz) vertices. | float[2][] or double[2][] | | ScalarField | An abstraction of a three-dimensional scalar field stored in device memory. Is implemented as a handle to a one-dimensional Scalar array consisting of input and output segments. The data is stored linearly in order i + j * mx + k * mx * my, given some vertex index (i, j, k) and mesh constisting of (mx, my, mz) vertices. | float[2][] or double[2][] |
| VectorField | An abstraction of a three-dimensional vector field stored in device memory. Is implemented as a tuple of three ScalarField handles. | Three distinct float[2][] or double[2][] arrays for each component. Stored as a structure of arrays. | | VectorField | An abstraction of a three-dimensional vector field stored in device memory. Is implemented as a tuple of three ScalarField handles. | Three distinct float[2][] or double[2][] arrays for each component. Stored as a structure of arrays. |
## Built-in variables and functions
## Control flow ## Control flow
// Runtime constants are as fast as compile-time constants as long as
// 1) They are not placed in tight loops, especially those that inlcude global memory accesses, that could be unrolled
// 2) They are not multiplied with each other
// 3) At least 32 neighboring threads in the x-axis access the same constant
// Safe and efficient to use as switches Conditional statements are expressed with the `if-else` construct. Unlike in C and C++, we require
that the scope of the `if-else` statement is explicitly declared using braces `{` and `}` in order
to avoid the ambiguity in the case
```C
if (a)
b;
if (c)
d;
else
e;
```
## Uniforms
// Device constants The syntax for conditional statements, even if there is only a single `if`, is
// Loaded at runtime ```C
if (a) {
b;
}
```
## Kernels ## Kernels
// in and out Kernels are small programs executed on the device. Each kernel comprises of all the pipeline stages
discussed in previous sections. Functions qualified with the type qualifier `Kernel` are analogous
to `main` functions of host code.
Kernels must be declared in stencil processing files. DSL kernels can be called from host code
using the API function
```C
AcResult acDeviceKernel_##identifier(const Device device, const Stream stream,
const int3 start, const int3 end);
```
, where ##identifier is the name of the kernel function.
The following built-in variables are available in `Kernel`s.
| Built-in variable | Description |
|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| vertexIdx | Holds the local index of the currently processed vertex. |
| globalVertexIdx | Holds the global index of the currently processed vertex. If there is only single device, then vertexIdx is the same as globalVertexIdx. Otherwise globalVertexIdx is offset accordingly. |
| globalGridN | Holds the dimensions of the computational domain. |
## Preprocessed functions ## Preprocessed functions
// Reading input fields The type qualifier `Preprocessed` indicates which functions should be evaluated immediately when
entering a `Kernel` function. The return values of `Preprocessed` functions are cached and calling
these functions during the stencil processing stage is essentially free. As main memory bandwidth is
significantly slower than on-chip memories and registers, declaring reading-heavy functions as
`Preprocessed` is critical for obtaining good performance in stencil codes.
`Preprocessed` functions may only be defined in stencil assembly files.
The built-in variables `vertexIdx`, `globalVertexidx` and `globalGridN` are available in all
`Preprocessed` functions.
## Uniforms
`Uniform`s are global device variables which stay constant for the duration of a kernel launch.
`Uniform`s can be updated between kernel launches using the `acLoadScalarUniform` and related functions
discussed in Section 'Loading and storing'.
`Uniform`s are declared in stencil definition headers. The header must be included in all files
which use those uniforms.
`Uniform`s can be of type `Scalar`, `Vector`, `int`, `int3`, `ScalarField` and `ScalarArray`.
> Note: As of 2019-10-01, the types `ScalarField` (DSL) and `VertexBuffer` (CUDA) are aliases of the
same type. This naming may be changed in the future.
> Note: As of 2019-10-01, ScalarFields cannot be declared as uniforms. Instead, one should declare
each component as a `ScalarField` and use them to construct a `VectorField` during the stencil
processing stage. For example, `in VectorField(A, B, C);`, where `A`, `B` and `C` are
`uniform ScalarField`s.
> Note: As of 2019-10-01, `uniform`s cannot be assigned values in the stencil definition headers.
Instead, one should load the appropriate values during runtime using the `acLoadScalarUniform` and
related functions.
## Standard libraries
> Not implemented
## Performance considerations
Uniforms are as fast as compile-time constants as long as
0. The halting condition of a tight loop does not depend on an uniform or a variable, as this would prevent unrolling of the loop during compile-time.
0. Uniforms are not multiplied with each other. The result should be stored in an auxiliary uniform instead. For example, the result of `nx * ny` should be stored in a new `uniform nxy`
0. At least 32 neighboring streams in the x-axis access the same `uniform`. That is, the vertices at vertexIdx.x = i... i + 32 should access the same `uniform` where i is a multiple of 32.

2
include/.gitignore vendored
View File

@@ -1,2 +0,0 @@
# Ignore the DSL headers
stencil_defines.h

View File

@@ -46,19 +46,19 @@ AcResult acDeviceSynchronizeStream(const Device device, const Stream stream);
AcResult acDeviceSwapBuffers(const Device device); AcResult acDeviceSwapBuffers(const Device device);
/** */ /** */
AcResult acDeviceLoadScalarConstant(const Device device, const Stream stream, AcResult acDeviceLoadScalarUniform(const Device device, const Stream stream,
const AcRealParam param, const AcReal value); const AcRealParam param, const AcReal value);
/** */ /** */
AcResult acDeviceLoadVectorConstant(const Device device, const Stream stream, AcResult acDeviceLoadVectorUniform(const Device device, const Stream stream,
const AcReal3Param param, const AcReal3 value); const AcReal3Param param, const AcReal3 value);
/** */ /** */
AcResult acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param, AcResult acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param,
const int value); const int value);
/** */ /** */
AcResult acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param, AcResult acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param,
const int3 value); const int3 value);
/** */ /** */

View File

@@ -1,9 +1,11 @@
#!/bin/bash #!/bin/bash
ACC_DIR=$(readlink -f $(dirname $0)/../acc) ACC_DIR=$(readlink -f $(dirname $0)/../acc)
ACC_BINARY_DIR=$(pwd)/acc
MODULE_DIR=$(readlink -f $1) MODULE_DIR=$(readlink -f $1)
echo "-- Compiling project in "${MODULE_DIR} echo "-- ACC binary dir: ${ACC_BINARY_DIR}"
echo "-- ACC project dir: ${MODULE_DIR}"
for source in ${MODULE_DIR}/*.sas ${MODULE_DIR}/*.sps ${MODULE_DIR}/*.sdh for source in ${MODULE_DIR}/*.sas ${MODULE_DIR}/*.sps ${MODULE_DIR}/*.sdh
do do
${ACC_DIR}/compile.sh $source -I ${MODULE_DIR} ${ACC_DIR}/compile.sh ${ACC_BINARY_DIR}/acc $source -I ${MODULE_DIR}
done done

View File

@@ -11,6 +11,7 @@ set(CUDA_ARCH_FLAGS -gencode arch=compute_37,code=sm_37
-gencode arch=compute_50,code=sm_50 -gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60 -gencode arch=compute_60,code=sm_60
-gencode arch=compute_61,code=sm_61 -gencode arch=compute_61,code=sm_61
-gencode arch=compute_70,code=sm_70
-lineinfo -lineinfo
-ftz=true # Flush denormalized floats to zero -ftz=true # Flush denormalized floats to zero
-std=c++11 -std=c++11

View File

@@ -357,7 +357,7 @@ acDeviceAutoOptimize(const Device device)
cudaEventRecord(tstart); // ---------------------------------------- Timing start cudaEventRecord(tstart); // ---------------------------------------- Timing start
acDeviceLoadScalarConstant(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON); acDeviceLoadScalarUniform(device, STREAM_DEFAULT, AC_dt, FLT_EPSILON);
for (int i = 0; i < num_iterations; ++i) for (int i = 0; i < num_iterations; ++i)
solve<2><<<bpg, tpb>>>(start, end, device->vba); solve<2><<<bpg, tpb>>>(start, end, device->vba);
@@ -416,7 +416,7 @@ acDeviceSwapBuffers(const Device device)
} }
AcResult AcResult
acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRealParam param, acDeviceLoadScalarUniform(const Device device, const Stream stream, const AcRealParam param,
const AcReal value) const AcReal value)
{ {
cudaSetDevice(device->id); cudaSetDevice(device->id);
@@ -427,7 +427,7 @@ acDeviceLoadScalarConstant(const Device device, const Stream stream, const AcRea
} }
AcResult AcResult
acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcReal3Param param, acDeviceLoadVectorUniform(const Device device, const Stream stream, const AcReal3Param param,
const AcReal3 value) const AcReal3 value)
{ {
cudaSetDevice(device->id); cudaSetDevice(device->id);
@@ -438,7 +438,7 @@ acDeviceLoadVectorConstant(const Device device, const Stream stream, const AcRea
} }
AcResult AcResult
acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntParam param, acDeviceLoadIntUniform(const Device device, const Stream stream, const AcIntParam param,
const int value) const int value)
{ {
cudaSetDevice(device->id); cudaSetDevice(device->id);
@@ -449,7 +449,7 @@ acDeviceLoadIntConstant(const Device device, const Stream stream, const AcIntPar
} }
AcResult AcResult
acDeviceLoadInt3Constant(const Device device, const Stream stream, const AcInt3Param param, acDeviceLoadInt3Uniform(const Device device, const Stream stream, const AcInt3Param param,
const int3 value) const int3 value)
{ {
cudaSetDevice(device->id); cudaSetDevice(device->id);
@@ -670,7 +670,7 @@ acDeviceIntegrateSubstep(const Device device, const Stream stream, const int ste
(unsigned int)ceil(n.y / AcReal(tpb.y)), // (unsigned int)ceil(n.y / AcReal(tpb.y)), //
(unsigned int)ceil(n.z / AcReal(tpb.z))); (unsigned int)ceil(n.z / AcReal(tpb.z)));
acDeviceLoadScalarConstant(device, stream, AC_dt, dt); acDeviceLoadScalarUniform(device, stream, AC_dt, dt);
if (step_number == 0) if (step_number == 0)
solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba); solve<0><<<bpg, tpb, 0, device->streams[stream]>>>(start, end, device->vba);
else if (step_number == 1) else if (step_number == 1)

View File

@@ -1,2 +0,0 @@
# Ignore the generated headers
stencil_process.cuh stencil_assembly.cuh

View File

@@ -429,7 +429,7 @@ acNodeLoadConstant(const Node node, const Stream stream, const AcRealParam param
acNodeSynchronizeStream(node, stream); acNodeSynchronizeStream(node, stream);
// #pragma omp parallel for // #pragma omp parallel for
for (int i = 0; i < node->num_devices; ++i) { for (int i = 0; i < node->num_devices; ++i) {
acDeviceLoadScalarConstant(node->devices[i], stream, param, value); acDeviceLoadScalarUniform(node->devices[i], stream, param, value);
} }
return AC_SUCCESS; return AC_SUCCESS;
} }

View File

@@ -0,0 +1,11 @@
## C++ standard
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
## Compilation flags
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion -Wshadow)
## Compile and link
add_executable(ac_simulate simulation.cc)
target_link_libraries(ac_simulate PRIVATE astaroth_core astaroth_utils)
add_definitions(-DAC_DEFAULT_CONFIG="${CMAKE_SOURCE_DIR}/config/astaroth.conf")

View File

@@ -0,0 +1,77 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
#include "src/utils/config_loader.h"
#include "src/utils/memory.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
int
run_simulation(const char* config_path)
{
AcMeshInfo info;
acLoadConfig(config_path, &info);
AcMesh mesh;
acMeshCreate(info, &mesh);
acMeshClear(&mesh);
acInit(info);
acLoad(mesh);
const size_t num_steps = 10;
for (size_t i = 0; i < num_steps; ++i) {
const AcReal dt = 1; // JP: TODO! Set timestep here!
// JP: TODO! Make sure that AcMeshInfo parameters are properly initialized before calling
// acIntegrate()
// NANs indicate that either dt is too large or something was uninitalized
acIntegrate(dt);
}
for (int i = 0; i < NUM_VTXBUF_HANDLES; ++i) {
printf("%s:\n", vtxbuf_names[i]);
printf("\tmax: %g\n", (double)acReduceScal(RTYPE_MAX, VertexBufferHandle(i)));
printf("\tmin: %g\n", (double)acReduceScal(RTYPE_MIN, VertexBufferHandle(i)));
printf("\trms: %g\n", (double)acReduceScal(RTYPE_RMS, VertexBufferHandle(i)));
printf("\texp rms: %g\n", (double)acReduceScal(RTYPE_RMS_EXP, VertexBufferHandle(i)));
}
acStore(&mesh);
acQuit();
acMeshDestroy(&mesh);
return EXIT_SUCCESS;
}
int
main(int argc, char* argv[])
{
printf("Args: \n");
for (int i = 0; i < argc; ++i)
printf("%d: %s\n", i, argv[i]);
char* config_path;
(argc == 3) ? config_path = strdup(argv[2]) : config_path = strdup(AC_DEFAULT_CONFIG);
printf("Config path: %s\n", config_path);
assert(config_path);
run_simulation(config_path);
free(config_path);
return EXIT_SUCCESS;
}

10
src/utils/CMakeLists.txt Normal file
View File

@@ -0,0 +1,10 @@
## C++ standard
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
## Compilation flags
add_compile_options(-Wall -Wextra -Werror -Wdouble-promotion -Wfloat-conversion -Wshadow)
## Compile
add_library(astaroth_utils STATIC config_loader.c memory.c)
target_link_libraries(astaroth_utils PRIVATE astaroth_core)

143
src/utils/config_loader.c Normal file
View File

@@ -0,0 +1,143 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#include "config_loader.h"
#include <assert.h>
#include <limits.h> // UINT_MAX
#include <math.h>
#include <stdint.h> // uint8_t, uint32_t
#include <stdio.h> // print
#include <string.h> // memset
//#include "src/core/math_utils.h"
/**
\brief Find the index of the keyword in names
\return Index in range 0...n if the keyword is in names. -1 if the keyword was
not found.
*/
static int
find_str(const char keyword[], const char* names[], const int n)
{
for (int i = 0; i < n; ++i)
if (!strcmp(keyword, names[i]))
return i;
return -1;
}
static void
parse_config(const char* path, AcMeshInfo* config)
{
FILE* fp;
fp = fopen(path, "r");
// For knowing which .conf file will be used
printf("Config file path: \n %s \n ", path);
assert(fp != NULL);
const size_t BUF_SIZE = 128;
char keyword[BUF_SIZE];
char value[BUF_SIZE];
int items_matched;
while ((items_matched = fscanf(fp, "%s = %s", keyword, value)) != EOF) {
if (items_matched < 2)
continue;
int idx = -1;
if ((idx = find_str(keyword, intparam_names, NUM_INT_PARAMS)) >= 0)
config->int_params[idx] = atoi(value);
else if ((idx = find_str(keyword, realparam_names, NUM_REAL_PARAMS)) >= 0)
config->real_params[idx] = (AcReal)(atof(value));
}
fclose(fp);
}
void
update_config(AcMeshInfo* config)
{
config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER;
///////////// PAD TEST
// config->int_params[AC_mx] = config->int_params[AC_nx] + STENCIL_ORDER + PAD_SIZE;
///////////// PAD TEST
config->int_params[AC_my] = config->int_params[AC_ny] + STENCIL_ORDER;
config->int_params[AC_mz] = config->int_params[AC_nz] + STENCIL_ORDER;
// Bounds for the computational domain, i.e. nx_min <= i < nx_max
config->int_params[AC_nx_min] = STENCIL_ORDER / 2;
config->int_params[AC_nx_max] = config->int_params[AC_nx_min] + config->int_params[AC_nx];
config->int_params[AC_ny_min] = STENCIL_ORDER / 2;
config->int_params[AC_ny_max] = config->int_params[AC_ny] + STENCIL_ORDER / 2;
config->int_params[AC_nz_min] = STENCIL_ORDER / 2;
config->int_params[AC_nz_max] = config->int_params[AC_nz] + STENCIL_ORDER / 2;
// Spacing
config->real_params[AC_inv_dsx] = (AcReal)(1.) / config->real_params[AC_dsx];
config->real_params[AC_inv_dsy] = (AcReal)(1.) / config->real_params[AC_dsy];
config->real_params[AC_inv_dsz] = (AcReal)(1.) / config->real_params[AC_dsz];
/* Additional helper params */
// Int helpers
config->int_params[AC_mxy] = config->int_params[AC_mx] * config->int_params[AC_my];
config->int_params[AC_nxy] = config->int_params[AC_nx] * config->int_params[AC_ny];
config->int_params[AC_nxyz] = config->int_params[AC_nxy] * config->int_params[AC_nz];
}
/**
\brief Loads data from astaroth.conf into a config struct.
\return AC_SUCCESS on success, AC_FAILURE if there are potentially uninitialized values.
*/
AcResult
acLoadConfig(const char* config_path, AcMeshInfo* config)
{
int retval = AC_SUCCESS;
assert(config_path);
// memset reads the second parameter as a byte even though it says int in
// the function declaration
memset(config, (uint8_t)0xFF, sizeof(*config));
parse_config(config_path, config);
update_config(config);
#if VERBOSE_PRINTING // Defined in astaroth.h
printf("###############################################################\n");
printf("Config dimensions recalculated:\n");
acPrintMeshInfo(*config);
printf("###############################################################\n");
#endif
// sizeof(config) must be a multiple of 4 bytes for this to work
assert(sizeof(*config) % sizeof(uint32_t) == 0);
for (size_t i = 0; i < sizeof(*config) / sizeof(uint32_t); ++i) {
if (((uint32_t*)config)[i] == (uint32_t)0xFFFFFFFF) {
fprintf(stderr, "Some config values may be uninitialized. "
"See that all are defined in astaroth.conf\n");
retval = AC_FAILURE;
}
}
return retval;
}

37
src/utils/config_loader.h Normal file
View File

@@ -0,0 +1,37 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Functions for loading and updating AcMeshInfo.
*
*/
#pragma once
#include "astaroth.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Loads data from the config file */
AcResult acLoadConfig(const char* config_path, AcMeshInfo* config);
#ifdef __cplusplus
} // extern "C"
#endif

64
src/utils/memory.c Normal file
View File

@@ -0,0 +1,64 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
#include "memory.h"
#include <math.h>
#include <string.h>
#include "src/core/errchk.h"
AcResult
acMeshCreate(const AcMeshInfo info, AcMesh* mesh)
{
mesh->info = info;
const size_t bytes = acVertexBufferSizeBytes(mesh->info);
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w) {
mesh->vertex_buffer[w] = malloc(bytes);
ERRCHK_ALWAYS(mesh->vertex_buffer[w]);
}
return AC_SUCCESS;
}
AcResult
acMeshDestroy(AcMesh* mesh)
{
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
free(mesh->vertex_buffer[w]);
return AC_SUCCESS;
}
AcResult
acMeshSet(const AcReal value, AcMesh* mesh)
{
const int n = acVertexBufferSize(mesh->info);
for (int w = 0; w < NUM_VTXBUF_HANDLES; ++w)
for (int i = 0; i < n; ++i)
mesh->vertex_buffer[w][i] = value;
return AC_SUCCESS;
}
AcResult
acMeshClear(AcMesh* mesh)
{
return acMeshSet(0, mesh);
}

44
src/utils/memory.h Normal file
View File

@@ -0,0 +1,44 @@
/*
Copyright (C) 2014-2019, Johannes Pekkilae, Miikka Vaeisalae.
This file is part of Astaroth.
Astaroth is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Astaroth is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Astaroth. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file
* \brief Brief info.
*
* Detailed info.
*
*/
#pragma once
#include "astaroth.h"
#ifdef __cplusplus
extern "C" {
#endif
AcResult acMeshCreate(const AcMeshInfo mesh_info, AcMesh* mesh);
AcResult acMeshDestroy(AcMesh* mesh);
AcResult acMeshSet(const AcReal value, AcMesh* mesh);
AcResult acMeshClear(AcMesh* mesh);
#ifdef __cplusplus
} // extern "C"
#endif