15#include "../../offload/offload_runtime.h"
21#define GRID_NBACKENDS 5
22#define GRID_NKERNELS 4
37#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
42 "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
51 printf(
"Error: Grid library was already initialized.\n");
55#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
61 offloadEnsureMallocHeapSize(64 * 1024 * 1024);
69#pragma omp parallel default(none) shared(per_thread_globals) \
70 num_threads(max_threads)
72 const int ithread = omp_get_thread_num();
87 printf(
"Error: Grid library is not initialized.\n");
105 const int ithread = omp_get_thread_num();
133 const int increment) {
140 const int ithread = omp_get_thread_num();
150 return *(
long *)b - *(
long *)a;
160 const int output_unit) {
162 printf(
"Error: Grid library is not initialized.\n");
168 long counters[ncounters][2];
169 memset(counters, 0, ncounters * 2 *
sizeof(
long));
171 for (
int i = 0;
i < ncounters;
i++) {
177 total += counters[
i][0];
185 print_func(
" ----------------------------------------------------------------"
197 print_func(
" ----------------------------------------------------------------"
204 const char *kernel_names[] = {
"collocate ortho",
"integrate ortho",
205 "collocate general",
"integrate general"};
206 const char *backend_names[] = {
"REF",
"CPU",
"DGEMM",
"GPU",
"HIP"};
208 for (
int i = 0;
i < ncounters;
i++) {
209 if (counters[
i][0] == 0)
211 const double percent = 100.0 * counters[
i][0] / total;
212 const int idx = counters[
i][1];
214 const int back =
idx / backend_stride;
218 snprintf(buffer,
sizeof(buffer),
" %-5i %-17s %-6s %34li %10.2f%%\n", lp,
219 kernel_names[kern], backend_names[back], counters[
i][0], percent);
223 print_func(
" ----------------------------------------------------------------"
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
static void print_func(char *message, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i
void apply_cutoff(void *ptr)
void grid_library_finalize(void)
Finalizes the grid library.
static bool library_initialized
void grid_library_print_stats(void(*mpi_sum_func)(long *, int), const int mpi_comm, void(*print_func)(char *, int), const int output_unit)
Prints statistics gathered by the grid library.
static grid_library_config config
grid_sphere_cache * grid_library_get_sphere_cache(void)
Returns a pointer to the thread local sphere cache.
void grid_library_init(void)
Initializes the grid library.
grid_library_config grid_library_get_config(void)
Returns the library config.
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
void grid_library_set_config(const enum grid_backend backend, const bool validate, const bool apply_cutoff)
Configures the grid library.
static int compare_counters(const void *a, const void *b)
Comparator passed to qsort to compare two counters.
static grid_library_globals ** per_thread_globals
grid_library_kernel
Various kernels provided by the grid library.
void mpi_sum_func(long *number, int mpi_comm)
void grid_sphere_cache_free(grid_sphere_cache *cache)
Free the memory of the sphere cache.
Configuration of the grid library.
enum grid_backend backend
grid_sphere_cache sphere_cache
long counters[GRID_NBACKENDS *GRID_NKERNELS *GRID_MAX_LP]
Struct holding the entire sphere cache, i.e. for all grids.