15 #include "../../offload/offload_runtime.h"
21 #define GRID_NBACKENDS 5
22 #define GRID_NKERNELS 4
23 #define GRID_MAX_LP 20
37 #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
42 "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
51 printf(
"Error: Grid library was already initialized.\n");
55 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
61 offloadEnsureMallocHeapSize(64 * 1024 * 1024);
68 #pragma omp parallel default(none) shared(per_thread_globals) \
69 num_threads(max_threads)
71 const int ithread = omp_get_thread_num();
85 printf(
"Error: Grid library is not initialized.\n");
103 const int ithread = omp_get_thread_num();
131 const int increment) {
138 const int ithread = omp_get_thread_num();
148 return *(
long *)
b - *(
long *)
a;
158 const int output_unit) {
160 printf(
"Error: Grid library is not initialized.\n");
166 long counters[ncounters][2];
167 memset(counters, 0, ncounters * 2 *
sizeof(
long));
169 for (
int i = 0;
i < ncounters;
i++) {
175 total += counters[
i][0];
183 print_func(
" ----------------------------------------------------------------"
195 print_func(
" ----------------------------------------------------------------"
202 const char *kernel_names[] = {
"collocate ortho",
"integrate ortho",
203 "collocate general",
"integrate general"};
204 const char *backend_names[] = {
"REF",
"CPU",
"DGEMM",
"GPU",
"HIP"};
206 for (
int i = 0;
i < ncounters;
i++) {
207 if (counters[
i][0] == 0)
209 const double percent = 100.0 * counters[
i][0] / total;
210 const int idx = counters[
i][1];
212 const int back =
idx / backend_stride;
216 snprintf(buffer,
sizeof(buffer),
" %-5i %-17s %-6s %34li %10.2f%%\n", lp,
217 kernel_names[kern], backend_names[back], counters[
i][0], percent);
221 print_func(
" ----------------------------------------------------------------"
static int imin(int x, int y)
Returns the smaller of two given integers (missing from the C standard)
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i
void apply_cutoff(void *ptr)
void grid_library_finalize(void)
Finalizes the grid library.
static bool library_initialized
void grid_library_print_stats(void(*mpi_sum_func)(long *, int), const int mpi_comm, void(*print_func)(char *, int), const int output_unit)
Prints statistics gathered by the grid library.
static grid_library_config config
grid_sphere_cache * grid_library_get_sphere_cache(void)
Returns a pointer to the thread local sphere cache.
void grid_library_init(void)
Initializes the grid library.
grid_library_config grid_library_get_config(void)
Returns the library config.
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
void grid_library_set_config(const enum grid_backend backend, const bool validate, const bool apply_cutoff)
Configures the grid library.
static int compare_counters(const void *a, const void *b)
Comparator passed to qsort to compare two counters.
static grid_library_globals ** per_thread_globals
grid_library_kernel
Various kernels provided by the grid library.
void mpi_sum_func(long *number, int mpi_comm)
void print_func(char *message, int output_unit)
void grid_sphere_cache_free(grid_sphere_cache *cache)
Free the memory of the sphere cache.
Configuration of the grid library.
enum grid_backend backend
grid_sphere_cache sphere_cache
long counters[GRID_NBACKENDS *GRID_NKERNELS *GRID_MAX_LP]
Struct holding the entire sphere cache, i.e. for all grids.