(git:e7e05ae)
grid_library.c
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* CP2K: A general program to perform molecular dynamics simulations */
3 /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 /* */
5 /* SPDX-License-Identifier: BSD-3-Clause */
6 /*----------------------------------------------------------------------------*/
7 
8 #include <assert.h>
9 #include <omp.h>
10 #include <stddef.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include "../../offload/offload_runtime.h"
16 #include "grid_common.h"
17 #include "grid_constants.h"
18 #include "grid_library.h"
19 
20 // counter dimensions
21 #define GRID_NBACKENDS 5
22 #define GRID_NKERNELS 4
23 #define GRID_MAX_LP 20
24 
25 typedef struct {
29 
31 static bool library_initialized = false;
32 static int max_threads = 0;
34  .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};
35 
36 #if !defined(_OPENMP)
37 #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
38 #endif
39 
40 #if defined(NDEBUG)
41 #error \
42  "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
43 #endif
44 
45 /*******************************************************************************
46  * \brief Initializes the grid library.
47  * \author Ole Schuett
48  ******************************************************************************/
49 void grid_library_init(void) {
50  if (library_initialized) {
51  printf("Error: Grid library was already initialized.\n");
52  abort();
53  }
54 
55 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
56  // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
57  // CUDA does not allow to increase this limit after a kernel was launched.
58  // Unfortunately, the required memory is hard to predict because we neither
59  // know which tasks will be run nor how many thread blocks the available GPU
60  // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
61  offloadEnsureMallocHeapSize(64 * 1024 * 1024);
62 #endif
63 
64  max_threads = omp_get_max_threads();
66 
67 // Using parallel regions to ensure memory is allocated near a thread's core.
68 #pragma omp parallel default(none) shared(per_thread_globals) \
69  num_threads(max_threads)
70  {
71  const int ithread = omp_get_thread_num();
72  per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
73  memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
74  }
75 
76  library_initialized = true;
77 }
78 
79 /*******************************************************************************
80  * \brief Finalizes the grid library.
81  * \author Ole Schuett
82  ******************************************************************************/
84  if (!library_initialized) {
85  printf("Error: Grid library is not initialized.\n");
86  abort();
87  }
88 
89  for (int i = 0; i < max_threads; i++) {
91  free(per_thread_globals[i]);
92  }
93  free(per_thread_globals);
94  per_thread_globals = NULL;
95  library_initialized = false;
96 }
97 
98 /*******************************************************************************
99  * \brief Returns a pointer to the thread local sphere cache.
100  * \author Ole Schuett
101  ******************************************************************************/
103  const int ithread = omp_get_thread_num();
104  assert(ithread < max_threads);
105  return &per_thread_globals[ithread]->sphere_cache;
106 }
107 
108 /*******************************************************************************
109  * \brief Configures the grid library.
110  * \author Ole Schuett
111  ******************************************************************************/
112 void grid_library_set_config(const enum grid_backend backend,
113  const bool validate, const bool apply_cutoff) {
114  config.backend = backend;
115  config.validate = validate;
117 }
118 
119 /*******************************************************************************
120  * \brief Returns the library config.
121  * \author Ole Schuett
122  ******************************************************************************/
124 
125 /*******************************************************************************
126  * \brief Adds given increment to counter specified by lp, backend, and kernel.
127  * \author Ole Schuett
128  ******************************************************************************/
129 void grid_library_counter_add(const int lp, const enum grid_backend backend,
130  const enum grid_library_kernel kernel,
131  const int increment) {
132  assert(lp >= 0);
133  assert(kernel < GRID_NKERNELS);
134  const int back = backend - GRID_BACKEND_REF;
135  assert(back < GRID_NBACKENDS);
136  const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP +
137  imin(lp, GRID_MAX_LP - 1);
138  const int ithread = omp_get_thread_num();
139  assert(ithread < max_threads);
140  per_thread_globals[ithread]->counters[idx] += increment;
141 }
142 
143 /*******************************************************************************
144  * \brief Comperator passed to qsort to compare two counters.
145  * \author Ole Schuett
146  ******************************************************************************/
147 static int compare_counters(const void *a, const void *b) {
148  return *(long *)b - *(long *)a;
149 }
150 
151 /*******************************************************************************
152  * \brief Prints statistics gathered by the grid library.
153  * \author Ole Schuett
154  ******************************************************************************/
155 void grid_library_print_stats(void (*mpi_sum_func)(long *, int),
156  const int mpi_comm,
157  void (*print_func)(char *, int),
158  const int output_unit) {
159  if (!library_initialized) {
160  printf("Error: Grid library is not initialized.\n");
161  abort();
162  }
163 
164  // Sum all counters across threads and mpi ranks.
165  const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP;
166  long counters[ncounters][2];
167  memset(counters, 0, ncounters * 2 * sizeof(long));
168  double total = 0.0;
169  for (int i = 0; i < ncounters; i++) {
170  counters[i][1] = i; // needed as inverse index after qsort
171  for (int j = 0; j < max_threads; j++) {
172  counters[i][0] += per_thread_globals[j]->counters[i];
173  }
174  mpi_sum_func(&counters[i][0], mpi_comm);
175  total += counters[i][0];
176  }
177 
178  // Sort counters.
179  qsort(counters, ncounters, 2 * sizeof(long), &compare_counters);
180 
181  // Print counters.
182  print_func("\n", output_unit);
183  print_func(" ----------------------------------------------------------------"
184  "---------------\n",
185  output_unit);
186  print_func(" - "
187  " -\n",
188  output_unit);
189  print_func(" - GRID STATISTICS "
190  " -\n",
191  output_unit);
192  print_func(" - "
193  " -\n",
194  output_unit);
195  print_func(" ----------------------------------------------------------------"
196  "---------------\n",
197  output_unit);
198  print_func(" LP KERNEL BACKEND "
199  "COUNT PERCENT\n",
200  output_unit);
201 
202  const char *kernel_names[] = {"collocate ortho", "integrate ortho",
203  "collocate general", "integrate general"};
204  const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};
205 
206  for (int i = 0; i < ncounters; i++) {
207  if (counters[i][0] == 0)
208  continue; // skip empty counters
209  const double percent = 100.0 * counters[i][0] / total;
210  const int idx = counters[i][1];
211  const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
212  const int back = idx / backend_stride;
213  const int kern = (idx % backend_stride) / GRID_MAX_LP;
214  const int lp = (idx % backend_stride) % GRID_MAX_LP;
215  char buffer[100];
216  snprintf(buffer, sizeof(buffer), " %-5i %-17s %-6s %34li %10.2f%%\n", lp,
217  kernel_names[kern], backend_names[back], counters[i][0], percent);
218  print_func(buffer, output_unit);
219  }
220 
221  print_func(" ----------------------------------------------------------------"
222  "---------------\n",
223  output_unit);
224 }
225 
226 // EOF
static int imin(int x, int y)
Returns the smaller of two given integer (missing from the C standard)
Definition: dbm_miniapp.c:38
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Definition: grid_common.h:153
grid_backend
@ GRID_BACKEND_REF
@ GRID_BACKEND_AUTO
static void const int const int i
void apply_cutoff(void *ptr)
void grid_library_finalize(void)
Finalizes the grid library.
Definition: grid_library.c:83
#define GRID_MAX_LP
Definition: grid_library.c:23
static bool library_initialized
Definition: grid_library.c:31
void grid_library_print_stats(void(*mpi_sum_func)(long *, int), const int mpi_comm, void(*print_func)(char *, int), const int output_unit)
Prints statistics gathered by the grid library.
Definition: grid_library.c:155
static grid_library_config config
Definition: grid_library.c:33
#define GRID_NKERNELS
Definition: grid_library.c:22
grid_sphere_cache * grid_library_get_sphere_cache(void)
Returns a pointer to the thread local sphere cache.
Definition: grid_library.c:102
void grid_library_init(void)
Initializes the grid library.
Definition: grid_library.c:49
#define GRID_NBACKENDS
Definition: grid_library.c:21
static int max_threads
Definition: grid_library.c:32
grid_library_config grid_library_get_config(void)
Returns the library config.
Definition: grid_library.c:123
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
Definition: grid_library.c:129
void grid_library_set_config(const enum grid_backend backend, const bool validate, const bool apply_cutoff)
Configures the grid library.
Definition: grid_library.c:112
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
Definition: grid_library.c:147
static grid_library_globals ** per_thread_globals
Definition: grid_library.c:30
grid_library_kernel
Various kernels provided by the grid library.
Definition: grid_library.h:65
void mpi_sum_func(long *number, int mpi_comm)
Definition: grid_miniapp.c:16
void print_func(char *message, int output_unit)
Definition: grid_miniapp.c:21
void grid_sphere_cache_free(grid_sphere_cache *cache)
Free the memory of the sphere cache.
real(dp), dimension(3) a
Definition: ai_eri_debug.F:31
real(dp), dimension(3) b
Definition: ai_eri_debug.F:31
Configuration of the grid library.
Definition: grid_library.h:34
enum grid_backend backend
Definition: grid_library.h:35
grid_sphere_cache sphere_cache
Definition: grid_library.c:26
long counters[GRID_NBACKENDS *GRID_NKERNELS *GRID_MAX_LP]
Definition: grid_library.c:27
Struct holding the entire sphere cache, ie. for all grids.