(git:b77b4be)
Loading...
Searching...
No Matches
grid_library.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <omp.h>
10#include <stddef.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14
15#include "../../offload/offload_runtime.h"
16#include "grid_common.h"
17#include "grid_constants.h"
18#include "grid_library.h"
19
20// counter dimensions
21#define GRID_NBACKENDS 5
22#define GRID_NKERNELS 4
23#define GRID_MAX_LP 20
24
29
31static bool library_initialized = false;
32static int max_threads = 0;
34 .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};
35
36#if !defined(_OPENMP)
37#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
38#endif
39
40#if defined(NDEBUG)
41#error \
42 "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
43#endif
44
45/*******************************************************************************
46 * \brief Initializes the grid library.
47 * \author Ole Schuett
48 ******************************************************************************/
51 printf("Error: Grid library was already initialized.\n");
52 abort();
53 }
54
55#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
56 // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
57 // CUDA does not allow to increase this limit after a kernel was launched.
58 // Unfortunately, the required memory is hard to predict because we neither
59 // know which tasks will be run nor how many thread blocks the available GPU
60 // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
61 offloadEnsureMallocHeapSize(64 * 1024 * 1024);
62#endif
63
64 max_threads = omp_get_max_threads();
66 assert(per_thread_globals != NULL);
67
68// Using parallel regions to ensure memory is allocated near a thread's core.
69#pragma omp parallel default(none) shared(per_thread_globals) \
70 num_threads(max_threads)
71 {
72 const int ithread = omp_get_thread_num();
73 per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
74 assert(per_thread_globals[ithread] != NULL);
75 memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
76 }
77
79}
80
81/*******************************************************************************
82 * \brief Finalizes the grid library.
83 * \author Ole Schuett
84 ******************************************************************************/
87 printf("Error: Grid library is not initialized.\n");
88 abort();
89 }
90
91 for (int i = 0; i < max_threads; i++) {
93 free(per_thread_globals[i]);
94 }
96 per_thread_globals = NULL;
97 library_initialized = false;
98}
99
100/*******************************************************************************
101 * \brief Returns a pointer to the thread local sphere cache.
102 * \author Ole Schuett
103 ******************************************************************************/
105 const int ithread = omp_get_thread_num();
106 assert(ithread < max_threads);
107 return &per_thread_globals[ithread]->sphere_cache;
108}
109
110/*******************************************************************************
111 * \brief Configures the grid library.
112 * \author Ole Schuett
113 ******************************************************************************/
114void grid_library_set_config(const enum grid_backend backend,
115 const bool validate, const bool apply_cutoff) {
116 config.backend = backend;
117 config.validate = validate;
119}
120
121/*******************************************************************************
122 * \brief Returns the library config.
123 * \author Ole Schuett
124 ******************************************************************************/
126
127/*******************************************************************************
128 * \brief Adds given increment to counter specified by lp, backend, and kernel.
129 * \author Ole Schuett
130 ******************************************************************************/
131void grid_library_counter_add(const int lp, const enum grid_backend backend,
132 const enum grid_library_kernel kernel,
133 const int increment) {
134 assert(lp >= 0);
135 assert(kernel < GRID_NKERNELS);
136 const int back = backend - GRID_BACKEND_REF;
137 assert(back < GRID_NBACKENDS);
138 const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP +
139 imin(lp, GRID_MAX_LP - 1);
140 const int ithread = omp_get_thread_num();
141 assert(ithread < max_threads);
142 per_thread_globals[ithread]->counters[idx] += increment;
143}
144
145/*******************************************************************************
146 * \brief Comperator passed to qsort to compare two counters.
147 * \author Ole Schuett
148 ******************************************************************************/
149static int compare_counters(const void *a, const void *b) {
150 return *(long *)b - *(long *)a;
151}
152
153/*******************************************************************************
154 * \brief Prints statistics gathered by the grid library.
155 * \author Ole Schuett
156 ******************************************************************************/
157void grid_library_print_stats(void (*mpi_sum_func)(long *, int),
158 const int mpi_comm,
159 void (*print_func)(char *, int),
160 const int output_unit) {
161 if (!library_initialized) {
162 printf("Error: Grid library is not initialized.\n");
163 abort();
164 }
165
166 // Sum all counters across threads and mpi ranks.
167 const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP;
168 long counters[ncounters][2];
169 memset(counters, 0, ncounters * 2 * sizeof(long));
170 double total = 0.0;
171 for (int i = 0; i < ncounters; i++) {
172 counters[i][1] = i; // needed as inverse index after qsort
173 for (int j = 0; j < max_threads; j++) {
174 counters[i][0] += per_thread_globals[j]->counters[i];
175 }
176 mpi_sum_func(&counters[i][0], mpi_comm);
177 total += counters[i][0];
178 }
179
180 // Sort counters.
181 qsort(counters, ncounters, 2 * sizeof(long), &compare_counters);
182
183 // Print counters.
184 print_func("\n", output_unit);
185 print_func(" ----------------------------------------------------------------"
186 "---------------\n",
187 output_unit);
188 print_func(" - "
189 " -\n",
190 output_unit);
191 print_func(" - GRID STATISTICS "
192 " -\n",
193 output_unit);
194 print_func(" - "
195 " -\n",
196 output_unit);
197 print_func(" ----------------------------------------------------------------"
198 "---------------\n",
199 output_unit);
200 print_func(" LP KERNEL BACKEND "
201 "COUNT PERCENT\n",
202 output_unit);
203
204 const char *kernel_names[] = {"collocate ortho", "integrate ortho",
205 "collocate general", "integrate general"};
206 const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};
207
208 for (int i = 0; i < ncounters; i++) {
209 if (counters[i][0] == 0)
210 continue; // skip empty counters
211 const double percent = 100.0 * counters[i][0] / total;
212 const int idx = counters[i][1];
213 const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
214 const int back = idx / backend_stride;
215 const int kern = (idx % backend_stride) / GRID_MAX_LP;
216 const int lp = (idx % backend_stride) % GRID_MAX_LP;
217 char buffer[100];
218 snprintf(buffer, sizeof(buffer), " %-5i %-17s %-6s %34li %10.2f%%\n", lp,
219 kernel_names[kern], backend_names[back], counters[i][0], percent);
220 print_func(buffer, output_unit);
221 }
222
223 print_func(" ----------------------------------------------------------------"
224 "---------------\n",
225 output_unit);
226}
227
228// EOF
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
Definition dbm_miniapp.c:38
static void print_func(char *message, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:28
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
grid_backend
@ GRID_BACKEND_REF
@ GRID_BACKEND_AUTO
static void const int const int i
void apply_cutoff(void *ptr)
void grid_library_finalize(void)
Finalizes the grid library.
#define GRID_MAX_LP
static bool library_initialized
void grid_library_print_stats(void(*mpi_sum_func)(long *, int), const int mpi_comm, void(*print_func)(char *, int), const int output_unit)
Prints statistics gathered by the grid library.
static grid_library_config config
grid_sphere_cache * grid_library_get_sphere_cache(void)
Returns a pointer to the thread local sphere cache.
#define GRID_NKERNELS
void grid_library_init(void)
Initializes the grid library.
#define GRID_NBACKENDS
static int max_threads
grid_library_config grid_library_get_config(void)
Returns the library config.
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
void grid_library_set_config(const enum grid_backend backend, const bool validate, const bool apply_cutoff)
Configures the grid library.
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
static grid_library_globals ** per_thread_globals
grid_library_kernel
Various kernels provided by the grid library.
void mpi_sum_func(long *number, int mpi_comm)
void grid_sphere_cache_free(grid_sphere_cache *cache)
Free the memory of the sphere cache.
Configuration of the grid library.
enum grid_backend backend
grid_sphere_cache sphere_cache
long counters[GRID_NBACKENDS *GRID_NKERNELS *GRID_MAX_LP]
Struct holding the entire sphere cache, ie. for all grids.