(git:badb799)
Loading...
Searching...
No Matches
grid_library.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7#include "grid_library.h"
8#include "grid_common.h"
9#include "grid_constants.h"
10
11#include "../../mpiwrap/cp_mpi.h"
12#include "../../offload/offload_runtime.h"
13
14#include <assert.h>
15#include <omp.h>
16#include <stddef.h>
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20
21#define GRID_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT) \
22 ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
23
24// counter dimensions
25#define GRID_NBACKENDS 5
26#define GRID_NKERNELS 4
27#define GRID_MAX_LP 20
28
33
35static bool library_initialized = false;
36static int max_threads = 0;
38 .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};
39
40#if !defined(_OPENMP)
41#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
42#endif
43
44#if defined(NDEBUG)
45#error \
46 "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
47#endif
48
49/*******************************************************************************
50 * \brief Initializes the grid library.
51 * \author Ole Schuett
52 ******************************************************************************/
55 printf("Error: Grid library was already initialized.\n");
56 abort();
57 }
58
59#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
60 // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
61 // CUDA does not allow to increase this limit after a kernel was launched.
62 // Unfortunately, the required memory is hard to predict because we neither
63 // know which tasks will be run nor how many thread blocks the available GPU
64 // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
65 offloadEnsureMallocHeapSize(64 * 1024 * 1024);
66#endif
67
68 max_threads = omp_get_max_threads();
70 assert(per_thread_globals != NULL);
71
72// Using parallel regions to ensure memory is allocated near a thread's core.
73#pragma omp parallel default(none) shared(per_thread_globals) \
74 num_threads(max_threads)
75 {
76 const int ithread = omp_get_thread_num();
77 per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
78 assert(per_thread_globals[ithread] != NULL);
79 memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
80 }
81
83}
84
85/*******************************************************************************
86 * \brief Finalizes the grid library.
87 * \author Ole Schuett
88 ******************************************************************************/
91 printf("Error: Grid library is not initialized.\n");
92 abort();
93 }
94
95 for (int i = 0; i < max_threads; i++) {
97 free(per_thread_globals[i]);
98 }
100 per_thread_globals = NULL;
101 library_initialized = false;
102}
103
104/*******************************************************************************
105 * \brief Returns a pointer to the thread local sphere cache.
106 * \author Ole Schuett
107 ******************************************************************************/
109 const int ithread = omp_get_thread_num();
110 assert(ithread < max_threads);
111 return &per_thread_globals[ithread]->sphere_cache;
112}
113
114/*******************************************************************************
115 * \brief Configures the grid library.
116 * \author Ole Schuett
117 ******************************************************************************/
118void grid_library_set_config(const enum grid_backend backend,
119 const bool validate, const bool apply_cutoff) {
120 config.backend = backend;
121 config.validate = validate;
123}
124
125/*******************************************************************************
126 * \brief Returns the library config.
127 * \author Ole Schuett
128 ******************************************************************************/
130
131/*******************************************************************************
132 * \brief Adds given increment to counter specified by lp, backend, and kernel.
133 * \author Ole Schuett
134 ******************************************************************************/
135void grid_library_counter_add(const int lp, const enum grid_backend backend,
136 const enum grid_library_kernel kernel,
137 const int increment) {
138 assert(lp >= 0);
139 assert(kernel < GRID_NKERNELS);
140 const int back = backend - GRID_BACKEND_REF;
141 assert(back < GRID_NBACKENDS);
142 const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP +
143 imin(lp, GRID_MAX_LP - 1);
144 const int ithread = omp_get_thread_num();
145 assert(ithread < max_threads);
146 per_thread_globals[ithread]->counters[idx] += increment;
147}
148
/*******************************************************************************
 * \brief Comparator passed to qsort to sort counters in descending order.
 *        Only the first long of each element (the count) is compared.
 * \param a  Pointer to the first element; its leading long is the count.
 * \param b  Pointer to the second element; its leading long is the count.
 * \return Negative if a's count is larger than b's, positive if it is
 *         smaller, and zero if both counts are equal.
 * \author Ole Schuett
 ******************************************************************************/
static int compare_counters(const void *a, const void *b) {
  const long count_a = *(const long *)a;
  const long count_b = *(const long *)b;
  // Compare instead of subtracting: the difference of two longs may overflow
  // and, even when it does not, need not fit into the int return value.
  return (count_b > count_a) - (count_b < count_a);
}
156
157/*******************************************************************************
158 * \brief Prints statistics gathered by the grid library.
159 * \author Ole Schuett
160 ******************************************************************************/
161void grid_library_print_stats(const int fortran_comm,
162 void (*print_func)(const char *, int, int),
163 const int output_unit) {
164 if (!library_initialized) {
165 printf("Error: Grid library is not initialized.\n");
166 abort();
167 }
168
169 const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
170 // Sum all counters across threads and mpi ranks.
171 const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP;
172 long counters[ncounters][2];
173 memset(counters, 0, ncounters * 2 * sizeof(long));
174 double total = 0.0;
175 for (int i = 0; i < ncounters; i++) {
176 counters[i][1] = i; // needed as inverse index after qsort
177 for (int j = 0; j < max_threads; j++) {
178 counters[i][0] += per_thread_globals[j]->counters[i];
179 }
180 cp_mpi_sum_long(&counters[i][0], 1, comm);
181 total += counters[i][0];
182 }
183
184 // Sort counters.
185 qsort(counters, ncounters, 2 * sizeof(long), &compare_counters);
186
187 // Determine if anything needs to be printed.
188 bool print = false;
189 for (int i = 0; i < ncounters && !print; i++) {
190 if (counters[i][0] != 0) {
191 print = true;
192 }
193 }
194 if (!print) {
195 return; // nothing to be printed
196 }
197
198 // Print counters.
199 GRID_LIBRARY_PRINT(print_func, "\n", output_unit);
202 " ----------------------------------------------------------------"
203 "---------------\n",
204 output_unit);
207 " - "
208 " -\n",
209 output_unit);
212 " - GRID STATISTICS "
213 " -\n",
214 output_unit);
217 " - "
218 " -\n",
219 output_unit);
222 " ----------------------------------------------------------------"
223 "---------------\n",
224 output_unit);
227 " LP KERNEL BACKEND "
228 "COUNT PERCENT\n",
229 output_unit);
230
231 const char *kernel_names[] = {"collocate ortho", "integrate ortho",
232 "collocate general", "integrate general"};
233 const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};
234
235 for (int i = 0; i < ncounters; i++) {
236 if (counters[i][0] == 0)
237 continue; // skip empty counters
238 const double percent = 100.0 * counters[i][0] / total;
239 const int idx = counters[i][1];
240 const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
241 const int back = idx / backend_stride;
242 const int kern = (idx % backend_stride) / GRID_MAX_LP;
243 const int lp = (idx % backend_stride) % GRID_MAX_LP;
244 char buffer[100];
245 snprintf(buffer, sizeof(buffer), " %-5i %-17s %-6s %34li %10.2f%%\n", lp,
246 kernel_names[kern], backend_names[back], counters[i][0], percent);
247 GRID_LIBRARY_PRINT(print_func, buffer, output_unit);
248 }
249
252 " ----------------------------------------------------------------"
253 "---------------\n",
254 output_unit);
255}
256
257// EOF
void cp_mpi_sum_long(long *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_LONG.
Definition cp_mpi.c:340
cp_mpi_comm_t cp_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
Definition cp_mpi.c:69
int cp_mpi_comm_t
Definition cp_mpi.h:18
static void print_func(const char *msg, int msglen, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:29
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
Definition dbm_miniapp.c:40
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
grid_backend
@ GRID_BACKEND_REF
@ GRID_BACKEND_AUTO
static void const int const int i
void apply_cutoff(void *ptr)
void grid_library_finalize(void)
Finalizes the grid library.
#define GRID_MAX_LP
static bool library_initialized
#define GRID_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT)
static grid_library_config config
grid_sphere_cache * grid_library_get_sphere_cache(void)
Returns a pointer to the thread local sphere cache.
#define GRID_NKERNELS
void grid_library_init(void)
Initializes the grid library.
#define GRID_NBACKENDS
static int max_threads
grid_library_config grid_library_get_config(void)
Returns the library config.
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
void grid_library_set_config(const enum grid_backend backend, const bool validate, const bool apply_cutoff)
Configures the grid library.
void grid_library_print_stats(const int fortran_comm, void(*print_func)(const char *, int, int), const int output_unit)
Prints statistics gathered by the grid library.
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
static grid_library_globals ** per_thread_globals
grid_library_kernel
Various kernels provided by the grid library.
void grid_sphere_cache_free(grid_sphere_cache *cache)
Free the memory of the sphere cache.
Configuration of the grid library.
enum grid_backend backend
grid_sphere_cache sphere_cache
long counters[GRID_NBACKENDS *GRID_NKERNELS *GRID_MAX_LP]
Struct holding the entire sphere cache, ie. for all grids.