(git:374b731)
Loading...
Searching...
No Matches
grid_library.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <omp.h>
10#include <stddef.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14
15#include "../../offload/offload_runtime.h"
16#include "grid_common.h"
17#include "grid_constants.h"
18#include "grid_library.h"
19
20// counter dimensions
21#define GRID_NBACKENDS 5
22#define GRID_NKERNELS 4
23#define GRID_MAX_LP 20
24
29
31static bool library_initialized = false;
32static int max_threads = 0;
34 .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};
35
36#if !defined(_OPENMP)
37#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
38#endif
39
40#if defined(NDEBUG)
41#error \
42 "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
43#endif
44
45/*******************************************************************************
46 * \brief Initializes the grid library.
47 * \author Ole Schuett
48 ******************************************************************************/
51 printf("Error: Grid library was already initialized.\n");
52 abort();
53 }
54
55#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
56 // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
57 // CUDA does not allow to increase this limit after a kernel was launched.
58 // Unfortunately, the required memory is hard to predict because we neither
59 // know which tasks will be run nor how many thread blocks the available GPU
60 // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
61 offloadEnsureMallocHeapSize(64 * 1024 * 1024);
62#endif
63
64 max_threads = omp_get_max_threads();
66
67// Using parallel regions to ensure memory is allocated near a thread's core.
68#pragma omp parallel default(none) shared(per_thread_globals) \
69 num_threads(max_threads)
70 {
71 const int ithread = omp_get_thread_num();
72 per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
73 memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
74 }
75
77}
78
79/*******************************************************************************
80 * \brief Finalizes the grid library.
81 * \author Ole Schuett
82 ******************************************************************************/
85 printf("Error: Grid library is not initialized.\n");
86 abort();
87 }
88
89 for (int i = 0; i < max_threads; i++) {
91 free(per_thread_globals[i]);
92 }
94 per_thread_globals = NULL;
95 library_initialized = false;
96}
97
98/*******************************************************************************
99 * \brief Returns a pointer to the thread local sphere cache.
100 * \author Ole Schuett
101 ******************************************************************************/
103 const int ithread = omp_get_thread_num();
104 assert(ithread < max_threads);
105 return &per_thread_globals[ithread]->sphere_cache;
106}
107
108/*******************************************************************************
109 * \brief Configures the grid library.
110 * \author Ole Schuett
111 ******************************************************************************/
112void grid_library_set_config(const enum grid_backend backend,
113 const bool validate, const bool apply_cutoff) {
114 config.backend = backend;
115 config.validate = validate;
117}
118
119/*******************************************************************************
120 * \brief Returns the library config.
121 * \author Ole Schuett
122 ******************************************************************************/
124
125/*******************************************************************************
126 * \brief Adds given increment to counter specified by lp, backend, and kernel.
127 * \author Ole Schuett
128 ******************************************************************************/
129void grid_library_counter_add(const int lp, const enum grid_backend backend,
130 const enum grid_library_kernel kernel,
131 const int increment) {
132 assert(lp >= 0);
133 assert(kernel < GRID_NKERNELS);
134 const int back = backend - GRID_BACKEND_REF;
135 assert(back < GRID_NBACKENDS);
136 const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP +
137 imin(lp, GRID_MAX_LP - 1);
138 const int ithread = omp_get_thread_num();
139 assert(ithread < max_threads);
140 per_thread_globals[ithread]->counters[idx] += increment;
141}
142
143/*******************************************************************************
144 * \brief Comperator passed to qsort to compare two counters.
145 * \author Ole Schuett
146 ******************************************************************************/
147static int compare_counters(const void *a, const void *b) {
148 return *(long *)b - *(long *)a;
149}
150
151/*******************************************************************************
152 * \brief Prints statistics gathered by the grid library.
153 * \author Ole Schuett
154 ******************************************************************************/
155void grid_library_print_stats(void (*mpi_sum_func)(long *, int),
156 const int mpi_comm,
157 void (*print_func)(char *, int),
158 const int output_unit) {
159 if (!library_initialized) {
160 printf("Error: Grid library is not initialized.\n");
161 abort();
162 }
163
164 // Sum all counters across threads and mpi ranks.
165 const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP;
166 long counters[ncounters][2];
167 memset(counters, 0, ncounters * 2 * sizeof(long));
168 double total = 0.0;
169 for (int i = 0; i < ncounters; i++) {
170 counters[i][1] = i; // needed as inverse index after qsort
171 for (int j = 0; j < max_threads; j++) {
172 counters[i][0] += per_thread_globals[j]->counters[i];
173 }
174 mpi_sum_func(&counters[i][0], mpi_comm);
175 total += counters[i][0];
176 }
177
178 // Sort counters.
179 qsort(counters, ncounters, 2 * sizeof(long), &compare_counters);
180
181 // Print counters.
182 print_func("\n", output_unit);
183 print_func(" ----------------------------------------------------------------"
184 "---------------\n",
185 output_unit);
186 print_func(" - "
187 " -\n",
188 output_unit);
189 print_func(" - GRID STATISTICS "
190 " -\n",
191 output_unit);
192 print_func(" - "
193 " -\n",
194 output_unit);
195 print_func(" ----------------------------------------------------------------"
196 "---------------\n",
197 output_unit);
198 print_func(" LP KERNEL BACKEND "
199 "COUNT PERCENT\n",
200 output_unit);
201
202 const char *kernel_names[] = {"collocate ortho", "integrate ortho",
203 "collocate general", "integrate general"};
204 const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};
205
206 for (int i = 0; i < ncounters; i++) {
207 if (counters[i][0] == 0)
208 continue; // skip empty counters
209 const double percent = 100.0 * counters[i][0] / total;
210 const int idx = counters[i][1];
211 const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
212 const int back = idx / backend_stride;
213 const int kern = (idx % backend_stride) / GRID_MAX_LP;
214 const int lp = (idx % backend_stride) % GRID_MAX_LP;
215 char buffer[100];
216 snprintf(buffer, sizeof(buffer), " %-5i %-17s %-6s %34li %10.2f%%\n", lp,
217 kernel_names[kern], backend_names[back], counters[i][0], percent);
218 print_func(buffer, output_unit);
219 }
220
221 print_func(" ----------------------------------------------------------------"
222 "---------------\n",
223 output_unit);
224}
225
226// EOF
static int imin(int x, int y)
Returns the smaller of two given integer (missing from the C standard)
Definition dbm_miniapp.c:38
static void print_func(char *message, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:28
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
grid_backend
@ GRID_BACKEND_REF
@ GRID_BACKEND_AUTO
static void const int const int i
void apply_cutoff(void *ptr)
void grid_library_finalize(void)
Finalizes the grid library.
#define GRID_MAX_LP
static bool library_initialized
void grid_library_print_stats(void(*mpi_sum_func)(long *, int), const int mpi_comm, void(*print_func)(char *, int), const int output_unit)
Prints statistics gathered by the grid library.
static grid_library_config config
grid_sphere_cache * grid_library_get_sphere_cache(void)
Returns a pointer to the thread local sphere cache.
#define GRID_NKERNELS
void grid_library_init(void)
Initializes the grid library.
#define GRID_NBACKENDS
static int max_threads
grid_library_config grid_library_get_config(void)
Returns the library config.
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
void grid_library_set_config(const enum grid_backend backend, const bool validate, const bool apply_cutoff)
Configures the grid library.
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
static grid_library_globals ** per_thread_globals
grid_library_kernel
Various kernels provided by the grid library.
void mpi_sum_func(long *number, int mpi_comm)
void grid_sphere_cache_free(grid_sphere_cache *cache)
Free the memory of the sphere cache.
Configuration of the grid library.
enum grid_backend backend
grid_sphere_cache sphere_cache
long counters[GRID_NBACKENDS *GRID_NKERNELS *GRID_MAX_LP]
Struct holding the entire sphere cache, ie. for all grids.