(git:ed6f26b)
Loading...
Searching...
No Matches
dbm_library.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <inttypes.h>
10#include <omp.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14
15#include "dbm_library.h"
16#include "dbm_mempool.h"
17#include "dbm_mpi.h"
18
19#define DBM_NUM_COUNTERS 64
20
21static int64_t **per_thread_counters = NULL;
22static bool library_initialized = false;
23static int max_threads = 0;
24
25#if !defined(_OPENMP)
26#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
27#endif
28
29/*******************************************************************************
30 * \brief Initializes the DBM library.
31 * \author Ole Schuett
32 ******************************************************************************/
33void dbm_library_init(void) {
34 assert(omp_get_num_threads() == 1);
35
37 fprintf(stderr, "DBM library was already initialized.\n");
38 abort();
39 }
40
41 max_threads = omp_get_max_threads();
42 per_thread_counters = malloc(max_threads * sizeof(int64_t *));
43 assert(per_thread_counters != NULL);
44
45 // Using parallel regions to ensure memory is allocated near a thread's core.
46#pragma omp parallel default(none) shared(per_thread_counters) \
47 num_threads(max_threads)
48 {
49 const int ithread = omp_get_thread_num();
50 const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
51 per_thread_counters[ithread] = malloc(counters_size);
52 assert(per_thread_counters[ithread] != NULL);
53 memset(per_thread_counters[ithread], 0, counters_size);
54 }
55
57}
58
59/*******************************************************************************
60 * \brief Finalizes the DBM library.
61 * \author Ole Schuett
62 ******************************************************************************/
64 assert(omp_get_num_threads() == 1);
65
67 fprintf(stderr, "Error: DBM library is not initialized.\n");
68 abort();
69 }
70
71 for (int i = 0; i < max_threads; i++) {
73 }
76
78 library_initialized = false;
79}
80
81/*******************************************************************************
82 * \brief Computes min(3, floor(log10(x))).
83 * \author Ole Schuett
84 ******************************************************************************/
85static int floorlog10(const int x) {
86 if (x >= 1000) {
87 return 3;
88 }
89 if (x >= 100) {
90 return 2;
91 }
92 if (x >= 10) {
93 return 1;
94 }
95 return 0;
96}
97
98/*******************************************************************************
99 * \brief Add given block multiplication to stats. This routine is thread-safe.
100 * \author Ole Schuett
101 ******************************************************************************/
102void dbm_library_counter_increment(const int m, const int n, const int k) {
103 const int ithread = omp_get_thread_num();
104 assert(ithread < max_threads);
105 const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
106 per_thread_counters[ithread][idx]++;
107}
108
109/*******************************************************************************
110 * \brief Comperator passed to qsort to compare two counters.
111 * \author Ole Schuett
112 ******************************************************************************/
113static int compare_counters(const void *a, const void *b) {
114 return *(const int64_t *)b - *(const int64_t *)a;
115}
116
117/*******************************************************************************
118 * \brief Prints statistics gathered by the DBM library.
119 * \author Ole Schuett
120 ******************************************************************************/
121void dbm_library_print_stats(const int fortran_comm,
122 void (*print_func)(char *, int),
123 const int output_unit) {
124 assert(omp_get_num_threads() == 1);
125
126 if (!library_initialized) {
127 fprintf(stderr, "Error: DBM library is not initialized.\n");
128 abort();
129 }
130
131 const dbm_mpi_comm_t comm = dbm_mpi_comm_f2c(fortran_comm);
132 // Sum all counters across threads and mpi ranks.
133 int64_t counters[DBM_NUM_COUNTERS][2] = {{0}};
134 double total = 0.0;
135 for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
136 counters[i][1] = i; // needed as inverse index after qsort
137 for (int j = 0; j < max_threads; j++) {
138 counters[i][0] += per_thread_counters[j][i];
139 }
140 dbm_mpi_sum_int64(&counters[i][0], 1, comm);
141 total += counters[i][0];
142 }
143
144 // Sort counters.
145 qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
146
147 // Print counters.
148 print_func("\n", output_unit);
149 print_func(" ----------------------------------------------------------------"
150 "---------------\n",
151 output_unit);
152 print_func(" - "
153 " -\n",
154 output_unit);
155 print_func(" - DBM STATISTICS "
156 " -\n",
157 output_unit);
158 print_func(" - "
159 " -\n",
160 output_unit);
161 print_func(" ----------------------------------------------------------------"
162 "---------------\n",
163 output_unit);
164 print_func(" M x N x K "
165 "COUNT PERCENT\n",
166 output_unit);
167
168 const char *labels[] = {"?", "??", "???", ">999"};
169 char buffer[100];
170 for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
171 if (counters[i][0] == 0) {
172 continue; // skip empty counters
173 }
174 const double percent = 100.0 * counters[i][0] / total;
175 const int idx = counters[i][1];
176 const int m = (idx % 64) / 16;
177 const int n = (idx % 16) / 4;
178 const int k = (idx % 4) / 1;
179 snprintf(buffer, sizeof(buffer),
180 " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m],
181 labels[n], labels[k], counters[i][0], percent);
182 print_func(buffer, output_unit);
183 }
184
185 print_func(" ----------------------------------------------------------------"
186 "---------------\n",
187 output_unit);
188
189 dbm_memstats_t memstats;
190 dbm_mempool_statistics(&memstats);
191 dbm_mpi_max_uint64(&memstats.device_mallocs, 1, comm);
192 dbm_mpi_max_uint64(&memstats.host_mallocs, 1, comm);
193
194 if (0 != memstats.device_mallocs || 0 != memstats.host_mallocs) {
195 print_func(" Memory consumption "
196 " Number of allocations Used [MiB] Size [MiB]\n",
197 output_unit);
198 }
199 if (0 < memstats.device_mallocs) {
200 dbm_mpi_max_uint64(&memstats.device_size, 1, comm);
201 snprintf(buffer, sizeof(buffer),
202 " Device "
203 " %20" PRIuPTR " %10" PRIuPTR " %10" PRIuPTR "\n",
204 (uintptr_t)memstats.device_mallocs,
205 (uintptr_t)((memstats.device_used + (512U << 10)) >> 20),
206 (uintptr_t)((memstats.device_size + (512U << 10)) >> 20));
207 print_func(buffer, output_unit);
208 }
209 if (0 < memstats.host_mallocs) {
210 dbm_mpi_max_uint64(&memstats.host_size, 1, comm);
211 snprintf(buffer, sizeof(buffer),
212 " Host "
213 " %20" PRIuPTR " %10" PRIuPTR " %10" PRIuPTR "\n",
214 (uintptr_t)memstats.host_mallocs,
215 (uintptr_t)((memstats.host_used + (512U << 10)) >> 20),
216 (uintptr_t)((memstats.host_size + (512U << 10)) >> 20));
217 print_func(buffer, output_unit);
218 }
219 if (0 < memstats.device_mallocs || 0 < memstats.host_mallocs) {
221 " ----------------------------------------------------------------"
222 "---------------\n",
223 output_unit);
224 }
225}
226
227// EOF
static bool library_initialized
Definition dbm_library.c:22
void dbm_library_finalize(void)
Finalizes the DBM library.
Definition dbm_library.c:63
void dbm_library_counter_increment(const int m, const int n, const int k)
Add given block multiplication to stats. This routine is thread-safe.
#define DBM_NUM_COUNTERS
Definition dbm_library.c:19
static int floorlog10(const int x)
Computes min(3, floor(log10(x))).
Definition dbm_library.c:85
void dbm_library_init(void)
Initializes the DBM library.
Definition dbm_library.c:33
static int max_threads
Definition dbm_library.c:23
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
static int64_t ** per_thread_counters
Definition dbm_library.c:21
void dbm_mempool_statistics(dbm_memstats_t *memstats)
Internal routine to query statistics.
void dbm_mempool_clear(void)
Internal routine for freeing all memory in the pool.
static void print_func(char *message, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:28
void dbm_mpi_sum_int64(int64_t *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_INT64_T.
Definition dbm_mpi.c:325
void dbm_mpi_max_uint64(uint64_t *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_MAX and datatype MPI_UINT64_T.
Definition dbm_mpi.c:261
dbm_mpi_comm_t dbm_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
Definition dbm_mpi.c:69
int dbm_mpi_comm_t
Definition dbm_mpi.h:19
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i
Internal struct for pool statistics.
Definition dbm_mempool.h:50
uint64_t host_size
Definition dbm_mempool.h:54
uint64_t device_used
Definition dbm_mempool.h:52
uint64_t host_mallocs
Definition dbm_mempool.h:56
uint64_t host_used
Definition dbm_mempool.h:52
uint64_t device_mallocs
Definition dbm_mempool.h:56
uint64_t device_size
Definition dbm_mempool.h:54