(git:44e3845)
Loading...
Searching...
No Matches
dbm_library.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7#include "dbm_library.h"
8#include "../mpiwrap/cp_mpi.h"
9#include "../offload/offload_mempool.h"
10
11#include <assert.h>
12#include <inttypes.h>
13#include <omp.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17
18#define DBM_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT) \
19 ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
20#define DBM_NUM_COUNTERS 64
21
22static int64_t **per_thread_counters = NULL;
23static bool library_initialized = false;
24static int max_threads = 0;
25
26#if !defined(_OPENMP)
27#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
28#endif
29
30/*******************************************************************************
31 * \brief Initializes the DBM library.
32 * \author Ole Schuett
33 ******************************************************************************/
34void dbm_library_init(void) {
35 assert(omp_get_num_threads() == 1);
36
38 fprintf(stderr, "DBM library was already initialized.\n");
39 abort();
40 }
41
42 max_threads = omp_get_max_threads();
43 per_thread_counters = malloc(max_threads * sizeof(int64_t *));
44 assert(per_thread_counters != NULL);
45
46 // Using parallel regions to ensure memory is allocated near a thread's core.
47#pragma omp parallel default(none) shared(per_thread_counters) \
48 num_threads(max_threads)
49 {
50 const int ithread = omp_get_thread_num();
51 const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
52 per_thread_counters[ithread] = malloc(counters_size);
53 assert(per_thread_counters[ithread] != NULL);
54 memset(per_thread_counters[ithread], 0, counters_size);
55 }
56
58}
59
60/*******************************************************************************
61 * \brief Finalizes the DBM library.
62 * \author Ole Schuett
63 ******************************************************************************/
65 assert(omp_get_num_threads() == 1);
66
68 fprintf(stderr, "Error: DBM library is not initialized.\n");
69 abort();
70 }
71
72 for (int i = 0; i < max_threads; i++) {
74 }
77
79 library_initialized = false;
80}
81
82/*******************************************************************************
83 * \brief Computes min(3, floor(log10(x))).
84 * \author Ole Schuett
85 ******************************************************************************/
86static int floorlog10(const int x) {
87 if (x >= 1000) {
88 return 3;
89 }
90 if (x >= 100) {
91 return 2;
92 }
93 if (x >= 10) {
94 return 1;
95 }
96 return 0;
97}
98
99/*******************************************************************************
100 * \brief Add given block multiplication to stats. This routine is thread-safe.
101 * \author Ole Schuett
102 ******************************************************************************/
103void dbm_library_counter_increment(const int m, const int n, const int k) {
104 const int ithread = omp_get_thread_num();
105 assert(ithread < max_threads);
106 const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
107 per_thread_counters[ithread][idx]++;
108}
109
110/*******************************************************************************
111 * \brief Comperator passed to qsort to compare two counters.
112 * \author Ole Schuett
113 ******************************************************************************/
114static int compare_counters(const void *a, const void *b) {
115 return *(const int64_t *)b - *(const int64_t *)a;
116}
117
118/*******************************************************************************
119 * \brief Prints statistics gathered by the DBM library.
120 * \author Ole Schuett
121 ******************************************************************************/
122void dbm_library_print_stats(const int fortran_comm,
123 void (*print_func)(const char *, int, int),
124 const int output_unit) {
125 assert(omp_get_num_threads() == 1);
126
127 if (!library_initialized) {
128 fprintf(stderr, "Error: DBM library is not initialized.\n");
129 abort();
130 }
131
132 const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
133 // Sum all counters across threads and mpi ranks.
134 int64_t counters[DBM_NUM_COUNTERS][2] = {{0}};
135 double total = 0.0;
136 for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
137 counters[i][1] = i; // needed as inverse index after qsort
138 for (int j = 0; j < max_threads; j++) {
139 counters[i][0] += per_thread_counters[j][i];
140 }
141 cp_mpi_sum_int64(&counters[i][0], 1, comm);
142 total += counters[i][0];
143 }
144
145 // Sort counters.
146 qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
147
148 // Determine if anything needs to be printed.
149 bool print = false;
150 for (int i = 0; i < DBM_NUM_COUNTERS && !print; i++) {
151 if (counters[i][0] != 0) {
152 print = true;
153 }
154 }
155 if (!print) {
156 return; // nothing to be printed
157 }
158
159 // Print counters.
160 DBM_LIBRARY_PRINT(print_func, "\n", output_unit);
163 " ----------------------------------------------------------------"
164 "---------------\n",
165 output_unit);
168 " - "
169 " -\n",
170 output_unit);
173 " - DBM STATISTICS "
174 " -\n",
175 output_unit);
178 " - "
179 " -\n",
180 output_unit);
183 " ----------------------------------------------------------------"
184 "---------------\n",
185 output_unit);
188 " M x N x K "
189 "COUNT PERCENT\n",
190 output_unit);
191
192 const char *labels[] = {"?", "??", "???", ">999"};
193 char buffer[100];
194 for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
195 if (counters[i][0] == 0) {
196 continue; // skip empty counters
197 }
198 const double percent = 100.0 * counters[i][0] / total;
199 const int idx = counters[i][1];
200 const int m = (idx % 64) / 16;
201 const int n = (idx % 16) / 4;
202 const int k = (idx % 4) / 1;
203 snprintf(buffer, sizeof(buffer),
204 " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m],
205 labels[n], labels[k], counters[i][0], percent);
206 DBM_LIBRARY_PRINT(print_func, buffer, output_unit);
207 }
208
211 " ----------------------------------------------------------------"
212 "---------------\n",
213 output_unit);
214}
215
216// EOF
void cp_mpi_sum_int64(int64_t *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_INT64_T.
Definition cp_mpi.c:363
cp_mpi_comm_t cp_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
Definition cp_mpi.c:69
int cp_mpi_comm_t
Definition cp_mpi.h:18
static bool library_initialized
Definition dbm_library.c:23
void dbm_library_finalize(void)
Finalizes the DBM library.
Definition dbm_library.c:64
void dbm_library_counter_increment(const int m, const int n, const int k)
Add given block multiplication to stats. This routine is thread-safe.
#define DBM_NUM_COUNTERS
Definition dbm_library.c:20
#define DBM_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT)
Definition dbm_library.c:18
static int floorlog10(const int x)
Computes min(3, floor(log10(x))).
Definition dbm_library.c:86
void dbm_library_init(void)
Initializes the DBM library.
Definition dbm_library.c:34
static int max_threads
Definition dbm_library.c:24
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
static int64_t ** per_thread_counters
Definition dbm_library.c:22
static void print_func(const char *msg, int msglen, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:29
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i
void offload_mempool_clear(void)
Internal routine for freeing all memory in the pool.