(git:1f285aa)
dbm_library.c
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* CP2K: A general program to perform molecular dynamics simulations */
3 /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 /* */
5 /* SPDX-License-Identifier: BSD-3-Clause */
6 /*----------------------------------------------------------------------------*/
7 
8 #include <assert.h>
9 #include <inttypes.h>
10 #include <omp.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include "dbm_library.h"
17 #include "dbm_mempool.h"
18 #include "dbm_mpi.h"
19 
// Number of statistics buckets: one per (m, n, k) triple of size classes,
// with four size classes per dimension (4 * 4 * 4).
#define DBM_NUM_COUNTERS 64

// True between dbm_library_init() and dbm_library_finalize().
static bool library_initialized = false;

// Number of entries in per_thread_counters; set by dbm_library_init().
static int max_threads = 0;

// One array of DBM_NUM_COUNTERS counters for each OpenMP thread.
static int64_t **per_thread_counters = NULL;
25 
26 #if !defined(_OPENMP)
27 #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
28 #endif
29 
30 /*******************************************************************************
31  * \brief Initializes the DBM library.
32  * \author Ole Schuett
33  ******************************************************************************/
34 void dbm_library_init(void) {
35  assert(omp_get_num_threads() == 1);
36 
37  if (library_initialized) {
38  fprintf(stderr, "DBM library was already initialized.\n");
39  abort();
40  }
41 
42  max_threads = omp_get_max_threads();
43  per_thread_counters = malloc(max_threads * sizeof(int64_t *));
44 
45  // Using parallel regions to ensure memory is allocated near a thread's core.
46 #pragma omp parallel default(none) shared(per_thread_counters) \
47  num_threads(max_threads)
48  {
49  const int ithread = omp_get_thread_num();
50  const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
51  per_thread_counters[ithread] = malloc(counters_size);
52  memset(per_thread_counters[ithread], 0, counters_size);
53  }
54 
55  library_initialized = true;
56 }
57 
58 /*******************************************************************************
59  * \brief Finalizes the DBM library.
60  * \author Ole Schuett
61  ******************************************************************************/
63  assert(omp_get_num_threads() == 1);
64 
65  if (!library_initialized) {
66  fprintf(stderr, "Error: DBM library is not initialized.\n");
67  abort();
68  }
69 
70  for (int i = 0; i < max_threads; i++) {
71  free(per_thread_counters[i]);
72  }
73  free(per_thread_counters);
74  per_thread_counters = NULL;
75 
77  library_initialized = false;
78 }
79 
/*******************************************************************************
 * \brief Returns the size class of x: 0 for x < 10, 1 for x < 100, 2 for
 *        x < 1000 and 3 otherwise, i.e. min(3, floor(log10(x))) for x >= 1.
 * \author Ole Schuett
 ******************************************************************************/
static int floorlog10(const int x) {
  // Each comparison yields 0 or 1, hence the sum counts the passed thresholds.
  return (x >= 10) + (x >= 100) + (x >= 1000);
}
96 
97 /*******************************************************************************
98  * \brief Add given block multiplication to stats. This routine is thread-safe.
99  * \author Ole Schuett
100  ******************************************************************************/
101 void dbm_library_counter_increment(const int m, const int n, const int k) {
102  const int ithread = omp_get_thread_num();
103  assert(ithread < max_threads);
104  const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
105  per_thread_counters[ithread][idx]++;
106 }
107 
/*******************************************************************************
 * \brief Comparator passed to qsort to sort counters in descending order.
 *        Only the first int64_t behind each pointer, the count, is compared.
 * \param a Pointer to the first counter.
 * \param b Pointer to the second counter.
 * \return Negative if a's count is larger, positive if it is smaller, and
 *         zero if both counts are equal.
 * \author Ole Schuett
 ******************************************************************************/
static int compare_counters(const void *a, const void *b) {
  const int64_t count_a = *(const int64_t *)a;
  const int64_t count_b = *(const int64_t *)b;
  // Compare instead of returning count_b - count_a: the 64-bit difference
  // does not fit into the int return value once it reaches 2^31.
  return (count_a < count_b) - (count_a > count_b);
}
115 
116 /*******************************************************************************
117  * \brief Prints statistics gathered by the DBM library.
118  * \author Ole Schuett
119  ******************************************************************************/
120 void dbm_library_print_stats(const int fortran_comm,
121  void (*print_func)(char *, int),
122  const int output_unit) {
123  assert(omp_get_num_threads() == 1);
124 
125  if (!library_initialized) {
126  fprintf(stderr, "Error: DBM library is not initialized.\n");
127  abort();
128  }
129 
130  const dbm_mpi_comm_t comm = dbm_mpi_comm_f2c(fortran_comm);
131  // Sum all counters across threads and mpi ranks.
132  int64_t counters[DBM_NUM_COUNTERS][2];
133  memset(counters, 0, DBM_NUM_COUNTERS * 2 * sizeof(int64_t));
134  double total = 0.0;
135  for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
136  counters[i][1] = i; // needed as inverse index after qsort
137  for (int j = 0; j < max_threads; j++) {
138  counters[i][0] += per_thread_counters[j][i];
139  }
140  dbm_mpi_sum_int64(&counters[i][0], 1, comm);
141  total += counters[i][0];
142  }
143 
144  // Sort counters.
145  qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
146 
147  // Print counters.
148  print_func("\n", output_unit);
149  print_func(" ----------------------------------------------------------------"
150  "---------------\n",
151  output_unit);
152  print_func(" - "
153  " -\n",
154  output_unit);
155  print_func(" - DBM STATISTICS "
156  " -\n",
157  output_unit);
158  print_func(" - "
159  " -\n",
160  output_unit);
161  print_func(" ----------------------------------------------------------------"
162  "---------------\n",
163  output_unit);
164  print_func(" M x N x K "
165  "COUNT PERCENT\n",
166  output_unit);
167 
168  const char *labels[] = {"?", "??", "???", ">999"};
169  for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
170  if (counters[i][0] == 0) {
171  continue; // skip empty counters
172  }
173  const double percent = 100.0 * counters[i][0] / total;
174  const int idx = counters[i][1];
175  const int m = (idx % 64) / 16;
176  const int n = (idx % 16) / 4;
177  const int k = (idx % 4) / 1;
178  char buffer[100];
179  snprintf(buffer, sizeof(buffer),
180  " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m],
181  labels[n], labels[k], counters[i][0], percent);
182  print_func(buffer, output_unit);
183  }
184 
185  print_func(" ----------------------------------------------------------------"
186  "---------------\n",
187  output_unit);
188 }
189 
190 // EOF
static bool library_initialized
Definition: dbm_library.c:23
void dbm_library_finalize(void)
Finalizes the DBM library.
Definition: dbm_library.c:62
void dbm_library_counter_increment(const int m, const int n, const int k)
Add given block multiplication to stats. This routine is thread-safe.
Definition: dbm_library.c:101
#define DBM_NUM_COUNTERS
Definition: dbm_library.c:20
static int floorlog10(const int x)
Computes min(3, floor(log10(x))).
Definition: dbm_library.c:84
void dbm_library_print_stats(const int fortran_comm, void(*print_func)(char *, int), const int output_unit)
Prints statistics gathered by the DBM library.
Definition: dbm_library.c:120
void dbm_library_init(void)
Initializes the DBM library.
Definition: dbm_library.c:34
static int max_threads
Definition: dbm_library.c:24
static int compare_counters(const void *a, const void *b)
Comparator passed to qsort to compare two counters.
Definition: dbm_library.c:112
static int64_t ** per_thread_counters
Definition: dbm_library.c:22
void dbm_mempool_clear(void)
Internal routine for freeing all memory in the pool.
Definition: dbm_mempool.c:185
void dbm_mpi_sum_int64(int64_t *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_INT64_T.
Definition: dbm_mpi.c:290
dbm_mpi_comm_t dbm_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
Definition: dbm_mpi.c:66
int dbm_mpi_comm_t
Definition: dbm_mpi.h:18
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Definition: grid_common.h:153
static void const int const int i
void print_func(char *message, int output_unit)
Definition: grid_miniapp.c:21
real(dp), dimension(3) a
Definition: ai_eri_debug.F:31
real(dp), dimension(3) b
Definition: ai_eri_debug.F:31