(git:374b731)
Loading...
Searching...
No Matches
dbm_library.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <inttypes.h>
10#include <omp.h>
11#include <stdbool.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15
16#include "dbm_library.h"
17#include "dbm_mempool.h"
18#include "dbm_mpi.h"
19
20#define DBM_NUM_COUNTERS 64
21
22static int64_t **per_thread_counters = NULL;
23static bool library_initialized = false;
24static int max_threads = 0;
25
26#if !defined(_OPENMP)
27#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
28#endif
29
30/*******************************************************************************
31 * \brief Initializes the DBM library.
32 * \author Ole Schuett
33 ******************************************************************************/
34void dbm_library_init(void) {
35 assert(omp_get_num_threads() == 1);
36
38 fprintf(stderr, "DBM library was already initialized.\n");
39 abort();
40 }
41
42 max_threads = omp_get_max_threads();
43 per_thread_counters = malloc(max_threads * sizeof(int64_t *));
44
45 // Using parallel regions to ensure memory is allocated near a thread's core.
46#pragma omp parallel default(none) shared(per_thread_counters) \
47 num_threads(max_threads)
48 {
49 const int ithread = omp_get_thread_num();
50 const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
51 per_thread_counters[ithread] = malloc(counters_size);
52 memset(per_thread_counters[ithread], 0, counters_size);
53 }
54
56}
57
58/*******************************************************************************
59 * \brief Finalizes the DBM library.
60 * \author Ole Schuett
61 ******************************************************************************/
63 assert(omp_get_num_threads() == 1);
64
66 fprintf(stderr, "Error: DBM library is not initialized.\n");
67 abort();
68 }
69
70 for (int i = 0; i < max_threads; i++) {
72 }
75
77 library_initialized = false;
78}
79
/*******************************************************************************
 * \brief Computes min(3, floor(log10(x))) for x >= 1.
 *        Every x below 10 (including zero and negative values) yields 0, so
 *        the result is always one of 0, 1, 2 or 3.
 * \author Ole Schuett
 ******************************************************************************/
static int floorlog10(const int x) {
  if (x >= 1000) {
    return 3; // saturate: everything with four or more digits shares a bin
  }
  if (x >= 100) {
    return 2;
  }
  if (x >= 10) {
    return 1;
  }
  return 0;
}
96
97/*******************************************************************************
98 * \brief Add given block multiplication to stats. This routine is thread-safe.
99 * \author Ole Schuett
100 ******************************************************************************/
101void dbm_library_counter_increment(const int m, const int n, const int k) {
102 const int ithread = omp_get_thread_num();
103 assert(ithread < max_threads);
104 const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
105 per_thread_counters[ithread][idx]++;
106}
107
/*******************************************************************************
 * \brief Comparator passed to qsort to compare two counters.
 *        Orders entries by their leading int64_t value in descending order.
 * \param a  Pointer to the first entry; its first int64_t is the count.
 * \param b  Pointer to the second entry; its first int64_t is the count.
 * \return Negative if a sorts before b, positive if after, zero if equal.
 * \author Ole Schuett
 ******************************************************************************/
static int compare_counters(const void *a, const void *b) {
  const int64_t count_a = *(const int64_t *)a;
  const int64_t count_b = *(const int64_t *)b;
  // Compare instead of subtracting: an int64_t difference narrowed to int can
  // lose its sign or collapse to zero once counts differ by 2^31 or more.
  return (count_b > count_a) - (count_b < count_a);
}
115
116/*******************************************************************************
117 * \brief Prints statistics gathered by the DBM library.
118 * \author Ole Schuett
119 ******************************************************************************/
120void dbm_library_print_stats(const int fortran_comm,
121 void (*print_func)(char *, int),
122 const int output_unit) {
123 assert(omp_get_num_threads() == 1);
124
125 if (!library_initialized) {
126 fprintf(stderr, "Error: DBM library is not initialized.\n");
127 abort();
128 }
129
130 const dbm_mpi_comm_t comm = dbm_mpi_comm_f2c(fortran_comm);
131 // Sum all counters across threads and mpi ranks.
132 int64_t counters[DBM_NUM_COUNTERS][2];
133 memset(counters, 0, DBM_NUM_COUNTERS * 2 * sizeof(int64_t));
134 double total = 0.0;
135 for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
136 counters[i][1] = i; // needed as inverse index after qsort
137 for (int j = 0; j < max_threads; j++) {
138 counters[i][0] += per_thread_counters[j][i];
139 }
140 dbm_mpi_sum_int64(&counters[i][0], 1, comm);
141 total += counters[i][0];
142 }
143
144 // Sort counters.
145 qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
146
147 // Print counters.
148 print_func("\n", output_unit);
149 print_func(" ----------------------------------------------------------------"
150 "---------------\n",
151 output_unit);
152 print_func(" - "
153 " -\n",
154 output_unit);
155 print_func(" - DBM STATISTICS "
156 " -\n",
157 output_unit);
158 print_func(" - "
159 " -\n",
160 output_unit);
161 print_func(" ----------------------------------------------------------------"
162 "---------------\n",
163 output_unit);
164 print_func(" M x N x K "
165 "COUNT PERCENT\n",
166 output_unit);
167
168 const char *labels[] = {"?", "??", "???", ">999"};
169 for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
170 if (counters[i][0] == 0) {
171 continue; // skip empty counters
172 }
173 const double percent = 100.0 * counters[i][0] / total;
174 const int idx = counters[i][1];
175 const int m = (idx % 64) / 16;
176 const int n = (idx % 16) / 4;
177 const int k = (idx % 4) / 1;
178 char buffer[100];
179 snprintf(buffer, sizeof(buffer),
180 " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m],
181 labels[n], labels[k], counters[i][0], percent);
182 print_func(buffer, output_unit);
183 }
184
185 print_func(" ----------------------------------------------------------------"
186 "---------------\n",
187 output_unit);
188}
189
190// EOF
static bool library_initialized
Definition dbm_library.c:23
void dbm_library_finalize(void)
Finalizes the DBM library.
Definition dbm_library.c:62
void dbm_library_counter_increment(const int m, const int n, const int k)
Add given block multiplication to stats. This routine is thread-safe.
#define DBM_NUM_COUNTERS
Definition dbm_library.c:20
static int floorlog10(const int x)
Computes min(3, floor(log10(x))).
Definition dbm_library.c:84
void dbm_library_init(void)
Initializes the DBM library.
Definition dbm_library.c:34
static int max_threads
Definition dbm_library.c:24
static int compare_counters(const void *a, const void *b)
Comparator passed to qsort to compare two counters.
static int64_t ** per_thread_counters
Definition dbm_library.c:22
void dbm_mempool_clear(void)
Internal routine for freeing all memory in the pool.
static void print_func(char *message, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:28
void dbm_mpi_sum_int64(int64_t *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_INT64_T.
Definition dbm_mpi.c:290
dbm_mpi_comm_t dbm_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
Definition dbm_mpi.c:66
int dbm_mpi_comm_t
Definition dbm_mpi.h:18
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i