8#include "../mpiwrap/cp_mpi.h"
9#include "../offload/offload_mempool.h"
18#define DBM_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT) \
19 ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
20#define DBM_NUM_COUNTERS 64
27#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
35 assert(omp_get_num_threads() == 1);
38 fprintf(stderr,
"DBM library was already initialized.\n");
47#pragma omp parallel default(none) shared(per_thread_counters) \
48 num_threads(max_threads)
50 const int ithread = omp_get_thread_num();
65 assert(omp_get_num_threads() == 1);
68 fprintf(stderr,
"Error: DBM library is not initialized.\n");
104 const int ithread = omp_get_thread_num();
115 return *(
const int64_t *)b - *(
const int64_t *)a;
122void dbm_library_print_stats(
const int fortran_comm,
124 const int output_unit) {
125 assert(omp_get_num_threads() == 1);
128 fprintf(stderr,
"Error: DBM library is not initialized.\n");
142 total += counters[
i][0];
151 if (counters[
i][0] != 0) {
163 " ----------------------------------------------------------------"
183 " ----------------------------------------------------------------"
192 const char *labels[] = {
"?",
"??",
"???",
">999"};
195 if (counters[
i][0] == 0) {
198 const double percent = 100.0 * counters[
i][0] / total;
199 const int idx = counters[
i][1];
200 const int m = (
idx % 64) / 16;
201 const int n = (
idx % 16) / 4;
202 const int k = (
idx % 4) / 1;
203 snprintf(buffer,
sizeof(buffer),
204 " %4s x %4s x %4s %46" PRId64
" %10.2f%%\n", labels[m],
205 labels[n], labels[k], counters[
i][0], percent);
211 " ----------------------------------------------------------------"
void cp_mpi_sum_int64(int64_t *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_INT64_T.
cp_mpi_comm_t cp_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
static bool library_initialized
void dbm_library_finalize(void)
Finalizes the DBM library.
void dbm_library_counter_increment(const int m, const int n, const int k)
Add given block multiplication to stats. This routine is thread-safe.
#define DBM_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT)
static int floorlog10(const int x)
Computes min(3, floor(log10(x))).
void dbm_library_init(void)
Initializes the DBM library.
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
static int64_t ** per_thread_counters
static void print_func(const char *msg, int msglen, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i
void offload_mempool_clear(void)
Internal routine for freeing all memory in the pool.