20 #define DBM_NUM_COUNTERS 64
27 #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
35 assert(omp_get_num_threads() == 1);
38 fprintf(stderr,
"DBM library was already initialized.\n");
46 #pragma omp parallel default(none) shared(per_thread_counters) \
47 num_threads(max_threads)
49 const int ithread = omp_get_thread_num();
63 assert(omp_get_num_threads() == 1);
66 fprintf(stderr,
"Error: DBM library is not initialized.\n");
102 const int ithread = omp_get_thread_num();
113 return *(
const int64_t *)
b - *(
const int64_t *)
a;
122 const int output_unit) {
123 assert(omp_get_num_threads() == 1);
126 fprintf(stderr,
"Error: DBM library is not initialized.\n");
141 total += counters[
i][0];
149 print_func(
" ----------------------------------------------------------------"
161 print_func(
" ----------------------------------------------------------------"
168 const char *labels[] = {
"?",
"??",
"???",
">999"};
170 if (counters[
i][0] == 0) {
173 const double percent = 100.0 * counters[
i][0] / total;
174 const int idx = counters[
i][1];
175 const int m = (
idx % 64) / 16;
176 const int n = (
idx % 16) / 4;
177 const int k = (
idx % 4) / 1;
179 snprintf(buffer,
sizeof(buffer),
180 " %4s x %4s x %4s %46" PRId64
" %10.2f%%\n", labels[m],
181 labels[n], labels[k], counters[
i][0], percent);
185 print_func(
" ----------------------------------------------------------------"
static bool library_initialized
void dbm_library_finalize(void)
Finalizes the DBM library.
void dbm_library_counter_increment(const int m, const int n, const int k)
Add given block multiplication to stats. This routine is thread-safe.
static int floorlog10(const int x)
Computes min(3, floor(log10(x))).
void dbm_library_print_stats(const int fortran_comm, void(*print_func)(char *, int), const int output_unit)
Prints statistics gathered by the DBM library.
void dbm_library_init(void)
Initializes the DBM library.
static int compare_counters(const void *a, const void *b)
Comparator passed to qsort to compare two counters.
static int64_t ** per_thread_counters
void dbm_mempool_clear(void)
Internal routine for freeing all memory in the pool.
void dbm_mpi_sum_int64(int64_t *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_INT64_T.
dbm_mpi_comm_t dbm_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i
void print_func(char *message, int output_unit)