d0/d6f/grid__library_8c_source.html

/*----------------------------------------------------------------------------*/

/*  CP2K: A general program to perform molecular dynamics simulations         */

/*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */

/*                                                                            */

/*  SPDX-License-Identifier: BSD-3-Clause                                     */

/*----------------------------------------------------------------------------*/

#include "grid_library.h"

#include "grid_common.h"

#include "grid_constants.h"


#include "../../mpiwrap/cp_mpi.h"

#include "../../offload/offload_runtime.h"


#include <assert.h>

#include <omp.h>

#include <stddef.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>


// Calls the print callback FN with MSG, the length of MSG, and OUTPUT_UNIT.
// MSG is expanded twice (as the argument and inside strlen), so it has to be
// a side-effect free expression yielding a NUL-terminated string.
#define GRID_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT)                               \
  ((FN)((MSG), (int)strlen(MSG), (OUTPUT_UNIT)))


// Counter dimensions: each thread keeps one counter per combination of
// backend, kernel, and lp, stored in a flat array of the product of these.

// Number of backend slots; the statistics table has one name per slot.
#define GRID_NBACKENDS 5

// Number of kernel slots; the statistics table has one name per slot.
#define GRID_NKERNELS 4

// Number of lp bins; lp values of GRID_MAX_LP - 1 and above share the last bin.
#define GRID_MAX_LP 20


// State that the library keeps separately for every OpenMP thread.
typedef struct {

  // Sphere cache handed out by grid_library_get_sphere_cache.
  grid_sphere_cache sphere_cache;

  // Usage counters, flattened as
  // backend * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP + lp.
  long counters[GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP];

} grid_library_globals;


// Table of max_threads pointers, indexed by OpenMP thread number. Allocated by
// grid_library_init and released by grid_library_finalize.
static grid_library_globals **per_thread_globals = NULL;

// True between grid_library_init and grid_library_finalize.
static bool library_initialized = false;

// Value of omp_get_max_threads() captured by grid_library_init.
static int max_threads = 0;


// Current configuration; these defaults apply until grid_library_set_config
// is called.
static grid_library_config config = {

    .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};


// The library indexes its per-thread state with omp_get_thread_num(), so it
// cannot be built without OpenMP.
#if !defined(_OPENMP)
#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
#endif

// The library relies on assert() for its allocation and bounds checks, which
// NDEBUG would compile away.
#if defined(NDEBUG)
#error                                                                         \
    "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
#endif


/*******************************************************************************
 * \brief Initializes the grid library.
 *        Allocates one zeroed grid_library_globals per OpenMP thread, sized by
 *        omp_get_max_threads(). Aborts if the library is already initialized.
 * \author Ole Schuett
 ******************************************************************************/
void grid_library_init(void) {
  if (library_initialized) {
    printf("Error: Grid library was already initialized.\n");
    abort();
  }

#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
  // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
  // CUDA does not allow to increase this limit after a kernel was launched.
  // Unfortunately, the required memory is hard to predict because we neither
  // know which tasks will be run nor how many thread blocks the available GPU
  // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
  offloadEnsureMallocHeapSize(64 * 1024 * 1024);
#endif

  max_threads = omp_get_max_threads();
  per_thread_globals = malloc(max_threads * sizeof(grid_library_globals *));
  assert(per_thread_globals != NULL);

  // Mark every slot as empty so that slots which no thread fills in the
  // parallel region below can be recognized afterwards.
  for (int i = 0; i < max_threads; i++) {
    per_thread_globals[i] = NULL;
  }

// Using parallel regions to ensure memory is allocated near a thread's core.
#pragma omp parallel default(none) shared(per_thread_globals)                  \
    num_threads(max_threads)
  {
    const int ithread = omp_get_thread_num();
    per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
    assert(per_thread_globals[ithread] != NULL);
    memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
  }

  // num_threads is only a request: the runtime may start a smaller team.
  // Fill the remaining slots here, because finalize and print_stats walk all
  // max_threads entries.
  for (int i = 0; i < max_threads; i++) {
    if (per_thread_globals[i] == NULL) {
      per_thread_globals[i] = calloc(1, sizeof(grid_library_globals));
      assert(per_thread_globals[i] != NULL);
    }
  }

  library_initialized = true;
}


/*******************************************************************************

 * \brief Finalizes the grid library.

 * \author Ole Schuett

 ******************************************************************************/


void grid_library_finalize(void) {

  if (!library_initialized) {

    printf("Error: Grid library is not initialized.\n");

    abort();

  }


  for (int i = 0; i < max_threads; i++) {

    grid_sphere_cache_free(&per_thread_globals[i]->sphere_cache);

    free(per_thread_globals[i]);

  }

  free(per_thread_globals);

  per_thread_globals = NULL;

  library_initialized = false;

}


/*******************************************************************************

 * \brief Returns a pointer to the thread local sphere cache.

 * \author Ole Schuett

 ******************************************************************************/


grid_sphere_cache *grid_library_get_sphere_cache(void) {

  const int ithread = omp_get_thread_num();

  assert(ithread < max_threads);

  return &per_thread_globals[ithread]->sphere_cache;

}


/*******************************************************************************

 * \brief Configures the grid library.

 * \author Ole Schuett

 ******************************************************************************/


void grid_library_set_config(const enum grid_backend backend,

                             const bool validate, const bool apply_cutoff) {

  config.backend = backend;

  config.validate = validate;

  config.apply_cutoff = apply_cutoff;

}


/*******************************************************************************
 * \brief Returns the library config.
 *        The config is returned by value, i.e. as a copy.
 * \author Ole Schuett
 ******************************************************************************/
grid_library_config grid_library_get_config(void) {
  const grid_library_config snapshot = config;
  return snapshot;
}


/*******************************************************************************
 * \brief Adds given increment to counter specified by lp, backend, and kernel.
 *        The counter belongs to the calling OpenMP thread. Values of lp at or
 *        above GRID_MAX_LP - 1 are all counted in the last lp bin.
 * \author Ole Schuett
 ******************************************************************************/
void grid_library_counter_add(const int lp, const enum grid_backend backend,
                              const enum grid_library_kernel kernel,
                              const int increment) {
  assert(lp >= 0);

  // Check both ends of the range: a negative component would produce a
  // negative flat index and write in front of the counters array.
  const int kern = (int)kernel;
  assert(0 <= kern && kern < GRID_NKERNELS);

  // Backends are numbered relative to GRID_BACKEND_REF. Any backend value below
  // it is rejected here (presumably GRID_BACKEND_AUTO is one - TODO confirm).
  const int back = (int)backend - (int)GRID_BACKEND_REF;
  assert(0 <= back && back < GRID_NBACKENDS);

  const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kern * GRID_MAX_LP +
                  imin(lp, GRID_MAX_LP - 1);

  const int ithread = omp_get_thread_num();
  assert(0 <= ithread && ithread < max_threads);
  per_thread_globals[ithread]->counters[idx] += increment;
}


/*******************************************************************************
 * \brief Comparator passed to qsort to compare two counters.
 *        Only the first long behind each pointer is compared. The result is
 *        positive if b's count is larger, negative if a's count is larger,
 *        and zero if they are equal, which sorts in descending order.
 * \author Ole Schuett
 ******************************************************************************/
static int compare_counters(const void *a, const void *b) {
  const long count_a = *(const long *)a;
  const long count_b = *(const long *)b;
  // Compare instead of subtracting: a long difference can overflow, and
  // narrowing it to int can flip or drop the sign.
  return (count_b > count_a) - (count_b < count_a);
}


/*******************************************************************************
 * \brief Prints statistics gathered by the grid library.
 *        Sums the counters over all threads and over all ranks of the given
 *        communicator, then prints the non-zero ones in descending order
 *        through print_func. Prints nothing if all counters are zero.
 *        Aborts if the library is not initialized.
 * \param fortran_comm Fortran handle of the MPI communicator to sum over.
 * \param print_func   Callback receiving message, message length, and
 *                     output_unit.
 * \param output_unit  Passed through unchanged to print_func.
 * \author Ole Schuett
 ******************************************************************************/
void grid_library_print_stats(const int fortran_comm,
                              void (*print_func)(const char *, int, int),
                              const int output_unit) {
  if (!library_initialized) {
    printf("Error: Grid library is not initialized.\n");
    abort();
  }

  const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);

  // An enum constant keeps the arrays below fixed-size instead of VLAs.
  enum { ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP };

  // Sum all counters across threads...
  long sums[ncounters];
  for (int i = 0; i < ncounters; i++) {
    sums[i] = 0;
    for (int j = 0; j < max_threads; j++) {
      sums[i] += per_thread_globals[j]->counters[i];
    }
  }
  // ...and across mpi ranks, using a single reduction for the whole array.
  cp_mpi_sum_long(sums, ncounters, comm);

  // Pair every count with its flat index and determine if anything needs to
  // be printed.
  long counters[ncounters][2];
  double total = 0.0;
  bool print = false;
  for (int i = 0; i < ncounters; i++) {
    counters[i][0] = sums[i];
    counters[i][1] = i; // needed as inverse index after qsort
    total += sums[i];
    if (sums[i] != 0) {
      print = true;
    }
  }
  if (!print) {
    return; // nothing to be printed
  }

  // Sort counters.
  qsort(counters, ncounters, sizeof(counters[0]), &compare_counters);

  // Print counters.
  const char *const separator =
      " ----------------------------------------------------------------"
      "---------------\n";
  const char *const empty_row =
      " -                                                               "
      "              -\n";

  GRID_LIBRARY_PRINT(print_func, "\n", output_unit);
  GRID_LIBRARY_PRINT(print_func, separator, output_unit);
  GRID_LIBRARY_PRINT(print_func, empty_row, output_unit);
  GRID_LIBRARY_PRINT(
      print_func,
      " -                                GRID STATISTICS                "
      "              -\n",
      output_unit);
  GRID_LIBRARY_PRINT(print_func, empty_row, output_unit);
  GRID_LIBRARY_PRINT(print_func, separator, output_unit);
  GRID_LIBRARY_PRINT(
      print_func,
      " LP    KERNEL             BACKEND                              "
      "COUNT     PERCENT\n",
      output_unit);

  const char *kernel_names[] = {"collocate ortho", "integrate ortho",
                                "collocate general", "integrate general"};
  const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};

  const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
  for (int i = 0; i < ncounters; i++) {
    if (counters[i][0] == 0) {
      continue; // skip empty counters
    }
    const double percent = 100.0 * counters[i][0] / total;

    // Decompose the flat index back into backend, kernel, and lp.
    const int flat = (int)counters[i][1];
    const int back = flat / backend_stride;
    const int kern = (flat % backend_stride) / GRID_MAX_LP;
    const int lp = (flat % backend_stride) % GRID_MAX_LP;

    char buffer[100];
    snprintf(buffer, sizeof(buffer), " %-5i %-17s  %-6s  %34li %10.2f%%\n", lp,
             kernel_names[kern], backend_names[back], counters[i][0], percent);
    GRID_LIBRARY_PRINT(print_func, buffer, output_unit);
  }

  GRID_LIBRARY_PRINT(print_func, separator, output_unit);
}


// EOF

cp_mpi_sum_long
void cp_mpi_sum_long(long *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_LONG.
Definition cp_mpi.c:340

cp_mpi_comm_f2c
cp_mpi_comm_t cp_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
Definition cp_mpi.c:69

cp_mpi_comm_t
int cp_mpi_comm_t
Definition cp_mpi.h:18

print_func
static void print_func(const char *msg, int msglen, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:29

imin
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
Definition dbm_miniapp.c:40

grid_common.h

idx
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Definition grid_common.h:156

grid_constants.h

grid_backend
grid_backend
Definition grid_constants.h:48

GRID_BACKEND_REF
@ GRID_BACKEND_REF
Definition grid_constants.h:50

GRID_BACKEND_AUTO
@ GRID_BACKEND_AUTO
Definition grid_constants.h:49

i
static void const int const int i
Definition grid_cpu_collint.h:38

apply_cutoff
void apply_cutoff(void *ptr)
Definition grid_dgemm_context.c:493

grid_library_finalize
void grid_library_finalize(void)
Finalizes the grid library.
Definition grid_library.c:89

GRID_MAX_LP
#define GRID_MAX_LP
Definition grid_library.c:27

library_initialized
static bool library_initialized
Definition grid_library.c:35

GRID_LIBRARY_PRINT
#define GRID_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT)
Definition grid_library.c:21

config
static grid_library_config config
Definition grid_library.c:37

grid_library_get_sphere_cache
grid_sphere_cache * grid_library_get_sphere_cache(void)
Returns a pointer to the thread local sphere cache.
Definition grid_library.c:108

GRID_NKERNELS
#define GRID_NKERNELS
Definition grid_library.c:26

grid_library_init
void grid_library_init(void)
Initializes the grid library.
Definition grid_library.c:53

GRID_NBACKENDS
#define GRID_NBACKENDS
Definition grid_library.c:25

max_threads
static int max_threads
Definition grid_library.c:36

grid_library_get_config
grid_library_config grid_library_get_config(void)
Returns the library config.
Definition grid_library.c:129

grid_library_counter_add
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
Definition grid_library.c:135

grid_library_set_config
void grid_library_set_config(const enum grid_backend backend, const bool validate, const bool apply_cutoff)
Configures the grid library.
Definition grid_library.c:118

grid_library_print_stats
void grid_library_print_stats(const int fortran_comm, void(*print_func)(const char *, int, int), const int output_unit)
Prints statistics gathered by the grid library.
Definition grid_library.c:161

compare_counters
static int compare_counters(const void *a, const void *b)
Comperator passed to qsort to compare two counters.
Definition grid_library.c:153

per_thread_globals
static grid_library_globals ** per_thread_globals
Definition grid_library.c:34

grid_library.h

grid_library_kernel
grid_library_kernel
Various kernels provided by the grid library.
Definition grid_library.h:66

grid_sphere_cache_free
void grid_sphere_cache_free(grid_sphere_cache *cache)
Free the memory of the sphere cache.
Definition grid_sphere_cache.c:168

grid_library_config
Configuration of the grid library.
Definition grid_library.h:34

grid_library_config::validate
bool validate
Definition grid_library.h:37

grid_library_config::backend
enum grid_backend backend
Definition grid_library.h:35

grid_library_config::apply_cutoff
bool apply_cutoff
Definition grid_library.h:38

grid_library_globals
Definition grid_library.c:29

grid_library_globals::sphere_cache
grid_sphere_cache sphere_cache
Definition grid_library.c:30

grid_library_globals::counters
long counters[GRID_NBACKENDS *GRID_NKERNELS *GRID_MAX_LP]
Definition grid_library.c:31

grid_sphere_cache
Struct holding the entire sphere cache, ie. for all grids.
Definition grid_sphere_cache.h:28