d6/d5b/dbm__multiply__gpu_8h_source.html

/*----------------------------------------------------------------------------*/

/*  CP2K: A general program to perform molecular dynamics simulations         */

/*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */

/*                                                                            */

/*  SPDX-License-Identifier: BSD-3-Clause                                     */

/*----------------------------------------------------------------------------*/

#ifndef DBM_MULTIPLY_GPU_H

#define DBM_MULTIPLY_GPU_H


#include "../offload/offload_runtime.h"

#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)


#include "dbm_internal.h"

#include "dbm_shard.h"


/*******************************************************************************

 * \brief Internal struct for storing per shard gpu objects.

 *

 *        Groups a device data pointer, two integer size fields and an offload

 *        stream/event pair that belong to a single shard.

 * \author Ole Schuett

 ******************************************************************************/

typedef struct {

  double *data; // on the device

  // NOTE(review): presumably the number of doubles currently in use in data,

  // confirm against the implementation.

  int data_size;

  // NOTE(review): presumably the capacity of data counted in doubles,

  // confirm against the implementation.

  int data_allocated;

  // Offload runtime stream and event handles held for this shard.

  offloadStream_t stream;

  offloadEvent_t event;

} dbm_shard_gpu_t;


/*******************************************************************************

 * \brief Internal struct for storing the gpu backend's context.

 *

 *        A pointer to this struct is passed to every dbm_multiply_gpu_*

 *        routine declared in this header.

 * \author Ole Schuett

 ******************************************************************************/

typedef struct {

  // Offload runtime stream and event handles owned by the context itself

  // (in addition to the per-shard ones in dbm_shard_gpu_t).

  offloadStream_t main_stream;

  offloadEvent_t upload_event;


  // Array of per-shard gpu objects.

  // NOTE(review): presumably holds nshards entries, confirm.

  int nshards;

  dbm_shard_gpu_t *shards_c_dev;


  // NOTE(review): the _dev suffix suggests device-side counterparts of the

  // packs given to dbm_multiply_gpu_upload_packs, confirm.

  dbm_pack_t pack_a_dev;

  dbm_pack_t pack_b_dev;


  // NOTE(review): presumably batches_dev is a device buffer for task batches

  // whose size is derived from max_batch_size, confirm.

  int max_batch_size;

  dbm_task_t *batches_dev;

} dbm_multiply_gpu_context_t;


/*******************************************************************************

 * \brief Internal routine for initializing the gpu backend.

 * \param max_batch_size NOTE(review): presumably the largest number of tasks

 *                       a single batch may contain, confirm.

 * \param nshards        NOTE(review): presumably the number of entries in

 *                       shards_c_host, confirm.

 * \param shards_c_host  Host-side matrix shards (dbm_shard_t).

 * \param ctx            The gpu backend's context.

 * \author Ole Schuett

 ******************************************************************************/

void dbm_multiply_gpu_start(const int max_batch_size, const int nshards,

                            dbm_shard_t *shards_c_host,

                            dbm_multiply_gpu_context_t *ctx);


/*******************************************************************************

 * \brief Internal routine for uploading newly arrived packs onto the device.

 * \param pack_a Read-only pack (dbm_pack_t) to upload.

 * \param pack_b Read-only pack (dbm_pack_t) to upload.

 * \param ctx    The gpu backend's context.

 * \return NOTE(review): the meaning of the bool result is not visible in this

 *         header, check the implementation before relying on it.

 * \author Ole Schuett and Hans Pabst

 ******************************************************************************/

bool dbm_multiply_gpu_upload_packs(const dbm_pack_t *pack_a,

                                   const dbm_pack_t *pack_b,

                                   dbm_multiply_gpu_context_t *ctx);


/*******************************************************************************

 * \brief Internal routine for executing the tasks in the given batch on the

 *        GPU.

 * \param ntasks  NOTE(review): presumably the number of entries in batch,

 *                confirm.

 * \param batch   Read-only array of tasks (dbm_task_t).

 * \param alpha   Scalar factor. NOTE(review): presumably applied to the block

 *                products, confirm.

 * \param shard_c Matrix shard (dbm_shard_t) passed as non-const.

 * \param kshard  NOTE(review): presumably the index of shard_c among the

 *                shards, confirm.

 * \param finish  NOTE(review): presumably flags the last batch, confirm.

 * \param ctx     The gpu backend's context.

 * \author Ole Schuett

 ******************************************************************************/

void dbm_multiply_gpu_process_batch(const int ntasks, const dbm_task_t *batch,

                                    const double alpha, dbm_shard_t *shard_c,

                                    const int kshard, const bool finish,

                                    dbm_multiply_gpu_context_t *ctx);


/*******************************************************************************

 * \brief Internal routine for shutting down the gpu backend.

 * \param ctx The gpu backend's context.

 *            NOTE(review): presumably one previously set up by

 *            dbm_multiply_gpu_start, confirm.

 * \author Ole Schuett

 ******************************************************************************/

void dbm_multiply_gpu_stop(dbm_multiply_gpu_context_t *ctx);


#endif // defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)

#endif


// EOF

dbm_internal.h

dbm_shard.h

dbm_pack_t
Internal struct for storing a pack - essentially a shard for MPI.
Definition dbm_internal.h:31

dbm_shard_t
Internal struct for storing a matrix shard.
Definition dbm_shard.h:30

dbm_task_t
Internal struct for storing a task, i.e. a single block multiplication.
Definition dbm_internal.h:42