(git:34ef472)
dbm_multiply_gpu.h
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* CP2K: A general program to perform molecular dynamics simulations */
3 /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 /* */
5 /* SPDX-License-Identifier: BSD-3-Clause */
6 /*----------------------------------------------------------------------------*/
7 
8 #ifndef DBM_MULTIPLY_GPU_H
9 #define DBM_MULTIPLY_GPU_H
10 
11 #include "../offload/offload_runtime.h"
12 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
13 
14 #include "dbm_multiply_internal.h"
15 #include "dbm_shard.h"
16 
17 /*******************************************************************************
18  * \brief Internal struct for storing per-shard GPU objects.
19  * \author Ole Schuett
20  ******************************************************************************/
21 typedef struct {
22  double *data; // buffer of doubles, on the device
23  int data_size; // NOTE(review): presumably the number of doubles in use in data - confirm
24  int data_allocated; // NOTE(review): presumably the capacity of data, in doubles - confirm
25  offloadStream_t stream; // offload stream kept per shard
26 } dbm_shard_gpu_t;
27 
28 /*******************************************************************************
29  * \brief Internal struct for storing the GPU backend's context.
30  * \author Ole Schuett
31  ******************************************************************************/
32 typedef struct {
33  offloadStream_t main_stream; // context-wide stream; each dbm_shard_gpu_t additionally carries its own
34 
35  int nshards; // NOTE(review): presumably the length of shards_c_host and shards_c_dev - confirm
36  dbm_shard_t *shards_c_host; // matrix shards; the name suggests matrix C on the host - TODO confirm
37  dbm_shard_gpu_t *shards_c_dev; // per-shard GPU objects, see dbm_shard_gpu_t
38 
39  dbm_pack_t pack_a_dev; // NOTE(review): presumably the device-side counterpart of pack A - confirm
40  dbm_pack_t pack_b_dev; // NOTE(review): presumably the device-side counterpart of pack B - confirm
41 
42  int max_batch_size; // NOTE(review): presumably the capacity of batches_dev, in tasks - confirm
43  dbm_task_t *batches_dev; // tasks; the _dev suffix suggests device memory - TODO confirm
44 } dbm_multiply_gpu_context_t;
45 
46 /*******************************************************************************
47  * \brief Internal routine for initializing the gpu backend.
    * \param ctx The gpu backend's context (dbm_multiply_gpu_context_t).
    * NOTE(review): max_batch_size, nshards and shards_c_host share their names
    * with fields of the context; presumably they are stored there - confirm
    * against the implementation.
48  * \author Ole Schuett
49  ******************************************************************************/
50 void dbm_multiply_gpu_start(const int max_batch_size, const int nshards,
51  dbm_shard_t *shards_c_host,
52  dbm_multiply_gpu_context_t *ctx);
53 
54 /*******************************************************************************
55  * \brief Internal routine for uploading newly arrived packs onto the device.
    * \param pack_a Pack taken as pointer-to-const, i.e. not modified through it.
    * \param pack_b Pack taken as pointer-to-const, i.e. not modified through it.
    * \param ctx The gpu backend's context (dbm_multiply_gpu_context_t).
    * NOTE(review): presumably the uploads land in ctx->pack_a_dev and
    * ctx->pack_b_dev - confirm against the implementation.
56  * \author Ole Schuett
57  ******************************************************************************/
58 void dbm_multiply_gpu_upload_packs(const dbm_pack_t *pack_a,
59  const dbm_pack_t *pack_b,
60  dbm_multiply_gpu_context_t *ctx);
61 
62 /*******************************************************************************
63  * \brief Internal routine for executing the tasks in given batch on the GPU.
    * \param batch Tasks to execute, taken as pointer-to-const.
    * \param mnk_range A 3x2 array of ints.
    * \param ctx The gpu backend's context (dbm_multiply_gpu_context_t).
    * NOTE(review): presumably ntasks is the number of entries in batch,
    * mnk_range holds a [lower, upper] pair for each of m, n and k, alpha is a
    * scaling factor and kshard indexes the shard of C being updated - confirm
    * all of these against the implementation.
64  * \author Ole Schuett
65  ******************************************************************************/
66 void dbm_multiply_gpu_process_batch(const int ntasks, const dbm_task_t *batch,
67  const int mnk_range[3][2],
68  const double alpha, const int kshard,
69  dbm_multiply_gpu_context_t *ctx);
70 
71 /*******************************************************************************
72  * \brief Internal routine for downloading results from the device.
    * \param ctx The gpu backend's context (dbm_multiply_gpu_context_t).
    * NOTE(review): presumably the results are copied into ctx->shards_c_host -
    * confirm against the implementation.
73  * \author Ole Schuett
74  ******************************************************************************/
75 void dbm_multiply_gpu_download_results(dbm_multiply_gpu_context_t *ctx);
76 
77 /*******************************************************************************
78  * \brief Internal routine for shutting down the gpu backend.
    * \param ctx The gpu backend's context (dbm_multiply_gpu_context_t).
    * NOTE(review): presumably the counterpart of dbm_multiply_gpu_start that
    * releases what start set up in ctx - confirm against the implementation.
79  * \author Ole Schuett
80  ******************************************************************************/
81 void dbm_multiply_gpu_stop(dbm_multiply_gpu_context_t *ctx);
82 
83 #endif // defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
84 #endif
85 
86 // EOF
Internal struct for storing a pack - essentially a shard for MPI.
Internal struct for storing a matrix shard.
Definition: dbm_shard.h:30
Internal struct for storing a task, i.e. a single block multiplication.