(git:374b731)
Loading...
Searching...
No Matches
dbm_multiply_gpu.h
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#ifndef DBM_MULTIPLY_GPU_H
9#define DBM_MULTIPLY_GPU_H
10
11#include "../offload/offload_runtime.h"
12#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
13
15#include "dbm_shard.h"
16
17/*******************************************************************************
18 * \brief Internal struct for storing per shard gpu objects.
19 * \author Ole Schuett
20 ******************************************************************************/
21typedef struct {
22 double *data; // on the device
23 int data_size; // presumably the number of entries of data in use - TODO confirm (entries vs. bytes)
24 int data_allocated; // presumably the capacity of data, same unit as data_size - TODO confirm
25 offloadStream_t stream; // stream object kept per shard (one per dbm_shard_gpu_t)
26} dbm_shard_gpu_t;
27
28/*******************************************************************************
29 * \brief Internal struct for storing the gpu backend's context.
30 * \author Ole Schuett
31 ******************************************************************************/
32typedef struct {
33 offloadStream_t main_stream; // context-wide stream, separate from the per-shard streams in dbm_shard_gpu_t
34
35 int nshards; // presumably the length of shards_c_host and shards_c_dev - TODO confirm
36 dbm_shard_t *shards_c_host; // host-side shards; same name and type as the dbm_multiply_gpu_start argument
37 dbm_shard_gpu_t *shards_c_dev; // per shard gpu objects
38
39 dbm_pack_t pack_a_dev; // NOTE(review): presumably the device counterpart of pack_a from dbm_multiply_gpu_upload_packs - confirm
40 dbm_pack_t pack_b_dev; // NOTE(review): presumably the device counterpart of pack_b - confirm
41
42 int max_batch_size; // same name as the dbm_multiply_gpu_start argument; presumably counted in tasks - confirm
43 dbm_task_t *batches_dev; // NOTE(review): presumably device-side storage for task batches - confirm
44} dbm_multiply_gpu_context_t;
45
46/*******************************************************************************
47 * \brief Internal routine for initializing the gpu backend.
48 * \author Ole Schuett
49 ******************************************************************************/
// The first three parameters share names with fields of dbm_multiply_gpu_context_t;
// presumably they are stored into *ctx by this call - TODO confirm in the implementation.
50void dbm_multiply_gpu_start(const int max_batch_size, const int nshards,
51 dbm_shard_t *shards_c_host,
52 dbm_multiply_gpu_context_t *ctx);
53
54/*******************************************************************************
55 * \brief Internal routine for uploading newly arrived packs onto the device.
56 * \author Ole Schuett
57 ******************************************************************************/
// pack_a and pack_b are read-only inputs (pointer-to-const); ctx is passed non-const.
// NOTE(review): presumably the packs end up in ctx->pack_a_dev / ctx->pack_b_dev - confirm.
58void dbm_multiply_gpu_upload_packs(const dbm_pack_t *pack_a,
59 const dbm_pack_t *pack_b,
60 dbm_multiply_gpu_context_t *ctx);
61
62/*******************************************************************************
63 * \brief Internal routine for executing the tasks in given batch on the GPU.
64 * \author Ole Schuett
65 ******************************************************************************/
// batch is a read-only task array; presumably ntasks is its length - TODO confirm.
// mnk_range is a 3x2 int array; presumably one pair of bounds each for m, n and k - TODO confirm.
// NOTE(review): alpha looks like a scaling factor and kshard like a shard index - confirm against the implementation.
66void dbm_multiply_gpu_process_batch(const int ntasks, const dbm_task_t *batch,
67 const int mnk_range[3][2],
68 const double alpha, const int kshard,
69 dbm_multiply_gpu_context_t *ctx);
70
71/*******************************************************************************
72 * \brief Internal routine for downloading results from the device.
73 * \author Ole Schuett
74 ******************************************************************************/
// Takes only the context; NOTE(review): presumably the results land in ctx->shards_c_host - confirm.
75void dbm_multiply_gpu_download_results(dbm_multiply_gpu_context_t *ctx);
76
77/*******************************************************************************
78 * \brief Internal routine for shutting down the gpu backend.
79 * \author Ole Schuett
80 ******************************************************************************/
// Takes only the context; NOTE(review): presumably releases what dbm_multiply_gpu_start set up in *ctx - confirm.
81void dbm_multiply_gpu_stop(dbm_multiply_gpu_context_t *ctx);
82
83#endif // defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
84#endif
85
86// EOF
Internal struct for storing a pack - essentially a shard for MPI.
Internal struct for storing a matrix shard.
Definition dbm_shard.h:30
Internal struct for storing a task, ie. a single block multiplication.