(git:e68414f)
Loading...
Searching...
No Matches
dbm_multiply_gpu.h
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7#ifndef DBM_MULTIPLY_GPU_H
8#define DBM_MULTIPLY_GPU_H
9
10#include "../offload/offload_runtime.h"
11#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
12
13#include "dbm_internal.h"
14#include "dbm_shard.h"
15
16/*******************************************************************************
17 * \brief Internal struct for storing per-shard GPU objects.
18 * \author Ole Schuett
19 ******************************************************************************/
20typedef struct {
21 double *data; // on the device
22 int data_size; // NOTE(review): presumably the used length of data - confirm units
23 int data_allocated; // NOTE(review): presumably the allocated capacity of data - confirm
24 offloadStream_t stream; // offload stream kept for this shard
25 offloadEvent_t event; // offload event kept for this shard
26} dbm_shard_gpu_t;
27
28/*******************************************************************************
29 * \brief Internal struct for storing the GPU backend's context.
30 * \author Ole Schuett
31 ******************************************************************************/
32typedef struct {
33 offloadStream_t main_stream; // NOTE(review): usage not visible in this header
34 offloadEvent_t upload_event; // NOTE(review): presumably tied to pack uploads - confirm
35
36 int nshards; // NOTE(review): presumably the nshards given to dbm_multiply_gpu_start - confirm
37 dbm_shard_gpu_t *shards_c_dev; // per-shard GPU objects; presumably nshards entries - confirm
38
39 dbm_pack_t pack_a_dev; // NOTE(review): "_dev" suggests a device-side pack A - confirm
40 dbm_pack_t pack_b_dev; // NOTE(review): "_dev" suggests a device-side pack B - confirm
41
42 int max_batch_size; // NOTE(review): presumably the value given to dbm_multiply_gpu_start - confirm
43 dbm_task_t *batches_dev; // NOTE(review): presumably device storage for task batches - confirm
44} dbm_multiply_gpu_context_t;
45
46/*******************************************************************************
47 * \brief Internal routine for initializing the GPU backend.
48 * \author Ole Schuett
49 ******************************************************************************/
50void dbm_multiply_gpu_start(const int max_batch_size, const int nshards,
51 dbm_shard_t *shards_c_host, // NOTE(review): presumably nshards host-side shards - confirm
52 dbm_multiply_gpu_context_t *ctx); // backend context; presumably set up by this call - confirm
53
54/*******************************************************************************
55 * \brief Internal routine for uploading newly arrived packs onto the device.
56 * \author Ole Schuett and Hans Pabst
57 ******************************************************************************/
58bool dbm_multiply_gpu_upload_packs(const dbm_pack_t *pack_a, // read-only pack A
59 const dbm_pack_t *pack_b, // read-only pack B
60 dbm_multiply_gpu_context_t *ctx); // NOTE(review): meaning of the bool result is not visible here - confirm
61
62/*******************************************************************************
63 * \brief Internal routine for executing the tasks in the given batch on the GPU.
64 * \author Ole Schuett
65 ******************************************************************************/
66void dbm_multiply_gpu_process_batch(const int ntasks, const dbm_task_t *batch, // presumably batch holds ntasks tasks - confirm
67 const double alpha, dbm_shard_t *shard_c, // NOTE(review): role of alpha not visible here - confirm
68 const int kshard, const bool finish, // NOTE(review): kshard presumably indexes the shard; finish semantics not visible - confirm
69 dbm_multiply_gpu_context_t *ctx); // backend context
70
71/*******************************************************************************
72 * \brief Internal routine for shutting down the GPU backend.
73 * \author Ole Schuett
74 ******************************************************************************/
75void dbm_multiply_gpu_stop(dbm_multiply_gpu_context_t *ctx); // NOTE(review): presumably the context set up by dbm_multiply_gpu_start - confirm
76
77#endif // defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
78#endif
79
80// EOF
Internal struct for storing a pack - essentially a shard for MPI.
Internal struct for storing a matrix shard.
Definition dbm_shard.h:30
Internal struct for storing a task, i.e. a single block multiplication.