(git:916bf6e)
Loading...
Searching...
No Matches
dbm_multiply_gpu.h
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#ifndef DBM_MULTIPLY_GPU_H
9#define DBM_MULTIPLY_GPU_H
10
11#include "../offload/offload_runtime.h"
12#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
13
14#include "dbm_internal.h"
15#include "dbm_shard.h"
16
17/*******************************************************************************
18 * \brief Internal struct for storing per shard gpu objects.
 *        Groups one data buffer (located on the device) with two integer
 *        size fields, a stream handle and an event handle.
 *        NOTE(review): this header does not state whether data_size and
 *        data_allocated count elements or bytes - confirm against the
 *        implementation before relying on either.
19 * \author Ole Schuett
20 ******************************************************************************/
21typedef struct {
22 double *data; // on the device
23 int data_size; // presumably the amount of data currently in use - TODO confirm
24 int data_allocated; // presumably the capacity of the data buffer - TODO confirm
25 offloadStream_t stream; // stream handle stored per shard
26 offloadEvent_t event; // event handle stored per shard
27} dbm_shard_gpu_t;
28
29/*******************************************************************************
30 * \brief Internal struct for storing the gpu backend's context.
 *        Holds a main stream, an upload event, a pointer to per shard gpu
 *        objects together with a shard count, two packs (a and b) stored by
 *        value, and a task buffer together with a maximum batch size.
 *        NOTE(review): the "_dev" suffix suggests these members refer to
 *        device memory, but this header does not establish that - confirm.
31 * \author Ole Schuett
32 ******************************************************************************/
33typedef struct {
34 offloadStream_t main_stream; // stream handle shared by the whole context
35 offloadEvent_t upload_event; // presumably tied to the pack uploads - TODO confirm
36
37 int nshards; // presumably the number of entries in shards_c_dev - TODO confirm
38 dbm_shard_gpu_t *shards_c_dev; // per shard gpu objects (see dbm_shard_gpu_t)
39
40 dbm_pack_t pack_a_dev; // pack for operand a, stored by value
41 dbm_pack_t pack_b_dev; // pack for operand b, stored by value
42
43 int max_batch_size; // presumably the capacity of batches_dev in tasks - TODO confirm
44 dbm_task_t *batches_dev; // buffer of tasks
45} dbm_multiply_gpu_context_t;
46
47/*******************************************************************************
48 * \brief Internal routine for initializing the gpu backend.
 * \param max_batch_size Presumably the largest number of tasks a single batch
 *                       may hold - TODO confirm against the implementation.
 * \param nshards        Presumably the number of shards in shards_c_host -
 *                       TODO confirm.
 * \param shards_c_host  Shards of the result matrix; the "_host" suffix
 *                       suggests host memory - TODO confirm. Passed as a
 *                       non-const pointer.
 * \param ctx            The gpu backend's context; passed as a non-const
 *                       pointer.
49 * \author Ole Schuett
50 ******************************************************************************/
51void dbm_multiply_gpu_start(const int max_batch_size, const int nshards,
52 dbm_shard_t *shards_c_host,
53 dbm_multiply_gpu_context_t *ctx);
54
55/*******************************************************************************
56 * \brief Internal routine for uploading newly arrived packs onto the device.
 * \param pack_a Pack for operand a; read-only for this routine (const pointer).
 * \param pack_b Pack for operand b; read-only for this routine (const pointer).
 * \param ctx    The gpu backend's context; passed as a non-const pointer.
 * \return NOTE(review): the meaning of the bool result is not documented in
 *         this header - check the implementation before relying on it.
57 * \author Ole Schuett and Hans Pabst
58 ******************************************************************************/
59bool dbm_multiply_gpu_upload_packs(const dbm_pack_t *pack_a,
60 const dbm_pack_t *pack_b,
61 dbm_multiply_gpu_context_t *ctx);
62
63/*******************************************************************************
64 * \brief Internal routine for executing the tasks in given batch on the GPU.
 * \param ntasks  Presumably the number of entries in batch - TODO confirm.
 * \param batch   The tasks to execute; read-only for this routine (const
 *                pointer).
 * \param alpha   Scalar factor; presumably applied to the block products -
 *                TODO confirm.
 * \param shard_c Shard of the result matrix; passed as a non-const pointer.
 * \param kshard  Presumably the index of shard_c among the context's shards -
 *                TODO confirm.
 * \param finish  NOTE(review): the effect of this flag is not visible in this
 *                header - check the implementation.
 * \param ctx     The gpu backend's context; passed as a non-const pointer.
65 * \author Ole Schuett
66 ******************************************************************************/
67void dbm_multiply_gpu_process_batch(const int ntasks, const dbm_task_t *batch,
68 const double alpha, dbm_shard_t *shard_c,
69 const int kshard, const bool finish,
70 dbm_multiply_gpu_context_t *ctx);
71
72/*******************************************************************************
73 * \brief Internal routine for shutting down the gpu backend.
 * \param ctx The gpu backend's context; passed as a non-const pointer.
 *            NOTE(review): presumably the counterpart of
 *            dbm_multiply_gpu_start for the same context - TODO confirm.
74 * \author Ole Schuett
75 ******************************************************************************/
76void dbm_multiply_gpu_stop(dbm_multiply_gpu_context_t *ctx);
77
78#endif // defined(__OFFLOAD) && !defined(__NO_OFFLOAD_DBM)
79#endif
80
81// EOF
Internal struct for storing a pack - essentially a shard for MPI.
Internal struct for storing a matrix shard.
Definition dbm_shard.h:30
Internal struct for storing a task, i.e. a single block multiplication.