(git:0de0cc2)
offload_library.c
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* CP2K: A general program to perform molecular dynamics simulations */
3 /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 /* */
5 /* SPDX-License-Identifier: BSD-3-Clause */
6 /*----------------------------------------------------------------------------*/
7 
8 #include <assert.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "offload_library.h"
15 #include "offload_runtime.h"
16 
17 #if defined(__OFFLOAD_CUDA)
18 #include <cuda.h>
19 #elif defined(__OFFLOAD_HIP)
20 #include <hip/hip_runtime_api.h>
21 #endif
22 
23 #if defined(__OFFLOAD_PROFILING)
24 #if defined(__OFFLOAD_CUDA)
25 #include <nvToolsExt.h>
26 #elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
27 #include <roctracer/roctx.h>
28 #endif
29 #endif
30 
/* Device picked through offload_set_chosen_device(); -1 until one is set. */
static int chosen_device_id = -1;

/* ARGB palette from which offload_timeset() picks the color of a range. */
const uint32_t colormap[] = {
    0xFFFFFF00, // Yellow
    0xFFFF00FF, // Fuchsia
    0xFFFF0000, // Red
    0xFFC0C0C0, // Silver
    0xFF808080, // Gray
    0xFF808000, // Olive
    0xFF800080, // Purple
    0xFF800000, // Maroon
    0xFF00FFFF, // Aqua
    0xFF00FF00, // Lime
    0xFF008080, // Teal
    0xFF008000, // Green
    0xFF0000FF, // Blue
    0xFF000080, // Navy
};
47 
48 /*******************************************************************************
49  * \brief Initialize runtime.
50  * \author Rocco Meli
51  ******************************************************************************/
void offload_init(void) {
#if defined(__OFFLOAD_CUDA)
  // cuInit() reports failures via its return code; treat any failure as fatal.
  const CUresult status = cuInit(0);
  if (CUDA_SUCCESS != status) {
    fprintf(stderr, "ERROR: %s %d %s %d\n",
            "cuInit failed with error: ", status, __FILE__, __LINE__);
    abort();
  }
#elif defined(__OFFLOAD_HIP)
  OFFLOAD_CHECK(hipInit(0));
#elif defined(__OFFLOAD_OPENCL)
  OFFLOAD_CHECK(c_dbcsr_acc_init());
#endif
  // Without any offload backend compiled in there is nothing to initialize.
}
66 
67 /*******************************************************************************
68  * \brief Returns the number of available devices.
69  * \author Ole Schuett
70  ******************************************************************************/
int offload_get_device_count(void) {
  // Stays zero when no offload backend is compiled in.
  int count = 0;
#if defined(__OFFLOAD_CUDA)
  OFFLOAD_CHECK(cudaGetDeviceCount(&count));
#elif defined(__OFFLOAD_HIP)
  OFFLOAD_CHECK(hipGetDeviceCount(&count));
#elif defined(__OFFLOAD_OPENCL)
  OFFLOAD_CHECK(c_dbcsr_acc_get_ndevices(&count));
#endif
  return count;
}
82 
83 /*******************************************************************************
84  * \brief Selects the chosen device to be used.
85  * \author Ole Schuett
86  ******************************************************************************/
87 void offload_set_chosen_device(int device_id) { chosen_device_id = device_id; }
88 
89 /*******************************************************************************
90  * \brief Returns the chosen device.
91  * \author Ole Schuett
92  ******************************************************************************/
94 
95 /*******************************************************************************
96  * \brief Activates the device selected via offload_set_chosen_device()
97  * \author Ole Schuett
98  ******************************************************************************/
void offload_activate_chosen_device(void) {
  // Hands the previously recorded device id to whichever backend is compiled
  // in; without a backend this is a no-op.
#if defined(__OFFLOAD_CUDA)
  OFFLOAD_CHECK(cudaSetDevice(chosen_device_id));
#elif defined(__OFFLOAD_HIP)
  OFFLOAD_CHECK(hipSetDevice(chosen_device_id));
#elif defined(__OFFLOAD_OPENCL)
  OFFLOAD_CHECK(c_dbcsr_acc_set_active_device(chosen_device_id));
#endif
}
108 
109 /*******************************************************************************
110  * \brief Starts a timing range.
111  * \author Ole Schuett
112  ******************************************************************************/
void offload_timeset(const char *message) {
#if defined(__OFFLOAD_PROFILING)
#if defined(__OFFLOAD_CUDA)
  // Colors are picked based on a (very simple) hash value of the message.
  // The hash is unsigned so that wrap-around is well defined and the
  // resulting color index can never become negative for long messages.
  unsigned int hash = 0;
  for (size_t i = 0; message[i] != '\0'; i++) {
    const unsigned int c = (unsigned int)message[i];
    hash += (unsigned int)i * c * c;
  }
  const size_t num_colors = sizeof(colormap) / sizeof(colormap[0]);

  nvtxEventAttributes_t eventAttrib = {0};
  eventAttrib.version = NVTX_VERSION;
  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
  eventAttrib.message.ascii = message;
  eventAttrib.colorType = NVTX_COLOR_ARGB;
  eventAttrib.color = colormap[hash % num_colors];
  eventAttrib.payloadType = NVTX_PAYLOAD_TYPE_INT64;
  eventAttrib.payload.llValue = 123;
  eventAttrib.category = 42;
  nvtxRangePushEx(&eventAttrib);
#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
  roctxRangePushA(message);
#endif
#endif
  (void)message; // mark argument as used
}
138 
139 /*******************************************************************************
140  * \brief Ends a timing range.
141  * \author Ole Schuett
142  ******************************************************************************/
void offload_timestop(void) {
  // Pops one profiler range; compiles to nothing unless profiling is enabled
  // for a supported backend.
#if defined(__OFFLOAD_PROFILING) && defined(__OFFLOAD_CUDA)
  nvtxRangePop();
#elif defined(__OFFLOAD_PROFILING) && defined(__OFFLOAD_HIP) &&                \
    defined(__HIP_PLATFORM_AMD__)
  roctxRangePop();
#endif
}
152 
153 /*******************************************************************************
154  * \brief Gets free and total device memory.
155  * \author Ole Schuett
156  ******************************************************************************/
void offload_mem_info(size_t *free, size_t *total) {
#if defined(__OFFLOAD_CUDA)
  OFFLOAD_CHECK(cudaMemGetInfo(free, total));
#elif defined(__OFFLOAD_HIP)
  OFFLOAD_CHECK(hipMemGetInfo(free, total));
#elif defined(__OFFLOAD_OPENCL)
  OFFLOAD_CHECK(c_dbcsr_acc_dev_mem_info(free, total));
#else
  // No backend compiled in: report zero for both quantities.
  *total = 0;
  *free = 0;
#endif
}
168 
/*******************************************************************************
 * \brief Allocate pinned memory (or simple malloc when there is no gpu).
 * \param ptr__ Receives the address of the allocated buffer.
 * \param size__ Number of bytes to allocate.
 * \return offloadSuccess with an offload backend; otherwise EXIT_SUCCESS, or
 *         EXIT_FAILURE when a non-empty host allocation fails.
 ******************************************************************************/
int offload_host_malloc(void **ptr__, const size_t size__) {
#if defined(__OFFLOAD)
  offloadMallocHost(ptr__, size__); /* checked */
  return offloadSuccess;
#else
  *ptr__ = malloc(size__);
  // malloc(0) may legitimately return NULL, hence only a failed non-empty
  // request is reported as an error.
  if (*ptr__ == NULL && size__ > 0) {
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
#endif
}
178 
/*******************************************************************************
 * \brief Free pinned memory (or simple free when there is no gpu).
 * \param ptr__ Buffer to release.
 * \return offloadSuccess with an offload backend, EXIT_SUCCESS otherwise.
 ******************************************************************************/
int offload_host_free(void *ptr__) {
#if defined(__OFFLOAD)
  const int status = offloadSuccess;
  offloadFreeHost(ptr__); /* checked */
#else
  const int status = EXIT_SUCCESS;
  free(ptr__);
#endif
  return status;
}
188 
189 // EOF
static unsigned int hash(const dbm_task_t task)
Private hash function based on Szudzik's elegant pairing. Using unsigned int to return a positive num...
static void const int const int i
const uint32_t colormap[]
int offload_get_device_count(void)
Returns the number of available devices.
void offload_mem_info(size_t *free, size_t *total)
Gets free and total device memory.
void offload_activate_chosen_device(void)
Activates the device selected via offload_set_chosen_device()
int offload_host_free(void *ptr__)
free pinned memory (or simple free when there is no gpu)
static int chosen_device_id
void offload_timestop(void)
Ends a timing range.
void offload_timeset(const char *message)
Starts a timing range.
int offload_get_chosen_device(void)
Returns the chosen device.
void offload_set_chosen_device(int device_id)
Selects the chosen device to be used.
void offload_init(void)
Initialize runtime.
int offload_host_malloc(void **ptr__, const size_t size__)
Allocate pinned memory (or simple malloc when there is no gpu)