16#if defined(__OFFLOAD_CUDA)
18#elif defined(__OFFLOAD_HIP)
19#include <hip/hip_runtime_api.h>
22#if defined(__OFFLOAD_PROFILING)
23#if defined(__OFFLOAD_CUDA)
24#include <nvToolsExt.h>
25#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
26#include <roctracer/roctx.h>
52#if defined(__OFFLOAD_CUDA)
53 CUresult error = cuInit(0);
54 if (error != CUDA_SUCCESS) {
55 fprintf(stderr,
"ERROR: %s %d %s %d\n",
"cuInit failed with error: ", error,
59#elif defined(__OFFLOAD_HIP)
60 OFFLOAD_CHECK(hipInit(0));
61#elif defined(__OFFLOAD_OPENCL)
62 OFFLOAD_CHECK(c_dbcsr_acc_init());
72#if defined(__OFFLOAD_CUDA)
73 OFFLOAD_CHECK(cudaGetDeviceCount(&count));
74#elif defined(__OFFLOAD_HIP)
75 OFFLOAD_CHECK(hipGetDeviceCount(&count));
76#elif defined(__OFFLOAD_OPENCL)
77 OFFLOAD_CHECK(c_dbcsr_acc_get_ndevices(&count));
99#if defined(__OFFLOAD_CUDA)
101#elif defined(__OFFLOAD_HIP)
103#elif defined(__OFFLOAD_OPENCL)
112void offload_timeset(
const char *message) {
113#if defined(__OFFLOAD_PROFILING)
114#if defined(__OFFLOAD_CUDA)
117 for (
size_t i = 0;
i < strlen(message);
i++) {
118 hash +=
i * message[
i] * message[
i];
120 nvtxEventAttributes_t eventAttrib = {0};
121 eventAttrib.version = NVTX_VERSION;
122 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
123 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
124 eventAttrib.message.ascii = message;
125 eventAttrib.colorType = NVTX_COLOR_ARGB;
127 eventAttrib.payloadType = NVTX_PAYLOAD_TYPE_INT64;
128 eventAttrib.payload.llValue = 123;
129 eventAttrib.category = 42;
130 nvtxRangePushEx(&eventAttrib);
131#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
132 roctxRangePushA(message);
143#if defined(__OFFLOAD_PROFILING)
144#if defined(__OFFLOAD_CUDA)
146#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
156void offload_mem_info(
size_t *free,
size_t *total) {
157#if defined(__OFFLOAD_CUDA)
158 OFFLOAD_CHECK(cudaMemGetInfo(free, total));
159#elif defined(__OFFLOAD_HIP)
160 OFFLOAD_CHECK(hipMemGetInfo(free, total));
161#elif defined(__OFFLOAD_OPENCL)
162 OFFLOAD_CHECK(c_dbcsr_acc_dev_mem_info(free, total));
169#if defined(__OFFLOAD)
170 offloadMallocHost(ptr__, size__);
171 return offloadSuccess;
173 *ptr__ = malloc(size__);
179#if defined(__OFFLOAD)
180 offloadFreeHost(ptr__);
181 return offloadSuccess;
static unsigned int hash(const dbm_task_t task)
Private hash function based on Szudzik's elegant pairing. Uses unsigned int to return a positive number...
static void const int const int i
const uint32_t colormap[]
int offload_get_device_count(void)
Returns the number of available devices.
void offload_activate_chosen_device(void)
Activates the device selected via offload_set_chosen_device().
int offload_host_free(void *ptr__)
Free pinned host memory (or plain free() when there is no GPU).
static int chosen_device_id
void offload_timestop(void)
Ends a timing range.
int offload_get_chosen_device(void)
Returns the chosen device.
void offload_init(void)
Initialize runtime.
int offload_host_malloc(void **ptr__, const size_t size__)
Allocate pinned host memory (or plain malloc() when there is no GPU).