17#if defined(__OFFLOAD_CUDA)
19#elif defined(__OFFLOAD_HIP)
20#include <hip/hip_runtime_api.h>
23#if defined(__OFFLOAD_PROFILING)
24#if defined(__OFFLOAD_CUDA)
25#include <nvToolsExt.h>
26#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
27#include <roctracer/roctx.h>
53#if defined(__OFFLOAD_CUDA)
54 CUresult error = cuInit(0);
55 if (error != CUDA_SUCCESS) {
56 fprintf(stderr,
"ERROR: %s %d %s %d\n",
"cuInit failed with error: ", error,
60#elif defined(__OFFLOAD_HIP)
61 OFFLOAD_CHECK(hipInit(0));
62#elif defined(__OFFLOAD_OPENCL)
63 OFFLOAD_CHECK(c_dbcsr_acc_init());
73#if defined(__OFFLOAD_CUDA)
74 OFFLOAD_CHECK(cudaGetDeviceCount(&count));
75#elif defined(__OFFLOAD_HIP)
76 OFFLOAD_CHECK(hipGetDeviceCount(&count));
77#elif defined(__OFFLOAD_OPENCL)
78 OFFLOAD_CHECK(c_dbcsr_acc_get_ndevices(&count));
100#if defined(__OFFLOAD_CUDA)
102#elif defined(__OFFLOAD_HIP)
104#elif defined(__OFFLOAD_OPENCL)
113void offload_timeset(
const char *message) {
114#if defined(__OFFLOAD_PROFILING)
115#if defined(__OFFLOAD_CUDA)
118 for (
size_t i = 0;
i < strlen(message);
i++) {
119 hash +=
i * message[
i] * message[
i];
121 nvtxEventAttributes_t eventAttrib = {0};
122 eventAttrib.version = NVTX_VERSION;
123 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
124 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
125 eventAttrib.message.ascii = message;
126 eventAttrib.colorType = NVTX_COLOR_ARGB;
128 eventAttrib.payloadType = NVTX_PAYLOAD_TYPE_INT64;
129 eventAttrib.payload.llValue = 123;
130 eventAttrib.category = 42;
131 nvtxRangePushEx(&eventAttrib);
132#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
133 roctxRangePushA(message);
144#if defined(__OFFLOAD_PROFILING)
145#if defined(__OFFLOAD_CUDA)
147#elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
157void offload_mem_info(
size_t *free,
size_t *total) {
158#if defined(__OFFLOAD_CUDA)
159 OFFLOAD_CHECK(cudaMemGetInfo(free, total));
160#elif defined(__OFFLOAD_HIP)
161 OFFLOAD_CHECK(hipMemGetInfo(free, total));
162#elif defined(__OFFLOAD_OPENCL)
163 OFFLOAD_CHECK(c_dbcsr_acc_dev_mem_info(free, total));
170#if defined(__OFFLOAD)
171 offloadMallocHost(ptr__, size__);
172 return offloadSuccess;
174 *ptr__ = malloc(size__);
180#if defined(__OFFLOAD)
181 offloadFreeHost(ptr__);
182 return offloadSuccess;
static unsigned int hash(const unsigned int row, const unsigned int col)
Private hash function based on the Cantor pairing function. https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
static void const int const int i
const uint32_t colormap[]
int offload_get_device_count(void)
Returns the number of available devices.
void offload_activate_chosen_device(void)
Activates the device selected via offload_set_chosen_device().
int offload_host_free(void *ptr__)
Free pinned memory (or plain free when there is no GPU).
static int chosen_device_id
void offload_timestop(void)
Ends a timing range.
int offload_get_chosen_device(void)
Returns the chosen device.
void offload_init(void)
Initialize runtime.
int offload_host_malloc(void **ptr__, const size_t size__)
Allocate pinned memory (or plain malloc when there is no GPU).