17 #if defined(__OFFLOAD_CUDA)
19 #elif defined(__OFFLOAD_HIP)
20 #include <hip/hip_runtime_api.h>
23 #if defined(__OFFLOAD_PROFILING)
24 #if defined(__OFFLOAD_CUDA)
25 #include <nvToolsExt.h>
26 #elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
27 #include <roctracer/roctx.h>
53 #if defined(__OFFLOAD_CUDA)
54 CUresult error = cuInit(0);
55 if (error != CUDA_SUCCESS) {
56 fprintf(stderr,
"ERROR: %s %d %s %d\n",
"cuInit failed with error: ", error,
60 #elif defined(__OFFLOAD_HIP)
61 OFFLOAD_CHECK(hipInit(0));
62 #elif defined(__OFFLOAD_OPENCL)
63 OFFLOAD_CHECK(c_dbcsr_acc_init());
73 #if defined(__OFFLOAD_CUDA)
74 OFFLOAD_CHECK(cudaGetDeviceCount(&count));
75 #elif defined(__OFFLOAD_HIP)
76 OFFLOAD_CHECK(hipGetDeviceCount(&count));
77 #elif defined(__OFFLOAD_OPENCL)
78 OFFLOAD_CHECK(c_dbcsr_acc_get_ndevices(&count));
100 #if defined(__OFFLOAD_CUDA)
102 #elif defined(__OFFLOAD_HIP)
104 #elif defined(__OFFLOAD_OPENCL)
114 #if defined(__OFFLOAD_PROFILING)
115 #if defined(__OFFLOAD_CUDA)
118 for (
size_t i = 0;
i < strlen(message);
i++) {
119 hash +=
i * message[
i] * message[
i];
121 nvtxEventAttributes_t eventAttrib = {0};
122 eventAttrib.version = NVTX_VERSION;
123 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
124 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
125 eventAttrib.message.ascii = message;
126 eventAttrib.colorType = NVTX_COLOR_ARGB;
128 eventAttrib.payloadType = NVTX_PAYLOAD_TYPE_INT64;
129 eventAttrib.payload.llValue = 123;
130 eventAttrib.category = 42;
131 nvtxRangePushEx(&eventAttrib);
132 #elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
133 roctxRangePushA(message);
144 #if defined(__OFFLOAD_PROFILING)
145 #if defined(__OFFLOAD_CUDA)
147 #elif defined(__OFFLOAD_HIP) && defined(__HIP_PLATFORM_AMD__)
158 #if defined(__OFFLOAD_CUDA)
159 OFFLOAD_CHECK(cudaMemGetInfo(free, total));
160 #elif defined(__OFFLOAD_HIP)
161 OFFLOAD_CHECK(hipMemGetInfo(free, total));
162 #elif defined(__OFFLOAD_OPENCL)
163 OFFLOAD_CHECK(c_dbcsr_acc_dev_mem_info(free, total));
170 #if defined(__OFFLOAD)
171 offloadMallocHost(ptr__, size__);
172 return offloadSuccess;
174 *ptr__ = malloc(size__);
180 #if defined(__OFFLOAD)
181 offloadFreeHost(ptr__);
182 return offloadSuccess;
static unsigned int hash(const dbm_task_t task)
Private hash function based on Szudzik's elegant pairing. Uses unsigned int so that the result is a positive number.
static void const int const int i
const uint32_t colormap[]
int offload_get_device_count(void)
Returns the number of available devices.
void offload_mem_info(size_t *free, size_t *total)
Gets free and total device memory.
void offload_activate_chosen_device(void)
Activates the device selected via offload_set_chosen_device()
int offload_host_free(void *ptr__)
Frees pinned memory (or performs a plain free when there is no GPU).
static int chosen_device_id
void offload_timestop(void)
Ends a timing range.
void offload_timeset(const char *message)
Starts a timing range.
int offload_get_chosen_device(void)
Returns the chosen device.
void offload_set_chosen_device(int device_id)
Selects the chosen device to be used.
void offload_init(void)
Initialize runtime.
int offload_host_malloc(void **ptr__, const size_t size__)
Allocates pinned memory (or performs a plain malloc when there is no GPU).