8#include "../mpiwrap/cp_mpi.h"
20#if defined(__parallel)
24#define OFFLOAD_MEMPOOL_PRINT(FN, MSG, OUTPUT_UNIT) \
25 ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
26#define OFFLOAD_MEMPOOL_OMPALLOC 1
71 offload_activate_chosen_device();
72 offloadMalloc(&memory, size);
74 offload_activate_chosen_device();
75 offloadMallocHost(&memory, size);
77#elif OFFLOAD_MEMPOOL_OMPALLOC && (201811 <= _OPENMP)
78 memory = omp_alloc(size, omp_null_allocator);
79#elif defined(__parallel) && !OFFLOAD_MEMPOOL_OMPALLOC
80 if (MPI_SUCCESS != MPI_Alloc_mem((MPI_Aint)size, MPI_INFO_NULL, &memory)) {
81 fprintf(stderr,
"ERROR: MPI_Alloc_mem failed at %s:%i\n", name, __FILE__,
83 MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
86 memory = malloc(size);
98 assert(memory != NULL);
107 if (NULL == memory) {
111#if defined(__OFFLOAD)
113 offload_activate_chosen_device();
116 offload_activate_chosen_device();
117 offloadFreeHost(memory);
119#elif OFFLOAD_MEMPOOL_OMPALLOC && (201811 <= _OPENMP)
121 omp_free(memory, omp_null_allocator);
122#elif defined(__parallel) && !OFFLOAD_MEMPOOL_OMPALLOC
124 if (MPI_SUCCESS != MPI_Free_mem(memory)) {
125 fprintf(stderr,
"ERROR: MPI_Free_mem failed at %s:%i\n", name, __FILE__,
127 MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
140 const bool on_device) {
147#pragma omp critical(offload_mempool_modify)
153 while (*indirect != NULL) {
154 const size_t s = (*indirect)->
size;
155 if (size <= s && (reuse == NULL || s < (*reuse)->
size)) {
160 }
else if (reclaim == NULL || (*reclaim)->size < s) {
163 indirect = &(*indirect)->
next;
170 *reuse = chunk->
next;
171 }
else if (reclaim != NULL) {
174 *reclaim = chunk->
next;
178 assert(chunk != NULL);
183 if (chunk->
size < size) {
192#pragma omp critical(offload_mempool_modify)
226#pragma omp critical(offload_mempool_modify)
230 while (*indirect != NULL && (*indirect)->
mem != mem) {
231 indirect = &(*indirect)->
next;
234 assert(chunk != NULL && chunk->
mem == mem);
237 *indirect = chunk->
next;
266 const bool on_device) {
267#pragma omp critical(offload_mempool_modify)
296 uint64_t size_sum = 0;
298 chunk = chunk->
next) {
299 size_sum += chunk->size;
309 uint64_t used_sum = 0;
311 chunk = chunk->
next) {
312 used_sum += chunk->used;
322 assert(NULL != memstats);
323#pragma omp critical(offload_mempool_modify)
343void offload_mempool_stats_print(
int fortran_comm,
346 assert(omp_get_num_threads() == 1);
359 " ----------------------------------------------------------------"
370 " - OFFLOAD MEMPOOL STATISTICS "
380 " ----------------------------------------------------------------"
384 " Memory consumption "
385 " Number of allocations Used [MiB] Size [MiB]\n",
390 snprintf(buffer,
sizeof(buffer),
392 " %20" PRIuPTR
" %10" PRIuPTR
" %10" PRIuPTR
"\n",
394 (uintptr_t)((memstats.
device_used + (512U << 10)) >> 20),
395 (uintptr_t)((memstats.
device_size + (512U << 10)) >> 20));
400 snprintf(buffer,
sizeof(buffer),
402 " %20" PRIuPTR
" %10" PRIuPTR
" %10" PRIuPTR
"\n",
404 (uintptr_t)((memstats.
host_used + (512U << 10)) >> 20),
405 (uintptr_t)((memstats.
host_size + (512U << 10)) >> 20));
411 " ----------------------------------------------------------------"
void cp_mpi_max_uint64(uint64_t *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_MAX and datatype MPI_UINT64_T.
cp_mpi_comm_t cp_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
static void print_func(const char *msg, int, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
static void internal_mempool_free(offload_mempool_t *pool, const void *mem)
Private routine for releasing memory back to the pool.
struct offload_memchunk offload_memchunk_t
Private struct for storing a chunk of memory.
static offload_mempool_t mempool_device
static void actual_free(void *memory, const bool on_device)
Private routine for actually freeing system memory.
void offload_mempool_host_free(const void *memory)
Internal routine for releasing host memory back to the pool.
void offload_mempool_clear(void)
Internal routine for freeing all memory in the pool.
void offload_mempool_device_free(const void *memory)
Internal routine for releasing device memory back to the pool.
static void * internal_mempool_malloc(offload_mempool_t *pool, const size_t size, const bool on_device)
Private routine for allocating host or device memory from the pool.
#define OFFLOAD_MEMPOOL_PRINT(FN, MSG, OUTPUT_UNIT)
static uint64_t host_malloc_counter
Private counters for statistics.
void offload_mempool_stats_get(offload_mempool_stats_t *memstats)
Internal routine to query statistics.
static void * actual_malloc(const size_t size, const bool on_device)
Private routine for actually allocating system memory.
static void internal_mempool_clear(offload_mempool_t *pool, const bool on_device)
Private routine for freeing all memory in the pool.
static uint64_t sum_chunks_used(const offload_memchunk_t *head)
Private routine for summing used sizes of all chunks in given list.
struct offload_mempool offload_mempool_t
Private struct for storing a memory pool.
static uint64_t device_malloc_counter
void * offload_mempool_host_malloc(const size_t size)
Internal routine for allocating host memory from the pool.
void * offload_mempool_device_malloc(const size_t size)
Internal routine for allocating device memory from the pool.
static offload_mempool_t mempool_host
Private pools for host and device memory.
static uint64_t sum_chunks_size(const offload_memchunk_t *head)
Private routine for summing alloc sizes of all chunks in given list.
Private struct for storing a chunk of memory.
struct offload_memchunk * next
Internal struct for pool statistics.
Private struct for storing a memory pool.
offload_memchunk_t * allocated_head
offload_memchunk_t * available_head