(git:e966546)
Loading...
Searching...
No Matches
offload_mempool.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7#include "offload_mempool.h"
8#include "../mpiwrap/cp_mpi.h"
9#include "offload_library.h"
10#include "offload_runtime.h"
11
12#include <assert.h>
13#include <inttypes.h>
14#include <omp.h>
15#include <stdbool.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19
20#if defined(__parallel)
21#include <mpi.h>
22#endif
23
// Hands MSG together with its strlen() to the print callback FN for the given
// OUTPUT_UNIT. MSG is expanded twice, so it must be free of side effects.
#define OFFLOAD_MEMPOOL_PRINT(FN, MSG, OUTPUT_UNIT)                            \
  ((FN)((MSG), (int)strlen(MSG), (OUTPUT_UNIT)))
// Non-zero selects omp_alloc/omp_free (requires OpenMP 5.0 or later) ahead of
// MPI_Alloc_mem/MPI_Free_mem when building without __OFFLOAD.
#define OFFLOAD_MEMPOOL_OMPALLOC 1
27
/*******************************************************************************
 * \brief Private struct for storing a chunk of memory.
 *        Chunks are kept in singly linked lists that are threaded through
 *        the next member.
 * \author Ole Schuett
 ******************************************************************************/
typedef struct offload_memchunk {
  void *mem; // first: allows to cast memchunk into mem-ptr...
  struct offload_memchunk *next; // following chunk in the same list, or NULL
  size_t size;                   // number of bytes allocated for mem
  size_t used;                   // size of the latest request (statistics)
} offload_memchunk_t;
37
38/*******************************************************************************
39 * \brief Private struct for storing a memory pool.
40 * \author Ole Schuett
41 ******************************************************************************/
45
46/*******************************************************************************
47 * \brief Private pools for host and device memory.
48 * \author Ole Schuett
49 ******************************************************************************/
51
52/*******************************************************************************
53 * \brief Private counters for statistics.
54 * \author Hans Pabst
55 ******************************************************************************/
57
58/*******************************************************************************
59 * \brief Private routine for actually allocating system memory.
60 * \author Ole Schuett
61 ******************************************************************************/
62static void *actual_malloc(const size_t size, const bool on_device) {
63 if (size == 0) {
64 return NULL;
65 }
66
67 void *memory = NULL;
68
69#if defined(__OFFLOAD)
70 if (on_device) {
71 offload_activate_chosen_device();
72 offloadMalloc(&memory, size);
73 } else {
74 offload_activate_chosen_device();
75 offloadMallocHost(&memory, size);
76 }
77#elif OFFLOAD_MEMPOOL_OMPALLOC && (201811 /*v5.0*/ <= _OPENMP)
78 memory = omp_alloc(size, omp_null_allocator);
79#elif defined(__parallel) && !OFFLOAD_MEMPOOL_OMPALLOC
80 if (MPI_SUCCESS != MPI_Alloc_mem((MPI_Aint)size, MPI_INFO_NULL, &memory)) {
81 fprintf(stderr, "ERROR: MPI_Alloc_mem failed at %s:%i\n", name, __FILE__,
82 __LINE__);
83 MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
84 }
85#else
86 memory = malloc(size);
87#endif
88
89 // Update statistics.
90 if (on_device) {
91#pragma omp atomic
93 } else {
94#pragma omp atomic
96 }
97
98 assert(memory != NULL);
99 return memory;
100}
101
/*******************************************************************************
 * \brief Private routine for actually freeing system memory.
 * \param memory Pointer obtained from actual_malloc; NULL is ignored.
 * \param on_device With __OFFLOAD: true releases device memory, false
 *        releases host memory via the offload runtime. Unused otherwise.
 * \author Ole Schuett
 ******************************************************************************/
static void actual_free(void *memory, const bool on_device) {
  (void)on_device; // only consulted by the __OFFLOAD branch below

  if (NULL == memory) {
    return;
  }

#if defined(__OFFLOAD)
  // Both kinds of offload frees need the chosen device to be active.
  offload_activate_chosen_device();
  if (on_device) {
    offloadFree(memory);
  } else {
    offloadFreeHost(memory);
  }
#elif OFFLOAD_MEMPOOL_OMPALLOC && (201811 /*v5.0*/ <= _OPENMP)
  omp_free(memory, omp_null_allocator);
#elif defined(__parallel) && !OFFLOAD_MEMPOOL_OMPALLOC
  if (MPI_SUCCESS != MPI_Free_mem(memory)) {
    // Exactly two arguments for the "%s:%i" conversions.
    fprintf(stderr, "ERROR: MPI_Free_mem failed at %s:%i\n", __FILE__,
            __LINE__);
    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
  }
#else
  free(memory);
#endif
}
134
135/*******************************************************************************
136 * \brief Private routine for allocating host or device memory from the pool.
137 * \author Ole Schuett and Hans Pabst
138 ******************************************************************************/
139static void *internal_mempool_malloc(offload_mempool_t *pool, const size_t size,
140 const bool on_device) {
141 if (size == 0) {
142 return NULL;
143 }
144
145 offload_memchunk_t *chunk;
146
147#pragma omp critical(offload_mempool_modify)
148 {
149 // Find a possible chunk to reuse or reclaim in available list.
150 offload_memchunk_t **reuse = NULL,
151 **reclaim = NULL; // ** for easy list removal
152 offload_memchunk_t **indirect = &pool->available_head;
153 while (*indirect != NULL) {
154 const size_t s = (*indirect)->size;
155 if (size <= s && (reuse == NULL || s < (*reuse)->size)) {
156 reuse = indirect; // reuse smallest suitable chunk
157 if (s == size) {
158 break; // perfect match, exit early
159 }
160 } else if (reclaim == NULL || (*reclaim)->size < s) {
161 reclaim = indirect; // reclaim largest unsuitable chunk
162 }
163 indirect = &(*indirect)->next;
164 }
165
166 // Select an existing chunk or allocate a new one.
167 if (reuse != NULL) {
168 // Reusing an exising chunk that's already large enough.
169 chunk = *reuse;
170 *reuse = chunk->next; // remove chunk from available list.
171 } else if (reclaim != NULL) {
172 // Reclaiming an existing chunk (resize will happen outside crit. region).
173 chunk = *reclaim;
174 *reclaim = chunk->next; // remove chunk from available list.
175 } else {
176 // Found no available chunk, allocate a new one.
177 chunk = calloc(1, sizeof(offload_memchunk_t));
178 assert(chunk != NULL);
179 }
180 }
181
182 // Resize chunk outside of critical region before adding it to allocated list.
183 if (chunk->size < size) {
184 actual_free(chunk->mem, on_device);
185 chunk->mem = actual_malloc(size, on_device);
186 chunk->size = size;
187 }
188
189 chunk->used = size; // for statistics
190
191 // Insert chunk into allocated list.
192#pragma omp critical(offload_mempool_modify)
193 {
194 chunk->next = pool->allocated_head;
195 pool->allocated_head = chunk;
196 }
197
198 return chunk->mem;
199}
200
201/*******************************************************************************
202 * \brief Internal routine for allocating host memory from the pool.
203 * \author Ole Schuett
204 ******************************************************************************/
205void *offload_mempool_host_malloc(const size_t size) {
206 return internal_mempool_malloc(&mempool_host, size, false);
207}
208
209/*******************************************************************************
210 * \brief Internal routine for allocating device memory from the pool
211 * \author Ole Schuett
212 ******************************************************************************/
213void *offload_mempool_device_malloc(const size_t size) {
214 return internal_mempool_malloc(&mempool_device, size, true);
215}
216
217/*******************************************************************************
218 * \brief Private routine for releasing memory back to the pool.
219 * \author Ole Schuett
220 ******************************************************************************/
221static void internal_mempool_free(offload_mempool_t *pool, const void *mem) {
222 if (mem == NULL) {
223 return;
224 }
225
226#pragma omp critical(offload_mempool_modify)
227 {
228 // Find chunk in allocated list.
229 offload_memchunk_t **indirect = &pool->allocated_head;
230 while (*indirect != NULL && (*indirect)->mem != mem) {
231 indirect = &(*indirect)->next;
232 }
233 offload_memchunk_t *chunk = *indirect;
234 assert(chunk != NULL && chunk->mem == mem);
235
236 // Remove chunk from allocated list.
237 *indirect = chunk->next;
238
239 // Add chunk to available list.
240 chunk->next = pool->available_head;
241 pool->available_head = chunk;
242 }
243}
244
245/*******************************************************************************
246 * \brief Internal routine for releasing memory back to the pool.
247 * \author Ole Schuett
248 ******************************************************************************/
249void offload_mempool_host_free(const void *memory) {
251}
252
253/*******************************************************************************
254 * \brief Internal routine for releasing memory back to the pool.
255 * \author Ole Schuett
256 ******************************************************************************/
257void offload_mempool_device_free(const void *memory) {
259}
260
261/*******************************************************************************
262 * \brief Private routine for freeing all memory in the pool.
263 * \author Ole Schuett
264 ******************************************************************************/
266 const bool on_device) {
267#pragma omp critical(offload_mempool_modify)
268 {
269 // Check for leaks, i.e. that the allocated list is empty.
270 assert(pool->allocated_head == NULL);
271
272 // Free all chunks in available list.
273 while (pool->available_head != NULL) {
274 offload_memchunk_t *chunk = pool->available_head;
275 pool->available_head = chunk->next; // remove chunk
276 actual_free(chunk->mem, on_device);
277 free(chunk);
278 }
279 }
280}
281
282/*******************************************************************************
283 * \brief Internal routine for freeing all memory in the pool.
284 * \author Ole Schuett
285 ******************************************************************************/
290
291/*******************************************************************************
292 * \brief Private routine for summing alloc sizes of all chunks in given list.
293 * \author Ole Schuett
294 ******************************************************************************/
295static uint64_t sum_chunks_size(const offload_memchunk_t *head) {
296 uint64_t size_sum = 0;
297 for (const offload_memchunk_t *chunk = head; chunk != NULL;
298 chunk = chunk->next) {
299 size_sum += chunk->size;
300 }
301 return size_sum;
302}
303
304/*******************************************************************************
305 * \brief Private routine for summing used sizes of all chunks in given list.
306 * \author Ole Schuett
307 ******************************************************************************/
308static uint64_t sum_chunks_used(const offload_memchunk_t *head) {
309 uint64_t used_sum = 0;
310 for (const offload_memchunk_t *chunk = head; chunk != NULL;
311 chunk = chunk->next) {
312 used_sum += chunk->used;
313 }
314 return used_sum;
315}
316
317/*******************************************************************************
318 * \brief Internal routine to query statistics.
319 * \author Hans Pabst
320 ******************************************************************************/
338
339/*******************************************************************************
340 * \brief Print allocation statistics.
341 * \author Hans Pabst
342 ******************************************************************************/
// Prints a table of pool statistics through print_func. Every print is guarded
// by a non-zero allocation count, so nothing is emitted when both counts are
// zero after the reduction over comm.
//   fortran_comm: Fortran communicator handle, converted via cp_mpi_comm_f2c.
//   print_func:   callback receiving (message, message length, output_unit).
//   output_unit:  passed through unchanged to print_func.
343void offload_mempool_stats_print(int fortran_comm,
344 void (*print_func)(const char *, int, int),
345 int output_unit) {
// Expects to be called while the current team consists of a single thread.
346 assert(omp_get_num_threads() == 1);
347
// Scratch space for one formatted table row.
348 char buffer[100];
349 const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
351 offload_mempool_stats_get(&memstats);
// Reduce both allocation counts to their maximum over comm.
352 cp_mpi_max_uint64(&memstats.device_mallocs, 1, comm);
353 cp_mpi_max_uint64(&memstats.host_mallocs, 1, comm);
354
// Table header, only when at least one of the counts is non-zero.
355 if (0 != memstats.device_mallocs || 0 != memstats.host_mallocs) {
356 OFFLOAD_MEMPOOL_PRINT(print_func, "\n", output_unit);
359 " ----------------------------------------------------------------"
360 "---------------\n",
361 output_unit);
364 " - "
365 " -\n",
366 output_unit);
367
370 " - OFFLOAD MEMPOOL STATISTICS "
371 " -\n",
372 output_unit);
375 " - "
376 " -\n",
377 output_unit);
380 " ----------------------------------------------------------------"
381 "---------------\n",
382 output_unit);
384 " Memory consumption "
385 " Number of allocations Used [MiB] Size [MiB]\n",
386 output_unit);
387 }
// Device row. Adding 512 KiB before shifting by 20 rounds to the nearest MiB.
// NOTE(review): within this function only device_size is max-reduced, while
// device_used is printed as obtained from offload_mempool_stats_get - confirm
// that this is intended.
388 if (0 < memstats.device_mallocs) {
389 cp_mpi_max_uint64(&memstats.device_size, 1, comm);
390 snprintf(buffer, sizeof(buffer),
391 " Device "
392 " %20" PRIuPTR " %10" PRIuPTR " %10" PRIuPTR "\n",
393 (uintptr_t)memstats.device_mallocs,
394 (uintptr_t)((memstats.device_used + (512U << 10)) >> 20),
395 (uintptr_t)((memstats.device_size + (512U << 10)) >> 20));
396 OFFLOAD_MEMPOOL_PRINT(print_func, buffer, output_unit);
397 }
// Host row, formatted and rounded the same way as the device row.
398 if (0 < memstats.host_mallocs) {
399 cp_mpi_max_uint64(&memstats.host_size, 1, comm);
400 snprintf(buffer, sizeof(buffer),
401 " Host "
402 " %20" PRIuPTR " %10" PRIuPTR " %10" PRIuPTR "\n",
403 (uintptr_t)memstats.host_mallocs,
404 (uintptr_t)((memstats.host_used + (512U << 10)) >> 20),
405 (uintptr_t)((memstats.host_size + (512U << 10)) >> 20));
406 OFFLOAD_MEMPOOL_PRINT(print_func, buffer, output_unit);
407 }
// Closing rule of the table, under the same condition as the header.
408 if (0 < memstats.device_mallocs || 0 < memstats.host_mallocs) {
411 " ----------------------------------------------------------------"
412 "---------------\n",
413 output_unit);
414 }
415}
416
417// EOF
void cp_mpi_max_uint64(uint64_t *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_MAX and datatype MPI_UINT64_T.
Definition cp_mpi.c:261
cp_mpi_comm_t cp_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
Definition cp_mpi.c:69
int cp_mpi_comm_t
Definition cp_mpi.h:18
static void print_func(const char *msg, int, int output_unit)
Wrapper for printf, passed to dbm_library_print_stats.
Definition dbm_miniapp.c:29
static void internal_mempool_free(offload_mempool_t *pool, const void *mem)
Private routine for releasing memory back to the pool.
struct offload_memchunk offload_memchunk_t
Private struct for storing a chunk of memory.
static offload_mempool_t mempool_device
static void actual_free(void *memory, const bool on_device)
Private routine for actually freeing system memory.
void offload_mempool_host_free(const void *memory)
Internal routine for releasing memory back to the pool.
void offload_mempool_clear(void)
Internal routine for freeing all memory in the pool.
void offload_mempool_device_free(const void *memory)
Internal routine for releasing memory back to the pool.
static void * internal_mempool_malloc(offload_mempool_t *pool, const size_t size, const bool on_device)
Private routine for allocating host or device memory from the pool.
#define OFFLOAD_MEMPOOL_PRINT(FN, MSG, OUTPUT_UNIT)
static uint64_t host_malloc_counter
Private some counters for statistics.
void offload_mempool_stats_get(offload_mempool_stats_t *memstats)
Internal routine to query statistics.
static void * actual_malloc(const size_t size, const bool on_device)
Private routine for actually allocating system memory.
static void internal_mempool_clear(offload_mempool_t *pool, const bool on_device)
Private routine for freeing all memory in the pool.
static uint64_t sum_chunks_used(const offload_memchunk_t *head)
Private routine for summing used sizes of all chunks in given list.
struct offload_mempool offload_mempool_t
Private struct for storing a memory pool.
static uint64_t device_malloc_counter
void * offload_mempool_host_malloc(const size_t size)
Internal routine for allocating host memory from the pool.
void * offload_mempool_device_malloc(const size_t size)
Internal routine for allocating device memory from the pool.
static offload_mempool_t mempool_host
Private pools for host and device memory.
static uint64_t sum_chunks_size(const offload_memchunk_t *head)
Private routine for summing alloc sizes of all chunks in given list.
Private struct for storing a chunk of memory.
struct offload_memchunk * next
Internal struct for pool statistics.
Private struct for storing a memory pool.
offload_memchunk_t * allocated_head
offload_memchunk_t * available_head