grid_task_list.c (git:ed6f26b)
/*----------------------------------------------------------------------------*/
/* CP2K: A general program to perform molecular dynamics simulations         */
/* Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
/*                                                                           */
/* SPDX-License-Identifier: BSD-3-Clause                                     */
/*----------------------------------------------------------------------------*/

#include <assert.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "common/grid_common.h"
#include "common/grid_constants.h"
#include "common/grid_library.h"
#include "grid_task_list.h"
/*******************************************************************************
 * \brief Allocates a task list which can be passed to grid_collocate_task_list.
 *        See grid_task_list.h for details.
 * \author Ole Schuett
 ******************************************************************************/
void grid_create_task_list(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int block_offsets[nblocks], const double atom_positions[natoms][3],
    const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
    const int level_list[ntasks], const int iatom_list[ntasks],
    const int jatom_list[ntasks], const int iset_list[ntasks],
    const int jset_list[ntasks], const int ipgf_list[ntasks],
    const int jpgf_list[ntasks], const int border_mask_list[ntasks],
    const int block_num_list[ntasks], const double radius_list[ntasks],
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {

  const grid_library_config config = grid_library_get_config();

  grid_task_list *task_list = NULL;

  if (*task_list_out == NULL) {
    task_list = malloc(sizeof(grid_task_list));
    assert(task_list != NULL);
    memset(task_list, 0, sizeof(grid_task_list));

    // Resolve AUTO to a concrete backend.
    if (config.backend == GRID_BACKEND_AUTO) {
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
      task_list->backend = GRID_BACKEND_HIP;
#elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      task_list->backend = GRID_BACKEND_GPU;
#else
      task_list->backend = GRID_BACKEND_CPU;
#endif
    } else {
      task_list->backend = config.backend;
    }
  } else {
    // Reuse existing task list.
    task_list = *task_list_out;
    free(task_list->npts_local);
  }

  // Store npts_local for bounds checking and validation.
  task_list->nlevels = nlevels;
  size_t size = nlevels * 3 * sizeof(int);
  task_list->npts_local = malloc(size);
  assert(task_list->npts_local != NULL);
  memcpy(task_list->npts_local, npts_local, size);

  // Always create reference backend because it might be needed for validation.
  grid_ref_create_task_list(
      orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      block_num_list, radius_list, rab_list, npts_global, npts_local,
      shift_local, border_width, dh, dh_inv, &task_list->ref);

  // Create other backend, if selected.
  switch (task_list->backend) {
  case GRID_BACKEND_REF:
    break; // was already created above
  case GRID_BACKEND_CPU:
    grid_cpu_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
    break;
  case GRID_BACKEND_DGEMM:
    grid_dgemm_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
    break;

  case GRID_BACKEND_GPU:
#if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
    grid_gpu_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
#else
    fprintf(stderr,
            "Error: The GPU grid backend is not available. "
            "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP");
    abort();
#endif
    break;

  case GRID_BACKEND_HIP:
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
    grid_hip_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, &rab_list[0][0],
        &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
        &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
#else
    fprintf(stderr, "Error: The HIP grid backend is not available. "
                    "Please re-compile with -D__OFFLOAD_HIP");
    abort();
#endif
    break;

  default:
    printf("Error: Unknown grid backend: %i.\n", config.backend);
    abort();
    break;
  }

  *task_list_out = task_list;
}
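
/*
 * Editor's sketch (not part of the original file): the double pointer makes
 * the handle reusable. A first call with *task_list_out == NULL allocates the
 * struct and resolves the backend; a later call with the same, non-NULL
 * handle takes the "reuse" branch above instead. With all list and grid
 * arguments elided as "...", a minimal lifecycle looks like:
 *
 *   grid_task_list *tl = NULL;
 *   grid_create_task_list(..., &tl); // allocates and picks a backend
 *   grid_create_task_list(..., &tl); // reuses the existing handle
 *   grid_free_task_list(tl);
 */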

/*******************************************************************************
 * \brief Deallocates the given task list; the basis_sets have to be freed
 *        separately.
 * \author Ole Schuett
 ******************************************************************************/
void grid_free_task_list(grid_task_list *task_list) {

  if (task_list->ref != NULL) {
    grid_ref_free_task_list(task_list->ref);
    task_list->ref = NULL;
  }
  if (task_list->cpu != NULL) {
    grid_cpu_free_task_list(task_list->cpu);
    task_list->cpu = NULL;
  }
  if (task_list->dgemm != NULL) {
    grid_dgemm_free_task_list(task_list->dgemm);
    task_list->dgemm = NULL;
  }
#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
  if (task_list->gpu != NULL) {
    grid_gpu_free_task_list(task_list->gpu);
    task_list->gpu = NULL;
  }
#endif
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
  if (task_list->hip != NULL) {
    grid_hip_free_task_list(task_list->hip);
    task_list->hip = NULL;
  }
#endif

  free(task_list->npts_local);
  free(task_list);
}
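
/*
 * Editor's note (sketch, not in the original file): grid_free_task_list never
 * touches the basis sets the list was created with, so the caller releases
 * them separately. Assuming the grid_free_basis_set() helper from
 * grid_basis_set.h, a teardown could look like:
 *
 *   grid_free_task_list(tl);
 *   for (int ikind = 0; ikind < nkinds; ikind++) {
 *     grid_free_basis_set(basis_sets[ikind]);
 *   }
 */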

/*******************************************************************************
 * \brief Collocate all tasks in the given list onto the given grids.
 *        See grid_task_list.h for details.
 * \author Ole Schuett
 ******************************************************************************/
void grid_collocate_task_list(const grid_task_list *task_list,
                              const enum grid_func func, const int nlevels,
                              const int npts_local[nlevels][3],
                              const offload_buffer *pab_blocks,
                              offload_buffer *grids[nlevels]) {

  // Bounds check.
  assert(task_list->nlevels == nlevels);
  for (int ilevel = 0; ilevel < nlevels; ilevel++) {
    assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
    assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
    assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
  }

  switch (task_list->backend) {
  case GRID_BACKEND_REF:
    grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
                                 grids);
    break;
  case GRID_BACKEND_CPU:
    grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
                                 grids);
    break;
  case GRID_BACKEND_DGEMM:
    grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
                                   grids);
    break;
#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_GPU:
    grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
                                 grids);
    break;
#endif
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_HIP:
    grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
                                 grids);
    break;
#endif
  default:
    printf("Error: Unknown grid backend: %i.\n", task_list->backend);
    abort();
    break;
  }

  // Perform validation if enabled.
  if (grid_library_get_config().validate) {
    // Allocate space for reference results.
    offload_buffer *grids_ref[nlevels];
    for (int level = 0; level < nlevels; level++) {
      const int npts_local_total =
          npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
      grids_ref[level] = NULL;
      offload_create_buffer(npts_local_total, &grids_ref[level]);
    }

    // Call reference implementation.
    grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
                                 grids_ref);

    // Compare results.
    const double tolerance = 1e-12;
    double max_rel_diff = 0.0;
    for (int level = 0; level < nlevels; level++) {
      for (int i = 0; i < npts_local[level][0]; i++) {
        for (int j = 0; j < npts_local[level][1]; j++) {
          for (int k = 0; k < npts_local[level][2]; k++) {
            const int idx = k * npts_local[level][1] * npts_local[level][0] +
                            j * npts_local[level][0] + i;
            const double ref_value = grids_ref[level]->host_buffer[idx];
            const double test_value = grids[level]->host_buffer[idx];
            const double diff = fabs(test_value - ref_value);
            const double rel_diff = diff / fmax(1.0, fabs(ref_value));
            max_rel_diff = fmax(max_rel_diff, rel_diff);
            if (rel_diff > tolerance) {
              fprintf(stderr, "Error: Validation failure in grid collocate\n");
              fprintf(stderr, "   diff:     %le\n", diff);
              fprintf(stderr, "   rel_diff: %le\n", rel_diff);
              fprintf(stderr, "   value:    %le\n", ref_value);
              fprintf(stderr, "   level:    %i\n", level);
              fprintf(stderr, "   ijk:      %i %i %i\n", i, j, k);
              abort();
            }
          }
        }
      }
      offload_free_buffer(grids_ref[level]);
    }
    printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
  }
}
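
/*
 * Editor's note (illustrative, not in the original file): the validation loop
 * above linearizes grid points with the first index i running fastest, i.e.
 * Fortran-style ordering:
 *
 *   idx = k * ny * nx + j * nx + i,  where nx = npts_local[level][0]
 *                                    and   ny = npts_local[level][1].
 *
 * For a hypothetical 2x2x2 level, the point (i,j,k) = (1,0,1) thus maps to
 * idx = 1*2*2 + 0*2 + 1 = 5 in host_buffer.
 */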

/*******************************************************************************
 * \brief Integrate all tasks in the given list from the given grids.
 *        See grid_task_list.h for details.
 * \author Ole Schuett
 ******************************************************************************/
void grid_integrate_task_list(
    const grid_task_list *task_list, const bool compute_tau, const int natoms,
    const int nlevels, const int npts_local[nlevels][3],
    const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
    offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {

  // Bounds check.
  assert(task_list->nlevels == nlevels);
  for (int ilevel = 0; ilevel < nlevels; ilevel++) {
    assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
    assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
    assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
  }

  assert(forces == NULL || pab_blocks != NULL);
  assert(virial == NULL || pab_blocks != NULL);

  switch (task_list->backend) {
#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_GPU:
    grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks, forces, virial);
    break;
#endif
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_HIP:
    grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
                                 pab_blocks, grids, hab_blocks, &forces[0][0],
                                 &virial[0][0]);
    break;
#endif
  case GRID_BACKEND_DGEMM:
    grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
                                   nlevels, pab_blocks, grids, hab_blocks,
                                   forces, virial);
    break;
  case GRID_BACKEND_CPU:
    grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks, forces, virial);
    break;
  case GRID_BACKEND_REF:
    grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks, forces, virial);
    break;
  default:
    printf("Error: Unknown grid backend: %i.\n", task_list->backend);
    abort();
    break;
  }

  // Perform validation if enabled.
  if (grid_library_get_config().validate) {
    // Allocate space for reference results.
    const int hab_length = hab_blocks->size / sizeof(double);
    offload_buffer *hab_blocks_ref = NULL;
    offload_create_buffer(hab_length, &hab_blocks_ref);
    double forces_ref[natoms][3], virial_ref[3][3];

    // Call reference implementation.
    grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks_ref,
                                 (forces != NULL) ? forces_ref : NULL,
                                 (virial != NULL) ? virial_ref : NULL);

    // Compare hab.
    const double hab_tolerance = 1e-12;
    double hab_max_rel_diff = 0.0;
    for (int i = 0; i < hab_length; i++) {
      const double ref_value = hab_blocks_ref->host_buffer[i];
      const double test_value = hab_blocks->host_buffer[i];
      const double diff = fabs(test_value - ref_value);
      const double rel_diff = diff / fmax(1.0, fabs(ref_value));
      hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
      if (rel_diff > hab_tolerance) {
        fprintf(stderr, "Error: Validation failure in grid integrate\n");
        fprintf(stderr, "   hab diff:     %le\n", diff);
        fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
        fprintf(stderr, "   hab value:    %le\n", ref_value);
        fprintf(stderr, "   hab i:        %i\n", i);
        abort();
      }
    }

    // Compare forces.
    const double forces_tolerance = 1e-8; // account for higher numeric noise
    double forces_max_rel_diff = 0.0;
    if (forces != NULL) {
      for (int iatom = 0; iatom < natoms; iatom++) {
        for (int idir = 0; idir < 3; idir++) {
          const double ref_value = forces_ref[iatom][idir];
          const double test_value = forces[iatom][idir];
          const double diff = fabs(test_value - ref_value);
          const double rel_diff = diff / fmax(1.0, fabs(ref_value));
          forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
          if (rel_diff > forces_tolerance) {
            fprintf(stderr, "Error: Validation failure in grid integrate\n");
            fprintf(stderr, "   forces diff:     %le\n", diff);
            fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
            fprintf(stderr, "   forces value:    %le\n", ref_value);
            fprintf(stderr, "   forces atom:     %i\n", iatom);
            fprintf(stderr, "   forces dir:      %i\n", idir);
            abort();
          }
        }
      }
    }

    // Compare virial.
    const double virial_tolerance = 1e-8; // account for higher numeric noise
    double virial_max_rel_diff = 0.0;
    if (virial != NULL) {
      for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
          const double ref_value = virial_ref[i][j];
          const double test_value = virial[i][j];
          const double diff = fabs(test_value - ref_value);
          const double rel_diff = diff / fmax(1.0, fabs(ref_value));
          virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
          if (rel_diff > virial_tolerance) {
            fprintf(stderr, "Error: Validation failure in grid integrate\n");
            fprintf(stderr, "   virial diff:     %le\n", diff);
            fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
            fprintf(stderr, "   virial value:    %le\n", ref_value);
            fprintf(stderr, "   virial ij:       %i %i\n", i, j);
            abort();
          }
        }
      }
    }

    printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
           hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
    offload_free_buffer(hab_blocks_ref);
  }
}
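
/*
 * Editor's sketch (not part of the original file): forces and virial are
 * optional outputs. Passing NULL skips them; requesting either one requires
 * pab_blocks, as the asserts above enforce. A hypothetical call that
 * integrates the hab blocks and forces but no virial:
 *
 *   double (*forces)[3] = calloc(natoms, sizeof *forces);
 *   grid_integrate_task_list(tl, false, natoms, nlevels, npts_local,
 *                            pab_blocks, grids, hab_blocks, forces, NULL);
 *   free(forces);
 */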

// EOF