grid_task_list.c
/*----------------------------------------------------------------------------*/
/* CP2K: A general program to perform molecular dynamics simulations         */
/* Copyright 2000-2024 CP2K developers group <https://cp2k.org>               */
/*                                                                            */
/* SPDX-License-Identifier: BSD-3-Clause                                      */
/*----------------------------------------------------------------------------*/

#include <assert.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "common/grid_common.h"
#include "common/grid_constants.h"
#include "common/grid_library.h"
#include "grid_task_list.h"

/*******************************************************************************
 * \brief Allocates a task list which can be passed to grid_collocate_task_list.
 *        See grid_task_list.h for details.
 * \author Ole Schuett
 ******************************************************************************/
void grid_create_task_list(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int block_offsets[nblocks], const double atom_positions[natoms][3],
    const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
    const int level_list[ntasks], const int iatom_list[ntasks],
    const int jatom_list[ntasks], const int iset_list[ntasks],
    const int jset_list[ntasks], const int ipgf_list[ntasks],
    const int jpgf_list[ntasks], const int border_mask_list[ntasks],
    const int block_num_list[ntasks], const double radius_list[ntasks],
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {

  const grid_library_config config = grid_library_get_config();

  grid_task_list *task_list = NULL;

  if (*task_list_out == NULL) {
    task_list = malloc(sizeof(grid_task_list));
    memset(task_list, 0, sizeof(grid_task_list));

    // Resolve AUTO to a concrete backend.
    if (config.backend == GRID_BACKEND_AUTO) {
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
      task_list->backend = GRID_BACKEND_HIP;
#elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      task_list->backend = GRID_BACKEND_GPU;
#else
      task_list->backend = GRID_BACKEND_CPU;
#endif
    } else {
      task_list->backend = config.backend;
    }
  } else {
    // Reuse existing task list.
    task_list = *task_list_out;
    free(task_list->npts_local);
  }
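
  // Note that when an existing task list is reused, the backend selected at
  // its first creation is kept; only the task data below is refreshed.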

  // Store npts_local for bounds checking and validation.
  task_list->nlevels = nlevels;
  size_t size = nlevels * 3 * sizeof(int);
  task_list->npts_local = malloc(size);
  memcpy(task_list->npts_local, npts_local, size);

  // Always create reference backend because it might be needed for validation.
  grid_ref_create_task_list(
      orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      block_num_list, radius_list, rab_list, npts_global, npts_local,
      shift_local, border_width, dh, dh_inv, &task_list->ref);

  // Create other backend, if selected.
  switch (task_list->backend) {
  case GRID_BACKEND_REF:
    break; // was already created above
  case GRID_BACKEND_CPU:
    grid_cpu_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
    break;
  case GRID_BACKEND_DGEMM:
    grid_dgemm_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
    break;

  case GRID_BACKEND_GPU:
#if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
    grid_gpu_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
#else
    fprintf(stderr,
            "Error: The GPU grid backend is not available. "
            "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP.\n");
    abort();
#endif
    break;

  case GRID_BACKEND_HIP:
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
    grid_hip_create_task_list(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, &rab_list[0][0],
        &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
        &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
#else
    fprintf(stderr, "Error: The HIP grid backend is not available. "
                    "Please re-compile with -D__OFFLOAD_HIP.\n");
    abort();
#endif
    break;

  default:
    printf("Error: Unknown grid backend: %i.\n", config.backend);
    abort();
    break;
  }

  *task_list_out = task_list;
}
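
/* Usage sketch (illustrative only; the argument arrays are placeholders for
 * data that CP2K assembles on the Fortran side):
 *
 *   grid_task_list *tl = NULL;
 *   grid_create_task_list(orthorhombic, ntasks, nlevels, natoms, nkinds,
 *                         nblocks, block_offsets, atom_positions, atom_kinds,
 *                         basis_sets, level_list, iatom_list, jatom_list,
 *                         iset_list, jset_list, ipgf_list, jpgf_list,
 *                         border_mask_list, block_num_list, radius_list,
 *                         rab_list, npts_global, npts_local, shift_local,
 *                         border_width, dh, dh_inv, &tl);
 *
 *   // Calling again with the same, now non-NULL pointer updates the existing
 *   // list in place instead of allocating a new one.
 *   grid_create_task_list(..., &tl);
 */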

/*******************************************************************************
 * \brief Deallocates the given task list; basis_sets have to be freed
 *        separately.
 * \author Ole Schuett
 ******************************************************************************/
void grid_free_task_list(grid_task_list *task_list) {

  if (task_list->ref != NULL) {
    grid_ref_free_task_list(task_list->ref);
    task_list->ref = NULL;
  }
  if (task_list->cpu != NULL) {
    grid_cpu_free_task_list(task_list->cpu);
    task_list->cpu = NULL;
  }
  if (task_list->dgemm != NULL) {
    grid_dgemm_free_task_list(task_list->dgemm);
    task_list->dgemm = NULL;
  }
#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
  if (task_list->gpu != NULL) {
    grid_gpu_free_task_list(task_list->gpu);
    task_list->gpu = NULL;
  }
#endif
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
  if (task_list->hip != NULL) {
    grid_hip_free_task_list(task_list->hip);
    task_list->hip = NULL;
  }
#endif

  free(task_list->npts_local);
  free(task_list);
}

/*******************************************************************************
 * \brief Collocate all tasks in the given list onto the given grids.
 *        See grid_task_list.h for details.
 * \author Ole Schuett
 ******************************************************************************/
void grid_collocate_task_list(const grid_task_list *task_list,
                              const enum grid_func func, const int nlevels,
                              const int npts_local[nlevels][3],
                              const offload_buffer *pab_blocks,
                              offload_buffer *grids[nlevels]) {

  // Bounds check.
  assert(task_list->nlevels == nlevels);
  for (int ilevel = 0; ilevel < nlevels; ilevel++) {
    assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
    assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
    assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
  }

  switch (task_list->backend) {
  case GRID_BACKEND_REF:
    grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
                                 grids);
    break;
  case GRID_BACKEND_CPU:
    grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
                                 grids);
    break;
  case GRID_BACKEND_DGEMM:
    grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
                                   grids);
    break;
#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_GPU:
    grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
                                 grids);
    break;
#endif
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_HIP:
    grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
                                 grids);
    break;
#endif
  default:
    printf("Error: Unknown grid backend: %i.\n", task_list->backend);
    abort();
    break;
  }

  // Perform validation if enabled.
  if (grid_library_get_config().validate) {
    // Allocate space for reference results.
    offload_buffer *grids_ref[nlevels];
    for (int level = 0; level < nlevels; level++) {
      const int npts_local_total =
          npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
      grids_ref[level] = NULL;
      offload_create_buffer(npts_local_total, &grids_ref[level]);
    }

    // Call reference implementation.
    grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
                                 grids_ref);

    // Compare results.
    const double tolerance = 1e-12;
    double max_rel_diff = 0.0;
    for (int level = 0; level < nlevels; level++) {
      for (int i = 0; i < npts_local[level][0]; i++) {
        for (int j = 0; j < npts_local[level][1]; j++) {
          for (int k = 0; k < npts_local[level][2]; k++) {
            const int idx = k * npts_local[level][1] * npts_local[level][0] +
                            j * npts_local[level][0] + i;
            const double ref_value = grids_ref[level]->host_buffer[idx];
            const double test_value = grids[level]->host_buffer[idx];
            const double diff = fabs(test_value - ref_value);
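            // Mixed metric: fmax(1.0, |ref_value|) in the denominator makes
            // this an absolute difference for small reference values and a
            // relative one for large values, and avoids division by zero.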
            const double rel_diff = diff / fmax(1.0, fabs(ref_value));
            max_rel_diff = fmax(max_rel_diff, rel_diff);
            if (rel_diff > tolerance) {
              fprintf(stderr, "Error: Validation failure in grid collocate\n");
              fprintf(stderr, "   diff:     %le\n", diff);
              fprintf(stderr, "   rel_diff: %le\n", rel_diff);
              fprintf(stderr, "   value:    %le\n", ref_value);
              fprintf(stderr, "   level:    %i\n", level);
              fprintf(stderr, "   ijk:      %i %i %i\n", i, j, k);
              abort();
            }
          }
        }
      }
      offload_free_buffer(grids_ref[level]);
    }
    printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
  }
}

/*******************************************************************************
 * \brief Integrate all tasks in the given list from the given grids.
 *        See grid_task_list.h for details.
 * \author Ole Schuett
 ******************************************************************************/
void grid_integrate_task_list(
    const grid_task_list *task_list, const bool compute_tau, const int natoms,
    const int nlevels, const int npts_local[nlevels][3],
    const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
    offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {

  // Bounds check.
  assert(task_list->nlevels == nlevels);
  for (int ilevel = 0; ilevel < nlevels; ilevel++) {
    assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
    assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
    assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
  }

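  // Computing forces or the virial requires the density matrix blocks.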
  assert(forces == NULL || pab_blocks != NULL);
  assert(virial == NULL || pab_blocks != NULL);

  switch (task_list->backend) {
#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_GPU:
    grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks, forces, virial);
    break;
#endif
#if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
  case GRID_BACKEND_HIP:
    grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
                                 pab_blocks, grids, hab_blocks, &forces[0][0],
                                 &virial[0][0]);
    break;
#endif
  case GRID_BACKEND_DGEMM:
    grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
                                   nlevels, pab_blocks, grids, hab_blocks,
                                   forces, virial);
    break;
  case GRID_BACKEND_CPU:
    grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks, forces, virial);
    break;
  case GRID_BACKEND_REF:
    grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks, forces, virial);
    break;
  default:
    printf("Error: Unknown grid backend: %i.\n", task_list->backend);
    abort();
    break;
  }

  // Perform validation if enabled.
  if (grid_library_get_config().validate) {
    // Allocate space for reference results.
    const int hab_length = hab_blocks->size / sizeof(double);
    offload_buffer *hab_blocks_ref = NULL;
    offload_create_buffer(hab_length, &hab_blocks_ref);
    double forces_ref[natoms][3], virial_ref[3][3];

    // Call reference implementation.
    grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
                                 pab_blocks, grids, hab_blocks_ref,
                                 (forces != NULL) ? forces_ref : NULL,
                                 (virial != NULL) ? virial_ref : NULL);

    // Compare hab.
    const double hab_tolerance = 1e-12;
    double hab_max_rel_diff = 0.0;
    for (int i = 0; i < hab_length; i++) {
      const double ref_value = hab_blocks_ref->host_buffer[i];
      const double test_value = hab_blocks->host_buffer[i];
      const double diff = fabs(test_value - ref_value);
      const double rel_diff = diff / fmax(1.0, fabs(ref_value));
      hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
      if (rel_diff > hab_tolerance) {
        fprintf(stderr, "Error: Validation failure in grid integrate\n");
        fprintf(stderr, "   hab diff:     %le\n", diff);
        fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
        fprintf(stderr, "   hab value:    %le\n", ref_value);
        fprintf(stderr, "   hab i:        %i\n", i);
        abort();
      }
    }

    // Compare forces.
    const double forces_tolerance = 1e-8; // account for higher numeric noise
    double forces_max_rel_diff = 0.0;
    if (forces != NULL) {
      for (int iatom = 0; iatom < natoms; iatom++) {
        for (int idir = 0; idir < 3; idir++) {
          const double ref_value = forces_ref[iatom][idir];
          const double test_value = forces[iatom][idir];
          const double diff = fabs(test_value - ref_value);
          const double rel_diff = diff / fmax(1.0, fabs(ref_value));
          forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
          if (rel_diff > forces_tolerance) {
            fprintf(stderr, "Error: Validation failure in grid integrate\n");
            fprintf(stderr, "   forces diff:     %le\n", diff);
            fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
            fprintf(stderr, "   forces value:    %le\n", ref_value);
            fprintf(stderr, "   forces atom:     %i\n", iatom);
            fprintf(stderr, "   forces dir:      %i\n", idir);
            abort();
          }
        }
      }
    }

    // Compare virial.
    const double virial_tolerance = 1e-8; // account for higher numeric noise
    double virial_max_rel_diff = 0.0;
    if (virial != NULL) {
      for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
          const double ref_value = virial_ref[i][j];
          const double test_value = virial[i][j];
          const double diff = fabs(test_value - ref_value);
          const double rel_diff = diff / fmax(1.0, fabs(ref_value));
          virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
          if (rel_diff > virial_tolerance) {
            fprintf(stderr, "Error: Validation failure in grid integrate\n");
            fprintf(stderr, "   virial diff:     %le\n", diff);
            fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
            fprintf(stderr, "   virial value:    %le\n", ref_value);
            fprintf(stderr, "   virial ij:       %i %i\n", i, j);
            abort();
          }
        }
      }
    }

    printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
           hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
    offload_free_buffer(hab_blocks_ref);
  }
}
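
/* End-to-end sketch (illustrative only; tl, pab_blocks, grids, hab_blocks,
 * forces, and virial are placeholders for caller-prepared objects, and
 * GRID_FUNC_AB is one of the grid_func values from common/grid_constants.h):
 *
 *   grid_collocate_task_list(tl, GRID_FUNC_AB, nlevels, npts_local,
 *                            pab_blocks, grids);
 *   grid_integrate_task_list(tl, false, natoms, nlevels, npts_local,
 *                            pab_blocks, grids, hab_blocks, forces, virial);
 *   grid_free_task_list(tl); // basis sets must be freed separately
 */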

// EOF