(git:48ec5b0)
Loading...
Searching...
No Matches
grid_replay.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <fenv.h>
10#include <limits.h>
11#include <math.h>
12#include <omp.h>
13#include <stdarg.h>
14#include <stdbool.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18
19#include "../offload/offload_buffer.h"
20#include "common/grid_common.h"
21#include "grid_replay.h"
22
25#include "grid_task_list.h"
26
/*******************************************************************************
 * \brief Fetches the next line from a stream, aborting if that is not possible.
 * \param line    Destination buffer, filled via fgets().
 * \param length  Capacity of the destination buffer in bytes.
 * \param fp      Stream to read from.
 * \note  When fgets() returns NULL an error is printed to stderr and the
 *        process is terminated with abort().
 * \author Ole Schuett
 ******************************************************************************/
static void read_next_line(char line[], int length, FILE *fp) {
  char *const fetched = fgets(line, length, fp);
  if (fetched != NULL) {
    return; // A line (or a buffer-sized part of it) was read successfully.
  }
  fprintf(stderr, "Error: Could not read line.\n");
  abort();
}
37
/*******************************************************************************
 * \brief Shorthand for parsing a single integer value.
 * \param key  Keyword the next line is expected to start with.
 * \param fp   Stream to read the line from.
 * \return The integer that follows the keyword on the next line.
 * \note  The parse is performed unconditionally (not inside assert()), so it
 *        also happens when assertions are disabled via NDEBUG. A line that
 *        does not match "<key> <int>" terminates the process with abort().
 * \author Ole Schuett
 ******************************************************************************/
static int parse_int(const char key[], FILE *fp) {
  int value = 0;
  char line[100], format[100];
  read_next_line(line, sizeof(line), fp);
  snprintf(format, sizeof(format), "%s %%i", key);
  if (sscanf(line, format, &value) != 1) {
    fprintf(stderr, "Error: Could not parse integer for key: %s\n", key);
    abort();
  }
  return value;
}
50
/*******************************************************************************
 * \brief Shorthand for parsing a vector of three integer values.
 * \param key  Keyword the next line is expected to start with.
 * \param fp   Stream to read the line from.
 * \param vec  Receives the three integers that follow the keyword.
 * \note  The parse is performed unconditionally (not inside assert()), so it
 *        also happens when assertions are disabled via NDEBUG. A line that
 *        does not yield three integers terminates the process with abort().
 * \author Ole Schuett
 ******************************************************************************/
static void parse_int3(const char key[], FILE *fp, int vec[3]) {
  char line[100], format[100];
  read_next_line(line, sizeof(line), fp);
  snprintf(format, sizeof(format), "%s %%i %%i %%i", key);
  if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
    fprintf(stderr, "Error: Could not parse three integers for key: %s\n", key);
    abort();
  }
}
61
/*******************************************************************************
 * \brief Shorthand for parsing a single double value.
 * \param key  Keyword the next line is expected to start with.
 * \param fp   Stream to read the line from.
 * \return The floating point number that follows the keyword on the next line.
 * \note  The parse is performed unconditionally (not inside assert()), so it
 *        also happens when assertions are disabled via NDEBUG. A line that
 *        does not match "<key> <double>" terminates the process with abort().
 * \author Ole Schuett
 ******************************************************************************/
static double parse_double(const char key[], FILE *fp) {
  double value = 0.0;
  char line[100], format[100];
  read_next_line(line, sizeof(line), fp);
  snprintf(format, sizeof(format), "%s %%le", key);
  if (sscanf(line, format, &value) != 1) {
    fprintf(stderr, "Error: Could not parse double for key: %s\n", key);
    abort();
  }
  return value;
}
74
/*******************************************************************************
 * \brief Shorthand for parsing a vector of three double values.
 * \param key  Keyword the next line is expected to start with.
 * \param fp   Stream to read the line from.
 * \param vec  Receives the three doubles that follow the keyword.
 * \note  The parse is performed unconditionally (not inside assert()), so it
 *        also happens when assertions are disabled via NDEBUG. A line that
 *        does not yield three doubles terminates the process with abort().
 * \author Ole Schuett
 ******************************************************************************/
static void parse_double3(const char key[], FILE *fp, double vec[3]) {
  char line[100], format[100];
  read_next_line(line, sizeof(line), fp);
  snprintf(format, sizeof(format), "%s %%le %%le %%le", key);
  if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
    fprintf(stderr, "Error: Could not parse three doubles for key: %s\n", key);
    abort();
  }
}
85
/*******************************************************************************
 * \brief Shorthand for parsing a 3x3 matrix of doubles.
 * \param key  Keyword each of the next three lines is expected to start with.
 * \param fp   Stream to read the lines from.
 * \param mat  Receives the matrix; row i is taken from the line "<key> i ...".
 * \note  The parse is performed unconditionally (not inside assert()), so it
 *        also happens when assertions are disabled via NDEBUG. A row that
 *        does not yield three doubles terminates the process with abort().
 * \author Ole Schuett
 ******************************************************************************/
static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
  char line[100], format[100];
  for (int i = 0; i < 3; i++) {
    read_next_line(line, sizeof(line), fp);
    // The row index is baked into the format, so rows must appear in order.
    snprintf(format, sizeof(format), "%s %i %%le %%le %%le", key, i);
    if (sscanf(line, format, &mat[i][0], &mat[i][1], &mat[i][2]) != 3) {
      fprintf(stderr, "Error: Could not parse row %i of matrix for key: %s\n",
              i, key);
      abort();
    }
  }
}
98
99/*******************************************************************************
100 * \brief Creates mock basis set using the identity as decontraction matrix.
101 * \author Ole Schuett
102 ******************************************************************************/
103static void create_dummy_basis_set(const int size, const int lmin,
104 const int lmax, const double zet,
105 grid_basis_set **basis_set) {
106
107 double sphi_mutable[size][size];
108 for (int i = 0; i < size; i++) {
109 for (int j = 0; j < size; j++) {
110 sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
111 }
112 }
113 const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
114
115 const int npgf = size / ncoset(lmax);
116 assert(size == npgf * ncoset(lmax));
117
118 const int first_sgf[1] = {1};
119
120 double zet_array_mutable[1][npgf];
121 for (int i = 0; i < npgf; i++) {
122 zet_array_mutable[0][i] = zet;
123 }
124 const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
125
126 grid_create_basis_set(/*nset=*/1,
127 /*nsgf=*/size,
128 /*maxco=*/size,
129 /*maxpgf=*/npgf,
130 /*lmin=*/&lmin,
131 /*lmax=*/&lmax,
132 /*npgf=*/&npgf,
133 /*nsgf_set=*/&size,
134 /*first_sgf=*/first_sgf,
135 /*sphi=*/sphi,
136 /*zet=*/zet_array, basis_set);
137}
138
139/*******************************************************************************
140 * \brief Creates mock task list with one task per cycle.
141 * \author Ole Schuett
142 ******************************************************************************/
144 const bool orthorhombic, const int border_mask, const double ra[3],
145 const double rab[3], const double radius, const grid_basis_set *basis_set_a,
146 const grid_basis_set *basis_set_b, const int o1, const int o2,
147 const int la_max, const int lb_max, const int cycles,
148 const int cycles_per_block, const int npts_global[][3],
149 const int npts_local[][3], const int shift_local[][3],
150 const int border_width[][3], const double dh[][3][3],
151 const double dh_inv[][3][3], grid_task_list **task_list) {
152
153 const int ntasks = cycles;
154 const int nlevels = 1;
155 const int natoms = 2;
156 const int nkinds = 2;
157 int nblocks = cycles / cycles_per_block + 1;
158
159 /* we can not have more blocks than the number of tasks */
160 if (cycles == 1) {
161 nblocks = 1;
162 }
163
164 int block_offsets[nblocks];
165 memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
166 const double atom_positions[2][3] = {
167 {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
168 const int atom_kinds[2] = {1, 2};
169 const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
170 const int ipgf = o1 / ncoset(la_max) + 1;
171 const int jpgf = o2 / ncoset(lb_max) + 1;
172 assert(o1 == (ipgf - 1) * ncoset(la_max));
173 assert(o2 == (jpgf - 1) * ncoset(lb_max));
174
175 int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
176 int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
177 jpgf_list[ntasks];
178 int border_mask_list[ntasks], block_num_list[ntasks];
179 double radius_list[ntasks], rab_list_mutable[ntasks][3];
180 for (int i = 0; i < cycles; i++) {
181 level_list[i] = 1;
182 iatom_list[i] = 1;
183 jatom_list[i] = 2;
184 iset_list[i] = 1;
185 jset_list[i] = 1;
186 ipgf_list[i] = ipgf;
187 jpgf_list[i] = jpgf;
188 border_mask_list[i] = border_mask;
189 block_num_list[i] = i / cycles_per_block + 1;
190 radius_list[i] = radius;
191 rab_list_mutable[i][0] = rab[0];
192 rab_list_mutable[i][1] = rab[1];
193 rab_list_mutable[i][2] = rab[2];
194 }
195 const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
196
197 grid_create_task_list(
198 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
199 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
200 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
201 block_num_list, radius_list, rab_list, npts_global, npts_local,
202 shift_local, border_width, dh, dh_inv, task_list);
203}
204
205/*******************************************************************************
206 * \brief Reads a .task file, collocates/integrates it, and compares results.
207 * See grid_replay.h for details.
208 * \author Ole Schuett
209 ******************************************************************************/
210bool grid_replay(const char *filename, const int cycles, const bool collocate,
211 const bool batch, const int cycles_per_block,
212 const double tolerance) {
213
214 if (cycles < 1) {
215 fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
216 exit(1);
217 }
218
219 if (cycles_per_block < 1 || cycles_per_block > cycles) {
220 fprintf(stderr,
221 "Error: Cycles per block has to be between 1 and cycles.\n");
222 exit(1);
223 }
224
225 FILE *fp = fopen(filename, "r");
226 if (fp == NULL) {
227 fprintf(stderr, "Could not open task file: %s\n", filename);
228 exit(1);
229 }
230
231 char header_line[100];
232 read_next_line(header_line, sizeof(header_line), fp);
233 if (strcmp(header_line, "#Grid task v10\n") != 0) {
234 fprintf(stderr, "Error: Wrong file header.\n");
235 abort();
236 }
237
238 const bool orthorhombic = parse_int("orthorhombic", fp);
239 const int border_mask = parse_int("border_mask", fp);
240 const enum grid_func func = (enum grid_func)parse_int("func", fp);
241 const bool compute_tau = (func == GRID_FUNC_DADB);
242 const int la_max = parse_int("la_max", fp);
243 const int la_min = parse_int("la_min", fp);
244 const int lb_max = parse_int("lb_max", fp);
245 const int lb_min = parse_int("lb_min", fp);
246 const double zeta = parse_double("zeta", fp);
247 const double zetb = parse_double("zetb", fp);
248 const double rscale = parse_double("rscale", fp);
249
250 double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
251 parse_double3x3("dh", fp, dh_mutable);
252 parse_double3x3("dh_inv", fp, dh_inv_mutable);
253 parse_double3("ra", fp, ra);
254 parse_double3("rab", fp, rab);
255 const double(*dh)[3] = (const double(*)[3])dh_mutable;
256 const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
257
258 int npts_global[3], npts_local[3], shift_local[3], border_width[3];
259 parse_int3("npts_global", fp, npts_global);
260 parse_int3("npts_local", fp, npts_local);
261 parse_int3("shift_local", fp, shift_local);
262 parse_int3("border_width", fp, border_width);
263
264 const double radius = parse_double("radius", fp);
265 const int o1 = parse_int("o1", fp);
266 const int o2 = parse_int("o2", fp);
267 const int n1 = parse_int("n1", fp);
268 const int n2 = parse_int("n2", fp);
269
270 double pab_mutable[n2][n1];
271 char line[100], format[100];
272 for (int i = 0; i < n2; i++) {
273 for (int j = 0; j < n1; j++) {
274 read_next_line(line, sizeof(line), fp);
275 snprintf(format, sizeof(format), "pab %i %i %%le", i, j);
276 assert(sscanf(line, format, &pab_mutable[i][j]) == 1);
277 }
278 }
279 const double(*pab)[n1] = (const double(*)[n1])pab_mutable;
280
281 const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
282 offload_buffer *grid_ref = NULL;
283 offload_create_buffer(npts_local_total, &grid_ref);
284 memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
285
286 const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
287 for (int n = 0; n < ngrid_nonzero; n++) {
288 int i, j, k;
289 double value;
290 read_next_line(line, sizeof(line), fp);
291 assert(sscanf(line, "grid %i %i %i %le", &i, &j, &k, &value) == 4);
292 grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
293 j * npts_local[0] + i] = value;
294 }
295
296 double hab_ref[n2][n1];
297 memset(hab_ref, 0, n2 * n1 * sizeof(double));
298 for (int i = o2; i < ncoset(lb_max) + o2; i++) {
299 for (int j = o1; j < ncoset(la_max) + o1; j++) {
300 read_next_line(line, sizeof(line), fp);
301 snprintf(format, sizeof(format), "hab %i %i %%le", i, j);
302 assert(sscanf(line, format, &hab_ref[i][j]) == 1);
303 }
304 }
305
306 double forces_ref[2][3];
307 parse_double3("force_a", fp, forces_ref[0]);
308 parse_double3("force_b", fp, forces_ref[1]);
309
310 double virial_ref[3][3];
311 parse_double3x3("virial", fp, virial_ref);
312
313 char footer_line[100];
314 read_next_line(footer_line, sizeof(footer_line), fp);
315 if (strcmp(footer_line, "#THE_END\n") != 0) {
316 fprintf(stderr, "Error: Wrong footer line.\n");
317 abort();
318 }
319
320 if (fclose(fp) != 0) {
321 fprintf(stderr, "Could not close task file: %s\n", filename);
322 abort();
323 }
324
325 offload_buffer *grid_test = NULL;
326 offload_create_buffer(npts_local_total, &grid_test);
327 double hab_test[n2][n1];
328 double forces_test[2][3];
329 double virial_test[3][3];
330 double start_time, end_time;
331
332 if (batch) {
333 grid_basis_set *basisa = NULL, *basisb = NULL;
334 create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
335 create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
336 grid_task_list *task_list = NULL;
338 orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
339 la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
340 (const int(*)[3])npts_local, (const int(*)[3])shift_local,
341 (const int(*)[3])border_width, (const double(*)[3][3])dh,
342 (const double(*)[3][3])dh_inv, &task_list);
343 offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
344 offload_create_buffer(n1 * n2, &pab_blocks);
345 offload_create_buffer(n1 * n2, &hab_blocks);
346 const double f = (collocate) ? rscale : 1.0;
347 for (int i = 0; i < n1; i++) {
348 for (int j = 0; j < n2; j++) {
349 pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
350 }
351 }
352 start_time = omp_get_wtime();
353 const int nlevels = 1;
354 const int natoms = 2;
355 if (collocate) {
356 // collocate
357 offload_buffer *grids[1] = {grid_test};
358 grid_collocate_task_list(task_list, func, nlevels,
359 (const int(*)[3])npts_local, pab_blocks, grids);
360 } else {
361 // integrate
362 const offload_buffer *grids[1] = {grid_ref};
363 grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
364 (const int(*)[3])npts_local, pab_blocks, grids,
365 hab_blocks, forces_test, virial_test);
366 for (int i = 0; i < n2; i++) {
367 for (int j = 0; j < n1; j++) {
368 hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
369 }
370 }
371 }
372 end_time = omp_get_wtime();
373 grid_free_basis_set(basisa);
374 grid_free_basis_set(basisb);
375 grid_free_task_list(task_list);
376 offload_free_buffer(pab_blocks);
377 offload_free_buffer(hab_blocks);
378 } else {
379 start_time = omp_get_wtime();
380 if (collocate) {
381 // collocate
382 memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
383 for (int i = 0; i < cycles; i++) {
385 orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
386 zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
387 shift_local, border_width, radius, o1, o2, n1, n2, pab,
388 grid_test->host_buffer);
389 }
390 } else {
391 // integrate
392 memset(hab_test, 0, n2 * n1 * sizeof(double));
393 memset(forces_test, 0, 2 * 3 * sizeof(double));
394 double virials_test[2][3][3] = {0};
395 for (int i = 0; i < cycles; i++) {
397 orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
398 lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
399 shift_local, border_width, radius, o1, o2, n1, n2,
400 grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
401 NULL, NULL, NULL);
402 }
403 for (int i = 0; i < 3; i++) {
404 for (int j = 0; j < 3; j++) {
405 virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
406 }
407 }
408 }
409 end_time = omp_get_wtime();
410 }
411
412 double max_value = 0.0;
413 double max_rel_diff = 0.0;
414 const double derivatives_precision = 1e-4; // account for higher numeric noise
415 if (collocate) {
416 // collocate
417 // compare grid
418 for (int i = 0; i < npts_local_total; i++) {
419 const double ref_value = cycles * grid_ref->host_buffer[i];
420 const double test_value = grid_test->host_buffer[i];
421 const double diff = fabs(test_value - ref_value);
422 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
423 max_rel_diff = fmax(max_rel_diff, rel_diff);
424 max_value = fmax(max_value, fabs(test_value));
425 }
426 } else {
427 // integrate
428 // compare hab
429 for (int i = 0; i < n2; i++) {
430 for (int j = 0; j < n1; j++) {
431 const double ref_value = cycles * hab_ref[i][j];
432 const double test_value = hab_test[i][j];
433 const double diff = fabs(test_value - ref_value);
434 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
435 max_rel_diff = fmax(max_rel_diff, rel_diff);
436 max_value = fmax(max_value, fabs(test_value));
437 if (rel_diff > tolerance) {
438 printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
439 j, ref_value, test_value, diff, rel_diff);
440 }
441 }
442 }
443 // compare forces
444 for (int i = 0; i < 2; i++) {
445 for (int j = 0; j < 3; j++) {
446 const double ref_value = cycles * forces_ref[i][j];
447 const double test_value = forces_test[i][j];
448 const double diff = fabs(test_value - ref_value);
449 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
450 max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
451 if (rel_diff * derivatives_precision > tolerance) {
452 printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
453 i, j, ref_value, test_value, diff, rel_diff);
454 }
455 }
456 }
457 // compare virial
458 for (int i = 0; i < 3; i++) {
459 for (int j = 0; j < 3; j++) {
460 const double ref_value = cycles * virial_ref[i][j];
461 const double test_value = virial_test[i][j];
462 const double diff = fabs(test_value - ref_value);
463 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
464 max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
465 if (rel_diff * derivatives_precision > tolerance) {
466 printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
467 i, j, ref_value, test_value, diff, rel_diff);
468 }
469 }
470 }
471 }
472 printf("Task: %-55s %9s %-7s Cycles: %e Max value: %le "
473 "Max rel diff: %le Time: %le sec\n",
474 filename, collocate ? "Collocate" : "Integrate",
475 batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
476 end_time - start_time);
477
478 offload_free_buffer(grid_ref);
479 offload_free_buffer(grid_test);
480
481 // Check floating point exceptions.
482 if (fetestexcept(FE_DIVBYZERO) != 0) {
483 fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
484 exit(1);
485 }
486 if (fetestexcept(FE_OVERFLOW) != 0) {
487 fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
488 exit(1);
489 }
490
491 return max_rel_diff < tolerance;
492}
493
494// EOF
void grid_create_basis_set(const int nset, const int nsgf, const int maxco, const int maxpgf, const int lmin[nset], const int lmax[nset], const int npgf[nset], const int nsgf_set[nset], const int first_sgf[nset], const double sphi[nsgf][maxco], const double zet[nset][maxpgf], grid_basis_set **basis_set_out)
Allocates a basis set which can be passed to grid_create_task_list. See grid_task_list....
void grid_free_basis_set(grid_basis_set *basis_set)
Deallocates given basis set.
grid_func
@ GRID_FUNC_DADB
static void const int const int i
static void const int const int const int const int const int const double const int const int const int npts_local[3]
void grid_cpu_collocate_pgf_product(const bool orthorhombic, const int border_mask, const enum grid_func func, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double rscale, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double pab[n2][n1], double *grid)
Public entry point. A thin wrapper with the only purpose of calling write_task_file when DUMP_TASKS =...
void grid_cpu_integrate_pgf_product(const bool orthorhombic, const bool compute_tau, const int border_mask, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double *grid, double hab[n2][n1], const double pab[n2][n1], double forces[2][3], double virials[2][3][3], double hdab[n2][n1][3], double hadb[n2][n1][3], double a_hdab[n2][n1][3][3])
Integrates a single task. See grid_cpu_integrate.h for details.
static void parse_double3(const char key[], FILE *fp, double vec[3])
Shorthand for parsing a vector of three double values.
Definition grid_replay.c:79
static void parse_double3x3(const char key[], FILE *fp, double mat[3][3])
Shorthand for parsing a 3x3 matrix of doubles.
Definition grid_replay.c:90
bool grid_replay(const char *filename, const int cycles, const bool collocate, const bool batch, const int cycles_per_block, const double tolerance)
Reads a .task file, collocates/integrates it, and compares results. See grid_replay....
static void parse_int3(const char key[], FILE *fp, int vec[3])
Shorthand for parsing a vector of three integer values.
Definition grid_replay.c:55
static void create_dummy_task_list(const bool orthorhombic, const int border_mask, const double ra[3], const double rab[3], const double radius, const grid_basis_set *basis_set_a, const grid_basis_set *basis_set_b, const int o1, const int o2, const int la_max, const int lb_max, const int cycles, const int cycles_per_block, const int npts_global[][3], const int npts_local[][3], const int shift_local[][3], const int border_width[][3], const double dh[][3][3], const double dh_inv[][3][3], grid_task_list **task_list)
Creates mock task list with one task per cycle.
static void create_dummy_basis_set(const int size, const int lmin, const int lmax, const double zet, grid_basis_set **basis_set)
Creates mock basis set using the identity as decontraction matrix.
static void read_next_line(char line[], int length, FILE *fp)
Reads next line from given filehandle and handles errors.
Definition grid_replay.c:31
static int parse_int(const char key[], FILE *fp)
Shorthand for parsing a single integer value.
Definition grid_replay.c:42
static double parse_double(const char key[], FILE *fp)
Shorthand for parsing a single double value.
Definition grid_replay.c:66
Internal representation of a basis set.
Internal representation of a task list, abstracting various backends.
Internal representation of a buffer.
double * host_buffer