155 const bool orthorhombic,
const int border_mask,
const double ra[3],
156 const double rab[3],
const double radius,
const grid_basis_set *basis_set_a,
158 const int la_max,
const int lb_max,
const int cycles,
159 const int cycles_per_block,
const int npts_global[][3],
160 const int npts_local[][3],
const int shift_local[][3],
161 const int border_width[][3],
const double dh[][3][3],
164 const int ntasks = cycles;
165 const int nlevels = 1;
166 const int natoms = 2;
167 const int nkinds = 2;
168 int nblocks = cycles / cycles_per_block + 1;
175 int block_offsets[nblocks];
176 memset(block_offsets, 0, nblocks *
sizeof(
int));
177 const double atom_positions[2][3] = {
178 {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
179 const int atom_kinds[2] = {1, 2};
181 const int ipgf = o1 / ncoset(la_max) + 1;
182 const int jpgf = o2 / ncoset(lb_max) + 1;
183 assert(o1 == (ipgf - 1) * ncoset(la_max));
184 assert(o2 == (jpgf - 1) * ncoset(lb_max));
186 int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
187 int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
189 int border_mask_list[ntasks], block_num_list[ntasks];
190 double radius_list[ntasks], rab_list_mutable[ntasks][3];
191 for (
int i = 0;
i < cycles;
i++) {
199 border_mask_list[
i] = border_mask;
200 block_num_list[
i] =
i / cycles_per_block + 1;
201 radius_list[
i] = radius;
202 rab_list_mutable[
i][0] = rab[0];
203 rab_list_mutable[
i][1] = rab[1];
204 rab_list_mutable[
i][2] = rab[2];
206 const double(*rab_list)[3] = (
const double(*)[3])rab_list_mutable;
208 grid_create_task_list(
209 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
210 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
211 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
212 block_num_list, radius_list, rab_list, npts_global,
npts_local,
213 shift_local, border_width, dh, dh_inv, task_list);
221bool grid_replay(
const char *filename,
const int cycles,
const bool collocate,
222 const bool batch,
const int cycles_per_block,
223 const double tolerance) {
226 fprintf(stderr,
"Error: Cycles have to be greater than zero.\n");
230 if (cycles_per_block < 1 || cycles_per_block > cycles) {
232 "Error: Cycles per block has to be between 1 and cycles.\n");
236 FILE *fp = fopen(filename,
"r");
238 fprintf(stderr,
"Could not open task file: %s\n", filename);
242 char header_line[100];
244 if (strcmp(header_line,
"#Grid task v10\n") != 0) {
245 fprintf(stderr,
"Error: Wrong file header.\n");
249 const bool orthorhombic =
parse_int(
"orthorhombic", fp);
250 const int border_mask =
parse_int(
"border_mask", fp);
253 const int la_max =
parse_int(
"la_max", fp);
254 const int la_min =
parse_int(
"la_min", fp);
255 const int lb_max =
parse_int(
"lb_max", fp);
256 const int lb_min =
parse_int(
"lb_min", fp);
261 double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
266 const double(*dh)[3] = (
const double(*)[3])dh_mutable;
267 const double(*dh_inv)[3] = (
const double(*)[3])dh_inv_mutable;
269 int npts_global[3],
npts_local[3], shift_local[3], border_width[3];
280 const size_t n12 = (size_t)n1 * (
size_t)n2;
282 double *pab_storage = malloc(n12 *
sizeof(
double));
283 if (pab_storage == NULL) {
284 fprintf(stderr,
"Error: Could not allocate pab buffer.\n");
287 double(*pab)[n1] = (double(*)[n1])pab_storage;
288 char line[100], format[100];
289 for (
int i = 0;
i < n2;
i++) {
290 for (
int j = 0; j < n1; j++) {
292 snprintf(format,
sizeof(format),
"pab %i %i %%lf",
i, j);
293 if (sscanf(line, format, &pab[
i][j]) != 1) {
294 assert(!
"parse_pab failed");
301 offload_create_buffer(npts_local_total, &grid_ref);
302 memset(grid_ref->
host_buffer, 0, npts_local_total *
sizeof(
double));
304 const int ngrid_nonzero =
parse_int(
"ngrid_nonzero", fp);
305 for (
int n = 0; n < ngrid_nonzero; n++) {
309 if (sscanf(line,
"grid %i %i %i %le", &
i, &j, &k, &value) != 4) {
310 assert(!
"parse_grid failed");
316 double *hab_ref_storage = calloc(n12,
sizeof(
double));
317 if (hab_ref_storage == NULL) {
318 fprintf(stderr,
"Error: Could not allocate hab_ref buffer.\n");
321 double(*hab_ref)[n1] = (double(*)[n1])hab_ref_storage;
322 for (
int i = o2;
i < ncoset(lb_max) + o2;
i++) {
323 for (
int j = o1; j < ncoset(la_max) + o1; j++) {
325 snprintf(format,
sizeof(format),
"hab %i %i %%lf",
i, j);
326 if (sscanf(line, format, &hab_ref[
i][j]) != 1) {
327 assert(!
"parse_hab failed");
332 double forces_ref[2][3];
336 double virial_ref[3][3];
339 char footer_line[100];
341 if (strcmp(footer_line,
"#THE_END\n") != 0) {
342 fprintf(stderr,
"Error: Wrong footer line.\n");
346 if (fclose(fp) != 0) {
347 fprintf(stderr,
"Could not close task file: %s\n", filename);
352 offload_create_buffer(npts_local_total, &grid_test);
353 double *hab_test_storage = calloc(n12,
sizeof(
double));
354 if (hab_test_storage == NULL) {
355 fprintf(stderr,
"Error: Could not allocate hab_test buffer.\n");
358 double(*hab_test)[n1] = (double(*)[n1])hab_test_storage;
359 double forces_test[2][3];
360 double virial_test[3][3];
361 double start_time, end_time;
369 orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
370 la_max, lb_max, cycles, cycles_per_block, (
const int(*)[3])npts_global,
371 (
const int(*)[3])
npts_local, (
const int(*)[3])shift_local,
372 (
const int(*)[3])border_width, (
const double(*)[3][3])dh,
373 (
const double(*)[3][3])dh_inv, &task_list);
375 offload_create_buffer(n1 * n2, &pab_blocks);
376 offload_create_buffer(n1 * n2, &hab_blocks);
377 const double f = (collocate) ? rscale : 1.0;
378 for (
int i = 0;
i < n1;
i++) {
379 for (
int j = 0; j < n2; j++) {
383 start_time = omp_get_wtime();
384 const int nlevels = 1;
385 const int natoms = 2;
389 grid_collocate_task_list(task_list, func, nlevels,
390 (
const int(*)[3])
npts_local, pab_blocks, grids);
394 grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
395 (
const int(*)[3])
npts_local, pab_blocks, grids,
396 hab_blocks, forces_test, virial_test);
397 for (
int i = 0;
i < n2;
i++) {
398 for (
int j = 0; j < n1; j++) {
399 hab_test[
i][j] = hab_blocks->host_buffer[
i * n1 + j];
403 end_time = omp_get_wtime();
406 grid_free_task_list(task_list);
407 offload_free_buffer(pab_blocks);
408 offload_free_buffer(hab_blocks);
410 start_time = omp_get_wtime();
413 memset(grid_test->
host_buffer, 0, npts_local_total *
sizeof(
double));
414 for (
int i = 0;
i < cycles;
i++) {
416 orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
417 zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global,
npts_local,
418 shift_local, border_width, radius, o1, o2, n1, n2, pab,
423 memset(hab_test_storage, 0, n12 *
sizeof(
double));
424 memset(forces_test, 0, 2 * 3 *
sizeof(
double));
425 double virials_test[2][3][3] = {0};
426 for (
int i = 0;
i < cycles;
i++) {
428 orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
429 lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global,
npts_local,
430 shift_local, border_width, radius, o1, o2, n1, n2,
431 grid_ref->
host_buffer, hab_test, pab, forces_test, virials_test,
434 for (
int i = 0;
i < 3;
i++) {
435 for (
int j = 0; j < 3; j++) {
436 virial_test[
i][j] = virials_test[0][
i][j] + virials_test[1][
i][j];
440 end_time = omp_get_wtime();
443 double max_value = 0.0;
444 double max_rel_diff = 0.0;
445 const double derivatives_precision = 1e-4;
449 for (
int i = 0;
i < npts_local_total;
i++) {
450 const double ref_value = cycles * grid_ref->
host_buffer[
i];
452 const double diff = fabs(test_value - ref_value);
453 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
454 max_rel_diff = fmax(max_rel_diff, rel_diff);
455 max_value = fmax(max_value, fabs(test_value));
460 for (
int i = 0;
i < n2;
i++) {
461 for (
int j = 0; j < n1; j++) {
462 const double ref_value = cycles * hab_ref[
i][j];
463 const double test_value = hab_test[
i][j];
464 const double diff = fabs(test_value - ref_value);
465 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
466 max_rel_diff = fmax(max_rel_diff, rel_diff);
467 max_value = fmax(max_value, fabs(test_value));
468 if (rel_diff > tolerance) {
469 printf(
"hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
i,
470 j, ref_value, test_value, diff, rel_diff);
475 for (
int i = 0;
i < 2;
i++) {
476 for (
int j = 0; j < 3; j++) {
477 const double ref_value = cycles * forces_ref[
i][j];
478 const double test_value = forces_test[
i][j];
479 const double diff = fabs(test_value - ref_value);
480 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
481 max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
482 if (rel_diff * derivatives_precision > tolerance) {
483 printf(
"forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
484 i, j, ref_value, test_value, diff, rel_diff);
489 for (
int i = 0;
i < 3;
i++) {
490 for (
int j = 0; j < 3; j++) {
491 const double ref_value = cycles * virial_ref[
i][j];
492 const double test_value = virial_test[
i][j];
493 const double diff = fabs(test_value - ref_value);
494 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
495 max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
496 if (rel_diff * derivatives_precision > tolerance) {
497 printf(
"virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
498 i, j, ref_value, test_value, diff, rel_diff);
503 printf(
"Task: %-55s %9s %-7s Cycles: %e Max value: %le "
504 "Max rel diff: %le Time: %le sec\n",
505 filename, collocate ?
"Collocate" :
"Integrate",
506 batch ?
"Batched" :
"PGF-CPU", (float)cycles, max_value, max_rel_diff,
507 end_time - start_time);
509 offload_free_buffer(grid_ref);
510 offload_free_buffer(grid_test);
512 free(hab_ref_storage);
513 free(hab_test_storage);
516 if (fetestexcept(FE_DIVBYZERO) != 0) {
517 fprintf(stderr,
"Error: Floating point exception FE_DIVBYZERO.\n");
520 if (fetestexcept(FE_OVERFLOW) != 0) {
521 fprintf(stderr,
"Error: Floating point exception FE_OVERFLOW.\n");
525 return max_rel_diff < tolerance;
void grid_cpu_collocate_pgf_product(const bool orthorhombic, const int border_mask, const enum grid_func func, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double rscale, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double pab[n2][n1], double *grid)
Public entry point. A thin wrapper with the only purpose of calling write_task_file when DUMP_TASKS =...
void grid_cpu_integrate_pgf_product(const bool orthorhombic, const bool compute_tau, const int border_mask, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double *grid, double hab[n2][n1], const double pab[n2][n1], double forces[2][3], double virials[2][3][3], double hdab[n2][n1][3], double hadb[n2][n1][3], double a_hdab[n2][n1][3][3])
Integrates a single task. See grid_cpu_integrate.h for details.