19 #include "../offload/offload_buffer.h"
32 if (fgets(line, length, fp) == NULL) {
33 fprintf(stderr,
"Error: Could not read line.\n");
43 const int nargs, ...) {
47 char full_format[100];
48 strcpy(full_format, key);
49 strcat(full_format,
" ");
50 strcat(full_format, format);
53 va_start(varargs, nargs);
54 if (vsscanf(line, full_format, varargs) != nargs) {
55 fprintf(stderr,
"Error: Could not parse line.\n");
56 fprintf(stderr,
"Line: %s\n", line);
57 fprintf(stderr,
"Format: %s\n", full_format);
77 static void parse_int3(
const char key[], FILE *fp,
int vec[3]) {
105 for (
int i = 0;
i < 3;
i++) {
106 sprintf(format,
"%i %%le %%le %%le",
i);
116 const int lmax,
const double zet,
119 double sphi_mutable[size][size];
120 for (
int i = 0;
i < size;
i++) {
121 for (
int j = 0; j < size; j++) {
122 sphi_mutable[
i][j] = (
i == j) ? 1.0 : 0.0;
125 const double(*sphi)[size] = (
const double(*)[size])sphi_mutable;
130 const int first_sgf[1] = {1};
132 double zet_array_mutable[1][npgf];
133 for (
int i = 0;
i < npgf;
i++) {
134 zet_array_mutable[0][
i] = zet;
136 const double(*zet_array)[npgf] = (
const double(*)[npgf])zet_array_mutable;
148 zet_array, basis_set);
156 const bool orthorhombic,
const int border_mask,
const double ra[3],
157 const double rab[3],
const double radius,
const grid_basis_set *basis_set_a,
159 const int la_max,
const int lb_max,
const int cycles,
160 const int cycles_per_block,
const int npts_global[][3],
161 const int npts_local[][3],
const int shift_local[][3],
162 const int border_width[][3],
const double dh[][3][3],
165 const int ntasks = cycles;
166 const int nlevels = 1;
167 const int natoms = 2;
168 const int nkinds = 2;
169 int nblocks = cycles / cycles_per_block + 1;
176 int block_offsets[nblocks];
177 memset(block_offsets, 0, nblocks *
sizeof(
int));
178 const double atom_positions[2][3] = {
179 {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
180 const int atom_kinds[2] = {1, 2};
182 const int ipgf = o1 /
ncoset(la_max) + 1;
183 const int jpgf = o2 /
ncoset(lb_max) + 1;
184 assert(o1 == (ipgf - 1) *
ncoset(la_max));
185 assert(o2 == (jpgf - 1) *
ncoset(lb_max));
187 int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
188 int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
190 int border_mask_list[ntasks], block_num_list[ntasks];
191 double radius_list[ntasks], rab_list_mutable[ntasks][3];
192 for (
int i = 0;
i < cycles;
i++) {
200 border_mask_list[
i] = border_mask;
201 block_num_list[
i] =
i / cycles_per_block + 1;
202 radius_list[
i] = radius;
203 rab_list_mutable[
i][0] = rab[0];
204 rab_list_mutable[
i][1] = rab[1];
205 rab_list_mutable[
i][2] = rab[2];
207 const double(*rab_list)[3] = (
const double(*)[3])rab_list_mutable;
210 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
211 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
212 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
213 block_num_list, radius_list, rab_list, npts_global,
npts_local,
214 shift_local, border_width, dh, dh_inv, task_list);
222 bool grid_replay(
const char *filename,
const int cycles,
const bool collocate,
223 const bool batch,
const int cycles_per_block,
224 const double tolerance) {
227 fprintf(stderr,
"Error: Cycles have to be greater than zero.\n");
231 if (cycles_per_block < 1 || cycles_per_block > cycles) {
233 "Error: Cycles per block has to be between 1 and cycles.\n");
237 FILE *fp = fopen(filename,
"r");
239 fprintf(stderr,
"Could not open task file: %s\n", filename);
243 char header_line[100];
245 if (strcmp(header_line,
"#Grid task v10\n") != 0) {
246 fprintf(stderr,
"Error: Wrong file header.\n");
250 const bool orthorhombic =
parse_int(
"orthorhombic", fp);
251 const int border_mask =
parse_int(
"border_mask", fp);
254 const int la_max =
parse_int(
"la_max", fp);
255 const int la_min =
parse_int(
"la_min", fp);
256 const int lb_max =
parse_int(
"lb_max", fp);
257 const int lb_min =
parse_int(
"lb_min", fp);
262 double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
267 const double(*dh)[3] = (
const double(*)[3])dh_mutable;
268 const double(*dh_inv)[3] = (
const double(*)[3])dh_inv_mutable;
270 int npts_global[3],
npts_local[3], shift_local[3], border_width[3];
282 double pab_mutable[n2][n1];
284 for (
int i = 0;
i < n2;
i++) {
285 for (
int j = 0; j < n1; j++) {
286 sprintf(format,
"%i %i %%le",
i, j);
290 const double(*pab)[n1] = (
const double(*)[n1])pab_mutable;
295 memset(grid_ref->
host_buffer, 0, npts_local_total *
sizeof(
double));
297 const int ngrid_nonzero =
parse_int(
"ngrid_nonzero", fp);
298 for (
int n = 0; n < ngrid_nonzero; n++) {
306 double hab_ref[n2][n1];
307 memset(hab_ref, 0, n2 * n1 *
sizeof(
double));
308 for (
int i = o2;
i <
ncoset(lb_max) + o2;
i++) {
309 for (
int j = o1; j <
ncoset(la_max) + o1; j++) {
310 sprintf(format,
"%i %i %%le",
i, j);
315 double forces_ref[2][3];
319 double virial_ref[3][3];
322 char footer_line[100];
324 if (strcmp(footer_line,
"#THE_END\n") != 0) {
325 fprintf(stderr,
"Error: Wrong footer line.\n");
331 double hab_test[n2][n1];
332 double forces_test[2][3];
333 double virial_test[3][3];
334 double start_time, end_time;
342 orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
343 la_max, lb_max, cycles, cycles_per_block, (
const int(*)[3])npts_global,
344 (
const int(*)[3])
npts_local, (
const int(*)[3])shift_local,
345 (
const int(*)[3])border_width, (
const double(*)[3][3])dh,
346 (
const double(*)[3][3])dh_inv, &task_list);
350 const double f = (collocate) ? rscale : 1.0;
351 for (
int i = 0;
i < n1;
i++) {
352 for (
int j = 0; j < n2; j++) {
356 start_time = omp_get_wtime();
357 const int nlevels = 1;
358 const int natoms = 2;
363 (
const int(*)[3])
npts_local, pab_blocks, grids);
368 (
const int(*)[3])
npts_local, pab_blocks, grids,
369 hab_blocks, forces_test, virial_test);
370 for (
int i = 0;
i < n2;
i++) {
371 for (
int j = 0; j < n1; j++) {
372 hab_test[
i][j] = hab_blocks->host_buffer[
i * n1 + j];
376 end_time = omp_get_wtime();
383 start_time = omp_get_wtime();
386 memset(grid_test->
host_buffer, 0, npts_local_total *
sizeof(
double));
387 for (
int i = 0;
i < cycles;
i++) {
389 orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
391 shift_local, border_width, radius, o1, o2, n1, n2, pab,
396 memset(hab_test, 0, n2 * n1 *
sizeof(
double));
397 memset(forces_test, 0, 2 * 3 *
sizeof(
double));
398 double virials_test[2][3][3] = {0};
399 for (
int i = 0;
i < cycles;
i++) {
401 orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
403 shift_local, border_width, radius, o1, o2, n1, n2,
404 grid_ref->
host_buffer, hab_test, pab, forces_test, virials_test,
407 for (
int i = 0;
i < 3;
i++) {
408 for (
int j = 0; j < 3; j++) {
409 virial_test[
i][j] = virials_test[0][
i][j] + virials_test[1][
i][j];
413 end_time = omp_get_wtime();
416 double max_value = 0.0;
417 double max_rel_diff = 0.0;
418 const double derivatives_precision = 1e-4;
422 for (
int i = 0;
i < npts_local_total;
i++) {
423 const double ref_value = cycles * grid_ref->
host_buffer[
i];
425 const double diff = fabs(test_value - ref_value);
426 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
427 max_rel_diff = fmax(max_rel_diff, rel_diff);
428 max_value = fmax(max_value, fabs(test_value));
433 for (
int i = 0;
i < n2;
i++) {
434 for (
int j = 0; j < n1; j++) {
435 const double ref_value = cycles * hab_ref[
i][j];
436 const double test_value = hab_test[
i][j];
437 const double diff = fabs(test_value - ref_value);
438 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
439 max_rel_diff = fmax(max_rel_diff, rel_diff);
440 max_value = fmax(max_value, fabs(test_value));
441 if (rel_diff > tolerance) {
442 printf(
"hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
i,
443 j, ref_value, test_value, diff, rel_diff);
448 for (
int i = 0;
i < 2;
i++) {
449 for (
int j = 0; j < 3; j++) {
450 const double ref_value = cycles * forces_ref[
i][j];
451 const double test_value = forces_test[
i][j];
452 const double diff = fabs(test_value - ref_value);
453 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
454 max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
455 if (rel_diff * derivatives_precision > tolerance) {
456 printf(
"forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
457 i, j, ref_value, test_value, diff, rel_diff);
462 for (
int i = 0;
i < 3;
i++) {
463 for (
int j = 0; j < 3; j++) {
464 const double ref_value = cycles * virial_ref[
i][j];
465 const double test_value = virial_test[
i][j];
466 const double diff = fabs(test_value - ref_value);
467 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
468 max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
469 if (rel_diff * derivatives_precision > tolerance) {
470 printf(
"virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
471 i, j, ref_value, test_value, diff, rel_diff);
476 printf(
"Task: %-55s %9s %-7s Cycles: %e Max value: %le "
477 "Max rel diff: %le Time: %le sec\n",
478 filename, collocate ?
"Collocate" :
"Integrate",
479 batch ?
"Batched" :
"PGF-CPU", (
float)cycles, max_value, max_rel_diff,
480 end_time - start_time);
486 if (fetestexcept(FE_DIVBYZERO) != 0) {
487 fprintf(stderr,
"Error: Floating point exception FE_DIVBYZERO.\n");
490 if (fetestexcept(FE_OVERFLOW) != 0) {
491 fprintf(stderr,
"Error: Floating point exception FE_OVERFLOW.\n");
495 return max_rel_diff < tolerance;
void grid_create_basis_set(const int nset, const int nsgf, const int maxco, const int maxpgf, const int lmin[nset], const int lmax[nset], const int npgf[nset], const int nsgf_set[nset], const int first_sgf[nset], const double sphi[nsgf][maxco], const double zet[nset][maxpgf], grid_basis_set **basis_set_out)
Allocates a basis set which can be passed to grid_create_task_list. See grid_task_list....
void grid_free_basis_set(grid_basis_set *basis_set)
Deallocates given basis set.
static GRID_HOST_DEVICE int ncoset(const int l)
Number of Cartesian orbitals up to given angular momentum quantum.
static void const int const int i
static void const int const int const int const int const int const double const int const int const int npts_local[3]
void grid_cpu_collocate_pgf_product(const bool orthorhombic, const int border_mask, const enum grid_func func, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double rscale, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double pab[n2][n1], double *grid)
Public entry point. A thin wrapper with the only purpose of calling write_task_file when DUMP_TASKS =...
void grid_cpu_integrate_pgf_product(const bool orthorhombic, const bool compute_tau, const int border_mask, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double *grid, double hab[n2][n1], const double pab[n2][n1], double forces[2][3], double virials[2][3][3], double hdab[n2][n1][3], double hadb[n2][n1][3], double a_hdab[n2][n1][3][3])
Integrates a single task. See grid_cpu_integrate.h for details.
static void parse_double3(const char key[], FILE *fp, double vec[3])
Shorthand for parsing a vector of three double values.
static void parse_double3x3(const char key[], FILE *fp, double mat[3][3])
Shorthand for parsing a 3x3 matrix of doubles.
bool grid_replay(const char *filename, const int cycles, const bool collocate, const bool batch, const int cycles_per_block, const double tolerance)
Reads a .task file, collocates/integrates it, and compares results. See grid_replay....
static void parse_next_line(const char key[], FILE *fp, const char format[], const int nargs,...)
Parses next line from file, expecting it to match "${key} ${format}".
static void parse_int3(const char key[], FILE *fp, int vec[3])
Shorthand for parsing a vector of three integer values.
static void create_dummy_task_list(const bool orthorhombic, const int border_mask, const double ra[3], const double rab[3], const double radius, const grid_basis_set *basis_set_a, const grid_basis_set *basis_set_b, const int o1, const int o2, const int la_max, const int lb_max, const int cycles, const int cycles_per_block, const int npts_global[][3], const int npts_local[][3], const int shift_local[][3], const int border_width[][3], const double dh[][3][3], const double dh_inv[][3][3], grid_task_list **task_list)
Creates mock task list with one task per cycle.
static void create_dummy_basis_set(const int size, const int lmin, const int lmax, const double zet, grid_basis_set **basis_set)
Creates mock basis set using the identity as decontraction matrix.
static void read_next_line(char line[], int length, FILE *fp)
Reads next line from given filehandle and handles errors.
static int parse_int(const char key[], FILE *fp)
Shorthand for parsing a single integer value.
static double parse_double(const char key[], FILE *fp)
Shorthand for parsing a single double value.
subroutine, public grid_collocate_task_list(task_list, ga_gb_function, pab_blocks, rs_grids)
Collocate all tasks of in given list onto given grids.
subroutine, public grid_free_task_list(task_list)
Deallocates given task list, basis_sets have to be freed separately.
subroutine, public grid_create_task_list(ntasks, natoms, nkinds, nblocks, block_offsets, atom_positions, atom_kinds, basis_sets, level_list, iatom_list, jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list, block_num_list, radius_list, rab_list, rs_grids, task_list)
Allocates a task list which can be passed to grid_collocate_task_list.
subroutine, public grid_integrate_task_list(task_list, compute_tau, calculate_forces, calculate_virial, pab_blocks, rs_grids, hab_blocks, forces, virial)
Integrate all tasks of in given list from given grids.
subroutine, public offload_free_buffer(buffer)
Deallocates given buffer.
subroutine, public offload_create_buffer(length, buffer)
Allocates a buffer of given length, ie. number of elements.
Internal representation of a basis set.
Internal representation of a task list, abstracting various backends.
Internal representation of a buffer.