23void grid_create_task_list(
24 const bool orthorhombic,
const int ntasks,
const int nlevels,
25 const int natoms,
const int nkinds,
const int nblocks,
26 const int block_offsets[nblocks],
const double atom_positions[natoms][3],
27 const int atom_kinds[natoms],
const grid_basis_set *basis_sets[nkinds],
28 const int level_list[ntasks],
const int iatom_list[ntasks],
29 const int jatom_list[ntasks],
const int iset_list[ntasks],
30 const int jset_list[ntasks],
const int ipgf_list[ntasks],
31 const int jpgf_list[ntasks],
const int border_mask_list[ntasks],
32 const int block_num_list[ntasks],
const double radius_list[ntasks],
33 const double rab_list[ntasks][3],
const int npts_global[nlevels][3],
34 const int npts_local[nlevels][3],
const int shift_local[nlevels][3],
35 const int border_width[nlevels][3],
const double dh[nlevels][3][3],
36 const double dh_inv[nlevels][3][3],
grid_task_list **task_list_out) {
42 if (*task_list_out == NULL) {
45 assert(task_list != NULL);
47 *task_list_out = task_list;
51#if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
52 !defined(__NO_OFFLOAD_GRID)
65 if ((nblocks == 0) || (ntasks == 0) || (nlevels == 0)) {
66 task_list->
empty =
true;
69 task_list->
empty =
false;
72 size_t size = (size_t)nlevels * 3 *
sizeof(
int);
74 if (task_list->
nlevels < nlevels) {
86 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
87 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
88 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
89 block_num_list, radius_list, rab_list, npts_global,
npts_local,
90 shift_local, border_width, dh, dh_inv, &task_list->
ref);
98 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
99 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
100 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
101 border_mask_list, block_num_list, radius_list, rab_list, npts_global,
102 npts_local, shift_local, border_width, dh, dh_inv, &task_list->
cpu);
106 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
107 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
108 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
109 border_mask_list, block_num_list, radius_list, rab_list, npts_global,
114#if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
115 !defined(__NO_OFFLOAD_GRID)
117 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
118 &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
119 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
120 border_mask_list, block_num_list, radius_list, &rab_list[0][0],
121 &npts_global[0][0], &
npts_local[0][0], &shift_local[0][0],
122 &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->gpu);
124 fprintf(stderr,
"Error: The GPU grid backend is not available. "
125 "Please re-compile with -D__OFFLOAD");
131 printf(
"Error: Unknown grid backend: %i.\n",
config.
backend);
147 if (task_list->
ref != NULL) {
149 task_list->
ref = NULL;
151 if (task_list->
cpu != NULL) {
153 task_list->
cpu = NULL;
155 if (task_list->
dgemm != NULL) {
157 task_list->
dgemm = NULL;
159#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
160 if (task_list->gpu != NULL) {
162 task_list->gpu = NULL;
176 const enum grid_func func,
const int nlevels,
185 if (task_list->
empty) {
186 for (
int level = 0; level < nlevels; level++)
187 memset(grids[level]->host_buffer, 0, grids[level]->size);
192 assert(task_list->
nlevels == nlevels);
193 for (
int ilevel = 0; ilevel < nlevels; ilevel++) {
212#if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
213 !defined(__NO_OFFLOAD_GRID)
220 printf(
"Error: Unknown grid backend: %i.\n", task_list->
backend);
229 for (
int level = 0; level < nlevels; level++) {
230 const int npts_local_total =
232 grids_ref[level] = NULL;
233 offload_create_buffer(npts_local_total, &grids_ref[level]);
241 const double tolerance = 1e-12;
242 double max_rel_diff = 0.0;
243 for (
int level = 0; level < nlevels; level++) {
245 for (
int j = 0; j <
npts_local[level][1]; j++) {
246 for (
int k = 0; k <
npts_local[level][2]; k++) {
250 const double test_value = grids[level]->host_buffer[
idx];
251 const double diff = fabs(test_value - ref_value);
252 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
253 max_rel_diff = fmax(max_rel_diff, rel_diff);
254 if (rel_diff > tolerance) {
255 fprintf(stderr,
"Error: Validation failure in grid collocate\n");
256 fprintf(stderr,
" diff: %le\n", diff);
257 fprintf(stderr,
" rel_diff: %le\n", rel_diff);
258 fprintf(stderr,
" value: %le\n", ref_value);
259 fprintf(stderr,
" level: %i\n", level);
260 fprintf(stderr,
" ijk: %i %i %i\n",
i, j, k);
266 offload_free_buffer(grids_ref[level]);
267 printf(
"Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
278 const int natoms,
const int nlevels,
283 double forces[natoms][3],
double virial[3][3]) {
290 if (task_list->
empty) {
305 forces[
atom][0] = 0.0;
306 forces[
atom][1] = 0.0;
307 forces[
atom][2] = 0.0;
314 assert(task_list->
nlevels == nlevels);
315 for (
int ilevel = 0; ilevel < nlevels; ilevel++) {
321 assert(forces == NULL || pab_blocks != NULL);
322 assert(virial == NULL || pab_blocks != NULL);
325#if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
326 !defined(__NO_OFFLOAD_GRID)
329 pab_blocks, grids, hab_blocks, &forces[0][0],
335 nlevels, pab_blocks, grids, hab_blocks,
340 pab_blocks, grids, hab_blocks, forces, virial);
344 pab_blocks, grids, hab_blocks, forces, virial);
347 printf(
"Error: Unknown grid backend: %i.\n", task_list->
backend);
355 const int hab_length = hab_blocks->
size /
sizeof(double);
357 offload_create_buffer(hab_length, &hab_blocks_ref);
358 double forces_ref[natoms][3], virial_ref[3][3];
362 pab_blocks, grids, hab_blocks_ref,
363 (forces != NULL) ? forces_ref : NULL,
364 (virial != NULL) ? virial_ref : NULL);
367 const double hab_tolerance = 1e-12;
368 double hab_max_rel_diff = 0.0;
369 for (
int i = 0;
i < hab_length;
i++) {
370 const double ref_value = hab_blocks_ref->
host_buffer[
i];
372 const double diff = fabs(test_value - ref_value);
373 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
374 hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
375 if (rel_diff > hab_tolerance) {
376 fprintf(stderr,
"Error: Validation failure in grid integrate\n");
377 fprintf(stderr,
" hab diff: %le\n", diff);
378 fprintf(stderr,
" hab rel_diff: %le\n", rel_diff);
379 fprintf(stderr,
" hab value: %le\n", ref_value);
380 fprintf(stderr,
" hab i: %i\n",
i);
386 const double forces_tolerance = 1e-8;
387 double forces_max_rel_diff = 0.0;
388 if (forces != NULL) {
389 for (
int iatom = 0; iatom < natoms; iatom++) {
390 for (
int idir = 0; idir < 3; idir++) {
391 const double ref_value = forces_ref[iatom][idir];
392 const double test_value = forces[iatom][idir];
393 const double diff = fabs(test_value - ref_value);
394 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
395 forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
396 if (rel_diff > forces_tolerance) {
397 fprintf(stderr,
"Error: Validation failure in grid integrate\n");
398 fprintf(stderr,
" forces diff: %le\n", diff);
399 fprintf(stderr,
" forces rel_diff: %le\n", rel_diff);
400 fprintf(stderr,
" forces value: %le\n", ref_value);
401 fprintf(stderr,
" forces atom: %i\n", iatom);
402 fprintf(stderr,
" forces dir: %i\n", idir);
410 const double virial_tolerance = 1e-8;
411 double virial_max_rel_diff = 0.0;
412 if (virial != NULL) {
413 for (
int i = 0;
i < 3;
i++) {
414 for (
int j = 0; j < 3; j++) {
415 const double ref_value = virial_ref[
i][j];
416 const double test_value = virial[
i][j];
417 const double diff = fabs(test_value - ref_value);
418 const double rel_diff = diff / fmax(1.0, fabs(ref_value));
419 virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
420 if (rel_diff > virial_tolerance) {
421 fprintf(stderr,
"Error: Validation failure in grid integrate\n");
422 fprintf(stderr,
" virial diff: %le\n", diff);
423 fprintf(stderr,
" virial rel_diff: %le\n", rel_diff);
424 fprintf(stderr,
" virial value: %le\n", ref_value);
425 fprintf(stderr,
" virial ij: %i %i\n",
i, j);
432 printf(
"Validated grid_integrate, max rel. diff: %le %le %le\n",
433 hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
434 offload_free_buffer(hab_blocks_ref);
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int i
static void const int const int const int const int const int const double const int const int const int npts_local[3]
void grid_cpu_collocate_task_list(const grid_cpu_task_list *ptr, const enum grid_func func, const int nlevels, const offload_buffer *pab_blocks, offload_buffer *grids[nlevels])
Collocate all tasks in the given list onto the given grids. See grid_task_list.h for details.
void grid_cpu_free_task_list(grid_cpu_task_list *ptr)
Deallocates the given task list; basis_sets have to be freed separately.
void grid_cpu_create_task_list(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int block_offsets[nblocks], const double atom_positions[natoms][3], const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds], const int level_list[ntasks], const int iatom_list[ntasks], const int jatom_list[ntasks], const int iset_list[ntasks], const int jset_list[ntasks], const int ipgf_list[ntasks], const int jpgf_list[ntasks], const int border_mask_list[ntasks], const int block_num_list[ntasks], const double radius_list[ntasks], const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], grid_cpu_task_list **task_list_out)
Allocates a task list for the cpu backend. See grid_task_list.h for details.
void grid_cpu_integrate_task_list(const grid_cpu_task_list *ptr, const bool compute_tau, const int natoms, const int nlevels, const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels], offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3])
Integrate all tasks in the given list from the given grids. See grid_task_list.h for details.
void grid_dgemm_collocate_task_list(grid_dgemm_task_list *const ptr, const enum grid_func func, const int nlevels, const offload_buffer *pab_blocks, offload_buffer *grids[nlevels])
Collocate all tasks of a given list onto given grids. See grid_task_list.h for details.
void grid_dgemm_create_task_list(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int block_offsets[nblocks], const double atom_positions[natoms][3], const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds], const int level_list[ntasks], const int iatom_list[ntasks], const int jatom_list[ntasks], const int iset_list[ntasks], const int jset_list[ntasks], const int ipgf_list[ntasks], const int jpgf_list[ntasks], const int border_mask_list[ntasks], const int block_num_list[ntasks], const double radius_list[ntasks], const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list)
Allocates a task list for the dgemm backend. See grid_task_list.h for details.
void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list)
Deallocates the given task list; basis_sets have to be freed separately.
void grid_dgemm_integrate_task_list(void *ptr, const bool compute_tau, const int natoms, const int nlevels, const offload_buffer *const pab_blocks, offload_buffer *grids[nlevels], offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3])
Integrate all tasks in the given list from the given grids using matrix-matrix multiplication.
void grid_gpu_collocate_task_list(const grid_gpu_task_list *ptr, const enum grid_func func, const int nlevels, const offload_buffer *pab_blocks, offload_buffer **grids)
Collocate all tasks in the given list onto the given grids.
void grid_gpu_create_task_list(const bool ortho, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int *block_offsets, const double *atom_positions, const int *atom_kinds, const grid_basis_set **basis_sets, const int *level_list, const int *iatom_list, const int *jatom_list, const int *iset_list, const int *jset_list, const int *ipgf_list, const int *jpgf_list, const int *border_mask_list, const int *block_num_list, const double *radius_list, const double *rab_list, const int *npts_global, const int *npts_local, const int *shift_local, const int *border_width, const double *dh, const double *dh_inv, grid_gpu_task_list **ptr)
Allocates a task list for the GPU backend. See grid_ctx.h for details.
void grid_gpu_free_task_list(grid_gpu_task_list *ptr)
Destroy a context.
void grid_gpu_integrate_task_list(const grid_gpu_task_list *ptr, const bool compute_tau, const int nlevels, const offload_buffer *pab_blocks, const offload_buffer **grids, offload_buffer *hab_blocks, double *forces, double *virial)
Integrate all tasks in the given list from the given grids. See grid_ctx.h for details.
static grid_library_config config
grid_library_config grid_library_get_config(void)
Returns the library config.
void grid_ref_free_task_list(grid_ref_task_list *ptr)
Deallocates the given task list; basis_sets have to be freed separately.
void grid_ref_create_task_list(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int block_offsets[nblocks], const double atom_positions[natoms][3], const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds], const int level_list[ntasks], const int iatom_list[ntasks], const int jatom_list[ntasks], const int iset_list[ntasks], const int jset_list[ntasks], const int ipgf_list[ntasks], const int jpgf_list[ntasks], const int border_mask_list[ntasks], const int block_num_list[ntasks], const double radius_list[ntasks], const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], grid_ref_task_list **task_list_out)
Allocates a task list for the reference backend. See grid_task_list.h for details.
void grid_ref_integrate_task_list(const grid_ref_task_list *ptr, const bool compute_tau, const int natoms, const int nlevels, const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels], offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3])
Integrate all tasks in the given list from the given grids. See grid_task_list.h for details.
void grid_ref_collocate_task_list(const grid_ref_task_list *ptr, const enum grid_func func, const int nlevels, const offload_buffer *pab_blocks, offload_buffer *grids[nlevels])
Collocate all tasks in the given list onto the given grids. See grid_task_list.h for details.
Internal representation of a basis set.
Configuration of the grid library.
enum grid_backend backend
Internal representation of a task list, abstracting various backends.
grid_dgemm_task_list * dgemm
Internal representation of a buffer.