grid_dgemm_context.c
/*----------------------------------------------------------------------------*/
/* CP2K: A general program to perform molecular dynamics simulations         */
/* Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
/*                                                                            */
/* SPDX-License-Identifier: BSD-3-Clause                                      */
/*----------------------------------------------------------------------------*/

#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../common/grid_library.h"
#include "grid_dgemm_collocate.h"
#include "grid_dgemm_context.h"
#include "grid_dgemm_task_list.h"
#include "grid_dgemm_utils.h"

void return_dh(void *const ptr, const int level, double *const dh) {
  grid_context *const ctx = (grid_context *)ptr;

  assert(ctx->checksum == ctx_checksum);
  dh[0] = ctx->grid[level].dh[0][0];
  dh[1] = ctx->grid[level].dh[0][1];
  dh[2] = ctx->grid[level].dh[0][2];
  dh[3] = ctx->grid[level].dh[1][0];
  dh[4] = ctx->grid[level].dh[1][1];
  dh[5] = ctx->grid[level].dh[1][2];
  dh[6] = ctx->grid[level].dh[2][0];
  dh[7] = ctx->grid[level].dh[2][1];
  dh[8] = ctx->grid[level].dh[2][2];
}

void return_dh_inv(void *const ptr, const int level, double *const dh_inv) {
  grid_context *const ctx = (grid_context *)ptr;

  assert(ctx->checksum == ctx_checksum);
  dh_inv[0] = ctx->grid[level].dh_inv[0][0];
  dh_inv[1] = ctx->grid[level].dh_inv[0][1];
  dh_inv[2] = ctx->grid[level].dh_inv[0][2];
  dh_inv[3] = ctx->grid[level].dh_inv[1][0];
  dh_inv[4] = ctx->grid[level].dh_inv[1][1];
  dh_inv[5] = ctx->grid[level].dh_inv[1][2];
  dh_inv[6] = ctx->grid[level].dh_inv[2][0];
  dh_inv[7] = ctx->grid[level].dh_inv[2][1];
  dh_inv[8] = ctx->grid[level].dh_inv[2][2];
}
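
/* Note: return_dh() and return_dh_inv() flatten the 3x3 matrices row by row,
 * i.e. dh[3 * i + j] == ctx->grid[level].dh[i][j]. An illustrative caller-side
 * reconstruction (a sketch, not part of this file) would be:
 *
 *   double flat[9], h[3][3];
 *   return_dh(ctx, level, flat);
 *   for (int i = 0; i < 3; i++)
 *     for (int j = 0; j < 3; j++)
 *       h[i][j] = flat[3 * i + j];
 */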

int return_num_devs(void *const ptr) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);

  return ctx->number_of_devices;
}

int return_device_id(void *const ptr, const int device) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);

  return ctx->device_id[device];
}

int is_grid_orthorhombic(void *const ptr) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  return ctx->orthorhombic;
}

void update_queue_length(void *const ptr, const int queue_length) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  ctx->queue_length = queue_length;
}

void update_atoms_position(const int natoms,
                           const double atoms_positions[natoms][3],
                           grid_context *data) {
  assert(data != NULL);

  if (natoms == 0)
    return;

  if (data->atom_positions == NULL) {
    data->atom_positions = malloc(3 * natoms * sizeof(double));
  } else {
    if (natoms > data->natoms) {
      data->atom_positions =
          realloc(data->atom_positions, 3 * natoms * sizeof(double));
    }
  }

  data->natoms = natoms;

  if (data->atom_positions) {
    for (int i = 0; i < natoms; i++) {
      data->atom_positions[3 * i] = atoms_positions[i][0];
      data->atom_positions[3 * i + 1] = atoms_positions[i][1];
      data->atom_positions[3 * i + 2] = atoms_positions[i][2];
    }
  }
}
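
/* Note: update_atoms_position() and most of the update_* setters below follow
 * a grow-only allocation pattern: buffers are allocated on first use and
 * realloc'ed only when the new count exceeds what was seen before (either the
 * current count or a *_total capacity field), so repeated updates with equal
 * or smaller sizes reuse the existing storage. update_layouts() is the
 * exception; it simply frees and reallocates its array. */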

void update_atoms_kinds(const int natoms, const int *atoms_kinds,
                        grid_context *data) {
  assert(data != NULL);

  // data->atom_kinds is a table that gives the type of a given atom.
  if (natoms == 0)
    return;

  if (data->atom_kinds == NULL) {
    data->atom_kinds = malloc(natoms * sizeof(int));
  } else {
    if ((natoms > data->natoms) && (data->natoms > 0)) {
      data->atom_kinds = realloc(data->atom_kinds, natoms * sizeof(int));
    }
  }
  // data->natoms is initialized before this function is called.
  if (data->natoms)
    memcpy(data->atom_kinds, atoms_kinds, sizeof(int) * natoms);

  // Convert the 1-based kind indices to 0-based.
  for (int i = 0; i < natoms; i++) {
    data->atom_kinds[i] -= 1;
  }
}

void update_block_offsets(const int nblocks, const int *const block_offsets,
                          grid_context *data) {
  assert(data != NULL);

  if (nblocks == 0)
    return;

  if (data->block_offsets == NULL) {
    data->block_offsets = malloc(nblocks * sizeof(int));
  } else {
    if ((nblocks > data->nblocks_total) && (data->nblocks_total > 0)) {
      data->block_offsets = realloc(data->block_offsets, sizeof(int) * nblocks);
    }
  }

  data->nblocks = nblocks;
  data->nblocks_total = imax(data->nblocks_total, nblocks);
  if (nblocks)
    memcpy(data->block_offsets, block_offsets, nblocks * sizeof(int));
}

void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets,
                      grid_context *data) {
  if (nkinds > data->nkinds_total) {
    if (data->basis_sets == NULL) {
      data->basis_sets = malloc(nkinds * sizeof(grid_basis_set *));
    } else {
      data->basis_sets =
          realloc(data->basis_sets, nkinds * sizeof(grid_basis_set *));
    }
  }
  data->nkinds = nkinds;
  data->nkinds_total = imax(data->nkinds_total, nkinds);
  memcpy(data->basis_sets, basis_sets, nkinds * sizeof(grid_basis_set *));
}
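
/* Note: update_basis_set() stores the basis set pointers themselves (a shallow
 * copy); the grid_basis_set objects remain owned by the caller. This is why
 * destroy_grid_context_dgemm() frees only the pointer array, and why
 * grid_dgemm_free_task_list() states that basis sets have to be freed
 * separately. */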

void update_task_lists(const int nlevels, const int ntasks,
                       const int *const level_list, const int *const iatom_list,
                       const int *const jatom_list, const int *const iset_list,
                       const int *const jset_list, const int *const ipgf_list,
                       const int *const jpgf_list,
                       const int *const border_mask_list,
                       const int *block_num_list,
                       const double *const radius_list,
                       const double rab_list[ntasks][3], grid_context *ctx) {

  assert(ctx->checksum == ctx_checksum);

  if (nlevels == 0)
    return;

  if (ctx->ntasks == 0) {
    // Count tasks per level.
    size_t size = nlevels * sizeof(int);
    ctx->tasks_per_level = malloc(size);
    ctx->tasks = malloc(nlevels * sizeof(_task *));
    /* memset(ctx->tasks, 0, nlevels * sizeof(_task *)); */
    if (ntasks)
      ctx->tasks[0] = malloc(ntasks * sizeof(_task));
    else
      ctx->tasks[0] = NULL;
  } else {
    if (ctx->nlevels_total < nlevels) {
      /* save the address of the full task list. NULL when completely empty */
      ctx->tasks = realloc(ctx->tasks, nlevels * sizeof(_task *));
    }
    if (ctx->ntasks_total < ntasks) {
      ctx->tasks[0] = realloc(ctx->tasks[0], ntasks * sizeof(_task));
    }
  }

  memset(ctx->tasks_per_level, 0, nlevels * sizeof(int));
  ctx->nlevels = nlevels;
  ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
  ctx->ntasks_total = imax(ctx->ntasks_total, ntasks);
  ctx->ntasks = ntasks;

  for (int i = 0; i < ntasks; i++) {
    ctx->tasks_per_level[level_list[i] - 1]++;
    assert(i == 0 || level_list[i] >= level_list[i - 1]); // expect ordered list
  }

  for (int i = 1; i < ctx->nlevels; i++) {
    ctx->tasks[i] = ctx->tasks[i - 1] + ctx->tasks_per_level[i - 1];
  }
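
  /* The loop above turns the per-level task counts into sub-array pointers via
   * a running (exclusive prefix) sum over the single allocation ctx->tasks[0].
   * Illustrative example, not part of this file: with nlevels = 3 and
   * tasks_per_level = {3, 2, 4},
   *
   *   ctx->tasks[0] = &all_tasks[0];   // 3 tasks of level 1
   *   ctx->tasks[1] = &all_tasks[3];   // 2 tasks of level 2
   *   ctx->tasks[2] = &all_tasks[5];   // 4 tasks of level 3
   *
   * This relies on the incoming task list being ordered by level, which the
   * assert in the counting loop checks. */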

  int prev_block_num = -1;
  int prev_iset = -1;
  int prev_jset = -1;
  int prev_level = -1;
  _task *task = ctx->tasks[0];
  for (int i = 0; i < ntasks; i++) {
    if (prev_level != (level_list[i] - 1)) {
      prev_level = level_list[i] - 1;
      prev_block_num = -1;
      prev_iset = -1;
      prev_jset = -1;
    }
    task->level = level_list[i] - 1;
    task->iatom = iatom_list[i] - 1;
    task->jatom = jatom_list[i] - 1;
    task->iset = iset_list[i] - 1;
    task->jset = jset_list[i] - 1;
    task->ipgf = ipgf_list[i] - 1;
    task->jpgf = jpgf_list[i] - 1;
    task->border_mask = border_mask_list[i];
    task->block_num = block_num_list[i] - 1;
    task->radius = radius_list[i];
    task->rab[0] = rab_list[i][0];
    task->rab[1] = rab_list[i][1];
    task->rab[2] = rab_list[i][2];
    const int iatom = task->iatom;
    const int jatom = task->jatom;
    const int iset = task->iset;
    const int jset = task->jset;
    const int ipgf = task->ipgf;
    const int jpgf = task->jpgf;
    const int ikind = ctx->atom_kinds[iatom];
    const int jkind = ctx->atom_kinds[jatom];
    const grid_basis_set *ibasis = ctx->basis_sets[ikind];
    const grid_basis_set *jbasis = ctx->basis_sets[jkind];
    const int ncoseta = ncoset(ibasis->lmax[iset]);
    const int ncosetb = ncoset(jbasis->lmax[jset]);

    task->zeta[0] = ibasis->zet[iset * ibasis->maxpgf + ipgf];
    task->zeta[1] = jbasis->zet[jset * jbasis->maxpgf + jpgf];

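    /* The block below combines the two primitive Gaussians with exponents
     * zeta[0] and zeta[1] via the Gaussian product theorem: zetp = zeta_a +
     * zeta_b, the product center is rp = ra + (zeta_b / zetp) * rab, and the
     * prefactor is exp(-zeta_a * zeta_b / zetp * |rab|^2), written here as
     * exp(-zeta_a * f * rab2) with f = zeta_b / zetp. */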
    const double *ra = &ctx->atom_positions[3 * iatom];
    const double zetp = task->zeta[0] + task->zeta[1];
    const double f = task->zeta[1] / zetp;
    const double rab2 = task->rab[0] * task->rab[0] +
                        task->rab[1] * task->rab[1] +
                        task->rab[2] * task->rab[2];

    task->prefactor = exp(-task->zeta[0] * f * rab2);
    task->zetp = zetp;

    const int block_num = task->block_num;

    for (int i = 0; i < 3; i++) {
      task->ra[i] = ra[i];
      task->rp[i] = ra[i] + f * task->rab[i];
      task->rb[i] = ra[i] + task->rab[i];
    }

    task->lmax[0] = ibasis->lmax[iset];
    task->lmax[1] = jbasis->lmax[jset];
    task->lmin[0] = ibasis->lmin[iset];
    task->lmin[1] = jbasis->lmin[jset];

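    /* The update_block_ flag set below marks the first task of each
     * (block_num, iset, jset) group within a level; downstream code can use it
     * to reload the associated matrix block only when it actually changes
     * rather than once per task. */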
    if ((block_num != prev_block_num) || (iset != prev_iset) ||
        (jset != prev_jset)) {
      task->update_block_ = true;
      prev_block_num = block_num;
      prev_iset = iset;
      prev_jset = jset;
    } else {
      task->update_block_ = false;
    }

    task->offset[0] = ipgf * ncoseta;
    task->offset[1] = jpgf * ncosetb;
    task++;
  }

  // Find largest Cartesian subblock size.
  ctx->maxco = 0;
  for (int i = 0; i < ctx->nkinds; i++) {
    ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
  }
}

void update_layouts(const int nlevels, const int npts_global[nlevels][3],
                    const int npts_local[nlevels][3],
                    const int shift_local[nlevels][3],
                    const int border_width[nlevels][3],
                    const double dh[nlevels][3][3],
                    const double dh_inv[nlevels][3][3], grid_context *ctx) {

  assert(ctx != NULL);
  assert(ctx->checksum == ctx_checksum);

  if (ctx->layouts != NULL) {
    free(ctx->layouts);
  }

  ctx->layouts = malloc(sizeof(_layout) * nlevels);

  for (int level = 0; level < nlevels; level++) {
    for (int i = 0; i < 3; i++) {
      ctx->layouts[level].npts_global[i] = npts_global[level][i];
      ctx->layouts[level].npts_local[i] = npts_local[level][i];
      ctx->layouts[level].shift_local[i] = shift_local[level][i];
      ctx->layouts[level].border_width[i] = border_width[level][i];
      for (int j = 0; j < 3; j++) {
        ctx->layouts[level].dh[i][j] = dh[level][i][j];
        ctx->layouts[level].dh_inv[i][j] = dh_inv[level][i][j];
      }
    }
  }
}

void update_grid(const int nlevels, grid_context *ctx) {
  assert(ctx != NULL);
  assert(ctx->checksum == ctx_checksum);

  if (nlevels == 0)
    return;

  if (ctx->grid == NULL) {
    ctx->grid = malloc(sizeof(tensor) * nlevels);
  } else {
    if (ctx->nlevels_total < nlevels) {
      ctx->grid = realloc(ctx->grid, sizeof(tensor) * nlevels);
    }
  }

  ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
  ctx->nlevels = nlevels;
}

void *create_grid_context_dgemm(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int *block_offsets, const double atom_positions[natoms][3],
    const int *const atom_kinds, const grid_basis_set **const basis_sets,
    const int *const level_list, const int *const iatom_list,
    const int *jatom_list, const int *const iset_list,
    const int *const jset_list, const int *const ipgf_list,
    const int *const jpgf_list, const int *const border_mask_list,
    const int *block_num_list, const double *const radius_list,
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3]) {

  grid_context *ctx = malloc(sizeof(grid_context));

  memset(ctx, 0, sizeof(grid_context));

  ctx->checksum = ctx_checksum;
  ctx->orthorhombic = orthorhombic;
  update_block_offsets(nblocks, block_offsets, ctx);
  update_atoms_position(natoms, atom_positions, ctx);
  update_atoms_kinds(natoms, atom_kinds, ctx);
  update_basis_set(nkinds, basis_sets, ctx);
  update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
                    iset_list, jset_list, ipgf_list, jpgf_list,
                    border_mask_list, block_num_list, radius_list, rab_list,
                    ctx);
  update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
                 dh, dh_inv, ctx);
  update_grid(nlevels, ctx);

  const int max_threads = omp_get_max_threads();

  ctx->handler =
      malloc(sizeof(struct collocation_integration_ *) * max_threads);

  for (int i = 0; i < max_threads; i++) {
    ctx->handler[i] = collocate_create_handle();
  }

  ctx->number_of_handler = max_threads;

  return ctx;
}

void update_grid_context_dgemm(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int *block_offsets, const double atom_positions[natoms][3],
    const int *const atom_kinds, const grid_basis_set **const basis_sets,
    const int *const level_list, const int *const iatom_list,
    const int *jatom_list, const int *const iset_list,
    const int *const jset_list, const int *const ipgf_list,
    const int *const jpgf_list, const int *const border_mask_list,
    const int *block_num_list, const double *const radius_list,
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3], void *ptr) {

  assert(ptr != NULL);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);

  ctx->orthorhombic = orthorhombic;
  update_block_offsets(nblocks, block_offsets, ctx);
  update_atoms_position(natoms, atom_positions, ctx);
  update_atoms_kinds(natoms, atom_kinds, ctx);
  update_basis_set(nkinds, basis_sets, ctx);
  update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
                    iset_list, jset_list, ipgf_list, jpgf_list,
                    border_mask_list, block_num_list, radius_list, rab_list,
                    ctx);
  update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
                 dh, dh_inv, ctx);
  update_grid(nlevels, ctx);

  // Find largest Cartesian subblock size.
  ctx->maxco = 0;
  for (int i = 0; i < nkinds; i++) {
    ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
  }
}

void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices,
                                    const int *device_id) {
  assert(ptr != NULL);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  ctx->work_on_gpu = false;
  if (number_of_devices <= 0) {
    return;
  }

  ctx->number_of_devices = number_of_devices;
  ctx->queue_length = 8192;
  if (ctx->device_id == NULL)
    ctx->device_id = malloc(sizeof(int) * number_of_devices);
  else
    ctx->device_id = realloc(ctx->device_id, sizeof(int) * number_of_devices);

  memcpy(ctx->device_id, device_id, sizeof(int) * number_of_devices);
}

void destroy_grid_context_dgemm(void *ptr) {
  assert(ptr);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  free(ctx->block_offsets);
  free(ctx->atom_positions);
  free(ctx->atom_kinds);
  free(ctx->basis_sets);
  free(ctx->tasks[0]);
  free(ctx->tasks);
  free(ctx->tasks_per_level);
  free(ctx->layouts);
  free(ctx->grid);
  if (ctx->device_id)
    free(ctx->device_id);

  if (ctx->handler) {
    for (int i = 0; i < ctx->number_of_handler; i++) {
      collocate_destroy_handle(ctx->handler[i]);
    }
    free(ctx->handler);
  }

  free(ctx);
}

void apply_cutoff(void *ptr) {
  assert(ptr);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  ctx->apply_cutoff = true;
}

void set_grid_parameters(
    tensor *grid, const bool orthorhombic,
    const int grid_full_size[3],  /* size of the full grid */
    const int grid_local_size[3], /* size of the local grid block */
    const int shift_local[3],     /* coordinates of the lower corner of the
                                     local grid window */
    const int border_width[3],    /* width of the borders */
    const double
        dh[3][3], /* displacement vectors of the grid (cartesian) -> (ijk) */
    const double dh_inv[3][3], /* (ijk) -> (x,y,z) */
    offload_buffer *grid_) {
  memset(grid, 0, sizeof(tensor));
  initialize_tensor_3(grid, grid_local_size[2], grid_local_size[1],
                      grid_local_size[0]);
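  /* Note the reversed argument order: the tensor is dimensioned as (z, y, x),
   * so grid_local_size[2] is the slowest-varying extent and the x extent
   * grid_local_size[0], which is also assigned to grid->ld_ just below, is
   * the leading (contiguous) dimension. */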

  grid->data = grid_->host_buffer;
  grid->ld_ = grid_local_size[0];

  setup_global_grid_size(grid, &grid_full_size[0]);

  /* the grid is divided over several ranks or not periodic */
  if ((grid_local_size[0] != grid_full_size[0]) ||
      (grid_local_size[1] != grid_full_size[1]) ||
      (grid_local_size[2] != grid_full_size[2])) {
    setup_grid_window(grid, shift_local, border_width, 0);
  } else {
    grid->window_shift[0] = 0;
    grid->window_shift[1] = 0;
    grid->window_shift[2] = 0;

    grid->window_size[0] = grid->size[0];
    grid->window_size[1] = grid->size[1];
    grid->window_size[2] = grid->size[2];
  }

  grid->dh[0][0] = dh[0][0];
  grid->dh[0][1] = dh[0][1];
  grid->dh[0][2] = dh[0][2];
  grid->dh[1][0] = dh[1][0];
  grid->dh[1][1] = dh[1][1];
  grid->dh[1][2] = dh[1][2];
  grid->dh[2][0] = dh[2][0];
  grid->dh[2][1] = dh[2][1];
  grid->dh[2][2] = dh[2][2];

  grid->dh_inv[0][0] = dh_inv[0][0];
  grid->dh_inv[0][1] = dh_inv[0][1];
  grid->dh_inv[0][2] = dh_inv[0][2];
  grid->dh_inv[1][0] = dh_inv[1][0];
  grid->dh_inv[1][1] = dh_inv[1][1];
  grid->dh_inv[1][2] = dh_inv[1][2];
  grid->dh_inv[2][0] = dh_inv[2][0];
  grid->dh_inv[2][1] = dh_inv[2][1];
  grid->dh_inv[2][2] = dh_inv[2][2];

  verify_orthogonality(dh, grid->orthogonal);

  if (orthorhombic) {
    grid->orthogonal[0] = true;
    grid->orthogonal[1] = true;
    grid->orthogonal[2] = true;
  }
}

/*******************************************************************************
 * \brief Allocates a task list for the dgemm backend.
 *        See grid_task_list.h for details.
 ******************************************************************************/
void grid_dgemm_create_task_list(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int block_offsets[nblocks], const double atom_positions[natoms][3],
    const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
    const int level_list[ntasks], const int iatom_list[ntasks],
    const int jatom_list[ntasks], const int iset_list[ntasks],
    const int jset_list[ntasks], const int ipgf_list[ntasks],
    const int jpgf_list[ntasks], const int border_mask_list[ntasks],
    const int block_num_list[ntasks], const double radius_list[ntasks],
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list) {

  if (*task_list == NULL) {
    *task_list = create_grid_context_dgemm(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv);
  } else {
    update_grid_context_dgemm(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, *task_list);
  }

  const grid_library_config config = grid_library_get_config();
  if (config.apply_cutoff) {
    apply_cutoff(*task_list);
  }
}

/*******************************************************************************
 * \brief Deallocates given task list, basis_sets have to be freed separately.
 ******************************************************************************/
void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list) {
  destroy_grid_context_dgemm(task_list);
}
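
/* Illustrative lifecycle of the public API above (a sketch, not part of this
 * file; the "..." stand for the geometry, basis-set and task arrays a caller
 * would provide):
 *
 *   grid_dgemm_task_list *task_list = NULL;
 *   grid_dgemm_create_task_list(orthorhombic, ntasks, nlevels, natoms, nkinds,
 *                               nblocks, ..., &task_list); // create or update
 *   // ... use the task list for collocation / integration ...
 *   grid_dgemm_free_task_list(task_list); // basis sets are freed separately
 */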