grid_dgemm_context.c
/*----------------------------------------------------------------------------*/
/*  CP2K: A general program to perform molecular dynamics simulations         */
/*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
/*                                                                            */
/*  SPDX-License-Identifier: BSD-3-Clause                                     */
/*----------------------------------------------------------------------------*/

#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../common/grid_library.h"
#include "grid_dgemm_context.h"
#include "grid_dgemm_utils.h"

void return_dh(void *const ptr, const int level, double *const dh) {
  grid_context *const ctx = (grid_context *)ptr;

  assert(ctx->checksum == ctx_checksum);
  dh[0] = ctx->grid[level].dh[0][0];
  dh[1] = ctx->grid[level].dh[0][1];
  dh[2] = ctx->grid[level].dh[0][2];
  dh[3] = ctx->grid[level].dh[1][0];
  dh[4] = ctx->grid[level].dh[1][1];
  dh[5] = ctx->grid[level].dh[1][2];
  dh[6] = ctx->grid[level].dh[2][0];
  dh[7] = ctx->grid[level].dh[2][1];
  dh[8] = ctx->grid[level].dh[2][2];
}

void return_dh_inv(void *const ptr, const int level, double *const dh_inv) {
  grid_context *const ctx = (grid_context *)ptr;

  assert(ctx->checksum == ctx_checksum);
  dh_inv[0] = ctx->grid[level].dh_inv[0][0];
  dh_inv[1] = ctx->grid[level].dh_inv[0][1];
  dh_inv[2] = ctx->grid[level].dh_inv[0][2];
  dh_inv[3] = ctx->grid[level].dh_inv[1][0];
  dh_inv[4] = ctx->grid[level].dh_inv[1][1];
  dh_inv[5] = ctx->grid[level].dh_inv[1][2];
  dh_inv[6] = ctx->grid[level].dh_inv[2][0];
  dh_inv[7] = ctx->grid[level].dh_inv[2][1];
  dh_inv[8] = ctx->grid[level].dh_inv[2][2];
}
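
/* Illustrative sketch (not part of the original source): return_dh() and
 * return_dh_inv() flatten the 3x3 matrices row-major, i.e.
 * dh_flat[3 * i + j] == ctx->grid[level].dh[i][j]. A caller that wants the
 * matrix form back could unpack it as below; print_dh_matrix is a
 * hypothetical helper, not part of CP2K. */
static void print_dh_matrix(void *const ctx, const int level) {
  double dh_flat[9];
  return_dh(ctx, level, dh_flat);
  for (int i = 0; i < 3; i++) {
    /* row i of the grid displacement matrix */
    printf("%g %g %g\n", dh_flat[3 * i + 0], dh_flat[3 * i + 1],
           dh_flat[3 * i + 2]);
  }
}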

int return_num_devs(void *const ptr) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);

  return ctx->number_of_devices;
}

int return_device_id(void *const ptr, const int device) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);

  return ctx->device_id[device];
}

int is_grid_orthorhombic(void *const ptr) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  return ctx->orthorhombic;
}

void update_queue_length(void *const ptr, const int queue_length) {
  grid_context *const ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  ctx->queue_length = queue_length;
}

void update_atoms_position(const int natoms,
                           const double atoms_positions[natoms][3],
                           grid_context *data) {
  assert(data != NULL);

  if (natoms == 0)
    return;

  if (data->atom_positions == NULL) {
    data->atom_positions = malloc(3 * natoms * sizeof(double));
  } else {
    if (natoms > data->natoms) {
      data->atom_positions =
          realloc(data->atom_positions, 3 * natoms * sizeof(double));
    }
  }
  assert(data->atom_positions != NULL);

  data->natoms = natoms;

  if (data->atom_positions) {
    for (int i = 0; i < natoms; i++) {
      data->atom_positions[3 * i] = atoms_positions[i][0];
      data->atom_positions[3 * i + 1] = atoms_positions[i][1];
      data->atom_positions[3 * i + 2] = atoms_positions[i][2];
    }
  }
}

void update_atoms_kinds(const int natoms, const int *atoms_kinds,
                        grid_context *data) {
  assert(data != NULL);

  // data->atom_kinds is a table that gives the type of a given atom.
  if (natoms == 0)
    return;

  if (data->atom_kinds == NULL) {
    data->atom_kinds = malloc(natoms * sizeof(int));
  } else {
    if ((natoms > data->natoms) && (data->natoms > 0)) {
      data->atom_kinds = realloc(data->atom_kinds, natoms * sizeof(int));
    }
  }
  assert(data->atom_kinds != NULL);
  // data->natoms is initialized before calling this function.
  if (data->natoms)
    memcpy(data->atom_kinds, atoms_kinds, sizeof(int) * natoms);

  // Convert from Fortran 1-based to C 0-based kind indices.
  for (int i = 0; i < natoms; i++) {
    data->atom_kinds[i] -= 1;
  }
}

void update_block_offsets(const int nblocks, const int *const block_offsets,
                          grid_context *data) {
  assert(data != NULL);

  if (nblocks == 0)
    return;

  if (data->block_offsets == NULL) {
    data->block_offsets = malloc(nblocks * sizeof(int));
  } else {
    if ((nblocks > data->nblocks_total) && (data->nblocks_total > 0)) {
      data->block_offsets = realloc(data->block_offsets, sizeof(int) * nblocks);
    }
  }
  assert(data->block_offsets != NULL);

  data->nblocks = nblocks;
  data->nblocks_total = imax(data->nblocks_total, nblocks);
  if (nblocks)
    memcpy(data->block_offsets, block_offsets, nblocks * sizeof(int));
}

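/* Pattern sketch (illustrative, not part of the original source): the
 * update_* helpers in this file only grow their buffers and remember the
 * high-water mark in a *_total counter, so repeated updates with smaller
 * sizes reuse the existing allocation. A generic, hypothetical version of
 * that idiom: */
static void *grow_only(void *buf, int *capacity, const int n,
                       const size_t elem_size) {
  if (buf == NULL || n > *capacity) {
    buf = realloc(buf, (size_t)n * elem_size); /* realloc(NULL, ..) == malloc */
    assert(buf != NULL);
    *capacity = imax(*capacity, n);
  }
  return buf;
}
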
void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets,
                      grid_context *data) {
  if (nkinds > data->nkinds_total) {
    if (data->basis_sets == NULL) {
      data->basis_sets = malloc(nkinds * sizeof(grid_basis_set *));
    } else {
      data->basis_sets =
          realloc(data->basis_sets, nkinds * sizeof(grid_basis_set *));
    }
  }
  assert(data->basis_sets != NULL);
  data->nkinds = nkinds;
  data->nkinds_total = imax(data->nkinds_total, nkinds);
  memcpy(data->basis_sets, basis_sets, nkinds * sizeof(grid_basis_set *));
}

void update_task_lists(const int nlevels, const int ntasks,
                       const int *const level_list, const int *const iatom_list,
                       const int *const jatom_list, const int *const iset_list,
                       const int *const jset_list, const int *const ipgf_list,
                       const int *const jpgf_list,
                       const int *const border_mask_list,
                       const int *block_num_list,
                       const double *const radius_list,
                       const double rab_list[ntasks][3], grid_context *ctx) {

  assert(ctx->checksum == ctx_checksum);

  if (nlevels == 0)
    return;

  if (ctx->ntasks == 0) {
    // Count tasks per level.
    size_t size = nlevels * sizeof(int);
    ctx->tasks_per_level = malloc(size);
    ctx->tasks = malloc(nlevels * sizeof(_task *));
    /* memset(ctx->tasks, 0, nlevels * sizeof(_task *)); */
    if (ntasks)
      ctx->tasks[0] = malloc(ntasks * sizeof(_task));
    else
      ctx->tasks[0] = NULL;
  } else {
    if (ctx->nlevels_total < nlevels) {
      /* save the address of the full task list. NULL when completely empty */
      ctx->tasks = realloc(ctx->tasks, nlevels * sizeof(_task *));
      assert(ctx->tasks != NULL);
    }
    if (ctx->ntasks_total < ntasks) {
      ctx->tasks[0] = realloc(ctx->tasks[0], ntasks * sizeof(_task));
      assert(ctx->tasks[0] != NULL);
    }
  }

  memset(ctx->tasks_per_level, 0, nlevels * sizeof(int));
  ctx->nlevels = nlevels;
  ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
  ctx->ntasks_total = imax(ctx->ntasks_total, ntasks);
  ctx->ntasks = ntasks;

  for (int i = 0; i < ntasks; i++) {
    ctx->tasks_per_level[level_list[i] - 1]++;
    assert(i == 0 || level_list[i] >= level_list[i - 1]); // expect ordered list
  }

  for (int i = 1; i < ctx->nlevels; i++) {
    ctx->tasks[i] = ctx->tasks[i - 1] + ctx->tasks_per_level[i - 1];
  }

  int prev_block_num = -1;
  int prev_iset = -1;
  int prev_jset = -1;
  int prev_level = -1;
  _task *task = ctx->tasks[0];
  for (int i = 0; i < ntasks; i++) {
    if (prev_level != (level_list[i] - 1)) {
      prev_level = level_list[i] - 1;
      prev_block_num = -1;
      prev_iset = -1;
      prev_jset = -1;
    }
    // Convert from Fortran 1-based to C 0-based indices.
    task->level = level_list[i] - 1;
    task->iatom = iatom_list[i] - 1;
    task->jatom = jatom_list[i] - 1;
    task->iset = iset_list[i] - 1;
    task->jset = jset_list[i] - 1;
    task->ipgf = ipgf_list[i] - 1;
    task->jpgf = jpgf_list[i] - 1;
    task->border_mask = border_mask_list[i];
    task->block_num = block_num_list[i] - 1;
    task->radius = radius_list[i];
    task->rab[0] = rab_list[i][0];
    task->rab[1] = rab_list[i][1];
    task->rab[2] = rab_list[i][2];
    const int iatom = task->iatom;
    const int jatom = task->jatom;
    const int iset = task->iset;
    const int jset = task->jset;
    const int ipgf = task->ipgf;
    const int jpgf = task->jpgf;
    const int ikind = ctx->atom_kinds[iatom];
    const int jkind = ctx->atom_kinds[jatom];
    const grid_basis_set *ibasis = ctx->basis_sets[ikind];
    const grid_basis_set *jbasis = ctx->basis_sets[jkind];
    const int ncoseta = ncoset(ibasis->lmax[iset]);
    const int ncosetb = ncoset(jbasis->lmax[jset]);

    task->zeta[0] = ibasis->zet[iset * ibasis->maxpgf + ipgf];
    task->zeta[1] = jbasis->zet[jset * jbasis->maxpgf + jpgf];

    // Gaussian product: combined exponent zetp, weight f, and prefactor.
    const double *ra = &ctx->atom_positions[3 * iatom];
    const double zetp = task->zeta[0] + task->zeta[1];
    const double f = task->zeta[1] / zetp;
    const double rab2 = task->rab[0] * task->rab[0] +
                        task->rab[1] * task->rab[1] +
                        task->rab[2] * task->rab[2];

    task->prefactor = exp(-task->zeta[0] * f * rab2);
    task->zetp = zetp;

    const int block_num = task->block_num;

    for (int i = 0; i < 3; i++) {
      task->ra[i] = ra[i];
      task->rp[i] = ra[i] + f * task->rab[i];
      task->rb[i] = ra[i] + task->rab[i];
    }

    task->lmax[0] = ibasis->lmax[iset];
    task->lmax[1] = jbasis->lmax[jset];
    task->lmin[0] = ibasis->lmin[iset];
    task->lmin[1] = jbasis->lmin[jset];

    // Mark the first task of each new (block_num, iset, jset) combination.
    if ((block_num != prev_block_num) || (iset != prev_iset) ||
        (jset != prev_jset)) {
      task->update_block_ = true;
      prev_block_num = block_num;
      prev_iset = iset;
      prev_jset = jset;
    } else {
      task->update_block_ = false;
    }

    task->offset[0] = ipgf * ncoseta;
    task->offset[1] = jpgf * ncosetb;
    task++;
  }

  // Find largest Cartesian subblock size.
  ctx->maxco = 0;
  for (int i = 0; i < ctx->nkinds; i++) {
    ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
  }
}

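/* Background sketch (illustrative, not part of the original source): the
 * per-task quantities set above follow the Gaussian product theorem,
 *
 *   exp(-zeta_a*|r - Ra|^2) * exp(-zeta_b*|r - Rb|^2)
 *     = exp(-(zeta_a*zeta_b/zetp)*|Rab|^2) * exp(-zetp*|r - Rp|^2),
 *
 * with zetp = zeta_a + zeta_b, Rab = Rb - Ra and Rp = Ra + (zeta_b/zetp)*Rab.
 * The hypothetical helper below recomputes prefactor and product center the
 * same way update_task_lists() does, just for a single primitive pair. */
static void example_gaussian_product(const double zeta_a, const double zeta_b,
                                     const double ra[3], const double rab[3],
                                     double rp[3], double *prefactor) {
  const double zetp = zeta_a + zeta_b;
  const double f = zeta_b / zetp;
  const double rab2 = rab[0] * rab[0] + rab[1] * rab[1] + rab[2] * rab[2];
  *prefactor = exp(-zeta_a * f * rab2);
  for (int i = 0; i < 3; i++)
    rp[i] = ra[i] + f * rab[i];
}
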
void update_layouts(const int nlevels, const int npts_global[nlevels][3],
                    const int npts_local[nlevels][3],
                    const int shift_local[nlevels][3],
                    const int border_width[nlevels][3],
                    const double dh[nlevels][3][3],
                    const double dh_inv[nlevels][3][3], grid_context *ctx) {

  assert(ctx != NULL);
  assert(ctx->checksum == ctx_checksum);

  if (ctx->layouts != NULL) {
    free(ctx->layouts);
  }

  ctx->layouts = malloc(sizeof(_layout) * nlevels);

  for (int level = 0; level < nlevels; level++) {
    for (int i = 0; i < 3; i++) {
      ctx->layouts[level].npts_global[i] = npts_global[level][i];
      ctx->layouts[level].npts_local[i] = npts_local[level][i];
      ctx->layouts[level].shift_local[i] = shift_local[level][i];
      ctx->layouts[level].border_width[i] = border_width[level][i];
      for (int j = 0; j < 3; j++) {
        ctx->layouts[level].dh[i][j] = dh[level][i][j];
        ctx->layouts[level].dh_inv[i][j] = dh_inv[level][i][j];
      }
    }
  }
}

void update_grid(const int nlevels, grid_context *ctx) {
  assert(ctx != NULL);
  assert(ctx->checksum == ctx_checksum);

  if (nlevels == 0)
    return;

  if (ctx->grid == NULL) {
    ctx->grid = malloc(sizeof(tensor) * nlevels);
  } else {
    if (ctx->nlevels_total < nlevels) {
      ctx->grid = realloc(ctx->grid, sizeof(tensor) * nlevels);
    }
  }
  assert(ctx->grid != NULL);

  ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
  ctx->nlevels = nlevels;
}

void *create_grid_context_dgemm(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int *block_offsets, const double atom_positions[natoms][3],
    const int *const atom_kinds, const grid_basis_set **const basis_sets,
    const int *const level_list, const int *const iatom_list,
    const int *jatom_list, const int *const iset_list,
    const int *const jset_list, const int *const ipgf_list,
    const int *const jpgf_list, const int *const border_mask_list,
    const int *block_num_list, const double *const radius_list,
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3]) {

  grid_context *ctx = malloc(sizeof(grid_context));

  memset(ctx, 0, sizeof(grid_context));

  ctx->checksum = ctx_checksum;
  ctx->orthorhombic = orthorhombic;
  update_block_offsets(nblocks, block_offsets, ctx);
  update_atoms_position(natoms, atom_positions, ctx);
  update_atoms_kinds(natoms, atom_kinds, ctx);
  update_basis_set(nkinds, basis_sets, ctx);
  update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
                    iset_list, jset_list, ipgf_list, jpgf_list,
                    border_mask_list, block_num_list, radius_list, rab_list,
                    ctx);
  update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
                 dh, dh_inv, ctx);
  update_grid(nlevels, ctx);

  const int max_threads = omp_get_max_threads();

  ctx->handler =
      malloc(sizeof(struct collocation_integration_ *) * max_threads);

  // One collocation/integration handler per OpenMP thread.
  for (int i = 0; i < max_threads; i++) {
    ctx->handler[i] = collocate_create_handle();
  }

  ctx->number_of_handler = max_threads;

  return ctx;
}

void update_grid_context_dgemm(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int *block_offsets, const double atom_positions[natoms][3],
    const int *const atom_kinds, const grid_basis_set **const basis_sets,
    const int *const level_list, const int *const iatom_list,
    const int *jatom_list, const int *const iset_list,
    const int *const jset_list, const int *const ipgf_list,
    const int *const jpgf_list, const int *const border_mask_list,
    const int *block_num_list, const double *const radius_list,
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3], void *ptr) {

  assert(ptr != NULL);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);

  ctx->orthorhombic = orthorhombic;
  update_block_offsets(nblocks, block_offsets, ctx);
  update_atoms_position(natoms, atom_positions, ctx);
  update_atoms_kinds(natoms, atom_kinds, ctx);
  update_basis_set(nkinds, basis_sets, ctx);
  update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
                    iset_list, jset_list, ipgf_list, jpgf_list,
                    border_mask_list, block_num_list, radius_list, rab_list,
                    ctx);
  update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
                 dh, dh_inv, ctx);
  update_grid(nlevels, ctx);

  // Find largest Cartesian subblock size.
  ctx->maxco = 0;
  for (int i = 0; i < nkinds; i++) {
    ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
  }
}

void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices,
                                    const int *device_id) {
  assert(ptr != NULL);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  ctx->work_on_gpu = false;
  if (number_of_devices <= 0) {
    return;
  }

  ctx->number_of_devices = number_of_devices;
  ctx->queue_length = 8192;
  if (ctx->device_id == NULL) {
    ctx->device_id = malloc(sizeof(int) * number_of_devices);
  } else {
    ctx->device_id = realloc(ctx->device_id, sizeof(int) * number_of_devices);
  }
  assert(ctx->device_id != NULL);

  memcpy(ctx->device_id, device_id, sizeof(int) * number_of_devices);
}

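/* Usage sketch (illustrative, not part of the original source): how a caller
 * might register devices for an existing context; the device ids and queue
 * length below are arbitrary example values. */
static void example_enable_gpu(void *ctx) {
  const int device_id[2] = {0, 1}; /* hypothetical device ids */
  initialize_grid_context_on_gpu(ctx, 2, device_id);
  update_queue_length(ctx, 4096); /* override the default queue length */
}
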
void destroy_grid_context_dgemm(void *ptr) {
  assert(ptr);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  free(ctx->block_offsets);
  free(ctx->atom_positions);
  free(ctx->atom_kinds);
  free(ctx->basis_sets);
  free(ctx->tasks[0]);
  free(ctx->tasks);
  free(ctx->tasks_per_level);
  free(ctx->layouts);
  free(ctx->grid);
  if (ctx->device_id)
    free(ctx->device_id);

  if (ctx->handler) {
    for (int i = 0; i < ctx->number_of_handler; i++) {
      collocate_destroy_handle(ctx->handler[i]);
    }
    free(ctx->handler);
  }

  free(ctx);
}

void apply_cutoff(void *ptr) {
  assert(ptr);
  grid_context *ctx = (grid_context *)ptr;
  assert(ctx->checksum == ctx_checksum);
  ctx->apply_cutoff = true;
}

void set_grid_parameters(
    tensor *grid, const bool orthorhombic,
    const int grid_full_size[3],  /* size of the full grid */
    const int grid_local_size[3], /* size of the local grid block */
    const int shift_local[3],     /* coordinates of the lower corner of the
                                     local grid window */
    const int border_width[3],    /* width of the borders */
    const double dh[3][3],     /* displacement vectors of the grid (cartesian)
                                  -> (ijk) */
    const double dh_inv[3][3], /* (ijk) -> (x,y,z) */
    offload_buffer *grid_) {
  memset(grid, 0, sizeof(tensor));
  initialize_tensor_3(grid, grid_local_size[2], grid_local_size[1],
                      grid_local_size[0]);

  grid->data = grid_->host_buffer;
  grid->ld_ = grid_local_size[0];

  setup_global_grid_size(grid, &grid_full_size[0]);

  /* the grid is distributed over several ranks or is not periodic */
  if ((grid_local_size[0] != grid_full_size[0]) ||
      (grid_local_size[1] != grid_full_size[1]) ||
      (grid_local_size[2] != grid_full_size[2])) {
    setup_grid_window(grid, shift_local, border_width, 0);
  } else {
    grid->window_shift[0] = 0;
    grid->window_shift[1] = 0;
    grid->window_shift[2] = 0;

    grid->window_size[0] = grid->size[0];
    grid->window_size[1] = grid->size[1];
    grid->window_size[2] = grid->size[2];
  }

  grid->dh[0][0] = dh[0][0];
  grid->dh[0][1] = dh[0][1];
  grid->dh[0][2] = dh[0][2];
  grid->dh[1][0] = dh[1][0];
  grid->dh[1][1] = dh[1][1];
  grid->dh[1][2] = dh[1][2];
  grid->dh[2][0] = dh[2][0];
  grid->dh[2][1] = dh[2][1];
  grid->dh[2][2] = dh[2][2];

  grid->dh_inv[0][0] = dh_inv[0][0];
  grid->dh_inv[0][1] = dh_inv[0][1];
  grid->dh_inv[0][2] = dh_inv[0][2];
  grid->dh_inv[1][0] = dh_inv[1][0];
  grid->dh_inv[1][1] = dh_inv[1][1];
  grid->dh_inv[1][2] = dh_inv[1][2];
  grid->dh_inv[2][0] = dh_inv[2][0];
  grid->dh_inv[2][1] = dh_inv[2][1];
  grid->dh_inv[2][2] = dh_inv[2][2];

  verify_orthogonality(dh, grid->orthogonal);

  if (orthorhombic) {
    grid->orthogonal[0] = true;
    grid->orthogonal[1] = true;
    grid->orthogonal[2] = true;
  }
}

/*******************************************************************************
 * \brief Allocates a task list for the dgemm backend.
 *        See grid_task_list.h for details.
 ******************************************************************************/
void grid_dgemm_create_task_list(
    const bool orthorhombic, const int ntasks, const int nlevels,
    const int natoms, const int nkinds, const int nblocks,
    const int block_offsets[nblocks], const double atom_positions[natoms][3],
    const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
    const int level_list[ntasks], const int iatom_list[ntasks],
    const int jatom_list[ntasks], const int iset_list[ntasks],
    const int jset_list[ntasks], const int ipgf_list[ntasks],
    const int jpgf_list[ntasks], const int border_mask_list[ntasks],
    const int block_num_list[ntasks], const double radius_list[ntasks],
    const double rab_list[ntasks][3], const int npts_global[nlevels][3],
    const int npts_local[nlevels][3], const int shift_local[nlevels][3],
    const int border_width[nlevels][3], const double dh[nlevels][3][3],
    const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list) {

  if (*task_list == NULL) {
    *task_list = create_grid_context_dgemm(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv);
  } else {
    update_grid_context_dgemm(
        orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
        atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
        jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
        border_mask_list, block_num_list, radius_list, rab_list, npts_global,
        npts_local, shift_local, border_width, dh, dh_inv, *task_list);
  }

  const grid_library_config config = grid_library_get_config();
  if (config.apply_cutoff) {
    apply_cutoff(*task_list);
  }
}

/*******************************************************************************
 * \brief Deallocates the given task list; basis_sets have to be freed
 *        separately.
 ******************************************************************************/
void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list) {
  destroy_grid_context_dgemm(task_list);
}
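
/* Usage sketch (illustrative, not part of the original source): the expected
 * lifecycle of the public dgemm task-list API. The long argument list
 * (block_offsets, atom_positions, ..., dh_inv) comes from the caller and is
 * elided here.
 *
 *   grid_dgemm_task_list *task_list = NULL;
 *   grid_dgemm_create_task_list(orthorhombic, ntasks, nlevels, ...,
 *                               &task_list);       // allocates the context
 *   grid_dgemm_create_task_list(..., &task_list);  // later calls update it
 *   grid_dgemm_free_task_list(task_list);          // frees it again
 */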