14#include "../common/grid_library.h"
23void return_dh(
void *
const ptr,
const int level,
double *
const dh) {
27 dh[0] = ctx->
grid[level].
dh[0][0];
28 dh[1] = ctx->
grid[level].
dh[0][1];
29 dh[2] = ctx->
grid[level].
dh[0][2];
30 dh[3] = ctx->
grid[level].
dh[1][0];
31 dh[4] = ctx->
grid[level].
dh[1][1];
32 dh[5] = ctx->
grid[level].
dh[1][2];
33 dh[6] = ctx->
grid[level].
dh[2][0];
34 dh[7] = ctx->
grid[level].
dh[2][1];
35 dh[8] = ctx->
grid[level].
dh[2][2];
38void return_dh_inv(
void *
const ptr,
const int level,
double *
const dh_inv) {
80 const double atoms_positions[natoms][3],
90 if (natoms > data->
natoms) {
100 for (
int i = 0;
i < natoms;
i++) {
110 assert(data != NULL);
117 data->
atom_kinds = malloc(natoms *
sizeof(
int));
126 memcpy(data->
atom_kinds, atoms_kinds,
sizeof(
int) * natoms);
128 for (
int i = 0;
i < natoms;
i++) {
135 assert(data != NULL);
152 memcpy(data->
block_offsets, block_offsets, nblocks *
sizeof(
int));
172 const int *
const level_list,
const int *
const iatom_list,
173 const int *
const jatom_list,
const int *
const iset_list,
174 const int *
const jset_list,
const int *
const ipgf_list,
175 const int *
const jpgf_list,
176 const int *
const border_mask_list,
177 const int *block_num_list,
178 const double *
const radius_list,
188 size_t size = nlevels *
sizeof(int);
190 ctx->
tasks = malloc(nlevels *
sizeof(
_task *));
193 ctx->
tasks[0] = malloc(ntasks *
sizeof(
_task));
195 ctx->
tasks[0] = NULL;
200 assert(ctx->
tasks != NULL);
204 assert(ctx->
tasks[0] != NULL);
214 for (
int i = 0;
i < ntasks;
i++) {
216 assert(
i == 0 || level_list[
i] >= level_list[
i - 1]);
223 int prev_block_num = -1;
228 for (
int i = 0;
i < ntasks;
i++) {
229 if (prev_level != (level_list[
i] - 1)) {
230 prev_level = level_list[
i] - 1;
235 task->
level = level_list[
i] - 1;
236 task->
iatom = iatom_list[
i] - 1;
237 task->
jatom = jatom_list[
i] - 1;
238 task->
iset = iset_list[
i] - 1;
239 task->
jset = jset_list[
i] - 1;
240 task->
ipgf = ipgf_list[
i] - 1;
241 task->
jpgf = jpgf_list[
i] - 1;
245 task->
rab[0] = rab_list[
i][0];
246 task->
rab[1] = rab_list[
i][1];
247 task->
rab[2] = rab_list[
i][2];
248 const int iatom = task->
iatom;
249 const int jatom = task->
jatom;
250 const int iset = task->
iset;
251 const int jset = task->
jset;
252 const int ipgf = task->
ipgf;
253 const int jpgf = task->
jpgf;
258 const int ncoseta = ncoset(ibasis->
lmax[iset]);
259 const int ncosetb = ncoset(jbasis->
lmax[jset]);
265 const double zetp = task->
zeta[0] + task->
zeta[1];
266 const double f = task->
zeta[1] / zetp;
267 const double rab2 = task->
rab[0] * task->
rab[0] +
268 task->
rab[1] * task->
rab[1] +
269 task->
rab[2] * task->
rab[2];
276 for (
int i = 0;
i < 3;
i++) {
278 task->
rp[
i] = ra[
i] + f * task->
rab[
i];
287 if ((block_num != prev_block_num) || (iset != prev_iset) ||
288 (jset != prev_jset)) {
290 prev_block_num = block_num;
297 task->
offset[0] = ipgf * ncoseta;
298 task->
offset[1] = jpgf * ncosetb;
311 const int shift_local[nlevels][3],
312 const int border_width[nlevels][3],
313 const double dh[nlevels][3][3],
314 const double dh_inv[nlevels][3][3],
grid_context *ctx) {
325 for (
int level = 0; level < nlevels; level++) {
326 for (
int i = 0;
i < 3;
i++) {
331 for (
int j = 0; j < 3; j++) {
346 if (ctx->
grid == NULL) {
353 assert(ctx->
grid != NULL);
360 const bool orthorhombic,
const int ntasks,
const int nlevels,
361 const int natoms,
const int nkinds,
const int nblocks,
362 const int *block_offsets,
const double atom_positions[natoms][3],
363 const int *
const atom_kinds,
const grid_basis_set **
const basis_sets,
364 const int *
const level_list,
const int *
const iatom_list,
365 const int *jatom_list,
const int *
const iset_list,
366 const int *
const jset_list,
const int *
const ipgf_list,
367 const int *
const jpgf_list,
const int *
const border_mask_list,
368 const int *block_num_list,
const double *
const radius_list,
369 const double rab_list[ntasks][3],
const int npts_global[nlevels][3],
370 const int npts_local[nlevels][3],
const int shift_local[nlevels][3],
371 const int border_width[nlevels][3],
const double dh[nlevels][3][3],
372 const double dh_inv[nlevels][3][3]) {
385 iset_list, jset_list, ipgf_list, jpgf_list,
386 border_mask_list, block_num_list, radius_list, rab_list,
407 const bool orthorhombic,
const int ntasks,
const int nlevels,
408 const int natoms,
const int nkinds,
const int nblocks,
409 const int *block_offsets,
const double atom_positions[natoms][3],
410 const int *
const atom_kinds,
const grid_basis_set **
const basis_sets,
411 const int *
const level_list,
const int *
const iatom_list,
412 const int *jatom_list,
const int *
const iset_list,
413 const int *
const jset_list,
const int *
const ipgf_list,
414 const int *
const jpgf_list,
const int *
const border_mask_list,
415 const int *block_num_list,
const double *
const radius_list,
416 const double rab_list[ntasks][3],
const int npts_global[nlevels][3],
417 const int npts_local[nlevels][3],
const int shift_local[nlevels][3],
418 const int border_width[nlevels][3],
const double dh[nlevels][3][3],
419 const double dh_inv[nlevels][3][3],
void *ptr) {
431 iset_list, jset_list, ipgf_list, jpgf_list,
432 border_mask_list, block_num_list, radius_list, rab_list,
440 for (
int i = 0;
i < nkinds;
i++) {
446 const int *device_id) {
451 if (number_of_devices <= 0) {
458 ctx->
device_id = malloc(
sizeof(
int) * number_of_devices);
464 memcpy(ctx->
device_id, device_id,
sizeof(
int) * number_of_devices);
502 const int grid_full_size[3],
503 const int grid_local_size[3],
504 const int shift_local[3],
506 const int border_width[3],
509 const double dh_inv[3][3],
516 grid->ld_ = grid_local_size[0];
521 if ((grid_local_size[0] != grid_full_size[0]) ||
522 (grid_local_size[1] != grid_full_size[1]) ||
523 (grid_local_size[2] != grid_full_size[2])) {
526 grid->window_shift[0] = 0;
527 grid->window_shift[1] = 0;
528 grid->window_shift[2] = 0;
530 grid->window_size[0] =
grid->size[0];
531 grid->window_size[1] =
grid->size[1];
532 grid->window_size[2] =
grid->size[2];
535 grid->dh[0][0] = dh[0][0];
536 grid->dh[0][1] = dh[0][1];
537 grid->dh[0][2] = dh[0][2];
538 grid->dh[1][0] = dh[1][0];
539 grid->dh[1][1] = dh[1][1];
540 grid->dh[1][2] = dh[1][2];
541 grid->dh[2][0] = dh[2][0];
542 grid->dh[2][1] = dh[2][1];
543 grid->dh[2][2] = dh[2][2];
545 grid->dh_inv[0][0] = dh_inv[0][0];
546 grid->dh_inv[0][1] = dh_inv[0][1];
547 grid->dh_inv[0][2] = dh_inv[0][2];
548 grid->dh_inv[1][0] = dh_inv[1][0];
549 grid->dh_inv[1][1] = dh_inv[1][1];
550 grid->dh_inv[1][2] = dh_inv[1][2];
551 grid->dh_inv[2][0] = dh_inv[2][0];
552 grid->dh_inv[2][1] = dh_inv[2][1];
553 grid->dh_inv[2][2] = dh_inv[2][2];
558 grid->orthogonal[0] =
true;
559 grid->orthogonal[1] =
true;
560 grid->orthogonal[2] =
true;
569 const bool orthorhombic,
const int ntasks,
const int nlevels,
570 const int natoms,
const int nkinds,
const int nblocks,
571 const int block_offsets[nblocks],
const double atom_positions[natoms][3],
572 const int atom_kinds[natoms],
const grid_basis_set *basis_sets[nkinds],
573 const int level_list[ntasks],
const int iatom_list[ntasks],
574 const int jatom_list[ntasks],
const int iset_list[ntasks],
575 const int jset_list[ntasks],
const int ipgf_list[ntasks],
576 const int jpgf_list[ntasks],
const int border_mask_list[ntasks],
577 const int block_num_list[ntasks],
const double radius_list[ntasks],
578 const double rab_list[ntasks][3],
const int npts_global[nlevels][3],
579 const int npts_local[nlevels][3],
const int shift_local[nlevels][3],
580 const int border_width[nlevels][3],
const double dh[nlevels][3][3],
583 if (*task_list == NULL) {
585 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
586 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
587 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
588 border_mask_list, block_num_list, radius_list, rab_list, npts_global,
589 npts_local, shift_local, border_width, dh, dh_inv);
592 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
593 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
594 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
595 border_mask_list, block_num_list, radius_list, rab_list, npts_global,
596 npts_local, shift_local, border_width, dh, dh_inv, *task_list);
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard)
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int i
static void const int const int const int const int const int const double const int const int const int npts_local[3]
void collocate_destroy_handle(void *gaussian_handle)
struct collocation_integration_ * collocate_create_handle(void)
void update_layouts(const int nlevels, const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], grid_context *ctx)
void update_grid(const int nlevels, grid_context *ctx)
void update_atoms_kinds(const int natoms, const int *atoms_kinds, grid_context *data)
void update_grid_context_dgemm(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int *block_offsets, const double atom_positions[natoms][3], const int *const atom_kinds, const grid_basis_set **const basis_sets, const int *const level_list, const int *const iatom_list, const int *jatom_list, const int *const iset_list, const int *const jset_list, const int *const ipgf_list, const int *const jpgf_list, const int *const border_mask_list, const int *block_num_list, const double *const radius_list, const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], void *ptr)
void * create_grid_context_dgemm(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int *block_offsets, const double atom_positions[natoms][3], const int *const atom_kinds, const grid_basis_set **const basis_sets, const int *const level_list, const int *const iatom_list, const int *jatom_list, const int *const iset_list, const int *const jset_list, const int *const ipgf_list, const int *const jpgf_list, const int *const border_mask_list, const int *block_num_list, const double *const radius_list, const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3])
int is_grid_orthorhombic(void *const ptr)
void update_block_offsets(const int nblocks, const int *const block_offsets, grid_context *data)
int return_device_id(void *const ptr, const int device)
void destroy_grid_context_dgemm(void *ptr)
void return_dh(void *const ptr, const int level, double *const dh)
void update_task_lists(const int nlevels, const int ntasks, const int *const level_list, const int *const iatom_list, const int *const jatom_list, const int *const iset_list, const int *const jset_list, const int *const ipgf_list, const int *const jpgf_list, const int *const border_mask_list, const int *block_num_list, const double *const radius_list, const double rab_list[ntasks][3], grid_context *ctx)
void apply_cutoff(void *ptr)
int return_num_devs(void *const ptr)
void return_dh_inv(void *const ptr, const int level, double *const dh_inv)
void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets, grid_context *data)
void grid_dgemm_create_task_list(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int block_offsets[nblocks], const double atom_positions[natoms][3], const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds], const int level_list[ntasks], const int iatom_list[ntasks], const int jatom_list[ntasks], const int iset_list[ntasks], const int jset_list[ntasks], const int ipgf_list[ntasks], const int jpgf_list[ntasks], const int border_mask_list[ntasks], const int block_num_list[ntasks], const double radius_list[ntasks], const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list)
Allocates a task list for the dgemm backend. See grid_task_list.h for details.
void update_atoms_position(const int natoms, const double atoms_positions[natoms][3], grid_context *data)
void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list)
Deallocates given task list, basis_sets have to be freed separately.
void update_queue_length(void *const ptr, const int queue_length)
void set_grid_parameters(tensor *grid, const bool orthorhombic, const int grid_full_size[3], const int grid_local_size[3], const int shift_local[3], const int border_width[3], const double dh[3][3], const double dh_inv[3][3], offload_buffer *grid_)
void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices, const int *device_id)
static void setup_grid_window(tensor *const grid, const int *const shift_local, const int *const border_width, const int border_mask)
static void setup_global_grid_size(tensor *const grid, const int *const full_size)
static void initialize_tensor_3(struct tensor_ *a, int n1, int n2, int n3)
void verify_orthogonality(const double dh[3][3], bool orthogonal[3])
static grid_library_config config
grid_library_config grid_library_get_config(void)
Returns the library config.
Internal representation of a basis set.
grid_basis_set ** basis_sets
struct collocation_integration_ ** handler
Configuration of the grid library.
Internal representation of a buffer.