14#include "../common/grid_library.h"
23void return_dh(
void *
const ptr,
const int level,
double *
const dh) {
27 dh[0] = ctx->
grid[level].
dh[0][0];
28 dh[1] = ctx->
grid[level].
dh[0][1];
29 dh[2] = ctx->
grid[level].
dh[0][2];
30 dh[3] = ctx->
grid[level].
dh[1][0];
31 dh[4] = ctx->
grid[level].
dh[1][1];
32 dh[5] = ctx->
grid[level].
dh[1][2];
33 dh[6] = ctx->
grid[level].
dh[2][0];
34 dh[7] = ctx->
grid[level].
dh[2][1];
35 dh[8] = ctx->
grid[level].
dh[2][2];
38void return_dh_inv(
void *
const ptr,
const int level,
double *
const dh_inv) {
80 const double atoms_positions[natoms][3],
90 if (natoms > data->
natoms) {
99 for (
int i = 0;
i < natoms;
i++) {
109 assert(data != NULL);
116 data->
atom_kinds = malloc(natoms *
sizeof(
int));
124 memcpy(data->
atom_kinds, atoms_kinds,
sizeof(
int) * natoms);
126 for (
int i = 0;
i < natoms;
i++) {
133 assert(data != NULL);
149 memcpy(data->
block_offsets, block_offsets, nblocks *
sizeof(
int));
168 const int *
const level_list,
const int *
const iatom_list,
169 const int *
const jatom_list,
const int *
const iset_list,
170 const int *
const jset_list,
const int *
const ipgf_list,
171 const int *
const jpgf_list,
172 const int *
const border_mask_list,
173 const int *block_num_list,
174 const double *
const radius_list,
184 size_t size = nlevels *
sizeof(int);
186 ctx->
tasks = malloc(nlevels *
sizeof(
_task *));
189 ctx->
tasks[0] = malloc(ntasks *
sizeof(
_task));
191 ctx->
tasks[0] = NULL;
208 for (
int i = 0;
i < ntasks;
i++) {
210 assert(
i == 0 || level_list[
i] >= level_list[
i - 1]);
217 int prev_block_num = -1;
222 for (
int i = 0;
i < ntasks;
i++) {
223 if (prev_level != (level_list[
i] - 1)) {
224 prev_level = level_list[
i] - 1;
229 task->
level = level_list[
i] - 1;
230 task->
iatom = iatom_list[
i] - 1;
231 task->
jatom = jatom_list[
i] - 1;
232 task->
iset = iset_list[
i] - 1;
233 task->
jset = jset_list[
i] - 1;
234 task->
ipgf = ipgf_list[
i] - 1;
235 task->
jpgf = jpgf_list[
i] - 1;
239 task->
rab[0] = rab_list[
i][0];
240 task->
rab[1] = rab_list[
i][1];
241 task->
rab[2] = rab_list[
i][2];
242 const int iatom = task->
iatom;
243 const int jatom = task->
jatom;
244 const int iset = task->
iset;
245 const int jset = task->
jset;
246 const int ipgf = task->
ipgf;
247 const int jpgf = task->
jpgf;
252 const int ncoseta = ncoset(ibasis->
lmax[iset]);
253 const int ncosetb = ncoset(jbasis->
lmax[jset]);
259 const double zetp = task->
zeta[0] + task->
zeta[1];
260 const double f = task->
zeta[1] / zetp;
261 const double rab2 = task->
rab[0] * task->
rab[0] +
262 task->
rab[1] * task->
rab[1] +
263 task->
rab[2] * task->
rab[2];
270 for (
int i = 0;
i < 3;
i++) {
272 task->
rp[
i] = ra[
i] + f * task->
rab[
i];
281 if ((block_num != prev_block_num) || (iset != prev_iset) ||
282 (jset != prev_jset)) {
284 prev_block_num = block_num;
291 task->
offset[0] = ipgf * ncoseta;
292 task->
offset[1] = jpgf * ncosetb;
305 const int shift_local[nlevels][3],
306 const int border_width[nlevels][3],
307 const double dh[nlevels][3][3],
308 const double dh_inv[nlevels][3][3],
grid_context *ctx) {
319 for (
int level = 0; level < nlevels; level++) {
320 for (
int i = 0;
i < 3;
i++) {
325 for (
int j = 0; j < 3; j++) {
340 if (ctx->
grid == NULL) {
353 const bool orthorhombic,
const int ntasks,
const int nlevels,
354 const int natoms,
const int nkinds,
const int nblocks,
355 const int *block_offsets,
const double atom_positions[natoms][3],
356 const int *
const atom_kinds,
const grid_basis_set **
const basis_sets,
357 const int *
const level_list,
const int *
const iatom_list,
358 const int *jatom_list,
const int *
const iset_list,
359 const int *
const jset_list,
const int *
const ipgf_list,
360 const int *
const jpgf_list,
const int *
const border_mask_list,
361 const int *block_num_list,
const double *
const radius_list,
362 const double rab_list[ntasks][3],
const int npts_global[nlevels][3],
363 const int npts_local[nlevels][3],
const int shift_local[nlevels][3],
364 const int border_width[nlevels][3],
const double dh[nlevels][3][3],
365 const double dh_inv[nlevels][3][3]) {
378 iset_list, jset_list, ipgf_list, jpgf_list,
379 border_mask_list, block_num_list, radius_list, rab_list,
400 const bool orthorhombic,
const int ntasks,
const int nlevels,
401 const int natoms,
const int nkinds,
const int nblocks,
402 const int *block_offsets,
const double atom_positions[natoms][3],
403 const int *
const atom_kinds,
const grid_basis_set **
const basis_sets,
404 const int *
const level_list,
const int *
const iatom_list,
405 const int *jatom_list,
const int *
const iset_list,
406 const int *
const jset_list,
const int *
const ipgf_list,
407 const int *
const jpgf_list,
const int *
const border_mask_list,
408 const int *block_num_list,
const double *
const radius_list,
409 const double rab_list[ntasks][3],
const int npts_global[nlevels][3],
410 const int npts_local[nlevels][3],
const int shift_local[nlevels][3],
411 const int border_width[nlevels][3],
const double dh[nlevels][3][3],
412 const double dh_inv[nlevels][3][3],
void *ptr) {
424 iset_list, jset_list, ipgf_list, jpgf_list,
425 border_mask_list, block_num_list, radius_list, rab_list,
433 for (
int i = 0;
i < nkinds;
i++) {
439 const int *device_id) {
444 if (number_of_devices <= 0) {
451 ctx->
device_id = malloc(
sizeof(
int) * number_of_devices);
455 memcpy(ctx->
device_id, device_id,
sizeof(
int) * number_of_devices);
493 const int grid_full_size[3],
494 const int grid_local_size[3],
495 const int shift_local[3],
497 const int border_width[3],
500 const double dh_inv[3][3],
507 grid->ld_ = grid_local_size[0];
512 if ((grid_local_size[0] != grid_full_size[0]) ||
513 (grid_local_size[1] != grid_full_size[1]) ||
514 (grid_local_size[2] != grid_full_size[2])) {
517 grid->window_shift[0] = 0;
518 grid->window_shift[1] = 0;
519 grid->window_shift[2] = 0;
521 grid->window_size[0] =
grid->size[0];
522 grid->window_size[1] =
grid->size[1];
523 grid->window_size[2] =
grid->size[2];
526 grid->dh[0][0] = dh[0][0];
527 grid->dh[0][1] = dh[0][1];
528 grid->dh[0][2] = dh[0][2];
529 grid->dh[1][0] = dh[1][0];
530 grid->dh[1][1] = dh[1][1];
531 grid->dh[1][2] = dh[1][2];
532 grid->dh[2][0] = dh[2][0];
533 grid->dh[2][1] = dh[2][1];
534 grid->dh[2][2] = dh[2][2];
536 grid->dh_inv[0][0] = dh_inv[0][0];
537 grid->dh_inv[0][1] = dh_inv[0][1];
538 grid->dh_inv[0][2] = dh_inv[0][2];
539 grid->dh_inv[1][0] = dh_inv[1][0];
540 grid->dh_inv[1][1] = dh_inv[1][1];
541 grid->dh_inv[1][2] = dh_inv[1][2];
542 grid->dh_inv[2][0] = dh_inv[2][0];
543 grid->dh_inv[2][1] = dh_inv[2][1];
544 grid->dh_inv[2][2] = dh_inv[2][2];
549 grid->orthogonal[0] =
true;
550 grid->orthogonal[1] =
true;
551 grid->orthogonal[2] =
true;
560 const bool orthorhombic,
const int ntasks,
const int nlevels,
561 const int natoms,
const int nkinds,
const int nblocks,
562 const int block_offsets[nblocks],
const double atom_positions[natoms][3],
563 const int atom_kinds[natoms],
const grid_basis_set *basis_sets[nkinds],
564 const int level_list[ntasks],
const int iatom_list[ntasks],
565 const int jatom_list[ntasks],
const int iset_list[ntasks],
566 const int jset_list[ntasks],
const int ipgf_list[ntasks],
567 const int jpgf_list[ntasks],
const int border_mask_list[ntasks],
568 const int block_num_list[ntasks],
const double radius_list[ntasks],
569 const double rab_list[ntasks][3],
const int npts_global[nlevels][3],
570 const int npts_local[nlevels][3],
const int shift_local[nlevels][3],
571 const int border_width[nlevels][3],
const double dh[nlevels][3][3],
574 if (*task_list == NULL) {
576 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
577 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
578 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
579 border_mask_list, block_num_list, radius_list, rab_list, npts_global,
580 npts_local, shift_local, border_width, dh, dh_inv);
583 orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
584 atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
585 jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
586 border_mask_list, block_num_list, radius_list, rab_list, npts_global,
587 npts_local, shift_local, border_width, dh, dh_inv, *task_list);
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard)
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int i
static void const int const int const int const int const int const double const int const int const int npts_local[3]
void collocate_destroy_handle(void *gaussian_handle)
struct collocation_integration_ * collocate_create_handle(void)
void update_layouts(const int nlevels, const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], grid_context *ctx)
void update_grid(const int nlevels, grid_context *ctx)
void update_atoms_kinds(const int natoms, const int *atoms_kinds, grid_context *data)
void update_grid_context_dgemm(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int *block_offsets, const double atom_positions[natoms][3], const int *const atom_kinds, const grid_basis_set **const basis_sets, const int *const level_list, const int *const iatom_list, const int *jatom_list, const int *const iset_list, const int *const jset_list, const int *const ipgf_list, const int *const jpgf_list, const int *const border_mask_list, const int *block_num_list, const double *const radius_list, const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], void *ptr)
void * create_grid_context_dgemm(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int *block_offsets, const double atom_positions[natoms][3], const int *const atom_kinds, const grid_basis_set **const basis_sets, const int *const level_list, const int *const iatom_list, const int *jatom_list, const int *const iset_list, const int *const jset_list, const int *const ipgf_list, const int *const jpgf_list, const int *const border_mask_list, const int *block_num_list, const double *const radius_list, const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3])
int is_grid_orthorhombic(void *const ptr)
void update_block_offsets(const int nblocks, const int *const block_offsets, grid_context *data)
int return_device_id(void *const ptr, const int device)
void destroy_grid_context_dgemm(void *ptr)
void return_dh(void *const ptr, const int level, double *const dh)
void update_task_lists(const int nlevels, const int ntasks, const int *const level_list, const int *const iatom_list, const int *const jatom_list, const int *const iset_list, const int *const jset_list, const int *const ipgf_list, const int *const jpgf_list, const int *const border_mask_list, const int *block_num_list, const double *const radius_list, const double rab_list[ntasks][3], grid_context *ctx)
void apply_cutoff(void *ptr)
int return_num_devs(void *const ptr)
void return_dh_inv(void *const ptr, const int level, double *const dh_inv)
void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets, grid_context *data)
void grid_dgemm_create_task_list(const bool orthorhombic, const int ntasks, const int nlevels, const int natoms, const int nkinds, const int nblocks, const int block_offsets[nblocks], const double atom_positions[natoms][3], const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds], const int level_list[ntasks], const int iatom_list[ntasks], const int jatom_list[ntasks], const int iset_list[ntasks], const int jset_list[ntasks], const int ipgf_list[ntasks], const int jpgf_list[ntasks], const int border_mask_list[ntasks], const int block_num_list[ntasks], const double radius_list[ntasks], const double rab_list[ntasks][3], const int npts_global[nlevels][3], const int npts_local[nlevels][3], const int shift_local[nlevels][3], const int border_width[nlevels][3], const double dh[nlevels][3][3], const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list)
Allocates a task list for the dgemm backend. See grid_task_list.h for details.
void update_atoms_position(const int natoms, const double atoms_positions[natoms][3], grid_context *data)
void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list)
Deallocates the given task list; basis_sets have to be freed separately.
void update_queue_length(void *const ptr, const int queue_length)
void set_grid_parameters(tensor *grid, const bool orthorhombic, const int grid_full_size[3], const int grid_local_size[3], const int shift_local[3], const int border_width[3], const double dh[3][3], const double dh_inv[3][3], offload_buffer *grid_)
void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices, const int *device_id)
static void setup_grid_window(tensor *const grid, const int *const shift_local, const int *const border_width, const int border_mask)
static void setup_global_grid_size(tensor *const grid, const int *const full_size)
static void initialize_tensor_3(struct tensor_ *a, int n1, int n2, int n3)
void verify_orthogonality(const double dh[3][3], bool orthogonal[3])
static grid_library_config config
grid_library_config grid_library_get_config(void)
Returns the library config.
Internal representation of a basis set.
grid_basis_set ** basis_sets
struct collocation_integration_ ** handler
Configuration of the grid library.
Internal representation of a buffer.