21#include "../common/grid_basis_set.h"
22#include "../common/grid_common.h"
23#include "../common/grid_constants.h"
33void collocate_l0(
double *scratch,
const double alpha,
const bool orthogonal,
35 const struct tensor_ *p_alpha_beta_reduced_,
40 const int iatom,
const int jatom,
41 const int iset,
const int jset,
42 double *
const block,
tensor *work,
49 const int nsgf_seta = ibasis->
nsgf_set[iset];
50 const int nsgf_setb = jbasis->
nsgf_set[jset];
51 const int nsgfa = ibasis->
nsgf;
52 const int nsgfb = jbasis->
nsgf;
53 const int sgfa = ibasis->
first_sgf[iset] - 1;
54 const int sgfb = jbasis->
first_sgf[jset] - 1;
55 const int maxcoa = ibasis->
maxco;
56 const int maxcob = jbasis->
maxco;
57 const int ncoseta = ncoset(ibasis->
lmax[iset]);
58 const int ncosetb = ncoset(jbasis->
lmax[jset]);
59 const int ncoa = ibasis->
npgf[iset] * ncoseta;
60 const int ncob = jbasis->
npgf[jset] * ncosetb;
77 m1.a = block + sgfb * nsgfa + sgfa;
79 m1.b = &ibasis->
sphi[sgfa * maxcoa];
91 m1.a = block + sgfa * nsgfb + sgfb;
93 m1.b = &ibasis->
sphi[sgfa * maxcoa];
98 m1.use_libxsmm =
true;
105 m2.k = work->
size[0];
108 m2.a = &jbasis->
sphi[sgfb * maxcob];
114 m2.use_libxsmm =
true;
119 double *scratch,
const double alpha,
const bool *
const orthogonal,
121 const struct tensor_ *p_alpha_beta_reduced_,
struct tensor_ *cube);
129 const double roffset,
const int pol_offset,
130 const int xmin,
const int xmax,
const int lp,
131 const int cmax,
const double zetp,
double *pol_) {
133 const double t_exp_1 = exp(-zetp * dr * dr);
134 const double t_exp_2 = t_exp_1 * t_exp_1;
136 double t_exp_min_1 = exp(-zetp * (dr - roffset) * (dr - roffset));
137 double t_exp_min_2 = exp(2.0 * zetp * (dr - roffset) * dr);
139 double t_exp_plus_1 = exp(-zetp * roffset * roffset);
140 double t_exp_plus_2 = exp(2.0 * zetp * roffset * dr);
147 for (
int ig = 0; ig >= xmin; ig--) {
148 const double rpg = ig * dr - roffset;
149 t_exp_min_1 *= t_exp_min_2 * t_exp_1;
150 t_exp_min_2 *= t_exp_2;
151 double pg = t_exp_min_1;
152 for (
int icoef = 0; icoef <= lp; icoef++) {
153 idx2(
pol, pol_offset + ig - xmin, icoef) = pg;
158 double t_exp_plus_1 = exp(-zetp * roffset * roffset);
159 double t_exp_plus_2 = exp(2 * zetp * roffset * dr);
160 for (
int ig = 1; ig <= xmax; ig++) {
161 const double rpg = ig * dr - roffset;
162 t_exp_plus_1 *= t_exp_plus_2 * t_exp_1;
163 t_exp_plus_2 *= t_exp_2;
164 double pg = t_exp_plus_1;
165 for (
int icoef = 0; icoef <= lp; icoef++) {
166 idx2(
pol, pol_offset + ig - xmin, icoef) = pg;
186 for (
int ig = 0; ig >= xmin; ig--) {
187 const double rpg = ig * dr - roffset;
188 t_exp_min_1 *= t_exp_min_2 * t_exp_1;
189 t_exp_min_2 *= t_exp_2;
190 double pg = t_exp_min_1;
191 idx2(
pol, 0, pol_offset + ig - xmin) = pg;
193 idx2(
pol, 1, pol_offset + ig - xmin) = rpg;
196 for (
int ig = 1; ig <= xmax; ig++) {
197 const double rpg = ig * dr - roffset;
198 t_exp_plus_1 *= t_exp_plus_2 * t_exp_1;
199 t_exp_plus_2 *= t_exp_2;
200 double pg = t_exp_plus_1;
201 idx2(
pol, 0, pol_offset + ig - xmin) = pg;
203 idx2(
pol, 1, pol_offset + ig - xmin) = rpg;
208 double *__restrict__ poly = &
idx2(
pol, 1, 0);
209 double *__restrict__ src1 = &
idx2(
pol, 0, 0);
210 double *__restrict__ dst = &
idx2(
pol, 2, 0);
213 for (
int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++)
214 dst[ig] = src1[ig] * poly[ig] * poly[ig];
217 for (
int icoef = 3; icoef <= lp; icoef++) {
218 double *__restrict__ poly = &
idx2(
pol, 1, 0);
219 double *__restrict__ src1 = &
idx2(
pol, icoef - 1, 0);
220 double *__restrict__ dst = &
idx2(
pol, icoef, 0);
222 for (
int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
223 dst[ig] = src1[ig] * poly[ig];
231 double *__restrict__ dst = &
idx2(
pol, 1, 0);
232 double *__restrict__ src = &
idx2(
pol, 0, 0);
234 for (
int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
242 const double disr_radius,
const int cmax,
243 const int *
const lb_cube,
244 const int *
const cube_center) {
251 for (
int i = 0;
i < 3;
i++) {
252 for (
int ig = 0; ig < handler->
cube.
size[
i]; ig++) {
266 for (
int kg = 0; kg < handler->
cube.
size[0]; kg++) {
267 const int k =
map[0][kg];
269 (2 * (kg + lb_cube[0]) - 1) / 2;
270 const double kr = kd * handler->
dh[2][2];
271 const double kremain = disr_radius * disr_radius - kr * kr;
275 const int jgmin = ceil(-1e-8 - sqrt(kremain) * handler->
dh_inv[1][1]);
276 for (
int jg = jgmin; jg <= (1 - jgmin); jg++) {
277 const int j =
map[1][jg - lb_cube[1]];
278 const double jr = ((2 * jg - 1) >> 1) *
280 const double jremain = kremain - jr * jr;
283 const int xmin = ceil(-1e-8 - sqrt(jremain) * handler->
dh_inv[0][0]);
284 const int xmax = 1 - xmin;
287 for (
int x = xmin - lb_cube[2];
288 x <
imin((xmax - lb_cube[2]), handler->
cube.
size[2]); x++) {
289 const int x1 =
map[2][x];
294 int lower_corner[3] = {k, j, x1};
295 int upper_corner[3] = {k + 1, j + 1, x1 + 1};
298 handler->
grid.
size[2], xmax - lb_cube[2], x1, &x,
299 lower_corner + 2, upper_corner + 2, xwindow);
301 if (upper_corner[2] - lower_corner[2]) {
302 const int position1[3] = {kg, jg - lb_cube[1], x};
304 double *restrict dst = &
idx3(handler->
grid, lower_corner[0],
305 lower_corner[1], lower_corner[2]);
306 double *restrict src = &
idx3(handler->
cube, position1[0],
307 position1[1], position1[2]);
309 const int sizex = upper_corner[2] - lower_corner[2];
311 for (
int x = 0; x < sizex; x++) {
319 x += upper_corner[2] - lower_corner[2] - 1;
329 const int cmax,
const int *
const lb_cube,
const int *
const ub_cube,
330 const double *
const roffset,
const int *
const cube_center) {
332 const double a = handler->
dh[0][0] * handler->
dh[0][0] +
333 handler->
dh[0][1] * handler->
dh[0][1] +
334 handler->
dh[0][2] * handler->
dh[0][2];
335 const double a_inv = 1.0 / a;
342 for (
int i = 0;
i < 3;
i++) {
343 for (
int ig = 0; ig < handler->
cube.
size[
i]; ig++) {
357 for (
int k = 0; k < handler->
cube.
size[0]; k++) {
358 const int iz =
map[0][k];
363 const double tz = (k + lb_cube[0] - roffset[0]);
364 const double z[3] = {tz * handler->
dh[2][0], tz * handler->
dh[2][1],
365 tz * handler->
dh[2][2]};
367 for (
int j = 0; j < handler->
cube.
size[1]; j++) {
368 const int iy =
map[1][j];
373 const double ty = (j - roffset[1] + lb_cube[1]);
374 const double y[3] = {z[0] + ty * handler->
dh[1][0],
375 z[1] + ty * handler->
dh[1][1],
376 z[2] + ty * handler->
dh[1][2]};
383 -2.0 * (handler->
dh[0][0] * (roffset[2] * handler->
dh[0][0] - y[0]) +
384 handler->
dh[0][1] * (roffset[2] * handler->
dh[0][1] - y[1]) +
385 handler->
dh[0][2] * (roffset[2] * handler->
dh[0][2] - y[2]));
387 const double c = (roffset[2] * handler->
dh[0][0] - y[0]) *
388 (roffset[2] * handler->
dh[0][0] - y[0]) +
389 (roffset[2] * handler->
dh[0][1] - y[1]) *
390 (roffset[2] * handler->
dh[0][1] - y[1]) +
391 (roffset[2] * handler->
dh[0][2] - y[2]) *
392 (roffset[2] * handler->
dh[0][2] - y[2]) -
393 disr_radius * disr_radius;
395 double delta = b * b - 4.0 * a * c;
402 const int xmin =
imax(ceil((-b - delta) * 0.5 * a_inv), lb_cube[2]);
403 const int xmax =
imin(floor((-b + delta) * 0.5 * a_inv), ub_cube[2]);
405 int lower_corner[3] = {iz, iy, xmin};
406 int upper_corner[3] = {iz + 1, iy + 1, xmin};
408 for (
int x = xmin - lb_cube[2];
409 x <
imin((xmax - lb_cube[2]), handler->
cube.
size[2]); x++) {
410 const int x1 =
map[2][x];
416 handler->
grid.
size[2], xmax - lb_cube[2], x1, &x,
417 lower_corner + 2, upper_corner + 2, xwindow);
419 if (upper_corner[2] - lower_corner[2]) {
420 const int position1[3] = {k, j, x};
425 double *__restrict__ dst = &
idx3(handler->
grid, lower_corner[0],
426 lower_corner[1], lower_corner[2]);
427 double *__restrict__ src =
428 &
idx3(handler->
cube, position1[0], position1[1], position1[2]);
430 const int sizex = upper_corner[2] - lower_corner[2];
432 for (
int x = 0; x < sizex; x++) {
439 x += upper_corner[2] - lower_corner[2] - 1;
446void collocate_l0(
double *scratch,
const double alpha,
const bool orthogonal_xy,
448 const struct tensor_ *p_alpha_beta_reduced_,
450 const double *__restrict pz =
451 &
idx3(p_alpha_beta_reduced_[0], 0, 0, 0);
452 const double *__restrict py =
453 &
idx3(p_alpha_beta_reduced_[0], 1, 0, 0);
454 const double *__restrict px =
455 &
idx3(p_alpha_beta_reduced_[0], 2, 0, 0);
457 memset(&
idx3(cube[0], 0, 0, 0), 0,
sizeof(
double) * cube->
alloc_size_);
458 memset(scratch, 0,
sizeof(
double) * cube->
size[1] * cube->
ld_);
463 if (exp_xy && !orthogonal_xy) {
464 for (
int y = 0; y < cube->
size[1]; y++) {
465 const double *__restrict src = &
idx2(exp_xy[0], y, 0);
466 double *__restrict dst = &scratch[y * cube->
ld_];
469 for (
int x = 0; x < cube->
size[2]; x++) {
476 1, scratch, 1, &
idx3(cube[0], 0, 0, 0), cube->
size[1] * cube->
ld_);
486 double *scratch,
const double alpha,
const bool *
const orthogonal,
488 const struct tensor_ *p_alpha_beta_reduced_,
struct tensor_ *cube) {
489 if (co->size[0] > 1) {
498 double *__restrict
const pz =
499 &
idx3(p_alpha_beta_reduced_[0], 0, 0, 0);
500 double *__restrict
const py =
501 &
idx3(p_alpha_beta_reduced_[0], 1, 0, 0);
502 double *__restrict
const px =
503 &
idx3(p_alpha_beta_reduced_[0], 2, 0, 0);
527 m1.m = co->size[0] * co->size[1];
528 m1.n = cube->
size[1];
533 m1.ldb = p_alpha_beta_reduced_->
ld_;
536 m1.use_libxsmm =
true;
553 m2.m = cube->
size[1] * co->size[1];
554 m2.n = cube->
size[2];
557 m2.lda = T.
ld_ * co->size[1];
559 m2.ldb = p_alpha_beta_reduced_->
ld_;
562 m2.use_libxsmm =
true;
575 m3.
m = cube->
size[0];
579 m3.
lda = p_alpha_beta_reduced_->
ld_;
580 m3.
b = &
idx3(W, 0, 0, 0);
582 m3.
c = &
idx3(cube[0], 0, 0, 0);
589 if (Exp && !orthogonal[2]) {
592 exp_xy.
data = &
idx3(Exp[0], 2, 0, 0);
598 if (Exp && !orthogonal[2]) {
602 exp_xy.
data = &
idx3(Exp[0], 2, 0, 0);
603 collocate_l0(scratch, co->data[0] * alpha, orthogonal[2], &exp_xy,
604 p_alpha_beta_reduced_, cube);
607 p_alpha_beta_reduced_, cube);
614 if (Exp && (!orthogonal[0] && !orthogonal[1])) {
619 exp_xz.
data = &
idx3(Exp[0], 0, 0, 0);
620 exp_yz.
data = &
idx3(Exp[0], 1, 0, 0);
625 if (Exp && (!orthogonal[0] || !orthogonal[1])) {
626 if (!orthogonal[0]) {
629 exp_xz.
data = &
idx3(Exp[0], 0, 0, 0);
633 if (!orthogonal[1]) {
636 exp_yz.
data = &
idx3(Exp[0], 1, 0, 0);
649 const int cmax,
const int *
const lower_boundaries_cube,
650 const int *
const cube_center) {
657 for (
int i = 0;
i < 3;
i++) {
658 for (
int ig = 0; ig < handler->
cube.
size[
i]; ig++) {
659 map[
i][ig] =
modulo(cube_center[
i] + ig + lower_boundaries_cube[
i] -
690 for (
int z = 0; (z < handler->
cube.
size[0]); z++) {
691 const int z1 =
map[0][z];
697 handler->
cube.
size[0], z1, &z, lower_corner, upper_corner,
701 if (upper_corner[0] - lower_corner[0]) {
702 for (
int y = 0; y < handler->
cube.
size[1]; y++) {
703 const int y1 =
map[1][y];
711 lower_corner + 1, upper_corner + 1, ywindow);
713 if (upper_corner[1] - lower_corner[1]) {
714 for (
int x = 0; x < handler->
cube.
size[2]; x++) {
715 const int x1 =
map[2][x];
722 &x, lower_corner + 2, upper_corner + 2, xwindow);
724 if (upper_corner[2] - lower_corner[2]) {
725 const int position1[3] = {z, y, x};
739 x += upper_corner[2] - lower_corner[2] - 1;
745 y += upper_corner[1] - lower_corner[1] - 1;
751 z += upper_corner[0] - lower_corner[0] - 1;
758 const bool use_ortho,
const double zetp,
const double rp[3],
759 const double radius) {
771 int lb_cube[3], ub_cube[3];
772 int pol_offset[3] = {0, 0, 0};
785 use_ortho, radius, (
const double(*)[3])handler->
dh,
786 (
const double(*)[3])handler->
dh_inv, rp, &disr_radius, roffset,
787 cubecenter, lb_cube, ub_cube, cube_size);
801 lb_cube[2], ub_cube[2], handler->
coef.
size[2] - 1,
cmax,
802 zetp, &
idx3(handler->
pol, 2, 0, 0));
804 lb_cube[1], ub_cube[1], handler->
coef.
size[1] - 1,
cmax,
805 zetp, &
idx3(handler->
pol, 1, 0, 0));
807 lb_cube[0], ub_cube[0], handler->
coef.
size[0] - 1,
cmax,
808 zetp, &
idx3(handler->
pol, 0, 0, 0));
812 zetp * handler->
dx[0],
816 zetp * handler->
dx[1],
820 zetp * handler->
dx[2],
824 zetp, roffset, (
const double(*)[3])handler->
dh, lb_cube, ub_cube,
850 roffset, cubecenter);
859 const bool use_ortho,
const int border_mask,
const enum grid_func func,
860 const int la_max,
const int la_min,
const int lb_max,
const int lb_min,
861 const double zeta,
const double zetb,
const double rscale,
862 const double dh[3][3],
const double dh_inv[3][3],
const double ra[3],
863 const double rab[3],
const int grid_global_size[3],
864 const int grid_local_size[3],
const int shift_local[3],
865 const int border_width[3],
const double radius,
const int o1,
const int o2,
866 const int n1,
const int n2,
const double pab_[n2][n1],
867 double *
const grid_) {
873 memcpy(pab.
data, pab_,
sizeof(
double) * n1 * n2);
876 int offset[2] = {o1, o2};
878 int lmax[2] = {la_max, lb_max};
879 int lmin[2] = {la_min, lb_min};
881 const double zetp = zeta + zetb;
882 const double f = zetb / zetp;
883 const double rab2 = rab[0] * rab[0] + rab[1] * rab[1] + rab[2] * rab[2];
884 const double prefactor = rscale * exp(-zeta * f * rab2);
885 const double zeta_pair[2] = {zeta, zetb};
892 handler->
grid.
ld_ = grid_local_size[0];
899 handler->
grid.
ld_ = grid_local_size[0];
904 for (
int i = 0;
i < 3;
i++) {
905 rp[
i] = ra[
i] + f * rab[
i];
906 rb[
i] = ra[
i] + rab[
i];
909 int lmin_diff[2], lmax_diff[2];
915 lmin_prep[0] =
imax(lmin[0] + lmin_diff[0], 0);
916 lmin_prep[1] =
imax(lmin[1] + lmin_diff[1], 0);
918 lmax_prep[0] = lmax[0] + lmax_diff[0];
919 lmax_prep[1] = lmax[1] + lmax_diff[1];
921 const int n1_prep = ncoset(lmax_prep[0]);
922 const int n2_prep = ncoset(lmax_prep[1]);
952 lmax_prep[0] + lmax_prep[1] + 1);
955 const int lp = lmax_prep[0] + lmax_prep[1];
974 &(handler->
alpha), &pab_prep,
986 const int iatom = task->
iatom;
987 const int jatom = task->
jatom;
988 const int iset = task->
iset;
989 const int jset = task->
jset;
1004 double *
const block = &pab_blocks->
host_buffer[block_offset];
1012 const _task *
const previous_task,
1013 const _task *
const task,
1031 const int n1_prep = ncoset(lmax_prep[0]);
1032 const int n2_prep = ncoset(lmax_prep[1]);
1041 &task->
zeta[0], pab, pab_prep);
1063 lmax_prep[0] + lmax_prep[1] + 1);
1066 const int lp = lmax_prep[0] + lmax_prep[1];
1079 lmin_prep, lmax_prep, lp,
1081 &handler->
alpha, pab_prep, &handler->
coef);
1085 const int *
const border_width,
1086 const int *
const shift_local,
1087 const enum grid_func func,
const int level,
1092#pragma omp parallel default(shared)
1094 const int num_threads = omp_get_num_threads();
1095 const int thread_id = omp_get_thread_num();
1097 tensor work, pab, pab_prep;
1118 (
const double(*)[3])
grid->dh_inv);
1125 for (
int d = 0; d < 3; d++)
1128 if ((thread_id == 0) || (num_threads == 1)) {
1133 if ((num_threads > 1) && (thread_id > 0)) {
1136 memset(handler->
grid.
data, 0,
sizeof(
double) *
grid->alloc_size_);
1142 const _task *prev_task = NULL;
1143#pragma omp for schedule(static)
1146 const _task *task = &ctx->
tasks[level][itask];
1148 if (task->
level != level) {
1149 printf(
"level %d, %d\n", task->
level, level);
1199 if (num_threads > 1) {
1200 if ((
grid->alloc_size_ / (num_threads - 1)) >= 2) {
1201 const int block_size =
grid->alloc_size_ / (num_threads - 1) +
1202 (
grid->alloc_size_ % (num_threads - 1));
1204 for (
int bk = 0; bk < num_threads; bk++) {
1205 if (thread_id > 0) {
1206 int bk_id = (bk + thread_id - 1) % num_threads;
1207 int begin = bk_id * block_size;
1208 int end =
imin((bk_id + 1) * block_size,
grid->alloc_size_);
1210 grid->data + begin, 1);
1224 free(pab_prep.
data);
1245 assert(ctx->
nlevels == nlevels);
1248 for (
int level = 0; level < ctx->
nlevels; level++) {
1253 layout->
dh_inv, grids[level]);
1263 for (
int x = 1; x < nlevels; x++) {
1267 max_size = ((max_size / 4096) + (max_size % 4096 != 0)) * 4096;
1274 for (
int level = 0; level < ctx->
nlevels; level++) {
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard)
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
#define GRID_PRAGMA_SIMD(OBJS, N)
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int i
static void const int cmax
static void const int const int const double pol[3][lp+1][2 *cmax+1]
static void const int const int const int const int const int const double const int map[3][2 *cmax+1]
void grid_transform_coef_xzy_to_ikj(const double dh[3][3], const tensor *coef_xyz)
void grid_compute_coefficients_dgemm(const int *lmin, const int *lmax, const int lp, const double prefactor, const tensor *const alpha, const tensor *const pab, tensor *coef_xyz)
void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3], const double rp[3], const int *lmax, tensor *alpha)
void grid_fill_pol_dgemm(const bool transpose, const double dr, const double roffset, const int pol_offset, const int xmin, const int xmax, const int lp, const int cmax, const double zetp, double *pol_)
void grid_dgemm_collocate_pgf_product(const bool use_ortho, const int border_mask, const enum grid_func func, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double rscale, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int grid_global_size[3], const int grid_local_size[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double pab_[n2][n1], double *const grid_)
void grid_dgemm_collocate_task_list(grid_dgemm_task_list *const ptr, const enum grid_func func, const int nlevels, const offload_buffer *pab_blocks, offload_buffer *grids[nlevels])
Collocate all tasks of a given list onto given grids. See grid_task_list.h for details.
void apply_mapping_cubic(struct collocation_integration_ *handler, const int cmax, const int *const lower_boundaries_cube, const int *const cube_center)
void extract_blocks(grid_context *const ctx, const _task *const task, const offload_buffer *pab_blocks, tensor *const work, tensor *const pab)
void rotate_to_cartesian_harmonics(const grid_basis_set *ibasis, const grid_basis_set *jbasis, const int iatom, const int jatom, const int iset, const int jset, double *const block, tensor *work, tensor *pab)
void apply_spherical_cutoff_generic(struct collocation_integration_ *const handler, const double disr_radius, const int cmax, const int *const lb_cube, const int *const ub_cube, const double *const roffset, const int *const cube_center)
void tensor_reduction_for_collocate_integrate(double *scratch, const double alpha, const bool *const orthogonal, const struct tensor_ *Exp, const struct tensor_ *co, const struct tensor_ *p_alpha_beta_reduced_, struct tensor_ *cube)
void collocate_l0(double *scratch, const double alpha, const bool orthogonal, const struct tensor_ *exp_xy, const struct tensor_ *p_alpha_beta_reduced_, struct tensor_ *cube)
void compute_coefficients(grid_context *const ctx, struct collocation_integration_ *handler, const _task *const previous_task, const _task *const task, const offload_buffer *pab_blocks, tensor *const pab, tensor *const work, tensor *const pab_prep)
void collocate_one_grid_level_dgemm(grid_context *const ctx, const int *const border_width, const int *const shift_local, const enum grid_func func, const int level, const offload_buffer *pab_blocks)
void apply_sphere_cutoff_ortho(struct collocation_integration_ *const handler, const double disr_radius, const int cmax, const int *const lb_cube, const int *const cube_center)
void grid_collocate(collocation_integration *const handler, const bool use_ortho, const double zetp, const double rp[3], const double radius)
void initialize_basis_vectors(collocation_integration *const handler, const double dh[3][3], const double dh_inv[3][3])
void collocate_destroy_handle(void *gaussian_handle)
void initialize_W_and_T(collocation_integration *const handler, const tensor *cube, const tensor *coef)
struct collocation_integration_ * collocate_create_handle(void)
void set_grid_parameters(tensor *grid, const bool orthorhombic, const int grid_full_size[3], const int grid_local_size[3], const int shift_local[3], const int border_width[3], const double dh[3][3], const double dh_inv[3][3], offload_buffer *grid_)
void calculate_non_orthorombic_corrections_tensor(const double mu_mean, const double *r_ab, const double basis[3][3], const int *const xmin, const int *const xmax, bool plane[3], tensor *const Exp)
void apply_non_orthorombic_corrections_xz_yz_blocked(const struct tensor_ *const Exp_xz, const struct tensor_ *const Exp_yz, struct tensor_ *const m)
void apply_non_orthorombic_corrections_yz_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
void apply_non_orthorombic_corrections_xz_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
void apply_non_orthorombic_corrections_xy_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
void grid_prepare_pab_dgemm(const enum grid_func func, const int *const offset, const int *const lmin, const int *const lmax, const double *const zeta, tensor *const pab, tensor *const pab_prep)
void grid_prepare_get_ldiffs_dgemm(const enum grid_func func, int *const lmin_diff, int *const lmax_diff)
void tensor_copy(tensor *const b, const tensor *const a)
size_t realloc_tensor(tensor *t)
void alloc_tensor(tensor *t)
static void setup_grid_window(tensor *const grid, const int *const shift_local, const int *const border_width, const int border_mask)
static void initialize_tensor_4(struct tensor_ *a, int n1, int n2, int n3, int n4)
static void setup_global_grid_size(tensor *const grid, const int *const full_size)
static void initialize_tensor_3(struct tensor_ *a, int n1, int n2, int n3)
static void initialize_tensor_2(struct tensor_ *a, int n1, int n2)
int compute_cube_properties(const bool ortho, const double radius, const double dh[3][3], const double dh_inv[3][3], const double *rp, double *disr_radius, double *roffset, int *cubecenter, int *lb_cube, int *ub_cube, int *cube_size)
void compute_interval(const int *const map, const int full_size, const int size, const int cube_size, const int x1, int *x, int *const lower_corner, int *const upper_corner, const Interval window)
void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda)
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY)
void verify_orthogonality(const double dh[3][3], bool orthogonal[3])
void dgemm_simplified(dgemm_params *const m)
void add_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *subgrid, tensor *grid)
static void * grid_allocate_scratch(size_t size)
static void grid_free_scratch(void *ptr)
Internal representation of a basis set.
grid_basis_set ** basis_sets
struct collocation_integration_ ** handler
Internal representation of a buffer.