17#include "../common/grid_basis_set.h"
18#include "../common/grid_common.h"
19#include "../common/grid_constants.h"
29void collocate_l0(
double *scratch,
const double alpha,
const bool orthogonal,
31 const struct tensor_ *p_alpha_beta_reduced_,
36 const int iatom,
const int jatom,
37 const int iset,
const int jset,
38 double *
const block,
tensor *work,
45 const int nsgf_seta = ibasis->
nsgf_set[iset];
46 const int nsgf_setb = jbasis->
nsgf_set[jset];
47 const int nsgfa = ibasis->
nsgf;
48 const int nsgfb = jbasis->
nsgf;
49 const int sgfa = ibasis->
first_sgf[iset] - 1;
50 const int sgfb = jbasis->
first_sgf[jset] - 1;
51 const int maxcoa = ibasis->
maxco;
52 const int maxcob = jbasis->
maxco;
53 const int ncoseta = ncoset(ibasis->
lmax[iset]);
54 const int ncosetb = ncoset(jbasis->
lmax[jset]);
55 const int ncoa = ibasis->
npgf[iset] * ncoseta;
56 const int ncob = jbasis->
npgf[jset] * ncosetb;
73 m1.a = block + sgfb * nsgfa + sgfa;
75 m1.b = &ibasis->
sphi[sgfa * maxcoa];
87 m1.a = block + sgfa * nsgfb + sgfb;
89 m1.b = &ibasis->
sphi[sgfa * maxcoa];
101 m2.k = work->
size[0];
104 m2.a = &jbasis->
sphi[sgfb * maxcob];
115 double *scratch,
const double alpha,
const bool *
const orthogonal,
117 const struct tensor_ *p_alpha_beta_reduced_,
struct tensor_ *cube);
125 const double roffset,
const int pol_offset,
126 const int xmin,
const int xmax,
const int lp,
127 const int cmax,
const double zetp,
double *pol_) {
129 const double t_exp_1 = exp(-zetp * dr * dr);
130 const double t_exp_2 = t_exp_1 * t_exp_1;
132 double t_exp_min_1 = exp(-zetp * (dr - roffset) * (dr - roffset));
133 double t_exp_min_2 = exp(2.0 * zetp * (dr - roffset) * dr);
135 double t_exp_plus_1 = exp(-zetp * roffset * roffset);
136 double t_exp_plus_2 = exp(2.0 * zetp * roffset * dr);
143 for (
int ig = 0; ig >= xmin; ig--) {
144 const double rpg = ig * dr - roffset;
145 t_exp_min_1 *= t_exp_min_2 * t_exp_1;
146 t_exp_min_2 *= t_exp_2;
147 double pg = t_exp_min_1;
148 for (
int icoef = 0; icoef <= lp; icoef++) {
149 idx2(
pol, pol_offset + ig - xmin, icoef) = pg;
154 double t_exp_plus_1 = exp(-zetp * roffset * roffset);
155 double t_exp_plus_2 = exp(2 * zetp * roffset * dr);
156 for (
int ig = 1; ig <= xmax; ig++) {
157 const double rpg = ig * dr - roffset;
158 t_exp_plus_1 *= t_exp_plus_2 * t_exp_1;
159 t_exp_plus_2 *= t_exp_2;
160 double pg = t_exp_plus_1;
161 for (
int icoef = 0; icoef <= lp; icoef++) {
162 idx2(
pol, pol_offset + ig - xmin, icoef) = pg;
182 for (
int ig = 0; ig >= xmin; ig--) {
183 const double rpg = ig * dr - roffset;
184 t_exp_min_1 *= t_exp_min_2 * t_exp_1;
185 t_exp_min_2 *= t_exp_2;
186 double pg = t_exp_min_1;
187 idx2(
pol, 0, pol_offset + ig - xmin) = pg;
189 idx2(
pol, 1, pol_offset + ig - xmin) = rpg;
192 for (
int ig = 1; ig <= xmax; ig++) {
193 const double rpg = ig * dr - roffset;
194 t_exp_plus_1 *= t_exp_plus_2 * t_exp_1;
195 t_exp_plus_2 *= t_exp_2;
196 double pg = t_exp_plus_1;
197 idx2(
pol, 0, pol_offset + ig - xmin) = pg;
199 idx2(
pol, 1, pol_offset + ig - xmin) = rpg;
204 double *__restrict__ poly = &
idx2(
pol, 1, 0);
205 double *__restrict__ src1 = &
idx2(
pol, 0, 0);
206 double *__restrict__ dst = &
idx2(
pol, 2, 0);
209 for (
int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++)
210 dst[ig] = src1[ig] * poly[ig] * poly[ig];
213 for (
int icoef = 3; icoef <= lp; icoef++) {
214 double *__restrict__ poly = &
idx2(
pol, 1, 0);
215 double *__restrict__ src1 = &
idx2(
pol, icoef - 1, 0);
216 double *__restrict__ dst = &
idx2(
pol, icoef, 0);
218 for (
int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
219 dst[ig] = src1[ig] * poly[ig];
227 double *__restrict__ dst = &
idx2(
pol, 1, 0);
228 double *__restrict__ src = &
idx2(
pol, 0, 0);
230 for (
int ig = 0; ig < (xmax - xmin + 1 + pol_offset); ig++) {
238 const double disr_radius,
const int cmax,
239 const int *
const lb_cube,
240 const int *
const cube_center) {
247 for (
int i = 0;
i < 3;
i++) {
248 for (
int ig = 0; ig < handler->
cube.
size[
i]; ig++) {
262 for (
int kg = 0; kg < handler->
cube.
size[0]; kg++) {
263 const int k =
map[0][kg];
265 (2 * (kg + lb_cube[0]) - 1) / 2;
266 const double kr = kd * handler->
dh[2][2];
267 const double kremain = disr_radius * disr_radius - kr * kr;
271 const int jgmin = ceil(-1e-8 - sqrt(kremain) * handler->
dh_inv[1][1]);
272 for (
int jg = jgmin; jg <= (1 - jgmin); jg++) {
273 const int j =
map[1][jg - lb_cube[1]];
274 const double jr = ((2 * jg - 1) >> 1) *
276 const double jremain = kremain - jr * jr;
279 const int xmin = ceil(-1e-8 - sqrt(jremain) * handler->
dh_inv[0][0]);
280 const int xmax = 1 - xmin;
283 for (
int x = xmin - lb_cube[2];
284 x <
imin((xmax - lb_cube[2]), handler->
cube.
size[2]); x++) {
285 const int x1 =
map[2][x];
290 int lower_corner[3] = {k, j, x1};
291 int upper_corner[3] = {k + 1, j + 1, x1 + 1};
294 handler->
grid.
size[2], xmax - lb_cube[2], x1, &x,
295 lower_corner + 2, upper_corner + 2, xwindow);
297 if (upper_corner[2] - lower_corner[2]) {
298 const int position1[3] = {kg, jg - lb_cube[1], x};
300 double *restrict dst = &
idx3(handler->
grid, lower_corner[0],
301 lower_corner[1], lower_corner[2]);
302 double *restrict src = &
idx3(handler->
cube, position1[0],
303 position1[1], position1[2]);
305 const int sizex = upper_corner[2] - lower_corner[2];
307 for (
int x = 0; x < sizex; x++) {
315 x += upper_corner[2] - lower_corner[2] - 1;
325 const int cmax,
const int *
const lb_cube,
const int *
const ub_cube,
326 const double *
const roffset,
const int *
const cube_center) {
328 const double a = handler->
dh[0][0] * handler->
dh[0][0] +
329 handler->
dh[0][1] * handler->
dh[0][1] +
330 handler->
dh[0][2] * handler->
dh[0][2];
331 const double a_inv = 1.0 / a;
338 for (
int i = 0;
i < 3;
i++) {
339 for (
int ig = 0; ig < handler->
cube.
size[
i]; ig++) {
353 for (
int k = 0; k < handler->
cube.
size[0]; k++) {
354 const int iz =
map[0][k];
359 const double tz = (k + lb_cube[0] - roffset[0]);
360 const double z[3] = {tz * handler->
dh[2][0], tz * handler->
dh[2][1],
361 tz * handler->
dh[2][2]};
363 for (
int j = 0; j < handler->
cube.
size[1]; j++) {
364 const int iy =
map[1][j];
369 const double ty = (j - roffset[1] + lb_cube[1]);
370 const double y[3] = {z[0] + ty * handler->
dh[1][0],
371 z[1] + ty * handler->
dh[1][1],
372 z[2] + ty * handler->
dh[1][2]};
379 -2.0 * (handler->
dh[0][0] * (roffset[2] * handler->
dh[0][0] - y[0]) +
380 handler->
dh[0][1] * (roffset[2] * handler->
dh[0][1] - y[1]) +
381 handler->
dh[0][2] * (roffset[2] * handler->
dh[0][2] - y[2]));
383 const double c = (roffset[2] * handler->
dh[0][0] - y[0]) *
384 (roffset[2] * handler->
dh[0][0] - y[0]) +
385 (roffset[2] * handler->
dh[0][1] - y[1]) *
386 (roffset[2] * handler->
dh[0][1] - y[1]) +
387 (roffset[2] * handler->
dh[0][2] - y[2]) *
388 (roffset[2] * handler->
dh[0][2] - y[2]) -
389 disr_radius * disr_radius;
391 double delta = b * b - 4.0 * a * c;
398 const int xmin =
imax(ceil((-b - delta) * 0.5 * a_inv), lb_cube[2]);
399 const int xmax =
imin(floor((-b + delta) * 0.5 * a_inv), ub_cube[2]);
401 int lower_corner[3] = {iz, iy, xmin};
402 int upper_corner[3] = {iz + 1, iy + 1, xmin};
404 for (
int x = xmin - lb_cube[2];
405 x <
imin((xmax - lb_cube[2]), handler->
cube.
size[2]); x++) {
406 const int x1 =
map[2][x];
412 handler->
grid.
size[2], xmax - lb_cube[2], x1, &x,
413 lower_corner + 2, upper_corner + 2, xwindow);
415 if (upper_corner[2] - lower_corner[2]) {
416 const int position1[3] = {k, j, x};
421 double *__restrict__ dst = &
idx3(handler->
grid, lower_corner[0],
422 lower_corner[1], lower_corner[2]);
423 double *__restrict__ src =
424 &
idx3(handler->
cube, position1[0], position1[1], position1[2]);
426 const int sizex = upper_corner[2] - lower_corner[2];
428 for (
int x = 0; x < sizex; x++) {
435 x += upper_corner[2] - lower_corner[2] - 1;
442void collocate_l0(
double *scratch,
const double alpha,
const bool orthogonal_xy,
444 const struct tensor_ *p_alpha_beta_reduced_,
446 const double *__restrict pz =
447 &
idx3(p_alpha_beta_reduced_[0], 0, 0, 0);
448 const double *__restrict py =
449 &
idx3(p_alpha_beta_reduced_[0], 1, 0, 0);
450 const double *__restrict px =
451 &
idx3(p_alpha_beta_reduced_[0], 2, 0, 0);
453 memset(&
idx3(cube[0], 0, 0, 0), 0,
sizeof(
double) * cube->
alloc_size_);
454 memset(scratch, 0,
sizeof(
double) * cube->
size[1] * cube->
ld_);
459 if (exp_xy && !orthogonal_xy) {
460 for (
int y = 0; y < cube->
size[1]; y++) {
461 const double *__restrict src = &
idx2(exp_xy[0], y, 0);
462 double *__restrict dst = &scratch[y * cube->
ld_];
465 for (
int x = 0; x < cube->
size[2]; x++) {
472 1, scratch, 1, &
idx3(cube[0], 0, 0, 0), cube->
size[1] * cube->
ld_);
482 double *scratch,
const double alpha,
const bool *
const orthogonal,
484 const struct tensor_ *p_alpha_beta_reduced_,
struct tensor_ *cube) {
485 if (co->size[0] > 1) {
494 double *__restrict
const pz =
495 &
idx3(p_alpha_beta_reduced_[0], 0, 0, 0);
496 double *__restrict
const py =
497 &
idx3(p_alpha_beta_reduced_[0], 1, 0, 0);
498 double *__restrict
const px =
499 &
idx3(p_alpha_beta_reduced_[0], 2, 0, 0);
523 m1.m = co->size[0] * co->size[1];
524 m1.n = cube->
size[1];
529 m1.ldb = p_alpha_beta_reduced_->
ld_;
549 m2.m = cube->
size[1] * co->size[1];
550 m2.n = cube->
size[2];
553 m2.lda = T.
ld_ * co->size[1];
555 m2.ldb = p_alpha_beta_reduced_->
ld_;
571 m3.
m = cube->
size[0];
575 m3.
lda = p_alpha_beta_reduced_->
ld_;
576 m3.
b = &
idx3(W, 0, 0, 0);
578 m3.
c = &
idx3(cube[0], 0, 0, 0);
585 if (Exp && !orthogonal[2]) {
588 exp_xy.
data = &
idx3(Exp[0], 2, 0, 0);
594 if (Exp && !orthogonal[2]) {
598 exp_xy.
data = &
idx3(Exp[0], 2, 0, 0);
599 collocate_l0(scratch, co->data[0] * alpha, orthogonal[2], &exp_xy,
600 p_alpha_beta_reduced_, cube);
603 p_alpha_beta_reduced_, cube);
610 if (Exp && (!orthogonal[0] && !orthogonal[1])) {
615 exp_xz.
data = &
idx3(Exp[0], 0, 0, 0);
616 exp_yz.
data = &
idx3(Exp[0], 1, 0, 0);
621 if (Exp && (!orthogonal[0] || !orthogonal[1])) {
622 if (!orthogonal[0]) {
625 exp_xz.
data = &
idx3(Exp[0], 0, 0, 0);
629 if (!orthogonal[1]) {
632 exp_yz.
data = &
idx3(Exp[0], 1, 0, 0);
645 const int cmax,
const int *
const lower_boundaries_cube,
646 const int *
const cube_center) {
653 for (
int i = 0;
i < 3;
i++) {
654 for (
int ig = 0; ig < handler->
cube.
size[
i]; ig++) {
655 map[
i][ig] =
modulo(cube_center[
i] + ig + lower_boundaries_cube[
i] -
686 for (
int z = 0; (z < handler->
cube.
size[0]); z++) {
687 const int z1 =
map[0][z];
693 handler->
cube.
size[0], z1, &z, lower_corner, upper_corner,
697 if (upper_corner[0] - lower_corner[0]) {
698 for (
int y = 0; y < handler->
cube.
size[1]; y++) {
699 const int y1 =
map[1][y];
707 lower_corner + 1, upper_corner + 1, ywindow);
709 if (upper_corner[1] - lower_corner[1]) {
710 for (
int x = 0; x < handler->
cube.
size[2]; x++) {
711 const int x1 =
map[2][x];
718 &x, lower_corner + 2, upper_corner + 2, xwindow);
720 if (upper_corner[2] - lower_corner[2]) {
721 const int position1[3] = {z, y, x};
735 x += upper_corner[2] - lower_corner[2] - 1;
741 y += upper_corner[1] - lower_corner[1] - 1;
747 z += upper_corner[0] - lower_corner[0] - 1;
754 const bool use_ortho,
const double zetp,
const double rp[3],
755 const double radius) {
767 int lb_cube[3], ub_cube[3];
768 int pol_offset[3] = {0, 0, 0};
781 use_ortho, radius, (
const double(*)[3])handler->
dh,
782 (
const double(*)[3])handler->
dh_inv, rp, &disr_radius, roffset,
783 cubecenter, lb_cube, ub_cube, cube_size);
797 lb_cube[2], ub_cube[2], handler->
coef.
size[2] - 1,
cmax,
798 zetp, &
idx3(handler->
pol, 2, 0, 0));
800 lb_cube[1], ub_cube[1], handler->
coef.
size[1] - 1,
cmax,
801 zetp, &
idx3(handler->
pol, 1, 0, 0));
803 lb_cube[0], ub_cube[0], handler->
coef.
size[0] - 1,
cmax,
804 zetp, &
idx3(handler->
pol, 0, 0, 0));
808 zetp * handler->
dx[0],
812 zetp * handler->
dx[1],
816 zetp * handler->
dx[2],
820 zetp, roffset, (
const double(*)[3])handler->
dh, lb_cube, ub_cube,
846 roffset, cubecenter);
855 const bool use_ortho,
const int border_mask,
const enum grid_func func,
856 const int la_max,
const int la_min,
const int lb_max,
const int lb_min,
857 const double zeta,
const double zetb,
const double rscale,
858 const double dh[3][3],
const double dh_inv[3][3],
const double ra[3],
859 const double rab[3],
const int grid_global_size[3],
860 const int grid_local_size[3],
const int shift_local[3],
861 const int border_width[3],
const double radius,
const int o1,
const int o2,
862 const int n1,
const int n2,
const double pab_[n2][n1],
863 double *
const grid_) {
869 memcpy(pab.
data, pab_,
sizeof(
double) * n1 * n2);
872 int offset[2] = {o1, o2};
874 int lmax[2] = {la_max, lb_max};
875 int lmin[2] = {la_min, lb_min};
877 const double zetp = zeta + zetb;
878 const double f = zetb / zetp;
879 const double rab2 = rab[0] * rab[0] + rab[1] * rab[1] + rab[2] * rab[2];
880 const double prefactor = rscale * exp(-zeta * f * rab2);
881 const double zeta_pair[2] = {zeta, zetb};
888 handler->
grid.
ld_ = grid_local_size[0];
895 handler->
grid.
ld_ = grid_local_size[0];
900 for (
int i = 0;
i < 3;
i++) {
901 rp[
i] = ra[
i] + f * rab[
i];
902 rb[
i] = ra[
i] + rab[
i];
905 int lmin_diff[2], lmax_diff[2];
911 lmin_prep[0] =
imax(lmin[0] + lmin_diff[0], 0);
912 lmin_prep[1] =
imax(lmin[1] + lmin_diff[1], 0);
914 lmax_prep[0] = lmax[0] + lmax_diff[0];
915 lmax_prep[1] = lmax[1] + lmax_diff[1];
917 const int n1_prep = ncoset(lmax_prep[0]);
918 const int n2_prep = ncoset(lmax_prep[1]);
948 lmax_prep[0] + lmax_prep[1] + 1);
951 const int lp = lmax_prep[0] + lmax_prep[1];
970 &(handler->
alpha), &pab_prep,
982 const int iatom = task->
iatom;
983 const int jatom = task->
jatom;
984 const int iset = task->
iset;
985 const int jset = task->
jset;
1000 double *
const block = &pab_blocks->
host_buffer[block_offset];
1008 const _task *
const previous_task,
1009 const _task *
const task,
1027 const int n1_prep = ncoset(lmax_prep[0]);
1028 const int n2_prep = ncoset(lmax_prep[1]);
1037 &task->
zeta[0], pab, pab_prep);
1059 lmax_prep[0] + lmax_prep[1] + 1);
1062 const int lp = lmax_prep[0] + lmax_prep[1];
1075 lmin_prep, lmax_prep, lp,
1077 &handler->
alpha, pab_prep, &handler->
coef);
1081 const int *
const border_width,
1082 const int *
const shift_local,
1083 const enum grid_func func,
const int level,
1088#pragma omp parallel default(shared)
1090 const int num_threads = omp_get_num_threads();
1091 const int thread_id = omp_get_thread_num();
1093 tensor work, pab, pab_prep;
1114 (
const double(*)[3])
grid->dh_inv);
1121 for (
int d = 0; d < 3; d++)
1124 if ((thread_id == 0) || (num_threads == 1)) {
1129 if ((num_threads > 1) && (thread_id > 0)) {
1132 memset(handler->
grid.
data, 0,
sizeof(
double) *
grid->alloc_size_);
1138 const _task *prev_task = NULL;
1139#pragma omp for schedule(static)
1142 const _task *task = &ctx->
tasks[level][itask];
1144 if (task->
level != level) {
1145 printf(
"level %d, %d\n", task->
level, level);
1195 if (num_threads > 1) {
1196 if ((
grid->alloc_size_ / (num_threads - 1)) >= 2) {
1197 const int block_size =
grid->alloc_size_ / (num_threads - 1) +
1198 (
grid->alloc_size_ % (num_threads - 1));
1200 for (
int bk = 0; bk < num_threads; bk++) {
1201 if (thread_id > 0) {
1202 int bk_id = (bk + thread_id - 1) % num_threads;
1203 int begin = bk_id * block_size;
1204 int end =
imin((bk_id + 1) * block_size,
grid->alloc_size_);
1206 grid->data + begin, 1);
1220 free(pab_prep.
data);
1241 assert(ctx->
nlevels == nlevels);
1244 for (
int level = 0; level < ctx->
nlevels; level++) {
1249 layout->
dh_inv, grids[level]);
1259 for (
int x = 1; x < nlevels; x++) {
1263 max_size = ((max_size / 4096) + (max_size % 4096 != 0)) * 4096;
1270 for (
int level = 0; level < ctx->
nlevels; level++) {
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard)
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
#define GRID_PRAGMA_SIMD(OBJS, N)
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int i
static void const int cmax
static void const int const int const double pol[3][lp+1][2 *cmax+1]
static void const int const int const int const int const int const double const int map[3][2 *cmax+1]
void grid_transform_coef_xzy_to_ikj(const double dh[3][3], const tensor *coef_xyz)
void grid_compute_coefficients_dgemm(const int *lmin, const int *lmax, const int lp, const double prefactor, const tensor *const alpha, const tensor *const pab, tensor *coef_xyz)
void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3], const double rp[3], const int *lmax, tensor *alpha)
void grid_fill_pol_dgemm(const bool transpose, const double dr, const double roffset, const int pol_offset, const int xmin, const int xmax, const int lp, const int cmax, const double zetp, double *pol_)
void grid_dgemm_collocate_pgf_product(const bool use_ortho, const int border_mask, const enum grid_func func, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double rscale, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int grid_global_size[3], const int grid_local_size[3], const int shift_local[3], const int border_width[3], const double radius, const int o1, const int o2, const int n1, const int n2, const double pab_[n2][n1], double *const grid_)
void grid_dgemm_collocate_task_list(grid_dgemm_task_list *const ptr, const enum grid_func func, const int nlevels, const offload_buffer *pab_blocks, offload_buffer *grids[nlevels])
Collocate all tasks of a given list onto given grids. See grid_task_list.h for details.
void apply_mapping_cubic(struct collocation_integration_ *handler, const int cmax, const int *const lower_boundaries_cube, const int *const cube_center)
void extract_blocks(grid_context *const ctx, const _task *const task, const offload_buffer *pab_blocks, tensor *const work, tensor *const pab)
void rotate_to_cartesian_harmonics(const grid_basis_set *ibasis, const grid_basis_set *jbasis, const int iatom, const int jatom, const int iset, const int jset, double *const block, tensor *work, tensor *pab)
void apply_spherical_cutoff_generic(struct collocation_integration_ *const handler, const double disr_radius, const int cmax, const int *const lb_cube, const int *const ub_cube, const double *const roffset, const int *const cube_center)
void tensor_reduction_for_collocate_integrate(double *scratch, const double alpha, const bool *const orthogonal, const struct tensor_ *Exp, const struct tensor_ *co, const struct tensor_ *p_alpha_beta_reduced_, struct tensor_ *cube)
void collocate_l0(double *scratch, const double alpha, const bool orthogonal, const struct tensor_ *exp_xy, const struct tensor_ *p_alpha_beta_reduced_, struct tensor_ *cube)
void compute_coefficients(grid_context *const ctx, struct collocation_integration_ *handler, const _task *const previous_task, const _task *const task, const offload_buffer *pab_blocks, tensor *const pab, tensor *const work, tensor *const pab_prep)
void collocate_one_grid_level_dgemm(grid_context *const ctx, const int *const border_width, const int *const shift_local, const enum grid_func func, const int level, const offload_buffer *pab_blocks)
void apply_sphere_cutoff_ortho(struct collocation_integration_ *const handler, const double disr_radius, const int cmax, const int *const lb_cube, const int *const cube_center)
void grid_collocate(collocation_integration *const handler, const bool use_ortho, const double zetp, const double rp[3], const double radius)
void initialize_basis_vectors(collocation_integration *const handler, const double dh[3][3], const double dh_inv[3][3])
void collocate_destroy_handle(void *gaussian_handle)
void initialize_W_and_T(collocation_integration *const handler, const tensor *cube, const tensor *coef)
struct collocation_integration_ * collocate_create_handle(void)
void set_grid_parameters(tensor *grid, const bool orthorhombic, const int grid_full_size[3], const int grid_local_size[3], const int shift_local[3], const int border_width[3], const double dh[3][3], const double dh_inv[3][3], offload_buffer *grid_)
void calculate_non_orthorombic_corrections_tensor(const double mu_mean, const double *r_ab, const double basis[3][3], const int *const xmin, const int *const xmax, bool plane[3], tensor *const Exp)
void apply_non_orthorombic_corrections_xz_yz_blocked(const struct tensor_ *const Exp_xz, const struct tensor_ *const Exp_yz, struct tensor_ *const m)
void apply_non_orthorombic_corrections_yz_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
void apply_non_orthorombic_corrections_xz_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
void apply_non_orthorombic_corrections_xy_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
void grid_prepare_pab_dgemm(const enum grid_func func, const int *const offset, const int *const lmin, const int *const lmax, const double *const zeta, tensor *const pab, tensor *const pab_prep)
void grid_prepare_get_ldiffs_dgemm(const enum grid_func func, int *const lmin_diff, int *const lmax_diff)
void grid_dgemm_task_list
opaque pointer hidding the internal representation of the structure. It is not needed to know what ex...
void tensor_copy(tensor *const b, const tensor *const a)
size_t realloc_tensor(tensor *t)
void alloc_tensor(tensor *t)
static void setup_grid_window(tensor *const grid, const int *const shift_local, const int *const border_width, const int border_mask)
static void initialize_tensor_4(struct tensor_ *a, int n1, int n2, int n3, int n4)
static void setup_global_grid_size(tensor *const grid, const int *const full_size)
static void initialize_tensor_3(struct tensor_ *a, int n1, int n2, int n3)
static void initialize_tensor_2(struct tensor_ *a, int n1, int n2)
int compute_cube_properties(const bool ortho, const double radius, const double dh[3][3], const double dh_inv[3][3], const double *rp, double *disr_radius, double *roffset, int *cubecenter, int *lb_cube, int *ub_cube, int *cube_size)
void compute_interval(const int *const map, const int full_size, const int size, const int cube_size, const int x1, int *x, int *const lower_corner, int *const upper_corner, const Interval window)
void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda)
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY)
void verify_orthogonality(const double dh[3][3], bool orthogonal[3])
void dgemm_simplified(dgemm_params *const m)
void add_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *subgrid, tensor *grid)
static void * grid_allocate_scratch(size_t size)
static void grid_free_scratch(void *ptr)
Internal representation of a basis set.
grid_basis_set ** basis_sets
struct collocation_integration_ ** handler
Internal representation of a buffer.