15#include "../common/grid_common.h"
16#include "../common/grid_library.h"
18#if (GRID_DO_COLLOCATE)
19#define GRID_CONST_WHEN_COLLOCATE const
20#define GRID_CONST_WHEN_INTEGRATE
22#define GRID_CONST_WHEN_COLLOCATE
23#define GRID_CONST_WHEN_INTEGRATE const
32 const int j2,
const int cmax,
33 const double pol[3][lp + 1][2 *
cmax + 1],
34 const int map[3][2 *
cmax + 1],
const double dh[3][3],
35 const double dh_inv[3][3],
const double kremain,
46 const int jd = (2 *
j1 - 1) / 2;
47 const double jr = jd * dh[1][1];
48 const double jremain = kremain - jr * jr;
49 const int istart = (int)ceil(-1e-8 - sqrt(fmax(0.0, jremain)) * dh_inv[0][0]);
50 const int iend = 1 - istart;
52 for (
int i = istart;
i <= iend;
i++) {
66#if (GRID_DO_COLLOCATE)
68 double reg[4] = {0.0, 0.0, 0.0, 0.0};
69 for (
int lxp = 0; lxp <= lp; lxp++) {
70 const double p =
pol[0][lxp][
i +
cmax];
71 reg[0] +=
cx[lxp * 4 + 0] * p;
72 reg[1] +=
cx[lxp * 4 + 1] * p;
73 reg[2] +=
cx[lxp * 4 + 2] * p;
74 reg[3] +=
cx[lxp * 4 + 3] * p;
84 for (
int lxp = 0; lxp <= lp; lxp++) {
85 const double p =
pol[0][lxp][
i +
cmax];
86 cx[lxp * 4 + 0] += reg[0] * p;
87 cx[lxp * 4 + 1] += reg[1] * p;
88 cx[lxp * 4 + 2] += reg[2] * p;
89 cx[lxp * 4 + 3] += reg[3] * p;
101 const double pol[3][lp + 1][2 *
cmax + 1],
105 for (
int lyp = 0; lyp <= lp; lyp++) {
106 for (
int lxp = 0; lxp <= lp - lyp; lxp++) {
107 const double p1 =
pol[1][lyp][
j1 +
cmax];
108 const double p2 =
pol[1][lyp][
j2 +
cmax];
109 const int cxy_index = lyp * (lp + 1) * 2 + lxp * 2;
111#if (GRID_DO_COLLOCATE)
113 cx[lxp * 4 + 0] +=
cxy[cxy_index + 0] * p1;
114 cx[lxp * 4 + 1] +=
cxy[cxy_index + 1] * p1;
115 cx[lxp * 4 + 2] +=
cxy[cxy_index + 0] * p2;
116 cx[lxp * 4 + 3] +=
cxy[cxy_index + 1] * p2;
119 cxy[cxy_index + 0] +=
cx[lxp * 4 + 0] * p1;
120 cxy[cxy_index + 1] +=
cx[lxp * 4 + 1] * p1;
121 cxy[cxy_index + 0] +=
cx[lxp * 4 + 2] * p2;
122 cxy[cxy_index + 1] +=
cx[lxp * 4 + 3] * p2;
133 const int lp,
const int k1,
const int k2,
const int cmax,
134 const double pol[3][lp + 1][2 *
cmax + 1],
const int map[3][2 *
cmax + 1],
135 const double dh[3][3],
const double dh_inv[3][3],
const double disr_radius,
144 const int kd = (2 * k1 - 1) / 2;
145 const double kr = kd * dh[2][2];
146 const double kremain = disr_radius * disr_radius - kr * kr;
147 const int jstart = (int)ceil(-1e-8 - sqrt(fmax(0.0, kremain)) * dh_inv[1][1]);
149 const size_t cx_size = (lp + 1) * 4;
151 for (
int j1 = jstart;
j1 <= 0;
j1++) {
152 const int j2 = 1 -
j1;
153 memset(
cx, 0, cx_size *
sizeof(
double));
155#if (GRID_DO_COLLOCATE)
158 ortho_cx_to_grid(lp, k1, k2,
j1,
j2,
cmax,
pol,
map, dh, dh_inv, kremain,
162 ortho_cx_to_grid(lp, k1, k2,
j1,
j2,
cmax,
pol,
map, dh, dh_inv, kremain,
175 const double pol[3][lp + 1][2 *
cmax + 1],
179 for (
int lzp = 0; lzp <= lp; lzp++) {
180 for (
int lyp = 0; lyp <= lp - lzp; lyp++) {
181 for (
int lxp = 0; lxp <= lp - lzp - lyp; lxp++) {
182 const double p1 =
pol[2][lzp][k1 +
cmax];
183 const double p2 =
pol[2][lzp][k2 +
cmax];
184 const int cxyz_index =
185 lzp * (lp + 1) * (lp + 1) + lyp * (lp + 1) + lxp;
186 const int cxy_index = lyp * (lp + 1) * 2 + lxp * 2;
188#if (GRID_DO_COLLOCATE)
190 cxy[cxy_index + 0] += cxyz[cxyz_index] * p1;
191 cxy[cxy_index + 1] += cxyz[cxyz_index] * p2;
194 cxyz[cxyz_index] +=
cxy[cxy_index + 0] * p1;
195 cxyz[cxyz_index] +=
cxy[cxy_index + 1] * p2;
208 const double dh_inv[3][3],
const double rp[3],
209 const int npts_global[3],
const int npts_local[3],
210 const int shift_local[3],
const double radius,
224 for (
int i = 0;
i < 3;
i++) {
225 double dh_inv_rp = 0.0;
226 for (
int j = 0; j < 3; j++) {
227 dh_inv_rp += dh_inv[j][
i] * rp[j];
229 cubecenter[
i] = (int)floor(dh_inv_rp);
233 for (
int i = 0;
i < 3;
i++) {
234 roffset[
i] = rp[
i] - ((double)cubecenter[
i]) * dh[
i][
i];
238 const double drmin = fmin(dh[0][0], fmin(dh[1][1], dh[2][2]));
239 const double disr_radius = drmin * fmax(1.0, ceil(radius / drmin));
242 int lb_cube[3], ub_cube[3];
243 for (
int i = 0;
i < 3;
i++) {
244 lb_cube[
i] = (int)ceil(-1e-8 - disr_radius * dh_inv[
i][
i]);
245 ub_cube[
i] = 1 - lb_cube[
i];
249 modulo(cubecenter[
i] + lb_cube[
i] - shift_local[
i], npts_global[
i]) -
252 assert(offset + lb_cube[
i] >= 0);
257 const int cmax =
imax(
imax(ub_cube[0], ub_cube[1]), ub_cube[2]);
260 double pol_mutable[3][lp + 1][2 *
cmax + 1];
261 for (
int idir = 0; idir < 3; idir++) {
262 const double dr = dh[idir][idir];
263 const double ro = roffset[idir];
267 const double t_exp_1 = exp(-zetp * pow(dr, 2));
268 const double t_exp_2 = pow(t_exp_1, 2);
269 double t_exp_min_1 = exp(-zetp * pow(+dr - ro, 2));
270 double t_exp_min_2 = exp(-2 * zetp * (+dr - ro) * (-dr));
271 for (
int ig = 0; ig >= lb_cube[idir]; ig--) {
272 const double rpg = ig * dr - ro;
273 t_exp_min_1 *= t_exp_min_2 * t_exp_1;
274 t_exp_min_2 *= t_exp_2;
275 double pg = t_exp_min_1;
276 for (
int icoef = 0; icoef <= lp; icoef++) {
277 pol_mutable[idir][icoef][ig +
cmax] = pg;
281 double t_exp_plus_1 = exp(-zetp * pow(-ro, 2));
282 double t_exp_plus_2 = exp(-2 * zetp * (-ro) * (+dr));
283 for (
int ig = 0; ig >= lb_cube[idir]; ig--) {
284 const double rpg = (1 - ig) * dr - ro;
285 t_exp_plus_1 *= t_exp_plus_2 * t_exp_1;
286 t_exp_plus_2 *= t_exp_2;
287 double pg = t_exp_plus_1;
288 for (
int icoef = 0; icoef <= lp; icoef++) {
289 pol_mutable[idir][icoef][1 - ig +
cmax] = pg;
294 const double(*
pol)[lp + 1][2 *
cmax + 1] =
295 (
const double(*)[lp + 1][2 *
cmax + 1]) pol_mutable;
298 int map_mutable[3][2 *
cmax + 1];
299 for (
int i = 0;
i < 3;
i++) {
300 for (
int k = -
cmax; k <= +
cmax; k++) {
301 map_mutable[
i][k +
cmax] =
302 modulo(cubecenter[
i] + k - shift_local[
i], npts_global[
i]);
305 const int(*
map)[2 *
cmax + 1] = (
const int(*)[2 *
cmax + 1]) map_mutable;
308 const int kstart = (int)ceil(-1e-8 - disr_radius * dh_inv[2][2]);
309 const size_t cxy_size = (lp + 1) * (lp + 1) * 2;
310 double cxy[cxy_size];
311 for (
int k1 = kstart; k1 <= 0; k1++) {
312 const int k2 = 1 - k1;
313 memset(
cxy, 0, cxy_size *
sizeof(
double));
315#if (GRID_DO_COLLOCATE)
336 const int index_min[3],
const int index_max[3],
337 const int map_i[],
const double gp[3],
const int k,
338 const int j,
const double exp_ij[],
const double exp_jk[],
344 for (
int i = ismin;
i <= ismax;
i++) {
345 const int ig = map_i[
i - index_min[0]];
349 const double di =
i - gp[0];
351 const int stride_i = index_max[0] - index_min[0] + 1;
352 const int stride_j = index_max[1] - index_min[1] + 1;
353 const int stride_k = index_max[2] - index_min[2] + 1;
354 const int idx_ij = (j - index_min[1]) * stride_i +
i - index_min[0];
355 const int idx_jk = (k - index_min[2]) * stride_j + j - index_min[1];
356 const int idx_ki = (
i - index_min[0]) * stride_k + k - index_min[2];
367 const double gaussian =
exp_ij[idx_ij] * exp_jk[idx_jk] * exp_ki[idx_ki];
369 const int grid_index = base + ig;
370 double dip = gaussian;
372#if (GRID_DO_COLLOCATE)
375 for (
int il = 0; il <= lp; il++) {
379 grid[grid_index] += reg;
382 const double reg =
grid[grid_index];
383 for (
int il = 0; il <= lp; il++) {
399 for (
int jl = 0; jl <= lp; jl++) {
400 for (
int il = 0; il <= lp - jl; il++) {
401 const int cij_index = jl * (lp + 1) + il;
402#if (GRID_DO_COLLOCATE)
403 ci[il] += cij[cij_index] * djp;
405 cij[cij_index] += ci[il] * djp;
417 const int lp,
const int k,
const int kg,
const int npts_local[3],
418 const int index_min[3],
const int index_max[3],
const int map_i[],
419 const int map_j[],
const double dh[3][3],
const double gp[3],
420 const double radius,
const double exp_ij[],
const double exp_jk[],
424 for (
int j = index_min[1]; j <= index_max[1]; j++) {
425 const int jg = map_j[j - index_min[1]];
429 const double dj = j - gp[1];
450 double a = 0.0, b = 0.0, c = 0.0;
451 for (
int i = 0;
i < 3;
i++) {
452 const double v = (0 - gp[0]) * dh[0][
i] + (j - gp[1]) * dh[1][
i] +
453 (k - gp[2]) * dh[2][
i];
454 a += dh[0][
i] * dh[0][
i];
455 b += 2.0 * v * dh[0][
i];
458 const double d = b * b - 4.0 * a * (c - radius * radius);
460 const double sqrt_d = sqrt(d);
461 const double inv_2a = 1.0 / (2.0 * a);
462 const int ismin = (int)ceil((-b - sqrt_d) * inv_2a);
463 const int ismax = (int)floor((-b + sqrt_d) * inv_2a);
466 memset(ci, 0,
sizeof(ci));
468#if (GRID_DO_COLLOCATE)
472 index_max, map_i, gp, k, j,
exp_ij, exp_jk, exp_ki, ci,
477 index_max, map_i, gp, k, j,
exp_ij, exp_jk, exp_ki, ci,
493 for (
int kl = 0; kl <= lp; kl++) {
494 for (
int jl = 0; jl <= lp - kl; jl++) {
495 for (
int il = 0; il <= lp - kl - jl; il++) {
496 const int cij_index = jl * (lp + 1) + il;
497 const int cijk_index =
498 kl * (lp + 1) * (lp + 1) + jl * (lp + 1) + il;
499#if (GRID_DO_COLLOCATE)
500 cij[cij_index] += cijk[cijk_index] * dkp;
502 cijk[cijk_index] += cij[cij_index] * dkp;
516 const int shift_local,
517 const int npts_global,
518 const int bounds[2],
int map[]) {
521 for (
int k = index_min; k <= index_max; k++) {
522 const int kg =
modulo(k - shift_local, npts_global);
523 if (bounds[0] <= kg && kg <= bounds[1]) {
524 map[k - index_min] = kg;
526 map[k - index_min] = INT_MIN;
537 const int index_max[3],
const double zetp,
538 const double dh[3][3],
const double gp[3],
539 double exp_table[]) {
541 const int stride_i = index_max[idir] - index_min[idir] + 1;
542 const double h_ii = dh[idir][0] * dh[idir][0] + dh[idir][1] * dh[idir][1] +
543 dh[idir][2] * dh[idir][2];
544 const double h_ij = dh[idir][0] * dh[jdir][0] + dh[idir][1] * dh[jdir][1] +
545 dh[idir][2] * dh[jdir][2];
547 for (
int i = index_min[idir];
i <= index_max[idir];
i++) {
548 const double di =
i - gp[idir];
549 const double rii = di * di * h_ii;
550 const double rij_unit = di * h_ij;
551 const double exp_ij_unit = exp(-zetp * 2.0 * rij_unit);
554 const int j_center = (int)gp[jdir];
555 const double dj_center = j_center - gp[jdir];
556 const double rij_center = dj_center * rij_unit;
557 const double exp_ij_center = exp(-zetp * (rii + 2.0 * rij_center));
560 double exp_ij = exp_ij_center;
561 for (
int j = j_center; j <= index_max[jdir]; j++) {
562 const int idx = (j - index_min[jdir]) * stride_i +
i - index_min[idir];
568 const double exp_ij_unit_inv = 1.0 / exp_ij_unit;
569 exp_ij = exp_ij_center * exp_ij_unit_inv;
570 for (
int j = j_center - 1; j >= index_min[jdir]; j--) {
571 const int idx = (j - index_min[jdir]) * stride_i +
i - index_min[idir];
573 exp_ij *= exp_ij_unit_inv;
584 const double dh[3][3],
const double dh_inv[3][3],
585 const double rp[3],
const int npts_global[3],
586 const int npts_local[3],
const int shift_local[3],
587 const int border_width[3],
const double radius,
598 if (border_mask & (1 << 0))
599 bounds_i[0] += border_width[0];
600 if (border_mask & (1 << 1))
601 bounds_i[1] -= border_width[0];
602 if (border_mask & (1 << 2))
603 bounds_j[0] += border_width[1];
604 if (border_mask & (1 << 3))
605 bounds_j[1] -= border_width[1];
606 if (border_mask & (1 << 4))
607 bounds_k[0] += border_width[2];
608 if (border_mask & (1 << 5))
609 bounds_k[1] -= border_width[2];
613 double gp[3] = {0.0, 0.0, 0.0};
614 for (
int i = 0;
i < 3;
i++) {
615 for (
int j = 0; j < 3; j++) {
616 gp[
i] += dh_inv[j][
i] * rp[j];
624 int index_min[3] = {INT_MAX, INT_MAX, INT_MAX};
625 int index_max[3] = {INT_MIN, INT_MIN, INT_MIN};
626 for (
int i = -1;
i <= 1;
i++) {
627 for (
int j = -1; j <= 1; j++) {
628 for (
int k = -1; k <= 1; k++) {
629 const double x = rp[0] +
i * radius;
630 const double y = rp[1] + j * radius;
631 const double z = rp[2] + k * radius;
632 for (
int idir = 0; idir < 3; idir++) {
634 dh_inv[0][idir] * x + dh_inv[1][idir] * y + dh_inv[2][idir] * z;
635 index_min[idir] =
imin(index_min[idir], (
int)floor(resc));
636 index_max[idir] =
imax(index_max[idir], (
int)ceil(resc));
643 const int range_i = index_max[0] - index_min[0] + 1;
646 npts_global[0], bounds_i, map_i);
647 const int range_j = index_max[1] - index_min[1] + 1;
650 npts_global[1], bounds_j, map_j);
651 const int range_k = index_max[2] - index_min[2] + 1;
654 npts_global[2], bounds_k, map_k);
657 double exp_ij[range_i * range_j];
659 double exp_jk[range_j * range_k];
661 double exp_ki[range_k * range_i];
665 const int cij_size = (lp + 1) * (lp + 1);
666 double cij[cij_size];
667 for (
int k = index_min[2]; k <= index_max[2]; k++) {
668 const int kg = map_k[k - index_min[2]];
669 if (kg < bounds_k[0] || bounds_k[1] < kg) {
672 const double dk = k - gp[2];
675 memset(cij, 0, cij_size *
sizeof(
double));
677#if (GRID_DO_COLLOCATE)
681 map_j, dh, gp, radius,
exp_ij, exp_jk, exp_ki, cij,
686 map_j, dh, gp, radius,
exp_ij, exp_jk, exp_ki, cij,
707 double hmatgridp[lp + 1][3][3];
708 for (
int i = 0;
i < 3;
i++) {
709 for (
int j = 0; j < 3; j++) {
710 hmatgridp[0][j][
i] = 1.0;
711 for (
int k = 1; k <= lp; k++) {
712 hmatgridp[k][j][
i] = hmatgridp[k - 1][j][
i] * dh[j][
i];
718 for (
int klx = 0; klx <= lpx; klx++) {
719 for (
int jlx = 0; jlx <= lpx - klx; jlx++) {
720 for (
int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
721 const int lx = ilx + jlx + klx;
722 const int lpy = lp - lx;
723 for (
int kly = 0; kly <= lpy; kly++) {
724 for (
int jly = 0; jly <= lpy - kly; jly++) {
725 for (
int ily = 0; ily <= lpy - kly - jly; ily++) {
726 const int ly = ily + jly + kly;
727 const int lpz = lp - lx - ly;
728 for (
int klz = 0; klz <= lpz; klz++) {
729 for (
int jlz = 0; jlz <= lpz - klz; jlz++) {
730 for (
int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
731 const int lz = ilz + jlz + klz;
732 const int il = ilx + ily + ilz;
733 const int jl = jlx + jly + jlz;
734 const int kl = klx + kly + klz;
735 const int lp1 = lp + 1;
736 const int cijk_index =
737 kl * lp1 * lp1 + jl * lp1 + il;
738 const int cxyz_index =
739 lz * lp1 * lp1 + ly * lp1 + lx;
741 hmatgridp[ilx][0][0] * hmatgridp[jlx][1][0] *
742 hmatgridp[klx][2][0] * hmatgridp[ily][0][1] *
743 hmatgridp[jly][1][1] * hmatgridp[kly][2][1] *
744 hmatgridp[ilz][0][2] * hmatgridp[jlz][1][2] *
745 hmatgridp[klz][2][2] * fac(lx) * fac(ly) * fac(lz) /
746 (fac(ilx) * fac(ily) * fac(ilz) * fac(jlx) * fac(jly) *
747 fac(jlz) * fac(klx) * fac(kly) * fac(klz));
748#if (GRID_DO_COLLOCATE)
749 cijk[cijk_index] += cxyz[cxyz_index] * p;
751 cxyz[cxyz_index] += cijk[cijk_index] * p;
770 const double dh[3][3],
const double dh_inv[3][3],
771 const double rp[3],
const int npts_global[3],
772 const int npts_local[3],
const int shift_local[3],
773 const int border_width[3],
const double radius,
777 const size_t cijk_size = (lp + 1) * (lp + 1) * (lp + 1);
778 double cijk[cijk_size];
779 memset(cijk, 0, cijk_size *
sizeof(
double));
781#if (GRID_DO_COLLOCATE)
785 npts_local, shift_local, border_width, radius, cijk,
790 npts_local, shift_local, border_width, radius, cijk,
801cxyz_to_grid(
const bool orthorhombic,
const int border_mask,
const int lp,
802 const double zetp,
const double dh[3][3],
803 const double dh_inv[3][3],
const double rp[3],
804 const int npts_global[3],
const int npts_local[3],
805 const int shift_local[3],
const int border_width[3],
810 if (orthorhombic && border_mask == 0) {
813 shift_local, radius, cxyz,
grid);
817 npts_local, shift_local, border_width, radius, cxyz,
828 const int lb_max,
const int lb_min,
829 const double prefactor,
const double ra[3],
830 const double rb[3],
const double rp[3],
836 const int lp = la_max + lb_max;
837 double alpha[3][lb_max + 1][la_max + 1][lp + 1];
838 memset(alpha, 0, 3 * (lb_max + 1) * (la_max + 1) * (lp + 1) *
sizeof(
double));
840 for (
int i = 0;
i < 3;
i++) {
841 const double drpa = rp[
i] - ra[
i];
842 const double drpb = rp[
i] - rb[
i];
843 for (
int lxa = 0; lxa <= la_max; lxa++) {
844 for (
int lxb = 0; lxb <= lb_max; lxb++) {
845 double binomial_k_lxa = 1.0;
847 for (
int k = 0; k <= lxa; k++) {
848 double binomial_l_lxb = 1.0;
850 for (
int l = 0; l <= lxb; l++) {
851 alpha[
i][lxb][lxa][lxa - l + lxb - k] +=
852 binomial_k_lxa * binomial_l_lxb * a * b;
853 binomial_l_lxb *= ((double)(lxb - l)) / ((
double)(l + 1));
856 binomial_k_lxa *= ((double)(lxa - k)) / ((
double)(k + 1));
877 for (
int lzb = 0; lzb <= lb_max; lzb++) {
878 for (
int lza = 0; lza <= la_max; lza++) {
879 for (
int lyb = 0; lyb <= lb_max - lzb; lyb++) {
880 for (
int lya = 0; lya <= la_max - lza; lya++) {
881 const int lxb_min =
imax(lb_min - lzb - lyb, 0);
882 const int lxa_min =
imax(la_min - lza - lya, 0);
883 for (
int lxb = lxb_min; lxb <= lb_max - lzb - lyb; lxb++) {
884 for (
int lxa = lxa_min; lxa <= la_max - lza - lya; lxa++) {
885 const int ico = coset(lxa, lya, lza);
886 const int jco = coset(lxb, lyb, lzb);
887 const int cab_index = jco * ncoset(la_max) + ico;
888 for (
int lzp = 0; lzp <= lza + lzb; lzp++) {
889 for (
int lyp = 0; lyp <= lp - lza - lzb; lyp++) {
890 for (
int lxp = 0; lxp <= lp - lza - lzb - lyp; lxp++) {
891 const double p = alpha[0][lxb][lxa][lxp] *
892 alpha[1][lyb][lya][lyp] *
893 alpha[2][lzb][lza][lzp] * prefactor;
894 const int lp1 = lp + 1;
895 const int cxyz_index =
896 lzp * lp1 * lp1 + lyp * lp1 + lxp;
897#if (GRID_DO_COLLOCATE)
898 cxyz[cxyz_index] += cab[cab_index] * p;
900 cab[cab_index] += cxyz[cxyz_index] * p;
918cab_to_grid(
const bool orthorhombic,
const int border_mask,
const int la_max,
919 const int la_min,
const int lb_max,
const int lb_min,
920 const double zeta,
const double zetb,
const double rscale,
921 const double dh[3][3],
const double dh_inv[3][3],
922 const double ra[3],
const double rab[3],
const int npts_global[3],
923 const int npts_local[3],
const int shift_local[3],
924 const int border_width[3],
const double radius,
930 for (
int i = 0;
i < 3;
i++) {
931 for (
int j = 0; j < 3; j++) {
932 dh_max = fmax(dh_max, fabs(dh[
i][j]));
935 if (2.0 * radius < dh_max) {
939 const double zetp = zeta + zetb;
940 const double f = zetb / zetp;
941 const double rab2 = rab[0] * rab[0] + rab[1] * rab[1] + rab[2] * rab[2];
942 const double prefactor = rscale * exp(-zeta * f * rab2);
944 for (
int i = 0;
i < 3;
i++) {
945 rp[
i] = ra[
i] + f * rab[
i];
946 rb[
i] = ra[
i] + rab[
i];
949 const int lp = la_max + lb_max;
950 const size_t cxyz_size = (lp + 1) * (lp + 1) * (lp + 1);
951 double cxyz[cxyz_size];
952 memset(cxyz, 0, cxyz_size *
sizeof(
double));
954#if (GRID_DO_COLLOCATE)
956 cab_to_cxyz(la_max, la_min, lb_max, lb_min, prefactor, ra, rb, rp, cab, cxyz);
957 cxyz_to_grid(orthorhombic, border_mask, lp, zetp, dh, dh_inv, rp, npts_global,
961 cxyz_to_grid(orthorhombic, border_mask, lp, zetp, dh, dh_inv, rp, npts_global,
963 cab_to_cxyz(la_max, la_min, lb_max, lb_min, prefactor, ra, rb, rp, cab, cxyz);
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard)
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int const double GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double GRID_CONST_WHEN_INTEGRATE double GRID_CONST_WHEN_INTEGRATE double * grid_2
static void const int const int const int jg1
static void const int const int j2
static void const int const int const double GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double GRID_CONST_WHEN_INTEGRATE double * grid_1
static void const int const int kg2
static void const int const int i
static void const int const int const int const double GRID_CONST_WHEN_COLLOCATE double * cxy
static void const int cmax
static void const int kg1
static void const int const int const double pol[3][lp+1][2 *cmax+1]
static void const int const int const int const int const int const double const int const int const int npts_local[3]
static void const int const int const int const int jg2
static void const int const int const double GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid_0
static void const int const int const double GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double GRID_CONST_WHEN_INTEGRATE double GRID_CONST_WHEN_INTEGRATE double GRID_CONST_WHEN_INTEGRATE double * grid_3
static void const int const int const int const int const int const double const int map[3][2 *cmax+1]
static void const int const int const double GRID_CONST_WHEN_COLLOCATE double * cx
#define GRID_DO_COLLOCATE
void exp_ij(const double alpha, const int offset_i, const int imin, const int imax, const int offset_j, const int jmin, const int jmax, tensor *exp_ij_)
void grid_library_counter_add(const int lp, const enum grid_backend backend, const enum grid_library_kernel kernel, const int increment)
Adds given increment to counter specified by lp, backend, and kernel.
grid_library_kernel
Various kernels provided by the grid library.
static void cxyz_to_grid(const bool orthorhombic, const int border_mask, const int lp, const double zetp, const double dh[3][3], const double dh_inv[3][3], const double rp[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, GRID_CONST_WHEN_COLLOCATE double *cxyz, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_xyz onto the grid.
static void cab_to_cxyz(const int la_max, const int la_min, const int lb_max, const int lb_min, const double prefactor, const double ra[3], const double rb[3], const double rp[3], GRID_CONST_WHEN_COLLOCATE double *cab, GRID_CONST_WHEN_INTEGRATE double *cxyz)
Transforms coefficients C_ab into C_xyz.
#define GRID_CONST_WHEN_COLLOCATE
static void ortho_cxy_to_grid(const int lp, const int k1, const int k2, const int cmax, const double pol[3][lp+1][2 *cmax+1], const int map[3][2 *cmax+1], const double dh[3][3], const double dh_inv[3][3], const double disr_radius, const int npts_local[3], GRID_CONST_WHEN_COLLOCATE double *cxy, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_xy onto the grid for orthorhombic case.
static void general_cij_to_ci(const int lp, const double dj, GRID_CONST_WHEN_COLLOCATE double *cij, GRID_CONST_WHEN_INTEGRATE double *ci)
Transforms coefficients C_ij into C_i by fixing grid index j.
static void general_cijk_to_cij(const int lp, const double dk, GRID_CONST_WHEN_COLLOCATE double *cijk, GRID_CONST_WHEN_INTEGRATE double *cij)
Transforms coefficients C_ijk into C_ij by fixing grid index k.
static void general_precompute_mapping(const int index_min, const int index_max, const int shift_local, const int npts_global, const int bounds[2], int map[])
Precompute mapping of grid indices for general case.
static void ortho_cxy_to_cx(const int lp, const int j1, const int j2, const int cmax, const double pol[3][lp+1][2 *cmax+1], GRID_CONST_WHEN_COLLOCATE double *cxy, GRID_CONST_WHEN_INTEGRATE double *cx)
Transforms coefficients C_xy into C_x by fixing grid index j.
static void general_cxyz_to_cijk(const int lp, const double dh[3][3], GRID_CONST_WHEN_COLLOCATE double *cxyz, GRID_CONST_WHEN_INTEGRATE double *cijk)
Transforms coefficients C_xyz into C_ijk.
static void general_cij_to_grid(const int lp, const int k, const int kg, const int npts_local[3], const int index_min[3], const int index_max[3], const int map_i[], const int map_j[], const double dh[3][3], const double gp[3], const double radius, const double exp_ij[], const double exp_jk[], const double exp_ki[], GRID_CONST_WHEN_COLLOCATE double *cij, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_ij onto the grid for general case.
static void general_cijk_to_grid(const int border_mask, const int lp, const double zetp, const double dh[3][3], const double dh_inv[3][3], const double rp[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, GRID_CONST_WHEN_COLLOCATE double *cijk, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_ijk onto the grid for general case.
static void cab_to_grid(const bool orthorhombic, const int border_mask, const int la_max, const int la_min, const int lb_max, const int lb_min, const double zeta, const double zetb, const double rscale, const double dh[3][3], const double dh_inv[3][3], const double ra[3], const double rab[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, GRID_CONST_WHEN_COLLOCATE double *cab, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_ab onto the grid.
static void general_ci_to_grid(const int lp, const int jg, const int kg, const int ismin, const int ismax, const int npts_local[3], const int index_min[3], const int index_max[3], const int map_i[], const double gp[3], const int k, const int j, const double exp_ij[], const double exp_jk[], const double exp_ki[], GRID_CONST_WHEN_COLLOCATE double *ci, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_i onto the grid for general case.
static void ortho_cx_to_grid(const int lp, const int k1, const int k2, const int j1, const int j2, const int cmax, const double pol[3][lp+1][2 *cmax+1], const int map[3][2 *cmax+1], const double dh[3][3], const double dh_inv[3][3], const double kremain, const int npts_local[3], GRID_CONST_WHEN_COLLOCATE double *cx, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_x onto the grid for orthorhombic case.
static void ortho_cxyz_to_cxy(const int lp, const int k1, const int k2, const int cmax, const double pol[3][lp+1][2 *cmax+1], GRID_CONST_WHEN_COLLOCATE double *cxyz, GRID_CONST_WHEN_INTEGRATE double *cxy)
Transforms coefficients C_xyz into C_xz by fixing grid index k.
static void general_cxyz_to_grid(const int border_mask, const int lp, const double zetp, const double dh[3][3], const double dh_inv[3][3], const double rp[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const int border_width[3], const double radius, GRID_CONST_WHEN_COLLOCATE double *cxyz, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_xyz onto the grid for general case.
static void ortho_cxyz_to_grid(const int lp, const double zetp, const double dh[3][3], const double dh_inv[3][3], const double rp[3], const int npts_global[3], const int npts_local[3], const int shift_local[3], const double radius, GRID_CONST_WHEN_COLLOCATE double *cxyz, GRID_CONST_WHEN_INTEGRATE double *grid)
Collocates coefficients C_xyz onto the grid for orthorhombic case.
#define GRID_CONST_WHEN_INTEGRATE
static void general_fill_exp_table(const int idir, const int jdir, const int index_min[3], const int index_max[3], const double zetp, const double dh[3][3], const double gp[3], double exp_table[])
Fill one of the 2D tables that speedup 3D Gaussian (Mathieu's trick).