15#include "../common/grid_common.h"
29 for (
int i = 0;
i < -index;
i++) {
36 for (
int i = 0;
i < index;
i++) {
46 double *restrict
const res) {
47 const double c_exp_co = exp(alpha);
49 res[0] = exp(
imin * alpha);
56void exp_ij(
const double alpha,
const int offset_i,
const int imin,
57 const int imax,
const int offset_j,
const int jmin,
const int jmax,
59 double c_exp = exp(alpha *
imin);
60 const double c_exp_co = exp(alpha);
63 double *restrict dst = &
idx2(exp_ij_[0],
i + offset_i, offset_j);
66 for (
int j = 0; j < (jmax - jmin); j++) {
75 const double mu_mean,
const double *r_ab,
const double basis[3][3],
76 const int *
const xmin,
const int *
const xmax,
bool plane[3],
79 const int n[3][2] = {{0, 2}, {0, 1}, {1, 2}};
85 (basis[0][0] * basis[2][0] + basis[0][1] * basis[2][1] +
86 basis[0][2] * basis[2][2]),
89 (basis[1][0] * basis[2][0] + basis[1][1] * basis[2][1] +
90 basis[1][2] * basis[2][2]),
93 (basis[0][0] * basis[1][0] + basis[0][1] * basis[1][1] +
94 basis[0][2] * basis[1][2])};
110 if (plane[0] && plane[1] && plane[2])
117 imax(
imax(xmax[0] - xmin[0], xmax[1] - xmin[1]), xmax[2] - xmin[2]) + 1;
126 for (
int dir = 0; dir < 3; dir++) {
131 const double c_exp_const = exp(c[dir] * r_ab[d1] * r_ab[d2]);
133 exp_i(-r_ab[d2] * c[dir], xmin[d1], xmax[d1] + 1, x1);
134 exp_i(-r_ab[d1] * c[dir], xmin[d2], xmax[d2] + 1, x2);
136 exp_tmp.
data = &
idx3(Exp[0], dir, 0, 0);
138 xmax[d2] - xmin[d2] + 1, c_exp_const, x1, 1, x2, 1,
139 &
idx2(exp_tmp, 0, 0), exp_tmp.
ld_);
140 exp_ij(c[dir], 0, xmin[d1], xmax[d1] + 1, 0, xmin[d2], xmax[d2] + 1,
149 const double mu_mean,
const double *r_ab,
const double basis[3][3],
150 const int *
const lower_corner,
const int *
const upper_corner,
151 const int *
const block_size,
const int *
const offset,
const int *
const xmin,
152 const int *
const xmax,
bool *plane,
tensor *
const Exp) {
154 const int n[3][2] = {{0, 2}, {0, 1}, {1, 2}};
157 const double c[3] = {
160 (basis[0][0] * basis[2][0] + basis[0][1] * basis[2][1] +
161 basis[0][2] * basis[2][2]),
164 (basis[1][0] * basis[2][0] + basis[1][1] * basis[2][1] +
165 basis[1][2] * basis[2][2]),
168 (basis[0][0] * basis[1][0] + basis[0][1] * basis[1][1] +
169 basis[0][2] * basis[1][2])};
185 if (plane[0] && plane[1] && plane[2])
192 imax(block_size[1], block_size[2]));
194 const int cube_size[3] = {(upper_corner[0] - lower_corner[0]) * block_size[0],
195 (upper_corner[1] - lower_corner[1]) * block_size[1],
196 (upper_corner[2] - lower_corner[2]) *
199 const int max_elem =
imax(
imax(cube_size[0], cube_size[1]), cube_size[2]);
204 imax(upper_corner[0] - lower_corner[0],
205 upper_corner[1] - lower_corner[1]),
206 imax(upper_corner[2] - lower_corner[2],
207 upper_corner[1] - lower_corner[1]),
213 for (
int dir = 0; dir < 3; dir++) {
218 const double c_exp_const = exp(c[dir] * r_ab[d1] * r_ab[d2]);
219 memset(x1, 0,
sizeof(
double) * max_elem);
220 memset(x2, 0,
sizeof(
double) * max_elem);
222 exp_i(-r_ab[d2] * c[dir], xmin[d1], xmax[d1] + 1, x1 + offset[d1]);
223 exp_i(-r_ab[d1] * c[dir], xmin[d2], xmax[d2] + 1, x2 + offset[d2]);
225 for (
int y = 0; y < (upper_corner[d1] - lower_corner[d1]); y++) {
226 const int y_1 = y * block_size[d1];
227 for (
int x = 0; x < (upper_corner[d2] - lower_corner[d2]); x++) {
228 const int x_1 = x * block_size[d2];
229 exp_blocked.
data = &
idx4(Exp[0], dir, y, x, 0);
231 for (
int y2 = 0; y2 < block_size[d1]; y2++) {
232 double *restrict dst = &
idx2(exp_blocked, y2, 0);
233 const double scal = x1[y_1 + y2] * c_exp_const;
234 double *restrict src = &x2[x_1];
237 for (
int x3 = 0; x3 < block_size[d2]; x3++) {
238 dst[x3] = scal * src[x3];
242 exp_ij(c[dir], 0, xmin[d1] - offset[d1] + y_1,
243 xmin[d1] - offset[d1] + y_1 + block_size[d1], 0,
244 xmin[d2] - offset[d2] + x_1,
245 xmin[d2] - offset[d2] + x_1 + block_size[d2], &exp_blocked);
261 if (plane[0] && plane[1] && plane[2])
265 if (plane[0] && plane[1]) {
266 for (
int z = 0; z < cube->
size[0]; z++) {
267 for (
int y = 0; y < cube->
size[1]; y++) {
268 double *restrict yx = &
idx3(Exp[0], 2, y, 0);
269 double *restrict dst = &
idx3(cube[0], z, y, 0);
271 for (
int x = 0; x < cube->
size[2]; x++) {
280 if (plane[0] && plane[2]) {
281 for (
int z = 0; z < cube->
size[0]; z++) {
282 for (
int y = 0; y < cube->
size[1]; y++) {
283 const double zy =
idx3(Exp[0], 1, z, y);
284 double *restrict dst = &
idx3(cube[0], z, y, 0);
288 for (
int x = 0; x < cube->
size[2]; x++) {
297 if (plane[1] && plane[2]) {
298 for (
int z = 0; z < cube->
size[0]; z++) {
299 double *restrict zx = &
idx3(Exp[0], 0, z, 0);
300 for (
int y = 0; y < cube->
size[1]; y++) {
301 double *restrict dst = &
idx3(cube[0], z, y, 0);
304 for (
int x = 0; x < cube->
size[2]; x++) {
314 for (
int z = 0; z < cube->
size[0]; z++) {
315 for (
int y = 0; y < cube->
size[1]; y++) {
316 const double zy =
idx3(Exp[0], 1, z, y);
317 double *restrict yx = &
idx3(Exp[0], 2, y, 0);
318 double *restrict dst = &
idx3(cube[0], z, y, 0);
322 for (
int x = 0; x < cube->
size[2]; x++) {
323 dst[x] *= zy * yx[x];
332 for (
int z = 0; z < cube->
size[0]; z++) {
333 double *restrict zx = &
idx3(Exp[0], 0, z, 0);
334 for (
int y = 0; y < cube->
size[1]; y++) {
335 double *restrict yx = &
idx3(Exp[0], 2, y, 0);
336 double *restrict dst = &
idx3(cube[0], z, y, 0);
339 for (
int x = 0; x < cube->
size[2]; x++) {
340 dst[x] *= zx[x] * yx[x];
349 for (
int z = 0; z < cube->
size[0]; z++) {
350 double *restrict zx = &
idx3(Exp[0], 0, z, 0);
351 for (
int y = 0; y < cube->
size[1]; y++) {
352 const double zy =
idx3(Exp[0], 1, z, y);
353 double *restrict dst = &
idx3(cube[0], z, y, 0);
357 for (
int x = 0; x < cube->
size[2]; x++) {
358 dst[x] *= zx[x] * zy;
367 for (
int z = 0; z < cube->
size[0]; z++) {
368 double *restrict zx = &
idx3(Exp[0], 0, z, 0);
369 for (
int y = 0; y < cube->
size[1]; y++) {
370 const double zy =
idx3(Exp[0], 1, z, y);
371 const double *restrict yx = &
idx3(Exp[0], 2, y, 0);
372 double *restrict dst = &
idx3(cube[0], z, y, 0);
376 for (
int x = 0; x < cube->
size[2]; x++) {
377 dst[x] *= zx[x] * zy * yx[x];
387 for (
int y1 = 0; y1 < m->
size[1]; y1++) {
388 double *restrict dst = &
idx3(m[0],
gamma, y1, 0);
389 double *restrict src = &
idx2(Exp[0], y1, 0);
393 for (
int x1 = 0; x1 < m->
size[2]; x1++) {
402 for (
int z1 = 0; z1 < m->
size[0]; z1++) {
403 double *restrict src = &
idx2(Exp[0], z1, 0);
404 for (
int y1 = 0; y1 < m->
size[1]; y1++) {
405 double *restrict dst = &
idx3(m[0], z1, y1, 0);
408 for (
int x1 = 0; x1 < m->
size[2]; x1++) {
417 for (
int z1 = 0; z1 < m->
size[0]; z1++) {
418 for (
int y1 = 0; y1 < m->
size[1]; y1++) {
419 const double src =
idx2(Exp[0], z1, y1);
420 double *restrict dst = &
idx3(m[0], z1, y1, 0);
423 for (
int x1 = 0; x1 < m->
size[2]; x1++) {
431 const struct tensor_ *
const Exp_xz,
const struct tensor_ *
const Exp_yz,
433 for (
int z1 = 0; z1 < m->
size[0]; z1++) {
434 const double *restrict src_xz = &
idx2(Exp_xz[0], z1, 0);
435 for (
int y1 = 0; y1 < m->
size[1]; y1++) {
436 const double src =
idx2(Exp_yz[0], z1, y1);
437 double *restrict dst = &
idx3(m[0], z1, y1, 0);
440 for (
int x1 = 0; x1 < m->
size[2]; x1++) {
441 dst[x1] *= src * src_xz[x1];
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard)
static int imin(int x, int y)
Returns the smaller of the two integers (missing from the C standard).
#define GRID_PRAGMA_SIMD(OBJS, N)
static void const int const int i
void apply_non_orthorombic_corrections(const bool plane[restrict 3], const tensor *const Exp, tensor *const cube)
void exp_ij(const double alpha, const int offset_i, const int imin, const int imax, const int offset_j, const int jmin, const int jmax, tensor *exp_ij_)
void exp_i(const double alpha, const int imin, const int imax, double *restrict const res)
void calculate_non_orthorombic_corrections_tensor(const double mu_mean, const double *r_ab, const double basis[3][3], const int *const xmin, const int *const xmax, bool plane[3], tensor *const Exp)
void apply_non_orthorombic_corrections_xz_yz_blocked(const struct tensor_ *const Exp_xz, const struct tensor_ *const Exp_yz, struct tensor_ *const m)
void apply_non_orthorombic_corrections_yz_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
double exp_recursive(const double c_exp, const double c_exp_minus_1, const int index)
void calculate_non_orthorombic_corrections_tensor_blocked(const double mu_mean, const double *r_ab, const double basis[3][3], const int *const lower_corner, const int *const upper_corner, const int *const block_size, const int *const offset, const int *const xmin, const int *const xmax, bool *plane, tensor *const Exp)
void apply_non_orthorombic_corrections_xz_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
void apply_non_orthorombic_corrections_xy_blocked(const struct tensor_ *const Exp, struct tensor_ *const m)
size_t realloc_tensor(tensor *t)
static void initialize_tensor_4(struct tensor_ *a, int n1, int n2, int n3, int n4)
#define idx4(a, i, j, k, l)
static void initialize_tensor_3(struct tensor_ *a, int n1, int n2, int n3)
static void initialize_tensor_2(struct tensor_ *a, int n1, int n2)
void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda)
static void * grid_allocate_scratch(size_t size)
static void grid_free_scratch(void *ptr)
Calculation of the incomplete Gamma function F_n(t) for multi-center integrals over Cartesian Gaussia...