#include "../common/grid_common.h"
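
/* Extract a cube from the local grid into handler->cube, keeping only the
 * runs of grid points that lie inside the sphere of radius disr_radius.
 * Orthorhombic cells only: the distance test factorizes over the axes, so
 * each z plane (kr) and each y line (jr) cheaply bounds the admissible x
 * interval. */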
void extract_cube_within_spherical_cutoff_ortho(
    struct collocation_integration_ *const handler, const double disr_radius,
    const int cmax, const int *const lb_cube, const int *const cube_center) {
  /* ... map pointers and the x/y/z Interval windows are set up in the
   * elided lines ... */

  for (int i = 0; i < 3; i++) {
    for (int ig = 0; ig < handler->cube.size[i]; ig++) {
      /* ... map[i][ig] is computed here, as in extract_cube() below ... */
    }
  }

  for (int kg = 0; kg < handler->cube.size[0]; kg++) {
    const int k = map[0][kg];
    /* distance from the cube center, in grid points and then in a.u. */
    const int kd = (2 * (kg + lb_cube[0]) - 1) / 2;
    const double kr = kd * handler->dh[2][2];
    const double kremain = disr_radius * disr_radius - kr * kr;
    /* ... (kremain >= 0 is checked in the elided lines) ... */

    const int jgmin = ceil(-1e-8 - sqrt(kremain) * handler->dh_inv[1][1]);
    for (int jg = jgmin; jg <= (1 - jgmin); jg++) {
      const int j = map[1][jg - lb_cube[1]];
      const double jr = ((2 * jg - 1) >> 1) * handler->dh[1][1];
      const double jremain = kremain - jr * jr;
      /* ... */

      const int xmin = ceil(-1e-8 - sqrt(jremain) * handler->dh_inv[0][0]);
      const int xmax = 1 - xmin;

      for (int x = xmin - lb_cube[2];
           x < imin((xmax - lb_cube[2]), handler->cube.size[2]); x++) {
        const int x1 = map[2][x];

        int lower_corner[3] = {k, j, x1};
        int upper_corner[3] = {k + 1, j + 1, x1 + 1};

        compute_interval(/* ... */, &x, lower_corner + 2, upper_corner + 2,
                         xwindow);

        if (upper_corner[2] - lower_corner[2]) {
          const int position1[3] = {kg, jg - lb_cube[1], x};

          double *restrict dst = &idx3(handler->cube, position1[0],
                                       position1[1], position1[2]);
          double *restrict src = &idx3(handler->grid, lower_corner[0],
                                       lower_corner[1], lower_corner[2]);

          const int sizex = upper_corner[2] - lower_corner[2];
          for (int x = 0; x < sizex; x++) {
            dst[x] = src[x];
          }
          /* ... */
          x += upper_corner[2] - lower_corner[2] - 1;
        }
      }
    }
  }
}
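
/* Triclinic (generic) variant of the extraction above. The cube axes are no
 * longer orthogonal to the cell, so for each (z, y) line the spherical
 * cutoff |r(t)|^2 <= disr_radius^2 along the x direction becomes a quadratic
 *
 *   a t^2 + b t + c <= 0,   with a = dh[0] . dh[0],
 *
 * and the admissible interval is bounded by the roots
 * (-b -+ sqrt(b^2 - 4 a c)) / (2 a), clamped to [lb_cube[2], ub_cube[2]]. */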
void extract_cube_within_spherical_cutoff_generic(
    struct collocation_integration_ *const handler, const double disr_radius,
    const int cmax, const int *const lb_cube, const int *const ub_cube,
    const double *const roffset, const int *const cube_center) {
  const double a = handler->dh[0][0] * handler->dh[0][0] +
                   handler->dh[0][1] * handler->dh[0][1] +
                   handler->dh[0][2] * handler->dh[0][2];
  const double a_inv = 1.0 / a;
  /* ... map pointers and the x/y/z Interval windows are set up in the
   * elided lines ... */

  for (int i = 0; i < 3; i++) {
    for (int ig = 0; ig < handler->cube.size[i]; ig++) {
      /* ... map[i][ig] is computed here, as in extract_cube() below ... */
    }
  }

  for (int k = 0; k < handler->cube.size[0]; k++) {
    const int iz = map[0][k];
    const double tz = (k + lb_cube[0] - roffset[0]);
    const double z[3] = {tz * handler->dh[2][0], tz * handler->dh[2][1],
                         tz * handler->dh[2][2]};

    for (int j = 0; j < handler->cube.size[1]; j++) {
      const int iy = map[1][j];
      const double ty = (j - roffset[1] + lb_cube[1]);
      const double y[3] = {z[0] + ty * handler->dh[1][0],
                           z[1] + ty * handler->dh[1][1],
                           z[2] + ty * handler->dh[1][2]};

      const double b =
          -2.0 * (handler->dh[0][0] * (roffset[2] * handler->dh[0][0] - y[0]) +
                  handler->dh[0][1] * (roffset[2] * handler->dh[0][1] - y[1]) +
                  handler->dh[0][2] * (roffset[2] * handler->dh[0][2] - y[2]));

      const double c = (roffset[2] * handler->dh[0][0] - y[0]) *
                           (roffset[2] * handler->dh[0][0] - y[0]) +
                       (roffset[2] * handler->dh[0][1] - y[1]) *
                           (roffset[2] * handler->dh[0][1] - y[1]) +
                       (roffset[2] * handler->dh[0][2] - y[2]) *
                           (roffset[2] * handler->dh[0][2] - y[2]) -
                       disr_radius * disr_radius;

      double delta = b * b - 4.0 * a * c;
      /* ... negative discriminants are skipped and delta is replaced by its
       * square root in the elided lines ... */

      const int xmin = imax(ceil((-b - delta) * 0.5 * a_inv), lb_cube[2]);
      const int xmax = imin(floor((-b + delta) * 0.5 * a_inv), ub_cube[2]);

      int lower_corner[3] = {iz, iy, xmin};
      int upper_corner[3] = {iz + 1, iy + 1, xmin};

      for (int x = xmin - lb_cube[2];
           x < imin((xmax - lb_cube[2]), handler->cube.size[2]); x++) {
        const int x1 = map[2][x];

        compute_interval(/* ... */, lower_corner + 2, upper_corner + 2,
                         xwindow);

        if (upper_corner[2] - lower_corner[2]) {
          const int position1[3] = {k, j, x};

          double *__restrict__ src = &idx3(handler->grid, lower_corner[0],
                                           lower_corner[1], lower_corner[2]);
          double *__restrict__ dst =
              &idx3(handler->cube, position1[0], position1[1], position1[2]);

          const int sizex = upper_corner[2] - lower_corner[2];
          for (int x = 0; x < sizex; x++) {
            dst[x] = src[x];
          }
          /* ... */
          x += upper_corner[2] - lower_corner[2] - 1;
        }
      }
    }
  }
}
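
/* Rotate the previous task's cartesian hab coefficients into the spherical
 * basis and accumulate them into its target block: m1 applies jbasis->sphi,
 * m2 applies ibasis->sphi, and the iatom <= jatom test selects the plain or
 * the transposed layout so the (sgfa, sgfb) sub-block lands consistently.
 * Afterwards the set dimensions of the current task are loaded. */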
static void rotate_and_store_coefficients(grid_context *const ctx,
                                          const _task *prev_task,
                                          const _task *task,
                                          tensor *const hab, tensor *work,
                                          double *blocks) {
  if (prev_task != NULL) {
    /* ... */
    const int iatom = prev_task->iatom;
    const int jatom = prev_task->jatom;
    const int iset = prev_task->iset;
    const int jset = prev_task->jset;
    /* ... ibasis / jbasis are looked up in the elided lines ... */
    const int block_num = prev_task->block_num;
    double *const block = &blocks[ctx->block_offsets[block_num]];

    const int ncoseta = ncoset(ibasis->lmax[iset]);
    const int ncosetb = ncoset(jbasis->lmax[jset]);
    const int ncoa = ibasis->npgf[iset] * ncoseta;
    const int ncob = jbasis->npgf[jset] * ncosetb;
    const int nsgf_seta = ibasis->nsgf_set[iset];
    const int nsgf_setb = jbasis->nsgf_set[jset];
    const int nsgfa = ibasis->nsgf;
    const int nsgfb = jbasis->nsgf;
    const int sgfa = ibasis->first_sgf[iset] - 1;
    const int sgfb = jbasis->first_sgf[jset] - 1;
    const int maxcoa = ibasis->maxco;
    const int maxcob = jbasis->maxco;

    /* ... the dgemm_params m1 and m2 are declared and mostly filled in the
     * elided lines ... */
    m1.a = &jbasis->sphi[sgfb * maxcob];
    /* ... */

    if (iatom <= jatom) {
      /* ... */
      m2.b = &ibasis->sphi[sgfa * maxcoa];
      /* ... */
      m2.c = block + sgfb * nsgfa + sgfa;
      /* ... */
    } else {
      /* ... */
      m2.a = &ibasis->sphi[sgfa * maxcoa];
      /* ... */
      m2.c = block + sgfa * nsgfb + sgfb;
      /* ... */
    }

    m1.use_libxsmm = true;
    m2.use_libxsmm = true;
    /* ... the two dgemms are executed in the elided lines ... */
  }

  /* dimensions of the current task */
  /* ... */
  const int iatom = task->iatom;
  const int jatom = task->jatom;
  /* ... */
  const int iset = task->iset;
  const int jset = task->jset;
  /* ... */
  const int ncoseta = ncoset(ibasis->lmax[iset]);
  const int ncosetb = ncoset(jbasis->lmax[jset]);
  /* ... */
  const int ncoa = ibasis->npgf[iset] * ncoseta;
  const int ncob = jbasis->npgf[jset] * ncosetb;
  /* ... */
}
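
/* Force contribution of one (a, b) orbital pair. With ftz = {2*zeta_a,
 * 2*zeta_b}, the bracket ftz[0] * aip1 - a.l[i] * aim1 follows the usual
 * recurrence for the derivative of a cartesian Gaussian,
 * d/dx_i x^l exp(-zeta x^2) -> l |l-1_i> - 2 zeta |l+1_i> (up to sign
 * convention), evaluated through the vab integrals with a raised (aip1) or
 * lowered (aim1) along direction i; the rab term shifts the derivative to
 * the second center. */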
void update_force_pair(orbital a, orbital b, const double pab,
                       const double ftz[2], const double *const rab,
                       const tensor *const vab, tensor *force) {
  const double axpm0 = idx2(vab[0], idx(b), idx(a));
  for (int i = 0; i < 3; i++) {
    /* ... aip1, aim1 and bim1 (vab entries with a or b raised / lowered
     * along direction i) are read in the elided lines ... */
    idx2(force[0], 0, i) += pab * (ftz[0] * aip1 - a.l[i] * aim1);
    idx2(force[0], 1, i) +=
        pab * (ftz[1] * (aip1 - rab[i] * axpm0) - b.l[i] * bim1);
  }
}
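
/* Virial contribution of the same pair: the derivative terms above are
 * additionally contracted over coordinates into a 3x3 tensor per center
 * (leading index 0 for center a, 1 for center b); the integrands themselves
 * sit in the elided right-hand sides. */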
void update_virial_pair(orbital a, orbital b, const double pab,
                        const double ftz[2], const double *const rab,
                        const tensor *const vab, tensor *virial) {
  /* ... */
  for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 3; j++) {
      idx3(virial[0], 0, i, j) += /* ... contribution elided ... */;
    }
  }

  for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 3; j++) {
      idx3(virial[0], 1, i, j) += /* ... contribution elided ... */;
    }
  }
}
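
/* Single dispatch point used by both the plain and the tau path: update the
 * hab matrix element (scaled by f), then accumulate forces and the virial
 * when requested, via update_force_pair / update_virial_pair. */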
void update_all(const bool compute_forces, const bool compute_virial,
                const orbital a, const orbital b, const double f,
                const double *const ftz, const double *rab, const tensor *vab,
                const double pab, double *hab, tensor *forces,
                tensor *virials) {
  /* ... hab is updated in the elided lines ... */
  if (compute_forces) {
    /* ... update_force_pair(...) ... */
  }
  if (compute_virial) {
    /* ... update_virial_pair(...) ... */
  }
}
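
/* Tau (kinetic-energy density) path: the pair couples through the product
 * of first derivatives, 0.5 * sum_i (d_i a | d_i b). With
 * d_i |l> = l_i |l - 1_i> - 2 zeta |l + 1_i> and ftz = 2 zeta, the product
 * expands into the four update_all calls below (down/down, up/down, down/up
 * and up/up along each direction i) with the matching coefficients. */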
static void update_tau(const bool compute_forces, const bool compute_virial,
                       const orbital a, const orbital b, const double ftz[2],
                       const double *const rab, const tensor *const vab,
                       const double pab, double *const hab, tensor *forces,
                       tensor *virials) {
  /* ... */
  for (int i = 0; i < 3; i++) {
    update_all(compute_forces, compute_virial, down(i, a), down(i, b),
               0.5 * a.l[i] * b.l[i], ftz, rab, vab, pab, hab, forces,
               virials);
    update_all(compute_forces, compute_virial, up(i, a), down(i, b),
               -0.5 * ftz[0] * b.l[i], ftz, rab, vab, pab, hab, forces,
               virials);
    update_all(compute_forces, compute_virial, down(i, a), up(i, b),
               -0.5 * a.l[i] * ftz[1], ftz, rab, vab, pab, hab, forces,
               virials);
    update_all(compute_forces, compute_virial, up(i, a), up(i, b),
               0.5 * ftz[0] * ftz[1], ftz, rab, vab, pab, hab, forces,
               virials);
  }
}
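
/* Walk every cartesian orbital pair (a, b) of the task's set pair; for each
 * pair, read the density-matrix prefactor from pab and update the matching
 * hab element (plus forces and virial), taking the tau variant when
 * compute_tau is set. */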
static void update_hab_forces_and_stress(
    const _task *task, const tensor *const vab, const tensor *const pab,
    const bool compute_tau, const bool compute_forces,
    const bool compute_virial, tensor *const forces, tensor *const virial,
    tensor *const hab) {
  double zeta[2] = {task->zeta[0] * 2.0, task->zeta[1] * 2.0};
  for (int lb = task->lmin[1]; lb <= task->lmax[1]; lb++) {
    for (int la = task->lmin[0]; la <= task->lmax[0]; la++) {
      for (int bx = 0; bx <= lb; bx++) {
        for (int by = 0; by <= lb - bx; by++) {
          const int bz = lb - bx - by;
          const orbital b = {{bx, by, bz}};
          const int idx_b = task->offset[1] + idx(b);
          for (int ax = 0; ax <= la; ax++) {
            for (int ay = 0; ay <= la - ax; ay++) {
              const int az = la - ax - ay;
              const orbital a = {{ax, ay, az}};
              const int idx_a = task->offset[0] + idx(a);
              double *habval = &idx2(hab[0], idx_b, idx_a);
              const double prefactor = idx2(pab[0], idx_b, idx_a);

              if (compute_tau) {
                update_tau(compute_forces, compute_virial, a, b, zeta,
                           task->rab, vab, prefactor, habval, forces, virial);
              } else {
                update_all(compute_forces, compute_virial, a, b, 1.0, zeta,
                           task->rab, vab, prefactor, habval, forces, virial);
              }
            }
          }
        }
      }
    }
  }
}
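
/* Cutoff-free extraction: map every cube index onto the periodically
 * wrapped grid index, then copy window after window with compute_interval /
 * extract_sub_grid, skipping ahead by the length of each copied run. */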
void extract_cube(struct collocation_integration_ *handler, const int cmax,
                  const int *lower_boundaries_cube, const int *cube_center) {
  /* ... map pointers and the x/y/z Interval windows are set up in the
   * elided lines ... */

  for (int i = 0; i < 3; i++) {
    for (int ig = 0; ig < handler->cube.size[i]; ig++) {
      map[i][ig] = modulo(cube_center[i] + ig + lower_boundaries_cube[i] -
                              handler->grid.lower_corner[i],
                          handler->grid.full_size[i]);
    }
  }
  /* ... */

  for (int z = 0; (z < handler->cube.size[0]); z++) {
    const int z1 = map[0][z];
    /* ... */
    compute_interval(map[0], handler->grid.full_size[0], handler->grid.size[0],
                     handler->cube.size[0], z1, &z, lower_corner, upper_corner,
                     zwindow);

    if (upper_corner[0] - lower_corner[0]) {
      for (int y = 0; y < handler->cube.size[1]; y++) {
        const int y1 = map[1][y];
        /* ... */
        compute_interval(map[1], handler->grid.full_size[1],
                         handler->grid.size[1], handler->cube.size[1], y1, &y,
                         lower_corner + 1, upper_corner + 1, ywindow);

        if (upper_corner[1] - lower_corner[1]) {
          for (int x = 0; x < handler->cube.size[2]; x++) {
            const int x1 = map[2][x];
            /* ... */
            compute_interval(map[2], handler->grid.full_size[2],
                             handler->grid.size[2], handler->cube.size[2], x1,
                             &x, lower_corner + 2, upper_corner + 2, xwindow);

            if (upper_corner[2] - lower_corner[2]) {
              const int position1[3] = {z, y, x};
              /* ... extract_sub_grid copies the window in the elided
               * lines ... */
              x += upper_corner[2] - lower_corner[2] - 1;
            }
          }
          /* ... */
          y += upper_corner[1] - lower_corner[1] - 1;
        }
      }
      /* ... */
      z += upper_corner[0] - lower_corner[0] - 1;
    }
  }
}
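
/* Integrate a single gaussian product over the grid: compute the cube
 * geometry, tabulate the one-dimensional polynomial-times-gaussian factors
 * along the three (permuted) axes, pull the relevant part of the grid into
 * handler->cube, and finally contract cube and polynomial table into
 * handler->coef. */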
void grid_integrate(collocation_integration *const handler,
                    const bool use_ortho, const double zetp,
                    const double rp[3], const double radius) {
  /* ... */
  const int lp = handler->coef.size[0] - 1;
  /* ... */
  int lb_cube[3], ub_cube[3];
  /* ... the remaining cube-geometry outputs are declared in the elided
   * lines ... */
  const int cmax = compute_cube_properties(
      use_ortho, radius, (const double(*)[3])handler->dh,
      (const double(*)[3])handler->dh_inv, rp, &disr_radius, roffset,
      cubecenter, lb_cube, ub_cube, cube_size);
  /* ... */

  int perm[3] = {2, 0, 1};
  /* ... */

  if (use_ortho) {
    grid_fill_pol_dgemm(/* ... */, ub_cube[2], lp, cmax, zetp,
                        &idx3(handler->pol, perm[2], 0, 0));
    grid_fill_pol_dgemm(/* ... */, ub_cube[1], lp, cmax, zetp,
                        &idx3(handler->pol, perm[1], 0, 0));
    grid_fill_pol_dgemm(/* ... */, ub_cube[0], lp, cmax, zetp,
                        &idx3(handler->pol, perm[0], 0, 0));
  } else {
    double dx[3];
    dx[2] = handler->dh[0][0] * handler->dh[0][0] +
            handler->dh[0][1] * handler->dh[0][1] +
            handler->dh[0][2] * handler->dh[0][2];
    dx[1] = handler->dh[1][0] * handler->dh[1][0] +
            handler->dh[1][1] * handler->dh[1][1] +
            handler->dh[1][2] * handler->dh[1][2];
    dx[0] = handler->dh[2][0] * handler->dh[2][0] +
            handler->dh[2][1] * handler->dh[2][1] +
            handler->dh[2][2] * handler->dh[2][2];
    /* ... */
    grid_fill_pol_dgemm(/* ... */, lp, cmax, zetp * dx[2],
                        &idx3(handler->pol, perm[2], 0, 0));
    grid_fill_pol_dgemm(/* ... */, lp, cmax, zetp * dx[1],
                        &idx3(handler->pol, perm[1], 0, 0));
    grid_fill_pol_dgemm(/* ... */, lp, cmax, zetp * dx[0],
                        &idx3(handler->pol, perm[0], 0, 0));
    /* ... */
    calculate_non_orthorombic_corrections_tensor(
        zetp, roffset, (const double(*)[3])handler->dh, lb_cube, ub_cube,
        /* ... */);
  }
  /* ... */

  if (!use_ortho && !use_ortho_forced) {
    extract_cube_within_spherical_cutoff_generic(
        handler, disr_radius, cmax, lb_cube, ub_cube, roffset, cubecenter);
  } else {
    extract_cube_within_spherical_cutoff_ortho(handler, disr_radius, cmax,
                                               lb_cube, cubecenter);
  }
  /* ... */

  if (!use_ortho && !use_ortho_forced) {
    /* ... apply_non_orthorombic_corrections(...) ... */
  }
  /* ... the final contraction against the polynomial table is elided except
   * for the trailing arguments of the call:
   *       1, &idx3(handler->pol, 0, 0, 0), 1);
   */
}
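
/* Integrate every task of one grid level. Each thread accumulates hab into
 * a private slice of ctx->scratch and forces/virial into private tensors;
 * at the end the private copies are merged with block-cyclic cblas_daxpy
 * passes ((bk + thread_id) % num_threads), so the threads reduce disjoint
 * blocks concurrently instead of contending on a single critical section. */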
void integrate_one_grid_level_dgemm(
    grid_context *const ctx, const int level, const bool calculate_tau,
    const bool calculate_forces, const bool calculate_virial,
    const int *const shift_local, const int *const border_width,
    const offload_buffer *const pab_blocks, offload_buffer *const hab_blocks,
    tensor *forces_, tensor *virial_) {
  /* ... */
#pragma omp parallel default(shared)
  {
    const int num_threads = omp_get_num_threads();
    const int thread_id = omp_get_thread_num();

    double *hab_block_local = NULL;
    if (num_threads == 1) {
      /* ... a single thread accumulates straight into the shared buffer ... */
    } else {
      hab_block_local = ((double *)ctx->scratch) +
                        thread_id * (hab_blocks->size / sizeof(double));
      memset(hab_block_local, 0, hab_blocks->size);
    }

    tensor work, pab, hab, vab, forces_local_, virial_local_,
        forces_local_pair_, virial_local_pair_;
    memset(&work, 0, sizeof(tensor));
    memset(&pab, 0, sizeof(tensor));
    memset(&hab, 0, sizeof(tensor));
    memset(&vab, 0, sizeof(tensor));
    memset(&forces_local_, 0, sizeof(tensor));
    memset(&virial_local_, 0, sizeof(tensor));
    memset(&forces_local_pair_, 0, sizeof(tensor));
    memset(&virial_local_pair_, 0, sizeof(tensor));
    /* ... */

    if (calculate_tau || calculate_forces || calculate_virial) {
      /* ... */
    }
    /* ... */
    if (calculate_virial) {
      /* ... */
    }
    /* ... */
    if (calculate_forces) {
      /* ... */
      memset(forces_local_.data, 0,
             sizeof(double) * forces_local_.alloc_size_);
      memset(virial_local_.data, 0,
             sizeof(double) * virial_local_.alloc_size_);
    }
    /* ... */
    initialize_basis_vectors(/* ... */, (const double(*)[3])grid->dh_inv);
    /* ... */
    for (int d = 0; d < 3; d++) {
      /* ... */
    }
    /* ... */

    const _task *prev_task = NULL;
#pragma omp for schedule(static)
    for (int itask = 0; itask < ctx->tasks_per_level[level]; itask++) {
      const _task *task = &ctx->tasks[level][itask];

      if (task->level != level) {
        printf("level %d, %d\n", task->level, level);
        /* ... */
      }
      /* ... */
      if (calculate_forces) {
        /* ... */
      }
      /* ... */
      lmin[0] = imax(lmin[0], 0);
      lmin[1] = imax(lmin[1], 0);
      /* ... */

      if (calculate_forces) {
        memset(forces_local_pair_.data, 0,
               sizeof(double) * forces_local_pair_.alloc_size_);
        memset(virial_local_pair_.data, 0,
               sizeof(double) * virial_local_pair_.alloc_size_);
      }

      update_hab_forces_and_stress(
          task, &vab, &pab, calculate_tau, calculate_forces, calculate_virial,
          &forces_local_pair_, &virial_local_pair_, &hab);
      /* ... */

      if (calculate_forces) {
        const double scaling = (task->iatom == task->jatom) ? 1.0 : 2.0;
        idx2(forces_local_, task->iatom, 0) +=
            scaling * idx2(forces_local_pair_, 0, 0);
        idx2(forces_local_, task->iatom, 1) +=
            scaling * idx2(forces_local_pair_, 0, 1);
        idx2(forces_local_, task->iatom, 2) +=
            scaling * idx2(forces_local_pair_, 0, 2);
        idx2(forces_local_, task->jatom, 0) +=
            scaling * idx2(forces_local_pair_, 1, 0);
        idx2(forces_local_, task->jatom, 1) +=
            scaling * idx2(forces_local_pair_, 1, 1);
        idx2(forces_local_, task->jatom, 2) +=
            scaling * idx2(forces_local_pair_, 1, 2);
        if (calculate_virial) {
          for (int i = 0; i < 3; i++) {
            for (int j = 0; j < 3; j++) {
              idx2(virial_local_, i, j) +=
                  scaling * (idx3(virial_local_pair_, 0, i, j) +
                             idx3(virial_local_pair_, 1, i, j));
            }
          }
        }
      }
      /* ... */
    }
    /* ... */

    if (num_threads > 1) {
      /* ... */
      const int hab_size = hab_blocks->size / sizeof(double);
      if ((hab_size / num_threads) >= 2) {
        const int block_size =
            hab_size / num_threads + (hab_size % num_threads);
        for (int bk = 0; bk < num_threads; bk++) {
          int bk_id = (bk + thread_id) % num_threads;
          size_t begin = bk_id * block_size;
          size_t end = imin((bk_id + 1) * block_size, hab_size);
          cblas_daxpy(end - begin, 1.0, hab_block_local + begin, 1,
                      /* ... destination: the shared hab buffer ... */);
          /* ... */
        }
      } else {
        const int hab_size = hab_blocks->size / sizeof(double);
        /* ... */
      }
    }
    /* ... */

    if (calculate_forces) {
      if (num_threads > 1) {
        /* ... */
        const int block_size = forces_->alloc_size_ / num_threads +
                               (forces_->alloc_size_ % num_threads);
        for (int bk = 0; bk < num_threads; bk++) {
          int bk_id = (bk + thread_id) % num_threads;
          int begin = bk_id * block_size;
          /* ... */
          cblas_daxpy(/* ... */, forces_local_.data + begin, 1,
                      forces_->data + begin, 1);
        }
      }
      /* ... */
    }
    /* ... */

    if (calculate_virial) {
      /* ... */
      for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
          idx2(virial_[0], i, j) += idx2(virial_local_, i, j);
        }
      }
    }
    /* ... */

    if (calculate_forces) {
      free(forces_local_.data);
      free(virial_local_.data);
      free(virial_local_pair_.data);
      free(forces_local_pair_.data);
    }
    /* ... */
  }
}
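
/* Public entry point of the dgemm backend: bind the per-level grid buffers
 * to the context, integrate level by level, then copy the accumulated
 * virial and forces back into the caller's arrays. */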
void grid_dgemm_integrate_task_list(
    void *ptr, const bool compute_tau, const int natoms, const int nlevels,
    const offload_buffer *const pab_blocks, offload_buffer *grids[nlevels],
    offload_buffer *hab_blocks, double forces[natoms][3],
    double virial[3][3]) {
  grid_context *const ctx = (grid_context *)ptr;
  /* ... */
  assert(ctx->nlevels == nlevels);
  assert(ctx->natoms == natoms);
  /* ... */

  for (int level = 0; level < ctx->nlevels; level++) {
    /* ... */
    set_grid_parameters(/* ... */, layout->dh_inv, grids[level]);
    ctx->grid[level].data = grids[level]->host_buffer;
  }

  bool calculate_virial = (virial != NULL);
  bool calculate_forces = (forces != NULL);
  /* ... */
  if (calculate_forces) {
    /* ... the forces_ and virial_ accumulators are set up here ... */
  }
  /* ... */

  for (int level = 0; level < ctx->nlevels; level++) {
    /* ... */
    integrate_one_grid_level_dgemm(ctx, level, compute_tau, calculate_forces,
                                   calculate_virial, /* ... */,
                                   &forces_, &virial_);
  }
  /* ... */

  if (calculate_forces) {
    if (calculate_virial) {
      virial[0][0] = idx2(virial_, 0, 0);
      virial[0][1] = idx2(virial_, 0, 1);
      virial[0][2] = idx2(virial_, 0, 2);
      virial[1][0] = idx2(virial_, 1, 0);
      virial[1][1] = idx2(virial_, 1, 1);
      virial[1][2] = idx2(virial_, 1, 2);
      virial[2][0] = idx2(virial_, 2, 0);
      virial[2][1] = idx2(virial_, 2, 1);
      virial[2][2] = idx2(virial_, 2, 2);
    }
    /* ... */
    memcpy(forces[0], forces_.data, sizeof(double) * forces_.alloc_size_);
    /* ... */
  }
  /* ... */
}
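
/* Usage sketch (illustrative only: ctx_ptr, nlevels, natoms and the offload
 * buffers stand for objects created elsewhere, e.g. through the backend's
 * task-list constructor):
 *
 *   double forces[natoms][3], virial[3][3];
 *   grid_dgemm_integrate_task_list(ctx_ptr, false, natoms, nlevels,
 *                                  pab_blocks, grids, hab_blocks, forces,
 *                                  virial);
 *
 * Passing NULL for forces and virial skips the corresponding accumulation,
 * since calculate_forces / calculate_virial are derived from those pointers
 * above. */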