8#ifndef GRID_DGEMM_UTILS_H
9#define GRID_DGEMM_UTILS_H
24#include "../common/grid_common.h"
32 0.166666666666666666666666666667,
33 0.0416666666666666666666666666667,
34 0.00833333333333333333333333333333,
35 0.00138888888888888888888888888889,
36 0.000198412698412698412698412698413,
37 0.0000248015873015873015873015873016,
38 2.7557319223985890652557319224e-6,
39 2.7557319223985890652557319224e-7,
40 2.50521083854417187750521083854e-8,
41 2.08767569878680989792100903212e-9,
42 1.60590438368216145993923771702e-10,
43 1.14707455977297247138516979787e-11,
44 7.64716373181981647590113198579e-13,
45 4.77947733238738529743820749112e-14,
46 2.81145725434552076319894558301e-15,
47 1.56192069685862264622163643501e-16,
48 8.22063524662432971695598123687e-18,
49 4.11031762331216485847799061844e-19,
50 1.95729410633912612308475743735e-20,
51 8.8967913924505732867488974425e-22,
52 3.86817017063068403771691193152e-23,
53 1.6117375710961183490487133048e-24,
54 6.4469502843844733961948532192e-26,
55 2.47959626322479746007494354585e-27,
56 9.18368986379554614842571683647e-29,
57 3.27988923706983791015204172731e-30,
58 1.13099628864477169315587645769e-31,
59 3.76998762881590564385292152565e-33};
62 const int l = lx + ly + lz;
66 return ((l - lx) * (l - lx + 1)) / 2 + lz;
82 libxsmm_dmmfunction kernel;
90 const int batch_size);
96void dgemm_(
const char *transa,
const char *transb,
const int *m,
const int *n,
97 const int *k,
const double *alpha,
const double *a,
const int *lda,
98 const double *b,
const int *ldb,
const double *beta,
double *c,
101extern void extract_sub_grid(
const int *lower_corner,
const int *upper_corner,
102 const int *position,
const tensor *
const grid,
104extern void add_sub_grid(
const int *lower_corner,
const int *upper_corner,
105 const int *position,
const tensor *subgrid,
108 const int *lower_boundaries_cube,
109 const int *period,
int *
const position);
114 const double dh[3][3],
115 const double dh_inv[3][3],
const double *rp,
116 double *disr_radius,
double *roffset,
117 int *cubecenter,
int *lb_cube,
int *ub_cube,
121 static const int offset_[] = {1, 4, 7, 11, 16, 22, 29,
122 37, 46, 56, 67, 79, 92, 106,
123 121, 137, 154, 172, 191, 211, 232};
129 const int l = alpha + beta +
gamma;
135 return libxsmm_aligned_scratch(size, 0 );
166double cblas_ddot(
const int N,
const double *X,
const int incX,
const double *Y,
170 const double alpha,
const double *X,
const int incX,
171 const double *Y,
const int incY,
double *A,
const int lda);
173void cblas_daxpy(
const int N,
const double alpha,
const double *X,
174 const int incX,
double *Y,
const int incY);
177 const int M,
const int N,
const double alpha,
const double *A,
178 const int lda,
const double *X,
const int incX,
179 const double beta,
double *Y,
const int incY);
184 const int size,
const int cube_size,
const int x1,
185 int *x,
int *
const lower_corner,
186 int *
const upper_corner,
Interval window);
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int const int const int const int const double const int map[3][2 *cmax+1]
int compute_cube_properties(const bool ortho, const double radius, const double dh[3][3], const double dh_inv[3][3], const double *rp, double *disr_radius, double *roffset, int *cubecenter, int *lb_cube, int *ub_cube, int *cube_size)
double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY)
int coset_without_offset(int lx, int ly, int lz)
static void * grid_allocate_scratch(size_t size)
void extract_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *const grid, tensor *const subgrid)
static const double inv_fac[]
void compute_interval(const int *const map, const int full_size, const int size, const int cube_size, const int x1, int *x, int *const lower_corner, int *const upper_corner, Interval window)
struct dgemm_params_ dgemm_params
void cblas_dgemv(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY)
void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda)
static void grid_free_scratch(void *ptr)
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY)
void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, const double *beta, double *c, const int *ldc)
Prototype for BLAS dgemm.
void verify_orthogonality(const double dh[3][3], bool orthogonal[3])
int return_offset_l(const int l)
void return_cube_position(const int *lb_grid, const int *cube_center, const int *lower_boundaries_cube, const int *period, int *const position)
void dgemm_simplified(dgemm_params *const m)
void add_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *subgrid, tensor *grid)
void batched_dgemm_simplified(dgemm_params *const m, const int batch_size)
int return_linear_index_from_exponents(const int alpha, const int beta, const int gamma)
Calculation of the incomplete Gamma function F_n(t) for multi-center integrals over Cartesian Gaussia...