(git:374b731)
Loading...
Searching...
No Matches
grid_dgemm_utils.h
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#ifndef GRID_DGEMM_UTILS_H
9#define GRID_DGEMM_UTILS_H
10
11#include <stdbool.h>
12#include <stdio.h>
13#include <string.h>
14
15#if defined(__MKL)
16#include <mkl.h>
17#include <mkl_cblas.h>
18#endif
19
20#if defined(__LIBXSMM)
21#include <libxsmm.h>
22#endif
23
24#include "../common/grid_common.h"
27
28/* inverse of the factorials */
29static const double inv_fac[] = {1.0,
30 1.0,
31 0.5,
32 0.166666666666666666666666666667,
33 0.0416666666666666666666666666667,
34 0.00833333333333333333333333333333,
35 0.00138888888888888888888888888889,
36 0.000198412698412698412698412698413,
37 0.0000248015873015873015873015873016,
38 2.7557319223985890652557319224e-6,
39 2.7557319223985890652557319224e-7,
40 2.50521083854417187750521083854e-8,
41 2.08767569878680989792100903212e-9,
42 1.60590438368216145993923771702e-10,
43 1.14707455977297247138516979787e-11,
44 7.64716373181981647590113198579e-13,
45 4.77947733238738529743820749112e-14,
46 2.81145725434552076319894558301e-15,
47 1.56192069685862264622163643501e-16,
48 8.22063524662432971695598123687e-18,
49 4.11031762331216485847799061844e-19,
50 1.95729410633912612308475743735e-20,
51 8.8967913924505732867488974425e-22,
52 3.86817017063068403771691193152e-23,
53 1.6117375710961183490487133048e-24,
54 6.4469502843844733961948532192e-26,
55 2.47959626322479746007494354585e-27,
56 9.18368986379554614842571683647e-29,
57 3.27988923706983791015204172731e-30,
58 1.13099628864477169315587645769e-31,
59 3.76998762881590564385292152565e-33};
60
61inline int coset_without_offset(int lx, int ly, int lz) {
62 const int l = lx + ly + lz;
63 if (l == 0) {
64 return 0;
65 } else {
66 return ((l - lx) * (l - lx + 1)) / 2 + lz;
67 }
68}
69
70typedef struct dgemm_params_ {
71 char storage;
72 char op1;
73 char op2;
74 double alpha;
75 double beta;
76 double *a, *b, *c;
77 int m, n, k, lda, ldb, ldc;
78 int x, y, z;
79 int x1, y1, z1;
81#if defined(__LIBXSMM)
82 libxsmm_dmmfunction kernel;
83 int prefetch;
84 int flags;
85#endif
87
88extern void dgemm_simplified(dgemm_params *const m);
89extern void batched_dgemm_simplified(dgemm_params *const m,
90 const int batch_size);
91
92/*******************************************************************************
93 * \brief Prototype for BLAS dgemm.
94 * \author Ole Schuett
95 ******************************************************************************/
96void dgemm_(const char *transa, const char *transb, const int *m, const int *n,
97 const int *k, const double *alpha, const double *a, const int *lda,
98 const double *b, const int *ldb, const double *beta, double *c,
99 const int *ldc);
100
101extern void extract_sub_grid(const int *lower_corner, const int *upper_corner,
102 const int *position, const tensor *const grid,
103 tensor *const subgrid);
104extern void add_sub_grid(const int *lower_corner, const int *upper_corner,
105 const int *position, const tensor *subgrid,
106 tensor *grid);
107extern void return_cube_position(const int *lb_grid, const int *cube_center,
108 const int *lower_boundaries_cube,
109 const int *period, int *const position);
110
111extern void verify_orthogonality(const double dh[3][3], bool orthogonal[3]);
112
113extern int compute_cube_properties(const bool ortho, const double radius,
114 const double dh[3][3],
115 const double dh_inv[3][3], const double *rp,
116 double *disr_radius, double *roffset,
117 int *cubecenter, int *lb_cube, int *ub_cube,
118 int *cube_size);
119
120inline int return_offset_l(const int l) {
121 static const int offset_[] = {1, 4, 7, 11, 16, 22, 29,
122 37, 46, 56, 67, 79, 92, 106,
123 121, 137, 154, 172, 191, 211, 232};
124 return offset_[l];
125}
126
127inline int return_linear_index_from_exponents(const int alpha, const int beta,
128 const int gamma) {
129 const int l = alpha + beta + gamma;
130 return return_offset_l(l) + (l - alpha) * (l - alpha + 1) / 2 + gamma;
131}
132
133static inline void *grid_allocate_scratch(size_t size) {
134#ifdef __LIBXSMM
135 return libxsmm_aligned_scratch(size, 0 /*auto-alignment*/);
136#else
137 return malloc(size);
138#endif
139}
140
141static inline void grid_free_scratch(void *ptr) {
142#ifdef __LIBXSMM
143 libxsmm_free(ptr);
144#else
145 free(ptr);
146#endif
147}
148
149/* even openblas and lapack has cblas versions of lapack and blas. */
150#ifndef __MKL
157enum CBLAS_UPLO { CblasUpper = 121, CblasLower = 122 };
158enum CBLAS_DIAG { CblasNonUnit = 131, CblasUnit = 132 };
159enum CBLAS_SIDE { CblasLeft = 141, CblasRight = 142 };
160
165
166double cblas_ddot(const int N, const double *X, const int incX, const double *Y,
167 const int incY);
168
169void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N,
170 const double alpha, const double *X, const int incX,
171 const double *Y, const int incY, double *A, const int lda);
172
173void cblas_daxpy(const int N, const double alpha, const double *X,
174 const int incX, double *Y, const int incY);
175
176void cblas_dgemv(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE TransA,
177 const int M, const int N, const double alpha, const double *A,
178 const int lda, const double *X, const int incX,
179 const double beta, double *Y, const int incY);
180
181#endif
182
183extern void compute_interval(const int *const map, const int full_size,
184 const int size, const int cube_size, const int x1,
185 int *x, int *const lower_corner,
186 int *const upper_corner, Interval window);
187#endif
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int const int const int const int const double const int map[3][2 *cmax+1]
int compute_cube_properties(const bool ortho, const double radius, const double dh[3][3], const double dh_inv[3][3], const double *rp, double *disr_radius, double *roffset, int *cubecenter, int *lb_cube, int *ub_cube, int *cube_size)
double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY)
@ CblasLower
@ CblasUpper
int coset_without_offset(int lx, int ly, int lz)
static void * grid_allocate_scratch(size_t size)
void extract_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *const grid, tensor *const subgrid)
static const double inv_fac[]
CBLAS_TRANSPOSE
@ CblasNoTrans
@ CblasTrans
@ CblasConjTrans
@ CblasRight
@ CblasLeft
CBLAS_LAYOUT
@ CblasColMajor
@ CblasRowMajor
void compute_interval(const int *const map, const int full_size, const int size, const int cube_size, const int x1, int *x, int *const lower_corner, int *const upper_corner, Interval window)
struct dgemm_params_ dgemm_params
void cblas_dgemv(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY)
void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda)
@ CblasUnit
@ CblasNonUnit
static void grid_free_scratch(void *ptr)
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY)
void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, const double *beta, double *c, const int *ldc)
Prototype for BLAS dgemm.
void verify_orthogonality(const double dh[3][3], bool orthogonal[3])
int return_offset_l(const int l)
void return_cube_position(const int *lb_grid, const int *cube_center, const int *lower_boundaries_cube, const int *period, int *const position)
void dgemm_simplified(dgemm_params *const m)
void add_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *subgrid, tensor *grid)
void batched_dgemm_simplified(dgemm_params *const m, const int batch_size)
int return_linear_index_from_exponents(const int alpha, const int beta, const int gamma)
Calculation of the incomplete Gamma function F_n(t) for multi-center integrals over Cartesian Gaussia...
Definition gamma.F:15