(git:374b731)
Loading...
Searching...
No Matches
grid_dgemm_coefficients.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#if defined(__LIBXSMM)
13#include <libxsmm.h>
14#endif
15#include "../common/grid_common.h"
19
20void transform_xyz_to_triangular(const tensor *const coef,
21 double *const coef_xyz) {
22 assert(coef != NULL);
23 assert(coef_xyz != NULL);
24
25 int lxyz = 0;
26 const int lp = (coef->size[0] - 1);
27 for (int lzp = 0; lzp <= lp; lzp++) {
28 for (int lyp = 0; lyp <= lp - lzp; lyp++) {
29 for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
30 coef_xyz[lxyz] = idx3(coef[0], lzp, lyp, lxp);
31 }
32 }
33 }
34}
35
36void transform_yxz_to_triangular(const tensor *const coef,
37 double *const coef_xyz) {
38 assert(coef != NULL);
39 assert(coef_xyz != NULL);
40 int lxyz = 0;
41 const int lp = (coef->size[0] - 1);
42 for (int lzp = 0; lzp <= lp; lzp++) {
43 for (int lyp = 0; lyp <= lp - lzp; lyp++) {
44 for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
45 coef_xyz[lxyz] = idx3(coef[0], lyp, lxp, lzp);
46 }
47 }
48 }
49}
50
51void transform_triangular_to_xyz(const double *const coef_xyz,
52 tensor *const coef) {
53 assert(coef != NULL);
54 assert(coef_xyz != NULL);
55 int lxyz = 0;
56 const int lp = coef->size[0] - 1;
57 for (int lzp = 0; lzp <= lp; lzp++) {
58 for (int lyp = 0; lyp <= lp - lzp; lyp++) {
59 for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
60 idx3(coef[0], lzp, lyp, lxp) = coef_xyz[lxyz];
61 }
62 /* initialize the remaining coefficients to zero */
63 for (int lxp = lp - lzp - lyp + 1; lxp <= lp; lxp++)
64 idx3(coef[0], lzp, lyp, lxp) = 0.0;
65 }
66 }
67}
68
69/* Rotate from the (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 to (x - x_{12}) ^ k
70 * in all three directions */
71
73 const int *lmin, const int *lmax, const int lp, const double prefactor,
74 const tensor *const alpha, // [3][lb_max+1][la_max+1][lp+1]
75 const tensor *const pab,
76 tensor *coef_xyz) //[lp+1][lp+1][lp+1]
77{
78 /* can be done with dgemms as well, since it is a change of basis from (x -
79 * x1) (x - x2) to (x - x12)^alpha */
80
81 assert(alpha != NULL);
82 assert(coef_xyz != NULL);
83 assert(coef_xyz->data != NULL);
84 memset(coef_xyz->data, 0, coef_xyz->alloc_size_ * sizeof(double));
85 // we need a proper fix for that. We can use the tensor structure for this
86
87 for (int lzb = 0; lzb <= lmax[1]; lzb++) {
88 for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
89 const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
90 for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
91 const int jco = coset(lxb, lyb, lzb);
92 for (int lza = 0; lza <= lmax[0]; lza++) {
93 for (int lya = 0; lya <= lmax[0] - lza; lya++) {
94 const int lxa_min = imax(lmin[0] - lza - lya, 0);
95 for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
96 const int ico = coset(lxa, lya, lza);
97 const double pab_ = idx2(pab[0], jco, ico);
98 for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
99 const double p1 =
100 idx4(alpha[0], 0, lxb, lxa, lxp) * pab_ * prefactor;
101 for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
102 for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
103 const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
104 idx4(alpha[0], 2, lzb, lza, lzp) * p1;
105 idx3(coef_xyz[0], lxp, lzp, lyp) += p2;
106 }
107 }
108 }
109 }
110 }
111 }
112 }
113 }
114 }
115}
116
117/* Rotate from (x - x_{12}) ^ k to (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 in
118 * all three directions */
119
121 const int *const lmin, const int *const lmax, const int lp,
122 const double prefactor,
123 const tensor *const alpha, // transformation parameters (x - x_1)^n (x -
124 // x_2)^m -> (x - x_12) ^ l
125 const tensor *const coef_xyz, tensor *vab) {
126 /* can be done with dgemms as well, since it is a change of basis from (x -
127 * x1) (x - x2) to (x - x12)^alpha */
128
129 assert(alpha != NULL);
130 assert(coef_xyz != NULL);
131 assert(coef_xyz->data != NULL);
132
133 memset(vab->data, 0, sizeof(double) * vab->alloc_size_);
134 // we need a proper fix for that. We can use the tensor structure for this
135
136 for (int lzb = 0; lzb <= lmax[1]; lzb++) {
137 for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
138 const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
139 for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
140 const int jco = coset(lxb, lyb, lzb);
141 for (int lza = 0; lza <= lmax[0]; lza++) {
142 for (int lya = 0; lya <= lmax[0] - lza; lya++) {
143 const int lxa_min = imax(lmin[0] - lza - lya, 0);
144 for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
145 const int ico = coset(lxa, lya, lza);
146 double pab_ = 0.0;
147
148 /* this can be done with 3 dgemms actually but need to
149 * set coef accordingly (triangular along the second
150 * diagonal) */
151
152 for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
153 for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
154 for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
155 const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
156 idx4(alpha[0], 2, lzb, lza, lzp) *
157 idx4(alpha[0], 0, lxb, lxa, lxp) *
158 prefactor;
159 pab_ += idx3(coef_xyz[0], lyp, lxp, lzp) * p2;
160 }
161 }
162 }
163 idx2(vab[0], jco, ico) += pab_;
164 }
165 }
166 }
167 }
168 }
169 }
170}
171
172// *****************************************************************************
173void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3],
174 const double rp[3], const int *lmax,
175 tensor *alpha) {
176 assert(alpha != NULL);
177 // Initialize with zeros.
178 memset(alpha->data, 0, alpha->alloc_size_ * sizeof(double));
179
180 //
181 // compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls}
182 // alpha(ls,lxa,lxb,1)*(x-p)**ls
183 //
184
185 for (int iaxis = 0; iaxis < 3; iaxis++) {
186 const double drpa = rp[iaxis] - ra[iaxis];
187 const double drpb = rp[iaxis] - rb[iaxis];
188 for (int lxa = 0; lxa <= lmax[0]; lxa++) {
189 for (int lxb = 0; lxb <= lmax[1]; lxb++) {
190 double binomial_k_lxa = 1.0;
191 double a = 1.0;
192 for (int k = 0; k <= lxa; k++) {
193 double binomial_l_lxb = 1.0;
194 double b = 1.0;
195 for (int l = 0; l <= lxb; l++) {
196 idx4(alpha[0], iaxis, lxb, lxa, lxa - l + lxb - k) +=
197 binomial_k_lxa * binomial_l_lxb * a * b;
198 binomial_l_lxb *= ((double)(lxb - l)) / ((double)(l + 1));
199 b *= drpb;
200 }
201 binomial_k_lxa *= ((double)(lxa - k)) / ((double)(k + 1));
202 a *= drpa;
203 }
204 }
205 }
206 }
207}
208
209/* this function computes the coefficients initially expressed in the cartesian
210 * space to the grid space. It is inplane and can also be done with
211 * matrix-matrix multiplication. It is in fact a tensor reduction. */
212
213void grid_transform_coef_xzy_to_ikj(const double dh[3][3],
214 const tensor *coef_xyz) {
215 assert(coef_xyz != NULL);
216 const int lp = coef_xyz->size[0] - 1;
217 tensor coef_ijk;
218
219 /* this tensor corresponds to the term
220 * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
221 * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
222 * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
223 * III.A of the notes */
224 tensor hmatgridp;
225
226 initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
227 coef_xyz->size[2]);
228
229 coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
230
231 if (coef_ijk.data == NULL)
232 abort();
233
234 memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
235 initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
236
237 hmatgridp.data =
238 grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
239
240 // transform using multinomials
241 for (int i = 0; i < 3; i++) {
242 for (int j = 0; j < 3; j++) {
243 idx3(hmatgridp, 0, j, i) = 1.0;
244 for (int k = 1; k <= lp; k++) {
245 idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
246 }
247 }
248 }
249
250 const int lpx = lp;
251 for (int klx = 0; klx <= lpx; klx++) {
252 for (int jlx = 0; jlx <= lpx - klx; jlx++) {
253 for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
254 const int lx = ilx + jlx + klx;
255 const int lpy = lp - lx;
256 const double tx = idx3(hmatgridp, ilx, 0, 0) *
257 idx3(hmatgridp, jlx, 1, 0) *
258 idx3(hmatgridp, klx, 2, 0) * fac(lx) * inv_fac[klx] *
259 inv_fac[jlx] * inv_fac[ilx];
260
261 for (int kly = 0; kly <= lpy; kly++) {
262 for (int jly = 0; jly <= lpy - kly; jly++) {
263 for (int ily = 0; ily <= lpy - kly - jly; ily++) {
264 const int ly = ily + jly + kly;
265 const int lpz = lp - lx - ly;
266 const double ty = tx * idx3(hmatgridp, ily, 0, 1) *
267 idx3(hmatgridp, jly, 1, 1) *
268 idx3(hmatgridp, kly, 2, 1) * fac(ly) *
269 inv_fac[kly] * inv_fac[jly] * inv_fac[ily];
270 for (int klz = 0; klz <= lpz; klz++) {
271 for (int jlz = 0; jlz <= lpz - klz; jlz++) {
272 for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
273 const int lz = ilz + jlz + klz;
274 const int il = ilx + ily + ilz;
275 const int jl = jlx + jly + jlz;
276 const int kl = klx + kly + klz;
277 // const int lijk= coef_map[kl][jl][il];
278 /* the fac table is the factorial. It
279 * would be better to use the
280 * multinomials. */
281 idx3(coef_ijk, il, kl, jl) +=
282 idx3(coef_xyz[0], lx, lz, ly) * ty *
283 idx3(hmatgridp, ilz, 0, 2) *
284 idx3(hmatgridp, jlz, 1, 2) *
285 idx3(hmatgridp, klz, 2, 2) * fac(lz) * inv_fac[klz] *
286 inv_fac[jlz] * inv_fac[ilz];
287 }
288 }
289 }
290 }
291 }
292 }
293 }
294 }
295 }
296
297 memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
298 grid_free_scratch(coef_ijk.data);
299 grid_free_scratch(hmatgridp.data);
300}
301
302/* Rotate the coefficients computed in the local grid coordinates to the
303 * cartesians coorinates. The order of the indices indicates how the
304 * coefficients are stored */
305void grid_transform_coef_jik_to_yxz(const double dh[3][3],
306 const tensor *coef_xyz) {
307 assert(coef_xyz);
308 const int lp = coef_xyz->size[0] - 1;
309 tensor coef_ijk;
310
311 /* this tensor corresponds to the term
312 * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
313 * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
314 * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
315 * III.A of the notes */
316 tensor hmatgridp;
317
318 initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
319 coef_xyz->size[2]);
320
321 coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
322 if (coef_ijk.data == NULL)
323 abort();
324
325 memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
326 initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
327
328 hmatgridp.data =
329 grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
330
331 // transform using multinomials
332 for (int i = 0; i < 3; i++) {
333 for (int j = 0; j < 3; j++) {
334 idx3(hmatgridp, 0, j, i) = 1.0;
335 for (int k = 1; k <= lp; k++) {
336 idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
337 }
338 }
339 }
340
341 const int lpx = lp;
342 for (int klx = 0; klx <= lpx; klx++) {
343 for (int jlx = 0; jlx <= lpx - klx; jlx++) {
344 for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
345 const int lx = ilx + jlx + klx;
346 const int lpy = lp - lx;
347 for (int kly = 0; kly <= lpy; kly++) {
348 for (int jly = 0; jly <= lpy - kly; jly++) {
349 for (int ily = 0; ily <= lpy - kly - jly; ily++) {
350 const int ly = ily + jly + kly;
351 const int lpz = lp - lx - ly;
352 for (int klz = 0; klz <= lpz; klz++) {
353 for (int jlz = 0; jlz <= lpz - klz; jlz++) {
354 for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
355 const int lz = ilz + jlz + klz;
356 const int il = ilx + ily + ilz;
357 const int jl = jlx + jly + jlz;
358 const int kl = klx + kly + klz;
359 // const int lijk= coef_map[kl][jl][il];
360 /* the fac table is the factorial. It
361 * would be better to use the
362 * multinomials. */
363 idx3(coef_ijk, ly, lx, lz) +=
364 idx3(coef_xyz[0], jl, il, kl) *
365 idx3(hmatgridp, ilx, 0, 0) *
366 idx3(hmatgridp, jlx, 1, 0) *
367 idx3(hmatgridp, klx, 2, 0) *
368 idx3(hmatgridp, ily, 0, 1) *
369 idx3(hmatgridp, jly, 1, 1) *
370 idx3(hmatgridp, kly, 2, 1) *
371 idx3(hmatgridp, ilz, 0, 2) *
372 idx3(hmatgridp, jlz, 1, 2) *
373 idx3(hmatgridp, klz, 2, 2) * fac(lx) * fac(ly) *
374 fac(lz) /
375 (fac(ilx) * fac(ily) * fac(ilz) * fac(jlx) * fac(jly) *
376 fac(jlz) * fac(klx) * fac(kly) * fac(klz));
377 }
378 }
379 }
380 }
381 }
382 }
383 }
384 }
385 }
386 memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
387 grid_free_scratch(coef_ijk.data);
388 grid_free_scratch(hmatgridp.data);
389}
static int imax(int x, int y)
Returns the larger of two given integer (missing from the C standard)
static void const int const int i
void grid_transform_coef_xzy_to_ikj(const double dh[3][3], const tensor *coef_xyz)
void transform_xyz_to_triangular(const tensor *const coef, double *const coef_xyz)
void grid_compute_coefficients_dgemm(const int *lmin, const int *lmax, const int lp, const double prefactor, const tensor *const alpha, const tensor *const pab, tensor *coef_xyz)
void grid_compute_vab(const int *const lmin, const int *const lmax, const int lp, const double prefactor, const tensor *const alpha, const tensor *const coef_xyz, tensor *vab)
void transform_triangular_to_xyz(const double *const coef_xyz, tensor *const coef)
void transform_yxz_to_triangular(const tensor *const coef, double *const coef_xyz)
void grid_transform_coef_jik_to_yxz(const double dh[3][3], const tensor *coef_xyz)
void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3], const double rp[3], const int *lmax, tensor *alpha)
#define idx2(a, i, j)
#define idx4(a, i, j, k, l)
static void initialize_tensor_3(struct tensor_ *a, int n1, int n2, int n3)
#define idx3(a, i, j, k)
static void * grid_allocate_scratch(size_t size)
static const double inv_fac[]
static void grid_free_scratch(void *ptr)