(git:374b731)
Loading...
Searching...
No Matches
grid_dgemm_prepare_pab.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
9
10#include <assert.h>
11#include <stdbool.h>
12
13#include "../common/grid_common.h"
14#include "../common/grid_constants.h"
15#include "grid_dgemm_utils.h"
16
18 int offset[2];
19 int lmax[2];
20 int lmin[2];
21 double zeta[2];
24 int dir1, dir2;
25};
26
27// *****************************************************************************
28static void grid_prepare_pab_AB(struct pab_computation_struct_ *const tp) {
29 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
30 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
31 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
32 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
33 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
34 lza <= tp->lmax[0] - lxa - lya; lza++) {
35 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
36 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
37 const int ico = tp->offset[0] + coset(lxa, lya, lza);
38 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
39 idx2(tp->pab_prep[0], coset(lxb, lyb, lzb),
40 coset(lxa, lya, lza)) = idx2(tp->pab[0], jco, ico);
41 }
42 }
43 }
44 }
45 }
46 }
47}
48
49// *****************************************************************************
50static void grid_prepare_pab_DADB(struct pab_computation_struct_ *const tp) {
51 // create a new pab_prep so that mapping pab_prep with pgf_a pgf_b
52 // is equivalent to mapping pab with 0.5 * (nabla pgf_a) . (nabla pgf_b)
53 // (ddx pgf_a ) (ddx pgf_b) = (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x})*(lbx
54 // pgf_{b-1x} - 2*tp->zeta[1]*pgf_{b+1x})
55
56 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
57 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
58 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
59 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
60 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
61 lza <= tp->lmax[0] - lxa - lya; lza++) {
62 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
63 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
64 const int ico = tp->offset[0] + coset(lxa, lya, lza);
65 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
66 const double pab = idx2(tp->pab[0], jco, ico);
67 int ico_l, jco_l;
68 // x (all safe if lxa = 0, as the spurious added terms have zero
69 // prefactor)
70
71 ico_l = coset(imax(lxa - 1, 0), lya, lza);
72 jco_l = coset(imax(lxb - 1, 0), lyb, lzb);
73 idx2(tp->pab_prep[0], jco_l, ico_l) += 0.5 * lxa * lxb * pab;
74
75 ico_l = coset(imax(lxa - 1, 0), lya, lza);
76 jco_l = coset((lxb + 1), lyb, lzb);
77 idx2(tp->pab_prep[0], jco_l, ico_l) -= lxa * tp->zeta[1] * pab;
78
79 ico_l = coset((lxa + 1), lya, lza);
80 jco_l = coset(imax(lxb - 1, 0), lyb, lzb);
81 idx2(tp->pab_prep[0], jco_l, ico_l) -= tp->zeta[0] * lxb * pab;
82
83 ico_l = coset((lxa + 1), lya, lza);
84 jco_l = coset((lxb + 1), lyb, lzb);
85 idx2(tp->pab_prep[0], jco_l, ico_l) +=
86 2.0 * tp->zeta[0] * tp->zeta[1] * pab;
87
88 // y
89
90 ico_l = coset(lxa, imax(lya - 1, 0), lza);
91 jco_l = coset(lxb, imax(lyb - 1, 0), lzb);
92 idx2(tp->pab_prep[0], jco_l, ico_l) += 0.5 * lya * lyb * pab;
93
94 ico_l = coset(lxa, imax(lya - 1, 0), lza);
95 jco_l = coset(lxb, (lyb + 1), lzb);
96 idx2(tp->pab_prep[0], jco_l, ico_l) -= lya * tp->zeta[1] * pab;
97
98 ico_l = coset(lxa, (lya + 1), lza);
99 jco_l = coset(lxb, imax(lyb - 1, 0), lzb);
100 idx2(tp->pab_prep[0], jco_l, ico_l) -= tp->zeta[0] * lyb * pab;
101
102 ico_l = coset(lxa, (lya + 1), lza);
103 jco_l = coset(lxb, (lyb + 1), lzb);
104 idx2(tp->pab_prep[0], jco_l, ico_l) +=
105 2.0 * tp->zeta[0] * tp->zeta[1] * pab;
106
107 // z
108
109 ico_l = coset(lxa, lya, imax(lza - 1, 0));
110 jco_l = coset(lxb, lyb, imax(lzb - 1, 0));
111 idx2(tp->pab_prep[0], jco_l, ico_l) += 0.5 * lza * lzb * pab;
112
113 ico_l = coset(lxa, lya, imax(lza - 1, 0));
114 jco_l = coset(lxb, lyb, (lzb + 1));
115 idx2(tp->pab_prep[0], jco_l, ico_l) -= lza * tp->zeta[1] * pab;
116
117 ico_l = coset(lxa, lya, (lza + 1));
118 jco_l = coset(lxb, lyb, imax(lzb - 1, 0));
119 idx2(tp->pab_prep[0], jco_l, ico_l) -= tp->zeta[0] * lzb * pab;
120
121 ico_l = coset(lxa, lya, (lza + 1));
122 jco_l = coset(lxb, lyb, (lzb + 1));
123 idx2(tp->pab_prep[0], jco_l, ico_l) +=
124 2.0 * tp->zeta[0] * tp->zeta[1] * pab;
125 }
126 }
127 }
128 }
129 }
130 }
131}
132
133// *****************************************************************************
135 // create a new pab_prep so that mapping pab_prep with pgf_a pgf_b
136 // is equivalent to mapping pab with
137 // pgf_a (nabla_{idir} pgf_b) - (nabla_{idir} pgf_a) pgf_b
138 // ( pgf_a ) (ddx pgf_b) - (ddx pgf_a)( pgf_b ) =
139 // pgf_a *(lbx pgf_{b-1x} - 2*tp->zeta[1]*pgf_{b+1x}) -
140 // (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x}) pgf_b
141
142 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
143 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
144 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
145 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
146 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
147 lza <= tp->lmax[0] - lxa - lya; lza++) {
148 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
149 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
150 const int ico = tp->offset[0] + coset(lxa, lya, lza);
151 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
152 const double pab = idx2(tp->pab[0], jco, ico);
153 int ico_l, jco_l;
154
155 // ! this element of pab results in 4 elements of pab_prep
156 switch (tp->dir1) {
157 case 'X': { // x
158 ico_l = coset(lxa, lya, lza);
159 jco_l = coset(imax(lxb - 1, 0), lyb, lzb);
160 idx2(tp->pab_prep[0], jco_l, ico_l) += lxb * pab;
161
162 // ico_l = coset(lxa, lya, lza);
163 jco_l = coset((lxb + 1), lyb, lzb);
164 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[1] * pab;
165
166 ico_l = coset(imax(lxa - 1, 0), lya, lza);
167 jco_l = coset(lxb, lyb, lzb);
168 idx2(tp->pab_prep[0], jco_l, ico_l) -= lxa * pab;
169
170 ico_l = coset(lxa + 1, lya, lza);
171 // jco_l = coset(lxb, lyb, lzb);
172 idx2(tp->pab_prep[0], jco_l, ico_l) += 2.0 * tp->zeta[0] * pab;
173 } break;
174 case 'Y': { // y
175 ico_l = coset(lxa, lya, lza);
176 jco_l = coset(lxb, imax(lyb - 1, 0), lzb);
177 idx2(tp->pab_prep[0], jco_l, ico_l) += lyb * pab;
178
179 // ico_l = coset(lxa, lya, lza);
180 jco_l = coset(lxb, lyb + 1, lzb);
181 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[1] * pab;
182
183 ico_l = coset(lxa, imax(lya - 1, 0), lza);
184 jco_l = coset(lxb, lyb, lzb);
185 idx2(tp->pab_prep[0], jco_l, ico_l) -= lya * pab;
186
187 ico_l = coset(lxa, lya + 1, lza);
188 // jco_l = coset(lxb, lyb, lzb);
189 idx2(tp->pab_prep[0], jco_l, ico_l) += 2.0 * tp->zeta[0] * pab;
190 } break;
191 case 'Z': { // z
192 ico_l = coset(lxa, lya, lza);
193 jco_l = coset(lxb, lyb, imax(lzb - 1, 0));
194 idx2(tp->pab_prep[0], jco_l, ico_l) += lzb * pab;
195
196 // ico_l = coset(lxa, lya, lza);
197 jco_l = coset(lxb, lyb, lzb + 1);
198 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[1] * pab;
199
200 ico_l = coset(lxa, lya, imax(lza - 1, 0));
201 jco_l = coset(lxb, lyb, lzb);
202 idx2(tp->pab_prep[0], jco_l, ico_l) -= lza * pab;
203
204 ico_l = coset(lxa, lya, lza + 1);
205 // jco_l = coset(lxb, lyb, lzb);
206 idx2(tp->pab_prep[0], jco_l, ico_l) += 2.0 * tp->zeta[0] * pab;
207 } break;
208 default:
209 break;
210 }
211 }
212 }
213 }
214 }
215 }
216 }
217}
218// *****************************************************************************
219static void
221 // create a new pab_prep so that mapping pab_prep with pgf_a pgf_b
222 // is equivalent to mapping pab with
223 // pgf_a (r-Rb)_{ir} (nabla_{idir} pgf_b) - (nabla_{idir} pgf_a) (r-Rb)_{ir}
224 // pgf_b ( pgf_a )(r-Rb)_{ir} (ddx pgf_b) - (ddx pgf_a) (r-Rb)_{ir} ( pgf_b )
225 // =
226 // pgf_a *(lbx pgf_{b-1x+1ir} -
227 // 2*tp->zeta[1]*pgf_{b+1x+1ir}) -
228 // (lax pgf_{a-1x} - 2*zeta*pgf_{a+1x}) pgf_{b+1ir}
229
230 assert(1 <= tp->dir1 && tp->dir1 <= 3);
231 assert(1 <= tp->dir2 && tp->dir2 <= 3);
232
233 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
234 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
235 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
236 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
237 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
238 lza <= tp->lmax[0] - lxa - lya; lza++) {
239 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
240 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
241 const int ico = tp->offset[0] + coset(lxa, lya, lza);
242 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
243 const double pab = idx2(tp->pab[0], jco, ico);
244
245 int ico_l, jco_l;
246
247 // this element of pab results in 4 elements of pab_prep
248 switch (tp->dir1) {
249 case 'X': {
250 switch (tp->dir2) {
251 case 'X': {
252 ico_l = coset(lxa, lya, lza);
253 jco_l = coset(lxb, lyb, lzb);
254 idx2(tp->pab_prep[0], jco_l, ico_l) += lxb * pab;
255
256 ico_l = coset(lxa, lya, lza);
257 jco_l = coset((lxb + 2), lyb, lzb);
258 idx2(tp->pab_prep[0], jco_l, ico_l) -=
259 2.0 * tp->zeta[1] * pab;
260
261 ico_l = coset(imax(lxa - 1, 0), lya, lza);
262 jco_l = coset((lxb + 1), lyb, lzb);
263 idx2(tp->pab_prep[0], jco_l, ico_l) -= lxa * pab;
264
265 ico_l = coset((lxa + 1), lya, lza);
266 jco_l = coset((lxb + 1), lyb, lzb);
267 idx2(tp->pab_prep[0], jco_l, ico_l) +=
268 2.0 * tp->zeta[0] * pab;
269 } break;
270 case 'Y': {
271 ico_l = coset(lxa, lya, lza);
272 jco_l = coset(imax(lxb - 1, 0), (lyb + 1), lzb);
273 idx2(tp->pab_prep[0], jco_l, ico_l) += lxb * pab;
274
275 ico_l = coset(lxa, lya, lza);
276 jco_l = coset((lxb + 1), (lyb + 1), lzb);
277 idx2(tp->pab_prep[0], jco_l, ico_l) -=
278 2.0 * tp->zeta[1] * pab;
279
280 ico_l = coset(imax(lxa - 1, 0), lya, lza);
281 jco_l = coset(lxb, (lyb + 1), lzb);
282 idx2(tp->pab_prep[0], jco_l, ico_l) -= lxa * pab;
283
284 ico_l = coset((lxa + 1), lya, lza);
285 jco_l = coset(lxb, (lyb + 1), lzb);
286 idx2(tp->pab_prep[0], jco_l, ico_l) +=
287 2.0 * tp->zeta[0] * pab;
288 } break;
289 case 'Z': {
290 ico_l = coset(lxa, lya, lza);
291 jco_l = coset(imax(lxb - 1, 0), lyb, (lzb + 1));
292 idx2(tp->pab_prep[0], jco_l, ico_l) += lxb * pab;
293
294 ico_l = coset(lxa, lya, lza);
295 jco_l = coset((lxb + 1), lyb, (lzb + 1));
296 idx2(tp->pab_prep[0], jco_l, ico_l) -=
297 2.0 * tp->zeta[1] * pab;
298
299 ico_l = coset(imax(lxa - 1, 0), lya, lza);
300 jco_l = coset(lxb, lyb, (lzb + 1));
301 idx2(tp->pab_prep[0], jco_l, ico_l) -= lxa * pab;
302
303 ico_l = coset((lxa + 1), lya, lza);
304 jco_l = coset(lxb, lyb, (lzb + 1));
305 idx2(tp->pab_prep[0], jco_l, ico_l) +=
306 2.0 * tp->zeta[0] * pab;
307 } break;
308 default:
309 break;
310 }
311 } break;
312 case 'Y': {
313 switch (tp->dir2) {
314 case 'X': {
315 ico_l = coset(lxa, lya, lza);
316 jco_l = coset((lxb + 1), imax(lyb - 1, 0), lzb);
317 idx2(tp->pab_prep[0], jco_l, ico_l) += lyb * pab;
318
319 ico_l = coset(lxa, lya, lza);
320 jco_l = coset((lxb + 1), (lyb + 1), lzb);
321 idx2(tp->pab_prep[0], jco_l, ico_l) -=
322 2.0 * tp->zeta[1] * pab;
323
324 ico_l = coset(lxa, imax(lya - 1, 0), lza);
325 jco_l = coset((lxb + 1), lyb, lzb);
326 idx2(tp->pab_prep[0], jco_l, ico_l) -= lya * pab;
327
328 ico_l = coset(lxa, (lya + 1), lza);
329 jco_l = coset((lxb + 1), lyb, lzb);
330 idx2(tp->pab_prep[0], jco_l, ico_l) +=
331 2.0 * tp->zeta[0] * pab;
332 } break;
333 case 'Y': {
334 ico_l = coset(lxa, lya, lza);
335 jco_l = coset(lxb, lyb, lzb);
336 idx2(tp->pab_prep[0], jco_l, ico_l) += lyb * pab;
337
338 ico_l = coset(lxa, lya, lza);
339 jco_l = coset(lxb, (lyb + 2), lzb);
340 idx2(tp->pab_prep[0], jco_l, ico_l) -=
341 2.0 * tp->zeta[1] * pab;
342
343 ico_l = coset(lxa, imax(lya - 1, 0), lza);
344 jco_l = coset(lxb, (lyb + 1), lzb);
345 idx2(tp->pab_prep[0], jco_l, ico_l) -= lya * pab;
346
347 ico_l = coset(lxa, (lya + 1), lza);
348 jco_l = coset(lxb, (lyb + 1), lzb);
349 idx2(tp->pab_prep[0], jco_l, ico_l) +=
350 2.0 * tp->zeta[0] * pab;
351 } break;
352 case 'Z': {
353 ico_l = coset(lxa, lya, lza);
354 jco_l = coset(lxb, imax(lyb - 1, 0), (lzb + 1));
355 idx2(tp->pab_prep[0], jco_l, ico_l) += lyb * pab;
356
357 ico_l = coset(lxa, lya, lza);
358 jco_l = coset(lxb, (lyb + 1), (lzb + 1));
359 idx2(tp->pab_prep[0], jco_l, ico_l) -=
360 2.0 * tp->zeta[1] * pab;
361
362 ico_l = coset(lxa, imax(lya - 1, 0), lza);
363 jco_l = coset(lxb, lyb, (lzb + 1));
364 idx2(tp->pab_prep[0], jco_l, ico_l) -= lya * pab;
365
366 ico_l = coset(lxa, (lya + 1), lza);
367 jco_l = coset(lxb, lyb, (lzb + 1));
368 idx2(tp->pab_prep[0], jco_l, ico_l) +=
369 2.0 * tp->zeta[0] * pab;
370 } break;
371 default:
372 break;
373 }
374 } break;
375 case 'Z': {
376 switch (tp->dir2) {
377 case 'X': {
378 ico_l = coset(lxa, lya, lza);
379 jco_l = coset((lxb + 1), lyb, imax(lzb - 1, 0));
380 idx2(tp->pab_prep[0], jco_l, ico_l) += lzb * pab;
381
382 ico_l = coset(lxa, lya, lza);
383 jco_l = coset((lxb + 1), lyb, (lzb + 1));
384 idx2(tp->pab_prep[0], jco_l, ico_l) -=
385 2.0 * tp->zeta[1] * pab;
386
387 ico_l = coset(lxa, lya, imax(lza - 1, 0));
388 jco_l = coset((lxb + 1), lyb, lzb);
389 idx2(tp->pab_prep[0], jco_l, ico_l) -= lza * pab;
390
391 ico_l = coset(lxa, lya, (lza + 1));
392 jco_l = coset((lxb + 1), lyb, lzb);
393 idx2(tp->pab_prep[0], jco_l, ico_l) +=
394 2.0 * tp->zeta[0] * pab;
395 } break;
396 case 'Y': {
397 ico_l = coset(lxa, lya, lza);
398 jco_l = coset(lxb, (lyb + 1), imax(lzb - 1, 0));
399 idx2(tp->pab_prep[0], jco_l, ico_l) += lzb * pab;
400
401 ico_l = coset(lxa, lya, lza);
402 jco_l = coset(lxb, (lyb + 1), (lzb + 1));
403 idx2(tp->pab_prep[0], jco_l, ico_l) +=
404 -2.0 * tp->zeta[1] * pab;
405
406 ico_l = coset(lxa, lya, imax(lza - 1, 0));
407 jco_l = coset(lxb, (lyb + 1), lzb);
408 idx2(tp->pab_prep[0], jco_l, ico_l) -= lza * pab;
409
410 ico_l = coset(lxa, lya, (lza + 1));
411 jco_l = coset(lxb, (lyb + 1), lzb);
412 idx2(tp->pab_prep[0], jco_l, ico_l) +=
413 2.0 * tp->zeta[0] * pab;
414 } break;
415 case 'Z': {
416 ico_l = coset(lxa, lya, lza);
417 jco_l = coset(lxb, lyb, lzb);
418 idx2(tp->pab_prep[0], jco_l, ico_l) += lzb * pab;
419
420 ico_l = coset(lxa, lya, lza);
421 jco_l = coset(lxb, lyb, (lzb + 2));
422 idx2(tp->pab_prep[0], jco_l, ico_l) -=
423 2.0 * tp->zeta[1] * pab;
424
425 ico_l = coset(lxa, lya, imax(lza - 1, 0));
426 jco_l = coset(lxb, lyb, (lzb + 1));
427 idx2(tp->pab_prep[0], jco_l, ico_l) -= lza * pab;
428
429 ico_l = coset(lxa, lya, (lza + 1));
430 jco_l = coset(lxb, lyb, (lzb + 1));
431 idx2(tp->pab_prep[0], jco_l, ico_l) +=
432 2.0 * tp->zeta[0] * pab;
433 } break;
434 default:
435 break;
436 }
437 } break;
438 default:
439 break;
440 }
441 }
442 }
443 }
444 }
445 }
446 }
447}
448// *****************************************************************************
450 // create a new pab_prep so that mapping pab_prep with pgf_a pgf_b
451 // is equivalent to mapping pab with
452 // pgf_a (nabla_{idir} pgf_b) + (nabla_{idir} pgf_a) pgf_b
453 // ( pgf_a ) (ddx pgf_b) + (ddx pgf_a)( pgf_b ) =
454 // pgf_a *(lbx pgf_{b-1x} + 2*tp->zeta[1]*pgf_{b+1x}) +
455 // (lax pgf_{a-1x} + 2*zeta*pgf_{a+1x}) pgf_b
456 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
457 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
458 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
459 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
460 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
461 lza <= tp->lmax[0] - lxa - lya; lza++) {
462 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
463 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
464 const int ico = tp->offset[0] + coset(lxa, lya, lza);
465 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
466 const double pab = idx2(tp->pab[0], jco, ico);
467
468 int ico_l, jco_l;
469
470 // this element of pab results in 4 elements of pab_prep
471
472 switch (tp->dir1) {
473 case 'X': {
474 ico_l = coset(lxa, lya, lza);
475 jco_l = coset(imax(lxb - 1, 0), lyb, lzb);
476 idx2(tp->pab_prep[0], jco_l, ico_l) += lxb * pab;
477
478 ico_l = coset(lxa, lya, lza);
479 jco_l = coset((lxb + 1), lyb, lzb);
480 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[1] * pab;
481
482 ico_l = coset(imax(lxa - 1, 0), lya, lza);
483 jco_l = coset(lxb, lyb, lzb);
484 idx2(tp->pab_prep[0], jco_l, ico_l) += lxa * pab;
485
486 ico_l = coset((lxa + 1), lya, lza);
487 jco_l = coset(lxb, lyb, lzb);
488 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[0] * pab;
489 } break;
490 case 'Y': { // y
491 ico_l = coset(lxa, lya, lza);
492 jco_l = coset(lxb, imax(lyb - 1, 0), lzb);
493 idx2(tp->pab_prep[0], jco_l, ico_l) += lyb * pab;
494
495 ico_l = coset(lxa, lya, lza);
496 jco_l = coset(lxb, (lyb + 1), lzb);
497 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[1] * pab;
498
499 ico_l = coset(lxa, imax(lya - 1, 0), lza);
500 jco_l = coset(lxb, lyb, lzb);
501 idx2(tp->pab_prep[0], jco_l, ico_l) += lya * pab;
502
503 ico_l = coset(lxa, (lya + 1), lza);
504 jco_l = coset(lxb, lyb, lzb);
505 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[0] * pab;
506 } break;
507 case 'Z': { // z
508 ico_l = coset(lxa, lya, lza);
509 jco_l = coset(lxb, lyb, imax(lzb - 1, 0));
510 idx2(tp->pab_prep[0], jco_l, ico_l) += lzb * pab;
511
512 ico_l = coset(lxa, lya, lza);
513 jco_l = coset(lxb, lyb, (lzb + 1));
514 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[1] * pab;
515
516 ico_l = coset(lxa, lya, imax(lza - 1, 0));
517 jco_l = coset(lxb, lyb, lzb);
518 idx2(tp->pab_prep[0], jco_l, ico_l) += lza * pab;
519
520 ico_l = coset(lxa, lya, (lza + 1));
521 jco_l = coset(lxb, lyb, lzb);
522 idx2(tp->pab_prep[0], jco_l, ico_l) -= 2.0 * tp->zeta[0] * pab;
523 break;
524 }
525 default:
526 break;
527 }
528 }
529 }
530 }
531 }
532 }
533 }
534}
535// *****************************************************************************
536static void grid_prepare_pab_Di(struct pab_computation_struct_ *const tp) {
537 // create a new pab_local so that mapping pab_local with pgf_a pgf_b
538 // is equivalent to mapping pab with
539 // d_{ider1} pgf_a d_{ider1} pgf_b
540 // dx pgf_a dx pgf_b =
541 // (lax pgf_{a-1x})*(lbx pgf_{b-1x}) -
542 // 2*tp->zeta[1]*lax*pgf_{a-1x}*pgf_{b+1x} -
543 // lbx pgf_{b-1x}*2*zeta*pgf_{a+1x}+ 4*zeta*zetab*pgf_{a+1x}pgf_{b+1x}
544
545 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
546 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
547 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
548 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
549 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
550 lza <= tp->lmax[0] - lxa - lya; lza++) {
551 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
552 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
553 const int ico = tp->offset[0] + coset(lxa, lya, lza);
554 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
555 const double pab = idx2(tp->pab[0], jco, ico);
556
557 int ico_l, jco_l;
558 // this element of pab results in 12 elements of pab_prep
559
560 switch (tp->dir1) {
561 case 'X': {
562 // x (all safe if lxa = 0, as the spurious added terms have
563 // zero prefactor)
564 ico_l = coset(imax(lxa - 1, 0), lya, lza);
565 jco_l = coset(imax(lxb - 1, 0), lyb, lzb);
566 idx2(tp->pab_prep[0], jco_l, ico_l) += lxa * lxb * pab;
567
568 ico_l = coset(imax(lxa - 1, 0), lya, lza);
569 jco_l = coset((lxb + 1), lyb, lzb);
570 idx2(tp->pab_prep[0], jco_l, ico_l) -=
571 2.0 * lxa * tp->zeta[1] * pab;
572
573 ico_l = coset((lxa + 1), lya, lza);
574 jco_l = coset(imax(lxb - 1, 0), lyb, lzb);
575 idx2(tp->pab_prep[0], jco_l, ico_l) -=
576 2.0 * tp->zeta[0] * lxb * pab;
577
578 ico_l = coset((lxa + 1), lya, lza);
579 jco_l = coset((lxb + 1), lyb, lzb);
580 idx2(tp->pab_prep[0], jco_l, ico_l) +=
581 4.0 * tp->zeta[0] * tp->zeta[1] * pab;
582 } break;
583 case 'Y': {
584 // y
585 ico_l = coset(lxa, imax(lya - 1, 0), lza);
586 jco_l = coset(lxb, imax(lyb - 1, 0), lzb);
587 idx2(tp->pab_prep[0], jco_l, ico_l) += lya * lyb * pab;
588
589 ico_l = coset(lxa, imax(lya - 1, 0), lza);
590 jco_l = coset(lxb, (lyb + 1), lzb);
591 idx2(tp->pab_prep[0], jco_l, ico_l) -=
592 2.0 * lya * tp->zeta[1] * pab;
593
594 ico_l = coset(lxa, (lya + 1), lza);
595 jco_l = coset(lxb, imax(lyb - 1, 0), lzb);
596 idx2(tp->pab_prep[0], jco_l, ico_l) -=
597 2.0 * tp->zeta[0] * lyb * pab;
598
599 ico_l = coset(lxa, (lya + 1), lza);
600 jco_l = coset(lxb, (lyb + 1), lzb);
601 idx2(tp->pab_prep[0], jco_l, ico_l) +=
602 4.0 * tp->zeta[0] * tp->zeta[1] * pab;
603 } break;
604 case 'Z': {
605 // z
606 ico_l = coset(lxa, lya, imax(lza - 1, 0));
607 jco_l = coset(lxb, lyb, imax(lzb - 1, 0));
608 idx2(tp->pab_prep[0], jco_l, ico_l) += lza * lzb * pab;
609
610 ico_l = coset(lxa, lya, imax(lza - 1, 0));
611 jco_l = coset(lxb, lyb, (lzb + 1));
612 idx2(tp->pab_prep[0], jco_l, ico_l) -=
613 2.0 * lza * tp->zeta[1] * pab;
614
615 ico_l = coset(lxa, lya, (lza + 1));
616 jco_l = coset(lxb, lyb, imax(lzb - 1, 0));
617 idx2(tp->pab_prep[0], jco_l, ico_l) -=
618 2.0 * tp->zeta[0] * lzb * pab;
619
620 ico_l = coset(lxa, lya, (lza + 1));
621 jco_l = coset(lxb, lyb, (lzb + 1));
622 idx2(tp->pab_prep[0], jco_l, ico_l) +=
623 4.0 * tp->zeta[0] * tp->zeta[1] * pab;
624 } break;
625 default:
626 break;
627 }
628 }
629 }
630 }
631 }
632 }
633 }
634}
635
636// *****************************************************************************
637static void oneterm_dijdij(const int idir, const double func_a, const int ico_l,
638 const int lx, const int ly, const int lz,
639 const double zet, tensor *const pab_prep) {
640 int jco_l;
641
642 switch (idir) {
643 case 'X': {
644 const int l1 = lx;
645 const int l2 = ly;
646 jco_l = coset(imax(lx - 1, 0), imax(ly - 1, 0), lz);
647 idx2(pab_prep[0], jco_l, ico_l) += l1 * l2 * func_a;
648
649 jco_l = coset(lx + 1, imax(ly - 1, 0), lz);
650 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * l2 * func_a;
651
652 jco_l = coset(imax(lx - 1, 0), ly + 1, lz);
653 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * l1 * func_a;
654
655 jco_l = coset(lx + 1, ly + 1, lz);
656 idx2(pab_prep[0], jco_l, ico_l) += 4.0 * zet * zet * func_a;
657 } break;
658 case 'Y': {
659 const int l1 = ly;
660 const int l2 = lz;
661 jco_l = coset(lx, imax(ly - 1, 0), imax(lz - 1, 0));
662 idx2(pab_prep[0], jco_l, ico_l) += l1 * l2 * func_a;
663
664 jco_l = coset(lx, ly + 1, imax(lz - 1, 0));
665 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * l2 * func_a;
666
667 jco_l = coset(lx, imax(ly - 1, 0), lz + 1);
668 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * l1 * func_a;
669
670 jco_l = coset(lx, ly + 1, lz + 1);
671 idx2(pab_prep[0], jco_l, ico_l) += 4.0 * zet * zet * func_a;
672 } break;
673 case 'Z': {
674 const int l1 = lz;
675 const int l2 = lx;
676 jco_l = coset(imax(lx - 1, 0), ly, imax(lz - 1, 0));
677 idx2(pab_prep[0], jco_l, ico_l) += l1 * l2 * func_a;
678
679 jco_l = coset(imax(lx - 1, 0), ly, lz + 1);
680 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * l2 * func_a;
681
682 jco_l = coset(lx + 1, ly, imax(lz - 1, 0));
683 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * l1 * func_a;
684
685 jco_l = coset(lx + 1, ly, lz + 1);
686 idx2(pab_prep[0], jco_l, ico_l) += 4.0 * zet * zet * func_a;
687 } break;
688 default:
689 break;
690 }
691}
692// *****************************************************************************
693static void grid_prepare_pab_DiDj(struct pab_computation_struct_ *const tp) {
694 // create a new pab_local so that mapping pab_local with pgf_a pgf_b
695 // is equivalent to mapping pab with
696 // d_{ider1} pgf_a d_{ider1} pgf_b
697
698 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
699 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
700 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
701 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
702 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
703 lza <= tp->lmax[0] - lxa - lya; lza++) {
704 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
705 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
706 const int ico = tp->offset[0] + coset(lxa, lya, lza);
707 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
708 const double pab = idx2(tp->pab[0], jco, ico);
709
710 int ico_l;
711 double func_a;
712
713 // this element of pab results in 12 elements of pab_local
714
715 if ((tp->dir1 == 'X' && tp->dir2 == 'Y') ||
716 (tp->dir1 == 'Y' && tp->dir2 == 'X')) {
717 // xy
718 ico_l = coset(imax(lxa - 1, 0), imax(lya - 1, 0), lza);
719 func_a = lxa * lya * pab;
720 oneterm_dijdij('X', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
721 tp->pab_prep);
722
723 ico_l = coset(lxa + 1, imax(lya - 1, 0), lza);
724 func_a = -2.0 * tp->zeta[0] * lya * pab;
725 oneterm_dijdij('X', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
726 tp->pab_prep);
727
728 ico_l = coset(imax(lxa - 1, 0), lya + 1, lza);
729 func_a = -2.0 * tp->zeta[0] * lxa * pab;
730 oneterm_dijdij('X', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
731 tp->pab_prep);
732
733 ico_l = coset(lxa + 1, lya + 1, lza);
734 func_a = 4.0 * tp->zeta[0] * tp->zeta[0] * pab;
735 oneterm_dijdij('X', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
736 tp->pab_prep);
737 } else if ((tp->dir1 == 'Y' && tp->dir2 == 'Z') ||
738 (tp->dir1 == 'Z' && tp->dir2 == 'Y')) {
739 // yz
740 ico_l = coset(lxa, imax(lya - 1, 0), imax(lza - 1, 0));
741 func_a = lya * lza * pab;
742 oneterm_dijdij('Y', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
743 tp->pab_prep);
744
745 ico_l = coset(lxa, lya + 1, imax(lza - 1, 0));
746 func_a = -2.0 * tp->zeta[0] * lza * pab;
747 oneterm_dijdij('Y', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
748 tp->pab_prep);
749
750 ico_l = coset(lxa, imax(lya - 1, 0), lza + 1);
751 func_a = -2.0 * tp->zeta[0] * lya * pab;
752 oneterm_dijdij('Y', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
753 tp->pab_prep);
754
755 ico_l = coset(lxa, lya + 1, lza + 1);
756 func_a = 4.0 * tp->zeta[0] * tp->zeta[0] * pab;
757 oneterm_dijdij(2, func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
758 tp->pab_prep);
759 } else if ((tp->dir1 == 'Z' && tp->dir2 == 'X') ||
760 (tp->dir1 == 'X' && tp->dir2 == 'Z')) {
761 // zx
762 ico_l = coset(imax(lxa - 1, 0), lya, imax(lza - 1, 0));
763 func_a = lza * lxa * pab;
764 oneterm_dijdij('Z', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
765 tp->pab_prep);
766
767 ico_l = coset(imax(lxa - 1, 0), lya, lza + 1);
768 func_a = -2.0 * tp->zeta[0] * lxa * pab;
769 oneterm_dijdij('Z', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
770 tp->pab_prep);
771
772 ico_l = coset(lxa + 1, lya, imax(lza - 1, 0));
773 func_a = -2.0 * tp->zeta[0] * lza * pab;
774 oneterm_dijdij('Z', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
775 tp->pab_prep);
776
777 ico_l = coset(lxa + 1, lya, lza + 1);
778 func_a = 4.0 * tp->zeta[0] * tp->zeta[0] * pab;
779 oneterm_dijdij('Z', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
780 tp->pab_prep);
781 }
782 }
783 }
784 }
785 }
786 }
787 }
788}
789
790// *****************************************************************************
791static void oneterm_diidii(const int idir, const double func_a, const int ico_l,
792 const int lx, const int ly, const int lz,
793 const double zet, tensor *const pab_prep) {
794 int jco_l;
795
796 switch (idir) {
797 case 'X': {
798 const int l1 = lx;
799 jco_l = coset(imax(lx - 2, 0), ly, lz);
800 idx2(pab_prep[0], jco_l, ico_l) += l1 * (l1 - 1) * func_a;
801
802 jco_l = coset(lx, ly, lz);
803 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * (2 * l1 + 1) * func_a;
804
805 jco_l = coset(lx + 2, ly, lz);
806 idx2(pab_prep[0], jco_l, ico_l) += 4.0 * zet * zet * func_a;
807 } break;
808 case 'Y': {
809 const int l1 = ly;
810 jco_l = coset(lx, imax(ly - 2, 0), lz);
811 idx2(pab_prep[0], jco_l, ico_l) += l1 * (l1 - 1) * func_a;
812
813 jco_l = coset(lx, ly, lz);
814 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * (2 * l1 + 1) * func_a;
815
816 jco_l = coset(lx, ly + 2, lz);
817 idx2(pab_prep[0], jco_l, ico_l) += 4.0 * zet * zet * func_a;
818 } break;
819 case 'Z': {
820 const int l1 = lz;
821 jco_l = coset(lx, ly, imax(lz - 2, 0));
822 idx2(pab_prep[0], jco_l, ico_l) += l1 * (l1 - 1) * func_a;
823
824 jco_l = coset(lx, ly, lz);
825 idx2(pab_prep[0], jco_l, ico_l) -= 2.0 * zet * (2 * l1 + 1) * func_a;
826
827 jco_l = coset(lx, ly, lz + 2);
828 idx2(pab_prep[0], jco_l, ico_l) += 4.0 * zet * zet * func_a;
829 } break;
830 default:
831 printf("Wrong value for ider: should be 1, 2, or 3\n");
832 abort();
833 break;
834 }
835}
836
837// *****************************************************************************
838static void grid_prepare_pab_Di2(struct pab_computation_struct_ *const tp) {
839 // create a new pab_local so that mapping pab_local with pgf_a pgf_b
840 // is equivalent to mapping pab with
841 // dd_{ider1} pgf_a dd_{ider1} pgf_b
842
843 for (int lxa = 0; lxa <= tp->lmax[0]; lxa++) {
844 for (int lxb = 0; lxb <= tp->lmax[1]; lxb++) {
845 for (int lya = 0; lya <= tp->lmax[0] - lxa; lya++) {
846 for (int lyb = 0; lyb <= tp->lmax[1] - lxb; lyb++) {
847 for (int lza = imax(tp->lmin[0] - lxa - lya, 0);
848 lza <= tp->lmax[0] - lxa - lya; lza++) {
849 for (int lzb = imax(tp->lmin[1] - lxb - lyb, 0);
850 lzb <= tp->lmax[1] - lxb - lyb; lzb++) {
851 const int ico = tp->offset[0] + coset(lxa, lya, lza);
852 const int jco = tp->offset[1] + coset(lxb, lyb, lzb);
853 const double pab = idx2(tp->pab[0], jco, ico);
854
855 int ico_l;
856 double func_a;
857
858 // this element of pab results in 9 elements of pab_local
859 switch (tp->dir1) {
860 case 'X': {
861 // x
862 ico_l = coset(imax(lxa - 2, 0), lya, lza);
863 func_a = lxa * (lxa - 1) * pab;
864 oneterm_diidii('X', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
865 tp->pab_prep);
866
867 ico_l = coset(lxa, lya, lza);
868 func_a = -2.0 * tp->zeta[0] * (2 * lxa + 1) * pab;
869 oneterm_diidii('X', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
870 tp->pab_prep);
871
872 ico_l = coset(lxa + 2, lya, lza);
873 func_a = 4.0 * tp->zeta[0] * tp->zeta[0] * pab;
874 oneterm_diidii('X', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
875 tp->pab_prep);
876 } break;
877 case 'Y': {
878 // y
879 ico_l = coset(lxa, imax(lya - 2, 0), lza);
880 func_a = lya * (lya - 1) * pab;
881 oneterm_diidii('Y', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
882 tp->pab_prep);
883
884 ico_l = coset(lxa, lya, lza);
885 func_a = -2.0 * tp->zeta[0] * (2 * lya + 1) * pab;
886 oneterm_diidii('Y', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
887 tp->pab_prep);
888
889 ico_l = coset(lxa, lya + 2, lza);
890 func_a = 4.0 * tp->zeta[0] * tp->zeta[0] * pab;
891 oneterm_diidii('Y', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
892 tp->pab_prep);
893 } break;
894 case 'Z': {
895 // z
896 ico_l = coset(lxa, lya, imax(lza - 2, 0));
897 func_a = lza * (lza - 1) * pab;
898 oneterm_diidii('Z', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
899 tp->pab_prep);
900
901 ico_l = coset(lxa, lya, lza);
902 func_a = -2.0 * tp->zeta[0] * (2 * lza + 1) * pab;
903 oneterm_diidii('Z', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
904 tp->pab_prep);
905
906 ico_l = coset(lxa, lya, lza + 2);
907 func_a = 4.0 * tp->zeta[0] * tp->zeta[0] * pab;
908 oneterm_diidii('Z', func_a, ico_l, lxb, lyb, lzb, tp->zeta[1],
909 tp->pab_prep);
910 } break;
911 default:
912 printf("Wrong value for ider: should be 'X', 'Y' or 'Z'.\n");
913 abort();
914 break;
915 }
916 }
917 }
918 }
919 }
920 }
921 }
922}
923
924// *****************************************************************************
926 int *const lmin_diff, int *const lmax_diff) {
927 switch (func) {
928 case GRID_FUNC_AB:
929 lmax_diff[0] = 0;
930 lmin_diff[0] = 0;
931 lmax_diff[1] = 0;
932 lmin_diff[1] = 0;
933 break;
934 case GRID_FUNC_DADB:
941 lmax_diff[0] = 1;
942 lmin_diff[0] = -1;
943 lmax_diff[1] = 1;
944 lmin_diff[1] = -1;
945 break;
955 lmax_diff[0] = 1; // TODO: mistake???, then we could merge la and lb.
956 lmin_diff[0] = -1;
957 lmax_diff[1] = 2;
958 lmin_diff[1] = -1;
959 break;
960 case GRID_FUNC_DX:
961 case GRID_FUNC_DY:
962 case GRID_FUNC_DZ:
963 lmax_diff[0] = 1;
964 lmin_diff[0] = -1;
965 lmax_diff[1] = 1;
966 lmin_diff[1] = -1;
967 break;
968 case GRID_FUNC_DXDY:
969 case GRID_FUNC_DYDZ:
970 case GRID_FUNC_DZDX:
971 case GRID_FUNC_DXDX:
972 case GRID_FUNC_DYDY:
973 case GRID_FUNC_DZDZ:
974 lmax_diff[0] = 2;
975 lmin_diff[0] = -2;
976 lmax_diff[1] = 2;
977 lmin_diff[1] = -2;
978 break;
979 default:
980 printf("Unkown ga-gb function");
981 abort();
982 }
983}
984
985// *****************************************************************************
986void grid_prepare_pab_dgemm(const enum grid_func func, const int *const offset,
987 const int *const lmin, const int *const lmax,
988 const double *const zeta, tensor *const pab,
989 tensor *const pab_prep) {
990 struct pab_computation_struct_ tmp;
991
992 tmp.offset[0] = offset[0];
993 tmp.offset[1] = offset[1];
994
995 tmp.lmin[0] = lmin[0];
996 tmp.lmin[1] = lmin[1];
997
998 tmp.lmax[0] = lmax[0];
999 tmp.lmax[1] = lmax[1];
1000
1001 tmp.pab = pab;
1002 tmp.pab_prep = pab_prep;
1003
1004 tmp.zeta[0] = zeta[0];
1005 tmp.zeta[1] = zeta[1];
1006 memset(pab_prep->data, 0, pab_prep->alloc_size_ * sizeof(double));
1007
1008 switch (func) {
1009 case GRID_FUNC_AB:
1010 grid_prepare_pab_AB(&tmp);
1011 break;
1012 case GRID_FUNC_DADB:
1014 break;
1016 tmp.dir1 = 'X';
1018 break;
1020 tmp.dir1 = 'Y';
1022 break;
1024 tmp.dir1 = 'Z';
1026 break;
1028 tmp.dir1 = 'X';
1029 tmp.dir2 = 'X';
1031 break;
1033 tmp.dir1 = 'X';
1034 tmp.dir2 = 'Y';
1036 break;
1038 tmp.dir1 = 'X';
1039 tmp.dir2 = 'Z';
1041 break;
1043 tmp.dir1 = 'Y';
1044 tmp.dir2 = 'X';
1046 break;
1048 tmp.dir1 = 'Y';
1049 tmp.dir2 = 'Y';
1051 break;
1053 tmp.dir1 = 'Y';
1054 tmp.dir2 = 'Z';
1056 break;
1058 tmp.dir1 = 'Z';
1059 tmp.dir2 = 'X';
1061 break;
1063 tmp.dir1 = 'Z';
1064 tmp.dir2 = 'Y';
1066 break;
1068 tmp.dir1 = 'Z';
1069 tmp.dir2 = 'Z';
1071 break;
1073 tmp.dir1 = 'X';
1075 break;
1077 tmp.dir1 = 'Y';
1079 break;
1081 tmp.dir1 = 'Z';
1083 break;
1084 case GRID_FUNC_DX:
1085 tmp.dir1 = 'X';
1086 grid_prepare_pab_Di(&tmp);
1087 break;
1088 case GRID_FUNC_DY:
1089 tmp.dir1 = 'Y';
1090 grid_prepare_pab_Di(&tmp);
1091 break;
1092 case GRID_FUNC_DZ:
1093 tmp.dir1 = 'Z';
1094 grid_prepare_pab_Di(&tmp);
1095 break;
1096 case GRID_FUNC_DXDY:
1097 tmp.dir1 = 'X';
1098 tmp.dir2 = 'Y';
1100 break;
1101 case GRID_FUNC_DYDZ:
1102 tmp.dir1 = 'Y';
1103 tmp.dir2 = 'Z';
1105 break;
1106 case GRID_FUNC_DZDX:
1107 tmp.dir1 = 'Z';
1108 tmp.dir2 = 'X';
1110 break;
1111 case GRID_FUNC_DXDX:
1112 tmp.dir1 = 'X';
1114 break;
1115 case GRID_FUNC_DYDY:
1116 tmp.dir1 = 'Y';
1118 break;
1119 case GRID_FUNC_DZDZ:
1120 tmp.dir1 = 'Z';
1122 break;
1123 default:
1124 printf("Unkown ga-gb function");
1125 abort();
1126 }
1127}
static int imax(int x, int y)
Returns the larger of two given integer (missing from the C standard)
grid_func
@ GRID_FUNC_ADBmDAB_X
@ GRID_FUNC_DZ
@ GRID_FUNC_DABpADB_X
@ GRID_FUNC_DY
@ GRID_FUNC_ARDBmDARB_YY
@ GRID_FUNC_DX
@ GRID_FUNC_ARDBmDARB_XY
@ GRID_FUNC_ARDBmDARB_ZY
@ GRID_FUNC_DYDY
@ GRID_FUNC_ARDBmDARB_XZ
@ GRID_FUNC_DZDX
@ GRID_FUNC_ARDBmDARB_YZ
@ GRID_FUNC_AB
@ GRID_FUNC_DYDZ
@ GRID_FUNC_ADBmDAB_Z
@ GRID_FUNC_DXDY
@ GRID_FUNC_DXDX
@ GRID_FUNC_ADBmDAB_Y
@ GRID_FUNC_DABpADB_Z
@ GRID_FUNC_ARDBmDARB_ZX
@ GRID_FUNC_DZDZ
@ GRID_FUNC_ARDBmDARB_YX
@ GRID_FUNC_ARDBmDARB_ZZ
@ GRID_FUNC_ARDBmDARB_XX
@ GRID_FUNC_DABpADB_Y
@ GRID_FUNC_DADB
static void grid_prepare_pab_Di2(struct pab_computation_struct_ *const tp)
static void grid_prepare_pab_ARDBmDARB(struct pab_computation_struct_ *const tp)
static void grid_prepare_pab_DABpADB(struct pab_computation_struct_ *const tp)
static void grid_prepare_pab_DiDj(struct pab_computation_struct_ *const tp)
static void oneterm_dijdij(const int idir, const double func_a, const int ico_l, const int lx, const int ly, const int lz, const double zet, tensor *const pab_prep)
void grid_prepare_pab_dgemm(const enum grid_func func, const int *const offset, const int *const lmin, const int *const lmax, const double *const zeta, tensor *const pab, tensor *const pab_prep)
static void grid_prepare_pab_DADB(struct pab_computation_struct_ *const tp)
static void grid_prepare_pab_AB(struct pab_computation_struct_ *const tp)
static void grid_prepare_pab_ADBmDAB(struct pab_computation_struct_ *const tp)
void grid_prepare_get_ldiffs_dgemm(const enum grid_func func, int *const lmin_diff, int *const lmax_diff)
static void oneterm_diidii(const int idir, const double func_a, const int ico_l, const int lx, const int ly, const int lz, const double zet, tensor *const pab_prep)
static void grid_prepare_pab_Di(struct pab_computation_struct_ *const tp)
#define idx2(a, i, j)