(git:374b731)
Loading...
Searching...
No Matches
grid_dgemm_utils.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <limits.h>
10#include <math.h>
11#include <stdbool.h>
12#include <stdio.h>
13#include <string.h>
14
15#ifdef __LIBXSMM
16#include <libxsmm.h>
17#endif
18
19#ifdef __MKL
20#include <mkl.h>
21#endif
22
23#include "../common/grid_common.h"
25#include "grid_dgemm_utils.h"
26
27void convert_to_lattice_coordinates(const double dh_inv_[3][3],
28 const double *restrict const rp,
29 double *restrict rp_c) {
30 rp_c[0] =
31 dh_inv_[0][0] * rp[0] + dh_inv_[1][0] * rp[1] + dh_inv_[0][0] * rp[2];
32 rp_c[1] =
33 dh_inv_[0][1] * rp[0] + dh_inv_[1][1] * rp[1] + dh_inv_[1][1] * rp[2];
34 rp_c[2] =
35 dh_inv_[0][2] * rp[0] + dh_inv_[1][2] * rp[1] + dh_inv_[2][2] * rp[2];
36}
37
38/* DO NOT CHANGE THIS. */
39
41 if (m == NULL)
42 abort();
43
44#if defined(__LIBXSMM)
45#if LIBXSMM_VERSION2(1, 17) >= \
46 LIBXSMM_VERSION2(LIBXSMM_VERSION_MAJOR, LIBXSMM_VERSION_MINOR) && \
47 (2079 > LIBXSMM_VERSION_PATCH)
48 if (m->use_libxsmm && m->op2 == 'N') {
49 /* we are in row major but xsmm is in column major */
50 m->prefetch = LIBXSMM_PREFETCH_AUTO;
51 /* in the future, more flags can be or'd together (like NONE | TRANS_B,
52 * etc.)*/
53 m->flags =
54 (m->op1 != 'T' ? LIBXSMM_GEMM_FLAG_NONE : LIBXSMM_GEMM_FLAG_TRANS_B);
55
56 if (m->kernel == NULL) {
57 m->kernel =
58 libxsmm_dmmdispatch(m->n, m->m, m->k, &m->ldb, &m->lda, &m->ldc,
59 &m->alpha, &m->beta, &m->flags, &m->prefetch);
60 }
61
62 if (m->kernel) {
63 m->kernel(m->b, m->a, m->c, m->b, m->a, m->c);
64 return;
65 }
66 }
67#endif
68#endif
69
70#if defined(__MKL)
71 // fall back to mkl
72 if ((m->op1 == 'N') && (m->op2 == 'N'))
73 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m->m, m->n, m->k,
74 m->alpha, m->a, m->lda, m->b, m->ldb, m->beta, m->c, m->ldc);
75
76 if ((m->op1 == 'T') && (m->op2 == 'N'))
77 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m->m, m->n, m->k,
78 m->alpha, m->a, m->lda, m->b, m->ldb, m->beta, m->c, m->ldc);
79
80 if ((m->op1 == 'N') && (m->op2 == 'T'))
81 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m->m, m->n, m->k,
82 m->alpha, m->a, m->lda, m->b, m->ldb, m->beta, m->c, m->ldc);
83
84 if ((m->op1 == 'T') && (m->op2 == 'T'))
85 cblas_dgemm(CblasRowMajor, CblasTrans, CblasTrans, m->m, m->n, m->k,
86 m->alpha, m->a, m->lda, m->b, m->ldb, m->beta, m->c, m->ldc);
87
88#else
89
90 if ((m->op1 == 'N') && (m->op2 == 'N'))
91 dgemm_("N", "N", &m->n, &m->m, &m->k, &m->alpha, m->b, &m->ldb, m->a,
92 &m->lda, &m->beta, m->c, &m->ldc);
93
94 if ((m->op1 == 'T') && (m->op2 == 'N'))
95 dgemm_("N", "T", &m->n, &m->m, &m->k, &m->alpha, m->b, &m->ldb, m->a,
96 &m->lda, &m->beta, m->c, &m->ldc);
97
98 if ((m->op1 == 'T') && (m->op2 == 'T'))
99 dgemm_("T", "T", &m->n, &m->m, &m->k, &m->alpha, m->b, &m->ldb, m->a,
100 &m->lda, &m->beta, m->c, &m->ldc);
101
102 if ((m->op1 == 'N') && (m->op2 == 'T'))
103 dgemm_("T", "N", &m->n, &m->m, &m->k, &m->alpha, m->b, &m->ldb, m->a,
104 &m->lda, &m->beta, m->c, &m->ldc);
105
106#endif
107}
108
109void batched_dgemm_simplified(dgemm_params *const m, const int batch_size) {
110 assert(m != NULL);
111 assert(batch_size > 0);
112
113#if defined(__LIBXSMM)
114#if LIBXSMM_VERSION2(1, 17) >= \
115 LIBXSMM_VERSION2(LIBXSMM_VERSION_MAJOR, LIBXSMM_VERSION_MINOR) && \
116 (2079 > LIBXSMM_VERSION_PATCH)
117 if (m->use_libxsmm && m->op2 == 'N') {
118 /* we are in row major but xsmm is in column major */
119 m->prefetch = LIBXSMM_PREFETCH_AUTO;
120 /* in the future, more flags can be or'd together (like NONE | TRANS_B,
121 * etc.)*/
122 m->flags =
123 (m->op1 != 'T' ? LIBXSMM_GEMM_FLAG_NONE : LIBXSMM_GEMM_FLAG_TRANS_B);
124
125 if (m->kernel == NULL) {
126 m->kernel =
127 libxsmm_dmmdispatch(m->n, m->m, m->k, &m->ldb, &m->lda, &m->ldc,
128 &m->alpha, &m->beta, &m->flags, &m->prefetch);
129 }
130
131 if (m->kernel) {
132 for (int s = 0; s < batch_size - 1; s++) {
133 m->kernel(m[s].b, m[s].a, m[s].c, m[s + 1].b, m[s + 1].a, m[s + 1].c);
134 }
135 m->kernel(m[batch_size - 1].b, m[batch_size - 1].a, m[batch_size - 1].c,
136 m[batch_size - 1].b, m[batch_size - 1].a, m[batch_size - 1].c);
137 return;
138 }
139 }
140#endif
141#endif
142
143#if defined(__MKL)
144 // fall back to mkl
145 for (int s = 0; s < batch_size; s++) {
146 if ((m[s].op1 == 'N') && (m[s].op2 == 'N'))
147 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m[s].m, m[s].n,
148 m[s].k, m[s].alpha, m[s].a, m[s].lda, m[s].b, m[s].ldb,
149 m[s].beta, m[s].c, m[s].ldc);
150
151 if ((m[s].op1 == 'T') && (m[s].op2 == 'N'))
152 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m[s].m, m[s].n,
153 m[s].k, m[s].alpha, m[s].a, m[s].lda, m[s].b, m[s].ldb,
154 m[s].beta, m[s].c, m[s].ldc);
155
156 if ((m[s].op1 == 'N') && (m[s].op2 == 'T'))
157 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m[s].m, m[s].n,
158 m[s].k, m[s].alpha, m[s].a, m[s].lda, m[s].b, m[s].ldb,
159 m[s].beta, m[s].c, m[s].ldc);
160
161 if ((m[s].op1 == 'T') && (m[s].op2 == 'T'))
162 cblas_dgemm(CblasRowMajor, CblasTrans, CblasTrans, m[s].m, m[s].n, m[s].k,
163 m[s].alpha, m[s].a, m[s].lda, m[s].b, m[s].ldb, m[s].beta,
164 m[s].c, m[s].ldc);
165 }
166#else
167 for (int s = 0; s < batch_size; s++) {
168 if ((m[s].op1 == 'N') && (m[s].op2 == 'N'))
169 dgemm_("N", "N", &m[s].n, &m[s].m, &m[s].k, &m[s].alpha, m[s].b,
170 &m[s].ldb, m[s].a, &m[s].lda, &m[s].beta, m[s].c, &m[s].ldc);
171
172 if ((m[s].op1 == 'T') && (m[s].op2 == 'N'))
173 dgemm_("N", "T", &m[s].n, &m[s].m, &m[s].k, &m[s].alpha, m[s].b,
174 &m[s].ldb, m[s].a, &m[s].lda, &m[s].beta, m[s].c, &m[s].ldc);
175
176 if ((m[s].op1 == 'T') && (m[s].op2 == 'T'))
177 dgemm_("T", "T", &m[s].n, &m[s].m, &m[s].k, &m[s].alpha, m[s].b,
178 &m[s].ldb, m[s].a, &m[s].lda, &m[s].beta, m[s].c, &m[s].ldc);
179
180 if ((m[s].op1 == 'N') && (m[s].op2 == 'T'))
181 dgemm_("T", "N", &m[s].n, &m[s].m, &m[s].k, &m[s].alpha, m[s].b,
182 &m[s].ldb, m[s].a, &m[s].lda, &m[s].beta, m[s].c, &m[s].ldc);
183 }
184#endif
185}
186
187void extract_sub_grid(const int *lower_corner, const int *upper_corner,
188 const int *position, const tensor *const grid,
189 tensor *const subgrid) {
190 int position1[3] = {0, 0, 0};
191
192 if (position) {
193 position1[0] = position[0];
194 position1[1] = position[1];
195 position1[2] = position[2];
196 }
197
198 const int sizex = upper_corner[2] - lower_corner[2];
199 const int sizey = upper_corner[1] - lower_corner[1];
200 const int sizez = upper_corner[0] - lower_corner[0];
201
202 for (int z = 0; z < sizez; z++) {
203 /* maybe use matcopy from libxsmm if possible */
204 for (int y = 0; y < sizey; y++) {
205 double *restrict src =
206 &idx3(grid[0], lower_corner[0] + z - grid->window_shift[0],
207 lower_corner[1] + y - grid->window_shift[1],
208 lower_corner[2] - grid->window_shift[2]);
209 double *restrict dst =
210 &idx3(subgrid[0], position1[0] + z, position1[1] + y, position1[2]);
211#ifdef __LIBXSMM
212 LIBXSMM_PRAGMA_SIMD
213#else
214 // #pragma omp simd linear(dst, src) simdlen(8)
215 GRID_PRAGMA_SIMD((dst, src), 8)
216#endif
217 for (int x = 0; x < sizex; x++) {
218 dst[x] = src[x];
219 }
220 }
221 }
222
223 return;
224}
225
226void add_sub_grid(const int *lower_corner, const int *upper_corner,
227 const int *position, const tensor *subgrid, tensor *grid) {
228 int position1[3] = {0, 0, 0};
229
230 if (position) {
231 position1[0] = position[0];
232 position1[1] = position[1];
233 position1[2] = position[2];
234 }
235
236 const int sizex = upper_corner[2] - lower_corner[2];
237 const int sizey = upper_corner[1] - lower_corner[1];
238 const int sizez = upper_corner[0] - lower_corner[0];
239
240 for (int z = 0; z < sizez; z++) {
241 double *restrict dst =
242 &idx3(grid[0], lower_corner[0] + z, lower_corner[1], lower_corner[2]);
243 double *restrict src =
244 &idx3(subgrid[0], position1[0] + z, position1[1], position1[2]);
245 for (int y = 0; y < sizey - 1; y++) {
246 //__builtin_prefetch(dst + grid->ld_);
247#ifdef __LIBXSMM
248 LIBXSMM_PRAGMA_SIMD
249#else
250 // #pragma omp simd linear(dst, src) simdlen(8)
251 GRID_PRAGMA_SIMD((dst, src), 8)
252#endif
253 for (int x = 0; x < sizex; x++) {
254 dst[x] += src[x];
255 }
256
257 dst += grid->ld_;
258 src += subgrid->ld_;
259 }
260
261 // #pragma omp simd linear(dst, src) simdlen(8)
262 GRID_PRAGMA_SIMD((dst, src), 8)
263 for (int x = 0; x < sizex; x++) {
264 dst[x] += src[x];
265 }
266 }
267 return;
268}
269
270int compute_cube_properties(const bool ortho, const double radius,
271 const double dh[3][3], const double dh_inv[3][3],
272 const double *rp, double *disr_radius,
273 double *roffset, int *cubecenter, int *lb_cube,
274 int *ub_cube, int *cube_size) {
275 int cmax = 0;
276
277 /* center of the gaussian in the lattice coordinates */
278 double rp1[3];
279
280 /* it is in the lattice vector frame */
281 for (int i = 0; i < 3; i++) {
282 double dh_inv_rp = 0.0;
283 for (int j = 0; j < 3; j++) {
284 dh_inv_rp += dh_inv[j][i] * rp[j];
285 }
286 rp1[2 - i] = dh_inv_rp;
287 cubecenter[2 - i] = floor(dh_inv_rp);
288 }
289
290 if (ortho) {
291 /* seting up the cube parameters */
292 const double dx[3] = {dh[2][2], dh[1][1], dh[0][0]};
293 const double dx_inv[3] = {dh_inv[2][2], dh_inv[1][1], dh_inv[0][0]};
294 /* cube center */
295
296 /* lower and upper bounds */
297
298 // Historically, the radius gets discretized.
299 const double drmin = fmin(dh[0][0], fmin(dh[1][1], dh[2][2]));
300 *disr_radius = drmin * fmax(1.0, ceil(radius / drmin));
301
302 for (int i = 0; i < 3; i++) {
303 roffset[i] = rp[2 - i] - ((double)cubecenter[i]) * dx[i];
304 }
305
306 for (int i = 0; i < 3; i++) {
307 lb_cube[i] = ceil(-1e-8 - *disr_radius * dx_inv[i]);
308 }
309
310 // Symmetric interval
311 for (int i = 0; i < 3; i++) {
312 ub_cube[i] = 1 - lb_cube[i];
313 }
314
315 } else {
316 for (int idir = 0; idir < 3; idir++) {
317 lb_cube[idir] = INT_MAX;
318 ub_cube[idir] = INT_MIN;
319 }
320
321 /* compute the size of the box. It is a fairly trivial way to compute
322 * the box and it may have far more point than needed */
323 for (int i = -1; i <= 1; i++) {
324 for (int j = -1; j <= 1; j++) {
325 for (int k = -1; k <= 1; k++) {
326 double x[3] = {/* rp[0] + */ ((double)i) * radius,
327 /* rp[1] + */ ((double)j) * radius,
328 /* rp[2] + */ ((double)k) * radius};
329 /* convert_to_lattice_coordinates(dh_inv, x, y); */
330 for (int idir = 0; idir < 3; idir++) {
331 const double resc = dh_inv[0][idir] * x[0] +
332 dh_inv[1][idir] * x[1] + dh_inv[2][idir] * x[2];
333 lb_cube[2 - idir] = imin(lb_cube[2 - idir], floor(resc));
334 ub_cube[2 - idir] = imax(ub_cube[2 - idir], ceil(resc));
335 }
336 }
337 }
338 }
339
340 /* compute the offset in lattice coordinates */
341
342 for (int i = 0; i < 3; i++) {
343 roffset[i] = rp1[i] - cubecenter[i];
344 }
345
346 *disr_radius = radius;
347 }
348
349 /* compute the cube size ignoring periodicity */
350
351 /* the +1 is normal here */
352 cube_size[0] = ub_cube[0] - lb_cube[0] + 1;
353 cube_size[1] = ub_cube[1] - lb_cube[1] + 1;
354 cube_size[2] = ub_cube[2] - lb_cube[2] + 1;
355
356 for (int i = 0; i < 3; i++) {
357 cmax = imax(cmax, cube_size[i]);
358 }
359
360 return cmax;
361}
362
363void return_cube_position(const int *const lb_grid,
364 const int *const cube_center,
365 const int *const lower_boundaries_cube,
366 const int *const period, int *const position) {
367 for (int i = 0; i < 3; i++)
368 position[i] = modulo(cube_center[i] - lb_grid[i] + lower_boundaries_cube[i],
369 period[i]);
370}
371
372void verify_orthogonality(const double dh[3][3], bool orthogonal[3]) {
373 double norm1, norm2, norm3;
374
375 norm1 = dh[0][0] * dh[0][0] + dh[0][1] * dh[0][1] + dh[0][2] * dh[0][2];
376 norm2 = dh[1][0] * dh[1][0] + dh[1][1] * dh[1][1] + dh[1][2] * dh[1][2];
377 norm3 = dh[2][0] * dh[2][0] + dh[2][1] * dh[2][1] + dh[2][2] * dh[2][2];
378
379 norm1 = 1.0 / sqrt(norm1);
380 norm2 = 1.0 / sqrt(norm2);
381 norm3 = 1.0 / sqrt(norm3);
382
383 /* x z */
384 orthogonal[0] =
385 ((fabs(dh[0][0] * dh[2][0] + dh[0][1] * dh[2][1] + dh[0][2] * dh[2][2]) *
386 norm1 * norm3) < 1e-12);
387 /* y z */
388 orthogonal[1] =
389 ((fabs(dh[1][0] * dh[2][0] + dh[1][1] * dh[2][1] + dh[1][2] * dh[2][2]) *
390 norm2 * norm3) < 1e-12);
391 /* x y */
392 orthogonal[2] =
393 ((fabs(dh[0][0] * dh[1][0] + dh[0][1] * dh[1][1] + dh[0][2] * dh[1][2]) *
394 norm1 * norm2) < 1e-12);
395}
396
397#ifndef __MKL
398extern void dger_(const int *M, const int *N, const double *alpha,
399 const double *X, const int *incX, const double *Y,
400 const int *incY, double *A, const int *lda);
401extern void dgemv_(const char *Trans, const int *M, const int *N,
402 const double *alpha, const double *A, const int *lda,
403 const double *X, const int *incX, const double *beta,
404 double *Y, const int *incY);
405
406void cblas_daxpy(const int N, const double alpha, const double *X,
407 const int incX, double *Y, const int incY) {
408 if ((incX == 1) && (incY == 1)) {
409 for (int i = 0; i < N; i++)
410 Y[i] += alpha * X[i];
411 return;
412 }
413
414 if (incX == 1) {
415 for (int i = 0; i < N; i++)
416 Y[i + incY] += alpha * X[i];
417 return;
418 }
419
420 if (incY == 1) {
421 for (int i = 0; i < N; i++)
422 Y[i] += alpha * X[i + incX];
423 return;
424 }
425
426 for (int i = 0; i < N; i++)
427 Y[i + incY] += alpha * X[i + incX];
428 return;
429}
430
431double cblas_ddot(const int N, const double *X, const int incX, const double *Y,
432 const int incY) {
433 if ((incX == incY) && (incY == 1)) {
434 double res = 0.0;
435
436 for (int i = 0; i < N; i++) {
437 res += X[i] * Y[i];
438 }
439 return res;
440 }
441
442 if (incX == 1) {
443 double res = 0.0;
444
445 for (int i = 0; i < N; i++) {
446 res += X[i] * Y[i + incY];
447 }
448 return res;
449 }
450
451 if (incY == 1) {
452 double res = 0.0;
453
454 for (int i = 0; i < N; i++) {
455 res += X[i + incX] * Y[i];
456 }
457 return res;
458 }
459
460 double res = 0.0;
461
462 for (int i = 0; i < N; i++) {
463 res += X[i + incX] * Y[i + incY];
464 }
465 return res;
466}
467
468void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N,
469 const double alpha, const double *X, const int incX,
470 const double *Y, const int incY, double *A, const int lda) {
471 if (Layout == CblasRowMajor) {
472 dger_(&N, &M, &alpha, Y, &incY, X, &incX, A, &lda);
473 } else {
474 dger_(&N, &M, &alpha, X, &incX, Y, &incY, A, &lda);
475 }
476}
477
478/* code taken from gsl_cblas. We really need to use a proper cblas interface and
479 * build system.... */
480void cblas_dgemv(const CBLAS_LAYOUT order, const CBLAS_TRANSPOSE TransA,
481 const int M, const int N, const double alpha, const double *A,
482 const int lda, const double *X, const int incX,
483 const double beta, double *Y, const int incY) {
484
485 if (order == CblasColMajor) {
486 if (TransA == CblasTrans)
487 dgemv_("T", &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
488 else {
489 dgemv_("N", &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
490 }
491 } else {
492 if (TransA == CblasTrans)
493 dgemv_("N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
494 else {
495 dgemv_("T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
496 }
497 }
498}
499#endif
500
501void compute_interval(const int *const map, const int full_size, const int size,
502 const int cube_size, const int x1, int *x,
503 int *const lower_corner, int *const upper_corner,
504 const Interval window) {
505 if (size == full_size) {
506 /* we have the full grid in that direction */
507 /* lower boundary is within the window */
508 *lower_corner = x1;
509 /* now compute the upper corner */
510 /* needs to be as large as possible. basically I take [x1..
511 * min(grid.full_size, cube_size - x)] */
512
513 *upper_corner = compute_next_boundaries(x1, *x, full_size, cube_size);
514
515 /* { */
516 /* Interval tz = create_interval(*lower_corner, *upper_corner); */
517 /* Interval res = intersection_interval(tz, window); */
518 /* *lower_corner = res.xmin; */
519 /* *upper_corner = res.xmax; */
520 /* } */
521 } else {
522 *lower_corner = x1;
523 *upper_corner = x1 + 1;
524
525 // the map is always increasing by 1 except when we cross the boundaries of
526 // the grid and pbc are applied. Since we are only interested in by a
527 // subwindow of the full table we check that the next point is inside the
528 // window of interest and is also equal to the previous point + 1. The last
529 // check is pointless in practice.
530
531 for (int i = *x + 1; (i < cube_size) && (*upper_corner == map[i]) &&
532 is_point_in_interval(map[i], window);
533 i++) {
534 (*upper_corner)++;
535 }
536 }
537}
static int imax(int x, int y)
Returns the larger of two given integer (missing from the C standard)
static int imin(int x, int y)
Returns the smaller of two given integer (missing from the C standard)
Definition dbm_miniapp.c:38
void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, const double *beta, double *c, const int *ldc)
Prototype for BLAS dgemm.
#define GRID_PRAGMA_SIMD(OBJS, N)
Definition grid_common.h:20
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void const int const int const int const int const int const double const int const int const int int GRID_CONST_WHEN_COLLOCATE double GRID_CONST_WHEN_INTEGRATE double * grid
static void const int const int i
static void const int cmax
static void const int const int const int const int const int const double const int map[3][2 *cmax+1]
static bool is_point_in_interval(const int value, Interval x)
static int compute_next_boundaries(const int y1, const int y, const int grid_size, const int cube_size)
#define idx3(a, i, j, k)
int compute_cube_properties(const bool ortho, const double radius, const double dh[3][3], const double dh_inv[3][3], const double *rp, double *disr_radius, double *roffset, int *cubecenter, int *lb_cube, int *ub_cube, int *cube_size)
double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY)
void dgemv_(const char *Trans, const int *M, const int *N, const double *alpha, const double *A, const int *lda, const double *X, const int *incX, const double *beta, double *Y, const int *incY)
void extract_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *const grid, tensor *const subgrid)
void dger_(const int *M, const int *N, const double *alpha, const double *X, const int *incX, const double *Y, const int *incY, double *A, const int *lda)
void convert_to_lattice_coordinates(const double dh_inv_[3][3], const double *restrict const rp, double *restrict rp_c)
void compute_interval(const int *const map, const int full_size, const int size, const int cube_size, const int x1, int *x, int *const lower_corner, int *const upper_corner, const Interval window)
void return_cube_position(const int *const lb_grid, const int *const cube_center, const int *const lower_boundaries_cube, const int *const period, int *const position)
void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda)
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY)
void verify_orthogonality(const double dh[3][3], bool orthogonal[3])
void cblas_dgemv(const CBLAS_LAYOUT order, const CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY)
void dgemm_simplified(dgemm_params *const m)
void add_sub_grid(const int *lower_corner, const int *upper_corner, const int *position, const tensor *subgrid, tensor *grid)
void batched_dgemm_simplified(dgemm_params *const m, const int batch_size)
CBLAS_TRANSPOSE
@ CblasNoTrans
@ CblasTrans
CBLAS_LAYOUT
@ CblasColMajor
@ CblasRowMajor