(git:c9ed8e0)
Loading...
Searching...
No Matches
dbm_matrix.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <math.h>
10#include <omp.h>
11#include <stdbool.h>
12#include <stddef.h>
13#include <stdlib.h>
14#include <string.h>
15
16#include "dbm_hyperparams.h"
17#include "dbm_matrix.h"
18
19/*******************************************************************************
20 * \brief Creates a new matrix.
21 * \author Ole Schuett
22 ******************************************************************************/
23void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
24 const char name[], const int nrows, const int ncols,
25 const int row_sizes[nrows], const int col_sizes[ncols]) {
26 assert(omp_get_num_threads() == 1);
27
28 dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
29
30 assert(dist->rows.length == nrows);
31 assert(dist->cols.length == ncols);
32 dbm_distribution_hold(dist);
33 matrix->dist = dist;
34
35 size_t size = (strlen(name) + 1) * sizeof(char);
36 matrix->name = malloc(size);
37 assert(matrix->name != NULL);
38 memcpy(matrix->name, name, size);
39
40 matrix->nrows = nrows;
41 matrix->ncols = ncols;
42
43 size = nrows * sizeof(int);
44 matrix->row_sizes = malloc(size);
45 assert(matrix->row_sizes != NULL);
46 memcpy(matrix->row_sizes, row_sizes, size);
47
48 size = ncols * sizeof(int);
49 matrix->col_sizes = malloc(size);
50 assert(matrix->col_sizes != NULL);
51 memcpy(matrix->col_sizes, col_sizes, size);
52
53 matrix->shards = malloc(dbm_get_num_shards(matrix) * sizeof(dbm_shard_t));
54 assert(matrix->shards != NULL);
55#pragma omp parallel for
56 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
57 dbm_shard_init(&matrix->shards[ishard]);
58 }
59
60 assert(*matrix_out == NULL);
61 *matrix_out = matrix;
62}
63
64/*******************************************************************************
65 * \brief Releases a matrix and all its ressources.
66 * \author Ole Schuett
67 ******************************************************************************/
68void dbm_release(dbm_matrix_t *matrix) {
69 assert(omp_get_num_threads() == 1);
70 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
71 dbm_shard_release(&matrix->shards[ishard]);
72 }
73 dbm_distribution_release(matrix->dist);
74 free(matrix->row_sizes);
75 free(matrix->col_sizes);
76 free(matrix->shards);
77 free(matrix->name);
78 free(matrix);
79}
80
81/*******************************************************************************
82 * \brief Copies content of matrix_b into matrix_a.
83 * Matrices must have the same row/col block sizes and distribution.
84 * \author Ole Schuett
85 ******************************************************************************/
86void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
87 assert(omp_get_num_threads() == 1);
88
89 assert(matrix_b->nrows == matrix_a->nrows);
90 for (int i = 0; i < matrix_b->nrows; i++) {
91 assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
92 }
93 assert(matrix_b->ncols == matrix_a->ncols);
94 for (int i = 0; i < matrix_b->ncols; i++) {
95 assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
96 }
97
98 assert(matrix_a->dist == matrix_b->dist);
99
100#pragma omp parallel for DBM_OMP_SCHEDULE
101 for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
102 dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
103 }
104}
105
106/*******************************************************************************
107 * \brief Copies content of matrix_b into matrix_a.
108 * Matrices may have different distributions.
109 * \author Ole Schuett
110 ******************************************************************************/
111void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
112 assert(omp_get_num_threads() == 1);
113 assert(matrix->nrows == redist->nrows);
114 for (int i = 0; i < matrix->nrows; i++) {
115 assert(matrix->row_sizes[i] == redist->row_sizes[i]);
116 }
117 assert(matrix->ncols == redist->ncols);
118 for (int i = 0; i < matrix->ncols; i++) {
119 assert(matrix->col_sizes[i] == redist->col_sizes[i]);
120 }
121
122 assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
123 const dbm_mpi_comm_t comm = redist->dist->comm;
124 const int nranks = dbm_mpi_comm_size(comm);
125
126 // 1st pass: Compute send_count.
127 int send_count[nranks];
128 memset(send_count, 0, nranks * sizeof(int));
129 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
130 dbm_shard_t *shard = &matrix->shards[ishard];
131 for (int iblock = 0; iblock < shard->nblocks; iblock++) {
132 const dbm_block_t *blk = &shard->blocks[iblock];
133 const int row_size = matrix->row_sizes[blk->row];
134 const int col_size = matrix->col_sizes[blk->col];
135 const int block_size = row_size * col_size;
136 const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
137 assert(0 <= rank && rank < nranks);
138 send_count[rank] += 2 + block_size;
139 }
140 }
141
142 // 1st communication: Exchange counts.
143 int recv_count[nranks];
144 dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
145
146 // Compute displacements and allocate data buffers.
147 int send_displ[nranks + 1], recv_displ[nranks + 1];
148 send_displ[0] = recv_displ[0] = 0;
149 for (int irank = 1; irank <= nranks; irank++) {
150 send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
151 recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
152 }
153 const int total_send_count = send_displ[nranks];
154 const int total_recv_count = recv_displ[nranks];
155 double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
156 double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
157
158 // 2nd pass: Fill send_data.
159 int send_data_positions[nranks];
160 memcpy(send_data_positions, send_displ, nranks * sizeof(int));
161 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
162 dbm_shard_t *shard = &matrix->shards[ishard];
163 for (int iblock = 0; iblock < shard->nblocks; iblock++) {
164 const dbm_block_t *blk = &shard->blocks[iblock];
165 const double *blk_data = &shard->data[blk->offset];
166 const int row_size = matrix->row_sizes[blk->row];
167 const int col_size = matrix->col_sizes[blk->col];
168 const int block_size = row_size * col_size;
169 const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
170 const int pos = send_data_positions[rank];
171 data_send[pos + 0] = blk->row; // send integers as doubles
172 data_send[pos + 1] = blk->col;
173 memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
174 send_data_positions[rank] += 2 + block_size;
175 }
176 }
177 for (int irank = 0; irank < nranks; irank++) {
178 assert(send_data_positions[irank] == send_displ[irank + 1]);
179 }
180
181 // 2nd communication: Exchange data.
182 dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
183 recv_count, recv_displ, comm);
184 dbm_mpi_free_mem(data_send);
185
186 // 3rd pass: Unpack data.
187 dbm_clear(redist);
188 int recv_data_pos = 0;
189 while (recv_data_pos < total_recv_count) {
190 const int row = (int)data_recv[recv_data_pos + 0];
191 const int col = (int)data_recv[recv_data_pos + 1];
192 assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
193 assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
194 dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
195 const int row_size = matrix->row_sizes[row];
196 const int col_size = matrix->col_sizes[col];
197 const int block_size = row_size * col_size;
198 recv_data_pos += 2 + block_size;
199 }
200 assert(recv_data_pos == total_recv_count);
201 dbm_mpi_free_mem(data_recv);
202}
203
204/*******************************************************************************
205 * \brief Looks up a block from given matrics. This routine is thread-safe.
206 * If the block is not found then a null pointer is returned.
207 * \author Ole Schuett
208 ******************************************************************************/
209void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
210 double **block, int *row_size, int *col_size) {
211 assert(0 <= row && row < matrix->nrows);
212 assert(0 <= col && col < matrix->ncols);
213 assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
214 *row_size = matrix->row_sizes[row];
215 *col_size = matrix->col_sizes[col];
216 *block = NULL;
217
218 const int ishard = dbm_get_shard_index(matrix, row, col);
219 const dbm_shard_t *shard = &matrix->shards[ishard];
220 dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
221 if (blk != NULL) {
222 *block = &shard->data[blk->offset];
223 }
224}
225
226/*******************************************************************************
227 * \brief Adds a block to given matrix. This routine is thread-safe.
228 * If block already exist then it gets overwritten (or summed).
229 * \author Ole Schuett
230 ******************************************************************************/
231void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
232 const bool summation, const double *block) {
233 assert(0 <= row && row < matrix->nrows);
234 assert(0 <= col && col < matrix->ncols);
235 assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
236 const int row_size = matrix->row_sizes[row];
237 const int col_size = matrix->col_sizes[col];
238 const int block_size = row_size * col_size;
239
240 const int ishard = dbm_get_shard_index(matrix, row, col);
241 dbm_shard_t *shard = &matrix->shards[ishard];
242 omp_set_lock(&shard->lock);
243 dbm_block_t *blk =
244 dbm_shard_get_or_allocate_block(shard, row, col, block_size);
245 double *blk_data = &shard->data[blk->offset];
246 if (summation) {
247 for (int i = 0; i < block_size; i++) {
248 blk_data[i] += block[i];
249 }
250 } else {
251 memcpy(blk_data, block, block_size * sizeof(double));
252 }
253 omp_unset_lock(&shard->lock);
254}
255
256/*******************************************************************************
257 * \brief Remove all blocks from matrix, but does not release underlying memory.
258 * \author Ole Schuett
259 ******************************************************************************/
260void dbm_clear(dbm_matrix_t *matrix) {
261 assert(omp_get_num_threads() == 1);
262
263#pragma omp parallel for DBM_OMP_SCHEDULE
264 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
265 dbm_shard_t *shard = &matrix->shards[ishard];
266 shard->nblocks = 0;
267 shard->data_size = 0;
268 shard->data_promised = 0;
269 // Does not deallocate memory, hence data_allocated remains unchanged.
270 memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
271 }
272}
273
274/*******************************************************************************
275 * \brief Removes all blocks from the matrix whose norm is below the threshold.
276 * Blocks of size zero are always kept.
277 * \author Ole Schuett
278 ******************************************************************************/
279void dbm_filter(dbm_matrix_t *matrix, const double eps) {
280 assert(omp_get_num_threads() == 1);
281
282 if (eps == 0.0) {
283 return;
284 }
285 const double eps2 = eps * eps;
286
287#pragma omp parallel for DBM_OMP_SCHEDULE
288 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
289 dbm_shard_t *shard = &matrix->shards[ishard];
290 const int old_nblocks = shard->nblocks;
291 shard->nblocks = 0;
292 shard->data_promised = 0;
293 memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
294
295 for (int iblock = 0; iblock < old_nblocks; iblock++) {
296 const dbm_block_t old_blk = shard->blocks[iblock];
297 const double *old_blk_data = &shard->data[old_blk.offset];
298 const int row_size = matrix->row_sizes[old_blk.row];
299 const int col_size = matrix->col_sizes[old_blk.col];
300 const int block_size = row_size * col_size;
301 double norm = 0.0;
302 for (int i = 0; i < block_size; i++) {
303 const double value = old_blk_data[i];
304 norm += value * value;
305 if (eps2 <= norm) {
306 break;
307 }
308 }
309 // For historic reasons zero-sized blocks are never filtered.
310 if (block_size > 0 && norm < eps2) {
311 continue; // filter the block
312 }
313 // Re-create block.
315 shard, old_blk.row, old_blk.col, block_size);
316 assert(new_blk->offset <= old_blk.offset);
317 if (new_blk->offset != old_blk.offset) {
318 // Using memmove because it can handle overlapping buffers.
319 double *new_blk_data = &shard->data[new_blk->offset];
320 memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
321 }
322 }
323 shard->data_size = shard->data_promised;
324 // TODO: Could call realloc to release excess memory.
325 }
326}
327
328/*******************************************************************************
329 * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
330 * This routine must always be called within an OpenMP parallel region.
331 * \author Ole Schuett
332 ******************************************************************************/
333void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
334 const int rows[], const int cols[]) {
335 assert(omp_get_num_threads() == omp_get_max_threads() &&
336 "Please call dbm_reserve_blocks within an OpenMP parallel region.");
337 const int my_rank = matrix->dist->my_rank;
338 for (int i = 0; i < nblocks; i++) {
339 const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
340 dbm_shard_t *shard = &matrix->shards[ishard];
341 omp_set_lock(&shard->lock);
342 assert(0 <= rows[i] && rows[i] < matrix->nrows);
343 assert(0 <= cols[i] && cols[i] < matrix->ncols);
344 assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
345 const int row_size = matrix->row_sizes[rows[i]];
346 const int col_size = matrix->col_sizes[cols[i]];
347 const int block_size = row_size * col_size;
348 dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
349 omp_unset_lock(&shard->lock);
350 }
351#pragma omp barrier
352
353#pragma omp for DBM_OMP_SCHEDULE
354 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
355 dbm_shard_t *shard = &matrix->shards[ishard];
357 }
358}
359
360/*******************************************************************************
361 * \brief Multiplies all entries in the given matrix by the given factor alpha.
362 * \author Ole Schuett
363 ******************************************************************************/
364void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
365 assert(omp_get_num_threads() == 1);
366 if (alpha == 1.0) {
367 return;
368 }
369 if (alpha == 0.0) {
370 dbm_zero(matrix);
371 return;
372 }
373
374#pragma omp parallel for DBM_OMP_SCHEDULE
375 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
376 dbm_shard_t *shard = &matrix->shards[ishard];
377 for (int i = 0; i < shard->data_size; i++) {
378 shard->data[i] *= alpha;
379 }
380 }
381}
382
383/*******************************************************************************
384 * \brief Sets all blocks in the given matrix to zero.
385 * \author Ole Schuett
386 ******************************************************************************/
387void dbm_zero(dbm_matrix_t *matrix) {
388 assert(omp_get_num_threads() == 1);
389
390#pragma omp parallel for DBM_OMP_SCHEDULE
391 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
392 dbm_shard_t *shard = &matrix->shards[ishard];
393 if (shard->data != NULL) {
394 memset(shard->data, 0, shard->data_size * sizeof(double));
395 }
396 }
397}
398
399/*******************************************************************************
400 * \brief Adds matrix_b to matrix_a.
401 * \author Ole Schuett
402 ******************************************************************************/
403void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
404 assert(omp_get_num_threads() == 1);
405 assert(matrix_a->dist == matrix_b->dist);
406
407#pragma omp parallel for DBM_OMP_SCHEDULE
408 for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
409 dbm_shard_t *shard_a = &matrix_a->shards[ishard];
410 const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
411 for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
412 const dbm_block_t blk_b = shard_b->blocks[iblock];
413
414 const int row_size = matrix_b->row_sizes[blk_b.row];
415 const int col_size = matrix_b->col_sizes[blk_b.col];
416 assert(row_size == matrix_a->row_sizes[blk_b.row]);
417 assert(col_size == matrix_a->col_sizes[blk_b.col]);
418 const int block_size = row_size * col_size;
420 shard_a, blk_b.row, blk_b.col, block_size);
421 double *data_a = &shard_a->data[blk_a->offset];
422 const double *data_b = &shard_b->data[blk_b.offset];
423 for (int i = 0; i < block_size; i++) {
424 data_a[i] += data_b[i];
425 }
426 }
427 }
428}
429
430/*******************************************************************************
431 * \brief Creates an iterator for the blocks of the given matrix.
432 * The iteration order is not stable.
433 * This routine must always be called within an OpenMP parallel region.
434 * \author Ole Schuett
435 ******************************************************************************/
436void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
437 assert(omp_get_num_threads() == omp_get_max_threads() &&
438 "Please call dbm_iterator_start within an OpenMP parallel region.");
439 dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
440 assert(iter != NULL);
441 iter->matrix = matrix;
442 iter->next_block = 0;
443 iter->next_shard = omp_get_thread_num();
444 while (iter->next_shard < dbm_get_num_shards(matrix) &&
445 matrix->shards[iter->next_shard].nblocks == 0) {
446 iter->next_shard += omp_get_num_threads();
447 }
448 assert(*iter_out == NULL);
449 *iter_out = iter;
450}
451
452/*******************************************************************************
453 * \brief Returns number of blocks the iterator will provide to calling thread.
454 * \author Ole Schuett
455 ******************************************************************************/
456int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
457 int num_blocks = 0;
458 for (int ishard = omp_get_thread_num();
459 ishard < dbm_get_num_shards(iter->matrix);
460 ishard += omp_get_num_threads()) {
461 num_blocks += iter->matrix->shards[ishard].nblocks;
462 }
463 return num_blocks;
464}
465
466/*******************************************************************************
467 * \brief Tests whether the given iterator has any block left.
468 * \author Ole Schuett
469 ******************************************************************************/
470bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
471 return iter->next_shard < dbm_get_num_shards(iter->matrix);
472}
473
474/*******************************************************************************
475 * \brief Returns the next block from the given iterator.
476 * \author Ole Schuett
477 ******************************************************************************/
478void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
479 double **block, int *row_size, int *col_size) {
480 const dbm_matrix_t *matrix = iter->matrix;
481 assert(iter->next_shard < dbm_get_num_shards(matrix));
482 const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
483 assert(iter->next_block < shard->nblocks);
484 dbm_block_t *blk = &shard->blocks[iter->next_block];
485
486 *row = blk->row;
487 *col = blk->col;
488 *row_size = matrix->row_sizes[blk->row];
489 *col_size = matrix->col_sizes[blk->col];
490 *block = &shard->data[blk->offset];
491
492 iter->next_block++;
493 if (iter->next_block >= shard->nblocks) {
494 // Advance to the next non-empty shard...
495 iter->next_shard += omp_get_num_threads();
496 while (iter->next_shard < dbm_get_num_shards(matrix) &&
497 matrix->shards[iter->next_shard].nblocks == 0) {
498 iter->next_shard += omp_get_num_threads();
499 }
500 iter->next_block = 0; // ...and continue with its first block.
501 }
502}
503
504/*******************************************************************************
505 * \brief Releases the given iterator.
506 * \author Ole Schuett
507 ******************************************************************************/
508void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
509
510/*******************************************************************************
511 * \brief Private routine to accumulate using Kahan's summation.
512 * \author Hans Pabst
513 ******************************************************************************/
514static double kahan_sum(double value, double *accumulator,
515 double *compensation) {
516 double r, c;
517 assert(NULL != accumulator && NULL != compensation);
518 c = value - *compensation;
519 r = *accumulator + c;
520 *compensation = (r - *accumulator) - c;
521 *accumulator = r;
522 return r;
523}
524
525/*******************************************************************************
526 * \brief Computes a checksum of the given matrix.
527 * \author Ole Schuett
528 ******************************************************************************/
529double dbm_checksum(const dbm_matrix_t *matrix) {
530 double checksum = 0.0, compensation = 0.0;
531 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
532 const dbm_shard_t *shard = &matrix->shards[ishard];
533 for (int i = 0; i < shard->data_size; i++) {
534 const double value = shard->data[i];
535 kahan_sum(value * value, &checksum, &compensation);
536 }
537 }
538 dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
539 return checksum;
540}
541
542/*******************************************************************************
543 * \brief Returns the largest absolute value of all matrix elements.
544 * \author Ole Schuett
545 ******************************************************************************/
546double dbm_maxabs(const dbm_matrix_t *matrix) {
547 double maxabs = 0.0;
548 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
549 const dbm_shard_t *shard = &matrix->shards[ishard];
550 for (int i = 0; i < shard->data_size; i++) {
551 maxabs = fmax(maxabs, fabs(shard->data[i]));
552 }
553 }
554 dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
555 return maxabs;
556}
557
558/*******************************************************************************
559 * \brief Returns the name of the matrix of the given matrix.
560 * \author Ole Schuett
561 ******************************************************************************/
562const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
563
564/*******************************************************************************
565 * \brief Returns the number of local Non-Zero Elements of the given matrix.
566 * \author Ole Schuett
567 ******************************************************************************/
568int dbm_get_nze(const dbm_matrix_t *matrix) {
569 int nze = 0;
570 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
571 nze += matrix->shards[ishard].data_size;
572 }
573 return nze;
574}
575
576/*******************************************************************************
577 * \brief Returns the number of local blocks of the given matrix.
578 * \author Ole Schuett
579 ******************************************************************************/
580int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
581 int nblocks = 0;
582 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
583 nblocks += matrix->shards[ishard].nblocks;
584 }
585 return nblocks;
586}
587
588/*******************************************************************************
589 * \brief Returns the row block sizes of the given matrix.
590 * \author Ole Schuett
591 ******************************************************************************/
592void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
593 const int **row_sizes) {
594 *nrows = matrix->nrows;
595 *row_sizes = matrix->row_sizes;
596}
597
598/*******************************************************************************
599 * \brief Returns the column block sizes of the given matrix.
600 * \author Ole Schuett
601 ******************************************************************************/
602void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
603 const int **col_sizes) {
604 *ncols = matrix->ncols;
605 *col_sizes = matrix->col_sizes;
606}
607
608/*******************************************************************************
609 * \brief Returns the local row block sizes of the given matrix.
610 * \author Ole Schuett
611 ******************************************************************************/
612void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
613 const int **local_rows) {
614 *nlocal_rows = matrix->dist->rows.nlocals;
615 *local_rows = matrix->dist->rows.local_indicies;
616}
617
618/*******************************************************************************
619 * \brief Returns the local column block sizes of the given matrix.
620 * \author Ole Schuett
621 ******************************************************************************/
622void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
623 const int **local_cols) {
624 *nlocal_cols = matrix->dist->cols.nlocals;
625 *local_cols = matrix->dist->cols.local_indicies;
626}
627
628/*******************************************************************************
629 * \brief Returns the MPI rank on which the given block should be stored.
630 * \author Ole Schuett
631 ******************************************************************************/
632int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
633 const int col) {
634 return dbm_distribution_stored_coords(matrix->dist, row, col);
635}
636
637/*******************************************************************************
638 * \brief Returns the distribution of the given matrix.
639 * \author Ole Schuett
640 ******************************************************************************/
641const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
642 return matrix->dist;
643}
644
645// EOF
int dbm_distribution_stored_coords(const dbm_distribution_t *dist, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols, const int **col_sizes)
Returns the column block sizes of the given matrix.
Definition dbm_matrix.c:602
void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows, const int **row_sizes)
Returns the row block sizes of the given matrix.
Definition dbm_matrix.c:592
static int dbm_get_shard_index(const dbm_matrix_t *matrix, const int row, const int col)
Internal routine for getting a block's shard index.
Definition dbm_matrix.h:240
static int dbm_get_num_shards(const dbm_matrix_t *matrix)
Internal routine that returns the number of shards for given matrix.
Definition dbm_matrix.h:232
void dbm_mpi_free_mem(void *mem)
Wrapper around MPI_Free_mem.
Definition dbm_mpi.c:499
void * dbm_mpi_alloc_mem(size_t size)
Wrapper around MPI_Alloc_mem.
Definition dbm_mpi.c:483
bool dbm_mpi_comms_are_similar(const dbm_mpi_comm_t comm1, const dbm_mpi_comm_t comm2)
Wrapper around MPI_Comm_compare.
Definition dbm_mpi.c:224
int dbm_mpi_comm_size(const dbm_mpi_comm_t comm)
Wrapper around MPI_Comm_size.
Definition dbm_mpi.c:110
void dbm_mpi_sum_double(double *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_DOUBLE.
Definition dbm_mpi.c:347
void dbm_mpi_max_double(double *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_MAX and datatype MPI_DOUBLE.
Definition dbm_mpi.c:283
void dbm_mpi_alltoall_int(const int *sendbuf, const int sendcount, int *recvbuf, const int recvcount, const dbm_mpi_comm_t comm)
Wrapper around MPI_Alltoall for datatype MPI_INT.
Definition dbm_mpi.c:429
void dbm_mpi_alltoallv_double(const double *sendbuf, const int *sendcounts, const int *sdispls, double *recvbuf, const int *recvcounts, const int *rdispls, const dbm_mpi_comm_t comm)
Wrapper around MPI_Alltoallv for datatype MPI_DOUBLE.
Definition dbm_mpi.c:464
int dbm_mpi_comm_t
Definition dbm_mpi.h:19
void dbm_shard_release(dbm_shard_t *shard)
Internal routine for releasing a shard.
Definition dbm_shard.c:125
dbm_block_t * dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or allocating a new one.
Definition dbm_shard.c:276
dbm_block_t * dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for allocating the metadata of a new block.
Definition dbm_shard.c:197
dbm_block_t * dbm_shard_lookup(const dbm_shard_t *shard, const int row, const int col)
Internal routine for looking up a block from a shard.
Definition dbm_shard.c:175
void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard)
Internal routine for allocating and zeroing any promised block's data.
Definition dbm_shard.c:231
dbm_block_t * dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or promising a new one.
Definition dbm_shard.c:261
void dbm_shard_init(dbm_shard_t *shard)
Internal routine for initializing a shard.
Definition dbm_shard.c:64
void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b)
Internal routine for copying content of shard_b into shard_a.
Definition dbm_shard.c:80
static void const int const int i
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Internal struct for storing a block's metadata.
Definition dbm_shard.h:20
Internal struct for storing a two dimensional distribution.
dbm_mpi_comm_t comm
Internal struct for storing a block iterator.
Definition dbm_matrix.h:35
const dbm_matrix_t * matrix
Definition dbm_matrix.h:36
Internal struct for storing a matrix.
Definition dbm_matrix.h:20
int * row_sizes
Definition dbm_matrix.h:25
char * name
Definition dbm_matrix.h:22
int * col_sizes
Definition dbm_matrix.h:26
dbm_shard_t * shards
Definition dbm_matrix.h:28
dbm_distribution_t * dist
Definition dbm_matrix.h:21
Internal struct for storing a matrix shard.
Definition dbm_shard.h:30
double * data
Definition dbm_shard.h:43
int * hashtable
Definition dbm_shard.h:37
omp_lock_t lock
Definition dbm_shard.h:45
dbm_block_t * blocks
Definition dbm_shard.h:33
int data_size
Definition dbm_shard.h:42
int hashtable_size
Definition dbm_shard.h:35
int data_promised
Definition dbm_shard.h:39