(git:34ef472)
dbm_matrix.c
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* CP2K: A general program to perform molecular dynamics simulations */
3 /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 /* */
5 /* SPDX-License-Identifier: BSD-3-Clause */
6 /*----------------------------------------------------------------------------*/
7 
8 #include <assert.h>
9 #include <math.h>
10 #include <omp.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include "dbm_hyperparams.h"
17 #include "dbm_matrix.h"
18 #include "dbm_mpi.h"
19 
20 /*******************************************************************************
21  * \brief Creates a new matrix.
22  * \author Ole Schuett
23  ******************************************************************************/
24 void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
25  const char name[], const int nrows, const int ncols,
26  const int row_sizes[nrows], const int col_sizes[ncols]) {
27  assert(omp_get_num_threads() == 1);
28 
29  dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
30 
31  assert(dist->rows.length == nrows);
32  assert(dist->cols.length == ncols);
34  matrix->dist = dist;
35 
36  size_t size = (strlen(name) + 1) * sizeof(char);
37  matrix->name = malloc(size);
38  memcpy(matrix->name, name, size);
39 
40  matrix->nrows = nrows;
41  matrix->ncols = ncols;
42 
43  size = nrows * sizeof(int);
44  matrix->row_sizes = malloc(size);
45  memcpy(matrix->row_sizes, row_sizes, size);
46 
47  size = ncols * sizeof(int);
48  matrix->col_sizes = malloc(size);
49  memcpy(matrix->col_sizes, col_sizes, size);
50 
51  matrix->shards = malloc(dbm_get_num_shards(matrix) * sizeof(dbm_shard_t));
52 #pragma omp parallel for
53  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
54  dbm_shard_init(&matrix->shards[ishard]);
55  }
56 
57  assert(*matrix_out == NULL);
58  *matrix_out = matrix;
59 }
60 
61 /*******************************************************************************
62  * \brief Releases a matrix and all its ressources.
63  * \author Ole Schuett
64  ******************************************************************************/
65 void dbm_release(dbm_matrix_t *matrix) {
66  assert(omp_get_num_threads() == 1);
67  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
68  dbm_shard_release(&matrix->shards[ishard]);
69  }
71  free(matrix->row_sizes);
72  free(matrix->col_sizes);
73  free(matrix->shards);
74  free(matrix->name);
75  free(matrix);
76 }
77 
78 /*******************************************************************************
79  * \brief Copies content of matrix_b into matrix_a.
80  * Matrices must have the same row/col block sizes and distribution.
81  * \author Ole Schuett
82  ******************************************************************************/
83 void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
84  assert(omp_get_num_threads() == 1);
85 
86  assert(matrix_b->nrows == matrix_a->nrows);
87  for (int i = 0; i < matrix_b->nrows; i++) {
88  assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
89  }
90  assert(matrix_b->ncols == matrix_a->ncols);
91  for (int i = 0; i < matrix_b->ncols; i++) {
92  assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
93  }
94 
95  assert(matrix_a->dist == matrix_b->dist);
96 
97 #pragma omp parallel for schedule(dynamic)
98  for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
99  dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
100  }
101 }
102 
103 /*******************************************************************************
104  * \brief Copies content of matrix_b into matrix_a.
105  * Matrices may have different distributions.
106  * \author Ole Schuett
107  ******************************************************************************/
108 void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
109  assert(omp_get_num_threads() == 1);
110  assert(matrix->nrows == redist->nrows);
111  for (int i = 0; i < matrix->nrows; i++) {
112  assert(matrix->row_sizes[i] == redist->row_sizes[i]);
113  }
114  assert(matrix->ncols == redist->ncols);
115  for (int i = 0; i < matrix->ncols; i++) {
116  assert(matrix->col_sizes[i] == redist->col_sizes[i]);
117  }
118 
119  assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
120  const dbm_mpi_comm_t comm = redist->dist->comm;
121  const int nranks = dbm_mpi_comm_size(comm);
122 
123  // 1st pass: Compute send_count.
124  int send_count[nranks];
125  memset(send_count, 0, nranks * sizeof(int));
126  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
127  dbm_shard_t *shard = &matrix->shards[ishard];
128  for (int iblock = 0; iblock < shard->nblocks; iblock++) {
129  const dbm_block_t *blk = &shard->blocks[iblock];
130  const int row_size = matrix->row_sizes[blk->row];
131  const int col_size = matrix->col_sizes[blk->col];
132  const int block_size = row_size * col_size;
133  const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
134  assert(0 <= rank && rank < nranks);
135  send_count[rank] += 2 + block_size;
136  }
137  }
138 
139  // 1st communication: Exchange counts.
140  int recv_count[nranks];
141  dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
142 
143  // Compute displacements and allocate data buffers.
144  int send_displ[nranks + 1], recv_displ[nranks + 1];
145  send_displ[0] = recv_displ[0] = 0;
146  for (int irank = 1; irank < nranks + 1; irank++) {
147  send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
148  recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
149  }
150  const int total_send_count = send_displ[nranks];
151  const int total_recv_count = recv_displ[nranks];
152  double *data_send = malloc(total_send_count * sizeof(double));
153  double *data_recv = malloc(total_recv_count * sizeof(double));
154 
155  // 2nd pass: Fill send_data.
156  int send_data_positions[nranks];
157  memcpy(send_data_positions, send_displ, nranks * sizeof(int));
158  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
159  dbm_shard_t *shard = &matrix->shards[ishard];
160  for (int iblock = 0; iblock < shard->nblocks; iblock++) {
161  const dbm_block_t *blk = &shard->blocks[iblock];
162  const double *blk_data = &shard->data[blk->offset];
163  const int row_size = matrix->row_sizes[blk->row];
164  const int col_size = matrix->col_sizes[blk->col];
165  const int block_size = row_size * col_size;
166  const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
167  const int pos = send_data_positions[rank];
168  data_send[pos + 0] = blk->row; // send integers as doubles
169  data_send[pos + 1] = blk->col;
170  memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
171  send_data_positions[rank] += 2 + block_size;
172  }
173  }
174  for (int irank = 0; irank < nranks; irank++) {
175  assert(send_data_positions[irank] == send_displ[irank + 1]);
176  }
177 
178  // 2nd communication: Exchange data.
179  dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
180  recv_count, recv_displ, comm);
181  free(data_send);
182 
183  // 3rd pass: Unpack data.
184  dbm_clear(redist);
185  int recv_data_pos = 0;
186  while (recv_data_pos < total_recv_count) {
187  const int row = (int)data_recv[recv_data_pos + 0];
188  const int col = (int)data_recv[recv_data_pos + 1];
189  assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
190  assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
191  dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
192  const int row_size = matrix->row_sizes[row];
193  const int col_size = matrix->col_sizes[col];
194  const int block_size = row_size * col_size;
195  recv_data_pos += 2 + block_size;
196  }
197  assert(recv_data_pos == total_recv_count);
198  free(data_recv);
199 }
200 
201 /*******************************************************************************
202  * \brief Looks up a block from given matrics. This routine is thread-safe.
203  * If the block is not found then a null pointer is returned.
204  * \author Ole Schuett
205  ******************************************************************************/
206 void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
207  double **block, int *row_size, int *col_size) {
208  assert(0 <= row && row < matrix->nrows);
209  assert(0 <= col && col < matrix->ncols);
210  assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
211  *row_size = matrix->row_sizes[row];
212  *col_size = matrix->col_sizes[col];
213  *block = NULL;
214 
215  const int ishard = dbm_get_shard_index(matrix, row, col);
216  const dbm_shard_t *shard = &matrix->shards[ishard];
217  dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
218  if (blk != NULL) {
219  *block = &shard->data[blk->offset];
220  }
221 }
222 
223 /*******************************************************************************
224  * \brief Adds a block to given matrix. This routine is thread-safe.
225  * If block already exist then it gets overwritten (or summed).
226  * \author Ole Schuett
227  ******************************************************************************/
228 void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
229  const bool summation, const double *block) {
230  assert(0 <= row && row < matrix->nrows);
231  assert(0 <= col && col < matrix->ncols);
232  assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
233  const int row_size = matrix->row_sizes[row];
234  const int col_size = matrix->col_sizes[col];
235  const int block_size = row_size * col_size;
236 
237  const int ishard = dbm_get_shard_index(matrix, row, col);
238  dbm_shard_t *shard = &matrix->shards[ishard];
239  omp_set_lock(&shard->lock);
240  dbm_block_t *blk =
241  dbm_shard_get_or_allocate_block(shard, row, col, block_size);
242  double *blk_data = &shard->data[blk->offset];
243  if (summation) {
244  for (int i = 0; i < block_size; i++) {
245  blk_data[i] += block[i];
246  }
247  } else {
248  memcpy(blk_data, block, block_size * sizeof(double));
249  }
250  omp_unset_lock(&shard->lock);
251 }
252 
253 /*******************************************************************************
254  * \brief Remove all blocks from matrix, but does not release underlying memory.
255  * \author Ole Schuett
256  ******************************************************************************/
257 void dbm_clear(dbm_matrix_t *matrix) {
258  assert(omp_get_num_threads() == 1);
259 
260 #pragma omp parallel for schedule(dynamic)
261  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
262  dbm_shard_t *shard = &matrix->shards[ishard];
263  shard->nblocks = 0;
264  shard->data_size = 0;
265  shard->data_promised = 0;
266  // Does not deallocate memory, hence data_allocated remains unchanged.
267  memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
268  }
269 }
270 
271 /*******************************************************************************
272  * \brief Removes all blocks from the matrix whose norm is below the threshold.
273  * Blocks of size zero are always kept.
274  * \author Ole Schuett
275  ******************************************************************************/
276 void dbm_filter(dbm_matrix_t *matrix, const double eps) {
277  assert(omp_get_num_threads() == 1);
278 
279  if (eps == 0.0) {
280  return;
281  }
282 
283 #pragma omp parallel for schedule(dynamic)
284  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
285  dbm_shard_t *shard = &matrix->shards[ishard];
286  const int old_nblocks = shard->nblocks;
287  shard->nblocks = 0;
288  shard->data_promised = 0;
289  memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
290 
291  for (int iblock = 0; iblock < old_nblocks; iblock++) {
292  const dbm_block_t old_blk = shard->blocks[iblock];
293  const double *old_blk_data = &shard->data[old_blk.offset];
294  const int row_size = matrix->row_sizes[old_blk.row];
295  const int col_size = matrix->col_sizes[old_blk.col];
296  const int block_size = row_size * col_size;
297  double norm = 0.0; // Compute norm as double, but compare as float.
298  for (int i = 0; i < block_size; i++) {
299  norm += old_blk_data[i] * old_blk_data[i];
300  }
301  // For historic reasons zero-sized blocks are never filtered.
302  if (sqrt((float)norm) < eps && block_size > 0) {
303  continue; // filter the block
304  }
305  // Re-create block.
307  shard, old_blk.row, old_blk.col, block_size);
308  assert(new_blk->offset <= old_blk.offset);
309  if (new_blk->offset != old_blk.offset) {
310  // Using memmove instead of memcpy because it handles overlap correctly.
311  double *new_blk_data = &shard->data[new_blk->offset];
312  memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
313  }
314  }
315  shard->data_size = shard->data_promised;
316  // TODO: Could call realloc to release excess memory.
317  }
318 }
319 
320 /*******************************************************************************
321  * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
322  * This routine must always be called within an OpenMP parallel region.
323  * \author Ole Schuett
324  ******************************************************************************/
325 void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
326  const int rows[], const int cols[]) {
327  assert(omp_get_num_threads() == omp_get_max_threads() &&
328  "Please call dbm_reserve_blocks within an OpenMP parallel region.");
329  const int my_rank = matrix->dist->my_rank;
330  for (int i = 0; i < nblocks; i++) {
331  const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
332  dbm_shard_t *shard = &matrix->shards[ishard];
333  omp_set_lock(&shard->lock);
334  assert(0 <= rows[i] && rows[i] < matrix->nrows);
335  assert(0 <= cols[i] && cols[i] < matrix->ncols);
336  assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
337  const int row_size = matrix->row_sizes[rows[i]];
338  const int col_size = matrix->col_sizes[cols[i]];
339  const int block_size = row_size * col_size;
340  dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
341  omp_unset_lock(&shard->lock);
342  }
343 #pragma omp barrier
344 
345 #pragma omp for schedule(dynamic)
346  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
347  dbm_shard_t *shard = &matrix->shards[ishard];
349  }
350 }
351 
352 /*******************************************************************************
353  * \brief Multiplies all entries in the given matrix by the given factor alpha.
354  * \author Ole Schuett
355  ******************************************************************************/
356 void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
357  assert(omp_get_num_threads() == 1);
358  if (alpha == 1.0) {
359  return;
360  }
361  if (alpha == 0.0) {
362  dbm_zero(matrix);
363  return;
364  }
365 
366 #pragma omp parallel for schedule(dynamic)
367  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
368  dbm_shard_t *shard = &matrix->shards[ishard];
369  for (int i = 0; i < shard->data_size; i++) {
370  shard->data[i] *= alpha;
371  }
372  }
373 }
374 
375 /*******************************************************************************
376  * \brief Sets all blocks in the given matrix to zero.
377  * \author Ole Schuett
378  ******************************************************************************/
379 void dbm_zero(dbm_matrix_t *matrix) {
380  assert(omp_get_num_threads() == 1);
381 
382 #pragma omp parallel for schedule(dynamic)
383  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
384  dbm_shard_t *shard = &matrix->shards[ishard];
385  memset(shard->data, 0, shard->data_size * sizeof(double));
386  }
387 }
388 
389 /*******************************************************************************
390  * \brief Adds matrix_b to matrix_a.
391  * \author Ole Schuett
392  ******************************************************************************/
393 void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
394  assert(omp_get_num_threads() == 1);
395  assert(matrix_a->dist == matrix_b->dist);
396 
397 #pragma omp parallel for schedule(dynamic)
398  for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
399  dbm_shard_t *shard_a = &matrix_a->shards[ishard];
400  const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
401  for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
402  const dbm_block_t blk_b = shard_b->blocks[iblock];
403 
404  const int row_size = matrix_b->row_sizes[blk_b.row];
405  const int col_size = matrix_b->col_sizes[blk_b.col];
406  assert(row_size == matrix_a->row_sizes[blk_b.row]);
407  assert(col_size == matrix_a->col_sizes[blk_b.col]);
408  const int block_size = row_size * col_size;
410  shard_a, blk_b.row, blk_b.col, block_size);
411  double *data_a = &shard_a->data[blk_a->offset];
412  const double *data_b = &shard_b->data[blk_b.offset];
413  for (int i = 0; i < block_size; i++) {
414  data_a[i] += data_b[i];
415  }
416  }
417  }
418 }
419 
420 /*******************************************************************************
421  * \brief Creates an iterator for the blocks of the given matrix.
422  * The iteration order is not stable.
423  * This routine must always be called within an OpenMP parallel region.
424  * \author Ole Schuett
425  ******************************************************************************/
426 void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
427  assert(omp_get_num_threads() == omp_get_max_threads() &&
428  "Please call dbm_iterator_start within an OpenMP parallel region.");
429  dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
430  iter->matrix = matrix;
431  iter->next_block = 0;
432  iter->next_shard = omp_get_thread_num();
433  while (iter->next_shard < dbm_get_num_shards(matrix) &&
434  matrix->shards[iter->next_shard].nblocks == 0) {
435  iter->next_shard += omp_get_num_threads();
436  }
437  assert(*iter_out == NULL);
438  *iter_out = iter;
439 }
440 
441 /*******************************************************************************
442  * \brief Returns number of blocks the iterator will provide to calling thread.
443  * \author Ole Schuett
444  ******************************************************************************/
446  int num_blocks = 0;
447  for (int ishard = omp_get_thread_num();
448  ishard < dbm_get_num_shards(iter->matrix);
449  ishard += omp_get_num_threads()) {
450  num_blocks += iter->matrix->shards[ishard].nblocks;
451  }
452  return num_blocks;
453 }
454 
455 /*******************************************************************************
456  * \brief Tests whether the given iterator has any block left.
457  * \author Ole Schuett
458  ******************************************************************************/
460  return iter->next_shard < dbm_get_num_shards(iter->matrix);
461 }
462 
463 /*******************************************************************************
464  * \brief Returns the next block from the given iterator.
465  * \author Ole Schuett
466  ******************************************************************************/
467 void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
468  double **block, int *row_size, int *col_size) {
469  const dbm_matrix_t *matrix = iter->matrix;
470  assert(iter->next_shard < dbm_get_num_shards(matrix));
471  const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
472  assert(iter->next_block < shard->nblocks);
473  dbm_block_t *blk = &shard->blocks[iter->next_block];
474 
475  *row = blk->row;
476  *col = blk->col;
477  *row_size = matrix->row_sizes[blk->row];
478  *col_size = matrix->col_sizes[blk->col];
479  *block = &shard->data[blk->offset];
480 
481  iter->next_block++;
482  if (iter->next_block >= shard->nblocks) {
483  // Advance to the next non-empty shard...
484  iter->next_shard += omp_get_num_threads();
485  while (iter->next_shard < dbm_get_num_shards(matrix) &&
486  matrix->shards[iter->next_shard].nblocks == 0) {
487  iter->next_shard += omp_get_num_threads();
488  }
489  iter->next_block = 0; // ...and continue with its first block.
490  }
491 }
492 
493 /*******************************************************************************
494  * \brief Releases the given iterator.
495  * \author Ole Schuett
496  ******************************************************************************/
497 void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
498 
499 /*******************************************************************************
500  * \brief Computes a checksum of the given matrix.
501  * \author Ole Schuett
502  ******************************************************************************/
503 double dbm_checksum(const dbm_matrix_t *matrix) {
504  double checksum = 0.0;
505  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
506  const dbm_shard_t *shard = &matrix->shards[ishard];
507  for (int i = 0; i < shard->data_size; i++) {
508  checksum += shard->data[i] * shard->data[i];
509  }
510  }
511  dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
512  return checksum;
513 }
514 
515 /*******************************************************************************
516  * \brief Returns the absolute value of the larges element of the entire matrix.
517  * \author Ole Schuett
518  ******************************************************************************/
519 double dbm_maxabs(const dbm_matrix_t *matrix) {
520  double maxabs = 0.0;
521  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
522  dbm_shard_t *shard = &matrix->shards[ishard];
523  for (int i = 0; i < shard->data_size; i++) {
524  maxabs = fmax(maxabs, fabs(shard->data[i]));
525  }
526  }
527  dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
528  return maxabs;
529 }
530 
531 /*******************************************************************************
532  * \brief Returns the name of the matrix of the given matrix.
533  * \author Ole Schuett
534  ******************************************************************************/
535 const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
536 
537 /*******************************************************************************
538  * \brief Returns the number of local Non-Zero Elements of the given matrix.
539  * \author Ole Schuett
540  ******************************************************************************/
541 int dbm_get_nze(const dbm_matrix_t *matrix) {
542  int nze = 0;
543  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
544  nze += matrix->shards[ishard].data_size;
545  }
546  return nze;
547 }
548 
549 /*******************************************************************************
550  * \brief Returns the number of local blocks of the given matrix.
551  * \author Ole Schuett
552  ******************************************************************************/
553 int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
554  int nblocks = 0;
555  for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
556  nblocks += matrix->shards[ishard].nblocks;
557  }
558  return nblocks;
559 }
560 
561 /*******************************************************************************
562  * \brief Returns the row block sizes of the given matrix.
563  * \author Ole Schuett
564  ******************************************************************************/
565 void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
566  const int **row_sizes) {
567  *nrows = matrix->nrows;
568  *row_sizes = matrix->row_sizes;
569 }
570 
571 /*******************************************************************************
572  * \brief Returns the column block sizes of the given matrix.
573  * \author Ole Schuett
574  ******************************************************************************/
575 void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
576  const int **col_sizes) {
577  *ncols = matrix->ncols;
578  *col_sizes = matrix->col_sizes;
579 }
580 
581 /*******************************************************************************
582  * \brief Returns the local row block sizes of the given matrix.
583  * \author Ole Schuett
584  ******************************************************************************/
585 void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
586  const int **local_rows) {
587  *nlocal_rows = matrix->dist->rows.nlocals;
588  *local_rows = matrix->dist->rows.local_indicies;
589 }
590 
591 /*******************************************************************************
592  * \brief Returns the local column block sizes of the given matrix.
593  * \author Ole Schuett
594  ******************************************************************************/
595 void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
596  const int **local_cols) {
597  *nlocal_cols = matrix->dist->cols.nlocals;
598  *local_cols = matrix->dist->cols.local_indicies;
599 }
600 
601 /*******************************************************************************
602  * \brief Returns the MPI rank on which the given block should be stored.
603  * \author Ole Schuett
604  ******************************************************************************/
605 int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
606  const int col) {
607  return dbm_distribution_stored_coords(matrix->dist, row, col);
608 }
609 
610 /*******************************************************************************
611  * \brief Returns the distribution of the given matrix.
612  * \author Ole Schuett
613  ******************************************************************************/
615  return matrix->dist;
616 }
617 
618 // EOF
int dbm_distribution_stored_coords(const dbm_distribution_t *dist, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col, double **block, int *row_size, int *col_size)
Looks up a block from given matrics. This routine is thread-safe. If the block is not found then a nu...
Definition: dbm_matrix.c:206
void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist)
Copies content of matrix_b into matrix_a. Matrices may have different distributions.
Definition: dbm_matrix.c:108
void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks, const int rows[], const int cols[])
Adds list of blocks efficiently. The blocks will be filled with zeros. This routine must always be ca...
Definition: dbm_matrix.c:325
bool dbm_iterator_blocks_left(const dbm_iterator_t *iter)
Tests whether the given iterator has any block left.
Definition: dbm_matrix.c:459
double dbm_maxabs(const dbm_matrix_t *matrix)
Returns the absolute value of the larges element of the entire matrix.
Definition: dbm_matrix.c:519
void dbm_filter(dbm_matrix_t *matrix, const double eps)
Removes all blocks from the matrix whose norm is below the threshold. Blocks of size zero are always ...
Definition: dbm_matrix.c:276
void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols, const int **local_cols)
Returns the local column block sizes of the given matrix.
Definition: dbm_matrix.c:595
void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b)
Adds matrix_b to matrix_a.
Definition: dbm_matrix.c:393
void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows, const int **local_rows)
Returns the local row block sizes of the given matrix.
Definition: dbm_matrix.c:585
void dbm_iterator_stop(dbm_iterator_t *iter)
Releases the given iterator.
Definition: dbm_matrix.c:497
void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols, const int **col_sizes)
Returns the column block sizes of the given matrix.
Definition: dbm_matrix.c:575
void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist, const char name[], const int nrows, const int ncols, const int row_sizes[nrows], const int col_sizes[ncols])
Creates a new matrix.
Definition: dbm_matrix.c:24
const char * dbm_get_name(const dbm_matrix_t *matrix)
Returns the name of the matrix of the given matrix.
Definition: dbm_matrix.c:535
void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col, const bool summation, const double *block)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
Definition: dbm_matrix.c:228
int dbm_iterator_num_blocks(const dbm_iterator_t *iter)
Returns number of blocks the iterator will provide to calling thread.
Definition: dbm_matrix.c:445
void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix)
Creates an iterator for the blocks of the given matrix. The iteration order is not stable....
Definition: dbm_matrix.c:426
void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col, double **block, int *row_size, int *col_size)
Returns the next block from the given iterator.
Definition: dbm_matrix.c:467
double dbm_checksum(const dbm_matrix_t *matrix)
Computes a checksum of the given matrix.
Definition: dbm_matrix.c:503
int dbm_get_num_blocks(const dbm_matrix_t *matrix)
Returns the number of local blocks of the given matrix.
Definition: dbm_matrix.c:553
void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows, const int **row_sizes)
Returns the row block sizes of the given matrix.
Definition: dbm_matrix.c:565
void dbm_scale(dbm_matrix_t *matrix, const double alpha)
Multiplies all entries in the given matrix by the given factor alpha.
Definition: dbm_matrix.c:356
void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b)
Copies content of matrix_b into matrix_a. Matrices must have the same row/col block sizes and distrib...
Definition: dbm_matrix.c:83
void dbm_clear(dbm_matrix_t *matrix)
Remove all blocks from matrix, but does not release underlying memory.
Definition: dbm_matrix.c:257
void dbm_zero(dbm_matrix_t *matrix)
Sets all blocks in the given matrix to zero.
Definition: dbm_matrix.c:379
int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
Definition: dbm_matrix.c:605
int dbm_get_nze(const dbm_matrix_t *matrix)
Returns the number of local Non-Zero Elements of the given matrix.
Definition: dbm_matrix.c:541
void dbm_release(dbm_matrix_t *matrix)
Releases a matrix and all its ressources.
Definition: dbm_matrix.c:65
const dbm_distribution_t * dbm_get_distribution(const dbm_matrix_t *matrix)
Returns the distribution of the given matrix.
Definition: dbm_matrix.c:614
static int dbm_get_shard_index(const dbm_matrix_t *matrix, const int row, const int col)
Internal routine for getting a block's shard index.
Definition: dbm_matrix.h:240
static int dbm_get_num_shards(const dbm_matrix_t *matrix)
Internal routine that returns the number of shards for given matrix.
Definition: dbm_matrix.h:232
bool dbm_mpi_comms_are_similar(const dbm_mpi_comm_t comm1, const dbm_mpi_comm_t comm2)
Wrapper around MPI_Comm_compare.
Definition: dbm_mpi.c:221
int dbm_mpi_comm_size(const dbm_mpi_comm_t comm)
Wrapper around MPI_Comm_size.
Definition: dbm_mpi.c:107
void dbm_mpi_sum_double(double *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_DOUBLE.
Definition: dbm_mpi.c:308
void dbm_mpi_max_double(double *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_MAX and datatype MPI_DOUBLE.
Definition: dbm_mpi.c:255
void dbm_mpi_alltoall_int(const int *sendbuf, const int sendcount, int *recvbuf, const int recvcount, const dbm_mpi_comm_t comm)
Wrapper around MPI_Alltoall for datatype MPI_INT.
Definition: dbm_mpi.c:386
void dbm_mpi_alltoallv_double(const double *sendbuf, const int *sendcounts, const int *sdispls, double *recvbuf, const int *recvcounts, const int *rdispls, const dbm_mpi_comm_t comm)
Wrapper around MPI_Alltoallv for datatype MPI_DOUBLE.
Definition: dbm_mpi.c:421
int dbm_mpi_comm_t
Definition: dbm_mpi.h:18
dbm_block_t * dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for allocating the metadata of a new block.
Definition: dbm_shard.c:171
void dbm_shard_release(dbm_shard_t *shard)
Internal routine for releasing a shard.
Definition: dbm_shard.c:107
dbm_block_t * dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or allocating a new one.
Definition: dbm_shard.c:240
dbm_block_t * dbm_shard_lookup(const dbm_shard_t *shard, const int row, const int col)
Internal routine for looking up a block from a shard.
Definition: dbm_shard.c:149
void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard)
Internal routine for allocating and zeroing any promised block's data.
Definition: dbm_shard.c:203
void dbm_shard_init(dbm_shard_t *shard)
Internal routine for initializing a shard.
Definition: dbm_shard.c:63
void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b)
Internal routine for copying content of shard_b into shard_a.
Definition: dbm_shard.c:80
dbm_block_t * dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or promising a new one.
Definition: dbm_shard.c:225
static void const int const int i
subroutine, public dbm_distribution_release(dist)
Decreases the reference counter of the given distribution.
Definition: dbm_api.F:1401
subroutine, public dbm_distribution_hold(dist)
Increases the reference counter of the given distribution.
Definition: dbm_api.F:1378
Internal struct for storing a block's metadata.
Definition: dbm_shard.h:20
int offset
Definition: dbm_shard.h:23
Internal struct for storing a two dimensional distribution.
dbm_dist_1d_t rows
dbm_mpi_comm_t comm
dbm_dist_1d_t cols
Internal struct for storing a block iterator.
Definition: dbm_matrix.h:35
const dbm_matrix_t * matrix
Definition: dbm_matrix.h:36
Internal struct for storing a matrix.
Definition: dbm_matrix.h:20
int * row_sizes
Definition: dbm_matrix.h:25
char * name
Definition: dbm_matrix.h:22
int * col_sizes
Definition: dbm_matrix.h:26
dbm_shard_t * shards
Definition: dbm_matrix.h:28
dbm_distribution_t * dist
Definition: dbm_matrix.h:21
Internal struct for storing a matrix shard.
Definition: dbm_shard.h:30
double * data
Definition: dbm_shard.h:44
int * hashtable
Definition: dbm_shard.h:38
omp_lock_t lock
Definition: dbm_shard.h:46
dbm_block_t * blocks
Definition: dbm_shard.h:33
int nblocks
Definition: dbm_shard.h:31
int data_size
Definition: dbm_shard.h:43
int hashtable_size
Definition: dbm_shard.h:35
int data_promised
Definition: dbm_shard.h:40