(git:48ec5b0)
Loading...
Searching...
No Matches
dbm_matrix.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7
8#include <assert.h>
9#include <math.h>
10#include <omp.h>
11#include <stdbool.h>
12#include <stddef.h>
13#include <stdlib.h>
14#include <string.h>
15
16#include "dbm_hyperparams.h"
17#include "dbm_matrix.h"
18
19/*******************************************************************************
20 * \brief Creates a new matrix.
21 * \author Ole Schuett
22 ******************************************************************************/
23void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
24 const char name[], const int nrows, const int ncols,
25 const int row_sizes[nrows], const int col_sizes[ncols]) {
26 assert(omp_get_num_threads() == 1);
27
28 dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
29
30 assert(dist->rows.length == nrows);
31 assert(dist->cols.length == ncols);
32 dbm_distribution_hold(dist);
33 matrix->dist = dist;
34
35 size_t size = (strlen(name) + 1) * sizeof(char);
36 matrix->name = malloc(size);
37 assert(matrix->name != NULL && name != NULL);
38 memcpy(matrix->name, name, size);
39
40 matrix->nrows = nrows;
41 matrix->ncols = ncols;
42
43 size = nrows * sizeof(int);
44 matrix->row_sizes = malloc(size);
45 assert(matrix->row_sizes != NULL || size == 0);
46 if (size != 0) {
47 memcpy(matrix->row_sizes, row_sizes, size);
48 }
49
50 size = ncols * sizeof(int);
51 matrix->col_sizes = malloc(size);
52 assert(matrix->col_sizes != NULL || size == 0);
53 if (size != 0) {
54 memcpy(matrix->col_sizes, col_sizes, size);
55 }
56
57 const int num_shards = dbm_get_num_shards(matrix);
58 matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
59 assert(matrix->shards != NULL || num_shards == 0);
60#pragma omp parallel for
61 for (int ishard = 0; ishard < num_shards; ishard++) {
62 dbm_shard_init(&matrix->shards[ishard]);
63 }
64
65 assert(*matrix_out == NULL);
66 *matrix_out = matrix;
67}
68
69/*******************************************************************************
70 * \brief Releases a matrix and all its ressources.
71 * \author Ole Schuett
72 ******************************************************************************/
73void dbm_release(dbm_matrix_t *matrix) {
74 assert(omp_get_num_threads() == 1);
75 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
76 dbm_shard_release(&matrix->shards[ishard]);
77 }
78 dbm_distribution_release(matrix->dist);
79 free(matrix->row_sizes);
80 free(matrix->col_sizes);
81 free(matrix->shards);
82 free(matrix->name);
83 free(matrix);
84}
85
86/*******************************************************************************
87 * \brief Copies content of matrix_b into matrix_a.
88 * Matrices must have the same row/col block sizes and distribution.
89 * \author Ole Schuett
90 ******************************************************************************/
91void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
92 assert(omp_get_num_threads() == 1);
93
94 assert(matrix_b->nrows == matrix_a->nrows);
95 for (int i = 0; i < matrix_b->nrows; i++) {
96 assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
97 }
98 assert(matrix_b->ncols == matrix_a->ncols);
99 for (int i = 0; i < matrix_b->ncols; i++) {
100 assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
101 }
102
103 assert(matrix_a->dist == matrix_b->dist);
104
105#pragma omp parallel for DBM_OMP_SCHEDULE
106 for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
107 dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
108 }
109}
110
111/*******************************************************************************
112 * \brief Copies content of matrix_b into matrix_a.
113 * Matrices may have different distributions.
114 * \author Ole Schuett
115 ******************************************************************************/
116void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
117 assert(omp_get_num_threads() == 1);
118 assert(matrix->nrows == redist->nrows);
119 for (int i = 0; i < matrix->nrows; i++) {
120 assert(matrix->row_sizes[i] == redist->row_sizes[i]);
121 }
122 assert(matrix->ncols == redist->ncols);
123 for (int i = 0; i < matrix->ncols; i++) {
124 assert(matrix->col_sizes[i] == redist->col_sizes[i]);
125 }
126
127 assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
128 const dbm_mpi_comm_t comm = redist->dist->comm;
129 const int nranks = dbm_mpi_comm_size(comm);
130
131 // 1st pass: Compute send_count.
132 int send_count[nranks];
133 memset(send_count, 0, nranks * sizeof(int));
134 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
135 dbm_shard_t *shard = &matrix->shards[ishard];
136 for (int iblock = 0; iblock < shard->nblocks; iblock++) {
137 const dbm_block_t *blk = &shard->blocks[iblock];
138 const int row_size = matrix->row_sizes[blk->row];
139 const int col_size = matrix->col_sizes[blk->col];
140 const int block_size = row_size * col_size;
141 const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
142 assert(0 <= rank && rank < nranks);
143 send_count[rank] += 2 + block_size;
144 }
145 }
146
147 // 1st communication: Exchange counts.
148 int recv_count[nranks];
149 dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
150
151 // Compute displacements and allocate data buffers.
152 int send_displ[nranks + 1], recv_displ[nranks + 1];
153 send_displ[0] = recv_displ[0] = 0;
154 for (int irank = 1; irank <= nranks; irank++) {
155 send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
156 recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
157 }
158 const int total_send_count = send_displ[nranks];
159 const int total_recv_count = recv_displ[nranks];
160 double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
161 double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
162
163 // 2nd pass: Fill send_data.
164 int send_data_positions[nranks];
165 memcpy(send_data_positions, send_displ, nranks * sizeof(int));
166 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
167 dbm_shard_t *shard = &matrix->shards[ishard];
168 for (int iblock = 0; iblock < shard->nblocks; iblock++) {
169 const dbm_block_t *blk = &shard->blocks[iblock];
170 const double *blk_data = &shard->data[blk->offset];
171 const int row_size = matrix->row_sizes[blk->row];
172 const int col_size = matrix->col_sizes[blk->col];
173 const int block_size = row_size * col_size;
174 const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
175 const int pos = send_data_positions[rank];
176 data_send[pos + 0] = blk->row; // send integers as doubles
177 data_send[pos + 1] = blk->col;
178 memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
179 send_data_positions[rank] += 2 + block_size;
180 }
181 }
182 for (int irank = 0; irank < nranks; irank++) {
183 assert(send_data_positions[irank] == send_displ[irank + 1]);
184 }
185
186 // 2nd communication: Exchange data.
187 dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
188 recv_count, recv_displ, comm);
189 dbm_mpi_free_mem(data_send);
190
191 // 3rd pass: Unpack data.
192 dbm_clear(redist);
193 int recv_data_pos = 0;
194 while (recv_data_pos < total_recv_count) {
195 const int row = (int)data_recv[recv_data_pos + 0];
196 const int col = (int)data_recv[recv_data_pos + 1];
197 assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
198 assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
199 dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
200 const int row_size = matrix->row_sizes[row];
201 const int col_size = matrix->col_sizes[col];
202 const int block_size = row_size * col_size;
203 recv_data_pos += 2 + block_size;
204 }
205 assert(recv_data_pos == total_recv_count);
206 dbm_mpi_free_mem(data_recv);
207}
208
209/*******************************************************************************
210 * \brief Looks up a block from given matrics. This routine is thread-safe.
211 * If the block is not found then a null pointer is returned.
212 * \author Ole Schuett
213 ******************************************************************************/
214void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
215 double **block, int *row_size, int *col_size) {
216 assert(0 <= row && row < matrix->nrows);
217 assert(0 <= col && col < matrix->ncols);
218 assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
219 *row_size = matrix->row_sizes[row];
220 *col_size = matrix->col_sizes[col];
221 *block = NULL;
222
223 const int ishard = dbm_get_shard_index(matrix, row, col);
224 const dbm_shard_t *shard = &matrix->shards[ishard];
225 dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
226 if (blk != NULL) {
227 *block = &shard->data[blk->offset];
228 }
229}
230
231/*******************************************************************************
232 * \brief Adds a block to given matrix. This routine is thread-safe.
233 * If block already exist then it gets overwritten (or summed).
234 * \author Ole Schuett
235 ******************************************************************************/
236void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
237 const bool summation, const double *block) {
238 assert(0 <= row && row < matrix->nrows);
239 assert(0 <= col && col < matrix->ncols);
240 assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
241 const int row_size = matrix->row_sizes[row];
242 const int col_size = matrix->col_sizes[col];
243 const int block_size = row_size * col_size;
244
245 const int ishard = dbm_get_shard_index(matrix, row, col);
246 dbm_shard_t *shard = &matrix->shards[ishard];
247 omp_set_lock(&shard->lock);
248 dbm_block_t *blk =
249 dbm_shard_get_or_allocate_block(shard, row, col, block_size);
250 double *blk_data = &shard->data[blk->offset];
251 if (summation) {
252 assert(blk_data != NULL || 0 == block_size);
253 for (int i = 0; i < block_size; i++) {
254 blk_data[i] += block[i];
255 }
256 } else if (block_size != 0) {
257 memcpy(blk_data, block, block_size * sizeof(double));
258 }
259 omp_unset_lock(&shard->lock);
260}
261
262/*******************************************************************************
263 * \brief Remove all blocks from matrix, but does not release underlying memory.
264 * \author Ole Schuett
265 ******************************************************************************/
266void dbm_clear(dbm_matrix_t *matrix) {
267 assert(omp_get_num_threads() == 1);
268
269#pragma omp parallel for DBM_OMP_SCHEDULE
270 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
271 dbm_shard_t *shard = &matrix->shards[ishard];
272 shard->nblocks = 0;
273 shard->data_size = 0;
274 shard->data_promised = 0;
275 // Does not deallocate memory, hence data_allocated remains unchanged.
276 memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
277 }
278}
279
280/*******************************************************************************
281 * \brief Removes all blocks from the matrix whose norm is below the threshold.
282 * Blocks of size zero are always kept.
283 * \author Ole Schuett
284 ******************************************************************************/
285void dbm_filter(dbm_matrix_t *matrix, const double eps) {
286 assert(omp_get_num_threads() == 1);
287
288 if (eps == 0.0) {
289 return;
290 }
291 const double eps2 = eps * eps;
292
293#pragma omp parallel for DBM_OMP_SCHEDULE
294 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
295 dbm_shard_t *shard = &matrix->shards[ishard];
296 const int old_nblocks = shard->nblocks;
297 shard->nblocks = 0;
298 shard->data_promised = 0;
299 memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
300
301 for (int iblock = 0; iblock < old_nblocks; iblock++) {
302 const dbm_block_t old_blk = shard->blocks[iblock];
303 const double *old_blk_data = &shard->data[old_blk.offset];
304 const int row_size = matrix->row_sizes[old_blk.row];
305 const int col_size = matrix->col_sizes[old_blk.col];
306 const int block_size = row_size * col_size;
307 double norm = 0.0;
308 for (int i = 0; i < block_size; i++) {
309 const double value = old_blk_data[i];
310 norm += value * value;
311 if (eps2 <= norm) {
312 break;
313 }
314 }
315 // For historic reasons zero-sized blocks are never filtered.
316 if (block_size > 0 && norm < eps2) {
317 continue; // filter the block
318 }
319 // Re-create block.
321 shard, old_blk.row, old_blk.col, block_size);
322 assert(new_blk->offset <= old_blk.offset);
323 if (new_blk->offset != old_blk.offset) {
324 // Using memmove because it can handle overlapping buffers.
325 double *new_blk_data = &shard->data[new_blk->offset];
326 memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
327 }
328 }
329 shard->data_size = shard->data_promised;
330 // TODO: Could call realloc to release excess memory.
331 }
332}
333
334/*******************************************************************************
335 * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
336 * This routine must always be called within an OpenMP parallel region.
337 * \author Ole Schuett
338 ******************************************************************************/
339void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
340 const int rows[], const int cols[]) {
341 assert(omp_get_num_threads() == omp_get_max_threads() &&
342 "Please call dbm_reserve_blocks within an OpenMP parallel region.");
343 const int my_rank = matrix->dist->my_rank;
344 for (int i = 0; i < nblocks; i++) {
345 const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
346 dbm_shard_t *shard = &matrix->shards[ishard];
347 omp_set_lock(&shard->lock);
348 assert(0 <= rows[i] && rows[i] < matrix->nrows);
349 assert(0 <= cols[i] && cols[i] < matrix->ncols);
350 assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
351 const int row_size = matrix->row_sizes[rows[i]];
352 const int col_size = matrix->col_sizes[cols[i]];
353 const int block_size = row_size * col_size;
354 dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
355 omp_unset_lock(&shard->lock);
356 }
357#pragma omp barrier
358
359#pragma omp for DBM_OMP_SCHEDULE
360 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
361 dbm_shard_t *shard = &matrix->shards[ishard];
363 }
364}
365
366/*******************************************************************************
367 * \brief Multiplies all entries in the given matrix by the given factor alpha.
368 * \author Ole Schuett
369 ******************************************************************************/
370void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
371 assert(omp_get_num_threads() == 1);
372 if (alpha == 1.0) {
373 return;
374 }
375 if (alpha == 0.0) {
376 dbm_zero(matrix);
377 return;
378 }
379
380#pragma omp parallel for DBM_OMP_SCHEDULE
381 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
382 dbm_shard_t *shard = &matrix->shards[ishard];
383 for (int i = 0; i < shard->data_size; i++) {
384 shard->data[i] *= alpha;
385 }
386 }
387}
388
389/*******************************************************************************
390 * \brief Sets all blocks in the given matrix to zero.
391 * \author Ole Schuett
392 ******************************************************************************/
393void dbm_zero(dbm_matrix_t *matrix) {
394 assert(omp_get_num_threads() == 1);
395
396#pragma omp parallel for DBM_OMP_SCHEDULE
397 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
398 dbm_shard_t *shard = &matrix->shards[ishard];
399 if (shard->data != NULL) {
400 memset(shard->data, 0, shard->data_size * sizeof(double));
401 }
402 }
403}
404
405/*******************************************************************************
406 * \brief Adds matrix_b to matrix_a.
407 * \author Ole Schuett
408 ******************************************************************************/
409void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
410 assert(omp_get_num_threads() == 1);
411 assert(matrix_a->dist == matrix_b->dist);
412
413#pragma omp parallel for DBM_OMP_SCHEDULE
414 for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
415 dbm_shard_t *shard_a = &matrix_a->shards[ishard];
416 const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
417 for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
418 const dbm_block_t blk_b = shard_b->blocks[iblock];
419 const int row_size = matrix_b->row_sizes[blk_b.row];
420 const int col_size = matrix_b->col_sizes[blk_b.col];
421 assert(row_size == matrix_a->row_sizes[blk_b.row]);
422 assert(col_size == matrix_a->col_sizes[blk_b.col]);
423 const int block_size = row_size * col_size;
425 shard_a, blk_b.row, blk_b.col, block_size);
426 double *data_a = &shard_a->data[blk_a->offset];
427 const double *data_b = &shard_b->data[blk_b.offset];
428 for (int i = 0; i < block_size; i++) {
429 data_a[i] += data_b[i];
430 }
431 }
432 }
433}
434
435/*******************************************************************************
436 * \brief Creates an iterator for the blocks of the given matrix.
437 * The iteration order is not stable.
438 * This routine must always be called within an OpenMP parallel region.
439 * \author Ole Schuett
440 ******************************************************************************/
441void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
442 assert(omp_get_num_threads() == omp_get_max_threads() &&
443 "Please call dbm_iterator_start within an OpenMP parallel region.");
444 dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
445 assert(iter != NULL);
446 iter->matrix = matrix;
447 iter->next_block = 0;
448 iter->next_shard = omp_get_thread_num();
449 while (iter->next_shard < dbm_get_num_shards(matrix) &&
450 matrix->shards[iter->next_shard].nblocks == 0) {
451 iter->next_shard += omp_get_num_threads();
452 }
453 assert(*iter_out == NULL);
454 *iter_out = iter;
455}
456
457/*******************************************************************************
458 * \brief Returns number of blocks the iterator will provide to calling thread.
459 * \author Ole Schuett
460 ******************************************************************************/
461int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
462 int num_blocks = 0;
463 for (int ishard = omp_get_thread_num();
464 ishard < dbm_get_num_shards(iter->matrix);
465 ishard += omp_get_num_threads()) {
466 num_blocks += iter->matrix->shards[ishard].nblocks;
467 }
468 return num_blocks;
469}
470
471/*******************************************************************************
472 * \brief Tests whether the given iterator has any block left.
473 * \author Ole Schuett
474 ******************************************************************************/
475bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
476 return iter->next_shard < dbm_get_num_shards(iter->matrix);
477}
478
479/*******************************************************************************
480 * \brief Returns the next block from the given iterator.
481 * \author Ole Schuett
482 ******************************************************************************/
483void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
484 double **block, int *row_size, int *col_size) {
485 const dbm_matrix_t *matrix = iter->matrix;
486 assert(iter->next_shard < dbm_get_num_shards(matrix));
487 const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
488 assert(iter->next_block < shard->nblocks);
489 dbm_block_t *blk = &shard->blocks[iter->next_block];
490
491 *row = blk->row;
492 *col = blk->col;
493 *row_size = matrix->row_sizes[blk->row];
494 *col_size = matrix->col_sizes[blk->col];
495 *block = &shard->data[blk->offset];
496
497 iter->next_block++;
498 if (iter->next_block >= shard->nblocks) {
499 // Advance to the next non-empty shard...
500 iter->next_shard += omp_get_num_threads();
501 while (iter->next_shard < dbm_get_num_shards(matrix) &&
502 matrix->shards[iter->next_shard].nblocks == 0) {
503 iter->next_shard += omp_get_num_threads();
504 }
505 iter->next_block = 0; // ...and continue with its first block.
506 }
507}
508
509/*******************************************************************************
510 * \brief Releases the given iterator.
511 * \author Ole Schuett
512 ******************************************************************************/
513void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
514
515/*******************************************************************************
516 * \brief Private routine to accumulate using Kahan's summation.
517 * \author Hans Pabst
518 ******************************************************************************/
519static double kahan_sum(double value, double *accumulator,
520 double *compensation) {
521 double r, c;
522 assert(NULL != accumulator && NULL != compensation);
523 c = value - *compensation;
524 r = *accumulator + c;
525 *compensation = (r - *accumulator) - c;
526 *accumulator = r;
527 return r;
528}
529
530/*******************************************************************************
531 * \brief Computes a checksum of the given matrix.
532 * \author Ole Schuett
533 ******************************************************************************/
534double dbm_checksum(const dbm_matrix_t *matrix) {
535 double checksum = 0.0, compensation = 0.0;
536 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
537 const dbm_shard_t *shard = &matrix->shards[ishard];
538 for (int i = 0; i < shard->data_size; i++) {
539 const double value = shard->data[i];
540 kahan_sum(value * value, &checksum, &compensation);
541 }
542 }
543 dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
544 return checksum;
545}
546
547/*******************************************************************************
548 * \brief Returns the largest absolute value of all matrix elements.
549 * \author Ole Schuett
550 ******************************************************************************/
551double dbm_maxabs(const dbm_matrix_t *matrix) {
552 double maxabs = 0.0;
553 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
554 const dbm_shard_t *shard = &matrix->shards[ishard];
555 for (int i = 0; i < shard->data_size; i++) {
556 maxabs = fmax(maxabs, fabs(shard->data[i]));
557 }
558 }
559 dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
560 return maxabs;
561}
562
563/*******************************************************************************
564 * \brief Calculates maximum relative difference between matrix_a and matrix_b.
565 * \author Hans Pabst
566 ******************************************************************************/
567double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
568 const int num_shards = dbm_get_num_shards(matrix_a);
569 double epsilon = 0;
570
571 assert(omp_get_num_threads() == 1);
572 assert(matrix_a->dist == matrix_b->dist);
573 assert(num_shards == dbm_get_num_shards(matrix_b));
574
575#pragma omp parallel for DBM_OMP_SCHEDULE
576 for (int ishard = 0; ishard < num_shards; ++ishard) {
577 const dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
578 const dbm_shard_t *const shard_b = &matrix_b->shards[ishard];
579 assert(shard_a->nblocks == shard_b->nblocks);
580 for (int iblock = 0; iblock < shard_a->nblocks; ++iblock) {
581 const dbm_block_t *const blk_a = &shard_a->blocks[iblock];
582 const dbm_block_t *const blk_b =
583 dbm_shard_lookup(shard_b, blk_a->row, blk_a->col);
584 const int row_size = matrix_a->row_sizes[blk_a->row];
585 const int col_size = matrix_a->col_sizes[blk_a->col];
586 assert(row_size == matrix_b->row_sizes[blk_b->row]);
587 assert(col_size == matrix_b->col_sizes[blk_b->col]);
588 const double *const data_a = &shard_a->data[blk_a->offset];
589 const double *const data_b = &shard_b->data[blk_b->offset];
590 const int block_size = row_size * col_size;
591 for (int i = 0; i < block_size; ++i) {
592 const double d = data_a[i] - data_b[i];
593 const double e = fabs(0 != data_a[i] ? (d / data_a[i]) : d);
594 if (epsilon < e) {
595 epsilon = e;
596 }
597 }
598 }
599 }
600
601 dbm_mpi_max_double(&epsilon, 1, matrix_a->dist->comm);
602 return epsilon;
603}
604
605/*******************************************************************************
606 * \brief Returns the name of the matrix of the given matrix.
607 * \author Ole Schuett
608 ******************************************************************************/
609const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
610
611/*******************************************************************************
612 * \brief Returns the number of local Non-Zero Elements of the given matrix.
613 * \author Ole Schuett
614 ******************************************************************************/
615int dbm_get_nze(const dbm_matrix_t *matrix) {
616 int nze = 0;
617 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
618 nze += matrix->shards[ishard].data_size;
619 }
620 return nze;
621}
622
623/*******************************************************************************
624 * \brief Returns the number of local blocks of the given matrix.
625 * \author Ole Schuett
626 ******************************************************************************/
627int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
628 int nblocks = 0;
629 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
630 nblocks += matrix->shards[ishard].nblocks;
631 }
632 return nblocks;
633}
634
635/*******************************************************************************
636 * \brief Returns the row block sizes of the given matrix.
637 * \author Ole Schuett
638 ******************************************************************************/
639void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
640 const int **row_sizes) {
641 *nrows = matrix->nrows;
642 *row_sizes = matrix->row_sizes;
643}
644
645/*******************************************************************************
646 * \brief Returns the column block sizes of the given matrix.
647 * \author Ole Schuett
648 ******************************************************************************/
649void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
650 const int **col_sizes) {
651 *ncols = matrix->ncols;
652 *col_sizes = matrix->col_sizes;
653}
654
655/*******************************************************************************
656 * \brief Returns the local row block sizes of the given matrix.
657 * \author Ole Schuett
658 ******************************************************************************/
659void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
660 const int **local_rows) {
661 *nlocal_rows = matrix->dist->rows.nlocals;
662 *local_rows = matrix->dist->rows.local_indicies;
663}
664
665/*******************************************************************************
666 * \brief Returns the local column block sizes of the given matrix.
667 * \author Ole Schuett
668 ******************************************************************************/
669void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
670 const int **local_cols) {
671 *nlocal_cols = matrix->dist->cols.nlocals;
672 *local_cols = matrix->dist->cols.local_indicies;
673}
674
675/*******************************************************************************
676 * \brief Returns the MPI rank on which the given block should be stored.
677 * \author Ole Schuett
678 ******************************************************************************/
679int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
680 const int col) {
681 return dbm_distribution_stored_coords(matrix->dist, row, col);
682}
683
684/*******************************************************************************
685 * \brief Returns the distribution of the given matrix.
686 * \author Ole Schuett
687 ******************************************************************************/
688const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
689 return matrix->dist;
690}
691
692// EOF
int dbm_distribution_stored_coords(const dbm_distribution_t *dist, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols, const int **col_sizes)
Returns the column block sizes of the given matrix.
Definition dbm_matrix.c:649
void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows, const int **row_sizes)
Returns the row block sizes of the given matrix.
Definition dbm_matrix.c:639
double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b)
Calculates maximum relative difference between matrix_a and matrix_b.
Definition dbm_matrix.c:567
static int dbm_get_shard_index(const dbm_matrix_t *matrix, const int row, const int col)
Internal routine for getting a block's shard index.
Definition dbm_matrix.h:246
static int dbm_get_num_shards(const dbm_matrix_t *matrix)
Internal routine that returns the number of shards for given matrix.
Definition dbm_matrix.h:238
void dbm_mpi_free_mem(void *mem)
Wrapper around MPI_Free_mem.
Definition dbm_mpi.c:499
void * dbm_mpi_alloc_mem(size_t size)
Wrapper around MPI_Alloc_mem.
Definition dbm_mpi.c:483
bool dbm_mpi_comms_are_similar(const dbm_mpi_comm_t comm1, const dbm_mpi_comm_t comm2)
Wrapper around MPI_Comm_compare.
Definition dbm_mpi.c:224
int dbm_mpi_comm_size(const dbm_mpi_comm_t comm)
Wrapper around MPI_Comm_size.
Definition dbm_mpi.c:110
void dbm_mpi_sum_double(double *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_DOUBLE.
Definition dbm_mpi.c:347
void dbm_mpi_max_double(double *values, const int count, const dbm_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_MAX and datatype MPI_DOUBLE.
Definition dbm_mpi.c:283
void dbm_mpi_alltoall_int(const int *sendbuf, const int sendcount, int *recvbuf, const int recvcount, const dbm_mpi_comm_t comm)
Wrapper around MPI_Alltoall for datatype MPI_INT.
Definition dbm_mpi.c:429
void dbm_mpi_alltoallv_double(const double *sendbuf, const int *sendcounts, const int *sdispls, double *recvbuf, const int *recvcounts, const int *rdispls, const dbm_mpi_comm_t comm)
Wrapper around MPI_Alltoallv for datatype MPI_DOUBLE.
Definition dbm_mpi.c:464
int dbm_mpi_comm_t
Definition dbm_mpi.h:19
void dbm_shard_release(dbm_shard_t *shard)
Internal routine for releasing a shard.
Definition dbm_shard.c:128
dbm_block_t * dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or allocating a new one.
Definition dbm_shard.c:279
dbm_block_t * dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for allocating the metadata of a new block.
Definition dbm_shard.c:200
dbm_block_t * dbm_shard_lookup(const dbm_shard_t *shard, const int row, const int col)
Internal routine for looking up a block from a shard.
Definition dbm_shard.c:178
void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard)
Internal routine for allocating and zeroing any promised block's data.
Definition dbm_shard.c:234
dbm_block_t * dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or promising a new one.
Definition dbm_shard.c:264
void dbm_shard_init(dbm_shard_t *shard)
Internal routine for initializing a shard.
Definition dbm_shard.c:64
void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b)
Internal routine for copying content of shard_b into shard_a.
Definition dbm_shard.c:80
static void const int const int i
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Internal struct for storing a block's metadata.
Definition dbm_shard.h:20
Internal struct for storing a two dimensional distribution.
dbm_mpi_comm_t comm
Internal struct for storing a block iterator.
Definition dbm_matrix.h:35
const dbm_matrix_t * matrix
Definition dbm_matrix.h:36
Internal struct for storing a matrix.
Definition dbm_matrix.h:20
int * row_sizes
Definition dbm_matrix.h:25
char * name
Definition dbm_matrix.h:22
int * col_sizes
Definition dbm_matrix.h:26
dbm_shard_t * shards
Definition dbm_matrix.h:28
dbm_distribution_t * dist
Definition dbm_matrix.h:21
Internal struct for storing a matrix shard.
Definition dbm_shard.h:30
double * data
Definition dbm_shard.h:43
int * hashtable
Definition dbm_shard.h:37
omp_lock_t lock
Definition dbm_shard.h:45
dbm_block_t * blocks
Definition dbm_shard.h:33
int data_size
Definition dbm_shard.h:42
int hashtable_size
Definition dbm_shard.h:35
int data_promised
Definition dbm_shard.h:39