(git:9ca3469)
Loading...
Searching...
No Matches
dbm_matrix.c
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7#include "dbm_matrix.h"
8#include "dbm_hyperparams.h"
9
10#include <assert.h>
11#include <math.h>
12#include <omp.h>
13#include <stdbool.h>
14#include <stddef.h>
15#include <stdlib.h>
16#include <string.h>
17
18/*******************************************************************************
19 * \brief Creates a new matrix.
20 * \author Ole Schuett
21 ******************************************************************************/
22void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
23 const char name[], const int nrows, const int ncols,
24 const int row_sizes[nrows], const int col_sizes[ncols]) {
25 assert(omp_get_num_threads() == 1);
26
27 dbm_matrix_t *const matrix = calloc(1, sizeof(dbm_matrix_t));
28
29 assert(dist->rows.length == nrows);
30 assert(dist->cols.length == ncols);
31 dbm_distribution_hold(dist);
32 matrix->dist = dist;
33
34 size_t size = (strlen(name) + 1) * sizeof(char);
35 matrix->name = malloc(size);
36 assert(matrix->name != NULL && name != NULL);
37 memcpy(matrix->name, name, size);
38
39 matrix->nrows = nrows;
40 matrix->ncols = ncols;
41
42 size = nrows * sizeof(int);
43 matrix->row_sizes = malloc(size);
44 assert(matrix->row_sizes != NULL || size == 0);
45 if (size != 0) {
46 memcpy(matrix->row_sizes, row_sizes, size);
47 }
48
49 size = ncols * sizeof(int);
50 matrix->col_sizes = malloc(size);
51 assert(matrix->col_sizes != NULL || size == 0);
52 if (size != 0) {
53 memcpy(matrix->col_sizes, col_sizes, size);
54 }
55
56 const int num_shards = dbm_get_num_shards(matrix);
57 matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
58 assert(matrix->shards != NULL || num_shards == 0);
59#pragma omp parallel for
60 for (int ishard = 0; ishard < num_shards; ishard++) {
61 dbm_shard_init(&matrix->shards[ishard]);
62 }
63
64 assert(*matrix_out == NULL);
65 *matrix_out = matrix;
66}
67
68/*******************************************************************************
69 * \brief Releases a matrix and all its ressources.
70 * \author Ole Schuett
71 ******************************************************************************/
72void dbm_release(dbm_matrix_t *matrix) {
73 assert(omp_get_num_threads() == 1);
74 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
75 dbm_shard_release(&matrix->shards[ishard]);
76 }
77 dbm_distribution_release(matrix->dist);
78 free(matrix->row_sizes);
79 free(matrix->col_sizes);
80 free(matrix->shards);
81 free(matrix->name);
82 free(matrix);
83}
84
85/*******************************************************************************
86 * \brief Copies content of matrix_b into matrix_a.
87 * Matrices must have the same row/col block sizes and distribution.
88 * \author Ole Schuett
89 ******************************************************************************/
90void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
91 assert(omp_get_num_threads() == 1);
92
93 assert(matrix_b->nrows == matrix_a->nrows);
94 for (int i = 0; i < matrix_b->nrows; i++) {
95 assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
96 }
97 assert(matrix_b->ncols == matrix_a->ncols);
98 for (int i = 0; i < matrix_b->ncols; i++) {
99 assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
100 }
101
102 assert(matrix_a->dist == matrix_b->dist);
103
104#pragma omp parallel for DBM_OMP_SCHEDULE
105 for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
106 dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
107 }
108}
109
110/*******************************************************************************
111 * \brief Copies content of matrix_b into matrix_a.
112 * Matrices may have different distributions.
113 * \author Ole Schuett
114 ******************************************************************************/
115void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
116 assert(omp_get_num_threads() == 1);
117 assert(matrix->nrows == redist->nrows);
118 for (int i = 0; i < matrix->nrows; i++) {
119 assert(matrix->row_sizes[i] == redist->row_sizes[i]);
120 }
121 assert(matrix->ncols == redist->ncols);
122 for (int i = 0; i < matrix->ncols; i++) {
123 assert(matrix->col_sizes[i] == redist->col_sizes[i]);
124 }
125
126 assert(cp_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
127 const cp_mpi_comm_t comm = redist->dist->comm;
128 const int nranks = cp_mpi_comm_size(comm);
129
130 // 1st pass: Compute send_count.
131 int send_count[nranks];
132 memset(send_count, 0, nranks * sizeof(int));
133 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
134 const dbm_shard_t *const shard = &matrix->shards[ishard];
135 for (int iblock = 0; iblock < shard->nblocks; iblock++) {
136 const dbm_block_t *const blk = &shard->blocks[iblock];
137 const int row_size = matrix->row_sizes[blk->row];
138 const int col_size = matrix->col_sizes[blk->col];
139 const int block_size = row_size * col_size;
140 const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
141 assert(0 <= rank && rank < nranks);
142 send_count[rank] += 2 + block_size;
143 }
144 }
145
146 // 1st communication: Exchange counts.
147 int recv_count[nranks];
148 cp_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
149
150 // Compute displacements and allocate data buffers.
151 int send_displ[nranks + 1], recv_displ[nranks + 1];
152 send_displ[0] = recv_displ[0] = 0;
153 for (int irank = 1; irank <= nranks; irank++) {
154 send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
155 recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
156 }
157 const int total_send_count = send_displ[nranks];
158 const int total_recv_count = recv_displ[nranks];
159 double *const data_send = cp_mpi_alloc_mem(total_send_count * sizeof(double));
160 double *const data_recv = cp_mpi_alloc_mem(total_recv_count * sizeof(double));
161
162 // 2nd pass: Fill send_data.
163 int send_data_positions[nranks];
164 memcpy(send_data_positions, send_displ, nranks * sizeof(int));
165 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
166 const dbm_shard_t *const shard = &matrix->shards[ishard];
167 for (int iblock = 0; iblock < shard->nblocks; iblock++) {
168 const dbm_block_t *const blk = &shard->blocks[iblock];
169 const double *const blk_data = &shard->data[blk->offset];
170 const int row_size = matrix->row_sizes[blk->row];
171 const int col_size = matrix->col_sizes[blk->col];
172 const int block_size = row_size * col_size;
173 const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
174 const int pos = send_data_positions[rank];
175 data_send[pos + 0] = blk->row; // send integers as doubles
176 data_send[pos + 1] = blk->col;
177 memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
178 send_data_positions[rank] += 2 + block_size;
179 }
180 }
181 for (int irank = 0; irank < nranks; irank++) {
182 assert(send_data_positions[irank] == send_displ[irank + 1]);
183 }
184
185 // 2nd communication: Exchange data.
186 cp_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
187 recv_count, recv_displ, comm);
188 cp_mpi_free_mem(data_send);
189
190 // 3rd pass: Unpack data.
191 dbm_clear(redist);
192 int recv_data_pos = 0;
193 while (recv_data_pos < total_recv_count) {
194 const int row = (int)data_recv[recv_data_pos + 0];
195 const int col = (int)data_recv[recv_data_pos + 1];
196 assert(data_recv[recv_data_pos + 0] == (double)row);
197 assert(data_recv[recv_data_pos + 1] == (double)col);
198 dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
199 const int row_size = matrix->row_sizes[row];
200 const int col_size = matrix->col_sizes[col];
201 const int block_size = row_size * col_size;
202 recv_data_pos += 2 + block_size;
203 }
204 assert(recv_data_pos == total_recv_count);
205 cp_mpi_free_mem(data_recv);
206}
207
208/*******************************************************************************
209 * \brief Looks up a block from given matrics. This routine is thread-safe.
210 * If the block is not found then a null pointer is returned.
211 * \author Ole Schuett
212 ******************************************************************************/
213void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
214 double **block, int *row_size, int *col_size) {
215 assert(0 <= row && row < matrix->nrows);
216 assert(0 <= col && col < matrix->ncols);
217 assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
218 *row_size = matrix->row_sizes[row];
219 *col_size = matrix->col_sizes[col];
220 *block = NULL;
221
222 const int ishard = dbm_get_shard_index(matrix, row, col);
223 const dbm_shard_t *const shard = &matrix->shards[ishard];
224 const dbm_block_t *const blk = dbm_shard_lookup(shard, row, col);
225 if (blk != NULL) {
226 *block = &shard->data[blk->offset];
227 }
228}
229
230/*******************************************************************************
231 * \brief Adds a block to given matrix. This routine is thread-safe.
232 * If block already exist then it gets overwritten (or summed).
233 * \author Ole Schuett
234 ******************************************************************************/
235void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
236 const bool summation, const double *block) {
237 assert(0 <= row && row < matrix->nrows);
238 assert(0 <= col && col < matrix->ncols);
239 assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
240 const int row_size = matrix->row_sizes[row];
241 const int col_size = matrix->col_sizes[col];
242 const int block_size = row_size * col_size;
243
244 const int ishard = dbm_get_shard_index(matrix, row, col);
245 dbm_shard_t *const shard = &matrix->shards[ishard];
246 omp_set_lock(&shard->lock);
247 const dbm_block_t *const blk =
248 dbm_shard_get_or_allocate_block(shard, row, col, block_size);
249 double *const blk_data = &shard->data[blk->offset];
250 if (summation) {
251 assert(blk_data != NULL || 0 == block_size);
252 for (int i = 0; i < block_size; i++) {
253 blk_data[i] += block[i];
254 }
255 } else if (block_size != 0) {
256 memcpy(blk_data, block, block_size * sizeof(double));
257 }
258 omp_unset_lock(&shard->lock);
259}
260
261/*******************************************************************************
262 * \brief Remove all blocks from matrix, but does not release underlying memory.
263 * \author Ole Schuett
264 ******************************************************************************/
265void dbm_clear(dbm_matrix_t *matrix) {
266 assert(omp_get_num_threads() == 1);
267
268#pragma omp parallel for DBM_OMP_SCHEDULE
269 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
270 dbm_shard_t *const shard = &matrix->shards[ishard];
271 shard->data_promised = shard->data_size = shard->nblocks = 0;
272 // Does not deallocate memory, hence data_allocated remains unchanged.
273 memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
274 }
275}
276
277/*******************************************************************************
278 * \brief Removes all blocks from the matrix whose norm is below the threshold.
279 * Blocks of size zero are always kept.
280 * \author Ole Schuett
281 ******************************************************************************/
282void dbm_filter(dbm_matrix_t *matrix, const double eps) {
283 assert(omp_get_num_threads() == 1);
284
285 if (eps == 0.0) {
286 return;
287 }
288 const double eps2 = eps * eps;
289
290#pragma omp parallel for DBM_OMP_SCHEDULE
291 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
292 dbm_shard_t *const shard = &matrix->shards[ishard];
293 const int old_nblocks = shard->nblocks;
294 shard->nblocks = shard->data_promised = 0;
295 memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
296
297 for (int iblock = 0; iblock < old_nblocks; iblock++) {
298 const dbm_block_t old_blk = shard->blocks[iblock];
299 const double *old_blk_data = &shard->data[old_blk.offset];
300 const int row_size = matrix->row_sizes[old_blk.row];
301 const int col_size = matrix->col_sizes[old_blk.col];
302 const int block_size = row_size * col_size;
303 double norm = 0.0;
304 for (int i = 0; i < block_size; i++) {
305 const double value = old_blk_data[i];
306 norm += value * value;
307 if (eps2 <= norm) {
308 break;
309 }
310 }
311 // For historic reasons zero-sized blocks are never filtered.
312 if (block_size > 0 && norm < eps2) {
313 continue; // filter the block
314 }
315 // Re-create block.
317 shard, old_blk.row, old_blk.col, block_size);
318 assert(new_blk->offset <= old_blk.offset);
319 if (new_blk->offset != old_blk.offset) {
320 // Using memmove because it can handle overlapping buffers.
321 double *new_blk_data = &shard->data[new_blk->offset];
322 memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
323 }
324 }
325 shard->data_size = shard->data_promised;
326 // TODO: Could call realloc to release excess memory.
327 }
328}
329
330/*******************************************************************************
331 * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
332 * This routine must always be called within an OpenMP parallel region.
333 * \author Ole Schuett
334 ******************************************************************************/
335void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
336 const int rows[], const int cols[]) {
337 assert(omp_get_num_threads() == omp_get_max_threads() &&
338 "Please call dbm_reserve_blocks within an OpenMP parallel region.");
339 const int my_rank = matrix->dist->my_rank;
340
341 for (int i = 0; i < nblocks; i++) {
342 const int row = rows[i], col = cols[i];
343 assert(0 <= row && row < matrix->nrows);
344 assert(0 <= col && col < matrix->ncols);
345 assert(dbm_get_stored_coordinates(matrix, row, col) == my_rank);
346 const int ishard = dbm_get_shard_index(matrix, row, col);
347 dbm_shard_t *const shard = &matrix->shards[ishard];
348 const int row_size = matrix->row_sizes[row];
349 const int col_size = matrix->col_sizes[col];
350 const int block_size = row_size * col_size;
351 omp_set_lock(&shard->lock);
352 dbm_shard_get_or_promise_block(shard, row, col, block_size);
353 omp_unset_lock(&shard->lock);
354 }
355#pragma omp barrier
356
357#pragma omp for DBM_OMP_SCHEDULE
358 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
359 dbm_shard_t *const shard = &matrix->shards[ishard];
361 }
362}
363
364/*******************************************************************************
365 * \brief Multiplies all entries in the given matrix by the given factor alpha.
366 * \author Ole Schuett
367 ******************************************************************************/
368void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
369 assert(omp_get_num_threads() == 1);
370 if (alpha == 1.0) {
371 return;
372 }
373 if (alpha == 0.0) {
374 dbm_zero(matrix);
375 return;
376 }
377
378#pragma omp parallel for DBM_OMP_SCHEDULE
379 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
380 dbm_shard_t *const shard = &matrix->shards[ishard];
381 for (int i = 0; i < shard->data_size; i++) {
382 shard->data[i] *= alpha;
383 }
384 }
385}
386
387/*******************************************************************************
388 * \brief Sets all blocks in the given matrix to zero.
389 * \author Ole Schuett
390 ******************************************************************************/
391void dbm_zero(dbm_matrix_t *matrix) {
392 assert(omp_get_num_threads() == 1);
393
394#pragma omp parallel for DBM_OMP_SCHEDULE
395 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
396 dbm_shard_t *const shard = &matrix->shards[ishard];
397 if (shard->data != NULL) {
398 memset(shard->data, 0, shard->data_size * sizeof(double));
399 }
400 }
401}
402
403/*******************************************************************************
404 * \brief Adds matrix_b to matrix_a.
405 * \author Ole Schuett
406 ******************************************************************************/
407void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
408 assert(omp_get_num_threads() == 1);
409 assert(matrix_a->dist == matrix_b->dist);
410
411#pragma omp parallel for DBM_OMP_SCHEDULE
412 for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
413 dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
414 const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
415 for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
416 const dbm_block_t blk_b = shard_b->blocks[iblock];
417 const int row_size = matrix_b->row_sizes[blk_b.row];
418 const int col_size = matrix_b->col_sizes[blk_b.col];
419 assert(row_size == matrix_a->row_sizes[blk_b.row]);
420 assert(col_size == matrix_a->col_sizes[blk_b.col]);
421 const int block_size = row_size * col_size;
422 const dbm_block_t *const blk_a = dbm_shard_get_or_allocate_block(
423 shard_a, blk_b.row, blk_b.col, block_size);
424 double *const data_a = &shard_a->data[blk_a->offset];
425 const double *const data_b = &shard_b->data[blk_b.offset];
426 for (int i = 0; i < block_size; i++) {
427 data_a[i] += data_b[i];
428 }
429 }
430 }
431}
432
433/*******************************************************************************
434 * \brief Creates an iterator for the blocks of the given matrix.
435 * The iteration order is not stable.
436 * This routine must always be called within an OpenMP parallel region.
437 * \author Ole Schuett
438 ******************************************************************************/
439void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
440 assert(omp_get_num_threads() == omp_get_max_threads() &&
441 "Please call dbm_iterator_start within an OpenMP parallel region.");
442 dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
443 assert(iter != NULL);
444 iter->matrix = matrix;
445 iter->next_block = 0;
446 iter->next_shard = omp_get_thread_num();
447 while (iter->next_shard < dbm_get_num_shards(matrix) &&
448 matrix->shards[iter->next_shard].nblocks == 0) {
449 iter->next_shard += omp_get_num_threads();
450 }
451 assert(*iter_out == NULL);
452 *iter_out = iter;
453}
454
455/*******************************************************************************
456 * \brief Returns number of blocks the iterator will provide to calling thread.
457 * \author Ole Schuett
458 ******************************************************************************/
459int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
460 int num_blocks = 0;
461 for (int ishard = omp_get_thread_num();
462 ishard < dbm_get_num_shards(iter->matrix);
463 ishard += omp_get_num_threads()) {
464 num_blocks += iter->matrix->shards[ishard].nblocks;
465 }
466 return num_blocks;
467}
468
469/*******************************************************************************
470 * \brief Tests whether the given iterator has any block left.
471 * \author Ole Schuett
472 ******************************************************************************/
473bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
474 return iter->next_shard < dbm_get_num_shards(iter->matrix);
475}
476
477/*******************************************************************************
478 * \brief Returns the next block from the given iterator.
479 * \author Ole Schuett
480 ******************************************************************************/
481void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
482 double **block, int *row_size, int *col_size) {
483 const dbm_matrix_t *matrix = iter->matrix;
484 assert(iter->next_shard < dbm_get_num_shards(matrix));
485 const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
486 assert(iter->next_block < shard->nblocks);
487 dbm_block_t *blk = &shard->blocks[iter->next_block];
488
489 *row = blk->row;
490 *col = blk->col;
491 *row_size = matrix->row_sizes[blk->row];
492 *col_size = matrix->col_sizes[blk->col];
493 *block = &shard->data[blk->offset];
494
495 iter->next_block++;
496 if (iter->next_block >= shard->nblocks) {
497 // Advance to the next non-empty shard...
498 iter->next_shard += omp_get_num_threads();
499 while (iter->next_shard < dbm_get_num_shards(matrix) &&
500 matrix->shards[iter->next_shard].nblocks == 0) {
501 iter->next_shard += omp_get_num_threads();
502 }
503 iter->next_block = 0; // ...and continue with its first block.
504 }
505}
506
507/*******************************************************************************
508 * \brief Releases the given iterator.
509 * \author Ole Schuett
510 ******************************************************************************/
511void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
512
513/*******************************************************************************
514 * \brief Private routine to accumulate using Kahan's summation.
515 * \author Hans Pabst
516 ******************************************************************************/
517static double kahan_sum(double value, double *accumulator,
518 double *compensation) {
519 double r, c;
520 assert(NULL != accumulator && NULL != compensation);
521 c = value - *compensation;
522 r = *accumulator + c;
523 *compensation = (r - *accumulator) - c;
524 *accumulator = r;
525 return r;
526}
527
528/*******************************************************************************
529 * \brief Computes a checksum of the given matrix.
530 * \author Ole Schuett
531 ******************************************************************************/
532double dbm_checksum(const dbm_matrix_t *matrix) {
533 double checksum = 0.0, compensation = 0.0;
534 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
535 const dbm_shard_t *shard = &matrix->shards[ishard];
536 for (int i = 0; i < shard->data_size; i++) {
537 const double value = shard->data[i];
538 kahan_sum(value * value, &checksum, &compensation);
539 }
540 }
541 cp_mpi_sum_double(&checksum, 1, matrix->dist->comm);
542 return checksum;
543}
544
545/*******************************************************************************
546 * \brief Returns the largest absolute value of all matrix elements.
547 * \author Ole Schuett
548 ******************************************************************************/
549double dbm_maxabs(const dbm_matrix_t *matrix) {
550 double maxabs = 0.0;
551 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
552 const dbm_shard_t *shard = &matrix->shards[ishard];
553 for (int i = 0; i < shard->data_size; i++) {
554 maxabs = fmax(maxabs, fabs(shard->data[i]));
555 }
556 }
557 cp_mpi_max_double(&maxabs, 1, matrix->dist->comm);
558 return maxabs;
559}
560
561/*******************************************************************************
562 * \brief Calculates maximum relative difference between matrix_a and matrix_b.
563 * \author Hans Pabst
564 ******************************************************************************/
565double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
566 const int num_shards = dbm_get_num_shards(matrix_a);
567 double epsilon = 0;
568
569 assert(omp_get_num_threads() == 1);
570 assert(matrix_a->dist == matrix_b->dist);
571 assert(num_shards == dbm_get_num_shards(matrix_b));
572
573#pragma omp parallel for reduction(max : epsilon) DBM_OMP_SCHEDULE
574 for (int ishard = 0; ishard < num_shards; ++ishard) {
575 const dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
576 const dbm_shard_t *const shard_b = &matrix_b->shards[ishard];
577 for (int iblock = 0; iblock < shard_a->nblocks; ++iblock) {
578 const dbm_block_t *const blk_a = &shard_a->blocks[iblock];
579 const dbm_block_t *const blk_b =
580 dbm_shard_lookup(shard_b, blk_a->row, blk_a->col);
581 const int row_size = matrix_a->row_sizes[blk_a->row];
582 const int col_size = matrix_a->col_sizes[blk_a->col];
583 assert(NULL == blk_b || row_size == matrix_b->row_sizes[blk_b->row]);
584 assert(NULL == blk_b || col_size == matrix_b->col_sizes[blk_b->col]);
585 const double *const data_a = &shard_a->data[blk_a->offset];
586 const double *const data_b =
587 (NULL == blk_b) ? NULL : &shard_b->data[blk_b->offset];
588 const int block_size = row_size * col_size;
589 for (int i = 0; i < block_size; ++i) {
590 const double d = data_a[i] - (NULL == data_b ? 0.0 : data_b[i]);
591 const double e = fabs(0 != data_a[i] ? (d / data_a[i]) : d);
592 if (epsilon < e) {
593 epsilon = e;
594 }
595 }
596 }
597 for (int iblock = 0; iblock < shard_b->nblocks; ++iblock) {
598 const dbm_block_t *const blk_b = &shard_b->blocks[iblock];
599 if (NULL != dbm_shard_lookup(shard_a, blk_b->row, blk_b->col)) {
600 continue;
601 }
602 const int row_size = matrix_b->row_sizes[blk_b->row];
603 const int col_size = matrix_b->col_sizes[blk_b->col];
604 const double *const data_b = &shard_b->data[blk_b->offset];
605 const int block_size = row_size * col_size;
606 for (int i = 0; i < block_size; ++i) {
607 const double e = fabs(data_b[i]);
608 if (epsilon < e) {
609 epsilon = e;
610 }
611 }
612 }
613 }
614
615 cp_mpi_max_double(&epsilon, 1, matrix_a->dist->comm);
616 return epsilon;
617}
618
619/*******************************************************************************
620 * \brief Returns the name of the matrix of the given matrix.
621 * \author Ole Schuett
622 ******************************************************************************/
623const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
624
625/*******************************************************************************
626 * \brief Returns the number of local Non-Zero Elements of the given matrix.
627 * \author Ole Schuett
628 ******************************************************************************/
629int dbm_get_nze(const dbm_matrix_t *matrix) {
630 int nze = 0;
631 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
632 nze += matrix->shards[ishard].data_size;
633 }
634 return nze;
635}
636
637/*******************************************************************************
638 * \brief Returns the number of local blocks of the given matrix.
639 * \author Ole Schuett
640 ******************************************************************************/
641int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
642 int nblocks = 0;
643 for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
644 nblocks += matrix->shards[ishard].nblocks;
645 }
646 return nblocks;
647}
648
649/*******************************************************************************
650 * \brief Returns the row block sizes of the given matrix.
651 * \author Ole Schuett
652 ******************************************************************************/
653void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
654 const int **row_sizes) {
655 *nrows = matrix->nrows;
656 *row_sizes = matrix->row_sizes;
657}
658
659/*******************************************************************************
660 * \brief Returns the column block sizes of the given matrix.
661 * \author Ole Schuett
662 ******************************************************************************/
663void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
664 const int **col_sizes) {
665 *ncols = matrix->ncols;
666 *col_sizes = matrix->col_sizes;
667}
668
669/*******************************************************************************
670 * \brief Returns the local row block sizes of the given matrix.
671 * \author Ole Schuett
672 ******************************************************************************/
673void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
674 const int **local_rows) {
675 *nlocal_rows = matrix->dist->rows.nlocals;
676 *local_rows = matrix->dist->rows.local_indicies;
677}
678
679/*******************************************************************************
680 * \brief Returns the local column block sizes of the given matrix.
681 * \author Ole Schuett
682 ******************************************************************************/
683void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
684 const int **local_cols) {
685 *nlocal_cols = matrix->dist->cols.nlocals;
686 *local_cols = matrix->dist->cols.local_indicies;
687}
688
689/*******************************************************************************
690 * \brief Returns the MPI rank on which the given block should be stored.
691 * \author Ole Schuett
692 ******************************************************************************/
693int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
694 const int col) {
695 return dbm_distribution_stored_coords(matrix->dist, row, col);
696}
697
698/*******************************************************************************
699 * \brief Returns the distribution of the given matrix.
700 * \author Ole Schuett
701 ******************************************************************************/
702const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
703 return matrix->dist;
704}
705
706// EOF
int cp_mpi_comm_size(const cp_mpi_comm_t comm)
Wrapper around MPI_Comm_size.
Definition cp_mpi.c:113
void cp_mpi_free_mem(void *mem)
Wrapper around MPI_Free_mem.
Definition cp_mpi.c:541
void cp_mpi_sum_double(double *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_SUM and datatype MPI_DOUBLE.
Definition cp_mpi.c:387
void cp_mpi_alltoallv_double(const double *sendbuf, const int *sendcounts, const int *sdispls, double *recvbuf, const int *recvcounts, const int *rdispls, const cp_mpi_comm_t comm)
Wrapper around MPI_Alltoallv for datatype MPI_DOUBLE.
Definition cp_mpi.c:506
void * cp_mpi_alloc_mem(size_t size)
Wrapper around MPI_Alloc_mem.
Definition cp_mpi.c:525
void cp_mpi_max_double(double *values, const int count, const cp_mpi_comm_t comm)
Wrapper around MPI_Allreduce for op MPI_MAX and datatype MPI_DOUBLE.
Definition cp_mpi.c:293
void cp_mpi_alltoall_int(const int *sendbuf, const int sendcount, int *recvbuf, const int recvcount, const cp_mpi_comm_t comm)
Wrapper around MPI_Alltoall for datatype MPI_INT.
Definition cp_mpi.c:471
bool cp_mpi_comms_are_similar(const cp_mpi_comm_t comm1, const cp_mpi_comm_t comm2)
Wrapper around MPI_Comm_compare.
Definition cp_mpi.c:229
int cp_mpi_comm_t
Definition cp_mpi.h:18
int dbm_distribution_stored_coords(const dbm_distribution_t *dist, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols, const int **col_sizes)
Returns the column block sizes of the given matrix.
Definition dbm_matrix.c:663
void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows, const int **row_sizes)
Returns the row block sizes of the given matrix.
Definition dbm_matrix.c:653
double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b)
Calculates maximum relative difference between matrix_a and matrix_b.
Definition dbm_matrix.c:565
static int dbm_get_shard_index(const dbm_matrix_t *matrix, const int row, const int col)
Internal routine for getting a block's shard index.
Definition dbm_matrix.h:245
static int dbm_get_num_shards(const dbm_matrix_t *matrix)
Internal routine that returns the number of shards for given matrix.
Definition dbm_matrix.h:237
void dbm_shard_release(dbm_shard_t *shard)
Internal routine for releasing a shard.
Definition dbm_shard.c:128
dbm_block_t * dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or allocating a new one.
Definition dbm_shard.c:278
dbm_block_t * dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for allocating the metadata of a new block.
Definition dbm_shard.c:200
dbm_block_t * dbm_shard_lookup(const dbm_shard_t *shard, const int row, const int col)
Internal routine for looking up a block from a shard.
Definition dbm_shard.c:178
void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard)
Internal routine for allocating and zeroing any promised block's data.
Definition dbm_shard.c:234
dbm_block_t * dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or promising a new one.
Definition dbm_shard.c:263
void dbm_shard_init(dbm_shard_t *shard)
Internal routine for initializing a shard.
Definition dbm_shard.c:64
void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b)
Internal routine for copying content of shard_b into shard_a.
Definition dbm_shard.c:80
static void const int const int i
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Definition kahan_sum.F:29
Internal struct for storing a block's metadata.
Definition dbm_shard.h:20
Internal struct for storing a two dimensional distribution.
Internal struct for storing a block iterator.
Definition dbm_matrix.h:34
const dbm_matrix_t * matrix
Definition dbm_matrix.h:35
Internal struct for storing a matrix.
Definition dbm_matrix.h:19
int * row_sizes
Definition dbm_matrix.h:24
char * name
Definition dbm_matrix.h:21
int * col_sizes
Definition dbm_matrix.h:25
dbm_shard_t * shards
Definition dbm_matrix.h:27
dbm_distribution_t * dist
Definition dbm_matrix.h:20
Internal struct for storing a matrix shard.
Definition dbm_shard.h:30
double * data
Definition dbm_shard.h:42
int * hashtable
Definition dbm_shard.h:37
omp_lock_t lock
Definition dbm_shard.h:44
dbm_block_t * blocks
Definition dbm_shard.h:33
int data_size
Definition dbm_shard.h:41
int hashtable_size
Definition dbm_shard.h:35
int data_promised
Definition dbm_shard.h:39