13 USE omp_lib,
ONLY: omp_get_num_threads,&
50#include "../../base/base_uses.f90"
55 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'dbt_tas_reshape_ops'
63 INTEGER :: nblock = -1
64 INTEGER(KIND=int_8),
DIMENSION(:, :),
ALLOCATABLE :: indx
65 REAL(dp),
DIMENSION(:),
ALLOCATABLE :: msg
66 INTEGER :: endpos = -1
80 RECURSIVE SUBROUTINE dbt_tas_reshape(matrix_in, matrix_out, summation, transposed, move_data)
81 TYPE(
dbt_tas_type),
INTENT(INOUT) :: matrix_in, matrix_out
82 LOGICAL,
INTENT(IN),
OPTIONAL :: summation, transposed, move_data
84 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_reshape'
86 INTEGER :: a, b, bcount, handle, handle2, iproc, &
87 nblk, nblk_per_thread, ndata, numnodes
88 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate, index_recv
89 INTEGER(KIND=int_8),
DIMENSION(2) :: blk_index
90 INTEGER(kind=omp_lock_kind),
ALLOCATABLE, &
92 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
93 num_entries_recv, num_entries_send, &
95 INTEGER,
DIMENSION(2) :: blk_size
96 LOGICAL :: move_prv, tr_in
97 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
98 TYPE(dbt_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
103 DIMENSION(:, :) :: req_array
105 CALL timeset(routinen, handle)
107 IF (
PRESENT(summation))
THEN
108 IF (.NOT. summation)
CALL dbm_clear(matrix_out%matrix)
113 IF (
PRESENT(move_data))
THEN
119 IF (
PRESENT(transposed))
THEN
125 IF (.NOT. matrix_out%valid)
THEN
126 cpabort(
"can not reshape into invalid matrix")
130 mp_comm = info%mp_comm
131 numnodes = mp_comm%num_pe
132 ALLOCATE (buffer_send(0:numnodes - 1))
133 ALLOCATE (buffer_recv(0:numnodes - 1))
134 ALLOCATE (num_blocks_recv(0:numnodes - 1))
135 ALLOCATE (num_blocks_send(0:numnodes - 1))
136 ALLOCATE (num_entries_recv(0:numnodes - 1))
137 ALLOCATE (num_entries_send(0:numnodes - 1))
138 ALLOCATE (num_rec(0:2*numnodes - 1))
139 ALLOCATE (num_send(0:2*numnodes - 1))
141 ALLOCATE (req_array(1:numnodes, 4))
142 ALLOCATE (locks(0:numnodes - 1))
143 DO iproc = 0, numnodes - 1
144 CALL omp_init_lock(locks(iproc))
147 CALL timeset(routinen//
"_get_coord", handle2)
153 row_size=blk_size(1), col_size=blk_size(2))
160 num_send(2*iproc) = num_send(2*iproc) + product(blk_size)
162 num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
166 CALL timestop(handle2)
168 CALL timeset(routinen//
"_alltoall", handle2)
169 CALL mp_comm%alltoall(num_send, num_rec, 2)
170 CALL timestop(handle2)
172 CALL timeset(routinen//
"_buffer_fill", handle2)
173 DO iproc = 0, numnodes - 1
174 num_entries_recv(iproc) = num_rec(2*iproc)
175 num_blocks_recv(iproc) = num_rec(2*iproc + 1)
176 num_entries_send(iproc) = num_send(2*iproc)
177 num_blocks_send(iproc) = num_send(2*iproc + 1)
179 CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
181 CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
190 row_size=blk_size(1), col_size=blk_size(2))
196 CALL omp_set_lock(locks(iproc))
197 CALL dbt_buffer_add_block(buffer_send(iproc), blk_index, block, transposed=tr_in)
198 CALL omp_unset_lock(locks(iproc))
205 CALL timestop(handle2)
207 CALL timeset(routinen//
"_communicate_buffer", handle2)
208 CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
210 DO iproc = 0, numnodes - 1
211 CALL dbt_buffer_destroy(buffer_send(iproc))
214 CALL timestop(handle2)
216 CALL timeset(routinen//
"_buffer_obtain", handle2)
219 nblk = sum(num_blocks_recv)
220 ALLOCATE (blks_to_allocate(nblk, 2))
223 DO iproc = 0, numnodes - 1
224 CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
225 blks_to_allocate(bcount + 1:bcount +
SIZE(index_recv, 1), :) = index_recv(:, :)
226 bcount = bcount +
SIZE(index_recv, 1)
227 DEALLOCATE (index_recv)
232 nblk_per_thread = nblk/omp_get_num_threads() + 1
233 a = omp_get_thread_num()*nblk_per_thread + 1
234 b = min(a + nblk_per_thread, nblk)
237 DEALLOCATE (blks_to_allocate)
239 DO iproc = 0, numnodes - 1
241 DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
242 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index)
243 CALL dbt_tas_blk_sizes(matrix_out, blk_index(1), blk_index(2), blk_size(1), blk_size(2))
244 ALLOCATE (block(blk_size(1), blk_size(2)))
245 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index, block)
246 CALL dbt_tas_put_block(matrix_out, blk_index(1), blk_index(2), block, summation=summation)
249 CALL dbt_buffer_destroy(buffer_recv(iproc))
252 CALL timestop(handle2)
256 CALL timestop(handle)
269 TYPE(
dbm_type),
INTENT(INOUT) :: matrix_in
272 LOGICAL,
INTENT(IN),
OPTIONAL :: nodata, move_data
274 INTEGER :: a, b, nblk_per_thread, nblkcols, nblkrows
275 INTEGER,
DIMENSION(2) :: pdims
276 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, col_dist, row_blk_size, &
287 INTEGER :: numnodes, ngroup
288 INTEGER(kind=omp_lock_kind),
ALLOCATABLE,
DIMENSION(:) :: locks
289 TYPE(dbt_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
290 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
291 num_entries_recv, num_entries_send, &
294 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate
295 INTEGER,
DIMENSION(2) :: blk_size
296 INTEGER,
DIMENSION(2) :: blk_index
297 INTEGER(KIND=int_8),
DIMENSION(2) :: blk_index_i8
299 INTEGER :: i, iproc, bcount, nblk
300 INTEGER,
DIMENSION(:),
ALLOCATABLE :: iprocs
301 LOGICAL :: nodata_prv, move_prv
302 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: index_recv
306 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
308 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_replicate'
310 INTEGER :: handle, handle2
312 NULLIFY (col_blk_size, row_blk_size)
314 CALL timeset(routinen, handle)
316 IF (
PRESENT(nodata))
THEN
322 IF (
PRESENT(move_data))
THEN
330 nblkrows =
SIZE(row_blk_size)
331 nblkcols =
SIZE(col_blk_size)
336 mp_comm = info%mp_comm
339 numnodes = mp_comm%num_pe
340 pdims = mp_comm%num_pe_cart
342 SELECT CASE (info%split_rowcol)
344 repl_dist =
dbt_tas_dist_repl(row_dist, pdims(1), nblkrows, info%ngroup, info%pgrid_split_size)
348 ALLOCATE (row_dist_obj, source=repl_dist)
349 ALLOCATE (col_dist_obj, source=dir_dist)
350 ALLOCATE (row_bsize_obj, source=repl_blksize)
351 ALLOCATE (col_bsize_obj, source=dir_blksize)
354 repl_dist =
dbt_tas_dist_repl(col_dist, pdims(2), nblkcols, info%ngroup, info%pgrid_split_size)
357 ALLOCATE (row_dist_obj, source=dir_dist)
358 ALLOCATE (col_dist_obj, source=repl_dist)
359 ALLOCATE (row_bsize_obj, source=dir_blksize)
360 ALLOCATE (col_bsize_obj, source=repl_blksize)
365 dist, row_bsize_obj, col_bsize_obj, own_dist=.true.)
369 CALL timestop(handle)
373 ALLOCATE (buffer_send(0:numnodes - 1))
374 ALLOCATE (buffer_recv(0:numnodes - 1))
375 ALLOCATE (num_blocks_recv(0:numnodes - 1))
376 ALLOCATE (num_blocks_send(0:numnodes - 1))
377 ALLOCATE (num_entries_recv(0:numnodes - 1))
378 ALLOCATE (num_entries_send(0:numnodes - 1))
379 ALLOCATE (num_rec(0:2*numnodes - 1))
380 ALLOCATE (num_send(0:2*numnodes - 1))
382 ALLOCATE (req_array(1:numnodes, 4))
383 ALLOCATE (locks(0:numnodes - 1))
384 DO iproc = 0, numnodes - 1
385 CALL omp_init_lock(locks(iproc))
390 ALLOCATE (iprocs(ngroup))
394 row_size=blk_size(1), col_size=blk_size(2))
396 DO i = 1,
SIZE(iprocs)
398 num_send(2*iprocs(i)) = num_send(2*iprocs(i)) + product(blk_size)
400 num_send(2*iprocs(i) + 1) = num_send(2*iprocs(i) + 1) + 1
407 CALL timeset(routinen//
"_alltoall", handle2)
408 CALL mp_comm%alltoall(num_send, num_rec, 2)
409 CALL timestop(handle2)
411 DO iproc = 0, numnodes - 1
412 num_entries_recv(iproc) = num_rec(2*iproc)
413 num_blocks_recv(iproc) = num_rec(2*iproc + 1)
414 num_entries_send(iproc) = num_send(2*iproc)
415 num_blocks_send(iproc) = num_send(2*iproc + 1)
417 CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
419 CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
425 ALLOCATE (iprocs(ngroup))
429 row_size=blk_size(1), col_size=blk_size(2))
431 DO i = 1,
SIZE(iprocs)
432 CALL omp_set_lock(locks(iprocs(i)))
433 CALL dbt_buffer_add_block(buffer_send(iprocs(i)), int(blk_index, kind=
int_8), block)
434 CALL omp_unset_lock(locks(iprocs(i)))
443 CALL timeset(routinen//
"_communicate_buffer", handle2)
444 CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
446 DO iproc = 0, numnodes - 1
447 CALL dbt_buffer_destroy(buffer_send(iproc))
450 CALL timestop(handle2)
453 nblk = sum(num_blocks_recv)
454 ALLOCATE (blks_to_allocate(nblk, 2))
457 DO iproc = 0, numnodes - 1
458 CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
459 blks_to_allocate(bcount + 1:bcount +
SIZE(index_recv, 1), :) = int(index_recv(:, :))
460 bcount = bcount +
SIZE(index_recv, 1)
461 DEALLOCATE (index_recv)
466 nblk_per_thread = nblk/omp_get_num_threads() + 1
467 a = omp_get_thread_num()*nblk_per_thread + 1
468 b = min(a + nblk_per_thread, nblk)
469 CALL dbm_reserve_blocks(matrix_out%matrix, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
471 DEALLOCATE (blks_to_allocate)
473 DO iproc = 0, numnodes - 1
475 DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
476 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
477 CALL dbt_tas_blk_sizes(matrix_out, blk_index_i8(1), blk_index_i8(2), blk_size(1), blk_size(2))
478 ALLOCATE (block(blk_size(1), blk_size(2)))
479 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
480 CALL dbm_put_block(matrix_out%matrix, int(blk_index_i8(1)), int(blk_index_i8(2)), block)
484 CALL dbt_buffer_destroy(buffer_recv(iproc))
489 CALL timestop(handle)
502 TYPE(
dbm_type),
INTENT(INOUT) :: matrix_out
504 LOGICAL,
INTENT(IN),
OPTIONAL :: summation, move_data
506 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_merge'
508 INTEGER :: a, b, bcount, handle, handle2, iproc, &
509 nblk, nblk_per_thread, ndata, numnodes
510 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: index_recv
511 INTEGER(KIND=int_8),
DIMENSION(2) :: blk_index_i8
512 INTEGER(kind=omp_lock_kind),
ALLOCATABLE, &
513 DIMENSION(:) :: locks
514 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
515 num_entries_recv, num_entries_send, &
517 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate
518 INTEGER,
DIMENSION(2) :: blk_index, blk_size
519 INTEGER,
DIMENSION(:),
POINTER :: col_block_sizes, row_block_sizes
521 REAL(
dp),
DIMENSION(:, :),
POINTER :: block
523 TYPE(dbt_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
527 DIMENSION(:, :) :: req_array
531 CALL timeset(routinen, handle)
533 IF (
PRESENT(summation))
THEN
534 IF (.NOT. summation)
CALL dbm_clear(matrix_out)
539 IF (
PRESENT(move_data))
THEN
547 numnodes = mp_comm%num_pe
549 ALLOCATE (buffer_send(0:numnodes - 1))
550 ALLOCATE (buffer_recv(0:numnodes - 1))
551 ALLOCATE (num_blocks_recv(0:numnodes - 1))
552 ALLOCATE (num_blocks_send(0:numnodes - 1))
553 ALLOCATE (num_entries_recv(0:numnodes - 1))
554 ALLOCATE (num_entries_send(0:numnodes - 1))
555 ALLOCATE (num_rec(0:2*numnodes - 1))
556 ALLOCATE (num_send(0:2*numnodes - 1))
558 ALLOCATE (req_array(1:numnodes, 4))
559 ALLOCATE (locks(0:numnodes - 1))
560 DO iproc = 0, numnodes - 1
561 CALL omp_init_lock(locks(iproc))
569 row_size=blk_size(1), col_size=blk_size(2))
572 num_send(2*iproc) = num_send(2*iproc) + product(blk_size)
574 num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
579 CALL timeset(routinen//
"_alltoall", handle2)
580 CALL mp_comm%alltoall(num_send, num_rec, 2)
581 CALL timestop(handle2)
583 DO iproc = 0, numnodes - 1
584 num_entries_recv(iproc) = num_rec(2*iproc)
585 num_blocks_recv(iproc) = num_rec(2*iproc + 1)
586 num_entries_send(iproc) = num_send(2*iproc)
587 num_blocks_send(iproc) = num_send(2*iproc + 1)
589 CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
591 CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
600 row_size=blk_size(1), col_size=blk_size(2))
602 CALL omp_set_lock(locks(iproc))
603 CALL dbt_buffer_add_block(buffer_send(iproc), int(blk_index, kind=
int_8), block)
604 CALL omp_unset_lock(locks(iproc))
611 CALL timeset(routinen//
"_communicate_buffer", handle2)
612 CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
614 DO iproc = 0, numnodes - 1
615 CALL dbt_buffer_destroy(buffer_send(iproc))
618 CALL timestop(handle2)
621 nblk = sum(num_blocks_recv)
622 ALLOCATE (blks_to_allocate(nblk, 2))
625 DO iproc = 0, numnodes - 1
626 CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
627 blks_to_allocate(bcount + 1:bcount +
SIZE(index_recv, 1), :) = int(index_recv(:, :))
628 bcount = bcount +
SIZE(index_recv, 1)
629 DEALLOCATE (index_recv)
634 nblk_per_thread = nblk/omp_get_num_threads() + 1
635 a = omp_get_thread_num()*nblk_per_thread + 1
636 b = min(a + nblk_per_thread, nblk)
637 CALL dbm_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
639 DEALLOCATE (blks_to_allocate)
641 DO iproc = 0, numnodes - 1
643 DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
644 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
647 blk_size(1) = row_block_sizes(int(blk_index_i8(1)))
648 blk_size(2) = col_block_sizes(int(blk_index_i8(2)))
649 ALLOCATE (block(blk_size(1), blk_size(2)))
650 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
651 CALL dbm_put_block(matrix_out, int(blk_index_i8(1)), int(blk_index_i8(2)), block, summation=.true.)
654 CALL dbt_buffer_destroy(buffer_recv(iproc))
659 CALL timestop(handle)
668 SUBROUTINE dbt_buffer_get_index(buffer, index)
669 TYPE(dbt_buffer_type),
INTENT(IN) :: buffer
670 INTEGER(KIND=int_8),
ALLOCATABLE, &
671 DIMENSION(:, :),
INTENT(OUT) :: index
673 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_buffer_get_index'
676 INTEGER,
DIMENSION(2) :: indx_shape
678 CALL timeset(routinen, handle)
680 indx_shape = shape(buffer%indx) - [0, 1]
681 ALLOCATE (index(indx_shape(1), indx_shape(2)))
682 index(:, :) = buffer%indx(1:indx_shape(1), 1:indx_shape(2))
683 CALL timestop(handle)
692 PURE FUNCTION dbt_buffer_blocks_left(buffer)
693 TYPE(dbt_buffer_type),
INTENT(IN) :: buffer
694 LOGICAL :: dbt_buffer_blocks_left
696 dbt_buffer_blocks_left = buffer%endpos .LT. buffer%nblock
706 SUBROUTINE dbt_buffer_create(buffer, nblock, ndata)
707 TYPE(dbt_buffer_type),
INTENT(OUT) :: buffer
708 INTEGER,
INTENT(IN) :: nblock, ndata
710 buffer%nblock = nblock
712 ALLOCATE (buffer%msg(ndata))
713 ALLOCATE (buffer%indx(nblock, 3))
721 SUBROUTINE dbt_buffer_destroy(buffer)
722 TYPE(dbt_buffer_type),
INTENT(INOUT) :: buffer
724 DEALLOCATE (buffer%msg)
725 DEALLOCATE (buffer%indx)
728 END SUBROUTINE dbt_buffer_destroy
738 SUBROUTINE dbt_buffer_add_block(buffer, index, block, transposed)
739 TYPE(dbt_buffer_type),
INTENT(INOUT) :: buffer
740 INTEGER(KIND=int_8),
DIMENSION(2),
INTENT(IN) :: index
741 REAL(
dp),
DIMENSION(:, :),
INTENT(IN) :: block
742 LOGICAL,
INTENT(IN),
OPTIONAL :: transposed
744 INTEGER :: ndata, p, p_data
745 INTEGER(KIND=int_8),
DIMENSION(2) :: index_prv
748 IF (
PRESENT(transposed))
THEN
754 index_prv(:) = index(:)
758 ndata = product(shape(block))
764 p_data = int(buffer%indx(p, 3))
768 buffer%msg(p_data + 1:p_data + ndata) = reshape(transpose(block), [ndata])
770 buffer%msg(p_data + 1:p_data + ndata) = reshape(block, [ndata])
773 buffer%indx(p + 1, 1:2) = index_prv(:)
775 buffer%indx(p + 1, 3) = buffer%indx(p, 3) + int(ndata, kind=
int_8)
777 buffer%indx(p + 1, 3) = int(ndata, kind=
int_8)
779 buffer%endpos = buffer%endpos + 1
791 SUBROUTINE dbt_buffer_get_next_block(buffer, ndata, index, block, advance_iter)
792 TYPE(dbt_buffer_type),
INTENT(INOUT) :: buffer
793 INTEGER,
INTENT(OUT) :: ndata
794 INTEGER(KIND=int_8),
DIMENSION(2),
INTENT(OUT) :: index
795 REAL(
dp),
DIMENSION(:, :),
INTENT(OUT),
OPTIONAL :: block
796 LOGICAL,
INTENT(IN),
OPTIONAL :: advance_iter
799 LOGICAL :: do_advance
802 IF (
PRESENT(advance_iter))
THEN
803 do_advance = advance_iter
804 ELSE IF (
PRESENT(block))
THEN
812 p_data = int(buffer%indx(p, 3))
816 ndata = int(buffer%indx(p + 1, 3) - buffer%indx(p, 3))
818 ndata = int(buffer%indx(p + 1, 3))
820 index(:) = buffer%indx(p + 1, 1:2)
822 IF (
PRESENT(block))
THEN
823 block(:, :) = reshape(buffer%msg(p_data + 1:p_data + ndata), shape(block))
826 IF (do_advance) buffer%endpos = buffer%endpos + 1
837 SUBROUTINE dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
839 TYPE(dbt_buffer_type),
DIMENSION(0:), &
840 INTENT(INOUT) :: buffer_recv, buffer_send
842 INTENT(OUT) :: req_array
844 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_communicate_buffer'
846 INTEGER :: handle, iproc, numnodes, &
847 rec_counter, send_counter
849 CALL timeset(routinen, handle)
850 numnodes = mp_comm%num_pe
852 IF (numnodes > 1)
THEN
857 DO iproc = 0, numnodes - 1
858 IF (buffer_recv(iproc)%nblock > 0)
THEN
859 rec_counter = rec_counter + 1
860 CALL mp_comm%irecv(buffer_recv(iproc)%indx, iproc, req_array(rec_counter, 3), tag=4)
861 CALL mp_comm%irecv(buffer_recv(iproc)%msg, iproc, req_array(rec_counter, 4), tag=7)
865 DO iproc = 0, numnodes - 1
866 IF (buffer_send(iproc)%nblock > 0)
THEN
867 send_counter = send_counter + 1
868 CALL mp_comm%isend(buffer_send(iproc)%indx, iproc, req_array(send_counter, 1), tag=4)
869 CALL mp_comm%isend(buffer_send(iproc)%msg, iproc, req_array(send_counter, 2), tag=7)
873 IF (send_counter > 0)
THEN
874 CALL mp_waitall(req_array(1:send_counter, 1:2))
876 IF (rec_counter > 0)
THEN
877 CALL mp_waitall(req_array(1:rec_counter, 3:4))
881 IF (buffer_recv(0)%nblock > 0)
THEN
882 buffer_recv(0)%indx(:, :) = buffer_send(0)%indx(:, :)
883 buffer_recv(0)%msg(:) = buffer_send(0)%msg(:)
886 CALL timestop(handle)
subroutine, public dbm_clear(matrix)
Remove all blocks from given matrix, but does not release the underlying memory.
subroutine, public dbm_iterator_next_block(iterator, row, column, block, row_size, col_size)
Returns the next block from the given iterator.
subroutine, public dbm_get_stored_coordinates(matrix, row, column, processor)
Returns the MPI rank on which the given block should be stored.
type(dbm_distribution_obj) function, public dbm_get_distribution(matrix)
Returns the distribution of the given matrix.
integer function, dimension(:), pointer, contiguous, public dbm_get_row_block_sizes(matrix)
Returns the row block sizes of the given matrix.
logical function, public dbm_iterator_blocks_left(iterator)
Tests whether the given iterator has any block left.
integer function, dimension(:), pointer, contiguous, public dbm_distribution_col_dist(dist)
Returns the columns of the given distribution.
subroutine, public dbm_reserve_blocks(matrix, rows, cols)
Adds given list of blocks efficiently. The blocks will be filled with zeros.
subroutine, public dbm_iterator_stop(iterator)
Releases the given iterator.
subroutine, public dbm_put_block(matrix, row, col, block, summation)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
character(len=default_string_length) function, public dbm_get_name(matrix)
Returns the name of the matrix of the given matrix.
subroutine, public dbm_finalize(matrix)
Needed to be called for DBCSR after blocks where inserted. For DBM it's a no-opt.
subroutine, public dbm_iterator_start(iterator, matrix)
Creates an iterator for the blocks of the given matrix. The iteration order is not stable.
integer function, dimension(:), pointer, contiguous, public dbm_get_col_block_sizes(matrix)
Returns the column block sizes of the given matrix.
integer function, dimension(:), pointer, contiguous, public dbm_distribution_row_dist(dist)
Returns the rows of the given distribution.
Tall-and-skinny matrices: base routines similar to DBM API, mostly wrappers around existing DBM routi...
subroutine, public dbt_tas_blk_sizes(matrix, row, col, row_size, col_size)
Get block size for a given row & column.
subroutine, public dbt_tas_get_stored_coordinates(matrix, row, column, processor)
As dbt_get_stored_coordinates.
subroutine, public dbt_tas_iterator_start(iter, matrix_in)
As dbm_iterator_start.
logical function, public dbt_tas_iterator_blocks_left(iter)
As dbm_iterator_blocks_left.
subroutine, public dbt_repl_get_stored_coordinates(matrix, row, column, processors)
Get all processors for a given row/col combination if matrix is replicated on each process subgroup.
subroutine, public dbt_tas_iterator_stop(iter)
As dbm_iterator_stop.
subroutine, public dbt_tas_finalize(matrix)
...
subroutine, public dbt_tas_distribution_new(dist, mp_comm, row_dist, col_dist, split_info, nosplit)
create new distribution. Exactly like dbm_distribution_new but with custom types for row_dist and col...
subroutine, public dbt_tas_clear(matrix)
Clear matrix (erase all data)
type(dbt_tas_split_info) function, pointer, public dbt_tas_info(matrix)
get info on mpi grid splitting
subroutine, public dbt_tas_put_block(matrix, row, col, block, summation)
As dbm_put_block.
Global data (distribution and block sizes) for tall-and-skinny matrices For very sparse matrices with...
communication routines to reshape / replicate / merge tall-and-skinny matrices.
subroutine, public dbt_tas_merge(matrix_out, matrix_in, summation, move_data)
Merge submatrices of matrix_in to matrix_out by sum.
subroutine, public dbt_tas_replicate(matrix_in, info, matrix_out, nodata, move_data)
Replicate matrix_in such that each submatrix of matrix_out is an exact copy of matrix_in.
recursive subroutine, public dbt_tas_reshape(matrix_in, matrix_out, summation, transposed, move_data)
copy data (involves reshape)
methods to split tall-and-skinny matrices along longest dimension. Basically, we are splitting proces...
integer, parameter, public rowsplit
subroutine, public dbt_tas_get_split_info(info, mp_comm, nsplit, igroup, mp_comm_group, split_rowcol, pgrid_offset)
Get info on split.
integer, parameter, public colsplit
DBT tall-and-skinny base types. Mostly wrappers around existing DBM routines.
often used utilities for tall-and-skinny matrices
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Interface to the message passing library MPI.
type for arbitrary block sizes
type for replicated block sizes
type for arbitrary distributions
type for replicated distribution