13 USE omp_lib,
ONLY: omp_destroy_lock,&
52#include "../../base/base_uses.f90"
57 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'dbt_tas_reshape_ops'
65 INTEGER :: nblock = -1
66 INTEGER(KIND=int_8),
DIMENSION(:, :),
ALLOCATABLE :: indx
67 REAL(dp),
DIMENSION(:),
ALLOCATABLE :: msg
68 INTEGER :: endpos = -1
69 END TYPE dbt_buffer_type
82 RECURSIVE SUBROUTINE dbt_tas_reshape(matrix_in, matrix_out, summation, transposed, move_data)
83 TYPE(
dbt_tas_type),
INTENT(INOUT) :: matrix_in, matrix_out
84 LOGICAL,
INTENT(IN),
OPTIONAL :: summation, transposed, move_data
86 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_reshape'
88 INTEGER :: a, b, bcount, handle, handle2, iproc, &
89 nblk, nblk_per_thread, ndata, numnodes
90 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate, index_recv
91 INTEGER(KIND=int_8),
DIMENSION(2) :: blk_index
92 INTEGER(kind=omp_lock_kind),
ALLOCATABLE, &
94 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
95 num_entries_recv, num_entries_send, &
97 INTEGER,
DIMENSION(2) :: blk_size
98 LOGICAL :: move_prv, tr_in
99 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
100 TYPE(dbt_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
105 DIMENSION(:, :) :: req_array
107 CALL timeset(routinen, handle)
109 IF (
PRESENT(summation))
THEN
110 IF (.NOT. summation)
CALL dbm_clear(matrix_out%matrix)
115 IF (
PRESENT(move_data))
THEN
121 IF (
PRESENT(transposed))
THEN
127 IF (.NOT. matrix_out%valid)
THEN
128 cpabort(
"can not reshape into invalid matrix")
132 mp_comm = info%mp_comm
133 numnodes = mp_comm%num_pe
134 ALLOCATE (buffer_send(0:numnodes - 1))
135 ALLOCATE (buffer_recv(0:numnodes - 1))
136 ALLOCATE (num_blocks_recv(0:numnodes - 1))
137 ALLOCATE (num_blocks_send(0:numnodes - 1))
138 ALLOCATE (num_entries_recv(0:numnodes - 1))
139 ALLOCATE (num_entries_send(0:numnodes - 1))
140 ALLOCATE (num_rec(0:2*numnodes - 1))
141 ALLOCATE (num_send(0:2*numnodes - 1), source=0)
142 ALLOCATE (req_array(1:numnodes, 4))
143 ALLOCATE (locks(0:numnodes - 1))
144 DO iproc = 0, numnodes - 1
145 CALL omp_init_lock(locks(iproc))
148 CALL timeset(routinen//
"_get_coord", handle2)
154 row_size=blk_size(1), col_size=blk_size(2))
160 CALL omp_set_lock(locks(iproc))
161 num_send(2*iproc) = num_send(2*iproc) + product(blk_size)
162 num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
163 CALL omp_unset_lock(locks(iproc))
167 CALL timestop(handle2)
169 CALL timeset(routinen//
"_alltoall", handle2)
170 CALL mp_comm%alltoall(num_send, num_rec, 2)
171 CALL timestop(handle2)
173 CALL timeset(routinen//
"_buffer_fill", handle2)
174 DO iproc = 0, numnodes - 1
175 num_entries_recv(iproc) = num_rec(2*iproc)
176 num_blocks_recv(iproc) = num_rec(2*iproc + 1)
177 num_entries_send(iproc) = num_send(2*iproc)
178 num_blocks_send(iproc) = num_send(2*iproc + 1)
180 CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
181 CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
189 row_size=blk_size(1), col_size=blk_size(2))
195 CALL omp_set_lock(locks(iproc))
196 CALL dbt_buffer_add_block(buffer_send(iproc), blk_index, block, transposed=tr_in)
197 CALL omp_unset_lock(locks(iproc))
203 CALL timestop(handle2)
205 CALL timeset(routinen//
"_communicate_buffer", handle2)
206 CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
208 DO iproc = 0, numnodes - 1
209 CALL dbt_buffer_destroy(buffer_send(iproc))
210 CALL omp_destroy_lock(locks(iproc))
214 CALL timestop(handle2)
216 CALL timeset(routinen//
"_buffer_obtain", handle2)
219 nblk = sum(num_blocks_recv)
220 ALLOCATE (blks_to_allocate(nblk, 2))
223 DO iproc = 0, numnodes - 1
224 CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
225 blks_to_allocate(bcount + 1:bcount +
SIZE(index_recv, 1), :) = index_recv(:, :)
226 bcount = bcount +
SIZE(index_recv, 1)
227 DEALLOCATE (index_recv)
232 nblk_per_thread = nblk/omp_get_num_threads() + 1
233 a = omp_get_thread_num()*nblk_per_thread + 1
234 b = min(a + nblk_per_thread, nblk)
237 DEALLOCATE (blks_to_allocate)
242 DO iproc = 0, numnodes - 1
244 DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
245 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index)
246 CALL dbt_tas_blk_sizes(matrix_out, blk_index(1), blk_index(2), blk_size(1), blk_size(2))
247 ALLOCATE (block(blk_size(1), blk_size(2)))
248 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index, block)
249 CALL dbt_tas_put_block(matrix_out, blk_index(1), blk_index(2), block, summation=summation)
252 CALL dbt_buffer_destroy(buffer_recv(iproc))
257 CALL timestop(handle2)
261 CALL timestop(handle)
274 TYPE(
dbm_type),
INTENT(INOUT) :: matrix_in
277 LOGICAL,
INTENT(IN),
OPTIONAL :: nodata, move_data
279 INTEGER :: a, b, nblk_per_thread, nblkcols, nblkrows
280 INTEGER,
DIMENSION(2) :: pdims
281 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, col_dist, row_blk_size, &
292 INTEGER :: numnodes, ngroup, max_threads, cache_idx
293 INTEGER(kind=omp_lock_kind),
ALLOCATABLE,
DIMENSION(:) :: locks
294 TYPE(dbt_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
295 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
296 num_entries_recv, num_entries_send, &
299 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate
300 INTEGER,
DIMENSION(2) :: blk_size
301 INTEGER,
DIMENSION(2) :: blk_index
302 INTEGER(KIND=int_8),
DIMENSION(2) :: blk_index_i8
304 INTEGER :: i, iproc, bcount, nblk
305 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: iprocs
306 LOGICAL :: nodata_prv, move_prv
307 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: index_recv
311 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block
313 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_replicate'
315 INTEGER :: handle, handle2
317 NULLIFY (col_blk_size, row_blk_size)
319 CALL timeset(routinen, handle)
321 IF (
PRESENT(nodata))
THEN
327 IF (
PRESENT(move_data))
THEN
335 nblkrows =
SIZE(row_blk_size)
336 nblkcols =
SIZE(col_blk_size)
341 mp_comm = info%mp_comm
344 numnodes = mp_comm%num_pe
345 pdims = mp_comm%num_pe_cart
347 SELECT CASE (info%split_rowcol)
349 repl_dist =
dbt_tas_dist_repl(row_dist, pdims(1), nblkrows, info%ngroup, info%pgrid_split_size)
353 ALLOCATE (row_dist_obj, source=repl_dist)
354 ALLOCATE (col_dist_obj, source=dir_dist)
355 ALLOCATE (row_bsize_obj, source=repl_blksize)
356 ALLOCATE (col_bsize_obj, source=dir_blksize)
359 repl_dist =
dbt_tas_dist_repl(col_dist, pdims(2), nblkcols, info%ngroup, info%pgrid_split_size)
362 ALLOCATE (row_dist_obj, source=dir_dist)
363 ALLOCATE (col_dist_obj, source=repl_dist)
364 ALLOCATE (row_bsize_obj, source=dir_blksize)
365 ALLOCATE (col_bsize_obj, source=repl_blksize)
370 dist, row_bsize_obj, col_bsize_obj, own_dist=.true.)
374 CALL timestop(handle)
378 ALLOCATE (buffer_send(0:numnodes - 1))
379 ALLOCATE (buffer_recv(0:numnodes - 1))
380 ALLOCATE (num_blocks_recv(0:numnodes - 1))
381 ALLOCATE (num_blocks_send(0:numnodes - 1))
382 ALLOCATE (num_entries_recv(0:numnodes - 1))
383 ALLOCATE (num_entries_send(0:numnodes - 1))
384 ALLOCATE (num_rec(0:2*numnodes - 1))
385 ALLOCATE (num_send(0:2*numnodes - 1), source=0)
386 ALLOCATE (req_array(1:numnodes, 4))
387 ALLOCATE (locks(0:numnodes - 1))
390 ALLOCATE (iprocs(ngroup, max_threads))
391 DO iproc = 0, numnodes - 1
392 CALL omp_init_lock(locks(iproc))
397 cache_idx = omp_get_thread_num() + 1
401 row_size=blk_size(1), col_size=blk_size(2))
403 iprocs(:, cache_idx))
405 iproc = iprocs(i, cache_idx)
406 CALL omp_set_lock(locks(iproc))
407 num_send(2*iproc) = num_send(2*iproc) + product(blk_size)
408 num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
409 CALL omp_unset_lock(locks(iproc))
415 CALL timeset(routinen//
"_alltoall", handle2)
416 CALL mp_comm%alltoall(num_send, num_rec, 2)
417 CALL timestop(handle2)
419 DO iproc = 0, numnodes - 1
420 num_entries_recv(iproc) = num_rec(2*iproc)
421 num_blocks_recv(iproc) = num_rec(2*iproc + 1)
422 num_entries_send(iproc) = num_send(2*iproc)
423 num_blocks_send(iproc) = num_send(2*iproc + 1)
425 CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
426 CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
431 cache_idx = omp_get_thread_num() + 1
435 row_size=blk_size(1), col_size=blk_size(2))
437 iprocs(:, cache_idx))
439 iproc = iprocs(i, cache_idx)
440 CALL omp_set_lock(locks(iproc))
441 CALL dbt_buffer_add_block(buffer_send(iproc), int(blk_index, kind=
int_8), block)
442 CALL omp_unset_lock(locks(iproc))
452 CALL timeset(routinen//
"_communicate_buffer", handle2)
453 CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
455 DO iproc = 0, numnodes - 1
456 CALL dbt_buffer_destroy(buffer_send(iproc))
457 CALL omp_destroy_lock(locks(iproc))
461 CALL timestop(handle2)
464 nblk = sum(num_blocks_recv)
465 ALLOCATE (blks_to_allocate(nblk, 2))
468 DO iproc = 0, numnodes - 1
469 CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
470 blks_to_allocate(bcount + 1:bcount +
SIZE(index_recv, 1), :) = int(index_recv(:, :))
471 bcount = bcount +
SIZE(index_recv, 1)
472 DEALLOCATE (index_recv)
477 nblk_per_thread = nblk/omp_get_num_threads() + 1
478 a = omp_get_thread_num()*nblk_per_thread + 1
479 b = min(a + nblk_per_thread, nblk)
480 CALL dbm_reserve_blocks(matrix_out%matrix, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
482 DEALLOCATE (blks_to_allocate)
487 DO iproc = 0, numnodes - 1
489 DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
490 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
491 CALL dbt_tas_blk_sizes(matrix_out, blk_index_i8(1), blk_index_i8(2), blk_size(1), blk_size(2))
492 ALLOCATE (block(blk_size(1), blk_size(2)))
493 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
494 CALL dbm_put_block(matrix_out%matrix, int(blk_index_i8(1)), int(blk_index_i8(2)), block)
498 CALL dbt_buffer_destroy(buffer_recv(iproc))
505 CALL timestop(handle)
518 TYPE(
dbm_type),
INTENT(INOUT) :: matrix_out
520 LOGICAL,
INTENT(IN),
OPTIONAL :: summation, move_data
522 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_merge'
524 INTEGER :: a, b, bcount, handle, handle2, iproc, &
525 nblk, nblk_per_thread, ndata, numnodes
526 INTEGER(KIND=int_8),
ALLOCATABLE,
DIMENSION(:, :) :: index_recv
527 INTEGER(KIND=int_8),
DIMENSION(2) :: blk_index_i8
528 INTEGER(kind=omp_lock_kind),
ALLOCATABLE, &
529 DIMENSION(:) :: locks
530 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
531 num_entries_recv, num_entries_send, &
533 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate
534 INTEGER,
DIMENSION(2) :: blk_index, blk_size
535 INTEGER,
DIMENSION(:),
POINTER :: col_block_sizes, row_block_sizes
537 REAL(
dp),
DIMENSION(:, :),
POINTER :: block
539 TYPE(dbt_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
543 DIMENSION(:, :) :: req_array
545 CALL timeset(routinen, handle)
547 IF (
PRESENT(summation))
THEN
548 IF (.NOT. summation)
CALL dbm_clear(matrix_out)
553 IF (
PRESENT(move_data))
THEN
561 numnodes = mp_comm%num_pe
563 ALLOCATE (buffer_send(0:numnodes - 1))
564 ALLOCATE (buffer_recv(0:numnodes - 1))
565 ALLOCATE (num_blocks_recv(0:numnodes - 1))
566 ALLOCATE (num_blocks_send(0:numnodes - 1))
567 ALLOCATE (num_entries_recv(0:numnodes - 1))
568 ALLOCATE (num_entries_send(0:numnodes - 1))
569 ALLOCATE (num_rec(0:2*numnodes - 1))
570 ALLOCATE (num_send(0:2*numnodes - 1), source=0)
571 ALLOCATE (req_array(1:numnodes, 4))
572 ALLOCATE (locks(0:numnodes - 1))
573 DO iproc = 0, numnodes - 1
574 CALL omp_init_lock(locks(iproc))
582 row_size=blk_size(1), col_size=blk_size(2))
584 CALL omp_set_lock(locks(iproc))
585 num_send(2*iproc) = num_send(2*iproc) + product(blk_size)
586 num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
587 CALL omp_unset_lock(locks(iproc))
592 CALL timeset(routinen//
"_alltoall", handle2)
593 CALL mp_comm%alltoall(num_send, num_rec, 2)
594 CALL timestop(handle2)
596 DO iproc = 0, numnodes - 1
597 num_entries_recv(iproc) = num_rec(2*iproc)
598 num_blocks_recv(iproc) = num_rec(2*iproc + 1)
599 num_entries_send(iproc) = num_send(2*iproc)
600 num_blocks_send(iproc) = num_send(2*iproc + 1)
602 CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
603 CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
611 row_size=blk_size(1), col_size=blk_size(2))
613 CALL omp_set_lock(locks(iproc))
614 CALL dbt_buffer_add_block(buffer_send(iproc), int(blk_index, kind=
int_8), block)
615 CALL omp_unset_lock(locks(iproc))
622 CALL timeset(routinen//
"_communicate_buffer", handle2)
623 CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
625 DO iproc = 0, numnodes - 1
626 CALL dbt_buffer_destroy(buffer_send(iproc))
627 CALL omp_destroy_lock(locks(iproc))
631 CALL timestop(handle2)
634 nblk = sum(num_blocks_recv)
635 ALLOCATE (blks_to_allocate(nblk, 2))
638 DO iproc = 0, numnodes - 1
639 CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
640 blks_to_allocate(bcount + 1:bcount +
SIZE(index_recv, 1), :) = int(index_recv(:, :))
641 bcount = bcount +
SIZE(index_recv, 1)
642 DEALLOCATE (index_recv)
647 nblk_per_thread = nblk/omp_get_num_threads() + 1
648 a = omp_get_thread_num()*nblk_per_thread + 1
649 b = min(a + nblk_per_thread, nblk)
650 CALL dbm_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
652 DEALLOCATE (blks_to_allocate)
660 DO iproc = 0, numnodes - 1
662 DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
663 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
664 blk_size(1) = row_block_sizes(int(blk_index_i8(1)))
665 blk_size(2) = col_block_sizes(int(blk_index_i8(2)))
666 ALLOCATE (block(blk_size(1), blk_size(2)))
667 CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
668 CALL dbm_put_block(matrix_out, int(blk_index_i8(1)), int(blk_index_i8(2)), block, summation=.true.)
671 CALL dbt_buffer_destroy(buffer_recv(iproc))
678 CALL timestop(handle)
687 SUBROUTINE dbt_buffer_get_index(buffer, index)
688 TYPE(dbt_buffer_type),
INTENT(IN) :: buffer
689 INTEGER(KIND=int_8),
ALLOCATABLE, &
690 DIMENSION(:, :),
INTENT(OUT) :: index
692 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_buffer_get_index'
695 INTEGER,
DIMENSION(2) :: indx_shape
697 CALL timeset(routinen, handle)
699 indx_shape = shape(buffer%indx) - [0, 1]
700 ALLOCATE (index(indx_shape(1), indx_shape(2)))
701 index(:, :) = buffer%indx(1:indx_shape(1), 1:indx_shape(2))
702 CALL timestop(handle)
703 END SUBROUTINE dbt_buffer_get_index
711 PURE FUNCTION dbt_buffer_blocks_left(buffer)
712 TYPE(dbt_buffer_type),
INTENT(IN) :: buffer
713 LOGICAL :: dbt_buffer_blocks_left
715 dbt_buffer_blocks_left = buffer%endpos < buffer%nblock
716 END FUNCTION dbt_buffer_blocks_left
725 SUBROUTINE dbt_buffer_create(buffer, nblock, ndata)
726 TYPE(dbt_buffer_type),
INTENT(OUT) :: buffer
727 INTEGER,
INTENT(IN) :: nblock, ndata
729 buffer%nblock = nblock
731 ALLOCATE (buffer%msg(ndata))
732 ALLOCATE (buffer%indx(nblock, 3))
733 END SUBROUTINE dbt_buffer_create
740 SUBROUTINE dbt_buffer_destroy(buffer)
741 TYPE(dbt_buffer_type),
INTENT(INOUT) :: buffer
743 DEALLOCATE (buffer%msg)
744 DEALLOCATE (buffer%indx)
747 END SUBROUTINE dbt_buffer_destroy
757 SUBROUTINE dbt_buffer_add_block(buffer, index, block, transposed)
758 TYPE(dbt_buffer_type),
INTENT(INOUT) :: buffer
759 INTEGER(KIND=int_8),
DIMENSION(2),
INTENT(IN) :: index
760 REAL(
dp),
DIMENSION(:, :),
INTENT(IN) :: block
761 LOGICAL,
INTENT(IN),
OPTIONAL :: transposed
763 INTEGER :: ndata, p, p_data
764 INTEGER(KIND=int_8),
DIMENSION(2) :: index_prv
767 IF (
PRESENT(transposed))
THEN
773 index_prv(:) = index(:)
777 ndata = product(shape(block))
783 p_data = int(buffer%indx(p, 3))
787 buffer%msg(p_data + 1:p_data + ndata) = reshape(transpose(block), [ndata])
789 buffer%msg(p_data + 1:p_data + ndata) = reshape(block, [ndata])
792 buffer%indx(p + 1, 1:2) = index_prv(:)
794 buffer%indx(p + 1, 3) = buffer%indx(p, 3) + int(ndata, kind=
int_8)
796 buffer%indx(p + 1, 3) = int(ndata, kind=
int_8)
798 buffer%endpos = buffer%endpos + 1
799 END SUBROUTINE dbt_buffer_add_block
810 SUBROUTINE dbt_buffer_get_next_block(buffer, ndata, index, block, advance_iter)
811 TYPE(dbt_buffer_type),
INTENT(INOUT) :: buffer
812 INTEGER,
INTENT(OUT) :: ndata
813 INTEGER(KIND=int_8),
DIMENSION(2),
INTENT(OUT) :: index
814 REAL(
dp),
DIMENSION(:, :),
INTENT(OUT),
OPTIONAL :: block
815 LOGICAL,
INTENT(IN),
OPTIONAL :: advance_iter
818 LOGICAL :: do_advance
821 IF (
PRESENT(advance_iter))
THEN
822 do_advance = advance_iter
823 ELSE IF (
PRESENT(block))
THEN
831 p_data = int(buffer%indx(p, 3))
835 ndata = int(buffer%indx(p + 1, 3) - buffer%indx(p, 3))
837 ndata = int(buffer%indx(p + 1, 3))
839 index(:) = buffer%indx(p + 1, 1:2)
841 IF (
PRESENT(block))
THEN
842 block(:, :) = reshape(buffer%msg(p_data + 1:p_data + ndata), shape(block))
845 IF (do_advance) buffer%endpos = buffer%endpos + 1
846 END SUBROUTINE dbt_buffer_get_next_block
856 SUBROUTINE dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
858 TYPE(dbt_buffer_type),
DIMENSION(0:), &
859 INTENT(INOUT) :: buffer_recv, buffer_send
861 INTENT(OUT) :: req_array
863 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_communicate_buffer'
865 INTEGER :: handle, iproc, numnodes, &
866 rec_counter, send_counter
868 CALL timeset(routinen, handle)
869 numnodes = mp_comm%num_pe
871 IF (numnodes > 1)
THEN
876 DO iproc = 0, numnodes - 1
877 IF (buffer_recv(iproc)%nblock > 0)
THEN
878 rec_counter = rec_counter + 1
879 CALL mp_comm%irecv(buffer_recv(iproc)%indx, iproc, req_array(rec_counter, 3), tag=4)
880 CALL mp_comm%irecv(buffer_recv(iproc)%msg, iproc, req_array(rec_counter, 4), tag=7)
884 DO iproc = 0, numnodes - 1
885 IF (buffer_send(iproc)%nblock > 0)
THEN
886 send_counter = send_counter + 1
887 CALL mp_comm%isend(buffer_send(iproc)%indx, iproc, req_array(send_counter, 1), tag=4)
888 CALL mp_comm%isend(buffer_send(iproc)%msg, iproc, req_array(send_counter, 2), tag=7)
892 IF (send_counter > 0)
THEN
893 CALL mp_waitall(req_array(1:send_counter, 1:2))
895 IF (rec_counter > 0)
THEN
896 CALL mp_waitall(req_array(1:rec_counter, 3:4))
900 IF (buffer_recv(0)%nblock > 0)
THEN
901 buffer_recv(0)%indx(:, :) = buffer_send(0)%indx(:, :)
902 buffer_recv(0)%msg(:) = buffer_send(0)%msg(:)
905 CALL timestop(handle)
906 END SUBROUTINE dbt_tas_communicate_buffer
subroutine, public dbm_clear(matrix)
Remove all blocks from given matrix, but does not release the underlying memory.
subroutine, public dbm_iterator_next_block(iterator, row, column, block, row_size, col_size)
Returns the next block from the given iterator.
subroutine, public dbm_get_stored_coordinates(matrix, row, column, processor)
Returns the MPI rank on which the given block should be stored.
type(dbm_distribution_obj) function, public dbm_get_distribution(matrix)
Returns the distribution of the given matrix.
integer function, dimension(:), pointer, contiguous, public dbm_get_row_block_sizes(matrix)
Returns the row block sizes of the given matrix.
logical function, public dbm_iterator_blocks_left(iterator)
Tests whether the given iterator has any block left.
integer function, dimension(:), pointer, contiguous, public dbm_distribution_col_dist(dist)
Returns the columns of the given distribution.
subroutine, public dbm_reserve_blocks(matrix, rows, cols)
Adds given list of blocks efficiently. The blocks will be filled with zeros.
subroutine, public dbm_iterator_stop(iterator)
Releases the given iterator.
subroutine, public dbm_put_block(matrix, row, col, block, summation)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
character(len=default_string_length) function, public dbm_get_name(matrix)
Returns the name of the matrix of the given matrix.
subroutine, public dbm_finalize(matrix)
Needed to be called for DBCSR after blocks where inserted. For DBM it's a no-opt.
subroutine, public dbm_iterator_start(iterator, matrix)
Creates an iterator for the blocks of the given matrix. The iteration order is not stable.
integer function, dimension(:), pointer, contiguous, public dbm_get_col_block_sizes(matrix)
Returns the column block sizes of the given matrix.
integer function, dimension(:), pointer, contiguous, public dbm_distribution_row_dist(dist)
Returns the rows of the given distribution.
Tall-and-skinny matrices: base routines similar to DBM API, mostly wrappers around existing DBM routi...
subroutine, public dbt_tas_blk_sizes(matrix, row, col, row_size, col_size)
Get block size for a given row & column.
subroutine, public dbt_tas_get_stored_coordinates(matrix, row, column, processor)
As dbt_get_stored_coordinates.
subroutine, public dbt_tas_iterator_start(iter, matrix_in)
As dbm_iterator_start.
logical function, public dbt_tas_iterator_blocks_left(iter)
As dbm_iterator_blocks_left.
subroutine, public dbt_repl_get_stored_coordinates(matrix, row, column, processors)
Get all processors for a given row/col combination if matrix is replicated on each process subgroup.
subroutine, public dbt_tas_iterator_stop(iter)
As dbm_iterator_stop.
subroutine, public dbt_tas_finalize(matrix)
...
subroutine, public dbt_tas_distribution_new(dist, mp_comm, row_dist, col_dist, split_info, nosplit)
create new distribution. Exactly like dbm_distribution_new but with custom types for row_dist and col...
subroutine, public dbt_tas_clear(matrix)
Clear matrix (erase all data)
type(dbt_tas_split_info) function, pointer, public dbt_tas_info(matrix)
get info on mpi grid splitting
subroutine, public dbt_tas_put_block(matrix, row, col, block, summation)
As dbm_put_block.
Global data (distribution and block sizes) for tall-and-skinny matrices For very sparse matrices with...
communication routines to reshape / replicate / merge tall-and-skinny matrices.
subroutine, public dbt_tas_merge(matrix_out, matrix_in, summation, move_data)
Merge submatrices of matrix_in to matrix_out by sum.
subroutine, public dbt_tas_replicate(matrix_in, info, matrix_out, nodata, move_data)
Replicate matrix_in such that each submatrix of matrix_out is an exact copy of matrix_in.
recursive subroutine, public dbt_tas_reshape(matrix_in, matrix_out, summation, transposed, move_data)
copy data (involves reshape)
methods to split tall-and-skinny matrices along longest dimension. Basically, we are splitting proces...
integer, parameter, public rowsplit
subroutine, public dbt_tas_get_split_info(info, mp_comm, nsplit, igroup, mp_comm_group, split_rowcol, pgrid_offset)
Get info on split.
integer, parameter, public colsplit
DBT tall-and-skinny base types. Mostly wrappers around existing DBM routines.
often used utilities for tall-and-skinny matrices
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Interface to the message passing library MPI.
type for arbitrary block sizes
type for replicated block sizes
type for arbitrary distributions
type for replicated distribution