59#include "./base/base_uses.f90"
64 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'submatrix_dissection'
69 LOGICAL :: initialized = .false.
71 INTEGER :: numnodes = -1, myrank = -1, nblkcols = -1, &
72 nblkrows = -1, nblks = -1, local_blocks = -1, &
73 cols_per_sm = -1, number_of_submatrices = -1
74 INTEGER,
DIMENSION(:),
POINTER :: row_blk_size => null(), col_blk_size => null()
75 INTEGER,
DIMENSION(:),
ALLOCATABLE :: coo_cols, coo_rows, coo_col_offsets, coo_cols_local, coo_rows_local, &
76 coo_col_offsets_local, submatrix_owners, submatrix_sizes
77 TYPE(
buffer_type),
DIMENSION(:),
ALLOCATABLE :: recvbufs, result_sendbufs
78 TYPE(
set_type),
DIMENSION(:),
ALLOCATABLE :: result_blocks_for_rank, result_blocks_from_rank
80 TYPE(
intbuffer_type),
DIMENSION(:),
ALLOCATABLE :: result_blocks_for_rank_buf_offsets
82 PROCEDURE :: init => submatrix_dissection_init
83 PROCEDURE :: final => submatrix_dissection_final
84 PROCEDURE :: get_sm_ids_for_rank => submatrix_get_sm_ids_for_rank
85 PROCEDURE :: generate_submatrix => submatrix_generate_sm
86 PROCEDURE :: copy_resultcol => submatrix_cpy_resultcol
87 PROCEDURE :: communicate_results => submatrix_communicate_results
102 INTEGER,
INTENT(IN) :: sm_id
103 INTEGER,
INTENT(OUT) :: first, last
104 INTEGER :: i, j, blkid
108 DO i = (sm_id - 1)*this%cols_per_sm + 1, sm_id*this%cols_per_sm
109 DO j = this%coo_col_offsets(i), this%coo_col_offsets(i + 1) - 1
110 CALL nonzero_rows%insert(this%coo_rows(j))
115 DO i = 1, nonzero_rows%elements
116 blkid = nonzero_rows%get(i)
117 IF (blkid .EQ. (sm_id - 1)*this%cols_per_sm + 1)
THEN
121 DO j = i, nonzero_rows%elements
122 blkid = nonzero_rows%get(j)
123 last = last + this%col_blk_size(blkid)
124 IF (blkid .EQ. (sm_id)*this%cols_per_sm)
EXIT
128 first = first + this%col_blk_size(blkid)
131 CALL nonzero_rows%reset
143 SUBROUTINE submatrix_dissection_init(this, matrix_p)
147 INTEGER :: cur_row, cur_col, i, j, k, l, m, l_limit_left, l_limit_right, &
148 bufsize, bufsize_next
149 INTEGER,
DIMENSION(:),
ALLOCATABLE :: blocks_per_rank, coo_dsplcmnts, num_blockids_send, num_blockids_recv
152 REAL(KIND=
dp) :: flops_total, flops_per_rank, flops_per_sm, flops_threshold, &
153 flops_current, flops_remaining
156 TYPE(
set_type),
DIMENSION(:),
ALLOCATABLE :: blocks_from_rank
157 TYPE(
buffer_type),
DIMENSION(:),
ALLOCATABLE :: sendbufs
158 TYPE(
intbuffer_type),
DIMENSION(:),
ALLOCATABLE :: blocks_for_rank
161 INTEGER :: numthreads, mytid
163 TYPE(
setarray_type),
DIMENSION(:),
ALLOCATABLE :: nonzero_rows_t
164 TYPE(
setarray_type),
DIMENSION(:),
ALLOCATABLE :: result_blocks_from_rank_t, result_blocks_for_rank_t, &
168 REAL(KIND=
dp),
DIMENSION(:, :),
POINTER :: blockp
170 CHARACTER(LEN=*),
PARAMETER :: routineN =
'submatrix_dissection_init'
173 CALL timeset(routinen, handle)
176 this%dbcsr_mat = matrix_p
177 CALL dbcsr_get_info(matrix=this%dbcsr_mat, nblkcols_total=this%nblkcols, nblkrows_total=this%nblkrows, &
178 row_blk_size=this%row_blk_size, col_blk_size=this%col_blk_size, group=this%group, distribution=this%dist)
181 IF (this%nblkcols .NE. this%nblkrows)
THEN
182 cpabort(
"Number of block rows and cols need to be identical")
185 DO i = 1, this%nblkcols
186 IF (this%col_blk_size(i) .NE. this%row_blk_size(i))
THEN
187 cpabort(
"Block row sizes and col sizes need to be identical")
201 this%local_blocks = 0
205 this%local_blocks = this%local_blocks + 1
209 ALLOCATE (this%coo_cols_local(this%local_blocks), this%coo_rows_local(this%local_blocks), blocks_per_rank(this%numnodes), &
210 coo_dsplcmnts(this%numnodes), this%coo_col_offsets_local(this%nblkcols + 1), &
211 blocks_for_rank(0:this%numnodes - 1), blocks_from_rank(0:this%numnodes - 1), &
212 sendbufs(0:this%numnodes - 1), this%recvbufs(0:this%numnodes - 1), this%result_sendbufs(0:this%numnodes - 1), &
213 this%result_blocks_for_rank(0:this%numnodes - 1), this%result_blocks_from_rank(0:this%numnodes - 1), &
214 this%result_blocks_for_rank_buf_offsets(0:this%numnodes - 1))
220 this%coo_cols_local(i) = cur_col
221 this%coo_rows_local(i) = cur_row
227 CALL this%group%allgather(msgout=this%local_blocks, msgin=blocks_per_rank)
229 DO i = 1, this%numnodes - 1
230 coo_dsplcmnts(i + 1) = coo_dsplcmnts(i) + blocks_per_rank(i)
234 this%nblks = sum(blocks_per_rank)
235 ALLOCATE (this%coo_cols(this%nblks), this%coo_rows(this%nblks), this%coo_col_offsets(this%nblkcols + 1), &
236 this%coo_dptr(this%nblks))
237 CALL this%group%allgatherv(msgout=this%coo_rows_local, msgin=this%coo_rows, rcount=blocks_per_rank, &
238 rdispl=coo_dsplcmnts)
239 CALL this%group%allgatherv(msgout=this%coo_cols_local, msgin=this%coo_cols, rcount=blocks_per_rank, &
240 rdispl=coo_dsplcmnts)
242 DEALLOCATE (blocks_per_rank, coo_dsplcmnts)
245 CALL qsort_two(this%coo_cols_local, this%coo_rows_local, 1, this%local_blocks)
246 CALL qsort_two(this%coo_cols, this%coo_rows, 1, this%nblks)
249 this%coo_col_offsets(this%nblkcols + 1) = this%nblks + 1
251 DO i = 1, this%nblkcols
252 DO WHILE ((j .LE. this%nblks))
253 IF (this%coo_cols(j) .GE. i)
EXIT
256 this%coo_col_offsets(i) = j
260 this%coo_col_offsets_local(this%nblkcols + 1) = this%local_blocks + 1
262 DO i = 1, this%nblkcols
263 DO WHILE ((j .LE. this%local_blocks))
264 IF (this%coo_cols_local(j) .GE. i)
EXIT
267 this%coo_col_offsets_local(i) = j
276 this%number_of_submatrices = this%nblkcols/this%cols_per_sm
277 ALLOCATE (this%submatrix_sizes(this%number_of_submatrices))
278 this%submatrix_sizes = 0
280 DO i = 1, this%number_of_submatrices
281 CALL nonzero_rows%reset
282 DO j = (i - 1)*this%cols_per_sm + 1, i*this%cols_per_sm
283 DO k = this%coo_col_offsets(j), this%coo_col_offsets(j + 1) - 1
284 CALL nonzero_rows%insert(this%coo_rows(k))
287 DO j = 1, nonzero_rows%elements
288 this%submatrix_sizes(i) = this%submatrix_sizes(i) + this%col_blk_size(nonzero_rows%get(j))
290 flops_total = flops_total + 2.0d0*this%submatrix_sizes(i)*this%submatrix_sizes(i)*this%submatrix_sizes(i)
295 ALLOCATE (this%submatrix_owners(this%number_of_submatrices))
296 flops_per_sm = flops_total/this%number_of_submatrices
297 flops_per_rank = flops_total/this%numnodes
298 flops_current = 0.0d0
300 DO i = 1, this%number_of_submatrices
301 this%submatrix_owners(i) = j
302 flops_current = flops_current + 2.0d0*this%submatrix_sizes(i)*this%submatrix_sizes(i)*this%submatrix_sizes(i)
303 flops_remaining = flops_total - flops_current
304 flops_threshold = (this%numnodes - j - 1)*flops_per_rank
305 IF ((j .LT. (this%numnodes - 1)) &
306 .AND. ((flops_remaining .LE. flops_threshold &
307 .OR. (this%number_of_submatrices - i) .LT. (this%numnodes - j))))
THEN
309 flops_total = flops_total - flops_current
310 flops_current = 0.0d0
318 ALLOCATE (result_blocks_from_rank_t(0:numthreads - 1), &
319 result_blocks_for_rank_t(0:numthreads - 1), &
320 blocks_from_rank_t(0:numthreads - 1), &
321 nonzero_rows_t(0:numthreads - 1))
333 ALLOCATE (nonzero_rows_t(mytid)%sets(1), &
334 result_blocks_from_rank_t(mytid)%sets(0:this%numnodes - 1), &
335 result_blocks_for_rank_t(mytid)%sets(0:this%numnodes - 1), &
336 blocks_from_rank_t(mytid)%sets(0:this%numnodes - 1))
339 DO i = 1, this%number_of_submatrices
340 CALL nonzero_rows_t(mytid)%sets(1)%reset
341 DO j = (i - 1)*this%cols_per_sm + 1, i*this%cols_per_sm
342 DO k = this%coo_col_offsets(j), this%coo_col_offsets(j + 1) - 1
346 IF (m .EQ. this%myrank)
THEN
347 CALL result_blocks_from_rank_t(mytid)%sets(this%submatrix_owners(i))%insert(k)
349 IF (this%submatrix_owners(i) .EQ. this%myrank)
THEN
350 CALL nonzero_rows_t(mytid)%sets(1)%insert(this%coo_rows(k))
351 CALL result_blocks_for_rank_t(mytid)%sets(m)%insert(k)
356 IF (this%submatrix_owners(i) .NE. this%myrank) cycle
360 DO j = 1, nonzero_rows_t(mytid)%sets(1)%elements
361 cur_col = nonzero_rows_t(mytid)%sets(1)%get(j)
362 l_limit_left = this%coo_col_offsets(cur_col)
363 l_limit_right = this%coo_col_offsets(cur_col + 1) - 1
364 DO k = 1, nonzero_rows_t(mytid)%sets(1)%elements
365 cur_row = nonzero_rows_t(mytid)%sets(1)%get(k)
367 DO WHILE (l .LE. l_limit_right)
368 IF (this%coo_rows(l) .GE. cur_row)
EXIT
372 IF (l .LE. l_limit_right)
THEN
373 IF (this%coo_rows(l) .EQ. cur_row)
THEN
376 column=this%coo_cols(l), processor=m)
377 CALL blocks_from_rank_t(mytid)%sets(m)%insert(l)
387 DO i = 0, this%numnodes - 1
388 DO j = 0, numthreads - 1
389 DO k = 1, result_blocks_from_rank_t(j)%sets(i)%elements
390 CALL this%result_blocks_from_rank(i)%insert(result_blocks_from_rank_t(j)%sets(i)%get(k))
392 CALL result_blocks_from_rank_t(j)%sets(i)%reset
393 DO k = 1, result_blocks_for_rank_t(j)%sets(i)%elements
394 CALL this%result_blocks_for_rank(i)%insert(result_blocks_for_rank_t(j)%sets(i)%get(k))
396 CALL result_blocks_for_rank_t(j)%sets(i)%reset
397 DO k = 1, blocks_from_rank_t(j)%sets(i)%elements
398 CALL blocks_from_rank(i)%insert(blocks_from_rank_t(j)%sets(i)%get(k))
400 CALL blocks_from_rank_t(j)%sets(i)%reset
403 DO i = 0, numthreads - 1
404 CALL nonzero_rows_t(i)%sets(1)%reset
405 DEALLOCATE (result_blocks_from_rank_t(i)%sets, result_blocks_for_rank_t(i)%sets, blocks_from_rank_t(i)%sets, &
406 nonzero_rows_t(i)%sets)
408 DEALLOCATE (result_blocks_from_rank_t, result_blocks_for_rank_t, blocks_from_rank_t, nonzero_rows_t)
411 ALLOCATE (num_blockids_send(0:this%numnodes - 1), num_blockids_recv(0:this%numnodes - 1))
412 DO i = 0, this%numnodes - 1
413 num_blockids_send(i) = blocks_from_rank(i)%elements
415 CALL this%group%alltoall(num_blockids_send, num_blockids_recv, 1)
416 DO i = 0, this%numnodes - 1
417 CALL blocks_for_rank(i)%alloc(num_blockids_recv(i))
419 DEALLOCATE (num_blockids_send, num_blockids_recv)
421 IF (this%numnodes .GT. 1)
THEN
422 DO i = 1, this%numnodes
423 k =
modulo(this%myrank - i, this%numnodes)
424 CALL this%group%irecv(msgout=blocks_for_rank(k)%data, source=k, request=blocks_for_rank(k)%mpi_request)
426 DO i = 1, this%numnodes
427 k =
modulo(this%myrank + i, this%numnodes)
428 CALL this%group%send(blocks_from_rank(k)%getall(), k, 0)
430 DO i = 0, this%numnodes - 1
431 CALL blocks_for_rank(i)%mpi_request%wait()
434 blocks_for_rank(0)%data = blocks_from_rank(0)%getall()
438 CALL nonzero_rows%reset
441 DO m = 0, this%numnodes - 1
442 IF ((.NOT. this%result_blocks_for_rank(m)%sorted_up_to_date) .AND. (this%result_blocks_for_rank(m)%elements .GT. 0))
THEN
443 CALL this%result_blocks_for_rank(m)%update_sorted
448 DO i = 0, this%numnodes - 1
450 DO j = 1, blocks_for_rank(i)%size
451 k = blocks_for_rank(i)%data(j)
452 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
454 CALL sendbufs(i)%alloc(bufsize)
457 CALL this%result_blocks_for_rank_buf_offsets(i)%alloc(this%result_blocks_for_rank(i)%elements)
458 DO j = 1, this%result_blocks_for_rank(i)%elements
459 k = this%result_blocks_for_rank(i)%get(j)
460 this%result_blocks_for_rank_buf_offsets(i)%data(j) = bufsize
461 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
463 CALL this%result_sendbufs(i)%alloc(bufsize)
466 DO j = 1, blocks_for_rank(i)%size
467 k = blocks_for_rank(i)%data(j)
468 CALL dbcsr_get_block_p(this%dbcsr_mat, row=this%coo_rows(k), col=this%coo_cols(k), block=blockp, found=valid)
469 IF (.NOT. valid)
THEN
470 cpabort(
"Block included in our COO and placed on our rank could not be fetched!")
472 bufsize_next = bufsize +
SIZE(blockp)
473 sendbufs(i)%data(bufsize + 1:bufsize_next) = reshape(blockp, [
SIZE(blockp)])
474 bufsize = bufsize_next
479 DO i = 0, this%numnodes - 1
481 DO j = 1, blocks_from_rank(i)%elements
482 k = blocks_from_rank(i)%get(j)
483 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
485 CALL this%recvbufs(i)%alloc(bufsize)
487 DO j = 1, blocks_from_rank(i)%elements
488 k = blocks_from_rank(i)%get(j)
489 bufsize_next = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
490 this%coo_dptr(k)%target => this%recvbufs(i)%data(bufsize + 1:bufsize_next)
491 bufsize = bufsize_next
495 DO i = 0, this%numnodes - 1
496 CALL blocks_for_rank(i)%dealloc
497 CALL blocks_from_rank(i)%reset
499 DEALLOCATE (blocks_for_rank, blocks_from_rank)
501 IF (this%numnodes .GT. 1)
THEN
504 DO i = 1, this%numnodes
505 k =
modulo(this%myrank - i, this%numnodes)
506 CALL this%group%irecv(msgout=this%recvbufs(k)%data, source=k, request=this%recvbufs(k)%mpi_request)
507 k =
modulo(this%myrank + i, this%numnodes)
508 CALL this%group%isend(msgin=sendbufs(k)%data, dest=k, request=sendbufs(k)%mpi_request)
510 DO i = 0, this%numnodes - 1
511 CALL sendbufs(i)%mpi_request%wait()
512 CALL this%recvbufs(i)%mpi_request%wait()
515 this%recvbufs(0)%data = sendbufs(0)%data
518 DO i = 0, this%numnodes - 1
519 CALL sendbufs(i)%dealloc
521 DEALLOCATE (sendbufs)
523 this%initialized = .true.
525 CALL timestop(handle)
526 END SUBROUTINE submatrix_dissection_init
532 PURE SUBROUTINE submatrix_dissection_final(this)
536 this%initialized = .false.
538 IF (
ALLOCATED(this%submatrix_sizes))
DEALLOCATE (this%submatrix_sizes)
539 IF (
ALLOCATED(this%coo_cols_local))
DEALLOCATE (this%coo_cols_local)
540 IF (
ALLOCATED(this%coo_rows_local))
DEALLOCATE (this%coo_rows_local)
541 IF (
ALLOCATED(this%coo_col_offsets_local))
DEALLOCATE (this%coo_col_offsets_local)
542 IF (
ALLOCATED(this%result_blocks_for_rank_buf_offsets))
THEN
543 DO i = 0, this%numnodes - 1
544 CALL this%result_blocks_for_rank_buf_offsets(i)%dealloc
546 DEALLOCATE (this%result_blocks_for_rank_buf_offsets)
548 IF (
ALLOCATED(this%recvbufs))
THEN
549 DO i = 0, this%numnodes - 1
550 CALL this%recvbufs(i)%dealloc
552 DEALLOCATE (this%recvbufs)
554 IF (
ALLOCATED(this%result_sendbufs))
THEN
555 DO i = 0, this%numnodes - 1
556 CALL this%result_sendbufs(i)%dealloc
558 DEALLOCATE (this%result_sendbufs)
560 IF (
ALLOCATED(this%result_blocks_for_rank))
THEN
561 DO i = 0, this%numnodes - 1
562 CALL this%result_blocks_for_rank(i)%reset
564 DEALLOCATE (this%result_blocks_for_rank)
566 IF (
ALLOCATED(this%result_blocks_from_rank))
THEN
567 DO i = 0, this%numnodes - 1
568 CALL this%result_blocks_from_rank(i)%reset
570 DEALLOCATE (this%result_blocks_from_rank)
572 IF (
ALLOCATED(this%coo_cols))
DEALLOCATE (this%coo_cols)
573 IF (
ALLOCATED(this%coo_rows))
DEALLOCATE (this%coo_rows)
574 IF (
ALLOCATED(this%coo_col_offsets))
DEALLOCATE (this%coo_col_offsets)
575 IF (
ALLOCATED(this%coo_dptr))
DEALLOCATE (this%coo_dptr)
576 IF (
ALLOCATED(this%submatrix_owners))
DEALLOCATE (this%submatrix_owners)
578 END SUBROUTINE submatrix_dissection_final
586 SUBROUTINE submatrix_generate_sm(this, sm_id, sm)
588 INTEGER,
INTENT(IN) :: sm_id
589 REAL(KIND=
dp),
DIMENSION(:, :),
ALLOCATABLE,
INTENT(OUT) :: sm
591 INTEGER :: sm_dim, i, j, k, offset_x1, offset_x2, offset_y1, &
592 offset_y2, k_limit_left, k_limit_right
595 IF (.NOT. this%initialized)
THEN
596 cpabort(
"Submatrix dissection not initialized")
599 IF (this%myrank .NE. this%submatrix_owners(sm_id))
THEN
600 cpabort(
"This rank is not supposed to construct this submatrix")
604 CALL nonzero_rows%reset
605 DO i = (sm_id - 1)*this%cols_per_sm + 1, sm_id*this%cols_per_sm
606 DO j = this%coo_col_offsets(i), this%coo_col_offsets(i + 1) - 1
607 CALL nonzero_rows%insert(this%coo_rows(j))
611 DO i = 1, nonzero_rows%elements
612 sm_dim = sm_dim + this%col_blk_size(nonzero_rows%get(i))
615 ALLOCATE (sm(sm_dim, sm_dim))
619 DO j = 1, nonzero_rows%elements
620 offset_x2 = offset_x1 + this%col_blk_size(nonzero_rows%get(j))
622 k_limit_left = this%coo_col_offsets(nonzero_rows%get(j))
623 k_limit_right = this%coo_col_offsets(nonzero_rows%get(j) + 1) - 1
624 DO i = 1, nonzero_rows%elements
625 offset_y2 = offset_y1 + this%col_blk_size(nonzero_rows%get(i))
628 DO WHILE (k .LE. k_limit_right)
629 IF (this%coo_rows(k) .GE. nonzero_rows%get(i))
EXIT
633 IF (this%coo_rows(k) .EQ. nonzero_rows%get(i))
THEN
634 sm(offset_y1 + 1:offset_y2, offset_x1 + 1:offset_x2) = reshape(this%coo_dptr(k)%target, &
635 (/offset_y2 - offset_y1, offset_x2 - offset_x1/))
637 offset_y1 = offset_y2
639 offset_x1 = offset_x2
643 CALL nonzero_rows%reset
645 END SUBROUTINE submatrix_generate_sm
653 SUBROUTINE submatrix_get_sm_ids_for_rank(this, rank, sm_ids)
655 INTEGER,
INTENT(IN) :: rank
656 INTEGER,
DIMENSION(:),
ALLOCATABLE,
INTENT(OUT) :: sm_ids
659 IF (.NOT. this%initialized)
THEN
660 cpabort(
"Submatrix dissection not initialized")
664 DO i = 1, this%number_of_submatrices
665 IF (this%submatrix_owners(i) .EQ. rank) count = count + 1
668 ALLOCATE (sm_ids(count))
671 DO i = 1, this%number_of_submatrices
672 IF (this%submatrix_owners(i) .EQ. rank)
THEN
678 END SUBROUTINE submatrix_get_sm_ids_for_rank
686 SUBROUTINE submatrix_cpy_resultcol(this, sm_id, sm)
688 INTEGER,
INTENT(in) :: sm_id
689 REAL(KIND=
dp),
DIMENSION(:, :),
ALLOCATABLE,
INTENT(IN) :: sm
692 INTEGER :: i, j, k, m, sm_col_offset, offset_x1, offset_x2, offset_y1, &
693 offset_y2, bufsize, bufsize_next, k_limit_left, k_limit_right
694 INTEGER,
DIMENSION(:),
ALLOCATABLE :: buf_offset_idxs
696 IF (.NOT. this%initialized)
THEN
697 cpabort(
"Submatrix dissection is not initizalized")
700 IF (this%myrank .NE. this%submatrix_owners(sm_id))
THEN
701 cpabort(
"This rank is not supposed to operate on this submatrix")
704 ALLOCATE (buf_offset_idxs(0:this%numnodes - 1))
709 DO i = (sm_id - 1)*this%cols_per_sm + 1, sm_id*this%cols_per_sm
710 DO j = this%coo_col_offsets(i), this%coo_col_offsets(i + 1) - 1
711 CALL nonzero_rows%insert(this%coo_rows(j))
716 DO i = 1, nonzero_rows%elements
717 IF (nonzero_rows%get(i) .EQ. (sm_id - 1)*this%cols_per_sm + 1)
THEN
723 IF (sm_col_offset .EQ. 0)
THEN
724 cpabort(
"Could not determine relevant result columns of submatrix")
728 DO j = 1, nonzero_rows%elements
729 offset_x2 = offset_x1 + this%col_blk_size(nonzero_rows%get(j))
731 IF ((j .GE. sm_col_offset) .AND. (j .LT. sm_col_offset + this%cols_per_sm))
THEN
733 k_limit_left = this%coo_col_offsets(nonzero_rows%get(j))
734 k_limit_right = this%coo_col_offsets(nonzero_rows%get(j) + 1) - 1
735 DO i = 1, nonzero_rows%elements
736 offset_y2 = offset_y1 + this%col_blk_size(nonzero_rows%get(i))
740 DO WHILE (k .LE. k_limit_right)
741 IF (this%coo_rows(k) .GE. nonzero_rows%get(i))
EXIT
745 IF (this%coo_rows(k) .EQ. nonzero_rows%get(i))
THEN
748 DO WHILE (buf_offset_idxs(m) .LE. this%result_blocks_for_rank(m)%elements)
749 IF (this%result_blocks_for_rank(m)%get(buf_offset_idxs(m)) .GE. k)
EXIT
750 buf_offset_idxs(m) = buf_offset_idxs(m) + 1
752 IF (this%result_blocks_for_rank(m)%get(buf_offset_idxs(m)) .NE. k)
THEN
753 cpabort(
"Could not determine buffer offset for block")
755 bufsize = this%result_blocks_for_rank_buf_offsets(m)%data(buf_offset_idxs(m))
756 bufsize_next = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
757 this%result_sendbufs(m)%data(bufsize + 1:bufsize_next) = reshape( &
758 sm(offset_y1 + 1:offset_y2, offset_x1 + 1:offset_x2), &
759 (/bufsize_next - bufsize/))
761 offset_y1 = offset_y2
764 offset_x1 = offset_x2
767 DEALLOCATE (buf_offset_idxs)
770 CALL nonzero_rows%reset
772 END SUBROUTINE submatrix_cpy_resultcol
782 SUBROUTINE submatrix_communicate_results(this, resultmat)
786 INTEGER :: i, j, k, cur_row, cur_col, cur_sm, cur_buf, last_buf, &
787 bufsize, bufsize_next, row_size, col_size
788 REAL(kind=
dp),
DIMENSION(:),
POINTER :: vector
789 TYPE(
buffer_type),
DIMENSION(:),
ALLOCATABLE :: result_recvbufs
791 CHARACTER(LEN=*),
PARAMETER :: routinen =
'submatrix_communicate_results'
794 CALL timeset(routinen, handle)
796 ALLOCATE (result_recvbufs(0:this%numnodes - 1))
797 DO i = 0, this%numnodes - 1
799 DO j = 1, this%result_blocks_from_rank(i)%elements
800 k = this%result_blocks_from_rank(i)%get(j)
801 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
803 CALL result_recvbufs(i)%alloc(bufsize)
808 IF (this%numnodes .GT. 1)
THEN
809 DO i = 1, this%numnodes
810 k =
modulo(this%myrank - i, this%numnodes)
811 CALL this%group%irecv(msgout=result_recvbufs(k)%data, source=k, request=result_recvbufs(k)%mpi_request)
812 k =
modulo(this%myrank + i, this%numnodes)
813 CALL this%group%isend(msgin=this%result_sendbufs(k)%data, dest=k, request=this%result_sendbufs(k)%mpi_request)
815 DO i = 0, this%numnodes - 1
816 CALL this%result_sendbufs(i)%mpi_request%wait()
817 CALL result_recvbufs(i)%mpi_request%wait()
820 result_recvbufs(0)%data = this%result_sendbufs(0)%data
825 DO i = 1, this%local_blocks
826 cur_col = this%coo_cols_local(i)
827 cur_row = this%coo_rows_local(i)
828 cur_sm = (cur_col - 1)/this%cols_per_sm + 1
829 cur_buf = this%submatrix_owners(cur_sm)
830 IF (cur_buf .GT. last_buf) bufsize = 0
831 row_size = this%row_blk_size(cur_row)
832 col_size = this%col_blk_size(cur_col)
833 bufsize_next = bufsize + row_size*col_size
834 vector => result_recvbufs(cur_buf)%data(bufsize + 1:bufsize_next)
836 block=reshape(vector, [row_size, col_size]))
837 bufsize = bufsize_next
841 DO i = 0, this%numnodes - 1
842 CALL result_recvbufs(i)%dealloc
844 DEALLOCATE (result_recvbufs)
848 CALL timestop(handle)
849 END SUBROUTINE submatrix_communicate_results
858 RECURSIVE PURE SUBROUTINE qsort_two(arr_a, arr_b, left, right)
860 INTEGER,
DIMENSION(:),
INTENT(inout) :: arr_a, arr_b
861 INTEGER,
INTENT(in) :: left, right
863 INTEGER :: i, j, pivot_a, pivot_b, tmp
865 IF (right - left .LT. 1)
RETURN
869 pivot_a = arr_a(right)
870 pivot_b = arr_b(right)
873 DO WHILE ((arr_a(i) .LT. pivot_a) .OR. ((arr_a(i) .EQ. pivot_a) .AND. (arr_b(i) .LT. pivot_b)))
876 DO WHILE ((j .GT. i) .AND. ((arr_a(j) .GT. pivot_a) .OR. ((arr_a(j) .EQ. pivot_a) .AND. (arr_b(j) .GE. pivot_b))))
891 IF ((arr_a(i) .GT. pivot_a) .OR. (arr_a(i) .EQ. pivot_a .AND. arr_b(i) .GT. pivot_b))
THEN
893 arr_a(i) = arr_a(right)
896 arr_b(i) = arr_b(right)
900 IF (i - 1 .GT. left)
CALL qsort_two(arr_a, arr_b, left, i - 1)
901 IF (i + 1 .LT. right)
CALL qsort_two(arr_a, arr_b, i + 1, right)
903 END SUBROUTINE qsort_two
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public lass2018
subroutine, public dbcsr_iterator_next_block(iterator, row, column, block, block_number_argument_has_been_removed, row_size, col_size, row_offset, col_offset)
...
logical function, public dbcsr_iterator_blocks_left(iterator)
...
subroutine, public dbcsr_iterator_stop(iterator)
...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_get_stored_coordinates(matrix, row, column, processor)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_iterator_readonly_start(iterator, matrix, shared, dynamic, dynamic_byrows)
Like dbcsr_iterator_start() but with matrix being INTENT(IN). When invoking this routine,...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
Defines the basic variable types.
integer, parameter, public dp
Interface to the message passing library MPI.
subroutine submatrix_get_relevant_sm_columns(this, sm_id, first, last)
determine which columns of the submatrix are relevant for the result matrix