44 USE dbcsr_api,
ONLY: &
45 dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, &
46 dbcsr_get_info, dbcsr_get_stored_coordinates, dbcsr_iterator_blocks_left, &
47 dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
48 dbcsr_put_block, dbcsr_type
59 #include "./base/base_uses.f90"
64 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'submatrix_dissection'
67 TYPE(dbcsr_type) :: dbcsr_mat
68 TYPE(dbcsr_distribution_type) :: dist
69 LOGICAL :: initialized = .false.
70 TYPE(mp_comm_type) :: group
71 INTEGER :: numnodes, myrank, nblkcols, nblkrows, nblks, local_blocks, &
72 cols_per_sm, number_of_submatrices
73 INTEGER,
DIMENSION(:),
POINTER :: row_blk_size, col_blk_size
74 INTEGER,
DIMENSION(:),
ALLOCATABLE :: coo_cols, coo_rows, coo_col_offsets, coo_cols_local, coo_rows_local, &
75 coo_col_offsets_local, submatrix_owners, submatrix_sizes
76 TYPE(
buffer_type),
DIMENSION(:),
ALLOCATABLE :: recvbufs, result_sendbufs
77 TYPE(
set_type),
DIMENSION(:),
ALLOCATABLE :: result_blocks_for_rank, result_blocks_from_rank
79 TYPE(
intbuffer_type),
DIMENSION(:),
ALLOCATABLE :: result_blocks_for_rank_buf_offsets
81 PROCEDURE :: init => submatrix_dissection_init
82 PROCEDURE :: final => submatrix_dissection_final
83 PROCEDURE :: get_sm_ids_for_rank => submatrix_get_sm_ids_for_rank
84 PROCEDURE :: generate_submatrix => submatrix_generate_sm
85 PROCEDURE :: copy_resultcol => submatrix_cpy_resultcol
86 PROCEDURE :: communicate_results => submatrix_communicate_results
101 INTEGER,
INTENT(IN) :: sm_id
102 INTEGER,
INTENT(OUT) :: first, last
103 INTEGER :: i, j, blkid
107 DO i = (sm_id - 1)*this%cols_per_sm + 1, sm_id*this%cols_per_sm
108 DO j = this%coo_col_offsets(i), this%coo_col_offsets(i + 1) - 1
109 CALL nonzero_rows%insert(this%coo_rows(j))
114 DO i = 1, nonzero_rows%elements
115 blkid = nonzero_rows%get(i)
116 IF (blkid .EQ. (sm_id - 1)*this%cols_per_sm + 1)
THEN
120 DO j = i, nonzero_rows%elements
121 blkid = nonzero_rows%get(j)
122 last = last + this%col_blk_size(blkid)
123 IF (blkid .EQ. (sm_id)*this%cols_per_sm)
EXIT
127 first = first + this%col_blk_size(blkid)
130 CALL nonzero_rows%reset
142 SUBROUTINE submatrix_dissection_init(this, matrix_p)
144 TYPE(dbcsr_type),
INTENT(IN) :: matrix_p
146 INTEGER :: cur_row, cur_col, cur_blk, i, j, k, l, m, l_limit_left, l_limit_right, &
147 bufsize, bufsize_next
148 INTEGER,
DIMENSION(:),
ALLOCATABLE :: blocks_per_rank, coo_dsplcmnts, num_blockids_send, num_blockids_recv
149 TYPE(dbcsr_iterator_type) :: iter
151 REAL(KIND=
dp) :: flops_total, flops_per_rank, flops_per_sm, flops_threshold, &
152 flops_current, flops_remaining
155 TYPE(
set_type),
DIMENSION(:),
ALLOCATABLE :: blocks_from_rank
156 TYPE(
buffer_type),
DIMENSION(:),
ALLOCATABLE :: sendbufs
157 TYPE(
intbuffer_type),
DIMENSION(:),
ALLOCATABLE :: blocks_for_rank
160 INTEGER :: numthreads, mytid, group_handle
162 TYPE(
setarray_type),
DIMENSION(:),
ALLOCATABLE :: nonzero_rows_t
163 TYPE(
setarray_type),
DIMENSION(:),
ALLOCATABLE :: result_blocks_from_rank_t, result_blocks_for_rank_t, &
167 REAL(KIND=
dp),
DIMENSION(:),
POINTER :: blockp
169 CHARACTER(LEN=*),
PARAMETER :: routineN =
'submatrix_dissection_init'
172 CALL timeset(routinen, handle)
175 this%dbcsr_mat = matrix_p
176 CALL dbcsr_get_info(matrix=this%dbcsr_mat, nblkcols_total=this%nblkcols, nblkrows_total=this%nblkrows, &
177 row_blk_size=this%row_blk_size, col_blk_size=this%col_blk_size, group=group_handle, distribution=this%dist)
178 CALL dbcsr_distribution_get(dist=this%dist, mynode=this%myrank, numnodes=this%numnodes)
180 CALL this%group%set_handle(group_handle)
182 IF (this%nblkcols .NE. this%nblkrows)
THEN
183 cpabort(
"Number of block rows and cols need to be identical")
186 DO i = 1, this%nblkcols
187 IF (this%col_blk_size(i) .NE. this%row_blk_size(i))
THEN
188 cpabort(
"Block row sizes and col sizes need to be identical")
202 this%local_blocks = 0
203 CALL dbcsr_iterator_start(iter, this%dbcsr_mat, read_only=.true.)
204 DO WHILE (dbcsr_iterator_blocks_left(iter))
205 CALL dbcsr_iterator_next_block(iter, cur_row, cur_col, cur_blk)
206 this%local_blocks = this%local_blocks + 1
208 CALL dbcsr_iterator_stop(iter)
210 ALLOCATE (this%coo_cols_local(this%local_blocks), this%coo_rows_local(this%local_blocks), blocks_per_rank(this%numnodes), &
211 coo_dsplcmnts(this%numnodes), this%coo_col_offsets_local(this%nblkcols + 1), &
212 blocks_for_rank(0:this%numnodes - 1), blocks_from_rank(0:this%numnodes - 1), &
213 sendbufs(0:this%numnodes - 1), this%recvbufs(0:this%numnodes - 1), this%result_sendbufs(0:this%numnodes - 1), &
214 this%result_blocks_for_rank(0:this%numnodes - 1), this%result_blocks_from_rank(0:this%numnodes - 1), &
215 this%result_blocks_for_rank_buf_offsets(0:this%numnodes - 1))
218 CALL dbcsr_iterator_start(iter, this%dbcsr_mat, read_only=.true.)
219 DO WHILE (dbcsr_iterator_blocks_left(iter))
220 CALL dbcsr_iterator_next_block(iter, cur_row, cur_col, cur_blk)
221 this%coo_cols_local(i) = cur_col
222 this%coo_rows_local(i) = cur_row
225 CALL dbcsr_iterator_stop(iter)
228 CALL this%group%allgather(msgout=this%local_blocks, msgin=blocks_per_rank)
230 DO i = 1, this%numnodes - 1
231 coo_dsplcmnts(i + 1) = coo_dsplcmnts(i) + blocks_per_rank(i)
235 this%nblks = sum(blocks_per_rank)
236 ALLOCATE (this%coo_cols(this%nblks), this%coo_rows(this%nblks), this%coo_col_offsets(this%nblkcols + 1), &
237 this%coo_dptr(this%nblks))
238 CALL this%group%allgatherv(msgout=this%coo_rows_local, msgin=this%coo_rows, rcount=blocks_per_rank, &
239 rdispl=coo_dsplcmnts)
240 CALL this%group%allgatherv(msgout=this%coo_cols_local, msgin=this%coo_cols, rcount=blocks_per_rank, &
241 rdispl=coo_dsplcmnts)
243 DEALLOCATE (blocks_per_rank, coo_dsplcmnts)
246 CALL qsort_two(this%coo_cols_local, this%coo_rows_local, 1, this%local_blocks)
247 CALL qsort_two(this%coo_cols, this%coo_rows, 1, this%nblks)
250 this%coo_col_offsets(this%nblkcols + 1) = this%nblks + 1
252 DO i = 1, this%nblkcols
253 DO WHILE ((j .LE. this%nblks))
254 IF (this%coo_cols(j) .GE. i)
EXIT
257 this%coo_col_offsets(i) = j
261 this%coo_col_offsets_local(this%nblkcols + 1) = this%local_blocks + 1
263 DO i = 1, this%nblkcols
264 DO WHILE ((j .LE. this%local_blocks))
265 IF (this%coo_cols_local(j) .GE. i)
EXIT
268 this%coo_col_offsets_local(i) = j
277 this%number_of_submatrices = this%nblkcols/this%cols_per_sm
278 ALLOCATE (this%submatrix_sizes(this%number_of_submatrices))
279 this%submatrix_sizes = 0
281 DO i = 1, this%number_of_submatrices
282 CALL nonzero_rows%reset
283 DO j = (i - 1)*this%cols_per_sm + 1, i*this%cols_per_sm
284 DO k = this%coo_col_offsets(j), this%coo_col_offsets(j + 1) - 1
285 CALL nonzero_rows%insert(this%coo_rows(k))
288 DO j = 1, nonzero_rows%elements
289 this%submatrix_sizes(i) = this%submatrix_sizes(i) + this%col_blk_size(nonzero_rows%get(j))
291 flops_total = flops_total + 2.0d0*this%submatrix_sizes(i)*this%submatrix_sizes(i)*this%submatrix_sizes(i)
296 ALLOCATE (this%submatrix_owners(this%number_of_submatrices))
297 flops_per_sm = flops_total/this%number_of_submatrices
298 flops_per_rank = flops_total/this%numnodes
299 flops_current = 0.0d0
301 DO i = 1, this%number_of_submatrices
302 this%submatrix_owners(i) = j
303 flops_current = flops_current + 2.0d0*this%submatrix_sizes(i)*this%submatrix_sizes(i)*this%submatrix_sizes(i)
304 flops_remaining = flops_total - flops_current
305 flops_threshold = (this%numnodes - j - 1)*flops_per_rank
306 IF ((j .LT. (this%numnodes - 1)) &
307 .AND. ((flops_remaining .LE. flops_threshold &
308 .OR. (this%number_of_submatrices - i) .LT. (this%numnodes - j))))
THEN
310 flops_total = flops_total - flops_current
311 flops_current = 0.0d0
319 ALLOCATE (result_blocks_from_rank_t(0:numthreads - 1), &
320 result_blocks_for_rank_t(0:numthreads - 1), &
321 blocks_from_rank_t(0:numthreads - 1), &
322 nonzero_rows_t(0:numthreads - 1))
334 ALLOCATE (nonzero_rows_t(mytid)%sets(1), &
335 result_blocks_from_rank_t(mytid)%sets(0:this%numnodes - 1), &
336 result_blocks_for_rank_t(mytid)%sets(0:this%numnodes - 1), &
337 blocks_from_rank_t(mytid)%sets(0:this%numnodes - 1))
340 DO i = 1, this%number_of_submatrices
341 CALL nonzero_rows_t(mytid)%sets(1)%reset
342 DO j = (i - 1)*this%cols_per_sm + 1, i*this%cols_per_sm
343 DO k = this%coo_col_offsets(j), this%coo_col_offsets(j + 1) - 1
346 CALL dbcsr_get_stored_coordinates(matrix=this%dbcsr_mat, row=this%coo_rows(k), column=j, processor=m)
347 IF (m .EQ. this%myrank)
THEN
348 CALL result_blocks_from_rank_t(mytid)%sets(this%submatrix_owners(i))%insert(k)
350 IF (this%submatrix_owners(i) .EQ. this%myrank)
THEN
351 CALL nonzero_rows_t(mytid)%sets(1)%insert(this%coo_rows(k))
352 CALL result_blocks_for_rank_t(mytid)%sets(m)%insert(k)
357 IF (this%submatrix_owners(i) .NE. this%myrank) cycle
361 DO j = 1, nonzero_rows_t(mytid)%sets(1)%elements
362 cur_col = nonzero_rows_t(mytid)%sets(1)%get(j)
363 l_limit_left = this%coo_col_offsets(cur_col)
364 l_limit_right = this%coo_col_offsets(cur_col + 1) - 1
365 DO k = 1, nonzero_rows_t(mytid)%sets(1)%elements
366 cur_row = nonzero_rows_t(mytid)%sets(1)%get(k)
368 DO WHILE (l .LE. l_limit_right)
369 IF (this%coo_rows(l) .GE. cur_row)
EXIT
373 IF (l .LE. l_limit_right)
THEN
374 IF (this%coo_rows(l) .EQ. cur_row)
THEN
376 CALL dbcsr_get_stored_coordinates(matrix=this%dbcsr_mat, row=this%coo_rows(l), &
377 column=this%coo_cols(l), processor=m)
378 CALL blocks_from_rank_t(mytid)%sets(m)%insert(l)
388 DO i = 0, this%numnodes - 1
389 DO j = 0, numthreads - 1
390 DO k = 1, result_blocks_from_rank_t(j)%sets(i)%elements
391 CALL this%result_blocks_from_rank(i)%insert(result_blocks_from_rank_t(j)%sets(i)%get(k))
393 CALL result_blocks_from_rank_t(j)%sets(i)%reset
394 DO k = 1, result_blocks_for_rank_t(j)%sets(i)%elements
395 CALL this%result_blocks_for_rank(i)%insert(result_blocks_for_rank_t(j)%sets(i)%get(k))
397 CALL result_blocks_for_rank_t(j)%sets(i)%reset
398 DO k = 1, blocks_from_rank_t(j)%sets(i)%elements
399 CALL blocks_from_rank(i)%insert(blocks_from_rank_t(j)%sets(i)%get(k))
401 CALL blocks_from_rank_t(j)%sets(i)%reset
404 DO i = 0, numthreads - 1
405 CALL nonzero_rows_t(i)%sets(1)%reset
406 DEALLOCATE (result_blocks_from_rank_t(i)%sets, result_blocks_for_rank_t(i)%sets, blocks_from_rank_t(i)%sets, &
407 nonzero_rows_t(i)%sets)
409 DEALLOCATE (result_blocks_from_rank_t, result_blocks_for_rank_t, blocks_from_rank_t, nonzero_rows_t)
412 ALLOCATE (num_blockids_send(0:this%numnodes - 1), num_blockids_recv(0:this%numnodes - 1))
413 DO i = 0, this%numnodes - 1
414 num_blockids_send(i) = blocks_from_rank(i)%elements
416 CALL this%group%alltoall(num_blockids_send, num_blockids_recv, 1)
417 DO i = 0, this%numnodes - 1
418 CALL blocks_for_rank(i)%alloc(num_blockids_recv(i))
420 DEALLOCATE (num_blockids_send, num_blockids_recv)
422 IF (this%numnodes .GT. 1)
THEN
423 DO i = 1, this%numnodes
424 k =
modulo(this%myrank - i, this%numnodes)
425 CALL this%group%irecv(msgout=blocks_for_rank(k)%data, source=k, request=blocks_for_rank(k)%mpi_request)
427 DO i = 1, this%numnodes
428 k =
modulo(this%myrank + i, this%numnodes)
429 CALL this%group%send(blocks_from_rank(k)%getall(), k, 0)
431 DO i = 0, this%numnodes - 1
432 CALL blocks_for_rank(i)%mpi_request%wait()
435 blocks_for_rank(0)%data = blocks_from_rank(0)%getall()
439 CALL nonzero_rows%reset
442 DO m = 0, this%numnodes - 1
443 IF ((.NOT. this%result_blocks_for_rank(m)%sorted_up_to_date) .AND. (this%result_blocks_for_rank(m)%elements .GT. 0))
THEN
444 CALL this%result_blocks_for_rank(m)%update_sorted
449 DO i = 0, this%numnodes - 1
451 DO j = 1, blocks_for_rank(i)%size
452 k = blocks_for_rank(i)%data(j)
453 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
455 CALL sendbufs(i)%alloc(bufsize)
458 CALL this%result_blocks_for_rank_buf_offsets(i)%alloc(this%result_blocks_for_rank(i)%elements)
459 DO j = 1, this%result_blocks_for_rank(i)%elements
460 k = this%result_blocks_for_rank(i)%get(j)
461 this%result_blocks_for_rank_buf_offsets(i)%data(j) = bufsize
462 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
464 CALL this%result_sendbufs(i)%alloc(bufsize)
467 DO j = 1, blocks_for_rank(i)%size
468 k = blocks_for_rank(i)%data(j)
469 CALL dbcsr_get_block_p(this%dbcsr_mat, row=this%coo_rows(k), col=this%coo_cols(k), block=blockp, found=valid)
470 IF (.NOT. valid)
THEN
471 cpabort(
"Block included in our COO and placed on our rank could not be fetched!")
473 bufsize_next = bufsize +
SIZE(blockp)
474 sendbufs(i)%data(bufsize + 1:bufsize_next) = blockp
475 bufsize = bufsize_next
480 DO i = 0, this%numnodes - 1
482 DO j = 1, blocks_from_rank(i)%elements
483 k = blocks_from_rank(i)%get(j)
484 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
486 CALL this%recvbufs(i)%alloc(bufsize)
488 DO j = 1, blocks_from_rank(i)%elements
489 k = blocks_from_rank(i)%get(j)
490 bufsize_next = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
491 this%coo_dptr(k)%target => this%recvbufs(i)%data(bufsize + 1:bufsize_next)
492 bufsize = bufsize_next
496 DO i = 0, this%numnodes - 1
497 CALL blocks_for_rank(i)%dealloc
498 CALL blocks_from_rank(i)%reset
500 DEALLOCATE (blocks_for_rank, blocks_from_rank)
502 IF (this%numnodes .GT. 1)
THEN
505 DO i = 1, this%numnodes
506 k =
modulo(this%myrank - i, this%numnodes)
507 CALL this%group%irecv(msgout=this%recvbufs(k)%data, source=k, request=this%recvbufs(k)%mpi_request)
508 k =
modulo(this%myrank + i, this%numnodes)
509 CALL this%group%isend(msgin=sendbufs(k)%data, dest=k, request=sendbufs(k)%mpi_request)
511 DO i = 0, this%numnodes - 1
512 CALL sendbufs(i)%mpi_request%wait()
513 CALL this%recvbufs(i)%mpi_request%wait()
516 this%recvbufs(0)%data = sendbufs(0)%data
519 DO i = 0, this%numnodes - 1
520 CALL sendbufs(i)%dealloc
522 DEALLOCATE (sendbufs)
524 this%initialized = .true.
526 CALL timestop(handle)
527 END SUBROUTINE submatrix_dissection_init
533 PURE SUBROUTINE submatrix_dissection_final(this)
537 this%initialized = .false.
539 IF (
ALLOCATED(this%submatrix_sizes))
DEALLOCATE (this%submatrix_sizes)
540 IF (
ALLOCATED(this%coo_cols_local))
DEALLOCATE (this%coo_cols_local)
541 IF (
ALLOCATED(this%coo_rows_local))
DEALLOCATE (this%coo_rows_local)
542 IF (
ALLOCATED(this%coo_col_offsets_local))
DEALLOCATE (this%coo_col_offsets_local)
543 IF (
ALLOCATED(this%result_blocks_for_rank_buf_offsets))
THEN
544 DO i = 0, this%numnodes - 1
545 CALL this%result_blocks_for_rank_buf_offsets(i)%dealloc
547 DEALLOCATE (this%result_blocks_for_rank_buf_offsets)
549 IF (
ALLOCATED(this%recvbufs))
THEN
550 DO i = 0, this%numnodes - 1
551 CALL this%recvbufs(i)%dealloc
553 DEALLOCATE (this%recvbufs)
555 IF (
ALLOCATED(this%result_sendbufs))
THEN
556 DO i = 0, this%numnodes - 1
557 CALL this%result_sendbufs(i)%dealloc
559 DEALLOCATE (this%result_sendbufs)
561 IF (
ALLOCATED(this%result_blocks_for_rank))
THEN
562 DO i = 0, this%numnodes - 1
563 CALL this%result_blocks_for_rank(i)%reset
565 DEALLOCATE (this%result_blocks_for_rank)
567 IF (
ALLOCATED(this%result_blocks_from_rank))
THEN
568 DO i = 0, this%numnodes - 1
569 CALL this%result_blocks_from_rank(i)%reset
571 DEALLOCATE (this%result_blocks_from_rank)
573 IF (
ALLOCATED(this%coo_cols))
DEALLOCATE (this%coo_cols)
574 IF (
ALLOCATED(this%coo_rows))
DEALLOCATE (this%coo_rows)
575 IF (
ALLOCATED(this%coo_col_offsets))
DEALLOCATE (this%coo_col_offsets)
576 IF (
ALLOCATED(this%coo_dptr))
DEALLOCATE (this%coo_dptr)
577 IF (
ALLOCATED(this%submatrix_owners))
DEALLOCATE (this%submatrix_owners)
579 END SUBROUTINE submatrix_dissection_final
587 SUBROUTINE submatrix_generate_sm(this, sm_id, sm)
589 INTEGER,
INTENT(IN) :: sm_id
590 REAL(KIND=
dp),
DIMENSION(:, :),
ALLOCATABLE,
INTENT(OUT) :: sm
592 INTEGER :: sm_dim, i, j, k, offset_x1, offset_x2, offset_y1, &
593 offset_y2, k_limit_left, k_limit_right
596 IF (.NOT. this%initialized)
THEN
597 cpabort(
"Submatrix dissection not initialized")
600 IF (this%myrank .NE. this%submatrix_owners(sm_id))
THEN
601 cpabort(
"This rank is not supposed to construct this submatrix")
605 CALL nonzero_rows%reset
606 DO i = (sm_id - 1)*this%cols_per_sm + 1, sm_id*this%cols_per_sm
607 DO j = this%coo_col_offsets(i), this%coo_col_offsets(i + 1) - 1
608 CALL nonzero_rows%insert(this%coo_rows(j))
612 DO i = 1, nonzero_rows%elements
613 sm_dim = sm_dim + this%col_blk_size(nonzero_rows%get(i))
616 ALLOCATE (sm(sm_dim, sm_dim))
620 DO j = 1, nonzero_rows%elements
621 offset_x2 = offset_x1 + this%col_blk_size(nonzero_rows%get(j))
623 k_limit_left = this%coo_col_offsets(nonzero_rows%get(j))
624 k_limit_right = this%coo_col_offsets(nonzero_rows%get(j) + 1) - 1
625 DO i = 1, nonzero_rows%elements
626 offset_y2 = offset_y1 + this%col_blk_size(nonzero_rows%get(i))
629 DO WHILE (k .LE. k_limit_right)
630 IF (this%coo_rows(k) .GE. nonzero_rows%get(i))
EXIT
634 IF (this%coo_rows(k) .EQ. nonzero_rows%get(i))
THEN
635 sm(offset_y1 + 1:offset_y2, offset_x1 + 1:offset_x2) = reshape(this%coo_dptr(k)%target, &
636 (/offset_y2 - offset_y1, offset_x2 - offset_x1/))
638 offset_y1 = offset_y2
640 offset_x1 = offset_x2
644 CALL nonzero_rows%reset
646 END SUBROUTINE submatrix_generate_sm
654 SUBROUTINE submatrix_get_sm_ids_for_rank(this, rank, sm_ids)
656 INTEGER,
INTENT(IN) :: rank
657 INTEGER,
DIMENSION(:),
ALLOCATABLE,
INTENT(OUT) :: sm_ids
660 IF (.NOT. this%initialized)
THEN
661 cpabort(
"Submatrix dissection not initialized")
665 DO i = 1, this%number_of_submatrices
666 IF (this%submatrix_owners(i) .EQ. rank) count = count + 1
669 ALLOCATE (sm_ids(count))
672 DO i = 1, this%number_of_submatrices
673 IF (this%submatrix_owners(i) .EQ. rank)
THEN
679 END SUBROUTINE submatrix_get_sm_ids_for_rank
687 SUBROUTINE submatrix_cpy_resultcol(this, sm_id, sm)
689 INTEGER,
INTENT(in) :: sm_id
690 REAL(KIND=
dp),
DIMENSION(:, :),
ALLOCATABLE,
INTENT(IN) :: sm
693 INTEGER :: i, j, k, m, sm_col_offset, offset_x1, offset_x2, offset_y1, &
694 offset_y2, bufsize, bufsize_next, k_limit_left, k_limit_right
695 INTEGER,
DIMENSION(:),
ALLOCATABLE :: buf_offset_idxs
697 IF (.NOT. this%initialized)
THEN
698 cpabort(
"Submatrix dissection is not initizalized")
701 IF (this%myrank .NE. this%submatrix_owners(sm_id))
THEN
702 cpabort(
"This rank is not supposed to operate on this submatrix")
705 ALLOCATE (buf_offset_idxs(0:this%numnodes - 1))
710 DO i = (sm_id - 1)*this%cols_per_sm + 1, sm_id*this%cols_per_sm
711 DO j = this%coo_col_offsets(i), this%coo_col_offsets(i + 1) - 1
712 CALL nonzero_rows%insert(this%coo_rows(j))
717 DO i = 1, nonzero_rows%elements
718 IF (nonzero_rows%get(i) .EQ. (sm_id - 1)*this%cols_per_sm + 1)
THEN
724 IF (sm_col_offset .EQ. 0)
THEN
725 cpabort(
"Could not determine relevant result columns of submatrix")
729 DO j = 1, nonzero_rows%elements
730 offset_x2 = offset_x1 + this%col_blk_size(nonzero_rows%get(j))
732 IF ((j .GE. sm_col_offset) .AND. (j .LT. sm_col_offset + this%cols_per_sm))
THEN
734 k_limit_left = this%coo_col_offsets(nonzero_rows%get(j))
735 k_limit_right = this%coo_col_offsets(nonzero_rows%get(j) + 1) - 1
736 DO i = 1, nonzero_rows%elements
737 offset_y2 = offset_y1 + this%col_blk_size(nonzero_rows%get(i))
741 DO WHILE (k .LE. k_limit_right)
742 IF (this%coo_rows(k) .GE. nonzero_rows%get(i))
EXIT
746 IF (this%coo_rows(k) .EQ. nonzero_rows%get(i))
THEN
747 CALL dbcsr_get_stored_coordinates(matrix=this%dbcsr_mat, row=this%coo_rows(k), column=this%coo_cols(k), &
749 DO WHILE (buf_offset_idxs(m) .LE. this%result_blocks_for_rank(m)%elements)
750 IF (this%result_blocks_for_rank(m)%get(buf_offset_idxs(m)) .GE. k)
EXIT
751 buf_offset_idxs(m) = buf_offset_idxs(m) + 1
753 IF (this%result_blocks_for_rank(m)%get(buf_offset_idxs(m)) .NE. k)
THEN
754 cpabort(
"Could not determine buffer offset for block")
756 bufsize = this%result_blocks_for_rank_buf_offsets(m)%data(buf_offset_idxs(m))
757 bufsize_next = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
758 this%result_sendbufs(m)%data(bufsize + 1:bufsize_next) = reshape( &
759 sm(offset_y1 + 1:offset_y2, offset_x1 + 1:offset_x2), &
760 (/bufsize_next - bufsize/))
762 offset_y1 = offset_y2
765 offset_x1 = offset_x2
768 DEALLOCATE (buf_offset_idxs)
771 CALL nonzero_rows%reset
773 END SUBROUTINE submatrix_cpy_resultcol
783 SUBROUTINE submatrix_communicate_results(this, resultmat)
785 TYPE(dbcsr_type),
INTENT(INOUT) :: resultmat
787 INTEGER :: i, j, k, cur_row, cur_col, cur_sm, cur_buf, last_buf, &
788 bufsize, bufsize_next
789 TYPE(
buffer_type),
DIMENSION(:),
ALLOCATABLE :: result_recvbufs
791 CHARACTER(LEN=*),
PARAMETER :: routineN =
'submatrix_communicate_results'
794 CALL timeset(routinen, handle)
796 ALLOCATE (result_recvbufs(0:this%numnodes - 1))
797 DO i = 0, this%numnodes - 1
799 DO j = 1, this%result_blocks_from_rank(i)%elements
800 k = this%result_blocks_from_rank(i)%get(j)
801 bufsize = bufsize + this%col_blk_size(this%coo_cols(k))*this%col_blk_size(this%coo_rows(k))
803 CALL result_recvbufs(i)%alloc(bufsize)
808 IF (this%numnodes .GT. 1)
THEN
809 DO i = 1, this%numnodes
810 k =
modulo(this%myrank - i, this%numnodes)
811 CALL this%group%irecv(msgout=result_recvbufs(k)%data, source=k, request=result_recvbufs(k)%mpi_request)
812 k =
modulo(this%myrank + i, this%numnodes)
813 CALL this%group%isend(msgin=this%result_sendbufs(k)%data, dest=k, request=this%result_sendbufs(k)%mpi_request)
815 DO i = 0, this%numnodes - 1
816 CALL this%result_sendbufs(i)%mpi_request%wait()
817 CALL result_recvbufs(i)%mpi_request%wait()
820 result_recvbufs(0)%data = this%result_sendbufs(0)%data
825 DO i = 1, this%local_blocks
826 cur_col = this%coo_cols_local(i)
827 cur_row = this%coo_rows_local(i)
828 cur_sm = (cur_col - 1)/this%cols_per_sm + 1
829 cur_buf = this%submatrix_owners(cur_sm)
830 IF (cur_buf .GT. last_buf) bufsize = 0
831 bufsize_next = bufsize + this%col_blk_size(cur_row)*this%col_blk_size(cur_col)
832 CALL dbcsr_put_block(matrix=resultmat, row=cur_row, col=cur_col, &
833 block=result_recvbufs(cur_buf)%data(bufsize + 1:bufsize_next))
834 bufsize = bufsize_next
838 DO i = 0, this%numnodes - 1
839 CALL result_recvbufs(i)%dealloc
841 DEALLOCATE (result_recvbufs)
843 CALL dbcsr_finalize(resultmat)
845 CALL timestop(handle)
846 END SUBROUTINE submatrix_communicate_results
855 RECURSIVE PURE SUBROUTINE qsort_two(arr_a, arr_b, left, right)
857 INTEGER,
DIMENSION(:),
INTENT(inout) :: arr_a, arr_b
858 INTEGER,
INTENT(in) :: left, right
860 INTEGER :: i, j, pivot_a, pivot_b, tmp
862 IF (right - left .LT. 1)
RETURN
866 pivot_a = arr_a(right)
867 pivot_b = arr_b(right)
870 DO WHILE ((arr_a(i) .LT. pivot_a) .OR. ((arr_a(i) .EQ. pivot_a) .AND. (arr_b(i) .LT. pivot_b)))
873 DO WHILE ((j .GT. i) .AND. ((arr_a(j) .GT. pivot_a) .OR. ((arr_a(j) .EQ. pivot_a) .AND. (arr_b(j) .GE. pivot_b))))
888 IF ((arr_a(i) .GT. pivot_a) .OR. (arr_a(i) .EQ. pivot_a .AND. arr_b(i) .GT. pivot_b))
THEN
890 arr_a(i) = arr_a(right)
893 arr_b(i) = arr_b(right)
897 IF (i - 1 .GT. left)
CALL qsort_two(arr_a, arr_b, left, i - 1)
898 IF (i + 1 .LT. right)
CALL qsort_two(arr_a, arr_b, i + 1, right)
900 END SUBROUTINE qsort_two
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public lass2018
Defines the basic variable types.
integer, parameter, public dp
Interface to the message passing library MPI.
subroutine submatrix_get_relevant_sm_columns(this, sm_id, first, last)
determine which columns of the submatrix are relevant for the result matrix