(git:ccc2433)
dbt_reshape_ops.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Routines to reshape / redistribute tensors
10 !> \author Patrick Seewald
11 ! **************************************************************************************************
13 
14 
15  USE dbt_allocate_wrap, ONLY: allocate_any
17  USE dbt_block, ONLY: &
18  block_nd, create_block, destroy_block, dbt_iterator_type, dbt_iterator_next_block, &
20  dbt_reserve_blocks, dbt_put_block
21  USE dbt_types, ONLY: dbt_blk_sizes, &
22  dbt_create, &
23  dbt_type, &
24  ndims_tensor, &
26  dbt_clear
27  USE kinds, ONLY: default_string_length
28  USE kinds, ONLY: dp, dp
29  USE message_passing, ONLY: &
30  mp_waitall, mp_comm_type, mp_request_type
31 
32 #include "../base/base_uses.f90"
33 
34  IMPLICIT NONE
35  PRIVATE
36  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_reshape_ops'
37 
38  PUBLIC :: dbt_reshape
39 
   ! Per-destination-rank communication buffer.
   ! blocks(iblk, 0)  : offset of block iblk's data within `data`
   ! blocks(iblk, 1:) : n-dimensional block index of block iblk
   ! data             : concatenated block data for all blocks sent to / received from one rank
   TYPE block_buffer_type
      INTEGER, DIMENSION(:, :), ALLOCATABLE :: blocks
      REAL(dp), DIMENSION(:), ALLOCATABLE :: data
   END TYPE
44 
45 CONTAINS
46 
47 ! **************************************************************************************************
48 !> \brief copy data (involves reshape)
49 !> tensor_out = tensor_out + tensor_in move_data memory optimization:
50 !> transfer data from tensor_in to tensor_out s.t. tensor_in is empty on return
51 !> \author Ole Schuett
52 ! **************************************************************************************************
   SUBROUTINE dbt_reshape(tensor_in, tensor_out, summation, move_data)

      TYPE(dbt_type), INTENT(INOUT) :: tensor_in, tensor_out
      ! if .TRUE., data of tensor_in is added onto tensor_out instead of replacing it
      LOGICAL, INTENT(IN), OPTIONAL :: summation
      ! if .TRUE., tensor_in is cleared on return (memory optimization)
      LOGICAL, INTENT(IN), OPTIONAL :: move_data

      CHARACTER(LEN=*), PARAMETER :: routinen = 'dbt_reshape'

      INTEGER :: iproc, numnodes, &
                 handle, iblk, jblk, offset, ndata, &
                 nblks_recv_mythread
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate
      TYPE(dbt_iterator_type) :: iter
      TYPE(block_nd) :: blk_data
      ! one send and one receive buffer per MPI rank (indexed 0:numnodes-1)
      TYPE(block_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
      INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_size, ind_nd
      LOGICAL :: found, summation_prv, move_prv

      ! *_total arrays are shared across threads; *_mythread are thread-private counters
      ! that double as this thread's write position within the shared buffers
      INTEGER, ALLOCATABLE, DIMENSION(:) :: nblks_send_total, ndata_send_total, &
                                            nblks_recv_total, ndata_recv_total, &
                                            nblks_send_mythread, ndata_send_mythread
      TYPE(mp_comm_type) :: mp_comm

      CALL timeset(routinen, handle)

      IF (PRESENT(summation)) THEN
         summation_prv = summation
      ELSE
         summation_prv = .false.
      END IF

      IF (PRESENT(move_data)) THEN
         move_prv = move_data
      ELSE
         move_prv = .false.
      END IF

      cpassert(tensor_out%valid)

      ! without summation, any previous content of tensor_out is discarded
      IF (.NOT. summation_prv) CALL dbt_clear(tensor_out)

      mp_comm = tensor_in%pgrid%mp_comm_2d
      numnodes = mp_comm%num_pe
      ALLOCATE (buffer_send(0:numnodes - 1), buffer_recv(0:numnodes - 1))
      ALLOCATE (nblks_send_total(0:numnodes - 1), ndata_send_total(0:numnodes - 1), source=0)
      ALLOCATE (nblks_recv_total(0:numnodes - 1), ndata_recv_total(0:numnodes - 1), source=0)

!$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
!$OMP SHARED(tensor_in,tensor_out,summation) &
!$OMP SHARED(buffer_send,buffer_recv,mp_comm,numnodes) &
!$OMP SHARED(nblks_send_total,ndata_send_total,nblks_recv_total,ndata_recv_total) &
!$OMP PRIVATE(nblks_send_mythread,ndata_send_mythread,nblks_recv_mythread) &
!$OMP PRIVATE(iter,ind_nd,blk_size,blk_data,found,iproc) &
!$OMP PRIVATE(blks_to_allocate,offset,ndata,iblk,jblk)
      ALLOCATE (nblks_send_mythread(0:numnodes - 1), ndata_send_mythread(0:numnodes - 1), source=0)

      ! 1st pass: each thread counts the blocks and the data volume it will
      ! send to each destination rank (destination given by tensor_out's distribution)
      CALL dbt_iterator_start(iter, tensor_in)
      DO WHILE (dbt_iterator_blocks_left(iter))
         CALL dbt_iterator_next_block(iter, ind_nd, blk_size=blk_size)
         CALL dbt_get_stored_coordinates(tensor_out, ind_nd, iproc)
         nblks_send_mythread(iproc) = nblks_send_mythread(iproc) + 1
         ndata_send_mythread(iproc) = ndata_send_mythread(iproc) + product(blk_size)
      END DO
      CALL dbt_iterator_stop(iter)
      ! aggregate per-thread counts into the shared totals; the running totals at the
      ! time of each thread's turn define that thread's exclusive slot in the buffers
!$OMP CRITICAL
      nblks_send_total(:) = nblks_send_total(:) + nblks_send_mythread(:)
      ndata_send_total(:) = ndata_send_total(:) + ndata_send_mythread(:)
      nblks_send_mythread(:) = nblks_send_total(:) ! current totals indicate slot for this thread
      ndata_send_mythread(:) = ndata_send_total(:)
!$OMP END CRITICAL
!$OMP BARRIER

      ! exchange counts so every rank knows how many blocks / how much data it receives
!$OMP MASTER
      CALL mp_comm%alltoall(nblks_send_total, nblks_recv_total, 1)
      CALL mp_comm%alltoall(ndata_send_total, ndata_recv_total, 1)
!$OMP END MASTER
!$OMP BARRIER

!$OMP DO
      DO iproc = 0, numnodes - 1
         ALLOCATE (buffer_send(iproc)%data(ndata_send_total(iproc)))
         ALLOCATE (buffer_recv(iproc)%data(ndata_recv_total(iproc)))
         ! going to use buffer%blocks(:,0) to store data offsets
         ALLOCATE (buffer_send(iproc)%blocks(nblks_send_total(iproc), 0:ndims_tensor(tensor_in)))
         ALLOCATE (buffer_recv(iproc)%blocks(nblks_recv_total(iproc), 0:ndims_tensor(tensor_in)))
      END DO
!$OMP END DO
!$OMP BARRIER

      ! 2nd pass: pack the send buffers; each thread fills its reserved slot from the
      ! back by decrementing the per-thread counters recorded in the critical section
      CALL dbt_iterator_start(iter, tensor_in)
      DO WHILE (dbt_iterator_blocks_left(iter))
         CALL dbt_iterator_next_block(iter, ind_nd, blk_size=blk_size)
         CALL dbt_get_stored_coordinates(tensor_out, ind_nd, iproc)
         CALL dbt_get_block(tensor_in, ind_nd, blk_data, found)
         cpassert(found)
         ! insert block data
         ndata = product(blk_size)
         ndata_send_mythread(iproc) = ndata_send_mythread(iproc) - ndata
         offset = ndata_send_mythread(iproc)
         buffer_send(iproc)%data(offset + 1:offset + ndata) = blk_data%blk(:)
         ! insert block index
         nblks_send_mythread(iproc) = nblks_send_mythread(iproc) - 1
         iblk = nblks_send_mythread(iproc) + 1
         buffer_send(iproc)%blocks(iblk, 1:) = ind_nd(:)
         buffer_send(iproc)%blocks(iblk, 0) = offset
         CALL destroy_block(blk_data)
      END DO
      CALL dbt_iterator_stop(iter)
!$OMP BARRIER

      CALL dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
!$OMP BARRIER

      ! send buffers are no longer needed once the exchange has completed
!$OMP DO
      DO iproc = 0, numnodes - 1
         DEALLOCATE (buffer_send(iproc)%blocks, buffer_send(iproc)%data)
      END DO
!$OMP END DO

      ! count the received blocks this thread will unpack
      ! NOTE(review): this relies on the three OMP DO loops below partitioning
      ! their iterations identically across threads — confirm the default
      ! schedule guarantees this for identical loop bounds
      nblks_recv_mythread = 0
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            nblks_recv_mythread = nblks_recv_mythread + 1
         END DO
!$OMP END DO
      END DO
      ALLOCATE (blks_to_allocate(nblks_recv_mythread, ndims_tensor(tensor_in)))

      ! collect the indices of this thread's received blocks and reserve them in tensor_out
      jblk = 0
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            jblk = jblk + 1
            blks_to_allocate(jblk, :) = buffer_recv(iproc)%blocks(iblk, 1:)
         END DO
!$OMP END DO
      END DO
      cpassert(jblk == nblks_recv_mythread)
      CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
      DEALLOCATE (blks_to_allocate)

      ! unpack the received blocks into tensor_out
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            ind_nd(:) = buffer_recv(iproc)%blocks(iblk, 1:)
            CALL dbt_blk_sizes(tensor_out, ind_nd, blk_size)
            offset = buffer_recv(iproc)%blocks(iblk, 0)
            ndata = product(blk_size)
            CALL create_block(blk_data, blk_size, &
                              array=buffer_recv(iproc)%data(offset + 1:offset + ndata))
            ! the (possibly absent) optional summation is forwarded as-is
            CALL dbt_put_block(tensor_out, ind_nd, blk_data, summation=summation)
            CALL destroy_block(blk_data)
         END DO
!$OMP END DO
      END DO
!$OMP END PARALLEL

      DO iproc = 0, numnodes - 1
         DEALLOCATE (buffer_recv(iproc)%blocks, buffer_recv(iproc)%data)
      END DO

      IF (move_prv) CALL dbt_clear(tensor_in)

      CALL timestop(handle)
   END SUBROUTINE dbt_reshape
219 
220 ! **************************************************************************************************
221 !> \brief communicate buffer
222 !> \author Patrick Seewald
223 ! **************************************************************************************************
   SUBROUTINE dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
      ! communicator over which the per-rank buffers are exchanged
      TYPE(mp_comm_type), INTENT(IN) :: mp_comm
      ! per-rank buffers (indexed 0:numnodes-1); buffer_send is read, buffer_recv is filled
      TYPE(block_buffer_type), DIMENSION(0:), INTENT(INOUT) :: buffer_recv, buffer_send

      CHARACTER(LEN=*), PARAMETER :: routinen = 'dbt_communicate_buffer'

      INTEGER :: iproc, numnodes, &
                 rec_counter, send_counter, i
      ! columns 1:2 hold send requests (blocks, data), columns 3:4 receive requests
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:, :) :: req_array
      INTEGER :: handle

      CALL timeset(routinen, handle)
      numnodes = mp_comm%num_pe

      IF (numnodes > 1) THEN
         ! all MPI traffic is handled by the master thread only; the caller is
         ! responsible for the surrounding barriers (see dbt_reshape)
!$OMP MASTER
         send_counter = 0
         rec_counter = 0

         ALLOCATE (req_array(1:numnodes, 4))

         ! post all non-blocking receives first (tag 4: block indices, tag 7: block data),
         ! skipping ranks from which nothing will arrive
         DO iproc = 0, numnodes - 1
            IF (SIZE(buffer_recv(iproc)%blocks) > 0) THEN
               rec_counter = rec_counter + 1
               CALL mp_comm%irecv(buffer_recv(iproc)%blocks, iproc, req_array(rec_counter, 3), tag=4)
               CALL mp_comm%irecv(buffer_recv(iproc)%data, iproc, req_array(rec_counter, 4), tag=7)
            END IF
         END DO

         ! then post the matching non-blocking sends
         DO iproc = 0, numnodes - 1
            IF (SIZE(buffer_send(iproc)%blocks) > 0) THEN
               send_counter = send_counter + 1
               CALL mp_comm%isend(buffer_send(iproc)%blocks, iproc, req_array(send_counter, 1), tag=4)
               CALL mp_comm%isend(buffer_send(iproc)%data, iproc, req_array(send_counter, 2), tag=7)
            END IF
         END DO

         ! wait for completion of all posted requests
         IF (send_counter > 0) THEN
            CALL mp_waitall(req_array(1:send_counter, 1:2))
         END IF
         IF (rec_counter > 0) THEN
            CALL mp_waitall(req_array(1:rec_counter, 3:4))
         END IF
!$OMP END MASTER

      ELSE
         ! single rank: plain local copy, parallelized over threads
!$OMP DO SCHEDULE(static, 512)
         DO i = 1, SIZE(buffer_send(0)%blocks, 1)
            buffer_recv(0)%blocks(i, :) = buffer_send(0)%blocks(i, :)
         END DO
!$OMP END DO
!$OMP DO SCHEDULE(static, 512)
         DO i = 1, SIZE(buffer_send(0)%data)
            buffer_recv(0)%data(i) = buffer_send(0)%data(i)
         END DO
!$OMP END DO
      END IF
      CALL timestop(handle)

   END SUBROUTINE dbt_communicate_buffer
284 
285 END MODULE dbt_reshape_ops
Wrapper for allocating, copying and reshaping arrays.
Methods to operate on n-dimensional tensor blocks.
Definition: dbt_block.F:12
logical function, public dbt_iterator_blocks_left(iterator)
Generalization of block_iterator_blocks_left for tensors.
Definition: dbt_block.F:197
subroutine, public destroy_block(block)
Definition: dbt_block.F:435
subroutine, public dbt_iterator_stop(iterator)
Generalization of block_iterator_stop for tensors.
Definition: dbt_block.F:134
subroutine, public dbt_iterator_start(iterator, tensor)
Generalization of block_iterator_start for tensors.
Definition: dbt_block.F:121
subroutine, public dbt_iterator_next_block(iterator, ind_nd, blk_size, blk_offset)
iterate over nd blocks of an nd rank tensor, index only (blocks must be retrieved by calling dbt_get_block).
Definition: dbt_block.F:161
Routines to reshape / redistribute tensors.
subroutine, public dbt_reshape(tensor_in, tensor_out, summation, move_data)
copy data (involves reshape). tensor_out = tensor_out + tensor_in. move_data memory optimization: transfer data from tensor_in to tensor_out s.t. tensor_in is empty on return.
Tall-and-skinny matrices: base routines similar to DBM API, mostly wrappers around existing DBM routi...
Definition: dbt_tas_base.F:13
subroutine, public dbt_tas_get_info(matrix, nblkrows_total, nblkcols_total, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, distribution, name)
...
Definition: dbt_tas_base.F:999
subroutine, public dbt_tas_copy(matrix_b, matrix_a, summation)
Copy matrix_a to matrix_b.
Definition: dbt_tas_base.F:250
type(dbt_tas_split_info) function, public dbt_tas_info(matrix)
get info on mpi grid splitting
Definition: dbt_tas_base.F:822
DBT tensor framework for block-sparse tensor contraction: Types and create/destroy routines.
Definition: dbt_types.F:12
subroutine, public dbt_blk_sizes(tensor, ind, blk_size)
Size of tensor block.
Definition: dbt_types.F:1479
pure integer function, public ndims_tensor(tensor)
tensor rank
Definition: dbt_types.F:1227
subroutine, public dbt_clear(tensor)
Clear tensor (s.t. it does not contain any blocks)
Definition: dbt_types.F:1779
subroutine, public dbt_get_stored_coordinates(tensor, ind_nd, processor)
Generalization of block_get_stored_coordinates for tensors.
Definition: dbt_types.F:1510
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public default_string_length
Definition: kinds.F:57
Interface to the message passing library MPI.