(git:374b731)
Loading...
Searching...
No Matches
dbt_reshape_ops.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Routines to reshape / redistribute tensors
10!> \author Patrick Seewald
11! **************************************************************************************************
13
14
17 USE dbt_block, ONLY: &
21 USE dbt_types, ONLY: dbt_blk_sizes, &
22 dbt_create, &
23 dbt_type, &
28 USE kinds, ONLY: dp, dp
29 USE message_passing, ONLY: &
31
32#include "../base/base_uses.f90"
33
34 IMPLICIT NONE
35 PRIVATE
36 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_reshape_ops'
37
38 PUBLIC :: dbt_reshape
39
   ! Staging buffer for communication with one MPI rank: a block index table
   ! plus the corresponding block data packed contiguously.
   TYPE block_buffer_type
      ! blocks(iblk, 0) = offset of block iblk within 'data';
      ! blocks(iblk, 1:) = the nd block index of block iblk
      INTEGER, DIMENSION(:, :), ALLOCATABLE :: blocks
      ! packed block values; block iblk occupies data(offset+1 : offset+product(blk_size))
      REAL(dp), DIMENSION(:), ALLOCATABLE :: data
   END TYPE
44
45CONTAINS
46
! **************************************************************************************************
!> \brief copy data (involves reshape)
!>        tensor_out = tensor_out + tensor_in
!>        move_data memory optimization:
!>        transfer data from tensor_in to tensor_out s.t. tensor_in is empty on return
!> \param tensor_in  source tensor (cleared on return if move_data is .TRUE.)
!> \param tensor_out destination tensor; cleared first unless summation is .TRUE.
!> \param summation  if .TRUE., received blocks are added to existing blocks of tensor_out
!> \param move_data  if .TRUE., tensor_in is cleared after the transfer
!> \author Ole Schuett
! **************************************************************************************************
   SUBROUTINE dbt_reshape(tensor_in, tensor_out, summation, move_data)

      TYPE(dbt_type), INTENT(INOUT) :: tensor_in, tensor_out
      LOGICAL, INTENT(IN), OPTIONAL :: summation
      LOGICAL, INTENT(IN), OPTIONAL :: move_data

      CHARACTER(LEN=*), PARAMETER :: routinen = 'dbt_reshape'

      INTEGER :: iproc, numnodes, &
                 handle, iblk, jblk, offset, ndata, &
                 nblks_recv_mythread
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate
      TYPE(dbt_iterator_type) :: iter
      TYPE(block_nd) :: blk_data
      TYPE(block_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
      INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_size, ind_nd
      LOGICAL :: found, summation_prv, move_prv

      INTEGER, ALLOCATABLE, DIMENSION(:) :: nblks_send_total, ndata_send_total, &
                                            nblks_recv_total, ndata_recv_total, &
                                            nblks_send_mythread, ndata_send_mythread
      TYPE(mp_comm_type) :: mp_comm

      CALL timeset(routinen, handle)

      ! resolve optional arguments
      IF (PRESENT(summation)) THEN
         summation_prv = summation
      ELSE
         summation_prv = .false.
      END IF

      IF (PRESENT(move_data)) THEN
         move_prv = move_data
      ELSE
         move_prv = .false.
      END IF

      cpassert(tensor_out%valid)

      ! without summation the previous content of tensor_out is discarded
      IF (.NOT. summation_prv) CALL dbt_clear(tensor_out)

      mp_comm = tensor_in%pgrid%mp_comm_2d
      numnodes = mp_comm%num_pe
      ! one send and one receive buffer per MPI rank
      ALLOCATE (buffer_send(0:numnodes - 1), buffer_recv(0:numnodes - 1))
      ALLOCATE (nblks_send_total(0:numnodes - 1), ndata_send_total(0:numnodes - 1), source=0)
      ALLOCATE (nblks_recv_total(0:numnodes - 1), ndata_recv_total(0:numnodes - 1), source=0)

!$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
!$OMP SHARED(tensor_in,tensor_out,summation) &
!$OMP SHARED(buffer_send,buffer_recv,mp_comm,numnodes) &
!$OMP SHARED(nblks_send_total,ndata_send_total,nblks_recv_total,ndata_recv_total) &
!$OMP PRIVATE(nblks_send_mythread,ndata_send_mythread,nblks_recv_mythread) &
!$OMP PRIVATE(iter,ind_nd,blk_size,blk_data,found,iproc) &
!$OMP PRIVATE(blks_to_allocate,offset,ndata,iblk,jblk)
      ALLOCATE (nblks_send_mythread(0:numnodes - 1), ndata_send_mythread(0:numnodes - 1), source=0)

      ! 1st pass: count this thread's blocks and data volume per destination rank
      CALL dbt_iterator_start(iter, tensor_in)
      DO WHILE (dbt_iterator_blocks_left(iter))
         CALL dbt_iterator_next_block(iter, ind_nd, blk_size=blk_size)
         CALL dbt_get_stored_coordinates(tensor_out, ind_nd, iproc)
         nblks_send_mythread(iproc) = nblks_send_mythread(iproc) + 1
         ndata_send_mythread(iproc) = ndata_send_mythread(iproc) + product(blk_size)
      END DO
      CALL dbt_iterator_stop(iter)
      ! accumulate the per-thread counts into the shared totals; after this thread's
      ! update the running totals mark the upper end of this thread's slot in the
      ! send buffers, which is filled backwards in the 2nd pass below
!$OMP CRITICAL
      nblks_send_total(:) = nblks_send_total(:) + nblks_send_mythread(:)
      ndata_send_total(:) = ndata_send_total(:) + ndata_send_mythread(:)
      nblks_send_mythread(:) = nblks_send_total(:) ! current totals indicate slot for this thread
      ndata_send_mythread(:) = ndata_send_total(:)
!$OMP END CRITICAL
!$OMP BARRIER

      ! exchange counts so every rank knows how many blocks / how much data to receive
!$OMP MASTER
      CALL mp_comm%alltoall(nblks_send_total, nblks_recv_total, 1)
      CALL mp_comm%alltoall(ndata_send_total, ndata_recv_total, 1)
!$OMP END MASTER
!$OMP BARRIER

!$OMP DO
      DO iproc = 0, numnodes - 1
         ALLOCATE (buffer_send(iproc)%data(ndata_send_total(iproc)))
         ALLOCATE (buffer_recv(iproc)%data(ndata_recv_total(iproc)))
         ! going to use buffer%blocks(:,0) to store data offsets
         ALLOCATE (buffer_send(iproc)%blocks(nblks_send_total(iproc), 0:ndims_tensor(tensor_in)))
         ALLOCATE (buffer_recv(iproc)%blocks(nblks_recv_total(iproc), 0:ndims_tensor(tensor_in)))
      END DO
!$OMP END DO
!$OMP BARRIER

      ! 2nd pass: pack block data and indices into the send buffers; each thread fills
      ! its slot backwards starting from the offsets recorded in the critical section
      CALL dbt_iterator_start(iter, tensor_in)
      DO WHILE (dbt_iterator_blocks_left(iter))
         CALL dbt_iterator_next_block(iter, ind_nd, blk_size=blk_size)
         CALL dbt_get_stored_coordinates(tensor_out, ind_nd, iproc)
         CALL dbt_get_block(tensor_in, ind_nd, blk_data, found)
         cpassert(found)
         ! insert block data
         ndata = product(blk_size)
         ndata_send_mythread(iproc) = ndata_send_mythread(iproc) - ndata
         offset = ndata_send_mythread(iproc)
         buffer_send(iproc)%data(offset + 1:offset + ndata) = blk_data%blk(:)
         ! insert block index
         nblks_send_mythread(iproc) = nblks_send_mythread(iproc) - 1
         iblk = nblks_send_mythread(iproc) + 1
         buffer_send(iproc)%blocks(iblk, 1:) = ind_nd(:)
         buffer_send(iproc)%blocks(iblk, 0) = offset
         CALL destroy_block(blk_data)
      END DO
      CALL dbt_iterator_stop(iter)
!$OMP BARRIER

      CALL dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
!$OMP BARRIER

      ! the send buffers are no longer needed
!$OMP DO
      DO iproc = 0, numnodes - 1
         DEALLOCATE (buffer_send(iproc)%blocks, buffer_send(iproc)%data)
      END DO
!$OMP END DO

      ! count how many received blocks this thread will handle
      ! NOTE(review): the three OMP DO loops below must hand each thread the same
      ! iteration sets (count / gather / fill); this relies on the default loop
      ! schedule being reproducible across identical loops — the cpassert after the
      ! gather loop guards this assumption; confirm against the OpenMP spec in use
      nblks_recv_mythread = 0
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            nblks_recv_mythread = nblks_recv_mythread + 1
         END DO
!$OMP END DO
      END DO
      ALLOCATE (blks_to_allocate(nblks_recv_mythread, ndims_tensor(tensor_in)))

      ! gather the nd indices of this thread's share of received blocks
      jblk = 0
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            jblk = jblk + 1
            blks_to_allocate(jblk, :) = buffer_recv(iproc)%blocks(iblk, 1:)
         END DO
!$OMP END DO
      END DO
      cpassert(jblk == nblks_recv_mythread)
      CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
      DEALLOCATE (blks_to_allocate)

      ! unpack received data into tensor_out; note that the (possibly absent) optional
      ! 'summation' is passed straight through to the optional dummy of dbt_put_block,
      ! which is legal Fortran for optional-to-optional association
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            ind_nd(:) = buffer_recv(iproc)%blocks(iblk, 1:)
            CALL dbt_blk_sizes(tensor_out, ind_nd, blk_size)
            offset = buffer_recv(iproc)%blocks(iblk, 0)
            ndata = product(blk_size)
            CALL create_block(blk_data, blk_size, &
                              array=buffer_recv(iproc)%data(offset + 1:offset + ndata))
            CALL dbt_put_block(tensor_out, ind_nd, blk_data, summation=summation)
            CALL destroy_block(blk_data)
         END DO
!$OMP END DO
      END DO
!$OMP END PARALLEL

      DO iproc = 0, numnodes - 1
         DEALLOCATE (buffer_recv(iproc)%blocks, buffer_recv(iproc)%data)
      END DO

      IF (move_prv) CALL dbt_clear(tensor_in)

      CALL timestop(handle)
   END SUBROUTINE dbt_reshape
219
! **************************************************************************************************
!> \brief communicate buffer
!> \param mp_comm      MPI communicator of the process grid
!> \param buffer_recv  per-rank receive buffers, already allocated to the expected sizes
!> \param buffer_send  per-rank send buffers, already packed by the caller
!> \author Patrick Seewald
! **************************************************************************************************
   SUBROUTINE dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
      TYPE(mp_comm_type), INTENT(IN) :: mp_comm
      TYPE(block_buffer_type), DIMENSION(0:), INTENT(INOUT) :: buffer_recv, buffer_send

      CHARACTER(LEN=*), PARAMETER :: routinen = 'dbt_communicate_buffer'

      INTEGER :: handle, idx, nrecv_req, nsend_req, &
                 numnodes, peer
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:, :) :: requests

      CALL timeset(routinen, handle)
      numnodes = mp_comm%num_pe

      IF (numnodes > 1) THEN
         ! genuine exchange: a single thread drives all non-blocking communication
!$OMP MASTER
         nsend_req = 0
         nrecv_req = 0

         ! request columns 1:2 track sends (index table / data),
         ! columns 3:4 track receives (index table / data)
         ALLOCATE (requests(1:numnodes, 4))

         ! post every receive first, skipping peers that deliver nothing
         DO peer = 0, numnodes - 1
            IF (SIZE(buffer_recv(peer)%blocks) > 0) THEN
               nrecv_req = nrecv_req + 1
               CALL mp_comm%irecv(buffer_recv(peer)%blocks, peer, requests(nrecv_req, 3), tag=4)
               CALL mp_comm%irecv(buffer_recv(peer)%data, peer, requests(nrecv_req, 4), tag=7)
            END IF
         END DO

         ! then start the matching sends
         DO peer = 0, numnodes - 1
            IF (SIZE(buffer_send(peer)%blocks) > 0) THEN
               nsend_req = nsend_req + 1
               CALL mp_comm%isend(buffer_send(peer)%blocks, peer, requests(nsend_req, 1), tag=4)
               CALL mp_comm%isend(buffer_send(peer)%data, peer, requests(nsend_req, 2), tag=7)
            END IF
         END DO

         ! complete all outstanding requests before leaving the master region
         IF (nsend_req > 0) CALL mp_waitall(requests(1:nsend_req, 1:2))
         IF (nrecv_req > 0) CALL mp_waitall(requests(1:nrecv_req, 3:4))
!$OMP END MASTER

      ELSE
         ! single rank: no MPI needed, copy send -> receive buffer with all threads
!$OMP DO SCHEDULE(static, 512)
         DO idx = 1, SIZE(buffer_send(0)%blocks, 1)
            buffer_recv(0)%blocks(idx, :) = buffer_send(0)%blocks(idx, :)
         END DO
!$OMP END DO
!$OMP DO SCHEDULE(static, 512)
         DO idx = 1, SIZE(buffer_send(0)%data)
            buffer_recv(0)%data(idx) = buffer_send(0)%data(idx)
         END DO
!$OMP END DO
      END IF
      CALL timestop(handle)

   END SUBROUTINE dbt_communicate_buffer
284
285END MODULE dbt_reshape_ops
Wrapper for allocating, copying and reshaping arrays.
Methods to operate on n-dimensional tensor blocks.
Definition dbt_block.F:12
logical function, public dbt_iterator_blocks_left(iterator)
Generalization of block_iterator_blocks_left for tensors.
Definition dbt_block.F:197
subroutine, public destroy_block(block)
Definition dbt_block.F:435
subroutine, public dbt_iterator_stop(iterator)
Generalization of block_iterator_stop for tensors.
Definition dbt_block.F:134
subroutine, public dbt_iterator_start(iterator, tensor)
Generalization of block_iterator_start for tensors.
Definition dbt_block.F:121
subroutine, public dbt_iterator_next_block(iterator, ind_nd, blk_size, blk_offset)
iterate over nd blocks of an nd rank tensor, index only (blocks must be retrieved by calling dbt_get_block).
Definition dbt_block.F:161
Routines to reshape / redistribute tensors.
subroutine, public dbt_reshape(tensor_in, tensor_out, summation, move_data)
copy data (involves reshape). tensor_out = tensor_out + tensor_in. move_data memory optimization: transfer data from tensor_in to tensor_out s.t. tensor_in is empty on return.
Tall-and-skinny matrices: base routines similar to DBM API, mostly wrappers around existing DBM routines.
subroutine, public dbt_tas_get_info(matrix, nblkrows_total, nblkcols_total, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, distribution, name)
...
subroutine, public dbt_tas_copy(matrix_b, matrix_a, summation)
Copy matrix_a to matrix_b.
type(dbt_tas_split_info) function, public dbt_tas_info(matrix)
get info on mpi grid splitting
DBT tensor framework for block-sparse tensor contraction: Types and create/destroy routines.
Definition dbt_types.F:12
subroutine, public dbt_blk_sizes(tensor, ind, blk_size)
Size of tensor block.
Definition dbt_types.F:1479
pure integer function, public ndims_tensor(tensor)
tensor rank
Definition dbt_types.F:1227
subroutine, public dbt_clear(tensor)
Clear tensor (s.t. it does not contain any blocks)
Definition dbt_types.F:1779
subroutine, public dbt_get_stored_coordinates(tensor, ind_nd, processor)
Generalization of block_get_stored_coordinates for tensors.
Definition dbt_types.F:1510
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Interface to the message passing library MPI.