dbt_reshape_ops.F
!--------------------------------------------------------------------------------------------------!
! CP2K: A general program to perform molecular dynamics simulations                               !
! Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                     !
!                                                                                                  !
! SPDX-License-Identifier: GPL-2.0-or-later                                                        !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Routines to reshape / redistribute tensors
!> \author Patrick Seewald
! **************************************************************************************************
MODULE dbt_reshape_ops

   USE dbt_block, ONLY: &
      block_nd, create_block, destroy_block, dbt_get_block, dbt_iterator_blocks_left, &
      dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, dbt_iterator_type, &
      dbt_put_block, dbt_reserve_blocks
   USE dbt_types, ONLY: dbt_blk_sizes, &
                        dbt_clear, &
                        dbt_create, &
                        dbt_get_stored_coordinates, &
                        dbt_type, &
                        ndims_tensor
   USE kinds, ONLY: dp
   USE message_passing, ONLY: mp_comm_type, &
                              mp_request_type, &
                              mp_waitall

#include "../base/base_uses.f90"

   IMPLICIT NONE
   PRIVATE
   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_reshape_ops'

   PUBLIC :: dbt_reshape

   ! send/receive buffer holding all blocks destined for (or received from) one rank:
   ! 'blocks' stores the block indices plus data offsets, 'data' the packed block entries
   TYPE block_buffer_type
      INTEGER, DIMENSION(:, :), ALLOCATABLE :: blocks
      REAL(dp), DIMENSION(:), ALLOCATABLE :: data
   END TYPE block_buffer_type

CONTAINS

! **************************************************************************************************
!> \brief Copy data from tensor_in to tensor_out (involves a reshape / redistribution).
!>        With summation, tensor_out = tensor_out + tensor_in.
!>        move_data is a memory optimization: data is transferred from tensor_in to tensor_out
!>        such that tensor_in is empty on return.
!> \param tensor_in input tensor
!> \param tensor_out output tensor; addressed with the same block indices as tensor_in
!> \param summation if .TRUE., sum into existing blocks of tensor_out instead of overwriting
!> \param move_data if .TRUE., tensor_in is cleared after the copy
!> \author Ole Schuett
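!> \note  A minimal usage sketch (illustrative only, not from the original source; the tensor
!>        names and the distribution setup are assumed): both tensors are created with identical
!>        block sizes but different process-grid mappings, then dbt_reshape moves the data into
!>        the new layout.
!> \code
!>    TYPE(dbt_type) :: t_in, t_out
!>    ! ... create t_in and t_out with the same block sizes but different
!>    !     process-grid mappings / distributions ...
!>    CALL dbt_reshape(t_in, t_out, summation=.FALSE., move_data=.TRUE.)
!>    ! t_out now holds the data of t_in in the new layout; t_in is empty afterwards.
!> \endcode
! **************************************************************************************************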
   SUBROUTINE dbt_reshape(tensor_in, tensor_out, summation, move_data)

      TYPE(dbt_type), INTENT(INOUT) :: tensor_in, tensor_out
      LOGICAL, INTENT(IN), OPTIONAL :: summation
      LOGICAL, INTENT(IN), OPTIONAL :: move_data

      CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_reshape'

      INTEGER :: iproc, numnodes, &
                 handle, iblk, jblk, offset, ndata, &
                 nblks_recv_mythread
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate
      TYPE(dbt_iterator_type) :: iter
      TYPE(block_nd) :: blk_data
      TYPE(block_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
      INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_size, ind_nd
      LOGICAL :: found, summation_prv, move_prv

      INTEGER, ALLOCATABLE, DIMENSION(:) :: nblks_send_total, ndata_send_total, &
                                            nblks_recv_total, ndata_recv_total, &
                                            nblks_send_mythread, ndata_send_mythread
      TYPE(mp_comm_type) :: mp_comm

      CALL timeset(routineN, handle)

      IF (PRESENT(summation)) THEN
         summation_prv = summation
      ELSE
         summation_prv = .FALSE.
      END IF

      IF (PRESENT(move_data)) THEN
         move_prv = move_data
      ELSE
         move_prv = .FALSE.
      END IF

      CPASSERT(tensor_out%valid)

      IF (.NOT. summation_prv) CALL dbt_clear(tensor_out)

      mp_comm = tensor_in%pgrid%mp_comm_2d
      numnodes = mp_comm%num_pe
      ALLOCATE (buffer_send(0:numnodes - 1), buffer_recv(0:numnodes - 1))
      ALLOCATE (nblks_send_total(0:numnodes - 1), ndata_send_total(0:numnodes - 1), source=0)
      ALLOCATE (nblks_recv_total(0:numnodes - 1), ndata_recv_total(0:numnodes - 1), source=0)

!$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
!$OMP SHARED(tensor_in,tensor_out,summation) &
!$OMP SHARED(buffer_send,buffer_recv,mp_comm,numnodes) &
!$OMP SHARED(nblks_send_total,ndata_send_total,nblks_recv_total,ndata_recv_total) &
!$OMP PRIVATE(nblks_send_mythread,ndata_send_mythread,nblks_recv_mythread) &
!$OMP PRIVATE(iter,ind_nd,blk_size,blk_data,found,iproc) &
!$OMP PRIVATE(blks_to_allocate,offset,ndata,iblk,jblk)
      ALLOCATE (nblks_send_mythread(0:numnodes - 1), ndata_send_mythread(0:numnodes - 1), source=0)

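      ! First pass over tensor_in: each thread counts, per destination rank, the number of
      ! blocks and the amount of data it will send.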
      CALL dbt_iterator_start(iter, tensor_in)
      DO WHILE (dbt_iterator_blocks_left(iter))
         CALL dbt_iterator_next_block(iter, ind_nd, blk_size=blk_size)
         CALL dbt_get_stored_coordinates(tensor_out, ind_nd, iproc)
         nblks_send_mythread(iproc) = nblks_send_mythread(iproc) + 1
         ndata_send_mythread(iproc) = ndata_send_mythread(iproc) + product(blk_size)
      END DO
      CALL dbt_iterator_stop(iter)
!$OMP CRITICAL(omp_dbt_reshape)
      nblks_send_total(:) = nblks_send_total(:) + nblks_send_mythread(:)
      ndata_send_total(:) = ndata_send_total(:) + ndata_send_mythread(:)
      nblks_send_mythread(:) = nblks_send_total(:) ! current totals indicate slot for this thread
      ndata_send_mythread(:) = ndata_send_total(:)
!$OMP END CRITICAL(omp_dbt_reshape)
!$OMP BARRIER

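      ! Exchange the per-rank block and data counts so that each rank knows how much it will
      ! receive from every other rank.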
!$OMP MASTER
      CALL mp_comm%alltoall(nblks_send_total, nblks_recv_total, 1)
      CALL mp_comm%alltoall(ndata_send_total, ndata_recv_total, 1)
!$OMP END MASTER
!$OMP BARRIER

!$OMP DO
      DO iproc = 0, numnodes - 1
         ALLOCATE (buffer_send(iproc)%data(ndata_send_total(iproc)))
         ALLOCATE (buffer_recv(iproc)%data(ndata_recv_total(iproc)))
         ! going to use buffer%blocks(:,0) to store data offsets
         ALLOCATE (buffer_send(iproc)%blocks(nblks_send_total(iproc), 0:ndims_tensor(tensor_in)))
         ALLOCATE (buffer_recv(iproc)%blocks(nblks_recv_total(iproc), 0:ndims_tensor(tensor_in)))
      END DO
!$OMP END DO
!$OMP BARRIER

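      ! Second pass: pack the blocks into the per-rank send buffers. Each thread fills its own
      ! slot, counting down from the offsets established in the critical section above.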
      CALL dbt_iterator_start(iter, tensor_in)
      DO WHILE (dbt_iterator_blocks_left(iter))
         CALL dbt_iterator_next_block(iter, ind_nd, blk_size=blk_size)
         CALL dbt_get_stored_coordinates(tensor_out, ind_nd, iproc)
         CALL dbt_get_block(tensor_in, ind_nd, blk_data, found)
         CPASSERT(found)
         ! insert block data
         ndata = product(blk_size)
         ndata_send_mythread(iproc) = ndata_send_mythread(iproc) - ndata
         offset = ndata_send_mythread(iproc)
         buffer_send(iproc)%data(offset + 1:offset + ndata) = blk_data%blk(:)
         ! insert block index
         nblks_send_mythread(iproc) = nblks_send_mythread(iproc) - 1
         iblk = nblks_send_mythread(iproc) + 1
         buffer_send(iproc)%blocks(iblk, 1:) = ind_nd(:)
         buffer_send(iproc)%blocks(iblk, 0) = offset
         CALL destroy_block(blk_data)
      END DO
      CALL dbt_iterator_stop(iter)
      DEALLOCATE (nblks_send_mythread, ndata_send_mythread)
!$OMP BARRIER

      CALL dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
!$OMP BARRIER

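      ! Unpack: release the send buffers, reserve the received blocks in tensor_out, then copy
      ! (or sum) the received data into the newly reserved blocks.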
!$OMP DO
      DO iproc = 0, numnodes - 1
         DEALLOCATE (buffer_send(iproc)%blocks, buffer_send(iproc)%data)
      END DO
!$OMP END DO NOWAIT

      nblks_recv_mythread = 0
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            nblks_recv_mythread = nblks_recv_mythread + 1
         END DO
!$OMP END DO
      END DO
      ALLOCATE (blks_to_allocate(nblks_recv_mythread, ndims_tensor(tensor_in)))

      jblk = 0
      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            jblk = jblk + 1
            blks_to_allocate(jblk, :) = buffer_recv(iproc)%blocks(iblk, 1:)
         END DO
!$OMP END DO
      END DO
      CPASSERT(jblk == nblks_recv_mythread)
      CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
      DEALLOCATE (blks_to_allocate)

      DO iproc = 0, numnodes - 1
!$OMP DO
         DO iblk = 1, nblks_recv_total(iproc)
            ind_nd(:) = buffer_recv(iproc)%blocks(iblk, 1:)
            CALL dbt_blk_sizes(tensor_out, ind_nd, blk_size)
            offset = buffer_recv(iproc)%blocks(iblk, 0)
            ndata = product(blk_size)
            CALL create_block(blk_data, blk_size, &
                              array=buffer_recv(iproc)%data(offset + 1:offset + ndata))
            CALL dbt_put_block(tensor_out, ind_nd, blk_data, summation=summation)
            CALL destroy_block(blk_data)
         END DO
!$OMP END DO
      END DO

!$OMP DO
      DO iproc = 0, numnodes - 1
         DEALLOCATE (buffer_recv(iproc)%blocks, buffer_recv(iproc)%data)
      END DO
!$OMP END DO
!$OMP END PARALLEL

      DEALLOCATE (nblks_recv_total, ndata_recv_total)
      DEALLOCATE (nblks_send_total, ndata_send_total)
      DEALLOCATE (buffer_send, buffer_recv)

      IF (move_prv) CALL dbt_clear(tensor_in)

      CALL timestop(handle)
   END SUBROUTINE dbt_reshape


! **************************************************************************************************
!> \brief Communicate the send buffers: pairs of ranks exchange the packed block indices and the
!>        packed block data via non-blocking point-to-point messages; with a single rank the
!>        buffers are copied locally.
!> \author Patrick Seewald
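!> \note  Exchange pattern implemented below: block indices are sent with tag 4 and block data
!>        with tag 7; requests are collected in req_array (columns 1:2 for sends, 3:4 for
!>        receives) and completed with mp_waitall. Empty buffers are skipped.
! **************************************************************************************************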
   SUBROUTINE dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
      TYPE(mp_comm_type), INTENT(IN) :: mp_comm
      TYPE(block_buffer_type), DIMENSION(0:), INTENT(INOUT) :: buffer_recv, buffer_send

      CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_communicate_buffer'

      INTEGER :: iproc, numnodes, &
                 rec_counter, send_counter, i
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:, :) :: req_array
      INTEGER :: handle

      CALL timeset(routineN, handle)
      numnodes = mp_comm%num_pe

      IF (numnodes > 1) THEN
!$OMP MASTER
         send_counter = 0
         rec_counter = 0

         ALLOCATE (req_array(1:numnodes, 4))

         ! post all receives first: block indices with tag 4, block data with tag 7
         DO iproc = 0, numnodes - 1
            IF (SIZE(buffer_recv(iproc)%blocks) > 0) THEN
               rec_counter = rec_counter + 1
               CALL mp_comm%irecv(buffer_recv(iproc)%blocks, iproc, req_array(rec_counter, 3), tag=4)
               CALL mp_comm%irecv(buffer_recv(iproc)%data, iproc, req_array(rec_counter, 4), tag=7)
            END IF
         END DO

         DO iproc = 0, numnodes - 1
            IF (SIZE(buffer_send(iproc)%blocks) > 0) THEN
               send_counter = send_counter + 1
               CALL mp_comm%isend(buffer_send(iproc)%blocks, iproc, req_array(send_counter, 1), tag=4)
               CALL mp_comm%isend(buffer_send(iproc)%data, iproc, req_array(send_counter, 2), tag=7)
            END IF
         END DO

         IF (send_counter > 0) THEN
            CALL mp_waitall(req_array(1:send_counter, 1:2))
         END IF
         IF (rec_counter > 0) THEN
            CALL mp_waitall(req_array(1:rec_counter, 3:4))
         END IF
!$OMP END MASTER

      ELSE
         ! single rank: copy the buffers locally, distributing the work over threads
!$OMP DO SCHEDULE(static)
         DO i = 1, SIZE(buffer_send(0)%blocks, 1)
            buffer_recv(0)%blocks(i, :) = buffer_send(0)%blocks(i, :)
         END DO
!$OMP END DO NOWAIT
!$OMP DO SCHEDULE(static)
         DO i = 1, SIZE(buffer_send(0)%data)
            buffer_recv(0)%data(i) = buffer_send(0)%data(i)
         END DO
!$OMP END DO
      END IF
      CALL timestop(handle)

   END SUBROUTINE dbt_communicate_buffer

END MODULE dbt_reshape_ops