20 dbt_reserve_blocks, dbt_put_block
30 mp_waitall, mp_comm_type, mp_request_type
32 #include "../base/base_uses.f90"
36 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'dbt_reshape_ops'
! Per-destination-rank communication buffer for the reshape all-to-all.
! NOTE(review): this excerpt is truncated — the END TYPE line (and possibly
! further components) from the original source is not visible here.
40 TYPE block_buffer_type
! Block metadata table, one row per block: column 0 holds the offset of the
! block's payload inside data(:), columns 1: hold the n-dimensional block
! index (see how it is filled at original lines 156-157 and read at 198/200).
41 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: blocks
! Contiguous payload of all blocks destined for (or received from) one rank.
42 REAL(dp),
DIMENSION(:),
ALLOCATABLE :: data
! Redistribute/reshape tensor_in into tensor_out via an MPI all-to-all style
! exchange: count blocks per destination rank, pack them into send buffers,
! exchange buffers, then reserve and store the received blocks in tensor_out.
!   tensor_in, tensor_out : source and destination tensors (INTENT(INOUT))
!   summation (optional)  : if present and true, received blocks are added to
!                           tensor_out (dbt_put_block is called with
!                           summation=summation); otherwise tensor_out is
!                           cleared first (original line 92).
!   move_data (optional)  : presence is tested at original line 84; the body
!                           handling it is lost in this excerpt — presumably a
!                           memory optimization that empties tensor_in (TODO:
!                           confirm against the full source).
! NOTE(review): this excerpt is truncated — iterator loops, OMP directives,
! END IF/END DO lines and several statements from the original are missing,
! so the control-flow closures below are not visible.
53 SUBROUTINE dbt_reshape(tensor_in, tensor_out, summation, move_data)
55 TYPE(dbt_type),
INTENT(INOUT) :: tensor_in, tensor_out
56 LOGICAL,
INTENT(IN),
OPTIONAL :: summation
57 LOGICAL,
INTENT(IN),
OPTIONAL :: move_data
59 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_reshape'
61 INTEGER :: iproc, numnodes, &
62 handle, iblk, jblk, offset, ndata, &
64 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate
65 TYPE(dbt_iterator_type) :: iter
66 TYPE(block_nd) :: blk_data
! Buffers indexed 0:numnodes-1, one pair per MPI rank.
67 TYPE(block_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
! ind_nd / blk_size are automatic arrays sized by the tensor rank.
68 INTEGER,
DIMENSION(ndims_tensor(tensor_in)) :: blk_size, ind_nd
69 LOGICAL :: found, summation_prv, move_prv
! *_total: per-rank totals over all threads; *_mythread: per-thread
! counters, later converted to running offsets (original lines 118-121).
71 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: nblks_send_total, ndata_send_total, &
72 nblks_recv_total, ndata_recv_total, &
73 nblks_send_mythread, ndata_send_mythread
74 TYPE(mp_comm_type) :: mp_comm
76 CALL timeset(routinen, handle)
! Resolve the optional summation flag; default is .false. (overwrite mode).
78 IF (
PRESENT(summation))
THEN
79 summation_prv = summation
81 summation_prv = .false.
84 IF (
PRESENT(move_data))
THEN
90 cpassert(tensor_out%valid)
! In overwrite mode, start from an empty destination tensor.
92 IF (.NOT. summation_prv)
CALL dbt_clear(tensor_out)
94 mp_comm = tensor_in%pgrid%mp_comm_2d
95 numnodes = mp_comm%num_pe
96 ALLOCATE (buffer_send(0:numnodes - 1), buffer_recv(0:numnodes - 1))
97 ALLOCATE (nblks_send_total(0:numnodes - 1), ndata_send_total(0:numnodes - 1), source=0)
98 ALLOCATE (nblks_recv_total(0:numnodes - 1), ndata_recv_total(0:numnodes - 1), source=0)
107 ALLOCATE (nblks_send_mythread(0:numnodes - 1), ndata_send_mythread(0:numnodes - 1), source=0)
! Counting pass (enclosing iterator loop lost in this excerpt): tally the
! number of blocks and data elements this thread sends to each rank.
113 nblks_send_mythread(iproc) = nblks_send_mythread(iproc) + 1
114 ndata_send_mythread(iproc) = ndata_send_mythread(iproc) + product(blk_size)
! Accumulate thread counts into the totals, then copy the running totals
! back so each thread's counter becomes its end offset for packing
! (it is decremented back down during the packing pass below).
118 nblks_send_total(:) = nblks_send_total(:) + nblks_send_mythread(:)
119 ndata_send_total(:) = ndata_send_total(:) + ndata_send_mythread(:)
120 nblks_send_mythread(:) = nblks_send_total(:)
121 ndata_send_mythread(:) = ndata_send_total(:)
! Exchange the per-rank send counts so each rank knows its receive sizes.
126 CALL mp_comm%alltoall(nblks_send_total, nblks_recv_total, 1)
127 CALL mp_comm%alltoall(ndata_send_total, ndata_recv_total, 1)
! Size the send/receive buffers; metadata column 0 is the payload offset,
! columns 1:ndims hold the block index.
132 DO iproc = 0, numnodes - 1
133 ALLOCATE (buffer_send(iproc)%data(ndata_send_total(iproc)))
134 ALLOCATE (buffer_recv(iproc)%data(ndata_recv_total(iproc)))
136 ALLOCATE (buffer_send(iproc)%blocks(nblks_send_total(iproc), 0:
ndims_tensor(tensor_in)))
137 ALLOCATE (buffer_recv(iproc)%blocks(nblks_recv_total(iproc), 0:
ndims_tensor(tensor_in)))
! Packing pass (enclosing iterator loop lost in this excerpt): fetch each
! local block and write it into the send buffer for its destination rank.
146 CALL dbt_get_block(tensor_in, ind_nd, blk_data, found)
149 ndata = product(blk_size)
! Decrement-then-use: the thread counter walks backwards from its end
! offset, so concurrent threads pack disjoint slices.
150 ndata_send_mythread(iproc) = ndata_send_mythread(iproc) - ndata
151 offset = ndata_send_mythread(iproc)
152 buffer_send(iproc)%data(offset + 1:offset + ndata) = blk_data%blk(:)
154 nblks_send_mythread(iproc) = nblks_send_mythread(iproc) - 1
155 iblk = nblks_send_mythread(iproc) + 1
156 buffer_send(iproc)%blocks(iblk, 1:) = ind_nd(:)
157 buffer_send(iproc)%blocks(iblk, 0) = offset
! Non-blocking point-to-point exchange of all buffers (see subroutine below).
163 CALL dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
! Send buffers are no longer needed once the exchange has completed.
167 DO iproc = 0, numnodes - 1
168 DEALLOCATE (buffer_send(iproc)%blocks, buffer_send(iproc)%data)
! First pass over the receive buffers: count the blocks this thread will
! store, so the reservation list can be allocated exactly.
172 nblks_recv_mythread = 0
173 DO iproc = 0, numnodes - 1
175 DO iblk = 1, nblks_recv_total(iproc)
176 nblks_recv_mythread = nblks_recv_mythread + 1
180 ALLOCATE (blks_to_allocate(nblks_recv_mythread,
ndims_tensor(tensor_in)))
! Second pass: collect the block indices (metadata columns 1:) to reserve.
183 DO iproc = 0, numnodes - 1
185 DO iblk = 1, nblks_recv_total(iproc)
187 blks_to_allocate(jblk, :) = buffer_recv(iproc)%blocks(iblk, 1:)
! Sanity check: every counted block was collected.
191 cpassert(jblk == nblks_recv_mythread)
192 CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
193 DEALLOCATE (blks_to_allocate)
! Third pass: unpack each received block (offset from metadata column 0)
! and store it, adding to existing data when summation is requested.
195 DO iproc = 0, numnodes - 1
197 DO iblk = 1, nblks_recv_total(iproc)
198 ind_nd(:) = buffer_recv(iproc)%blocks(iblk, 1:)
200 offset = buffer_recv(iproc)%blocks(iblk, 0)
201 ndata = product(blk_size)
202 CALL create_block(blk_data, blk_size, &
203 array=buffer_recv(iproc)%data(offset + 1:offset + ndata))
204 CALL dbt_put_block(tensor_out, ind_nd, blk_data, summation=summation)
211 DO iproc = 0, numnodes - 1
212 DEALLOCATE (buffer_recv(iproc)%blocks, buffer_recv(iproc)%data)
217 CALL timestop(handle)
! Exchange the per-rank block buffers with non-blocking point-to-point
! communication: post all irecvs, then all isends, then wait on both sets.
! Metadata tables and payload arrays travel as separate messages (tags 4
! and 7 respectively). In the single-rank case the "self" buffer is copied
! locally instead of communicated.
!   mp_comm     : communicator (INTENT(IN))
!   buffer_recv : receive buffers, pre-allocated by the caller, filled here
!   buffer_send : send buffers, already packed by the caller
! NOTE(review): this excerpt is truncated — the `handle` declaration,
! initialization of send_counter/rec_counter, END IF/END DO closures and
! the deallocation of req_array are not visible here.
224 SUBROUTINE dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
225 TYPE(mp_comm_type),
INTENT(IN) :: mp_comm
226 TYPE(block_buffer_type),
DIMENSION(0:),
INTENT(INOUT) :: buffer_recv, buffer_send
228 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_communicate_buffer'
230 INTEGER :: iproc, numnodes, &
231 rec_counter, send_counter, i
! One request per message: columns 1-2 for sends, 3-4 for receives.
232 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:, :) :: req_array
235 CALL timeset(routinen, handle)
236 numnodes = mp_comm%num_pe
! Real communication is only needed with more than one rank.
238 IF (numnodes > 1)
THEN
243 ALLOCATE (req_array(1:numnodes, 4))
! Post receives first so matching sends can complete eagerly; empty
! buffers are skipped (no message is expected from those ranks).
245 DO iproc = 0, numnodes - 1
246 IF (
SIZE(buffer_recv(iproc)%blocks) > 0)
THEN
247 rec_counter = rec_counter + 1
248 CALL mp_comm%irecv(buffer_recv(iproc)%blocks, iproc, req_array(rec_counter, 3), tag=4)
249 CALL mp_comm%irecv(buffer_recv(iproc)%data, iproc, req_array(rec_counter, 4), tag=7)
! Post the sends for every non-empty outgoing buffer.
253 DO iproc = 0, numnodes - 1
254 IF (
SIZE(buffer_send(iproc)%blocks) > 0)
THEN
255 send_counter = send_counter + 1
256 CALL mp_comm%isend(buffer_send(iproc)%blocks, iproc, req_array(send_counter, 1), tag=4)
257 CALL mp_comm%isend(buffer_send(iproc)%data, iproc, req_array(send_counter, 2), tag=7)
! Complete all outstanding requests (guarded so mp_waitall never gets
! a zero-sized request slice).
261 IF (send_counter > 0)
THEN
262 CALL mp_waitall(req_array(1:send_counter, 1:2))
264 IF (rec_counter > 0)
THEN
265 CALL mp_waitall(req_array(1:rec_counter, 3:4))
! Single-rank fallback: plain element-wise copy of the rank-0 buffer.
271 DO i = 1,
SIZE(buffer_send(0)%blocks, 1)
272 buffer_recv(0)%blocks(i, :) = buffer_send(0)%blocks(i, :)
276 DO i = 1,
SIZE(buffer_send(0)%data)
277 buffer_recv(0)%data(i) = buffer_send(0)%data(i)
281 CALL timestop(handle)
283 END SUBROUTINE dbt_communicate_buffer
Wrapper for allocating, copying and reshaping arrays.
Methods to operate on n-dimensional tensor blocks.
logical function, public dbt_iterator_blocks_left(iterator)
Generalization of block_iterator_blocks_left for tensors.
subroutine, public destroy_block(block)
subroutine, public dbt_iterator_stop(iterator)
Generalization of block_iterator_stop for tensors.
subroutine, public dbt_iterator_start(iterator, tensor)
Generalization of block_iterator_start for tensors.
subroutine, public dbt_iterator_next_block(iterator, ind_nd, blk_size, blk_offset)
iterate over n-dimensional blocks of an n-rank tensor, index only (block data must be retrieved separately by calling dbt_get_block).
Routines to reshape / redistribute tensors.
subroutine, public dbt_reshape(tensor_in, tensor_out, summation, move_data)
copy data (involves a reshape); with summation, tensor_out = tensor_out + tensor_in. move_data is a memory optimization: transfer data ownership from tensor_in to tensor_out (presumably leaving tensor_in empty — confirm against full source).
Tall-and-skinny matrices: base routines similar to the DBM API, mostly wrappers around existing DBM routines.
subroutine, public dbt_tas_get_info(matrix, nblkrows_total, nblkcols_total, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, distribution, name)
...
subroutine, public dbt_tas_copy(matrix_b, matrix_a, summation)
Copy matrix_a to matrix_b.
type(dbt_tas_split_info) function, public dbt_tas_info(matrix)
get info on mpi grid splitting
DBT tensor framework for block-sparse tensor contraction: Types and create/destroy routines.
subroutine, public dbt_blk_sizes(tensor, ind, blk_size)
Size of tensor block.
pure integer function, public ndims_tensor(tensor)
tensor rank
subroutine, public dbt_clear(tensor)
Clear tensor (s.t. it does not contain any blocks)
subroutine, public dbt_get_stored_coordinates(tensor, ind_nd, processor)
Generalization of block_get_stored_coordinates for tensors.
Defines the basic variable types.
integer, parameter, public dp
integer, parameter, public default_string_length
Interface to the message passing library MPI.