53 SUBROUTINE dbt_reshape(tensor_in, tensor_out, summation, move_data)
55 TYPE(
dbt_type),
INTENT(INOUT) :: tensor_in, tensor_out
56 LOGICAL,
INTENT(IN),
OPTIONAL :: summation
57 LOGICAL,
INTENT(IN),
OPTIONAL :: move_data
59 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_reshape'
61 INTEGER :: iproc, numnodes, &
62 handle, iblk, jblk, offset, ndata, &
64 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: blks_to_allocate
67 TYPE(block_buffer_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_recv, buffer_send
68 INTEGER,
DIMENSION(ndims_tensor(tensor_in)) :: blk_size, ind_nd
69 LOGICAL :: found, summation_prv, move_prv
71 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: nblks_send_total, ndata_send_total, &
72 nblks_recv_total, ndata_recv_total, &
73 nblks_send_mythread, ndata_send_mythread
76 CALL timeset(routinen, handle)
78 IF (
PRESENT(summation))
THEN
79 summation_prv = summation
81 summation_prv = .false.
84 IF (
PRESENT(move_data))
THEN
90 cpassert(tensor_out%valid)
92 IF (.NOT. summation_prv)
CALL dbt_clear(tensor_out)
94 mp_comm = tensor_in%pgrid%mp_comm_2d
95 numnodes = mp_comm%num_pe
96 ALLOCATE (buffer_send(0:numnodes - 1), buffer_recv(0:numnodes - 1))
97 ALLOCATE (nblks_send_total(0:numnodes - 1), ndata_send_total(0:numnodes - 1), source=0)
98 ALLOCATE (nblks_recv_total(0:numnodes - 1), ndata_recv_total(0:numnodes - 1), source=0)
107 ALLOCATE (nblks_send_mythread(0:numnodes - 1), ndata_send_mythread(0:numnodes - 1), source=0)
113 nblks_send_mythread(iproc) = nblks_send_mythread(iproc) + 1
114 ndata_send_mythread(iproc) = ndata_send_mythread(iproc) + product(blk_size)
118 nblks_send_total(:) = nblks_send_total(:) + nblks_send_mythread(:)
119 ndata_send_total(:) = ndata_send_total(:) + ndata_send_mythread(:)
120 nblks_send_mythread(:) = nblks_send_total(:)
121 ndata_send_mythread(:) = ndata_send_total(:)
126 CALL mp_comm%alltoall(nblks_send_total, nblks_recv_total, 1)
127 CALL mp_comm%alltoall(ndata_send_total, ndata_recv_total, 1)
132 DO iproc = 0, numnodes - 1
133 ALLOCATE (buffer_send(iproc)%data(ndata_send_total(iproc)))
134 ALLOCATE (buffer_recv(iproc)%data(ndata_recv_total(iproc)))
136 ALLOCATE (buffer_send(iproc)%blocks(nblks_send_total(iproc), 0:
ndims_tensor(tensor_in)))
137 ALLOCATE (buffer_recv(iproc)%blocks(nblks_recv_total(iproc), 0:
ndims_tensor(tensor_in)))
149 ndata = product(blk_size)
150 ndata_send_mythread(iproc) = ndata_send_mythread(iproc) - ndata
151 offset = ndata_send_mythread(iproc)
152 buffer_send(iproc)%data(offset + 1:offset + ndata) = blk_data%blk(:)
154 nblks_send_mythread(iproc) = nblks_send_mythread(iproc) - 1
155 iblk = nblks_send_mythread(iproc) + 1
156 buffer_send(iproc)%blocks(iblk, 1:) = ind_nd(:)
157 buffer_send(iproc)%blocks(iblk, 0) = offset
163 CALL dbt_communicate_buffer(mp_comm, buffer_recv, buffer_send)
167 DO iproc = 0, numnodes - 1
168 DEALLOCATE (buffer_send(iproc)%blocks, buffer_send(iproc)%data)
172 nblks_recv_mythread = 0
173 DO iproc = 0, numnodes - 1
175 DO iblk = 1, nblks_recv_total(iproc)
176 nblks_recv_mythread = nblks_recv_mythread + 1
180 ALLOCATE (blks_to_allocate(nblks_recv_mythread,
ndims_tensor(tensor_in)))
183 DO iproc = 0, numnodes - 1
185 DO iblk = 1, nblks_recv_total(iproc)
187 blks_to_allocate(jblk, :) = buffer_recv(iproc)%blocks(iblk, 1:)
191 cpassert(jblk == nblks_recv_mythread)
193 DEALLOCATE (blks_to_allocate)
195 DO iproc = 0, numnodes - 1
197 DO iblk = 1, nblks_recv_total(iproc)
198 ind_nd(:) = buffer_recv(iproc)%blocks(iblk, 1:)
200 offset = buffer_recv(iproc)%blocks(iblk, 0)
201 ndata = product(blk_size)
203 array=buffer_recv(iproc)%data(offset + 1:offset + ndata))
204 CALL dbt_put_block(tensor_out, ind_nd, blk_data, summation=summation)
211 DO iproc = 0, numnodes - 1
212 DEALLOCATE (buffer_recv(iproc)%blocks, buffer_recv(iproc)%data)
217 DEALLOCATE (nblks_recv_total, ndata_recv_total)
218 DEALLOCATE (nblks_send_total, ndata_send_total)
219 DEALLOCATE (buffer_send, buffer_recv)
223 CALL timestop(handle)