mp2_ri_grad_util.F (git:ccc2433)
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Routines for calculating RI-MP2 gradients
!> \par History
!>      10.2013 created [Mauro Del Ben]
! **************************************************************************************************
MODULE mp2_ri_grad_util
   USE cp_blacs_env, ONLY: cp_blacs_env_create, &
                           cp_blacs_env_release, &
                           cp_blacs_env_type
   USE cp_fm_basic_linalg, ONLY: cp_fm_geadd
   USE cp_fm_struct, ONLY: cp_fm_struct_create, &
                           cp_fm_struct_get, &
                           cp_fm_struct_release, &
                           cp_fm_struct_type
   USE cp_fm_types, ONLY: &
      cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_indxl2g, cp_fm_release, &
      cp_fm_set_all, cp_fm_to_fm, cp_fm_type
   USE dbcsr_api, ONLY: dbcsr_p_type, &
                        dbcsr_type, &
                        dbcsr_type_no_symmetry
   USE group_dist_types, ONLY: create_group_dist, &
                               get_group_dist, &
                               group_dist_d1_type, &
                               release_group_dist
   USE kinds, ONLY: dp
   USE libint_2c_3c, ONLY: compare_potential_types
   USE message_passing, ONLY: mp_para_env_type, &
                              mp_request_type, &
                              mp_waitall
   USE mp2_types, ONLY: integ_mat_buffer_type, &
                        mp2_type
   USE parallel_gemm_api, ONLY: parallel_gemm
   USE util, ONLY: get_limit
#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'

   PUBLIC :: complete_gamma, array2fm, fm2array, create_dbcsr_gamma, prepare_redistribution

   TYPE index_map
      INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
   END TYPE
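   ! NB: index_map stores, per received message, the local (row, col) target
   ! positions in the fm_ia buffer; create_dbcsr_gamma below fills one such
   ! map per sender so that incoming buffers can be scattered into the
   ! matrix without recomputing the index arithmetic for every message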

CONTAINS

! **************************************************************************************************
!> \brief complete the calculation of the Gamma matrices
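!>        Gamma_ia^P = sum_Q Y_ia^Q (Q|P)^(-1/2), plus the corresponding
!>        two-index quantities Gamma_PQ (and Gamma_PQ_2 for a non-identical RI metric)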
!> \param mp2_env ...
!> \param B_ia_Q ...
!> \param dimen_RI ...
!> \param homo ...
!> \param virtual ...
!> \param para_env ...
!> \param para_env_sub ...
!> \param ngroup ...
!> \param my_group_L_size ...
!> \param my_group_L_start ...
!> \param my_group_L_end ...
!> \param my_B_size ...
!> \param my_B_virtual_start ...
!> \param gd_array ...
!> \param gd_B_virtual ...
!> \param kspin ...
!> \author Mauro Del Ben
! **************************************************************************************************
   SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
                             my_group_L_size, my_group_L_start, my_group_L_end, &
                             my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)

      TYPE(mp2_type) :: mp2_env
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      INTEGER, INTENT(IN) :: dimen_ri, homo, virtual
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
      INTEGER, INTENT(IN) :: ngroup, my_group_l_size, &
                             my_group_l_start, my_group_l_end, &
                             my_b_size, my_b_virtual_start
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_array, gd_b_virtual
      INTEGER, INTENT(IN) :: kspin

      CHARACTER(LEN=*), PARAMETER :: routinen = 'complete_gamma'

      INTEGER :: dimen_ia, handle, i, ispin, kkb, my_ia_end, my_ia_size, my_ia_start, my_p_end, &
                 my_p_size, my_p_start, nspins, pos_group, pos_sub
      INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_c_2d, gamma_2d, gamma_pq
      TYPE(cp_blacs_env_type), POINTER :: blacs_env
      TYPE(cp_fm_struct_type), POINTER :: fm_struct_ia, fm_struct_ri
      TYPE(cp_fm_type) :: fm_gamma, fm_gamma_pq, fm_gamma_pq_2, fm_gamma_pq_temp, &
                          fm_gamma_pq_temp_2, fm_ia_p, fm_y, operator_half, pq_half
      TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_ia_new, gd_p, &
                                  gd_p_new

      CALL timeset(routinen, handle)

      ! reshape the data into a 2D array; reorder the local data in such a way
      ! as to help the next stage of matrix creation: inside the group the
      ! data are now divided into an (ia x K) matrix
      dimen_ia = homo*virtual
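      ! Note: ia is the composite occupied-virtual pair index,
      ! ia = (i - 1)*virtual + a, so the dimen_ia = homo*virtual rows are
      ! block-distributed over the para_env_sub ranks by create_group_dist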
      CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
      CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)

      CALL mat_3d_to_2d(b_ia_q, bib_c_2d, homo, my_b_size, virtual, my_b_virtual_start, &
                        my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)

      CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, gamma_2d, homo, my_b_size, virtual, my_b_virtual_start, &
                              my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)

      ! create the processor map and size arrays
      CALL create_group_dist(gd_ia_new, para_env%num_pe)
      CALL create_group_dist(gd_array_new, para_env%num_pe)
      CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
      CALL create_group_dist(gd_p_new, para_env%num_pe)

      CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)

      CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
                                  group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)

      DO i = 0, para_env%num_pe - 1
         ! calculate position of the group
         pos_group = i/para_env_sub%num_pe
         ! calculate position in the subgroup
         pos_sub = pos_info(i)
         ! 1 -> rows, 2 -> cols
         CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
         CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
         CALL get_group_dist(gd_p, pos_sub, gd_p_new, i)
      END DO
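      ! NB: in the loop above each global rank i is decomposed into a
      ! (group, position-inside-group) pair: ranks are numbered contiguously
      ! within each subgroup, so i/num_pe_sub selects the group and
      ! pos_info(i) the rank's position inside it; the per-rank distributions
      ! gd_*_new collect these limits for every rank of para_env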

      ! create the blacs env
      NULLIFY (blacs_env)
      CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)

      NULLIFY (fm_struct_ia)
      CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
                               ncol_global=dimen_ri, para_env=para_env)

      ! create the fm matrix Gamma
      CALL array2fm(gamma_2d, fm_struct_ia, my_ia_start, my_ia_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_ia_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    fm_y)
      ! create the fm matrix B_ia_P
      CALL array2fm(bib_c_2d, fm_struct_ia, my_ia_start, my_ia_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_ia_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    fm_ia_p)

      NULLIFY (fm_struct_ri)
      CALL cp_fm_struct_create(fm_struct_ri, context=blacs_env, nrow_global=dimen_ri, &
                               ncol_global=dimen_ri, para_env=para_env)

      ! since we will need (P|Q)^(-1/2) in the future, make a copy
      CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_ri, my_p_start, my_p_end, &
                    my_group_l_start, my_group_l_end, &
                    gd_p_new, gd_array_new, &
                    group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                    pq_half, do_release_mat=.FALSE.)

      IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_ri, my_p_start, my_p_end, &
                       my_group_l_start, my_group_l_end, &
                       gd_p_new, gd_array_new, &
                       group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
                       operator_half, do_release_mat=.FALSE.)
      END IF

      CALL release_group_dist(gd_p_new)
      CALL release_group_dist(gd_ia_new)
      CALL release_group_dist(gd_array_new)

      ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
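      ! Two cases follow: if the RI metric equals the interaction potential,
      ! a single multiplication with pq_half completes Gamma; otherwise the
      ! RI-metric matrix operator_half enters as well, and both Gamma_PQ and
      ! a second contribution Gamma_PQ_2 are assembled further below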
      IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
         CALL cp_fm_struct_release(fm_struct_ia)
         ! perform the matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma)
         ! release the Y matrix
         CALL cp_fm_release(fm_y)

         ! complete gamma small (fm_Gamma_PQ)
         ! create temp matrix
         CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
         CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
                            matrix_a=fm_gamma, matrix_b=fm_ia_p, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_release(fm_ia_p)
         ! create fm_Gamma_PQ matrix
         CALL cp_fm_create(fm_gamma_pq, fm_struct_ri, name="fm_Gamma_PQ")
         CALL cp_fm_struct_release(fm_struct_ri)
         ! perform matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_gamma_pq_temp, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq)
         CALL cp_fm_release(fm_gamma_pq_temp)
         CALL cp_fm_release(pq_half)

      ELSE
         ! create temp matrix
         CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
         CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=fm_ia_p, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_release(fm_ia_p)

         CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
         CALL cp_fm_struct_release(fm_struct_ia)
         ! perform the matrix multiplication
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
                            matrix_c=fm_gamma)
         CALL cp_fm_release(fm_y)

         CALL cp_fm_create(fm_gamma_pq_temp_2, fm_struct_ri, name="fm_Gamma_PQ_temp_2")
         CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_gamma_pq_temp, matrix_b=operator_half, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp_2)

         CALL cp_fm_create(fm_gamma_pq_2, fm_struct_ri, name="fm_Gamma_PQ_2")
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=pq_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq_temp)
         CALL cp_fm_to_fm(fm_gamma_pq_temp, fm_gamma_pq_2)
         CALL cp_fm_geadd(1.0_dp, "T", fm_gamma_pq_temp, 1.0_dp, fm_gamma_pq_2)
         CALL cp_fm_release(fm_gamma_pq_temp)
         CALL cp_fm_release(pq_half)

         CALL cp_fm_create(fm_gamma_pq, fm_struct_ri)
         CALL cp_fm_struct_release(fm_struct_ri)
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-1.0_dp, &
                            matrix_a=operator_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
                            matrix_c=fm_gamma_pq)
         CALL cp_fm_release(operator_half)
         CALL cp_fm_release(fm_gamma_pq_temp_2)
      END IF

      ! now redistribute the data, in this case we go back
      ! to the original way the integrals were distributed
      CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
                    my_group_l_size, my_group_l_start, my_group_l_end, &
                    group_grid_2_mepos, mepos_2_grid_group, &
                    para_env_sub%num_pe, ngroup, &
                    fm_gamma)

      ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
      CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
                    my_group_l_size, my_group_l_start, my_group_l_end, &
                    group_grid_2_mepos, mepos_2_grid_group, &
                    para_env_sub%num_pe, ngroup, &
                    fm_gamma_pq)
      IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
         CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ)
      ELSE
         mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + gamma_pq
         DEALLOCATE (gamma_pq)
      END IF

      IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
         ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
         CALL fm2array(gamma_pq, my_p_size, my_p_start, my_p_end, &
                       my_group_l_size, my_group_l_start, my_group_l_end, &
                       group_grid_2_mepos, mepos_2_grid_group, &
                       para_env_sub%num_pe, ngroup, &
                       fm_gamma_pq_2)
         IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
            CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ_2)
         ELSE
            mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + gamma_pq
            DEALLOCATE (gamma_pq)
         END IF
      END IF

      ! allocate G_P_ia (DBCSR)
      IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
         nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
         ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
         DO ispin = 1, nspins
            DO kkb = 1, my_group_l_size
               NULLIFY (mp2_env%ri_grad%G_P_ia(kkb, ispin)%matrix)
            END DO
         END DO
      END IF

      ! create the Gamma_ia_P in DBCSR style
      CALL create_dbcsr_gamma(gamma_2d, homo, virtual, dimen_ia, para_env_sub, &
                              my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
                              mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)

      DEALLOCATE (pos_info)
      DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
      CALL release_group_dist(gd_ia)
      CALL release_group_dist(gd_p)

      ! release the blacs_env
      CALL cp_blacs_env_release(blacs_env)

      CALL timestop(handle)

   END SUBROUTINE complete_gamma

! **************************************************************************************************
!> \brief Redistribute a 3d matrix to a 2d matrix
!> \param B_ia_Q ...
!> \param BIb_C_2D ...
!> \param homo ...
!> \param my_B_size ...
!> \param virtual ...
!> \param my_B_virtual_start ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_ia_size ...
!> \param my_group_L_size ...
!> \param para_env_sub ...
!> \param gd_B_virtual ...
! **************************************************************************************************
   SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
                           my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: bib_c_2d
      INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
                             my_b_virtual_start, my_ia_start, &
                             my_ia_end, my_ia_size, my_group_l_size
      TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mat_3d_to_2d'

      INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
                 rec_b_virtual_end, rec_b_virtual_start
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec

      CALL timeset(routinen, handle)

      ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
      bib_c_2d = 0.0_dp

      DO iib = 1, homo
         DO jjb = 1, my_b_size
            ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
            IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
               bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(iib, jjb, 1:my_group_l_size)
            END IF
         END DO
      END DO

      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
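
         ! ring exchange: at shift s every rank sends its slab to mepos+s and
         ! receives the slab of mepos-s (mod num_pe), so after num_pe-1 shifts
         ! each pair of ranks has exchanged exactly once, without deadlock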

         CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)

         ALLOCATE (bib_c_rec(homo, rec_b_size, my_group_l_size))
         bib_c_rec = 0.0_dp

         CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
                                    bib_c_rec, proc_receive)

         DO iib = 1, homo
            DO jjb = 1, rec_b_size
               ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
               IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
                  bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(iib, jjb, 1:my_group_l_size)
               END IF
            END DO
         END DO

         DEALLOCATE (bib_c_rec)
      END DO
      DEALLOCATE (b_ia_q)

      CALL timestop(handle)
   END SUBROUTINE mat_3d_to_2d

! **************************************************************************************************
!> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia
!>        to account for a different order of indices
!> \param B_ia_Q ...
!> \param BIb_C_2D ...
!> \param homo ...
!> \param my_B_size ...
!> \param virtual ...
!> \param my_B_virtual_start ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_ia_size ...
!> \param my_group_L_size ...
!> \param para_env_sub ...
!> \param gd_B_virtual ...
! **************************************************************************************************
   SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
                                 my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         INTENT(INOUT) :: b_ia_q
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: bib_c_2d
      INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
                             my_b_virtual_start, my_ia_start, &
                             my_ia_end, my_ia_size, my_group_l_size
      TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual

      CHARACTER(LEN=*), PARAMETER :: routinen = 'mat_3d_to_2d_gamma'

      INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
                 rec_b_virtual_end, rec_b_virtual_start
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec

      CALL timeset(routinen, handle)

      ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
      bib_c_2d = 0.0_dp

      DO iib = 1, homo
         DO jjb = 1, my_b_size
            ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
            IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
               bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(jjb, iib, 1:my_group_l_size)
            END IF
         END DO
      END DO

      DO proc_shift = 1, para_env_sub%num_pe - 1
         proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
         proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)

         CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)

         ALLOCATE (bib_c_rec(rec_b_size, homo, my_group_l_size))
         bib_c_rec = 0.0_dp

         CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
                                    bib_c_rec, proc_receive)

         DO iib = 1, homo
            DO jjb = 1, rec_b_size
               ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
               IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
                  bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(jjb, iib, 1:my_group_l_size)
               END IF
            END DO
         END DO

         DEALLOCATE (bib_c_rec)
      END DO
      DEALLOCATE (b_ia_q)

      CALL timestop(handle)
   END SUBROUTINE mat_3d_to_2d_gamma

! **************************************************************************************************
!> \brief redistribute local part of array to fm
!> \param mat2D ...
!> \param fm_struct ...
!> \param my_start_row ...
!> \param my_end_row ...
!> \param my_start_col ...
!> \param my_end_col ...
!> \param gd_row ...
!> \param gd_col ...
!> \param group_grid_2_mepos ...
!> \param ngroup_row ...
!> \param ngroup_col ...
!> \param fm_mat ...
!> \param integ_group_size ...
!> \param color_group ...
!> \param do_release_mat whether to release the array (default: yes)
! **************************************************************************************************
   SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
                       my_start_col, my_end_col, &
                       gd_row, gd_col, &
                       group_grid_2_mepos, ngroup_row, ngroup_col, &
                       fm_mat, integ_group_size, color_group, do_release_mat)

      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(INOUT) :: mat2d
      TYPE(cp_fm_struct_type), POINTER :: fm_struct
      INTEGER, INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
                             my_end_col
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_row, gd_col
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos
      INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
      TYPE(cp_fm_type), INTENT(OUT) :: fm_mat
      INTEGER, INTENT(IN), OPTIONAL :: integ_group_size, color_group
      LOGICAL, INTENT(IN), OPTIONAL :: do_release_mat

      CHARACTER(LEN=*), PARAMETER :: routinen = 'array2fm'

      INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
                 i_sub, iib, iii, itmp(2), j_global, j_local, j_sub, jjb, my_num_col_blocks, &
                 my_num_row_blocks, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
                 nrow_local, num_cols, num_rec_cols, num_rows, number_of_rec, number_of_send, &
                 proc_receive, proc_send, proc_shift, rec_col_end, rec_col_size, rec_col_start, &
                 rec_counter, rec_pcol, rec_prow, rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, &
                 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
      INTEGER :: start_col_block, start_row_block
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, &
                                            map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, &
                                               grid_2_mepos, grid_ref_2_send_pos, &
                                               mepos_2_grid
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: convert_pos, my_do_release_mat
      REAL(kind=dp) :: part_col, part_row
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_para_env_type), POINTER :: para_env
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routinen, handle)

      my_do_release_mat = .TRUE.
      IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat

      CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)

      ! create the full matrix (num_rows x num_cols)
      CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
      CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)

      ! start the filling procedure
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices, &
                          nrow_block=nrow_block, &
                          ncol_block=ncol_block)
      myprow = fm_mat%matrix_struct%context%mepos(1)
      mypcol = fm_mat%matrix_struct%context%mepos(2)
      nprow = fm_mat%matrix_struct%context%num_pe(1)
      npcol = fm_mat%matrix_struct%context%num_pe(2)

      ! start the communication part
      ! 0) create arrays containing the process positions and supporting info
      CALL timeset(routinen//"_info", handle2)
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
      grid_2_mepos(myprow, mypcol) = para_env%mepos
      ! sum the info
      CALL para_env%sum(grid_2_mepos)
      CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
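
      ! grid_2_mepos maps a BLACS grid coordinate (prow, pcol) to the MPI rank
      ! in para_env; mepos_2_grid is the inverse map. Each rank contributes
      ! its own entry, and the sum/allgather make both maps globally known.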

      ! 1) loop over my local data and define a map of the processes to send data to
      ALLOCATE (map_send_size(0:para_env%num_pe - 1))
      map_send_size = 0
      dummy_proc = 0
      DO jjb = my_start_col, my_end_col
         send_pcol = cp_fm_indxg2p(jjb, ncol_block, dummy_proc, &
                                   fm_mat%matrix_struct%first_p_pos(2), npcol)
         DO iib = my_start_row, my_end_row
            send_prow = cp_fm_indxg2p(iib, nrow_block, dummy_proc, &
                                      fm_mat%matrix_struct%first_p_pos(1), nprow)
            proc_send = grid_2_mepos(send_prow, send_pcol)
            map_send_size(proc_send) = map_send_size(proc_send) + 1
         END DO
      END DO

      ! 2) loop over my local data of fm_mat and define a map of the processes to receive data from
      ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
      map_rec_size = 0
      part_row = real(num_rows, kind=dp)/real(ngroup_row, kind=dp)
      part_col = real(num_cols, kind=dp)/real(ngroup_col, kind=dp)

      ! In some cases we have to convert global positions to positions in a subgroup (RPA)
      convert_pos = .FALSE.
      IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .TRUE.

      DO jjb = 1, nrow_local
         j_global = row_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         rec_prow = int(real(j_global - 1, kind=dp)/part_row)
         rec_prow = max(0, rec_prow)
         rec_prow = min(rec_prow, ngroup_row - 1)
         DO
            itmp = get_limit(num_rows, ngroup_row, rec_prow)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) rec_prow = rec_prow - 1
            IF (j_global > itmp(2)) rec_prow = rec_prow + 1
         END DO
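
         ! the initial guess rec_prow is refined by walking up or down until
         ! the get_limit bounds bracket j_global; since the guess is based on
         ! the average group size, it is typically off by at most one group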

         IF (convert_pos) THEN
            ! if the group is not in the same RPA group, cycle
            IF ((rec_prow/integ_group_size) .NE. color_group) cycle
            ! convert the global position to the position within the RPA group
            rec_prow = mod(rec_prow, integ_group_size)
         END IF

         DO iib = 1, ncol_local
            i_global = col_indices(iib)
            ! check the process in the group holding this element
            rec_pcol = int(real(i_global - 1, kind=dp)/part_col)
            rec_pcol = max(0, rec_pcol)
            rec_pcol = min(rec_pcol, ngroup_col - 1)
            DO
               itmp = get_limit(num_cols, ngroup_col, rec_pcol)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
               IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
            END DO

            proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)

            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1

         END DO ! i_global
      END DO ! j_global

      ! 3) check if the local data has to be stored in the new fm_mat
      IF (map_rec_size(para_env%mepos) > 0) THEN
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
               DO iib = 1, nrow_local
                  i_global = row_indices(iib)
                  IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                     fm_mat%local_data(iib, jjb) = mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1)
                  END IF
               END DO
            END IF
         END DO
      END IF
      CALL timestop(handle2)

      ! 4) reorder the data in the send buffers
      CALL timeset(routinen//"_buffer_send", handle2)
      ! count the number of messages to send
      number_of_send = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! grid_ref_2_send_pos is a map that, given a pair
      ! (ref_send_prow, ref_send_pcol), returns the position in buffer_send
      ! associated with that process
      ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
      grid_ref_2_send_pos = 0
      ! finalize the allocation of buffer_send with the actual size
      ! of each message (actual size is size_send_buffer)
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the sending buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%msg = 0.0_dp
            buffer_send(send_counter)%proc = proc_send
            ! get the prow, pcol of the process that has
            ! to receive this message
            ref_send_prow = mepos_2_grid(1, proc_send)
            ref_send_pcol = mepos_2_grid(2, proc_send)
            ! save the position of the message directed to this process
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! loop over the locally held data and fill buffer_send;
      ! for doing that we need an array that keeps track of the
      ! sequential increase of the index for each message
      ALLOCATE (iii_vet(number_of_send))
      iii_vet = 0
      DO iib = my_start_row, my_end_row
         send_prow = cp_fm_indxg2p(iib, nrow_block, dummy_proc, &
                                   fm_mat%matrix_struct%first_p_pos(1), nprow)
         DO jjb = my_start_col, my_end_col
            send_pcol = cp_fm_indxg2p(jjb, ncol_block, dummy_proc, &
                                      fm_mat%matrix_struct%first_p_pos(2), npcol)
            ! we don't need to send to ourselves
            IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle

            send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
            iii_vet(send_counter) = iii_vet(send_counter) + 1
            iii = iii_vet(send_counter)
            buffer_send(send_counter)%msg(iii) = mat2d(iib - my_start_row + 1, jjb - my_start_col + 1)
         END DO
      END DO

      DEALLOCATE (iii_vet)
      DEALLOCATE (grid_ref_2_send_pos)
      IF (my_do_release_mat) DEALLOCATE (mat2d)
      CALL timestop(handle2)

      ! 5) similarly to what was done for buffer_send,
      ! create the receive buffers, post the messages with irecv,
      ! and send the messages non-blocking
      CALL timeset(routinen//"_isendrecv", handle2)
      ! count the number of messages to be received
      number_of_rec = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO

      ALLOCATE (buffer_rec(number_of_rec))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%msg = 0.0_dp
            buffer_rec(rec_counter)%proc = proc_receive
            ! post the message to be received
            CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
                                buffer_rec(rec_counter)%msg_req)
         END IF
      END DO

      ! send the messages
      ALLOCATE (req_send(number_of_send))
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
                                buffer_send(send_counter)%msg_req)
            req_send(send_counter) = buffer_send(send_counter)%msg_req
         END IF
      END DO
      CALL timestop(handle2)

      ! 6) fill the fm_mat matrix with the messages received from the
      ! other processes
      CALL timeset(routinen//"_fill", handle2)
      ! To perform this step efficiently we first have to know the
      ! ranges of the blocks over which a given process holds its local data.
      ! Start with the rows ...
      my_num_row_blocks = 1
      DO iib = 1, nrow_local - 1
         IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
         my_num_row_blocks = my_num_row_blocks + 1
      END DO
      ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
      blocks_ranges_row = 0
      blocks_ranges_row(1, 1) = row_indices(1)
      iii = 1
      DO iib = 1, nrow_local - 1
         IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
         iii = iii + 1
         blocks_ranges_row(2, iii - 1) = row_indices(iib)
         blocks_ranges_row(1, iii) = row_indices(iib + 1)
      END DO
      blocks_ranges_row(2, my_num_row_blocks) = row_indices(max(nrow_local, 1))
      ! ... and the columns
      my_num_col_blocks = 1
      DO jjb = 1, ncol_local - 1
         IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
         my_num_col_blocks = my_num_col_blocks + 1
      END DO
      ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
      blocks_ranges_col = 0
      blocks_ranges_col(1, 1) = col_indices(1)
      iii = 1
      DO jjb = 1, ncol_local - 1
         IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
         iii = iii + 1
         blocks_ranges_col(2, iii - 1) = col_indices(jjb)
         blocks_ranges_col(1, iii) = col_indices(jjb + 1)
      END DO
      blocks_ranges_col(2, my_num_col_blocks) = col_indices(max(ncol_local, 1))
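
      ! NB: in a 2D block-cyclic layout the local indices map to runs of
      ! consecutive global indices, one run per distribution block; the
      ! blocks_ranges arrays record the first and last global index of each
      ! run so they can be intersected with a sender's row/column range below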

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)

         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1

            CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)

            ! precompute the number of received columns and their local indices
            num_rec_cols = 0
            DO jjb = 1, my_num_col_blocks
               start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
               end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
               DO j_sub = start_col_block, end_col_block
                  num_rec_cols = num_rec_cols + 1
               END DO
            END DO
            ALLOCATE (index_col_rec(num_rec_cols))
            index_col_rec = 0
            iii = 0
            DO jjb = 1, my_num_col_blocks
               start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
               end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
               DO j_sub = start_col_block, end_col_block
                  iii = iii + 1
                  j_local = cp_fm_indxg2l(j_sub, ncol_block, dummy_proc, &
                                          fm_mat%matrix_struct%first_p_pos(2), npcol)
                  index_col_rec(iii) = j_local
               END DO
            END DO

            CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)

            ! wait for the message
            CALL buffer_rec(rec_counter)%msg_req%wait()

            ! fill the local data in fm_mat
            iii = 0
            DO iib = 1, my_num_row_blocks
               start_row_block = max(blocks_ranges_row(1, iib), rec_row_start)
               end_row_block = min(blocks_ranges_row(2, iib), rec_row_end)
               DO i_sub = start_row_block, end_row_block
                  i_local = cp_fm_indxg2l(i_sub, nrow_block, dummy_proc, &
                                          fm_mat%matrix_struct%first_p_pos(1), nprow)
                  DO jjb = 1, num_rec_cols
                     iii = iii + 1
                     j_local = index_col_rec(jjb)
                     fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
                  END DO
               END DO
            END DO

            DEALLOCATE (buffer_rec(rec_counter)%msg)
            DEALLOCATE (index_col_rec)
         END IF
      END DO
      DEALLOCATE (buffer_rec)

      DEALLOCATE (blocks_ranges_row)
      DEALLOCATE (blocks_ranges_col)

      CALL timestop(handle2)

      ! 7) finally wait for all messages to be sent
      CALL timeset(routinen//"_waitall", handle2)
      CALL mp_waitall(req_send(:))
      DO send_counter = 1, number_of_send
         DEALLOCATE (buffer_send(send_counter)%msg)
      END DO
      DEALLOCATE (buffer_send)
      CALL timestop(handle2)

      DEALLOCATE (map_send_size)
      DEALLOCATE (map_rec_size)
      DEALLOCATE (grid_2_mepos)
      DEALLOCATE (mepos_2_grid)

      CALL timestop(handle)

   END SUBROUTINE array2fm

! **************************************************************************************************
!> \brief redistribute fm to local part of array
!> \param mat2D ...
!> \param my_rows ...
!> \param my_start_row ...
!> \param my_end_row ...
!> \param my_cols ...
!> \param my_start_col ...
!> \param my_end_col ...
!> \param group_grid_2_mepos ...
!> \param mepos_2_grid_group ...
!> \param ngroup_row ...
!> \param ngroup_col ...
!> \param fm_mat ...
! **************************************************************************************************
   SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
                       my_cols, my_start_col, my_end_col, &
                       group_grid_2_mepos, mepos_2_grid_group, &
                       ngroup_row, ngroup_col, &
                       fm_mat)

      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
         INTENT(OUT) :: mat2d
      INTEGER, INTENT(IN) :: my_rows, my_start_row, my_end_row, &
                             my_cols, my_start_col, my_end_col
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos, mepos_2_grid_group
      INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
      TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat

      CHARACTER(LEN=*), PARAMETER :: routinen = 'fm2array'

      INTEGER :: dummy_proc, handle, handle2, i_global, iib, iii, itmp(2), j_global, jjb, mypcol, &
                 myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, num_cols, &
                 num_rec_rows, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
                 proc_shift, rec_col_size, rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, &
                 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, &
                                            map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
                                               mepos_2_grid, sizes
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      REAL(kind=dp) :: part_col, part_row
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_para_env_type), POINTER :: para_env
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routinen, handle)

      ! allocate the array
      ALLOCATE (mat2d(my_rows, my_cols))
      mat2d = 0.0_dp

      ! start the procedure
      ! get information from the full matrix
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices, &
                          nrow_block=nrow_block, &
                          ncol_block=ncol_block, &
                          nrow_global=num_rows, &
                          ncol_global=num_cols, &
                          para_env=para_env)
      myprow = fm_mat%matrix_struct%context%mepos(1)
      mypcol = fm_mat%matrix_struct%context%mepos(2)
      nprow = fm_mat%matrix_struct%context%num_pe(1)
      npcol = fm_mat%matrix_struct%context%num_pe(2)

      ! start the communication part
      ! 0) create arrays containing the process positions and supporting info
      CALL timeset(routinen//"_info", handle2)
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))

      ! fill the info array
      grid_2_mepos(myprow, mypcol) = para_env%mepos

      ! sum the info
      CALL para_env%sum(grid_2_mepos)
      CALL para_env%allgather([myprow, mypcol], mepos_2_grid)

      ! 1) loop over my local data and define a map of the processes to send data to
      ALLOCATE (map_send_size(0:para_env%num_pe - 1))
      map_send_size = 0
      part_row = real(num_rows, kind=dp)/real(ngroup_row, kind=dp)
      part_col = real(num_cols, kind=dp)/real(ngroup_col, kind=dp)

      DO jjb = 1, ncol_local
         j_global = col_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         send_pcol = int(real(j_global - 1, kind=dp)/part_col)
         send_pcol = max(0, send_pcol)
         send_pcol = min(send_pcol, ngroup_col - 1)
         DO
            itmp = get_limit(num_cols, ngroup_col, send_pcol)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) send_pcol = send_pcol - 1
            IF (j_global > itmp(2)) send_pcol = send_pcol + 1
         END DO

         DO iib = 1, nrow_local
            i_global = row_indices(iib)
            ! check the process in the group holding this element
            send_prow = int(real(i_global - 1, kind=dp)/part_row)
            send_prow = max(0, send_prow)
            send_prow = min(send_prow, ngroup_row - 1)
            DO
               itmp = get_limit(num_rows, ngroup_row, send_prow)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) send_prow = send_prow - 1
               IF (i_global > itmp(2)) send_prow = send_prow + 1
            END DO

            proc_send = group_grid_2_mepos(send_prow, send_pcol)

            map_send_size(proc_send) = map_send_size(proc_send) + 1

         END DO ! i_global
      END DO ! j_global

      ! 2) loop over my local part of the array and define a map of the processes to receive data from
      ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
      map_rec_size = 0
      dummy_proc = 0
      DO jjb = my_start_col, my_end_col
         rec_pcol = cp_fm_indxg2p(jjb, ncol_block, dummy_proc, &
                                  fm_mat%matrix_struct%first_p_pos(2), npcol)
         DO iib = my_start_row, my_end_row
            rec_prow = cp_fm_indxg2p(iib, nrow_block, dummy_proc, &
                                     fm_mat%matrix_struct%first_p_pos(1), nprow)
            proc_receive = grid_2_mepos(rec_prow, rec_pcol)
            map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
         END DO
      END DO

      ! 3) check if the local data in fm_mat has to be stored in the new array
      IF (map_rec_size(para_env%mepos) > 0) THEN
         DO jjb = 1, ncol_local
            j_global = col_indices(jjb)
            IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
               DO iib = 1, nrow_local
                  i_global = row_indices(iib)
                  IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                     mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iib, jjb)
                  END IF
               END DO
            END IF
         END DO
      END IF
      CALL timestop(handle2)

      ! 4) reorder the data in the send buffers
      CALL timeset(routinen//"_buffer_send", handle2)
      ! count the number of messages to send
      number_of_send = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            number_of_send = number_of_send + 1
         END IF
      END DO
      ! allocate the structure that will hold the messages to be sent
      ALLOCATE (buffer_send(number_of_send))
      ! grid_ref_2_send_pos is a map that, given a pair
      ! (ref_send_prow, ref_send_pcol), returns the position in buffer_send
      ! associated with that process
      ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
      grid_ref_2_send_pos = 0

      ! finalize the allocation of buffer_send with the actual size
      ! of each message (actual size is size_send_buffer)
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         size_send_buffer = map_send_size(proc_send)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            ! allocate the sending buffer (msg)
            ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
            buffer_send(send_counter)%msg = 0.0_dp
            buffer_send(send_counter)%proc = proc_send
            ! get the prow, pcol of the process that has
            ! to receive this message
            ref_send_prow = mepos_2_grid_group(1, proc_send)
            ref_send_pcol = mepos_2_grid_group(2, proc_send)
            ! save the position of the message directed to this process
            grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
         END IF
      END DO

      ! loop over the locally held data and fill buffer_send;
      ! for doing that we need an array that keeps track of the
      ! sequential increase of the index for each message
      ALLOCATE (iii_vet(number_of_send))
      iii_vet = 0
      DO jjb = 1, ncol_local
         j_global = col_indices(jjb)
         ! check the group holding this element
         ! dirty way, if someone has a better idea ...
         send_pcol = int(real(j_global - 1, kind=dp)/part_col)
         send_pcol = max(0, send_pcol)
         send_pcol = min(send_pcol, ngroup_col - 1)
         DO
            itmp = get_limit(num_cols, ngroup_col, send_pcol)
            IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
            IF (j_global < itmp(1)) send_pcol = send_pcol - 1
            IF (j_global > itmp(2)) send_pcol = send_pcol + 1
         END DO

         DO iib = 1, nrow_local
            i_global = row_indices(iib)
            ! check the process in the group holding this element
            send_prow = int(real(i_global - 1, kind=dp)/part_row)
            send_prow = max(0, send_prow)
            send_prow = min(send_prow, ngroup_row - 1)
            DO
               itmp = get_limit(num_rows, ngroup_row, send_prow)
               IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
               IF (i_global < itmp(1)) send_prow = send_prow - 1
               IF (i_global > itmp(2)) send_prow = send_prow + 1
            END DO
            ! we don't need to send to ourselves
            IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle

            send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
            iii_vet(send_counter) = iii_vet(send_counter) + 1
            iii = iii_vet(send_counter)
            buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iib, jjb)
         END DO
      END DO

      DEALLOCATE (iii_vet)
      DEALLOCATE (grid_ref_2_send_pos)
      CALL timestop(handle2)

      ! 5) similarly to what was done for buffer_send,
      ! create the receive buffers, post the messages with irecv,
      ! and send the messages non-blocking
      CALL timeset(routinen//"_isendrecv", handle2)
      ! count the number of messages to be received
      number_of_rec = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         IF (map_rec_size(proc_receive) > 0) THEN
            number_of_rec = number_of_rec + 1
         END IF
      END DO

      ALLOCATE (buffer_rec(number_of_rec))

      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)
         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1
            ! prepare the buffer for receive
            ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
            buffer_rec(rec_counter)%msg = 0.0_dp
            buffer_rec(rec_counter)%proc = proc_receive
            ! post the message to be received
            CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
                                buffer_rec(rec_counter)%msg_req)
         END IF
      END DO

      ! send the messages
      ALLOCATE (req_send(number_of_send))
      send_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
         IF (map_send_size(proc_send) > 0) THEN
            send_counter = send_counter + 1
            CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
                                buffer_send(send_counter)%msg_req)
            req_send(send_counter) = buffer_send(send_counter)%msg_req
         END IF
      END DO
      CALL timestop(handle2)

      ! 6) fill the mat2d array with the messages received from the
      ! other processes
      CALL timeset(routinen//"_fill", handle2)
      CALL cp_fm_get_info(matrix=fm_mat, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local)
      ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
      CALL para_env%allgather([nrow_local, ncol_local], sizes)
      iib = maxval(sizes(1, :))
      ALLOCATE (index_row_rec(iib))
      index_row_rec = 0
      rec_counter = 0
      DO proc_shift = 1, para_env%num_pe - 1
         proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
         size_rec_buffer = map_rec_size(proc_receive)

         IF (map_rec_size(proc_receive) > 0) THEN
            rec_counter = rec_counter + 1

            rec_col_size = sizes(2, proc_receive)
            rec_row_size = sizes(1, proc_receive)

            ! precompute the indices of the rows
            num_rec_rows = 0
            DO iib = 1, rec_row_size
               i_global = cp_fm_indxl2g(iib, nrow_block, mepos_2_grid(1, proc_receive), &
                                        fm_mat%matrix_struct%first_p_pos(1), nprow)
               IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
                  num_rec_rows = num_rec_rows + 1
                  index_row_rec(num_rec_rows) = i_global
               END IF
            END DO

            CALL buffer_rec(rec_counter)%msg_req%wait()

            iii = 0
            DO jjb = 1, rec_col_size
               j_global = cp_fm_indxl2g(jjb, ncol_block, mepos_2_grid(2, proc_receive), &
                                        fm_mat%matrix_struct%first_p_pos(2), npcol)
               IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
                  DO iib = 1, num_rec_rows
                     i_global = index_row_rec(iib)
                     iii = iii + 1
                     mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
                  END DO
               END IF
            END DO

            DEALLOCATE (buffer_rec(rec_counter)%msg)
         END IF
      END DO
      DEALLOCATE (buffer_rec)
      DEALLOCATE (index_row_rec)
      CALL cp_fm_release(fm_mat)
      CALL timestop(handle2)

      ! 7) finally wait for all messages to be sent
      CALL timeset(routinen//"_waitall", handle2)
      CALL mp_waitall(req_send(:))
      DO send_counter = 1, number_of_send
         DEALLOCATE (buffer_send(send_counter)%msg)
      END DO
      DEALLOCATE (buffer_send)
      CALL timestop(handle2)

      CALL timestop(handle)

   END SUBROUTINE fm2array

! **************************************************************************************************
!> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
!> \param Gamma_2D ...
!> \param homo ...
!> \param virtual ...
!> \param dimen_ia ...
!> \param para_env_sub ...
!> \param my_ia_start ...
!> \param my_ia_end ...
!> \param my_group_L_size ...
!> \param gd_ia ...
!> \param dbcsr_Gamma ...
!> \param mo_coeff_o ...
! **************************************************************************************************
   SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
                                 my_ia_start, my_ia_end, my_group_L_size, &
                                 gd_ia, dbcsr_Gamma, mo_coeff_o)
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: gamma_2d
      INTEGER, INTENT(IN) :: homo, virtual, dimen_ia
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub
      INTEGER, INTENT(IN) :: my_ia_start, my_ia_end, my_group_l_size
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_ia
      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: dbcsr_gamma
      TYPE(dbcsr_type), INTENT(INOUT) :: mo_coeff_o

      CHARACTER(LEN=*), PARAMETER :: routinen = 'create_dbcsr_gamma'

      INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iib, iii, itmp(2), j_global, &
                 j_local, jjb, jjj, kkb, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
                 nrow_local, number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, &
                 rec_counter, rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, &
                 ref_send_pcol, ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, &
                 size_send_buffer
      INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
                                               indeces_map_my, mepos_2_grid
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      REAL(kind=dp) :: part_ia
      TYPE(cp_blacs_env_type), POINTER :: blacs_env
      TYPE(cp_fm_struct_type), POINTER :: fm_struct
      TYPE(cp_fm_type) :: fm_ia
      TYPE(index_map), ALLOCATABLE, DIMENSION(:) :: indeces_rec
      TYPE(integ_mat_buffer_type), ALLOCATABLE, &
         DIMENSION(:) :: buffer_rec, buffer_send
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send

      CALL timeset(routinen, handle)

      ! create the sub blacs env
      NULLIFY (blacs_env)
      CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)

      ! create the fm_ia buffer matrix
      NULLIFY (fm_struct)
      CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
                               ncol_global=virtual, para_env=para_env_sub)
      CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
      ! release the structure
      CALL cp_fm_struct_release(fm_struct)
      ! release the blacs_env
      CALL cp_blacs_env_release(blacs_env)

      ! get array information
      CALL cp_fm_get_info(matrix=fm_ia, &
                          nrow_local=nrow_local, &
                          ncol_local=ncol_local, &
                          row_indices=row_indices, &
                          col_indices=col_indices, &
                          nrow_block=nrow_block, &
                          ncol_block=ncol_block)
      myprow = fm_ia%matrix_struct%context%mepos(1)
      mypcol = fm_ia%matrix_struct%context%mepos(2)
      nprow = fm_ia%matrix_struct%context%num_pe(1)
      npcol = fm_ia%matrix_struct%context%num_pe(2)

      ! 0) create arrays containing the process positions and supporting info
      ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
      grid_2_mepos = 0
      ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
      ! fill the info array
      grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
      ! sum the info
      CALL para_env_sub%sum(grid_2_mepos)
      CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)

      ! loop over the local index range and define the sending map
      ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
      map_send_size = 0
      dummy_proc = 0
      DO iaia = my_ia_start, my_ia_end
         i_global = (iaia - 1)/virtual + 1
         j_global = mod(iaia - 1, virtual) + 1
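         ! inverse of the composite pair index ia = (i - 1)*virtual + a:
         ! i = (ia - 1)/virtual + 1 (occupied), a = mod(ia - 1, virtual) + 1 (virtual)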
1329  send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1330  fm_ia%matrix_struct%first_p_pos(1), nprow)
1331  send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1332  fm_ia%matrix_struct%first_p_pos(2), npcol)
1333  proc_send = grid_2_mepos(send_prow, send_pcol)
1334  map_send_size(proc_send) = map_send_size(proc_send) + 1
1335  END DO
1336 
1337  ! loop over local data of fm_ia and define the receiving map
1338  ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
1339  map_rec_size = 0
1340  part_ia = real(dimen_ia, kind=dp)/real(para_env_sub%num_pe, kind=dp)
1341 
1342  DO iib = 1, nrow_local
1343  i_global = row_indices(iib)
1344  DO jjb = 1, ncol_local
1345  j_global = col_indices(jjb)
1346  iaia = (i_global - 1)*virtual + j_global
1347  proc_receive = int(real(iaia - 1, kind=dp)/part_ia)
1348  proc_receive = max(0, proc_receive)
1349  proc_receive = min(proc_receive, para_env_sub%num_pe - 1)
1350  DO
1351  itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
1352  IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
1353  IF (iaia < itmp(1)) proc_receive = proc_receive - 1
1354  IF (iaia > itmp(2)) proc_receive = proc_receive + 1
1355  END DO
1356  map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1357  END DO
1358  END DO
1359 
1360  ! allocate the buffer for sending data
1361  number_of_send = 0
1362  DO proc_shift = 1, para_env_sub%num_pe - 1
1363  proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1364  IF (map_send_size(proc_send) > 0) THEN
1365  number_of_send = number_of_send + 1
1366  END IF
1367  END DO
1368  ! allocate the structure that will hold the messages to be sent
1369  ALLOCATE (buffer_send(number_of_send))
1370  ! and the map from the grid of processess to the message position
1371  ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
1372  grid_ref_2_send_pos = 0
1373  ! finally allocate each message
1374  send_counter = 0
1375  DO proc_shift = 1, para_env_sub%num_pe - 1
1376  proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1377  size_send_buffer = map_send_size(proc_send)
1378  IF (map_send_size(proc_send) > 0) THEN
1379  send_counter = send_counter + 1
1380  ! allocate the sending buffer (msg)
1381  ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1382  buffer_send(send_counter)%proc = proc_send
1383  ! get the pointer to prow, pcol of the process that has
1384  ! to receive this message
1385  ref_send_prow = mepos_2_grid(1, proc_send)
1386  ref_send_pcol = mepos_2_grid(2, proc_send)
1387  ! save the rank of the process that has to receive this message
1388  grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1389  END IF
1390  END DO
1391 
1392  ! allocate the buffer for receiving data
1393  number_of_rec = 0
1394  DO proc_shift = 1, para_env_sub%num_pe - 1
1395  proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1396  IF (map_rec_size(proc_receive) > 0) THEN
1397  number_of_rec = number_of_rec + 1
1398  END IF
1399  END DO
1400  ! allocate the structure that will hold the messages to be received
1401  ! and relative indeces
1402  ALLOCATE (buffer_rec(number_of_rec))
1403  ALLOCATE (indeces_rec(number_of_rec))
1404  ! finally allocate each message and fill the array of indeces
1405  rec_counter = 0
1406  DO proc_shift = 1, para_env_sub%num_pe - 1
1407  proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1408  size_rec_buffer = map_rec_size(proc_receive)
1409  IF (map_rec_size(proc_receive) > 0) THEN
1410  rec_counter = rec_counter + 1
1411  ! prepare the buffer for receive
1412  ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1413  buffer_rec(rec_counter)%proc = proc_receive
1414  ! create the index array
1415  ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
1416  indeces_rec(rec_counter)%map = 0
1417  CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
1418  iii = 0
1419  DO iaia = rec_iaia_start, rec_iaia_end
1420  i_global = (iaia - 1)/virtual + 1
1421  j_global = mod(iaia - 1, virtual) + 1
1422  rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1423  fm_ia%matrix_struct%first_p_pos(1), nprow)
1424  rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1425  fm_ia%matrix_struct%first_p_pos(2), npcol)
1426  IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
1427  iii = iii + 1
1428  i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
1429  fm_ia%matrix_struct%first_p_pos(1), nprow)
1430  j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
1431  fm_ia%matrix_struct%first_p_pos(2), npcol)
1432  indeces_rec(rec_counter)%map(1, iii) = i_local
1433  indeces_rec(rec_counter)%map(2, iii) = j_local
1434  END DO
1435  END IF
1436  END DO
1437  ! and create the index map for my local data
1438  IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1439  size_rec_buffer = map_rec_size(para_env_sub%mepos)
1440  ALLOCATE (indeces_map_my(2, size_rec_buffer))
1441  indeces_map_my = 0
1442  iii = 0
1443  DO iaia = my_ia_start, my_ia_end
1444  i_global = (iaia - 1)/virtual + 1
1445  j_global = mod(iaia - 1, virtual) + 1
1446  rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1447  fm_ia%matrix_struct%first_p_pos(1), nprow)
1448  rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1449  fm_ia%matrix_struct%first_p_pos(2), npcol)
1450  IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
1451  iii = iii + 1
1452  i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
1453  fm_ia%matrix_struct%first_p_pos(1), nprow)
1454  j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
1455  fm_ia%matrix_struct%first_p_pos(2), npcol)
1456  indeces_map_my(1, iii) = i_local
1457  indeces_map_my(2, iii) = j_local
1458  END DO
1459  END IF
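  ! note: the index maps built above translate, once per redistribution,
  ! the global (row, column) of every incoming element into the local
  ! coordinates of fm_ia%local_data via cp_fm_indxg2l, so the transfer
  ! loops below can store received values without redoing the BLACS
  ! index arithmetic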
1460 
1461  ! auxiliary vector of indices for the send buffer
1462  ALLOCATE (iii_vet(number_of_send))
1463  ! vector for the send requests
1464  ALLOCATE (req_send(number_of_send))
1465  ! loop over the auxiliary basis functions and redistribute into a fm
1466  ! and then copy the fm into a dbcsr matrix
1467  DO kkb = 1, my_group_l_size
1468  ! zero the buffer matrices and post the messages to be received
1469  CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
1470  rec_counter = 0
1471  DO proc_shift = 1, para_env_sub%num_pe - 1
1472  proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1473  IF (map_rec_size(proc_receive) > 0) THEN
1474  rec_counter = rec_counter + 1
1475  buffer_rec(rec_counter)%msg = 0.0_dp
1476  CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1477  buffer_rec(rec_counter)%msg_req)
1478  END IF
1479  END DO
1480  ! fill the send buffers and post the messages
1481  DO send_counter = 1, number_of_send
1482  buffer_send(send_counter)%msg = 0.0_dp
1483  END DO
1484  iii_vet = 0
1485  jjj = 0
1486  DO iaia = my_ia_start, my_ia_end
1487  i_global = (iaia - 1)/virtual + 1
1488  j_global = mod(iaia - 1, virtual) + 1
1489  send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1490  fm_ia%matrix_struct%first_p_pos(1), nprow)
1491  send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1492  fm_ia%matrix_struct%first_p_pos(2), npcol)
1493  proc_send = grid_2_mepos(send_prow, send_pcol)
1494  ! we don't need to send to ourselves
1495  IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
1496  ! filling fm_ia with local data
1497  jjj = jjj + 1
1498  i_local = indeces_map_my(1, jjj)
1499  j_local = indeces_map_my(2, jjj)
1500  fm_ia%local_data(i_local, j_local) = gamma_2d(iaia - my_ia_start + 1, kkb)
1501  ELSE
1502  send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1503  iii_vet(send_counter) = iii_vet(send_counter) + 1
1504  iii = iii_vet(send_counter)
1505  buffer_send(send_counter)%msg(iii) = gamma_2d(iaia - my_ia_start + 1, kkb)
1506  END IF
1507  END DO
1508  req_send = mp_request_null
1509  send_counter = 0
1510  DO proc_shift = 1, para_env_sub%num_pe - 1
1511  proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1512  IF (map_send_size(proc_send) > 0) THEN
1513  send_counter = send_counter + 1
1514  CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
1515  buffer_send(send_counter)%msg_req)
1516  req_send(send_counter) = buffer_send(send_counter)%msg_req
1517  END IF
1518  END DO
1519 
1520  ! receive the messages and fill fm_ia
1521  rec_counter = 0
1522  DO proc_shift = 1, para_env_sub%num_pe - 1
1523  proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1524  size_rec_buffer = map_rec_size(proc_receive)
1525  IF (map_rec_size(proc_receive) > 0) THEN
1526  rec_counter = rec_counter + 1
1527  ! wait for the message
1528  CALL buffer_rec(rec_counter)%msg_req%wait()
1529  DO iii = 1, size_rec_buffer
1530  i_local = indeces_rec(rec_counter)%map(1, iii)
1531  j_local = indeces_rec(rec_counter)%map(2, iii)
1532  fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1533  END DO
1534  END IF
1535  END DO
1536 
1537  ! wait for all sends to complete
1538  CALL mp_waitall(req_send(:))
1539 
1540  ! now create the DBCSR matrix and copy fm_ia into it
1541  ALLOCATE (dbcsr_gamma(kkb)%matrix)
1542  CALL cp_dbcsr_m_by_n_from_template(dbcsr_gamma(kkb)%matrix, &
1543  template=mo_coeff_o, &
1544  m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
1545  CALL copy_fm_to_dbcsr(fm_ia, dbcsr_gamma(kkb)%matrix, keep_sparsity=.false.)
1546 
1547  END DO
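  ! note: each pass of the loop above is a standard non-blocking exchange:
  ! post all receives first, scatter the local slice of Gamma_2D either
  ! directly into fm_ia%local_data or into the send buffers, send, drain
  ! the receives, and only then copy the completed full matrix into the
  ! new DBCSR matrix of this auxiliary basis function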
1548 
1549  ! deallocate work arrays and buffers
1550  DEALLOCATE (gamma_2d)
1551  DEALLOCATE (iii_vet)
1552  DEALLOCATE (req_send)
1553  IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1554  DEALLOCATE (indeces_map_my)
1555  END IF
1556  DO rec_counter = 1, number_of_rec
1557  DEALLOCATE (indeces_rec(rec_counter)%map)
1558  DEALLOCATE (buffer_rec(rec_counter)%msg)
1559  END DO
1560  DEALLOCATE (indeces_rec)
1561  DEALLOCATE (buffer_rec)
1562  DO send_counter = 1, number_of_send
1563  DEALLOCATE (buffer_send(send_counter)%msg)
1564  END DO
1565  DEALLOCATE (buffer_send)
1566  DEALLOCATE (map_send_size)
1567  DEALLOCATE (map_rec_size)
1568  DEALLOCATE (grid_2_mepos)
1569  DEALLOCATE (mepos_2_grid)
1570 
1571  ! release buffer matrix
1572  CALL cp_fm_release(fm_ia)
1573 
1574  CALL timestop(handle)
1575 
1576  END SUBROUTINE create_dbcsr_gamma
1577 
1578 ! **************************************************************************************************
1579 !> \brief prepare array for redistribution
1580 !> \param para_env ...
1581 !> \param para_env_sub ...
1582 !> \param ngroup ...
1583 !> \param group_grid_2_mepos ...
1584 !> \param mepos_2_grid_group ...
1585 !> \param pos_info ...
1586 ! **************************************************************************************************
1587  SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
1588  group_grid_2_mepos, mepos_2_grid_group, &
1589  pos_info)
1590  TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
1591  INTEGER, INTENT(IN) :: ngroup
1592  INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
1593  INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
1594  OPTIONAL :: pos_info
1595 
1596  INTEGER :: i, pos_group, pos_sub
1597  INTEGER, ALLOCATABLE, DIMENSION(:) :: my_pos_info
1598 
1599  ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
1600  CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
1601 
1602  ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
1603  group_grid_2_mepos = 0
1604  ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
1605  mepos_2_grid_group = 0
1606 
1607  DO i = 0, para_env%num_pe - 1
1608  ! calculate position of the group
1609  pos_group = i/para_env_sub%num_pe
1610  ! calculate position in the subgroup
1611  pos_sub = my_pos_info(i)
1612  ! fill the map from the grid of groups to process
1613  group_grid_2_mepos(pos_sub, pos_group) = i
1614  ! and the opposite, from the global pos to the grid pos
1615  mepos_2_grid_group(1, i) = pos_sub
1616  mepos_2_grid_group(2, i) = pos_group
1617  END DO
1618 
1619  IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
1620 
1621  END SUBROUTINE prepare_redistribution
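  ! illustrative sketch (hypothetical values, not part of the module): with
  ! para_env%num_pe = 8 and para_env_sub%num_pe = 2, hence ngroup = 4, and
  ! contiguous subgroups, global rank 5 belongs to group 5/2 = 2 at subgroup
  ! position 1, so the maps come back as group_grid_2_mepos(1, 2) = 5 and
  ! mepos_2_grid_group(1:2, 5) = [1, 2]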
1622 
1623 END MODULE mp2_ri_grad_util