(git:b17b328)
Loading...
Searching...
No Matches
mp2_ri_grad_util.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Routines for calculating RI-MP2 gradients
10!> \par History
11!> 10.2013 created [Mauro Del Ben]
12! **************************************************************************************************
14!
18 USE cp_dbcsr_api, ONLY: dbcsr_p_type,&
20 dbcsr_type_no_symmetry
28 USE cp_fm_types, ONLY: cp_fm_create,&
38 USE kinds, ONLY: dp
47 USE util, ONLY: get_limit
48#include "./base/base_uses.f90"
49
50 IMPLICIT NONE
51
52 PRIVATE
53
54 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'
55
57
58 TYPE index_map
59 INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
60 END TYPE
61
62CONTAINS
63
64! **************************************************************************************************
65!> \brief complete the calculation of the Gamma matrices
66!> \param mp2_env ...
67!> \param B_ia_Q ...
68!> \param dimen_RI ...
69!> \param homo ...
70!> \param virtual ...
71!> \param para_env ...
72!> \param para_env_sub ...
73!> \param ngroup ...
74!> \param my_group_L_size ...
75!> \param my_group_L_start ...
76!> \param my_group_L_end ...
77!> \param my_B_size ...
78!> \param my_B_virtual_start ...
79!> \param gd_array ...
80!> \param gd_B_virtual ...
81!> \param kspin ...
82!> \author Mauro Del Ben
83! **************************************************************************************************
84 SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
85 my_group_L_size, my_group_L_start, my_group_L_end, &
86 my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
87
88 TYPE(mp2_type) :: mp2_env
89 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
90 INTENT(INOUT) :: b_ia_q
91 INTEGER, INTENT(IN) :: dimen_ri, homo, virtual
92 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
93 INTEGER, INTENT(IN) :: ngroup, my_group_l_size, &
94 my_group_l_start, my_group_l_end, &
95 my_b_size, my_b_virtual_start
96 TYPE(group_dist_d1_type), INTENT(IN) :: gd_array, gd_b_virtual
97 INTEGER, INTENT(IN) :: kspin
98
99 CHARACTER(LEN=*), PARAMETER :: routinen = 'complete_gamma'
100
101 INTEGER :: dimen_ia, handle, i, ispin, kkb, my_ia_end, my_ia_size, my_ia_start, my_p_end, &
102 my_p_size, my_p_start, nspins, pos_group, pos_sub
103 INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
104 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
105 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_c_2d, gamma_2d, gamma_pq
106 TYPE(cp_blacs_env_type), POINTER :: blacs_env
107 TYPE(cp_fm_struct_type), POINTER :: fm_struct_ia, fm_struct_ri
108 TYPE(cp_fm_type) :: fm_gamma, fm_gamma_pq, fm_gamma_pq_2, fm_gamma_pq_temp, &
109 fm_gamma_pq_temp_2, fm_ia_p, fm_y, operator_half, pq_half
110 TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_ia_new, gd_p, &
111 gd_p_new
112
113 CALL timeset(routinen, handle)
114
115 ! reshape the data into a 2D array
116 ! reorder the local data in such a way to help the next stage of matrix creation
117 ! now the data inside the group are divided into a ia x K matrix
118 dimen_ia = homo*virtual
119 CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
120 CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
121
122 CALL mat_3d_to_2d(b_ia_q, bib_c_2d, homo, my_b_size, virtual, my_b_virtual_start, &
123 my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)
124
125 CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, gamma_2d, homo, my_b_size, virtual, my_b_virtual_start, &
126 my_ia_start, my_ia_end, my_ia_size, my_group_l_size, para_env_sub, gd_b_virtual)
127
128 ! create the processor map and size arrays
129 CALL create_group_dist(gd_ia_new, para_env%num_pe)
130 CALL create_group_dist(gd_array_new, para_env%num_pe)
131 CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
132 CALL create_group_dist(gd_p_new, para_env%num_pe)
133
134 CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)
135
136 CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
137 group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
138
139 DO i = 0, para_env%num_pe - 1
140 ! calculate position of the group
141 pos_group = i/para_env_sub%num_pe
142 ! calculate position in the subgroup
143 pos_sub = pos_info(i)
144 ! 1 -> rows, 2 -> cols
145 CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
146 CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
147 CALL get_group_dist(gd_p, pos_sub, gd_p_new, i)
148 END DO
149
150 ! create the blacs env
151 NULLIFY (blacs_env)
152 CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)
153
154 NULLIFY (fm_struct_ia)
155 CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
156 ncol_global=dimen_ri, para_env=para_env)
157
158 ! create the fm matrix Gamma
159 CALL array2fm(gamma_2d, fm_struct_ia, my_ia_start, my_ia_end, &
160 my_group_l_start, my_group_l_end, &
161 gd_ia_new, gd_array_new, &
162 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
163 fm_y)
164 ! create the fm matrix B_ia_P
165 CALL array2fm(bib_c_2d, fm_struct_ia, my_ia_start, my_ia_end, &
166 my_group_l_start, my_group_l_end, &
167 gd_ia_new, gd_array_new, &
168 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
169 fm_ia_p)
170
171 NULLIFY (fm_struct_ri)
172 CALL cp_fm_struct_create(fm_struct_ri, context=blacs_env, nrow_global=dimen_ri, &
173 ncol_global=dimen_ri, para_env=para_env)
174
175 ! since we will need (P|Q)^(-1/2) in the future, make a copy
176 CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_ri, my_p_start, my_p_end, &
177 my_group_l_start, my_group_l_end, &
178 gd_p_new, gd_array_new, &
179 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
180 pq_half, do_release_mat=.false.)
181
182 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
183 CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_ri, my_p_start, my_p_end, &
184 my_group_l_start, my_group_l_end, &
185 gd_p_new, gd_array_new, &
186 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
187 operator_half, do_release_mat=.false.)
188 END IF
189
190 CALL release_group_dist(gd_p_new)
191 CALL release_group_dist(gd_ia_new)
192 CALL release_group_dist(gd_array_new)
193
194 ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
195 IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
196 CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
197 CALL cp_fm_struct_release(fm_struct_ia)
198 ! perform the matrix multiplication
199 CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
200 matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
201 matrix_c=fm_gamma)
202 ! release the Y matrix
203 CALL cp_fm_release(fm_y)
204
205 ! complete gamma small (fm_Gamma_PQ)
206 ! create temp matrix
207 CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
208 CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
209 matrix_a=fm_gamma, matrix_b=fm_ia_p, beta=0.0_dp, &
210 matrix_c=fm_gamma_pq_temp)
211 CALL cp_fm_release(fm_ia_p)
212 ! create fm_Gamma_PQ matrix
213 CALL cp_fm_create(fm_gamma_pq, fm_struct_ri, name="fm_Gamma_PQ")
214 CALL cp_fm_struct_release(fm_struct_ri)
215 ! perform matrix multiplication
216 CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
217 matrix_a=fm_gamma_pq_temp, matrix_b=pq_half, beta=0.0_dp, &
218 matrix_c=fm_gamma_pq)
219 CALL cp_fm_release(fm_gamma_pq_temp)
220 CALL cp_fm_release(pq_half)
221
222 ELSE
223 ! create temp matrix
224 CALL cp_fm_create(fm_gamma_pq_temp, fm_struct_ri, name="fm_Gamma_PQ_temp")
225 CALL parallel_gemm(transa="T", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ia, alpha=1.0_dp, &
226 matrix_a=fm_y, matrix_b=fm_ia_p, beta=0.0_dp, &
227 matrix_c=fm_gamma_pq_temp)
228 CALL cp_fm_release(fm_ia_p)
229
230 CALL cp_fm_create(fm_gamma, fm_struct_ia, name="fm_Gamma")
231 CALL cp_fm_struct_release(fm_struct_ia)
232 ! perform the matrix multiplication
233 CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
234 matrix_a=fm_y, matrix_b=pq_half, beta=0.0_dp, &
235 matrix_c=fm_gamma)
236 CALL cp_fm_release(fm_y)
237
238 CALL cp_fm_create(fm_gamma_pq_temp_2, fm_struct_ri, name="fm_Gamma_PQ_temp_2")
239 CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
240 matrix_a=fm_gamma_pq_temp, matrix_b=operator_half, beta=0.0_dp, &
241 matrix_c=fm_gamma_pq_temp_2)
242
243 CALL cp_fm_create(fm_gamma_pq_2, fm_struct_ri, name="fm_Gamma_PQ_2")
244 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
245 matrix_a=pq_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
246 matrix_c=fm_gamma_pq_temp)
247 CALL cp_fm_to_fm(fm_gamma_pq_temp, fm_gamma_pq_2)
248 CALL cp_fm_geadd(1.0_dp, "T", fm_gamma_pq_temp, 1.0_dp, fm_gamma_pq_2)
249 CALL cp_fm_release(fm_gamma_pq_temp)
250 CALL cp_fm_release(pq_half)
251
252 CALL cp_fm_create(fm_gamma_pq, fm_struct_ri)
253 CALL cp_fm_struct_release(fm_struct_ri)
254 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-1.0_dp, &
255 matrix_a=operator_half, matrix_b=fm_gamma_pq_temp_2, beta=0.0_dp, &
256 matrix_c=fm_gamma_pq)
257 CALL cp_fm_release(operator_half)
258 CALL cp_fm_release(fm_gamma_pq_temp_2)
259 END IF
260
261 ! now redistribute the data, in this case we go back
262 ! to the original way the integrals were distributed
263 CALL fm2array(gamma_2d, my_ia_start, my_ia_end, &
264 my_group_l_start, my_group_l_end, &
265 group_grid_2_mepos, mepos_2_grid_group, &
266 para_env_sub%num_pe, ngroup, &
267 fm_gamma)
268
269 ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
270 CALL fm2array(gamma_pq, my_p_start, my_p_end, &
271 my_group_l_start, my_group_l_end, &
272 group_grid_2_mepos, mepos_2_grid_group, &
273 para_env_sub%num_pe, ngroup, &
274 fm_gamma_pq)
275 IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
276 CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ)
277 ELSE
278 mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + gamma_pq
279 DEALLOCATE (gamma_pq)
280 END IF
281
282 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
283 ALLOCATE (gamma_pq(my_p_size, my_group_l_size))
284 CALL fm2array(gamma_pq, my_p_start, my_p_end, &
285 my_group_l_start, my_group_l_end, &
286 group_grid_2_mepos, mepos_2_grid_group, &
287 para_env_sub%num_pe, ngroup, &
288 fm_gamma_pq_2)
289 IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
290 CALL move_alloc(gamma_pq, mp2_env%ri_grad%Gamma_PQ_2)
291 ELSE
292 mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + gamma_pq
293 DEALLOCATE (gamma_pq)
294 END IF
295 END IF
296
297 ! allocate G_P_ia (DBCSR)
298 IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
299 nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
300 ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
301 DO ispin = 1, nspins
302 DO kkb = 1, my_group_l_size
303 NULLIFY (mp2_env%ri_grad%G_P_ia(kkb, ispin)%matrix)
304 END DO
305 END DO
306 END IF
307
308 ! create the Gamma_ia_P in DBCSR style
309 CALL create_dbcsr_gamma(gamma_2d, homo, virtual, dimen_ia, para_env_sub, &
310 my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
311 mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)
312
313 DEALLOCATE (pos_info)
314 DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
315 CALL release_group_dist(gd_ia)
316 CALL release_group_dist(gd_p)
317
318 ! release blacs_env
319 CALL cp_blacs_env_release(blacs_env)
320
321 CALL timestop(handle)
322
323 END SUBROUTINE complete_gamma
324
325! **************************************************************************************************
326!> \brief Redistribute a 3d matrix to a 2d matrix
327!> \param B_ia_Q ...
328!> \param BIb_C_2D ...
329!> \param homo ...
330!> \param my_B_size ...
331!> \param virtual ...
332!> \param my_B_virtual_start ...
333!> \param my_ia_start ...
334!> \param my_ia_end ...
335!> \param my_ia_size ...
336!> \param my_group_L_size ...
337!> \param para_env_sub ...
338!> \param gd_B_virtual ...
339! **************************************************************************************************
340 SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
341 my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
342 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
343 INTENT(INOUT) :: b_ia_q
344 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
345 INTENT(OUT) :: bib_c_2d
346 INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
347 my_b_virtual_start, my_ia_start, &
348 my_ia_end, my_ia_size, my_group_l_size
349 TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
350 TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual
351
352 CHARACTER(LEN=*), PARAMETER :: routinen = 'mat_3d_to_2d'
353
354 INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
355 rec_b_virtual_end, rec_b_virtual_start
356 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec
357
358 CALL timeset(routinen, handle)
359
360 ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
361 bib_c_2d = 0.0_dp
362
363 DO iib = 1, homo
364 DO jjb = 1, my_b_size
365 ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
366 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
367 bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(iib, jjb, 1:my_group_l_size)
368 END IF
369 END DO
370 END DO
371
372 DO proc_shift = 1, para_env_sub%num_pe - 1
373 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
374 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
375
376 CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
377
378 ALLOCATE (bib_c_rec(homo, rec_b_size, my_group_l_size))
379 bib_c_rec = 0.0_dp
380
381 CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
382 bib_c_rec, proc_receive)
383
384 DO iib = 1, homo
385 DO jjb = 1, rec_b_size
386 ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
387 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
388 bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(iib, jjb, 1:my_group_l_size)
389 END IF
390 END DO
391 END DO
392
393 DEALLOCATE (bib_c_rec)
394 END DO
395 DEALLOCATE (b_ia_q)
396
397 CALL timestop(handle)
398 END SUBROUTINE mat_3d_to_2d
399
400! **************************************************************************************************
401!> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia to account for a different order of indices
402!> \param B_ia_Q ...
403!> \param BIb_C_2D ...
404!> \param homo ...
405!> \param my_B_size ...
406!> \param virtual ...
407!> \param my_B_virtual_start ...
408!> \param my_ia_start ...
409!> \param my_ia_end ...
410!> \param my_ia_size ...
411!> \param my_group_L_size ...
412!> \param para_env_sub ...
413!> \param gd_B_virtual ...
414! **************************************************************************************************
415 SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
416 my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
417 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
418 INTENT(INOUT) :: b_ia_q
419 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
420 INTENT(OUT) :: bib_c_2d
421 INTEGER, INTENT(IN) :: homo, my_b_size, virtual, &
422 my_b_virtual_start, my_ia_start, &
423 my_ia_end, my_ia_size, my_group_l_size
424 TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
425 TYPE(group_dist_d1_type), INTENT(IN) :: gd_b_virtual
426
427 CHARACTER(LEN=*), PARAMETER :: routinen = 'mat_3d_to_2d_gamma'
428
429 INTEGER :: handle, ia_global, iib, jjb, proc_receive, proc_send, proc_shift, rec_b_size, &
430 rec_b_virtual_end, rec_b_virtual_start
431 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_c_rec
432
433 CALL timeset(routinen, handle)
434
435 ALLOCATE (bib_c_2d(my_ia_size, my_group_l_size))
436 bib_c_2d = 0.0_dp
437
438 DO iib = 1, homo
439 DO jjb = 1, my_b_size
440 ia_global = (iib - 1)*virtual + my_b_virtual_start + jjb - 1
441 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
442 bib_c_2d(ia_global - my_ia_start + 1, :) = b_ia_q(jjb, iib, 1:my_group_l_size)
443 END IF
444 END DO
445 END DO
446
447 DO proc_shift = 1, para_env_sub%num_pe - 1
448 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
449 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
450
451 CALL get_group_dist(gd_b_virtual, proc_receive, rec_b_virtual_start, rec_b_virtual_end, rec_b_size)
452
453 ALLOCATE (bib_c_rec(rec_b_size, homo, my_group_l_size))
454 bib_c_rec = 0.0_dp
455
456 CALL para_env_sub%sendrecv(b_ia_q, proc_send, &
457 bib_c_rec, proc_receive)
458
459 DO iib = 1, homo
460 DO jjb = 1, rec_b_size
461 ia_global = (iib - 1)*virtual + rec_b_virtual_start + jjb - 1
462 IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
463 bib_c_2d(ia_global - my_ia_start + 1, :) = bib_c_rec(jjb, iib, 1:my_group_l_size)
464 END IF
465 END DO
466 END DO
467
468 DEALLOCATE (bib_c_rec)
469 END DO
470 DEALLOCATE (b_ia_q)
471
472 CALL timestop(handle)
473 END SUBROUTINE mat_3d_to_2d_gamma
474
475! **************************************************************************************************
476!> \brief redistribute local part of array to fm
477!> \param mat2D ...
478!> \param fm_struct ...
479!> \param my_start_row ...
480!> \param my_end_row ...
481!> \param my_start_col ...
482!> \param my_end_col ...
483!> \param gd_row ...
484!> \param gd_col ...
485!> \param group_grid_2_mepos ...
486!> \param ngroup_row ...
487!> \param ngroup_col ...
488!> \param fm_mat ...
489!> \param integ_group_size ...
490!> \param color_group ...
491!> \param do_release_mat whether to release the array (default: yes)
492! **************************************************************************************************
493 SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
494 my_start_col, my_end_col, &
495 gd_row, gd_col, &
496 group_grid_2_mepos, ngroup_row, ngroup_col, &
497 fm_mat, integ_group_size, color_group, do_release_mat)
498
499 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
500 INTENT(INOUT) :: mat2d
501 TYPE(cp_fm_struct_type), POINTER :: fm_struct
502 INTEGER, INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
503 my_end_col
504 TYPE(group_dist_d1_type), INTENT(IN) :: gd_row, gd_col
505 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos
506 INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
507 TYPE(cp_fm_type), INTENT(OUT) :: fm_mat
508 INTEGER, INTENT(IN), OPTIONAL :: integ_group_size, color_group
509 LOGICAL, INTENT(IN), OPTIONAL :: do_release_mat
510
511 CHARACTER(LEN=*), PARAMETER :: routinen = 'array2fm'
512
513 INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
514 i_sub, iib, iii, itmp(2), j_global, j_local, j_sub, jjb, my_num_col_blocks, &
515 my_num_row_blocks, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, num_cols, &
516 num_rec_cols, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
517 proc_shift, rec_col_end, rec_col_size, rec_col_start, rec_counter, rec_pcol, rec_prow, &
518 rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, ref_send_prow, send_counter, &
519 send_pcol, send_prow, size_rec_buffer, size_send_buffer, start_col_block, start_row_block
520 INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, &
521 map_send_size
522 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, &
523 grid_2_mepos, grid_ref_2_send_pos, &
524 mepos_2_grid
525 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
526 LOGICAL :: convert_pos, my_do_release_mat
527 REAL(kind=dp) :: part_col, part_row
528 TYPE(integ_mat_buffer_type), ALLOCATABLE, &
529 DIMENSION(:) :: buffer_rec, buffer_send
530 TYPE(mp_para_env_type), POINTER :: para_env
531 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
532
533 CALL timeset(routinen, handle)
534
535 my_do_release_mat = .true.
536 IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat
537
538 CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)
539
540 ! create the full matrix, (num_rows x num_cols)
541 CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
542 CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)
543
544 ! start filling procedure
545 ! fill the matrix
546 CALL cp_fm_get_info(matrix=fm_mat, &
547 nrow_local=nrow_local, &
548 ncol_local=ncol_local, &
549 row_indices=row_indices, &
550 col_indices=col_indices)
551 myprow = fm_mat%matrix_struct%context%mepos(1)
552 mypcol = fm_mat%matrix_struct%context%mepos(2)
553 nprow = fm_mat%matrix_struct%context%num_pe(1)
554 npcol = fm_mat%matrix_struct%context%num_pe(2)
555
556 ! start the communication part
557 ! 0) create array containing the processes position
558 ! and supporting infos
559 CALL timeset(routinen//"_info", handle2)
560 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
561 grid_2_mepos = 0
562 ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
563 grid_2_mepos(myprow, mypcol) = para_env%mepos
564 ! sum infos
565 CALL para_env%sum(grid_2_mepos)
566 CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
567
568 ! 1) loop over my local data and define a map for the proc to send data
569 ALLOCATE (map_send_size(0:para_env%num_pe - 1))
570 map_send_size = 0
571 dummy_proc = 0
572 DO jjb = my_start_col, my_end_col
573 send_pcol = fm_mat%matrix_struct%g2p_col(jjb)
574 DO iib = my_start_row, my_end_row
575 send_prow = fm_mat%matrix_struct%g2p_row(iib)
576 proc_send = grid_2_mepos(send_prow, send_pcol)
577 map_send_size(proc_send) = map_send_size(proc_send) + 1
578 END DO
579 END DO
580
581 ! 2) loop over my local data of fm_mat and define a map for the proc from which rec data
582 ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
583 map_rec_size = 0
584 part_row = real(num_rows, kind=dp)/real(ngroup_row, kind=dp)
585 part_col = real(num_cols, kind=dp)/real(ngroup_col, kind=dp)
586
587 ! In some cases we have to convert global positions to positions in a subgroup (RPA)
588 convert_pos = .false.
589 IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .true.
590
591 DO jjb = 1, nrow_local
592 j_global = row_indices(jjb)
593 ! check the group holding this element
594 ! dirty way, if someone has a better idea ...
595 rec_prow = int(real(j_global - 1, kind=dp)/part_row)
596 rec_prow = max(0, rec_prow)
597 rec_prow = min(rec_prow, ngroup_row - 1)
598 DO
599 itmp = get_limit(num_rows, ngroup_row, rec_prow)
600 IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
601 IF (j_global < itmp(1)) rec_prow = rec_prow - 1
602 IF (j_global > itmp(2)) rec_prow = rec_prow + 1
603 END DO
604
605 IF (convert_pos) THEN
606 ! if the group is not in the same RPA group cycle
607 IF ((rec_prow/integ_group_size) .NE. color_group) cycle
608 ! convert global position to position into the RPA group
609 rec_prow = mod(rec_prow, integ_group_size)
610 END IF
611
612 DO iib = 1, ncol_local
613 i_global = col_indices(iib)
614 ! check the process in the group holding this element
615 rec_pcol = int(real(i_global - 1, kind=dp)/part_col)
616 rec_pcol = max(0, rec_pcol)
617 rec_pcol = min(rec_pcol, ngroup_col - 1)
618 DO
619 itmp = get_limit(num_cols, ngroup_col, rec_pcol)
620 IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
621 IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
622 IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
623 END DO
624
625 proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)
626
627 map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
628
629 END DO ! i_global
630 END DO ! j_global
631
632 ! 3) check if the local data has to be stored in the new fm_mat
633 IF (map_rec_size(para_env%mepos) > 0) THEN
634 DO jjb = 1, ncol_local
635 j_global = col_indices(jjb)
636 IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
637 DO iib = 1, nrow_local
638 i_global = row_indices(iib)
639 IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
640 fm_mat%local_data(iib, jjb) = mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1)
641 END IF
642 END DO
643 END IF
644 END DO
645 END IF
646 CALL timestop(handle2)
647
648 ! 4) reorder data in the send_buffer
649 CALL timeset(routinen//"_buffer_send", handle2)
650 ! count the number of messages to send
651 number_of_send = 0
652 DO proc_shift = 1, para_env%num_pe - 1
653 proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
654 IF (map_send_size(proc_send) > 0) THEN
655 number_of_send = number_of_send + 1
656 END IF
657 END DO
658 ! allocate the structure that will hold the messages to be sent
659 ALLOCATE (buffer_send(number_of_send))
660 ! grid_ref_2_send_pos is an array (map) that given a pair
661 ! (ref_send_prow,ref_send_pcol) returns
662 ! the position in the buffer_send associated to that process
663 ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
664 grid_ref_2_send_pos = 0
665 ! finalize the allocation of buffer_send with the actual size
666 ! of each message (actual size is size_send_buffer)
667 send_counter = 0
668 DO proc_shift = 1, para_env%num_pe - 1
669 proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
670 size_send_buffer = map_send_size(proc_send)
671 IF (map_send_size(proc_send) > 0) THEN
672 send_counter = send_counter + 1
673 ! allocate the sending buffer (msg)
674 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
675 buffer_send(send_counter)%msg = 0.0_dp
676 buffer_send(send_counter)%proc = proc_send
677 ! get the pointer to prow, pcol of the process that has
678 ! to receive this message
679 ref_send_prow = mepos_2_grid(1, proc_send)
680 ref_send_pcol = mepos_2_grid(2, proc_send)
681 ! save the rank of the process that has to receive this message
682 grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
683 END IF
684 END DO
685
686 ! loop over the locally held data and fill the buffer_send
687 ! for doing that we need an array that keep track if the
688 ! sequential increase of the index for each message
689 ALLOCATE (iii_vet(number_of_send))
690 iii_vet = 0
691 DO iib = my_start_row, my_end_row
692 send_prow = fm_mat%matrix_struct%g2p_row(iib)
693 DO jjb = my_start_col, my_end_col
694 send_pcol = fm_mat%matrix_struct%g2p_col(jjb)
695 ! we don't need to send to ourselves
696 IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle
697
698 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
699 iii_vet(send_counter) = iii_vet(send_counter) + 1
700 iii = iii_vet(send_counter)
701 buffer_send(send_counter)%msg(iii) = mat2d(iib - my_start_row + 1, jjb - my_start_col + 1)
702 END DO
703 END DO
704
705 DEALLOCATE (iii_vet)
706 DEALLOCATE (grid_ref_2_send_pos)
707 IF (my_do_release_mat) DEALLOCATE (mat2d)
708 CALL timestop(handle2)
709
710 ! 5) similarly to what done for the buffer_send
711 ! create the buffer for receive, post the message with irecv
712 ! and send the messages non-blocking
713 CALL timeset(routinen//"_isendrecv", handle2)
714 ! count the number of messages to be received
715 number_of_rec = 0
716 DO proc_shift = 1, para_env%num_pe - 1
717 proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
718 IF (map_rec_size(proc_receive) > 0) THEN
719 number_of_rec = number_of_rec + 1
720 END IF
721 END DO
722
723 ALLOCATE (buffer_rec(number_of_rec))
724
725 rec_counter = 0
726 DO proc_shift = 1, para_env%num_pe - 1
727 proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
728 size_rec_buffer = map_rec_size(proc_receive)
729 IF (map_rec_size(proc_receive) > 0) THEN
730 rec_counter = rec_counter + 1
731 ! prepare the buffer for receive
732 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
733 buffer_rec(rec_counter)%msg = 0.0_dp
734 buffer_rec(rec_counter)%proc = proc_receive
735 ! post the message to be received
736 CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
737 buffer_rec(rec_counter)%msg_req)
738 END IF
739 END DO
740
741 ! send messages
742 ALLOCATE (req_send(number_of_send))
743 send_counter = 0
744 DO proc_shift = 1, para_env%num_pe - 1
745 proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
746 IF (map_send_size(proc_send) > 0) THEN
747 send_counter = send_counter + 1
748 CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
749 buffer_send(send_counter)%msg_req)
750 req_send(send_counter) = buffer_send(send_counter)%msg_req
751 END IF
752 END DO
753 CALL timestop(handle2)
754
755 ! 6) fill the fm_mat matrix with the messages received from the
756 ! other processes
757 CALL timeset(routinen//"_fill", handle2)
758 ! In order to perform this step efficiently first we have to know the
759 ! ranges of the blocks over which a given process hold its local data.
760 ! Start with the rows ...
761 my_num_row_blocks = 1
762 DO iib = 1, nrow_local - 1
763 IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
764 my_num_row_blocks = my_num_row_blocks + 1
765 END DO
766 ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
767 blocks_ranges_row = 0
768 blocks_ranges_row(1, 1) = row_indices(1)
769 iii = 1
770 DO iib = 1, nrow_local - 1
771 IF (abs(row_indices(iib + 1) - row_indices(iib)) == 1) cycle
772 iii = iii + 1
773 blocks_ranges_row(2, iii - 1) = row_indices(iib)
774 blocks_ranges_row(1, iii) = row_indices(iib + 1)
775 END DO
776 blocks_ranges_row(2, my_num_row_blocks) = row_indices(max(nrow_local, 1))
777 ! ... and columns
778 my_num_col_blocks = 1
779 DO jjb = 1, ncol_local - 1
780 IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
781 my_num_col_blocks = my_num_col_blocks + 1
782 END DO
783 ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
784 blocks_ranges_col = 0
785 blocks_ranges_col(1, 1) = col_indices(1)
786 iii = 1
787 DO jjb = 1, ncol_local - 1
788 IF (abs(col_indices(jjb + 1) - col_indices(jjb)) == 1) cycle
789 iii = iii + 1
790 blocks_ranges_col(2, iii - 1) = col_indices(jjb)
791 blocks_ranges_col(1, iii) = col_indices(jjb + 1)
792 END DO
793 blocks_ranges_col(2, my_num_col_blocks) = col_indices(max(ncol_local, 1))
794
795 rec_counter = 0
796 DO proc_shift = 1, para_env%num_pe - 1
797 proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
798 size_rec_buffer = map_rec_size(proc_receive)
799
800 IF (map_rec_size(proc_receive) > 0) THEN
801 rec_counter = rec_counter + 1
802
803 CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)
804
805 ! precompute the number of received columns and relative index
806 num_rec_cols = 0
807 DO jjb = 1, my_num_col_blocks
808 start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
809 end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
810 DO j_sub = start_col_block, end_col_block
811 num_rec_cols = num_rec_cols + 1
812 END DO
813 END DO
814 ALLOCATE (index_col_rec(num_rec_cols))
815 index_col_rec = 0
816 iii = 0
817 DO jjb = 1, my_num_col_blocks
818 start_col_block = max(blocks_ranges_col(1, jjb), rec_col_start)
819 end_col_block = min(blocks_ranges_col(2, jjb), rec_col_end)
820 DO j_sub = start_col_block, end_col_block
821 iii = iii + 1
822 j_local = fm_mat%matrix_struct%g2l_col(j_sub)
823 index_col_rec(iii) = j_local
824 END DO
825 END DO
826
827 CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)
828
829 ! wait for the message
830 CALL buffer_rec(rec_counter)%msg_req%wait()
831
832 ! fill the local data in fm_mat
833 iii = 0
834 DO iib = 1, my_num_row_blocks
835 start_row_block = max(blocks_ranges_row(1, iib), rec_row_start)
836 end_row_block = min(blocks_ranges_row(2, iib), rec_row_end)
837 DO i_sub = start_row_block, end_row_block
838 i_local = fm_mat%matrix_struct%g2l_row(i_sub)
839 DO jjb = 1, num_rec_cols
840 iii = iii + 1
841 j_local = index_col_rec(jjb)
842 fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
843 END DO
844 END DO
845 END DO
846
847 DEALLOCATE (buffer_rec(rec_counter)%msg)
848 DEALLOCATE (index_col_rec)
849 END IF
850 END DO
851 DEALLOCATE (buffer_rec)
852
853 DEALLOCATE (blocks_ranges_row)
854 DEALLOCATE (blocks_ranges_col)
855
856 CALL timestop(handle2)
857
858 ! 7) Finally wait for all messeges to be sent
859 CALL timeset(routinen//"_waitall", handle2)
860 CALL mp_waitall(req_send(:))
861 DO send_counter = 1, number_of_send
862 DEALLOCATE (buffer_send(send_counter)%msg)
863 END DO
864 DEALLOCATE (buffer_send)
865 CALL timestop(handle2)
866
867 DEALLOCATE (map_send_size)
868 DEALLOCATE (map_rec_size)
869 DEALLOCATE (grid_2_mepos)
870 DEALLOCATE (mepos_2_grid)
871
872 CALL timestop(handle)
873
874 END SUBROUTINE array2fm
875
876! **************************************************************************************************
877!> \brief redistribute fm to local part of array
878!> \param mat2D ...
879!> \param my_start_row ...
880!> \param my_end_row ...
881!> \param my_start_col ...
882!> \param my_end_col ...
883!> \param group_grid_2_mepos ...
884!> \param mepos_2_grid_group ...
885!> \param ngroup_row ...
886!> \param ngroup_col ...
887!> \param fm_mat ...
888! **************************************************************************************************
889 SUBROUTINE fm2array(mat2D, my_start_row, my_end_row, &
890 my_start_col, my_end_col, &
891 group_grid_2_mepos, mepos_2_grid_group, &
892 ngroup_row, ngroup_col, &
893 fm_mat)
894
895 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
896 INTENT(OUT) :: mat2d
897 INTEGER, INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
898 my_end_col
899 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos, mepos_2_grid_group
900 INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
901 TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat
902
903 CHARACTER(LEN=*), PARAMETER :: routinen = 'fm2array'
904
905 INTEGER :: dummy_proc, handle, handle2, i_global, iib, iii, itmp(2), j_global, jjb, my_cols, &
906 my_rows, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, num_cols, num_rec_rows, &
907 num_rows, number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, &
908 rec_col_size, rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, &
909 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
910 INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, &
911 map_send_size
912 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
913 mepos_2_grid, sizes
914 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
915 REAL(kind=dp) :: part_col, part_row
916 TYPE(integ_mat_buffer_type), ALLOCATABLE, &
917 DIMENSION(:) :: buffer_rec, buffer_send
918 TYPE(mp_para_env_type), POINTER :: para_env
919 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
920
921 CALL timeset(routinen, handle)
922
923 ! number of elements in each dimension
924 my_rows = my_end_row - my_start_row + 1
925 my_cols = my_end_col - my_start_col + 1
926
927 ! allocate the array
928 ALLOCATE (mat2d(my_rows, my_cols))
929 mat2d = 0.0_dp
930
931 ! start procedure
932 ! get information of from the full matrix
933 CALL cp_fm_get_info(matrix=fm_mat, &
934 nrow_local=nrow_local, &
935 ncol_local=ncol_local, &
936 row_indices=row_indices, &
937 col_indices=col_indices, &
938 nrow_global=num_rows, &
939 ncol_global=num_cols, &
940 para_env=para_env)
941 myprow = fm_mat%matrix_struct%context%mepos(1)
942 mypcol = fm_mat%matrix_struct%context%mepos(2)
943 nprow = fm_mat%matrix_struct%context%num_pe(1)
944 npcol = fm_mat%matrix_struct%context%num_pe(2)
945
946 ! start the communication part
947 ! 0) create array containing the processes position
948 ! and supporting infos
949 CALL timeset(routinen//"_info", handle2)
950 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
951 grid_2_mepos = 0
952 ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
953
954 ! fill the info array
955 grid_2_mepos(myprow, mypcol) = para_env%mepos
956
957 ! sum infos
958 CALL para_env%sum(grid_2_mepos)
959 CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
960
961 ! 1) loop over my local data and define a map for the proc to send data
962 ALLOCATE (map_send_size(0:para_env%num_pe - 1))
963 map_send_size = 0
964 part_row = real(num_rows, kind=dp)/real(ngroup_row, kind=dp)
965 part_col = real(num_cols, kind=dp)/real(ngroup_col, kind=dp)
966
967 DO jjb = 1, ncol_local
968 j_global = col_indices(jjb)
969 ! check the group holding this element
970 ! dirty way, if someone has a better idea ...
971 send_pcol = int(real(j_global - 1, kind=dp)/part_col)
972 send_pcol = max(0, send_pcol)
973 send_pcol = min(send_pcol, ngroup_col - 1)
974 DO
975 itmp = get_limit(num_cols, ngroup_col, send_pcol)
976 IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
977 IF (j_global < itmp(1)) send_pcol = send_pcol - 1
978 IF (j_global > itmp(2)) send_pcol = send_pcol + 1
979 END DO
980
981 DO iib = 1, nrow_local
982 i_global = row_indices(iib)
983 ! check the process in the group holding this element
984 send_prow = int(real(i_global - 1, kind=dp)/part_row)
985 send_prow = max(0, send_prow)
986 send_prow = min(send_prow, ngroup_row - 1)
987 DO
988 itmp = get_limit(num_rows, ngroup_row, send_prow)
989 IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
990 IF (i_global < itmp(1)) send_prow = send_prow - 1
991 IF (i_global > itmp(2)) send_prow = send_prow + 1
992 END DO
993
994 proc_send = group_grid_2_mepos(send_prow, send_pcol)
995
996 map_send_size(proc_send) = map_send_size(proc_send) + 1
997
998 END DO ! i_global
999 END DO ! j_global
1000
1001 ! 2) loop over my local data of the array and define a map for the proc from which rec data
1002 ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
1003 map_rec_size = 0
1004 dummy_proc = 0
1005 DO jjb = my_start_col, my_end_col
1006 rec_pcol = fm_mat%matrix_struct%g2p_col(jjb)
1007 DO iib = my_start_row, my_end_row
1008 rec_prow = fm_mat%matrix_struct%g2p_row(iib)
1009 proc_receive = grid_2_mepos(rec_prow, rec_pcol)
1010 map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1011 END DO
1012 END DO
1013
1014 ! 3) check if the local data in the fm_mat has to be stored in the new array
1015 IF (map_rec_size(para_env%mepos) > 0) THEN
1016 DO jjb = 1, ncol_local
1017 j_global = col_indices(jjb)
1018 IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
1019 DO iib = 1, nrow_local
1020 i_global = row_indices(iib)
1021 IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
1022 mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iib, jjb)
1023 END IF
1024 END DO
1025 END IF
1026 END DO
1027 END IF
1028 CALL timestop(handle2)
1029
1030 ! 4) reorder data in the send_buffer
1031 CALL timeset(routinen//"_buffer_send", handle2)
1032 ! count the number of messages to send
1033 number_of_send = 0
1034 DO proc_shift = 1, para_env%num_pe - 1
1035 proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
1036 IF (map_send_size(proc_send) > 0) THEN
1037 number_of_send = number_of_send + 1
1038 END IF
1039 END DO
1040 ! allocate the structure that will hold the messages to be sent
1041 ALLOCATE (buffer_send(number_of_send))
1042 ! grid_ref_2_send_pos is an array (map) that given a pair
1043 ! (ref_send_prow,ref_send_pcol) returns
1044 ! the position in the buffer_send associated to that process
1045
1046 ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
1047 grid_ref_2_send_pos = 0
1048
1049 ! finalize the allocation of buffer_send with the actual size
1050 ! of each message (actual size is size_send_buffer)
1051 send_counter = 0
1052 DO proc_shift = 1, para_env%num_pe - 1
1053 proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
1054 size_send_buffer = map_send_size(proc_send)
1055 IF (map_send_size(proc_send) > 0) THEN
1056 send_counter = send_counter + 1
1057 ! allocate the sending buffer (msg)
1058 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1059 buffer_send(send_counter)%msg = 0.0_dp
1060 buffer_send(send_counter)%proc = proc_send
1061 ! get the pointer to prow, pcol of the process that has
1062 ! to receive this message
1063 ref_send_prow = mepos_2_grid_group(1, proc_send)
1064 ref_send_pcol = mepos_2_grid_group(2, proc_send)
1065 ! save the rank of the process that has to receive this message
1066 grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1067 END IF
1068 END DO
1069
1070 ! loop over the locally held data and fill the buffer_send
1071 ! for doing that we need an array that keep track if the
1072 ! sequential increase of the index for each message
1073 ALLOCATE (iii_vet(number_of_send))
1074 iii_vet = 0
1075 DO jjb = 1, ncol_local
1076 j_global = col_indices(jjb)
1077 ! check the group holding this element
1078 ! dirty way, if someone has a better idea ...
1079 send_pcol = int(real(j_global - 1, kind=dp)/part_col)
1080 send_pcol = max(0, send_pcol)
1081 send_pcol = min(send_pcol, ngroup_col - 1)
1082 DO
1083 itmp = get_limit(num_cols, ngroup_col, send_pcol)
1084 IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
1085 IF (j_global < itmp(1)) send_pcol = send_pcol - 1
1086 IF (j_global > itmp(2)) send_pcol = send_pcol + 1
1087 END DO
1088
1089 DO iib = 1, nrow_local
1090 i_global = row_indices(iib)
1091 ! check the process in the group holding this element
1092 send_prow = int(real(i_global - 1, kind=dp)/part_row)
1093 send_prow = max(0, send_prow)
1094 send_prow = min(send_prow, ngroup_row - 1)
1095 DO
1096 itmp = get_limit(num_rows, ngroup_row, send_prow)
1097 IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
1098 IF (i_global < itmp(1)) send_prow = send_prow - 1
1099 IF (i_global > itmp(2)) send_prow = send_prow + 1
1100 END DO
1101 ! we don't need to send to ourselves
1102 IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) cycle
1103
1104 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1105 iii_vet(send_counter) = iii_vet(send_counter) + 1
1106 iii = iii_vet(send_counter)
1107 buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iib, jjb)
1108 END DO
1109 END DO
1110
1111 DEALLOCATE (iii_vet)
1112 DEALLOCATE (grid_ref_2_send_pos)
1113 CALL timestop(handle2)
1114
1115 ! 5) similarly to what done for the buffer_send
1116 ! create the buffer for receive, post the message with irecv
1117 ! and send the messages with non-blocking
1118 CALL timeset(routinen//"_isendrecv", handle2)
1119 ! count the number of messages to be received
1120 number_of_rec = 0
1121 DO proc_shift = 1, para_env%num_pe - 1
1122 proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
1123 IF (map_rec_size(proc_receive) > 0) THEN
1124 number_of_rec = number_of_rec + 1
1125 END IF
1126 END DO
1127
1128 ALLOCATE (buffer_rec(number_of_rec))
1129
1130 rec_counter = 0
1131 DO proc_shift = 1, para_env%num_pe - 1
1132 proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
1133 size_rec_buffer = map_rec_size(proc_receive)
1134 IF (map_rec_size(proc_receive) > 0) THEN
1135 rec_counter = rec_counter + 1
1136 ! prepare the buffer for receive
1137 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1138 buffer_rec(rec_counter)%msg = 0.0_dp
1139 buffer_rec(rec_counter)%proc = proc_receive
1140 ! post the message to be received
1141 CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1142 buffer_rec(rec_counter)%msg_req)
1143 END IF
1144 END DO
1145
1146 ! send messages
1147 ALLOCATE (req_send(number_of_send))
1148 send_counter = 0
1149 DO proc_shift = 1, para_env%num_pe - 1
1150 proc_send = modulo(para_env%mepos + proc_shift, para_env%num_pe)
1151 IF (map_send_size(proc_send) > 0) THEN
1152 send_counter = send_counter + 1
1153 CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
1154 buffer_send(send_counter)%msg_req)
1155 req_send(send_counter) = buffer_send(send_counter)%msg_req
1156 END IF
1157 END DO
1158 CALL timestop(handle2)
1159
1160 ! 6) fill the fm_mat matrix with the messages received from the
1161 ! other processes
1162 CALL timeset(routinen//"_fill", handle2)
1163 CALL cp_fm_get_info(matrix=fm_mat, &
1164 nrow_local=nrow_local, &
1165 ncol_local=ncol_local)
1166 ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
1167 CALL para_env%allgather([nrow_local, ncol_local], sizes)
1168 iib = maxval(sizes(1, :))
1169 ALLOCATE (index_row_rec(iib))
1170 index_row_rec = 0
1171 rec_counter = 0
1172 DO proc_shift = 1, para_env%num_pe - 1
1173 proc_receive = modulo(para_env%mepos - proc_shift, para_env%num_pe)
1174 size_rec_buffer = map_rec_size(proc_receive)
1175
1176 IF (map_rec_size(proc_receive) > 0) THEN
1177 rec_counter = rec_counter + 1
1178
1179 rec_col_size = sizes(2, proc_receive)
1180 rec_row_size = sizes(1, proc_receive)
1181
1182 ! precompute the indeces of the rows
1183 num_rec_rows = 0
1184 DO iib = 1, rec_row_size
1185 i_global = fm_mat%matrix_struct%l2g_row(iib, mepos_2_grid(1, proc_receive))
1186 IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
1187 num_rec_rows = num_rec_rows + 1
1188 index_row_rec(num_rec_rows) = i_global
1189 END IF
1190 END DO
1191
1192 CALL buffer_rec(rec_counter)%msg_req%wait()
1193
1194 iii = 0
1195 DO jjb = 1, rec_col_size
1196 j_global = fm_mat%matrix_struct%l2g_col(jjb, mepos_2_grid(2, proc_receive))
1197 IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
1198 DO iib = 1, num_rec_rows
1199 i_global = index_row_rec(iib)
1200 iii = iii + 1
1201 mat2d(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
1202 END DO
1203 END IF
1204 END DO
1205
1206 DEALLOCATE (buffer_rec(rec_counter)%msg)
1207 END IF
1208 END DO
1209 DEALLOCATE (buffer_rec)
1210 DEALLOCATE (index_row_rec)
1211 CALL cp_fm_release(fm_mat)
1212 CALL timestop(handle2)
1213
1214 ! 7) Finally wait for all messeges to be sent
1215 CALL timeset(routinen//"_waitall", handle2)
1216 CALL mp_waitall(req_send(:))
1217 DO send_counter = 1, number_of_send
1218 DEALLOCATE (buffer_send(send_counter)%msg)
1219 END DO
1220 DEALLOCATE (buffer_send)
1221 CALL timestop(handle2)
1222
1223 CALL timestop(handle)
1224
1225 END SUBROUTINE fm2array
1226
1227! **************************************************************************************************
1228!> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
1229!> \param Gamma_2D ...
1230!> \param homo ...
1231!> \param virtual ...
1232!> \param dimen_ia ...
1233!> \param para_env_sub ...
1234!> \param my_ia_start ...
1235!> \param my_ia_end ...
1236!> \param my_group_L_size ...
1237!> \param gd_ia ...
1238!> \param dbcsr_Gamma ...
1239!> \param mo_coeff_o ...
1240! **************************************************************************************************
1241 SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
1242 my_ia_start, my_ia_end, my_group_L_size, &
1243 gd_ia, dbcsr_Gamma, mo_coeff_o)
1244 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: gamma_2d
1245 INTEGER, INTENT(IN) :: homo, virtual, dimen_ia
1246 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub
1247 INTEGER, INTENT(IN) :: my_ia_start, my_ia_end, my_group_l_size
1248 TYPE(group_dist_d1_type), INTENT(IN) :: gd_ia
1249 TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: dbcsr_gamma
1250 TYPE(dbcsr_type), INTENT(INOUT) :: mo_coeff_o
1251
1252 CHARACTER(LEN=*), PARAMETER :: routinen = 'create_dbcsr_gamma'
1253
1254 INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iib, iii, itmp(2), j_global, &
1255 j_local, jjb, jjj, kkb, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, &
1256 number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_counter, &
1257 rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, &
1258 ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
1259 INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
1260 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
1261 indeces_map_my, mepos_2_grid
1262 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1263 REAL(kind=dp) :: part_ia
1264 TYPE(cp_blacs_env_type), POINTER :: blacs_env
1265 TYPE(cp_fm_struct_type), POINTER :: fm_struct
1266 TYPE(cp_fm_type) :: fm_ia
1267 TYPE(index_map), ALLOCATABLE, DIMENSION(:) :: indeces_rec
1268 TYPE(integ_mat_buffer_type), ALLOCATABLE, &
1269 DIMENSION(:) :: buffer_rec, buffer_send
1270 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
1271
1272 CALL timeset(routinen, handle)
1273
1274 ! create sub blacs env
1275 NULLIFY (blacs_env)
1276 CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)
1277
1278 ! create the fm_ia buffer matrix
1279 NULLIFY (fm_struct)
1280 CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
1281 ncol_global=virtual, para_env=para_env_sub)
1282 CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
1283 ! release structure
1284 CALL cp_fm_struct_release(fm_struct)
1285 ! release blacs_env
1286 CALL cp_blacs_env_release(blacs_env)
1287
1288 ! get array information
1289 CALL cp_fm_get_info(matrix=fm_ia, &
1290 nrow_local=nrow_local, &
1291 ncol_local=ncol_local, &
1292 row_indices=row_indices, &
1293 col_indices=col_indices)
1294 myprow = fm_ia%matrix_struct%context%mepos(1)
1295 mypcol = fm_ia%matrix_struct%context%mepos(2)
1296 nprow = fm_ia%matrix_struct%context%num_pe(1)
1297 npcol = fm_ia%matrix_struct%context%num_pe(2)
1298
1299 ! 0) create array containing the processes position and supporting infos
1300 ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
1301 grid_2_mepos = 0
1302 ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
1303 ! fill the info array
1304 grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
1305 ! sum infos
1306 CALL para_env_sub%sum(grid_2_mepos)
1307 CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
1308
1309 ! loop over local index range and define the sending map
1310 ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
1311 map_send_size = 0
1312 dummy_proc = 0
1313 DO iaia = my_ia_start, my_ia_end
1314 i_global = (iaia - 1)/virtual + 1
1315 j_global = mod(iaia - 1, virtual) + 1
1316 send_prow = fm_ia%matrix_struct%g2p_row(i_global)
1317 send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1318 proc_send = grid_2_mepos(send_prow, send_pcol)
1319 map_send_size(proc_send) = map_send_size(proc_send) + 1
1320 END DO
1321
1322 ! loop over local data of fm_ia and define the receiving map
1323 ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
1324 map_rec_size = 0
1325 part_ia = real(dimen_ia, kind=dp)/real(para_env_sub%num_pe, kind=dp)
1326
1327 DO iib = 1, nrow_local
1328 i_global = row_indices(iib)
1329 DO jjb = 1, ncol_local
1330 j_global = col_indices(jjb)
1331 iaia = (i_global - 1)*virtual + j_global
1332 proc_receive = int(real(iaia - 1, kind=dp)/part_ia)
1333 proc_receive = max(0, proc_receive)
1334 proc_receive = min(proc_receive, para_env_sub%num_pe - 1)
1335 DO
1336 itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
1337 IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
1338 IF (iaia < itmp(1)) proc_receive = proc_receive - 1
1339 IF (iaia > itmp(2)) proc_receive = proc_receive + 1
1340 END DO
1341 map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1342 END DO
1343 END DO
1344
1345 ! allocate the buffer for sending data
1346 number_of_send = 0
1347 DO proc_shift = 1, para_env_sub%num_pe - 1
1348 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1349 IF (map_send_size(proc_send) > 0) THEN
1350 number_of_send = number_of_send + 1
1351 END IF
1352 END DO
1353 ! allocate the structure that will hold the messages to be sent
1354 ALLOCATE (buffer_send(number_of_send))
1355 ! and the map from the grid of processess to the message position
1356 ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
1357 grid_ref_2_send_pos = 0
1358 ! finally allocate each message
1359 send_counter = 0
1360 DO proc_shift = 1, para_env_sub%num_pe - 1
1361 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1362 size_send_buffer = map_send_size(proc_send)
1363 IF (map_send_size(proc_send) > 0) THEN
1364 send_counter = send_counter + 1
1365 ! allocate the sending buffer (msg)
1366 ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1367 buffer_send(send_counter)%proc = proc_send
1368 ! get the pointer to prow, pcol of the process that has
1369 ! to receive this message
1370 ref_send_prow = mepos_2_grid(1, proc_send)
1371 ref_send_pcol = mepos_2_grid(2, proc_send)
1372 ! save the rank of the process that has to receive this message
1373 grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1374 END IF
1375 END DO
1376
1377 ! allocate the buffer for receiving data
1378 number_of_rec = 0
1379 DO proc_shift = 1, para_env_sub%num_pe - 1
1380 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1381 IF (map_rec_size(proc_receive) > 0) THEN
1382 number_of_rec = number_of_rec + 1
1383 END IF
1384 END DO
1385 ! allocate the structure that will hold the messages to be received
1386 ! and relative indeces
1387 ALLOCATE (buffer_rec(number_of_rec))
1388 ALLOCATE (indeces_rec(number_of_rec))
1389 ! finally allocate each message and fill the array of indeces
1390 rec_counter = 0
1391 DO proc_shift = 1, para_env_sub%num_pe - 1
1392 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1393 size_rec_buffer = map_rec_size(proc_receive)
1394 IF (map_rec_size(proc_receive) > 0) THEN
1395 rec_counter = rec_counter + 1
1396 ! prepare the buffer for receive
1397 ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1398 buffer_rec(rec_counter)%proc = proc_receive
1399 ! create the indeces array
1400 ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
1401 indeces_rec(rec_counter)%map = 0
1402 CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
1403 iii = 0
1404 DO iaia = rec_iaia_start, rec_iaia_end
1405 i_global = (iaia - 1)/virtual + 1
1406 j_global = mod(iaia - 1, virtual) + 1
1407 rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
1408 rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1409 IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
1410 iii = iii + 1
1411 i_local = fm_ia%matrix_struct%g2l_row(i_global)
1412 j_local = fm_ia%matrix_struct%g2l_col(j_global)
1413 indeces_rec(rec_counter)%map(1, iii) = i_local
1414 indeces_rec(rec_counter)%map(2, iii) = j_local
1415 END DO
1416 END IF
1417 END DO
1418 ! and create the index map for my local data
1419 IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1420 size_rec_buffer = map_rec_size(para_env_sub%mepos)
1421 ALLOCATE (indeces_map_my(2, size_rec_buffer))
1422 indeces_map_my = 0
1423 iii = 0
1424 DO iaia = my_ia_start, my_ia_end
1425 i_global = (iaia - 1)/virtual + 1
1426 j_global = mod(iaia - 1, virtual) + 1
1427 rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
1428 rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1429 IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) cycle
1430 iii = iii + 1
1431 i_local = fm_ia%matrix_struct%g2l_row(i_global)
1432 j_local = fm_ia%matrix_struct%g2l_col(j_global)
1433 indeces_map_my(1, iii) = i_local
1434 indeces_map_my(2, iii) = j_local
1435 END DO
1436 END IF
1437
1438 ! auxiliary vector of indeces for the send buffer
1439 ALLOCATE (iii_vet(number_of_send))
1440 ! vector for the send requests
1441 ALLOCATE (req_send(number_of_send))
1442 ! loop over auxiliary basis function and redistribute into a fm
1443 ! and then compy the fm into a dbcsr matrix
1444 DO kkb = 1, my_group_l_size
1445 ! zero the matries of the buffers and post the messages to be received
1446 CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
1447 rec_counter = 0
1448 DO proc_shift = 1, para_env_sub%num_pe - 1
1449 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1450 IF (map_rec_size(proc_receive) > 0) THEN
1451 rec_counter = rec_counter + 1
1452 buffer_rec(rec_counter)%msg = 0.0_dp
1453 CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1454 buffer_rec(rec_counter)%msg_req)
1455 END IF
1456 END DO
1457 ! fill the sending buffer and send the messages
1458 DO send_counter = 1, number_of_send
1459 buffer_send(send_counter)%msg = 0.0_dp
1460 END DO
1461 iii_vet = 0
1462 jjj = 0
1463 DO iaia = my_ia_start, my_ia_end
1464 i_global = (iaia - 1)/virtual + 1
1465 j_global = mod(iaia - 1, virtual) + 1
1466 send_prow = fm_ia%matrix_struct%g2p_row(i_global)
1467 send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
1468 proc_send = grid_2_mepos(send_prow, send_pcol)
1469 ! we don't need to send to ourselves
1470 IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
1471 ! filling fm_ia with local data
1472 jjj = jjj + 1
1473 i_local = indeces_map_my(1, jjj)
1474 j_local = indeces_map_my(2, jjj)
1475 fm_ia%local_data(i_local, j_local) = gamma_2d(iaia - my_ia_start + 1, kkb)
1476 ELSE
1477 send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1478 iii_vet(send_counter) = iii_vet(send_counter) + 1
1479 iii = iii_vet(send_counter)
1480 buffer_send(send_counter)%msg(iii) = gamma_2d(iaia - my_ia_start + 1, kkb)
1481 END IF
1482 END DO
1483 req_send = mp_request_null
1484 send_counter = 0
1485 DO proc_shift = 1, para_env_sub%num_pe - 1
1486 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1487 IF (map_send_size(proc_send) > 0) THEN
1488 send_counter = send_counter + 1
1489 CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
1490 buffer_send(send_counter)%msg_req)
1491 req_send(send_counter) = buffer_send(send_counter)%msg_req
1492 END IF
1493 END DO
1494
1495 ! receive the massages and fill the fm_ia
1496 rec_counter = 0
1497 DO proc_shift = 1, para_env_sub%num_pe - 1
1498 proc_receive = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1499 size_rec_buffer = map_rec_size(proc_receive)
1500 IF (map_rec_size(proc_receive) > 0) THEN
1501 rec_counter = rec_counter + 1
1502 ! wait for the message
1503 CALL buffer_rec(rec_counter)%msg_req%wait()
1504 DO iii = 1, size_rec_buffer
1505 i_local = indeces_rec(rec_counter)%map(1, iii)
1506 j_local = indeces_rec(rec_counter)%map(2, iii)
1507 fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1508 END DO
1509 END IF
1510 END DO
1511
1512 ! wait all
1513 CALL mp_waitall(req_send(:))
1514
1515 ! now create the DBCSR matrix and copy fm_ia into it
1516 ALLOCATE (dbcsr_gamma(kkb)%matrix)
1517 CALL cp_dbcsr_m_by_n_from_template(dbcsr_gamma(kkb)%matrix, &
1518 template=mo_coeff_o, &
1519 m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
1520 CALL copy_fm_to_dbcsr(fm_ia, dbcsr_gamma(kkb)%matrix, keep_sparsity=.false.)
1521
1522 END DO
1523
1524 ! deallocate stuff
1525 DEALLOCATE (gamma_2d)
1526 DEALLOCATE (iii_vet)
1527 DEALLOCATE (req_send)
1528 IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1529 DEALLOCATE (indeces_map_my)
1530 END IF
1531 DO rec_counter = 1, number_of_rec
1532 DEALLOCATE (indeces_rec(rec_counter)%map)
1533 DEALLOCATE (buffer_rec(rec_counter)%msg)
1534 END DO
1535 DEALLOCATE (indeces_rec)
1536 DEALLOCATE (buffer_rec)
1537 DO send_counter = 1, number_of_send
1538 DEALLOCATE (buffer_send(send_counter)%msg)
1539 END DO
1540 DEALLOCATE (buffer_send)
1541 DEALLOCATE (map_send_size)
1542 DEALLOCATE (map_rec_size)
1543 DEALLOCATE (grid_2_mepos)
1544 DEALLOCATE (mepos_2_grid)
1545
1546 ! release buffer matrix
1547 CALL cp_fm_release(fm_ia)
1548
1549 CALL timestop(handle)
1550
1551 END SUBROUTINE create_dbcsr_gamma
1552
1553! **************************************************************************************************
1554!> \brief prepare array for redistribution
1555!> \param para_env ...
1556!> \param para_env_sub ...
1557!> \param ngroup ...
1558!> \param group_grid_2_mepos ...
1559!> \param mepos_2_grid_group ...
1560!> \param pos_info ...
1561! **************************************************************************************************
1562 SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
1563 group_grid_2_mepos, mepos_2_grid_group, &
1564 pos_info)
1565 TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
1566 INTEGER, INTENT(IN) :: ngroup
1567 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
1568 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
1569 OPTIONAL :: pos_info
1570
1571 INTEGER :: i, pos_group, pos_sub
1572 INTEGER, ALLOCATABLE, DIMENSION(:) :: my_pos_info
1573
1574 ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
1575 CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
1576
1577 ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
1578 group_grid_2_mepos = 0
1579 ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
1580 mepos_2_grid_group = 0
1581
1582 DO i = 0, para_env%num_pe - 1
1583 ! calculate position of the group
1584 pos_group = i/para_env_sub%num_pe
1585 ! calculate position in the subgroup
1586 pos_sub = my_pos_info(i)
1587 ! fill the map from the grid of groups to process
1588 group_grid_2_mepos(pos_sub, pos_group) = i
1589 ! and the opposite, from the global pos to the grid pos
1590 mepos_2_grid_group(1, i) = pos_sub
1591 mepos_2_grid_group(2, i) = pos_group
1592 END DO
1593
1594 IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
1595
1596 END SUBROUTINE prepare_redistribution
1597
1598END MODULE mp2_ri_grad_util
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_m_by_n_from_template(matrix, template, m, n, sym)
Utility function to create an arbitrary shaped dbcsr matrix with the same processor grid as the templ...
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Types to describe group distributions.
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
Interface to the message passing library MPI.
type(mp_request_type), parameter, public mp_request_null
Routines for calculating RI-MP2 gradients.
subroutine, public complete_gamma(mp2_env, b_ia_q, dimen_ri, homo, virtual, para_env, para_env_sub, ngroup, my_group_l_size, my_group_l_start, my_group_l_end, my_b_size, my_b_virtual_start, gd_array, gd_b_virtual, kspin)
complete the calculation of the Gamma matrices
subroutine, public prepare_redistribution(para_env, para_env_sub, ngroup, group_grid_2_mepos, mepos_2_grid_group, pos_info)
prepare array for redistribution
subroutine, public array2fm(mat2d, fm_struct, my_start_row, my_end_row, my_start_col, my_end_col, gd_row, gd_col, group_grid_2_mepos, ngroup_row, ngroup_col, fm_mat, integ_group_size, color_group, do_release_mat)
redistribute local part of array to fm
subroutine, public create_dbcsr_gamma(gamma_2d, homo, virtual, dimen_ia, para_env_sub, my_ia_start, my_ia_end, my_group_l_size, gd_ia, dbcsr_gamma, mo_coeff_o)
redistribute 2D representation of 3d tensor to a set of dbcsr matrices
subroutine, public fm2array(mat2d, my_start_row, my_end_row, my_start_col, my_end_col, group_grid_2_mepos, mepos_2_grid_group, ngroup_row, ngroup_col, fm_mat)
redistribute fm to local part of array
Types needed for MP2 calculations.
Definition mp2_types.F:14
basic linear algebra operations for full matrixes
All kind of helpful little routines.
Definition util.F:14
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
Definition util.F:333
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment