rpa_grad.F
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Routines to calculate RI-RPA and SOS-MP2 gradients
10!> \par History
11!> 10.2021 created [Frederick Stein]
12! **************************************************************************************************
25 USE cp_fm_types, ONLY: cp_fm_create,&
39 maxsize,&
43 USE kinds, ONLY: dp,&
44 int_8
48 USE machine, ONLY: m_flush,&
50 USE mathconstants, ONLY: pi
51 USE message_passing, ONLY: mp_comm_type,&
58 USE mp2_ri_grad_util, ONLY: array2fm,&
60 fm2array,&
62 USE mp2_types, ONLY: mp2_type,&
69 USE rpa_util, ONLY: calc_fm_mat_s_rpa,&
71 USE util, ONLY: get_limit
72#include "./base/base_uses.f90"
73
74 IMPLICIT NONE
75
76 PRIVATE
77
78 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'rpa_grad'
79
81
82 TYPE sos_mp2_grad_work_type
83 PRIVATE
84 INTEGER, DIMENSION(:, :), ALLOCATABLE :: pair_list
85 TYPE(one_dim_int_array), DIMENSION(:), ALLOCATABLE :: index2send, index2recv
86 REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: p
87 END TYPE
88
89 TYPE rpa_grad_work_type
90 TYPE(cp_fm_type) :: fm_mat_Q_copy = cp_fm_type()
91 TYPE(one_dim_int_array), DIMENSION(:, :), ALLOCATABLE :: index2send
92 TYPE(two_dim_int_array), DIMENSION(:, :), ALLOCATABLE :: index2recv
93 TYPE(group_dist_d1_type), DIMENSION(:), ALLOCATABLE :: gd_homo, gd_virtual
94 INTEGER, DIMENSION(2) :: grid = -1, mepos = -1
95 TYPE(two_dim_real_array), DIMENSION(:), ALLOCATABLE :: P_ij, P_ab
96 END TYPE
97
98 TYPE rpa_grad_type
99 PRIVATE
100 TYPE(cp_fm_type) :: fm_Gamma_PQ = cp_fm_type()
101 TYPE(cp_fm_type), DIMENSION(:), ALLOCATABLE :: fm_y
102 TYPE(sos_mp2_grad_work_type), ALLOCATABLE, DIMENSION(:) :: sos_mp2_work_occ, sos_mp2_work_virt
103 TYPE(rpa_grad_work_type) :: rpa_work
104 END TYPE
105
106 INTEGER, PARAMETER :: spla_threshold = 128*128*128*2
107 INTEGER, PARAMETER :: blksize_threshold = 4
108
109CONTAINS
110
111! **************************************************************************************************
112!> \brief Calculates the necessary minimum memory for the gradient code in MiB
113!> \param homo ...
114!> \param virtual ...
115!> \param dimen_RI ...
116!> \param mem_per_rank ...
117!> \param mem_per_repl ...
118!> \param do_ri_sos_laplace_mp2 ...
119!> \return ...
120! **************************************************************************************************
121 PURE SUBROUTINE rpa_grad_needed_mem(homo, virtual, dimen_RI, mem_per_rank, mem_per_repl, do_ri_sos_laplace_mp2)
122 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
123 INTEGER, INTENT(IN) :: dimen_ri
124 REAL(kind=dp), INTENT(INOUT) :: mem_per_rank, mem_per_repl
125 LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2
126
127 REAL(kind=dp) :: mem_iak, mem_kl, mem_pab, mem_pij
128
129 mem_iak = sum(real(virtual, kind=dp)*homo)*dimen_ri
130 mem_pij = sum(real(homo, kind=dp)**2)
131 mem_pab = sum(real(virtual, kind=dp)**2)
132 mem_kl = real(dimen_ri, kind=dp)*dimen_ri
133
134 ! Required iaK matrices:
135 ! Ytot_iaP = sum_tau Y_iaP(tau)
136 ! Y_iaP(tau) = S_iaP(tau)*Q_PQ(tau) (work array)
137 ! Required density matrices:
138 ! Pij (local)
139 ! Pab (local)
140 ! Additionally with SOS-MP2
141 ! Send and receive buffers for degenerate orbital pairs (rough estimate: everything)
142 ! Additionally with RPA
143 ! copy of work matrix
144 ! receive buffer for calculation of density matrix
145 ! copy of matrix Q
146 mem_per_rank = mem_per_rank + (mem_pij + mem_pab)*8.0_dp/(1024**2)
147 mem_per_repl = mem_per_repl + (mem_iak + 2.0_dp*mem_iak/SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
148 IF (.NOT. do_ri_sos_laplace_mp2) THEN
149 mem_per_repl = mem_per_repl + (mem_iak/SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
150 END IF
151
152 END SUBROUTINE rpa_grad_needed_mem
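! Editorial example (not part of the original source), assuming a single
! spin channel with homo = 100, virtual = 900 and dimen_RI = 3000:
!   mem_iaK = 100*900*3000 doubles ~ 2060 MiB (per replica)
!   mem_Pij + mem_Pab = (100**2 + 900**2) doubles ~ 6.3 MiB (per rank)
! i.e. the three-index iaK quantities dominate the estimate.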
153
154! **************************************************************************************************
155!> \brief Creates the arrays of a rpa_grad_type
156!> \param rpa_grad ...
157!> \param fm_mat_Q ...
158!> \param fm_mat_S ...
159!> \param homo ...
160!> \param virtual ...
161!> \param mp2_env ...
162!> \param Eigenval ...
163!> \param unit_nr ...
164!> \param do_ri_sos_laplace_mp2 ...
165! **************************************************************************************************
166 SUBROUTINE rpa_grad_create(rpa_grad, fm_mat_Q, fm_mat_S, &
167 homo, virtual, mp2_env, Eigenval, unit_nr, do_ri_sos_laplace_mp2)
168 TYPE(rpa_grad_type), INTENT(OUT) :: rpa_grad
169 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_q
170 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s
171 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
172 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
173 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
174 INTEGER, INTENT(IN) :: unit_nr
175 LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2
176
177 CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_create'
178
179 INTEGER :: handle, ispin, nrow_local, nspins
180
181 CALL timeset(routinen, handle)
182
183 CALL cp_fm_create(rpa_grad%fm_Gamma_PQ, matrix_struct=fm_mat_q%matrix_struct)
184 CALL cp_fm_set_all(rpa_grad%fm_Gamma_PQ, 0.0_dp)
185
186 nspins = SIZE(fm_mat_s)
187
188 ALLOCATE (rpa_grad%fm_Y(nspins))
189 DO ispin = 1, nspins
190 CALL cp_fm_create(rpa_grad%fm_Y(ispin), fm_mat_s(ispin)%matrix_struct)
191 END DO
192
193 IF (do_ri_sos_laplace_mp2) THEN
194 CALL sos_mp2_work_type_create(rpa_grad%sos_mp2_work_occ, rpa_grad%sos_mp2_work_virt, &
195 unit_nr, eigenval, homo, virtual, mp2_env%ri_grad%eps_canonical, fm_mat_s)
196 ELSE
197 CALL rpa_work_type_create(rpa_grad%rpa_work, fm_mat_q, fm_mat_s, homo, virtual)
198 END IF
199
200 ! Set blocksize
201 CALL cp_fm_struct_get(fm_mat_s(1)%matrix_struct, nrow_local=nrow_local)
202 IF (mp2_env%ri_grad%dot_blksize < 1) mp2_env%ri_grad%dot_blksize = nrow_local
203 mp2_env%ri_grad%dot_blksize = min(mp2_env%ri_grad%dot_blksize, nrow_local)
204 IF (unit_nr > 0) THEN
205 WRITE (unit_nr, '(T3,A,T75,I6)') 'GRAD_INFO| Block size for the contraction:', mp2_env%ri_grad%dot_blksize
206 CALL m_flush(unit_nr)
207 END IF
208 CALL fm_mat_s(1)%matrix_struct%para_env%sync()
209
210 CALL timestop(handle)
211
212 END SUBROUTINE rpa_grad_create
213
214! **************************************************************************************************
215!> \brief Creates the work types of the SOS-MP2 gradient code (pair lists, communication patterns, density-matrix buffers)
216!> \param sos_mp2_work_occ ...
217!> \param sos_mp2_work_virt ...
218!> \param unit_nr ...
219!> \param Eigenval ...
220!> \param homo ...
221!> \param virtual ...
222!> \param eps_degenerate ...
223!> \param fm_mat_S ...
224! **************************************************************************************************
225 SUBROUTINE sos_mp2_work_type_create(sos_mp2_work_occ, sos_mp2_work_virt, unit_nr, &
226 Eigenval, homo, virtual, eps_degenerate, fm_mat_S)
227 TYPE(sos_mp2_grad_work_type), ALLOCATABLE, &
228 DIMENSION(:), INTENT(OUT) :: sos_mp2_work_occ, sos_mp2_work_virt
229 INTEGER, INTENT(IN) :: unit_nr
230 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
231 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
232 REAL(kind=dp), INTENT(IN) :: eps_degenerate
233 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s
234
235 CHARACTER(LEN=*), PARAMETER :: routinen = 'sos_mp2_work_type_create'
236
237 INTEGER :: handle, ispin, nspins
238
239 CALL timeset(routinen, handle)
240
241 nspins = SIZE(fm_mat_s)
242 ALLOCATE (sos_mp2_work_occ(nspins), sos_mp2_work_virt(nspins))
243 DO ispin = 1, nspins
244
245 CALL create_list_nearly_degen_pairs(eigenval(1:homo(ispin), ispin), &
246 eps_degenerate, sos_mp2_work_occ(ispin)%pair_list)
247 IF (unit_nr > 0) WRITE (unit_nr, "(T3,A,T75,i6)") &
248 "MO_INFO| Number of ij pairs below EPS_CANONICAL:", SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
249 ALLOCATE (sos_mp2_work_occ(ispin)%P(homo(ispin) + SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)))
250 sos_mp2_work_occ(ispin)%P = 0.0_dp
251 CALL prepare_comm_pij(sos_mp2_work_occ(ispin), virtual(ispin), fm_mat_s(ispin))
252
253 CALL create_list_nearly_degen_pairs(eigenval(homo(ispin) + 1:, ispin), &
254 eps_degenerate, sos_mp2_work_virt(ispin)%pair_list)
255 IF (unit_nr > 0) WRITE (unit_nr, "(T3,A,T75,i6)") &
256 "MO_INFO| Number of ab pairs below EPS_CANONICAL:", SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
257 ALLOCATE (sos_mp2_work_virt(ispin)%P(virtual(ispin) + SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)))
258 sos_mp2_work_virt(ispin)%P = 0.0_dp
259 CALL prepare_comm_pab(sos_mp2_work_virt(ispin), virtual(ispin), fm_mat_s(ispin))
260 END DO
261
262 CALL timestop(handle)
263
264 END SUBROUTINE
265
266! **************************************************************************************************
267!> \brief Creates the work type of the RPA gradient code (process grid, orbital distributions, communication patterns)
268!> \param rpa_work ...
269!> \param fm_mat_Q ...
270!> \param fm_mat_S ...
271!> \param homo ...
272!> \param virtual ...
273! **************************************************************************************************
274 SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual)
275 TYPE(rpa_grad_work_type), INTENT(OUT) :: rpa_work
276 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_q
277 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s
278 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
279
280 CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_work_type_create'
281
282 INTEGER :: avirt, col_global, col_local, handle, iocc, ispin, my_a, my_a_end, my_a_size, &
283 my_a_start, my_i, my_i_end, my_i_size, my_i_start, my_pcol, ncol_local, nspins, &
284 num_pe_col, proc_homo, proc_homo_send, proc_recv, proc_send, proc_virtual, &
285 proc_virtual_send
286 INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send
287 INTEGER, DIMENSION(:), POINTER :: col_indices
288
289 CALL timeset(routinen, handle)
290
291 CALL cp_fm_create(rpa_work%fm_mat_Q_copy, matrix_struct=fm_mat_q%matrix_struct)
292
293 CALL fm_mat_s(1)%matrix_struct%context%get(number_of_process_columns=num_pe_col, my_process_column=my_pcol)
294
295 nspins = SIZE(fm_mat_s)
296
297 ALLOCATE (rpa_work%index2send(0:num_pe_col - 1, nspins), &
298 rpa_work%index2recv(0:num_pe_col - 1, nspins), &
299 rpa_work%gd_homo(nspins), rpa_work%gd_virtual(nspins), &
300 data2send(0:num_pe_col - 1), data2recv(0:num_pe_col - 1), &
301 rpa_work%P_ij(nspins), rpa_work%P_ab(nspins))
302
303 ! Determine new process grid
304 proc_homo = max(1, ceiling(sqrt(real(num_pe_col, kind=dp))))
305 DO WHILE (mod(num_pe_col, proc_homo) /= 0)
306 proc_homo = proc_homo - 1
307 END DO
308 proc_virtual = num_pe_col/proc_homo
309
310 rpa_work%grid(1) = proc_virtual
311 rpa_work%grid(2) = proc_homo
312
313 rpa_work%mepos(1) = mod(my_pcol, proc_virtual)
314 rpa_work%mepos(2) = my_pcol/proc_virtual
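! Editorial example (not part of the original source): for num_pe_col = 12,
! proc_homo starts at ceiling(sqrt(12.0)) = 4, which already divides 12, so
! proc_homo = 4 and proc_virtual = 3. The process columns are thus viewed as
! a 3 x 4 (virtual x occupied) grid, and my_pcol = 7 maps to
! mepos = (mod(7, 3), 7/3) = (1, 2).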
315
316 DO ispin = 1, nspins
317
318 ! Determine distributions of the orbitals
319 CALL create_group_dist(rpa_work%gd_homo(ispin), proc_homo, homo(ispin))
320 CALL create_group_dist(rpa_work%gd_virtual(ispin), proc_virtual, virtual(ispin))
321
322 CALL cp_fm_struct_get(fm_mat_s(ispin)%matrix_struct, ncol_local=ncol_local, col_indices=col_indices)
323
324 data2send = 0
325 ! Count the amount of data2send to each process
326 DO col_local = 1, ncol_local
327 col_global = col_indices(col_local)
328
329 iocc = (col_global - 1)/virtual(ispin) + 1
330 avirt = col_global - (iocc - 1)*virtual(ispin)
331
332 proc_homo_send = group_dist_proc(rpa_work%gd_homo(ispin), iocc)
333 proc_virtual_send = group_dist_proc(rpa_work%gd_virtual(ispin), avirt)
334
335 proc_send = proc_homo_send*proc_virtual + proc_virtual_send
336
337 data2send(proc_send) = data2send(proc_send) + 1
338 END DO
339
340 DO proc_send = 0, num_pe_col - 1
341 ALLOCATE (rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)))
342 END DO
343
344 ! Prepare the indices
345 data2send = 0
346 DO col_local = 1, ncol_local
347 col_global = col_indices(col_local)
348
349 iocc = (col_global - 1)/virtual(ispin) + 1
350 avirt = col_global - (iocc - 1)*virtual(ispin)
351
352 proc_homo_send = group_dist_proc(rpa_work%gd_homo(ispin), iocc)
353 proc_virtual_send = group_dist_proc(rpa_work%gd_virtual(ispin), avirt)
354
355 proc_send = proc_homo_send*proc_virtual + proc_virtual_send
356
357 data2send(proc_send) = data2send(proc_send) + 1
358
359 rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)) = col_local
360 END DO
361
362 ! Count the amount of data2recv from each process
363 CALL get_group_dist(rpa_work%gd_homo(ispin), my_pcol/proc_virtual, my_i_start, my_i_end, my_i_size)
364 CALL get_group_dist(rpa_work%gd_virtual(ispin), mod(my_pcol, proc_virtual), my_a_start, my_a_end, my_a_size)
365
366 data2recv = 0
367 DO my_i = my_i_start, my_i_end
368 DO my_a = my_a_start, my_a_end
369 proc_recv = fm_mat_s(ispin)%matrix_struct%g2p_col((my_i - 1)*virtual(ispin) + my_a)
370 data2recv(proc_recv) = data2recv(proc_recv) + 1
371 END DO
372 END DO
373
374 DO proc_recv = 0, num_pe_col - 1
375 ALLOCATE (rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)))
376 END DO
377
378 data2recv = 0
379 DO my_i = my_i_start, my_i_end
380 DO my_a = my_a_start, my_a_end
381 proc_recv = fm_mat_s(ispin)%matrix_struct%g2p_col((my_i - 1)*virtual(ispin) + my_a)
382 data2recv(proc_recv) = data2recv(proc_recv) + 1
383
384 rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)) = my_i - my_i_start + 1
385 rpa_work%index2recv(proc_recv, ispin)%array(1, data2recv(proc_recv)) = my_a - my_a_start + 1
386 END DO
387 END DO
388
389 ALLOCATE (rpa_work%P_ij(ispin)%array(my_i_size, homo(ispin)), &
390 rpa_work%P_ab(ispin)%array(my_a_size, virtual(ispin)))
391 rpa_work%P_ij(ispin)%array(:, :) = 0.0_dp
392 rpa_work%P_ab(ispin)%array(:, :) = 0.0_dp
393
394 END DO
395
396 DEALLOCATE (data2send, data2recv)
397
398 CALL timestop(handle)
399
400 END SUBROUTINE
401
402! **************************************************************************************************
403!> \brief Builds the list of all nearly degenerate orbital pairs, i.e. pairs with |Eigenval(i)-Eigenval(j)| < eps_degen and i /= j
404!> \param Eigenval ...
405!> \param eps_degen ...
406!> \param pair_list ...
407! **************************************************************************************************
408 SUBROUTINE create_list_nearly_degen_pairs(Eigenval, eps_degen, pair_list)
409 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
410 REAL(kind=dp), INTENT(IN) :: eps_degen
411 INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: pair_list
412
413 INTEGER :: my_i, my_j, num_orbitals, num_pairs, &
414 pair_counter
415
416 num_orbitals = SIZE(eigenval)
417
418! Determine the number of nearly degenerate orbital pairs
419! (diagonal elements are trivially degenerate and are skipped)
420 num_pairs = 0
421 DO my_i = 1, num_orbitals
422 DO my_j = 1, num_orbitals
423 IF (my_i == my_j) cycle
424 IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen) num_pairs = num_pairs + 1
425 END DO
426 END DO
427 ALLOCATE (pair_list(2, num_pairs))
428
429! Store the required pairs
430 pair_counter = 1
431 DO my_i = 1, num_orbitals
432 DO my_j = 1, num_orbitals
433 IF (my_i == my_j) cycle
434 IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen) THEN
435 pair_list(1, pair_counter) = my_i
436 pair_list(2, pair_counter) = my_j
437 pair_counter = pair_counter + 1
438 END IF
439 END DO
440 END DO
441
442 END SUBROUTINE create_list_nearly_degen_pairs
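! Editorial example (not part of the original source): for
! Eigenval = (-0.500, -0.499, -0.300) and eps_degen = 0.01 only the first two
! orbitals are nearly degenerate, so pair_list = reshape([1, 2, 2, 1], [2, 2]);
! both orderings (1,2) and (2,1) are stored, diagonal pairs never are.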
443
444! **************************************************************************************************
445!> \brief Prepares the communication patterns (send/receive index lists) for the nearly degenerate occupied pairs of P_ij
446!> \param sos_mp2_work ...
447!> \param virtual ...
448!> \param fm_mat_S ...
449! **************************************************************************************************
450 SUBROUTINE prepare_comm_pij(sos_mp2_work, virtual, fm_mat_S)
451 TYPE(sos_mp2_grad_work_type), INTENT(INOUT) :: sos_mp2_work
452 INTEGER, INTENT(IN) :: virtual
453 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s
454
455 CHARACTER(LEN=*), PARAMETER :: routinen = 'prepare_comm_Pij'
456
457 INTEGER :: avirt, col_global, col_local, counter, handle, ij_counter, iocc, my_i, my_j, &
458 my_pcol, my_prow, ncol_local, nrow_local, num_ij_pairs, num_pe_col, pcol, pcol_recv, &
459 pcol_send, proc_shift, tag
460 INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send
461 INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
462 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
463 TYPE(cp_blacs_env_type), POINTER :: context
464 TYPE(mp_comm_type) :: comm_exchange
465 TYPE(mp_para_env_type), POINTER :: para_env
466
467 CALL timeset(routinen, handle)
468
469 tag = 44
470
471 CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
472 ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
473 sos_mp2_work%index2recv(0:num_pe_col - 1))
474
475 ALLOCATE (data2send(0:num_pe_col - 1))
476 ALLOCATE (data2recv(0:num_pe_col - 1))
477
478 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
479 ncol_local=ncol_local, col_indices=col_indices, &
480 context=context, nrow_local=nrow_local)
481 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
482 blacs2mpi=blacs2mpi)
483
484 num_ij_pairs = SIZE(sos_mp2_work%pair_list, 2)
485
486 IF (num_ij_pairs > 0) THEN
487
488 CALL comm_exchange%from_split(para_env, my_prow)
489
490 data2send = 0
491 data2recv = 0
492
493 DO proc_shift = 0, num_pe_col - 1
494 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
495
496 counter = 0
497 DO col_local = 1, ncol_local
498 col_global = col_indices(col_local)
499
500 iocc = max(1, col_global - 1)/virtual + 1
501 avirt = col_global - (iocc - 1)*virtual
502
503 DO ij_counter = 1, num_ij_pairs
504
505 my_i = sos_mp2_work%pair_list(1, ij_counter)
506 my_j = sos_mp2_work%pair_list(2, ij_counter)
507
508 IF (iocc /= my_j) cycle
509 pcol = fm_mat_s%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
510 IF (pcol /= pcol_send) cycle
511
512 counter = counter + 1
513
514 EXIT
515
516 END DO
517 END DO
518 data2send(pcol_send) = counter
519 END DO
520
521 CALL comm_exchange%alltoall(data2send, data2recv, 1)
522
523 DO proc_shift = 0, num_pe_col - 1
524 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
525 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
526
527 ! Collect indices and exchange
528 ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))
529
530 counter = 0
531 DO col_local = 1, ncol_local
532 col_global = col_indices(col_local)
533
534 iocc = max(1, col_global - 1)/virtual + 1
535 avirt = col_global - (iocc - 1)*virtual
536
537 DO ij_counter = 1, num_ij_pairs
538
539 my_i = sos_mp2_work%pair_list(1, ij_counter)
540 my_j = sos_mp2_work%pair_list(2, ij_counter)
541
542 IF (iocc /= my_j) cycle
543 pcol = fm_mat_s%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
544 IF (pcol /= pcol_send) cycle
545
546 counter = counter + 1
547
548 sos_mp2_work%index2send(pcol_send)%array(counter) = col_global
549
550 EXIT
551
552 END DO
553 END DO
554
555 ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))
556 !
557 CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
558 sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)
559
560 ! Convert the global coordinates to local coordinates as we always work with them
561 DO counter = 1, data2send(pcol_send)
562 sos_mp2_work%index2send(pcol_send)%array(counter) = &
563 fm_mat_s%matrix_struct%g2l_col(sos_mp2_work%index2send(pcol_send)%array(counter))
564 END DO
565 END DO
566
567 CALL comm_exchange%free()
568 END IF
569
570 DEALLOCATE (data2send, data2recv)
571
572 CALL timestop(handle)
573
574 END SUBROUTINE
575
576! **************************************************************************************************
577!> \brief Prepares the communication patterns (send/receive index lists) for the nearly degenerate virtual pairs of P_ab
578!> \param sos_mp2_work ...
579!> \param virtual ...
580!> \param fm_mat_S ...
581! **************************************************************************************************
582 SUBROUTINE prepare_comm_pab(sos_mp2_work, virtual, fm_mat_S)
583 TYPE(sos_mp2_grad_work_type), INTENT(INOUT) :: sos_mp2_work
584 INTEGER, INTENT(IN) :: virtual
585 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s
586
587 CHARACTER(LEN=*), PARAMETER :: routinen = 'prepare_comm_Pab'
588
589 INTEGER :: ab_counter, avirt, col_global, col_local, counter, handle, iocc, my_a, my_b, &
590 my_pcol, my_prow, ncol_local, nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, &
591 pcol_send, proc_shift, tag
592 INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send
593 INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
594 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
595 TYPE(cp_blacs_env_type), POINTER :: context
596 TYPE(mp_comm_type) :: comm_exchange
597 TYPE(mp_para_env_type), POINTER :: para_env
598
599 CALL timeset(routinen, handle)
600
601 tag = 44
602
603 CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
604 ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
605 sos_mp2_work%index2recv(0:num_pe_col - 1))
606
607 num_ab_pairs = SIZE(sos_mp2_work%pair_list, 2)
608 IF (num_ab_pairs > 0) THEN
609
610 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
611 ncol_local=ncol_local, col_indices=col_indices, &
612 context=context, nrow_local=nrow_local)
613 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
614 blacs2mpi=blacs2mpi)
615
616 CALL comm_exchange%from_split(para_env, my_prow)
617
618 ALLOCATE (data2send(0:num_pe_col - 1))
619 ALLOCATE (data2recv(0:num_pe_col - 1))
620
621 data2send = 0
622 data2recv = 0
623 DO proc_shift = 0, num_pe_col - 1
624 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
625 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
626
627 counter = 0
628 DO col_local = 1, ncol_local
629 col_global = col_indices(col_local)
630
631 iocc = max(1, col_global - 1)/virtual + 1
632 avirt = col_global - (iocc - 1)*virtual
633
634 DO ab_counter = 1, num_ab_pairs
635
636 my_a = sos_mp2_work%pair_list(1, ab_counter)
637 my_b = sos_mp2_work%pair_list(2, ab_counter)
638
639 IF (avirt /= my_b) cycle
640 pcol = fm_mat_s%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
641 IF (pcol /= pcol_send) cycle
642
643 counter = counter + 1
644
645 EXIT
646
647 END DO
648 END DO
649 data2send(pcol_send) = counter
650 END DO
651
652 CALL comm_exchange%alltoall(data2send, data2recv, 1)
653
654 DO proc_shift = 0, num_pe_col - 1
655 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
656 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
657
658 ! Collect indices and exchange
659 ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))
660
661 counter = 0
662 DO col_local = 1, ncol_local
663 col_global = col_indices(col_local)
664
665 iocc = max(1, col_global - 1)/virtual + 1
666 avirt = col_global - (iocc - 1)*virtual
667
668 DO ab_counter = 1, num_ab_pairs
669
670 my_a = sos_mp2_work%pair_list(1, ab_counter)
671 my_b = sos_mp2_work%pair_list(2, ab_counter)
672
673 IF (avirt /= my_b) cycle
674 pcol = fm_mat_s%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
675 IF (pcol /= pcol_send) cycle
676
677 counter = counter + 1
678
679 sos_mp2_work%index2send(pcol_send)%array(counter) = col_global
680
681 EXIT
682
683 END DO
684 END DO
685
686 ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))
687 !
688 CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
689 sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)
690
691 ! Convert the global coordinates to local coordinates as we always work with them
692 DO counter = 1, data2send(pcol_send)
693 sos_mp2_work%index2send(pcol_send)%array(counter) = &
694 fm_mat_s%matrix_struct%g2l_col(sos_mp2_work%index2send(pcol_send)%array(counter))
695 END DO
696 END DO
697
698 CALL comm_exchange%free()
699 DEALLOCATE (data2send, data2recv)
700
701 END IF
702
703 CALL timestop(handle)
704
705 END SUBROUTINE
706
707! **************************************************************************************************
708!> \brief Keeps a copy of the matrix Q for the subsequent gradient calculation
709!> \param fm_mat_Q ...
710!> \param rpa_grad ...
711! **************************************************************************************************
712 SUBROUTINE rpa_grad_copy_q(fm_mat_Q, rpa_grad)
713 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_q
714 TYPE(rpa_grad_type), INTENT(INOUT) :: rpa_grad
715
716 CALL cp_fm_to_fm(fm_mat_q, rpa_grad%rpa_work%fm_mat_Q_copy)
717
718 END SUBROUTINE
719
720! **************************************************************************************************
721!> \brief Performs the matrix operations of the gradient calculation for a single quadrature point
722!> \param mp2_env ...
723!> \param rpa_grad ...
724!> \param do_ri_sos_laplace_mp2 ...
725!> \param fm_mat_Q ...
726!> \param fm_mat_Q_gemm ...
727!> \param dgemm_counter ...
728!> \param fm_mat_S ...
729!> \param omega ...
730!> \param homo ...
731!> \param virtual ...
732!> \param Eigenval ...
733!> \param weight ...
734!> \param unit_nr ...
735! **************************************************************************************************
736 SUBROUTINE rpa_grad_matrix_operations(mp2_env, rpa_grad, do_ri_sos_laplace_mp2, fm_mat_Q, fm_mat_Q_gemm, &
737 dgemm_counter, fm_mat_S, omega, homo, virtual, Eigenval, weight, unit_nr)
738 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
739 TYPE(rpa_grad_type), INTENT(INOUT) :: rpa_grad
740 LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2
741 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_q, fm_mat_q_gemm
742 TYPE(dgemm_counter_type), INTENT(INOUT) :: dgemm_counter
743 TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s
744 REAL(kind=dp), INTENT(IN) :: omega
745 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
746 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
747 REAL(kind=dp), INTENT(IN) :: weight
748 INTEGER, INTENT(IN) :: unit_nr
749
750 CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_matrix_operations'
751
752 INTEGER :: col_global, col_local, dimen_ia, &
753 dimen_ri, handle, handle2, ispin, &
754 jspin, ncol_local, nrow_local, nspins, &
755 row_local
756 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
757 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
758 TARGET :: mat_s_3d, mat_work_iap_3d
759 TYPE(cp_fm_type) :: fm_work_iap, fm_work_pq
760
761 CALL timeset(routinen, handle)
762
763 nspins = SIZE(fm_mat_q)
764
765 CALL cp_fm_get_info(fm_mat_q(1), nrow_global=dimen_ri, nrow_local=nrow_local, ncol_local=ncol_local, &
766 col_indices=col_indices, row_indices=row_indices)
767
768 IF (.NOT. do_ri_sos_laplace_mp2) THEN
769 CALL cp_fm_create(fm_work_pq, fm_mat_q(1)%matrix_struct)
770
771 ! calculate [1+Q(iw')]^-1
772 CALL cp_fm_cholesky_invert(fm_mat_q(1))
773 ! symmetrize the result, fm_work_PQ is only a work matrix
774 CALL cp_fm_uplo_to_full(fm_mat_q(1), fm_work_pq)
775
776 CALL cp_fm_release(fm_work_pq)
777
778 DO col_local = 1, ncol_local
779 col_global = col_indices(col_local)
780 DO row_local = 1, nrow_local
781 IF (col_global == row_indices(row_local)) THEN
782 fm_mat_q(1)%local_data(row_local, col_local) = fm_mat_q(1)%local_data(row_local, col_local) - 1.0_dp
783 EXIT
784 END IF
785 END DO
786 END DO
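! Editorial note (not part of the original source): fm_mat_q(1) now holds
! [1+Q(iw')]^-1 - 1, so the following GEMM accumulates
!   Gamma_PQ = Gamma_PQ + weight * Q(iw') * ([1+Q(iw')]^-1 - 1)
! using the copy of Q saved earlier by rpa_grad_copy_q.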
787
788 CALL timeset(routinen//"_PQ", handle2)
789 CALL dgemm_counter_start(dgemm_counter)
790 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
791 matrix_a=rpa_grad%rpa_work%fm_mat_Q_copy, matrix_b=fm_mat_q(1), beta=1.0_dp, &
792 matrix_c=rpa_grad%fm_Gamma_PQ)
793 CALL dgemm_counter_stop(dgemm_counter, dimen_ri, dimen_ri, dimen_ri)
794 CALL timestop(handle2)
795
796 CALL cp_fm_to_fm_submat_general(fm_mat_q(1), fm_mat_q_gemm(1), dimen_ri, dimen_ri, 1, 1, 1, 1, &
797 fm_mat_q_gemm(1)%matrix_struct%context)
798 END IF
799
800 DO ispin = 1, nspins
801 IF (do_ri_sos_laplace_mp2) THEN
802 ! With SOS-MP2, the Q matrix to contract with always belongs to the other spin
803 jspin = nspins - ispin + 1
804 ELSE
805 ! whereas RPA always uses the first matrix
806 jspin = 1
807 END IF
808
809 IF (do_ri_sos_laplace_mp2) THEN
810 CALL timeset(routinen//"_PQ", handle2)
811 CALL dgemm_counter_start(dgemm_counter)
812 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
813 matrix_a=fm_mat_q(ispin), matrix_b=fm_mat_q(jspin), beta=1.0_dp, &
814 matrix_c=rpa_grad%fm_Gamma_PQ)
815 CALL dgemm_counter_stop(dgemm_counter, dimen_ri, dimen_ri, dimen_ri)
816 CALL timestop(handle2)
817
818 CALL cp_fm_to_fm_submat_general(fm_mat_q(jspin), fm_mat_q_gemm(jspin), dimen_ri, dimen_ri, 1, 1, 1, 1, &
819 fm_mat_q_gemm(jspin)%matrix_struct%context)
820 ELSE
821 CALL calc_fm_mat_s_rpa(fm_mat_s(ispin), .true., virtual(ispin), eigenval(:, ispin), &
822 homo(ispin), omega, 0.0_dp)
823 END IF
824
825 CALL timeset(routinen//"_contr_S", handle2)
826 CALL cp_fm_create(fm_work_iap, rpa_grad%fm_Y(ispin)%matrix_struct)
827
828 CALL cp_fm_get_info(fm_mat_s(ispin), ncol_global=dimen_ia)
829
830 CALL dgemm_counter_start(dgemm_counter)
831 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ia, k=dimen_ri, alpha=1.0_dp, &
832 matrix_a=fm_mat_q_gemm(jspin), matrix_b=fm_mat_s(ispin), beta=0.0_dp, &
833 matrix_c=fm_work_iap)
834 CALL dgemm_counter_stop(dgemm_counter, dimen_ia, dimen_ri, dimen_ri)
835 CALL timestop(handle2)
836
837 IF (do_ri_sos_laplace_mp2) THEN
838 CALL calc_p_sos_mp2(homo(ispin), fm_mat_s(ispin), fm_work_iap, &
839 rpa_grad%sos_mp2_work_occ(ispin), rpa_grad%sos_mp2_work_virt(ispin), &
840 omega, weight, virtual(ispin), eigenval(:, ispin), mp2_env%ri_grad%dot_blksize)
841
842 CALL calc_fm_mat_s_laplace(fm_work_iap, homo(ispin), virtual(ispin), eigenval(:, ispin), omega)
843
844 CALL cp_fm_scale_and_add(1.0_dp, rpa_grad%fm_Y(ispin), -weight, fm_work_iap)
845
846 CALL cp_fm_release(fm_work_iap)
847 ELSE
848 ! To save memory, we add the contribution to fm_Y already now
849 CALL cp_fm_scale_and_add(1.0_dp, rpa_grad%fm_Y(ispin), -weight, fm_work_iap)
850
851 ! Redistribute both matrices and deallocate fm_work_iaP
852 CALL redistribute_fm_mat_s(rpa_grad%rpa_work%index2send(:, ispin), rpa_grad%rpa_work%index2recv(:, ispin), &
853 fm_work_iap, mat_work_iap_3d, &
854 rpa_grad%rpa_work%gd_homo(ispin), rpa_grad%rpa_work%gd_virtual(ispin), &
855 rpa_grad%rpa_work%mepos)
856 CALL cp_fm_release(fm_work_iap)
857
858 CALL redistribute_fm_mat_s(rpa_grad%rpa_work%index2send(:, ispin), rpa_grad%rpa_work%index2recv(:, ispin), &
859 fm_mat_s(ispin), mat_s_3d, &
860 rpa_grad%rpa_work%gd_homo(ispin), rpa_grad%rpa_work%gd_virtual(ispin), &
861 rpa_grad%rpa_work%mepos)
862
863 ! Now collect the density matrix
864 CALL calc_p_rpa(mat_s_3d, mat_work_iap_3d, rpa_grad%rpa_work%gd_homo(ispin), rpa_grad%rpa_work%gd_virtual(ispin), &
865 rpa_grad%rpa_work%grid, rpa_grad%rpa_work%mepos, &
866 fm_mat_s(ispin)%matrix_struct, &
867 rpa_grad%rpa_work%P_ij(ispin)%array, rpa_grad%rpa_work%P_ab(ispin)%array, &
868 weight, omega, eigenval(:, ispin), homo(ispin), unit_nr, mp2_env)
869
870 DEALLOCATE (mat_work_iap_3d, mat_s_3d)
871
872 CALL remove_scaling_factor_rpa(fm_mat_s(ispin), virtual(ispin), eigenval(:, ispin), homo(ispin), omega)
873
874 END IF
875
876 END DO
877
878 CALL timestop(handle)
879
880 END SUBROUTINE
881
882! **************************************************************************************************
883!> \brief Adds the contributions of one quadrature point to the SOS-MP2 density matrices P_ij and P_ab
884!> \param homo ...
885!> \param fm_mat_S ...
886!> \param fm_work_iaP ...
887!> \param sos_mp2_work_occ ...
888!> \param sos_mp2_work_virt ...
889!> \param omega ...
890!> \param weight ...
891!> \param virtual ...
892!> \param Eigenval ...
893!> \param dot_blksize ...
894! **************************************************************************************************
895 SUBROUTINE calc_p_sos_mp2(homo, fm_mat_S, fm_work_iaP, sos_mp2_work_occ, sos_mp2_work_virt, &
896 omega, weight, virtual, Eigenval, dot_blksize)
897 INTEGER, INTENT(IN) :: homo
898 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s, fm_work_iap
899 TYPE(sos_mp2_grad_work_type), INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
900 REAL(kind=dp), INTENT(IN) :: omega, weight
901 INTEGER, INTENT(IN) :: virtual
902 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
903 INTEGER, INTENT(IN) :: dot_blksize
904
905 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_sos_mp2'
906
907 INTEGER :: avirt, col_global, col_local, handle, &
908 handle2, iocc, my_a, my_i, ncol_local, &
909 nrow_local, num_ab_pairs, num_ij_pairs
910 INTEGER, DIMENSION(:), POINTER :: col_indices
911 REAL(kind=dp) :: ddot, trace
912
913 CALL timeset(routinen, handle)
914
915 CALL cp_fm_get_info(fm_mat_s, col_indices=col_indices, ncol_local=ncol_local, nrow_local=nrow_local)
916
917 CALL timeset(routinen//"_Pij_diag", handle2)
918 DO my_i = 1, homo
919 ! Collect the contributions of the matrix elements
920
921 trace = 0.0_dp
922
923 DO col_local = 1, ncol_local
924 col_global = col_indices(col_local)
925
926 iocc = max(1, col_global - 1)/virtual + 1
927 avirt = col_global - (iocc - 1)*virtual
928
929 IF (iocc == my_i) trace = trace + &
930 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
931 END DO
932
933 sos_mp2_work_occ%P(my_i) = sos_mp2_work_occ%P(my_i) - trace*omega*weight
934
935 END DO
936 CALL timestop(handle2)
937
938 CALL timeset(routinen//"_Pab_diag", handle2)
939 DO my_a = 1, virtual
940 ! Collect the contributions of the matrix elements
941
942 trace = 0.0_dp
943
944 DO col_local = 1, ncol_local
945 col_global = col_indices(col_local)
946
947 iocc = max(1, col_global - 1)/virtual + 1
948 avirt = col_global - (iocc - 1)*virtual
949
950 IF (avirt == my_a) trace = trace + &
951 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
952 END DO
953
954 sos_mp2_work_virt%P(my_a) = sos_mp2_work_virt%P(my_a) + trace*omega*weight
955
956 END DO
957 CALL timestop(handle2)
958
959 ! Loop over list and carry out operations
960 num_ij_pairs = SIZE(sos_mp2_work_occ%pair_list, 2)
961 num_ab_pairs = SIZE(sos_mp2_work_virt%pair_list, 2)
962 IF (num_ij_pairs > 0) THEN
963 CALL calc_pij_degen(fm_work_iap, fm_mat_s, sos_mp2_work_occ%pair_list, &
964 virtual, sos_mp2_work_occ%P(homo + 1:), eigenval(:homo), omega, weight, &
965 sos_mp2_work_occ%index2send, sos_mp2_work_occ%index2recv, dot_blksize)
966 END IF
967 IF (num_ab_pairs > 0) THEN
968 CALL calc_pab_degen(fm_work_iap, fm_mat_s, sos_mp2_work_virt%pair_list, &
969 virtual, sos_mp2_work_virt%P(virtual + 1:), eigenval(homo + 1:), omega, weight, &
970 sos_mp2_work_virt%index2send, sos_mp2_work_virt%index2recv, dot_blksize)
971 END IF
972
973 CALL timestop(handle)
974
975 END SUBROUTINE
976
977! **************************************************************************************************
978!> \brief Calculates the RPA contributions to the density matrices P_ij and P_ab
979!> \param mat_S_1D ...
980!> \param mat_work_iaP_3D ...
981!> \param gd_homo ...
982!> \param gd_virtual ...
983!> \param grid ...
984!> \param mepos ...
985!> \param fm_struct_S ...
986!> \param P_ij ...
987!> \param P_ab ...
988!> \param weight ...
989!> \param omega ...
990!> \param Eigenval ...
991!> \param homo ...
992!> \param unit_nr ...
993!> \param mp2_env ...
994! **************************************************************************************************
995 SUBROUTINE calc_p_rpa(mat_S_1D, mat_work_iaP_3D, gd_homo, gd_virtual, grid, mepos, &
996 fm_struct_S, P_ij, P_ab, weight, omega, Eigenval, homo, unit_nr, mp2_env)
997 REAL(kind=dp), DIMENSION(*), INTENT(INOUT), TARGET :: mat_s_1d
998 REAL(kind=dp), DIMENSION(:, :, :), INTENT(INOUT) :: mat_work_iap_3d
999 TYPE(group_dist_d1_type), INTENT(IN) :: gd_homo, gd_virtual
1000 INTEGER, DIMENSION(2), INTENT(IN) :: grid, mepos
1001 TYPE(cp_fm_struct_type), INTENT(IN), POINTER :: fm_struct_s
1002 REAL(kind=dp), DIMENSION(:, :) :: p_ij, p_ab
1003 REAL(kind=dp), INTENT(IN) :: weight, omega
1004 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
1005 INTEGER, INTENT(IN) :: homo, unit_nr
1006 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1007
1008 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_rpa'
1009
1010 INTEGER :: completed, handle, handle2, my_a_end, my_a_size, my_a_start, my_i_end, my_i_size, &
1011 my_i_start, my_p_size, my_prow, number_of_parallel_channels, proc_a_recv, proc_a_send, &
1012 proc_i_recv, proc_i_send, proc_recv, proc_send, proc_shift, recv_a_end, recv_a_size, &
1013 recv_a_start, recv_i_end, recv_i_size, recv_i_start, tag
1014 INTEGER(KIND=int_8) :: mem, number_of_elements_per_blk
1015 INTEGER, ALLOCATABLE, DIMENSION(:) :: procs_recv
1016 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
1017 REAL(kind=dp) :: mem_per_block, mem_real
1018 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), TARGET :: buffer_compens_1d
1019 REAL(kind=dp), DIMENSION(:, :, :), POINTER :: mat_s_3d
1020 TYPE(cp_1d_r_cp_type), ALLOCATABLE, DIMENSION(:) :: buffer_1d
1021 TYPE(cp_3d_r_cp_type), ALLOCATABLE, DIMENSION(:) :: buffer_3d
1022 TYPE(mp_para_env_type), POINTER :: para_env
1023 TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_requests, send_requests
1024
1025 CALL timeset(routinen, handle)
1026
1027 ! We allocate it at every step to reduce potential memory conflicts with COSMA
1028 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN
1029 CALL mp2_env%local_gemm_ctx%create(local_gemm_pu_gpu)
1030 CALL mp2_env%local_gemm_ctx%set_op_threshold_gpu(spla_threshold)
1031 END IF
1032
1033 tag = 47
1034
1035 my_p_size = SIZE(mat_work_iap_3d, 1)
1036
1037 CALL cp_fm_struct_get(fm_struct_s, para_env=para_env)
1038 CALL fm_struct_s%context%get(my_process_row=my_prow, blacs2mpi=blacs2mpi, para_env=para_env)
1039
1040 CALL get_group_dist(gd_virtual, mepos(1), my_a_start, my_a_end, my_a_size)
1041 CALL get_group_dist(gd_homo, mepos(2), my_i_start, my_i_end, my_i_size)
1042
1043 ! We have to remap the indices because mp_sendrecv requires a 3D array (because of mat_work_iaP_3D)
1044 ! whereas dgemm requires 2D arrays
1045 ! Fortran 2008 allows rank-changing pointer remapping, but GCC 7 does not support it properly
1046 mat_s_3d(1:my_p_size, 1:my_a_size, 1:my_i_size) => mat_s_1d(1:int(my_p_size, int_8)*my_a_size*my_i_size)
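! Editorial sketch (not part of the original source) of the rank-changing
! pointer remapping used above; the same storage is viewed under a new shape
! without copying:
!    REAL(KIND=dp), POINTER :: buf(:), mat(:, :)
!    mat(1:n, 1:m) => buf(1:n*m)   ! column-major view of the 1-D buffer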
1047
1048 number_of_elements_per_blk = max(int(maxsize(gd_homo), kind=int_8)*my_a_size, &
1049 int(maxsize(gd_virtual), kind=int_8)*my_i_size)*my_p_size
1050
1051 ! Determine the available memory and estimate the number of possible parallel communication channels
1052 CALL m_memory(mem)
1053 mem_real = real(mem, kind=dp)
1054 mem_per_block = real(number_of_elements_per_blk, kind=dp)*8.0_dp
1055 number_of_parallel_channels = max(1, min(maxval(grid) - 1, floor(mem_real/mem_per_block)))
1056 CALL para_env%min(number_of_parallel_channels)
1057 IF (mp2_env%ri_grad%max_parallel_comm > 0) &
1058 number_of_parallel_channels = min(number_of_parallel_channels, mp2_env%ri_grad%max_parallel_comm)
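! Editorial example (not part of the original source): with 8 GiB of free
! memory and mem_per_block = 1 GiB, floor(8/1) = 8 channels would fit; the
! value is capped by maxval(grid) - 1 and by MAX_PARALLEL_COMM, and the
! global minimum above ensures that all ranks use the same channel count.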
1059
1060 IF (unit_nr > 0) THEN
1061 WRITE (unit_nr, '(T3,A,T75,I6)') 'GRAD_INFO| Number of parallel communication channels:', number_of_parallel_channels
1062 CALL m_flush(unit_nr)
1063 END IF
1064 CALL para_env%sync()
1065
1066 ALLOCATE (buffer_1d(number_of_parallel_channels))
1067 DO proc_shift = 1, number_of_parallel_channels
1068 ALLOCATE (buffer_1d(proc_shift)%array(number_of_elements_per_blk))
1069 END DO
1070
1071 ALLOCATE (buffer_3d(number_of_parallel_channels))
1072
1073 ! Allocate buffers for the vectorized version of the Kahan summation
1074 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN
1075 ALLOCATE (buffer_compens_1d(2*max(my_a_size*maxsize(gd_virtual), my_i_size*maxsize(gd_homo))))
1076 END IF
1077
1078 IF (number_of_parallel_channels > 1) THEN
1079 ALLOCATE (procs_recv(number_of_parallel_channels))
1080 ALLOCATE (recv_requests(number_of_parallel_channels))
1081 ALLOCATE (send_requests(maxval(grid) - 1))
1082 END IF
1083
1084 IF (number_of_parallel_channels > 1 .AND. grid(1) > 1) THEN
1085 CALL timeset(routinen//"_comm_a", handle2)
1086 recv_requests(:) = mp_request_null
1087 procs_recv(:) = -1
1088 DO proc_shift = 1, min(grid(1) - 1, number_of_parallel_channels)
1089 proc_a_recv = modulo(mepos(1) - proc_shift, grid(1))
1090 proc_recv = mepos(2)*grid(1) + proc_a_recv
1091
1092 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1093
1094 buffer_3d(proc_shift)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1095 buffer_1d(proc_shift)%array(1:int(my_p_size, kind=int_8)*recv_a_size*my_i_size)
1096
1097 CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
1098 recv_requests(proc_shift), tag)
1099
1100 procs_recv(proc_shift) = proc_a_recv
1101 END DO
1102
1103 send_requests(:) = mp_request_null
1104 DO proc_shift = 1, grid(1) - 1
1105 proc_a_send = modulo(mepos(1) + proc_shift, grid(1))
1106 proc_send = mepos(2)*grid(1) + proc_a_send
1107
1108 CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1109 send_requests(proc_shift), tag)
1110 END DO
1111 CALL timestop(handle2)
1112 END IF
1113
1114 CALL calc_p_rpa_a(p_ab(:, my_a_start:my_a_end), &
1115 mat_s_3d, mat_work_iap_3d, &
1116 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1117 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1118 eigenval(homo + my_a_start:homo + my_a_end), omega, weight)
1119
1120 DO proc_shift = 1, grid(1) - 1
1121 CALL timeset(routinen//"_comm_a", handle2)
1122 IF (number_of_parallel_channels > 1) THEN
1123 CALL mp_waitany(recv_requests, completed)
1124
1125 CALL get_group_dist(gd_virtual, procs_recv(completed), recv_a_start, recv_a_end, recv_a_size)
1126 ELSE
1127 proc_a_send = modulo(mepos(1) + proc_shift, grid(1))
1128 proc_a_recv = modulo(mepos(1) - proc_shift, grid(1))
1129
1130 proc_send = mepos(2)*grid(1) + proc_a_send
1131 proc_recv = mepos(2)*grid(1) + proc_a_recv
1132
1133 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1134
1135 buffer_3d(1)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1136 buffer_1d(1)%array(1:int(my_p_size, kind=int_8)*recv_a_size*my_i_size)
1137
1138 CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1139 buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
1140 completed = 1
1141 END IF
1142 CALL timestop(handle2)
1143
1144 CALL calc_p_rpa_a(p_ab(:, recv_a_start:recv_a_end), &
1145 mat_s_3d, buffer_3d(completed)%array, &
1146 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1147 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1148 eigenval(homo + recv_a_start:homo + recv_a_end), omega, weight)
1149
1150 IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(1)) THEN
1151 proc_a_recv = modulo(mepos(1) - proc_shift - number_of_parallel_channels, grid(1))
1152 proc_recv = mepos(2)*grid(1) + proc_a_recv
1153
1154 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1155
1156 buffer_3d(completed)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1157 buffer_1d(completed)%array(1:int(my_p_size, kind=int_8)*recv_a_size*my_i_size)
1158
1159 CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
1160 recv_requests(completed), tag)
1161
1162 procs_recv(completed) = proc_a_recv
1163 END IF
1164 END DO
1165
1166 IF (number_of_parallel_channels > 1 .AND. grid(1) > 1) THEN
1167 CALL mp_waitall(send_requests)
1168 END IF
1169
1170 IF (number_of_parallel_channels > 1 .AND. grid(2) > 1) THEN
1171 recv_requests(:) = mp_request_null
1172 procs_recv(:) = -1
1173 DO proc_shift = 1, min(grid(2) - 1, number_of_parallel_channels)
1174 proc_i_recv = modulo(mepos(2) - proc_shift, grid(2))
1175 proc_recv = proc_i_recv*grid(1) + mepos(1)
1176
1177 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)
1178
1179 buffer_3d(proc_shift)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1180 buffer_1d(proc_shift)%array(1:int(my_p_size, kind=int_8)*my_a_size*recv_i_size)
1181
1182 CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
1183 recv_requests(proc_shift), tag)
1184
1185 procs_recv(proc_shift) = proc_i_recv
1186 END DO
1187
1188 send_requests(:) = mp_request_null
1189 DO proc_shift = 1, grid(2) - 1
1190 proc_i_send = modulo(mepos(2) + proc_shift, grid(2))
1191 proc_send = proc_i_send*grid(1) + mepos(1)
1192
1193 CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1194 send_requests(proc_shift), tag)
1195 END DO
1196 END IF
1197
1198 CALL calc_p_rpa_i(p_ij(:, my_i_start:my_i_end), &
1199 mat_s_3d, mat_work_iap_3d, &
1200 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1201 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1202 eigenval(my_i_start:my_i_end), omega, weight)
1203
1204 DO proc_shift = 1, grid(2) - 1
1205 CALL timeset(routinen//"_comm_i", handle2)
1206 IF (number_of_parallel_channels > 1) THEN
1207 CALL mp_waitany(recv_requests, completed)
1208
1209 CALL get_group_dist(gd_homo, procs_recv(completed), recv_i_start, recv_i_end, recv_i_size)
1210 ELSE
1211 proc_i_send = modulo(mepos(2) + proc_shift, grid(2))
1212 proc_i_recv = modulo(mepos(2) - proc_shift, grid(2))
1213
1214 proc_send = proc_i_send*grid(1) + mepos(1)
1215 proc_recv = proc_i_recv*grid(1) + mepos(1)
1216
1217 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)
1218
1219 buffer_3d(1)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1220 buffer_1d(1)%array(1:int(my_p_size, kind=int_8)*my_a_size*recv_i_size)
1221
1222 CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1223 buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
1224 completed = 1
1225 END IF
1226 CALL timestop(handle2)
1227
1228 CALL calc_p_rpa_i(p_ij(:, recv_i_start:recv_i_end), &
1229 mat_s_3d, buffer_3d(completed)%array, &
1230 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1231 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1232 eigenval(recv_i_start:recv_i_end), omega, weight)
1233
1234 IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(2)) THEN
1235 proc_i_recv = modulo(mepos(2) - proc_shift - number_of_parallel_channels, grid(2))
1236 proc_recv = proc_i_recv*grid(1) + mepos(1)
1237
1238 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)
1239
1240 buffer_3d(completed)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1241 buffer_1d(completed)%array(1:int(my_p_size, kind=int_8)*my_a_size*recv_i_size)
1242
1243 CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
1244 recv_requests(completed), tag)
1245
1246 procs_recv(completed) = proc_i_recv
1247 END IF
1248 END DO
1249
1250 IF (number_of_parallel_channels > 1 .AND. grid(2) > 1) THEN
1251 CALL mp_waitall(send_requests)
1252 END IF
1253
1254 IF (number_of_parallel_channels > 1) THEN
1255 DEALLOCATE (procs_recv)
1256 DEALLOCATE (recv_requests)
1257 DEALLOCATE (send_requests)
1258 END IF
1259
1260 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN
1261 ! Release the memory allocated by local_gemm when running on a GPU; local_gemm_ctx is null in CPU-only runs
1262 CALL mp2_env%local_gemm_ctx%destroy()
1263 DEALLOCATE (buffer_compens_1d)
1264 END IF
1265
1266 DO proc_shift = 1, number_of_parallel_channels
1267 NULLIFY (buffer_3d(proc_shift)%array)
1268 DEALLOCATE (buffer_1d(proc_shift)%array)
1269 END DO
1270 DEALLOCATE (buffer_3d, buffer_1d)
1271
1272 CALL timestop(handle)
1273
1274 END SUBROUTINE
1275
1276! **************************************************************************************************
1277!> \brief Calculates the virtual-virtual (P_ab) contribution from one block of the work matrix
1278!> \param P_ab ...
1279!> \param mat_S ...
1280!> \param mat_work ...
1281!> \param dot_blksize ...
1282!> \param buffer_1D ...
1283!> \param local_gemm_ctx ...
1284!> \param my_eval_virt ...
1285!> \param my_eval_occ ...
1286!> \param recv_eval_virt ...
1287!> \param omega ...
1288!> \param weight ...
1289! **************************************************************************************************
1290 SUBROUTINE calc_p_rpa_a(P_ab, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
1291 my_eval_virt, my_eval_occ, recv_eval_virt, omega, weight)
1292 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: p_ab
1293 REAL(kind=dp), DIMENSION(:, :, :), INTENT(IN) :: mat_s, mat_work
1294 INTEGER, INTENT(IN) :: dot_blksize
1295 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
1296 INTENT(INOUT), TARGET :: buffer_1d
1297 TYPE(local_gemm_ctxt_type), INTENT(INOUT) :: local_gemm_ctx
1298 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_virt
1299 REAL(kind=dp), INTENT(IN) :: omega, weight
1300
1301 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_rpa_a'
1302
1303 INTEGER :: handle, my_a, my_a_size, my_i, &
1304 my_i_size, my_p_size, p_end, p_start, &
1305 recv_a_size, stripesize
1306 REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_compens, buffer_unscaled
1307
1308 CALL timeset(routinen, handle)
1309
1310 my_i_size = SIZE(mat_s, 3)
1311 recv_a_size = SIZE(mat_work, 2)
1312 my_a_size = SIZE(mat_s, 2)
1313 my_p_size = SIZE(mat_s, 1)
1314
1315 IF (dot_blksize >= blksize_threshold) THEN
1316 buffer_compens(1:my_a_size, 1:recv_a_size) => buffer_1d(1:my_a_size*recv_a_size)
1317 buffer_compens = 0.0_dp
1318 buffer_unscaled(1:my_a_size, 1:recv_a_size) => buffer_1d(my_a_size*recv_a_size + 1:2*my_a_size*recv_a_size)
1319
1320 ! This loop imitates the actual tensor contraction
1321 DO my_i = 1, my_i_size
1322 DO p_start = 1, my_p_size, dot_blksize
1323 stripesize = min(dot_blksize, my_p_size - p_start + 1)
1324 p_end = p_start + stripesize - 1
1325
1326 CALL local_gemm_ctx%gemm("T", "N", my_a_size, recv_a_size, stripesize, &
1327 -weight, mat_s(p_start:p_end, :, my_i), stripesize, &
1328 mat_work(p_start:p_end, :, my_i), stripesize, &
1329 0.0_dp, buffer_unscaled, my_a_size)
1330
1331 CALL scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
1332 my_eval_virt, recv_eval_virt, my_eval_occ(my_i))
1333
1334 CALL kahan_step(buffer_compens, p_ab)
1335 END DO
1336 END DO
1337 ELSE
1338 block
1339 INTEGER :: recv_a
1340 REAL(kind=dp) :: tmp, e_i, e_a, e_b, omega2, my_compens, my_p, s
1341 omega2 = -omega**2
1342!$OMP PARALLEL DO COLLAPSE(2) DEFAULT(NONE)&
1343!$OMP SHARED(my_a_size,recv_a_size,my_i_size,mat_S,my_eval_virt,recv_eval_virt,my_eval_occ,omega2,&
1344!$OMP P_ab,weight,mat_work)&
1345!$OMP PRIVATE(tmp,my_a,recv_a,my_i,e_a,e_b,e_i,my_compens,my_p,s)
1346 DO my_a = 1, my_a_size
1347 DO recv_a = 1, recv_a_size
1348 e_a = my_eval_virt(my_a)
1349 e_b = recv_eval_virt(recv_a)
1350 my_p = p_ab(my_a, recv_a)
1351 my_compens = 0.0_dp
1352 DO my_i = 1, my_i_size
1353 e_i = -my_eval_occ(my_i)
1354 tmp = -weight*accurate_dot_product(mat_s(:, my_a, my_i), mat_work(:, recv_a, my_i)) &
1355 *(1.0_dp + omega2/((e_a + e_i)*(e_b + e_i))) - my_compens
1356 s = my_p + tmp
1357 my_compens = (s - my_p) - tmp
1358 my_p = s
1359 END DO
1360 p_ab(my_a, recv_a) = my_p
1361 END DO
1362 END DO
1363 END block
1364 END IF
1365
1366 CALL timestop(handle)
1367
1368 END SUBROUTINE calc_p_rpa_a
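! Editorial note (not part of the original source): both branches above
! evaluate the same update (with W = mat_work, the Q-contracted integrals),
!   P_ab(a,b) = P_ab(a,b) - weight * sum_{i,P} S_iaP * W_ibP
!               * (1 - omega**2/((eps_a - eps_i)*(eps_b - eps_i))),
! the GEMM branch in P-stripes with a blocked Kahan summation, the
! dot-product branch element-wise with a scalar Kahan summation.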
1369
1370! **************************************************************************************************
1371!> \brief Calculates the occupied-occupied (P_ij) contribution from one block of the work matrix
1372!> \param P_ij ...
1373!> \param mat_S ...
1374!> \param mat_work ...
1375!> \param dot_blksize ...
1376!> \param buffer_1D ...
1377!> \param local_gemm_ctx ...
1378!> \param my_eval_virt ...
1379!> \param my_eval_occ ...
1380!> \param recv_eval_occ ...
1381!> \param omega ...
1382!> \param weight ...
1383! **************************************************************************************************
1384 SUBROUTINE calc_p_rpa_i(P_ij, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
1385 my_eval_virt, my_eval_occ, recv_eval_occ, omega, weight)
1386 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: p_ij
1387 REAL(kind=dp), DIMENSION(:, :, :), INTENT(INOUT) :: mat_s, mat_work
1388 INTEGER, INTENT(IN) :: dot_blksize
1389 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
1390 INTENT(INOUT), TARGET :: buffer_1d
1391 TYPE(local_gemm_ctxt_type), INTENT(INOUT) :: local_gemm_ctx
1392 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_occ
1393 REAL(kind=dp), INTENT(IN) :: omega, weight
1394
1395 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_rpa_i'
1396
1397 INTEGER :: handle, my_a, my_a_size, my_i, &
1398 my_i_size, my_p_size, p_end, p_start, &
1399 recv_i_size, stripesize
1400 REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_compens, buffer_unscaled
1401
1402 CALL timeset(routinen, handle)
1403
1404 my_i_size = SIZE(mat_s, 3)
1405 recv_i_size = SIZE(mat_work, 3)
1406 my_a_size = SIZE(mat_s, 2)
1407 my_p_size = SIZE(mat_s, 1)
1408
1409 IF (dot_blksize >= blksize_threshold) THEN
1410 buffer_compens(1:my_i_size, 1:recv_i_size) => buffer_1d(1:my_i_size*recv_i_size)
1411 buffer_compens = 0.0_dp
1412 buffer_unscaled(1:my_i_size, 1:recv_i_size) => buffer_1d(my_i_size*recv_i_size + 1:2*my_i_size*recv_i_size)
1413
1414 ! This loop imitates the actual tensor contraction
1415 DO my_a = 1, my_a_size
1416 DO p_start = 1, my_p_size, dot_blksize
1417 stripesize = min(dot_blksize, my_p_size - p_start + 1)
1418 p_end = p_start + stripesize - 1
1419
1420 CALL local_gemm_ctx%gemm("T", "N", my_i_size, recv_i_size, stripesize, &
1421 weight, mat_s(p_start:p_end, my_a, :), stripesize, &
1422 mat_work(p_start:p_end, my_a, :), stripesize, &
1423 0.0_dp, buffer_unscaled, my_i_size)
1424
1425 CALL scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
1426 my_eval_occ, recv_eval_occ, my_eval_virt(my_a))
1427
1428 CALL kahan_step(buffer_compens, p_ij)
1429 END DO
1430 END DO
1431 ELSE
1432 block
1433 REAL(kind=dp) :: tmp, e_i, e_a, e_j, omega2, my_compens, my_p, s
1434 INTEGER :: recv_i
1435 omega2 = -omega**2
1436!$OMP PARALLEL DO COLLAPSE(2) DEFAULT(NONE)&
1437!$OMP SHARED(my_a_size,recv_i_size,my_i_size,mat_S,my_eval_occ,my_eval_virt,omega2,&
1438!$OMP recv_eval_occ,P_ij,weight,mat_work)&
1439!$OMP PRIVATE(tmp,my_a,recv_i,my_i,e_i,e_j,e_a,my_compens,my_p,s)
1440 DO my_i = 1, my_i_size
1441 DO recv_i = 1, recv_i_size
1442 e_i = my_eval_occ(my_i)
1443 e_j = recv_eval_occ(recv_i)
1444 my_p = p_ij(my_i, recv_i)
1445 my_compens = 0.0_dp
1446 DO my_a = 1, my_a_size
1447 e_a = my_eval_virt(my_a)
1448 tmp = weight*accurate_dot_product(mat_s(:, my_a, my_i), mat_work(:, my_a, recv_i)) &
1449 *(1.0_dp + omega2/((e_a - e_i)*(e_a - e_j))) - my_compens
1450 s = my_p + tmp
1451 my_compens = (s - my_p) - tmp
1452 my_p = s
1453 END DO
1454 p_ij(my_i, recv_i) = my_p
1455 END DO
1456 END DO
1457 END block
1458 END IF
1459
1460 CALL timestop(handle)
1461
1462 END SUBROUTINE calc_p_rpa_i
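! Editorial note (not part of the original source): analogously to
! calc_p_rpa_a, both branches evaluate
!   P_ij(i,j) = P_ij(i,j) + weight * sum_{a,P} S_iaP * W_jaP
!               * (1 - omega**2/((eps_a - eps_i)*(eps_a - eps_j))).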
1463
1464! **************************************************************************************************
1465!> \brief Performs one Kahan summation step: adds the compensated increments in compens to P and stores the new compensation
1466!> \param compens ...
1467!> \param P ...
1468! **************************************************************************************************
1469 SUBROUTINE kahan_step(compens, P)
1470 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: compens, p
1471
1472 CHARACTER(LEN=*), PARAMETER :: routinen = 'kahan_step'
1473
1474 INTEGER :: handle, i, j
1475 REAL(kind=dp) :: my_compens, my_p, s
1476
1477 CALL timeset(routinen, handle)
1478
1479!$OMP PARALLEL DO DEFAULT(NONE) SHARED(P,compens) PRIVATE(i,my_p,my_compens,s, j) COLLAPSE(2)
1480 DO j = 1, SIZE(compens, 2)
1481 DO i = 1, SIZE(compens, 1)
1482 my_p = p(i, j)
1483 my_compens = compens(i, j)
1484 s = my_p + my_compens
1485 compens(i, j) = (s - my_p) - my_compens
1486 p(i, j) = s
1487 END DO
1488 END DO
1489!$OMP END PARALLEL DO
1490
1491 CALL timestop(handle)
1492
1493 END SUBROUTINE
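! Editorial note (not part of the original source): this is the classic Kahan
! update. With s = P + compens, the new compensation (s - P) - compens recovers
! the low-order bits lost in the addition, so the accumulated rounding error
! stays O(1) in the number of quadrature points instead of growing linearly.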
1494
1495! **************************************************************************************************
1496!> \brief Scales the raw contraction result with the RPA denominator factor (virtual-virtual case) and subtracts the previous compensation
1497!> \param buffer_unscaled ...
1498!> \param buffer_compens ...
1499!> \param omega ...
1500!> \param my_eval_virt ...
1501!> \param recv_eval_virt ...
1502!> \param my_eval_occ ...
1503! **************************************************************************************************
1504 SUBROUTINE scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
1505 my_eval_virt, recv_eval_virt, my_eval_occ)
1506 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: buffer_unscaled
1507 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: buffer_compens
1508 REAL(kind=dp), INTENT(IN) :: omega
1509 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, recv_eval_virt
1510 REAL(kind=dp), INTENT(IN) :: my_eval_occ
1511
1512 CHARACTER(LEN=*), PARAMETER :: routinen = 'scale_buffer_and_add_compens_virt'
1513
1514 INTEGER :: handle, my_a, my_b
1515
1516 CALL timeset(routinen, handle)
1517
1518!$OMP PARALLEL DO DEFAULT(NONE) SHARED(buffer_unscaled,buffer_compens,omega,&
1519!$OMP my_eval_virt,recv_eval_virt,my_eval_occ) PRIVATE(my_a,my_b)
1520 DO my_b = 1, SIZE(buffer_compens, 2)
1521 DO my_a = 1, SIZE(buffer_compens, 1)
1522 buffer_compens(my_a, my_b) = buffer_unscaled(my_a, my_b) &
1523 *(1.0_dp - omega**2/((my_eval_virt(my_a) - my_eval_occ)*(recv_eval_virt(my_b) - my_eval_occ))) &
1524 - buffer_compens(my_a, my_b)
1525 END DO
1526 END DO
1527!$OMP END PARALLEL DO
1528
1529 CALL timestop(handle)
1530
1531 END SUBROUTINE scale_buffer_and_add_compens_virt
1532
1533! **************************************************************************************************
1534!> \brief Scales raw dot products with the frequency factor for occupied pairs and subtracts the previous compensation (preparing a Kahan step)
1535!> \param buffer_unscaled unscaled contributions to P_ij
1536!> \param buffer_compens on input the previous compensation, on output the scaled, compensated increments
1537!> \param omega current quadrature point
1538!> \param my_eval_occ eigenvalues of the locally held occupied orbitals
1539!> \param recv_eval_occ eigenvalues of the received occupied orbitals
1540!> \param my_eval_virt eigenvalue of the current virtual orbital
1541! **************************************************************************************************
1542 SUBROUTINE scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
1543 my_eval_occ, recv_eval_occ, my_eval_virt)
1544 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: buffer_unscaled
1545 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: buffer_compens
1546 REAL(kind=dp), INTENT(IN) :: omega
1547 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_occ, recv_eval_occ
1548 REAL(kind=dp), INTENT(IN) :: my_eval_virt
1549
1550 CHARACTER(LEN=*), PARAMETER :: routinen = 'scale_buffer_and_add_compens_occ'
1551
1552 INTEGER :: handle, my_i, my_j
1553
1554 CALL timeset(routinen, handle)
1555
1556!$OMP PARALLEL DO DEFAULT(NONE) SHARED(buffer_compens,buffer_unscaled,omega,&
1557!$OMP my_eval_virt,my_eval_occ,recv_eval_occ) PRIVATE(my_i,my_j)
1558 DO my_j = 1, SIZE(buffer_compens, 2)
1559 DO my_i = 1, SIZE(buffer_compens, 1)
1560 buffer_compens(my_i, my_j) = buffer_unscaled(my_i, my_j) &
1561 *(1.0_dp - omega**2/((my_eval_virt - my_eval_occ(my_i))*(my_eval_virt - recv_eval_occ(my_j)))) &
1562 - buffer_compens(my_i, my_j)
1563 END DO
1564 END DO
1565!$OMP END PARALLEL DO
1566
1567 CALL timestop(handle)
1568
1569 END SUBROUTINE scale_buffer_and_add_compens_occ
1570
1571! **************************************************************************************************
1572!> \brief Evaluates sinh(x)/x in a numerically stable way
1573!> \param x argument
1574!> \return sinh(x)/x, using a two-term Taylor expansion for small |x|
1575! **************************************************************************************************
1576 ELEMENTAL FUNCTION sinh_over_x(x) RESULT(res)
1577 REAL(kind=dp), INTENT(IN) :: x
1578 REAL(kind=dp) :: res
1579
1580 ! Calculate sinh(x)/x
1581       ! Split the interval to prevent numerical instabilities
1582 IF (abs(x) > 3.0e-4_dp) THEN
1583 res = sinh(x)/x
1584 ELSE
1585 res = 1.0_dp + x**2/6.0_dp
1586 END IF
1587
1588 END FUNCTION sinh_over_x
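! **************************************************************************************************
!> \note Illustration only, not part of this module: the series sinh(x)/x = 1 + x**2/6
!>       + x**4/120 + ... converges extremely fast near x = 0, where the direct quotient is
!>       undefined (0/0). For |x| <= 3.0e-4 the first neglected term x**4/120 is below 7.0e-17
!>       and hence smaller than the double-precision machine epsilon (~2.2e-16), so the
!>       two-term expansion above is exact to working precision. Quick check (hypothetical
!>       standalone code):
!>
!>          x = 3.0e-4_dp
!>          PRINT *, sinh(x)/x - (1.0_dp + x**2/6.0_dp)   ! zero to machine precision
! **************************************************************************************************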
1589
1590! **************************************************************************************************
1591!> \brief Adds the trace contributions of (near-)degenerate occupied orbital pairs to P_ij
1592!> \param fm_work_iaP ...
1593!> \param fm_mat_S ...
1594!> \param pair_list ...
1595!> \param virtual ...
1596!> \param P_ij ...
1597!> \param Eigenval ...
1598!> \param omega ...
1599!> \param weight ...
1600!> \param index2send ...
1601!> \param index2recv ...
1602!> \param dot_blksize ...
1603! **************************************************************************************************
1604 SUBROUTINE calc_pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigenval, &
1605 omega, weight, index2send, index2recv, dot_blksize)
1606 TYPE(cp_fm_type), INTENT(IN) :: fm_work_iap, fm_mat_s
1607 INTEGER, DIMENSION(:, :), INTENT(IN) :: pair_list
1608 INTEGER, INTENT(IN) :: virtual
1609 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: p_ij
1610 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
1611 REAL(kind=dp), INTENT(IN) :: omega, weight
1612 TYPE(one_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2send, index2recv
1613 INTEGER, INTENT(IN) :: dot_blksize
1614
1615 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_Pij_degen'
1616
1617 INTEGER :: avirt, col_global, col_local, counter, handle, handle2, ij_counter, iocc, &
1618 my_col_local, my_i, my_j, my_pcol, my_prow, ncol_local, nrow_local, num_ij_pairs, &
1619 num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, recv_size, send_size, &
1620 size_recv_buffer, size_send_buffer, tag
1621 INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
1622 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
1623 REAL(kind=dp) :: trace
1624 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
1625 TYPE(cp_blacs_env_type), POINTER :: context
1626 TYPE(mp_para_env_type), POINTER :: para_env
1627
1628 CALL timeset(routinen, handle)
1629
1630 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1631 ncol_local=ncol_local, col_indices=col_indices, &
1632 context=context, nrow_local=nrow_local)
1633 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1634 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1635
1636 num_ij_pairs = SIZE(pair_list, 2)
1637
1638 tag = 42
1639
1640 DO ij_counter = 1, num_ij_pairs
1641
1642 my_i = pair_list(1, ij_counter)
1643 my_j = pair_list(2, ij_counter)
1644
1645 trace = 0.0_dp
1646
1647 DO col_local = 1, ncol_local
1648 col_global = col_indices(col_local)
1649
1650 iocc = max(1, col_global - 1)/virtual + 1
1651 avirt = col_global - (iocc - 1)*virtual
1652
1653 IF (iocc /= my_j) cycle
1654 pcol = fm_work_iap%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
1655 IF (pcol /= my_pcol) cycle
1656
1657 my_col_local = fm_work_iap%matrix_struct%g2l_col((my_i - 1)*virtual + avirt)
1658
1659 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1660 dot_blksize)
1661 END DO
1662
1663 p_ij(ij_counter) = p_ij(ij_counter) - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1664
1665 END DO
1666
1667 IF (num_pe_col > 1) THEN
1668 size_send_buffer = 0
1669 size_recv_buffer = 0
1670 DO proc_shift = 1, num_pe_col - 1
1671 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1672 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1673
1674 IF (ALLOCATED(index2send(pcol_send)%array)) &
1675 size_send_buffer = max(size_send_buffer, SIZE(index2send(pcol_send)%array))
1676
1677 IF (ALLOCATED(index2recv(pcol_recv)%array)) &
1678 size_recv_buffer = max(size_recv_buffer, SIZE(index2recv(pcol_recv)%array))
1679 END DO
1680
1681 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1682
1683 DO proc_shift = 1, num_pe_col - 1
1684 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1685 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1686
1687 ! Collect data and exchange
1688 send_size = 0
1689 IF (ALLOCATED(index2send(pcol_send)%array)) send_size = SIZE(index2send(pcol_send)%array)
1690
1691 DO counter = 1, send_size
1692 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1693 END DO
1694
1695 recv_size = 0
1696 IF (ALLOCATED(index2recv(pcol_recv)%array)) recv_size = SIZE(index2recv(pcol_recv)%array)
1697 IF (recv_size > 0) THEN
1698 CALL timeset(routinen//"_send", handle2)
1699 IF (send_size > 0) THEN
1700 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1701 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1702 ELSE
1703 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1704 END IF
1705 CALL timestop(handle2)
1706
1707 DO ij_counter = 1, num_ij_pairs
1708 ! Collect the contributions of the matrix elements
1709
1710 my_i = pair_list(1, ij_counter)
1711 my_j = pair_list(2, ij_counter)
1712
1713 trace = 0.0_dp
1714
1715 DO col_local = 1, recv_size
1716 col_global = index2recv(pcol_recv)%array(col_local)
1717
1718 iocc = max(1, col_global - 1)/virtual + 1
1719 IF (iocc /= my_j) cycle
1720 avirt = col_global - (iocc - 1)*virtual
1721 pcol = fm_work_iap%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
1722 IF (pcol /= my_pcol) cycle
1723
1724 my_col_local = fm_work_iap%matrix_struct%g2l_col((my_i - 1)*virtual + avirt)
1725
1726 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1727 dot_blksize)
1728 END DO
1729
1730 p_ij(ij_counter) = p_ij(ij_counter) &
1731 - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1732 END DO
1733 ELSE IF (send_size > 0) THEN
1734 CALL timeset(routinen//"_send", handle2)
1735 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1736 CALL timestop(handle2)
1737 END IF
1738 END DO
1739 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
1740 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
1741 END IF
1742
1743 CALL timestop(handle)
1744
1745 END SUBROUTINE
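! **************************************************************************************************
!> \note Illustration only, not part of this module: the communication in calc_Pij_degen (and
!>       in most exchange loops of this file) follows a ring pattern. In round proc_shift each
!>       process column sends "upwards" and receives "downwards" along the ring, so after
!>       num_pe_col - 1 rounds every ordered pair has communicated exactly once and no
!>       deadlock is possible. Minimal sketch (hypothetical rank_of helper):
!>
!>          DO proc_shift = 1, num_pe_col - 1
!>             pcol_send = MODULO(my_pcol + proc_shift, num_pe_col)
!>             pcol_recv = MODULO(my_pcol - proc_shift, num_pe_col)
!>             CALL para_env%sendrecv(buffer_send, rank_of(pcol_send), &
!>                                    buffer_recv, rank_of(pcol_recv), tag)
!>          END DO
!>
!>       The separate send/recv branches above handle the rounds in which one of the two
!>       partners has nothing to contribute, so zero-sized messages are never posted.
! **************************************************************************************************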
1746
1747! **************************************************************************************************
1748!> \brief Adds the trace contributions of (near-)degenerate virtual orbital pairs to P_ab
1749!> \param fm_work_iaP ...
1750!> \param fm_mat_S ...
1751!> \param pair_list ...
1752!> \param virtual ...
1753!> \param P_ab ...
1754!> \param Eigenval ...
1755!> \param omega ...
1756!> \param weight ...
1757!> \param index2send ...
1758!> \param index2recv ...
1759!> \param dot_blksize ...
1760! **************************************************************************************************
1761 SUBROUTINE calc_pab_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ab, Eigenval, &
1762 omega, weight, index2send, index2recv, dot_blksize)
1763 TYPE(cp_fm_type), INTENT(IN) :: fm_work_iap, fm_mat_s
1764 INTEGER, DIMENSION(:, :), INTENT(IN) :: pair_list
1765 INTEGER, INTENT(IN) :: virtual
1766 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: p_ab
1767 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
1768 REAL(kind=dp), INTENT(IN) :: omega, weight
1769 TYPE(one_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2send, index2recv
1770 INTEGER, INTENT(IN) :: dot_blksize
1771
1772 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_Pab_degen'
1773
1774 INTEGER :: ab_counter, avirt, col_global, col_local, counter, handle, handle2, iocc, my_a, &
1775 my_b, my_col_local, my_pcol, my_prow, ncol_local, nrow_local, num_ab_pairs, num_pe_col, &
1776 pcol, pcol_recv, pcol_send, proc_shift, recv_size, send_size, size_recv_buffer, &
1777 size_send_buffer, tag
1778 INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
1779 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
1780 REAL(kind=dp) :: trace
1781 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
1782 TYPE(cp_blacs_env_type), POINTER :: context
1783 TYPE(mp_para_env_type), POINTER :: para_env
1784
1785 CALL timeset(routinen, handle)
1786
1787 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1788 ncol_local=ncol_local, col_indices=col_indices, &
1789 context=context, nrow_local=nrow_local)
1790 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1791 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1792
1793 num_ab_pairs = SIZE(pair_list, 2)
1794
1795 tag = 43
1796
1797 DO ab_counter = 1, num_ab_pairs
1798
1799 my_a = pair_list(1, ab_counter)
1800 my_b = pair_list(2, ab_counter)
1801
1802 trace = 0.0_dp
1803
1804 DO col_local = 1, ncol_local
1805 col_global = col_indices(col_local)
1806
1807 iocc = max(1, col_global - 1)/virtual + 1
1808 avirt = col_global - (iocc - 1)*virtual
1809
1810 IF (avirt /= my_b) cycle
1811 pcol = fm_work_iap%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
1812 IF (pcol /= my_pcol) cycle
1813 my_col_local = fm_work_iap%matrix_struct%g2l_col((iocc - 1)*virtual + my_a)
1814
1815 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1816 dot_blksize)
1817
1818 END DO
1819
1820 p_ab(ab_counter) = p_ab(ab_counter) &
1821 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1822
1823 END DO
1824
1825 IF (num_pe_col > 1) THEN
1826 size_send_buffer = 0
1827 size_recv_buffer = 0
1828 DO proc_shift = 1, num_pe_col - 1
1829 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1830 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1831
1832 IF (ALLOCATED(index2send(pcol_send)%array)) &
1833 size_send_buffer = max(size_send_buffer, SIZE(index2send(pcol_send)%array))
1834
1835 IF (ALLOCATED(index2recv(pcol_recv)%array)) &
1836 size_recv_buffer = max(size_recv_buffer, SIZE(index2recv(pcol_recv)%array))
1837 END DO
1838
1839 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1840
1841 DO proc_shift = 1, num_pe_col - 1
1842 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1843 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1844
1845 ! Collect data and exchange
1846 send_size = 0
1847 IF (ALLOCATED(index2send(pcol_send)%array)) send_size = SIZE(index2send(pcol_send)%array)
1848
1849 DO counter = 1, send_size
1850 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1851 END DO
1852
1853 recv_size = 0
1854 IF (ALLOCATED(index2recv(pcol_recv)%array)) recv_size = SIZE(index2recv(pcol_recv)%array)
1855 IF (recv_size > 0) THEN
1856 CALL timeset(routinen//"_send", handle2)
1857 IF (send_size > 0) THEN
1858 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1859 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1860 ELSE
1861 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1862 END IF
1863 CALL timestop(handle2)
1864
1865 DO ab_counter = 1, num_ab_pairs
1866 ! Collect the contributions of the matrix elements
1867
1868 my_a = pair_list(1, ab_counter)
1869 my_b = pair_list(2, ab_counter)
1870
1871 trace = 0.0_dp
1872
1873                DO col_local = 1, recv_size
1874 col_global = index2recv(pcol_recv)%array(col_local)
1875
1876 iocc = max(1, col_global - 1)/virtual + 1
1877 avirt = col_global - (iocc - 1)*virtual
1878 IF (avirt /= my_b) cycle
1879 pcol = fm_work_iap%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
1880 IF (pcol /= my_pcol) cycle
1881
1882 my_col_local = fm_work_iap%matrix_struct%g2l_col((iocc - 1)*virtual + my_a)
1883
1884 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1885 dot_blksize)
1886 END DO
1887
1888 p_ab(ab_counter) = p_ab(ab_counter) &
1889 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1890
1891 END DO
1892 ELSE IF (send_size > 0) THEN
1893 CALL timeset(routinen//"_send", handle2)
1894 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1895 CALL timestop(handle2)
1896 END IF
1897 END DO
1898 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
1899 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
1900 END IF
1901
1902 CALL timestop(handle)
1903
1904 END SUBROUTINE
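! **************************************************************************************************
!> \note Illustration only, not part of this module: calc_Pij_degen and calc_Pab_degen address
!>       fm_mat_S through the composite column index ia = (iocc - 1)*virtual + avirt (occupied
!>       index slowest, virtual index fastest). The decode used in the loops is the inverse of
!>       this encoding (minimal sketch, 1-based integer arithmetic):
!>
!>          ia    = (iocc - 1)*virtual + avirt
!>          iocc  = (ia - 1)/virtual + 1         ! integer division
!>          avirt = ia - (iocc - 1)*virtual
!>
!>       The MAX(1, ...) in the actual decode merely guards the ia = 1 corner case.
! **************************************************************************************************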
1905
1906! **************************************************************************************************
1907!> \brief Redistributes fm_mat_S into a local 3D array indexed (RI function, virtual, occupied)
1908!> \param index2send ...
1909!> \param index2recv ...
1910!> \param fm_mat_S ...
1911!> \param mat_S_3D ...
1912!> \param gd_homo ...
1913!> \param gd_virtual ...
1914!> \param mepos ...
1915! **************************************************************************************************
1916 SUBROUTINE redistribute_fm_mat_s(index2send, index2recv, fm_mat_S, mat_S_3D, gd_homo, gd_virtual, mepos)
1917 TYPE(one_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2send
1918 TYPE(two_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2recv
1919 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s
1920 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1921 INTENT(OUT) :: mat_s_3d
1922 TYPE(group_dist_d1_type), INTENT(IN) :: gd_homo, gd_virtual
1923 INTEGER, DIMENSION(2), INTENT(IN) :: mepos
1924
1925 CHARACTER(LEN=*), PARAMETER :: routinen = 'redistribute_fm_mat_S'
1926
1927 INTEGER :: col_local, handle, my_a, my_homo, my_i, my_pcol, my_prow, my_virtual, nrow_local, &
1928 num_pe_col, proc_recv, proc_send, proc_shift, recv_size, send_size, size_recv_buffer, &
1929 size_send_buffer, tag
1930 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
1931 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
1932 TYPE(mp_para_env_type), POINTER :: para_env
1933
1934 CALL timeset(routinen, handle)
1935
1936 tag = 46
1937
1938 CALL fm_mat_s%matrix_struct%context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1939 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1940
1941 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, nrow_local=nrow_local, para_env=para_env)
1942
1943 CALL get_group_dist(gd_homo, mepos(2), sizes=my_homo)
1944 CALL get_group_dist(gd_virtual, mepos(1), sizes=my_virtual)
1945
1946 ALLOCATE (mat_s_3d(nrow_local, my_virtual, my_homo))
1947
1948 IF (ALLOCATED(index2send(my_pcol)%array)) THEN
1949 DO col_local = 1, SIZE(index2send(my_pcol)%array)
1950 my_a = index2recv(my_pcol)%array(1, col_local)
1951 my_i = index2recv(my_pcol)%array(2, col_local)
1952 mat_s_3d(:, my_a, my_i) = fm_mat_s%local_data(:, index2send(my_pcol)%array(col_local))
1953 END DO
1954 END IF
1955
1956 IF (num_pe_col > 1) THEN
1957 size_send_buffer = 0
1958 size_recv_buffer = 0
1959 DO proc_shift = 1, num_pe_col - 1
1960 proc_send = modulo(my_pcol + proc_shift, num_pe_col)
1961 proc_recv = modulo(my_pcol - proc_shift, num_pe_col)
1962
1963 send_size = 0
1964 IF (ALLOCATED(index2send(proc_send)%array)) send_size = SIZE(index2send(proc_send)%array)
1965 size_send_buffer = max(size_send_buffer, send_size)
1966
1967 recv_size = 0
1968             IF (ALLOCATED(index2recv(proc_recv)%array)) recv_size = SIZE(index2recv(proc_recv)%array, 2)
1969 size_recv_buffer = max(size_recv_buffer, recv_size)
1970
1971 END DO
1972
1973 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1974
1975 DO proc_shift = 1, num_pe_col - 1
1976 proc_send = modulo(my_pcol + proc_shift, num_pe_col)
1977 proc_recv = modulo(my_pcol - proc_shift, num_pe_col)
1978
1979 send_size = 0
1980 IF (ALLOCATED(index2send(proc_send)%array)) send_size = SIZE(index2send(proc_send)%array)
1981 DO col_local = 1, send_size
1982 buffer_send(:, col_local) = fm_mat_s%local_data(:, index2send(proc_send)%array(col_local))
1983 END DO
1984
1985 recv_size = 0
1986 IF (ALLOCATED(index2recv(proc_recv)%array)) recv_size = SIZE(index2recv(proc_recv)%array, 2)
1987 IF (recv_size > 0) THEN
1988 IF (send_size > 0) THEN
1989 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), &
1990 buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
1991 ELSE
1992 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
1993 END IF
1994
1995 DO col_local = 1, recv_size
1996 my_a = index2recv(proc_recv)%array(1, col_local)
1997 my_i = index2recv(proc_recv)%array(2, col_local)
1998 mat_s_3d(:, my_a, my_i) = buffer_recv(:, col_local)
1999 END DO
2000 ELSE IF (send_size > 0) THEN
2001 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), tag)
2002 END IF
2003
2004 END DO
2005
2006 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
2007 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
2008 END IF
2009
2010 CALL timestop(handle)
2011
2012 END SUBROUTINE
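! **************************************************************************************************
!> \note Illustration only, not part of this module: like the other exchange routines in this
!>       file, redistribute_fm_mat_S walks the ring twice. The first pass only inspects the
!>       index lists to find the largest message in either direction, so one send and one
!>       receive buffer can be allocated once and reused in every round of the second pass
!>       (minimal sketch, with hypothetical count2send/count2recv helpers):
!>
!>          size_send = 0; size_recv = 0
!>          DO proc_shift = 1, np - 1                ! pass 1: determine maximum sizes
!>             size_send = MAX(size_send, count2send(proc_shift))
!>             size_recv = MAX(size_recv, count2recv(proc_shift))
!>          END DO
!>          ALLOCATE (buf_send(nrow_local, size_send), buf_recv(nrow_local, size_recv))
!>          DO proc_shift = 1, np - 1                ! pass 2: pack, exchange, unpack
!>             ! as in the loop body above
!>          END DO
!>
!>       This saves one allocate/deallocate pair per communication round.
! **************************************************************************************************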
2013
2014! **************************************************************************************************
2015!> \brief Finalizes the gradient intermediates: releases work data, builds Gamma_PQ (and Gamma_PQ_2) and G_P_ia, and redistributes the density matrices
2016!> \param rpa_grad ...
2017!> \param mp2_env ...
2018!> \param para_env_sub ...
2019!> \param para_env ...
2020!> \param qs_env ...
2021!> \param gd_array ...
2022!> \param color_sub ...
2023!> \param do_ri_sos_laplace_mp2 ...
2024!> \param homo ...
2025!> \param virtual ...
2026! **************************************************************************************************
2027 SUBROUTINE rpa_grad_finalize(rpa_grad, mp2_env, para_env_sub, para_env, qs_env, gd_array, &
2028 color_sub, do_ri_sos_laplace_mp2, homo, virtual)
2029 TYPE(rpa_grad_type), INTENT(INOUT) :: rpa_grad
2030 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
2031 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub, para_env
2032 TYPE(qs_environment_type), INTENT(IN), POINTER :: qs_env
2033 TYPE(group_dist_d1_type) :: gd_array
2034 INTEGER, INTENT(IN) :: color_sub
2035 LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2
2036 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
2037
2038 CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_finalize'
2039
2040 INTEGER :: dimen_ia, dimen_ri, handle, iib, ispin, my_group_l_end, my_group_l_size, &
2041 my_group_l_start, my_ia_end, my_ia_size, my_ia_start, my_p_end, my_p_size, my_p_start, &
2042 ngroup, nspins, pos_group, pos_sub, proc
2043 INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
2044 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
2045 REAL(kind=dp) :: my_scale
2046 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: gamma_2d
2047 TYPE(cp_blacs_env_type), POINTER :: blacs_env
2048 TYPE(cp_fm_struct_type), POINTER :: fm_struct
2049 TYPE(cp_fm_type) :: fm_g_p_ia, fm_pq, fm_pq_2, fm_pq_half, &
2050 fm_work_pq, fm_work_pq_2, fm_y, &
2051 operator_half
2052 TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_p, gd_p_new
2053
2054 CALL timeset(routinen, handle)
2055
2056 ! Release unnecessary matrices to save memory for next steps
2057
2058 nspins = SIZE(rpa_grad%fm_Y)
2059
2060 ! Scaling factor is required to scale the density matrices and the Gamma matrices later
2061 IF (do_ri_sos_laplace_mp2) THEN
2062 my_scale = mp2_env%scale_s
2063 ELSE
2064 my_scale = -mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2065 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2066 END IF
2067
2068 IF (do_ri_sos_laplace_mp2) THEN
2069 CALL sos_mp2_grad_finalize(rpa_grad%sos_mp2_work_occ, rpa_grad%sos_mp2_work_virt, &
2070 para_env, para_env_sub, homo, virtual, mp2_env)
2071 ELSE
2072 CALL rpa_grad_work_finalize(rpa_grad%rpa_work, mp2_env, homo, &
2073 virtual, para_env, para_env_sub)
2074 END IF
2075
2076 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2077
2078 CALL cp_fm_get_info(rpa_grad%fm_Gamma_PQ, ncol_global=dimen_ri)
2079
2080 NULLIFY (fm_struct)
2081 CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=dimen_ri, &
2082 ncol_global=dimen_ri, para_env=para_env)
2083 CALL cp_fm_create(fm_pq, fm_struct)
2084 CALL cp_fm_create(fm_work_pq, fm_struct)
2085 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2086 CALL cp_fm_create(fm_pq_2, fm_struct)
2087 END IF
2088 CALL cp_fm_struct_release(fm_struct)
2089 CALL cp_fm_set_all(fm_pq, 0.0_dp)
2090
2091      ! We still have to left- and right-multiply it with PQ_half
2092 CALL dereplicate_and_sum_fm(rpa_grad%fm_Gamma_PQ, fm_pq)
2093
2094 ngroup = para_env%num_pe/para_env_sub%num_pe
2095
2096 CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
2097 group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
2098
2099 ! Create fm_PQ_half
2100 CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
2101 CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)
2102
2103 CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)
2104
2105 CALL create_group_dist(gd_p_new, para_env%num_pe)
2106 CALL create_group_dist(gd_array_new, para_env%num_pe)
2107
2108 DO proc = 0, para_env%num_pe - 1
2109 ! calculate position of the group
2110 pos_group = proc/para_env_sub%num_pe
2111 ! calculate position in the subgroup
2112 pos_sub = pos_info(proc)
2113 ! 1 -> rows, 2 -> cols
2114 CALL get_group_dist(gd_array, pos_group, gd_array_new, proc)
2115 CALL get_group_dist(gd_p, pos_sub, gd_p_new, proc)
2116 END DO
2117
2118 DEALLOCATE (pos_info)
2119 CALL release_group_dist(gd_p)
2120
2121 CALL array2fm(mp2_env%ri_grad%PQ_half, fm_pq%matrix_struct, &
2122 my_p_start, my_p_end, &
2123 my_group_l_start, my_group_l_end, &
2124 gd_p_new, gd_array_new, &
2125 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2126 fm_pq_half)
2127
2128 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2129 CALL array2fm(mp2_env%ri_grad%operator_half, fm_pq%matrix_struct, my_p_start, my_p_end, &
2130 my_group_l_start, my_group_l_end, &
2131 gd_p_new, gd_array_new, &
2132 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2133 operator_half)
2134 END IF
2135
2136 ! deallocate the info array
2137 CALL release_group_dist(gd_p_new)
2138 CALL release_group_dist(gd_array_new)
2139
2140 IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2141         ! Finish Gamma_PQ
2142 CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2143 matrix_a=fm_pq, matrix_b=fm_pq_half, beta=0.0_dp, &
2144 matrix_c=fm_work_pq)
2145
2146 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2147 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2148 matrix_c=fm_pq)
2149
2150 CALL cp_fm_release(fm_work_pq)
2151 ELSE
2152 CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2153 matrix_a=fm_pq, matrix_b=operator_half, beta=0.0_dp, &
2154 matrix_c=fm_work_pq)
2155
2156 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2157 matrix_a=operator_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2158 matrix_c=fm_pq)
2159 CALL cp_fm_release(operator_half)
2160
2161 CALL cp_fm_create(fm_work_pq_2, fm_pq%matrix_struct, name="fm_Gamma_PQ_2")
2162 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2163 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2164 matrix_c=fm_work_pq_2)
2165 CALL cp_fm_to_fm(fm_work_pq_2, fm_pq_2)
2166 CALL cp_fm_geadd(1.0_dp, "T", fm_work_pq_2, 1.0_dp, fm_pq_2)
2167 CALL cp_fm_release(fm_work_pq_2)
2168 CALL cp_fm_release(fm_work_pq)
2169 END IF
2170
2171 ALLOCATE (mp2_env%ri_grad%Gamma_PQ(my_p_size, my_group_l_size))
2172 CALL fm2array(mp2_env%ri_grad%Gamma_PQ, &
2173 my_p_size, my_p_start, my_p_end, &
2174 my_group_l_size, my_group_l_start, my_group_l_end, &
2175 group_grid_2_mepos, mepos_2_grid_group, &
2176 para_env_sub%num_pe, ngroup, &
2177 fm_pq)
2178
2179 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2180 ALLOCATE (mp2_env%ri_grad%Gamma_PQ_2(my_p_size, my_group_l_size))
2181 CALL fm2array(mp2_env%ri_grad%Gamma_PQ_2, my_p_size, my_p_start, my_p_end, &
2182 my_group_l_size, my_group_l_start, my_group_l_end, &
2183 group_grid_2_mepos, mepos_2_grid_group, &
2184 para_env_sub%num_pe, ngroup, &
2185 fm_pq_2)
2186 END IF
2187
2188! Now, Gamma_Pia
2189 ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
2190 DO ispin = 1, nspins
2191 DO iib = 1, my_group_l_size
2192 NULLIFY (mp2_env%ri_grad%G_P_ia(iib, ispin)%matrix)
2193 END DO
2194 END DO
2195
2196 ! Redistribute the Y matrix
2197 DO ispin = 1, nspins
2198         ! Collect all column data for our own subgroup locally
2199 CALL cp_fm_get_info(rpa_grad%fm_Y(ispin), ncol_global=dimen_ia)
2200
2201 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2202
2203 NULLIFY (fm_struct)
2204 CALL cp_fm_struct_create(fm_struct, template_fmstruct=fm_pq_half%matrix_struct, nrow_global=dimen_ia)
2205 CALL cp_fm_create(fm_y, fm_struct)
2206 CALL cp_fm_struct_release(fm_struct)
2207 CALL cp_fm_set_all(fm_y, 0.0_dp)
2208
2209 CALL dereplicate_and_sum_fm(rpa_grad%fm_Y(ispin), fm_y)
2210
2211 CALL cp_fm_create(fm_g_p_ia, fm_y%matrix_struct)
2212 CALL cp_fm_set_all(fm_g_p_ia, 0.0_dp)
2213
2214 CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2215 matrix_a=fm_y, matrix_b=fm_pq_half, beta=0.0_dp, &
2216 matrix_c=fm_g_p_ia)
2217
2218 CALL cp_fm_release(fm_y)
2219
2220 CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
2221 CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
2222
2223 CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
2224 my_group_l_size, my_group_l_start, my_group_l_end, &
2225 group_grid_2_mepos, mepos_2_grid_group, &
2226 para_env_sub%num_pe, ngroup, &
2227 fm_g_p_ia)
2228
2229 ! create the Gamma_ia_P in DBCSR style
2230 CALL create_dbcsr_gamma(gamma_2d, homo(ispin), virtual(ispin), dimen_ia, para_env_sub, &
2231 my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
2232 mp2_env%ri_grad%G_P_ia(:, ispin), mp2_env%ri_grad%mo_coeff_o(ispin)%matrix)
2233
2234 CALL release_group_dist(gd_ia)
2235
2236 END DO
2237 DEALLOCATE (rpa_grad%fm_Y)
2238 CALL cp_fm_release(fm_pq_half)
2239
2240 CALL timestop(handle)
2241
2242 END SUBROUTINE rpa_grad_finalize
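! **************************************************************************************************
!> \note Illustration only, not part of this module: the two parallel_gemm calls in
!>       rpa_grad_finalize realize a congruence transformation of the summed two-center
!>       intermediate. With L = fm_PQ_half and G = fm_PQ (after dereplicate_and_sum_fm) the
!>       metric == potential branch computes
!>
!>          work = G * TRANSPOSE(L)            ! first gemm:  "N","T", alpha = 1
!>          G    = -my_scale * L * work        ! second gemm: "N","N", alpha = -my_scale
!>
!>       i.e. Gamma_PQ = -my_scale * L * G * TRANSPOSE(L). The other branch applies the same
!>       composition with operator_half and additionally builds the symmetrized Gamma_PQ_2
!>       from fm_PQ_half via cp_fm_geadd with its own transpose.
! **************************************************************************************************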
2243
2244! **************************************************************************************************
2245!> \brief Sums, symmetrizes, and redistributes the SOS-MP2 density matrices P_ij and P_ab
2246!> \param sos_mp2_work_occ ...
2247!> \param sos_mp2_work_virt ...
2248!> \param para_env ...
2249!> \param para_env_sub ...
2250!> \param homo ...
2251!> \param virtual ...
2252!> \param mp2_env ...
2253! **************************************************************************************************
2254 SUBROUTINE sos_mp2_grad_finalize(sos_mp2_work_occ, sos_mp2_work_virt, para_env, para_env_sub, homo, virtual, mp2_env)
2255 TYPE(sos_mp2_grad_work_type), ALLOCATABLE, &
2256 DIMENSION(:), INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
2257 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
2258 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
2259 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
2260
2261 CHARACTER(LEN=*), PARAMETER :: routinen = 'sos_mp2_grad_finalize'
2262
2263 INTEGER :: ab_counter, handle, ij_counter, ispin, &
2264 itmp(2), my_a, my_b, my_b_end, &
2265 my_b_size, my_b_start, my_i, my_j, &
2266 nspins, pcol
2267 REAL(kind=dp) :: my_scale
2268
2269 CALL timeset(routinen, handle)
2270
2271 nspins = SIZE(sos_mp2_work_occ)
2272 my_scale = mp2_env%scale_s
2273
2274 DO ispin = 1, nspins
2275 DO pcol = 0, SIZE(sos_mp2_work_occ(ispin)%index2send, 1) - 1
2276 IF (ALLOCATED(sos_mp2_work_occ(ispin)%index2send(pcol)%array)) &
2277 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send(pcol)%array)
2278            IF (ALLOCATED(sos_mp2_work_occ(ispin)%index2recv(pcol)%array)) &
2279               DEALLOCATE (sos_mp2_work_occ(ispin)%index2recv(pcol)%array)
2280            IF (ALLOCATED(sos_mp2_work_virt(ispin)%index2send(pcol)%array)) &
2281               DEALLOCATE (sos_mp2_work_virt(ispin)%index2send(pcol)%array)
2282 IF (ALLOCATED(sos_mp2_work_virt(ispin)%index2recv(pcol)%array)) &
2283 DEALLOCATE (sos_mp2_work_virt(ispin)%index2recv(pcol)%array)
2284 END DO
2285 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send, &
2286 sos_mp2_work_occ(ispin)%index2recv, &
2287 sos_mp2_work_virt(ispin)%index2send, &
2288 sos_mp2_work_virt(ispin)%index2recv)
2289 END DO
2290
2291 ! Sum P_ij and P_ab and redistribute them
2292 DO ispin = 1, nspins
2293 CALL para_env%sum(sos_mp2_work_occ(ispin)%P)
2294
2295 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2296 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2297 DO my_i = 1, homo(ispin)
2298 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_i) = my_scale*sos_mp2_work_occ(ispin)%P(my_i)
2299 END DO
2300 DO ij_counter = 1, SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
2301 my_i = sos_mp2_work_occ(ispin)%pair_list(1, ij_counter)
2302 my_j = sos_mp2_work_occ(ispin)%pair_list(2, ij_counter)
2303
2304 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = my_scale*sos_mp2_work_occ(ispin)%P(homo(ispin) + ij_counter)
2305 END DO
2306 DEALLOCATE (sos_mp2_work_occ(ispin)%P, sos_mp2_work_occ(ispin)%pair_list)
2307
2308 ! Symmetrize P_ij
2309 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2310 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2311
2312 ! The first index of P_ab has to be distributed within the subgroups, so sum it up first and add the required elements later
2313 CALL para_env%sum(sos_mp2_work_virt(ispin)%P)
2314
2315 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2316 my_b_size = itmp(2) - itmp(1) + 1
2317 my_b_start = itmp(1)
2318 my_b_end = itmp(2)
2319
2320 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2321 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2322 DO my_a = itmp(1), itmp(2)
2323 mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_a) = my_scale*sos_mp2_work_virt(ispin)%P(my_a)
2324 END DO
2325 DO ab_counter = 1, SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
2326 my_a = sos_mp2_work_virt(ispin)%pair_list(1, ab_counter)
2327 my_b = sos_mp2_work_virt(ispin)%pair_list(2, ab_counter)
2328
2329 IF (my_a >= itmp(1) .AND. my_a <= itmp(2)) mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_b) = &
2330 my_scale*sos_mp2_work_virt(ispin)%P(virtual(ispin) + ab_counter)
2331 END DO
2332
2333 DEALLOCATE (sos_mp2_work_virt(ispin)%P, sos_mp2_work_virt(ispin)%pair_list)
2334
2335 ! Symmetrize P_ab
2336 IF (para_env_sub%num_pe > 1) THEN
2337 block
2338 INTEGER :: send_a_start, send_a_end, send_a_size, &
2339 recv_a_start, recv_a_end, recv_a_size, proc_shift, proc_send, proc_recv
2340 REAL(kind=dp), DIMENSION(:), ALLOCATABLE, TARGET :: buffer_send_1d
2341 REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_send
2342 REAL(kind=dp), DIMENSION(:, :), ALLOCATABLE :: buffer_recv
2343 TYPE(group_dist_d1_type) :: gd_virtual_sub
2344
2345 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2346
2347 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2348 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2349 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2350
2351 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2352 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2353
2354 DO proc_shift = 1, para_env_sub%num_pe - 1
2355
2356 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2357 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2358
2359 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2360 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2361
2362 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2363
2364 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2365 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2366 buffer_recv(:, :recv_a_size), proc_recv)
2367
2368 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2369 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2370
2371 END DO
2372
2373 DEALLOCATE (buffer_send_1d, buffer_recv)
2374
2375 CALL release_group_dist(gd_virtual_sub)
2376 END block
2377 ELSE
2378 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2379 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2380 END IF
2381
2382 END DO
2383 DEALLOCATE (sos_mp2_work_occ, sos_mp2_work_virt)
2384 IF (nspins == 1) THEN
2385 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2386 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2387 END IF
2388
2389 CALL timestop(handle)
2390
2391 END SUBROUTINE
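! **************************************************************************************************
!> \note Illustration only, not part of this module: P_ab is stored with its first index
!>       distributed over para_env_sub, so the symmetrization P <- (P + TRANSPOSE(P))/2 cannot
!>       be done locally. The block above symmetrizes the diagonal panel in place and then
!>       runs the usual ring, sending each off-diagonal panel already transposed so that the
!>       receiver only has to average (minimal sketch, cols_of(...) a hypothetical shorthand
!>       for the column range taken from gd_virtual_sub):
!>
!>          buffer_send = TRANSPOSE(P(:, cols_of(proc_send)))
!>          CALL para_env_sub%sendrecv(buffer_send, proc_send, buffer_recv, proc_recv)
!>          P(:, cols_of(proc_recv)) = 0.5_dp*(P(:, cols_of(proc_recv)) + buffer_recv)
! **************************************************************************************************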
2392
2393! **************************************************************************************************
2394!> \brief Sums, symmetrizes, and redistributes the RPA density matrices P_ij and P_ab
2395!> \param rpa_work ...
2396!> \param mp2_env ...
2397!> \param homo ...
2398!> \param virtual ...
2399!> \param para_env ...
2400!> \param para_env_sub ...
2401! **************************************************************************************************
2402 SUBROUTINE rpa_grad_work_finalize(rpa_work, mp2_env, homo, virtual, para_env, para_env_sub)
2403 TYPE(rpa_grad_work_type), INTENT(INOUT) :: rpa_work
2404 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
2405 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
2406 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
2407
2408 CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_work_finalize'
2409
2410 INTEGER :: handle, ispin, itmp(2), my_a_end, my_a_size, my_a_start, my_b_end, my_b_size, &
2411 my_b_start, my_i_end, my_i_size, my_i_start, nspins, proc, proc_recv, proc_send, &
2412 proc_shift, recv_a_end, recv_a_size, recv_a_start, recv_end, recv_start, send_a_end, &
2413 send_a_size, send_a_start, send_end, send_start, size_recv_buffer, size_send_buffer
2414 REAL(kind=dp) :: my_scale
2415 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
2416 TYPE(group_dist_d1_type) :: gd_a_sub, gd_virtual_sub
2417
2418 CALL timeset(routinen, handle)
2419
2420 nspins = SIZE(homo)
2421 my_scale = mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2422 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2423
2424 CALL cp_fm_release(rpa_work%fm_mat_Q_copy)
2425
2426 DO ispin = 1, nspins
2427 DO proc = 0, SIZE(rpa_work%index2send, 1) - 1
2428 IF (ALLOCATED(rpa_work%index2send(proc, ispin)%array)) DEALLOCATE (rpa_work%index2send(proc, ispin)%array)
2429 IF (ALLOCATED(rpa_work%index2recv(proc, ispin)%array)) DEALLOCATE (rpa_work%index2recv(proc, ispin)%array)
2430 END DO
2431 END DO
2432 DEALLOCATE (rpa_work%index2send, rpa_work%index2recv)
2433
2434 DO ispin = 1, nspins
2435 CALL get_group_dist(rpa_work%gd_homo(ispin), rpa_work%mepos(2), my_i_start, my_i_end, my_i_size)
2436 CALL release_group_dist(rpa_work%gd_homo(ispin))
2437
2438 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2439 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2440 mp2_env%ri_grad%P_ij(ispin)%array(my_i_start:my_i_end, :) = my_scale*rpa_work%P_ij(ispin)%array
2441 DEALLOCATE (rpa_work%P_ij(ispin)%array)
2442 CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
2443
2444 ! Symmetrize P_ij
2445 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2446 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2447
2448 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2449 my_b_start = itmp(1)
2450 my_b_end = itmp(2)
2451 my_b_size = my_b_end - my_b_start + 1
2452
2453 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2454 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2455
2456 CALL get_group_dist(rpa_work%gd_virtual(ispin), rpa_work%mepos(1), my_a_start, my_a_end, my_a_size)
2457 CALL release_group_dist(rpa_work%gd_virtual(ispin))
2458         ! This group dist describes which part of P_ab a process currently owns
2459 CALL create_group_dist(gd_a_sub, my_a_start, my_a_end, my_a_size, para_env_sub)
2460         ! This group dist describes which part of P_ab a process is supposed to own later
2461 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2462
2463 ! Calculate local indices of the common range of own matrix and send process
2464 send_start = max(1, my_b_start - my_a_start + 1)
2465 send_end = min(my_a_size, my_b_end - my_a_start + 1)
2466
2467 ! Same for recv process but with reverse positions
2468 recv_start = max(1, my_a_start - my_b_start + 1)
2469 recv_end = min(my_b_size, my_a_end - my_b_start + 1)
2470
2471 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2472 my_scale*rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2473
2474 IF (para_env_sub%num_pe > 1) THEN
2475 size_send_buffer = 0
2476 size_recv_buffer = 0
2477 DO proc_shift = 1, para_env_sub%num_pe - 1
2478 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2479 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2480
2481 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2482 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2483
2484 ! Calculate local indices of the common range of own matrix and send process
2485 send_start = max(1, send_a_start - my_a_start + 1)
2486 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2487
2488 size_send_buffer = max(size_send_buffer, max(send_end - send_start + 1, 0))
2489
2490 ! Same for recv process but with reverse positions
2491 recv_start = max(1, recv_a_start - my_b_start + 1)
2492 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2493
2494 size_recv_buffer = max(size_recv_buffer, max(recv_end - recv_start + 1, 0))
2495 END DO
2496 ALLOCATE (buffer_send(size_send_buffer, virtual(ispin)), buffer_recv(size_recv_buffer, virtual(ispin)))
2497
2498 DO proc_shift = 1, para_env_sub%num_pe - 1
2499 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2500 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2501
2502 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2503 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2504
2505 ! Calculate local indices of the common range of own matrix and send process
2506 send_start = max(1, send_a_start - my_a_start + 1)
2507 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2508 buffer_send(1:max(send_end - send_start + 1, 0), :) = rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2509
2510 ! Same for recv process but with reverse positions
2511 recv_start = max(1, recv_a_start - my_b_start + 1)
2512 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2513
2514 CALL para_env_sub%sendrecv(buffer_send(1:max(send_end - send_start + 1, 0), :), proc_send, &
2515 buffer_recv(1:max(recv_end - recv_start + 1, 0), :), proc_recv)
2516
2517 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2518 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) + &
2519 my_scale*buffer_recv(1:max(recv_end - recv_start + 1, 0), :)
2520
2521 END DO
2522
2523 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
2524 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
2525 END IF
2526 DEALLOCATE (rpa_work%P_ab(ispin)%array)
2527
2528 CALL release_group_dist(gd_a_sub)
2529
2530 block
2531 TYPE(mp_comm_type) :: comm_exchange
2532 CALL comm_exchange%from_split(para_env, para_env_sub%mepos)
2533 CALL comm_exchange%sum(mp2_env%ri_grad%P_ab(ispin)%array)
2534 CALL comm_exchange%free()
2535 END block
2536
2537 ! Symmetrize P_ab
2538 IF (para_env_sub%num_pe > 1) THEN
2539 block
2540 REAL(kind=dp), DIMENSION(:), ALLOCATABLE, TARGET :: buffer_send_1d
2541 REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_send
2542 REAL(kind=dp), DIMENSION(:, :), ALLOCATABLE :: buffer_recv
2543
2544 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2545 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2546 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2547
2548 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2549 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2550
2551 DO proc_shift = 1, para_env_sub%num_pe - 1
2552
2553 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2554 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2555
2556 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2557 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2558
2559 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2560
2561 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2562 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2563 buffer_recv(:, :recv_a_size), proc_recv)
2564
2565 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2566 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2567
2568 END DO
2569
2570 DEALLOCATE (buffer_send_1d, buffer_recv)
2571 END block
2572 ELSE
2573 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2574 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2575 END IF
2576
2577 CALL release_group_dist(gd_virtual_sub)
2578
2579 END DO
2580 DEALLOCATE (rpa_work%gd_homo, rpa_work%gd_virtual, rpa_work%P_ij, rpa_work%P_ab)
2581 IF (nspins == 1) THEN
2582 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2583 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2584 END IF
2585
2586 CALL timestop(handle)
2587 END SUBROUTINE
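! **************************************************************************************************
!> \note Illustration only, not part of this module: when rpa_grad_work_finalize moves P_ab
!>       from the RPA distribution (rows my_a_start:my_a_end) to the subgroup distribution
!>       (rows my_b_start:my_b_end), only the intersection of the two ranges may be copied.
!>       The send_start/send_end arithmetic computes exactly that intersection in indices
!>       local to the currently owned block (minimal sketch):
!>
!>          send_start = MAX(1, my_b_start - my_a_start + 1)
!>          send_end   = MIN(my_a_size, my_b_end - my_a_start + 1)
!>          ! an empty intersection yields send_end < send_start, i.e. a zero-sized slice
!>
!>       and analogously for recv_start/recv_end with the roles of the two ranges reversed.
! **************************************************************************************************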
2588
2589! **************************************************************************************************
2590!> \brief Dereplicate data from fm_sub and collect in fm_global, overlapping data will be added
2591!> \param fm_sub replicated matrix, all subgroups have the same size; will be released on output
2592!> \param fm_global global matrix, on output it will contain the sum of the replicated matrices redistributed
2593! **************************************************************************************************
2594 SUBROUTINE dereplicate_and_sum_fm(fm_sub, fm_global)
2595 TYPE(cp_fm_type), INTENT(INOUT) :: fm_sub, fm_global
2596
2597 CHARACTER(LEN=*), PARAMETER :: routinen = 'dereplicate_and_sum_fm'
2598
2599 INTEGER :: col_local, elements2recv_col, elements2recv_row, elements2send_col, &
2600 elements2send_row, handle, handle2, mypcol_global, myprow_global, ncol_local_global, &
2601 ncol_local_sub, npcol_global, npcol_sub, nprow_global, nprow_sub, nrow_local_global, &
2602 nrow_local_sub, pcol_recv, pcol_send, proc_recv, proc_send, proc_send_global, proc_shift, &
2603 prow_recv, prow_send, row_local, tag
2604 INTEGER(int_8) :: size_recv_buffer, size_send_buffer
2605 INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv_col, data2recv_row, &
2606 data2send_col, data2send_row, &
2607 subgroup2mepos
2608 INTEGER, DIMENSION(:), POINTER :: col_indices_global, col_indices_sub, &
2609 row_indices_global, row_indices_sub
2610 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi_global, blacs2mpi_sub, &
2611 mpi2blacs_global, mpi2blacs_sub
2612 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), TARGET :: recv_buffer_1d, send_buffer_1d
2613 REAL(kind=dp), DIMENSION(:, :), POINTER :: recv_buffer, send_buffer
2614 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
2615 TYPE(one_dim_int_array), ALLOCATABLE, DIMENSION(:) :: index2recv_col, index2recv_row, &
2616 index2send_col, index2send_row
2617
2618 CALL timeset(routinen, handle)
2619
2620 tag = 1
2621
2622 nprow_sub = fm_sub%matrix_struct%context%num_pe(1)
2623 npcol_sub = fm_sub%matrix_struct%context%num_pe(2)
2624
2625 myprow_global = fm_global%matrix_struct%context%mepos(1)
2626 mypcol_global = fm_global%matrix_struct%context%mepos(2)
2627 nprow_global = fm_global%matrix_struct%context%num_pe(1)
2628 npcol_global = fm_global%matrix_struct%context%num_pe(2)
2629
2630 CALL cp_fm_get_info(fm_sub, col_indices=col_indices_sub, row_indices=row_indices_sub, &
2631 nrow_local=nrow_local_sub, ncol_local=ncol_local_sub)
2632 CALL cp_fm_struct_get(fm_sub%matrix_struct, para_env=para_env_sub)
2633 CALL cp_fm_struct_get(fm_global%matrix_struct, para_env=para_env, &
2634 col_indices=col_indices_global, row_indices=row_indices_global, &
2635 nrow_local=nrow_local_global, ncol_local=ncol_local_global)
2636 CALL fm_sub%matrix_struct%context%get(blacs2mpi=blacs2mpi_sub, mpi2blacs=mpi2blacs_sub)
2637 CALL fm_global%matrix_struct%context%get(blacs2mpi=blacs2mpi_global, mpi2blacs=mpi2blacs_global)
2638
2639 IF (para_env%num_pe /= para_env_sub%num_pe) THEN
2640 block
2641 TYPE(mp_comm_type) :: comm_exchange
2642 comm_exchange = fm_sub%matrix_struct%context%interconnect(para_env)
2643 CALL comm_exchange%sum(fm_sub%local_data)
2644 CALL comm_exchange%free()
2645 END block
2646 END IF
2647
2648 ALLOCATE (subgroup2mepos(0:para_env_sub%num_pe - 1))
2649 CALL para_env_sub%allgather(para_env%mepos, subgroup2mepos)
2650
2651 CALL timeset(routinen//"_data2", handle2)
2652      ! Create a map of how much data has to be sent to which process coordinate; interchange rows and columns to transpose the matrices
2653 CALL get_elements2send_col(data2send_col, fm_global%matrix_struct, row_indices_sub, index2send_col)
2654 CALL get_elements2send_row(data2send_row, fm_global%matrix_struct, col_indices_sub, index2send_row)
2655
2656      ! Do the reverse for the receive processes to map how much data is received
2657      ! from which process coordinate (again with rows and columns interchanged)
2658 CALL get_elements2send_col(data2recv_col, fm_sub%matrix_struct, row_indices_global, index2recv_col)
2659 CALL get_elements2send_row(data2recv_row, fm_sub%matrix_struct, col_indices_global, index2recv_row)
2660 CALL timestop(handle2)
2661
2662 CALL timeset(routinen//"_local", handle2)
2663 ! Loop over local data and transpose
2664 prow_send = mpi2blacs_global(1, para_env%mepos)
2665 pcol_send = mpi2blacs_global(2, para_env%mepos)
2666 prow_recv = mpi2blacs_sub(1, para_env_sub%mepos)
2667 pcol_recv = mpi2blacs_sub(2, para_env_sub%mepos)
2668 elements2recv_col = data2recv_col(pcol_recv)
2669 elements2recv_row = data2recv_row(prow_recv)
2670
2671!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(row_local,col_local) &
2672!$OMP SHARED(elements2recv_col,elements2recv_row,recv_buffer,fm_global,&
2673!$OMP index2recv_col,index2recv_row,pcol_recv,prow_recv, &
2674!$OMP fm_sub,index2send_col,index2send_row,pcol_send,prow_send)
2675 DO col_local = 1, elements2recv_col
2676 DO row_local = 1, elements2recv_row
2677 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2678 index2recv_row(prow_recv)%array(row_local)) &
2679 = fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2680 index2send_row(prow_send)%array(col_local))
2681 END DO
2682 END DO
2683!$OMP END PARALLEL DO
2684 CALL timestop(handle2)
2685
2686 IF (para_env_sub%num_pe > 1) THEN
2687 size_send_buffer = 0_int_8
2688 size_recv_buffer = 0_int_8
2689 ! Loop over all processes in para_env_sub
2690 DO proc_shift = 1, para_env_sub%num_pe - 1
2691 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2692 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2693
2694 proc_send_global = subgroup2mepos(proc_send)
2695 prow_send = mpi2blacs_global(1, proc_send_global)
2696 pcol_send = mpi2blacs_global(2, proc_send_global)
2697 elements2send_col = data2send_col(pcol_send)
2698 elements2send_row = data2send_row(prow_send)
2699
2700 size_send_buffer = max(size_send_buffer, int(elements2send_col, int_8)*elements2send_row)
2701
2702 prow_recv = mpi2blacs_sub(1, proc_recv)
2703 pcol_recv = mpi2blacs_sub(2, proc_recv)
2704 elements2recv_col = data2recv_col(pcol_recv)
2705 elements2recv_row = data2recv_row(prow_recv)
2706
2707 size_recv_buffer = max(size_recv_buffer, int(elements2recv_col, int_8)*elements2recv_row)
2708 END DO
2709 ALLOCATE (send_buffer_1d(size_send_buffer), recv_buffer_1d(size_recv_buffer))
2710
2711 ! Loop over all processes in para_env_sub
2712 DO proc_shift = 1, para_env_sub%num_pe - 1
2713 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2714 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2715
2716 proc_send_global = subgroup2mepos(proc_send)
2717 prow_send = mpi2blacs_global(1, proc_send_global)
2718 pcol_send = mpi2blacs_global(2, proc_send_global)
2719 elements2send_col = data2send_col(pcol_send)
2720 elements2send_row = data2send_row(prow_send)
2721
2722 CALL timeset(routinen//"_pack", handle2)
2723 ! Loop over local data and pack the buffer
2724 ! Transpose the matrix already
2725 send_buffer(1:elements2send_row, 1:elements2send_col) => send_buffer_1d(1:int(elements2send_row, int_8)*elements2send_col)
2726!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(row_local,col_local) &
2727!$OMP SHARED(elements2send_col,elements2send_row,send_buffer,fm_sub,&
2728!$OMP index2send_col,index2send_row,pcol_send,prow_send)
2729 DO row_local = 1, elements2send_col
2730 DO col_local = 1, elements2send_row
2731 send_buffer(col_local, row_local) = &
2732 fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2733 index2send_row(prow_send)%array(col_local))
2734 END DO
2735 END DO
2736!$OMP END PARALLEL DO
2737 CALL timestop(handle2)
2738
2739 prow_recv = mpi2blacs_sub(1, proc_recv)
2740 pcol_recv = mpi2blacs_sub(2, proc_recv)
2741 elements2recv_col = data2recv_col(pcol_recv)
2742 elements2recv_row = data2recv_row(prow_recv)
2743
2744 ! Send data
2745 recv_buffer(1:elements2recv_col, 1:elements2recv_row) => recv_buffer_1d(1:int(elements2recv_row, int_8)*elements2recv_col)
2746 IF (SIZE(recv_buffer) > 0_int_8) THEN
2747 IF (SIZE(send_buffer) > 0_int_8) THEN
2748 CALL para_env_sub%sendrecv(send_buffer, proc_send, recv_buffer, proc_recv, tag)
2749 ELSE
2750 CALL para_env_sub%recv(recv_buffer, proc_recv, tag)
2751 END IF
2752
2753 CALL timeset(routinen//"_unpack", handle2)
2754!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(row_local,col_local) &
2755!$OMP SHARED(elements2recv_col,elements2recv_row,recv_buffer,fm_global,&
2756!$OMP index2recv_col,index2recv_row,pcol_recv,prow_recv)
2757 DO col_local = 1, elements2recv_col
2758 DO row_local = 1, elements2recv_row
2759 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2760 index2recv_row(prow_recv)%array(row_local)) &
2761 = recv_buffer(col_local, row_local)
2762 END DO
2763 END DO
2764!$OMP END PARALLEL DO
2765 CALL timestop(handle2)
2766 ELSE IF (SIZE(send_buffer) > 0_int_8) THEN
2767 CALL para_env_sub%send(send_buffer, proc_send, tag)
2768 END IF
2769 END DO
2770 END IF
2771
2772 DEALLOCATE (data2send_col, data2send_row, data2recv_col, data2recv_row)
2773 DO proc_shift = 0, npcol_global - 1
2774 DEALLOCATE (index2send_col(proc_shift)%array)
2775 END DO
2776 DO proc_shift = 0, npcol_sub - 1
2777 DEALLOCATE (index2recv_col(proc_shift)%array)
2778 END DO
2779 DO proc_shift = 0, nprow_global - 1
2780 DEALLOCATE (index2send_row(proc_shift)%array)
2781 END DO
2782 DO proc_shift = 0, nprow_sub - 1
2783 DEALLOCATE (index2recv_row(proc_shift)%array)
2784 END DO
2785 DEALLOCATE (index2send_col, index2recv_col, index2send_row, index2recv_row)
2786
2787 CALL cp_fm_release(fm_sub)
2788
2789 CALL timestop(handle)
2790
2791 END SUBROUTINE dereplicate_and_sum_fm
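! **************************************************************************************************
!> \note Illustration only, not part of this module: because every subgroup stores fm_sub in
!>       the same layout (see \param fm_sub above), summing the local blocks over the
!>       inter-group communicator is equivalent to summing the replicated matrices themselves:
!>
!>          comm_exchange = fm_sub%matrix_struct%context%interconnect(para_env)
!>          CALL comm_exchange%sum(fm_sub%local_data)   ! element-wise sum over all subgroups
!>          CALL comm_exchange%free()
!>
!>       Only the already summed matrix is then transposed and scattered into the layout of
!>       fm_global, which limits the summation traffic to one local block per rank.
! **************************************************************************************************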
2792
2793! **************************************************************************************************
2794!> \brief Determines, per process column of the target layout, which locally held elements have to be sent where
2795!> \param data2send number of elements to send to each process column
2796!> \param struct_global structure of the target (global) matrix
2797!> \param indices_sub global indices of the locally held elements
2798!> \param index2send local indices of the elements to send, per process column
2799! **************************************************************************************************
2800 SUBROUTINE get_elements2send_col(data2send, struct_global, indices_sub, index2send)
2801 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: data2send
2802 TYPE(cp_fm_struct_type), INTENT(INOUT) :: struct_global
2803 INTEGER, DIMENSION(:), INTENT(IN) :: indices_sub
2804 TYPE(one_dim_int_array), ALLOCATABLE, &
2805 DIMENSION(:), INTENT(OUT) :: index2send
2806
2807 INTEGER :: i_global, i_local, np_global, proc
2808
2809 CALL struct_global%context%get(number_of_process_columns=np_global)
2810
2811 ALLOCATE (data2send(0:np_global - 1))
2812 data2send = 0
2813 DO i_local = 1, SIZE(indices_sub)
2814 i_global = indices_sub(i_local)
2815 proc = struct_global%g2p_col(i_global)
2816 data2send(proc) = data2send(proc) + 1
2817 END DO
2818
2819 ALLOCATE (index2send(0:np_global - 1))
2820 DO proc = 0, np_global - 1
2821 ALLOCATE (index2send(proc)%array(data2send(proc)))
2822 ! We want to crash if there is an error
2823 index2send(proc)%array = -1
2824 END DO
2825
2826 data2send = 0
2827 DO i_local = 1, SIZE(indices_sub)
2828 i_global = indices_sub(i_local)
2829 proc = struct_global%g2p_col(i_global)
2830 data2send(proc) = data2send(proc) + 1
2831 index2send(proc)%array(data2send(proc)) = i_local
2832 END DO
2833
2834 END SUBROUTINE
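! **************************************************************************************************
!> \note Illustration only, not part of this module: get_elements2send_col (and its row
!>       counterpart below) build the index lists with the classic two-pass counting scheme:
!>       the first pass over the local indices only counts how many entries belong to each
!>       target process, the lists are allocated to exactly that size, and a second identical
!>       pass fills them (minimal sketch, owner(i) standing in for struct_global%g2p_col):
!>
!>          counts = 0
!>          DO i = 1, n                                ! pass 1: histogram
!>             counts(owner(i)) = counts(owner(i)) + 1
!>          END DO
!>          ! ALLOCATE (list(p)%array(counts(p))) for every p, then
!>          counts = 0
!>          DO i = 1, n                                ! pass 2: fill
!>             counts(owner(i)) = counts(owner(i)) + 1
!>             list(owner(i))%array(counts(owner(i))) = i
!>          END DO
!>
!>       Pre-filling the lists with -1 makes any indexing bug fail fast, as noted in the code.
! **************************************************************************************************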
2835
2836! **************************************************************************************************
2837!> \brief Determines, per process row of the target layout, which locally held elements have to be sent where
2838!> \param data2send number of elements to send to each process row
2839!> \param struct_global structure of the target (global) matrix
2840!> \param indices_sub global indices of the locally held elements
2841!> \param index2send local indices of the elements to send, per process row
2842! **************************************************************************************************
2843 SUBROUTINE get_elements2send_row(data2send, struct_global, indices_sub, index2send)
2844 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: data2send
2845 TYPE(cp_fm_struct_type), INTENT(INOUT) :: struct_global
2846 INTEGER, DIMENSION(:), INTENT(IN) :: indices_sub
2847 TYPE(one_dim_int_array), ALLOCATABLE, &
2848 DIMENSION(:), INTENT(OUT) :: index2send
2849
2850 INTEGER :: i_global, i_local, np_global, proc
2851
2852 CALL struct_global%context%get(number_of_process_rows=np_global)
2853
2854 ALLOCATE (data2send(0:np_global - 1))
2855 data2send = 0
2856 DO i_local = 1, SIZE(indices_sub)
2857 i_global = indices_sub(i_local)
2858 proc = struct_global%g2p_row(i_global)
2859 data2send(proc) = data2send(proc) + 1
2860 END DO
2861
2862 ALLOCATE (index2send(0:np_global - 1))
2863 DO proc = 0, np_global - 1
2864 ALLOCATE (index2send(proc)%array(data2send(proc)))
2865 ! We want to crash if there is an error
2866 index2send(proc)%array = -1
2867 END DO
2868
      data2send = 0
      DO i_local = 1, SIZE(indices_sub)
         i_global = indices_sub(i_local)
         proc = struct_global%g2p_row(i_global)
         data2send(proc) = data2send(proc) + 1
         index2send(proc)%array(data2send(proc)) = i_local
      END DO

   END SUBROUTINE get_elements2send_row

END MODULE rpa_grad