rpa_grad.F
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                               !
!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                    !
!                                                                                                   !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                       !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Routines to calculate RI-RPA and SOS-MP2 gradients
!> \par History
!>      10.2021 created [Frederick Stein]
! **************************************************************************************************
MODULE rpa_grad
   USE iso_c_binding, ONLY: c_null_ptr,&
                            c_ptr,&
                            c_associated
   USE cp_fm_types, ONLY: &
      maxsize,&
   USE kinds, ONLY: dp,&
                    int_8
   USE machine, ONLY: m_flush,&
   USE mathconstants, ONLY: pi
   USE message_passing, ONLY: mp_comm_type,&
   USE mp2_ri_grad_util, ONLY: array2fm,&
                               fm2array,&
   USE mp2_types, ONLY: mp2_type,&
   USE rpa_util, ONLY: calc_fm_mat_s_rpa,&
   USE util, ONLY: get_limit
#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'rpa_grad'

   TYPE sos_mp2_grad_work_type
      PRIVATE
      INTEGER, DIMENSION(:, :), ALLOCATABLE :: pair_list
      TYPE(one_dim_int_array), DIMENSION(:), ALLOCATABLE :: index2send, index2recv
      REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: p
   END TYPE

   TYPE rpa_grad_work_type
      TYPE(cp_fm_type) :: fm_mat_Q_copy = cp_fm_type()
      TYPE(one_dim_int_array), DIMENSION(:, :), ALLOCATABLE :: index2send
      TYPE(two_dim_int_array), DIMENSION(:, :), ALLOCATABLE :: index2recv
      TYPE(group_dist_d1_type), DIMENSION(:), ALLOCATABLE :: gd_homo, gd_virtual
      INTEGER, DIMENSION(2) :: grid = -1, mepos = -1
      TYPE(two_dim_real_array), DIMENSION(:), ALLOCATABLE :: P_ij, P_ab
   END TYPE

   TYPE rpa_grad_type
      PRIVATE
      TYPE(cp_fm_type) :: fm_Gamma_PQ = cp_fm_type()
      TYPE(cp_fm_type), DIMENSION(:), ALLOCATABLE :: fm_y
      TYPE(sos_mp2_grad_work_type), ALLOCATABLE, DIMENSION(:) :: sos_mp2_work_occ, sos_mp2_work_virt
      TYPE(rpa_grad_work_type) :: rpa_work
   END TYPE

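   ! Thresholds controlling the contraction kernels below: GEMMs with at least
   ! spla_threshold elements are offloaded to the GPU via local_gemm, and values of
   ! DOT_BLKSIZE below blksize_threshold select the compensated dot-product path.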
   INTEGER, PARAMETER :: spla_threshold = 128*128*128*2
   INTEGER, PARAMETER :: blksize_threshold = 4

CONTAINS

! **************************************************************************************************
!> \brief Calculates the necessary minimum memory for the gradient code in MiB
!> \param homo ...
!> \param virtual ...
!> \param dimen_RI ...
!> \param mem_per_rank ...
!> \param mem_per_repl ...
!> \param do_ri_sos_laplace_mp2 ...
! **************************************************************************************************
   PURE SUBROUTINE rpa_grad_needed_mem(homo, virtual, dimen_RI, mem_per_rank, mem_per_repl, do_ri_sos_laplace_mp2)
      INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
      INTEGER, INTENT(IN) :: dimen_ri
      REAL(kind=dp), INTENT(INOUT) :: mem_per_rank, mem_per_repl
      LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2

      REAL(kind=dp) :: mem_iak, mem_kl, mem_pab, mem_pij

      mem_iak = sum(real(virtual, kind=dp)*homo)*dimen_ri
      mem_pij = sum(real(homo, kind=dp)**2)
      mem_pab = sum(real(virtual, kind=dp)**2)
      mem_kl = real(dimen_ri, kind=dp)*dimen_ri

      ! Required iaK matrices:
      !   Ytot_iaP = sum_tau Y_iaP(tau)
      !   Y_iaP(tau) = S_iaP(tau)*Q_PQ(tau) (work array)
      ! Required density matrices:
      !   Pij (local)
      !   Pab (local)
      ! Additionally with SOS-MP2:
      !   send and receive buffers for degenerate orbital pairs (rough estimate: everything)
      ! Additionally with RPA:
      !   copy of the work matrix
      !   receive buffer for the calculation of the density matrix
      !   copy of the matrix Q
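      ! Convert element counts to MiB: 8 bytes per double precision element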
      mem_per_rank = mem_per_rank + (mem_pij + mem_pab)*8.0_dp/(1024**2)
      mem_per_repl = mem_per_repl + (mem_iak + 2.0_dp*mem_iak/SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
      IF (.NOT. do_ri_sos_laplace_mp2) THEN
         mem_per_repl = mem_per_repl + (mem_iak/SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
      END IF

   END SUBROUTINE rpa_grad_needed_mem

! **************************************************************************************************
!> \brief Creates the arrays of a rpa_grad_type
!> \param rpa_grad ...
!> \param fm_mat_Q ...
!> \param fm_mat_S ...
!> \param homo ...
!> \param virtual ...
!> \param mp2_env ...
!> \param Eigenval ...
!> \param unit_nr ...
!> \param do_ri_sos_laplace_mp2 ...
! **************************************************************************************************
   SUBROUTINE rpa_grad_create(rpa_grad, fm_mat_Q, fm_mat_S, &
                              homo, virtual, mp2_env, Eigenval, unit_nr, do_ri_sos_laplace_mp2)
      TYPE(rpa_grad_type), INTENT(OUT) :: rpa_grad
      TYPE(cp_fm_type), INTENT(IN) :: fm_mat_q
      TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s
      INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
      TYPE(mp2_type), INTENT(INOUT) :: mp2_env
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
      INTEGER, INTENT(IN) :: unit_nr
      LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2

      CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_create'

      INTEGER :: handle, ispin, nrow_local, nspins

      CALL timeset(routinen, handle)

      CALL cp_fm_create(rpa_grad%fm_Gamma_PQ, matrix_struct=fm_mat_q%matrix_struct)
      CALL cp_fm_set_all(rpa_grad%fm_Gamma_PQ, 0.0_dp)

      nspins = SIZE(fm_mat_s)

      ALLOCATE (rpa_grad%fm_Y(nspins))
      DO ispin = 1, nspins
         CALL cp_fm_create(rpa_grad%fm_Y(ispin), fm_mat_s(ispin)%matrix_struct)
      END DO

      IF (do_ri_sos_laplace_mp2) THEN
         CALL sos_mp2_work_type_create(rpa_grad%sos_mp2_work_occ, rpa_grad%sos_mp2_work_virt, &
                                       unit_nr, eigenval, homo, virtual, mp2_env%ri_grad%eps_canonical, fm_mat_s)
      ELSE
         CALL rpa_work_type_create(rpa_grad%rpa_work, fm_mat_q, fm_mat_s, homo, virtual)
      END IF

      ! Set blocksize
      CALL cp_fm_struct_get(fm_mat_s(1)%matrix_struct, nrow_local=nrow_local)
      IF (mp2_env%ri_grad%dot_blksize < 1) mp2_env%ri_grad%dot_blksize = nrow_local
      mp2_env%ri_grad%dot_blksize = min(mp2_env%ri_grad%dot_blksize, nrow_local)
      IF (unit_nr > 0) THEN
         WRITE (unit_nr, '(T3,A,T75,I6)') 'GRAD_INFO| Block size for the contraction:', mp2_env%ri_grad%dot_blksize
         CALL m_flush(unit_nr)
      END IF
      CALL fm_mat_s(1)%matrix_struct%para_env%sync()

      CALL timestop(handle)

   END SUBROUTINE rpa_grad_create

! **************************************************************************************************
!> \brief ...
!> \param sos_mp2_work_occ ...
!> \param sos_mp2_work_virt ...
!> \param unit_nr ...
!> \param Eigenval ...
!> \param homo ...
!> \param virtual ...
!> \param eps_degenerate ...
!> \param fm_mat_S ...
! **************************************************************************************************
   SUBROUTINE sos_mp2_work_type_create(sos_mp2_work_occ, sos_mp2_work_virt, unit_nr, &
                                       Eigenval, homo, virtual, eps_degenerate, fm_mat_S)
      TYPE(sos_mp2_grad_work_type), ALLOCATABLE, &
         DIMENSION(:), INTENT(OUT) :: sos_mp2_work_occ, sos_mp2_work_virt
      INTEGER, INTENT(IN) :: unit_nr
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
      INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
      REAL(kind=dp), INTENT(IN) :: eps_degenerate
      TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s

      CHARACTER(LEN=*), PARAMETER :: routinen = 'sos_mp2_work_type_create'

      INTEGER :: handle, ispin, nspins

      CALL timeset(routinen, handle)

      nspins = SIZE(fm_mat_s)
      ALLOCATE (sos_mp2_work_occ(nspins), sos_mp2_work_virt(nspins))
      DO ispin = 1, nspins

         CALL create_list_nearly_degen_pairs(eigenval(1:homo(ispin), ispin), &
                                             eps_degenerate, sos_mp2_work_occ(ispin)%pair_list)
         IF (unit_nr > 0) WRITE (unit_nr, "(T3,A,T75,i6)") &
            "MO_INFO| Number of ij pairs below EPS_CANONICAL:", SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
         ALLOCATE (sos_mp2_work_occ(ispin)%P(homo(ispin) + SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)))
         sos_mp2_work_occ(ispin)%P = 0.0_dp
         CALL prepare_comm_pij(sos_mp2_work_occ(ispin), virtual(ispin), fm_mat_s(ispin))

         CALL create_list_nearly_degen_pairs(eigenval(homo(ispin) + 1:, ispin), &
                                             eps_degenerate, sos_mp2_work_virt(ispin)%pair_list)
         IF (unit_nr > 0) WRITE (unit_nr, "(T3,A,T75,i6)") &
            "MO_INFO| Number of ab pairs below EPS_CANONICAL:", SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
         ALLOCATE (sos_mp2_work_virt(ispin)%P(virtual(ispin) + SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)))
         sos_mp2_work_virt(ispin)%P = 0.0_dp
         CALL prepare_comm_pab(sos_mp2_work_virt(ispin), virtual(ispin), fm_mat_s(ispin))
      END DO

      CALL timestop(handle)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param rpa_work ...
!> \param fm_mat_Q ...
!> \param fm_mat_S ...
!> \param homo ...
!> \param virtual ...
! **************************************************************************************************
   SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual)
      TYPE(rpa_grad_work_type), INTENT(OUT) :: rpa_work
      TYPE(cp_fm_type), INTENT(IN) :: fm_mat_q
      TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s
      INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual

      CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_work_type_create'

      INTEGER :: avirt, col_global, col_local, first_p_pos(2), first_p_pos_col, handle, iocc, &
                 ispin, my_a, my_a_end, my_a_size, my_a_start, my_i, my_i_end, my_i_size, my_i_start, &
                 my_pcol, ncol_block, ncol_local, nspins, num_pe_col, proc_homo, proc_homo_send, &
                 proc_recv, proc_send, proc_virtual, proc_virtual_send
      INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send
      INTEGER, DIMENSION(:), POINTER :: col_indices

      CALL timeset(routinen, handle)

      CALL cp_fm_create(rpa_work%fm_mat_Q_copy, matrix_struct=fm_mat_q%matrix_struct)

      CALL fm_mat_s(1)%matrix_struct%context%get(number_of_process_columns=num_pe_col, my_process_column=my_pcol)

      nspins = SIZE(fm_mat_s)

      ALLOCATE (rpa_work%index2send(0:num_pe_col - 1, nspins), &
                rpa_work%index2recv(0:num_pe_col - 1, nspins), &
                rpa_work%gd_homo(nspins), rpa_work%gd_virtual(nspins), &
                data2send(0:num_pe_col - 1), data2recv(0:num_pe_col - 1), &
                rpa_work%P_ij(nspins), rpa_work%P_ab(nspins))

      ! Determine new process grid
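      ! proc_homo becomes the largest divisor of num_pe_col not exceeding
      ! ceiling(sqrt(num_pe_col)), giving a near-square grid proc_virtual x proc_homo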
      proc_homo = max(1, ceiling(sqrt(real(num_pe_col, kind=dp))))
      DO WHILE (mod(num_pe_col, proc_homo) /= 0)
         proc_homo = proc_homo - 1
      END DO
      proc_virtual = num_pe_col/proc_homo

      rpa_work%grid(1) = proc_virtual
      rpa_work%grid(2) = proc_homo

      rpa_work%mepos(1) = mod(my_pcol, proc_virtual)
      rpa_work%mepos(2) = my_pcol/proc_virtual

      DO ispin = 1, nspins

         ! Determine distributions of the orbitals
         CALL create_group_dist(rpa_work%gd_homo(ispin), proc_homo, homo(ispin))
         CALL create_group_dist(rpa_work%gd_virtual(ispin), proc_virtual, virtual(ispin))

         CALL cp_fm_struct_get(fm_mat_s(ispin)%matrix_struct, ncol_local=ncol_local, col_indices=col_indices, &
                               first_p_pos=first_p_pos, ncol_block=ncol_block)
         first_p_pos_col = first_p_pos(2)

         data2send = 0
         ! Count the amount of data2send to each process
         DO col_local = 1, ncol_local
            col_global = col_indices(col_local)

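            ! Decompose the composite column index: col_global = (iocc - 1)*virtual + avirt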
            iocc = (col_global - 1)/virtual(ispin) + 1
            avirt = col_global - (iocc - 1)*virtual(ispin)

            proc_homo_send = group_dist_proc(rpa_work%gd_homo(ispin), iocc)
            proc_virtual_send = group_dist_proc(rpa_work%gd_virtual(ispin), avirt)

            proc_send = proc_homo_send*proc_virtual + proc_virtual_send

            data2send(proc_send) = data2send(proc_send) + 1
         END DO

         DO proc_send = 0, num_pe_col - 1
            ALLOCATE (rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)))
         END DO

         ! Prepare the indices
         data2send = 0
         DO col_local = 1, ncol_local
            col_global = col_indices(col_local)

            iocc = (col_global - 1)/virtual(ispin) + 1
            avirt = col_global - (iocc - 1)*virtual(ispin)

            proc_homo_send = group_dist_proc(rpa_work%gd_homo(ispin), iocc)
            proc_virtual_send = group_dist_proc(rpa_work%gd_virtual(ispin), avirt)

            proc_send = proc_homo_send*proc_virtual + proc_virtual_send

            data2send(proc_send) = data2send(proc_send) + 1

            rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)) = col_local
         END DO

         ! Count the amount of data2recv from each process
         CALL get_group_dist(rpa_work%gd_homo(ispin), my_pcol/proc_virtual, my_i_start, my_i_end, my_i_size)
         CALL get_group_dist(rpa_work%gd_virtual(ispin), mod(my_pcol, proc_virtual), my_a_start, my_a_end, my_a_size)

         data2recv = 0
         DO my_i = my_i_start, my_i_end
            DO my_a = my_a_start, my_a_end
               proc_recv = cp_fm_indxg2p((my_i - 1)*virtual(ispin) + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
               data2recv(proc_recv) = data2recv(proc_recv) + 1
            END DO
         END DO

         DO proc_recv = 0, num_pe_col - 1
            ALLOCATE (rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)))
         END DO

         data2recv = 0
         DO my_i = my_i_start, my_i_end
            DO my_a = my_a_start, my_a_end
               proc_recv = cp_fm_indxg2p((my_i - 1)*virtual(ispin) + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
               data2recv(proc_recv) = data2recv(proc_recv) + 1

               rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)) = my_i - my_i_start + 1
               rpa_work%index2recv(proc_recv, ispin)%array(1, data2recv(proc_recv)) = my_a - my_a_start + 1
            END DO
         END DO

         ALLOCATE (rpa_work%P_ij(ispin)%array(my_i_size, homo(ispin)), &
                   rpa_work%P_ab(ispin)%array(my_a_size, virtual(ispin)))
         rpa_work%P_ij(ispin)%array(:, :) = 0.0_dp
         rpa_work%P_ab(ispin)%array(:, :) = 0.0_dp

      END DO

      DEALLOCATE (data2send, data2recv)

      CALL timestop(handle)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param Eigenval ...
!> \param eps_degen ...
!> \param pair_list ...
! **************************************************************************************************
   SUBROUTINE create_list_nearly_degen_pairs(Eigenval, eps_degen, pair_list)
      REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
      REAL(kind=dp), INTENT(IN) :: eps_degen
      INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: pair_list

      INTEGER :: my_i, my_j, num_orbitals, num_pairs, &
                 pair_counter

      num_orbitals = SIZE(eigenval)

      ! Determine the number of nearly degenerate orbital pairs;
      ! the trivial diagonal elements are treated separately
      num_pairs = 0
      DO my_i = 1, num_orbitals
         DO my_j = 1, num_orbitals
            IF (my_i == my_j) cycle
            IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen) num_pairs = num_pairs + 1
         END DO
      END DO
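      ! Note: each unordered pair {i, j} enters the list twice, once per ordering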
      ALLOCATE (pair_list(2, num_pairs))

      ! Collect the required pairs
      pair_counter = 1
      DO my_i = 1, num_orbitals
         DO my_j = 1, num_orbitals
            IF (my_i == my_j) cycle
            IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen) THEN
               pair_list(1, pair_counter) = my_i
               pair_list(2, pair_counter) = my_j
               pair_counter = pair_counter + 1
            END IF
         END DO
      END DO

   END SUBROUTINE create_list_nearly_degen_pairs

! **************************************************************************************************
!> \brief ...
!> \param sos_mp2_work ...
!> \param virtual ...
!> \param fm_mat_S ...
! **************************************************************************************************
   SUBROUTINE prepare_comm_pij(sos_mp2_work, virtual, fm_mat_S)
      TYPE(sos_mp2_grad_work_type), INTENT(INOUT) :: sos_mp2_work
      INTEGER, INTENT(IN) :: virtual
      TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s

      CHARACTER(LEN=*), PARAMETER :: routinen = 'prepare_comm_Pij'

      INTEGER :: avirt, col_global, col_local, counter, first_p_pos(2), first_p_pos_col, handle, &
                 ij_counter, iocc, my_i, my_j, my_pcol, my_prow, ncol_block, ncol_local, nrow_local, &
                 num_ij_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, tag
      INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send
      INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
      INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
      TYPE(cp_blacs_env_type), POINTER :: context
      TYPE(mp_comm_type) :: comm_exchange
      TYPE(mp_para_env_type), POINTER :: para_env

      CALL timeset(routinen, handle)

      tag = 44

      CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
      ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
                sos_mp2_work%index2recv(0:num_pe_col - 1))

      ALLOCATE (data2send(0:num_pe_col - 1))
      ALLOCATE (data2recv(0:num_pe_col - 1))

      CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
                            ncol_local=ncol_local, col_indices=col_indices, &
                            context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
      CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
                       blacs2mpi=blacs2mpi)
      first_p_pos_col = first_p_pos(2)

      num_ij_pairs = SIZE(sos_mp2_work%pair_list, 2)

      IF (num_ij_pairs > 0) THEN

         CALL comm_exchange%from_split(para_env, my_prow)

         data2send = 0
         data2recv = 0

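         ! Walk once around the ring of process columns and count how many of the
         ! local ia-columns contribute to a nearly degenerate pair stored on each target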
         DO proc_shift = 0, num_pe_col - 1
            pcol_send = modulo(my_pcol + proc_shift, num_pe_col)

            counter = 0
            DO col_local = 1, ncol_local
               col_global = col_indices(col_local)

               iocc = max(1, col_global - 1)/virtual + 1
               avirt = col_global - (iocc - 1)*virtual

               DO ij_counter = 1, num_ij_pairs

                  my_i = sos_mp2_work%pair_list(1, ij_counter)
                  my_j = sos_mp2_work%pair_list(2, ij_counter)

                  IF (iocc /= my_j) cycle
                  pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
                  IF (pcol /= pcol_send) cycle

                  counter = counter + 1

                  EXIT

               END DO
            END DO
            data2send(pcol_send) = counter
         END DO

         CALL comm_exchange%alltoall(data2send, data2recv, 1)

         DO proc_shift = 0, num_pe_col - 1
            pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
            pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)

            ! Collect indices and exchange
            ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))

            counter = 0
            DO col_local = 1, ncol_local
               col_global = col_indices(col_local)

               iocc = max(1, col_global - 1)/virtual + 1
               avirt = col_global - (iocc - 1)*virtual

               DO ij_counter = 1, num_ij_pairs

                  my_i = sos_mp2_work%pair_list(1, ij_counter)
                  my_j = sos_mp2_work%pair_list(2, ij_counter)

                  IF (iocc /= my_j) cycle
                  pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
                  IF (pcol /= pcol_send) cycle

                  counter = counter + 1

                  sos_mp2_work%index2send(pcol_send)%array(counter) = col_global

                  EXIT

               END DO
            END DO

            ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))

            CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
                                   sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)

            ! Convert global coordinates to local coordinates since we always work with the latter
            DO counter = 1, data2send(pcol_send)
               sos_mp2_work%index2send(pcol_send)%array(counter) = &
                  cp_fm_indxg2l(sos_mp2_work%index2send(pcol_send)%array(counter), &
                                ncol_block, 0, first_p_pos_col, num_pe_col)
            END DO
         END DO

         CALL comm_exchange%free()
      END IF

      DEALLOCATE (data2send, data2recv)

      CALL timestop(handle)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param sos_mp2_work ...
!> \param virtual ...
!> \param fm_mat_S ...
! **************************************************************************************************
   SUBROUTINE prepare_comm_pab(sos_mp2_work, virtual, fm_mat_S)
      TYPE(sos_mp2_grad_work_type), INTENT(INOUT) :: sos_mp2_work
      INTEGER, INTENT(IN) :: virtual
      TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s

      CHARACTER(LEN=*), PARAMETER :: routinen = 'prepare_comm_Pab'

      INTEGER :: ab_counter, avirt, col_global, col_local, counter, first_p_pos(2), &
                 first_p_pos_col, handle, iocc, my_a, my_b, my_pcol, my_prow, ncol_block, ncol_local, &
                 nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, tag
      INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send
      INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
      INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
      TYPE(cp_blacs_env_type), POINTER :: context
      TYPE(mp_comm_type) :: comm_exchange
      TYPE(mp_para_env_type), POINTER :: para_env

      CALL timeset(routinen, handle)

      tag = 44

      CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
      ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
                sos_mp2_work%index2recv(0:num_pe_col - 1))

      num_ab_pairs = SIZE(sos_mp2_work%pair_list, 2)
      IF (num_ab_pairs > 0) THEN

         CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
                               ncol_local=ncol_local, col_indices=col_indices, &
                               context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
         CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
                          blacs2mpi=blacs2mpi)
         first_p_pos_col = first_p_pos(2)

         CALL comm_exchange%from_split(para_env, my_prow)

         ALLOCATE (data2send(0:num_pe_col - 1))
         ALLOCATE (data2recv(0:num_pe_col - 1))

         data2send = 0
         data2recv = 0
         DO proc_shift = 0, num_pe_col - 1
            pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
            pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)

            counter = 0
            DO col_local = 1, ncol_local
               col_global = col_indices(col_local)

               iocc = max(1, col_global - 1)/virtual + 1
               avirt = col_global - (iocc - 1)*virtual

               DO ab_counter = 1, num_ab_pairs

                  my_a = sos_mp2_work%pair_list(1, ab_counter)
                  my_b = sos_mp2_work%pair_list(2, ab_counter)

                  IF (avirt /= my_b) cycle
                  pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
                  IF (pcol /= pcol_send) cycle

                  counter = counter + 1

                  EXIT

               END DO
            END DO
            data2send(pcol_send) = counter
         END DO

         CALL comm_exchange%alltoall(data2send, data2recv, 1)

         DO proc_shift = 0, num_pe_col - 1
            pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
            pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)

            ! Collect indices and exchange
            ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))

            counter = 0
            DO col_local = 1, ncol_local
               col_global = col_indices(col_local)

               iocc = max(1, col_global - 1)/virtual + 1
               avirt = col_global - (iocc - 1)*virtual

               DO ab_counter = 1, num_ab_pairs

                  my_a = sos_mp2_work%pair_list(1, ab_counter)
                  my_b = sos_mp2_work%pair_list(2, ab_counter)

                  IF (avirt /= my_b) cycle
                  pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
                  IF (pcol /= pcol_send) cycle

                  counter = counter + 1

                  sos_mp2_work%index2send(pcol_send)%array(counter) = col_global

                  EXIT

               END DO
            END DO

            ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))

            CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
                                   sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)

            ! Convert global coordinates to local coordinates since we always work with the latter
            DO counter = 1, data2send(pcol_send)
               sos_mp2_work%index2send(pcol_send)%array(counter) = &
                  cp_fm_indxg2l(sos_mp2_work%index2send(pcol_send)%array(counter), &
                                ncol_block, 0, first_p_pos_col, num_pe_col)
            END DO
         END DO

         CALL comm_exchange%free()
         DEALLOCATE (data2send, data2recv)

      END IF

      CALL timestop(handle)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param fm_mat_Q ...
!> \param rpa_grad ...
! **************************************************************************************************
   SUBROUTINE rpa_grad_copy_q(fm_mat_Q, rpa_grad)
      TYPE(cp_fm_type), INTENT(IN) :: fm_mat_q
      TYPE(rpa_grad_type), INTENT(INOUT) :: rpa_grad

      CALL cp_fm_to_fm(fm_mat_q, rpa_grad%rpa_work%fm_mat_Q_copy)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param mp2_env ...
!> \param rpa_grad ...
!> \param do_ri_sos_laplace_mp2 ...
!> \param fm_mat_Q ...
!> \param fm_mat_Q_gemm ...
!> \param dgemm_counter ...
!> \param fm_mat_S ...
!> \param omega ...
!> \param homo ...
!> \param virtual ...
!> \param Eigenval ...
!> \param weight ...
!> \param unit_nr ...
! **************************************************************************************************
   SUBROUTINE rpa_grad_matrix_operations(mp2_env, rpa_grad, do_ri_sos_laplace_mp2, fm_mat_Q, fm_mat_Q_gemm, &
                                         dgemm_counter, fm_mat_S, omega, homo, virtual, Eigenval, weight, unit_nr)
      TYPE(mp2_type), INTENT(INOUT) :: mp2_env
      TYPE(rpa_grad_type), INTENT(INOUT) :: rpa_grad
      LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2
      TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_q, fm_mat_q_gemm
      TYPE(dgemm_counter_type), INTENT(INOUT) :: dgemm_counter
      TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_s
      REAL(kind=dp), INTENT(IN) :: omega
      INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: eigenval
      REAL(kind=dp), INTENT(IN) :: weight
      INTEGER, INTENT(IN) :: unit_nr

      CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_matrix_operations'

      INTEGER :: col_global, col_local, dimen_ia, &
                 dimen_ri, handle, handle2, ispin, &
                 jspin, ncol_local, nrow_local, nspins, &
                 row_local
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
         TARGET :: mat_s_3d, mat_work_iap_3d
      TYPE(cp_fm_type) :: fm_work_iap, fm_work_pq

      CALL timeset(routinen, handle)

      nspins = SIZE(fm_mat_q)

      CALL cp_fm_get_info(fm_mat_q(1), nrow_global=dimen_ri, nrow_local=nrow_local, ncol_local=ncol_local, &
                          col_indices=col_indices, row_indices=row_indices)

      IF (.NOT. do_ri_sos_laplace_mp2) THEN
         CALL cp_fm_create(fm_work_pq, fm_mat_q(1)%matrix_struct)

         ! calculate [1+Q(iw')]^-1
         CALL cp_fm_cholesky_invert(fm_mat_q(1))
         ! symmetrize the result, fm_work_PQ is only a work matrix
         CALL cp_fm_upper_to_full(fm_mat_q(1), fm_work_pq)

         CALL cp_fm_release(fm_work_pq)

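         ! Subtract the identity from the diagonal: Q <- [1+Q]^-1 - 1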
         DO col_local = 1, ncol_local
            col_global = col_indices(col_local)
            DO row_local = 1, nrow_local
               IF (col_global == row_indices(row_local)) THEN
                  fm_mat_q(1)%local_data(row_local, col_local) = fm_mat_q(1)%local_data(row_local, col_local) - 1.0_dp
                  EXIT
               END IF
            END DO
         END DO

         CALL timeset(routinen//"_PQ", handle2)
         CALL dgemm_counter_start(dgemm_counter)
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
                            matrix_a=rpa_grad%rpa_work%fm_mat_Q_copy, matrix_b=fm_mat_q(1), beta=1.0_dp, &
                            matrix_c=rpa_grad%fm_Gamma_PQ)
         CALL dgemm_counter_stop(dgemm_counter, dimen_ri, dimen_ri, dimen_ri)
         CALL timestop(handle2)

         CALL cp_fm_to_fm_submat_general(fm_mat_q(1), fm_mat_q_gemm(1), dimen_ri, dimen_ri, 1, 1, 1, 1, &
                                         fm_mat_q_gemm(1)%matrix_struct%context)
      END IF

      DO ispin = 1, nspins
         IF (do_ri_sos_laplace_mp2) THEN
            ! The spin of the other Q matrix is always the other spin
            jspin = nspins - ispin + 1
         ELSE
            ! or the first matrix in the case of RPA
            jspin = 1
         END IF

         IF (do_ri_sos_laplace_mp2) THEN
            CALL timeset(routinen//"_PQ", handle2)
            CALL dgemm_counter_start(dgemm_counter)
            CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
                               matrix_a=fm_mat_q(ispin), matrix_b=fm_mat_q(jspin), beta=1.0_dp, &
                               matrix_c=rpa_grad%fm_Gamma_PQ)
            CALL dgemm_counter_stop(dgemm_counter, dimen_ri, dimen_ri, dimen_ri)
            CALL timestop(handle2)

            CALL cp_fm_to_fm_submat_general(fm_mat_q(jspin), fm_mat_q_gemm(jspin), dimen_ri, dimen_ri, 1, 1, 1, 1, &
                                            fm_mat_q_gemm(jspin)%matrix_struct%context)
         ELSE
            CALL calc_fm_mat_s_rpa(fm_mat_s(ispin), .true., virtual(ispin), eigenval(:, ispin), &
                                   homo(ispin), omega, 0.0_dp)
         END IF

         CALL timeset(routinen//"_contr_S", handle2)
         CALL cp_fm_create(fm_work_iap, rpa_grad%fm_Y(ispin)%matrix_struct)

         CALL cp_fm_get_info(fm_mat_s(ispin), ncol_global=dimen_ia)

         CALL dgemm_counter_start(dgemm_counter)
         CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ia, k=dimen_ri, alpha=1.0_dp, &
                            matrix_a=fm_mat_q_gemm(jspin), matrix_b=fm_mat_s(ispin), beta=0.0_dp, &
                            matrix_c=fm_work_iap)
         CALL dgemm_counter_stop(dgemm_counter, dimen_ia, dimen_ri, dimen_ri)
         CALL timestop(handle2)

         IF (do_ri_sos_laplace_mp2) THEN
            CALL calc_p_sos_mp2(homo(ispin), fm_mat_s(ispin), fm_work_iap, &
                                rpa_grad%sos_mp2_work_occ(ispin), rpa_grad%sos_mp2_work_virt(ispin), &
                                omega, weight, virtual(ispin), eigenval(:, ispin), mp2_env%ri_grad%dot_blksize)

            CALL calc_fm_mat_s_laplace(fm_work_iap, homo(ispin), virtual(ispin), eigenval(:, ispin), omega)

            CALL cp_fm_scale_and_add(1.0_dp, rpa_grad%fm_Y(ispin), -weight, fm_work_iap)

            CALL cp_fm_release(fm_work_iap)
         ELSE
            ! To save memory, we add it now
            CALL cp_fm_scale_and_add(1.0_dp, rpa_grad%fm_Y(ispin), -weight, fm_work_iap)

            ! Redistribute both matrices and deallocate fm_work_iaP
            CALL redistribute_fm_mat_s(rpa_grad%rpa_work%index2send(:, ispin), rpa_grad%rpa_work%index2recv(:, ispin), &
                                       fm_work_iap, mat_work_iap_3d, &
                                       rpa_grad%rpa_work%gd_homo(ispin), rpa_grad%rpa_work%gd_virtual(ispin), &
                                       rpa_grad%rpa_work%mepos)
            CALL cp_fm_release(fm_work_iap)

            CALL redistribute_fm_mat_s(rpa_grad%rpa_work%index2send(:, ispin), rpa_grad%rpa_work%index2recv(:, ispin), &
                                       fm_mat_s(ispin), mat_s_3d, &
                                       rpa_grad%rpa_work%gd_homo(ispin), rpa_grad%rpa_work%gd_virtual(ispin), &
                                       rpa_grad%rpa_work%mepos)

            ! Now collect the density matrix
            CALL calc_p_rpa(mat_s_3d, mat_work_iap_3d, rpa_grad%rpa_work%gd_homo(ispin), rpa_grad%rpa_work%gd_virtual(ispin), &
                            rpa_grad%rpa_work%grid, rpa_grad%rpa_work%mepos, &
                            fm_mat_s(ispin)%matrix_struct, &
                            rpa_grad%rpa_work%P_ij(ispin)%array, rpa_grad%rpa_work%P_ab(ispin)%array, &
                            weight, omega, eigenval(:, ispin), homo(ispin), unit_nr, mp2_env)

            DEALLOCATE (mat_work_iap_3d, mat_s_3d)

            CALL remove_scaling_factor_rpa(fm_mat_s(ispin), virtual(ispin), eigenval(:, ispin), homo(ispin), omega)

         END IF

      END DO

      CALL timestop(handle)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param homo ...
!> \param fm_mat_S ...
!> \param fm_work_iaP ...
!> \param sos_mp2_work_occ ...
!> \param sos_mp2_work_virt ...
!> \param omega ...
!> \param weight ...
!> \param virtual ...
!> \param Eigenval ...
!> \param dot_blksize ...
! **************************************************************************************************
   SUBROUTINE calc_p_sos_mp2(homo, fm_mat_S, fm_work_iaP, sos_mp2_work_occ, sos_mp2_work_virt, &
                             omega, weight, virtual, Eigenval, dot_blksize)
      INTEGER, INTENT(IN) :: homo
      TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s, fm_work_iap
      TYPE(sos_mp2_grad_work_type), INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
      REAL(kind=dp), INTENT(IN) :: omega, weight
      INTEGER, INTENT(IN) :: virtual
      REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
      INTEGER, INTENT(IN) :: dot_blksize

      CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_sos_mp2'

      INTEGER :: avirt, col_global, col_local, handle, &
                 handle2, iocc, my_a, my_i, ncol_local, &
                 nrow_local, num_ab_pairs, num_ij_pairs
      INTEGER, DIMENSION(:), POINTER :: col_indices
      REAL(kind=dp) :: ddot, trace

      CALL timeset(routinen, handle)

      CALL cp_fm_get_info(fm_mat_s, col_indices=col_indices, ncol_local=ncol_local, nrow_local=nrow_local)

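      ! Diagonal contributions: P(i,i) accumulates -omega*weight*sum_{aP} S_iaP*(Q*S)_iaP,
      ! and P(a,a) below accumulates the same trace with a positive sign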
925 CALL timeset(routinen//"_Pij_diag", handle2)
926 DO my_i = 1, homo
927 ! Collect the contributions of the matrix elements
928
929 trace = 0.0_dp
930
931 DO col_local = 1, ncol_local
932 col_global = col_indices(col_local)
933
934 iocc = max(1, col_global - 1)/virtual + 1
935 avirt = col_global - (iocc - 1)*virtual
936
937 IF (iocc == my_i) trace = trace + &
938 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
939 END DO
940
941 sos_mp2_work_occ%P(my_i) = sos_mp2_work_occ%P(my_i) - trace*omega*weight
942
943 END DO
944 CALL timestop(handle2)
945
946 CALL timeset(routinen//"_Pab_diag", handle2)
947 DO my_a = 1, virtual
948 ! Collect the contributions of the matrix elements
949
950 trace = 0.0_dp
951
952 DO col_local = 1, ncol_local
953 col_global = col_indices(col_local)
954
955 iocc = max(1, col_global - 1)/virtual + 1
956 avirt = col_global - (iocc - 1)*virtual
957
958 IF (avirt == my_a) trace = trace + &
959 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
960 END DO
961
962 sos_mp2_work_virt%P(my_a) = sos_mp2_work_virt%P(my_a) + trace*omega*weight
963
964 END DO
965 CALL timestop(handle2)
966
967 ! Loop over list and carry out operations
968 num_ij_pairs = SIZE(sos_mp2_work_occ%pair_list, 2)
969 num_ab_pairs = SIZE(sos_mp2_work_virt%pair_list, 2)
970 IF (num_ij_pairs > 0) THEN
971 CALL calc_pij_degen(fm_work_iap, fm_mat_s, sos_mp2_work_occ%pair_list, &
972 virtual, sos_mp2_work_occ%P(homo + 1:), eigenval(:homo), omega, weight, &
973 sos_mp2_work_occ%index2send, sos_mp2_work_occ%index2recv, dot_blksize)
974 END IF
975 IF (num_ab_pairs > 0) THEN
976 CALL calc_pab_degen(fm_work_iap, fm_mat_s, sos_mp2_work_virt%pair_list, &
977 virtual, sos_mp2_work_virt%P(virtual + 1:), eigenval(homo + 1:), omega, weight, &
978 sos_mp2_work_virt%index2send, sos_mp2_work_virt%index2recv, dot_blksize)
979 END IF
980
981 CALL timestop(handle)
982
983 END SUBROUTINE
984
! **************************************************************************************************
!> \brief ...
!> \param mat_S_1D ...
!> \param mat_work_iaP_3D ...
!> \param gd_homo ...
!> \param gd_virtual ...
!> \param grid ...
!> \param mepos ...
!> \param fm_struct_S ...
!> \param P_ij ...
!> \param P_ab ...
!> \param weight ...
!> \param omega ...
!> \param Eigenval ...
!> \param homo ...
!> \param unit_nr ...
!> \param mp2_env ...
! **************************************************************************************************
   SUBROUTINE calc_p_rpa(mat_S_1D, mat_work_iaP_3D, gd_homo, gd_virtual, grid, mepos, &
                         fm_struct_S, P_ij, P_ab, weight, omega, Eigenval, homo, unit_nr, mp2_env)
      REAL(kind=dp), DIMENSION(*), INTENT(INOUT), TARGET :: mat_s_1d
      REAL(kind=dp), DIMENSION(:, :, :), INTENT(INOUT) :: mat_work_iap_3d
      TYPE(group_dist_d1_type), INTENT(IN) :: gd_homo, gd_virtual
      INTEGER, DIMENSION(2), INTENT(IN) :: grid, mepos
      TYPE(cp_fm_struct_type), INTENT(IN), POINTER :: fm_struct_s
      REAL(kind=dp), DIMENSION(:, :) :: p_ij, p_ab
      REAL(kind=dp), INTENT(IN) :: weight, omega
      REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
      INTEGER, INTENT(IN) :: homo, unit_nr
      TYPE(mp2_type), INTENT(INOUT) :: mp2_env

      CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_rpa'

      INTEGER :: completed, handle, handle2, my_a_end, my_a_size, my_a_start, my_i_end, my_i_size, &
                 my_i_start, my_p_size, my_prow, number_of_parallel_channels, proc_a_recv, proc_a_send, &
                 proc_i_recv, proc_i_send, proc_recv, proc_send, proc_shift, recv_a_end, recv_a_size, &
                 recv_a_start, recv_i_end, recv_i_size, recv_i_start, tag
      INTEGER(KIND=int_8) :: mem, number_of_elements_per_blk
      INTEGER, ALLOCATABLE, DIMENSION(:) :: procs_recv
      INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
      REAL(kind=dp) :: mem_per_block, mem_real
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:), TARGET :: buffer_compens_1d
      REAL(kind=dp), DIMENSION(:, :, :), POINTER :: mat_s_3d
      TYPE(cp_1d_r_cp_type), ALLOCATABLE, DIMENSION(:) :: buffer_1d
      TYPE(cp_3d_r_cp_type), ALLOCATABLE, DIMENSION(:) :: buffer_3d
      TYPE(mp_para_env_type), POINTER :: para_env
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_requests, send_requests

      CALL timeset(routinen, handle)

      ! We allocate it at every step to reduce potential memory conflicts with COSMA
      IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN
         IF (.NOT. c_associated(mp2_env%local_gemm_ctx)) THEN
            CALL local_gemm_create(mp2_env%local_gemm_ctx, local_gemm_pu_gpu)
            CALL local_gemm_set_op_threshold_gpu(mp2_env%local_gemm_ctx, spla_threshold)
         END IF
      END IF

      tag = 47

      my_p_size = SIZE(mat_work_iap_3d, 1)

      CALL cp_fm_struct_get(fm_struct_s, para_env=para_env)
      CALL fm_struct_s%context%get(my_process_row=my_prow, blacs2mpi=blacs2mpi, para_env=para_env)

      CALL get_group_dist(gd_virtual, mepos(1), my_a_start, my_a_end, my_a_size)
      CALL get_group_dist(gd_homo, mepos(2), my_i_start, my_i_end, my_i_size)

      ! We have to remap the indices because mp_sendrecv requires a 3D array (to match
      ! mat_work_iaP_3D) whereas dgemm requires 2D arrays. Fortran 2008 allows
      ! rank-changing pointer remapping, but GCC 7 does not support it properly.
      mat_s_3d(1:my_p_size, 1:my_a_size, 1:my_i_size) => mat_s_1d(1:int(my_p_size, int_8)*my_a_size*my_i_size)

      number_of_elements_per_blk = max(int(maxsize(gd_homo), kind=int_8)*my_a_size, &
                                       int(maxsize(gd_virtual), kind=int_8)*my_i_size)*my_p_size

      ! Determine the available memory and estimate the number of possible parallel communication channels
      CALL m_memory(mem)
      mem_real = real(mem, kind=dp)
      mem_per_block = real(number_of_elements_per_blk, kind=dp)*8.0_dp
      number_of_parallel_channels = max(1, min(maxval(grid) - 1, floor(mem_real/mem_per_block)))
      CALL para_env%min(number_of_parallel_channels)
      IF (mp2_env%ri_grad%max_parallel_comm > 0) &
         number_of_parallel_channels = min(number_of_parallel_channels, mp2_env%ri_grad%max_parallel_comm)

      IF (unit_nr > 0) THEN
         WRITE (unit_nr, '(T3,A,T75,I6)') 'GRAD_INFO| Number of parallel communication channels:', number_of_parallel_channels
         CALL m_flush(unit_nr)
      END IF
      CALL para_env%sync()

      ALLOCATE (buffer_1d(number_of_parallel_channels))
      DO proc_shift = 1, number_of_parallel_channels
         ALLOCATE (buffer_1d(proc_shift)%array(number_of_elements_per_blk))
      END DO

      ALLOCATE (buffer_3d(number_of_parallel_channels))

      ! Allocate buffers for the vectorized version of the Kahan summation
      IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN
         ALLOCATE (buffer_compens_1d(2*max(my_a_size*maxsize(gd_virtual), my_i_size*maxsize(gd_homo))))
      END IF

      IF (number_of_parallel_channels > 1) THEN
         ALLOCATE (procs_recv(number_of_parallel_channels))
         ALLOCATE (recv_requests(number_of_parallel_channels))
         ALLOCATE (send_requests(maxval(grid) - 1))
      END IF

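      ! Ring exchange along the virtual dimension of the process grid: pre-post
      ! non-blocking receives on up to number_of_parallel_channels neighbours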
      IF (number_of_parallel_channels > 1 .AND. grid(1) > 1) THEN
         CALL timeset(routinen//"_comm_a", handle2)
         recv_requests(:) = mp_request_null
         procs_recv(:) = -1
         DO proc_shift = 1, min(grid(1) - 1, number_of_parallel_channels)
            proc_a_recv = modulo(mepos(1) - proc_shift, grid(1))
            proc_recv = mepos(2)*grid(1) + proc_a_recv

            CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)

            buffer_3d(proc_shift)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
               buffer_1d(proc_shift)%array(1:int(my_p_size, kind=int_8)*recv_a_size*my_i_size)

            CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
                                recv_requests(proc_shift), tag)

            procs_recv(proc_shift) = proc_a_recv
         END DO

         send_requests(:) = mp_request_null
         DO proc_shift = 1, grid(1) - 1
            proc_a_send = modulo(mepos(1) + proc_shift, grid(1))
            proc_send = mepos(2)*grid(1) + proc_a_send

            CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
                                send_requests(proc_shift), tag)
         END DO
         CALL timestop(handle2)
      END IF

      CALL calc_p_rpa_a(p_ab(:, my_a_start:my_a_end), &
                        mat_s_3d, mat_work_iap_3d, &
                        mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
                        eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
                        eigenval(homo + my_a_start:homo + my_a_end), omega, weight)

      DO proc_shift = 1, grid(1) - 1
         CALL timeset(routinen//"_comm_a", handle2)
         IF (number_of_parallel_channels > 1) THEN
            CALL mp_waitany(recv_requests, completed)

            CALL get_group_dist(gd_virtual, procs_recv(completed), recv_a_start, recv_a_end, recv_a_size)
         ELSE
            proc_a_send = modulo(mepos(1) + proc_shift, grid(1))
            proc_a_recv = modulo(mepos(1) - proc_shift, grid(1))

            proc_send = mepos(2)*grid(1) + proc_a_send
            proc_recv = mepos(2)*grid(1) + proc_a_recv

            CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)

            buffer_3d(1)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
               buffer_1d(1)%array(1:int(my_p_size, kind=int_8)*recv_a_size*my_i_size)

            CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
                                   buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
            completed = 1
         END IF
         CALL timestop(handle2)

         CALL calc_p_rpa_a(p_ab(:, recv_a_start:recv_a_end), &
                           mat_s_3d, buffer_3d(completed)%array, &
                           mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
                           eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
                           eigenval(homo + recv_a_start:homo + recv_a_end), omega, weight)

         IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(1)) THEN
            proc_a_recv = modulo(mepos(1) - proc_shift - number_of_parallel_channels, grid(1))
            proc_recv = mepos(2)*grid(1) + proc_a_recv

            CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)

            buffer_3d(completed)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
               buffer_1d(completed)%array(1:int(my_p_size, kind=int_8)*recv_a_size*my_i_size)

            CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
                                recv_requests(completed), tag)

            procs_recv(completed) = proc_a_recv
         END IF
      END DO

      IF (number_of_parallel_channels > 1 .AND. grid(1) > 1) THEN
         CALL mp_waitall(send_requests)
      END IF

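      ! Same ring exchange, now along the occupied dimension of the process grid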
      IF (number_of_parallel_channels > 1 .AND. grid(2) > 1) THEN
         recv_requests(:) = mp_request_null
         procs_recv(:) = -1
         DO proc_shift = 1, min(grid(2) - 1, number_of_parallel_channels)
            proc_i_recv = modulo(mepos(2) - proc_shift, grid(2))
            proc_recv = proc_i_recv*grid(1) + mepos(1)

            CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)

            buffer_3d(proc_shift)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
               buffer_1d(proc_shift)%array(1:int(my_p_size, kind=int_8)*my_a_size*recv_i_size)

            CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
                                recv_requests(proc_shift), tag)

            procs_recv(proc_shift) = proc_i_recv
         END DO

         send_requests(:) = mp_request_null
         DO proc_shift = 1, grid(2) - 1
            proc_i_send = modulo(mepos(2) + proc_shift, grid(2))
            proc_send = proc_i_send*grid(1) + mepos(1)

            CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
                                send_requests(proc_shift), tag)
         END DO
      END IF

      CALL calc_p_rpa_i(p_ij(:, my_i_start:my_i_end), &
                        mat_s_3d, mat_work_iap_3d, &
                        mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
                        eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
                        eigenval(my_i_start:my_i_end), omega, weight)

      DO proc_shift = 1, grid(2) - 1
         CALL timeset(routinen//"_comm_i", handle2)
         IF (number_of_parallel_channels > 1) THEN
            CALL mp_waitany(recv_requests, completed)

            CALL get_group_dist(gd_homo, procs_recv(completed), recv_i_start, recv_i_end, recv_i_size)
         ELSE
            proc_i_send = modulo(mepos(2) + proc_shift, grid(2))
            proc_i_recv = modulo(mepos(2) - proc_shift, grid(2))

            proc_send = proc_i_send*grid(1) + mepos(1)
            proc_recv = proc_i_recv*grid(1) + mepos(1)

            CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)

            buffer_3d(1)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
               buffer_1d(1)%array(1:int(my_p_size, kind=int_8)*my_a_size*recv_i_size)

            CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
                                   buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
            completed = 1
         END IF
         CALL timestop(handle2)

         CALL calc_p_rpa_i(p_ij(:, recv_i_start:recv_i_end), &
                           mat_s_3d, buffer_3d(completed)%array, &
                           mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
                           eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
                           eigenval(recv_i_start:recv_i_end), omega, weight)

         IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(2)) THEN
            proc_i_recv = modulo(mepos(2) - proc_shift - number_of_parallel_channels, grid(2))
            proc_recv = proc_i_recv*grid(1) + mepos(1)

            CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)

            buffer_3d(completed)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
               buffer_1d(completed)%array(1:int(my_p_size, kind=int_8)*my_a_size*recv_i_size)

            CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
                                recv_requests(completed), tag)

            procs_recv(completed) = proc_i_recv
         END IF
      END DO

      IF (number_of_parallel_channels > 1 .AND. grid(2) > 1) THEN
         CALL mp_waitall(send_requests)
      END IF

      IF (number_of_parallel_channels > 1) THEN
         DEALLOCATE (procs_recv)
         DEALLOCATE (recv_requests)
         DEALLOCATE (send_requests)
      END IF

      IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN
         ! Release the memory allocated by local_gemm when running on a GPU;
         ! local_gemm_ctx is null in CPU-only runs
         CALL local_gemm_destroy(mp2_env%local_gemm_ctx)
         mp2_env%local_gemm_ctx = c_null_ptr
         DEALLOCATE (buffer_compens_1d)
      END IF

      DO proc_shift = 1, number_of_parallel_channels
         NULLIFY (buffer_3d(proc_shift)%array)
         DEALLOCATE (buffer_1d(proc_shift)%array)
      END DO
      DEALLOCATE (buffer_3d, buffer_1d)

      CALL timestop(handle)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param P_ab ...
!> \param mat_S ...
!> \param mat_work ...
!> \param dot_blksize ...
!> \param buffer_1D ...
!> \param local_gemm_ctx ...
!> \param my_eval_virt ...
!> \param my_eval_occ ...
!> \param recv_eval_virt ...
!> \param omega ...
!> \param weight ...
! **************************************************************************************************
   SUBROUTINE calc_p_rpa_a(P_ab, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
                           my_eval_virt, my_eval_occ, recv_eval_virt, omega, weight)
      REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: p_ab
      REAL(kind=dp), DIMENSION(:, :, :), INTENT(IN) :: mat_s, mat_work
      INTEGER, INTENT(IN) :: dot_blksize
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
         INTENT(INOUT), TARGET :: buffer_1d
      TYPE(c_ptr), INTENT(INOUT) :: local_gemm_ctx
      REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_virt
      REAL(kind=dp), INTENT(IN) :: omega, weight

      CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_rpa_a'

      INTEGER :: handle, my_a, my_a_size, my_i, &
                 my_i_size, my_p_size, p_end, p_start, &
                 recv_a_size, stripesize
      REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_compens, buffer_unscaled

      CALL timeset(routinen, handle)

      my_i_size = SIZE(mat_s, 3)
      recv_a_size = SIZE(mat_work, 2)
      my_a_size = SIZE(mat_s, 2)
      my_p_size = SIZE(mat_s, 1)

      IF (dot_blksize >= blksize_threshold) THEN
         buffer_compens(1:my_a_size, 1:recv_a_size) => buffer_1d(1:my_a_size*recv_a_size)
         buffer_compens = 0.0_dp
         buffer_unscaled(1:my_a_size, 1:recv_a_size) => buffer_1d(my_a_size*recv_a_size + 1:2*my_a_size*recv_a_size)

         ! Blocked tensor contraction in stripes of size dot_blksize over the RI index
         DO my_i = 1, my_i_size
            DO p_start = 1, my_p_size, dot_blksize
               stripesize = min(dot_blksize, my_p_size - p_start + 1)
               p_end = p_start + stripesize - 1

               CALL local_gemm("T", "N", my_a_size, recv_a_size, stripesize, &
                               -weight, mat_s(p_start:p_end, :, my_i), stripesize, &
                               mat_work(p_start:p_end, :, my_i), stripesize, &
                               0.0_dp, buffer_unscaled, my_a_size, local_gemm_ctx)

               CALL scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
                                                      my_eval_virt, recv_eval_virt, my_eval_occ(my_i))

               CALL kahan_step(buffer_compens, p_ab)
            END DO
         END DO
      ELSE
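         ! Small block sizes: contract each matrix element with an accurate dot
         ! product and accumulate it with an inline Kahan summation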
         BLOCK
            INTEGER :: recv_a
            REAL(kind=dp) :: tmp, e_i, e_a, e_b, omega2, my_compens, my_p, s
            omega2 = -omega**2
!$OMP PARALLEL DO COLLAPSE(2) DEFAULT(NONE)&
!$OMP SHARED(my_a_size,recv_a_size,my_i_size,mat_S,my_eval_virt,recv_eval_virt,my_eval_occ,omega2,&
!$OMP P_ab,weight,mat_work)&
!$OMP PRIVATE(tmp,my_a,recv_a,my_i,e_a,e_b,e_i,my_compens,my_p,s)
            DO my_a = 1, my_a_size
               DO recv_a = 1, recv_a_size
                  e_a = my_eval_virt(my_a)
                  e_b = recv_eval_virt(recv_a)
                  my_p = p_ab(my_a, recv_a)
                  my_compens = 0.0_dp
                  DO my_i = 1, my_i_size
                     e_i = -my_eval_occ(my_i)
                     tmp = -weight*accurate_dot_product(mat_s(:, my_a, my_i), mat_work(:, recv_a, my_i)) &
                           *(1.0_dp + omega2/((e_a + e_i)*(e_b + e_i))) - my_compens
                     s = my_p + tmp
                     my_compens = (s - my_p) - tmp
                     my_p = s
                  END DO
                  p_ab(my_a, recv_a) = my_p
               END DO
            END DO
         END BLOCK
      END IF

      CALL timestop(handle)

   END SUBROUTINE calc_p_rpa_a

! **************************************************************************************************
!> \brief ...
!> \param P_ij ...
!> \param mat_S ...
!> \param mat_work ...
!> \param dot_blksize ...
!> \param buffer_1D ...
!> \param local_gemm_ctx ...
!> \param my_eval_virt ...
!> \param my_eval_occ ...
!> \param recv_eval_occ ...
!> \param omega ...
!> \param weight ...
! **************************************************************************************************
   SUBROUTINE calc_p_rpa_i(P_ij, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
                           my_eval_virt, my_eval_occ, recv_eval_occ, omega, weight)
      REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: p_ij
      REAL(kind=dp), DIMENSION(:, :, :), INTENT(INOUT) :: mat_s, mat_work
      INTEGER, INTENT(IN) :: dot_blksize
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
         INTENT(INOUT), TARGET :: buffer_1d
      TYPE(c_ptr), INTENT(INOUT) :: local_gemm_ctx
      REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_occ
      REAL(kind=dp), INTENT(IN) :: omega, weight

      CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_P_rpa_i'

      INTEGER :: handle, my_a, my_a_size, my_i, &
                 my_i_size, my_p_size, p_end, p_start, &
                 recv_i_size, stripesize
      REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_compens, buffer_unscaled

      CALL timeset(routinen, handle)

      my_i_size = SIZE(mat_s, 3)
      recv_i_size = SIZE(mat_work, 3)
      my_a_size = SIZE(mat_s, 2)
      my_p_size = SIZE(mat_s, 1)

      IF (dot_blksize >= blksize_threshold) THEN
         buffer_compens(1:my_i_size, 1:recv_i_size) => buffer_1d(1:my_i_size*recv_i_size)
         buffer_compens = 0.0_dp
         buffer_unscaled(1:my_i_size, 1:recv_i_size) => buffer_1d(my_i_size*recv_i_size + 1:2*my_i_size*recv_i_size)

         ! Blocked tensor contraction in stripes of size dot_blksize over the RI index
         DO my_a = 1, my_a_size
            DO p_start = 1, my_p_size, dot_blksize
               stripesize = min(dot_blksize, my_p_size - p_start + 1)
               p_end = p_start + stripesize - 1

               CALL local_gemm("T", "N", my_i_size, recv_i_size, stripesize, &
                               weight, mat_s(p_start:p_end, my_a, :), stripesize, &
                               mat_work(p_start:p_end, my_a, :), stripesize, &
                               0.0_dp, buffer_unscaled, my_i_size, local_gemm_ctx)

               CALL scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
                                                     my_eval_occ, recv_eval_occ, my_eval_virt(my_a))

               CALL kahan_step(buffer_compens, p_ij)
            END DO
         END DO
      ELSE
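         ! Small block sizes: same accurate dot-product path as in calc_p_rpa_a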
         BLOCK
            REAL(kind=dp) :: tmp, e_i, e_a, e_j, omega2, my_compens, my_p, s
            INTEGER :: recv_i
            omega2 = -omega**2
!$OMP PARALLEL DO COLLAPSE(2) DEFAULT(NONE)&
!$OMP SHARED(my_a_size,recv_i_size,my_i_size,mat_S,my_eval_occ,my_eval_virt,omega2,&
!$OMP recv_eval_occ,P_ij,weight,mat_work)&
!$OMP PRIVATE(tmp,my_a,recv_i,my_i,e_i,e_j,e_a,my_compens,my_p,s)
            DO my_i = 1, my_i_size
               DO recv_i = 1, recv_i_size
                  e_i = my_eval_occ(my_i)
                  e_j = recv_eval_occ(recv_i)
                  my_p = p_ij(my_i, recv_i)
                  my_compens = 0.0_dp
                  DO my_a = 1, my_a_size
                     e_a = my_eval_virt(my_a)
                     tmp = weight*accurate_dot_product(mat_s(:, my_a, my_i), mat_work(:, my_a, recv_i)) &
                           *(1.0_dp + omega2/((e_a - e_i)*(e_a - e_j))) - my_compens
                     s = my_p + tmp
                     my_compens = (s - my_p) - tmp
                     my_p = s
                  END DO
                  p_ij(my_i, recv_i) = my_p
               END DO
            END DO
         END BLOCK
      END IF

      CALL timestop(handle)

   END SUBROUTINE calc_p_rpa_i

! **************************************************************************************************
!> \brief ...
!> \param compens ...
!> \param P ...
! **************************************************************************************************
   SUBROUTINE kahan_step(compens, P)
      REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: compens, p

      CHARACTER(LEN=*), PARAMETER :: routinen = 'kahan_step'

      INTEGER :: handle, i, j
      REAL(kind=dp) :: my_compens, my_p, s

      CALL timeset(routinen, handle)

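      ! One step of compensated (Kahan) summation: P <- P + compens, with the
      ! rounding error of the addition stored back in compens for the next step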
!$OMP PARALLEL DO DEFAULT(NONE) SHARED(P,compens) PRIVATE(i,my_p,my_compens,s,j) COLLAPSE(2)
      DO j = 1, SIZE(compens, 2)
         DO i = 1, SIZE(compens, 1)
            my_p = p(i, j)
            my_compens = compens(i, j)
            s = my_p + my_compens
            compens(i, j) = (s - my_p) - my_compens
            p(i, j) = s
         END DO
      END DO
!$OMP END PARALLEL DO

      CALL timestop(handle)

   END SUBROUTINE

! **************************************************************************************************
!> \brief ...
!> \param buffer_unscaled ...
!> \param buffer_compens ...
!> \param omega ...
!> \param my_eval_virt ...
!> \param recv_eval_virt ...
!> \param my_eval_occ ...
! **************************************************************************************************
1515 SUBROUTINE scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
1516 my_eval_virt, recv_eval_virt, my_eval_occ)
1517 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: buffer_unscaled
1518 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: buffer_compens
1519 REAL(kind=dp), INTENT(IN) :: omega
1520 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, recv_eval_virt
1521 REAL(kind=dp), INTENT(IN) :: my_eval_occ
1522
1523 CHARACTER(LEN=*), PARAMETER :: routinen = 'scale_buffer_and_add_compens_virt'
1524
1525 INTEGER :: handle, my_a, my_b
1526
1527 CALL timeset(routinen, handle)
1528
1529!$OMP PARALLEL DO DEFAULT(NONE) SHARED(buffer_unscaled,buffer_compens,omega,&
1530!$OMP my_eval_virt,recv_eval_virt,my_eval_occ) PRIVATE(my_a,my_b)
1531 DO my_b = 1, SIZE(buffer_compens, 2)
1532 DO my_a = 1, SIZE(buffer_compens, 1)
1533 buffer_compens(my_a, my_b) = buffer_unscaled(my_a, my_b) &
1534 *(1.0_dp - omega**2/((my_eval_virt(my_a) - my_eval_occ)*(recv_eval_virt(my_b) - my_eval_occ))) &
1535 - buffer_compens(my_a, my_b)
1536 END DO
1537 END DO
1538!$OMP END PARALLEL DO
1539
1540 CALL timestop(handle)
1541
1542 END SUBROUTINE scale_buffer_and_add_compens_virt
1543
1544! **************************************************************************************************
1545!> \brief Applies the energy-dependent weight to the raw contraction result and subtracts the previous compensation (P_ij update)
1546!> \param buffer_unscaled raw, unweighted contraction result
1547!> \param buffer_compens on input the previous compensation, on output the weighted contribution minus the compensation
1548!> \param omega current quadrature point
1549!> \param my_eval_occ eigenvalues of the locally held occupied orbitals
1550!> \param recv_eval_occ eigenvalues of the received occupied orbitals
1551!> \param my_eval_virt eigenvalue of the current virtual orbital
1552! **************************************************************************************************
1553 SUBROUTINE scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
1554 my_eval_occ, recv_eval_occ, my_eval_virt)
1555 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: buffer_unscaled
1556 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: buffer_compens
1557 REAL(kind=dp), INTENT(IN) :: omega
1558 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: my_eval_occ, recv_eval_occ
1559 REAL(kind=dp), INTENT(IN) :: my_eval_virt
1560
1561 CHARACTER(LEN=*), PARAMETER :: routinen = 'scale_buffer_and_add_compens_occ'
1562
1563 INTEGER :: handle, my_i, my_j
1564
1565 CALL timeset(routinen, handle)
1566
1567!$OMP PARALLEL DO DEFAULT(NONE) SHARED(buffer_compens,buffer_unscaled,omega,&
1568!$OMP my_eval_virt,my_eval_occ,recv_eval_occ) PRIVATE(my_i,my_j)
1569 DO my_j = 1, SIZE(buffer_compens, 2)
1570 DO my_i = 1, SIZE(buffer_compens, 1)
1571 buffer_compens(my_i, my_j) = buffer_unscaled(my_i, my_j) &
1572 *(1.0_dp - omega**2/((my_eval_virt - my_eval_occ(my_i))*(my_eval_virt - recv_eval_occ(my_j)))) &
1573 - buffer_compens(my_i, my_j)
1574 END DO
1575 END DO
1576!$OMP END PARALLEL DO
1577
1578 CALL timestop(handle)
1579
1580 END SUBROUTINE scale_buffer_and_add_compens_occ
1581
1582! **************************************************************************************************
1583!> \brief Evaluates sinh(x)/x in a numerically stable way
1584!> \param x argument
1585!> \return sinh(x)/x
1586! **************************************************************************************************
1587 ELEMENTAL FUNCTION sinh_over_x(x) RESULT(res)
1588 REAL(kind=dp), INTENT(IN) :: x
1589 REAL(kind=dp) :: res
1590
1591 ! Calculate sinh(x)/x
1592 ! Split the interval to prevent numerical instabilities for small |x|
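      ! For |x| <= 3.0e-4 the truncated Taylor series sinh(x)/x = 1 + x**2/6 + x**4/120 + ...
      ! is exact to double precision: the first neglected term x**4/120 <= 6.8e-17 lies below
      ! the machine epsilon of about 2.2e-16.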
1593 IF (abs(x) > 3.0e-4_dp) THEN
1594 res = sinh(x)/x
1595 ELSE
1596 res = 1.0_dp + x**2/6.0_dp
1597 END IF
1598
1599 END FUNCTION sinh_over_x
1600
1601! **************************************************************************************************
1602!> \brief Calculates the contribution of (nearly) degenerate occupied orbital pairs to the density matrix P_ij
1603!> \param fm_work_iaP work matrix in (ia|P) layout
1604!> \param fm_mat_S matrix S in the same layout
1605!> \param pair_list list of degenerate pairs (i,j)
1606!> \param virtual number of virtual orbitals
1607!> \param P_ij density matrix elements of the listed pairs
1608!> \param Eigenval orbital eigenvalues
1609!> \param omega current quadrature point
1610!> \param weight weight of the current quadrature point
1611!> \param index2send local column indices to send to each process column
1612!> \param index2recv global column indices received from each process column
1613!> \param dot_blksize block size for the blocked dot products
1614! **************************************************************************************************
1615 SUBROUTINE calc_pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigenval, &
1616 omega, weight, index2send, index2recv, dot_blksize)
1617 TYPE(cp_fm_type), INTENT(IN) :: fm_work_iap, fm_mat_s
1618 INTEGER, DIMENSION(:, :), INTENT(IN) :: pair_list
1619 INTEGER, INTENT(IN) :: virtual
1620 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: p_ij
1621 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
1622 REAL(kind=dp), INTENT(IN) :: omega, weight
1623 TYPE(one_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2send, index2recv
1624 INTEGER, INTENT(IN) :: dot_blksize
1625
1626 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_Pij_degen'
1627
1628 INTEGER :: avirt, col_global, col_local, counter, first_p_pos(2), first_p_pos_col, handle, &
1629 handle2, ij_counter, iocc, my_col_local, my_i, my_j, my_pcol, my_prow, ncol_block, &
1630 ncol_local, nrow_local, num_ij_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, &
1631 recv_size, send_size, size_recv_buffer, size_send_buffer, tag
1632 INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
1633 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
1634 REAL(kind=dp) :: trace
1635 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
1636 TYPE(cp_blacs_env_type), POINTER :: context
1637 TYPE(mp_para_env_type), POINTER :: para_env
1638
1639 CALL timeset(routinen, handle)
1640
1641 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1642 ncol_local=ncol_local, col_indices=col_indices, &
1643 context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
1644 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1645 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1646 first_p_pos_col = first_p_pos(2)
1647
1648 num_ij_pairs = SIZE(pair_list, 2)
1649
1650 tag = 42
1651
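      ! First pass: accumulate the contributions of the locally owned columns. For each
      ! requested pair (i,j), the trace runs over the virtual index a and sums
      ! S(:,ia) . W(:,ja) for those columns ja held by this process column.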
1652 DO ij_counter = 1, num_ij_pairs
1653
1654 my_i = pair_list(1, ij_counter)
1655 my_j = pair_list(2, ij_counter)
1656
1657 trace = 0.0_dp
1658
1659 DO col_local = 1, ncol_local
1660 col_global = col_indices(col_local)
1661
1662 iocc = max(1, col_global - 1)/virtual + 1
1663 avirt = col_global - (iocc - 1)*virtual
1664
1665 IF (iocc /= my_j) cycle
1666 pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1667 IF (pcol /= my_pcol) cycle
1668
1669 my_col_local = cp_fm_indxg2l((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1670
1671 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1672 dot_blksize)
1673 END DO
1674
1675 p_ij(ij_counter) = p_ij(ij_counter) - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1676
1677 END DO
1678
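      ! Second pass: ring exchange over the process columns. In round proc_shift, this
      ! process sends its columns to column (my_pcol + proc_shift) mod num_pe_col and
      ! receives from (my_pcol - proc_shift) mod num_pe_col, so after num_pe_col - 1
      ! rounds every pair of process columns has exchanged the required data.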
1679 IF (num_pe_col > 1) THEN
1680 size_send_buffer = 0
1681 size_recv_buffer = 0
1682 DO proc_shift = 1, num_pe_col - 1
1683 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1684 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1685
1686 IF (ALLOCATED(index2send(pcol_send)%array)) &
1687 size_send_buffer = max(size_send_buffer, SIZE(index2send(pcol_send)%array))
1688
1689 IF (ALLOCATED(index2recv(pcol_recv)%array)) &
1690 size_recv_buffer = max(size_recv_buffer, SIZE(index2recv(pcol_recv)%array))
1691 END DO
1692
1693 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1694
1695 DO proc_shift = 1, num_pe_col - 1
1696 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1697 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1698
1699 ! Collect data and exchange
1700 send_size = 0
1701 IF (ALLOCATED(index2send(pcol_send)%array)) send_size = SIZE(index2send(pcol_send)%array)
1702
1703 DO counter = 1, send_size
1704 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1705 END DO
1706
1707 recv_size = 0
1708 IF (ALLOCATED(index2recv(pcol_recv)%array)) recv_size = SIZE(index2recv(pcol_recv)%array)
1709 IF (recv_size > 0) THEN
1710 CALL timeset(routinen//"_send", handle2)
1711 IF (send_size > 0) THEN
1712 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1713 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1714 ELSE
1715 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1716 END IF
1717 CALL timestop(handle2)
1718
1719 DO ij_counter = 1, num_ij_pairs
1720 ! Collect the contributions of the matrix elements
1721
1722 my_i = pair_list(1, ij_counter)
1723 my_j = pair_list(2, ij_counter)
1724
1725 trace = 0.0_dp
1726
1727 DO col_local = 1, recv_size
1728 col_global = index2recv(pcol_recv)%array(col_local)
1729
1730 iocc = max(1, col_global - 1)/virtual + 1
1731 IF (iocc /= my_j) cycle
1732 avirt = col_global - (iocc - 1)*virtual
1733 pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1734 IF (pcol /= my_pcol) cycle
1735
1736 my_col_local = cp_fm_indxg2l((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1737
1738 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1739 dot_blksize)
1740 END DO
1741
1742 p_ij(ij_counter) = p_ij(ij_counter) &
1743 - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1744 END DO
1745 ELSE IF (send_size > 0) THEN
1746 CALL timeset(routinen//"_send", handle2)
1747 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1748 CALL timestop(handle2)
1749 END IF
1750 END DO
1751 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
1752 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
1753 END IF
1754
1755 CALL timestop(handle)
1756
1757 END SUBROUTINE calc_pij_degen
1758
1759! **************************************************************************************************
1760!> \brief Calculates the contribution of (nearly) degenerate virtual orbital pairs to the density matrix P_ab
1761!> \param fm_work_iaP work matrix in (ia|P) layout
1762!> \param fm_mat_S matrix S in the same layout
1763!> \param pair_list list of degenerate pairs (a,b)
1764!> \param virtual number of virtual orbitals
1765!> \param P_ab density matrix elements of the listed pairs
1766!> \param Eigenval orbital eigenvalues
1767!> \param omega current quadrature point
1768!> \param weight weight of the current quadrature point
1769!> \param index2send local column indices to send to each process column
1770!> \param index2recv global column indices received from each process column
1771!> \param dot_blksize block size for the blocked dot products
1772! **************************************************************************************************
1773 SUBROUTINE calc_pab_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ab, Eigenval, &
1774 omega, weight, index2send, index2recv, dot_blksize)
1775 TYPE(cp_fm_type), INTENT(IN) :: fm_work_iap, fm_mat_s
1776 INTEGER, DIMENSION(:, :), INTENT(IN) :: pair_list
1777 INTEGER, INTENT(IN) :: virtual
1778 REAL(kind=dp), DIMENSION(:), INTENT(INOUT) :: p_ab
1779 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: eigenval
1780 REAL(kind=dp), INTENT(IN) :: omega, weight
1781 TYPE(one_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2send, index2recv
1782 INTEGER, INTENT(IN) :: dot_blksize
1783
1784 CHARACTER(LEN=*), PARAMETER :: routinen = 'calc_Pab_degen'
1785
1786 INTEGER :: ab_counter, avirt, col_global, col_local, counter, first_p_pos(2), &
1787 first_p_pos_col, handle, handle2, iocc, my_a, my_b, my_col_local, my_pcol, my_prow, &
1788 ncol_block, ncol_local, nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, pcol_send, &
1789 proc_shift, recv_size, send_size, size_recv_buffer, size_send_buffer, tag
1790 INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals
1791 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
1792 REAL(kind=dp) :: trace
1793 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
1794 TYPE(cp_blacs_env_type), POINTER :: context
1795 TYPE(mp_para_env_type), POINTER :: para_env
1796
1797 CALL timeset(routinen, handle)
1798
1799 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1800 ncol_local=ncol_local, col_indices=col_indices, &
1801 context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
1802 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1803 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1804 first_p_pos_col = first_p_pos(2)
1805
1806 num_ab_pairs = SIZE(pair_list, 2)
1807
1808 tag = 43
1809
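      ! This routine mirrors calc_Pij_degen with the roles of the indices swapped: the trace
      ! now runs over the occupied index i, summing S(:,ia) . W(:,ib) for the requested
      ! virtual pairs (a,b), and the contribution enters P_ab with a plus sign.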
1810 DO ab_counter = 1, num_ab_pairs
1811
1812 my_a = pair_list(1, ab_counter)
1813 my_b = pair_list(2, ab_counter)
1814
1815 trace = 0.0_dp
1816
1817 DO col_local = 1, ncol_local
1818 col_global = col_indices(col_local)
1819
1820 iocc = max(1, col_global - 1)/virtual + 1
1821 avirt = col_global - (iocc - 1)*virtual
1822
1823 IF (avirt /= my_b) cycle
1824 pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1825 IF (pcol /= my_pcol) cycle
1826 my_col_local = cp_fm_indxg2l((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1827
1828 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1829 dot_blksize)
1830
1831 END DO
1832
1833 p_ab(ab_counter) = p_ab(ab_counter) &
1834 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1835
1836 END DO
1837
1838 IF (num_pe_col > 1) THEN
1839 size_send_buffer = 0
1840 size_recv_buffer = 0
1841 DO proc_shift = 1, num_pe_col - 1
1842 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1843 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1844
1845 IF (ALLOCATED(index2send(pcol_send)%array)) &
1846 size_send_buffer = max(size_send_buffer, SIZE(index2send(pcol_send)%array))
1847
1848 IF (ALLOCATED(index2recv(pcol_recv)%array)) &
1849 size_recv_buffer = max(size_recv_buffer, SIZE(index2recv(pcol_recv)%array))
1850 END DO
1851
1852 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1853
1854 DO proc_shift = 1, num_pe_col - 1
1855 pcol_send = modulo(my_pcol + proc_shift, num_pe_col)
1856 pcol_recv = modulo(my_pcol - proc_shift, num_pe_col)
1857
1858 ! Collect data and exchange
1859 send_size = 0
1860 IF (ALLOCATED(index2send(pcol_send)%array)) send_size = SIZE(index2send(pcol_send)%array)
1861
1862 DO counter = 1, send_size
1863 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1864 END DO
1865
1866 recv_size = 0
1867 IF (ALLOCATED(index2recv(pcol_recv)%array)) recv_size = SIZE(index2recv(pcol_recv)%array)
1868 IF (recv_size > 0) THEN
1869 CALL timeset(routinen//"_send", handle2)
1870 IF (send_size > 0) THEN
1871 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1872 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1873 ELSE
1874 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1875 END IF
1876 CALL timestop(handle2)
1877
1878 DO ab_counter = 1, num_ab_pairs
1879 ! Collect the contributions of the matrix elements
1880
1881 my_a = pair_list(1, ab_counter)
1882 my_b = pair_list(2, ab_counter)
1883
1884 trace = 0.0_dp
1885
1886 DO col_local = 1, SIZE(index2recv(pcol_recv)%array)
1887 col_global = index2recv(pcol_recv)%array(col_local)
1888
1889 iocc = max(1, col_global - 1)/virtual + 1
1890 avirt = col_global - (iocc - 1)*virtual
1891 IF (avirt /= my_b) cycle
1892 pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1893 IF (pcol /= my_pcol) cycle
1894
1895 my_col_local = cp_fm_indxg2l((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1896
1897 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1898 dot_blksize)
1899 END DO
1900
1901 p_ab(ab_counter) = p_ab(ab_counter) &
1902 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1903
1904 END DO
1905 ELSE IF (send_size > 0) THEN
1906 CALL timeset(routinen//"_send", handle2)
1907 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1908 CALL timestop(handle2)
1909 END IF
1910 END DO
1911 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
1912 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
1913 END IF
1914
1915 CALL timestop(handle)
1916
1917 END SUBROUTINE calc_pab_degen
1918
1919! **************************************************************************************************
1920!> \brief Redistributes fm_mat_S from its 2D BLACS layout into a process-local 3D array (RI, virtual, occupied)
1921!> \param index2send local column indices to send to each process column
1922!> \param index2recv target (virtual, occupied) indices of the columns received from each process column
1923!> \param fm_mat_S matrix S to be redistributed
1924!> \param mat_S_3D redistributed data, allocated by this routine
1925!> \param gd_homo distribution of the occupied orbitals over the process grid
1926!> \param gd_virtual distribution of the virtual orbitals over the process grid
1927!> \param mepos own position in the process grid
1928! **************************************************************************************************
1929 SUBROUTINE redistribute_fm_mat_s(index2send, index2recv, fm_mat_S, mat_S_3D, gd_homo, gd_virtual, mepos)
1930 TYPE(one_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2send
1931 TYPE(two_dim_int_array), DIMENSION(0:), INTENT(IN) :: index2recv
1932 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s
1933 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1934 INTENT(OUT) :: mat_s_3d
1935 TYPE(group_dist_d1_type), INTENT(IN) :: gd_homo, gd_virtual
1936 INTEGER, DIMENSION(2), INTENT(IN) :: mepos
1937
1938 CHARACTER(LEN=*), PARAMETER :: routinen = 'redistribute_fm_mat_S'
1939
1940 INTEGER :: col_local, handle, my_a, my_homo, my_i, my_pcol, my_prow, my_virtual, nrow_local, &
1941 num_pe_col, proc_recv, proc_send, proc_shift, recv_size, send_size, size_recv_buffer, &
1942 size_send_buffer, tag
1943 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
1944 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
1945 TYPE(mp_para_env_type), POINTER :: para_env
1946
1947 CALL timeset(routinen, handle)
1948
1949 tag = 46
1950
1951 CALL fm_mat_s%matrix_struct%context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1952 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1953
1954 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, nrow_local=nrow_local, para_env=para_env)
1955
1956 CALL get_group_dist(gd_homo, mepos(2), sizes=my_homo)
1957 CALL get_group_dist(gd_virtual, mepos(1), sizes=my_virtual)
1958
1959 ALLOCATE (mat_s_3d(nrow_local, my_virtual, my_homo))
1960
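      ! mat_S_3D is indexed as (local RI index, local virtual index, local occupied index).
      ! Columns that already reside on this process column are copied directly; all other
      ! columns are obtained through the ring exchange below, using the precomputed
      ! index2send/index2recv maps.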
1961 IF (ALLOCATED(index2send(my_pcol)%array)) THEN
1962 DO col_local = 1, SIZE(index2send(my_pcol)%array)
1963 my_a = index2recv(my_pcol)%array(1, col_local)
1964 my_i = index2recv(my_pcol)%array(2, col_local)
1965 mat_s_3d(:, my_a, my_i) = fm_mat_s%local_data(:, index2send(my_pcol)%array(col_local))
1966 END DO
1967 END IF
1968
1969 IF (num_pe_col > 1) THEN
1970 size_send_buffer = 0
1971 size_recv_buffer = 0
1972 DO proc_shift = 1, num_pe_col - 1
1973 proc_send = modulo(my_pcol + proc_shift, num_pe_col)
1974 proc_recv = modulo(my_pcol - proc_shift, num_pe_col)
1975
1976 send_size = 0
1977 IF (ALLOCATED(index2send(proc_send)%array)) send_size = SIZE(index2send(proc_send)%array)
1978 size_send_buffer = max(size_send_buffer, send_size)
1979
1980 recv_size = 0
1981 IF (ALLOCATED(index2recv(proc_recv)%array)) recv_size = SIZE(index2recv(proc_recv)%array, 2)
1982 size_recv_buffer = max(size_recv_buffer, recv_size)
1983
1984 END DO
1985
1986 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1987
1988 DO proc_shift = 1, num_pe_col - 1
1989 proc_send = modulo(my_pcol + proc_shift, num_pe_col)
1990 proc_recv = modulo(my_pcol - proc_shift, num_pe_col)
1991
1992 send_size = 0
1993 IF (ALLOCATED(index2send(proc_send)%array)) send_size = SIZE(index2send(proc_send)%array)
1994 DO col_local = 1, send_size
1995 buffer_send(:, col_local) = fm_mat_s%local_data(:, index2send(proc_send)%array(col_local))
1996 END DO
1997
1998 recv_size = 0
1999 IF (ALLOCATED(index2recv(proc_recv)%array)) recv_size = SIZE(index2recv(proc_recv)%array, 2)
2000 IF (recv_size > 0) THEN
2001 IF (send_size > 0) THEN
2002 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), &
2003 buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
2004 ELSE
2005 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
2006 END IF
2007
2008 DO col_local = 1, recv_size
2009 my_a = index2recv(proc_recv)%array(1, col_local)
2010 my_i = index2recv(proc_recv)%array(2, col_local)
2011 mat_s_3d(:, my_a, my_i) = buffer_recv(:, col_local)
2012 END DO
2013 ELSE IF (send_size > 0) THEN
2014 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), tag)
2015 END IF
2016
2017 END DO
2018
2019 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
2020 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
2021 END IF
2022
2023 CALL timestop(handle)
2024
2025 END SUBROUTINE redistribute_fm_mat_s
2026
2027! **************************************************************************************************
2028!> \brief Finalizes the gradient calculation: redistributes the density matrices and assembles the intermediates Gamma_PQ and Gamma_P_ia
2029!> \param rpa_grad ...
2030!> \param mp2_env ...
2031!> \param para_env_sub ...
2032!> \param para_env ...
2033!> \param qs_env ...
2034!> \param gd_array ...
2035!> \param color_sub ...
2036!> \param do_ri_sos_laplace_mp2 ...
2037!> \param homo ...
2038!> \param virtual ...
2039! **************************************************************************************************
2040 SUBROUTINE rpa_grad_finalize(rpa_grad, mp2_env, para_env_sub, para_env, qs_env, gd_array, &
2041 color_sub, do_ri_sos_laplace_mp2, homo, virtual)
2042 TYPE(rpa_grad_type), INTENT(INOUT) :: rpa_grad
2043 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
2044 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub, para_env
2045 TYPE(qs_environment_type), INTENT(IN), POINTER :: qs_env
2046 TYPE(group_dist_d1_type) :: gd_array
2047 INTEGER, INTENT(IN) :: color_sub
2048 LOGICAL, INTENT(IN) :: do_ri_sos_laplace_mp2
2049 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
2050
2051 CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_finalize'
2052
2053 INTEGER :: dimen_ia, dimen_ri, handle, iib, ispin, my_group_l_end, my_group_l_size, &
2054 my_group_l_start, my_ia_end, my_ia_size, my_ia_start, my_p_end, my_p_size, my_p_start, &
2055 ngroup, nspins, pos_group, pos_sub, proc
2056 INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
2057 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
2058 REAL(kind=dp) :: my_scale
2059 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: gamma_2d
2060 TYPE(cp_blacs_env_type), POINTER :: blacs_env
2061 TYPE(cp_fm_struct_type), POINTER :: fm_struct
2062 TYPE(cp_fm_type) :: fm_g_p_ia, fm_pq, fm_pq_2, fm_pq_half, &
2063 fm_work_pq, fm_work_pq_2, fm_y, &
2064 operator_half
2065 TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_p, gd_p_new
2066
2067 CALL timeset(routinen, handle)
2068
2069 ! Release unnecessary matrices to save memory for next steps
2070
2071 nspins = SIZE(rpa_grad%fm_Y)
2072
2073 ! This scaling factor is applied to the density matrices and the Gamma matrices later
2074 IF (do_ri_sos_laplace_mp2) THEN
2075 my_scale = mp2_env%scale_s
2076 ELSE
2077 my_scale = -mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2078 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2079 END IF
2080
2081 IF (do_ri_sos_laplace_mp2) THEN
2082 CALL sos_mp2_grad_finalize(rpa_grad%sos_mp2_work_occ, rpa_grad%sos_mp2_work_virt, &
2083 para_env, para_env_sub, homo, virtual, mp2_env)
2084 ELSE
2085 CALL rpa_grad_work_finalize(rpa_grad%rpa_work, mp2_env, homo, &
2086 virtual, para_env, para_env_sub)
2087 END IF
2088
2089 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2090
2091 CALL cp_fm_get_info(rpa_grad%fm_Gamma_PQ, ncol_global=dimen_ri)
2092
2093 NULLIFY (fm_struct)
2094 CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=dimen_ri, &
2095 ncol_global=dimen_ri, para_env=para_env)
2096 CALL cp_fm_create(fm_pq, fm_struct)
2097 CALL cp_fm_create(fm_work_pq, fm_struct)
2098 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2099 CALL cp_fm_create(fm_pq_2, fm_struct)
2100 END IF
2101 CALL cp_fm_struct_release(fm_struct)
2102 CALL cp_fm_set_all(fm_pq, 0.0_dp)
2103
2104 ! We still have to left- and right-multiply it with PQ_half
2105 CALL dereplicate_and_sum_fm(rpa_grad%fm_Gamma_PQ, fm_pq)
2106
2107 ngroup = para_env%num_pe/para_env_sub%num_pe
2108
2109 CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
2110 group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
2111
2112 ! Create fm_PQ_half
2113 CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
2114 CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)
2115
2116 CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)
2117
2118 CALL create_group_dist(gd_p_new, para_env%num_pe)
2119 CALL create_group_dist(gd_array_new, para_env%num_pe)
2120
2121 DO proc = 0, para_env%num_pe - 1
2122 ! calculate position of the group
2123 pos_group = proc/para_env_sub%num_pe
2124 ! calculate position in the subgroup
2125 pos_sub = pos_info(proc)
2126 ! 1 -> rows, 2 -> cols
2127 CALL get_group_dist(gd_array, pos_group, gd_array_new, proc)
2128 CALL get_group_dist(gd_p, pos_sub, gd_p_new, proc)
2129 END DO
2130
2131 DEALLOCATE (pos_info)
2132 CALL release_group_dist(gd_p)
2133
2134 CALL array2fm(mp2_env%ri_grad%PQ_half, fm_pq%matrix_struct, &
2135 my_p_start, my_p_end, &
2136 my_group_l_start, my_group_l_end, &
2137 gd_p_new, gd_array_new, &
2138 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2139 fm_pq_half)
2140
2141 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2142 CALL array2fm(mp2_env%ri_grad%operator_half, fm_pq%matrix_struct, my_p_start, my_p_end, &
2143 my_group_l_start, my_group_l_end, &
2144 gd_p_new, gd_array_new, &
2145 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2146 operator_half)
2147 END IF
2148
2149 ! deallocate the info array
2150 CALL release_group_dist(gd_p_new)
2151 CALL release_group_dist(gd_array_new)
2152
2153 IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2154         ! Finish Gamma_PQ
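         ! Gamma_PQ <- -my_scale * PQ_half * Gamma * PQ_half^T, realized as two parallel
         ! GEMMs through the work matrix fm_work_pq; this branch applies when the RI metric
         ! and the potential coincide, otherwise operator_half enters the sandwich below.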
2155 CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2156 matrix_a=fm_pq, matrix_b=fm_pq_half, beta=0.0_dp, &
2157 matrix_c=fm_work_pq)
2158
2159 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2160 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2161 matrix_c=fm_pq)
2162
2163 CALL cp_fm_release(fm_work_pq)
2164 ELSE
2165 CALL parallel_gemm(transa="N", transb="T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2166 matrix_a=fm_pq, matrix_b=operator_half, beta=0.0_dp, &
2167 matrix_c=fm_work_pq)
2168
2169 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2170 matrix_a=operator_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2171 matrix_c=fm_pq)
2172 CALL cp_fm_release(operator_half)
2173
2174 CALL cp_fm_create(fm_work_pq_2, fm_pq%matrix_struct, name="fm_Gamma_PQ_2")
2175 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2176 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2177 matrix_c=fm_work_pq_2)
2178 CALL cp_fm_to_fm(fm_work_pq_2, fm_pq_2)
2179 CALL cp_fm_geadd(1.0_dp, "T", fm_work_pq_2, 1.0_dp, fm_pq_2)
2180 CALL cp_fm_release(fm_work_pq_2)
2181 CALL cp_fm_release(fm_work_pq)
2182 END IF
2183
2184 ALLOCATE (mp2_env%ri_grad%Gamma_PQ(my_p_size, my_group_l_size))
2185 CALL fm2array(mp2_env%ri_grad%Gamma_PQ, &
2186 my_p_size, my_p_start, my_p_end, &
2187 my_group_l_size, my_group_l_start, my_group_l_end, &
2188 group_grid_2_mepos, mepos_2_grid_group, &
2189 para_env_sub%num_pe, ngroup, &
2190 fm_pq)
2191
2192 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
2193 ALLOCATE (mp2_env%ri_grad%Gamma_PQ_2(my_p_size, my_group_l_size))
2194 CALL fm2array(mp2_env%ri_grad%Gamma_PQ_2, my_p_size, my_p_start, my_p_end, &
2195 my_group_l_size, my_group_l_start, my_group_l_end, &
2196 group_grid_2_mepos, mepos_2_grid_group, &
2197 para_env_sub%num_pe, ngroup, &
2198 fm_pq_2)
2199 END IF
2200
2201      ! Now, Gamma_Pia
2202 ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
2203 DO ispin = 1, nspins
2204 DO iib = 1, my_group_l_size
2205 NULLIFY (mp2_env%ri_grad%G_P_ia(iib, ispin)%matrix)
2206 END DO
2207 END DO
2208
2209 ! Redistribute the Y matrix
2210 DO ispin = 1, nspins
2211 ! Collect all column data of this subgroup locally
2212 CALL cp_fm_get_info(rpa_grad%fm_Y(ispin), ncol_global=dimen_ia)
2213
2214 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2215
2216 NULLIFY (fm_struct)
2217 CALL cp_fm_struct_create(fm_struct, template_fmstruct=fm_pq_half%matrix_struct, nrow_global=dimen_ia)
2218 CALL cp_fm_create(fm_y, fm_struct)
2219 CALL cp_fm_struct_release(fm_struct)
2220 CALL cp_fm_set_all(fm_y, 0.0_dp)
2221
2222 CALL dereplicate_and_sum_fm(rpa_grad%fm_Y(ispin), fm_y)
2223
2224 CALL cp_fm_create(fm_g_p_ia, fm_y%matrix_struct)
2225 CALL cp_fm_set_all(fm_g_p_ia, 0.0_dp)
2226
2227 CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2228 matrix_a=fm_y, matrix_b=fm_pq_half, beta=0.0_dp, &
2229 matrix_c=fm_g_p_ia)
2230
2231 CALL cp_fm_release(fm_y)
2232
2233 CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
2234 CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
2235
2236 CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
2237 my_group_l_size, my_group_l_start, my_group_l_end, &
2238 group_grid_2_mepos, mepos_2_grid_group, &
2239 para_env_sub%num_pe, ngroup, &
2240 fm_g_p_ia)
2241
2242 ! create the Gamma_ia_P in DBCSR style
2243 CALL create_dbcsr_gamma(gamma_2d, homo(ispin), virtual(ispin), dimen_ia, para_env_sub, &
2244 my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
2245 mp2_env%ri_grad%G_P_ia(:, ispin), mp2_env%ri_grad%mo_coeff_o(ispin)%matrix)
2246
2247 CALL release_group_dist(gd_ia)
2248
2249 END DO
2250 DEALLOCATE (rpa_grad%fm_Y)
2251 CALL cp_fm_release(fm_pq_half)
2252
2253 CALL timestop(handle)
2254
2255 END SUBROUTINE rpa_grad_finalize
2256
2257! **************************************************************************************************
2258!> \brief Sums, symmetrizes and redistributes the SOS-MP2 density matrices P_ij and P_ab
2259!> \param sos_mp2_work_occ working data of the occupied-occupied density matrix
2260!> \param sos_mp2_work_virt working data of the virtual-virtual density matrix
2261!> \param para_env global parallel environment
2262!> \param para_env_sub parallel environment of the subgroup
2263!> \param homo number of occupied orbitals per spin
2264!> \param virtual number of virtual orbitals per spin
2265!> \param mp2_env MP2 environment
2266! **************************************************************************************************
2267 SUBROUTINE sos_mp2_grad_finalize(sos_mp2_work_occ, sos_mp2_work_virt, para_env, para_env_sub, homo, virtual, mp2_env)
2268 TYPE(sos_mp2_grad_work_type), ALLOCATABLE, &
2269 DIMENSION(:), INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
2270 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
2271 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
2272 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
2273
2274 CHARACTER(LEN=*), PARAMETER :: routinen = 'sos_mp2_grad_finalize'
2275
2276 INTEGER :: ab_counter, handle, ij_counter, ispin, &
2277 itmp(2), my_a, my_b, my_b_end, &
2278 my_b_size, my_b_start, my_i, my_j, &
2279 nspins, pcol
2280 REAL(kind=dp) :: my_scale
2281
2282 CALL timeset(routinen, handle)
2283
2284 nspins = SIZE(sos_mp2_work_occ)
2285 my_scale = mp2_env%scale_s
2286
2287 DO ispin = 1, nspins
2288 DO pcol = 0, SIZE(sos_mp2_work_occ(ispin)%index2send, 1) - 1
2289 IF (ALLOCATED(sos_mp2_work_occ(ispin)%index2send(pcol)%array)) &
2290 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send(pcol)%array)
2291 IF (ALLOCATED(sos_mp2_work_occ(ispin)%index2recv(pcol)%array)) &
2292 DEALLOCATE (sos_mp2_work_occ(ispin)%index2recv(pcol)%array)
2293 IF (ALLOCATED(sos_mp2_work_virt(ispin)%index2send(pcol)%array)) &
2294 DEALLOCATE (sos_mp2_work_virt(ispin)%index2send(pcol)%array)
2295 IF (ALLOCATED(sos_mp2_work_virt(ispin)%index2recv(pcol)%array)) &
2296 DEALLOCATE (sos_mp2_work_virt(ispin)%index2recv(pcol)%array)
2297 END DO
2298 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send, &
2299 sos_mp2_work_occ(ispin)%index2recv, &
2300 sos_mp2_work_virt(ispin)%index2send, &
2301 sos_mp2_work_virt(ispin)%index2recv)
2302 END DO
2303
2304 ! Sum P_ij and P_ab and redistribute them
2305 DO ispin = 1, nspins
2306 CALL para_env%sum(sos_mp2_work_occ(ispin)%P)
2307
2308 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2309 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2310 DO my_i = 1, homo(ispin)
2311 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_i) = my_scale*sos_mp2_work_occ(ispin)%P(my_i)
2312 END DO
2313 DO ij_counter = 1, SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
2314 my_i = sos_mp2_work_occ(ispin)%pair_list(1, ij_counter)
2315 my_j = sos_mp2_work_occ(ispin)%pair_list(2, ij_counter)
2316
2317 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = my_scale*sos_mp2_work_occ(ispin)%P(homo(ispin) + ij_counter)
2318 END DO
2319 DEALLOCATE (sos_mp2_work_occ(ispin)%P, sos_mp2_work_occ(ispin)%pair_list)
2320
2321 ! Symmetrize P_ij
2322 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2323 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2324
2325 ! The first index of P_ab has to be distributed within the subgroups, so sum it up first and add the required elements later
2326 CALL para_env%sum(sos_mp2_work_virt(ispin)%P)
2327
2328 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2329 my_b_size = itmp(2) - itmp(1) + 1
2330 my_b_start = itmp(1)
2331 my_b_end = itmp(2)
2332
2333 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2334 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2335 DO my_a = itmp(1), itmp(2)
2336 mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_a) = my_scale*sos_mp2_work_virt(ispin)%P(my_a)
2337 END DO
2338 DO ab_counter = 1, SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
2339 my_a = sos_mp2_work_virt(ispin)%pair_list(1, ab_counter)
2340 my_b = sos_mp2_work_virt(ispin)%pair_list(2, ab_counter)
2341
2342 IF (my_a >= itmp(1) .AND. my_a <= itmp(2)) mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_b) = &
2343 my_scale*sos_mp2_work_virt(ispin)%P(virtual(ispin) + ab_counter)
2344 END DO
2345
2346 DEALLOCATE (sos_mp2_work_virt(ispin)%P, sos_mp2_work_virt(ispin)%pair_list)
2347
2348 ! Symmetrize P_ab
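      ! P_ab is distributed in panels over the first index within the subgroup. The local
      ! diagonal block is symmetrized in place; for the off-diagonal blocks, each process
      ! sends the transposed panel its partner needs and averages the received panel with
      ! its own data, so that P_ab <- (P_ab + P_ab^T)/2 overall.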
2349 IF (para_env_sub%num_pe > 1) THEN
2350 block
2351 INTEGER :: send_a_start, send_a_end, send_a_size, &
2352 recv_a_start, recv_a_end, recv_a_size, proc_shift, proc_send, proc_recv
2353 REAL(kind=dp), DIMENSION(:), ALLOCATABLE, TARGET :: buffer_send_1d
2354 REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_send
2355 REAL(kind=dp), DIMENSION(:, :), ALLOCATABLE :: buffer_recv
2356 TYPE(group_dist_d1_type) :: gd_virtual_sub
2357
2358 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2359
2360 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2361 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2362 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2363
2364 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2365 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2366
2367 DO proc_shift = 1, para_env_sub%num_pe - 1
2368
2369 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2370 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2371
2372 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2373 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2374
2375 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2376
2377 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2378 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2379 buffer_recv(:, :recv_a_size), proc_recv)
2380
2381 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2382 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2383
2384 END DO
2385
2386 DEALLOCATE (buffer_send_1d, buffer_recv)
2387
2388 CALL release_group_dist(gd_virtual_sub)
2389 END block
2390 ELSE
2391 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2392 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2393 END IF
2394
2395 END DO
2396 DEALLOCATE (sos_mp2_work_occ, sos_mp2_work_virt)
2397 IF (nspins == 1) THEN
2398 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2399 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2400 END IF
2401
2402 CALL timestop(handle)
2403
2404 END SUBROUTINE sos_mp2_grad_finalize
2405
2406! **************************************************************************************************
2407!> \brief Sums, symmetrizes and redistributes the RPA density matrices P_ij and P_ab
2408!> \param rpa_work working data of the RPA gradient calculation
2409!> \param mp2_env MP2 environment
2410!> \param homo number of occupied orbitals per spin
2411!> \param virtual number of virtual orbitals per spin
2412!> \param para_env global parallel environment
2413!> \param para_env_sub parallel environment of the subgroup
2414! **************************************************************************************************
2415 SUBROUTINE rpa_grad_work_finalize(rpa_work, mp2_env, homo, virtual, para_env, para_env_sub)
2416 TYPE(rpa_grad_work_type), INTENT(INOUT) :: rpa_work
2417 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
2418 INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual
2419 TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
2420
2421 CHARACTER(LEN=*), PARAMETER :: routinen = 'rpa_grad_work_finalize'
2422
2423 INTEGER :: handle, ispin, itmp(2), my_a_end, my_a_size, my_a_start, my_b_end, my_b_size, &
2424 my_b_start, my_i_end, my_i_size, my_i_start, nspins, proc, proc_recv, proc_send, &
2425 proc_shift, recv_a_end, recv_a_size, recv_a_start, recv_end, recv_start, send_a_end, &
2426 send_a_size, send_a_start, send_end, send_start, size_recv_buffer, size_send_buffer
2427 REAL(kind=dp) :: my_scale
2428 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer_recv, buffer_send
2429 TYPE(group_dist_d1_type) :: gd_a_sub, gd_virtual_sub
2430
2431 CALL timeset(routinen, handle)
2432
2433 nspins = SIZE(homo)
2434 my_scale = mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2435 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2436
2437 CALL cp_fm_release(rpa_work%fm_mat_Q_copy)
2438
2439 DO ispin = 1, nspins
2440 DO proc = 0, SIZE(rpa_work%index2send, 1) - 1
2441 IF (ALLOCATED(rpa_work%index2send(proc, ispin)%array)) DEALLOCATE (rpa_work%index2send(proc, ispin)%array)
2442 IF (ALLOCATED(rpa_work%index2recv(proc, ispin)%array)) DEALLOCATE (rpa_work%index2recv(proc, ispin)%array)
2443 END DO
2444 END DO
2445 DEALLOCATE (rpa_work%index2send, rpa_work%index2recv)
2446
2447 DO ispin = 1, nspins
2448 CALL get_group_dist(rpa_work%gd_homo(ispin), rpa_work%mepos(2), my_i_start, my_i_end, my_i_size)
2449 CALL release_group_dist(rpa_work%gd_homo(ispin))
2450
2451 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2452 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2453 mp2_env%ri_grad%P_ij(ispin)%array(my_i_start:my_i_end, :) = my_scale*rpa_work%P_ij(ispin)%array
2454 DEALLOCATE (rpa_work%P_ij(ispin)%array)
2455 CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
2456
2457 ! Symmetrize P_ij
2458 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2459 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2460
2461 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2462 my_b_start = itmp(1)
2463 my_b_end = itmp(2)
2464 my_b_size = my_b_end - my_b_start + 1
2465
2466 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2467 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2468
2469 CALL get_group_dist(rpa_work%gd_virtual(ispin), rpa_work%mepos(1), my_a_start, my_a_end, my_a_size)
2470 CALL release_group_dist(rpa_work%gd_virtual(ispin))
2471 ! This group dist describes which part of P_ab a process currently owns
2472 CALL create_group_dist(gd_a_sub, my_a_start, my_a_end, my_a_size, para_env_sub)
2473 ! This group dist describes which part of P_ab a process will own after redistribution
2474 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2475
2476 ! Calculate local indices of the common range of own matrix and send process
2477 send_start = max(1, my_b_start - my_a_start + 1)
2478 send_end = min(my_a_size, my_b_end - my_a_start + 1)
2479
2480 ! Same for recv process but with reverse positions
2481 recv_start = max(1, my_a_start - my_b_start + 1)
2482 recv_end = min(my_b_size, my_a_end - my_b_start + 1)
2483
2484 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2485 my_scale*rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2486
2487 IF (para_env_sub%num_pe > 1) THEN
2488 size_send_buffer = 0
2489 size_recv_buffer = 0
2490 DO proc_shift = 1, para_env_sub%num_pe - 1
2491 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2492 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2493
2494 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2495 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2496
2497 ! Calculate local indices of the common range of own matrix and send process
2498 send_start = max(1, send_a_start - my_a_start + 1)
2499 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2500
2501 size_send_buffer = max(size_send_buffer, max(send_end - send_start + 1, 0))
2502
2503 ! Same for recv process but with reverse positions
2504 recv_start = max(1, recv_a_start - my_b_start + 1)
2505 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2506
2507 size_recv_buffer = max(size_recv_buffer, max(recv_end - recv_start + 1, 0))
2508 END DO
2509 ALLOCATE (buffer_send(size_send_buffer, virtual(ispin)), buffer_recv(size_recv_buffer, virtual(ispin)))
2510
2511 DO proc_shift = 1, para_env_sub%num_pe - 1
2512 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2513 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2514
2515 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2516 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2517
2518 ! Calculate local indices of the common range of own matrix and send process
2519 send_start = max(1, send_a_start - my_a_start + 1)
2520 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2521 buffer_send(1:max(send_end - send_start + 1, 0), :) = rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2522
2523 ! Same for recv process but with reverse positions
2524 recv_start = max(1, recv_a_start - my_b_start + 1)
2525 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2526
2527 CALL para_env_sub%sendrecv(buffer_send(1:max(send_end - send_start + 1, 0), :), proc_send, &
2528 buffer_recv(1:max(recv_end - recv_start + 1, 0), :), proc_recv)
2529
2530 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2531 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) + &
2532 my_scale*buffer_recv(1:max(recv_end - recv_start + 1, 0), :)
2533
2534 END DO
2535
2536 IF (ALLOCATED(buffer_send)) DEALLOCATE (buffer_send)
2537 IF (ALLOCATED(buffer_recv)) DEALLOCATE (buffer_recv)
2538 END IF
2539 DEALLOCATE (rpa_work%P_ab(ispin)%array)
2540
2541 CALL release_group_dist(gd_a_sub)
2542
2543 block
2544 TYPE(mp_comm_type) :: comm_exchange
2545 CALL comm_exchange%from_split(para_env, para_env_sub%mepos)
2546 CALL comm_exchange%sum(mp2_env%ri_grad%P_ab(ispin)%array)
2547 CALL comm_exchange%free()
2548 END block
2549
2550 ! Symmetrize P_ab
2551 IF (para_env_sub%num_pe > 1) THEN
2552 block
2553 REAL(kind=dp), DIMENSION(:), ALLOCATABLE, TARGET :: buffer_send_1d
2554 REAL(kind=dp), DIMENSION(:, :), POINTER :: buffer_send
2555 REAL(kind=dp), DIMENSION(:, :), ALLOCATABLE :: buffer_recv
2556
2557 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2558 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2559 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2560
2561 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2562 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2563
2564 DO proc_shift = 1, para_env_sub%num_pe - 1
2565
2566 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2567 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2568
2569 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2570 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2571
2572 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2573
2574 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2575 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2576 buffer_recv(:, :recv_a_size), proc_recv)
2577
2578 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2579 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2580
2581 END DO
2582
2583 DEALLOCATE (buffer_send_1d, buffer_recv)
2584 END block
2585 ELSE
2586 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2587 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2588 END IF
2589
2590 CALL release_group_dist(gd_virtual_sub)
2591
2592 END DO
2593 DEALLOCATE (rpa_work%gd_homo, rpa_work%gd_virtual, rpa_work%P_ij, rpa_work%P_ab)
2594 IF (nspins == 1) THEN
2595 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2596 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2597 END IF
2598
2599 CALL timestop(handle)
2600 END SUBROUTINE rpa_grad_work_finalize
2601
2602! **************************************************************************************************
2603!> \brief Dereplicates data from fm_sub and collects it in fm_global; overlapping data is summed
2604!> \param fm_sub replicated matrix (all subgroups hold matrices of the same size), released on output
2605!> \param fm_global global matrix, on output it contains the sum of the replicated matrices, redistributed
2606! **************************************************************************************************
2607 SUBROUTINE dereplicate_and_sum_fm(fm_sub, fm_global)
2608 TYPE(cp_fm_type), INTENT(INOUT) :: fm_sub, fm_global
2609
2610 CHARACTER(LEN=*), PARAMETER :: routinen = 'dereplicate_and_sum_fm'
2611
2612 INTEGER :: col_local, elements2recv_col, elements2recv_row, elements2send_col, &
2613 elements2send_row, first_p_pos_global(2), first_p_pos_sub(2), handle, handle2, &
2614 mypcol_global, myprow_global, ncol_block_global, ncol_block_sub, ncol_local_global, &
2615 ncol_local_sub, npcol_global, npcol_sub, nprow_global, nprow_sub, nrow_block_global, &
2616 nrow_block_sub, nrow_local_global, nrow_local_sub, pcol_recv, pcol_send, proc_recv, &
2617 proc_send, proc_send_global, proc_shift, prow_recv, prow_send, row_local, tag
2618 INTEGER(int_8) :: size_recv_buffer, size_send_buffer
2619 INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv_col, data2recv_row, &
2620 data2send_col, data2send_row, &
2621 subgroup2mepos
2622 INTEGER, DIMENSION(:), POINTER :: col_indices_global, col_indices_sub, &
2623 row_indices_global, row_indices_sub
2624 INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi_global, blacs2mpi_sub, &
2625 mpi2blacs_global, mpi2blacs_sub
2626 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), TARGET :: recv_buffer_1d, send_buffer_1d
2627 REAL(kind=dp), DIMENSION(:, :), POINTER :: recv_buffer, send_buffer
2628 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
2629 TYPE(one_dim_int_array), ALLOCATABLE, DIMENSION(:) :: index2recv_col, index2recv_row, &
2630 index2send_col, index2send_row
2631
2632 CALL timeset(routinen, handle)
2633
2634 tag = 1
2635
2636 nprow_sub = fm_sub%matrix_struct%context%num_pe(1)
2637 npcol_sub = fm_sub%matrix_struct%context%num_pe(2)
2638
2639 myprow_global = fm_global%matrix_struct%context%mepos(1)
2640 mypcol_global = fm_global%matrix_struct%context%mepos(2)
2641 nprow_global = fm_global%matrix_struct%context%num_pe(1)
2642 npcol_global = fm_global%matrix_struct%context%num_pe(2)
2643
2644 CALL cp_fm_get_info(fm_sub, col_indices=col_indices_sub, row_indices=row_indices_sub, &
2645 nrow_local=nrow_local_sub, ncol_local=ncol_local_sub)
2646 CALL cp_fm_struct_get(fm_sub%matrix_struct, para_env=para_env_sub, first_p_pos=first_p_pos_sub, &
2647 nrow_block=nrow_block_sub, ncol_block=ncol_block_sub)
2648 CALL cp_fm_struct_get(fm_global%matrix_struct, first_p_pos=first_p_pos_global, nrow_block=nrow_block_global, &
2649 ncol_block=ncol_block_global, para_env=para_env, &
2650 col_indices=col_indices_global, row_indices=row_indices_global, &
2651 nrow_local=nrow_local_global, ncol_local=ncol_local_global)
2652 CALL fm_sub%matrix_struct%context%get(blacs2mpi=blacs2mpi_sub, mpi2blacs=mpi2blacs_sub)
2653 CALL fm_global%matrix_struct%context%get(blacs2mpi=blacs2mpi_global, mpi2blacs=mpi2blacs_global)
2654
2655 IF (para_env%num_pe /= para_env_sub%num_pe) THEN
2656 block
2657 TYPE(mp_comm_type) :: comm_exchange
2658 comm_exchange = fm_sub%matrix_struct%context%interconnect(para_env)
2659 CALL comm_exchange%sum(fm_sub%local_data)
2660 CALL comm_exchange%free()
2661 END block
2662 END IF
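      ! If several subgroups exist, the sum over the interconnecting communicator above
      ! leaves every subgroup with the complete summed matrix; the remainder of the routine
      ! then only redistributes (and transposes) the data from the subgroup layout into the
      ! global BLACS layout.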
2663
2664 ALLOCATE (subgroup2mepos(0:para_env_sub%num_pe - 1))
2665 CALL para_env_sub%allgather(para_env%mepos, subgroup2mepos)
2666
2667 CALL timeset(routinen//"_data2", handle2)
2668 ! Create a map of how much data has to be sent to each process coordinate; interchange rows and columns to transpose the matrices
2669 CALL get_elements2send(data2send_col, npcol_global, row_indices_sub, ncol_block_global, first_p_pos_global(2), index2send_col)
2670 CALL get_elements2send(data2send_row, nprow_global, col_indices_sub, nrow_block_global, first_p_pos_global(1), index2send_row)
2671
2672 ! Create the reverse map of how much data is received from each process coordinate,
2673 ! again interchanging rows and columns to transpose the matrices
2674 CALL get_elements2send(data2recv_col, npcol_sub, row_indices_global, ncol_block_sub, first_p_pos_sub(2), index2recv_col)
2675 CALL get_elements2send(data2recv_row, nprow_sub, col_indices_global, nrow_block_sub, first_p_pos_sub(1), index2recv_row)
2676 CALL timestop(handle2)
2677
2678 CALL timeset(routinen//"_local", handle2)
2679 ! Loop over local data and transpose
2680 prow_send = mpi2blacs_global(1, para_env%mepos)
2681 pcol_send = mpi2blacs_global(2, para_env%mepos)
2682 prow_recv = mpi2blacs_sub(1, para_env_sub%mepos)
2683 pcol_recv = mpi2blacs_sub(2, para_env_sub%mepos)
2684 elements2recv_col = data2recv_col(pcol_recv)
2685 elements2recv_row = data2recv_row(prow_recv)
2686
2687!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(row_local,col_local) &
2688!$OMP SHARED(elements2recv_col,elements2recv_row,recv_buffer,fm_global,&
2689!$OMP index2recv_col,index2recv_row,pcol_recv,prow_recv, &
2690!$OMP fm_sub,index2send_col,index2send_row,pcol_send,prow_send)
2691 DO col_local = 1, elements2recv_col
2692 DO row_local = 1, elements2recv_row
2693 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2694 index2recv_row(prow_recv)%array(row_local)) &
2695 = fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2696 index2send_row(prow_send)%array(col_local))
2697 END DO
2698 END DO
2699!$OMP END PARALLEL DO
2700 CALL timestop(handle2)
2701
2702 IF (para_env_sub%num_pe > 1) THEN
2703 size_send_buffer = 0_int_8
2704 size_recv_buffer = 0_int_8
2705 ! Loop over all processes in para_env_sub
2706 DO proc_shift = 1, para_env_sub%num_pe - 1
2707 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2708 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2709
2710 proc_send_global = subgroup2mepos(proc_send)
2711 prow_send = mpi2blacs_global(1, proc_send_global)
2712 pcol_send = mpi2blacs_global(2, proc_send_global)
2713 elements2send_col = data2send_col(pcol_send)
2714 elements2send_row = data2send_row(prow_send)
2715
2716 size_send_buffer = max(size_send_buffer, int(elements2send_col, int_8)*elements2send_row)
2717
2718 prow_recv = mpi2blacs_sub(1, proc_recv)
2719 pcol_recv = mpi2blacs_sub(2, proc_recv)
2720 elements2recv_col = data2recv_col(pcol_recv)
2721 elements2recv_row = data2recv_row(prow_recv)
2722
2723 size_recv_buffer = max(size_recv_buffer, int(elements2recv_col, int_8)*elements2recv_row)
2724 END DO
2725 ALLOCATE (send_buffer_1d(size_send_buffer), recv_buffer_1d(size_recv_buffer))
2726
2727 ! Loop over all processes in para_env_sub
2728 DO proc_shift = 1, para_env_sub%num_pe - 1
2729 proc_send = modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2730 proc_recv = modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2731
2732 proc_send_global = subgroup2mepos(proc_send)
2733 prow_send = mpi2blacs_global(1, proc_send_global)
2734 pcol_send = mpi2blacs_global(2, proc_send_global)
2735 elements2send_col = data2send_col(pcol_send)
2736 elements2send_row = data2send_row(prow_send)
2737
2738 CALL timeset(routinen//"_pack", handle2)
2739 ! Loop over local data and pack the buffer
2740 ! Transpose the matrix already
2741 send_buffer(1:elements2send_row, 1:elements2send_col) => send_buffer_1d(1:int(elements2send_row, int_8)*elements2send_col)
2742!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(row_local,col_local) &
2743!$OMP SHARED(elements2send_col,elements2send_row,send_buffer,fm_sub,&
2744!$OMP index2send_col,index2send_row,pcol_send,prow_send)
2745 DO row_local = 1, elements2send_col
2746 DO col_local = 1, elements2send_row
2747 send_buffer(col_local, row_local) = &
2748 fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2749 index2send_row(prow_send)%array(col_local))
2750 END DO
2751 END DO
2752!$OMP END PARALLEL DO
2753 CALL timestop(handle2)
2754
2755 prow_recv = mpi2blacs_sub(1, proc_recv)
2756 pcol_recv = mpi2blacs_sub(2, proc_recv)
2757 elements2recv_col = data2recv_col(pcol_recv)
2758 elements2recv_row = data2recv_row(prow_recv)
2759
2760 ! Send data
2761 recv_buffer(1:elements2recv_col, 1:elements2recv_row) => recv_buffer_1d(1:int(elements2recv_row, int_8)*elements2recv_col)
2762 IF (SIZE(recv_buffer) > 0_int_8) THEN
2763 IF (SIZE(send_buffer) > 0_int_8) THEN
2764 CALL para_env_sub%sendrecv(send_buffer, proc_send, recv_buffer, proc_recv, tag)
2765 ELSE
2766 CALL para_env_sub%recv(recv_buffer, proc_recv, tag)
2767 END IF
2768
2769 CALL timeset(routinen//"_unpack", handle2)
2770!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(row_local,col_local) &
2771!$OMP SHARED(elements2recv_col,elements2recv_row,recv_buffer,fm_global,&
2772!$OMP index2recv_col,index2recv_row,pcol_recv,prow_recv)
2773 DO col_local = 1, elements2recv_col
2774 DO row_local = 1, elements2recv_row
2775 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2776 index2recv_row(prow_recv)%array(row_local)) &
2777 = recv_buffer(col_local, row_local)
2778 END DO
2779 END DO
2780!$OMP END PARALLEL DO
2781 CALL timestop(handle2)
2782 ELSE IF (SIZE(send_buffer) > 0_int_8) THEN
2783 CALL para_env_sub%send(send_buffer, proc_send, tag)
2784 END IF
2785 END DO
2786 END IF
2787
2788 DEALLOCATE (data2send_col, data2send_row, data2recv_col, data2recv_row)
2789 DO proc_shift = 0, npcol_global - 1
2790 DEALLOCATE (index2send_col(proc_shift)%array)
2791 END DO
2792 DO proc_shift = 0, npcol_sub - 1
2793 DEALLOCATE (index2recv_col(proc_shift)%array)
2794 END DO
2795 DO proc_shift = 0, nprow_global - 1
2796 DEALLOCATE (index2send_row(proc_shift)%array)
2797 END DO
2798 DO proc_shift = 0, nprow_sub - 1
2799 DEALLOCATE (index2recv_row(proc_shift)%array)
2800 END DO
2801 DEALLOCATE (index2send_col, index2recv_col, index2send_row, index2recv_row)
2802
2803 CALL cp_fm_release(fm_sub)
2804
2805 CALL timestop(handle)
2806
2807 END SUBROUTINE dereplicate_and_sum_fm
2808
2809! **************************************************************************************************
2810!> \brief Determines, for every target process coordinate, the number and the local indices of the elements to be sent
2811!> \param data2send number of elements to be sent to each process coordinate
2812!> \param np_global number of processes along the considered dimension of the target grid
2813!> \param indices_sub local-to-global index map of the source matrix
2814!> \param n_block_global block size of the target distribution
2815!> \param first_p_pos_global first process position of the target distribution
2816!> \param index2send local indices of the elements to be sent to each process coordinate
2817! **************************************************************************************************
2818 SUBROUTINE get_elements2send(data2send, np_global, indices_sub, n_block_global, first_p_pos_global, index2send)
2819 INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: data2send
2820 INTEGER, INTENT(IN) :: np_global
2821 INTEGER, DIMENSION(:), INTENT(IN) :: indices_sub
2822 INTEGER, INTENT(IN) :: n_block_global, first_p_pos_global
2823 TYPE(one_dim_int_array), ALLOCATABLE, &
2824 DIMENSION(:), INTENT(OUT) :: index2send
2825
2826 INTEGER :: i_global, i_local, proc
2827
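      ! Two-pass scheme: the first loop only counts how many local elements map to each
      ! target process (via the block-cyclic index-to-process map cp_fm_indxg2p), the second
      ! loop fills the per-process index lists after they have been allocated to exact size.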
2828 ALLOCATE (data2send(0:np_global - 1))
2829 data2send = 0
2830 DO i_local = 1, SIZE(indices_sub)
2831 i_global = indices_sub(i_local)
2832 proc = cp_fm_indxg2p(i_global, n_block_global, 0, first_p_pos_global, np_global)
2833 data2send(proc) = data2send(proc) + 1
2834 END DO
2835
2836 ALLOCATE (index2send(0:np_global - 1))
2837 DO proc = 0, np_global - 1
2838 ALLOCATE (index2send(proc)%array(data2send(proc)))
         ! Initialize with an invalid local index so that any entry left unset leads to an
         ! immediate crash instead of silent data corruption
         index2send(proc)%array = -1
      END DO

      ! Second pass: fill the per-process lists with the local indices to send
      data2send = 0
      DO i_local = 1, SIZE(indices_sub)
         i_global = indices_sub(i_local)
         proc = cp_fm_indxg2p(i_global, n_block_global, 0, first_p_pos_global, np_global)
         data2send(proc) = data2send(proc) + 1
         index2send(proc)%array(data2send(proc)) = i_local
      END DO

   END SUBROUTINE get_elements2send
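
! **************************************************************************************************
!> \brief Illustrative sketch only, never referenced: the block-cyclic owner formula behind
!>        cp_fm_indxg2p (ScaLAPACK's INDXG2P) as it is used in get_elements2send above,
!>        assuming 1-based global indices and 0-based process coordinates.
! **************************************************************************************************
   PURE INTEGER FUNCTION owner_process_sketch(i_global, n_block, first_p_pos, np) RESULT(proc)
      INTEGER, INTENT(IN) :: i_global, n_block, first_p_pos, np

      ! Global index i_global lies in block (i_global - 1)/n_block; blocks are dealt
      ! round-robin over the np processes, starting at process first_p_pos
      proc = MODULO(first_p_pos + (i_global - 1)/n_block, np)
      ! Example: n_block = 2, first_p_pos = 0, np = 3 maps the global indices
      ! 1,2 -> 0; 3,4 -> 1; 5,6 -> 2; 7,8 -> 0; ...
   END FUNCTION owner_process_sketch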

END MODULE rpa_grad