72#include "./base/base_uses.f90"
78 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'rpa_grad'
82 TYPE sos_mp2_grad_work_type
84 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: pair_list
85 TYPE(one_dim_int_array),
DIMENSION(:),
ALLOCATABLE :: index2send, index2recv
86 REAL(KIND=
dp),
DIMENSION(:),
ALLOCATABLE :: p
89 TYPE rpa_grad_work_type
90 TYPE(cp_fm_type) :: fm_mat_Q_copy =
cp_fm_type()
91 TYPE(one_dim_int_array),
DIMENSION(:, :),
ALLOCATABLE :: index2send
92 TYPE(two_dim_int_array),
DIMENSION(:, :),
ALLOCATABLE :: index2recv
93 TYPE(group_dist_d1_type),
DIMENSION(:),
ALLOCATABLE :: gd_homo, gd_virtual
94 INTEGER,
DIMENSION(2) :: grid = -1, mepos = -1
95 TYPE(two_dim_real_array),
DIMENSION(:),
ALLOCATABLE :: P_ij, P_ab
100 TYPE(cp_fm_type) :: fm_Gamma_PQ =
cp_fm_type()
102 TYPE(sos_mp2_grad_work_type),
ALLOCATABLE,
DIMENSION(:) :: sos_mp2_work_occ, sos_mp2_work_virt
103 TYPE(rpa_grad_work_type) :: rpa_work
107 INTEGER,
PARAMETER :: blksize_threshold = 4
121 PURE SUBROUTINE rpa_grad_needed_mem(homo, virtual, dimen_RI, mem_per_rank, mem_per_repl, do_ri_sos_laplace_mp2)
122 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
123 INTEGER,
INTENT(IN) :: dimen_ri
124 REAL(kind=
dp),
INTENT(INOUT) :: mem_per_rank, mem_per_repl
125 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
127 REAL(kind=
dp) :: mem_iak, mem_kl, mem_pab, mem_pij
129 mem_iak = sum(real(virtual, kind=
dp)*homo)*dimen_ri
130 mem_pij = sum(real(homo, kind=
dp)**2)
131 mem_pab = sum(real(virtual, kind=
dp)**2)
132 mem_kl = real(dimen_ri, kind=
dp)*dimen_ri
146 mem_per_rank = mem_per_rank + (mem_pij + mem_pab)*8.0_dp/(1024**2)
147 mem_per_repl = mem_per_repl + (mem_iak + 2.0_dp*mem_iak/
SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
148 IF (.NOT. do_ri_sos_laplace_mp2)
THEN
149 mem_per_repl = mem_per_rank + (mem_iak/
SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
167 homo, virtual, mp2_env, Eigenval, unit_nr, do_ri_sos_laplace_mp2)
170 TYPE(
cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
171 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
172 TYPE(
mp2_type),
INTENT(INOUT) :: mp2_env
173 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
174 INTEGER,
INTENT(IN) :: unit_nr
175 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
177 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_create'
179 INTEGER :: handle, ispin, nrow_local, nspins
181 CALL timeset(routinen, handle)
186 nspins =
SIZE(fm_mat_s)
193 IF (do_ri_sos_laplace_mp2)
THEN
194 CALL sos_mp2_work_type_create(
rpa_grad%sos_mp2_work_occ,
rpa_grad%sos_mp2_work_virt, &
195 unit_nr, eigenval, homo, virtual, mp2_env%ri_grad%eps_canonical, fm_mat_s)
197 CALL rpa_work_type_create(
rpa_grad%rpa_work, fm_mat_q, fm_mat_s, homo, virtual)
202 IF (mp2_env%ri_grad%dot_blksize < 1) mp2_env%ri_grad%dot_blksize = nrow_local
203 mp2_env%ri_grad%dot_blksize = min(mp2_env%ri_grad%dot_blksize, nrow_local)
204 IF (unit_nr > 0)
THEN
205 WRITE (unit_nr,
'(T3,A,T75,I6)')
'GRAD_INFO| Block size for the contraction:', mp2_env%ri_grad%dot_blksize
208 CALL fm_mat_s(1)%matrix_struct%para_env%sync()
210 CALL timestop(handle)
225 SUBROUTINE sos_mp2_work_type_create(sos_mp2_work_occ, sos_mp2_work_virt, unit_nr, &
226 Eigenval, homo, virtual, eps_degenerate, fm_mat_S)
227 TYPE(sos_mp2_grad_work_type),
ALLOCATABLE, &
228 DIMENSION(:),
INTENT(OUT) :: sos_mp2_work_occ, sos_mp2_work_virt
229 INTEGER,
INTENT(IN) :: unit_nr
230 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
231 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
232 REAL(kind=
dp),
INTENT(IN) :: eps_degenerate
233 TYPE(
cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
235 CHARACTER(LEN=*),
PARAMETER :: routinen =
'sos_mp2_work_type_create'
237 INTEGER :: handle, ispin, nspins
239 CALL timeset(routinen, handle)
241 nspins =
SIZE(fm_mat_s)
242 ALLOCATE (sos_mp2_work_occ(nspins), sos_mp2_work_virt(nspins))
245 CALL create_list_nearly_degen_pairs(eigenval(1:homo(ispin), ispin), &
246 eps_degenerate, sos_mp2_work_occ(ispin)%pair_list)
247 IF (unit_nr > 0)
WRITE (unit_nr,
"(T3,A,T75,i6)") &
248 "MO_INFO| Number of ij pairs below EPS_CANONICAL:",
SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
249 ALLOCATE (sos_mp2_work_occ(ispin)%P(homo(ispin) +
SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)))
250 sos_mp2_work_occ(ispin)%P = 0.0_dp
251 CALL prepare_comm_pij(sos_mp2_work_occ(ispin), virtual(ispin), fm_mat_s(ispin))
253 CALL create_list_nearly_degen_pairs(eigenval(homo(ispin) + 1:, ispin), &
254 eps_degenerate, sos_mp2_work_virt(ispin)%pair_list)
255 IF (unit_nr > 0)
WRITE (unit_nr,
"(T3,A,T75,i6)") &
256 "MO_INFO| Number of ab pairs below EPS_CANONICAL:",
SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
257 ALLOCATE (sos_mp2_work_virt(ispin)%P(virtual(ispin) +
SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)))
258 sos_mp2_work_virt(ispin)%P = 0.0_dp
259 CALL prepare_comm_pab(sos_mp2_work_virt(ispin), virtual(ispin), fm_mat_s(ispin))
262 CALL timestop(handle)
274 SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual)
275 TYPE(rpa_grad_work_type),
INTENT(OUT) :: rpa_work
277 TYPE(
cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
278 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
280 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_work_type_create'
282 INTEGER :: avirt, col_global, col_local, handle, iocc, ispin, my_a, my_a_end, my_a_size, &
283 my_a_start, my_i, my_i_end, my_i_size, my_i_start, my_pcol, ncol_local, nspins, &
284 num_pe_col, proc_homo, proc_homo_send, proc_recv, proc_send, proc_virtual, &
286 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv, data2send
287 INTEGER,
DIMENSION(:),
POINTER :: col_indices
289 CALL timeset(routinen, handle)
291 CALL cp_fm_create(rpa_work%fm_mat_Q_copy, matrix_struct=fm_mat_q%matrix_struct)
293 CALL fm_mat_s(1)%matrix_struct%context%get(number_of_process_columns=num_pe_col, my_process_column=my_pcol)
295 nspins =
SIZE(fm_mat_s)
297 ALLOCATE (rpa_work%index2send(0:num_pe_col - 1, nspins), &
298 rpa_work%index2recv(0:num_pe_col - 1, nspins), &
299 rpa_work%gd_homo(nspins), rpa_work%gd_virtual(nspins), &
300 data2send(0:num_pe_col - 1), data2recv(0:num_pe_col - 1), &
301 rpa_work%P_ij(nspins), rpa_work%P_ab(nspins))
304 proc_homo = max(1, ceiling(sqrt(real(num_pe_col, kind=
dp))))
305 DO WHILE (mod(num_pe_col, proc_homo) /= 0)
306 proc_homo = proc_homo - 1
308 proc_virtual = num_pe_col/proc_homo
310 rpa_work%grid(1) = proc_virtual
311 rpa_work%grid(2) = proc_homo
313 rpa_work%mepos(1) = mod(my_pcol, proc_virtual)
314 rpa_work%mepos(2) = my_pcol/proc_virtual
322 CALL cp_fm_struct_get(fm_mat_s(ispin)%matrix_struct, ncol_local=ncol_local, col_indices=col_indices)
326 DO col_local = 1, ncol_local
327 col_global = col_indices(col_local)
329 iocc = (col_global - 1)/virtual(ispin) + 1
330 avirt = col_global - (iocc - 1)*virtual(ispin)
335 proc_send = proc_homo_send*proc_virtual + proc_virtual_send
337 data2send(proc_send) = data2send(proc_send) + 1
340 DO proc_send = 0, num_pe_col - 1
341 ALLOCATE (rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)))
346 DO col_local = 1, ncol_local
347 col_global = col_indices(col_local)
349 iocc = (col_global - 1)/virtual(ispin) + 1
350 avirt = col_global - (iocc - 1)*virtual(ispin)
355 proc_send = proc_homo_send*proc_virtual + proc_virtual_send
357 data2send(proc_send) = data2send(proc_send) + 1
359 rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)) = col_local
363 CALL get_group_dist(rpa_work%gd_homo(ispin), my_pcol/proc_virtual, my_i_start, my_i_end, my_i_size)
364 CALL get_group_dist(rpa_work%gd_virtual(ispin), mod(my_pcol, proc_virtual), my_a_start, my_a_end, my_a_size)
367 DO my_i = my_i_start, my_i_end
368 DO my_a = my_a_start, my_a_end
369 proc_recv = fm_mat_s(ispin)%matrix_struct%g2p_col((my_i - 1)*virtual(ispin) + my_a)
370 data2recv(proc_recv) = data2recv(proc_recv) + 1
374 DO proc_recv = 0, num_pe_col - 1
375 ALLOCATE (rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)))
379 DO my_i = my_i_start, my_i_end
380 DO my_a = my_a_start, my_a_end
381 proc_recv = fm_mat_s(ispin)%matrix_struct%g2p_col((my_i - 1)*virtual(ispin) + my_a)
382 data2recv(proc_recv) = data2recv(proc_recv) + 1
384 rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)) = my_i - my_i_start + 1
385 rpa_work%index2recv(proc_recv, ispin)%array(1, data2recv(proc_recv)) = my_a - my_a_start + 1
389 ALLOCATE (rpa_work%P_ij(ispin)%array(my_i_size, homo(ispin)), &
390 rpa_work%P_ab(ispin)%array(my_a_size, virtual(ispin)))
391 rpa_work%P_ij(ispin)%array(:, :) = 0.0_dp
392 rpa_work%P_ab(ispin)%array(:, :) = 0.0_dp
396 DEALLOCATE (data2send, data2recv)
398 CALL timestop(handle)
408 SUBROUTINE create_list_nearly_degen_pairs(Eigenval, eps_degen, pair_list)
409 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: eigenval
410 REAL(kind=
dp),
INTENT(IN) :: eps_degen
411 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: pair_list
413 INTEGER :: my_i, my_j, num_orbitals, num_pairs, &
416 num_orbitals =
SIZE(eigenval)
421 DO my_i = 1, num_orbitals
422 DO my_j = 1, num_orbitals
423 IF (my_i == my_j) cycle
424 IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen) num_pairs = num_pairs + 1
427 ALLOCATE (pair_list(2, num_pairs))
431 DO my_i = 1, num_orbitals
432 DO my_j = 1, num_orbitals
433 IF (my_i == my_j) cycle
434 IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen)
THEN
435 pair_list(1, pair_counter) = my_i
436 pair_list(2, pair_counter) = my_j
437 pair_counter = pair_counter + 1
442 END SUBROUTINE create_list_nearly_degen_pairs
450 SUBROUTINE prepare_comm_pij(sos_mp2_work, virtual, fm_mat_S)
451 TYPE(sos_mp2_grad_work_type),
INTENT(INOUT) :: sos_mp2_work
452 INTEGER,
INTENT(IN) :: virtual
455 CHARACTER(LEN=*),
PARAMETER :: routinen =
'prepare_comm_Pij'
457 INTEGER :: avirt, col_global, col_local, counter, handle, ij_counter, iocc, my_i, my_j, &
458 my_pcol, my_prow, ncol_local, nrow_local, num_ij_pairs, num_pe_col, pcol, pcol_recv, &
459 pcol_send, proc_shift, tag
460 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv, data2send
461 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
462 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
467 CALL timeset(routinen, handle)
471 CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
472 ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
473 sos_mp2_work%index2recv(0:num_pe_col - 1))
475 ALLOCATE (data2send(0:num_pe_col - 1))
476 ALLOCATE (data2recv(0:num_pe_col - 1))
478 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
479 ncol_local=ncol_local, col_indices=col_indices, &
480 context=context, nrow_local=nrow_local)
481 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
484 num_ij_pairs =
SIZE(sos_mp2_work%pair_list, 2)
486 IF (num_ij_pairs > 0)
THEN
488 CALL comm_exchange%from_split(para_env, my_prow)
493 DO proc_shift = 0, num_pe_col - 1
494 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
497 DO col_local = 1, ncol_local
498 col_global = col_indices(col_local)
500 iocc = max(1, col_global - 1)/virtual + 1
501 avirt = col_global - (iocc - 1)*virtual
503 DO ij_counter = 1, num_ij_pairs
505 my_i = sos_mp2_work%pair_list(1, ij_counter)
506 my_j = sos_mp2_work%pair_list(2, ij_counter)
508 IF (iocc /= my_j) cycle
509 pcol = fm_mat_s%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
510 IF (pcol /= pcol_send) cycle
512 counter = counter + 1
518 data2send(pcol_send) = counter
521 CALL comm_exchange%alltoall(data2send, data2recv, 1)
523 DO proc_shift = 0, num_pe_col - 1
524 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
525 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
528 ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))
531 DO col_local = 1, ncol_local
532 col_global = col_indices(col_local)
534 iocc = max(1, col_global - 1)/virtual + 1
535 avirt = col_global - (iocc - 1)*virtual
537 DO ij_counter = 1, num_ij_pairs
539 my_i = sos_mp2_work%pair_list(1, ij_counter)
540 my_j = sos_mp2_work%pair_list(2, ij_counter)
542 IF (iocc /= my_j) cycle
543 pcol = fm_mat_s%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
544 IF (pcol /= pcol_send) cycle
546 counter = counter + 1
548 sos_mp2_work%index2send(pcol_send)%array(counter) = col_global
555 ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))
557 CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
558 sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)
561 DO counter = 1, data2send(pcol_send)
562 sos_mp2_work%index2send(pcol_send)%array(counter) = &
563 fm_mat_s%matrix_struct%g2l_col(sos_mp2_work%index2send(pcol_send)%array(counter))
567 CALL comm_exchange%free()
570 DEALLOCATE (data2send, data2recv)
572 CALL timestop(handle)
582 SUBROUTINE prepare_comm_pab(sos_mp2_work, virtual, fm_mat_S)
583 TYPE(sos_mp2_grad_work_type),
INTENT(INOUT) :: sos_mp2_work
584 INTEGER,
INTENT(IN) :: virtual
587 CHARACTER(LEN=*),
PARAMETER :: routinen =
'prepare_comm_Pab'
589 INTEGER :: ab_counter, avirt, col_global, col_local, counter, handle, iocc, my_a, my_b, &
590 my_pcol, my_prow, ncol_local, nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, &
591 pcol_send, proc_shift, tag
592 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv, data2send
593 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
594 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
599 CALL timeset(routinen, handle)
603 CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
604 ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
605 sos_mp2_work%index2recv(0:num_pe_col - 1))
607 num_ab_pairs =
SIZE(sos_mp2_work%pair_list, 2)
608 IF (num_ab_pairs > 0)
THEN
610 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
611 ncol_local=ncol_local, col_indices=col_indices, &
612 context=context, nrow_local=nrow_local)
613 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
616 CALL comm_exchange%from_split(para_env, my_prow)
618 ALLOCATE (data2send(0:num_pe_col - 1))
619 ALLOCATE (data2recv(0:num_pe_col - 1))
623 DO proc_shift = 0, num_pe_col - 1
624 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
625 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
628 DO col_local = 1, ncol_local
629 col_global = col_indices(col_local)
631 iocc = max(1, col_global - 1)/virtual + 1
632 avirt = col_global - (iocc - 1)*virtual
634 DO ab_counter = 1, num_ab_pairs
636 my_a = sos_mp2_work%pair_list(1, ab_counter)
637 my_b = sos_mp2_work%pair_list(2, ab_counter)
639 IF (avirt /= my_b) cycle
640 pcol = fm_mat_s%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
641 IF (pcol /= pcol_send) cycle
643 counter = counter + 1
649 data2send(pcol_send) = counter
652 CALL comm_exchange%alltoall(data2send, data2recv, 1)
654 DO proc_shift = 0, num_pe_col - 1
655 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
656 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
659 ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))
662 DO col_local = 1, ncol_local
663 col_global = col_indices(col_local)
665 iocc = max(1, col_global - 1)/virtual + 1
666 avirt = col_global - (iocc - 1)*virtual
668 DO ab_counter = 1, num_ab_pairs
670 my_a = sos_mp2_work%pair_list(1, ab_counter)
671 my_b = sos_mp2_work%pair_list(2, ab_counter)
673 IF (avirt /= my_b) cycle
674 pcol = fm_mat_s%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
675 IF (pcol /= pcol_send) cycle
677 counter = counter + 1
679 sos_mp2_work%index2send(pcol_send)%array(counter) = col_global
686 ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))
688 CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
689 sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)
692 DO counter = 1, data2send(pcol_send)
693 sos_mp2_work%index2send(pcol_send)%array(counter) = &
694 fm_mat_s%matrix_struct%g2l_col(sos_mp2_work%index2send(pcol_send)%array(counter))
698 CALL comm_exchange%free()
699 DEALLOCATE (data2send, data2recv)
703 CALL timestop(handle)
737 dgemm_counter, fm_mat_S, omega, homo, virtual, Eigenval, weight, unit_nr)
738 TYPE(
mp2_type),
INTENT(INOUT) :: mp2_env
740 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
741 TYPE(
cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_q, fm_mat_q_gemm
743 TYPE(
cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
744 REAL(kind=
dp),
INTENT(IN) :: omega
745 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
746 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
747 REAL(kind=
dp),
INTENT(IN) :: weight
748 INTEGER,
INTENT(IN) :: unit_nr
750 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_matrix_operations'
752 INTEGER :: col_global, col_local, dimen_ia, &
753 dimen_ri, handle, handle2, ispin, &
754 jspin, ncol_local, nrow_local, nspins, &
756 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
757 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
758 TARGET :: mat_s_3d, mat_work_iap_3d
761 CALL timeset(routinen, handle)
763 nspins =
SIZE(fm_mat_q)
765 CALL cp_fm_get_info(fm_mat_q(1), nrow_global=dimen_ri, nrow_local=nrow_local, ncol_local=ncol_local, &
766 col_indices=col_indices, row_indices=row_indices)
768 IF (.NOT. do_ri_sos_laplace_mp2)
THEN
769 CALL cp_fm_create(fm_work_pq, fm_mat_q(1)%matrix_struct)
778 DO col_local = 1, ncol_local
779 col_global = col_indices(col_local)
780 DO row_local = 1, nrow_local
781 IF (col_global == row_indices(row_local))
THEN
782 fm_mat_q(1)%local_data(row_local, col_local) = fm_mat_q(1)%local_data(row_local, col_local) - 1.0_dp
788 CALL timeset(routinen//
"_PQ", handle2)
790 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
791 matrix_a=
rpa_grad%rpa_work%fm_mat_Q_copy, matrix_b=fm_mat_q(1), beta=1.0_dp, &
794 CALL timestop(handle2)
797 fm_mat_q_gemm(1)%matrix_struct%context)
801 IF (do_ri_sos_laplace_mp2)
THEN
803 jspin = nspins - ispin + 1
809 IF (do_ri_sos_laplace_mp2)
THEN
810 CALL timeset(routinen//
"_PQ", handle2)
812 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
813 matrix_a=fm_mat_q(ispin), matrix_b=fm_mat_q(jspin), beta=1.0_dp, &
816 CALL timestop(handle2)
819 fm_mat_q_gemm(jspin)%matrix_struct%context)
821 CALL calc_fm_mat_s_rpa(fm_mat_s(ispin), .true., virtual(ispin), eigenval(:, ispin), &
822 homo(ispin), omega, 0.0_dp)
825 CALL timeset(routinen//
"_contr_S", handle2)
831 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ia, k=dimen_ri, alpha=1.0_dp, &
832 matrix_a=fm_mat_q_gemm(jspin), matrix_b=fm_mat_s(ispin), beta=0.0_dp, &
833 matrix_c=fm_work_iap)
835 CALL timestop(handle2)
837 IF (do_ri_sos_laplace_mp2)
THEN
838 CALL calc_p_sos_mp2(homo(ispin), fm_mat_s(ispin), fm_work_iap, &
840 omega, weight, virtual(ispin), eigenval(:, ispin), mp2_env%ri_grad%dot_blksize)
852 CALL redistribute_fm_mat_s(
rpa_grad%rpa_work%index2send(:, ispin),
rpa_grad%rpa_work%index2recv(:, ispin), &
853 fm_work_iap, mat_work_iap_3d, &
858 CALL redistribute_fm_mat_s(
rpa_grad%rpa_work%index2send(:, ispin),
rpa_grad%rpa_work%index2recv(:, ispin), &
859 fm_mat_s(ispin), mat_s_3d, &
864 CALL calc_p_rpa(mat_s_3d, mat_work_iap_3d,
rpa_grad%rpa_work%gd_homo(ispin),
rpa_grad%rpa_work%gd_virtual(ispin), &
866 fm_mat_s(ispin)%matrix_struct, &
868 weight, omega, eigenval(:, ispin), homo(ispin), unit_nr, mp2_env)
870 DEALLOCATE (mat_work_iap_3d, mat_s_3d)
878 CALL timestop(handle)
895 SUBROUTINE calc_p_sos_mp2(homo, fm_mat_S, fm_work_iaP, sos_mp2_work_occ, sos_mp2_work_virt, &
896 omega, weight, virtual, Eigenval, dot_blksize)
897 INTEGER,
INTENT(IN) :: homo
898 TYPE(
cp_fm_type),
INTENT(IN) :: fm_mat_s, fm_work_iap
899 TYPE(sos_mp2_grad_work_type),
INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
900 REAL(kind=
dp),
INTENT(IN) :: omega, weight
901 INTEGER,
INTENT(IN) :: virtual
902 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: eigenval
903 INTEGER,
INTENT(IN) :: dot_blksize
905 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_sos_mp2'
907 INTEGER :: avirt, col_global, col_local, handle, &
908 handle2, iocc, my_a, my_i, ncol_local, &
909 nrow_local, num_ab_pairs, num_ij_pairs
910 INTEGER,
DIMENSION(:),
POINTER :: col_indices
911 REAL(kind=
dp) :: ddot, trace
913 CALL timeset(routinen, handle)
915 CALL cp_fm_get_info(fm_mat_s, col_indices=col_indices, ncol_local=ncol_local, nrow_local=nrow_local)
917 CALL timeset(routinen//
"_Pij_diag", handle2)
923 DO col_local = 1, ncol_local
924 col_global = col_indices(col_local)
926 iocc = max(1, col_global - 1)/virtual + 1
927 avirt = col_global - (iocc - 1)*virtual
929 IF (iocc == my_i) trace = trace + &
930 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
933 sos_mp2_work_occ%P(my_i) = sos_mp2_work_occ%P(my_i) - trace*omega*weight
936 CALL timestop(handle2)
938 CALL timeset(routinen//
"_Pab_diag", handle2)
944 DO col_local = 1, ncol_local
945 col_global = col_indices(col_local)
947 iocc = max(1, col_global - 1)/virtual + 1
948 avirt = col_global - (iocc - 1)*virtual
950 IF (avirt == my_a) trace = trace + &
951 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
954 sos_mp2_work_virt%P(my_a) = sos_mp2_work_virt%P(my_a) + trace*omega*weight
957 CALL timestop(handle2)
960 num_ij_pairs =
SIZE(sos_mp2_work_occ%pair_list, 2)
961 num_ab_pairs =
SIZE(sos_mp2_work_virt%pair_list, 2)
962 IF (num_ij_pairs > 0)
THEN
963 CALL calc_pij_degen(fm_work_iap, fm_mat_s, sos_mp2_work_occ%pair_list, &
964 virtual, sos_mp2_work_occ%P(homo + 1:), eigenval(:homo), omega, weight, &
965 sos_mp2_work_occ%index2send, sos_mp2_work_occ%index2recv, dot_blksize)
967 IF (num_ab_pairs > 0)
THEN
968 CALL calc_pab_degen(fm_work_iap, fm_mat_s, sos_mp2_work_virt%pair_list, &
969 virtual, sos_mp2_work_virt%P(virtual + 1:), eigenval(homo + 1:), omega, weight, &
970 sos_mp2_work_virt%index2send, sos_mp2_work_virt%index2recv, dot_blksize)
973 CALL timestop(handle)
995 SUBROUTINE calc_p_rpa(mat_S_1D, mat_work_iaP_3D, gd_homo, gd_virtual, grid, mepos, &
996 fm_struct_S, P_ij, P_ab, weight, omega, Eigenval, homo, unit_nr, mp2_env)
997 REAL(kind=
dp),
DIMENSION(*),
INTENT(INOUT),
TARGET :: mat_s_1d
998 REAL(kind=
dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: mat_work_iap_3d
1000 INTEGER,
DIMENSION(2),
INTENT(IN) :: grid, mepos
1002 REAL(kind=
dp),
DIMENSION(:, :) :: p_ij, p_ab
1003 REAL(kind=
dp),
INTENT(IN) :: weight, omega
1004 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: eigenval
1005 INTEGER,
INTENT(IN) :: homo, unit_nr
1006 TYPE(
mp2_type),
INTENT(INOUT) :: mp2_env
1008 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_rpa'
1010 INTEGER :: completed, handle, handle2, my_a_end, my_a_size, my_a_start, my_i_end, my_i_size, &
1011 my_i_start, my_p_size, my_prow, number_of_parallel_channels, proc_a_recv, proc_a_send, &
1012 proc_i_recv, proc_i_send, proc_recv, proc_send, proc_shift, recv_a_end, recv_a_size, &
1013 recv_a_start, recv_i_end, recv_i_size, recv_i_start, tag
1014 INTEGER(KIND=int_8) :: mem, number_of_elements_per_blk
1015 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: procs_recv
1016 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1017 REAL(kind=
dp) :: mem_per_block, mem_real
1018 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: buffer_compens_1d
1019 REAL(kind=
dp),
DIMENSION(:, :, :),
POINTER :: mat_s_3d
1023 TYPE(
mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: recv_requests, send_requests
1025 CALL timeset(routinen, handle)
1028 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold)
THEN
1035 my_p_size =
SIZE(mat_work_iap_3d, 1)
1038 CALL fm_struct_s%context%get(my_process_row=my_prow, blacs2mpi=blacs2mpi, para_env=para_env)
1040 CALL get_group_dist(gd_virtual, mepos(1), my_a_start, my_a_end, my_a_size)
1041 CALL get_group_dist(gd_homo, mepos(2), my_i_start, my_i_end, my_i_size)
1046 mat_s_3d(1:my_p_size, 1:my_a_size, 1:my_i_size) => mat_s_1d(1:int(my_p_size,
int_8)*my_a_size*my_i_size)
1048 number_of_elements_per_blk = max(int(
maxsize(gd_homo), kind=
int_8)*my_a_size, &
1049 int(
maxsize(gd_virtual), kind=
int_8)*my_i_size)*my_p_size
1053 mem_real = real(mem, kind=
dp)
1054 mem_per_block = real(number_of_elements_per_blk, kind=
dp)*8.0_dp
1055 number_of_parallel_channels = max(1, min(maxval(grid) - 1, floor(mem_real/mem_per_block)))
1056 CALL para_env%min(number_of_parallel_channels)
1057 IF (mp2_env%ri_grad%max_parallel_comm > 0) &
1058 number_of_parallel_channels = min(number_of_parallel_channels, mp2_env%ri_grad%max_parallel_comm)
1060 IF (unit_nr > 0)
THEN
1061 WRITE (unit_nr,
'(T3,A,T75,I6)')
'GRAD_INFO| Number of parallel communication channels:', number_of_parallel_channels
1064 CALL para_env%sync()
1066 ALLOCATE (buffer_1d(number_of_parallel_channels))
1067 DO proc_shift = 1, number_of_parallel_channels
1068 ALLOCATE (buffer_1d(proc_shift)%array(number_of_elements_per_blk))
1071 ALLOCATE (buffer_3d(number_of_parallel_channels))
1074 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold)
THEN
1075 ALLOCATE (buffer_compens_1d(2*max(my_a_size*
maxsize(gd_virtual), my_i_size*
maxsize(gd_homo))))
1078 IF (number_of_parallel_channels > 1)
THEN
1079 ALLOCATE (procs_recv(number_of_parallel_channels))
1080 ALLOCATE (recv_requests(number_of_parallel_channels))
1081 ALLOCATE (send_requests(maxval(grid) - 1))
1084 IF (number_of_parallel_channels > 1 .AND. grid(1) > 1)
THEN
1085 CALL timeset(routinen//
"_comm_a", handle2)
1088 DO proc_shift = 1, min(grid(1) - 1, number_of_parallel_channels)
1089 proc_a_recv =
modulo(mepos(1) - proc_shift, grid(1))
1090 proc_recv = mepos(2)*grid(1) + proc_a_recv
1092 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1094 buffer_3d(proc_shift)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1095 buffer_1d(proc_shift)%array(1:int(my_p_size, kind=
int_8)*recv_a_size*my_i_size)
1097 CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
1098 recv_requests(proc_shift), tag)
1100 procs_recv(proc_shift) = proc_a_recv
1104 DO proc_shift = 1, grid(1) - 1
1105 proc_a_send =
modulo(mepos(1) + proc_shift, grid(1))
1106 proc_send = mepos(2)*grid(1) + proc_a_send
1108 CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1109 send_requests(proc_shift), tag)
1111 CALL timestop(handle2)
1114 CALL calc_p_rpa_a(p_ab(:, my_a_start:my_a_end), &
1115 mat_s_3d, mat_work_iap_3d, &
1116 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1117 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1118 eigenval(homo + my_a_start:homo + my_a_end), omega, weight)
1120 DO proc_shift = 1, grid(1) - 1
1121 CALL timeset(routinen//
"_comm_a", handle2)
1122 IF (number_of_parallel_channels > 1)
THEN
1125 CALL get_group_dist(gd_virtual, procs_recv(completed), recv_a_start, recv_a_end, recv_a_size)
1127 proc_a_send =
modulo(mepos(1) + proc_shift, grid(1))
1128 proc_a_recv =
modulo(mepos(1) - proc_shift, grid(1))
1130 proc_send = mepos(2)*grid(1) + proc_a_send
1131 proc_recv = mepos(2)*grid(1) + proc_a_recv
1133 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1135 buffer_3d(1)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1136 buffer_1d(1)%array(1:int(my_p_size, kind=
int_8)*recv_a_size*my_i_size)
1138 CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1139 buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
1142 CALL timestop(handle2)
1144 CALL calc_p_rpa_a(p_ab(:, recv_a_start:recv_a_end), &
1145 mat_s_3d, buffer_3d(completed)%array, &
1146 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1147 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1148 eigenval(homo + recv_a_start:homo + recv_a_end), omega, weight)
1150 IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(1))
THEN
1151 proc_a_recv =
modulo(mepos(1) - proc_shift - number_of_parallel_channels, grid(1))
1152 proc_recv = mepos(2)*grid(1) + proc_a_recv
1154 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1156 buffer_3d(completed)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1157 buffer_1d(completed)%array(1:int(my_p_size, kind=
int_8)*recv_a_size*my_i_size)
1159 CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
1160 recv_requests(completed), tag)
1162 procs_recv(completed) = proc_a_recv
1166 IF (number_of_parallel_channels > 1 .AND. grid(1) > 1)
THEN
1170 IF (number_of_parallel_channels > 1 .AND. grid(2) > 1)
THEN
1173 DO proc_shift = 1, min(grid(2) - 1, number_of_parallel_channels)
1174 proc_i_recv =
modulo(mepos(2) - proc_shift, grid(2))
1175 proc_recv = proc_i_recv*grid(1) + mepos(1)
1177 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)
1179 buffer_3d(proc_shift)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1180 buffer_1d(proc_shift)%array(1:int(my_p_size, kind=
int_8)*my_a_size*recv_i_size)
1182 CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
1183 recv_requests(proc_shift), tag)
1185 procs_recv(proc_shift) = proc_i_recv
1189 DO proc_shift = 1, grid(2) - 1
1190 proc_i_send =
modulo(mepos(2) + proc_shift, grid(2))
1191 proc_send = proc_i_send*grid(1) + mepos(1)
1193 CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1194 send_requests(proc_shift), tag)
1198 CALL calc_p_rpa_i(p_ij(:, my_i_start:my_i_end), &
1199 mat_s_3d, mat_work_iap_3d, &
1200 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1201 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1202 eigenval(my_i_start:my_i_end), omega, weight)
1204 DO proc_shift = 1, grid(2) - 1
1205 CALL timeset(routinen//
"_comm_i", handle2)
1206 IF (number_of_parallel_channels > 1)
THEN
1209 CALL get_group_dist(gd_homo, procs_recv(completed), recv_i_start, recv_i_end, recv_i_size)
1211 proc_i_send =
modulo(mepos(2) + proc_shift, grid(2))
1212 proc_i_recv =
modulo(mepos(2) - proc_shift, grid(2))
1214 proc_send = proc_i_send*grid(1) + mepos(1)
1215 proc_recv = proc_i_recv*grid(1) + mepos(1)
1217 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)
1219 buffer_3d(1)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1220 buffer_1d(1)%array(1:int(my_p_size, kind=
int_8)*my_a_size*recv_i_size)
1222 CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1223 buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
1226 CALL timestop(handle2)
1228 CALL calc_p_rpa_i(p_ij(:, recv_i_start:recv_i_end), &
1229 mat_s_3d, buffer_3d(completed)%array, &
1230 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1231 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1232 eigenval(recv_i_start:recv_i_end), omega, weight)
1234 IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(2))
THEN
1235 proc_i_recv =
modulo(mepos(2) - proc_shift - number_of_parallel_channels, grid(2))
1236 proc_recv = proc_i_recv*grid(1) + mepos(1)
1238 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_a_end, recv_i_size)
1240 buffer_3d(completed)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1241 buffer_1d(completed)%array(1:int(my_p_size, kind=
int_8)*my_a_size*recv_i_size)
1243 CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
1244 recv_requests(completed), tag)
1246 procs_recv(completed) = proc_i_recv
1250 IF (number_of_parallel_channels > 1 .AND. grid(2) > 1)
THEN
1254 IF (number_of_parallel_channels > 1)
THEN
1255 DEALLOCATE (procs_recv)
1256 DEALLOCATE (recv_requests)
1257 DEALLOCATE (send_requests)
1260 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold)
THEN
1262 CALL mp2_env%local_gemm_ctx%destroy()
1263 DEALLOCATE (buffer_compens_1d)
1266 DO proc_shift = 1, number_of_parallel_channels
1267 NULLIFY (buffer_3d(proc_shift)%array)
1268 DEALLOCATE (buffer_1d(proc_shift)%array)
1270 DEALLOCATE (buffer_3d, buffer_1d)
1272 CALL timestop(handle)
1290 SUBROUTINE calc_p_rpa_a(P_ab, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
1291 my_eval_virt, my_eval_occ, recv_eval_virt, omega, weight)
1292 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(INOUT) :: p_ab
1293 REAL(kind=
dp),
DIMENSION(:, :, :),
INTENT(IN) :: mat_s, mat_work
1294 INTEGER,
INTENT(IN) :: dot_blksize
1295 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:), &
1296 INTENT(INOUT),
TARGET :: buffer_1d
1298 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_virt
1299 REAL(kind=
dp),
INTENT(IN) :: omega, weight
1301 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_rpa_a'
1303 INTEGER :: handle, my_a, my_a_size, my_i, &
1304 my_i_size, my_p_size, p_end, p_start, &
1305 recv_a_size, stripesize
1306 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: buffer_compens, buffer_unscaled
1308 CALL timeset(routinen, handle)
1310 my_i_size =
SIZE(mat_s, 3)
1311 recv_a_size =
SIZE(mat_work, 2)
1312 my_a_size =
SIZE(mat_s, 2)
1313 my_p_size =
SIZE(mat_s, 1)
1315 IF (dot_blksize >= blksize_threshold)
THEN
1316 buffer_compens(1:my_a_size, 1:recv_a_size) => buffer_1d(1:my_a_size*recv_a_size)
1317 buffer_compens = 0.0_dp
1318 buffer_unscaled(1:my_a_size, 1:recv_a_size) => buffer_1d(my_a_size*recv_a_size + 1:2*my_a_size*recv_a_size)
1321 DO my_i = 1, my_i_size
1322 DO p_start = 1, my_p_size, dot_blksize
1323 stripesize = min(dot_blksize, my_p_size - p_start + 1)
1324 p_end = p_start + stripesize - 1
1326 CALL local_gemm_ctx%gemm(
"T",
"N", my_a_size, recv_a_size, stripesize, &
1327 -weight, mat_s(p_start:p_end, :, my_i), stripesize, &
1328 mat_work(p_start:p_end, :, my_i), stripesize, &
1329 0.0_dp, buffer_unscaled, my_a_size)
1331 CALL scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
1332 my_eval_virt, recv_eval_virt, my_eval_occ(my_i))
1334 CALL kahan_step(buffer_compens, p_ab)
1340 REAL(kind=
dp) :: tmp, e_i, e_a, e_b, omega2, my_compens, my_p, s
1346 DO my_a = 1, my_a_size
1347 DO recv_a = 1, recv_a_size
1348 e_a = my_eval_virt(my_a)
1349 e_b = recv_eval_virt(recv_a)
1350 my_p = p_ab(my_a, recv_a)
1352 DO my_i = 1, my_i_size
1353 e_i = -my_eval_occ(my_i)
1355 *(1.0_dp + omega2/((e_a + e_i)*(e_b + e_i))) - my_compens
1357 my_compens = (s - my_p) - tmp
1360 p_ab(my_a, recv_a) = my_p
1366 CALL timestop(handle)
1368 END SUBROUTINE calc_p_rpa_a
1384 SUBROUTINE calc_p_rpa_i(P_ij, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
1385 my_eval_virt, my_eval_occ, recv_eval_occ, omega, weight)
1386 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: p_ij
1387 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: mat_s, mat_work
1388 INTEGER,
INTENT(IN) :: dot_blksize
1389 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1390 INTENT(INOUT),
TARGET :: buffer_1d
1391 TYPE(local_gemm_ctxt_type),
INTENT(INOUT) :: local_gemm_ctx
1392 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_occ
1393 REAL(kind=dp),
INTENT(IN) :: omega, weight
1395 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_rpa_i'
1397 INTEGER :: handle, my_a, my_a_size, my_i, &
1398 my_i_size, my_p_size, p_end, p_start, &
1399 recv_i_size, stripesize
1400 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: buffer_compens, buffer_unscaled
1402 CALL timeset(routinen, handle)
1404 my_i_size =
SIZE(mat_s, 3)
1405 recv_i_size =
SIZE(mat_work, 3)
1406 my_a_size =
SIZE(mat_s, 2)
1407 my_p_size =
SIZE(mat_s, 1)
1409 IF (dot_blksize >= blksize_threshold)
THEN
1410 buffer_compens(1:my_i_size, 1:recv_i_size) => buffer_1d(1:my_i_size*recv_i_size)
1411 buffer_compens = 0.0_dp
1412 buffer_unscaled(1:my_i_size, 1:recv_i_size) => buffer_1d(my_i_size*recv_i_size + 1:2*my_i_size*recv_i_size)
1415 DO my_a = 1, my_a_size
1416 DO p_start = 1, my_p_size, dot_blksize
1417 stripesize = min(dot_blksize, my_p_size - p_start + 1)
1418 p_end = p_start + stripesize - 1
1420 CALL local_gemm_ctx%gemm(
"T",
"N", my_i_size, recv_i_size, stripesize, &
1421 weight, mat_s(p_start:p_end, my_a, :), stripesize, &
1422 mat_work(p_start:p_end, my_a, :), stripesize, &
1423 0.0_dp, buffer_unscaled, my_i_size)
1425 CALL scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
1426 my_eval_occ, recv_eval_occ, my_eval_virt(my_a))
1428 CALL kahan_step(buffer_compens, p_ij)
1433 REAL(kind=dp) :: tmp, e_i, e_a, e_j, omega2, my_compens, my_p, s
1440 DO my_i = 1, my_i_size
1441 DO recv_i = 1, recv_i_size
1442 e_i = my_eval_occ(my_i)
1443 e_j = recv_eval_occ(recv_i)
1444 my_p = p_ij(my_i, recv_i)
1446 DO my_a = 1, my_a_size
1447 e_a = my_eval_virt(my_a)
1448 tmp = weight*accurate_dot_product(mat_s(:, my_a, my_i), mat_work(:, my_a, recv_i)) &
1449 *(1.0_dp + omega2/((e_a - e_i)*(e_a - e_j))) - my_compens
1451 my_compens = (s - my_p) - tmp
1454 p_ij(my_i, recv_i) = my_p
1460 CALL timestop(handle)
1462 END SUBROUTINE calc_p_rpa_i
1469 SUBROUTINE kahan_step(compens, P)
1470 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: compens, p
1472 CHARACTER(LEN=*),
PARAMETER :: routinen =
'kahan_step'
1474 INTEGER :: handle, i, j
1475 REAL(kind=dp) :: my_compens, my_p, s
1477 CALL timeset(routinen, handle)
1480 DO j = 1,
SIZE(compens, 2)
1481 DO i = 1,
SIZE(compens, 1)
1483 my_compens = compens(i, j)
1484 s = my_p + my_compens
1485 compens(i, j) = (s - my_p) - my_compens
1491 CALL timestop(handle)
1504 SUBROUTINE scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
1505 my_eval_virt, recv_eval_virt, my_eval_occ)
1506 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: buffer_unscaled
1507 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: buffer_compens
1508 REAL(kind=dp),
INTENT(IN) :: omega
1509 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: my_eval_virt, recv_eval_virt
1510 REAL(kind=dp),
INTENT(IN) :: my_eval_occ
1512 CHARACTER(LEN=*),
PARAMETER :: routinen =
'scale_buffer_and_add_compens_virt'
1514 INTEGER :: handle, my_a, my_b
1516 CALL timeset(routinen, handle)
1520 DO my_b = 1,
SIZE(buffer_compens, 2)
1521 DO my_a = 1,
SIZE(buffer_compens, 1)
1522 buffer_compens(my_a, my_b) = buffer_unscaled(my_a, my_b) &
1523 *(1.0_dp - omega**2/((my_eval_virt(my_a) - my_eval_occ)*(recv_eval_virt(my_b) - my_eval_occ))) &
1524 - buffer_compens(my_a, my_b)
1529 CALL timestop(handle)
1531 END SUBROUTINE scale_buffer_and_add_compens_virt
1542 SUBROUTINE scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
1543 my_eval_occ, recv_eval_occ, my_eval_virt)
1544 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: buffer_unscaled
1545 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: buffer_compens
1546 REAL(kind=dp),
INTENT(IN) :: omega
1547 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: my_eval_occ, recv_eval_occ
1548 REAL(kind=dp),
INTENT(IN) :: my_eval_virt
1550 CHARACTER(LEN=*),
PARAMETER :: routinen =
'scale_buffer_and_add_compens_occ'
1552 INTEGER :: handle, my_i, my_j
1554 CALL timeset(routinen, handle)
1558 DO my_j = 1,
SIZE(buffer_compens, 2)
1559 DO my_i = 1,
SIZE(buffer_compens, 1)
1560 buffer_compens(my_i, my_j) = buffer_unscaled(my_i, my_j) &
1561 *(1.0_dp - omega**2/((my_eval_virt - my_eval_occ(my_i))*(my_eval_virt - recv_eval_occ(my_j)))) &
1562 - buffer_compens(my_i, my_j)
1567 CALL timestop(handle)
1569 END SUBROUTINE scale_buffer_and_add_compens_occ
1576 ELEMENTAL FUNCTION sinh_over_x(x)
RESULT(res)
1577 REAL(kind=dp),
INTENT(IN) :: x
1578 REAL(kind=dp) :: res
1582 IF (abs(x) > 3.0e-4_dp)
THEN
1585 res = 1.0_dp + x**2/6.0_dp
1588 END FUNCTION sinh_over_x
1604 SUBROUTINE calc_pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigenval, &
1605 omega, weight, index2send, index2recv, dot_blksize)
1606 TYPE(cp_fm_type),
INTENT(IN) :: fm_work_iap, fm_mat_s
1607 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: pair_list
1608 INTEGER,
INTENT(IN) :: virtual
1609 REAL(kind=dp),
DIMENSION(:),
INTENT(INOUT) :: p_ij
1610 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: eigenval
1611 REAL(kind=dp),
INTENT(IN) :: omega, weight
1612 TYPE(one_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2send, index2recv
1613 INTEGER,
INTENT(IN) :: dot_blksize
1615 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_Pij_degen'
1617 INTEGER :: avirt, col_global, col_local, counter, handle, handle2, ij_counter, iocc, &
1618 my_col_local, my_i, my_j, my_pcol, my_prow, ncol_local, nrow_local, num_ij_pairs, &
1619 num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, recv_size, send_size, &
1620 size_recv_buffer, size_send_buffer, tag
1621 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
1622 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1623 REAL(kind=dp) :: trace
1624 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
1625 TYPE(cp_blacs_env_type),
POINTER :: context
1626 TYPE(mp_para_env_type),
POINTER :: para_env
1628 CALL timeset(routinen, handle)
1630 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1631 ncol_local=ncol_local, col_indices=col_indices, &
1632 context=context, nrow_local=nrow_local)
1633 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1634 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1636 num_ij_pairs =
SIZE(pair_list, 2)
1640 DO ij_counter = 1, num_ij_pairs
1642 my_i = pair_list(1, ij_counter)
1643 my_j = pair_list(2, ij_counter)
1647 DO col_local = 1, ncol_local
1648 col_global = col_indices(col_local)
1650 iocc = max(1, col_global - 1)/virtual + 1
1651 avirt = col_global - (iocc - 1)*virtual
1653 IF (iocc /= my_j) cycle
1654 pcol = fm_work_iap%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
1655 IF (pcol /= my_pcol) cycle
1657 my_col_local = fm_work_iap%matrix_struct%g2l_col((my_i - 1)*virtual + avirt)
1659 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1663 p_ij(ij_counter) = p_ij(ij_counter) - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1667 IF (num_pe_col > 1)
THEN
1668 size_send_buffer = 0
1669 size_recv_buffer = 0
1670 DO proc_shift = 1, num_pe_col - 1
1671 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1672 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1674 IF (
ALLOCATED(index2send(pcol_send)%array)) &
1675 size_send_buffer = max(size_send_buffer,
SIZE(index2send(pcol_send)%array))
1677 IF (
ALLOCATED(index2recv(pcol_recv)%array)) &
1678 size_recv_buffer = max(size_recv_buffer,
SIZE(index2recv(pcol_recv)%array))
1681 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1683 DO proc_shift = 1, num_pe_col - 1
1684 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1685 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1689 IF (
ALLOCATED(index2send(pcol_send)%array)) send_size =
SIZE(index2send(pcol_send)%array)
1691 DO counter = 1, send_size
1692 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1696 IF (
ALLOCATED(index2recv(pcol_recv)%array)) recv_size =
SIZE(index2recv(pcol_recv)%array)
1697 IF (recv_size > 0)
THEN
1698 CALL timeset(routinen//
"_send", handle2)
1699 IF (send_size > 0)
THEN
1700 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1701 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1703 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1705 CALL timestop(handle2)
1707 DO ij_counter = 1, num_ij_pairs
1710 my_i = pair_list(1, ij_counter)
1711 my_j = pair_list(2, ij_counter)
1715 DO col_local = 1, recv_size
1716 col_global = index2recv(pcol_recv)%array(col_local)
1718 iocc = max(1, col_global - 1)/virtual + 1
1719 IF (iocc /= my_j) cycle
1720 avirt = col_global - (iocc - 1)*virtual
1721 pcol = fm_work_iap%matrix_struct%g2p_col((my_i - 1)*virtual + avirt)
1722 IF (pcol /= my_pcol) cycle
1724 my_col_local = fm_work_iap%matrix_struct%g2l_col((my_i - 1)*virtual + avirt)
1726 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1730 p_ij(ij_counter) = p_ij(ij_counter) &
1731 - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1733 ELSE IF (send_size > 0)
THEN
1734 CALL timeset(routinen//
"_send", handle2)
1735 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1736 CALL timestop(handle2)
1739 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
1740 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
1743 CALL timestop(handle)
1761 SUBROUTINE calc_pab_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ab, Eigenval, &
1762 omega, weight, index2send, index2recv, dot_blksize)
1763 TYPE(cp_fm_type),
INTENT(IN) :: fm_work_iap, fm_mat_s
1764 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: pair_list
1765 INTEGER,
INTENT(IN) :: virtual
1766 REAL(kind=dp),
DIMENSION(:),
INTENT(INOUT) :: p_ab
1767 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: eigenval
1768 REAL(kind=dp),
INTENT(IN) :: omega, weight
1769 TYPE(one_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2send, index2recv
1770 INTEGER,
INTENT(IN) :: dot_blksize
1772 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_Pab_degen'
1774 INTEGER :: ab_counter, avirt, col_global, col_local, counter, handle, handle2, iocc, my_a, &
1775 my_b, my_col_local, my_pcol, my_prow, ncol_local, nrow_local, num_ab_pairs, num_pe_col, &
1776 pcol, pcol_recv, pcol_send, proc_shift, recv_size, send_size, size_recv_buffer, &
1777 size_send_buffer, tag
1778 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
1779 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1780 REAL(kind=dp) :: trace
1781 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
1782 TYPE(cp_blacs_env_type),
POINTER :: context
1783 TYPE(mp_para_env_type),
POINTER :: para_env
1785 CALL timeset(routinen, handle)
1787 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1788 ncol_local=ncol_local, col_indices=col_indices, &
1789 context=context, nrow_local=nrow_local)
1790 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1791 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1793 num_ab_pairs =
SIZE(pair_list, 2)
1797 DO ab_counter = 1, num_ab_pairs
1799 my_a = pair_list(1, ab_counter)
1800 my_b = pair_list(2, ab_counter)
1804 DO col_local = 1, ncol_local
1805 col_global = col_indices(col_local)
1807 iocc = max(1, col_global - 1)/virtual + 1
1808 avirt = col_global - (iocc - 1)*virtual
1810 IF (avirt /= my_b) cycle
1811 pcol = fm_work_iap%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
1812 IF (pcol /= my_pcol) cycle
1813 my_col_local = fm_work_iap%matrix_struct%g2l_col((iocc - 1)*virtual + my_a)
1815 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1820 p_ab(ab_counter) = p_ab(ab_counter) &
1821 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1825 IF (num_pe_col > 1)
THEN
1826 size_send_buffer = 0
1827 size_recv_buffer = 0
1828 DO proc_shift = 1, num_pe_col - 1
1829 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1830 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1832 IF (
ALLOCATED(index2send(pcol_send)%array)) &
1833 size_send_buffer = max(size_send_buffer,
SIZE(index2send(pcol_send)%array))
1835 IF (
ALLOCATED(index2recv(pcol_recv)%array)) &
1836 size_recv_buffer = max(size_recv_buffer,
SIZE(index2recv(pcol_recv)%array))
1839 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1841 DO proc_shift = 1, num_pe_col - 1
1842 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1843 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1847 IF (
ALLOCATED(index2send(pcol_send)%array)) send_size =
SIZE(index2send(pcol_send)%array)
1849 DO counter = 1, send_size
1850 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1854 IF (
ALLOCATED(index2recv(pcol_recv)%array)) recv_size =
SIZE(index2recv(pcol_recv)%array)
1855 IF (recv_size > 0)
THEN
1856 CALL timeset(routinen//
"_send", handle2)
1857 IF (send_size > 0)
THEN
1858 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1859 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1861 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1863 CALL timestop(handle2)
1865 DO ab_counter = 1, num_ab_pairs
1868 my_a = pair_list(1, ab_counter)
1869 my_b = pair_list(2, ab_counter)
1873 DO col_local = 1,
SIZE(index2recv(pcol_recv)%array)
1874 col_global = index2recv(pcol_recv)%array(col_local)
1876 iocc = max(1, col_global - 1)/virtual + 1
1877 avirt = col_global - (iocc - 1)*virtual
1878 IF (avirt /= my_b) cycle
1879 pcol = fm_work_iap%matrix_struct%g2p_col((iocc - 1)*virtual + my_a)
1880 IF (pcol /= my_pcol) cycle
1882 my_col_local = fm_work_iap%matrix_struct%g2l_col((iocc - 1)*virtual + my_a)
1884 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1888 p_ab(ab_counter) = p_ab(ab_counter) &
1889 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1892 ELSE IF (send_size > 0)
THEN
1893 CALL timeset(routinen//
"_send", handle2)
1894 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1895 CALL timestop(handle2)
1898 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
1899 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
1902 CALL timestop(handle)
1916 SUBROUTINE redistribute_fm_mat_s(index2send, index2recv, fm_mat_S, mat_S_3D, gd_homo, gd_virtual, mepos)
1917 TYPE(one_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2send
1918 TYPE(two_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2recv
1919 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_s
1920 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
1921 INTENT(OUT) :: mat_s_3d
1922 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_homo, gd_virtual
1923 INTEGER,
DIMENSION(2),
INTENT(IN) :: mepos
1925 CHARACTER(LEN=*),
PARAMETER :: routinen =
'redistribute_fm_mat_S'
1927 INTEGER :: col_local, handle, my_a, my_homo, my_i, my_pcol, my_prow, my_virtual, nrow_local, &
1928 num_pe_col, proc_recv, proc_send, proc_shift, recv_size, send_size, size_recv_buffer, &
1929 size_send_buffer, tag
1930 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1931 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
1932 TYPE(mp_para_env_type),
POINTER :: para_env
1934 CALL timeset(routinen, handle)
1938 CALL fm_mat_s%matrix_struct%context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1939 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1941 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, nrow_local=nrow_local, para_env=para_env)
1943 CALL get_group_dist(gd_homo, mepos(2), sizes=my_homo)
1944 CALL get_group_dist(gd_virtual, mepos(1), sizes=my_virtual)
1946 ALLOCATE (mat_s_3d(nrow_local, my_virtual, my_homo))
1948 IF (
ALLOCATED(index2send(my_pcol)%array))
THEN
1949 DO col_local = 1,
SIZE(index2send(my_pcol)%array)
1950 my_a = index2recv(my_pcol)%array(1, col_local)
1951 my_i = index2recv(my_pcol)%array(2, col_local)
1952 mat_s_3d(:, my_a, my_i) = fm_mat_s%local_data(:, index2send(my_pcol)%array(col_local))
1956 IF (num_pe_col > 1)
THEN
1957 size_send_buffer = 0
1958 size_recv_buffer = 0
1959 DO proc_shift = 1, num_pe_col - 1
1960 proc_send =
modulo(my_pcol + proc_shift, num_pe_col)
1961 proc_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1964 IF (
ALLOCATED(index2send(proc_send)%array)) send_size =
SIZE(index2send(proc_send)%array)
1965 size_send_buffer = max(size_send_buffer, send_size)
1968 IF (
ALLOCATED(index2recv(proc_recv)%array)) recv_size =
SIZE(index2recv(proc_recv)%array)
1969 size_recv_buffer = max(size_recv_buffer, recv_size)
1973 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1975 DO proc_shift = 1, num_pe_col - 1
1976 proc_send =
modulo(my_pcol + proc_shift, num_pe_col)
1977 proc_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1980 IF (
ALLOCATED(index2send(proc_send)%array)) send_size =
SIZE(index2send(proc_send)%array)
1981 DO col_local = 1, send_size
1982 buffer_send(:, col_local) = fm_mat_s%local_data(:, index2send(proc_send)%array(col_local))
1986 IF (
ALLOCATED(index2recv(proc_recv)%array)) recv_size =
SIZE(index2recv(proc_recv)%array, 2)
1987 IF (recv_size > 0)
THEN
1988 IF (send_size > 0)
THEN
1989 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), &
1990 buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
1992 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
1995 DO col_local = 1, recv_size
1996 my_a = index2recv(proc_recv)%array(1, col_local)
1997 my_i = index2recv(proc_recv)%array(2, col_local)
1998 mat_s_3d(:, my_a, my_i) = buffer_recv(:, col_local)
2000 ELSE IF (send_size > 0)
THEN
2001 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), tag)
2006 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
2007 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
2010 CALL timestop(handle)
2028 color_sub, do_ri_sos_laplace_mp2, homo, virtual)
2030 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
2031 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env_sub, para_env
2032 TYPE(qs_environment_type),
INTENT(IN),
POINTER :: qs_env
2033 TYPE(group_dist_d1_type) :: gd_array
2034 INTEGER,
INTENT(IN) :: color_sub
2035 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
2036 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2038 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_finalize'
2040 INTEGER :: dimen_ia, dimen_ri, handle, iib, ispin, my_group_l_end, my_group_l_size, &
2041 my_group_l_start, my_ia_end, my_ia_size, my_ia_start, my_p_end, my_p_size, my_p_start, &
2042 ngroup, nspins, pos_group, pos_sub, proc
2043 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: pos_info
2044 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
2045 REAL(kind=dp) :: my_scale
2046 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: gamma_2d
2047 TYPE(cp_blacs_env_type),
POINTER :: blacs_env
2048 TYPE(cp_fm_struct_type),
POINTER :: fm_struct
2049 TYPE(cp_fm_type) :: fm_g_p_ia, fm_pq, fm_pq_2, fm_pq_half, &
2050 fm_work_pq, fm_work_pq_2, fm_y, &
2052 TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_p, gd_p_new
2054 CALL timeset(routinen, handle)
2061 IF (do_ri_sos_laplace_mp2)
THEN
2062 my_scale = mp2_env%scale_s
2064 my_scale = -mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2065 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2068 IF (do_ri_sos_laplace_mp2)
THEN
2069 CALL sos_mp2_grad_finalize(
rpa_grad%sos_mp2_work_occ,
rpa_grad%sos_mp2_work_virt, &
2070 para_env, para_env_sub, homo, virtual, mp2_env)
2072 CALL rpa_grad_work_finalize(
rpa_grad%rpa_work, mp2_env, homo, &
2073 virtual, para_env, para_env_sub)
2076 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2078 CALL cp_fm_get_info(
rpa_grad%fm_Gamma_PQ, ncol_global=dimen_ri)
2081 CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=dimen_ri, &
2082 ncol_global=dimen_ri, para_env=para_env)
2083 CALL cp_fm_create(fm_pq, fm_struct)
2084 CALL cp_fm_create(fm_work_pq, fm_struct)
2085 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2086 CALL cp_fm_create(fm_pq_2, fm_struct)
2088 CALL cp_fm_struct_release(fm_struct)
2089 CALL cp_fm_set_all(fm_pq, 0.0_dp)
2092 CALL dereplicate_and_sum_fm(
rpa_grad%fm_Gamma_PQ, fm_pq)
2094 ngroup = para_env%num_pe/para_env_sub%num_pe
2096 CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
2097 group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
2100 CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
2101 CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)
2103 CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)
2105 CALL create_group_dist(gd_p_new, para_env%num_pe)
2106 CALL create_group_dist(gd_array_new, para_env%num_pe)
2108 DO proc = 0, para_env%num_pe - 1
2110 pos_group = proc/para_env_sub%num_pe
2112 pos_sub = pos_info(proc)
2114 CALL get_group_dist(gd_array, pos_group, gd_array_new, proc)
2115 CALL get_group_dist(gd_p, pos_sub, gd_p_new, proc)
2118 DEALLOCATE (pos_info)
2119 CALL release_group_dist(gd_p)
2121 CALL array2fm(mp2_env%ri_grad%PQ_half, fm_pq%matrix_struct, &
2122 my_p_start, my_p_end, &
2123 my_group_l_start, my_group_l_end, &
2124 gd_p_new, gd_array_new, &
2125 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2128 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2129 CALL array2fm(mp2_env%ri_grad%operator_half, fm_pq%matrix_struct, my_p_start, my_p_end, &
2130 my_group_l_start, my_group_l_end, &
2131 gd_p_new, gd_array_new, &
2132 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2137 CALL release_group_dist(gd_p_new)
2138 CALL release_group_dist(gd_array_new)
2140 IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2142 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2143 matrix_a=fm_pq, matrix_b=fm_pq_half, beta=0.0_dp, &
2144 matrix_c=fm_work_pq)
2146 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2147 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2150 CALL cp_fm_release(fm_work_pq)
2152 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2153 matrix_a=fm_pq, matrix_b=operator_half, beta=0.0_dp, &
2154 matrix_c=fm_work_pq)
2156 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2157 matrix_a=operator_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2159 CALL cp_fm_release(operator_half)
2161 CALL cp_fm_create(fm_work_pq_2, fm_pq%matrix_struct, name=
"fm_Gamma_PQ_2")
2162 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2163 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2164 matrix_c=fm_work_pq_2)
2165 CALL cp_fm_to_fm(fm_work_pq_2, fm_pq_2)
2166 CALL cp_fm_geadd(1.0_dp,
"T", fm_work_pq_2, 1.0_dp, fm_pq_2)
2167 CALL cp_fm_release(fm_work_pq_2)
2168 CALL cp_fm_release(fm_work_pq)
2171 ALLOCATE (mp2_env%ri_grad%Gamma_PQ(my_p_size, my_group_l_size))
2172 CALL fm2array(mp2_env%ri_grad%Gamma_PQ, &
2173 my_p_size, my_p_start, my_p_end, &
2174 my_group_l_size, my_group_l_start, my_group_l_end, &
2175 group_grid_2_mepos, mepos_2_grid_group, &
2176 para_env_sub%num_pe, ngroup, &
2179 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2180 ALLOCATE (mp2_env%ri_grad%Gamma_PQ_2(my_p_size, my_group_l_size))
2181 CALL fm2array(mp2_env%ri_grad%Gamma_PQ_2, my_p_size, my_p_start, my_p_end, &
2182 my_group_l_size, my_group_l_start, my_group_l_end, &
2183 group_grid_2_mepos, mepos_2_grid_group, &
2184 para_env_sub%num_pe, ngroup, &
2189 ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
2190 DO ispin = 1, nspins
2191 DO iib = 1, my_group_l_size
2192 NULLIFY (mp2_env%ri_grad%G_P_ia(iib, ispin)%matrix)
2197 DO ispin = 1, nspins
2199 CALL cp_fm_get_info(
rpa_grad%fm_Y(ispin), ncol_global=dimen_ia)
2201 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2204 CALL cp_fm_struct_create(fm_struct, template_fmstruct=fm_pq_half%matrix_struct, nrow_global=dimen_ia)
2205 CALL cp_fm_create(fm_y, fm_struct)
2206 CALL cp_fm_struct_release(fm_struct)
2207 CALL cp_fm_set_all(fm_y, 0.0_dp)
2209 CALL dereplicate_and_sum_fm(
rpa_grad%fm_Y(ispin), fm_y)
2211 CALL cp_fm_create(fm_g_p_ia, fm_y%matrix_struct)
2212 CALL cp_fm_set_all(fm_g_p_ia, 0.0_dp)
2214 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2215 matrix_a=fm_y, matrix_b=fm_pq_half, beta=0.0_dp, &
2218 CALL cp_fm_release(fm_y)
2220 CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
2221 CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
2223 CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
2224 my_group_l_size, my_group_l_start, my_group_l_end, &
2225 group_grid_2_mepos, mepos_2_grid_group, &
2226 para_env_sub%num_pe, ngroup, &
2230 CALL create_dbcsr_gamma(gamma_2d, homo(ispin), virtual(ispin), dimen_ia, para_env_sub, &
2231 my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
2232 mp2_env%ri_grad%G_P_ia(:, ispin), mp2_env%ri_grad%mo_coeff_o(ispin)%matrix)
2234 CALL release_group_dist(gd_ia)
2238 CALL cp_fm_release(fm_pq_half)
2240 CALL timestop(handle)
2254 SUBROUTINE sos_mp2_grad_finalize(sos_mp2_work_occ, sos_mp2_work_virt, para_env, para_env_sub, homo, virtual, mp2_env)
2255 TYPE(sos_mp2_grad_work_type),
ALLOCATABLE, &
2256 DIMENSION(:),
INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
2257 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env, para_env_sub
2258 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2259 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
2261 CHARACTER(LEN=*),
PARAMETER :: routinen =
'sos_mp2_grad_finalize'
2263 INTEGER :: ab_counter, handle, ij_counter, ispin, &
2264 itmp(2), my_a, my_b, my_b_end, &
2265 my_b_size, my_b_start, my_i, my_j, &
2267 REAL(kind=dp) :: my_scale
2269 CALL timeset(routinen, handle)
2271 nspins =
SIZE(sos_mp2_work_occ)
2272 my_scale = mp2_env%scale_s
2274 DO ispin = 1, nspins
2275 DO pcol = 0,
SIZE(sos_mp2_work_occ(ispin)%index2send, 1) - 1
2276 IF (
ALLOCATED(sos_mp2_work_occ(ispin)%index2send(pcol)%array)) &
2277 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send(pcol)%array)
2278 IF (
ALLOCATED(sos_mp2_work_occ(ispin)%index2send(pcol)%array)) &
2279 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send(pcol)%array)
2280 IF (
ALLOCATED(sos_mp2_work_virt(ispin)%index2recv(pcol)%array)) &
2281 DEALLOCATE (sos_mp2_work_virt(ispin)%index2recv(pcol)%array)
2282 IF (
ALLOCATED(sos_mp2_work_virt(ispin)%index2recv(pcol)%array)) &
2283 DEALLOCATE (sos_mp2_work_virt(ispin)%index2recv(pcol)%array)
2285 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send, &
2286 sos_mp2_work_occ(ispin)%index2recv, &
2287 sos_mp2_work_virt(ispin)%index2send, &
2288 sos_mp2_work_virt(ispin)%index2recv)
2292 DO ispin = 1, nspins
2293 CALL para_env%sum(sos_mp2_work_occ(ispin)%P)
2295 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2296 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2297 DO my_i = 1, homo(ispin)
2298 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_i) = my_scale*sos_mp2_work_occ(ispin)%P(my_i)
2300 DO ij_counter = 1,
SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
2301 my_i = sos_mp2_work_occ(ispin)%pair_list(1, ij_counter)
2302 my_j = sos_mp2_work_occ(ispin)%pair_list(2, ij_counter)
2304 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = my_scale*sos_mp2_work_occ(ispin)%P(homo(ispin) + ij_counter)
2306 DEALLOCATE (sos_mp2_work_occ(ispin)%P, sos_mp2_work_occ(ispin)%pair_list)
2309 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2310 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2313 CALL para_env%sum(sos_mp2_work_virt(ispin)%P)
2315 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2316 my_b_size = itmp(2) - itmp(1) + 1
2317 my_b_start = itmp(1)
2320 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2321 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2322 DO my_a = itmp(1), itmp(2)
2323 mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_a) = my_scale*sos_mp2_work_virt(ispin)%P(my_a)
2325 DO ab_counter = 1,
SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
2326 my_a = sos_mp2_work_virt(ispin)%pair_list(1, ab_counter)
2327 my_b = sos_mp2_work_virt(ispin)%pair_list(2, ab_counter)
2329 IF (my_a >= itmp(1) .AND. my_a <= itmp(2)) mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_b) = &
2330 my_scale*sos_mp2_work_virt(ispin)%P(virtual(ispin) + ab_counter)
2333 DEALLOCATE (sos_mp2_work_virt(ispin)%P, sos_mp2_work_virt(ispin)%pair_list)
2336 IF (para_env_sub%num_pe > 1)
THEN
2338 INTEGER :: send_a_start, send_a_end, send_a_size, &
2339 recv_a_start, recv_a_end, recv_a_size, proc_shift, proc_send, proc_recv
2340 REAL(kind=dp),
DIMENSION(:),
ALLOCATABLE,
TARGET :: buffer_send_1d
2341 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: buffer_send
2342 REAL(kind=dp),
DIMENSION(:, :),
ALLOCATABLE :: buffer_recv
2343 TYPE(group_dist_d1_type) :: gd_virtual_sub
2345 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2347 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2348 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2349 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2351 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2352 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2354 DO proc_shift = 1, para_env_sub%num_pe - 1
2356 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2357 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2359 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2360 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2362 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2364 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2365 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2366 buffer_recv(:, :recv_a_size), proc_recv)
2368 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2369 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2373 DEALLOCATE (buffer_send_1d, buffer_recv)
2375 CALL release_group_dist(gd_virtual_sub)
2378 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2379 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2383 DEALLOCATE (sos_mp2_work_occ, sos_mp2_work_virt)
2384 IF (nspins == 1)
THEN
2385 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2386 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2389 CALL timestop(handle)
2402 SUBROUTINE rpa_grad_work_finalize(rpa_work, mp2_env, homo, virtual, para_env, para_env_sub)
2403 TYPE(rpa_grad_work_type),
INTENT(INOUT) :: rpa_work
2404 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
2405 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2406 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env, para_env_sub
2408 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_work_finalize'
2410 INTEGER :: handle, ispin, itmp(2), my_a_end, my_a_size, my_a_start, my_b_end, my_b_size, &
2411 my_b_start, my_i_end, my_i_size, my_i_start, nspins, proc, proc_recv, proc_send, &
2412 proc_shift, recv_a_end, recv_a_size, recv_a_start, recv_end, recv_start, send_a_end, &
2413 send_a_size, send_a_start, send_end, send_start, size_recv_buffer, size_send_buffer
2414 REAL(kind=dp) :: my_scale
2415 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
2416 TYPE(group_dist_d1_type) :: gd_a_sub, gd_virtual_sub
2418 CALL timeset(routinen, handle)
2421 my_scale = mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2422 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2424 CALL cp_fm_release(rpa_work%fm_mat_Q_copy)
2426 DO ispin = 1, nspins
2427 DO proc = 0,
SIZE(rpa_work%index2send, 1) - 1
2428 IF (
ALLOCATED(rpa_work%index2send(proc, ispin)%array))
DEALLOCATE (rpa_work%index2send(proc, ispin)%array)
2429 IF (
ALLOCATED(rpa_work%index2recv(proc, ispin)%array))
DEALLOCATE (rpa_work%index2recv(proc, ispin)%array)
2432 DEALLOCATE (rpa_work%index2send, rpa_work%index2recv)
2434 DO ispin = 1, nspins
2435 CALL get_group_dist(rpa_work%gd_homo(ispin), rpa_work%mepos(2), my_i_start, my_i_end, my_i_size)
2436 CALL release_group_dist(rpa_work%gd_homo(ispin))
2438 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2439 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2440 mp2_env%ri_grad%P_ij(ispin)%array(my_i_start:my_i_end, :) = my_scale*rpa_work%P_ij(ispin)%array
2441 DEALLOCATE (rpa_work%P_ij(ispin)%array)
2442 CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
2445 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2446 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2448 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2449 my_b_start = itmp(1)
2451 my_b_size = my_b_end - my_b_start + 1
2453 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2454 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2456 CALL get_group_dist(rpa_work%gd_virtual(ispin), rpa_work%mepos(1), my_a_start, my_a_end, my_a_size)
2457 CALL release_group_dist(rpa_work%gd_virtual(ispin))
2459 CALL create_group_dist(gd_a_sub, my_a_start, my_a_end, my_a_size, para_env_sub)
2461 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2464 send_start = max(1, my_b_start - my_a_start + 1)
2465 send_end = min(my_a_size, my_b_end - my_a_start + 1)
2468 recv_start = max(1, my_a_start - my_b_start + 1)
2469 recv_end = min(my_b_size, my_a_end - my_b_start + 1)
2471 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2472 my_scale*rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2474 IF (para_env_sub%num_pe > 1)
THEN
2475 size_send_buffer = 0
2476 size_recv_buffer = 0
2477 DO proc_shift = 1, para_env_sub%num_pe - 1
2478 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2479 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2481 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2482 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2485 send_start = max(1, send_a_start - my_a_start + 1)
2486 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2488 size_send_buffer = max(size_send_buffer, max(send_end - send_start + 1, 0))
2491 recv_start = max(1, recv_a_start - my_b_start + 1)
2492 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2494 size_recv_buffer = max(size_recv_buffer, max(recv_end - recv_start + 1, 0))
2496 ALLOCATE (buffer_send(size_send_buffer, virtual(ispin)), buffer_recv(size_recv_buffer, virtual(ispin)))
2498 DO proc_shift = 1, para_env_sub%num_pe - 1
2499 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2500 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2502 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2503 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2506 send_start = max(1, send_a_start - my_a_start + 1)
2507 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2508 buffer_send(1:max(send_end - send_start + 1, 0), :) = rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2511 recv_start = max(1, recv_a_start - my_b_start + 1)
2512 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2514 CALL para_env_sub%sendrecv(buffer_send(1:max(send_end - send_start + 1, 0), :), proc_send, &
2515 buffer_recv(1:max(recv_end - recv_start + 1, 0), :), proc_recv)
2517 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2518 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) + &
2519 my_scale*buffer_recv(1:max(recv_end - recv_start + 1, 0), :)
2523 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
2524 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
2526 DEALLOCATE (rpa_work%P_ab(ispin)%array)
2528 CALL release_group_dist(gd_a_sub)
2531 TYPE(mp_comm_type) :: comm_exchange
2532 CALL comm_exchange%from_split(para_env, para_env_sub%mepos)
2533 CALL comm_exchange%sum(mp2_env%ri_grad%P_ab(ispin)%array)
2534 CALL comm_exchange%free()
2538 IF (para_env_sub%num_pe > 1)
THEN
2540 REAL(kind=dp),
DIMENSION(:),
ALLOCATABLE,
TARGET :: buffer_send_1d
2541 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: buffer_send
2542 REAL(kind=dp),
DIMENSION(:, :),
ALLOCATABLE :: buffer_recv
2544 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2545 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2546 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2548 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2549 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2551 DO proc_shift = 1, para_env_sub%num_pe - 1
2553 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2554 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2556 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2557 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2559 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2561 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2562 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2563 buffer_recv(:, :recv_a_size), proc_recv)
2565 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2566 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2570 DEALLOCATE (buffer_send_1d, buffer_recv)
2573 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2574 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2577 CALL release_group_dist(gd_virtual_sub)
2580 DEALLOCATE (rpa_work%gd_homo, rpa_work%gd_virtual, rpa_work%P_ij, rpa_work%P_ab)
2581 IF (nspins == 1)
THEN
2582 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2583 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2586 CALL timestop(handle)
2594 SUBROUTINE dereplicate_and_sum_fm(fm_sub, fm_global)
2595 TYPE(cp_fm_type),
INTENT(INOUT) :: fm_sub, fm_global
2597 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dereplicate_and_sum_fm'
2599 INTEGER :: col_local, elements2recv_col, elements2recv_row, elements2send_col, &
2600 elements2send_row, handle, handle2, mypcol_global, myprow_global, ncol_local_global, &
2601 ncol_local_sub, npcol_global, npcol_sub, nprow_global, nprow_sub, nrow_local_global, &
2602 nrow_local_sub, pcol_recv, pcol_send, proc_recv, proc_send, proc_send_global, proc_shift, &
2603 prow_recv, prow_send, row_local, tag
2604 INTEGER(int_8) :: size_recv_buffer, size_send_buffer
2605 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv_col, data2recv_row, &
2606 data2send_col, data2send_row, &
2608 INTEGER,
DIMENSION(:),
POINTER :: col_indices_global, col_indices_sub, &
2609 row_indices_global, row_indices_sub
2610 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi_global, blacs2mpi_sub, &
2611 mpi2blacs_global, mpi2blacs_sub
2612 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: recv_buffer_1d, send_buffer_1d
2613 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: recv_buffer, send_buffer
2614 TYPE(mp_para_env_type),
POINTER :: para_env, para_env_sub
2615 TYPE(one_dim_int_array),
ALLOCATABLE,
DIMENSION(:) :: index2recv_col, index2recv_row, &
2616 index2send_col, index2send_row
2618 CALL timeset(routinen, handle)
2622 nprow_sub = fm_sub%matrix_struct%context%num_pe(1)
2623 npcol_sub = fm_sub%matrix_struct%context%num_pe(2)
2625 myprow_global = fm_global%matrix_struct%context%mepos(1)
2626 mypcol_global = fm_global%matrix_struct%context%mepos(2)
2627 nprow_global = fm_global%matrix_struct%context%num_pe(1)
2628 npcol_global = fm_global%matrix_struct%context%num_pe(2)
2630 CALL cp_fm_get_info(fm_sub, col_indices=col_indices_sub, row_indices=row_indices_sub, &
2631 nrow_local=nrow_local_sub, ncol_local=ncol_local_sub)
2632 CALL cp_fm_struct_get(fm_sub%matrix_struct, para_env=para_env_sub)
2633 CALL cp_fm_struct_get(fm_global%matrix_struct, para_env=para_env, &
2634 col_indices=col_indices_global, row_indices=row_indices_global, &
2635 nrow_local=nrow_local_global, ncol_local=ncol_local_global)
2636 CALL fm_sub%matrix_struct%context%get(blacs2mpi=blacs2mpi_sub, mpi2blacs=mpi2blacs_sub)
2637 CALL fm_global%matrix_struct%context%get(blacs2mpi=blacs2mpi_global, mpi2blacs=mpi2blacs_global)
2639 IF (para_env%num_pe /= para_env_sub%num_pe)
THEN
2641 TYPE(mp_comm_type) :: comm_exchange
2642 comm_exchange = fm_sub%matrix_struct%context%interconnect(para_env)
2643 CALL comm_exchange%sum(fm_sub%local_data)
2644 CALL comm_exchange%free()
2648 ALLOCATE (subgroup2mepos(0:para_env_sub%num_pe - 1))
2649 CALL para_env_sub%allgather(para_env%mepos, subgroup2mepos)
2651 CALL timeset(routinen//
"_data2", handle2)
2653 CALL get_elements2send_col(data2send_col, fm_global%matrix_struct, row_indices_sub, index2send_col)
2654 CALL get_elements2send_row(data2send_row, fm_global%matrix_struct, col_indices_sub, index2send_row)
2658 CALL get_elements2send_col(data2recv_col, fm_sub%matrix_struct, row_indices_global, index2recv_col)
2659 CALL get_elements2send_row(data2recv_row, fm_sub%matrix_struct, col_indices_global, index2recv_row)
2660 CALL timestop(handle2)
2662 CALL timeset(routinen//
"_local", handle2)
2664 prow_send = mpi2blacs_global(1, para_env%mepos)
2665 pcol_send = mpi2blacs_global(2, para_env%mepos)
2666 prow_recv = mpi2blacs_sub(1, para_env_sub%mepos)
2667 pcol_recv = mpi2blacs_sub(2, para_env_sub%mepos)
2668 elements2recv_col = data2recv_col(pcol_recv)
2669 elements2recv_row = data2recv_row(prow_recv)
2675 DO col_local = 1, elements2recv_col
2676 DO row_local = 1, elements2recv_row
2677 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2678 index2recv_row(prow_recv)%array(row_local)) &
2679 = fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2680 index2send_row(prow_send)%array(col_local))
2684 CALL timestop(handle2)
2686 IF (para_env_sub%num_pe > 1)
THEN
2687 size_send_buffer = 0_int_8
2688 size_recv_buffer = 0_int_8
2690 DO proc_shift = 1, para_env_sub%num_pe - 1
2691 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2692 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2694 proc_send_global = subgroup2mepos(proc_send)
2695 prow_send = mpi2blacs_global(1, proc_send_global)
2696 pcol_send = mpi2blacs_global(2, proc_send_global)
2697 elements2send_col = data2send_col(pcol_send)
2698 elements2send_row = data2send_row(prow_send)
2700 size_send_buffer = max(size_send_buffer, int(elements2send_col, int_8)*elements2send_row)
2702 prow_recv = mpi2blacs_sub(1, proc_recv)
2703 pcol_recv = mpi2blacs_sub(2, proc_recv)
2704 elements2recv_col = data2recv_col(pcol_recv)
2705 elements2recv_row = data2recv_row(prow_recv)
2707 size_recv_buffer = max(size_recv_buffer, int(elements2recv_col, int_8)*elements2recv_row)
2709 ALLOCATE (send_buffer_1d(size_send_buffer), recv_buffer_1d(size_recv_buffer))
2712 DO proc_shift = 1, para_env_sub%num_pe - 1
2713 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2714 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2716 proc_send_global = subgroup2mepos(proc_send)
2717 prow_send = mpi2blacs_global(1, proc_send_global)
2718 pcol_send = mpi2blacs_global(2, proc_send_global)
2719 elements2send_col = data2send_col(pcol_send)
2720 elements2send_row = data2send_row(prow_send)
2722 CALL timeset(routinen//
"_pack", handle2)
2725 send_buffer(1:elements2send_row, 1:elements2send_col) => send_buffer_1d(1:int(elements2send_row, int_8)*elements2send_col)
2729 DO row_local = 1, elements2send_col
2730 DO col_local = 1, elements2send_row
2731 send_buffer(col_local, row_local) = &
2732 fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2733 index2send_row(prow_send)%array(col_local))
2737 CALL timestop(handle2)
2739 prow_recv = mpi2blacs_sub(1, proc_recv)
2740 pcol_recv = mpi2blacs_sub(2, proc_recv)
2741 elements2recv_col = data2recv_col(pcol_recv)
2742 elements2recv_row = data2recv_row(prow_recv)
2745 recv_buffer(1:elements2recv_col, 1:elements2recv_row) => recv_buffer_1d(1:int(elements2recv_row, int_8)*elements2recv_col)
2746 IF (
SIZE(recv_buffer) > 0_int_8)
THEN
2747 IF (
SIZE(send_buffer) > 0_int_8)
THEN
2748 CALL para_env_sub%sendrecv(send_buffer, proc_send, recv_buffer, proc_recv, tag)
2750 CALL para_env_sub%recv(recv_buffer, proc_recv, tag)
2753 CALL timeset(routinen//
"_unpack", handle2)
2757 DO col_local = 1, elements2recv_col
2758 DO row_local = 1, elements2recv_row
2759 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2760 index2recv_row(prow_recv)%array(row_local)) &
2761 = recv_buffer(col_local, row_local)
2765 CALL timestop(handle2)
2766 ELSE IF (
SIZE(send_buffer) > 0_int_8)
THEN
2767 CALL para_env_sub%send(send_buffer, proc_send, tag)
2772 DEALLOCATE (data2send_col, data2send_row, data2recv_col, data2recv_row)
2773 DO proc_shift = 0, npcol_global - 1
2774 DEALLOCATE (index2send_col(proc_shift)%array)
2776 DO proc_shift = 0, npcol_sub - 1
2777 DEALLOCATE (index2recv_col(proc_shift)%array)
2779 DO proc_shift = 0, nprow_global - 1
2780 DEALLOCATE (index2send_row(proc_shift)%array)
2782 DO proc_shift = 0, nprow_sub - 1
2783 DEALLOCATE (index2recv_row(proc_shift)%array)
2785 DEALLOCATE (index2send_col, index2recv_col, index2send_row, index2recv_row)
2787 CALL cp_fm_release(fm_sub)
2789 CALL timestop(handle)
2791 END SUBROUTINE dereplicate_and_sum_fm
2800 SUBROUTINE get_elements2send_col(data2send, struct_global, indices_sub, index2send)
2801 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: data2send
2802 TYPE(cp_fm_struct_type),
INTENT(INOUT) :: struct_global
2803 INTEGER,
DIMENSION(:),
INTENT(IN) :: indices_sub
2804 TYPE(one_dim_int_array),
ALLOCATABLE, &
2805 DIMENSION(:),
INTENT(OUT) :: index2send
2807 INTEGER :: i_global, i_local, np_global, proc
2809 CALL struct_global%context%get(number_of_process_columns=np_global)
2811 ALLOCATE (data2send(0:np_global - 1))
2813 DO i_local = 1,
SIZE(indices_sub)
2814 i_global = indices_sub(i_local)
2815 proc = struct_global%g2p_col(i_global)
2816 data2send(proc) = data2send(proc) + 1
2819 ALLOCATE (index2send(0:np_global - 1))
2820 DO proc = 0, np_global - 1
2821 ALLOCATE (index2send(proc)%array(data2send(proc)))
2823 index2send(proc)%array = -1
2827 DO i_local = 1,
SIZE(indices_sub)
2828 i_global = indices_sub(i_local)
2829 proc = struct_global%g2p_col(i_global)
2830 data2send(proc) = data2send(proc) + 1
2831 index2send(proc)%array(data2send(proc)) = i_local
2843 SUBROUTINE get_elements2send_row(data2send, struct_global, indices_sub, index2send)
2844 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: data2send
2845 TYPE(cp_fm_struct_type),
INTENT(INOUT) :: struct_global
2846 INTEGER,
DIMENSION(:),
INTENT(IN) :: indices_sub
2847 TYPE(one_dim_int_array),
ALLOCATABLE, &
2848 DIMENSION(:),
INTENT(OUT) :: index2send
2850 INTEGER :: i_global, i_local, np_global, proc
2852 CALL struct_global%context%get(number_of_process_rows=np_global)
2854 ALLOCATE (data2send(0:np_global - 1))
2856 DO i_local = 1,
SIZE(indices_sub)
2857 i_global = indices_sub(i_local)
2858 proc = struct_global%g2p_row(i_global)
2859 data2send(proc) = data2send(proc) + 1
2862 ALLOCATE (index2send(0:np_global - 1))
2863 DO proc = 0, np_global - 1
2864 ALLOCATE (index2send(proc)%array(data2send(proc)))
2866 index2send(proc)%array = -1
2870 DO i_local = 1,
SIZE(indices_sub)
2871 i_global = indices_sub(i_local)
2872 proc = struct_global%g2p_row(i_global)
2873 data2send(proc) = data2send(proc) + 1
2874 index2send(proc)%array(data2send(proc)) = i_local
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
methods related to the blacs parallel environment
basic linear algebra operations for full matrices
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_invert(matrix, n, info_out)
used to replace the cholesky decomposition by the inverse
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_to_fm_submat_general(source, destination, nrows, ncols, s_firstrow, s_firstcol, d_firstrow, d_firstcol, global_context)
General copy of a submatrix of fm matrix to a submatrix of another fm matrix. The two matrices can ha...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Counters to determine the performance of parallel DGEMMs.
subroutine, public dgemm_counter_start(dgemm_counter)
start timer of the counter
subroutine, public dgemm_counter_stop(dgemm_counter, size1, size2, size3)
stop timer of the counter and provide matrix sizes
Types to describe group distributions.
elemental integer function, public maxsize(this)
...
elemental integer function, public group_dist_proc(this, pos)
...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
integer, parameter, public local_gemm_pu_gpu
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
Interface to the message passing library MPI.
subroutine, public mp_waitany(requests, completed)
waits for completion of any of the given requests
type(mp_request_type), parameter, public mp_request_null
Routines to calculate MP2 energy with laplace approach.
subroutine, public calc_fm_mat_s_laplace(fm_mat_s, homo, virtual, eigenval, dajquad)
...
Routines for calculating RI-MP2 gradients.
subroutine, public prepare_redistribution(para_env, para_env_sub, ngroup, group_grid_2_mepos, mepos_2_grid_group, pos_info)
prepare array for redistribution
subroutine, public array2fm(mat2d, fm_struct, my_start_row, my_end_row, my_start_col, my_end_col, gd_row, gd_col, group_grid_2_mepos, ngroup_row, ngroup_col, fm_mat, integ_group_size, color_group, do_release_mat)
redistribute local part of array to fm
subroutine, public create_dbcsr_gamma(gamma_2d, homo, virtual, dimen_ia, para_env_sub, my_ia_start, my_ia_end, my_group_l_size, gd_ia, dbcsr_gamma, mo_coeff_o)
redistribute 2D representation of 3d tensor to a set of dbcsr matrices
subroutine, public fm2array(mat2d, my_rows, my_start_row, my_end_row, my_cols, my_start_col, my_end_col, group_grid_2_mepos, mepos_2_grid_group, ngroup_row, ngroup_col, fm_mat)
redistribute fm to local part of array
Types needed for MP2 calculations.
basic linear algebra operations for full matrixes
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, harris_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, eeq, rhs)
Get the QUICKSTEP environment.
Routines to calculate RI-RPA and SOS-MP2 gradients.
integer, parameter spla_threshold
subroutine, public rpa_grad_finalize(rpa_grad, mp2_env, para_env_sub, para_env, qs_env, gd_array, color_sub, do_ri_sos_laplace_mp2, homo, virtual)
...
subroutine, public rpa_grad_copy_q(fm_mat_q, rpa_grad)
...
pure subroutine, public rpa_grad_needed_mem(homo, virtual, dimen_ri, mem_per_rank, mem_per_repl, do_ri_sos_laplace_mp2)
Calculates the necessary minimum memory for the Gradient code ion MiB.
subroutine, public rpa_grad_create(rpa_grad, fm_mat_q, fm_mat_s, homo, virtual, mp2_env, eigenval, unit_nr, do_ri_sos_laplace_mp2)
Creates the arrays of a rpa_grad_type.
subroutine, public rpa_grad_matrix_operations(mp2_env, rpa_grad, do_ri_sos_laplace_mp2, fm_mat_q, fm_mat_q_gemm, dgemm_counter, fm_mat_s, omega, homo, virtual, eigenval, weight, unit_nr)
...
Utility functions for RPA calculations.
subroutine, public remove_scaling_factor_rpa(fm_mat_s, virtual, eigenval_last, homo, omega_old)
...
subroutine, public calc_fm_mat_s_rpa(fm_mat_s, first_cycle, virtual, eigenval, homo, omega, omega_old)
...
All kind of helpful little routines.
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
represent a pointer to a contiguous 1d array
represent a pointer to a contiguous 3d array
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
stores all the informations relevant to an mpi environment