14 USE iso_c_binding,
ONLY: c_null_ptr,&
40 USE kahan_sum,
ONLY: accurate_dot_product,&
41 accurate_dot_product_2
74 #include "./base/base_uses.f90"
80 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'rpa_grad'
84 TYPE sos_mp2_grad_work_type
86 INTEGER,
DIMENSION(:, :),
ALLOCATABLE :: pair_list
87 TYPE(one_dim_int_array),
DIMENSION(:),
ALLOCATABLE :: index2send, index2recv
88 REAL(KIND=
dp),
DIMENSION(:),
ALLOCATABLE :: p
91 TYPE rpa_grad_work_type
92 TYPE(cp_fm_type) :: fm_mat_Q_copy = cp_fm_type()
93 TYPE(one_dim_int_array),
DIMENSION(:, :),
ALLOCATABLE :: index2send
94 TYPE(two_dim_int_array),
DIMENSION(:, :),
ALLOCATABLE :: index2recv
95 TYPE(group_dist_d1_type),
DIMENSION(:),
ALLOCATABLE :: gd_homo, gd_virtual
96 INTEGER,
DIMENSION(2) :: grid = -1, mepos = -1
97 TYPE(two_dim_real_array),
DIMENSION(:),
ALLOCATABLE :: P_ij, P_ab
102 TYPE(cp_fm_type) :: fm_Gamma_PQ = cp_fm_type()
103 TYPE(cp_fm_type),
DIMENSION(:),
ALLOCATABLE :: fm_Y
104 TYPE(sos_mp2_grad_work_type),
ALLOCATABLE,
DIMENSION(:) :: sos_mp2_work_occ, sos_mp2_work_virt
105 TYPE(rpa_grad_work_type) :: rpa_work
108 INTEGER,
PARAMETER :: spla_threshold = 128*128*128*2
109 INTEGER,
PARAMETER :: blksize_threshold = 4
123 PURE SUBROUTINE rpa_grad_needed_mem(homo, virtual, dimen_RI, mem_per_rank, mem_per_repl, do_ri_sos_laplace_mp2)
124 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
125 INTEGER,
INTENT(IN) :: dimen_ri
126 REAL(kind=
dp),
INTENT(INOUT) :: mem_per_rank, mem_per_repl
127 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
129 REAL(kind=
dp) :: mem_iak, mem_kl, mem_pab, mem_pij
131 mem_iak = sum(real(virtual, kind=
dp)*homo)*dimen_ri
132 mem_pij = sum(real(homo, kind=
dp)**2)
133 mem_pab = sum(real(virtual, kind=
dp)**2)
134 mem_kl = real(dimen_ri, kind=
dp)*dimen_ri
148 mem_per_rank = mem_per_rank + (mem_pij + mem_pab)*8.0_dp/(1024**2)
149 mem_per_repl = mem_per_repl + (mem_iak + 2.0_dp*mem_iak/
SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
150 IF (.NOT. do_ri_sos_laplace_mp2)
THEN
151 mem_per_repl = mem_per_rank + (mem_iak/
SIZE(homo) + mem_kl)*8.0_dp/(1024**2)
169 homo, virtual, mp2_env, Eigenval, unit_nr, do_ri_sos_laplace_mp2)
170 TYPE(rpa_grad_type),
INTENT(OUT) ::
rpa_grad
171 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_q
172 TYPE(cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
173 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
174 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
175 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
176 INTEGER,
INTENT(IN) :: unit_nr
177 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
179 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_create'
181 INTEGER :: handle, ispin, nrow_local, nspins
183 CALL timeset(routinen, handle)
188 nspins =
SIZE(fm_mat_s)
195 IF (do_ri_sos_laplace_mp2)
THEN
196 CALL sos_mp2_work_type_create(
rpa_grad%sos_mp2_work_occ,
rpa_grad%sos_mp2_work_virt, &
197 unit_nr, eigenval, homo, virtual, mp2_env%ri_grad%eps_canonical, fm_mat_s)
199 CALL rpa_work_type_create(
rpa_grad%rpa_work, fm_mat_q, fm_mat_s, homo, virtual)
204 IF (mp2_env%ri_grad%dot_blksize < 1) mp2_env%ri_grad%dot_blksize = nrow_local
205 mp2_env%ri_grad%dot_blksize = min(mp2_env%ri_grad%dot_blksize, nrow_local)
206 IF (unit_nr > 0)
THEN
207 WRITE (unit_nr,
'(T3,A,T75,I6)')
'GRAD_INFO| Block size for the contraction:', mp2_env%ri_grad%dot_blksize
210 CALL fm_mat_s(1)%matrix_struct%para_env%sync()
212 CALL timestop(handle)
227 SUBROUTINE sos_mp2_work_type_create(sos_mp2_work_occ, sos_mp2_work_virt, unit_nr, &
228 Eigenval, homo, virtual, eps_degenerate, fm_mat_S)
229 TYPE(sos_mp2_grad_work_type),
ALLOCATABLE, &
230 DIMENSION(:),
INTENT(OUT) :: sos_mp2_work_occ, sos_mp2_work_virt
231 INTEGER,
INTENT(IN) :: unit_nr
232 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
233 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
234 REAL(kind=
dp),
INTENT(IN) :: eps_degenerate
235 TYPE(cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
237 CHARACTER(LEN=*),
PARAMETER :: routinen =
'sos_mp2_work_type_create'
239 INTEGER :: handle, ispin, nspins
241 CALL timeset(routinen, handle)
243 nspins =
SIZE(fm_mat_s)
244 ALLOCATE (sos_mp2_work_occ(nspins), sos_mp2_work_virt(nspins))
247 CALL create_list_nearly_degen_pairs(eigenval(1:homo(ispin), ispin), &
248 eps_degenerate, sos_mp2_work_occ(ispin)%pair_list)
249 IF (unit_nr > 0)
WRITE (unit_nr,
"(T3,A,T75,i6)") &
250 "MO_INFO| Number of ij pairs below EPS_CANONICAL:",
SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
251 ALLOCATE (sos_mp2_work_occ(ispin)%P(homo(ispin) +
SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)))
252 sos_mp2_work_occ(ispin)%P = 0.0_dp
253 CALL prepare_comm_pij(sos_mp2_work_occ(ispin), virtual(ispin), fm_mat_s(ispin))
255 CALL create_list_nearly_degen_pairs(eigenval(homo(ispin) + 1:, ispin), &
256 eps_degenerate, sos_mp2_work_virt(ispin)%pair_list)
257 IF (unit_nr > 0)
WRITE (unit_nr,
"(T3,A,T75,i6)") &
258 "MO_INFO| Number of ab pairs below EPS_CANONICAL:",
SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
259 ALLOCATE (sos_mp2_work_virt(ispin)%P(virtual(ispin) +
SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)))
260 sos_mp2_work_virt(ispin)%P = 0.0_dp
261 CALL prepare_comm_pab(sos_mp2_work_virt(ispin), virtual(ispin), fm_mat_s(ispin))
264 CALL timestop(handle)
276 SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual)
277 TYPE(rpa_grad_work_type),
INTENT(OUT) :: rpa_work
278 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_q
279 TYPE(cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
280 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
282 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_work_type_create'
284 INTEGER :: avirt, col_global, col_local, first_p_pos(2), first_p_pos_col, handle, iocc, &
285 ispin, my_a, my_a_end, my_a_size, my_a_start, my_i, my_i_end, my_i_size, my_i_start, &
286 my_pcol, ncol_block, ncol_local, nspins, num_pe_col, proc_homo, proc_homo_send, &
287 proc_recv, proc_send, proc_virtual, proc_virtual_send
288 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv, data2send
289 INTEGER,
DIMENSION(:),
POINTER :: col_indices
291 CALL timeset(routinen, handle)
293 CALL cp_fm_create(rpa_work%fm_mat_Q_copy, matrix_struct=fm_mat_q%matrix_struct)
295 CALL fm_mat_s(1)%matrix_struct%context%get(number_of_process_columns=num_pe_col, my_process_column=my_pcol)
297 nspins =
SIZE(fm_mat_s)
299 ALLOCATE (rpa_work%index2send(0:num_pe_col - 1, nspins), &
300 rpa_work%index2recv(0:num_pe_col - 1, nspins), &
301 rpa_work%gd_homo(nspins), rpa_work%gd_virtual(nspins), &
302 data2send(0:num_pe_col - 1), data2recv(0:num_pe_col - 1), &
303 rpa_work%P_ij(nspins), rpa_work%P_ab(nspins))
306 proc_homo = max(1, ceiling(sqrt(real(num_pe_col, kind=
dp))))
307 DO WHILE (mod(num_pe_col, proc_homo) /= 0)
308 proc_homo = proc_homo - 1
310 proc_virtual = num_pe_col/proc_homo
312 rpa_work%grid(1) = proc_virtual
313 rpa_work%grid(2) = proc_homo
315 rpa_work%mepos(1) = mod(my_pcol, proc_virtual)
316 rpa_work%mepos(2) = my_pcol/proc_virtual
321 CALL create_group_dist(rpa_work%gd_homo(ispin), proc_homo, homo(ispin))
322 CALL create_group_dist(rpa_work%gd_virtual(ispin), proc_virtual, virtual(ispin))
324 CALL cp_fm_struct_get(fm_mat_s(ispin)%matrix_struct, ncol_local=ncol_local, col_indices=col_indices, &
325 first_p_pos=first_p_pos, ncol_block=ncol_block)
326 first_p_pos_col = first_p_pos(2)
330 DO col_local = 1, ncol_local
331 col_global = col_indices(col_local)
333 iocc = (col_global - 1)/virtual(ispin) + 1
334 avirt = col_global - (iocc - 1)*virtual(ispin)
339 proc_send = proc_homo_send*proc_virtual + proc_virtual_send
341 data2send(proc_send) = data2send(proc_send) + 1
344 DO proc_send = 0, num_pe_col - 1
345 ALLOCATE (rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)))
350 DO col_local = 1, ncol_local
351 col_global = col_indices(col_local)
353 iocc = (col_global - 1)/virtual(ispin) + 1
354 avirt = col_global - (iocc - 1)*virtual(ispin)
359 proc_send = proc_homo_send*proc_virtual + proc_virtual_send
361 data2send(proc_send) = data2send(proc_send) + 1
363 rpa_work%index2send(proc_send, ispin)%array(data2send(proc_send)) = col_local
367 CALL get_group_dist(rpa_work%gd_homo(ispin), my_pcol/proc_virtual, my_i_start, my_i_end, my_i_size)
368 CALL get_group_dist(rpa_work%gd_virtual(ispin), mod(my_pcol, proc_virtual), my_a_start, my_a_end, my_a_size)
371 DO my_i = my_i_start, my_i_end
372 DO my_a = my_a_start, my_a_end
373 proc_recv =
cp_fm_indxg2p((my_i - 1)*virtual(ispin) + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
374 data2recv(proc_recv) = data2recv(proc_recv) + 1
378 DO proc_recv = 0, num_pe_col - 1
379 ALLOCATE (rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)))
383 DO my_i = my_i_start, my_i_end
384 DO my_a = my_a_start, my_a_end
385 proc_recv =
cp_fm_indxg2p((my_i - 1)*virtual(ispin) + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
386 data2recv(proc_recv) = data2recv(proc_recv) + 1
388 rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)) = my_i - my_i_start + 1
389 rpa_work%index2recv(proc_recv, ispin)%array(1, data2recv(proc_recv)) = my_a - my_a_start + 1
393 ALLOCATE (rpa_work%P_ij(ispin)%array(my_i_size, homo(ispin)), &
394 rpa_work%P_ab(ispin)%array(my_a_size, virtual(ispin)))
395 rpa_work%P_ij(ispin)%array(:, :) = 0.0_dp
396 rpa_work%P_ab(ispin)%array(:, :) = 0.0_dp
400 DEALLOCATE (data2send, data2recv)
402 CALL timestop(handle)
412 SUBROUTINE create_list_nearly_degen_pairs(Eigenval, eps_degen, pair_list)
413 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: eigenval
414 REAL(kind=
dp),
INTENT(IN) :: eps_degen
415 INTEGER,
ALLOCATABLE,
DIMENSION(:, :),
INTENT(OUT) :: pair_list
417 INTEGER :: my_i, my_j, num_orbitals, num_pairs, &
420 num_orbitals =
SIZE(eigenval)
425 DO my_i = 1, num_orbitals
426 DO my_j = 1, num_orbitals
427 IF (my_i == my_j) cycle
428 IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen) num_pairs = num_pairs + 1
431 ALLOCATE (pair_list(2, num_pairs))
435 DO my_i = 1, num_orbitals
436 DO my_j = 1, num_orbitals
437 IF (my_i == my_j) cycle
438 IF (abs(eigenval(my_i) - eigenval(my_j)) < eps_degen)
THEN
439 pair_list(1, pair_counter) = my_i
440 pair_list(2, pair_counter) = my_j
441 pair_counter = pair_counter + 1
446 END SUBROUTINE create_list_nearly_degen_pairs
454 SUBROUTINE prepare_comm_pij(sos_mp2_work, virtual, fm_mat_S)
455 TYPE(sos_mp2_grad_work_type),
INTENT(INOUT) :: sos_mp2_work
456 INTEGER,
INTENT(IN) :: virtual
457 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_s
459 CHARACTER(LEN=*),
PARAMETER :: routinen =
'prepare_comm_Pij'
461 INTEGER :: avirt, col_global, col_local, counter, first_p_pos(2), first_p_pos_col, handle, &
462 ij_counter, iocc, my_i, my_j, my_pcol, my_prow, ncol_block, ncol_local, nrow_local, &
463 num_ij_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, tag
464 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv, data2send
465 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
466 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
467 TYPE(cp_blacs_env_type),
POINTER :: context
468 TYPE(mp_comm_type) :: comm_exchange
469 TYPE(mp_para_env_type),
POINTER :: para_env
471 CALL timeset(routinen, handle)
475 CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
476 ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
477 sos_mp2_work%index2recv(0:num_pe_col - 1))
479 ALLOCATE (data2send(0:num_pe_col - 1))
480 ALLOCATE (data2recv(0:num_pe_col - 1))
482 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
483 ncol_local=ncol_local, col_indices=col_indices, &
484 context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
485 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
487 first_p_pos_col = first_p_pos(2)
489 num_ij_pairs =
SIZE(sos_mp2_work%pair_list, 2)
491 IF (num_ij_pairs > 0)
THEN
493 CALL comm_exchange%from_split(para_env, my_prow)
498 DO proc_shift = 0, num_pe_col - 1
499 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
502 DO col_local = 1, ncol_local
503 col_global = col_indices(col_local)
505 iocc = max(1, col_global - 1)/virtual + 1
506 avirt = col_global - (iocc - 1)*virtual
508 DO ij_counter = 1, num_ij_pairs
510 my_i = sos_mp2_work%pair_list(1, ij_counter)
511 my_j = sos_mp2_work%pair_list(2, ij_counter)
513 IF (iocc /= my_j) cycle
514 pcol =
cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
515 IF (pcol /= pcol_send) cycle
517 counter = counter + 1
523 data2send(pcol_send) = counter
526 CALL comm_exchange%alltoall(data2send, data2recv, 1)
528 DO proc_shift = 0, num_pe_col - 1
529 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
530 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
533 ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))
536 DO col_local = 1, ncol_local
537 col_global = col_indices(col_local)
539 iocc = max(1, col_global - 1)/virtual + 1
540 avirt = col_global - (iocc - 1)*virtual
542 DO ij_counter = 1, num_ij_pairs
544 my_i = sos_mp2_work%pair_list(1, ij_counter)
545 my_j = sos_mp2_work%pair_list(2, ij_counter)
547 IF (iocc /= my_j) cycle
548 pcol =
cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
549 IF (pcol /= pcol_send) cycle
551 counter = counter + 1
553 sos_mp2_work%index2send(pcol_send)%array(counter) = col_global
560 ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))
562 CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
563 sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)
566 DO counter = 1, data2send(pcol_send)
567 sos_mp2_work%index2send(pcol_send)%array(counter) = &
568 cp_fm_indxg2l(sos_mp2_work%index2send(pcol_send)%array(counter), &
569 ncol_block, 0, first_p_pos_col, num_pe_col)
573 CALL comm_exchange%free()
576 DEALLOCATE (data2send, data2recv)
578 CALL timestop(handle)
588 SUBROUTINE prepare_comm_pab(sos_mp2_work, virtual, fm_mat_S)
589 TYPE(sos_mp2_grad_work_type),
INTENT(INOUT) :: sos_mp2_work
590 INTEGER,
INTENT(IN) :: virtual
591 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_s
593 CHARACTER(LEN=*),
PARAMETER :: routinen =
'prepare_comm_Pab'
595 INTEGER :: ab_counter, avirt, col_global, col_local, counter, first_p_pos(2), &
596 first_p_pos_col, handle, iocc, my_a, my_b, my_pcol, my_prow, ncol_block, ncol_local, &
597 nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, tag
598 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv, data2send
599 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
600 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
601 TYPE(cp_blacs_env_type),
POINTER :: context
602 TYPE(mp_comm_type) :: comm_exchange
603 TYPE(mp_para_env_type),
POINTER :: para_env
605 CALL timeset(routinen, handle)
609 CALL fm_mat_s%matrix_struct%context%get(number_of_process_columns=num_pe_col)
610 ALLOCATE (sos_mp2_work%index2send(0:num_pe_col - 1), &
611 sos_mp2_work%index2recv(0:num_pe_col - 1))
613 num_ab_pairs =
SIZE(sos_mp2_work%pair_list, 2)
614 IF (num_ab_pairs > 0)
THEN
616 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
617 ncol_local=ncol_local, col_indices=col_indices, &
618 context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
619 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
621 first_p_pos_col = first_p_pos(2)
623 CALL comm_exchange%from_split(para_env, my_prow)
625 ALLOCATE (data2send(0:num_pe_col - 1))
626 ALLOCATE (data2recv(0:num_pe_col - 1))
630 DO proc_shift = 0, num_pe_col - 1
631 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
632 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
635 DO col_local = 1, ncol_local
636 col_global = col_indices(col_local)
638 iocc = max(1, col_global - 1)/virtual + 1
639 avirt = col_global - (iocc - 1)*virtual
641 DO ab_counter = 1, num_ab_pairs
643 my_a = sos_mp2_work%pair_list(1, ab_counter)
644 my_b = sos_mp2_work%pair_list(2, ab_counter)
646 IF (avirt /= my_b) cycle
647 pcol =
cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
648 IF (pcol /= pcol_send) cycle
650 counter = counter + 1
656 data2send(pcol_send) = counter
659 CALL comm_exchange%alltoall(data2send, data2recv, 1)
661 DO proc_shift = 0, num_pe_col - 1
662 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
663 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
666 ALLOCATE (sos_mp2_work%index2send(pcol_send)%array(data2send(pcol_send)))
669 DO col_local = 1, ncol_local
670 col_global = col_indices(col_local)
672 iocc = max(1, col_global - 1)/virtual + 1
673 avirt = col_global - (iocc - 1)*virtual
675 DO ab_counter = 1, num_ab_pairs
677 my_a = sos_mp2_work%pair_list(1, ab_counter)
678 my_b = sos_mp2_work%pair_list(2, ab_counter)
680 IF (avirt /= my_b) cycle
681 pcol =
cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
682 IF (pcol /= pcol_send) cycle
684 counter = counter + 1
686 sos_mp2_work%index2send(pcol_send)%array(counter) = col_global
693 ALLOCATE (sos_mp2_work%index2recv(pcol_recv)%array(data2recv(pcol_recv)))
695 CALL para_env%sendrecv(sos_mp2_work%index2send(pcol_send)%array, blacs2mpi(my_prow, pcol_send), &
696 sos_mp2_work%index2recv(pcol_recv)%array, blacs2mpi(my_prow, pcol_recv), tag)
699 DO counter = 1, data2send(pcol_send)
700 sos_mp2_work%index2send(pcol_send)%array(counter) = &
701 cp_fm_indxg2l(sos_mp2_work%index2send(pcol_send)%array(counter), &
702 ncol_block, 0, first_p_pos_col, num_pe_col)
706 CALL comm_exchange%free()
707 DEALLOCATE (data2send, data2recv)
711 CALL timestop(handle)
721 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_q
722 TYPE(rpa_grad_type),
INTENT(INOUT) ::
rpa_grad
724 CALL cp_fm_to_fm(fm_mat_q,
rpa_grad%rpa_work%fm_mat_Q_copy)
745 dgemm_counter, fm_mat_S, omega, homo, virtual, Eigenval, weight, unit_nr)
746 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
747 TYPE(rpa_grad_type),
INTENT(INOUT) ::
rpa_grad
748 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
749 TYPE(cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_q, fm_mat_q_gemm
750 TYPE(dgemm_counter_type),
INTENT(INOUT) :: dgemm_counter
751 TYPE(cp_fm_type),
DIMENSION(:),
INTENT(IN) :: fm_mat_s
752 REAL(kind=
dp),
INTENT(IN) :: omega
753 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
754 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(IN) :: eigenval
755 REAL(kind=
dp),
INTENT(IN) :: weight
756 INTEGER,
INTENT(IN) :: unit_nr
758 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_matrix_operations'
760 INTEGER :: col_global, col_local, dimen_ia, &
761 dimen_ri, handle, handle2, ispin, &
762 jspin, ncol_local, nrow_local, nspins, &
764 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
765 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
766 TARGET :: mat_s_3d, mat_work_iap_3d
767 TYPE(cp_fm_type) :: fm_work_iap, fm_work_pq
769 CALL timeset(routinen, handle)
771 nspins =
SIZE(fm_mat_q)
773 CALL cp_fm_get_info(fm_mat_q(1), nrow_global=dimen_ri, nrow_local=nrow_local, ncol_local=ncol_local, &
774 col_indices=col_indices, row_indices=row_indices)
776 IF (.NOT. do_ri_sos_laplace_mp2)
THEN
777 CALL cp_fm_create(fm_work_pq, fm_mat_q(1)%matrix_struct)
784 CALL cp_fm_release(fm_work_pq)
786 DO col_local = 1, ncol_local
787 col_global = col_indices(col_local)
788 DO row_local = 1, nrow_local
789 IF (col_global == row_indices(row_local))
THEN
790 fm_mat_q(1)%local_data(row_local, col_local) = fm_mat_q(1)%local_data(row_local, col_local) - 1.0_dp
796 CALL timeset(routinen//
"_PQ", handle2)
798 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
799 matrix_a=
rpa_grad%rpa_work%fm_mat_Q_copy, matrix_b=fm_mat_q(1), beta=1.0_dp, &
802 CALL timestop(handle2)
805 fm_mat_q_gemm(1)%matrix_struct%context)
809 IF (do_ri_sos_laplace_mp2)
THEN
811 jspin = nspins - ispin + 1
817 IF (do_ri_sos_laplace_mp2)
THEN
818 CALL timeset(routinen//
"_PQ", handle2)
820 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=weight, &
821 matrix_a=fm_mat_q(ispin), matrix_b=fm_mat_q(jspin), beta=1.0_dp, &
824 CALL timestop(handle2)
827 fm_mat_q_gemm(jspin)%matrix_struct%context)
829 CALL calc_fm_mat_s_rpa(fm_mat_s(ispin), .true., virtual(ispin), eigenval(:, ispin), &
830 homo(ispin), omega, 0.0_dp)
833 CALL timeset(routinen//
"_contr_S", handle2)
839 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ia, k=dimen_ri, alpha=1.0_dp, &
840 matrix_a=fm_mat_q_gemm(jspin), matrix_b=fm_mat_s(ispin), beta=0.0_dp, &
841 matrix_c=fm_work_iap)
843 CALL timestop(handle2)
845 IF (do_ri_sos_laplace_mp2)
THEN
846 CALL calc_p_sos_mp2(homo(ispin), fm_mat_s(ispin), fm_work_iap, &
848 omega, weight, virtual(ispin), eigenval(:, ispin), mp2_env%ri_grad%dot_blksize)
854 CALL cp_fm_release(fm_work_iap)
860 CALL redistribute_fm_mat_s(
rpa_grad%rpa_work%index2send(:, ispin),
rpa_grad%rpa_work%index2recv(:, ispin), &
861 fm_work_iap, mat_work_iap_3d, &
864 CALL cp_fm_release(fm_work_iap)
866 CALL redistribute_fm_mat_s(
rpa_grad%rpa_work%index2send(:, ispin),
rpa_grad%rpa_work%index2recv(:, ispin), &
867 fm_mat_s(ispin), mat_s_3d, &
872 CALL calc_p_rpa(mat_s_3d, mat_work_iap_3d,
rpa_grad%rpa_work%gd_homo(ispin),
rpa_grad%rpa_work%gd_virtual(ispin), &
874 fm_mat_s(ispin)%matrix_struct, &
876 weight, omega, eigenval(:, ispin), homo(ispin), unit_nr, mp2_env)
878 DEALLOCATE (mat_work_iap_3d, mat_s_3d)
886 CALL timestop(handle)
903 SUBROUTINE calc_p_sos_mp2(homo, fm_mat_S, fm_work_iaP, sos_mp2_work_occ, sos_mp2_work_virt, &
904 omega, weight, virtual, Eigenval, dot_blksize)
905 INTEGER,
INTENT(IN) :: homo
906 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_s, fm_work_iap
907 TYPE(sos_mp2_grad_work_type),
INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
908 REAL(kind=
dp),
INTENT(IN) :: omega, weight
909 INTEGER,
INTENT(IN) :: virtual
910 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: eigenval
911 INTEGER,
INTENT(IN) :: dot_blksize
913 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_sos_mp2'
915 INTEGER :: avirt, col_global, col_local, handle, &
916 handle2, iocc, my_a, my_i, ncol_local, &
917 nrow_local, num_ab_pairs, num_ij_pairs
918 INTEGER,
DIMENSION(:),
POINTER :: col_indices
919 REAL(kind=
dp) :: ddot, trace
921 CALL timeset(routinen, handle)
923 CALL cp_fm_get_info(fm_mat_s, col_indices=col_indices, ncol_local=ncol_local, nrow_local=nrow_local)
925 CALL timeset(routinen//
"_Pij_diag", handle2)
931 DO col_local = 1, ncol_local
932 col_global = col_indices(col_local)
934 iocc = max(1, col_global - 1)/virtual + 1
935 avirt = col_global - (iocc - 1)*virtual
937 IF (iocc == my_i) trace = trace + &
938 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
941 sos_mp2_work_occ%P(my_i) = sos_mp2_work_occ%P(my_i) - trace*omega*weight
944 CALL timestop(handle2)
946 CALL timeset(routinen//
"_Pab_diag", handle2)
952 DO col_local = 1, ncol_local
953 col_global = col_indices(col_local)
955 iocc = max(1, col_global - 1)/virtual + 1
956 avirt = col_global - (iocc - 1)*virtual
958 IF (avirt == my_a) trace = trace + &
959 ddot(nrow_local, fm_mat_s%local_data(:, col_local), 1, fm_work_iap%local_data(:, col_local), 1)
962 sos_mp2_work_virt%P(my_a) = sos_mp2_work_virt%P(my_a) + trace*omega*weight
965 CALL timestop(handle2)
968 num_ij_pairs =
SIZE(sos_mp2_work_occ%pair_list, 2)
969 num_ab_pairs =
SIZE(sos_mp2_work_virt%pair_list, 2)
970 IF (num_ij_pairs > 0)
THEN
971 CALL calc_pij_degen(fm_work_iap, fm_mat_s, sos_mp2_work_occ%pair_list, &
972 virtual, sos_mp2_work_occ%P(homo + 1:), eigenval(:homo), omega, weight, &
973 sos_mp2_work_occ%index2send, sos_mp2_work_occ%index2recv, dot_blksize)
975 IF (num_ab_pairs > 0)
THEN
976 CALL calc_pab_degen(fm_work_iap, fm_mat_s, sos_mp2_work_virt%pair_list, &
977 virtual, sos_mp2_work_virt%P(virtual + 1:), eigenval(homo + 1:), omega, weight, &
978 sos_mp2_work_virt%index2send, sos_mp2_work_virt%index2recv, dot_blksize)
981 CALL timestop(handle)
1003 SUBROUTINE calc_p_rpa(mat_S_1D, mat_work_iaP_3D, gd_homo, gd_virtual, grid, mepos, &
1004 fm_struct_S, P_ij, P_ab, weight, omega, Eigenval, homo, unit_nr, mp2_env)
1005 REAL(kind=
dp),
DIMENSION(*),
INTENT(INOUT),
TARGET :: mat_s_1d
1006 REAL(kind=
dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: mat_work_iap_3d
1007 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_homo, gd_virtual
1008 INTEGER,
DIMENSION(2),
INTENT(IN) :: grid, mepos
1009 TYPE(cp_fm_struct_type),
INTENT(IN),
POINTER :: fm_struct_s
1010 REAL(kind=
dp),
DIMENSION(:, :) :: p_ij, p_ab
1011 REAL(kind=
dp),
INTENT(IN) :: weight, omega
1012 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: eigenval
1013 INTEGER,
INTENT(IN) :: homo, unit_nr
1014 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
1016 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_rpa'
1018 INTEGER :: completed, handle, handle2, my_a_end, my_a_size, my_a_start, my_i_end, my_i_size, &
1019 my_i_start, my_p_size, my_prow, number_of_parallel_channels, proc_a_recv, proc_a_send, &
1020 proc_i_recv, proc_i_send, proc_recv, proc_send, proc_shift, recv_a_end, recv_a_size, &
1021 recv_a_start, recv_i_end, recv_i_size, recv_i_start, tag
1022 INTEGER(KIND=int_8) :: mem, number_of_elements_per_blk
1023 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: procs_recv
1024 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1025 REAL(kind=
dp) :: mem_per_block, mem_real
1026 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: buffer_compens_1d
1027 REAL(kind=
dp),
DIMENSION(:, :, :),
POINTER :: mat_s_3d
1028 TYPE(cp_1d_r_cp_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_1d
1029 TYPE(cp_3d_r_cp_type),
ALLOCATABLE,
DIMENSION(:) :: buffer_3d
1030 TYPE(mp_para_env_type),
POINTER :: para_env
1031 TYPE(mp_request_type),
ALLOCATABLE,
DIMENSION(:) :: recv_requests, send_requests
1033 CALL timeset(routinen, handle)
1036 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold)
THEN
1037 IF (.NOT. c_associated(mp2_env%local_gemm_ctx))
THEN
1045 my_p_size =
SIZE(mat_work_iap_3d, 1)
1048 CALL fm_struct_s%context%get(my_process_row=my_prow, blacs2mpi=blacs2mpi, para_env=para_env)
1050 CALL get_group_dist(gd_virtual, mepos(1), my_a_start, my_a_end, my_a_size)
1051 CALL get_group_dist(gd_homo, mepos(2), my_i_start, my_i_end, my_i_size)
1056 mat_s_3d(1:my_p_size, 1:my_a_size, 1:my_i_size) => mat_s_1d(1:int(my_p_size,
int_8)*my_a_size*my_i_size)
1058 number_of_elements_per_blk = max(int(
maxsize(gd_homo), kind=
int_8)*my_a_size, &
1059 int(
maxsize(gd_virtual), kind=
int_8)*my_i_size)*my_p_size
1063 mem_real = real(mem, kind=
dp)
1064 mem_per_block = real(number_of_elements_per_blk, kind=
dp)*8.0_dp
1065 number_of_parallel_channels = max(1, min(maxval(grid) - 1, floor(mem_real/mem_per_block)))
1066 CALL para_env%min(number_of_parallel_channels)
1067 IF (mp2_env%ri_grad%max_parallel_comm > 0) &
1068 number_of_parallel_channels = min(number_of_parallel_channels, mp2_env%ri_grad%max_parallel_comm)
1070 IF (unit_nr > 0)
THEN
1071 WRITE (unit_nr,
'(T3,A,T75,I6)')
'GRAD_INFO| Number of parallel communication channels:', number_of_parallel_channels
1074 CALL para_env%sync()
1076 ALLOCATE (buffer_1d(number_of_parallel_channels))
1077 DO proc_shift = 1, number_of_parallel_channels
1078 ALLOCATE (buffer_1d(proc_shift)%array(number_of_elements_per_blk))
1081 ALLOCATE (buffer_3d(number_of_parallel_channels))
1084 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold)
THEN
1085 ALLOCATE (buffer_compens_1d(2*max(my_a_size*
maxsize(gd_virtual), my_i_size*
maxsize(gd_homo))))
1088 IF (number_of_parallel_channels > 1)
THEN
1089 ALLOCATE (procs_recv(number_of_parallel_channels))
1090 ALLOCATE (recv_requests(number_of_parallel_channels))
1091 ALLOCATE (send_requests(maxval(grid) - 1))
1094 IF (number_of_parallel_channels > 1 .AND. grid(1) > 1)
THEN
1095 CALL timeset(routinen//
"_comm_a", handle2)
1098 DO proc_shift = 1, min(grid(1) - 1, number_of_parallel_channels)
1099 proc_a_recv =
modulo(mepos(1) - proc_shift, grid(1))
1100 proc_recv = mepos(2)*grid(1) + proc_a_recv
1102 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1104 buffer_3d(proc_shift)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1105 buffer_1d(proc_shift)%array(1:int(my_p_size, kind=
int_8)*recv_a_size*my_i_size)
1107 CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
1108 recv_requests(proc_shift), tag)
1110 procs_recv(proc_shift) = proc_a_recv
1114 DO proc_shift = 1, grid(1) - 1
1115 proc_a_send =
modulo(mepos(1) + proc_shift, grid(1))
1116 proc_send = mepos(2)*grid(1) + proc_a_send
1118 CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1119 send_requests(proc_shift), tag)
1121 CALL timestop(handle2)
1124 CALL calc_p_rpa_a(p_ab(:, my_a_start:my_a_end), &
1125 mat_s_3d, mat_work_iap_3d, &
1126 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1127 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1128 eigenval(homo + my_a_start:homo + my_a_end), omega, weight)
1130 DO proc_shift = 1, grid(1) - 1
1131 CALL timeset(routinen//
"_comm_a", handle2)
1132 IF (number_of_parallel_channels > 1)
THEN
1135 CALL get_group_dist(gd_virtual, procs_recv(completed), recv_a_start, recv_a_end, recv_a_size)
1137 proc_a_send =
modulo(mepos(1) + proc_shift, grid(1))
1138 proc_a_recv =
modulo(mepos(1) - proc_shift, grid(1))
1140 proc_send = mepos(2)*grid(1) + proc_a_send
1141 proc_recv = mepos(2)*grid(1) + proc_a_recv
1143 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1145 buffer_3d(1)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1146 buffer_1d(1)%array(1:int(my_p_size, kind=
int_8)*recv_a_size*my_i_size)
1148 CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1149 buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
1152 CALL timestop(handle2)
1154 CALL calc_p_rpa_a(p_ab(:, recv_a_start:recv_a_end), &
1155 mat_s_3d, buffer_3d(completed)%array, &
1156 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1157 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1158 eigenval(homo + recv_a_start:homo + recv_a_end), omega, weight)
1160 IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(1))
THEN
1161 proc_a_recv =
modulo(mepos(1) - proc_shift - number_of_parallel_channels, grid(1))
1162 proc_recv = mepos(2)*grid(1) + proc_a_recv
1164 CALL get_group_dist(gd_virtual, proc_a_recv, recv_a_start, recv_a_end, recv_a_size)
1166 buffer_3d(completed)%array(1:my_p_size, 1:recv_a_size, 1:my_i_size) => &
1167 buffer_1d(completed)%array(1:int(my_p_size, kind=
int_8)*recv_a_size*my_i_size)
1169 CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
1170 recv_requests(completed), tag)
1172 procs_recv(completed) = proc_a_recv
1176 IF (number_of_parallel_channels > 1 .AND. grid(1) > 1)
THEN
1177 CALL mp_waitall(send_requests)
1180 IF (number_of_parallel_channels > 1 .AND. grid(2) > 1)
THEN
1183 DO proc_shift = 1, min(grid(2) - 1, number_of_parallel_channels)
1184 proc_i_recv =
modulo(mepos(2) - proc_shift, grid(2))
1185 proc_recv = proc_i_recv*grid(1) + mepos(1)
1187 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)
1189 buffer_3d(proc_shift)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1190 buffer_1d(proc_shift)%array(1:int(my_p_size, kind=
int_8)*my_a_size*recv_i_size)
1192 CALL para_env%irecv(buffer_3d(proc_shift)%array, blacs2mpi(my_prow, proc_recv), &
1193 recv_requests(proc_shift), tag)
1195 procs_recv(proc_shift) = proc_i_recv
1199 DO proc_shift = 1, grid(2) - 1
1200 proc_i_send =
modulo(mepos(2) + proc_shift, grid(2))
1201 proc_send = proc_i_send*grid(1) + mepos(1)
1203 CALL para_env%isend(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1204 send_requests(proc_shift), tag)
1208 CALL calc_p_rpa_i(p_ij(:, my_i_start:my_i_end), &
1209 mat_s_3d, mat_work_iap_3d, &
1210 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1211 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1212 eigenval(my_i_start:my_i_end), omega, weight)
1214 DO proc_shift = 1, grid(2) - 1
1215 CALL timeset(routinen//
"_comm_i", handle2)
1216 IF (number_of_parallel_channels > 1)
THEN
1219 CALL get_group_dist(gd_homo, procs_recv(completed), recv_i_start, recv_i_end, recv_i_size)
1221 proc_i_send =
modulo(mepos(2) + proc_shift, grid(2))
1222 proc_i_recv =
modulo(mepos(2) - proc_shift, grid(2))
1224 proc_send = proc_i_send*grid(1) + mepos(1)
1225 proc_recv = proc_i_recv*grid(1) + mepos(1)
1227 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_i_end, recv_i_size)
1229 buffer_3d(1)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1230 buffer_1d(1)%array(1:int(my_p_size, kind=
int_8)*my_a_size*recv_i_size)
1232 CALL para_env%sendrecv(mat_work_iap_3d, blacs2mpi(my_prow, proc_send), &
1233 buffer_3d(1)%array, blacs2mpi(my_prow, proc_recv), tag)
1236 CALL timestop(handle2)
1238 CALL calc_p_rpa_i(p_ij(:, recv_i_start:recv_i_end), &
1239 mat_s_3d, buffer_3d(completed)%array, &
1240 mp2_env%ri_grad%dot_blksize, buffer_compens_1d, mp2_env%local_gemm_ctx, &
1241 eigenval(homo + my_a_start:homo + my_a_end), eigenval(my_i_start:my_i_end), &
1242 eigenval(recv_i_start:recv_i_end), omega, weight)
1244 IF (number_of_parallel_channels > 1 .AND. number_of_parallel_channels + proc_shift < grid(2))
THEN
1245 proc_i_recv =
modulo(mepos(2) - proc_shift - number_of_parallel_channels, grid(2))
1246 proc_recv = proc_i_recv*grid(1) + mepos(1)
1248 CALL get_group_dist(gd_homo, proc_i_recv, recv_i_start, recv_a_end, recv_i_size)
1250 buffer_3d(completed)%array(1:my_p_size, 1:my_a_size, 1:recv_i_size) => &
1251 buffer_1d(completed)%array(1:int(my_p_size, kind=
int_8)*my_a_size*recv_i_size)
1253 CALL para_env%irecv(buffer_3d(completed)%array, blacs2mpi(my_prow, proc_recv), &
1254 recv_requests(completed), tag)
1256 procs_recv(completed) = proc_i_recv
1260 IF (number_of_parallel_channels > 1 .AND. grid(2) > 1)
THEN
1261 CALL mp_waitall(send_requests)
1264 IF (number_of_parallel_channels > 1)
THEN
1265 DEALLOCATE (procs_recv)
1266 DEALLOCATE (recv_requests)
1267 DEALLOCATE (send_requests)
1270 IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold)
THEN
1273 mp2_env%local_gemm_ctx = c_null_ptr
1274 DEALLOCATE (buffer_compens_1d)
1277 DO proc_shift = 1, number_of_parallel_channels
1278 NULLIFY (buffer_3d(proc_shift)%array)
1279 DEALLOCATE (buffer_1d(proc_shift)%array)
1281 DEALLOCATE (buffer_3d, buffer_1d)
1283 CALL timestop(handle)
1301 SUBROUTINE calc_p_rpa_a(P_ab, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
1302 my_eval_virt, my_eval_occ, recv_eval_virt, omega, weight)
1303 REAL(kind=
dp),
DIMENSION(:, :),
INTENT(INOUT) :: p_ab
1304 REAL(kind=
dp),
DIMENSION(:, :, :),
INTENT(IN) :: mat_s, mat_work
1305 INTEGER,
INTENT(IN) :: dot_blksize
1306 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:), &
1307 INTENT(INOUT),
TARGET :: buffer_1d
1308 TYPE(c_ptr),
INTENT(INOUT) :: local_gemm_ctx
1309 REAL(kind=
dp),
DIMENSION(:),
INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_virt
1310 REAL(kind=
dp),
INTENT(IN) :: omega, weight
1312 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_rpa_a'
1314 INTEGER :: handle, my_a, my_a_size, my_i, &
1315 my_i_size, my_p_size, p_end, p_start, &
1316 recv_a_size, stripesize
1317 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: buffer_compens, buffer_unscaled
1319 CALL timeset(routinen, handle)
1321 my_i_size =
SIZE(mat_s, 3)
1322 recv_a_size =
SIZE(mat_work, 2)
1323 my_a_size =
SIZE(mat_s, 2)
1324 my_p_size =
SIZE(mat_s, 1)
1326 IF (dot_blksize >= blksize_threshold)
THEN
1327 buffer_compens(1:my_a_size, 1:recv_a_size) => buffer_1d(1:my_a_size*recv_a_size)
1328 buffer_compens = 0.0_dp
1329 buffer_unscaled(1:my_a_size, 1:recv_a_size) => buffer_1d(my_a_size*recv_a_size + 1:2*my_a_size*recv_a_size)
1332 DO my_i = 1, my_i_size
1333 DO p_start = 1, my_p_size, dot_blksize
1334 stripesize = min(dot_blksize, my_p_size - p_start + 1)
1335 p_end = p_start + stripesize - 1
1337 CALL local_gemm(
"T",
"N", my_a_size, recv_a_size, stripesize, &
1338 -weight, mat_s(p_start:p_end, :, my_i), stripesize, &
1339 mat_work(p_start:p_end, :, my_i), stripesize, &
1340 0.0_dp, buffer_unscaled, my_a_size, local_gemm_ctx)
1342 CALL scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
1343 my_eval_virt, recv_eval_virt, my_eval_occ(my_i))
1345 CALL kahan_step(buffer_compens, p_ab)
1351 REAL(kind=
dp) :: tmp, e_i, e_a, e_b, omega2, my_compens, my_p, s
1357 DO my_a = 1, my_a_size
1358 DO recv_a = 1, recv_a_size
1359 e_a = my_eval_virt(my_a)
1360 e_b = recv_eval_virt(recv_a)
1361 my_p = p_ab(my_a, recv_a)
1363 DO my_i = 1, my_i_size
1364 e_i = -my_eval_occ(my_i)
1365 tmp = -weight*accurate_dot_product(mat_s(:, my_a, my_i), mat_work(:, recv_a, my_i)) &
1366 *(1.0_dp + omega2/((e_a + e_i)*(e_b + e_i))) - my_compens
1368 my_compens = (s - my_p) - tmp
1371 p_ab(my_a, recv_a) = my_p
1377 CALL timestop(handle)
1379 END SUBROUTINE calc_p_rpa_a
1395 SUBROUTINE calc_p_rpa_i(P_ij, mat_S, mat_work, dot_blksize, buffer_1D, local_gemm_ctx, &
1396 my_eval_virt, my_eval_occ, recv_eval_occ, omega, weight)
1397 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: p_ij
1398 REAL(kind=dp),
DIMENSION(:, :, :),
INTENT(INOUT) :: mat_s, mat_work
1399 INTEGER,
INTENT(IN) :: dot_blksize
1400 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:), &
1401 INTENT(INOUT),
TARGET :: buffer_1d
1402 TYPE(c_ptr),
INTENT(INOUT) :: local_gemm_ctx
1403 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_occ
1404 REAL(kind=dp),
INTENT(IN) :: omega, weight
1406 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_P_rpa_i'
1408 INTEGER :: handle, my_a, my_a_size, my_i, &
1409 my_i_size, my_p_size, p_end, p_start, &
1410 recv_i_size, stripesize
1411 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: buffer_compens, buffer_unscaled
1413 CALL timeset(routinen, handle)
1415 my_i_size =
SIZE(mat_s, 3)
1416 recv_i_size =
SIZE(mat_work, 3)
1417 my_a_size =
SIZE(mat_s, 2)
1418 my_p_size =
SIZE(mat_s, 1)
1420 IF (dot_blksize >= blksize_threshold)
THEN
1421 buffer_compens(1:my_i_size, 1:recv_i_size) => buffer_1d(1:my_i_size*recv_i_size)
1422 buffer_compens = 0.0_dp
1423 buffer_unscaled(1:my_i_size, 1:recv_i_size) => buffer_1d(my_i_size*recv_i_size + 1:2*my_i_size*recv_i_size)
1426 DO my_a = 1, my_a_size
1427 DO p_start = 1, my_p_size, dot_blksize
1428 stripesize = min(dot_blksize, my_p_size - p_start + 1)
1429 p_end = p_start + stripesize - 1
1431 CALL local_gemm(
"T",
"N", my_i_size, recv_i_size, stripesize, &
1432 weight, mat_s(p_start:p_end, my_a, :), stripesize, &
1433 mat_work(p_start:p_end, my_a, :), stripesize, &
1434 0.0_dp, buffer_unscaled, my_i_size, local_gemm_ctx)
1436 CALL scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
1437 my_eval_occ, recv_eval_occ, my_eval_virt(my_a))
1439 CALL kahan_step(buffer_compens, p_ij)
1444 REAL(kind=dp) :: tmp, e_i, e_a, e_j, omega2, my_compens, my_p, s
1451 DO my_i = 1, my_i_size
1452 DO recv_i = 1, recv_i_size
1453 e_i = my_eval_occ(my_i)
1454 e_j = recv_eval_occ(recv_i)
1455 my_p = p_ij(my_i, recv_i)
1457 DO my_a = 1, my_a_size
1458 e_a = my_eval_virt(my_a)
1459 tmp = weight*accurate_dot_product(mat_s(:, my_a, my_i), mat_work(:, my_a, recv_i)) &
1460 *(1.0_dp + omega2/((e_a - e_i)*(e_a - e_j))) - my_compens
1462 my_compens = (s - my_p) - tmp
1465 p_ij(my_i, recv_i) = my_p
1471 CALL timestop(handle)
1473 END SUBROUTINE calc_p_rpa_i
1480 SUBROUTINE kahan_step(compens, P)
1481 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: compens, p
1483 CHARACTER(LEN=*),
PARAMETER :: routinen =
'kahan_step'
1485 INTEGER :: handle, i, j
1486 REAL(kind=dp) :: my_compens, my_p, s
1488 CALL timeset(routinen, handle)
1491 DO j = 1,
SIZE(compens, 2)
1492 DO i = 1,
SIZE(compens, 1)
1494 my_compens = compens(i, j)
1495 s = my_p + my_compens
1496 compens(i, j) = (s - my_p) - my_compens
1502 CALL timestop(handle)
1515 SUBROUTINE scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, &
1516 my_eval_virt, recv_eval_virt, my_eval_occ)
1517 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: buffer_unscaled
1518 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: buffer_compens
1519 REAL(kind=dp),
INTENT(IN) :: omega
1520 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: my_eval_virt, recv_eval_virt
1521 REAL(kind=dp),
INTENT(IN) :: my_eval_occ
1523 CHARACTER(LEN=*),
PARAMETER :: routinen =
'scale_buffer_and_add_compens_virt'
1525 INTEGER :: handle, my_a, my_b
1527 CALL timeset(routinen, handle)
1531 DO my_b = 1,
SIZE(buffer_compens, 2)
1532 DO my_a = 1,
SIZE(buffer_compens, 1)
1533 buffer_compens(my_a, my_b) = buffer_unscaled(my_a, my_b) &
1534 *(1.0_dp - omega**2/((my_eval_virt(my_a) - my_eval_occ)*(recv_eval_virt(my_b) - my_eval_occ))) &
1535 - buffer_compens(my_a, my_b)
1540 CALL timestop(handle)
1542 END SUBROUTINE scale_buffer_and_add_compens_virt
1553 SUBROUTINE scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, &
1554 my_eval_occ, recv_eval_occ, my_eval_virt)
1555 REAL(kind=dp),
DIMENSION(:, :),
INTENT(IN) :: buffer_unscaled
1556 REAL(kind=dp),
DIMENSION(:, :),
INTENT(INOUT) :: buffer_compens
1557 REAL(kind=dp),
INTENT(IN) :: omega
1558 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: my_eval_occ, recv_eval_occ
1559 REAL(kind=dp),
INTENT(IN) :: my_eval_virt
1561 CHARACTER(LEN=*),
PARAMETER :: routinen =
'scale_buffer_and_add_compens_occ'
1563 INTEGER :: handle, my_i, my_j
1565 CALL timeset(routinen, handle)
1569 DO my_j = 1,
SIZE(buffer_compens, 2)
1570 DO my_i = 1,
SIZE(buffer_compens, 1)
1571 buffer_compens(my_i, my_j) = buffer_unscaled(my_i, my_j) &
1572 *(1.0_dp - omega**2/((my_eval_virt - my_eval_occ(my_i))*(my_eval_virt - recv_eval_occ(my_j)))) &
1573 - buffer_compens(my_i, my_j)
1578 CALL timestop(handle)
1580 END SUBROUTINE scale_buffer_and_add_compens_occ
1587 ELEMENTAL FUNCTION sinh_over_x(x)
RESULT(res)
1588 REAL(kind=dp),
INTENT(IN) :: x
1589 REAL(kind=dp) :: res
1593 IF (abs(x) > 3.0e-4_dp)
THEN
1596 res = 1.0_dp + x**2/6.0_dp
1599 END FUNCTION sinh_over_x
1615 SUBROUTINE calc_pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigenval, &
1616 omega, weight, index2send, index2recv, dot_blksize)
1617 TYPE(cp_fm_type),
INTENT(IN) :: fm_work_iap, fm_mat_s
1618 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: pair_list
1619 INTEGER,
INTENT(IN) :: virtual
1620 REAL(kind=dp),
DIMENSION(:),
INTENT(INOUT) :: p_ij
1621 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: eigenval
1622 REAL(kind=dp),
INTENT(IN) :: omega, weight
1623 TYPE(one_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2send, index2recv
1624 INTEGER,
INTENT(IN) :: dot_blksize
1626 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_Pij_degen'
1628 INTEGER :: avirt, col_global, col_local, counter, first_p_pos(2), first_p_pos_col, handle, &
1629 handle2, ij_counter, iocc, my_col_local, my_i, my_j, my_pcol, my_prow, ncol_block, &
1630 ncol_local, nrow_local, num_ij_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, &
1631 recv_size, send_size, size_recv_buffer, size_send_buffer, tag
1632 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
1633 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1634 REAL(kind=dp) :: trace
1635 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
1636 TYPE(cp_blacs_env_type),
POINTER :: context
1637 TYPE(mp_para_env_type),
POINTER :: para_env
1639 CALL timeset(routinen, handle)
1641 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1642 ncol_local=ncol_local, col_indices=col_indices, &
1643 context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
1644 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1645 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1646 first_p_pos_col = first_p_pos(2)
1648 num_ij_pairs =
SIZE(pair_list, 2)
1652 DO ij_counter = 1, num_ij_pairs
1654 my_i = pair_list(1, ij_counter)
1655 my_j = pair_list(2, ij_counter)
1659 DO col_local = 1, ncol_local
1660 col_global = col_indices(col_local)
1662 iocc = max(1, col_global - 1)/virtual + 1
1663 avirt = col_global - (iocc - 1)*virtual
1665 IF (iocc /= my_j) cycle
1666 pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1667 IF (pcol /= my_pcol) cycle
1669 my_col_local = cp_fm_indxg2l((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1671 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1675 p_ij(ij_counter) = p_ij(ij_counter) - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1679 IF (num_pe_col > 1)
THEN
1680 size_send_buffer = 0
1681 size_recv_buffer = 0
1682 DO proc_shift = 1, num_pe_col - 1
1683 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1684 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1686 IF (
ALLOCATED(index2send(pcol_send)%array)) &
1687 size_send_buffer = max(size_send_buffer,
SIZE(index2send(pcol_send)%array))
1689 IF (
ALLOCATED(index2recv(pcol_recv)%array)) &
1690 size_recv_buffer = max(size_recv_buffer,
SIZE(index2recv(pcol_recv)%array))
1693 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1695 DO proc_shift = 1, num_pe_col - 1
1696 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1697 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1701 IF (
ALLOCATED(index2send(pcol_send)%array)) send_size =
SIZE(index2send(pcol_send)%array)
1703 DO counter = 1, send_size
1704 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1708 IF (
ALLOCATED(index2recv(pcol_recv)%array)) recv_size =
SIZE(index2recv(pcol_recv)%array)
1709 IF (recv_size > 0)
THEN
1710 CALL timeset(routinen//
"_send", handle2)
1711 IF (send_size > 0)
THEN
1712 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1713 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1715 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1717 CALL timestop(handle2)
1719 DO ij_counter = 1, num_ij_pairs
1722 my_i = pair_list(1, ij_counter)
1723 my_j = pair_list(2, ij_counter)
1727 DO col_local = 1, recv_size
1728 col_global = index2recv(pcol_recv)%array(col_local)
1730 iocc = max(1, col_global - 1)/virtual + 1
1731 IF (iocc /= my_j) cycle
1732 avirt = col_global - (iocc - 1)*virtual
1733 pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1734 IF (pcol /= my_pcol) cycle
1736 my_col_local = cp_fm_indxg2l((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col)
1738 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1742 p_ij(ij_counter) = p_ij(ij_counter) &
1743 - trace*sinh_over_x(0.5_dp*(eigenval(my_i) - eigenval(my_j))*omega)*omega*weight
1745 ELSE IF (send_size > 0)
THEN
1746 CALL timeset(routinen//
"_send", handle2)
1747 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1748 CALL timestop(handle2)
1751 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
1752 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
1755 CALL timestop(handle)
1773 SUBROUTINE calc_pab_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ab, Eigenval, &
1774 omega, weight, index2send, index2recv, dot_blksize)
1775 TYPE(cp_fm_type),
INTENT(IN) :: fm_work_iap, fm_mat_s
1776 INTEGER,
DIMENSION(:, :),
INTENT(IN) :: pair_list
1777 INTEGER,
INTENT(IN) :: virtual
1778 REAL(kind=dp),
DIMENSION(:),
INTENT(INOUT) :: p_ab
1779 REAL(kind=dp),
DIMENSION(:),
INTENT(IN) :: eigenval
1780 REAL(kind=dp),
INTENT(IN) :: omega, weight
1781 TYPE(one_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2send, index2recv
1782 INTEGER,
INTENT(IN) :: dot_blksize
1784 CHARACTER(LEN=*),
PARAMETER :: routinen =
'calc_Pab_degen'
1786 INTEGER :: ab_counter, avirt, col_global, col_local, counter, first_p_pos(2), &
1787 first_p_pos_col, handle, handle2, iocc, my_a, my_b, my_col_local, my_pcol, my_prow, &
1788 ncol_block, ncol_local, nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, pcol_send, &
1789 proc_shift, recv_size, send_size, size_recv_buffer, size_send_buffer, tag
1790 INTEGER,
DIMENSION(:),
POINTER :: col_indices, ncol_locals
1791 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1792 REAL(kind=dp) :: trace
1793 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
1794 TYPE(cp_blacs_env_type),
POINTER :: context
1795 TYPE(mp_para_env_type),
POINTER :: para_env
1797 CALL timeset(routinen, handle)
1799 CALL cp_fm_struct_get(fm_work_iap%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, &
1800 ncol_local=ncol_local, col_indices=col_indices, &
1801 context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local)
1802 CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1803 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1804 first_p_pos_col = first_p_pos(2)
1806 num_ab_pairs =
SIZE(pair_list, 2)
1810 DO ab_counter = 1, num_ab_pairs
1812 my_a = pair_list(1, ab_counter)
1813 my_b = pair_list(2, ab_counter)
1817 DO col_local = 1, ncol_local
1818 col_global = col_indices(col_local)
1820 iocc = max(1, col_global - 1)/virtual + 1
1821 avirt = col_global - (iocc - 1)*virtual
1823 IF (avirt /= my_b) cycle
1824 pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1825 IF (pcol /= my_pcol) cycle
1826 my_col_local = cp_fm_indxg2l((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1828 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), fm_work_iap%local_data(:, col_local), &
1833 p_ab(ab_counter) = p_ab(ab_counter) &
1834 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1838 IF (num_pe_col > 1)
THEN
1839 size_send_buffer = 0
1840 size_recv_buffer = 0
1841 DO proc_shift = 1, num_pe_col - 1
1842 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1843 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1845 IF (
ALLOCATED(index2send(pcol_send)%array)) &
1846 size_send_buffer = max(size_send_buffer,
SIZE(index2send(pcol_send)%array))
1848 IF (
ALLOCATED(index2recv(pcol_recv)%array)) &
1849 size_recv_buffer = max(size_recv_buffer,
SIZE(index2recv(pcol_recv)%array))
1852 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1854 DO proc_shift = 1, num_pe_col - 1
1855 pcol_send =
modulo(my_pcol + proc_shift, num_pe_col)
1856 pcol_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1860 IF (
ALLOCATED(index2send(pcol_send)%array)) send_size =
SIZE(index2send(pcol_send)%array)
1862 DO counter = 1, send_size
1863 buffer_send(:, counter) = fm_work_iap%local_data(:, index2send(pcol_send)%array(counter))
1867 IF (
ALLOCATED(index2recv(pcol_recv)%array)) recv_size =
SIZE(index2recv(pcol_recv)%array)
1868 IF (recv_size > 0)
THEN
1869 CALL timeset(routinen//
"_send", handle2)
1870 IF (send_size > 0)
THEN
1871 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), &
1872 buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1874 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, pcol_recv), tag)
1876 CALL timestop(handle2)
1878 DO ab_counter = 1, num_ab_pairs
1881 my_a = pair_list(1, ab_counter)
1882 my_b = pair_list(2, ab_counter)
1886 DO col_local = 1,
SIZE(index2recv(pcol_recv)%array)
1887 col_global = index2recv(pcol_recv)%array(col_local)
1889 iocc = max(1, col_global - 1)/virtual + 1
1890 avirt = col_global - (iocc - 1)*virtual
1891 IF (avirt /= my_b) cycle
1892 pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1893 IF (pcol /= my_pcol) cycle
1895 my_col_local = cp_fm_indxg2l((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col)
1897 trace = trace + accurate_dot_product_2(fm_mat_s%local_data(:, my_col_local), buffer_recv(:, col_local), &
1901 p_ab(ab_counter) = p_ab(ab_counter) &
1902 + trace*sinh_over_x(0.5_dp*(eigenval(my_a) - eigenval(my_b))*omega)*omega*weight
1905 ELSE IF (send_size > 0)
THEN
1906 CALL timeset(routinen//
"_send", handle2)
1907 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, pcol_send), tag)
1908 CALL timestop(handle2)
1911 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
1912 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
1915 CALL timestop(handle)
1929 SUBROUTINE redistribute_fm_mat_s(index2send, index2recv, fm_mat_S, mat_S_3D, gd_homo, gd_virtual, mepos)
1930 TYPE(one_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2send
1931 TYPE(two_dim_int_array),
DIMENSION(0:),
INTENT(IN) :: index2recv
1932 TYPE(cp_fm_type),
INTENT(IN) :: fm_mat_s
1933 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :, :), &
1934 INTENT(OUT) :: mat_s_3d
1935 TYPE(group_dist_d1_type),
INTENT(IN) :: gd_homo, gd_virtual
1936 INTEGER,
DIMENSION(2),
INTENT(IN) :: mepos
1938 CHARACTER(LEN=*),
PARAMETER :: routinen =
'redistribute_fm_mat_S'
1940 INTEGER :: col_local, handle, my_a, my_homo, my_i, my_pcol, my_prow, my_virtual, nrow_local, &
1941 num_pe_col, proc_recv, proc_send, proc_shift, recv_size, send_size, size_recv_buffer, &
1942 size_send_buffer, tag
1943 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi
1944 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
1945 TYPE(mp_para_env_type),
POINTER :: para_env
1947 CALL timeset(routinen, handle)
1951 CALL fm_mat_s%matrix_struct%context%get(my_process_row=my_prow, my_process_column=my_pcol, &
1952 number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi)
1954 CALL cp_fm_struct_get(fm_mat_s%matrix_struct, nrow_local=nrow_local, para_env=para_env)
1956 CALL get_group_dist(gd_homo, mepos(2), sizes=my_homo)
1957 CALL get_group_dist(gd_virtual, mepos(1), sizes=my_virtual)
1959 ALLOCATE (mat_s_3d(nrow_local, my_virtual, my_homo))
1961 IF (
ALLOCATED(index2send(my_pcol)%array))
THEN
1962 DO col_local = 1,
SIZE(index2send(my_pcol)%array)
1963 my_a = index2recv(my_pcol)%array(1, col_local)
1964 my_i = index2recv(my_pcol)%array(2, col_local)
1965 mat_s_3d(:, my_a, my_i) = fm_mat_s%local_data(:, index2send(my_pcol)%array(col_local))
1969 IF (num_pe_col > 1)
THEN
1970 size_send_buffer = 0
1971 size_recv_buffer = 0
1972 DO proc_shift = 1, num_pe_col - 1
1973 proc_send =
modulo(my_pcol + proc_shift, num_pe_col)
1974 proc_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1977 IF (
ALLOCATED(index2send(proc_send)%array)) send_size =
SIZE(index2send(proc_send)%array)
1978 size_send_buffer = max(size_send_buffer, send_size)
1981 IF (
ALLOCATED(index2recv(proc_recv)%array)) recv_size =
SIZE(index2recv(proc_recv)%array)
1982 size_recv_buffer = max(size_recv_buffer, recv_size)
1986 ALLOCATE (buffer_send(nrow_local, size_send_buffer), buffer_recv(nrow_local, size_recv_buffer))
1988 DO proc_shift = 1, num_pe_col - 1
1989 proc_send =
modulo(my_pcol + proc_shift, num_pe_col)
1990 proc_recv =
modulo(my_pcol - proc_shift, num_pe_col)
1993 IF (
ALLOCATED(index2send(proc_send)%array)) send_size =
SIZE(index2send(proc_send)%array)
1994 DO col_local = 1, send_size
1995 buffer_send(:, col_local) = fm_mat_s%local_data(:, index2send(proc_send)%array(col_local))
1999 IF (
ALLOCATED(index2recv(proc_recv)%array)) recv_size =
SIZE(index2recv(proc_recv)%array, 2)
2000 IF (recv_size > 0)
THEN
2001 IF (send_size > 0)
THEN
2002 CALL para_env%sendrecv(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), &
2003 buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
2005 CALL para_env%recv(buffer_recv(:, :recv_size), blacs2mpi(my_prow, proc_recv), tag)
2008 DO col_local = 1, recv_size
2009 my_a = index2recv(proc_recv)%array(1, col_local)
2010 my_i = index2recv(proc_recv)%array(2, col_local)
2011 mat_s_3d(:, my_a, my_i) = buffer_recv(:, col_local)
2013 ELSE IF (send_size > 0)
THEN
2014 CALL para_env%send(buffer_send(:, :send_size), blacs2mpi(my_prow, proc_send), tag)
2019 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
2020 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
2023 CALL timestop(handle)
2041 color_sub, do_ri_sos_laplace_mp2, homo, virtual)
2042 TYPE(rpa_grad_type),
INTENT(INOUT) ::
rpa_grad
2043 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
2044 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env_sub, para_env
2045 TYPE(qs_environment_type),
INTENT(IN),
POINTER :: qs_env
2046 TYPE(group_dist_d1_type) :: gd_array
2047 INTEGER,
INTENT(IN) :: color_sub
2048 LOGICAL,
INTENT(IN) :: do_ri_sos_laplace_mp2
2049 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2051 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_finalize'
2053 INTEGER :: dimen_ia, dimen_ri, handle, iib, ispin, my_group_l_end, my_group_l_size, &
2054 my_group_l_start, my_ia_end, my_ia_size, my_ia_start, my_p_end, my_p_size, my_p_start, &
2055 ngroup, nspins, pos_group, pos_sub, proc
2056 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: pos_info
2057 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
2058 REAL(kind=dp) :: my_scale
2059 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: gamma_2d
2060 TYPE(cp_blacs_env_type),
POINTER :: blacs_env
2061 TYPE(cp_fm_struct_type),
POINTER :: fm_struct
2062 TYPE(cp_fm_type) :: fm_g_p_ia, fm_pq, fm_pq_2, fm_pq_half, &
2063 fm_work_pq, fm_work_pq_2, fm_y, &
2065 TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_p, gd_p_new
2067 CALL timeset(routinen, handle)
2074 IF (do_ri_sos_laplace_mp2)
THEN
2075 my_scale = mp2_env%scale_s
2077 my_scale = -mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2078 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2081 IF (do_ri_sos_laplace_mp2)
THEN
2082 CALL sos_mp2_grad_finalize(
rpa_grad%sos_mp2_work_occ,
rpa_grad%sos_mp2_work_virt, &
2083 para_env, para_env_sub, homo, virtual, mp2_env)
2085 CALL rpa_grad_work_finalize(
rpa_grad%rpa_work, mp2_env, homo, &
2086 virtual, para_env, para_env_sub)
2089 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2091 CALL cp_fm_get_info(
rpa_grad%fm_Gamma_PQ, ncol_global=dimen_ri)
2094 CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=dimen_ri, &
2095 ncol_global=dimen_ri, para_env=para_env)
2096 CALL cp_fm_create(fm_pq, fm_struct)
2097 CALL cp_fm_create(fm_work_pq, fm_struct)
2098 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2099 CALL cp_fm_create(fm_pq_2, fm_struct)
2101 CALL cp_fm_struct_release(fm_struct)
2102 CALL cp_fm_set_all(fm_pq, 0.0_dp)
2105 CALL dereplicate_and_sum_fm(
rpa_grad%fm_Gamma_PQ, fm_pq)
2107 ngroup = para_env%num_pe/para_env_sub%num_pe
2109 CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
2110 group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
2113 CALL create_group_dist(gd_p, para_env_sub%num_pe, dimen_ri)
2114 CALL get_group_dist(gd_p, para_env_sub%mepos, my_p_start, my_p_end, my_p_size)
2116 CALL get_group_dist(gd_array, color_sub, my_group_l_start, my_group_l_end, my_group_l_size)
2118 CALL create_group_dist(gd_p_new, para_env%num_pe)
2119 CALL create_group_dist(gd_array_new, para_env%num_pe)
2121 DO proc = 0, para_env%num_pe - 1
2123 pos_group = proc/para_env_sub%num_pe
2125 pos_sub = pos_info(proc)
2127 CALL get_group_dist(gd_array, pos_group, gd_array_new, proc)
2128 CALL get_group_dist(gd_p, pos_sub, gd_p_new, proc)
2131 DEALLOCATE (pos_info)
2132 CALL release_group_dist(gd_p)
2134 CALL array2fm(mp2_env%ri_grad%PQ_half, fm_pq%matrix_struct, &
2135 my_p_start, my_p_end, &
2136 my_group_l_start, my_group_l_end, &
2137 gd_p_new, gd_array_new, &
2138 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2141 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2142 CALL array2fm(mp2_env%ri_grad%operator_half, fm_pq%matrix_struct, my_p_start, my_p_end, &
2143 my_group_l_start, my_group_l_end, &
2144 gd_p_new, gd_array_new, &
2145 group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
2150 CALL release_group_dist(gd_p_new)
2151 CALL release_group_dist(gd_array_new)
2153 IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2155 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2156 matrix_a=fm_pq, matrix_b=fm_pq_half, beta=0.0_dp, &
2157 matrix_c=fm_work_pq)
2159 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2160 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2163 CALL cp_fm_release(fm_work_pq)
2165 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=1.0_dp, &
2166 matrix_a=fm_pq, matrix_b=operator_half, beta=0.0_dp, &
2167 matrix_c=fm_work_pq)
2169 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2170 matrix_a=operator_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2172 CALL cp_fm_release(operator_half)
2174 CALL cp_fm_create(fm_work_pq_2, fm_pq%matrix_struct, name=
"fm_Gamma_PQ_2")
2175 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=dimen_ri, k=dimen_ri, alpha=-my_scale, &
2176 matrix_a=fm_pq_half, matrix_b=fm_work_pq, beta=0.0_dp, &
2177 matrix_c=fm_work_pq_2)
2178 CALL cp_fm_to_fm(fm_work_pq_2, fm_pq_2)
2179 CALL cp_fm_geadd(1.0_dp,
"T", fm_work_pq_2, 1.0_dp, fm_pq_2)
2180 CALL cp_fm_release(fm_work_pq_2)
2181 CALL cp_fm_release(fm_work_pq)
2184 ALLOCATE (mp2_env%ri_grad%Gamma_PQ(my_p_size, my_group_l_size))
2185 CALL fm2array(mp2_env%ri_grad%Gamma_PQ, &
2186 my_p_size, my_p_start, my_p_end, &
2187 my_group_l_size, my_group_l_start, my_group_l_end, &
2188 group_grid_2_mepos, mepos_2_grid_group, &
2189 para_env_sub%num_pe, ngroup, &
2192 IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter))
THEN
2193 ALLOCATE (mp2_env%ri_grad%Gamma_PQ_2(my_p_size, my_group_l_size))
2194 CALL fm2array(mp2_env%ri_grad%Gamma_PQ_2, my_p_size, my_p_start, my_p_end, &
2195 my_group_l_size, my_group_l_start, my_group_l_end, &
2196 group_grid_2_mepos, mepos_2_grid_group, &
2197 para_env_sub%num_pe, ngroup, &
2202 ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_l_size, nspins))
2203 DO ispin = 1, nspins
2204 DO iib = 1, my_group_l_size
2205 NULLIFY (mp2_env%ri_grad%G_P_ia(iib, ispin)%matrix)
2210 DO ispin = 1, nspins
2212 CALL cp_fm_get_info(
rpa_grad%fm_Y(ispin), ncol_global=dimen_ia)
2214 CALL get_qs_env(qs_env, blacs_env=blacs_env)
2217 CALL cp_fm_struct_create(fm_struct, template_fmstruct=fm_pq_half%matrix_struct, nrow_global=dimen_ia)
2218 CALL cp_fm_create(fm_y, fm_struct)
2219 CALL cp_fm_struct_release(fm_struct)
2220 CALL cp_fm_set_all(fm_y, 0.0_dp)
2222 CALL dereplicate_and_sum_fm(
rpa_grad%fm_Y(ispin), fm_y)
2224 CALL cp_fm_create(fm_g_p_ia, fm_y%matrix_struct)
2225 CALL cp_fm_set_all(fm_g_p_ia, 0.0_dp)
2227 CALL parallel_gemm(transa=
"N", transb=
"T", m=dimen_ia, n=dimen_ri, k=dimen_ri, alpha=my_scale, &
2228 matrix_a=fm_y, matrix_b=fm_pq_half, beta=0.0_dp, &
2231 CALL cp_fm_release(fm_y)
2233 CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
2234 CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
2236 CALL fm2array(gamma_2d, my_ia_size, my_ia_start, my_ia_end, &
2237 my_group_l_size, my_group_l_start, my_group_l_end, &
2238 group_grid_2_mepos, mepos_2_grid_group, &
2239 para_env_sub%num_pe, ngroup, &
2243 CALL create_dbcsr_gamma(gamma_2d, homo(ispin), virtual(ispin), dimen_ia, para_env_sub, &
2244 my_ia_start, my_ia_end, my_group_l_size, gd_ia, &
2245 mp2_env%ri_grad%G_P_ia(:, ispin), mp2_env%ri_grad%mo_coeff_o(ispin)%matrix)
2247 CALL release_group_dist(gd_ia)
2251 CALL cp_fm_release(fm_pq_half)
2253 CALL timestop(handle)
2267 SUBROUTINE sos_mp2_grad_finalize(sos_mp2_work_occ, sos_mp2_work_virt, para_env, para_env_sub, homo, virtual, mp2_env)
2268 TYPE(sos_mp2_grad_work_type),
ALLOCATABLE, &
2269 DIMENSION(:),
INTENT(INOUT) :: sos_mp2_work_occ, sos_mp2_work_virt
2270 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env, para_env_sub
2271 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2272 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
2274 CHARACTER(LEN=*),
PARAMETER :: routinen =
'sos_mp2_grad_finalize'
2276 INTEGER :: ab_counter, handle, ij_counter, ispin, &
2277 itmp(2), my_a, my_b, my_b_end, &
2278 my_b_size, my_b_start, my_i, my_j, &
2280 REAL(kind=dp) :: my_scale
2282 CALL timeset(routinen, handle)
2284 nspins =
SIZE(sos_mp2_work_occ)
2285 my_scale = mp2_env%scale_s
2287 DO ispin = 1, nspins
2288 DO pcol = 0,
SIZE(sos_mp2_work_occ(ispin)%index2send, 1) - 1
2289 IF (
ALLOCATED(sos_mp2_work_occ(ispin)%index2send(pcol)%array)) &
2290 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send(pcol)%array)
2291 IF (
ALLOCATED(sos_mp2_work_occ(ispin)%index2send(pcol)%array)) &
2292 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send(pcol)%array)
2293 IF (
ALLOCATED(sos_mp2_work_virt(ispin)%index2recv(pcol)%array)) &
2294 DEALLOCATE (sos_mp2_work_virt(ispin)%index2recv(pcol)%array)
2295 IF (
ALLOCATED(sos_mp2_work_virt(ispin)%index2recv(pcol)%array)) &
2296 DEALLOCATE (sos_mp2_work_virt(ispin)%index2recv(pcol)%array)
2298 DEALLOCATE (sos_mp2_work_occ(ispin)%index2send, &
2299 sos_mp2_work_occ(ispin)%index2recv, &
2300 sos_mp2_work_virt(ispin)%index2send, &
2301 sos_mp2_work_virt(ispin)%index2recv)
2305 DO ispin = 1, nspins
2306 CALL para_env%sum(sos_mp2_work_occ(ispin)%P)
2308 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2309 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2310 DO my_i = 1, homo(ispin)
2311 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_i) = my_scale*sos_mp2_work_occ(ispin)%P(my_i)
2313 DO ij_counter = 1,
SIZE(sos_mp2_work_occ(ispin)%pair_list, 2)
2314 my_i = sos_mp2_work_occ(ispin)%pair_list(1, ij_counter)
2315 my_j = sos_mp2_work_occ(ispin)%pair_list(2, ij_counter)
2317 mp2_env%ri_grad%P_ij(ispin)%array(my_i, my_j) = my_scale*sos_mp2_work_occ(ispin)%P(homo(ispin) + ij_counter)
2319 DEALLOCATE (sos_mp2_work_occ(ispin)%P, sos_mp2_work_occ(ispin)%pair_list)
2322 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2323 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2326 CALL para_env%sum(sos_mp2_work_virt(ispin)%P)
2328 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2329 my_b_size = itmp(2) - itmp(1) + 1
2330 my_b_start = itmp(1)
2333 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2334 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2335 DO my_a = itmp(1), itmp(2)
2336 mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_a) = my_scale*sos_mp2_work_virt(ispin)%P(my_a)
2338 DO ab_counter = 1,
SIZE(sos_mp2_work_virt(ispin)%pair_list, 2)
2339 my_a = sos_mp2_work_virt(ispin)%pair_list(1, ab_counter)
2340 my_b = sos_mp2_work_virt(ispin)%pair_list(2, ab_counter)
2342 IF (my_a >= itmp(1) .AND. my_a <= itmp(2)) mp2_env%ri_grad%P_ab(ispin)%array(my_a - itmp(1) + 1, my_b) = &
2343 my_scale*sos_mp2_work_virt(ispin)%P(virtual(ispin) + ab_counter)
2346 DEALLOCATE (sos_mp2_work_virt(ispin)%P, sos_mp2_work_virt(ispin)%pair_list)
2349 IF (para_env_sub%num_pe > 1)
THEN
2351 INTEGER :: send_a_start, send_a_end, send_a_size, &
2352 recv_a_start, recv_a_end, recv_a_size, proc_shift, proc_send, proc_recv
2353 REAL(kind=dp),
DIMENSION(:),
ALLOCATABLE,
TARGET :: buffer_send_1d
2354 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: buffer_send
2355 REAL(kind=dp),
DIMENSION(:, :),
ALLOCATABLE :: buffer_recv
2356 TYPE(group_dist_d1_type) :: gd_virtual_sub
2358 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2360 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2361 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2362 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2364 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2365 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2367 DO proc_shift = 1, para_env_sub%num_pe - 1
2369 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2370 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2372 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2373 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2375 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2377 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2378 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2379 buffer_recv(:, :recv_a_size), proc_recv)
2381 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2382 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2386 DEALLOCATE (buffer_send_1d, buffer_recv)
2388 CALL release_group_dist(gd_virtual_sub)
2391 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2392 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2396 DEALLOCATE (sos_mp2_work_occ, sos_mp2_work_virt)
2397 IF (nspins == 1)
THEN
2398 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2399 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2402 CALL timestop(handle)
2415 SUBROUTINE rpa_grad_work_finalize(rpa_work, mp2_env, homo, virtual, para_env, para_env_sub)
2416 TYPE(rpa_grad_work_type),
INTENT(INOUT) :: rpa_work
2417 TYPE(mp2_type),
INTENT(INOUT) :: mp2_env
2418 INTEGER,
DIMENSION(:),
INTENT(IN) :: homo, virtual
2419 TYPE(mp_para_env_type),
INTENT(IN),
POINTER :: para_env, para_env_sub
2421 CHARACTER(LEN=*),
PARAMETER :: routinen =
'rpa_grad_work_finalize'
2423 INTEGER :: handle, ispin, itmp(2), my_a_end, my_a_size, my_a_start, my_b_end, my_b_size, &
2424 my_b_start, my_i_end, my_i_size, my_i_start, nspins, proc, proc_recv, proc_send, &
2425 proc_shift, recv_a_end, recv_a_size, recv_a_start, recv_end, recv_start, send_a_end, &
2426 send_a_size, send_a_start, send_end, send_start, size_recv_buffer, size_send_buffer
2427 REAL(kind=dp) :: my_scale
2428 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: buffer_recv, buffer_send
2429 TYPE(group_dist_d1_type) :: gd_a_sub, gd_virtual_sub
2431 CALL timeset(routinen, handle)
2434 my_scale = mp2_env%ri_rpa%scale_rpa/(2.0_dp*pi)
2435 IF (mp2_env%ri_rpa%minimax_quad) my_scale = my_scale/2.0_dp
2437 CALL cp_fm_release(rpa_work%fm_mat_Q_copy)
2439 DO ispin = 1, nspins
2440 DO proc = 0,
SIZE(rpa_work%index2send, 1) - 1
2441 IF (
ALLOCATED(rpa_work%index2send(proc, ispin)%array))
DEALLOCATE (rpa_work%index2send(proc, ispin)%array)
2442 IF (
ALLOCATED(rpa_work%index2recv(proc, ispin)%array))
DEALLOCATE (rpa_work%index2recv(proc, ispin)%array)
2445 DEALLOCATE (rpa_work%index2send, rpa_work%index2recv)
2447 DO ispin = 1, nspins
2448 CALL get_group_dist(rpa_work%gd_homo(ispin), rpa_work%mepos(2), my_i_start, my_i_end, my_i_size)
2449 CALL release_group_dist(rpa_work%gd_homo(ispin))
2451 ALLOCATE (mp2_env%ri_grad%P_ij(ispin)%array(homo(ispin), homo(ispin)))
2452 mp2_env%ri_grad%P_ij(ispin)%array = 0.0_dp
2453 mp2_env%ri_grad%P_ij(ispin)%array(my_i_start:my_i_end, :) = my_scale*rpa_work%P_ij(ispin)%array
2454 DEALLOCATE (rpa_work%P_ij(ispin)%array)
2455 CALL para_env%sum(mp2_env%ri_grad%P_ij(ispin)%array)
2458 mp2_env%ri_grad%P_ij(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ij(ispin)%array + &
2459 transpose(mp2_env%ri_grad%P_ij(ispin)%array))
2461 itmp = get_limit(virtual(ispin), para_env_sub%num_pe, para_env_sub%mepos)
2462 my_b_start = itmp(1)
2464 my_b_size = my_b_end - my_b_start + 1
2466 ALLOCATE (mp2_env%ri_grad%P_ab(ispin)%array(my_b_size, virtual(ispin)))
2467 mp2_env%ri_grad%P_ab(ispin)%array = 0.0_dp
2469 CALL get_group_dist(rpa_work%gd_virtual(ispin), rpa_work%mepos(1), my_a_start, my_a_end, my_a_size)
2470 CALL release_group_dist(rpa_work%gd_virtual(ispin))
2472 CALL create_group_dist(gd_a_sub, my_a_start, my_a_end, my_a_size, para_env_sub)
2474 CALL create_group_dist(gd_virtual_sub, para_env_sub%num_pe, virtual(ispin))
2477 send_start = max(1, my_b_start - my_a_start + 1)
2478 send_end = min(my_a_size, my_b_end - my_a_start + 1)
2481 recv_start = max(1, my_a_start - my_b_start + 1)
2482 recv_end = min(my_b_size, my_a_end - my_b_start + 1)
2484 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2485 my_scale*rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2487 IF (para_env_sub%num_pe > 1)
THEN
2488 size_send_buffer = 0
2489 size_recv_buffer = 0
2490 DO proc_shift = 1, para_env_sub%num_pe - 1
2491 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2492 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2494 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2495 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2498 send_start = max(1, send_a_start - my_a_start + 1)
2499 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2501 size_send_buffer = max(size_send_buffer, max(send_end - send_start + 1, 0))
2504 recv_start = max(1, recv_a_start - my_b_start + 1)
2505 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2507 size_recv_buffer = max(size_recv_buffer, max(recv_end - recv_start + 1, 0))
2509 ALLOCATE (buffer_send(size_send_buffer, virtual(ispin)), buffer_recv(size_recv_buffer, virtual(ispin)))
2511 DO proc_shift = 1, para_env_sub%num_pe - 1
2512 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2513 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2515 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end)
2516 CALL get_group_dist(gd_a_sub, proc_recv, recv_a_start, recv_a_end)
2519 send_start = max(1, send_a_start - my_a_start + 1)
2520 send_end = min(my_a_size, send_a_end - my_a_start + 1)
2521 buffer_send(1:max(send_end - send_start + 1, 0), :) = rpa_work%P_ab(ispin)%array(send_start:send_end, :)
2524 recv_start = max(1, recv_a_start - my_b_start + 1)
2525 recv_end = min(my_b_size, recv_a_end - my_b_start + 1)
2527 CALL para_env_sub%sendrecv(buffer_send(1:max(send_end - send_start + 1, 0), :), proc_send, &
2528 buffer_recv(1:max(recv_end - recv_start + 1, 0), :), proc_recv)
2530 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) = &
2531 mp2_env%ri_grad%P_ab(ispin)%array(recv_start:recv_end, :) + &
2532 my_scale*buffer_recv(1:max(recv_end - recv_start + 1, 0), :)
2536 IF (
ALLOCATED(buffer_send))
DEALLOCATE (buffer_send)
2537 IF (
ALLOCATED(buffer_recv))
DEALLOCATE (buffer_recv)
2539 DEALLOCATE (rpa_work%P_ab(ispin)%array)
2541 CALL release_group_dist(gd_a_sub)
2544 TYPE(mp_comm_type) :: comm_exchange
2545 CALL comm_exchange%from_split(para_env, para_env_sub%mepos)
2546 CALL comm_exchange%sum(mp2_env%ri_grad%P_ab(ispin)%array)
2547 CALL comm_exchange%free()
2551 IF (para_env_sub%num_pe > 1)
THEN
2553 REAL(kind=dp),
DIMENSION(:),
ALLOCATABLE,
TARGET :: buffer_send_1d
2554 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: buffer_send
2555 REAL(kind=dp),
DIMENSION(:, :),
ALLOCATABLE :: buffer_recv
2557 mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) = &
2558 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end) &
2559 + transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, my_b_start:my_b_end)))
2561 ALLOCATE (buffer_send_1d(my_b_size*maxsize(gd_virtual_sub)))
2562 ALLOCATE (buffer_recv(my_b_size, maxsize(gd_virtual_sub)))
2564 DO proc_shift = 1, para_env_sub%num_pe - 1
2566 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2567 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2569 CALL get_group_dist(gd_virtual_sub, proc_send, send_a_start, send_a_end, send_a_size)
2570 CALL get_group_dist(gd_virtual_sub, proc_recv, recv_a_start, recv_a_end, recv_a_size)
2572 buffer_send(1:send_a_size, 1:my_b_size) => buffer_send_1d(1:my_b_size*send_a_size)
2574 buffer_send(:send_a_size, :) = transpose(mp2_env%ri_grad%P_ab(ispin)%array(:, send_a_start:send_a_end))
2575 CALL para_env_sub%sendrecv(buffer_send(:send_a_size, :), proc_send, &
2576 buffer_recv(:, :recv_a_size), proc_recv)
2578 mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) = &
2579 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array(:, recv_a_start:recv_a_end) + buffer_recv(:, 1:recv_a_size))
2583 DEALLOCATE (buffer_send_1d, buffer_recv)
2586 mp2_env%ri_grad%P_ab(ispin)%array(:, :) = 0.5_dp*(mp2_env%ri_grad%P_ab(ispin)%array + &
2587 transpose(mp2_env%ri_grad%P_ab(ispin)%array))
2590 CALL release_group_dist(gd_virtual_sub)
2593 DEALLOCATE (rpa_work%gd_homo, rpa_work%gd_virtual, rpa_work%P_ij, rpa_work%P_ab)
2594 IF (nspins == 1)
THEN
2595 mp2_env%ri_grad%P_ij(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ij(1)%array
2596 mp2_env%ri_grad%P_ab(1)%array(:, :) = 2.0_dp*mp2_env%ri_grad%P_ab(1)%array
2599 CALL timestop(handle)
2607 SUBROUTINE dereplicate_and_sum_fm(fm_sub, fm_global)
2608 TYPE(cp_fm_type),
INTENT(INOUT) :: fm_sub, fm_global
2610 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dereplicate_and_sum_fm'
2612 INTEGER :: col_local, elements2recv_col, elements2recv_row, elements2send_col, &
2613 elements2send_row, first_p_pos_global(2), first_p_pos_sub(2), handle, handle2, &
2614 mypcol_global, myprow_global, ncol_block_global, ncol_block_sub, ncol_local_global, &
2615 ncol_local_sub, npcol_global, npcol_sub, nprow_global, nprow_sub, nrow_block_global, &
2616 nrow_block_sub, nrow_local_global, nrow_local_sub, pcol_recv, pcol_send, proc_recv, &
2617 proc_send, proc_send_global, proc_shift, prow_recv, prow_send, row_local, tag
2618 INTEGER(int_8) :: size_recv_buffer, size_send_buffer
2619 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: data2recv_col, data2recv_row, &
2620 data2send_col, data2send_row, &
2622 INTEGER,
DIMENSION(:),
POINTER :: col_indices_global, col_indices_sub, &
2623 row_indices_global, row_indices_sub
2624 INTEGER,
DIMENSION(:, :),
POINTER :: blacs2mpi_global, blacs2mpi_sub, &
2625 mpi2blacs_global, mpi2blacs_sub
2626 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:),
TARGET :: recv_buffer_1d, send_buffer_1d
2627 REAL(kind=dp),
DIMENSION(:, :),
POINTER :: recv_buffer, send_buffer
2628 TYPE(mp_para_env_type),
POINTER :: para_env, para_env_sub
2629 TYPE(one_dim_int_array),
ALLOCATABLE,
DIMENSION(:) :: index2recv_col, index2recv_row, &
2630 index2send_col, index2send_row
2632 CALL timeset(routinen, handle)
2636 nprow_sub = fm_sub%matrix_struct%context%num_pe(1)
2637 npcol_sub = fm_sub%matrix_struct%context%num_pe(2)
2639 myprow_global = fm_global%matrix_struct%context%mepos(1)
2640 mypcol_global = fm_global%matrix_struct%context%mepos(2)
2641 nprow_global = fm_global%matrix_struct%context%num_pe(1)
2642 npcol_global = fm_global%matrix_struct%context%num_pe(2)
2644 CALL cp_fm_get_info(fm_sub, col_indices=col_indices_sub, row_indices=row_indices_sub, &
2645 nrow_local=nrow_local_sub, ncol_local=ncol_local_sub)
2646 CALL cp_fm_struct_get(fm_sub%matrix_struct, para_env=para_env_sub, first_p_pos=first_p_pos_sub, &
2647 nrow_block=nrow_block_sub, ncol_block=ncol_block_sub)
2648 CALL cp_fm_struct_get(fm_global%matrix_struct, first_p_pos=first_p_pos_global, nrow_block=nrow_block_global, &
2649 ncol_block=ncol_block_global, para_env=para_env, &
2650 col_indices=col_indices_global, row_indices=row_indices_global, &
2651 nrow_local=nrow_local_global, ncol_local=ncol_local_global)
2652 CALL fm_sub%matrix_struct%context%get(blacs2mpi=blacs2mpi_sub, mpi2blacs=mpi2blacs_sub)
2653 CALL fm_global%matrix_struct%context%get(blacs2mpi=blacs2mpi_global, mpi2blacs=mpi2blacs_global)
2655 IF (para_env%num_pe /= para_env_sub%num_pe)
THEN
2657 TYPE(mp_comm_type) :: comm_exchange
2658 comm_exchange = fm_sub%matrix_struct%context%interconnect(para_env)
2659 CALL comm_exchange%sum(fm_sub%local_data)
2660 CALL comm_exchange%free()
2664 ALLOCATE (subgroup2mepos(0:para_env_sub%num_pe - 1))
2665 CALL para_env_sub%allgather(para_env%mepos, subgroup2mepos)
2667 CALL timeset(routinen//
"_data2", handle2)
2669 CALL get_elements2send(data2send_col, npcol_global, row_indices_sub, ncol_block_global, first_p_pos_global(2), index2send_col)
2670 CALL get_elements2send(data2send_row, nprow_global, col_indices_sub, nrow_block_global, first_p_pos_global(1), index2send_row)
2674 CALL get_elements2send(data2recv_col, npcol_sub, row_indices_global, ncol_block_sub, first_p_pos_sub(2), index2recv_col)
2675 CALL get_elements2send(data2recv_row, nprow_sub, col_indices_global, nrow_block_sub, first_p_pos_sub(1), index2recv_row)
2676 CALL timestop(handle2)
2678 CALL timeset(routinen//
"_local", handle2)
2680 prow_send = mpi2blacs_global(1, para_env%mepos)
2681 pcol_send = mpi2blacs_global(2, para_env%mepos)
2682 prow_recv = mpi2blacs_sub(1, para_env_sub%mepos)
2683 pcol_recv = mpi2blacs_sub(2, para_env_sub%mepos)
2684 elements2recv_col = data2recv_col(pcol_recv)
2685 elements2recv_row = data2recv_row(prow_recv)
2691 DO col_local = 1, elements2recv_col
2692 DO row_local = 1, elements2recv_row
2693 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2694 index2recv_row(prow_recv)%array(row_local)) &
2695 = fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2696 index2send_row(prow_send)%array(col_local))
2700 CALL timestop(handle2)
2702 IF (para_env_sub%num_pe > 1)
THEN
2703 size_send_buffer = 0_int_8
2704 size_recv_buffer = 0_int_8
2706 DO proc_shift = 1, para_env_sub%num_pe - 1
2707 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2708 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2710 proc_send_global = subgroup2mepos(proc_send)
2711 prow_send = mpi2blacs_global(1, proc_send_global)
2712 pcol_send = mpi2blacs_global(2, proc_send_global)
2713 elements2send_col = data2send_col(pcol_send)
2714 elements2send_row = data2send_row(prow_send)
2716 size_send_buffer = max(size_send_buffer, int(elements2send_col, int_8)*elements2send_row)
2718 prow_recv = mpi2blacs_sub(1, proc_recv)
2719 pcol_recv = mpi2blacs_sub(2, proc_recv)
2720 elements2recv_col = data2recv_col(pcol_recv)
2721 elements2recv_row = data2recv_row(prow_recv)
2723 size_recv_buffer = max(size_recv_buffer, int(elements2recv_col, int_8)*elements2recv_row)
2725 ALLOCATE (send_buffer_1d(size_send_buffer), recv_buffer_1d(size_recv_buffer))
2728 DO proc_shift = 1, para_env_sub%num_pe - 1
2729 proc_send =
modulo(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
2730 proc_recv =
modulo(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
2732 proc_send_global = subgroup2mepos(proc_send)
2733 prow_send = mpi2blacs_global(1, proc_send_global)
2734 pcol_send = mpi2blacs_global(2, proc_send_global)
2735 elements2send_col = data2send_col(pcol_send)
2736 elements2send_row = data2send_row(prow_send)
2738 CALL timeset(routinen//
"_pack", handle2)
2741 send_buffer(1:elements2send_row, 1:elements2send_col) => send_buffer_1d(1:int(elements2send_row, int_8)*elements2send_col)
2745 DO row_local = 1, elements2send_col
2746 DO col_local = 1, elements2send_row
2747 send_buffer(col_local, row_local) = &
2748 fm_sub%local_data(index2send_col(pcol_send)%array(row_local), &
2749 index2send_row(prow_send)%array(col_local))
2753 CALL timestop(handle2)
2755 prow_recv = mpi2blacs_sub(1, proc_recv)
2756 pcol_recv = mpi2blacs_sub(2, proc_recv)
2757 elements2recv_col = data2recv_col(pcol_recv)
2758 elements2recv_row = data2recv_row(prow_recv)
2761 recv_buffer(1:elements2recv_col, 1:elements2recv_row) => recv_buffer_1d(1:int(elements2recv_row, int_8)*elements2recv_col)
2762 IF (
SIZE(recv_buffer) > 0_int_8)
THEN
2763 IF (
SIZE(send_buffer) > 0_int_8)
THEN
2764 CALL para_env_sub%sendrecv(send_buffer, proc_send, recv_buffer, proc_recv, tag)
2766 CALL para_env_sub%recv(recv_buffer, proc_recv, tag)
2769 CALL timeset(routinen//
"_unpack", handle2)
2773 DO col_local = 1, elements2recv_col
2774 DO row_local = 1, elements2recv_row
2775 fm_global%local_data(index2recv_col(pcol_recv)%array(col_local), &
2776 index2recv_row(prow_recv)%array(row_local)) &
2777 = recv_buffer(col_local, row_local)
2781 CALL timestop(handle2)
2782 ELSE IF (
SIZE(send_buffer) > 0_int_8)
THEN
2783 CALL para_env_sub%send(send_buffer, proc_send, tag)
2788 DEALLOCATE (data2send_col, data2send_row, data2recv_col, data2recv_row)
2789 DO proc_shift = 0, npcol_global - 1
2790 DEALLOCATE (index2send_col(proc_shift)%array)
2792 DO proc_shift = 0, npcol_sub - 1
2793 DEALLOCATE (index2recv_col(proc_shift)%array)
2795 DO proc_shift = 0, nprow_global - 1
2796 DEALLOCATE (index2send_row(proc_shift)%array)
2798 DO proc_shift = 0, nprow_sub - 1
2799 DEALLOCATE (index2recv_row(proc_shift)%array)
2801 DEALLOCATE (index2send_col, index2recv_col, index2send_row, index2recv_row)
2803 CALL cp_fm_release(fm_sub)
2805 CALL timestop(handle)
2807 END SUBROUTINE dereplicate_and_sum_fm
2818 SUBROUTINE get_elements2send(data2send, np_global, indices_sub, n_block_global, first_p_pos_global, index2send)
2819 INTEGER,
ALLOCATABLE,
DIMENSION(:),
INTENT(OUT) :: data2send
2820 INTEGER,
INTENT(IN) :: np_global
2821 INTEGER,
DIMENSION(:),
INTENT(IN) :: indices_sub
2822 INTEGER,
INTENT(IN) :: n_block_global, first_p_pos_global
2823 TYPE(one_dim_int_array),
ALLOCATABLE, &
2824 DIMENSION(:),
INTENT(OUT) :: index2send
2826 INTEGER :: i_global, i_local, proc
2828 ALLOCATE (data2send(0:np_global - 1))
2830 DO i_local = 1,
SIZE(indices_sub)
2831 i_global = indices_sub(i_local)
2832 proc = cp_fm_indxg2p(i_global, n_block_global, 0, first_p_pos_global, np_global)
2833 data2send(proc) = data2send(proc) + 1
2836 ALLOCATE (index2send(0:np_global - 1))
2837 DO proc = 0, np_global - 1
2838 ALLOCATE (index2send(proc)%array(data2send(proc)))
2840 index2send(proc)%array = -1
2844 DO i_local = 1,
SIZE(indices_sub)
2845 i_global = indices_sub(i_local)
2846 proc = cp_fm_indxg2p(i_global, n_block_global, 0, first_p_pos_global, np_global)
2847 data2send(proc) = data2send(proc) + 1
2848 index2send(proc)%array(data2send(proc)) = i_local
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
methods related to the blacs parallel environment
basic linear algebra operations for full matrices
subroutine, public cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
interface to BLACS geadd: matrix_b = beta*matrix_b + alpha*opt(matrix_a) where opt(matrix_a) can be e...
subroutine, public cp_fm_upper_to_full(matrix, work)
given an upper triangular matrix computes the corresponding full matrix
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_invert(matrix, n, info_out)
used to replace the cholesky decomposition by the inverse
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
integer function, public cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2L that computes the local index of a distributed matrix entry poi...
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
integer function, public cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2P that computes the process coordinate which possesses the entry ...
subroutine, public cp_fm_to_fm_submat_general(source, destination, nrows, ncols, s_firstrow, s_firstcol, d_firstrow, d_firstcol, global_context)
General copy of a submatrix of fm matrix to a submatrix of another fm matrix. The two matrices can ha...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Counters to determine the performance of parallel DGEMMs.
subroutine, public dgemm_counter_start(dgemm_counter)
start timer of the counter
subroutine, public dgemm_counter_stop(dgemm_counter, size1, size2, size3)
stop timer of the counter and provide matrix sizes
Types to describe group distributions.
elemental integer function, public maxsize(this)
...
elemental integer function, public group_dist_proc(this, pos)
...
sums arrays of real/complex numbers with much reduced round-off as compared to a naive implementation...
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
2- and 3-center electron repulsion integral routines based on libint2 Currently available operators: ...
pure logical function, public compare_potential_types(potential1, potential2)
Helper function to compare libint_potential_types.
subroutine, public local_gemm_set_op_threshold_gpu(ctx, opThresholdGPU)
...
integer, parameter, public local_gemm_pu_gpu
subroutine, public local_gemm_destroy(ctx)
release resources associated to a gemm context
subroutine, public local_gemm_create(ctx, pu)
create a context for handling gemm offloading
subroutine, public local_gemm(opA, opB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, ctx)
...
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_memory(mem)
Returns the total amount of memory [bytes] in use, if known, zero otherwise.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition of mathematical constants and functions.
real(kind=dp), parameter, public pi
Interface to the message passing library MPI.
subroutine, public mp_waitany(requests, completed)
waits for completion of any of the given requests
type(mp_request_type), parameter, public mp_request_null
Routines to calculate MP2 energy with laplace approach.
subroutine, public calc_fm_mat_s_laplace(fm_mat_S, homo, virtual, Eigenval, dajquad)
...
Routines for calculating RI-MP2 gradients.
subroutine, public prepare_redistribution(para_env, para_env_sub, ngroup, group_grid_2_mepos, mepos_2_grid_group, pos_info)
prepare array for redistribution
subroutine, public array2fm(mat2D, fm_struct, my_start_row, my_end_row, my_start_col, my_end_col, gd_row, gd_col, group_grid_2_mepos, ngroup_row, ngroup_col, fm_mat, integ_group_size, color_group, do_release_mat)
redistribute local part of array to fm
subroutine, public create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, my_ia_start, my_ia_end, my_group_L_size, gd_ia, dbcsr_Gamma, mo_coeff_o)
redistribute 2D representation of 3d tensor to a set of dbcsr matrices
subroutine, public fm2array(mat2D, my_rows, my_start_row, my_end_row, my_cols, my_start_col, my_end_col, group_grid_2_mepos, mepos_2_grid_group, ngroup_row, ngroup_col, fm_mat)
redistribute fm to local part of array
Types needed for MP2 calculations.
basic linear algebra operations for full matrixes
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_RI_aux_kp, matrix_s, matrix_s_RI_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, WannierCentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, rhs)
Get the QUICKSTEP environment.
Routines to calculate RI-RPA and SOS-MP2 gradients.
subroutine, public rpa_grad_matrix_operations(mp2_env, rpa_grad, do_ri_sos_laplace_mp2, fm_mat_Q, fm_mat_Q_gemm, dgemm_counter, fm_mat_S, omega, homo, virtual, Eigenval, weight, unit_nr)
...
pure subroutine, public rpa_grad_needed_mem(homo, virtual, dimen_RI, mem_per_rank, mem_per_repl, do_ri_sos_laplace_mp2)
Calculates the necessary minimum memory for the Gradient code ion MiB.
subroutine, public rpa_grad_create(rpa_grad, fm_mat_Q, fm_mat_S, homo, virtual, mp2_env, Eigenval, unit_nr, do_ri_sos_laplace_mp2)
Creates the arrays of a rpa_grad_type.
subroutine, public rpa_grad_finalize(rpa_grad, mp2_env, para_env_sub, para_env, qs_env, gd_array, color_sub, do_ri_sos_laplace_mp2, homo, virtual)
...
subroutine, public rpa_grad_copy_q(fm_mat_Q, rpa_grad)
...
Utility functions for RPA calculations.
subroutine, public calc_fm_mat_s_rpa(fm_mat_S, first_cycle, virtual, Eigenval, homo, omega, omega_old)
...
subroutine, public remove_scaling_factor_rpa(fm_mat_S, virtual, Eigenval_last, homo, omega_old)
...
All kind of helpful little routines.
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me