64 SUBROUTINE mult_b_with_w(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
65 fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
66 dimen_RI, homo, virtual)
68 TYPE(
cp_fm_type),
INTENT(IN) :: fm_mat_s_ij_bse, fm_mat_s_ia_bse
69 TYPE(
cp_fm_type),
INTENT(OUT) :: fm_mat_s_bar_ia_bse, fm_mat_s_bar_ij_bse
70 TYPE(
cp_fm_type),
INTENT(IN) :: fm_mat_q_static_bse_gemm
71 INTEGER,
INTENT(IN) :: dimen_ri, homo, virtual
73 CHARACTER(LEN=*),
PARAMETER :: routinen =
'mult_B_with_W'
75 INTEGER :: handle, i_global, iib, info_chol, &
76 j_global, jjb, ncol_local, nrow_local
77 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
80 CALL timeset(routinen, handle)
82 CALL cp_fm_create(fm_mat_s_bar_ia_bse, fm_mat_s_ia_bse%matrix_struct)
85 CALL cp_fm_create(fm_mat_s_bar_ij_bse, fm_mat_s_ij_bse%matrix_struct)
88 CALL cp_fm_create(fm_work, fm_mat_q_static_bse_gemm%matrix_struct)
93 nrow_local=nrow_local, &
94 ncol_local=ncol_local, &
95 row_indices=row_indices, &
96 col_indices=col_indices)
98 DO jjb = 1, ncol_local
99 j_global = col_indices(jjb)
100 DO iib = 1, nrow_local
101 i_global = row_indices(iib)
102 IF (j_global == i_global .AND. i_global <= dimen_ri)
THEN
103 fm_mat_q_static_bse_gemm%local_data(iib, jjb) = fm_mat_q_static_bse_gemm%local_data(iib, jjb) + 1.0_dp
111 cpassert(info_chol == 0)
119 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=homo**2, k=dimen_ri, alpha=1.0_dp, &
120 matrix_a=fm_mat_q_static_bse_gemm, matrix_b=fm_mat_s_ij_bse, beta=0.0_dp, &
121 matrix_c=fm_mat_s_bar_ij_bse)
125 CALL parallel_gemm(transa=
"N", transb=
"N", m=dimen_ri, n=homo*virtual, k=dimen_ri, alpha=1.0_dp, &
126 matrix_a=fm_mat_q_static_bse_gemm, matrix_b=fm_mat_s_ia_bse, beta=0.0_dp, &
127 matrix_c=fm_mat_s_bar_ia_bse)
131 CALL timestop(handle)
150 nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
152 TYPE(
cp_fm_type),
INTENT(INOUT) :: fm_out, fm_in
153 REAL(kind=
dp) :: beta
154 INTEGER,
INTENT(IN) :: nrow_secidx_in, ncol_secidx_in, &
155 nrow_secidx_out, ncol_secidx_out
157 INTEGER,
DIMENSION(4) :: reordering
158 TYPE(
mp2_type),
INTENT(INOUT) :: mp2_env
160 CHARACTER(LEN=*),
PARAMETER :: routinen =
'fm_general_add_bse'
162 INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
163 iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, npcol, nprocs, &
164 nprow, nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, &
165 row_idx_loc, send_pcol, send_prow
166 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: entry_counter, num_entries_rec, &
168 INTEGER,
DIMENSION(4) :: indices_in
169 INTEGER,
DIMENSION(:),
POINTER :: col_indices_in, col_indices_out, &
170 row_indices_in, row_indices_out
172 DIMENSION(:) :: buffer_rec, buffer_send
176 CALL timeset(routinen, handle)
177 CALL timeset(routinen//
"_1_setup", handle2)
179 para_env_out => fm_out%matrix_struct%para_env
183 nrow_local=nrow_local_out, &
184 ncol_local=ncol_local_out, &
185 row_indices=row_indices_out, &
186 col_indices=col_indices_out, &
187 nrow_block=nrow_block_out, &
188 ncol_block=ncol_block_out)
190 nprow = fm_out%matrix_struct%context%num_pe(1)
191 npcol = fm_out%matrix_struct%context%num_pe(2)
193 ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
194 ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
196 num_entries_rec(:) = 0
197 num_entries_send(:) = 0
202 nrow_local=nrow_local_in, &
203 ncol_local=ncol_local_in, &
204 row_indices=row_indices_in, &
205 col_indices=col_indices_in, &
206 nrow_block=nrow_block_in, &
207 ncol_block=ncol_block_in)
209 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
210 WRITE (unit_nr,
'(T2,A10,T13,A14,A10,T71,I10)')
'BSE|DEBUG|',
'Row number of ', fm_out%name, &
211 fm_out%matrix_struct%nrow_global
212 WRITE (unit_nr,
'(T2,A10,T13,A17,A10,T71,I10)')
'BSE|DEBUG|',
'Column number of ', fm_out%name, &
213 fm_out%matrix_struct%ncol_global
215 WRITE (unit_nr,
'(T2,A10,T13,A18,A10,T71,I10)')
'BSE|DEBUG|',
'Row block size of ', fm_out%name, nrow_block_out
216 WRITE (unit_nr,
'(T2,A10,T13,A21,A10,T71,I10)')
'BSE|DEBUG|',
'Column block size of ', fm_out%name, ncol_block_out
218 WRITE (unit_nr,
'(T2,A10,T13,A14,A10,T71,I10)')
'BSE|DEBUG|',
'Row number of ', fm_in%name, &
219 fm_in%matrix_struct%nrow_global
220 WRITE (unit_nr,
'(T2,A10,T13,A17,A10,T71,I10)')
'BSE|DEBUG|',
'Column number of ', fm_in%name, &
221 fm_in%matrix_struct%ncol_global
223 WRITE (unit_nr,
'(T2,A10,T13,A18,A10,T71,I10)')
'BSE|DEBUG|',
'Row block size of ', fm_in%name, nrow_block_in
224 WRITE (unit_nr,
'(T2,A10,T13,A21,A10,T71,I10)')
'BSE|DEBUG|',
'Column block size of ', fm_in%name, ncol_block_in
230 DO row_idx_loc = 1, nrow_local_in
231 indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
232 indices_in(2) = mod(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
233 DO col_idx_loc = 1, ncol_local_in
234 indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
235 indices_in(4) = mod(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
237 idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
238 idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
240 send_prow =
cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, &
241 fm_out%matrix_struct%first_p_pos(1), nprow)
243 send_pcol =
cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
244 fm_out%matrix_struct%first_p_pos(2), npcol)
246 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
248 num_entries_send(proc_send) = num_entries_send(proc_send) + 1
253 CALL timestop(handle2)
255 CALL timeset(routinen//
"_2_comm_entry_nums", handle2)
256 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
257 WRITE (unit_nr,
'(T2,A10,T13,A27)')
'BSE|DEBUG|',
'Communicating entry numbers'
260 CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
262 CALL timestop(handle2)
264 CALL timeset(routinen//
"_3_alloc_buffer", handle2)
265 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
266 WRITE (unit_nr,
'(T2,A10,T13,A18)')
'BSE|DEBUG|',
'Allocating buffers'
270 ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
271 ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
274 DO iproc = 0, para_env_out%num_pe - 1
276 ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
277 buffer_rec(iproc)%msg = 0.0_dp
281 DO iproc = 0, para_env_out%num_pe - 1
283 ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
284 buffer_send(iproc)%msg = 0.0_dp
288 DO iproc = 0, para_env_out%num_pe - 1
290 ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
291 buffer_rec(iproc)%indx = 0
295 DO iproc = 0, para_env_out%num_pe - 1
297 ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
298 buffer_send(iproc)%indx = 0
302 CALL timestop(handle2)
304 CALL timeset(routinen//
"_4_buf_from_fmin_"//fm_out%name, handle2)
305 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
306 WRITE (unit_nr,
'(T2,A10,T13,A18,A10,A13)')
'BSE|DEBUG|',
'Writing data from ', fm_in%name,
' into buffers'
309 ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
313 DO row_idx_loc = 1, nrow_local_in
314 indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
315 indices_in(2) = mod(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
316 DO col_idx_loc = 1, ncol_local_in
317 indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
318 indices_in(4) = mod(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
320 idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
321 idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
323 send_prow =
cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, &
324 fm_out%matrix_struct%first_p_pos(1), nprow)
326 send_pcol =
cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
327 fm_out%matrix_struct%first_p_pos(2), npcol)
329 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
330 entry_counter(proc_send) = entry_counter(proc_send) + 1
332 buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
333 fm_in%local_data(row_idx_loc, col_idx_loc)
335 buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
336 buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
341 ALLOCATE (req_array(1:para_env_out%num_pe, 4))
343 CALL timestop(handle2)
345 CALL timeset(routinen//
"_5_comm_buffer", handle2)
346 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
347 WRITE (unit_nr,
'(T2,A10,T13,A21)')
'BSE|DEBUG|',
'Communicating buffers'
351 CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
352 buffer_send, req_array)
354 CALL timestop(handle2)
356 CALL timeset(routinen//
"_6_buffer_to_fmout"//fm_out%name, handle2)
357 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
358 WRITE (unit_nr,
'(T2,A10,T13,A24,A10)')
'BSE|DEBUG|',
'Writing from buffers to ', fm_out%name
362 nprocs = para_env_out%num_pe
368 DO iproc = 0, nprocs - 1
369 DO i_entry_rec = 1, num_entries_rec(iproc)
370 ii =
cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, &
372 jj =
cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, &
375 fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
380 CALL timestop(handle2)
382 CALL timeset(routinen//
"_7_cleanup", handle2)
383 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
384 WRITE (unit_nr,
'(T2,A10,T13,A41)')
'BSE|DEBUG|',
'Starting cleanup of communication buffers'
388 DO iproc = 0, para_env_out%num_pe - 1
389 DEALLOCATE (buffer_rec(iproc)%msg)
390 DEALLOCATE (buffer_rec(iproc)%indx)
391 DEALLOCATE (buffer_send(iproc)%msg)
392 DEALLOCATE (buffer_send(iproc)%indx)
394 DEALLOCATE (buffer_rec, buffer_send)
395 DEALLOCATE (req_array)
396 DEALLOCATE (entry_counter)
397 DEALLOCATE (num_entries_rec, num_entries_send)
399 CALL timestop(handle2)
400 CALL timestop(handle)
420 nrow_out, ncol_out, unit_nr, mp2_env, &
421 nrow_offset, ncol_offset)
425 INTEGER :: ncol_in, nrow_out, ncol_out, unit_nr
426 TYPE(
mp2_type),
INTENT(INOUT) :: mp2_env
427 INTEGER,
INTENT(IN),
OPTIONAL :: nrow_offset, ncol_offset
429 CHARACTER(LEN=*),
PARAMETER :: routinen =
'truncate_fm'
431 INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
432 idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
433 ncol_local_in, ncol_local_out, npcol, nprocs, nprow, nrow_block_in, nrow_block_out, &
434 nrow_local_in, nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
435 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: entry_counter, num_entries_rec, &
437 INTEGER,
DIMENSION(:),
POINTER :: col_indices_in, col_indices_out, &
438 row_indices_in, row_indices_out
439 LOGICAL :: correct_ncol, correct_nrow
441 DIMENSION(:) :: buffer_rec, buffer_send
445 CALL timeset(routinen, handle)
446 CALL timeset(routinen//
"_1_setup", handle2)
448 correct_nrow = .false.
449 correct_ncol = .false.
451 IF (
PRESENT(nrow_offset))
THEN
452 correct_nrow = .true.
454 IF (
PRESENT(ncol_offset))
THEN
455 correct_ncol = .true.
458 para_env_out => fm_out%matrix_struct%para_env
461 nrow_local=nrow_local_out, &
462 ncol_local=ncol_local_out, &
463 row_indices=row_indices_out, &
464 col_indices=col_indices_out, &
465 nrow_block=nrow_block_out, &
466 ncol_block=ncol_block_out)
468 nprow = fm_out%matrix_struct%context%num_pe(1)
469 npcol = fm_out%matrix_struct%context%num_pe(2)
471 ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
472 ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
474 num_entries_rec(:) = 0
475 num_entries_send(:) = 0
480 nrow_local=nrow_local_in, &
481 ncol_local=ncol_local_in, &
482 row_indices=row_indices_in, &
483 col_indices=col_indices_in, &
484 nrow_block=nrow_block_in, &
485 ncol_block=ncol_block_in)
487 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
488 WRITE (unit_nr,
'(T2,A10,T13,A14,A10,T71,I10)')
'BSE|DEBUG|',
'Row number of ', fm_out%name, &
489 fm_out%matrix_struct%nrow_global
490 WRITE (unit_nr,
'(T2,A10,T13,A17,A10,T71,I10)')
'BSE|DEBUG|',
'Column number of ', fm_out%name, &
491 fm_out%matrix_struct%ncol_global
493 WRITE (unit_nr,
'(T2,A10,T13,A18,A10,T71,I10)')
'BSE|DEBUG|',
'Row block size of ', fm_out%name, nrow_block_out
494 WRITE (unit_nr,
'(T2,A10,T13,A21,A10,T71,I10)')
'BSE|DEBUG|',
'Column block size of ', fm_out%name, ncol_block_out
496 WRITE (unit_nr,
'(T2,A10,T13,A14,A10,T71,I10)')
'BSE|DEBUG|',
'Row number of ', fm_in%name, &
497 fm_in%matrix_struct%nrow_global
498 WRITE (unit_nr,
'(T2,A10,T13,A17,A10,T71,I10)')
'BSE|DEBUG|',
'Column number of ', fm_in%name, &
499 fm_in%matrix_struct%ncol_global
501 WRITE (unit_nr,
'(T2,A10,T13,A18,A10,T71,I10)')
'BSE|DEBUG|',
'Row block size of ', fm_in%name, nrow_block_in
502 WRITE (unit_nr,
'(T2,A10,T13,A21,A10,T71,I10)')
'BSE|DEBUG|',
'Column block size of ', fm_in%name, ncol_block_in
506 DO col_idx_loc = 1, ncol_local_in
507 idx_col_in = col_indices_in(col_idx_loc)
509 idx_col_first = (idx_col_in - 1)/ncol_in + 1
510 idx_col_sec = mod(idx_col_in - 1, ncol_in) + 1
514 IF (correct_nrow)
THEN
515 idx_col_first = idx_col_first - nrow_offset + 1
516 IF (idx_col_first .LE. 0) cycle
518 IF (idx_col_first > nrow_out)
EXIT
520 IF (correct_ncol)
THEN
521 idx_col_sec = idx_col_sec - ncol_offset + 1
522 IF (idx_col_sec .LE. 0) cycle
524 IF (idx_col_sec > ncol_out) cycle
527 idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
529 DO row_idx_loc = 1, nrow_local_in
530 idx_row_in = row_indices_in(row_idx_loc)
532 send_prow =
cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, &
533 fm_out%matrix_struct%first_p_pos(1), nprow)
535 send_pcol =
cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
536 fm_out%matrix_struct%first_p_pos(2), npcol)
538 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
540 num_entries_send(proc_send) = num_entries_send(proc_send) + 1
545 CALL timestop(handle2)
547 CALL timeset(routinen//
"_2_comm_entry_nums", handle2)
548 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
549 WRITE (unit_nr,
'(T2,A10,T13,A27)')
'BSE|DEBUG|',
'Communicating entry numbers'
552 CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
554 CALL timestop(handle2)
556 CALL timeset(routinen//
"_3_alloc_buffer", handle2)
557 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
558 WRITE (unit_nr,
'(T2,A10,T13,A18)')
'BSE|DEBUG|',
'Allocating buffers'
562 ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
563 ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
566 DO iproc = 0, para_env_out%num_pe - 1
568 ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
569 buffer_rec(iproc)%msg = 0.0_dp
573 DO iproc = 0, para_env_out%num_pe - 1
575 ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
576 buffer_send(iproc)%msg = 0.0_dp
580 DO iproc = 0, para_env_out%num_pe - 1
582 ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
583 buffer_rec(iproc)%indx = 0
587 DO iproc = 0, para_env_out%num_pe - 1
589 ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
590 buffer_send(iproc)%indx = 0
594 CALL timestop(handle2)
596 CALL timeset(routinen//
"_4_buf_from_fmin_"//fm_out%name, handle2)
597 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
598 WRITE (unit_nr,
'(T2,A10,T13,A18,A10,A13)')
'BSE|DEBUG|',
'Writing data from ', fm_in%name,
' into buffers'
601 ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
605 DO col_idx_loc = 1, ncol_local_in
606 idx_col_in = col_indices_in(col_idx_loc)
608 idx_col_first = (idx_col_in - 1)/ncol_in + 1
609 idx_col_sec = mod(idx_col_in - 1, ncol_in) + 1
613 IF (correct_nrow)
THEN
614 idx_col_first = idx_col_first - nrow_offset + 1
615 IF (idx_col_first .LE. 0) cycle
617 IF (idx_col_first > nrow_out)
EXIT
619 IF (correct_ncol)
THEN
620 idx_col_sec = idx_col_sec - ncol_offset + 1
621 IF (idx_col_sec .LE. 0) cycle
623 IF (idx_col_sec > ncol_out) cycle
626 idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
628 DO row_idx_loc = 1, nrow_local_in
629 idx_row_in = row_indices_in(row_idx_loc)
631 send_prow =
cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, &
632 fm_out%matrix_struct%first_p_pos(1), nprow)
634 send_pcol =
cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
635 fm_out%matrix_struct%first_p_pos(2), npcol)
637 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
638 entry_counter(proc_send) = entry_counter(proc_send) + 1
640 buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
641 fm_in%local_data(row_idx_loc, col_idx_loc)
644 buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
645 buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
650 ALLOCATE (req_array(1:para_env_out%num_pe, 4))
652 CALL timestop(handle2)
654 CALL timeset(routinen//
"_5_comm_buffer", handle2)
655 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
656 WRITE (unit_nr,
'(T2,A10,T13,A21)')
'BSE|DEBUG|',
'Communicating buffers'
660 CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
661 buffer_send, req_array)
663 CALL timestop(handle2)
665 CALL timeset(routinen//
"_6_buffer_to_fmout"//fm_out%name, handle2)
666 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
667 WRITE (unit_nr,
'(T2,A10,T13,A24,A10)')
'BSE|DEBUG|',
'Writing from buffers to ', fm_out%name
671 nprocs = para_env_out%num_pe
677 DO iproc = 0, nprocs - 1
678 DO i_entry_rec = 1, num_entries_rec(iproc)
679 ii =
cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, &
681 jj =
cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, &
684 fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
689 CALL timestop(handle2)
691 CALL timeset(routinen//
"_7_cleanup", handle2)
692 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print)
THEN
693 WRITE (unit_nr,
'(T2,A10,T13,A41)')
'BSE|DEBUG|',
'Starting cleanup of communication buffers'
697 DO iproc = 0, para_env_out%num_pe - 1
698 DEALLOCATE (buffer_rec(iproc)%msg)
699 DEALLOCATE (buffer_rec(iproc)%indx)
700 DEALLOCATE (buffer_send(iproc)%msg)
701 DEALLOCATE (buffer_send(iproc)%indx)
703 DEALLOCATE (buffer_rec, buffer_send)
704 DEALLOCATE (req_array)
705 DEALLOCATE (entry_counter)
706 DEALLOCATE (num_entries_rec, num_entries_send)
708 CALL timestop(handle2)
709 CALL timestop(handle)
931 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: idx_prim, idx_sec
932 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: eigvec_entries
934 CHARACTER(LEN=*),
PARAMETER :: routinen =
'sort_excitations'
936 INTEGER :: handle, ii, kk, num_entries, num_mults
937 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: idx_prim_work, idx_sec_work, tmp_index
938 LOGICAL :: unique_entries
939 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: eigvec_entries_work
941 CALL timeset(routinen, handle)
943 num_entries =
SIZE(idx_prim)
945 ALLOCATE (tmp_index(num_entries))
947 CALL sort(idx_prim, num_entries, tmp_index)
949 ALLOCATE (idx_sec_work(num_entries))
950 ALLOCATE (eigvec_entries_work(num_entries))
952 DO ii = 1, num_entries
953 idx_sec_work(ii) = idx_sec(tmp_index(ii))
954 eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
957 DEALLOCATE (tmp_index)
959 DEALLOCATE (eigvec_entries)
961 CALL move_alloc(idx_sec_work, idx_sec)
962 CALL move_alloc(eigvec_entries_work, eigvec_entries)
966 IF (.NOT. unique_entries)
THEN
967 ALLOCATE (idx_prim_work(num_entries))
968 idx_prim_work(:) = idx_prim(:)
970 DO ii = 1, num_entries
971 IF (idx_prim_work(ii) == 0) cycle
972 num_mults = count(idx_prim_work == idx_prim_work(ii))
973 IF (num_mults > 1)
THEN
975 idx_prim_work(ii:ii + num_mults - 1) = 0
977 ALLOCATE (idx_sec_work(num_mults))
978 ALLOCATE (eigvec_entries_work(num_mults))
979 idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
980 eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
981 ALLOCATE (tmp_index(num_mults))
982 CALL sort(idx_sec_work, num_mults, tmp_index)
985 DO kk = ii, ii + num_mults - 1
986 idx_sec(kk) = idx_sec_work(kk - ii + 1)
987 eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
990 DEALLOCATE (tmp_index)
991 DEALLOCATE (idx_sec_work)
992 DEALLOCATE (eigvec_entries_work)
994 idx_prim_work(ii) = idx_prim(ii)
996 DEALLOCATE (idx_prim_work)
999 CALL timestop(handle)
1064 i_exc, virtual, num_entries, mp2_env)
1067 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: idx_homo, idx_virt
1068 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: eigvec_entries
1069 INTEGER :: i_exc, virtual, num_entries
1070 TYPE(
mp2_type),
INTENT(INOUT) :: mp2_env
1072 CHARACTER(LEN=*),
PARAMETER :: routinen =
'filter_eigvec_contrib'
1074 INTEGER :: eigvec_idx, handle, ii, iproc, jj, kk, &
1075 ncol_local, nrow_local, &
1077 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: num_entries_to_comm
1078 INTEGER,
DIMENSION(:),
POINTER :: col_indices, row_indices
1079 REAL(kind=
dp) :: eigvec_entry
1081 DIMENSION(:) :: buffer_entries
1084 CALL timeset(routinen, handle)
1086 para_env => fm_eigvec%matrix_struct%para_env
1089 nrow_local=nrow_local, &
1090 ncol_local=ncol_local, &
1091 row_indices=row_indices, &
1092 col_indices=col_indices)
1094 ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
1095 num_entries_to_comm(:) = 0
1097 DO jj = 1, ncol_local
1099 IF (col_indices(jj) /= i_exc)
THEN
1102 DO ii = 1, nrow_local
1103 eigvec_idx = row_indices(ii)
1104 eigvec_entry = fm_eigvec%local_data(ii, jj)/sqrt(2.0_dp)
1105 IF (abs(eigvec_entry) > mp2_env%ri_g0w0%eps_x)
THEN
1106 num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
1112 CALL para_env%sum(num_entries_to_comm)
1114 num_entries_local = num_entries_to_comm(para_env%mepos)
1116 ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
1118 DO iproc = 0, para_env%num_pe - 1
1119 ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
1120 ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
1121 buffer_entries(iproc)%msg = 0.0_dp
1122 buffer_entries(iproc)%indx = 0
1126 DO jj = 1, ncol_local
1128 IF (col_indices(jj) /= i_exc)
THEN
1131 DO ii = 1, nrow_local
1132 eigvec_idx = row_indices(ii)
1133 eigvec_entry = fm_eigvec%local_data(ii, jj)/sqrt(2.0_dp)
1134 IF (abs(eigvec_entry) > mp2_env%ri_g0w0%eps_x)
THEN
1135 buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
1136 buffer_entries(para_env%mepos)%indx(kk, 2) = mod(eigvec_idx - 1, virtual) + 1
1137 buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
1143 DO iproc = 0, para_env%num_pe - 1
1144 CALL para_env%sum(buffer_entries(iproc)%msg)
1145 CALL para_env%sum(buffer_entries(iproc)%indx)
1149 num_entries = sum(num_entries_to_comm)
1150 ALLOCATE (idx_homo(num_entries))
1151 ALLOCATE (idx_virt(num_entries))
1152 ALLOCATE (eigvec_entries(num_entries))
1155 DO iproc = 0, para_env%num_pe - 1
1156 IF (num_entries_to_comm(iproc) /= 0)
THEN
1157 DO ii = 1, num_entries_to_comm(iproc)
1158 idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
1159 idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
1160 eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
1167 DO iproc = 0, para_env%num_pe - 1
1168 DEALLOCATE (buffer_entries(iproc)%msg)
1169 DEALLOCATE (buffer_entries(iproc)%indx)
1171 DEALLOCATE (buffer_entries)
1172 DEALLOCATE (num_entries_to_comm)
1173 NULLIFY (row_indices)
1174 NULLIFY (col_indices)
1180 CALL timestop(handle)
1203 fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
1204 Eigenval, Eigenval_reduced, &
1205 homo, virtual, dimen_RI, unit_nr, &
1206 homo_red, virt_red, &
1209 TYPE(
cp_fm_type),
INTENT(IN) :: fm_mat_s_ia_bse, fm_mat_s_ij_bse, &
1211 TYPE(
cp_fm_type),
INTENT(INOUT) :: fm_mat_s_trunc, fm_mat_s_ij_trunc, &
1213 REAL(kind=
dp),
DIMENSION(:) :: eigenval
1214 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: eigenval_reduced
1215 INTEGER,
INTENT(IN) :: homo, virtual, dimen_ri, unit_nr
1216 INTEGER,
INTENT(OUT) :: homo_red, virt_red
1217 TYPE(
mp2_type),
INTENT(INOUT) :: mp2_env
1219 CHARACTER(LEN=*),
PARAMETER :: routinen =
'truncate_BSE_matrices'
1221 INTEGER :: handle, homo_incl, i_homo, j_virt, &
1227 CALL timeset(routinen, handle)
1231 IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_virt > 0)
THEN
1232 IF (-mp2_env%ri_g0w0%bse_cutoff_occ .LT. eigenval(1) - eigenval(homo) &
1233 .OR. mp2_env%ri_g0w0%bse_cutoff_occ < 0)
THEN
1239 IF (eigenval(i_homo) - eigenval(homo) .GT. -mp2_env%ri_g0w0%bse_cutoff_occ)
THEN
1244 homo_red = homo - homo_incl + 1
1247 IF (mp2_env%ri_g0w0%bse_cutoff_virt .GT. eigenval(homo + virtual) - eigenval(homo + 1) &
1248 .OR. mp2_env%ri_g0w0%bse_cutoff_virt < 0)
THEN
1252 virt_incl = homo + 1
1253 DO j_virt = 1, virtual
1254 IF (eigenval(homo + j_virt) - eigenval(homo + 1) .GT. mp2_env%ri_g0w0%bse_cutoff_virt)
THEN
1255 virt_incl = j_virt - 1
1259 virt_red = virt_incl
1267 IF (unit_nr > 0)
THEN
1268 IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0)
THEN
1269 WRITE (unit_nr,
'(T2,A4,T7,A29,T71,F10.3)')
'BSE|',
'Cutoff occupied orbitals [eV]', &
1270 mp2_env%ri_g0w0%bse_cutoff_occ*
evolt
1272 WRITE (unit_nr,
'(T2,A4,T7,A37)')
'BSE|',
'No cutoff given for occupied orbitals'
1274 IF (mp2_env%ri_g0w0%bse_cutoff_virt > 0)
THEN
1275 WRITE (unit_nr,
'(T2,A4,T7,A28,T71,F10.3)')
'BSE|',
'Cutoff virtual orbitals [eV]', &
1276 mp2_env%ri_g0w0%bse_cutoff_virt*
evolt
1278 WRITE (unit_nr,
'(T2,A4,T7,A36)')
'BSE|',
'No cutoff given for virtual orbitals'
1280 WRITE (unit_nr,
'(T2,A4,T7,A20,T71,I10)')
'BSE|',
'First occupied index', homo_incl
1281 WRITE (unit_nr,
'(T2,A4,T7,A34,T71,I10)')
'BSE|',
'Last virtual index (not MO index!)', virt_incl
1282 WRITE (unit_nr,
'(T2,A4,T7,A35,T71,F10.3)')
'BSE|',
'Energy of first occupied index [eV]', eigenval(homo_incl)*
evolt
1283 WRITE (unit_nr,
'(T2,A4,T7,A33,T71,F10.3)')
'BSE|',
'Energy of last virtual index [eV]', eigenval(homo + virt_incl)*
evolt
1284 WRITE (unit_nr,
'(T2,A4,T7,A54,T71,F10.3)')
'BSE|',
'Energy difference of first occupied index to HOMO [eV]', &
1285 -(eigenval(homo_incl) - eigenval(homo))*
evolt
1286 WRITE (unit_nr,
'(T2,A4,T7,A52,T71,F10.3)')
'BSE|',
'Energy difference of last virtual index to LUMO [eV]', &
1287 (eigenval(homo + virt_incl) - eigenval(homo + 1))*
evolt
1288 WRITE (unit_nr,
'(T2,A4,T7,A35,T71,I10)')
'BSE|',
'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
1289 WRITE (unit_nr,
'(T2,A4,T7,A34,T71,I10)')
'BSE|',
'Number of GW-corrected virtual MOs', mp2_env%ri_g0w0%corr_mos_virt
1290 WRITE (unit_nr,
'(T2,A4)')
'BSE|'
1292 IF (unit_nr > 0)
THEN
1293 IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ)
THEN
1294 cpabort(
"Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
1296 IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt)
THEN
1297 cpabort(
"Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
1302 para_env => fm_mat_s_ia_bse%matrix_struct%para_env
1303 context => fm_mat_s_ia_bse%matrix_struct%context
1309 CALL cp_fm_create(fm_mat_s_trunc, fm_struct_ia,
"fm_S_trunc")
1310 CALL cp_fm_create(fm_mat_s_ij_trunc, fm_struct_ij,
"fm_S_ij_trunc")
1311 CALL cp_fm_create(fm_mat_s_ab_trunc, fm_struct_ab,
"fm_S_ab_trunc")
1314 IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_virt > 0)
THEN
1316 ALLOCATE (eigenval_reduced(homo_red + virt_red))
1317 eigenval_reduced(:) = eigenval(homo_incl:homo + virt_incl)
1319 CALL truncate_fm(fm_mat_s_trunc, fm_mat_s_ia_bse, virtual, &
1320 homo_red, virt_red, unit_nr, mp2_env, &
1321 nrow_offset=homo_incl)
1322 CALL truncate_fm(fm_mat_s_ij_trunc, fm_mat_s_ij_bse, homo, &
1323 homo_red, homo_red, unit_nr, mp2_env, &
1324 homo_incl, homo_incl)
1325 CALL truncate_fm(fm_mat_s_ab_trunc, fm_mat_s_ab_bse, virtual, &
1326 virt_red, virt_red, unit_nr, mp2_env)
1329 IF (unit_nr > 0)
THEN
1330 WRITE (unit_nr,
'(T2,A4,T7,A37)')
'BSE|',
'No truncation of BSE matrices applied'
1331 WRITE (unit_nr,
'(T2,A4)')
'BSE|'
1333 ALLOCATE (eigenval_reduced(homo_red + virt_red))
1334 eigenval_reduced(:) = eigenval(:)
1336 1, 1, 1, 1, context)
1338 1, 1, 1, 1, context)
1340 1, 1, 1, 1, context)
1350 CALL timestop(handle)