(git:374b731)
Loading...
Searching...
No Matches
bse_util.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations
10!> \par History
11!> 11.2023 created [Maximilian Graml]
12! **************************************************************************************************
21 USE cp_fm_types, ONLY: cp_fm_create,&
29 USE kinds, ONLY: dp
35 USE physcon, ONLY: evolt
37 USE util, ONLY: sort,&
39#include "./base/base_uses.f90"
40
41 IMPLICIT NONE
42
43 PRIVATE
44
45 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse_util'
46
50
51CONTAINS
52
53! **************************************************************************************************
54!> \brief Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}
55!> \param fm_mat_S_ij_bse ...
56!> \param fm_mat_S_ia_bse ...
57!> \param fm_mat_S_bar_ia_bse ...
58!> \param fm_mat_S_bar_ij_bse ...
59!> \param fm_mat_Q_static_bse_gemm ...
60!> \param dimen_RI ...
61!> \param homo ...
62!> \param virtual ...
63! **************************************************************************************************
64 SUBROUTINE mult_b_with_w(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
65 fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
66 dimen_RI, homo, virtual)
67
68 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s_ij_bse, fm_mat_s_ia_bse
69 TYPE(cp_fm_type), INTENT(OUT) :: fm_mat_s_bar_ia_bse, fm_mat_s_bar_ij_bse
70 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_q_static_bse_gemm
71 INTEGER, INTENT(IN) :: dimen_ri, homo, virtual
72
73 CHARACTER(LEN=*), PARAMETER :: routinen = 'mult_B_with_W'
74
75 INTEGER :: handle, i_global, iib, info_chol, &
76 j_global, jjb, ncol_local, nrow_local
77 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
78 TYPE(cp_fm_type) :: fm_work
79
80 CALL timeset(routinen, handle)
81
82 CALL cp_fm_create(fm_mat_s_bar_ia_bse, fm_mat_s_ia_bse%matrix_struct)
83 CALL cp_fm_set_all(fm_mat_s_bar_ia_bse, 0.0_dp)
84
85 CALL cp_fm_create(fm_mat_s_bar_ij_bse, fm_mat_s_ij_bse%matrix_struct)
86 CALL cp_fm_set_all(fm_mat_s_bar_ij_bse, 0.0_dp)
87
88 CALL cp_fm_create(fm_work, fm_mat_q_static_bse_gemm%matrix_struct)
89 CALL cp_fm_set_all(fm_work, 0.0_dp)
90
91 ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
92 CALL cp_fm_get_info(matrix=fm_mat_q_static_bse_gemm, &
93 nrow_local=nrow_local, &
94 ncol_local=ncol_local, &
95 row_indices=row_indices, &
96 col_indices=col_indices)
97
98 DO jjb = 1, ncol_local
99 j_global = col_indices(jjb)
100 DO iib = 1, nrow_local
101 i_global = row_indices(iib)
102 IF (j_global == i_global .AND. i_global <= dimen_ri) THEN
103 fm_mat_q_static_bse_gemm%local_data(iib, jjb) = fm_mat_q_static_bse_gemm%local_data(iib, jjb) + 1.0_dp
104 END IF
105 END DO
106 END DO
107
108 ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
109 CALL cp_fm_cholesky_decompose(matrix=fm_mat_q_static_bse_gemm, n=dimen_ri, info_out=info_chol)
110
111 cpassert(info_chol == 0)
112
113 ! calculate [1+Q(i0)]^-1
114 CALL cp_fm_cholesky_invert(fm_mat_q_static_bse_gemm)
115
116 ! symmetrize the result
117 CALL cp_fm_upper_to_full(fm_mat_q_static_bse_gemm, fm_work)
118
119 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=homo**2, k=dimen_ri, alpha=1.0_dp, &
120 matrix_a=fm_mat_q_static_bse_gemm, matrix_b=fm_mat_s_ij_bse, beta=0.0_dp, &
121 matrix_c=fm_mat_s_bar_ij_bse)
122
123 ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
124 ! fm_mat_S_ia_bse from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
125 CALL parallel_gemm(transa="N", transb="N", m=dimen_ri, n=homo*virtual, k=dimen_ri, alpha=1.0_dp, &
126 matrix_a=fm_mat_q_static_bse_gemm, matrix_b=fm_mat_s_ia_bse, beta=0.0_dp, &
127 matrix_c=fm_mat_s_bar_ia_bse)
128
129 CALL cp_fm_release(fm_work)
130
131 CALL timestop(handle)
132
133 END SUBROUTINE
134
135! **************************************************************************************************
136!> \brief Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,ab
137!> to A_ia, which needs MPI communication.
138!> \param fm_out ...
139!> \param fm_in ...
140!> \param beta ...
141!> \param nrow_secidx_in ...
142!> \param ncol_secidx_in ...
143!> \param nrow_secidx_out ...
144!> \param ncol_secidx_out ...
145!> \param unit_nr ...
146!> \param reordering ...
147!> \param mp2_env ...
148! **************************************************************************************************
149 SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, &
150 nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
151
152 TYPE(cp_fm_type), INTENT(INOUT) :: fm_out, fm_in
153 REAL(kind=dp) :: beta
154 INTEGER, INTENT(IN) :: nrow_secidx_in, ncol_secidx_in, &
155 nrow_secidx_out, ncol_secidx_out
156 INTEGER :: unit_nr
157 INTEGER, DIMENSION(4) :: reordering
158 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
159
160 CHARACTER(LEN=*), PARAMETER :: routinen = 'fm_general_add_bse'
161
162 INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
163 iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, npcol, nprocs, &
164 nprow, nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, &
165 row_idx_loc, send_pcol, send_prow
166 INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, &
167 num_entries_send
168 INTEGER, DIMENSION(4) :: indices_in
169 INTEGER, DIMENSION(:), POINTER :: col_indices_in, col_indices_out, &
170 row_indices_in, row_indices_out
171 TYPE(integ_mat_buffer_type), ALLOCATABLE, &
172 DIMENSION(:) :: buffer_rec, buffer_send
173 TYPE(mp_para_env_type), POINTER :: para_env_out
174 TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_array
175
176 CALL timeset(routinen, handle)
177 CALL timeset(routinen//"_1_setup", handle2)
178
179 para_env_out => fm_out%matrix_struct%para_env
180 ! A_iajb
181 ! We start by moving data from local parts of W_ijab to the full matrix A_iajb using buffers
182 CALL cp_fm_get_info(matrix=fm_out, &
183 nrow_local=nrow_local_out, &
184 ncol_local=ncol_local_out, &
185 row_indices=row_indices_out, &
186 col_indices=col_indices_out, &
187 nrow_block=nrow_block_out, &
188 ncol_block=ncol_block_out)
189
190 nprow = fm_out%matrix_struct%context%num_pe(1)
191 npcol = fm_out%matrix_struct%context%num_pe(2)
192
193 ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
194 ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
195
196 num_entries_rec(:) = 0
197 num_entries_send(:) = 0
198
199 dummy = 0
200
201 CALL cp_fm_get_info(matrix=fm_in, &
202 nrow_local=nrow_local_in, &
203 ncol_local=ncol_local_in, &
204 row_indices=row_indices_in, &
205 col_indices=col_indices_in, &
206 nrow_block=nrow_block_in, &
207 ncol_block=ncol_block_in)
208
209 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
210 WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
211 fm_out%matrix_struct%nrow_global
212 WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
213 fm_out%matrix_struct%ncol_global
214
215 WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
216 WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
217
218 WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
219 fm_in%matrix_struct%nrow_global
220 WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
221 fm_in%matrix_struct%ncol_global
222
223 WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
224 WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
225 END IF
226
227 ! Use scalapack wrapper to find process index in fm_out
228 ! To that end, we obtain the global index in fm_out from the level indices
229 indices_in(:) = 0
230 DO row_idx_loc = 1, nrow_local_in
231 indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
232 indices_in(2) = mod(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
233 DO col_idx_loc = 1, ncol_local_in
234 indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
235 indices_in(4) = mod(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
236
237 idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
238 idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
239
240 send_prow = cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, &
241 fm_out%matrix_struct%first_p_pos(1), nprow)
242
243 send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
244 fm_out%matrix_struct%first_p_pos(2), npcol)
245
246 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
247
248 num_entries_send(proc_send) = num_entries_send(proc_send) + 1
249
250 END DO
251 END DO
252
253 CALL timestop(handle2)
254
255 CALL timeset(routinen//"_2_comm_entry_nums", handle2)
256 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
257 WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
258 END IF
259
260 CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
261
262 CALL timestop(handle2)
263
264 CALL timeset(routinen//"_3_alloc_buffer", handle2)
265 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
266 WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
267 END IF
268
269 ! Buffers for entries and their indices
270 ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
271 ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
272
273 ! allocate data message and corresponding indices
274 DO iproc = 0, para_env_out%num_pe - 1
275
276 ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
277 buffer_rec(iproc)%msg = 0.0_dp
278
279 END DO
280
281 DO iproc = 0, para_env_out%num_pe - 1
282
283 ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
284 buffer_send(iproc)%msg = 0.0_dp
285
286 END DO
287
288 DO iproc = 0, para_env_out%num_pe - 1
289
290 ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
291 buffer_rec(iproc)%indx = 0
292
293 END DO
294
295 DO iproc = 0, para_env_out%num_pe - 1
296
297 ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
298 buffer_send(iproc)%indx = 0
299
300 END DO
301
302 CALL timestop(handle2)
303
304 CALL timeset(routinen//"_4_buf_from_fmin_"//fm_out%name, handle2)
305 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
306 WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
307 END IF
308
309 ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
310 entry_counter(:) = 0
311
312 ! Now we can write the actual data and indices to the send-buffer
313 DO row_idx_loc = 1, nrow_local_in
314 indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
315 indices_in(2) = mod(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
316 DO col_idx_loc = 1, ncol_local_in
317 indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
318 indices_in(4) = mod(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
319
320 idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
321 idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
322
323 send_prow = cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, &
324 fm_out%matrix_struct%first_p_pos(1), nprow)
325
326 send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
327 fm_out%matrix_struct%first_p_pos(2), npcol)
328
329 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
330 entry_counter(proc_send) = entry_counter(proc_send) + 1
331
332 buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
333 fm_in%local_data(row_idx_loc, col_idx_loc)
334
335 buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
336 buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
337
338 END DO
339 END DO
340
341 ALLOCATE (req_array(1:para_env_out%num_pe, 4))
342
343 CALL timestop(handle2)
344
345 CALL timeset(routinen//"_5_comm_buffer", handle2)
346 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
347 WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
348 END IF
349
350 ! communicate the buffer
351 CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
352 buffer_send, req_array)
353
354 CALL timestop(handle2)
355
356 CALL timeset(routinen//"_6_buffer_to_fmout"//fm_out%name, handle2)
357 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
358 WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
359 END IF
360
361 ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
362 nprocs = para_env_out%num_pe
363
364!$OMP PARALLEL DO DEFAULT(NONE) &
365!$OMP SHARED(fm_out, nprocs, nrow_block_out, ncol_block_out, &
366!$OMP num_entries_rec, buffer_rec, beta, dummy, nprow, npcol) &
367!$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
368 DO iproc = 0, nprocs - 1
369 DO i_entry_rec = 1, num_entries_rec(iproc)
370 ii = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, &
371 dummy, dummy, nprow)
372 jj = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, &
373 dummy, dummy, npcol)
374
375 fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
376 END DO
377 END DO
378!$OMP END PARALLEL DO
379
380 CALL timestop(handle2)
381
382 CALL timeset(routinen//"_7_cleanup", handle2)
383 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
384 WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
385 END IF
386
387 !Clean up all the arrays from the communication process
388 DO iproc = 0, para_env_out%num_pe - 1
389 DEALLOCATE (buffer_rec(iproc)%msg)
390 DEALLOCATE (buffer_rec(iproc)%indx)
391 DEALLOCATE (buffer_send(iproc)%msg)
392 DEALLOCATE (buffer_send(iproc)%indx)
393 END DO
394 DEALLOCATE (buffer_rec, buffer_send)
395 DEALLOCATE (req_array)
396 DEALLOCATE (entry_counter)
397 DEALLOCATE (num_entries_rec, num_entries_send)
398
399 CALL timestop(handle2)
400 CALL timestop(handle)
401
402 END SUBROUTINE fm_general_add_bse
403
404! **************************************************************************************************
405!> \brief Routine for truncating a full matrix as given by the energy cutoffs in the input file.
406!> Logic: Matrices have some dimension dimen_RI x nrow_in*ncol_in for the incoming (untruncated) matrix
407!> and dimen_RI x nrow_out*ncol_out for the truncated matrix. The truncation is done by resorting the indices
408!> via parallel communication.
409!> \param fm_out ...
410!> \param fm_in ...
411!> \param ncol_in ...
412!> \param nrow_out ...
413!> \param ncol_out ...
414!> \param unit_nr ...
415!> \param mp2_env ...
416!> \param nrow_offset ...
417!> \param ncol_offset ...
418! **************************************************************************************************
419 SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, &
420 nrow_out, ncol_out, unit_nr, mp2_env, &
421 nrow_offset, ncol_offset)
422
423 TYPE(cp_fm_type), INTENT(INOUT) :: fm_out
424 TYPE(cp_fm_type), INTENT(IN) :: fm_in
425 INTEGER :: ncol_in, nrow_out, ncol_out, unit_nr
426 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
427 INTEGER, INTENT(IN), OPTIONAL :: nrow_offset, ncol_offset
428
429 CHARACTER(LEN=*), PARAMETER :: routinen = 'truncate_fm'
430
431 INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
432 idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
433 ncol_local_in, ncol_local_out, npcol, nprocs, nprow, nrow_block_in, nrow_block_out, &
434 nrow_local_in, nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
435 INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, &
436 num_entries_send
437 INTEGER, DIMENSION(:), POINTER :: col_indices_in, col_indices_out, &
438 row_indices_in, row_indices_out
439 LOGICAL :: correct_ncol, correct_nrow
440 TYPE(integ_mat_buffer_type), ALLOCATABLE, &
441 DIMENSION(:) :: buffer_rec, buffer_send
442 TYPE(mp_para_env_type), POINTER :: para_env_out
443 TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_array
444
445 CALL timeset(routinen, handle)
446 CALL timeset(routinen//"_1_setup", handle2)
447
448 correct_nrow = .false.
449 correct_ncol = .false.
450 !In case of truncation in the occupied space, we need to correct the interval of indices
451 IF (PRESENT(nrow_offset)) THEN
452 correct_nrow = .true.
453 END IF
454 IF (PRESENT(ncol_offset)) THEN
455 correct_ncol = .true.
456 END IF
457
458 para_env_out => fm_out%matrix_struct%para_env
459
460 CALL cp_fm_get_info(matrix=fm_out, &
461 nrow_local=nrow_local_out, &
462 ncol_local=ncol_local_out, &
463 row_indices=row_indices_out, &
464 col_indices=col_indices_out, &
465 nrow_block=nrow_block_out, &
466 ncol_block=ncol_block_out)
467
468 nprow = fm_out%matrix_struct%context%num_pe(1)
469 npcol = fm_out%matrix_struct%context%num_pe(2)
470
471 ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
472 ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
473
474 num_entries_rec(:) = 0
475 num_entries_send(:) = 0
476
477 dummy = 0
478
479 CALL cp_fm_get_info(matrix=fm_in, &
480 nrow_local=nrow_local_in, &
481 ncol_local=ncol_local_in, &
482 row_indices=row_indices_in, &
483 col_indices=col_indices_in, &
484 nrow_block=nrow_block_in, &
485 ncol_block=ncol_block_in)
486
487 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
488 WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
489 fm_out%matrix_struct%nrow_global
490 WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
491 fm_out%matrix_struct%ncol_global
492
493 WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
494 WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
495
496 WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
497 fm_in%matrix_struct%nrow_global
498 WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
499 fm_in%matrix_struct%ncol_global
500
501 WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
502 WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
503 END IF
504
505 ! We find global indices in S with nrow_in and ncol_in for truncation
506 DO col_idx_loc = 1, ncol_local_in
507 idx_col_in = col_indices_in(col_idx_loc)
508
509 idx_col_first = (idx_col_in - 1)/ncol_in + 1
510 idx_col_sec = mod(idx_col_in - 1, ncol_in) + 1
511
512 ! If occupied orbitals are included, these have to be handled differently
513 ! due to their reversed indexing
514 IF (correct_nrow) THEN
515 idx_col_first = idx_col_first - nrow_offset + 1
516 IF (idx_col_first .LE. 0) cycle
517 ELSE
518 IF (idx_col_first > nrow_out) EXIT
519 END IF
520 IF (correct_ncol) THEN
521 idx_col_sec = idx_col_sec - ncol_offset + 1
522 IF (idx_col_sec .LE. 0) cycle
523 ELSE
524 IF (idx_col_sec > ncol_out) cycle
525 END IF
526
527 idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
528
529 DO row_idx_loc = 1, nrow_local_in
530 idx_row_in = row_indices_in(row_idx_loc)
531
532 send_prow = cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, &
533 fm_out%matrix_struct%first_p_pos(1), nprow)
534
535 send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
536 fm_out%matrix_struct%first_p_pos(2), npcol)
537
538 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
539
540 num_entries_send(proc_send) = num_entries_send(proc_send) + 1
541
542 END DO
543 END DO
544
545 CALL timestop(handle2)
546
547 CALL timeset(routinen//"_2_comm_entry_nums", handle2)
548 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
549 WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
550 END IF
551
552 CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
553
554 CALL timestop(handle2)
555
556 CALL timeset(routinen//"_3_alloc_buffer", handle2)
557 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
558 WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
559 END IF
560
561 ! Buffers for entries and their indices
562 ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
563 ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
564
565 ! allocate data message and corresponding indices
566 DO iproc = 0, para_env_out%num_pe - 1
567
568 ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
569 buffer_rec(iproc)%msg = 0.0_dp
570
571 END DO
572
573 DO iproc = 0, para_env_out%num_pe - 1
574
575 ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
576 buffer_send(iproc)%msg = 0.0_dp
577
578 END DO
579
580 DO iproc = 0, para_env_out%num_pe - 1
581
582 ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
583 buffer_rec(iproc)%indx = 0
584
585 END DO
586
587 DO iproc = 0, para_env_out%num_pe - 1
588
589 ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
590 buffer_send(iproc)%indx = 0
591
592 END DO
593
594 CALL timestop(handle2)
595
596 CALL timeset(routinen//"_4_buf_from_fmin_"//fm_out%name, handle2)
597 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
598 WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
599 END IF
600
601 ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
602 entry_counter(:) = 0
603
604 ! Now we can write the actual data and indices to the send-buffer
605 DO col_idx_loc = 1, ncol_local_in
606 idx_col_in = col_indices_in(col_idx_loc)
607
608 idx_col_first = (idx_col_in - 1)/ncol_in + 1
609 idx_col_sec = mod(idx_col_in - 1, ncol_in) + 1
610
611 ! If occupied orbitals are included, these have to be handled differently
612 ! due to their reversed indexing
613 IF (correct_nrow) THEN
614 idx_col_first = idx_col_first - nrow_offset + 1
615 IF (idx_col_first .LE. 0) cycle
616 ELSE
617 IF (idx_col_first > nrow_out) EXIT
618 END IF
619 IF (correct_ncol) THEN
620 idx_col_sec = idx_col_sec - ncol_offset + 1
621 IF (idx_col_sec .LE. 0) cycle
622 ELSE
623 IF (idx_col_sec > ncol_out) cycle
624 END IF
625
626 idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
627
628 DO row_idx_loc = 1, nrow_local_in
629 idx_row_in = row_indices_in(row_idx_loc)
630
631 send_prow = cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, &
632 fm_out%matrix_struct%first_p_pos(1), nprow)
633
634 send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
635 fm_out%matrix_struct%first_p_pos(2), npcol)
636
637 proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
638 entry_counter(proc_send) = entry_counter(proc_send) + 1
639
640 buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
641 fm_in%local_data(row_idx_loc, col_idx_loc)
642 !No need to create row_out, since it is identical to incoming
643 !We dont change the RI index for any fm_mat_XX_BSE
644 buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
645 buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
646
647 END DO
648 END DO
649
650 ALLOCATE (req_array(1:para_env_out%num_pe, 4))
651
652 CALL timestop(handle2)
653
654 CALL timeset(routinen//"_5_comm_buffer", handle2)
655 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
656 WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
657 END IF
658
659 ! communicate the buffer
660 CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
661 buffer_send, req_array)
662
663 CALL timestop(handle2)
664
665 CALL timeset(routinen//"_6_buffer_to_fmout"//fm_out%name, handle2)
666 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
667 WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
668 END IF
669
670 ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
671 nprocs = para_env_out%num_pe
672
673!$OMP PARALLEL DO DEFAULT(NONE) &
674!$OMP SHARED(fm_out, nprocs, nrow_block_out, ncol_block_out, &
675!$OMP num_entries_rec, buffer_rec, nprow, npcol, dummy) &
676!$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
677 DO iproc = 0, nprocs - 1
678 DO i_entry_rec = 1, num_entries_rec(iproc)
679 ii = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, &
680 dummy, dummy, nprow)
681 jj = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, &
682 dummy, dummy, npcol)
683
684 fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
685 END DO
686 END DO
687!$OMP END PARALLEL DO
688
689 CALL timestop(handle2)
690
691 CALL timeset(routinen//"_7_cleanup", handle2)
692 IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
693 WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
694 END IF
695
696 !Clean up all the arrays from the communication process
697 DO iproc = 0, para_env_out%num_pe - 1
698 DEALLOCATE (buffer_rec(iproc)%msg)
699 DEALLOCATE (buffer_rec(iproc)%indx)
700 DEALLOCATE (buffer_send(iproc)%msg)
701 DEALLOCATE (buffer_send(iproc)%indx)
702 END DO
703 DEALLOCATE (buffer_rec, buffer_send)
704 DEALLOCATE (req_array)
705 DEALLOCATE (entry_counter)
706 DEALLOCATE (num_entries_rec, num_entries_send)
707
708 CALL timestop(handle2)
709 CALL timestop(handle)
710
711 END SUBROUTINE truncate_fm
712
713! **************************************************************************************************
714!> \brief Debug function to write elements of a full matrix to file, if they are larger than a given threshold
715!> \param fm ...
716!> \param thresh ...
717!> \param header ...
718!> \param unit_nr ...
719!> \param abs_vals ...
720! **************************************************************************************************
721 SUBROUTINE fm_write_thresh(fm, thresh, header, unit_nr, abs_vals)
722
723 TYPE(cp_fm_type), INTENT(IN) :: fm
724 REAL(kind=dp), INTENT(IN) :: thresh
725 CHARACTER(LEN=*), INTENT(IN) :: header
726 INTEGER, INTENT(IN) :: unit_nr
727 LOGICAL, OPTIONAL :: abs_vals
728
729 CHARACTER(LEN=*), PARAMETER :: my_footer = " | ENDING WRITING OF MATRIX", &
730 routinen = 'fm_write_thresh'
731
732 INTEGER :: handle, i, j, ncol_local, nrow_local
733 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
734 LOGICAL :: my_abs_vals
735
736 CALL timeset(routinen, handle)
737
738 IF (PRESENT(abs_vals)) THEN
739 my_abs_vals = abs_vals
740 ELSE
741 my_abs_vals = .false.
742 END IF
743
744 CALL cp_fm_get_info(matrix=fm, &
745 nrow_local=nrow_local, &
746 ncol_local=ncol_local, &
747 row_indices=row_indices, &
748 col_indices=col_indices)
749
750 IF (unit_nr > 0) THEN
751 WRITE (unit_nr, *) header
752 END IF
753 IF (my_abs_vals) THEN
754 DO i = 1, nrow_local
755 DO j = 1, ncol_local
756 IF (abs(fm%local_data(i, j)) > thresh) THEN
757 WRITE (unit_nr, "(A7,T10,I5,T20,I5,T30,F13.5)") header, row_indices(i), col_indices(j), &
758 abs(fm%local_data(i, j))
759 END IF
760 END DO
761 END DO
762 ELSE
763 DO i = 1, nrow_local
764 DO j = 1, ncol_local
765 IF (abs(fm%local_data(i, j)) > thresh) THEN
766 WRITE (unit_nr, "(A7,T10,I5,T20,I5,T30,F13.5)") header, row_indices(i), col_indices(j), &
767 fm%local_data(i, j)
768 END IF
769 END DO
770 END DO
771 END IF
772 CALL fm%matrix_struct%para_env%sync()
773 IF (unit_nr > 0) THEN
774 WRITE (unit_nr, *) my_footer
775 END IF
776
777 CALL timestop(handle)
778
779 END SUBROUTINE
780
781! **************************************************************************************************
782!> \brief ...
783!> \param bse_tda ...
784!> \param bse_abba ...
785!> \param unit_nr ...
786! **************************************************************************************************
787 SUBROUTINE print_bse_start_flag(bse_tda, bse_abba, unit_nr)
788
789 LOGICAL, INTENT(IN) :: bse_tda, bse_abba
790 INTEGER, INTENT(IN) :: unit_nr
791
792 CHARACTER(LEN=*), PARAMETER :: routinen = 'print_BSE_start_flag'
793
794 INTEGER :: handle
795
796 CALL timeset(routinen, handle)
797
798 IF (unit_nr > 0) THEN
799 WRITE (unit_nr, *) ' '
800 WRITE (unit_nr, '(T2,A79)') '*******************************************************************************'
801 WRITE (unit_nr, '(T2,A79)') '** **'
802 WRITE (unit_nr, '(T2,A79)') '** Bethe Salpeter equation (BSE) for excitation energies **'
803 IF (bse_tda .AND. bse_abba) THEN
804 WRITE (unit_nr, '(T2,A79)') '** solved with and without Tamm-Dancoff approximation (TDA) **'
805 ELSE IF (bse_tda) THEN
806 WRITE (unit_nr, '(T2,A79)') '** solved with Tamm-Dancoff approximation (TDA) **'
807 ELSE
808 WRITE (unit_nr, '(T2,A79)') '** solved without Tamm-Dancoff approximation (TDA) **'
809 END IF
810
811 WRITE (unit_nr, '(T2,A79)') '** **'
812 WRITE (unit_nr, '(T2,A79)') '*******************************************************************************'
813 WRITE (unit_nr, *) ' '
814 END IF
815
816 CALL timestop(handle)
817
818 END SUBROUTINE
819
820! **************************************************************************************************
821!> \brief ...
822!> \param fm_mat_S_bar_ia_bse ...
823!> \param fm_mat_S_bar_ij_bse ...
824!> \param fm_mat_S_trunc ...
825!> \param fm_mat_S_ij_trunc ...
826!> \param fm_mat_S_ab_trunc ...
827!> \param fm_mat_Q_static_bse ...
828!> \param fm_mat_Q_static_bse_gemm ...
829! **************************************************************************************************
830 SUBROUTINE deallocate_matrices_bse(fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, &
831 fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
832 fm_mat_Q_static_bse, fm_mat_Q_static_bse_gemm)
833
834 TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_s_bar_ia_bse, fm_mat_s_bar_ij_bse, fm_mat_s_trunc, &
835 fm_mat_s_ij_trunc, fm_mat_s_ab_trunc, fm_mat_q_static_bse, fm_mat_q_static_bse_gemm
836
837 CHARACTER(LEN=*), PARAMETER :: routinen = 'deallocate_matrices_bse'
838
839 INTEGER :: handle
840
841 CALL timeset(routinen, handle)
842
843 CALL cp_fm_release(fm_mat_s_bar_ia_bse)
844 CALL cp_fm_release(fm_mat_s_bar_ij_bse)
845 CALL cp_fm_release(fm_mat_s_trunc)
846 CALL cp_fm_release(fm_mat_s_ij_trunc)
847 CALL cp_fm_release(fm_mat_s_ab_trunc)
848 CALL cp_fm_release(fm_mat_q_static_bse)
849 CALL cp_fm_release(fm_mat_q_static_bse_gemm)
850
851 CALL timestop(handle)
852
853 END SUBROUTINE deallocate_matrices_bse
854
855! **************************************************************************************************
856!> \brief Routine for computing the coefficients of the eigenvectors of the BSE matrix from a
857!> multiplication with the eigenvalues
858!> \param fm_work ...
859!> \param eig_vals ...
860!> \param beta ...
861!> \param gamma ...
862!> \param do_transpose ...
863! **************************************************************************************************
864 SUBROUTINE comp_eigvec_coeff_bse(fm_work, eig_vals, beta, gamma, do_transpose)
865
866 TYPE(cp_fm_type), INTENT(INOUT) :: fm_work
867 REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
868 INTENT(IN) :: eig_vals
869 REAL(kind=dp), INTENT(IN) :: beta
870 REAL(kind=dp), INTENT(IN), OPTIONAL :: gamma
871 LOGICAL, INTENT(IN), OPTIONAL :: do_transpose
872
873 CHARACTER(LEN=*), PARAMETER :: routinen = 'comp_eigvec_coeff_BSE'
874
875 INTEGER :: handle, i_row_global, ii, j_col_global, &
876 jj, ncol_local, nrow_local
877 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
878 LOGICAL :: my_do_transpose
879 REAL(kind=dp) :: coeff, my_gamma
880
881 CALL timeset(routinen, handle)
882
883 IF (PRESENT(gamma)) THEN
884 my_gamma = gamma
885 ELSE
886 my_gamma = 2.0_dp
887 END IF
888
889 IF (PRESENT(do_transpose)) THEN
890 my_do_transpose = do_transpose
891 ELSE
892 my_do_transpose = .false.
893 END IF
894
895 CALL cp_fm_get_info(matrix=fm_work, &
896 nrow_local=nrow_local, &
897 ncol_local=ncol_local, &
898 row_indices=row_indices, &
899 col_indices=col_indices)
900
901 IF (my_do_transpose) THEN
902 DO jj = 1, ncol_local
903 j_col_global = col_indices(jj)
904 DO ii = 1, nrow_local
905 coeff = (eig_vals(j_col_global)**beta)/my_gamma
906 fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
907 END DO
908 END DO
909 ELSE
910 DO jj = 1, ncol_local
911 DO ii = 1, nrow_local
912 i_row_global = row_indices(ii)
913 coeff = (eig_vals(i_row_global)**beta)/my_gamma
914 fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
915 END DO
916 END DO
917 END IF
918
919 CALL timestop(handle)
920
921 END SUBROUTINE
922
923! **************************************************************************************************
924!> \brief ...
925!> \param idx_prim ...
926!> \param idx_sec ...
927!> \param eigvec_entries ...
928! **************************************************************************************************
929 SUBROUTINE sort_excitations(idx_prim, idx_sec, eigvec_entries)
930
931 INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_prim, idx_sec
932 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries
933
934 CHARACTER(LEN=*), PARAMETER :: routinen = 'sort_excitations'
935
936 INTEGER :: handle, ii, kk, num_entries, num_mults
937 INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_prim_work, idx_sec_work, tmp_index
938 LOGICAL :: unique_entries
939 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries_work
940
941 CALL timeset(routinen, handle)
942
943 num_entries = SIZE(idx_prim)
944
945 ALLOCATE (tmp_index(num_entries))
946
947 CALL sort(idx_prim, num_entries, tmp_index)
948
949 ALLOCATE (idx_sec_work(num_entries))
950 ALLOCATE (eigvec_entries_work(num_entries))
951
952 DO ii = 1, num_entries
953 idx_sec_work(ii) = idx_sec(tmp_index(ii))
954 eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
955 END DO
956
957 DEALLOCATE (tmp_index)
958 DEALLOCATE (idx_sec)
959 DEALLOCATE (eigvec_entries)
960
961 CALL move_alloc(idx_sec_work, idx_sec)
962 CALL move_alloc(eigvec_entries_work, eigvec_entries)
963
964 !Now check for multiple entries in first idx to check necessity of sorting in second idx
965 CALL sort_unique(idx_prim, unique_entries)
966 IF (.NOT. unique_entries) THEN
967 ALLOCATE (idx_prim_work(num_entries))
968 idx_prim_work(:) = idx_prim(:)
969 ! Find duplicate entries in idx_prim
970 DO ii = 1, num_entries
971 IF (idx_prim_work(ii) == 0) cycle
972 num_mults = count(idx_prim_work == idx_prim_work(ii))
973 IF (num_mults > 1) THEN
974 !Set all duplicate entries to 0
975 idx_prim_work(ii:ii + num_mults - 1) = 0
976 !Start sorting in secondary index
977 ALLOCATE (idx_sec_work(num_mults))
978 ALLOCATE (eigvec_entries_work(num_mults))
979 idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
980 eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
981 ALLOCATE (tmp_index(num_mults))
982 CALL sort(idx_sec_work, num_mults, tmp_index)
983
984 !Now write newly sorted indices to original arrays
985 DO kk = ii, ii + num_mults - 1
986 idx_sec(kk) = idx_sec_work(kk - ii + 1)
987 eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
988 END DO
989 !Deallocate work arrays
990 DEALLOCATE (tmp_index)
991 DEALLOCATE (idx_sec_work)
992 DEALLOCATE (eigvec_entries_work)
993 END IF
994 idx_prim_work(ii) = idx_prim(ii)
995 END DO
996 DEALLOCATE (idx_prim_work)
997 END IF
998
999 CALL timestop(handle)
1000
1001 END SUBROUTINE sort_excitations
1002
1003! **************************************************************************************************
1004!> \brief Roughly estimates the needed runtime and memory during the BSE run
1005!> \param homo_red ...
1006!> \param virtual_red ...
1007!> \param unit_nr ...
1008!> \param bse_abba ...
1009!> \param para_env ...
1010!> \param diag_runtime_est ...
1011! **************************************************************************************************
1012 SUBROUTINE estimate_bse_resources(homo_red, virtual_red, unit_nr, bse_abba, &
1013 para_env, diag_runtime_est)
1014
1015 INTEGER :: homo_red, virtual_red, unit_nr
1016 LOGICAL :: bse_abba
1017 TYPE(mp_para_env_type), POINTER :: para_env
1018 REAL(kind=dp) :: diag_runtime_est
1019
1020 CHARACTER(LEN=*), PARAMETER :: routinen = 'estimate_BSE_resources'
1021
1022 INTEGER :: handle, num_bse_matrices
1023 REAL(kind=dp) :: mem_est, mem_est_per_rank
1024
1025 CALL timeset(routinen, handle)
1026
1027 ! Number of matrices with size of A in TDA is 2 (A itself and W_ijab)
1028 num_bse_matrices = 2
1029 ! With the full diagonalization of ABBA, we need several auxiliary matrices in the process
1030 ! The maximum number is 2 + 2 + 6 (additional B and C matrix as well as 6 matrices to create C)
1031 IF (bse_abba) THEN
1032 num_bse_matrices = 10
1033 END IF
1034 mem_est = real(8*(homo_red**2*virtual_red**2)*num_bse_matrices, kind=dp)/real(1024**3, kind=dp)
1035 mem_est_per_rank = real(mem_est/para_env%num_pe, kind=dp)
1036 IF (unit_nr > 0) THEN
1037 WRITE (unit_nr, '(T2,A4,T7,A40,T68,F13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
1038 mem_est
1039 WRITE (unit_nr, '(T2,A4,T7,A47,T68,F13.3)') 'BSE|', 'Peak memory estimate per MPI rank from BSE [GB]', &
1040 mem_est_per_rank
1041 WRITE (unit_nr, '(T2,A4)') 'BSE|'
1042 END IF
1043 ! Rough estimation of diagonalization runtimes. Baseline was a full BSE Naphthalene
1044 ! run with 11000x11000 entries in A/B/C, which took 10s on 32 ranks
1045 diag_runtime_est = real(homo_red*virtual_red/11000, kind=dp)**3*10*32/real(para_env%num_pe, kind=dp)
1046
1047 CALL timestop(handle)
1048
1049 END SUBROUTINE estimate_bse_resources
1050
1051! **************************************************************************************************
1052!> \brief Filters eigenvector entries above a given threshold to describe excitations in the
1053!> singleparticle basis
1054!> \param fm_eigvec ...
1055!> \param idx_homo ...
1056!> \param idx_virt ...
1057!> \param eigvec_entries ...
1058!> \param i_exc ...
1059!> \param virtual ...
1060!> \param num_entries ...
1061!> \param mp2_env ...
1062! **************************************************************************************************
1063 SUBROUTINE filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, &
1064 i_exc, virtual, num_entries, mp2_env)
1065
1066 TYPE(cp_fm_type), INTENT(IN) :: fm_eigvec
1067 INTEGER, ALLOCATABLE, DIMENSION(:) :: idx_homo, idx_virt
1068 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigvec_entries
1069 INTEGER :: i_exc, virtual, num_entries
1070 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1071
1072 CHARACTER(LEN=*), PARAMETER :: routinen = 'filter_eigvec_contrib'
1073
1074 INTEGER :: eigvec_idx, handle, ii, iproc, jj, kk, &
1075 ncol_local, nrow_local, &
1076 num_entries_local
1077 INTEGER, ALLOCATABLE, DIMENSION(:) :: num_entries_to_comm
1078 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1079 REAL(kind=dp) :: eigvec_entry
1080 TYPE(integ_mat_buffer_type), ALLOCATABLE, &
1081 DIMENSION(:) :: buffer_entries
1082 TYPE(mp_para_env_type), POINTER :: para_env
1083
1084 CALL timeset(routinen, handle)
1085
1086 para_env => fm_eigvec%matrix_struct%para_env
1087
1088 CALL cp_fm_get_info(matrix=fm_eigvec, &
1089 nrow_local=nrow_local, &
1090 ncol_local=ncol_local, &
1091 row_indices=row_indices, &
1092 col_indices=col_indices)
1093
1094 ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
1095 num_entries_to_comm(:) = 0
1096
1097 DO jj = 1, ncol_local
1098 !First check if i is localized on this proc
1099 IF (col_indices(jj) /= i_exc) THEN
1100 cycle
1101 END IF
1102 DO ii = 1, nrow_local
1103 eigvec_idx = row_indices(ii)
1104 eigvec_entry = fm_eigvec%local_data(ii, jj)/sqrt(2.0_dp)
1105 IF (abs(eigvec_entry) > mp2_env%ri_g0w0%eps_x) THEN
1106 num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
1107 END IF
1108 END DO
1109 END DO
1110
1111 !Gather number of entries of other processes
1112 CALL para_env%sum(num_entries_to_comm)
1113
1114 num_entries_local = num_entries_to_comm(para_env%mepos)
1115
1116 ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
1117
1118 DO iproc = 0, para_env%num_pe - 1
1119 ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
1120 ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
1121 buffer_entries(iproc)%msg = 0.0_dp
1122 buffer_entries(iproc)%indx = 0
1123 END DO
1124
1125 kk = 1
1126 DO jj = 1, ncol_local
1127 !First check if i is localized on this proc
1128 IF (col_indices(jj) /= i_exc) THEN
1129 cycle
1130 END IF
1131 DO ii = 1, nrow_local
1132 eigvec_idx = row_indices(ii)
1133 eigvec_entry = fm_eigvec%local_data(ii, jj)/sqrt(2.0_dp)
1134 IF (abs(eigvec_entry) > mp2_env%ri_g0w0%eps_x) THEN
1135 buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
1136 buffer_entries(para_env%mepos)%indx(kk, 2) = mod(eigvec_idx - 1, virtual) + 1
1137 buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
1138 kk = kk + 1
1139 END IF
1140 END DO
1141 END DO
1142
1143 DO iproc = 0, para_env%num_pe - 1
1144 CALL para_env%sum(buffer_entries(iproc)%msg)
1145 CALL para_env%sum(buffer_entries(iproc)%indx)
1146 END DO
1147
1148 !Now sum up gathered information
1149 num_entries = sum(num_entries_to_comm)
1150 ALLOCATE (idx_homo(num_entries))
1151 ALLOCATE (idx_virt(num_entries))
1152 ALLOCATE (eigvec_entries(num_entries))
1153
1154 kk = 1
1155 DO iproc = 0, para_env%num_pe - 1
1156 IF (num_entries_to_comm(iproc) /= 0) THEN
1157 DO ii = 1, num_entries_to_comm(iproc)
1158 idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
1159 idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
1160 eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
1161 kk = kk + 1
1162 END DO
1163 END IF
1164 END DO
1165
1166 !Deallocate all the used arrays
1167 DO iproc = 0, para_env%num_pe - 1
1168 DEALLOCATE (buffer_entries(iproc)%msg)
1169 DEALLOCATE (buffer_entries(iproc)%indx)
1170 END DO
1171 DEALLOCATE (buffer_entries)
1172 DEALLOCATE (num_entries_to_comm)
1173 NULLIFY (row_indices)
1174 NULLIFY (col_indices)
1175
1176 !Now sort the results according to the involved singleparticle orbitals
1177 ! (homo first, then virtual)
1178 CALL sort_excitations(idx_homo, idx_virt, eigvec_entries)
1179
1180 CALL timestop(handle)
1181
1182 END SUBROUTINE
1183
1184! **************************************************************************************************
1185!> \brief Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices
1186!> \param fm_mat_S_ia_bse ...
1187!> \param fm_mat_S_ij_bse ...
1188!> \param fm_mat_S_ab_bse ...
1189!> \param fm_mat_S_trunc ...
1190!> \param fm_mat_S_ij_trunc ...
1191!> \param fm_mat_S_ab_trunc ...
1192!> \param Eigenval ...
1193!> \param Eigenval_reduced ...
1194!> \param homo ...
1195!> \param virtual ...
1196!> \param dimen_RI ...
1197!> \param unit_nr ...
1198!> \param homo_red ...
1199!> \param virt_red ...
1200!> \param mp2_env ...
1201! **************************************************************************************************
1202 SUBROUTINE truncate_bse_matrices(fm_mat_S_ia_bse, fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
1203 fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
1204 Eigenval, Eigenval_reduced, &
1205 homo, virtual, dimen_RI, unit_nr, &
1206 homo_red, virt_red, &
1207 mp2_env)
1208
1209 TYPE(cp_fm_type), INTENT(IN) :: fm_mat_s_ia_bse, fm_mat_s_ij_bse, &
1210 fm_mat_s_ab_bse
1211 TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_s_trunc, fm_mat_s_ij_trunc, &
1212 fm_mat_s_ab_trunc
1213 REAL(kind=dp), DIMENSION(:) :: eigenval
1214 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: eigenval_reduced
1215 INTEGER, INTENT(IN) :: homo, virtual, dimen_ri, unit_nr
1216 INTEGER, INTENT(OUT) :: homo_red, virt_red
1217 TYPE(mp2_type), INTENT(INOUT) :: mp2_env
1218
1219 CHARACTER(LEN=*), PARAMETER :: routinen = 'truncate_BSE_matrices'
1220
1221 INTEGER :: handle, homo_incl, i_homo, j_virt, &
1222 virt_incl
1223 TYPE(cp_blacs_env_type), POINTER :: context
1224 TYPE(cp_fm_struct_type), POINTER :: fm_struct_ab, fm_struct_ia, fm_struct_ij
1225 TYPE(mp_para_env_type), POINTER :: para_env
1226
1227 CALL timeset(routinen, handle)
1228
1229 ! Determine index in homo and virtual for truncation
1230 ! Uses indices of outermost orbitals within energy range (-mp2_env%ri_g0w0%bse_cutoff_occ,mp2_env%ri_g0w0%bse_cutoff_virt)
1231 IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_virt > 0) THEN
1232 IF (-mp2_env%ri_g0w0%bse_cutoff_occ .LT. eigenval(1) - eigenval(homo) &
1233 .OR. mp2_env%ri_g0w0%bse_cutoff_occ < 0) THEN
1234 homo_red = homo
1235 homo_incl = 1
1236 ELSE
1237 homo_incl = 1
1238 DO i_homo = 1, homo
1239 IF (eigenval(i_homo) - eigenval(homo) .GT. -mp2_env%ri_g0w0%bse_cutoff_occ) THEN
1240 homo_incl = i_homo
1241 EXIT
1242 END IF
1243 END DO
1244 homo_red = homo - homo_incl + 1
1245 END IF
1246
1247 IF (mp2_env%ri_g0w0%bse_cutoff_virt .GT. eigenval(homo + virtual) - eigenval(homo + 1) &
1248 .OR. mp2_env%ri_g0w0%bse_cutoff_virt < 0) THEN
1249 virt_red = virtual
1250 virt_incl = virtual
1251 ELSE
1252 virt_incl = homo + 1
1253 DO j_virt = 1, virtual
1254 IF (eigenval(homo + j_virt) - eigenval(homo + 1) .GT. mp2_env%ri_g0w0%bse_cutoff_virt) THEN
1255 virt_incl = j_virt - 1
1256 EXIT
1257 END IF
1258 END DO
1259 virt_red = virt_incl
1260 END IF
1261 ELSE
1262 homo_red = homo
1263 virt_red = virtual
1264 homo_incl = 1
1265 virt_incl = virtual
1266 END IF
1267 IF (unit_nr > 0) THEN
1268 IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0) THEN
1269 WRITE (unit_nr, '(T2,A4,T7,A29,T71,F10.3)') 'BSE|', 'Cutoff occupied orbitals [eV]', &
1270 mp2_env%ri_g0w0%bse_cutoff_occ*evolt
1271 ELSE
1272 WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No cutoff given for occupied orbitals'
1273 END IF
1274 IF (mp2_env%ri_g0w0%bse_cutoff_virt > 0) THEN
1275 WRITE (unit_nr, '(T2,A4,T7,A28,T71,F10.3)') 'BSE|', 'Cutoff virtual orbitals [eV]', &
1276 mp2_env%ri_g0w0%bse_cutoff_virt*evolt
1277 ELSE
1278 WRITE (unit_nr, '(T2,A4,T7,A36)') 'BSE|', 'No cutoff given for virtual orbitals'
1279 END IF
1280 WRITE (unit_nr, '(T2,A4,T7,A20,T71,I10)') 'BSE|', 'First occupied index', homo_incl
1281 WRITE (unit_nr, '(T2,A4,T7,A34,T71,I10)') 'BSE|', 'Last virtual index (not MO index!)', virt_incl
1282 WRITE (unit_nr, '(T2,A4,T7,A35,T71,F10.3)') 'BSE|', 'Energy of first occupied index [eV]', eigenval(homo_incl)*evolt
1283 WRITE (unit_nr, '(T2,A4,T7,A33,T71,F10.3)') 'BSE|', 'Energy of last virtual index [eV]', eigenval(homo + virt_incl)*evolt
1284 WRITE (unit_nr, '(T2,A4,T7,A54,T71,F10.3)') 'BSE|', 'Energy difference of first occupied index to HOMO [eV]', &
1285 -(eigenval(homo_incl) - eigenval(homo))*evolt
1286 WRITE (unit_nr, '(T2,A4,T7,A52,T71,F10.3)') 'BSE|', 'Energy difference of last virtual index to LUMO [eV]', &
1287 (eigenval(homo + virt_incl) - eigenval(homo + 1))*evolt
1288 WRITE (unit_nr, '(T2,A4,T7,A35,T71,I10)') 'BSE|', 'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
1289 WRITE (unit_nr, '(T2,A4,T7,A34,T71,I10)') 'BSE|', 'Number of GW-corrected virtual MOs', mp2_env%ri_g0w0%corr_mos_virt
1290 WRITE (unit_nr, '(T2,A4)') 'BSE|'
1291 END IF
1292 IF (unit_nr > 0) THEN
1293 IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ) THEN
1294 cpabort("Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
1295 END IF
1296 IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt) THEN
1297 cpabort("Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
1298 END IF
1299 END IF
1300 !Truncate full fm_S matrices
1301 !Allocate new truncated matrices of proper size
1302 para_env => fm_mat_s_ia_bse%matrix_struct%para_env
1303 context => fm_mat_s_ia_bse%matrix_struct%context
1304
1305 CALL cp_fm_struct_create(fm_struct_ia, para_env, context, dimen_ri, homo_red*virt_red)
1306 CALL cp_fm_struct_create(fm_struct_ij, para_env, context, dimen_ri, homo_red*homo_red)
1307 CALL cp_fm_struct_create(fm_struct_ab, para_env, context, dimen_ri, virt_red*virt_red)
1308
1309 CALL cp_fm_create(fm_mat_s_trunc, fm_struct_ia, "fm_S_trunc")
1310 CALL cp_fm_create(fm_mat_s_ij_trunc, fm_struct_ij, "fm_S_ij_trunc")
1311 CALL cp_fm_create(fm_mat_s_ab_trunc, fm_struct_ab, "fm_S_ab_trunc")
1312
1313 !Copy parts of original matrices to truncated ones
1314 IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_virt > 0) THEN
1315 !Truncate eigenvals
1316 ALLOCATE (eigenval_reduced(homo_red + virt_red))
1317 eigenval_reduced(:) = eigenval(homo_incl:homo + virt_incl)
1318
1319 CALL truncate_fm(fm_mat_s_trunc, fm_mat_s_ia_bse, virtual, &
1320 homo_red, virt_red, unit_nr, mp2_env, &
1321 nrow_offset=homo_incl)
1322 CALL truncate_fm(fm_mat_s_ij_trunc, fm_mat_s_ij_bse, homo, &
1323 homo_red, homo_red, unit_nr, mp2_env, &
1324 homo_incl, homo_incl)
1325 CALL truncate_fm(fm_mat_s_ab_trunc, fm_mat_s_ab_bse, virtual, &
1326 virt_red, virt_red, unit_nr, mp2_env)
1327
1328 ELSE
1329 IF (unit_nr > 0) THEN
1330 WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No truncation of BSE matrices applied'
1331 WRITE (unit_nr, '(T2,A4)') 'BSE|'
1332 END IF
1333 ALLOCATE (eigenval_reduced(homo_red + virt_red))
1334 eigenval_reduced(:) = eigenval(:)
1335 CALL cp_fm_to_fm_submat_general(fm_mat_s_ia_bse, fm_mat_s_trunc, dimen_ri, homo_red*virt_red, &
1336 1, 1, 1, 1, context)
1337 CALL cp_fm_to_fm_submat_general(fm_mat_s_ij_bse, fm_mat_s_ij_trunc, dimen_ri, homo_red*homo_red, &
1338 1, 1, 1, 1, context)
1339 CALL cp_fm_to_fm_submat_general(fm_mat_s_ab_bse, fm_mat_s_ab_trunc, dimen_ri, virt_red*virt_red, &
1340 1, 1, 1, 1, context)
1341 END IF
1342
1343 CALL cp_fm_struct_release(fm_struct_ia)
1344 CALL cp_fm_struct_release(fm_struct_ij)
1345 CALL cp_fm_struct_release(fm_struct_ab)
1346
1347 NULLIFY (para_env)
1348 NULLIFY (context)
1349
1350 CALL timestop(handle)
1351
1352 END SUBROUTINE truncate_bse_matrices
1353
1354END MODULE
Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations.
Definition bse_util.F:13
subroutine, public filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, i_exc, virtual, num_entries, mp2_env)
Filters eigenvector entries above a given threshold to describe excitations in the singleparticle bas...
Definition bse_util.F:1065
subroutine, public truncate_bse_matrices(fm_mat_s_ia_bse, fm_mat_s_ij_bse, fm_mat_s_ab_bse, fm_mat_s_trunc, fm_mat_s_ij_trunc, fm_mat_s_ab_trunc, eigenval, eigenval_reduced, homo, virtual, dimen_ri, unit_nr, homo_red, virt_red, mp2_env)
Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices.
Definition bse_util.F:1208
subroutine, public estimate_bse_resources(homo_red, virtual_red, unit_nr, bse_abba, para_env, diag_runtime_est)
Roughly estimates the needed runtime and memory during the BSE run.
Definition bse_util.F:1014
subroutine, public truncate_fm(fm_out, fm_in, ncol_in, nrow_out, ncol_out, unit_nr, mp2_env, nrow_offset, ncol_offset)
Routine for truncating a full matrix as given by the energy cutoffs in the input file....
Definition bse_util.F:422
subroutine, public mult_b_with_w(fm_mat_s_ij_bse, fm_mat_s_ia_bse, fm_mat_s_bar_ia_bse, fm_mat_s_bar_ij_bse, fm_mat_q_static_bse_gemm, dimen_ri, homo, virtual)
Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}.
Definition bse_util.F:67
subroutine, public print_bse_start_flag(bse_tda, bse_abba, unit_nr)
...
Definition bse_util.F:788
subroutine, public fm_write_thresh(fm, thresh, header, unit_nr, abs_vals)
Debug function to write elements of a full matrix to file, if they are larger than a given threshold.
Definition bse_util.F:722
subroutine, public comp_eigvec_coeff_bse(fm_work, eig_vals, beta, gamma, do_transpose)
Routine for computing the coefficients of the eigenvectors of the BSE matrix from a multiplication wi...
Definition bse_util.F:865
subroutine, public fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,...
Definition bse_util.F:151
subroutine, public sort_excitations(idx_prim, idx_sec, eigvec_entries)
...
Definition bse_util.F:930
subroutine, public deallocate_matrices_bse(fm_mat_s_bar_ia_bse, fm_mat_s_bar_ij_bse, fm_mat_s_trunc, fm_mat_s_ij_trunc, fm_mat_s_ab_trunc, fm_mat_q_static_bse, fm_mat_q_static_bse_gemm)
...
Definition bse_util.F:833
methods related to the blacs parallel environment
basic linear algebra operations for full matrices
subroutine, public cp_fm_upper_to_full(matrix, work)
given an upper triangular matrix computes the corresponding full matrix
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_invert(matrix, n, info_out)
used to replace the cholesky decomposition by the inverse
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
integer function, public cp_fm_indxg2l(indxglob, nb, iproc, isrcproc, nprocs)
wrapper to scalapack function INDXG2L that computes the local index of a distributed matrix entry poi...
subroutine, public cp_fm_to_fm_submat_general(source, destination, nrows, ncols, s_firstrow, s_firstcol, d_firstrow, d_firstcol, global_context)
General copy of a submatrix of fm matrix to a submatrix of another fm matrix. The two matrices can ha...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
integer function, public cp_fm_indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
wrapper to scalapack function INDXG2P that computes the process coordinate which possesses the entry ...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Calculation of the incomplete Gamma function F_n(t) for multi-center integrals over Cartesian Gaussia...
Definition gamma.F:15
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Interface to the message passing library MPI.
Types needed for MP2 calculations.
Definition mp2_types.F:14
basic linear algebra operations for full matrixes
Definition of physical constants:
Definition physcon.F:68
real(kind=dp), parameter, public evolt
Definition physcon.F:183
Auxiliary routines necessary to redistribute an fm_matrix from a given blacs_env to another.
subroutine, public communicate_buffer(para_env, num_entries_rec, num_entries_send, buffer_rec, buffer_send, req_array, do_indx, do_msg)
...
All kind of helpful little routines.
Definition util.F:14
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment