(git:0de0cc2)
xas_tdp_correction.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Second order perturbation correction to XAS_TDP spectra (i.e. shift)
10 !> \author A. Bussy (01.2020)
11 ! **************************************************************************************************
12 
14  USE admm_types, ONLY: admm_type
16  USE bibliography, ONLY: bussy2021b,&
17  shigeta2001,&
18  cite_reference
19  USE cp_array_utils, ONLY: cp_1d_i_p_type,&
20  cp_1d_r_p_type
21  USE cp_blacs_env, ONLY: cp_blacs_env_type
22  USE cp_cfm_types, ONLY: cp_cfm_create,&
25  cp_cfm_type,&
27  USE cp_control_types, ONLY: dft_control_type
33  cp_fm_struct_p_type,&
35  cp_fm_struct_type
36  USE cp_fm_types, ONLY: cp_fm_create,&
39  cp_fm_release,&
40  cp_fm_to_fm,&
42  cp_fm_type
44  USE dbcsr_api, ONLY: &
45  dbcsr_copy, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
46  dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_get_info, dbcsr_p_type, &
47  dbcsr_release, dbcsr_type
48  USE dbt_api, ONLY: &
49  dbt_contract, dbt_copy, dbt_copy_matrix_to_tensor, dbt_create, dbt_default_distvec, &
50  dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, &
51  dbt_finalize, dbt_get_block, dbt_get_info, dbt_iterator_blocks_left, &
52  dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, dbt_iterator_type, &
53  dbt_pgrid_create, dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_type
60  section_vals_type
61  USE kinds, ONLY: dp
62  USE machine, ONLY: m_flush
63  USE mathlib, ONLY: complex_diag
64  USE message_passing, ONLY: mp_para_env_type
65  USE parallel_gemm_api, ONLY: parallel_gemm
66  USE physcon, ONLY: evolt
67  USE qs_environment_types, ONLY: get_qs_env,&
68  qs_environment_type
70  USE qs_mo_types, ONLY: deallocate_mo_set,&
72  get_mo_set,&
73  mo_set_type,&
75  USE util, ONLY: get_limit
78  USE xas_tdp_types, ONLY: donor_state_type,&
79  xas_tdp_control_type,&
80  xas_tdp_env_type
81 
82 !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
83 #include "./base/base_uses.f90"
84 
85  IMPLICIT NONE
86  PRIVATE
87 
88  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_correction'
89 
90  PUBLIC :: gw2x_shift, get_soc_splitting
91 
92 CONTAINS
93 
94 ! **************************************************************************************************
95 !> \brief Computes the ionization potential using the GW2X method of Shigeta et. al. The result cam
96 !> be used for XAS correction (shift) or XPS directly.
97 !> \param donor_state ...
98 !> \param xas_tdp_env ...
99 !> \param xas_tdp_control ...
100 !> \param qs_env ...
101 ! **************************************************************************************************
102  SUBROUTINE gw2x_shift(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
103 
104  TYPE(donor_state_type), POINTER :: donor_state
105  TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
106  TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
107  TYPE(qs_environment_type), POINTER :: qs_env
108 
109  CHARACTER(len=*), PARAMETER :: routinen = 'GW2X_shift'
110 
111  INTEGER :: ex_idx, exat, first_domo(2), handle, i, ido_mo, iloc, ilocat, ispin, jspin, &
112  locat, nao, natom, ndo_mo, nhomo(2), nlumo(2), nonloc, nspins, start_sgf
113  INTEGER, DIMENSION(:), POINTER :: nsgf_blk
114  LOGICAL :: pseudo_canonical
115  REAL(dp) :: og_hfx_frac
116  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: contract_coeffs_backup
117  TYPE(admm_type), POINTER :: admm_env
118  TYPE(cp_1d_r_p_type), ALLOCATABLE, DIMENSION(:) :: homo_evals, lumo_evals
119  TYPE(cp_blacs_env_type), POINTER :: blacs_env
120  TYPE(cp_fm_struct_p_type), ALLOCATABLE, &
121  DIMENSION(:) :: all_struct, homo_struct, lumo_struct
122  TYPE(cp_fm_struct_type), POINTER :: hoho_struct, lulu_struct
123  TYPE(cp_fm_type) :: hoho_fock, hoho_work, homo_work, &
124  lulu_fock, lulu_work, lumo_work
125  TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: all_coeffs, homo_coeffs, lumo_coeffs
126  TYPE(cp_fm_type), POINTER :: mo_coeff
127  TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: dbcsr_work, fock_matrix, matrix_ks
128  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ja_x, oi_y
129  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: mo_template
130  TYPE(dft_control_type), POINTER :: dft_control
131  TYPE(mo_set_type), DIMENSION(:), POINTER :: mos
132  TYPE(mp_para_env_type), POINTER :: para_env
133  TYPE(section_vals_type), POINTER :: xc_fun_empty, xc_fun_original, xc_section
134 
135  NULLIFY (xc_fun_empty, xc_fun_original, xc_section, mos, dft_control, dbcsr_work, &
136  fock_matrix, matrix_ks, para_env, mo_coeff, blacs_env, nsgf_blk)
137 
138  CALL cite_reference(shigeta2001)
139  CALL cite_reference(bussy2021b)
140 
141  CALL timeset(routinen, handle)
142 
143  !The GW2X correction we want to compute goes like this, where omega is the corrected epsilon_I:
144  !omega = eps_I + 0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - eps_j - eps_k)
145  ! + 0.5 * sum_jab |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b)
146  ! j,k denote occupied spin-orbitals and a,b denote virtual spin orbitals
147 
148  !The strategy is the following (we assume restricted closed-shell):
149  !1) Get the LUMOs from xas_tdp_env
150  !2) Get the HOMOs from qs_env
151  !3) Compute or fetch the generalize Fock matric
152  !4) Diagonalize it in the subspace of HOMOs and LUMOs (or just take diagonal matrix elements)
153  !5) Build the full HOMO-LUMO basis that we will use and compute eigenvalues
154  !6) Iterate over GW2X steps to compute the self energy
155 
156  !We implement 2 approaches => diagonal elements of Fock matrix with original MOs and
157  !pseudo-canonical MOs
158  pseudo_canonical = xas_tdp_control%pseudo_canonical
159 
160  !Get donor state info
161  ndo_mo = donor_state%ndo_mo
162  nspins = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) nspins = 2
163 
164  !1) Get the LUMO coefficients from the xas_tdp_env, that have been precomputed
165 
166  CALL get_qs_env(qs_env, matrix_ks=matrix_ks, mos=mos, para_env=para_env, &
167  blacs_env=blacs_env, natom=natom)
168 
169  ALLOCATE (lumo_struct(nspins), lumo_coeffs(nspins))
170 
171  DO ispin = 1, nspins
172  CALL get_mo_set(mos(ispin), homo=nhomo(ispin), nao=nao)
173  nlumo(ispin) = nao - nhomo(ispin)
174 
175  CALL cp_fm_struct_create(lumo_struct(ispin)%struct, para_env=para_env, context=blacs_env, &
176  ncol_global=nlumo(ispin), nrow_global=nao)
177 
178  CALL cp_fm_create(lumo_coeffs(ispin), lumo_struct(ispin)%struct)
179  CALL cp_fm_to_fm(xas_tdp_env%lumo_evecs(ispin), lumo_coeffs(ispin))
180  END DO
181 
182  !2) get the HOMO coeffs. Reminder: keep all non-localized MOs + those localized on core atom
183  ! For this to work, it is assumed that the LOCALIZE keyword is used
184  ALLOCATE (homo_struct(nspins), homo_coeffs(nspins))
185 
186  DO ispin = 1, nspins
187  nonloc = nhomo(ispin) - xas_tdp_control%n_search
188  exat = donor_state%at_index
189  ex_idx = minloc(abs(xas_tdp_env%ex_atom_indices - exat), 1)
190  locat = count(xas_tdp_env%mos_of_ex_atoms(:, ex_idx, ispin) == 1)
191 
192  CALL cp_fm_struct_create(homo_struct(ispin)%struct, para_env=para_env, context=blacs_env, &
193  ncol_global=locat + nonloc, nrow_global=nao)
194  CALL cp_fm_create(homo_coeffs(ispin), homo_struct(ispin)%struct)
195 
196  CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff)
197  CALL cp_fm_to_fm_submat(mo_coeff, homo_coeffs(ispin), nrow=nao, ncol=nonloc, s_firstrow=1, &
198  s_firstcol=xas_tdp_control%n_search + 1, t_firstrow=1, t_firstcol=locat + 1)
199 
200  !this bit is taken from xas_tdp_methods
201  ilocat = 1
202  DO iloc = 1, xas_tdp_control%n_search
203  IF (xas_tdp_env%mos_of_ex_atoms(iloc, ex_idx, ispin) == -1) cycle
204  CALL cp_fm_to_fm_submat(mo_coeff, homo_coeffs(ispin), nrow=nao, ncol=1, s_firstrow=1, &
205  s_firstcol=iloc, t_firstrow=1, t_firstcol=ilocat)
206  !keep track of donor MO index
207  IF (iloc == donor_state%mo_indices(1, ispin)) first_domo(ispin) = ilocat !first donor MO
208 
209  ilocat = ilocat + 1
210  END DO
211  nhomo(ispin) = locat + nonloc
212  END DO
213 
214  !3) Computing the generalized Fock Matrix, if not there already
215  IF (ASSOCIATED(xas_tdp_env%fock_matrix)) THEN
216  fock_matrix => xas_tdp_env%fock_matrix
217  ELSE
218  block
219  TYPE(mo_set_type), DIMENSION(:), ALLOCATABLE :: backup_mos
220 
221  ALLOCATE (xas_tdp_env%fock_matrix(nspins))
222  fock_matrix => xas_tdp_env%fock_matrix
223 
224  ! remove the xc_functionals and set HF fraction to 1
225  xc_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC")
226  xc_fun_original => section_vals_get_subs_vals(xc_section, "XC_FUNCTIONAL")
227  CALL section_vals_retain(xc_fun_original)
228  CALL section_vals_create(xc_fun_empty, xc_fun_original%section)
229  CALL section_vals_set_subs_vals(xc_section, "XC_FUNCTIONAL", xc_fun_empty)
230  CALL section_vals_release(xc_fun_empty)
231  og_hfx_frac = qs_env%x_data(1, 1)%general_parameter%fraction
232  qs_env%x_data(:, :)%general_parameter%fraction = 1.0_dp
233 
234  !In case of ADMM, we need to re-create the admm XC section for the new hfx_fraction
235  !We also need to make a backup of the MOs as theiy are modified
236  CALL get_qs_env(qs_env, dft_control=dft_control, admm_env=admm_env)
237  IF (dft_control%do_admm) THEN
238  IF (ASSOCIATED(admm_env%xc_section_primary)) CALL section_vals_release(admm_env%xc_section_primary)
239  IF (ASSOCIATED(admm_env%xc_section_aux)) CALL section_vals_release(admm_env%xc_section_aux)
240  CALL create_admm_xc_section(qs_env%x_data, xc_section, admm_env)
241 
242  ALLOCATE (backup_mos(SIZE(mos)))
243  DO i = 1, SIZE(mos)
244  CALL duplicate_mo_set(backup_mos(i), mos(i))
245  END DO
246  END IF
247 
248  ALLOCATE (dbcsr_work(nspins))
249  DO ispin = 1, nspins
250  ALLOCATE (dbcsr_work(ispin)%matrix)
251  CALL dbcsr_copy(dbcsr_work(ispin)%matrix, matrix_ks(ispin)%matrix)
252  END DO
253 
254  !both spins treated internally
255  CALL qs_ks_build_kohn_sham_matrix(qs_env, calculate_forces=.false., just_energy=.false.)
256 
257  DO ispin = 1, nspins
258  ALLOCATE (fock_matrix(ispin)%matrix)
259  CALL dbcsr_copy(fock_matrix(ispin)%matrix, matrix_ks(ispin)%matrix, name="FOCK MATRIX")
260  CALL dbcsr_release(matrix_ks(ispin)%matrix)
261  CALL dbcsr_copy(matrix_ks(ispin)%matrix, dbcsr_work(ispin)%matrix)
262  END DO
263  CALL dbcsr_deallocate_matrix_set(dbcsr_work)
264 
265  !In case of ADMM, we want to correct for eigenvalues
266  IF (dft_control%do_admm) THEN
267  DO ispin = 1, nspins
268  CALL admm_correct_for_eigenvalues(ispin, admm_env, fock_matrix(ispin)%matrix)
269  END DO
270  END IF
271 
272  !restore xc and HF fraction
273  CALL section_vals_set_subs_vals(xc_section, "XC_FUNCTIONAL", xc_fun_original)
274  CALL section_vals_release(xc_fun_original)
275  qs_env%x_data(:, :)%general_parameter%fraction = og_hfx_frac
276 
277  IF (dft_control%do_admm) THEN
278  IF (ASSOCIATED(admm_env%xc_section_primary)) CALL section_vals_release(admm_env%xc_section_primary)
279  IF (ASSOCIATED(admm_env%xc_section_aux)) CALL section_vals_release(admm_env%xc_section_aux)
280  CALL create_admm_xc_section(qs_env%x_data, xc_section, admm_env)
281 
282  DO i = 1, SIZE(mos)
283  CALL reassign_allocated_mos(mos(i), backup_mos(i))
284  CALL deallocate_mo_set(backup_mos(i))
285  END DO
286  DEALLOCATE (backup_mos)
287  END IF
288  END block
289  END IF
290 
291  !4,5) Build pseudo-canonical MOs if needed + get related Fock matrix elements
292  ALLOCATE (all_struct(nspins), all_coeffs(nspins))
293  ALLOCATE (homo_evals(nspins), lumo_evals(nspins))
294  CALL dbcsr_get_info(matrix_ks(1)%matrix, row_blk_size=nsgf_blk)
295  ALLOCATE (contract_coeffs_backup(nsgf_blk(exat), nspins*ndo_mo))
296 
297  DO ispin = 1, nspins
298  CALL cp_fm_struct_create(hoho_struct, para_env=para_env, context=blacs_env, &
299  ncol_global=nhomo(ispin), nrow_global=nhomo(ispin))
300  CALL cp_fm_struct_create(lulu_struct, para_env=para_env, context=blacs_env, &
301  ncol_global=nlumo(ispin), nrow_global=nlumo(ispin))
302 
303  CALL cp_fm_create(hoho_work, hoho_struct)
304  CALL cp_fm_create(lulu_work, lulu_struct)
305  CALL cp_fm_create(homo_work, homo_struct(ispin)%struct)
306  CALL cp_fm_create(lumo_work, lumo_struct(ispin)%struct)
307 
308  IF (pseudo_canonical) THEN
309  !That is where we rotate the MOs to make them pseudo canonical
310  !The eigenvalues we get from the diagonalization
311 
312  !The Fock matrix in the HOMO subspace
313  CALL cp_fm_create(hoho_fock, hoho_struct)
314  NULLIFY (homo_evals(ispin)%array)
315  ALLOCATE (homo_evals(ispin)%array(nhomo(ispin)))
316  CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, homo_coeffs(ispin), &
317  homo_work, ncol=nhomo(ispin))
318  CALL parallel_gemm('T', 'N', nhomo(ispin), nhomo(ispin), nao, 1.0_dp, homo_coeffs(ispin), &
319  homo_work, 0.0_dp, hoho_fock)
320 
321  !diagonalize and get pseudo-canonical MOs
322  CALL choose_eigv_solver(hoho_fock, hoho_work, homo_evals(ispin)%array)
323  CALL parallel_gemm('N', 'N', nao, nhomo(ispin), nhomo(ispin), 1.0_dp, homo_coeffs(ispin), &
324  hoho_work, 0.0_dp, homo_work)
325  CALL cp_fm_to_fm(homo_work, homo_coeffs(ispin))
326 
327  !overwrite the donor_state's contract coeffs with those
328  contract_coeffs_backup(:, (ispin - 1)*ndo_mo + 1:ispin*ndo_mo) = &
329  donor_state%contract_coeffs(:, (ispin - 1)*ndo_mo + 1:ispin*ndo_mo)
330  start_sgf = sum(nsgf_blk(1:exat - 1)) + 1
331  CALL cp_fm_get_submatrix(homo_coeffs(ispin), &
332  donor_state%contract_coeffs(:, (ispin - 1)*ndo_mo + 1:ispin*ndo_mo), &
333  start_row=start_sgf, start_col=first_domo(ispin), &
334  n_rows=nsgf_blk(exat), n_cols=ndo_mo)
335 
336  !do the same for the pseudo-LUMOs
337  CALL cp_fm_create(lulu_fock, lulu_struct)
338  NULLIFY (lumo_evals(ispin)%array)
339  ALLOCATE (lumo_evals(ispin)%array(nlumo(ispin)))
340  CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, lumo_coeffs(ispin), &
341  lumo_work, ncol=nlumo(ispin))
342  CALL parallel_gemm('T', 'N', nlumo(ispin), nlumo(ispin), nao, 1.0_dp, lumo_coeffs(ispin), &
343  lumo_work, 0.0_dp, lulu_fock)
344 
345  !diagonalize and get pseudo-canonical MOs
346  CALL choose_eigv_solver(lulu_fock, lulu_work, lumo_evals(ispin)%array)
347  CALL parallel_gemm('N', 'N', nao, nlumo(ispin), nlumo(ispin), 1.0_dp, lumo_coeffs(ispin), &
348  lulu_work, 0.0_dp, lumo_work)
349  CALL cp_fm_to_fm(lumo_work, lumo_coeffs(ispin))
350 
351  CALL cp_fm_release(lulu_fock)
352  CALL cp_fm_release(hoho_fock)
353 
354  ELSE !using the generalized Fock matrix diagonal elements
355 
356  !Compute their Fock matrix diagonal
357  ALLOCATE (homo_evals(ispin)%array(nhomo(ispin)))
358  CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, homo_coeffs(ispin), &
359  homo_work, ncol=nhomo(ispin))
360  CALL parallel_gemm('T', 'N', nhomo(ispin), nhomo(ispin), nao, 1.0_dp, homo_coeffs(ispin), &
361  homo_work, 0.0_dp, hoho_work)
362  CALL cp_fm_get_diag(hoho_work, homo_evals(ispin)%array)
363 
364  ALLOCATE (lumo_evals(ispin)%array(nlumo(ispin)))
365  CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, lumo_coeffs(ispin), &
366  lumo_work, ncol=nlumo(ispin))
367  CALL parallel_gemm('T', 'N', nlumo(ispin), nlumo(ispin), nao, 1.0_dp, lumo_coeffs(ispin), &
368  lumo_work, 0.0_dp, lulu_work)
369  CALL cp_fm_get_diag(lulu_work, lumo_evals(ispin)%array)
370 
371  END IF
372  CALL cp_fm_release(homo_work)
373  CALL cp_fm_release(hoho_work)
374  CALL cp_fm_struct_release(hoho_struct)
375  CALL cp_fm_release(lumo_work)
376  CALL cp_fm_release(lulu_work)
377  CALL cp_fm_struct_release(lulu_struct)
378 
379  !Put back homo and lumo coeffs together, to fit tensor structure
380  CALL cp_fm_struct_create(all_struct(ispin)%struct, para_env=para_env, context=blacs_env, &
381  ncol_global=nhomo(ispin) + nlumo(ispin), nrow_global=nao)
382  CALL cp_fm_create(all_coeffs(ispin), all_struct(ispin)%struct)
383  CALL cp_fm_to_fm(homo_coeffs(ispin), all_coeffs(ispin), ncol=nhomo(ispin), &
384  source_start=1, target_start=1)
385  CALL cp_fm_to_fm(lumo_coeffs(ispin), all_coeffs(ispin), ncol=nlumo(ispin), &
386  source_start=1, target_start=nhomo(ispin) + 1)
387 
388  END DO !ispin
389 
390  !get semi-contracted tensor (AOs to MOs, keep RI uncontracted)
391  CALL contract_aos_to_mos(ja_x, oi_y, mo_template, all_coeffs, nhomo, nlumo, &
392  donor_state, xas_tdp_env, xas_tdp_control, qs_env)
393 
394  !intermediate clean-up
395  DO ispin = 1, nspins
396  CALL cp_fm_release(all_coeffs(ispin))
397  CALL cp_fm_release(homo_coeffs(ispin))
398  CALL cp_fm_release(lumo_coeffs(ispin))
399  CALL cp_fm_struct_release(all_struct(ispin)%struct)
400  CALL cp_fm_struct_release(lumo_struct(ispin)%struct)
401  CALL cp_fm_struct_release(homo_struct(ispin)%struct)
402  END DO
403 
404  !6) GW2X iterations
405 
406  IF (nspins == 1) THEN
407  !restricted-closed shell: only alpha spin
408  CALL gw2x_rcs_iterations(first_domo(1), ja_x(1), oi_y, mo_template(1, 1), homo_evals(1)%array, &
409  lumo_evals(1)%array, donor_state, xas_tdp_control, qs_env)
410  ELSE
411  !open-shell, need both spins
412  CALL gw2x_os_iterations(first_domo, ja_x, oi_y, mo_template, homo_evals, lumo_evals, &
413  donor_state, xas_tdp_control, qs_env)
414  END IF
415 
416  !restore proper contract_coeffs
417  IF (pseudo_canonical) THEN
418  donor_state%contract_coeffs(:, :) = contract_coeffs_backup(:, :)
419  END IF
420 
421  !Final clean-up
422  DO ido_mo = 1, nspins*ndo_mo
423  CALL dbt_destroy(oi_y(ido_mo))
424  END DO
425  DO ispin = 1, nspins
426  CALL dbt_destroy(ja_x(ispin))
427  DEALLOCATE (homo_evals(ispin)%array)
428  DEALLOCATE (lumo_evals(ispin)%array)
429  DO jspin = 1, nspins
430  CALL dbt_destroy(mo_template(ispin, jspin))
431  END DO
432  END DO
433  DEALLOCATE (oi_y, homo_evals, lumo_evals)
434 
435  CALL timestop(handle)
436 
437  END SUBROUTINE gw2x_shift
438 
439 ! **************************************************************************************************
440 !> \brief Preforms the GW2X iterations in the restricted-closed shell formalism according to the
441 !> Newton-Raphson method
442 !> \param first_domo index of the first core donor MO to consider
443 !> \param ja_X semi-contracted tensor with j: occupied MO, a: virtual MO, X: RI basis element
444 !> \param oI_Y semi-contracted tensors with o: all MOs, I donor core MO, Y: RI basis element
445 !> \param mo_template tensor template for fully MO contracted tensor
446 !> \param homo_evals ...
447 !> \param lumo_evals ...
448 !> \param donor_state ...
449 !> \param xas_tdp_control ...
450 !> \param qs_env ...
451 ! **************************************************************************************************
452  SUBROUTINE gw2x_rcs_iterations(first_domo, ja_X, oI_Y, mo_template, homo_evals, lumo_evals, &
453  donor_state, xas_tdp_control, qs_env)
454 
455  INTEGER, INTENT(IN) :: first_domo
456  TYPE(dbt_type), INTENT(inout) :: ja_x
457  TYPE(dbt_type), DIMENSION(:), INTENT(inout) :: oi_y
458  TYPE(dbt_type), INTENT(inout) :: mo_template
459  REAL(dp), DIMENSION(:), INTENT(IN) :: homo_evals, lumo_evals
460  TYPE(donor_state_type), POINTER :: donor_state
461  TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
462  TYPE(qs_environment_type), POINTER :: qs_env
463 
464  CHARACTER(len=*), PARAMETER :: routinen = 'GW2X_rcs_iterations'
465 
466  INTEGER :: batch_size, bounds_1d(2), bounds_2d(2, 2), handle, i, ibatch, ido_mo, iloop, &
467  max_iter, nbatch_occ, nbatch_virt, nblk_occ, nblk_virt, nblks(3), ndo_mo, nhomo, nlumo, &
468  occ_bo(2), output_unit, tmp_sum, virt_bo(2)
469  INTEGER, ALLOCATABLE, DIMENSION(:) :: mo_blk_size
470  REAL(dp) :: c_os, c_ss, dg, diff, ds1, ds2, eps_i, &
471  eps_iter, g, omega_k, parts(4), s1, s2
472  TYPE(dbt_type) :: aj_ib, aj_ib_diff, aj_x, ja_ik, &
473  ja_ik_diff
474  TYPE(mp_para_env_type), POINTER :: para_env
475 
476  CALL timeset(routinen, handle)
477 
478  eps_iter = xas_tdp_control%gw2x_eps
479  max_iter = xas_tdp_control%max_gw2x_iter
480  c_os = xas_tdp_control%c_os
481  c_ss = xas_tdp_control%c_ss
482  batch_size = xas_tdp_control%batch_size
483 
484  ndo_mo = donor_state%ndo_mo
485  output_unit = cp_logger_get_default_io_unit()
486 
487  nhomo = SIZE(homo_evals)
488  nlumo = SIZE(lumo_evals)
489 
490  CALL get_qs_env(qs_env, para_env=para_env)
491 
492  !We use the Newton-Raphson method to find the zero of the function:
493  !g(omega) = eps_I - omega + mp2 terms, dg(omega) = -1 + d/d_omega (mp2 terms)
494  !We simply compute at each iteration: omega_k+1 = omega_k - g(omega_k)/dg(omega_k)
495 
496  !need transposed tensor of (ja|X) for optimal contraction scheme (s.t. (aj|X) block is on same
497  !processor as (ja|X))
498  CALL dbt_create(ja_x, aj_x)
499  CALL dbt_copy(ja_x, aj_x, order=[2, 1, 3])
500 
501  !split the MO blocks into batches for memory friendly batched contraction
502  !huge dense tensors never need to be stored
503  CALL dbt_get_info(ja_x, nblks_total=nblks)
504  ALLOCATE (mo_blk_size(nblks(1)))
505  CALL dbt_get_info(ja_x, blk_size_1=mo_blk_size)
506 
507  tmp_sum = 0
508  DO i = 1, nblks(1)
509  tmp_sum = tmp_sum + mo_blk_size(i)
510  IF (tmp_sum == nhomo) THEN
511  nblk_occ = i
512  nblk_virt = nblks(1) - i
513  EXIT
514  END IF
515  END DO
516  nbatch_occ = max(1, nblk_occ/batch_size)
517  nbatch_virt = max(1, nblk_virt/batch_size)
518 
519  !Loop over donor_states
520  DO ido_mo = 1, ndo_mo
521  IF (output_unit > 0) THEN
522  WRITE (unit=output_unit, fmt="(/,T5,A,I2,A,I4,A,/,T5,A)") &
523  "- GW2X correction for donor MO with spin ", 1, &
524  " and MO index ", donor_state%mo_indices(ido_mo, 1), ":", &
525  " iteration convergence (eV)"
526  CALL m_flush(output_unit)
527  END IF
528 
529  !starting values
530  eps_i = homo_evals(first_domo + ido_mo - 1)
531  omega_k = eps_i
532  iloop = 0
533  diff = 2.0_dp*eps_iter
534 
535  DO WHILE (abs(diff) > eps_iter)
536  iloop = iloop + 1
537 
538  !Compute the mp2 terms and their first derivative
539  parts = 0.0_dp
540 
541  !We do batched contraction for (ja|Ik) and (ja|Ib) to never have to carry the full tensor
542  DO ibatch = 1, nbatch_occ
543 
544  occ_bo = get_limit(nblk_occ, nbatch_occ, ibatch - 1)
545  bounds_1d = [sum(mo_blk_size(1:occ_bo(1) - 1)) + 1, sum(mo_blk_size(1:occ_bo(2)))]
546 
547  CALL dbt_create(mo_template, ja_ik)
548  CALL dbt_contract(alpha=1.0_dp, tensor_1=ja_x, tensor_2=oi_y(ido_mo), &
549  beta=0.0_dp, tensor_3=ja_ik, contract_1=[3], &
550  notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
551  map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
552 
553  !opposite-spin contribution
554  CALL calc_os_oov_contrib(parts(1), parts(2), ja_ik, homo_evals, lumo_evals, homo_evals, &
555  omega_k, c_os, nhomo)
556 
557  bounds_2d(:, 2) = bounds_1d
558  bounds_2d(1, 1) = nhomo + 1
559  bounds_2d(2, 1) = nhomo + nlumo
560 
561  !same-spin contribution. Contraction only neede if c_ss != 0
562  !directly compute the difference (ja|Ik) - (ka|Ij)
563  IF (abs(c_ss) > epsilon(1.0_dp)) THEN
564 
565  CALL dbt_create(ja_ik, ja_ik_diff, map1_2d=[1], map2_2d=[2, 3])
566  CALL dbt_copy(ja_ik, ja_ik_diff, move_data=.true.)
567 
568  CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y(ido_mo), tensor_2=aj_x, &
569  beta=1.0_dp, tensor_3=ja_ik_diff, contract_1=[2], &
570  notcontract_1=[1], contract_2=[3], notcontract_2=[1, 2], &
571  map_1=[1], map_2=[2, 3], bounds_2=[1, nhomo], bounds_3=bounds_2d)
572 
573  CALL calc_ss_oov_contrib(parts(1), parts(2), ja_ik_diff, homo_evals, lumo_evals, omega_k, c_ss)
574 
575  CALL dbt_destroy(ja_ik_diff)
576  END IF !c_ss != 0
577 
578  CALL dbt_destroy(ja_ik)
579  END DO
580 
581  DO ibatch = 1, nbatch_virt
582 
583  virt_bo = get_limit(nblk_virt, nbatch_virt, ibatch - 1)
584  bounds_1d = [sum(mo_blk_size(1:nblk_occ + virt_bo(1) - 1)) + 1, &
585  sum(mo_blk_size(1:nblk_occ + virt_bo(2)))]
586 
587  CALL dbt_create(mo_template, aj_ib)
588  CALL dbt_contract(alpha=1.0_dp, tensor_1=aj_x, tensor_2=oi_y(ido_mo), &
589  beta=0.0_dp, tensor_3=aj_ib, contract_1=[3], &
590  notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
591  map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
592 
593  !opposite-spin contribution
594  CALL calc_os_ovv_contrib(parts(3), parts(4), aj_ib, lumo_evals, homo_evals, lumo_evals, &
595  omega_k, c_os, nhomo, nhomo)
596 
597  !same-spin contribution, only if c_ss is not 0
598  !directly compute the difference (aj|Ib) - (bj|Ia)
599  IF (abs(c_ss) > epsilon(1.0_dp)) THEN
600  bounds_2d(1, 1) = 1
601  bounds_2d(2, 1) = nhomo
602  bounds_2d(:, 2) = bounds_1d
603 
604  CALL dbt_create(aj_ib, aj_ib_diff, map1_2d=[1], map2_2d=[2, 3])
605  CALL dbt_copy(aj_ib, aj_ib_diff, move_data=.true.)
606 
607  CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y(ido_mo), tensor_2=ja_x, &
608  beta=1.0_dp, tensor_3=aj_ib_diff, contract_1=[2], &
609  notcontract_1=[1], contract_2=[3], notcontract_2=[1, 2], &
610  map_1=[1], map_2=[2, 3], &
611  bounds_2=[nhomo + 1, nhomo + nlumo], bounds_3=bounds_2d)
612 
613  CALL calc_ss_ovv_contrib(parts(3), parts(4), aj_ib_diff, homo_evals, lumo_evals, omega_k, c_ss)
614 
615  CALL dbt_destroy(aj_ib_diff)
616  END IF ! c_ss not 0
617 
618  CALL dbt_destroy(aj_ib)
619  END DO
620 
621  CALL para_env%sum(parts)
622  s1 = parts(1); ds1 = parts(2)
623  s2 = parts(3); ds2 = parts(4)
624 
625  !evaluate g and its derivative
626  g = eps_i - omega_k + s1 + s2
627  dg = -1.0_dp + ds1 + ds2
628 
629  !compute the diff to the new step
630  diff = -g/dg
631 
632  !and the new omega
633  omega_k = omega_k + diff
634  diff = diff*evolt
635 
636  IF (output_unit > 0) THEN
637  WRITE (unit=output_unit, fmt="(T21,I18,F32.6)") &
638  iloop, diff
639  CALL m_flush(output_unit)
640  END IF
641 
642  IF (iloop > max_iter) THEN
643  cpwarn("GW2X iteration not converged.")
644  EXIT
645  END IF
646  END DO !while loop on eps_iter
647 
648  !compute the shift and update donor_state
649  donor_state%gw2x_evals(ido_mo, 1) = omega_k
650 
651  IF (output_unit > 0) THEN
652  WRITE (unit=output_unit, fmt="(/T7,A,F11.6,/,T5,A,F11.6)") &
653  "Final GW2X shift for this donor MO (eV):", &
654  (donor_state%energy_evals(ido_mo, 1) - omega_k)*evolt
655  END IF
656 
657  END DO !ido_mo
658 
659  CALL dbt_destroy(aj_x)
660 
661  CALL timestop(handle)
662 
663  END SUBROUTINE gw2x_rcs_iterations
664 
665 ! **************************************************************************************************
666 !> \brief Preforms the GW2X iterations in the open-shell shell formalism according to the
667 !> Newton-Raphson method
668 !> \param first_domo index of the first core donor MO to consider, for each spin
669 !> \param ja_X semi-contracted tensors with j: occupied MO, a: virtual MO, X: RI basis element
670 !> \param oI_Y semi-contracted tensors with o: all MOs, I donor core MO, Y: RI basis element
671 !> \param mo_template tensor template for fully MO contracted tensor, for each spin combination
672 !> \param homo_evals ...
673 !> \param lumo_evals ...
674 !> \param donor_state ...
675 !> \param xas_tdp_control ...
676 !> \param qs_env ...
677 ! **************************************************************************************************
678  SUBROUTINE gw2x_os_iterations(first_domo, ja_X, oI_Y, mo_template, homo_evals, lumo_evals, &
679  donor_state, xas_tdp_control, qs_env)
680 
681  INTEGER, INTENT(IN) :: first_domo(2)
682  TYPE(dbt_type), DIMENSION(:), INTENT(inout) :: ja_x, oi_y
683  TYPE(dbt_type), DIMENSION(:, :), INTENT(inout) :: mo_template
684  TYPE(cp_1d_r_p_type), DIMENSION(:), INTENT(in) :: homo_evals, lumo_evals
685  TYPE(donor_state_type), POINTER :: donor_state
686  TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
687  TYPE(qs_environment_type), POINTER :: qs_env
688 
689  CHARACTER(len=*), PARAMETER :: routinen = 'GW2X_os_iterations'
690 
691  INTEGER :: batch_size, bounds_1d(2), bounds_2d(2, 2), handle, i, ibatch, ido_mo, iloop, &
692  ispin, max_iter, nbatch_occ, nbatch_virt, nblk_occ, nblk_virt, nblks(3), ndo_mo, &
693  nhomo(2), nlumo(2), nspins, occ_bo(2), other_spin, output_unit, tmp_sum, virt_bo(2)
694  INTEGER, ALLOCATABLE, DIMENSION(:) :: mo_blk_size
695  REAL(dp) :: c_os, c_ss, dg, diff, ds1, ds2, eps_i, &
696  eps_iter, g, omega_k, parts(4), s1, s2
697  TYPE(dbt_type) :: aj_ib, aj_ib_diff, ja_ik, ja_ik_diff
698  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: aj_x
699  TYPE(mp_para_env_type), POINTER :: para_env
700 
701  CALL timeset(routinen, handle)
702 
703  eps_iter = xas_tdp_control%gw2x_eps
704  max_iter = xas_tdp_control%max_gw2x_iter
705  c_os = xas_tdp_control%c_os
706  c_ss = xas_tdp_control%c_ss
707  batch_size = xas_tdp_control%batch_size
708 
709  nspins = 2
710  ndo_mo = donor_state%ndo_mo
711  output_unit = cp_logger_get_default_io_unit()
712 
713  DO ispin = 1, nspins
714  nhomo(ispin) = SIZE(homo_evals(ispin)%array)
715  nlumo(ispin) = SIZE(lumo_evals(ispin)%array)
716  END DO
717 
718  CALL get_qs_env(qs_env, para_env=para_env)
719 
720  !We use the Newton-Raphson method to find the zero of the function:
721  !g(omega) = eps_I - omega + mp2 terms, dg(omega) = -1 + d/d_omega (mp2 terms)
722  !We simply compute at each iteration: omega_k+1 = omega_k - g(omega_k)/dg(omega_k)
723 
724  ALLOCATE (aj_x(2))
725  DO ispin = 1, nspins
726 
727  !need transposed tensor of (ja|X) for optimal contraction scheme,
728  !s.t. (aj|X) block is on same processor as (ja|X)) and differences can be taken
729  CALL dbt_create(ja_x(ispin), aj_x(ispin))
730  CALL dbt_copy(ja_x(ispin), aj_x(ispin), order=[2, 1, 3])
731 
732  END DO ! ispin
733  DO ispin = 1, nspins
734 
735  other_spin = 3 - ispin
736 
737  !split the MO blocks into batches for memory friendly batched contraction
738  !huge dense tensors never need to be stored. Split MOs for the current spin
739  CALL dbt_get_info(ja_x(ispin), nblks_total=nblks)
740  ALLOCATE (mo_blk_size(nblks(1)))
741  CALL dbt_get_info(ja_x(ispin), blk_size_1=mo_blk_size)
742 
743  tmp_sum = 0
744  DO i = 1, nblks(1)
745  tmp_sum = tmp_sum + mo_blk_size(i)
746  IF (tmp_sum == nhomo(ispin)) THEN
747  nblk_occ = i
748  nblk_virt = nblks(1) - i
749  EXIT
750  END IF
751  END DO
752  nbatch_occ = max(1, nblk_occ/batch_size)
753  nbatch_virt = max(1, nblk_virt/batch_size)
754 
755  !Loop over donor_states of the current spin
756  DO ido_mo = 1, ndo_mo
757  IF (output_unit > 0) THEN
758  WRITE (unit=output_unit, fmt="(/,T5,A,I2,A,I4,A,/,T5,A)") &
759  "- GW2X correction for donor MO with spin ", ispin, &
760  " and MO index ", donor_state%mo_indices(ido_mo, ispin), ":", &
761  " iteration convergence (eV)"
762  CALL m_flush(output_unit)
763  END IF
764 
765  !starting values
766  eps_i = homo_evals(ispin)%array(first_domo(ispin) + ido_mo - 1)
767  omega_k = eps_i
768  iloop = 0
769  diff = 2.0_dp*eps_iter
770 
771  DO WHILE (abs(diff) > eps_iter)
772  iloop = iloop + 1
773 
774  !Compute the mp2 terms and their first derivative
775  parts = 0.0_dp
776 
777  !We do batched contraction for (ja|Ik) and (ja|Ib) to never have to carry the full tensor
778  DO ibatch = 1, nbatch_occ
779 
780  !opposite-spin contribution, i.e. (j_beta a_beta| I_alpha k_alpha) and vice-versa
781  !do the batching along k because same spin as donor MO
782  occ_bo = get_limit(nblk_occ, nbatch_occ, ibatch - 1)
783  bounds_1d = [sum(mo_blk_size(1:occ_bo(1) - 1)) + 1, sum(mo_blk_size(1:occ_bo(2)))]
784 
785  CALL dbt_create(mo_template(other_spin, ispin), ja_ik)
786  CALL dbt_contract(alpha=1.0_dp, tensor_1=ja_x(other_spin), &
787  tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
788  beta=0.0_dp, tensor_3=ja_ik, contract_1=[3], &
789  notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
790  map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
791 
792  CALL calc_os_oov_contrib(parts(1), parts(2), ja_ik, homo_evals(other_spin)%array, &
793  lumo_evals(other_spin)%array, homo_evals(ispin)%array, &
794  omega_k, c_os, nhomo(other_spin))
795 
796  CALL dbt_destroy(ja_ik)
797 
798  !same-spin contribution, need to compute (ja|Ik) - (ka|Ij), all with the current spin
799  !skip if c_ss == 0
800  IF (abs(c_ss) > epsilon(1.0_dp)) THEN
801 
802  !same batching as opposite spin
803  CALL dbt_create(mo_template(ispin, ispin), ja_ik)
804  CALL dbt_contract(alpha=1.0_dp, tensor_1=ja_x(ispin), &
805  tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
806  beta=0.0_dp, tensor_3=ja_ik, contract_1=[3], &
807  notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
808  map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
809 
810  bounds_2d(:, 2) = bounds_1d
811  bounds_2d(1, 1) = nhomo(ispin) + 1
812  bounds_2d(2, 1) = nhomo(ispin) + nlumo(ispin)
813 
814  !the tensor difference is directly taken here
815  CALL dbt_create(ja_ik, ja_ik_diff, map1_2d=[1], map2_2d=[2, 3])
816  CALL dbt_copy(ja_ik, ja_ik_diff, move_data=.true.)
817 
818  CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y((ispin - 1)*ndo_mo + ido_mo), &
819  tensor_2=aj_x(ispin), beta=1.0_dp, tensor_3=ja_ik_diff, &
820  contract_1=[2], notcontract_1=[1], contract_2=[3], notcontract_2=[1, 2], &
821  map_1=[1], map_2=[2, 3], bounds_2=[1, nhomo(ispin)], bounds_3=bounds_2d)
822 
823  CALL calc_ss_oov_contrib(parts(1), parts(2), ja_ik_diff, homo_evals(ispin)%array, &
824  lumo_evals(ispin)%array, omega_k, c_ss)
825 
826  CALL dbt_destroy(ja_ik_diff)
827  CALL dbt_destroy(ja_ik)
828  END IF !c_ss !!= 0
829 
830  END DO
831 
832  DO ibatch = 1, nbatch_virt
833 
834  !opposite-spin contribution, i.e. (a_beta j_beta| I_alpha b_alpha) and vice-versa
835  !do the batching along b because same spin as donor MO
836  virt_bo = get_limit(nblk_virt, nbatch_virt, ibatch - 1)
837  bounds_1d = [sum(mo_blk_size(1:nblk_occ + virt_bo(1) - 1)) + 1, &
838  sum(mo_blk_size(1:nblk_occ + virt_bo(2)))]
839 
840  CALL dbt_create(mo_template(other_spin, ispin), aj_ib)
841  CALL dbt_contract(alpha=1.0_dp, tensor_1=aj_x(other_spin), &
842  tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
843  beta=0.0_dp, tensor_3=aj_ib, contract_1=[3], &
844  notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
845  map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
846 
847  CALL calc_os_ovv_contrib(parts(3), parts(4), aj_ib, lumo_evals(other_spin)%array, &
848  homo_evals(other_spin)%array, lumo_evals(ispin)%array, &
849  omega_k, c_os, nhomo(other_spin), nhomo(ispin))
850 
851  CALL dbt_destroy(aj_ib)
852 
853  !same-spin contribution, need to compute (aj|Ib) - (bj|Ia), all with the current spin
854  !skip if c_ss == 0
855  IF (abs(c_ss) > epsilon(1.0_dp)) THEN
856 
857  !same batching as opposite spin
858  CALL dbt_create(mo_template(ispin, ispin), aj_ib)
859  CALL dbt_contract(alpha=1.0_dp, tensor_1=aj_x(ispin), &
860  tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
861  beta=0.0_dp, tensor_3=aj_ib, contract_1=[3], &
862  notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
863  map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
864 
865  bounds_2d(1, 1) = 1
866  bounds_2d(2, 1) = nhomo(ispin)
867  bounds_2d(:, 2) = bounds_1d
868 
869  CALL dbt_create(aj_ib, aj_ib_diff, map1_2d=[1], map2_2d=[2, 3])
870  CALL dbt_copy(aj_ib, aj_ib_diff, move_data=.true.)
871 
872  CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y((ispin - 1)*ndo_mo + ido_mo), &
873  tensor_2=ja_x(ispin), beta=1.0_dp, tensor_3=aj_ib_diff, &
874  contract_1=[2], notcontract_1=[1], contract_2=[3], &
875  notcontract_2=[1, 2], map_1=[1], map_2=[2, 3], &
876  bounds_2=[nhomo(ispin) + 1, nhomo(ispin) + nlumo(ispin)], &
877  bounds_3=bounds_2d)
878 
879  CALL calc_ss_ovv_contrib(parts(3), parts(4), aj_ib_diff, homo_evals(ispin)%array, &
880  lumo_evals(ispin)%array, omega_k, c_ss)
881 
882  CALL dbt_destroy(aj_ib_diff)
883  CALL dbt_destroy(aj_ib)
884  END IF ! c_ss not 0
885 
886  END DO
887 
888  CALL para_env%sum(parts)
889  s1 = parts(1); ds1 = parts(2)
890  s2 = parts(3); ds2 = parts(4)
891 
892  !evaluate g and its derivative
893  g = eps_i - omega_k + s1 + s2
894  dg = -1.0_dp + ds1 + ds2
895 
896  !compute the diff to the new step
897  diff = -g/dg
898 
899  !and the new omega
900  omega_k = omega_k + diff
901  diff = diff*evolt
902 
903  IF (output_unit > 0) THEN
904  WRITE (unit=output_unit, fmt="(T21,I18,F32.6)") &
905  iloop, diff
906  CALL m_flush(output_unit)
907  END IF
908 
909  IF (iloop > max_iter) THEN
910  cpwarn("GW2X iteration not converged.")
911  EXIT
912  END IF
913  END DO !while loop on eps_iter
914 
915  !compute the shift and update donor_state
916  donor_state%gw2x_evals(ido_mo, ispin) = omega_k
917 
918  IF (output_unit > 0) THEN
919  WRITE (unit=output_unit, fmt="(/T7,A,F11.6,/,T5,A,F11.6)") &
920  "Final GW2X shift for this donor MO (eV):", &
921  (donor_state%energy_evals(ido_mo, ispin) - omega_k)*evolt
922  END IF
923 
924  END DO !ido_mo
925 
926  DEALLOCATE (mo_blk_size)
927  END DO ! ispin
928 
929  DO ispin = 1, nspins
930  CALL dbt_destroy(aj_x(ispin))
931  END DO
932 
933  CALL timestop(handle)
934 
935  END SUBROUTINE gw2x_os_iterations
936 
937 ! **************************************************************************************************
938 !> \brief Takes the 3-center integrals from the ri_ex_3c tensor and returns a full tensor. Since
939 !> ri_ex_3c is only half filled because of symmetry, we have to add the transpose
940 !> and scale the diagonal blocks by 0.5
941 !> \param pq_X the full (desymmetrized) tensor containing the (pq|X) exchange integrals, in a new
942 !> 3d distribution and optimized block sizes
943 !> \param exat index of current excited atom
944 !> \param xas_tdp_env ...
945 !> \param qs_env ...
946 ! **************************************************************************************************
947  SUBROUTINE get_full_pqx_from_3c_ex(pq_X, exat, xas_tdp_env, qs_env)
948 
949  TYPE(dbt_type), INTENT(INOUT) :: pq_x
950  INTEGER, INTENT(IN) :: exat
951  TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
952  TYPE(qs_environment_type), POINTER :: qs_env
953 
954  INTEGER :: i, ind(3), natom, nblk_ri, nsgf_x
955  INTEGER, ALLOCATABLE, DIMENSION(:) :: orb_blk_size, proc_dist_1, proc_dist_2, &
956  proc_dist_3
957  INTEGER, DIMENSION(3) :: pdims
958  LOGICAL :: found
959  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: pblock
960  TYPE(dbt_distribution_type) :: t_dist
961  TYPE(dbt_iterator_type) :: iter
962  TYPE(dbt_pgrid_type) :: t_pgrid
963  TYPE(dbt_type) :: pq_x_tmp, work
964  TYPE(mp_para_env_type), POINTER :: para_env
965 
966  NULLIFY (para_env)
967 
968  !create work tensor with same 2D dist as pq_X, but only keep excited atom along RI direction
969  CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
970  CALL dbt_get_info(xas_tdp_env%ri_3c_ex, pdims=pdims)
971  nsgf_x = SIZE(xas_tdp_env%ri_inv_ex, 1)
972  nblk_ri = 1
973 
974  CALL dbt_pgrid_create(para_env, pdims, t_pgrid)
975  ALLOCATE (proc_dist_1(natom), proc_dist_2(natom), orb_blk_size(natom))
976  CALL dbt_get_info(xas_tdp_env%ri_3c_ex, proc_dist_1=proc_dist_1, proc_dist_2=proc_dist_2, &
977  blk_size_1=orb_blk_size)
978  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=proc_dist_1, nd_dist_2=proc_dist_2, &
979  nd_dist_3=[(0, i=1, nblk_ri)])
980 
981  CALL dbt_create(work, name="(pq|X)", dist=t_dist, map1_2d=[1], map2_2d=[2, 3], &
982  blk_size_1=orb_blk_size, blk_size_2=orb_blk_size, blk_size_3=[nsgf_x])
983  CALL dbt_distribution_destroy(t_dist)
984 
985  !dist of 3c_ex and work match, can simply copy blocks over. Diagonal with factor 0.5
986 
987 !$OMP PARALLEL DEFAULT(NONE) SHARED(xas_tdp_env,exat,work,orb_blk_size,nsgf_x) &
988 !$OMP PRIVATE(iter,ind,pblock,found)
989  CALL dbt_iterator_start(iter, xas_tdp_env%ri_3c_ex)
990  DO WHILE (dbt_iterator_blocks_left(iter))
991  CALL dbt_iterator_next_block(iter, ind)
992  CALL dbt_get_block(xas_tdp_env%ri_3c_ex, ind, pblock, found)
993 
994  IF (ind(1) == ind(2)) pblock = 0.5_dp*pblock
995  IF (ind(3) /= exat) cycle
996 
997  CALL dbt_put_block(work, [ind(1), ind(2), 1], &
998  [orb_blk_size(ind(1)), orb_blk_size(ind(2)), nsgf_x], pblock)
999 
1000  DEALLOCATE (pblock)
1001  END DO
1002  CALL dbt_iterator_stop(iter)
1003 !$OMP END PARALLEL
1004  CALL dbt_finalize(work)
1005 
1006  !create (pq|X) based on work and copy over
1007  CALL dbt_create(work, pq_x_tmp)
1008  CALL dbt_copy(work, pq_x_tmp)
1009  CALL dbt_copy(work, pq_x_tmp, order=[2, 1, 3], summation=.true., move_data=.true.)
1010 
1011  CALL dbt_destroy(work)
1012 
1013  !create the pgrid, based on the 2D dbcsr grid
1014  CALL dbt_pgrid_destroy(t_pgrid)
1015  pdims = 0
1016  CALL dbt_pgrid_create(para_env, pdims, t_pgrid, tensor_dims=[natom, natom, 1])
1017 
1018  !cyclic distribution accross all directions.
1019  ALLOCATE (proc_dist_3(nblk_ri))
1020  CALL dbt_default_distvec(natom, pdims(1), orb_blk_size, proc_dist_1)
1021  CALL dbt_default_distvec(natom, pdims(2), orb_blk_size, proc_dist_2)
1022  CALL dbt_default_distvec(nblk_ri, pdims(3), [nsgf_x], proc_dist_3)
1023  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=proc_dist_1, nd_dist_2=proc_dist_2, &
1024  nd_dist_3=proc_dist_3)
1025 
1026  CALL dbt_create(pq_x, name="(pq|X)", dist=t_dist, map1_2d=[2, 3], map2_2d=[1], &
1027  blk_size_1=orb_blk_size, blk_size_2=orb_blk_size, blk_size_3=[nsgf_x])
1028  CALL dbt_copy(pq_x_tmp, pq_x, move_data=.true.)
1029 
1030  CALL dbt_distribution_destroy(t_dist)
1031  CALL dbt_pgrid_destroy(t_pgrid)
1032  CALL dbt_destroy(pq_x_tmp)
1033 
1034  END SUBROUTINE get_full_pqx_from_3c_ex
1035 
1036 ! **************************************************************************************************
1037 !> \brief Contracts (pq|X) and (rI|Y) from AOs to MOs to (ja|X) and (oI|Y) respectively, where
1038 !> j is a occupied MO, a is a virtual MO and o is a general MO
1039 !> \param ja_X partial contraction over occupied MOs j, virtual MOs a: (ja|X), for both spins (alpha-alpha or beta-beta)
1040 !> \param oI_Y partial contraction over all MOs o and donor MOs I (can be more than 1 if 2p or open-shell)
1041 !> \param ja_Io_template template to be able to build tensors after calling this routine, for each spin combination
1042 !> \param mo_coeffs ...
1043 !> \param nocc ...
1044 !> \param nvirt ...
1045 !> \param donor_state ...
1046 !> \param xas_tdp_env ...
1047 !> \param xas_tdp_control ...
1048 !> \param qs_env ...
1049 !> \note the multiplication by (X|Y)^-1 is included in the final (oI|Y) tensor. Only integrals with the
1050 !> same spin on one center are non-zero, i.e. (oI|Y) is non zero only if both o and Y have the same spin
1051 ! **************************************************************************************************
1052  SUBROUTINE contract_aos_to_mos(ja_X, oI_Y, ja_Io_template, mo_coeffs, nocc, nvirt, &
1053  donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1054 
1055  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:), &
1056  INTENT(INOUT) :: ja_x, oi_y
1057  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :), &
1058  INTENT(INOUT) :: ja_io_template
1059  TYPE(cp_fm_type), DIMENSION(:), INTENT(INOUT) :: mo_coeffs
1060  INTEGER, INTENT(IN) :: nocc(2), nvirt(2)
1061  TYPE(donor_state_type), POINTER :: donor_state
1062  TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
1063  TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
1064  TYPE(qs_environment_type), POINTER :: qs_env
1065 
1066  CHARACTER(len=*), PARAMETER :: routinen = 'contract_AOs_to_MOs'
1067 
1068  INTEGER :: bo(2), handle, i, ispin, jspin, &
1069  nblk_aos, nblk_mos(2), nblk_occ(2), &
1070  nblk_pqx(3), nblk_ri, nblk_virt(2), &
1071  nspins
1072  INTEGER, DIMENSION(3) :: pdims
1073  INTEGER, DIMENSION(:), POINTER :: ao_blk_size, ao_col_dist, ao_row_dist, &
1074  mo_dist_3, ri_blk_size, ri_dist_3
1075  INTEGER, DIMENSION(:, :), POINTER :: mat_pgrid
1076  TYPE(cp_1d_i_p_type), ALLOCATABLE, DIMENSION(:) :: mo_blk_size, mo_col_dist, mo_row_dist
1077  TYPE(dbcsr_distribution_type) :: mat_dist
1078  TYPE(dbcsr_distribution_type), POINTER :: std_mat_dist
1079  TYPE(dbcsr_type) :: dbcsr_mo_coeffs
1080  TYPE(dbt_distribution_type) :: t_dist
1081  TYPE(dbt_pgrid_type) :: t_pgrid
1082  TYPE(dbt_type) :: jq_x, pq_x, t_mo_coeffs
1083  TYPE(mp_para_env_type), POINTER :: para_env
1084 
1085  NULLIFY (ao_blk_size, ao_col_dist, ao_row_dist, mo_dist_3, ri_blk_size, ri_dist_3, mat_pgrid, &
1086  para_env, std_mat_dist)
1087 
1088  CALL timeset(routinen, handle)
1089 
1090  nspins = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) nspins = 2
1091 
1092  !There are 2 contractions to do for the first tensor: (pq|X) --> (jq|X) --> (ja|X)
1093  !Because memory is the main concern, we move_data everytime at the cost of extra copies
1094 
1095  !Some quantities need to be stored for both spins, because they are later combined
1096  CALL get_qs_env(qs_env, para_env=para_env)
1097  ALLOCATE (mo_blk_size(nspins), mo_row_dist(nspins), mo_col_dist(nspins))
1098  ALLOCATE (ja_x(nspins))
1099  ALLOCATE (oi_y(nspins*donor_state%ndo_mo))
1100 
1101  DO ispin = 1, nspins
1102 
1103  !First, we need a fully populated pq_X (spin-independent)
1104  CALL get_full_pqx_from_3c_ex(pq_x, donor_state%at_index, xas_tdp_env, qs_env)
1105 
1106  !Create the tensor pgrid. AOs and RI independent from spin
1107  IF (ispin == 1) THEN
1108  CALL dbt_get_info(pq_x, pdims=pdims, nblks_total=nblk_pqx)
1109  CALL dbt_pgrid_create(para_env, pdims, t_pgrid)
1110  nblk_aos = nblk_pqx(1)
1111  nblk_ri = nblk_pqx(3)
1112  END IF
1113 
1114  !Define MO block sizes, at worst, take one block per proc
1115  nblk_occ(ispin) = max(pdims(1), nocc(ispin)/16)
1116  nblk_virt(ispin) = max(pdims(2), nvirt(ispin)/16)
1117  nblk_mos(ispin) = nblk_occ(ispin) + nblk_virt(ispin)
1118  ALLOCATE (mo_blk_size(ispin)%array(nblk_mos(ispin)))
1119  DO i = 1, nblk_occ(ispin)
1120  bo = get_limit(nocc(ispin), nblk_occ(ispin), i - 1)
1121  mo_blk_size(ispin)%array(i) = bo(2) - bo(1) + 1
1122  END DO
1123  DO i = 1, nblk_virt(ispin)
1124  bo = get_limit(nvirt(ispin), nblk_virt(ispin), i - 1)
1125  mo_blk_size(ispin)%array(nblk_occ(ispin) + i) = bo(2) - bo(1) + 1
1126  END DO
1127 
1128  !Convert the fm mo_coeffs into a dbcsr matrix and then a tensor
1129  CALL get_qs_env(qs_env, dbcsr_dist=std_mat_dist)
1130  CALL dbcsr_distribution_get(std_mat_dist, pgrid=mat_pgrid)
1131  ALLOCATE (ao_blk_size(nblk_aos), ri_blk_size(nblk_ri))
1132  CALL dbt_get_info(pq_x, blk_size_1=ao_blk_size, blk_size_3=ri_blk_size)
1133 
1134  !we opt for a cyclic dist for the MOs (since they should be rather dense anyways)
1135  ALLOCATE (ao_row_dist(nblk_aos), mo_col_dist(ispin)%array(nblk_mos(ispin)))
1136  CALL dbt_default_distvec(nblk_aos, SIZE(mat_pgrid, 1), ao_blk_size, ao_row_dist)
1137  CALL dbt_default_distvec(nblk_mos(ispin), SIZE(mat_pgrid, 2), mo_blk_size(ispin)%array, &
1138  mo_col_dist(ispin)%array)
1139  CALL dbcsr_distribution_new(mat_dist, group=para_env%get_handle(), pgrid=mat_pgrid, &
1140  row_dist=ao_row_dist, col_dist=mo_col_dist(ispin)%array)
1141 
1142  CALL dbcsr_create(dbcsr_mo_coeffs, name="MO coeffs", matrix_type="N", dist=mat_dist, &
1143  row_blk_size=ao_blk_size, col_blk_size=mo_blk_size(ispin)%array)
1144  CALL copy_fm_to_dbcsr(mo_coeffs(ispin), dbcsr_mo_coeffs)
1145 
1146  CALL dbt_create(dbcsr_mo_coeffs, t_mo_coeffs)
1147  CALL dbt_copy_matrix_to_tensor(dbcsr_mo_coeffs, t_mo_coeffs)
1148 
1149  !prepare the (jq|X) tensor for the first contraction (over occupied MOs)
1150  ALLOCATE (mo_row_dist(ispin)%array(nblk_mos(ispin)), ao_col_dist(nblk_aos), ri_dist_3(nblk_ri))
1151  CALL dbt_default_distvec(nblk_mos(ispin), pdims(1), mo_blk_size(ispin)%array, mo_row_dist(ispin)%array)
1152  CALL dbt_default_distvec(nblk_aos, pdims(2), ao_blk_size, ao_col_dist)
1153  CALL dbt_default_distvec(nblk_ri, pdims(3), ri_blk_size, ri_dist_3)
1154  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist(ispin)%array, &
1155  nd_dist_2=ao_col_dist, nd_dist_3=ri_dist_3)
1156 
1157  CALL dbt_create(jq_x, name="(jq|X)", dist=t_dist, map1_2d=[1, 3], map2_2d=[2], &
1158  blk_size_1=mo_blk_size(ispin)%array, blk_size_2=ao_blk_size, blk_size_3=ri_blk_size)
1159  CALL dbt_distribution_destroy(t_dist)
1160 
1161  !contract (pq|X) into (jq|X)
1162  CALL dbt_contract(alpha=1.0_dp, tensor_1=pq_x, tensor_2=t_mo_coeffs, &
1163  beta=0.0_dp, tensor_3=jq_x, contract_1=[1], &
1164  notcontract_1=[2, 3], contract_2=[1], notcontract_2=[2], &
1165  map_1=[2, 3], map_2=[1], bounds_3=[1, nocc(ispin)], &!only want occupied MOs for j
1166  move_data=.true.)
1167 
1168  CALL dbt_destroy(pq_x)
1169  CALL dbt_copy_matrix_to_tensor(dbcsr_mo_coeffs, t_mo_coeffs)
1170 
1171  !prepare (ja|X) tensor for the second contraction (over virtual MOs)
1172  !only virtual-occupied bit of the first 2 indices is occupied + it should be dense
1173  !take blk dist such that blocks are evenly distributed
1174  CALL dbt_default_distvec(nblk_occ(ispin), pdims(1), mo_blk_size(ispin)%array(1:nblk_occ(ispin)), &
1175  mo_row_dist(ispin)%array(1:nblk_occ(ispin)))
1176  CALL dbt_default_distvec(nblk_virt(ispin), pdims(1), &
1177  mo_blk_size(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)), &
1178  mo_row_dist(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)))
1179  CALL dbt_default_distvec(nblk_occ(ispin), pdims(2), mo_blk_size(ispin)%array(1:nblk_occ(ispin)), &
1180  mo_col_dist(ispin)%array(1:nblk_occ(ispin)))
1181  CALL dbt_default_distvec(nblk_virt(ispin), pdims(2), &
1182  mo_blk_size(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)), &
1183  mo_col_dist(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)))
1184  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist(ispin)%array, &
1185  nd_dist_2=mo_col_dist(ispin)%array, nd_dist_3=ri_dist_3)
1186 
1187  CALL dbt_create(ja_x(ispin), name="(ja|X)", dist=t_dist, map1_2d=[1, 2], map2_2d=[3], &
1188  blk_size_1=mo_blk_size(ispin)%array, blk_size_2=mo_blk_size(ispin)%array, &
1189  blk_size_3=ri_blk_size)
1190  CALL dbt_distribution_destroy(t_dist)
1191 
1192  !contract (jq|X) into (ja|X)
1193  CALL dbt_contract(alpha=1.0_dp, tensor_1=jq_x, tensor_2=t_mo_coeffs, &
1194  beta=0.0_dp, tensor_3=ja_x(ispin), contract_1=[2], &
1195  notcontract_1=[1, 3], contract_2=[1], notcontract_2=[2], &
1196  map_1=[1, 3], map_2=[2], move_data=.true., &
1197  bounds_3=[nocc(ispin) + 1, nocc(ispin) + nvirt(ispin)])
1198 
1199  CALL dbt_destroy(jq_x)
1200  CALL dbt_copy_matrix_to_tensor(dbcsr_mo_coeffs, t_mo_coeffs)
1201 
1202  !Finally, get the oI_Y tensors
1203  CALL get_oiy_tensors(oi_y, ispin, ao_blk_size, mo_blk_size(ispin)%array, ri_blk_size, &
1204  t_mo_coeffs, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1205 
1206  !intermediate clen-up
1207  CALL dbt_destroy(t_mo_coeffs)
1208  CALL dbcsr_distribution_release(mat_dist)
1209  CALL dbcsr_release(dbcsr_mo_coeffs)
1210  DEALLOCATE (ao_col_dist, ri_dist_3, ri_blk_size, ao_blk_size, ao_row_dist)
1211 
1212  END DO !ispin
1213 
1214  !create a empty tensor template for the fully contracted (ja|Io) MO integrals, for all spin
1215  !configureations: alpha-alpha|alpha-alpha, alpha-alpha|beta-beta, etc.
1216  ALLOCATE (ja_io_template(nspins, nspins))
1217  DO ispin = 1, nspins
1218  DO jspin = 1, nspins
1219  ALLOCATE (mo_dist_3(nblk_mos(jspin)))
1220  CALL dbt_default_distvec(nblk_occ(jspin), pdims(3), mo_blk_size(jspin)%array(1:nblk_occ(jspin)), &
1221  mo_dist_3(1:nblk_occ(jspin)))
1222  CALL dbt_default_distvec(nblk_virt(jspin), pdims(3), &
1223  mo_blk_size(jspin)%array(nblk_occ(jspin) + 1:nblk_mos(jspin)), &
1224  mo_dist_3(nblk_occ(jspin) + 1:nblk_mos(jspin)))
1225  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist(ispin)%array, &
1226  nd_dist_2=mo_col_dist(ispin)%array, nd_dist_3=mo_dist_3)
1227 
1228  CALL dbt_create(ja_io_template(ispin, jspin), name="(ja|Io)", dist=t_dist, map1_2d=[1, 2], &
1229  map2_2d=[3], blk_size_1=mo_blk_size(ispin)%array, &
1230  blk_size_2=mo_blk_size(ispin)%array, blk_size_3=mo_blk_size(jspin)%array)
1231  CALL dbt_distribution_destroy(t_dist)
1232  DEALLOCATE (mo_dist_3)
1233  END DO
1234  END DO
1235 
1236  !clean-up
1237  CALL dbt_pgrid_destroy(t_pgrid)
1238  DO ispin = 1, nspins
1239  DEALLOCATE (mo_blk_size(ispin)%array)
1240  DEALLOCATE (mo_col_dist(ispin)%array)
1241  DEALLOCATE (mo_row_dist(ispin)%array)
1242  END DO
1243 
1244  CALL timestop(handle)
1245 
1246  END SUBROUTINE contract_aos_to_mos
1247 
1248 ! **************************************************************************************************
1249 !> \brief Contracts the (oI|Y) tensors, for each donor MO
1250 !> \param oI_Y the contracted tensr. It is assumed to be allocated outside of this routine
1251 !> \param ispin ...
1252 !> \param ao_blk_size ...
1253 !> \param mo_blk_size ...
1254 !> \param ri_blk_size ...
1255 !> \param t_mo_coeffs ...
1256 !> \param donor_state ...
1257 !> \param xas_tdp_env ...
1258 !> \param xas_tdp_control ...
1259 !> \param qs_env ...
1260 ! **************************************************************************************************
1261  SUBROUTINE get_oiy_tensors(oI_Y, ispin, ao_blk_size, mo_blk_size, ri_blk_size, t_mo_coeffs, &
1262  donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1263 
1264  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:), &
1265  INTENT(INOUT) :: oi_y
1266  INTEGER, INTENT(IN) :: ispin
1267  INTEGER, DIMENSION(:), POINTER :: ao_blk_size, mo_blk_size, ri_blk_size
1268  TYPE(dbt_type), INTENT(inout) :: t_mo_coeffs
1269  TYPE(donor_state_type), POINTER :: donor_state
1270  TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
1271  TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
1272  TYPE(qs_environment_type), POINTER :: qs_env
1273 
1274  CHARACTER(len=*), PARAMETER :: routinen = 'get_oIY_tensors'
1275 
1276  INTEGER :: bo(2), handle, i, ido_mo, ind(2), natom, &
1277  nblk_aos, nblk_mos, nblk_ri, ndo_mo, &
1278  pdims_2d(2), proc_id
1279  INTEGER, DIMENSION(:), POINTER :: ao_row_dist, mo_row_dist, ri_col_dist
1280  INTEGER, DIMENSION(:, :), POINTER :: mat_pgrid
1281  LOGICAL :: found
1282  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: pblock
1283  TYPE(dbcsr_distribution_type), POINTER :: std_mat_dist
1284  TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: pi_y
1285  TYPE(dbt_distribution_type) :: t_dist
1286  TYPE(dbt_iterator_type) :: iter
1287  TYPE(dbt_pgrid_type) :: t_pgrid
1288  TYPE(dbt_type) :: t_pi_y, t_work
1289  TYPE(mp_para_env_type), POINTER :: para_env
1290 
1291  CALL timeset(routinen, handle)
1292 
1293  CALL get_qs_env(qs_env, natom=natom, para_env=para_env, dbcsr_dist=std_mat_dist)
1294  ndo_mo = donor_state%ndo_mo
1295  nblk_aos = SIZE(ao_blk_size)
1296  nblk_mos = SIZE(mo_blk_size)
1297  nblk_ri = SIZE(ri_blk_size)
1298 
1299  !We first contract (pq|X) over q into I using kernel routines (goes over all MOs and spins)
1300  CALL contract2_ao_to_domo(pi_y, "EXCHANGE", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1301 
1302  !multiply by (X|Y)^-1
1303  CALL ri_all_blocks_mm(pi_y, xas_tdp_env%ri_inv_ex)
1304 
1305  !get standaed 2d matrix proc grid
1306  CALL dbcsr_distribution_get(std_mat_dist, pgrid=mat_pgrid)
1307 
1308  !Loop over donor MOs of this spin
1309  DO ido_mo = (ispin - 1)*ndo_mo + 1, ispin*ndo_mo
1310 
1311  !cast the matrix into a tensor
1312  CALL dbt_create(pi_y(ido_mo)%matrix, t_work)
1313  CALL dbt_copy_matrix_to_tensor(pi_y(ido_mo)%matrix, t_work)
1314 
1315  !find col proc_id of the only populated column of t_work
1316  ALLOCATE (ri_col_dist(natom))
1317  CALL dbt_get_info(t_work, proc_dist_2=ri_col_dist)
1318  proc_id = ri_col_dist(donor_state%at_index)
1319  DEALLOCATE (ri_col_dist)
1320 
1321  !preapre (oI_Y) tensor and (pI|Y) tensor in proper dist and blk sizes
1322  pdims_2d(1) = SIZE(mat_pgrid, 1); pdims_2d(2) = SIZE(mat_pgrid, 2)
1323  CALL dbt_pgrid_create(para_env, pdims_2d, t_pgrid)
1324 
1325  ALLOCATE (ri_col_dist(nblk_ri), ao_row_dist(nblk_aos), mo_row_dist(nblk_mos))
1326  CALL dbt_get_info(t_work, proc_dist_1=ao_row_dist)
1327  ri_col_dist = proc_id
1328 
1329  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=ao_row_dist, nd_dist_2=ri_col_dist)
1330  CALL dbt_create(t_pi_y, name="(pI|Y)", dist=t_dist, map1_2d=[1], map2_2d=[2], &
1331  blk_size_1=ao_blk_size, blk_size_2=ri_blk_size)
1332  CALL dbt_distribution_destroy(t_dist)
1333 
1334  !copy block by block, dist match
1335 
1336 !$OMP PARALLEL DEFAULT(NONE) SHARED(t_work,t_pI_Y,nblk_ri,ri_blk_size,ao_blk_size) &
1337 !$OMP PRIVATE(iter,ind,pblock,found,bo)
1338  CALL dbt_iterator_start(iter, t_work)
1339  DO WHILE (dbt_iterator_blocks_left(iter))
1340  CALL dbt_iterator_next_block(iter, ind)
1341  CALL dbt_get_block(t_work, ind, pblock, found)
1342 
1343  DO i = 1, nblk_ri
1344  bo(1) = sum(ri_blk_size(1:i - 1)) + 1
1345  bo(2) = bo(1) + ri_blk_size(i) - 1
1346  CALL dbt_put_block(t_pi_y, [ind(1), i], [ao_blk_size(ind(1)), ri_blk_size(i)], &
1347  pblock(:, bo(1):bo(2)))
1348  END DO
1349 
1350  DEALLOCATE (pblock)
1351  END DO
1352  CALL dbt_iterator_stop(iter)
1353 !$OMP END PARALLEL
1354  CALL dbt_finalize(t_pi_y)
1355 
1356  !get optimal pgrid for (oI|Y)
1357  CALL dbt_pgrid_destroy(t_pgrid)
1358  pdims_2d = 0
1359  CALL dbt_pgrid_create(para_env, pdims_2d, t_pgrid, tensor_dims=[nblk_mos, nblk_ri])
1360 
1361  CALL dbt_default_distvec(nblk_aos, pdims_2d(1), ao_blk_size, ao_row_dist)
1362  CALL dbt_default_distvec(nblk_mos, pdims_2d(1), mo_blk_size, mo_row_dist)
1363  CALL dbt_default_distvec(nblk_ri, pdims_2d(2), ri_blk_size, ri_col_dist)
1364 
1365  !transfer pI_Y to the correct pgrid
1366  CALL dbt_destroy(t_work)
1367  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=ao_row_dist, nd_dist_2=ri_col_dist)
1368  CALL dbt_create(t_work, name="t_pI_Y", dist=t_dist, map1_2d=[1], map2_2d=[2], &
1369  blk_size_1=ao_blk_size, blk_size_2=ri_blk_size)
1370  CALL dbt_copy(t_pi_y, t_work, move_data=.true.)
1371  CALL dbt_distribution_destroy(t_dist)
1372 
1373  !create (oI|Y)
1374  CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist, nd_dist_2=ri_col_dist)
1375  CALL dbt_create(oi_y(ido_mo), name="(oI|Y)", dist=t_dist, map1_2d=[1], map2_2d=[2], &
1376  blk_size_1=mo_blk_size, blk_size_2=ri_blk_size)
1377  CALL dbt_distribution_destroy(t_dist)
1378 
1379  !contract (pI|Y) into (oI|Y)
1380  CALL dbt_contract(alpha=1.0_dp, tensor_1=t_work, tensor_2=t_mo_coeffs, &
1381  beta=0.0_dp, tensor_3=oi_y(ido_mo), contract_1=[1], &
1382  notcontract_1=[2], contract_2=[1], notcontract_2=[2], &
1383  map_1=[2], map_2=[1]) !no bound, all MOs needed
1384 
1385  !intermediate clean-up
1386  CALL dbt_destroy(t_work)
1387  CALL dbt_destroy(t_pi_y)
1388  CALL dbt_pgrid_destroy(t_pgrid)
1389  DEALLOCATE (ri_col_dist, ao_row_dist, mo_row_dist)
1390 
1391  END DO !ido_mo
1392 
1393  !final clean-up
1394  CALL dbcsr_deallocate_matrix_set(pi_y)
1395 
1396  CALL timestop(handle)
1397 
1398  END SUBROUTINE get_oiy_tensors
1399 
1400 ! **************************************************************************************************
1401 !> \brief Computes the same spin, occupied-occupied-virtual MO contribution to the electron propagator:
1402 !> 0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k) and its 1st derivative wrt omega:
1403 !> -0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k)**2
1404 !> \param contrib ...
1405 !> \param dev the first derivative
1406 !> \param ja_Ik_diff ... contains the (ja|Ik) - (ka|Ij) tensor
1407 !> \param occ_evals ...
1408 !> \param virt_evals ...
1409 !> \param omega ...
1410 !> \param c_ss ...
1411 !> \note since the is same-spin, there is only one possibility for occ_evals and virt_evals
1412 ! **************************************************************************************************
1413  SUBROUTINE calc_ss_oov_contrib(contrib, dev, ja_Ik_diff, occ_evals, virt_evals, omega, c_ss)
1414 
1415  REAL(dp), INTENT(inout) :: contrib, dev
1416  TYPE(dbt_type), INTENT(inout) :: ja_ik_diff
1417  REAL(dp), DIMENSION(:), INTENT(IN) :: occ_evals, virt_evals
1418  REAL(dp), INTENT(in) :: omega, c_ss
1419 
1420  CHARACTER(len=*), PARAMETER :: routinen = 'calc_ss_oov_contrib'
1421 
1422  INTEGER :: a, boff(3), bsize(3), handle, idx1, &
1423  idx2, idx3, ind(3), j, k, nocc
1424  LOGICAL :: found
1425  REAL(dp) :: denom, tmp
1426  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: tensor_blk
1427  TYPE(dbt_iterator_type) :: iter
1428 
1429  CALL timeset(routinen, handle)
1430 
1431  !<Ia||jk> = <Ia|jk> - <Ia|kj> = (Ij|ak) - (Ik|aj) = (ka|Ij) - (ja|Ik)
1432  !Note: the same spin contribution only involve spib-orbitals that are all of the same spin
1433 
1434  nocc = SIZE(occ_evals, 1)
1435 
1436  !Iterate over the tensors and sum. Both tensors have same dist
1437 
1438 !$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1439 !$OMP SHARED(ja_Ik_diff,occ_evals,virt_evals,omega,c_ss,nocc) &
1440 !$OMP PRIVATE(iter,ind,boff,bsize,tensor_blk,found,idx1,idx2,idx3,j,A,k,denom,tmp)
1441  CALL dbt_iterator_start(iter, ja_ik_diff)
1442  DO WHILE (dbt_iterator_blocks_left(iter))
1443  CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1444  CALL dbt_get_block(ja_ik_diff, ind, tensor_blk, found)
1445 
1446  IF (found) THEN
1447 
1448  DO idx3 = 1, bsize(3)
1449  DO idx2 = 1, bsize(2)
1450  DO idx1 = 1, bsize(1)
1451 
1452  !get proper MO indices
1453  j = boff(1) + idx1 - 1
1454  a = boff(2) + idx2 - 1 - nocc
1455  k = boff(3) + idx3 - 1
1456 
1457  !the denominator
1458  denom = omega + virt_evals(a) - occ_evals(j) - occ_evals(k)
1459 
1460  !the same spin contribution
1461  tmp = c_ss*tensor_blk(idx1, idx2, idx3)**2
1462 
1463  contrib = contrib + 0.5_dp*tmp/denom
1464  dev = dev - 0.5_dp*tmp/denom**2
1465 
1466  END DO
1467  END DO
1468  END DO
1469  END IF
1470  DEALLOCATE (tensor_blk)
1471  END DO
1472  CALL dbt_iterator_stop(iter)
1473 !$OMP END PARALLEL
1474 
1475  CALL timestop(handle)
1476 
1477  END SUBROUTINE calc_ss_oov_contrib
1478 
1479 ! **************************************************************************************************
1480 !> \brief Computes the opposite spin, occupied-occupied-virtual MO contribution to the electron propagator:
1481 !> 0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k) and its 1st derivative wrt omega:
1482 !> -0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k)**2
1483 !> \param contrib ...
1484 !> \param dev the first derivative
1485 !> \param ja_Ik ...
1486 !> \param j_evals ocucpied evals for j MO
1487 !> \param a_evals virtual evals for a MO
1488 !> \param k_evals ocucpied evals for k MO
1489 !> \param omega ...
1490 !> \param c_os ...
1491 !> \param a_offset the number of occupied MOs for the same spin as a MOs
1492 !> \note since this is opposite-spin, evals might be different for different spins
1493 ! **************************************************************************************************
1494  SUBROUTINE calc_os_oov_contrib(contrib, dev, ja_Ik, j_evals, a_evals, k_evals, omega, c_os, a_offset)
1495 
1496  REAL(dp), INTENT(inout) :: contrib, dev
1497  TYPE(dbt_type), INTENT(inout) :: ja_ik
1498  REAL(dp), DIMENSION(:), INTENT(IN) :: j_evals, a_evals, k_evals
1499  REAL(dp), INTENT(in) :: omega, c_os
1500  INTEGER, INTENT(IN) :: a_offset
1501 
1502  CHARACTER(len=*), PARAMETER :: routinen = 'calc_os_oov_contrib'
1503 
1504  INTEGER :: a, boff(3), bsize(3), handle, idx1, &
1505  idx2, idx3, ind(3), j, k
1506  LOGICAL :: found
1507  REAL(dp) :: denom, tmp
1508  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: ja_ik_blk
1509  TYPE(dbt_iterator_type) :: iter
1510 
1511  CALL timeset(routinen, handle)
1512 
1513  !<Ia||jk> = <Ia|jk> - <Ia|kj> = (Ij|ak) - (Ik|aj) = (ka|Ij) - (ja|Ik)
1514  !Note: the opposite spin contribution comes in 2 parts, once (ka|Ij) and once (ja|Ik) only,
1515  ! where both spin-orbitals on one center have the same spin, but it is the opposite of
1516  ! the spin on the other center. Because it is eventually summed, can consider only one
1517  ! of the 2 terms, but with a factor 2
1518 
1519  !Iterate over the tensor and sum
1520 
1521 !$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1522 !$OMP SHARED(ja_Ik,j_evals,a_evals,k_evals,omega,c_os,a_offset) &
1523 !$OMP PRIVATE(iter,ind,boff,bsize,ja_Ik_blk,found,idx1,idx2,idx3,j,A,k,denom,tmp)
1524  CALL dbt_iterator_start(iter, ja_ik)
1525  DO WHILE (dbt_iterator_blocks_left(iter))
1526  CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1527  CALL dbt_get_block(ja_ik, ind, ja_ik_blk, found)
1528 
1529  IF (found) THEN
1530 
1531  DO idx3 = 1, bsize(3)
1532  DO idx2 = 1, bsize(2)
1533  DO idx1 = 1, bsize(1)
1534 
1535  !get proper MO indices
1536  j = boff(1) + idx1 - 1
1537  a = boff(2) + idx2 - 1 - a_offset
1538  k = boff(3) + idx3 - 1
1539 
1540  !the denominator
1541  denom = omega + a_evals(a) - j_evals(j) - k_evals(k)
1542 
1543  !the opposite spin contribution
1544  tmp = c_os*ja_ik_blk(idx1, idx2, idx3)**2
1545 
1546  !take factor 2 into acocunt (2 x 0.5 = 1)
1547  contrib = contrib + tmp/denom
1548  dev = dev - tmp/denom**2
1549 
1550  END DO
1551  END DO
1552  END DO
1553  END IF
1554  DEALLOCATE (ja_ik_blk)
1555  END DO
1556  CALL dbt_iterator_stop(iter)
1557 !$OMP END PARALLEL
1558 
1559  CALL timestop(handle)
1560 
1561  END SUBROUTINE calc_os_oov_contrib
1562 
1563 ! **************************************************************************************************
1564 !> \brief Computes the same-spin occupied-virtual-virtual MO contribution to the electron propagator:
1565 !> 0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b) as well as its first derivative:
1566 !> -0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b)**2
1567 !> \param contrib ...
1568 !> \param dev the first derivative
1569 !> \param aj_Ib_diff contatins the (aj|Ib) - (bj|Ia) tensor
1570 !> \param occ_evals ...
1571 !> \param virt_evals ...
1572 !> \param omega ...
1573 !> \param c_ss ...
1574 !> \note since the is same-spin, there is only one possibility for occ_evals and virt_evals
1575 ! **************************************************************************************************
1576  SUBROUTINE calc_ss_ovv_contrib(contrib, dev, aj_Ib_diff, occ_evals, virt_evals, omega, c_ss)
1577 
1578  REAL(dp), INTENT(inout) :: contrib, dev
1579  TYPE(dbt_type), INTENT(inout) :: aj_ib_diff
1580  REAL(dp), DIMENSION(:), INTENT(IN) :: occ_evals, virt_evals
1581  REAL(dp), INTENT(in) :: omega, c_ss
1582 
1583  CHARACTER(len=*), PARAMETER :: routinen = 'calc_ss_ovv_contrib'
1584 
1585  INTEGER :: a, b, boff(3), bsize(3), handle, idx1, &
1586  idx2, idx3, ind(3), j, nocc
1587  LOGICAL :: found
1588  REAL(dp) :: denom, tmp
1589  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: tensor_blk
1590  TYPE(dbt_iterator_type) :: iter
1591 
1592  CALL timeset(routinen, handle)
1593 
1594  !<Ij||ab> = <Ij|ab> - <Ij|ba> = (Ia|jb) - (Ib|ja) = (jb|Ia) - (ja|Ib)
1595  !Notes: only non-zero contribution if all MOs have the same spin
1596 
1597  nocc = SIZE(occ_evals, 1)
1598 
1599  !tensors have matching distributions, can do that safely
1600 
1601 !$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1602 !$OMP SHARED(aj_Ib_diff,occ_evals,virt_evals,omega,c_ss,nocc) &
1603 !$OMP PRIVATE(iter,ind,boff,bsize,tensor_blk,found,idx1,idx2,idx3,j,A,b,denom,tmp)
1604  CALL dbt_iterator_start(iter, aj_ib_diff)
1605  DO WHILE (dbt_iterator_blocks_left(iter))
1606  CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1607  CALL dbt_get_block(aj_ib_diff, ind, tensor_blk, found)
1608 
1609  IF (found) THEN
1610 
1611  DO idx3 = 1, bsize(3)
1612  DO idx2 = 1, bsize(2)
1613  DO idx1 = 1, bsize(1)
1614 
1615  !get proper MO indices
1616  a = boff(1) + idx1 - 1 - nocc
1617  j = boff(2) + idx2 - 1
1618  b = boff(3) + idx3 - 1 - nocc
1619 
1620  !the common denominator
1621  denom = omega + occ_evals(j) - virt_evals(a) - virt_evals(b)
1622 
1623  !the same spin contribution
1624  tmp = c_ss*tensor_blk(idx1, idx2, idx3)**2
1625 
1626  contrib = contrib + 0.5_dp*tmp/denom
1627  dev = dev - 0.5_dp*tmp/denom**2
1628 
1629  END DO
1630  END DO
1631  END DO
1632  END IF
1633  DEALLOCATE (tensor_blk)
1634  END DO
1635  CALL dbt_iterator_stop(iter)
1636 !$OMP END PARALLEL
1637 
1638  CALL timestop(handle)
1639 
1640  END SUBROUTINE calc_ss_ovv_contrib
1641 
1642 ! **************************************************************************************************
1643 !> \brief Computes the opposite-spin occupied-virtual-virtual MO contribution to the electron propagator:
1644 !> 0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b) as well as its first derivative:
1645 !> -0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b)**2
1646 !> \param contrib ...
1647 !> \param dev the first derivative
1648 !> \param aj_Ib ...
1649 !> \param a_evals virtual evals for a MO
1650 !> \param j_evals occupied evals for j MO
1651 !> \param b_evals virtual evals for b MO
1652 !> \param omega ...
1653 !> \param c_os ...
1654 !> \param a_offset number of occupied MOs for the same spin as a MO
1655 !> \param b_offset number of occupied MOs for the same spin as b MO
1656 !> \note since this is opposite-spin, evals might be different for different spins
1657 ! **************************************************************************************************
1658  SUBROUTINE calc_os_ovv_contrib(contrib, dev, aj_Ib, a_evals, j_evals, b_evals, omega, c_os, &
1659  a_offset, b_offset)
1660 
1661  REAL(dp), INTENT(inout) :: contrib, dev
1662  TYPE(dbt_type), INTENT(inout) :: aj_ib
1663  REAL(dp), DIMENSION(:), INTENT(IN) :: a_evals, j_evals, b_evals
1664  REAL(dp), INTENT(in) :: omega, c_os
1665  INTEGER, INTENT(IN) :: a_offset, b_offset
1666 
1667  CHARACTER(len=*), PARAMETER :: routinen = 'calc_os_ovv_contrib'
1668 
1669  INTEGER :: a, b, boff(3), bsize(3), handle, idx1, &
1670  idx2, idx3, ind(3), j
1671  LOGICAL :: found
1672  REAL(dp) :: denom, tmp
1673  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: aj_ib_blk
1674  TYPE(dbt_iterator_type) :: iter
1675 
1676  CALL timeset(routinen, handle)
1677 
1678  !<Ij||ab> = <Ij|ab> - <Ij|ba> = (Ia|jb) - (Ib|ja) = (jb|Ia) - (ja|Ib)
1679  !Notes: only 2 distinct contributions, once from (jb|Ia) and once form (ja|Ib) only, when the 2
1680  ! MOs on one center have one spin and the 2 MOs on the other center have another spin
1681  ! In the end, the sum is such that can take one of those with a factor 2
1682 
1683 !$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1684 !$OMP SHARED(aj_Ib,a_evals,j_evals,b_evals,omega,c_os,a_offset,b_offset) &
1685 !$OMP PRIVATE(iter,ind,boff,bsize,aj_Ib_blk,found,idx1,idx2,idx3,j,A,b,denom,tmp)
1686  CALL dbt_iterator_start(iter, aj_ib)
1687  DO WHILE (dbt_iterator_blocks_left(iter))
1688  CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1689  CALL dbt_get_block(aj_ib, ind, aj_ib_blk, found)
1690 
1691  IF (found) THEN
1692 
1693  DO idx3 = 1, bsize(3)
1694  DO idx2 = 1, bsize(2)
1695  DO idx1 = 1, bsize(1)
1696 
1697  !get proper MO indices
1698  a = boff(1) + idx1 - 1 - a_offset
1699  j = boff(2) + idx2 - 1
1700  b = boff(3) + idx3 - 1 - b_offset
1701 
1702  !the denominator
1703  denom = omega + j_evals(j) - a_evals(a) - b_evals(b)
1704 
1705  !the opposite-spin contribution. Factor 2 taken into account (2 x 0.5 = 1)
1706  tmp = c_os*(aj_ib_blk(idx1, idx2, idx3))**2
1707 
1708  contrib = contrib + tmp/denom
1709  dev = dev - tmp/denom**2
1710 
1711  END DO
1712  END DO
1713  END DO
1714  END IF
1715  DEALLOCATE (aj_ib_blk)
1716  END DO
1717  CALL dbt_iterator_stop(iter)
1718 !$OMP END PARALLEL
1719 
1720  CALL timestop(handle)
1721 
1722  END SUBROUTINE calc_os_ovv_contrib
1723 
1724 ! **************************************************************************************************
1725 !> \brief We try to compute the spin-orbit splitting via perturbation theory. We keep it
1726 !>\ cheap by only inculding the degenerate states (2p, 3d, 3p, etc.).
1727 !> \param soc_shifts the SOC corrected orbital shifts to apply to original energies, for both spins
1728 !> \param donor_state ...
1729 !> \param xas_tdp_env ...
1730 !> \param xas_tdp_control ...
1731 !> \param qs_env ...
1732 ! **************************************************************************************************
1733  SUBROUTINE get_soc_splitting(soc_shifts, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1734 
1735  REAL(dp), ALLOCATABLE, DIMENSION(:, :), &
1736  INTENT(out) :: soc_shifts
1737  TYPE(donor_state_type), POINTER :: donor_state
1738  TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
1739  TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
1740  TYPE(qs_environment_type), POINTER :: qs_env
1741 
1742  CHARACTER(len=*), PARAMETER :: routinen = 'get_soc_splitting'
1743 
1744  COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :) :: evecs, hami
1745  INTEGER :: beta_spin, handle, ialpha, ibeta, &
1746  ido_mo, ispin, nao, ndo_mo, ndo_so, &
1747  nspins
1748  REAL(dp) :: alpha_tot_contrib, beta_tot_contrib
1749  REAL(dp), ALLOCATABLE, DIMENSION(:) :: evals
1750  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_shifts
1751  TYPE(cp_blacs_env_type), POINTER :: blacs_env
1752  TYPE(cp_cfm_type) :: hami_cfm
1753  TYPE(cp_fm_struct_type), POINTER :: ao_domo_struct, domo_domo_struct, &
1754  doso_doso_struct
1755  TYPE(cp_fm_type) :: alpha_gs_coeffs, ao_domo_work, &
1756  beta_gs_coeffs, domo_domo_work, &
1757  img_fm, real_fm
1758  TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_ks
1759  TYPE(dbcsr_type), POINTER :: orb_soc_x, orb_soc_y, orb_soc_z
1760  TYPE(mp_para_env_type), POINTER :: para_env
1761 
1762  NULLIFY (matrix_ks, para_env, blacs_env, ao_domo_struct, domo_domo_struct, &
1763  doso_doso_struct, orb_soc_x, orb_soc_y, orb_soc_z)
1764 
1765  CALL timeset(routinen, handle)
1766 
1767  ! Idea: we compute the SOC matrix in the space of the degenerate spin-orbitals, add it to
1768  ! the KS matrix in the same basis, diagonalize the whole thing and get the corrected energies
1769  ! for SOC
1770 
1771  CALL get_qs_env(qs_env, matrix_ks=matrix_ks, para_env=para_env, blacs_env=blacs_env)
1772 
1773  orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
1774  orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
1775  orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
1776 
1777  ! Whether it is open-shell or not, we have 2*ndo_mo spin-orbitals
1778  nspins = 2
1779  ndo_mo = donor_state%ndo_mo
1780  ndo_so = nspins*ndo_mo
1781  CALL dbcsr_get_info(matrix_ks(1)%matrix, nfullrows_total=nao)
1782 
1783  ! Build the fm infrastructure
1784  CALL cp_fm_struct_create(ao_domo_struct, context=blacs_env, para_env=para_env, &
1785  nrow_global=nao, ncol_global=ndo_mo)
1786  CALL cp_fm_struct_create(domo_domo_struct, context=blacs_env, para_env=para_env, &
1787  nrow_global=ndo_mo, ncol_global=ndo_mo)
1788  CALL cp_fm_struct_create(doso_doso_struct, context=blacs_env, para_env=para_env, &
1789  nrow_global=ndo_so, ncol_global=ndo_so)
1790 
1791  CALL cp_fm_create(alpha_gs_coeffs, ao_domo_struct)
1792  CALL cp_fm_create(beta_gs_coeffs, ao_domo_struct)
1793  CALL cp_fm_create(ao_domo_work, ao_domo_struct)
1794  CALL cp_fm_create(domo_domo_work, domo_domo_struct)
1795  CALL cp_fm_create(real_fm, doso_doso_struct)
1796  CALL cp_fm_create(img_fm, doso_doso_struct)
1797 
1798  ! Put the gs_coeffs in the correct format.
1799  IF (xas_tdp_control%do_uks) THEN
1800 
1801  CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=alpha_gs_coeffs, nrow=nao, &
1802  ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=1)
1803  CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=beta_gs_coeffs, nrow=nao, &
1804  ncol=ndo_mo, s_firstrow=1, s_firstcol=ndo_mo + 1, t_firstrow=1, t_firstcol=1)
1805 
1806  ELSE
1807 
1808  CALL cp_fm_to_fm(donor_state%gs_coeffs, alpha_gs_coeffs)
1809  CALL cp_fm_to_fm(donor_state%gs_coeffs, beta_gs_coeffs)
1810  END IF
1811 
1812  ! Compute the KS matrix in this basis, add it to the real part of the final matrix
1813  !alpha-alpha block in upper left quadrant
1814  CALL cp_dbcsr_sm_fm_multiply(matrix_ks(1)%matrix, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1815  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1816  domo_domo_work)
1817  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1818  s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=1)
1819 
1820  !beta-beta block in lower right quadrant
1821  beta_spin = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) beta_spin = 2
1822  CALL cp_dbcsr_sm_fm_multiply(matrix_ks(beta_spin)%matrix, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1823  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1824  domo_domo_work)
1825  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1826  s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=ndo_mo + 1)
1827 
1828  ! Compute the SOC matrix elements and add them to the real or imaginary part of the matrix
1829  ! alpha-alpha block, only Hz not zero, purely imaginary, addition
1830  CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1831  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1832  domo_domo_work)
1833  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1834  s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=1)
1835 
1836  ! beta-beta block, only Hz not zero, purely imaginary, substraciton
1837  CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1838  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, -1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1839  domo_domo_work)
1840  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1841  s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=ndo_mo + 1)
1842 
1843  ! alpha-beta block, two non-zero terms in Hx and Hy
1844  ! Hx term, purely imaginary, addition
1845  CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1846  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1847  domo_domo_work)
1848  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1849  s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=ndo_mo + 1)
1850  ! Hy term, purely real, addition
1851  CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1852  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1853  domo_domo_work)
1854  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1855  s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=ndo_mo + 1)
1856 
1857  ! beta-alpha block, two non-zero terms in Hx and Hy
1858  ! Hx term, purely imaginary, addition
1859  CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1860  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1861  domo_domo_work)
1862  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1863  s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=1)
1864  ! Hy term, purely real, substraction
1865  CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1866  CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, -1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1867  domo_domo_work)
1868  CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1869  s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=1)
1870 
1871  ! Cast everything in complex fm format
1872  CALL cp_cfm_create(hami_cfm, doso_doso_struct)
1873  CALL cp_fm_to_cfm(real_fm, img_fm, hami_cfm)
1874 
1875  ! And diagonalize. Since tiny matrix (6x6), diagonalize locally
1876  ALLOCATE (evals(ndo_so), evecs(ndo_so, ndo_so), hami(ndo_so, ndo_so))
1877  CALL cp_cfm_get_submatrix(hami_cfm, hami)
1878  CALL complex_diag(hami, evecs, evals)
1879 
1880  !The SOC corrected KS eigenvalues
1881  ALLOCATE (tmp_shifts(ndo_mo, 2))
1882 
1883  ialpha = 1; ibeta = 1;
1884  DO ido_mo = 1, ndo_so
1885  !need to find out whether the eigenvalue corresponds to an alpha or beta spin-orbtial
1886  alpha_tot_contrib = real(dot_product(evecs(1:ndo_mo, ido_mo), evecs(1:ndo_mo, ido_mo)))
1887  beta_tot_contrib = real(dot_product(evecs(ndo_mo + 1:ndo_so, ido_mo), evecs(ndo_mo + 1:ndo_so, ido_mo)))
1888 
1889  IF (alpha_tot_contrib > beta_tot_contrib) THEN
1890  tmp_shifts(ialpha, 1) = evals(ido_mo)
1891  ialpha = ialpha + 1
1892  ELSE
1893  tmp_shifts(ibeta, 2) = evals(ido_mo)
1894  ibeta = ibeta + 1
1895  END IF
1896  END DO
1897 
1898  !compute shift from KS evals
1899  ALLOCATE (soc_shifts(ndo_mo, SIZE(donor_state%energy_evals, 2)))
1900  DO ispin = 1, SIZE(donor_state%energy_evals, 2)
1901  soc_shifts(:, ispin) = tmp_shifts(:, ispin) - donor_state%energy_evals(:, ispin)
1902  END DO
1903 
1904  ! clean-up
1905  CALL cp_fm_release(alpha_gs_coeffs)
1906  CALL cp_fm_release(beta_gs_coeffs)
1907  CALL cp_fm_release(ao_domo_work)
1908  CALL cp_fm_release(domo_domo_work)
1909  CALL cp_fm_release(real_fm)
1910  CALL cp_fm_release(img_fm)
1911 
1912  CALL cp_cfm_release(hami_cfm)
1913 
1914  CALL cp_fm_struct_release(ao_domo_struct)
1915  CALL cp_fm_struct_release(domo_domo_struct)
1916  CALL cp_fm_struct_release(doso_doso_struct)
1917 
1918  CALL timestop(handle)
1919 
1920  END SUBROUTINE get_soc_splitting
1921 
1922 END MODULE xas_tdp_correction
#define idx2(a, i, j)
#define idx3(a, i, j, k)
Types and set/get functions for auxiliary density matrix methods.
Definition: admm_types.F:15
Contains methods used in the context of density fitting.
Definition: admm_utils.F:15
subroutine, public admm_correct_for_eigenvalues(ispin, admm_env, ks_matrix)
...
Definition: admm_utils.F:53
collects all references to literature in CP2K as new algorithms / method are included from literature...
Definition: bibliography.F:28
integer, save, public shigeta2001
Definition: bibliography.F:43
integer, save, public bussy2021b
Definition: bibliography.F:43
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
Represents a complex full matrix distributed on many processors.
Definition: cp_cfm_types.F:12
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
Definition: cp_cfm_types.F:333
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
Definition: cp_cfm_types.F:121
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
Definition: cp_cfm_types.F:159
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
Definition: cp_cfm_types.F:817
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition: cp_fm_diag.F:17
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition: cp_fm_diag.F:208
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
Definition: cp_fm_struct.F:125
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition: cp_fm_struct.F:320
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
Definition: cp_fm_types.F:570
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
Definition: cp_fm_types.F:1473
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
Definition: cp_fm_types.F:901
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
This is the start of a dbt_api, all publically needed functions are exported here....
Definition: dbt_api.F:17
Utilities for hfx and admm methods.
subroutine, public create_admm_xc_section(x_data, xc_section, admm_env)
This routine modifies the xc section depending on the potential type used for the HF exchange and the...
objects that represent the structure of input sections and the data contained in an input section
recursive subroutine, public section_vals_create(section_vals, section)
creates a object where to store the values of a section
subroutine, public section_vals_retain(section_vals)
retains the given section values (see doc/ReferenceCounting.html)
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_set_subs_vals(section_vals, subsection_name, new_section_vals, i_rep_section)
replaces of the requested subsection with the one given
recursive subroutine, public section_vals_release(section_vals)
releases the given object
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition: machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition: machine.F:106
Collection of simple mathematical functions and subroutines.
Definition: mathlib.F:15
subroutine, public complex_diag(matrix, eigenvectors, eigenvalues)
Diagonalizes a local complex Hermitian matrix using LAPACK. Based on cp_cfm_heevd.
Definition: mathlib.F:1752
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Definition of physical constants:
Definition: physcon.F:68
real(kind=dp), parameter, public evolt
Definition: physcon.F:183
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_RI_aux_kp, matrix_s, matrix_s_RI_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, WannierCentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, rhs)
Get the QUICKSTEP environment.
routines that build the Kohn-Sham matrix (i.e calculate the coulomb and xc parts
Definition: qs_ks_methods.F:22
subroutine, public qs_ks_build_kohn_sham_matrix(qs_env, calculate_forces, just_energy, print_active, ext_ks_matrix)
routine where the real calculations are made: the KS matrix is calculated
Definition and initialisation of the mo data type.
Definition: qs_mo_types.F:22
subroutine, public duplicate_mo_set(mo_set_new, mo_set_old)
allocate a new mo_set, and copy the old data
Definition: qs_mo_types.F:149
subroutine, public deallocate_mo_set(mo_set)
Deallocate a wavefunction data structure.
Definition: qs_mo_types.F:352
subroutine, public get_mo_set(mo_set, maxocc, homo, lfomo, nao, nelectron, n_el_f, nmo, eigenvalues, occupation_numbers, mo_coeff, mo_coeff_b, uniform_occupation, kTS, mu, flexible_electron_count)
Get the components of a MO set data structure.
Definition: qs_mo_types.F:397
subroutine, public reassign_allocated_mos(mo_set_new, mo_set_old)
reassign an already allocated mo_set
Definition: qs_mo_types.F:109
All kind of helpful little routines.
Definition: util.F:14
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
Definition: util.F:333
Second order perturbation correction to XAS_TDP spectra (i.e. shift)
subroutine, public gw2x_shift(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
Computes the ionization potential using the GW2X method of Shigeta et. al. The result cam be used for...
subroutine, public get_soc_splitting(soc_shifts, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
We try to compute the spin-orbit splitting via perturbation theory. We keep it \ cheap by only inculd...
All the kernel specific subroutines for XAS TDP calculations.
subroutine, public contract2_ao_to_domo(contr_int, op_type, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
Contract the ri 3-center integrals stored in a tensor with repect to the donor MOs coeffs,...
subroutine, public ri_all_blocks_mm(contr_int, PQ)
Multiply all the blocks of a contracted RI integral (aI|P) by a matrix of type (P|....
Define XAS TDP control type and associated create, release, etc subroutines, as well as XAS TDP envir...
Definition: xas_tdp_types.F:13