(git:374b731)
Loading...
Searching...
No Matches
xas_tdp_correction.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Second order perturbation correction to XAS_TDP spectra (i.e. shift)
10!> \author A. Bussy (01.2020)
11! **************************************************************************************************
12
14 USE admm_types, ONLY: admm_type
16 USE bibliography, ONLY: bussy2021b,&
18 cite_reference
22 USE cp_cfm_types, ONLY: cp_cfm_create,&
36 USE cp_fm_types, ONLY: cp_fm_create,&
44 USE dbcsr_api, ONLY: &
45 dbcsr_copy, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
46 dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_get_info, dbcsr_p_type, &
47 dbcsr_release, dbcsr_type
48 USE dbt_api, ONLY: &
49 dbt_contract, dbt_copy, dbt_copy_matrix_to_tensor, dbt_create, dbt_default_distvec, &
50 dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, &
51 dbt_finalize, dbt_get_block, dbt_get_info, dbt_iterator_blocks_left, &
52 dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, dbt_iterator_type, &
53 dbt_pgrid_create, dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_type
61 USE kinds, ONLY: dp
62 USE machine, ONLY: m_flush
63 USE mathlib, ONLY: complex_diag
66 USE physcon, ONLY: evolt
75 USE util, ONLY: get_limit
81
82!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
83#include "./base/base_uses.f90"
84
85 IMPLICIT NONE
86 PRIVATE
87
88 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_correction'
89
91
92CONTAINS
93
94! **************************************************************************************************
95!> \brief Computes the ionization potential using the GW2X method of Shigeta et. al. The result cam
96!> be used for XAS correction (shift) or XPS directly.
97!> \param donor_state ...
98!> \param xas_tdp_env ...
99!> \param xas_tdp_control ...
100!> \param qs_env ...
101! **************************************************************************************************
102 SUBROUTINE gw2x_shift(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
103
104 TYPE(donor_state_type), POINTER :: donor_state
105 TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
106 TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
107 TYPE(qs_environment_type), POINTER :: qs_env
108
109 CHARACTER(len=*), PARAMETER :: routinen = 'GW2X_shift'
110
111 INTEGER :: ex_idx, exat, first_domo(2), handle, i, ido_mo, iloc, ilocat, ispin, jspin, &
112 locat, nao, natom, ndo_mo, nhomo(2), nlumo(2), nonloc, nspins, start_sgf
113 INTEGER, DIMENSION(:), POINTER :: nsgf_blk
114 LOGICAL :: pseudo_canonical
115 REAL(dp) :: og_hfx_frac
116 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: contract_coeffs_backup
117 TYPE(admm_type), POINTER :: admm_env
118 TYPE(cp_1d_r_p_type), ALLOCATABLE, DIMENSION(:) :: homo_evals, lumo_evals
119 TYPE(cp_blacs_env_type), POINTER :: blacs_env
120 TYPE(cp_fm_struct_p_type), ALLOCATABLE, &
121 DIMENSION(:) :: all_struct, homo_struct, lumo_struct
122 TYPE(cp_fm_struct_type), POINTER :: hoho_struct, lulu_struct
123 TYPE(cp_fm_type) :: hoho_fock, hoho_work, homo_work, &
124 lulu_fock, lulu_work, lumo_work
125 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: all_coeffs, homo_coeffs, lumo_coeffs
126 TYPE(cp_fm_type), POINTER :: mo_coeff
127 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: dbcsr_work, fock_matrix, matrix_ks
128 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ja_x, oi_y
129 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: mo_template
130 TYPE(dft_control_type), POINTER :: dft_control
131 TYPE(mo_set_type), DIMENSION(:), POINTER :: mos
132 TYPE(mp_para_env_type), POINTER :: para_env
133 TYPE(section_vals_type), POINTER :: xc_fun_empty, xc_fun_original, xc_section
134
135 NULLIFY (xc_fun_empty, xc_fun_original, xc_section, mos, dft_control, dbcsr_work, &
136 fock_matrix, matrix_ks, para_env, mo_coeff, blacs_env, nsgf_blk)
137
138 CALL cite_reference(shigeta2001)
139 CALL cite_reference(bussy2021b)
140
141 CALL timeset(routinen, handle)
142
143 !The GW2X correction we want to compute goes like this, where omega is the corrected epsilon_I:
144 !omega = eps_I + 0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - eps_j - eps_k)
145 ! + 0.5 * sum_jab |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b)
146 ! j,k denote occupied spin-orbitals and a,b denote virtual spin orbitals
147
148 !The strategy is the following (we assume restricted closed-shell):
149 !1) Get the LUMOs from xas_tdp_env
150 !2) Get the HOMOs from qs_env
151 !3) Compute or fetch the generalize Fock matric
152 !4) Diagonalize it in the subspace of HOMOs and LUMOs (or just take diagonal matrix elements)
153 !5) Build the full HOMO-LUMO basis that we will use and compute eigenvalues
154 !6) Iterate over GW2X steps to compute the self energy
155
156 !We implement 2 approaches => diagonal elements of Fock matrix with original MOs and
157 !pseudo-canonical MOs
158 pseudo_canonical = xas_tdp_control%pseudo_canonical
159
160 !Get donor state info
161 ndo_mo = donor_state%ndo_mo
162 nspins = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) nspins = 2
163
164 !1) Get the LUMO coefficients from the xas_tdp_env, that have been precomputed
165
166 CALL get_qs_env(qs_env, matrix_ks=matrix_ks, mos=mos, para_env=para_env, &
167 blacs_env=blacs_env, natom=natom)
168
169 ALLOCATE (lumo_struct(nspins), lumo_coeffs(nspins))
170
171 DO ispin = 1, nspins
172 CALL get_mo_set(mos(ispin), homo=nhomo(ispin), nao=nao)
173 nlumo(ispin) = nao - nhomo(ispin)
174
175 CALL cp_fm_struct_create(lumo_struct(ispin)%struct, para_env=para_env, context=blacs_env, &
176 ncol_global=nlumo(ispin), nrow_global=nao)
177
178 CALL cp_fm_create(lumo_coeffs(ispin), lumo_struct(ispin)%struct)
179 CALL cp_fm_to_fm(xas_tdp_env%lumo_evecs(ispin), lumo_coeffs(ispin))
180 END DO
181
182 !2) get the HOMO coeffs. Reminder: keep all non-localized MOs + those localized on core atom
183 ! For this to work, it is assumed that the LOCALIZE keyword is used
184 ALLOCATE (homo_struct(nspins), homo_coeffs(nspins))
185
186 DO ispin = 1, nspins
187 nonloc = nhomo(ispin) - xas_tdp_control%n_search
188 exat = donor_state%at_index
189 ex_idx = minloc(abs(xas_tdp_env%ex_atom_indices - exat), 1)
190 locat = count(xas_tdp_env%mos_of_ex_atoms(:, ex_idx, ispin) == 1)
191
192 CALL cp_fm_struct_create(homo_struct(ispin)%struct, para_env=para_env, context=blacs_env, &
193 ncol_global=locat + nonloc, nrow_global=nao)
194 CALL cp_fm_create(homo_coeffs(ispin), homo_struct(ispin)%struct)
195
196 CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff)
197 CALL cp_fm_to_fm_submat(mo_coeff, homo_coeffs(ispin), nrow=nao, ncol=nonloc, s_firstrow=1, &
198 s_firstcol=xas_tdp_control%n_search + 1, t_firstrow=1, t_firstcol=locat + 1)
199
200 !this bit is taken from xas_tdp_methods
201 ilocat = 1
202 DO iloc = 1, xas_tdp_control%n_search
203 IF (xas_tdp_env%mos_of_ex_atoms(iloc, ex_idx, ispin) == -1) cycle
204 CALL cp_fm_to_fm_submat(mo_coeff, homo_coeffs(ispin), nrow=nao, ncol=1, s_firstrow=1, &
205 s_firstcol=iloc, t_firstrow=1, t_firstcol=ilocat)
206 !keep track of donor MO index
207 IF (iloc == donor_state%mo_indices(1, ispin)) first_domo(ispin) = ilocat !first donor MO
208
209 ilocat = ilocat + 1
210 END DO
211 nhomo(ispin) = locat + nonloc
212 END DO
213
214 !3) Computing the generalized Fock Matrix, if not there already
215 IF (ASSOCIATED(xas_tdp_env%fock_matrix)) THEN
216 fock_matrix => xas_tdp_env%fock_matrix
217 ELSE
218 block
219 TYPE(mo_set_type), DIMENSION(:), ALLOCATABLE :: backup_mos
220
221 ALLOCATE (xas_tdp_env%fock_matrix(nspins))
222 fock_matrix => xas_tdp_env%fock_matrix
223
224 ! remove the xc_functionals and set HF fraction to 1
225 xc_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC")
226 xc_fun_original => section_vals_get_subs_vals(xc_section, "XC_FUNCTIONAL")
227 CALL section_vals_retain(xc_fun_original)
228 CALL section_vals_create(xc_fun_empty, xc_fun_original%section)
229 CALL section_vals_set_subs_vals(xc_section, "XC_FUNCTIONAL", xc_fun_empty)
230 CALL section_vals_release(xc_fun_empty)
231 og_hfx_frac = qs_env%x_data(1, 1)%general_parameter%fraction
232 qs_env%x_data(:, :)%general_parameter%fraction = 1.0_dp
233
234 !In case of ADMM, we need to re-create the admm XC section for the new hfx_fraction
235 !We also need to make a backup of the MOs as theiy are modified
236 CALL get_qs_env(qs_env, dft_control=dft_control, admm_env=admm_env)
237 IF (dft_control%do_admm) THEN
238 IF (ASSOCIATED(admm_env%xc_section_primary)) CALL section_vals_release(admm_env%xc_section_primary)
239 IF (ASSOCIATED(admm_env%xc_section_aux)) CALL section_vals_release(admm_env%xc_section_aux)
240 CALL create_admm_xc_section(qs_env%x_data, xc_section, admm_env)
241
242 ALLOCATE (backup_mos(SIZE(mos)))
243 DO i = 1, SIZE(mos)
244 CALL duplicate_mo_set(backup_mos(i), mos(i))
245 END DO
246 END IF
247
248 ALLOCATE (dbcsr_work(nspins))
249 DO ispin = 1, nspins
250 ALLOCATE (dbcsr_work(ispin)%matrix)
251 CALL dbcsr_copy(dbcsr_work(ispin)%matrix, matrix_ks(ispin)%matrix)
252 END DO
253
254 !both spins treated internally
255 CALL qs_ks_build_kohn_sham_matrix(qs_env, calculate_forces=.false., just_energy=.false.)
256
257 DO ispin = 1, nspins
258 ALLOCATE (fock_matrix(ispin)%matrix)
259 CALL dbcsr_copy(fock_matrix(ispin)%matrix, matrix_ks(ispin)%matrix, name="FOCK MATRIX")
260 CALL dbcsr_release(matrix_ks(ispin)%matrix)
261 CALL dbcsr_copy(matrix_ks(ispin)%matrix, dbcsr_work(ispin)%matrix)
262 END DO
263 CALL dbcsr_deallocate_matrix_set(dbcsr_work)
264
265 !In case of ADMM, we want to correct for eigenvalues
266 IF (dft_control%do_admm) THEN
267 DO ispin = 1, nspins
268 CALL admm_correct_for_eigenvalues(ispin, admm_env, fock_matrix(ispin)%matrix)
269 END DO
270 END IF
271
272 !restore xc and HF fraction
273 CALL section_vals_set_subs_vals(xc_section, "XC_FUNCTIONAL", xc_fun_original)
274 CALL section_vals_release(xc_fun_original)
275 qs_env%x_data(:, :)%general_parameter%fraction = og_hfx_frac
276
277 IF (dft_control%do_admm) THEN
278 IF (ASSOCIATED(admm_env%xc_section_primary)) CALL section_vals_release(admm_env%xc_section_primary)
279 IF (ASSOCIATED(admm_env%xc_section_aux)) CALL section_vals_release(admm_env%xc_section_aux)
280 CALL create_admm_xc_section(qs_env%x_data, xc_section, admm_env)
281
282 DO i = 1, SIZE(mos)
283 CALL reassign_allocated_mos(mos(i), backup_mos(i))
284 CALL deallocate_mo_set(backup_mos(i))
285 END DO
286 DEALLOCATE (backup_mos)
287 END IF
288 END block
289 END IF
290
291 !4,5) Build pseudo-canonical MOs if needed + get related Fock matrix elements
292 ALLOCATE (all_struct(nspins), all_coeffs(nspins))
293 ALLOCATE (homo_evals(nspins), lumo_evals(nspins))
294 CALL dbcsr_get_info(matrix_ks(1)%matrix, row_blk_size=nsgf_blk)
295 ALLOCATE (contract_coeffs_backup(nsgf_blk(exat), nspins*ndo_mo))
296
297 DO ispin = 1, nspins
298 CALL cp_fm_struct_create(hoho_struct, para_env=para_env, context=blacs_env, &
299 ncol_global=nhomo(ispin), nrow_global=nhomo(ispin))
300 CALL cp_fm_struct_create(lulu_struct, para_env=para_env, context=blacs_env, &
301 ncol_global=nlumo(ispin), nrow_global=nlumo(ispin))
302
303 CALL cp_fm_create(hoho_work, hoho_struct)
304 CALL cp_fm_create(lulu_work, lulu_struct)
305 CALL cp_fm_create(homo_work, homo_struct(ispin)%struct)
306 CALL cp_fm_create(lumo_work, lumo_struct(ispin)%struct)
307
308 IF (pseudo_canonical) THEN
309 !That is where we rotate the MOs to make them pseudo canonical
310 !The eigenvalues we get from the diagonalization
311
312 !The Fock matrix in the HOMO subspace
313 CALL cp_fm_create(hoho_fock, hoho_struct)
314 NULLIFY (homo_evals(ispin)%array)
315 ALLOCATE (homo_evals(ispin)%array(nhomo(ispin)))
316 CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, homo_coeffs(ispin), &
317 homo_work, ncol=nhomo(ispin))
318 CALL parallel_gemm('T', 'N', nhomo(ispin), nhomo(ispin), nao, 1.0_dp, homo_coeffs(ispin), &
319 homo_work, 0.0_dp, hoho_fock)
320
321 !diagonalize and get pseudo-canonical MOs
322 CALL choose_eigv_solver(hoho_fock, hoho_work, homo_evals(ispin)%array)
323 CALL parallel_gemm('N', 'N', nao, nhomo(ispin), nhomo(ispin), 1.0_dp, homo_coeffs(ispin), &
324 hoho_work, 0.0_dp, homo_work)
325 CALL cp_fm_to_fm(homo_work, homo_coeffs(ispin))
326
327 !overwrite the donor_state's contract coeffs with those
328 contract_coeffs_backup(:, (ispin - 1)*ndo_mo + 1:ispin*ndo_mo) = &
329 donor_state%contract_coeffs(:, (ispin - 1)*ndo_mo + 1:ispin*ndo_mo)
330 start_sgf = sum(nsgf_blk(1:exat - 1)) + 1
331 CALL cp_fm_get_submatrix(homo_coeffs(ispin), &
332 donor_state%contract_coeffs(:, (ispin - 1)*ndo_mo + 1:ispin*ndo_mo), &
333 start_row=start_sgf, start_col=first_domo(ispin), &
334 n_rows=nsgf_blk(exat), n_cols=ndo_mo)
335
336 !do the same for the pseudo-LUMOs
337 CALL cp_fm_create(lulu_fock, lulu_struct)
338 NULLIFY (lumo_evals(ispin)%array)
339 ALLOCATE (lumo_evals(ispin)%array(nlumo(ispin)))
340 CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, lumo_coeffs(ispin), &
341 lumo_work, ncol=nlumo(ispin))
342 CALL parallel_gemm('T', 'N', nlumo(ispin), nlumo(ispin), nao, 1.0_dp, lumo_coeffs(ispin), &
343 lumo_work, 0.0_dp, lulu_fock)
344
345 !diagonalize and get pseudo-canonical MOs
346 CALL choose_eigv_solver(lulu_fock, lulu_work, lumo_evals(ispin)%array)
347 CALL parallel_gemm('N', 'N', nao, nlumo(ispin), nlumo(ispin), 1.0_dp, lumo_coeffs(ispin), &
348 lulu_work, 0.0_dp, lumo_work)
349 CALL cp_fm_to_fm(lumo_work, lumo_coeffs(ispin))
350
351 CALL cp_fm_release(lulu_fock)
352 CALL cp_fm_release(hoho_fock)
353
354 ELSE !using the generalized Fock matrix diagonal elements
355
356 !Compute their Fock matrix diagonal
357 ALLOCATE (homo_evals(ispin)%array(nhomo(ispin)))
358 CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, homo_coeffs(ispin), &
359 homo_work, ncol=nhomo(ispin))
360 CALL parallel_gemm('T', 'N', nhomo(ispin), nhomo(ispin), nao, 1.0_dp, homo_coeffs(ispin), &
361 homo_work, 0.0_dp, hoho_work)
362 CALL cp_fm_get_diag(hoho_work, homo_evals(ispin)%array)
363
364 ALLOCATE (lumo_evals(ispin)%array(nlumo(ispin)))
365 CALL cp_dbcsr_sm_fm_multiply(fock_matrix(ispin)%matrix, lumo_coeffs(ispin), &
366 lumo_work, ncol=nlumo(ispin))
367 CALL parallel_gemm('T', 'N', nlumo(ispin), nlumo(ispin), nao, 1.0_dp, lumo_coeffs(ispin), &
368 lumo_work, 0.0_dp, lulu_work)
369 CALL cp_fm_get_diag(lulu_work, lumo_evals(ispin)%array)
370
371 END IF
372 CALL cp_fm_release(homo_work)
373 CALL cp_fm_release(hoho_work)
374 CALL cp_fm_struct_release(hoho_struct)
375 CALL cp_fm_release(lumo_work)
376 CALL cp_fm_release(lulu_work)
377 CALL cp_fm_struct_release(lulu_struct)
378
379 !Put back homo and lumo coeffs together, to fit tensor structure
380 CALL cp_fm_struct_create(all_struct(ispin)%struct, para_env=para_env, context=blacs_env, &
381 ncol_global=nhomo(ispin) + nlumo(ispin), nrow_global=nao)
382 CALL cp_fm_create(all_coeffs(ispin), all_struct(ispin)%struct)
383 CALL cp_fm_to_fm(homo_coeffs(ispin), all_coeffs(ispin), ncol=nhomo(ispin), &
384 source_start=1, target_start=1)
385 CALL cp_fm_to_fm(lumo_coeffs(ispin), all_coeffs(ispin), ncol=nlumo(ispin), &
386 source_start=1, target_start=nhomo(ispin) + 1)
387
388 END DO !ispin
389
390 !get semi-contracted tensor (AOs to MOs, keep RI uncontracted)
391 CALL contract_aos_to_mos(ja_x, oi_y, mo_template, all_coeffs, nhomo, nlumo, &
392 donor_state, xas_tdp_env, xas_tdp_control, qs_env)
393
394 !intermediate clean-up
395 DO ispin = 1, nspins
396 CALL cp_fm_release(all_coeffs(ispin))
397 CALL cp_fm_release(homo_coeffs(ispin))
398 CALL cp_fm_release(lumo_coeffs(ispin))
399 CALL cp_fm_struct_release(all_struct(ispin)%struct)
400 CALL cp_fm_struct_release(lumo_struct(ispin)%struct)
401 CALL cp_fm_struct_release(homo_struct(ispin)%struct)
402 END DO
403
404 !6) GW2X iterations
405
406 IF (nspins == 1) THEN
407 !restricted-closed shell: only alpha spin
408 CALL gw2x_rcs_iterations(first_domo(1), ja_x(1), oi_y, mo_template(1, 1), homo_evals(1)%array, &
409 lumo_evals(1)%array, donor_state, xas_tdp_control, qs_env)
410 ELSE
411 !open-shell, need both spins
412 CALL gw2x_os_iterations(first_domo, ja_x, oi_y, mo_template, homo_evals, lumo_evals, &
413 donor_state, xas_tdp_control, qs_env)
414 END IF
415
416 !restore proper contract_coeffs
417 IF (pseudo_canonical) THEN
418 donor_state%contract_coeffs(:, :) = contract_coeffs_backup(:, :)
419 END IF
420
421 !Final clean-up
422 DO ido_mo = 1, nspins*ndo_mo
423 CALL dbt_destroy(oi_y(ido_mo))
424 END DO
425 DO ispin = 1, nspins
426 CALL dbt_destroy(ja_x(ispin))
427 DEALLOCATE (homo_evals(ispin)%array)
428 DEALLOCATE (lumo_evals(ispin)%array)
429 DO jspin = 1, nspins
430 CALL dbt_destroy(mo_template(ispin, jspin))
431 END DO
432 END DO
433 DEALLOCATE (oi_y, homo_evals, lumo_evals)
434
435 CALL timestop(handle)
436
437 END SUBROUTINE gw2x_shift
438
439! **************************************************************************************************
440!> \brief Preforms the GW2X iterations in the restricted-closed shell formalism according to the
441!> Newton-Raphson method
442!> \param first_domo index of the first core donor MO to consider
443!> \param ja_X semi-contracted tensor with j: occupied MO, a: virtual MO, X: RI basis element
444!> \param oI_Y semi-contracted tensors with o: all MOs, I donor core MO, Y: RI basis element
445!> \param mo_template tensor template for fully MO contracted tensor
446!> \param homo_evals ...
447!> \param lumo_evals ...
448!> \param donor_state ...
449!> \param xas_tdp_control ...
450!> \param qs_env ...
451! **************************************************************************************************
452 SUBROUTINE gw2x_rcs_iterations(first_domo, ja_X, oI_Y, mo_template, homo_evals, lumo_evals, &
453 donor_state, xas_tdp_control, qs_env)
454
455 INTEGER, INTENT(IN) :: first_domo
456 TYPE(dbt_type), INTENT(inout) :: ja_x
457 TYPE(dbt_type), DIMENSION(:), INTENT(inout) :: oi_y
458 TYPE(dbt_type), INTENT(inout) :: mo_template
459 REAL(dp), DIMENSION(:), INTENT(IN) :: homo_evals, lumo_evals
460 TYPE(donor_state_type), POINTER :: donor_state
461 TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
462 TYPE(qs_environment_type), POINTER :: qs_env
463
464 CHARACTER(len=*), PARAMETER :: routinen = 'GW2X_rcs_iterations'
465
466 INTEGER :: batch_size, bounds_1d(2), bounds_2d(2, 2), handle, i, ibatch, ido_mo, iloop, &
467 max_iter, nbatch_occ, nbatch_virt, nblk_occ, nblk_virt, nblks(3), ndo_mo, nhomo, nlumo, &
468 occ_bo(2), output_unit, tmp_sum, virt_bo(2)
469 INTEGER, ALLOCATABLE, DIMENSION(:) :: mo_blk_size
470 REAL(dp) :: c_os, c_ss, dg, diff, ds1, ds2, eps_i, &
471 eps_iter, g, omega_k, parts(4), s1, s2
472 TYPE(dbt_type) :: aj_ib, aj_ib_diff, aj_x, ja_ik, &
473 ja_ik_diff
474 TYPE(mp_para_env_type), POINTER :: para_env
475
476 CALL timeset(routinen, handle)
477
478 eps_iter = xas_tdp_control%gw2x_eps
479 max_iter = xas_tdp_control%max_gw2x_iter
480 c_os = xas_tdp_control%c_os
481 c_ss = xas_tdp_control%c_ss
482 batch_size = xas_tdp_control%batch_size
483
484 ndo_mo = donor_state%ndo_mo
485 output_unit = cp_logger_get_default_io_unit()
486
487 nhomo = SIZE(homo_evals)
488 nlumo = SIZE(lumo_evals)
489
490 CALL get_qs_env(qs_env, para_env=para_env)
491
492 !We use the Newton-Raphson method to find the zero of the function:
493 !g(omega) = eps_I - omega + mp2 terms, dg(omega) = -1 + d/d_omega (mp2 terms)
494 !We simply compute at each iteration: omega_k+1 = omega_k - g(omega_k)/dg(omega_k)
495
496 !need transposed tensor of (ja|X) for optimal contraction scheme (s.t. (aj|X) block is on same
497 !processor as (ja|X))
498 CALL dbt_create(ja_x, aj_x)
499 CALL dbt_copy(ja_x, aj_x, order=[2, 1, 3])
500
501 !split the MO blocks into batches for memory friendly batched contraction
502 !huge dense tensors never need to be stored
503 CALL dbt_get_info(ja_x, nblks_total=nblks)
504 ALLOCATE (mo_blk_size(nblks(1)))
505 CALL dbt_get_info(ja_x, blk_size_1=mo_blk_size)
506
507 tmp_sum = 0
508 DO i = 1, nblks(1)
509 tmp_sum = tmp_sum + mo_blk_size(i)
510 IF (tmp_sum == nhomo) THEN
511 nblk_occ = i
512 nblk_virt = nblks(1) - i
513 EXIT
514 END IF
515 END DO
516 nbatch_occ = max(1, nblk_occ/batch_size)
517 nbatch_virt = max(1, nblk_virt/batch_size)
518
519 !Loop over donor_states
520 DO ido_mo = 1, ndo_mo
521 IF (output_unit > 0) THEN
522 WRITE (unit=output_unit, fmt="(/,T5,A,I2,A,I4,A,/,T5,A)") &
523 "- GW2X correction for donor MO with spin ", 1, &
524 " and MO index ", donor_state%mo_indices(ido_mo, 1), ":", &
525 " iteration convergence (eV)"
526 CALL m_flush(output_unit)
527 END IF
528
529 !starting values
530 eps_i = homo_evals(first_domo + ido_mo - 1)
531 omega_k = eps_i
532 iloop = 0
533 diff = 2.0_dp*eps_iter
534
535 DO WHILE (abs(diff) > eps_iter)
536 iloop = iloop + 1
537
538 !Compute the mp2 terms and their first derivative
539 parts = 0.0_dp
540
541 !We do batched contraction for (ja|Ik) and (ja|Ib) to never have to carry the full tensor
542 DO ibatch = 1, nbatch_occ
543
544 occ_bo = get_limit(nblk_occ, nbatch_occ, ibatch - 1)
545 bounds_1d = [sum(mo_blk_size(1:occ_bo(1) - 1)) + 1, sum(mo_blk_size(1:occ_bo(2)))]
546
547 CALL dbt_create(mo_template, ja_ik)
548 CALL dbt_contract(alpha=1.0_dp, tensor_1=ja_x, tensor_2=oi_y(ido_mo), &
549 beta=0.0_dp, tensor_3=ja_ik, contract_1=[3], &
550 notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
551 map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
552
553 !opposite-spin contribution
554 CALL calc_os_oov_contrib(parts(1), parts(2), ja_ik, homo_evals, lumo_evals, homo_evals, &
555 omega_k, c_os, nhomo)
556
557 bounds_2d(:, 2) = bounds_1d
558 bounds_2d(1, 1) = nhomo + 1
559 bounds_2d(2, 1) = nhomo + nlumo
560
561 !same-spin contribution. Contraction only neede if c_ss != 0
562 !directly compute the difference (ja|Ik) - (ka|Ij)
563 IF (abs(c_ss) > epsilon(1.0_dp)) THEN
564
565 CALL dbt_create(ja_ik, ja_ik_diff, map1_2d=[1], map2_2d=[2, 3])
566 CALL dbt_copy(ja_ik, ja_ik_diff, move_data=.true.)
567
568 CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y(ido_mo), tensor_2=aj_x, &
569 beta=1.0_dp, tensor_3=ja_ik_diff, contract_1=[2], &
570 notcontract_1=[1], contract_2=[3], notcontract_2=[1, 2], &
571 map_1=[1], map_2=[2, 3], bounds_2=[1, nhomo], bounds_3=bounds_2d)
572
573 CALL calc_ss_oov_contrib(parts(1), parts(2), ja_ik_diff, homo_evals, lumo_evals, omega_k, c_ss)
574
575 CALL dbt_destroy(ja_ik_diff)
576 END IF !c_ss != 0
577
578 CALL dbt_destroy(ja_ik)
579 END DO
580
581 DO ibatch = 1, nbatch_virt
582
583 virt_bo = get_limit(nblk_virt, nbatch_virt, ibatch - 1)
584 bounds_1d = [sum(mo_blk_size(1:nblk_occ + virt_bo(1) - 1)) + 1, &
585 sum(mo_blk_size(1:nblk_occ + virt_bo(2)))]
586
587 CALL dbt_create(mo_template, aj_ib)
588 CALL dbt_contract(alpha=1.0_dp, tensor_1=aj_x, tensor_2=oi_y(ido_mo), &
589 beta=0.0_dp, tensor_3=aj_ib, contract_1=[3], &
590 notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
591 map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
592
593 !opposite-spin contribution
594 CALL calc_os_ovv_contrib(parts(3), parts(4), aj_ib, lumo_evals, homo_evals, lumo_evals, &
595 omega_k, c_os, nhomo, nhomo)
596
597 !same-spin contribution, only if c_ss is not 0
598 !directly compute the difference (aj|Ib) - (bj|Ia)
599 IF (abs(c_ss) > epsilon(1.0_dp)) THEN
600 bounds_2d(1, 1) = 1
601 bounds_2d(2, 1) = nhomo
602 bounds_2d(:, 2) = bounds_1d
603
604 CALL dbt_create(aj_ib, aj_ib_diff, map1_2d=[1], map2_2d=[2, 3])
605 CALL dbt_copy(aj_ib, aj_ib_diff, move_data=.true.)
606
607 CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y(ido_mo), tensor_2=ja_x, &
608 beta=1.0_dp, tensor_3=aj_ib_diff, contract_1=[2], &
609 notcontract_1=[1], contract_2=[3], notcontract_2=[1, 2], &
610 map_1=[1], map_2=[2, 3], &
611 bounds_2=[nhomo + 1, nhomo + nlumo], bounds_3=bounds_2d)
612
613 CALL calc_ss_ovv_contrib(parts(3), parts(4), aj_ib_diff, homo_evals, lumo_evals, omega_k, c_ss)
614
615 CALL dbt_destroy(aj_ib_diff)
616 END IF ! c_ss not 0
617
618 CALL dbt_destroy(aj_ib)
619 END DO
620
621 CALL para_env%sum(parts)
622 s1 = parts(1); ds1 = parts(2)
623 s2 = parts(3); ds2 = parts(4)
624
625 !evaluate g and its derivative
626 g = eps_i - omega_k + s1 + s2
627 dg = -1.0_dp + ds1 + ds2
628
629 !compute the diff to the new step
630 diff = -g/dg
631
632 !and the new omega
633 omega_k = omega_k + diff
634 diff = diff*evolt
635
636 IF (output_unit > 0) THEN
637 WRITE (unit=output_unit, fmt="(T21,I18,F32.6)") &
638 iloop, diff
639 CALL m_flush(output_unit)
640 END IF
641
642 IF (iloop > max_iter) THEN
643 cpwarn("GW2X iteration not converged.")
644 EXIT
645 END IF
646 END DO !while loop on eps_iter
647
648 !compute the shift and update donor_state
649 donor_state%gw2x_evals(ido_mo, 1) = omega_k
650
651 IF (output_unit > 0) THEN
652 WRITE (unit=output_unit, fmt="(/T7,A,F11.6,/,T5,A,F11.6)") &
653 "Final GW2X shift for this donor MO (eV):", &
654 (donor_state%energy_evals(ido_mo, 1) - omega_k)*evolt
655 END IF
656
657 END DO !ido_mo
658
659 CALL dbt_destroy(aj_x)
660
661 CALL timestop(handle)
662
663 END SUBROUTINE gw2x_rcs_iterations
664
665! **************************************************************************************************
666!> \brief Preforms the GW2X iterations in the open-shell shell formalism according to the
667!> Newton-Raphson method
668!> \param first_domo index of the first core donor MO to consider, for each spin
669!> \param ja_X semi-contracted tensors with j: occupied MO, a: virtual MO, X: RI basis element
670!> \param oI_Y semi-contracted tensors with o: all MOs, I donor core MO, Y: RI basis element
671!> \param mo_template tensor template for fully MO contracted tensor, for each spin combination
672!> \param homo_evals ...
673!> \param lumo_evals ...
674!> \param donor_state ...
675!> \param xas_tdp_control ...
676!> \param qs_env ...
677! **************************************************************************************************
678 SUBROUTINE gw2x_os_iterations(first_domo, ja_X, oI_Y, mo_template, homo_evals, lumo_evals, &
679 donor_state, xas_tdp_control, qs_env)
680
681 INTEGER, INTENT(IN) :: first_domo(2)
682 TYPE(dbt_type), DIMENSION(:), INTENT(inout) :: ja_x, oi_y
683 TYPE(dbt_type), DIMENSION(:, :), INTENT(inout) :: mo_template
684 TYPE(cp_1d_r_p_type), DIMENSION(:), INTENT(in) :: homo_evals, lumo_evals
685 TYPE(donor_state_type), POINTER :: donor_state
686 TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
687 TYPE(qs_environment_type), POINTER :: qs_env
688
689 CHARACTER(len=*), PARAMETER :: routinen = 'GW2X_os_iterations'
690
691 INTEGER :: batch_size, bounds_1d(2), bounds_2d(2, 2), handle, i, ibatch, ido_mo, iloop, &
692 ispin, max_iter, nbatch_occ, nbatch_virt, nblk_occ, nblk_virt, nblks(3), ndo_mo, &
693 nhomo(2), nlumo(2), nspins, occ_bo(2), other_spin, output_unit, tmp_sum, virt_bo(2)
694 INTEGER, ALLOCATABLE, DIMENSION(:) :: mo_blk_size
695 REAL(dp) :: c_os, c_ss, dg, diff, ds1, ds2, eps_i, &
696 eps_iter, g, omega_k, parts(4), s1, s2
697 TYPE(dbt_type) :: aj_ib, aj_ib_diff, ja_ik, ja_ik_diff
698 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: aj_x
699 TYPE(mp_para_env_type), POINTER :: para_env
700
701 CALL timeset(routinen, handle)
702
703 eps_iter = xas_tdp_control%gw2x_eps
704 max_iter = xas_tdp_control%max_gw2x_iter
705 c_os = xas_tdp_control%c_os
706 c_ss = xas_tdp_control%c_ss
707 batch_size = xas_tdp_control%batch_size
708
709 nspins = 2
710 ndo_mo = donor_state%ndo_mo
711 output_unit = cp_logger_get_default_io_unit()
712
713 DO ispin = 1, nspins
714 nhomo(ispin) = SIZE(homo_evals(ispin)%array)
715 nlumo(ispin) = SIZE(lumo_evals(ispin)%array)
716 END DO
717
718 CALL get_qs_env(qs_env, para_env=para_env)
719
720 !We use the Newton-Raphson method to find the zero of the function:
721 !g(omega) = eps_I - omega + mp2 terms, dg(omega) = -1 + d/d_omega (mp2 terms)
722 !We simply compute at each iteration: omega_k+1 = omega_k - g(omega_k)/dg(omega_k)
723
724 ALLOCATE (aj_x(2))
725 DO ispin = 1, nspins
726
727 !need transposed tensor of (ja|X) for optimal contraction scheme,
728 !s.t. (aj|X) block is on same processor as (ja|X)) and differences can be taken
729 CALL dbt_create(ja_x(ispin), aj_x(ispin))
730 CALL dbt_copy(ja_x(ispin), aj_x(ispin), order=[2, 1, 3])
731
732 END DO ! ispin
733 DO ispin = 1, nspins
734
735 other_spin = 3 - ispin
736
737 !split the MO blocks into batches for memory friendly batched contraction
738 !huge dense tensors never need to be stored. Split MOs for the current spin
739 CALL dbt_get_info(ja_x(ispin), nblks_total=nblks)
740 ALLOCATE (mo_blk_size(nblks(1)))
741 CALL dbt_get_info(ja_x(ispin), blk_size_1=mo_blk_size)
742
743 tmp_sum = 0
744 DO i = 1, nblks(1)
745 tmp_sum = tmp_sum + mo_blk_size(i)
746 IF (tmp_sum == nhomo(ispin)) THEN
747 nblk_occ = i
748 nblk_virt = nblks(1) - i
749 EXIT
750 END IF
751 END DO
752 nbatch_occ = max(1, nblk_occ/batch_size)
753 nbatch_virt = max(1, nblk_virt/batch_size)
754
755 !Loop over donor_states of the current spin
756 DO ido_mo = 1, ndo_mo
757 IF (output_unit > 0) THEN
758 WRITE (unit=output_unit, fmt="(/,T5,A,I2,A,I4,A,/,T5,A)") &
759 "- GW2X correction for donor MO with spin ", ispin, &
760 " and MO index ", donor_state%mo_indices(ido_mo, ispin), ":", &
761 " iteration convergence (eV)"
762 CALL m_flush(output_unit)
763 END IF
764
765 !starting values
766 eps_i = homo_evals(ispin)%array(first_domo(ispin) + ido_mo - 1)
767 omega_k = eps_i
768 iloop = 0
769 diff = 2.0_dp*eps_iter
770
771 DO WHILE (abs(diff) > eps_iter)
772 iloop = iloop + 1
773
774 !Compute the mp2 terms and their first derivative
775 parts = 0.0_dp
776
777 !We do batched contraction for (ja|Ik) and (ja|Ib) to never have to carry the full tensor
778 DO ibatch = 1, nbatch_occ
779
780 !opposite-spin contribution, i.e. (j_beta a_beta| I_alpha k_alpha) and vice-versa
781 !do the batching along k because same spin as donor MO
782 occ_bo = get_limit(nblk_occ, nbatch_occ, ibatch - 1)
783 bounds_1d = [sum(mo_blk_size(1:occ_bo(1) - 1)) + 1, sum(mo_blk_size(1:occ_bo(2)))]
784
785 CALL dbt_create(mo_template(other_spin, ispin), ja_ik)
786 CALL dbt_contract(alpha=1.0_dp, tensor_1=ja_x(other_spin), &
787 tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
788 beta=0.0_dp, tensor_3=ja_ik, contract_1=[3], &
789 notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
790 map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
791
792 CALL calc_os_oov_contrib(parts(1), parts(2), ja_ik, homo_evals(other_spin)%array, &
793 lumo_evals(other_spin)%array, homo_evals(ispin)%array, &
794 omega_k, c_os, nhomo(other_spin))
795
796 CALL dbt_destroy(ja_ik)
797
798 !same-spin contribution, need to compute (ja|Ik) - (ka|Ij), all with the current spin
799 !skip if c_ss == 0
800 IF (abs(c_ss) > epsilon(1.0_dp)) THEN
801
802 !same batching as opposite spin
803 CALL dbt_create(mo_template(ispin, ispin), ja_ik)
804 CALL dbt_contract(alpha=1.0_dp, tensor_1=ja_x(ispin), &
805 tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
806 beta=0.0_dp, tensor_3=ja_ik, contract_1=[3], &
807 notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
808 map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
809
810 bounds_2d(:, 2) = bounds_1d
811 bounds_2d(1, 1) = nhomo(ispin) + 1
812 bounds_2d(2, 1) = nhomo(ispin) + nlumo(ispin)
813
814 !the tensor difference is directly taken here
815 CALL dbt_create(ja_ik, ja_ik_diff, map1_2d=[1], map2_2d=[2, 3])
816 CALL dbt_copy(ja_ik, ja_ik_diff, move_data=.true.)
817
818 CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y((ispin - 1)*ndo_mo + ido_mo), &
819 tensor_2=aj_x(ispin), beta=1.0_dp, tensor_3=ja_ik_diff, &
820 contract_1=[2], notcontract_1=[1], contract_2=[3], notcontract_2=[1, 2], &
821 map_1=[1], map_2=[2, 3], bounds_2=[1, nhomo(ispin)], bounds_3=bounds_2d)
822
823 CALL calc_ss_oov_contrib(parts(1), parts(2), ja_ik_diff, homo_evals(ispin)%array, &
824 lumo_evals(ispin)%array, omega_k, c_ss)
825
826 CALL dbt_destroy(ja_ik_diff)
827 CALL dbt_destroy(ja_ik)
828 END IF !c_ss !!= 0
829
830 END DO
831
832 DO ibatch = 1, nbatch_virt
833
834 !opposite-spin contribution, i.e. (a_beta j_beta| I_alpha b_alpha) and vice-versa
835 !do the batching along b because same spin as donor MO
836 virt_bo = get_limit(nblk_virt, nbatch_virt, ibatch - 1)
837 bounds_1d = [sum(mo_blk_size(1:nblk_occ + virt_bo(1) - 1)) + 1, &
838 sum(mo_blk_size(1:nblk_occ + virt_bo(2)))]
839
840 CALL dbt_create(mo_template(other_spin, ispin), aj_ib)
841 CALL dbt_contract(alpha=1.0_dp, tensor_1=aj_x(other_spin), &
842 tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
843 beta=0.0_dp, tensor_3=aj_ib, contract_1=[3], &
844 notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
845 map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
846
847 CALL calc_os_ovv_contrib(parts(3), parts(4), aj_ib, lumo_evals(other_spin)%array, &
848 homo_evals(other_spin)%array, lumo_evals(ispin)%array, &
849 omega_k, c_os, nhomo(other_spin), nhomo(ispin))
850
851 CALL dbt_destroy(aj_ib)
852
853 !same-spin contribution, need to compute (aj|Ib) - (bj|Ia), all with the current spin
854 !skip if c_ss == 0
855 IF (abs(c_ss) > epsilon(1.0_dp)) THEN
856
857 !same batching as opposite spin
858 CALL dbt_create(mo_template(ispin, ispin), aj_ib)
859 CALL dbt_contract(alpha=1.0_dp, tensor_1=aj_x(ispin), &
860 tensor_2=oi_y((ispin - 1)*ndo_mo + ido_mo), &
861 beta=0.0_dp, tensor_3=aj_ib, contract_1=[3], &
862 notcontract_1=[1, 2], contract_2=[2], notcontract_2=[1], &
863 map_1=[1, 2], map_2=[3], bounds_3=bounds_1d)
864
865 bounds_2d(1, 1) = 1
866 bounds_2d(2, 1) = nhomo(ispin)
867 bounds_2d(:, 2) = bounds_1d
868
869 CALL dbt_create(aj_ib, aj_ib_diff, map1_2d=[1], map2_2d=[2, 3])
870 CALL dbt_copy(aj_ib, aj_ib_diff, move_data=.true.)
871
872 CALL dbt_contract(alpha=-1.0_dp, tensor_1=oi_y((ispin - 1)*ndo_mo + ido_mo), &
873 tensor_2=ja_x(ispin), beta=1.0_dp, tensor_3=aj_ib_diff, &
874 contract_1=[2], notcontract_1=[1], contract_2=[3], &
875 notcontract_2=[1, 2], map_1=[1], map_2=[2, 3], &
876 bounds_2=[nhomo(ispin) + 1, nhomo(ispin) + nlumo(ispin)], &
877 bounds_3=bounds_2d)
878
879 CALL calc_ss_ovv_contrib(parts(3), parts(4), aj_ib_diff, homo_evals(ispin)%array, &
880 lumo_evals(ispin)%array, omega_k, c_ss)
881
882 CALL dbt_destroy(aj_ib_diff)
883 CALL dbt_destroy(aj_ib)
884 END IF ! c_ss not 0
885
886 END DO
887
888 CALL para_env%sum(parts)
889 s1 = parts(1); ds1 = parts(2)
890 s2 = parts(3); ds2 = parts(4)
891
892 !evaluate g and its derivative
893 g = eps_i - omega_k + s1 + s2
894 dg = -1.0_dp + ds1 + ds2
895
896 !compute the diff to the new step
897 diff = -g/dg
898
899 !and the new omega
900 omega_k = omega_k + diff
901 diff = diff*evolt
902
903 IF (output_unit > 0) THEN
904 WRITE (unit=output_unit, fmt="(T21,I18,F32.6)") &
905 iloop, diff
906 CALL m_flush(output_unit)
907 END IF
908
909 IF (iloop > max_iter) THEN
910 cpwarn("GW2X iteration not converged.")
911 EXIT
912 END IF
913 END DO !while loop on eps_iter
914
915 !compute the shift and update donor_state
916 donor_state%gw2x_evals(ido_mo, ispin) = omega_k
917
918 IF (output_unit > 0) THEN
919 WRITE (unit=output_unit, fmt="(/T7,A,F11.6,/,T5,A,F11.6)") &
920 "Final GW2X shift for this donor MO (eV):", &
921 (donor_state%energy_evals(ido_mo, ispin) - omega_k)*evolt
922 END IF
923
924 END DO !ido_mo
925
926 DEALLOCATE (mo_blk_size)
927 END DO ! ispin
928
929 DO ispin = 1, nspins
930 CALL dbt_destroy(aj_x(ispin))
931 END DO
932
933 CALL timestop(handle)
934
935 END SUBROUTINE gw2x_os_iterations
936
937! **************************************************************************************************
938!> \brief Takes the 3-center integrals from the ri_ex_3c tensor and returns a full tensor. Since
939!> ri_ex_3c is only half filled because of symmetry, we have to add the transpose
940!> and scale the diagonal blocks by 0.5
941!> \param pq_X the full (desymmetrized) tensor containing the (pq|X) exchange integrals, in a new
942!> 3d distribution and optimized block sizes
943!> \param exat index of current excited atom
944!> \param xas_tdp_env ...
945!> \param qs_env ...
946! **************************************************************************************************
947 SUBROUTINE get_full_pqx_from_3c_ex(pq_X, exat, xas_tdp_env, qs_env)
948
949 TYPE(dbt_type), INTENT(INOUT) :: pq_x
950 INTEGER, INTENT(IN) :: exat
951 TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
952 TYPE(qs_environment_type), POINTER :: qs_env
953
954 INTEGER :: i, ind(3), natom, nblk_ri, nsgf_x
955 INTEGER, ALLOCATABLE, DIMENSION(:) :: orb_blk_size, proc_dist_1, proc_dist_2, &
956 proc_dist_3
957 INTEGER, DIMENSION(3) :: pdims
958 LOGICAL :: found
959 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: pblock
960 TYPE(dbt_distribution_type) :: t_dist
961 TYPE(dbt_iterator_type) :: iter
962 TYPE(dbt_pgrid_type) :: t_pgrid
963 TYPE(dbt_type) :: pq_x_tmp, work
964 TYPE(mp_para_env_type), POINTER :: para_env
965
966 NULLIFY (para_env)
967
968 !create work tensor with same 2D dist as pq_X, but only keep excited atom along RI direction
969 CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
970 CALL dbt_get_info(xas_tdp_env%ri_3c_ex, pdims=pdims)
971 nsgf_x = SIZE(xas_tdp_env%ri_inv_ex, 1)
972 nblk_ri = 1
973
974 CALL dbt_pgrid_create(para_env, pdims, t_pgrid)
975 ALLOCATE (proc_dist_1(natom), proc_dist_2(natom), orb_blk_size(natom))
976 CALL dbt_get_info(xas_tdp_env%ri_3c_ex, proc_dist_1=proc_dist_1, proc_dist_2=proc_dist_2, &
977 blk_size_1=orb_blk_size)
978 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=proc_dist_1, nd_dist_2=proc_dist_2, &
979 nd_dist_3=[(0, i=1, nblk_ri)])
980
981 CALL dbt_create(work, name="(pq|X)", dist=t_dist, map1_2d=[1], map2_2d=[2, 3], &
982 blk_size_1=orb_blk_size, blk_size_2=orb_blk_size, blk_size_3=[nsgf_x])
983 CALL dbt_distribution_destroy(t_dist)
984
985 !dist of 3c_ex and work match, can simply copy blocks over. Diagonal with factor 0.5
986
987!$OMP PARALLEL DEFAULT(NONE) SHARED(xas_tdp_env,exat,work,orb_blk_size,nsgf_x) &
988!$OMP PRIVATE(iter,ind,pblock,found)
989 CALL dbt_iterator_start(iter, xas_tdp_env%ri_3c_ex)
990 DO WHILE (dbt_iterator_blocks_left(iter))
991 CALL dbt_iterator_next_block(iter, ind)
992 CALL dbt_get_block(xas_tdp_env%ri_3c_ex, ind, pblock, found)
993
994 IF (ind(1) == ind(2)) pblock = 0.5_dp*pblock
995 IF (ind(3) /= exat) cycle
996
997 CALL dbt_put_block(work, [ind(1), ind(2), 1], &
998 [orb_blk_size(ind(1)), orb_blk_size(ind(2)), nsgf_x], pblock)
999
1000 DEALLOCATE (pblock)
1001 END DO
1002 CALL dbt_iterator_stop(iter)
1003!$OMP END PARALLEL
1004 CALL dbt_finalize(work)
1005
1006 !create (pq|X) based on work and copy over
1007 CALL dbt_create(work, pq_x_tmp)
1008 CALL dbt_copy(work, pq_x_tmp)
1009 CALL dbt_copy(work, pq_x_tmp, order=[2, 1, 3], summation=.true., move_data=.true.)
1010
1011 CALL dbt_destroy(work)
1012
1013 !create the pgrid, based on the 2D dbcsr grid
1014 CALL dbt_pgrid_destroy(t_pgrid)
1015 pdims = 0
1016 CALL dbt_pgrid_create(para_env, pdims, t_pgrid, tensor_dims=[natom, natom, 1])
1017
1018 !cyclic distribution accross all directions.
1019 ALLOCATE (proc_dist_3(nblk_ri))
1020 CALL dbt_default_distvec(natom, pdims(1), orb_blk_size, proc_dist_1)
1021 CALL dbt_default_distvec(natom, pdims(2), orb_blk_size, proc_dist_2)
1022 CALL dbt_default_distvec(nblk_ri, pdims(3), [nsgf_x], proc_dist_3)
1023 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=proc_dist_1, nd_dist_2=proc_dist_2, &
1024 nd_dist_3=proc_dist_3)
1025
1026 CALL dbt_create(pq_x, name="(pq|X)", dist=t_dist, map1_2d=[2, 3], map2_2d=[1], &
1027 blk_size_1=orb_blk_size, blk_size_2=orb_blk_size, blk_size_3=[nsgf_x])
1028 CALL dbt_copy(pq_x_tmp, pq_x, move_data=.true.)
1029
1030 CALL dbt_distribution_destroy(t_dist)
1031 CALL dbt_pgrid_destroy(t_pgrid)
1032 CALL dbt_destroy(pq_x_tmp)
1033
1034 END SUBROUTINE get_full_pqx_from_3c_ex
1035
1036! **************************************************************************************************
1037!> \brief Contracts (pq|X) and (rI|Y) from AOs to MOs to (ja|X) and (oI|Y) respectively, where
1038!> j is a occupied MO, a is a virtual MO and o is a general MO
1039!> \param ja_X partial contraction over occupied MOs j, virtual MOs a: (ja|X), for both spins (alpha-alpha or beta-beta)
1040!> \param oI_Y partial contraction over all MOs o and donor MOs I (can be more than 1 if 2p or open-shell)
1041!> \param ja_Io_template template to be able to build tensors after calling this routine, for each spin combination
1042!> \param mo_coeffs ...
1043!> \param nocc ...
1044!> \param nvirt ...
1045!> \param donor_state ...
1046!> \param xas_tdp_env ...
1047!> \param xas_tdp_control ...
1048!> \param qs_env ...
1049!> \note the multiplication by (X|Y)^-1 is included in the final (oI|Y) tensor. Only integrals with the
1050!> same spin on one center are non-zero, i.e. (oI|Y) is non zero only if both o and Y have the same spin
1051! **************************************************************************************************
1052 SUBROUTINE contract_aos_to_mos(ja_X, oI_Y, ja_Io_template, mo_coeffs, nocc, nvirt, &
1053 donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1054
1055 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:), &
1056 INTENT(INOUT) :: ja_x, oi_y
1057 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :), &
1058 INTENT(INOUT) :: ja_io_template
1059 TYPE(cp_fm_type), DIMENSION(:), INTENT(INOUT) :: mo_coeffs
1060 INTEGER, INTENT(IN) :: nocc(2), nvirt(2)
1061 TYPE(donor_state_type), POINTER :: donor_state
1062 TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
1063 TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
1064 TYPE(qs_environment_type), POINTER :: qs_env
1065
1066 CHARACTER(len=*), PARAMETER :: routinen = 'contract_AOs_to_MOs'
1067
1068 INTEGER :: bo(2), handle, i, ispin, jspin, &
1069 nblk_aos, nblk_mos(2), nblk_occ(2), &
1070 nblk_pqx(3), nblk_ri, nblk_virt(2), &
1071 nspins
1072 INTEGER, DIMENSION(3) :: pdims
1073 INTEGER, DIMENSION(:), POINTER :: ao_blk_size, ao_col_dist, ao_row_dist, &
1074 mo_dist_3, ri_blk_size, ri_dist_3
1075 INTEGER, DIMENSION(:, :), POINTER :: mat_pgrid
1076 TYPE(cp_1d_i_p_type), ALLOCATABLE, DIMENSION(:) :: mo_blk_size, mo_col_dist, mo_row_dist
1077 TYPE(dbcsr_distribution_type) :: mat_dist
1078 TYPE(dbcsr_distribution_type), POINTER :: std_mat_dist
1079 TYPE(dbcsr_type) :: dbcsr_mo_coeffs
1080 TYPE(dbt_distribution_type) :: t_dist
1081 TYPE(dbt_pgrid_type) :: t_pgrid
1082 TYPE(dbt_type) :: jq_x, pq_x, t_mo_coeffs
1083 TYPE(mp_para_env_type), POINTER :: para_env
1084
1085 NULLIFY (ao_blk_size, ao_col_dist, ao_row_dist, mo_dist_3, ri_blk_size, ri_dist_3, mat_pgrid, &
1086 para_env, std_mat_dist)
1087
1088 CALL timeset(routinen, handle)
1089
1090 nspins = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) nspins = 2
1091
1092 !There are 2 contractions to do for the first tensor: (pq|X) --> (jq|X) --> (ja|X)
1093 !Because memory is the main concern, we move_data everytime at the cost of extra copies
1094
1095 !Some quantities need to be stored for both spins, because they are later combined
1096 CALL get_qs_env(qs_env, para_env=para_env)
1097 ALLOCATE (mo_blk_size(nspins), mo_row_dist(nspins), mo_col_dist(nspins))
1098 ALLOCATE (ja_x(nspins))
1099 ALLOCATE (oi_y(nspins*donor_state%ndo_mo))
1100
1101 DO ispin = 1, nspins
1102
1103 !First, we need a fully populated pq_X (spin-independent)
1104 CALL get_full_pqx_from_3c_ex(pq_x, donor_state%at_index, xas_tdp_env, qs_env)
1105
1106 !Create the tensor pgrid. AOs and RI independent from spin
1107 IF (ispin == 1) THEN
1108 CALL dbt_get_info(pq_x, pdims=pdims, nblks_total=nblk_pqx)
1109 CALL dbt_pgrid_create(para_env, pdims, t_pgrid)
1110 nblk_aos = nblk_pqx(1)
1111 nblk_ri = nblk_pqx(3)
1112 END IF
1113
1114 !Define MO block sizes, at worst, take one block per proc
1115 nblk_occ(ispin) = max(pdims(1), nocc(ispin)/16)
1116 nblk_virt(ispin) = max(pdims(2), nvirt(ispin)/16)
1117 nblk_mos(ispin) = nblk_occ(ispin) + nblk_virt(ispin)
1118 ALLOCATE (mo_blk_size(ispin)%array(nblk_mos(ispin)))
1119 DO i = 1, nblk_occ(ispin)
1120 bo = get_limit(nocc(ispin), nblk_occ(ispin), i - 1)
1121 mo_blk_size(ispin)%array(i) = bo(2) - bo(1) + 1
1122 END DO
1123 DO i = 1, nblk_virt(ispin)
1124 bo = get_limit(nvirt(ispin), nblk_virt(ispin), i - 1)
1125 mo_blk_size(ispin)%array(nblk_occ(ispin) + i) = bo(2) - bo(1) + 1
1126 END DO
1127
1128 !Convert the fm mo_coeffs into a dbcsr matrix and then a tensor
1129 CALL get_qs_env(qs_env, dbcsr_dist=std_mat_dist)
1130 CALL dbcsr_distribution_get(std_mat_dist, pgrid=mat_pgrid)
1131 ALLOCATE (ao_blk_size(nblk_aos), ri_blk_size(nblk_ri))
1132 CALL dbt_get_info(pq_x, blk_size_1=ao_blk_size, blk_size_3=ri_blk_size)
1133
1134 !we opt for a cyclic dist for the MOs (since they should be rather dense anyways)
1135 ALLOCATE (ao_row_dist(nblk_aos), mo_col_dist(ispin)%array(nblk_mos(ispin)))
1136 CALL dbt_default_distvec(nblk_aos, SIZE(mat_pgrid, 1), ao_blk_size, ao_row_dist)
1137 CALL dbt_default_distvec(nblk_mos(ispin), SIZE(mat_pgrid, 2), mo_blk_size(ispin)%array, &
1138 mo_col_dist(ispin)%array)
1139 CALL dbcsr_distribution_new(mat_dist, group=para_env%get_handle(), pgrid=mat_pgrid, &
1140 row_dist=ao_row_dist, col_dist=mo_col_dist(ispin)%array)
1141
1142 CALL dbcsr_create(dbcsr_mo_coeffs, name="MO coeffs", matrix_type="N", dist=mat_dist, &
1143 row_blk_size=ao_blk_size, col_blk_size=mo_blk_size(ispin)%array)
1144 CALL copy_fm_to_dbcsr(mo_coeffs(ispin), dbcsr_mo_coeffs)
1145
1146 CALL dbt_create(dbcsr_mo_coeffs, t_mo_coeffs)
1147 CALL dbt_copy_matrix_to_tensor(dbcsr_mo_coeffs, t_mo_coeffs)
1148
1149 !prepare the (jq|X) tensor for the first contraction (over occupied MOs)
1150 ALLOCATE (mo_row_dist(ispin)%array(nblk_mos(ispin)), ao_col_dist(nblk_aos), ri_dist_3(nblk_ri))
1151 CALL dbt_default_distvec(nblk_mos(ispin), pdims(1), mo_blk_size(ispin)%array, mo_row_dist(ispin)%array)
1152 CALL dbt_default_distvec(nblk_aos, pdims(2), ao_blk_size, ao_col_dist)
1153 CALL dbt_default_distvec(nblk_ri, pdims(3), ri_blk_size, ri_dist_3)
1154 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist(ispin)%array, &
1155 nd_dist_2=ao_col_dist, nd_dist_3=ri_dist_3)
1156
1157 CALL dbt_create(jq_x, name="(jq|X)", dist=t_dist, map1_2d=[1, 3], map2_2d=[2], &
1158 blk_size_1=mo_blk_size(ispin)%array, blk_size_2=ao_blk_size, blk_size_3=ri_blk_size)
1159 CALL dbt_distribution_destroy(t_dist)
1160
1161 !contract (pq|X) into (jq|X)
1162 CALL dbt_contract(alpha=1.0_dp, tensor_1=pq_x, tensor_2=t_mo_coeffs, &
1163 beta=0.0_dp, tensor_3=jq_x, contract_1=[1], &
1164 notcontract_1=[2, 3], contract_2=[1], notcontract_2=[2], &
1165 map_1=[2, 3], map_2=[1], bounds_3=[1, nocc(ispin)], &!only want occupied MOs for j
1166 move_data=.true.)
1167
1168 CALL dbt_destroy(pq_x)
1169 CALL dbt_copy_matrix_to_tensor(dbcsr_mo_coeffs, t_mo_coeffs)
1170
1171 !prepare (ja|X) tensor for the second contraction (over virtual MOs)
1172 !only virtual-occupied bit of the first 2 indices is occupied + it should be dense
1173 !take blk dist such that blocks are evenly distributed
1174 CALL dbt_default_distvec(nblk_occ(ispin), pdims(1), mo_blk_size(ispin)%array(1:nblk_occ(ispin)), &
1175 mo_row_dist(ispin)%array(1:nblk_occ(ispin)))
1176 CALL dbt_default_distvec(nblk_virt(ispin), pdims(1), &
1177 mo_blk_size(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)), &
1178 mo_row_dist(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)))
1179 CALL dbt_default_distvec(nblk_occ(ispin), pdims(2), mo_blk_size(ispin)%array(1:nblk_occ(ispin)), &
1180 mo_col_dist(ispin)%array(1:nblk_occ(ispin)))
1181 CALL dbt_default_distvec(nblk_virt(ispin), pdims(2), &
1182 mo_blk_size(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)), &
1183 mo_col_dist(ispin)%array(nblk_occ(ispin) + 1:nblk_mos(ispin)))
1184 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist(ispin)%array, &
1185 nd_dist_2=mo_col_dist(ispin)%array, nd_dist_3=ri_dist_3)
1186
1187 CALL dbt_create(ja_x(ispin), name="(ja|X)", dist=t_dist, map1_2d=[1, 2], map2_2d=[3], &
1188 blk_size_1=mo_blk_size(ispin)%array, blk_size_2=mo_blk_size(ispin)%array, &
1189 blk_size_3=ri_blk_size)
1190 CALL dbt_distribution_destroy(t_dist)
1191
1192 !contract (jq|X) into (ja|X)
1193 CALL dbt_contract(alpha=1.0_dp, tensor_1=jq_x, tensor_2=t_mo_coeffs, &
1194 beta=0.0_dp, tensor_3=ja_x(ispin), contract_1=[2], &
1195 notcontract_1=[1, 3], contract_2=[1], notcontract_2=[2], &
1196 map_1=[1, 3], map_2=[2], move_data=.true., &
1197 bounds_3=[nocc(ispin) + 1, nocc(ispin) + nvirt(ispin)])
1198
1199 CALL dbt_destroy(jq_x)
1200 CALL dbt_copy_matrix_to_tensor(dbcsr_mo_coeffs, t_mo_coeffs)
1201
1202 !Finally, get the oI_Y tensors
1203 CALL get_oiy_tensors(oi_y, ispin, ao_blk_size, mo_blk_size(ispin)%array, ri_blk_size, &
1204 t_mo_coeffs, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1205
1206 !intermediate clen-up
1207 CALL dbt_destroy(t_mo_coeffs)
1208 CALL dbcsr_distribution_release(mat_dist)
1209 CALL dbcsr_release(dbcsr_mo_coeffs)
1210 DEALLOCATE (ao_col_dist, ri_dist_3, ri_blk_size, ao_blk_size, ao_row_dist)
1211
1212 END DO !ispin
1213
1214 !create a empty tensor template for the fully contracted (ja|Io) MO integrals, for all spin
1215 !configureations: alpha-alpha|alpha-alpha, alpha-alpha|beta-beta, etc.
1216 ALLOCATE (ja_io_template(nspins, nspins))
1217 DO ispin = 1, nspins
1218 DO jspin = 1, nspins
1219 ALLOCATE (mo_dist_3(nblk_mos(jspin)))
1220 CALL dbt_default_distvec(nblk_occ(jspin), pdims(3), mo_blk_size(jspin)%array(1:nblk_occ(jspin)), &
1221 mo_dist_3(1:nblk_occ(jspin)))
1222 CALL dbt_default_distvec(nblk_virt(jspin), pdims(3), &
1223 mo_blk_size(jspin)%array(nblk_occ(jspin) + 1:nblk_mos(jspin)), &
1224 mo_dist_3(nblk_occ(jspin) + 1:nblk_mos(jspin)))
1225 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist(ispin)%array, &
1226 nd_dist_2=mo_col_dist(ispin)%array, nd_dist_3=mo_dist_3)
1227
1228 CALL dbt_create(ja_io_template(ispin, jspin), name="(ja|Io)", dist=t_dist, map1_2d=[1, 2], &
1229 map2_2d=[3], blk_size_1=mo_blk_size(ispin)%array, &
1230 blk_size_2=mo_blk_size(ispin)%array, blk_size_3=mo_blk_size(jspin)%array)
1231 CALL dbt_distribution_destroy(t_dist)
1232 DEALLOCATE (mo_dist_3)
1233 END DO
1234 END DO
1235
1236 !clean-up
1237 CALL dbt_pgrid_destroy(t_pgrid)
1238 DO ispin = 1, nspins
1239 DEALLOCATE (mo_blk_size(ispin)%array)
1240 DEALLOCATE (mo_col_dist(ispin)%array)
1241 DEALLOCATE (mo_row_dist(ispin)%array)
1242 END DO
1243
1244 CALL timestop(handle)
1245
1246 END SUBROUTINE contract_aos_to_mos
1247
1248! **************************************************************************************************
1249!> \brief Contracts the (oI|Y) tensors, for each donor MO
1250!> \param oI_Y the contracted tensr. It is assumed to be allocated outside of this routine
1251!> \param ispin ...
1252!> \param ao_blk_size ...
1253!> \param mo_blk_size ...
1254!> \param ri_blk_size ...
1255!> \param t_mo_coeffs ...
1256!> \param donor_state ...
1257!> \param xas_tdp_env ...
1258!> \param xas_tdp_control ...
1259!> \param qs_env ...
1260! **************************************************************************************************
1261 SUBROUTINE get_oiy_tensors(oI_Y, ispin, ao_blk_size, mo_blk_size, ri_blk_size, t_mo_coeffs, &
1262 donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1263
1264 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:), &
1265 INTENT(INOUT) :: oi_y
1266 INTEGER, INTENT(IN) :: ispin
1267 INTEGER, DIMENSION(:), POINTER :: ao_blk_size, mo_blk_size, ri_blk_size
1268 TYPE(dbt_type), INTENT(inout) :: t_mo_coeffs
1269 TYPE(donor_state_type), POINTER :: donor_state
1270 TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
1271 TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
1272 TYPE(qs_environment_type), POINTER :: qs_env
1273
1274 CHARACTER(len=*), PARAMETER :: routinen = 'get_oIY_tensors'
1275
1276 INTEGER :: bo(2), handle, i, ido_mo, ind(2), natom, &
1277 nblk_aos, nblk_mos, nblk_ri, ndo_mo, &
1278 pdims_2d(2), proc_id
1279 INTEGER, DIMENSION(:), POINTER :: ao_row_dist, mo_row_dist, ri_col_dist
1280 INTEGER, DIMENSION(:, :), POINTER :: mat_pgrid
1281 LOGICAL :: found
1282 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: pblock
1283 TYPE(dbcsr_distribution_type), POINTER :: std_mat_dist
1284 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: pi_y
1285 TYPE(dbt_distribution_type) :: t_dist
1286 TYPE(dbt_iterator_type) :: iter
1287 TYPE(dbt_pgrid_type) :: t_pgrid
1288 TYPE(dbt_type) :: t_pi_y, t_work
1289 TYPE(mp_para_env_type), POINTER :: para_env
1290
1291 CALL timeset(routinen, handle)
1292
1293 CALL get_qs_env(qs_env, natom=natom, para_env=para_env, dbcsr_dist=std_mat_dist)
1294 ndo_mo = donor_state%ndo_mo
1295 nblk_aos = SIZE(ao_blk_size)
1296 nblk_mos = SIZE(mo_blk_size)
1297 nblk_ri = SIZE(ri_blk_size)
1298
1299 !We first contract (pq|X) over q into I using kernel routines (goes over all MOs and spins)
1300 CALL contract2_ao_to_domo(pi_y, "EXCHANGE", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1301
1302 !multiply by (X|Y)^-1
1303 CALL ri_all_blocks_mm(pi_y, xas_tdp_env%ri_inv_ex)
1304
1305 !get standaed 2d matrix proc grid
1306 CALL dbcsr_distribution_get(std_mat_dist, pgrid=mat_pgrid)
1307
1308 !Loop over donor MOs of this spin
1309 DO ido_mo = (ispin - 1)*ndo_mo + 1, ispin*ndo_mo
1310
1311 !cast the matrix into a tensor
1312 CALL dbt_create(pi_y(ido_mo)%matrix, t_work)
1313 CALL dbt_copy_matrix_to_tensor(pi_y(ido_mo)%matrix, t_work)
1314
1315 !find col proc_id of the only populated column of t_work
1316 ALLOCATE (ri_col_dist(natom))
1317 CALL dbt_get_info(t_work, proc_dist_2=ri_col_dist)
1318 proc_id = ri_col_dist(donor_state%at_index)
1319 DEALLOCATE (ri_col_dist)
1320
1321 !preapre (oI_Y) tensor and (pI|Y) tensor in proper dist and blk sizes
1322 pdims_2d(1) = SIZE(mat_pgrid, 1); pdims_2d(2) = SIZE(mat_pgrid, 2)
1323 CALL dbt_pgrid_create(para_env, pdims_2d, t_pgrid)
1324
1325 ALLOCATE (ri_col_dist(nblk_ri), ao_row_dist(nblk_aos), mo_row_dist(nblk_mos))
1326 CALL dbt_get_info(t_work, proc_dist_1=ao_row_dist)
1327 ri_col_dist = proc_id
1328
1329 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=ao_row_dist, nd_dist_2=ri_col_dist)
1330 CALL dbt_create(t_pi_y, name="(pI|Y)", dist=t_dist, map1_2d=[1], map2_2d=[2], &
1331 blk_size_1=ao_blk_size, blk_size_2=ri_blk_size)
1332 CALL dbt_distribution_destroy(t_dist)
1333
1334 !copy block by block, dist match
1335
1336!$OMP PARALLEL DEFAULT(NONE) SHARED(t_work,t_pI_Y,nblk_ri,ri_blk_size,ao_blk_size) &
1337!$OMP PRIVATE(iter,ind,pblock,found,bo)
1338 CALL dbt_iterator_start(iter, t_work)
1339 DO WHILE (dbt_iterator_blocks_left(iter))
1340 CALL dbt_iterator_next_block(iter, ind)
1341 CALL dbt_get_block(t_work, ind, pblock, found)
1342
1343 DO i = 1, nblk_ri
1344 bo(1) = sum(ri_blk_size(1:i - 1)) + 1
1345 bo(2) = bo(1) + ri_blk_size(i) - 1
1346 CALL dbt_put_block(t_pi_y, [ind(1), i], [ao_blk_size(ind(1)), ri_blk_size(i)], &
1347 pblock(:, bo(1):bo(2)))
1348 END DO
1349
1350 DEALLOCATE (pblock)
1351 END DO
1352 CALL dbt_iterator_stop(iter)
1353!$OMP END PARALLEL
1354 CALL dbt_finalize(t_pi_y)
1355
1356 !get optimal pgrid for (oI|Y)
1357 CALL dbt_pgrid_destroy(t_pgrid)
1358 pdims_2d = 0
1359 CALL dbt_pgrid_create(para_env, pdims_2d, t_pgrid, tensor_dims=[nblk_mos, nblk_ri])
1360
1361 CALL dbt_default_distvec(nblk_aos, pdims_2d(1), ao_blk_size, ao_row_dist)
1362 CALL dbt_default_distvec(nblk_mos, pdims_2d(1), mo_blk_size, mo_row_dist)
1363 CALL dbt_default_distvec(nblk_ri, pdims_2d(2), ri_blk_size, ri_col_dist)
1364
1365 !transfer pI_Y to the correct pgrid
1366 CALL dbt_destroy(t_work)
1367 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=ao_row_dist, nd_dist_2=ri_col_dist)
1368 CALL dbt_create(t_work, name="t_pI_Y", dist=t_dist, map1_2d=[1], map2_2d=[2], &
1369 blk_size_1=ao_blk_size, blk_size_2=ri_blk_size)
1370 CALL dbt_copy(t_pi_y, t_work, move_data=.true.)
1371 CALL dbt_distribution_destroy(t_dist)
1372
1373 !create (oI|Y)
1374 CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=mo_row_dist, nd_dist_2=ri_col_dist)
1375 CALL dbt_create(oi_y(ido_mo), name="(oI|Y)", dist=t_dist, map1_2d=[1], map2_2d=[2], &
1376 blk_size_1=mo_blk_size, blk_size_2=ri_blk_size)
1377 CALL dbt_distribution_destroy(t_dist)
1378
1379 !contract (pI|Y) into (oI|Y)
1380 CALL dbt_contract(alpha=1.0_dp, tensor_1=t_work, tensor_2=t_mo_coeffs, &
1381 beta=0.0_dp, tensor_3=oi_y(ido_mo), contract_1=[1], &
1382 notcontract_1=[2], contract_2=[1], notcontract_2=[2], &
1383 map_1=[2], map_2=[1]) !no bound, all MOs needed
1384
1385 !intermediate clean-up
1386 CALL dbt_destroy(t_work)
1387 CALL dbt_destroy(t_pi_y)
1388 CALL dbt_pgrid_destroy(t_pgrid)
1389 DEALLOCATE (ri_col_dist, ao_row_dist, mo_row_dist)
1390
1391 END DO !ido_mo
1392
1393 !final clean-up
1394 CALL dbcsr_deallocate_matrix_set(pi_y)
1395
1396 CALL timestop(handle)
1397
1398 END SUBROUTINE get_oiy_tensors
1399
1400! **************************************************************************************************
1401!> \brief Computes the same spin, occupied-occupied-virtual MO contribution to the electron propagator:
1402!> 0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k) and its 1st derivative wrt omega:
1403!> -0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k)**2
1404!> \param contrib ...
1405!> \param dev the first derivative
1406!> \param ja_Ik_diff ... contains the (ja|Ik) - (ka|Ij) tensor
1407!> \param occ_evals ...
1408!> \param virt_evals ...
1409!> \param omega ...
1410!> \param c_ss ...
1411!> \note since the is same-spin, there is only one possibility for occ_evals and virt_evals
1412! **************************************************************************************************
1413 SUBROUTINE calc_ss_oov_contrib(contrib, dev, ja_Ik_diff, occ_evals, virt_evals, omega, c_ss)
1414
1415 REAL(dp), INTENT(inout) :: contrib, dev
1416 TYPE(dbt_type), INTENT(inout) :: ja_ik_diff
1417 REAL(dp), DIMENSION(:), INTENT(IN) :: occ_evals, virt_evals
1418 REAL(dp), INTENT(in) :: omega, c_ss
1419
1420 CHARACTER(len=*), PARAMETER :: routinen = 'calc_ss_oov_contrib'
1421
1422 INTEGER :: a, boff(3), bsize(3), handle, idx1, &
1423 idx2, idx3, ind(3), j, k, nocc
1424 LOGICAL :: found
1425 REAL(dp) :: denom, tmp
1426 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: tensor_blk
1427 TYPE(dbt_iterator_type) :: iter
1428
1429 CALL timeset(routinen, handle)
1430
1431 !<Ia||jk> = <Ia|jk> - <Ia|kj> = (Ij|ak) - (Ik|aj) = (ka|Ij) - (ja|Ik)
1432 !Note: the same spin contribution only involve spib-orbitals that are all of the same spin
1433
1434 nocc = SIZE(occ_evals, 1)
1435
1436 !Iterate over the tensors and sum. Both tensors have same dist
1437
1438!$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1439!$OMP SHARED(ja_Ik_diff,occ_evals,virt_evals,omega,c_ss,nocc) &
1440!$OMP PRIVATE(iter,ind,boff,bsize,tensor_blk,found,idx1,idx2,idx3,j,A,k,denom,tmp)
1441 CALL dbt_iterator_start(iter, ja_ik_diff)
1442 DO WHILE (dbt_iterator_blocks_left(iter))
1443 CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1444 CALL dbt_get_block(ja_ik_diff, ind, tensor_blk, found)
1445
1446 IF (found) THEN
1447
1448 DO idx3 = 1, bsize(3)
1449 DO idx2 = 1, bsize(2)
1450 DO idx1 = 1, bsize(1)
1451
1452 !get proper MO indices
1453 j = boff(1) + idx1 - 1
1454 a = boff(2) + idx2 - 1 - nocc
1455 k = boff(3) + idx3 - 1
1456
1457 !the denominator
1458 denom = omega + virt_evals(a) - occ_evals(j) - occ_evals(k)
1459
1460 !the same spin contribution
1461 tmp = c_ss*tensor_blk(idx1, idx2, idx3)**2
1462
1463 contrib = contrib + 0.5_dp*tmp/denom
1464 dev = dev - 0.5_dp*tmp/denom**2
1465
1466 END DO
1467 END DO
1468 END DO
1469 END IF
1470 DEALLOCATE (tensor_blk)
1471 END DO
1472 CALL dbt_iterator_stop(iter)
1473!$OMP END PARALLEL
1474
1475 CALL timestop(handle)
1476
1477 END SUBROUTINE calc_ss_oov_contrib
1478
1479! **************************************************************************************************
1480!> \brief Computes the opposite spin, occupied-occupied-virtual MO contribution to the electron propagator:
1481!> 0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k) and its 1st derivative wrt omega:
1482!> -0.5 * sum_ajk |<Ia||jk>|^2/(omega + eps_a - epsj_j - eps_k)**2
1483!> \param contrib ...
1484!> \param dev the first derivative
1485!> \param ja_Ik ...
1486!> \param j_evals ocucpied evals for j MO
1487!> \param a_evals virtual evals for a MO
1488!> \param k_evals ocucpied evals for k MO
1489!> \param omega ...
1490!> \param c_os ...
1491!> \param a_offset the number of occupied MOs for the same spin as a MOs
1492!> \note since this is opposite-spin, evals might be different for different spins
1493! **************************************************************************************************
1494 SUBROUTINE calc_os_oov_contrib(contrib, dev, ja_Ik, j_evals, a_evals, k_evals, omega, c_os, a_offset)
1495
1496 REAL(dp), INTENT(inout) :: contrib, dev
1497 TYPE(dbt_type), INTENT(inout) :: ja_ik
1498 REAL(dp), DIMENSION(:), INTENT(IN) :: j_evals, a_evals, k_evals
1499 REAL(dp), INTENT(in) :: omega, c_os
1500 INTEGER, INTENT(IN) :: a_offset
1501
1502 CHARACTER(len=*), PARAMETER :: routinen = 'calc_os_oov_contrib'
1503
1504 INTEGER :: a, boff(3), bsize(3), handle, idx1, &
1505 idx2, idx3, ind(3), j, k
1506 LOGICAL :: found
1507 REAL(dp) :: denom, tmp
1508 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: ja_ik_blk
1509 TYPE(dbt_iterator_type) :: iter
1510
1511 CALL timeset(routinen, handle)
1512
1513 !<Ia||jk> = <Ia|jk> - <Ia|kj> = (Ij|ak) - (Ik|aj) = (ka|Ij) - (ja|Ik)
1514 !Note: the opposite spin contribution comes in 2 parts, once (ka|Ij) and once (ja|Ik) only,
1515 ! where both spin-orbitals on one center have the same spin, but it is the opposite of
1516 ! the spin on the other center. Because it is eventually summed, can consider only one
1517 ! of the 2 terms, but with a factor 2
1518
1519 !Iterate over the tensor and sum
1520
1521!$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1522!$OMP SHARED(ja_Ik,j_evals,a_evals,k_evals,omega,c_os,a_offset) &
1523!$OMP PRIVATE(iter,ind,boff,bsize,ja_Ik_blk,found,idx1,idx2,idx3,j,A,k,denom,tmp)
1524 CALL dbt_iterator_start(iter, ja_ik)
1525 DO WHILE (dbt_iterator_blocks_left(iter))
1526 CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1527 CALL dbt_get_block(ja_ik, ind, ja_ik_blk, found)
1528
1529 IF (found) THEN
1530
1531 DO idx3 = 1, bsize(3)
1532 DO idx2 = 1, bsize(2)
1533 DO idx1 = 1, bsize(1)
1534
1535 !get proper MO indices
1536 j = boff(1) + idx1 - 1
1537 a = boff(2) + idx2 - 1 - a_offset
1538 k = boff(3) + idx3 - 1
1539
1540 !the denominator
1541 denom = omega + a_evals(a) - j_evals(j) - k_evals(k)
1542
1543 !the opposite spin contribution
1544 tmp = c_os*ja_ik_blk(idx1, idx2, idx3)**2
1545
1546 !take factor 2 into acocunt (2 x 0.5 = 1)
1547 contrib = contrib + tmp/denom
1548 dev = dev - tmp/denom**2
1549
1550 END DO
1551 END DO
1552 END DO
1553 END IF
1554 DEALLOCATE (ja_ik_blk)
1555 END DO
1556 CALL dbt_iterator_stop(iter)
1557!$OMP END PARALLEL
1558
1559 CALL timestop(handle)
1560
1561 END SUBROUTINE calc_os_oov_contrib
1562
1563! **************************************************************************************************
1564!> \brief Computes the same-spin occupied-virtual-virtual MO contribution to the electron propagator:
1565!> 0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b) as well as its first derivative:
1566!> -0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b)**2
1567!> \param contrib ...
1568!> \param dev the first derivative
1569!> \param aj_Ib_diff contatins the (aj|Ib) - (bj|Ia) tensor
1570!> \param occ_evals ...
1571!> \param virt_evals ...
1572!> \param omega ...
1573!> \param c_ss ...
1574!> \note since the is same-spin, there is only one possibility for occ_evals and virt_evals
1575! **************************************************************************************************
1576 SUBROUTINE calc_ss_ovv_contrib(contrib, dev, aj_Ib_diff, occ_evals, virt_evals, omega, c_ss)
1577
1578 REAL(dp), INTENT(inout) :: contrib, dev
1579 TYPE(dbt_type), INTENT(inout) :: aj_ib_diff
1580 REAL(dp), DIMENSION(:), INTENT(IN) :: occ_evals, virt_evals
1581 REAL(dp), INTENT(in) :: omega, c_ss
1582
1583 CHARACTER(len=*), PARAMETER :: routinen = 'calc_ss_ovv_contrib'
1584
1585 INTEGER :: a, b, boff(3), bsize(3), handle, idx1, &
1586 idx2, idx3, ind(3), j, nocc
1587 LOGICAL :: found
1588 REAL(dp) :: denom, tmp
1589 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: tensor_blk
1590 TYPE(dbt_iterator_type) :: iter
1591
1592 CALL timeset(routinen, handle)
1593
1594 !<Ij||ab> = <Ij|ab> - <Ij|ba> = (Ia|jb) - (Ib|ja) = (jb|Ia) - (ja|Ib)
1595 !Notes: only non-zero contribution if all MOs have the same spin
1596
1597 nocc = SIZE(occ_evals, 1)
1598
1599 !tensors have matching distributions, can do that safely
1600
1601!$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1602!$OMP SHARED(aj_Ib_diff,occ_evals,virt_evals,omega,c_ss,nocc) &
1603!$OMP PRIVATE(iter,ind,boff,bsize,tensor_blk,found,idx1,idx2,idx3,j,A,b,denom,tmp)
1604 CALL dbt_iterator_start(iter, aj_ib_diff)
1605 DO WHILE (dbt_iterator_blocks_left(iter))
1606 CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1607 CALL dbt_get_block(aj_ib_diff, ind, tensor_blk, found)
1608
1609 IF (found) THEN
1610
1611 DO idx3 = 1, bsize(3)
1612 DO idx2 = 1, bsize(2)
1613 DO idx1 = 1, bsize(1)
1614
1615 !get proper MO indices
1616 a = boff(1) + idx1 - 1 - nocc
1617 j = boff(2) + idx2 - 1
1618 b = boff(3) + idx3 - 1 - nocc
1619
1620 !the common denominator
1621 denom = omega + occ_evals(j) - virt_evals(a) - virt_evals(b)
1622
1623 !the same spin contribution
1624 tmp = c_ss*tensor_blk(idx1, idx2, idx3)**2
1625
1626 contrib = contrib + 0.5_dp*tmp/denom
1627 dev = dev - 0.5_dp*tmp/denom**2
1628
1629 END DO
1630 END DO
1631 END DO
1632 END IF
1633 DEALLOCATE (tensor_blk)
1634 END DO
1635 CALL dbt_iterator_stop(iter)
1636!$OMP END PARALLEL
1637
1638 CALL timestop(handle)
1639
1640 END SUBROUTINE calc_ss_ovv_contrib
1641
1642! **************************************************************************************************
1643!> \brief Computes the opposite-spin occupied-virtual-virtual MO contribution to the electron propagator:
1644!> 0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b) as well as its first derivative:
1645!> -0.5 * sum_abj |<Ij||ab>|^2/(omega + eps_j - eps_a - eps_b)**2
1646!> \param contrib ...
1647!> \param dev the first derivative
1648!> \param aj_Ib ...
1649!> \param a_evals virtual evals for a MO
1650!> \param j_evals occupied evals for j MO
1651!> \param b_evals virtual evals for b MO
1652!> \param omega ...
1653!> \param c_os ...
1654!> \param a_offset number of occupied MOs for the same spin as a MO
1655!> \param b_offset number of occupied MOs for the same spin as b MO
1656!> \note since this is opposite-spin, evals might be different for different spins
1657! **************************************************************************************************
1658 SUBROUTINE calc_os_ovv_contrib(contrib, dev, aj_Ib, a_evals, j_evals, b_evals, omega, c_os, &
1659 a_offset, b_offset)
1660
1661 REAL(dp), INTENT(inout) :: contrib, dev
1662 TYPE(dbt_type), INTENT(inout) :: aj_ib
1663 REAL(dp), DIMENSION(:), INTENT(IN) :: a_evals, j_evals, b_evals
1664 REAL(dp), INTENT(in) :: omega, c_os
1665 INTEGER, INTENT(IN) :: a_offset, b_offset
1666
1667 CHARACTER(len=*), PARAMETER :: routinen = 'calc_os_ovv_contrib'
1668
1669 INTEGER :: a, b, boff(3), bsize(3), handle, idx1, &
1670 idx2, idx3, ind(3), j
1671 LOGICAL :: found
1672 REAL(dp) :: denom, tmp
1673 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: aj_ib_blk
1674 TYPE(dbt_iterator_type) :: iter
1675
1676 CALL timeset(routinen, handle)
1677
1678 !<Ij||ab> = <Ij|ab> - <Ij|ba> = (Ia|jb) - (Ib|ja) = (jb|Ia) - (ja|Ib)
1679 !Notes: only 2 distinct contributions, once from (jb|Ia) and once form (ja|Ib) only, when the 2
1680 ! MOs on one center have one spin and the 2 MOs on the other center have another spin
1681 ! In the end, the sum is such that can take one of those with a factor 2
1682
1683!$OMP PARALLEL DEFAULT(NONE) REDUCTION(+:contrib,dev) &
1684!$OMP SHARED(aj_Ib,a_evals,j_evals,b_evals,omega,c_os,a_offset,b_offset) &
1685!$OMP PRIVATE(iter,ind,boff,bsize,aj_Ib_blk,found,idx1,idx2,idx3,j,A,b,denom,tmp)
1686 CALL dbt_iterator_start(iter, aj_ib)
1687 DO WHILE (dbt_iterator_blocks_left(iter))
1688 CALL dbt_iterator_next_block(iter, ind, blk_offset=boff, blk_size=bsize)
1689 CALL dbt_get_block(aj_ib, ind, aj_ib_blk, found)
1690
1691 IF (found) THEN
1692
1693 DO idx3 = 1, bsize(3)
1694 DO idx2 = 1, bsize(2)
1695 DO idx1 = 1, bsize(1)
1696
1697 !get proper MO indices
1698 a = boff(1) + idx1 - 1 - a_offset
1699 j = boff(2) + idx2 - 1
1700 b = boff(3) + idx3 - 1 - b_offset
1701
1702 !the denominator
1703 denom = omega + j_evals(j) - a_evals(a) - b_evals(b)
1704
1705 !the opposite-spin contribution. Factor 2 taken into account (2 x 0.5 = 1)
1706 tmp = c_os*(aj_ib_blk(idx1, idx2, idx3))**2
1707
1708 contrib = contrib + tmp/denom
1709 dev = dev - tmp/denom**2
1710
1711 END DO
1712 END DO
1713 END DO
1714 END IF
1715 DEALLOCATE (aj_ib_blk)
1716 END DO
1717 CALL dbt_iterator_stop(iter)
1718!$OMP END PARALLEL
1719
1720 CALL timestop(handle)
1721
1722 END SUBROUTINE calc_os_ovv_contrib
1723
1724! **************************************************************************************************
1725!> \brief We try to compute the spin-orbit splitting via perturbation theory. We keep it
1726!>\ cheap by only inculding the degenerate states (2p, 3d, 3p, etc.).
1727!> \param soc_shifts the SOC corrected orbital shifts to apply to original energies, for both spins
1728!> \param donor_state ...
1729!> \param xas_tdp_env ...
1730!> \param xas_tdp_control ...
1731!> \param qs_env ...
1732! **************************************************************************************************
1733 SUBROUTINE get_soc_splitting(soc_shifts, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
1734
1735 REAL(dp), ALLOCATABLE, DIMENSION(:, :), &
1736 INTENT(out) :: soc_shifts
1737 TYPE(donor_state_type), POINTER :: donor_state
1738 TYPE(xas_tdp_env_type), POINTER :: xas_tdp_env
1739 TYPE(xas_tdp_control_type), POINTER :: xas_tdp_control
1740 TYPE(qs_environment_type), POINTER :: qs_env
1741
1742 CHARACTER(len=*), PARAMETER :: routinen = 'get_soc_splitting'
1743
1744 COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :) :: evecs, hami
1745 INTEGER :: beta_spin, handle, ialpha, ibeta, &
1746 ido_mo, ispin, nao, ndo_mo, ndo_so, &
1747 nspins
1748 REAL(dp) :: alpha_tot_contrib, beta_tot_contrib
1749 REAL(dp), ALLOCATABLE, DIMENSION(:) :: evals
1750 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_shifts
1751 TYPE(cp_blacs_env_type), POINTER :: blacs_env
1752 TYPE(cp_cfm_type) :: hami_cfm
1753 TYPE(cp_fm_struct_type), POINTER :: ao_domo_struct, domo_domo_struct, &
1754 doso_doso_struct
1755 TYPE(cp_fm_type) :: alpha_gs_coeffs, ao_domo_work, &
1756 beta_gs_coeffs, domo_domo_work, &
1757 img_fm, real_fm
1758 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_ks
1759 TYPE(dbcsr_type), POINTER :: orb_soc_x, orb_soc_y, orb_soc_z
1760 TYPE(mp_para_env_type), POINTER :: para_env
1761
1762 NULLIFY (matrix_ks, para_env, blacs_env, ao_domo_struct, domo_domo_struct, &
1763 doso_doso_struct, orb_soc_x, orb_soc_y, orb_soc_z)
1764
1765 CALL timeset(routinen, handle)
1766
1767 ! Idea: we compute the SOC matrix in the space of the degenerate spin-orbitals, add it to
1768 ! the KS matrix in the same basis, diagonalize the whole thing and get the corrected energies
1769 ! for SOC
1770
1771 CALL get_qs_env(qs_env, matrix_ks=matrix_ks, para_env=para_env, blacs_env=blacs_env)
1772
1773 orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
1774 orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
1775 orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
1776
1777 ! Whether it is open-shell or not, we have 2*ndo_mo spin-orbitals
1778 nspins = 2
1779 ndo_mo = donor_state%ndo_mo
1780 ndo_so = nspins*ndo_mo
1781 CALL dbcsr_get_info(matrix_ks(1)%matrix, nfullrows_total=nao)
1782
1783 ! Build the fm infrastructure
1784 CALL cp_fm_struct_create(ao_domo_struct, context=blacs_env, para_env=para_env, &
1785 nrow_global=nao, ncol_global=ndo_mo)
1786 CALL cp_fm_struct_create(domo_domo_struct, context=blacs_env, para_env=para_env, &
1787 nrow_global=ndo_mo, ncol_global=ndo_mo)
1788 CALL cp_fm_struct_create(doso_doso_struct, context=blacs_env, para_env=para_env, &
1789 nrow_global=ndo_so, ncol_global=ndo_so)
1790
1791 CALL cp_fm_create(alpha_gs_coeffs, ao_domo_struct)
1792 CALL cp_fm_create(beta_gs_coeffs, ao_domo_struct)
1793 CALL cp_fm_create(ao_domo_work, ao_domo_struct)
1794 CALL cp_fm_create(domo_domo_work, domo_domo_struct)
1795 CALL cp_fm_create(real_fm, doso_doso_struct)
1796 CALL cp_fm_create(img_fm, doso_doso_struct)
1797
1798 ! Put the gs_coeffs in the correct format.
1799 IF (xas_tdp_control%do_uks) THEN
1800
1801 CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=alpha_gs_coeffs, nrow=nao, &
1802 ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=1)
1803 CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=beta_gs_coeffs, nrow=nao, &
1804 ncol=ndo_mo, s_firstrow=1, s_firstcol=ndo_mo + 1, t_firstrow=1, t_firstcol=1)
1805
1806 ELSE
1807
1808 CALL cp_fm_to_fm(donor_state%gs_coeffs, alpha_gs_coeffs)
1809 CALL cp_fm_to_fm(donor_state%gs_coeffs, beta_gs_coeffs)
1810 END IF
1811
1812 ! Compute the KS matrix in this basis, add it to the real part of the final matrix
1813 !alpha-alpha block in upper left quadrant
1814 CALL cp_dbcsr_sm_fm_multiply(matrix_ks(1)%matrix, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1815 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1816 domo_domo_work)
1817 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1818 s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=1)
1819
1820 !beta-beta block in lower right quadrant
1821 beta_spin = 1; IF (xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks) beta_spin = 2
1822 CALL cp_dbcsr_sm_fm_multiply(matrix_ks(beta_spin)%matrix, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1823 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1824 domo_domo_work)
1825 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1826 s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=ndo_mo + 1)
1827
1828 ! Compute the SOC matrix elements and add them to the real or imaginary part of the matrix
1829 ! alpha-alpha block, only Hz not zero, purely imaginary, addition
1830 CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1831 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1832 domo_domo_work)
1833 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1834 s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=1)
1835
1836 ! beta-beta block, only Hz not zero, purely imaginary, substraciton
1837 CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1838 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, -1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1839 domo_domo_work)
1840 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1841 s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=ndo_mo + 1)
1842
1843 ! alpha-beta block, two non-zero terms in Hx and Hy
1844 ! Hx term, purely imaginary, addition
1845 CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1846 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1847 domo_domo_work)
1848 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1849 s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=ndo_mo + 1)
1850 ! Hy term, purely real, addition
1851 CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, beta_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1852 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, alpha_gs_coeffs, ao_domo_work, 0.0_dp, &
1853 domo_domo_work)
1854 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1855 s_firstrow=1, s_firstcol=1, t_firstrow=1, t_firstcol=ndo_mo + 1)
1856
1857 ! beta-alpha block, two non-zero terms in Hx and Hy
1858 ! Hx term, purely imaginary, addition
1859 CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1860 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1861 domo_domo_work)
1862 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=img_fm, nrow=ndo_mo, ncol=ndo_mo, &
1863 s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=1)
1864 ! Hy term, purely real, substraction
1865 CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, alpha_gs_coeffs, ao_domo_work, ncol=ndo_mo)
1866 CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, -1.0_dp, beta_gs_coeffs, ao_domo_work, 0.0_dp, &
1867 domo_domo_work)
1868 CALL cp_fm_to_fm_submat(msource=domo_domo_work, mtarget=real_fm, nrow=ndo_mo, ncol=ndo_mo, &
1869 s_firstrow=1, s_firstcol=1, t_firstrow=ndo_mo + 1, t_firstcol=1)
1870
1871 ! Cast everything in complex fm format
1872 CALL cp_cfm_create(hami_cfm, doso_doso_struct)
1873 CALL cp_fm_to_cfm(real_fm, img_fm, hami_cfm)
1874
1875 ! And diagonalize. Since tiny matrix (6x6), diagonalize locally
1876 ALLOCATE (evals(ndo_so), evecs(ndo_so, ndo_so), hami(ndo_so, ndo_so))
1877 CALL cp_cfm_get_submatrix(hami_cfm, hami)
1878 CALL complex_diag(hami, evecs, evals)
1879
1880 !The SOC corrected KS eigenvalues
1881 ALLOCATE (tmp_shifts(ndo_mo, 2))
1882
1883 ialpha = 1; ibeta = 1;
1884 DO ido_mo = 1, ndo_so
1885 !need to find out whether the eigenvalue corresponds to an alpha or beta spin-orbtial
1886 alpha_tot_contrib = real(dot_product(evecs(1:ndo_mo, ido_mo), evecs(1:ndo_mo, ido_mo)))
1887 beta_tot_contrib = real(dot_product(evecs(ndo_mo + 1:ndo_so, ido_mo), evecs(ndo_mo + 1:ndo_so, ido_mo)))
1888
1889 IF (alpha_tot_contrib > beta_tot_contrib) THEN
1890 tmp_shifts(ialpha, 1) = evals(ido_mo)
1891 ialpha = ialpha + 1
1892 ELSE
1893 tmp_shifts(ibeta, 2) = evals(ido_mo)
1894 ibeta = ibeta + 1
1895 END IF
1896 END DO
1897
1898 !compute shift from KS evals
1899 ALLOCATE (soc_shifts(ndo_mo, SIZE(donor_state%energy_evals, 2)))
1900 DO ispin = 1, SIZE(donor_state%energy_evals, 2)
1901 soc_shifts(:, ispin) = tmp_shifts(:, ispin) - donor_state%energy_evals(:, ispin)
1902 END DO
1903
1904 ! clean-up
1905 CALL cp_fm_release(alpha_gs_coeffs)
1906 CALL cp_fm_release(beta_gs_coeffs)
1907 CALL cp_fm_release(ao_domo_work)
1908 CALL cp_fm_release(domo_domo_work)
1909 CALL cp_fm_release(real_fm)
1910 CALL cp_fm_release(img_fm)
1911
1912 CALL cp_cfm_release(hami_cfm)
1913
1914 CALL cp_fm_struct_release(ao_domo_struct)
1915 CALL cp_fm_struct_release(domo_domo_struct)
1916 CALL cp_fm_struct_release(doso_doso_struct)
1917
1918 CALL timestop(handle)
1919
1920 END SUBROUTINE get_soc_splitting
1921
1922END MODULE xas_tdp_correction
#define idx2(a, i, j)
#define idx3(a, i, j, k)
Types and set/get functions for auxiliary density matrix methods.
Definition admm_types.F:15
Contains methods used in the context of density fitting.
Definition admm_utils.F:15
subroutine, public admm_correct_for_eigenvalues(ispin, admm_env, ks_matrix)
...
Definition admm_utils.F:53
collects all references to literature in CP2K as new algorithms / method are included from literature...
integer, save, public shigeta2001
integer, save, public bussy2021b
various utilities that regard array of different kinds: output, allocation,... maybe it is not a good...
methods related to the blacs parallel environment
Represents a complex full matrix distributed on many processors.
subroutine, public cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
Extract a sub-matrix from the full matrix: op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n...
subroutine, public cp_cfm_create(matrix, matrix_struct, name)
Creates a new full matrix with the given structure.
subroutine, public cp_cfm_release(matrix)
Releases a full matrix.
subroutine, public cp_fm_to_cfm(msourcer, msourcei, mtarget)
Construct a complex full matrix by taking its real and imaginary parts from two separate real-value f...
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
multiply a dbcsr with a fm matrix
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
Choose the Eigensolver depending on which library is available ELPA seems to be unstable for small sy...
Definition cp_fm_diag.F:208
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
integer function, public cp_logger_get_default_io_unit(logger)
returns the unit nr for the ionode (-1 on all other processors) skips as well checks if the procs cal...
This is the start of a dbt_api, all publically needed functions are exported here....
Definition dbt_api.F:17
Utilities for hfx and admm methods.
subroutine, public create_admm_xc_section(x_data, xc_section, admm_env)
This routine modifies the xc section depending on the potential type used for the HF exchange and the...
objects that represent the structure of input sections and the data contained in an input section
recursive subroutine, public section_vals_create(section_vals, section)
creates a object where to store the values of a section
subroutine, public section_vals_retain(section_vals)
retains the given section values (see doc/ReferenceCounting.html)
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_set_subs_vals(section_vals, subsection_name, new_section_vals, i_rep_section)
replaces of the requested subsection with the one given
recursive subroutine, public section_vals_release(section_vals)
releases the given object
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:106
Collection of simple mathematical functions and subroutines.
Definition mathlib.F:15
subroutine, public complex_diag(matrix, eigenvectors, eigenvalues)
Diagonalizes a local complex Hermitian matrix using LAPACK. Based on cp_cfm_heevd.
Definition mathlib.F:1746
Interface to the message passing library MPI.
basic linear algebra operations for full matrixes
Definition of physical constants:
Definition physcon.F:68
real(kind=dp), parameter, public evolt
Definition physcon.F:183
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, rhs)
Get the QUICKSTEP environment.
routines that build the Kohn-Sham matrix (i.e calculate the coulomb and xc parts
subroutine, public qs_ks_build_kohn_sham_matrix(qs_env, calculate_forces, just_energy, print_active, ext_ks_matrix)
routine where the real calculations are made: the KS matrix is calculated
Definition and initialisation of the mo data type.
Definition qs_mo_types.F:22
subroutine, public duplicate_mo_set(mo_set_new, mo_set_old)
allocate a new mo_set, and copy the old data
subroutine, public deallocate_mo_set(mo_set)
Deallocate a wavefunction data structure.
subroutine, public get_mo_set(mo_set, maxocc, homo, lfomo, nao, nelectron, n_el_f, nmo, eigenvalues, occupation_numbers, mo_coeff, mo_coeff_b, uniform_occupation, kts, mu, flexible_electron_count)
Get the components of a MO set data structure.
subroutine, public reassign_allocated_mos(mo_set_new, mo_set_old)
reassign an already allocated mo_set
All kind of helpful little routines.
Definition util.F:14
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
Definition util.F:333
Second order perturbation correction to XAS_TDP spectra (i.e. shift)
subroutine, public gw2x_shift(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
Computes the ionization potential using the GW2X method of Shigeta et. al. The result cam be used for...
subroutine, public get_soc_splitting(soc_shifts, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
We try to compute the spin-orbit splitting via perturbation theory. We keep it \ cheap by only inculd...
All the kernel specific subroutines for XAS TDP calculations.
subroutine, public contract2_ao_to_domo(contr_int, op_type, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
Contract the ri 3-center integrals stored in a tensor with repect to the donor MOs coeffs,...
subroutine, public ri_all_blocks_mm(contr_int, pq)
Multiply all the blocks of a contracted RI integral (aI|P) by a matrix of type (P|....
Define XAS TDP control type and associated create, release, etc subroutines, as well as XAS TDP envir...
stores some data used in wavefunction fitting
Definition admm_types.F:120
represent a pointer to a 1d array
represent a pointer to a 1d array
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
Represent a complex full matrix.
keeps the information about the structure of a full matrix
represent a full matrix
stores all the informations relevant to an mpi environment
Type containing informations about a single donor state.
Type containing control information for TDP XAS calculations.
Type containing informations such as inputs and results for TDP XAS calculations.