(git:ed6f26b)
Loading...
Searching...
No Matches
hfx_ri.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief RI-methods for HFX
10! **************************************************************************************************
11
12MODULE hfx_ri
13
14 USE omp_lib, ONLY: omp_get_num_threads,&
15 omp_get_thread_num
21 USE cell_types, ONLY: cell_type
24 USE cp_dbcsr_api, ONLY: &
28 dbcsr_scale, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, &
29 dbcsr_type_symmetric
33 dbcsr_dot,&
43 USE cp_fm_types, ONLY: cp_fm_create,&
49 USE cp_output_handling, ONLY: cp_p_file,&
53 USE dbt_api, ONLY: &
54 dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
55 dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, &
56 dbt_default_distvec, dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, &
57 dbt_distribution_type, dbt_filter, dbt_get_block, dbt_get_info, dbt_get_num_blocks_total, &
58 dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
59 dbt_iterator_type, dbt_mp_environ_pgrid, dbt_nd_mp_comm, dbt_pgrid_create, &
60 dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_reserve_blocks, dbt_scale, dbt_type
62 USE hfx_types, ONLY: alloc_containers,&
72 USE input_cp2k_hfx, ONLY: ri_mo,&
79 USE kinds, ONLY: default_string_length,&
80 dp,&
81 int_8
82 USE machine, ONLY: m_walltime
83 USE message_passing, ONLY: mp_cart_type,&
95 USE qs_mo_types, ONLY: get_mo_set,&
99 USE qs_rho_types, ONLY: qs_rho_get,&
101 USE qs_tensors, ONLY: &
112 USE util, ONLY: sort
113 USE virial_types, ONLY: virial_type
114#include "./base/base_uses.f90"
115
116!$ USE OMP_LIB, ONLY: omp_get_num_threads
117
118 IMPLICIT NONE
119 PRIVATE
120
123
124 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri'
125CONTAINS
126
127! **************************************************************************************************
128!> \brief Switches the RI_FLAVOR from MO to RHO or vice-versa
129!> \param ri_data ...
130!> \param qs_env ...
131!> \note As a side product, the ri_data is mostly reinitialized and the integrals recomputed
132! **************************************************************************************************
133 SUBROUTINE switch_ri_flavor(ri_data, qs_env)
134 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
135 TYPE(qs_environment_type), POINTER :: qs_env
136
137 CHARACTER(LEN=*), PARAMETER :: routineN = 'switch_ri_flavor'
138
139 INTEGER :: handle, n_mem, new_flavor
140 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
141 TYPE(dft_control_type), POINTER :: dft_control
142 TYPE(mp_para_env_type), POINTER :: para_env
143 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
144 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
145
146 NULLIFY (qs_kind_set, particle_set, atomic_kind_set, para_env, dft_control)
147
148 CALL timeset(routinen, handle)
149
150 CALL get_qs_env(qs_env, para_env=para_env, dft_control=dft_control, atomic_kind_set=atomic_kind_set, &
151 particle_set=particle_set, qs_kind_set=qs_kind_set)
152
153 CALL hfx_ri_release(ri_data, write_stats=.false.)
154
155 IF (ri_data%flavor == ri_pmat) THEN
156 new_flavor = ri_mo
157 ELSE
158 new_flavor = ri_pmat
159 END IF
160 ri_data%flavor = new_flavor
161
162 n_mem = ri_data%n_mem_input
163 ri_data%n_mem_input = ri_data%n_mem_flavor_switch
164 ri_data%n_mem_flavor_switch = n_mem
165
166 CALL hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
167
168 !Need to recalculate the integrals in the new flavor
169 !TODO: should we backup the integrals and symmetrize/desymmetrize them instead of recomputing ?!?
170 ! that only makes sense if actual integral calculation is not negligible
171 IF (ri_data%flavor == ri_pmat) THEN
172 CALL hfx_ri_pre_scf_pmat(qs_env, ri_data)
173 ELSE
174 CALL hfx_ri_pre_scf_mo(qs_env, ri_data, dft_control%nspins)
175 END IF
176
177 IF (ri_data%unit_nr > 0) THEN
178 IF (ri_data%flavor == ri_pmat) THEN
179 WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR RHO"
180 ELSE
181 WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR MO"
182 END IF
183 END IF
184
185 CALL timestop(handle)
186
187 END SUBROUTINE switch_ri_flavor
188
189! **************************************************************************************************
190!> \brief Pre-SCF steps in MO flavor of RI HFX
191!>
192!> Calculate 2-center & 3-center integrals (see hfx_ri_pre_scf_calc_tensors) and contract
193!> K(P, S) = sum_R K_2(P, R)^{-1} K_1(R, S)^{1/2}
194!> B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
195!> \param qs_env ...
196!> \param ri_data ...
197!> \param nspins ...
198! **************************************************************************************************
199 SUBROUTINE hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
200 TYPE(qs_environment_type), POINTER :: qs_env
201 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
202 INTEGER, INTENT(IN) :: nspins
203
204 CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_mo'
205
206 INTEGER :: handle, handle2, ispin, n_dependent, &
207 unit_nr, unit_nr_dbcsr
208 REAL(KIND=dp) :: threshold
209 TYPE(cp_blacs_env_type), POINTER :: blacs_env
210 TYPE(dbcsr_type), DIMENSION(1) :: dbcsr_work_1, dbcsr_work_2, t_2c_int_mat, t_2c_op_pot, &
211 t_2c_op_pot_sqrt, t_2c_op_pot_sqrt_inv, t_2c_op_RI, t_2c_op_RI_inv
212 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
213 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
214 TYPE(mp_para_env_type), POINTER :: para_env
215
216 CALL timeset(routinen, handle)
217
218 unit_nr_dbcsr = ri_data%unit_nr_dbcsr
219 unit_nr = ri_data%unit_nr
220
221 CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
222
223 CALL timeset(routinen//"_int", handle2)
224
225 ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int(1, 1))
226 CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_ri, t_2c_op_pot, t_3c_int)
227
228 CALL timestop(handle2)
229
230 CALL timeset(routinen//"_2c", handle2)
231 IF (.NOT. ri_data%same_op) THEN
232 SELECT CASE (ri_data%t2c_method)
233 CASE (hfx_ri_do_2c_iter)
234 CALL dbcsr_create(t_2c_op_ri_inv(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
235 threshold = max(ri_data%filter_eps, 1.0e-12_dp)
236 CALL invert_hotelling(t_2c_op_ri_inv(1), t_2c_op_ri(1), threshold=threshold, silent=.false.)
238 CALL dbcsr_copy(t_2c_op_ri_inv(1), t_2c_op_ri(1))
239 CALL cp_dbcsr_cholesky_decompose(t_2c_op_ri_inv(1), para_env=para_env, blacs_env=blacs_env)
240 CALL cp_dbcsr_cholesky_invert(t_2c_op_ri_inv(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.true.)
241 CASE (hfx_ri_do_2c_diag)
242 CALL dbcsr_copy(t_2c_op_ri_inv(1), t_2c_op_ri(1))
243 CALL cp_dbcsr_power(t_2c_op_ri_inv(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
244 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
245 END SELECT
246
247 IF (ri_data%check_2c_inv) THEN
248 CALL check_inverse(t_2c_op_ri_inv(1), t_2c_op_ri(1), unit_nr=unit_nr)
249 END IF
250
251 CALL dbcsr_release(t_2c_op_ri(1))
252
253 SELECT CASE (ri_data%t2c_method)
254 CASE (hfx_ri_do_2c_iter)
255 CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
256 CALL dbcsr_create(t_2c_op_pot_sqrt_inv(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
257 CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_op_pot_sqrt_inv(1), t_2c_op_pot(1), &
258 ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
259 ri_data%max_iter_lanczos)
260
261 CALL dbcsr_release(t_2c_op_pot_sqrt_inv(1))
263 CALL dbcsr_copy(t_2c_op_pot_sqrt(1), t_2c_op_pot(1))
264 CALL cp_dbcsr_power(t_2c_op_pot_sqrt(1), 0.5_dp, ri_data%eps_eigval, n_dependent, &
265 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
266 END SELECT
267
268 !We need S^-1 and (P|Q) for the forces.
269 CALL dbt_create(t_2c_op_ri_inv(1), t_2c_work(1))
270 CALL dbt_copy_matrix_to_tensor(t_2c_op_ri_inv(1), t_2c_work(1))
271 CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.true.)
272 CALL dbt_destroy(t_2c_work(1))
273 CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
274
275 CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
276 CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
277 CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.true.)
278 CALL dbt_destroy(t_2c_work(1))
279 CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
280
281 IF (ri_data%check_2c_inv) THEN
282 CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt=t_2c_op_pot_sqrt(1), unit_nr=unit_nr)
283 END IF
284 CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_no_symmetry)
285 CALL dbcsr_multiply("N", "N", 1.0_dp, t_2c_op_ri_inv(1), t_2c_op_pot_sqrt(1), &
286 0.0_dp, t_2c_int_mat(1), filter_eps=ri_data%filter_eps)
287 CALL dbcsr_release(t_2c_op_ri_inv(1))
288 CALL dbcsr_release(t_2c_op_pot_sqrt(1))
289 ELSE
290 SELECT CASE (ri_data%t2c_method)
291 CASE (hfx_ri_do_2c_iter)
292 CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
293 CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
294 CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_int_mat(1), t_2c_op_pot(1), &
295 ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
296 ri_data%max_iter_lanczos)
297 CALL dbcsr_release(t_2c_op_pot_sqrt(1))
299 CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_pot(1))
300 CALL cp_dbcsr_power(t_2c_int_mat(1), -0.5_dp, ri_data%eps_eigval, n_dependent, &
301 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
302 END SELECT
303 IF (ri_data%check_2c_inv) THEN
304 CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt_inv=t_2c_int_mat(1), unit_nr=unit_nr)
305 END IF
306
307 !We need (P|Q)^-1 for the forces
308 CALL dbcsr_copy(dbcsr_work_1(1), t_2c_int_mat(1))
309 CALL dbcsr_create(dbcsr_work_2(1), template=t_2c_int_mat(1))
310 CALL dbcsr_multiply("N", "N", 1.0_dp, dbcsr_work_1(1), t_2c_int_mat(1), 0.0_dp, dbcsr_work_2(1))
311 CALL dbcsr_release(dbcsr_work_1(1))
312 CALL dbt_create(dbcsr_work_2(1), t_2c_work(1))
313 CALL dbt_copy_matrix_to_tensor(dbcsr_work_2(1), t_2c_work(1))
314 CALL dbcsr_release(dbcsr_work_2(1))
315 CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.true.)
316 CALL dbt_destroy(t_2c_work(1))
317 CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
318 END IF
319
320 CALL dbcsr_release(t_2c_op_pot(1))
321
322 CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
323 CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
324 CALL dbcsr_release(t_2c_int_mat(1))
325 DO ispin = 1, nspins
326 CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(ispin, 1))
327 END DO
328 CALL dbt_destroy(t_2c_int(1))
329 CALL timestop(handle2)
330
331 CALL timeset(routinen//"_3c", handle2)
332 CALL dbt_copy(t_3c_int(1, 1), ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.true.)
333 CALL dbt_filter(ri_data%t_3c_int_ctr_1(1, 1), ri_data%filter_eps)
334 CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_2(1, 1))
335 CALL dbt_destroy(t_3c_int(1, 1))
336 CALL timestop(handle2)
337
338 CALL timestop(handle)
339
340 END SUBROUTINE
341
342! **************************************************************************************************
343!> \brief ...
344!> \param matrix_1 ...
345!> \param matrix_2 ...
346!> \param name ...
347!> \param unit_nr ...
348! **************************************************************************************************
349 SUBROUTINE check_inverse(matrix_1, matrix_2, name, unit_nr)
350 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_1, matrix_2
351 CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
352 INTEGER, INTENT(IN) :: unit_nr
353
354 CHARACTER(len=default_string_length) :: name_prv
355 REAL(kind=dp) :: error, frob_matrix, frob_matrix_base
356 TYPE(dbcsr_type) :: matrix_tmp
357
358 IF (PRESENT(name)) THEN
359 name_prv = name
360 ELSE
361 CALL dbcsr_get_info(matrix_1, name=name_prv)
362 END IF
363
364 CALL dbcsr_create(matrix_tmp, template=matrix_1)
365 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_1, matrix_2, &
366 0.0_dp, matrix_tmp)
367 frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp)
368 CALL dbcsr_add_on_diag(matrix_tmp, -1.0_dp)
369 frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
370 error = frob_matrix/frob_matrix_base
371 IF (unit_nr > 0) THEN
372 WRITE (unit=unit_nr, fmt="(T3,A,A,A,T73,ES8.1)") &
373 "HFX_RI_INFO| Error for INV(", trim(name_prv), "):", error
374 END IF
375
376 CALL dbcsr_release(matrix_tmp)
377 END SUBROUTINE
378
379! **************************************************************************************************
380!> \brief ...
381!> \param matrix ...
382!> \param matrix_sqrt ...
383!> \param matrix_sqrt_inv ...
384!> \param name ...
385!> \param unit_nr ...
386! **************************************************************************************************
387 SUBROUTINE check_sqrt(matrix, matrix_sqrt, matrix_sqrt_inv, name, unit_nr)
388 TYPE(dbcsr_type), INTENT(INOUT) :: matrix
389 TYPE(dbcsr_type), INTENT(IN), OPTIONAL :: matrix_sqrt, matrix_sqrt_inv
390 CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
391 INTEGER, INTENT(IN) :: unit_nr
392
393 CHARACTER(len=default_string_length) :: name_prv
394 REAL(kind=dp) :: frob_matrix
395 TYPE(dbcsr_type) :: matrix_copy, matrix_tmp
396
397 IF (PRESENT(name)) THEN
398 name_prv = name
399 ELSE
400 CALL dbcsr_get_info(matrix, name=name_prv)
401 END IF
402 IF (PRESENT(matrix_sqrt)) THEN
403 CALL dbcsr_create(matrix_tmp, template=matrix)
404 CALL dbcsr_copy(matrix_copy, matrix_sqrt)
405 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, matrix_copy, &
406 0.0_dp, matrix_tmp)
407 CALL dbcsr_add(matrix_tmp, matrix, 1.0_dp, -1.0_dp)
408 frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
409 IF (unit_nr > 0) THEN
410 WRITE (unit=unit_nr, fmt="(T3,A,A,A,T73,ES8.1)") &
411 "HFX_RI_INFO| Error for SQRT(", trim(name_prv), "):", frob_matrix
412 END IF
413 CALL dbcsr_release(matrix_tmp)
414 CALL dbcsr_release(matrix_copy)
415 END IF
416
417 IF (PRESENT(matrix_sqrt_inv)) THEN
418 CALL dbcsr_create(matrix_tmp, template=matrix)
419 CALL dbcsr_copy(matrix_copy, matrix_sqrt_inv)
420 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_copy, &
421 0.0_dp, matrix_tmp)
422 CALL check_inverse(matrix_tmp, matrix, name="SQRT("//trim(name_prv)//")", unit_nr=unit_nr)
423 CALL dbcsr_release(matrix_tmp)
424 CALL dbcsr_release(matrix_copy)
425 END IF
426
427 END SUBROUTINE
428
429! **************************************************************************************************
430!> \brief Calculate 2-center and 3-center integrals
431!>
432!> 2c: K_1(P, R) = (P|v1|R) and K_2(P, R) = (P|v2|R)
433!> 3c: int_3c(mu, lambda, P) = (mu lambda |v2| P)
434!> v_1 is HF operator, v_2 is RI metric
435!> \param qs_env ...
436!> \param ri_data ...
437!> \param t_2c_int_RI K_2(P, R) note: even with k-point, only need on central cell
438!> \param t_2c_int_pot K_1(P, R)
439!> \param t_3c_int int_3c(mu, lambda, P)
440!> \param do_kpoints ...
441!> \notes the integral tensor arrays are already allocated on entry
442! **************************************************************************************************
443 SUBROUTINE hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
444 TYPE(qs_environment_type), POINTER :: qs_env
445 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
446 TYPE(dbcsr_type), DIMENSION(:), INTENT(OUT) :: t_2c_int_ri, t_2c_int_pot
447 TYPE(dbt_type), DIMENSION(:, :) :: t_3c_int
448 LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints
449
450 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_pre_scf_calc_tensors'
451
452 CHARACTER :: symm
453 INTEGER :: handle, i_img, i_mem, ibasis, j_img, &
454 n_mem, natom, nblks, nimg, nkind, &
455 nthreads
456 INTEGER(int_8) :: nze
457 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_ao_1, dist_ao_2, dist_ri, dist_ri_ext, &
458 ends_array_mc_block_int, ends_array_mc_int, sizes_ao, sizes_ri, sizes_ri_ext, &
459 sizes_ri_ext_split, starts_array_mc_block_int, starts_array_mc_int
460 INTEGER, DIMENSION(3) :: pcoord, pdims
461 INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
462 LOGICAL :: converged, do_kpoints_prv
463 REAL(dp) :: max_ev, min_ev, occ, ri_range
464 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
465 TYPE(dbcsr_distribution_type) :: dbcsr_dist
466 TYPE(dbt_distribution_type) :: t_dist
467 TYPE(dbt_pgrid_type) :: pgrid
468 TYPE(dbt_type) :: t_3c_tmp
469 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
470 TYPE(dft_control_type), POINTER :: dft_control
471 TYPE(distribution_2d_type), POINTER :: dist_2d
472 TYPE(distribution_3d_type) :: dist_3d
473 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
474 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
475 TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
476 TYPE(mp_cart_type) :: mp_comm_t3c
477 TYPE(mp_para_env_type), POINTER :: para_env
478 TYPE(neighbor_list_3c_type) :: nl_3c
479 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
480 POINTER :: nl_2c_pot, nl_2c_ri
481 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
482 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
483 TYPE(qs_ks_env_type), POINTER :: ks_env
484
485 CALL timeset(routinen, handle)
486 NULLIFY (col_bsize, row_bsize, dist_2d, nl_2c_pot, nl_2c_ri, &
487 particle_set, qs_kind_set, ks_env, para_env)
488
489 CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
490 distribution_2d=dist_2d, ks_env=ks_env, dft_control=dft_control, para_env=para_env)
491
492 ri_range = 0.0_dp
493 do_kpoints_prv = .false.
494 IF (PRESENT(do_kpoints)) do_kpoints_prv = do_kpoints
495 nimg = 1
496 IF (do_kpoints_prv) THEN
497 nimg = ri_data%nimg
498 ri_range = ri_data%kp_RI_range
499 END IF
500
501 ALLOCATE (sizes_ri(natom), sizes_ao(natom))
502 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
503 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
504 CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_ri, basis=basis_set_ri)
505 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
506 CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_ao, basis=basis_set_ao)
507
508 DO ibasis = 1, SIZE(basis_set_ao)
509 orb_basis => basis_set_ao(ibasis)%gto_basis_set
510 ri_basis => basis_set_ri(ibasis)%gto_basis_set
511 ! interaction radii should be based on eps_pgf_orb controlled in RI section
512 ! (since hartree-fock needs very tight eps_pgf_orb for Kohn-Sham/Fock matrix but eps_pgf_orb
513 ! can be much looser in RI HFX since no systematic error is introduced with tensor sparsity)
514 CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
515 CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
516 END DO
517
518 n_mem = ri_data%n_mem
519 CALL create_tensor_batches(sizes_ri, n_mem, starts_array_mc_int, ends_array_mc_int, &
520 starts_array_mc_block_int, ends_array_mc_block_int)
521
522 DEALLOCATE (starts_array_mc_int, ends_array_mc_int)
523
524 !We separate the K-points and standard 3c integrals, because handle quite differently
525 IF (.NOT. do_kpoints_prv) THEN
526
527 nthreads = 1
528!$ nthreads = omp_get_num_threads()
529 pdims = 0
530 CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[max(1, natom/(n_mem*nthreads)), natom, natom])
531
532 ALLOCATE (t_3c_int_batched(1, 1))
533 CALL create_3c_tensor(t_3c_int_batched(1, 1), dist_ri, dist_ao_1, dist_ao_2, pgrid, &
534 sizes_ri, sizes_ao, sizes_ao, map1=[1], map2=[2, 3], &
535 name="(RI | AO AO)")
536
537 CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
538 CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
539 CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
540 CALL distribution_3d_create(dist_3d, dist_ri, dist_ao_1, dist_ao_2, &
541 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
542 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
543
544 CALL create_3c_tensor(t_3c_int(1, 1), dist_ri, dist_ao_1, dist_ao_2, ri_data%pgrid, &
545 ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
546 map1=[1], map2=[2, 3], &
547 name="O (RI AO | AO)")
548
549 CALL build_3c_neighbor_lists(nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, dist_3d, ri_data%ri_metric, &
550 "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.true., own_dist=.true.)
551
552 DO i_mem = 1, n_mem
553 CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps/2, qs_env, nl_3c, &
554 basis_set_ri, basis_set_ao, basis_set_ao, &
555 ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
556 desymmetrize=.false., &
557 bounds_i=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)])
558 CALL dbt_copy(t_3c_int_batched(1, 1), t_3c_int(1, 1), summation=.true., move_data=.true.)
559 CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps/2)
560 END DO
561
562 CALL dbt_destroy(t_3c_int_batched(1, 1))
563
564 CALL neighbor_list_3c_destroy(nl_3c)
565
566 CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
567
568 IF (ri_data%flavor == ri_pmat) THEN ! desymmetrize
569 ! desymmetrize
570 CALL dbt_copy(t_3c_int(1, 1), t_3c_tmp)
571 CALL dbt_copy(t_3c_tmp, t_3c_int(1, 1), order=[1, 3, 2], summation=.true., move_data=.true.)
572
573 ! For RI-RHO filter_eps_storage is reserved for screening tensor contracted with RI-metric
574 ! with RI metric but not to bare integral tensor
575 CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps)
576 ELSE
577 CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps_storage/2)
578 END IF
579
580 CALL dbt_destroy(t_3c_tmp)
581
582 ELSE !do_kpoints
583
584 nthreads = 1
585!$ nthreads = omp_get_num_threads()
586 pdims = 0
587 CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[natom, natom, max(1, natom/(n_mem*nthreads))])
588
589 !In k-points HFX, we stack all images along the RI direction in the same tensors, in order
590 !to avoid storing nimg x ncell_RI different tensors (very memory intensive)
591 nblks = SIZE(ri_data%bsizes_RI_split)
592 ALLOCATE (sizes_ri_ext(natom*ri_data%ncell_RI), sizes_ri_ext_split(nblks*ri_data%ncell_RI))
593 DO i_img = 1, ri_data%ncell_RI
594 sizes_ri_ext((i_img - 1)*natom + 1:i_img*natom) = sizes_ri(:)
595 sizes_ri_ext_split((i_img - 1)*nblks + 1:i_img*nblks) = ri_data%bsizes_RI_split(:)
596 END DO
597
598 CALL create_3c_tensor(t_3c_tmp, dist_ao_1, dist_ao_2, dist_ri, &
599 pgrid, sizes_ao, sizes_ao, sizes_ri, map1=[1, 2], map2=[3], &
600 name="(AO AO | RI)")
601 CALL dbt_destroy(t_3c_tmp)
602
603 !For the integrals to work, the distribution along the RI direction must be repeated
604 ALLOCATE (dist_ri_ext(natom*ri_data%ncell_RI))
605 DO i_img = 1, ri_data%ncell_RI
606 dist_ri_ext((i_img - 1)*natom + 1:i_img*natom) = dist_ri(:)
607 END DO
608
609 ALLOCATE (t_3c_int_batched(nimg, 1))
610 CALL dbt_distribution_new(t_dist, pgrid, dist_ao_1, dist_ao_2, dist_ri_ext)
611 CALL dbt_create(t_3c_int_batched(1, 1), "KP_3c_ints", t_dist, [1, 2], [3], &
612 sizes_ao, sizes_ao, sizes_ri_ext)
613 DO i_img = 2, nimg
614 CALL dbt_create(t_3c_int_batched(1, 1), t_3c_int_batched(i_img, 1))
615 END DO
616 CALL dbt_distribution_destroy(t_dist)
617
618 CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
619 CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
620 CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
621 CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
622 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
623 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
624
625 ! create 3c tensor for storage of ints
626 CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, ri_data%ri_metric, &
627 "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.false., own_dist=.true.)
628
629 CALL create_3c_tensor(t_3c_int(1, 1), dist_ri, dist_ao_1, dist_ao_2, ri_data%pgrid, &
630 sizes_ri_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
631 map1=[1], map2=[2, 3], &
632 name="O (RI AO | AO)")
633 DO j_img = 2, nimg
634 CALL dbt_create(t_3c_int(1, 1), t_3c_int(1, j_img))
635 END DO
636
637 CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
638 DO i_mem = 1, n_mem
639 CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
640 basis_set_ao, basis_set_ao, basis_set_ri, &
641 ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=2, &
642 desymmetrize=.false., do_kpoints=.true., do_hfx_kpoints=.true., &
643 bounds_k=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)], &
644 ri_range=ri_range, img_to_ri_cell=ri_data%img_to_RI_cell)
645
646 DO i_img = 1, nimg
647 !we start with (mu^0 sigma^i | P^j) and finish with (P^i | mu^0 sigma^j)
648 CALL get_tensor_occupancy(t_3c_int_batched(i_img, 1), nze, occ)
649 IF (nze > 0) THEN
650 CALL dbt_copy(t_3c_int_batched(i_img, 1), t_3c_tmp, order=[3, 2, 1], move_data=.true.)
651 CALL dbt_filter(t_3c_tmp, ri_data%filter_eps)
652 CALL dbt_copy(t_3c_tmp, t_3c_int(1, i_img), order=[1, 3, 2], &
653 summation=.true., move_data=.true.)
654 END IF
655 END DO
656 END DO
657
658 DO i_img = 1, nimg
659 CALL dbt_destroy(t_3c_int_batched(i_img, 1))
660 END DO
661 DEALLOCATE (t_3c_int_batched)
662 CALL neighbor_list_3c_destroy(nl_3c)
663 CALL dbt_destroy(t_3c_tmp)
664 END IF
665 CALL dbt_pgrid_destroy(pgrid)
666
667 CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
668 "HFX_2c_nl_pot", qs_env, sym_ij=.NOT. do_kpoints_prv, &
669 dist_2d=dist_2d)
670
671 CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
672 ALLOCATE (row_bsize(SIZE(sizes_ri)))
673 ALLOCATE (col_bsize(SIZE(sizes_ri)))
674 row_bsize(:) = sizes_ri
675 col_bsize(:) = sizes_ri
676
677 !Use non-symmetric nl and matrices for k-points
678 symm = dbcsr_type_symmetric
679 IF (do_kpoints_prv) symm = dbcsr_type_no_symmetry
680
681 CALL dbcsr_create(t_2c_int_pot(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
682 DO i_img = 2, nimg
683 CALL dbcsr_create(t_2c_int_pot(i_img), template=t_2c_int_pot(1))
684 END DO
685
686 IF (.NOT. ri_data%same_op) THEN
687 CALL dbcsr_create(t_2c_int_ri(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
688 DO i_img = 2, nimg
689 CALL dbcsr_create(t_2c_int_ri(i_img), template=t_2c_int_ri(1))
690 END DO
691 END IF
692 DEALLOCATE (col_bsize, row_bsize)
693
694 CALL dbcsr_distribution_release(dbcsr_dist)
695
696 CALL build_2c_integrals(t_2c_int_pot, ri_data%filter_eps_2c, qs_env, nl_2c_pot, basis_set_ri, basis_set_ri, &
697 ri_data%hfx_pot, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
698 CALL release_neighbor_list_sets(nl_2c_pot)
699
700 IF (.NOT. ri_data%same_op) THEN
701 CALL build_2c_neighbor_lists(nl_2c_ri, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
702 "HFX_2c_nl_RI", qs_env, sym_ij=.NOT. do_kpoints_prv, &
703 dist_2d=dist_2d)
704
705 CALL build_2c_integrals(t_2c_int_ri, ri_data%filter_eps_2c, qs_env, nl_2c_ri, basis_set_ri, basis_set_ri, &
706 ri_data%ri_metric, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
707
708 CALL release_neighbor_list_sets(nl_2c_ri)
709 END IF
710
711 DO ibasis = 1, SIZE(basis_set_ao)
712 orb_basis => basis_set_ao(ibasis)%gto_basis_set
713 ri_basis => basis_set_ri(ibasis)%gto_basis_set
714 ! reset interaction radii of orb basis
715 CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
716 CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
717 END DO
718
719 IF (ri_data%calc_condnum) THEN
720 CALL arnoldi_extremal(t_2c_int_pot(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
721 max_iter=ri_data%max_iter_lanczos, converged=converged)
722
723 IF (.NOT. converged) THEN
724 cpwarn("Not converged: unreliable condition number estimate of (P|Q) matrix (HFX potential).")
725 END IF
726
727 IF (ri_data%unit_nr > 0) THEN
728 WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (HFX potential)"
729 IF (min_ev > 0) THEN
730 WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
731 "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", log10(max_ev/min_ev)
732 ELSE
733 WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
734 "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
735 END IF
736 END IF
737
738 IF (.NOT. ri_data%same_op) THEN
739 CALL arnoldi_extremal(t_2c_int_ri(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
740 max_iter=ri_data%max_iter_lanczos, converged=converged)
741
742 IF (.NOT. converged) THEN
743 cpwarn("Not converged: unreliable condition number estimate of (P|Q) matrix (RI metric).")
744 END IF
745
746 IF (ri_data%unit_nr > 0) THEN
747 WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (RI metric)"
748 IF (min_ev > 0) THEN
749 WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
750 "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", log10(max_ev/min_ev)
751 ELSE
752 WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
753 "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
754 END IF
755 END IF
756 END IF
757 END IF
758
759 CALL timestop(handle)
760 END SUBROUTINE
761
762! **************************************************************************************************
763!> \brief Pre-SCF steps in rho flavor of RI HFX
764!>
765!> K(P, S) = sum_{R,Q} K_2(P, R)^{-1} K_1(R, Q) K_2(Q, S)^{-1}
766!> Calculate B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
767!> \param qs_env ...
768!> \param ri_data ...
769! **************************************************************************************************
770 SUBROUTINE hfx_ri_pre_scf_pmat(qs_env, ri_data)
771 TYPE(qs_environment_type), POINTER :: qs_env
772 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
773
774 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_pre_scf_Pmat'
775
776 INTEGER :: handle, handle2, i_mem, j_mem, &
777 n_dependent, unit_nr, unit_nr_dbcsr
778 INTEGER(int_8) :: nflop, nze, nze_o
779 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_ao, batch_ranges_ri
780 INTEGER, DIMENSION(2, 1) :: bounds_i
781 INTEGER, DIMENSION(2, 2) :: bounds_j
782 INTEGER, DIMENSION(3) :: dims_3c
783 REAL(kind=dp) :: compression_factor, memory_3c, occ, &
784 threshold
785 TYPE(cp_blacs_env_type), POINTER :: blacs_env
786 TYPE(dbcsr_type), DIMENSION(1) :: t_2c_int_mat, t_2c_op_pot, t_2c_op_ri, &
787 t_2c_tmp, t_2c_tmp_2
788 TYPE(dbt_type) :: t_3c_2
789 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
790 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_1
791 TYPE(mp_para_env_type), POINTER :: para_env
792
793 CALL timeset(routinen, handle)
794
795 unit_nr_dbcsr = ri_data%unit_nr_dbcsr
796 unit_nr = ri_data%unit_nr
797
798 CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
799
800 CALL timeset(routinen//"_int", handle2)
801
802 ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int_1(1, 1))
803 CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_ri, t_2c_op_pot, t_3c_int_1)
804
805 CALL dbt_copy(t_3c_int_1(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 2, 3], move_data=.true.)
806
807 CALL dbt_destroy(t_3c_int_1(1, 1))
808
809 CALL timestop(handle2)
810
811 CALL timeset(routinen//"_2c", handle2)
812
813 IF (ri_data%same_op) t_2c_op_ri(1) = t_2c_op_pot(1)
814 CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
815 threshold = max(ri_data%filter_eps, 1.0e-12_dp)
816
817 SELECT CASE (ri_data%t2c_method)
818 CASE (hfx_ri_do_2c_iter)
819 CALL invert_hotelling(t_2c_int_mat(1), t_2c_op_ri(1), &
820 threshold=threshold, silent=.false.)
822 CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_ri(1))
823 CALL cp_dbcsr_cholesky_decompose(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env)
824 CALL cp_dbcsr_cholesky_invert(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.true.)
825 CASE (hfx_ri_do_2c_diag)
826 CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_ri(1))
827 CALL cp_dbcsr_power(t_2c_int_mat(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
828 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
829 END SELECT
830
831 IF (ri_data%check_2c_inv) THEN
832 CALL check_inverse(t_2c_int_mat(1), t_2c_op_ri(1), unit_nr=unit_nr)
833 END IF
834
835 !Need to save the (P|Q)^-1 tensor for forces (inverse metric if not same_op)
836 CALL dbt_create(t_2c_int_mat(1), t_2c_work(1))
837 CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_work(1))
838 CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.true.)
839 CALL dbt_destroy(t_2c_work(1))
840 CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
841 IF (.NOT. ri_data%same_op) THEN
842 !Also save the RI (P|Q) integral
843 CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
844 CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
845 CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.true.)
846 CALL dbt_destroy(t_2c_work(1))
847 CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
848 END IF
849
850 IF (ri_data%same_op) THEN
851 CALL dbcsr_release(t_2c_op_pot(1))
852 ELSE
853 CALL dbcsr_create(t_2c_tmp(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
854 CALL dbcsr_create(t_2c_tmp_2(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
855 CALL dbcsr_release(t_2c_op_ri(1))
856 CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_int_mat(1), t_2c_op_pot(1), 0.0_dp, t_2c_tmp(1), &
857 filter_eps=ri_data%filter_eps)
858
859 CALL dbcsr_release(t_2c_op_pot(1))
860 CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_tmp(1), t_2c_int_mat(1), 0.0_dp, t_2c_tmp_2(1), &
861 filter_eps=ri_data%filter_eps)
862 CALL dbcsr_release(t_2c_tmp(1))
863 CALL dbcsr_release(t_2c_int_mat(1))
864 t_2c_int_mat(1) = t_2c_tmp_2(1)
865 END IF
866
867 CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
868 CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
869 CALL dbcsr_release(t_2c_int_mat(1))
870 CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(1, 1), move_data=.true.)
871 CALL dbt_destroy(t_2c_int(1))
872 CALL dbt_filter(ri_data%t_2c_int(1, 1), ri_data%filter_eps)
873
874 CALL timestop(handle2)
875
876 CALL dbt_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)
877
878 CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
879
880 memory_3c = 0.0_dp
881 nze_o = 0
882
883 ALLOCATE (batch_ranges_ri(ri_data%n_mem_RI + 1))
884 ALLOCATE (batch_ranges_ao(ri_data%n_mem + 1))
885 batch_ranges_ri(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
886 batch_ranges_ri(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
887 batch_ranges_ao(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
888 batch_ranges_ao(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
889
890 CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_3(1, 1), batch_range_1=batch_ranges_ri, &
891 batch_range_2=batch_ranges_ao)
892 CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges_ri, &
893 batch_range_2=batch_ranges_ao)
894
895 DO i_mem = 1, ri_data%n_mem_RI
896 bounds_i(:, 1) = [ri_data%starts_array_RI_mem(i_mem), ri_data%ends_array_RI_mem(i_mem)]
897
898 CALL dbt_batched_contract_init(ri_data%t_2c_int(1, 1))
899 DO j_mem = 1, ri_data%n_mem
900 bounds_j(:, 1) = [ri_data%starts_array_mem(j_mem), ri_data%ends_array_mem(j_mem)]
901 bounds_j(:, 2) = [1, dims_3c(3)]
902 CALL timeset(routinen//"_RIx3C", handle2)
903 CALL dbt_contract(1.0_dp, ri_data%t_2c_int(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
904 0.0_dp, t_3c_2, &
905 contract_1=[2], notcontract_1=[1], &
906 contract_2=[1], notcontract_2=[2, 3], &
907 map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps_storage, &
908 bounds_2=bounds_i, &
909 bounds_3=bounds_j, &
910 unit_nr=unit_nr_dbcsr, &
911 flop=nflop)
912
913 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
914 CALL timestop(handle2)
915
916 CALL timeset(routinen//"_copy_2", handle2)
917 CALL dbt_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.true.)
918
919 CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
920 nze_o = nze_o + nze
921
922 CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind, &
923 ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage, memory_3c)
924
925 CALL timestop(handle2)
926 END DO
927 CALL dbt_batched_contract_finalize(ri_data%t_2c_int(1, 1))
928 END DO
929 CALL dbt_batched_contract_finalize(t_3c_2)
930 CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_3(1, 1))
931
932 CALL para_env%sum(memory_3c)
933 compression_factor = real(nze_o, dp)*1.0e-06*8.0_dp/memory_3c
934
935 IF (unit_nr > 0) THEN
936 WRITE (unit=unit_nr, fmt="((T3,A,T66,F11.2,A4))") &
937 "MEMORY_INFO| Memory for 3-center HFX integrals (compressed):", memory_3c, ' MiB'
938
939 WRITE (unit=unit_nr, fmt="((T3,A,T60,F21.2))") &
940 "MEMORY_INFO| Compression factor: ", compression_factor
941 END IF
942
943 CALL dbt_clear(ri_data%t_2c_int(1, 1))
944 CALL dbt_destroy(t_3c_2)
945
946 CALL dbt_copy(ri_data%t_3c_int_ctr_3(1, 1), ri_data%t_3c_int_ctr_2(1, 1), order=[2, 1, 3], move_data=.true.)
947
948 CALL timestop(handle)
949 END SUBROUTINE
950
951! **************************************************************************************************
952!> \brief Sorts 2d indices w.r.t. rows and columns
953!> \param blk_ind ...
954! **************************************************************************************************
955 SUBROUTINE sort_unique_blkind_2d(blk_ind)
956 INTEGER, ALLOCATABLE, DIMENSION(:, :), &
957 INTENT(INOUT) :: blk_ind
958
959 INTEGER :: end_ind, iblk, iblk_all, irow, nblk, &
960 ncols, start_ind
961 INTEGER, ALLOCATABLE, DIMENSION(:) :: ind_1, ind_2, sort_1, sort_2
962 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blk_ind_tmp
963
964 nblk = SIZE(blk_ind, 1)
965
966 ALLOCATE (sort_1(nblk))
967 ALLOCATE (ind_1(nblk))
968
969 sort_1(:) = blk_ind(:, 1)
970 CALL sort(sort_1, nblk, ind_1)
971
972 blk_ind(:, :) = blk_ind(ind_1, :)
973
974 start_ind = 1
975
976 DO WHILE (start_ind <= nblk)
977 irow = blk_ind(start_ind, 1)
978 end_ind = start_ind
979
980 IF (end_ind + 1 <= nblk) THEN
981 DO WHILE (blk_ind(end_ind + 1, 1) == irow)
982 end_ind = end_ind + 1
983 IF (end_ind + 1 > nblk) EXIT
984 END DO
985 END IF
986
987 ncols = end_ind - start_ind + 1
988 ALLOCATE (sort_2(ncols))
989 ALLOCATE (ind_2(ncols))
990 sort_2(:) = blk_ind(start_ind:end_ind, 2)
991 CALL sort(sort_2, ncols, ind_2)
992 ind_2 = ind_2 + start_ind - 1
993
994 blk_ind(start_ind:end_ind, :) = blk_ind(ind_2, :)
995 start_ind = end_ind + 1
996
997 DEALLOCATE (sort_2, ind_2)
998 END DO
999
1000 ALLOCATE (blk_ind_tmp(nblk, 2))
1001 blk_ind_tmp = 0
1002
1003 iblk = 0
1004 DO iblk_all = 1, nblk
1005 IF (iblk >= 1) THEN
1006 IF (all(blk_ind_tmp(iblk, :) == blk_ind(iblk_all, :))) THEN
1007 cycle
1008 END IF
1009 END IF
1010 iblk = iblk + 1
1011 blk_ind_tmp(iblk, :) = blk_ind(iblk_all, :)
1012 END DO
1013 nblk = iblk
1014
1015 DEALLOCATE (blk_ind)
1016 ALLOCATE (blk_ind(nblk, 2))
1017
1018 blk_ind(:, :) = blk_ind_tmp(:nblk, :)
1019
1020 END SUBROUTINE
1021
1022! **************************************************************************************************
1023!> \brief ...
1024!> \param qs_env ...
1025!> \param ri_data ...
1026!> \param ks_matrix ...
1027!> \param ehfx ...
1028!> \param mos ...
1029!> \param rho_ao ...
1030!> \param geometry_did_change ...
1031!> \param nspins ...
1032!> \param hf_fraction ...
1033! **************************************************************************************************
1034 SUBROUTINE hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, &
1035 geometry_did_change, nspins, hf_fraction)
1036 TYPE(qs_environment_type), POINTER :: qs_env
1037 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1038 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
1039 REAL(kind=dp), INTENT(OUT) :: ehfx
1040 TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
1041 OPTIONAL :: mos
1042 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
1043 LOGICAL, INTENT(IN) :: geometry_did_change
1044 INTEGER, INTENT(IN) :: nspins
1045 REAL(kind=dp), INTENT(IN) :: hf_fraction
1046
1047 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks'
1048
1049 CHARACTER(1) :: mtype
1050 INTEGER :: handle, handle2, i, ispin, j
1051 INTEGER(int_8) :: nblks
1052 INTEGER, DIMENSION(2) :: homo
1053 LOGICAL :: is_antisymmetric
1054 REAL(dp) :: etmp, fac
1055 REAL(kind=dp), DIMENSION(:), POINTER :: mo_eigenvalues
1056 TYPE(cp_fm_type), POINTER :: mo_coeff
1057 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: my_ks_matrix, my_rho_ao
1058 TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
1059 TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
1060 TYPE(mp_para_env_type), POINTER :: para_env
1061
1062 CALL timeset(routinen, handle)
1063
1064 IF (nspins == 1) THEN
1065 fac = 0.5_dp*hf_fraction
1066 ELSE
1067 fac = 1.0_dp*hf_fraction
1068 END IF
1069
1070 !If incoming assymetric matrices, need to convert to normal
1071 NULLIFY (my_ks_matrix, my_rho_ao)
1072 CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, matrix_type=mtype)
1073 is_antisymmetric = mtype == dbcsr_type_antisymmetric
1074 IF (is_antisymmetric) THEN
1075 ALLOCATE (my_ks_matrix(SIZE(ks_matrix, 1), SIZE(ks_matrix, 2)))
1076 ALLOCATE (my_rho_ao(SIZE(rho_ao, 1), SIZE(rho_ao, 2)))
1077
1078 DO i = 1, SIZE(ks_matrix, 1)
1079 DO j = 1, SIZE(ks_matrix, 2)
1080 ALLOCATE (my_ks_matrix(i, j)%matrix, my_rho_ao(i, j)%matrix)
1081 CALL dbcsr_create(my_ks_matrix(i, j)%matrix, template=ks_matrix(i, j)%matrix, &
1082 matrix_type=dbcsr_type_no_symmetry)
1083 CALL dbcsr_desymmetrize(ks_matrix(i, j)%matrix, my_ks_matrix(i, j)%matrix)
1084 CALL dbcsr_create(my_rho_ao(i, j)%matrix, template=rho_ao(i, j)%matrix, &
1085 matrix_type=dbcsr_type_no_symmetry)
1086 CALL dbcsr_desymmetrize(rho_ao(i, j)%matrix, my_rho_ao(i, j)%matrix)
1087 END DO
1088 END DO
1089 ELSE
1090 my_ks_matrix => ks_matrix
1091 my_rho_ao => rho_ao
1092 END IF
1093
1094 !Case analysis on RI_FLAVOR: we switch if the input flavor is MO, there is no provided MO, and
1095 ! the current flavor is not yet RHO. We switch back to MO if there are
1096 ! MOs available and the current flavor is actually RHO
1097 IF (ri_data%input_flavor == ri_mo .AND. (.NOT. PRESENT(mos)) .AND. ri_data%flavor == ri_mo) THEN
1098 CALL switch_ri_flavor(ri_data, qs_env)
1099 ELSE IF (ri_data%input_flavor == ri_mo .AND. PRESENT(mos) .AND. ri_data%flavor == ri_pmat) THEN
1100 CALL switch_ri_flavor(ri_data, qs_env)
1101 END IF
1102
1103 SELECT CASE (ri_data%flavor)
1104 CASE (ri_mo)
1105 cpassert(PRESENT(mos))
1106 CALL timeset(routinen//"_MO", handle2)
1107
1108 DO ispin = 1, nspins
1109 NULLIFY (mo_coeff_b_tmp)
1110 cpassert(mos(ispin)%uniform_occupation)
1111 CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
1112
1113 IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
1114 CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
1115 END DO
1116
1117 DO ispin = 1, nspins
1118 CALL dbcsr_scale(mo_coeff_b(ispin), sqrt(mos(ispin)%maxocc))
1119 homo(ispin) = mos(ispin)%homo
1120 END DO
1121
1122 CALL timestop(handle2)
1123
1124 CALL hfx_ri_update_ks_mo(qs_env, ri_data, my_ks_matrix, mo_coeff_b, homo, &
1125 geometry_did_change, nspins, fac)
1126 CASE (ri_pmat)
1127
1128 NULLIFY (para_env)
1129 CALL get_qs_env(qs_env, para_env=para_env)
1130 DO ispin = 1, SIZE(my_rho_ao, 1)
1131 nblks = dbcsr_get_num_blocks(my_rho_ao(ispin, 1)%matrix)
1132 CALL para_env%sum(nblks)
1133 IF (nblks == 0) THEN
1134 cpabort("received empty density matrix")
1135 END IF
1136 END DO
1137
1138 CALL hfx_ri_update_ks_pmat(qs_env, ri_data, my_ks_matrix, my_rho_ao, &
1139 geometry_did_change, nspins, fac)
1140
1141 END SELECT
1142
1143 DO ispin = 1, nspins
1144 CALL dbcsr_release(mo_coeff_b(ispin))
1145 END DO
1146
1147 DO ispin = 1, nspins
1148 CALL dbcsr_filter(my_ks_matrix(ispin, 1)%matrix, ri_data%filter_eps)
1149 END DO
1150
1151 CALL timeset(routinen//"_energy", handle2)
1152 ! Calculate the exchange energy
1153 ehfx = 0.0_dp
1154 DO ispin = 1, nspins
1155 CALL dbcsr_dot(my_ks_matrix(ispin, 1)%matrix, my_rho_ao(ispin, 1)%matrix, &
1156 etmp)
1157 ehfx = ehfx + 0.5_dp*etmp
1158
1159 END DO
1160 CALL timestop(handle2)
1161
1162 !Anti-symmetric case
1163 IF (is_antisymmetric) THEN
1164 DO i = 1, SIZE(ks_matrix, 1)
1165 DO j = 1, SIZE(ks_matrix, 2)
1166 CALL dbcsr_complete_redistribute(my_ks_matrix(i, j)%matrix, ks_matrix(i, j)%matrix)
1167 CALL dbcsr_complete_redistribute(my_rho_ao(i, j)%matrix, rho_ao(i, j)%matrix)
1168 END DO
1169 END DO
1170 CALL dbcsr_deallocate_matrix_set(my_ks_matrix)
1171 CALL dbcsr_deallocate_matrix_set(my_rho_ao)
1172 END IF
1173
1174 CALL timestop(handle)
1175 END SUBROUTINE
1176
1177! **************************************************************************************************
1178!> \brief Calculate Fock (AKA Kohn-Sham) matrix in MO flavor
1179!>
1180!> C(mu, i) (MO coefficients)
1181!> M(mu, i, R) = sum_nu B(mu, nu, R) C(nu, i)
1182!> KS(mu, lambda) = sum_{i,R} M(mu, i, R) M(lambda, i, R)
1183!> \param qs_env ...
1184!> \param ri_data ...
1185!> \param ks_matrix ...
1186!> \param mo_coeff C(mu, i)
1187!> \param homo ...
1188!> \param geometry_did_change ...
1189!> \param nspins ...
1190!> \param fac ...
1191! **************************************************************************************************
1192 SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
1193 homo, geometry_did_change, nspins, fac)
1194 TYPE(qs_environment_type), POINTER :: qs_env
1195 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1196 TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix
1197 TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1198 INTEGER, DIMENSION(:) :: homo
1199 LOGICAL, INTENT(IN) :: geometry_did_change
1200 INTEGER, INTENT(IN) :: nspins
1201 REAL(dp), INTENT(IN) :: fac
1202
1203 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks_mo'
1204
1205 INTEGER :: bsize, bsum, comm_2d_handle, handle, &
1206 handle2, i_mem, iblock, iproc, ispin, &
1207 n_mem, n_mos, nblock, unit_nr_dbcsr
1208 INTEGER(int_8) :: nblks, nflop
1209 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_1, batch_ranges_2, dist1, dist2, dist3, &
1210 mem_end, mem_end_block_1, mem_end_block_2, mem_size, mem_start, mem_start_block_1, &
1211 mem_start_block_2, mo_bsizes_1, mo_bsizes_2
1212 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: bounds
1213 INTEGER, DIMENSION(2) :: pdims_2d
1214 INTEGER, DIMENSION(3) :: pdims
1215 LOGICAL :: do_initialize
1216 REAL(dp) :: t1, t2
1217 TYPE(dbcsr_distribution_type) :: ks_dist
1218 TYPE(dbt_pgrid_type) :: pgrid, pgrid_2d
1219 TYPE(dbt_type) :: ks_t, ks_t_mat, mo_coeff_t, &
1220 mo_coeff_t_split
1221 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_mo_1, t_3c_int_mo_2
1222 TYPE(mp_comm_type) :: comm_2d
1223 TYPE(mp_para_env_type), POINTER :: para_env
1224
1225 CALL timeset(routinen, handle)
1226
1227 cpassert(SIZE(ks_matrix, 2) == 1)
1228
1229 unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1230
1231 IF (geometry_did_change) THEN
1232 CALL hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
1233 END IF
1234
1235 nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_1(1, 1))
1236 IF (nblks == 0) THEN
1237 cpabort("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1238 END IF
1239
1240 DO ispin = 1, nspins
1241 nblks = dbt_get_num_blocks_total(ri_data%t_2c_int(ispin, 1))
1242 IF (nblks == 0) THEN
1243 cpabort("2-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1244 END IF
1245 END DO
1246
1247 IF (.NOT. ALLOCATED(ri_data%t_3c_int_mo)) THEN
1248 do_initialize = .true.
1249 cpassert(.NOT. ALLOCATED(ri_data%t_3c_ctr_RI))
1250 cpassert(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS))
1251 cpassert(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS_copy))
1252 ALLOCATE (ri_data%t_3c_int_mo(nspins, 1, 1))
1253 ALLOCATE (ri_data%t_3c_ctr_RI(nspins, 1, 1))
1254 ALLOCATE (ri_data%t_3c_ctr_KS(nspins, 1, 1))
1255 ALLOCATE (ri_data%t_3c_ctr_KS_copy(nspins, 1, 1))
1256 ELSE
1257 do_initialize = .false.
1258 END IF
1259
1260 CALL get_qs_env(qs_env, para_env=para_env)
1261
1262 ALLOCATE (bounds(2, 1))
1263
1264 CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, distribution=ks_dist)
1265 CALL dbcsr_distribution_get(ks_dist, group=comm_2d_handle, nprows=pdims_2d(1), npcols=pdims_2d(2))
1266
1267 CALL comm_2d%set_handle(comm_2d_handle)
1268 pgrid_2d = dbt_nd_mp_comm(comm_2d, [1], [2], pdims_2d=pdims_2d)
1269
1270 CALL create_2c_tensor(ks_t, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_fit, ri_data%bsizes_AO_fit, &
1271 name="(AO | AO)")
1272
1273 DEALLOCATE (dist1, dist2)
1274
1275 CALL para_env%sync()
1276 t1 = m_walltime()
1277
1278 ALLOCATE (t_3c_int_mo_1(1, 1), t_3c_int_mo_2(1, 1))
1279 DO ispin = 1, nspins
1280
1281 CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1282 ALLOCATE (mo_bsizes_2(n_mos))
1283 mo_bsizes_2 = 1
1284
1285 CALL create_tensor_batches(mo_bsizes_2, ri_data%n_mem, mem_start, mem_end, &
1286 mem_start_block_2, mem_end_block_2)
1287 n_mem = ri_data%n_mem
1288 ALLOCATE (mem_size(n_mem))
1289
1290 DO i_mem = 1, n_mem
1291 bsize = sum(mo_bsizes_2(mem_start_block_2(i_mem):mem_end_block_2(i_mem)))
1292 mem_size(i_mem) = bsize
1293 END DO
1294
1295 CALL split_block_sizes(mem_size, mo_bsizes_1, ri_data%max_bsize_MO)
1296 ALLOCATE (mem_start_block_1(n_mem))
1297 ALLOCATE (mem_end_block_1(n_mem))
1298 nblock = SIZE(mo_bsizes_1)
1299 iblock = 0
1300 DO i_mem = 1, n_mem
1301 bsum = 0
1302 DO
1303 iblock = iblock + 1
1304 cpassert(iblock <= nblock)
1305 bsum = bsum + mo_bsizes_1(iblock)
1306 IF (bsum == mem_size(i_mem)) THEN
1307 IF (i_mem == 1) THEN
1308 mem_start_block_1(i_mem) = 1
1309 ELSE
1310 mem_start_block_1(i_mem) = mem_end_block_1(i_mem - 1) + 1
1311 END IF
1312 mem_end_block_1(i_mem) = iblock
1313 EXIT
1314 END IF
1315 END DO
1316 END DO
1317
1318 ALLOCATE (batch_ranges_1(ri_data%n_mem + 1))
1319 batch_ranges_1(:ri_data%n_mem) = mem_start_block_1(:)
1320 batch_ranges_1(ri_data%n_mem + 1) = mem_end_block_1(ri_data%n_mem) + 1
1321
1322 ALLOCATE (batch_ranges_2(ri_data%n_mem + 1))
1323 batch_ranges_2(:ri_data%n_mem) = mem_start_block_2(:)
1324 batch_ranges_2(ri_data%n_mem + 1) = mem_end_block_2(ri_data%n_mem) + 1
1325
1326 iproc = para_env%mepos
1327
1328 CALL create_3c_tensor(t_3c_int_mo_1(1, 1), dist1, dist2, dist3, ri_data%pgrid_1, &
1329 ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, mo_bsizes_1, &
1330 [1, 2], [3], &
1331 name="(AO RI | MO)")
1332
1333 DEALLOCATE (dist1, dist2, dist3)
1334
1335 CALL create_3c_tensor(t_3c_int_mo_2(1, 1), dist1, dist2, dist3, ri_data%pgrid_2, &
1336 mo_bsizes_1, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1337 [1], [2, 3], &
1338 name="(MO | RI AO)")
1339
1340 DEALLOCATE (dist1, dist2, dist3)
1341
1342 CALL create_2c_tensor(mo_coeff_t_split, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_split, mo_bsizes_1, &
1343 name="(AO | MO)")
1344
1345 DEALLOCATE (dist1, dist2)
1346
1347 cpassert(homo(ispin)/ri_data%n_mem > 0)
1348
1349 IF (do_initialize) THEN
1350 pdims(:) = 0
1351
1352 CALL dbt_pgrid_create(para_env, pdims, pgrid, &
1353 tensor_dims=[SIZE(ri_data%bsizes_RI_fit), &
1354 (homo(ispin) - 1)/ri_data%n_mem + 1, &
1355 SIZE(ri_data%bsizes_AO_fit)])
1356 CALL create_3c_tensor(ri_data%t_3c_int_mo(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1357 ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1358 [1], [2, 3], &
1359 name="(RI | MO AO)")
1360
1361 DEALLOCATE (dist1, dist2, dist3)
1362
1363 CALL create_3c_tensor(ri_data%t_3c_ctr_KS(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1364 ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1365 [1, 2], [3], &
1366 name="(RI MO | AO)")
1367 DEALLOCATE (dist1, dist2, dist3)
1368 CALL dbt_pgrid_destroy(pgrid)
1369
1370 CALL dbt_create(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%t_3c_ctr_RI(ispin, 1, 1), name="(RI | MO AO)")
1371 CALL dbt_create(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1372 END IF
1373
1374 CALL dbt_create(mo_coeff(ispin), mo_coeff_t, name="MO coeffs")
1375 CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), mo_coeff_t)
1376 CALL dbt_copy(mo_coeff_t, mo_coeff_t_split, move_data=.true.)
1377 CALL dbt_filter(mo_coeff_t_split, ri_data%filter_eps_mo)
1378 CALL dbt_destroy(mo_coeff_t)
1379
1380 CALL dbt_batched_contract_init(ks_t)
1381 CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS(ispin, 1, 1), batch_range_2=batch_ranges_2)
1382 CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), batch_range_2=batch_ranges_2)
1383
1384 CALL dbt_batched_contract_init(ri_data%t_2c_int(ispin, 1))
1385 CALL dbt_batched_contract_init(ri_data%t_3c_int_mo(ispin, 1, 1), batch_range_2=batch_ranges_2)
1386 CALL dbt_batched_contract_init(ri_data%t_3c_ctr_RI(ispin, 1, 1), batch_range_2=batch_ranges_2)
1387
1388 DO i_mem = 1, n_mem
1389
1390 bounds(:, 1) = [mem_start(i_mem), mem_end(i_mem)]
1391
1392 CALL dbt_batched_contract_init(mo_coeff_t_split)
1393 CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1))
1394 CALL dbt_batched_contract_init(t_3c_int_mo_1(1, 1), &
1395 batch_range_3=batch_ranges_1)
1396 CALL timeset(routinen//"_MOx3C_R", handle2)
1397 CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_1(1, 1), &
1398 0.0_dp, t_3c_int_mo_1(1, 1), &
1399 contract_1=[1], notcontract_1=[2], &
1400 contract_2=[3], notcontract_2=[1, 2], &
1401 map_1=[3], map_2=[1, 2], &
1402 bounds_2=bounds, &
1403 filter_eps=ri_data%filter_eps_mo/2, &
1404 unit_nr=unit_nr_dbcsr, &
1405 move_data=.false., &
1406 flop=nflop)
1407
1408 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1409
1410 CALL timestop(handle2)
1411 CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1412 CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1413 CALL dbt_batched_contract_finalize(t_3c_int_mo_1(1, 1))
1414
1415 CALL timeset(routinen//"_copy_1", handle2)
1416 CALL dbt_copy(t_3c_int_mo_1(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[3, 1, 2], move_data=.true.)
1417 CALL timestop(handle2)
1418
1419 CALL dbt_batched_contract_init(mo_coeff_t_split)
1420 CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1))
1421 CALL dbt_batched_contract_init(t_3c_int_mo_2(1, 1), &
1422 batch_range_1=batch_ranges_1)
1423
1424 CALL timeset(routinen//"_MOx3C_L", handle2)
1425 CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_2(1, 1), &
1426 0.0_dp, t_3c_int_mo_2(1, 1), &
1427 contract_1=[1], notcontract_1=[2], &
1428 contract_2=[1], notcontract_2=[2, 3], &
1429 map_1=[1], map_2=[2, 3], &
1430 bounds_2=bounds, &
1431 filter_eps=ri_data%filter_eps_mo/2, &
1432 unit_nr=unit_nr_dbcsr, &
1433 move_data=.false., &
1434 flop=nflop)
1435
1436 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1437
1438 CALL timestop(handle2)
1439
1440 CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1441 CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1442 CALL dbt_batched_contract_finalize(t_3c_int_mo_2(1, 1))
1443
1444 CALL timeset(routinen//"_copy_1", handle2)
1445 CALL dbt_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
1446 summation=.true., move_data=.true.)
1447
1448 CALL dbt_filter(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%filter_eps_mo)
1449 CALL timestop(handle2)
1450
1451 CALL timeset(routinen//"_RIx3C", handle2)
1452
1453 CALL dbt_contract(1.0_dp, ri_data%t_2c_int(ispin, 1), ri_data%t_3c_int_mo(ispin, 1, 1), &
1454 0.0_dp, ri_data%t_3c_ctr_RI(ispin, 1, 1), &
1455 contract_1=[1], notcontract_1=[2], &
1456 contract_2=[1], notcontract_2=[2, 3], &
1457 map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
1458 unit_nr=unit_nr_dbcsr, &
1459 flop=nflop)
1460
1461 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1462
1463 CALL timestop(handle2)
1464
1465 CALL timeset(routinen//"_copy_2", handle2)
1466
1467 ! note: this copy should not involve communication (same block sizes, same 3d distribution on same process grid)
1468 CALL dbt_copy(ri_data%t_3c_ctr_RI(ispin, 1, 1), ri_data%t_3c_ctr_KS(ispin, 1, 1), move_data=.true.)
1469 CALL dbt_copy(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1470 CALL timestop(handle2)
1471
1472 CALL timeset(routinen//"_3Cx3C", handle2)
1473 CALL dbt_contract(-fac, ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), &
1474 1.0_dp, ks_t, &
1475 contract_1=[1, 2], notcontract_1=[3], &
1476 contract_2=[1, 2], notcontract_2=[3], &
1477 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1478 unit_nr=unit_nr_dbcsr, move_data=.true., &
1479 flop=nflop)
1480
1481 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1482
1483 CALL timestop(handle2)
1484 END DO
1485
1486 CALL dbt_batched_contract_finalize(ks_t)
1487 CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS(ispin, 1, 1))
1488 CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1489
1490 CALL dbt_batched_contract_finalize(ri_data%t_2c_int(ispin, 1))
1491 CALL dbt_batched_contract_finalize(ri_data%t_3c_int_mo(ispin, 1, 1))
1492 CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_RI(ispin, 1, 1))
1493
1494 CALL dbt_destroy(t_3c_int_mo_1(1, 1))
1495 CALL dbt_destroy(t_3c_int_mo_2(1, 1))
1496 CALL dbt_clear(ri_data%t_3c_int_mo(ispin, 1, 1))
1497
1498 CALL dbt_destroy(mo_coeff_t_split)
1499
1500 CALL dbt_filter(ks_t, ri_data%filter_eps)
1501
1502 CALL dbt_create(ks_matrix(ispin, 1)%matrix, ks_t_mat)
1503 CALL dbt_copy(ks_t, ks_t_mat, move_data=.true.)
1504 CALL dbt_copy_tensor_to_matrix(ks_t_mat, ks_matrix(ispin, 1)%matrix, summation=.true.)
1505 CALL dbt_destroy(ks_t_mat)
1506
1507 DEALLOCATE (mem_end, mem_start, mo_bsizes_2, mem_size, mem_start_block_1, mem_end_block_1, &
1508 mem_start_block_2, mem_end_block_2, batch_ranges_1, batch_ranges_2)
1509
1510 END DO
1511
1512 CALL dbt_pgrid_destroy(pgrid_2d)
1513 CALL dbt_destroy(ks_t)
1514
1515 CALL para_env%sync()
1516 t2 = m_walltime()
1517
1518 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1519
1520 CALL timestop(handle)
1521
1522 END SUBROUTINE
1523
1524! **************************************************************************************************
1525!> \brief Calculate Fock (AKA Kohn-Sham) matrix in rho flavor
1526!>
1527!> M(mu, lambda, R) = sum_{nu} int_3c(mu, nu, R) P(nu, lambda)
1528!> KS(mu, lambda) = sum_{nu,R} B(mu, nu, R) M(lambda, nu, R)
1529!> \param qs_env ...
1530!> \param ri_data ...
1531!> \param ks_matrix ...
1532!> \param rho_ao ...
1533!> \param geometry_did_change ...
1534!> \param nspins ...
1535!> \param fac ...
1536! **************************************************************************************************
1537 SUBROUTINE hfx_ri_update_ks_pmat(qs_env, ri_data, ks_matrix, rho_ao, &
1538 geometry_did_change, nspins, fac)
1539 TYPE(qs_environment_type), POINTER :: qs_env
1540 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1541 TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix, rho_ao
1542 LOGICAL, INTENT(IN) :: geometry_did_change
1543 INTEGER, INTENT(IN) :: nspins
1544 REAL(dp), INTENT(IN) :: fac
1545
1546 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks_Pmat'
1547
1548 INTEGER :: handle, handle2, i_mem, ispin, j_mem, &
1549 n_mem, n_mem_ri, unit_nr, unit_nr_dbcsr
1550 INTEGER(int_8) :: flops_ks_max, flops_p_max, nblks, nflop, &
1551 nze, nze_3c, nze_3c_1, nze_3c_2, &
1552 nze_ks, nze_rho
1553 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_ao, batch_ranges_ri, dist1, &
1554 dist2
1555 INTEGER, DIMENSION(2, 1) :: bounds_i
1556 INTEGER, DIMENSION(2, 2) :: bounds_ij, bounds_j
1557 INTEGER, DIMENSION(3) :: dims_3c
1558 REAL(dp) :: memory_3c, occ, occ_3c, occ_3c_1, &
1559 occ_3c_2, occ_ks, occ_rho, t1, t2, &
1560 unused
1561 TYPE(dbt_type) :: ks_t, ks_tmp, rho_ao_tmp, t_3c_1, &
1562 t_3c_3, tensor_old
1563 TYPE(mp_para_env_type), POINTER :: para_env
1564
1565 IF (.NOT. fac > epsilon(0.0_dp)) RETURN
1566
1567 CALL timeset(routinen, handle)
1568
1569 NULLIFY (para_env)
1570
1571 ! get a useful output_unit
1572 unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1573 unit_nr = ri_data%unit_nr
1574
1575 CALL get_qs_env(qs_env, para_env=para_env)
1576
1577 cpassert(SIZE(ks_matrix, 2) == 1)
1578
1579 IF (geometry_did_change) THEN
1580 CALL hfx_ri_pre_scf_pmat(qs_env, ri_data)
1581 DO ispin = 1, nspins
1582 CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), 0.0_dp)
1583 CALL dbt_scale(ri_data%ks_t(ispin, 1), 0.0_dp)
1584 END DO
1585 END IF
1586
1587 nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_2(1, 1))
1588 IF (nblks == 0) THEN
1589 cpabort("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1590 END IF
1591
1592 n_mem = ri_data%n_mem
1593 n_mem_ri = ri_data%n_mem_RI
1594
1595 CALL dbt_create(ks_matrix(1, 1)%matrix, ks_tmp)
1596 CALL dbt_create(rho_ao(1, 1)%matrix, rho_ao_tmp)
1597
1598 CALL create_2c_tensor(ks_t, dist1, dist2, ri_data%pgrid_2d, &
1599 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1600 name="(AO | AO)")
1601 DEALLOCATE (dist1, dist2)
1602
1603 CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_1)
1604 CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_3)
1605
1606 CALL para_env%sync()
1607 t1 = m_walltime()
1608
1609 flops_ks_max = 0; flops_p_max = 0
1610
1611 ALLOCATE (batch_ranges_ri(ri_data%n_mem_RI + 1))
1612 ALLOCATE (batch_ranges_ao(ri_data%n_mem + 1))
1613 batch_ranges_ri(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
1614 batch_ranges_ri(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
1615 batch_ranges_ao(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
1616 batch_ranges_ao(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
1617
1618 memory_3c = 0.0_dp
1619 DO ispin = 1, nspins
1620
1621 CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze_3c, occ_3c)
1622
1623 nze_rho = 0
1624 occ_rho = 0.0_dp
1625 nze_3c_1 = 0
1626 occ_3c_1 = 0.0_dp
1627 nze_3c_2 = 0
1628 occ_3c_2 = 0.0_dp
1629
1630 CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1631
1632 !We work with Delta P: the diff between previous SCF step and this one, for increased sparsity
1633 CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), -1.0_dp)
1634 CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), summation=.true., move_data=.true.)
1635
1636 CALL get_tensor_occupancy(ri_data%rho_ao_t(ispin, 1), nze_rho, occ_rho)
1637
1638 CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1), batch_range_1=batch_ranges_ao, &
1639 batch_range_2=batch_ranges_ri)
1640 CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_ao, batch_range_2=batch_ranges_ri)
1641
1642 CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), tensor_old)
1643
1644 DO i_mem = 1, n_mem
1645
1646 CALL dbt_batched_contract_init(ri_data%rho_ao_t(ispin, 1))
1647 CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1), batch_range_2=batch_ranges_ri, &
1648 batch_range_3=batch_ranges_ao)
1649 CALL dbt_batched_contract_init(t_3c_1, batch_range_2=batch_ranges_ri, batch_range_3=batch_ranges_ao)
1650 DO j_mem = 1, n_mem_ri
1651
1652 CALL timeset(routinen//"_Px3C", handle2)
1653
1654 CALL dbt_get_info(t_3c_1, nfull_total=dims_3c)
1655 bounds_i(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1656 bounds_j(:, 1) = [1, dims_3c(1)]
1657 bounds_j(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1658
1659 CALL dbt_contract(1.0_dp, ri_data%rho_ao_t(ispin, 1), ri_data%t_3c_int_ctr_2(1, 1), &
1660 0.0_dp, t_3c_1, &
1661 contract_1=[2], notcontract_1=[1], &
1662 contract_2=[3], notcontract_2=[1, 2], &
1663 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
1664 bounds_2=bounds_i, &
1665 bounds_3=bounds_j, &
1666 unit_nr=unit_nr_dbcsr, &
1667 flop=nflop)
1668
1669 CALL timestop(handle2)
1670
1671 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1672
1673 CALL get_tensor_occupancy(t_3c_1, nze, occ)
1674 nze_3c_1 = nze_3c_1 + nze
1675 occ_3c_1 = occ_3c_1 + occ
1676
1677 CALL timeset(routinen//"_copy_2", handle2)
1678 CALL dbt_copy(t_3c_1, t_3c_3, order=[3, 2, 1], move_data=.true.)
1679 CALL timestop(handle2)
1680
1681 bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1682 bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1683
1684 CALL decompress_tensor(tensor_old, ri_data%blk_indices(i_mem, j_mem)%ind, &
1685 ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1686
1687 CALL dbt_copy(tensor_old, ri_data%t_3c_int_ctr_1(1, 1), move_data=.true.)
1688
1689 CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
1690 nze_3c_2 = nze_3c_2 + nze
1691 occ_3c_2 = occ_3c_2 + occ
1692 CALL timeset(routinen//"_KS", handle2)
1693 CALL dbt_batched_contract_init(ks_t)
1694 CALL dbt_contract(-fac, ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, &
1695 1.0_dp, ks_t, &
1696 contract_1=[1, 2], notcontract_1=[3], &
1697 contract_2=[1, 2], notcontract_2=[3], &
1698 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1699 bounds_1=bounds_ij, &
1700 unit_nr=unit_nr_dbcsr, &
1701 flop=nflop, move_data=.true.)
1702
1703 CALL dbt_batched_contract_finalize(ks_t, unit_nr=unit_nr_dbcsr)
1704 CALL timestop(handle2)
1705
1706 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1707
1708 END DO
1709 CALL dbt_batched_contract_finalize(ri_data%rho_ao_t(ispin, 1), unit_nr=unit_nr_dbcsr)
1710 CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1711 CALL dbt_batched_contract_finalize(t_3c_1)
1712 END DO
1713 CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1714 CALL dbt_batched_contract_finalize(t_3c_3)
1715
1716 DO i_mem = 1, n_mem
1717 DO j_mem = 1, n_mem_ri
1718 associate(blk_indices => ri_data%blk_indices(i_mem, j_mem), t_3c => ri_data%t_3c_int_ctr_1(1, 1))
1719 CALL decompress_tensor(tensor_old, blk_indices%ind, &
1720 ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1721 CALL dbt_copy(tensor_old, t_3c, move_data=.true.)
1722
1723 unused = 0
1724 CALL compress_tensor(t_3c, blk_indices%ind, ri_data%store_3c(i_mem, j_mem), &
1725 ri_data%filter_eps_storage, unused)
1726 END associate
1727 END DO
1728 END DO
1729
1730 CALL dbt_destroy(tensor_old)
1731
1732 CALL get_tensor_occupancy(ks_t, nze_ks, occ_ks)
1733
1734 !rho_ao_t holds the density difference, and ks_t is built upon it => need the full picture
1735 CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1736 CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), move_data=.true.)
1737 CALL dbt_copy(ks_t, ri_data%ks_t(ispin, 1), summation=.true., move_data=.true.)
1738
1739 CALL dbt_copy(ri_data%ks_t(ispin, 1), ks_tmp)
1740 CALL dbt_copy_tensor_to_matrix(ks_tmp, ks_matrix(ispin, 1)%matrix, summation=.true.)
1741 CALL dbt_clear(ks_tmp)
1742
1743 IF (unit_nr > 0 .AND. geometry_did_change) THEN
1744 WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1745 'Occupancy of density matrix P:', real(nze_rho, dp), '/', occ_rho*100, '%'
1746 WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1747 'Occupancy of 3c ints:', real(nze_3c, dp), '/', occ_3c*100, '%'
1748 WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1749 'Occupancy after contraction with K:', real(nze_3c_2, dp), '/', occ_3c_2*100, '%'
1750 WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1751 'Occupancy after contraction with P:', real(nze_3c_1, dp), '/', occ_3c_1*100, '%'
1752 WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1753 'Occupancy of Kohn-Sham matrix:', real(nze_ks, dp), '/', occ_ks*100, '%'
1754 END IF
1755
1756 END DO
1757
1758 CALL para_env%sync()
1759 t2 = m_walltime()
1760
1761 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1762
1763 CALL dbt_destroy(t_3c_1)
1764 CALL dbt_destroy(t_3c_3)
1765
1766 CALL dbt_destroy(rho_ao_tmp)
1767 CALL dbt_destroy(ks_t)
1768 CALL dbt_destroy(ks_tmp)
1769
1770 CALL timestop(handle)
1771
1772 END SUBROUTINE
1773
1774! **************************************************************************************************
1775!> \brief Implementation based on the MO flavor
1776!> \param qs_env ...
1777!> \param ri_data ...
1778!> \param nspins ...
1779!> \param hf_fraction ...
1780!> \param mo_coeff ...
1781!> \param use_virial ...
1782!> \note There is no response code for forces with the MO flavor
1783! **************************************************************************************************
1784 SUBROUTINE hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff, use_virial)
1785
1786 TYPE(qs_environment_type), POINTER :: qs_env
1787 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1788 INTEGER, INTENT(IN) :: nspins
1789 REAL(dp), INTENT(IN) :: hf_fraction
1790 TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1791 LOGICAL, INTENT(IN), OPTIONAL :: use_virial
1792
1793 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_forces_mo'
1794
1795 INTEGER :: dummy_int, handle, i_mem, i_xyz, ibasis, ispin, j_mem, k_mem, n_mem, n_mem_input, &
1796 n_mem_input_ri, n_mem_ri, n_mem_ri_fit, n_mos, natom, nkind, unit_nr_dbcsr
1797 INTEGER(int_8) :: nflop
1798 INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_blk_end, batch_blk_start, &
1799 batch_end, batch_end_ri, batch_end_ri_fit, batch_ranges, batch_ranges_ri, &
1800 batch_ranges_ri_fit, batch_start, batch_start_ri, batch_start_ri_fit, bsizes_mo, dist1, &
1801 dist2, dist3, idx_to_at_ao, idx_to_at_ri, kind_of
1802 INTEGER, DIMENSION(2, 1) :: bounds_ctr_1d
1803 INTEGER, DIMENSION(2, 2) :: bounds_ctr_2d
1804 INTEGER, DIMENSION(3) :: pdims
1805 LOGICAL :: use_virial_prv
1806 REAL(dp) :: pref, spin_fac, t1, t2
1807 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1808 TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_ao_ind, t_3c_der_ri_ind
1809 TYPE(cell_type), POINTER :: cell
1810 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_s
1811 TYPE(dbt_pgrid_type) :: pgrid_1, pgrid_2
1812 TYPE(dbt_type) :: t_2c_ri, t_2c_ri_inv, t_2c_ri_met, t_2c_ri_pq, t_2c_tmp, t_3c_0, t_3c_1, &
1813 t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_6, t_3c_ao_ri_ao, t_3c_ao_ri_mo, t_3c_desymm, &
1814 t_3c_mo_ri_ao, t_3c_mo_ri_mo, t_3c_ri_ao_ao, t_3c_ri_ctr, t_3c_ri_mo_mo, &
1815 t_3c_ri_mo_mo_fit, t_3c_work, t_mo_coeff, t_mo_cpy
1816 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_ri, t_2c_mo_ao, &
1817 t_2c_mo_ao_ctr, t_3c_der_ao, t_3c_der_ao_ctr_1, t_3c_der_ri, t_3c_der_ri_ctr_1, &
1818 t_3c_der_ri_ctr_2
1819 TYPE(dft_control_type), POINTER :: dft_control
1820 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
1821 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
1822 TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
1823 TYPE(hfx_compression_type), ALLOCATABLE, &
1824 DIMENSION(:, :) :: t_3c_der_ao_comp, t_3c_der_ri_comp
1825 TYPE(mp_para_env_type), POINTER :: para_env
1826 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1827 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1828 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1829
1830 ! 1) Precompute the derivatives that are needed (3c, 3c RI and metric)
1831 ! 2) Go over batched of occupied MOs so as to save memory and optimize contractions
1832 ! 3) Contract all 3c integrals and derivatives with MO coeffs
1833 ! 4) Contract relevant quantities with the inverse 2c RI (metric or pot)
1834 ! 5) First force contribution with the 2c RI derivative d/dx (Q|R)
1835 ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R)
1836 ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
1837 ! 8) If metric, do the last force contribution due to d/dx S^-1 (First contract (ab|P), then S^-1)
1838
1839 use_virial_prv = .false.
1840 IF (PRESENT(use_virial)) use_virial_prv = use_virial
1841 IF (use_virial_prv) THEN
1842 cpabort("Stress tensor with RI-HFX MO flavor not implemented.")
1843 END IF
1844
1845 unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1846
1847 CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
1848 atomic_kind_set=atomic_kind_set, cell=cell, force=force, &
1849 matrix_s=matrix_s, para_env=para_env, dft_control=dft_control, &
1850 qs_kind_set=qs_kind_set)
1851
1852 pdims(:) = 0
1853 CALL dbt_pgrid_create(para_env, pdims, pgrid_1, tensor_dims=[SIZE(ri_data%bsizes_AO_split), &
1854 SIZE(ri_data%bsizes_RI_split), &
1855 SIZE(ri_data%bsizes_AO_split)])
1856 pdims(:) = 0
1857 CALL dbt_pgrid_create(para_env, pdims, pgrid_2, tensor_dims=[SIZE(ri_data%bsizes_RI_split), &
1858 SIZE(ri_data%bsizes_AO_split), &
1859 SIZE(ri_data%bsizes_AO_split)])
1860
1861 CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, pgrid_1, &
1862 ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1863 [1, 2], [3], name="(AO RI | AO)")
1864 DEALLOCATE (dist1, dist2, dist3)
1865 CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, pgrid_2, &
1866 ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1867 [1], [2, 3], name="(RI | AO AO)")
1868 DEALLOCATE (dist1, dist2, dist3)
1869
1870 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
1871 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
1872 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
1873 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
1874 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
1875
1876 DO ibasis = 1, SIZE(basis_set_ao)
1877 orb_basis => basis_set_ao(ibasis)%gto_basis_set
1878 CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
1879 ri_basis => basis_set_ri(ibasis)%gto_basis_set
1880 CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
1881 END DO
1882
1883 ALLOCATE (t_2c_der_metric(3), t_2c_der_ri(3), t_2c_mo_ao(3), t_2c_mo_ao_ctr(3), t_3c_der_ao(3), &
1884 t_3c_der_ao_ctr_1(3), t_3c_der_ri(3), t_3c_der_ri_ctr_1(3), t_3c_der_ri_ctr_2(3))
1885
1886 ! 1) Precompute the derivatives
1887 CALL precalc_derivatives(t_3c_der_ri_comp, t_3c_der_ao_comp, t_3c_der_ri_ind, t_3c_der_ao_ind, &
1888 t_2c_der_ri, t_2c_der_metric, t_3c_ri_ao_ao, &
1889 basis_set_ao, basis_set_ri, ri_data, qs_env)
1890
1891 DO ibasis = 1, SIZE(basis_set_ao)
1892 orb_basis => basis_set_ao(ibasis)%gto_basis_set
1893 ri_basis => basis_set_ri(ibasis)%gto_basis_set
1894 CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
1895 CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
1896 END DO
1897
1898 n_mem = SIZE(t_3c_der_ri_comp, 1)
1899 DO i_xyz = 1, 3
1900 CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_ri(i_xyz))
1901 CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_ao(i_xyz))
1902
1903 DO i_mem = 1, n_mem
1904 CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ri_ind(i_mem, i_xyz)%ind, &
1905 t_3c_der_ri_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1906 CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_ri(i_xyz), order=[2, 1, 3], move_data=.true., summation=.true.)
1907
1908 CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ao_ind(i_mem, i_xyz)%ind, &
1909 t_3c_der_ao_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1910 CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_ao(i_xyz), order=[2, 1, 3], move_data=.true., summation=.true.)
1911 END DO
1912 END DO
1913
1914 DO i_xyz = 1, 3
1915 DO i_mem = 1, n_mem
1916 CALL dealloc_containers(t_3c_der_ao_comp(i_mem, i_xyz), dummy_int)
1917 CALL dealloc_containers(t_3c_der_ri_comp(i_mem, i_xyz), dummy_int)
1918 END DO
1919 END DO
1920 DEALLOCATE (t_3c_der_ao_ind, t_3c_der_ri_ind)
1921
1922 ! Get the 3c integrals (desymmetrized)
1923 CALL dbt_create(t_3c_ao_ri_ao, t_3c_desymm)
1924 CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm)
1925 CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm, order=[3, 2, 1], &
1926 summation=.true., move_data=.true.)
1927
1928 CALL dbt_destroy(t_3c_ao_ri_ao)
1929 CALL dbt_destroy(t_3c_ri_ao_ao)
1930
1931 ! Some utilities
1932 spin_fac = 0.5_dp
1933 IF (nspins == 2) spin_fac = 1.0_dp
1934
1935 ALLOCATE (idx_to_at_ri(SIZE(ri_data%bsizes_RI_split)))
1936 CALL get_idx_to_atom(idx_to_at_ri, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
1937
1938 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
1939 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
1940
1941 CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
1942
1943 ! 2-center RI tensors
1944 CALL create_2c_tensor(t_2c_ri, dist1, dist2, ri_data%pgrid_2d, &
1945 ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
1946 DEALLOCATE (dist1, dist2)
1947
1948 CALL create_2c_tensor(t_2c_ri_pq, dist1, dist2, ri_data%pgrid_2d, &
1949 ri_data%bsizes_RI_fit, ri_data%bsizes_RI_fit, name="(RI | RI)")
1950 DEALLOCATE (dist1, dist2)
1951
1952 IF (.NOT. ri_data%same_op) THEN
1953 !precompute the (P|Q)*S^-1 product
1954 CALL dbt_create(t_2c_ri_pq, t_2c_ri_inv)
1955 CALL dbt_create(t_2c_ri_pq, t_2c_ri_met)
1956 CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_tmp)
1957
1958 CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), &
1959 0.0_dp, t_2c_tmp, &
1960 contract_1=[2], notcontract_1=[1], &
1961 contract_2=[1], notcontract_2=[2], &
1962 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
1963 unit_nr=unit_nr_dbcsr, flop=nflop)
1964 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1965
1966 CALL dbt_copy(t_2c_tmp, t_2c_ri_inv, move_data=.true.)
1967 CALL dbt_destroy(t_2c_tmp)
1968 END IF
1969
1970 !3 loops in MO force evaluations. To be consistent with input MEMORY_CUT, need to take the cubic root
1971 !No need to cut memory further because SCF tensors alrady dense
1972 n_mem_input = floor((ri_data%n_mem_input - 0.1_dp)**(1._dp/3._dp)) + 1
1973 n_mem_input_ri = floor((ri_data%n_mem_input - 0.1_dp)/n_mem_input**2) + 1
1974
1975 !batches on RI_split and RI_fit blocks
1976 n_mem_ri = n_mem_input_ri
1977 CALL create_tensor_batches(ri_data%bsizes_RI_split, n_mem_ri, batch_start_ri, batch_end_ri, &
1978 batch_blk_start, batch_blk_end)
1979 ALLOCATE (batch_ranges_ri(n_mem_ri + 1))
1980 batch_ranges_ri(1:n_mem_ri) = batch_blk_start(1:n_mem_ri)
1981 batch_ranges_ri(n_mem_ri + 1) = batch_blk_end(n_mem_ri) + 1
1982 DEALLOCATE (batch_blk_start, batch_blk_end)
1983
1984 n_mem_ri_fit = n_mem_input_ri
1985 CALL create_tensor_batches(ri_data%bsizes_RI_fit, n_mem_ri_fit, batch_start_ri_fit, batch_end_ri_fit, &
1986 batch_blk_start, batch_blk_end)
1987 ALLOCATE (batch_ranges_ri_fit(n_mem_ri_fit + 1))
1988 batch_ranges_ri_fit(1:n_mem_ri_fit) = batch_blk_start(1:n_mem_ri_fit)
1989 batch_ranges_ri_fit(n_mem_ri_fit + 1) = batch_blk_end(n_mem_ri_fit) + 1
1990 DEALLOCATE (batch_blk_start, batch_blk_end)
1991
1992 DO ispin = 1, nspins
1993
1994 ! 2 )Prepare the batches for this spin
1995 CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1996 !note: optimized GPU block size for SCF is 64x1x64. Here we do 8x8x64
1997 CALL split_block_sizes([n_mos], bsizes_mo, max_size=floor(sqrt(ri_data%max_bsize_MO - 0.1)) + 1)
1998
1999 !batching on MO blocks
2000 n_mem = n_mem_input
2001 CALL create_tensor_batches(bsizes_mo, n_mem, batch_start, batch_end, &
2002 batch_blk_start, batch_blk_end)
2003 ALLOCATE (batch_ranges(n_mem + 1))
2004 batch_ranges(1:n_mem) = batch_blk_start(1:n_mem)
2005 batch_ranges(n_mem + 1) = batch_blk_end(n_mem) + 1
2006 DEALLOCATE (batch_blk_start, batch_blk_end)
2007
2008 ! Initialize the different tensors needed (Note: keep MO coeffs as (MO | AO) for less transpose)
2009 CALL create_2c_tensor(t_mo_coeff, dist1, dist2, ri_data%pgrid_2d, bsizes_mo, &
2010 ri_data%bsizes_AO_split, name="MO coeffs")
2011 DEALLOCATE (dist1, dist2)
2012 CALL dbt_create(mo_coeff(ispin), t_2c_tmp, name="MO coeffs")
2013 CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), t_2c_tmp)
2014 CALL dbt_copy(t_2c_tmp, t_mo_coeff, order=[2, 1], move_data=.true.)
2015 CALL dbt_destroy(t_2c_tmp)
2016
2017 CALL dbt_create(t_mo_coeff, t_mo_cpy)
2018 CALL dbt_copy(t_mo_coeff, t_mo_cpy)
2019 DO i_xyz = 1, 3
2020 CALL dbt_create(t_mo_coeff, t_2c_mo_ao_ctr(i_xyz))
2021 CALL dbt_create(t_mo_coeff, t_2c_mo_ao(i_xyz))
2022 END DO
2023
2024 CALL create_3c_tensor(t_3c_ao_ri_mo, dist1, dist2, dist3, pgrid_1, ri_data%bsizes_AO_split, &
2025 ri_data%bsizes_RI_split, bsizes_mo, [1, 2], [3], name="(AO RI| MO)")
2026 DEALLOCATE (dist1, dist2, dist3)
2027
2028 CALL dbt_create(t_3c_ao_ri_mo, t_3c_0)
2029 CALL dbt_destroy(t_3c_ao_ri_mo)
2030
2031 CALL create_3c_tensor(t_3c_mo_ri_ao, dist1, dist2, dist3, pgrid_1, bsizes_mo, ri_data%bsizes_RI_split, &
2032 ri_data%bsizes_AO_split, [1, 2], [3], name="(MO RI | AO)")
2033 DEALLOCATE (dist1, dist2, dist3)
2034 CALL dbt_create(t_3c_mo_ri_ao, t_3c_1)
2035
2036 DO i_xyz = 1, 3
2037 CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_ri_ctr_1(i_xyz))
2038 CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_ao_ctr_1(i_xyz))
2039 END DO
2040
2041 CALL create_3c_tensor(t_3c_mo_ri_mo, dist1, dist2, dist3, pgrid_1, bsizes_mo, &
2042 ri_data%bsizes_RI_split, bsizes_mo, [1, 2], [3], name="(MO RI | MO)")
2043 DEALLOCATE (dist1, dist2, dist3)
2044 CALL dbt_create(t_3c_mo_ri_mo, t_3c_work)
2045
2046 CALL create_3c_tensor(t_3c_ri_mo_mo, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_split, &
2047 bsizes_mo, bsizes_mo, [1], [2, 3], name="(RI| MO MO)")
2048 DEALLOCATE (dist1, dist2, dist3)
2049
2050 CALL dbt_create(t_3c_ri_mo_mo, t_3c_2)
2051 CALL dbt_create(t_3c_ri_mo_mo, t_3c_3)
2052 CALL dbt_create(t_3c_ri_mo_mo, t_3c_ri_ctr)
2053 DO i_xyz = 1, 3
2054 CALL dbt_create(t_3c_ri_mo_mo, t_3c_der_ri_ctr_2(i_xyz))
2055 END DO
2056
2057 !Very large RI_fit blocks => new pgrid to make sure distribution is ideal
2058 pdims(:) = 0
2059 CALL create_3c_tensor(t_3c_ri_mo_mo_fit, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_fit, &
2060 bsizes_mo, bsizes_mo, [1], [2, 3], name="(RI| MO MO)")
2061 DEALLOCATE (dist1, dist2, dist3)
2062
2063 CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_4)
2064 CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_5)
2065 CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_6)
2066
2067 CALL dbt_batched_contract_init(t_3c_desymm, batch_range_2=batch_ranges_ri)
2068 CALL dbt_batched_contract_init(t_3c_0, batch_range_2=batch_ranges_ri, batch_range_3=batch_ranges)
2069
2070 DO i_xyz = 1, 3
2071 CALL dbt_batched_contract_init(t_3c_der_ao(i_xyz), batch_range_2=batch_ranges_ri)
2072 CALL dbt_batched_contract_init(t_3c_der_ri(i_xyz), batch_range_2=batch_ranges_ri)
2073 END DO
2074
2075 CALL para_env%sync()
2076 t1 = m_walltime()
2077
2078 ! 2) Loop over batches
2079 DO i_mem = 1, n_mem
2080
2081 bounds_ctr_1d(1, 1) = batch_start(i_mem)
2082 bounds_ctr_1d(2, 1) = batch_end(i_mem)
2083
2084 bounds_ctr_2d(1, 1) = 1
2085 bounds_ctr_2d(2, 1) = sum(ri_data%bsizes_AO)
2086
2087 ! 3) Do the first AO to MO contraction here
2088 CALL timeset(routinen//"_AO2MO_1", handle)
2089 CALL dbt_batched_contract_init(t_mo_coeff)
2090 DO k_mem = 1, n_mem_ri
2091 bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2092 bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2093
2094 CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_desymm, &
2095 1.0_dp, t_3c_0, &
2096 contract_1=[2], notcontract_1=[1], &
2097 contract_2=[3], notcontract_2=[1, 2], &
2098 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2099 bounds_2=bounds_ctr_1d, &
2100 bounds_3=bounds_ctr_2d, &
2101 unit_nr=unit_nr_dbcsr, flop=nflop)
2102 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2103 END DO
2104 CALL dbt_copy(t_3c_0, t_3c_1, order=[3, 2, 1], move_data=.true.)
2105
2106 DO i_xyz = 1, 3
2107 DO k_mem = 1, n_mem_ri
2108 bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2109 bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2110
2111 CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_ao(i_xyz), &
2112 1.0_dp, t_3c_0, &
2113 contract_1=[2], notcontract_1=[1], &
2114 contract_2=[3], notcontract_2=[1, 2], &
2115 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2116 bounds_2=bounds_ctr_1d, &
2117 bounds_3=bounds_ctr_2d, &
2118 unit_nr=unit_nr_dbcsr, flop=nflop)
2119 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2120 END DO
2121 CALL dbt_copy(t_3c_0, t_3c_der_ao_ctr_1(i_xyz), order=[3, 2, 1], move_data=.true.)
2122
2123 DO k_mem = 1, n_mem_ri
2124 bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2125 bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2126
2127 CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_ri(i_xyz), &
2128 1.0_dp, t_3c_0, &
2129 contract_1=[2], notcontract_1=[1], &
2130 contract_2=[3], notcontract_2=[1, 2], &
2131 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2132 bounds_2=bounds_ctr_1d, &
2133 bounds_3=bounds_ctr_2d, &
2134 unit_nr=unit_nr_dbcsr, flop=nflop)
2135 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2136 END DO
2137 CALL dbt_copy(t_3c_0, t_3c_der_ri_ctr_1(i_xyz), order=[3, 2, 1], move_data=.true.)
2138 END DO
2139 CALL dbt_batched_contract_finalize(t_mo_coeff)
2140 CALL timestop(handle)
2141
2142 CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_2=batch_ranges_ri)
2143 CALL dbt_batched_contract_init(t_3c_work, batch_range_1=batch_ranges, batch_range_2=batch_ranges_ri, &
2144 batch_range_3=batch_ranges)
2145 CALL dbt_batched_contract_init(t_3c_2, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2146 CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_ri, &
2147 batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2148
2149 CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_ri_fit, &
2150 batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2151 CALL dbt_batched_contract_init(t_3c_5, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2152
2153 DO i_xyz = 1, 3
2154 CALL dbt_batched_contract_init(t_3c_der_ri_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2155 batch_range_2=batch_ranges_ri)
2156 CALL dbt_batched_contract_init(t_3c_der_ao_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2157 batch_range_2=batch_ranges_ri)
2158
2159 END DO
2160
2161 IF (.NOT. ri_data%same_op) THEN
2162 CALL dbt_batched_contract_init(t_3c_6, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2163 END IF
2164
2165 DO j_mem = 1, n_mem
2166
2167 bounds_ctr_1d(1, 1) = batch_start(j_mem)
2168 bounds_ctr_1d(2, 1) = batch_end(j_mem)
2169
2170 bounds_ctr_2d(1, 1) = batch_start(i_mem)
2171 bounds_ctr_2d(2, 1) = batch_end(i_mem)
2172
2173 ! 3) Do the second AO to MO contraction here, followed by the S^-1 contraction
2174 CALL timeset(routinen//"_AO2MO_2", handle)
2175 CALL dbt_batched_contract_init(t_mo_coeff)
2176 DO k_mem = 1, n_mem_ri
2177 bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2178 bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2179
2180 CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_1, &
2181 1.0_dp, t_3c_work, &
2182 contract_1=[2], notcontract_1=[1], &
2183 contract_2=[3], notcontract_2=[1, 2], &
2184 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2185 bounds_2=bounds_ctr_1d, &
2186 bounds_3=bounds_ctr_2d, &
2187 unit_nr=unit_nr_dbcsr, flop=nflop)
2188 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2189 END DO
2190 CALL dbt_batched_contract_finalize(t_mo_coeff)
2191 CALL timestop(handle)
2192
2193 bounds_ctr_2d(1, 1) = batch_start(i_mem)
2194 bounds_ctr_2d(2, 1) = batch_end(i_mem)
2195 bounds_ctr_2d(1, 2) = batch_start(j_mem)
2196 bounds_ctr_2d(2, 2) = batch_end(j_mem)
2197
2198 ! 4) Contract 3c MO integrals with S^-1 as well
2199 CALL timeset(routinen//"_2c_inv", handle)
2200 CALL dbt_copy(t_3c_work, t_3c_3, order=[2, 1, 3], move_data=.true.)
2201 DO k_mem = 1, n_mem_ri
2202 bounds_ctr_1d(1, 1) = batch_start_ri(k_mem)
2203 bounds_ctr_1d(2, 1) = batch_end_ri(k_mem)
2204
2205 CALL dbt_batched_contract_init(ri_data%t_2c_inv(1, 1))
2206 CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_3c_3, &
2207 1.0_dp, t_3c_2, &
2208 contract_1=[2], notcontract_1=[1], &
2209 contract_2=[1], notcontract_2=[2, 3], &
2210 map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2211 bounds_1=bounds_ctr_1d, &
2212 bounds_3=bounds_ctr_2d, &
2213 unit_nr=unit_nr_dbcsr, flop=nflop)
2214 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2215 CALL dbt_batched_contract_finalize(ri_data%t_2c_inv(1, 1))
2216 END DO
2217 CALL dbt_copy(t_3c_ri_mo_mo, t_3c_3)
2218 CALL timestop(handle)
2219
2220 !Only contract (ab|P') with MO coeffs since need AO rep for the force of (a'b|P)
2221 bounds_ctr_1d(1, 1) = batch_start(j_mem)
2222 bounds_ctr_1d(2, 1) = batch_end(j_mem)
2223
2224 bounds_ctr_2d(1, 1) = batch_start(i_mem)
2225 bounds_ctr_2d(2, 1) = batch_end(i_mem)
2226
2227 CALL timeset(routinen//"_AO2MO_2", handle)
2228 CALL dbt_batched_contract_init(t_mo_coeff)
2229 DO i_xyz = 1, 3
2230 DO k_mem = 1, n_mem_ri
2231 bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2232 bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2233
2234 CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_ri_ctr_1(i_xyz), &
2235 1.0_dp, t_3c_work, &
2236 contract_1=[2], notcontract_1=[1], &
2237 contract_2=[3], notcontract_2=[1, 2], &
2238 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2239 bounds_2=bounds_ctr_1d, &
2240 bounds_3=bounds_ctr_2d, &
2241 unit_nr=unit_nr_dbcsr, flop=nflop)
2242 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2243 END DO
2244 CALL dbt_copy(t_3c_work, t_3c_der_ri_ctr_2(i_xyz), order=[2, 1, 3], move_data=.true.)
2245 END DO
2246 CALL dbt_batched_contract_finalize(t_mo_coeff)
2247 CALL timestop(handle)
2248
2249 bounds_ctr_2d(1, 1) = batch_start(i_mem)
2250 bounds_ctr_2d(2, 1) = batch_end(i_mem)
2251 bounds_ctr_2d(1, 2) = batch_start(j_mem)
2252 bounds_ctr_2d(2, 2) = batch_end(j_mem)
2253
2254 ! 5) Force due to d/dx (P|Q)
2255 CALL timeset(routinen//"_PQ_der", handle)
2256 CALL dbt_copy(t_3c_2, t_3c_4, move_data=.true.)
2257 CALL dbt_copy(t_3c_4, t_3c_5)
2258 DO k_mem = 1, n_mem_ri_fit
2259 bounds_ctr_1d(1, 1) = batch_start_ri_fit(k_mem)
2260 bounds_ctr_1d(2, 1) = batch_end_ri_fit(k_mem)
2261
2262 CALL dbt_batched_contract_init(t_2c_ri_pq)
2263 CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2264 1.0_dp, t_2c_ri_pq, &
2265 contract_1=[2, 3], notcontract_1=[1], &
2266 contract_2=[2, 3], notcontract_2=[1], &
2267 bounds_1=bounds_ctr_2d, &
2268 bounds_2=bounds_ctr_1d, &
2269 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2270 unit_nr=unit_nr_dbcsr, flop=nflop)
2271 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2272 CALL dbt_batched_contract_finalize(t_2c_ri_pq)
2273 END DO
2274 CALL timestop(handle)
2275
2276 ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R) (not on the derivatives)
2277 IF (.NOT. ri_data%same_op) THEN
2278 CALL timeset(routinen//"_metric", handle)
2279 DO k_mem = 1, n_mem_ri_fit
2280 bounds_ctr_1d(1, 1) = batch_start_ri_fit(k_mem)
2281 bounds_ctr_1d(2, 1) = batch_end_ri_fit(k_mem)
2282
2283 CALL dbt_batched_contract_init(t_2c_ri_inv)
2284 CALL dbt_contract(1.0_dp, t_2c_ri_inv, t_3c_4, &
2285 1.0_dp, t_3c_6, &
2286 contract_1=[2], notcontract_1=[1], &
2287 contract_2=[1], notcontract_2=[2, 3], &
2288 bounds_1=bounds_ctr_1d, &
2289 bounds_3=bounds_ctr_2d, &
2290 map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2291 unit_nr=unit_nr_dbcsr, flop=nflop)
2292 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2293 CALL dbt_batched_contract_finalize(t_2c_ri_inv)
2294 END DO
2295 CALL dbt_copy(t_3c_6, t_3c_4, move_data=.true.)
2296
2297 ! 8) and get the force due to d/dx S^-1
2298 DO k_mem = 1, n_mem_ri_fit
2299 bounds_ctr_1d(1, 1) = batch_start_ri_fit(k_mem)
2300 bounds_ctr_1d(2, 1) = batch_end_ri_fit(k_mem)
2301
2302 CALL dbt_batched_contract_init(t_2c_ri_met)
2303 CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2304 1.0_dp, t_2c_ri_met, &
2305 contract_1=[2, 3], notcontract_1=[1], &
2306 contract_2=[2, 3], notcontract_2=[1], &
2307 bounds_1=bounds_ctr_2d, &
2308 bounds_2=bounds_ctr_1d, &
2309 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2310 unit_nr=unit_nr_dbcsr, flop=nflop)
2311 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2312 CALL dbt_batched_contract_finalize(t_2c_ri_met)
2313 END DO
2314 CALL timestop(handle)
2315 END IF
2316 CALL dbt_copy(t_3c_ri_mo_mo_fit, t_3c_5)
2317
2318 ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
2319
2320 ! (ab|P')
2321 CALL timeset(routinen//"_3c_RI", handle)
2322 pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2323 CALL dbt_copy(t_3c_4, t_3c_ri_ctr, move_data=.true.)
2324 CALL get_force_from_3c_trace(force, t_3c_ri_ctr, t_3c_der_ri_ctr_2, atom_of_kind, kind_of, &
2325 idx_to_at_ri, pref)
2326 CALL timestop(handle)
2327
2328 ! (a'b|P) Note that derivative remains in AO rep until the actual force evaluation,
2329 ! which also prevents doing a direct 3-center trace
2330 bounds_ctr_2d(1, 1) = batch_start(i_mem)
2331 bounds_ctr_2d(2, 1) = batch_end(i_mem)
2332
2333 bounds_ctr_1d(1, 1) = batch_start(j_mem)
2334 bounds_ctr_1d(2, 1) = batch_end(j_mem)
2335
2336 CALL timeset(routinen//"_3c_AO", handle)
2337 CALL dbt_copy(t_3c_ri_ctr, t_3c_work, order=[2, 1, 3], move_data=.true.)
2338 DO i_xyz = 1, 3
2339
2340 CALL dbt_batched_contract_init(t_2c_mo_ao_ctr(i_xyz))
2341 DO k_mem = 1, n_mem_ri
2342 bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2343 bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2344
2345 CALL dbt_contract(1.0_dp, t_3c_work, t_3c_der_ao_ctr_1(i_xyz), &
2346 1.0_dp, t_2c_mo_ao_ctr(i_xyz), &
2347 contract_1=[1, 2], notcontract_1=[3], &
2348 contract_2=[1, 2], notcontract_2=[3], &
2349 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2350 bounds_1=bounds_ctr_2d, &
2351 bounds_2=bounds_ctr_1d, &
2352 unit_nr=unit_nr_dbcsr, flop=nflop)
2353 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2354 END DO
2355 CALL dbt_batched_contract_finalize(t_2c_mo_ao_ctr(i_xyz))
2356 END DO
2357 CALL timestop(handle)
2358
2359 END DO !j_mem
2360 CALL dbt_batched_contract_finalize(t_3c_1)
2361 CALL dbt_batched_contract_finalize(t_3c_work)
2362 CALL dbt_batched_contract_finalize(t_3c_2)
2363 CALL dbt_batched_contract_finalize(t_3c_3)
2364 CALL dbt_batched_contract_finalize(t_3c_4)
2365 CALL dbt_batched_contract_finalize(t_3c_5)
2366
2367 DO i_xyz = 1, 3
2368 CALL dbt_batched_contract_finalize(t_3c_der_ri_ctr_1(i_xyz))
2369 CALL dbt_batched_contract_finalize(t_3c_der_ao_ctr_1(i_xyz))
2370 END DO
2371
2372 IF (.NOT. ri_data%same_op) THEN
2373 CALL dbt_batched_contract_finalize(t_3c_6)
2374 END IF
2375
2376 END DO !i_mem
2377 CALL dbt_batched_contract_finalize(t_3c_desymm)
2378 CALL dbt_batched_contract_finalize(t_3c_0)
2379
2380 DO i_xyz = 1, 3
2381 CALL dbt_batched_contract_finalize(t_3c_der_ao(i_xyz))
2382 CALL dbt_batched_contract_finalize(t_3c_der_ri(i_xyz))
2383 END DO
2384
2385 !Force contribution due to 3-center AO derivatives (a'b|P)
2386 pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2387 DO i_xyz = 1, 3
2388 CALL dbt_copy(t_2c_mo_ao_ctr(i_xyz), t_2c_mo_ao(i_xyz), move_data=.true.) !ensures matching distributions
2389 CALL get_mo_ao_force(force, t_mo_cpy, t_2c_mo_ao(i_xyz), atom_of_kind, kind_of, idx_to_at_ao, pref, i_xyz)
2390 CALL dbt_clear(t_2c_mo_ao(i_xyz))
2391 END DO
2392
2393 !Force contribution of d/dx (P|Q)
2394 pref = 0.5_dp*hf_fraction*spin_fac
2395 IF (.NOT. ri_data%same_op) pref = -pref
2396
2397 !Making sure dists of the t_2c_RI tensors match
2398 CALL dbt_copy(t_2c_ri_pq, t_2c_ri, move_data=.true.)
2399 CALL get_2c_der_force(force, t_2c_ri, t_2c_der_ri, atom_of_kind, &
2400 kind_of, idx_to_at_ri, pref)
2401 CALL dbt_clear(t_2c_ri)
2402
2403 !Force contribution due to the inverse metric
2404 IF (.NOT. ri_data%same_op) THEN
2405 pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2406
2407 CALL dbt_copy(t_2c_ri_met, t_2c_ri, move_data=.true.)
2408 CALL get_2c_der_force(force, t_2c_ri, t_2c_der_metric, atom_of_kind, &
2409 kind_of, idx_to_at_ri, pref)
2410 CALL dbt_clear(t_2c_ri)
2411 END IF
2412
2413 CALL dbt_destroy(t_3c_0)
2414 CALL dbt_destroy(t_3c_1)
2415 CALL dbt_destroy(t_3c_2)
2416 CALL dbt_destroy(t_3c_3)
2417 CALL dbt_destroy(t_3c_4)
2418 CALL dbt_destroy(t_3c_5)
2419 CALL dbt_destroy(t_3c_6)
2420 CALL dbt_destroy(t_3c_work)
2421 CALL dbt_destroy(t_3c_ri_ctr)
2422 CALL dbt_destroy(t_3c_mo_ri_ao)
2423 CALL dbt_destroy(t_3c_mo_ri_mo)
2424 CALL dbt_destroy(t_3c_ri_mo_mo)
2425 CALL dbt_destroy(t_3c_ri_mo_mo_fit)
2426 CALL dbt_destroy(t_mo_coeff)
2427 CALL dbt_destroy(t_mo_cpy)
2428 DO i_xyz = 1, 3
2429 CALL dbt_destroy(t_2c_mo_ao(i_xyz))
2430 CALL dbt_destroy(t_2c_mo_ao_ctr(i_xyz))
2431 CALL dbt_destroy(t_3c_der_ri_ctr_1(i_xyz))
2432 CALL dbt_destroy(t_3c_der_ao_ctr_1(i_xyz))
2433 CALL dbt_destroy(t_3c_der_ri_ctr_2(i_xyz))
2434 END DO
2435 DEALLOCATE (batch_ranges, batch_start, batch_end)
2436 END DO !ispin
2437
2438 ! Clean-up
2439 CALL dbt_pgrid_destroy(pgrid_1)
2440 CALL dbt_pgrid_destroy(pgrid_2)
2441 CALL dbt_destroy(t_3c_desymm)
2442 CALL dbt_destroy(t_2c_ri)
2443 CALL dbt_destroy(t_2c_ri_pq)
2444 IF (.NOT. ri_data%same_op) THEN
2445 CALL dbt_destroy(t_2c_ri_met)
2446 CALL dbt_destroy(t_2c_ri_inv)
2447 END IF
2448 DO i_xyz = 1, 3
2449 CALL dbt_destroy(t_3c_der_ao(i_xyz))
2450 CALL dbt_destroy(t_3c_der_ri(i_xyz))
2451 CALL dbt_destroy(t_2c_der_ri(i_xyz))
2452 IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2453 END DO
2454 CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_1(1, 1))
2455
2456 CALL para_env%sync()
2457 t2 = m_walltime()
2458
2459 ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2460
2461 END SUBROUTINE hfx_ri_forces_mo
2462
2463! **************************************************************************************************
2464!> \brief New sparser implementation
2465!> \param qs_env ...
2466!> \param ri_data ...
2467!> \param nspins ...
2468!> \param hf_fraction ...
2469!> \param rho_ao ...
2470!> \param rho_ao_resp ...
2471!> \param use_virial ...
2472!> \param resp_only ...
2473!> \param rescale_factor ...
2474! **************************************************************************************************
2475 SUBROUTINE hfx_ri_forces_pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
2476 use_virial, resp_only, rescale_factor)
2477
2478 TYPE(qs_environment_type), POINTER :: qs_env
2479 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2480 INTEGER, INTENT(IN) :: nspins
2481 REAL(dp), INTENT(IN) :: hf_fraction
2482 TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
2483 TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
2484 LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
2485 REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
2486
2487 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_forces_Pmat'
2488
2489 INTEGER :: dummy_int, handle, i_mem, i_spin, i_xyz, &
2490 ibasis, j_mem, j_xyz, k_mem, k_xyz, &
2491 n_mem, n_mem_ri, natom, nkind, &
2492 unit_nr_dbcsr
2493 INTEGER(int_8) :: nflop
2494 INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_end, batch_end_ri, batch_ranges, &
2495 batch_ranges_ri, batch_start, batch_start_ri, dist1, dist2, dist3, idx_to_at_ao, &
2496 idx_to_at_ri, kind_of
2497 INTEGER, DIMENSION(2, 1) :: ibounds, jbounds, kbounds
2498 INTEGER, DIMENSION(2, 2) :: ijbounds
2499 INTEGER, DIMENSION(2, 3) :: bounds_cpy
2500 INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
2501 LOGICAL :: do_resp, resp_only_prv, use_virial_prv
2502 REAL(dp) :: pref, spin_fac, t1, t2
2503 REAL(dp), DIMENSION(3, 3) :: work_virial
2504 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2505 TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_ao_ind, t_3c_der_ri_ind
2506 TYPE(cell_type), POINTER :: cell
2507 TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist
2508 TYPE(dbcsr_type) :: dbcsr_tmp, virial_trace
2509 TYPE(dbt_type) :: rho_ao_1, rho_ao_2, t_2c_ri, t_2c_ri_tmp, t_2c_tmp, t_2c_virial, t_3c_1, &
2510 t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_ao_ri_ao, t_3c_help_1, t_3c_help_2, t_3c_int, &
2511 t_3c_int_2, t_3c_ri_ao_ao, t_3c_sparse, t_3c_virial, t_r, t_svs
2512 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_ri, &
2513 t_3c_der_ao, t_3c_der_ri
2514 TYPE(dft_control_type), POINTER :: dft_control
2515 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2516 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
2517 TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
2518 TYPE(hfx_compression_type), ALLOCATABLE, &
2519 DIMENSION(:, :) :: t_3c_der_ao_comp, t_3c_der_ri_comp
2520 TYPE(mp_para_env_type), POINTER :: para_env
2521 TYPE(neighbor_list_3c_type) :: nl_3c
2522 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2523 POINTER :: nl_2c_met, nl_2c_pot
2524 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2525 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2526 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2527 TYPE(virial_type), POINTER :: virial
2528
2529 !The idea is the following: we need to compute the gradients
2530 ! d/dx [P_ab P_cd (acP) S^-1_PQ (Q|R) S^-1_RS (Sbd)]
2531 ! Which we do in a few steps:
2532 ! 1) Contract the density matrices with the 3c integrals: M_acS = P_ab P_cd (Sbd)
2533 ! 2) Calculate the 3c contributions: d/dx (acP) [S^-1_PQ (Q|R) S^-1_RS M_acS]
2534 ! For maximum perf, we first multiply all 2c matrices together, than contract with retain_sparsity
2535 ! 3) Contract the 3c integrals and the M tensor together in order to only work with 2c quantities:
2536 ! R_PS = (acP) M_acS
2537 ! 4) From there, we can easily calculate the 2c contributions to the force:
2538 ! Potential: [S^-1*R*S^-1]_QR d/dx (Q|R)
2539 ! Metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2540
2541 NULLIFY (particle_set, virial, cell, force, atomic_kind_set, nl_2c_pot, nl_2c_met)
2542 NULLIFY (orb_basis, ri_basis, qs_kind_set, particle_set, dft_control, dbcsr_dist)
2543
2544 use_virial_prv = .false.
2545 IF (PRESENT(use_virial)) use_virial_prv = use_virial
2546
2547 do_resp = .false.
2548 IF (PRESENT(rho_ao_resp)) THEN
2549 IF (ASSOCIATED(rho_ao_resp(1)%matrix)) do_resp = .true.
2550 END IF
2551
2552 resp_only_prv = .false.
2553 IF (PRESENT(resp_only)) resp_only_prv = resp_only
2554
2555 unit_nr_dbcsr = ri_data%unit_nr_dbcsr
2556
2557 CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
2558 atomic_kind_set=atomic_kind_set, virial=virial, &
2559 cell=cell, force=force, para_env=para_env, dft_control=dft_control, &
2560 qs_kind_set=qs_kind_set, dbcsr_dist=dbcsr_dist)
2561
2562 CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, ri_data%pgrid_1, &
2563 ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
2564 [1, 2], [3], name="(AO RI | AO)")
2565 DEALLOCATE (dist1, dist2, dist3)
2566
2567 CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, ri_data%pgrid_2, &
2568 ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2569 [1], [2, 3], name="(RI | AO AO)")
2570 DEALLOCATE (dist1, dist2, dist3)
2571
2572 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
2573 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
2574 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
2575 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
2576 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
2577
2578 DO ibasis = 1, SIZE(basis_set_ao)
2579 orb_basis => basis_set_ao(ibasis)%gto_basis_set
2580 CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
2581 ri_basis => basis_set_ri(ibasis)%gto_basis_set
2582 CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
2583 END DO
2584
2585 ! Precompute the derivatives
2586 ALLOCATE (t_2c_der_metric(3), t_2c_der_ri(3), t_3c_der_ao(3), t_3c_der_ri(3))
2587 IF (use_virial) THEN
2588 CALL precalc_derivatives(t_3c_der_ri_comp, t_3c_der_ao_comp, t_3c_der_ri_ind, t_3c_der_ao_ind, &
2589 t_2c_der_ri, t_2c_der_metric, t_3c_ri_ao_ao, &
2590 basis_set_ao, basis_set_ri, ri_data, qs_env, &
2591 nl_2c_pot=nl_2c_pot, nl_2c_met=nl_2c_met, &
2592 nl_3c_out=nl_3c, t_3c_virial=t_3c_virial)
2593
2594 ALLOCATE (col_bsize(natom), row_bsize(natom))
2595 col_bsize(:) = ri_data%bsizes_RI
2596 row_bsize(:) = ri_data%bsizes_RI
2597 CALL dbcsr_create(virial_trace, "virial_trace", dbcsr_dist, dbcsr_type_no_symmetry, row_bsize, col_bsize)
2598 CALL dbt_create(virial_trace, t_2c_virial)
2599 DEALLOCATE (col_bsize, row_bsize)
2600 ELSE
2601 CALL precalc_derivatives(t_3c_der_ri_comp, t_3c_der_ao_comp, t_3c_der_ri_ind, t_3c_der_ao_ind, &
2602 t_2c_der_ri, t_2c_der_metric, t_3c_ri_ao_ao, &
2603 basis_set_ao, basis_set_ri, ri_data, qs_env)
2604 END IF
2605
2606 ! Keep track of derivative sparsity to be able to use retain_sparsity in contraction
2607 CALL dbt_create(t_3c_ri_ao_ao, t_3c_sparse)
2608 DO i_xyz = 1, 3
2609 DO i_mem = 1, SIZE(t_3c_der_ri_comp, 1)
2610 CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ri_ind(i_mem, i_xyz)%ind, &
2611 t_3c_der_ri_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2612 CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.true., move_data=.true.)
2613
2614 CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ao_ind(i_mem, i_xyz)%ind, &
2615 t_3c_der_ao_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2616 CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.true.)
2617 CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, order=[1, 3, 2], summation=.true., move_data=.true.)
2618 END DO
2619 END DO
2620
2621 DO i_xyz = 1, 3
2622 CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_ri(i_xyz))
2623 CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_ao(i_xyz))
2624 END DO
2625
2626 ! Some utilities
2627 spin_fac = 0.5_dp
2628 IF (nspins == 2) spin_fac = 1.0_dp
2629 IF (PRESENT(rescale_factor)) spin_fac = spin_fac*rescale_factor
2630
2631 ALLOCATE (idx_to_at_ri(SIZE(ri_data%bsizes_RI_split)))
2632 CALL get_idx_to_atom(idx_to_at_ri, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
2633
2634 ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
2635 CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2636
2637 CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
2638
2639 ! Go over batches of the 2 AO indices to save memory
2640 n_mem = ri_data%n_mem
2641 ALLOCATE (batch_start(n_mem), batch_end(n_mem))
2642 batch_start(:) = ri_data%starts_array_mem(:)
2643 batch_end(:) = ri_data%ends_array_mem(:)
2644
2645 ALLOCATE (batch_ranges(n_mem + 1))
2646 batch_ranges(:n_mem) = ri_data%starts_array_mem_block(:)
2647 batch_ranges(n_mem + 1) = ri_data%ends_array_mem_block(n_mem) + 1
2648
2649 n_mem_ri = ri_data%n_mem_RI
2650 ALLOCATE (batch_start_ri(n_mem_ri), batch_end_ri(n_mem_ri))
2651 batch_start_ri(:) = ri_data%starts_array_RI_mem(:)
2652 batch_end_ri(:) = ri_data%ends_array_RI_mem(:)
2653
2654 ALLOCATE (batch_ranges_ri(n_mem_ri + 1))
2655 batch_ranges_ri(:n_mem_ri) = ri_data%starts_array_RI_mem_block(:)
2656 batch_ranges_ri(n_mem_ri + 1) = ri_data%ends_array_RI_mem_block(n_mem_ri) + 1
2657
2658 ! Pre-create all the needed tensors
2659 CALL create_2c_tensor(rho_ao_1, dist1, dist2, ri_data%pgrid_2d, &
2660 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2661 name="(AO | AO)")
2662 DEALLOCATE (dist1, dist2)
2663 CALL dbt_create(rho_ao_1, rho_ao_2)
2664
2665 CALL create_2c_tensor(t_2c_ri, dist1, dist2, ri_data%pgrid_2d, &
2666 ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
2667 DEALLOCATE (dist1, dist2)
2668 CALL dbt_create(t_2c_ri, t_svs)
2669 CALL dbt_create(t_2c_ri, t_r)
2670 CALL dbt_create(t_2c_ri, t_2c_ri_tmp)
2671
2672 CALL dbt_create(t_3c_ao_ri_ao, t_3c_1)
2673 CALL dbt_create(t_3c_ao_ri_ao, t_3c_2)
2674 CALL dbt_create(t_3c_ri_ao_ao, t_3c_3)
2675 CALL dbt_create(t_3c_ri_ao_ao, t_3c_4)
2676 CALL dbt_create(t_3c_ri_ao_ao, t_3c_5)
2677 CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_1)
2678 CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_2)
2679
2680 CALL dbt_create(t_3c_ao_ri_ao, t_3c_int)
2681 CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), t_3c_int)
2682
2683 CALL dbt_create(t_3c_ri_ao_ao, t_3c_int_2)
2684
2685 CALL para_env%sync()
2686 t1 = m_walltime()
2687
2688 !Pre-calculate the necessary 2-center quantities
2689 IF (.NOT. ri_data%same_op) THEN
2690 !S^-1 * V * S^-1
2691 CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_ri, &
2692 contract_1=[2], notcontract_1=[1], &
2693 contract_2=[1], notcontract_2=[2], &
2694 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2695 unit_nr=unit_nr_dbcsr, flop=nflop)
2696 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2697
2698 CALL dbt_contract(1.0_dp, t_2c_ri, ri_data%t_2c_inv(1, 1), 0.0_dp, t_svs, &
2699 contract_1=[2], notcontract_1=[1], &
2700 contract_2=[1], notcontract_2=[2], &
2701 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2702 unit_nr=unit_nr_dbcsr, flop=nflop)
2703 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2704 ELSE
2705 ! Simply V^-1
2706 CALL dbt_copy(ri_data%t_2c_inv(1, 1), t_svs)
2707 END IF
2708
2709 CALL dbt_batched_contract_init(t_3c_int, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2710 CALL dbt_batched_contract_init(t_3c_int_2, batch_range_1=batch_ranges_ri, &
2711 batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2712 CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2713 CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2714 CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_ri, &
2715 batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2716 CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_ri, &
2717 batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2718 CALL dbt_batched_contract_init(t_3c_5, batch_range_1=batch_ranges_ri, &
2719 batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2720 CALL dbt_batched_contract_init(t_3c_sparse, batch_range_1=batch_ranges_ri, &
2721 batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2722
2723 DO i_spin = 1, nspins
2724
2725 !Prepare Pmat in tensor format
2726 CALL dbt_create(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2727 CALL dbt_copy_matrix_to_tensor(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2728 CALL dbt_copy(t_2c_tmp, rho_ao_1, move_data=.true.)
2729 CALL dbt_destroy(t_2c_tmp)
2730
2731 IF (.NOT. do_resp) THEN
2732 CALL dbt_copy(rho_ao_1, rho_ao_2)
2733 ELSE IF (do_resp .AND. resp_only_prv) THEN
2734
2735 CALL dbt_create(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2736 CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2737 CALL dbt_copy(t_2c_tmp, rho_ao_2)
2738 !symmetry allows to take 2*P_resp rasther than explicitely take all cross products
2739 CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.true., move_data=.true.)
2740 CALL dbt_destroy(t_2c_tmp)
2741 ELSE
2742
2743 !if not resp_only, need P-P_resp and P+P_resp
2744 CALL dbt_copy(rho_ao_1, rho_ao_2)
2745 CALL dbcsr_create(dbcsr_tmp, template=rho_ao_resp(i_spin)%matrix)
2746 CALL dbcsr_add(dbcsr_tmp, rho_ao_resp(i_spin)%matrix, 0.0_dp, -1.0_dp)
2747 CALL dbt_create(dbcsr_tmp, t_2c_tmp)
2748 CALL dbt_copy_matrix_to_tensor(dbcsr_tmp, t_2c_tmp)
2749 CALL dbt_copy(t_2c_tmp, rho_ao_1, summation=.true., move_data=.true.)
2750 CALL dbcsr_release(dbcsr_tmp)
2751
2752 CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2753 CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.true., move_data=.true.)
2754 CALL dbt_destroy(t_2c_tmp)
2755
2756 END IF
2757 work_virial = 0.0_dp
2758
2759 CALL timeset(routinen//"_3c", handle)
2760 !Start looping of the batches
2761 DO i_mem = 1, n_mem
2762 ibounds(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2763
2764 CALL dbt_batched_contract_init(rho_ao_1)
2765 CALL dbt_contract(1.0_dp, rho_ao_1, t_3c_int, 0.0_dp, t_3c_1, &
2766 contract_1=[1], notcontract_1=[2], &
2767 contract_2=[3], notcontract_2=[1, 2], &
2768 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2769 bounds_2=ibounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2770 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2771 CALL dbt_batched_contract_finalize(rho_ao_1)
2772
2773 CALL dbt_copy(t_3c_1, t_3c_2, order=[3, 2, 1], move_data=.true.)
2774
2775 DO j_mem = 1, n_mem
2776 jbounds(:, 1) = [batch_start(j_mem), batch_end(j_mem)]
2777
2778 CALL dbt_batched_contract_init(rho_ao_2)
2779 CALL dbt_contract(1.0_dp, rho_ao_2, t_3c_2, 0.0_dp, t_3c_1, &
2780 contract_1=[1], notcontract_1=[2], &
2781 contract_2=[3], notcontract_2=[1, 2], &
2782 map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2783 bounds_2=jbounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2784 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2785 CALL dbt_batched_contract_finalize(rho_ao_2)
2786
2787 bounds_cpy(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2788 bounds_cpy(:, 2) = [1, sum(ri_data%bsizes_RI)]
2789 bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2790 CALL dbt_copy(t_3c_int, t_3c_int_2, order=[2, 1, 3], bounds=bounds_cpy)
2791 CALL dbt_copy(t_3c_1, t_3c_3, order=[2, 1, 3], move_data=.true.)
2792
2793 DO k_mem = 1, n_mem_ri
2794 kbounds(:, 1) = [batch_start_ri(k_mem), batch_end_ri(k_mem)]
2795
2796 bounds_cpy(:, 1) = [batch_start_ri(k_mem), batch_end_ri(k_mem)]
2797 bounds_cpy(:, 2) = [batch_start(i_mem), batch_end(i_mem)]
2798 bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2799 CALL dbt_copy(t_3c_sparse, t_3c_4, bounds=bounds_cpy)
2800
2801 !Contract with the 2-center product S^-1 * V * S^-1 while keeping sparsity of derivatives
2802 CALL dbt_batched_contract_init(t_svs)
2803 CALL dbt_contract(1.0_dp, t_svs, t_3c_3, 0.0_dp, t_3c_4, &
2804 contract_1=[2], notcontract_1=[1], &
2805 contract_2=[1], notcontract_2=[2, 3], &
2806 map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2807 retain_sparsity=.true., unit_nr=unit_nr_dbcsr, flop=nflop)
2808 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2809 CALL dbt_batched_contract_finalize(t_svs)
2810
2811 CALL dbt_copy(t_3c_4, t_3c_5, summation=.true., move_data=.true.)
2812
2813 ijbounds(:, 1) = ibounds(:, 1)
2814 ijbounds(:, 2) = jbounds(:, 1)
2815
2816 !Contract R_PS = (acP) M_acS
2817 CALL dbt_batched_contract_init(t_r)
2818 CALL dbt_contract(1.0_dp, t_3c_int_2, t_3c_3, 1.0_dp, t_r, &
2819 contract_1=[2, 3], notcontract_1=[1], &
2820 contract_2=[2, 3], notcontract_2=[1], &
2821 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2822 bounds_1=ijbounds, bounds_3=kbounds, &
2823 unit_nr=unit_nr_dbcsr, flop=nflop)
2824 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2825 CALL dbt_batched_contract_finalize(t_r)
2826
2827 END DO !k_mem
2828 END DO !j_mem
2829
2830 CALL dbt_copy(t_3c_5, t_3c_help_1, move_data=.true.)
2831
2832 !The force from the 3c derivatives
2833 pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2834
2835 DO k_mem = 1, SIZE(t_3c_der_ri_comp, 1)
2836 DO i_xyz = 1, 3
2837 CALL dbt_clear(t_3c_der_ri(i_xyz))
2838 CALL decompress_tensor(t_3c_der_ri(i_xyz), t_3c_der_ri_ind(k_mem, i_xyz)%ind, &
2839 t_3c_der_ri_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2840 END DO
2841 CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_ri, atom_of_kind, kind_of, &
2842 idx_to_at_ri, pref)
2843 END DO
2844
2845 pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2846 IF (do_resp) THEN
2847 pref = 0.5_dp*pref
2848 CALL dbt_copy(t_3c_help_1, t_3c_help_2, order=[1, 3, 2])
2849 END IF
2850
2851 DO k_mem = 1, SIZE(t_3c_der_ao_comp, 1)
2852 DO i_xyz = 1, 3
2853 CALL dbt_clear(t_3c_der_ao(i_xyz))
2854 CALL decompress_tensor(t_3c_der_ao(i_xyz), t_3c_der_ao_ind(k_mem, i_xyz)%ind, &
2855 t_3c_der_ao_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2856 END DO
2857 CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_ao, atom_of_kind, kind_of, &
2858 idx_to_at_ao, pref, deriv_dim=2)
2859
2860 IF (do_resp) THEN
2861 CALL get_force_from_3c_trace(force, t_3c_help_2, t_3c_der_ao, atom_of_kind, kind_of, &
2862 idx_to_at_ao, pref, deriv_dim=2)
2863 END IF
2864 END DO
2865
2866 !The 3c virial contribution. Note: only fraction of integrals correspondig to i_mem calculated
2867 IF (use_virial) THEN
2868 pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2869 CALL dbt_copy(t_3c_help_1, t_3c_virial, move_data=.true.)
2870 CALL calc_3c_virial(work_virial, t_3c_virial, pref, qs_env, nl_3c, basis_set_ri, &
2871 basis_set_ao, basis_set_ao, ri_data%ri_metric, &
2872 der_eps=ri_data%eps_schwarz_forces, op_pos=1)
2873
2874 CALL dbt_clear(t_3c_virial)
2875 END IF
2876
2877 CALL dbt_clear(t_3c_help_1)
2878 CALL dbt_clear(t_3c_help_2)
2879 END DO !i_mem
2880 CALL timestop(handle)
2881
2882 CALL timeset(routinen//"_2c", handle)
2883 !Now we deal with all the 2-center quantities
2884 !Calculate S^-1 * R * S^-1
2885 CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_r, 0.0_dp, t_2c_ri_tmp, &
2886 contract_1=[2], notcontract_1=[1], &
2887 contract_2=[1], notcontract_2=[2], &
2888 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2889 unit_nr=unit_nr_dbcsr, flop=nflop)
2890 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2891
2892 CALL dbt_contract(1.0_dp, t_2c_ri_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_ri, &
2893 contract_1=[2], notcontract_1=[1], &
2894 contract_2=[1], notcontract_2=[2], &
2895 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2896 unit_nr=unit_nr_dbcsr, flop=nflop)
2897 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2898
2899 !Calculate the potential contribution to the force: [S^-1*R*S^-1]_QR d/dx (Q|R)
2900 pref = 0.5_dp*hf_fraction*spin_fac
2901 IF (.NOT. ri_data%same_op) pref = -pref
2902 CALL get_2c_der_force(force, t_2c_ri, t_2c_der_ri, atom_of_kind, kind_of, idx_to_at_ri, pref)
2903
2904 !Calculate the contribution to the virial on the fly
2905 IF (use_virial_prv) THEN
2906 CALL dbt_copy(t_2c_ri, t_2c_virial)
2907 CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2908 CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_pot, &
2909 basis_set_ri, basis_set_ri, ri_data%hfx_pot)
2910 END IF
2911
2912 !And that from the metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2913 IF (.NOT. ri_data%same_op) THEN
2914 CALL dbt_contract(1.0_dp, t_2c_ri, ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_ri_tmp, &
2915 contract_1=[2], notcontract_1=[1], &
2916 contract_2=[1], notcontract_2=[2], &
2917 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2918 unit_nr=unit_nr_dbcsr, flop=nflop)
2919 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2920
2921 CALL dbt_contract(1.0_dp, t_2c_ri_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_ri, &
2922 contract_1=[2], notcontract_1=[1], &
2923 contract_2=[1], notcontract_2=[2], &
2924 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2925 unit_nr=unit_nr_dbcsr, flop=nflop)
2926 ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2927
2928 pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2929 CALL get_2c_der_force(force, t_2c_ri, t_2c_der_metric, atom_of_kind, kind_of, idx_to_at_ri, pref)
2930
2931 IF (use_virial_prv) THEN
2932 CALL dbt_copy(t_2c_ri, t_2c_virial)
2933 CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2934 CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_met, &
2935 basis_set_ri, basis_set_ri, ri_data%ri_metric)
2936 END IF
2937 END IF
2938 CALL dbt_clear(t_2c_ri)
2939 CALL dbt_clear(t_2c_ri_tmp)
2940 CALL dbt_clear(t_r)
2941 CALL dbt_clear(t_3c_help_1)
2942 CALL dbt_clear(t_3c_help_2)
2943 CALL timestop(handle)
2944
2945 IF (use_virial_prv) THEN
2946 DO k_xyz = 1, 3
2947 DO j_xyz = 1, 3
2948 DO i_xyz = 1, 3
2949 virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
2950 + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
2951 END DO
2952 END DO
2953 END DO
2954 END IF
2955 END DO !i_spin
2956
2957 CALL dbt_batched_contract_finalize(t_3c_int)
2958 CALL dbt_batched_contract_finalize(t_3c_int_2)
2959 CALL dbt_batched_contract_finalize(t_3c_1)
2960 CALL dbt_batched_contract_finalize(t_3c_2)
2961 CALL dbt_batched_contract_finalize(t_3c_3)
2962 CALL dbt_batched_contract_finalize(t_3c_4)
2963 CALL dbt_batched_contract_finalize(t_3c_5)
2964 CALL dbt_batched_contract_finalize(t_3c_sparse)
2965
2966 CALL para_env%sync()
2967 t2 = m_walltime()
2968
2969 CALL dbt_copy(t_3c_int, ri_data%t_3c_int_ctr_2(1, 1), move_data=.true.)
2970
2971 !clean-up
2972 CALL dbt_destroy(rho_ao_1)
2973 CALL dbt_destroy(rho_ao_2)
2974 CALL dbt_destroy(t_3c_ao_ri_ao)
2975 CALL dbt_destroy(t_3c_ri_ao_ao)
2976 CALL dbt_destroy(t_3c_int)
2977 CALL dbt_destroy(t_3c_int_2)
2978 CALL dbt_destroy(t_3c_1)
2979 CALL dbt_destroy(t_3c_2)
2980 CALL dbt_destroy(t_3c_3)
2981 CALL dbt_destroy(t_3c_4)
2982 CALL dbt_destroy(t_3c_5)
2983 CALL dbt_destroy(t_3c_help_1)
2984 CALL dbt_destroy(t_3c_help_2)
2985 CALL dbt_destroy(t_3c_sparse)
2986 CALL dbt_destroy(t_svs)
2987 CALL dbt_destroy(t_r)
2988 CALL dbt_destroy(t_2c_ri)
2989 CALL dbt_destroy(t_2c_ri_tmp)
2990
2991 DO i_xyz = 1, 3
2992 CALL dbt_destroy(t_3c_der_ri(i_xyz))
2993 CALL dbt_destroy(t_3c_der_ao(i_xyz))
2994 CALL dbt_destroy(t_2c_der_ri(i_xyz))
2995 IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2996 END DO
2997
2998 DO i_xyz = 1, 3
2999 DO i_mem = 1, SIZE(t_3c_der_ao_comp, 1)
3000 CALL dealloc_containers(t_3c_der_ao_comp(i_mem, i_xyz), dummy_int)
3001 CALL dealloc_containers(t_3c_der_ri_comp(i_mem, i_xyz), dummy_int)
3002 END DO
3003 END DO
3004 DEALLOCATE (t_3c_der_ao_ind, t_3c_der_ri_ind)
3005
3006 DO ibasis = 1, SIZE(basis_set_ao)
3007 orb_basis => basis_set_ao(ibasis)%gto_basis_set
3008 ri_basis => basis_set_ri(ibasis)%gto_basis_set
3009 CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3010 CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3011 END DO
3012
3013 IF (use_virial) THEN
3014 CALL release_neighbor_list_sets(nl_2c_met)
3015 CALL release_neighbor_list_sets(nl_2c_pot)
3016 CALL neighbor_list_3c_destroy(nl_3c)
3017 CALL dbcsr_release(virial_trace)
3018 CALL dbt_destroy(t_2c_virial)
3019 CALL dbt_destroy(t_3c_virial)
3020 END IF
3021
3022 END SUBROUTINE hfx_ri_forces_pmat
3023
3024! **************************************************************************************************
3025!> \brief the general routine that calls the relevant force code
3026!> \param qs_env ...
3027!> \param ri_data ...
3028!> \param nspins ...
3029!> \param hf_fraction ...
3030!> \param rho_ao ...
3031!> \param rho_ao_resp ...
3032!> \param mos ...
3033!> \param use_virial ...
3034!> \param resp_only ...
3035!> \param rescale_factor ...
3036! **************************************************************************************************
3037 SUBROUTINE hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
3038 mos, use_virial, resp_only, rescale_factor)
3039
3040 TYPE(qs_environment_type), POINTER :: qs_env
3041 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3042 INTEGER, INTENT(IN) :: nspins
3043 REAL(kind=dp), INTENT(IN) :: hf_fraction
3044 TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL :: rho_ao
3045 TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
3046 TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
3047 OPTIONAL :: mos
3048 LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
3049 REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
3050
3051 CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_forces'
3052
3053 INTEGER :: handle, ispin
3054 INTEGER, DIMENSION(2) :: homo
3055 REAL(kind=dp), DIMENSION(:), POINTER :: mo_eigenvalues
3056 TYPE(cp_fm_type), POINTER :: mo_coeff
3057 TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
3058 TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
3059
3060 CALL timeset(routinen, handle)
3061
3062 SELECT CASE (ri_data%flavor)
3063 CASE (ri_mo)
3064
3065 DO ispin = 1, nspins
3066 NULLIFY (mo_coeff_b_tmp)
3067 cpassert(mos(ispin)%uniform_occupation)
3068 CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
3069
3070 IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
3071 CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
3072 END DO
3073
3074 DO ispin = 1, nspins
3075 CALL dbcsr_scale(mo_coeff_b(ispin), sqrt(mos(ispin)%maxocc))
3076 homo(ispin) = mos(ispin)%homo
3077 END DO
3078
3079 CALL hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff_b, use_virial)
3080
3081 CASE (ri_pmat)
3082
3083 CALL hfx_ri_forces_pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, use_virial, &
3084 resp_only, rescale_factor)
3085 END SELECT
3086
3087 DO ispin = 1, nspins
3088 CALL dbcsr_release(mo_coeff_b(ispin))
3089 END DO
3090
3091 CALL timestop(handle)
3092
3093 END SUBROUTINE hfx_ri_update_forces
3094
3095! **************************************************************************************************
3096!> \brief Calculate the derivatives tensors for the force, in a format fit for contractions
3097!> \param t_3c_der_RI_comp compressed RI derivatives
3098!> \param t_3c_der_AO_comp compressed AO derivatives
3099!> \param t_3c_der_RI_ind ...
3100!> \param t_3c_der_AO_ind ...
3101!> \param t_2c_der_RI format based on standard atomic block sizes
3102!> \param t_2c_der_metric format based on standard atomic block sizes
3103!> \param ri_ao_ao_template ...
3104!> \param basis_set_AO ...
3105!> \param basis_set_RI ...
3106!> \param ri_data ...
3107!> \param qs_env ...
3108!> \param nl_2c_pot ...
3109!> \param nl_2c_met ...
3110!> \param nl_3c_out ...
3111!> \param t_3c_virial ...
3112! **************************************************************************************************
3113 SUBROUTINE precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
3114 t_2c_der_RI, t_2c_der_metric, ri_ao_ao_template, &
3115 basis_set_AO, basis_set_RI, ri_data, qs_env, &
3116 nl_2c_pot, nl_2c_met, nl_3c_out, t_3c_virial)
3117
3118 TYPE(hfx_compression_type), ALLOCATABLE, &
3119 DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ri_comp, t_3c_der_ao_comp
3120 TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_ri_ind, t_3c_der_ao_ind
3121 TYPE(dbt_type), DIMENSION(3), INTENT(OUT) :: t_2c_der_ri, t_2c_der_metric
3122 TYPE(dbt_type), INTENT(INOUT) :: ri_ao_ao_template
3123 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3124 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
3125 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3126 TYPE(qs_environment_type), POINTER :: qs_env
3127 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3128 OPTIONAL, POINTER :: nl_2c_pot, nl_2c_met
3129 TYPE(neighbor_list_3c_type), OPTIONAL :: nl_3c_out
3130 TYPE(dbt_type), INTENT(INOUT), OPTIONAL :: t_3c_virial
3131
3132 CHARACTER(LEN=*), PARAMETER :: routinen = 'precalc_derivatives'
3133
3134 INTEGER :: handle, i_mem, i_xyz, n_mem, natom, &
3135 nkind, nthreads
3136 INTEGER(int_8) :: nze, nze_tot
3137 INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2, dist_ao_1, dist_ao_2, &
3138 dist_ri, dummy_end, dummy_start, &
3139 end_blocks, start_blocks
3140 INTEGER, DIMENSION(3) :: pcoord, pdims
3141 INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3142 REAL(dp) :: compression_factor, memory, occ
3143 TYPE(dbcsr_distribution_type) :: dbcsr_dist
3144 TYPE(dbcsr_type), DIMENSION(1, 3) :: t_2c_der_metric_prv, t_2c_der_ri_prv
3145 TYPE(dbt_pgrid_type) :: pgrid
3146 TYPE(dbt_type) :: t_2c_template, t_2c_tmp, t_3c_template
3147 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_ao_prv, t_3c_der_ri_prv
3148 TYPE(dft_control_type), POINTER :: dft_control
3149 TYPE(distribution_2d_type), POINTER :: dist_2d
3150 TYPE(distribution_3d_type) :: dist_3d, dist_3d_out
3151 TYPE(mp_cart_type) :: mp_comm_t3c, mp_comm_t3c_out
3152 TYPE(mp_para_env_type), POINTER :: para_env
3153 TYPE(neighbor_list_3c_type) :: nl_3c
3154 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3155 POINTER :: nl_2c
3156 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3157 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3158
3159 NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env)
3160
3161 CALL timeset(routinen, handle)
3162
3163 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
3164 particle_set=particle_set, dft_control=dft_control, para_env=para_env)
3165
3166 !TODO: is such a pgrid correct?
3167 !Dealing with the 3c derivatives
3168 nthreads = 1
3169!$ nthreads = omp_get_num_threads()
3170 pdims = 0
3171 CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[max(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
3172
3173 CALL create_3c_tensor(t_3c_template, dist_ri, dist_ao_1, dist_ao_2, pgrid, &
3174 ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, &
3175 map1=[1], map2=[2, 3], &
3176 name="der (RI AO | AO)")
3177
3178 ALLOCATE (t_3c_der_ao_prv(1, 1, 3), t_3c_der_ri_prv(1, 1, 3))
3179 DO i_xyz = 1, 3
3180 CALL dbt_create(t_3c_template, t_3c_der_ri_prv(1, 1, i_xyz))
3181 CALL dbt_create(t_3c_template, t_3c_der_ao_prv(1, 1, i_xyz))
3182 END DO
3183 IF (PRESENT(t_3c_virial)) THEN
3184 CALL dbt_create(t_3c_template, t_3c_virial)
3185 END IF
3186 CALL dbt_destroy(t_3c_template)
3187
3188 CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3189 CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
3190 CALL distribution_3d_create(dist_3d, dist_ri, dist_ao_1, dist_ao_2, &
3191 nkind, particle_set, mp_comm_t3c, own_comm=.true.)
3192
3193 CALL build_3c_neighbor_lists(nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, dist_3d, ri_data%ri_metric, &
3194 "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.true., own_dist=.true.)
3195
3196 IF (PRESENT(nl_3c_out)) THEN
3197 CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3198 CALL mp_comm_t3c_out%create(pgrid%mp_comm_2d, 3, pdims)
3199 CALL distribution_3d_create(dist_3d_out, dist_ri, dist_ao_1, dist_ao_2, &
3200 nkind, particle_set, mp_comm_t3c_out, own_comm=.true.)
3201 CALL build_3c_neighbor_lists(nl_3c_out, basis_set_ri, basis_set_ao, basis_set_ao, dist_3d_out, &
3202 ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.false., &
3203 own_dist=.true.)
3204 END IF
3205 DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
3206 CALL dbt_pgrid_destroy(pgrid)
3207
3208 n_mem = ri_data%n_mem
3209 CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
3210 start_blocks, end_blocks)
3211 DEALLOCATE (dummy_start, dummy_end)
3212
3213 ALLOCATE (t_3c_der_ao_comp(n_mem, 3), t_3c_der_ri_comp(n_mem, 3))
3214 ALLOCATE (t_3c_der_ao_ind(n_mem, 3), t_3c_der_ri_ind(n_mem, 3))
3215
3216 memory = 0.0_dp
3217 nze_tot = 0
3218 DO i_mem = 1, n_mem
3219 CALL build_3c_derivatives(t_3c_der_ri_prv, t_3c_der_ao_prv, ri_data%filter_eps, qs_env, &
3220 nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, &
3221 ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=1, &
3222 bounds_i=[start_blocks(i_mem), end_blocks(i_mem)])
3223
3224 DO i_xyz = 1, 3
3225 CALL dbt_copy(t_3c_der_ri_prv(1, 1, i_xyz), ri_ao_ao_template, move_data=.true.)
3226 CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3227 CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3228 nze_tot = nze_tot + nze
3229
3230 CALL alloc_containers(t_3c_der_ri_comp(i_mem, i_xyz), 1)
3231 CALL compress_tensor(ri_ao_ao_template, t_3c_der_ri_ind(i_mem, i_xyz)%ind, &
3232 t_3c_der_ri_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3233 CALL dbt_clear(ri_ao_ao_template)
3234
3235 !put AO derivative as middle index
3236 CALL dbt_copy(t_3c_der_ao_prv(1, 1, i_xyz), ri_ao_ao_template, order=[1, 3, 2], move_data=.true.)
3237 CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3238 CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3239 nze_tot = nze_tot + nze
3240
3241 CALL alloc_containers(t_3c_der_ao_comp(i_mem, i_xyz), 1)
3242 CALL compress_tensor(ri_ao_ao_template, t_3c_der_ao_ind(i_mem, i_xyz)%ind, &
3243 t_3c_der_ao_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3244 CALL dbt_clear(ri_ao_ao_template)
3245 END DO
3246 END DO
3247
3248 CALL neighbor_list_3c_destroy(nl_3c)
3249 DO i_xyz = 1, 3
3250 CALL dbt_destroy(t_3c_der_ri_prv(1, 1, i_xyz))
3251 CALL dbt_destroy(t_3c_der_ao_prv(1, 1, i_xyz))
3252 END DO
3253
3254 CALL para_env%sum(memory)
3255 compression_factor = real(nze_tot, dp)*1.0e-06*8.0_dp/memory
3256 IF (ri_data%unit_nr > 0) THEN
3257 WRITE (unit=ri_data%unit_nr, fmt="((T3,A,T66,F11.2,A4))") &
3258 "MEMORY_INFO| Memory for 3-center HFX derivatives (compressed):", memory, ' MiB'
3259
3260 WRITE (unit=ri_data%unit_nr, fmt="((T3,A,T60,F21.2))") &
3261 "MEMORY_INFO| Compression factor: ", compression_factor
3262 END IF
3263
3264 !Deal with the 2-center derivatives
3265 CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3266 ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3267 ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3268 row_bsize(:) = ri_data%bsizes_RI
3269 col_bsize(:) = ri_data%bsizes_RI
3270
3271 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
3272 "HFX_2c_nl_pot", qs_env, sym_ij=.true., dist_2d=dist_2d)
3273
3274 DO i_xyz = 1, 3
3275 CALL dbcsr_create(t_2c_der_ri_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3276 dbcsr_type_antisymmetric, row_bsize, col_bsize)
3277 END DO
3278
3279 CALL build_2c_derivatives(t_2c_der_ri_prv, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_ri, &
3280 basis_set_ri, ri_data%hfx_pot)
3281 CALL release_neighbor_list_sets(nl_2c)
3282
3283 IF (PRESENT(nl_2c_pot)) THEN
3284 NULLIFY (nl_2c_pot)
3285 CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
3286 "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
3287 END IF
3288
3289 !copy 2c derivative tensor into the standard format
3290 CALL create_2c_tensor(t_2c_template, dist1, dist2, ri_data%pgrid_2d, ri_data%bsizes_RI_split, &
3291 ri_data%bsizes_RI_split, name='(RI| RI)')
3292 DEALLOCATE (dist1, dist2)
3293
3294 DO i_xyz = 1, 3
3295 CALL dbt_create(t_2c_der_ri_prv(1, i_xyz), t_2c_tmp)
3296 CALL dbt_copy_matrix_to_tensor(t_2c_der_ri_prv(1, i_xyz), t_2c_tmp)
3297
3298 CALL dbt_create(t_2c_template, t_2c_der_ri(i_xyz))
3299 CALL dbt_copy(t_2c_tmp, t_2c_der_ri(i_xyz), move_data=.true.)
3300
3301 CALL dbt_destroy(t_2c_tmp)
3302 CALL dbcsr_release(t_2c_der_ri_prv(1, i_xyz))
3303 END DO
3304
3305 !Repeat with the metric, if required
3306 IF (.NOT. ri_data%same_op) THEN
3307
3308 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
3309 "HFX_2c_nl_RI", qs_env, sym_ij=.true., dist_2d=dist_2d)
3310
3311 DO i_xyz = 1, 3
3312 CALL dbcsr_create(t_2c_der_metric_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3313 dbcsr_type_antisymmetric, row_bsize, col_bsize)
3314 END DO
3315
3316 CALL build_2c_derivatives(t_2c_der_metric_prv, ri_data%filter_eps_2c, qs_env, nl_2c, &
3317 basis_set_ri, basis_set_ri, ri_data%ri_metric)
3318 CALL release_neighbor_list_sets(nl_2c)
3319
3320 IF (PRESENT(nl_2c_met)) THEN
3321 NULLIFY (nl_2c_met)
3322 CALL build_2c_neighbor_lists(nl_2c_met, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
3323 "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
3324 END IF
3325
3326 DO i_xyz = 1, 3
3327 CALL dbt_create(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3328 CALL dbt_copy_matrix_to_tensor(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3329
3330 CALL dbt_create(t_2c_template, t_2c_der_metric(i_xyz))
3331 CALL dbt_copy(t_2c_tmp, t_2c_der_metric(i_xyz), move_data=.true.)
3332
3333 CALL dbt_destroy(t_2c_tmp)
3334 CALL dbcsr_release(t_2c_der_metric_prv(1, i_xyz))
3335 END DO
3336
3337 END IF
3338
3339 CALL dbt_destroy(t_2c_template)
3340 CALL dbcsr_distribution_release(dbcsr_dist)
3341 DEALLOCATE (row_bsize, col_bsize)
3342
3343 CALL timestop(handle)
3344
3345 END SUBROUTINE precalc_derivatives
3346
3347! **************************************************************************************************
3348!> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
3349!> force = sum_ijk A_ijk B_ijk. An iteration over the blocks is made, which index determin
3350!> the atom on which the force acts
3351!> \param force ...
3352!> \param t_3c_contr ...
3353!> \param t_3c_der ...
3354!> \param atom_of_kind ...
3355!> \param kind_of ...
3356!> \param idx_to_at ...
3357!> \param pref ...
3358!> \param do_mp2 ...
3359!> \param deriv_dim the dimension of the tensor corresponding to the derivative (by default 1)
3360! **************************************************************************************************
3361 SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, &
3362 pref, do_mp2, deriv_dim)
3363
3364 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3365 TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
3366 TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der
3367 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3368 REAL(dp), INTENT(IN) :: pref
3369 LOGICAL, INTENT(IN), OPTIONAL :: do_mp2
3370 INTEGER, INTENT(IN), OPTIONAL :: deriv_dim
3371
3372 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_force_from_3c_trace'
3373
3374 INTEGER :: deriv_dim_prv, handle, i_xyz, iat, &
3375 iat_of_kind, ikind, j_xyz
3376 INTEGER, DIMENSION(3) :: ind
3377 LOGICAL :: do_mp2_prv, found
3378 REAL(dp) :: new_force
3379 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk
3380 REAL(dp), DIMENSION(3) :: scoord
3381 TYPE(dbt_iterator_type) :: iter
3382
3383 CALL timeset(routinen, handle)
3384
3385 do_mp2_prv = .false.
3386 IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3387
3388 deriv_dim_prv = 1
3389 IF (PRESENT(deriv_dim)) deriv_dim_prv = deriv_dim
3390
3391!$OMP PARALLEL DEFAULT(NONE) &
3392!$OMP SHARED(t_3c_der,t_3c_contr,force,do_mp2_prv,deriv_dim_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3393!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord)
3394 DO i_xyz = 1, 3
3395 CALL dbt_iterator_start(iter, t_3c_der(i_xyz))
3396 DO WHILE (dbt_iterator_blocks_left(iter))
3397 CALL dbt_iterator_next_block(iter, ind)
3398
3399 CALL dbt_get_block(t_3c_der(i_xyz), ind, der_blk, found)
3400 cpassert(found)
3401 CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
3402
3403 IF (found) THEN
3404
3405 !take the trace of the blocks
3406 new_force = pref*sum(der_blk(:, :, :)*contr_blk(:, :, :))
3407
3408 !the first index of the derivative tensor defines the atom
3409 iat = idx_to_at(ind(deriv_dim_prv))
3410 iat_of_kind = atom_of_kind(iat)
3411 ikind = kind_of(iat)
3412
3413 IF (.NOT. do_mp2_prv) THEN
3414!$OMP ATOMIC
3415 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3416 + new_force
3417 ELSE
3418!$OMP ATOMIC
3419 force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3420 + new_force
3421 END IF
3422
3423 DEALLOCATE (contr_blk)
3424 END IF
3425 DEALLOCATE (der_blk)
3426 END DO !iter
3427 CALL dbt_iterator_stop(iter)
3428 END DO
3429!$OMP END PARALLEL
3430 CALL timestop(handle)
3431
3432 END SUBROUTINE get_force_from_3c_trace
3433
3434! **************************************************************************************************
3435!> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
3436!> \param force ...
3437!> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
3438!> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
3439!> \param atom_of_kind ...
3440!> \param kind_of ...
3441!> \param idx_to_at ...
3442!> \param pref ...
3443!> \param do_mp2 ...
3444!> \param do_ovlp ...
3445!> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution
3446! **************************************************************************************************
3447 SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, &
3448 pref, do_mp2, do_ovlp)
3449
3450 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3451 TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
3452 TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_2c_der
3453 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3454 REAL(dp), INTENT(IN) :: pref
3455 LOGICAL, INTENT(IN), OPTIONAL :: do_mp2, do_ovlp
3456
3457 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_2c_der_force'
3458
3459 INTEGER :: handle, i_xyz, iat, iat_of_kind, ikind, &
3460 j_xyz, jat, jat_of_kind, jkind
3461 INTEGER, DIMENSION(2) :: ind
3462 LOGICAL :: do_mp2_prv, do_ovlp_prv, found
3463 REAL(dp) :: new_force
3464 REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
3465 REAL(dp), DIMENSION(3) :: scoord
3466 TYPE(dbt_iterator_type) :: iter
3467
3468 !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
3469 !update the relevant force
3470
3471 CALL timeset(routinen, handle)
3472
3473 do_mp2_prv = .false.
3474 IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3475
3476 do_ovlp_prv = .false.
3477 IF (PRESENT(do_ovlp)) do_ovlp_prv = do_ovlp
3478
3479!$OMP PARALLEL DEFAULT(NONE) &
3480!$OMP SHARED(t_2c_der,t_2c_contr,force,do_mp2_prv,do_ovlp_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3481!$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force) &
3482!$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
3483 DO i_xyz = 1, 3
3484 CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
3485 DO WHILE (dbt_iterator_blocks_left(iter))
3486 CALL dbt_iterator_next_block(iter, ind)
3487
3488 IF (ind(1) == ind(2)) cycle
3489
3490 CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
3491 cpassert(found)
3492 CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
3493
3494 IF (found) THEN
3495
3496 !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
3497 !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
3498 new_force = pref*sum(der_blk(:, :)*contr_blk(:, :))
3499
3500 iat = idx_to_at(ind(1))
3501 iat_of_kind = atom_of_kind(iat)
3502 ikind = kind_of(iat)
3503
3504 IF (do_mp2_prv) THEN
3505!$OMP ATOMIC
3506 force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3507 + new_force
3508 ELSE IF (do_ovlp_prv) THEN
3509!$OMP ATOMIC
3510 force(ikind)%overlap(i_xyz, iat_of_kind) = force(ikind)%overlap(i_xyz, iat_of_kind) &
3511 + new_force
3512 ELSE
3513!$OMP ATOMIC
3514 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3515 + new_force
3516 END IF
3517
3518 jat = idx_to_at(ind(2))
3519 jat_of_kind = atom_of_kind(jat)
3520 jkind = kind_of(jat)
3521
3522 IF (do_mp2_prv) THEN
3523!$OMP ATOMIC
3524 force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) = force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) &
3525 - new_force
3526 ELSE IF (do_ovlp_prv) THEN
3527!$OMP ATOMIC
3528 force(jkind)%overlap(i_xyz, jat_of_kind) = force(jkind)%overlap(i_xyz, jat_of_kind) &
3529 - new_force
3530 ELSE
3531!$OMP ATOMIC
3532 force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
3533 - new_force
3534 END IF
3535
3536 DEALLOCATE (contr_blk)
3537 END IF
3538
3539 DEALLOCATE (der_blk)
3540 END DO !iter
3541 CALL dbt_iterator_stop(iter)
3542
3543 END DO !i_xyz
3544!$OMP END PARALLEL
3545 CALL timestop(handle)
3546
3547 END SUBROUTINE get_2c_der_force
3548
3549! **************************************************************************************************
3550!> \brief Get the force from a contraction of type SUM_a,beta (a|beta') C_a,beta, where beta is an AO
3551!> and a is a MO
3552!> \param force ...
3553!> \param t_mo_coeff ...
3554!> \param t_2c_MO_AO ...
3555!> \param atom_of_kind ...
3556!> \param kind_of ...
3557!> \param idx_to_at ...
3558!> \param pref ...
3559!> \param i_xyz ...
3560! **************************************************************************************************
3561 SUBROUTINE get_mo_ao_force(force, t_mo_coeff, t_2c_MO_AO, atom_of_kind, kind_of, idx_to_at, pref, i_xyz)
3562
3563 TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3564 TYPE(dbt_type), INTENT(INOUT) :: t_mo_coeff, t_2c_mo_ao
3565 INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3566 REAL(dp), INTENT(IN) :: pref
3567 INTEGER, INTENT(IN) :: i_xyz
3568
3569 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_MO_AO_force'
3570
3571 INTEGER :: handle, iat, iat_of_kind, ikind, j_xyz
3572 INTEGER, DIMENSION(2) :: ind
3573 LOGICAL :: found
3574 REAL(dp) :: new_force
3575 REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: mo_ao_blk, mo_coeff_blk
3576 REAL(dp), DIMENSION(3) :: scoord
3577 TYPE(dbt_iterator_type) :: iter
3578
3579 CALL timeset(routinen, handle)
3580
3581!$OMP PARALLEL DEFAULT(NONE) &
3582!$OMP SHARED(t_2c_MO_AO,t_mo_coeff,pref,force,idx_to_at,atom_of_kind,kind_of,i_xyz) &
3583!$OMP PRIVATE(iter,ind,mo_ao_blk,mo_coeff_blk,found,new_force,iat,iat_of_kind,ikind,scoord,j_xyz)
3584 CALL dbt_iterator_start(iter, t_2c_mo_ao)
3585 DO WHILE (dbt_iterator_blocks_left(iter))
3586 CALL dbt_iterator_next_block(iter, ind)
3587
3588 CALL dbt_get_block(t_2c_mo_ao, ind, mo_ao_blk, found)
3589 cpassert(found)
3590 CALL dbt_get_block(t_mo_coeff, ind, mo_coeff_blk, found)
3591
3592 IF (found) THEN
3593
3594 new_force = pref*sum(mo_ao_blk(:, :)*mo_coeff_blk(:, :))
3595
3596 iat = idx_to_at(ind(2)) !AO index is column index
3597 iat_of_kind = atom_of_kind(iat)
3598 ikind = kind_of(iat)
3599
3600!$OMP ATOMIC
3601 force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3602 + new_force
3603
3604 DEALLOCATE (mo_coeff_blk)
3605 END IF
3606
3607 DEALLOCATE (mo_ao_blk)
3608 END DO !iter
3609 CALL dbt_iterator_stop(iter)
3610!$OMP END PARALLEL
3611
3612 CALL timestop(handle)
3613
3614 END SUBROUTINE get_mo_ao_force
3615
3616! **************************************************************************************************
3617!> \brief Print RI-HFX quantities, as required by the PRINT subsection
3618!> \param ri_data ...
3619!> \param qs_env ...
3620! **************************************************************************************************
3621 SUBROUTINE print_ri_hfx(ri_data, qs_env)
3622
3623 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3624 TYPE(qs_environment_type), POINTER :: qs_env
3625
3626 INTEGER :: i_ri, ibasis, nkind, nspins, unit_nr
3627 INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3628 LOGICAL :: mult_by_s, print_density, print_ri_metric
3629 REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs, density_coeffs_2
3630 TYPE(cp_blacs_env_type), POINTER :: blacs_env
3631 TYPE(cp_fm_struct_type), POINTER :: fm_struct
3632 TYPE(cp_fm_type) :: matrix_s_fm
3633 TYPE(cp_logger_type), POINTER :: logger
3634 TYPE(dbcsr_distribution_type) :: dbcsr_dist
3635 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
3636 TYPE(dbcsr_type), DIMENSION(1) :: matrix_s
3637 TYPE(dft_control_type), POINTER :: dft_control
3638 TYPE(distribution_2d_type), POINTER :: dist_2d
3639 TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3640 DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
3641 TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
3642 TYPE(mp_para_env_type), POINTER :: para_env
3643 TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3644 POINTER :: nl_2c
3645 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3646 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3647 TYPE(qs_rho_type), POINTER :: rho
3648 TYPE(section_vals_type), POINTER :: input, print_section
3649
3650 NULLIFY (rho_ao, input, print_section, logger, rho, particle_set, qs_kind_set, ri_basis, nl_2c, &
3651 dist_2d, col_bsize, row_bsize, para_env, blacs_env, fm_struct, orb_basis, dft_control)
3652
3653 CALL get_qs_env(qs_env, input=input, dft_control=dft_control)
3654 logger => cp_get_default_logger()
3655 print_density = .false.
3656 print_ri_metric = .false.
3657
3658 !Do we print the RI density coeffs and/or RI_metric 2c integrals?
3659 print_section => section_vals_get_subs_vals(input, "DFT%XC%HF%RI%PRINT")
3660 IF (btest(cp_print_key_should_output(logger%iter_info, print_section, "RI_DENSITY_COEFFS"), &
3661 cp_p_file)) print_density = .true.
3662 IF (btest(cp_print_key_should_output(logger%iter_info, print_section, "RI_METRIC_2C_INTS"), &
3663 cp_p_file)) print_ri_metric = .true.
3664
3665 !common stuff
3666 IF (print_density .OR. print_ri_metric) THEN
3667
3668 !Re-calculate the 2-center RI_metric integrals (because not stored and cheap)
3669 !Recalculated the RI_metric 2c-integrals, as it is cheap, and not stored
3670 CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
3671 distribution_2d=dist_2d, para_env=para_env, blacs_env=blacs_env)
3672 ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
3673 CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
3674 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
3675 CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
3676 CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
3677
3678 DO ibasis = 1, nkind
3679 ri_basis => basis_set_ri(ibasis)%gto_basis_set
3680 CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
3681 orb_basis => basis_set_ao(ibasis)%gto_basis_set
3682 CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
3683 END DO
3684
3685 CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3686 ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3687 ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3688 row_bsize(:) = ri_data%bsizes_RI
3689 col_bsize(:) = ri_data%bsizes_RI
3690
3691 CALL dbcsr_create(matrix_s(1), "RI metric", dbcsr_dist, dbcsr_type_symmetric, row_bsize, col_bsize)
3692
3693 CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
3694 "HFX_2c_nl_pot", qs_env, sym_ij=.true., dist_2d=dist_2d)
3695 CALL build_2c_integrals(matrix_s, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_ri, &
3696 basis_set_ri, ri_data%ri_metric)
3697
3698 CALL release_neighbor_list_sets(nl_2c)
3699 CALL dbcsr_distribution_release(dbcsr_dist)
3700 END IF
3701
3702 IF (print_density) THEN
3703 CALL get_qs_env(qs_env, rho=rho)
3704 CALL qs_rho_get(rho, rho_ao_kp=rho_ao)
3705 nspins = SIZE(rho_ao, 1)
3706
3707 CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%MULTIPLY_BY_RI_2C_INTEGRALS", l_val=mult_by_s)
3708
3709 CALL get_ri_density_coeffs(density_coeffs, matrix_s(1), rho_ao, 1, basis_set_ao, basis_set_ri, &
3710 mult_by_s, ri_data, qs_env)
3711 IF (nspins == 2) &
3712 CALL get_ri_density_coeffs(density_coeffs_2, matrix_s(1), rho_ao, 2, basis_set_ao, &
3713 basis_set_ri, mult_by_s, ri_data, qs_env)
3714
3715 unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS", &
3716 extension=".dat", file_status="REPLACE", &
3717 file_action="WRITE", file_form="FORMATTED")
3718
3719 IF (unit_nr > 0) THEN
3720 IF (nspins == 1) THEN
3721 WRITE (unit_nr, fmt="(A,A,A)") &
3722 "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3723 trim(logger%iter_info%project_name), " project"
3724 DO i_ri = 1, SIZE(density_coeffs)
3725 WRITE (unit_nr, fmt="(F20.12)") density_coeffs(i_ri)
3726 END DO
3727 ELSE
3728 WRITE (unit_nr, fmt="(A,A,A)") &
3729 "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3730 trim(logger%iter_info%project_name), " project. Spin up, spin down"
3731 DO i_ri = 1, SIZE(density_coeffs)
3732 WRITE (unit_nr, fmt="(F20.12,F20.12)") density_coeffs(i_ri), density_coeffs_2(i_ri)
3733 END DO
3734 END IF
3735 END IF
3736
3737 CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS")
3738 END IF
3739
3740 IF (print_ri_metric) THEN
3741
3742 !convert 2c integrals to fm for dumping
3743 CALL cp_fm_struct_create(fm_struct, context=blacs_env, para_env=para_env, &
3744 nrow_global=sum(row_bsize), ncol_global=sum(col_bsize))
3745 CALL cp_fm_create(matrix_s_fm, fm_struct)
3746
3747 CALL copy_dbcsr_to_fm(matrix_s(1), matrix_s_fm)
3748
3749 unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS", &
3750 extension=".fm", file_status="REPLACE", &
3751 file_action="WRITE", file_form="UNFORMATTED")
3752 CALL cp_fm_write_unformatted(matrix_s_fm, unit_nr)
3753
3754 CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS")
3755
3756 CALL cp_fm_struct_release(fm_struct)
3757 CALL cp_fm_release(matrix_s_fm)
3758 END IF
3759
3760 !clean-up
3761 IF (print_density .OR. print_ri_metric) THEN
3762 DO ibasis = 1, nkind
3763 ri_basis => basis_set_ri(ibasis)%gto_basis_set
3764 CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3765 orb_basis => basis_set_ao(ibasis)%gto_basis_set
3766 CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3767 END DO
3768
3769 CALL dbcsr_release(matrix_s(1))
3770 DEALLOCATE (row_bsize, col_bsize)
3771 END IF
3772
3773 END SUBROUTINE print_ri_hfx
3774
3775! **************************************************************************************************
3776!> \brief Projects the density on the RI basis and return the array of the RI coefficients
3777!> \param density_coeffs ...
3778!> \param ri_2c_ints ...
3779!> \param rho_ao ...
3780!> \param ispin ...
3781!> \param basis_set_AO ...
3782!> \param basis_set_RI ...
3783!> \param multiply_by_S ...
3784!> \param ri_data ...
3785!> \param qs_env ...
3786! **************************************************************************************************
3787 SUBROUTINE get_ri_density_coeffs(density_coeffs, ri_2c_ints, rho_ao, ispin, basis_set_AO, &
3788 basis_set_RI, multiply_by_S, ri_data, qs_env)
3789
3790 REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs
3791 TYPE(dbcsr_type), INTENT(INOUT) :: ri_2c_ints
3792 TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
3793 INTEGER, INTENT(IN) :: ispin
3794 TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_set_ao, basis_set_ri
3795 LOGICAL, INTENT(IN) :: multiply_by_s
3796 TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3797 TYPE(qs_environment_type), POINTER :: qs_env
3798
3799 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_RI_density_coeffs'
3800
3801 INTEGER :: a, b, handle, i_mem, idx, n_dependent, &
3802 n_mem, n_mem_ri, natom, &
3803 nblk_per_thread, nblks, nkind
3804 INTEGER(int_8) :: nze
3805 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_block_end, batch_block_start, &
3806 dist1, dist2, dist3, dummy1, dummy2, &
3807 idx1, idx2, idx3
3808 INTEGER, DIMENSION(2) :: ind, pdims_2d
3809 INTEGER, DIMENSION(2, 3) :: bounds_cpy
3810 INTEGER, DIMENSION(3) :: dims_3c, pcoord_3d, pdims_3d
3811 LOGICAL :: calc_ints, found
3812 REAL(dp) :: occ, threshold
3813 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3814 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk_3d
3815 TYPE(cp_blacs_env_type), POINTER :: blacs_env
3816 TYPE(dbcsr_type) :: ri_2c_inv
3817 TYPE(dbt_distribution_type) :: dist_2d, dist_3d
3818 TYPE(dbt_iterator_type) :: iter
3819 TYPE(dbt_pgrid_type) :: pgrid_2d, pgrid_3d
3820 TYPE(dbt_type) :: density_coeffs_t, density_tmp, rho_ao_t, &
3821 rho_ao_t_3d, rho_ao_tmp, t2c_ri_ints, &
3822 t2c_ri_inv, t2c_ri_tmp
3823 TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
3824 TYPE(dft_control_type), POINTER :: dft_control
3825 TYPE(distribution_3d_type) :: dist_nl3c
3826 TYPE(mp_cart_type) :: mp_comm_t3c
3827 TYPE(mp_para_env_type), POINTER :: para_env
3828 TYPE(neighbor_list_3c_type) :: nl_3c
3829 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3830 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3831
3832 NULLIFY (dft_control, para_env, blacs_env, particle_set, qs_kind_set)
3833
3834 CALL timeset(routinen, handle)
3835
3836 ! Projection of the density on the RI basis: n(r) = sum_pq sum_munu P_pq (pq|mu) (mu|nu)^-1 nu(r)
3837 ! = sum_nu d_nu nu(r)
3838 ! the (pq|mu) (mu|nu)^-1 contraction is already stored in compressed format
3839
3840 IF (.NOT. ri_data%flavor == ri_pmat) THEN
3841 cpabort("Can only calculate and print the RI density coefficients within the RHO flavor of RI-HFX")
3842 END IF
3843
3844 CALL get_qs_env(qs_env, dft_control=dft_control, para_env=para_env, blacs_env=blacs_env, nkind=nkind, &
3845 particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom)
3846 n_mem = ri_data%n_mem
3847 n_mem_ri = ri_data%n_mem_RI
3848
3849 ! The RI 2c int tensor and its inverse
3850 CALL dbcsr_create(ri_2c_inv, template=ri_2c_ints, matrix_type=dbcsr_type_no_symmetry)
3851
3852 SELECT CASE (ri_data%t2c_method)
3853 CASE (hfx_ri_do_2c_iter)
3854 threshold = max(ri_data%filter_eps, 1.0e-12_dp)
3855 CALL invert_hotelling(ri_2c_inv, ri_2c_ints, threshold=threshold, silent=.false.)
3856 CASE (hfx_ri_do_2c_cholesky)
3857 CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3858 CALL cp_dbcsr_cholesky_decompose(ri_2c_inv, para_env=para_env, blacs_env=blacs_env)
3859 CALL cp_dbcsr_cholesky_invert(ri_2c_inv, para_env=para_env, blacs_env=blacs_env, uplo_to_full=.true.)
3860 CASE (hfx_ri_do_2c_diag)
3861 CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3862 CALL cp_dbcsr_power(ri_2c_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
3863 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
3864 END SELECT
3865
3866 CALL dbt_create(ri_2c_ints, t2c_ri_tmp)
3867 CALL create_2c_tensor(t2c_ri_ints, dist1, dist2, ri_data%pgrid_2d, &
3868 ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, &
3869 name="(RI | RI)")
3870 CALL dbt_create(t2c_ri_ints, t2c_ri_inv)
3871
3872 CALL dbt_copy_matrix_to_tensor(ri_2c_ints, t2c_ri_tmp)
3873 CALL dbt_copy(t2c_ri_tmp, t2c_ri_ints, move_data=.true.)
3874 CALL dbt_filter(t2c_ri_ints, ri_data%filter_eps)
3875
3876 CALL dbt_copy_matrix_to_tensor(ri_2c_inv, t2c_ri_tmp)
3877 CALL dbt_copy(t2c_ri_tmp, t2c_ri_inv, move_data=.true.)
3878 CALL dbt_filter(t2c_ri_inv, ri_data%filter_eps)
3879
3880 CALL dbcsr_release(ri_2c_inv)
3881 CALL dbt_destroy(t2c_ri_tmp)
3882 DEALLOCATE (dist1, dist2)
3883
3884 ! The AO density tensor
3885 CALL dbt_create(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3886 CALL create_2c_tensor(rho_ao_t, dist1, dist2, ri_data%pgrid_2d, &
3887 ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3888 name="(AO | AO)")
3889 DEALLOCATE (dist1, dist2)
3890
3891 CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3892 CALL dbt_copy(rho_ao_tmp, rho_ao_t, move_data=.true.)
3893 CALL dbt_filter(rho_ao_t, ri_data%filter_eps)
3894 CALL dbt_destroy(rho_ao_tmp)
3895
3896 ! Put in in 3D
3897 ALLOCATE (dist1(SIZE(ri_data%bsizes_AO_split)), dist2(SIZE(ri_data%bsizes_AO_split)), dist3(1))
3898 dist3(1) = 0
3899 CALL dbt_get_info(rho_ao_t, pdims=pdims_2d, proc_dist_1=dist1, proc_dist_2=dist2)
3900 CALL dbt_default_distvec(1, 1, [1], dist3)
3901
3902 pdims_3d(1) = pdims_2d(1)
3903 pdims_3d(2) = pdims_2d(2)
3904 pdims_3d(3) = 1
3905
3906 CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d)
3907 CALL dbt_distribution_new(dist_3d, pgrid_3d, dist1, dist2, dist3)
3908 CALL dbt_create(rho_ao_t_3d, "rho_ao_3d", dist_3d, [1, 2], [3], ri_data%bsizes_AO_split, &
3909 ri_data%bsizes_AO_split, [1])
3910 DEALLOCATE (dist1, dist2, dist3)
3911 CALL dbt_pgrid_destroy(pgrid_3d)
3912 CALL dbt_distribution_destroy(dist_3d)
3913
3914 ! copy density
3915 nblks = 0
3916!$OMP PARALLEL DEFAULT(NONE) &
3917!$OMP SHARED(rho_ao_t,nblks) &
3918!$OMP PRIVATE(iter,ind,blk,found)
3919 CALL dbt_iterator_start(iter, rho_ao_t)
3920 DO WHILE (dbt_iterator_blocks_left(iter))
3921 CALL dbt_iterator_next_block(iter, ind)
3922 CALL dbt_get_block(rho_ao_t, ind, blk, found)
3923 IF (found) THEN
3924!$OMP ATOMIC
3925 nblks = nblks + 1
3926 DEALLOCATE (blk)
3927 END IF
3928 END DO
3929 CALL dbt_iterator_stop(iter)
3930!$OMP END PARALLEL
3931
3932 ALLOCATE (idx1(nblks), idx2(nblks), idx3(nblks))
3933 idx3 = 1
3934 nblks = 0
3935!$OMP PARALLEL DEFAULT(NONE) &
3936!$OMP SHARED(rho_ao_t,nblks,idx1,idx2) &
3937!$OMP PRIVATE(iter,ind,blk,found)
3938 CALL dbt_iterator_start(iter, rho_ao_t)
3939 DO WHILE (dbt_iterator_blocks_left(iter))
3940 CALL dbt_iterator_next_block(iter, ind)
3941 CALL dbt_get_block(rho_ao_t, ind, blk, found)
3942 IF (found) THEN
3943!$OMP CRITICAL
3944 nblks = nblks + 1
3945 idx1(nblks) = ind(1)
3946 idx2(nblks) = ind(2)
3947!$OMP END CRITICAL
3948 DEALLOCATE (blk)
3949 END IF
3950 END DO
3951 CALL dbt_iterator_stop(iter)
3952!$OMP END PARALLEL
3953
3954!$OMP PARALLEL DEFAULT(NONE) SHARED(rho_ao_t_3d,nblks,idx1,idx2,idx3) PRIVATE(nblk_per_thread,A,b)
3955 nblk_per_thread = nblks/omp_get_num_threads() + 1
3956 a = omp_get_thread_num()*nblk_per_thread + 1
3957 b = min(a + nblk_per_thread, nblks)
3958 CALL dbt_reserve_blocks(rho_ao_t_3d, idx1(a:b), idx2(a:b), idx3(a:b))
3959!$OMP END PARALLEL
3960
3961!$OMP PARALLEL DEFAULT(NONE) &
3962!$OMP SHARED(rho_ao_t,rho_ao_t_3d) &
3963!$OMP PRIVATE(iter,ind,blk,found,blk_3d)
3964 CALL dbt_iterator_start(iter, rho_ao_t)
3965 DO WHILE (dbt_iterator_blocks_left(iter))
3966 CALL dbt_iterator_next_block(iter, ind)
3967 CALL dbt_get_block(rho_ao_t, ind, blk, found)
3968 IF (found) THEN
3969 ALLOCATE (blk_3d(SIZE(blk, 1), SIZE(blk, 2), 1))
3970 blk_3d(:, :, 1) = blk(:, :)
3971!$OMP CRITICAL
3972 CALL dbt_put_block(rho_ao_t_3d, [ind(1), ind(2), 1], [SIZE(blk, 1), SIZE(blk, 2), 1], blk_3d)
3973!$OMP END CRITICAL
3974 DEALLOCATE (blk, blk_3d)
3975 END IF
3976 END DO
3977 CALL dbt_iterator_stop(iter)
3978!$OMP END PARALLEL
3979
3980 ! The 1D tensor with the density coeffs
3981 pdims_2d(1) = para_env%num_pe
3982 pdims_2d(2) = 1
3983
3984 ALLOCATE (dist1(SIZE(ri_data%bsizes_RI_split)), dist2(1))
3985 CALL dbt_default_distvec(SIZE(ri_data%bsizes_RI_split), pdims_2d(1), ri_data%bsizes_RI_split, dist1)
3986 CALL dbt_default_distvec(1, pdims_2d(2), [1], dist2)
3987
3988 CALL dbt_pgrid_create(para_env, pdims_2d, pgrid_2d)
3989 CALL dbt_distribution_new(dist_2d, pgrid_2d, dist1, dist2)
3990 CALL dbt_create(density_coeffs_t, "density_coeffs", dist_2d, [1], [2], ri_data%bsizes_RI_split, [1])
3991 CALL dbt_create(density_coeffs_t, density_tmp)
3992 DEALLOCATE (dist1, dist2)
3993 CALL dbt_pgrid_destroy(pgrid_2d)
3994 CALL dbt_distribution_destroy(dist_2d)
3995
3996 CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
3997
3998 ! The 3c integrals tensor, in case we compute them here
3999 pdims_3d = 0
4000 CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d, tensor_dims=[max(1, natom/n_mem), natom, natom])
4001 ALLOCATE (t_3c_int_batched(1, 1))
4002 CALL create_3c_tensor(t_3c_int_batched(1, 1), dist1, dist2, dist3, pgrid_3d, &
4003 ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, map1=[1], map2=[2, 3], &
4004 name="(RI | AO AO)")
4005
4006 CALL dbt_mp_environ_pgrid(pgrid_3d, pdims_3d, pcoord_3d)
4007 CALL mp_comm_t3c%create(pgrid_3d%mp_comm_2d, 3, pdims_3d)
4008 CALL distribution_3d_create(dist_nl3c, dist1, dist2, dist3, nkind, particle_set, &
4009 mp_comm_t3c, own_comm=.true.)
4010 DEALLOCATE (dist1, dist2, dist3)
4011 CALL dbt_pgrid_destroy(pgrid_3d)
4012
4013 CALL build_3c_neighbor_lists(nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, dist_nl3c, ri_data%ri_metric, &
4014 "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.true., own_dist=.true.)
4015
4016 n_mem = ri_data%n_mem
4017 CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy1, dummy2, batch_block_start, batch_block_end)
4018
4019 calc_ints = .false.
4020 CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze, occ)
4021 IF (nze == 0) calc_ints = .true.
4022
4023 DO i_mem = 1, n_mem
4024 IF (calc_ints) THEN
4025 CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
4026 basis_set_ri, basis_set_ao, basis_set_ao, &
4027 ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
4028 desymmetrize=.false., &
4029 bounds_i=[batch_block_start(i_mem), batch_block_end(i_mem)])
4030 CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 3, 2])
4031 CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), move_data=.true., summation=.true.)
4032 CALL dbt_filter(ri_data%t_3c_int_ctr_3(1, 1), ri_data%filter_eps)
4033 ELSE
4034 bounds_cpy(:, 2) = [sum(ri_data%bsizes_RI(1:batch_block_start(i_mem) - 1)) + 1, &
4035 sum(ri_data%bsizes_RI(1:batch_block_end(i_mem)))]
4036 bounds_cpy(:, 1) = [1, sum(ri_data%bsizes_AO)]
4037 bounds_cpy(:, 3) = [1, sum(ri_data%bsizes_AO)]
4038 CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
4039 order=[2, 1, 3], bounds=bounds_cpy)
4040 END IF
4041
4042 !contract the integrals with the density P_pq (pq|R)
4043 CALL dbt_contract(1.0_dp, ri_data%t_3c_int_ctr_3(1, 1), rho_ao_t_3d, 0.0_dp, density_tmp, &
4044 contract_1=[2, 3], notcontract_1=[1], &
4045 contract_2=[1, 2], notcontract_2=[3], &
4046 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4047 CALL dbt_clear(ri_data%t_3c_int_ctr_3(1, 1))
4048
4049 !contract the above vector with the inverse metric
4050 CALL dbt_contract(1.0_dp, t2c_ri_inv, density_tmp, 1.0_dp, density_coeffs_t, &
4051 contract_1=[2], notcontract_1=[1], &
4052 contract_2=[1], notcontract_2=[2], &
4053 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4054
4055 END DO
4056 CALL neighbor_list_3c_destroy(nl_3c)
4057
4058 IF (multiply_by_s) THEN
4059 CALL dbt_contract(1.0_dp, t2c_ri_ints, density_coeffs_t, 0.0_dp, density_tmp, &
4060 contract_1=[2], notcontract_1=[1], &
4061 contract_2=[1], notcontract_2=[2], &
4062 map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4063 CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.true.)
4064 END IF
4065
4066 ALLOCATE (density_coeffs(sum(ri_data%bsizes_RI)))
4067 density_coeffs = 0.0
4068
4069!$OMP PARALLEL DEFAULT(NONE) &
4070!$OMP SHARED(density_coeffs_t,ri_data,density_coeffs) &
4071!$OMP PRIVATE(iter,ind,blk,found,idx)
4072 CALL dbt_iterator_start(iter, density_coeffs_t)
4073 DO WHILE (dbt_iterator_blocks_left(iter))
4074 CALL dbt_iterator_next_block(iter, ind)
4075 CALL dbt_get_block(density_coeffs_t, ind, blk, found)
4076 IF (found) THEN
4077
4078 idx = sum(ri_data%bsizes_RI_split(1:ind(1) - 1))
4079!$OMP CRITICAL
4080 density_coeffs(idx + 1:idx + ri_data%bsizes_RI_split(ind(1))) = blk(:, 1)
4081!$OMP END CRITICAL
4082 DEALLOCATE (blk)
4083 END IF
4084 END DO
4085 CALL dbt_iterator_stop(iter)
4086!$OMP END PARALLEL
4087 CALL para_env%sum(density_coeffs)
4088
4089 CALL dbt_destroy(t2c_ri_ints)
4090 CALL dbt_destroy(t2c_ri_inv)
4091 CALL dbt_destroy(density_tmp)
4092 CALL dbt_destroy(rho_ao_t)
4093 CALL dbt_destroy(rho_ao_t_3d)
4094 CALL dbt_destroy(density_coeffs_t)
4095 CALL dbt_destroy(t_3c_int_batched(1, 1))
4096
4097 CALL timestop(handle)
4098
4099 END SUBROUTINE get_ri_density_coeffs
4100
4101! **************************************************************************************************
4102!> \brief a small utility function that returns the atom corresponding to a block of a split tensor
4103!> \param idx_to_at ...
4104!> \param bsizes_split ...
4105!> \param bsizes_orig ...
4106!> \return ...
4107! **************************************************************************************************
4108 SUBROUTINE get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
4109 INTEGER, DIMENSION(:), INTENT(INOUT) :: idx_to_at
4110 INTEGER, DIMENSION(:), INTENT(IN) :: bsizes_split, bsizes_orig
4111
4112 INTEGER :: full_sum, iat, iblk, split_sum
4113
4114 iat = 1
4115 full_sum = bsizes_orig(iat)
4116 split_sum = 0
4117 DO iblk = 1, SIZE(bsizes_split)
4118 split_sum = split_sum + bsizes_split(iblk)
4119
4120 IF (split_sum .GT. full_sum) THEN
4121 iat = iat + 1
4122 full_sum = full_sum + bsizes_orig(iat)
4123 END IF
4124
4125 idx_to_at(iblk) = iat
4126 END DO
4127
4128 END SUBROUTINE get_idx_to_atom
4129
4130! **************************************************************************************************
4131!> \brief Function for calculating sqrt of a matrix
4132!> \param values ...
4133!> \return ...
4134! **************************************************************************************************
4135 FUNCTION my_sqrt(values)
4136 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: values
4137 REAL(kind=dp), DIMENSION(SIZE(values)) :: my_sqrt
4138
4139 my_sqrt = sqrt(values)
4140 END FUNCTION
4141
4142! **************************************************************************************************
4143!> \brief Function for calculation inverse sqrt of a matrix
4144!> \param values ...
4145!> \return ...
4146! **************************************************************************************************
4147 FUNCTION my_invsqrt(values)
4148 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: values
4149 REAL(kind=dp), DIMENSION(SIZE(values)) :: my_invsqrt
4150
4151 my_invsqrt = sqrt(1.0_dp/values)
4152 END FUNCTION
4153END MODULE
static GRID_HOST_DEVICE double fac(const int i)
Factorial function, e.g. fac(5) = 5! = 120.
Definition grid_common.h:51
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
#define idx2(a, i, j)
#define idx3(a, i, j, k)
arnoldi iteration using dbcsr
Definition arnoldi_api.F:16
subroutine, public arnoldi_extremal(matrix_a, max_ev, min_ev, converged, threshold, max_iter)
simple wrapper to estimate extremal eigenvalues with arnoldi, using the old lanczos interface this hi...
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
Handles all functions related to the CELL.
Definition cell_types.F:15
methods related to the blacs parallel environment
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
subroutine, public dbcsr_distribution_release(dist)
...
subroutine, public dbcsr_scale(matrix, alpha_scalar)
...
subroutine, public dbcsr_desymmetrize(matrix_a, matrix_b)
...
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
subroutine, public dbcsr_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, first_row, last_row, first_column, last_column, first_k, last_k, retain_sparsity, filter_eps, flop)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_filter(matrix, eps)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_complete_redistribute(matrix, redist)
...
integer function, public dbcsr_get_num_blocks(matrix)
...
subroutine, public dbcsr_add(matrix_a, matrix_b, alpha_scalar, beta_scalar)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_cholesky_decompose(matrix, n, para_env, blacs_env)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
subroutine, public cp_dbcsr_cholesky_invert(matrix, n, para_env, blacs_env, uplo_to_full)
used to replace the cholesky decomposition by the inverse
subroutine, public dbcsr_add_on_diag(matrix, alpha)
Adds the given scalar to the diagonal of the matrix. Reserves any missing diagonal blocks.
real(dp) function, public dbcsr_frobenius_norm(matrix)
Compute the frobenius norm of a dbcsr matrix.
subroutine, public dbcsr_dot(matrix_a, matrix_b, trace)
Computes the dot product of two matrices, also known as the trace of their matrix product.
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_power(matrix, exponent, threshold, n_dependent, para_env, blacs_env, verbose, eigenvectors, eigenvalues)
...
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_dist2d_to_dist(dist2d, dist)
Creates a DBCSR distribution from a distribution_2d.
subroutine, public copy_dbcsr_to_fm(matrix, fm)
Copy a DBCSR matrix to a BLACS matrix.
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_write_unformatted(fm, unit)
...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
integer, parameter, public cp_p_file
integer function, public cp_print_key_should_output(iteration_info, basis_section, print_key_path, used_print_key, first_time)
returns what should be done with the given property if btest(res,cp_p_store) then the property should...
This is the start of a dbt_api, all publically needed functions are exported here....
Definition dbt_api.F:17
stores a mapping of 2D info (e.g. matrix) on a 2D processor distribution (i.e. blacs grid) where cpus...
RI-methods for HFX.
Definition hfx_ri.F:12
subroutine, public hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, geometry_did_change, nspins, hf_fraction)
...
Definition hfx_ri.F:1036
subroutine, public check_inverse(matrix_1, matrix_2, name, unit_nr)
...
Definition hfx_ri.F:350
subroutine, public get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, deriv_dim)
This routines calculates the force contribution from a trace over 3D tensors, i.e....
Definition hfx_ri.F:3363
subroutine, public get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
a small utility function that returns the atom corresponding to a block of a split tensor
Definition hfx_ri.F:4109
subroutine, public hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_ri, t_2c_int_pot, t_3c_int, do_kpoints)
Calculate 2-center and 3-center integrals.
Definition hfx_ri.F:444
subroutine, public hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, mos, use_virial, resp_only, rescale_factor)
the general routine that calls the relevant force code
Definition hfx_ri.F:3039
subroutine, public print_ri_hfx(ri_data, qs_env)
Print RI-HFX quantities, as required by the PRINT subsection.
Definition hfx_ri.F:3622
subroutine, public get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, do_ovlp)
Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
Definition hfx_ri.F:3449
Types and set/get functions for HFX.
Definition hfx_types.F:15
subroutine, public alloc_containers(data, bin_size)
...
Definition hfx_types.F:2905
subroutine, public dealloc_containers(data, memory_usage)
...
Definition hfx_types.F:2873
subroutine, public hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
...
Definition hfx_types.F:1199
subroutine, public hfx_ri_release(ri_data, write_stats)
...
Definition hfx_types.F:1464
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public hfx_ri_do_2c_cholesky
integer, parameter, public hfx_ri_do_2c_diag
integer, parameter, public hfx_ri_do_2c_iter
function that builds the hartree fock exchange section of the input
integer, parameter, public ri_pmat
integer, parameter, public ri_mo
objects that represent the structure of input sections and the data contained in an input section
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Routines useful for iterative matrix calculations.
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
subroutine, public matrix_sqrt_newton_schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged, iounit)
compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations the...
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:147
Interface to the message passing library MPI.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, harris_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, eeq, rhs)
Get the QUICKSTEP environment.
Some utility functions for the calculation of integrals.
subroutine, public basis_set_list_setup(basis_set_list, basis_type, qs_kind_set)
Set up an easy accessible list of the basis sets for all kinds.
Calculate the interaction radii for the operator matrix calculation.
subroutine, public init_interaction_radii_orb_basis(orb_basis_set, eps_pgf_orb, eps_pgf_short)
...
Define the quickstep kind type and their sub types.
Definition and initialisation of the mo data type.
Definition qs_mo_types.F:22
subroutine, public get_mo_set(mo_set, maxocc, homo, lfomo, nao, nelectron, n_el_f, nmo, eigenvalues, occupation_numbers, mo_coeff, mo_coeff_b, uniform_occupation, kts, mu, flexible_electron_count)
Get the components of a MO set data structure.
Define the neighbor list data types and the corresponding functionality.
subroutine, public release_neighbor_list_sets(nlists)
releases an array of neighbor_list_sets
superstucture that hold various representations of the density and keeps track of which ones are vali...
subroutine, public qs_rho_get(rho_struct, rho_ao, rho_ao_im, rho_ao_kp, rho_ao_im_kp, rho_r, drho_r, rho_g, drho_g, tau_r, tau_g, rho_r_valid, drho_r_valid, rho_g_valid, drho_g_valid, tau_r_valid, tau_g_valid, tot_rho_r, tot_rho_g, rho_r_sccs, soft_valid, complex_rho_ao)
returns info about the density described by this object. If some representation is not available an e...
Utility methods to build 3-center integral tensors of various types.
subroutine, public distribution_3d_create(dist_3d, dist1, dist2, dist3, nkind, particle_set, mp_comm_3d, own_comm)
Create a 3d distribution.
subroutine, public create_2c_tensor(t2c, dist_1, dist_2, pgrid, sizes_1, sizes_2, order, name)
...
subroutine, public split_block_sizes(blk_sizes, blk_sizes_split, max_size)
...
subroutine, public create_tensor_batches(sizes, nbatches, starts_array, ends_array, starts_array_block, ends_array_block)
...
subroutine, public create_3c_tensor(t3c, dist_1, dist_2, dist_3, pgrid, sizes_1, sizes_2, sizes_3, map1, map2, name)
...
Utility methods to build 3-center integral tensors of various types.
Definition qs_tensors.F:11
subroutine, public build_2c_integrals(t2c, filter_eps, qs_env, nl_2c, basis_i, basis_j, potential_parameter, do_kpoints, do_hfx_kpoints, ext_kpoints, regularization_ri)
...
subroutine, public calc_3c_virial(work_virial, t3c_trace, pref, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, der_eps, op_pos)
Calculates the 3c virial contributions on the fly.
subroutine, public build_3c_derivatives(t3c_der_i, t3c_der_k, filter_eps, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, der_eps, op_pos, do_kpoints, do_hfx_kpoints, bounds_i, bounds_j, bounds_k, ri_range, img_to_ri_cell)
Build 3-center derivative tensors.
Definition qs_tensors.F:926
subroutine, public build_2c_neighbor_lists(ij_list, basis_i, basis_j, potential_parameter, name, qs_env, sym_ij, molecular, dist_2d, pot_to_rad)
Build 2-center neighborlists adapted to different operators This mainly wraps build_neighbor_lists fo...
Definition qs_tensors.F:143
subroutine, public build_3c_integrals(t3c, filter_eps, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, int_eps, op_pos, do_kpoints, do_hfx_kpoints, desymmetrize, cell_sym, bounds_i, bounds_j, bounds_k, ri_range, img_to_ri_cell, cell_to_index_ext)
Build 3-center integral tensor.
subroutine, public compress_tensor(tensor, blk_indices, compressed, eps, memory)
...
subroutine, public neighbor_list_3c_destroy(ijk_list)
Destroy 3c neighborlist.
Definition qs_tensors.F:383
subroutine, public calc_2c_virial(work_virial, t2c_trace, pref, qs_env, nl_2c, basis_i, basis_j, potential_parameter)
Calculates the virial coming from 2c derivatives on the fly.
subroutine, public decompress_tensor(tensor, blk_indices, compressed, eps)
...
subroutine, public build_2c_derivatives(t2c_der, filter_eps, qs_env, nl_2c, basis_i, basis_j, potential_parameter, do_kpoints)
Calculates the derivatives of 2-center integrals, wrt to the first center.
subroutine, public get_tensor_occupancy(tensor, nze, occ)
...
subroutine, public build_3c_neighbor_lists(ijk_list, basis_i, basis_j, basis_k, dist_3d, potential_parameter, name, qs_env, sym_ij, sym_jk, sym_ik, molecular, op_pos, own_dist)
Build a 3-center neighbor list.
Definition qs_tensors.F:282
All kind of helpful little routines.
Definition util.F:14
Provides all information about an atomic kind.
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
represent a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...
distributes pairs on a 2d grid of processors
stores all the informations relevant to an mpi environment
Provides all information about a quickstep kind.
calculation environment to calculate the ks matrix, holds all the needed vars. assumes that the core ...
keeps the density in various representations, keeping track of which ones are valid.