(git:1f285aa)
hfx_ri.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief RI-methods for HFX
10 ! **************************************************************************************************
11 
12 MODULE hfx_ri
13 
14  USE omp_lib, ONLY: omp_get_num_threads,&
15  omp_get_thread_num
16  USE arnoldi_api, ONLY: arnoldi_extremal
17  USE atomic_kind_types, ONLY: atomic_kind_type,&
19  USE basis_set_types, ONLY: gto_basis_set_p_type,&
20  gto_basis_set_type
21  USE cell_types, ONLY: cell_type
22  USE cp_blacs_env, ONLY: cp_blacs_env_type
23  USE cp_control_types, ONLY: dft_control_type
26  USE cp_dbcsr_diag, ONLY: cp_dbcsr_power
33  cp_fm_struct_type
34  USE cp_fm_types, ONLY: cp_fm_create,&
35  cp_fm_release,&
36  cp_fm_type,&
39  cp_logger_type
40  USE cp_output_handling, ONLY: cp_p_file,&
44  USE dbcsr_api, ONLY: &
45  dbcsr_add, dbcsr_add_on_diag, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, &
46  dbcsr_desymmetrize, dbcsr_distribution_get, dbcsr_distribution_release, &
47  dbcsr_distribution_type, dbcsr_dot, dbcsr_filter, dbcsr_frobenius_norm, dbcsr_get_info, &
48  dbcsr_get_num_blocks, dbcsr_multiply, dbcsr_p_type, dbcsr_release, dbcsr_scale, &
49  dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, dbcsr_type_symmetric
50  USE dbt_api, ONLY: &
51  dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
52  dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, &
53  dbt_default_distvec, dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, &
54  dbt_distribution_type, dbt_filter, dbt_get_block, dbt_get_info, dbt_get_num_blocks_total, &
55  dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
56  dbt_iterator_type, dbt_mp_environ_pgrid, dbt_nd_mp_comm, dbt_pgrid_create, &
57  dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_reserve_blocks, dbt_scale, dbt_type
58  USE distribution_2d_types, ONLY: distribution_2d_type
59  USE hfx_types, ONLY: alloc_containers,&
60  block_ind_type,&
62  hfx_compression_type,&
63  hfx_ri_init,&
65  hfx_ri_type
69  USE input_cp2k_hfx, ONLY: ri_mo,&
70  ri_pmat
72  section_vals_type,&
74  USE iterate_matrix, ONLY: invert_hotelling,&
76  USE kinds, ONLY: default_string_length,&
77  dp,&
78  int_8
79  USE machine, ONLY: m_walltime
80  USE message_passing, ONLY: mp_cart_type,&
81  mp_comm_type,&
82  mp_para_env_type
84  USE particle_types, ONLY: particle_type
85  USE qs_environment_types, ONLY: get_qs_env,&
86  qs_environment_type
87  USE qs_force_types, ONLY: qs_force_type
90  USE qs_kind_types, ONLY: qs_kind_type
91  USE qs_ks_types, ONLY: qs_ks_env_type
92  USE qs_mo_types, ONLY: get_mo_set,&
93  mo_set_type
94  USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type,&
96  USE qs_rho_types, ONLY: qs_rho_get,&
97  qs_rho_type
98  USE qs_tensors, ONLY: &
106  distribution_3d_type,&
107  neighbor_list_3c_type,&
109  USE util, ONLY: sort
110  USE virial_types, ONLY: virial_type
111 #include "./base/base_uses.f90"
112 
113 !$ USE OMP_LIB, ONLY: omp_get_num_threads
114 
115  IMPLICIT NONE
116  PRIVATE
117 
120 
121  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri'
122 CONTAINS
123 
124 ! **************************************************************************************************
125 !> \brief Switches the RI_FLAVOR from MO to RHO or vice-versa
126 !> \param ri_data ...
127 !> \param qs_env ...
128 !> \note As a side product, the ri_data is mostly reinitialized and the integrals recomputed
129 ! **************************************************************************************************
130  SUBROUTINE switch_ri_flavor(ri_data, qs_env)
131  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
132  TYPE(qs_environment_type), POINTER :: qs_env
133 
134  CHARACTER(LEN=*), PARAMETER :: routineN = 'switch_ri_flavor'
135 
136  INTEGER :: handle, n_mem, new_flavor
137  TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
138  TYPE(dft_control_type), POINTER :: dft_control
139  TYPE(mp_para_env_type), POINTER :: para_env
140  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
141  TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
142 
143  NULLIFY (qs_kind_set, particle_set, atomic_kind_set, para_env, dft_control)
144 
145  CALL timeset(routinen, handle)
146 
147  CALL get_qs_env(qs_env, para_env=para_env, dft_control=dft_control, atomic_kind_set=atomic_kind_set, &
148  particle_set=particle_set, qs_kind_set=qs_kind_set)
149 
150  CALL hfx_ri_release(ri_data, write_stats=.false.)
151 
152  IF (ri_data%flavor == ri_pmat) THEN
153  new_flavor = ri_mo
154  ELSE
155  new_flavor = ri_pmat
156  END IF
157  ri_data%flavor = new_flavor
158 
159  n_mem = ri_data%n_mem_input
160  ri_data%n_mem_input = ri_data%n_mem_flavor_switch
161  ri_data%n_mem_flavor_switch = n_mem
162 
163  CALL hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
164 
165  !Need to recalculate the integrals in the new flavor
166  !TODO: should we backup the integrals and symmetrize/desymmetrize them instead of recomputing ?!?
167  ! that only makes sense if actual integral calculation is not negligible
168  IF (ri_data%flavor == ri_pmat) THEN
169  CALL hfx_ri_pre_scf_pmat(qs_env, ri_data)
170  ELSE
171  CALL hfx_ri_pre_scf_mo(qs_env, ri_data, dft_control%nspins)
172  END IF
173 
174  IF (ri_data%unit_nr > 0) THEN
175  IF (ri_data%flavor == ri_pmat) THEN
176  WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR RHO"
177  ELSE
178  WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR MO"
179  END IF
180  END IF
181 
182  CALL timestop(handle)
183 
184  END SUBROUTINE switch_ri_flavor
185 
186 ! **************************************************************************************************
187 !> \brief Pre-SCF steps in MO flavor of RI HFX
188 !>
189 !> Calculate 2-center & 3-center integrals (see hfx_ri_pre_scf_calc_tensors) and contract
190 !> K(P, S) = sum_R K_2(P, R)^{-1} K_1(R, S)^{1/2}
191 !> B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
192 !> \param qs_env ...
193 !> \param ri_data ...
194 !> \param nspins ...
195 ! **************************************************************************************************
196  SUBROUTINE hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
197  TYPE(qs_environment_type), POINTER :: qs_env
198  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
199  INTEGER, INTENT(IN) :: nspins
200 
201  CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_mo'
202 
203  INTEGER :: handle, handle2, ispin, n_dependent, &
204  unit_nr, unit_nr_dbcsr
205  REAL(KIND=dp) :: threshold
206  TYPE(cp_blacs_env_type), POINTER :: blacs_env
207  TYPE(dbcsr_type), DIMENSION(1) :: dbcsr_work_1, dbcsr_work_2, t_2c_int_mat, t_2c_op_pot, &
208  t_2c_op_pot_sqrt, t_2c_op_pot_sqrt_inv, t_2c_op_RI, t_2c_op_RI_inv
209  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
210  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
211  TYPE(mp_para_env_type), POINTER :: para_env
212 
213  CALL timeset(routinen, handle)
214 
215  unit_nr_dbcsr = ri_data%unit_nr_dbcsr
216  unit_nr = ri_data%unit_nr
217 
218  CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
219 
220  CALL timeset(routinen//"_int", handle2)
221 
222  ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int(1, 1))
223  CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_ri, t_2c_op_pot, t_3c_int)
224 
225  CALL timestop(handle2)
226 
227  CALL timeset(routinen//"_2c", handle2)
228  IF (.NOT. ri_data%same_op) THEN
229  SELECT CASE (ri_data%t2c_method)
230  CASE (hfx_ri_do_2c_iter)
231  CALL dbcsr_create(t_2c_op_ri_inv(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
232  threshold = max(ri_data%filter_eps, 1.0e-12_dp)
233  CALL invert_hotelling(t_2c_op_ri_inv(1), t_2c_op_ri(1), threshold=threshold, silent=.false.)
234  CASE (hfx_ri_do_2c_cholesky)
235  CALL dbcsr_copy(t_2c_op_ri_inv(1), t_2c_op_ri(1))
236  CALL cp_dbcsr_cholesky_decompose(t_2c_op_ri_inv(1), para_env=para_env, blacs_env=blacs_env)
237  CALL cp_dbcsr_cholesky_invert(t_2c_op_ri_inv(1), para_env=para_env, blacs_env=blacs_env, upper_to_full=.true.)
238  CASE (hfx_ri_do_2c_diag)
239  CALL dbcsr_copy(t_2c_op_ri_inv(1), t_2c_op_ri(1))
240  CALL cp_dbcsr_power(t_2c_op_ri_inv(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
241  para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
242  END SELECT
243 
244  IF (ri_data%check_2c_inv) THEN
245  CALL check_inverse(t_2c_op_ri_inv(1), t_2c_op_ri(1), unit_nr=unit_nr)
246  END IF
247 
248  CALL dbcsr_release(t_2c_op_ri(1))
249 
250  SELECT CASE (ri_data%t2c_method)
251  CASE (hfx_ri_do_2c_iter)
252  CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
253  CALL dbcsr_create(t_2c_op_pot_sqrt_inv(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
254  CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_op_pot_sqrt_inv(1), t_2c_op_pot(1), &
255  ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
256  ri_data%max_iter_lanczos)
257 
258  CALL dbcsr_release(t_2c_op_pot_sqrt_inv(1))
260  CALL dbcsr_copy(t_2c_op_pot_sqrt(1), t_2c_op_pot(1))
261  CALL cp_dbcsr_power(t_2c_op_pot_sqrt(1), 0.5_dp, ri_data%eps_eigval, n_dependent, &
262  para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
263  END SELECT
264 
265  !We need S^-1 and (P|Q) for the forces.
266  CALL dbt_create(t_2c_op_ri_inv(1), t_2c_work(1))
267  CALL dbt_copy_matrix_to_tensor(t_2c_op_ri_inv(1), t_2c_work(1))
268  CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.true.)
269  CALL dbt_destroy(t_2c_work(1))
270  CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
271 
272  CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
273  CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
274  CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.true.)
275  CALL dbt_destroy(t_2c_work(1))
276  CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
277 
278  IF (ri_data%check_2c_inv) THEN
279  CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt=t_2c_op_pot_sqrt(1), unit_nr=unit_nr)
280  END IF
281  CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_no_symmetry)
282  CALL dbcsr_multiply("N", "N", 1.0_dp, t_2c_op_ri_inv(1), t_2c_op_pot_sqrt(1), &
283  0.0_dp, t_2c_int_mat(1), filter_eps=ri_data%filter_eps)
284  CALL dbcsr_release(t_2c_op_ri_inv(1))
285  CALL dbcsr_release(t_2c_op_pot_sqrt(1))
286  ELSE
287  SELECT CASE (ri_data%t2c_method)
288  CASE (hfx_ri_do_2c_iter)
289  CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
290  CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
291  CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_int_mat(1), t_2c_op_pot(1), &
292  ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
293  ri_data%max_iter_lanczos)
294  CALL dbcsr_release(t_2c_op_pot_sqrt(1))
296  CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_pot(1))
297  CALL cp_dbcsr_power(t_2c_int_mat(1), -0.5_dp, ri_data%eps_eigval, n_dependent, &
298  para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
299  END SELECT
300  IF (ri_data%check_2c_inv) THEN
301  CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt_inv=t_2c_int_mat(1), unit_nr=unit_nr)
302  END IF
303 
304  !We need (P|Q)^-1 for the forces
305  CALL dbcsr_copy(dbcsr_work_1(1), t_2c_int_mat(1))
306  CALL dbcsr_create(dbcsr_work_2(1), template=t_2c_int_mat(1))
307  CALL dbcsr_multiply("N", "N", 1.0_dp, dbcsr_work_1(1), t_2c_int_mat(1), 0.0_dp, dbcsr_work_2(1))
308  CALL dbcsr_release(dbcsr_work_1(1))
309  CALL dbt_create(dbcsr_work_2(1), t_2c_work(1))
310  CALL dbt_copy_matrix_to_tensor(dbcsr_work_2(1), t_2c_work(1))
311  CALL dbcsr_release(dbcsr_work_2(1))
312  CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.true.)
313  CALL dbt_destroy(t_2c_work(1))
314  CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
315  END IF
316 
317  CALL dbcsr_release(t_2c_op_pot(1))
318 
319  CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
320  CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
321  CALL dbcsr_release(t_2c_int_mat(1))
322  DO ispin = 1, nspins
323  CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(ispin, 1))
324  END DO
325  CALL dbt_destroy(t_2c_int(1))
326  CALL timestop(handle2)
327 
328  CALL timeset(routinen//"_3c", handle2)
329  CALL dbt_copy(t_3c_int(1, 1), ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.true.)
330  CALL dbt_filter(ri_data%t_3c_int_ctr_1(1, 1), ri_data%filter_eps)
331  CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_2(1, 1))
332  CALL dbt_destroy(t_3c_int(1, 1))
333  CALL timestop(handle2)
334 
335  CALL timestop(handle)
336 
337  END SUBROUTINE
338 
339 ! **************************************************************************************************
340 !> \brief ...
341 !> \param matrix_1 ...
342 !> \param matrix_2 ...
343 !> \param name ...
344 !> \param unit_nr ...
345 ! **************************************************************************************************
346  SUBROUTINE check_inverse(matrix_1, matrix_2, name, unit_nr)
347  TYPE(dbcsr_type), INTENT(INOUT) :: matrix_1, matrix_2
348  CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
349  INTEGER, INTENT(IN) :: unit_nr
350 
351  CHARACTER(len=default_string_length) :: name_prv
352  REAL(kind=dp) :: error, frob_matrix, frob_matrix_base
353  TYPE(dbcsr_type) :: matrix_tmp
354 
355  IF (PRESENT(name)) THEN
356  name_prv = name
357  ELSE
358  CALL dbcsr_get_info(matrix_1, name=name_prv)
359  END IF
360 
361  CALL dbcsr_create(matrix_tmp, template=matrix_1)
362  CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_1, matrix_2, &
363  0.0_dp, matrix_tmp)
364  frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp)
365  CALL dbcsr_add_on_diag(matrix_tmp, -1.0_dp)
366  frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
367  error = frob_matrix/frob_matrix_base
368  IF (unit_nr > 0) THEN
369  WRITE (unit=unit_nr, fmt="(T3,A,A,A,T73,ES8.1)") &
370  "HFX_RI_INFO| Error for INV(", trim(name_prv), "):", error
371  END IF
372 
373  CALL dbcsr_release(matrix_tmp)
374  END SUBROUTINE
375 
376 ! **************************************************************************************************
377 !> \brief ...
378 !> \param matrix ...
379 !> \param matrix_sqrt ...
380 !> \param matrix_sqrt_inv ...
381 !> \param name ...
382 !> \param unit_nr ...
383 ! **************************************************************************************************
384  SUBROUTINE check_sqrt(matrix, matrix_sqrt, matrix_sqrt_inv, name, unit_nr)
385  TYPE(dbcsr_type), INTENT(INOUT) :: matrix
386  TYPE(dbcsr_type), INTENT(IN), OPTIONAL :: matrix_sqrt, matrix_sqrt_inv
387  CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
388  INTEGER, INTENT(IN) :: unit_nr
389 
390  CHARACTER(len=default_string_length) :: name_prv
391  REAL(kind=dp) :: frob_matrix
392  TYPE(dbcsr_type) :: matrix_copy, matrix_tmp
393 
394  IF (PRESENT(name)) THEN
395  name_prv = name
396  ELSE
397  CALL dbcsr_get_info(matrix, name=name_prv)
398  END IF
399  IF (PRESENT(matrix_sqrt)) THEN
400  CALL dbcsr_create(matrix_tmp, template=matrix)
401  CALL dbcsr_copy(matrix_copy, matrix_sqrt)
402  CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, matrix_copy, &
403  0.0_dp, matrix_tmp)
404  CALL dbcsr_add(matrix_tmp, matrix, 1.0_dp, -1.0_dp)
405  frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
406  IF (unit_nr > 0) THEN
407  WRITE (unit=unit_nr, fmt="(T3,A,A,A,T73,ES8.1)") &
408  "HFX_RI_INFO| Error for SQRT(", trim(name_prv), "):", frob_matrix
409  END IF
410  CALL dbcsr_release(matrix_tmp)
411  CALL dbcsr_release(matrix_copy)
412  END IF
413 
414  IF (PRESENT(matrix_sqrt_inv)) THEN
415  CALL dbcsr_create(matrix_tmp, template=matrix)
416  CALL dbcsr_copy(matrix_copy, matrix_sqrt_inv)
417  CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_copy, &
418  0.0_dp, matrix_tmp)
419  CALL check_inverse(matrix_tmp, matrix, name="SQRT("//trim(name_prv)//")", unit_nr=unit_nr)
420  CALL dbcsr_release(matrix_tmp)
421  CALL dbcsr_release(matrix_copy)
422  END IF
423 
424  END SUBROUTINE
425 
426 ! **************************************************************************************************
427 !> \brief Calculate 2-center and 3-center integrals
428 !>
429 !> 2c: K_1(P, R) = (P|v1|R) and K_2(P, R) = (P|v2|R)
430 !> 3c: int_3c(mu, lambda, P) = (mu lambda |v2| P)
431 !> v_1 is HF operator, v_2 is RI metric
432 !> \param qs_env ...
433 !> \param ri_data ...
434 !> \param t_2c_int_RI K_2(P, R) note: even with k-point, only need on central cell
435 !> \param t_2c_int_pot K_1(P, R)
436 !> \param t_3c_int int_3c(mu, lambda, P)
437 !> \param do_kpoints ...
438 !> \notes the integral tensor arrays are already allocated on entry
439 ! **************************************************************************************************
440  SUBROUTINE hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
441  TYPE(qs_environment_type), POINTER :: qs_env
442  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
443  TYPE(dbcsr_type), DIMENSION(:), INTENT(OUT) :: t_2c_int_ri, t_2c_int_pot
444  TYPE(dbt_type), DIMENSION(:, :) :: t_3c_int
445  LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints
446 
447  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_pre_scf_calc_tensors'
448 
449  CHARACTER :: symm
450  INTEGER :: handle, i_img, i_mem, ibasis, j_img, &
451  n_mem, natom, nblks, nimg, nkind, &
452  nthreads
453  INTEGER(int_8) :: nze
454  INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_ao_1, dist_ao_2, dist_ri, dist_ri_ext, &
455  ends_array_mc_block_int, ends_array_mc_int, sizes_ao, sizes_ri, sizes_ri_ext, &
456  sizes_ri_ext_split, starts_array_mc_block_int, starts_array_mc_int
457  INTEGER, DIMENSION(3) :: pcoord, pdims
458  INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
459  LOGICAL :: converged, do_kpoints_prv
460  REAL(dp) :: max_ev, min_ev, occ, ri_range
461  TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
462  TYPE(dbcsr_distribution_type) :: dbcsr_dist
463  TYPE(dbt_distribution_type) :: t_dist
464  TYPE(dbt_pgrid_type) :: pgrid
465  TYPE(dbt_type) :: t_3c_tmp
466  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
467  TYPE(dft_control_type), POINTER :: dft_control
468  TYPE(distribution_2d_type), POINTER :: dist_2d
469  TYPE(distribution_3d_type) :: dist_3d
470  TYPE(gto_basis_set_p_type), ALLOCATABLE, &
471  DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
472  TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
473  TYPE(mp_cart_type) :: mp_comm_t3c
474  TYPE(mp_para_env_type), POINTER :: para_env
475  TYPE(neighbor_list_3c_type) :: nl_3c
476  TYPE(neighbor_list_set_p_type), DIMENSION(:), &
477  POINTER :: nl_2c_pot, nl_2c_ri
478  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
479  TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
480  TYPE(qs_ks_env_type), POINTER :: ks_env
481 
482  CALL timeset(routinen, handle)
483  NULLIFY (col_bsize, row_bsize, dist_2d, nl_2c_pot, nl_2c_ri, &
484  particle_set, qs_kind_set, ks_env, para_env)
485 
486  CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
487  distribution_2d=dist_2d, ks_env=ks_env, dft_control=dft_control, para_env=para_env)
488 
489  ri_range = 0.0_dp
490  do_kpoints_prv = .false.
491  IF (PRESENT(do_kpoints)) do_kpoints_prv = do_kpoints
492  nimg = 1
493  IF (do_kpoints_prv) THEN
494  nimg = ri_data%nimg
495  ri_range = ri_data%kp_RI_range
496  END IF
497 
498  ALLOCATE (sizes_ri(natom), sizes_ao(natom))
499  ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
500  CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
501  CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_ri, basis=basis_set_ri)
502  CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
503  CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_ao, basis=basis_set_ao)
504 
505  DO ibasis = 1, SIZE(basis_set_ao)
506  orb_basis => basis_set_ao(ibasis)%gto_basis_set
507  ri_basis => basis_set_ri(ibasis)%gto_basis_set
508  ! interaction radii should be based on eps_pgf_orb controlled in RI section
509  ! (since hartree-fock needs very tight eps_pgf_orb for Kohn-Sham/Fock matrix but eps_pgf_orb
510  ! can be much looser in RI HFX since no systematic error is introduced with tensor sparsity)
511  CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
512  CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
513  END DO
514 
515  n_mem = ri_data%n_mem
516  CALL create_tensor_batches(sizes_ri, n_mem, starts_array_mc_int, ends_array_mc_int, &
517  starts_array_mc_block_int, ends_array_mc_block_int)
518 
519  DEALLOCATE (starts_array_mc_int, ends_array_mc_int)
520 
521  !We separate the K-points and standard 3c integrals, because handle quite differently
522  IF (.NOT. do_kpoints_prv) THEN
523 
524  nthreads = 1
525 !$ nthreads = omp_get_num_threads()
526  pdims = 0
527  CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[max(1, natom/(n_mem*nthreads)), natom, natom])
528 
529  ALLOCATE (t_3c_int_batched(1, 1))
530  CALL create_3c_tensor(t_3c_int_batched(1, 1), dist_ri, dist_ao_1, dist_ao_2, pgrid, &
531  sizes_ri, sizes_ao, sizes_ao, map1=[1], map2=[2, 3], &
532  name="(RI | AO AO)")
533 
534  CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
535  CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
536  CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
537  CALL distribution_3d_create(dist_3d, dist_ri, dist_ao_1, dist_ao_2, &
538  nkind, particle_set, mp_comm_t3c, own_comm=.true.)
539  DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
540 
541  CALL create_3c_tensor(t_3c_int(1, 1), dist_ri, dist_ao_1, dist_ao_2, ri_data%pgrid, &
542  ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
543  map1=[1], map2=[2, 3], &
544  name="O (RI AO | AO)")
545 
546  CALL build_3c_neighbor_lists(nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, dist_3d, ri_data%ri_metric, &
547  "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.true., own_dist=.true.)
548 
549  DO i_mem = 1, n_mem
550  CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps/2, qs_env, nl_3c, &
551  basis_set_ri, basis_set_ao, basis_set_ao, &
552  ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
553  desymmetrize=.false., &
554  bounds_i=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)])
555  CALL dbt_copy(t_3c_int_batched(1, 1), t_3c_int(1, 1), summation=.true., move_data=.true.)
556  CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps/2)
557  END DO
558 
559  CALL dbt_destroy(t_3c_int_batched(1, 1))
560 
561  CALL neighbor_list_3c_destroy(nl_3c)
562 
563  CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
564 
565  IF (ri_data%flavor == ri_pmat) THEN ! desymmetrize
566  ! desymmetrize
567  CALL dbt_copy(t_3c_int(1, 1), t_3c_tmp)
568  CALL dbt_copy(t_3c_tmp, t_3c_int(1, 1), order=[1, 3, 2], summation=.true., move_data=.true.)
569 
570  ! For RI-RHO filter_eps_storage is reserved for screening tensor contracted with RI-metric
571  ! with RI metric but not to bare integral tensor
572  CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps)
573  ELSE
574  CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps_storage/2)
575  END IF
576 
577  CALL dbt_destroy(t_3c_tmp)
578 
579  ELSE !do_kpoints
580 
581  nthreads = 1
582 !$ nthreads = omp_get_num_threads()
583  pdims = 0
584  CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[natom, natom, max(1, natom/(n_mem*nthreads))])
585 
586  !In k-points HFX, we stack all images along the RI direction in the same tensors, in order
587  !to avoid storing nimg x ncell_RI different tensors (very memory intensive)
588  nblks = SIZE(ri_data%bsizes_RI_split)
589  ALLOCATE (sizes_ri_ext(natom*ri_data%ncell_RI), sizes_ri_ext_split(nblks*ri_data%ncell_RI))
590  DO i_img = 1, ri_data%ncell_RI
591  sizes_ri_ext((i_img - 1)*natom + 1:i_img*natom) = sizes_ri(:)
592  sizes_ri_ext_split((i_img - 1)*nblks + 1:i_img*nblks) = ri_data%bsizes_RI_split(:)
593  END DO
594 
595  CALL create_3c_tensor(t_3c_tmp, dist_ao_1, dist_ao_2, dist_ri, &
596  pgrid, sizes_ao, sizes_ao, sizes_ri, map1=[1, 2], map2=[3], &
597  name="(AO AO | RI)")
598  CALL dbt_destroy(t_3c_tmp)
599 
600  !For the integrals to work, the distribution along the RI direction must be repeated
601  ALLOCATE (dist_ri_ext(natom*ri_data%ncell_RI))
602  DO i_img = 1, ri_data%ncell_RI
603  dist_ri_ext((i_img - 1)*natom + 1:i_img*natom) = dist_ri(:)
604  END DO
605 
606  ALLOCATE (t_3c_int_batched(nimg, 1))
607  CALL dbt_distribution_new(t_dist, pgrid, dist_ao_1, dist_ao_2, dist_ri_ext)
608  CALL dbt_create(t_3c_int_batched(1, 1), "KP_3c_ints", t_dist, [1, 2], [3], &
609  sizes_ao, sizes_ao, sizes_ri_ext)
610  DO i_img = 2, nimg
611  CALL dbt_create(t_3c_int_batched(1, 1), t_3c_int_batched(i_img, 1))
612  END DO
613  CALL dbt_distribution_destroy(t_dist)
614 
615  CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
616  CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
617  CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
618  CALL distribution_3d_create(dist_3d, dist_ao_1, dist_ao_2, dist_ri, &
619  nkind, particle_set, mp_comm_t3c, own_comm=.true.)
620  DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
621 
622  ! create 3c tensor for storage of ints
623  CALL build_3c_neighbor_lists(nl_3c, basis_set_ao, basis_set_ao, basis_set_ri, dist_3d, ri_data%ri_metric, &
624  "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.false., own_dist=.true.)
625 
626  CALL create_3c_tensor(t_3c_int(1, 1), dist_ri, dist_ao_1, dist_ao_2, ri_data%pgrid, &
627  sizes_ri_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
628  map1=[1], map2=[2, 3], &
629  name="O (RI AO | AO)")
630  DO j_img = 2, nimg
631  CALL dbt_create(t_3c_int(1, 1), t_3c_int(1, j_img))
632  END DO
633 
634  CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
635  DO i_mem = 1, n_mem
636  CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
637  basis_set_ao, basis_set_ao, basis_set_ri, &
638  ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=2, &
639  desymmetrize=.false., do_kpoints=.true., do_hfx_kpoints=.true., &
640  bounds_k=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)], &
641  ri_range=ri_range, img_to_ri_cell=ri_data%img_to_RI_cell)
642 
643  DO i_img = 1, nimg
644  !we start with (mu^0 sigma^i | P^j) and finish with (P^i | mu^0 sigma^j)
645  CALL get_tensor_occupancy(t_3c_int_batched(i_img, 1), nze, occ)
646  IF (nze > 0) THEN
647  CALL dbt_copy(t_3c_int_batched(i_img, 1), t_3c_tmp, order=[3, 2, 1], move_data=.true.)
648  CALL dbt_filter(t_3c_tmp, ri_data%filter_eps)
649  CALL dbt_copy(t_3c_tmp, t_3c_int(1, i_img), order=[1, 3, 2], &
650  summation=.true., move_data=.true.)
651  END IF
652  END DO
653  END DO
654 
655  DO i_img = 1, nimg
656  CALL dbt_destroy(t_3c_int_batched(i_img, 1))
657  END DO
658  DEALLOCATE (t_3c_int_batched)
659  CALL neighbor_list_3c_destroy(nl_3c)
660  CALL dbt_destroy(t_3c_tmp)
661  END IF
662  CALL dbt_pgrid_destroy(pgrid)
663 
664  CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
665  "HFX_2c_nl_pot", qs_env, sym_ij=.NOT. do_kpoints_prv, &
666  dist_2d=dist_2d)
667 
668  CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
669  ALLOCATE (row_bsize(SIZE(sizes_ri)))
670  ALLOCATE (col_bsize(SIZE(sizes_ri)))
671  row_bsize(:) = sizes_ri
672  col_bsize(:) = sizes_ri
673 
674  !Use non-symmetric nl and matrices for k-points
675  symm = dbcsr_type_symmetric
676  IF (do_kpoints_prv) symm = dbcsr_type_no_symmetry
677 
678  CALL dbcsr_create(t_2c_int_pot(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
679  DO i_img = 2, nimg
680  CALL dbcsr_create(t_2c_int_pot(i_img), template=t_2c_int_pot(1))
681  END DO
682 
683  IF (.NOT. ri_data%same_op) THEN
684  CALL dbcsr_create(t_2c_int_ri(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
685  DO i_img = 2, nimg
686  CALL dbcsr_create(t_2c_int_ri(i_img), template=t_2c_int_ri(1))
687  END DO
688  END IF
689  DEALLOCATE (col_bsize, row_bsize)
690 
691  CALL dbcsr_distribution_release(dbcsr_dist)
692 
693  CALL build_2c_integrals(t_2c_int_pot, ri_data%filter_eps_2c, qs_env, nl_2c_pot, basis_set_ri, basis_set_ri, &
694  ri_data%hfx_pot, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
695  CALL release_neighbor_list_sets(nl_2c_pot)
696 
697  IF (.NOT. ri_data%same_op) THEN
698  CALL build_2c_neighbor_lists(nl_2c_ri, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
699  "HFX_2c_nl_RI", qs_env, sym_ij=.NOT. do_kpoints_prv, &
700  dist_2d=dist_2d)
701 
702  CALL build_2c_integrals(t_2c_int_ri, ri_data%filter_eps_2c, qs_env, nl_2c_ri, basis_set_ri, basis_set_ri, &
703  ri_data%ri_metric, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
704 
705  CALL release_neighbor_list_sets(nl_2c_ri)
706  END IF
707 
708  DO ibasis = 1, SIZE(basis_set_ao)
709  orb_basis => basis_set_ao(ibasis)%gto_basis_set
710  ri_basis => basis_set_ri(ibasis)%gto_basis_set
711  ! reset interaction radii of orb basis
712  CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
713  CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
714  END DO
715 
716  IF (ri_data%calc_condnum) THEN
717  CALL arnoldi_extremal(t_2c_int_pot(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
718  max_iter=ri_data%max_iter_lanczos, converged=converged)
719 
720  IF (.NOT. converged) THEN
721  cpwarn("Condition number estimate of (P|Q) (HFX potential) is not reliable (not converged).")
722  END IF
723 
724  IF (ri_data%unit_nr > 0) THEN
725  WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (HFX potential)"
726  IF (min_ev > 0) THEN
727  WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
728  "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", log10(max_ev/min_ev)
729  ELSE
730  WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
731  "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
732  END IF
733  END IF
734 
735  IF (.NOT. ri_data%same_op) THEN
736  CALL arnoldi_extremal(t_2c_int_ri(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
737  max_iter=ri_data%max_iter_lanczos, converged=converged)
738 
739  IF (.NOT. converged) THEN
740  cpwarn("Condition number estimate of (P|Q) matrix (RI metric) is not reliable (not converged).")
741  END IF
742 
743  IF (ri_data%unit_nr > 0) THEN
744  WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (RI metric)"
745  IF (min_ev > 0) THEN
746  WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
747  "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", log10(max_ev/min_ev)
748  ELSE
749  WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
750  "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
751  END IF
752  END IF
753  END IF
754  END IF
755 
756  CALL timestop(handle)
757  END SUBROUTINE
758 
759 ! **************************************************************************************************
760 !> \brief Pre-SCF steps in rho flavor of RI HFX
761 !>
762 !> K(P, S) = sum_{R,Q} K_2(P, R)^{-1} K_1(R, Q) K_2(Q, S)^{-1}
763 !> Calculate B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
764 !> \param qs_env ...
765 !> \param ri_data ...
766 ! **************************************************************************************************
767  SUBROUTINE hfx_ri_pre_scf_pmat(qs_env, ri_data)
768  TYPE(qs_environment_type), POINTER :: qs_env
769  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
770 
771  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_pre_scf_Pmat'
772 
773  INTEGER :: handle, handle2, i_mem, j_mem, &
774  n_dependent, unit_nr, unit_nr_dbcsr
775  INTEGER(int_8) :: nflop, nze, nze_o
776  INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_ao, batch_ranges_ri
777  INTEGER, DIMENSION(2, 1) :: bounds_i
778  INTEGER, DIMENSION(2, 2) :: bounds_j
779  INTEGER, DIMENSION(3) :: dims_3c
780  REAL(kind=dp) :: compression_factor, memory_3c, occ, &
781  threshold
782  TYPE(cp_blacs_env_type), POINTER :: blacs_env
783  TYPE(dbcsr_type), DIMENSION(1) :: t_2c_int_mat, t_2c_op_pot, t_2c_op_ri, &
784  t_2c_tmp, t_2c_tmp_2
785  TYPE(dbt_type) :: t_3c_2
786  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
787  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_1
788  TYPE(mp_para_env_type), POINTER :: para_env
789 
790  CALL timeset(routinen, handle)
791 
792  unit_nr_dbcsr = ri_data%unit_nr_dbcsr
793  unit_nr = ri_data%unit_nr
794 
795  CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
796 
797  CALL timeset(routinen//"_int", handle2)
798 
799  ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int_1(1, 1))
800  CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_ri, t_2c_op_pot, t_3c_int_1)
801 
802  CALL dbt_copy(t_3c_int_1(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 2, 3], move_data=.true.)
803 
804  CALL dbt_destroy(t_3c_int_1(1, 1))
805 
806  CALL timestop(handle2)
807 
808  CALL timeset(routinen//"_2c", handle2)
809 
810  IF (ri_data%same_op) t_2c_op_ri(1) = t_2c_op_pot(1)
811  CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
812  threshold = max(ri_data%filter_eps, 1.0e-12_dp)
813 
814  SELECT CASE (ri_data%t2c_method)
815  CASE (hfx_ri_do_2c_iter)
816  CALL invert_hotelling(t_2c_int_mat(1), t_2c_op_ri(1), &
817  threshold=threshold, silent=.false.)
818  CASE (hfx_ri_do_2c_cholesky)
819  CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_ri(1))
820  CALL cp_dbcsr_cholesky_decompose(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env)
821  CALL cp_dbcsr_cholesky_invert(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env, upper_to_full=.true.)
822  CASE (hfx_ri_do_2c_diag)
823  CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_ri(1))
824  CALL cp_dbcsr_power(t_2c_int_mat(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
825  para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
826  END SELECT
827 
828  IF (ri_data%check_2c_inv) THEN
829  CALL check_inverse(t_2c_int_mat(1), t_2c_op_ri(1), unit_nr=unit_nr)
830  END IF
831 
832  !Need to save the (P|Q)^-1 tensor for forces (inverse metric if not same_op)
833  CALL dbt_create(t_2c_int_mat(1), t_2c_work(1))
834  CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_work(1))
835  CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.true.)
836  CALL dbt_destroy(t_2c_work(1))
837  CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
838  IF (.NOT. ri_data%same_op) THEN
839  !Also save the RI (P|Q) integral
840  CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
841  CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
842  CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.true.)
843  CALL dbt_destroy(t_2c_work(1))
844  CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
845  END IF
846 
847  IF (ri_data%same_op) THEN
848  CALL dbcsr_release(t_2c_op_pot(1))
849  ELSE
850  CALL dbcsr_create(t_2c_tmp(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
851  CALL dbcsr_create(t_2c_tmp_2(1), template=t_2c_op_ri(1), matrix_type=dbcsr_type_no_symmetry)
852  CALL dbcsr_release(t_2c_op_ri(1))
853  CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_int_mat(1), t_2c_op_pot(1), 0.0_dp, t_2c_tmp(1), &
854  filter_eps=ri_data%filter_eps)
855 
856  CALL dbcsr_release(t_2c_op_pot(1))
857  CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_tmp(1), t_2c_int_mat(1), 0.0_dp, t_2c_tmp_2(1), &
858  filter_eps=ri_data%filter_eps)
859  CALL dbcsr_release(t_2c_tmp(1))
860  CALL dbcsr_release(t_2c_int_mat(1))
861  t_2c_int_mat(1) = t_2c_tmp_2(1)
862  END IF
863 
864  CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
865  CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
866  CALL dbcsr_release(t_2c_int_mat(1))
867  CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(1, 1), move_data=.true.)
868  CALL dbt_destroy(t_2c_int(1))
869  CALL dbt_filter(ri_data%t_2c_int(1, 1), ri_data%filter_eps)
870 
871  CALL timestop(handle2)
872 
873  CALL dbt_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)
874 
875  CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
876 
877  memory_3c = 0.0_dp
878  nze_o = 0
879 
880  ALLOCATE (batch_ranges_ri(ri_data%n_mem_RI + 1))
881  ALLOCATE (batch_ranges_ao(ri_data%n_mem + 1))
882  batch_ranges_ri(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
883  batch_ranges_ri(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
884  batch_ranges_ao(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
885  batch_ranges_ao(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
886 
887  CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_3(1, 1), batch_range_1=batch_ranges_ri, &
888  batch_range_2=batch_ranges_ao)
889  CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges_ri, &
890  batch_range_2=batch_ranges_ao)
891 
892  DO i_mem = 1, ri_data%n_mem_RI
893  bounds_i(:, 1) = [ri_data%starts_array_RI_mem(i_mem), ri_data%ends_array_RI_mem(i_mem)]
894 
895  CALL dbt_batched_contract_init(ri_data%t_2c_int(1, 1))
896  DO j_mem = 1, ri_data%n_mem
897  bounds_j(:, 1) = [ri_data%starts_array_mem(j_mem), ri_data%ends_array_mem(j_mem)]
898  bounds_j(:, 2) = [1, dims_3c(3)]
899  CALL timeset(routinen//"_RIx3C", handle2)
900  CALL dbt_contract(1.0_dp, ri_data%t_2c_int(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
901  0.0_dp, t_3c_2, &
902  contract_1=[2], notcontract_1=[1], &
903  contract_2=[1], notcontract_2=[2, 3], &
904  map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps_storage, &
905  bounds_2=bounds_i, &
906  bounds_3=bounds_j, &
907  unit_nr=unit_nr_dbcsr, &
908  flop=nflop)
909 
910  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
911  CALL timestop(handle2)
912 
913  CALL timeset(routinen//"_copy_2", handle2)
914  CALL dbt_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.true.)
915 
916  CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
917  nze_o = nze_o + nze
918 
919  CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind, &
920  ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage, memory_3c)
921 
922  CALL timestop(handle2)
923  END DO
924  CALL dbt_batched_contract_finalize(ri_data%t_2c_int(1, 1))
925  END DO
926  CALL dbt_batched_contract_finalize(t_3c_2)
927  CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_3(1, 1))
928 
929  CALL para_env%sum(memory_3c)
930  compression_factor = real(nze_o, dp)*1.0e-06*8.0_dp/memory_3c
931 
932  IF (unit_nr > 0) THEN
933  WRITE (unit=unit_nr, fmt="((T3,A,T66,F11.2,A4))") &
934  "MEMORY_INFO| Memory for 3-center HFX integrals (compressed):", memory_3c, ' MiB'
935 
936  WRITE (unit=unit_nr, fmt="((T3,A,T60,F21.2))") &
937  "MEMORY_INFO| Compression factor: ", compression_factor
938  END IF
939 
940  CALL dbt_clear(ri_data%t_2c_int(1, 1))
941  CALL dbt_destroy(t_3c_2)
942 
943  CALL dbt_copy(ri_data%t_3c_int_ctr_3(1, 1), ri_data%t_3c_int_ctr_2(1, 1), order=[2, 1, 3], move_data=.true.)
944 
945  CALL timestop(handle)
946  END SUBROUTINE
947 
948 ! **************************************************************************************************
949 !> \brief Sorts 2d indices w.r.t. rows and columns
950 !> \param blk_ind ...
951 ! **************************************************************************************************
952  SUBROUTINE sort_unique_blkind_2d(blk_ind)
953  INTEGER, ALLOCATABLE, DIMENSION(:, :), &
954  INTENT(INOUT) :: blk_ind
955 
956  INTEGER :: end_ind, iblk, iblk_all, irow, nblk, &
957  ncols, start_ind
958  INTEGER, ALLOCATABLE, DIMENSION(:) :: ind_1, ind_2, sort_1, sort_2
959  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blk_ind_tmp
960 
961  nblk = SIZE(blk_ind, 1)
962 
963  ALLOCATE (sort_1(nblk))
964  ALLOCATE (ind_1(nblk))
965 
966  sort_1(:) = blk_ind(:, 1)
967  CALL sort(sort_1, nblk, ind_1)
968 
969  blk_ind(:, :) = blk_ind(ind_1, :)
970 
971  start_ind = 1
972 
973  DO WHILE (start_ind <= nblk)
974  irow = blk_ind(start_ind, 1)
975  end_ind = start_ind
976 
977  IF (end_ind + 1 <= nblk) THEN
978  DO WHILE (blk_ind(end_ind + 1, 1) == irow)
979  end_ind = end_ind + 1
980  IF (end_ind + 1 > nblk) EXIT
981  END DO
982  END IF
983 
984  ncols = end_ind - start_ind + 1
985  ALLOCATE (sort_2(ncols))
986  ALLOCATE (ind_2(ncols))
987  sort_2(:) = blk_ind(start_ind:end_ind, 2)
988  CALL sort(sort_2, ncols, ind_2)
989  ind_2 = ind_2 + start_ind - 1
990 
991  blk_ind(start_ind:end_ind, :) = blk_ind(ind_2, :)
992  start_ind = end_ind + 1
993 
994  DEALLOCATE (sort_2, ind_2)
995  END DO
996 
997  ALLOCATE (blk_ind_tmp(nblk, 2))
998  blk_ind_tmp = 0
999 
1000  iblk = 0
1001  DO iblk_all = 1, nblk
1002  IF (iblk >= 1) THEN
1003  IF (all(blk_ind_tmp(iblk, :) == blk_ind(iblk_all, :))) THEN
1004  cycle
1005  END IF
1006  END IF
1007  iblk = iblk + 1
1008  blk_ind_tmp(iblk, :) = blk_ind(iblk_all, :)
1009  END DO
1010  nblk = iblk
1011 
1012  DEALLOCATE (blk_ind)
1013  ALLOCATE (blk_ind(nblk, 2))
1014 
1015  blk_ind(:, :) = blk_ind_tmp(:nblk, :)
1016 
1017  END SUBROUTINE
1018 
1019 ! **************************************************************************************************
1020 !> \brief ...
1021 !> \param qs_env ...
1022 !> \param ri_data ...
1023 !> \param ks_matrix ...
1024 !> \param ehfx ...
1025 !> \param mos ...
1026 !> \param rho_ao ...
1027 !> \param geometry_did_change ...
1028 !> \param nspins ...
1029 !> \param hf_fraction ...
1030 ! **************************************************************************************************
1031  SUBROUTINE hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, &
1032  geometry_did_change, nspins, hf_fraction)
1033  TYPE(qs_environment_type), POINTER :: qs_env
1034  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1035  TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
1036  REAL(kind=dp), INTENT(OUT) :: ehfx
1037  TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
1038  OPTIONAL :: mos
1039  TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
1040  LOGICAL, INTENT(IN) :: geometry_did_change
1041  INTEGER, INTENT(IN) :: nspins
1042  REAL(kind=dp), INTENT(IN) :: hf_fraction
1043 
1044  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks'
1045 
1046  CHARACTER(1) :: mtype
1047  INTEGER :: handle, handle2, i, ispin, j
1048  INTEGER(int_8) :: nblks
1049  INTEGER, DIMENSION(2) :: homo
1050  LOGICAL :: is_antisymmetric
1051  REAL(dp) :: etmp, fac
1052  REAL(kind=dp), DIMENSION(:), POINTER :: mo_eigenvalues
1053  TYPE(cp_fm_type), POINTER :: mo_coeff
1054  TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: my_ks_matrix, my_rho_ao
1055  TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
1056  TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
1057  TYPE(mp_para_env_type), POINTER :: para_env
1058 
1059  CALL timeset(routinen, handle)
1060 
1061  IF (nspins == 1) THEN
1062  fac = 0.5_dp*hf_fraction
1063  ELSE
1064  fac = 1.0_dp*hf_fraction
1065  END IF
1066 
1067  !If incoming assymetric matrices, need to convert to normal
1068  NULLIFY (my_ks_matrix, my_rho_ao)
1069  CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, matrix_type=mtype)
1070  is_antisymmetric = mtype == dbcsr_type_antisymmetric
1071  IF (is_antisymmetric) THEN
1072  ALLOCATE (my_ks_matrix(SIZE(ks_matrix, 1), SIZE(ks_matrix, 2)))
1073  ALLOCATE (my_rho_ao(SIZE(rho_ao, 1), SIZE(rho_ao, 2)))
1074 
1075  DO i = 1, SIZE(ks_matrix, 1)
1076  DO j = 1, SIZE(ks_matrix, 2)
1077  ALLOCATE (my_ks_matrix(i, j)%matrix, my_rho_ao(i, j)%matrix)
1078  CALL dbcsr_create(my_ks_matrix(i, j)%matrix, template=ks_matrix(i, j)%matrix, &
1079  matrix_type=dbcsr_type_no_symmetry)
1080  CALL dbcsr_desymmetrize(ks_matrix(i, j)%matrix, my_ks_matrix(i, j)%matrix)
1081  CALL dbcsr_create(my_rho_ao(i, j)%matrix, template=rho_ao(i, j)%matrix, &
1082  matrix_type=dbcsr_type_no_symmetry)
1083  CALL dbcsr_desymmetrize(rho_ao(i, j)%matrix, my_rho_ao(i, j)%matrix)
1084  END DO
1085  END DO
1086  ELSE
1087  my_ks_matrix => ks_matrix
1088  my_rho_ao => rho_ao
1089  END IF
1090 
1091  !Case analysis on RI_FLAVOR: we switch if the input flavor is MO, there is no provided MO, and
1092  ! the current flavor is not yet RHO. We switch back to MO if there are
1093  ! MOs available and the current flavor is actually RHO
1094  IF (ri_data%input_flavor == ri_mo .AND. (.NOT. PRESENT(mos)) .AND. ri_data%flavor == ri_mo) THEN
1095  CALL switch_ri_flavor(ri_data, qs_env)
1096  ELSE IF (ri_data%input_flavor == ri_mo .AND. PRESENT(mos) .AND. ri_data%flavor == ri_pmat) THEN
1097  CALL switch_ri_flavor(ri_data, qs_env)
1098  END IF
1099 
1100  SELECT CASE (ri_data%flavor)
1101  CASE (ri_mo)
1102  cpassert(PRESENT(mos))
1103  CALL timeset(routinen//"_MO", handle2)
1104 
1105  DO ispin = 1, nspins
1106  NULLIFY (mo_coeff_b_tmp)
1107  cpassert(mos(ispin)%uniform_occupation)
1108  CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
1109 
1110  IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
1111  CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
1112  END DO
1113 
1114  DO ispin = 1, nspins
1115  CALL dbcsr_scale(mo_coeff_b(ispin), sqrt(mos(ispin)%maxocc))
1116  homo(ispin) = mos(ispin)%homo
1117  END DO
1118 
1119  CALL timestop(handle2)
1120 
1121  CALL hfx_ri_update_ks_mo(qs_env, ri_data, my_ks_matrix, mo_coeff_b, homo, &
1122  geometry_did_change, nspins, fac)
1123  CASE (ri_pmat)
1124 
1125  NULLIFY (para_env)
1126  CALL get_qs_env(qs_env, para_env=para_env)
1127  DO ispin = 1, SIZE(my_rho_ao, 1)
1128  nblks = dbcsr_get_num_blocks(my_rho_ao(ispin, 1)%matrix)
1129  CALL para_env%sum(nblks)
1130  IF (nblks == 0) THEN
1131  cpabort("received empty density matrix")
1132  END IF
1133  END DO
1134 
1135  CALL hfx_ri_update_ks_pmat(qs_env, ri_data, my_ks_matrix, my_rho_ao, &
1136  geometry_did_change, nspins, fac)
1137 
1138  END SELECT
1139 
1140  DO ispin = 1, nspins
1141  CALL dbcsr_release(mo_coeff_b(ispin))
1142  END DO
1143 
1144  DO ispin = 1, nspins
1145  CALL dbcsr_filter(my_ks_matrix(ispin, 1)%matrix, ri_data%filter_eps)
1146  END DO
1147 
1148  CALL timeset(routinen//"_energy", handle2)
1149  ! Calculate the exchange energy
1150  ehfx = 0.0_dp
1151  DO ispin = 1, nspins
1152  CALL dbcsr_dot(my_ks_matrix(ispin, 1)%matrix, my_rho_ao(ispin, 1)%matrix, &
1153  etmp)
1154  ehfx = ehfx + 0.5_dp*etmp
1155 
1156  END DO
1157  CALL timestop(handle2)
1158 
1159  !Anti-symmetric case
1160  IF (is_antisymmetric) THEN
1161  DO i = 1, SIZE(ks_matrix, 1)
1162  DO j = 1, SIZE(ks_matrix, 2)
1163  CALL dbcsr_complete_redistribute(my_ks_matrix(i, j)%matrix, ks_matrix(i, j)%matrix)
1164  CALL dbcsr_complete_redistribute(my_rho_ao(i, j)%matrix, rho_ao(i, j)%matrix)
1165  END DO
1166  END DO
1167  CALL dbcsr_deallocate_matrix_set(my_ks_matrix)
1168  CALL dbcsr_deallocate_matrix_set(my_rho_ao)
1169  END IF
1170 
1171  CALL timestop(handle)
1172  END SUBROUTINE
1173 
1174 ! **************************************************************************************************
1175 !> \brief Calculate Fock (AKA Kohn-Sham) matrix in MO flavor
1176 !>
1177 !> C(mu, i) (MO coefficients)
1178 !> M(mu, i, R) = sum_nu B(mu, nu, R) C(nu, i)
1179 !> KS(mu, lambda) = sum_{i,R} M(mu, i, R) M(lambda, i, R)
1180 !> \param qs_env ...
1181 !> \param ri_data ...
1182 !> \param ks_matrix ...
1183 !> \param mo_coeff C(mu, i)
1184 !> \param homo ...
1185 !> \param geometry_did_change ...
1186 !> \param nspins ...
1187 !> \param fac ...
1188 ! **************************************************************************************************
1189  SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
1190  homo, geometry_did_change, nspins, fac)
1191  TYPE(qs_environment_type), POINTER :: qs_env
1192  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1193  TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix
1194  TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1195  INTEGER, DIMENSION(:) :: homo
1196  LOGICAL, INTENT(IN) :: geometry_did_change
1197  INTEGER, INTENT(IN) :: nspins
1198  REAL(dp), INTENT(IN) :: fac
1199 
1200  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks_mo'
1201 
1202  INTEGER :: bsize, bsum, comm_2d_handle, handle, &
1203  handle2, i_mem, iblock, iproc, ispin, &
1204  n_mem, n_mos, nblock, unit_nr_dbcsr
1205  INTEGER(int_8) :: nblks, nflop
1206  INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_1, batch_ranges_2, dist1, dist2, dist3, &
1207  mem_end, mem_end_block_1, mem_end_block_2, mem_size, mem_start, mem_start_block_1, &
1208  mem_start_block_2, mo_bsizes_1, mo_bsizes_2
1209  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: bounds
1210  INTEGER, DIMENSION(2) :: pdims_2d
1211  INTEGER, DIMENSION(3) :: pdims
1212  LOGICAL :: do_initialize
1213  REAL(dp) :: t1, t2
1214  TYPE(dbcsr_distribution_type) :: ks_dist
1215  TYPE(dbt_pgrid_type) :: pgrid, pgrid_2d
1216  TYPE(dbt_type) :: ks_t, ks_t_mat, mo_coeff_t, &
1217  mo_coeff_t_split
1218  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_mo_1, t_3c_int_mo_2
1219  TYPE(mp_comm_type) :: comm_2d
1220  TYPE(mp_para_env_type), POINTER :: para_env
1221 
1222  CALL timeset(routinen, handle)
1223 
1224  cpassert(SIZE(ks_matrix, 2) == 1)
1225 
1226  unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1227 
1228  IF (geometry_did_change) THEN
1229  CALL hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
1230  END IF
1231 
1232  nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_1(1, 1))
1233  IF (nblks == 0) THEN
1234  cpabort("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1235  END IF
1236 
1237  DO ispin = 1, nspins
1238  nblks = dbt_get_num_blocks_total(ri_data%t_2c_int(ispin, 1))
1239  IF (nblks == 0) THEN
1240  cpabort("2-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1241  END IF
1242  END DO
1243 
1244  IF (.NOT. ALLOCATED(ri_data%t_3c_int_mo)) THEN
1245  do_initialize = .true.
1246  cpassert(.NOT. ALLOCATED(ri_data%t_3c_ctr_RI))
1247  cpassert(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS))
1248  cpassert(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS_copy))
1249  ALLOCATE (ri_data%t_3c_int_mo(nspins, 1, 1))
1250  ALLOCATE (ri_data%t_3c_ctr_RI(nspins, 1, 1))
1251  ALLOCATE (ri_data%t_3c_ctr_KS(nspins, 1, 1))
1252  ALLOCATE (ri_data%t_3c_ctr_KS_copy(nspins, 1, 1))
1253  ELSE
1254  do_initialize = .false.
1255  END IF
1256 
1257  CALL get_qs_env(qs_env, para_env=para_env)
1258 
1259  ALLOCATE (bounds(2, 1))
1260 
1261  CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, distribution=ks_dist)
1262  CALL dbcsr_distribution_get(ks_dist, group=comm_2d_handle, nprows=pdims_2d(1), npcols=pdims_2d(2))
1263 
1264  CALL comm_2d%set_handle(comm_2d_handle)
1265  pgrid_2d = dbt_nd_mp_comm(comm_2d, [1], [2], pdims_2d=pdims_2d)
1266 
1267  CALL create_2c_tensor(ks_t, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_fit, ri_data%bsizes_AO_fit, &
1268  name="(AO | AO)")
1269 
1270  DEALLOCATE (dist1, dist2)
1271 
1272  CALL para_env%sync()
1273  t1 = m_walltime()
1274 
1275  ALLOCATE (t_3c_int_mo_1(1, 1), t_3c_int_mo_2(1, 1))
1276  DO ispin = 1, nspins
1277 
1278  CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1279  ALLOCATE (mo_bsizes_2(n_mos))
1280  mo_bsizes_2 = 1
1281 
1282  CALL create_tensor_batches(mo_bsizes_2, ri_data%n_mem, mem_start, mem_end, &
1283  mem_start_block_2, mem_end_block_2)
1284  n_mem = ri_data%n_mem
1285  ALLOCATE (mem_size(n_mem))
1286 
1287  DO i_mem = 1, n_mem
1288  bsize = sum(mo_bsizes_2(mem_start_block_2(i_mem):mem_end_block_2(i_mem)))
1289  mem_size(i_mem) = bsize
1290  END DO
1291 
1292  CALL split_block_sizes(mem_size, mo_bsizes_1, ri_data%max_bsize_MO)
1293  ALLOCATE (mem_start_block_1(n_mem))
1294  ALLOCATE (mem_end_block_1(n_mem))
1295  nblock = SIZE(mo_bsizes_1)
1296  iblock = 0
1297  DO i_mem = 1, n_mem
1298  bsum = 0
1299  DO
1300  iblock = iblock + 1
1301  cpassert(iblock <= nblock)
1302  bsum = bsum + mo_bsizes_1(iblock)
1303  IF (bsum == mem_size(i_mem)) THEN
1304  IF (i_mem == 1) THEN
1305  mem_start_block_1(i_mem) = 1
1306  ELSE
1307  mem_start_block_1(i_mem) = mem_end_block_1(i_mem - 1) + 1
1308  END IF
1309  mem_end_block_1(i_mem) = iblock
1310  EXIT
1311  END IF
1312  END DO
1313  END DO
1314 
1315  ALLOCATE (batch_ranges_1(ri_data%n_mem + 1))
1316  batch_ranges_1(:ri_data%n_mem) = mem_start_block_1(:)
1317  batch_ranges_1(ri_data%n_mem + 1) = mem_end_block_1(ri_data%n_mem) + 1
1318 
1319  ALLOCATE (batch_ranges_2(ri_data%n_mem + 1))
1320  batch_ranges_2(:ri_data%n_mem) = mem_start_block_2(:)
1321  batch_ranges_2(ri_data%n_mem + 1) = mem_end_block_2(ri_data%n_mem) + 1
1322 
1323  iproc = para_env%mepos
1324 
1325  CALL create_3c_tensor(t_3c_int_mo_1(1, 1), dist1, dist2, dist3, ri_data%pgrid_1, &
1326  ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, mo_bsizes_1, &
1327  [1, 2], [3], &
1328  name="(AO RI | MO)")
1329 
1330  DEALLOCATE (dist1, dist2, dist3)
1331 
1332  CALL create_3c_tensor(t_3c_int_mo_2(1, 1), dist1, dist2, dist3, ri_data%pgrid_2, &
1333  mo_bsizes_1, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1334  [1], [2, 3], &
1335  name="(MO | RI AO)")
1336 
1337  DEALLOCATE (dist1, dist2, dist3)
1338 
1339  CALL create_2c_tensor(mo_coeff_t_split, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_split, mo_bsizes_1, &
1340  name="(AO | MO)")
1341 
1342  DEALLOCATE (dist1, dist2)
1343 
1344  cpassert(homo(ispin)/ri_data%n_mem > 0)
1345 
1346  IF (do_initialize) THEN
1347  pdims(:) = 0
1348 
1349  CALL dbt_pgrid_create(para_env, pdims, pgrid, &
1350  tensor_dims=[SIZE(ri_data%bsizes_RI_fit), &
1351  (homo(ispin) - 1)/ri_data%n_mem + 1, &
1352  SIZE(ri_data%bsizes_AO_fit)])
1353  CALL create_3c_tensor(ri_data%t_3c_int_mo(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1354  ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1355  [1], [2, 3], &
1356  name="(RI | MO AO)")
1357 
1358  DEALLOCATE (dist1, dist2, dist3)
1359 
1360  CALL create_3c_tensor(ri_data%t_3c_ctr_KS(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1361  ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1362  [1, 2], [3], &
1363  name="(RI MO | AO)")
1364  DEALLOCATE (dist1, dist2, dist3)
1365  CALL dbt_pgrid_destroy(pgrid)
1366 
1367  CALL dbt_create(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%t_3c_ctr_RI(ispin, 1, 1), name="(RI | MO AO)")
1368  CALL dbt_create(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1369  END IF
1370 
1371  CALL dbt_create(mo_coeff(ispin), mo_coeff_t, name="MO coeffs")
1372  CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), mo_coeff_t)
1373  CALL dbt_copy(mo_coeff_t, mo_coeff_t_split, move_data=.true.)
1374  CALL dbt_filter(mo_coeff_t_split, ri_data%filter_eps_mo)
1375  CALL dbt_destroy(mo_coeff_t)
1376 
1377  CALL dbt_batched_contract_init(ks_t)
1378  CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS(ispin, 1, 1), batch_range_2=batch_ranges_2)
1379  CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), batch_range_2=batch_ranges_2)
1380 
1381  CALL dbt_batched_contract_init(ri_data%t_2c_int(ispin, 1))
1382  CALL dbt_batched_contract_init(ri_data%t_3c_int_mo(ispin, 1, 1), batch_range_2=batch_ranges_2)
1383  CALL dbt_batched_contract_init(ri_data%t_3c_ctr_RI(ispin, 1, 1), batch_range_2=batch_ranges_2)
1384 
1385  DO i_mem = 1, n_mem
1386 
1387  bounds(:, 1) = [mem_start(i_mem), mem_end(i_mem)]
1388 
1389  CALL dbt_batched_contract_init(mo_coeff_t_split)
1390  CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1))
1391  CALL dbt_batched_contract_init(t_3c_int_mo_1(1, 1), &
1392  batch_range_3=batch_ranges_1)
1393  CALL timeset(routinen//"_MOx3C_R", handle2)
1394  CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_1(1, 1), &
1395  0.0_dp, t_3c_int_mo_1(1, 1), &
1396  contract_1=[1], notcontract_1=[2], &
1397  contract_2=[3], notcontract_2=[1, 2], &
1398  map_1=[3], map_2=[1, 2], &
1399  bounds_2=bounds, &
1400  filter_eps=ri_data%filter_eps_mo/2, &
1401  unit_nr=unit_nr_dbcsr, &
1402  move_data=.false., &
1403  flop=nflop)
1404 
1405  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1406 
1407  CALL timestop(handle2)
1408  CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1409  CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1410  CALL dbt_batched_contract_finalize(t_3c_int_mo_1(1, 1))
1411 
1412  CALL timeset(routinen//"_copy_1", handle2)
1413  CALL dbt_copy(t_3c_int_mo_1(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[3, 1, 2], move_data=.true.)
1414  CALL timestop(handle2)
1415 
1416  CALL dbt_batched_contract_init(mo_coeff_t_split)
1417  CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1))
1418  CALL dbt_batched_contract_init(t_3c_int_mo_2(1, 1), &
1419  batch_range_1=batch_ranges_1)
1420 
1421  CALL timeset(routinen//"_MOx3C_L", handle2)
1422  CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_2(1, 1), &
1423  0.0_dp, t_3c_int_mo_2(1, 1), &
1424  contract_1=[1], notcontract_1=[2], &
1425  contract_2=[1], notcontract_2=[2, 3], &
1426  map_1=[1], map_2=[2, 3], &
1427  bounds_2=bounds, &
1428  filter_eps=ri_data%filter_eps_mo/2, &
1429  unit_nr=unit_nr_dbcsr, &
1430  move_data=.false., &
1431  flop=nflop)
1432 
1433  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1434 
1435  CALL timestop(handle2)
1436 
1437  CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1438  CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1439  CALL dbt_batched_contract_finalize(t_3c_int_mo_2(1, 1))
1440 
1441  CALL timeset(routinen//"_copy_1", handle2)
1442  CALL dbt_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
1443  summation=.true., move_data=.true.)
1444 
1445  CALL dbt_filter(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%filter_eps_mo)
1446  CALL timestop(handle2)
1447 
1448  CALL timeset(routinen//"_RIx3C", handle2)
1449 
1450  CALL dbt_contract(1.0_dp, ri_data%t_2c_int(ispin, 1), ri_data%t_3c_int_mo(ispin, 1, 1), &
1451  0.0_dp, ri_data%t_3c_ctr_RI(ispin, 1, 1), &
1452  contract_1=[1], notcontract_1=[2], &
1453  contract_2=[1], notcontract_2=[2, 3], &
1454  map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
1455  unit_nr=unit_nr_dbcsr, &
1456  flop=nflop)
1457 
1458  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1459 
1460  CALL timestop(handle2)
1461 
1462  CALL timeset(routinen//"_copy_2", handle2)
1463 
1464  ! note: this copy should not involve communication (same block sizes, same 3d distribution on same process grid)
1465  CALL dbt_copy(ri_data%t_3c_ctr_RI(ispin, 1, 1), ri_data%t_3c_ctr_KS(ispin, 1, 1), move_data=.true.)
1466  CALL dbt_copy(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1467  CALL timestop(handle2)
1468 
1469  CALL timeset(routinen//"_3Cx3C", handle2)
1470  CALL dbt_contract(-fac, ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), &
1471  1.0_dp, ks_t, &
1472  contract_1=[1, 2], notcontract_1=[3], &
1473  contract_2=[1, 2], notcontract_2=[3], &
1474  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1475  unit_nr=unit_nr_dbcsr, move_data=.true., &
1476  flop=nflop)
1477 
1478  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1479 
1480  CALL timestop(handle2)
1481  END DO
1482 
1483  CALL dbt_batched_contract_finalize(ks_t)
1484  CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS(ispin, 1, 1))
1485  CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1486 
1487  CALL dbt_batched_contract_finalize(ri_data%t_2c_int(ispin, 1))
1488  CALL dbt_batched_contract_finalize(ri_data%t_3c_int_mo(ispin, 1, 1))
1489  CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_RI(ispin, 1, 1))
1490 
1491  CALL dbt_destroy(t_3c_int_mo_1(1, 1))
1492  CALL dbt_destroy(t_3c_int_mo_2(1, 1))
1493  CALL dbt_clear(ri_data%t_3c_int_mo(ispin, 1, 1))
1494 
1495  CALL dbt_destroy(mo_coeff_t_split)
1496 
1497  CALL dbt_filter(ks_t, ri_data%filter_eps)
1498 
1499  CALL dbt_create(ks_matrix(ispin, 1)%matrix, ks_t_mat)
1500  CALL dbt_copy(ks_t, ks_t_mat, move_data=.true.)
1501  CALL dbt_copy_tensor_to_matrix(ks_t_mat, ks_matrix(ispin, 1)%matrix, summation=.true.)
1502  CALL dbt_destroy(ks_t_mat)
1503 
1504  DEALLOCATE (mem_end, mem_start, mo_bsizes_2, mem_size, mem_start_block_1, mem_end_block_1, &
1505  mem_start_block_2, mem_end_block_2, batch_ranges_1, batch_ranges_2)
1506 
1507  END DO
1508 
1509  CALL dbt_pgrid_destroy(pgrid_2d)
1510  CALL dbt_destroy(ks_t)
1511 
1512  CALL para_env%sync()
1513  t2 = m_walltime()
1514 
1515  ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1516 
1517  CALL timestop(handle)
1518 
1519  END SUBROUTINE
1520 
1521 ! **************************************************************************************************
1522 !> \brief Calculate Fock (AKA Kohn-Sham) matrix in rho flavor
1523 !>
1524 !> M(mu, lambda, R) = sum_{nu} int_3c(mu, nu, R) P(nu, lambda)
1525 !> KS(mu, lambda) = sum_{nu,R} B(mu, nu, R) M(lambda, nu, R)
1526 !> \param qs_env ...
1527 !> \param ri_data ...
1528 !> \param ks_matrix ...
1529 !> \param rho_ao ...
1530 !> \param geometry_did_change ...
1531 !> \param nspins ...
1532 !> \param fac ...
1533 ! **************************************************************************************************
1534  SUBROUTINE hfx_ri_update_ks_pmat(qs_env, ri_data, ks_matrix, rho_ao, &
1535  geometry_did_change, nspins, fac)
1536  TYPE(qs_environment_type), POINTER :: qs_env
1537  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1538  TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix, rho_ao
1539  LOGICAL, INTENT(IN) :: geometry_did_change
1540  INTEGER, INTENT(IN) :: nspins
1541  REAL(dp), INTENT(IN) :: fac
1542 
1543  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_ks_Pmat'
1544 
1545  INTEGER :: handle, handle2, i_mem, ispin, j_mem, &
1546  n_mem, n_mem_ri, unit_nr, unit_nr_dbcsr
1547  INTEGER(int_8) :: flops_ks_max, flops_p_max, nblks, nflop, &
1548  nze, nze_3c, nze_3c_1, nze_3c_2, &
1549  nze_ks, nze_rho
1550  INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_ao, batch_ranges_ri, dist1, &
1551  dist2
1552  INTEGER, DIMENSION(2, 1) :: bounds_i
1553  INTEGER, DIMENSION(2, 2) :: bounds_ij, bounds_j
1554  INTEGER, DIMENSION(3) :: dims_3c
1555  REAL(dp) :: memory_3c, occ, occ_3c, occ_3c_1, &
1556  occ_3c_2, occ_ks, occ_rho, t1, t2, &
1557  unused
1558  TYPE(dbt_type) :: ks_t, ks_tmp, rho_ao_tmp, t_3c_1, &
1559  t_3c_3, tensor_old
1560  TYPE(mp_para_env_type), POINTER :: para_env
1561 
1562  IF (.NOT. fac > epsilon(0.0_dp)) RETURN
1563 
1564  CALL timeset(routinen, handle)
1565 
1566  NULLIFY (para_env)
1567 
1568  ! get a useful output_unit
1569  unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1570  unit_nr = ri_data%unit_nr
1571 
1572  CALL get_qs_env(qs_env, para_env=para_env)
1573 
1574  cpassert(SIZE(ks_matrix, 2) == 1)
1575 
1576  IF (geometry_did_change) THEN
1577  CALL hfx_ri_pre_scf_pmat(qs_env, ri_data)
1578  DO ispin = 1, nspins
1579  CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), 0.0_dp)
1580  CALL dbt_scale(ri_data%ks_t(ispin, 1), 0.0_dp)
1581  END DO
1582  END IF
1583 
1584  nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_2(1, 1))
1585  IF (nblks == 0) THEN
1586  cpabort("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1587  END IF
1588 
1589  n_mem = ri_data%n_mem
1590  n_mem_ri = ri_data%n_mem_RI
1591 
1592  CALL dbt_create(ks_matrix(1, 1)%matrix, ks_tmp)
1593  CALL dbt_create(rho_ao(1, 1)%matrix, rho_ao_tmp)
1594 
1595  CALL create_2c_tensor(ks_t, dist1, dist2, ri_data%pgrid_2d, &
1596  ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1597  name="(AO | AO)")
1598  DEALLOCATE (dist1, dist2)
1599 
1600  CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_1)
1601  CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_3)
1602 
1603  CALL para_env%sync()
1604  t1 = m_walltime()
1605 
1606  flops_ks_max = 0; flops_p_max = 0
1607 
1608  ALLOCATE (batch_ranges_ri(ri_data%n_mem_RI + 1))
1609  ALLOCATE (batch_ranges_ao(ri_data%n_mem + 1))
1610  batch_ranges_ri(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
1611  batch_ranges_ri(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
1612  batch_ranges_ao(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
1613  batch_ranges_ao(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
1614 
1615  memory_3c = 0.0_dp
1616  DO ispin = 1, nspins
1617 
1618  CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze_3c, occ_3c)
1619 
1620  nze_rho = 0
1621  occ_rho = 0.0_dp
1622  nze_3c_1 = 0
1623  occ_3c_1 = 0.0_dp
1624  nze_3c_2 = 0
1625  occ_3c_2 = 0.0_dp
1626 
1627  CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1628 
1629  !We work with Delta P: the diff between previous SCF step and this one, for increased sparsity
1630  CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), -1.0_dp)
1631  CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), summation=.true., move_data=.true.)
1632 
1633  CALL get_tensor_occupancy(ri_data%rho_ao_t(ispin, 1), nze_rho, occ_rho)
1634 
1635  CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1), batch_range_1=batch_ranges_ao, &
1636  batch_range_2=batch_ranges_ri)
1637  CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_ao, batch_range_2=batch_ranges_ri)
1638 
1639  CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), tensor_old)
1640 
1641  DO i_mem = 1, n_mem
1642 
1643  CALL dbt_batched_contract_init(ri_data%rho_ao_t(ispin, 1))
1644  CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1), batch_range_2=batch_ranges_ri, &
1645  batch_range_3=batch_ranges_ao)
1646  CALL dbt_batched_contract_init(t_3c_1, batch_range_2=batch_ranges_ri, batch_range_3=batch_ranges_ao)
1647  DO j_mem = 1, n_mem_ri
1648 
1649  CALL timeset(routinen//"_Px3C", handle2)
1650 
1651  CALL dbt_get_info(t_3c_1, nfull_total=dims_3c)
1652  bounds_i(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1653  bounds_j(:, 1) = [1, dims_3c(1)]
1654  bounds_j(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1655 
1656  CALL dbt_contract(1.0_dp, ri_data%rho_ao_t(ispin, 1), ri_data%t_3c_int_ctr_2(1, 1), &
1657  0.0_dp, t_3c_1, &
1658  contract_1=[2], notcontract_1=[1], &
1659  contract_2=[3], notcontract_2=[1, 2], &
1660  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
1661  bounds_2=bounds_i, &
1662  bounds_3=bounds_j, &
1663  unit_nr=unit_nr_dbcsr, &
1664  flop=nflop)
1665 
1666  CALL timestop(handle2)
1667 
1668  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1669 
1670  CALL get_tensor_occupancy(t_3c_1, nze, occ)
1671  nze_3c_1 = nze_3c_1 + nze
1672  occ_3c_1 = occ_3c_1 + occ
1673 
1674  CALL timeset(routinen//"_copy_2", handle2)
1675  CALL dbt_copy(t_3c_1, t_3c_3, order=[3, 2, 1], move_data=.true.)
1676  CALL timestop(handle2)
1677 
1678  bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1679  bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1680 
1681  CALL decompress_tensor(tensor_old, ri_data%blk_indices(i_mem, j_mem)%ind, &
1682  ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1683 
1684  CALL dbt_copy(tensor_old, ri_data%t_3c_int_ctr_1(1, 1), move_data=.true.)
1685 
1686  CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
1687  nze_3c_2 = nze_3c_2 + nze
1688  occ_3c_2 = occ_3c_2 + occ
1689  CALL timeset(routinen//"_KS", handle2)
1690  CALL dbt_batched_contract_init(ks_t)
1691  CALL dbt_contract(-fac, ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, &
1692  1.0_dp, ks_t, &
1693  contract_1=[1, 2], notcontract_1=[3], &
1694  contract_2=[1, 2], notcontract_2=[3], &
1695  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1696  bounds_1=bounds_ij, &
1697  unit_nr=unit_nr_dbcsr, &
1698  flop=nflop, move_data=.true.)
1699 
1700  CALL dbt_batched_contract_finalize(ks_t, unit_nr=unit_nr_dbcsr)
1701  CALL timestop(handle2)
1702 
1703  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1704 
1705  END DO
1706  CALL dbt_batched_contract_finalize(ri_data%rho_ao_t(ispin, 1), unit_nr=unit_nr_dbcsr)
1707  CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1708  CALL dbt_batched_contract_finalize(t_3c_1)
1709  END DO
1710  CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1711  CALL dbt_batched_contract_finalize(t_3c_3)
1712 
1713  DO i_mem = 1, n_mem
1714  DO j_mem = 1, n_mem_ri
1715  associate(blk_indices => ri_data%blk_indices(i_mem, j_mem), t_3c => ri_data%t_3c_int_ctr_1(1, 1))
1716  CALL decompress_tensor(tensor_old, blk_indices%ind, &
1717  ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1718  CALL dbt_copy(tensor_old, t_3c, move_data=.true.)
1719 
1720  unused = 0
1721  CALL compress_tensor(t_3c, blk_indices%ind, ri_data%store_3c(i_mem, j_mem), &
1722  ri_data%filter_eps_storage, unused)
1723  END associate
1724  END DO
1725  END DO
1726 
1727  CALL dbt_destroy(tensor_old)
1728 
1729  CALL get_tensor_occupancy(ks_t, nze_ks, occ_ks)
1730 
1731  !rho_ao_t holds the density difference, and ks_t is built upon it => need the full picture
1732  CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1733  CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), move_data=.true.)
1734  CALL dbt_copy(ks_t, ri_data%ks_t(ispin, 1), summation=.true., move_data=.true.)
1735 
1736  CALL dbt_copy(ri_data%ks_t(ispin, 1), ks_tmp)
1737  CALL dbt_copy_tensor_to_matrix(ks_tmp, ks_matrix(ispin, 1)%matrix, summation=.true.)
1738  CALL dbt_clear(ks_tmp)
1739 
1740  IF (unit_nr > 0 .AND. geometry_did_change) THEN
1741  WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1742  'Occupancy of density matrix P:', real(nze_rho, dp), '/', occ_rho*100, '%'
1743  WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1744  'Occupancy of 3c ints:', real(nze_3c, dp), '/', occ_3c*100, '%'
1745  WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1746  'Occupancy after contraction with K:', real(nze_3c_2, dp), '/', occ_3c_2*100, '%'
1747  WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1748  'Occupancy after contraction with P:', real(nze_3c_1, dp), '/', occ_3c_1*100, '%'
1749  WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1750  'Occupancy of Kohn-Sham matrix:', real(nze_ks, dp), '/', occ_ks*100, '%'
1751  END IF
1752 
1753  END DO
1754 
1755  CALL para_env%sync()
1756  t2 = m_walltime()
1757 
1758  ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1759 
1760  CALL dbt_destroy(t_3c_1)
1761  CALL dbt_destroy(t_3c_3)
1762 
1763  CALL dbt_destroy(rho_ao_tmp)
1764  CALL dbt_destroy(ks_t)
1765  CALL dbt_destroy(ks_tmp)
1766 
1767  CALL timestop(handle)
1768 
1769  END SUBROUTINE
1770 
1771 ! **************************************************************************************************
1772 !> \brief Implementation based on the MO flavor
1773 !> \param qs_env ...
1774 !> \param ri_data ...
1775 !> \param nspins ...
1776 !> \param hf_fraction ...
1777 !> \param mo_coeff ...
1778 !> \param use_virial ...
1779 !> \note There is no response code for forces with the MO flavor
1780 ! **************************************************************************************************
1781  SUBROUTINE hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff, use_virial)
1782 
1783  TYPE(qs_environment_type), POINTER :: qs_env
1784  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1785  INTEGER, INTENT(IN) :: nspins
1786  REAL(dp), INTENT(IN) :: hf_fraction
1787  TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1788  LOGICAL, INTENT(IN), OPTIONAL :: use_virial
1789 
1790  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_forces_mo'
1791 
1792  INTEGER :: dummy_int, handle, i_mem, i_xyz, ibasis, ispin, j_mem, k_mem, n_mem, n_mem_input, &
1793  n_mem_input_ri, n_mem_ri, n_mem_ri_fit, n_mos, natom, nkind, unit_nr_dbcsr
1794  INTEGER(int_8) :: nflop
1795  INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_blk_end, batch_blk_start, &
1796  batch_end, batch_end_ri, batch_end_ri_fit, batch_ranges, batch_ranges_ri, &
1797  batch_ranges_ri_fit, batch_start, batch_start_ri, batch_start_ri_fit, bsizes_mo, dist1, &
1798  dist2, dist3, idx_to_at_ao, idx_to_at_ri, kind_of
1799  INTEGER, DIMENSION(2, 1) :: bounds_ctr_1d
1800  INTEGER, DIMENSION(2, 2) :: bounds_ctr_2d
1801  INTEGER, DIMENSION(3) :: pdims
1802  LOGICAL :: use_virial_prv
1803  REAL(dp) :: pref, spin_fac, t1, t2
1804  TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1805  TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_ao_ind, t_3c_der_ri_ind
1806  TYPE(cell_type), POINTER :: cell
1807  TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_s
1808  TYPE(dbt_pgrid_type) :: pgrid_1, pgrid_2
1809  TYPE(dbt_type) :: t_2c_ri, t_2c_ri_inv, t_2c_ri_met, t_2c_ri_pq, t_2c_tmp, t_3c_0, t_3c_1, &
1810  t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_6, t_3c_ao_ri_ao, t_3c_ao_ri_mo, t_3c_desymm, &
1811  t_3c_mo_ri_ao, t_3c_mo_ri_mo, t_3c_ri_ao_ao, t_3c_ri_ctr, t_3c_ri_mo_mo, &
1812  t_3c_ri_mo_mo_fit, t_3c_work, t_mo_coeff, t_mo_cpy
1813  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_ri, t_2c_mo_ao, &
1814  t_2c_mo_ao_ctr, t_3c_der_ao, t_3c_der_ao_ctr_1, t_3c_der_ri, t_3c_der_ri_ctr_1, &
1815  t_3c_der_ri_ctr_2
1816  TYPE(dft_control_type), POINTER :: dft_control
1817  TYPE(gto_basis_set_p_type), ALLOCATABLE, &
1818  DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
1819  TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
1820  TYPE(hfx_compression_type), ALLOCATABLE, &
1821  DIMENSION(:, :) :: t_3c_der_ao_comp, t_3c_der_ri_comp
1822  TYPE(mp_para_env_type), POINTER :: para_env
1823  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1824  TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1825  TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1826 
1827  ! 1) Precompute the derivatives that are needed (3c, 3c RI and metric)
1828  ! 2) Go over batched of occupied MOs so as to save memory and optimize contractions
1829  ! 3) Contract all 3c integrals and derivatives with MO coeffs
1830  ! 4) Contract relevant quantities with the inverse 2c RI (metric or pot)
1831  ! 5) First force contribution with the 2c RI derivative d/dx (Q|R)
1832  ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R)
1833  ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
1834  ! 8) If metric, do the last force contribution due to d/dx S^-1 (First contract (ab|P), then S^-1)
1835 
1836  use_virial_prv = .false.
1837  IF (PRESENT(use_virial)) use_virial_prv = use_virial
1838  IF (use_virial_prv) THEN
1839  cpabort("Stress tensor with RI-HFX MO flavor not implemented.")
1840  END IF
1841 
1842  unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1843 
1844  CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
1845  atomic_kind_set=atomic_kind_set, cell=cell, force=force, &
1846  matrix_s=matrix_s, para_env=para_env, dft_control=dft_control, &
1847  qs_kind_set=qs_kind_set)
1848 
1849  pdims(:) = 0
1850  CALL dbt_pgrid_create(para_env, pdims, pgrid_1, tensor_dims=[SIZE(ri_data%bsizes_AO_split), &
1851  SIZE(ri_data%bsizes_RI_split), &
1852  SIZE(ri_data%bsizes_AO_split)])
1853  pdims(:) = 0
1854  CALL dbt_pgrid_create(para_env, pdims, pgrid_2, tensor_dims=[SIZE(ri_data%bsizes_RI_split), &
1855  SIZE(ri_data%bsizes_AO_split), &
1856  SIZE(ri_data%bsizes_AO_split)])
1857 
1858  CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, pgrid_1, &
1859  ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1860  [1, 2], [3], name="(AO RI | AO)")
1861  DEALLOCATE (dist1, dist2, dist3)
1862  CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, pgrid_2, &
1863  ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1864  [1], [2, 3], name="(RI | AO AO)")
1865  DEALLOCATE (dist1, dist2, dist3)
1866 
1867  ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
1868  CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
1869  CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
1870  CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
1871  CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
1872 
1873  DO ibasis = 1, SIZE(basis_set_ao)
1874  orb_basis => basis_set_ao(ibasis)%gto_basis_set
1875  CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
1876  ri_basis => basis_set_ri(ibasis)%gto_basis_set
1877  CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
1878  END DO
1879 
1880  ALLOCATE (t_2c_der_metric(3), t_2c_der_ri(3), t_2c_mo_ao(3), t_2c_mo_ao_ctr(3), t_3c_der_ao(3), &
1881  t_3c_der_ao_ctr_1(3), t_3c_der_ri(3), t_3c_der_ri_ctr_1(3), t_3c_der_ri_ctr_2(3))
1882 
1883  ! 1) Precompute the derivatives
1884  CALL precalc_derivatives(t_3c_der_ri_comp, t_3c_der_ao_comp, t_3c_der_ri_ind, t_3c_der_ao_ind, &
1885  t_2c_der_ri, t_2c_der_metric, t_3c_ri_ao_ao, &
1886  basis_set_ao, basis_set_ri, ri_data, qs_env)
1887 
1888  DO ibasis = 1, SIZE(basis_set_ao)
1889  orb_basis => basis_set_ao(ibasis)%gto_basis_set
1890  ri_basis => basis_set_ri(ibasis)%gto_basis_set
1891  CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
1892  CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
1893  END DO
1894 
1895  n_mem = SIZE(t_3c_der_ri_comp, 1)
1896  DO i_xyz = 1, 3
1897  CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_ri(i_xyz))
1898  CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_ao(i_xyz))
1899 
1900  DO i_mem = 1, n_mem
1901  CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ri_ind(i_mem, i_xyz)%ind, &
1902  t_3c_der_ri_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1903  CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_ri(i_xyz), order=[2, 1, 3], move_data=.true., summation=.true.)
1904 
1905  CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ao_ind(i_mem, i_xyz)%ind, &
1906  t_3c_der_ao_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1907  CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_ao(i_xyz), order=[2, 1, 3], move_data=.true., summation=.true.)
1908  END DO
1909  END DO
1910 
1911  DO i_xyz = 1, 3
1912  DO i_mem = 1, n_mem
1913  CALL dealloc_containers(t_3c_der_ao_comp(i_mem, i_xyz), dummy_int)
1914  CALL dealloc_containers(t_3c_der_ri_comp(i_mem, i_xyz), dummy_int)
1915  END DO
1916  END DO
1917  DEALLOCATE (t_3c_der_ao_ind, t_3c_der_ri_ind)
1918 
1919  ! Get the 3c integrals (desymmetrized)
1920  CALL dbt_create(t_3c_ao_ri_ao, t_3c_desymm)
1921  CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm)
1922  CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm, order=[3, 2, 1], &
1923  summation=.true., move_data=.true.)
1924 
1925  CALL dbt_destroy(t_3c_ao_ri_ao)
1926  CALL dbt_destroy(t_3c_ri_ao_ao)
1927 
1928  ! Some utilities
1929  spin_fac = 0.5_dp
1930  IF (nspins == 2) spin_fac = 1.0_dp
1931 
1932  ALLOCATE (idx_to_at_ri(SIZE(ri_data%bsizes_RI_split)))
1933  CALL get_idx_to_atom(idx_to_at_ri, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
1934 
1935  ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
1936  CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
1937 
1938  CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
1939 
1940  ! 2-center RI tensors
1941  CALL create_2c_tensor(t_2c_ri, dist1, dist2, ri_data%pgrid_2d, &
1942  ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
1943  DEALLOCATE (dist1, dist2)
1944 
1945  CALL create_2c_tensor(t_2c_ri_pq, dist1, dist2, ri_data%pgrid_2d, &
1946  ri_data%bsizes_RI_fit, ri_data%bsizes_RI_fit, name="(RI | RI)")
1947  DEALLOCATE (dist1, dist2)
1948 
1949  IF (.NOT. ri_data%same_op) THEN
1950  !precompute the (P|Q)*S^-1 product
1951  CALL dbt_create(t_2c_ri_pq, t_2c_ri_inv)
1952  CALL dbt_create(t_2c_ri_pq, t_2c_ri_met)
1953  CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_tmp)
1954 
1955  CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), &
1956  0.0_dp, t_2c_tmp, &
1957  contract_1=[2], notcontract_1=[1], &
1958  contract_2=[1], notcontract_2=[2], &
1959  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
1960  unit_nr=unit_nr_dbcsr, flop=nflop)
1961  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1962 
1963  CALL dbt_copy(t_2c_tmp, t_2c_ri_inv, move_data=.true.)
1964  CALL dbt_destroy(t_2c_tmp)
1965  END IF
1966 
1967  !3 loops in MO force evaluations. To be consistent with input MEMORY_CUT, need to take the cubic root
1968  !No need to cut memory further because SCF tensors alrady dense
1969  n_mem_input = floor((ri_data%n_mem_input - 0.1_dp)**(1._dp/3._dp)) + 1
1970  n_mem_input_ri = floor((ri_data%n_mem_input - 0.1_dp)/n_mem_input**2) + 1
1971 
1972  !batches on RI_split and RI_fit blocks
1973  n_mem_ri = n_mem_input_ri
1974  CALL create_tensor_batches(ri_data%bsizes_RI_split, n_mem_ri, batch_start_ri, batch_end_ri, &
1975  batch_blk_start, batch_blk_end)
1976  ALLOCATE (batch_ranges_ri(n_mem_ri + 1))
1977  batch_ranges_ri(1:n_mem_ri) = batch_blk_start(1:n_mem_ri)
1978  batch_ranges_ri(n_mem_ri + 1) = batch_blk_end(n_mem_ri) + 1
1979  DEALLOCATE (batch_blk_start, batch_blk_end)
1980 
1981  n_mem_ri_fit = n_mem_input_ri
1982  CALL create_tensor_batches(ri_data%bsizes_RI_fit, n_mem_ri_fit, batch_start_ri_fit, batch_end_ri_fit, &
1983  batch_blk_start, batch_blk_end)
1984  ALLOCATE (batch_ranges_ri_fit(n_mem_ri_fit + 1))
1985  batch_ranges_ri_fit(1:n_mem_ri_fit) = batch_blk_start(1:n_mem_ri_fit)
1986  batch_ranges_ri_fit(n_mem_ri_fit + 1) = batch_blk_end(n_mem_ri_fit) + 1
1987  DEALLOCATE (batch_blk_start, batch_blk_end)
1988 
1989  DO ispin = 1, nspins
1990 
1991  ! 2 )Prepare the batches for this spin
1992  CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1993  !note: optimized GPU block size for SCF is 64x1x64. Here we do 8x8x64
1994  CALL split_block_sizes([n_mos], bsizes_mo, max_size=floor(sqrt(ri_data%max_bsize_MO - 0.1)) + 1)
1995 
1996  !batching on MO blocks
1997  n_mem = n_mem_input
1998  CALL create_tensor_batches(bsizes_mo, n_mem, batch_start, batch_end, &
1999  batch_blk_start, batch_blk_end)
2000  ALLOCATE (batch_ranges(n_mem + 1))
2001  batch_ranges(1:n_mem) = batch_blk_start(1:n_mem)
2002  batch_ranges(n_mem + 1) = batch_blk_end(n_mem) + 1
2003  DEALLOCATE (batch_blk_start, batch_blk_end)
2004 
2005  ! Initialize the different tensors needed (Note: keep MO coeffs as (MO | AO) for less transpose)
2006  CALL create_2c_tensor(t_mo_coeff, dist1, dist2, ri_data%pgrid_2d, bsizes_mo, &
2007  ri_data%bsizes_AO_split, name="MO coeffs")
2008  DEALLOCATE (dist1, dist2)
2009  CALL dbt_create(mo_coeff(ispin), t_2c_tmp, name="MO coeffs")
2010  CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), t_2c_tmp)
2011  CALL dbt_copy(t_2c_tmp, t_mo_coeff, order=[2, 1], move_data=.true.)
2012  CALL dbt_destroy(t_2c_tmp)
2013 
2014  CALL dbt_create(t_mo_coeff, t_mo_cpy)
2015  CALL dbt_copy(t_mo_coeff, t_mo_cpy)
2016  DO i_xyz = 1, 3
2017  CALL dbt_create(t_mo_coeff, t_2c_mo_ao_ctr(i_xyz))
2018  CALL dbt_create(t_mo_coeff, t_2c_mo_ao(i_xyz))
2019  END DO
2020 
2021  CALL create_3c_tensor(t_3c_ao_ri_mo, dist1, dist2, dist3, pgrid_1, ri_data%bsizes_AO_split, &
2022  ri_data%bsizes_RI_split, bsizes_mo, [1, 2], [3], name="(AO RI| MO)")
2023  DEALLOCATE (dist1, dist2, dist3)
2024 
2025  CALL dbt_create(t_3c_ao_ri_mo, t_3c_0)
2026  CALL dbt_destroy(t_3c_ao_ri_mo)
2027 
2028  CALL create_3c_tensor(t_3c_mo_ri_ao, dist1, dist2, dist3, pgrid_1, bsizes_mo, ri_data%bsizes_RI_split, &
2029  ri_data%bsizes_AO_split, [1, 2], [3], name="(MO RI | AO)")
2030  DEALLOCATE (dist1, dist2, dist3)
2031  CALL dbt_create(t_3c_mo_ri_ao, t_3c_1)
2032 
2033  DO i_xyz = 1, 3
2034  CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_ri_ctr_1(i_xyz))
2035  CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_ao_ctr_1(i_xyz))
2036  END DO
2037 
2038  CALL create_3c_tensor(t_3c_mo_ri_mo, dist1, dist2, dist3, pgrid_1, bsizes_mo, &
2039  ri_data%bsizes_RI_split, bsizes_mo, [1, 2], [3], name="(MO RI | MO)")
2040  DEALLOCATE (dist1, dist2, dist3)
2041  CALL dbt_create(t_3c_mo_ri_mo, t_3c_work)
2042 
2043  CALL create_3c_tensor(t_3c_ri_mo_mo, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_split, &
2044  bsizes_mo, bsizes_mo, [1], [2, 3], name="(RI| MO MO)")
2045  DEALLOCATE (dist1, dist2, dist3)
2046 
2047  CALL dbt_create(t_3c_ri_mo_mo, t_3c_2)
2048  CALL dbt_create(t_3c_ri_mo_mo, t_3c_3)
2049  CALL dbt_create(t_3c_ri_mo_mo, t_3c_ri_ctr)
2050  DO i_xyz = 1, 3
2051  CALL dbt_create(t_3c_ri_mo_mo, t_3c_der_ri_ctr_2(i_xyz))
2052  END DO
2053 
2054  !Very large RI_fit blocks => new pgrid to make sure distribution is ideal
2055  pdims(:) = 0
2056  CALL create_3c_tensor(t_3c_ri_mo_mo_fit, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_fit, &
2057  bsizes_mo, bsizes_mo, [1], [2, 3], name="(RI| MO MO)")
2058  DEALLOCATE (dist1, dist2, dist3)
2059 
2060  CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_4)
2061  CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_5)
2062  CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_6)
2063 
2064  CALL dbt_batched_contract_init(t_3c_desymm, batch_range_2=batch_ranges_ri)
2065  CALL dbt_batched_contract_init(t_3c_0, batch_range_2=batch_ranges_ri, batch_range_3=batch_ranges)
2066 
2067  DO i_xyz = 1, 3
2068  CALL dbt_batched_contract_init(t_3c_der_ao(i_xyz), batch_range_2=batch_ranges_ri)
2069  CALL dbt_batched_contract_init(t_3c_der_ri(i_xyz), batch_range_2=batch_ranges_ri)
2070  END DO
2071 
2072  CALL para_env%sync()
2073  t1 = m_walltime()
2074 
2075  ! 2) Loop over batches
2076  DO i_mem = 1, n_mem
2077 
2078  bounds_ctr_1d(1, 1) = batch_start(i_mem)
2079  bounds_ctr_1d(2, 1) = batch_end(i_mem)
2080 
2081  bounds_ctr_2d(1, 1) = 1
2082  bounds_ctr_2d(2, 1) = sum(ri_data%bsizes_AO)
2083 
2084  ! 3) Do the first AO to MO contraction here
2085  CALL timeset(routinen//"_AO2MO_1", handle)
2086  CALL dbt_batched_contract_init(t_mo_coeff)
2087  DO k_mem = 1, n_mem_ri
2088  bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2089  bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2090 
2091  CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_desymm, &
2092  1.0_dp, t_3c_0, &
2093  contract_1=[2], notcontract_1=[1], &
2094  contract_2=[3], notcontract_2=[1, 2], &
2095  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2096  bounds_2=bounds_ctr_1d, &
2097  bounds_3=bounds_ctr_2d, &
2098  unit_nr=unit_nr_dbcsr, flop=nflop)
2099  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2100  END DO
2101  CALL dbt_copy(t_3c_0, t_3c_1, order=[3, 2, 1], move_data=.true.)
2102 
2103  DO i_xyz = 1, 3
2104  DO k_mem = 1, n_mem_ri
2105  bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2106  bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2107 
2108  CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_ao(i_xyz), &
2109  1.0_dp, t_3c_0, &
2110  contract_1=[2], notcontract_1=[1], &
2111  contract_2=[3], notcontract_2=[1, 2], &
2112  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2113  bounds_2=bounds_ctr_1d, &
2114  bounds_3=bounds_ctr_2d, &
2115  unit_nr=unit_nr_dbcsr, flop=nflop)
2116  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2117  END DO
2118  CALL dbt_copy(t_3c_0, t_3c_der_ao_ctr_1(i_xyz), order=[3, 2, 1], move_data=.true.)
2119 
2120  DO k_mem = 1, n_mem_ri
2121  bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2122  bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2123 
2124  CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_ri(i_xyz), &
2125  1.0_dp, t_3c_0, &
2126  contract_1=[2], notcontract_1=[1], &
2127  contract_2=[3], notcontract_2=[1, 2], &
2128  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2129  bounds_2=bounds_ctr_1d, &
2130  bounds_3=bounds_ctr_2d, &
2131  unit_nr=unit_nr_dbcsr, flop=nflop)
2132  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2133  END DO
2134  CALL dbt_copy(t_3c_0, t_3c_der_ri_ctr_1(i_xyz), order=[3, 2, 1], move_data=.true.)
2135  END DO
2136  CALL dbt_batched_contract_finalize(t_mo_coeff)
2137  CALL timestop(handle)
2138 
2139  CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_2=batch_ranges_ri)
2140  CALL dbt_batched_contract_init(t_3c_work, batch_range_1=batch_ranges, batch_range_2=batch_ranges_ri, &
2141  batch_range_3=batch_ranges)
2142  CALL dbt_batched_contract_init(t_3c_2, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2143  CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_ri, &
2144  batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2145 
2146  CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_ri_fit, &
2147  batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2148  CALL dbt_batched_contract_init(t_3c_5, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2149 
2150  DO i_xyz = 1, 3
2151  CALL dbt_batched_contract_init(t_3c_der_ri_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2152  batch_range_2=batch_ranges_ri)
2153  CALL dbt_batched_contract_init(t_3c_der_ao_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2154  batch_range_2=batch_ranges_ri)
2155 
2156  END DO
2157 
2158  IF (.NOT. ri_data%same_op) THEN
2159  CALL dbt_batched_contract_init(t_3c_6, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2160  END IF
2161 
2162  DO j_mem = 1, n_mem
2163 
2164  bounds_ctr_1d(1, 1) = batch_start(j_mem)
2165  bounds_ctr_1d(2, 1) = batch_end(j_mem)
2166 
2167  bounds_ctr_2d(1, 1) = batch_start(i_mem)
2168  bounds_ctr_2d(2, 1) = batch_end(i_mem)
2169 
2170  ! 3) Do the second AO to MO contraction here, followed by the S^-1 contraction
2171  CALL timeset(routinen//"_AO2MO_2", handle)
2172  CALL dbt_batched_contract_init(t_mo_coeff)
2173  DO k_mem = 1, n_mem_ri
2174  bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2175  bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2176 
2177  CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_1, &
2178  1.0_dp, t_3c_work, &
2179  contract_1=[2], notcontract_1=[1], &
2180  contract_2=[3], notcontract_2=[1, 2], &
2181  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2182  bounds_2=bounds_ctr_1d, &
2183  bounds_3=bounds_ctr_2d, &
2184  unit_nr=unit_nr_dbcsr, flop=nflop)
2185  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2186  END DO
2187  CALL dbt_batched_contract_finalize(t_mo_coeff)
2188  CALL timestop(handle)
2189 
2190  bounds_ctr_2d(1, 1) = batch_start(i_mem)
2191  bounds_ctr_2d(2, 1) = batch_end(i_mem)
2192  bounds_ctr_2d(1, 2) = batch_start(j_mem)
2193  bounds_ctr_2d(2, 2) = batch_end(j_mem)
2194 
2195  ! 4) Contract 3c MO integrals with S^-1 as well
2196  CALL timeset(routinen//"_2c_inv", handle)
2197  CALL dbt_copy(t_3c_work, t_3c_3, order=[2, 1, 3], move_data=.true.)
2198  DO k_mem = 1, n_mem_ri
2199  bounds_ctr_1d(1, 1) = batch_start_ri(k_mem)
2200  bounds_ctr_1d(2, 1) = batch_end_ri(k_mem)
2201 
2202  CALL dbt_batched_contract_init(ri_data%t_2c_inv(1, 1))
2203  CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_3c_3, &
2204  1.0_dp, t_3c_2, &
2205  contract_1=[2], notcontract_1=[1], &
2206  contract_2=[1], notcontract_2=[2, 3], &
2207  map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2208  bounds_1=bounds_ctr_1d, &
2209  bounds_3=bounds_ctr_2d, &
2210  unit_nr=unit_nr_dbcsr, flop=nflop)
2211  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2212  CALL dbt_batched_contract_finalize(ri_data%t_2c_inv(1, 1))
2213  END DO
2214  CALL dbt_copy(t_3c_ri_mo_mo, t_3c_3)
2215  CALL timestop(handle)
2216 
2217  !Only contract (ab|P') with MO coeffs since need AO rep for the force of (a'b|P)
2218  bounds_ctr_1d(1, 1) = batch_start(j_mem)
2219  bounds_ctr_1d(2, 1) = batch_end(j_mem)
2220 
2221  bounds_ctr_2d(1, 1) = batch_start(i_mem)
2222  bounds_ctr_2d(2, 1) = batch_end(i_mem)
2223 
2224  CALL timeset(routinen//"_AO2MO_2", handle)
2225  CALL dbt_batched_contract_init(t_mo_coeff)
2226  DO i_xyz = 1, 3
2227  DO k_mem = 1, n_mem_ri
2228  bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2229  bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2230 
2231  CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_ri_ctr_1(i_xyz), &
2232  1.0_dp, t_3c_work, &
2233  contract_1=[2], notcontract_1=[1], &
2234  contract_2=[3], notcontract_2=[1, 2], &
2235  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2236  bounds_2=bounds_ctr_1d, &
2237  bounds_3=bounds_ctr_2d, &
2238  unit_nr=unit_nr_dbcsr, flop=nflop)
2239  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2240  END DO
2241  CALL dbt_copy(t_3c_work, t_3c_der_ri_ctr_2(i_xyz), order=[2, 1, 3], move_data=.true.)
2242  END DO
2243  CALL dbt_batched_contract_finalize(t_mo_coeff)
2244  CALL timestop(handle)
2245 
2246  bounds_ctr_2d(1, 1) = batch_start(i_mem)
2247  bounds_ctr_2d(2, 1) = batch_end(i_mem)
2248  bounds_ctr_2d(1, 2) = batch_start(j_mem)
2249  bounds_ctr_2d(2, 2) = batch_end(j_mem)
2250 
2251  ! 5) Force due to d/dx (P|Q)
2252  CALL timeset(routinen//"_PQ_der", handle)
2253  CALL dbt_copy(t_3c_2, t_3c_4, move_data=.true.)
2254  CALL dbt_copy(t_3c_4, t_3c_5)
2255  DO k_mem = 1, n_mem_ri_fit
2256  bounds_ctr_1d(1, 1) = batch_start_ri_fit(k_mem)
2257  bounds_ctr_1d(2, 1) = batch_end_ri_fit(k_mem)
2258 
2259  CALL dbt_batched_contract_init(t_2c_ri_pq)
2260  CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2261  1.0_dp, t_2c_ri_pq, &
2262  contract_1=[2, 3], notcontract_1=[1], &
2263  contract_2=[2, 3], notcontract_2=[1], &
2264  bounds_1=bounds_ctr_2d, &
2265  bounds_2=bounds_ctr_1d, &
2266  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2267  unit_nr=unit_nr_dbcsr, flop=nflop)
2268  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2269  CALL dbt_batched_contract_finalize(t_2c_ri_pq)
2270  END DO
2271  CALL timestop(handle)
2272 
2273  ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R) (not on the derivatives)
2274  IF (.NOT. ri_data%same_op) THEN
2275  CALL timeset(routinen//"_metric", handle)
2276  DO k_mem = 1, n_mem_ri_fit
2277  bounds_ctr_1d(1, 1) = batch_start_ri_fit(k_mem)
2278  bounds_ctr_1d(2, 1) = batch_end_ri_fit(k_mem)
2279 
2280  CALL dbt_batched_contract_init(t_2c_ri_inv)
2281  CALL dbt_contract(1.0_dp, t_2c_ri_inv, t_3c_4, &
2282  1.0_dp, t_3c_6, &
2283  contract_1=[2], notcontract_1=[1], &
2284  contract_2=[1], notcontract_2=[2, 3], &
2285  bounds_1=bounds_ctr_1d, &
2286  bounds_3=bounds_ctr_2d, &
2287  map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2288  unit_nr=unit_nr_dbcsr, flop=nflop)
2289  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2290  CALL dbt_batched_contract_finalize(t_2c_ri_inv)
2291  END DO
2292  CALL dbt_copy(t_3c_6, t_3c_4, move_data=.true.)
2293 
2294  ! 8) and get the force due to d/dx S^-1
2295  DO k_mem = 1, n_mem_ri_fit
2296  bounds_ctr_1d(1, 1) = batch_start_ri_fit(k_mem)
2297  bounds_ctr_1d(2, 1) = batch_end_ri_fit(k_mem)
2298 
2299  CALL dbt_batched_contract_init(t_2c_ri_met)
2300  CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2301  1.0_dp, t_2c_ri_met, &
2302  contract_1=[2, 3], notcontract_1=[1], &
2303  contract_2=[2, 3], notcontract_2=[1], &
2304  bounds_1=bounds_ctr_2d, &
2305  bounds_2=bounds_ctr_1d, &
2306  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2307  unit_nr=unit_nr_dbcsr, flop=nflop)
2308  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2309  CALL dbt_batched_contract_finalize(t_2c_ri_met)
2310  END DO
2311  CALL timestop(handle)
2312  END IF
2313  CALL dbt_copy(t_3c_ri_mo_mo_fit, t_3c_5)
2314 
2315  ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
2316 
2317  ! (ab|P')
2318  CALL timeset(routinen//"_3c_RI", handle)
2319  pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2320  CALL dbt_copy(t_3c_4, t_3c_ri_ctr, move_data=.true.)
2321  CALL get_force_from_3c_trace(force, t_3c_ri_ctr, t_3c_der_ri_ctr_2, atom_of_kind, kind_of, &
2322  idx_to_at_ri, pref)
2323  CALL timestop(handle)
2324 
2325  ! (a'b|P) Note that derivative remains in AO rep until the actual force evaluation,
2326  ! which also prevents doing a direct 3-center trace
2327  bounds_ctr_2d(1, 1) = batch_start(i_mem)
2328  bounds_ctr_2d(2, 1) = batch_end(i_mem)
2329 
2330  bounds_ctr_1d(1, 1) = batch_start(j_mem)
2331  bounds_ctr_1d(2, 1) = batch_end(j_mem)
2332 
2333  CALL timeset(routinen//"_3c_AO", handle)
2334  CALL dbt_copy(t_3c_ri_ctr, t_3c_work, order=[2, 1, 3], move_data=.true.)
2335  DO i_xyz = 1, 3
2336 
2337  CALL dbt_batched_contract_init(t_2c_mo_ao_ctr(i_xyz))
2338  DO k_mem = 1, n_mem_ri
2339  bounds_ctr_2d(1, 2) = batch_start_ri(k_mem)
2340  bounds_ctr_2d(2, 2) = batch_end_ri(k_mem)
2341 
2342  CALL dbt_contract(1.0_dp, t_3c_work, t_3c_der_ao_ctr_1(i_xyz), &
2343  1.0_dp, t_2c_mo_ao_ctr(i_xyz), &
2344  contract_1=[1, 2], notcontract_1=[3], &
2345  contract_2=[1, 2], notcontract_2=[3], &
2346  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2347  bounds_1=bounds_ctr_2d, &
2348  bounds_2=bounds_ctr_1d, &
2349  unit_nr=unit_nr_dbcsr, flop=nflop)
2350  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2351  END DO
2352  CALL dbt_batched_contract_finalize(t_2c_mo_ao_ctr(i_xyz))
2353  END DO
2354  CALL timestop(handle)
2355 
2356  END DO !j_mem
2357  CALL dbt_batched_contract_finalize(t_3c_1)
2358  CALL dbt_batched_contract_finalize(t_3c_work)
2359  CALL dbt_batched_contract_finalize(t_3c_2)
2360  CALL dbt_batched_contract_finalize(t_3c_3)
2361  CALL dbt_batched_contract_finalize(t_3c_4)
2362  CALL dbt_batched_contract_finalize(t_3c_5)
2363 
2364  DO i_xyz = 1, 3
2365  CALL dbt_batched_contract_finalize(t_3c_der_ri_ctr_1(i_xyz))
2366  CALL dbt_batched_contract_finalize(t_3c_der_ao_ctr_1(i_xyz))
2367  END DO
2368 
2369  IF (.NOT. ri_data%same_op) THEN
2370  CALL dbt_batched_contract_finalize(t_3c_6)
2371  END IF
2372 
2373  END DO !i_mem
2374  CALL dbt_batched_contract_finalize(t_3c_desymm)
2375  CALL dbt_batched_contract_finalize(t_3c_0)
2376 
2377  DO i_xyz = 1, 3
2378  CALL dbt_batched_contract_finalize(t_3c_der_ao(i_xyz))
2379  CALL dbt_batched_contract_finalize(t_3c_der_ri(i_xyz))
2380  END DO
2381 
2382  !Force contribution due to 3-center AO derivatives (a'b|P)
2383  pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2384  DO i_xyz = 1, 3
2385  CALL dbt_copy(t_2c_mo_ao_ctr(i_xyz), t_2c_mo_ao(i_xyz), move_data=.true.) !ensures matching distributions
2386  CALL get_mo_ao_force(force, t_mo_cpy, t_2c_mo_ao(i_xyz), atom_of_kind, kind_of, idx_to_at_ao, pref, i_xyz)
2387  CALL dbt_clear(t_2c_mo_ao(i_xyz))
2388  END DO
2389 
2390  !Force contribution of d/dx (P|Q)
2391  pref = 0.5_dp*hf_fraction*spin_fac
2392  IF (.NOT. ri_data%same_op) pref = -pref
2393 
2394  !Making sure dists of the t_2c_RI tensors match
2395  CALL dbt_copy(t_2c_ri_pq, t_2c_ri, move_data=.true.)
2396  CALL get_2c_der_force(force, t_2c_ri, t_2c_der_ri, atom_of_kind, &
2397  kind_of, idx_to_at_ri, pref)
2398  CALL dbt_clear(t_2c_ri)
2399 
2400  !Force contribution due to the inverse metric
2401  IF (.NOT. ri_data%same_op) THEN
2402  pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2403 
2404  CALL dbt_copy(t_2c_ri_met, t_2c_ri, move_data=.true.)
2405  CALL get_2c_der_force(force, t_2c_ri, t_2c_der_metric, atom_of_kind, &
2406  kind_of, idx_to_at_ri, pref)
2407  CALL dbt_clear(t_2c_ri)
2408  END IF
2409 
2410  CALL dbt_destroy(t_3c_0)
2411  CALL dbt_destroy(t_3c_1)
2412  CALL dbt_destroy(t_3c_2)
2413  CALL dbt_destroy(t_3c_3)
2414  CALL dbt_destroy(t_3c_4)
2415  CALL dbt_destroy(t_3c_5)
2416  CALL dbt_destroy(t_3c_6)
2417  CALL dbt_destroy(t_3c_work)
2418  CALL dbt_destroy(t_3c_ri_ctr)
2419  CALL dbt_destroy(t_3c_mo_ri_ao)
2420  CALL dbt_destroy(t_3c_mo_ri_mo)
2421  CALL dbt_destroy(t_3c_ri_mo_mo)
2422  CALL dbt_destroy(t_3c_ri_mo_mo_fit)
2423  CALL dbt_destroy(t_mo_coeff)
2424  CALL dbt_destroy(t_mo_cpy)
2425  DO i_xyz = 1, 3
2426  CALL dbt_destroy(t_2c_mo_ao(i_xyz))
2427  CALL dbt_destroy(t_2c_mo_ao_ctr(i_xyz))
2428  CALL dbt_destroy(t_3c_der_ri_ctr_1(i_xyz))
2429  CALL dbt_destroy(t_3c_der_ao_ctr_1(i_xyz))
2430  CALL dbt_destroy(t_3c_der_ri_ctr_2(i_xyz))
2431  END DO
2432  DEALLOCATE (batch_ranges, batch_start, batch_end)
2433  END DO !ispin
2434 
2435  ! Clean-up
2436  CALL dbt_pgrid_destroy(pgrid_1)
2437  CALL dbt_pgrid_destroy(pgrid_2)
2438  CALL dbt_destroy(t_3c_desymm)
2439  CALL dbt_destroy(t_2c_ri)
2440  CALL dbt_destroy(t_2c_ri_pq)
2441  IF (.NOT. ri_data%same_op) THEN
2442  CALL dbt_destroy(t_2c_ri_met)
2443  CALL dbt_destroy(t_2c_ri_inv)
2444  END IF
2445  DO i_xyz = 1, 3
2446  CALL dbt_destroy(t_3c_der_ao(i_xyz))
2447  CALL dbt_destroy(t_3c_der_ri(i_xyz))
2448  CALL dbt_destroy(t_2c_der_ri(i_xyz))
2449  IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2450  END DO
2451  CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_1(1, 1))
2452 
2453  CALL para_env%sync()
2454  t2 = m_walltime()
2455 
2456  ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2457 
2458  END SUBROUTINE hfx_ri_forces_mo
2459 
2460 ! **************************************************************************************************
2461 !> \brief New sparser implementation
2462 !> \param qs_env ...
2463 !> \param ri_data ...
2464 !> \param nspins ...
2465 !> \param hf_fraction ...
2466 !> \param rho_ao ...
2467 !> \param rho_ao_resp ...
2468 !> \param use_virial ...
2469 !> \param resp_only ...
2470 !> \param rescale_factor ...
2471 ! **************************************************************************************************
2472  SUBROUTINE hfx_ri_forces_pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
2473  use_virial, resp_only, rescale_factor)
2474 
2475  TYPE(qs_environment_type), POINTER :: qs_env
2476  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2477  INTEGER, INTENT(IN) :: nspins
2478  REAL(dp), INTENT(IN) :: hf_fraction
2479  TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
2480  TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
2481  LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
2482  REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
2483 
2484  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_forces_Pmat'
2485 
2486  INTEGER :: dummy_int, handle, i_mem, i_spin, i_xyz, &
2487  ibasis, j_mem, j_xyz, k_mem, k_xyz, &
2488  n_mem, n_mem_ri, natom, nkind, &
2489  unit_nr_dbcsr
2490  INTEGER(int_8) :: nflop
2491  INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_end, batch_end_ri, batch_ranges, &
2492  batch_ranges_ri, batch_start, batch_start_ri, dist1, dist2, dist3, idx_to_at_ao, &
2493  idx_to_at_ri, kind_of
2494  INTEGER, DIMENSION(2, 1) :: ibounds, jbounds, kbounds
2495  INTEGER, DIMENSION(2, 2) :: ijbounds
2496  INTEGER, DIMENSION(2, 3) :: bounds_cpy
2497  INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
2498  LOGICAL :: do_resp, resp_only_prv, use_virial_prv
2499  REAL(dp) :: pref, spin_fac, t1, t2
2500  REAL(dp), DIMENSION(3, 3) :: work_virial
2501  TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2502  TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_ao_ind, t_3c_der_ri_ind
2503  TYPE(cell_type), POINTER :: cell
2504  TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist
2505  TYPE(dbcsr_type) :: dbcsr_tmp, virial_trace
2506  TYPE(dbt_type) :: rho_ao_1, rho_ao_2, t_2c_ri, t_2c_ri_tmp, t_2c_tmp, t_2c_virial, t_3c_1, &
2507  t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_ao_ri_ao, t_3c_help_1, t_3c_help_2, t_3c_int, &
2508  t_3c_int_2, t_3c_ri_ao_ao, t_3c_sparse, t_3c_virial, t_r, t_svs
2509  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_ri, &
2510  t_3c_der_ao, t_3c_der_ri
2511  TYPE(dft_control_type), POINTER :: dft_control
2512  TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2513  DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
2514  TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
2515  TYPE(hfx_compression_type), ALLOCATABLE, &
2516  DIMENSION(:, :) :: t_3c_der_ao_comp, t_3c_der_ri_comp
2517  TYPE(mp_para_env_type), POINTER :: para_env
2518  TYPE(neighbor_list_3c_type) :: nl_3c
2519  TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2520  POINTER :: nl_2c_met, nl_2c_pot
2521  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2522  TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2523  TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2524  TYPE(virial_type), POINTER :: virial
2525 
2526  !The idea is the following: we need to compute the gradients
2527  ! d/dx [P_ab P_cd (acP) S^-1_PQ (Q|R) S^-1_RS (Sbd)]
2528  ! Which we do in a few steps:
2529  ! 1) Contract the density matrices with the 3c integrals: M_acS = P_ab P_cd (Sbd)
2530  ! 2) Calculate the 3c contributions: d/dx (acP) [S^-1_PQ (Q|R) S^-1_RS M_acS]
2531  ! For maximum perf, we first multiply all 2c matrices together, than contract with retain_sparsity
2532  ! 3) Contract the 3c integrals and the M tensor together in order to only work with 2c quantities:
2533  ! R_PS = (acP) M_acS
2534  ! 4) From there, we can easily calculate the 2c contributions to the force:
2535  ! Potential: [S^-1*R*S^-1]_QR d/dx (Q|R)
2536  ! Metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2537 
2538  NULLIFY (particle_set, virial, cell, force, atomic_kind_set, nl_2c_pot, nl_2c_met)
2539  NULLIFY (orb_basis, ri_basis, qs_kind_set, particle_set, dft_control, dbcsr_dist)
2540 
2541  use_virial_prv = .false.
2542  IF (PRESENT(use_virial)) use_virial_prv = use_virial
2543 
2544  do_resp = .false.
2545  IF (PRESENT(rho_ao_resp)) THEN
2546  IF (ASSOCIATED(rho_ao_resp(1)%matrix)) do_resp = .true.
2547  END IF
2548 
2549  resp_only_prv = .false.
2550  IF (PRESENT(resp_only)) resp_only_prv = resp_only
2551 
2552  unit_nr_dbcsr = ri_data%unit_nr_dbcsr
2553 
2554  CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
2555  atomic_kind_set=atomic_kind_set, virial=virial, &
2556  cell=cell, force=force, para_env=para_env, dft_control=dft_control, &
2557  qs_kind_set=qs_kind_set, dbcsr_dist=dbcsr_dist)
2558 
2559  CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, ri_data%pgrid_1, &
2560  ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
2561  [1, 2], [3], name="(AO RI | AO)")
2562  DEALLOCATE (dist1, dist2, dist3)
2563 
2564  CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, ri_data%pgrid_2, &
2565  ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2566  [1], [2, 3], name="(RI | AO AO)")
2567  DEALLOCATE (dist1, dist2, dist3)
2568 
2569  ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
2570  CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
2571  CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
2572  CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
2573  CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
2574 
2575  DO ibasis = 1, SIZE(basis_set_ao)
2576  orb_basis => basis_set_ao(ibasis)%gto_basis_set
2577  CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
2578  ri_basis => basis_set_ri(ibasis)%gto_basis_set
2579  CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
2580  END DO
2581 
2582  ! Precompute the derivatives
2583  ALLOCATE (t_2c_der_metric(3), t_2c_der_ri(3), t_3c_der_ao(3), t_3c_der_ri(3))
2584  IF (use_virial) THEN
2585  CALL precalc_derivatives(t_3c_der_ri_comp, t_3c_der_ao_comp, t_3c_der_ri_ind, t_3c_der_ao_ind, &
2586  t_2c_der_ri, t_2c_der_metric, t_3c_ri_ao_ao, &
2587  basis_set_ao, basis_set_ri, ri_data, qs_env, &
2588  nl_2c_pot=nl_2c_pot, nl_2c_met=nl_2c_met, &
2589  nl_3c_out=nl_3c, t_3c_virial=t_3c_virial)
2590 
2591  ALLOCATE (col_bsize(natom), row_bsize(natom))
2592  col_bsize(:) = ri_data%bsizes_RI
2593  row_bsize(:) = ri_data%bsizes_RI
2594  CALL dbcsr_create(virial_trace, "virial_trace", dbcsr_dist, dbcsr_type_no_symmetry, row_bsize, col_bsize)
2595  CALL dbt_create(virial_trace, t_2c_virial)
2596  DEALLOCATE (col_bsize, row_bsize)
2597  ELSE
2598  CALL precalc_derivatives(t_3c_der_ri_comp, t_3c_der_ao_comp, t_3c_der_ri_ind, t_3c_der_ao_ind, &
2599  t_2c_der_ri, t_2c_der_metric, t_3c_ri_ao_ao, &
2600  basis_set_ao, basis_set_ri, ri_data, qs_env)
2601  END IF
2602 
2603  ! Keep track of derivative sparsity to be able to use retain_sparsity in contraction
2604  CALL dbt_create(t_3c_ri_ao_ao, t_3c_sparse)
2605  DO i_xyz = 1, 3
2606  DO i_mem = 1, SIZE(t_3c_der_ri_comp, 1)
2607  CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ri_ind(i_mem, i_xyz)%ind, &
2608  t_3c_der_ri_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2609  CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.true., move_data=.true.)
2610 
2611  CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_ao_ind(i_mem, i_xyz)%ind, &
2612  t_3c_der_ao_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2613  CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.true.)
2614  CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, order=[1, 3, 2], summation=.true., move_data=.true.)
2615  END DO
2616  END DO
2617 
2618  DO i_xyz = 1, 3
2619  CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_ri(i_xyz))
2620  CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_ao(i_xyz))
2621  END DO
2622 
2623  ! Some utilities
2624  spin_fac = 0.5_dp
2625  IF (nspins == 2) spin_fac = 1.0_dp
2626  IF (PRESENT(rescale_factor)) spin_fac = spin_fac*rescale_factor
2627 
2628  ALLOCATE (idx_to_at_ri(SIZE(ri_data%bsizes_RI_split)))
2629  CALL get_idx_to_atom(idx_to_at_ri, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
2630 
2631  ALLOCATE (idx_to_at_ao(SIZE(ri_data%bsizes_AO_split)))
2632  CALL get_idx_to_atom(idx_to_at_ao, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2633 
2634  CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
2635 
2636  ! Go over batches of the 2 AO indices to save memory
2637  n_mem = ri_data%n_mem
2638  ALLOCATE (batch_start(n_mem), batch_end(n_mem))
2639  batch_start(:) = ri_data%starts_array_mem(:)
2640  batch_end(:) = ri_data%ends_array_mem(:)
2641 
2642  ALLOCATE (batch_ranges(n_mem + 1))
2643  batch_ranges(:n_mem) = ri_data%starts_array_mem_block(:)
2644  batch_ranges(n_mem + 1) = ri_data%ends_array_mem_block(n_mem) + 1
2645 
2646  n_mem_ri = ri_data%n_mem_RI
2647  ALLOCATE (batch_start_ri(n_mem_ri), batch_end_ri(n_mem_ri))
2648  batch_start_ri(:) = ri_data%starts_array_RI_mem(:)
2649  batch_end_ri(:) = ri_data%ends_array_RI_mem(:)
2650 
2651  ALLOCATE (batch_ranges_ri(n_mem_ri + 1))
2652  batch_ranges_ri(:n_mem_ri) = ri_data%starts_array_RI_mem_block(:)
2653  batch_ranges_ri(n_mem_ri + 1) = ri_data%ends_array_RI_mem_block(n_mem_ri) + 1
2654 
2655  ! Pre-create all the needed tensors
2656  CALL create_2c_tensor(rho_ao_1, dist1, dist2, ri_data%pgrid_2d, &
2657  ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2658  name="(AO | AO)")
2659  DEALLOCATE (dist1, dist2)
2660  CALL dbt_create(rho_ao_1, rho_ao_2)
2661 
2662  CALL create_2c_tensor(t_2c_ri, dist1, dist2, ri_data%pgrid_2d, &
2663  ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
2664  DEALLOCATE (dist1, dist2)
2665  CALL dbt_create(t_2c_ri, t_svs)
2666  CALL dbt_create(t_2c_ri, t_r)
2667  CALL dbt_create(t_2c_ri, t_2c_ri_tmp)
2668 
2669  CALL dbt_create(t_3c_ao_ri_ao, t_3c_1)
2670  CALL dbt_create(t_3c_ao_ri_ao, t_3c_2)
2671  CALL dbt_create(t_3c_ri_ao_ao, t_3c_3)
2672  CALL dbt_create(t_3c_ri_ao_ao, t_3c_4)
2673  CALL dbt_create(t_3c_ri_ao_ao, t_3c_5)
2674  CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_1)
2675  CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_2)
2676 
2677  CALL dbt_create(t_3c_ao_ri_ao, t_3c_int)
2678  CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), t_3c_int)
2679 
2680  CALL dbt_create(t_3c_ri_ao_ao, t_3c_int_2)
2681 
2682  CALL para_env%sync()
2683  t1 = m_walltime()
2684 
2685  !Pre-calculate the necessary 2-center quantities
2686  IF (.NOT. ri_data%same_op) THEN
2687  !S^-1 * V * S^-1
2688  CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_ri, &
2689  contract_1=[2], notcontract_1=[1], &
2690  contract_2=[1], notcontract_2=[2], &
2691  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2692  unit_nr=unit_nr_dbcsr, flop=nflop)
2693  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2694 
2695  CALL dbt_contract(1.0_dp, t_2c_ri, ri_data%t_2c_inv(1, 1), 0.0_dp, t_svs, &
2696  contract_1=[2], notcontract_1=[1], &
2697  contract_2=[1], notcontract_2=[2], &
2698  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2699  unit_nr=unit_nr_dbcsr, flop=nflop)
2700  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2701  ELSE
2702  ! Simply V^-1
2703  CALL dbt_copy(ri_data%t_2c_inv(1, 1), t_svs)
2704  END IF
2705 
2706  CALL dbt_batched_contract_init(t_3c_int, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2707  CALL dbt_batched_contract_init(t_3c_int_2, batch_range_1=batch_ranges_ri, &
2708  batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2709  CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2710  CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2711  CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_ri, &
2712  batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2713  CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_ri, &
2714  batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2715  CALL dbt_batched_contract_init(t_3c_5, batch_range_1=batch_ranges_ri, &
2716  batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2717  CALL dbt_batched_contract_init(t_3c_sparse, batch_range_1=batch_ranges_ri, &
2718  batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2719 
2720  DO i_spin = 1, nspins
2721 
2722  !Prepare Pmat in tensor format
2723  CALL dbt_create(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2724  CALL dbt_copy_matrix_to_tensor(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2725  CALL dbt_copy(t_2c_tmp, rho_ao_1, move_data=.true.)
2726  CALL dbt_destroy(t_2c_tmp)
2727 
2728  IF (.NOT. do_resp) THEN
2729  CALL dbt_copy(rho_ao_1, rho_ao_2)
2730  ELSE IF (do_resp .AND. resp_only_prv) THEN
2731 
2732  CALL dbt_create(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2733  CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2734  CALL dbt_copy(t_2c_tmp, rho_ao_2)
2735  !symmetry allows to take 2*P_resp rasther than explicitely take all cross products
2736  CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.true., move_data=.true.)
2737  CALL dbt_destroy(t_2c_tmp)
2738  ELSE
2739 
2740  !if not resp_only, need P-P_resp and P+P_resp
2741  CALL dbt_copy(rho_ao_1, rho_ao_2)
2742  CALL dbcsr_create(dbcsr_tmp, template=rho_ao_resp(i_spin)%matrix)
2743  CALL dbcsr_add(dbcsr_tmp, rho_ao_resp(i_spin)%matrix, 0.0_dp, -1.0_dp)
2744  CALL dbt_create(dbcsr_tmp, t_2c_tmp)
2745  CALL dbt_copy_matrix_to_tensor(dbcsr_tmp, t_2c_tmp)
2746  CALL dbt_copy(t_2c_tmp, rho_ao_1, summation=.true., move_data=.true.)
2747  CALL dbcsr_release(dbcsr_tmp)
2748 
2749  CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2750  CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.true., move_data=.true.)
2751  CALL dbt_destroy(t_2c_tmp)
2752 
2753  END IF
2754  work_virial = 0.0_dp
2755 
2756  CALL timeset(routinen//"_3c", handle)
2757  !Start looping of the batches
2758  DO i_mem = 1, n_mem
2759  ibounds(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2760 
2761  CALL dbt_batched_contract_init(rho_ao_1)
2762  CALL dbt_contract(1.0_dp, rho_ao_1, t_3c_int, 0.0_dp, t_3c_1, &
2763  contract_1=[1], notcontract_1=[2], &
2764  contract_2=[3], notcontract_2=[1, 2], &
2765  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2766  bounds_2=ibounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2767  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2768  CALL dbt_batched_contract_finalize(rho_ao_1)
2769 
2770  CALL dbt_copy(t_3c_1, t_3c_2, order=[3, 2, 1], move_data=.true.)
2771 
2772  DO j_mem = 1, n_mem
2773  jbounds(:, 1) = [batch_start(j_mem), batch_end(j_mem)]
2774 
2775  CALL dbt_batched_contract_init(rho_ao_2)
2776  CALL dbt_contract(1.0_dp, rho_ao_2, t_3c_2, 0.0_dp, t_3c_1, &
2777  contract_1=[1], notcontract_1=[2], &
2778  contract_2=[3], notcontract_2=[1, 2], &
2779  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2780  bounds_2=jbounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2781  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2782  CALL dbt_batched_contract_finalize(rho_ao_2)
2783 
2784  bounds_cpy(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2785  bounds_cpy(:, 2) = [1, sum(ri_data%bsizes_RI)]
2786  bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2787  CALL dbt_copy(t_3c_int, t_3c_int_2, order=[2, 1, 3], bounds=bounds_cpy)
2788  CALL dbt_copy(t_3c_1, t_3c_3, order=[2, 1, 3], move_data=.true.)
2789 
2790  DO k_mem = 1, n_mem_ri
2791  kbounds(:, 1) = [batch_start_ri(k_mem), batch_end_ri(k_mem)]
2792 
2793  bounds_cpy(:, 1) = [batch_start_ri(k_mem), batch_end_ri(k_mem)]
2794  bounds_cpy(:, 2) = [batch_start(i_mem), batch_end(i_mem)]
2795  bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2796  CALL dbt_copy(t_3c_sparse, t_3c_4, bounds=bounds_cpy)
2797 
2798  !Contract with the 2-center product S^-1 * V * S^-1 while keeping sparsity of derivatives
2799  CALL dbt_batched_contract_init(t_svs)
2800  CALL dbt_contract(1.0_dp, t_svs, t_3c_3, 0.0_dp, t_3c_4, &
2801  contract_1=[2], notcontract_1=[1], &
2802  contract_2=[1], notcontract_2=[2, 3], &
2803  map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2804  retain_sparsity=.true., unit_nr=unit_nr_dbcsr, flop=nflop)
2805  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2806  CALL dbt_batched_contract_finalize(t_svs)
2807 
2808  CALL dbt_copy(t_3c_4, t_3c_5, summation=.true., move_data=.true.)
2809 
2810  ijbounds(:, 1) = ibounds(:, 1)
2811  ijbounds(:, 2) = jbounds(:, 1)
2812 
2813  !Contract R_PS = (acP) M_acS
2814  CALL dbt_batched_contract_init(t_r)
2815  CALL dbt_contract(1.0_dp, t_3c_int_2, t_3c_3, 1.0_dp, t_r, &
2816  contract_1=[2, 3], notcontract_1=[1], &
2817  contract_2=[2, 3], notcontract_2=[1], &
2818  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2819  bounds_1=ijbounds, bounds_3=kbounds, &
2820  unit_nr=unit_nr_dbcsr, flop=nflop)
2821  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2822  CALL dbt_batched_contract_finalize(t_r)
2823 
2824  END DO !k_mem
2825  END DO !j_mem
2826 
2827  CALL dbt_copy(t_3c_5, t_3c_help_1, move_data=.true.)
2828 
2829  !The force from the 3c derivatives
2830  pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2831 
2832  DO k_mem = 1, SIZE(t_3c_der_ri_comp, 1)
2833  DO i_xyz = 1, 3
2834  CALL dbt_clear(t_3c_der_ri(i_xyz))
2835  CALL decompress_tensor(t_3c_der_ri(i_xyz), t_3c_der_ri_ind(k_mem, i_xyz)%ind, &
2836  t_3c_der_ri_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2837  END DO
2838  CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_ri, atom_of_kind, kind_of, &
2839  idx_to_at_ri, pref)
2840  END DO
2841 
2842  pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2843  IF (do_resp) THEN
2844  pref = 0.5_dp*pref
2845  CALL dbt_copy(t_3c_help_1, t_3c_help_2, order=[1, 3, 2])
2846  END IF
2847 
2848  DO k_mem = 1, SIZE(t_3c_der_ao_comp, 1)
2849  DO i_xyz = 1, 3
2850  CALL dbt_clear(t_3c_der_ao(i_xyz))
2851  CALL decompress_tensor(t_3c_der_ao(i_xyz), t_3c_der_ao_ind(k_mem, i_xyz)%ind, &
2852  t_3c_der_ao_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2853  END DO
2854  CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_ao, atom_of_kind, kind_of, &
2855  idx_to_at_ao, pref, deriv_dim=2)
2856 
2857  IF (do_resp) THEN
2858  CALL get_force_from_3c_trace(force, t_3c_help_2, t_3c_der_ao, atom_of_kind, kind_of, &
2859  idx_to_at_ao, pref, deriv_dim=2)
2860  END IF
2861  END DO
2862 
2863  !The 3c virial contribution. Note: only fraction of integrals correspondig to i_mem calculated
2864  IF (use_virial) THEN
2865  pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2866  CALL dbt_copy(t_3c_help_1, t_3c_virial, move_data=.true.)
2867  CALL calc_3c_virial(work_virial, t_3c_virial, pref, qs_env, nl_3c, basis_set_ri, &
2868  basis_set_ao, basis_set_ao, ri_data%ri_metric, &
2869  der_eps=ri_data%eps_schwarz_forces, op_pos=1)
2870 
2871  CALL dbt_clear(t_3c_virial)
2872  END IF
2873 
2874  CALL dbt_clear(t_3c_help_1)
2875  CALL dbt_clear(t_3c_help_2)
2876  END DO !i_mem
2877  CALL timestop(handle)
2878 
2879  CALL timeset(routinen//"_2c", handle)
2880  !Now we deal with all the 2-center quantities
2881  !Calculate S^-1 * R * S^-1
2882  CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_r, 0.0_dp, t_2c_ri_tmp, &
2883  contract_1=[2], notcontract_1=[1], &
2884  contract_2=[1], notcontract_2=[2], &
2885  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2886  unit_nr=unit_nr_dbcsr, flop=nflop)
2887  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2888 
2889  CALL dbt_contract(1.0_dp, t_2c_ri_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_ri, &
2890  contract_1=[2], notcontract_1=[1], &
2891  contract_2=[1], notcontract_2=[2], &
2892  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2893  unit_nr=unit_nr_dbcsr, flop=nflop)
2894  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2895 
2896  !Calculate the potential contribution to the force: [S^-1*R*S^-1]_QR d/dx (Q|R)
2897  pref = 0.5_dp*hf_fraction*spin_fac
2898  IF (.NOT. ri_data%same_op) pref = -pref
2899  CALL get_2c_der_force(force, t_2c_ri, t_2c_der_ri, atom_of_kind, kind_of, idx_to_at_ri, pref)
2900 
2901  !Calculate the contribution to the virial on the fly
2902  IF (use_virial_prv) THEN
2903  CALL dbt_copy(t_2c_ri, t_2c_virial)
2904  CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2905  CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_pot, &
2906  basis_set_ri, basis_set_ri, ri_data%hfx_pot)
2907  END IF
2908 
2909  !And that from the metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2910  IF (.NOT. ri_data%same_op) THEN
2911  CALL dbt_contract(1.0_dp, t_2c_ri, ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_ri_tmp, &
2912  contract_1=[2], notcontract_1=[1], &
2913  contract_2=[1], notcontract_2=[2], &
2914  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2915  unit_nr=unit_nr_dbcsr, flop=nflop)
2916  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2917 
2918  CALL dbt_contract(1.0_dp, t_2c_ri_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_ri, &
2919  contract_1=[2], notcontract_1=[1], &
2920  contract_2=[1], notcontract_2=[2], &
2921  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2922  unit_nr=unit_nr_dbcsr, flop=nflop)
2923  ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2924 
2925  pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2926  CALL get_2c_der_force(force, t_2c_ri, t_2c_der_metric, atom_of_kind, kind_of, idx_to_at_ri, pref)
2927 
2928  IF (use_virial_prv) THEN
2929  CALL dbt_copy(t_2c_ri, t_2c_virial)
2930  CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2931  CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_met, &
2932  basis_set_ri, basis_set_ri, ri_data%ri_metric)
2933  END IF
2934  END IF
2935  CALL dbt_clear(t_2c_ri)
2936  CALL dbt_clear(t_2c_ri_tmp)
2937  CALL dbt_clear(t_r)
2938  CALL dbt_clear(t_3c_help_1)
2939  CALL dbt_clear(t_3c_help_2)
2940  CALL timestop(handle)
2941 
2942  IF (use_virial_prv) THEN
2943  DO k_xyz = 1, 3
2944  DO j_xyz = 1, 3
2945  DO i_xyz = 1, 3
2946  virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
2947  + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
2948  END DO
2949  END DO
2950  END DO
2951  END IF
2952  END DO !i_spin
2953 
2954  CALL dbt_batched_contract_finalize(t_3c_int)
2955  CALL dbt_batched_contract_finalize(t_3c_int_2)
2956  CALL dbt_batched_contract_finalize(t_3c_1)
2957  CALL dbt_batched_contract_finalize(t_3c_2)
2958  CALL dbt_batched_contract_finalize(t_3c_3)
2959  CALL dbt_batched_contract_finalize(t_3c_4)
2960  CALL dbt_batched_contract_finalize(t_3c_5)
2961  CALL dbt_batched_contract_finalize(t_3c_sparse)
2962 
2963  CALL para_env%sync()
2964  t2 = m_walltime()
2965 
2966  CALL dbt_copy(t_3c_int, ri_data%t_3c_int_ctr_2(1, 1), move_data=.true.)
2967 
2968  !clean-up
2969  CALL dbt_destroy(rho_ao_1)
2970  CALL dbt_destroy(rho_ao_2)
2971  CALL dbt_destroy(t_3c_ao_ri_ao)
2972  CALL dbt_destroy(t_3c_ri_ao_ao)
2973  CALL dbt_destroy(t_3c_int)
2974  CALL dbt_destroy(t_3c_int_2)
2975  CALL dbt_destroy(t_3c_1)
2976  CALL dbt_destroy(t_3c_2)
2977  CALL dbt_destroy(t_3c_3)
2978  CALL dbt_destroy(t_3c_4)
2979  CALL dbt_destroy(t_3c_5)
2980  CALL dbt_destroy(t_3c_help_1)
2981  CALL dbt_destroy(t_3c_help_2)
2982  CALL dbt_destroy(t_3c_sparse)
2983  CALL dbt_destroy(t_svs)
2984  CALL dbt_destroy(t_r)
2985  CALL dbt_destroy(t_2c_ri)
2986  CALL dbt_destroy(t_2c_ri_tmp)
2987 
2988  DO i_xyz = 1, 3
2989  CALL dbt_destroy(t_3c_der_ri(i_xyz))
2990  CALL dbt_destroy(t_3c_der_ao(i_xyz))
2991  CALL dbt_destroy(t_2c_der_ri(i_xyz))
2992  IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2993  END DO
2994 
2995  DO i_xyz = 1, 3
2996  DO i_mem = 1, SIZE(t_3c_der_ao_comp, 1)
2997  CALL dealloc_containers(t_3c_der_ao_comp(i_mem, i_xyz), dummy_int)
2998  CALL dealloc_containers(t_3c_der_ri_comp(i_mem, i_xyz), dummy_int)
2999  END DO
3000  END DO
3001  DEALLOCATE (t_3c_der_ao_ind, t_3c_der_ri_ind)
3002 
3003  DO ibasis = 1, SIZE(basis_set_ao)
3004  orb_basis => basis_set_ao(ibasis)%gto_basis_set
3005  ri_basis => basis_set_ri(ibasis)%gto_basis_set
3006  CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3007  CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3008  END DO
3009 
3010  IF (use_virial) THEN
3011  CALL release_neighbor_list_sets(nl_2c_met)
3012  CALL release_neighbor_list_sets(nl_2c_pot)
3013  CALL neighbor_list_3c_destroy(nl_3c)
3014  CALL dbcsr_release(virial_trace)
3015  CALL dbt_destroy(t_2c_virial)
3016  CALL dbt_destroy(t_3c_virial)
3017  END IF
3018 
3019  END SUBROUTINE hfx_ri_forces_pmat
3020 
3021 ! **************************************************************************************************
3022 !> \brief the general routine that calls the relevant force code
3023 !> \param qs_env ...
3024 !> \param ri_data ...
3025 !> \param nspins ...
3026 !> \param hf_fraction ...
3027 !> \param rho_ao ...
3028 !> \param rho_ao_resp ...
3029 !> \param mos ...
3030 !> \param use_virial ...
3031 !> \param resp_only ...
3032 !> \param rescale_factor ...
3033 ! **************************************************************************************************
3034  SUBROUTINE hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
3035  mos, use_virial, resp_only, rescale_factor)
3036 
3037  TYPE(qs_environment_type), POINTER :: qs_env
3038  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3039  INTEGER, INTENT(IN) :: nspins
3040  REAL(kind=dp), INTENT(IN) :: hf_fraction
3041  TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL :: rho_ao
3042  TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
3043  TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
3044  OPTIONAL :: mos
3045  LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
3046  REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
3047 
3048  CHARACTER(LEN=*), PARAMETER :: routinen = 'hfx_ri_update_forces'
3049 
3050  INTEGER :: handle, ispin
3051  INTEGER, DIMENSION(2) :: homo
3052  REAL(kind=dp), DIMENSION(:), POINTER :: mo_eigenvalues
3053  TYPE(cp_fm_type), POINTER :: mo_coeff
3054  TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
3055  TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
3056 
3057  CALL timeset(routinen, handle)
3058 
3059  SELECT CASE (ri_data%flavor)
3060  CASE (ri_mo)
3061 
3062  DO ispin = 1, nspins
3063  NULLIFY (mo_coeff_b_tmp)
3064  cpassert(mos(ispin)%uniform_occupation)
3065  CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
3066 
3067  IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
3068  CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
3069  END DO
3070 
3071  DO ispin = 1, nspins
3072  CALL dbcsr_scale(mo_coeff_b(ispin), sqrt(mos(ispin)%maxocc))
3073  homo(ispin) = mos(ispin)%homo
3074  END DO
3075 
3076  CALL hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff_b, use_virial)
3077 
3078  CASE (ri_pmat)
3079 
3080  CALL hfx_ri_forces_pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, use_virial, &
3081  resp_only, rescale_factor)
3082  END SELECT
3083 
3084  DO ispin = 1, nspins
3085  CALL dbcsr_release(mo_coeff_b(ispin))
3086  END DO
3087 
3088  CALL timestop(handle)
3089 
3090  END SUBROUTINE hfx_ri_update_forces
3091 
3092 ! **************************************************************************************************
3093 !> \brief Calculate the derivatives tensors for the force, in a format fit for contractions
3094 !> \param t_3c_der_RI_comp compressed RI derivatives
3095 !> \param t_3c_der_AO_comp compressed AO derivatives
3096 !> \param t_3c_der_RI_ind ...
3097 !> \param t_3c_der_AO_ind ...
3098 !> \param t_2c_der_RI format based on standard atomic block sizes
3099 !> \param t_2c_der_metric format based on standard atomic block sizes
3100 !> \param ri_ao_ao_template ...
3101 !> \param basis_set_AO ...
3102 !> \param basis_set_RI ...
3103 !> \param ri_data ...
3104 !> \param qs_env ...
3105 !> \param nl_2c_pot ...
3106 !> \param nl_2c_met ...
3107 !> \param nl_3c_out ...
3108 !> \param t_3c_virial ...
3109 ! **************************************************************************************************
3110  SUBROUTINE precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
3111  t_2c_der_RI, t_2c_der_metric, ri_ao_ao_template, &
3112  basis_set_AO, basis_set_RI, ri_data, qs_env, &
3113  nl_2c_pot, nl_2c_met, nl_3c_out, t_3c_virial)
3114 
3115  TYPE(hfx_compression_type), ALLOCATABLE, &
3116  DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_ri_comp, t_3c_der_ao_comp
3117  TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_ri_ind, t_3c_der_ao_ind
3118  TYPE(dbt_type), DIMENSION(3), INTENT(OUT) :: t_2c_der_ri, t_2c_der_metric
3119  TYPE(dbt_type), INTENT(INOUT) :: ri_ao_ao_template
3120  TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3121  DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
3122  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3123  TYPE(qs_environment_type), POINTER :: qs_env
3124  TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3125  OPTIONAL, POINTER :: nl_2c_pot, nl_2c_met
3126  TYPE(neighbor_list_3c_type), OPTIONAL :: nl_3c_out
3127  TYPE(dbt_type), INTENT(INOUT), OPTIONAL :: t_3c_virial
3128 
3129  CHARACTER(LEN=*), PARAMETER :: routinen = 'precalc_derivatives'
3130 
3131  INTEGER :: handle, i_mem, i_xyz, n_mem, natom, &
3132  nkind, nthreads
3133  INTEGER(int_8) :: nze, nze_tot
3134  INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2, dist_ao_1, dist_ao_2, &
3135  dist_ri, dummy_end, dummy_start, &
3136  end_blocks, start_blocks
3137  INTEGER, DIMENSION(3) :: pcoord, pdims
3138  INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3139  REAL(dp) :: compression_factor, memory, occ
3140  TYPE(dbcsr_distribution_type) :: dbcsr_dist
3141  TYPE(dbcsr_type), DIMENSION(1, 3) :: t_2c_der_metric_prv, t_2c_der_ri_prv
3142  TYPE(dbt_pgrid_type) :: pgrid
3143  TYPE(dbt_type) :: t_2c_template, t_2c_tmp, t_3c_template
3144  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_ao_prv, t_3c_der_ri_prv
3145  TYPE(dft_control_type), POINTER :: dft_control
3146  TYPE(distribution_2d_type), POINTER :: dist_2d
3147  TYPE(distribution_3d_type) :: dist_3d, dist_3d_out
3148  TYPE(mp_cart_type) :: mp_comm_t3c, mp_comm_t3c_out
3149  TYPE(mp_para_env_type), POINTER :: para_env
3150  TYPE(neighbor_list_3c_type) :: nl_3c
3151  TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3152  POINTER :: nl_2c
3153  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3154  TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3155 
3156  NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env)
3157 
3158  CALL timeset(routinen, handle)
3159 
3160  CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
3161  particle_set=particle_set, dft_control=dft_control, para_env=para_env)
3162 
3163  !TODO: is such a pgrid correct?
3164  !Dealing with the 3c derivatives
3165  nthreads = 1
3166 !$ nthreads = omp_get_num_threads()
3167  pdims = 0
3168  CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[max(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
3169 
3170  CALL create_3c_tensor(t_3c_template, dist_ri, dist_ao_1, dist_ao_2, pgrid, &
3171  ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, &
3172  map1=[1], map2=[2, 3], &
3173  name="der (RI AO | AO)")
3174 
3175  ALLOCATE (t_3c_der_ao_prv(1, 1, 3), t_3c_der_ri_prv(1, 1, 3))
3176  DO i_xyz = 1, 3
3177  CALL dbt_create(t_3c_template, t_3c_der_ri_prv(1, 1, i_xyz))
3178  CALL dbt_create(t_3c_template, t_3c_der_ao_prv(1, 1, i_xyz))
3179  END DO
3180  IF (PRESENT(t_3c_virial)) THEN
3181  CALL dbt_create(t_3c_template, t_3c_virial)
3182  END IF
3183  CALL dbt_destroy(t_3c_template)
3184 
3185  CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3186  CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
3187  CALL distribution_3d_create(dist_3d, dist_ri, dist_ao_1, dist_ao_2, &
3188  nkind, particle_set, mp_comm_t3c, own_comm=.true.)
3189 
3190  CALL build_3c_neighbor_lists(nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, dist_3d, ri_data%ri_metric, &
3191  "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.true., own_dist=.true.)
3192 
3193  IF (PRESENT(nl_3c_out)) THEN
3194  CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3195  CALL mp_comm_t3c_out%create(pgrid%mp_comm_2d, 3, pdims)
3196  CALL distribution_3d_create(dist_3d_out, dist_ri, dist_ao_1, dist_ao_2, &
3197  nkind, particle_set, mp_comm_t3c_out, own_comm=.true.)
3198  CALL build_3c_neighbor_lists(nl_3c_out, basis_set_ri, basis_set_ao, basis_set_ao, dist_3d_out, &
3199  ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.false., &
3200  own_dist=.true.)
3201  END IF
3202  DEALLOCATE (dist_ri, dist_ao_1, dist_ao_2)
3203  CALL dbt_pgrid_destroy(pgrid)
3204 
3205  n_mem = ri_data%n_mem
3206  CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
3207  start_blocks, end_blocks)
3208  DEALLOCATE (dummy_start, dummy_end)
3209 
3210  ALLOCATE (t_3c_der_ao_comp(n_mem, 3), t_3c_der_ri_comp(n_mem, 3))
3211  ALLOCATE (t_3c_der_ao_ind(n_mem, 3), t_3c_der_ri_ind(n_mem, 3))
3212 
3213  memory = 0.0_dp
3214  nze_tot = 0
3215  DO i_mem = 1, n_mem
3216  CALL build_3c_derivatives(t_3c_der_ri_prv, t_3c_der_ao_prv, ri_data%filter_eps, qs_env, &
3217  nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, &
3218  ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=1, &
3219  bounds_i=[start_blocks(i_mem), end_blocks(i_mem)])
3220 
3221  DO i_xyz = 1, 3
3222  CALL dbt_copy(t_3c_der_ri_prv(1, 1, i_xyz), ri_ao_ao_template, move_data=.true.)
3223  CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3224  CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3225  nze_tot = nze_tot + nze
3226 
3227  CALL alloc_containers(t_3c_der_ri_comp(i_mem, i_xyz), 1)
3228  CALL compress_tensor(ri_ao_ao_template, t_3c_der_ri_ind(i_mem, i_xyz)%ind, &
3229  t_3c_der_ri_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3230  CALL dbt_clear(ri_ao_ao_template)
3231 
3232  !put AO derivative as middle index
3233  CALL dbt_copy(t_3c_der_ao_prv(1, 1, i_xyz), ri_ao_ao_template, order=[1, 3, 2], move_data=.true.)
3234  CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3235  CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3236  nze_tot = nze_tot + nze
3237 
3238  CALL alloc_containers(t_3c_der_ao_comp(i_mem, i_xyz), 1)
3239  CALL compress_tensor(ri_ao_ao_template, t_3c_der_ao_ind(i_mem, i_xyz)%ind, &
3240  t_3c_der_ao_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3241  CALL dbt_clear(ri_ao_ao_template)
3242  END DO
3243  END DO
3244 
3245  CALL neighbor_list_3c_destroy(nl_3c)
3246  DO i_xyz = 1, 3
3247  CALL dbt_destroy(t_3c_der_ri_prv(1, 1, i_xyz))
3248  CALL dbt_destroy(t_3c_der_ao_prv(1, 1, i_xyz))
3249  END DO
3250 
3251  CALL para_env%sum(memory)
3252  compression_factor = real(nze_tot, dp)*1.0e-06*8.0_dp/memory
3253  IF (ri_data%unit_nr > 0) THEN
3254  WRITE (unit=ri_data%unit_nr, fmt="((T3,A,T66,F11.2,A4))") &
3255  "MEMORY_INFO| Memory for 3-center HFX derivatives (compressed):", memory, ' MiB'
3256 
3257  WRITE (unit=ri_data%unit_nr, fmt="((T3,A,T60,F21.2))") &
3258  "MEMORY_INFO| Compression factor: ", compression_factor
3259  END IF
3260 
3261  !Deal with the 2-center derivatives
3262  CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3263  ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3264  ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3265  row_bsize(:) = ri_data%bsizes_RI
3266  col_bsize(:) = ri_data%bsizes_RI
3267 
3268  CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
3269  "HFX_2c_nl_pot", qs_env, sym_ij=.true., dist_2d=dist_2d)
3270 
3271  DO i_xyz = 1, 3
3272  CALL dbcsr_create(t_2c_der_ri_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3273  dbcsr_type_antisymmetric, row_bsize, col_bsize)
3274  END DO
3275 
3276  CALL build_2c_derivatives(t_2c_der_ri_prv, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_ri, &
3277  basis_set_ri, ri_data%hfx_pot)
3278  CALL release_neighbor_list_sets(nl_2c)
3279 
3280  IF (PRESENT(nl_2c_pot)) THEN
3281  NULLIFY (nl_2c_pot)
3282  CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_ri, basis_set_ri, ri_data%hfx_pot, &
3283  "HFX_2c_nl_pot", qs_env, sym_ij=.false., dist_2d=dist_2d)
3284  END IF
3285 
3286  !copy 2c derivative tensor into the standard format
3287  CALL create_2c_tensor(t_2c_template, dist1, dist2, ri_data%pgrid_2d, ri_data%bsizes_RI_split, &
3288  ri_data%bsizes_RI_split, name='(RI| RI)')
3289  DEALLOCATE (dist1, dist2)
3290 
3291  DO i_xyz = 1, 3
3292  CALL dbt_create(t_2c_der_ri_prv(1, i_xyz), t_2c_tmp)
3293  CALL dbt_copy_matrix_to_tensor(t_2c_der_ri_prv(1, i_xyz), t_2c_tmp)
3294 
3295  CALL dbt_create(t_2c_template, t_2c_der_ri(i_xyz))
3296  CALL dbt_copy(t_2c_tmp, t_2c_der_ri(i_xyz), move_data=.true.)
3297 
3298  CALL dbt_destroy(t_2c_tmp)
3299  CALL dbcsr_release(t_2c_der_ri_prv(1, i_xyz))
3300  END DO
3301 
3302  !Repeat with the metric, if required
3303  IF (.NOT. ri_data%same_op) THEN
3304 
3305  CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
3306  "HFX_2c_nl_RI", qs_env, sym_ij=.true., dist_2d=dist_2d)
3307 
3308  DO i_xyz = 1, 3
3309  CALL dbcsr_create(t_2c_der_metric_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3310  dbcsr_type_antisymmetric, row_bsize, col_bsize)
3311  END DO
3312 
3313  CALL build_2c_derivatives(t_2c_der_metric_prv, ri_data%filter_eps_2c, qs_env, nl_2c, &
3314  basis_set_ri, basis_set_ri, ri_data%ri_metric)
3315  CALL release_neighbor_list_sets(nl_2c)
3316 
3317  IF (PRESENT(nl_2c_met)) THEN
3318  NULLIFY (nl_2c_met)
3319  CALL build_2c_neighbor_lists(nl_2c_met, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
3320  "HFX_2c_nl_RI", qs_env, sym_ij=.false., dist_2d=dist_2d)
3321  END IF
3322 
3323  DO i_xyz = 1, 3
3324  CALL dbt_create(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3325  CALL dbt_copy_matrix_to_tensor(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3326 
3327  CALL dbt_create(t_2c_template, t_2c_der_metric(i_xyz))
3328  CALL dbt_copy(t_2c_tmp, t_2c_der_metric(i_xyz), move_data=.true.)
3329 
3330  CALL dbt_destroy(t_2c_tmp)
3331  CALL dbcsr_release(t_2c_der_metric_prv(1, i_xyz))
3332  END DO
3333 
3334  END IF
3335 
3336  CALL dbt_destroy(t_2c_template)
3337  CALL dbcsr_distribution_release(dbcsr_dist)
3338  DEALLOCATE (row_bsize, col_bsize)
3339 
3340  CALL timestop(handle)
3341 
3342  END SUBROUTINE precalc_derivatives
3343 
3344 ! **************************************************************************************************
3345 !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
3346 !> force = sum_ijk A_ijk B_ijk. An iteration over the blocks is made, which index determin
3347 !> the atom on which the force acts
3348 !> \param force ...
3349 !> \param t_3c_contr ...
3350 !> \param t_3c_der ...
3351 !> \param atom_of_kind ...
3352 !> \param kind_of ...
3353 !> \param idx_to_at ...
3354 !> \param pref ...
3355 !> \param do_mp2 ...
3356 !> \param deriv_dim the dimension of the tensor corresponding to the derivative (by default 1)
3357 ! **************************************************************************************************
3358  SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, &
3359  pref, do_mp2, deriv_dim)
3360 
3361  TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3362  TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
3363  TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der
3364  INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3365  REAL(dp), INTENT(IN) :: pref
3366  LOGICAL, INTENT(IN), OPTIONAL :: do_mp2
3367  INTEGER, INTENT(IN), OPTIONAL :: deriv_dim
3368 
3369  CHARACTER(LEN=*), PARAMETER :: routinen = 'get_force_from_3c_trace'
3370 
3371  INTEGER :: deriv_dim_prv, handle, i_xyz, iat, &
3372  iat_of_kind, ikind, j_xyz
3373  INTEGER, DIMENSION(3) :: ind
3374  LOGICAL :: do_mp2_prv, found
3375  REAL(dp) :: new_force
3376  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk
3377  REAL(dp), DIMENSION(3) :: scoord
3378  TYPE(dbt_iterator_type) :: iter
3379 
3380  CALL timeset(routinen, handle)
3381 
3382  do_mp2_prv = .false.
3383  IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3384 
3385  deriv_dim_prv = 1
3386  IF (PRESENT(deriv_dim)) deriv_dim_prv = deriv_dim
3387 
3388 !$OMP PARALLEL DEFAULT(NONE) &
3389 !$OMP SHARED(t_3c_der,t_3c_contr,force,do_mp2_prv,deriv_dim_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3390 !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord)
3391  DO i_xyz = 1, 3
3392  CALL dbt_iterator_start(iter, t_3c_der(i_xyz))
3393  DO WHILE (dbt_iterator_blocks_left(iter))
3394  CALL dbt_iterator_next_block(iter, ind)
3395 
3396  CALL dbt_get_block(t_3c_der(i_xyz), ind, der_blk, found)
3397  cpassert(found)
3398  CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
3399 
3400  IF (found) THEN
3401 
3402  !take the trace of the blocks
3403  new_force = pref*sum(der_blk(:, :, :)*contr_blk(:, :, :))
3404 
3405  !the first index of the derivative tensor defines the atom
3406  iat = idx_to_at(ind(deriv_dim_prv))
3407  iat_of_kind = atom_of_kind(iat)
3408  ikind = kind_of(iat)
3409 
3410  IF (.NOT. do_mp2_prv) THEN
3411 !$OMP ATOMIC
3412  force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3413  + new_force
3414  ELSE
3415 !$OMP ATOMIC
3416  force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3417  + new_force
3418  END IF
3419 
3420  DEALLOCATE (contr_blk)
3421  END IF
3422  DEALLOCATE (der_blk)
3423  END DO !iter
3424  CALL dbt_iterator_stop(iter)
3425  END DO
3426 !$OMP END PARALLEL
3427  CALL timestop(handle)
3428 
3429  END SUBROUTINE get_force_from_3c_trace
3430 
3431 ! **************************************************************************************************
3432 !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
3433 !> \param force ...
3434 !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
3435 !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
3436 !> \param atom_of_kind ...
3437 !> \param kind_of ...
3438 !> \param idx_to_at ...
3439 !> \param pref ...
3440 !> \param do_mp2 ...
3441 !> \param do_ovlp ...
3442 !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution
3443 ! **************************************************************************************************
3444  SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, &
3445  pref, do_mp2, do_ovlp)
3446 
3447  TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3448  TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
3449  TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_2c_der
3450  INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3451  REAL(dp), INTENT(IN) :: pref
3452  LOGICAL, INTENT(IN), OPTIONAL :: do_mp2, do_ovlp
3453 
3454  CHARACTER(LEN=*), PARAMETER :: routinen = 'get_2c_der_force'
3455 
3456  INTEGER :: handle, i_xyz, iat, iat_of_kind, ikind, &
3457  j_xyz, jat, jat_of_kind, jkind
3458  INTEGER, DIMENSION(2) :: ind
3459  LOGICAL :: do_mp2_prv, do_ovlp_prv, found
3460  REAL(dp) :: new_force
3461  REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
3462  REAL(dp), DIMENSION(3) :: scoord
3463  TYPE(dbt_iterator_type) :: iter
3464 
3465  !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
3466  !update the relevant force
3467 
3468  CALL timeset(routinen, handle)
3469 
3470  do_mp2_prv = .false.
3471  IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3472 
3473  do_ovlp_prv = .false.
3474  IF (PRESENT(do_ovlp)) do_ovlp_prv = do_ovlp
3475 
3476 !$OMP PARALLEL DEFAULT(NONE) &
3477 !$OMP SHARED(t_2c_der,t_2c_contr,force,do_mp2_prv,do_ovlp_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3478 !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force) &
3479 !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
3480  DO i_xyz = 1, 3
3481  CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
3482  DO WHILE (dbt_iterator_blocks_left(iter))
3483  CALL dbt_iterator_next_block(iter, ind)
3484 
3485  IF (ind(1) == ind(2)) cycle
3486 
3487  CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
3488  cpassert(found)
3489  CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
3490 
3491  IF (found) THEN
3492 
3493  !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
3494  !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
3495  new_force = pref*sum(der_blk(:, :)*contr_blk(:, :))
3496 
3497  iat = idx_to_at(ind(1))
3498  iat_of_kind = atom_of_kind(iat)
3499  ikind = kind_of(iat)
3500 
3501  IF (do_mp2_prv) THEN
3502 !$OMP ATOMIC
3503  force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3504  + new_force
3505  ELSE IF (do_ovlp_prv) THEN
3506 !$OMP ATOMIC
3507  force(ikind)%overlap(i_xyz, iat_of_kind) = force(ikind)%overlap(i_xyz, iat_of_kind) &
3508  + new_force
3509  ELSE
3510 !$OMP ATOMIC
3511  force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3512  + new_force
3513  END IF
3514 
3515  jat = idx_to_at(ind(2))
3516  jat_of_kind = atom_of_kind(jat)
3517  jkind = kind_of(jat)
3518 
3519  IF (do_mp2_prv) THEN
3520 !$OMP ATOMIC
3521  force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) = force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) &
3522  - new_force
3523  ELSE IF (do_ovlp_prv) THEN
3524 !$OMP ATOMIC
3525  force(jkind)%overlap(i_xyz, jat_of_kind) = force(jkind)%overlap(i_xyz, jat_of_kind) &
3526  - new_force
3527  ELSE
3528 !$OMP ATOMIC
3529  force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
3530  - new_force
3531  END IF
3532 
3533  DEALLOCATE (contr_blk)
3534  END IF
3535 
3536  DEALLOCATE (der_blk)
3537  END DO !iter
3538  CALL dbt_iterator_stop(iter)
3539 
3540  END DO !i_xyz
3541 !$OMP END PARALLEL
3542  CALL timestop(handle)
3543 
3544  END SUBROUTINE get_2c_der_force
3545 
3546 ! **************************************************************************************************
3547 !> \brief Get the force from a contraction of type SUM_a,beta (a|beta') C_a,beta, where beta is an AO
3548 !> and a is a MO
3549 !> \param force ...
3550 !> \param t_mo_coeff ...
3551 !> \param t_2c_MO_AO ...
3552 !> \param atom_of_kind ...
3553 !> \param kind_of ...
3554 !> \param idx_to_at ...
3555 !> \param pref ...
3556 !> \param i_xyz ...
3557 ! **************************************************************************************************
3558  SUBROUTINE get_mo_ao_force(force, t_mo_coeff, t_2c_MO_AO, atom_of_kind, kind_of, idx_to_at, pref, i_xyz)
3559 
3560  TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3561  TYPE(dbt_type), INTENT(INOUT) :: t_mo_coeff, t_2c_mo_ao
3562  INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3563  REAL(dp), INTENT(IN) :: pref
3564  INTEGER, INTENT(IN) :: i_xyz
3565 
3566  CHARACTER(LEN=*), PARAMETER :: routinen = 'get_MO_AO_force'
3567 
3568  INTEGER :: handle, iat, iat_of_kind, ikind, j_xyz
3569  INTEGER, DIMENSION(2) :: ind
3570  LOGICAL :: found
3571  REAL(dp) :: new_force
3572  REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: mo_ao_blk, mo_coeff_blk
3573  REAL(dp), DIMENSION(3) :: scoord
3574  TYPE(dbt_iterator_type) :: iter
3575 
3576  CALL timeset(routinen, handle)
3577 
3578 !$OMP PARALLEL DEFAULT(NONE) &
3579 !$OMP SHARED(t_2c_MO_AO,t_mo_coeff,pref,force,idx_to_at,atom_of_kind,kind_of,i_xyz) &
3580 !$OMP PRIVATE(iter,ind,mo_ao_blk,mo_coeff_blk,found,new_force,iat,iat_of_kind,ikind,scoord,j_xyz)
3581  CALL dbt_iterator_start(iter, t_2c_mo_ao)
3582  DO WHILE (dbt_iterator_blocks_left(iter))
3583  CALL dbt_iterator_next_block(iter, ind)
3584 
3585  CALL dbt_get_block(t_2c_mo_ao, ind, mo_ao_blk, found)
3586  cpassert(found)
3587  CALL dbt_get_block(t_mo_coeff, ind, mo_coeff_blk, found)
3588 
3589  IF (found) THEN
3590 
3591  new_force = pref*sum(mo_ao_blk(:, :)*mo_coeff_blk(:, :))
3592 
3593  iat = idx_to_at(ind(2)) !AO index is column index
3594  iat_of_kind = atom_of_kind(iat)
3595  ikind = kind_of(iat)
3596 
3597 !$OMP ATOMIC
3598  force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3599  + new_force
3600 
3601  DEALLOCATE (mo_coeff_blk)
3602  END IF
3603 
3604  DEALLOCATE (mo_ao_blk)
3605  END DO !iter
3606  CALL dbt_iterator_stop(iter)
3607 !$OMP END PARALLEL
3608 
3609  CALL timestop(handle)
3610 
3611  END SUBROUTINE get_mo_ao_force
3612 
3613 ! **************************************************************************************************
3614 !> \brief Print RI-HFX quantities, as required by the PRINT subsection
3615 !> \param ri_data ...
3616 !> \param qs_env ...
3617 ! **************************************************************************************************
3618  SUBROUTINE print_ri_hfx(ri_data, qs_env)
3619 
3620  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3621  TYPE(qs_environment_type), POINTER :: qs_env
3622 
3623  INTEGER :: i_ri, ibasis, nkind, nspins, unit_nr
3624  INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3625  LOGICAL :: mult_by_s, print_density, print_ri_metric
3626  REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs, density_coeffs_2
3627  TYPE(cp_blacs_env_type), POINTER :: blacs_env
3628  TYPE(cp_fm_struct_type), POINTER :: fm_struct
3629  TYPE(cp_fm_type) :: matrix_s_fm
3630  TYPE(cp_logger_type), POINTER :: logger
3631  TYPE(dbcsr_distribution_type) :: dbcsr_dist
3632  TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
3633  TYPE(dbcsr_type), DIMENSION(1) :: matrix_s
3634  TYPE(dft_control_type), POINTER :: dft_control
3635  TYPE(distribution_2d_type), POINTER :: dist_2d
3636  TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3637  DIMENSION(:), TARGET :: basis_set_ao, basis_set_ri
3638  TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
3639  TYPE(mp_para_env_type), POINTER :: para_env
3640  TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3641  POINTER :: nl_2c
3642  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3643  TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3644  TYPE(qs_rho_type), POINTER :: rho
3645  TYPE(section_vals_type), POINTER :: input, print_section
3646 
3647  NULLIFY (rho_ao, input, print_section, logger, rho, particle_set, qs_kind_set, ri_basis, nl_2c, &
3648  dist_2d, col_bsize, row_bsize, para_env, blacs_env, fm_struct, orb_basis, dft_control)
3649 
3650  CALL get_qs_env(qs_env, input=input, dft_control=dft_control)
3651  logger => cp_get_default_logger()
3652  print_density = .false.
3653  print_ri_metric = .false.
3654 
3655  !Do we print the RI density coeffs and/or RI_metric 2c integrals?
3656  print_section => section_vals_get_subs_vals(input, "DFT%XC%HF%RI%PRINT")
3657  IF (btest(cp_print_key_should_output(logger%iter_info, print_section, "RI_DENSITY_COEFFS"), &
3658  cp_p_file)) print_density = .true.
3659  IF (btest(cp_print_key_should_output(logger%iter_info, print_section, "RI_METRIC_2C_INTS"), &
3660  cp_p_file)) print_ri_metric = .true.
3661 
3662  !common stuff
3663  IF (print_density .OR. print_ri_metric) THEN
3664 
3665  !Re-calculate the 2-center RI_metric integrals (because not stored and cheap)
3666  !Recalculated the RI_metric 2c-integrals, as it is cheap, and not stored
3667  CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
3668  distribution_2d=dist_2d, para_env=para_env, blacs_env=blacs_env)
3669  ALLOCATE (basis_set_ri(nkind), basis_set_ao(nkind))
3670  CALL basis_set_list_setup(basis_set_ri, ri_data%ri_basis_type, qs_kind_set)
3671  CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ri)
3672  CALL basis_set_list_setup(basis_set_ao, ri_data%orb_basis_type, qs_kind_set)
3673  CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_ao)
3674 
3675  DO ibasis = 1, nkind
3676  ri_basis => basis_set_ri(ibasis)%gto_basis_set
3677  CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
3678  orb_basis => basis_set_ao(ibasis)%gto_basis_set
3679  CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
3680  END DO
3681 
3682  CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3683  ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3684  ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3685  row_bsize(:) = ri_data%bsizes_RI
3686  col_bsize(:) = ri_data%bsizes_RI
3687 
3688  CALL dbcsr_create(matrix_s(1), "RI metric", dbcsr_dist, dbcsr_type_symmetric, row_bsize, col_bsize)
3689 
3690  CALL build_2c_neighbor_lists(nl_2c, basis_set_ri, basis_set_ri, ri_data%ri_metric, &
3691  "HFX_2c_nl_pot", qs_env, sym_ij=.true., dist_2d=dist_2d)
3692  CALL build_2c_integrals(matrix_s, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_ri, &
3693  basis_set_ri, ri_data%ri_metric)
3694 
3695  CALL release_neighbor_list_sets(nl_2c)
3696  CALL dbcsr_distribution_release(dbcsr_dist)
3697  END IF
3698 
3699  IF (print_density) THEN
3700  CALL get_qs_env(qs_env, rho=rho)
3701  CALL qs_rho_get(rho, rho_ao_kp=rho_ao)
3702  nspins = SIZE(rho_ao, 1)
3703 
3704  CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%MULTIPLY_BY_RI_2C_INTEGRALS", l_val=mult_by_s)
3705 
3706  CALL get_ri_density_coeffs(density_coeffs, matrix_s(1), rho_ao, 1, basis_set_ao, basis_set_ri, &
3707  mult_by_s, ri_data, qs_env)
3708  IF (nspins == 2) &
3709  CALL get_ri_density_coeffs(density_coeffs_2, matrix_s(1), rho_ao, 2, basis_set_ao, &
3710  basis_set_ri, mult_by_s, ri_data, qs_env)
3711 
3712  unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS", &
3713  extension=".dat", file_status="REPLACE", &
3714  file_action="WRITE", file_form="FORMATTED")
3715 
3716  IF (unit_nr > 0) THEN
3717  IF (nspins == 1) THEN
3718  WRITE (unit_nr, fmt="(A,A,A)") &
3719  "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3720  trim(logger%iter_info%project_name), " project"
3721  DO i_ri = 1, SIZE(density_coeffs)
3722  WRITE (unit_nr, fmt="(F20.12)") density_coeffs(i_ri)
3723  END DO
3724  ELSE
3725  WRITE (unit_nr, fmt="(A,A,A)") &
3726  "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3727  trim(logger%iter_info%project_name), " project. Spin up, spin down"
3728  DO i_ri = 1, SIZE(density_coeffs)
3729  WRITE (unit_nr, fmt="(F20.12,F20.12)") density_coeffs(i_ri), density_coeffs_2(i_ri)
3730  END DO
3731  END IF
3732  END IF
3733 
3734  CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS")
3735  END IF
3736 
3737  IF (print_ri_metric) THEN
3738 
3739  !convert 2c integrals to fm for dumping
3740  CALL cp_fm_struct_create(fm_struct, context=blacs_env, para_env=para_env, &
3741  nrow_global=sum(row_bsize), ncol_global=sum(col_bsize))
3742  CALL cp_fm_create(matrix_s_fm, fm_struct)
3743 
3744  CALL copy_dbcsr_to_fm(matrix_s(1), matrix_s_fm)
3745 
3746  unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS", &
3747  extension=".fm", file_status="REPLACE", &
3748  file_action="WRITE", file_form="UNFORMATTED")
3749  CALL cp_fm_write_unformatted(matrix_s_fm, unit_nr)
3750 
3751  CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS")
3752 
3753  CALL cp_fm_struct_release(fm_struct)
3754  CALL cp_fm_release(matrix_s_fm)
3755  END IF
3756 
3757  !clean-up
3758  IF (print_density .OR. print_ri_metric) THEN
3759  DO ibasis = 1, nkind
3760  ri_basis => basis_set_ri(ibasis)%gto_basis_set
3761  CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3762  orb_basis => basis_set_ao(ibasis)%gto_basis_set
3763  CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3764  END DO
3765 
3766  CALL dbcsr_release(matrix_s(1))
3767  DEALLOCATE (row_bsize, col_bsize)
3768  END IF
3769 
3770  END SUBROUTINE print_ri_hfx
3771 
3772 ! **************************************************************************************************
3773 !> \brief Projects the density on the RI basis and return the array of the RI coefficients
3774 !> \param density_coeffs ...
3775 !> \param ri_2c_ints ...
3776 !> \param rho_ao ...
3777 !> \param ispin ...
3778 !> \param basis_set_AO ...
3779 !> \param basis_set_RI ...
3780 !> \param multiply_by_S ...
3781 !> \param ri_data ...
3782 !> \param qs_env ...
3783 ! **************************************************************************************************
3784  SUBROUTINE get_ri_density_coeffs(density_coeffs, ri_2c_ints, rho_ao, ispin, basis_set_AO, &
3785  basis_set_RI, multiply_by_S, ri_data, qs_env)
3786 
3787  REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs
3788  TYPE(dbcsr_type), INTENT(INOUT) :: ri_2c_ints
3789  TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
3790  INTEGER, INTENT(IN) :: ispin
3791  TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_set_ao, basis_set_ri
3792  LOGICAL, INTENT(IN) :: multiply_by_s
3793  TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3794  TYPE(qs_environment_type), POINTER :: qs_env
3795 
3796  CHARACTER(LEN=*), PARAMETER :: routinen = 'get_RI_density_coeffs'
3797 
3798  INTEGER :: a, b, handle, i_mem, idx, n_dependent, &
3799  n_mem, n_mem_ri, natom, &
3800  nblk_per_thread, nblks, nkind
3801  INTEGER(int_8) :: nze
3802  INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_block_end, batch_block_start, &
3803  dist1, dist2, dist3, dummy1, dummy2, &
3804  idx1, idx2, idx3
3805  INTEGER, DIMENSION(2) :: ind, pdims_2d
3806  INTEGER, DIMENSION(2, 3) :: bounds_cpy
3807  INTEGER, DIMENSION(3) :: dims_3c, pcoord_3d, pdims_3d
3808  LOGICAL :: calc_ints, found
3809  REAL(dp) :: occ, threshold
3810  REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3811  REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk_3d
3812  TYPE(cp_blacs_env_type), POINTER :: blacs_env
3813  TYPE(dbcsr_type) :: ri_2c_inv
3814  TYPE(dbt_distribution_type) :: dist_2d, dist_3d
3815  TYPE(dbt_iterator_type) :: iter
3816  TYPE(dbt_pgrid_type) :: pgrid_2d, pgrid_3d
3817  TYPE(dbt_type) :: density_coeffs_t, density_tmp, rho_ao_t, &
3818  rho_ao_t_3d, rho_ao_tmp, t2c_ri_ints, &
3819  t2c_ri_inv, t2c_ri_tmp
3820  TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
3821  TYPE(dft_control_type), POINTER :: dft_control
3822  TYPE(distribution_3d_type) :: dist_nl3c
3823  TYPE(mp_cart_type) :: mp_comm_t3c
3824  TYPE(mp_para_env_type), POINTER :: para_env
3825  TYPE(neighbor_list_3c_type) :: nl_3c
3826  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3827  TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3828 
3829  NULLIFY (dft_control, para_env, blacs_env, particle_set, qs_kind_set)
3830 
3831  CALL timeset(routinen, handle)
3832 
3833  ! Projection of the density on the RI basis: n(r) = sum_pq sum_munu P_pq (pq|mu) (mu|nu)^-1 nu(r)
3834  ! = sum_nu d_nu nu(r)
3835  ! the (pq|mu) (mu|nu)^-1 contraction is already stored in compressed format
3836 
3837  IF (.NOT. ri_data%flavor == ri_pmat) THEN
3838  cpabort("Can only calculate and print the RI density coefficients within the RHO flavor of RI-HFX")
3839  END IF
3840 
3841  CALL get_qs_env(qs_env, dft_control=dft_control, para_env=para_env, blacs_env=blacs_env, nkind=nkind, &
3842  particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom)
3843  n_mem = ri_data%n_mem
3844  n_mem_ri = ri_data%n_mem_RI
3845 
3846  ! The RI 2c int tensor and its inverse
3847  CALL dbcsr_create(ri_2c_inv, template=ri_2c_ints, matrix_type=dbcsr_type_no_symmetry)
3848 
3849  SELECT CASE (ri_data%t2c_method)
3850  CASE (hfx_ri_do_2c_iter)
3851  threshold = max(ri_data%filter_eps, 1.0e-12_dp)
3852  CALL invert_hotelling(ri_2c_inv, ri_2c_ints, threshold=threshold, silent=.false.)
3853  CASE (hfx_ri_do_2c_cholesky)
3854  CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3855  CALL cp_dbcsr_cholesky_decompose(ri_2c_inv, para_env=para_env, blacs_env=blacs_env)
3856  CALL cp_dbcsr_cholesky_invert(ri_2c_inv, para_env=para_env, blacs_env=blacs_env, upper_to_full=.true.)
3857  CASE (hfx_ri_do_2c_diag)
3858  CALL dbcsr_copy(ri_2c_inv, ri_2c_ints)
3859  CALL cp_dbcsr_power(ri_2c_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
3860  para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
3861  END SELECT
3862 
3863  CALL dbt_create(ri_2c_ints, t2c_ri_tmp)
3864  CALL create_2c_tensor(t2c_ri_ints, dist1, dist2, ri_data%pgrid_2d, &
3865  ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, &
3866  name="(RI | RI)")
3867  CALL dbt_create(t2c_ri_ints, t2c_ri_inv)
3868 
3869  CALL dbt_copy_matrix_to_tensor(ri_2c_ints, t2c_ri_tmp)
3870  CALL dbt_copy(t2c_ri_tmp, t2c_ri_ints, move_data=.true.)
3871  CALL dbt_filter(t2c_ri_ints, ri_data%filter_eps)
3872 
3873  CALL dbt_copy_matrix_to_tensor(ri_2c_inv, t2c_ri_tmp)
3874  CALL dbt_copy(t2c_ri_tmp, t2c_ri_inv, move_data=.true.)
3875  CALL dbt_filter(t2c_ri_inv, ri_data%filter_eps)
3876 
3877  CALL dbcsr_release(ri_2c_inv)
3878  CALL dbt_destroy(t2c_ri_tmp)
3879  DEALLOCATE (dist1, dist2)
3880 
3881  ! The AO density tensor
3882  CALL dbt_create(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3883  CALL create_2c_tensor(rho_ao_t, dist1, dist2, ri_data%pgrid_2d, &
3884  ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3885  name="(AO | AO)")
3886  DEALLOCATE (dist1, dist2)
3887 
3888  CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
3889  CALL dbt_copy(rho_ao_tmp, rho_ao_t, move_data=.true.)
3890  CALL dbt_filter(rho_ao_t, ri_data%filter_eps)
3891  CALL dbt_destroy(rho_ao_tmp)
3892 
3893  ! Put in in 3D
3894  ALLOCATE (dist1(SIZE(ri_data%bsizes_AO_split)), dist2(SIZE(ri_data%bsizes_AO_split)), dist3(1))
3895  dist3(1) = 0
3896  CALL dbt_get_info(rho_ao_t, pdims=pdims_2d, proc_dist_1=dist1, proc_dist_2=dist2)
3897  CALL dbt_default_distvec(1, 1, [1], dist3)
3898 
3899  pdims_3d(1) = pdims_2d(1)
3900  pdims_3d(2) = pdims_2d(2)
3901  pdims_3d(3) = 1
3902 
3903  CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d)
3904  CALL dbt_distribution_new(dist_3d, pgrid_3d, dist1, dist2, dist3)
3905  CALL dbt_create(rho_ao_t_3d, "rho_ao_3d", dist_3d, [1, 2], [3], ri_data%bsizes_AO_split, &
3906  ri_data%bsizes_AO_split, [1])
3907  DEALLOCATE (dist1, dist2, dist3)
3908  CALL dbt_pgrid_destroy(pgrid_3d)
3909  CALL dbt_distribution_destroy(dist_3d)
3910 
3911  ! copy density
3912  nblks = 0
3913 !$OMP PARALLEL DEFAULT(NONE) &
3914 !$OMP SHARED(rho_ao_t,nblks) &
3915 !$OMP PRIVATE(iter,ind,blk,found)
3916  CALL dbt_iterator_start(iter, rho_ao_t)
3917  DO WHILE (dbt_iterator_blocks_left(iter))
3918  CALL dbt_iterator_next_block(iter, ind)
3919  CALL dbt_get_block(rho_ao_t, ind, blk, found)
3920  IF (found) THEN
3921 !$OMP ATOMIC
3922  nblks = nblks + 1
3923  DEALLOCATE (blk)
3924  END IF
3925  END DO
3926  CALL dbt_iterator_stop(iter)
3927 !$OMP END PARALLEL
3928 
3929  ALLOCATE (idx1(nblks), idx2(nblks), idx3(nblks))
3930  idx3 = 1
3931  nblks = 0
3932 !$OMP PARALLEL DEFAULT(NONE) &
3933 !$OMP SHARED(rho_ao_t,nblks,idx1,idx2) &
3934 !$OMP PRIVATE(iter,ind,blk,found)
3935  CALL dbt_iterator_start(iter, rho_ao_t)
3936  DO WHILE (dbt_iterator_blocks_left(iter))
3937  CALL dbt_iterator_next_block(iter, ind)
3938  CALL dbt_get_block(rho_ao_t, ind, blk, found)
3939  IF (found) THEN
3940 !$OMP CRITICAL
3941  nblks = nblks + 1
3942  idx1(nblks) = ind(1)
3943  idx2(nblks) = ind(2)
3944 !$OMP END CRITICAL
3945  DEALLOCATE (blk)
3946  END IF
3947  END DO
3948  CALL dbt_iterator_stop(iter)
3949 !$OMP END PARALLEL
3950 
3951 !$OMP PARALLEL DEFAULT(NONE) SHARED(rho_ao_t_3d,nblks,idx1,idx2,idx3) PRIVATE(nblk_per_thread,A,b)
3952  nblk_per_thread = nblks/omp_get_num_threads() + 1
3953  a = omp_get_thread_num()*nblk_per_thread + 1
3954  b = min(a + nblk_per_thread, nblks)
3955  CALL dbt_reserve_blocks(rho_ao_t_3d, idx1(a:b), idx2(a:b), idx3(a:b))
3956 !$OMP END PARALLEL
3957 
3958 !$OMP PARALLEL DEFAULT(NONE) &
3959 !$OMP SHARED(rho_ao_t,rho_ao_t_3d) &
3960 !$OMP PRIVATE(iter,ind,blk,found,blk_3d)
3961  CALL dbt_iterator_start(iter, rho_ao_t)
3962  DO WHILE (dbt_iterator_blocks_left(iter))
3963  CALL dbt_iterator_next_block(iter, ind)
3964  CALL dbt_get_block(rho_ao_t, ind, blk, found)
3965  IF (found) THEN
3966  ALLOCATE (blk_3d(SIZE(blk, 1), SIZE(blk, 2), 1))
3967  blk_3d(:, :, 1) = blk(:, :)
3968 !$OMP CRITICAL
3969  CALL dbt_put_block(rho_ao_t_3d, [ind(1), ind(2), 1], [SIZE(blk, 1), SIZE(blk, 2), 1], blk_3d)
3970 !$OMP END CRITICAL
3971  DEALLOCATE (blk, blk_3d)
3972  END IF
3973  END DO
3974  CALL dbt_iterator_stop(iter)
3975 !$OMP END PARALLEL
3976 
3977  ! The 1D tensor with the density coeffs
3978  pdims_2d(1) = para_env%num_pe
3979  pdims_2d(2) = 1
3980 
3981  ALLOCATE (dist1(SIZE(ri_data%bsizes_RI_split)), dist2(1))
3982  CALL dbt_default_distvec(SIZE(ri_data%bsizes_RI_split), pdims_2d(1), ri_data%bsizes_RI_split, dist1)
3983  CALL dbt_default_distvec(1, pdims_2d(2), [1], dist2)
3984 
3985  CALL dbt_pgrid_create(para_env, pdims_2d, pgrid_2d)
3986  CALL dbt_distribution_new(dist_2d, pgrid_2d, dist1, dist2)
3987  CALL dbt_create(density_coeffs_t, "density_coeffs", dist_2d, [1], [2], ri_data%bsizes_RI_split, [1])
3988  CALL dbt_create(density_coeffs_t, density_tmp)
3989  DEALLOCATE (dist1, dist2)
3990  CALL dbt_pgrid_destroy(pgrid_2d)
3991  CALL dbt_distribution_destroy(dist_2d)
3992 
3993  CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
3994 
3995  ! The 3c integrals tensor, in case we compute them here
3996  pdims_3d = 0
3997  CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d, tensor_dims=[max(1, natom/n_mem), natom, natom])
3998  ALLOCATE (t_3c_int_batched(1, 1))
3999  CALL create_3c_tensor(t_3c_int_batched(1, 1), dist1, dist2, dist3, pgrid_3d, &
4000  ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, map1=[1], map2=[2, 3], &
4001  name="(RI | AO AO)")
4002 
4003  CALL dbt_mp_environ_pgrid(pgrid_3d, pdims_3d, pcoord_3d)
4004  CALL mp_comm_t3c%create(pgrid_3d%mp_comm_2d, 3, pdims_3d)
4005  CALL distribution_3d_create(dist_nl3c, dist1, dist2, dist3, nkind, particle_set, &
4006  mp_comm_t3c, own_comm=.true.)
4007  DEALLOCATE (dist1, dist2, dist3)
4008  CALL dbt_pgrid_destroy(pgrid_3d)
4009 
4010  CALL build_3c_neighbor_lists(nl_3c, basis_set_ri, basis_set_ao, basis_set_ao, dist_nl3c, ri_data%ri_metric, &
4011  "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.true., own_dist=.true.)
4012 
4013  n_mem = ri_data%n_mem
4014  CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy1, dummy2, batch_block_start, batch_block_end)
4015 
4016  calc_ints = .false.
4017  CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze, occ)
4018  IF (nze == 0) calc_ints = .true.
4019 
4020  DO i_mem = 1, n_mem
4021  IF (calc_ints) THEN
4022  CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
4023  basis_set_ri, basis_set_ao, basis_set_ao, &
4024  ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
4025  desymmetrize=.false., &
4026  bounds_i=[batch_block_start(i_mem), batch_block_end(i_mem)])
4027  CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 3, 2])
4028  CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), move_data=.true., summation=.true.)
4029  CALL dbt_filter(ri_data%t_3c_int_ctr_3(1, 1), ri_data%filter_eps)
4030  ELSE
4031  bounds_cpy(:, 2) = [sum(ri_data%bsizes_RI(1:batch_block_start(i_mem) - 1)) + 1, &
4032  sum(ri_data%bsizes_RI(1:batch_block_end(i_mem)))]
4033  bounds_cpy(:, 1) = [1, sum(ri_data%bsizes_AO)]
4034  bounds_cpy(:, 3) = [1, sum(ri_data%bsizes_AO)]
4035  CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
4036  order=[2, 1, 3], bounds=bounds_cpy)
4037  END IF
4038 
4039  !contract the integrals with the density P_pq (pq|R)
4040  CALL dbt_contract(1.0_dp, ri_data%t_3c_int_ctr_3(1, 1), rho_ao_t_3d, 0.0_dp, density_tmp, &
4041  contract_1=[2, 3], notcontract_1=[1], &
4042  contract_2=[1, 2], notcontract_2=[3], &
4043  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4044  CALL dbt_clear(ri_data%t_3c_int_ctr_3(1, 1))
4045 
4046  !contract the above vector with the inverse metric
4047  CALL dbt_contract(1.0_dp, t2c_ri_inv, density_tmp, 1.0_dp, density_coeffs_t, &
4048  contract_1=[2], notcontract_1=[1], &
4049  contract_2=[1], notcontract_2=[2], &
4050  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4051 
4052  END DO
4053  CALL neighbor_list_3c_destroy(nl_3c)
4054 
4055  IF (multiply_by_s) THEN
4056  CALL dbt_contract(1.0_dp, t2c_ri_ints, density_coeffs_t, 0.0_dp, density_tmp, &
4057  contract_1=[2], notcontract_1=[1], &
4058  contract_2=[1], notcontract_2=[2], &
4059  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4060  CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.true.)
4061  END IF
4062 
4063  ALLOCATE (density_coeffs(sum(ri_data%bsizes_RI)))
4064  density_coeffs = 0.0
4065 
4066 !$OMP PARALLEL DEFAULT(NONE) &
4067 !$OMP SHARED(density_coeffs_t,ri_data,density_coeffs) &
4068 !$OMP PRIVATE(iter,ind,blk,found,idx)
4069  CALL dbt_iterator_start(iter, density_coeffs_t)
4070  DO WHILE (dbt_iterator_blocks_left(iter))
4071  CALL dbt_iterator_next_block(iter, ind)
4072  CALL dbt_get_block(density_coeffs_t, ind, blk, found)
4073  IF (found) THEN
4074 
4075  idx = sum(ri_data%bsizes_RI_split(1:ind(1) - 1))
4076 !$OMP CRITICAL
4077  density_coeffs(idx + 1:idx + ri_data%bsizes_RI_split(ind(1))) = blk(:, 1)
4078 !$OMP END CRITICAL
4079  DEALLOCATE (blk)
4080  END IF
4081  END DO
4082  CALL dbt_iterator_stop(iter)
4083 !$OMP END PARALLEL
4084  CALL para_env%sum(density_coeffs)
4085 
4086  CALL dbt_destroy(t2c_ri_ints)
4087  CALL dbt_destroy(t2c_ri_inv)
4088  CALL dbt_destroy(density_tmp)
4089  CALL dbt_destroy(rho_ao_t)
4090  CALL dbt_destroy(rho_ao_t_3d)
4091  CALL dbt_destroy(density_coeffs_t)
4092  CALL dbt_destroy(t_3c_int_batched(1, 1))
4093 
4094  CALL timestop(handle)
4095 
4096  END SUBROUTINE get_ri_density_coeffs
4097 
4098 ! **************************************************************************************************
4099 !> \brief a small utility function that returns the atom corresponding to a block of a split tensor
4100 !> \param idx_to_at ...
4101 !> \param bsizes_split ...
4102 !> \param bsizes_orig ...
4103 !> \return ...
4104 ! **************************************************************************************************
4105  SUBROUTINE get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
4106  INTEGER, DIMENSION(:), INTENT(INOUT) :: idx_to_at
4107  INTEGER, DIMENSION(:), INTENT(IN) :: bsizes_split, bsizes_orig
4108 
4109  INTEGER :: full_sum, iat, iblk, split_sum
4110 
4111  iat = 1
4112  full_sum = bsizes_orig(iat)
4113  split_sum = 0
4114  DO iblk = 1, SIZE(bsizes_split)
4115  split_sum = split_sum + bsizes_split(iblk)
4116 
4117  IF (split_sum .GT. full_sum) THEN
4118  iat = iat + 1
4119  full_sum = full_sum + bsizes_orig(iat)
4120  END IF
4121 
4122  idx_to_at(iblk) = iat
4123  END DO
4124 
4125  END SUBROUTINE get_idx_to_atom
4126 
4127 ! **************************************************************************************************
4128 !> \brief Function for calculating sqrt of a matrix
4129 !> \param values ...
4130 !> \return ...
4131 ! **************************************************************************************************
4132  FUNCTION my_sqrt(values)
4133  REAL(kind=dp), DIMENSION(:), INTENT(IN) :: values
4134  REAL(kind=dp), DIMENSION(SIZE(values)) :: my_sqrt
4135 
4136  my_sqrt = sqrt(values)
4137  END FUNCTION
4138 
4139 ! **************************************************************************************************
4140 !> \brief Function for calculation inverse sqrt of a matrix
4141 !> \param values ...
4142 !> \return ...
4143 ! **************************************************************************************************
4144  FUNCTION my_invsqrt(values)
4145  REAL(kind=dp), DIMENSION(:), INTENT(IN) :: values
4146  REAL(kind=dp), DIMENSION(SIZE(values)) :: my_invsqrt
4147 
4148  my_invsqrt = sqrt(1.0_dp/values)
4149  END FUNCTION
4150 END MODULE
static GRID_HOST_DEVICE double fac(const int i)
Factorial function, e.g. fac(5) = 5! = 120.
Definition: grid_common.h:48
static GRID_HOST_DEVICE int idx(const orbital a)
Return coset index of given orbital angular momentum.
Definition: grid_common.h:153
#define idx2(a, i, j)
#define idx3(a, i, j, k)
arnoldi iteration using dbcsr
Definition: arnoldi_api.F:15
subroutine, public arnoldi_extremal(matrix_a, max_ev, min_ev, converged, threshold, max_iter)
simple wrapper to estimate extremal eigenvalues with arnoldi, using the old lanczos interface this hi...
Definition: arnoldi_api.F:254
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
Handles all functions related to the CELL.
Definition: cell_types.F:15
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
Defines control structures, which contain the parameters and the settings for the DFT-based calculati...
Interface to (sca)lapack for the Cholesky based procedures.
subroutine, public cp_dbcsr_cholesky_decompose(matrix, n, para_env, blacs_env)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
subroutine, public cp_dbcsr_cholesky_invert(matrix, n, para_env, blacs_env, upper_to_full)
used to replace the cholesky decomposition by the inverse
Interface to (sca)lapack for the Cholesky based procedures.
Definition: cp_dbcsr_diag.F:17
subroutine, public cp_dbcsr_power(matrix, exponent, threshold, n_dependent, para_env, blacs_env, verbose, eigenvectors, eigenvalues)
...
DBCSR operations in CP2K.
subroutine, public cp_dbcsr_dist2d_to_dist(dist2d, dist)
Creates a DBCSR distribution from a distribution_2d.
subroutine, public copy_dbcsr_to_fm(matrix, fm)
Copy a DBCSR matrix to a BLACS matrix.
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
Definition: cp_fm_struct.F:125
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition: cp_fm_struct.F:320
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_write_unformatted(fm, unit)
...
Definition: cp_fm_types.F:2147
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
integer, parameter, public cp_p_file
integer function, public cp_print_key_should_output(iteration_info, basis_section, print_key_path, used_print_key, first_time)
returns what should be done with the given property if btest(res,cp_p_store) then the property should...
This is the start of a dbt_api, all publically needed functions are exported here....
Definition: dbt_api.F:17
stores a mapping of 2D info (e.g. matrix) on a 2D processor distribution (i.e. blacs grid) where cpus...
RI-methods for HFX.
Definition: hfx_ri.F:12
subroutine, public hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, geometry_did_change, nspins, hf_fraction)
...
Definition: hfx_ri.F:1033
subroutine, public check_inverse(matrix_1, matrix_2, name, unit_nr)
...
Definition: hfx_ri.F:347
subroutine, public hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
Calculate 2-center and 3-center integrals.
Definition: hfx_ri.F:441
subroutine, public get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, deriv_dim)
This routines calculates the force contribution from a trace over 3D tensors, i.e....
Definition: hfx_ri.F:3360
subroutine, public get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
a small utility function that returns the atom corresponding to a block of a split tensor
Definition: hfx_ri.F:4106
subroutine, public hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, mos, use_virial, resp_only, rescale_factor)
the general routine that calls the relevant force code
Definition: hfx_ri.F:3036
subroutine, public print_ri_hfx(ri_data, qs_env)
Print RI-HFX quantities, as required by the PRINT subsection.
Definition: hfx_ri.F:3619
subroutine, public get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, pref, do_mp2, do_ovlp)
Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
Definition: hfx_ri.F:3446
Types and set/get functions for HFX.
Definition: hfx_types.F:15
subroutine, public dealloc_containers(DATA, memory_usage)
...
Definition: hfx_types.F:2874
subroutine, public alloc_containers(DATA, bin_size)
...
Definition: hfx_types.F:2906
subroutine, public hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
...
Definition: hfx_types.F:1199
subroutine, public hfx_ri_release(ri_data, write_stats)
...
Definition: hfx_types.F:1464
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public hfx_ri_do_2c_cholesky
integer, parameter, public hfx_ri_do_2c_diag
integer, parameter, public hfx_ri_do_2c_iter
function that builds the hartree fock exchange section of the input
integer, parameter, public ri_pmat
integer, parameter, public ri_mo
objects that represent the structure of input sections and the data contained in an input section
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Routines useful for iterative matrix calculations.
subroutine, public matrix_sqrt_newton_schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, eps_lanczos, max_iter_lanczos, symmetrize, converged)
compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations the...
subroutine, public invert_hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, norm_convergence, filter_eps, accelerator_order, max_iter_lanczos, eps_lanczos, silent)
invert a symmetric positive definite matrix by Hotelling's method explicit symmetrization makes this ...
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public int_8
Definition: kinds.F:54
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public default_string_length
Definition: kinds.F:57
Machine interface based on Fortran 2003 and POSIX.
Definition: machine.F:17
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition: machine.F:123
Interface to the message passing library MPI.
Define methods related to particle_type.
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Define the data structure for the particle information.
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_RI_aux_kp, matrix_s, matrix_s_RI_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, WannierCentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, rhs)
Get the QUICKSTEP environment.
Some utility functions for the calculation of integrals.
subroutine, public basis_set_list_setup(basis_set_list, basis_type, qs_kind_set)
Set up an easy accessible list of the basis sets for all kinds.
Calculate the interaction radii for the operator matrix calculation.
subroutine, public init_interaction_radii_orb_basis(orb_basis_set, eps_pgf_orb, eps_pgf_short)
...
Define the quickstep kind type and their sub types.
Definition: qs_kind_types.F:23
Definition and initialisation of the mo data type.
Definition: qs_mo_types.F:22
subroutine, public get_mo_set(mo_set, maxocc, homo, lfomo, nao, nelectron, n_el_f, nmo, eigenvalues, occupation_numbers, mo_coeff, mo_coeff_b, uniform_occupation, kTS, mu, flexible_electron_count)
Get the components of a MO set data structure.
Definition: qs_mo_types.F:397
Define the neighbor list data types and the corresponding functionality.
subroutine, public release_neighbor_list_sets(nlists)
releases an array of neighbor_list_sets
superstucture that hold various representations of the density and keeps track of which ones are vali...
Definition: qs_rho_types.F:18
subroutine, public qs_rho_get(rho_struct, rho_ao, rho_ao_im, rho_ao_kp, rho_ao_im_kp, rho_r, drho_r, rho_g, drho_g, tau_r, tau_g, rho_r_valid, drho_r_valid, rho_g_valid, drho_g_valid, tau_r_valid, tau_g_valid, tot_rho_r, tot_rho_g, rho_r_sccs, soft_valid, complex_rho_ao)
returns info about the density described by this object. If some representation is not available an e...
Definition: qs_rho_types.F:229
Utility methods to build 3-center integral tensors of various types.
subroutine, public distribution_3d_create(dist_3d, dist1, dist2, dist3, nkind, particle_set, mp_comm_3d, own_comm)
Create a 3d distribution.
subroutine, public create_2c_tensor(t2c, dist_1, dist_2, pgrid, sizes_1, sizes_2, order, name)
...
subroutine, public split_block_sizes(blk_sizes, blk_sizes_split, max_size)
...
subroutine, public create_tensor_batches(sizes, nbatches, starts_array, ends_array, starts_array_block, ends_array_block)
...
subroutine, public create_3c_tensor(t3c, dist_1, dist_2, dist_3, pgrid, sizes_1, sizes_2, sizes_3, map1, map2, name)
...
Utility methods to build 3-center integral tensors of various types.
Definition: qs_tensors.F:11
subroutine, public calc_3c_virial(work_virial, t3c_trace, pref, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, der_eps, op_pos)
Calculates the 3c virial contributions on the fly.
Definition: qs_tensors.F:1868
subroutine, public build_2c_neighbor_lists(ij_list, basis_i, basis_j, potential_parameter, name, qs_env, sym_ij, molecular, dist_2d, pot_to_rad)
Build 2-center neighborlists adapted to different operators This mainly wraps build_neighbor_lists fo...
Definition: qs_tensors.F:143
subroutine, public build_3c_derivatives(t3c_der_i, t3c_der_k, filter_eps, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, der_eps, op_pos, do_kpoints, do_hfx_kpoints, bounds_i, bounds_j, bounds_k, RI_range, img_to_RI_cell)
Build 3-center derivative tensors.
Definition: qs_tensors.F:1147
subroutine, public compress_tensor(tensor, blk_indices, compressed, eps, memory)
...
Definition: qs_tensors.F:3666
subroutine, public neighbor_list_3c_destroy(ijk_list)
Destroy 3c neighborlist.
Definition: qs_tensors.F:383
subroutine, public calc_2c_virial(work_virial, t2c_trace, pref, qs_env, nl_2c, basis_i, basis_j, potential_parameter)
Calculates the virial coming from 2c derivatives on the fly.
Definition: qs_tensors.F:3232
subroutine, public decompress_tensor(tensor, blk_indices, compressed, eps)
...
Definition: qs_tensors.F:3794
subroutine, public build_2c_derivatives(t2c_der, filter_eps, qs_env, nl_2c, basis_i, basis_j, potential_parameter, do_kpoints)
Calculates the derivatives of 2-center integrals, wrt to the first center.
Definition: qs_tensors.F:2989
subroutine, public get_tensor_occupancy(tensor, nze, occ)
...
Definition: qs_tensors.F:3885
subroutine, public build_3c_neighbor_lists(ijk_list, basis_i, basis_j, basis_k, dist_3d, potential_parameter, name, qs_env, sym_ij, sym_jk, sym_ik, molecular, op_pos, own_dist)
Build a 3-center neighbor list.
Definition: qs_tensors.F:282
subroutine, public build_2c_integrals(t2c, filter_eps, qs_env, nl_2c, basis_i, basis_j, potential_parameter, do_kpoints, do_hfx_kpoints, ext_kpoints, regularization_RI)
...
Definition: qs_tensors.F:3412
subroutine, public build_3c_integrals(t3c, filter_eps, qs_env, nl_3c, basis_i, basis_j, basis_k, potential_parameter, int_eps, op_pos, do_kpoints, do_hfx_kpoints, desymmetrize, bounds_i, bounds_j, bounds_k, RI_range, img_to_RI_cell)
Build 3-center integral tensor.
Definition: qs_tensors.F:2376
All kind of helpful little routines.
Definition: util.F:14