d5/d38/xas__tdp__kernel_8F_source.html

!--------------------------------------------------------------------------------------------------!

!   CP2K: A general program to perform molecular dynamics simulations                              !

!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !

!                                                                                                  !

!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !

!--------------------------------------------------------------------------------------------------!


! **************************************************************************************************

!> \brief All the kernel specific subroutines for XAS TDP calculations

!> \author A. Bussy (03.2019)

! **************************************************************************************************


MODULE xas_tdp_kernel

   USE basis_set_types,                 ONLY: gto_basis_set_p_type

   USE cp_dbcsr_api,                    ONLY: &

        dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &

        dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, &

        dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &

        dbcsr_get_num_blocks, dbcsr_get_stored_coordinates, dbcsr_iterator_blocks_left, &

        dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &

        dbcsr_multiply, dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_reserve_blocks, &

        dbcsr_set, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric

   USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist,&

                                              dbcsr_deallocate_matrix_set

   USE dbt_api,                         ONLY: dbt_get_block,&

                                              dbt_iterator_blocks_left,&

                                              dbt_iterator_next_block,&

                                              dbt_iterator_start,&

                                              dbt_iterator_stop,&

                                              dbt_iterator_type,&

                                              dbt_type

   USE distribution_2d_types,           ONLY: distribution_2d_type

   USE kinds,                           ONLY: dp

   USE message_passing,                 ONLY: mp_para_env_type

   USE particle_methods,                ONLY: get_particle_set

   USE particle_types,                  ONLY: particle_type

   USE qs_environment_types,            ONLY: get_qs_env,&

                                              qs_environment_type

   USE qs_integral_utils,               ONLY: basis_set_list_setup

   USE qs_kind_types,                   ONLY: qs_kind_type

   USE util,                            ONLY: get_limit

   USE xas_tdp_types,                   ONLY: donor_state_type,&

                                              get_proc_batch_sizes,&

                                              xas_tdp_control_type,&

                                              xas_tdp_env_type


!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num

#include "./base/base_uses.f90"


   IMPLICIT NONE

   PRIVATE


   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_kernel'


   PUBLIC :: kernel_coulomb_xc, kernel_exchange, contract2_ao_to_domo, &

             reserve_contraction_blocks, ri_all_blocks_mm


CONTAINS


! **************************************************************************************************

!> \brief Computes, if asked for it, the Coulomb and XC kernel matrices, in the usuall matrix format

!> \param coul_ker pointer the the Coulomb kernel matrix (can be void pointer)

!> \param xc_ker array of pointer to the different xc kernels (5 of them):

!>               1) the restricted closed-shell singlet kernel

!>               2) the restricted closed-shell triplet kernel

!>               3) the spin-conserving open-shell xc kernel

!>               4) the on-diagonal spin-flip open-shell xc kernel

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

!> \note Coulomb and xc kernel are put together in the same routine because they use the same RI

!>       Coulomb: (aI|Jb) = (aI|P) (P|Q)^-1 (Q|Jb)

!>       XC : (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)

!>       In the above formula, a,b label the sgfs

!>       The routine analyses the xas_tdp_control to know which kernel must be computed and how

!>       (open-shell, singlet, triplet, ROKS, LSD, etc...)

!>       On entry, the pointers should be allocated

! **************************************************************************************************


   SUBROUTINE kernel_coulomb_xc(coul_ker, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: xc_ker

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      CHARACTER(len=*), PARAMETER                        :: routinen = 'kernel_coulomb_xc'


      INTEGER                                            :: batch_size, bo(2), handle, i, ibatch, &

                                                            iex, lb, natom, nbatch, ndo_mo, &

                                                            ndo_so, nex_atom, nsgfp, ri_atom, &

                                                            source, ub

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      LOGICAL                                            :: do_coulomb, do_sc, do_sf, do_sg, do_tp, &

                                                            do_xc, found

      REAL(dp), DIMENSION(:, :), POINTER                 :: pq

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int

      TYPE(mp_para_env_type), POINTER                    :: para_env


      NULLIFY (contr1_int, pq, para_env, dist, blk_size)


!  Initialization

      ndo_mo = donor_state%ndo_mo

      do_xc = xas_tdp_control%do_xc

      do_sg = xas_tdp_control%do_singlet

      do_tp = xas_tdp_control%do_triplet

      do_sc = xas_tdp_control%do_spin_cons

      do_sf = xas_tdp_control%do_spin_flip

      ndo_so = ndo_mo; IF (xas_tdp_control%do_uks) ndo_so = 2*ndo_mo

      ri_atom = donor_state%at_index

      CALL get_qs_env(qs_env, natom=natom, para_env=para_env)

      do_coulomb = xas_tdp_control%do_coulomb

      dist => donor_state%dbcsr_dist

      blk_size => donor_state%blk_size


!  If no Coulomb nor xc, simply exit

      IF ((.NOT. do_coulomb) .AND. (.NOT. do_xc)) RETURN


      CALL timeset(routinen, handle)


!  Contract the RI 3-center integrals once to get (aI|P)

      CALL contract2_ao_to_domo(contr1_int, "COULOMB", donor_state, xas_tdp_env, xas_tdp_control, qs_env)


!  Deal with the Coulomb case

      IF (do_coulomb) CALL coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, &

                                   xas_tdp_control, qs_env)


!  Deal with the XC case

      IF (do_xc) THEN


         ! In the end, we compute: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)

         ! where fxc can take different spin contributions.


         ! Precompute the product (aI|P) * (P|Q)^-1 and store it in contr1_int

         pq => xas_tdp_env%ri_inv_coul

         CALL ri_all_blocks_mm(contr1_int, pq)


         ! If not already done (e.g. when multpile donor states for a given excited atom), broadcast

         ! the RI matrix (Q|fxc|R) on all procs

         IF (.NOT. xas_tdp_env%fxc_avail) THEN

            ! Find on which processor the integrals (Q|fxc|R) for this atom are stored

            nsgfp = SIZE(pq, 1)

            CALL get_qs_env(qs_env, para_env=para_env)

            found = .false.

            nex_atom = SIZE(xas_tdp_env%ex_atom_indices)

            CALL get_proc_batch_sizes(batch_size, nbatch, nex_atom, para_env%num_pe)


            DO ibatch = 0, nbatch - 1


               bo = get_limit(nex_atom, nbatch, ibatch)

               DO iex = bo(1), bo(2)


                  IF (xas_tdp_env%ex_atom_indices(iex) == ri_atom) THEN

                     source = ibatch*batch_size

                     found = .true. !but simply take the first

                     EXIT

                  END IF

               END DO !iex

               IF (found) EXIT

            END DO !ip


            ! Broadcast the integrals to all procs (deleted after all donor states for this atoms are treated)

            lb = 1; IF (do_sf .AND. .NOT. do_sc) lb = 4

            ub = 2; IF (do_sc) ub = 3; IF (do_sf) ub = 4

            DO i = lb, ub

               IF (.NOT. ASSOCIATED(xas_tdp_env%ri_fxc(ri_atom, i)%array)) THEN

                  ALLOCATE (xas_tdp_env%ri_fxc(ri_atom, i)%array(nsgfp, nsgfp))

               END IF

               CALL para_env%bcast(xas_tdp_env%ri_fxc(ri_atom, i)%array, source)

            END DO


            xas_tdp_env%fxc_avail = .true.

         END IF


         ! Case study on the calculation type

         IF (do_sg .OR. do_tp) THEN

            CALL rcs_xc(xc_ker(1)%matrix, xc_ker(2)%matrix, contr1_int, dist, blk_size, &

                        donor_state, xas_tdp_env, xas_tdp_control, qs_env)

         END IF


         IF (do_sc) THEN

            CALL sc_os_xc(xc_ker(3)%matrix, contr1_int, dist, blk_size, donor_state, &

                          xas_tdp_env, xas_tdp_control, qs_env)

         END IF


         IF (do_sf) THEN

            CALL ondiag_sf_os_xc(xc_ker(4)%matrix, contr1_int, dist, blk_size, donor_state, &

                                 xas_tdp_env, xas_tdp_control, qs_env)

         END IF


      END IF ! do_xc


!  Clean-up

      CALL dbcsr_deallocate_matrix_set(contr1_int)


      CALL timestop(handle)


   END SUBROUTINE kernel_coulomb_xc


! **************************************************************************************************

!> \brief Create the matrix containing the Coulomb kernel, which is:

!>        (aI_sigma|J_tau b) ~= (aI_sigma|P) * (P|Q) * (Q|J_tau b)

!> \param coul_ker the Coulomb kernel

!> \param contr1_int the once contracted RI integrals (aI|P)

!> \param dist the inherited dbcsr ditribution

!> \param blk_size the inherited block sizes

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

! **************************************************************************************************

   SUBROUTINE coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, xas_tdp_control, qs_env)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      LOGICAL                                            :: quadrants(3)

      REAL(dp), DIMENSION(:, :), POINTER                 :: pq

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int

      TYPE(dbcsr_type)                                   :: work_mat


      NULLIFY (pq, rhs_int, lhs_int)


      ! Get the inver RI coulomb

      pq => xas_tdp_env%ri_inv_coul


      ! Create a normal type work matrix

      CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)


      ! Compute the product (aI|P) * (P|Q)^-1 * (Q|Jb) = (aI|Jb)

      rhs_int => contr1_int ! the incoming contr1_int is not modified

      ALLOCATE (lhs_int(SIZE(contr1_int)))

      CALL copy_ri_contr_int(lhs_int, rhs_int) ! RHS containts (Q|JB)^T

      CALL ri_all_blocks_mm(lhs_int, pq) ! LHS contatins (aI|P)*(P|Q)^-1


      !In the special case of ROKS, same MOs for each spin => put same (aI|Jb) product on the

      !alpha-alpha, alpha-beta and beta-beta quadrants of the kernel matrix.

      IF (xas_tdp_control%do_roks) THEN

         quadrants = [.true., .true., .true.]

      ELSE

         quadrants = [.true., .false., .false.]

      END IF

      CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                          eps_filter=xas_tdp_control%eps_filter)

      CALL dbcsr_finalize(work_mat)


      !Create the symmetric kernel matrix and redistribute work_mat into it

      CALL dbcsr_create(coul_ker, name="COULOMB KERNEL", matrix_type=dbcsr_type_symmetric, dist=dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)

      CALL dbcsr_complete_redistribute(work_mat, coul_ker)


      !clean-up

      CALL dbcsr_release(work_mat)

      CALL dbcsr_deallocate_matrix_set(lhs_int)


   END SUBROUTINE coulomb


! **************************************************************************************************

!> \brief Create the matrix containing the XC kenrel in the spin-conserving open-shell case:

!>        (aI_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)

!> \param xc_ker the kernel matrix

!> \param contr1_int_PQ the once contracted RI integrals, with inverse coulomb: (aI_sigma|P) (P|Q)^-1

!> \param dist inherited dbcsr dist

!> \param blk_size inherited block sizes

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

!> note Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs

! **************************************************************************************************

   SUBROUTINE sc_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &

                       xas_tdp_control, qs_env)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_pq

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      INTEGER                                            :: ndo_mo, ri_atom

      LOGICAL                                            :: quadrants(3)

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int

      TYPE(dbcsr_type)                                   :: work_mat


      NULLIFY (lhs_int, rhs_int)


      ! Initialization

      ndo_mo = donor_state%ndo_mo

      ri_atom = donor_state%at_index

      !normal type work matrix such that distribution of all spin quadrants match

      CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)


      rhs_int => contr1_int_pq ! contains [ (aI|P)*(P|Q)^-1 ]^T

      ALLOCATE (lhs_int(SIZE(contr1_int_pq))) ! will contain (aI|P)*(P|Q)^-1 * (Q|fxc|R)


      ! Case study: UKS or ROKS ?

      IF (xas_tdp_control%do_uks) THEN


         ! In the case of UKS, donor MOs might be different for different spins. Moreover, the

         ! fxc itself might change since fxc = fxc_sigma,tau

         ! => Carfully treat each spin-quadrant separately


         ! alpha-alpha spin quadrant (upper-lefet)

         quadrants = [.true., .false., .false.]


         ! Copy the alpha part into lhs_int, multiply by the alpha-alpha (Q|fxc|R) and then

         ! by the alpha part of rhs_int

         CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))

         CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 1)%array)

         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)


         ! alpha-beta spin quadrant (upper-right)

         quadrants = [.false., .true., .false.]


         !Copy the alpha part into LHS, multiply by the alpha-beta kernel and the beta part of RHS

         CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))

         CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 2)%array)

         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &

                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)


         ! beta-beta spin quadrant (lower-right)

         quadrants = [.false., .false., .true.]


         !Copy the beta part into LHS, multiply by the beta-beta kernel and the beta part of RHS

         CALL copy_ri_contr_int(lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo))

         CALL ri_all_blocks_mm(lhs_int(ndo_mo + 1:2*ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 3)%array)

         CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &

                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)


      ELSE IF (xas_tdp_control%do_roks) THEN


         ! In the case of ROKS, fxc = fxc_sigma,tau is different for each spin quadrant, but the

         ! donor MOs remain the same


         ! alpha-alpha kernel in the upper left quadrant

         quadrants = [.true., .false., .false.]


         !Copy the LHS and multiply by alpha-alpha kernel

         CALL copy_ri_contr_int(lhs_int, rhs_int)

         CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 1)%array)

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)


         ! alpha-beta kernel in the upper-right quadrant

         quadrants = [.false., .true., .false.]


         !Copy LHS and multiply by the alpha-beta kernel

         CALL copy_ri_contr_int(lhs_int, rhs_int)

         CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 2)%array)

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)


         ! beta-beta kernel in the lower-right quadrant

         quadrants = [.false., .false., .true.]


         !Copy the LHS and multiply by the beta-beta kernel

         CALL copy_ri_contr_int(lhs_int, rhs_int)

         CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 3)%array)

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)


      END IF

      CALL dbcsr_finalize(work_mat)


      ! Create a symmetric kernel matrix and redistribute the normal work matrix into it

      CALL dbcsr_create(xc_ker, name="SC OS XC KERNEL", matrix_type=dbcsr_type_symmetric, dist=dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)

      CALL dbcsr_complete_redistribute(work_mat, xc_ker)


      !clean-up

      CALL dbcsr_deallocate_matrix_set(lhs_int)

      CALL dbcsr_release(work_mat)


   END SUBROUTINE sc_os_xc


! **************************************************************************************************

!> \brief Create the matrix containing the on-diagonal spin-flip XC kernel (open-shell), which is:

!>        (a I_sigma|fxc|J_tau b) * delta_sigma,tau, fxc = 1/(rhoa-rhob) * (dE/drhoa - dE/drhob)

!>        with RI: (a I_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)

!> \param xc_ker the kernel matrix

!> \param contr1_int_PQ the once contracted RI integrals with coulomb product: (aI_sigma|P) (P|Q)^-1

!> \param dist inherited dbcsr dist

!> \param blk_size inherited block sizes

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

!> \note  It must be later on multiplied by the spin-swapped Q projector

!>        Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs

! **************************************************************************************************

   SUBROUTINE ondiag_sf_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &

                              xas_tdp_control, qs_env)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_pq

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      INTEGER                                            :: ndo_mo, ri_atom

      LOGICAL                                            :: quadrants(3)

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int

      TYPE(dbcsr_type)                                   :: work_mat


      NULLIFY (lhs_int, rhs_int)


      ! Initialization

      ndo_mo = donor_state%ndo_mo

      ri_atom = donor_state%at_index

      !normal type work matrix such that distribution of all spin quadrants match

      CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)


      !Create a lhs_int, in which the whole (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) will be put

      !because in spin-flip fxc is spin-independent, can take the product once and for all

      rhs_int => contr1_int_pq

      ALLOCATE (lhs_int(SIZE(contr1_int_pq)))

      CALL copy_ri_contr_int(lhs_int, rhs_int)

      CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 4)%array)


      ! Case study: UKS or ROKS ?

      IF (xas_tdp_control%do_uks) THEN


         ! In the case of UKS, donor MOs might be different for different spins

         ! => Carfully treat each spin-quadrant separately

         ! NO alpha-beta because of the delta_sigma,tau


         ! alpha-alpha spin quadrant (upper-lefet)

         quadrants = [.true., .false., .false.]

         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)


         ! beta-beta spin quadrant (lower-right)

         quadrants = [.false., .false., .true.]

         CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &

                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)


      ELSE IF (xas_tdp_control%do_roks) THEN


         ! In the case of ROKS, same donor MOs for both spins => can do it all at once

         ! But NOT the alpha-beta quadrant because of delta_sigma,tau


         quadrants = [.true., .false., .true.]

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)


      END IF

      CALL dbcsr_finalize(work_mat)


      ! Create a symmetric kernel matrix and redistribute the normal work matrix into it

      CALL dbcsr_create(xc_ker, name="ON-DIAG SF OS XC KERNEL", matrix_type=dbcsr_type_symmetric, &

                        dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)

      CALL dbcsr_complete_redistribute(work_mat, xc_ker)


      !clean-up

      CALL dbcsr_deallocate_matrix_set(lhs_int)

      CALL dbcsr_release(work_mat)


   END SUBROUTINE ondiag_sf_os_xc


! **************************************************************************************************

!> \brief Create the matrix containing the XC kernel in the restricted closed-shell case, for

!>        singlets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp+fxc_alp,bet|R) (R|S)^-1 (S|Jb)

!>        triplets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp-fxc_alp,bet|R) (R|S)^-1 (S|Jb)

!> \param sg_xc_ker the singlet kernel matrix

!> \param tp_xc_ker the triplet kernel matrix

!> \param contr1_int_PQ the once contracted RI integrals including inverse 2-center Coulomb prodcut:

!>                      (aI|P)*(P|Q)^-1

!> \param dist inherited dbcsr dist

!> \param blk_size inherited block sizes

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

! **************************************************************************************************

   SUBROUTINE rcs_xc(sg_xc_ker, tp_xc_ker, contr1_int_PQ, dist, blk_size, donor_state, &

                     xas_tdp_env, xas_tdp_control, qs_env)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: sg_xc_ker, tp_xc_ker

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_pq

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      INTEGER                                            :: nsgfp, ri_atom

      LOGICAL                                            :: quadrants(3)

      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int

      TYPE(dbcsr_type)                                   :: work_mat


      NULLIFY (lhs_int, rhs_int)


      ! Initialization

      ri_atom = donor_state%at_index

      nsgfp = SIZE(xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1)

      rhs_int => contr1_int_pq ! RHS contains [ (aI|P)*(P|Q)^-1 ]^T

      ALLOCATE (lhs_int(SIZE(contr1_int_pq))) ! LHS will contatin (aI|P)*(P|Q)^-1 * (Q|fxc|R)


      ! Work structures

      ALLOCATE (fxc(nsgfp, nsgfp))

      CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)


      ! Case study: singlet and/or triplet ?

      IF (xas_tdp_control%do_singlet) THEN


         ! Take the sum of fxc for alpha-alpha and alpha-beta

         CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)

         CALL daxpy(nsgfp*nsgfp, 1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)


         ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)

         CALL copy_ri_contr_int(lhs_int, rhs_int)

         CALL ri_all_blocks_mm(lhs_int, fxc)


         ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant

         quadrants = [.true., .false., .false.]

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)

         CALL dbcsr_finalize(work_mat)


         !Create the symmetric kernel matrix and redistribute work_mat into it

         CALL dbcsr_create(sg_xc_ker, name="XC SINGLET KERNEL", matrix_type=dbcsr_type_symmetric, &

                           dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)

         CALL dbcsr_complete_redistribute(work_mat, sg_xc_ker)


      END IF


      IF (xas_tdp_control%do_triplet) THEN


         ! Take the difference of fxc for alpha-alpha and alpha-beta

         CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)

         CALL daxpy(nsgfp*nsgfp, -1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)


         ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)

         CALL copy_ri_contr_int(lhs_int, rhs_int)

         CALL ri_all_blocks_mm(lhs_int, fxc)


         ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant

         quadrants = [.true., .false., .false.]

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter)

         CALL dbcsr_finalize(work_mat)


         !Create the symmetric kernel matrix and redistribute work_mat into it

         CALL dbcsr_create(tp_xc_ker, name="XC TRIPLET KERNEL", matrix_type=dbcsr_type_symmetric, &

                           dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)

         CALL dbcsr_complete_redistribute(work_mat, tp_xc_ker)


      END IF


      ! clean-up

      CALL dbcsr_deallocate_matrix_set(lhs_int)

      CALL dbcsr_release(work_mat)

      DEALLOCATE (fxc)


   END SUBROUTINE rcs_xc


! **************************************************************************************************

!> \brief Computes the exact exchange kernel matrix using RI. Returns an array of 2 matrices,

!>        which are:

!>        1) the on-diagonal kernel: (ab|I_sigma J_tau) * delta_sigma,tau

!>        2) the off-diagonal spin-conserving kernel: (aJ_sigma|I_tau b) * delta_sigma,tau

!>        An internal analysis determines which of the above are computed (can range from 0 to 2),

!> \param ex_ker ...

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

!> \note In the case of spin-conserving excitation, the kernel must later be multiplied by the

!>       usual Q projector. In the case of spin-flip, one needs to project the excitations coming

!>       from alpha donor MOs on the unoccupied beta MOs. This is done by multiplying by a Q

!>       projector where the alpha-alpha and beta-beta quadrants are swapped

!>       The ex_ker array should be allocated on entry (not the internals)

! **************************************************************************************************


   SUBROUTINE kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)


      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      CHARACTER(len=*), PARAMETER                        :: routinen = 'kernel_exchange'


      INTEGER                                            :: handle

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      LOGICAL                                            :: do_off_sc

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int


      NULLIFY (contr1_int, dist, blk_size)


      !Don't do anything if no hfx

      IF (.NOT. xas_tdp_control%do_hfx) RETURN


      CALL timeset(routinen, handle)


      dist => donor_state%dbcsr_dist

      blk_size => donor_state%blk_size


      !compute the off-diag spin-conserving only if not TDA and anything that is spin-conserving

      do_off_sc = (.NOT. xas_tdp_control%tamm_dancoff) .AND. &

                  (xas_tdp_control%do_spin_cons .OR. xas_tdp_control%do_singlet .OR. xas_tdp_control%do_triplet)


      ! Need the once contracted integrals (aI|P)

      CALL contract2_ao_to_domo(contr1_int, "EXCHANGE", donor_state, xas_tdp_env, xas_tdp_control, qs_env)


!  The on-diagonal exchange : (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau) * delta_sigma,tau

      CALL ondiag_ex(ex_ker(1)%matrix, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &

                     xas_tdp_control, qs_env)


!  The off-diag spin-conserving case: (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b) * delta_sigma,tau

      IF (do_off_sc) THEN

         CALL offdiag_ex_sc(ex_ker(2)%matrix, contr1_int, dist, blk_size, donor_state, &

                            xas_tdp_env, xas_tdp_control, qs_env)

      END IF


      !clean-up

      CALL dbcsr_deallocate_matrix_set(contr1_int)


      CALL timestop(handle)


   END SUBROUTINE kernel_exchange


! **************************************************************************************************

!> \brief Create the matrix containing the on-diagonal exact exchange kernel, which is:

!>        (ab|I_sigma J_tau) * delta_sigma,tau, where a,b are AOs, I_sigma and J_tau are the donor

!>        spin-orbitals. A RI is done: (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)

!> \param ondiag_ex_ker the on-diagonal exchange kernel in dbcsr format

!> \param contr1_int the already once-contracted RI 3-center integrals (aI_sigma|P)

!>        where each matrix of the array contains the contraction for the donor spin-orbital I_sigma

!> \param dist the inherited dbcsr distribution

!> \param blk_size the inherited dbcsr block sizes

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

!> \note In the presence of a RI metric, we have instead M^-1 * (P|Q) * M^-1

! **************************************************************************************************

   SUBROUTINE ondiag_ex(ondiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &

                        xas_tdp_control, qs_env)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: ondiag_ex_ker

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      INTEGER                                            :: group, iblk, iso, jblk, jso, nblk, &

                                                            ndo_mo, ndo_so, nsgfa, nsgfp, ri_atom, &

                                                            source

      INTEGER, DIMENSION(:), POINTER                     :: col_dist, col_dist_work, row_dist, &

                                                            row_dist_work

      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid

      LOGICAL                                            :: do_roks, do_uks, found

      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: coeffs, ri_coeffs

      REAL(dp), DIMENSION(:, :), POINTER                 :: aiq, pblock, pq

      TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist, work_dbcsr_dist

      TYPE(dbcsr_iterator_type)                          :: iter

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s

      TYPE(dbcsr_type)                                   :: abij, mats_desymm, work_mat

      TYPE(mp_para_env_type), POINTER                    :: para_env


      NULLIFY (para_env, matrix_s, pblock, aiq, row_dist, col_dist, row_dist_work, col_dist_work, pgrid)


      ! We want to compute (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)

      ! Already have (cJ_tau|P)  stored in contr1_int. Need to further contract the

      ! AOs with the coeff of the I_alpha spin-orbital.


      ! Initialization

      ndo_mo = donor_state%ndo_mo

      ri_atom = donor_state%at_index

      do_roks = xas_tdp_control%do_roks

      do_uks = xas_tdp_control%do_uks

      ndo_so = ndo_mo; IF (do_uks) ndo_so = 2*ndo_mo !if not UKS, same donor MOs for both spins

      pq => xas_tdp_env%ri_inv_ex


      CALL get_qs_env(qs_env, para_env=para_env, matrix_s=matrix_s, natom=nblk)

      nsgfp = SIZE(pq, 1)

      nsgfa = SIZE(donor_state%contract_coeffs, 1)

      ALLOCATE (coeffs(nsgfp, ndo_so), ri_coeffs(nsgfp, ndo_so))


      ! a and b need to overlap for non-zero (ab|IJ) => same block structure as overlap S

      ! need compatible distribution_2d with 3c tensor + normal type

      CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_ex, opt_dbcsr_dist)


      CALL dbcsr_desymmetrize(matrix_s(1)%matrix, mats_desymm)


      CALL dbcsr_create(abij, template=mats_desymm, name="(ab|IJ)", dist=opt_dbcsr_dist)

      CALL dbcsr_complete_redistribute(mats_desymm, abij)


      CALL dbcsr_release(mats_desymm)


      ! Create a work distribution based on opt_dbcsr_dist, but for full size matrices

      CALL dbcsr_distribution_get(opt_dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, &

                                  pgrid=pgrid)


      ALLOCATE (row_dist_work(ndo_so*nblk))

      ALLOCATE (col_dist_work(ndo_so*nblk))

      DO iso = 1, ndo_so

         row_dist_work((iso - 1)*nblk + 1:iso*nblk) = row_dist(:)

         col_dist_work((iso - 1)*nblk + 1:iso*nblk) = col_dist(:)

      END DO


      CALL dbcsr_distribution_new(work_dbcsr_dist, group=group, pgrid=pgrid, row_dist=row_dist_work, &

                                  col_dist=col_dist_work)


      CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=work_dbcsr_dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)


      ! Loop over donor spin-orbitals. End matrix is symmetric => span only upper half

      DO iso = 1, ndo_so


         ! take the (aI|Q) block in contr1_int that has a centered on the excited atom

         CALL dbcsr_get_stored_coordinates(contr1_int(iso)%matrix, ri_atom, ri_atom, source)

         IF (para_env%mepos == source) THEN

            CALL dbcsr_get_block_p(contr1_int(iso)%matrix, ri_atom, ri_atom, aiq, found)

         ELSE

            ALLOCATE (aiq(nsgfa, nsgfp))

         END IF

         CALL para_env%bcast(aiq, source)


         ! get the contraction (Q|IJ) by taking (Q|Ia)*contract_coeffs and put it in coeffs

         CALL dgemm('T', 'N', nsgfp, ndo_so, nsgfa, 1.0_dp, aiq, nsgfa, donor_state%contract_coeffs, &

                    nsgfa, 0.0_dp, coeffs, nsgfp)


         ! take (P|Q)^-1 * (Q|IJ) and put that in ri_coeffs

         CALL dgemm('N', 'N', nsgfp, ndo_so, nsgfp, 1.0_dp, pq, nsgfp, coeffs, nsgfp, 0.0_dp, &

                    ri_coeffs, nsgfp)


         IF (.NOT. para_env%mepos == source) DEALLOCATE (aiq)


         DO jso = iso, ndo_so


            ! There is no alpha-beta exchange. In case of UKS, iso,jso span all spin-orbitals

            ! => CYCLE if iso and jso are indexing MOs with different spin (and we have UKS)

            IF (do_uks .AND. (iso <= ndo_mo .AND. jso > ndo_mo)) cycle


            ! compute (ab|IJ) = sum_P (ab|P) * (P|Q)^-1 * (Q|IJ)

            CALL dbcsr_set(abij, 0.0_dp)

            CALL contract3_ri_to_domos(xas_tdp_env%ri_3c_ex, ri_coeffs(:, jso), abij, ri_atom)


            ! Loop over (ab|IJ) and copy into work. OK because dist are made to match

            CALL dbcsr_iterator_start(iter, abij)

            DO WHILE (dbcsr_iterator_blocks_left(iter))


               CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)

               IF (iso == jso .AND. jblk < iblk) cycle


               CALL dbcsr_get_block_p(abij, iblk, jblk, pblock, found)


               IF (found) THEN

                  CALL dbcsr_put_block(work_mat, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)


                  !In case of ROKS, we have (ab|IJ) for alpha-alpha spin, but it is the same for

                  !beta-beta => replicate the blocks (alpha-beta is zero)

                  IF (do_roks) THEN

                     !the beta-beta block

                     CALL dbcsr_put_block(work_mat, (ndo_so + iso - 1)*nblk + iblk, &

                                          (ndo_so + jso - 1)*nblk + jblk, pblock)

                  END IF

               END IF


            END DO !iterator

            CALL dbcsr_iterator_stop(iter)


         END DO !jso

      END DO !iso


      CALL dbcsr_finalize(work_mat)

      CALL dbcsr_create(ondiag_ex_ker, name="ONDIAG EX KERNEL", matrix_type=dbcsr_type_symmetric, &

                        dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)

      CALL dbcsr_complete_redistribute(work_mat, ondiag_ex_ker)


      !Clean-up

      CALL dbcsr_release(work_mat)

      CALL dbcsr_release(abij)

      CALL dbcsr_distribution_release(opt_dbcsr_dist)

      CALL dbcsr_distribution_release(work_dbcsr_dist)

      DEALLOCATE (col_dist_work, row_dist_work)


   END SUBROUTINE ondiag_ex


! **************************************************************************************************

!> \brief Create the matrix containing the off-diagonal exact exchange kernel in the spin-conserving

!>        case (which also includes excitations from the closed=shell ref state ) This matrix reads:

!>        (aJ_sigma|I_tau b) * delta_sigma,tau , where a, b are AOs and J_sigma, I_tau are the donor

!>        spin-orbital. A RI is done: (aJ_sigma|I_tau b) = (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b)

!> \param offdiag_ex_ker the off-diagonal, spin-conserving exchange kernel in dbcsr format

!> \param contr1_int the once-contracted RI integrals: (aJ_sigma|P)

!> \param dist the inherited dbcsr ditribution

!> \param blk_size the inherited block sizes

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

! **************************************************************************************************

   SUBROUTINE offdiag_ex_sc(offdiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &

                            xas_tdp_control, qs_env)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: offdiag_ex_ker

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int

      TYPE(dbcsr_distribution_type), POINTER             :: dist

      INTEGER, DIMENSION(:), POINTER                     :: blk_size

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      INTEGER                                            :: ndo_mo

      LOGICAL                                            :: do_roks, do_uks, quadrants(3)

      REAL(dp), DIMENSION(:, :), POINTER                 :: pq

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int

      TYPE(dbcsr_type)                                   :: work_mat


      NULLIFY (pq, lhs_int, rhs_int)


      !Initialization

      ndo_mo = donor_state%ndo_mo

      do_roks = xas_tdp_control%do_roks

      do_uks = xas_tdp_control%do_uks

      pq => xas_tdp_env%ri_inv_ex


      rhs_int => contr1_int

      ALLOCATE (lhs_int(SIZE(contr1_int)))

      CALL copy_ri_contr_int(lhs_int, rhs_int)

      CALL ri_all_blocks_mm(lhs_int, pq)


      !Given the lhs_int and rhs_int, all we need to do is multiply elements from the former by

      !the transpose of the later, and put the result in the correct spin quadrants


      !Create a normal type work matrix

      CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &

                        row_blk_size=blk_size, col_blk_size=blk_size)


      !Case study on closed-shell, ROKS or UKS

      IF (do_roks) THEN

         !In ROKS, the donor MOs for each spin are the same => copy the product in both the

         !alpha-alpha and the beta-beta quadrants

         quadrants = [.true., .false., .true.]

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter, mo_transpose=.true.)


      ELSE IF (do_uks) THEN

         !In UKS, the donor MOs are possibly different for each spin => start with the

         !alpha-alpha product and the perform the beta-beta product separately

         quadrants = [.true., .false., .false.]

         CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, &

                             qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.true.)


         quadrants = [.false., .false., .true.]

         CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &

                             quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.true.)

      ELSE

         !In the restricted closed-shell case, only have one spin and a single qudarant

         quadrants = [.true., .false., .false.]

         CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &

                             eps_filter=xas_tdp_control%eps_filter, mo_transpose=.true.)

      END IF

      CALL dbcsr_finalize(work_mat)


      !Create the symmetric kernel matrix and redistribute work_mat into it

      CALL dbcsr_create(offdiag_ex_ker, name="OFFDIAG EX KERNEL", matrix_type=dbcsr_type_symmetric, &

                        dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)

      CALL dbcsr_complete_redistribute(work_mat, offdiag_ex_ker)


      !clean-up

      CALL dbcsr_release(work_mat)

      CALL dbcsr_deallocate_matrix_set(lhs_int)


   END SUBROUTINE offdiag_ex_sc


! **************************************************************************************************

!> \brief Reserves the blocks in of a dbcsr matrix as needed for RI 3-center contraction (aI|P)

!> \param matrices the matrices for which blocks are reserved

!> \param ri_atom the index of the atom on which RI is done (= all coeffs of I are there, and P too)

!> \param qs_env ...

!> \note the end product are normal type matrices that are possibly slightly spraser as matrix_s

! **************************************************************************************************


   SUBROUTINE reserve_contraction_blocks(matrices, ri_atom, qs_env)


      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices

      INTEGER, INTENT(IN)                                :: ri_atom

      TYPE(qs_environment_type), POINTER                 :: qs_env


      INTEGER                                            :: i, iblk, jblk, max_nblks, nblks

      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: reserve_cols, reserve_rows

      TYPE(dbcsr_distribution_type)                      :: dist

      TYPE(dbcsr_iterator_type)                          :: iter

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s

      TYPE(dbcsr_type)                                   :: template, work


      NULLIFY (matrix_s)


      ! Initialization

      CALL get_qs_env(qs_env, matrix_s=matrix_s)

      CALL dbcsr_get_info(matrices(1)%matrix, distribution=dist)


      ! Need to redistribute matrix_s in the distribution of matrices

      CALL dbcsr_create(work, template=matrix_s(1)%matrix, dist=dist)

      CALL dbcsr_complete_redistribute(matrix_s(1)%matrix, work)


      ! Need to desymmetrize matrix as as a template

      CALL dbcsr_desymmetrize(work, template)


      ! Allocate space for block indicies to reserve.

      max_nblks = dbcsr_get_num_blocks(template)

      ALLOCATE (reserve_rows(max_nblks), reserve_cols(max_nblks))


      ! Loop over matrix_s as need a,b to overlap

      nblks = 0

      CALL dbcsr_iterator_start(iter, template)

      DO WHILE (dbcsr_iterator_blocks_left(iter))

         CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)

         !only have a,b pair if one of them is the ri_atom

         IF (iblk == ri_atom .OR. jblk == ri_atom) THEN

            nblks = nblks + 1

            reserve_rows(nblks) = iblk

            reserve_cols(nblks) = jblk

         END IF

      END DO

      CALL dbcsr_iterator_stop(iter)


      DO i = 1, SIZE(matrices)

         CALL dbcsr_reserve_blocks(matrices(i)%matrix, rows=reserve_rows(1:nblks), cols=reserve_cols(1:nblks))

      END DO


      ! Clean-up

      CALL dbcsr_release(template)

      CALL dbcsr_release(work)


   END SUBROUTINE reserve_contraction_blocks


! **************************************************************************************************

!> \brief Contract the ri 3-center integrals stored in a tensor with repect to the donor MOs coeffs,

!>        for a given excited atom k => (aI|k) = sum_b c_Ib (ab|k)

!> \param contr_int the contracted integrals as array of dbcsr matrices

!> \param op_type for which operator type we contract (COULOMB or EXCHANGE)

!> \param donor_state ...

!> \param xas_tdp_env ...

!> \param xas_tdp_control ...

!> \param qs_env ...

!> \note  In the output matrices, (aI_b|k) is stored at block a,b where I_b is the partial

!>        contraction that only includes coeffs from atom b. Note that the contracted matrix is

!>        not symmetric. To get the fully contracted matrix over b, one need to add the block

!>        columns of (aI_b|k) (get an array of size nao*nsgfp). This step is unnessary in our case

!>        because we assume locality of donor state, and only one column od (aI_b|k) is pouplated

! **************************************************************************************************


   SUBROUTINE contract2_ao_to_domo(contr_int, op_type, donor_state, xas_tdp_env, xas_tdp_control, qs_env)


      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr_int

      CHARACTER(len=*), INTENT(IN)                       :: op_type

      TYPE(donor_state_type), POINTER                    :: donor_state

      TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env

      TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control

      TYPE(qs_environment_type), POINTER                 :: qs_env


      CHARACTER(len=*), PARAMETER :: routinen = 'contract2_AO_to_doMO'


      INTEGER                                            :: handle, i, imo, ispin, katom, kkind, &

                                                            natom, ndo_mo, ndo_so, nkind, nspins

      INTEGER, DIMENSION(:), POINTER                     :: ri_blk_size, std_blk_size

      LOGICAL                                            :: do_uks

      REAL(dp), DIMENSION(:, :), POINTER                 :: coeffs

      TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices, matrix_s

      TYPE(dbcsr_type), POINTER                          :: ai_p, p_ib, work

      TYPE(dbt_type), POINTER                            :: pq_x

      TYPE(distribution_2d_type), POINTER                :: opt_dist2d

      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: ri_basis

      TYPE(mp_para_env_type), POINTER                    :: para_env

      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set

      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set


      NULLIFY (matrix_s, std_blk_size, ri_blk_size, qs_kind_set, ri_basis, pq_x)

      NULLIFY (ai_p, p_ib, work, matrices, coeffs, opt_dist2d, particle_set)


      CALL timeset(routinen, handle)


!  Initialization

      CALL get_qs_env(qs_env, natom=natom, matrix_s=matrix_s, qs_kind_set=qs_kind_set, para_env=para_env)

      ndo_mo = donor_state%ndo_mo

      kkind = donor_state%kind_index

      katom = donor_state%at_index

      !by default contract for Coulomb

      pq_x => xas_tdp_env%ri_3c_coul

      opt_dist2d => xas_tdp_env%opt_dist2d_coul

      IF (op_type == "EXCHANGE") THEN

         cpassert(ASSOCIATED(xas_tdp_env%ri_3c_ex))

         pq_x => xas_tdp_env%ri_3c_ex

         opt_dist2d => xas_tdp_env%opt_dist2d_ex

      END IF

      do_uks = xas_tdp_control%do_uks

      nspins = 1; IF (do_uks) nspins = 2

      ndo_so = nspins*ndo_mo


!  contracted integrals block sizes

      CALL dbcsr_get_info(matrix_s(1)%matrix, col_blk_size=std_blk_size)

      ! getting the block dimensions for the RI basis

      CALL get_qs_env(qs_env, particle_set=particle_set, nkind=nkind)

      ALLOCATE (ri_basis(nkind), ri_blk_size(natom))

      CALL basis_set_list_setup(ri_basis, "RI_XAS", qs_kind_set)

      CALL get_particle_set(particle_set, qs_kind_set, nsgf=ri_blk_size, basis=ri_basis)


!  Create  work matrices. Everything that goes into a 3c routine must be compatible with the optimal dist_2d

      CALL cp_dbcsr_dist2d_to_dist(opt_dist2d, opt_dbcsr_dist)


      ALLOCATE (ai_p, p_ib, work, matrices(2))

      CALL dbcsr_create(ai_p, dist=opt_dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, name="(aI|P)", &

                        row_blk_size=std_blk_size, col_blk_size=ri_blk_size)


      CALL dbcsr_create(p_ib, dist=opt_dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, name="(P|Ib)", &

                        row_blk_size=ri_blk_size, col_blk_size=std_blk_size)


      !reserve the blocks (needed for 3c contraction routines)

      matrices(1)%matrix => ai_p; matrices(2)%matrix => p_ib

      CALL reserve_contraction_blocks(matrices, katom, qs_env)

      DEALLOCATE (matrices)


      ! Create the contracted integral matrices

      ALLOCATE (contr_int(ndo_so))

      DO i = 1, ndo_so

         ALLOCATE (contr_int(i)%matrix)

         CALL dbcsr_create(matrix=contr_int(i)%matrix, template=matrix_s(1)%matrix, &

                           matrix_type=dbcsr_type_no_symmetry, row_blk_size=std_blk_size, &

                           col_blk_size=ri_blk_size)

      END DO


      ! Only take the coeffs for atom on which MOs I,J are localized

      coeffs => donor_state%contract_coeffs


      DO ispin = 1, nspins


!     Loop over the donor MOs and contract

         DO imo = 1, ndo_mo


            ! do the contraction

            CALL dbcsr_set(ai_p, 0.0_dp); CALL dbcsr_set(p_ib, 0.0_dp)

            CALL contract2_ao_to_domo_low(pq_x, coeffs(:, (ispin - 1)*ndo_mo + imo), ai_p, p_ib, katom)


            ! Get the full (aI|P) contracted integrals

            CALL dbcsr_transposed(work, p_ib)

            CALL dbcsr_add(work, ai_p, 1.0_dp, 1.0_dp)

            CALL dbcsr_complete_redistribute(work, contr_int((ispin - 1)*ndo_mo + imo)%matrix)

            CALL dbcsr_filter(contr_int((ispin - 1)*ndo_mo + imo)%matrix, 1.0e-16_dp)


            CALL dbcsr_release(work)

         END DO !imo

      END DO !ispin


!  Clean-up

      CALL dbcsr_release(ai_p)

      CALL dbcsr_release(p_ib)

      CALL dbcsr_distribution_release(opt_dbcsr_dist)

      DEALLOCATE (ri_blk_size, ai_p, p_ib, work, ri_basis)


      CALL timestop(handle)


   END SUBROUTINE contract2_ao_to_domo


! **************************************************************************************************

!> \brief Contraction of the 3-center integrals (ab|Q) over the RI basis elements Q to get donor MOS

!>        => (ab|IJ) = sum_X (ab|Q) coeffs_Q

!> \param ab_Q the tensor holding the integrals

!> \param vec the contraction coefficients

!> \param mat_abIJ the matrix holding the (ab|IJ) integrals (blocks must be reserved)

!> \param atom_k the atom for which we contract, i.e. we only take RI basis Q centered on atom_k

!> \note By construction, distribution of tensor and matrix match, also for OMP threads

! **************************************************************************************************

   SUBROUTINE contract3_ri_to_domos(ab_Q, vec, mat_abIJ, atom_k)


      TYPE(dbt_type)                                     :: ab_q

      REAL(dp), DIMENSION(:), INTENT(IN)                 :: vec

      TYPE(dbcsr_type)                                   :: mat_abij

      INTEGER, INTENT(IN)                                :: atom_k


      CHARACTER(len=*), PARAMETER :: routinen = 'contract3_RI_to_doMOs'


      INTEGER                                            :: handle, i, iatom, ind(3), j, jatom, katom

      LOGICAL                                            :: found, t_found

      REAL(dp)                                           :: prefac

      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc

      REAL(kind=dp), DIMENSION(:, :), POINTER            :: pblock

      TYPE(dbcsr_type)                                   :: work

      TYPE(dbt_iterator_type)                            :: iter


      NULLIFY (pblock)


      CALL timeset(routinen, handle)


!$OMP PARALLEL DEFAULT(NONE) &

!$OMP SHARED(ab_Q,vec,mat_abIJ,atom_k) &

!$OMP PRIVATE(iter,ind,iatom,jatom,katom,prefac,iabc,t_found,found,pblock,i,j)

      CALL dbt_iterator_start(iter, ab_q)

      DO WHILE (dbt_iterator_blocks_left(iter))

         CALL dbt_iterator_next_block(iter, ind)


         iatom = ind(1)

         jatom = ind(2)

         katom = ind(3)


         IF (.NOT. atom_k == katom) cycle


         prefac = 1.0_dp

         IF (iatom == jatom) prefac = 0.5_dp


         CALL dbt_get_block(ab_q, ind, iabc, t_found)


         CALL dbcsr_get_block_p(mat_abij, iatom, jatom, pblock, found)

         IF ((.NOT. found) .OR. (.NOT. t_found)) cycle


         DO i = 1, SIZE(pblock, 1)

            DO j = 1, SIZE(pblock, 2)

!$OMP ATOMIC

               pblock(i, j) = pblock(i, j) + prefac*dot_product(vec(:), iabc(i, j, :))

            END DO

         END DO


         DEALLOCATE (iabc)

      END DO !iter

      CALL dbt_iterator_stop(iter)

!$OMP END PARALLEL


      !matrix only half filled => need to add its transpose

      CALL dbcsr_create(work, template=mat_abij)

      CALL dbcsr_transposed(work, mat_abij)

      CALL dbcsr_add(mat_abij, work, 1.0_dp, 1.0_dp)

      CALL dbcsr_release(work)


      CALL timestop(handle)


   END SUBROUTINE contract3_ri_to_domos


! **************************************************************************************************

!> \brief Contraction of the 3-center integrals over index 1 and 2, for a given atom_k. The results

!>        are stored in two matrices, such that (a,b are block indices):

!>        mat_aIb(ab) = mat_aIb(ab) + sum j_b (i_aj_b|k)*v(j_b) and

!>        mat_bIa(ba) = mat_bIa(ba) + sum i_a (i_aj_b|k)*v(i_a)

!>        The block size of the columns of mat_aIb and the rows of mat_bIa are the size of k (RI)

!> \param ab_Q the tensor containing the 3-center integrals

!> \param vec the contraction coefficients

!> \param mat_aIb normal type dbcsr matrix

!> \param mat_bIa normal type dbcsr matrix

!> \param atom_k the atom for which we contract

!>       It is assumed that the contraction coefficients for MO I are all on atom_k

!>       We do the classic thing when we fill half the matrix and add its transposed to get the full

!>       one, but here, the matrix is not symmetric, hence we explicitely have 2 input matrices

!>       The distribution of the integrals and the normal dbcsr matrix are compatible out of the box

! **************************************************************************************************

   SUBROUTINE contract2_ao_to_domo_low(ab_Q, vec, mat_aIb, mat_bIa, atom_k)


      TYPE(dbt_type)                                     :: ab_q

      REAL(dp), DIMENSION(:), INTENT(IN)                 :: vec

      TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_aib, mat_bia

      INTEGER, INTENT(IN)                                :: atom_k


      CHARACTER(LEN=*), PARAMETER :: routinen = 'contract2_AO_to_doMO_low'


      INTEGER                                            :: handle, i, iatom, ind(3), j, jatom, &

                                                            katom, s1, s2

      INTEGER, DIMENSION(:), POINTER                     :: atom_blk_size

      LOGICAL                                            :: found, t_found

      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc

      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock

      TYPE(dbt_iterator_type)                            :: iter


      NULLIFY (atom_blk_size, pblock)


      CALL timeset(routinen, handle)


      CALL dbcsr_get_info(mat_aib, row_blk_size=atom_blk_size)


!$OMP PARALLEL DEFAULT(NONE) &

!$OMP SHARED(ab_Q,vec,mat_aIb,mat_bIa,atom_k,atom_blk_size) &

!$OMP PRIVATE(iter,ind,iatom,jatom,katom,iabc,t_found,found,s1,s2,j,i,pblock)

      CALL dbt_iterator_start(iter, ab_q)

      DO WHILE (dbt_iterator_blocks_left(iter))

         CALL dbt_iterator_next_block(iter, ind)


         iatom = ind(1)

         jatom = ind(2)

         katom = ind(3)


         IF (atom_k .NE. katom) cycle


         CALL dbt_get_block(ab_q, ind, iabc, t_found)

         IF (.NOT. t_found) cycle


         ! Deal with mat_aIb

         IF (jatom == atom_k) THEN

            s1 = atom_blk_size(iatom)

            s2 = SIZE(iabc, 3)


            CALL dbcsr_get_block_p(matrix=mat_aib, row=iatom, col=jatom, block=pblock, found=found)


            IF (found) THEN

               DO i = 1, s1

                  DO j = 1, s2

!$OMP ATOMIC

                     pblock(i, j) = pblock(i, j) + dot_product(vec, iabc(i, :, j))

                  END DO

               END DO

            END IF

         END IF ! jatom == atom_k


         ! Deal with mat_bIa, keep block diagonal empty

         IF (iatom == jatom) cycle

         IF (iatom == atom_k) THEN

            s1 = SIZE(iabc, 3)

            s2 = atom_blk_size(jatom)


            CALL dbcsr_get_block_p(matrix=mat_bia, row=iatom, col=jatom, block=pblock, found=found)


            IF (found) THEN

               DO i = 1, s1

                  DO j = 1, s2

!$OMP ATOMIC

                     pblock(i, j) = pblock(i, j) + dot_product(vec, iabc(:, j, i))

                  END DO

               END DO

            END IF

         END IF !iatom== atom_k


         DEALLOCATE (iabc)

      END DO !iter

      CALL dbt_iterator_stop(iter)

!$OMP END PARALLEL


      CALL timestop(handle)


   END SUBROUTINE contract2_ao_to_domo_low


! **************************************************************************************************

!> \brief Multiply all the blocks of a contracted RI integral (aI|P) by a matrix of type (P|...|Q)

!> \param contr_int the integral array

!> \param PQ the smaller matrix to multiply all blocks

!> \note  It is assumed that all non-zero blocks have the same number of columns. Can pass partial

!>        arrays, e.g. contr_int(1:3)

! **************************************************************************************************


   SUBROUTINE ri_all_blocks_mm(contr_int, PQ)


      TYPE(dbcsr_p_type), DIMENSION(:)                   :: contr_int

      REAL(dp), DIMENSION(:, :), INTENT(IN)              :: pq


      INTEGER                                            :: iblk, imo, jblk, ndo_mo, s1, s2

      LOGICAL                                            :: found

      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: work

      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock

      TYPE(dbcsr_iterator_type)                          :: iter


      NULLIFY (pblock)


      ndo_mo = SIZE(contr_int)


      DO imo = 1, ndo_mo

         CALL dbcsr_iterator_start(iter, contr_int(imo)%matrix)

         DO WHILE (dbcsr_iterator_blocks_left(iter))


            CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)

            CALL dbcsr_get_block_p(contr_int(imo)%matrix, iblk, jblk, pblock, found)


            IF (found) THEN

               s1 = SIZE(pblock, 1)

               s2 = SIZE(pblock, 2)

               ALLOCATE (work(s1, s2))

               CALL dgemm('N', 'N', s1, s2, s2, 1.0_dp, pblock, s1, pq, s2, 0.0_dp, work, s1)

               CALL dcopy(s1*s2, work, 1, pblock, 1)

               DEALLOCATE (work)

            END IF


         END DO ! dbcsr iterator

         CALL dbcsr_iterator_stop(iter)

      END DO !imo


   END SUBROUTINE ri_all_blocks_mm


! **************************************************************************************************

!> \brief Copies an (partial) array of contracted RI integrals into anoter one

!> \param new_int where the copy is stored

!> \param ref_int what is copied

!> \note Allocate the matrices of new_int if not done already

! **************************************************************************************************

   SUBROUTINE copy_ri_contr_int(new_int, ref_int)


      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: new_int

      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: ref_int


      INTEGER                                            :: iso, ndo_so


      cpassert(SIZE(new_int) == SIZE(ref_int))

      ndo_so = SIZE(ref_int)


      DO iso = 1, ndo_so

         IF (.NOT. ASSOCIATED(new_int(iso)%matrix)) ALLOCATE (new_int(iso)%matrix)

         CALL dbcsr_copy(new_int(iso)%matrix, ref_int(iso)%matrix)

      END DO


   END SUBROUTINE copy_ri_contr_int


! **************************************************************************************************

!> \brief Takes the product of contracted integrals and put them in a kernel matrix

!> \param kernel the matrix where the products are stored

!> \param lhs_int the left-hand side contracted integrals

!> \param rhs_int the right-hand side contracted integrals

!> \param quadrants on which quadrant(s) on the kernel matrix the product is stored

!> \param qs_env ...

!> \param eps_filter filter for dbcsr matrix multiplication

!> \param mo_transpose whether the MO blocks should be transpose, i.e. (aI|Jb) => (aJ|Ib)

!> \note It is assumed that the kerenl matrix is NOT symmetric

!>       There are three quadrants, corresponding to 1: the upper-left (diagonal), 2: the

!>       upper-right (off-diagonal) and 3: the lower-right (diagonal).

!>       Need to finalize the kernel matrix after calling this routine (possibly multiple times)

! **************************************************************************************************

   SUBROUTINE ri_int_product(kernel, lhs_int, rhs_int, quadrants, qs_env, eps_filter, mo_transpose)


      TYPE(dbcsr_type), INTENT(INOUT)                    :: kernel

      TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: lhs_int, rhs_int

      LOGICAL, DIMENSION(3), INTENT(IN)                  :: quadrants

      TYPE(qs_environment_type), POINTER                 :: qs_env

      REAL(dp), INTENT(IN), OPTIONAL                     :: eps_filter

      LOGICAL, INTENT(IN), OPTIONAL                      :: mo_transpose


      INTEGER                                            :: i, iblk, iso, j, jblk, jso, nblk, ndo_so

      LOGICAL                                            :: found, my_mt

      REAL(dp), DIMENSION(:, :), POINTER                 :: pblock

      TYPE(dbcsr_iterator_type)                          :: iter

      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s

      TYPE(dbcsr_type)                                   :: prod


      NULLIFY (matrix_s, pblock)


!  Initialization

      cpassert(SIZE(lhs_int) == SIZE(rhs_int))

      cpassert(any(quadrants))

      ndo_so = SIZE(lhs_int)

      CALL get_qs_env(qs_env, matrix_s=matrix_s, natom=nblk)

      CALL dbcsr_create(prod, template=matrix_s(1)%matrix, matrix_type=dbcsr_type_no_symmetry)

      my_mt = .false.

      IF (PRESENT(mo_transpose)) my_mt = mo_transpose


      ! The kernel matrix is symmetric (even if normal type) => only fill upper half on diagonal

      ! quadrants, but the whole thing on upper-right quadrant

      DO iso = 1, ndo_so

         DO jso = 1, ndo_so


            ! If on-diagonal quadrants only, can skip jso < iso

            IF (.NOT. quadrants(2) .AND. jso < iso) cycle


            i = iso; j = jso;

            IF (my_mt) THEN

               i = jso; j = iso;

            END IF


            ! Take the product lhs*rhs^T

            CALL dbcsr_multiply('N', 'T', 1.0_dp, lhs_int(i)%matrix, rhs_int(j)%matrix, &

                                0.0_dp, prod, filter_eps=eps_filter)


            ! Loop over blocks of prod and fill kernel matrix => ok cuz same (but replicated) dist

            CALL dbcsr_iterator_start(iter, prod)

            DO WHILE (dbcsr_iterator_blocks_left(iter))


               CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)

               IF ((iso == jso .AND. jblk < iblk) .AND. .NOT. quadrants(2)) cycle


               CALL dbcsr_get_block_p(prod, iblk, jblk, pblock, found)


               IF (found) THEN


                  ! Case study on quadrant

                  !upper-left

                  IF (quadrants(1)) THEN

                     CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)

                  END IF


                  !upper-right

                  IF (quadrants(2)) THEN

                     CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)

                  END IF


                  !lower-right

                  IF (quadrants(3)) THEN

                     CALL dbcsr_put_block(kernel, (ndo_so + iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)

                  END IF


               END IF


            END DO ! dbcsr iterator

            CALL dbcsr_iterator_stop(iter)


         END DO !jso

      END DO !iso


!  Clean-up

      CALL dbcsr_release(prod)


   END SUBROUTINE ri_int_product


END MODULE xas_tdp_kernel

dgemm
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Definition grid_cpu_task_list.c:206

cp_dbcsr_api::dbcsr_create
Definition cp_dbcsr_api.F:192

cp_dbcsr_operations::dbcsr_deallocate_matrix_set
Definition cp_dbcsr_operations.F:85

basis_set_types
Definition basis_set_types.F:15

cp_dbcsr_api
Definition cp_dbcsr_api.F:8

cp_dbcsr_api::dbcsr_transposed
subroutine, public dbcsr_transposed(transposed, normal, shallow_data_copy, transpose_distribution, use_distribution)
...
Definition cp_dbcsr_api.F:1233

cp_dbcsr_api::dbcsr_distribution_release
subroutine, public dbcsr_distribution_release(dist)
...
Definition cp_dbcsr_api.F:643

cp_dbcsr_api::dbcsr_distribution_new
subroutine, public dbcsr_distribution_new(dist, template, group, pgrid, row_dist, col_dist, reuse_arrays)
...
Definition cp_dbcsr_api.F:616

cp_dbcsr_api::dbcsr_iterator_next_block
subroutine, public dbcsr_iterator_next_block(iterator, row, column, block, block_number_argument_has_been_removed, row_size, col_size, row_offset, col_offset)
...
Definition cp_dbcsr_api.F:970

cp_dbcsr_api::dbcsr_iterator_blocks_left
logical function, public dbcsr_iterator_blocks_left(iterator)
...
Definition cp_dbcsr_api.F:944

cp_dbcsr_api::dbcsr_iterator_stop
subroutine, public dbcsr_iterator_stop(iterator)
...
Definition cp_dbcsr_api.F:1041

cp_dbcsr_api::dbcsr_desymmetrize
subroutine, public dbcsr_desymmetrize(matrix_a, matrix_b)
...
Definition cp_dbcsr_api.F:525

cp_dbcsr_api::dbcsr_copy
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
Definition cp_dbcsr_api.F:368

cp_dbcsr_api::dbcsr_get_block_p
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
Definition cp_dbcsr_api.F:693

cp_dbcsr_api::dbcsr_multiply
subroutine, public dbcsr_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, first_row, last_row, first_column, last_column, first_k, last_k, retain_sparsity, filter_eps, flop)
...
Definition cp_dbcsr_api.F:1087

cp_dbcsr_api::dbcsr_get_info
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
Definition cp_dbcsr_api.F:808

cp_dbcsr_api::dbcsr_reserve_blocks
subroutine, public dbcsr_reserve_blocks(matrix, rows, cols)
...
Definition cp_dbcsr_api.F:1163

cp_dbcsr_api::dbcsr_get_stored_coordinates
subroutine, public dbcsr_get_stored_coordinates(matrix, row, column, processor)
...
Definition cp_dbcsr_api.F:911

cp_dbcsr_api::dbcsr_filter
subroutine, public dbcsr_filter(matrix, eps)
...
Definition cp_dbcsr_api.F:658

cp_dbcsr_api::dbcsr_finalize
subroutine, public dbcsr_finalize(matrix)
...
Definition cp_dbcsr_api.F:673

cp_dbcsr_api::dbcsr_iterator_start
subroutine, public dbcsr_iterator_start(iterator, matrix, shared, dynamic, dynamic_byrows)
...
Definition cp_dbcsr_api.F:1003

cp_dbcsr_api::dbcsr_set
subroutine, public dbcsr_set(matrix, alpha)
...
Definition cp_dbcsr_api.F:1195

cp_dbcsr_api::dbcsr_release
subroutine, public dbcsr_release(matrix)
...
Definition cp_dbcsr_api.F:1133

cp_dbcsr_api::dbcsr_complete_redistribute
subroutine, public dbcsr_complete_redistribute(matrix, redist)
...
Definition cp_dbcsr_api.F:317

cp_dbcsr_api::dbcsr_get_num_blocks
integer function, public dbcsr_get_num_blocks(matrix)
...
Definition cp_dbcsr_api.F:877

cp_dbcsr_api::dbcsr_put_block
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
Definition cp_dbcsr_api.F:1116

cp_dbcsr_api::dbcsr_add
subroutine, public dbcsr_add(matrix_a, matrix_b, alpha_scalar, beta_scalar)
...
Definition cp_dbcsr_api.F:251

cp_dbcsr_api::dbcsr_distribution_get
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
Definition cp_dbcsr_api.F:572

cp_dbcsr_operations
DBCSR operations in CP2K.
Definition cp_dbcsr_operations.F:18

cp_dbcsr_operations::cp_dbcsr_dist2d_to_dist
subroutine, public cp_dbcsr_dist2d_to_dist(dist2d, dist)
Creates a DBCSR distribution from a distribution_2d.
Definition cp_dbcsr_operations.F:419

dbt_api
This is the start of a dbt_api, all publically needed functions are exported here....
Definition dbt_api.F:17

distribution_2d_types
stores a mapping of 2D info (e.g. matrix) on a 2D processor distribution (i.e. blacs grid) where cpus...
Definition distribution_2d_types.F:15

kinds
Defines the basic variable types.
Definition kinds.F:23

kinds::dp
integer, parameter, public dp
Definition kinds.F:34

message_passing
Interface to the message passing library MPI.
Definition message_passing.F:23

particle_methods
Define methods related to particle_type.
Definition particle_methods.F:14

particle_methods::get_particle_set
subroutine, public get_particle_set(particle_set, qs_kind_set, first_sgf, last_sgf, nsgf, nmao, basis)
Get the components of a particle set.
Definition particle_methods.F:98

particle_types
Define the data structure for the particle information.
Definition particle_types.F:19

qs_environment_types
Definition qs_environment_types.F:14

qs_environment_types::get_qs_env
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, ecoul_1c, rho0_s_rs, rho0_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, harris_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, eeq, rhs, tb_tblite)
Get the QUICKSTEP environment.
Definition qs_environment_types.F:518

qs_integral_utils
Some utility functions for the calculation of integrals.
Definition qs_integral_utils.F:14

qs_integral_utils::basis_set_list_setup
subroutine, public basis_set_list_setup(basis_set_list, basis_type, qs_kind_set)
Set up an easy accessible list of the basis sets for all kinds.
Definition qs_integral_utils.F:149

qs_kind_types
Define the quickstep kind type and their sub types.
Definition qs_kind_types.F:23

util
All kind of helpful little routines.
Definition util.F:14

util::get_limit
pure integer function, dimension(2), public get_limit(m, n, me)
divide m entries into n parts, return size of part me
Definition util.F:333

xas_tdp_kernel
All the kernel specific subroutines for XAS TDP calculations.
Definition xas_tdp_kernel.F:13

xas_tdp_kernel::kernel_coulomb_xc
subroutine, public kernel_coulomb_xc(coul_ker, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
Computes, if asked for it, the Coulomb and XC kernel matrices, in the usuall matrix format.
Definition xas_tdp_kernel.F:81

xas_tdp_kernel::contract2_ao_to_domo
subroutine, public contract2_ao_to_domo(contr_int, op_type, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
Contract the ri 3-center integrals stored in a tensor with repect to the donor MOs coeffs,...
Definition xas_tdp_kernel.F:972

xas_tdp_kernel::kernel_exchange
subroutine, public kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
Computes the exact exchange kernel matrix using RI. Returns an array of 2 matrices,...
Definition xas_tdp_kernel.F:595

xas_tdp_kernel::reserve_contraction_blocks
subroutine, public reserve_contraction_blocks(matrices, ri_atom, qs_env)
Reserves the blocks in of a dbcsr matrix as needed for RI 3-center contraction (aI|P)
Definition xas_tdp_kernel.F:903

xas_tdp_kernel::ri_all_blocks_mm
subroutine, public ri_all_blocks_mm(contr_int, pq)
Multiply all the blocks of a contracted RI integral (aI|P) by a matrix of type (P|....
Definition xas_tdp_kernel.F:1263

xas_tdp_types
Define XAS TDP control type and associated create, release, etc subroutines, as well as XAS TDP envir...
Definition xas_tdp_types.F:13

xas_tdp_types::get_proc_batch_sizes
subroutine, public get_proc_batch_sizes(batch_size, nbatch, nex_atom, nprocs)
Uses heuristics to determine a good batching of the processros for fxc integration.
Definition xas_tdp_types.F:1427

basis_set_types::gto_basis_set_p_type
Definition basis_set_types.F:94

cp_dbcsr_api::dbcsr_distribution_type
Definition cp_dbcsr_api.F:180

cp_dbcsr_api::dbcsr_iterator_type
Definition cp_dbcsr_api.F:186

cp_dbcsr_api::dbcsr_p_type
Definition cp_dbcsr_api.F:170

cp_dbcsr_api::dbcsr_type
Definition cp_dbcsr_api.F:174

distribution_2d_types::distribution_2d_type
distributes pairs on a 2d grid of processors
Definition distribution_2d_types.F:62

message_passing::mp_para_env_type
stores all the informations relevant to an mpi environment
Definition message_passing.F:763

particle_types::particle_type
Definition particle_types.F:35

qs_environment_types::qs_environment_type
Definition qs_environment_types.F:219

qs_kind_types::qs_kind_type
Provides all information about a quickstep kind.
Definition qs_kind_types.F:171

xas_tdp_types::donor_state_type
Type containing informations about a single donor state.
Definition xas_tdp_types.F:269

xas_tdp_types::xas_tdp_control_type
Type containing control information for TDP XAS calculations.
Definition xas_tdp_types.F:102

xas_tdp_types::xas_tdp_env_type
Type containing informations such as inputs and results for TDP XAS calculations.
Definition xas_tdp_types.F:194