(git:1e0ab71)
Loading...
Searching...
No Matches
gw_non_periodic_ri_rs.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief GW using RI-RS Approximation for molecules
10!> \par History
11!> 04.2026 created [Ritaj Tyagi]
12! **************************************************************************************************
17 USE cell_types, ONLY: cell_type,&
18 pbc
23 USE cp_dbcsr_api, ONLY: &
30 dbcsr_type_no_symmetry
36 USE cp_files, ONLY: close_file,&
43 USE cp_fm_diag, ONLY: cp_fm_power
47 USE cp_fm_types, ONLY: cp_fm_create,&
57 USE cp_output_handling, ONLY: cp_p_file,&
66 USE gw_large_cell_gamma, ONLY: &
70 USE gw_utils, ONLY: de_init_bs_env
73 USE kinds, ONLY: default_path_length,&
75 dp
77 USE machine, ONLY: m_walltime
80 USE orbital_pointers, ONLY: indco,&
81 ncoset
87 USE qs_kind_types, ONLY: get_qs_kind,&
89#include "./base/base_uses.f90"
90
91 IMPLICIT NONE
92
93 PRIVATE
94
95 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'gw_non_periodic_ri_rs'
96
99
100CONTAINS
101
102! **************************************************************************************************
103!> \brief GW calculation using RI-RS formalism for molecules
104!> \param qs_env ...
105!> \param bs_env ...
106! **************************************************************************************************
107
108 SUBROUTINE gw_calc_non_periodic_ri_rs(qs_env, bs_env)
109
110 TYPE(qs_environment_type), POINTER :: qs_env
111 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
112
113 CHARACTER(LEN=*), PARAMETER :: routinen = 'gw_calc_non_periodic_ri_rs'
114
115 INTEGER :: handle
116 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_sigma_x_gamma, fm_w_time
117 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_sigma_c_gamma_time
118
119 CALL timeset(routinen, handle)
120
121 !!========================================================================
122 !! 0. Precompute AO and RI Radii
123 !! Determine per-atom cutoff radii from the most diffuse Gaussian
124 !! primitives in the AO and RI auxiliary basis sets.
125 !! Equations:
126 !! a. α_min,ao = min_{i,j} { ζ_ao(i,j) | ζ_ao(i,j) > 10⁻³ }
127 !! b. α_min,ri = min_{i,j} { ζ_ri(i,j) | ζ_ri(i,j) > 10⁻³ }
128 !! c. r_ao = sqrt( -ln(ε) / α_min,ao ) (radius_ao_per_atom)
129 !! d. r_ri = sqrt( -ln(ε) / α_min,ri ) (radius_ri_per_atom)
130 !!========================================================================
131 CALL precompute_ri_rs_radii(qs_env, bs_env)
132
133 !!========================================================================
134 !! 1. Grid Generation for RI-RS
135 !! (Modified Lebedev grids from Ivan Duchemin and Xavier Blase)
136 !! Generate flattened 1D array of grid points for RI-RS.
137 !! Equation: r_g(k) = R_A + r_g(A)
138 !!========================================================================
139 CALL ri_rs_grid_assembler(qs_env, bs_env, bs_env%ri_rs%grid_points)
140
141 !!========================================================================
142 !! 2. Atomic Basis Evaluation
143 !! Compute values of spherical atomic basis functions at grid points.
144 !! Expression: Φ_μl = Φ_μ(r_l) (mat_phi_mu_l)
145 !!========================================================================
146 CALL atomic_basis_at_grid_point(qs_env, bs_env, bs_env%ri_rs%grid_points, &
147 bs_env%ri_rs%mat_phi_mu_l)
148
149 !!========================================================================
150 !! 3. Compute RI-RS Coefficients (Z_lp)
151 !! Solve the regularized system for each atom P, where the grid domain
152 !! is restricted to r_l within a cutoff distance of atom P:
153 !! a. D_ll' = [ Σ_μ Φ_μ(r_l) Φ_μ(r_l') ]^2 (Equation 13)
154 !! b. D_lP = Σ_{μν} Φ_μ(r_l) Φ_ν(r_l) (μν|P) (Equation 15)
155 !! c. Conditioning:
156 !! Dvec_l = 1 / sqrt(D_ll) (Diagonal scaling vector)
157 !! D'_ll' = Dvec_l * D_ll' * Dvec_l' + λδ_ll'
158 !! D'_lP = Dvec_l * D_lP
159 !! d. Solve: Σ_l' D'_ll' * Z'_l'P = D'_lP (Equation 14)
160 !! e. Rescale: Z_lP = Z'_lP * Dvec_l (Z_lP stored in mat_Z_lP)
161 !!========================================================================
162 CALL compute_coeff_z_lp(qs_env, bs_env, bs_env%ri_rs%grid_points, &
163 bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
164
165 !!========================================================================
166 !! 4. Compute Independent-Particle Polarizability (χ)
167 !! G^occ_µλ(i|τ|) = sum_n^occ C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
168 !! G^vir_µλ(i|τ|) = sum_n^vir C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
169 !! G^occ_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
170 !! G^vir_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
171 !! χ_ll'(iτ) = G^occ_ll'(i|τ|) * G^vir_ll'(i|τ|)
172 !! χ_PQ(iτ) = sum_ll' Z_lP χ_ll'(iτ) Z_l'Q
173 !!========================================================================
174 CALL get_mat_chi_gamma_tau(bs_env, bs_env%mat_chi_Gamma_tau, &
175 bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
176
177 !!========================================================================
178 !! 5. Compute Screened Interaction (W)
179 !! χ_PQ(iτ) -> χ_PQ(iω) -> ε_PQ(iω) -> W_PQ(iω) -> W_PQ(iτ)
180 !!========================================================================
181 CALL compute_w(bs_env, qs_env, bs_env%mat_chi_Gamma_tau, fm_w_time)
182
183 !!========================================================================
184 !! 6. Compute Exact Exchange Self-Energy (Σ^x)
185 !! D_µν = sum_n^occ C_µn C_νn
186 !! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
187 !! V^trunc_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
188 !! Σ^x_ll' = D_ll' * V^trunc_ll'
189 !! Σ^x_λσ(k=0) = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
190 !!========================================================================
191 CALL compute_sigma_x(bs_env, qs_env, bs_env%ri_rs%mat_phi_mu_l, &
192 bs_env%ri_rs%mat_Z_lP, fm_sigma_x_gamma)
193
194 !!========================================================================
195 !! 7. Compute Correlation Self-Energy (Σ^c)
196 !! W_ll'(iτ) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
197 !! Σ^c_ll'(iτ) = -G^occ_ll'(i|τ|) * W_ll'(iτ), for τ < 0
198 !! Σ^c_ll'(iτ) = G^vir_ll'(i|τ|) * W_ll'(iτ), for τ > 0
199 !! Σ^c_λσ(iτ) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ) Φ_σ(r_l')
200 !!========================================================================
201 CALL compute_sigma_c(bs_env, fm_w_time, bs_env%ri_rs%mat_phi_mu_l, &
202 bs_env%ri_rs%mat_Z_lP, fm_sigma_c_gamma_time)
203
204 !!========================================================================
205 !! 8. Compute Quasiparticle Energies
206 !! Σ^c_λσ(iτ) -> Σ^c_nn(ϵ)
207 !! ϵ_nk^GW = ϵ_nk^DFT + Σ^c_nn(ϵ) + Σ^x_nn - v^xc_nn
208 !!========================================================================
209 CALL compute_qp_energies(bs_env, qs_env, fm_sigma_x_gamma, fm_sigma_c_gamma_time)
210
211 CALL de_init_bs_env(bs_env)
212
213 CALL timestop(handle)
214
215 END SUBROUTINE gw_calc_non_periodic_ri_rs
216
217! **************************************************************************************************
218!> \brief Compute per-atom AO and RI basis radii from the most diffuse Gaussian
219!> primitive in the AO ("ORB") and RI auxiliary ("RI_AUX") basis sets.
220!> Stores results in bs_env%ri_rs%radius_ao_per_atom(:) and
221!> bs_env%ri_rs%radius_ri_per_atom(:) and prints a per-atom table.
222!> Radius: r_kind = sqrt(-log(eps) / alpha_min_kind)
223!> with eps = eps_filter.
224!> \param qs_env ...
225!> \param bs_env ...
226! **************************************************************************************************
227
228 SUBROUTINE precompute_ri_rs_radii(qs_env, bs_env)
229
230 TYPE(qs_environment_type), POINTER :: qs_env
231 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
232
233 CHARACTER(LEN=*), PARAMETER :: routinen = 'precompute_ri_rs_radii'
234
235 INTEGER :: handle, i, iatom, ikind, j, natom, nkind
236 INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of
237 REAL(kind=dp) :: eps
238 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: alpha_min_ao_kind, alpha_min_ri_kind
239 REAL(kind=dp), DIMENSION(:, :), POINTER :: zet_ao, zet_ri
240 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
241 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
242
243 CALL timeset(routinen, handle)
244
245 CALL get_qs_env(qs_env, nkind=nkind, atomic_kind_set=atomic_kind_set, &
246 particle_set=particle_set)
247 natom = SIZE(particle_set)
248
249 eps = bs_env%eps_filter
250
251 ALLOCATE (alpha_min_ao_kind(nkind), alpha_min_ri_kind(nkind))
252 alpha_min_ao_kind = huge(1.0_dp)
253 alpha_min_ri_kind = huge(1.0_dp)
254
255 DO ikind = 1, nkind
256 zet_ao => bs_env%basis_set_AO(ikind)%gto_basis_set%zet
257 zet_ri => bs_env%basis_set_RI(ikind)%gto_basis_set%zet
258
259 DO i = 1, SIZE(zet_ao, 1)
260 DO j = 1, SIZE(zet_ao, 2)
261 IF (zet_ao(i, j) > 1.0e-3_dp) &
262 alpha_min_ao_kind(ikind) = min(alpha_min_ao_kind(ikind), zet_ao(i, j))
263 END DO
264 END DO
265 DO i = 1, SIZE(zet_ri, 1)
266 DO j = 1, SIZE(zet_ri, 2)
267 IF (zet_ri(i, j) > 1.0e-3_dp) &
268 alpha_min_ri_kind(ikind) = min(alpha_min_ri_kind(ikind), zet_ri(i, j))
269 END DO
270 END DO
271 END DO
272
273 CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
274
275 ALLOCATE (bs_env%ri_rs%radius_ao_per_atom(natom))
276 ALLOCATE (bs_env%ri_rs%radius_ri_per_atom(natom))
277 DO iatom = 1, natom
278 ikind = kind_of(iatom)
279 bs_env%ri_rs%radius_ao_per_atom(iatom) = sqrt(-log(eps)/alpha_min_ao_kind(ikind))
280 bs_env%ri_rs%radius_ri_per_atom(iatom) = sqrt(-log(eps)/alpha_min_ri_kind(ikind))
281 END DO
282
283 IF (bs_env%unit_nr > 0) THEN
284 WRITE (bs_env%unit_nr, '(T2,A)') 'Per-kind RI-RS basis radii (Bohr):'
285 WRITE (bs_env%unit_nr, '(T4,A6,2X,A4,2A14)') 'Kind', 'Elem', 'r_AO', 'r_RI'
286 DO ikind = 1, nkind
287 WRITE (bs_env%unit_nr, '(T4,I6,2X,A4,2F14.4)') &
288 ikind, &
289 atomic_kind_set(ikind)%element_symbol, &
290 sqrt(-log(eps)/alpha_min_ao_kind(ikind)), &
291 sqrt(-log(eps)/alpha_min_ri_kind(ikind))
292 END DO
293 WRITE (bs_env%unit_nr, '(A)') ' '
294 END IF
295
296 DEALLOCATE (alpha_min_ao_kind, alpha_min_ri_kind, kind_of)
297
298 CALL timestop(handle)
299
300 END SUBROUTINE precompute_ri_rs_radii
301
302! **************************************************************************************************
303!> \brief Compute grid points for RI-RS
304!> Right now based on Ivan and Xavier implementation
305!> JCP 150, 174120 (2019), JCTC 17, 2383 (2021)
306!> \param qs_env ...
307!> \param bs_env ...
308!> \param ri_rs_grid_points ...
309! **************************************************************************************************
310
311 SUBROUTINE ri_rs_grid_assembler(qs_env, bs_env, ri_rs_grid_points)
312
313 TYPE(qs_environment_type), POINTER :: qs_env
314 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
315 REAL(kind=dp), ALLOCATABLE, INTENT(OUT) :: ri_rs_grid_points(:, :)
316
317 CHARACTER(LEN=*), PARAMETER :: routinen = 'ri_rs_grid_assembler'
318
319 INTEGER :: atom_idx, end_idx, handle, ikind, j, &
320 natom, nkind, start_idx, &
321 total_grid_npts
322 INTEGER, ALLOCATABLE :: atom_to_kind(:), ri_rs_grid_offsets(:)
323 REAL(kind=dp) :: atomic_center(3)
324 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
325 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
326
327 CALL timeset(routinen, handle)
328
329 !! Get the information about the atoms in the system
330 CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, particle_set=particle_set)
331
332 nkind = SIZE(atomic_kind_set)
333 natom = SIZE(particle_set)
334
335 !! 1. Build the grid cache
336 CALL build_grid_cache(bs_env, atomic_kind_set)
337
338 !! 2. Calculate grid counts and offsets
339 ALLOCATE (ri_rs_grid_offsets(natom + 1))
340 ALLOCATE (atom_to_kind(natom))
341
342 total_grid_npts = 0
343 DO ikind = 1, nkind
344 DO j = 1, SIZE(atomic_kind_set(ikind)%atom_list)
345 atom_idx = atomic_kind_set(ikind)%atom_list(j)
346 atom_to_kind(atom_idx) = ikind
347 ri_rs_grid_offsets(atom_idx) = total_grid_npts + 1
348 total_grid_npts = total_grid_npts + bs_env%ri_rs%grid_cache(ikind)%npts
349 END DO
350 END DO
351
352 ri_rs_grid_offsets(natom + 1) = total_grid_npts + 1
353
354 IF (bs_env%unit_nr > 0) THEN
355 WRITE (bs_env%unit_nr, fmt="(T2,A,T69,I12)") &
356 'Total grid points used for RI-RS:', total_grid_npts
357 WRITE (bs_env%unit_nr, "(A)") ' '
358 END IF
359
360 !! 3. Allocate the global ri_rs_grid arrays
361 ALLOCATE (ri_rs_grid_points(3, total_grid_npts))
362
363 !! 4. Parallelize the grid generation loop
364 !$OMP PARALLEL DO DEFAULT(NONE) &
365 !$OMP SHARED(ri_rs_grid_points, ri_rs_grid_offsets, atom_to_kind, &
366 !$OMP particle_set, bs_env, natom) &
367 !$OMP PRIVATE(atom_idx, ikind, atomic_center, start_idx, end_idx) &
368 !$OMP SCHEDULE(DYNAMIC, 1)
369 DO atom_idx = 1, natom
370 ikind = atom_to_kind(atom_idx)
371 atomic_center(:) = particle_set(atom_idx)%r(:)
372
373 start_idx = ri_rs_grid_offsets(atom_idx)
374 end_idx = start_idx + bs_env%ri_rs%grid_cache(ikind)%npts - 1
375
376 !! Shift the cached origin grid by the atom's center
377 ri_rs_grid_points(1, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(1, :) + atomic_center(1)
378 ri_rs_grid_points(2, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(2, :) + atomic_center(2)
379 ri_rs_grid_points(3, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(3, :) + atomic_center(3)
380
381 END DO
382 !$OMP END PARALLEL DO
383
384 !! 5. Cleanup memory
385 IF (ALLOCATED(bs_env%ri_rs%grid_cache)) THEN
386 DO ikind = 1, nkind
387 IF (ALLOCATED(bs_env%ri_rs%grid_cache(ikind)%raw_points)) DEALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points)
388 END DO
389 DEALLOCATE (bs_env%ri_rs%grid_cache)
390 END IF
391
392 DEALLOCATE (atom_to_kind, ri_rs_grid_offsets)
393 CALL timestop(handle)
394
395 END SUBROUTINE ri_rs_grid_assembler
396
397! **************************************************************************************************
398!> \brief Reads grids from .ion files and stores them in memory based on grid_select
399!> \param bs_env ...
400!> \param atomic_kind_set ...
401! **************************************************************************************************
402
403 SUBROUTINE build_grid_cache(bs_env, atomic_kind_set)
404
405 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
406 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
407
408 CHARACTER(LEN=default_path_length) :: filename, full_path, line
409 CHARACTER(LEN=default_string_length) :: atom_sym, suffix
410 INTEGER :: colon_idx, i, ierr, ikind, iunit, nkind
411 REAL(kind=dp) :: pt(3)
412
413 ! Determine the file suffix based on the user's choice
414 IF (bs_env%ri_rs%grid_select == 1) THEN
415 suffix = "_def2-tzvp-rs.ion"
416 ELSE IF (bs_env%ri_rs%grid_select == 2) THEN
417 suffix = "_cc-pvtz-rs.ion"
418 ELSE
419 cpabort("Unknown grid_select value. Valid options are 1 (def2-TZVPP) or 2 (cc-pVTZ).")
420 END IF
421
422 nkind = SIZE(atomic_kind_set)
423 IF (.NOT. ALLOCATED(bs_env%ri_rs%grid_cache)) ALLOCATE (bs_env%ri_rs%grid_cache(nkind))
424
425 DO ikind = 1, nkind
426 atom_sym = trim(atomic_kind_set(ikind)%element_symbol)
427 filename = trim(atom_sym)//trim(suffix)
428
429 full_path = "ri_rs_grid/"//trim(filename)
430
431 CALL open_file(file_name=trim(full_path), unit_number=iunit, &
432 file_action="READ", file_status="OLD")
433
434 ! Parse the preamble for 'n points'
435 bs_env%ri_rs%grid_cache(ikind)%npts = 0
436 DO
437 READ (iunit, '(A)', iostat=ierr) line
438 IF (ierr /= 0) EXIT
439 IF (index(line, 'n points') > 0) THEN
440 colon_idx = index(line, ':')
441 READ (line(colon_idx + 1:), *) bs_env%ri_rs%grid_cache(ikind)%npts
442 EXIT
443 END IF
444 END DO
445
446 ! Allocate the cache array for this specific element
447 ALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points(3, bs_env%ri_rs%grid_cache(ikind)%npts))
448
449 rewind(iunit)
450 DO
451 READ (iunit, '(A)', iostat=ierr) line
452 IF (ierr /= 0) EXIT
453 IF (index(line, '<grid_points>') > 0) EXIT
454 END DO
455
456 ! Read the raw grid coordinates
457 DO i = 1, bs_env%ri_rs%grid_cache(ikind)%npts
458 READ (iunit, *, iostat=ierr) pt(1), pt(2), pt(3)
459 IF (ierr /= 0) THEN
460 cpabort("Unexpected EOF in grid file ")
461 END IF
462 bs_env%ri_rs%grid_cache(ikind)%raw_points(:, i) = pt(:)
463 END DO
464
465 CALL close_file(unit_number=iunit)
466 END DO
467
468 END SUBROUTINE build_grid_cache
469
470! **************************************************************************************************
471!> \brief Evaluates atomic basis functions on a real-space grid and builds a sparse DBCSR matrix.
472!> \param qs_env ...
473!> \param bs_env ...
474!> \param ri_rs_grid_points ...
475!> \param mat_phi_mu_l ...
476! **************************************************************************************************
477
478 SUBROUTINE atomic_basis_at_grid_point(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l)
479
480 TYPE(qs_environment_type), POINTER :: qs_env
481 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
482 REAL(kind=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
483 TYPE(dbcsr_type), INTENT(OUT) :: mat_phi_mu_l
484
485 CHARACTER(LEN=*), PARAMETER :: routinen = 'atomic_basis_at_grid_point'
486
487 INTEGER :: c_size, chunk_size, dimen_orb, handle, i, i_blk, iatom, natom, npcol, nprow, &
488 num_grid_chunks, r_end, r_start, total_grid_npts
489 INTEGER, ALLOCATABLE, DIMENSION(:) :: first_sgf
490 INTEGER, DIMENSION(:), POINTER :: c_blk_sizes, col_dist, col_dist_ks, &
491 r_blk_sizes, row_dist, row_dist_ks
492 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: atom_col_buffer
493 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
494 TYPE(cell_type), POINTER :: cell
495 TYPE(dbcsr_distribution_type) :: dist
496 TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist_ks
497 TYPE(mp_para_env_type), POINTER :: para_env
498 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
499 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
500
501 CALL timeset(routinen, handle)
502
503 chunk_size = max_elements_per_block
504
505 ! Extract environment variables
506 CALL get_qs_env(qs_env, cell=cell, atomic_kind_set=atomic_kind_set, &
507 qs_kind_set=qs_kind_set, particle_set=particle_set, &
508 para_env=para_env)
509
510 natom = SIZE(particle_set)
511 total_grid_npts = SIZE(ri_rs_grid_points, 2)
512
513 ! Map the starting indices of spherical gaussian functions (SGF) for each atom
514 ALLOCATE (first_sgf(natom + 1))
515 CALL get_basis_offsets(particle_set, qs_kind_set, first_sgf, dimen_orb)
516
517 ! =========================================================================
518 ! 1. SETUP DBCSR MATRIX TOPOLOGY
519 ! =========================================================================
520
521 ! A. Define Column Block Sizes (1 Block = 1 Atom's full basis set)
522 ALLOCATE (c_blk_sizes(natom))
523 DO iatom = 1, natom
524 c_blk_sizes(iatom) = first_sgf(iatom + 1) - first_sgf(iatom)
525 END DO
526
527 ! B. Define Row Block Sizes (Grid chunks of max size 256)
528 num_grid_chunks = ceiling(real(total_grid_npts, kind=dp)/real(chunk_size, kind=dp))
529 ALLOCATE (r_blk_sizes(num_grid_chunks))
530 r_blk_sizes = chunk_size
531 IF (mod(total_grid_npts, chunk_size) /= 0) THEN
532 r_blk_sizes(num_grid_chunks) = mod(total_grid_npts, chunk_size)
533 END IF
534
535 ! C. Fetch CP2K's Default Process Grid Configuration
536 CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist_ks)
537 CALL dbcsr_distribution_get(dbcsr_dist_ks, row_dist=row_dist_ks, col_dist=col_dist_ks)
538
539 ! D. Build Custom Mappings using Round-Robin across the 2D process grid
540 ! (MAXVAL + 1 accounts for 0-based process indexing in DBCSR)
541 nprow = maxval(row_dist_ks) + 1
542 npcol = maxval(col_dist_ks) + 1
543
544 ALLOCATE (row_dist(num_grid_chunks))
545 DO i = 1, num_grid_chunks
546 row_dist(i) = mod(i - 1, nprow)
547 END DO
548
549 ALLOCATE (col_dist(natom))
550 DO i = 1, natom
551 col_dist(i) = mod(i - 1, npcol)
552 END DO
553
554 ! E. Create the DBCSR Distribution and Initialize the Matrix
555 CALL dbcsr_distribution_new(dist, template=dbcsr_dist_ks, &
556 row_dist=row_dist, col_dist=col_dist)
557
558 CALL dbcsr_create(mat_phi_mu_l, name="phi_val_sparse", dist=dist, &
559 matrix_type=dbcsr_type_no_symmetry, &
560 row_blk_size=r_blk_sizes, col_blk_size=c_blk_sizes)
561
562 ! =========================================================================
563 ! 2. STREAM DATA DIRECTLY INTO SPARSE MATRIX
564 ! =========================================================================
565 ! Iterate over the atoms assigned to this specific MPI rank
566 DO iatom = para_env%mepos + 1, natom, para_env%num_pe
567
568 c_size = c_blk_sizes(iatom)
569
570 ! Allocate a temporary dense buffer just for this specific atom
571 ALLOCATE (atom_col_buffer(total_grid_npts, c_size))
572 atom_col_buffer = 0.0_dp
573
574 ! Evaluate the basis functions on the grid. Skip grid points outside
575 ! the spatial extent of the most diffuse AO Gaussian on iatom; beyond
576 ! that radius the contribution is guaranteed below eps_filter.
577 CALL fill_phi_for_atom(atom_col_buffer, ri_rs_grid_points, total_grid_npts, &
578 iatom, particle_set, qs_kind_set, cell, &
579 bs_env%ri_rs%radius_ao_per_atom(iatom)**2)
580
581 ! Slice the dense column into chunks and insert into DBCSR
582 DO i_blk = 1, num_grid_chunks
583 r_start = (i_blk - 1)*chunk_size + 1
584 r_end = min(i_blk*chunk_size, total_grid_npts)
585
586 ! Apply dynamic sparsity filtering: Only store blocks with physical significance
587 IF (maxval(abs(atom_col_buffer(r_start:r_end, 1:c_size))) > bs_env%eps_filter) THEN
588 CALL dbcsr_put_block(mat_phi_mu_l, row=i_blk, col=iatom, &
589 block=atom_col_buffer(r_start:r_end, 1:c_size))
590 END IF
591 END DO
592
593 DEALLOCATE (atom_col_buffer)
594
595 END DO
596
597 ! Finalize triggers internal MPI communication to route blocks to their correct 2D process owners
598 CALL dbcsr_finalize(mat_phi_mu_l)
599
600 ! -------------------------------------------------------------------------
601 ! CLEANUP
602 ! -------------------------------------------------------------------------
603 DEALLOCATE (first_sgf, r_blk_sizes, c_blk_sizes, row_dist, col_dist)
605
606 CALL timestop(handle)
607
608 END SUBROUTINE atomic_basis_at_grid_point
609
610! **************************************************************************************************
611!> \brief Compute value of all basis function for single atom across all grid points
612!> \param phi_val ...
613!> \param ri_rs_grid ...
614!> \param npts ...
615!> \param iatom ...
616!> \param particle_set ...
617!> \param qs_kind_set ...
618!> \param cell ...
619!> \param r2_threshold ...
620! **************************************************************************************************
621
622 SUBROUTINE fill_phi_for_atom(phi_val, ri_rs_grid, npts, iatom, &
623 particle_set, qs_kind_set, cell, r2_threshold)
624
625 REAL(kind=dp), INTENT(INOUT) :: phi_val(:, :)
626 INTEGER, INTENT(IN) :: npts
627 REAL(kind=dp), INTENT(IN) :: ri_rs_grid(3, npts)
628 INTEGER, INTENT(IN) :: iatom
629 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
630 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
631 TYPE(cell_type), POINTER :: cell
632 REAL(kind=dp), INTENT(IN) :: r2_threshold
633
634 INTEGER :: first_sgf, i_pt, ico, iend_co, ikind, &
635 ipgf, iset, isgf, ishell, istart_co, &
636 l, last_sgf, lx, ly, lz, n_cart_total, &
637 row_idx
638 REAL(kind=dp) :: alpha, dist_vec(3), exp_val, poly, r2, &
639 r_atom(3), weight
640 TYPE(gto_basis_set_type), POINTER :: orb_basis_set
641
642 ! Get Atom Info
643 ikind = particle_set(iatom)%atomic_kind%kind_number
644 CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB")
645 IF (.NOT. ASSOCIATED(orb_basis_set)) RETURN
646
647 r_atom = particle_set(iatom)%r
648
649 !$OMP PARALLEL DO DEFAULT(NONE) &
650 !$OMP SHARED(phi_val, ri_rs_grid, npts, orb_basis_set, r_atom, cell, ncoset, indco, &
651 !$OMP r2_threshold) &
652 !$OMP PRIVATE(i_pt, dist_vec, r2, iset, n_cart_total, ishell, l, istart_co, iend_co, &
653 !$OMP first_sgf, last_sgf, ipgf, alpha, exp_val, isgf, ico, row_idx, weight, &
654 !$OMP lx, ly, lz, poly) &
655 !$OMP SCHEDULE(STATIC)
656 DO i_pt = 1, npts
657 dist_vec = pbc(ri_rs_grid(:, i_pt) - r_atom, cell)
658 r2 = dot_product(dist_vec, dist_vec)
659 IF (r2 > r2_threshold) cycle
660
661 DO iset = 1, orb_basis_set%nset
662 n_cart_total = ncoset(orb_basis_set%lmax(iset))
663
664 DO ishell = 1, orb_basis_set%nshell(iset)
665 l = orb_basis_set%l(ishell, iset)
666 istart_co = ncoset(l - 1) + 1
667 iend_co = ncoset(l)
668
669 first_sgf = orb_basis_set%first_sgf(ishell, iset)
670 last_sgf = orb_basis_set%last_sgf(ishell, iset)
671
672 DO ipgf = 1, orb_basis_set%npgf(iset)
673 alpha = orb_basis_set%zet(ipgf, iset)
674 exp_val = exp(-alpha*r2)
675
676 DO isgf = first_sgf, last_sgf
677 DO ico = istart_co, iend_co
678 row_idx = (ipgf - 1)*n_cart_total + ico
679 weight = orb_basis_set%sphi(row_idx, isgf)
680 lx = indco(1, ico)
681 ly = indco(2, ico)
682 lz = indco(3, ico)
683 poly = (dist_vec(1)**lx)*(dist_vec(2)**ly)*(dist_vec(3)**lz)
684
685 phi_val(i_pt, isgf) = phi_val(i_pt, isgf) + (weight*poly*exp_val)
686
687 END DO
688 END DO
689 END DO
690 END DO
691 END DO
692 END DO
693 !$OMP END PARALLEL DO
694
695 END SUBROUTINE fill_phi_for_atom
696
697! **************************************************************************************************
698!> \brief Helper for OMP threads to fill phi_val column values
699!> \param particle_set ...
700!> \param qs_kind_set ...
701!> \param first_sgf ...
702!> \param total_sgf ...
703! **************************************************************************************************
704
705 SUBROUTINE get_basis_offsets(particle_set, qs_kind_set, first_sgf, total_sgf)
706
707 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
708 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
709 INTEGER, INTENT(OUT) :: first_sgf(:), total_sgf
710
711 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_basis_offsets'
712
713 INTEGER :: handle, iatom, ikind, nsgf
714
715 CALL timeset(routinen, handle)
716
717 total_sgf = 0
718 DO iatom = 1, SIZE(particle_set)
719 first_sgf(iatom) = total_sgf + 1
720 ikind = particle_set(iatom)%atomic_kind%kind_number
721 CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf, basis_type="ORB")
722 total_sgf = total_sgf + nsgf
723 END DO
724 first_sgf(SIZE(particle_set) + 1) = total_sgf + 1
725
726 CALL timestop(handle)
727
728 END SUBROUTINE get_basis_offsets
729
730! **************************************************************************************************
731!> \brief Compute RI-RS Coefficients (Z_lP)
732!> \param qs_env ...
733!> \param bs_env ...
734!> \param ri_rs_grid_points ...
735!> \param mat_phi_mu_l ...
736!> \param mat_Z_lP ...
737! **************************************************************************************************
738
739 SUBROUTINE compute_coeff_z_lp(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l, mat_Z_lP)
740
741 ! Arguments
742 TYPE(qs_environment_type), POINTER :: qs_env
743 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
744 REAL(kind=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
745 TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l
746 TYPE(dbcsr_type), INTENT(OUT) :: mat_z_lp
747
748 CHARACTER(LEN=*), PARAMETER :: key = 'PROPERTIES%BANDSTRUCTURE%GW%PRINT%RESTART', &
749 routinen = 'compute_coeff_Z_lP'
750
751 INTEGER :: atom_j_mepos, atom_j_stride, atom_p, atom_p_start, atom_p_stride, col_end, &
752 col_start, current_chunk_size, g, group_handle, handle, i, i_blk, ikind, info, j, j_ri, &
753 l, loc_idx, loc_ptr, max_ao_size, max_loc_ri, my_group, n_ao_total, n_grid_total, &
754 n_groups, n_loc_ri, n_local_grid, n_procs_per_atom, natom, nkind, num_grid_chunks, &
755 p_loop_atom, r_end, r_start, source_atom
756 INTEGER, ALLOCATABLE, DIMENSION(:) :: local_grid_idx, row_offset
757 INTEGER, DIMENSION(:), POINTER :: col_dist_phi, col_dist_ri, r_blk_sizes, &
758 ri_blk_sizes, row_dist_grid
759 REAL(kind=dp) :: cutoff_ri, d_sp, dist, r2_threshold, &
760 r_c, t1
761 REAL(kind=dp), ALLOCATABLE :: cutoff_ri_per_kind(:)
762 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: cutoff_ri_per_atom, d_vec_local
763 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: d_local, d_lp_local, phi_local, &
764 sphere_grid, z_blk
765 REAL(kind=dp), DIMENSION(3) :: pos_p
766 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
767 TYPE(cell_type), POINTER :: cell
768 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
769 TYPE(cp_fm_struct_type), POINTER :: fm_struct_b, fm_struct_d
770 TYPE(cp_fm_type) :: fm_b, fm_d
771 TYPE(cp_logger_type), POINTER :: logger
772 TYPE(dbcsr_distribution_type) :: dist_phi, dist_z
773 TYPE(gw_3c_ctx_type) :: ctx_3c
774 TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
775 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
776 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
777 TYPE(section_vals_type), POINTER :: input
778
779 CALL timeset(routinen, handle)
780
781 t1 = m_walltime()
782
783 CALL get_qs_env(qs_env, para_env=para_env, particle_set=particle_set, input=input, &
784 qs_kind_set=qs_kind_set, cell=cell, atomic_kind_set=atomic_kind_set)
785
786 ! ---------------------------------------------------------------------
787 ! Subgroup setup. Default G=1 keeps the single-rank BLAS path; G>1 splits
788 ! ranks into atom-groups so the Cholesky on D_local distributes across G
789 ! ranks (memory ~1/G) and the compute_d_lp build also splits across the
790 ! subgroup. G=1 leaves para_env_sub / blacs_env_sub NULL — no subgroup
791 ! comms created, atom_P loop uses per-rank round-robin, compute_d_lp runs
792 ! its full atom_j range on each rank, no allreduce.
793 ! ---------------------------------------------------------------------
794 n_procs_per_atom = min(bs_env%ri_rs%n_procs_per_atom_z_lp, para_env%num_pe)
795 IF (n_procs_per_atom < 1) n_procs_per_atom = 1
796
797 NULLIFY (para_env_sub, blacs_env_sub)
798 IF (n_procs_per_atom > 1) THEN
799 n_groups = para_env%num_pe/n_procs_per_atom
800 my_group = min(para_env%mepos/n_procs_per_atom, n_groups - 1)
801 ALLOCATE (para_env_sub)
802 CALL para_env_sub%from_split(para_env, my_group)
803 CALL cp_blacs_env_create(blacs_env=blacs_env_sub, para_env=para_env_sub)
804 atom_p_start = my_group + 1
805 atom_p_stride = n_groups
806 atom_j_mepos = para_env_sub%mepos
807 atom_j_stride = para_env_sub%num_pe
808 ELSE
809 atom_p_start = para_env%mepos + 1
810 atom_p_stride = para_env%num_pe
811 atom_j_mepos = 0
812 atom_j_stride = 1
813 END IF
814
815 natom = SIZE(bs_env%i_RI_start_from_atom)
816 n_ao_total = bs_env%i_ao_end_from_atom(natom)
817 n_grid_total = SIZE(ri_rs_grid_points, 2)
818
819 ! =========================================================================
820 ! 1. SETUP DBCSR TOPOLOGY & EXACT OFFSETS
821 ! =========================================================================
822 CALL dbcsr_get_info(mat_phi_mu_l, row_blk_size=r_blk_sizes, distribution=dist_phi)
823 CALL dbcsr_distribution_get(dist_phi, row_dist=row_dist_grid, col_dist=col_dist_phi, group=group_handle)
824
825 num_grid_chunks = SIZE(r_blk_sizes)
826
827 ALLOCATE (row_offset(num_grid_chunks))
828 row_offset(1) = 0
829 DO i_blk = 2, num_grid_chunks
830 row_offset(i_blk) = row_offset(i_blk - 1) + r_blk_sizes(i_blk - 1)
831 END DO
832
833 ALLOCATE (ri_blk_sizes(natom), col_dist_ri(natom))
834 DO atom_p = 1, natom
835 ri_blk_sizes(atom_p) = bs_env%i_RI_end_from_atom(atom_p) - bs_env%i_RI_start_from_atom(atom_p) + 1
836 col_dist_ri(atom_p) = mod(atom_p - 1, maxval(col_dist_phi) + 1)
837 END DO
838
839 CALL dbcsr_distribution_new(dist_z, template=dist_phi, row_dist=row_dist_grid, col_dist=col_dist_ri)
840
841 IF (bs_env%ri_rs%Z_lP_exists) THEN
842 CALL dbcsr_binary_read(filepath=trim(bs_env%prefix)//"Z_lP.matrix", &
843 distribution=dist_z, &
844 matrix_new=mat_z_lp)
845 IF (bs_env%unit_nr > 0) THEN
846 WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
847 'Read Z_lP from file ', ' Execution time', m_walltime() - t1, ' s'
848 WRITE (bs_env%unit_nr, '(A)') ' '
849 END IF
850 ELSE
851
852 CALL dbcsr_create(mat_z_lp, name="mat_Z_lP", dist=dist_z, &
853 matrix_type=dbcsr_type_no_symmetry, &
854 row_blk_size=r_blk_sizes, col_blk_size=ri_blk_sizes)
855
856 max_ao_size = 0
857 DO j = 1, SIZE(bs_env%i_ao_start_from_atom)
858 max_ao_size = max(max_ao_size, bs_env%i_ao_end_from_atom(j) - bs_env%i_ao_start_from_atom(j) + 1)
859 END DO
860 max_loc_ri = maxval(ri_blk_sizes)
861
862 ! Per-atom RI-RS integration sphere:
863 ! cutoff_ri(P) = r_c + r_AO(P)
864 ! where r_c is the truncated-Coulomb cutoff of the RI metric. The
865 ! CUTOFF_RADIUS_RI_RS keyword (when > 0) overrides the entire cutoff calculation.
866 nkind = SIZE(atomic_kind_set)
867 ALLOCATE (cutoff_ri_per_atom(natom))
868
869 IF (bs_env%ri_rs%cutoff_radius_ri_rs > 0.0_dp) THEN
870 cutoff_ri_per_atom(:) = bs_env%ri_rs%cutoff_radius_ri_rs
871 ELSE
872 r_c = bs_env%ri_metric%cutoff_radius
873 DO p_loop_atom = 1, natom
874 cutoff_ri_per_atom(p_loop_atom) = &
875 r_c + bs_env%ri_rs%radius_ao_per_atom(p_loop_atom)
876 END DO
877 END IF
878
879 ALLOCATE (cutoff_ri_per_kind(nkind))
880 cutoff_ri_per_kind(:) = 0.0_dp
881 IF (bs_env%unit_nr > 0) THEN
882 WRITE (bs_env%unit_nr, '(T2,A)') 'Per-kind maximum RI-RS sphere cutoff (Bohr):'
883 WRITE (bs_env%unit_nr, '(T4,A4,A14)') 'Kind', 'max cutoff_ri'
884 ! Walk atoms once; accumulate per-kind max using atomic_kind index
885 DO p_loop_atom = 1, natom
886 ikind = particle_set(p_loop_atom)%atomic_kind%kind_number
887 cutoff_ri_per_kind(ikind) = max(cutoff_ri_per_kind(ikind), &
888 cutoff_ri_per_atom(p_loop_atom))
889 END DO
890 DO ikind = 1, nkind
891 WRITE (bs_env%unit_nr, '(T4,A4,F14.4)') &
892 atomic_kind_set(ikind)%element_symbol, &
893 cutoff_ri_per_kind(ikind)
894 END DO
895 WRITE (bs_env%unit_nr, '(A)') ' '
896 END IF
897 DEALLOCATE (cutoff_ri_per_kind)
898
899 ! Build the shared 3c-integral context once, before the atom_P loop and outside
900 ! any OMP region. This hoists out the libint / t_c_g0 / md_ftable inits and the
901 ! contracted sphi tables, which would otherwise re-run on every per-block call.
902 ! gw_3c_ctx_create is MPI-collective (truncated-Coulomb init reads a file + bcasts).
903 CALL gw_3c_ctx_create(ctx_3c, qs_env, bs_env%ri_metric, &
904 basis_j=bs_env%basis_set_AO, basis_k=bs_env%basis_set_AO, &
905 basis_i=bs_env%basis_set_RI)
906
907 ! =========================================================================
908 ! 2. MPI LOOP OVER ATOMS (Fully independent, no MPI barriers inside)
909 ! This distributes the N_atoms among the available MPI processors.
910 ! phi_local for each atom_P's cutoff sphere is built on the fly via
911 ! fill_phi_for_atom — no dense replicated phi_global, no allreduce.
912 ! =========================================================================
913 DO atom_p = atom_p_start, natom, atom_p_stride
914
915 n_loc_ri = ri_blk_sizes(atom_p)
916 pos_p(:) = particle_set(atom_p)%r(:)
917
918 cutoff_ri = cutoff_ri_per_atom(atom_p)
919
920 ! ---------------------------------------------------------------------
921 ! A. Determine Local Grid Domain based on cutoff_ri
922 ! ---------------------------------------------------------------------
923 n_local_grid = 0
924 DO l = 1, n_grid_total
925 dist = sqrt(sum((ri_rs_grid_points(1:3, l) - pos_p(1:3))**2))
926 IF (dist <= cutoff_ri) n_local_grid = n_local_grid + 1
927 END DO
928
929 ALLOCATE (local_grid_idx(n_local_grid))
930
931 n_local_grid = 0
932 DO l = 1, n_grid_total
933 dist = sqrt(sum((ri_rs_grid_points(1:3, l) - pos_p(1:3))**2))
934 IF (dist <= cutoff_ri) THEN
935 n_local_grid = n_local_grid + 1
936 local_grid_idx(n_local_grid) = l
937 END IF
938 END DO
939
940 ! ---------------------------------------------------------------------
941 ! B. Build phi_local on the fly via fill_phi_for_atom (no replication)
942 ! ---------------------------------------------------------------------
943 ! Copy the local sphere coordinates into a contiguous buffer for
944 ! fill_phi_for_atom. Only source atoms whose AO basis can reach the
945 ! sphere of atom_P contribute; the others are pruned by the inequality
946 ! using the per-atom radii precomputed in precompute_ri_rs_radii.
947 ALLOCATE (sphere_grid(3, n_local_grid))
948 DO loc_idx = 1, n_local_grid
949 sphere_grid(:, loc_idx) = ri_rs_grid_points(:, local_grid_idx(loc_idx))
950 END DO
951
952 ALLOCATE (phi_local(n_local_grid, n_ao_total))
953 phi_local = 0.0_dp
954
955 DO source_atom = 1, natom
956 d_sp = norm2(particle_set(source_atom)%r(:) - pos_p(:))
957 IF (d_sp > bs_env%ri_rs%radius_ao_per_atom(source_atom) + cutoff_ri) cycle
958
959 col_start = bs_env%i_ao_start_from_atom(source_atom)
960 col_end = bs_env%i_ao_end_from_atom(source_atom)
961 r2_threshold = bs_env%ri_rs%radius_ao_per_atom(source_atom)**2
962
963 CALL fill_phi_for_atom(phi_local(:, col_start:col_end), sphere_grid, &
964 n_local_grid, source_atom, particle_set, qs_kind_set, &
965 cell, r2_threshold)
966 END DO
967
968 DEALLOCATE (sphere_grid)
969
970 ! ---------------------------------------------------------------------
971 ! C. Build Local RHS Matrix (d_lp_local)
972 ! Done first so the subgroup-distributed compute_d_lp + allreduce
973 ! is not entangled with the LHS build. compute_d_lp does not
974 ! depend on D_local or d_vec_local.
975 ! ---------------------------------------------------------------------
976 ALLOCATE (d_lp_local(n_local_grid, n_loc_ri))
977 d_lp_local = 0.0_dp
978
979 CALL compute_d_lp(bs_env, ctx_3c, phi_local, d_lp_local, n_local_grid, &
980 n_loc_ri, atom_p, max_ao_size, atom_j_mepos, atom_j_stride)
981
982 ! Reduce per-subgroup-rank partials into the replicated d_lp_local.
983 ! Skipped for G=1 (BLAS path): each rank has the full sum locally.
984 IF (n_procs_per_atom > 1) THEN
985 CALL para_env_sub%sum(d_lp_local)
986 END IF
987
988 ! ---------------------------------------------------------------------
989 ! D. Build d_vec_local (Jacobi diagonal) + LHS — BLAS or ScaLAPACK
990 ! ---------------------------------------------------------------------
991 ALLOCATE (d_vec_local(n_local_grid))
992
993 IF (n_procs_per_atom == 1) THEN
994 ! BLAS path: build D_local densely, compute d_vec as side-effect
995 ! of the Jacobi step (preserves bit-identical arithmetic with the
996 ! previous branch).
997 ALLOCATE (d_local(n_local_grid, n_local_grid))
998 d_local = 0.0_dp
999
1000 CALL dsyrk("L", "N", n_local_grid, n_ao_total, 1.0_dp, phi_local, &
1001 n_local_grid, 0.0_dp, d_local, n_local_grid)
1002
1003 !$OMP PARALLEL DO DEFAULT(NONE) &
1004 !$OMP SHARED(n_local_grid, D_local, d_vec_local, bs_env) &
1005 !$OMP PRIVATE(i) &
1006 !$OMP SCHEDULE(STATIC)
1007 DO i = 1, n_local_grid
1008 d_local(i, i) = d_local(i, i)**2
1009 d_vec_local(i) = 1.0_dp/sqrt(max(d_local(i, i), 1.0e-16_dp))
1010 d_local(i, i) = (d_local(i, i)*d_vec_local(i)**2) + bs_env%ri_rs%tikhonov
1011 END DO
1012 !$OMP END PARALLEL DO
1013
1014 !$OMP PARALLEL DO DEFAULT(NONE) &
1015 !$OMP SHARED(n_local_grid, D_local, d_vec_local) &
1016 !$OMP PRIVATE(j, i) &
1017 !$OMP SCHEDULE(DYNAMIC)
1018 DO j = 1, n_local_grid
1019 DO i = j + 1, n_local_grid
1020 d_local(i, j) = d_local(i, j)**2
1021 d_local(i, j) = d_local(i, j)*d_vec_local(i)*d_vec_local(j)
1022 d_local(j, i) = d_local(i, j)
1023 END DO
1024 END DO
1025 !$OMP END PARALLEL DO
1026 ELSE
1027 ! ScaLAPACK path: d_vec computed directly from phi (= 1/||phi_i||^2);
1028 ! solve_D_lp_distributed builds D block-cyclic internally with
1029 ! the squared+scaled values, so no dense D_local on this rank.
1030 !$OMP PARALLEL DO DEFAULT(NONE) &
1031 !$OMP SHARED(n_local_grid, n_ao_total, phi_local, d_vec_local) &
1032 !$OMP PRIVATE(i, j) &
1033 !$OMP SCHEDULE(STATIC)
1034 DO i = 1, n_local_grid
1035 d_vec_local(i) = 0.0_dp
1036 DO j = 1, n_ao_total
1037 d_vec_local(i) = d_vec_local(i) + phi_local(i, j)*phi_local(i, j)
1038 END DO
1039 d_vec_local(i) = 1.0_dp/max(d_vec_local(i), 1.0e-16_dp)
1040 END DO
1041 !$OMP END PARALLEL DO
1042 END IF
1043
1044 ! ---------------------------------------------------------------------
1045 ! E. Pre-scale d_lp by d_vec
1046 ! ---------------------------------------------------------------------
1047 !$OMP PARALLEL DO DEFAULT(NONE) &
1048 !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
1049 !$OMP PRIVATE(j_ri, i) &
1050 !$OMP SCHEDULE(STATIC)
1051 DO j_ri = 1, n_loc_ri
1052 DO i = 1, n_local_grid
1053 d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
1054 END DO
1055 END DO
1056 !$OMP END PARALLEL DO
1057
1058 ! ---------------------------------------------------------------------
1059 ! F. Solve — BLAS dpotrf/dpotrs or ScaLAPACK pdpotrf/pdpotrs
1060 ! ---------------------------------------------------------------------
1061 IF (n_procs_per_atom == 1) THEN
1062 CALL dpotrf('L', n_local_grid, d_local, n_local_grid, info)
1063 CALL dpotrs('L', n_local_grid, n_loc_ri, d_local, n_local_grid, &
1064 d_lp_local, n_local_grid, info)
1065 DEALLOCATE (d_local)
1066 ELSE
1067 CALL solve_d_lp_distributed(phi_local, d_vec_local, d_lp_local, &
1068 n_local_grid, n_ao_total, n_loc_ri, &
1069 bs_env%ri_rs%tikhonov, &
1070 para_env_sub, blacs_env_sub, &
1071 fm_struct_d, fm_struct_b, fm_d, fm_b, info)
1072 END IF
1073
1074 ! ---------------------------------------------------------------------
1075 ! G. Post-scale solution by d_vec (common to both paths)
1076 ! ---------------------------------------------------------------------
1077 !$OMP PARALLEL DO DEFAULT(NONE) &
1078 !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
1079 !$OMP PRIVATE(j_ri, i) &
1080 !$OMP SCHEDULE(STATIC)
1081 DO j_ri = 1, n_loc_ri
1082 DO i = 1, n_local_grid
1083 d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
1084 END DO
1085 END DO
1086 !$OMP END PARALLEL DO
1087
1088 ! ---------------------------------------------------------------------
1089 ! H. Scatter Local Solution Back to Global DBCSR Matrix
1090 ! local_grid_idx is ascending (built by the ordered scan above), so a
1091 ! single walking pointer over it sweeps the chunks in one pass.
1092 ! Under ScaLAPACK (G>1) the d_lp_local solution is identical on all
1093 ! G subgroup ranks (gathered via cp_fm_get_submatrix); only the
1094 ! subgroup root writes to mat_Z_lP so each atom column is emitted
1095 ! exactly once. DBCSR routes blocks to their global owner on finalize.
1096 ! ---------------------------------------------------------------------
1097 IF (n_procs_per_atom == 1 .OR. para_env_sub%mepos == 0) THEN
1098 ALLOCATE (z_blk(maxval(r_blk_sizes), n_loc_ri))
1099 loc_ptr = 1
1100
1101 DO i_blk = 1, num_grid_chunks
1102 r_start = row_offset(i_blk) + 1
1103 r_end = row_offset(i_blk) + r_blk_sizes(i_blk)
1104 current_chunk_size = r_blk_sizes(i_blk)
1105
1106 z_blk = 0.0_dp
1107
1108 DO WHILE (loc_ptr <= n_local_grid)
1109 g = local_grid_idx(loc_ptr)
1110 IF (g > r_end) EXIT
1111 z_blk(g - r_start + 1, 1:n_loc_ri) = d_lp_local(loc_ptr, 1:n_loc_ri)
1112 loc_ptr = loc_ptr + 1
1113 END DO
1114
1115 IF (maxval(abs(z_blk(1:current_chunk_size, 1:n_loc_ri))) > bs_env%eps_filter) THEN
1116 CALL dbcsr_put_block(mat_z_lp, row=i_blk, col=atom_p, &
1117 block=z_blk(1:current_chunk_size, 1:n_loc_ri))
1118 END IF
1119 END DO
1120
1121 DEALLOCATE (z_blk)
1122 END IF
1123
1124 DEALLOCATE (d_vec_local, d_lp_local)
1125 DEALLOCATE (local_grid_idx, phi_local)
1126
1127 END DO
1128
1129 DEALLOCATE (cutoff_ri_per_atom)
1130
1131 CALL gw_3c_ctx_release(ctx_3c)
1132
1133 CALL dbcsr_finalize(mat_z_lp)
1134
1135 IF (bs_env%unit_nr > 0) THEN
1136 WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
1137 'Computed Z_lP ', ' Execution time', m_walltime() - t1, ' s'
1138 WRITE (bs_env%unit_nr, '(A)') ' '
1139 END IF
1140
1141 logger => cp_get_default_logger()
1142
1143 IF (btest(cp_print_key_should_output(logger%iter_info, input, key), cp_p_file)) THEN
1144 CALL dbcsr_binary_write(matrix=mat_z_lp, filepath=trim(bs_env%prefix)//"Z_lP.matrix")
1145 END IF
1146
1147 END IF
1148
1149 DEALLOCATE (row_offset, ri_blk_sizes, col_dist_ri)
1150 CALL dbcsr_distribution_release(dist_z)
1151
1152 IF (n_procs_per_atom > 1) THEN
1153 CALL cp_blacs_env_release(blacs_env_sub)
1154 CALL para_env_sub%free()
1155 DEALLOCATE (para_env_sub)
1156 END IF
1157
1158 DEALLOCATE (ri_rs_grid_points)
1159
1160 CALL timestop(handle)
1161
1162 END SUBROUTINE compute_coeff_z_lp
1163
1164! **************************************************************************************************
1165!> \brief Computes the dense localized RHS d_lp(l,P) = Σ_{μν} Φ_μ(r_l)·Φ_ν(r_l)·(μν|P) for one
1166!> RI atom P, OMP-threaded over (atom_j, atom_k) AO-pair blocks: per thread, build the 3c
1167!> block, then contract grid-chunked pair densities into a private d_lp partial; partials
1168!> are reduced into d_lp at the end.
1169!> Pair screening is handled inside build_3c_integral_block_ctx via the `screened` output,
1170!> \param bs_env ...
1171!> \param ctx shared 3c-integral context (gw_3c_ctx_create)
1172!> \param phi_val Φ_μ(r_l) on the local-sphere grid (n_grid_total × n_ao)
1173!> \param d_lp output (n_grid_total × n_loc_ri), zeroed by the caller, accumulated here
1174!> \param n_grid_total number of local-sphere grid rows
1175!> \param n_loc_ri number of RI functions of atom_P
1176!> \param atom_P ...
1177!> \param max_ao_size ...
1178!> \param atom_j_mepos ...
1179!> \param atom_j_stride ...
1180! **************************************************************************************************
1181
1182 SUBROUTINE compute_d_lp(bs_env, ctx, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, &
1183 max_ao_size, atom_j_mepos, atom_j_stride)
1184
1185 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1186 TYPE(gw_3c_ctx_type), INTENT(IN) :: ctx
1187 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: phi_val
1188 INTEGER, INTENT(IN) :: n_grid_total, n_loc_ri
1189 REAL(kind=dp), INTENT(INOUT) :: d_lp(n_grid_total, n_loc_ri)
1190 INTEGER, INTENT(IN) :: atom_p, max_ao_size, atom_j_mepos, &
1191 atom_j_stride
1192
1193 CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_d_lp'
1194 INTEGER, PARAMETER :: grid_chunk = 1024
1195
1196 INTEGER :: atom_j, atom_k, c, handle, j, jk_idx, &
1197 jsize, jstart, k, ksize, kstart, l, &
1198 l0, ri
1199 LOGICAL :: screened
1200 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: int_2d_prv, rho_chunk
1201 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: int_3c_prv
1202 TYPE(gw_3c_ws_type) :: ws
1203
1204 CALL timeset(routinen, handle)
1205
1206 !$OMP PARALLEL DEFAULT(NONE) &
1207 !$OMP SHARED(bs_env, ctx, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, max_ao_size, &
1208 !$OMP atom_j_mepos, atom_j_stride) &
1209 !$OMP PRIVATE(atom_j, atom_k, c, j, jk_idx, jsize, jstart, k, ksize, kstart, l, l0, ri, &
1210 !$OMP screened, int_2d_prv, rho_chunk, int_3c_prv, ws)
1211
1212 CALL gw_3c_ws_create(ws, ctx)
1213 ALLOCATE (int_3c_prv(max_ao_size, max_ao_size, n_loc_ri))
1214 ALLOCATE (int_2d_prv(max_ao_size*max_ao_size, n_loc_ri))
1215 ALLOCATE (rho_chunk(grid_chunk, max_ao_size*max_ao_size))
1216
1217 ! MPI-stride atom_j over the subgroup (atom_j_stride = 1 for the BLAS
1218 ! path, > 1 for the ScaLAPACK path). The OMP DO parallelizes the inner
1219 ! atom_k while the outer atom_j carries the MPI stride. COLLAPSE(2) is
1220 ! dropped because the outer stride is non-unit under ScaLAPACK; for
1221 ! typical natom the inner loop has plenty of work for DYNAMIC.
1222 !$OMP DO SCHEDULE(DYNAMIC) REDUCTION(+:d_lp)
1223 DO atom_j = atom_j_mepos + 1, SIZE(bs_env%i_ao_start_from_atom), atom_j_stride
1224 DO atom_k = 1, SIZE(bs_env%i_ao_start_from_atom)
1225 jstart = bs_env%i_ao_start_from_atom(atom_j)
1226 jsize = bs_env%i_ao_end_from_atom(atom_j) - jstart + 1
1227 kstart = bs_env%i_ao_start_from_atom(atom_k)
1228 ksize = bs_env%i_ao_end_from_atom(atom_k) - kstart + 1
1229
1230 int_3c_prv(1:jsize, 1:ksize, 1:n_loc_ri) = 0.0_dp
1231
1232 ! Compute B_{μν,P} = (μν|P); ctx-internal triangle-inequality screening on
1233 ! kind_radius sets `screened=.TRUE.` for negligible triples.
1234 CALL build_3c_integral_block_ctx(int_3c_prv(1:jsize, 1:ksize, 1:n_loc_ri), &
1235 ctx, ws, atom_j=atom_j, atom_k=atom_k, atom_i=atom_p, &
1236 screened=screened)
1237
1238 IF (screened) cycle
1239
1240 ! Flatten 3D B_{μν, P} tensor to 2D B_{(μν), P} matrix for BLAS
1241 DO ri = 1, n_loc_ri
1242 DO k = 1, ksize
1243 DO j = 1, jsize
1244 jk_idx = (k - 1)*jsize + j
1245 int_2d_prv(jk_idx, ri) = int_3c_prv(j, k, ri)
1246 END DO
1247 END DO
1248 END DO
1249
1250 ! Pair density ρ(l, μν) = Φ_μ(r_l) Φ_ν(r_l) in grid chunks, contracted on the fly:
1251 ! d_{l,P} += ρ(l, μν) B_{(μν),P} (dgemm runs serially inside the parallel region)
1252 DO l0 = 1, n_grid_total, grid_chunk
1253 c = min(grid_chunk, n_grid_total - l0 + 1)
1254 DO k = 1, ksize
1255 DO j = 1, jsize
1256 jk_idx = (k - 1)*jsize + j
1257 DO l = 1, c
1258 rho_chunk(l, jk_idx) = phi_val(l0 + l - 1, jstart + j - 1)* &
1259 phi_val(l0 + l - 1, kstart + k - 1)
1260 END DO
1261 END DO
1262 END DO
1263 CALL dgemm("N", "N", c, n_loc_ri, jsize*ksize, &
1264 1.0_dp, rho_chunk, grid_chunk, &
1265 int_2d_prv, max_ao_size*max_ao_size, &
1266 1.0_dp, d_lp(l0, 1), n_grid_total)
1267 END DO
1268 END DO
1269 END DO
1270 !$OMP END DO
1271
1272 DEALLOCATE (int_3c_prv, int_2d_prv, rho_chunk)
1273 CALL gw_3c_ws_release(ws)
1274
1275 !$OMP END PARALLEL
1276
1277 CALL timestop(handle)
1278
1279 END SUBROUTINE compute_d_lp
1280
1281! **************************************************************************************************
1282!> \brief Distributed pdpotrf/pdpotrs solve of D x = b for one atom of the
1283!> RI-RS Z_lP build. Called when N_PROCS_PER_ATOM_Z_LP > 1, with a
1284!> subgroup of cooperating ranks and an associated BLACS context.
1285!> Each rank in the subgroup holds the (replicated) phi_local and the
1286!> (replicated) RHS d_lp; it builds its own block-cyclic slice of the
1287!> squared+Jacobi-scaled Gram matrix D via tiled DGEMM, factorizes via
1288!> cp_fm_cholesky_decompose (UPLO='U'), and solves with
1289!> cp_fm_cholesky_solve. The replicated d_lp is updated in place via
1290!> cp_fm_get_submatrix.
1291!> \param phi_local replicated (n_loc x n_ao) AO values on the sphere grid
1292!> \param d_vec replicated Jacobi diagonal = 1 / ||phi_i||^2
1293!> \param d_lp replicated RHS in/out (n_loc x n_rhs); on input pre-scaled
1294!> by d_vec, on output also pre-scaled (caller post-scales)
1295!> \param n_loc number of sphere-local grid points
1296!> \param n_ao number of AO basis functions
1297!> \param n_rhs number of RI functions of this atom_P (RHS columns)
1298!> \param tikhonov regularisation added to the diagonal
1299!> \param para_env_sub sub-communicator of the atom-group
1300!> \param blacs_env_sub BLACS context on the sub-communicator
1301!> \param fm_struct_D ...
1302!> \param fm_struct_b ...
1303!> \param fm_D ...
1304!> \param fm_b ...
1305!> \param info 0 on success, non-zero if pdpotrf or pdpotrs failed
1306! **************************************************************************************************
1307 SUBROUTINE solve_d_lp_distributed(phi_local, d_vec, d_lp, n_loc, n_ao, n_rhs, &
1308 tikhonov, para_env_sub, blacs_env_sub, &
1309 fm_struct_D, fm_struct_b, fm_D, fm_b, info)
1310
1311 REAL(kind=dp), DIMENSION(:, :), INTENT(IN) :: phi_local
1312 REAL(kind=dp), DIMENSION(:), INTENT(IN) :: d_vec
1313 REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT) :: d_lp
1314 INTEGER, INTENT(IN) :: n_loc, n_ao, n_rhs
1315 REAL(kind=dp), INTENT(IN) :: tikhonov
1316 TYPE(mp_para_env_type), POINTER :: para_env_sub
1317 TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
1318 TYPE(cp_fm_struct_type), POINTER :: fm_struct_d, fm_struct_b
1319 TYPE(cp_fm_type), INTENT(INOUT) :: fm_d, fm_b
1320 INTEGER, INTENT(OUT) :: info
1321
1322 CHARACTER(LEN=*), PARAMETER :: routinen = 'solve_D_lp_distributed'
1323
1324 INTEGER :: handle, i_loc, ig, j_loc, jg, &
1325 ncol_local, nrow_local
1326 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1327 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1328 POINTER :: local_data
1329
1330 CALL timeset(routinen, handle)
1331 info = 0
1332
1333 NULLIFY (fm_struct_d, fm_struct_b)
1334 CALL cp_fm_struct_create(fm_struct_d, para_env=para_env_sub, &
1335 context=blacs_env_sub, &
1336 nrow_global=n_loc, ncol_global=n_loc)
1337 CALL cp_fm_struct_create(fm_struct_b, para_env=para_env_sub, &
1338 context=blacs_env_sub, &
1339 nrow_global=n_loc, ncol_global=n_rhs)
1340 CALL cp_fm_create(fm_d, fm_struct_d)
1341 CALL cp_fm_create(fm_b, fm_struct_b)
1342
1343 ! ---- Build the local block-cyclic slice of fm_D --------------------
1344 ! Tiled DGEMM build: for each (row_tile x col_tile) sub-block we
1345 ! gather small phi strips, DGEMM into a small gram tile, then write
1346 ! the squared & d_vec-scaled result directly into fm_D%local_data.
1347 ! This avoids materialising the full (nrow_local x n_ao),
1348 ! (n_ao x ncol_local), and (nrow_local x ncol_local) buffers.
1349 CALL cp_fm_get_info(fm_d, nrow_local=nrow_local, ncol_local=ncol_local, &
1350 row_indices=row_indices, col_indices=col_indices, &
1351 local_data=local_data)
1352
1353 IF (nrow_local > 0 .AND. ncol_local > 0) THEN
1354 block
1355 INTEGER, PARAMETER :: ntile = 1024
1356 INTEGER :: ib, ie, jb, je, mb, kb, ti, tj
1357 REAL(kind=dp), ALLOCATABLE :: gram_t(:, :), phi_cols_t(:, :), phi_rows_t(:, :)
1358 ALLOCATE (phi_rows_t(ntile, n_ao), phi_cols_t(n_ao, ntile), gram_t(ntile, ntile))
1359 DO ib = 1, nrow_local, ntile
1360 ie = min(ib + ntile - 1, nrow_local)
1361 mb = ie - ib + 1
1362 !$OMP PARALLEL DO DEFAULT(NONE) &
1363 !$OMP SHARED(mb, n_ao, phi_rows_t, phi_local, row_indices, ib) &
1364 !$OMP PRIVATE(ti, j_loc) SCHEDULE(STATIC)
1365 DO j_loc = 1, n_ao
1366 DO ti = 1, mb
1367 phi_rows_t(ti, j_loc) = phi_local(row_indices(ib + ti - 1), j_loc)
1368 END DO
1369 END DO
1370 !$OMP END PARALLEL DO
1371 DO jb = 1, ncol_local, ntile
1372 je = min(jb + ntile - 1, ncol_local)
1373 kb = je - jb + 1
1374 !$OMP PARALLEL DO DEFAULT(NONE) &
1375 !$OMP SHARED(kb, n_ao, phi_cols_t, phi_local, col_indices, jb) &
1376 !$OMP PRIVATE(tj, i_loc) SCHEDULE(STATIC)
1377 DO tj = 1, kb
1378 DO i_loc = 1, n_ao
1379 phi_cols_t(i_loc, tj) = phi_local(col_indices(jb + tj - 1), i_loc)
1380 END DO
1381 END DO
1382 !$OMP END PARALLEL DO
1383 CALL dgemm('N', 'N', mb, kb, n_ao, &
1384 1.0_dp, phi_rows_t, ntile, phi_cols_t, n_ao, &
1385 0.0_dp, gram_t, ntile)
1386 !$OMP PARALLEL DO DEFAULT(NONE) &
1387 !$OMP SHARED(mb, kb, gram_t, d_vec, row_indices, col_indices, ib, jb) &
1388 !$OMP SHARED(local_data, tikhonov) &
1389 !$OMP PRIVATE(ti, tj, ig, jg) SCHEDULE(STATIC)
1390 DO tj = 1, kb
1391 jg = col_indices(jb + tj - 1)
1392 DO ti = 1, mb
1393 ig = row_indices(ib + ti - 1)
1394 local_data(ib + ti - 1, jb + tj - 1) = &
1395 gram_t(ti, tj)*gram_t(ti, tj)*d_vec(ig)*d_vec(jg)
1396 IF (ig == jg) THEN
1397 local_data(ib + ti - 1, jb + tj - 1) = &
1398 local_data(ib + ti - 1, jb + tj - 1) + tikhonov
1399 END IF
1400 END DO
1401 END DO
1402 !$OMP END PARALLEL DO
1403 END DO
1404 END DO
1405 DEALLOCATE (phi_rows_t, phi_cols_t, gram_t)
1406 END block
1407 END IF
1408
1409 ! ---- Load the replicated d_lp into the block-cyclic fm_b -----------
1410 CALL cp_fm_set_submatrix(fm_b, d_lp)
1411
1412 ! ---- pdpotrf (Cholesky factorisation; cp_fm_cholesky_decompose
1413 ! factors with UPLO='U', so pdpotrs must match)
1414 CALL cp_fm_cholesky_decompose(fm_d, n=n_loc, info_out=info)
1415 IF (info /= 0) THEN
1416 cpabort("pdpotrf failed in solve_D_lp_distributed")
1417 END IF
1418
1419 ! ---- pdpotrs/dpotrs (solve in place on fm_b) -----------------------
1420 CALL cp_fm_cholesky_solve(fm_d, fm_b, n=n_loc, info_out=info)
1421 IF (info /= 0) THEN
1422 cpabort("pdpotrs failed in solve_D_lp_distributed")
1423 END IF
1424
1425 ! ---- Gather distributed solution back into the replicated d_lp ----
1426 CALL cp_fm_get_submatrix(fm_b, d_lp)
1427
1428 CALL cp_fm_release(fm_d)
1429 CALL cp_fm_release(fm_b)
1430 CALL cp_fm_struct_release(fm_struct_d)
1431 CALL cp_fm_struct_release(fm_struct_b)
1432
1433 CALL timestop(handle)
1434
1435 END SUBROUTINE solve_d_lp_distributed
1436
1437! **************************************************************************************************
1438!> \brief Computes the χ(iτ) matrix
1439!> \param bs_env ...
1440!> \param mat_chi_Gamma_tau ...
1441!> \param mat_phi_mu_l ...
1442!> \param mat_Z_lP ...
1443! **************************************************************************************************
1444
1445 SUBROUTINE get_mat_chi_gamma_tau(bs_env, mat_chi_Gamma_tau, mat_phi_mu_l, mat_Z_lP)
1446
1447 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1448 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_chi_gamma_tau
1449 TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_z_lp
1450
1451 CHARACTER(LEN=*), PARAMETER :: routinen = 'get_mat_chi_Gamma_tau'
1452
1453 INTEGER :: handle, i, i_t, ispin, npcol
1454 INTEGER, DIMENSION(:), POINTER :: blk_ao, blk_grid, dist_col_ao, &
1455 dist_col_grid, dist_row_grid
1456 REAL(kind=dp) :: t1, tau
1457 TYPE(dbcsr_distribution_type) :: dist_grid_grid, dist_phi
1458 TYPE(dbcsr_type) :: matrix_chi_grid, matrix_chi_grid_spin, &
1459 matrix_g_occ_grid, matrix_g_vir_grid
1460
1461 CALL timeset(routinen, handle)
1462
1463 ! =========================================================================
1464 ! 1. SETUP CORE TOPOLOGIES
1465 ! =========================================================================
1466 CALL dbcsr_get_info(mat_phi_mu_l, distribution=dist_phi, row_blk_size=blk_grid, col_blk_size=blk_ao)
1467 CALL dbcsr_distribution_get(dist_phi, row_dist=dist_row_grid, col_dist=dist_col_ao)
1468
1469 ! Determine number of MPI process columns
1470 npcol = maxval(dist_col_ao) + 1
1471
1472 ! Build a perfectly safe column distribution for the Grid dimension
1473 ALLOCATE (dist_col_grid(SIZE(blk_grid)))
1474 DO i = 1, SIZE(blk_grid)
1475 dist_col_grid(i) = mod(i - 1, npcol)
1476 END DO
1477
1478 CALL dbcsr_distribution_new(dist_grid_grid, template=dist_phi, &
1479 row_dist=dist_row_grid, col_dist=dist_col_grid)
1480
1481 CALL dbcsr_create(matrix_g_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1482 CALL dbcsr_create(matrix_g_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1483 CALL dbcsr_create(matrix_chi_grid, "chi_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1484 CALL dbcsr_create(matrix_chi_grid_spin, "chi_grid_spin", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1485
1486 ! =========================================================================
1487 ! 2. MAIN IMAGINARY TIME LOOP
1488 ! =========================================================================
1489 DO i_t = 1, bs_env%num_time_freq_points
1490 t1 = m_walltime()
1491
1492 tau = bs_env%imag_time_points(i_t)
1493 CALL dbcsr_set(matrix_chi_grid, 0.0_dp)
1494
1495 ! ----------------------------------------------------------------------
1496 ! A. SPIN LOOP (Allocations safely encapsulated in wrappers)
1497 ! ----------------------------------------------------------------------
1498 DO ispin = 1, bs_env%n_spin
1499
1500 ! G^occ_µλ(i|τ|) = sum_n^occ C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
1501 ! G^occ_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
1502 CALL build_g_grid(bs_env, tau, ispin, .true., .false., mat_phi_mu_l, &
1503 matrix_g_occ_grid, bs_env%eps_filter)
1504
1505 ! G^vir_µλ(i|τ|) = sum_n^vir C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
1506 ! G^vir_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
1507 CALL build_g_grid(bs_env, tau, ispin, .false., .true., mat_phi_mu_l, &
1508 matrix_g_vir_grid, bs_env%eps_filter)
1509
1510 ! -------------------------------------------------------------------
1511 ! B. ELEMENT-WISE HADAMARD PRODUCT
1512 ! -------------------------------------------------------------------
1513 ! χ_ll'(iτ) = G^occ_ll'(i|τ|) * G^vir_ll'(i|τ|)
1514 CALL hadamard_product(matrix_g_occ_grid, matrix_g_vir_grid, matrix_chi_grid_spin, bs_env%spin_degeneracy)
1515
1516 ! Accumulate spin contributions
1517 CALL dbcsr_add(matrix_chi_grid, matrix_chi_grid_spin, 1.0_dp, 1.0_dp)
1518
1519 END DO ! ispin
1520
1521 ! ----------------------------------------------------------------------
1522 ! C. TRANSFORM TO AUXILIARY BASIS & EXPORT DIRECTLY
1523 ! χ_aux = Z^T * χ_grid * Z
1524 ! χ_PQ(iτ) = sum_ll' Z_lP χ_ll'(iτ) Z_l'Q
1525 ! Result is dumped into the final array mat_chi_Gamma_tau!
1526 ! ----------------------------------------------------------------------
1527 CALL contract_a_b_a("T", "N", mat_z_lp, matrix_chi_grid, &
1528 mat_chi_gamma_tau(i_t)%matrix, bs_env%eps_filter)
1529
1530 IF (bs_env%unit_nr > 0) THEN
1531 WRITE (bs_env%unit_nr, '(T2,A,I13,A,I3,A,F7.1,A)') &
1532 χτ'Computed (i,k=0) for time point', i_t, ' /', bs_env%num_time_freq_points, &
1533 ', Execution time', m_walltime() - t1, ' s'
1534 END IF
1535
1536 END DO ! i_t
1537
1538 ! =========================================================================
1539 ! 3. FINAL CLEANUP
1540 ! =========================================================================
1541 CALL dbcsr_release(matrix_g_occ_grid)
1542 CALL dbcsr_release(matrix_g_vir_grid)
1543 CALL dbcsr_release(matrix_chi_grid)
1544 CALL dbcsr_release(matrix_chi_grid_spin)
1545 CALL dbcsr_distribution_release(dist_grid_grid)
1546 DEALLOCATE (dist_col_grid)
1547
1548 IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1549
1550 CALL timestop(handle)
1551
1552 END SUBROUTINE get_mat_chi_gamma_tau
1553
1554! **************************************************************************************************
1555!> \brief Computes Green's Function in grid basis
1556!> \param bs_env ...
1557!> \param tau ...
1558!> \param ispin ...
1559!> \param occ ...
1560!> \param vir ...
1561!> \param mat_phi_mu_l ...
1562!> \param matrix_G_grid ...
1563!> \param eps_filter ...
1564! **************************************************************************************************
1565
1566 SUBROUTINE build_g_grid(bs_env, tau, ispin, occ, vir, mat_phi_mu_l, matrix_G_grid, eps_filter)
1567
1568 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1569 REAL(kind=dp), INTENT(IN) :: tau
1570 INTEGER, INTENT(IN) :: ispin
1571 LOGICAL, INTENT(IN) :: occ, vir
1572 TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, matrix_g_grid
1573 REAL(kind=dp), INTENT(IN) :: eps_filter
1574
1575 CHARACTER(LEN=*), PARAMETER :: routinen = 'build_G_grid'
1576
1577 INTEGER :: handle
1578 INTEGER, DIMENSION(:), POINTER :: blk_ao, dist_row_ao
1579 TYPE(cp_fm_type), POINTER :: fm_g
1580 TYPE(dbcsr_distribution_type) :: dist_ao_ao
1581 TYPE(dbcsr_type) :: matrix_g_ao
1582
1583 CALL timeset(routinen, handle)
1584
1585 ! 1. Select the FM matrix based on occ/vir flags
1586 IF (occ) THEN
1587 fm_g => bs_env%fm_Gocc
1588 ELSE
1589 fm_g => bs_env%fm_Gvir
1590 END IF
1591
1592 ! 2. Compute Dense FM Green's Function
1593 ! G^occ/vir_µλ(i|τ|) = sum_G^occ/vir_µλn^occ/vir C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
1594 CALL g_occ_vir(bs_env, tau, fm_g, ispin, occ=occ, vir=vir)
1595
1596 ! 3. Setup AO DBCSR Topology and Create Matrix dynamically
1597 CALL setup_square_topology(mat_phi_mu_l, 'COL', dist_ao_ao, blk_ao, dist_row_ao)
1598
1599 CALL dbcsr_create(matrix_g_ao, name="G_ao", dist=dist_ao_ao, &
1600 matrix_type=dbcsr_type_no_symmetry, &
1601 row_blk_size=blk_ao, col_blk_size=blk_ao)
1602
1603 ! 4. Convert FM to Sparse DBCSR
1604 CALL copy_fm_to_dbcsr(fm_g, matrix_g_ao, keep_sparsity=.false.)
1605
1606 ! 5. Transform to Grid Basis: G_grid = phi * G_ao * phi^T
1607 ! G^occ/vir_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^occ/vir_µν Φ_ν(r_l')
1608 CALL contract_a_b_a("N", "T", mat_phi_mu_l, matrix_g_ao, matrix_g_grid, eps_filter)
1609
1610 ! 6. Release AO matrix and topology
1611 CALL release_dbcsr_topology_and_matrices(dist=dist_ao_ao, mapped_dist=dist_row_ao, m1=matrix_g_ao)
1612
1613 CALL timestop(handle)
1614
1615 END SUBROUTINE build_g_grid
1616
1617! **************************************************************************************************
1618!> \brief Generalized routine to compute OUT = A * B * A^T OR OUT = A^T * B * A using DBCSR
1619!> \param transA_left ...
1620!> \param transA_right ...
1621!> \param matrix_A ...
1622!> \param matrix_B ...
1623!> \param matrix_out ...
1624!> \param eps_filter ...
1625! **************************************************************************************************
1626
1627 SUBROUTINE contract_a_b_a(transA_left, transA_right, matrix_A, matrix_B, matrix_out, eps_filter)
1628
1629 CHARACTER(LEN=1), INTENT(IN) :: transa_left, transa_right
1630 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_a, matrix_b, matrix_out
1631 REAL(kind=dp), INTENT(IN) :: eps_filter
1632
1633 CHARACTER(LEN=*), PARAMETER :: routinen = 'contract_A_B_A'
1634
1635 INTEGER :: handle
1636 TYPE(dbcsr_type) :: matrix_tmp
1637
1638 CALL timeset(routinen, handle)
1639
1640 CALL dbcsr_create(matrix_tmp, template=matrix_a)
1641
1642 IF (transa_left == "N" .AND. transa_right == "T") THEN
1643 ! Path 1: Out = A * B * A^T
1644 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_a, matrix_b, &
1645 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1646 CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_tmp, matrix_a, &
1647 0.0_dp, matrix_out, filter_eps=eps_filter)
1648
1649 ELSE IF (transa_left == "T" .AND. transa_right == "N") THEN
1650 ! Path 2: Out = A^T * B * A
1651 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_b, matrix_a, &
1652 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1653 CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_a, matrix_tmp, &
1654 0.0_dp, matrix_out, filter_eps=eps_filter)
1655 ELSE
1656 cpabort("Unsupported transposition pair in contract_A_B_A")
1657 END IF
1658
1659 CALL dbcsr_release(matrix_tmp)
1660
1661 CALL timestop(handle)
1662
1663 END SUBROUTINE contract_a_b_a
1664
1665! **************************************************************************************************
1666!> \brief Computes C = A ◦ B (Element-wise Hadamard product) for sparse DBCSR matrices.
1667!> \param matrix_A ...
1668!> \param matrix_B ...
1669!> \param matrix_C ...
1670!> \param fac (Scaling factor applied to the product)
1671! **************************************************************************************************
1672
1673 SUBROUTINE hadamard_product(matrix_A, matrix_B, matrix_C, fac)
1674
1675 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_a, matrix_b, matrix_c
1676 REAL(kind=dp), INTENT(IN) :: fac
1677
1678 CHARACTER(LEN=*), PARAMETER :: routinen = 'hadamard_product'
1679
1680 INTEGER :: col, handle, row
1681 LOGICAL :: found
1682 REAL(kind=dp), DIMENSION(:, :), POINTER :: blk_b, blk_c
1683 TYPE(dbcsr_iterator_type) :: iter
1684
1685 CALL timeset(routinen, handle)
1686
1687 CALL dbcsr_copy(matrix_c, matrix_a)
1688
1689 CALL dbcsr_iterator_start(iter, matrix_c)
1690 DO WHILE (dbcsr_iterator_blocks_left(iter))
1691 CALL dbcsr_iterator_next_block(iter, row, col, blk_c)
1692
1693 CALL dbcsr_get_block_p(matrix_b, row, col, blk_b, found)
1694
1695 IF (found) THEN
1696 blk_c(:, :) = fac*blk_c(:, :)*blk_b(:, :)
1697 ELSE
1698 ! If B is sparse here, the product is zero
1699 blk_c(:, :) = 0.0_dp
1700 END IF
1701 END DO
1702 CALL dbcsr_iterator_stop(iter)
1703
1704 CALL timestop(handle)
1705
1706 END SUBROUTINE hadamard_product
1707
1708! **************************************************************************************************
1709!> \brief Compute screened Coulomb interaction matrix
1710!> \param bs_env ...
1711!> \param qs_env ...
1712!> \param mat_chi_Gamma_tau ...
1713!> \param fm_W_time ...
1714! **************************************************************************************************
1715
1716 SUBROUTINE compute_w(bs_env, qs_env, mat_chi_Gamma_tau, fm_W_time)
1717 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1718 TYPE(qs_environment_type), POINTER :: qs_env
1719 TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_chi_gamma_tau
1720 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_w_time
1721
1722 CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_W'
1723
1724 INTEGER :: handle, i_t, j_w
1725 REAL(kind=dp) :: t1
1726 TYPE(cp_fm_type) :: fm_m_inv_v_sqrt, fm_v, fm_v_sqrt
1727
1728 CALL timeset(routinen, handle)
1729
1730 t1 = m_walltime()
1731
1732 CALL create_fm_w_mic_time(bs_env, fm_w_time)
1733
1734 ! 1. Allocate V and M matrices
1735 CALL cp_fm_create(fm_v, bs_env%fm_RI_RI%matrix_struct)
1736 CALL cp_fm_create(fm_v_sqrt, bs_env%fm_RI_RI%matrix_struct)
1737 CALL cp_fm_create(fm_m_inv_v_sqrt, bs_env%fm_RI_RI%matrix_struct)
1738
1739 ! Compute V and M^-1 * V^0.5
1740 CALL compute_v_minvvsqrt(bs_env, qs_env, fm_v, fm_v_sqrt, fm_m_inv_v_sqrt)
1741
1742 ! 2. Loop over frequencies
1743 DO j_w = 1, bs_env%num_time_freq_points
1744 ! Fourier transformation of χ_PQ(iτ) to χ_PQ(iω_j)
1745 CALL compute_fm_chi_gamma_freq(bs_env, bs_env%fm_chi_Gamma_freq, j_w, mat_chi_gamma_tau)
1746
1747 ! ε(iω_j) = Id - V^0.5*M^-1*χ(iω_j)*M^-1*V^0.5
1748 ! W(iω_j) = V^0.5*(ε^-1(iω_j)-Id)*V^0.5
1749 CALL compute_fm_w_freq(bs_env, bs_env%fm_chi_Gamma_freq, fm_v_sqrt, &
1750 fm_m_inv_v_sqrt, bs_env%fm_W_MIC_freq)
1751
1752 ! Fourier transform from W_PQ^MIC(iω_j) to W_PQ^MIC(iτ)
1753 CALL fourier_transform_w_to_t(bs_env, fm_w_time, bs_env%fm_W_MIC_freq, j_w)
1754 END DO
1755
1756 ! M^-1*W^MIC(iτ)*M^-1
1757 CALL multiply_fm_w_mic_time_with_minv_gamma(bs_env, qs_env, fm_w_time)
1758
1759 IF (bs_env%unit_nr > 0) THEN
1760 WRITE (bs_env%unit_nr, '(T2,A,T55,A,F10.1,A)') &
1761 τ'Computed W(i),', ' Execution time', m_walltime() - t1, ' s'
1762 END IF
1763
1764 CALL dbcsr_deallocate_matrix_set(mat_chi_gamma_tau)
1765
1766 ! Cleanup
1767 CALL cp_fm_release(fm_v)
1768 CALL cp_fm_release(fm_v_sqrt)
1769 CALL cp_fm_release(fm_m_inv_v_sqrt)
1770
1771 ! Marek : Fourier transform W^MIC(itau) back to get it at a specific im.frequency point - iomega = 0
1772 IF (bs_env%rtp_method == rtp_method_bse) THEN
1773 t1 = m_walltime()
1774 CALL cp_fm_create(bs_env%fm_W_MIC_freq_zero, bs_env%fm_W_MIC_freq%matrix_struct)
1775 ! Set to zero
1776 CALL cp_fm_set_all(bs_env%fm_W_MIC_freq_zero, 0.0_dp)
1777 ! Sum over all times
1778 DO i_t = 1, bs_env%num_time_freq_points
1779 ! Add the relevant structure with correct weight
1780 CALL cp_fm_scale_and_add(1.0_dp, bs_env%fm_W_MIC_freq_zero, &
1781 bs_env%imag_time_weights_freq_zero(i_t), fm_w_time(i_t))
1782 END DO
1783 ! Done, save to file
1784 CALL fm_write(bs_env%fm_W_MIC_freq_zero, 0, "W_freq_rtp", qs_env)
1785 ! Report calculation
1786 IF (bs_env%unit_nr > 0) THEN
1787 WRITE (bs_env%unit_nr, '(T2,A,T55,A,F10.1,A)') &
1788 'Computed W(0),', ' Execution time', m_walltime() - t1, ' s'
1789 END IF
1790 END IF
1791
1792 IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1793
1794 CALL timestop(handle)
1795
1796 END SUBROUTINE compute_w
1797
1798! **************************************************************************************************
1799!> \brief Computes V, V^0.5, and M^-1 * V^0.5
1800!> \param bs_env ...
1801!> \param qs_env ...
1802!> \param fm_V ...
1803!> \param fm_V_sqrt ...
1804!> \param fm_Minv_Vsqrt ...
1805! **************************************************************************************************
1806
1807 SUBROUTINE compute_v_minvvsqrt(bs_env, qs_env, fm_V, fm_V_sqrt, fm_Minv_Vsqrt)
1808 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1809 TYPE(qs_environment_type), POINTER :: qs_env
1810 TYPE(cp_fm_type), INTENT(INOUT) :: fm_v, fm_v_sqrt, fm_minv_vsqrt
1811
1812 CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_V_MinvVsqrt'
1813
1814 INTEGER :: handle, info, n_ri, ndep
1815 TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1816 TYPE(cell_type), POINTER :: cell
1817 TYPE(cp_fm_type) :: fm_work
1818 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: fm_m
1819 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: mat_v_kp
1820 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1821 TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1822
1823 CALL timeset(routinen, handle)
1824
1825 n_ri = bs_env%n_RI
1826 CALL cp_fm_create(fm_work, fm_v%matrix_struct)
1827
1828 ! -----------------------------------------------------------------------
1829 ! 1. Build Coulomb Matrix V(k=0) using the kp-routine but only for ikp=1
1830 ! -----------------------------------------------------------------------
1831 CALL get_qs_env(qs_env=qs_env, particle_set=particle_set, cell=cell, &
1832 qs_kind_set=qs_kind_set, atomic_kind_set=atomic_kind_set)
1833
1834 ALLOCATE (mat_v_kp(1:1, 1:2))
1835 NULLIFY (mat_v_kp(1, 1)%matrix, mat_v_kp(1, 2)%matrix)
1836 ALLOCATE (mat_v_kp(1, 1)%matrix, mat_v_kp(1, 2)%matrix)
1837
1838 CALL dbcsr_create(mat_v_kp(1, 1)%matrix, template=bs_env%mat_RI_RI%matrix)
1839 CALL dbcsr_reserve_all_blocks(mat_v_kp(1, 1)%matrix)
1840 CALL dbcsr_set(mat_v_kp(1, 1)%matrix, 0.0_dp)
1841
1842 ! Dummy imaginary part just to satisfy the routine
1843 CALL dbcsr_create(mat_v_kp(1, 2)%matrix, template=bs_env%mat_RI_RI%matrix)
1844 CALL dbcsr_reserve_all_blocks(mat_v_kp(1, 2)%matrix)
1845 CALL dbcsr_set(mat_v_kp(1, 2)%matrix, 0.0_dp)
1846
1847 bs_env%kpoints_chi_eps_W%nkp_grid = bs_env%nkp_grid_chi_eps_W_orig
1848
1849 CALL build_2c_coulomb_matrix_kp(mat_v_kp, bs_env%kpoints_chi_eps_W, "RI_AUX", cell, &
1850 particle_set, qs_kind_set, atomic_kind_set, &
1851 bs_env%size_lattice_sum_V, operator_coulomb, 1, 1)
1852
1853 ! Copy real part to fm_V
1854 CALL copy_dbcsr_to_fm(mat_v_kp(1, 1)%matrix, fm_v)
1855
1856 CALL dbcsr_deallocate_matrix(mat_v_kp(1, 1)%matrix)
1857 CALL dbcsr_deallocate_matrix(mat_v_kp(1, 2)%matrix)
1858 DEALLOCATE (mat_v_kp)
1859
1860 ! -----------------------------------------------------------------------
1861 ! 2. Get RI-Metric Matrix M(k=0)
1862 ! -----------------------------------------------------------------------
1863 CALL ri_2c_integral_mat(qs_env, fm_m, fm_v, n_ri, bs_env%ri_metric, &
1864 do_kpoints=.false., regularization_ri=bs_env%regularization_RI)
1865
1866 ! -----------------------------------------------------------------------
1867 ! 3. M -> M^-1
1868 ! -----------------------------------------------------------------------
1869 CALL cp_fm_cholesky_decompose(fm_m(1, 1), info_out=info)
1870 IF (info == 0) THEN
1871 CALL cp_fm_cholesky_invert(fm_m(1, 1))
1872 CALL cp_fm_uplo_to_full(fm_m(1, 1), fm_work)
1873 ELSE
1874 ! Fallback if Cholesky fails due to conditioning
1875 CALL cp_fm_power(fm_m(1, 1), fm_work, -1.0_dp, bs_env%eps_eigval_mat_RI, ndep)
1876 CALL cp_fm_to_fm(fm_work, fm_m(1, 1))
1877 END IF
1878
1879 ! -----------------------------------------------------------------------
1880 ! 4. V -> V^0.5
1881 ! -----------------------------------------------------------------------
1882 CALL cp_fm_to_fm(fm_v, fm_v_sqrt)
1883 CALL cp_fm_cholesky_decompose(fm_v_sqrt, info_out=info)
1884 IF (info == 0) THEN
1885 CALL clean_lower_part(fm_v_sqrt)
1886 ELSE
1887 CALL cp_fm_power(fm_v, fm_v_sqrt, 0.5_dp, bs_env%eps_eigval_mat_RI, ndep)
1888 END IF
1889
1890 ! -----------------------------------------------------------------------
1891 ! 5. M^-1 * V^0.5
1892 ! -----------------------------------------------------------------------
1893 CALL parallel_gemm("N", "T", n_ri, n_ri, n_ri, 1.0_dp, fm_m(1, 1), fm_v_sqrt, &
1894 0.0_dp, fm_minv_vsqrt)
1895
1896 CALL cp_fm_release(fm_m)
1897 CALL cp_fm_release(fm_work)
1898
1899 CALL timestop(handle)
1900
1901 END SUBROUTINE compute_v_minvvsqrt
1902
1903! **************************************************************************************************
1904!> \brief Computes W(iω) from χ_PQ(iω_j)
1905!> \param bs_env ...
1906!> \param fm_chi_freq_j ...
1907!> \param fm_V_sqrt ...
1908!> \param fm_Minv_Vsqrt ...
1909!> \param fm_W_freq_j ...
1910! **************************************************************************************************
1911
1912 SUBROUTINE compute_fm_w_freq(bs_env, fm_chi_freq_j, fm_V_sqrt, fm_Minv_Vsqrt, fm_W_freq_j)
1913 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1914 TYPE(cp_fm_type), INTENT(IN) :: fm_chi_freq_j, fm_v_sqrt, fm_minv_vsqrt
1915 TYPE(cp_fm_type), INTENT(INOUT) :: fm_w_freq_j
1916
1917 CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_fm_W_freq'
1918
1919 INTEGER :: handle, info, n_ri, ndep
1920 TYPE(cp_fm_type) :: fm_eps_freq_j, fm_work
1921
1922 CALL timeset(routinen, handle)
1923
1924 n_ri = bs_env%n_RI
1925
1926 CALL cp_fm_create(fm_eps_freq_j, fm_chi_freq_j%matrix_struct)
1927 CALL cp_fm_create(fm_work, fm_chi_freq_j%matrix_struct)
1928
1929 ! -----------------------------------------------------------------------
1930 ! 1. ε(iω_j) = Id - (M^-1 * V^0.5)^T * χ(iω_j) * (M^-1 * V^0.5)
1931 ! -----------------------------------------------------------------------
1932 ! work = χ(iω_j) * (M^-1 * V^0.5)
1933 CALL parallel_gemm('N', 'N', n_ri, n_ri, n_ri, 1.0_dp, &
1934 fm_chi_freq_j, fm_minv_vsqrt, 0.0_dp, fm_work)
1935
1936 ! eps_work = (M^-1 * V^0.5)^T * work
1937 CALL parallel_gemm('T', 'N', n_ri, n_ri, n_ri, 1.0_dp, &
1938 fm_minv_vsqrt, fm_work, 0.0_dp, fm_eps_freq_j)
1939
1940 ! ε(iω_j) = Id - eps_work --> -eps_work + Id
1941 CALL fm_add_on_diag(fm_eps_freq_j, 1.0_dp)
1942
1943 ! Force perfect symmetry before Cholesky to avoid info != 0 due to GEMM noise
1944 CALL cp_fm_uplo_to_full(fm_eps_freq_j, fm_work)
1945
1946 ! -----------------------------------------------------------------------
1947 ! 2. W(iω_j) = V^0.5^T * (ε^-1(iω_j) - Id) * V^0.5
1948 ! -----------------------------------------------------------------------
1949
1950 ! a) Cholesky decomposition of ε(iω_j)
1951 CALL cp_fm_cholesky_decompose(fm_eps_freq_j, info_out=info)
1952
1953 ! b) Inversion
1954 IF (info == 0) THEN
1955 CALL cp_fm_cholesky_invert(fm_eps_freq_j)
1956 CALL cp_fm_uplo_to_full(fm_eps_freq_j, fm_work)
1957 ELSE
1958 ! Fallback to expensive diagonalization if Cholesky fails
1959 CALL cp_fm_power(fm_eps_freq_j, fm_work, -1.0_dp, bs_env%eps_eigval_mat_RI, ndep)
1960 CALL cp_fm_to_fm(fm_work, fm_eps_freq_j)
1961 END IF
1962
1963 ! c) ε^-1(iω_j) - Id
1964 CALL fm_add_on_diag(fm_eps_freq_j, -1.0_dp)
1965
1966 ! d) work = (ε^-1(iω_j) - Id) * V^0.5
1967 CALL parallel_gemm('N', 'N', n_ri, n_ri, n_ri, 1.0_dp, fm_eps_freq_j, fm_v_sqrt, &
1968 0.0_dp, fm_work)
1969
1970 ! e) W(iw) = V^0.5^T * work
1971 CALL parallel_gemm('T', 'N', n_ri, n_ri, n_ri, 1.0_dp, fm_v_sqrt, fm_work, &
1972 0.0_dp, fm_w_freq_j)
1973
1974 ! Cleanup
1975 CALL cp_fm_release(fm_work)
1976 CALL cp_fm_release(fm_eps_freq_j)
1977
1978 CALL timestop(handle)
1979
1980 END SUBROUTINE compute_fm_w_freq
1981
1982! **************************************************************************************************
1983!> \brief Adds a real scalar value to the diagonal of a real full matrix (fm)
1984!> \param fm ...
1985!> \param alpha ...
1986! **************************************************************************************************
1987
1988 SUBROUTINE fm_add_on_diag(fm, alpha)
1989 TYPE(cp_fm_type), INTENT(INOUT) :: fm
1990 REAL(kind=dp), INTENT(IN) :: alpha
1991
1992 CHARACTER(LEN=*), PARAMETER :: routinen = 'fm_add_on_diag'
1993
1994 INTEGER :: handle, i_global, i_row, j_col, &
1995 j_global, ncol_local, nrow_local
1996 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1997
1998 CALL timeset(routinen, handle)
1999
2000 CALL cp_fm_get_info(matrix=fm, &
2001 nrow_local=nrow_local, &
2002 ncol_local=ncol_local, &
2003 row_indices=row_indices, &
2004 col_indices=col_indices)
2005
2006 DO j_col = 1, ncol_local
2007 j_global = col_indices(j_col)
2008 DO i_row = 1, nrow_local
2009 i_global = row_indices(i_row)
2010 IF (j_global == i_global) THEN
2011 fm%local_data(i_row, j_col) = fm%local_data(i_row, j_col) + alpha
2012 END IF
2013 END DO
2014 END DO
2015
2016 CALL timestop(handle)
2017
2018 END SUBROUTINE fm_add_on_diag
2019
2020! **************************************************************************************************
2021!> \brief Zeroes out the strictly lower triangular part of a real matrix
2022!> \param fm_mat ...
2023! **************************************************************************************************
2024 SUBROUTINE clean_lower_part(fm_mat)
2025 TYPE(cp_fm_type) :: fm_mat
2026
2027 CHARACTER(LEN=*), PARAMETER :: routinen = 'clean_lower_part'
2028
2029 INTEGER :: handle, i_row, j_col, j_global, &
2030 ncol_local, nrow_local
2031 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2032
2033 CALL timeset(routinen, handle)
2034
2035 CALL cp_fm_get_info(matrix=fm_mat, &
2036 nrow_local=nrow_local, ncol_local=ncol_local, &
2037 row_indices=row_indices, col_indices=col_indices)
2038
2039 DO j_col = 1, ncol_local
2040 j_global = col_indices(j_col)
2041 DO i_row = 1, nrow_local
2042 IF (j_global < row_indices(i_row)) fm_mat%local_data(i_row, j_col) = 0.0_dp
2043 END DO
2044 END DO
2045
2046 CALL timestop(handle)
2047
2048 END SUBROUTINE clean_lower_part
2049
2050! **************************************************************************************************
2051!> \brief Computes the exact exchange part of the GW self-energy
2052!> \param bs_env ...
2053!> \param qs_env ...
2054!> \param mat_phi_mu_l ...
2055!> \param mat_Z_lP ...
2056!> \param fm_Sigma_x_Gamma ...
2057! **************************************************************************************************
2058
2059 SUBROUTINE compute_sigma_x(bs_env, qs_env, mat_phi_mu_l, mat_Z_lP, fm_Sigma_x_Gamma)
2060
2061 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
2062 TYPE(qs_environment_type), POINTER :: qs_env
2063 TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_z_lp
2064 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_sigma_x_gamma
2065
2066 CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_Sigma_x'
2067
2068 INTEGER :: handle, ispin
2069 INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
2070 dist_row_aux
2071 REAL(kind=dp) :: t1
2072 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: fm_vtr_gamma
2073 TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
2074 TYPE(dbcsr_type) :: mat_sigma_x_gamma, matrix_d_grid, &
2075 matrix_sigma_x_grid, matrix_v_aux, &
2076 matrix_v_grid
2077
2078 CALL timeset(routinen, handle)
2079
2080 t1 = m_walltime()
2081
2082 ALLOCATE (fm_sigma_x_gamma(bs_env%n_spin))
2083 DO ispin = 1, bs_env%n_spin
2084 CALL cp_fm_create(fm_sigma_x_gamma(ispin), bs_env%fm_s_Gamma%matrix_struct)
2085 END DO
2086
2087 CALL dbcsr_create(mat_sigma_x_gamma, template=bs_env%mat_ao_ao%matrix)
2088
2089 ! =========================================================================
2090 ! 1. SETUP CORE TOPOLOGIES
2091 ! =========================================================================
2092 CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
2093 CALL setup_square_topology(mat_z_lp, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
2094
2095 ! =========================================================================
2096 ! 2. COMPUTE V^tr_ll'
2097 ! =========================================================================
2098 CALL ri_2c_integral_mat(qs_env, fm_vtr_gamma, bs_env%fm_RI_RI, bs_env%n_RI, &
2099 bs_env%trunc_coulomb, do_kpoints=.false.)
2100
2101 ! M^-1 * V^tr * M^-1 directly modifies fm_Vtr_Gamma(:, 1)
2102 CALL multiply_fm_w_mic_time_with_minv_gamma(bs_env, qs_env, fm_vtr_gamma(:, 1))
2103
2104 CALL dbcsr_create(matrix_v_aux, "V_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
2105 CALL dbcsr_create(matrix_v_grid, "V_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
2106
2107 CALL copy_fm_to_dbcsr(fm_vtr_gamma(1, 1), matrix_v_aux, keep_sparsity=.false.)
2108
2109 ! V^tr_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
2110 CALL contract_a_b_a("N", "T", mat_z_lp, matrix_v_aux, matrix_v_grid, bs_env%eps_filter)
2111 CALL dbcsr_release(matrix_v_aux)
2112
2113 ! =========================================================================
2114 ! 3. SPIN LOOP FOR EXACT EXCHANGE
2115 ! =========================================================================
2116 DO ispin = 1, bs_env%n_spin
2117
2118 ! Density matrix on grid is essentially G_occ at tau = 0.0
2119 ! D_µν = sum_n^occ C_µn C_νn
2120 ! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
2121 CALL dbcsr_create(matrix_d_grid, "D_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
2122 CALL build_g_grid(bs_env, 0.0_dp, ispin, .true., .false., mat_phi_mu_l, matrix_d_grid, bs_env%eps_filter)
2123
2124 ! Element-wise Hadamard product: Σ^x_grid = D_grid ◦ V_grid
2125 ! Σ^x_ll' = D_ll' * V^tr_ll'
2126 CALL dbcsr_create(matrix_sigma_x_grid, template=matrix_v_grid)
2127 CALL hadamard_product(matrix_d_grid, matrix_v_grid, matrix_sigma_x_grid, 1.0_dp)
2128
2129 CALL dbcsr_release(matrix_d_grid)
2130
2131 ! Transform back to AO basis: Σ^x_ao = -1.0 * phi^T * Σ^x_grid * phi
2132 ! Σ^x_λσ = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
2133 CALL contract_a_b_a("T", "N", mat_phi_mu_l, matrix_sigma_x_grid, mat_sigma_x_gamma, bs_env%eps_filter)
2134 CALL dbcsr_scale(mat_sigma_x_gamma, -1.0_dp)
2135
2136 CALL dbcsr_release(matrix_sigma_x_grid)
2137
2138 ! Data I/O and Export to CP2K Full Matrices
2139 CALL copy_dbcsr_to_fm(mat_sigma_x_gamma, fm_sigma_x_gamma(ispin))
2140
2141 END DO ! ispin
2142
2143 IF (bs_env%unit_nr > 0) THEN
2144 WRITE (bs_env%unit_nr, '(T2,A,T58,A,F7.1,A)') &
2145 Σ'Computed ^x(k=0),', ' Execution time', m_walltime() - t1, ' s'
2146 WRITE (bs_env%unit_nr, '(A)') ' '
2147 END IF
2148
2149 ! =========================================================================
2150 ! 4. CLEANUP
2151 ! =========================================================================
2152 CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid, &
2153 m1=mat_sigma_x_gamma, m2=matrix_v_grid)
2154 CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
2155
2156 CALL cp_fm_release(fm_vtr_gamma)
2157
2158 CALL timestop(handle)
2159
2160 END SUBROUTINE compute_sigma_x
2161
2162! **************************************************************************************************
2163!> \brief Computes the correlation part of the GW self-energy
2164!> \param bs_env ...
2165!> \param fm_W_time ...
2166!> \param mat_phi_mu_l ...
2167!> \param mat_Z_lP ...
2168!> \param fm_Sigma_c_Gamma_time ...
2169! **************************************************************************************************
2170
2171 SUBROUTINE compute_sigma_c(bs_env, fm_W_time, mat_phi_mu_l, mat_Z_lP, fm_Sigma_c_Gamma_time)
2172
2173 TYPE(post_scf_bandstructure_type), POINTER :: bs_env
2174 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_w_time
2175 TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_z_lp
2176 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_sigma_c_gamma_time
2177
2178 CHARACTER(LEN=*), PARAMETER :: routinen = 'compute_Sigma_c'
2179
2180 INTEGER :: handle, i_t, ispin
2181 INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
2182 dist_row_aux
2183 REAL(kind=dp) :: t1, tau
2184 TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
2185 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: mat_sigma_neg_tau, mat_sigma_pos_tau
2186 TYPE(dbcsr_type) :: matrix_g_occ_grid, matrix_g_vir_grid, matrix_sigma_neg_grid, &
2187 matrix_sigma_pos_grid, matrix_w_aux, matrix_w_grid
2188
2189 CALL timeset(routinen, handle)
2190
2191 ! =========================================================================
2192 ! 1. SETUP CORE TOPOLOGIES AND PRE-ALLOCATE OUTPUT ARRAYS
2193 ! =========================================================================
2194 CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
2195 CALL setup_square_topology(mat_z_lp, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
2196
2197 ! Pre-allocate local DBCSR matrices to act as targets for final output
2198 NULLIFY (mat_sigma_neg_tau, mat_sigma_pos_tau)
2199 ALLOCATE (mat_sigma_neg_tau(bs_env%num_time_freq_points, bs_env%n_spin))
2200 ALLOCATE (mat_sigma_pos_tau(bs_env%num_time_freq_points, bs_env%n_spin))
2201
2202 DO i_t = 1, bs_env%num_time_freq_points
2203 DO ispin = 1, bs_env%n_spin
2204 ALLOCATE (mat_sigma_neg_tau(i_t, ispin)%matrix)
2205 ALLOCATE (mat_sigma_pos_tau(i_t, ispin)%matrix)
2206 CALL dbcsr_create(mat_sigma_neg_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
2207 CALL dbcsr_create(mat_sigma_pos_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
2208 END DO
2209 END DO
2210
2211 ! =========================================================================
2212 ! 2. MAIN IMAGINARY TIME LOOP
2213 ! =========================================================================
2214 DO i_t = 1, bs_env%num_time_freq_points
2215 tau = bs_env%imag_time_points(i_t)
2216
2217 ! -------------------------------------------------------------------
2218 ! Compute W_grid = Z * W_aux * Z^T
2219 ! -------------------------------------------------------------------
2220 CALL dbcsr_create(matrix_w_aux, "W_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
2221 CALL dbcsr_create(matrix_w_grid, "W_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
2222
2223 CALL copy_fm_to_dbcsr(fm_w_time(i_t), matrix_w_aux, keep_sparsity=.false.)
2224
2225 ! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
2226 CALL contract_a_b_a("N", "T", mat_z_lp, matrix_w_aux, matrix_w_grid, bs_env%eps_filter)
2227
2228 CALL dbcsr_release(matrix_w_aux) ! Clean up aux basis immediately
2229
2230 DO ispin = 1, bs_env%n_spin
2231 t1 = m_walltime()
2232
2233 ! -------------------------------------------------------------------
2234 ! A. Transform Green's Functions to the Grid
2235 ! -------------------------------------------------------------------
2236 CALL dbcsr_create(matrix_g_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
2237 CALL dbcsr_create(matrix_g_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
2238
2239 ! G^occ_µλ(i|τ|) = sum_G^occ_µλn^occ C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
2240 ! G^occ_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
2241 CALL build_g_grid(bs_env, tau, ispin, .true., .false., mat_phi_mu_l, matrix_g_occ_grid, bs_env%eps_filter)
2242
2243 ! G^vir_µλ(i|τ|) = sum_n^vir C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
2244 ! G^vir_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
2245 CALL build_g_grid(bs_env, tau, ispin, .false., .true., mat_phi_mu_l, matrix_g_vir_grid, bs_env%eps_filter)
2246
2247 ! -------------------------------------------------------------------
2248 ! B. Element-wise Hadamard Products for Sigma_c on Grid
2249 ! Σ_neg_grid = G_occ_grid ◦ W_grid
2250 ! Σ_pos_grid = G_vir_grid ◦ W_grid
2251 ! -------------------------------------------------------------------
2252 CALL dbcsr_create(matrix_sigma_neg_grid, template=matrix_w_grid)
2253 CALL dbcsr_create(matrix_sigma_pos_grid, template=matrix_w_grid)
2254
2255 ! Σ^c_ll'(iτ) = -G^occ_ll'(i|τ|) * W^MIC_ll'(iτ), for τ < 0
2256 CALL hadamard_product(matrix_g_occ_grid, matrix_w_grid, matrix_sigma_neg_grid, 1.0_dp)
2257
2258 ! Σ^c_ll'(iτ) = G^vir_ll'(i|τ|) * W^MIC_ll'(iτ), for τ > 0
2259 CALL hadamard_product(matrix_g_vir_grid, matrix_w_grid, matrix_sigma_pos_grid, 1.0_dp)
2260
2261 ! Instantly purge massive G_grid arrays to save memory
2262 CALL dbcsr_release(matrix_g_occ_grid)
2263 CALL dbcsr_release(matrix_g_vir_grid)
2264
2265 ! -------------------------------------------------------------------
2266 ! C. Transform Sigma back to AO Basis
2267 ! Σ_AO = phi^T * Σ_grid * phi
2268 ! -------------------------------------------------------------------
2269
2270 ! Σ^c_λσ(iτ) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ) Φ_σ(r_l'), for τ < 0
2271 CALL contract_a_b_a("T", "N", mat_phi_mu_l, matrix_sigma_neg_grid, &
2272 mat_sigma_neg_tau(i_t, ispin)%matrix, bs_env%eps_filter)
2273 CALL dbcsr_scale(mat_sigma_neg_tau(i_t, ispin)%matrix, -1.0_dp)
2274
2275 ! Σ^c_λσ(iτ) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ) Φ_σ(r_l'), for τ > 0
2276 CALL contract_a_b_a("T", "N", mat_phi_mu_l, matrix_sigma_pos_grid, &
2277 mat_sigma_pos_tau(i_t, ispin)%matrix, bs_env%eps_filter)
2278
2279 ! Purge Grid Sigma arrays
2280 CALL dbcsr_release(matrix_sigma_neg_grid)
2281 CALL dbcsr_release(matrix_sigma_pos_grid)
2282
2283 IF (bs_env%unit_nr > 0) THEN
2284 WRITE (bs_env%unit_nr, '(T2,A,I10,A,I3,A,F7.1,A)') &
2285 Στ'Computed ^c(i) for time point ', i_t, ' /', bs_env%num_time_freq_points, &
2286 ', Execution time', m_walltime() - t1, ' s'
2287 END IF
2288
2289 END DO ! ispin
2290
2291 ! Release the W_grid for this time point
2292 CALL dbcsr_release(matrix_w_grid)
2293
2294 END DO ! i_t
2295
2296 IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
2297
2298 ! -------------------------------------------------------------------------
2299 ! 3. FINALIZE AND CLEANUP
2300 ! -------------------------------------------------------------------------
2301 CALL fill_fm_sigma_c_gamma_time(fm_sigma_c_gamma_time, bs_env, &
2302 mat_sigma_pos_tau, mat_sigma_neg_tau)
2303
2304 CALL cp_fm_release(fm_w_time)
2305
2306 CALL dbcsr_deallocate_matrix_set(mat_sigma_neg_tau)
2307 CALL dbcsr_deallocate_matrix_set(mat_sigma_pos_tau)
2308
2309 CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid)
2310 CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
2311
2312 CALL delete_unnecessary_files(bs_env)
2313 CALL timestop(handle)
2314
2315 END SUBROUTINE compute_sigma_c
2316
2317! **************************************************************************************************
2318!> \brief DBCSR Topology Generation
2319!> \param matrix_template ...
2320!> \param dim_type ...
2321!> \param square_dist ...
2322!> \param blk_sizes ...
2323!> \param mapped_dist ...
2324! **************************************************************************************************
2325
2326 SUBROUTINE setup_square_topology(matrix_template, dim_type, square_dist, blk_sizes, mapped_dist)
2327
2328 TYPE(dbcsr_type), INTENT(IN) :: matrix_template
2329 CHARACTER(LEN=*), INTENT(IN) :: dim_type
2330 TYPE(dbcsr_distribution_type), INTENT(OUT) :: square_dist
2331 INTEGER, DIMENSION(:), INTENT(OUT), POINTER :: blk_sizes, mapped_dist
2332
2333 INTEGER :: i, np
2334 INTEGER, DIMENSION(:), POINTER :: col_blk, col_dist, row_blk, row_dist
2335 TYPE(dbcsr_distribution_type) :: dist_template
2336
2337 CALL dbcsr_get_info(matrix_template, distribution=dist_template, &
2338 row_blk_size=row_blk, col_blk_size=col_blk)
2339 CALL dbcsr_distribution_get(dist_template, row_dist=row_dist, col_dist=col_dist)
2340
2341 IF (trim(dim_type) == 'ROW') THEN
2342 ! Creates ROW x ROW (e.g., Grid x Grid from mat_phi_mu_l)
2343 blk_sizes => row_blk
2344 np = maxval(col_dist) + 1 ! npcol
2345 ALLOCATE (mapped_dist(SIZE(blk_sizes)))
2346 DO i = 1, SIZE(blk_sizes)
2347 mapped_dist(i) = mod(i - 1, np)
2348 END DO
2349 CALL dbcsr_distribution_new(square_dist, template=dist_template, &
2350 row_dist=row_dist, col_dist=mapped_dist)
2351
2352 ELSE IF (trim(dim_type) == 'COL') THEN
2353 ! Creates COL x COL (e.g., Aux x Aux from mat_Z_lP)
2354 blk_sizes => col_blk
2355 np = maxval(row_dist) + 1 ! nprow
2356 ALLOCATE (mapped_dist(SIZE(blk_sizes)))
2357 DO i = 1, SIZE(blk_sizes)
2358 mapped_dist(i) = mod(i - 1, np)
2359 END DO
2360 CALL dbcsr_distribution_new(square_dist, template=dist_template, &
2361 row_dist=mapped_dist, col_dist=col_dist)
2362 END IF
2363
2364 END SUBROUTINE setup_square_topology
2365
2366! **************************************************************************************************
2367!> \brief DBCSR matrices deallocation
2368!> \param dist ...
2369!> \param mapped_dist ...
2370!> \param m1 ...
2371!> \param m2 ...
2372!> \param m3 ...
2373!> \param m4 ...
2374! **************************************************************************************************
2375
2376 SUBROUTINE release_dbcsr_topology_and_matrices(dist, mapped_dist, m1, m2, m3, m4)
2377
2378 TYPE(dbcsr_distribution_type), INTENT(INOUT), &
2379 OPTIONAL :: dist
2380 INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL, &
2381 POINTER :: mapped_dist
2382 TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL :: m1, m2, m3, m4
2383
2384 IF (PRESENT(dist)) CALL dbcsr_distribution_release(dist)
2385 IF (PRESENT(mapped_dist)) THEN
2386 IF (ASSOCIATED(mapped_dist)) THEN
2387 DEALLOCATE (mapped_dist)
2388 NULLIFY (mapped_dist)
2389 END IF
2390 END IF
2391 IF (PRESENT(m1)) CALL dbcsr_release(m1)
2392 IF (PRESENT(m2)) CALL dbcsr_release(m2)
2393 IF (PRESENT(m3)) CALL dbcsr_release(m3)
2394 IF (PRESENT(m4)) CALL dbcsr_release(m4)
2395
2396 END SUBROUTINE release_dbcsr_topology_and_matrices
2397
2398END MODULE gw_non_periodic_ri_rs
static GRID_HOST_DEVICE double fac(const int i)
Factorial function, e.g. fac(5) = 5! = 120.
Definition grid_common.h:56
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Define the atomic kind types and their sub types.
subroutine, public get_atomic_kind_set(atomic_kind_set, atom_of_kind, kind_of, natom_of_kind, maxatom, natom, nshell, fist_potential_present, shell_present, shell_adiabatic, shell_check_distance, damping_present)
Get attributes of an atomic kind set.
Handles all functions related to the CELL.
Definition cell_types.F:15
constants for the different operators of the 2c-integrals
integer, parameter, public operator_coulomb
methods related to the blacs parallel environment
subroutine, public cp_blacs_env_release(blacs_env)
releases the given blacs_env
subroutine, public cp_blacs_env_create(blacs_env, para_env, blacs_grid_layout, blacs_repeatable, row_major, grid_2d)
allocates and initializes a type that represent a blacs context
subroutine, public dbcsr_distribution_release(dist)
...
subroutine, public dbcsr_scale(matrix, alpha_scalar)
...
subroutine, public dbcsr_distribution_new(dist, template, group, pgrid, row_dist, col_dist, reuse_arrays)
...
subroutine, public dbcsr_deallocate_matrix(matrix)
...
logical function, public dbcsr_iterator_blocks_left(iterator)
...
subroutine, public dbcsr_iterator_stop(iterator)
...
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
subroutine, public dbcsr_get_block_p(matrix, row, col, block, found, row_size, col_size)
...
subroutine, public dbcsr_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, first_row, last_row, first_column, last_column, first_k, last_k, retain_sparsity, filter_eps, flop)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
subroutine, public dbcsr_iterator_next_block(iterator, row, column, block, block_number_argument_has_been_removed, row_size, col_size, row_offset, col_offset, transposed)
...
subroutine, public dbcsr_binary_write(matrix, filepath)
...
subroutine, public dbcsr_finalize(matrix)
...
subroutine, public dbcsr_iterator_start(iterator, matrix, shared, dynamic, dynamic_byrows)
...
subroutine, public dbcsr_set(matrix, alpha)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_binary_read(filepath, distribution, matrix_new)
...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
subroutine, public dbcsr_add(matrix_a, matrix_b, alpha_scalar, beta_scalar)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
subroutine, public dbcsr_reserve_all_blocks(matrix)
Reserves all blocks.
DBCSR operations in CP2K.
integer, save, public max_elements_per_block
subroutine, public copy_dbcsr_to_fm(matrix, fm)
Copy a DBCSR matrix to a BLACS matrix.
subroutine, public copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
Copy a BLACS matrix to a dbcsr matrix.
Utility routines to open and close files. Tracking of preconnections.
Definition cp_files.F:16
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
Definition cp_files.F:311
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
Definition cp_files.F:122
Basic linear algebra operations for full matrices.
subroutine, public cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
calc A <- alpha*A + beta*B optimized for alpha == 1.0 (just add beta*B) and beta == 0....
subroutine, public cp_fm_uplo_to_full(matrix, work, uplo)
given a triangular matrix according to uplo, computes the corresponding full matrix
various cholesky decomposition related routines
subroutine, public cp_fm_cholesky_solve(matrix, matrixb, n, info_out)
solves A*X = B for X, given the Cholesky decomposition U of the symmetric positive def....
subroutine, public cp_fm_cholesky_invert(matrix, n, info_out)
used to replace the cholesky decomposition by the inverse
subroutine, public cp_fm_cholesky_decompose(matrix, n, info_out)
used to replace a symmetric positive def. matrix M with its cholesky decomposition U: M = U^T * U,...
used for collecting some of the diagonalization schemes available for cp_fm_type. cp_fm_power also mo...
Definition cp_fm_diag.F:17
subroutine, public cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer, parameter, public cp_p_file
integer function, public cp_print_key_should_output(iteration_info, basis_section, print_key_path, used_print_key, first_time)
returns what should be done with the given property if btest(res,cp_p_store) then the property should...
Utility method to build 3-center integrals for small cell GW.
subroutine, public build_3c_integral_block_ctx(int_3c, ctx, ws, atom_j, atom_k, atom_i, cell_j, cell_k, cell_i, j_offset, k_offset, i_offset, screened)
Computes the 3c integral block (mu(atom_j) nu(atom_k) | P(atom_i)) for ONE atom triple,...
subroutine, public gw_3c_ws_create(ws, ctx)
Creates a per-thread 3c workspace: libint object + LIBXSMM contraction buffers.
subroutine, public gw_3c_ws_release(ws)
Releases a per-thread 3c workspace.
subroutine, public gw_3c_ctx_release(ctx)
Releases the shared 3c-integral context.
subroutine, public gw_3c_ctx_create(ctx, qs_env, potential_parameter, basis_j, basis_k, basis_i)
Builds the shared 3c-integral context: screening radii, basis maxima, contracted sphi tables,...
Routines from paper [Graml2024].
subroutine, public compute_fm_chi_gamma_freq(bs_env, fm_chi_gamma_freq, j_w, mat_chi_gamma_tau)
...
subroutine, public delete_unnecessary_files(bs_env)
...
subroutine, public fill_fm_sigma_c_gamma_time(fm_sigma_c_gamma_time, bs_env, mat_sigma_pos_tau, mat_sigma_neg_tau)
...
subroutine, public fm_write(fm, matrix_index, matrix_name, qs_env)
...
subroutine, public compute_qp_energies(bs_env, qs_env, fm_sigma_x_gamma, fm_sigma_c_gamma_time)
...
subroutine, public multiply_fm_w_mic_time_with_minv_gamma(bs_env, qs_env, fm_w_mic_time)
...
subroutine, public g_occ_vir(bs_env, tau, fm_g_gamma, ispin, occ, vir)
...
subroutine, public create_fm_w_mic_time(bs_env, fm_w_mic_time)
...
subroutine, public fourier_transform_w_to_t(bs_env, fm_w_mic_time, fm_w_mic_freq_j, j_w)
...
GW using RI-RS Approximation for molecules.
subroutine, public ri_rs_grid_assembler(qs_env, bs_env, ri_rs_grid_points)
Compute grid points for RI-RS Right now based on Ivan and Xavier implementation JCP 150,...
subroutine, public precompute_ri_rs_radii(qs_env, bs_env)
Compute per-atom AO and RI basis radii from the most diffuse Gaussian primitive in the AO ("ORB") and...
subroutine, public gw_calc_non_periodic_ri_rs(qs_env, bs_env)
GW calculation using RI-RS formalism for molecules.
subroutine, public get_basis_offsets(particle_set, qs_kind_set, first_sgf, total_sgf)
Helper for OMP threads to fill phi_val column values.
subroutine, public solve_d_lp_distributed(phi_local, d_vec, d_lp, n_loc, n_ao, n_rhs, tikhonov, para_env_sub, blacs_env_sub, fm_struct_d, fm_struct_b, fm_d, fm_b, info)
Distributed pdpotrf/pdpotrs solve of D x = b for one atom of the RI-RS Z_lP build....
subroutine, public de_init_bs_env(bs_env)
...
Definition gw_utils.F:241
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public rtp_method_bse
objects that represent the structure of input sections and the data contained in an input section
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public default_string_length
Definition kinds.F:57
integer, parameter, public default_path_length
Definition kinds.F:58
Routines to compute the Coulomb integral V_(alpha beta)(k) for a k-point k using lattice summation in...
subroutine, public build_2c_coulomb_matrix_kp(matrix_v_kp, kpoints, basis_type, cell, particle_set, qs_kind_set, atomic_kind_set, size_lattice_sum, operator_type, ikp_start, ikp_end)
...
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
real(kind=dp) function, public m_walltime()
returns time from a real-time clock, protected against rolling early/easily
Definition machine.F:141
Interface to the message passing library MPI.
Framework for 2c-integrals for RI.
Definition mp2_ri_2c.F:14
subroutine, public ri_2c_integral_mat(qs_env, fm_matrix_minv_l_kpoints, fm_matrix_l, dimen_ri, ri_metric, do_kpoints, kpoints, put_mat_ks_env, regularization_ri, ikp_ext, do_build_cell_index)
...
Definition mp2_ri_2c.F:564
Provides Cartesian and spherical orbital pointers and indices.
integer, dimension(:), allocatable, public ncoset
integer, dimension(:, :), allocatable, public indco
basic linear algebra operations for full matrixes
Define the data structure for the particle information.
subroutine, public get_qs_env(qs_env, atomic_kind_set, qs_kind_set, cell, super_cell, cell_ref, use_ref_cell, kpoints, dft_control, mos, sab_orb, sab_all, qmmm, qmmm_periodic, mimic, sac_ae, sac_ppl, sac_lri, sap_ppnl, sab_vdw, sab_scp, sap_oce, sab_lrc, sab_se, sab_xtbe, sab_tbe, sab_core, sab_xb, sab_xtb_pp, sab_xtb_nonbond, sab_almo, sab_kp, sab_kp_nosym, sab_cneo, particle_set, energy, force, matrix_h, matrix_h_im, matrix_ks, matrix_ks_im, matrix_vxc, run_rtp, rtp, matrix_h_kp, matrix_h_im_kp, matrix_ks_kp, matrix_ks_im_kp, matrix_vxc_kp, kinetic_kp, matrix_s_kp, matrix_w_kp, matrix_s_ri_aux_kp, matrix_s, matrix_s_ri_aux, matrix_w, matrix_p_mp2, matrix_p_mp2_admm, rho, rho_xc, pw_env, ewald_env, ewald_pw, active_space, mpools, input, para_env, blacs_env, scf_control, rel_control, kinetic, qs_charges, vppl, xcint_weights, rho_core, rho_nlcc, rho_nlcc_g, ks_env, ks_qmmm_env, wf_history, scf_env, local_particles, local_molecules, distribution_2d, dbcsr_dist, molecule_kind_set, molecule_set, subsys, cp_subsys, oce, local_rho_set, rho_atom_set, task_list, task_list_soft, rho0_atom_set, rho0_mpole, rhoz_set, rhoz_cneo_set, ecoul_1c, rho0_s_rs, rho0_s_gs, rhoz_cneo_s_rs, rhoz_cneo_s_gs, do_kpoints, has_unit_metric, requires_mo_derivs, mo_derivs, mo_loc_history, nkind, natom, nelectron_total, nelectron_spin, efield, neighbor_list_id, linres_control, xas_env, virial, cp_ddapc_env, cp_ddapc_ewald, outer_scf_history, outer_scf_ihistory, x_data, et_coupling, dftb_potential, results, se_taper, se_store_int_env, se_nddo_mpole, se_nonbond_env, admm_env, lri_env, lri_density, exstate_env, ec_env, harris_env, dispersion_env, gcp_env, vee, rho_external, external_vxc, mask, mp2_env, bs_env, kg_env, wanniercentres, atprop, ls_scf_env, do_transport, transport_env, v_hartree_rspace, s_mstruct_changed, rho_changed, potential_changed, forces_up_to_date, mscfg_env, almo_scf_env, gradient_history, variable_history, embed_pot, spin_embed_pot, polar_env, mos_last_converged, eeq, rhs, do_rixs, tb_tblite)
Get the QUICKSTEP environment.
Define the quickstep kind type and their sub types.
subroutine, public get_qs_kind(qs_kind, basis_set, basis_type, ncgf, nsgf, all_potential, tnadd_potential, gth_potential, sgp_potential, upf_potential, cneo_potential, se_parameter, dftb_parameter, xtb_parameter, dftb3_param, zatom, zeff, elec_conf, mao, lmax_dftb, alpha_core_charge, ccore_charge, core_charge, core_charge_radius, paw_proj_set, paw_atom, hard_radius, hard0_radius, max_rad_local, covalent_radius, vdw_radius, gpw_type_forced, harmonics, max_iso_not0, max_s_harm, grid_atom, ngrid_ang, ngrid_rad, lmax_rho0, dft_plus_u_atom, l_of_dft_plus_u, n_of_dft_plus_u, u_minus_j, u_of_dft_plus_u, j_of_dft_plus_u, alpha_of_dft_plus_u, beta_of_dft_plus_u, j0_of_dft_plus_u, occupation_of_dft_plus_u, dispersion, bs_occupation, magnetization, no_optimize, addel, laddel, naddel, orbitals, max_scf, eps_scf, smear, u_ramping, u_minus_j_target, eps_u_ramping, init_u_ramping_each_scf, reltmat, ghost, monovalent, floating, name, element_symbol, pao_basis_size, pao_model_file, pao_potentials, pao_descriptors, nelec)
Get attributes of an atomic kind.
Provides all information about an atomic kind.
Type defining parameters related to the simulation cell.
Definition cell_types.F:60
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
represent a full matrix
type of a logger, at the moment it contains just a print level starting at which level it should be l...
Shared read-only context for repeated 3-center integral block builds: screening parameters,...
Per-thread workspace for 3-center integral block builds: libint object + contraction buffers....
stores all the informations relevant to an mpi environment
Provides all information about a quickstep kind.