(git:374b731)
Loading...
Searching...
No Matches
mp2_direct_method.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Routines to calculate MP2 energy
10!> \par History
11!> 06.2011 created [Mauro Del Ben]
12!> \author Mauro Del Ben
13! **************************************************************************************************
15 USE cell_types, ONLY: cell_type
20 p1_energy,&
21 p2_energy,&
24 USE hfx_types, ONLY: hfx_basis_type,&
29 hfx_type,&
32 USE kinds, ONLY: dp,&
33 int_8
35 USE machine, ONLY: m_flush
36 USE mathconstants, ONLY: zero
40 USE mp2_types, ONLY: init_tshpsc_lmax,&
42 mp2_type,&
44 USE orbital_pointers, ONLY: ncoset
47 USE t_sh_p_s_c, ONLY: free_c0
48#include "./base/base_uses.f90"
49
50 IMPLICIT NONE
51 PRIVATE
52
54
55 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_direct_method'
56
57!***
58
59CONTAINS
60
61! **************************************************************************************************
62!> \brief ...
63!> \param dimen ...
64!> \param occ_i ...
65!> \param occ_j ...
66!> \param mp2_biel ...
67!> \param mp2_env ...
68!> \param C_i ...
69!> \param Auto_i ...
70!> \param Emp2 ...
71!> \param Emp2_Cou ...
72!> \param Emp2_ex ...
73!> \param qs_env ...
74!> \param para_env ...
75!> \param unit_nr ...
76!> \param C_j ...
77!> \param Auto_j ...
78! **************************************************************************************************
79 SUBROUTINE mp2_direct_energy(dimen, occ_i, occ_j, mp2_biel, mp2_env, C_i, Auto_i, Emp2, Emp2_Cou, Emp2_ex, &
80 qs_env, para_env, unit_nr, C_j, Auto_j)
81 INTEGER :: dimen, occ_i, occ_j
82 TYPE(mp2_biel_type) :: mp2_biel
83 TYPE(mp2_type) :: mp2_env
84 REAL(kind=dp), DIMENSION(dimen, dimen) :: c_i
85 REAL(kind=dp), DIMENSION(dimen) :: auto_i
86 REAL(kind=dp) :: emp2, emp2_cou, emp2_ex
87 TYPE(qs_environment_type), POINTER :: qs_env
88 TYPE(mp_para_env_type), POINTER :: para_env
89 INTEGER :: unit_nr
90 REAL(kind=dp), DIMENSION(dimen, dimen), OPTIONAL :: c_j
91 REAL(kind=dp), DIMENSION(dimen), OPTIONAL :: auto_j
92
93 CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_direct_energy'
94 REAL(kind=dp), PARAMETER :: zero = 0.d+00
95
96 INTEGER :: batch_number, color_sub, counter, elements_ij_proc, group_counter, handle, i, &
97 i_batch, i_batch_start, i_group_counter, j, j_batch_start, j_group_counter, last_batch, &
98 max_batch_number, max_batch_size, max_set, minimum_memory_needed, my_batch_size, &
99 my_i_batch_size, my_i_occupied_end, my_i_occupied_start, my_j_batch_size, &
100 my_j_occupied_end, my_j_occupied_start, natom, ni_occupied, nj_occupied, number_groups, &
101 number_i_subset, number_j_subset, one, sqrt_number_groups, total_i_size_batch_group, &
102 total_j_size_batch_group, virt_i, virt_j
103 INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_sizes, batch_sizes_tmp, &
104 vector_batch_i_size_group, &
105 vector_batch_j_size_group
106 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ij_list_proc, ij_list_proc_temp, &
107 ij_matrix
108 LOGICAL :: alpha_beta_case
109 TYPE(mp_para_env_type), POINTER :: para_env_sub
110
111 CALL timeset(routinen, handle)
112
113 alpha_beta_case = .false.
114 IF (PRESENT(c_j) .AND. PRESENT(auto_j)) alpha_beta_case = .true.
115
116 IF (unit_nr > 0 .AND. mp2_env%potential_parameter%potential_type == do_potential_tshpsc) THEN
117 IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T64,F12.6,A5)') 'Truncated MP2 method, Rt=', &
118 mp2_env%potential_parameter%cutoff_radius, ' Bohr'
119 END IF
120
121 ! create the local para env
122 ! each para_env_sub corresponds to a group that is going to compute
123 ! all the integrals. To each group a batch I is assigned and the
124 ! communication takes place only inside the group
125 number_groups = para_env%num_pe/mp2_env%mp2_num_proc
126 IF (number_groups*mp2_env%mp2_num_proc /= para_env%num_pe) THEN
127 cpabort(" The number of processors needs to be a multiple of the processors per group. ")
128 END IF
129 IF (number_groups > occ_i*occ_j) THEN
130 IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Number of groups greater then the number of IJ pairs!'
131 IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Consider using more processors per group for improved efficiency'
132 END IF
133
134 color_sub = para_env%mepos/mp2_env%mp2_num_proc
135 ALLOCATE (para_env_sub)
136 CALL para_env_sub%from_split(para_env, color_sub)
137
138 ! calculate the maximal size of the batch, according to the maximum RS size
139 max_set = SIZE(mp2_biel%index_table, 2)
140 minimum_memory_needed = (8*(max_set**4))/1024**2
141 IF (minimum_memory_needed > mp2_env%mp2_memory) THEN
142 IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T66,F12.6,A3)') 'Memory required below the minimum, new memory:', &
143 minimum_memory_needed, 'MiB'
144 mp2_env%mp2_memory = minimum_memory_needed
145 END IF
146
147 ! Distribute the batches over the groups in
148 ! a rectangular fashion, bigger size for J index
149 ! the sizes of the I batches should be as small as possible
150 sqrt_number_groups = int(sqrt(real(number_groups, kind=dp)))
151 DO i = 1, number_groups
152 IF (mod(number_groups, i) == 0) THEN
153 IF (sqrt_number_groups/i <= 1) THEN
154 number_j_subset = i
155 EXIT
156 END IF
157 END IF
158 END DO
159 number_i_subset = number_groups/number_j_subset
160
161 IF (number_i_subset < number_j_subset) THEN
162 number_i_subset = number_j_subset
163 number_j_subset = number_groups/number_i_subset
164 END IF
165
166 ! Distribute the I index and the J index over groups
167 total_i_size_batch_group = occ_i/number_i_subset
168 IF (total_i_size_batch_group < 1) total_i_size_batch_group = 1
169 ALLOCATE (vector_batch_i_size_group(0:number_i_subset - 1))
170
171 vector_batch_i_size_group = 0
172 DO i = 0, number_i_subset - 1
173 vector_batch_i_size_group(i) = total_i_size_batch_group
174 END DO
175 IF (sum(vector_batch_i_size_group) /= occ_i) THEN
176 one = 1
177 IF (sum(vector_batch_i_size_group) > occ_i) one = -1
178 i = -1
179 DO
180 i = i + 1
181 vector_batch_i_size_group(i) = vector_batch_i_size_group(i) + one
182 IF (sum(vector_batch_i_size_group) == occ_i) EXIT
183 IF (i == number_i_subset - 1) i = -1
184 END DO
185 END IF
186
187 total_j_size_batch_group = occ_j/number_j_subset
188 IF (total_j_size_batch_group < 1) total_j_size_batch_group = 1
189 ALLOCATE (vector_batch_j_size_group(0:number_j_subset - 1))
190
191 vector_batch_j_size_group = 0
192 DO i = 0, number_j_subset - 1
193 vector_batch_j_size_group(i) = total_j_size_batch_group
194 END DO
195 IF (sum(vector_batch_j_size_group) /= occ_j) THEN
196 one = 1
197 IF (sum(vector_batch_j_size_group) > occ_j) one = -1
198 i = -1
199 DO
200 i = i + 1
201 vector_batch_j_size_group(i) = vector_batch_j_size_group(i) + one
202 IF (sum(vector_batch_j_size_group) == occ_j) EXIT
203 IF (i == number_j_subset - 1) i = -1
204 END DO
205 END IF
206
207 ! now the starting and ending I and J occupied orbitals are assigned to each group
208 group_counter = 0
209 i_group_counter = 0
210 my_i_occupied_start = 1
211 DO i = 0, number_i_subset - 1
212 my_j_occupied_start = 1
213 j_group_counter = 0
214 DO j = 0, number_j_subset - 1
215 group_counter = group_counter + 1
216 IF (color_sub == group_counter - 1) EXIT
217 my_j_occupied_start = my_j_occupied_start + vector_batch_j_size_group(j)
218 j_group_counter = j_group_counter + 1
219 END DO
220 IF (color_sub == group_counter - 1) EXIT
221 my_i_occupied_start = my_i_occupied_start + vector_batch_i_size_group(i)
222 i_group_counter = i_group_counter + 1
223 END DO
224 my_i_occupied_end = my_i_occupied_start + vector_batch_i_size_group(i_group_counter) - 1
225 my_i_batch_size = vector_batch_i_size_group(i_group_counter)
226 my_j_occupied_end = my_j_occupied_start + vector_batch_j_size_group(j_group_counter) - 1
227 my_j_batch_size = vector_batch_j_size_group(j_group_counter)
228
229 DEALLOCATE (vector_batch_i_size_group)
230 DEALLOCATE (vector_batch_j_size_group)
231
232 max_batch_size = min( &
233 max(1, &
234 int(mp2_env%mp2_memory*int(1024, kind=int_8)**2/ &
235 (8*(2*dimen - occ_i)*int(dimen, kind=int_8)*my_j_batch_size/para_env_sub%num_pe))) &
236 , my_i_batch_size)
237 IF (max_batch_size < 1) THEN
238 max_batch_size = int((8*(occ_i + 1)*int(dimen, kind=int_8)**2/para_env%num_pe)/1024**2)
239 IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T72,I6,A3)') 'More memory required, at least:', max_batch_size, 'MiB'
240 max_batch_size = 1
241 END IF
242
243 ! create the size of the batches inside the group
244 my_batch_size = my_i_batch_size
245 ALLOCATE (batch_sizes(my_batch_size))
246
247 batch_sizes = -huge(0)
248 batch_number = 0
249 DO i = 1, my_batch_size
250 IF (i*max_batch_size > my_batch_size) EXIT
251 batch_number = batch_number + 1
252 batch_sizes(i) = max_batch_size
253 END DO
254 last_batch = my_batch_size - max_batch_size*batch_number
255 IF (last_batch > 0) THEN
256 batch_number = batch_number + 1
257 batch_sizes(batch_number) = last_batch
258 END IF
259
260 ALLOCATE (batch_sizes_tmp(batch_number))
261 batch_sizes_tmp(1:batch_number) = batch_sizes(1:batch_number)
262 DEALLOCATE (batch_sizes)
263 ALLOCATE (batch_sizes(batch_number))
264 batch_sizes(:) = batch_sizes_tmp
265 DEALLOCATE (batch_sizes_tmp)
266
267 max_batch_size = maxval(batch_sizes)
268 CALL para_env%max(max_batch_size)
269 max_batch_number = batch_number
270 CALL para_env%max(max_batch_number)
271 IF (unit_nr > 0) THEN
272 WRITE (unit_nr, '(T3,A,T76,I5)') 'Maximum used batch size: ', max_batch_size
273 WRITE (unit_nr, '(T3,A,T76,I5)') 'Number of integral recomputations: ', max_batch_number
274 CALL m_flush(unit_nr)
275 END IF
276
277 ! Batches sizes exceed the occupied orbitals allocated for group
278 cpassert(sum(batch_sizes) <= my_batch_size)
279
280 virt_i = dimen - occ_i
281 virt_j = dimen - occ_j
282 natom = SIZE(mp2_biel%index_table, 1)
283
284 CALL para_env%sync()
285 emp2 = zero
286 emp2_cou = zero
287 emp2_ex = zero
288 i_batch_start = my_i_occupied_start - 1
289 j_batch_start = my_j_occupied_start - 1
290 nj_occupied = my_j_batch_size
291 DO i_batch = 1, batch_number
292
293 ni_occupied = batch_sizes(i_batch)
294
295 counter = -1
296 ALLOCATE (ij_matrix(ni_occupied, nj_occupied))
297
298 ij_matrix = 0
299 DO i = 1, ni_occupied
300 DO j = 1, nj_occupied
301 counter = counter + 1
302 IF (mod(counter, para_env_sub%num_pe) == para_env_sub%mepos) THEN
303 ij_matrix(i, j) = ij_matrix(i, j) + 1
304 END IF
305 END DO
306 END DO
307
308 ALLOCATE (ij_list_proc_temp(ni_occupied*occ_j, 2))
309
310 elements_ij_proc = 0
311 DO i = 1, ni_occupied
312 DO j = 1, nj_occupied
313 IF (ij_matrix(i, j) == 0) cycle
314 elements_ij_proc = elements_ij_proc + 1
315 ij_list_proc_temp(elements_ij_proc, 1) = i
316 ij_list_proc_temp(elements_ij_proc, 2) = j
317 END DO
318 END DO
319 DEALLOCATE (ij_matrix)
320
321 ALLOCATE (ij_list_proc(elements_ij_proc, 2))
322 DO i = 1, elements_ij_proc
323 ij_list_proc(i, 1) = ij_list_proc_temp(i, 1)
324 ij_list_proc(i, 2) = ij_list_proc_temp(i, 2)
325 END DO
326 DEALLOCATE (ij_list_proc_temp)
327
328 IF (.NOT. alpha_beta_case) THEN
329 CALL mp2_canonical_direct_single_batch(emp2, emp2_cou, emp2_ex, mp2_env, qs_env, para_env_sub, &
330 mp2_biel, dimen, c_i, auto_i, i_batch_start, ni_occupied, occ_i, &
331 elements_ij_proc, ij_list_proc, nj_occupied, j_batch_start)
332 ELSE
333 CALL mp2_canonical_direct_single_batch(emp2, emp2_cou, emp2_ex, mp2_env, qs_env, para_env_sub, &
334 mp2_biel, dimen, c_i, auto_i, i_batch_start, ni_occupied, occ_i, &
335 elements_ij_proc, ij_list_proc, nj_occupied, j_batch_start, &
336 occ_j, c_j, auto_j)
337 END IF
338
339 i_batch_start = i_batch_start + ni_occupied
340
341 DEALLOCATE (ij_list_proc)
342
343 END DO
344
345 CALL para_env%sum(emp2_cou)
346 CALL para_env%sum(emp2_ex)
347 CALL para_env%sum(emp2)
348
349 CALL mp_para_env_release(para_env_sub)
350
351 CALL timestop(handle)
352
353 END SUBROUTINE mp2_direct_energy
354
355! **************************************************************************************************
356!> \brief ...
357!> \param Emp2 ...
358!> \param Emp2_Cou ...
359!> \param Emp2_ex ...
360!> \param mp2_env ...
361!> \param qs_env ...
362!> \param para_env ...
363!> \param mp2_biel ...
364!> \param dimen ...
365!> \param C ...
366!> \param Auto ...
367!> \param i_batch_start ...
368!> \param Ni_occupied ...
369!> \param occupied ...
370!> \param elements_ij_proc ...
371!> \param ij_list_proc ...
372!> \param Nj_occupied ...
373!> \param j_batch_start ...
374!> \param occupied_beta ...
375!> \param C_beta ...
376!> \param Auto_beta ...
377!> \param Integ_MP2 ...
378!> \par History
379!> 06.2011 created [Mauro Del Ben]
380!> \author Mauro Del Ben
381! **************************************************************************************************
382 SUBROUTINE mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env, &
383 mp2_biel, dimen, C, Auto, i_batch_start, Ni_occupied, &
384 occupied, elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start, &
385 occupied_beta, C_beta, Auto_beta, Integ_MP2)
386
387 REAL(kind=dp), INTENT(INOUT) :: emp2, emp2_cou, emp2_ex
388 TYPE(mp2_type) :: mp2_env
389 TYPE(qs_environment_type), POINTER :: qs_env
390 TYPE(mp_para_env_type), INTENT(IN) :: para_env
391 TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
392 INTEGER, INTENT(IN) :: dimen
393 REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c
394 REAL(kind=dp), DIMENSION(dimen), INTENT(IN) :: auto
395 INTEGER, INTENT(IN) :: i_batch_start, ni_occupied, occupied, &
396 elements_ij_proc
397 INTEGER, DIMENSION(elements_ij_proc, 2), &
398 INTENT(IN) :: ij_list_proc
399 INTEGER, INTENT(IN) :: nj_occupied, j_batch_start
400 INTEGER, INTENT(IN), OPTIONAL :: occupied_beta
401 REAL(kind=dp), DIMENSION(dimen, dimen), &
402 INTENT(IN), OPTIONAL :: c_beta
403 REAL(kind=dp), DIMENSION(dimen), INTENT(IN), &
404 OPTIONAL :: auto_beta
405 REAL(kind=dp), ALLOCATABLE, &
406 DIMENSION(:, :, :, :), INTENT(OUT), OPTIONAL :: integ_mp2
407
408 CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_canonical_direct_single_batch'
409
410 INTEGER :: case_index, counter_proc, elements_ij_proc_rec, elements_kl_proc, global_counter, &
411 handle, i, i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
412 i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_start, iatom, iatom_end, &
413 iatom_start, iib, ij_elem_max, ikind, index_ij_rec, index_ij_send, index_kl, &
414 index_proc_ij, index_proc_shift, iset, jatom, jatom_end, jatom_start, jjb, jkind, jset, &
415 katom, katom_end, katom_start, kkb, kkind, kset, latom, latom_end, latom_start, lkind, &
416 llb, lset, max_num_call_sec_transf, max_pgf, max_set, multiple
417 INTEGER :: my_num_call_sec_transf, natom, ncob, nints, nseta, nsetb, nsgf_max, nspins, &
418 primitive_counter, proc_num, proc_receive, proc_send, r_offset_rec, rsize_rec, &
419 s_offset_rec, same_size_kl_index, sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, &
420 sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, ssize_rec, &
421 step_size, total_num_rs_task
422 INTEGER(int_8) :: estimate_to_store_int, neris_tmp, &
423 neris_total, nprim_ints
424 INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, nimages, proc_num_task, &
425 same_size_kl_elements_counter
426 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: kl_list_proc, task_counter_rs, &
427 task_counter_rs_temp
428 INTEGER, DIMENSION(4) :: rs_counter_temp
429 INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
430 INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, &
431 lc_min, ld_max, ld_min, npgfa, npgfb, &
432 npgfc, npgfd, nsgfa, nsgfb, nsgfc, &
433 nsgfd
434 INTEGER, DIMENSION(:, :), POINTER :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
435 LOGICAL :: alpha_beta_case, case_send_receive, &
436 copy_integrals, do_periodic
437 LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
438 REAL(kind=dp) :: cartesian_estimate, coeffs_kind_max0, cost_tmp, eps_schwarz, ln_10, &
439 log10_eps_schwarz, log10_pmax, max_contraction_val, max_val1, max_val2, max_val2_set, &
440 pmax_atom, pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, &
441 screen_kind_kl
442 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: cost_rs, cost_rs_temp, ee_buffer1, &
443 ee_buffer2, ee_primitives_tmp, &
444 ee_work, ee_work2, primitive_integrals
445 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_rs_mat_rec, c_beta_t, max_contraction
446 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib, bib_rs_mat_rec_big, zero_mat_big
447 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: bi1, mnrs
448 REAL(kind=dp), DIMENSION(:), POINTER :: p_work
449 REAL(kind=dp), DIMENSION(:, :), POINTER :: shm_pmax_block, zeta, zetb, zetc, zetd
450 REAL(kind=dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
451 sphi_c_ext_set, sphi_d_ext_set
452 REAL(kind=dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
453 sphi_d_ext
454 REAL(kind=dp), DIMENSION(dimen, 2) :: zero_mat
455 REAL(kind=dp), DIMENSION(dimen, dimen) :: c_t
456 TYPE(cell_type), POINTER :: cell
457 TYPE(cp_libint_t) :: private_lib
458 TYPE(cp_logger_type), POINTER :: logger
459 TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
460 TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
461 TYPE(hfx_pgf_product_list), ALLOCATABLE, &
462 DIMENSION(:) :: pgf_product_list
463 TYPE(hfx_potential_type) :: mp2_potential_parameter
464 TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
465 POINTER :: screen_coeffs_kind, tmp_r_1, tmp_r_2, &
466 tmp_screen_pgf1, tmp_screen_pgf2
467 TYPE(hfx_screen_coeff_type), &
468 DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
469 TYPE(hfx_screen_coeff_type), &
470 DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
471 TYPE(hfx_type), POINTER :: actual_x_data
472 TYPE(pair_list_type_mp2) :: list_ij, list_kl
473 TYPE(pair_set_list_type), ALLOCATABLE, &
474 DIMENSION(:) :: set_list_ij, set_list_kl
475 TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
476
477 CALL timeset(routinen, handle)
478
479 ! The Integ_MP2 will contain the (ia|jb) integrals, necessary for example
480 ! for the RI-MP2 basis optimization. In this case the number of ij batches
481 ! has to be equal to 1 (all integrals over molecular orbitals are computed
482 ! in a single step).
483 copy_integrals = .false.
484 IF (PRESENT(integ_mp2)) copy_integrals = .true.
485
486 alpha_beta_case = .false.
487
488 CALL prepare_integral_calc(cell, qs_env, mp2_env, para_env, mp2_potential_parameter, actual_x_data, &
489 do_periodic, basis_parameter, max_set, particle_set, natom, kind_of, &
490 nsgf_max, primitive_integrals, ee_work, ee_work2, ee_buffer1, ee_buffer2, &
491 ee_primitives_tmp, nspins, max_contraction, max_pgf, pgf_list_ij, &
492 pgf_list_kl, pgf_product_list, nimages, eps_schwarz, log10_eps_schwarz, &
493 private_lib, p_work, screen_coeffs_set, screen_coeffs_kind, screen_coeffs_pgf, &
494 radii_pgf)
495
496 ln_10 = log(10.0_dp)
497
498 neris_tmp = 0_int_8
499 neris_total = 0_int_8
500 nprim_ints = 0_int_8
501
502!!!!!!!!!
503 ALLOCATE (list_ij%elements(natom**2))
504 ALLOCATE (list_kl%elements(natom**2))
505!!!!!!!!!
506
507 coeffs_kind_max0 = maxval(screen_coeffs_kind(:, :)%x(2))
508 ALLOCATE (set_list_ij((max_set*natom)**2))
509 ALLOCATE (set_list_kl((max_set*natom)**2))
510
511 !! precalculate maximum density matrix elements in blocks
512 actual_x_data%pmax_block = 0.0_dp
513 shm_pmax_block => actual_x_data%pmax_block
514
515 shm_atomic_pair_list => actual_x_data%atomic_pair_list
516
517 iatom_start = 1
518 iatom_end = natom
519 jatom_start = 1
520 jatom_end = natom
521 katom_start = 1
522 katom_end = natom
523 latom_start = 1
524 latom_end = natom
525
526 CALL build_pair_list_mp2(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
527 jatom_start, jatom_end, &
528 kind_of, basis_parameter, particle_set, &
529 do_periodic, screen_coeffs_set, screen_coeffs_kind, &
530 coeffs_kind_max0, log10_eps_schwarz, cell, 0.d+00, &
531 shm_atomic_pair_list)
532
533 CALL build_pair_list_mp2(natom, list_kl, set_list_kl, katom_start, katom_end, &
534 latom_start, latom_end, &
535 kind_of, basis_parameter, particle_set, &
536 do_periodic, screen_coeffs_set, screen_coeffs_kind, &
537 coeffs_kind_max0, log10_eps_schwarz, cell, 0.d+00, &
538 shm_atomic_pair_list)
539
540 ALLOCATE (bib(dimen, dimen, elements_ij_proc))
541 bib = 0.0d+00
542 c_t = transpose(c)
543
544 IF (PRESENT(occupied_beta) .AND. PRESENT(c_beta) .AND. PRESENT(auto_beta)) THEN
545 ALLOCATE (c_beta_t(dimen, dimen))
546 c_beta_t(:, :) = transpose(c_beta)
547 alpha_beta_case = .true.
548 END IF
549
550 ij_elem_max = elements_ij_proc
551 CALL para_env%max(ij_elem_max)
552
553 ! calculate the minimum multiple of num_pe >= to Ni_occupied*occupied, in such a way
554 ! that the i, j loop is performed exactly the same number of time for each procewssor
555 multiple = 0
556 DO
557 multiple = multiple + para_env%num_pe
558 IF (multiple >= ni_occupied*nj_occupied) EXIT
559 END DO
560
561 ! proc_num_task contains the number of times the second occupied
562 ! orbital transformation is called for each processor, needs for balancing
563 ! the point to point send
564 ALLOCATE (proc_num_task(0:para_env%num_pe - 1))
565
566 proc_num_task = 0
567
568 counter_proc = 0
569 DO i_list_ij = 1, list_ij%n_element
570 iatom = list_ij%elements(i_list_ij)%pair(1)
571 jatom = list_ij%elements(i_list_ij)%pair(2)
572 i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
573 i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
574 ikind = list_ij%elements(i_list_ij)%kind_pair(1)
575 jkind = list_ij%elements(i_list_ij)%kind_pair(2)
576
577 nsgfb => basis_parameter(jkind)%nsgf
578 nsgfa => basis_parameter(ikind)%nsgf
579
580 DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
581 iset = set_list_ij(i_set_list_ij)%pair(1)
582 jset = set_list_ij(i_set_list_ij)%pair(2)
583 IF (iatom == jatom .AND. jset < iset) cycle
584
585 counter_proc = counter_proc + 1
586 proc_num = mod(counter_proc, para_env%num_pe)
587
588 proc_num_task(proc_num) = proc_num_task(proc_num) + 1
589
590 END DO
591 END DO
592 ! calculate the exact maximum number of calls to the second occupied
593 ! orbital transformation
594 ! max_num_call_sec_transf=MAXVAL(proc_num_task)
595
596 ! distribute the RS pair over all processor
597 ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 3))
598
599 kl_list_proc = 0
600
601 counter_proc = 0
602 elements_kl_proc = 0
603 DO i_list_ij = 1, list_ij%n_element
604 iatom = list_ij%elements(i_list_ij)%pair(1)
605 jatom = list_ij%elements(i_list_ij)%pair(2)
606 i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
607 i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
608 ikind = list_ij%elements(i_list_ij)%kind_pair(1)
609 jkind = list_ij%elements(i_list_ij)%kind_pair(2)
610
611 nsgfb => basis_parameter(jkind)%nsgf
612 nsgfa => basis_parameter(ikind)%nsgf
613
614 DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
615 iset = set_list_ij(i_set_list_ij)%pair(1)
616 jset = set_list_ij(i_set_list_ij)%pair(2)
617 IF (iatom == jatom .AND. jset < iset) cycle
618
619 counter_proc = counter_proc + 1
620 proc_num = mod(counter_proc, para_env%num_pe)
621
622 IF (proc_num == para_env%mepos) THEN
623 elements_kl_proc = elements_kl_proc + 1
624 kl_list_proc(elements_kl_proc, 1) = i_list_ij
625 kl_list_proc(elements_kl_proc, 2) = i_set_list_ij
626 kl_list_proc(elements_kl_proc, 3) = counter_proc
627 END IF
628
629 END DO
630 END DO
631
632 total_num_rs_task = sum(proc_num_task)
633 ALLOCATE (task_counter_rs(total_num_rs_task, 4))
634
635 ALLOCATE (cost_rs(total_num_rs_task))
636
637 task_counter_rs = 0
638 cost_rs = 0.0_dp
639
640 DO case_index = 1, 2
641
642 my_num_call_sec_transf = 0
643 DO index_kl = 1, elements_kl_proc
644
645 i_list_ij = kl_list_proc(index_kl, 1)
646 i_set_list_ij = kl_list_proc(index_kl, 2)
647
648 iatom = list_ij%elements(i_list_ij)%pair(1)
649 jatom = list_ij%elements(i_list_ij)%pair(2)
650 i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
651 i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
652 ikind = list_ij%elements(i_list_ij)%kind_pair(1)
653 jkind = list_ij%elements(i_list_ij)%kind_pair(2)
654 ra = list_ij%elements(i_list_ij)%r1
655 rb = list_ij%elements(i_list_ij)%r2
656 rab2 = list_ij%elements(i_list_ij)%dist2
657
658 la_max => basis_parameter(ikind)%lmax
659 la_min => basis_parameter(ikind)%lmin
660 npgfa => basis_parameter(ikind)%npgf
661 nseta = basis_parameter(ikind)%nset
662 zeta => basis_parameter(ikind)%zet
663 nsgfa => basis_parameter(ikind)%nsgf
664 sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
665 nsgfl_a => basis_parameter(ikind)%nsgfl
666 sphi_a_u1 = ubound(sphi_a_ext, 1)
667 sphi_a_u2 = ubound(sphi_a_ext, 2)
668 sphi_a_u3 = ubound(sphi_a_ext, 3)
669
670 lb_max => basis_parameter(jkind)%lmax
671 lb_min => basis_parameter(jkind)%lmin
672 npgfb => basis_parameter(jkind)%npgf
673 nsetb = basis_parameter(jkind)%nset
674 zetb => basis_parameter(jkind)%zet
675 nsgfb => basis_parameter(jkind)%nsgf
676 sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
677 nsgfl_b => basis_parameter(jkind)%nsgfl
678 sphi_b_u1 = ubound(sphi_b_ext, 1)
679 sphi_b_u2 = ubound(sphi_b_ext, 2)
680 sphi_b_u3 = ubound(sphi_b_ext, 3)
681
682 iset = set_list_ij(i_set_list_ij)%pair(1)
683 jset = set_list_ij(i_set_list_ij)%pair(2)
684
685 ncob = npgfb(jset)*ncoset(lb_max(jset))
686 max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
687 screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
688
689 sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
690 sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
691
692 IF (case_index == 1) THEN
693 global_counter = kl_list_proc(index_kl, 3)
694 task_counter_rs(global_counter, 1) = i_list_ij
695 task_counter_rs(global_counter, 2) = i_set_list_ij
696 task_counter_rs(global_counter, 3) = nsgfb(jset)*nsgfa(iset)
697 END IF
698
699 IF (ALLOCATED(bi1)) DEALLOCATE (bi1)
700 ALLOCATE (bi1(dimen, ni_occupied, nsgfb(jset), nsgfa(iset)))
701
702 bi1 = 0.d+00
703
704 DO i_list_kl = 1, list_kl%n_element
705
706 katom = list_kl%elements(i_list_kl)%pair(1)
707 latom = list_kl%elements(i_list_kl)%pair(2)
708
709 i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
710 i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
711 kkind = list_kl%elements(i_list_kl)%kind_pair(1)
712 lkind = list_kl%elements(i_list_kl)%kind_pair(2)
713 rc = list_kl%elements(i_list_kl)%r1
714 rd = list_kl%elements(i_list_kl)%r2
715 rcd2 = list_kl%elements(i_list_kl)%dist2
716
717 pmax_atom = 0.0_dp
718
719 screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
720 screen_coeffs_kind(jkind, ikind)%x(2)
721 screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
722 screen_coeffs_kind(lkind, kkind)%x(2)
723
724 !!!!! Change the loop order
725 IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) cycle
726 !!!!!
727 IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) cycle
728
729 lc_max => basis_parameter(kkind)%lmax
730 lc_min => basis_parameter(kkind)%lmin
731 npgfc => basis_parameter(kkind)%npgf
732 zetc => basis_parameter(kkind)%zet
733 nsgfc => basis_parameter(kkind)%nsgf
734 sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
735 nsgfl_c => basis_parameter(kkind)%nsgfl
736 sphi_c_u1 = ubound(sphi_c_ext, 1)
737 sphi_c_u2 = ubound(sphi_c_ext, 2)
738 sphi_c_u3 = ubound(sphi_c_ext, 3)
739
740 ld_max => basis_parameter(lkind)%lmax
741 ld_min => basis_parameter(lkind)%lmin
742 npgfd => basis_parameter(lkind)%npgf
743 zetd => basis_parameter(lkind)%zet
744 nsgfd => basis_parameter(lkind)%nsgf
745 sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
746 nsgfl_d => basis_parameter(lkind)%nsgfl
747 sphi_d_u1 = ubound(sphi_d_ext, 1)
748 sphi_d_u2 = ubound(sphi_d_ext, 2)
749 sphi_d_u3 = ubound(sphi_d_ext, 3)
750
751 DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
752 kset = set_list_kl(i_set_list_kl)%pair(1)
753 lset = set_list_kl(i_set_list_kl)%pair(2)
754
755 IF (katom == latom .AND. lset < kset) cycle
756
757 max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
758 screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
759 max_val2 = max_val1 + max_val2_set
760
761 !! Near field screening
762 IF (max_val2 + pmax_atom < log10_eps_schwarz) cycle
763 sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
764 sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
765 !! get max_vals if we screen on initial density
766 pmax_entry = 0.0_dp
767
768 log10_pmax = pmax_entry
769 max_val2 = max_val2 + log10_pmax
770 IF (max_val2 < log10_eps_schwarz) cycle
771 pmax_entry = exp(log10_pmax*ln_10)
772
773 IF (case_index == 2) THEN
774 IF (ALLOCATED(mnrs)) DEALLOCATE (mnrs)
775 ALLOCATE (mnrs(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset)))
776
777 mnrs = 0.d+00
778
779 max_contraction_val = max_contraction(iset, iatom)* &
780 max_contraction(jset, jatom)* &
781 max_contraction(kset, katom)* &
782 max_contraction(lset, latom)*pmax_entry
783 tmp_r_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
784 tmp_r_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
785 tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
786 tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
787
788 CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
789 la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
790 lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
791 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
792 sphi_a_u1, sphi_a_u2, sphi_a_u3, &
793 sphi_b_u1, sphi_b_u2, sphi_b_u3, &
794 sphi_c_u1, sphi_c_u2, sphi_c_u3, &
795 sphi_d_u1, sphi_d_u2, sphi_d_u3, &
796 zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
797 zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
798 primitive_integrals, &
799 mp2_potential_parameter, &
800 actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
801 screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
802 max_contraction_val, cartesian_estimate, cell, neris_tmp, &
803 log10_pmax, log10_eps_schwarz, &
804 tmp_r_1, tmp_r_2, tmp_screen_pgf1, tmp_screen_pgf2, &
805 pgf_list_ij, pgf_list_kl, pgf_product_list, &
806 nsgfl_a(:, iset), nsgfl_b(:, jset), &
807 nsgfl_c(:, kset), nsgfl_d(:, lset), &
808 sphi_a_ext_set, &
809 sphi_b_ext_set, &
810 sphi_c_ext_set, &
811 sphi_d_ext_set, &
812 ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
813 nimages, do_periodic, p_work)
814
815 nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
816 neris_total = neris_total + nints
817 nprim_ints = nprim_ints + neris_tmp
818 IF (cartesian_estimate == 0.0_dp) cartesian_estimate = tiny(cartesian_estimate)
819 estimate_to_store_int = exponent(cartesian_estimate)
820 estimate_to_store_int = max(estimate_to_store_int, -15_int_8)
821 cartesian_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
822
823 IF (cartesian_estimate < eps_schwarz) cycle
824
825 primitive_counter = 0
826 DO llb = 1, nsgfd(lset)
827 DO kkb = 1, nsgfc(kset)
828 DO jjb = 1, nsgfb(jset)
829 DO iib = 1, nsgfa(iset)
830 primitive_counter = primitive_counter + 1
831 mnrs(llb, kkb, jjb, iib) = primitive_integrals(primitive_counter)
832 END DO
833 END DO
834 END DO
835 END DO
836
837 CALL transform_occupied_orbitals_first(dimen, iatom, jatom, katom, latom, &
838 iset, jset, kset, lset, &
839 nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
840 i_batch_start, ni_occupied, &
841 mnrs, c_t, mp2_biel, bi1)
842 ELSE
843 task_counter_rs(global_counter, 4) = task_counter_rs(global_counter, 4) + 1
844
845 cost_tmp = 0.0_dp
846 cost_tmp = cost_model(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset), &
847 npgfd(lset), npgfc(kset), npgfb(jset), npgfa(iset), &
848 max_val2/log10_eps_schwarz, &
850 cost_rs(global_counter) = cost_rs(global_counter) + cost_tmp
851 END IF
852
853 END DO ! i_set_list_kl
854 END DO ! i_list_kl
855
856 IF (case_index == 2) THEN
857 my_num_call_sec_transf = my_num_call_sec_transf + 1
858 IF (.NOT. alpha_beta_case) THEN
859 IF (.NOT. mp2_env%direct_canonical%big_send) THEN
860 CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
861 nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
862 bi1, c_t, mp2_biel, para_env, elements_ij_proc, &
863 multiple, bib)
864 ELSE
865 CALL transform_occupied_orbitals_second_big( &
866 dimen, iatom, jatom, iset, jset, &
867 nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
868 ij_elem_max, bi1, c_t, mp2_biel, para_env, elements_ij_proc, bib)
869 END IF
870 ELSE
871 IF (.NOT. mp2_env%direct_canonical%big_send) THEN
872 CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
873 nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
874 bi1, c_beta_t, mp2_biel, para_env, elements_ij_proc, &
875 multiple, bib)
876 ELSE
877 CALL transform_occupied_orbitals_second_big( &
878 dimen, iatom, jatom, iset, jset, &
879 nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
880 ij_elem_max, bi1, c_beta_t, mp2_biel, para_env, elements_ij_proc, bib)
881 END IF
882 END IF
883 END IF
884
885 END DO !i_list_ij
886
887 IF (case_index == 1) THEN
888 CALL para_env%sum(task_counter_rs)
889 CALL para_env%sum(cost_rs)
890 ALLOCATE (task_counter_rs_temp(total_num_rs_task, 4))
891
892 ALLOCATE (cost_rs_temp(total_num_rs_task))
893
894 step_size = 1
895 ALLOCATE (same_size_kl_elements_counter((nsgf_max**2 + 1)/step_size + 1))
896
897 same_size_kl_elements_counter = 0
898
899 same_size_kl_index = 0
900 global_counter = 0
901 DO iib = nsgf_max**2 + 1, 0, -step_size
902 DO jjb = 1, total_num_rs_task
903 IF (task_counter_rs(jjb, 3) > iib - step_size .AND. task_counter_rs(jjb, 3) <= iib) THEN
904 global_counter = global_counter + 1
905 task_counter_rs_temp(global_counter, 1:4) = task_counter_rs(jjb, 1:4)
906 cost_rs_temp(global_counter) = cost_rs(jjb)
907 END IF
908 END DO
909 same_size_kl_index = same_size_kl_index + 1
910 same_size_kl_elements_counter(same_size_kl_index) = global_counter
911 END DO
912
913 DEALLOCATE (task_counter_rs)
914 DEALLOCATE (cost_rs)
915
916 i_start = 1
917 DO same_size_kl_index = 1, SIZE(same_size_kl_elements_counter)
918 DO iib = i_start, same_size_kl_elements_counter(same_size_kl_index)
919 DO jjb = iib + 1, same_size_kl_elements_counter(same_size_kl_index)
920
921 IF (cost_rs_temp(jjb) >= cost_rs_temp(iib)) THEN
922 rs_counter_temp = task_counter_rs_temp(iib, 1:4)
923 task_counter_rs_temp(iib, 1:4) = task_counter_rs_temp(jjb, 1:4)
924 task_counter_rs_temp(jjb, 1:4) = rs_counter_temp
925
926 cost_tmp = cost_rs_temp(iib)
927 cost_rs_temp(iib) = cost_rs_temp(jjb)
928 cost_rs_temp(jjb) = cost_tmp
929 END IF
930 END DO
931 END DO
932 i_start = same_size_kl_elements_counter(same_size_kl_index) + 1
933 END DO
934
935 proc_num_task = 0
936 DO counter_proc = 1, total_num_rs_task
937 proc_num = mod(counter_proc, para_env%num_pe)
938 proc_num_task(proc_num) = proc_num_task(proc_num) + 1
939 END DO
940
941 max_num_call_sec_transf = maxval(proc_num_task)
942
943 DEALLOCATE (kl_list_proc)
944 ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 2))
945
946 kl_list_proc = 0
947
948 elements_kl_proc = 0
949 DO counter_proc = 1, total_num_rs_task
950 proc_num = mod(counter_proc, para_env%num_pe)
951 IF (proc_num == para_env%mepos) THEN
952 elements_kl_proc = elements_kl_proc + 1
953 kl_list_proc(elements_kl_proc, 1) = task_counter_rs_temp(counter_proc, 1)
954 kl_list_proc(elements_kl_proc, 2) = task_counter_rs_temp(counter_proc, 2)
955 END IF
956 END DO
957
958 DEALLOCATE (task_counter_rs_temp)
959 DEALLOCATE (cost_rs_temp)
960 END IF
961 END DO ! case_index
962
963 size_parameter_send(1) = 1
964 size_parameter_send(2) = 1
965 size_parameter_send(3) = 0
966 size_parameter_send(4) = 0
967 size_parameter_send(5) = elements_ij_proc
968
969 IF (mp2_env%direct_canonical%big_send) THEN
970 ALLOCATE (zero_mat_big(dimen, 2, ij_elem_max))
971
972 END IF
973
974 DO iib = my_num_call_sec_transf + 1, max_num_call_sec_transf
975 DO index_proc_shift = 0, para_env%num_pe - 1
976
977 proc_send = modulo(para_env%mepos + index_proc_shift, para_env%num_pe)
978 proc_receive = modulo(para_env%mepos - index_proc_shift, para_env%num_pe)
979
980 case_send_receive = (proc_send /= para_env%mepos)
981
982 IF (case_send_receive) THEN
983 ! the processor starts to send (and receive?)
984
985 CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
986
987 rsize_rec = size_parameter_rec(1)
988 ssize_rec = size_parameter_rec(2)
989 r_offset_rec = size_parameter_rec(3)
990 s_offset_rec = size_parameter_rec(4)
991 elements_ij_proc_rec = size_parameter_rec(5)
992 IF (.NOT. mp2_env%direct_canonical%big_send) THEN
993 ALLOCATE (bib_rs_mat_rec(dimen, rsize_rec + ssize_rec))
994 ELSE
995 ALLOCATE (bib_rs_mat_rec_big(dimen, rsize_rec + ssize_rec, ij_elem_max))
996 END IF
997 ELSE
998 elements_ij_proc_rec = elements_ij_proc
999 END IF
1000
1001 IF (.NOT. mp2_env%direct_canonical%big_send) THEN
1002 index_ij_send = 0
1003 index_ij_rec = 0
1004 DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
1005
1006 zero_mat = 0.d+00
1007
1008 IF (case_send_receive) THEN
1009
1010 CALL para_env%sendrecv(zero_mat, proc_send, bib_rs_mat_rec, proc_receive)
1011
1012 index_ij_rec = index_ij_rec + 1
1013 IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1014
1015 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) = &
1016 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) + &
1017 bib_rs_mat_rec(1:dimen, 1:rsize_rec)
1018
1019 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) = &
1020 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) + &
1021 bib_rs_mat_rec(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec)
1022
1023 END IF
1024 END IF
1025
1026 END DO
1027 ELSE
1028 zero_mat_big = 0.d+00
1029
1030 IF (case_send_receive) THEN
1031
1032 CALL para_env%sendrecv(zero_mat_big, proc_send, bib_rs_mat_rec_big, proc_receive)
1033
1034 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) = &
1035 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) + &
1036 bib_rs_mat_rec_big(1:dimen, 1:rsize_rec, 1:elements_ij_proc)
1037
1038 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) = &
1039 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) + &
1040 bib_rs_mat_rec_big(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec, 1:elements_ij_proc)
1041
1042 END IF
1043 END IF
1044
1045 IF (case_send_receive) THEN
1046 IF (.NOT. mp2_env%direct_canonical%big_send) THEN
1047 DEALLOCATE (bib_rs_mat_rec)
1048 ELSE
1049 DEALLOCATE (bib_rs_mat_rec_big)
1050 END IF
1051 END IF
1052
1053 END DO
1054 END DO
1055
1056 IF (mp2_env%direct_canonical%big_send) THEN
1057 DEALLOCATE (zero_mat_big)
1058 END IF
1059
1060 logger => cp_get_default_logger()
1061
1062 DEALLOCATE (primitive_integrals)
1063
1064 IF (.NOT. alpha_beta_case) THEN
1065 CALL transform_virtual_orbitals_and_accumulate(dimen, occupied, dimen - occupied, i_batch_start, &
1066 j_batch_start, bib, c, auto, elements_ij_proc, ij_list_proc, &
1067 nspins, emp2, emp2_cou, emp2_ex)
1068 ELSE
1069 CALL transform_virtual_orbitals_and_accumulate_abcase( &
1070 dimen, occupied, occupied_beta, dimen - occupied, dimen - occupied_beta, &
1071 i_batch_start, j_batch_start, &
1072 bib, c, c_beta, auto, auto_beta, &
1073 elements_ij_proc, ij_list_proc, emp2, emp2_cou)
1074 DEALLOCATE (c_beta_t)
1075 END IF
1076
1077 IF (copy_integrals) THEN
1078 IF (.NOT. alpha_beta_case) THEN
1079 ALLOCATE (integ_mp2(dimen - occupied, dimen - occupied, occupied, occupied))
1080 integ_mp2 = 0.0_dp
1081 DO i = 1, elements_ij_proc
1082 iib = ij_list_proc(i, 1)
1083 jjb = ij_list_proc(i, 2)
1084 integ_mp2(:, :, iib + i_batch_start, jjb + j_batch_start) = bib(1:dimen - occupied, 1:dimen - occupied, i)
1085 END DO
1086 ELSE
1087 ALLOCATE (integ_mp2(dimen - occupied, dimen - occupied_beta, occupied, occupied_beta))
1088 integ_mp2 = 0.0_dp
1089 DO i = 1, elements_ij_proc
1090 iib = ij_list_proc(i, 1)
1091 jjb = ij_list_proc(i, 2)
1092 integ_mp2(:, :, iib + i_batch_start, jjb + j_batch_start) = bib(1:dimen - occupied, 1:dimen - occupied_beta, i)
1093 END DO
1094 END IF
1095 END IF
1096 DEALLOCATE (bib)
1097
1098 DEALLOCATE (set_list_ij, set_list_kl)
1099
1100 DO i = 1, max_pgf**2
1101 DEALLOCATE (pgf_list_ij(i)%image_list)
1102 DEALLOCATE (pgf_list_kl(i)%image_list)
1103 END DO
1104
1105 DEALLOCATE (pgf_list_ij)
1106 DEALLOCATE (pgf_list_kl)
1107 DEALLOCATE (pgf_product_list)
1108
1109 DEALLOCATE (max_contraction, kind_of)
1110
1111 DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
1112
1113 DEALLOCATE (nimages)
1114
1115 IF (mp2_env%potential_parameter%potential_type == do_potential_tshpsc) THEN
1116 init_tshpsc_lmax = -1
1117 CALL free_c0()
1118 END IF
1119
1120 CALL timestop(handle)
1121
1123
1124! **************************************************************************************************
1125!> \brief ...
1126!> \param dimen ...
1127!> \param latom ...
1128!> \param katom ...
1129!> \param jatom ...
1130!> \param iatom ...
1131!> \param lset ...
1132!> \param kset ...
1133!> \param jset ...
1134!> \param iset ...
1135!> \param Ssize ...
1136!> \param Rsize ...
1137!> \param Nsize ...
1138!> \param Msize ...
1139!> \param i_batch_start ...
1140!> \param Ni_occupied ...
1141!> \param MNRS ...
1142!> \param C_T ...
1143!> \param mp2_biel ...
1144!> \param BI1 ...
1145! **************************************************************************************************
1146 SUBROUTINE transform_occupied_orbitals_first(dimen, latom, katom, jatom, iatom, &
1147 lset, kset, jset, iset, &
1148 Ssize, Rsize, Nsize, Msize, &
1149 i_batch_start, Ni_occupied, &
1150 MNRS, C_T, mp2_biel, BI1)
1151
1152 INTEGER, INTENT(IN) :: dimen, latom, katom, jatom, iatom, lset, &
1153 kset, jset, iset, ssize, rsize, nsize, &
1154 msize, i_batch_start, ni_occupied
1155 REAL(kind=dp), &
1156 DIMENSION(Msize, Nsize, Rsize, Ssize), &
1157 INTENT(IN) :: mnrs
1158 REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_t
1159 TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1160 REAL(kind=dp), &
1161 DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1162 INTENT(INOUT) :: bi1
1163
1164 INTEGER :: i, i_global, m, m_global, m_offset, &
1165 m_start, n, n_global, n_offset, r, &
1166 r_offset, r_start, s, s_offset
1167 REAL(kind=dp) :: mnrs_element
1168
1169 n_offset = mp2_biel%index_table(jatom, jset) - 1
1170 m_offset = mp2_biel%index_table(iatom, iset) - 1
1171 s_offset = mp2_biel%index_table(latom, lset) - 1
1172 r_offset = mp2_biel%index_table(katom, kset) - 1
1173
1174 DO s = 1, ssize
1175 r_start = 1
1176 IF (katom == latom .AND. kset == lset) r_start = s
1177 DO r = r_start, rsize
1178
1179 ! fast i don't know why
1180 DO n = 1, nsize
1181 n_global = n + n_offset
1182 m_start = 1
1183 IF (iatom == jatom .AND. iset == jset) THEN
1184 m = n
1185 m_global = m + m_offset
1186 mnrs_element = mnrs(m, n, r, s)
1187 DO i = 1, ni_occupied
1188 i_global = i + i_batch_start
1189 bi1(n_global, i, r, s) = bi1(n_global, i, r, s) + c_t(i_global, m_global)*mnrs_element
1190 END DO
1191 m_start = n + 1
1192 END IF
1193
1194 DO m = m_start, msize
1195 m_global = m + m_offset
1196 mnrs_element = mnrs(m, n, r, s)
1197 DO i = 1, ni_occupied
1198 i_global = i + i_batch_start
1199 bi1(n_global, i, r, s) = bi1(n_global, i, r, s) + c_t(i_global, m_global)*mnrs_element
1200 bi1(m_global, i, r, s) = bi1(m_global, i, r, s) + c_t(i_global, n_global)*mnrs_element
1201 END DO
1202 END DO
1203 END DO
1204
1205 END DO
1206 END DO
1207
1208 END SUBROUTINE transform_occupied_orbitals_first
1209
1210! **************************************************************************************************
1211!> \brief ...
1212!> \param dimen ...
1213!> \param latom ...
1214!> \param katom ...
1215!> \param lset ...
1216!> \param kset ...
1217!> \param Ssize ...
1218!> \param Rsize ...
1219!> \param Ni_occupied ...
1220!> \param Nj_occupied ...
1221!> \param j_batch_start ...
1222!> \param BI1 ...
1223!> \param C_T ...
1224!> \param mp2_biel ...
1225!> \param para_env ...
1226!> \param elements_ij_proc ...
1227!> \param multiple ...
1228!> \param BIb ...
1229! **************************************************************************************************
1230 SUBROUTINE transform_occupied_orbitals_second(dimen, latom, katom, lset, kset, &
1231 Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
1232 BI1, C_T, mp2_biel, para_env, &
1233 elements_ij_proc, &
1234 multiple, BIb)
1235
1236 INTEGER, INTENT(IN) :: dimen, latom, katom, lset, kset, ssize, &
1237 rsize, ni_occupied, nj_occupied, &
1238 j_batch_start
1239 REAL(kind=dp), &
1240 DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1241 INTENT(IN) :: bi1
1242 REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_t
1243 TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1244 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1245 INTEGER, INTENT(IN) :: elements_ij_proc, multiple
1246 REAL(kind=dp), &
1247 DIMENSION(dimen, dimen, elements_ij_proc), &
1248 INTENT(INOUT) :: bib
1249
1250 CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_occupied_orbitals_second'
1251
1252 INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
1253 index_proc_shift, j, n, proc_receive, proc_send, r, r_global, r_offset, r_offset_rec, &
1254 r_start, rsize_rec, s, s_global, s_offset, s_offset_rec, ssize_rec
1255 INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
1256 LOGICAL :: case_send_receive
1257 REAL(kind=dp) :: c_t_r, c_t_s
1258 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_rs_mat_rec
1259 REAL(kind=dp), DIMENSION(dimen, Rsize+Ssize) :: bib_rs_mat
1260
1261 CALL timeset(routinen, handle)
1262
1263 s_offset = mp2_biel%index_table(latom, lset) - 1
1264 r_offset = mp2_biel%index_table(katom, kset) - 1
1265
1266 size_parameter_send(1) = rsize
1267 size_parameter_send(2) = ssize
1268 size_parameter_send(3) = r_offset
1269 size_parameter_send(4) = s_offset
1270 size_parameter_send(5) = elements_ij_proc
1271
1272 DO index_proc_shift = 0, para_env%num_pe - 1
1273
1274 proc_send = modulo(para_env%mepos + index_proc_shift, para_env%num_pe)
1275 proc_receive = modulo(para_env%mepos - index_proc_shift, para_env%num_pe)
1276
1277 case_send_receive = (proc_send /= para_env%mepos)
1278
1279 IF (case_send_receive) THEN
1280 ! the processor starts to send (and receive?)
1281
1282 CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
1283
1284 rsize_rec = size_parameter_rec(1)
1285 ssize_rec = size_parameter_rec(2)
1286 r_offset_rec = size_parameter_rec(3)
1287 s_offset_rec = size_parameter_rec(4)
1288 elements_ij_proc_rec = size_parameter_rec(5)
1289 ALLOCATE (bib_rs_mat_rec(dimen, rsize_rec + ssize_rec))
1290
1291 ELSE
1292 elements_ij_proc_rec = elements_ij_proc
1293 END IF
1294
1295 index_ij_send = 0
1296 index_ij_rec = 0
1297 DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
1298
1299 bib_rs_mat = zero
1300 IF (index_proc_ij <= ni_occupied*nj_occupied) THEN
1301
1302 index_ij_send = index_ij_send + 1
1303
1304 i = (index_proc_ij - 1)/nj_occupied + 1
1305 j = index_proc_ij - (i - 1)*nj_occupied + j_batch_start
1306
1307 DO s = 1, ssize
1308 s_global = s + s_offset
1309 r_start = 1
1310 IF (katom == latom .AND. kset == lset) r_start = s
1311 DO r = r_start, rsize
1312 r_global = r + r_offset
1313
1314 IF (r_global /= s_global) THEN
1315 c_t_r = c_t(j, r_global)
1316 c_t_s = c_t(j, s_global)
1317 DO n = 1, dimen
1318 bib_rs_mat(n, r) = bib_rs_mat(n, r) + c_t_s*bi1(n, i, r, s)
1319 END DO
1320 DO n = 1, dimen
1321 bib_rs_mat(n, rsize + s) = bib_rs_mat(n, rsize + s) + c_t_r*bi1(n, i, r, s)
1322 END DO
1323 ELSE
1324 c_t_s = c_t(j, s_global)
1325 DO n = 1, dimen
1326 bib_rs_mat(n, r) = bib_rs_mat(n, r) + c_t_s*bi1(n, i, r, s)
1327 END DO
1328 END IF
1329
1330 END DO
1331 END DO
1332
1333 END IF
1334
1335 IF (case_send_receive) THEN
1336
1337 CALL para_env%sendrecv(bib_rs_mat, proc_send, bib_rs_mat_rec, proc_receive)
1338
1339 index_ij_rec = index_ij_rec + 1
1340 IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1341
1342 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) = &
1343 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) + &
1344 bib_rs_mat_rec(1:dimen, 1:rsize_rec)
1345
1346 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) = &
1347 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) + &
1348 bib_rs_mat_rec(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec)
1349
1350 END IF
1351 ELSE
1352 ! the processor is the sender and receiver itself
1353 IF (index_ij_send <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1354
1355 bib(1:dimen, r_offset + 1:r_offset + rsize, index_ij_send) = &
1356 bib(1:dimen, r_offset + 1:r_offset + rsize, index_ij_send) + bib_rs_mat(1:dimen, 1:rsize)
1357
1358 bib(1:dimen, s_offset + 1:s_offset + ssize, index_ij_send) = &
1359 bib(1:dimen, s_offset + 1:s_offset + ssize, index_ij_send) + bib_rs_mat(1:dimen, rsize + 1:rsize + ssize)
1360
1361 END IF
1362 END IF
1363
1364 END DO ! loop over the ij of the processor
1365
1366 IF (case_send_receive) THEN
1367 DEALLOCATE (bib_rs_mat_rec)
1368 END IF
1369
1370 END DO ! loop over the processor starting from itself
1371
1372 CALL timestop(handle)
1373
1374 END SUBROUTINE transform_occupied_orbitals_second
1375
1376! **************************************************************************************************
1377!> \brief ...
1378!> \param dimen ...
1379!> \param latom ...
1380!> \param katom ...
1381!> \param lset ...
1382!> \param kset ...
1383!> \param Ssize ...
1384!> \param Rsize ...
1385!> \param Ni_occupied ...
1386!> \param Nj_occupied ...
1387!> \param j_batch_start ...
1388!> \param ij_elem_max ...
1389!> \param BI1 ...
1390!> \param C_T ...
1391!> \param mp2_biel ...
1392!> \param para_env ...
1393!> \param elements_ij_proc ...
1394!> \param BIb ...
1395! **************************************************************************************************
1396 SUBROUTINE transform_occupied_orbitals_second_big(dimen, latom, katom, lset, kset, &
1397 Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
1398 ij_elem_max, BI1, C_T, mp2_biel, para_env, &
1399 elements_ij_proc, BIb)
1400
1401 INTEGER, INTENT(IN) :: dimen, latom, katom, lset, kset, ssize, &
1402 rsize, ni_occupied, nj_occupied, &
1403 j_batch_start, ij_elem_max
1404 REAL(kind=dp), &
1405 DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1406 INTENT(IN) :: bi1
1407 REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_t
1408 TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1409 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1410 INTEGER, INTENT(IN) :: elements_ij_proc
1411 REAL(kind=dp), &
1412 DIMENSION(dimen, dimen, elements_ij_proc), &
1413 INTENT(INOUT) :: bib
1414
1415 CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_occupied_orbitals_second_big'
1416
1417 INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
1418 index_proc_shift, j, n, proc_receive, proc_send, r, r_global, r_offset, r_offset_rec, &
1419 r_start, rsize_rec, s, s_global, s_offset, s_offset_rec, ssize_rec
1420 INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
1421 LOGICAL :: case_send_receive
1422 REAL(kind=dp) :: c_t_r, c_t_s
1423 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_rs_mat_rec
1424 REAL(kind=dp), &
1425 DIMENSION(dimen, Rsize+Ssize, ij_elem_max) :: bib_rs_mat
1426
1427 CALL timeset(routinen, handle)
1428
1429 s_offset = mp2_biel%index_table(latom, lset) - 1
1430 r_offset = mp2_biel%index_table(katom, kset) - 1
1431
1432 size_parameter_send(1) = rsize
1433 size_parameter_send(2) = ssize
1434 size_parameter_send(3) = r_offset
1435 size_parameter_send(4) = s_offset
1436 size_parameter_send(5) = elements_ij_proc
1437
1438 DO index_proc_shift = 0, para_env%num_pe - 1
1439
1440 proc_send = modulo(para_env%mepos + index_proc_shift, para_env%num_pe)
1441 proc_receive = modulo(para_env%mepos - index_proc_shift, para_env%num_pe)
1442
1443 case_send_receive = (proc_send /= para_env%mepos)
1444
1445 IF (case_send_receive) THEN
1446 ! the processor starts to send (and receive?)
1447
1448 CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
1449
1450 rsize_rec = size_parameter_rec(1)
1451 ssize_rec = size_parameter_rec(2)
1452 r_offset_rec = size_parameter_rec(3)
1453 s_offset_rec = size_parameter_rec(4)
1454 elements_ij_proc_rec = size_parameter_rec(5)
1455 ALLOCATE (bib_rs_mat_rec(dimen, rsize_rec + ssize_rec, ij_elem_max))
1456 ELSE
1457 elements_ij_proc_rec = elements_ij_proc
1458 END IF
1459
1460 index_ij_send = 0
1461 index_ij_rec = 0
1462 bib_rs_mat = zero
1463
1464 DO index_proc_ij = proc_send + 1, ni_occupied*nj_occupied, para_env%num_pe
1465
1466 index_ij_send = index_ij_send + 1
1467
1468 i = (index_proc_ij - 1)/nj_occupied + 1
1469 j = index_proc_ij - (i - 1)*nj_occupied + j_batch_start
1470
1471 DO s = 1, ssize
1472 s_global = s + s_offset
1473 r_start = 1
1474 IF (katom == latom .AND. kset == lset) r_start = s
1475 DO r = r_start, rsize
1476 r_global = r + r_offset
1477
1478 IF (r_global /= s_global) THEN
1479 c_t_r = c_t(j, r_global)
1480 c_t_s = c_t(j, s_global)
1481 DO n = 1, dimen
1482 bib_rs_mat(n, r, index_ij_send) = bib_rs_mat(n, r, index_ij_send) + c_t_s*bi1(n, i, r, s)
1483 END DO
1484 DO n = 1, dimen
1485 bib_rs_mat(n, rsize + s, index_ij_send) = bib_rs_mat(n, rsize + s, index_ij_send) + c_t_r*bi1(n, i, r, s)
1486 END DO
1487 ELSE
1488 c_t_s = c_t(j, s_global)
1489 DO n = 1, dimen
1490 bib_rs_mat(n, r, index_ij_send) = bib_rs_mat(n, r, index_ij_send) + c_t_s*bi1(n, i, r, s)
1491 END DO
1492 END IF
1493
1494 END DO
1495 END DO
1496
1497 END DO
1498
1499 IF (case_send_receive) THEN
1500
1501 CALL para_env%sendrecv(bib_rs_mat, proc_send, bib_rs_mat_rec, proc_receive)
1502
1503 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) = &
1504 bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) + &
1505 bib_rs_mat_rec(1:dimen, 1:rsize_rec, 1:elements_ij_proc)
1506
1507 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) = &
1508 bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) + &
1509 bib_rs_mat_rec(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec, 1:elements_ij_proc)
1510
1511 DEALLOCATE (bib_rs_mat_rec)
1512 ELSE
1513 ! the processor is the sender and receiver itself
1514 bib(1:dimen, r_offset + 1:r_offset + rsize, 1:elements_ij_proc) = &
1515 bib(1:dimen, r_offset + 1:r_offset + rsize, 1:elements_ij_proc) + &
1516 bib_rs_mat(1:dimen, 1:rsize, 1:elements_ij_proc)
1517
1518 bib(1:dimen, s_offset + 1:s_offset + ssize, 1:elements_ij_proc) = &
1519 bib(1:dimen, s_offset + 1:s_offset + ssize, 1:elements_ij_proc) + &
1520 bib_rs_mat(1:dimen, rsize + 1:rsize + ssize, 1:elements_ij_proc)
1521
1522 END IF
1523
1524 END DO ! loop over the processor starting from itself
1525
1526 CALL timestop(handle)
1527
1528 END SUBROUTINE transform_occupied_orbitals_second_big
1529
1530! **************************************************************************************************
1531!> \brief ...
1532!> \param dimen ...
1533!> \param occupied ...
1534!> \param virtual ...
1535!> \param i_batch_start ...
1536!> \param j_batch_start ...
1537!> \param BIb ...
1538!> \param C ...
1539!> \param Auto ...
1540!> \param elements_ij_proc ...
1541!> \param ij_list_proc ...
1542!> \param nspins ...
1543!> \param Emp2 ...
1544!> \param Emp2_Cou ...
1545!> \param Emp2_ex ...
1546! **************************************************************************************************
1547 SUBROUTINE transform_virtual_orbitals_and_accumulate(dimen, occupied, virtual, i_batch_start, &
1548 j_batch_start, BIb, C, Auto, elements_ij_proc, &
1549 ij_list_proc, nspins, Emp2, Emp2_Cou, Emp2_ex)
1550
1551 INTEGER, INTENT(IN) :: dimen, occupied, virtual, i_batch_start, &
1552 j_batch_start
1553 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1554 INTENT(INOUT) :: bib
1555 REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c
1556 REAL(kind=dp), DIMENSION(dimen), INTENT(IN) :: auto
1557 INTEGER, INTENT(IN) :: elements_ij_proc
1558 INTEGER, DIMENSION(elements_ij_proc, 2), &
1559 INTENT(IN) :: ij_list_proc
1560 INTEGER, INTENT(IN) :: nspins
1561 REAL(kind=dp), INTENT(INOUT) :: emp2, emp2_cou, emp2_ex
1562
1563 CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_virtual_orbitals_and_accumulate'
1564 REAL(kind=dp), PARAMETER :: zero = 0.0_dp
1565
1566 INTEGER :: a, a_global, b, b_global, handle, i, &
1567 i_global, index_ij, j, j_global
1568 REAL(kind=dp) :: iajb, ibja, parz, two
1569 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bia
1570
1571 CALL timeset(routinen, handle)
1572
1573 ALLOCATE (bia(dimen, virtual))
1574
1575 bia = zero
1576 DO index_ij = 1, elements_ij_proc
1577
1578 CALL dgemm('T', 'N', dimen, virtual, dimen, 1.0_dp, bib(1, 1, index_ij), &
1579 dimen, c(1, occupied + 1), dimen, 0.0_dp, bia(1, 1), dimen)
1580 bib(1:dimen, 1:virtual, index_ij) = bia(1:dimen, 1:virtual)
1581
1582 END DO
1583
1584 DEALLOCATE (bia)
1585 ALLOCATE (bia(virtual, virtual))
1586
1587 bia = zero
1588 DO index_ij = 1, elements_ij_proc
1589
1590 CALL dgemm('T', 'N', virtual, virtual, dimen, 1.0_dp, bib(1, 1, index_ij), dimen, c(1, occupied + 1), dimen, 0.0_dp, &
1591 bia(1, 1), virtual)
1592 bib(1:virtual, 1:virtual, index_ij) = bia(1:virtual, 1:virtual)
1593
1594 END DO
1595
1596 two = 2.0_dp/nspins
1597 DO index_ij = 1, elements_ij_proc
1598 i = ij_list_proc(index_ij, 1)
1599 j = ij_list_proc(index_ij, 2)
1600 i_global = i + i_batch_start
1601 j_global = j + j_batch_start
1602 DO a = 1, virtual
1603 a_global = a + occupied
1604 DO b = 1, virtual
1605 b_global = b + occupied
1606 iajb = bib(a, b, index_ij)
1607 ibja = bib(b, a, index_ij)
1608 parz = iajb/(auto(i_global) + auto(j_global) - auto(a_global) - auto(b_global))
1609 ! parz=parz*(two*iajb-ibja) !Full
1610 ! parz=parz*(iajb) !Coulomb
1611 ! parz=parz*(ibja) !Coulomb
1612 ! Emp2=Emp2+parz/nspins
1613 emp2_cou = emp2_cou + parz*two*(iajb)/nspins
1614 emp2_ex = emp2_ex - parz*(ibja)/nspins
1615 emp2 = emp2 + parz*(two*iajb - ibja)/nspins
1616 END DO
1617 END DO
1618 END DO
1619
1620 DEALLOCATE (bia)
1621
1622 CALL timestop(handle)
1623
1624 END SUBROUTINE transform_virtual_orbitals_and_accumulate
1625
1626! **************************************************************************************************
1627!> \brief ...
1628!> \param dimen ...
1629!> \param occ_i ...
1630!> \param occ_j ...
1631!> \param virt_i ...
1632!> \param virt_j ...
1633!> \param i_batch_start ...
1634!> \param j_batch_start ...
1635!> \param BIb ...
1636!> \param C_i ...
1637!> \param C_j ...
1638!> \param Auto_i ...
1639!> \param Auto_j ...
1640!> \param elements_ij_proc ...
1641!> \param ij_list_proc ...
1642!> \param Emp2 ...
1643!> \param Emp2_Cou ...
1644! **************************************************************************************************
1645 SUBROUTINE transform_virtual_orbitals_and_accumulate_abcase(dimen, occ_i, occ_j, virt_i, virt_j, i_batch_start, &
1646 j_batch_start, BIb, C_i, C_j, Auto_i, Auto_j, elements_ij_proc, &
1647 ij_list_proc, Emp2, Emp2_Cou)
1648
1649 INTEGER, INTENT(IN) :: dimen, occ_i, occ_j, virt_i, virt_j, &
1650 i_batch_start, j_batch_start
1651 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1652 INTENT(INOUT) :: bib
1653 REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_i, c_j
1654 REAL(kind=dp), DIMENSION(dimen), INTENT(IN) :: auto_i, auto_j
1655 INTEGER, INTENT(IN) :: elements_ij_proc
1656 INTEGER, DIMENSION(elements_ij_proc, 2), &
1657 INTENT(IN) :: ij_list_proc
1658 REAL(kind=dp), INTENT(INOUT) :: emp2, emp2_cou
1659
1660 CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_virtual_orbitals_and_accumulate_ABcase'
1661 REAL(kind=dp), PARAMETER :: two = 2.d+00, zero = 0.d+00
1662
1663 INTEGER :: a, a_global, b, b_global, handle, i, &
1664 i_global, index_ij, j, j_global, n, s
1665 REAL(kind=dp) :: iajb, parz
1666 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bia
1667
1668 CALL timeset(routinen, handle)
1669
1670 ALLOCATE (bia(dimen, virt_i))
1671
1672 DO index_ij = 1, elements_ij_proc
1673
1674 DO a = 1, virt_i
1675 a_global = a + occ_i
1676 DO s = 1, dimen
1677 parz = zero
1678 DO n = 1, dimen
1679 parz = parz + c_i(n, a_global)*bib(n, s, index_ij)
1680 END DO
1681 bia(s, a) = parz
1682 END DO
1683 END DO
1684
1685 bib(1:dimen, 1:virt_i, index_ij) = bia
1686
1687 END DO
1688
1689 DEALLOCATE (bia)
1690 ALLOCATE (bia(virt_i, virt_j))
1691
1692 DO index_ij = 1, elements_ij_proc
1693
1694 DO a = 1, virt_i
1695 DO b = 1, virt_j
1696 b_global = b + occ_j
1697 parz = zero
1698 DO s = 1, dimen
1699 parz = parz + c_j(s, b_global)*bib(s, a, index_ij)
1700 END DO
1701 bia(a, b) = parz
1702 END DO
1703 END DO
1704
1705 bib(1:virt_i, 1:virt_j, index_ij) = bia
1706
1707 END DO
1708
1709 DO index_ij = 1, elements_ij_proc
1710 i = ij_list_proc(index_ij, 1)
1711 j = ij_list_proc(index_ij, 2)
1712 i_global = i + i_batch_start
1713 j_global = j + j_batch_start
1714 DO a = 1, virt_i
1715 a_global = a + occ_i
1716 DO b = 1, virt_j
1717 b_global = b + occ_j
1718 iajb = bib(a, b, index_ij)
1719 parz = iajb*iajb/(auto_i(i_global) + auto_j(j_global) - auto_i(a_global) - auto_j(b_global))
1720 emp2_cou = emp2_cou + parz/two
1721 emp2 = emp2 + parz/two
1722 END DO
1723 END DO
1724 END DO
1725
1726 DEALLOCATE (bia)
1727
1728 CALL timestop(handle)
1729
1730 END SUBROUTINE transform_virtual_orbitals_and_accumulate_abcase
1731
1732END MODULE mp2_direct_method
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
Definition cell_types.F:15
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Routines to calculate HFX energy and potential.
subroutine, public coulomb4(lib, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, zeta, zetb, zetc, zetd, primitive_integrals, potential_parameter, neighbor_cells, screen1, screen2, eps_schwarz, max_contraction_val, cart_estimate, cell, neris_tmp, log10_pmax, log10_eps_schwarz, r1_pgf, r2_pgf, pgf1, pgf2, pgf_list_ij, pgf_list_kl, pgf_product_list, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, nimages, do_periodic, p_work)
calculates two-electron integrals of a quartet/shell using the library lib_int in the periodic case
Routines for optimizing load balance between processes in HFX calculations.
real(kind=dp), dimension(12), parameter, public p1_energy
real(kind=dp), dimension(2), parameter, public p3_energy
real(kind=dp), dimension(12), parameter, public p2_energy
integer(kind=int_8) function, public cost_model(nsa, nsb, nsc, nsd, npgfa, npgfb, npgfc, npgfd, ratio, p1, p2, p3)
estimates the cost of a set quartet with info available at load balance time i.e. without much info o...
Routines for optimizing load balance between processes in HFX calculations.
subroutine, public build_pair_list_mp2(natom, list, set_list, i_start, i_end, j_start, j_end, kind_of, basis_parameter, particle_set, do_periodic, coeffs_set, coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, atomic_pair_list, skip_atom_symmetry)
...
Types and set/get functions for HFX.
Definition hfx_types.F:15
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public do_potential_tshpsc
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
Interface to the Libint-Library or a c++ wrapper.
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:106
Definition of mathematical constants and functions.
real(kind=dp), parameter, public one
real(kind=dp), parameter, public zero
Interface to the message passing library MPI.
subroutine, public mp_para_env_release(para_env)
releases the para object (to be called when you don't want anymore the shared copy of this object)
Routines to calculate MP2 energy.
subroutine, public mp2_direct_energy(dimen, occ_i, occ_j, mp2_biel, mp2_env, c_i, auto_i, emp2, emp2_cou, emp2_ex, qs_env, para_env, unit_nr, c_j, auto_j)
...
subroutine, public mp2_canonical_direct_single_batch(emp2, emp2_cou, emp2_ex, mp2_env, qs_env, para_env, mp2_biel, dimen, c, auto, i_batch_start, ni_occupied, occupied, elements_ij_proc, ij_list_proc, nj_occupied, j_batch_start, occupied_beta, c_beta, auto_beta, integ_mp2)
...
Routines to calculate the 3 and 2 center ERI's needed in the RI approximation using libint.
subroutine, public prepare_integral_calc(cell, qs_env, mp2_env, para_env, mp2_potential_parameter, actual_x_data, do_periodic, basis_parameter, max_set, particle_set, natom, kind_of, nsgf_max, primitive_integrals, ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, nspins, max_contraction, max_pgf, pgf_list_ij, pgf_list_kl, pgf_product_list, nimages, eps_schwarz, log10_eps_schwarz, private_lib, p_work, screen_coeffs_set, screen_coeffs_kind, screen_coeffs_pgf, radii_pgf, ri_basis_parameter, ri_basis_info)
...
Types needed for MP2 calculations.
Definition mp2_types.F:14
integer, save, public init_tshpsc_lmax
Definition mp2_types.F:71
Provides Cartesian and spherical orbital pointers and indices.
integer, dimension(:), allocatable, public ncoset
Define the data structure for the particle information.
subroutine, public free_c0()
...
Type defining parameters related to the simulation cell.
Definition cell_types.F:55
type of a logger, at the moment it contains just a print level starting at which level it should be l...
stores some data used in construction of Kohn-Sham matrix
Definition hfx_types.F:509
stores all the informations relevant to an mpi environment