(git:ccc2433)
mp2_direct_method.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Routines to calculate MP2 energy
10 !> \par History
11 !> 06.2011 created [Mauro Del Ben]
12 !> \author Mauro Del Ben
13 ! **************************************************************************************************
15  USE cell_types, ONLY: cell_type
17  cp_logger_type
20  p1_energy,&
21  p2_energy,&
22  p3_energy
24  USE hfx_types, ONLY: hfx_basis_type,&
25  hfx_pgf_list,&
26  hfx_pgf_product_list,&
27  hfx_potential_type,&
28  hfx_screen_coeff_type,&
29  hfx_type,&
30  pair_set_list_type
32  USE kinds, ONLY: dp,&
33  int_8
34  USE libint_wrapper, ONLY: cp_libint_t
35  USE machine, ONLY: m_flush
36  USE mathconstants, ONLY: zero
38  mp_para_env_type
40  USE mp2_types, ONLY: init_tshpsc_lmax,&
41  mp2_biel_type,&
42  mp2_type,&
43  pair_list_type_mp2
44  USE orbital_pointers, ONLY: ncoset
45  USE particle_types, ONLY: particle_type
46  USE qs_environment_types, ONLY: qs_environment_type
47  USE t_sh_p_s_c, ONLY: free_c0
48 #include "./base/base_uses.f90"
49 
50  IMPLICIT NONE
51  PRIVATE
52 
54 
55  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_direct_method'
56 
57 !***
58 
59 CONTAINS
60 
61 ! **************************************************************************************************
62 !> \brief ...
63 !> \param dimen ...
64 !> \param occ_i ...
65 !> \param occ_j ...
66 !> \param mp2_biel ...
67 !> \param mp2_env ...
68 !> \param C_i ...
69 !> \param Auto_i ...
70 !> \param Emp2 ...
71 !> \param Emp2_Cou ...
72 !> \param Emp2_ex ...
73 !> \param qs_env ...
74 !> \param para_env ...
75 !> \param unit_nr ...
76 !> \param C_j ...
77 !> \param Auto_j ...
78 ! **************************************************************************************************
79  SUBROUTINE mp2_direct_energy(dimen, occ_i, occ_j, mp2_biel, mp2_env, C_i, Auto_i, Emp2, Emp2_Cou, Emp2_ex, &
80  qs_env, para_env, unit_nr, C_j, Auto_j)
81  INTEGER :: dimen, occ_i, occ_j
82  TYPE(mp2_biel_type) :: mp2_biel
83  TYPE(mp2_type) :: mp2_env
84  REAL(kind=dp), DIMENSION(dimen, dimen) :: c_i
85  REAL(kind=dp), DIMENSION(dimen) :: auto_i
86  REAL(kind=dp) :: emp2, emp2_cou, emp2_ex
87  TYPE(qs_environment_type), POINTER :: qs_env
88  TYPE(mp_para_env_type), POINTER :: para_env
89  INTEGER :: unit_nr
90  REAL(kind=dp), DIMENSION(dimen, dimen), OPTIONAL :: c_j
91  REAL(kind=dp), DIMENSION(dimen), OPTIONAL :: auto_j
92 
93  CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_direct_energy'
94  REAL(kind=dp), PARAMETER :: zero = 0.d+00
95 
96  INTEGER :: batch_number, color_sub, counter, elements_ij_proc, group_counter, handle, i, &
97  i_batch, i_batch_start, i_group_counter, j, j_batch_start, j_group_counter, last_batch, &
98  max_batch_number, max_batch_size, max_set, minimum_memory_needed, my_batch_size, &
99  my_i_batch_size, my_i_occupied_end, my_i_occupied_start, my_j_batch_size, &
100  my_j_occupied_end, my_j_occupied_start, natom, ni_occupied, nj_occupied, number_groups, &
101  number_i_subset, number_j_subset, one, sqrt_number_groups, total_i_size_batch_group, &
102  total_j_size_batch_group, virt_i, virt_j
103  INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_sizes, batch_sizes_tmp, &
104  vector_batch_i_size_group, &
105  vector_batch_j_size_group
106  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ij_list_proc, ij_list_proc_temp, &
107  ij_matrix
108  LOGICAL :: alpha_beta_case
109  TYPE(mp_para_env_type), POINTER :: para_env_sub
110 
111  CALL timeset(routinen, handle)
112 
113  alpha_beta_case = .false.
114  IF (PRESENT(c_j) .AND. PRESENT(auto_j)) alpha_beta_case = .true.
115 
116  IF (unit_nr > 0 .AND. mp2_env%potential_parameter%potential_type == do_potential_tshpsc) THEN
117  IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T64,F12.6,A5)') 'Truncated MP2 method, Rt=', &
118  mp2_env%potential_parameter%cutoff_radius, ' Bohr'
119  END IF
120 
121  ! create the local para env
122  ! each para_env_sub corresponds to a group that is going to compute
123  ! all the integrals. To each group a batch I is assigned and the
124  ! communication takes place only inside the group
125  number_groups = para_env%num_pe/mp2_env%mp2_num_proc
126  IF (number_groups*mp2_env%mp2_num_proc /= para_env%num_pe) THEN
127  cpabort(" The number of processors needs to be a multiple of the processors per group. ")
128  END IF
129  IF (number_groups > occ_i*occ_j) THEN
130  IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Number of groups greater then the number of IJ pairs!'
131  IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Consider using more processors per group for improved efficiency'
132  END IF
133 
134  color_sub = para_env%mepos/mp2_env%mp2_num_proc
135  ALLOCATE (para_env_sub)
136  CALL para_env_sub%from_split(para_env, color_sub)
137 
138  ! calculate the maximal size of the batch, according to the maximum RS size
139  max_set = SIZE(mp2_biel%index_table, 2)
140  minimum_memory_needed = (8*(max_set**4))/1024**2
141  IF (minimum_memory_needed > mp2_env%mp2_memory) THEN
142  IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T66,F12.6,A3)') 'Memory required below the minimum, new memory:', &
143  minimum_memory_needed, 'MiB'
144  mp2_env%mp2_memory = minimum_memory_needed
145  END IF
146 
147  ! Distribute the batches over the groups in
148  ! a rectangular fashion, bigger size for J index
149  ! the sizes of the I batches should be as small as possible
150  sqrt_number_groups = int(sqrt(real(number_groups, kind=dp)))
151  DO i = 1, number_groups
152  IF (mod(number_groups, i) == 0) THEN
153  IF (sqrt_number_groups/i <= 1) THEN
154  number_j_subset = i
155  EXIT
156  END IF
157  END IF
158  END DO
159  number_i_subset = number_groups/number_j_subset
160 
161  IF (number_i_subset < number_j_subset) THEN
162  number_i_subset = number_j_subset
163  number_j_subset = number_groups/number_i_subset
164  END IF
165 
166  ! Distribute the I index and the J index over groups
167  total_i_size_batch_group = occ_i/number_i_subset
168  IF (total_i_size_batch_group < 1) total_i_size_batch_group = 1
169  ALLOCATE (vector_batch_i_size_group(0:number_i_subset - 1))
170 
171  vector_batch_i_size_group = 0
172  DO i = 0, number_i_subset - 1
173  vector_batch_i_size_group(i) = total_i_size_batch_group
174  END DO
175  IF (sum(vector_batch_i_size_group) /= occ_i) THEN
176  one = 1
177  IF (sum(vector_batch_i_size_group) > occ_i) one = -1
178  i = -1
179  DO
180  i = i + 1
181  vector_batch_i_size_group(i) = vector_batch_i_size_group(i) + one
182  IF (sum(vector_batch_i_size_group) == occ_i) EXIT
183  IF (i == number_i_subset - 1) i = -1
184  END DO
185  END IF
186 
187  total_j_size_batch_group = occ_j/number_j_subset
188  IF (total_j_size_batch_group < 1) total_j_size_batch_group = 1
189  ALLOCATE (vector_batch_j_size_group(0:number_j_subset - 1))
190 
191  vector_batch_j_size_group = 0
192  DO i = 0, number_j_subset - 1
193  vector_batch_j_size_group(i) = total_j_size_batch_group
194  END DO
195  IF (sum(vector_batch_j_size_group) /= occ_j) THEN
196  one = 1
197  IF (sum(vector_batch_j_size_group) > occ_j) one = -1
198  i = -1
199  DO
200  i = i + 1
201  vector_batch_j_size_group(i) = vector_batch_j_size_group(i) + one
202  IF (sum(vector_batch_j_size_group) == occ_j) EXIT
203  IF (i == number_j_subset - 1) i = -1
204  END DO
205  END IF
206 
207  ! now the starting and ending I and J occupied orbitals are assigned to each group
208  group_counter = 0
209  i_group_counter = 0
210  my_i_occupied_start = 1
211  DO i = 0, number_i_subset - 1
212  my_j_occupied_start = 1
213  j_group_counter = 0
214  DO j = 0, number_j_subset - 1
215  group_counter = group_counter + 1
216  IF (color_sub == group_counter - 1) EXIT
217  my_j_occupied_start = my_j_occupied_start + vector_batch_j_size_group(j)
218  j_group_counter = j_group_counter + 1
219  END DO
220  IF (color_sub == group_counter - 1) EXIT
221  my_i_occupied_start = my_i_occupied_start + vector_batch_i_size_group(i)
222  i_group_counter = i_group_counter + 1
223  END DO
224  my_i_occupied_end = my_i_occupied_start + vector_batch_i_size_group(i_group_counter) - 1
225  my_i_batch_size = vector_batch_i_size_group(i_group_counter)
226  my_j_occupied_end = my_j_occupied_start + vector_batch_j_size_group(j_group_counter) - 1
227  my_j_batch_size = vector_batch_j_size_group(j_group_counter)
228 
229  DEALLOCATE (vector_batch_i_size_group)
230  DEALLOCATE (vector_batch_j_size_group)
231 
232  max_batch_size = min( &
233  max(1, &
234  int(mp2_env%mp2_memory*int(1024, kind=int_8)**2/ &
235  (8*(2*dimen - occ_i)*int(dimen, kind=int_8)*my_j_batch_size/para_env_sub%num_pe))) &
236  , my_i_batch_size)
237  IF (max_batch_size < 1) THEN
238  max_batch_size = int((8*(occ_i + 1)*int(dimen, kind=int_8)**2/para_env%num_pe)/1024**2)
239  IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T72,I6,A3)') 'More memory required, at least:', max_batch_size, 'MiB'
240  max_batch_size = 1
241  END IF
242 
243  ! create the size of the batches inside the group
244  my_batch_size = my_i_batch_size
245  ALLOCATE (batch_sizes(my_batch_size))
246 
247  batch_sizes = -huge(0)
248  batch_number = 0
249  DO i = 1, my_batch_size
250  IF (i*max_batch_size > my_batch_size) EXIT
251  batch_number = batch_number + 1
252  batch_sizes(i) = max_batch_size
253  END DO
254  last_batch = my_batch_size - max_batch_size*batch_number
255  IF (last_batch > 0) THEN
256  batch_number = batch_number + 1
257  batch_sizes(batch_number) = last_batch
258  END IF
259 
260  ALLOCATE (batch_sizes_tmp(batch_number))
261  batch_sizes_tmp(1:batch_number) = batch_sizes(1:batch_number)
262  DEALLOCATE (batch_sizes)
263  ALLOCATE (batch_sizes(batch_number))
264  batch_sizes(:) = batch_sizes_tmp
265  DEALLOCATE (batch_sizes_tmp)
266 
267  max_batch_size = maxval(batch_sizes)
268  CALL para_env%max(max_batch_size)
269  max_batch_number = batch_number
270  CALL para_env%max(max_batch_number)
271  IF (unit_nr > 0) THEN
272  WRITE (unit_nr, '(T3,A,T76,I5)') 'Maximum used batch size: ', max_batch_size
273  WRITE (unit_nr, '(T3,A,T76,I5)') 'Number of integral recomputations: ', max_batch_number
274  CALL m_flush(unit_nr)
275  END IF
276 
277  ! Batches sizes exceed the occupied orbitals allocated for group
278  cpassert(sum(batch_sizes) <= my_batch_size)
279 
280  virt_i = dimen - occ_i
281  virt_j = dimen - occ_j
282  natom = SIZE(mp2_biel%index_table, 1)
283 
284  CALL para_env%sync()
285  emp2 = zero
286  emp2_cou = zero
287  emp2_ex = zero
288  i_batch_start = my_i_occupied_start - 1
289  j_batch_start = my_j_occupied_start - 1
290  nj_occupied = my_j_batch_size
291  DO i_batch = 1, batch_number
292 
293  ni_occupied = batch_sizes(i_batch)
294 
295  counter = -1
296  ALLOCATE (ij_matrix(ni_occupied, nj_occupied))
297 
298  ij_matrix = 0
299  DO i = 1, ni_occupied
300  DO j = 1, nj_occupied
301  counter = counter + 1
302  IF (mod(counter, para_env_sub%num_pe) == para_env_sub%mepos) THEN
303  ij_matrix(i, j) = ij_matrix(i, j) + 1
304  END IF
305  END DO
306  END DO
307 
308  ALLOCATE (ij_list_proc_temp(ni_occupied*occ_j, 2))
309 
310  elements_ij_proc = 0
311  DO i = 1, ni_occupied
312  DO j = 1, nj_occupied
313  IF (ij_matrix(i, j) == 0) cycle
314  elements_ij_proc = elements_ij_proc + 1
315  ij_list_proc_temp(elements_ij_proc, 1) = i
316  ij_list_proc_temp(elements_ij_proc, 2) = j
317  END DO
318  END DO
319  DEALLOCATE (ij_matrix)
320 
321  ALLOCATE (ij_list_proc(elements_ij_proc, 2))
322  DO i = 1, elements_ij_proc
323  ij_list_proc(i, 1) = ij_list_proc_temp(i, 1)
324  ij_list_proc(i, 2) = ij_list_proc_temp(i, 2)
325  END DO
326  DEALLOCATE (ij_list_proc_temp)
327 
328  IF (.NOT. alpha_beta_case) THEN
329  CALL mp2_canonical_direct_single_batch(emp2, emp2_cou, emp2_ex, mp2_env, qs_env, para_env_sub, &
330  mp2_biel, dimen, c_i, auto_i, i_batch_start, ni_occupied, occ_i, &
331  elements_ij_proc, ij_list_proc, nj_occupied, j_batch_start)
332  ELSE
333  CALL mp2_canonical_direct_single_batch(emp2, emp2_cou, emp2_ex, mp2_env, qs_env, para_env_sub, &
334  mp2_biel, dimen, c_i, auto_i, i_batch_start, ni_occupied, occ_i, &
335  elements_ij_proc, ij_list_proc, nj_occupied, j_batch_start, &
336  occ_j, c_j, auto_j)
337  END IF
338 
339  i_batch_start = i_batch_start + ni_occupied
340 
341  DEALLOCATE (ij_list_proc)
342 
343  END DO
344 
345  CALL para_env%sum(emp2_cou)
346  CALL para_env%sum(emp2_ex)
347  CALL para_env%sum(emp2)
348 
349  CALL mp_para_env_release(para_env_sub)
350 
351  CALL timestop(handle)
352 
353  END SUBROUTINE mp2_direct_energy
354 
355 ! **************************************************************************************************
356 !> \brief ...
357 !> \param Emp2 ...
358 !> \param Emp2_Cou ...
359 !> \param Emp2_ex ...
360 !> \param mp2_env ...
361 !> \param qs_env ...
362 !> \param para_env ...
363 !> \param mp2_biel ...
364 !> \param dimen ...
365 !> \param C ...
366 !> \param Auto ...
367 !> \param i_batch_start ...
368 !> \param Ni_occupied ...
369 !> \param occupied ...
370 !> \param elements_ij_proc ...
371 !> \param ij_list_proc ...
372 !> \param Nj_occupied ...
373 !> \param j_batch_start ...
374 !> \param occupied_beta ...
375 !> \param C_beta ...
376 !> \param Auto_beta ...
377 !> \param Integ_MP2 ...
378 !> \par History
379 !> 06.2011 created [Mauro Del Ben]
380 !> \author Mauro Del Ben
381 ! **************************************************************************************************
382  SUBROUTINE mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env, &
383  mp2_biel, dimen, C, Auto, i_batch_start, Ni_occupied, &
384  occupied, elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start, &
385  occupied_beta, C_beta, Auto_beta, Integ_MP2)
386 
387  REAL(kind=dp), INTENT(INOUT) :: emp2, emp2_cou, emp2_ex
388  TYPE(mp2_type) :: mp2_env
389  TYPE(qs_environment_type), POINTER :: qs_env
390  TYPE(mp_para_env_type), INTENT(IN) :: para_env
391  TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
392  INTEGER, INTENT(IN) :: dimen
393  REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c
394  REAL(kind=dp), DIMENSION(dimen), INTENT(IN) :: auto
395  INTEGER, INTENT(IN) :: i_batch_start, ni_occupied, occupied, &
396  elements_ij_proc
397  INTEGER, DIMENSION(elements_ij_proc, 2), &
398  INTENT(IN) :: ij_list_proc
399  INTEGER, INTENT(IN) :: nj_occupied, j_batch_start
400  INTEGER, INTENT(IN), OPTIONAL :: occupied_beta
401  REAL(kind=dp), DIMENSION(dimen, dimen), &
402  INTENT(IN), OPTIONAL :: c_beta
403  REAL(kind=dp), DIMENSION(dimen), INTENT(IN), &
404  OPTIONAL :: auto_beta
405  REAL(kind=dp), ALLOCATABLE, &
406  DIMENSION(:, :, :, :), INTENT(OUT), OPTIONAL :: integ_mp2
407 
408  CHARACTER(LEN=*), PARAMETER :: routinen = 'mp2_canonical_direct_single_batch'
409 
410  INTEGER :: case_index, counter_proc, elements_ij_proc_rec, elements_kl_proc, global_counter, &
411  handle, i, i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
412  i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_start, iatom, iatom_end, &
413  iatom_start, iib, ij_elem_max, ikind, index_ij_rec, index_ij_send, index_kl, &
414  index_proc_ij, index_proc_shift, iset, jatom, jatom_end, jatom_start, jjb, jkind, jset, &
415  katom, katom_end, katom_start, kkb, kkind, kset, latom, latom_end, latom_start, lkind, &
416  llb, lset, max_num_call_sec_transf, max_pgf, max_set, multiple
417  INTEGER :: my_num_call_sec_transf, natom, ncob, nints, nseta, nsetb, nsgf_max, nspins, &
418  primitive_counter, proc_num, proc_receive, proc_send, r_offset_rec, rsize_rec, &
419  s_offset_rec, same_size_kl_index, sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, &
420  sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, ssize_rec, &
421  step_size, total_num_rs_task
422  INTEGER(int_8) :: estimate_to_store_int, neris_tmp, &
423  neris_total, nprim_ints
424  INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, nimages, proc_num_task, &
425  same_size_kl_elements_counter
426  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: kl_list_proc, task_counter_rs, &
427  task_counter_rs_temp
428  INTEGER, DIMENSION(4) :: rs_counter_temp
429  INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
430  INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, &
431  lc_min, ld_max, ld_min, npgfa, npgfb, &
432  npgfc, npgfd, nsgfa, nsgfb, nsgfc, &
433  nsgfd
434  INTEGER, DIMENSION(:, :), POINTER :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
435  LOGICAL :: alpha_beta_case, case_send_receive, &
436  copy_integrals, do_periodic
437  LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
438  REAL(kind=dp) :: cartesian_estimate, coeffs_kind_max0, cost_tmp, eps_schwarz, ln_10, &
439  log10_eps_schwarz, log10_pmax, max_contraction_val, max_val1, max_val2, max_val2_set, &
440  pmax_atom, pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, &
441  screen_kind_kl
442  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: cost_rs, cost_rs_temp, ee_buffer1, &
443  ee_buffer2, ee_primitives_tmp, &
444  ee_work, ee_work2, primitive_integrals
445  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_rs_mat_rec, c_beta_t, max_contraction
446  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib, bib_rs_mat_rec_big, zero_mat_big
447  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :, :) :: bi1, mnrs
448  REAL(kind=dp), DIMENSION(:), POINTER :: p_work
449  REAL(kind=dp), DIMENSION(:, :), POINTER :: shm_pmax_block, zeta, zetb, zetc, zetd
450  REAL(kind=dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
451  sphi_c_ext_set, sphi_d_ext_set
452  REAL(kind=dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
453  sphi_d_ext
454  REAL(kind=dp), DIMENSION(dimen, 2) :: zero_mat
455  REAL(kind=dp), DIMENSION(dimen, dimen) :: c_t
456  TYPE(cell_type), POINTER :: cell
457  TYPE(cp_libint_t) :: private_lib
458  TYPE(cp_logger_type), POINTER :: logger
459  TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
460  TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
461  TYPE(hfx_pgf_product_list), ALLOCATABLE, &
462  DIMENSION(:) :: pgf_product_list
463  TYPE(hfx_potential_type) :: mp2_potential_parameter
464  TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
465  POINTER :: screen_coeffs_kind, tmp_r_1, tmp_r_2, &
466  tmp_screen_pgf1, tmp_screen_pgf2
467  TYPE(hfx_screen_coeff_type), &
468  DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
469  TYPE(hfx_screen_coeff_type), &
470  DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
471  TYPE(hfx_type), POINTER :: actual_x_data
472  TYPE(pair_list_type_mp2) :: list_ij, list_kl
473  TYPE(pair_set_list_type), ALLOCATABLE, &
474  DIMENSION(:) :: set_list_ij, set_list_kl
475  TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
476 
477  CALL timeset(routinen, handle)
478 
479  ! The Integ_MP2 will contain the (ia|jb) integrals, necessary for example
480  ! for the RI-MP2 basis optimization. In this case the number of ij batches
481  ! has to be equal to 1 (all integrals over molecular orbitals are computed
482  ! in a single step).
483  copy_integrals = .false.
484  IF (PRESENT(integ_mp2)) copy_integrals = .true.
485 
486  alpha_beta_case = .false.
487 
488  CALL prepare_integral_calc(cell, qs_env, mp2_env, para_env, mp2_potential_parameter, actual_x_data, &
489  do_periodic, basis_parameter, max_set, particle_set, natom, kind_of, &
490  nsgf_max, primitive_integrals, ee_work, ee_work2, ee_buffer1, ee_buffer2, &
491  ee_primitives_tmp, nspins, max_contraction, max_pgf, pgf_list_ij, &
492  pgf_list_kl, pgf_product_list, nimages, eps_schwarz, log10_eps_schwarz, &
493  private_lib, p_work, screen_coeffs_set, screen_coeffs_kind, screen_coeffs_pgf, &
494  radii_pgf)
495 
496  ln_10 = log(10.0_dp)
497 
498  neris_tmp = 0_int_8
499  neris_total = 0_int_8
500  nprim_ints = 0_int_8
501 
502 !!!!!!!!!
503  ALLOCATE (list_ij%elements(natom**2))
504  ALLOCATE (list_kl%elements(natom**2))
505 !!!!!!!!!
506 
507  coeffs_kind_max0 = maxval(screen_coeffs_kind(:, :)%x(2))
508  ALLOCATE (set_list_ij((max_set*natom)**2))
509  ALLOCATE (set_list_kl((max_set*natom)**2))
510 
511  !! precalculate maximum density matrix elements in blocks
512  actual_x_data%pmax_block = 0.0_dp
513  shm_pmax_block => actual_x_data%pmax_block
514 
515  shm_atomic_pair_list => actual_x_data%atomic_pair_list
516 
517  iatom_start = 1
518  iatom_end = natom
519  jatom_start = 1
520  jatom_end = natom
521  katom_start = 1
522  katom_end = natom
523  latom_start = 1
524  latom_end = natom
525 
526  CALL build_pair_list_mp2(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
527  jatom_start, jatom_end, &
528  kind_of, basis_parameter, particle_set, &
529  do_periodic, screen_coeffs_set, screen_coeffs_kind, &
530  coeffs_kind_max0, log10_eps_schwarz, cell, 0.d+00, &
531  shm_atomic_pair_list)
532 
533  CALL build_pair_list_mp2(natom, list_kl, set_list_kl, katom_start, katom_end, &
534  latom_start, latom_end, &
535  kind_of, basis_parameter, particle_set, &
536  do_periodic, screen_coeffs_set, screen_coeffs_kind, &
537  coeffs_kind_max0, log10_eps_schwarz, cell, 0.d+00, &
538  shm_atomic_pair_list)
539 
540  ALLOCATE (bib(dimen, dimen, elements_ij_proc))
541  bib = 0.0d+00
542  c_t = transpose(c)
543 
544  IF (PRESENT(occupied_beta) .AND. PRESENT(c_beta) .AND. PRESENT(auto_beta)) THEN
545  ALLOCATE (c_beta_t(dimen, dimen))
546  c_beta_t(:, :) = transpose(c_beta)
547  alpha_beta_case = .true.
548  END IF
549 
550  ij_elem_max = elements_ij_proc
551  CALL para_env%max(ij_elem_max)
552 
553  ! calculate the minimum multiple of num_pe >= to Ni_occupied*occupied, in such a way
554  ! that the i, j loop is performed exactly the same number of time for each procewssor
555  multiple = 0
556  DO
557  multiple = multiple + para_env%num_pe
558  IF (multiple >= ni_occupied*nj_occupied) EXIT
559  END DO
560 
561  ! proc_num_task contains the number of times the second occupied
562  ! orbital transformation is called for each processor, needs for balancing
563  ! the point to point send
564  ALLOCATE (proc_num_task(0:para_env%num_pe - 1))
565 
566  proc_num_task = 0
567 
568  counter_proc = 0
569  DO i_list_ij = 1, list_ij%n_element
570  iatom = list_ij%elements(i_list_ij)%pair(1)
571  jatom = list_ij%elements(i_list_ij)%pair(2)
572  i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
573  i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
574  ikind = list_ij%elements(i_list_ij)%kind_pair(1)
575  jkind = list_ij%elements(i_list_ij)%kind_pair(2)
576 
577  nsgfb => basis_parameter(jkind)%nsgf
578  nsgfa => basis_parameter(ikind)%nsgf
579 
580  DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
581  iset = set_list_ij(i_set_list_ij)%pair(1)
582  jset = set_list_ij(i_set_list_ij)%pair(2)
583  IF (iatom == jatom .AND. jset < iset) cycle
584 
585  counter_proc = counter_proc + 1
586  proc_num = mod(counter_proc, para_env%num_pe)
587 
588  proc_num_task(proc_num) = proc_num_task(proc_num) + 1
589 
590  END DO
591  END DO
592  ! calculate the exact maximum number of calls to the second occupied
593  ! orbital transformation
594  ! max_num_call_sec_transf=MAXVAL(proc_num_task)
595 
596  ! distribute the RS pair over all processor
597  ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 3))
598 
599  kl_list_proc = 0
600 
601  counter_proc = 0
602  elements_kl_proc = 0
603  DO i_list_ij = 1, list_ij%n_element
604  iatom = list_ij%elements(i_list_ij)%pair(1)
605  jatom = list_ij%elements(i_list_ij)%pair(2)
606  i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
607  i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
608  ikind = list_ij%elements(i_list_ij)%kind_pair(1)
609  jkind = list_ij%elements(i_list_ij)%kind_pair(2)
610 
611  nsgfb => basis_parameter(jkind)%nsgf
612  nsgfa => basis_parameter(ikind)%nsgf
613 
614  DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
615  iset = set_list_ij(i_set_list_ij)%pair(1)
616  jset = set_list_ij(i_set_list_ij)%pair(2)
617  IF (iatom == jatom .AND. jset < iset) cycle
618 
619  counter_proc = counter_proc + 1
620  proc_num = mod(counter_proc, para_env%num_pe)
621 
622  IF (proc_num == para_env%mepos) THEN
623  elements_kl_proc = elements_kl_proc + 1
624  kl_list_proc(elements_kl_proc, 1) = i_list_ij
625  kl_list_proc(elements_kl_proc, 2) = i_set_list_ij
626  kl_list_proc(elements_kl_proc, 3) = counter_proc
627  END IF
628 
629  END DO
630  END DO
631 
632  total_num_rs_task = sum(proc_num_task)
633  ALLOCATE (task_counter_rs(total_num_rs_task, 4))
634 
635  ALLOCATE (cost_rs(total_num_rs_task))
636 
637  task_counter_rs = 0
638  cost_rs = 0.0_dp
639 
640  DO case_index = 1, 2
641 
642  my_num_call_sec_transf = 0
643  DO index_kl = 1, elements_kl_proc
644 
645  i_list_ij = kl_list_proc(index_kl, 1)
646  i_set_list_ij = kl_list_proc(index_kl, 2)
647 
648  iatom = list_ij%elements(i_list_ij)%pair(1)
649  jatom = list_ij%elements(i_list_ij)%pair(2)
650  i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
651  i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
652  ikind = list_ij%elements(i_list_ij)%kind_pair(1)
653  jkind = list_ij%elements(i_list_ij)%kind_pair(2)
654  ra = list_ij%elements(i_list_ij)%r1
655  rb = list_ij%elements(i_list_ij)%r2
656  rab2 = list_ij%elements(i_list_ij)%dist2
657 
658  la_max => basis_parameter(ikind)%lmax
659  la_min => basis_parameter(ikind)%lmin
660  npgfa => basis_parameter(ikind)%npgf
661  nseta = basis_parameter(ikind)%nset
662  zeta => basis_parameter(ikind)%zet
663  nsgfa => basis_parameter(ikind)%nsgf
664  sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
665  nsgfl_a => basis_parameter(ikind)%nsgfl
666  sphi_a_u1 = ubound(sphi_a_ext, 1)
667  sphi_a_u2 = ubound(sphi_a_ext, 2)
668  sphi_a_u3 = ubound(sphi_a_ext, 3)
669 
670  lb_max => basis_parameter(jkind)%lmax
671  lb_min => basis_parameter(jkind)%lmin
672  npgfb => basis_parameter(jkind)%npgf
673  nsetb = basis_parameter(jkind)%nset
674  zetb => basis_parameter(jkind)%zet
675  nsgfb => basis_parameter(jkind)%nsgf
676  sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
677  nsgfl_b => basis_parameter(jkind)%nsgfl
678  sphi_b_u1 = ubound(sphi_b_ext, 1)
679  sphi_b_u2 = ubound(sphi_b_ext, 2)
680  sphi_b_u3 = ubound(sphi_b_ext, 3)
681 
682  iset = set_list_ij(i_set_list_ij)%pair(1)
683  jset = set_list_ij(i_set_list_ij)%pair(2)
684 
685  ncob = npgfb(jset)*ncoset(lb_max(jset))
686  max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
687  screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
688 
689  sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
690  sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
691 
692  IF (case_index == 1) THEN
693  global_counter = kl_list_proc(index_kl, 3)
694  task_counter_rs(global_counter, 1) = i_list_ij
695  task_counter_rs(global_counter, 2) = i_set_list_ij
696  task_counter_rs(global_counter, 3) = nsgfb(jset)*nsgfa(iset)
697  END IF
698 
699  IF (ALLOCATED(bi1)) DEALLOCATE (bi1)
700  ALLOCATE (bi1(dimen, ni_occupied, nsgfb(jset), nsgfa(iset)))
701 
702  bi1 = 0.d+00
703 
704  DO i_list_kl = 1, list_kl%n_element
705 
706  katom = list_kl%elements(i_list_kl)%pair(1)
707  latom = list_kl%elements(i_list_kl)%pair(2)
708 
709  i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
710  i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
711  kkind = list_kl%elements(i_list_kl)%kind_pair(1)
712  lkind = list_kl%elements(i_list_kl)%kind_pair(2)
713  rc = list_kl%elements(i_list_kl)%r1
714  rd = list_kl%elements(i_list_kl)%r2
715  rcd2 = list_kl%elements(i_list_kl)%dist2
716 
717  pmax_atom = 0.0_dp
718 
719  screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
720  screen_coeffs_kind(jkind, ikind)%x(2)
721  screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
722  screen_coeffs_kind(lkind, kkind)%x(2)
723 
724  !!!!! Change the loop order
725  IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) cycle
726  !!!!!
727  IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) cycle
728 
729  lc_max => basis_parameter(kkind)%lmax
730  lc_min => basis_parameter(kkind)%lmin
731  npgfc => basis_parameter(kkind)%npgf
732  zetc => basis_parameter(kkind)%zet
733  nsgfc => basis_parameter(kkind)%nsgf
734  sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
735  nsgfl_c => basis_parameter(kkind)%nsgfl
736  sphi_c_u1 = ubound(sphi_c_ext, 1)
737  sphi_c_u2 = ubound(sphi_c_ext, 2)
738  sphi_c_u3 = ubound(sphi_c_ext, 3)
739 
740  ld_max => basis_parameter(lkind)%lmax
741  ld_min => basis_parameter(lkind)%lmin
742  npgfd => basis_parameter(lkind)%npgf
743  zetd => basis_parameter(lkind)%zet
744  nsgfd => basis_parameter(lkind)%nsgf
745  sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
746  nsgfl_d => basis_parameter(lkind)%nsgfl
747  sphi_d_u1 = ubound(sphi_d_ext, 1)
748  sphi_d_u2 = ubound(sphi_d_ext, 2)
749  sphi_d_u3 = ubound(sphi_d_ext, 3)
750 
751  DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
752  kset = set_list_kl(i_set_list_kl)%pair(1)
753  lset = set_list_kl(i_set_list_kl)%pair(2)
754 
755  IF (katom == latom .AND. lset < kset) cycle
756 
757  max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
758  screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
759  max_val2 = max_val1 + max_val2_set
760 
761  !! Near field screening
762  IF (max_val2 + pmax_atom < log10_eps_schwarz) cycle
763  sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
764  sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
765  !! get max_vals if we screen on initial density
766  pmax_entry = 0.0_dp
767 
768  log10_pmax = pmax_entry
769  max_val2 = max_val2 + log10_pmax
770  IF (max_val2 < log10_eps_schwarz) cycle
771  pmax_entry = exp(log10_pmax*ln_10)
772 
773  IF (case_index == 2) THEN
774  IF (ALLOCATED(mnrs)) DEALLOCATE (mnrs)
775  ALLOCATE (mnrs(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset)))
776 
777  mnrs = 0.d+00
778 
779  max_contraction_val = max_contraction(iset, iatom)* &
780  max_contraction(jset, jatom)* &
781  max_contraction(kset, katom)* &
782  max_contraction(lset, latom)*pmax_entry
783  tmp_r_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
784  tmp_r_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
785  tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
786  tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
787 
788  CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
789  la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
790  lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
791  nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
792  sphi_a_u1, sphi_a_u2, sphi_a_u3, &
793  sphi_b_u1, sphi_b_u2, sphi_b_u3, &
794  sphi_c_u1, sphi_c_u2, sphi_c_u3, &
795  sphi_d_u1, sphi_d_u2, sphi_d_u3, &
796  zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
797  zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
798  primitive_integrals, &
799  mp2_potential_parameter, &
800  actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
801  screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
802  max_contraction_val, cartesian_estimate, cell, neris_tmp, &
803  log10_pmax, log10_eps_schwarz, &
804  tmp_r_1, tmp_r_2, tmp_screen_pgf1, tmp_screen_pgf2, &
805  pgf_list_ij, pgf_list_kl, pgf_product_list, &
806  nsgfl_a(:, iset), nsgfl_b(:, jset), &
807  nsgfl_c(:, kset), nsgfl_d(:, lset), &
808  sphi_a_ext_set, &
809  sphi_b_ext_set, &
810  sphi_c_ext_set, &
811  sphi_d_ext_set, &
812  ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
813  nimages, do_periodic, p_work)
814 
815  nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
816  neris_total = neris_total + nints
817  nprim_ints = nprim_ints + neris_tmp
818  IF (cartesian_estimate == 0.0_dp) cartesian_estimate = tiny(cartesian_estimate)
819  estimate_to_store_int = exponent(cartesian_estimate)
820  estimate_to_store_int = max(estimate_to_store_int, -15_int_8)
821  cartesian_estimate = set_exponent(1.0_dp, estimate_to_store_int + 1)
822 
823  IF (cartesian_estimate < eps_schwarz) cycle
824 
825  primitive_counter = 0
826  DO llb = 1, nsgfd(lset)
827  DO kkb = 1, nsgfc(kset)
828  DO jjb = 1, nsgfb(jset)
829  DO iib = 1, nsgfa(iset)
830  primitive_counter = primitive_counter + 1
831  mnrs(llb, kkb, jjb, iib) = primitive_integrals(primitive_counter)
832  END DO
833  END DO
834  END DO
835  END DO
836 
837  CALL transform_occupied_orbitals_first(dimen, iatom, jatom, katom, latom, &
838  iset, jset, kset, lset, &
839  nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
840  i_batch_start, ni_occupied, &
841  mnrs, c_t, mp2_biel, bi1)
842  ELSE
843  task_counter_rs(global_counter, 4) = task_counter_rs(global_counter, 4) + 1
844 
845  cost_tmp = 0.0_dp
846  cost_tmp = cost_model(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset), &
847  npgfd(lset), npgfc(kset), npgfb(jset), npgfa(iset), &
848  max_val2/log10_eps_schwarz, &
850  cost_rs(global_counter) = cost_rs(global_counter) + cost_tmp
851  END IF
852 
853  END DO ! i_set_list_kl
854  END DO ! i_list_kl
855 
856  IF (case_index == 2) THEN
857  my_num_call_sec_transf = my_num_call_sec_transf + 1
858  IF (.NOT. alpha_beta_case) THEN
859  IF (.NOT. mp2_env%direct_canonical%big_send) THEN
860  CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
861  nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
862  bi1, c_t, mp2_biel, para_env, elements_ij_proc, &
863  multiple, bib)
864  ELSE
865  CALL transform_occupied_orbitals_second_big( &
866  dimen, iatom, jatom, iset, jset, &
867  nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
868  ij_elem_max, bi1, c_t, mp2_biel, para_env, elements_ij_proc, bib)
869  END IF
870  ELSE
871  IF (.NOT. mp2_env%direct_canonical%big_send) THEN
872  CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
873  nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
874  bi1, c_beta_t, mp2_biel, para_env, elements_ij_proc, &
875  multiple, bib)
876  ELSE
877  CALL transform_occupied_orbitals_second_big( &
878  dimen, iatom, jatom, iset, jset, &
879  nsgfa(iset), nsgfb(jset), ni_occupied, nj_occupied, j_batch_start, &
880  ij_elem_max, bi1, c_beta_t, mp2_biel, para_env, elements_ij_proc, bib)
881  END IF
882  END IF
883  END IF
884 
885  END DO !i_list_ij
886 
887  IF (case_index == 1) THEN
888  CALL para_env%sum(task_counter_rs)
889  CALL para_env%sum(cost_rs)
890  ALLOCATE (task_counter_rs_temp(total_num_rs_task, 4))
891 
892  ALLOCATE (cost_rs_temp(total_num_rs_task))
893 
894  step_size = 1
895  ALLOCATE (same_size_kl_elements_counter((nsgf_max**2 + 1)/step_size + 1))
896 
897  same_size_kl_elements_counter = 0
898 
899  same_size_kl_index = 0
900  global_counter = 0
901  DO iib = nsgf_max**2 + 1, 0, -step_size
902  DO jjb = 1, total_num_rs_task
903  IF (task_counter_rs(jjb, 3) > iib - step_size .AND. task_counter_rs(jjb, 3) <= iib) THEN
904  global_counter = global_counter + 1
905  task_counter_rs_temp(global_counter, 1:4) = task_counter_rs(jjb, 1:4)
906  cost_rs_temp(global_counter) = cost_rs(jjb)
907  END IF
908  END DO
909  same_size_kl_index = same_size_kl_index + 1
910  same_size_kl_elements_counter(same_size_kl_index) = global_counter
911  END DO
912 
913  DEALLOCATE (task_counter_rs)
914  DEALLOCATE (cost_rs)
915 
916  i_start = 1
917  DO same_size_kl_index = 1, SIZE(same_size_kl_elements_counter)
918  DO iib = i_start, same_size_kl_elements_counter(same_size_kl_index)
919  DO jjb = iib + 1, same_size_kl_elements_counter(same_size_kl_index)
920 
921  IF (cost_rs_temp(jjb) >= cost_rs_temp(iib)) THEN
922  rs_counter_temp = task_counter_rs_temp(iib, 1:4)
923  task_counter_rs_temp(iib, 1:4) = task_counter_rs_temp(jjb, 1:4)
924  task_counter_rs_temp(jjb, 1:4) = rs_counter_temp
925 
926  cost_tmp = cost_rs_temp(iib)
927  cost_rs_temp(iib) = cost_rs_temp(jjb)
928  cost_rs_temp(jjb) = cost_tmp
929  END IF
930  END DO
931  END DO
932  i_start = same_size_kl_elements_counter(same_size_kl_index) + 1
933  END DO
934 
935  proc_num_task = 0
936  DO counter_proc = 1, total_num_rs_task
937  proc_num = mod(counter_proc, para_env%num_pe)
938  proc_num_task(proc_num) = proc_num_task(proc_num) + 1
939  END DO
940 
941  max_num_call_sec_transf = maxval(proc_num_task)
942 
943  DEALLOCATE (kl_list_proc)
944  ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 2))
945 
946  kl_list_proc = 0
947 
948  elements_kl_proc = 0
949  DO counter_proc = 1, total_num_rs_task
950  proc_num = mod(counter_proc, para_env%num_pe)
951  IF (proc_num == para_env%mepos) THEN
952  elements_kl_proc = elements_kl_proc + 1
953  kl_list_proc(elements_kl_proc, 1) = task_counter_rs_temp(counter_proc, 1)
954  kl_list_proc(elements_kl_proc, 2) = task_counter_rs_temp(counter_proc, 2)
955  END IF
956  END DO
957 
958  DEALLOCATE (task_counter_rs_temp)
959  DEALLOCATE (cost_rs_temp)
960  END IF
961  END DO ! case_index
962 
963  size_parameter_send(1) = 1
964  size_parameter_send(2) = 1
965  size_parameter_send(3) = 0
966  size_parameter_send(4) = 0
967  size_parameter_send(5) = elements_ij_proc
968 
969  IF (mp2_env%direct_canonical%big_send) THEN
970  ALLOCATE (zero_mat_big(dimen, 2, ij_elem_max))
971 
972  END IF
973 
974  DO iib = my_num_call_sec_transf + 1, max_num_call_sec_transf
975  DO index_proc_shift = 0, para_env%num_pe - 1
976 
977  proc_send = modulo(para_env%mepos + index_proc_shift, para_env%num_pe)
978  proc_receive = modulo(para_env%mepos - index_proc_shift, para_env%num_pe)
979 
980  case_send_receive = (proc_send /= para_env%mepos)
981 
982  IF (case_send_receive) THEN
983  ! the processor starts to send (and receive?)
984 
985  CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
986 
987  rsize_rec = size_parameter_rec(1)
988  ssize_rec = size_parameter_rec(2)
989  r_offset_rec = size_parameter_rec(3)
990  s_offset_rec = size_parameter_rec(4)
991  elements_ij_proc_rec = size_parameter_rec(5)
992  IF (.NOT. mp2_env%direct_canonical%big_send) THEN
993  ALLOCATE (bib_rs_mat_rec(dimen, rsize_rec + ssize_rec))
994  ELSE
995  ALLOCATE (bib_rs_mat_rec_big(dimen, rsize_rec + ssize_rec, ij_elem_max))
996  END IF
997  ELSE
998  elements_ij_proc_rec = elements_ij_proc
999  END IF
1000 
1001  IF (.NOT. mp2_env%direct_canonical%big_send) THEN
1002  index_ij_send = 0
1003  index_ij_rec = 0
1004  DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
1005 
1006  zero_mat = 0.d+00
1007 
1008  IF (case_send_receive) THEN
1009 
1010  CALL para_env%sendrecv(zero_mat, proc_send, bib_rs_mat_rec, proc_receive)
1011 
1012  index_ij_rec = index_ij_rec + 1
1013  IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1014 
1015  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) = &
1016  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) + &
1017  bib_rs_mat_rec(1:dimen, 1:rsize_rec)
1018 
1019  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) = &
1020  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) + &
1021  bib_rs_mat_rec(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec)
1022 
1023  END IF
1024  END IF
1025 
1026  END DO
1027  ELSE
1028  zero_mat_big = 0.d+00
1029 
1030  IF (case_send_receive) THEN
1031 
1032  CALL para_env%sendrecv(zero_mat_big, proc_send, bib_rs_mat_rec_big, proc_receive)
1033 
1034  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) = &
1035  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) + &
1036  bib_rs_mat_rec_big(1:dimen, 1:rsize_rec, 1:elements_ij_proc)
1037 
1038  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) = &
1039  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) + &
1040  bib_rs_mat_rec_big(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec, 1:elements_ij_proc)
1041 
1042  END IF
1043  END IF
1044 
1045  IF (case_send_receive) THEN
1046  IF (.NOT. mp2_env%direct_canonical%big_send) THEN
1047  DEALLOCATE (bib_rs_mat_rec)
1048  ELSE
1049  DEALLOCATE (bib_rs_mat_rec_big)
1050  END IF
1051  END IF
1052 
1053  END DO
1054  END DO
1055 
1056  IF (mp2_env%direct_canonical%big_send) THEN
1057  DEALLOCATE (zero_mat_big)
1058  END IF
1059 
1060  logger => cp_get_default_logger()
1061 
1062  DEALLOCATE (primitive_integrals)
1063 
1064  IF (.NOT. alpha_beta_case) THEN
1065  CALL transform_virtual_orbitals_and_accumulate(dimen, occupied, dimen - occupied, i_batch_start, &
1066  j_batch_start, bib, c, auto, elements_ij_proc, ij_list_proc, &
1067  nspins, emp2, emp2_cou, emp2_ex)
1068  ELSE
1069  CALL transform_virtual_orbitals_and_accumulate_abcase( &
1070  dimen, occupied, occupied_beta, dimen - occupied, dimen - occupied_beta, &
1071  i_batch_start, j_batch_start, &
1072  bib, c, c_beta, auto, auto_beta, &
1073  elements_ij_proc, ij_list_proc, emp2, emp2_cou)
1074  DEALLOCATE (c_beta_t)
1075  END IF
1076 
1077  IF (copy_integrals) THEN
1078  IF (.NOT. alpha_beta_case) THEN
1079  ALLOCATE (integ_mp2(dimen - occupied, dimen - occupied, occupied, occupied))
1080  integ_mp2 = 0.0_dp
1081  DO i = 1, elements_ij_proc
1082  iib = ij_list_proc(i, 1)
1083  jjb = ij_list_proc(i, 2)
1084  integ_mp2(:, :, iib + i_batch_start, jjb + j_batch_start) = bib(1:dimen - occupied, 1:dimen - occupied, i)
1085  END DO
1086  ELSE
1087  ALLOCATE (integ_mp2(dimen - occupied, dimen - occupied_beta, occupied, occupied_beta))
1088  integ_mp2 = 0.0_dp
1089  DO i = 1, elements_ij_proc
1090  iib = ij_list_proc(i, 1)
1091  jjb = ij_list_proc(i, 2)
1092  integ_mp2(:, :, iib + i_batch_start, jjb + j_batch_start) = bib(1:dimen - occupied, 1:dimen - occupied_beta, i)
1093  END DO
1094  END IF
1095  END IF
1096  DEALLOCATE (bib)
1097 
1098  DEALLOCATE (set_list_ij, set_list_kl)
1099 
1100  DO i = 1, max_pgf**2
1101  DEALLOCATE (pgf_list_ij(i)%image_list)
1102  DEALLOCATE (pgf_list_kl(i)%image_list)
1103  END DO
1104 
1105  DEALLOCATE (pgf_list_ij)
1106  DEALLOCATE (pgf_list_kl)
1107  DEALLOCATE (pgf_product_list)
1108 
1109  DEALLOCATE (max_contraction, kind_of)
1110 
1111  DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
1112 
1113  DEALLOCATE (nimages)
1114 
1115  IF (mp2_env%potential_parameter%potential_type == do_potential_tshpsc) THEN
1116  init_tshpsc_lmax = -1
1117  CALL free_c0()
1118  END IF
1119 
1120  CALL timestop(handle)
1121 
1122  END SUBROUTINE mp2_canonical_direct_single_batch
1123 
1124 ! **************************************************************************************************
1125 !> \brief ...
1126 !> \param dimen ...
1127 !> \param latom ...
1128 !> \param katom ...
1129 !> \param jatom ...
1130 !> \param iatom ...
1131 !> \param lset ...
1132 !> \param kset ...
1133 !> \param jset ...
1134 !> \param iset ...
1135 !> \param Ssize ...
1136 !> \param Rsize ...
1137 !> \param Nsize ...
1138 !> \param Msize ...
1139 !> \param i_batch_start ...
1140 !> \param Ni_occupied ...
1141 !> \param MNRS ...
1142 !> \param C_T ...
1143 !> \param mp2_biel ...
1144 !> \param BI1 ...
1145 ! **************************************************************************************************
1146  SUBROUTINE transform_occupied_orbitals_first(dimen, latom, katom, jatom, iatom, &
1147  lset, kset, jset, iset, &
1148  Ssize, Rsize, Nsize, Msize, &
1149  i_batch_start, Ni_occupied, &
1150  MNRS, C_T, mp2_biel, BI1)
1151 
1152  INTEGER, INTENT(IN) :: dimen, latom, katom, jatom, iatom, lset, &
1153  kset, jset, iset, ssize, rsize, nsize, &
1154  msize, i_batch_start, ni_occupied
1155  REAL(kind=dp), &
1156  DIMENSION(Msize, Nsize, Rsize, Ssize), &
1157  INTENT(IN) :: mnrs
1158  REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_t
1159  TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1160  REAL(kind=dp), &
1161  DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1162  INTENT(INOUT) :: bi1
1163 
1164  INTEGER :: i, i_global, m, m_global, m_offset, &
1165  m_start, n, n_global, n_offset, r, &
1166  r_offset, r_start, s, s_offset
1167  REAL(kind=dp) :: mnrs_element
1168 
1169  n_offset = mp2_biel%index_table(jatom, jset) - 1
1170  m_offset = mp2_biel%index_table(iatom, iset) - 1
1171  s_offset = mp2_biel%index_table(latom, lset) - 1
1172  r_offset = mp2_biel%index_table(katom, kset) - 1
1173 
1174  DO s = 1, ssize
1175  r_start = 1
1176  IF (katom == latom .AND. kset == lset) r_start = s
1177  DO r = r_start, rsize
1178 
1179  ! fast i don't know why
1180  DO n = 1, nsize
1181  n_global = n + n_offset
1182  m_start = 1
1183  IF (iatom == jatom .AND. iset == jset) THEN
1184  m = n
1185  m_global = m + m_offset
1186  mnrs_element = mnrs(m, n, r, s)
1187  DO i = 1, ni_occupied
1188  i_global = i + i_batch_start
1189  bi1(n_global, i, r, s) = bi1(n_global, i, r, s) + c_t(i_global, m_global)*mnrs_element
1190  END DO
1191  m_start = n + 1
1192  END IF
1193 
1194  DO m = m_start, msize
1195  m_global = m + m_offset
1196  mnrs_element = mnrs(m, n, r, s)
1197  DO i = 1, ni_occupied
1198  i_global = i + i_batch_start
1199  bi1(n_global, i, r, s) = bi1(n_global, i, r, s) + c_t(i_global, m_global)*mnrs_element
1200  bi1(m_global, i, r, s) = bi1(m_global, i, r, s) + c_t(i_global, n_global)*mnrs_element
1201  END DO
1202  END DO
1203  END DO
1204 
1205  END DO
1206  END DO
1207 
1208  END SUBROUTINE transform_occupied_orbitals_first
1209 
1210 ! **************************************************************************************************
1211 !> \brief ...
1212 !> \param dimen ...
1213 !> \param latom ...
1214 !> \param katom ...
1215 !> \param lset ...
1216 !> \param kset ...
1217 !> \param Ssize ...
1218 !> \param Rsize ...
1219 !> \param Ni_occupied ...
1220 !> \param Nj_occupied ...
1221 !> \param j_batch_start ...
1222 !> \param BI1 ...
1223 !> \param C_T ...
1224 !> \param mp2_biel ...
1225 !> \param para_env ...
1226 !> \param elements_ij_proc ...
1227 !> \param multiple ...
1228 !> \param BIb ...
1229 ! **************************************************************************************************
1230  SUBROUTINE transform_occupied_orbitals_second(dimen, latom, katom, lset, kset, &
1231  Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
1232  BI1, C_T, mp2_biel, para_env, &
1233  elements_ij_proc, &
1234  multiple, BIb)
1235 
1236  INTEGER, INTENT(IN) :: dimen, latom, katom, lset, kset, ssize, &
1237  rsize, ni_occupied, nj_occupied, &
1238  j_batch_start
1239  REAL(kind=dp), &
1240  DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1241  INTENT(IN) :: bi1
1242  REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_t
1243  TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1244  TYPE(mp_para_env_type), INTENT(IN) :: para_env
1245  INTEGER, INTENT(IN) :: elements_ij_proc, multiple
1246  REAL(kind=dp), &
1247  DIMENSION(dimen, dimen, elements_ij_proc), &
1248  INTENT(INOUT) :: bib
1249 
1250  CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_occupied_orbitals_second'
1251 
1252  INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
1253  index_proc_shift, j, n, proc_receive, proc_send, r, r_global, r_offset, r_offset_rec, &
1254  r_start, rsize_rec, s, s_global, s_offset, s_offset_rec, ssize_rec
1255  INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
1256  LOGICAL :: case_send_receive
1257  REAL(kind=dp) :: c_t_r, c_t_s
1258  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bib_rs_mat_rec
1259  REAL(kind=dp), DIMENSION(dimen, Rsize+Ssize) :: bib_rs_mat
1260 
1261  CALL timeset(routinen, handle)
1262 
1263  s_offset = mp2_biel%index_table(latom, lset) - 1
1264  r_offset = mp2_biel%index_table(katom, kset) - 1
1265 
1266  size_parameter_send(1) = rsize
1267  size_parameter_send(2) = ssize
1268  size_parameter_send(3) = r_offset
1269  size_parameter_send(4) = s_offset
1270  size_parameter_send(5) = elements_ij_proc
1271 
1272  DO index_proc_shift = 0, para_env%num_pe - 1
1273 
1274  proc_send = modulo(para_env%mepos + index_proc_shift, para_env%num_pe)
1275  proc_receive = modulo(para_env%mepos - index_proc_shift, para_env%num_pe)
1276 
1277  case_send_receive = (proc_send /= para_env%mepos)
1278 
1279  IF (case_send_receive) THEN
1280  ! the processor starts to send (and receive?)
1281 
1282  CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
1283 
1284  rsize_rec = size_parameter_rec(1)
1285  ssize_rec = size_parameter_rec(2)
1286  r_offset_rec = size_parameter_rec(3)
1287  s_offset_rec = size_parameter_rec(4)
1288  elements_ij_proc_rec = size_parameter_rec(5)
1289  ALLOCATE (bib_rs_mat_rec(dimen, rsize_rec + ssize_rec))
1290 
1291  ELSE
1292  elements_ij_proc_rec = elements_ij_proc
1293  END IF
1294 
1295  index_ij_send = 0
1296  index_ij_rec = 0
1297  DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
1298 
1299  bib_rs_mat = zero
1300  IF (index_proc_ij <= ni_occupied*nj_occupied) THEN
1301 
1302  index_ij_send = index_ij_send + 1
1303 
1304  i = (index_proc_ij - 1)/nj_occupied + 1
1305  j = index_proc_ij - (i - 1)*nj_occupied + j_batch_start
1306 
1307  DO s = 1, ssize
1308  s_global = s + s_offset
1309  r_start = 1
1310  IF (katom == latom .AND. kset == lset) r_start = s
1311  DO r = r_start, rsize
1312  r_global = r + r_offset
1313 
1314  IF (r_global /= s_global) THEN
1315  c_t_r = c_t(j, r_global)
1316  c_t_s = c_t(j, s_global)
1317  DO n = 1, dimen
1318  bib_rs_mat(n, r) = bib_rs_mat(n, r) + c_t_s*bi1(n, i, r, s)
1319  END DO
1320  DO n = 1, dimen
1321  bib_rs_mat(n, rsize + s) = bib_rs_mat(n, rsize + s) + c_t_r*bi1(n, i, r, s)
1322  END DO
1323  ELSE
1324  c_t_s = c_t(j, s_global)
1325  DO n = 1, dimen
1326  bib_rs_mat(n, r) = bib_rs_mat(n, r) + c_t_s*bi1(n, i, r, s)
1327  END DO
1328  END IF
1329 
1330  END DO
1331  END DO
1332 
1333  END IF
1334 
1335  IF (case_send_receive) THEN
1336 
1337  CALL para_env%sendrecv(bib_rs_mat, proc_send, bib_rs_mat_rec, proc_receive)
1338 
1339  index_ij_rec = index_ij_rec + 1
1340  IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1341 
1342  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) = &
1343  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, index_ij_rec) + &
1344  bib_rs_mat_rec(1:dimen, 1:rsize_rec)
1345 
1346  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) = &
1347  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, index_ij_rec) + &
1348  bib_rs_mat_rec(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec)
1349 
1350  END IF
1351  ELSE
1352  ! the processor is the sender and receiver itself
1353  IF (index_ij_send <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
1354 
1355  bib(1:dimen, r_offset + 1:r_offset + rsize, index_ij_send) = &
1356  bib(1:dimen, r_offset + 1:r_offset + rsize, index_ij_send) + bib_rs_mat(1:dimen, 1:rsize)
1357 
1358  bib(1:dimen, s_offset + 1:s_offset + ssize, index_ij_send) = &
1359  bib(1:dimen, s_offset + 1:s_offset + ssize, index_ij_send) + bib_rs_mat(1:dimen, rsize + 1:rsize + ssize)
1360 
1361  END IF
1362  END IF
1363 
1364  END DO ! loop over the ij of the processor
1365 
1366  IF (case_send_receive) THEN
1367  DEALLOCATE (bib_rs_mat_rec)
1368  END IF
1369 
1370  END DO ! loop over the processor starting from itself
1371 
1372  CALL timestop(handle)
1373 
1374  END SUBROUTINE transform_occupied_orbitals_second
1375 
1376 ! **************************************************************************************************
1377 !> \brief ...
1378 !> \param dimen ...
1379 !> \param latom ...
1380 !> \param katom ...
1381 !> \param lset ...
1382 !> \param kset ...
1383 !> \param Ssize ...
1384 !> \param Rsize ...
1385 !> \param Ni_occupied ...
1386 !> \param Nj_occupied ...
1387 !> \param j_batch_start ...
1388 !> \param ij_elem_max ...
1389 !> \param BI1 ...
1390 !> \param C_T ...
1391 !> \param mp2_biel ...
1392 !> \param para_env ...
1393 !> \param elements_ij_proc ...
1394 !> \param BIb ...
1395 ! **************************************************************************************************
1396  SUBROUTINE transform_occupied_orbitals_second_big(dimen, latom, katom, lset, kset, &
1397  Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
1398  ij_elem_max, BI1, C_T, mp2_biel, para_env, &
1399  elements_ij_proc, BIb)
1400 
1401  INTEGER, INTENT(IN) :: dimen, latom, katom, lset, kset, ssize, &
1402  rsize, ni_occupied, nj_occupied, &
1403  j_batch_start, ij_elem_max
1404  REAL(kind=dp), &
1405  DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
1406  INTENT(IN) :: bi1
1407  REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_t
1408  TYPE(mp2_biel_type), INTENT(IN) :: mp2_biel
1409  TYPE(mp_para_env_type), INTENT(IN) :: para_env
1410  INTEGER, INTENT(IN) :: elements_ij_proc
1411  REAL(kind=dp), &
1412  DIMENSION(dimen, dimen, elements_ij_proc), &
1413  INTENT(INOUT) :: bib
1414 
1415  CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_occupied_orbitals_second_big'
1416 
1417  INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
1418  index_proc_shift, j, n, proc_receive, proc_send, r, r_global, r_offset, r_offset_rec, &
1419  r_start, rsize_rec, s, s_global, s_offset, s_offset_rec, ssize_rec
1420  INTEGER, DIMENSION(5) :: size_parameter_rec, size_parameter_send
1421  LOGICAL :: case_send_receive
1422  REAL(kind=dp) :: c_t_r, c_t_s
1423  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: bib_rs_mat_rec
1424  REAL(kind=dp), &
1425  DIMENSION(dimen, Rsize+Ssize, ij_elem_max) :: bib_rs_mat
1426 
1427  CALL timeset(routinen, handle)
1428 
1429  s_offset = mp2_biel%index_table(latom, lset) - 1
1430  r_offset = mp2_biel%index_table(katom, kset) - 1
1431 
1432  size_parameter_send(1) = rsize
1433  size_parameter_send(2) = ssize
1434  size_parameter_send(3) = r_offset
1435  size_parameter_send(4) = s_offset
1436  size_parameter_send(5) = elements_ij_proc
1437 
1438  DO index_proc_shift = 0, para_env%num_pe - 1
1439 
1440  proc_send = modulo(para_env%mepos + index_proc_shift, para_env%num_pe)
1441  proc_receive = modulo(para_env%mepos - index_proc_shift, para_env%num_pe)
1442 
1443  case_send_receive = (proc_send /= para_env%mepos)
1444 
1445  IF (case_send_receive) THEN
1446  ! the processor starts to send (and receive?)
1447 
1448  CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
1449 
1450  rsize_rec = size_parameter_rec(1)
1451  ssize_rec = size_parameter_rec(2)
1452  r_offset_rec = size_parameter_rec(3)
1453  s_offset_rec = size_parameter_rec(4)
1454  elements_ij_proc_rec = size_parameter_rec(5)
1455  ALLOCATE (bib_rs_mat_rec(dimen, rsize_rec + ssize_rec, ij_elem_max))
1456  ELSE
1457  elements_ij_proc_rec = elements_ij_proc
1458  END IF
1459 
1460  index_ij_send = 0
1461  index_ij_rec = 0
1462  bib_rs_mat = zero
1463 
1464  DO index_proc_ij = proc_send + 1, ni_occupied*nj_occupied, para_env%num_pe
1465 
1466  index_ij_send = index_ij_send + 1
1467 
1468  i = (index_proc_ij - 1)/nj_occupied + 1
1469  j = index_proc_ij - (i - 1)*nj_occupied + j_batch_start
1470 
1471  DO s = 1, ssize
1472  s_global = s + s_offset
1473  r_start = 1
1474  IF (katom == latom .AND. kset == lset) r_start = s
1475  DO r = r_start, rsize
1476  r_global = r + r_offset
1477 
1478  IF (r_global /= s_global) THEN
1479  c_t_r = c_t(j, r_global)
1480  c_t_s = c_t(j, s_global)
1481  DO n = 1, dimen
1482  bib_rs_mat(n, r, index_ij_send) = bib_rs_mat(n, r, index_ij_send) + c_t_s*bi1(n, i, r, s)
1483  END DO
1484  DO n = 1, dimen
1485  bib_rs_mat(n, rsize + s, index_ij_send) = bib_rs_mat(n, rsize + s, index_ij_send) + c_t_r*bi1(n, i, r, s)
1486  END DO
1487  ELSE
1488  c_t_s = c_t(j, s_global)
1489  DO n = 1, dimen
1490  bib_rs_mat(n, r, index_ij_send) = bib_rs_mat(n, r, index_ij_send) + c_t_s*bi1(n, i, r, s)
1491  END DO
1492  END IF
1493 
1494  END DO
1495  END DO
1496 
1497  END DO
1498 
1499  IF (case_send_receive) THEN
1500 
1501  CALL para_env%sendrecv(bib_rs_mat, proc_send, bib_rs_mat_rec, proc_receive)
1502 
1503  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) = &
1504  bib(1:dimen, r_offset_rec + 1:r_offset_rec + rsize_rec, 1:elements_ij_proc) + &
1505  bib_rs_mat_rec(1:dimen, 1:rsize_rec, 1:elements_ij_proc)
1506 
1507  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) = &
1508  bib(1:dimen, s_offset_rec + 1:s_offset_rec + ssize_rec, 1:elements_ij_proc) + &
1509  bib_rs_mat_rec(1:dimen, rsize_rec + 1:rsize_rec + ssize_rec, 1:elements_ij_proc)
1510 
1511  DEALLOCATE (bib_rs_mat_rec)
1512  ELSE
1513  ! the processor is the sender and receiver itself
1514  bib(1:dimen, r_offset + 1:r_offset + rsize, 1:elements_ij_proc) = &
1515  bib(1:dimen, r_offset + 1:r_offset + rsize, 1:elements_ij_proc) + &
1516  bib_rs_mat(1:dimen, 1:rsize, 1:elements_ij_proc)
1517 
1518  bib(1:dimen, s_offset + 1:s_offset + ssize, 1:elements_ij_proc) = &
1519  bib(1:dimen, s_offset + 1:s_offset + ssize, 1:elements_ij_proc) + &
1520  bib_rs_mat(1:dimen, rsize + 1:rsize + ssize, 1:elements_ij_proc)
1521 
1522  END IF
1523 
1524  END DO ! loop over the processor starting from itself
1525 
1526  CALL timestop(handle)
1527 
1528  END SUBROUTINE transform_occupied_orbitals_second_big
1529 
1530 ! **************************************************************************************************
1531 !> \brief ...
1532 !> \param dimen ...
1533 !> \param occupied ...
1534 !> \param virtual ...
1535 !> \param i_batch_start ...
1536 !> \param j_batch_start ...
1537 !> \param BIb ...
1538 !> \param C ...
1539 !> \param Auto ...
1540 !> \param elements_ij_proc ...
1541 !> \param ij_list_proc ...
1542 !> \param nspins ...
1543 !> \param Emp2 ...
1544 !> \param Emp2_Cou ...
1545 !> \param Emp2_ex ...
1546 ! **************************************************************************************************
1547  SUBROUTINE transform_virtual_orbitals_and_accumulate(dimen, occupied, virtual, i_batch_start, &
1548  j_batch_start, BIb, C, Auto, elements_ij_proc, &
1549  ij_list_proc, nspins, Emp2, Emp2_Cou, Emp2_ex)
1550 
1551  INTEGER, INTENT(IN) :: dimen, occupied, virtual, i_batch_start, &
1552  j_batch_start
1553  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1554  INTENT(INOUT) :: bib
1555  REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c
1556  REAL(kind=dp), DIMENSION(dimen), INTENT(IN) :: auto
1557  INTEGER, INTENT(IN) :: elements_ij_proc
1558  INTEGER, DIMENSION(elements_ij_proc, 2), &
1559  INTENT(IN) :: ij_list_proc
1560  INTEGER, INTENT(IN) :: nspins
1561  REAL(kind=dp), INTENT(INOUT) :: emp2, emp2_cou, emp2_ex
1562 
1563  CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_virtual_orbitals_and_accumulate'
1564  REAL(kind=dp), PARAMETER :: zero = 0.0_dp
1565 
1566  INTEGER :: a, a_global, b, b_global, handle, i, &
1567  i_global, index_ij, j, j_global
1568  REAL(kind=dp) :: iajb, ibja, parz, two
1569  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bia
1570 
1571  CALL timeset(routinen, handle)
1572 
1573  ALLOCATE (bia(dimen, virtual))
1574 
1575  bia = zero
1576  DO index_ij = 1, elements_ij_proc
1577 
1578  CALL dgemm('T', 'N', dimen, virtual, dimen, 1.0_dp, bib(1, 1, index_ij), &
1579  dimen, c(1, occupied + 1), dimen, 0.0_dp, bia(1, 1), dimen)
1580  bib(1:dimen, 1:virtual, index_ij) = bia(1:dimen, 1:virtual)
1581 
1582  END DO
1583 
1584  DEALLOCATE (bia)
1585  ALLOCATE (bia(virtual, virtual))
1586 
1587  bia = zero
1588  DO index_ij = 1, elements_ij_proc
1589 
1590  CALL dgemm('T', 'N', virtual, virtual, dimen, 1.0_dp, bib(1, 1, index_ij), dimen, c(1, occupied + 1), dimen, 0.0_dp, &
1591  bia(1, 1), virtual)
1592  bib(1:virtual, 1:virtual, index_ij) = bia(1:virtual, 1:virtual)
1593 
1594  END DO
1595 
1596  two = 2.0_dp/nspins
1597  DO index_ij = 1, elements_ij_proc
1598  i = ij_list_proc(index_ij, 1)
1599  j = ij_list_proc(index_ij, 2)
1600  i_global = i + i_batch_start
1601  j_global = j + j_batch_start
1602  DO a = 1, virtual
1603  a_global = a + occupied
1604  DO b = 1, virtual
1605  b_global = b + occupied
1606  iajb = bib(a, b, index_ij)
1607  ibja = bib(b, a, index_ij)
1608  parz = iajb/(auto(i_global) + auto(j_global) - auto(a_global) - auto(b_global))
1609  ! parz=parz*(two*iajb-ibja) !Full
1610  ! parz=parz*(iajb) !Coulomb
1611  ! parz=parz*(ibja) !Coulomb
1612  ! Emp2=Emp2+parz/nspins
1613  emp2_cou = emp2_cou + parz*two*(iajb)/nspins
1614  emp2_ex = emp2_ex - parz*(ibja)/nspins
1615  emp2 = emp2 + parz*(two*iajb - ibja)/nspins
1616  END DO
1617  END DO
1618  END DO
1619 
1620  DEALLOCATE (bia)
1621 
1622  CALL timestop(handle)
1623 
1624  END SUBROUTINE transform_virtual_orbitals_and_accumulate
1625 
1626 ! **************************************************************************************************
1627 !> \brief ...
1628 !> \param dimen ...
1629 !> \param occ_i ...
1630 !> \param occ_j ...
1631 !> \param virt_i ...
1632 !> \param virt_j ...
1633 !> \param i_batch_start ...
1634 !> \param j_batch_start ...
1635 !> \param BIb ...
1636 !> \param C_i ...
1637 !> \param C_j ...
1638 !> \param Auto_i ...
1639 !> \param Auto_j ...
1640 !> \param elements_ij_proc ...
1641 !> \param ij_list_proc ...
1642 !> \param Emp2 ...
1643 !> \param Emp2_Cou ...
1644 ! **************************************************************************************************
1645  SUBROUTINE transform_virtual_orbitals_and_accumulate_abcase(dimen, occ_i, occ_j, virt_i, virt_j, i_batch_start, &
1646  j_batch_start, BIb, C_i, C_j, Auto_i, Auto_j, elements_ij_proc, &
1647  ij_list_proc, Emp2, Emp2_Cou)
1648 
1649  INTEGER, INTENT(IN) :: dimen, occ_i, occ_j, virt_i, virt_j, &
1650  i_batch_start, j_batch_start
1651  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
1652  INTENT(INOUT) :: bib
1653  REAL(kind=dp), DIMENSION(dimen, dimen), INTENT(IN) :: c_i, c_j
1654  REAL(kind=dp), DIMENSION(dimen), INTENT(IN) :: auto_i, auto_j
1655  INTEGER, INTENT(IN) :: elements_ij_proc
1656  INTEGER, DIMENSION(elements_ij_proc, 2), &
1657  INTENT(IN) :: ij_list_proc
1658  REAL(kind=dp), INTENT(INOUT) :: emp2, emp2_cou
1659 
1660  CHARACTER(LEN=*), PARAMETER :: routinen = 'transform_virtual_orbitals_and_accumulate_ABcase'
1661  REAL(kind=dp), PARAMETER :: two = 2.d+00, zero = 0.d+00
1662 
1663  INTEGER :: a, a_global, b, b_global, handle, i, &
1664  i_global, index_ij, j, j_global, n, s
1665  REAL(kind=dp) :: iajb, parz
1666  REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: bia
1667 
1668  CALL timeset(routinen, handle)
1669 
1670  ALLOCATE (bia(dimen, virt_i))
1671 
1672  DO index_ij = 1, elements_ij_proc
1673 
1674  DO a = 1, virt_i
1675  a_global = a + occ_i
1676  DO s = 1, dimen
1677  parz = zero
1678  DO n = 1, dimen
1679  parz = parz + c_i(n, a_global)*bib(n, s, index_ij)
1680  END DO
1681  bia(s, a) = parz
1682  END DO
1683  END DO
1684 
1685  bib(1:dimen, 1:virt_i, index_ij) = bia
1686 
1687  END DO
1688 
1689  DEALLOCATE (bia)
1690  ALLOCATE (bia(virt_i, virt_j))
1691 
1692  DO index_ij = 1, elements_ij_proc
1693 
1694  DO a = 1, virt_i
1695  DO b = 1, virt_j
1696  b_global = b + occ_j
1697  parz = zero
1698  DO s = 1, dimen
1699  parz = parz + c_j(s, b_global)*bib(s, a, index_ij)
1700  END DO
1701  bia(a, b) = parz
1702  END DO
1703  END DO
1704 
1705  bib(1:virt_i, 1:virt_j, index_ij) = bia
1706 
1707  END DO
1708 
1709  DO index_ij = 1, elements_ij_proc
1710  i = ij_list_proc(index_ij, 1)
1711  j = ij_list_proc(index_ij, 2)
1712  i_global = i + i_batch_start
1713  j_global = j + j_batch_start
1714  DO a = 1, virt_i
1715  a_global = a + occ_i
1716  DO b = 1, virt_j
1717  b_global = b + occ_j
1718  iajb = bib(a, b, index_ij)
1719  parz = iajb*iajb/(auto_i(i_global) + auto_j(j_global) - auto_i(a_global) - auto_j(b_global))
1720  emp2_cou = emp2_cou + parz/two
1721  emp2 = emp2 + parz/two
1722  END DO
1723  END DO
1724  END DO
1725 
1726  DEALLOCATE (bia)
1727 
1728  CALL timestop(handle)
1729 
1730  END SUBROUTINE transform_virtual_orbitals_and_accumulate_abcase
1731 
1732 END MODULE mp2_direct_method
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Definition: grid_common.h:117
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
Handles all functions related to the CELL.
Definition: cell_types.F:15
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
Routines to calculate HFX energy and potential.
subroutine, public coulomb4(lib, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, zeta, zetb, zetc, zetd, primitive_integrals, potential_parameter, neighbor_cells, screen1, screen2, eps_schwarz, max_contraction_val, cart_estimate, cell, neris_tmp, log10_pmax, log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, pgf_list_ij, pgf_list_kl, pgf_product_list, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, nimages, do_periodic, p_work)
calculates two-electron integrals of a quartet/shell using the library lib_int in the periodic case
Routines for optimizing load balance between processes in HFX calculations.
real(kind=dp), dimension(12), parameter, public p1_energy
real(kind=dp), dimension(2), parameter, public p3_energy
real(kind=dp), dimension(12), parameter, public p2_energy
integer(kind=int_8) function, public cost_model(nsa, nsb, nsc, nsd, npgfa, npgfb, npgfc, npgfd, ratio, p1, p2, p3)
estimates the cost of a set quartet with info available at load balance time i.e. without much info o...
Routines for optimizing load balance between processes in HFX calculations.
subroutine, public build_pair_list_mp2(natom, list, set_list, i_start, i_end, j_start, j_end, kind_of, basis_parameter, particle_set, do_periodic, coeffs_set, coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, atomic_pair_list, skip_atom_symmetry)
...
Types and set/get functions for HFX.
Definition: hfx_types.F:15
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public do_potential_tshpsc
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public int_8
Definition: kinds.F:54
integer, parameter, public dp
Definition: kinds.F:34
Interface to the Libint-Library or a c++ wrapper.
Machine interface based on Fortran 2003 and POSIX.
Definition: machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition: machine.F:106
Definition of mathematical constants and functions.
Definition: mathconstants.F:16
real(kind=dp), parameter, public zero
Interface to the message passing library MPI.
subroutine, public mp_para_env_release(para_env)
releases the para object (to be called when you don't want anymore the shared copy of this object)
Routines to calculate MP2 energy.
subroutine, public mp2_direct_energy(dimen, occ_i, occ_j, mp2_biel, mp2_env, C_i, Auto_i, Emp2, Emp2_Cou, Emp2_ex, qs_env, para_env, unit_nr, C_j, Auto_j)
...
subroutine, public mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env, mp2_biel, dimen, C, Auto, i_batch_start, Ni_occupied, occupied, elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start, occupied_beta, C_beta, Auto_beta, Integ_MP2)
...
Routines to calculate the 3 and 2 center ERI's needed in the RI approximation using libint.
Definition: mp2_ri_libint.F:15
subroutine, public prepare_integral_calc(cell, qs_env, mp2_env, para_env, mp2_potential_parameter, actual_x_data, do_periodic, basis_parameter, max_set, particle_set, natom, kind_of, nsgf_max, primitive_integrals, ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, nspins, max_contraction, max_pgf, pgf_list_ij, pgf_list_kl, pgf_product_list, nimages, eps_schwarz, log10_eps_schwarz, private_lib, p_work, screen_coeffs_set, screen_coeffs_kind, screen_coeffs_pgf, radii_pgf, RI_basis_parameter, RI_basis_info)
...
Types needed for MP2 calculations.
Definition: mp2_types.F:14
integer, save, public init_tshpsc_lmax
Definition: mp2_types.F:71
Provides Cartesian and spherical orbital pointers and indices.
integer, dimension(:), allocatable, public ncoset
Define the data structure for the particle information.
subroutine, public free_c0()
...
Definition: t_sh_p_s_c.F:1142