cp_cfm_types.F (git:ccc2433)
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Represents a complex full matrix distributed on many processors.
!> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
! **************************************************************************************************
MODULE cp_cfm_types
   USE cp_blacs_env, ONLY: cp_blacs_env_type
   USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
                           cp_fm_struct_get, &
                           cp_fm_struct_release, &
                           cp_fm_struct_retain, &
                           cp_fm_struct_type
   USE cp_fm_types, ONLY: cp_fm_type
   USE kinds, ONLY: dp
   USE mathconstants, ONLY: z_one, &
                            z_zero
   USE message_passing, ONLY: cp2k_is_parallel, &
                              mp_any_source, &
                              mp_para_env_type, &
                              mp_proc_null, &
                              mp_request_null, &
                              mp_request_type, &
                              mp_waitall
#include "../base/base_uses.f90"

   IMPLICIT NONE
   PRIVATE

   LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
   INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11

   PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type
   PUBLIC :: cp_cfm_cleanup_copy_general, &
             cp_cfm_create, &
             cp_cfm_finish_copy_general, &
             cp_cfm_get_element, &
             cp_cfm_get_info, &
             cp_cfm_get_submatrix, &
             cp_cfm_release, &
             cp_cfm_set_all, &
             cp_cfm_set_element, &
             cp_cfm_set_submatrix, &
             cp_cfm_start_copy_general, &
             cp_cfm_to_cfm, &
             cp_cfm_to_fm, &
             cp_fm_to_cfm

   INTERFACE cp_cfm_to_cfm
      MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
         cp_cfm_to_cfm_columns ! just a number of columns
   END INTERFACE

! **************************************************************************************************
!> \brief Represent a complex full matrix.
!> \param name the name of the matrix, used for printing
!> \param matrix_struct structure of this matrix
!> \param local_data array with the data of the matrix (its content depends on
!>        the matrix type used: in a parallel run it will be in
!>        ScaLAPACK format, in a sequential run it will simply contain the matrix)
! **************************************************************************************************
   TYPE cp_cfm_type
      CHARACTER(len=60) :: name = ""
      TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
      COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
   END TYPE cp_cfm_type

! **************************************************************************************************
!> \brief Just to build arrays of pointers to matrices.
!> \param matrix the pointer to the matrix
! **************************************************************************************************
   TYPE cp_cfm_p_type
      TYPE(cp_cfm_type), POINTER :: matrix => null()
   END TYPE cp_cfm_p_type

! **************************************************************************************************
!> \brief Stores the state of a copy between cp_cfm_start_copy_general
!>        and cp_cfm_finish_copy_general.
!> \par History
!>      Jan 2017 derived type 'copy_info_type' has been created [Mark T]
!>      Jan 2018 the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
! **************************************************************************************************
   TYPE copy_cfm_info_type
      !> number of MPI processes that send data
      INTEGER :: send_size = -1
      !> number of locally stored rows (1) and columns (2) of the destination matrix
      INTEGER, DIMENSION(2) :: nlocal_recv = -1
      !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
      INTEGER, DIMENSION(2) :: nblock_src = -1
      !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
      INTEGER, DIMENSION(2) :: src_num_pe = -1
      !> displacements into recv_buf
      INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disp
      !> MPI requests for non-blocking receive and send operations
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_request, send_request
      !> global column and row indices of locally stored elements of the destination matrix
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
      !> rank of the MPI process with BLACS coordinates (prow, pcol)
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: src_blacs2mpi
      !> receive and send buffers for non-blocking MPI communication
      COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
   END TYPE copy_cfm_info_type

CONTAINS

! **************************************************************************************************
!> \brief Creates a new full matrix with the given structure.
!> \param matrix matrix to be created
!> \param matrix_struct structure of the matrix
!> \param name name of the matrix
!> \note
!>      preferred allocation routine
! **************************************************************************************************
   SUBROUTINE cp_cfm_create(matrix, matrix_struct, name)
      TYPE(cp_cfm_type), INTENT(OUT) :: matrix
      TYPE(cp_fm_struct_type), TARGET, INTENT(IN) :: matrix_struct
      CHARACTER(len=*), INTENT(in), OPTIONAL :: name

      INTEGER :: ncol_local, npcol, nprow, nrow_local
      TYPE(cp_blacs_env_type), POINTER :: context

#if defined(__parallel) && ! defined(__SCALAPACK)
      CPABORT("full matrices need ScaLAPACK for parallel runs")
#endif

      context => matrix_struct%context
      matrix%matrix_struct => matrix_struct
      CALL cp_fm_struct_retain(matrix%matrix_struct)

      nprow = context%num_pe(1)
      npcol = context%num_pe(2)
      NULLIFY (matrix%local_data)

      nrow_local = matrix_struct%local_leading_dimension
      ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
      ALLOCATE (matrix%local_data(nrow_local, ncol_local))

      ! do not initialise the created matrix: it is up to the caller to do so
      !CALL zcopy(nrow_local*ncol_local, z_zero, 0, matrix%local_data, 1)

      IF (PRESENT(name)) THEN
         matrix%name = name
      ELSE
         matrix%name = 'full complex matrix'
      END IF
   END SUBROUTINE cp_cfm_create

! **************************************************************************************************
!> \brief Releases a full matrix.
!> \param matrix the matrix to release
! **************************************************************************************************
   SUBROUTINE cp_cfm_release(matrix)
      TYPE(cp_cfm_type), INTENT(INOUT) :: matrix

      IF (ASSOCIATED(matrix%local_data)) THEN
         DEALLOCATE (matrix%local_data)
      END IF
      matrix%name = ""
      CALL cp_fm_struct_release(matrix%matrix_struct)
   END SUBROUTINE cp_cfm_release
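
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Shows the intended create/release life cycle, assuming the caller supplies a valid
!>        (and sufficiently long-lived) matrix structure. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_lifecycle(matrix_struct)
      TYPE(cp_fm_struct_type), TARGET, INTENT(IN) :: matrix_struct

      TYPE(cp_cfm_type) :: work

      ! cp_cfm_create retains the structure and allocates local_data (left uninitialised)
      CALL cp_cfm_create(work, matrix_struct, name="work matrix")
      ! ... fill and use the matrix here ...
      ! cp_cfm_release deallocates local_data and releases the structure reference
      CALL cp_cfm_release(work)
   END SUBROUTINE cp_cfm_example_lifecycle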

! **************************************************************************************************
!> \brief Set all elements of the full matrix to alpha. In addition, set all
!>        diagonal matrix elements to beta (if given explicitly).
!> \param matrix matrix to initialise
!> \param alpha value of the off-diagonal matrix elements
!> \param beta value of the diagonal matrix elements (equal to alpha if absent)
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), INTENT(in) :: alpha
      COMPLEX(kind=dp), INTENT(in), OPTIONAL :: beta

      INTEGER :: irow_local, nrow_local
#if defined(__SCALAPACK)
      INTEGER :: icol_local, ncol_local
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
#endif

      CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)

      IF (PRESENT(beta)) THEN
#if defined(__SCALAPACK)
         CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
                              row_indices=row_indices, col_indices=col_indices)

         icol_local = 1
         irow_local = 1

         DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
            IF (row_indices(irow_local) < col_indices(icol_local)) THEN
               irow_local = irow_local + 1
            ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
               icol_local = icol_local + 1
            ELSE
               matrix%local_data(irow_local, icol_local) = beta
               irow_local = irow_local + 1
               icol_local = icol_local + 1
            END IF
         END DO
#else
         nrow_local = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)

         DO irow_local = 1, nrow_local
            matrix%local_data(irow_local, irow_local) = beta
         END DO
#endif
      END IF

   END SUBROUTINE cp_cfm_set_all
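
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Builds a (complex) identity matrix via cp_cfm_set_all: alpha fills the whole matrix,
!>        the optional beta overrides the diagonal. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_set_identity(matrix)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix

      ! zero everywhere, one on the diagonal
      CALL cp_cfm_set_all(matrix, alpha=z_zero, beta=z_one)
   END SUBROUTINE cp_cfm_example_set_identity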

! **************************************************************************************************
!> \brief Get the matrix element by its global index.
!> \param matrix full matrix
!> \param irow_global global row index
!> \param icol_global global column index
!> \param alpha matrix element
!> \par History
!>      , TCH, created
!>      always return the answer
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      INTEGER, INTENT(in) :: irow_global, icol_global
      COMPLEX(kind=dp), INTENT(out) :: alpha

#if defined(__SCALAPACK)
      INTEGER :: icol_local, ipcol, iprow, irow_local, &
                 mypcol, myprow, npcol, nprow
      INTEGER, DIMENSION(9) :: desca
      TYPE(cp_blacs_env_type), POINTER :: context
#endif

#if defined(__SCALAPACK)
      context => matrix%matrix_struct%context
      myprow = context%mepos(1)
      mypcol = context%mepos(2)
      nprow = context%num_pe(1)
      npcol = context%num_pe(2)

      desca(:) = matrix%matrix_struct%descriptor(:)

      CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
                   irow_local, icol_local, iprow, ipcol)

      IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
         alpha = matrix%local_data(irow_local, icol_local)
         CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
      ELSE
         CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
      END IF

#else
      alpha = matrix%local_data(irow_global, icol_global)
#endif

   END SUBROUTINE cp_cfm_get_element

! **************************************************************************************************
!> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
!> \param matrix full matrix
!> \param irow_global global row index
!> \param icol_global global column index
!> \param alpha value of the matrix element
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      INTEGER, INTENT(in) :: irow_global, icol_global
      COMPLEX(kind=dp), INTENT(in) :: alpha

#if defined(__SCALAPACK)
      INTEGER :: icol_local, ipcol, iprow, irow_local, &
                 mypcol, myprow, npcol, nprow
      INTEGER, DIMENSION(9) :: desca
      TYPE(cp_blacs_env_type), POINTER :: context
#endif

#if defined(__SCALAPACK)
      context => matrix%matrix_struct%context
      myprow = context%mepos(1)
      mypcol = context%mepos(2)
      nprow = context%num_pe(1)
      npcol = context%num_pe(2)

      desca(:) = matrix%matrix_struct%descriptor(:)

      CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
                   irow_local, icol_local, iprow, ipcol)

      IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
         matrix%local_data(irow_local, icol_local) = alpha
      END IF

#else
      matrix%local_data(irow_global, icol_global) = alpha
#endif

   END SUBROUTINE cp_cfm_set_element
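
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Element-wise access round trip: cp_cfm_set_element involves no communication (only the
!>        owning process stores the value), whereas cp_cfm_get_element broadcasts the element and
!>        therefore must be called by all processes. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_element_roundtrip(matrix)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix

      COMPLEX(kind=dp) :: alpha

      CALL cp_cfm_set_element(matrix, 1, 1, z_one)
      CALL cp_cfm_get_element(matrix, 1, 1, alpha) ! alpha == z_one on every process
   END SUBROUTINE cp_cfm_example_element_roundtrip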

! **************************************************************************************************
!> \brief Extract a sub-matrix from the full matrix:
!>        op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows,start_col:start_col+n_cols).
!>        The sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
!> \param fm full matrix you want to get the elements from
!> \param target_m 2-D array to store the extracted sub-matrix
!> \param start_row global row index of the matrix element target_m(1,1) (defaults to 1)
!> \param start_col global column index of the matrix element target_m(1,1) (defaults to 1)
!> \param n_rows number of rows to extract (defaults to size(op(target_m),1))
!> \param n_cols number of columns to extract (defaults to size(op(target_m),2))
!> \param transpose indicates that the extracted sub-matrix target_m should be transposed:
!>        op(target_m) = target_m^T if .TRUE.,
!>        op(target_m) = target_m if .FALSE. (defaults to .FALSE.)
!> \par History
!>      * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
!>      * 01.2018 drop innermost conditional branching [Sergey Chulkov]
!> \author Lianheng Tong
!> \note
!>      Optimised for full column updates. The matrix target_m is replicated and valid on all CPUs.
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
      TYPE(cp_cfm_type), INTENT(IN) :: fm
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
      INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
      LOGICAL, INTENT(in), OPTIONAL :: transpose

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_get_submatrix'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
      INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
                 ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
                 start_row_global, start_row_local, this_col
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: do_zero, tr_a
      TYPE(mp_para_env_type), POINTER :: para_env

      CALL timeset(routineN, handle)

      IF (SIZE(target_m) /= 0) THEN
#if defined(__SCALAPACK)
         do_zero = .TRUE.
#else
         do_zero = .FALSE.
#endif

         tr_a = .FALSE.
         IF (PRESENT(transpose)) tr_a = transpose

         ! find out the first and last global row/column indices
         start_row_global = 1
         start_col_global = 1
         IF (PRESENT(start_row)) start_row_global = start_row
         IF (PRESENT(start_col)) start_col_global = start_col

         IF (tr_a) THEN
            end_row_global = SIZE(target_m, 2)
            end_col_global = SIZE(target_m, 1)
         ELSE
            end_row_global = SIZE(target_m, 1)
            end_col_global = SIZE(target_m, 2)
         END IF
         IF (PRESENT(n_rows)) end_row_global = n_rows
         IF (PRESENT(n_cols)) end_col_global = n_cols

         end_row_global = end_row_global + start_row_global - 1
         end_col_global = end_col_global + start_col_global - 1

         CALL cp_cfm_get_info(matrix=fm, &
                              nrow_global=nrow_global, ncol_global=ncol_global, &
                              nrow_local=nrow_local, ncol_local=ncol_local, &
                              row_indices=row_indices, col_indices=col_indices)
         IF (end_row_global > nrow_global) THEN
            end_row_global = nrow_global
            do_zero = .TRUE.
         END IF
         IF (end_col_global > ncol_global) THEN
            end_col_global = ncol_global
            do_zero = .TRUE.
         END IF

         ! find out the row/column indices of the locally stored matrix elements that need to be copied.
         ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
         DO start_row_local = 1, nrow_local
            IF (row_indices(start_row_local) >= start_row_global) EXIT
         END DO

         DO end_row_local = start_row_local, nrow_local
            IF (row_indices(end_row_local) > end_row_global) EXIT
         END DO
         end_row_local = end_row_local - 1

         DO start_col_local = 1, ncol_local
            IF (col_indices(start_col_local) >= start_col_global) EXIT
         END DO

         DO end_col_local = start_col_local, ncol_local
            IF (col_indices(end_col_local) > end_col_global) EXIT
         END DO
         end_col_local = end_col_local - 1

         para_env => fm%matrix_struct%para_env
         local_data => fm%local_data

         ! wipe the content of the target matrix if:
         ! * the source matrix is distributed across a number of processes, or
         ! * not all elements of the target matrix will be assigned, e.g.
         !   when the target matrix is larger than the source matrix
         IF (do_zero) &
            CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)

         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
               END DO
            END DO
         END IF

         CALL para_env%sum(target_m)
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_get_submatrix
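
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Replicates the leading block of a distributed matrix on every process; the call is
!>        collective and therefore expensive. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_replicate_block(fm, block)
      TYPE(cp_cfm_type), INTENT(IN) :: fm
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out) :: block

      ! block(i,j) = fm(i,j) for the leading size(block,1) x size(block,2) sub-matrix
      CALL cp_cfm_get_submatrix(fm, block)
   END SUBROUTINE cp_cfm_example_replicate_block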

! **************************************************************************************************
!> \brief Set a sub-matrix of the full matrix:
!>        matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
!>        = alpha*op(new_values)(1:n_rows,1:n_cols) +
!>          beta*matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
!> \param matrix full matrix to update
!> \param new_values replicated 2-D array that holds the new elements of the updated sub-matrix
!> \param start_row global row index of the matrix element new_values(1,1) (defaults to 1)
!> \param start_col global column index of the matrix element new_values(1,1) (defaults to 1)
!> \param n_rows number of rows to update (defaults to size(op(new_values),1))
!> \param n_cols number of columns to update (defaults to size(op(new_values),2))
!> \param alpha scale factor for the new values (defaults to (1.0,0.0))
!> \param beta scale factor for the old values (defaults to (0.0,0.0))
!> \param transpose indicates that the matrix new_values should be transposed:
!>        op(new_values) = new_values^T if .TRUE.,
!>        op(new_values) = new_values if .FALSE. (defaults to .FALSE.)
!> \par History
!>      * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
!>      * 01.2018 drop innermost conditional branching [Sergey Chulkov]
!> \author Lianheng Tong
!> \note
!>      Optimised for alpha=(1.0,0.0), beta=(0.0,0.0).
!>      All matrix elements of 'new_values' need to be valid on all CPUs.
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
                                   start_col, n_rows, n_cols, alpha, beta, transpose)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
      INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
      COMPLEX(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
      LOGICAL, INTENT(in), OPTIONAL :: transpose

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_set_submatrix'

      COMPLEX(kind=dp) :: al, be
      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
      INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
                 ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
                 start_row_global, start_row_local, this_col
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: tr_a

      CALL timeset(routineN, handle)

      al = z_one
      be = z_zero
      IF (PRESENT(alpha)) al = alpha
      IF (PRESENT(beta)) be = beta

      ! find out the first and last global row/column indices
      start_row_global = 1
      start_col_global = 1
      IF (PRESENT(start_row)) start_row_global = start_row
      IF (PRESENT(start_col)) start_col_global = start_col

      tr_a = .FALSE.
      IF (PRESENT(transpose)) tr_a = transpose

      IF (tr_a) THEN
         end_row_global = SIZE(new_values, 2)
         end_col_global = SIZE(new_values, 1)
      ELSE
         end_row_global = SIZE(new_values, 1)
         end_col_global = SIZE(new_values, 2)
      END IF
      IF (PRESENT(n_rows)) end_row_global = n_rows
      IF (PRESENT(n_cols)) end_col_global = n_cols

      end_row_global = end_row_global + start_row_global - 1
      end_col_global = end_col_global + start_col_global - 1

      CALL cp_cfm_get_info(matrix=matrix, &
                           nrow_global=nrow_global, ncol_global=ncol_global, &
                           nrow_local=nrow_local, ncol_local=ncol_local, &
                           row_indices=row_indices, col_indices=col_indices)
      IF (end_row_global > nrow_global) end_row_global = nrow_global
      IF (end_col_global > ncol_global) end_col_global = ncol_global

      ! find out the row/column indices of the locally stored matrix elements that need to be set.
      ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
      DO start_row_local = 1, nrow_local
         IF (row_indices(start_row_local) >= start_row_global) EXIT
      END DO

      DO end_row_local = start_row_local, nrow_local
         IF (row_indices(end_row_local) > end_row_global) EXIT
      END DO
      end_row_local = end_row_local - 1

      DO start_col_local = 1, ncol_local
         IF (col_indices(start_col_local) >= start_col_global) EXIT
      END DO

      DO end_col_local = start_col_local, ncol_local
         IF (col_indices(end_col_local) > end_col_global) EXIT
      END DO
      end_col_local = end_col_local - 1

      local_data => matrix%local_data

      IF (al == z_one .AND. be == z_zero) THEN
         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
               END DO
            END DO
         END IF
      ELSE
         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
                                     be*local_data(i, j)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
                                     be*local_data(i, j)
               END DO
            END DO
         END IF
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_set_submatrix
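
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Scaled sub-matrix update using the alpha/beta arguments of cp_cfm_set_submatrix;
!>        new_values must be replicated on all processes. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_accumulate(matrix, new_values)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values

      ! matrix <- 2*new_values + matrix on the updated block
      CALL cp_cfm_set_submatrix(matrix, new_values, alpha=(2.0_dp, 0.0_dp), beta=z_one)
   END SUBROUTINE cp_cfm_example_accumulate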

! **************************************************************************************************
!> \brief Returns information about a full matrix.
!> \param matrix matrix
!> \param name name of the matrix
!> \param nrow_global total number of rows
!> \param ncol_global total number of columns
!> \param nrow_block number of rows per ScaLAPACK block
!> \param ncol_block number of columns per ScaLAPACK block
!> \param nrow_local number of locally stored rows
!> \param ncol_local number of locally stored columns
!> \param row_indices global indices of locally stored rows
!> \param col_indices global indices of locally stored columns
!> \param local_data locally stored matrix elements
!> \param context BLACS context
!> \param matrix_struct matrix structure
!> \param para_env parallel environment
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
                              nrow_block, ncol_block, nrow_local, ncol_local, &
                              row_indices, col_indices, local_data, context, &
                              matrix_struct, para_env)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      CHARACTER(len=*), INTENT(OUT), OPTIONAL :: name
      INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
                                        ncol_block, nrow_local, ncol_local
      INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
      COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         OPTIONAL, POINTER :: local_data
      TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
      TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
      TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env

      IF (PRESENT(name)) name = matrix%name
      IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
      IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(

      CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
                            ncol_local=ncol_local, nrow_global=nrow_global, &
                            ncol_global=ncol_global, nrow_block=nrow_block, &
                            ncol_block=ncol_block, context=context, &
                            row_indices=row_indices, col_indices=col_indices, para_env=para_env)

   END SUBROUTINE cp_cfm_get_info
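
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Queries the global and local shape of a matrix together with the global indices of the
!>        locally stored rows/columns. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_query(matrix)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix

      INTEGER :: ncol_global, ncol_local, nrow_global, nrow_local
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices

      CALL cp_cfm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
                           nrow_local=nrow_local, ncol_local=ncol_local, &
                           row_indices=row_indices, col_indices=col_indices)
   END SUBROUTINE cp_cfm_example_query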

! **************************************************************************************************
!> \brief Copy the content of a full matrix into another full matrix of the same size.
!> \param source source matrix
!> \param destination destination matrix
!> \author Joost VandeVondele
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination

      INTEGER :: npcol, nprow

      nprow = source%matrix_struct%context%num_pe(1)
      npcol = source%matrix_struct%context%num_pe(2)

      IF (.NOT. cp2k_is_parallel .OR. &
          cp_fm_struct_equivalent(source%matrix_struct, &
                                  destination%matrix_struct)) THEN
         IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
             SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
            CPABORT("internal local_data has different sizes")
         CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
      ELSE
         IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
            CPABORT("cannot copy between full matrices of different sizes")
         IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
            CPABORT("cannot copy between full matrices of different sizes")
#if defined(__SCALAPACK)
         CALL pzcopy(source%matrix_struct%nrow_global* &
                     source%matrix_struct%ncol_global, &
                     source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
                     destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
#else
         CPABORT("")
#endif
      END IF
   END SUBROUTINE cp_cfm_to_cfm_matrix

! **************************************************************************************************
!> \brief Copy a number of sequential columns of a full matrix into another full matrix.
!> \param msource source matrix
!> \param mtarget destination matrix
!> \param ncol number of columns to copy
!> \param source_start global index of the first column to copy within the source matrix
!> \param target_start global index of the first column to copy within the destination matrix
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
                                    target_start)

      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
      INTEGER, INTENT(IN) :: ncol
      INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_columns'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: a, b
      INTEGER :: handle, n, ss, ts
#if defined(__SCALAPACK)
      INTEGER :: i
      INTEGER, DIMENSION(9) :: desca, descb
#endif

      CALL timeset(routineN, handle)

      ss = 1
      ts = 1

      IF (PRESENT(source_start)) ss = source_start
      IF (PRESENT(target_start)) ts = target_start

      n = msource%matrix_struct%nrow_global

      a => msource%local_data
      b => mtarget%local_data

#if defined(__SCALAPACK)
      desca(:) = msource%matrix_struct%descriptor(:)
      descb(:) = mtarget%matrix_struct%descriptor(:)
      DO i = 0, ncol - 1
         CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
      END DO
#else
      CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
#endif

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_to_cfm_columns
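
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Demonstrates how the generic interface cp_cfm_to_cfm resolves: two arguments copy the
!>        whole matrix, while passing ncol selects the column-wise variant. The subroutine name
!>        is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_copy(msource, mtarget)
      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget

      ! whole-matrix copy (cp_cfm_to_cfm_matrix)
      CALL cp_cfm_to_cfm(msource, mtarget)
      ! copy the first two columns of msource into columns 3:4 of mtarget (cp_cfm_to_cfm_columns)
      CALL cp_cfm_to_cfm(msource, mtarget, ncol=2, source_start=1, target_start=3)
   END SUBROUTINE cp_cfm_example_copy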

! **************************************************************************************************
!> \brief Copy just a triangular part of a matrix.
!> \param msource source matrix
!> \param mtarget target matrix
!> \param uplo 'U' for the upper triangular part, 'L' for the lower triangular part
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
      CHARACTER(len=*), INTENT(IN) :: uplo

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_triangular'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
      INTEGER :: handle, ncol, nrow
#if defined(__SCALAPACK)
      INTEGER, DIMENSION(9) :: desca, descb
#endif

      CALL timeset(routineN, handle)

      nrow = msource%matrix_struct%nrow_global
      ncol = msource%matrix_struct%ncol_global

      aa => msource%local_data
      bb => mtarget%local_data

#if defined(__SCALAPACK)
      desca(:) = msource%matrix_struct%descriptor(:)
      descb(:) = mtarget%matrix_struct%descriptor(:)
      CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
#else
      CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
#endif

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_to_cfm_triangular
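
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Copies only the upper triangle of msource; the strictly lower triangle of mtarget is
!>        left untouched. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_copy_upper(msource, mtarget)
      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget

      CALL cp_cfm_to_cfm_triangular(msource, mtarget, 'U')
   END SUBROUTINE cp_cfm_example_copy_upper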

! **************************************************************************************************
!> \brief Copy the real and imaginary parts of a complex full matrix into
!>        separate real-valued full matrices.
!> \param msource complex matrix
!> \param mtargetr (optional) real part of the source matrix
!> \param mtargeti (optional) imaginary part of the source matrix
!> \note
!>      Matrix structures are assumed to be equivalent.
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)

      TYPE(cp_cfm_type), INTENT(IN) :: msource
      TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: mtargetr, mtargeti

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_fm'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
      INTEGER :: handle
      REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat

      CALL timeset(routineN, handle)

      zmat => msource%local_data
      IF (PRESENT(mtargetr)) THEN
         rmat => mtargetr%local_data
         IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
             (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of mtargetr differs from msource")
         END IF
         ! copy local data
         rmat = real(zmat, kind=dp)
      ELSE
         NULLIFY (rmat)
      END IF
      IF (PRESENT(mtargeti)) THEN
         imat => mtargeti%local_data
         IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
             (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of mtargeti differs from msource")
         END IF
         ! copy local data
         imat = real(aimag(zmat), kind=dp)
      ELSE
         NULLIFY (imat)
      END IF

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_to_fm

! **************************************************************************************************
!> \brief Construct a complex full matrix by taking its real and imaginary parts from
!>        two separate real-valued full matrices.
!> \param msourcer (optional) real part of the complex matrix (defaults to 0.0)
!> \param msourcei (optional) imaginary part of the complex matrix (defaults to 0.0)
!> \param mtarget resulting complex matrix
!> \note
!>      Matrix structures are assumed to be equivalent.
! **************************************************************************************************
   SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
      TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: msourcer, msourcei
      TYPE(cp_cfm_type), INTENT(IN) :: mtarget

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_to_cfm'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
      INTEGER :: handle, mode
      REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat

      CALL timeset(routineN, handle)

      mode = 0
      zmat => mtarget%local_data
      IF (PRESENT(msourcer)) THEN
         rmat => msourcer%local_data
         IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
             (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of msourcer differs from mtarget")
         END IF
         mode = mode + 1
      ELSE
         NULLIFY (rmat)
      END IF
      IF (PRESENT(msourcei)) THEN
         imat => msourcei%local_data
         IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
             (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of msourcei differs from mtarget")
         END IF
         mode = mode + 2
      ELSE
         NULLIFY (imat)
      END IF
      ! copy local data
      SELECT CASE (mode)
      CASE (0)
         zmat(:, :) = z_zero
      CASE (1)
         zmat(:, :) = cmplx(rmat(:, :), 0.0_dp, kind=dp)
      CASE (2)
         zmat(:, :) = cmplx(0.0_dp, imat(:, :), kind=dp)
      CASE (3)
         zmat(:, :) = cmplx(rmat(:, :), imat(:, :), kind=dp)
      END SELECT

      CALL timestop(handle)

   END SUBROUTINE cp_fm_to_cfm
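
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Round trip between one complex and two real matrices with equivalent structures:
!>        split with cp_cfm_to_fm, rebuild with cp_fm_to_cfm. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_split_merge(zmat, rpart, ipart)
      TYPE(cp_cfm_type), INTENT(IN) :: zmat
      TYPE(cp_fm_type), INTENT(IN) :: rpart, ipart

      ! rpart <- Re(zmat), ipart <- Im(zmat)
      CALL cp_cfm_to_fm(zmat, mtargetr=rpart, mtargeti=ipart)
      ! zmat <- rpart + i*ipart
      CALL cp_fm_to_cfm(msourcer=rpart, msourcei=ipart, mtarget=zmat)
   END SUBROUTINE cp_cfm_example_split_merge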

! **************************************************************************************************
!> \brief Initiate the copy operation: get distribution data, post MPI isend and irecvs.
!> \param source input complex-valued fm matrix
!> \param destination output complex-valued fm matrix
!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
!>        of the input and output matrices
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env
      TYPE(copy_cfm_info_type), INTENT(out) :: info

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_start_copy_general'

      INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
                 ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
                 nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
                 recv_rank, recv_size, send_rank, send_size
      INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
                                            recv_count, send_count, send_disp, &
                                            source2global, src_p, src_q
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
      INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
                               src_block, src_block_tmp, src_num_pe
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
                                        send_col_indices, send_row_indices
      TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
      TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req

      CALL timeset(routineN, handle)

      IF (.NOT. cp2k_is_parallel) THEN
         ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
         nrow_local_send = SIZE(source%local_data, 1)
         ncol_local_send = SIZE(source%local_data, 2)
         ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
         k = 0
         DO j = 1, ncol_local_send
            DO i = 1, nrow_local_send
               k = k + 1
               info%send_buf(k) = source%local_data(i, j)
            END DO
         END DO
      ELSE
         NULLIFY (recv_dist, send_dist)
         NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)

         ! The 'global' communicator contains both the source and destination decompositions
         global_size = para_env%num_pe
         global_rank = para_env%mepos

         ! The source/send and destination/recv decompositions may only exist on
         ! a subset of the processes involved in the communication.
         ! Check if the source and/or destination arguments are .NOT. ASSOCIATED():
         ! if so, skip the send / recv parts (since these processes do not participate
         ! in the sending/receiving distribution)
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            recv_dist => destination%matrix_struct
            recv_rank = recv_dist%para_env%mepos
         ELSE
            recv_rank = mp_proc_null
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            send_dist => source%matrix_struct
            send_rank = send_dist%para_env%mepos
         ELSE
            send_rank = mp_proc_null
         END IF

         ! Map the rank in the source/dest communicator to the global rank
         ALLOCATE (all_ranks(0:global_size - 1))

         CALL para_env%allgather(send_rank, all_ranks)
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            ALLOCATE (source2global(0:count(all_ranks .NE. mp_proc_null) - 1))
            DO i = 0, global_size - 1
               IF (all_ranks(i) .NE. mp_proc_null) THEN
                  source2global(all_ranks(i)) = i
               END IF
            END DO
         END IF

         CALL para_env%allgather(recv_rank, all_ranks)
         IF (ASSOCIATED(source%matrix_struct)) THEN
            ALLOCATE (dest2global(0:count(all_ranks .NE. mp_proc_null) - 1))
            DO i = 0, global_size - 1
               IF (all_ranks(i) .NE. mp_proc_null) THEN
                  dest2global(all_ranks(i)) = i
               END IF
            END DO
         END IF
         DEALLOCATE (all_ranks)

         ! Some data from the two decompositions will be needed by all processes in the global group:
         ! process grid shape, block size, and the BLACS-to-MPI mapping

         ! The global root process will receive the data (from the root process in each decomposition)
         send_req(:) = mp_request_null
         IF (global_rank == 0) THEN
            recv_req(:) = mp_request_null
            CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
            CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
            CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
            CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            IF ((send_rank == 0)) THEN
               ! need to use separate buffers here in case this is actually global rank 0
               src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
               CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
               CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
            END IF
         END IF

         IF (ASSOCIATED(destination%matrix_struct)) THEN
            IF ((recv_rank == 0)) THEN
               dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
               CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
               CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
            END IF
         END IF

         IF (global_rank == 0) THEN
            CALL mp_waitall(recv_req(1:4))
            ! Now that the process decompositions are known, allocate the arrays holding the blacs2mpi mappings
            ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
                      dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
            CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
            CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            IF ((send_rank == 0)) THEN
               CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
            END IF
         END IF

         IF (ASSOCIATED(destination%matrix_struct)) THEN
            IF ((recv_rank == 0)) THEN
               CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
            END IF
         END IF

         IF (global_rank == 0) THEN
            CALL mp_waitall(recv_req(5:6))
         END IF

         ! Finally, broadcast the data to all processes in the global communicator
         CALL para_env%bcast(src_block, 0)
         CALL para_env%bcast(dest_block, 0)
         CALL para_env%bcast(src_num_pe, 0)
         CALL para_env%bcast(dest_num_pe, 0)
         info%src_num_pe(1:2) = src_num_pe(1:2)
         info%nblock_src(1:2) = src_block(1:2)
         IF (global_rank /= 0) THEN
            ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
                      dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
         END IF
         CALL para_env%bcast(info%src_blacs2mpi, 0)
         CALL para_env%bcast(dest_blacs2mpi, 0)

         recv_size = dest_num_pe(1)*dest_num_pe(2)
         send_size = src_num_pe(1)*src_num_pe(2)
         info%send_size = send_size
         CALL mp_waitall(send_req(:))

         ! Setup is now complete, we can start the actual communication here.
         ! The order implemented here is:
         !  DEST_1
         !    compute recv sizes
         !    call irecv
         !  SRC_1
         !    compute send sizes
         !    pack send buffers
         !    call isend
         !  DEST_2
         !    wait for the recvs and unpack buffers (this part eventually will go into
         !    another routine to allow comms to run concurrently)
         !  SRC_2
         !    wait for the sends

         ! DEST_1
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
                                  col_indices=recv_col_indices)
            info%recv_col_indices => recv_col_indices
            info%recv_row_indices => recv_row_indices
            nrow_block_src = src_block(1)
            ncol_block_src = src_block(2)
            ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))

            ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
            nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
            ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
            info%nlocal_recv(1) = nrow_local_recv
            info%nlocal_recv(2) = ncol_local_recv
            ! Initialise src_p, src_q arrays (sized using the number of rows/cols in the receiving distribution)
            ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
            DO i = 1, nrow_local_recv
               ! For each local row we will receive, we look up its global row (in recv_row_indices),
               ! then work out which row block it comes from, and which process row that row block comes from.
               src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
            END DO
            DO j = 1, ncol_local_recv
               ! Similarly for the columns
               src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
            END DO
            ! src_p/q now contain the process row/column ID that will send data to that row/column

            DO q = 0, src_num_pe(2) - 1
               ncols = count(src_q .EQ. q)
               DO p = 0, src_num_pe(1) - 1
                  nrows = count(src_p .EQ. p)
                  ! Use the send_dist here as we are looking up the processes where the data comes from
                  recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
               END DO
            END DO
            DEALLOCATE (src_p, src_q)

            ! Use one long buffer (and displacements into that buffer);
            ! this prevents the need for a rectangular array where not all elements will be populated
            ALLOCATE (info%recv_buf(sum(recv_count(:))))
            info%recv_disp(0) = 0
            DO i = 1, send_size - 1
               info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
            END DO

            ! Issue receive calls on ranks which expect data
            DO k = 0, send_size - 1
               IF (recv_count(k) .GT. 0) THEN
                  CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
                                      source2global(k), info%recv_request(k))
               ELSE
                  info%recv_request(k) = mp_request_null
               END IF
            END DO
            DEALLOCATE (source2global)
         END IF ! ASSOCIATED(destination)

         ! SRC_1
         IF (ASSOCIATED(source%matrix_struct)) THEN
            CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
                                  col_indices=send_col_indices)
            nrow_block_dest = dest_block(1)
            ncol_block_dest = dest_block(2)
            ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))

            ! Determine the send counts, allocate the send buffers
            nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
            ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))

            ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local),
            ! i.e. the number of rows/cols in the sending distribution
            ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))

            DO i = 1, nrow_local_send
               ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
               dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
            END DO
            DO j = 1, ncol_local_send
               dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
            END DO
            ! dest_p/q now contain the process row/column ID that will receive data from that row/column

            DO q = 0, dest_num_pe(2) - 1
               ncols = count(dest_q .EQ. q)
               DO p = 0, dest_num_pe(1) - 1
                  nrows = count(dest_p .EQ. p)
                  send_count(dest_blacs2mpi(p, q)) = nrows*ncols
               END DO
            END DO
            DEALLOCATE (dest_p, dest_q)

            ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
            ALLOCATE (info%send_buf(sum(send_count(:))))
            send_disp(0) = 0
            DO k = 1, recv_size - 1
               send_disp(k) = send_disp(k - 1) + send_count(k - 1)
            END DO

            ! Loop over the source matrix, pack the send buffers
            send_count(:) = 0
            DO j = 1, ncol_local_send
               ! Use send_col_indices and send_row_indices here, as we are looking up the global row/column number of local rows.
               dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
               DO i = 1, nrow_local_send
                  dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
                  mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
                  send_count(mpi_rank) = send_count(mpi_rank) + 1
                  info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
               END DO
            END DO

            ! For each non-zero send_count, call mpi_isend
            DO k = 0, recv_size - 1
               IF (send_count(k) .GT. 0) THEN
                  CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
                                      dest2global(k), info%send_request(k))
               ELSE
                  info%send_request(k) = mp_request_null
               END IF
            END DO
            DEALLOCATE (send_count, send_disp, dest2global)
         END IF ! ASSOCIATED(source)
         DEALLOCATE (dest_blacs2mpi)

      END IF ! IF (.NOT. cp2k_is_parallel)

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_start_copy_general

! **************************************************************************************************
!> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
!> \param destination output cfm matrix
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_finish_copy_general(destination, info)
      TYPE(cp_cfm_type), INTENT(IN) :: destination
      TYPE(copy_cfm_info_type), INTENT(inout) :: info

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_finish_copy_general'

      INTEGER :: handle, i, j, k, mpi_rank, ni, nj, &
                 src_q_j
      INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count, src_p_i
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices

      CALL timeset(routineN, handle)

      IF (.NOT. cp2k_is_parallel) THEN
         ! Now unpack the data from the 'send buffer'
         k = 0
         DO j = 1, SIZE(destination%local_data, 2)
            DO i = 1, SIZE(destination%local_data, 1)
               k = k + 1
               destination%local_data(i, j) = info%send_buf(k)
            END DO
         END DO
         DEALLOCATE (info%send_buf)
      ELSE
         ! Set up local variables ...
         recv_col_indices => info%recv_col_indices
         recv_row_indices => info%recv_row_indices

         ! ... use the local variables to do the work
         ! DEST_2
         CALL mp_waitall(info%recv_request(:))

         nj = info%nlocal_recv(2)
         ni = info%nlocal_recv(1)
         ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
         ! Loop over the destination matrix, filling it in with data from the recv buffers
         ! (here the block sizes and num_pes refer to the distribution of the source matrix)
         recv_count(:) = 0
         DO i = 1, ni
            src_p_i(i) = mod(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
         END DO

         DO j = 1, nj
            src_q_j = mod(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
            DO i = 1, ni
               mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
               recv_count(mpi_rank) = recv_count(mpi_rank) + 1
               destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
            END DO
         END DO

         DEALLOCATE (recv_count, src_p_i)
         ! Invalidate the stored state
         NULLIFY (info%recv_col_indices, info%recv_row_indices)
         DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
      END IF

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_finish_copy_general

! **************************************************************************************************
!> \brief Complete the copy operation: wait for comms, clean up MPI state.
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_cleanup_copy_general(info)
      TYPE(copy_cfm_info_type), INTENT(inout) :: info

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_cleanup_copy_general'

      INTEGER :: handle

      CALL timeset(routineN, handle)

      IF (cp2k_is_parallel) THEN
         ! SRC_2
         ! If this process is also in the destination decomposition, this deallocation
         ! was already done in cp_cfm_finish_copy_general
         IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
         CALL mp_waitall(info%send_request(:))
         DEALLOCATE (info%send_request, info%send_buf)
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_cleanup_copy_general
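
! **************************************************************************************************
!> \brief Illustrative usage sketch added by the editor; not part of the original module.
!>        Shows the intended three-phase protocol inferred from the comments above: start on all
!>        processes, finish on processes that hold part of the destination, cleanup on processes
!>        that hold part of the source. The subroutine name is hypothetical.
! **************************************************************************************************
   SUBROUTINE cp_cfm_example_copy_general(source, destination, para_env)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env

      TYPE(copy_cfm_info_type) :: info

      ! post the non-blocking receives and sends
      CALL cp_cfm_start_copy_general(source, destination, para_env, info)
      ! ... unrelated work may overlap with the communication here ...
      ! receivers wait for their data and unpack it
      IF (ASSOCIATED(destination%matrix_struct)) &
         CALL cp_cfm_finish_copy_general(destination, info)
      ! senders wait until their sends have completed
      IF (ASSOCIATED(source%matrix_struct)) &
         CALL cp_cfm_cleanup_copy_general(info)
   END SUBROUTINE cp_cfm_example_copy_general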

END MODULE cp_cfm_types