(git:ccc2433)
cp_fm_types.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief represent a full matrix distributed on many processors
10 !> \par History
11 !> 3) separated structure object, removed globenv, renamed to full matrix
12 !> many changes (fawzi 08.2002)
13 !> \author Matthias Krack (22.05.2001)
14 ! **************************************************************************************************
16  USE cp_blacs_env, ONLY: cp_blacs_env_type
17  USE cp_blacs_types, ONLY: cp_blacs_type
22  cp_fm_struct_type,&
24  USE kinds, ONLY: dp,&
25  sp
28  mp_para_env_type,&
29  mp_proc_null,&
31  mp_request_type,&
32  mp_waitall
33  USE parallel_rng_types, ONLY: uniform,&
34  rng_stream_type
35 #include "../base/base_uses.f90"
36 
37  IMPLICIT NONE
38 
39  PRIVATE
40 
41  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
42  LOGICAL, PARAMETER :: debug_this_module = .true.
43  INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
44 
45  INTEGER, PRIVATE :: mm_type = 1
46 
47  PUBLIC :: cp_fm_type, &
48  cp_fm_p_type, copy_info_type
49 
50  PUBLIC :: cp_fm_add_to_element, &
51  cp_fm_create, &
52  cp_fm_release, &
56  cp_fm_get_diag, & ! get diagonal
57  cp_fm_set_all, & ! set all elements and diagonal
58  cp_fm_set_submatrix, & ! set a submatrix to given values
59  cp_fm_get_submatrix, & ! get a submatrix of given values
61  cp_fm_maxabsval, & ! find the maximum absolute value
62  cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
63  cp_fm_to_fm, & ! copy (parts of) a fm to a fm
64  cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
65  cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
66  cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
72  cp_fm_write_unformatted, & ! writes a full matrix to an open unit
73  cp_fm_write_formatted, & ! writes a full matrix to an open unit
74  cp_fm_read_unformatted, & ! reads a full matrix from an open unit
75  cp_fm_setup, & ! allows to set flags for fms
78  cp_fm_to_fm_submat_general ! copy matrix across different contexts
79 
80  PUBLIC :: cp_fm_indxg2p, &
81  cp_fm_indxg2l, &
82  cp_fm_indxl2g, &
84 
85  INTERFACE cp_fm_to_fm
86  MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
87  cp_fm_to_fm_columns ! just a number of columns
88  END INTERFACE
89 
90  INTERFACE cp_fm_release
91  MODULE PROCEDURE cp_fm_release_aa0, &
92  cp_fm_release_aa1, &
93  cp_fm_release_aa2, &
94  cp_fm_release_aa3, &
95  cp_fm_release_ap1, &
96  cp_fm_release_ap2, &
97  cp_fm_release_pa1, &
98  cp_fm_release_pa2, &
99  cp_fm_release_pa3, &
100  cp_fm_release_pp1, &
101  cp_fm_release_pp2
102  END INTERFACE
103 
104 ! **************************************************************************************************
105 !> \brief represent a full matrix
106 !> \param name the name of the matrix, used for printing
107 !> \param matrix_struct structure of this matrix
108 !> \param local_data array with the data of the matrix (its contents
109 !> depend on the matrix type used: in parallel runs it will be
110 !> in scalapack format, in sequential, it will simply contain
111 !> the matrix)
112 !> \par History
113 !> 08.2002 created [fawzi]
114 !> \author fawzi
115 ! **************************************************************************************************
116  TYPE cp_fm_type
117 ! PRIVATE
118  CHARACTER(LEN=60) :: name = ""
119  LOGICAL :: use_sp = .false.
120  TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
121  REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
122  REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => null()
123  END TYPE cp_fm_type
124 
125 ! **************************************************************************************************
126 !> \brief just to build arrays of pointers to matrices
127 !> \param matrix the pointer to the matrix
128 !> \par History
129 !> 08.2002 created [fawzi]
130 !> \author fawzi
131 ! **************************************************************************************************
132  TYPE cp_fm_p_type
133  TYPE(cp_fm_type), POINTER :: matrix => null()
134  END TYPE cp_fm_p_type
135 
136 ! **************************************************************************************************
137 !> \brief Stores the state of a copy between cp_fm_start_copy_general
138 !> and cp_fm_finish_copy_general
139 !> \par History
140 !> Jan 2017 [Mark T]
141 ! **************************************************************************************************
142  TYPE copy_info_type
143  INTEGER :: send_size = -1
144  INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
145  TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
146  INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
147  INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
148  INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
149  REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
150  END TYPE copy_info_type
151 
152 CONTAINS
153 
154 ! **************************************************************************************************
155 !> \brief creates a new full matrix with the given structure
156 !> \param matrix the matrix to be created
157 !> \param matrix_struct the structure of matrix
158 !> \param name ...
159 !> \param use_sp ...
160 !> \par History
161 !> 08.2002 created [fawzi]
162 !> \author Fawzi Mohamed
163 !> \note
164 !> preferred allocation routine
165 ! **************************************************************************************************
166  SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
167  TYPE(cp_fm_type), INTENT(OUT) :: matrix
168  TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
169  CHARACTER(len=*), INTENT(in), OPTIONAL :: name
170  LOGICAL, INTENT(in), OPTIONAL :: use_sp
171 
172  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_create'
173 
174  INTEGER :: handle, ncol_local, npcol, nprow, &
175  nrow_local
176  TYPE(cp_blacs_env_type), POINTER :: context
177 
178  CALL timeset(routinen, handle)
179 
180 #if defined(__parallel) && ! defined(__SCALAPACK)
181  cpabort("full matrices need scalapack for parallel runs")
182 #endif
183 
184  context => matrix_struct%context
185  matrix%matrix_struct => matrix_struct
186  CALL cp_fm_struct_retain(matrix%matrix_struct)
187 
188  matrix%use_sp = .false.
189  IF (PRESENT(use_sp)) matrix%use_sp = use_sp
190 
191  nprow = context%num_pe(1)
192  npcol = context%num_pe(2)
193  NULLIFY (matrix%local_data)
194  NULLIFY (matrix%local_data_sp)
195 
196  ! OK, we allocate here at least a 1 x 1 matrix
197  ! this must (and is) compatible with the descinit call
198  ! in cp_fm_struct
199  nrow_local = matrix_struct%local_leading_dimension
200  ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
201  IF (matrix%use_sp) THEN
202  ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
203  ELSE
204  ALLOCATE (matrix%local_data(nrow_local, ncol_local))
205  END IF
206 
207  ! JVDV we should remove this, as it is up to the user to zero afterwards
208  IF (matrix%use_sp) THEN
209  matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
210  ELSE
211  matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
212  END IF
213 
214  IF (PRESENT(name)) THEN
215  matrix%name = name
216  ELSE
217  matrix%name = 'full matrix'
218  END IF
219 
220  CALL timestop(handle)
221 
222  END SUBROUTINE cp_fm_create
223 
224 ! **************************************************************************************************
225 !> \brief releases a full matrix
226 !> \param matrix the matrix to release
227 !> \par History
228 !> 08.2002 created [fawzi]
229 !> \author Fawzi Mohamed
230 ! **************************************************************************************************
231  SUBROUTINE cp_fm_release_aa0(matrix)
232  TYPE(cp_fm_type), INTENT(INOUT) :: matrix
233 
234  IF (ASSOCIATED(matrix%local_data)) THEN
235  DEALLOCATE (matrix%local_data)
236  NULLIFY (matrix%local_data)
237  END IF
238  IF (ASSOCIATED(matrix%local_data_sp)) THEN
239  DEALLOCATE (matrix%local_data_sp)
240  NULLIFY (matrix%local_data_sp)
241  END IF
242  matrix%name = ""
243  CALL cp_fm_struct_release(matrix%matrix_struct)
244 
245  END SUBROUTINE cp_fm_release_aa0
246 
247 ! **************************************************************************************************
248 !> \brief ...
249 !> \param matrices ...
250 ! **************************************************************************************************
251  SUBROUTINE cp_fm_release_aa1(matrices)
252  TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
253 
254  INTEGER :: i
255 
256  IF (ALLOCATED(matrices)) THEN
257  DO i = 1, SIZE(matrices)
258  CALL cp_fm_release(matrices(i))
259  END DO
260  DEALLOCATE (matrices)
261  END IF
262  END SUBROUTINE cp_fm_release_aa1
263 
264 ! **************************************************************************************************
265 !> \brief ...
266 !> \param matrices ...
267 ! **************************************************************************************************
268  SUBROUTINE cp_fm_release_aa2(matrices)
269  TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
270 
271  INTEGER :: i, j
272 
273  IF (ALLOCATED(matrices)) THEN
274  DO i = 1, SIZE(matrices, 1)
275  DO j = 1, SIZE(matrices, 2)
276  CALL cp_fm_release(matrices(i, j))
277  END DO
278  END DO
279  DEALLOCATE (matrices)
280  END IF
281  END SUBROUTINE cp_fm_release_aa2
282 
283 ! **************************************************************************************************
284 !> \brief ...
285 !> \param matrices ...
286 ! **************************************************************************************************
287  SUBROUTINE cp_fm_release_aa3(matrices)
288  TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
289 
290  INTEGER :: i, j, k
291 
292  IF (ALLOCATED(matrices)) THEN
293  DO i = 1, SIZE(matrices, 1)
294  DO j = 1, SIZE(matrices, 2)
295  DO k = 1, SIZE(matrices, 3)
296  CALL cp_fm_release(matrices(i, j, k))
297  END DO
298  END DO
299  END DO
300  DEALLOCATE (matrices)
301  END IF
302  END SUBROUTINE cp_fm_release_aa3
303 
304 ! **************************************************************************************************
305 !> \brief ...
306 !> \param matrices ...
307 ! **************************************************************************************************
308  SUBROUTINE cp_fm_release_pa1(matrices)
309  TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
310 
311  INTEGER :: i
312 
313  IF (ASSOCIATED(matrices)) THEN
314  DO i = 1, SIZE(matrices)
315  CALL cp_fm_release(matrices(i))
316  END DO
317  DEALLOCATE (matrices)
318  NULLIFY (matrices)
319  END IF
320  END SUBROUTINE cp_fm_release_pa1
321 
322 ! **************************************************************************************************
323 !> \brief ...
324 !> \param matrices ...
325 ! **************************************************************************************************
326  SUBROUTINE cp_fm_release_pa2(matrices)
327  TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
328 
329  INTEGER :: i, j
330 
331  IF (ASSOCIATED(matrices)) THEN
332  DO i = 1, SIZE(matrices, 1)
333  DO j = 1, SIZE(matrices, 2)
334  CALL cp_fm_release(matrices(i, j))
335  END DO
336  END DO
337  DEALLOCATE (matrices)
338  NULLIFY (matrices)
339  END IF
340  END SUBROUTINE cp_fm_release_pa2
341 
342 ! **************************************************************************************************
343 !> \brief ...
344 !> \param matrices ...
345 ! **************************************************************************************************
346  SUBROUTINE cp_fm_release_pa3(matrices)
347  TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
348 
349  INTEGER :: i, j, k
350 
351  IF (ASSOCIATED(matrices)) THEN
352  DO i = 1, SIZE(matrices, 1)
353  DO j = 1, SIZE(matrices, 2)
354  DO k = 1, SIZE(matrices, 3)
355  CALL cp_fm_release(matrices(i, j, k))
356  END DO
357  END DO
358  END DO
359  DEALLOCATE (matrices)
360  NULLIFY (matrices)
361  END IF
362  END SUBROUTINE cp_fm_release_pa3
363 
364 ! **************************************************************************************************
365 !> \brief ...
366 !> \param matrices ...
367 ! **************************************************************************************************
368  SUBROUTINE cp_fm_release_ap1(matrices)
369  TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
370 
371  INTEGER :: i
372 
373  IF (ALLOCATED(matrices)) THEN
374  DO i = 1, SIZE(matrices)
375  CALL cp_fm_release(matrices(i)%matrix)
376  DEALLOCATE (matrices(i)%matrix)
377  END DO
378  DEALLOCATE (matrices)
379  END IF
380  END SUBROUTINE cp_fm_release_ap1
381 
382 ! **************************************************************************************************
383 !> \brief ...
384 !> \param matrices ...
385 ! **************************************************************************************************
386  SUBROUTINE cp_fm_release_ap2(matrices)
387  TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
388 
389  INTEGER :: i, j
390 
391  IF (ALLOCATED(matrices)) THEN
392  DO i = 1, SIZE(matrices, 1)
393  DO j = 1, SIZE(matrices, 2)
394  CALL cp_fm_release(matrices(i, j)%matrix)
395  DEALLOCATE (matrices(i, j)%matrix)
396  END DO
397  END DO
398  DEALLOCATE (matrices)
399  END IF
400  END SUBROUTINE cp_fm_release_ap2
401 
402 ! **************************************************************************************************
403 !> \brief ...
404 !> \param matrices ...
405 ! **************************************************************************************************
406  SUBROUTINE cp_fm_release_pp1(matrices)
407  TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
408 
409  INTEGER :: i
410 
411  IF (ASSOCIATED(matrices)) THEN
412  DO i = 1, SIZE(matrices)
413  CALL cp_fm_release(matrices(i)%matrix)
414  DEALLOCATE (matrices(i)%matrix)
415  END DO
416  DEALLOCATE (matrices)
417  NULLIFY (matrices)
418  END IF
419  END SUBROUTINE cp_fm_release_pp1
420 
421 ! **************************************************************************************************
422 !> \brief ...
423 !> \param matrices ...
424 ! **************************************************************************************************
425  SUBROUTINE cp_fm_release_pp2(matrices)
426  TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
427 
428  INTEGER :: i, j
429 
430  IF (ASSOCIATED(matrices)) THEN
431  DO i = 1, SIZE(matrices, 1)
432  DO j = 1, SIZE(matrices, 2)
433  CALL cp_fm_release(matrices(i, j)%matrix)
434  DEALLOCATE (matrices(i, j)%matrix)
435  END DO
436  END DO
437  DEALLOCATE (matrices)
438  NULLIFY (matrices)
439  END IF
440  END SUBROUTINE cp_fm_release_pp2
441 
442 ! **************************************************************************************************
443 !> \brief fills a matrix with random numbers
444 !> \param matrix : to be initialized
445 !> \param ncol : numbers of cols to fill
446 !> \param start_col : starting at coll number
447 !> \author Joost VandeVondele
448 !> \note
449 !> the value of a_ij is independent of the number of cpus
450 ! **************************************************************************************************
451  SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
452  TYPE(cp_fm_type), INTENT(IN) :: matrix
453  INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
454 
455  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_init_random'
456 
457  INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
458  ncol_local, nrow_global, nrow_local
459  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
460  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: buff
461  REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
462  POINTER :: local_data
463  REAL(kind=dp), DIMENSION(3, 2), SAVE :: &
464  seed = reshape((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
465  TYPE(rng_stream_type) :: rng
466 
467  CALL timeset(routinen, handle)
468 
469  ! guarantee same seed over all tasks
470  CALL matrix%matrix_struct%para_env%bcast(seed, 0)
471 
472  rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=uniform, &
473  extended_precision=.true., seed=seed)
474 
475  cpassert(.NOT. matrix%use_sp)
476 
477  CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
478  nrow_local=nrow_local, ncol_local=ncol_local, &
479  local_data=local_data, &
480  row_indices=row_indices, col_indices=col_indices)
481 
482  my_start_col = 1
483  IF (PRESENT(start_col)) my_start_col = start_col
484  my_ncol = matrix%matrix_struct%ncol_global
485  IF (PRESENT(ncol)) my_ncol = ncol
486 
487  IF (ncol_global < (my_start_col + my_ncol - 1)) &
488  cpabort("ncol_global>=(my_start_col+my_ncol-1)")
489 
490  ALLOCATE (buff(nrow_global))
491 
492  ! each global row has its own substream, in order to reach the stream for the local col,
493  ! we just reset to the next substream
494  ! following this, we fill the full buff with random numbers, and pick those we need
495  icol_global = 0
496  DO icol_local = 1, ncol_local
497  cpassert(col_indices(icol_local) > icol_global)
498  DO
499  CALL rng%reset_to_next_substream()
500  icol_global = icol_global + 1
501  IF (icol_global == col_indices(icol_local)) EXIT
502  END DO
503  CALL rng%fill(buff)
504  DO irow_local = 1, nrow_local
505  local_data(irow_local, icol_local) = buff(row_indices(irow_local))
506  END DO
507  END DO
508 
509  DEALLOCATE (buff)
510 
511  ! store seed before deletion (unclear if this is the proper seed)
512 
513  ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
514  ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
515  ! stream of random numbers. While this seems odd and contrary to the original design,
516  ! it was probably introduced to improve reproducibility.
517  ! See also https://github.com/cp2k/cp2k/pull/506
518  CALL rng%get(ig=seed)
519 
520  CALL timestop(handle)
521 
522  END SUBROUTINE cp_fm_init_random
523 
524 ! **************************************************************************************************
525 !> \brief set all elements of a matrix to the same value,
526 !> and optionally the diagonal to a different one
527 !> \param matrix input matrix
528 !> \param alpha scalar used to set all elements of the matrix
529 !> \param beta scalar used to set diagonal of the matrix
530 !> \note
531 !> can be used to zero a matrix
532 !> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
533 ! **************************************************************************************************
534  SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
535 
536  TYPE(cp_fm_type), INTENT(IN) :: matrix
537  REAL(kind=dp), INTENT(IN) :: alpha
538  REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
539 
540  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_set_all'
541 
542  INTEGER :: handle, i, n
543 
544  CALL timeset(routinen, handle)
545 
546  IF (matrix%use_sp) THEN
547  matrix%local_data_sp(:, :) = real(alpha, sp)
548  ELSE
549  matrix%local_data(:, :) = alpha
550  END IF
551 
552  IF (PRESENT(beta)) THEN
553  cpassert(.NOT. matrix%use_sp)
554  n = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
555  DO i = 1, n
556  CALL cp_fm_set_element(matrix, i, i, beta)
557  END DO
558  END IF
559 
560  CALL timestop(handle)
561 
562  END SUBROUTINE cp_fm_set_all
563 
564 ! **************************************************************************************************
565 !> \brief returns the diagonal elements of a fm
566 !> \param matrix ...
567 !> \param diag ...
568 ! **************************************************************************************************
569  SUBROUTINE cp_fm_get_diag(matrix, diag)
570 
571  ! arguments
572  TYPE(cp_fm_type), INTENT(IN) :: matrix
573  REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: diag
574 
575  ! locals
576  INTEGER :: i, nrow_global
577 
578 #if defined(__SCALAPACK)
579  INTEGER, DIMENSION(9) :: desca
580  TYPE(cp_blacs_env_type), POINTER :: context
581  INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
582  nprow
583  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
584  REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
585 #endif
586 
587  CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
588 
589 #if defined(__SCALAPACK)
590  diag = 0.0_dp
591  context => matrix%matrix_struct%context
592  myprow = context%mepos(1)
593  mypcol = context%mepos(2)
594  nprow = context%num_pe(1)
595  npcol = context%num_pe(2)
596 
597  a => matrix%local_data
598  a_sp => matrix%local_data_sp
599  desca(:) = matrix%matrix_struct%descriptor(:)
600 
601  DO i = 1, nrow_global
602  CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
603  irow_local, icol_local, iprow, ipcol)
604  IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
605  IF (matrix%use_sp) THEN
606  diag(i) = real(a_sp(irow_local, icol_local), dp)
607  ELSE
608  diag(i) = a(irow_local, icol_local)
609  END IF
610  END IF
611  END DO
612 #else
613  DO i = 1, nrow_global
614  IF (matrix%use_sp) THEN
615  diag(i) = real(matrix%local_data_sp(i, i), dp)
616  ELSE
617  diag(i) = matrix%local_data(i, i)
618  END IF
619  END DO
620 #endif
621  CALL matrix%matrix_struct%para_env%sum(diag)
622 
623  END SUBROUTINE cp_fm_get_diag
624 
625 ! **************************************************************************************************
626 !> \brief returns an element of a fm
627 !> this value is valid on every cpu
628 !> using this call is expensive
629 !> \param matrix the matrix to read
630 !> \param irow_global the row
631 !> \param icol_global the col
632 !> \param alpha the value of matrix(irow_global, icol_global)
633 !> \param local true if the element is on this cpu, false otherwise
634 !> \note
635 !> - modified semantics. now this function always returns the value
636 !> previously the value was zero on cpus that didn't own the relevant
637 !> part of the matrix (Joost VandeVondele, May 2003)
638 !> - usage of the function should be avoided, as it is likely to rather slow
639 !> using row_indices/col_indices/local_data + some smart scheme normally
640 !> yields a real parallel code
641 ! **************************************************************************************************
642  SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
643 
644  ! arguments
645  TYPE(cp_fm_type), INTENT(IN) :: matrix
646  REAL(kind=dp), INTENT(OUT) :: alpha
647  INTEGER, INTENT(IN) :: icol_global, &
648  irow_global
649  LOGICAL, INTENT(OUT), OPTIONAL :: local
650 
651  ! locals
652 #if defined(__SCALAPACK)
653  INTEGER, DIMENSION(9) :: desca
654  TYPE(cp_blacs_env_type), POINTER :: context
655  INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
656  nprow
657  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
658 #endif
659 
660 #if defined(__SCALAPACK)
661  context => matrix%matrix_struct%context
662  myprow = context%mepos(1)
663  mypcol = context%mepos(2)
664  nprow = context%num_pe(1)
665  npcol = context%num_pe(2)
666 
667  a => matrix%local_data
668  desca(:) = matrix%matrix_struct%descriptor(:)
669 
670  CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
671  irow_local, icol_local, iprow, ipcol)
672 
673  IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
674  alpha = a(irow_local, icol_local)
675  CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
676  IF (PRESENT(local)) local = .true.
677  ELSE
678  CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
679  IF (PRESENT(local)) local = .false.
680  END IF
681 
682 #else
683  IF (PRESENT(local)) local = .true.
684  alpha = matrix%local_data(irow_global, icol_global)
685 #endif
686 
687  END SUBROUTINE cp_fm_get_element
688 
689 ! **************************************************************************************************
690 !> \brief sets an element of a matrix
691 !> \param matrix ...
692 !> \param irow_global ...
693 !> \param icol_global ...
694 !> \param alpha ...
695 !> \note
696 !> we expect all cpus to have the same arguments in the call to this function
697 !> (otherwise one should use local_data tricks)
698 ! **************************************************************************************************
699  SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
700  TYPE(cp_fm_type), INTENT(IN) :: matrix
701  INTEGER, INTENT(IN) :: irow_global, icol_global
702  REAL(kind=dp), INTENT(IN) :: alpha
703 
704  INTEGER :: mypcol, myprow, npcol, nprow
705  TYPE(cp_blacs_env_type), POINTER :: context
706 #if defined(__SCALAPACK)
707  INTEGER :: icol_local, ipcol, iprow, &
708  irow_local
709  INTEGER, DIMENSION(9) :: desca
710  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
711 #endif
712 
713  context => matrix%matrix_struct%context
714  myprow = context%mepos(1)
715  mypcol = context%mepos(2)
716  nprow = context%num_pe(1)
717  npcol = context%num_pe(2)
718 
719  cpassert(.NOT. matrix%use_sp)
720 
721 #if defined(__SCALAPACK)
722 
723  a => matrix%local_data
724 
725  desca(:) = matrix%matrix_struct%descriptor(:)
726 
727  CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
728  irow_local, icol_local, iprow, ipcol)
729 
730  IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
731  a(irow_local, icol_local) = alpha
732  END IF
733 
734 #else
735 
736  matrix%local_data(irow_global, icol_global) = alpha
737 
738 #endif
739  END SUBROUTINE cp_fm_set_element
740 
741 ! **************************************************************************************************
742 !> \brief sets a submatrix of a full matrix
743 !> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
744 !> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
745 !> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
746 !> \param fm the full to change
747 !> \param new_values a replicated full matrix with the new values
748 !> \param start_row the starting row of b_matrix (defaults to 1)
749 !> \param start_col the starting col of b_matrix (defaults to 1)
750 !> \param n_rows the number of row to change in b (defaults to
751 !> size(op(new_values),1))
752 !> \param n_cols the number of columns to change in b (defaults to
753 !> size(op(new_values),2))
754 !> \param alpha rescaling factor for the new values (defaults to 1.0)
755 !> \param beta rescaling factor for the old values (defaults to 0.0)
756 !> \param transpose if new_values should be transposed: if true
757 !> op(new_values)=new_values^T, else op(new_values)=new_values
758 !> (defaults to false)
759 !> \par History
760 !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
761 !> \author Fawzi Mohamed
762 !> \note
763 !> optimized for full column updates and alpha=1.0, beta=0.0
764 !> the new_values need to be valid on all cpus
765 ! **************************************************************************************************
766  SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
767  start_col, n_rows, n_cols, alpha, beta, transpose)
768  TYPE(cp_fm_type), INTENT(IN) :: fm
769  REAL(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
770  INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
771  REAL(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
772  LOGICAL, INTENT(in), OPTIONAL :: transpose
773 
774  INTEGER :: i, i0, j, j0, ncol, ncol_block, &
775  ncol_global, ncol_local, nrow, &
776  nrow_block, nrow_global, nrow_local, &
777  this_col, this_row
778  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
779  LOGICAL :: tr_a
780  REAL(kind=dp) :: al, be
781  REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
782 
783  al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .false.
784  ! can be called too many times, making it a bit useless
785  ! CALL timeset(routineN//','//moduleN,handle)
786 
787  cpassert(.NOT. fm%use_sp)
788 
789  IF (PRESENT(alpha)) al = alpha
790  IF (PRESENT(beta)) be = beta
791  IF (PRESENT(start_row)) i0 = start_row
792  IF (PRESENT(start_col)) j0 = start_col
793  IF (PRESENT(transpose)) tr_a = transpose
794  IF (tr_a) THEN
795  nrow = SIZE(new_values, 2)
796  ncol = SIZE(new_values, 1)
797  ELSE
798  nrow = SIZE(new_values, 1)
799  ncol = SIZE(new_values, 2)
800  END IF
801  IF (PRESENT(n_rows)) nrow = n_rows
802  IF (PRESENT(n_cols)) ncol = n_cols
803 
804  full_block => fm%local_data
805 
806  CALL cp_fm_get_info(matrix=fm, &
807  nrow_global=nrow_global, ncol_global=ncol_global, &
808  nrow_block=nrow_block, ncol_block=ncol_block, &
809  nrow_local=nrow_local, ncol_local=ncol_local, &
810  row_indices=row_indices, col_indices=col_indices)
811 
812  IF (al == 1.0 .AND. be == 0.0) THEN
813  DO j = 1, ncol_local
814  this_col = col_indices(j) - j0 + 1
815  IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
816  IF (tr_a) THEN
817  IF (i0 == 1 .AND. nrow_global == nrow) THEN
818  DO i = 1, nrow_local
819  full_block(i, j) = new_values(this_col, row_indices(i))
820  END DO
821  ELSE
822  DO i = 1, nrow_local
823  this_row = row_indices(i) - i0 + 1
824  IF (this_row >= 1 .AND. this_row <= nrow) THEN
825  full_block(i, j) = new_values(this_col, this_row)
826  END IF
827  END DO
828  END IF
829  ELSE
830  IF (i0 == 1 .AND. nrow_global == nrow) THEN
831  DO i = 1, nrow_local
832  full_block(i, j) = new_values(row_indices(i), this_col)
833  END DO
834  ELSE
835  DO i = 1, nrow_local
836  this_row = row_indices(i) - i0 + 1
837  IF (this_row >= 1 .AND. this_row <= nrow) THEN
838  full_block(i, j) = new_values(this_row, this_col)
839  END IF
840  END DO
841  END IF
842  END IF
843  END IF
844  END DO
845  ELSE
846  DO j = 1, ncol_local
847  this_col = col_indices(j) - j0 + 1
848  IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
849  IF (tr_a) THEN
850  DO i = 1, nrow_local
851  this_row = row_indices(i) - i0 + 1
852  IF (this_row >= 1 .AND. this_row <= nrow) THEN
853  full_block(i, j) = al*new_values(this_col, this_row) + &
854  be*full_block(i, j)
855  END IF
856  END DO
857  ELSE
858  DO i = 1, nrow_local
859  this_row = row_indices(i) - i0 + 1
860  IF (this_row >= 1 .AND. this_row <= nrow) THEN
861  full_block(i, j) = al*new_values(this_row, this_col) + &
862  be*full_block(i, j)
863  END IF
864  END DO
865  END IF
866  END IF
867  END DO
868  END IF
869 
870  ! CALL timestop(handle)
871 
872  END SUBROUTINE cp_fm_set_submatrix
873 
874 ! **************************************************************************************************
875 !> \brief gets a submatrix of a full matrix
876 !> op(target_m)(1:n_rows,1:n_cols)
877 !> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
878 !> target_m is replicated on all cpus
879 !> using this call is expensive
880 !> \param fm the full you want to get the info from
881 !> \param target_m a replicated full matrix that will contain the result
882 !> \param start_row the starting row of b_matrix (defaults to 1)
883 !> \param start_col the starting col of b_matrix (defaults to 1)
884 !> \param n_rows the number of row to change in b (defaults to
885 !> size(op(new_values),1))
886 !> \param n_cols the number of columns to change in b (defaults to
887 !> size(op(new_values),2))
888 !> \param transpose if target_m should be transposed: if true
889 !> op(target_m)=target_m^T, else op(target_m)=target_m
890 !> (defaults to false)
891 !> \par History
892 !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
893 !> \author Fawzi Mohamed
894 !> \note
895 !> optimized for full column updates. Zeros out a little too much
896 !> of target_m
897 !> the target_m is replicated and valid on all cpus
898 ! **************************************************************************************************
899  SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
900  start_col, n_rows, n_cols, transpose)
901  TYPE(cp_fm_type), INTENT(IN) :: fm
902  REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
903  INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
904  LOGICAL, INTENT(in), OPTIONAL :: transpose
905 
906  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_get_submatrix'
907 
908  INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
909  ncol_local, nrow, nrow_global, &
910  nrow_local, this_col, this_row
911  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
912  LOGICAL :: tr_a
913  REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
914  TYPE(mp_para_env_type), POINTER :: para_env
915 
916  CALL timeset(routinen, handle)
917 
918  i0 = 1; j0 = 1; tr_a = .false.
919 
920  cpassert(.NOT. fm%use_sp)
921 
922  IF (PRESENT(start_row)) i0 = start_row
923  IF (PRESENT(start_col)) j0 = start_col
924  IF (PRESENT(transpose)) tr_a = transpose
925  IF (tr_a) THEN
926  nrow = SIZE(target_m, 2)
927  ncol = SIZE(target_m, 1)
928  ELSE
929  nrow = SIZE(target_m, 1)
930  ncol = SIZE(target_m, 2)
931  END IF
932  IF (PRESENT(n_rows)) nrow = n_rows
933  IF (PRESENT(n_cols)) ncol = n_cols
934 
935  para_env => fm%matrix_struct%para_env
936 
937  full_block => fm%local_data
938 #if defined(__SCALAPACK)
939  ! zero-out whole target_m
940  IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
941  CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
942  END IF
943 #endif
944 
945  CALL cp_fm_get_info(matrix=fm, &
946  nrow_global=nrow_global, ncol_global=ncol_global, &
947  nrow_local=nrow_local, ncol_local=ncol_local, &
948  row_indices=row_indices, col_indices=col_indices)
949 
950  DO j = 1, ncol_local
951  this_col = col_indices(j) - j0 + 1
952  IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
953  IF (tr_a) THEN
954  IF (i0 == 1 .AND. nrow_global == nrow) THEN
955  DO i = 1, nrow_local
956  target_m(this_col, row_indices(i)) = full_block(i, j)
957  END DO
958  ELSE
959  DO i = 1, nrow_local
960  this_row = row_indices(i) - i0 + 1
961  IF (this_row >= 1 .AND. this_row <= nrow) THEN
962  target_m(this_col, this_row) = full_block(i, j)
963  END IF
964  END DO
965  END IF
966  ELSE
967  IF (i0 == 1 .AND. nrow_global == nrow) THEN
968  DO i = 1, nrow_local
969  target_m(row_indices(i), this_col) = full_block(i, j)
970  END DO
971  ELSE
972  DO i = 1, nrow_local
973  this_row = row_indices(i) - i0 + 1
974  IF (this_row >= 1 .AND. this_row <= nrow) THEN
975  target_m(this_row, this_col) = full_block(i, j)
976  END IF
977  END DO
978  END IF
979  END IF
980  END IF
981  END DO
982 
983  CALL para_env%sum(target_m)
984 
985  CALL timestop(handle)
986 
987  END SUBROUTINE cp_fm_get_submatrix
988 
989 ! **************************************************************************************************
990 !> \brief returns all kind of information about the full matrix
991 !> \param matrix ...
992 !> \param name ...
993 !> \param nrow_global ...
994 !> \param ncol_global ...
995 !> \param nrow_block ...
996 !> \param ncol_block ...
997 !> \param nrow_local ...
998 !> \param ncol_local ...
999 !> \param row_indices ...
1000 !> \param col_indices ...
1001 !> \param local_data ...
1002 !> \param context ...
1003 !> \param nrow_locals ...
1004 !> \param ncol_locals ...
1005 !> \param matrix_struct ...
1006 !> \param para_env ...
1007 !> \note
1008 !> see also cp_fm_struct for explanation
1009 !> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1010 !> access to the local blacs block
1011 ! **************************************************************************************************
1012  SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1013  nrow_block, ncol_block, nrow_local, ncol_local, &
1014  row_indices, col_indices, local_data, context, &
1015  nrow_locals, ncol_locals, matrix_struct, para_env)
1016 
1017  TYPE(cp_fm_type), INTENT(IN) :: matrix
1018  CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1019  INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1020  ncol_block, nrow_local, ncol_local
1021  INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1022  REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1023  OPTIONAL, POINTER :: local_data
1024  TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1025  INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1026  TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1027  TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1028 
1029  IF (PRESENT(name)) name = matrix%name
1030  IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1031  IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1032 
1033  CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1034  ncol_local=ncol_local, nrow_global=nrow_global, &
1035  ncol_global=ncol_global, nrow_block=nrow_block, &
1036  ncol_block=ncol_block, row_indices=row_indices, &
1037  col_indices=col_indices, nrow_locals=nrow_locals, &
1038  ncol_locals=ncol_locals, context=context, para_env=para_env)
1039 
1040  END SUBROUTINE cp_fm_get_info
1041 
1042 ! **************************************************************************************************
1043 !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1044 !> \param matrix a cp_fm_type instance
1045 !> \param io_unit the I/O unit to use for writing
1046 ! **************************************************************************************************
1047  SUBROUTINE cp_fm_write_info(matrix, io_unit)
1048  TYPE(cp_fm_type), INTENT(IN) :: matrix
1049  INTEGER, INTENT(IN) :: io_unit
1050 
1051  WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1052  CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1053  END SUBROUTINE cp_fm_write_info
1054 
1055 ! **************************************************************************************************
1056 !> \brief find the maximum absolute value of the matrix element
1057 !> maxval(abs(matrix))
1058 !> \param matrix ...
1059 !> \param a_max ...
1060 !> \param ir_max ...
1061 !> \param ic_max ...
1062 ! **************************************************************************************************
1063  SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1064  TYPE(cp_fm_type), INTENT(IN) :: matrix
1065  REAL(kind=dp), INTENT(OUT) :: a_max
1066  INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1067 
1068  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_maxabsval'
1069 
1070  INTEGER :: handle, i, ic_max_local, ir_max_local, &
1071  j, mepos, ncol_local, nrow_local, &
1072  num_pe
1073  INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1074  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1075  REAL(dp) :: my_max
1076  REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1077  REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1078  REAL(kind=sp), DIMENSION(:, :), POINTER :: my_block_sp
1079 
1080  CALL timeset(routinen, handle)
1081 
1082  my_block => matrix%local_data
1083  my_block_sp => matrix%local_data_sp
1084 
1085  CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1086  row_indices=row_indices, col_indices=col_indices)
1087 
1088  IF (matrix%use_sp) THEN
1089  a_max = real(maxval(abs(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
1090  ELSE
1091  a_max = maxval(abs(my_block(1:nrow_local, 1:ncol_local)))
1092  END IF
1093 
1094  IF (PRESENT(ir_max)) THEN
1095  num_pe = matrix%matrix_struct%para_env%num_pe
1096  mepos = matrix%matrix_struct%para_env%mepos
1097  ALLOCATE (ir_max_vec(0:num_pe - 1))
1098  ir_max_vec(0:num_pe - 1) = 0
1099  ALLOCATE (ic_max_vec(0:num_pe - 1))
1100  ic_max_vec(0:num_pe - 1) = 0
1101  ALLOCATE (a_max_vec(0:num_pe - 1))
1102  a_max_vec(0:num_pe - 1) = 0.0_dp
1103  my_max = 0.0_dp
1104 
1105  IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1106  DO i = 1, ncol_local
1107  DO j = 1, nrow_local
1108  IF (matrix%use_sp) THEN
1109  IF (abs(my_block_sp(j, i)) > my_max) THEN
1110  my_max = real(my_block_sp(j, i), dp)
1111  ir_max_local = j
1112  ic_max_local = i
1113  END IF
1114  ELSE
1115  IF (abs(my_block(j, i)) > my_max) THEN
1116  my_max = my_block(j, i)
1117  ir_max_local = j
1118  ic_max_local = i
1119  END IF
1120  END IF
1121  END DO
1122  END DO
1123 
1124  a_max_vec(mepos) = my_max
1125  ir_max_vec(mepos) = row_indices(ir_max_local)
1126  ic_max_vec(mepos) = col_indices(ic_max_local)
1127 
1128  END IF
1129 
1130  CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1131  CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1132  CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1133 
1134  my_max = 0.0_dp
1135  DO i = 0, num_pe - 1
1136  IF (a_max_vec(i) > my_max) THEN
1137  ir_max = ir_max_vec(i)
1138  ic_max = ic_max_vec(i)
1139  END IF
1140  END DO
1141 
1142  DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1143  cpassert(ic_max > 0)
1144  cpassert(ir_max > 0)
1145 
1146  END IF
1147 
1148  CALL matrix%matrix_struct%para_env%max(a_max)
1149 
1150  CALL timestop(handle)
1151 
1152  END SUBROUTINE cp_fm_maxabsval
1153 
1154 ! **************************************************************************************************
1155 !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1156 !> = || A ||_infinity
1157 !> \param matrix ...
1158 !> \param a_max ...
1159 !> \note
1160 !> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
1161 !> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1162 !> http://mathworld.wolfram.com/MatrixNorm.html
1163 !> (but the bound is not so tight in the general case)
1164 ! **************************************************************************************************
1165  SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1166  TYPE(cp_fm_type), INTENT(IN) :: matrix
1167  REAL(kind=dp), INTENT(OUT) :: a_max
1168 
1169  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_maxabsrownorm'
1170 
1171  INTEGER :: handle, i, j, ncol_local, nrow_global, &
1172  nrow_local
1173  INTEGER, DIMENSION(:), POINTER :: row_indices
1174  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: values
1175  REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1176 
1177  CALL timeset(routinen, handle)
1178 
1179  my_block => matrix%local_data
1180 
1181  cpassert(.NOT. matrix%use_sp)
1182 
1183  CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1184  nrow_local=nrow_local, ncol_local=ncol_local)
1185 
1186  ! the efficiency could be improved by making use of the row-col distribution of scalapack
1187  ALLOCATE (values(nrow_global))
1188  values = 0.0_dp
1189  DO j = 1, ncol_local
1190  DO i = 1, nrow_local
1191  values(row_indices(i)) = values(row_indices(i)) + abs(my_block(i, j))
1192  END DO
1193  END DO
1194  CALL matrix%matrix_struct%para_env%sum(values)
1195  a_max = maxval(values)
1196  DEALLOCATE (values)
1197 
1198  CALL timestop(handle)
1199  END SUBROUTINE
1200 
1201 ! **************************************************************************************************
1202 !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
1203 !> \param matrix ...
1204 !> \param norm_array ...
1205 ! **************************************************************************************************
1206  SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1207  TYPE(cp_fm_type), INTENT(IN) :: matrix
1208  REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1209 
1210  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_vectorsnorm'
1211 
1212  INTEGER :: handle, i, j, ncol_global, ncol_local, &
1213  nrow_local
1214  INTEGER, DIMENSION(:), POINTER :: col_indices
1215  REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1216 
1217  CALL timeset(routinen, handle)
1218 
1219  my_block => matrix%local_data
1220 
1221  cpassert(.NOT. matrix%use_sp)
1222 
1223  CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1224  nrow_local=nrow_local, ncol_local=ncol_local)
1225 
1226  ! the efficiency could be improved by making use of the row-col distribution of scalapack
1227  norm_array = 0.0_dp
1228  DO j = 1, ncol_local
1229  DO i = 1, nrow_local
1230  norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1231  END DO
1232  END DO
1233  CALL matrix%matrix_struct%para_env%sum(norm_array)
1234  norm_array = sqrt(norm_array)
1235 
1236  CALL timestop(handle)
1237  END SUBROUTINE
1238 
1239 ! **************************************************************************************************
1240 !> \brief summing up all the elements along the matrix's i-th index
1241 !> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1242 !> or
1243 !> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1244 !> \param matrix an input matrix A
1245 !> \param sum_array sums of elements in each column/row
1246 !> \param dir ...
1247 !> \note forked from cp_fm_vectorsnorm() to be used with
1248 !> the maximum overlap method
1249 !> added row variation
1250 ! **************************************************************************************************
1251  SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1252  TYPE(cp_fm_type), INTENT(IN) :: matrix
1253  REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1254  CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1255 
1256  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_vectorssum'
1257 
1258  INTEGER :: handle, i, j, ncol_local, nrow_local
1259  INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1260  LOGICAL :: docol
1261  REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1262 
1263  CALL timeset(routinen, handle)
1264 
1265  IF (PRESENT(dir)) THEN
1266  IF (dir == 'c' .OR. dir == 'C') THEN
1267  docol = .true.
1268  ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1269  docol = .false.
1270  ELSE
1271  cpabort('Wrong argument DIR')
1272  END IF
1273  ELSE
1274  docol = .true.
1275  END IF
1276 
1277  my_block => matrix%local_data
1278 
1279  cpassert(.NOT. matrix%use_sp)
1280 
1281  CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1282  nrow_local=nrow_local, ncol_local=ncol_local)
1283 
1284  ! the efficiency could be improved by making use of the row-col distribution of scalapack
1285  sum_array(:) = 0.0_dp
1286  IF (docol) THEN
1287  DO j = 1, ncol_local
1288  DO i = 1, nrow_local
1289  sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1290  END DO
1291  END DO
1292  ELSE
1293  DO j = 1, ncol_local
1294  DO i = 1, nrow_local
1295  sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1296  END DO
1297  END DO
1298  END IF
1299  CALL matrix%matrix_struct%para_env%sum(sum_array)
1300 
1301  CALL timestop(handle)
1302  END SUBROUTINE
1303 
1304 ! **************************************************************************************************
1305 !> \brief copy one identically sized matrix in the other
1306 !> \param source ...
1307 !> \param destination ...
1308 !> \note
1309 !> see also cp_fm_to_fm_columns
1310 ! **************************************************************************************************
1311  SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1312 
1313  TYPE(cp_fm_type), INTENT(IN) :: source, destination
1314 
1315  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_matrix'
1316 
1317  INTEGER :: handle, npcol, nprow
1318 
1319  CALL timeset(routinen, handle)
1320 
1321  nprow = source%matrix_struct%context%num_pe(1)
1322  npcol = source%matrix_struct%context%num_pe(2)
1323 
1324  IF ((.NOT. cp2k_is_parallel) .OR. &
1325  cp_fm_struct_equivalent(source%matrix_struct, &
1326  destination%matrix_struct)) THEN
1327  IF (source%use_sp .AND. destination%use_sp) THEN
1328  IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1329  SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
1330  CALL cp_abort(__location__, &
1331  "Cannot copy full matrix <"//trim(source%name)// &
1332  "> to full matrix <"//trim(destination%name)// &
1333  ">. The local_data blocks have different sizes.")
1334  CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
1335  source%local_data_sp, 1, destination%local_data_sp, 1)
1336  ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
1337  IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
1338  SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
1339  CALL cp_abort(__location__, &
1340  "Cannot copy full matrix <"//trim(source%name)// &
1341  "> to full matrix <"//trim(destination%name)// &
1342  ">. The local_data blocks have different sizes.")
1343  destination%local_data = real(source%local_data_sp, dp)
1344  ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
1345  IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1346  SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
1347  CALL cp_abort(__location__, &
1348  "Cannot copy full matrix <"//trim(source%name)// &
1349  "> to full matrix <"//trim(destination%name)// &
1350  ">. The local_data blocks have different sizes.")
1351  destination%local_data_sp = real(source%local_data, sp)
1352  ELSE
1353  IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1354  SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1355  CALL cp_abort(__location__, &
1356  "Cannot copy full matrix <"//trim(source%name)// &
1357  "> to full matrix <"//trim(destination%name)// &
1358  ">. The local_data blocks have different sizes.")
1359  CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1360  source%local_data, 1, destination%local_data, 1)
1361  END IF
1362  ELSE
1363  cpabort("Data structures of source and target full matrix are not equivalent")
1364  END IF
1365 
1366  CALL timestop(handle)
1367 
1368  END SUBROUTINE cp_fm_to_fm_matrix
1369 
1370 ! **************************************************************************************************
1371 !> \brief copy just a subset of columns of a fm to a fm
1372 !> \param msource ...
1373 !> \param mtarget ...
1374 !> \param ncol ...
1375 !> \param source_start ...
1376 !> \param target_start ...
1377 ! **************************************************************************************************
1378  SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1379  target_start)
1380 
1381  TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1382  INTEGER, INTENT(IN) :: ncol
1383  INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1384 
1385  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_columns'
1386 
1387  INTEGER :: handle, n, ss, ts
1388  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1389 #if defined(__SCALAPACK)
1390  INTEGER :: i
1391  INTEGER, DIMENSION(9) :: desca, descb
1392 #endif
1393 
1394  CALL timeset(routinen, handle)
1395 
1396  ss = 1
1397  ts = 1
1398 
1399  IF (PRESENT(source_start)) ss = source_start
1400  IF (PRESENT(target_start)) ts = target_start
1401 
1402  n = msource%matrix_struct%nrow_global
1403 
1404  a => msource%local_data
1405  b => mtarget%local_data
1406 
1407 #if defined(__SCALAPACK)
1408  desca(:) = msource%matrix_struct%descriptor(:)
1409  descb(:) = mtarget%matrix_struct%descriptor(:)
1410  DO i = 0, ncol - 1
1411  CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1412  END DO
1413 #else
1414  CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1415 #endif
1416 
1417  CALL timestop(handle)
1418 
1419  END SUBROUTINE cp_fm_to_fm_columns
1420 
1421 ! **************************************************************************************************
1422 !> \brief copy just a triangular matrix
1423 !> \param msource ...
1424 !> \param mtarget ...
1425 !> \param uplo ...
1426 ! **************************************************************************************************
1427  SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1428 
1429  TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1430  CHARACTER(LEN=*), INTENT(IN) :: uplo
1431 
1432  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_triangular'
1433 
1434  INTEGER :: handle, ncol, nrow
1435  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1436 #if defined(__SCALAPACK)
1437  INTEGER, DIMENSION(9) :: desca, descb
1438 #endif
1439 
1440  CALL timeset(routinen, handle)
1441 
1442  nrow = msource%matrix_struct%nrow_global
1443  ncol = msource%matrix_struct%ncol_global
1444 
1445  a => msource%local_data
1446  b => mtarget%local_data
1447 
1448 #if defined(__SCALAPACK)
1449  desca(:) = msource%matrix_struct%descriptor(:)
1450  descb(:) = mtarget%matrix_struct%descriptor(:)
1451  CALL pdlacpy(uplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1452 #else
1453  CALL dlacpy(uplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1454 #endif
1455 
1456  CALL timestop(handle)
1457 
1458  END SUBROUTINE cp_fm_to_fm_triangular
1459 
1460 ! **************************************************************************************************
1461 !> \brief copy just a part ot the matrix
1462 !> \param msource ...
1463 !> \param mtarget ...
1464 !> \param nrow ...
1465 !> \param ncol ...
1466 !> \param s_firstrow ...
1467 !> \param s_firstcol ...
1468 !> \param t_firstrow ...
1469 !> \param t_firstcol ...
1470 ! **************************************************************************************************
1471 
1472  SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1473 
1474  TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1475  INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1476  s_firstcol, t_firstrow, &
1477  t_firstcol
1478 
1479  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat'
1480 
1481  INTEGER :: handle, i, na, nb, ss, ts
1482  REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1483 #if defined(__SCALAPACK)
1484  INTEGER, DIMENSION(9) :: desca, descb
1485 #endif
1486 
1487  CALL timeset(routinen, handle)
1488 
1489  a => msource%local_data
1490  b => mtarget%local_data
1491 
1492  na = msource%matrix_struct%nrow_global
1493  nb = mtarget%matrix_struct%nrow_global
1494 ! nrow must be <= na and nb
1495  IF (nrow > na) &
1496  cpabort("cannot copy because nrow > number of rows of source matrix")
1497  IF (nrow > nb) &
1498  cpabort("cannot copy because nrow > number of rows of target matrix")
1499  na = msource%matrix_struct%ncol_global
1500  nb = mtarget%matrix_struct%ncol_global
1501 ! ncol must be <= na_col and nb_col
1502  IF (ncol > na) &
1503  cpabort("cannot copy because nrow > number of rows of source matrix")
1504  IF (ncol > nb) &
1505  cpabort("cannot copy because nrow > number of rows of target matrix")
1506 
1507 #if defined(__SCALAPACK)
1508  desca(:) = msource%matrix_struct%descriptor(:)
1509  descb(:) = mtarget%matrix_struct%descriptor(:)
1510  DO i = 0, ncol - 1
1511  ss = s_firstcol + i
1512  ts = t_firstcol + i
1513  CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1514  END DO
1515 #else
1516  DO i = 0, ncol - 1
1517  ss = s_firstcol + i
1518  ts = t_firstcol + i
1519  CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1520  END DO
1521 #endif
1522 
1523  CALL timestop(handle)
1524  END SUBROUTINE cp_fm_to_fm_submat
1525 
1526 ! **************************************************************************************************
1527 !> \brief General copy of a fm matrix to another fm matrix.
1528 !> Uses non-blocking MPI rather than ScaLAPACK.
1529 !>
1530 !> \param source input fm matrix
1531 !> \param destination output fm matrix
1532 !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1533 !> of the input and output matrices
1534 !> \par History
1535 !> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1536 ! **************************************************************************************************
1537  SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1538  TYPE(cp_fm_type), INTENT(IN) :: source, destination
1539  TYPE(mp_para_env_type), INTENT(IN) :: para_env
1540 
1541  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_copy_general'
1542 
1543  INTEGER :: handle
1544  TYPE(copy_info_type) :: info
1545 
1546  CALL timeset(routinen, handle)
1547 
1548  CALL cp_fm_start_copy_general(source, destination, para_env, info)
1549  IF (ASSOCIATED(destination%matrix_struct)) THEN
1550  CALL cp_fm_finish_copy_general(destination, info)
1551  END IF
1552  IF (ASSOCIATED(source%matrix_struct)) THEN
1553  CALL cp_fm_cleanup_copy_general(info)
1554  END IF
1555 
1556  CALL timestop(handle)
1557  END SUBROUTINE cp_fm_copy_general
1558 
1559 ! **************************************************************************************************
1560 !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
1561 !> \param source input fm matrix
1562 !> \param destination output fm matrix
1563 !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1564 !> of the input and output matrices
1565 !> \param info all of the data that will be needed to complete the copy operation
1566 ! **************************************************************************************************
1567  SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1568  TYPE(cp_fm_type), INTENT(IN) :: source, destination
1569  TYPE(mp_para_env_type), INTENT(IN) :: para_env
1570  TYPE(copy_info_type), INTENT(OUT) :: info
1571 
1572  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_start_copy_general'
1573 
1574  INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1575  ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1576  nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1577  recv_rank, recv_size, send_rank, send_size
1578  INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1579  recv_count, send_count, send_disp, &
1580  source2global, src_p, src_q
1581  INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1582  INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1583  src_block, src_block_tmp, src_num_pe
1584  INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1585  send_col_indices, send_row_indices
1586  TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1587  TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1588 
1589  CALL timeset(routinen, handle)
1590 
1591  IF (.NOT. cp2k_is_parallel) THEN
1592  ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1593  nrow_local_send = SIZE(source%local_data, 1)
1594  ncol_local_send = SIZE(source%local_data, 2)
1595  ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1596  k = 0
1597  DO j = 1, ncol_local_send
1598  DO i = 1, nrow_local_send
1599  k = k + 1
1600  info%send_buf(k) = source%local_data(i, j)
1601  END DO
1602  END DO
1603  ELSE
1604  ! assumption of double precision data is carried forward from ScaLAPACK version
1605  IF (source%use_sp) cpabort("only DP kind implemented")
1606  IF (destination%use_sp) cpabort("only DP kind implemented")
1607 
1608  NULLIFY (recv_dist, send_dist)
1609  NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1610 
1611  ! The 'global' communicator contains both the source and destination decompositions
1612  global_size = para_env%num_pe
1613  global_rank = para_env%mepos
1614 
1615  ! The source/send decomposition and destination/recv decompositions may only exist on
1616  ! on a subset of the processes involved in the communication
1617  ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1618  ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1619  IF (ASSOCIATED(destination%matrix_struct)) THEN
1620  recv_dist => destination%matrix_struct
1621  recv_rank = recv_dist%para_env%mepos
1622  ELSE
1623  recv_rank = mp_proc_null
1624  END IF
1625 
1626  IF (ASSOCIATED(source%matrix_struct)) THEN
1627  send_dist => source%matrix_struct
1628  send_rank = send_dist%para_env%mepos
1629  ELSE
1630  send_rank = mp_proc_null
1631  END IF
1632 
1633  ! Map the rank in the source/dest communicator to the global rank
1634  ALLOCATE (all_ranks(0:global_size - 1))
1635 
1636  CALL para_env%allgather(send_rank, all_ranks)
1637  IF (ASSOCIATED(recv_dist)) THEN
1638  ALLOCATE (source2global(0:count(all_ranks .NE. mp_proc_null) - 1))
1639  DO i = 0, global_size - 1
1640  IF (all_ranks(i) .NE. mp_proc_null) THEN
1641  source2global(all_ranks(i)) = i
1642  END IF
1643  END DO
1644  END IF
1645 
1646  CALL para_env%allgather(recv_rank, all_ranks)
1647  IF (ASSOCIATED(send_dist)) THEN
1648  ALLOCATE (dest2global(0:count(all_ranks .NE. mp_proc_null) - 1))
1649  DO i = 0, global_size - 1
1650  IF (all_ranks(i) .NE. mp_proc_null) THEN
1651  dest2global(all_ranks(i)) = i
1652  END IF
1653  END DO
1654  END IF
1655  DEALLOCATE (all_ranks)
1656 
1657  ! Some data from the two decompositions will be needed by all processes in the global group :
1658  ! process grid shape, block size, and the BLACS-to-MPI mapping
1659 
1660  ! The global root process will receive the data (from the root process in each decomposition)
1661  send_req(:) = mp_request_null
1662  IF (global_rank == 0) THEN
1663  recv_req(:) = mp_request_null
1664  CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1665  CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1666  CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1667  CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1668  END IF
1669 
1670  IF (ASSOCIATED(send_dist)) THEN
1671  IF ((send_rank .EQ. 0)) THEN
1672  ! need to use separate buffers here in case this is actually global rank 0
1673  src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
1674  CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1675  CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1676  END IF
1677  END IF
1678 
1679  IF (ASSOCIATED(recv_dist)) THEN
1680  IF ((recv_rank .EQ. 0)) THEN
1681  dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
1682  CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1683  CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1684  END IF
1685  END IF
1686 
1687  IF (global_rank == 0) THEN
1688  CALL mp_waitall(recv_req(1:4))
1689  ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1690  ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1691  dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1692  )
1693  CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1694  CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1695  END IF
1696 
1697  IF (ASSOCIATED(send_dist)) THEN
1698  IF ((send_rank .EQ. 0)) THEN
1699  CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1700  END IF
1701  END IF
1702 
1703  IF (ASSOCIATED(recv_dist)) THEN
1704  IF ((recv_rank .EQ. 0)) THEN
1705  CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1706  END IF
1707  END IF
1708 
1709  IF (global_rank == 0) THEN
1710  CALL mp_waitall(recv_req(5:6))
1711  END IF
1712 
1713  ! Finally, broadcast the data to all processes in the global communicator
1714  CALL para_env%bcast(src_block, 0)
1715  CALL para_env%bcast(dest_block, 0)
1716  CALL para_env%bcast(src_num_pe, 0)
1717  CALL para_env%bcast(dest_num_pe, 0)
1718  info%src_num_pe(1:2) = src_num_pe(1:2)
1719  info%nblock_src(1:2) = src_block(1:2)
1720  IF (global_rank .NE. 0) THEN
1721  ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1722  dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1723  )
1724  END IF
1725  CALL para_env%bcast(info%src_blacs2mpi, 0)
1726  CALL para_env%bcast(dest_blacs2mpi, 0)
1727 
1728  recv_size = dest_num_pe(1)*dest_num_pe(2)
1729  send_size = src_num_pe(1)*src_num_pe(2)
1730  info%send_size = send_size
1731  CALL mp_waitall(send_req(:))
1732 
1733  ! Setup is now complete, we can start the actual communication here.
1734  ! The order implemented here is:
1735  ! DEST_1
1736  ! compute recv sizes
1737  ! call irecv
1738  ! SRC_1
1739  ! compute send sizes
1740  ! pack send buffers
1741  ! call isend
1742  ! DEST_2
1743  ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1744  ! SRC_2
1745  ! wait for the sends
1746 
1747  ! DEST_1
1748  IF (ASSOCIATED(recv_dist)) THEN
1749  CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1750  col_indices=recv_col_indices &
1751  )
1752  info%recv_col_indices => recv_col_indices
1753  info%recv_row_indices => recv_row_indices
1754  nrow_block_src = src_block(1)
1755  ncol_block_src = src_block(2)
1756  ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1757 
1758  ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1759  nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1760  ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1761  info%nlocal_recv(1) = nrow_local_recv
1762  info%nlocal_recv(2) = ncol_local_recv
1763  ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1764  ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1765  DO i = 1, nrow_local_recv
1766  ! For each local row we will receive, we look up its global row (in recv_row_indices),
1767  ! then work out which row block it comes from, and which process row that row block comes from.
1768  src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1769  END DO
1770  DO j = 1, ncol_local_recv
1771  ! Similarly for the columns
1772  src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1773  END DO
1774  ! src_p/q now contains the process row/column ID that will send data to that row/column
1775 
1776  DO q = 0, src_num_pe(2) - 1
1777  ncols = count(src_q .EQ. q)
1778  DO p = 0, src_num_pe(1) - 1
1779  nrows = count(src_p .EQ. p)
1780  ! Use the send_dist here as we are looking up the processes where the data comes from
1781  recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1782  END DO
1783  END DO
1784  DEALLOCATE (src_p, src_q)
1785 
1786  ! Use one long buffer (and displacements into that buffer)
1787  ! this prevents the need for a rectangular array where not all elements will be populated
1788  ALLOCATE (info%recv_buf(sum(recv_count(:))))
1789  info%recv_disp(0) = 0
1790  DO i = 1, send_size - 1
1791  info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1792  END DO
1793 
1794  ! Issue receive calls on ranks which expect data
1795  DO k = 0, send_size - 1
1796  IF (recv_count(k) .GT. 0) THEN
1797  CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1798  source2global(k), info%recv_request(k))
1799  END IF
1800  END DO
1801  DEALLOCATE (source2global)
1802  END IF ! ASSOCIATED(recv_dist)
1803 
1804  ! SRC_1
1805  IF (ASSOCIATED(send_dist)) THEN
1806  CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1807  col_indices=send_col_indices &
1808  )
1809  nrow_block_dest = dest_block(1)
1810  ncol_block_dest = dest_block(2)
1811  ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1812 
1813  ! Determine the send counts, allocate the send buffers
1814  nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1815  ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1816 
1817  ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1818  ! i.e. number of rows,cols in the sending distribution
1819  ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1820 
1821  DO i = 1, nrow_local_send
1822  ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1823  dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1824  END DO
1825  DO j = 1, ncol_local_send
1826  dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1827  END DO
1828  ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1829 
1830  DO q = 0, dest_num_pe(2) - 1
1831  ncols = count(dest_q .EQ. q)
1832  DO p = 0, dest_num_pe(1) - 1
1833  nrows = count(dest_p .EQ. p)
1834  send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1835  END DO
1836  END DO
1837  DEALLOCATE (dest_p, dest_q)
1838 
1839  ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1840  ALLOCATE (info%send_buf(sum(send_count(:))))
1841  send_disp(0) = 0
1842  DO k = 1, recv_size - 1
1843  send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1844  END DO
1845 
1846  ! Loop over the smat, pack the send buffers
1847  send_count(:) = 0
1848  DO j = 1, ncol_local_send
1849  ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1850  dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1851  DO i = 1, nrow_local_send
1852  dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1853  mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1854  send_count(mpi_rank) = send_count(mpi_rank) + 1
1855  info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1856  END DO
1857  END DO
1858 
1859  ! For each non-zero send_count, call mpi_isend
1860  DO k = 0, recv_size - 1
1861  IF (send_count(k) .GT. 0) THEN
1862  CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1863  dest2global(k), info%send_request(k))
1864  END IF
1865  END DO
1866  DEALLOCATE (send_count, send_disp, dest2global)
1867  END IF ! ASSOCIATED(send_dist)
1868  DEALLOCATE (dest_blacs2mpi)
1869 
1870  END IF !IF (.NOT. cp2k_is_parallel)
1871 
1872  CALL timestop(handle)
1873 
1874  END SUBROUTINE cp_fm_start_copy_general
1875 
1876 ! **************************************************************************************************
1877 !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1878 !> \param destination output fm matrix
1879 !> \param info all of the data that will be needed to complete the copy operation
1880 ! **************************************************************************************************
1881  SUBROUTINE cp_fm_finish_copy_general(destination, info)
1882  TYPE(cp_fm_type), INTENT(IN) :: destination
1883  TYPE(copy_info_type), INTENT(INOUT) :: info
1884 
1885  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_finish_copy_general'
1886 
1887  INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1888  src_p_i, src_q_j
1889  INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1890  INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1891  INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1892 
1893  CALL timeset(routinen, handle)
1894 
1895  IF (.NOT. cp2k_is_parallel) THEN
1896  ! Now unpack the data from the 'send buffer'
1897  k = 0
1898  DO j = 1, SIZE(destination%local_data, 2)
1899  DO i = 1, SIZE(destination%local_data, 1)
1900  k = k + 1
1901  destination%local_data(i, j) = info%send_buf(k)
1902  END DO
1903  END DO
1904  DEALLOCATE (info%send_buf)
1905  ELSE
1906  ! Set up local variables ...
1907  send_size = info%send_size
1908  nlocal_recv(1:2) = info%nlocal_recv(:)
1909  nblock_src(1:2) = info%nblock_src(:)
1910  src_num_pe(1:2) = info%src_num_pe(:)
1911  recv_col_indices => info%recv_col_indices
1912  recv_row_indices => info%recv_row_indices
1913 
1914  ! ... use the local variables to do the work
1915  ! DEST_2
1916  CALL mp_waitall(info%recv_request(:))
1917  ALLOCATE (recv_count(0:send_size - 1))
1918  ! Loop over the rmat, filling it in with data from the recv buffers
1919  ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1920  recv_count(:) = 0
1921  DO j = 1, nlocal_recv(2)
1922  src_q_j = mod(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
1923  DO i = 1, nlocal_recv(1)
1924  src_p_i = mod(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
1925  mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
1926  recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1927  destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1928  END DO
1929  END DO
1930  DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1931  ! Invalidate the stored state
1932  NULLIFY (info%recv_col_indices, &
1933  info%recv_row_indices)
1934 
1935  END IF
1936 
1937  CALL timestop(handle)
1938 
1939  END SUBROUTINE cp_fm_finish_copy_general
1940 
1941 ! **************************************************************************************************
1942 !> \brief Completes the copy operation: wait for comms clean up MPI state
1943 !> \param info all of the data that will be needed to complete the copy operation
1944 ! **************************************************************************************************
1946  TYPE(copy_info_type), INTENT(INOUT) :: info
1947 
1948  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_cleanup_copy_general'
1949 
1950  INTEGER :: handle
1951 
1952  CALL timeset(routinen, handle)
1953 
1954  IF (.NOT. cp2k_is_parallel) THEN
1955  ! Don't do anything - no MPI state for the serial case
1956  ELSE
1957  ! SRC_2
1958  ! If this process is also in the destination decomposition, this deallocate
1959  ! Was already done in cp_fm_finish_copy_general
1960  IF (ALLOCATED(info%src_blacs2mpi)) THEN
1961  DEALLOCATE (info%src_blacs2mpi)
1962  END IF
1963  CALL mp_waitall(info%send_request)
1964  DEALLOCATE (info%send_request, info%send_buf)
1965 
1966  END IF
1967 
1968  CALL timestop(handle)
1969 
1970  END SUBROUTINE cp_fm_cleanup_copy_general
1971 
1972 ! **************************************************************************************************
1973 !> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
1974 !> The two matrices can have different contexts.
1975 !>
1976 !> Summary of distribution routines for dense matrices
1977 !> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
1978 !>
1979 !> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
1980 !>
1981 !> A process that is not a part of the context of A should set descA(2)
1982 !> to -1, and similarly for B.
1983 !>
1984 !> \param source input fm matrix
1985 !> \param destination output fm matrix
1986 !> \param nrows number of rows of sub matrix to be copied
1987 !> \param ncols number of cols of sub matrix to be copied
1988 !> \param s_firstrow starting global row index of sub matrix in source
1989 !> \param s_firstcol starting global col index of sub matrix in source
1990 !> \param d_firstrow starting global row index of sub matrix in destination
1991 !> \param d_firstcol starting global col index of sub matrix in destination
1992 !> \param global_context process grid that covers all parts of either A or B.
1993 ! **************************************************************************************************
1994  SUBROUTINE cp_fm_to_fm_submat_general(source, &
1995  destination, &
1996  nrows, &
1997  ncols, &
1998  s_firstrow, &
1999  s_firstcol, &
2000  d_firstrow, &
2001  d_firstcol, &
2002  global_context)
2003 
2004  TYPE(cp_fm_type), INTENT(IN) :: source, destination
2005  INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
2006  d_firstrow, d_firstcol
2007 
2008  CLASS(cp_blacs_type), INTENT(IN) :: global_context
2009 
2010  CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat_general'
2011 
2012  LOGICAL :: debug
2013  INTEGER :: handle
2014 #if defined(__SCALAPACK)
2015  INTEGER, DIMENSION(9) :: desca, descb
2016  REAL(kind=dp), DIMENSION(1, 1), TARGET :: dummy
2017  REAL(kind=dp), DIMENSION(:, :), POINTER :: smat, dmat
2018 #endif
2019 
2020  CALL timeset(routinen, handle)
2021 
2022  debug = debug_this_module
2023 
2024  IF (.NOT. cp2k_is_parallel) THEN
2025  CALL cp_fm_to_fm_submat(source, &
2026  destination, &
2027  nrows, &
2028  ncols, &
2029  s_firstrow, &
2030  s_firstcol, &
2031  d_firstrow, &
2032  d_firstcol)
2033  ELSE
2034 #ifdef __SCALAPACK
2035  NULLIFY (smat, dmat)
2036  ! check whether source is available on this process
2037  IF (ASSOCIATED(source%matrix_struct)) THEN
2038  desca = source%matrix_struct%descriptor
2039  IF (source%use_sp) cpabort("only DP kind implemented")
2040  IF (nrows .GT. source%matrix_struct%nrow_global) &
2041  cpabort("nrows is greater than nrow_global of source")
2042  IF (ncols .GT. source%matrix_struct%ncol_global) &
2043  cpabort("ncols is greater than ncol_global of source")
2044  smat => source%local_data
2045  ELSE
2046  desca = -1
2047  smat => dummy
2048  END IF
2049  ! check destination is available on this process
2050  IF (ASSOCIATED(destination%matrix_struct)) THEN
2051  descb = destination%matrix_struct%descriptor
2052  IF (destination%use_sp) cpabort("only DP kind implemented")
2053  IF (nrows .GT. destination%matrix_struct%nrow_global) &
2054  cpabort("nrows is greater than nrow_global of destination")
2055  IF (ncols .GT. destination%matrix_struct%ncol_global) &
2056  cpabort("ncols is greater than ncol_global of destination")
2057  dmat => destination%local_data
2058  ELSE
2059  descb = -1
2060  dmat => dummy
2061  END IF
2062  ! do copy
2063 
2064  CALL pdgemr2d(nrows, &
2065  ncols, &
2066  smat, &
2067  s_firstrow, &
2068  s_firstcol, &
2069  desca, &
2070  dmat, &
2071  d_firstrow, &
2072  d_firstcol, &
2073  descb, &
2074  global_context%get_handle())
2075 #else
2076  mark_used(global_context)
2077  cpabort("this subroutine only supports SCALAPACK")
2078 #endif
2079  END IF
2080 
2081  CALL timestop(handle)
2082 
2083  END SUBROUTINE cp_fm_to_fm_submat_general
2084 
2085 ! **************************************************************************************************
2086 !> \brief ...
2087 !> \param matrix ...
2088 !> \param irow_global ...
2089 !> \param icol_global ...
2090 !> \param alpha ...
2091 ! **************************************************************************************************
2092  SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2093 
2094  ! Add alpha to the matrix element specified by the global indices
2095  ! irow_global and icol_global
2096 
2097  ! - Creation (05.05.06,MK)
2098 
2099  TYPE(cp_fm_type), INTENT(IN) :: matrix
2100  INTEGER, INTENT(IN) :: irow_global, icol_global
2101  REAL(kind=dp), INTENT(IN) :: alpha
2102 
2103  INTEGER :: mypcol, myprow, npcol, nprow
2104  REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2105  TYPE(cp_blacs_env_type), POINTER :: context
2106 #if defined(__SCALAPACK)
2107  INTEGER :: icol_local, ipcol, iprow, &
2108  irow_local
2109  INTEGER, DIMENSION(9) :: desca
2110 #endif
2111 
2112  context => matrix%matrix_struct%context
2113 
2114  myprow = context%mepos(1)
2115  mypcol = context%mepos(2)
2116 
2117  nprow = context%num_pe(1)
2118  npcol = context%num_pe(2)
2119 
2120  a => matrix%local_data
2121 
2122 #if defined(__SCALAPACK)
2123 
2124  desca(:) = matrix%matrix_struct%descriptor(:)
2125 
2126  CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2127  irow_local, icol_local, iprow, ipcol)
2128 
2129  IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2130  a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2131  END IF
2132 
2133 #else
2134 
2135  a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2136 
2137 #endif
2138 
2139  END SUBROUTINE cp_fm_add_to_element
2140 
2141 ! **************************************************************************************************
2142 !> \brief ...
2143 !> \param fm ...
2144 !> \param unit ...
2145 ! **************************************************************************************************
2146  SUBROUTINE cp_fm_write_unformatted(fm, unit)
2147  TYPE(cp_fm_type), INTENT(IN) :: fm
2148  INTEGER, INTENT(IN) :: unit
2149 
2150  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_write_unformatted'
2151 
2152  INTEGER :: handle, j, max_block, &
2153  ncol_global, nrow_global
2154  TYPE(mp_para_env_type), POINTER :: para_env
2155 #if defined(__SCALAPACK)
2156  INTEGER :: i, i_block, icol_local, &
2157  in, info, ipcol, &
2158  iprow, irow_local, &
2159  mepos, &
2160  num_pe, rb, tag
2161  INTEGER, DIMENSION(9) :: desc
2162  REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2163  REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2164  TYPE(cp_blacs_type) :: ictxt_loc
2165  INTEGER, EXTERNAL :: numroc
2166 #endif
2167 
2168  CALL timeset(routinen, handle)
2169  CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2170  para_env=para_env)
2171 
2172 #if defined(__SCALAPACK)
2173  num_pe = para_env%num_pe
2174  mepos = para_env%mepos
2175  rb = nrow_global
2176  tag = 0
2177  ! get a new context
2178  CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2179  CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2180  cpassert(info == 0)
2181  associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2182  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2183  in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2184 
2185  ALLOCATE (newdat(nrow_global, max(1, in)))
2186 
2187  ! do the actual scalapack to cols reordering
2188  CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2189  fm%matrix_struct%descriptor, &
2190  newdat, 1, 1, desc, ictxt_loc%get_handle())
2191 
2192  ALLOCATE (vecbuf(nrow_global*max_block))
2193  vecbuf = huge(1.0_dp) ! init for valgrind
2194 
2195  DO i = 1, ncol_global, max(max_block, 1)
2196  i_block = min(max_block, ncol_global - i + 1)
2197  CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2198  irow_local, icol_local, iprow, ipcol)
2199  IF (ipcol == mypcol) THEN
2200  DO j = 1, i_block
2201  vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2202  END DO
2203  END IF
2204 
2205  IF (ipcol == 0) THEN
2206  ! do nothing
2207  ELSE
2208  IF (ipcol == mypcol) THEN
2209  CALL para_env%send(vecbuf(:), 0, tag)
2210  END IF
2211  IF (mypcol == 0) THEN
2212  CALL para_env%recv(vecbuf(:), ipcol, tag)
2213  END IF
2214  END IF
2215 
2216  IF (unit > 0) THEN
2217  DO j = 1, i_block
2218  WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2219  END DO
2220  END IF
2221 
2222  END DO
2223  END associate
2224  DEALLOCATE (vecbuf)
2225 
2226  CALL ictxt_loc%gridexit()
2227 
2228  DEALLOCATE (newdat)
2229 
2230 #else
2231 
2232  IF (unit > 0) THEN
2233  DO j = 1, ncol_global
2234  WRITE (unit) fm%local_data(:, j)
2235  END DO
2236  END IF
2237 
2238 #endif
2239  CALL timestop(handle)
2240 
2241  END SUBROUTINE cp_fm_write_unformatted
2242 
2243 ! **************************************************************************************************
2244 !> \brief Write out a full matrix in plain text.
2245 !> \param fm the matrix to be outputted
2246 !> \param unit the unit number for I/O
2247 !> \param header optional header
2248 !> \param value_format ...
2249 ! **************************************************************************************************
2250  SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2251  TYPE(cp_fm_type), INTENT(IN) :: fm
2252  INTEGER, INTENT(IN) :: unit
2253  CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2254 
2255  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_write_formatted'
2256 
2257  CHARACTER(LEN=21) :: my_value_format
2258  INTEGER :: handle, i, j, max_block, &
2259  ncol_global, nrow_global, &
2260  nrow_block
2261  TYPE(mp_para_env_type), POINTER :: para_env
2262 #if defined(__SCALAPACK)
2263  INTEGER :: i_block, icol_local, &
2264  in, info, ipcol, &
2265  iprow, irow_local, &
2266  mepos, num_pe, rb, tag, k, &
2267  icol, irow
2268  INTEGER, DIMENSION(9) :: desc
2269  REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2270  REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2271  TYPE(cp_blacs_type) :: ictxt_loc
2272  INTEGER, EXTERNAL :: numroc
2273 #endif
2274 
2275  CALL timeset(routinen, handle)
2276  CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2277  nrow_block=nrow_block, para_env=para_env)
2278 
2279  IF (PRESENT(value_format)) THEN
2280  cpassert(len_trim(adjustl(value_format)) < 11)
2281  my_value_format = "(I10, I10, "//trim(adjustl(value_format))//")"
2282  ELSE
2283  my_value_format = "(I10, I10, ES24.12)"
2284  END IF
2285 
2286  IF (unit > 0) THEN
2287  IF (PRESENT(header)) WRITE (unit, *) header
2288  WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", adjustl("Value")
2289  END IF
2290 
2291 #if defined(__SCALAPACK)
2292  num_pe = para_env%num_pe
2293  mepos = para_env%mepos
2294  rb = nrow_global
2295  tag = 0
2296  ! get a new context
2297  CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2298  CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2299  cpassert(info == 0)
2300  associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2301  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2302  in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2303 
2304  ALLOCATE (newdat(nrow_global, max(1, in)))
2305 
2306  ! do the actual scalapack to cols reordering
2307  CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2308  fm%matrix_struct%descriptor, &
2309  newdat, 1, 1, desc, ictxt_loc%get_handle())
2310 
2311  ALLOCATE (vecbuf(nrow_global*max_block))
2312  vecbuf = huge(1.0_dp) ! init for valgrind
2313  irow = 1
2314  icol = 1
2315 
2316  DO i = 1, ncol_global, max(max_block, 1)
2317  i_block = min(max_block, ncol_global - i + 1)
2318  CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2319  irow_local, icol_local, iprow, ipcol)
2320  IF (ipcol == mypcol) THEN
2321  DO j = 1, i_block
2322  vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2323  END DO
2324  END IF
2325 
2326  IF (ipcol == 0) THEN
2327  ! do nothing
2328  ELSE
2329  IF (ipcol == mypcol) THEN
2330  CALL para_env%send(vecbuf(:), 0, tag)
2331  END IF
2332  IF (mypcol == 0) THEN
2333  CALL para_env%recv(vecbuf(:), ipcol, tag)
2334  END IF
2335  END IF
2336 
2337  IF (unit > 0) THEN
2338  DO j = 1, i_block
2339  DO k = (j - 1)*nrow_global + 1, nrow_global*j
2340  WRITE (unit=unit, fmt=my_value_format) irow, icol, vecbuf(k)
2341  irow = irow + 1
2342  IF (irow > nrow_global) THEN
2343  irow = 1
2344  icol = icol + 1
2345  END IF
2346  END DO
2347  END DO
2348  END IF
2349 
2350  END DO
2351  END associate
2352  DEALLOCATE (vecbuf)
2353 
2354  CALL ictxt_loc%gridexit()
2355 
2356  DEALLOCATE (newdat)
2357 
2358 #else
2359 
2360  IF (unit > 0) THEN
2361  DO j = 1, ncol_global
2362  DO i = 1, nrow_global
2363  WRITE (unit=unit, fmt=my_value_format) i, j, fm%local_data(i, j)
2364  END DO
2365  END DO
2366  END IF
2367 
2368 #endif
2369  CALL timestop(handle)
2370 
2371  END SUBROUTINE cp_fm_write_formatted
2372 
2373 ! **************************************************************************************************
2374 !> \brief ...
2375 !> \param fm ...
2376 !> \param unit ...
2377 ! **************************************************************************************************
2378  SUBROUTINE cp_fm_read_unformatted(fm, unit)
2379  TYPE(cp_fm_type), INTENT(INOUT) :: fm
2380  INTEGER, INTENT(IN) :: unit
2381 
2382  CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_read_unformatted'
2383 
2384  INTEGER :: handle, j, max_block, &
2385  ncol_global, nrow_global
2386  TYPE(mp_para_env_type), POINTER :: para_env
2387 #if defined(__SCALAPACK)
2388  INTEGER :: k, n_cols
2389  REAL(kind=dp), DIMENSION(:, :), POINTER :: vecbuf
2390 #endif
2391 
2392  CALL timeset(routinen, handle)
2393 
2394  CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2395  para_env=para_env)
2396 
2397 #if defined(__SCALAPACK)
2398 
2399  ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2400 
2401  ALLOCATE (vecbuf(nrow_global, max_block))
2402 
2403  DO j = 1, ncol_global, max_block
2404 
2405  n_cols = min(max_block, ncol_global - j + 1)
2406  IF (para_env%mepos == 0) THEN
2407  DO k = 1, n_cols
2408  READ (unit) vecbuf(:, k)
2409  END DO
2410  END IF
2411  CALL para_env%bcast(vecbuf, 0)
2412  CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2413 
2414  END DO
2415 
2416  DEALLOCATE (vecbuf)
2417 
2418 #else
2419 
2420  DO j = 1, ncol_global
2421  READ (unit) fm%local_data(:, j)
2422  END DO
2423 
2424 #endif
2425 
2426  CALL timestop(handle)
2427 
2428  END SUBROUTINE cp_fm_read_unformatted
2429 
2430 ! **************************************************************************************************
2431 !> \brief wrapper to scalapack function INDXG2P that computes the process
2432 !> coordinate which possesses the entry of a distributed matrix specified
2433 !> by a global index INDXGLOB.
2434 !>
2435 !> Arguments
2436 !> =========
2437 !>
2438 !> INDXGLOB (global input) INTEGER
2439 !> The global index of the element.
2440 !>
2441 !> NB (global input) INTEGER
2442 !> Block size, size of the blocks the distributed matrix is
2443 !> split into.
2444 !>
2445 !> IPROC (local dummy) INTEGER
2446 !> Dummy argument in this case in order to unify the calling
2447 !> sequence of the tool-routines.
2448 !>
2449 !> ISRCPROC (global input) INTEGER
2450 !> The coordinate of the process that possesses the first
2451 !> row/column of the distributed matrix.
2452 !>
2453 !> NPROCS (global input) INTEGER
2454 !> The total number processes over which the matrix is
2455 !> distributed.
2456 !>
2457 !> \param INDXGLOB ...
2458 !> \param NB ...
2459 !> \param IPROC ...
2460 !> \param ISRCPROC ...
2461 !> \param NPROCS ...
2462 !> \return ...
2463 !> \author Mauro Del Ben [MDB] - 12.2012
2464 ! **************************************************************************************************
2465  FUNCTION cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2P)
2466  INTEGER, INTENT(IN) :: indxglob, nb, iproc, isrcproc, nprocs
2467  INTEGER :: g2p
2468 
2469 #if defined(__SCALAPACK)
2470  INTEGER, EXTERNAL :: indxg2p
2471 #endif
2472 
2473 #if defined(__SCALAPACK)
2474 
2475  g2p = indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
2476 
2477 #else
2478  mark_used(indxglob)
2479  mark_used(iproc)
2480  mark_used(isrcproc)
2481  mark_used(nb)
2482  mark_used(nprocs)
2483 
2484  g2p = 0
2485 
2486 #endif
2487 
2488  END FUNCTION cp_fm_indxg2p
2489 
2490 ! **************************************************************************************************
2491 !> \brief wrapper to scalapack function INDXG2L that computes the local index
2492 !> of a distributed matrix entry pointed to by the global index INDXGLOB.
2493 !>
2494 !> Arguments
2495 !> =========
2496 !>
2497 !> INDXGLOB (global input) INTEGER
2498 !> The global index of the distributed matrix entry.
2499 !>
2500 !> NB (global input) INTEGER
2501 !> Block size, size of the blocks the distributed matrix is
2502 !> split into.
2503 !>
2504 !> IPROC (local dummy) INTEGER
2505 !> Dummy argument in this case in order to unify the calling
2506 !> sequence of the tool-routines.
2507 !>
2508 !> ISRCPROC (local dummy) INTEGER
2509 !> Dummy argument in this case in order to unify the calling
2510 !> sequence of the tool-routines.
2511 !>
2512 !> NPROCS (global input) INTEGER
2513 !> The total number processes over which the distributed
2514 !> matrix is distributed.
2515 !>
2516 !> \param INDXGLOB ...
2517 !> \param NB ...
2518 !> \param IPROC ...
2519 !> \param ISRCPROC ...
2520 !> \param NPROCS ...
2521 !> \return ...
2522 !> \author Mauro Del Ben [MDB] - 12.2012
2523 ! **************************************************************************************************
2524  FUNCTION cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2L)
2525  INTEGER, INTENT(IN) :: indxglob, nb, iproc, isrcproc, nprocs
2526  INTEGER :: g2l
2527 
2528 #if defined(__SCALAPACK)
2529  INTEGER, EXTERNAL :: indxg2l
2530 #endif
2531 
2532 #if defined(__SCALAPACK)
2533 
2534  g2l = indxg2l(indxglob, nb, iproc, isrcproc, nprocs)
2535 
2536 #else
2537  mark_used(iproc)
2538  mark_used(isrcproc)
2539  mark_used(nb)
2540  mark_used(nprocs)
2541  mark_used(indxglob)
2542 
2543  g2l = indxglob
2544 
2545 #endif
2546 
2547  END FUNCTION cp_fm_indxg2l
2548 
2549 ! **************************************************************************************************
2550 !> \brief wrapper to scalapack function INDXL2G that computes the global index
2551 !> of a distributed matrix entry pointed to by the local index INDXLOC
2552 !> of the process indicated by IPROC.
2553 !>
2554 !> Arguments
2555 !> =========
2556 !>
2557 !> INDXLOC (global input) INTEGER
2558 !> The local index of the distributed matrix entry.
2559 !>
2560 !> NB (global input) INTEGER
2561 !> Block size, size of the blocks the distributed matrix is
2562 !> split into.
2563 !>
2564 !> IPROC (local input) INTEGER
2565 !> The coordinate of the process whose local array row or
2566 !> column is to be determined.
2567 !>
2568 !> ISRCPROC (global input) INTEGER
2569 !> The coordinate of the process that possesses the first
2570 !> row/column of the distributed matrix.
2571 !>
2572 !> NPROCS (global input) INTEGER
2573 !> The total number processes over which the distributed
2574 !> matrix is distributed.
2575 !>
2576 !> \param INDXLOC ...
2577 !> \param NB ...
2578 !> \param IPROC ...
2579 !> \param ISRCPROC ...
2580 !> \param NPROCS ...
2581 !> \return ...
2582 !> \author Mauro Del Ben [MDB] - 12.2012
2583 ! **************************************************************************************************
2584  FUNCTION cp_fm_indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS) RESULT(L2G)
2585  INTEGER, INTENT(IN) :: indxloc, nb, iproc, isrcproc, nprocs
2586  INTEGER :: l2g
2587 
2588 #if defined(__SCALAPACK)
2589  INTEGER, EXTERNAL :: indxl2g
2590 #endif
2591 
2592 #if defined(__SCALAPACK)
2593 
2594  l2g = indxl2g(indxloc, nb, iproc, isrcproc, nprocs)
2595 
2596 #else
2597  mark_used(iproc)
2598  mark_used(isrcproc)
2599  mark_used(nb)
2600  mark_used(nprocs)
2601 
2602  l2g = indxloc
2603 
2604 #endif
2605 
2606  END FUNCTION cp_fm_indxl2g
2607 
2608 ! **************************************************************************************************
2609 !> \brief ...
2610 !> \param mult_type ...
2611 ! **************************************************************************************************
2612  SUBROUTINE cp_fm_setup(mult_type)
2613  INTEGER, INTENT(IN) :: mult_type
2614 
2615  mm_type = mult_type
2616  END SUBROUTINE cp_fm_setup
2617 
2618 ! **************************************************************************************************
2619 !> \brief ...
2620 !> \return ...
2621 ! **************************************************************************************************
2622  FUNCTION cp_fm_get_mm_type() RESULT(res)
2623  INTEGER :: res
2624 
2625  res = mm_type
2626  END FUNCTION
2627 
2628 ! **************************************************************************************************
2629 !> \brief ...
2630 !> \param ictxt ...
2631 !> \param prec ...
2632 !> \return ...
2633 ! **************************************************************************************************
2634  FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2635  INTEGER :: ictxt
2636  CHARACTER(LEN=1) :: prec
2637  INTEGER :: res
2638 #if defined(__SCALAPACK)
2639  INTEGER :: pilaenv
2640  res = pilaenv(ictxt, prec)
2641 #else
2642  mark_used(ictxt)
2643  mark_used(prec)
2644  res = -1
2645 #endif
2646 
2647  END FUNCTION
2648 
2649 END MODULE cp_fm_types
methods related to the blacs parallel environment
Definition: cp_blacs_env.F:15
wrappers for the actual blacs calls. all functionality needed in the code should actually be provide ...
represent the structure of a full matrix
Definition: cp_fm_struct.F:14
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
Definition: cp_fm_struct.F:409
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
Definition: cp_fm_struct.F:357
subroutine, public cp_fm_struct_retain(fmstruct)
retains a full matrix structure
Definition: cp_fm_struct.F:306
subroutine, public cp_fm_struct_write_info(fmstruct, io_unit)
Write nicely formatted info about the FM struct to the given I/O unit.
Definition: cp_fm_struct.F:491
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
Definition: cp_fm_struct.F:320
represent a full matrix distributed on many processors
Definition: cp_fm_types.F:15
subroutine, public cp_fm_copy_general(source, destination, para_env)
General copy of a fm matrix to another fm matrix. Uses non-blocking MPI rather than ScaLAPACK.
Definition: cp_fm_types.F:1538
integer function, public cp_fm_indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXL2G that computes the global index of a distributed matrix entry po...
Definition: cp_fm_types.F:2585
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
Definition: cp_fm_types.F:570
subroutine, public cp_fm_start_copy_general(source, destination, para_env, info)
Initiates the copy operation: get distribution data, post MPI isend and irecvs.
Definition: cp_fm_types.F:1568
integer function, public cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2L that computes the local index of a distributed matrix entry poi...
Definition: cp_fm_types.F:2525
subroutine, public cp_fm_cleanup_copy_general(info)
Completes the copy operation: wait for comms clean up MPI state.
Definition: cp_fm_types.F:1946
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
Definition: cp_fm_types.F:1016
integer function, public cp_fm_get_mm_type()
...
Definition: cp_fm_types.F:2623
subroutine, public cp_fm_vectorssum(matrix, sum_array, dir)
summing up all the elements along the matrix's i-th index or
Definition: cp_fm_types.F:1252
integer function, public cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
wrapper to scalapack function INDXG2P that computes the process coordinate which possesses the entry ...
Definition: cp_fm_types.F:2466
subroutine, public cp_fm_write_unformatted(fm, unit)
...
Definition: cp_fm_types.F:2147
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
Definition: cp_fm_types.F:643
subroutine, public cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
...
Definition: cp_fm_types.F:2093
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
Definition: cp_fm_types.F:768
subroutine, public cp_fm_read_unformatted(fm, unit)
...
Definition: cp_fm_types.F:2379
subroutine, public cp_fm_vectorsnorm(matrix, norm_array)
find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
Definition: cp_fm_types.F:1207
subroutine, public cp_fm_setup(mult_type)
...
Definition: cp_fm_types.F:2613
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
Definition: cp_fm_types.F:1048
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
Definition: cp_fm_types.F:1166
subroutine, public cp_fm_to_fm_submat_general(source, destination, nrows, ncols, s_firstrow, s_firstcol, d_firstrow, d_firstcol, global_context)
General copy of a submatrix of fm matrix to a submatrix of another fm matrix. The two matrices can ha...
Definition: cp_fm_types.F:2003
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
Definition: cp_fm_types.F:1473
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
Definition: cp_fm_types.F:1064
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
Definition: cp_fm_types.F:535
subroutine, public cp_fm_finish_copy_general(destination, info)
Completes the copy operation: wait for comms, unpack, clean up MPI state.
Definition: cp_fm_types.F:1882
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
Definition: cp_fm_types.F:901
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
Definition: cp_fm_types.F:700
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
Definition: cp_fm_types.F:452
subroutine, public cp_fm_to_fm_triangular(msource, mtarget, uplo)
copy just a triangular matrix
Definition: cp_fm_types.F:1428
integer function, public cp_fm_pilaenv(ictxt, prec)
...
Definition: cp_fm_types.F:2635
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
Definition: cp_fm_types.F:167
subroutine, public cp_fm_write_formatted(fm, unit, header, value_format)
Write out a full matrix in plain text.
Definition: cp_fm_types.F:2251
Definition: header.F:13
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public sp
Definition: kinds.F:33
Interface to the message passing library MPI.
integer, parameter, public mp_proc_null
logical, parameter, public cp2k_is_parallel
integer, parameter, public mp_any_source
type(mp_request_type), parameter, public mp_request_null
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
integer, parameter, public uniform