(git:374b731)
Loading...
Searching...
No Matches
cp_fm_types.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief represent a full matrix distributed on many processors
10!> \par History
11!> 3) separated structure object, removed globenv, renamed to full matrix
12!> many changes (fawzi 08.2002)
13!> \author Matthias Krack (22.05.2001)
14! **************************************************************************************************
24 USE kinds, ONLY: dp,&
25 sp
33 USE parallel_rng_types, ONLY: uniform,&
35#include "../base/base_uses.f90"
36
37 IMPLICIT NONE
38
39 PRIVATE
40
41 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
42 LOGICAL, PARAMETER :: debug_this_module = .true.
43 INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
44
45 INTEGER, PRIVATE :: mm_type = 1
46
47 PUBLIC :: cp_fm_type, &
49
50 PUBLIC :: cp_fm_add_to_element, &
56 cp_fm_get_diag, & ! get diagonal
57 cp_fm_set_all, & ! set all elements and diagonal
58 cp_fm_set_submatrix, & ! set a submatrix to given values
59 cp_fm_get_submatrix, & ! get a submatrix of given values
61 cp_fm_maxabsval, & ! find the maximum absolute value
62 cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
63 cp_fm_to_fm, & ! copy (parts of) a fm to a fm
64 cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
65 cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
66 cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
72 cp_fm_write_unformatted, & ! writes a full matrix to an open unit
73 cp_fm_write_formatted, & ! writes a full matrix to an open unit
74 cp_fm_read_unformatted, & ! reads a full matrix from an open unit
75 cp_fm_setup, & ! allows to set flags for fms
78 cp_fm_to_fm_submat_general ! copy matrix across different contexts
79
80 PUBLIC :: cp_fm_indxg2p, &
84
85 INTERFACE cp_fm_to_fm
86 MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
87 cp_fm_to_fm_columns ! just a number of columns
88 END INTERFACE
89
90 INTERFACE cp_fm_release
91 MODULE PROCEDURE cp_fm_release_aa0, &
92 cp_fm_release_aa1, &
93 cp_fm_release_aa2, &
94 cp_fm_release_aa3, &
95 cp_fm_release_ap1, &
96 cp_fm_release_ap2, &
97 cp_fm_release_pa1, &
98 cp_fm_release_pa2, &
99 cp_fm_release_pa3, &
100 cp_fm_release_pp1, &
101 cp_fm_release_pp2
102 END INTERFACE
103
104! **************************************************************************************************
105!> \brief represent a full matrix
106!> \param name the name of the matrix, used for printing
107!> \param matrix_struct structure of this matrix
108!> \param local_data array with the data of the matrix (its contents
109!> depend on the matrix type used: in parallel runs it will be
110!> in scalapack format, in sequential, it will simply contain
111!> the matrix)
112!> \par History
113!> 08.2002 created [fawzi]
114!> \author fawzi
115! **************************************************************************************************
117! PRIVATE
118 CHARACTER(LEN=60) :: name = ""
119 LOGICAL :: use_sp = .false.
120 TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
121 REAL(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
122 REAL(kind=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => null()
123 END TYPE cp_fm_type
124
125! **************************************************************************************************
126!> \brief just to build arrays of pointers to matrices
127!> \param matrix the pointer to the matrix
128!> \par History
129!> 08.2002 created [fawzi]
130!> \author fawzi
131! **************************************************************************************************
133 TYPE(cp_fm_type), POINTER :: matrix => null()
134 END TYPE cp_fm_p_type
135
136! **************************************************************************************************
137!> \brief Stores the state of a copy between cp_fm_start_copy_general
138!> and cp_fm_finish_copy_general
139!> \par History
140!> Jan 2017 [Mark T]
141! **************************************************************************************************
143 INTEGER :: send_size = -1
144 INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
145 TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
146 INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
147 INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
148 INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
149 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
150 END TYPE copy_info_type
151
152CONTAINS
153
154! **************************************************************************************************
155!> \brief creates a new full matrix with the given structure
156!> \param matrix the matrix to be created
157!> \param matrix_struct the structure of matrix
158!> \param name ...
159!> \param use_sp ...
160!> \par History
161!> 08.2002 created [fawzi]
162!> \author Fawzi Mohamed
163!> \note
164!> preferred allocation routine
165! **************************************************************************************************
166 SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
167 TYPE(cp_fm_type), INTENT(OUT) :: matrix
168 TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
169 CHARACTER(len=*), INTENT(in), OPTIONAL :: name
170 LOGICAL, INTENT(in), OPTIONAL :: use_sp
171
172 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_create'
173
174 INTEGER :: handle, ncol_local, npcol, nprow, &
175 nrow_local
176 TYPE(cp_blacs_env_type), POINTER :: context
177
178 CALL timeset(routinen, handle)
179
180#if defined(__parallel) && ! defined(__SCALAPACK)
181 cpabort("full matrices need scalapack for parallel runs")
182#endif
183
184 context => matrix_struct%context
185 matrix%matrix_struct => matrix_struct
186 CALL cp_fm_struct_retain(matrix%matrix_struct)
187
188 matrix%use_sp = .false.
189 IF (PRESENT(use_sp)) matrix%use_sp = use_sp
190
191 nprow = context%num_pe(1)
192 npcol = context%num_pe(2)
193 NULLIFY (matrix%local_data)
194 NULLIFY (matrix%local_data_sp)
195
196 ! OK, we allocate here at least a 1 x 1 matrix
197 ! this must (and is) compatible with the descinit call
198 ! in cp_fm_struct
199 nrow_local = matrix_struct%local_leading_dimension
200 ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
201 IF (matrix%use_sp) THEN
202 ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
203 ELSE
204 ALLOCATE (matrix%local_data(nrow_local, ncol_local))
205 END IF
206
207 ! JVDV we should remove this, as it is up to the user to zero afterwards
208 IF (matrix%use_sp) THEN
209 matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
210 ELSE
211 matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
212 END IF
213
214 IF (PRESENT(name)) THEN
215 matrix%name = name
216 ELSE
217 matrix%name = 'full matrix'
218 END IF
219
220 CALL timestop(handle)
221
222 END SUBROUTINE cp_fm_create
223
224! **************************************************************************************************
225!> \brief releases a full matrix
226!> \param matrix the matrix to release
227!> \par History
228!> 08.2002 created [fawzi]
229!> \author Fawzi Mohamed
230! **************************************************************************************************
231 SUBROUTINE cp_fm_release_aa0(matrix)
232 TYPE(cp_fm_type), INTENT(INOUT) :: matrix
233
234 IF (ASSOCIATED(matrix%local_data)) THEN
235 DEALLOCATE (matrix%local_data)
236 NULLIFY (matrix%local_data)
237 END IF
238 IF (ASSOCIATED(matrix%local_data_sp)) THEN
239 DEALLOCATE (matrix%local_data_sp)
240 NULLIFY (matrix%local_data_sp)
241 END IF
242 matrix%name = ""
243 CALL cp_fm_struct_release(matrix%matrix_struct)
244
245 END SUBROUTINE cp_fm_release_aa0
246
247! **************************************************************************************************
248!> \brief ...
249!> \param matrices ...
250! **************************************************************************************************
251 SUBROUTINE cp_fm_release_aa1(matrices)
252 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
253
254 INTEGER :: i
255
256 IF (ALLOCATED(matrices)) THEN
257 DO i = 1, SIZE(matrices)
258 CALL cp_fm_release(matrices(i))
259 END DO
260 DEALLOCATE (matrices)
261 END IF
262 END SUBROUTINE cp_fm_release_aa1
263
264! **************************************************************************************************
265!> \brief ...
266!> \param matrices ...
267! **************************************************************************************************
268 SUBROUTINE cp_fm_release_aa2(matrices)
269 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
270
271 INTEGER :: i, j
272
273 IF (ALLOCATED(matrices)) THEN
274 DO i = 1, SIZE(matrices, 1)
275 DO j = 1, SIZE(matrices, 2)
276 CALL cp_fm_release(matrices(i, j))
277 END DO
278 END DO
279 DEALLOCATE (matrices)
280 END IF
281 END SUBROUTINE cp_fm_release_aa2
282
283! **************************************************************************************************
284!> \brief ...
285!> \param matrices ...
286! **************************************************************************************************
287 SUBROUTINE cp_fm_release_aa3(matrices)
288 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
289
290 INTEGER :: i, j, k
291
292 IF (ALLOCATED(matrices)) THEN
293 DO i = 1, SIZE(matrices, 1)
294 DO j = 1, SIZE(matrices, 2)
295 DO k = 1, SIZE(matrices, 3)
296 CALL cp_fm_release(matrices(i, j, k))
297 END DO
298 END DO
299 END DO
300 DEALLOCATE (matrices)
301 END IF
302 END SUBROUTINE cp_fm_release_aa3
303
304! **************************************************************************************************
305!> \brief ...
306!> \param matrices ...
307! **************************************************************************************************
308 SUBROUTINE cp_fm_release_pa1(matrices)
309 TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
310
311 INTEGER :: i
312
313 IF (ASSOCIATED(matrices)) THEN
314 DO i = 1, SIZE(matrices)
315 CALL cp_fm_release(matrices(i))
316 END DO
317 DEALLOCATE (matrices)
318 NULLIFY (matrices)
319 END IF
320 END SUBROUTINE cp_fm_release_pa1
321
322! **************************************************************************************************
323!> \brief ...
324!> \param matrices ...
325! **************************************************************************************************
326 SUBROUTINE cp_fm_release_pa2(matrices)
327 TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
328
329 INTEGER :: i, j
330
331 IF (ASSOCIATED(matrices)) THEN
332 DO i = 1, SIZE(matrices, 1)
333 DO j = 1, SIZE(matrices, 2)
334 CALL cp_fm_release(matrices(i, j))
335 END DO
336 END DO
337 DEALLOCATE (matrices)
338 NULLIFY (matrices)
339 END IF
340 END SUBROUTINE cp_fm_release_pa2
341
342! **************************************************************************************************
343!> \brief ...
344!> \param matrices ...
345! **************************************************************************************************
346 SUBROUTINE cp_fm_release_pa3(matrices)
347 TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
348
349 INTEGER :: i, j, k
350
351 IF (ASSOCIATED(matrices)) THEN
352 DO i = 1, SIZE(matrices, 1)
353 DO j = 1, SIZE(matrices, 2)
354 DO k = 1, SIZE(matrices, 3)
355 CALL cp_fm_release(matrices(i, j, k))
356 END DO
357 END DO
358 END DO
359 DEALLOCATE (matrices)
360 NULLIFY (matrices)
361 END IF
362 END SUBROUTINE cp_fm_release_pa3
363
364! **************************************************************************************************
365!> \brief ...
366!> \param matrices ...
367! **************************************************************************************************
368 SUBROUTINE cp_fm_release_ap1(matrices)
369 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
370
371 INTEGER :: i
372
373 IF (ALLOCATED(matrices)) THEN
374 DO i = 1, SIZE(matrices)
375 CALL cp_fm_release(matrices(i)%matrix)
376 DEALLOCATE (matrices(i)%matrix)
377 END DO
378 DEALLOCATE (matrices)
379 END IF
380 END SUBROUTINE cp_fm_release_ap1
381
382! **************************************************************************************************
383!> \brief ...
384!> \param matrices ...
385! **************************************************************************************************
386 SUBROUTINE cp_fm_release_ap2(matrices)
387 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
388
389 INTEGER :: i, j
390
391 IF (ALLOCATED(matrices)) THEN
392 DO i = 1, SIZE(matrices, 1)
393 DO j = 1, SIZE(matrices, 2)
394 CALL cp_fm_release(matrices(i, j)%matrix)
395 DEALLOCATE (matrices(i, j)%matrix)
396 END DO
397 END DO
398 DEALLOCATE (matrices)
399 END IF
400 END SUBROUTINE cp_fm_release_ap2
401
402! **************************************************************************************************
403!> \brief ...
404!> \param matrices ...
405! **************************************************************************************************
406 SUBROUTINE cp_fm_release_pp1(matrices)
407 TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
408
409 INTEGER :: i
410
411 IF (ASSOCIATED(matrices)) THEN
412 DO i = 1, SIZE(matrices)
413 CALL cp_fm_release(matrices(i)%matrix)
414 DEALLOCATE (matrices(i)%matrix)
415 END DO
416 DEALLOCATE (matrices)
417 NULLIFY (matrices)
418 END IF
419 END SUBROUTINE cp_fm_release_pp1
420
421! **************************************************************************************************
422!> \brief ...
423!> \param matrices ...
424! **************************************************************************************************
425 SUBROUTINE cp_fm_release_pp2(matrices)
426 TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
427
428 INTEGER :: i, j
429
430 IF (ASSOCIATED(matrices)) THEN
431 DO i = 1, SIZE(matrices, 1)
432 DO j = 1, SIZE(matrices, 2)
433 CALL cp_fm_release(matrices(i, j)%matrix)
434 DEALLOCATE (matrices(i, j)%matrix)
435 END DO
436 END DO
437 DEALLOCATE (matrices)
438 NULLIFY (matrices)
439 END IF
440 END SUBROUTINE cp_fm_release_pp2
441
442! **************************************************************************************************
443!> \brief fills a matrix with random numbers
444!> \param matrix : to be initialized
445!> \param ncol : numbers of cols to fill
446!> \param start_col : starting at coll number
447!> \author Joost VandeVondele
448!> \note
449!> the value of a_ij is independent of the number of cpus
450! **************************************************************************************************
451 SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
452 TYPE(cp_fm_type), INTENT(IN) :: matrix
453 INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
454
455 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_init_random'
456
457 INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
458 ncol_local, nrow_global, nrow_local
459 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
460 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: buff
461 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
462 POINTER :: local_data
463 REAL(kind=dp), DIMENSION(3, 2), SAVE :: &
464 seed = reshape((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
465 TYPE(rng_stream_type) :: rng
466
467 CALL timeset(routinen, handle)
468
469 ! guarantee same seed over all tasks
470 CALL matrix%matrix_struct%para_env%bcast(seed, 0)
471
472 rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=uniform, &
473 extended_precision=.true., seed=seed)
474
475 cpassert(.NOT. matrix%use_sp)
476
477 CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
478 nrow_local=nrow_local, ncol_local=ncol_local, &
479 local_data=local_data, &
480 row_indices=row_indices, col_indices=col_indices)
481
482 my_start_col = 1
483 IF (PRESENT(start_col)) my_start_col = start_col
484 my_ncol = matrix%matrix_struct%ncol_global
485 IF (PRESENT(ncol)) my_ncol = ncol
486
487 IF (ncol_global < (my_start_col + my_ncol - 1)) &
488 cpabort("ncol_global>=(my_start_col+my_ncol-1)")
489
490 ALLOCATE (buff(nrow_global))
491
492 ! each global row has its own substream, in order to reach the stream for the local col,
493 ! we just reset to the next substream
494 ! following this, we fill the full buff with random numbers, and pick those we need
495 icol_global = 0
496 DO icol_local = 1, ncol_local
497 cpassert(col_indices(icol_local) > icol_global)
498 DO
499 CALL rng%reset_to_next_substream()
500 icol_global = icol_global + 1
501 IF (icol_global == col_indices(icol_local)) EXIT
502 END DO
503 CALL rng%fill(buff)
504 DO irow_local = 1, nrow_local
505 local_data(irow_local, icol_local) = buff(row_indices(irow_local))
506 END DO
507 END DO
508
509 DEALLOCATE (buff)
510
511 ! store seed before deletion (unclear if this is the proper seed)
512
513 ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
514 ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
515 ! stream of random numbers. While this seems odd and contrary to the original design,
516 ! it was probably introduced to improve reproducibility.
517 ! See also https://github.com/cp2k/cp2k/pull/506
518 CALL rng%get(ig=seed)
519
520 CALL timestop(handle)
521
522 END SUBROUTINE cp_fm_init_random
523
524! **************************************************************************************************
525!> \brief set all elements of a matrix to the same value,
526!> and optionally the diagonal to a different one
527!> \param matrix input matrix
528!> \param alpha scalar used to set all elements of the matrix
529!> \param beta scalar used to set diagonal of the matrix
530!> \note
531!> can be used to zero a matrix
532!> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
533! **************************************************************************************************
534 SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
535
536 TYPE(cp_fm_type), INTENT(IN) :: matrix
537 REAL(kind=dp), INTENT(IN) :: alpha
538 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
539
540 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_set_all'
541
542 INTEGER :: handle, i, n
543
544 CALL timeset(routinen, handle)
545
546 IF (matrix%use_sp) THEN
547 matrix%local_data_sp(:, :) = real(alpha, sp)
548 ELSE
549 matrix%local_data(:, :) = alpha
550 END IF
551
552 IF (PRESENT(beta)) THEN
553 cpassert(.NOT. matrix%use_sp)
554 n = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
555 DO i = 1, n
556 CALL cp_fm_set_element(matrix, i, i, beta)
557 END DO
558 END IF
559
560 CALL timestop(handle)
561
562 END SUBROUTINE cp_fm_set_all
563
564! **************************************************************************************************
565!> \brief returns the diagonal elements of a fm
566!> \param matrix ...
567!> \param diag ...
568! **************************************************************************************************
569 SUBROUTINE cp_fm_get_diag(matrix, diag)
570
571 ! arguments
572 TYPE(cp_fm_type), INTENT(IN) :: matrix
573 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: diag
574
575 ! locals
576 INTEGER :: i, nrow_global
577
578#if defined(__SCALAPACK)
579 INTEGER, DIMENSION(9) :: desca
580 TYPE(cp_blacs_env_type), POINTER :: context
581 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
582 nprow
583 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
584 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
585#endif
586
587 CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
588
589#if defined(__SCALAPACK)
590 diag = 0.0_dp
591 context => matrix%matrix_struct%context
592 myprow = context%mepos(1)
593 mypcol = context%mepos(2)
594 nprow = context%num_pe(1)
595 npcol = context%num_pe(2)
596
597 a => matrix%local_data
598 a_sp => matrix%local_data_sp
599 desca(:) = matrix%matrix_struct%descriptor(:)
600
601 DO i = 1, nrow_global
602 CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
603 irow_local, icol_local, iprow, ipcol)
604 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
605 IF (matrix%use_sp) THEN
606 diag(i) = real(a_sp(irow_local, icol_local), dp)
607 ELSE
608 diag(i) = a(irow_local, icol_local)
609 END IF
610 END IF
611 END DO
612#else
613 DO i = 1, nrow_global
614 IF (matrix%use_sp) THEN
615 diag(i) = real(matrix%local_data_sp(i, i), dp)
616 ELSE
617 diag(i) = matrix%local_data(i, i)
618 END IF
619 END DO
620#endif
621 CALL matrix%matrix_struct%para_env%sum(diag)
622
623 END SUBROUTINE cp_fm_get_diag
624
625! **************************************************************************************************
626!> \brief returns an element of a fm
627!> this value is valid on every cpu
628!> using this call is expensive
629!> \param matrix the matrix to read
630!> \param irow_global the row
631!> \param icol_global the col
632!> \param alpha the value of matrix(irow_global, icol_global)
633!> \param local true if the element is on this cpu, false otherwise
634!> \note
635!> - modified semantics. now this function always returns the value
636!> previously the value was zero on cpus that didn't own the relevant
637!> part of the matrix (Joost VandeVondele, May 2003)
638!> - usage of the function should be avoided, as it is likely to rather slow
639!> using row_indices/col_indices/local_data + some smart scheme normally
640!> yields a real parallel code
641! **************************************************************************************************
642 SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
643
644 ! arguments
645 TYPE(cp_fm_type), INTENT(IN) :: matrix
646 REAL(kind=dp), INTENT(OUT) :: alpha
647 INTEGER, INTENT(IN) :: icol_global, &
648 irow_global
649 LOGICAL, INTENT(OUT), OPTIONAL :: local
650
651 ! locals
652#if defined(__SCALAPACK)
653 INTEGER, DIMENSION(9) :: desca
654 TYPE(cp_blacs_env_type), POINTER :: context
655 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
656 nprow
657 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
658#endif
659
660#if defined(__SCALAPACK)
661 context => matrix%matrix_struct%context
662 myprow = context%mepos(1)
663 mypcol = context%mepos(2)
664 nprow = context%num_pe(1)
665 npcol = context%num_pe(2)
666
667 a => matrix%local_data
668 desca(:) = matrix%matrix_struct%descriptor(:)
669
670 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
671 irow_local, icol_local, iprow, ipcol)
672
673 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
674 alpha = a(irow_local, icol_local)
675 CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
676 IF (PRESENT(local)) local = .true.
677 ELSE
678 CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
679 IF (PRESENT(local)) local = .false.
680 END IF
681
682#else
683 IF (PRESENT(local)) local = .true.
684 alpha = matrix%local_data(irow_global, icol_global)
685#endif
686
687 END SUBROUTINE cp_fm_get_element
688
689! **************************************************************************************************
690!> \brief sets an element of a matrix
691!> \param matrix ...
692!> \param irow_global ...
693!> \param icol_global ...
694!> \param alpha ...
695!> \note
696!> we expect all cpus to have the same arguments in the call to this function
697!> (otherwise one should use local_data tricks)
698! **************************************************************************************************
699 SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
700 TYPE(cp_fm_type), INTENT(IN) :: matrix
701 INTEGER, INTENT(IN) :: irow_global, icol_global
702 REAL(kind=dp), INTENT(IN) :: alpha
703
704 INTEGER :: mypcol, myprow, npcol, nprow
705 TYPE(cp_blacs_env_type), POINTER :: context
706#if defined(__SCALAPACK)
707 INTEGER :: icol_local, ipcol, iprow, &
708 irow_local
709 INTEGER, DIMENSION(9) :: desca
710 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
711#endif
712
713 context => matrix%matrix_struct%context
714 myprow = context%mepos(1)
715 mypcol = context%mepos(2)
716 nprow = context%num_pe(1)
717 npcol = context%num_pe(2)
718
719 cpassert(.NOT. matrix%use_sp)
720
721#if defined(__SCALAPACK)
722
723 a => matrix%local_data
724
725 desca(:) = matrix%matrix_struct%descriptor(:)
726
727 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
728 irow_local, icol_local, iprow, ipcol)
729
730 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
731 a(irow_local, icol_local) = alpha
732 END IF
733
734#else
735
736 matrix%local_data(irow_global, icol_global) = alpha
737
738#endif
739 END SUBROUTINE cp_fm_set_element
740
741! **************************************************************************************************
742!> \brief sets a submatrix of a full matrix
743!> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
744!> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
745!> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
746!> \param fm the full to change
747!> \param new_values a replicated full matrix with the new values
748!> \param start_row the starting row of b_matrix (defaults to 1)
749!> \param start_col the starting col of b_matrix (defaults to 1)
750!> \param n_rows the number of row to change in b (defaults to
751!> size(op(new_values),1))
752!> \param n_cols the number of columns to change in b (defaults to
753!> size(op(new_values),2))
754!> \param alpha rescaling factor for the new values (defaults to 1.0)
755!> \param beta rescaling factor for the old values (defaults to 0.0)
756!> \param transpose if new_values should be transposed: if true
757!> op(new_values)=new_values^T, else op(new_values)=new_values
758!> (defaults to false)
759!> \par History
760!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
761!> \author Fawzi Mohamed
762!> \note
763!> optimized for full column updates and alpha=1.0, beta=0.0
764!> the new_values need to be valid on all cpus
765! **************************************************************************************************
766 SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
767 start_col, n_rows, n_cols, alpha, beta, transpose)
768 TYPE(cp_fm_type), INTENT(IN) :: fm
769 REAL(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
770 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
771 REAL(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
772 LOGICAL, INTENT(in), OPTIONAL :: transpose
773
774 INTEGER :: i, i0, j, j0, ncol, ncol_block, &
775 ncol_global, ncol_local, nrow, &
776 nrow_block, nrow_global, nrow_local, &
777 this_col, this_row
778 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
779 LOGICAL :: tr_a
780 REAL(kind=dp) :: al, be
781 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
782
783 al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .false.
784 ! can be called too many times, making it a bit useless
785 ! CALL timeset(routineN//','//moduleN,handle)
786
787 cpassert(.NOT. fm%use_sp)
788
789 IF (PRESENT(alpha)) al = alpha
790 IF (PRESENT(beta)) be = beta
791 IF (PRESENT(start_row)) i0 = start_row
792 IF (PRESENT(start_col)) j0 = start_col
793 IF (PRESENT(transpose)) tr_a = transpose
794 IF (tr_a) THEN
795 nrow = SIZE(new_values, 2)
796 ncol = SIZE(new_values, 1)
797 ELSE
798 nrow = SIZE(new_values, 1)
799 ncol = SIZE(new_values, 2)
800 END IF
801 IF (PRESENT(n_rows)) nrow = n_rows
802 IF (PRESENT(n_cols)) ncol = n_cols
803
804 full_block => fm%local_data
805
806 CALL cp_fm_get_info(matrix=fm, &
807 nrow_global=nrow_global, ncol_global=ncol_global, &
808 nrow_block=nrow_block, ncol_block=ncol_block, &
809 nrow_local=nrow_local, ncol_local=ncol_local, &
810 row_indices=row_indices, col_indices=col_indices)
811
812 IF (al == 1.0 .AND. be == 0.0) THEN
813 DO j = 1, ncol_local
814 this_col = col_indices(j) - j0 + 1
815 IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
816 IF (tr_a) THEN
817 IF (i0 == 1 .AND. nrow_global == nrow) THEN
818 DO i = 1, nrow_local
819 full_block(i, j) = new_values(this_col, row_indices(i))
820 END DO
821 ELSE
822 DO i = 1, nrow_local
823 this_row = row_indices(i) - i0 + 1
824 IF (this_row >= 1 .AND. this_row <= nrow) THEN
825 full_block(i, j) = new_values(this_col, this_row)
826 END IF
827 END DO
828 END IF
829 ELSE
830 IF (i0 == 1 .AND. nrow_global == nrow) THEN
831 DO i = 1, nrow_local
832 full_block(i, j) = new_values(row_indices(i), this_col)
833 END DO
834 ELSE
835 DO i = 1, nrow_local
836 this_row = row_indices(i) - i0 + 1
837 IF (this_row >= 1 .AND. this_row <= nrow) THEN
838 full_block(i, j) = new_values(this_row, this_col)
839 END IF
840 END DO
841 END IF
842 END IF
843 END IF
844 END DO
845 ELSE
846 DO j = 1, ncol_local
847 this_col = col_indices(j) - j0 + 1
848 IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
849 IF (tr_a) THEN
850 DO i = 1, nrow_local
851 this_row = row_indices(i) - i0 + 1
852 IF (this_row >= 1 .AND. this_row <= nrow) THEN
853 full_block(i, j) = al*new_values(this_col, this_row) + &
854 be*full_block(i, j)
855 END IF
856 END DO
857 ELSE
858 DO i = 1, nrow_local
859 this_row = row_indices(i) - i0 + 1
860 IF (this_row >= 1 .AND. this_row <= nrow) THEN
861 full_block(i, j) = al*new_values(this_row, this_col) + &
862 be*full_block(i, j)
863 END IF
864 END DO
865 END IF
866 END IF
867 END DO
868 END IF
869
870 ! CALL timestop(handle)
871
872 END SUBROUTINE cp_fm_set_submatrix
873
874! **************************************************************************************************
875!> \brief gets a submatrix of a full matrix
876!> op(target_m)(1:n_rows,1:n_cols)
877!> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
878!> target_m is replicated on all cpus
879!> using this call is expensive
880!> \param fm the full you want to get the info from
881!> \param target_m a replicated full matrix that will contain the result
882!> \param start_row the starting row of b_matrix (defaults to 1)
883!> \param start_col the starting col of b_matrix (defaults to 1)
884!> \param n_rows the number of row to change in b (defaults to
885!> size(op(new_values),1))
886!> \param n_cols the number of columns to change in b (defaults to
887!> size(op(new_values),2))
888!> \param transpose if target_m should be transposed: if true
889!> op(target_m)=target_m^T, else op(target_m)=target_m
890!> (defaults to false)
891!> \par History
892!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
893!> \author Fawzi Mohamed
894!> \note
895!> optimized for full column updates. Zeros out a little too much
896!> of target_m
897!> the target_m is replicated and valid on all cpus
898! **************************************************************************************************
899 SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
900 start_col, n_rows, n_cols, transpose)
901 TYPE(cp_fm_type), INTENT(IN) :: fm
902 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
903 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
904 LOGICAL, INTENT(in), OPTIONAL :: transpose
905
906 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_get_submatrix'
907
908 INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
909 ncol_local, nrow, nrow_global, &
910 nrow_local, this_col, this_row
911 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
912 LOGICAL :: tr_a
913 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
914 TYPE(mp_para_env_type), POINTER :: para_env
915
916 CALL timeset(routinen, handle)
917
918 i0 = 1; j0 = 1; tr_a = .false.
919
920 cpassert(.NOT. fm%use_sp)
921
922 IF (PRESENT(start_row)) i0 = start_row
923 IF (PRESENT(start_col)) j0 = start_col
924 IF (PRESENT(transpose)) tr_a = transpose
925 IF (tr_a) THEN
926 nrow = SIZE(target_m, 2)
927 ncol = SIZE(target_m, 1)
928 ELSE
929 nrow = SIZE(target_m, 1)
930 ncol = SIZE(target_m, 2)
931 END IF
932 IF (PRESENT(n_rows)) nrow = n_rows
933 IF (PRESENT(n_cols)) ncol = n_cols
934
935 para_env => fm%matrix_struct%para_env
936
937 full_block => fm%local_data
938#if defined(__SCALAPACK)
939 ! zero-out whole target_m
940 IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
941 CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
942 END IF
943#endif
944
945 CALL cp_fm_get_info(matrix=fm, &
946 nrow_global=nrow_global, ncol_global=ncol_global, &
947 nrow_local=nrow_local, ncol_local=ncol_local, &
948 row_indices=row_indices, col_indices=col_indices)
949
950 DO j = 1, ncol_local
951 this_col = col_indices(j) - j0 + 1
952 IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
953 IF (tr_a) THEN
954 IF (i0 == 1 .AND. nrow_global == nrow) THEN
955 DO i = 1, nrow_local
956 target_m(this_col, row_indices(i)) = full_block(i, j)
957 END DO
958 ELSE
959 DO i = 1, nrow_local
960 this_row = row_indices(i) - i0 + 1
961 IF (this_row >= 1 .AND. this_row <= nrow) THEN
962 target_m(this_col, this_row) = full_block(i, j)
963 END IF
964 END DO
965 END IF
966 ELSE
967 IF (i0 == 1 .AND. nrow_global == nrow) THEN
968 DO i = 1, nrow_local
969 target_m(row_indices(i), this_col) = full_block(i, j)
970 END DO
971 ELSE
972 DO i = 1, nrow_local
973 this_row = row_indices(i) - i0 + 1
974 IF (this_row >= 1 .AND. this_row <= nrow) THEN
975 target_m(this_row, this_col) = full_block(i, j)
976 END IF
977 END DO
978 END IF
979 END IF
980 END IF
981 END DO
982
983 CALL para_env%sum(target_m)
984
985 CALL timestop(handle)
986
987 END SUBROUTINE cp_fm_get_submatrix
988
989! **************************************************************************************************
990!> \brief returns all kind of information about the full matrix
991!> \param matrix ...
992!> \param name ...
993!> \param nrow_global ...
994!> \param ncol_global ...
995!> \param nrow_block ...
996!> \param ncol_block ...
997!> \param nrow_local ...
998!> \param ncol_local ...
999!> \param row_indices ...
1000!> \param col_indices ...
1001!> \param local_data ...
1002!> \param context ...
1003!> \param nrow_locals ...
1004!> \param ncol_locals ...
1005!> \param matrix_struct ...
1006!> \param para_env ...
1007!> \note
1008!> see also cp_fm_struct for explanation
1009!> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1010!> access to the local blacs block
1011! **************************************************************************************************
1012 SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1013 nrow_block, ncol_block, nrow_local, ncol_local, &
1014 row_indices, col_indices, local_data, context, &
1015 nrow_locals, ncol_locals, matrix_struct, para_env)
1016
1017 TYPE(cp_fm_type), INTENT(IN) :: matrix
1018 CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1019 INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1020 ncol_block, nrow_local, ncol_local
1021 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1022 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1023 OPTIONAL, POINTER :: local_data
1024 TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1025 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1026 TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1027 TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1028
1029 IF (PRESENT(name)) name = matrix%name
1030 IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1031 IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1032
1033 CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1034 ncol_local=ncol_local, nrow_global=nrow_global, &
1035 ncol_global=ncol_global, nrow_block=nrow_block, &
1036 ncol_block=ncol_block, row_indices=row_indices, &
1037 col_indices=col_indices, nrow_locals=nrow_locals, &
1038 ncol_locals=ncol_locals, context=context, para_env=para_env)
1039
1040 END SUBROUTINE cp_fm_get_info
1041
1042! **************************************************************************************************
1043!> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1044!> \param matrix a cp_fm_type instance
1045!> \param io_unit the I/O unit to use for writing
1046! **************************************************************************************************
1047 SUBROUTINE cp_fm_write_info(matrix, io_unit)
1048 TYPE(cp_fm_type), INTENT(IN) :: matrix
1049 INTEGER, INTENT(IN) :: io_unit
1050
1051 WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1052 CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1053 END SUBROUTINE cp_fm_write_info
1054
1055! **************************************************************************************************
1056!> \brief find the maximum absolute value of the matrix element
1057!> maxval(abs(matrix))
1058!> \param matrix ...
1059!> \param a_max ...
1060!> \param ir_max ...
1061!> \param ic_max ...
1062! **************************************************************************************************
1063 SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1064 TYPE(cp_fm_type), INTENT(IN) :: matrix
1065 REAL(kind=dp), INTENT(OUT) :: a_max
1066 INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1067
1068 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_maxabsval'
1069
1070 INTEGER :: handle, i, ic_max_local, ir_max_local, &
1071 j, mepos, ncol_local, nrow_local, &
1072 num_pe
1073 INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1074 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1075 REAL(dp) :: my_max
1076 REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1077 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1078 REAL(kind=sp), DIMENSION(:, :), POINTER :: my_block_sp
1079
1080 CALL timeset(routinen, handle)
1081
1082 my_block => matrix%local_data
1083 my_block_sp => matrix%local_data_sp
1084
1085 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1086 row_indices=row_indices, col_indices=col_indices)
1087
1088 IF (matrix%use_sp) THEN
1089 a_max = real(maxval(abs(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
1090 ELSE
1091 a_max = maxval(abs(my_block(1:nrow_local, 1:ncol_local)))
1092 END IF
1093
1094 IF (PRESENT(ir_max)) THEN
1095 num_pe = matrix%matrix_struct%para_env%num_pe
1096 mepos = matrix%matrix_struct%para_env%mepos
1097 ALLOCATE (ir_max_vec(0:num_pe - 1))
1098 ir_max_vec(0:num_pe - 1) = 0
1099 ALLOCATE (ic_max_vec(0:num_pe - 1))
1100 ic_max_vec(0:num_pe - 1) = 0
1101 ALLOCATE (a_max_vec(0:num_pe - 1))
1102 a_max_vec(0:num_pe - 1) = 0.0_dp
1103 my_max = 0.0_dp
1104
1105 IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1106 DO i = 1, ncol_local
1107 DO j = 1, nrow_local
1108 IF (matrix%use_sp) THEN
1109 IF (abs(my_block_sp(j, i)) > my_max) THEN
1110 my_max = real(my_block_sp(j, i), dp)
1111 ir_max_local = j
1112 ic_max_local = i
1113 END IF
1114 ELSE
1115 IF (abs(my_block(j, i)) > my_max) THEN
1116 my_max = my_block(j, i)
1117 ir_max_local = j
1118 ic_max_local = i
1119 END IF
1120 END IF
1121 END DO
1122 END DO
1123
1124 a_max_vec(mepos) = my_max
1125 ir_max_vec(mepos) = row_indices(ir_max_local)
1126 ic_max_vec(mepos) = col_indices(ic_max_local)
1127
1128 END IF
1129
1130 CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1131 CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1132 CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1133
1134 my_max = 0.0_dp
1135 DO i = 0, num_pe - 1
1136 IF (a_max_vec(i) > my_max) THEN
1137 ir_max = ir_max_vec(i)
1138 ic_max = ic_max_vec(i)
1139 END IF
1140 END DO
1141
1142 DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1143 cpassert(ic_max > 0)
1144 cpassert(ir_max > 0)
1145
1146 END IF
1147
1148 CALL matrix%matrix_struct%para_env%max(a_max)
1149
1150 CALL timestop(handle)
1151
1152 END SUBROUTINE cp_fm_maxabsval
1153
1154! **************************************************************************************************
1155!> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1156!> = || A ||_infinity
1157!> \param matrix ...
1158!> \param a_max ...
1159!> \note
1160!> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
1161!> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1162!> http://mathworld.wolfram.com/MatrixNorm.html
1163!> (but the bound is not so tight in the general case)
1164! **************************************************************************************************
1165 SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1166 TYPE(cp_fm_type), INTENT(IN) :: matrix
1167 REAL(kind=dp), INTENT(OUT) :: a_max
1168
1169 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_maxabsrownorm'
1170
1171 INTEGER :: handle, i, j, ncol_local, nrow_global, &
1172 nrow_local
1173 INTEGER, DIMENSION(:), POINTER :: row_indices
1174 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: values
1175 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1176
1177 CALL timeset(routinen, handle)
1178
1179 my_block => matrix%local_data
1180
1181 cpassert(.NOT. matrix%use_sp)
1182
1183 CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1184 nrow_local=nrow_local, ncol_local=ncol_local)
1185
1186 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1187 ALLOCATE (values(nrow_global))
1188 values = 0.0_dp
1189 DO j = 1, ncol_local
1190 DO i = 1, nrow_local
1191 values(row_indices(i)) = values(row_indices(i)) + abs(my_block(i, j))
1192 END DO
1193 END DO
1194 CALL matrix%matrix_struct%para_env%sum(values)
1195 a_max = maxval(values)
1196 DEALLOCATE (values)
1197
1198 CALL timestop(handle)
1199 END SUBROUTINE
1200
1201! **************************************************************************************************
1202!> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
1203!> \param matrix ...
1204!> \param norm_array ...
1205! **************************************************************************************************
1206 SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1207 TYPE(cp_fm_type), INTENT(IN) :: matrix
1208 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1209
1210 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_vectorsnorm'
1211
1212 INTEGER :: handle, i, j, ncol_global, ncol_local, &
1213 nrow_local
1214 INTEGER, DIMENSION(:), POINTER :: col_indices
1215 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1216
1217 CALL timeset(routinen, handle)
1218
1219 my_block => matrix%local_data
1220
1221 cpassert(.NOT. matrix%use_sp)
1222
1223 CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1224 nrow_local=nrow_local, ncol_local=ncol_local)
1225
1226 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1227 norm_array = 0.0_dp
1228 DO j = 1, ncol_local
1229 DO i = 1, nrow_local
1230 norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1231 END DO
1232 END DO
1233 CALL matrix%matrix_struct%para_env%sum(norm_array)
1234 norm_array = sqrt(norm_array)
1235
1236 CALL timestop(handle)
1237 END SUBROUTINE
1238
1239! **************************************************************************************************
1240!> \brief summing up all the elements along the matrix's i-th index
1241!> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1242!> or
1243!> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1244!> \param matrix an input matrix A
1245!> \param sum_array sums of elements in each column/row
1246!> \param dir ...
1247!> \note forked from cp_fm_vectorsnorm() to be used with
1248!> the maximum overlap method
1249!> added row variation
1250! **************************************************************************************************
1251 SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1252 TYPE(cp_fm_type), INTENT(IN) :: matrix
1253 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1254 CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1255
1256 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_vectorssum'
1257
1258 INTEGER :: handle, i, j, ncol_local, nrow_local
1259 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1260 LOGICAL :: docol
1261 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1262
1263 CALL timeset(routinen, handle)
1264
1265 IF (PRESENT(dir)) THEN
1266 IF (dir == 'c' .OR. dir == 'C') THEN
1267 docol = .true.
1268 ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1269 docol = .false.
1270 ELSE
1271 cpabort('Wrong argument DIR')
1272 END IF
1273 ELSE
1274 docol = .true.
1275 END IF
1276
1277 my_block => matrix%local_data
1278
1279 cpassert(.NOT. matrix%use_sp)
1280
1281 CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1282 nrow_local=nrow_local, ncol_local=ncol_local)
1283
1284 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1285 sum_array(:) = 0.0_dp
1286 IF (docol) THEN
1287 DO j = 1, ncol_local
1288 DO i = 1, nrow_local
1289 sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1290 END DO
1291 END DO
1292 ELSE
1293 DO j = 1, ncol_local
1294 DO i = 1, nrow_local
1295 sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1296 END DO
1297 END DO
1298 END IF
1299 CALL matrix%matrix_struct%para_env%sum(sum_array)
1300
1301 CALL timestop(handle)
1302 END SUBROUTINE
1303
1304! **************************************************************************************************
1305!> \brief copy one identically sized matrix in the other
1306!> \param source ...
1307!> \param destination ...
1308!> \note
1309!> see also cp_fm_to_fm_columns
1310! **************************************************************************************************
1311 SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1312
1313 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1314
1315 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_matrix'
1316
1317 INTEGER :: handle, npcol, nprow
1318
1319 CALL timeset(routinen, handle)
1320
1321 nprow = source%matrix_struct%context%num_pe(1)
1322 npcol = source%matrix_struct%context%num_pe(2)
1323
1324 IF ((.NOT. cp2k_is_parallel) .OR. &
1325 cp_fm_struct_equivalent(source%matrix_struct, &
1326 destination%matrix_struct)) THEN
1327 IF (source%use_sp .AND. destination%use_sp) THEN
1328 IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1329 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
1330 CALL cp_abort(__location__, &
1331 "Cannot copy full matrix <"//trim(source%name)// &
1332 "> to full matrix <"//trim(destination%name)// &
1333 ">. The local_data blocks have different sizes.")
1334 CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
1335 source%local_data_sp, 1, destination%local_data_sp, 1)
1336 ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
1337 IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
1338 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
1339 CALL cp_abort(__location__, &
1340 "Cannot copy full matrix <"//trim(source%name)// &
1341 "> to full matrix <"//trim(destination%name)// &
1342 ">. The local_data blocks have different sizes.")
1343 destination%local_data = real(source%local_data_sp, dp)
1344 ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
1345 IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1346 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
1347 CALL cp_abort(__location__, &
1348 "Cannot copy full matrix <"//trim(source%name)// &
1349 "> to full matrix <"//trim(destination%name)// &
1350 ">. The local_data blocks have different sizes.")
1351 destination%local_data_sp = real(source%local_data, sp)
1352 ELSE
1353 IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1354 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1355 CALL cp_abort(__location__, &
1356 "Cannot copy full matrix <"//trim(source%name)// &
1357 "> to full matrix <"//trim(destination%name)// &
1358 ">. The local_data blocks have different sizes.")
1359 CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1360 source%local_data, 1, destination%local_data, 1)
1361 END IF
1362 ELSE
1363 cpabort("Data structures of source and target full matrix are not equivalent")
1364 END IF
1365
1366 CALL timestop(handle)
1367
1368 END SUBROUTINE cp_fm_to_fm_matrix
1369
1370! **************************************************************************************************
1371!> \brief copy just a subset of columns of a fm to a fm
1372!> \param msource ...
1373!> \param mtarget ...
1374!> \param ncol ...
1375!> \param source_start ...
1376!> \param target_start ...
1377! **************************************************************************************************
1378 SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1379 target_start)
1380
1381 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1382 INTEGER, INTENT(IN) :: ncol
1383 INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1384
1385 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
1386
1387 INTEGER :: handle, n, ss, ts
1388 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1389#if defined(__SCALAPACK)
1390 INTEGER :: i
1391 INTEGER, DIMENSION(9) :: desca, descb
1392#endif
1393
1394 CALL timeset(routinen, handle)
1395
1396 ss = 1
1397 ts = 1
1398
1399 IF (PRESENT(source_start)) ss = source_start
1400 IF (PRESENT(target_start)) ts = target_start
1401
1402 n = msource%matrix_struct%nrow_global
1403
1404 a => msource%local_data
1405 b => mtarget%local_data
1406
1407#if defined(__SCALAPACK)
1408 desca(:) = msource%matrix_struct%descriptor(:)
1409 descb(:) = mtarget%matrix_struct%descriptor(:)
1410 DO i = 0, ncol - 1
1411 CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1412 END DO
1413#else
1414 CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1415#endif
1416
1417 CALL timestop(handle)
1418
1419 END SUBROUTINE cp_fm_to_fm_columns
1420
1421! **************************************************************************************************
1422!> \brief copy just a triangular matrix
1423!> \param msource ...
1424!> \param mtarget ...
1425!> \param uplo ...
1426! **************************************************************************************************
1427 SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1428
1429 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1430 CHARACTER(LEN=*), INTENT(IN) :: uplo
1431
1432 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_triangular'
1433
1434 INTEGER :: handle, ncol, nrow
1435 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1436#if defined(__SCALAPACK)
1437 INTEGER, DIMENSION(9) :: desca, descb
1438#endif
1439
1440 CALL timeset(routinen, handle)
1441
1442 nrow = msource%matrix_struct%nrow_global
1443 ncol = msource%matrix_struct%ncol_global
1444
1445 a => msource%local_data
1446 b => mtarget%local_data
1447
1448#if defined(__SCALAPACK)
1449 desca(:) = msource%matrix_struct%descriptor(:)
1450 descb(:) = mtarget%matrix_struct%descriptor(:)
1451 CALL pdlacpy(uplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1452#else
1453 CALL dlacpy(uplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1454#endif
1455
1456 CALL timestop(handle)
1457
1458 END SUBROUTINE cp_fm_to_fm_triangular
1459
1460! **************************************************************************************************
1461!> \brief copy just a part ot the matrix
1462!> \param msource ...
1463!> \param mtarget ...
1464!> \param nrow ...
1465!> \param ncol ...
1466!> \param s_firstrow ...
1467!> \param s_firstcol ...
1468!> \param t_firstrow ...
1469!> \param t_firstcol ...
1470! **************************************************************************************************
1471
1472 SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1473
1474 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1475 INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1476 s_firstcol, t_firstrow, &
1477 t_firstcol
1478
1479 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat'
1480
1481 INTEGER :: handle, i, na, nb, ss, ts
1482 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1483#if defined(__SCALAPACK)
1484 INTEGER, DIMENSION(9) :: desca, descb
1485#endif
1486
1487 CALL timeset(routinen, handle)
1488
1489 a => msource%local_data
1490 b => mtarget%local_data
1491
1492 na = msource%matrix_struct%nrow_global
1493 nb = mtarget%matrix_struct%nrow_global
1494! nrow must be <= na and nb
1495 IF (nrow > na) &
1496 cpabort("cannot copy because nrow > number of rows of source matrix")
1497 IF (nrow > nb) &
1498 cpabort("cannot copy because nrow > number of rows of target matrix")
1499 na = msource%matrix_struct%ncol_global
1500 nb = mtarget%matrix_struct%ncol_global
1501! ncol must be <= na_col and nb_col
1502 IF (ncol > na) &
1503 cpabort("cannot copy because nrow > number of rows of source matrix")
1504 IF (ncol > nb) &
1505 cpabort("cannot copy because nrow > number of rows of target matrix")
1506
1507#if defined(__SCALAPACK)
1508 desca(:) = msource%matrix_struct%descriptor(:)
1509 descb(:) = mtarget%matrix_struct%descriptor(:)
1510 DO i = 0, ncol - 1
1511 ss = s_firstcol + i
1512 ts = t_firstcol + i
1513 CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1514 END DO
1515#else
1516 DO i = 0, ncol - 1
1517 ss = s_firstcol + i
1518 ts = t_firstcol + i
1519 CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1520 END DO
1521#endif
1522
1523 CALL timestop(handle)
1524 END SUBROUTINE cp_fm_to_fm_submat
1525
1526! **************************************************************************************************
1527!> \brief General copy of a fm matrix to another fm matrix.
1528!> Uses non-blocking MPI rather than ScaLAPACK.
1529!>
1530!> \param source input fm matrix
1531!> \param destination output fm matrix
1532!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1533!> of the input and output matrices
1534!> \par History
1535!> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1536! **************************************************************************************************
1537 SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1538 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1539 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1540
1541 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_copy_general'
1542
1543 INTEGER :: handle
1544 TYPE(copy_info_type) :: info
1545
1546 CALL timeset(routinen, handle)
1547
1548 CALL cp_fm_start_copy_general(source, destination, para_env, info)
1549 IF (ASSOCIATED(destination%matrix_struct)) THEN
1550 CALL cp_fm_finish_copy_general(destination, info)
1551 END IF
1552 IF (ASSOCIATED(source%matrix_struct)) THEN
1554 END IF
1555
1556 CALL timestop(handle)
1557 END SUBROUTINE cp_fm_copy_general
1558
1559! **************************************************************************************************
1560!> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
1561!> \param source input fm matrix
1562!> \param destination output fm matrix
1563!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1564!> of the input and output matrices
1565!> \param info all of the data that will be needed to complete the copy operation
1566! **************************************************************************************************
1567 SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1568 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1569 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1570 TYPE(copy_info_type), INTENT(OUT) :: info
1571
1572 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_start_copy_general'
1573
1574 INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1575 ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1576 nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1577 recv_rank, recv_size, send_rank, send_size
1578 INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1579 recv_count, send_count, send_disp, &
1580 source2global, src_p, src_q
1581 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1582 INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1583 src_block, src_block_tmp, src_num_pe
1584 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1585 send_col_indices, send_row_indices
1586 TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1587 TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1588
1589 CALL timeset(routinen, handle)
1590
1591 IF (.NOT. cp2k_is_parallel) THEN
1592 ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1593 nrow_local_send = SIZE(source%local_data, 1)
1594 ncol_local_send = SIZE(source%local_data, 2)
1595 ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1596 k = 0
1597 DO j = 1, ncol_local_send
1598 DO i = 1, nrow_local_send
1599 k = k + 1
1600 info%send_buf(k) = source%local_data(i, j)
1601 END DO
1602 END DO
1603 ELSE
1604 ! assumption of double precision data is carried forward from ScaLAPACK version
1605 IF (source%use_sp) cpabort("only DP kind implemented")
1606 IF (destination%use_sp) cpabort("only DP kind implemented")
1607
1608 NULLIFY (recv_dist, send_dist)
1609 NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1610
1611 ! The 'global' communicator contains both the source and destination decompositions
1612 global_size = para_env%num_pe
1613 global_rank = para_env%mepos
1614
1615 ! The source/send decomposition and destination/recv decompositions may only exist on
1616 ! on a subset of the processes involved in the communication
1617 ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1618 ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1619 IF (ASSOCIATED(destination%matrix_struct)) THEN
1620 recv_dist => destination%matrix_struct
1621 recv_rank = recv_dist%para_env%mepos
1622 ELSE
1623 recv_rank = mp_proc_null
1624 END IF
1625
1626 IF (ASSOCIATED(source%matrix_struct)) THEN
1627 send_dist => source%matrix_struct
1628 send_rank = send_dist%para_env%mepos
1629 ELSE
1630 send_rank = mp_proc_null
1631 END IF
1632
1633 ! Map the rank in the source/dest communicator to the global rank
1634 ALLOCATE (all_ranks(0:global_size - 1))
1635
1636 CALL para_env%allgather(send_rank, all_ranks)
1637 IF (ASSOCIATED(recv_dist)) THEN
1638 ALLOCATE (source2global(0:count(all_ranks .NE. mp_proc_null) - 1))
1639 DO i = 0, global_size - 1
1640 IF (all_ranks(i) .NE. mp_proc_null) THEN
1641 source2global(all_ranks(i)) = i
1642 END IF
1643 END DO
1644 END IF
1645
1646 CALL para_env%allgather(recv_rank, all_ranks)
1647 IF (ASSOCIATED(send_dist)) THEN
1648 ALLOCATE (dest2global(0:count(all_ranks .NE. mp_proc_null) - 1))
1649 DO i = 0, global_size - 1
1650 IF (all_ranks(i) .NE. mp_proc_null) THEN
1651 dest2global(all_ranks(i)) = i
1652 END IF
1653 END DO
1654 END IF
1655 DEALLOCATE (all_ranks)
1656
1657 ! Some data from the two decompositions will be needed by all processes in the global group :
1658 ! process grid shape, block size, and the BLACS-to-MPI mapping
1659
1660 ! The global root process will receive the data (from the root process in each decomposition)
1661 send_req(:) = mp_request_null
1662 IF (global_rank == 0) THEN
1663 recv_req(:) = mp_request_null
1664 CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1665 CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1666 CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1667 CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1668 END IF
1669
1670 IF (ASSOCIATED(send_dist)) THEN
1671 IF ((send_rank .EQ. 0)) THEN
1672 ! need to use separate buffers here in case this is actually global rank 0
1673 src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
1674 CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1675 CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1676 END IF
1677 END IF
1678
1679 IF (ASSOCIATED(recv_dist)) THEN
1680 IF ((recv_rank .EQ. 0)) THEN
1681 dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
1682 CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1683 CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1684 END IF
1685 END IF
1686
1687 IF (global_rank == 0) THEN
1688 CALL mp_waitall(recv_req(1:4))
1689 ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1690 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1691 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1692 )
1693 CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1694 CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1695 END IF
1696
1697 IF (ASSOCIATED(send_dist)) THEN
1698 IF ((send_rank .EQ. 0)) THEN
1699 CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1700 END IF
1701 END IF
1702
1703 IF (ASSOCIATED(recv_dist)) THEN
1704 IF ((recv_rank .EQ. 0)) THEN
1705 CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1706 END IF
1707 END IF
1708
1709 IF (global_rank == 0) THEN
1710 CALL mp_waitall(recv_req(5:6))
1711 END IF
1712
1713 ! Finally, broadcast the data to all processes in the global communicator
1714 CALL para_env%bcast(src_block, 0)
1715 CALL para_env%bcast(dest_block, 0)
1716 CALL para_env%bcast(src_num_pe, 0)
1717 CALL para_env%bcast(dest_num_pe, 0)
1718 info%src_num_pe(1:2) = src_num_pe(1:2)
1719 info%nblock_src(1:2) = src_block(1:2)
1720 IF (global_rank .NE. 0) THEN
1721 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1722 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1723 )
1724 END IF
1725 CALL para_env%bcast(info%src_blacs2mpi, 0)
1726 CALL para_env%bcast(dest_blacs2mpi, 0)
1727
1728 recv_size = dest_num_pe(1)*dest_num_pe(2)
1729 send_size = src_num_pe(1)*src_num_pe(2)
1730 info%send_size = send_size
1731 CALL mp_waitall(send_req(:))
1732
1733 ! Setup is now complete, we can start the actual communication here.
1734 ! The order implemented here is:
1735 ! DEST_1
1736 ! compute recv sizes
1737 ! call irecv
1738 ! SRC_1
1739 ! compute send sizes
1740 ! pack send buffers
1741 ! call isend
1742 ! DEST_2
1743 ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1744 ! SRC_2
1745 ! wait for the sends
1746
1747 ! DEST_1
1748 IF (ASSOCIATED(recv_dist)) THEN
1749 CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1750 col_indices=recv_col_indices &
1751 )
1752 info%recv_col_indices => recv_col_indices
1753 info%recv_row_indices => recv_row_indices
1754 nrow_block_src = src_block(1)
1755 ncol_block_src = src_block(2)
1756 ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1757
1758 ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1759 nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1760 ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1761 info%nlocal_recv(1) = nrow_local_recv
1762 info%nlocal_recv(2) = ncol_local_recv
1763 ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1764 ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1765 DO i = 1, nrow_local_recv
1766 ! For each local row we will receive, we look up its global row (in recv_row_indices),
1767 ! then work out which row block it comes from, and which process row that row block comes from.
1768 src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1769 END DO
1770 DO j = 1, ncol_local_recv
1771 ! Similarly for the columns
1772 src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1773 END DO
1774 ! src_p/q now contains the process row/column ID that will send data to that row/column
1775
1776 DO q = 0, src_num_pe(2) - 1
1777 ncols = count(src_q .EQ. q)
1778 DO p = 0, src_num_pe(1) - 1
1779 nrows = count(src_p .EQ. p)
1780 ! Use the send_dist here as we are looking up the processes where the data comes from
1781 recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1782 END DO
1783 END DO
1784 DEALLOCATE (src_p, src_q)
1785
1786 ! Use one long buffer (and displacements into that buffer)
1787 ! this prevents the need for a rectangular array where not all elements will be populated
1788 ALLOCATE (info%recv_buf(sum(recv_count(:))))
1789 info%recv_disp(0) = 0
1790 DO i = 1, send_size - 1
1791 info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1792 END DO
1793
1794 ! Issue receive calls on ranks which expect data
1795 DO k = 0, send_size - 1
1796 IF (recv_count(k) .GT. 0) THEN
1797 CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1798 source2global(k), info%recv_request(k))
1799 END IF
1800 END DO
1801 DEALLOCATE (source2global)
1802 END IF ! ASSOCIATED(recv_dist)
1803
1804 ! SRC_1
1805 IF (ASSOCIATED(send_dist)) THEN
1806 CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1807 col_indices=send_col_indices &
1808 )
1809 nrow_block_dest = dest_block(1)
1810 ncol_block_dest = dest_block(2)
1811 ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1812
1813 ! Determine the send counts, allocate the send buffers
1814 nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1815 ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1816
1817 ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1818 ! i.e. number of rows,cols in the sending distribution
1819 ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1820
1821 DO i = 1, nrow_local_send
1822 ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1823 dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1824 END DO
1825 DO j = 1, ncol_local_send
1826 dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1827 END DO
1828 ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1829
1830 DO q = 0, dest_num_pe(2) - 1
1831 ncols = count(dest_q .EQ. q)
1832 DO p = 0, dest_num_pe(1) - 1
1833 nrows = count(dest_p .EQ. p)
1834 send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1835 END DO
1836 END DO
1837 DEALLOCATE (dest_p, dest_q)
1838
1839 ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1840 ALLOCATE (info%send_buf(sum(send_count(:))))
1841 send_disp(0) = 0
1842 DO k = 1, recv_size - 1
1843 send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1844 END DO
1845
1846 ! Loop over the smat, pack the send buffers
1847 send_count(:) = 0
1848 DO j = 1, ncol_local_send
1849 ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1850 dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1851 DO i = 1, nrow_local_send
1852 dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1853 mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1854 send_count(mpi_rank) = send_count(mpi_rank) + 1
1855 info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1856 END DO
1857 END DO
1858
1859 ! For each non-zero send_count, call mpi_isend
1860 DO k = 0, recv_size - 1
1861 IF (send_count(k) .GT. 0) THEN
1862 CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1863 dest2global(k), info%send_request(k))
1864 END IF
1865 END DO
1866 DEALLOCATE (send_count, send_disp, dest2global)
1867 END IF ! ASSOCIATED(send_dist)
1868 DEALLOCATE (dest_blacs2mpi)
1869
1870 END IF !IF (.NOT. cp2k_is_parallel)
1871
1872 CALL timestop(handle)
1873
1874 END SUBROUTINE cp_fm_start_copy_general
1875
1876! **************************************************************************************************
1877!> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1878!> \param destination output fm matrix
1879!> \param info all of the data that will be needed to complete the copy operation
1880! **************************************************************************************************
1881 SUBROUTINE cp_fm_finish_copy_general(destination, info)
1882 TYPE(cp_fm_type), INTENT(IN) :: destination
1883 TYPE(copy_info_type), INTENT(INOUT) :: info
1884
1885 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_finish_copy_general'
1886
1887 INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1888 src_p_i, src_q_j
1889 INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1890 INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1891 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1892
1893 CALL timeset(routinen, handle)
1894
1895 IF (.NOT. cp2k_is_parallel) THEN
1896 ! Now unpack the data from the 'send buffer'
1897 k = 0
1898 DO j = 1, SIZE(destination%local_data, 2)
1899 DO i = 1, SIZE(destination%local_data, 1)
1900 k = k + 1
1901 destination%local_data(i, j) = info%send_buf(k)
1902 END DO
1903 END DO
1904 DEALLOCATE (info%send_buf)
1905 ELSE
1906 ! Set up local variables ...
1907 send_size = info%send_size
1908 nlocal_recv(1:2) = info%nlocal_recv(:)
1909 nblock_src(1:2) = info%nblock_src(:)
1910 src_num_pe(1:2) = info%src_num_pe(:)
1911 recv_col_indices => info%recv_col_indices
1912 recv_row_indices => info%recv_row_indices
1913
1914 ! ... use the local variables to do the work
1915 ! DEST_2
1916 CALL mp_waitall(info%recv_request(:))
1917 ALLOCATE (recv_count(0:send_size - 1))
1918 ! Loop over the rmat, filling it in with data from the recv buffers
1919 ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1920 recv_count(:) = 0
1921 DO j = 1, nlocal_recv(2)
1922 src_q_j = mod(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
1923 DO i = 1, nlocal_recv(1)
1924 src_p_i = mod(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
1925 mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
1926 recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1927 destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1928 END DO
1929 END DO
1930 DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1931 ! Invalidate the stored state
1932 NULLIFY (info%recv_col_indices, &
1933 info%recv_row_indices)
1934
1935 END IF
1936
1937 CALL timestop(handle)
1938
1939 END SUBROUTINE cp_fm_finish_copy_general
1940
1941! **************************************************************************************************
1942!> \brief Completes the copy operation: wait for comms clean up MPI state
1943!> \param info all of the data that will be needed to complete the copy operation
1944! **************************************************************************************************
1946 TYPE(copy_info_type), INTENT(INOUT) :: info
1947
1948 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_cleanup_copy_general'
1949
1950 INTEGER :: handle
1951
1952 CALL timeset(routinen, handle)
1953
1954 IF (.NOT. cp2k_is_parallel) THEN
1955 ! Don't do anything - no MPI state for the serial case
1956 ELSE
1957 ! SRC_2
1958 ! If this process is also in the destination decomposition, this deallocate
1959 ! Was already done in cp_fm_finish_copy_general
1960 IF (ALLOCATED(info%src_blacs2mpi)) THEN
1961 DEALLOCATE (info%src_blacs2mpi)
1962 END IF
1963 CALL mp_waitall(info%send_request)
1964 DEALLOCATE (info%send_request, info%send_buf)
1965
1966 END IF
1967
1968 CALL timestop(handle)
1969
1970 END SUBROUTINE cp_fm_cleanup_copy_general
1971
1972! **************************************************************************************************
1973!> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
1974!> The two matrices can have different contexts.
1975!>
1976!> Summary of distribution routines for dense matrices
1977!> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
1978!>
1979!> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
1980!>
1981!> A process that is not a part of the context of A should set descA(2)
1982!> to -1, and similarly for B.
1983!>
1984!> \param source input fm matrix
1985!> \param destination output fm matrix
1986!> \param nrows number of rows of sub matrix to be copied
1987!> \param ncols number of cols of sub matrix to be copied
1988!> \param s_firstrow starting global row index of sub matrix in source
1989!> \param s_firstcol starting global col index of sub matrix in source
1990!> \param d_firstrow starting global row index of sub matrix in destination
1991!> \param d_firstcol starting global col index of sub matrix in destination
1992!> \param global_context process grid that covers all parts of either A or B.
1993! **************************************************************************************************
1994 SUBROUTINE cp_fm_to_fm_submat_general(source, &
1995 destination, &
1996 nrows, &
1997 ncols, &
1998 s_firstrow, &
1999 s_firstcol, &
2000 d_firstrow, &
2001 d_firstcol, &
2002 global_context)
2003
2004 TYPE(cp_fm_type), INTENT(IN) :: source, destination
2005 INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
2006 d_firstrow, d_firstcol
2007
2008 CLASS(cp_blacs_type), INTENT(IN) :: global_context
2009
2010 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat_general'
2011
2012 LOGICAL :: debug
2013 INTEGER :: handle
2014#if defined(__SCALAPACK)
2015 INTEGER, DIMENSION(9) :: desca, descb
2016 REAL(kind=dp), DIMENSION(1, 1), TARGET :: dummy
2017 REAL(kind=dp), DIMENSION(:, :), POINTER :: smat, dmat
2018#endif
2019
2020 CALL timeset(routinen, handle)
2021
2022 debug = debug_this_module
2023
2024 IF (.NOT. cp2k_is_parallel) THEN
2025 CALL cp_fm_to_fm_submat(source, &
2026 destination, &
2027 nrows, &
2028 ncols, &
2029 s_firstrow, &
2030 s_firstcol, &
2031 d_firstrow, &
2032 d_firstcol)
2033 ELSE
2034#ifdef __SCALAPACK
2035 NULLIFY (smat, dmat)
2036 ! check whether source is available on this process
2037 IF (ASSOCIATED(source%matrix_struct)) THEN
2038 desca = source%matrix_struct%descriptor
2039 IF (source%use_sp) cpabort("only DP kind implemented")
2040 IF (nrows .GT. source%matrix_struct%nrow_global) &
2041 cpabort("nrows is greater than nrow_global of source")
2042 IF (ncols .GT. source%matrix_struct%ncol_global) &
2043 cpabort("ncols is greater than ncol_global of source")
2044 smat => source%local_data
2045 ELSE
2046 desca = -1
2047 smat => dummy
2048 END IF
2049 ! check destination is available on this process
2050 IF (ASSOCIATED(destination%matrix_struct)) THEN
2051 descb = destination%matrix_struct%descriptor
2052 IF (destination%use_sp) cpabort("only DP kind implemented")
2053 IF (nrows .GT. destination%matrix_struct%nrow_global) &
2054 cpabort("nrows is greater than nrow_global of destination")
2055 IF (ncols .GT. destination%matrix_struct%ncol_global) &
2056 cpabort("ncols is greater than ncol_global of destination")
2057 dmat => destination%local_data
2058 ELSE
2059 descb = -1
2060 dmat => dummy
2061 END IF
2062 ! do copy
2063
2064 CALL pdgemr2d(nrows, &
2065 ncols, &
2066 smat, &
2067 s_firstrow, &
2068 s_firstcol, &
2069 desca, &
2070 dmat, &
2071 d_firstrow, &
2072 d_firstcol, &
2073 descb, &
2074 global_context%get_handle())
2075#else
2076 mark_used(global_context)
2077 cpabort("this subroutine only supports SCALAPACK")
2078#endif
2079 END IF
2080
2081 CALL timestop(handle)
2082
2083 END SUBROUTINE cp_fm_to_fm_submat_general
2084
2085! **************************************************************************************************
2086!> \brief ...
2087!> \param matrix ...
2088!> \param irow_global ...
2089!> \param icol_global ...
2090!> \param alpha ...
2091! **************************************************************************************************
2092 SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2093
2094 ! Add alpha to the matrix element specified by the global indices
2095 ! irow_global and icol_global
2096
2097 ! - Creation (05.05.06,MK)
2098
2099 TYPE(cp_fm_type), INTENT(IN) :: matrix
2100 INTEGER, INTENT(IN) :: irow_global, icol_global
2101 REAL(kind=dp), INTENT(IN) :: alpha
2102
2103 INTEGER :: mypcol, myprow, npcol, nprow
2104 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2105 TYPE(cp_blacs_env_type), POINTER :: context
2106#if defined(__SCALAPACK)
2107 INTEGER :: icol_local, ipcol, iprow, &
2108 irow_local
2109 INTEGER, DIMENSION(9) :: desca
2110#endif
2111
2112 context => matrix%matrix_struct%context
2113
2114 myprow = context%mepos(1)
2115 mypcol = context%mepos(2)
2116
2117 nprow = context%num_pe(1)
2118 npcol = context%num_pe(2)
2119
2120 a => matrix%local_data
2121
2122#if defined(__SCALAPACK)
2123
2124 desca(:) = matrix%matrix_struct%descriptor(:)
2125
2126 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2127 irow_local, icol_local, iprow, ipcol)
2128
2129 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2130 a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2131 END IF
2132
2133#else
2134
2135 a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2136
2137#endif
2138
2139 END SUBROUTINE cp_fm_add_to_element
2140
2141! **************************************************************************************************
2142!> \brief ...
2143!> \param fm ...
2144!> \param unit ...
2145! **************************************************************************************************
2146 SUBROUTINE cp_fm_write_unformatted(fm, unit)
2147 TYPE(cp_fm_type), INTENT(IN) :: fm
2148 INTEGER, INTENT(IN) :: unit
2149
2150 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_write_unformatted'
2151
2152 INTEGER :: handle, j, max_block, &
2153 ncol_global, nrow_global
2154 TYPE(mp_para_env_type), POINTER :: para_env
2155#if defined(__SCALAPACK)
2156 INTEGER :: i, i_block, icol_local, &
2157 in, info, ipcol, &
2158 iprow, irow_local, &
2159 mepos, &
2160 num_pe, rb, tag
2161 INTEGER, DIMENSION(9) :: desc
2162 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2163 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2164 TYPE(cp_blacs_type) :: ictxt_loc
2165 INTEGER, EXTERNAL :: numroc
2166#endif
2167
2168 CALL timeset(routinen, handle)
2169 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2170 para_env=para_env)
2171
2172#if defined(__SCALAPACK)
2173 num_pe = para_env%num_pe
2174 mepos = para_env%mepos
2175 rb = nrow_global
2176 tag = 0
2177 ! get a new context
2178 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2179 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2180 cpassert(info == 0)
2181 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2182 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2183 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2184
2185 ALLOCATE (newdat(nrow_global, max(1, in)))
2186
2187 ! do the actual scalapack to cols reordering
2188 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2189 fm%matrix_struct%descriptor, &
2190 newdat, 1, 1, desc, ictxt_loc%get_handle())
2191
2192 ALLOCATE (vecbuf(nrow_global*max_block))
2193 vecbuf = huge(1.0_dp) ! init for valgrind
2194
2195 DO i = 1, ncol_global, max(max_block, 1)
2196 i_block = min(max_block, ncol_global - i + 1)
2197 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2198 irow_local, icol_local, iprow, ipcol)
2199 IF (ipcol == mypcol) THEN
2200 DO j = 1, i_block
2201 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2202 END DO
2203 END IF
2204
2205 IF (ipcol == 0) THEN
2206 ! do nothing
2207 ELSE
2208 IF (ipcol == mypcol) THEN
2209 CALL para_env%send(vecbuf(:), 0, tag)
2210 END IF
2211 IF (mypcol == 0) THEN
2212 CALL para_env%recv(vecbuf(:), ipcol, tag)
2213 END IF
2214 END IF
2215
2216 IF (unit > 0) THEN
2217 DO j = 1, i_block
2218 WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2219 END DO
2220 END IF
2221
2222 END DO
2223 END associate
2224 DEALLOCATE (vecbuf)
2225
2226 CALL ictxt_loc%gridexit()
2227
2228 DEALLOCATE (newdat)
2229
2230#else
2231
2232 IF (unit > 0) THEN
2233 DO j = 1, ncol_global
2234 WRITE (unit) fm%local_data(:, j)
2235 END DO
2236 END IF
2237
2238#endif
2239 CALL timestop(handle)
2240
2241 END SUBROUTINE cp_fm_write_unformatted
2242
2243! **************************************************************************************************
2244!> \brief Write out a full matrix in plain text.
2245!> \param fm the matrix to be outputted
2246!> \param unit the unit number for I/O
2247!> \param header optional header
2248!> \param value_format ...
2249! **************************************************************************************************
2250 SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2251 TYPE(cp_fm_type), INTENT(IN) :: fm
2252 INTEGER, INTENT(IN) :: unit
2253 CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2254
2255 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_write_formatted'
2256
2257 CHARACTER(LEN=21) :: my_value_format
2258 INTEGER :: handle, i, j, max_block, &
2259 ncol_global, nrow_global, &
2260 nrow_block
2261 TYPE(mp_para_env_type), POINTER :: para_env
2262#if defined(__SCALAPACK)
2263 INTEGER :: i_block, icol_local, &
2264 in, info, ipcol, &
2265 iprow, irow_local, &
2266 mepos, num_pe, rb, tag, k, &
2267 icol, irow
2268 INTEGER, DIMENSION(9) :: desc
2269 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2270 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2271 TYPE(cp_blacs_type) :: ictxt_loc
2272 INTEGER, EXTERNAL :: numroc
2273#endif
2274
2275 CALL timeset(routinen, handle)
2276 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2277 nrow_block=nrow_block, para_env=para_env)
2278
2279 IF (PRESENT(value_format)) THEN
2280 cpassert(len_trim(adjustl(value_format)) < 11)
2281 my_value_format = "(I10, I10, "//trim(adjustl(value_format))//")"
2282 ELSE
2283 my_value_format = "(I10, I10, ES24.12)"
2284 END IF
2285
2286 IF (unit > 0) THEN
2287 IF (PRESENT(header)) WRITE (unit, *) header
2288 WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", adjustl("Value")
2289 END IF
2290
2291#if defined(__SCALAPACK)
2292 num_pe = para_env%num_pe
2293 mepos = para_env%mepos
2294 rb = nrow_global
2295 tag = 0
2296 ! get a new context
2297 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2298 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2299 cpassert(info == 0)
2300 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2301 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2302 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2303
2304 ALLOCATE (newdat(nrow_global, max(1, in)))
2305
2306 ! do the actual scalapack to cols reordering
2307 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2308 fm%matrix_struct%descriptor, &
2309 newdat, 1, 1, desc, ictxt_loc%get_handle())
2310
2311 ALLOCATE (vecbuf(nrow_global*max_block))
2312 vecbuf = huge(1.0_dp) ! init for valgrind
2313 irow = 1
2314 icol = 1
2315
2316 DO i = 1, ncol_global, max(max_block, 1)
2317 i_block = min(max_block, ncol_global - i + 1)
2318 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2319 irow_local, icol_local, iprow, ipcol)
2320 IF (ipcol == mypcol) THEN
2321 DO j = 1, i_block
2322 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2323 END DO
2324 END IF
2325
2326 IF (ipcol == 0) THEN
2327 ! do nothing
2328 ELSE
2329 IF (ipcol == mypcol) THEN
2330 CALL para_env%send(vecbuf(:), 0, tag)
2331 END IF
2332 IF (mypcol == 0) THEN
2333 CALL para_env%recv(vecbuf(:), ipcol, tag)
2334 END IF
2335 END IF
2336
2337 IF (unit > 0) THEN
2338 DO j = 1, i_block
2339 DO k = (j - 1)*nrow_global + 1, nrow_global*j
2340 WRITE (unit=unit, fmt=my_value_format) irow, icol, vecbuf(k)
2341 irow = irow + 1
2342 IF (irow > nrow_global) THEN
2343 irow = 1
2344 icol = icol + 1
2345 END IF
2346 END DO
2347 END DO
2348 END IF
2349
2350 END DO
2351 END associate
2352 DEALLOCATE (vecbuf)
2353
2354 CALL ictxt_loc%gridexit()
2355
2356 DEALLOCATE (newdat)
2357
2358#else
2359
2360 IF (unit > 0) THEN
2361 DO j = 1, ncol_global
2362 DO i = 1, nrow_global
2363 WRITE (unit=unit, fmt=my_value_format) i, j, fm%local_data(i, j)
2364 END DO
2365 END DO
2366 END IF
2367
2368#endif
2369 CALL timestop(handle)
2370
2371 END SUBROUTINE cp_fm_write_formatted
2372
2373! **************************************************************************************************
2374!> \brief ...
2375!> \param fm ...
2376!> \param unit ...
2377! **************************************************************************************************
2378 SUBROUTINE cp_fm_read_unformatted(fm, unit)
2379 TYPE(cp_fm_type), INTENT(INOUT) :: fm
2380 INTEGER, INTENT(IN) :: unit
2381
2382 CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_read_unformatted'
2383
2384 INTEGER :: handle, j, max_block, &
2385 ncol_global, nrow_global
2386 TYPE(mp_para_env_type), POINTER :: para_env
2387#if defined(__SCALAPACK)
2388 INTEGER :: k, n_cols
2389 REAL(kind=dp), DIMENSION(:, :), POINTER :: vecbuf
2390#endif
2391
2392 CALL timeset(routinen, handle)
2393
2394 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2395 para_env=para_env)
2396
2397#if defined(__SCALAPACK)
2398
2399 ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2400
2401 ALLOCATE (vecbuf(nrow_global, max_block))
2402
2403 DO j = 1, ncol_global, max_block
2404
2405 n_cols = min(max_block, ncol_global - j + 1)
2406 IF (para_env%mepos == 0) THEN
2407 DO k = 1, n_cols
2408 READ (unit) vecbuf(:, k)
2409 END DO
2410 END IF
2411 CALL para_env%bcast(vecbuf, 0)
2412 CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2413
2414 END DO
2415
2416 DEALLOCATE (vecbuf)
2417
2418#else
2419
2420 DO j = 1, ncol_global
2421 READ (unit) fm%local_data(:, j)
2422 END DO
2423
2424#endif
2425
2426 CALL timestop(handle)
2427
2428 END SUBROUTINE cp_fm_read_unformatted
2429
2430! **************************************************************************************************
2431!> \brief wrapper to scalapack function INDXG2P that computes the process
2432!> coordinate which possesses the entry of a distributed matrix specified
2433!> by a global index INDXGLOB.
2434!>
2435!> Arguments
2436!> =========
2437!>
2438!> INDXGLOB (global input) INTEGER
2439!> The global index of the element.
2440!>
2441!> NB (global input) INTEGER
2442!> Block size, size of the blocks the distributed matrix is
2443!> split into.
2444!>
2445!> IPROC (local dummy) INTEGER
2446!> Dummy argument in this case in order to unify the calling
2447!> sequence of the tool-routines.
2448!>
2449!> ISRCPROC (global input) INTEGER
2450!> The coordinate of the process that possesses the first
2451!> row/column of the distributed matrix.
2452!>
2453!> NPROCS (global input) INTEGER
2454!> The total number processes over which the matrix is
2455!> distributed.
2456!>
2457!> \param INDXGLOB ...
2458!> \param NB ...
2459!> \param IPROC ...
2460!> \param ISRCPROC ...
2461!> \param NPROCS ...
2462!> \return ...
2463!> \author Mauro Del Ben [MDB] - 12.2012
2464! **************************************************************************************************
2465 FUNCTION cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2P)
2466 INTEGER, INTENT(IN) :: indxglob, nb, iproc, isrcproc, nprocs
2467 INTEGER :: g2p
2468
2469#if defined(__SCALAPACK)
2470 INTEGER, EXTERNAL :: indxg2p
2471#endif
2472
2473#if defined(__SCALAPACK)
2474
2475 g2p = indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
2476
2477#else
2478 mark_used(indxglob)
2479 mark_used(iproc)
2480 mark_used(isrcproc)
2481 mark_used(nb)
2482 mark_used(nprocs)
2483
2484 g2p = 0
2485
2486#endif
2487
2488 END FUNCTION cp_fm_indxg2p
2489
2490! **************************************************************************************************
2491!> \brief wrapper to scalapack function INDXG2L that computes the local index
2492!> of a distributed matrix entry pointed to by the global index INDXGLOB.
2493!>
2494!> Arguments
2495!> =========
2496!>
2497!> INDXGLOB (global input) INTEGER
2498!> The global index of the distributed matrix entry.
2499!>
2500!> NB (global input) INTEGER
2501!> Block size, size of the blocks the distributed matrix is
2502!> split into.
2503!>
2504!> IPROC (local dummy) INTEGER
2505!> Dummy argument in this case in order to unify the calling
2506!> sequence of the tool-routines.
2507!>
2508!> ISRCPROC (local dummy) INTEGER
2509!> Dummy argument in this case in order to unify the calling
2510!> sequence of the tool-routines.
2511!>
2512!> NPROCS (global input) INTEGER
2513!> The total number processes over which the distributed
2514!> matrix is distributed.
2515!>
2516!> \param INDXGLOB ...
2517!> \param NB ...
2518!> \param IPROC ...
2519!> \param ISRCPROC ...
2520!> \param NPROCS ...
2521!> \return ...
2522!> \author Mauro Del Ben [MDB] - 12.2012
2523! **************************************************************************************************
2524 FUNCTION cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2L)
2525 INTEGER, INTENT(IN) :: indxglob, nb, iproc, isrcproc, nprocs
2526 INTEGER :: g2l
2527
2528#if defined(__SCALAPACK)
2529 INTEGER, EXTERNAL :: indxg2l
2530#endif
2531
2532#if defined(__SCALAPACK)
2533
2534 g2l = indxg2l(indxglob, nb, iproc, isrcproc, nprocs)
2535
2536#else
2537 mark_used(iproc)
2538 mark_used(isrcproc)
2539 mark_used(nb)
2540 mark_used(nprocs)
2541 mark_used(indxglob)
2542
2543 g2l = indxglob
2544
2545#endif
2546
2547 END FUNCTION cp_fm_indxg2l
2548
2549! **************************************************************************************************
2550!> \brief wrapper to scalapack function INDXL2G that computes the global index
2551!> of a distributed matrix entry pointed to by the local index INDXLOC
2552!> of the process indicated by IPROC.
2553!>
2554!> Arguments
2555!> =========
2556!>
2557!> INDXLOC (global input) INTEGER
2558!> The local index of the distributed matrix entry.
2559!>
2560!> NB (global input) INTEGER
2561!> Block size, size of the blocks the distributed matrix is
2562!> split into.
2563!>
2564!> IPROC (local input) INTEGER
2565!> The coordinate of the process whose local array row or
2566!> column is to be determined.
2567!>
2568!> ISRCPROC (global input) INTEGER
2569!> The coordinate of the process that possesses the first
2570!> row/column of the distributed matrix.
2571!>
2572!> NPROCS (global input) INTEGER
2573!> The total number processes over which the distributed
2574!> matrix is distributed.
2575!>
2576!> \param INDXLOC ...
2577!> \param NB ...
2578!> \param IPROC ...
2579!> \param ISRCPROC ...
2580!> \param NPROCS ...
2581!> \return ...
2582!> \author Mauro Del Ben [MDB] - 12.2012
2583! **************************************************************************************************
2584 FUNCTION cp_fm_indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS) RESULT(L2G)
2585 INTEGER, INTENT(IN) :: indxloc, nb, iproc, isrcproc, nprocs
2586 INTEGER :: l2g
2587
2588#if defined(__SCALAPACK)
2589 INTEGER, EXTERNAL :: indxl2g
2590#endif
2591
2592#if defined(__SCALAPACK)
2593
2594 l2g = indxl2g(indxloc, nb, iproc, isrcproc, nprocs)
2595
2596#else
2597 mark_used(iproc)
2598 mark_used(isrcproc)
2599 mark_used(nb)
2600 mark_used(nprocs)
2601
2602 l2g = indxloc
2603
2604#endif
2605
2606 END FUNCTION cp_fm_indxl2g
2607
2608! **************************************************************************************************
2609!> \brief ...
2610!> \param mult_type ...
2611! **************************************************************************************************
2612 SUBROUTINE cp_fm_setup(mult_type)
2613 INTEGER, INTENT(IN) :: mult_type
2614
2615 mm_type = mult_type
2616 END SUBROUTINE cp_fm_setup
2617
2618! **************************************************************************************************
2619!> \brief ...
2620!> \return ...
2621! **************************************************************************************************
2622 FUNCTION cp_fm_get_mm_type() RESULT(res)
2623 INTEGER :: res
2624
2625 res = mm_type
2626 END FUNCTION
2627
2628! **************************************************************************************************
2629!> \brief ...
2630!> \param ictxt ...
2631!> \param prec ...
2632!> \return ...
2633! **************************************************************************************************
2634 FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2635 INTEGER :: ictxt
2636 CHARACTER(LEN=1) :: prec
2637 INTEGER :: res
2638#if defined(__SCALAPACK)
2639 INTEGER :: pilaenv
2640 res = pilaenv(ictxt, prec)
2641#else
2642 mark_used(ictxt)
2643 mark_used(prec)
2644 res = -1
2645#endif
2646
2647 END FUNCTION
2648
2649END MODULE cp_fm_types
methods related to the blacs parallel environment
wrappers for the actual blacs calls. all functionality needed in the code should actually be provide ...
represent the structure of a full matrix
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
subroutine, public cp_fm_struct_retain(fmstruct)
retains a full matrix structure
subroutine, public cp_fm_struct_write_info(fmstruct, io_unit)
Write nicely formatted info about the FM struct to the given I/O unit.
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_copy_general(source, destination, para_env)
General copy of a fm matrix to another fm matrix. Uses non-blocking MPI rather than ScaLAPACK.
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_start_copy_general(source, destination, para_env, info)
Initiates the copy operation: get distribution data, post MPI isend and irecvs.
integer function, public cp_fm_indxl2g(indxloc, nb, iproc, isrcproc, nprocs)
wrapper to scalapack function INDXL2G that computes the global index of a distributed matrix entry po...
subroutine, public cp_fm_cleanup_copy_general(info)
Completes the copy operation: wait for comms clean up MPI state.
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
integer function, public cp_fm_get_mm_type()
...
subroutine, public cp_fm_vectorssum(matrix, sum_array, dir)
summing up all the elements along the matrix's i-th index or
integer function, public cp_fm_indxg2l(indxglob, nb, iproc, isrcproc, nprocs)
wrapper to scalapack function INDXG2L that computes the local index of a distributed matrix entry poi...
subroutine, public cp_fm_write_unformatted(fm, unit)
...
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
...
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_read_unformatted(fm, unit)
...
subroutine, public cp_fm_vectorsnorm(matrix, norm_array)
find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
subroutine, public cp_fm_setup(mult_type)
...
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat_general(source, destination, nrows, ncols, s_firstrow, s_firstcol, d_firstrow, d_firstcol, global_context)
General copy of a submatrix of fm matrix to a submatrix of another fm matrix. The two matrices can ha...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
integer function, public cp_fm_indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
wrapper to scalapack function INDXG2P that computes the process coordinate which possesses the entry ...
subroutine, public cp_fm_finish_copy_general(destination, info)
Completes the copy operation: wait for comms, unpack, clean up MPI state.
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_to_fm_triangular(msource, mtarget, uplo)
copy just a triangular matrix
integer function, public cp_fm_pilaenv(ictxt, prec)
...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
subroutine, public cp_fm_write_formatted(fm, unit, header, value_format)
Write out a full matrix in plain text.
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Interface to the message passing library MPI.
integer, parameter, public mp_proc_null
logical, parameter, public cp2k_is_parallel
integer, parameter, public mp_any_source
type(mp_request_type), parameter, public mp_request_null
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
integer, parameter, public uniform
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
Stores the state of a copy between cp_fm_start_copy_general and cp_fm_finish_copy_general.
just to build arrays of pointers to matrices
represent a full matrix
stores all the informations relevant to an mpi environment